# this is the modules that implements the swished daemon package SWISHED::Core; use strict; use warnings; use SWISHED; # for $SWISHED::VERSION use SWISH::API; use URI::Escape; # for uri_escape() and uri_unescape() use CGI; # for param() use vars qw( %swish_apis ); # persistent hash of indexnames -> SWISHE::APIs sub close_indices { %swish_apis = (); } ############################################ # dosearch() # reads params from CGI::param(), then # print()'s output which is expected to go through a web server to # a client like SWISH::API::Remote sub do_search { # our protocol is based on the swish-e command line exe. # we expect a query string with the following # f=indexname (looks for env var SWISHED_INDEX_INDEXNAME) # (indexname must match /^[a-z_]+\w*$/ and will be uppercased and have # 'SWISHED_INDEX_' prepended. Default is to use SWISHED_INDEX_DEFAULT, # as if 'default' was passed.) # w=search (or absent if no search desired) # m=1 (or absent if no metadata desired) # p=prop,prop2,prop3 # m=max # b=begin # s=sort string # default swish properties include: # swishdocpath swishrank swishdocsize swishtitle swishdbfile # swishlastmodified swishreccount swishfilenum # these read from $r->args under modperl my $w = CGI::param("w") || ""; # word to search for my $h = CGI::param("h") || ""; # do they want Header data returned? my $M = CGI::param("M") || ""; # get metanames? my $P = CGI::param("P") || ""; # get properties? my $b = CGI::param("b") || 0; # begin results at rec num my $m = CGI::param("m") || 10; # max results my $p = CGI::param("p") || "swishdocpath,swishrank,swishtitle"; my @props = split(/,/, $p); # note that we do NO error checking here. my $d = CGI::param("d") || ""; # debug level. not used yet, but documented in PROTOCOL my $s = CGI::param("s") || ""; # sort spec my $f = CGI::param("f") || "DEFAULT"; # the default index is called DEFAULT. # while (my ($k, $v) = each %ENV) { print ("d: ENV{$k} = $ENV{$k}\n"); } # test print("e: swished.modperl: not a valid indexname: $f\n") && return unless $f =~ /^[a-zA-Z]+\w*$/i; # must begin with a letter print("e: swished.modperl: no env var found by name: SWISHED_INDEX_$f\n") && return unless exists($ENV{ "SWISHED_INDEX_$f" }); my $index = $ENV{ "SWISHED_INDEX_$f" }; # the actual full path to the index. # $f is the 'name', like 'DEFAULT' # create the SWISH::API object if there isn't one. TODO: Factor this out. unless( exists $swish_apis{$f} ) { $swish_apis{$f} = SWISH::API->new ( $index ); print("d: pid $$ opened index $index for search '$w'\n"); } my $swish = $swish_apis{$f}; my $search = $swish->New_Search_Object(); #print "Searching for $w in $index\n"; print("e: " . $swish->ErrorString() . "\n") if $swish->Error(); print("k: " . join("&", map { "$_=$props[$_]" } (0 .. $#props)) . "\n" ); # output the k: line with the props they asked for and their indexes, # like k: 0=swishdocpath,1=swishrank,2=swishtitle eval { # they want some kind of descriptive header of meta-data, get it first. if ($h) { my %headers = _get_headers( $swish ); my @parts = map { uri_escape($_) . "=" . uri_escape($headers{$_}) } keys (%headers); print "h: " . join("&", @parts) . "\n"; } if ($M) { my %metas = _get_metanames( $swish ); # hash of ID->"Name,Type" my @parts = map { uri_escape($_) . "=" . uri_escape($metas{$_}) } keys (%metas); print "M: " . join("&", @parts) . "\n"; } if ($P) { my %props = _get_properties( $swish ); my @parts = map { uri_escape($_) . "=" . uri_escape($props{$_}) } keys (%props); print "P: " . join("&", @parts) . "\n"; } }; if ($@) { print("e: $@\n"); }; # show our error fetching the descriptive data. if ($w ne '') { # we have a search term. Do the search. my $results; eval { $search->SetSort( $s ) if $s; $results = $search->Execute( $w ); $results->SeekResult( $b ) if $b; }; if ($@) { print("e: $@\n"); }; my $cnt = 0; eval { print("m: hits=" . $results->Hits() . "&swished_version=$SWISHED::VERSION\n"); no warnings; # skip complaints about undefs from $result->ResultPropertyString() # loop over results and create r: lines while ( ($cnt++ < $m) && (my $result = $results->NextResult() ) ) { print( "r: " . join("&", map { "$_=" . uri_escape($result->ResultPropertyStr( $props[$_] ) ) } (0 .. $#props) ) . "\n" ); } }; if ($@) { print("e: $@\n"); }; } } ##################################################### # _get_headers( $swish ) # based on get_header_info() suggested by pek # returns refs to hash of headers, and lists of metas and properties sub _get_headers { my ($swish) = @_; my %headers; # hash to return my $index_name = ( $swish->IndexNames )[ 0 ]; # assume the first file is representative # for each header name for my $n ($swish->HeaderNames) { # get the value and store it my $val = $swish->HeaderValue($index_name, $n); $val = '' unless defined $val; $headers{$n} = $val; } return %headers; } ############################################ # _get_properties( $swish ) # returns hash of IDs=>"Name,Type" sub _get_properties { my ($swish) = @_; my $index_name = ( $swish->IndexNames )[ 0 ]; # assume the first file is representative my @props = $swish->PropertyList( $index_name ); return _create_metaprop_hash( \@props ); } ############################################ # _get_metanames( $swish ) # returns hash of IDs=>"Name,Type" sub _get_metanames { my ($swish) = @_; my $index_name = ( $swish->IndexNames )[ 0 ]; # assume the first file is representative my @metas = $swish->MetaList( $index_name ); return _create_metaprop_hash( \@metas ); } ############################################ # _create_metaprop_hash( $ref_to_list_of_hashes) # converts values like from $swish->MetaList() or $swish->PropertiesList() # into a convenient hash for SWISHED::Core # returns hash of IDs=>"Name,Type" sub _create_metaprop_hash { my $listref = shift; my %ret; for my $meta (@$listref) { $ret{$meta->ID} = $meta->Name . "," . $meta->Type; } return %ret; } 1; =head1 NAME SWISHED::Core - perl module to provide a persistent swish-e daemon =head1 SYNOPSIS Put lines like the following in your httpd.conf file to use SWISHED as a mod_perl 2.0 handler. See the docs for examples on how to use swished as a CGI or Apache::Registry handler: PerlRequire /usr/local/swished/lib/startup.pl PerlPassEnv SWISHED_INDEX_DEFAULT PerlResponseHandler SWISHED::Handler PerlSetEnv SWISHED_INDEX_DEFAULT /var/lib/sman/sman.index # specify your default index here, above is from # sman-update at http://search.cpan.org/~joshr/Sman/ SetHandler perl-script =head1 DESCRIPTION Swished is the core module providing a persistent swish-e daemon. See SWISHED::swished and SWISHED::Handler for examples. =head1 AUTHOR Josh Rabinowitz =head1 SEE ALSO L, L, L, L, L =cut __END__ # $Log: Core.pm,v $ # Revision 1.10 2006/07/06 18:00:52 joshr # bump to version 0.10, comment and documentation changes # # Revision 1.9 2006/06/17 17:11:10 joshr # MANY changes to add headers, INDEXMETANAMES and INDEXPROPERTIES output; # rewrote pek's code. # # Revision 1.8 2006/06/04 16:59:52 joshr # removed code in prep of pek rewrite # #