# File name : Simple.pm
package WWW::Search::Simple;

use strict;
use warnings;

=head1 NAME

WWW::Search::Simple - class for searching any web site

=head1 SYNOPSIS

    require WWW::Search;
    $search = new WWW::Search('Simple');

=head1 DESCRIPTION

This class is a specialization of WWW::Search for simple web based
search indices.  It extracts all links from a given page.

This class exports no public interface; all interaction should be
done through WWW::Search objects.

Note that this module will probably get a lot of false hits.

=head1 AUTHOR

C<WWW::Search::Simple> is written by Paul Lindner, <lindner@itu.int>

=head1 COPYRIGHT

Copyright (c) 1997,98 by the United Nations Administrative Committee
on Coordination (ACC)

All rights reserved.

=cut

use base 'WWW::Search';

use Carp ();
use HTML::TreeBuilder;
use URI::URL;
use WWW::SearchResult;

# Set to a true value to print trace messages to STDOUT.
my $debug = 0;

# _native_setup_search($native_query, $native_opt)
#
# Builds the URL that _native_retrieve_some will fetch and stores it on the
# object in {_base_url} and {_next_url}; also resets {_next_to_retrieve}.
#
#   $native_query - query string substituted for every "%s" in the URL.
#                   An undefined query is treated as the empty string.
#   $native_opt   - optional hash reference.  When it supplies 'search_args'
#                   and a 'search_url' is available (from the options or,
#                   failing that, from $self->{search_url}), the URL becomes
#                   "<search_url>?<search_args>"; otherwise the built-in
#                   default URL is used.
#
# Returns the URL that was stored.
sub _native_setup_search {
    my ($self, $native_query, $native_opt) = @_;

    my $default_native_url =
        "http://www.itu.int/cgi-bin/SFgate?application=itu&database=local//usr/local/wais/WWW/www-pages&listenv=table&httppath=/usr/local/www-data/&httpprefix=/&tie=and&maxhits=%n&text=%s";

    my $native_url;
    if (defined $native_opt) {
        # The base URL that is tested must be the one that is used: prefer
        # the per-search option and fall back to the value on the object, so
        # the result can never be a bare "?args" string.
        my $search_url  = $native_opt->{'search_url'} || $self->{'search_url'};
        my $search_args = $native_opt->{'search_args'};
        if ($search_url && $search_args) {
            $native_url = $search_url . "?" . $search_args;
        }
    }
    $native_url = $default_native_url if !$native_url;

    # Substitute the search terms for each %s placeholder.
    # NOTE(review): the default URL also contains a "%n" placeholder
    # (maxhits=%n) which is never substituted here -- confirm whether it
    # should be filled with the maximum number of hits.
    $native_query = '' if !defined $native_query;
    $native_url =~ s/%s/$native_query/g;

    $self->user_agent();
    $self->{_next_to_retrieve} = 0;
    $self->{_base_url} = $self->{_next_url} = $native_url;

    return $native_url;
} # _native_setup_search

# _native_retrieve_some()
#
# Fetches the page at {_next_url}, turns each <a> link whose content is plain
# text into a WWW::SearchResult pushed onto {cache}, and clears {_next_url}
# so that the next call exits immediately.
#
# Every link found (kept or not) lowers the score given to later hits: the
# score starts at 800 and is multiplied by .95 (truncated) per link.
#
# Returns the number of hits added, or nothing (undef in scalar context)
# when there is no URL left to fetch or the HTTP request failed.
sub _native_retrieve_some {
    my ($self) = @_;

    # Fast exit if already done.
    return if !defined $self->{_next_url};

    # Get some.
    print "GET " . $self->{_next_url} . "\n" if $debug;
    my $response = $self->http_request($self->{search_method}, $self->{_next_url});
    $self->{response} = $response;
    if (!$response->is_success) {
        print "Some problem\n" if $debug;
        return;
    }

    my $tree = HTML::TreeBuilder->new;
    $tree->parse($response->content());
    $tree->eof;    # tell the parser the document is complete

    my $hits_found = 0;
    my $score      = 800;
    for my $link_info (@{ $tree->extract_links(qw(a)) }) {
        my ($link, $linkelem) = @{$link_info};
        my $linkobj = URI::URL->new($link, $self->{_next_url});
        print "Fixing $link\n" if $debug;

        # content() returns undef for an empty element; treat it as no content.
        my @content = @{ $linkelem->content || [] };

        # Only links made purely of text become hits.  Nested elements are
        # references in the content list and cannot form a readable title.
        if (!grep { ref } @content) {
            my $hit = WWW::SearchResult->new;
            $hit->add_url($linkobj->abs->as_string());
            $hit->title(join(' ', @content));
            $hit->score($score);
            $hit->normalized_score($score);
            $hits_found++;
            push @{ $self->{cache} }, $hit;
        }
        $score = int($score * .95);
    }

    # Titles and URLs were copied out as strings, so the tree can be freed.
    $tree->delete;

    $self->approximate_result_count($hits_found);
    $self->{_next_url} = undef;
    return $hits_found;
} # _native_retrieve_some

1;