package Plucene::Index::TermInfosReader;

=head1 NAME

Plucene::Index::TermInfosReader - read the term infos file

=head1 SYNOPSIS

    my $reader = Plucene::Index::TermInfosReader->new(
        $dir_name, $segment, $fis);

    my Plucene::Index::TermInfo $term_info =
        $reader->get(Plucene::Index::Term $term);

    my Plucene::Index::SegmentTermEnum $enum =
        $reader->terms(Plucene::Index::Term $term);

=head1 DESCRIPTION

This reads a term infos file.

=head1 METHODS

=cut

use strict;
use warnings;

use Memoize;
use Carp qw/confess/;

use Plucene::Index::SegmentTermEnum;
use Plucene::Index::TermInfosWriter;
use Plucene::Store::InputStream;

=head2 new

    my $reader = Plucene::Index::TermInfosReader->new(
        $dir_name, $segment, $fis);

This will create a new Plucene::Index::TermInfosReader object with the passed
directory name, segment name and field infos.

The constructor opens C<$dir_name/$segment.tis> as the main term enumeration
and loads the companion C<$dir_name/$segment.tii> index into memory. It dies
(with a stack trace) if the C<.tis> file is missing or has zero length.

=cut

sub new {
    my ($class, $dir, $seg, $fis) = @_;

    my $file = "$dir/$seg.tis";

    # -s is false for a missing file as well as for an empty one.
    confess("$file does not exist or is empty") unless -s $file;

    my $self = bless {
        directory   => $dir,
        segment     => $seg,
        field_infos => $fis,
        enum        => Plucene::Index::SegmentTermEnum->new(
            Plucene::Store::InputStream->new($file), $fis, 0
        ),
    }, $class;

    $self->{size} = $self->{enum}->size;
    $self->_read_index;
    return $self;
}

# Load every entry of the segment's .tii file into three parallel arrays
# (index_terms, index_infos, index_pointers) that share the same subscript.
# NOTE(review): the trailing 1 passed to SegmentTermEnum->new (0 is passed for
# the .tis file in new()) presumably marks the stream as an index file --
# confirm against Plucene::Index::SegmentTermEnum.
sub _read_index {
    my $self = shift;

    my $index_enum = Plucene::Index::SegmentTermEnum->new(
        Plucene::Store::InputStream->new(
            "$self->{directory}/$self->{segment}.tii"),
        $self->{field_infos},
        1
    );

    $self->{index_terms}    = [];
    $self->{index_infos}    = [];
    $self->{index_pointers} = [];

    while ($index_enum->next) {
        push @{ $self->{index_terms} }, $index_enum->term;

        # Need to clone here: store a fresh TermInfo built from a shallow
        # copy of the enum's current one rather than the enum's own object.
        push @{ $self->{index_infos} },
            Plucene::Index::TermInfo->new({ %{ $index_enum->term_info } });

        push @{ $self->{index_pointers} }, $index_enum->index_pointer;
    }

    return;
}

# NOTE(review): Memoize builds its cache key from the stringified arguments
# ($self and $term). Unless those classes overload stringification the key is
# made of reference addresses, which Perl may reuse for later objects --
# verify that a stale offset can never be served here.
memoize('_get_index_offset');

# Binary-search the in-memory index for $term.
# Returns the subscript of the matching index term, or of the greatest index
# term that sorts before $term. If $term sorts before every index term the
# result is clamped to 0: the raw search result would be -1, and a negative
# subscript in _seek_enum would silently select the *last* index entry.
sub _get_index_offset {
    my ($self, $term) = @_;

    my $lo = 0;
    my $hi = $#{ $self->{index_terms} };

    while ($hi >= $lo) {
        my $mid = ($lo + $hi) >> 1;

        # Terms are comparable, hooray
        my $delta = $term->_cmp($self->{index_terms}->[$mid]);

        if    ($delta < 0) { $hi = $mid - 1 }
        elsif ($delta > 0) { $lo = $mid + 1 }
        else               { return $mid }
    }

    return $hi < 0 ? 0 : $hi;
}

=head2 get

    my Plucene::Index::TermInfo $term_info =
        $reader->get(Plucene::Index::Term $term);

Looks up the passed term and returns the term info held by the underlying
enumeration if the term is present. Returns nothing (undef in scalar context,
the empty list in list context) if the term is not found or the reader's
size is zero.

As a side effect the reader's internal enumeration is left positioned where
the scan stopped.

=cut

sub get {
    my ($self, $term) = @_;

    return unless $self->{size};

    $self->_seek_enum($self->_get_index_offset($term));
    return $self->_scan_enum($term);
}

# Reposition the main enumeration at index entry number $offset, handing it
# the stored pointer, term and term info for that entry. The position passed
# is one less than $offset * INDEX_INTERVAL.
sub _seek_enum {
    my ($self, $offset) = @_;

    $self->{enum}->seek(
        $self->{index_pointers}->[$offset],
        $offset * Plucene::Index::TermInfosWriter::INDEX_INTERVAL() - 1,
        $self->{index_terms}->[$offset],
        $self->{index_infos}->[$offset]
    );
}

# Step the main enumeration forward while $term is greater than the
# enumeration's current term and next() keeps succeeding. Returns the current
# term info on an exact match, otherwise nothing.
sub _scan_enum {
    my ($self, $term) = @_;

    my $enum = $self->{enum};

    1 while $term->gt($enum->term) && $enum->next;

    return $enum->term_info
        if $enum->term and $enum->term->eq($term);
    return;
}

=head2 get_int / get_position

These are never called.

=cut

sub get_int { }

sub get_position { }

=head2 terms

    my Plucene::Index::SegmentTermEnum $enum =
        $reader->terms(Plucene::Index::Term $term);

This will return the Plucene::Index::SegmentTermEnum for the passed-in
Plucene::Index::Term.

When a term is given, the reader's enumeration is first positioned by looking
that term up (see C<get>); when no term is given it is sought to the first
index entry. In both cases a clone of the enumeration is returned.

=cut

sub terms {
    my ($self, $term) = @_;

    # Both branches are run only for their effect on the enum's position.
    if ($term) {
        $self->get($term);
    }
    else {
        $self->_seek_enum(0);
    }

    return $self->{enum}->clone;
}

1;