# File: TermInfosWriter.pm
package Plucene::Index::TermInfosWriter;

=head1 NAME

Plucene::Index::TermInfosWriter - write to the term infos file

=head1 SYNOPSIS

    my $writer = Plucene::Index::TermInfosWriter->new(
        $dir_name, $segment, $field_infos);

    $writer->add(Plucene::Index::Term $term,
        Plucene::Index::TermInfo $term_info);

    $writer->write_term(Plucene::Index::Term $term);

    $writer->break_ref;

=head1 DESCRIPTION

This will allow for the writing and adding to a term infos file for a
particular segment. It also writes the term infos index.

Two files are produced per segment: C<$segment.tis>, which receives every
term, and C<$segment.tii>, which receives one entry every
C<INDEX_INTERVAL> terms. Each file starts with an integer term count that
is written as zero on creation and filled in when the writer is destroyed.

=head1 METHODS

=cut

use strict;
use warnings;

# One term in every INDEX_INTERVAL is also recorded in the index file.
use constant INDEX_INTERVAL => 128;

use Carp qw(confess carp);

use Plucene::Store::OutputStream;
use Plucene::Index::Term;
use Plucene::Index::TermInfo;

=head2 new

    my $writer = Plucene::Index::TermInfosWriter->new(
        $dir_name, $segment, $field_infos);

This will create a new Plucene::Index::TermInfosWriter object.

The constructor accepts a fourth, internal argument. When it is true the
object writes the index file (C<.tii>) instead of the main file (C<.tis>).
A main writer creates its own index writer, so callers normally pass only
three arguments.

Dies (with a stack trace) if no field infos object is supplied.

=cut

sub new {
    my ($class, $d, $segment, $fis, $is_i) = @_;

    # Check the arguments before any file is opened, so that a failed
    # construction does not leave a partially written file behind.
    confess("No field_infos!") unless $fis;

    my $suffix = $is_i ? "i" : "s";
    my $self   = bless {
        field_infos => $fis,
        is_index    => $is_i,
        size        => 0,
        last_term   =>
            Plucene::Index::Term->new({ field => "", text => "" }),
        last_ti => Plucene::Index::TermInfo->new,

        # This key must match the one that add() reads and updates.
        last_index_pointer => 0,
        output             =>
            Plucene::Store::OutputStream->new("$d/$segment.ti$suffix"),
    }, $class;

    # Placeholder for the term count; DESTROY seeks back and fills it in.
    $self->{output}->write_int(0);

    if (!$is_i) {

        # The main writer owns an index writer, and the index writer points
        # back at the main writer so that it can ask for the main file's
        # current position. This is a reference cycle: see break_ref.
        $self->{other} = $class->new($d, $segment, $fis, 1);
        $self->{other}->{other} = $self;
    }

    return $self;
}

=head2 break_ref

This will break a circular reference.

The main writer and its index writer refer to each other, so neither is
destroyed (and neither term count is written) until this has been called.
Call it once all terms have been added: C<add> on a main writer needs the
reference that this method removes.

=cut

sub break_ref {
    my $self = shift;
    undef $self->{other};
}

=head2 add

    $writer->add(Plucene::Index::Term $term,
        Plucene::Index::TermInfo $term_info);

This will add the term and term info to the term infos file.

=cut

sub add {
    my ($self, $term, $ti) = @_;
    no warnings 'uninitialized';

    my $out       = $self->{output};
    my $last_term = $self->{last_term};
    my $last_ti   = $self->{last_ti};

    # Ordering problems are reported as warnings only; the entry is still
    # written, exactly as before.
    carp sprintf "Can't add out-of-order term %s lt %s (%s lt %s)",
        $term->text, $last_term->text, $term->field, $last_term->field
        if !$self->{is_index} && $term->lt($last_term);
    carp "Frequency pointer out of order"
        if $ti->freq_pointer < $last_ti->freq_pointer;
    carp "Proximity pointer out of order"
        if $ti->prox_pointer < $last_ti->prox_pointer;

    # Every INDEX_INTERVAL-th term, hand the previous term to the index
    # writer before this writer's own state moves on.
    $self->{other}->add($last_term, $last_ti)
        if !$self->{is_index} and (($self->{size} % INDEX_INTERVAL) == 0);

    $self->write_term($term);

    # Pointers are stored as deltas against the previous entry.
    $out->write_vint($ti->doc_freq);
    $out->write_vlong($ti->freq_pointer - $last_ti->freq_pointer);
    $out->write_vlong($ti->prox_pointer - $last_ti->prox_pointer);

    if ($self->{is_index}) {

        # Index entries also record how far the main file has advanced
        # since the previous index entry.
        my $position = $self->{other}->{output}->tell;
        $out->write_vlong($position - $self->{last_index_pointer});
        $self->{last_index_pointer} = $position;
    }

    $self->{last_ti} = $ti->clone;
    $self->{size}++;
}

=head2 write_term

    $writer->write_term(Plucene::Index::Term $term);

This will write the term to the term infos file.

The term is stored as the number of leading characters it shares with the
previously written term, followed by the remaining characters and the
number of the term's field. The term then becomes the "previous" term.

=cut

sub write_term {
    my ($self, $term) = @_;
    no warnings 'uninitialized';

    # Test for definedness, not truth: "0" is a perfectly good term text.
    my $text = $term->text;
    $text = "" unless defined $text;
    my $previous = $self->{last_term}->text;
    $previous = "" unless defined $previous;

    # Find the longest common prefix, character by character. This stays
    # within both strings, and works for numeric values and for characters
    # above 0xFF, where a string XOR would not.
    my $limit =
        length($text) < length($previous)
        ? length($text)
        : length($previous);
    my $start = 0;
    $start++
        while $start < $limit
        && substr($text, $start, 1) eq substr($previous, $start, 1);

    my $out = $self->{output};
    $out->write_vint($start);
    $out->write_string(substr($text, $start));
    $out->write_vint($self->{field_infos}->field_number($term->field));

    $self->{last_term} = $term;
}

# Fill in the term count that new() left as a placeholder at the start of
# the file.
sub DESTROY {
    my $self = shift;

    # The stream may be missing if construction failed part-way, or if it
    # was already released during global destruction.
    my $out = $self->{output} or return;

    $out->seek(0, 0);
    $out->write_int($self->{size});
}

1;