package Plucene::Index::Writer;

=head1 NAME

Plucene::Index::Writer - write an index.

=head1 SYNOPSIS

	my $writer = Plucene::Index::Writer->new($path, $analyser, $create);
	$writer->add_document($doc);
	$writer->add_indexes(@dirs);
	$writer->optimize; # called before close

	my $doc_count = $writer->doc_count;

	my $mergefactor = $writer->mergefactor;
	$writer->set_mergefactor($value);

=head1 DESCRIPTION

This is the writer class.

If an index will not have more documents added for a while and optimal
search performance is desired, then the C<optimize> method should be
called before the index is closed.

=head1 METHODS

=cut

use strict;
use warnings;

use Carp qw/cluck croak/;
use Fcntl qw(O_EXCL O_CREAT O_WRONLY);
use File::Path qw(mkpath);
use List::Util qw(sum);
use File::Temp qw(tempdir);

use Plucene::Index::DocumentWriter;
use Plucene::Index::SegmentInfos;
use Plucene::Index::SegmentInfo;
use Plucene::Index::SegmentReader;
use Plucene::Index::SegmentMerger;
use Plucene::Utils;

# Documents are truncated to this many terms when indexed.
use constant MAX_FIELD_LENGTH => 10_000;

# Upper bound on documents per merged segment; ~0 is the largest UV,
# i.e. effectively unlimited.
our $max_merge_docs = ~0;

=head2 new

	my $writer = Plucene::Index::Writer->new($path, $analyser, $create);

This will create a new Plucene::Index::Writer object.

The third argument to the constructor determines whether a new index is
created, or whether an existing index is opened for the addition of new
documents.
=cut

sub new {
	my ($class, $path, $analyzer, $create) = @_;
	$create = 0 unless defined $create;

	if (!-d $path) {
		croak "Couldn't write into $path - it doesn't exist" unless $create;
		mkpath($path) or croak "Couldn't create $path - $!";
	}

	my $lock = "$path/write.lock";
	my $self = bless {
		directory     => $path,
		analyzer      => $analyzer,
		lock          => $lock,    # There are many like it, but this one is mine
		segmentinfos  => Plucene::Index::SegmentInfos->new,
		tmp_directory => tempdir(CLEANUP => 1),
		mergefactor   => 10,
	}, $class;

	# Take the write lock: O_EXCL makes creation fail if another writer
	# already holds it. The handle itself is not needed afterwards; the
	# lock file is removed in DESTROY.
	sysopen my $lock_fh, $lock, O_EXCL | O_CREAT | O_WRONLY
		or croak "Couldn't get lock";
	close $lock_fh;

	# Reading/writing the segments file must happen under the commit lock.
	do_locked {
		$create
			? $self->{segmentinfos}->write($path)
			: $self->{segmentinfos}->read($path);
	}
	"$path/commit.lock";

	return $self;
}

=head2 mergefactor / set_mergefactor

	my $mergefactor = $writer->mergefactor;
	$writer->set_mergefactor($value);

Get / set the mergefactor. It defaults to 10.

=cut

sub mergefactor { $_[0]->{mergefactor} }

# A false value leaves the current factor (or the default 10) in place.
sub set_mergefactor {
	$_[0]->{mergefactor} = $_[1] || $_[0]->mergefactor || 10;
}

# Release the write lock and flush any pending in-memory segments.
sub DESTROY {
	my $self = shift;
	unlink $self->{lock} if $self->{lock};
	$self->_flush;
}

=head2 doc_count

	my $doc_count = $writer->doc_count;

Returns the total number of documents across all segments (0 for an
empty index).

=cut

# sum() of an empty list is undef, so fall back to 0 for an empty index.
sub doc_count {
	(sum map $_->doc_count(), $_[0]->{segmentinfos}->segments) || 0;
}

=head2 add_document

	$writer->add_document($doc);

Adds a document to the index. After the document has been added, a merge
takes place if there are more than
C<$Plucene::Index::Writer::mergefactor> segments in the index. This
defaults to 10, but can be set to whatever value is optimal for your
application.

=cut

sub add_document {
	my ($self, $doc) = @_;
	my $dw =
		Plucene::Index::DocumentWriter->new($self->{tmp_directory},
		$self->{analyzer}, MAX_FIELD_LENGTH);
	my $segname = $self->_new_segname;
	$dw->add_document($segname, $doc);

	# Each new document starts life as its own single-document segment
	# in the temporary directory; merging consolidates them later.
	$self->{segmentinfos}->add_element(
		Plucene::Index::SegmentInfo->new({
				name      => $segname,
				doc_count => 1,
				dir       => $self->{tmp_directory} }));
	$self->_maybe_merge_segments;
}

# Segment names are "_<n>" with a counter kept on the SegmentInfos object.
sub _new_segname {
	"_" . $_[0]->{segmentinfos}->{counter}++    # Urgh
}

# Merge the trailing run of temporary (in-memory directory) segments into
# a real on-disk segment.
sub _flush {
	my $self        = shift;
	my @segs        = $self->{segmentinfos}->segments;
	my $min_segment = $#segs;
	my $doc_count   = 0;

	# Walk back over the contiguous run of tmp-directory segments.
	while ($min_segment >= 0
		and $segs[$min_segment]->dir eq $self->{tmp_directory}) {
		$doc_count += $segs[$min_segment]->doc_count;
		$min_segment--;
	}

	# Decide whether the preceding on-disk segment joins the merge.
	if (   $min_segment < 0
		or ($doc_count + $segs[$min_segment]->doc_count > $self->mergefactor)
		or !($segs[-1]->dir eq $self->{tmp_directory})) {
		$min_segment++;
	}

	# Nothing past the end to merge (_merge_segments also guards this).
	return if $min_segment > @segs;
	$self->_merge_segments($min_segment);
}

=head2 optimize

	$writer->optimize;

Merges all segments together into a single segment, optimizing an index
for search.

This should be the last method called on an indexer, as it invalidates
the writer object.

=cut

sub optimize {
	my $self = shift;
	my $segments;
	while (
		($segments = scalar $self->{segmentinfos}->segments) > 1    # If it's fragmented
		or (
			$segments == 1                                          # or it's not fragmented
			and (
				Plucene::Index::SegmentReader->has_deletions(       # but has deletions
					$self->{segmentinfos}->info(0))))
		) {
		my $minseg = $segments - $self->mergefactor;
		$self->_merge_segments($minseg < 0 ? 0 : $minseg);
	}
}

=head2 add_indexes

	$writer->add_indexes(@dirs);

Merges all segments from an array of indexes into this index.

This may be used to parallelize batch indexing. A large document
collection can be broken into sub-collections. Each sub-collection can
be indexed in parallel, on a different thread, process or machine. The
complete index can then be created by merging sub-collection indexes
with this method.

After this completes, the index is optimized.

=cut

sub add_indexes {
	my ($self, @dirs) = @_;
	$self->optimize;
	for my $dir (@dirs) {
		my $sis = Plucene::Index::SegmentInfos->new;
		$sis->read($dir);
		$self->{segmentinfos}->add_element($_) for $sis->segments;
	}
	$self->optimize;
}

# Incremental segment merger.
# Or even this code - SC
#
# Logarithmic merge: repeatedly merge runs of small trailing segments
# whenever mergefactor or more of them fall below the current size
# target, then raise the target by a factor of mergefactor.
sub _maybe_merge_segments {
	my $self              = shift;
	my $target_merge_docs = $self->mergefactor;
	while ($target_merge_docs <= $max_merge_docs) {
		cluck("No segments defined!") unless $self->{segmentinfos};
		my $min_seg    = scalar $self->{segmentinfos}->segments;
		my $merge_docs = 0;

		# Scan backwards, accumulating segments smaller than the target.
		while (--$min_seg >= 0) {
			my $si = $self->{segmentinfos}->info($min_seg);
			last if $si->doc_count >= $target_merge_docs;
			$merge_docs += $si->doc_count;
		}
		last unless $merge_docs >= $target_merge_docs;
		$self->_merge_segments($min_seg + 1);
		$target_merge_docs *= $self->mergefactor;
	}
}

# Pops segments off of segmentInfos stack down to minSegment, merges
# them, and pushes the merged index onto the top of the segmentInfos stack.
sub _merge_segments {
	my $self        = shift;
	my $min_segment = shift;
	my $mergedname  = $self->_new_segname;
	my $mergedcount = 0;
	my $merger      = Plucene::Index::SegmentMerger->new({
			dir     => $self->{directory},
			segment => $mergedname });
	my @to_delete;
	my @segments = $self->{segmentinfos}->segments;
	return if $#segments < $min_segment;

	for my $si (@segments[ $min_segment .. $#segments ]) {
		my $reader = Plucene::Index::SegmentReader->new($si);
		$merger->add($reader);

		# Only segments we own (final or temporary directory) may be
		# deleted after the merge.
		push @to_delete, $reader
			if $reader->directory eq $self->{directory}
			or $reader->directory eq $self->{tmp_directory};
		$mergedcount += $si->doc_count;
	}
	$merger->merge;

	# Replace the merged tail with the single new segment.
	$self->{segmentinfos}->{segments} =    # This is a bit naughty
		[
		($self->{segmentinfos}->segments)[ 0 .. $min_segment - 1 ],
		Plucene::Index::SegmentInfo->new({
				name      => $mergedname,
				dir       => $self->{directory},
				doc_count => $mergedcount }) ];

	# Commit the new segments file and clean up old files atomically
	# with respect to other processes.
	do_locked {
		$self->{segmentinfos}->write($self->{directory});
		$self->_delete_segments(@to_delete);
	}
	"$self->{directory}/commit.lock";
}

# Delete the files of merged-away segments, plus anything still listed
# in the "deletable" file from earlier attempts; files that cannot be
# removed yet (e.g. still open) are recorded for a later try.
sub _delete_segments {
	my ($self, @to_delete) = @_;
	my @try_later = $self->_delete($self->_read_deletable_files);
	for my $reader (@to_delete) {
		for my $file ($reader->files) {
			push @try_later, $self->_delete("$reader->{directory}/$file");
		}
	}
	$self->_write_deletable_files(@try_later);
}

# Unlink each file; returns the list of files that could not be removed.
sub _delete {
	my ($self, @files) = @_;
	my @failed;
	for (@files) { unlink $_ or push @failed, $_ }
	return @failed;
}

# Read the list of files still pending deletion (one path per line).
sub _read_deletable_files {
	my $self = shift;
	return unless -e (my $dfile = "$self->{directory}/deletable");

	# Three-arg open: the stored path must never be interpreted as a mode.
	open my $fh, '<', $dfile or die "Can't read $dfile: $!";
	chomp(my @files = <$fh>);
	return @files;
}

# Atomically rewrite the "deletable" file via a rename of a fresh copy.
sub _write_deletable_files {
	my ($self, @files) = @_;
	my $dfile = "$self->{directory}/deletable";
	open my $fh, '>', "$dfile.new" or die "Can't write $dfile.new: $!";
	print $fh "$_\n" for @files;

	# Buffered write errors only surface at close, so it must be checked.
	close($fh) or die "Can't close $dfile.new: $!";
	rename "$dfile.new", $dfile
		or die "Can't rename $dfile.new to $dfile: $!";
}

1;