# $Id: Index.pm 53 2015-07-14 23:14:34Z stro $

package CPAN::SQLite::Index;
use strict;
use warnings;

our $VERSION = '0.211';

use English qw/-no_match_vars/;

use CPAN::SQLite::Info;
use CPAN::SQLite::State;
use CPAN::SQLite::Populate;
use CPAN::SQLite::DBI qw($tables);
use File::Spec::Functions qw(catfile);
use File::Basename;
use File::Path;
use HTTP::Tiny;

# Unless log files are disabled, turn on CPAN_SQLITE_DEBUG for the whole
# process (presumably read by the other CPAN::SQLite modules -- verify there).
unless ($ENV{CPAN_SQLITE_NO_LOG_FILES}) {
    $ENV{CPAN_SQLITE_DEBUG} = 1;
}

# Duplicate of the original STDOUT while output is redirected into a log file;
# set by index() and consumed by DESTROY.
our ($oldout);

# Log file name; the suffix is the epoch time at which this module was loaded.
my $log_file = 'cpan_sqlite_log.' . time;

# This is usually already defined in real life, but tests need it to be set
$CPAN::FrontEnd ||= "CPAN::Shell";

# Constructor.
#
# Accepts arbitrary key/value arguments which are stored verbatim in the
# object next to the (initially undefined) 'index' and 'state' slots.
# Dies when both 'setup' and 'reindex' are requested, since reindexing
# needs an already existing database.
sub new {
    my ($class, %args) = @_;
    if ($args{setup} and $args{reindex}) {
        die "Reindexing must be done on an exisiting database";
    }

    my $self = {index => undef, state => undef, %args};
    return bless $self, $class;
}

# Run the complete indexing procedure.
#
# Steps, in order:
#   1. in setup mode, remove an existing database file;
#   2. unless CPAN_SQLITE_NO_LOG_FILES is set, redirect STDOUT to a log file
#      in 'log_dir';
#   3. remove old log files (see below);
#   4. optionally download fresh index files ('update_indices');
#   5. parse the index files, read the database state (skipped in setup
#      mode) and populate the tables.
#
# Progress is reported through $CPAN::FrontEnd. Returns 1 on success and
# nothing as soon as one of the steps 4-5 fails.
sub index {
    my $self  = shift;
    my $setup = $self->{'setup'};

    if ($setup) {
        my $db_name = catfile($self->{'db_dir'}, $self->{db_name});
        if (-f $db_name) {
            $CPAN::FrontEnd->myprint("Removing existing $db_name ... ");
            if (unlink $db_name) {
                $CPAN::FrontEnd->myprint("Done.\n");
            }
            else {
                # Not fatal: carry on with the existing file in place.
                $CPAN::FrontEnd->mywarn("Failed: $!\n");
            }
        }
    }

    my $log = catfile($self->{'log_dir'}, $log_file);
    unless ($ENV{'CPAN_SQLITE_NO_LOG_FILES'}) {
        $oldout = error_fh($log);
    }

    # Log cleanup threshold in days, taken from CPAN_SQLITE_LOG_FILES_CLEANUP
    # and defaulting to 30. A value of 0 or a non-numeric value disables
    # the cleanup.
    my $log_cleanup = $ENV{'CPAN_SQLITE_LOG_FILES_CLEANUP'};
    $log_cleanup = 30 unless defined $log_cleanup;
    if ($log_cleanup and $log_cleanup =~ /^\d+$/) {
        if (opendir(my $DIR, $self->{'log_dir'})) {
            my @files = grep { /cpan_sqlite_log\./ } readdir $DIR;
            closedir $DIR;

            # -C is the age in days (inode change time) relative to the
            # start of the program.
            @files = grep { -C $_ > $log_cleanup }
                     map  { catfile($self->{'log_dir'}, $_) } @files;
            if (@files) {
                $CPAN::FrontEnd->myprint('Cleaning old log files ... ');
                unlink @files;
                $CPAN::FrontEnd->myprint("Done.\n");
            }
        }
    }

    if ($self->{'update_indices'}) {
        $CPAN::FrontEnd->myprint('Fetching index files ... ');
        if ($self->fetch_cpan_indices()) {
            $CPAN::FrontEnd->myprint("Done.\n");
        }
        else {
            $CPAN::FrontEnd->mywarn("Failed\n");
            return;
        }
    }

    $CPAN::FrontEnd->myprint('Gathering information from index files ... ');
    if ($self->fetch_info()) {
        $CPAN::FrontEnd->myprint("Done.\n");
    }
    else {
        $CPAN::FrontEnd->mywarn("Failed\n");
        return;
    }

    # A freshly set up database has no previous state to read.
    unless ($setup) {
        $CPAN::FrontEnd->myprint('Obtaining current state of database ... ');
        if ($self->state()) {
            $CPAN::FrontEnd->myprint("Done.\n");
        }
        else {
            $CPAN::FrontEnd->mywarn("Failed\n");
            return;
        }
    }

    $CPAN::FrontEnd->myprint('Populating database tables ... ');
    if ($self->populate()) {
        $CPAN::FrontEnd->myprint("Done.\n");
    }
    else {
        $CPAN::FrontEnd->mywarn("Failed\n");
        return;
    }

    return 1;
}

# Download the CPAN index files into the local mirror directory ($self->{CPAN}).
#
# A file that exists and was modified less than a day ago is left alone.
# Otherwise the mirrors in $self->{urllist} are tried in order until one of
# them delivers the file and it has been written successfully.
#
# Returns 1 when every index file is present afterwards (a stale copy is
# accepted when no mirror answers); warns and returns nothing when a file
# is missing. Dies when the target directory cannot be created.
sub fetch_cpan_indices {
    my $self = shift;

    my $CPAN    = $self->{CPAN};
    my $indices = {
        '01mailrc.txt.gz'           => 'authors',
        '02packages.details.txt.gz' => 'modules',
        '03modlist.data.gz'         => 'modules',
    };

    # A missing urllist means "no mirrors" instead of a strict-refs crash.
    my @urllist = @{ $self->{urllist} || [] };

    foreach my $index (keys %$indices) {
        my $file = catfile($CPAN, $indices->{$index}, $index);
        next if (-e $file and -M $file < 1);

        my $dir = dirname($file);
        unless (-d $dir) {
            mkpath($dir, 0, oct(755)) or die "Cannot mkpath $dir: $!";
        }

        foreach my $cpan (@urllist) {
            my $from     = join '/', ($cpan, $indices->{$index}, $index);
            my $response = HTTP::Tiny->new->get($from);
            next unless $response and $response->{'success'};

            open(my $FILE, '>', $file) or next;
            binmode $FILE;
            my $written = print {$FILE} $response->{'content'};
            my $closed  = close($FILE);

            # The file is in place: do not bother the remaining mirrors
            # (and do not let them overwrite what was just stored).
            last if $written and $closed;
        }

        unless (-f $file) {
            $CPAN::FrontEnd->mywarn("Cannot retrieve '$file'");
            return;
        }
    }
    return 1;
}

# Parse the index files via CPAN::SQLite::Info and store the result in
# $self->{index}: one object per table (dists, mods, auths, info), each
# blessed into CPAN::SQLite::Index::<table> and holding the matching slice
# of the parsed data under the key 'info'.
#
# Returns 1 on success, nothing when the information cannot be fetched.
sub fetch_info {
    my $self   = shift;
    my %wanted = map { $_ => $self->{$_} } qw(CPAN ignore keep_source_where);
    my $info   = CPAN::SQLite::Info->new(%wanted);
    $info->fetch_info() or return;

    my @tables = qw(dists mods auths info);
    my $index;
    foreach my $table (@tables) {
        my $class = __PACKAGE__ . '::' . $table;
        my $this  = {info => $info->{$table}};
        $index->{$table} = bless $this, $class;
    }
    $self->{index} = $index;
    return 1;
}

# Determine the current state of the database via CPAN::SQLite::State and
# keep the state object in $self->{state}.
#
# Returns 1 on success, nothing on failure.
sub state {
    my $self = shift;

    my %wanted = map { $_ => $self->{$_} } qw(db_name index setup reindex db_dir);
    my $state  = CPAN::SQLite::State->new(%wanted);
    $state->state() or return;
    $self->{state} = $state;
    return 1;
}

# Populate the database tables via CPAN::SQLite::Populate.
#
# Returns 1 on success, nothing on failure.
sub populate {
    my $self   = shift;
    my %wanted = map { $_ => $self->{$_} } qw(db_name index setup state db_dir);
    my $db     = CPAN::SQLite::Populate->new(%wanted);
    $db->populate() or return;
    return 1;
}

# Redirect STDOUT into $file (plain function, not a method).
#
# The file is created/truncated first, then STDOUT is duplicated, reopened
# onto the file, selected and switched to autoflush. Returns the duplicate
# of the previous STDOUT so that it can be restored later. Dies when the
# file cannot be opened.
sub error_fh {
    my $file = shift;
    open(my $tmp, '>', $file) or die "Cannot open $file: $!";
    close $tmp;

    # Should be open(my $saved_stdout, '>&', \*STDOUT); but it fails on 5.6.2.
    # Deliberately unchecked: when STDOUT is already closed there is nothing
    # to save, and the redirection below still works.
    open(my $saved_stdout, '>&STDOUT');
    open(STDOUT, '>', $file) or die "Cannot tie STDOUT to $file: $!";
    select STDOUT;
    $| = 1;
    return $saved_stdout;
}

# Destructor: give STDOUT back once the indexer goes away.
#
# Nothing happens when log files are disabled or when STDOUT was never
# redirected by index() -- in particular STDOUT is not closed in that case.
sub DESTROY {
    return if $ENV{CPAN_SQLITE_NO_LOG_FILES};
    return unless $oldout;

    # Keep the destructor from clobbering the caller's error/exit status.
    local ($@, $!, $?);

    close STDOUT;
    if (open(STDOUT, '>&', $oldout)) {
        # Restored; release the saved duplicate so that a later object
        # does not "restore" a second time.
        close $oldout;
        undef $oldout;
    }
    return;
}

1;

=head1 NAME

CPAN::SQLite::Index - set up or update database tables.

=head1 VERSION

version 0.211

=head1 SYNOPSIS

 my $index = CPAN::SQLite::Index->new(setup => 1);
 $index->index();

=head1 DESCRIPTION

This is the main module used to set up or update the
database tables used to store information from the
CPAN and ppm indices. The creation of the object

 my $index = CPAN::SQLite::Index->new(%args);

accepts two possible arguments:

=over 3

=item * setup =E<gt> 1

This (optional) argument specifies that the database is being set up.
Any existing tables will be dropped.

=item * reindex =E<gt> value

This (optional) argument specifies distribution names that
one would like to reindex in an existing database. These may
be specified as either a scalar, for a single distribution,
or as an array reference for a list of distributions.

=back

=head1 DETAILS

Calling

  $index->index();

will start the indexing procedure.
Various messages detailing the progress will be written to I<STDOUT>, which by default will be captured into a file F<cpan_sqlite_log.dddddddddd>, where the extension is the C<time>