Re: [greenstone-devel] build problem

From John R. McPherson
Date Fri, 30 Jul 2004 11:56:13 +1200
Subject Re: [greenstone-devel] build problem
In-Reply-To (5-2-0-9-2-20040720153950-019db3e8-tofa-pobox-stanford-edu)
Tom Farrell wrote:

> We do a build at each step of the way, using a maxdocs of 20, to make sure
> it works. It does, and the collection looks and behaves well. The problem
> is that when we increase the number of docs in the build to anything over
> 22, the build fails with the error:
>
> "buildcol.pl> GAPLug: processing HASH47f5.dirdoc.xml
> buildcol.pl> WARNING: No plugin could process HASH47f5.dirdoc.xml
> buildcol.pl> Not a GLOB reference at C:\Program
> Files\gsdl/perllib/gsprintf.pm line 61.
> buildcol.pl> Command failed."
>
> It doesn't matter which actual document is processed as number 23; the
> error always appears at that point.
>
> Anyone have any ideas - it's a bit frustrating.

Hi,
after some digging around, I think the problem is to do with greenstone
printing out the error message... it might be trying to print out the
"WARNING: No plugin could process ..." message to an uninitialised
filehandle, which is why the gsprintf module complains.

I've attached a modified plugin.pm that should take the place of the one
in <gsdldir>\perllib\plugin.pm - can you let us know if this fixes it or
not?

Incidentally, if an archive doc.xml can't be processed, it normally
means that a file has the wrong encoding and is not in valid utf-8...

John McPherson


<<attachment>>
Type: text/plain
Filename: plugin.pm

###########################################################################
#
# plugin.pm -- functions to handle using plugins
# A component of the Greenstone digital library software
# from the New Zealand Digital Library Project at the
# University of Waikato, New Zealand.
#
# Copyright (C) 1999 New Zealand Digital Library Project
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
#
###########################################################################

package plugin;

use strict; # to pick up typos and undeclared variables...
no strict 'refs'; # ...but allow filehandles to be variables and vice versa

require util;
use gsprintf;

# global variables
#
# $stats holds the running counters that write_stats() reports.  Within
# this file only 'num_not_processed' and 'num_not_recognised' are
# incremented (both in read()); the whole hash is also handed to every
# plugin's compile_stats() method, which presumably fills in the remaining
# counters -- confirm against the plugin base class.
my $stats = {'num_processed' => 0,
'num_blocked' => 0,
'num_not_processed' => 0,
'num_not_recognised' => 0,
'num_archives' => 0
};
# Assigned by load_plugins() from its arguments and read later by read(),
# so load_plugins() must run before read() for the handles to be set.
my ($verbosity, $outhandle, $failhandle, $globaloptions);


# Local shorthand so the rest of this package can call gsprintf(...)
# unqualified: forwards every argument, unchanged, to gsprintf::gsprintf
# and returns whatever that call returns.
sub gsprintf
{
return &gsprintf::gsprintf(@_);
}

# Locate, load and instantiate every plugin named in $plugin_list.
#
# Arguments:
#   $plugin_list   - array ref; each element is an array ref whose first
#                    item is the plugin name and whose remaining items are
#                    that plugin's options.  The name is shifted off the
#                    caller's array (long-standing behaviour, kept as is).
#   $verbosity     - stored in the file-level global; defaults to 2.
#   $outhandle     - handle used for messages; defaults to 'STDERR'.
#   $failhandle    - handle used for the fail log; defaults to 'STDERR'.
#   $globaloptions - optional array ref of options that should be passed
#                    to all plugins, appended after each plugin's own.
#
# Returns the list of plugin objects, in the order they were listed.
# Dies if a plugin file cannot be found or its constructor fails.
sub load_plugins {
    my ($plugin_list) = shift @_;
    ($verbosity, $outhandle, $failhandle, $globaloptions) = @_; # globals
    my @plugin_objects = ();

    $verbosity = 2 unless defined $verbosity;
    $outhandle = 'STDERR' unless defined $outhandle;
    $failhandle = 'STDERR' unless defined $failhandle;

    # Quote copies of the options: rewriting the caller's array in place
    # would add another layer of quotes every time the same list is reused.
    my @global_opts = defined $globaloptions ? @$globaloptions : ();
    my $globals = join (",", map { '"' . $_ . '"' } @global_opts);

    foreach my $pluginoptions (@$plugin_list) {
        my $pluginname = shift @$pluginoptions;
        next unless defined $pluginname;

        # find the plugin: a collection-specific copy takes precedence
        # over the one shipped in the main installation
        my $colplugname = &util::filename_cat($ENV{'GSDLCOLLECTDIR'},"perllib/plugins",
                                              "${pluginname}.pm");
        my $mainplugname = &util::filename_cat($ENV{'GSDLHOME'},"perllib/plugins",
                                               "${pluginname}.pm");
        if (-e $colplugname) { require $colplugname; }
        elsif (-e $mainplugname) { require $mainplugname; }
        else {
            gsprintf($outhandle, "{plugin.could_not_find_plugin}\n",
                     $pluginname);
            # the trailing newline stops perl appending "at ... line N"
            die "\n";
        }

        # create a plugin object
        my ($plugobj);
        my $options = join (",", map { '"' . $_ . '"' } @$pluginoptions);
        if ($globals) {
            $options .= "," if @$pluginoptions;
            $options .= $globals;
        }
        # The option list is spliced into a double-quoted context inside
        # the string eval below, so sigils must be escaped or perl would
        # try to interpolate variables named in the option values.
        $options =~ s/\$/\\\$/g;
        $options =~ s/\@/\\\@/g;

        # $plugobj and $pluginname are escaped so that they are evaluated
        # inside the eval rather than interpolated into its source text.
        eval ("\$plugobj = \$pluginname->new($options)");
        die "$@" if $@;

        # initialize plugin
        $plugobj->init($verbosity, $outhandle, $failhandle);

        # add this object to the list
        push (@plugin_objects, $plugobj);
    }

    return @plugin_objects;
}


# Tell every plugin in $pluginfo that a run is starting by calling its
# begin() method with the same arguments this function received.
# Returns whatever the individual begin() calls returned, in plugin order.
sub begin {
    my ($pluginfo, $base_dir, $processor, $maxdocs) = @_;

    return map {
        $_->begin($pluginfo, $base_dir, $processor, $maxdocs)
    } @{$pluginfo};
}

# Offer $file to each plugin in turn until one of them deals with it.
#
# Arguments are passed straight through to each plugin's read() method:
#   $pluginfo  - array ref of plugin objects, tried in order
#   $base_dir, $file, $metadata, $processor, $aux - forwarded untouched
#   $maxdocs   - forwarded; replaced by -1 when undefined or digit-free
#   $gli       - true to emit GLI progress tags on STDERR (default 0)
#
# A plugin's read() must return:
#   undef - could not recognise the file
#   -1    - recognised it, but processing failed
#   0     - blocked
#   anything else for successful processing
#
# Returns the first defined value other than -1 that a plugin returns.
# If no plugin gives one, the failure is reported on the out/fail handles,
# the matching counter in $stats is bumped, and 0 is returned.
# Dies if a ".kill" file exists in the collection directory.
sub read {
    my ($pluginfo, $base_dir, $file, $metadata, $processor, $maxdocs, $gli, $aux) = @_;

    $maxdocs = -1 unless defined $maxdocs && $maxdocs =~ /\d/;
    $gli = 0 unless defined $gli;

    # The handles and verbosity are normally set by load_plugins().  Fall
    # back to the same defaults it uses, so that a failure report can never
    # be sent to an undefined handle ("Not a GLOB reference").
    my $out       = defined $outhandle  ? $outhandle  : 'STDERR';
    my $fail      = defined $failhandle ? $failhandle : 'STDERR';
    my $verbosity = defined $verbosity  ? $verbosity  : 2;

    my $rv = 0;
    my $glifile = $file;
    $glifile =~ s/^[\/\\]+//; # file sometimes starts with a / so get rid of it
    # Announce to GLI that we are handling a file
    print STDERR "<File n='$glifile'>\n" if $gli;

    # the .kill file is a handy (if not very elegant) way of aborting
    # an import.pl or buildcol.pl process
    if (-e &util::filename_cat ($ENV{'GSDLCOLLECTDIR'}, ".kill")) {
        gsprintf($out, "{plugin.kill_file}\n");
        die "\n";
    }

    my $had_error = 0;
    # pass this file by each of the plugins in turn until one
    # is found which will process it
    foreach my $plugobj (@$pluginfo) {
        $rv = $plugobj->read($pluginfo, $base_dir, $file,
                             $metadata, $processor, $maxdocs, $gli, $aux);
        next unless defined $rv; # was not recognised by this plugin

        # anything other than an error settles the matter
        return $rv unless $rv == -1;

        # an error has occurred; keep trying the remaining plugins
        $had_error = 1;
        print STDERR "<ProcessingError n='$glifile'>\n" if $gli;
    }

    # the fail log only shows the file name, whichever path separator
    # (/ or \) the platform used
    (my $basename = $file) =~ s/.*?([^\\\/]+)$/$1/;

    if ($had_error) {
        # was recognised but couldn't be processed
        if ($verbosity >= 2) {
            gsprintf($out, "{plugin.no_plugin_could_process}\n", $file);
        }
        # tell the GLI that it was not processed
        print STDERR "<NonProcessedFile n='$glifile'>\n" if $gli;

        gsprintf($fail, "$basename: {plugin.no_plugin_could_process_this_file}\n");
        $stats->{'num_not_processed'} ++;
    } else {
        # was not recognised
        if ($verbosity >= 2) {
            gsprintf($out, "{plugin.no_plugin_could_recognise}\n", $file);
        }
        # tell the GLI that it was not processed
        print STDERR "<NonRecognisedFile n='$glifile'>\n" if $gli;

        gsprintf($fail, "$basename: {plugin.no_plugin_could_recognise_this_file}\n");
        $stats->{'num_not_recognised'} ++;
    }

    return 0;
}

# write out some general stats that the plugins have compiled - note that
# the buildcol.pl process doesn't currently call this process so the stats
# are only output after import.pl -
#
# Arguments:
#   $pluginfo    - array ref of plugin objects; each one's compile_stats()
#                  is called with the shared $stats hash before reporting
#   $statshandle - handle the human-readable summary is written to
#   $faillog     - passed to the "see fail log" message, which is printed
#                  only when some file was rejected or unrecognised
#   $gli         - true to also emit an <ImportComplete> tag on STDERR
#
# Every summary line is terminated with a newline so that each statistic
# appears on a line of its own.
sub write_stats {
    my ($pluginfo, $statshandle, $faillog, $gli) = @_;

    $gli = 0 unless defined $gli;

    foreach my $plugobj (@$pluginfo) {
        $plugobj->compile_stats($stats);
    }

    my $total = $stats->{'num_processed'} + $stats->{'num_blocked'} +
        $stats->{'num_not_processed'} + $stats->{'num_not_recognised'};

    print STDERR "<ImportComplete considered='$total' processed='$stats->{'num_processed'}' blocked='$stats->{'num_blocked'}' ignored='$stats->{'num_not_recognised'}' failed='$stats->{'num_not_processed'}'>\n" if $gli;

    # total number of files looked at
    if ($total == 1) {
        gsprintf($statshandle, "* {plugin.one_considered}\n");
    } else {
        gsprintf($statshandle, "* {plugin.n_considered}\n", $total);
    }

    # archives are only mentioned when there were any
    if ($stats->{'num_archives'}) {
        if ($stats->{'num_archives'} == 1) {
            gsprintf($statshandle, " ({plugin.including_archive})\n");
        }
        else {
            gsprintf($statshandle, " ({plugin.including_archives})\n",
                     $stats->{'num_archives'});
        }
    }

    # files that made it into the collection (always reported, even if 0)
    if ($stats->{'num_processed'} == 1) {
        gsprintf($statshandle, "* {plugin.one_included}\n");
    } else {
        gsprintf($statshandle, "* {plugin.n_included}\n", $stats->{'num_processed'});
    }

    # files no plugin recognised
    if ($stats->{'num_not_recognised'}) {
        if ($stats->{'num_not_recognised'} == 1) {
            gsprintf($statshandle, "* {plugin.one_unrecognised}\n");
        } else {
            gsprintf($statshandle, "* {plugin.n_unrecognised}\n",
                     $stats->{'num_not_recognised'});
        }
    }

    # files a plugin recognised but failed to process
    if ($stats->{'num_not_processed'}) {
        if ($stats->{'num_not_processed'} == 1) {
            gsprintf($statshandle, "* {plugin.one_rejected}\n");
        } else {
            gsprintf($statshandle, "* {plugin.n_rejected}\n",
                     $stats->{'num_not_processed'});
        }
    }

    # point the user at the fail log if anything went wrong
    if ($stats->{'num_not_processed'} || $stats->{'num_not_recognised'}) {
        gsprintf($statshandle, " {plugin.see_faillog}\n", $faillog);
    }
}

# Tell every plugin in $pluginfo that the run has finished by calling its
# end() method with $processor.  Returns whatever the individual end()
# calls returned, in plugin order.
sub end {
    my ($pluginfo, $processor) = @_;

    return map { $_->end($processor) } @{$pluginfo};
}

1;