Tom Farrell wrote:
> We do a build at each step of the way, using a maxdocs of 20, to make sure
> it works. It does, and the collection looks and behaves well. The problem
> is that when we increase the number of docs in the build to anything over
> 22, the build fails with the error:
>
> "buildcol.pl> GAPLug: processing HASH47f5.dirdoc.xml
> buildcol.pl> WARNING: No plugin could process HASH47f5.dirdoc.xml
> buildcol.pl> Not a GLOB reference at C:\Program
> Files\gsdl/perllib/gsprintf.pm line 61.
> buildcol.pl> Command failed."
>
> It doesn't matter which actual document is processed as number 23; the
> error always appears at that point.
>
> Anyone have any ideas - it's a bit frustrating.
Hi,
after some digging around, I think the problem is to do with greenstone
printing out the error message... it might be trying to print out the
"WARNING: No plugin could process ..." message to an uninitialised
filehandle, which is why the gsprintf module complains.
I've attached a modified plugin.pm that should take the place of the one
in <gsdldir>\perllib\plugin.pm - can you let us know if this fixes it or
not?
Incidentally, if an archive doc.xml can't be processed, it normally
means that a file has the wrong encoding and is not in valid utf-8...
John McPherson
<<attachment>> Type: text/plain
Filename: plugin.pm
###########################################################################
#
# plugin.pm -- functions to handle using plugins
# A component of the Greenstone digital library software
# from the New Zealand Digital Library Project at the
# University of Waikato, New Zealand.
#
# Copyright (C) 1999 New Zealand Digital Library Project
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
#
###########################################################################
package plugin;
use strict; # to pick up typos and undeclared variables...
no strict 'refs'; # ...but allow filehandles to be variables and vice versa
require util;
use gsprintf;
# File-level state shared by the subs in this package.

# Running totals for the current run.  read() increments the
# not-processed / not-recognised counters, and write_stats() hands the
# hash to every plugin's compile_stats() before printing the totals.
my $stats = {
    num_processed      => 0,
    num_blocked        => 0,
    num_not_processed  => 0,
    num_not_recognised => 0,
    num_archives       => 0,
};

# Settings captured by load_plugins() and consulted again by read().
my ($verbosity, $outhandle, $failhandle, $globaloptions);
# Package-local shorthand for gsprintf::gsprintf, so the rest of this file
# can simply call gsprintf(...).  The whole argument list is forwarded
# as-is and the callee's return value is handed straight back.
sub gsprintf
{
# &-form call with an explicit (@_): passes the arguments through unchanged
return &gsprintf::gsprintf(@_);
}
# Locate, load, construct and initialise every plugin named in $plugin_list.
#
# Parameters:
#   $plugin_list   - ref to an array of array refs; each inner array is
#                    (plugin name, option, option, ...).  Entries whose
#                    name is undefined are skipped.
#   $verbosity     - verbosity level; defaults to 2
#   $outhandle     - handle (or handle name) for messages; defaults to 'STDERR'
#   $failhandle    - handle (or handle name) for failure messages;
#                    defaults to 'STDERR'
#   $globaloptions - optional ref to an array of options that should be
#                    passed to all plugins; they are appended after each
#                    plugin's own options
#
# The last four values are stored in the file-level variables of the same
# names so that read() can use them later.
#
# Returns the list of plugin objects, in the order they were listed.
# Dies if a plugin's .pm file cannot be found or its constructor fails.
#
# The caller's arrays are left untouched: names and options are read from
# copies, so the same $plugin_list can safely be passed in again.
sub load_plugins {
    my ($plugin_list) = shift @_;
    ($verbosity, $outhandle, $failhandle, $globaloptions) = @_; # globals

    $verbosity  = 2        unless defined $verbosity;
    $outhandle  = 'STDERR' unless defined $outhandle;
    $failhandle = 'STDERR' unless defined $failhandle;

    # The constructor call is built as source text (see below), so each
    # option becomes a double-quoted string literal.
    my $globals = join(",", map { "\"$_\"" } @{ $globaloptions || [] });

    my @plugin_objects = ();
    foreach my $pluginoptions (@$plugin_list) {
        my ($pluginname, @own_options) = @{ $pluginoptions || [] };
        next unless defined $pluginname;

        # find the plugin: a copy inside the collection takes precedence
        # over the one shipped with the main installation
        my $colplugname = &util::filename_cat($ENV{'GSDLCOLLECTDIR'}, "perllib/plugins",
                                              "${pluginname}.pm");
        my $mainplugname = &util::filename_cat($ENV{'GSDLHOME'}, "perllib/plugins",
                                               "${pluginname}.pm");
        if (-e $colplugname) {
            require $colplugname;
        }
        elsif (-e $mainplugname) {
            require $mainplugname;
        }
        else {
            gsprintf($outhandle, "{plugin.could_not_find_plugin}\n", $pluginname);
            die "\n";
        }

        # assemble the argument list: the plugin's own options first,
        # then the global ones
        my $options = join(",", map { "\"$_\"" } @own_options);
        if ($globals) {
            $options .= "," if @own_options;
            $options .= $globals;
        }

        # the options end up inside double quotes in the eval below, so a
        # literal '$' must be escaped to stop it being interpolated there
        $options =~ s/\$/\\\$/g;

        # create a plugin object; string eval is needed because both the
        # class name and the argument list are only known at run time
        my $plugobj;
        eval(sprintf('$plugobj = %s->new(%s)', $pluginname, $options));
        die "$@" if $@;

        # initialize plugin
        $plugobj->init($verbosity, $outhandle, $failhandle);

        # add this object to the list
        push(@plugin_objects, $plugobj);
    }

    return @plugin_objects;
}
# Call begin() on every plugin in $pluginfo, in order, passing each one
# the same ($pluginfo, $base_dir, $processor, $maxdocs) arguments.
# Returns whatever the individual begin() calls returned, concatenated.
sub begin {
    my ($pluginfo, $base_dir, $processor, $maxdocs) = @_;

    my @results;
    foreach my $plugin (@{$pluginfo}) {
        push @results, $plugin->begin($pluginfo, $base_dir, $processor, $maxdocs);
    }
    return @results;
}
# Offer $file to each plugin in turn until one of them deals with it.
#
# Parameters:
#   $pluginfo  - ref to the array of plugin objects to try, in order
#   $file      - the file to process; also used (minus any leading
#                slashes) in the messages printed for the GLI
#   $maxdocs   - document limit handed to the plugins; replaced by -1
#                unless it is defined and contains a digit
#   $gli       - true to print GLI status tags on STDERR; defaults to 0
#   $base_dir, $metadata, $processor, $aux
#              - passed through unchanged to each plugin's read()
#
# A plugin's read() must return:
#   undef         - could not recognise the file
#   -1            - tried but hit an error
#   0             - blocked
#   anything else - successful processing
#
# Returns the first plugin result that is defined and not -1.  If no
# plugin produces one, the failure is reported, the matching counter in
# $stats is incremented and 0 is returned.
#
# Dies (with a bare newline) if a ".kill" file exists in the directory
# named by $ENV{GSDLCOLLECTDIR}.
sub read {
    my ($pluginfo, $base_dir, $file, $metadata, $processor, $maxdocs, $gli, $aux) = @_;

    $maxdocs = -1 unless defined $maxdocs && $maxdocs =~ /\d/;
    $gli = 0 unless defined $gli;

    # load_plugins() normally sets these; fall back to the same defaults
    # it uses so that a message is never printed to an undefined handle
    my $out_handle  = defined $outhandle  ? $outhandle  : 'STDERR';
    my $fail_handle = defined $failhandle ? $failhandle : 'STDERR';
    my $verbose     = defined $verbosity  ? $verbosity  : 2;

    my $rv = 0;
    my $glifile = $file;

    $glifile =~ s/^[\/\\]+//; # file sometimes starts with a / so get rid of it

    # Announce to GLI that we are handling a file
    print STDERR "<File n='$glifile'>\n" if $gli;

    # the .kill file is a handy (if not very elegant) way of aborting
    # an import.pl or buildcol.pl process
    if (-e &util::filename_cat($ENV{'GSDLCOLLECTDIR'}, ".kill")) {
        gsprintf($out_handle, "{plugin.kill_file}\n");
        die "\n";
    }

    my $had_error = 0;

    # pass this file by each of the plugins in turn until one
    # is found which will process it
    foreach my $plugobj (@$pluginfo) {
        $rv = $plugobj->read($pluginfo, $base_dir, $file,
                             $metadata, $processor, $maxdocs, $gli, $aux);

        # undefined - was not recognised by this plugin, try the next one
        next unless defined $rv;

        # anything other than -1 settles the matter
        return $rv unless $rv == -1;

        # an error has occurred; remember it but keep trying the others
        $had_error = 1;
        print STDERR "<ProcessingError n='$glifile'>\n" if $gli;
    }

    # only the file's own name (no directory part, whichever separator
    # the path uses) goes into the failure log
    my $basename = $file;
    $basename =~ s/.*?([^\\\/]+)$/$1/;

    if ($had_error) {
        # was recognised but couldn't be processed
        if ($verbose >= 2) {
            gsprintf($out_handle, "{plugin.no_plugin_could_process}\n", $file);
        }

        # tell the GLI that it was not processed
        print STDERR "<NonProcessedFile n='$glifile'>\n" if $gli;

        gsprintf($fail_handle, "$basename: {plugin.no_plugin_could_process_this_file}\n");
        $stats->{'num_not_processed'}++;
    }
    else {
        # was not recognised
        if ($verbose >= 2) {
            gsprintf($out_handle, "{plugin.no_plugin_could_recognise}\n", $file);
        }

        # tell the GLI that it was not processed
        print STDERR "<NonRecognisedFile n='$glifile'>\n" if $gli;

        gsprintf($fail_handle, "$basename: {plugin.no_plugin_could_recognise_this_file}\n");
        $stats->{'num_not_recognised'}++;
    }

    return 0;
}
# Print the summary of what the plugins did during this run.  Note that
# the buildcol.pl process doesn't currently call this, so the stats are
# only output after import.pl.
#
# Parameters:
#   $pluginfo    - ref to the array of plugin objects; each one gets the
#                  shared $stats hash via compile_stats()
#   $statshandle - handle passed to gsprintf for the summary lines
#   $faillog     - value substituted into the "see fail log" message
#   $gli         - true to also print an <ImportComplete> tag on STDERR;
#                  defaults to 0
sub write_stats {
    my ($pluginfo, $statshandle, $faillog, $gli) = @_;

    $gli = 0 unless defined $gli;

    # give every plugin the chance to add its own counts to the totals
    foreach my $plugin (@{$pluginfo}) {
        $plugin->compile_stats($stats);
    }

    my $processed = $stats->{'num_processed'};
    my $blocked   = $stats->{'num_blocked'};
    my $failed    = $stats->{'num_not_processed'};
    my $ignored   = $stats->{'num_not_recognised'};
    my $archives  = $stats->{'num_archives'};
    my $total     = $processed + $blocked + $failed + $ignored;

    if ($gli) {
        print STDERR "<ImportComplete considered='$total' processed='$processed' blocked='$blocked' ignored='$ignored' failed='$failed'>\n";
    }

    # Prints the singular message when the count is exactly 1, otherwise
    # the plural message with the count as its argument.
    my $report = sub {
        my ($count, $singular, $plural) = @_;
        if ($count == 1) {
            gsprintf($statshandle, $singular);
        }
        else {
            gsprintf($statshandle, $plural, $count);
        }
        return;
    };

    # the "considered" and "included" lines are always printed; the
    # others only when their count is non-zero
    $report->($total, "* {plugin.one_considered}\n", "* {plugin.n_considered}\n");
    $report->($archives, " ({plugin.including_archive})\n", " ({plugin.including_archives})\n")
        if $archives;
    $report->($processed, "* {plugin.one_included}\n", "* {plugin.n_included}\n");
    $report->($ignored, "* {plugin.one_unrecognised}\n", "* {plugin.n_unrecognised}\n")
        if $ignored;
    $report->($failed, "* {plugin.one_rejected}\n", "* {plugin.n_rejected}\n")
        if $failed;

    # point the user at the fail log whenever something was left out
    if ($failed || $ignored) {
        gsprintf($statshandle, " {plugin.see_faillog}\n", $faillog);
    }
}
# Call end($processor) on every plugin in $pluginfo, in order.
# Returns whatever the individual end() calls returned, concatenated.
sub end {
    my ($pluginfo, $processor) = @_;

    my @results;
    foreach my $plugin (@{$pluginfo}) {
        push @results, $plugin->end($processor);
    }
    return @results;
}
1;