69 lines
2.5 KiB
Diff
69 lines
2.5 KiB
Diff
--- lib/Gscan2pdf/Tesseract.pm.orig 2020-07-12 21:27:40.576078589 +0200
|
|
+++ lib/Gscan2pdf/Tesseract.pm 2020-07-12 21:27:58.302854959 +0200
|
|
@@ -22,25 +22,25 @@
|
|
return $installed if $setup;
|
|
|
|
( undef, my $exe ) =
|
|
- Gscan2pdf::Document::exec_command( [ 'which', 'tesseract' ] );
|
|
+ Gscan2pdf::Document::exec_command( [ 'which', 'tesseract-ocr' ] );
|
|
return if ( not defined $exe or $exe eq $EMPTY );
|
|
$installed = 1;
|
|
|
|
# Only support 3.02.01 or better, so that
|
|
# we can use --list-langs and not bother with tessdata
|
|
( undef, my $out, my $err ) =
|
|
- Gscan2pdf::Document::exec_command( [ 'tesseract', '-v' ] );
|
|
- if ( $err =~ /^tesseract[ ]([\d.]+)/xsm ) {
|
|
+ Gscan2pdf::Document::exec_command( [ 'tesseract-ocr', '-v' ] );
|
|
+ if ( $err =~ /^tesseract-ocr[ ]([\d.]+)/xsm ) {
|
|
$version = $1;
|
|
}
|
|
- elsif ( $out =~ /^tesseract[ ]([\d.]+)/xsm ) {
|
|
+ elsif ( $out =~ /^tesseract-ocr[ ]([\d.]+)/xsm ) {
|
|
$version = $1;
|
|
}
|
|
if ( not $version ) { return }
|
|
if ( $version !~ /^\d+[.]\d+$/xsm ) { $version = 'v' . $version }
|
|
$version = version->parse($version);
|
|
if ( $version > version->parse('v3.02.00') ) {
|
|
- $logger->info("Found tesseract version $version.");
|
|
+ $logger->info("Found tesseract-ocr version $version.");
|
|
$setup = 1;
|
|
return $installed;
|
|
}
|
|
@@ -101,14 +101,14 @@
|
|
|
|
my @codes;
|
|
my ( undef, $out, $err ) =
|
|
- Gscan2pdf::Document::exec_command( [ 'tesseract', '--list-langs' ] );
|
|
+ Gscan2pdf::Document::exec_command( [ 'tesseract-ocr', '--list-langs' ] );
|
|
@codes = split /\n/xsm, $err ? $err : $out;
|
|
if ( $codes[0] =~ /^List[ ]of[ ]available[ ]languages/xsm ) {
|
|
shift @codes;
|
|
}
|
|
|
|
for (@codes) {
|
|
- $logger->info("Found tesseract language $_");
|
|
+ $logger->info("Found tesseract-ocr language $_");
|
|
if ( defined $iso639{$_} ) {
|
|
$languages{$_} = $iso639{$_};
|
|
}
|
|
@@ -163,7 +163,7 @@
|
|
}
|
|
if ( $version > version->parse('v3.05.00') ) {
|
|
$cmd = [
|
|
- 'tesseract', $tif,
|
|
+ 'tesseract-ocr', $tif,
|
|
$path . $name, '--dpi', $options{dpi}, '-l',
|
|
$options{language}, '-c',
|
|
'tessedit_create_hocr=1',
|
|
@@ -172,7 +172,7 @@
|
|
}
|
|
else {
|
|
$cmd = [
|
|
- 'tesseract', $tif, $path . $name, '-l',
|
|
+ 'tesseract-ocr', $tif, $path . $name, '-l',
|
|
$options{language}, '-c', 'tessedit_create_hocr=1',
|
|
];
|
|
}
|