From 7f7ff14e139597406fd0c4e02980c07573822542 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Piotr=20W=C3=B3jcik?= Date: Mon, 3 Dec 2018 21:46:48 +0100 Subject: [PATCH] tesseract-ocr: update to 4.0.0. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Closes: #5380 [via git-merge-pr] Signed-off-by: Jürgen Buchmüller --- common/shlibs | 2 +- srcpkgs/tesseract-ocr-bre | 1 + srcpkgs/tesseract-ocr-cos | 1 + srcpkgs/tesseract-ocr-div | 1 + srcpkgs/tesseract-ocr-fao | 1 + srcpkgs/tesseract-ocr-fil | 1 + srcpkgs/tesseract-ocr-fry | 1 + srcpkgs/tesseract-ocr-gla | 1 + srcpkgs/tesseract-ocr-hye | 1 + srcpkgs/tesseract-ocr-kur_ara | 1 + srcpkgs/tesseract-ocr-ltz | 1 + srcpkgs/tesseract-ocr-mon | 1 + srcpkgs/tesseract-ocr-mri | 1 + srcpkgs/tesseract-ocr-oci | 1 + srcpkgs/tesseract-ocr-que | 1 + srcpkgs/tesseract-ocr-snd | 1 + srcpkgs/tesseract-ocr-sun | 1 + srcpkgs/tesseract-ocr-tat | 1 + srcpkgs/tesseract-ocr-ton | 1 + srcpkgs/tesseract-ocr-yor | 1 + .../patches/svutil-sys_select_h.patch | 12 -- srcpkgs/tesseract-ocr/template | 195 ++++++++++++++++-- 22 files changed, 194 insertions(+), 34 deletions(-) create mode 120000 srcpkgs/tesseract-ocr-bre create mode 120000 srcpkgs/tesseract-ocr-cos create mode 120000 srcpkgs/tesseract-ocr-div create mode 120000 srcpkgs/tesseract-ocr-fao create mode 120000 srcpkgs/tesseract-ocr-fil create mode 120000 srcpkgs/tesseract-ocr-fry create mode 120000 srcpkgs/tesseract-ocr-gla create mode 120000 srcpkgs/tesseract-ocr-hye create mode 120000 srcpkgs/tesseract-ocr-kur_ara create mode 120000 srcpkgs/tesseract-ocr-ltz create mode 120000 srcpkgs/tesseract-ocr-mon create mode 120000 srcpkgs/tesseract-ocr-mri create mode 120000 srcpkgs/tesseract-ocr-oci create mode 120000 srcpkgs/tesseract-ocr-que create mode 120000 srcpkgs/tesseract-ocr-snd create mode 120000 srcpkgs/tesseract-ocr-sun create mode 120000 srcpkgs/tesseract-ocr-tat create mode 120000 srcpkgs/tesseract-ocr-ton create mode 120000 srcpkgs/tesseract-ocr-yor delete mode 100644 srcpkgs/tesseract-ocr/patches/svutil-sys_select_h.patch diff --git a/common/shlibs b/common/shlibs index bc3f36653ae..b310d9bb7f3 100644 --- a/common/shlibs +++ b/common/shlibs @@ -2077,7 +2077,7 @@ libhttp_parser.so.2.8 http-parser-2.8.0_1 libmaa.so.4 libmaa-1.4.2_1 libcodeblocks.so.0 codeblocks-13.12_1 liblept.so.5 leptonica-1.73_1 -libtesseract.so.3 tesseract-ocr-3.02.02_1 +libtesseract.so.4 tesseract-ocr-4.0.0_1 libffmpegthumbnailer.so.4 ffmpegthumbnailer-2.0.10_1 libopenraw.so.7 libopenraw-0.1.0_1 libopenrawgnome.so.7 libopenraw-0.1.0_1 diff --git a/srcpkgs/tesseract-ocr-bre b/srcpkgs/tesseract-ocr-bre new file mode 120000 index 00000000000..79bcf15f05b --- /dev/null +++ b/srcpkgs/tesseract-ocr-bre @@ -0,0 +1 @@ +tesseract-ocr \ No newline at end of file diff --git a/srcpkgs/tesseract-ocr-cos b/srcpkgs/tesseract-ocr-cos new file mode 120000 index 00000000000..79bcf15f05b --- /dev/null +++ b/srcpkgs/tesseract-ocr-cos @@ -0,0 +1 @@ +tesseract-ocr \ No newline at end of file diff --git a/srcpkgs/tesseract-ocr-div b/srcpkgs/tesseract-ocr-div new file mode 120000 index 00000000000..79bcf15f05b --- /dev/null +++ b/srcpkgs/tesseract-ocr-div @@ -0,0 +1 @@ +tesseract-ocr \ No newline at end of file diff --git a/srcpkgs/tesseract-ocr-fao b/srcpkgs/tesseract-ocr-fao new file mode 120000 index 00000000000..79bcf15f05b --- /dev/null +++ b/srcpkgs/tesseract-ocr-fao @@ -0,0 +1 @@ +tesseract-ocr \ No newline at end of file diff --git a/srcpkgs/tesseract-ocr-fil b/srcpkgs/tesseract-ocr-fil new file mode 120000 index 00000000000..79bcf15f05b --- /dev/null +++ b/srcpkgs/tesseract-ocr-fil @@ -0,0 +1 @@ +tesseract-ocr \ No newline at end of file diff --git a/srcpkgs/tesseract-ocr-fry b/srcpkgs/tesseract-ocr-fry new file mode 120000 index 00000000000..79bcf15f05b --- /dev/null +++ b/srcpkgs/tesseract-ocr-fry @@ -0,0 +1 @@ +tesseract-ocr \ No newline at end of file diff --git a/srcpkgs/tesseract-ocr-gla b/srcpkgs/tesseract-ocr-gla new file mode 120000 index 00000000000..79bcf15f05b --- /dev/null +++ b/srcpkgs/tesseract-ocr-gla @@ -0,0 +1 @@ +tesseract-ocr \ No newline at end of file diff --git a/srcpkgs/tesseract-ocr-hye b/srcpkgs/tesseract-ocr-hye new file mode 120000 index 00000000000..79bcf15f05b --- /dev/null +++ b/srcpkgs/tesseract-ocr-hye @@ -0,0 +1 @@ +tesseract-ocr \ No newline at end of file diff --git a/srcpkgs/tesseract-ocr-kur_ara b/srcpkgs/tesseract-ocr-kur_ara new file mode 120000 index 00000000000..79bcf15f05b --- /dev/null +++ b/srcpkgs/tesseract-ocr-kur_ara @@ -0,0 +1 @@ +tesseract-ocr \ No newline at end of file diff --git a/srcpkgs/tesseract-ocr-ltz b/srcpkgs/tesseract-ocr-ltz new file mode 120000 index 00000000000..79bcf15f05b --- /dev/null +++ b/srcpkgs/tesseract-ocr-ltz @@ -0,0 +1 @@ +tesseract-ocr \ No newline at end of file diff --git a/srcpkgs/tesseract-ocr-mon b/srcpkgs/tesseract-ocr-mon new file mode 120000 index 00000000000..79bcf15f05b --- /dev/null +++ b/srcpkgs/tesseract-ocr-mon @@ -0,0 +1 @@ +tesseract-ocr \ No newline at end of file diff --git a/srcpkgs/tesseract-ocr-mri b/srcpkgs/tesseract-ocr-mri new file mode 120000 index 00000000000..79bcf15f05b --- /dev/null +++ b/srcpkgs/tesseract-ocr-mri @@ -0,0 +1 @@ +tesseract-ocr \ No newline at end of file diff --git a/srcpkgs/tesseract-ocr-oci b/srcpkgs/tesseract-ocr-oci new file mode 120000 index 00000000000..79bcf15f05b --- /dev/null +++ b/srcpkgs/tesseract-ocr-oci @@ -0,0 +1 @@ +tesseract-ocr \ No newline at end of file diff --git a/srcpkgs/tesseract-ocr-que b/srcpkgs/tesseract-ocr-que new file mode 120000 index 00000000000..79bcf15f05b --- /dev/null +++ b/srcpkgs/tesseract-ocr-que @@ -0,0 +1 @@ +tesseract-ocr \ No newline at end of file diff --git a/srcpkgs/tesseract-ocr-snd b/srcpkgs/tesseract-ocr-snd new file mode 120000 index 00000000000..79bcf15f05b --- /dev/null +++ b/srcpkgs/tesseract-ocr-snd @@ -0,0 +1 @@ +tesseract-ocr \ No newline at end of file diff --git a/srcpkgs/tesseract-ocr-sun b/srcpkgs/tesseract-ocr-sun new file mode 120000 index 00000000000..79bcf15f05b --- /dev/null +++ b/srcpkgs/tesseract-ocr-sun @@ -0,0 +1 @@ +tesseract-ocr \ No newline at end of file diff --git a/srcpkgs/tesseract-ocr-tat b/srcpkgs/tesseract-ocr-tat new file mode 120000 index 00000000000..79bcf15f05b --- /dev/null +++ b/srcpkgs/tesseract-ocr-tat @@ -0,0 +1 @@ +tesseract-ocr \ No newline at end of file diff --git a/srcpkgs/tesseract-ocr-ton b/srcpkgs/tesseract-ocr-ton new file mode 120000 index 00000000000..79bcf15f05b --- /dev/null +++ b/srcpkgs/tesseract-ocr-ton @@ -0,0 +1 @@ +tesseract-ocr \ No newline at end of file diff --git a/srcpkgs/tesseract-ocr-yor b/srcpkgs/tesseract-ocr-yor new file mode 120000 index 00000000000..79bcf15f05b --- /dev/null +++ b/srcpkgs/tesseract-ocr-yor @@ -0,0 +1 @@ +tesseract-ocr \ No newline at end of file diff --git a/srcpkgs/tesseract-ocr/patches/svutil-sys_select_h.patch b/srcpkgs/tesseract-ocr/patches/svutil-sys_select_h.patch deleted file mode 100644 index 2dceb83f7ca..00000000000 --- a/srcpkgs/tesseract-ocr/patches/svutil-sys_select_h.patch +++ /dev/null @@ -1,12 +0,0 @@ -Add include required to build with muslc libc - ---- viewer/svutil.cpp 2012-03-03 12:53:33.000000000 +0100 -+++ viewer/svutil.cpp 2015-05-28 17:51:46.917525843 +0200 -@@ -39,6 +39,7 @@ - #include - #include - #include -+#include - #ifdef __linux__ - #include - #endif diff --git a/srcpkgs/tesseract-ocr/template b/srcpkgs/tesseract-ocr/template index 4491b73bea7..76cc5c24485 100644 --- a/srcpkgs/tesseract-ocr/template +++ b/srcpkgs/tesseract-ocr/template @@ -1,12 +1,11 @@ # Template file for 'tesseract-ocr' pkgname=tesseract-ocr -version=3.05.02 -revision=3 -wrksrc=tesseract-${version} -_tessdata_ver=074c37215b01ab8cc47a0e06ff7356383883d775 +version=4.0.0 +revision=1 +wrksrc="tesseract-${version}" build_style=gnu-configure configure_args="LIBLEPT_HEADERSDIR=${XBPS_CROSS_BASE}/usr/include" -hostmakedepends="automake libtool pkg-config leptonica" +hostmakedepends="automake libtool pkg-config leptonica libxslt asciidoc" makedepends="cairo-devel pango-devel leptonica-devel icu-devel" short_desc="Tesseract Open Source OCR engine" maintainer="Jürgen Buchmüller " @@ -14,24 +13,25 @@ license="Apache-2.0" homepage="https://github.com/tesseract-ocr/tesseract" distfiles=" https://github.com/tesseract-ocr/tesseract/archive/${version}.tar.gz>${pkgname}-${version}.tar.gz - https://github.com/tesseract-ocr/tessdata/archive/${_tessdata_ver}.tar.gz>tessdata-${_tessdata_ver}.tar.gz" -checksum=" - 494d64ffa7069498a97b909a0e65a35a213989e0184f1ea15332933a90d43445 - e33dea2118f447848a76e0fa5d50d45a2b8630cccc6adeb8d58221a1d09d6007" + https://github.com/tesseract-ocr/tessdata/archive/${version}.tar.gz>tessdata-${version}.tar.gz" +checksum="a1f5422ca49a32e5f35c54dee5112b11b99928fc9f4ee6695cdc6768d69f61dd + 38c637d3a1763f6c3d32e8f1d979f045668676ec5feb8ee1869ee77cedd31b08" # Create a package for one specific language $1 pkg_lang() { local f lang=$1 vmkdir usr/share/tessdata - for f in $(find ${wrksrc}/tessdata -name "${lang}.*" -o -name "${lang}_frak.*"); do + for f in $(find ${wrksrc}/tessdata -name "${lang}.*" \ + -o -name "${lang}_frak.*" \ + -o -name "${lang}_vert.*" ); do vinstall $f 644 usr/share/tessdata rm $f done } post_extract() { - mv ${XBPS_BUILDDIR}/tessdata-${_tessdata_ver}/* ${wrksrc}/tessdata - rmdir ${XBPS_BUILDDIR}/tessdata-${_tessdata_ver} + mv ${XBPS_BUILDDIR}/tessdata-${version}/* ${wrksrc}/tessdata + rmdir ${XBPS_BUILDDIR}/tessdata-${version} } pre_configure() { NOCONFIGURE=1 ./autogen.sh @@ -39,6 +39,9 @@ pre_configure() { post_build() { make ${makejobs} training } +do_check() { + : # submodule not in tarball +} post_install() { local lang # Rename binary to avoid conflict with tesseract package @@ -46,8 +49,6 @@ post_install() { mv ${DESTDIR}/usr/share/man/man1/tesseract{,-ocr}.1 vdoc ChangeLog vdoc README.md - vdoc testing/eurotext.tif - vdoc testing/phototest.tif vlicense ${FILESDIR}/COPYING LICENSE-tessdata # Move the pseudo languges "equ" (math / equation detection) and # "osd" (orientation and script detection) to the main package @@ -113,13 +114,13 @@ tesseract-ocr-all_package() { conflicts="tesseract-ocr-basic>=0" short_desc+=" - all languages data" # All available languages - for lang in afr amh ara asm aze aze_cyrl bel ben bod bos bul cat ceb \ - ces chi_sim chi_tra chr cym dan deu dzo ell eng enm epo est eus \ - fas fin fra frk frm gle glg grc guj hat heb hin hrv hun iku ind isl ita \ - ita_old jav jpn kan kat kat_old kaz khm kir kor kur lao lat lav lit mal mar \ - mkd mlt msa mya nep nld nor ori pan pol por pus ron rus san sin slk slv \ - spa spa_old sqi srp srp_latn swa swe syr tam tel tgk tgl tha tir tur \ - uig ukr urd uzb uzb_cyrl vie yid; do + for lang in afr amh ara asm aze aze_cyrl bel ben bod bos bre bul cat ceb \ + ces chi_sim chi_tra chr cos cym dan deu div dzo ell eng enm epo est eus fao \ + fas fil fin fra frk frm fry gla gle glg grc guj hat heb hin hrv hun hye iku ind isl ita \ + ita_old jav jpn kan kat kat_old kaz khm kir kor kur kur_ara lao lat lav lit ltz mal mar \ + mkd mlt mon mri msa mya nep nld nor oci ori pan pol por que pus ron rus san sin slk slv \ + snd spa spa_old sqi srp srp_latn sun swa swe syr tam tat tel tgk tgl tha tir ton tur \ + uig ukr urd uzb uzb_cyrl vie yor yid; do depends+=" tesseract-ocr-${lang}>=${version}_${revision}" done } @@ -203,6 +204,14 @@ tesseract-ocr-bos_package() { $(pkg_lang ${pkgname#tesseract-ocr-}) } } +tesseract-ocr-bre_package() { + noarch=yes + depends="${sourcepkg}>=${version}_${revision}" + short_desc+=" - Breton language data" + pkg_install() { + $(pkg_lang ${pkgname#tesseract-ocr-}) + } +} tesseract-ocr-bul_package() { noarch=yes depends="${sourcepkg}>=${version}_${revision}" @@ -259,6 +268,14 @@ tesseract-ocr-chr_package() { $(pkg_lang ${pkgname#tesseract-ocr-}) } } +tesseract-ocr-cos_package() { + noarch=yes + depends="${sourcepkg}>=${version}_${revision}" + short_desc+=" - Corsican language data" + pkg_install() { + $(pkg_lang ${pkgname#tesseract-ocr-}) + } +} tesseract-ocr-cym_package() { noarch=yes depends="${sourcepkg}>=${version}_${revision}" @@ -283,6 +300,14 @@ tesseract-ocr-deu_package() { $(pkg_lang ${pkgname#tesseract-ocr-}) } } +tesseract-ocr-div_package() { + noarch=yes + depends="${sourcepkg}>=${version}_${revision}" + short_desc+=" - Dhivehi language data" + pkg_install() { + $(pkg_lang ${pkgname#tesseract-ocr-}) + } +} tesseract-ocr-dzo_package() { noarch=yes depends="${sourcepkg}>=${version}_${revision}" @@ -339,6 +364,14 @@ tesseract-ocr-eus_package() { $(pkg_lang ${pkgname#tesseract-ocr-}) } } +tesseract-ocr-fao_package() { + noarch=yes + depends="${sourcepkg}>=${version}_${revision}" + short_desc+=" - Faroese language data" + pkg_install() { + $(pkg_lang ${pkgname#tesseract-ocr-}) + } +} tesseract-ocr-fas_package() { noarch=yes depends="${sourcepkg}>=${version}_${revision}" @@ -347,6 +380,14 @@ tesseract-ocr-fas_package() { $(pkg_lang ${pkgname#tesseract-ocr-}) } } +tesseract-ocr-fil_package() { + noarch=yes + depends="${sourcepkg}>=${version}_${revision}" + short_desc+=" - Filipino language data" + pkg_install() { + $(pkg_lang ${pkgname#tesseract-ocr-}) + } +} tesseract-ocr-fin_package() { noarch=yes depends="${sourcepkg}>=${version}_${revision}" @@ -379,6 +420,22 @@ tesseract-ocr-frm_package() { $(pkg_lang ${pkgname#tesseract-ocr-}) } } +tesseract-ocr-fry_package() { + noarch=yes + depends="${sourcepkg}>=${version}_${revision}" + short_desc+=" - Frisian language data" + pkg_install() { + $(pkg_lang ${pkgname#tesseract-ocr-}) + } +} +tesseract-ocr-gla_package() { + noarch=yes + depends="${sourcepkg}>=${version}_${revision}" + short_desc+=" - Scottish Gaelic language data" + pkg_install() { + $(pkg_lang ${pkgname#tesseract-ocr-}) + } +} tesseract-ocr-gle_package() { noarch=yes depends="${sourcepkg}>=${version}_${revision}" @@ -451,6 +508,14 @@ tesseract-ocr-hun_package() { $(pkg_lang ${pkgname#tesseract-ocr-}) } } +tesseract-ocr-hye_package() { + noarch=yes + depends="${sourcepkg}>=${version}_${revision}" + short_desc+=" - Armenian language data" + pkg_install() { + $(pkg_lang ${pkgname#tesseract-ocr-}) + } +} tesseract-ocr-iku_package() { noarch=yes depends="${sourcepkg}>=${version}_${revision}" @@ -571,6 +636,14 @@ tesseract-ocr-kur_package() { $(pkg_lang ${pkgname#tesseract-ocr-}) } } +tesseract-ocr-kur_ara_package() { + noarch=yes + depends="${sourcepkg}>=${version}_${revision}" + short_desc+=" - Kurdish (Arabic) language data" + pkg_install() { + $(pkg_lang ${pkgname#tesseract-ocr-}) + } +} tesseract-ocr-lao_package() { noarch=yes depends="${sourcepkg}>=${version}_${revision}" @@ -603,6 +676,14 @@ tesseract-ocr-lit_package() { $(pkg_lang ${pkgname#tesseract-ocr-}) } } +tesseract-ocr-ltz_package() { + noarch=yes + depends="${sourcepkg}>=${version}_${revision}" + short_desc+=" - Luxembourgish language data" + pkg_install() { + $(pkg_lang ${pkgname#tesseract-ocr-}) + } +} tesseract-ocr-mal_package() { noarch=yes depends="${sourcepkg}>=${version}_${revision}" @@ -635,6 +716,22 @@ tesseract-ocr-mlt_package() { $(pkg_lang ${pkgname#tesseract-ocr-}) } } +tesseract-ocr-mon_package() { + noarch=yes + depends="${sourcepkg}>=${version}_${revision}" + short_desc+=" - Mongolian language data" + pkg_install() { + $(pkg_lang ${pkgname#tesseract-ocr-}) + } +} +tesseract-ocr-mri_package() { + noarch=yes + depends="${sourcepkg}>=${version}_${revision}" + short_desc+=" - Maori language data" + pkg_install() { + $(pkg_lang ${pkgname#tesseract-ocr-}) + } +} tesseract-ocr-msa_package() { noarch=yes depends="${sourcepkg}>=${version}_${revision}" @@ -675,6 +772,14 @@ tesseract-ocr-nor_package() { $(pkg_lang ${pkgname#tesseract-ocr-}) } } +tesseract-ocr-oci_package() { + noarch=yes + depends="${sourcepkg}>=${version}_${revision}" + short_desc+=" - Occitan (post 1500) language data" + pkg_install() { + $(pkg_lang ${pkgname#tesseract-ocr-}) + } +} tesseract-ocr-ori_package() { noarch=yes depends="${sourcepkg}>=${version}_${revision}" @@ -715,6 +820,14 @@ tesseract-ocr-pus_package() { $(pkg_lang ${pkgname#tesseract-ocr-}) } } +tesseract-ocr-que_package() { + noarch=yes + depends="${sourcepkg}>=${version}_${revision}" + short_desc+=" - Quechua language data" + pkg_install() { + $(pkg_lang ${pkgname#tesseract-ocr-}) + } +} tesseract-ocr-ron_package() { noarch=yes depends="${sourcepkg}>=${version}_${revision}" @@ -763,6 +876,14 @@ tesseract-ocr-slv_package() { $(pkg_lang ${pkgname#tesseract-ocr-}) } } +tesseract-ocr-snd_package() { + noarch=yes + depends="${sourcepkg}>=${version}_${revision}" + short_desc+=" - Sindhi language data" + pkg_install() { + $(pkg_lang ${pkgname#tesseract-ocr-}) + } +} tesseract-ocr-spa_package() { noarch=yes depends="${sourcepkg}>=${version}_${revision}" @@ -803,6 +924,14 @@ tesseract-ocr-srp_latn_package() { $(pkg_lang ${pkgname#tesseract-ocr-}) } } +tesseract-ocr-sun_package() { + noarch=yes + depends="${sourcepkg}>=${version}_${revision}" + short_desc+=" - Sundanese language data" + pkg_install() { + $(pkg_lang ${pkgname#tesseract-ocr-}) + } +} tesseract-ocr-swa_package() { noarch=yes depends="${sourcepkg}>=${version}_${revision}" @@ -835,6 +964,14 @@ tesseract-ocr-tam_package() { $(pkg_lang ${pkgname#tesseract-ocr-}) } } +tesseract-ocr-tat_package() { + noarch=yes + depends="${sourcepkg}>=${version}_${revision}" + short_desc+=" - Tatar language data" + pkg_install() { + $(pkg_lang ${pkgname#tesseract-ocr-}) + } +} tesseract-ocr-tel_package() { noarch=yes depends="${sourcepkg}>=${version}_${revision}" @@ -875,6 +1012,14 @@ tesseract-ocr-tir_package() { $(pkg_lang ${pkgname#tesseract-ocr-}) } } +tesseract-ocr-ton_package() { + noarch=yes + depends="${sourcepkg}>=${version}_${revision}" + short_desc+=" - Tonga language data" + pkg_install() { + $(pkg_lang ${pkgname#tesseract-ocr-}) + } +} tesseract-ocr-tur_package() { noarch=yes depends="${sourcepkg}>=${version}_${revision}" @@ -939,3 +1084,11 @@ tesseract-ocr-yid_package() { $(pkg_lang ${pkgname#tesseract-ocr-}) } } +tesseract-ocr-yor_package() { + noarch=yes + depends="${sourcepkg}>=${version}_${revision}" + short_desc+=" - Yoruba language data" + pkg_install() { + $(pkg_lang ${pkgname#tesseract-ocr-}) + } +}