|
|
|
@ -1,12 +1,11 @@
|
|
|
|
|
# Template file for 'tesseract-ocr'
|
|
|
|
|
pkgname=tesseract-ocr
|
|
|
|
|
version=3.05.02
|
|
|
|
|
revision=3
|
|
|
|
|
wrksrc=tesseract-${version}
|
|
|
|
|
_tessdata_ver=074c37215b01ab8cc47a0e06ff7356383883d775
|
|
|
|
|
version=4.0.0
|
|
|
|
|
revision=1
|
|
|
|
|
wrksrc="tesseract-${version}"
|
|
|
|
|
build_style=gnu-configure
|
|
|
|
|
configure_args="LIBLEPT_HEADERSDIR=${XBPS_CROSS_BASE}/usr/include"
|
|
|
|
|
hostmakedepends="automake libtool pkg-config leptonica"
|
|
|
|
|
hostmakedepends="automake libtool pkg-config leptonica libxslt asciidoc"
|
|
|
|
|
makedepends="cairo-devel pango-devel leptonica-devel icu-devel"
|
|
|
|
|
short_desc="Tesseract Open Source OCR engine"
|
|
|
|
|
maintainer="Jürgen Buchmüller <pullmoll@t-online.de>"
|
|
|
|
@ -14,24 +13,25 @@ license="Apache-2.0"
|
|
|
|
|
homepage="https://github.com/tesseract-ocr/tesseract"
|
|
|
|
|
distfiles="
|
|
|
|
|
https://github.com/tesseract-ocr/tesseract/archive/${version}.tar.gz>${pkgname}-${version}.tar.gz
|
|
|
|
|
https://github.com/tesseract-ocr/tessdata/archive/${_tessdata_ver}.tar.gz>tessdata-${_tessdata_ver}.tar.gz"
|
|
|
|
|
checksum="
|
|
|
|
|
494d64ffa7069498a97b909a0e65a35a213989e0184f1ea15332933a90d43445
|
|
|
|
|
e33dea2118f447848a76e0fa5d50d45a2b8630cccc6adeb8d58221a1d09d6007"
|
|
|
|
|
https://github.com/tesseract-ocr/tessdata/archive/${version}.tar.gz>tessdata-${version}.tar.gz"
|
|
|
|
|
checksum="a1f5422ca49a32e5f35c54dee5112b11b99928fc9f4ee6695cdc6768d69f61dd
|
|
|
|
|
38c637d3a1763f6c3d32e8f1d979f045668676ec5feb8ee1869ee77cedd31b08"
|
|
|
|
|
|
|
|
|
|
# Create a package for one specific language $1
#
# Looks below ${wrksrc}/tessdata for files named "<lang>.*", "<lang>_frak.*"
# or "<lang>_vert.*", hands each one to vinstall (mode 644, destination
# usr/share/tessdata) and then removes it from the source tree.
#   $1 - language code, i.e. the file name stem to match
pkg_lang() {
	local f lang="$1"

	vmkdir usr/share/tessdata
	# The command substitution finishes before the loop starts, so the
	# rm below never runs while find is still walking the directory.
	# The find output is word-split; this assumes the matched paths
	# contain no whitespace -- TODO confirm for unusual build dirs.
	for f in $(find "${wrksrc}/tessdata" \
			-name "${lang}.*" \
			-o -name "${lang}_frak.*" \
			-o -name "${lang}_vert.*"); do
		vinstall "$f" 644 usr/share/tessdata
		rm "$f"
	done
}
|
|
|
|
|
|
|
|
|
|
# Merge the separately extracted tessdata tree into the tesseract sources:
# move everything from ${XBPS_BUILDDIR}/tessdata-${version} into
# ${wrksrc}/tessdata, then remove the emptied directory.
post_extract() {
	local datadir="${XBPS_BUILDDIR}/tessdata-${version}"

	# The glob is left outside the quotes so it still expands.
	mv "${datadir}"/* "${wrksrc}/tessdata"
	# rmdir only succeeds on an empty directory, so leftovers are reported.
	rmdir "${datadir}"
}
|
|
|
|
|
pre_configure() {
|
|
|
|
|
NOCONFIGURE=1 ./autogen.sh
|
|
|
|
@ -39,6 +39,9 @@ pre_configure() {
|
|
|
|
|
# Build the additional "training" make target, passing along the job
# options held in ${makejobs}.
post_build() {
	local target=training

	# ${makejobs} is intentionally unquoted: it may expand to no word at all.
	make ${makejobs} "${target}"
}
|
|
|
|
|
# The check step is deliberately a no-op: submodule not in tarball.
do_check() {
	true
}
|
|
|
|
|
post_install() {
|
|
|
|
|
local lang
|
|
|
|
|
# Rename binary to avoid conflict with tesseract package
|
|
|
|
@ -46,8 +49,6 @@ post_install() {
|
|
|
|
|
mv ${DESTDIR}/usr/share/man/man1/tesseract{,-ocr}.1
|
|
|
|
|
vdoc ChangeLog
|
|
|
|
|
vdoc README.md
|
|
|
|
|
vdoc testing/eurotext.tif
|
|
|
|
|
vdoc testing/phototest.tif
|
|
|
|
|
vlicense ${FILESDIR}/COPYING LICENSE-tessdata
|
|
|
|
|
# Move the pseudo languages "equ" (math / equation detection) and
|
|
|
|
|
# "osd" (orientation and script detection) to the main package
|
|
|
|
@ -113,13 +114,13 @@ tesseract-ocr-all_package() {
|
|
|
|
|
conflicts="tesseract-ocr-basic>=0"
|
|
|
|
|
short_desc+=" - all languages data"
|
|
|
|
|
# All available languages
|
|
|
|
|
for lang in afr amh ara asm aze aze_cyrl bel ben bod bos bul cat ceb \
|
|
|
|
|
ces chi_sim chi_tra chr cym dan deu dzo ell eng enm epo est eus \
|
|
|
|
|
fas fin fra frk frm gle glg grc guj hat heb hin hrv hun iku ind isl ita \
|
|
|
|
|
ita_old jav jpn kan kat kat_old kaz khm kir kor kur lao lat lav lit mal mar \
|
|
|
|
|
mkd mlt msa mya nep nld nor ori pan pol por pus ron rus san sin slk slv \
|
|
|
|
|
spa spa_old sqi srp srp_latn swa swe syr tam tel tgk tgl tha tir tur \
|
|
|
|
|
uig ukr urd uzb uzb_cyrl vie yid; do
|
|
|
|
|
for lang in afr amh ara asm aze aze_cyrl bel ben bod bos bre bul cat ceb \
|
|
|
|
|
ces chi_sim chi_tra chr cos cym dan deu div dzo ell eng enm epo est eus fao \
|
|
|
|
|
fas fil fin fra frk frm fry gla gle glg grc guj hat heb hin hrv hun hye iku ind isl ita \
|
|
|
|
|
ita_old jav jpn kan kat kat_old kaz khm kir kor kur kur_ara lao lat lav lit ltz mal mar \
|
|
|
|
|
mkd mlt mon mri msa mya nep nld nor oci ori pan pol por que pus ron rus san sin slk slv \
|
|
|
|
|
snd spa spa_old sqi srp srp_latn sun swa swe syr tam tat tel tgk tgl tha tir ton tur \
|
|
|
|
|
uig ukr urd uzb uzb_cyrl vie yor yid; do
|
|
|
|
|
depends+=" tesseract-ocr-${lang}>=${version}_${revision}"
|
|
|
|
|
done
|
|
|
|
|
}
|
|
|
|
@ -203,6 +204,14 @@ tesseract-ocr-bos_package() {
|
|
|
|
|
$(pkg_lang ${pkgname#tesseract-ocr-})
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
# Sub-package for the Breton ("bre") language data.
tesseract-ocr-bre_package() {
	short_desc+=" - Breton language data"
	depends="${sourcepkg}>=${version}_${revision}"
	noarch=yes
	pkg_install() {
		# The language code is pkgname minus its "tesseract-ocr-" prefix.
		# NOTE(review): $(...) also executes whatever pkg_lang prints;
		# kept as in the sibling language packages.
		local lang_code="${pkgname#tesseract-ocr-}"
		$(pkg_lang "${lang_code}")
	}
}
|
|
|
|
|
tesseract-ocr-bul_package() {
|
|
|
|
|
noarch=yes
|
|
|
|
|
depends="${sourcepkg}>=${version}_${revision}"
|
|
|
|
@ -259,6 +268,14 @@ tesseract-ocr-chr_package() {
|
|
|
|
|
$(pkg_lang ${pkgname#tesseract-ocr-})
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
# Sub-package for the Corsican ("cos") language data.
tesseract-ocr-cos_package() {
	short_desc+=" - Corsican language data"
	depends="${sourcepkg}>=${version}_${revision}"
	noarch=yes
	pkg_install() {
		# The language code is pkgname minus its "tesseract-ocr-" prefix.
		# NOTE(review): $(...) also executes whatever pkg_lang prints;
		# kept as in the sibling language packages.
		local lang_code="${pkgname#tesseract-ocr-}"
		$(pkg_lang "${lang_code}")
	}
}
|
|
|
|
|
tesseract-ocr-cym_package() {
|
|
|
|
|
noarch=yes
|
|
|
|
|
depends="${sourcepkg}>=${version}_${revision}"
|
|
|
|
@ -283,6 +300,14 @@ tesseract-ocr-deu_package() {
|
|
|
|
|
$(pkg_lang ${pkgname#tesseract-ocr-})
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
# Sub-package for the Dhivehi ("div") language data.
tesseract-ocr-div_package() {
	short_desc+=" - Dhivehi language data"
	depends="${sourcepkg}>=${version}_${revision}"
	noarch=yes
	pkg_install() {
		# The language code is pkgname minus its "tesseract-ocr-" prefix.
		# NOTE(review): $(...) also executes whatever pkg_lang prints;
		# kept as in the sibling language packages.
		local lang_code="${pkgname#tesseract-ocr-}"
		$(pkg_lang "${lang_code}")
	}
}
|
|
|
|
|
tesseract-ocr-dzo_package() {
|
|
|
|
|
noarch=yes
|
|
|
|
|
depends="${sourcepkg}>=${version}_${revision}"
|
|
|
|
@ -339,6 +364,14 @@ tesseract-ocr-eus_package() {
|
|
|
|
|
$(pkg_lang ${pkgname#tesseract-ocr-})
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
# Sub-package for the Faroese ("fao") language data.
tesseract-ocr-fao_package() {
	short_desc+=" - Faroese language data"
	depends="${sourcepkg}>=${version}_${revision}"
	noarch=yes
	pkg_install() {
		# The language code is pkgname minus its "tesseract-ocr-" prefix.
		# NOTE(review): $(...) also executes whatever pkg_lang prints;
		# kept as in the sibling language packages.
		local lang_code="${pkgname#tesseract-ocr-}"
		$(pkg_lang "${lang_code}")
	}
}
|
|
|
|
|
tesseract-ocr-fas_package() {
|
|
|
|
|
noarch=yes
|
|
|
|
|
depends="${sourcepkg}>=${version}_${revision}"
|
|
|
|
@ -347,6 +380,14 @@ tesseract-ocr-fas_package() {
|
|
|
|
|
$(pkg_lang ${pkgname#tesseract-ocr-})
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
# Sub-package for the Filipino ("fil") language data.
tesseract-ocr-fil_package() {
	short_desc+=" - Filipino language data"
	depends="${sourcepkg}>=${version}_${revision}"
	noarch=yes
	pkg_install() {
		# The language code is pkgname minus its "tesseract-ocr-" prefix.
		# NOTE(review): $(...) also executes whatever pkg_lang prints;
		# kept as in the sibling language packages.
		local lang_code="${pkgname#tesseract-ocr-}"
		$(pkg_lang "${lang_code}")
	}
}
|
|
|
|
|
tesseract-ocr-fin_package() {
|
|
|
|
|
noarch=yes
|
|
|
|
|
depends="${sourcepkg}>=${version}_${revision}"
|
|
|
|
@ -379,6 +420,22 @@ tesseract-ocr-frm_package() {
|
|
|
|
|
$(pkg_lang ${pkgname#tesseract-ocr-})
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
# Sub-package for the Frisian ("fry") language data.
tesseract-ocr-fry_package() {
	short_desc+=" - Frisian language data"
	depends="${sourcepkg}>=${version}_${revision}"
	noarch=yes
	pkg_install() {
		# The language code is pkgname minus its "tesseract-ocr-" prefix.
		# NOTE(review): $(...) also executes whatever pkg_lang prints;
		# kept as in the sibling language packages.
		local lang_code="${pkgname#tesseract-ocr-}"
		$(pkg_lang "${lang_code}")
	}
}
|
|
|
|
|
# Sub-package for the Scottish Gaelic ("gla") language data.
tesseract-ocr-gla_package() {
	short_desc+=" - Scottish Gaelic language data"
	depends="${sourcepkg}>=${version}_${revision}"
	noarch=yes
	pkg_install() {
		# The language code is pkgname minus its "tesseract-ocr-" prefix.
		# NOTE(review): $(...) also executes whatever pkg_lang prints;
		# kept as in the sibling language packages.
		local lang_code="${pkgname#tesseract-ocr-}"
		$(pkg_lang "${lang_code}")
	}
}
|
|
|
|
|
tesseract-ocr-gle_package() {
|
|
|
|
|
noarch=yes
|
|
|
|
|
depends="${sourcepkg}>=${version}_${revision}"
|
|
|
|
@ -451,6 +508,14 @@ tesseract-ocr-hun_package() {
|
|
|
|
|
$(pkg_lang ${pkgname#tesseract-ocr-})
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
# Sub-package for the Armenian ("hye") language data.
tesseract-ocr-hye_package() {
	short_desc+=" - Armenian language data"
	depends="${sourcepkg}>=${version}_${revision}"
	noarch=yes
	pkg_install() {
		# The language code is pkgname minus its "tesseract-ocr-" prefix.
		# NOTE(review): $(...) also executes whatever pkg_lang prints;
		# kept as in the sibling language packages.
		local lang_code="${pkgname#tesseract-ocr-}"
		$(pkg_lang "${lang_code}")
	}
}
|
|
|
|
|
tesseract-ocr-iku_package() {
|
|
|
|
|
noarch=yes
|
|
|
|
|
depends="${sourcepkg}>=${version}_${revision}"
|
|
|
|
@ -571,6 +636,14 @@ tesseract-ocr-kur_package() {
|
|
|
|
|
$(pkg_lang ${pkgname#tesseract-ocr-})
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
# Sub-package for the Kurdish (Arabic) ("kur_ara") language data.
tesseract-ocr-kur_ara_package() {
	short_desc+=" - Kurdish (Arabic) language data"
	depends="${sourcepkg}>=${version}_${revision}"
	noarch=yes
	pkg_install() {
		# The language code is pkgname minus its "tesseract-ocr-" prefix.
		# NOTE(review): $(...) also executes whatever pkg_lang prints;
		# kept as in the sibling language packages.
		local lang_code="${pkgname#tesseract-ocr-}"
		$(pkg_lang "${lang_code}")
	}
}
|
|
|
|
|
tesseract-ocr-lao_package() {
|
|
|
|
|
noarch=yes
|
|
|
|
|
depends="${sourcepkg}>=${version}_${revision}"
|
|
|
|
@ -603,6 +676,14 @@ tesseract-ocr-lit_package() {
|
|
|
|
|
$(pkg_lang ${pkgname#tesseract-ocr-})
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
# Sub-package for the Luxembourgish ("ltz") language data.
tesseract-ocr-ltz_package() {
	short_desc+=" - Luxembourgish language data"
	depends="${sourcepkg}>=${version}_${revision}"
	noarch=yes
	pkg_install() {
		# The language code is pkgname minus its "tesseract-ocr-" prefix.
		# NOTE(review): $(...) also executes whatever pkg_lang prints;
		# kept as in the sibling language packages.
		local lang_code="${pkgname#tesseract-ocr-}"
		$(pkg_lang "${lang_code}")
	}
}
|
|
|
|
|
tesseract-ocr-mal_package() {
|
|
|
|
|
noarch=yes
|
|
|
|
|
depends="${sourcepkg}>=${version}_${revision}"
|
|
|
|
@ -635,6 +716,22 @@ tesseract-ocr-mlt_package() {
|
|
|
|
|
$(pkg_lang ${pkgname#tesseract-ocr-})
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
# Sub-package for the Mongolian ("mon") language data.
tesseract-ocr-mon_package() {
	short_desc+=" - Mongolian language data"
	depends="${sourcepkg}>=${version}_${revision}"
	noarch=yes
	pkg_install() {
		# The language code is pkgname minus its "tesseract-ocr-" prefix.
		# NOTE(review): $(...) also executes whatever pkg_lang prints;
		# kept as in the sibling language packages.
		local lang_code="${pkgname#tesseract-ocr-}"
		$(pkg_lang "${lang_code}")
	}
}
|
|
|
|
|
# Sub-package for the Maori ("mri") language data.
tesseract-ocr-mri_package() {
	short_desc+=" - Maori language data"
	depends="${sourcepkg}>=${version}_${revision}"
	noarch=yes
	pkg_install() {
		# The language code is pkgname minus its "tesseract-ocr-" prefix.
		# NOTE(review): $(...) also executes whatever pkg_lang prints;
		# kept as in the sibling language packages.
		local lang_code="${pkgname#tesseract-ocr-}"
		$(pkg_lang "${lang_code}")
	}
}
|
|
|
|
|
tesseract-ocr-msa_package() {
|
|
|
|
|
noarch=yes
|
|
|
|
|
depends="${sourcepkg}>=${version}_${revision}"
|
|
|
|
@ -675,6 +772,14 @@ tesseract-ocr-nor_package() {
|
|
|
|
|
$(pkg_lang ${pkgname#tesseract-ocr-})
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
# Sub-package for the Occitan (post 1500) ("oci") language data.
tesseract-ocr-oci_package() {
	short_desc+=" - Occitan (post 1500) language data"
	depends="${sourcepkg}>=${version}_${revision}"
	noarch=yes
	pkg_install() {
		# The language code is pkgname minus its "tesseract-ocr-" prefix.
		# NOTE(review): $(...) also executes whatever pkg_lang prints;
		# kept as in the sibling language packages.
		local lang_code="${pkgname#tesseract-ocr-}"
		$(pkg_lang "${lang_code}")
	}
}
|
|
|
|
|
tesseract-ocr-ori_package() {
|
|
|
|
|
noarch=yes
|
|
|
|
|
depends="${sourcepkg}>=${version}_${revision}"
|
|
|
|
@ -715,6 +820,14 @@ tesseract-ocr-pus_package() {
|
|
|
|
|
$(pkg_lang ${pkgname#tesseract-ocr-})
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
# Sub-package for the Quechua ("que") language data.
tesseract-ocr-que_package() {
	short_desc+=" - Quechua language data"
	depends="${sourcepkg}>=${version}_${revision}"
	noarch=yes
	pkg_install() {
		# The language code is pkgname minus its "tesseract-ocr-" prefix.
		# NOTE(review): $(...) also executes whatever pkg_lang prints;
		# kept as in the sibling language packages.
		local lang_code="${pkgname#tesseract-ocr-}"
		$(pkg_lang "${lang_code}")
	}
}
|
|
|
|
|
tesseract-ocr-ron_package() {
|
|
|
|
|
noarch=yes
|
|
|
|
|
depends="${sourcepkg}>=${version}_${revision}"
|
|
|
|
@ -763,6 +876,14 @@ tesseract-ocr-slv_package() {
|
|
|
|
|
$(pkg_lang ${pkgname#tesseract-ocr-})
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
# Sub-package for the Sindhi ("snd") language data.
tesseract-ocr-snd_package() {
	short_desc+=" - Sindhi language data"
	depends="${sourcepkg}>=${version}_${revision}"
	noarch=yes
	pkg_install() {
		# The language code is pkgname minus its "tesseract-ocr-" prefix.
		# NOTE(review): $(...) also executes whatever pkg_lang prints;
		# kept as in the sibling language packages.
		local lang_code="${pkgname#tesseract-ocr-}"
		$(pkg_lang "${lang_code}")
	}
}
|
|
|
|
|
tesseract-ocr-spa_package() {
|
|
|
|
|
noarch=yes
|
|
|
|
|
depends="${sourcepkg}>=${version}_${revision}"
|
|
|
|
@ -803,6 +924,14 @@ tesseract-ocr-srp_latn_package() {
|
|
|
|
|
$(pkg_lang ${pkgname#tesseract-ocr-})
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
# Sub-package for the Sundanese ("sun") language data.
tesseract-ocr-sun_package() {
	short_desc+=" - Sundanese language data"
	depends="${sourcepkg}>=${version}_${revision}"
	noarch=yes
	pkg_install() {
		# The language code is pkgname minus its "tesseract-ocr-" prefix.
		# NOTE(review): $(...) also executes whatever pkg_lang prints;
		# kept as in the sibling language packages.
		local lang_code="${pkgname#tesseract-ocr-}"
		$(pkg_lang "${lang_code}")
	}
}
|
|
|
|
|
tesseract-ocr-swa_package() {
|
|
|
|
|
noarch=yes
|
|
|
|
|
depends="${sourcepkg}>=${version}_${revision}"
|
|
|
|
@ -835,6 +964,14 @@ tesseract-ocr-tam_package() {
|
|
|
|
|
$(pkg_lang ${pkgname#tesseract-ocr-})
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
# Sub-package for the Tatar ("tat") language data.
tesseract-ocr-tat_package() {
	short_desc+=" - Tatar language data"
	depends="${sourcepkg}>=${version}_${revision}"
	noarch=yes
	pkg_install() {
		# The language code is pkgname minus its "tesseract-ocr-" prefix.
		# NOTE(review): $(...) also executes whatever pkg_lang prints;
		# kept as in the sibling language packages.
		local lang_code="${pkgname#tesseract-ocr-}"
		$(pkg_lang "${lang_code}")
	}
}
|
|
|
|
|
tesseract-ocr-tel_package() {
|
|
|
|
|
noarch=yes
|
|
|
|
|
depends="${sourcepkg}>=${version}_${revision}"
|
|
|
|
@ -875,6 +1012,14 @@ tesseract-ocr-tir_package() {
|
|
|
|
|
$(pkg_lang ${pkgname#tesseract-ocr-})
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
# Sub-package for the Tonga ("ton") language data.
tesseract-ocr-ton_package() {
	short_desc+=" - Tonga language data"
	depends="${sourcepkg}>=${version}_${revision}"
	noarch=yes
	pkg_install() {
		# The language code is pkgname minus its "tesseract-ocr-" prefix.
		# NOTE(review): $(...) also executes whatever pkg_lang prints;
		# kept as in the sibling language packages.
		local lang_code="${pkgname#tesseract-ocr-}"
		$(pkg_lang "${lang_code}")
	}
}
|
|
|
|
|
tesseract-ocr-tur_package() {
|
|
|
|
|
noarch=yes
|
|
|
|
|
depends="${sourcepkg}>=${version}_${revision}"
|
|
|
|
@ -939,3 +1084,11 @@ tesseract-ocr-yid_package() {
|
|
|
|
|
$(pkg_lang ${pkgname#tesseract-ocr-})
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
# Sub-package for the Yoruba ("yor") language data.
tesseract-ocr-yor_package() {
	short_desc+=" - Yoruba language data"
	depends="${sourcepkg}>=${version}_${revision}"
	noarch=yes
	pkg_install() {
		# The language code is pkgname minus its "tesseract-ocr-" prefix.
		# NOTE(review): $(...) also executes whatever pkg_lang prints;
		# kept as in the sibling language packages.
		local lang_code="${pkgname#tesseract-ocr-}"
		$(pkg_lang "${lang_code}")
	}
}
|
|
|
|
|