|
|
|
@ -1,11 +1,12 @@
|
|
|
|
|
# Template file for 'tesseract-ocr'
pkgname=tesseract-ocr
version=4.1.1
revision=1
# Tag of the tessdata archive referenced by the second distfiles entry.
_tessdataver=4.0.0
wrksrc="tesseract-${version}"
build_style=gnu-configure
configure_args="LIBLEPT_HEADERSDIR=${XBPS_CROSS_BASE}/usr/include $(vopt_enable openmp)"
make_build_args="all training"
hostmakedepends="automake libtool pkg-config leptonica libxslt asciidoc"
# libgomp-devel is only pulled in when the openmp build option is enabled.
makedepends="cairo-devel pango-devel leptonica-devel $(vopt_if openmp libgomp-devel) icu-devel"
short_desc="Tesseract Open Source OCR engine"
|
|
|
|
@ -15,7 +16,7 @@ homepage="https://github.com/tesseract-ocr/tesseract"
|
|
|
|
|
# Two archives: the engine sources and the tessdata models. The text after
# '>' is the file name given on each entry for the downloaded archive.
distfiles="
 https://github.com/tesseract-ocr/tesseract/archive/${version}.tar.gz>${pkgname}-${version}.tar.gz
 https://github.com/tesseract-ocr/tessdata/archive/${_tessdataver}.tar.gz>tessdata-${_tessdataver}.tar.gz"
# One checksum per distfiles entry, listed in the same order.
checksum="2a66ff0d8595bff8f04032165e6c936389b1e5727c3ce5a27b3e059d218db1cb
 38c637d3a1763f6c3d32e8f1d979f045668676ec5feb8ee1869ee77cedd31b08"

build_options="openmp"
|
|
|
|
@ -24,12 +25,21 @@ desc_option_openmp="Enable Open MP (gomp)"
|
|
|
|
|
|
|
|
|
|
# Create a package for one specific language or script, named by $1.
#   $1  plain language code (e.g. "jpn"), or "script-<Name>" for a script
#       model. The "script-" prefix is stripped before matching file names,
#       and script models are installed one level deeper, in
#       usr/share/tessdata/script.
# Every file under ${wrksrc}/tessdata named <lang>.*, <lang>_frak.* or
# <lang>_vert.* is installed with mode 644 and then removed from the tree.
pkg_lang() {
	local f script lang=$1
	case "$1" in
		script-*)
			script=/script
			lang=${1#script-}
			;;
		*)
			script=
			;;
	esac
	vmkdir "usr/share/tessdata${script}"
	# The find output is word-split on purpose; this relies on the data
	# file paths containing no whitespace.
	for f in $(find "${wrksrc}/tessdata" -name "${lang}.*" \
			-o -name "${lang}_frak.*" \
			-o -name "${lang}_vert.*"); do
		vinstall "$f" 644 "usr/share/tessdata${script}"
		rm "$f"
	done
}
|
|
|
|
@ -41,9 +51,6 @@ post_extract() {
|
|
|
|
|
pre_configure() {
	# Run the source tree's autogen.sh with NOCONFIGURE=1 exported to that
	# one command only (presumably so it does not launch ./configure
	# itself -- confirm against autogen.sh).
	NOCONFIGURE=1 ./autogen.sh
}
|
|
|
|
|
post_build() {
	# Second make invocation for the `training` target. makejobs is left
	# unquoted so that it can expand to separate words (or to nothing).
	# NOTE(review): make_build_args in this template also names
	# `training`; confirm whether this hook is still needed.
	make ${makejobs} training
}
|
|
|
|
|
do_check() {
	# Deliberate no-op that always succeeds: submodule not in tarball
	# (presumably the one the test suite depends on -- confirm).
	true
}
|
|
|
|
@ -125,7 +132,13 @@ tesseract-ocr-all_package() {
|
|
|
|
|
ita_old jav jpn kan kat kat_old kaz khm kir kor kur kur_ara lao lat lav lit ltz mal mar \
|
|
|
|
|
mkd mlt mon mri msa mya nep nld nor oci ori pan pol por que pus ron rus san sin slk slv \
|
|
|
|
|
snd spa spa_old sqi srp srp_latn sun swa swe syr tam tat tel tgk tgl tha tir ton tur \
|
|
|
|
|
uig ukr urd uzb uzb_cyrl vie yor yid; do
|
|
|
|
|
uig ukr urd uzb uzb_cyrl vie yid yor \
|
|
|
|
|
script-Arabic script-Armenian script-Bengali script-Canadian_Aboriginal script-Cherokee \
|
|
|
|
|
script-Cyrillic script-Devanagari script-Ethiopic script-Fraktur script-Georgian \
|
|
|
|
|
script-Greek script-Gujarati script-Gurmukhi script-HanS script-HanT script-Hangul \
|
|
|
|
|
script-Hebrew script-Japanese script-Kannada script-Khmer script-Lao script-Latin \
|
|
|
|
|
script-Malayalam script-Myanmar script-Oriya script-Sinhala script-Syriac script-Tamil \
|
|
|
|
|
script-Telugu script-Thaana script-Thai script-Tibetan script-Vietnamese; do
|
|
|
|
|
depends+=" tesseract-ocr-${lang}>=${version}_${revision}"
|
|
|
|
|
done
|
|
|
|
|
}
|
|
|
|
@ -1097,3 +1110,267 @@ tesseract-ocr-yor_package() {
|
|
|
|
|
$(pkg_lang ${pkgname#tesseract-ocr-})
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
# Subpackage with the Arabic script data; noarch, depends on the main package.
tesseract-ocr-script-Arabic_package() {
	archs=noarch
	depends="${sourcepkg}>=${version}_${revision}"
	short_desc+=" - Arabic script data"
	pkg_install() {
		# Call pkg_lang directly with pkgname minus its "tesseract-ocr-"
		# prefix; inside $( ) its stdout would be executed as a command.
		pkg_lang "${pkgname#tesseract-ocr-}"
	}
}
|
|
|
|
|
# Subpackage with the Armenian script data; noarch, depends on the main package.
tesseract-ocr-script-Armenian_package() {
	archs=noarch
	depends="${sourcepkg}>=${version}_${revision}"
	short_desc+=" - Armenian script data"
	pkg_install() {
		# Call pkg_lang directly with pkgname minus its "tesseract-ocr-"
		# prefix; inside $( ) its stdout would be executed as a command.
		pkg_lang "${pkgname#tesseract-ocr-}"
	}
}
|
|
|
|
|
# Subpackage with the Bengali script data; noarch, depends on the main package.
tesseract-ocr-script-Bengali_package() {
	archs=noarch
	depends="${sourcepkg}>=${version}_${revision}"
	short_desc+=" - Bengali script data"
	pkg_install() {
		# Call pkg_lang directly with pkgname minus its "tesseract-ocr-"
		# prefix; inside $( ) its stdout would be executed as a command.
		pkg_lang "${pkgname#tesseract-ocr-}"
	}
}
|
|
|
|
|
# Subpackage with the Canadian Aboriginal script data; noarch, depends on
# the main package.
tesseract-ocr-script-Canadian_Aboriginal_package() {
	archs=noarch
	depends="${sourcepkg}>=${version}_${revision}"
	short_desc+=" - Canadian Aboriginal script data"
	pkg_install() {
		# Call pkg_lang directly with pkgname minus its "tesseract-ocr-"
		# prefix; inside $( ) its stdout would be executed as a command.
		pkg_lang "${pkgname#tesseract-ocr-}"
	}
}
|
|
|
|
|
# Subpackage with the Cherokee script data; noarch, depends on the main package.
tesseract-ocr-script-Cherokee_package() {
	archs=noarch
	depends="${sourcepkg}>=${version}_${revision}"
	short_desc+=" - Cherokee script data"
	pkg_install() {
		# Call pkg_lang directly with pkgname minus its "tesseract-ocr-"
		# prefix; inside $( ) its stdout would be executed as a command.
		pkg_lang "${pkgname#tesseract-ocr-}"
	}
}
|
|
|
|
|
# Subpackage with the Cyrillic script data; noarch, depends on the main package.
tesseract-ocr-script-Cyrillic_package() {
	archs=noarch
	depends="${sourcepkg}>=${version}_${revision}"
	short_desc+=" - Cyrillic script data"
	pkg_install() {
		# Call pkg_lang directly with pkgname minus its "tesseract-ocr-"
		# prefix; inside $( ) its stdout would be executed as a command.
		pkg_lang "${pkgname#tesseract-ocr-}"
	}
}
|
|
|
|
|
# Subpackage with the Devanagari script data; noarch, depends on the main package.
tesseract-ocr-script-Devanagari_package() {
	archs=noarch
	depends="${sourcepkg}>=${version}_${revision}"
	short_desc+=" - Devanagari script data"
	pkg_install() {
		# Call pkg_lang directly with pkgname minus its "tesseract-ocr-"
		# prefix; inside $( ) its stdout would be executed as a command.
		pkg_lang "${pkgname#tesseract-ocr-}"
	}
}
|
|
|
|
|
# Subpackage with the Ethiopic script data; noarch, depends on the main package.
tesseract-ocr-script-Ethiopic_package() {
	archs=noarch
	depends="${sourcepkg}>=${version}_${revision}"
	short_desc+=" - Ethiopic script data"
	pkg_install() {
		# Call pkg_lang directly with pkgname minus its "tesseract-ocr-"
		# prefix; inside $( ) its stdout would be executed as a command.
		pkg_lang "${pkgname#tesseract-ocr-}"
	}
}
|
|
|
|
|
# Subpackage with the Fraktur script data; noarch, depends on the main package.
tesseract-ocr-script-Fraktur_package() {
	archs=noarch
	depends="${sourcepkg}>=${version}_${revision}"
	short_desc+=" - Fraktur script data"
	pkg_install() {
		# Call pkg_lang directly with pkgname minus its "tesseract-ocr-"
		# prefix; inside $( ) its stdout would be executed as a command.
		pkg_lang "${pkgname#tesseract-ocr-}"
	}
}
|
|
|
|
|
# Subpackage with the Georgian script data; noarch, depends on the main package.
tesseract-ocr-script-Georgian_package() {
	archs=noarch
	depends="${sourcepkg}>=${version}_${revision}"
	short_desc+=" - Georgian script data"
	pkg_install() {
		# Call pkg_lang directly with pkgname minus its "tesseract-ocr-"
		# prefix; inside $( ) its stdout would be executed as a command.
		pkg_lang "${pkgname#tesseract-ocr-}"
	}
}
|
|
|
|
|
# Subpackage with the Greek script data; noarch, depends on the main package.
tesseract-ocr-script-Greek_package() {
	archs=noarch
	depends="${sourcepkg}>=${version}_${revision}"
	short_desc+=" - Greek script data"
	pkg_install() {
		# Call pkg_lang directly with pkgname minus its "tesseract-ocr-"
		# prefix; inside $( ) its stdout would be executed as a command.
		pkg_lang "${pkgname#tesseract-ocr-}"
	}
}
|
|
|
|
|
# Subpackage with the Gujarati script data; noarch, depends on the main package.
tesseract-ocr-script-Gujarati_package() {
	archs=noarch
	depends="${sourcepkg}>=${version}_${revision}"
	short_desc+=" - Gujarati script data"
	pkg_install() {
		# Call pkg_lang directly with pkgname minus its "tesseract-ocr-"
		# prefix; inside $( ) its stdout would be executed as a command.
		pkg_lang "${pkgname#tesseract-ocr-}"
	}
}
|
|
|
|
|
# Subpackage with the Gurmukhi script data; noarch, depends on the main package.
tesseract-ocr-script-Gurmukhi_package() {
	archs=noarch
	depends="${sourcepkg}>=${version}_${revision}"
	short_desc+=" - Gurmukhi script data"
	pkg_install() {
		# Call pkg_lang directly with pkgname minus its "tesseract-ocr-"
		# prefix; inside $( ) its stdout would be executed as a command.
		pkg_lang "${pkgname#tesseract-ocr-}"
	}
}
|
|
|
|
|
# Subpackage with the HanS script data; noarch, depends on the main package.
tesseract-ocr-script-HanS_package() {
	archs=noarch
	depends="${sourcepkg}>=${version}_${revision}"
	short_desc+=" - HanS script data"
	pkg_install() {
		# Call pkg_lang directly with pkgname minus its "tesseract-ocr-"
		# prefix; inside $( ) its stdout would be executed as a command.
		pkg_lang "${pkgname#tesseract-ocr-}"
	}
}
|
|
|
|
|
# Subpackage with the HanT script data; noarch, depends on the main package.
tesseract-ocr-script-HanT_package() {
	archs=noarch
	depends="${sourcepkg}>=${version}_${revision}"
	short_desc+=" - HanT script data"
	pkg_install() {
		# Call pkg_lang directly with pkgname minus its "tesseract-ocr-"
		# prefix; inside $( ) its stdout would be executed as a command.
		pkg_lang "${pkgname#tesseract-ocr-}"
	}
}
|
|
|
|
|
# Subpackage with the Hangul script data; noarch, depends on the main package.
tesseract-ocr-script-Hangul_package() {
	archs=noarch
	depends="${sourcepkg}>=${version}_${revision}"
	short_desc+=" - Hangul script data"
	pkg_install() {
		# Call pkg_lang directly with pkgname minus its "tesseract-ocr-"
		# prefix; inside $( ) its stdout would be executed as a command.
		pkg_lang "${pkgname#tesseract-ocr-}"
	}
}
|
|
|
|
|
# Subpackage with the Hebrew script data; noarch, depends on the main package.
tesseract-ocr-script-Hebrew_package() {
	archs=noarch
	depends="${sourcepkg}>=${version}_${revision}"
	short_desc+=" - Hebrew script data"
	pkg_install() {
		# Call pkg_lang directly with pkgname minus its "tesseract-ocr-"
		# prefix; inside $( ) its stdout would be executed as a command.
		pkg_lang "${pkgname#tesseract-ocr-}"
	}
}
|
|
|
|
|
# Subpackage with the Japanese script data; noarch, depends on the main package.
tesseract-ocr-script-Japanese_package() {
	archs=noarch
	depends="${sourcepkg}>=${version}_${revision}"
	short_desc+=" - Japanese script data"
	pkg_install() {
		# Call pkg_lang directly with pkgname minus its "tesseract-ocr-"
		# prefix; inside $( ) its stdout would be executed as a command.
		pkg_lang "${pkgname#tesseract-ocr-}"
	}
}
|
|
|
|
|
# Subpackage with the Kannada script data; noarch, depends on the main package.
tesseract-ocr-script-Kannada_package() {
	archs=noarch
	depends="${sourcepkg}>=${version}_${revision}"
	short_desc+=" - Kannada script data"
	pkg_install() {
		# Call pkg_lang directly with pkgname minus its "tesseract-ocr-"
		# prefix; inside $( ) its stdout would be executed as a command.
		pkg_lang "${pkgname#tesseract-ocr-}"
	}
}
|
|
|
|
|
# Subpackage with the Khmer script data; noarch, depends on the main package.
tesseract-ocr-script-Khmer_package() {
	archs=noarch
	depends="${sourcepkg}>=${version}_${revision}"
	short_desc+=" - Khmer script data"
	pkg_install() {
		# Call pkg_lang directly with pkgname minus its "tesseract-ocr-"
		# prefix; inside $( ) its stdout would be executed as a command.
		pkg_lang "${pkgname#tesseract-ocr-}"
	}
}
|
|
|
|
|
# Subpackage with the Lao script data; noarch, depends on the main package.
tesseract-ocr-script-Lao_package() {
	archs=noarch
	depends="${sourcepkg}>=${version}_${revision}"
	short_desc+=" - Lao script data"
	pkg_install() {
		# Call pkg_lang directly with pkgname minus its "tesseract-ocr-"
		# prefix; inside $( ) its stdout would be executed as a command.
		pkg_lang "${pkgname#tesseract-ocr-}"
	}
}
|
|
|
|
|
# Subpackage with the Latin script data; noarch, depends on the main package.
tesseract-ocr-script-Latin_package() {
	archs=noarch
	depends="${sourcepkg}>=${version}_${revision}"
	short_desc+=" - Latin script data"
	pkg_install() {
		# Call pkg_lang directly with pkgname minus its "tesseract-ocr-"
		# prefix; inside $( ) its stdout would be executed as a command.
		pkg_lang "${pkgname#tesseract-ocr-}"
	}
}
|
|
|
|
|
# Subpackage with the Malayalam script data; noarch, depends on the main package.
tesseract-ocr-script-Malayalam_package() {
	archs=noarch
	depends="${sourcepkg}>=${version}_${revision}"
	short_desc+=" - Malayalam script data"
	pkg_install() {
		# Call pkg_lang directly with pkgname minus its "tesseract-ocr-"
		# prefix; inside $( ) its stdout would be executed as a command.
		pkg_lang "${pkgname#tesseract-ocr-}"
	}
}
|
|
|
|
|
# Subpackage with the Myanmar script data; noarch, depends on the main package.
tesseract-ocr-script-Myanmar_package() {
	archs=noarch
	depends="${sourcepkg}>=${version}_${revision}"
	short_desc+=" - Myanmar script data"
	pkg_install() {
		# Call pkg_lang directly with pkgname minus its "tesseract-ocr-"
		# prefix; inside $( ) its stdout would be executed as a command.
		pkg_lang "${pkgname#tesseract-ocr-}"
	}
}
|
|
|
|
|
# Subpackage with the Oriya script data; noarch, depends on the main package.
tesseract-ocr-script-Oriya_package() {
	archs=noarch
	depends="${sourcepkg}>=${version}_${revision}"
	short_desc+=" - Oriya script data"
	pkg_install() {
		# Call pkg_lang directly with pkgname minus its "tesseract-ocr-"
		# prefix; inside $( ) its stdout would be executed as a command.
		pkg_lang "${pkgname#tesseract-ocr-}"
	}
}
|
|
|
|
|
# Subpackage with the Sinhala script data; noarch, depends on the main package.
tesseract-ocr-script-Sinhala_package() {
	archs=noarch
	depends="${sourcepkg}>=${version}_${revision}"
	short_desc+=" - Sinhala script data"
	pkg_install() {
		# Call pkg_lang directly with pkgname minus its "tesseract-ocr-"
		# prefix; inside $( ) its stdout would be executed as a command.
		pkg_lang "${pkgname#tesseract-ocr-}"
	}
}
|
|
|
|
|
# Subpackage with the Syriac script data; noarch, depends on the main package.
tesseract-ocr-script-Syriac_package() {
	archs=noarch
	depends="${sourcepkg}>=${version}_${revision}"
	short_desc+=" - Syriac script data"
	pkg_install() {
		# Call pkg_lang directly with pkgname minus its "tesseract-ocr-"
		# prefix; inside $( ) its stdout would be executed as a command.
		pkg_lang "${pkgname#tesseract-ocr-}"
	}
}
|
|
|
|
|
# Subpackage with the Tamil script data; noarch, depends on the main package.
tesseract-ocr-script-Tamil_package() {
	archs=noarch
	depends="${sourcepkg}>=${version}_${revision}"
	short_desc+=" - Tamil script data"
	pkg_install() {
		# Call pkg_lang directly with pkgname minus its "tesseract-ocr-"
		# prefix; inside $( ) its stdout would be executed as a command.
		pkg_lang "${pkgname#tesseract-ocr-}"
	}
}
|
|
|
|
|
# Subpackage with the Telugu script data; noarch, depends on the main package.
tesseract-ocr-script-Telugu_package() {
	archs=noarch
	depends="${sourcepkg}>=${version}_${revision}"
	short_desc+=" - Telugu script data"
	pkg_install() {
		# Call pkg_lang directly with pkgname minus its "tesseract-ocr-"
		# prefix; inside $( ) its stdout would be executed as a command.
		pkg_lang "${pkgname#tesseract-ocr-}"
	}
}
|
|
|
|
|
# Subpackage with the Thaana script data; noarch, depends on the main package.
tesseract-ocr-script-Thaana_package() {
	archs=noarch
	depends="${sourcepkg}>=${version}_${revision}"
	short_desc+=" - Thaana script data"
	pkg_install() {
		# Call pkg_lang directly with pkgname minus its "tesseract-ocr-"
		# prefix; inside $( ) its stdout would be executed as a command.
		pkg_lang "${pkgname#tesseract-ocr-}"
	}
}
|
|
|
|
|
# Subpackage with the Thai script data; noarch, depends on the main package.
tesseract-ocr-script-Thai_package() {
	archs=noarch
	depends="${sourcepkg}>=${version}_${revision}"
	short_desc+=" - Thai script data"
	pkg_install() {
		# Call pkg_lang directly with pkgname minus its "tesseract-ocr-"
		# prefix; inside $( ) its stdout would be executed as a command.
		pkg_lang "${pkgname#tesseract-ocr-}"
	}
}
|
|
|
|
|
# Subpackage with the Tibetan script data; noarch, depends on the main package.
tesseract-ocr-script-Tibetan_package() {
	archs=noarch
	depends="${sourcepkg}>=${version}_${revision}"
	short_desc+=" - Tibetan script data"
	pkg_install() {
		# Call pkg_lang directly with pkgname minus its "tesseract-ocr-"
		# prefix; inside $( ) its stdout would be executed as a command.
		pkg_lang "${pkgname#tesseract-ocr-}"
	}
}
|
|
|
|
|
# Subpackage with the Vietnamese script data; noarch, depends on the main package.
tesseract-ocr-script-Vietnamese_package() {
	archs=noarch
	depends="${sourcepkg}>=${version}_${revision}"
	short_desc+=" - Vietnamese script data"
	pkg_install() {
		# Call pkg_lang directly with pkgname minus its "tesseract-ocr-"
		# prefix; inside $( ) its stdout would be executed as a command.
		pkg_lang "${pkgname#tesseract-ocr-}"
	}
}
|
|
|
|
|