void-packages/srcpkgs/ccextractor/patches/fix-ocr.patch

107 lines
2.9 KiB
Diff

--- a/src/lib_ccx/hardsubx.c
+++ b/src/lib_ccx/hardsubx.c
@@ -221,7 +221,7 @@
char *pars_values = strdup("/dev/null");
char *tessdata_path = NULL;
- char *lang = options->ocrlang;
+ char *lang = (char *)options->ocrlang;
if (!lang)
lang = "eng"; // English is default language
@@ -245,7 +245,7 @@
int ret = -1;
- if (!strncmp("4.", TessVersion(), 2))
+ if (!strncmp("4.", TessVersion(), 2) || !strncmp("5.", TessVersion(), 2))
{
char tess_path[1024];
if (ccx_options.ocr_oem < 0)
--- a/src/lib_ccx/ocr.c
+++ b/src/lib_ccx/ocr.c
@@ -97,36 +97,22 @@
char *probe_tessdata_location(const char *lang)
{
int ret = 0;
- char *tessdata_dir_path = getenv("TESSDATA_PREFIX");
- ret = search_language_pack(tessdata_dir_path, lang);
- if (!ret)
- return tessdata_dir_path;
-
- tessdata_dir_path = "./";
- ret = search_language_pack(tessdata_dir_path, lang);
- if (!ret)
- return tessdata_dir_path;
-
- tessdata_dir_path = "/usr/share/";
- ret = search_language_pack(tessdata_dir_path, lang);
- if (!ret)
- return tessdata_dir_path;
-
- tessdata_dir_path = "/usr/local/share/";
- ret = search_language_pack(tessdata_dir_path, lang);
- if (!ret)
- return tessdata_dir_path;
-
- tessdata_dir_path = "/usr/share/tesseract-ocr/";
- ret = search_language_pack(tessdata_dir_path, lang);
- if (!ret)
- return tessdata_dir_path;
-
- tessdata_dir_path = "/usr/share/tesseract-ocr/4.00/";
- ret = search_language_pack(tessdata_dir_path, lang);
- if (!ret)
- return tessdata_dir_path;
+ const char *paths[] = { // candidate directories, probed in this order
+ getenv("TESSDATA_PREFIX"),
+ "./",
+ "/usr/share/",
+ "/usr/local/share/",
+ "/usr/share/tesseract-ocr/",
+ "/usr/share/tesseract-ocr/4.00/",
+ "/usr/share/tesseract-ocr/5/",
+ "/usr/share/tesseract/"};
+
+ for (size_t i = 0; i < sizeof(paths) / sizeof(paths[0]); i++) // size_t matches the unsigned sizeof quotient
+ {
+ if (!search_language_pack(paths[i], lang))
+ return (char *)paths[i];
+ }
return NULL;
}
@@ -174,7 +160,7 @@
char *pars_values = strdup("tess.log");
ctx->api = TessBaseAPICreate();
- if (!strncmp("4.", TessVersion(), 2))
+ if (!strncmp("4.", TessVersion(), 2) || !strncmp("5.", TessVersion(), 2))
{
char tess_path[1024];
snprintf(tess_path, 1024, "%s%s%s", tessdata_path, "/", "tessdata");
@@ -331,6 +317,11 @@
}
BOX *crop_points = ignore_alpha_at_edge(copy->alpha, copy->data, w, h, color_pix, &color_pix_out);
+
+ l_int32 x, y, _w, _h;
+
+ boxGetGeometry(crop_points, &x, &y, &_w, &_h);
+
// Converting image to grayscale for OCR to avoid issues with transparency
cpix_gs = pixConvertRGBToGray(cpix, 0.0, 0.0, 0.0);
@@ -426,8 +417,8 @@
{
for (int j = x1; j <= x2; j++)
{
- if (copy->data[(crop_points->y + i) * w + (crop_points->x + j)] != firstpixel)
- histogram[copy->data[(crop_points->y + i) * w + (crop_points->x + j)]]++;
+ if (copy->data[(y + i) * w + (x + j)] != firstpixel)
+ histogram[copy->data[(y + i) * w + (x + j)]]++;
}
}
/* sorted in increasing order of intensity */