107 lines
2.9 KiB
Diff
107 lines
2.9 KiB
Diff
--- a/src/lib_ccx/hardsubx.c
|
|
+++ b/src/lib_ccx/hardsubx.c
|
|
@@ -221,7 +221,7 @@
|
|
char *pars_values = strdup("/dev/null");
|
|
char *tessdata_path = NULL;
|
|
|
|
- char *lang = options->ocrlang;
|
|
+ char *lang = (char *)options->ocrlang;
|
|
if (!lang)
|
|
lang = "eng"; // English is default language
|
|
|
|
@@ -245,7 +245,7 @@
|
|
|
|
int ret = -1;
|
|
|
|
- if (!strncmp("4.", TessVersion(), 2))
|
|
+ if (!strncmp("4.", TessVersion(), 2) || !strncmp("5.", TessVersion(), 2))
|
|
{
|
|
char tess_path[1024];
|
|
if (ccx_options.ocr_oem < 0)
|
|
--- a/src/lib_ccx/ocr.c
|
|
+++ b/src/lib_ccx/ocr.c
|
|
@@ -97,36 +97,22 @@
|
|
char *probe_tessdata_location(const char *lang)
|
|
{
|
|
int ret = 0;
|
|
- char *tessdata_dir_path = getenv("TESSDATA_PREFIX");
|
|
|
|
- ret = search_language_pack(tessdata_dir_path, lang);
|
|
- if (!ret)
|
|
- return tessdata_dir_path;
|
|
-
|
|
- tessdata_dir_path = "./";
|
|
- ret = search_language_pack(tessdata_dir_path, lang);
|
|
- if (!ret)
|
|
- return tessdata_dir_path;
|
|
-
|
|
- tessdata_dir_path = "/usr/share/";
|
|
- ret = search_language_pack(tessdata_dir_path, lang);
|
|
- if (!ret)
|
|
- return tessdata_dir_path;
|
|
-
|
|
- tessdata_dir_path = "/usr/local/share/";
|
|
- ret = search_language_pack(tessdata_dir_path, lang);
|
|
- if (!ret)
|
|
- return tessdata_dir_path;
|
|
-
|
|
- tessdata_dir_path = "/usr/share/tesseract-ocr/";
|
|
- ret = search_language_pack(tessdata_dir_path, lang);
|
|
- if (!ret)
|
|
- return tessdata_dir_path;
|
|
-
|
|
- tessdata_dir_path = "/usr/share/tesseract-ocr/4.00/";
|
|
- ret = search_language_pack(tessdata_dir_path, lang);
|
|
- if (!ret)
|
|
- return tessdata_dir_path;
|
|
+ const char *paths[] = {
|
|
+ getenv("TESSDATA_PREFIX"),
|
|
+ "./",
|
|
+ "/usr/share/",
|
|
+ "/usr/local/share/",
|
|
+ "/usr/share/tesseract-ocr/",
|
|
+ "/usr/share/tesseract-ocr/4.00/",
|
|
+ "/usr/share/tesseract-ocr/5/",
|
|
+ "/usr/share/tesseract/"};
|
|
+
|
|
+ for (int i = 0; i < sizeof(paths) / sizeof(paths[0]); i++)
|
|
+ {
|
|
+ if (!search_language_pack(paths[i], lang))
|
|
+ return (char *)paths[i];
|
|
+ }
|
|
|
|
return NULL;
|
|
}
|
|
@@ -174,7 +160,7 @@
|
|
char *pars_values = strdup("tess.log");
|
|
|
|
ctx->api = TessBaseAPICreate();
|
|
- if (!strncmp("4.", TessVersion(), 2))
|
|
+ if (!strncmp("4.", TessVersion(), 2) || !strncmp("5.", TessVersion(), 2))
|
|
{
|
|
char tess_path[1024];
|
|
snprintf(tess_path, 1024, "%s%s%s", tessdata_path, "/", "tessdata");
|
|
@@ -331,6 +317,11 @@
|
|
}
|
|
|
|
BOX *crop_points = ignore_alpha_at_edge(copy->alpha, copy->data, w, h, color_pix, &color_pix_out);
|
|
+
|
|
+ l_int32 x, y, _w, _h;
|
|
+
|
|
+ boxGetGeometry(crop_points, &x, &y, &_w, &_h);
|
|
+
|
|
// Converting image to grayscale for OCR to avoid issues with transparency
|
|
cpix_gs = pixConvertRGBToGray(cpix, 0.0, 0.0, 0.0);
|
|
|
|
@@ -426,8 +417,8 @@
|
|
{
|
|
for (int j = x1; j <= x2; j++)
|
|
{
|
|
- if (copy->data[(crop_points->y + i) * w + (crop_points->x + j)] != firstpixel)
|
|
- histogram[copy->data[(crop_points->y + i) * w + (crop_points->x + j)]]++;
|
|
+ if (copy->data[(y + i) * w + (x + j)] != firstpixel)
|
|
+ histogram[copy->data[(y + i) * w + (x + j)]]++;
|
|
}
|
|
}
|
|
/* sorted in increasing order of intensity */
|