Skip to content

Commit

Permalink
Add a command-line argument to set visible PDF image
Browse files Browse the repository at this point in the history
Fix issue tesseract-ocr#210.

This adds an optional command-line argument, --visible-pdf-image, to set
the image that will be shown on the page when generating a PDF.

This addresses a niche case where the user wishes to use an optimized
image for OCR but maintain the visual appearance of the original image
when generating a PDF.

Signed-off-by: David Hendricks <david.hendricks@gmail.com>
  • Loading branch information
dhendrix authored and David Hendricks committed Jan 19, 2020
1 parent a6871a8 commit 66bd756
Show file tree
Hide file tree
Showing 4 changed files with 60 additions and 1 deletion.
32 changes: 32 additions & 0 deletions api/baseapi.cpp
Expand Up @@ -122,6 +122,8 @@ TessBaseAPI::TessBaseAPI()
page_res_(NULL),
input_file_(NULL),
input_image_(NULL),
visible_pdf_image_file_(NULL),
visible_pdf_image_(NULL),
output_file_(NULL),
datapath_(NULL),
language_(NULL),
Expand Down Expand Up @@ -205,6 +207,14 @@ void TessBaseAPI::SetInputName(const char* name) {
*input_file_ = name;
}

/**
 * Set the name of the image file that the PDF renderer reads and shows
 * in place of the input image.
 */
void TessBaseAPI::SetVisiblePdfImageFilename(const char* name) {
  // Reuse the existing STRING when there is one; allocate only on first use.
  if (visible_pdf_image_file_ != NULL) {
    *visible_pdf_image_file_ = name;
    return;
  }
  visible_pdf_image_file_ = new STRING(name);
}


/** Set the name of the output files. Needed only for debugging. */
void TessBaseAPI::SetOutputName(const char* name) {
if (output_file_ == NULL)
Expand Down Expand Up @@ -948,16 +958,34 @@ void TessBaseAPI::SetInputImage(Pix *pix) {
input_image_ = pixCopy(NULL, pix);
}

/**
 * Set the image used in PDF output, replacing any previously stored one.
 * A copy of pix is stored (made with pixCopy), so the caller keeps
 * ownership of pix. Passing NULL clears the stored image.
 */
void TessBaseAPI::SetVisiblePdfImage(Pix *pix) {
  // Copy before releasing the old image: pix may be the very pointer that
  // GetVisiblePdfImage() handed out, and destroying it first would make
  // pixCopy read freed memory.
  Pix *replacement = NULL;
  if (pix)
    replacement = pixCopy(NULL, pix);
  if (visible_pdf_image_)
    pixDestroy(&visible_pdf_image_);
  visible_pdf_image_ = replacement;
}

/**
 * Returns the stored input image pointer itself, not a copy.
 */
Pix* TessBaseAPI::GetInputImage() {
  return this->input_image_;
}

/**
 * Returns the stored visible PDF image pointer itself, not a copy.
 * The result is NULL when no image has been set.
 */
Pix* TessBaseAPI::GetVisiblePdfImage() {
  return this->visible_pdf_image_;
}

/**
 * Returns the stored input file name as a C string, or NULL when no
 * name has been set.
 */
const char * TessBaseAPI::GetInputName() {
  if (input_file_ == NULL) {
    return NULL;
  }
  return input_file_->c_str();
}

/**
 * Returns the stored visible PDF image file name as a C string, or NULL
 * when no name has been set.
 */
const char * TessBaseAPI::GetVisiblePdfImageFilename() {
  if (visible_pdf_image_file_ == NULL) {
    return NULL;
  }
  return visible_pdf_image_file_->c_str();
}

/**
 * Returns the data directory of the underlying tesseract_ instance as a
 * C string. The pointer refers to storage held by tesseract_->datadir.
 * NOTE(review): tesseract_ is dereferenced without a NULL check here;
 * presumably the API must be initialized first -- confirm against callers.
 */
const char * TessBaseAPI::GetDatapath() {
return tesseract_->datadir.c_str();
}
Expand Down Expand Up @@ -2119,6 +2147,10 @@ void TessBaseAPI::End() {
pixDestroy(&input_image_);
input_image_ = NULL;
}
if (visible_pdf_image_ != NULL) {
pixDestroy(&visible_pdf_image_);
visible_pdf_image_ = NULL;
}
if (output_file_ != NULL) {
delete output_file_;
output_file_ = NULL;
Expand Down
6 changes: 6 additions & 0 deletions api/baseapi.h
Expand Up @@ -146,6 +146,10 @@ class TESS_API TessBaseAPI {
Pix* GetInputImage();
int GetSourceYResolution();
const char* GetDatapath();
/** Set the name of the image file to show in PDF output instead of the input image. */
void SetVisiblePdfImageFilename(const char* name);
/** Returns the name set by SetVisiblePdfImageFilename, or NULL if none was set. */
const char *GetVisiblePdfImageFilename();
/** Stores a copy of pix as the visible PDF image, releasing any previous one; NULL clears it. */
void SetVisiblePdfImage(Pix *pix);
/** Returns the stored visible PDF image (not a copy), or NULL if none is set. */
Pix* GetVisiblePdfImage();

/** Set the name of the bonus output files. Needed only for debugging. */
void SetOutputName(const char* name);
Expand Down Expand Up @@ -871,6 +875,8 @@ class TESS_API TessBaseAPI {
PAGE_RES* page_res_; ///< The page-level data.
STRING* input_file_; ///< Name used by training code.
Pix* input_image_; ///< Image used for searchable PDF
STRING* visible_pdf_image_file_; ///< Name of image file shown in output PDF.
Pix* visible_pdf_image_; ///< Image used in output PDF
STRING* output_file_; ///< Name used by debug code.
STRING* datapath_; ///< Current location of tessdata.
STRING* language_; ///< Last initialized language.
Expand Down
14 changes: 13 additions & 1 deletion api/pdfrenderer.cpp
Expand Up @@ -841,9 +841,19 @@ bool TessPDFRenderer::imageToPDFObj(Pix *pix,
bool TessPDFRenderer::AddImageHandler(TessBaseAPI* api) {
size_t n;
char buf[kBasicBufSize];
Pix *pix = api->GetInputImage();
char *filename = (char *)api->GetInputName();
int ppi = api->GetSourceYResolution();
Pix *pix;
int destroy_pix = 0;

if (api->GetVisiblePdfImageFilename()) {
pix = pixRead(api->GetVisiblePdfImageFilename());
api->SetVisiblePdfImage(pix);
destroy_pix = 1;
} else {
pix = api->GetInputImage();
}

if (!pix || ppi <= 0)
return false;
double width = pixGetWidth(pix) * 72.0 / ppi;
Expand Down Expand Up @@ -915,6 +925,8 @@ bool TessPDFRenderer::AddImageHandler(TessBaseAPI* api) {
AppendData(pdf_object, objsize);
AppendPDFObjectDIY(objsize);
delete[] pdf_object;
if (destroy_pix)
pixDestroy(&pix);
return true;
}

Expand Down
9 changes: 9 additions & 0 deletions api/tesseractmain.cpp
Expand Up @@ -220,6 +220,7 @@ void ParseArgs(const int argc, char** argv,
const char** image,
const char** outputbase,
const char** datapath,
const char **visible_pdf_image_file,
bool* list_langs,
bool* print_parameters,
GenericVector<STRING>* vars_vec,
Expand Down Expand Up @@ -277,6 +278,9 @@ void ParseArgs(const int argc, char** argv,
} else if (strcmp(argv[i], "-c") == 0 && i + 1 < argc) {
// handled properly after api init
++i;
} else if (strcmp(argv[i], "--visible-pdf-image") == 0 && i + 1 < argc) {
*visible_pdf_image_file = argv[i + 1];
++i;
} else if (*image == NULL) {
*image = argv[i];
} else if (*outputbase == NULL) {
Expand Down Expand Up @@ -363,6 +367,7 @@ int main(int argc, char **argv) {
const char* image = NULL;
const char* outputbase = NULL;
const char* datapath = NULL;
const char *visible_pdf_image_file = NULL;
bool list_langs = false;
bool print_parameters = false;
GenericVector<STRING> vars_vec, vars_values;
Expand All @@ -376,6 +381,7 @@ int main(int argc, char **argv) {

ParseArgs(argc, argv,
&lang, &image, &outputbase, &datapath,
&visible_pdf_image_file,
&list_langs, &print_parameters,
&vars_vec, &vars_values, &arg_i, &pagesegmode);

Expand Down Expand Up @@ -414,6 +420,9 @@ int main(int argc, char **argv) {

FixPageSegMode(&api, pagesegmode);

if (visible_pdf_image_file)
api.SetVisiblePdfImageFilename(visible_pdf_image_file);

if (pagesegmode == tesseract::PSM_AUTO_ONLY) {
int ret_val = 0;

Expand Down

0 comments on commit 66bd756

Please sign in to comment.