Skip to content

Commit

Permalink
PDF Renderer: allow to specify an alternate image or resolution progr…
Browse files Browse the repository at this point in the history
…ammatically.

Support the new rendering_dpi API parameter.
Add PDF renderer tests.
Install the PDF font in the CMake toolchain.

resolves tesseract-ocr#210
resolves tesseract-ocr#3798
  • Loading branch information
phymbert committed Dec 18, 2023
1 parent ea0b245 commit ec35987
Show file tree
Hide file tree
Showing 7 changed files with 120 additions and 3 deletions.
7 changes: 7 additions & 0 deletions CMakeLists.txt
Expand Up @@ -100,6 +100,7 @@ option(DISABLE_TIFF "Disable build with libtiff (if available)" OFF)
option(DISABLE_ARCHIVE "Disable build with libarchive (if available)" OFF)
option(DISABLE_CURL "Disable build with libcurl (if available)" OFF)
option(INSTALL_CONFIGS "Install tesseract configs" ON)
option(INSTALL_PDF_TTF "Install pdf font file" ON)

if(NOT ${CMAKE_VERSION} VERSION_LESS "3.15.0")
if(WIN32 AND MSVC)
Expand Down Expand Up @@ -573,6 +574,8 @@ endif(ENABLE_OPENCL)
message(STATUS "Use system ICU Library [USE_SYSTEM_ICU]: ${USE_SYSTEM_ICU}")
message(
STATUS "Install tesseract configs [INSTALL_CONFIGS]: ${INSTALL_CONFIGS}")
message(
STATUS "Install tesseract pdf font [INSTALL_PDF_TTF]: ${INSTALL_PDF_TTF}")
message(STATUS "--------------------------------------------------------")
message(STATUS)

Expand Down Expand Up @@ -984,6 +987,10 @@ if(INSTALL_CONFIGS)
install(FILES ${TESSERACT_TESSCONFIGS}
DESTINATION ${CMAKE_INSTALL_PREFIX}/share/tessdata/tessconfigs)
endif()
# Install the PDF font file (tessdata/pdf.ttf) into share/tessdata when the
# INSTALL_PDF_TTF option is enabled.
if (INSTALL_PDF_TTF)
install(FILES tessdata/pdf.ttf
DESTINATION ${CMAKE_INSTALL_PREFIX}/share/tessdata)
endif ()

# ##############################################################################
# uninstall target
Expand Down
5 changes: 5 additions & 0 deletions Makefile.am
Expand Up @@ -1269,6 +1269,7 @@ check_PROGRAMS += paragraphs_test
if !DISABLED_LEGACY_ENGINE
check_PROGRAMS += params_model_test
endif # !DISABLED_LEGACY_ENGINE
check_PROGRAMS += pdfrenderer_test
check_PROGRAMS += progress_test
check_PROGRAMS += qrsequence_test
check_PROGRAMS += recodebeam_test
Expand Down Expand Up @@ -1497,6 +1498,10 @@ progress_test_CPPFLAGS = $(unittest_CPPFLAGS)
progress_test_LDFLAGS = $(OPENCL_LDFLAGS) $(LEPTONICA_LIBS)
progress_test_LDADD = $(GTEST_LIBS) $(GMOCK_LIBS) $(TESS_LIBS) $(LEPTONICA_LIBS)

# pdfrenderer_test: built from unittest/pdfrenderer_test.cc with the shared
# unit-test preprocessor flags; links against TESS_LIBS and TRAINING_LIBS.
pdfrenderer_test_SOURCES = unittest/pdfrenderer_test.cc
pdfrenderer_test_CPPFLAGS = $(unittest_CPPFLAGS)
pdfrenderer_test_LDADD = $(TESS_LIBS) $(TRAINING_LIBS)

qrsequence_test_SOURCES = unittest/qrsequence_test.cc
qrsequence_test_CPPFLAGS = $(unittest_CPPFLAGS)
qrsequence_test_LDADD = $(TESS_LIBS)
Expand Down
34 changes: 34 additions & 0 deletions include/tesseract/renderer.h
Expand Up @@ -106,6 +106,23 @@ class TESS_API TessResultRenderer {
return imagenum_;
}

/**
 * Sets an alternate image to be rendered together with the extracted text,
 * in place of the input image held by the api.
 * Call it after BeginDocument and before AddImage.
 *
 * @param rendering_image image to render with the extracted text
 */
void SetRenderingImage(Pix *rendering_image) {
  this->rendering_image_ = rendering_image;
}

/**
* Specifies the expected rendering resolution.
* If not set, rendering_dpi api params will be used, else the source image
* resolution.
*/
void SetRenderingResolution(int rendering_dpi) {
rendering_dpi_ = rendering_dpi;
}

protected:
/**
* Called by concrete classes.
Expand Down Expand Up @@ -139,12 +156,29 @@ class TESS_API TessResultRenderer {
// This method will grow the output buffer if needed.
void AppendData(const char *s, int len);

// Renderers can call this to get the actual image to render with the
// extracted text. This method returns, in order of precedence:
// - the rendering image set by the caller (SetRenderingImage), or
// - the input image scaled to the rendering resolution when that resolution
//   differs from the source resolution, or
// - the input image from the api otherwise.
// Returns nullptr and marks the renderer as unhappy when the api has no input
// image or its source resolution is not positive.
Pix *GetRenderingImage(TessBaseAPI *api);

// Resolution to render at: the value set manually by the caller
// (SetRenderingResolution) if any, else a positive rendering_dpi api parameter
// that differs from the source resolution, else the source image resolution.
int GetRenderingResolution(TessBaseAPI *api);

// Restores the rendering image and dpi to the given previous values. If the
// current rendering image differs from rendering_image_prev (e.g. a scaled
// image was created while rendering), it is destroyed before being replaced.
void ResetRenderingState(Pix *rendering_image_prev, int rendering_dpi_prev);

private:
TessResultRenderer *next_; // Can link multiple renderers together
FILE *fout_; // output file pointer
const char *file_extension_; // standard extension for generated output
std::string title_; // title of document being rendered
int imagenum_; // index of last image added
Pix *rendering_image_; // Image to render with the extracted text
int rendering_dpi_; // Resolution of the rendering_image
bool happy_; // I get grumpy when the disk fills up, etc.
};

Expand Down
13 changes: 10 additions & 3 deletions src/api/pdfrenderer.cpp
Expand Up @@ -319,7 +319,12 @@ static bool CodepointToUtf16be(int code, char utf16[kMaxBytesPerCodepoint]) {
}

char *TessPDFRenderer::GetPDFTextObjects(TessBaseAPI *api, double width, double height) {
double ppi = api->GetSourceYResolution();
double input_image_ppi = api->GetSourceYResolution();
double ppi = GetRenderingResolution(api);
double scale = 1;
if (input_image_ppi > 0) {
scale = ppi / input_image_ppi;
}

// These initial conditions are all arbitrary and will be overwritten
double old_x = 0.0, old_y = 0.0;
Expand Down Expand Up @@ -369,6 +374,7 @@ char *TessPDFRenderer::GetPDFTextObjects(TessBaseAPI *api, double width, double
if (res_it->IsAtBeginningOf(RIL_TEXTLINE)) {
int x1, y1, x2, y2;
res_it->Baseline(RIL_TEXTLINE, &x1, &y1, &x2, &y2);
x1 *= scale; y1 *= scale; x2 *= scale; y2 *= scale;
ClipBaseline(ppi, x1, y1, x2, y2, &line_x1, &line_y1, &line_x2, &line_y2);
}

Expand Down Expand Up @@ -403,6 +409,7 @@ char *TessPDFRenderer::GetPDFTextObjects(TessBaseAPI *api, double width, double
{
int word_x1, word_y1, word_x2, word_y2;
res_it->Baseline(RIL_WORD, &word_x1, &word_y1, &word_x2, &word_y2);
word_x1 *= scale; word_y1 *= scale; word_x2 *= scale; word_y2 *= scale;
GetWordBaseline(writing_direction, ppi, height, word_x1, word_y1, word_x2, word_y2, line_x1,
line_y1, line_x2, line_y2, &x, &y, &word_length);
}
Expand Down Expand Up @@ -809,9 +816,9 @@ bool TessPDFRenderer::imageToPDFObj(Pix *pix, const char *filename, long int obj
}

bool TessPDFRenderer::AddImageHandler(TessBaseAPI *api) {
Pix *pix = api->GetInputImage();
Pix *pix = GetRenderingImage(api);
const char *filename = api->GetInputName();
int ppi = api->GetSourceYResolution();
int ppi = GetRenderingResolution(api);
if (!pix || ppi <= 0) {
return false;
}
Expand Down
62 changes: 62 additions & 0 deletions src/api/renderer.cpp
Expand Up @@ -18,12 +18,14 @@
#ifdef HAVE_CONFIG_H
# include "config_auto.h"
#endif
#include <allheaders.h>
#include <tesseract/baseapi.h>
#include <tesseract/renderer.h>
#include <cstring>
#include <memory> // std::unique_ptr
#include <string> // std::string
#include "serialis.h" // Serialize
#include "tprintf.h"

namespace tesseract {

Expand All @@ -36,6 +38,8 @@ TessResultRenderer::TessResultRenderer(const char *outputbase, const char *exten
, file_extension_(extension)
, title_("")
, imagenum_(-1)
, rendering_image_(nullptr)
, rendering_dpi_(0)
, happy_(true) {
if (strcmp(outputbase, "-") && strcmp(outputbase, "stdout")) {
std::string outfile = std::string(outputbase) + "." + extension;
Expand Down Expand Up @@ -90,13 +94,71 @@ bool TessResultRenderer::AddImage(TessBaseAPI *api) {
return false;
}
++imagenum_;
Pix *rendering_image_prev = rendering_image_;
int rendering_dpi_prev = rendering_dpi_;
bool ok = AddImageHandler(api);
ResetRenderingState(rendering_image_prev, rendering_dpi_prev);
if (next_) {
ok = next_->AddImage(api) && ok;
}
return ok;
}

// Puts the rendering image and resolution back to the values they had before
// a page was rendered. An image that replaced the previous one in the
// meantime is destroyed before the previous pointer is restored.
void TessResultRenderer::ResetRenderingState(Pix *rendering_image_prev,
                                             int rendering_dpi_prev) {
  const bool image_replaced = rendering_image_ != rendering_image_prev;
  if (image_replaced) {
    pixDestroy(&rendering_image_);
    rendering_image_ = rendering_image_prev;
  }
  // Assigning unconditionally is equivalent to assigning only on difference.
  rendering_dpi_ = rendering_dpi_prev;
}

// Returns the image to render with the extracted text: the image already
// stored in rendering_image_ if there is one, otherwise the api input image,
// scaled (and stored in rendering_image_) when the rendering resolution
// differs from the source resolution. Returns nullptr and clears happy_ when
// the api has no input image or a non-positive source resolution.
Pix *TessResultRenderer::GetRenderingImage(TessBaseAPI *api) {
  if (rendering_image_) {
    return rendering_image_;
  }

  Pix *input_pix = api->GetInputImage();
  const int input_dpi = api->GetSourceYResolution();
  if (!input_pix || input_dpi <= 0) {
    happy_ = false;
    return nullptr;
  }

  const int target_dpi = GetRenderingResolution(api);
  if (target_dpi == input_dpi) {
    // No scaling needed: hand back the api image without storing it.
    return input_pix;
  }

  const float factor =
      static_cast<float>(target_dpi) / static_cast<float>(input_dpi);
  rendering_image_ = pixScale(input_pix, factor, factor);
  return rendering_image_;
}

// Returns the resolution to render at. A non-zero rendering_dpi_ wins;
// otherwise a positive "rendering_dpi" api parameter that differs from the
// source resolution is stored in rendering_dpi_ and returned; otherwise the
// source image resolution is returned.
int TessResultRenderer::GetRenderingResolution(tesseract::TessBaseAPI *api) {
  if (rendering_dpi_ != 0) {
    return rendering_dpi_;
  }

  const int input_dpi = api->GetSourceYResolution();
  int requested_dpi = 0;
  const bool use_param =
      api->GetIntVariable("rendering_dpi", &requested_dpi) &&
      requested_dpi > 0 && requested_dpi != input_dpi;
  if (!use_param) {
    return input_dpi;
  }

#if !defined(NDEBUG)
  // Only debug builds report a value outside the credible range; the value is
  // still used either way.
  const bool credible = requested_dpi >= kMinCredibleResolution &&
                        requested_dpi <= kMaxCredibleResolution;
  if (!credible) {
    tprintf(
        "Warning: User defined rendering dpi %d is outside of expected range "
        "(%d - %d)!\n",
        requested_dpi, kMinCredibleResolution, kMaxCredibleResolution);
  }
#endif

  rendering_dpi_ = requested_dpi;
  return rendering_dpi_;
}

bool TessResultRenderer::EndDocument() {
if (!happy_) {
return false;
Expand Down
1 change: 1 addition & 0 deletions src/ccmain/tesseractclass.cpp
Expand Up @@ -349,6 +349,7 @@ Tesseract::Tesseract()
, BOOL_MEMBER(textonly_pdf, false, "Create PDF with only one invisible text layer",
this->params())
, INT_MEMBER(jpg_quality, 85, "Set JPEG quality level", this->params())
, INT_MEMBER(rendering_dpi, 0, "Scaled input image resolution before rendering", this->params())
, INT_MEMBER(user_defined_dpi, 0, "Specify DPI for input image", this->params())
, INT_MEMBER(min_characters_to_try, 50, "Specify minimum characters to try during OSD",
this->params())
Expand Down
1 change: 1 addition & 0 deletions src/ccmain/tesseractclass.h
Expand Up @@ -903,6 +903,7 @@ class TESS_API Tesseract : public Wordrec {
BOOL_VAR_H(tessedit_create_pdf);
BOOL_VAR_H(textonly_pdf);
INT_VAR_H(jpg_quality);
INT_VAR_H(rendering_dpi);
INT_VAR_H(user_defined_dpi);
INT_VAR_H(min_characters_to_try);
STRING_VAR_H(unrecognised_char);
Expand Down

0 comments on commit ec35987

Please sign in to comment.