Skip to content

Commit

Permalink
PDF Renderer: allow caller to specify an alternate image or resolution.
Browse files Browse the repository at this point in the history
Add pdf renderer tests.
Install pdf font in cmake tool chain.

resolves tesseract-ocr#210
resolves tesseract-ocr#3798
  • Loading branch information
phymbert committed Dec 18, 2023
1 parent ea0b245 commit 5f1e7d5
Show file tree
Hide file tree
Showing 7 changed files with 117 additions and 3 deletions.
7 changes: 7 additions & 0 deletions CMakeLists.txt
Expand Up @@ -100,6 +100,7 @@ option(DISABLE_TIFF "Disable build with libtiff (if available)" OFF)
option(DISABLE_ARCHIVE "Disable build with libarchive (if available)" OFF)
option(DISABLE_CURL "Disable build with libcurl (if available)" OFF)
option(INSTALL_CONFIGS "Install tesseract configs" ON)
option(INSTALL_PDF_TTF "Install pdf font file" ON)

if(NOT ${CMAKE_VERSION} VERSION_LESS "3.15.0")
if(WIN32 AND MSVC)
Expand Down Expand Up @@ -573,6 +574,8 @@ endif(ENABLE_OPENCL)
message(STATUS "Use system ICU Library [USE_SYSTEM_ICU]: ${USE_SYSTEM_ICU}")
message(
STATUS "Install tesseract configs [INSTALL_CONFIGS]: ${INSTALL_CONFIGS}")
message(
STATUS "Install tesseract pdf font [INSTALL_PDF_TTF]: ${INSTALL_PDF_TTF}")
message(STATUS "--------------------------------------------------------")
message(STATUS)

Expand Down Expand Up @@ -984,6 +987,10 @@ if(INSTALL_CONFIGS)
install(FILES ${TESSERACT_TESSCONFIGS}
DESTINATION ${CMAKE_INSTALL_PREFIX}/share/tessdata/tessconfigs)
endif()
if (INSTALL_PDF_TTF)
install(FILES tessdata/pdf.ttf
DESTINATION ${CMAKE_INSTALL_PREFIX}/share/tessdata)
endif ()

# ##############################################################################
# uninstall target
Expand Down
6 changes: 6 additions & 0 deletions Makefile.am
Expand Up @@ -1269,6 +1269,7 @@ check_PROGRAMS += paragraphs_test
if !DISABLED_LEGACY_ENGINE
check_PROGRAMS += params_model_test
endif # !DISABLED_LEGACY_ENGINE
check_PROGRAMS += pdfrenderer_test
check_PROGRAMS += progress_test
check_PROGRAMS += qrsequence_test
check_PROGRAMS += recodebeam_test
Expand Down Expand Up @@ -1497,6 +1498,11 @@ progress_test_CPPFLAGS = $(unittest_CPPFLAGS)
progress_test_LDFLAGS = $(OPENCL_LDFLAGS) $(LEPTONICA_LIBS)
progress_test_LDADD = $(GTEST_LIBS) $(GMOCK_LIBS) $(TESS_LIBS) $(LEPTONICA_LIBS)

pdfrenderer_test_SOURCES = unittest/pdfrenderer_test.cc
pdfrenderer_test_CPPFLAGS = $(unittest_CPPFLAGS)
pdfrenderer_test_LDFLAGS = $(LEPTONICA_LIBS)
pdfrenderer_test_LDADD = $(GTEST_LIBS) $(GMOCK_LIBS) $(TESS_LIBS) $(LEPTONICA_LIBS)

qrsequence_test_SOURCES = unittest/qrsequence_test.cc
qrsequence_test_CPPFLAGS = $(unittest_CPPFLAGS)
qrsequence_test_LDADD = $(TESS_LIBS)
Expand Down
31 changes: 31 additions & 0 deletions include/tesseract/renderer.h
Expand Up @@ -106,6 +106,22 @@ class TESS_API TessResultRenderer {
return imagenum_;
}

/**
* Allow the caller to choose an alternate image to render with the extracted text.
* It must be called after BeginDocument and before AddImage.
*/
void SetRenderingImage(Pix *rendering_image) {
rendering_image_ = rendering_image;
}

/**
* Allow the caller to specify the target rendering resolution.
* If not set, rendering_dpi api params will be used.
*/
void SetRenderingResolution(int rendering_dpi) {
rendering_dpi_ = rendering_dpi;
}

protected:
/**
* Called by concrete classes.
Expand Down Expand Up @@ -139,12 +155,27 @@ class TESS_API TessResultRenderer {
// This method will grow the output buffer if needed.
void AppendData(const char *s, int len);

// Renderers can call this to get the actual image to render with extracted text.
// This method returns:
// - the rendering image set by the caller or
// - the input image scaled to the rendering_dpi field if defined or
// - the input image from the api otherwise
Pix* GetRenderingImage(TessBaseAPI *api);

// Resolution of the rendering image either set manually by the caller or with the rendering_dpi api parameter.
int GetRenderingResolution(TessBaseAPI *api);

// Reset rendering image and dpi to previous state and destroy scaled image
void ResetRenderingState(Pix *rendering_image_prev, int rendering_dpi_prev);

private:
TessResultRenderer *next_; // Can link multiple renderers together
FILE *fout_; // output file pointer
const char *file_extension_; // standard extension for generated output
std::string title_; // title of document being rendered
int imagenum_; // index of last image added
Pix *rendering_image_; // Image to render with the extracted text
int rendering_dpi_; // Resolution of the rendering_image
bool happy_; // I get grumpy when the disk fills up, etc.
};

Expand Down
13 changes: 10 additions & 3 deletions src/api/pdfrenderer.cpp
Expand Up @@ -319,7 +319,12 @@ static bool CodepointToUtf16be(int code, char utf16[kMaxBytesPerCodepoint]) {
}

char *TessPDFRenderer::GetPDFTextObjects(TessBaseAPI *api, double width, double height) {
double ppi = api->GetSourceYResolution();
double input_image_ppi = api->GetSourceYResolution();
double ppi = GetRenderingResolution(api);
double scale = 1;
if (input_image_ppi > 0) {
scale = ppi / input_image_ppi;
}

// These initial conditions are all arbitrary and will be overwritten
double old_x = 0.0, old_y = 0.0;
Expand Down Expand Up @@ -369,6 +374,7 @@ char *TessPDFRenderer::GetPDFTextObjects(TessBaseAPI *api, double width, double
if (res_it->IsAtBeginningOf(RIL_TEXTLINE)) {
int x1, y1, x2, y2;
res_it->Baseline(RIL_TEXTLINE, &x1, &y1, &x2, &y2);
x1 *= scale; y1 *= scale; x2 *= scale; y2 *= scale;
ClipBaseline(ppi, x1, y1, x2, y2, &line_x1, &line_y1, &line_x2, &line_y2);
}

Expand Down Expand Up @@ -403,6 +409,7 @@ char *TessPDFRenderer::GetPDFTextObjects(TessBaseAPI *api, double width, double
{
int word_x1, word_y1, word_x2, word_y2;
res_it->Baseline(RIL_WORD, &word_x1, &word_y1, &word_x2, &word_y2);
word_x1 *= scale; word_y1 *= scale; word_x2 *= scale; word_y2 *= scale;
GetWordBaseline(writing_direction, ppi, height, word_x1, word_y1, word_x2, word_y2, line_x1,
line_y1, line_x2, line_y2, &x, &y, &word_length);
}
Expand Down Expand Up @@ -809,9 +816,9 @@ bool TessPDFRenderer::imageToPDFObj(Pix *pix, const char *filename, long int obj
}

bool TessPDFRenderer::AddImageHandler(TessBaseAPI *api) {
Pix *pix = api->GetInputImage();
Pix *pix = GetRenderingImage(api);
const char *filename = api->GetInputName();
int ppi = api->GetSourceYResolution();
int ppi = GetRenderingResolution(api);
if (!pix || ppi <= 0) {
return false;
}
Expand Down
61 changes: 61 additions & 0 deletions src/api/renderer.cpp
Expand Up @@ -18,12 +18,14 @@
#ifdef HAVE_CONFIG_H
# include "config_auto.h"
#endif
#include <allheaders.h>
#include <tesseract/baseapi.h>
#include <tesseract/renderer.h>
#include <cstring>
#include <memory> // std::unique_ptr
#include <string> // std::string
#include "serialis.h" // Serialize
#include "tprintf.h"

namespace tesseract {

Expand All @@ -36,6 +38,8 @@ TessResultRenderer::TessResultRenderer(const char *outputbase, const char *exten
, file_extension_(extension)
, title_("")
, imagenum_(-1)
, rendering_image_(nullptr)
, rendering_dpi_(0)
, happy_(true) {
if (strcmp(outputbase, "-") && strcmp(outputbase, "stdout")) {
std::string outfile = std::string(outputbase) + "." + extension;
Expand Down Expand Up @@ -90,13 +94,70 @@ bool TessResultRenderer::AddImage(TessBaseAPI *api) {
return false;
}
++imagenum_;
Pix *rendering_image_prev = rendering_image_;
int rendering_dpi_prev = rendering_dpi_;
bool ok = AddImageHandler(api);
ResetRenderingState(rendering_image_prev, rendering_dpi_prev);
if (next_) {
ok = next_->AddImage(api) && ok;
}
return ok;
}

void TessResultRenderer::ResetRenderingState(Pix* rendering_image_prev, int rendering_dpi_prev){
if (rendering_image_ != rendering_image_prev) {
pixDestroy(&rendering_image_);
rendering_image_ = rendering_image_prev;
}
if (rendering_dpi_ != rendering_dpi_prev) {
rendering_dpi_ = rendering_dpi_prev;
}
}

Pix* TessResultRenderer::GetRenderingImage(TessBaseAPI *api) {
if (!rendering_image_) {
Pix *source_image = api->GetInputImage();
int source_dpi = api->GetSourceYResolution();
if (!source_image || source_dpi <= 0) {
happy_ = false;
return nullptr;
}

int rendering_dpi = GetRenderingResolution(api);
if (rendering_dpi != source_dpi) {
float scale = (float)rendering_dpi / (float)source_dpi;

rendering_image_ = pixScale(source_image, scale, scale);
} else {
return source_image;
}
}
return rendering_image_;
}

int TessResultRenderer::GetRenderingResolution(tesseract::TessBaseAPI *api) {
if (rendering_dpi_) {
return rendering_dpi_;
}
int source_dpi = api->GetSourceYResolution();
int rendering_dpi;
if (api->GetIntVariable("rendering_dpi", &rendering_dpi) &&
rendering_dpi > 0 && rendering_dpi != source_dpi) {
if (rendering_dpi < kMinCredibleResolution ||
rendering_dpi > kMaxCredibleResolution) {
#if !defined(NDEBUG)
tprintf(
"Warning: User defined rendering dpi %d is outside of expected range "
"(%d - %d)!\n",
rendering_dpi, kMinCredibleResolution, kMaxCredibleResolution);
#endif
}
rendering_dpi_ = rendering_dpi;
return rendering_dpi_;
}
return source_dpi;
}

bool TessResultRenderer::EndDocument() {
if (!happy_) {
return false;
Expand Down
1 change: 1 addition & 0 deletions src/ccmain/tesseractclass.cpp
Expand Up @@ -349,6 +349,7 @@ Tesseract::Tesseract()
, BOOL_MEMBER(textonly_pdf, false, "Create PDF with only one invisible text layer",
this->params())
, INT_MEMBER(jpg_quality, 85, "Set JPEG quality level", this->params())
, INT_MEMBER(rendering_dpi, 0, "Scale input image resolution for rendering", this->params())
, INT_MEMBER(user_defined_dpi, 0, "Specify DPI for input image", this->params())
, INT_MEMBER(min_characters_to_try, 50, "Specify minimum characters to try during OSD",
this->params())
Expand Down
1 change: 1 addition & 0 deletions src/ccmain/tesseractclass.h
Expand Up @@ -903,6 +903,7 @@ class TESS_API Tesseract : public Wordrec {
BOOL_VAR_H(tessedit_create_pdf);
BOOL_VAR_H(textonly_pdf);
INT_VAR_H(jpg_quality);
INT_VAR_H(rendering_dpi);
INT_VAR_H(user_defined_dpi);
INT_VAR_H(min_characters_to_try);
STRING_VAR_H(unrecognised_char);
Expand Down

0 comments on commit 5f1e7d5

Please sign in to comment.