Skip to content

Commit

Permalink
Fixed font name encoding issue: #194, #246
Browse files: browse the repository at this point in the history
  • Loading branch information
dothinking committed Jan 5, 2024
1 parent 769a220 commit 248a319
Show file tree
Hide file tree
Showing 2 changed files with 3 additions and 1 deletion.
1 change: 1 addition & 0 deletions pdf2docx/font/Fonts.py
Original file line number Diff line number Diff line change
Expand Up @@ -69,6 +69,7 @@ def extract(cls, fitz_doc):
fonts = []
for xref in xrefs:
basename, ext, _, buffer = fitz_doc.extract_font(xref)
basename = bytes(ord(c) for c in basename).decode()
name = cls._normalized_font_name(basename)

try:
Expand Down
3 changes: 2 additions & 1 deletion pdf2docx/text/TextSpan.py
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,8 @@ def __init__(self, raw:dict=None):
# font metrics
# line_height is the standard single line height used in relative line spacing,
# while exact line spacing is used when line_height==-1 by default.
self.font = raw.get('font', '')
font_name = raw.get('font', '')
self.font = bytes(ord(c) for c in font_name).decode() # in case unicode in font name
self.size = raw.get('size', 12.0)
self.ascender = raw.get('ascender', 1.0)
self.descender = raw.get('descender', 0.0)
Expand Down

0 comments on commit 248a319

Please sign in to comment.