Skip to content

Commit

Permalink
Upgrade PDFBox API to v2.0.23 (#415)
Browse files Browse the repository at this point in the history
  • Loading branch information
TomRoush committed Jun 2, 2022
1 parent dfeae2f commit d69e8d8
Show file tree
Hide file tree
Showing 65 changed files with 832 additions and 434 deletions.
Expand Up @@ -395,4 +395,43 @@ public void testDeleteFont() throws IOException

Assert.assertTrue(tempPdfFile.delete());
}

/**
 * PDFBOX-5115: U+00AD (soft hyphen) should work with WinAnsiEncoding.
 *
 * <p>The same text is shown once with {@code PDType1Font.HELVETICA} and once with a
 * {@code PDType0Font} loaded from LiberationSans-Regular. The test checks that each font reports
 * the same width for the soft hyphen as for "-", and that the text extracted from the saved and
 * reloaded document equals the text that was written.
 *
 * @throws IOException if the font asset cannot be opened or the document cannot be written or
 * read back.
 */
@Test
public void testSoftHyphen() throws IOException
{
    String text = "- \u00AD";
    ByteArrayOutputStream baos = new ByteArrayOutputStream();

    // Build and save the document; the finally block releases it even if an assertion fails.
    PDDocument doc = new PDDocument();
    try
    {
        PDPage page = new PDPage();
        doc.addPage(page);
        PDFont font1 = PDType1Font.HELVETICA;
        PDFont font2 = PDType0Font.load(doc, testContext.getAssets().open(
            "com/tom_roush/pdfbox/resources/ttf/LiberationSans-Regular.ttf"));

        Assert.assertEquals(font1.getStringWidth("-"), font1.getStringWidth("\u00AD"), 0);
        Assert.assertEquals(font2.getStringWidth("-"), font2.getStringWidth("\u00AD"), 0);

        PDPageContentStream cs = new PDPageContentStream(doc, page);
        try
        {
            cs.beginText();
            cs.newLineAtOffset(100, 500);
            cs.setFont(font1, 10);
            cs.showText(text);
            cs.newLineAtOffset(0, 100);
            cs.setFont(font2, 10);
            cs.showText(text);
            cs.endText();
        }
        finally
        {
            cs.close();
        }
        doc.save(baos);
    }
    finally
    {
        doc.close();
    }

    // Reload from the in-memory bytes and compare the extracted text with what was written.
    PDDocument reloaded = PDDocument.load(baos.toByteArray());
    try
    {
        PDFTextStripper stripper = new PDFTextStripper();
        stripper.setLineSeparator("\n");
        String extractedText = stripper.getText(reloaded);
        Assert.assertEquals(text + "\n" + text, extractedText.trim());
    }
    finally
    {
        reloaded.close();
    }
}
}
Expand Up @@ -200,19 +200,6 @@ public void testFlattenPDFBOX3396_4() throws IOException
flattenAndCompare(sourceUrl, targetFileName);
}

/*
 * PDFBOX-3587 Empty template.
 */
// Not run as a test: a character is missing with the fonts available on the test server.
// @Test
public void testFlattenOpenOfficeForm() throws IOException
{
    flattenAndCompare(
        "https://issues.apache.org/jira/secure/attachment/12839977/OpenOfficeForm.pdf",
        "OpenOfficeForm.pdf");
}

/*
* PDFBOX-3587 Filled template.
*/
Expand Down
Expand Up @@ -253,11 +253,11 @@ public class AFMParser
*/
public static final String START_KERN_PAIRS1 = "StartKernPairs1";
/**
* This is the start compisites data section.
* This is the start composites data section.
*/
public static final String START_COMPOSITES = "StartComposites";
/**
* This is the end compisites data section.
* This is the end composites data section.
*/
public static final String END_COMPOSITES = "EndComposites";
/**
Expand Down
26 changes: 14 additions & 12 deletions library/src/main/java/com/tom_roush/fontbox/cff/CFFParser.java
Expand Up @@ -111,7 +111,7 @@ else if (TAG_TTFONLY.equals(firstTag))
stringIndex = readStringIndexData(input);
byte[][] globalSubrIndex = readIndexData(input);

List<CFFFont> fonts = new ArrayList<CFFFont>();
List<CFFFont> fonts = new ArrayList<CFFFont>(nameIndex.length);
for (int i = 0; i < nameIndex.length; i++)
{
CFFFont font = parseFont(input, nameIndex[i], topDictIndex[i]);
Expand Down Expand Up @@ -270,7 +270,7 @@ else if (b0 == 28 || b0 == 29)
}
else if (b0 == 30)
{
entry.operands.add(readRealNumber(input, b0));
entry.operands.add(readRealNumber(input));
}
else if (b0 >= 32 && b0 <= 254)
{
Expand Down Expand Up @@ -330,19 +330,18 @@ else if (b0 >= 251 && b0 <= 254)
}
}

/**
* @param b0
*/
private static Double readRealNumber(CFFDataInput input, int b0) throws IOException
private static Double readRealNumber(CFFDataInput input) throws IOException
{
StringBuilder sb = new StringBuilder();
boolean done = false;
boolean exponentMissing = false;
boolean hasExponent = false;
int[] nibbles = new int[2];
while (!done)
{
int b = input.readUnsignedByte();
int[] nibbles = { b / 16, b % 16 };
nibbles[0] = b / 16;
nibbles[1] = b % 16;
for (int nibble : nibbles)
{
switch (nibble)
Expand Down Expand Up @@ -392,7 +391,8 @@ private static Double readRealNumber(CFFDataInput input, int b0) throws IOExcept
done = true;
break;
default:
throw new IllegalArgumentException();
// can only be a programming error because a nibble is between 0 and F
throw new IllegalArgumentException("illegal nibble " + nibble);
}
}
}
Expand Down Expand Up @@ -435,11 +435,13 @@ private CFFFont parseFont(CFFDataInput input, String name, byte[] topDictIndex)
boolean isCIDFont = topDict.getEntry("ROS") != null;
if (isCIDFont)
{
font = new CFFCIDFont();
CFFCIDFont cffCIDFont = new CFFCIDFont();
DictData.Entry rosEntry = topDict.getEntry("ROS");
((CFFCIDFont) font).setRegistry(readString(rosEntry.getNumber(0).intValue()));
((CFFCIDFont) font).setOrdering(readString(rosEntry.getNumber(1).intValue()));
((CFFCIDFont) font).setSupplement(rosEntry.getNumber(2).intValue());
cffCIDFont.setRegistry(readString(rosEntry.getNumber(0).intValue()));
cffCIDFont.setOrdering(readString(rosEntry.getNumber(1).intValue()));
cffCIDFont.setSupplement(rosEntry.getNumber(2).intValue());

font = cffCIDFont;
}
else
{
Expand Down
Expand Up @@ -325,15 +325,15 @@ private void drawAlternatingCurve(List<Number> numbers, boolean horizontal)
if (horizontal)
{
addCommand(Arrays.asList(numbers.get(0), 0,
numbers.get(1), numbers.get(2), last ? numbers.get(4)
: 0, numbers.get(3)),
numbers.get(1), numbers.get(2), last ? numbers.get(4)
: 0, numbers.get(3)),
new CharStringCommand(8));
}
else
{
addCommand(Arrays.asList(0, numbers.get(0),
numbers.get(1), numbers.get(2), numbers.get(3),
last ? numbers.get(4) : 0),
numbers.get(1), numbers.get(2), numbers.get(3),
last ? numbers.get(4) : 0),
new CharStringCommand(8));
}
numbers = numbers.subList(last ? 5 : 4, numbers.size());
Expand All @@ -358,8 +358,8 @@ private void drawCurve(List<Number> numbers, boolean horizontal)
else
{
addCommand(Arrays.asList(first ? numbers.get(0) : 0, numbers.get(first ? 1 : 0), numbers
.get(first ? 2 : 1), numbers.get(first ? 3 : 2),
0, numbers.get(first ? 4 : 3)),
.get(first ? 2 : 1), numbers.get(first ? 3 : 2),
0, numbers.get(first ? 4 : 3)),
new CharStringCommand(8));
}
numbers = numbers.subList(first ? 5 : 4, numbers.size());
Expand All @@ -382,8 +382,9 @@ private void addCommand(List<Number> numbers, CharStringCommand command)

private static <E> List<List<E>> split(List<E> list, int size)
{
List<List<E>> result = new ArrayList<List<E>>();
for (int i = 0; i < list.size() / size; i++)
int listSize = list.size() / size;
List<List<E>> result = new ArrayList<List<E>>(listSize);
for (int i = 0; i < listSize; i++)
{
result.add(list.subList(i * size, (i + 1) * size));
}
Expand Down
18 changes: 18 additions & 0 deletions library/src/main/java/com/tom_roush/fontbox/cmap/CMap.java
Expand Up @@ -50,6 +50,9 @@ public class CMap
// Unicode mappings
private final Map<Integer,String> charToUnicode = new HashMap<Integer,String>();

// inverted map: Unicode string -> copy of the code bytes passed to addCharMapping for it;
// a later mapping for the same Unicode string replaces the earlier entry
Map <String, byte[]> unicodeToByteCodes = new HashMap<String, byte[]>();

// CID mappings
private final Map<Integer,Integer> codeToCid = new HashMap<Integer,Integer>();
private final List<CIDRange> codeToCidRanges = new ArrayList<CIDRange>();
Expand Down Expand Up @@ -206,6 +209,7 @@ private int getCodeFromArray( byte[] data, int offset, int length )
*/
void addCharMapping(byte[] codes, String unicode)
{
unicodeToByteCodes.put(unicode, codes.clone()); // clone needed, bytes is modified later
int code = getCodeFromArray(codes, 0, codes.length);
charToUnicode.put(code, unicode);

Expand All @@ -216,6 +220,17 @@ void addCharMapping(byte[] codes, String unicode)
}
}

/**
 * Looks up the code bytes registered for a Unicode string.
 *
 * @param unicode the Unicode string to look up
 * @return the code bytes mapped to the given string, or null if there is no mapping.
 */
public byte[] getCodesFromUnicode(String unicode)
{
    byte[] codes = unicodeToByteCodes.get(unicode);
    return codes;
}

/**
* This will add a CID mapping.
*
Expand Down Expand Up @@ -275,6 +290,9 @@ void useCmap( CMap cmap )
charToUnicode.putAll(cmap.charToUnicode);
codeToCid.putAll(cmap.codeToCid);
codeToCidRanges.addAll(cmap.codeToCidRanges);

// unicodeToByteCodes should be filled too, but this isn't possible in 2.0.*
// because we don't know the code length
}

/**
Expand Down
56 changes: 37 additions & 19 deletions library/src/main/java/com/tom_roush/fontbox/cmap/CMapParser.java
Expand Up @@ -42,13 +42,25 @@ public class CMapParser

private final byte[] tokenParserByteBuffer = new byte[512];

private boolean strictMode = false;

/**
 * Creates a new instance of CMapParser with strict mode left at its default (disabled).
 */
public CMapParser()
{
}

/**
 * Creates a new instance of CMapParser with an explicit strict mode setting.
 *
 * @param strictMode activates the strict mode used for inline CMaps; note that
 * parsePredefined switches strict mode off again before parsing a predefined CMap.
 */
public CMapParser(boolean strictMode)
{
    this.strictMode = strictMode;
}

/**
* Parse a CMAP file on the file system.
*
Expand Down Expand Up @@ -86,6 +98,8 @@ public CMap parsePredefined(String name) throws IOException
try
{
input = getExternalCMap(name);
// deactivate strict mode
strictMode = false;
return parse(input);
}
finally
Expand Down Expand Up @@ -329,7 +343,7 @@ private void parseBegincidrange(int numberOfLines, PushbackInputStream cmapStrea
{
int mappedCID = createIntFromBytes(startCode);
result.addCIDMapping(mappedCode++, mappedCID);
increment(startCode);
increment(startCode, startCode.length - 1, false);
}
}
}
Expand Down Expand Up @@ -406,18 +420,16 @@ else if (nextToken instanceof byte[])
{
for (int i = 0; i < 256; i++)
{
startCode[1] = (byte) i;
tokenBytes[1] = (byte) i;
addMappingFrombfrange(result, startCode, 0xff, tokenBytes);

startCode[0] = (byte) i;
startCode[1] = 0;
tokenBytes[0] = (byte) i;
tokenBytes[1] = 0;
addMappingFrombfrange(result, startCode, 256, tokenBytes);
}
}
else
{
// PDFBOX-4661: avoid overflow of the last byte, all following values are undefined
int values = Math.min(end - start,
255 - (tokenBytes[tokenBytes.length - 1] & 0xFF)) + 1;
addMappingFrombfrange(result, startCode, values, tokenBytes);
addMappingFrombfrange(result, startCode, end - start + 1, tokenBytes);
}
}
}
Expand All @@ -430,7 +442,7 @@ private void addMappingFrombfrange(CMap cmap, byte[] startCode, List<byte[]> tok
{
String value = createStringFromBytes(tokenBytes);
cmap.addCharMapping(startCode, value);
increment(startCode);
increment(startCode, startCode.length - 1, false);
}
}

Expand All @@ -441,8 +453,12 @@ private void addMappingFrombfrange(CMap cmap, byte[] startCode, int values,
{
String value = createStringFromBytes(tokenBytes);
cmap.addCharMapping(startCode, value);
increment(startCode);
increment(tokenBytes);
if (!increment(tokenBytes, tokenBytes.length - 1, strictMode))
{
// overflow detected -> stop adding further mappings
break;
}
increment(startCode, startCode.length - 1, false);
}
}

Expand Down Expand Up @@ -718,22 +734,24 @@ private boolean isDelimiter(int aByte)
}
}

private void increment(byte[] data)
{
increment(data, data.length - 1);
}

private void increment(byte[] data, int position)
private boolean increment(byte[] data, int position, boolean useStrictMode)
{
if (position > 0 && (data[position] & 0xFF) == 255)
{
// PDFBOX-4661: avoid overflow of the last byte, all following values are undefined
// PDFBOX-5090: strict mode has to be used for CMaps within pdfs
if (useStrictMode)
{
return false;
}
data[position] = 0;
increment(data, position - 1);
increment(data, position - 1, useStrictMode);
}
else
{
data[position] = (byte) (data[position] + 1);
}
return true;
}

private int createIntFromBytes(byte[] bytes)
Expand Down

0 comments on commit d69e8d8

Please sign in to comment.