Skip to content

Commit

Permalink
Merge remote-tracking branch 'upstream/master' into bleeding-edge
Browse files Browse the repository at this point in the history
# Conflicts:
#	examples/node/getinfo.js
#	examples/node/pdf2png/pdf2png.js
#	examples/node/pdf2svg.js
#	examples/text-only/pdf2svg.js
#	test/pdfs/.gitignore
#	test/unit/api_spec.js
  • Loading branch information
stephanrauh committed Dec 3, 2021
2 parents f6e40a1 + e9e4b91 commit 84b8e70
Show file tree
Hide file tree
Showing 11 changed files with 209 additions and 78 deletions.
128 changes: 116 additions & 12 deletions src/core/catalog.js
Expand Up @@ -22,15 +22,16 @@ import {
isRefsEqual,
isStream,
Name,
Ref,
RefSet,
RefSetCache,
} from "./primitives.js";
import {
collectActions,
MissingDataException,
PageDictMissingException,
recoverJsURL,
toRomanNumerals,
XRefEntryException,
} from "./core_utils.js";
import {
createPromiseCapability,
Expand Down Expand Up @@ -1085,21 +1086,20 @@ class Catalog {
});
}

getPageDict(pageIndex, skipCount = false) {
getPageDict(pageIndex) {
const capability = createPromiseCapability();
const nodesToVisit = [this._catDict.getRaw("Pages")];
const visitedNodes = new RefSet();
const xref = this.xref,
pageKidsCountCache = this.pageKidsCountCache;
let count,
currentPageIndex = 0;
let currentPageIndex = 0;

function next() {
while (nodesToVisit.length) {
const currentNode = nodesToVisit.pop();

if (isRef(currentNode)) {
count = pageKidsCountCache.get(currentNode);
const count = pageKidsCountCache.get(currentNode);
// Skip nodes where the page can't be.
if (count > 0 && currentPageIndex + count < pageIndex) {
currentPageIndex += count;
Expand Down Expand Up @@ -1146,8 +1146,15 @@ class Catalog {
return;
}

count = currentNode.get("Count");
if (Number.isInteger(count) && count >= 0 && !skipCount) {
let count;
try {
count = currentNode.get("Count");
} catch (ex) {
if (ex instanceof MissingDataException) {
throw ex;
}
}
if (Number.isInteger(count) && count >= 0) {
// Cache the Kids count, since it can reduce redundant lookups in
// documents where all nodes are found at *one* level of the tree.
const objId = currentNode.objId;
Expand All @@ -1161,13 +1168,28 @@ class Catalog {
}
}

const kids = currentNode.get("Kids");
let kids;
try {
kids = currentNode.get("Kids");
} catch (ex) {
if (ex instanceof MissingDataException) {
throw ex;
}
}
if (!Array.isArray(kids)) {
// Prevent errors in corrupt PDF documents that violate the
// specification by *inlining* Page dicts directly in the Kids
// array, rather than using indirect objects (fixes issue9540.pdf).
let type;
try {
type = currentNode.get("Type");
} catch (ex) {
if (ex instanceof MissingDataException) {
throw ex;
}
}
if (
isName(currentNode.get("Type"), "Page") ||
isName(type, "Page") ||
(!currentNode.has("Type") && currentNode.has("Contents"))
) {
if (currentPageIndex === pageIndex) {
Expand All @@ -1191,14 +1213,96 @@ class Catalog {
nodesToVisit.push(kids[last]);
}
}
capability.reject(
new PageDictMissingException(`Page index ${pageIndex} not found.`)
);
capability.reject(new Error(`Page index ${pageIndex} not found.`));
}
next();
return capability.promise;
}

/**
* Eagerly fetches the entire /Pages-tree; should ONLY be used as a fallback.
* @returns {Map}
*/
getAllPageDicts() {
const queue = [{ currentNode: this.toplevelPagesDict, posInKids: 0 }];
const visitedNodes = new RefSet();
const map = new Map();
let pageIndex = 0;

function addPageDict(pageDict, pageRef) {
map.set(pageIndex++, [pageDict, pageRef]);
}
function addPageError(msg) {
map.set(pageIndex++, [new FormatError(msg), null]);
}

while (queue.length > 0) {
const queueItem = queue[queue.length - 1];
const { currentNode, posInKids } = queueItem;

let kids;
try {
kids = currentNode.get("Kids");
} catch (ex) {
if (ex instanceof MissingDataException) {
throw ex;
}
if (ex instanceof XRefEntryException) {
throw ex;
}
}
if (!Array.isArray(kids)) {
addPageError("Page dictionary kids object is not an array.");
break;
}

if (posInKids >= kids.length) {
queue.pop();
continue;
}

const kidObj = kids[posInKids];
let obj;
if (kidObj instanceof Ref) {
try {
obj = this.xref.fetch(kidObj);
} catch (ex) {
if (ex instanceof MissingDataException) {
throw ex;
}
if (ex instanceof XRefEntryException) {
throw ex;
}
}
// Prevent circular references in the /Pages tree.
if (visitedNodes.has(kidObj)) {
addPageError("Pages tree contains circular reference.");
break;
}
visitedNodes.put(kidObj);
} else {
// Prevent errors in corrupt PDF documents that violate the
// specification by *inlining* Page dicts directly in the Kids
// array, rather than using indirect objects (see issue9540.pdf).
obj = kidObj;
}
if (!(obj instanceof Dict)) {
addPageError(
"Page dictionary kid reference points to wrong type of object."
);
break;
}

if (isDict(obj, "Page") || !obj.has("Kids")) {
addPageDict(obj, kidObj instanceof Ref ? kidObj : null);
} else {
queue.push({ currentNode: obj, posInKids: 0 });
}
queueItem.posInKids++;
}
return map;
}

getPageIndex(pageRef) {
const cachedPageIndex = this.pageIndexCache.get(pageRef);
if (cachedPageIndex !== undefined) {
Expand Down
7 changes: 0 additions & 7 deletions src/core/core_utils.js
Expand Up @@ -60,12 +60,6 @@ class MissingDataException extends BaseException {
}
}

/**
 * Exception type signalling that a page dictionary could not be found; it
 * forwards the message, together with its own class name, to `BaseException`.
 */
class PageDictMissingException extends BaseException {
constructor(msg) {
super(msg, "PageDictMissingException");
}
}

class ParserEOFException extends BaseException {
constructor(msg) {
super(msg, "ParserEOFException");
Expand Down Expand Up @@ -547,7 +541,6 @@ export {
isWhiteSpace,
log2,
MissingDataException,
PageDictMissingException,
ParserEOFException,
parseXFAPath,
readInt8,
Expand Down
73 changes: 49 additions & 24 deletions src/core/document.js
Expand Up @@ -50,7 +50,6 @@ import {
getInheritableProperty,
isWhiteSpace,
MissingDataException,
PageDictMissingException,
validateCSSFont,
XRefEntryException,
XRefParseException,
Expand Down Expand Up @@ -1354,28 +1353,30 @@ class PDFDocument {
}

async checkLastPage(recoveryMode = false) {
this.catalog.setActualNumPages(); // Ensure that it's always reset.
const { catalog, pdfManager } = this;

catalog.setActualNumPages(); // Ensure that it's always reset.
let numPages;

try {
await Promise.all([
this.pdfManager.ensureDoc("xfaFactory"),
this.pdfManager.ensureDoc("linearization"),
this.pdfManager.ensureCatalog("numPages"),
pdfManager.ensureDoc("xfaFactory"),
pdfManager.ensureDoc("linearization"),
pdfManager.ensureCatalog("numPages"),
]);

if (this.xfaFactory) {
return; // The Page count is always calculated for XFA-documents.
} else if (this.linearization) {
numPages = this.linearization.numPages;
} else {
numPages = this.catalog.numPages;
numPages = catalog.numPages;
}

if (numPages === 1) {
return;
} else if (!Number.isInteger(numPages)) {
if (!Number.isInteger(numPages)) {
throw new FormatError("Page count is not an integer.");
} else if (numPages <= 1) {
return;
}
await this.getPage(numPages - 1);
} catch (reason) {
Expand All @@ -1385,24 +1386,48 @@ class PDFDocument {
// subsequent `this.getPage` calls.
await this.cleanup();

let pageIndex = 1; // The first page was already loaded.
while (true) {
try {
await this.getPage(pageIndex, /* skipCount = */ true);
} catch (reasonLoop) {
if (reasonLoop instanceof PageDictMissingException) {
break;
}
if (reasonLoop instanceof XRefEntryException) {
if (!recoveryMode) {
throw new XRefParseException();
}
break;
let pagesTree;
try {
pagesTree = await pdfManager.ensureCatalog("getAllPageDicts");
} catch (reasonAll) {
if (reasonAll instanceof XRefEntryException) {
if (!recoveryMode) {
throw new XRefParseException();
}
}
pageIndex++;
catalog.setActualNumPages(1);
return;
}

for (const [pageIndex, [pageDict, ref]] of pagesTree) {
let promise;
if (pageDict instanceof Error) {
promise = Promise.reject(pageDict);

// Prevent "uncaught exception: Object"-messages in the console.
promise.catch(() => {});
} else {
promise = Promise.resolve(
new Page({
pdfManager,
xref: this.xref,
pageIndex,
pageDict,
ref,
globalIdFactory: this._globalIdFactory,
fontCache: catalog.fontCache,
builtInCMapCache: catalog.builtInCMapCache,
standardFontDataCache: catalog.standardFontDataCache,
globalImageCache: catalog.globalImageCache,
nonBlendModesSet: catalog.nonBlendModesSet,
xfaFactory: null,
})
);
}

this._pagePromises.set(pageIndex, promise);
}
this.catalog.setActualNumPages(pageIndex);
catalog.setActualNumPages(pagesTree.size);
}
}

Expand Down
Binary file added test/pdfs/GHOSTSCRIPT-698804-1-fuzzed.pdf
Binary file not shown.
Binary file added test/pdfs/poppler-395-0-fuzzed.pdf
Binary file not shown.
Binary file added test/pdfs/poppler-742-0-fuzzed.pdf
Binary file not shown.
Binary file added test/pdfs/poppler-937-0-fuzzed.pdf
Binary file not shown.
25 changes: 23 additions & 2 deletions web/base_viewer.js
Expand Up @@ -56,6 +56,11 @@ import { XfaLayerBuilder } from "./xfa_layer_builder.js";

const DEFAULT_CACHE_SIZE = 10;

/**
 * Page-count thresholds above which the viewer trades functionality for
 * performance. Frozen, since this object is exported and shared, so that
 * importing code cannot change the limits by accident.
 */
const PagesCountLimit = Object.freeze({
  // Above this many pages PAGE-scrolling is forced when the document is
  // loaded, and later attempts to change the scroll mode are ignored.
  FORCE_SCROLL_MODE_PAGE: 15000,
  // Above this many pages the viewer takes the same early-resolve path as
  // `disableAutoFetch`, rather than initializing all pages up front.
  FORCE_LAZY_PAGE_INIT: 7500,
});

/**
* @typedef {Object} PDFViewerOptions
* @property {HTMLDivElement} container - The container for the viewer element.
Expand Down Expand Up @@ -688,6 +693,16 @@ class BaseViewer {
// Rendering (potentially) depends on this, hence fetching it immediately.
const optionalContentConfigPromise = pdfDocument.getOptionalContentConfig();

// Given that browsers don't handle huge amounts of DOM-elements very well,
// enforce usage of PAGE-scrolling when loading *very* long/large documents.
if (pagesCount > PagesCountLimit.FORCE_SCROLL_MODE_PAGE) {
console.warn(
"Forcing PAGE-scrolling for performance reasons, given the length of the document."
);
const mode = (this._scrollMode = ScrollMode.PAGE);
this.eventBus.dispatch("scrollmodechanged", { source: this, mode });
}

this._pagesCapability.promise.then(() => {
this.eventBus.dispatch("pagesloaded", {
source: this,
Expand Down Expand Up @@ -792,7 +807,10 @@ class BaseViewer {

// In addition to 'disableAutoFetch' being set, also attempt to reduce
// resource usage when loading *very* long/large documents.
if (pdfDocument.loadingParams.disableAutoFetch || pagesCount > 7500) {
if (
pdfDocument.loadingParams.disableAutoFetch ||
pagesCount > PagesCountLimit.FORCE_LAZY_PAGE_INIT
) {
// XXX: Printing is semi-broken with auto fetch disabled.
this._pagesCapability.resolve();
return;
Expand Down Expand Up @@ -1887,6 +1905,9 @@ class BaseViewer {
if (!isValidScrollMode(mode)) {
throw new Error(`Invalid scroll mode: ${mode}`);
}
if (this.pagesCount > PagesCountLimit.FORCE_SCROLL_MODE_PAGE) {
return; // Disabled for performance reasons.
}
this._previousScrollMode = this._scrollMode;

this._scrollMode = mode;
Expand Down Expand Up @@ -2175,4 +2196,4 @@ class BaseViewer {
}
}

export { BaseViewer, PDFPageViewBuffer };
export { BaseViewer, PagesCountLimit, PDFPageViewBuffer };

0 comments on commit 84b8e70

Please sign in to comment.