Skip to content
This repository has been archived by the owner on Jan 2, 2023. It is now read-only.

Commit

Permalink
Make internal linking adhere more closely to the HTML spec.
Browse files Browse the repository at this point in the history
see #4406

* Support lookup through decoded URL fragment
* Escape special characters when building query selector
  • Loading branch information
allenwu1973 committed Jul 10, 2019
1 parent 3e2a75a commit b8640ef
Show file tree
Hide file tree
Showing 4 changed files with 58 additions and 3 deletions.
Binary file added src/lib/.pdfconverter.cc.swp
Binary file not shown.
14 changes: 11 additions & 3 deletions src/lib/pdfconverter.cc
Expand Up @@ -39,6 +39,7 @@
#include <io.h>
#endif

#include "utilities.hh"
#include "dllbegin.inc"
using namespace wkhtmltopdf;
using namespace wkhtmltopdf::settings;
Expand Down Expand Up @@ -560,18 +561,25 @@ void PdfConverterPrivate::findLinks(QWebFrame * frame, QVector<QPair<QWebElement
if (href.isEmpty()) continue;
href=frame->baseUrl().resolved(href);
QString key = QUrl::fromPercentEncoding(href.toString(QUrl::RemoveFragment).toLocal8Bit());
QString decodedFragement = QUrl::fromPercentEncoding(href.fragment().toLocal8Bit());

QString escapedFragement = escapeCSS(href.fragment());
QString escapedDecodedFragement = escapeCSS(decodedFragement);

if (urlToPageObj.contains(key)) {
if (ulocal) {
PageObject * p = urlToPageObj[key];
QWebElement e;
if (!href.hasFragment())
e = p->page->mainFrame()->findFirstElement("body");
else {
e = p->page->mainFrame()->findFirstElement("a[name=\""+href.fragment()+"\"]");
e = p->page->mainFrame()->findFirstElement("#"+escapedFragement);
if (e.isNull())
e = p->page->mainFrame()->findFirstElement("a[name=\""+escapedFragement+"\"]");
if (e.isNull())
e = p->page->mainFrame()->findFirstElement("*[id=\""+href.fragment()+"\"]");
e = p->page->mainFrame()->findFirstElement("#"+escapedDecodedFragement);
if (e.isNull())
e = p->page->mainFrame()->findFirstElement("*[name=\""+href.fragment()+"\"]");
e = p->page->mainFrame()->findFirstElement("a[name=\""+escapedDecodedFragement+"\"]");
}
if (!e.isNull()) {
p->anchors[href.toString()] = e;
Expand Down
45 changes: 45 additions & 0 deletions src/lib/utilities.cc
Expand Up @@ -25,6 +25,9 @@
#include <QMetaEnum>
#include <QNetworkReply>

// Escape a single character for CSS by prefixing it with a backslash.
#define CSS_ESCAPE_CHARACTER(character) (QString("\\") + (character))
// Escape a character as a CSS code point escape: a backslash, the code point
// in lowercase hexadecimal, and a terminating space. The space is required by
// the CSSOM serialization rules so that following hex digits (e.g. "\31 abc")
// are not parsed as part of the escape. Accepts either a QChar or a numeric
// UTF-16 code unit; the value is normalised through QChar::unicode() so that
// the integer overload of QString::arg() (with base 16) is always selected.
#define CSS_ESCAPE_UNICODE_AS_CODEPOINT(unicode) (QString("\\%1 ").arg(QChar(unicode).unicode(), 0, 16))

void loadSvg(QSvgRenderer * & ptr, const QString & path, const char * def, int w, int h) {
delete ptr;
ptr = 0;
Expand Down Expand Up @@ -179,6 +182,48 @@ int handleError(bool success, int errorCode) {
return success?EXIT_SUCCESS:EXIT_FAILURE;
}

// Escape a string for use as a CSS identifier, following the serialization
// rules of CSS.escape() in the CSS Object Model spec.
// see https://www.w3.org/TR/cssom-1/#the-css.escape()-method
//
// input:   the raw identifier (for example a URL fragment) to escape
// returns: the escaped identifier, suitable for splicing into a selector
//          such as "#" + escapeCSS(id)
QString escapeCSS(QString input) {
	// A lone "-" is not a valid identifier on its own and must be escaped.
	if (input == "-")
		return QString("\\-");

	QString output;
	output.reserve(input.length());

	for (int i=0; i<input.length(); i++) {
		const QChar character = input.at(i);
		const ushort unicode = character.unicode();

		// A digit may not start an identifier, nor directly follow a single
		// leading "-"; in those positions it needs a code point escape.
		const bool isDigit = unicode >= 0x30 && unicode <= 0x39;
		const bool digitAtStart = isDigit &&
			(i == 0 || (i == 1 && input.at(0).unicode() == 0x2D));

		if (unicode == 0) {
			// NULL is replaced by U+FFFD REPLACEMENT CHARACTER.
			output += QChar(0xFFFD);
		}
		else if (unicode <= 0x1F || unicode == 0x7F || digitAtStart) {
			// Control characters (U+0001..U+001F, U+007F) and leading digits
			// are escaped as a code point: backslash, lowercase hex digits,
			// then a single space that terminates the escape so following hex
			// digits are not swallowed by it.
			output += QLatin1Char('\\');
			output += QString::number(static_cast<uint>(unicode), 16);
			output += QLatin1Char(' ');
		}
		else if (
			unicode >= 0x80 || unicode == 0x2D || unicode == 0x5F ||
			isDigit ||
			(unicode >= 0x41 && unicode <= 0x5A) ||
			(unicode >= 0x61 && unicode <= 0x7A)
		) {
			// Non-ASCII, "-", "_", digits and ASCII letters are emitted as-is.
			output += character;
		}
		else {
			// Any other ASCII character is escaped with a backslash.
			output += QLatin1Char('\\');
			output += character;
		}
	}

	return output;
}


// Definitions of MyLooksStyle's static SVG renderer pointers; each starts out null.
QSvgRenderer * MyLooksStyle::checkbox = 0;
QSvgRenderer * MyLooksStyle::checkbox_checked = 0;
QSvgRenderer * MyLooksStyle::radiobutton = 0;
Expand Down
2 changes: 2 additions & 0 deletions src/lib/utilities.hh
Expand Up @@ -56,5 +56,7 @@ public slots:

DLL_PUBLIC int handleError(bool success, int errorCode);

// Escape a string for use in a CSS selector, as per CSS.escape() in the
// CSS Object Model spec. Returns the escaped copy of input.
QString escapeCSS(QString input);

#include <dllend.inc>
#endif //__UTILITIES_HH__

0 comments on commit b8640ef

Please sign in to comment.