Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

RTL support for html2canvas #1222

Closed
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
121 changes: 109 additions & 12 deletions src/TextBounds.js
Original file line number Diff line number Diff line change
Expand Up @@ -8,8 +8,10 @@ import {TEXT_DECORATION} from './parsing/textDecoration';

import FEATURES from './Feature';

// Character class spanning U+0590 through U+074F.
const RTL = /[\u0590-\u074f]/;
// Character class for anything above U+00FF.
const UNICODE = /[^\u0000-\u00ff]/;

/** Reports whether `word` contains at least one character matched by `RTL`. */
const hasRtlCharacters = (word: string): boolean => {
    return RTL.exec(word) !== null;
};

/** Reports whether `text` contains at least one character matched by `UNICODE`. */
const hasUnicodeCharacters = (text: string): boolean => {
    return UNICODE.exec(text) !== null;
};

/** Turns one code point into a string by passing `ucs2.encode` a single-element array. */
const encodeCodePoint = (codePoint: number): string => {
    const singleCodePoint = [codePoint];
    return ucs2.encode(singleCodePoint);
};
Expand All @@ -30,7 +32,9 @@ export const parseTextBounds = (
node: Text
): Array<TextBounds> => {
const codePoints = ucs2.decode(value);
const letterRendering = parent.style.letterSpacing !== 0 || hasUnicodeCharacters(value);
const letterRendering =
parent.style.letterSpacing !== 0 ||
(!hasUnicodeCharacters(value) && !hasRtlCharacters(value));
const textList = letterRendering ? codePoints.map(encodeCodePoint) : splitWords(codePoints);
const length = textList.length;
const textBounds = [];
Expand Down Expand Up @@ -76,18 +80,78 @@ const getRangeBounds = (node: Text, offset: number, length: number): Bounds => {
};

/**
 * Breaks the code points into word strings and then runs the bracket-flipping pass over them.
 *
 * @param codePoints code points of the text to split
 * @returns the word strings, after any bracket flipping has been applied in place
 */
const splitWords = (codePoints: Array<number>): Array<string> => {
    // The split yields the words plus a parallel list of RTL flags.
    const split = findWordsAndWordCategories(codePoints);
    // The flipping pass rewrites entries of `split.words` directly.
    flipCharactersForRtlWordsIfNeeded(split.words, split.rtlIndicators);
    return split.words;
};

/**
 * Tells whether a character code is one of the word-boundary characters:
 * tab, line feed, carriage return, space or hyphen-minus.
 */
const isWordBoundary = (characterCode: number): boolean => {
    switch (characterCode) {
        case 9: // \t
        case 10: // \n
        case 13: // \r
        case 32: // <space>
        case 45: // -
            return true;
        default:
            return false;
    }
};

// Script labels that getUtf8ScriptCategory returns for the code-point ranges it
// recognises as right-to-left (Hebrew, Arabic, Syriac).
const SUPPORTED_RTL_CATEGORIES = {
ARABIC: 'ARABIC',
HEBREW: 'HEBREW',
SYRIAC: 'SYRIAC'
};

// Label returned by getUtf8ScriptCategory for every other code point; isRtlCategory
// treats any non-empty category other than this one as RTL.
const LTR_CATEGORY = 'LTR';

// Opening brackets: flipped when the entry after the current word is flagged RTL.
const CHARACTERS_TO_FLIP_IF_NEXT_WORD_IS_RTL = ['(', '[', '{'];

// Closing brackets: flipped when the entry before the current word is flagged RTL.
const CHARACTERS_TO_FLIP_IF_PREVIOUS_WORD_IS_RTL = [')', ']', '}'];

// Maps each bracket character to the character that replaces it when it is flipped.
const CHARACTER_TO_FLIPPED_CHARACTER_MAP = {
// PARENS
'(': ')',
')': '(',
// BRACKETS
'[': ']',
']': '[',
// BRACES
'{': '}',
'}': '{'
};

// Return shape of findWordsAndWordCategories: `words` holds the split strings and
// `rtlIndicators` holds one RTL flag per pushed word (both are pushed together).
type FindWordsAndWordCategoriesResult = {
rtlIndicators: Array<boolean>,
words: Array<string>
};

const findWordsAndWordCategories = (
codePoints: Array<number>
): FindWordsAndWordCategoriesResult => {
const words = [];
const rtlIndicators = [];
let i = 0;
let onWordBoundary = false;
// Store whether the text is LTR or RTL
let previousScriptCategory = null;
let word;
while (codePoints.length) {
const codePointScriptCategory = getUtf8ScriptCategory(codePoints[i]);
if (previousScriptCategory == null) {
previousScriptCategory = codePointScriptCategory;
}
if (isWordBoundary(codePoints[i]) === onWordBoundary) {
word = codePoints.splice(0, i);
if (word.length) {
words.push(ucs2.encode(word));
rtlIndicators.push(isRtlCategory(previousScriptCategory));
}
onWordBoundary = !onWordBoundary;
i = 0;
previousScriptCategory = null;
} else {
i++;
}
Expand All @@ -96,20 +160,53 @@ const splitWords = (codePoints: Array<number>): Array<string> => {
word = codePoints.splice(0, i);
if (word.length) {
words.push(ucs2.encode(word));
rtlIndicators.push(isRtlCategory(previousScriptCategory));
}
}
}
return words;

return {
words,
rtlIndicators
};
};

const isWordBoundary = (characterCode: number): boolean => {
return (
[
32, // <space>
13, // \r
10, // \n
9, // \t
45 // -
].indexOf(characterCode) !== -1
);
/**
 * Mirrors bracket characters inside `words`, in place, for entries adjacent to an RTL entry.
 *
 * An opening bracket `(`, `[` or `{` is replaced by its closing counterpart when the entry
 * after it is flagged RTL; a closing bracket `)`, `]` or `}` is replaced by its opening
 * counterpart when the entry before it is flagged RTL. A missing neighbour counts as not RTL.
 *
 * @param words entries to rewrite; the array is mutated in place
 * @param rtlIndicators RTL flags, read at the same indexes as `words`
 *
 * NOTE(review): only the immediately adjacent entries are consulted. If whitespace runs are
 * emitted as entries of their own, a bracket separated from RTL text by a space will see the
 * whitespace entry rather than the RTL word — confirm against findWordsAndWordCategories.
 */
const flipCharactersForRtlWordsIfNeeded = (
    words: Array<string>,
    rtlIndicators: Array<boolean>
): void => {
    words.forEach((word, indexOfWord) => {
        const isNextWordRtl = rtlIndicators[indexOfWord + 1] || false;
        const isPreviousWordRtl = rtlIndicators[indexOfWord - 1] || false;
        if (!isNextWordRtl && !isPreviousWordRtl) {
            // Neither flip condition can hold for this entry.
            return;
        }

        // Apply every replacement to the running result rather than to the original word,
        // so a word holding several brackets keeps all of its flips instead of only the last
        // one. Each swap is one character for one character, so later indexes stay valid.
        let flippedWord = word;
        for (let indexOfLetter = 0; indexOfLetter < word.length; indexOfLetter++) {
            const letter = word.charAt(indexOfLetter);
            const flipsAsOpening =
                isNextWordRtl && CHARACTERS_TO_FLIP_IF_NEXT_WORD_IS_RTL.indexOf(letter) !== -1;
            const flipsAsClosing =
                isPreviousWordRtl &&
                CHARACTERS_TO_FLIP_IF_PREVIOUS_WORD_IS_RTL.indexOf(letter) !== -1;
            if (flipsAsOpening || flipsAsClosing) {
                flippedWord = replaceIndexAt(
                    flippedWord,
                    indexOfLetter,
                    CHARACTER_TO_FLIPPED_CHARACTER_MAP[letter]
                );
            }
        }
        words[indexOfWord] = flippedWord;
    });
};

/**
 * Returns `word` with the single character at `indexToReplace` swapped for `replacementString`.
 *
 * @param word source string
 * @param indexToReplace position of the character to drop
 * @param replacementString text inserted in its place (may be any length)
 */
const replaceIndexAt = (word: string, indexToReplace: number, replacementString: string): string => {
    const before = word.substring(0, indexToReplace);
    const after = word.substring(indexToReplace + 1);
    return before + replacementString + after;
};

/** A category counts as RTL when it is non-empty and is anything other than `LTR_CATEGORY`. */
const isRtlCategory = (category: string | null): boolean => {
    if (!category) {
        // null (no category recorded) and the empty string are never RTL.
        return false;
    }
    return category !== LTR_CATEGORY;
};

/**
 * Classifies a character code by numeric range.
 *
 * @param char numeric character code to classify
 * @returns the Hebrew, Arabic or Syriac label for 0x0590–0x05FF, 0x0600–0x06FF and
 *          0x0700–0x074F respectively, otherwise `LTR_CATEGORY`
 */
const getUtf8ScriptCategory = (char: number): string => {
    if (char >= 0x0590 && char <= 0x05ff) {
        return SUPPORTED_RTL_CATEGORIES.HEBREW;
    }
    if (char >= 0x0600 && char <= 0x06ff) {
        return SUPPORTED_RTL_CATEGORIES.ARABIC;
    }
    if (char >= 0x0700 && char <= 0x074f) {
        return SUPPORTED_RTL_CATEGORIES.SYRIAC;
    }
    // Everything outside the three ranges is treated as left-to-right.
    return LTR_CATEGORY;
};