Skip to content

Commit

Permalink
docx: docxHtml should not normalize space of elements that have sibli…
Browse files Browse the repository at this point in the history
…ngs in the content
  • Loading branch information
bjrmatos committed Jul 4, 2023
1 parent 650f8f8 commit 9b9298a
Show file tree
Hide file tree
Showing 2 changed files with 214 additions and 3 deletions.
46 changes: 44 additions & 2 deletions packages/jsreport-docx/lib/postprocess/html/parseHtmlToDocxMeta.js
Original file line number Diff line number Diff line change
Expand Up @@ -485,7 +485,26 @@ function normalizeChildNodes ($, mode, data, childNodes) {
const middleNodes = newChildNodes.length > 2 ? newChildNodes.slice(1, -1) : []

if (firstChildNode?.nodeType === NODE_TYPES.TEXT) {
firstChildNode.nodeValue = firstChildNode.nodeValue.replace(/^[ ]+([^ ]*)/, '$1')
let normalizeStartingSpace = true

// if the parent is not block element and the previous sibling of parent is text or element then don't normalize
if (
firstChildNode?.parentNode?.nodeType === NODE_TYPES.ELEMENT &&
!(mode === 'block' ? isBlockElement(firstChildNode?.parentNode) : false)
) {
const isTextOrElement = (
isTextElement(firstChildNode.parentNode.previousSibling) ||
firstChildNode.parentNode.previousSibling?.nodeType === NODE_TYPES.ELEMENT
)

if (isTextOrElement) {
normalizeStartingSpace = false
}
}

if (normalizeStartingSpace) {
firstChildNode.nodeValue = firstChildNode.nodeValue.replace(/^[ ]+([^ ]*)/, '$1')
}

// the next sibling here should be already normalized (so it will be either text or element)
const nextSiblingNode = firstChildNode.nextSibling
Expand All @@ -507,7 +526,26 @@ function normalizeChildNodes ($, mode, data, childNodes) {
}

if (lastChildNode?.nodeType === NODE_TYPES.TEXT) {
lastChildNode.nodeValue = lastChildNode.nodeValue.replace(/([^ ]*)[ ]+$/, '$1')
let normalizeEndingSpace = true

// if the parent is not block element and the next sibling of parent is text or element then don't normalize
if (
lastChildNode?.parentNode?.nodeType === NODE_TYPES.ELEMENT &&
!(mode === 'block' ? isBlockElement(lastChildNode?.parentNode) : false)
) {
const isTextOrElement = (
isTextElement(lastChildNode.parentNode.nextSibling) ||
lastChildNode.parentNode.nextSibling?.nodeType === NODE_TYPES.ELEMENT
)

if (isTextOrElement) {
normalizeEndingSpace = false
}
}

if (normalizeEndingSpace) {
lastChildNode.nodeValue = lastChildNode.nodeValue.replace(/([^ ]*)[ ]+$/, '$1')
}

// the previous sibling here should be already normalized (so it will be either text or element)
const previousSiblingNode = lastChildNode.previousSibling
Expand Down Expand Up @@ -1139,6 +1177,10 @@ function applySpacingIfNeeded (parentMeta, data) {
}

/**
 * Tells whether the given node is a text node.
 *
 * @param {object|null|undefined} node - node to inspect, it may be missing
 * @returns {boolean} true only when the node exists and its nodeType is NODE_TYPES.TEXT
 */
function isTextElement (node) {
  // a missing node (null/undefined) is never considered a text node
  return node != null && node.nodeType === NODE_TYPES.TEXT
}

Expand Down
171 changes: 170 additions & 1 deletion packages/jsreport-docx/test/htmlTest.js
Original file line number Diff line number Diff line change
Expand Up @@ -11013,6 +11013,11 @@ describe('docx html embed', () => {
})
}

// NOTE: when dealing with white space related issues, always remember
// that we want to match what the browser produces as the rendered/visual output.
// We don't care whether the browser's DOM node internally keeps the white space
// (for example when reading .textContent); only the visual output matters.
// ALWAYS REMEMBER THIS WHEN FIXING OR DEALING WITH PROBLEMS RELATED TO WHITE SPACE
describe('white space handling in html input', () => {
for (const mode of ['block', 'inline']) {
const templateSpaceStr = '<p> ... ... </p>'
Expand Down Expand Up @@ -11096,11 +11101,49 @@ describe('docx html embed', () => {
const textNodes = nodeListToArray(paragraphNodes[0].getElementsByTagName('w:t'))
should(textNodes.length).eql(2)
commonHtmlTextAssertions(textNodes[0], templateTextNodesForDocxHtml[0].parentNode)
should(textNodes[0].textContent).eql('Hello')
should(textNodes[0].textContent).eql('Hello ')
should(textNodes[1].textContent).eql('World')
}
})

const templateSpaceMultipleInlineStr = '<span> ... </span>\n\n <span> ... </span>\n'

// two sibling inline elements separated only by white space/line breaks:
// the output is expected to keep a single space after the first text and nothing else
it(`${mode} mode - ignore space in multiple inline ${templateSpaceMultipleInlineStr}`, async () => {
  const templateName = mode === 'block' ? 'html-embed-block' : 'html-embed-inline'
  const docxHtmlCall = `{{docxHtml content=html${mode === 'block' ? '' : ' inline=true'}}}`
  // text expected in each generated w:t node, in document order
  const expectedTexts = ['Hello ', 'World']

  const templateBuffer = fs.readFileSync(path.join(__dirname, `${templateName}.docx`))

  const rendered = await reporter.render({
    template: {
      engine: 'handlebars',
      recipe: 'docx',
      docx: {
        templateAsset: { content: templateBuffer }
      }
    },
    data: {
      html: createHtml(templateSpaceMultipleInlineStr, ['Hello', 'World'])
    }
  })

  // Write document for easier debugging
  fs.writeFileSync('out.docx', rendered.content)

  const [templateDoc] = await getDocumentsFromDocxBuf(templateBuffer, ['word/document.xml'])
  const templateTextNodes = getTextNodesMatching(templateDoc, docxHtmlCall)
  const [outputDoc] = await getDocumentsFromDocxBuf(rendered.content, ['word/document.xml'])

  const paragraphs = nodeListToArray(outputDoc.getElementsByTagName('w:p'))

  should(paragraphs.length).eql(1)

  const textNodes = nodeListToArray(paragraphs[0].getElementsByTagName('w:t'))
  should(textNodes.length).eql(expectedTexts.length)
  commonHtmlTextAssertions(textNodes[0], templateTextNodes[0].parentNode)

  expectedTexts.forEach((expectedText, idx) => {
    should(textNodes[idx].textContent).eql(expectedText)
  })
})

const templateLineBreakStr = '\n<p>\n...</p>\n'

it(`${mode} mode - ignore line break ${templateLineBreakStr}`, async () => {
Expand Down Expand Up @@ -11478,6 +11521,132 @@ describe('docx html embed', () => {
should(textNodes[1].textContent).eql(' is a ')
should(textNodes[2].textContent).eql('javascript reporting server')
})

const templateLeadingSpaceInInlineStr = '<p> ...<span> ...</span><span> ...</span><span> ...</span> ...</p>'

// the leading space of the paragraph itself is expected to be dropped, while the leading
// space of every following sibling (spans and the trailing text) is expected to be kept
it(`${mode} mode - preserve leading space in inline elements that have siblings ${templateLeadingSpaceInInlineStr}`, async () => {
  const templateName = mode === 'block' ? 'html-embed-block' : 'html-embed-inline'
  const docxHtmlCall = `{{docxHtml content=html${mode === 'block' ? '' : ' inline=true'}}}`
  // text expected in each generated w:t node, in document order
  const expectedTexts = [
    'Sintomi',
    ' vari',
    ' spesso',
    ' altalenanti prova prova prova',
    ' ora meglio?'
  ]

  const templateBuffer = fs.readFileSync(path.join(__dirname, `${templateName}.docx`))

  const rendered = await reporter.render({
    template: {
      engine: 'handlebars',
      recipe: 'docx',
      docx: {
        templateAsset: { content: templateBuffer }
      }
    },
    data: {
      html: createHtml(templateLeadingSpaceInInlineStr, ['Sintomi', 'vari', 'spesso', 'altalenanti prova prova prova', 'ora meglio?'])
    }
  })

  // Write document for easier debugging
  fs.writeFileSync('out.docx', rendered.content)

  const [templateDoc] = await getDocumentsFromDocxBuf(templateBuffer, ['word/document.xml'])
  const templateTextNodes = getTextNodesMatching(templateDoc, docxHtmlCall)
  const [outputDoc] = await getDocumentsFromDocxBuf(rendered.content, ['word/document.xml'])

  const paragraphs = nodeListToArray(outputDoc.getElementsByTagName('w:p'))

  should(paragraphs.length).eql(1)

  const textNodes = nodeListToArray(paragraphs[0].getElementsByTagName('w:t'))
  should(textNodes.length).eql(expectedTexts.length)
  commonHtmlTextAssertions(textNodes[0], templateTextNodes[0].parentNode)

  expectedTexts.forEach((expectedText, idx) => {
    should(textNodes[idx].textContent).eql(expectedText)
  })
})

const templateTrailingSpaceInInlineStr = '<p>... <span>... </span><span>... </span><span>... </span>...</p>'

// the trailing space of every text that is followed by a sibling is expected to be kept,
// only the last text of the paragraph ends without space
it(`${mode} mode - preserve trailing space in inline elements that have siblings ${templateTrailingSpaceInInlineStr}`, async () => {
  const templateName = mode === 'block' ? 'html-embed-block' : 'html-embed-inline'
  const docxHtmlCall = `{{docxHtml content=html${mode === 'block' ? '' : ' inline=true'}}}`
  // text expected in each generated w:t node, in document order
  const expectedTexts = [
    'Sintomi ',
    'vari ',
    'spesso ',
    'altalenanti prova prova prova ',
    'ora meglio?'
  ]

  const templateBuffer = fs.readFileSync(path.join(__dirname, `${templateName}.docx`))

  const rendered = await reporter.render({
    template: {
      engine: 'handlebars',
      recipe: 'docx',
      docx: {
        templateAsset: { content: templateBuffer }
      }
    },
    data: {
      html: createHtml(templateTrailingSpaceInInlineStr, ['Sintomi', 'vari', 'spesso', 'altalenanti prova prova prova', 'ora meglio?'])
    }
  })

  // Write document for easier debugging
  fs.writeFileSync('out.docx', rendered.content)

  const [templateDoc] = await getDocumentsFromDocxBuf(templateBuffer, ['word/document.xml'])
  const templateTextNodes = getTextNodesMatching(templateDoc, docxHtmlCall)
  const [outputDoc] = await getDocumentsFromDocxBuf(rendered.content, ['word/document.xml'])

  const paragraphs = nodeListToArray(outputDoc.getElementsByTagName('w:p'))

  should(paragraphs.length).eql(1)

  const textNodes = nodeListToArray(paragraphs[0].getElementsByTagName('w:t'))
  should(textNodes.length).eql(expectedTexts.length)
  commonHtmlTextAssertions(textNodes[0], templateTextNodes[0].parentNode)

  expectedTexts.forEach((expectedText, idx) => {
    should(textNodes[idx].textContent).eql(expectedText)
  })
})

const templateWithSpaceEntityStr = '<p>&nbsp;...&nbsp;<span>&nbsp;...&nbsp;</span><span>&nbsp;...&nbsp;</span><span>&nbsp;...&nbsp;</span>&nbsp;...&nbsp;</p>'

// non-breaking spaces (&nbsp;) are expected to reach the output untouched,
// both at the start and at the end of every text
it(`${mode} mode - preserve &nbsp; space ${templateWithSpaceEntityStr}`, async () => {
  const templateName = mode === 'block' ? 'html-embed-block' : 'html-embed-inline'
  const docxHtmlCall = `{{docxHtml content=html${mode === 'block' ? '' : ' inline=true'}}}`
  const words = ['Sintomi', 'vari', 'spesso', 'altalenanti prova prova prova', 'ora meglio?']

  const templateBuffer = fs.readFileSync(path.join(__dirname, `${templateName}.docx`))

  const rendered = await reporter.render({
    template: {
      engine: 'handlebars',
      recipe: 'docx',
      docx: {
        templateAsset: { content: templateBuffer }
      }
    },
    data: {
      html: createHtml(templateWithSpaceEntityStr, words)
    }
  })

  // Write document for easier debugging
  fs.writeFileSync('out.docx', rendered.content)

  const [templateDoc] = await getDocumentsFromDocxBuf(templateBuffer, ['word/document.xml'])
  const templateTextNodes = getTextNodesMatching(templateDoc, docxHtmlCall)
  const [outputDoc] = await getDocumentsFromDocxBuf(rendered.content, ['word/document.xml'])

  const paragraphs = nodeListToArray(outputDoc.getElementsByTagName('w:p'))

  should(paragraphs.length).eql(1)

  const textNodes = nodeListToArray(paragraphs[0].getElementsByTagName('w:t'))
  should(textNodes.length).eql(words.length)
  commonHtmlTextAssertions(textNodes[0], templateTextNodes[0].parentNode)

  // char code 160 is the non-breaking space produced by the &nbsp; entity
  const nbspSpace = String.fromCharCode(160)

  words.forEach((word, idx) => {
    should(textNodes[idx].textContent).eql(`${nbspSpace}${word}${nbspSpace}`)
  })
})
}
})

Expand Down

0 comments on commit 9b9298a

Please sign in to comment.