Skip to content

Commit

Permalink
Merge pull request #2507 from oleibman/word2493b
Browse files Browse the repository at this point in the history
ODText Reader : Improve Section Reader
  • Loading branch information
Progi1984 committed Nov 30, 2023
2 parents b0e1e41 + bdcd104 commit e76b701
Show file tree
Hide file tree
Showing 6 changed files with 171 additions and 11 deletions.
1 change: 1 addition & 0 deletions docs/changes/1.x/1.2.0.md
Expand Up @@ -31,6 +31,7 @@
- Added Support for Language, both for document overall and individual text elements
- Template : Set a checkbox by [@nxtpge](https://github.com/nxtpge) in [#2509](https://github.com/PHPOffice/PHPWord/pull/2509)
- ODText / RTF / Word2007 Writer : Add field FILENAME by [@milkyway-git](https://github.com/milkyway-git) in [#2510](https://github.com/PHPOffice/PHPWord/pull/2510)
- ODText Reader : Improve Section Reader by [@oleibman](https://github.com/oleibman) in [#2507](https://github.com/PHPOffice/PHPWord/pull/2507)

### Bug fixes

Expand Down
5 changes: 0 additions & 5 deletions phpstan-baseline.neon
Expand Up @@ -165,11 +165,6 @@ parameters:
count: 1
path: src/PhpWord/Reader/HTML.php

-
message: "#^Call to an undefined method DOMNode\\:\\:getAttribute\\(\\)\\.$#"
count: 2
path: src/PhpWord/Reader/ODText/Content.php

-
message: "#^Offset 'textNodes' on array\\{changed\\: PhpOffice\\\\PhpWord\\\\Element\\\\TrackChange, textNodes\\: DOMNodeList\\<DOMElement\\>\\} in isset\\(\\) always exists and is not nullable\\.$#"
count: 1
Expand Down
12 changes: 12 additions & 0 deletions src/PhpWord/Element/TextRun.php
Expand Up @@ -78,4 +78,16 @@ public function setParagraphStyle($style = null)

return $this->paragraphStyle;
}

/**
 * Build the plain text of this run from its direct Text children.
 *
 * Elements that are not Text instances are skipped; the remaining
 * pieces are joined in element order with no separator.
 */
public function getText(): string
{
    $pieces = [];
    foreach ($this->getElements() as $child) {
        if (!($child instanceof Text)) {
            continue;
        }
        $pieces[] = $child->getText();
    }

    return implode('', $pieces);
}
}
81 changes: 75 additions & 6 deletions src/PhpWord/Reader/ODText/Content.php
Expand Up @@ -18,7 +18,10 @@
namespace PhpOffice\PhpWord\Reader\ODText;

use DateTime;
use DOMElement;
use DOMNodeList;
use PhpOffice\Math\Reader\MathML;
use PhpOffice\PhpWord\Element\Section;
use PhpOffice\PhpWord\Element\TrackChange;
use PhpOffice\PhpWord\PhpWord;
use PhpOffice\PhpWord\Shared\XMLReader;
Expand All @@ -30,6 +33,9 @@
*/
class Content extends AbstractPart
{
/** @var ?Section */
private $section;

/**
* Read content.xml.
*/
Expand All @@ -41,17 +47,28 @@ public function read(PhpWord $phpWord): void
$trackedChanges = [];

$nodes = $xmlReader->getElements('office:body/office:text/*');
$this->section = null;
$this->processNodes($nodes, $xmlReader, $phpWord);
$this->section = null;
}

/** @param DOMNodeList<DOMElement> $nodes */
public function processNodes(DOMNodeList $nodes, XMLReader $xmlReader, PhpWord $phpWord): void
{
if ($nodes->length > 0) {
$section = $phpWord->addSection();
foreach ($nodes as $node) {
// $styleName = $xmlReader->getAttribute('text:style-name', $node);
switch ($node->nodeName) {
case 'text:h': // Heading
$depth = $xmlReader->getAttribute('text:outline-level', $node);
$section->addTitle($node->nodeValue, $depth);
$this->getSection($phpWord)->addTitle($node->nodeValue, $depth);

break;
case 'text:p': // Paragraph
$styleName = $xmlReader->getAttribute('text:style-name', $node);
if (substr($styleName, 0, 2) === 'SB') {
break;
}
$element = $xmlReader->getElement('draw:frame/draw:object', $node);
if ($element) {
$mathFile = str_replace('./', '', $element->getAttribute('xlink:href')) . '/content.xml';
Expand All @@ -65,11 +82,13 @@ public function read(PhpWord $phpWord): void
$reader = new MathML();
$math = $reader->read($mathXML);

$section->addFormula($math);
$this->getSection($phpWord)->addFormula($math);
}
}
} else {
$children = $node->childNodes;
$spans = false;
/** @var DOMElement $child */
foreach ($children as $child) {
switch ($child->nodeName) {
case 'text:change-start':
Expand All @@ -89,16 +108,49 @@ public function read(PhpWord $phpWord): void
$changed = $trackedChanges[$changeId];
}

break;
case 'text:span':
$spans = true;

break;
}
}

$element = $section->addText($node->nodeValue);
if ($spans) {
$element = $this->getSection($phpWord)->addTextRun();
foreach ($children as $child) {
switch ($child->nodeName) {
case 'text:span':
/** @var DOMElement $child2 */
foreach ($child->childNodes as $child2) {
switch ($child2->nodeName) {
case '#text':
$element->addText($child2->nodeValue);

break;
case 'text:tab':
$element->addText("\t");

break;
case 'text:s':
$spaces = (int) $child2->getAttribute('text:c') ?: 1;
$element->addText(str_repeat(' ', $spaces));

break;
}
}

break;
}
}
} else {
$element = $this->getSection($phpWord)->addText($node->nodeValue);
}
if (isset($changed) && is_array($changed)) {
$element->setTrackChange($changed['changed']);
if (isset($changed['textNodes'])) {
foreach ($changed['textNodes'] as $changedNode) {
$element = $section->addText($changedNode->nodeValue);
$element = $this->getSection($phpWord)->addText($changedNode->nodeValue);
$element->setTrackChange($changed['changed']);
}
}
Expand All @@ -110,7 +162,7 @@ public function read(PhpWord $phpWord): void
$listItems = $xmlReader->getElements('text:list-item/text:p', $node);
foreach ($listItems as $listItem) {
// $listStyleName = $xmlReader->getAttribute('text:style-name', $listItem);
$section->addListItem($listItem->nodeValue, 0);
$this->getSection($phpWord)->addListItem($listItem->nodeValue, 0);
}

break;
Expand All @@ -129,9 +181,26 @@ public function read(PhpWord $phpWord): void
$trackedChanges[$changedRegion->getAttribute('text:id')] = ['changed' => $changed, 'textNodes' => $textNodes];
}

break;
case 'text:section': // Section
// $sectionStyleName = $xmlReader->getAttribute('text:style-name', $node);
$this->section = $phpWord->addSection();
$children = $node->childNodes;
$this->processNodes($children, $xmlReader, $phpWord);

break;
}
}
}
}

/**
 * Return the section that content is currently being added to.
 *
 * When no section has been recorded yet, a new one is added to the
 * document, remembered on this reader, and returned.
 */
private function getSection(PhpWord $phpWord): Section
{
    if ($this->section === null) {
        $this->section = $phpWord->addSection();
    }

    return $this->section;
}
}
83 changes: 83 additions & 0 deletions tests/PhpWordTests/Reader/ODText/ODTextSectionTest.php
@@ -0,0 +1,83 @@
<?php
/**
* This file is part of PHPWord - A pure PHP library for reading and writing
* word processing documents.
*
* PHPWord is free software distributed under the terms of the GNU Lesser
* General Public License version 3 as published by the Free Software Foundation.
*
* For the full copyright and license information, please read the LICENSE
* file that was distributed with this source code. For the full list of
* contributors, visit https://github.com/PHPOffice/PHPWord/contributors.
*
* @see https://github.com/PHPOffice/PHPWord
*
* @license http://www.gnu.org/licenses/lgpl.txt LGPL version 3
*/

namespace PhpOffice\PhpWordTests\Reader\ODText;

use PhpOffice\PhpWord\IOFactory;
use PhpOffice\PhpWord\PhpWord;
use PhpOffice\PhpWord\Settings;

/**
 * Tests for reading ODText documents with and without explicit sections.
 */
class ODTextSectionTest extends \PHPUnit\Framework\TestCase
{
    /** @var string Path of the temporary file written by the current test, or '' when none. */
    private $filename = '';

    /**
     * Delete the temporary file written by the test, if there is one.
     */
    protected function tearDown(): void
    {
        if ($this->filename !== '') {
            // The path is recorded before the writer runs, so the file may be
            // missing when saving failed; skip unlink() then so its warning
            // does not obscure the original failure.
            if (is_file($this->filename)) {
                unlink($this->filename);
            }
            $this->filename = '';
        }
    }

    /**
     * Write a document with the ODText writer, read it back, and check
     * that the paragraph texts survive the round trip.
     */
    public function testWriteThenReadSection(): void
    {
        $dir = 'tests/PhpWordTests/_files';
        // NOTE(review): this flips a static setting and is never reset in this
        // class - confirm whether it should be restored in tearDown().
        Settings::setOutputEscapingEnabled(true);
        $phpWord = new PhpWord();
        $section = $phpWord->addSection();
        $inputText = ['days', 'monday', 'tuesday'];
        $inputText[] = "Tab\tthen two spaces then done.";
        foreach ($inputText as $text) {
            $section->addText($text);
        }
        $writer = IOFactory::createWriter($phpWord, 'ODText');
        $this->filename = "$dir/sectiontest.odt";
        $writer->save($this->filename);

        $reader = IOFactory::createReader('ODText');
        $phpWord2 = $reader->load($this->filename);

        self::assertSame($inputText, $this->collectText($phpWord2));
    }

    /**
     * Read a fixture document and check the paragraph texts that come back.
     */
    public function testReadNoSections(): void
    {
        $dir = 'tests/PhpWordTests/_files/documents';
        $inputText = ['days', 'monday', 'tuesday'];

        $reader = IOFactory::createReader('ODText');
        $filename = "$dir/word.2493.nosection.odt";
        $phpWord2 = $reader->load($filename);

        self::assertSame($inputText, $this->collectText($phpWord2));
    }

    /**
     * Collect getText() results from every element of every section, in order.
     *
     * Elements that are not objects or have no getText() method are skipped.
     *
     * @return array<int, mixed>
     */
    private function collectText(PhpWord $phpWord): array
    {
        $outputText = [];
        foreach ($phpWord->getSections() as $section) {
            foreach ($section->getElements() as $element) {
                if (is_object($element) && method_exists($element, 'getText')) {
                    $outputText[] = $element->getText();
                }
            }
        }

        return $outputText;
    }
}
Binary file not shown.

0 comments on commit e76b701

Please sign in to comment.