Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account-related emails.

Already on GitHub? Sign in to your account

SearchIndex: Implement Collections (WIP) #3810

Draft
wants to merge 9 commits into
base: searchIndex-restructure
Choose a base branch
from
65 changes: 65 additions & 0 deletions _test/tests/inc/Search/Collection/FullTextCollectionSearchTest.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,65 @@
<?php

namespace tests\Search\Collection;

use dokuwiki\Search\Collection\FulltextCollection;
use dokuwiki\Search\Collection\FulltextCollectionSearch;
use dokuwiki\Search\Index\MemoryIndex;
use dokuwiki\Search\QueryParser;
use dokuwiki\Search\Tokenizer;

/**
 * Tests for running term searches through a FulltextCollectionSearch
 * that is backed by a FulltextCollection.
 */
class FullTextCollectionSearchTest extends \DokuWikiTest
{
    /**
     * A term without wildcards should match exactly one token and report
     * how often that token occurs on each entity.
     */
    public function testExactTerm()
    {
        // fill the indexes with two small entities
        $pages = [
            'page1' => ['dokuwiki', 'dokuwiki', 'dokuwikis', 'doku', 'wiki'],
            'page2' => ['dokuwiki', 'other', 'words'],
        ];
        $fulltext = new FulltextCollection('page', 'w', 'i', 'pageword');
        $fulltext->lock();
        foreach ($pages as $pageName => $words) {
            $fulltext->addEntity($pageName, $words);
        }
        $fulltext->unlock();

        // register the term and run the search
        $searcher = new FulltextCollectionSearch($fulltext);
        $exactTerm = $searcher->addTerm('dokuwiki');
        $searcher->execute();

        // --- state of the term after execution ---

        // only the identical token may match, not 'dokuwikis', 'doku' or 'wiki'
        $this->assertEquals(['dokuwiki'], $exactTerm->getTokens());

        // the token has 8 characters and is expected at position 0 of that index
        $this->assertEquals([0], $exactTerm->getTokenIDsByLength(8));

        // 'dokuwiki' occurs twice on page1 and once on page2
        $expectedFrequencies = ['page1' => 2, 'page2' => 1];
        $this->assertEquals($expectedFrequencies, $exactTerm->getEntityFrequencies());

        // --- state of the collection search itself ---

        // the entity IDs have to be resolvable to their names
        $expectedEntities = [0 => 'page1', 1 => 'page2'];
        $this->assertEquals($expectedEntities, $searcher->getEntities());
    }

    /**
     * Runs several term variants against the tokens of a real wiki page.
     *
     * The method name has no "test" prefix, so PHPUnit does not pick it up.
     */
    public function xxxRealWord()
    {
        $words = Tokenizer::getWords(rawWiki('wiki:syntax'));
        $fulltext = new FulltextCollection('page', 'word', 'w', 'pageword');
        $fulltext->addEntity('wiki:syntax', $words);

        $searcher = new FulltextCollectionSearch($fulltext);

        // exact, left wildcard, right wildcard, both wildcards
        foreach (['dokuwiki', '*wiki', 'doku*', '*kuwi*'] as $query) {
            $searcher->addTerm($query);
        }

        $found = $searcher->execute();

        $this->assertEquals([], $found);
    }
}
64 changes: 64 additions & 0 deletions _test/tests/inc/Search/Collection/FullTextCollectionTest.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,64 @@
<?php

namespace tests\Search\Collection;

use dokuwiki\Search\Collection\FulltextCollection;
use dokuwiki\Search\Index\MemoryIndex;
use dokuwiki\Search\QueryParser;
use dokuwiki\Search\Tokenizer;

/**
 * Tests for FulltextCollection that inspect the indexes it writes.
 */
class FullTextCollectionTest extends \DokuWikiTest
{
    /**
     * Writes an entity through the collection, then opens the underlying
     * indexes one by one and compares their raw rows.
     */
    public function testDirectly()
    {
        $collection = new FulltextCollection('entity', 'token', 'freq', 'reverse');

        // 'two' is listed twice on purpose
        $collection->lock();
        $collection->addEntity('test', ['one', 'two', 'three', 'four', 'two']);
        $collection->unlock();

        // entity index: the entity name is stored in the first row
        $entityIndex = new MemoryIndex('entity');
        $this->assertEquals('test', $entityIndex->retrieveRow(0));

        // token index with suffix 3: holds the three-letter tokens
        $tokenIndex = new MemoryIndex('token', '3');
        $this->assertEquals('one', $tokenIndex->retrieveRow(0));
        $this->assertEquals('two', $tokenIndex->retrieveRow(1));

        // frequency index with suffix 3: rows line up with the token rows
        $frequencyIndex = new MemoryIndex('freq', '3');
        $this->assertEquals('0*1', $frequencyIndex->retrieveRow(0)); // 'one' occurs 1x on entity 0
        $this->assertEquals('0*2', $frequencyIndex->retrieveRow(1)); // 'two' occurs 2x on entity 0

        // reverse index: one row per entity
        $reverseIndex = new MemoryIndex('reverse');
        $this->assertEquals('3*0:3*1:5*0:4*0', $reverseIndex->retrieveRow(0));

        // store the same entity again, this time without the token 'one'
        $collection->lock();
        $collection->addEntity('test', ['two', 'three', 'four', 'two']);
        $collection->unlock();

        // a fresh index object is opened to look at the frequencies after the update
        $frequencyIndex = new MemoryIndex('freq', '3');
        $this->assertEquals('', $frequencyIndex->retrieveRow(0)); // 'one' is gone from entity 0
    }

    /**
     * Test the reverse lookup
     *
     * Asking the collection for an entity's reverse assignments should
     * return a structure keyed by the length of the stored token.
     */
    public function testReverse()
    {
        $collection = new FulltextCollection('page', 'word', 'w', 'pageword');
        $collection->lock();
        $collection->addEntity('wiki:syntax', ['dokuwiki']);
        $collection->unlock();

        $expected = [strlen('dokuwiki') => [0 => 0]];
        $this->assertEquals($expected, $collection->getReverseAssignments('wiki:syntax'));
    }
}
98 changes: 98 additions & 0 deletions _test/tests/inc/Search/Collection/TermTest.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,98 @@
<?php

namespace tests\Search\Collection;

use dokuwiki\Search\Collection\FulltextCollection;
use dokuwiki\Search\Collection\FulltextCollectionSearch;
use dokuwiki\Search\Collection\Term;
use dokuwiki\Search\Exception\SearchException;
use dokuwiki\Search\Index\MemoryIndex;
use dokuwiki\Search\QueryParser;
use dokuwiki\Search\Tokenizer;

/**
 * Tests for the Term class: parsing of wildcards and the bookkeeping of
 * matched tokens and entity frequencies.
 */
class TermTest extends \DokuWikiTest
{
    /**
     * A term without any wildcard keeps its text in all representations.
     *
     * The method carries the "test" prefix so that PHPUnit actually runs it,
     * like the other testBasic* methods in this class.
     */
    public function testBasicExact()
    {
        $term = new Term('dokuwiki');

        $this->assertEquals('dokuwiki', $term->getOriginal());
        $this->assertEquals('dokuwiki', $term->getBase());
        $this->assertEquals('dokuwiki', $term->getQuoted());
        $this->assertEquals(8, $term->getLength());
        $this->assertEquals(Term::WILDCARD_NONE, $term->getWildcard());
    }

    /**
     * A leading asterisk is stripped from the base and reported as WILDCARD_START.
     */
    public function testBasicLeftWildcard()
    {
        $term = new Term('*wiki');

        $this->assertEquals('*wiki', $term->getOriginal());
        $this->assertEquals('wiki', $term->getBase());
        $this->assertEquals('.*wiki', $term->getQuoted());
        $this->assertEquals(4, $term->getLength());
        $this->assertEquals(Term::WILDCARD_START, $term->getWildcard());
    }

    /**
     * A trailing asterisk is stripped from the base and reported as WILDCARD_END.
     */
    public function testBasicRightWildcard()
    {
        $term = new Term('wiki*');

        $this->assertEquals('wiki*', $term->getOriginal());
        $this->assertEquals('wiki', $term->getBase());
        $this->assertEquals('wiki.*', $term->getQuoted());
        $this->assertEquals(4, $term->getLength());
        $this->assertEquals(Term::WILDCARD_END, $term->getWildcard());
    }

    /**
     * Asterisks on both sides are stripped and reported as the sum of both flags.
     */
    public function testBasicBothWildcard()
    {
        $term = new Term('*wiki*');

        $this->assertEquals('*wiki*', $term->getOriginal());
        $this->assertEquals('wiki', $term->getBase());
        $this->assertEquals('.*wiki.*', $term->getQuoted());
        $this->assertEquals(4, $term->getLength());
        $this->assertEquals(Term::WILDCARD_START + Term::WILDCARD_END, $term->getWildcard());
    }

    /**
     * Constructing a term from an empty string has to fail with a
     * SearchException whose message mentions "short".
     */
    public function testBadTerm()
    {
        $this->expectException(SearchException::class);
        // the message belongs to the exception, so it has to be checked with the
        // exception expectation; the deprecation expectation looks at PHP
        // deprecation notices instead and would never see this message
        $this->expectExceptionMessageMatches('/short/i');
        new Term('');
    }

    /**
     * Tokens are added per token length and can be read back either as a
     * flat list of tokens or as the token IDs of one length.
     */
    public function testTokenAdding()
    {
        $term = new Term('*wiki*');
        $term->addTokens(8, [0 => 'dokuwiki']);
        $term->addTokens(5, [0 => 'wikis', 134 => 'awiki']);

        $this->assertEquals(['dokuwiki', 'wikis', 'awiki'], $term->getTokens());

        $this->assertEquals([0], $term->getTokenIDsByLength(8));
        $this->assertEquals([0, 134], $term->getTokenIDsByLength(5));
        // nothing was added for length 3
        $this->assertEquals([], $term->getTokenIDsByLength(3));
    }

    /**
     * Frequencies added for the same entity are summed up, and resolving
     * the entities replaces the numeric IDs with the mapped names.
     */
    public function testFrequencyAdding()
    {
        $term = new Term('dokuwiki');

        // entity 7 is added twice, the expected total is 14
        $term->addEntityFrequency(7, 7);
        $term->addEntityFrequency(7, 7);
        $term->addEntityFrequency(8, 1);

        $this->assertEquals([7 => 14, 8 => 1], $term->getEntityFrequencies());

        $map = [
            7 => 'page1',
            8 => 'page2'
        ];
        $term->resolveEntities($map);

        $this->assertEquals(['page1' => 14, 'page2' => 1], $term->getEntityFrequencies());
    }
}
72 changes: 72 additions & 0 deletions _test/tests/inc/Search/Index/AbstractIndexTest.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,72 @@
<?php

namespace dokuwiki\test\Search\Index;

use dokuwiki\Search\Index\AbstractIndex;

/**
 * Shared test cases for index implementations.
 *
 * A concrete test class supplies the index under test through getIndex().
 */
abstract class AbstractIndexTest extends \DokuWikiTest
{
    /**
     * Provide a fresh, writable index for a single test
     *
     * @return AbstractIndex
     */
    abstract protected function getIndex();

    /**
     * New values get consecutive row IDs, a known value gets its old ID back.
     */
    public function testGetRowID()
    {
        $idx = $this->getIndex();

        // [value to look up, expected row ID], processed in this order
        $steps = [
            ['foo', 0],
            ['bar', 1],
            ['foo', 0],
        ];

        foreach ($steps as [$value, $expectedId]) {
            $rowId = $idx->getRowID($value);
            $idx->save();
            $this->assertEquals($expectedId, $rowId);
        }
    }

    /**
     * Looking up several values at once returns a value => row ID map.
     */
    public function testGetRowIDs()
    {
        $idx = $this->getIndex();

        $firstBatch = $idx->getRowIDs(['foo', 'bar', 'baz']);
        $idx->save();
        $this->assertEquals(['foo' => 0, 'bar' => 1, 'baz' => 2], $firstBatch);

        // 'bang' is the only new value in the second batch
        $secondBatch = $idx->getRowIDs(['foo', 'bang', 'baz']);
        $idx->save();
        $this->assertEquals(['foo' => 0, 'baz' => 2, 'bang' => 3], $secondBatch);
    }

    /**
     * Rows can be read one at a time or several at once.
     */
    public function testRetrieve()
    {
        $idx = $this->getIndex();
        $idx->getRowIDs(['foo', 'bar', 'baz']); // seed the index
        $idx->save();

        $this->assertEquals('bar', $idx->retrieveRow(1));
        // row 5 does not exist yet, it will be created with padding
        $this->assertEquals('', $idx->retrieveRow(5));
        $idx->save();

        // the index now reaches up to row 5; row 7 is missing and gets ignored
        $rows = $idx->retrieveRows([0, 2, 4, 7]);
        $this->assertEquals([0 => 'foo', 2 => 'baz', 4 => ''], $rows);
        $idx->save();
    }

    /**
     * A regular expression search returns the matching rows keyed by row ID.
     */
    public function testSearch()
    {
        $idx = $this->getIndex();
        $idx->getRowIDs(['foo', 'bar', 'baz', 'bazzel']);
        $idx->save();

        // 'bazzel' is too long for the anchored pattern
        $matches = $idx->search('/^ba.$/');
        $this->assertEquals([1 => 'bar', 2 => 'baz'], $matches);
    }
}
Original file line number Diff line number Diff line change
@@ -1,21 +1,19 @@
<?php

use dokuwiki\Search\Index\AbstractIndex;
namespace dokuwiki\test\Search\Index;

use dokuwiki\Search\Index\FileIndex;

class FileIndexTest extends \DokuWikiTest
class FileIndexTest extends AbstractIndexTest
{
/**
* @return AbstractIndex
*/
protected function getIndex() {
protected function getIndex()
{
static $count = 0;
return new FileIndex('index', $count++);
return new FileIndex('index', $count++, true);
}

public function testChangeRow()
{

$index = $this->getIndex();

$index->changeRow(5, 'test');
Expand Down Expand Up @@ -45,27 +43,4 @@ public function testRetrieveRow()
$this->assertEquals(11, count($full));
}

public function testGetRowId()
{
$index = $this->getIndex();
$result = $index->getRowID('foo');
$this->assertEquals(0, $result);

$result = $index->getRowID('bar');
$this->assertEquals(1, $result);

$result = $index->getRowID('foo');
$this->assertEquals(0, $result);
}

public function testGetRowIDs()
{
$index = $this->getIndex();
$result = $index->getRowIDs(['foo', 'bar', 'baz']);
$this->assertEquals(['foo' => 0, 'bar' => 1, 'baz' => 2], $result);

$result = $index->getRowIDs(['foo', 'bang', 'baz']);
$this->assertEquals(['foo' => 0, 'baz' => 2, 'bang' => 3], $result);

}
}