Skip to content

Commit

Permalink
Improve performance on whitespace-heavy AFF files.
Browse files Browse the repository at this point in the history
Rather than stripping the comments from all lines of an AFF file at once, handle the lines one at a time.

Additionally, use the native .trim() instead of running our own trimming regular expression. We were already using .trim() in other places, so browser compatibility shouldn't be an issue.
  • Loading branch information
cfinke committed Oct 19, 2021
1 parent 9d40304 commit 9955cf1
Show file tree
Hide file tree
Showing 3 changed files with 24 additions and 32 deletions.
16 changes: 7 additions & 9 deletions tests/general.js
Expand Up @@ -7,16 +7,14 @@ function run() {
deepEqual(empty_dict.dictionary, null);
});

test("Comments are removed from affix files", function () {
equal(empty_dict._removeAffixComments("# abc\ndef # ghi\n # jkl\nmnop qrst\n##"), "def # ghi\nmnop qrst", "Comment-only lines are removed.");
equal(empty_dict._removeAffixComments(""), "", "Handles empty input.");
test("Comments are removed from affix file lines", function () {
equal(empty_dict._removeAffixComments("# abc"), "", "Comment-only lines are removed.");
equal(empty_dict._removeAffixComments("def # ghi"), "def # ghi", "Lines that don't begin with comments are not modified");
equal(empty_dict._removeAffixComments(" # jkl"), "", "Comment-only lines beginning with whitespace are removed.");
equal(empty_dict._removeAffixComments("mnop qrst"), "mnop qrst", "Lines with no comments are not modified.");
equal(empty_dict._removeAffixComments("##"), "", "Comment-only lines are removed.");
equal(empty_dict._removeAffixComments(""), "", "Empty lines are not modified.");
equal(empty_dict._removeAffixComments("abc"), "abc", "Handles input that doesn't need changing.");
equal(empty_dict._removeAffixComments(" abc"), "abc", "Leading whitespace is removed.");
equal(empty_dict._removeAffixComments(" abc "), "abc", "Leading and trailing whitespace is removed.");
equal(empty_dict._removeAffixComments("\n\n\abc\n"), "abc", "Leading and trailing newlines are removed.");
equal(empty_dict._removeAffixComments("\n\n"), "", "Consecutive newlines are removed.");
equal(empty_dict._removeAffixComments("\t"), "", "Tabs are treated as whitespace.");
equal(empty_dict._removeAffixComments("\n\t \t\n\n"), "", "All whitespace is treated the same.");
});

test("_readFile can load a file synchronously", function() {
Expand Down
2 changes: 1 addition & 1 deletion typo/package.json
@@ -1,6 +1,6 @@
{
"name": "typo-js",
"version": "1.2.0",
"version": "1.2.1",
"description": "A Hunspell-style spellchecker.",
"main": "typo.js",
"repository": {
Expand Down
38 changes: 16 additions & 22 deletions typo/typo.js
Expand Up @@ -287,13 +287,16 @@ Typo.prototype = {
var line, subline, numEntries, lineParts;
var i, j, _len, _jlen;

// Remove comment lines
data = this._removeAffixComments(data);

var lines = data.split(/\r?\n/);

for (i = 0, _len = lines.length; i < _len; i++) {
line = lines[i];
// Remove comment lines
line = this._removeAffixComments(lines[i]);
line = line.trim();

if ( ! line ) {
continue;
}

var definitionParts = line.split(/\s+/);

Expand Down Expand Up @@ -385,30 +388,21 @@ Typo.prototype = {
},

/**
* Removes comment lines and then cleans up blank lines and trailing whitespace.
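* Removes comment-only lines: returns an empty string when the line starts with '#' (optionally preceded by whitespace); any other line is returned unchanged.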
*
* @param {String} data The data from an affix file.
* @return {String} The cleaned-up data.
* @param {String} data A line from an affix file.
* @return {String} The cleaned-up line.
*/

_removeAffixComments : function (data) {
// Remove comments
_removeAffixComments : function (line) {
// This used to remove any string starting with '#' up to the end of the line,
// but some COMPOUNDRULE definitions include '#' as part of the rule.
// I haven't seen any affix files that use comments on the same line as real data,
// so I don't think this will break anything.
data = data.replace(/^\s*#.*$/mg, "");

// Trim each line
data = data.replace(/^\s\s*/m, '').replace(/\s\s*$/m, '');

// Remove blank lines.
data = data.replace(/\n{2,}/g, "\n");

// Trim the entire string
data = data.replace(/^\s\s*/, '').replace(/\s\s*$/, '');
// So, only remove lines that begin with a comment, optionally preceded by whitespace.
if ( line.match( /^\s*#/, "" ) ) {
return '';
}

return data;
return line;
},

/**
Expand Down

0 comments on commit 9955cf1

Please sign in to comment.