Skip to content
This repository has been archived by the owner on Sep 22, 2023. It is now read-only.

Commit

Permalink
Merge pull request #203 from 30-seconds/search-optimize
Browse files Browse the repository at this point in the history
Optimize search engine, update ranker
Resolves #196
Resolves #206
  • Loading branch information
Chalarangelo committed Nov 15, 2020
2 parents f512e52 + 78f2555 commit 60cfdaa
Show file tree
Hide file tree
Showing 13 changed files with 185 additions and 38 deletions.
Binary file modified assets/icons.woff2
Binary file not shown.
2 changes: 1 addition & 1 deletion content/configs/30python.json
Expand Up @@ -14,5 +14,5 @@
"foreColor": "#ffffff",
"iconName": "python"
},
"biasPenaltyMultiplier": 1.1
"biasPenaltyMultiplier": 1.05
}
2 changes: 1 addition & 1 deletion content/configs/30react.json
Expand Up @@ -19,5 +19,5 @@
"foreColor": "#61dafb",
"iconName": "react"
},
"biasPenaltyMultiplier": 1.3
"biasPenaltyMultiplier": 1.25
}
2 changes: 1 addition & 1 deletion content/sources/30blog
2 changes: 1 addition & 1 deletion content/sources/30code
2 changes: 1 addition & 1 deletion content/sources/30css
Submodule 30css updated from d658f5 to 558148
2 changes: 1 addition & 1 deletion content/sources/30python
2 changes: 1 addition & 1 deletion content/sources/30react
31 changes: 21 additions & 10 deletions src/config/rankingEngine.json
Expand Up @@ -11,53 +11,60 @@
"beginner": 2,
"bind": 2,
"blog": 5,
"boolean": 1,
"border": 2,
"browser": 2,
"button": 1,
"c#": 2,
"cascade": 4,
"center": 1,
"cheatsheet": 2,
"children": 1,
"class": 1,
"color": 1,
"collection": 5,
"collection": 3,
"component": 3,
"comprehension": 5,
"copy": 3,
"css": 4,
"dart": 1,
"date": 4,
"deep": 1,
"delay": 2,
"design": 2,
"destructuring": 4,
"devtools": 2,
"dictionary": 4,
"display": 1,
"effect": 4,
"event": 4,
"explode": 5,
"filter": 5,
"flex": 5,
"foreach": 3,
"format": 2,
"fragment": 2,
"function": 4,
"generator": 2,
"get": 2,
"git": 2,
"go": 1,
"grid": 5,
"group": 2,
"hook": 4,
"hover": 2,
"html": 1,
"index": 1,
"input": 1,
"interactivity": 2,
"intermediate": 4,
"iterator": 3,
"javascript": 5,
"join": 4,
"json": 1,
"json": 3,
"key": 2,
"layout": 4,
"lifecycle": 2,
"list": 4,
"list": 5,
"loop": 3,
"map": 5,
"margin": 3,
Expand All @@ -69,24 +76,26 @@
"overflow": 3,
"padding": 3,
"pattern": 2,
"performance": 1,
"php": 1,
"promise": 5,
"props": 1,
"push": 3,
"python": 3,
"random": 1,
"query": 1,
"random": 2,
"range": 3,
"react": 4,
"recursion": 1,
"reduce": 5,
"redux": 3,
"ref": 2,
"reflection": 3,
"regexp": 2,
"regexp": 3,
"render": 3,
"replace": 3,
"scroll": 1,
"selector": 1,
"seo": 2,
"shadow": 4,
"slice": 2,
"sort": 3,
Expand All @@ -98,13 +107,15 @@
"substring": 3,
"table": 1,
"testing": 3,
"timeout": 1,
"timeout": 2,
"transition": 2,
"type": 1,
"unique": 1,
"url": 1,
"variable": 1,
"visual": 2,
"webdev": 3
"visual": 4,
"webdev": 3,
"whitespace": 1
},
"keywordScoreLimit": 60,
"keywordCountLimit": 14,
Expand Down
131 changes: 120 additions & 11 deletions src/config/searchEngine.json
@@ -1,9 +1,8 @@
{
"stopWords": [
"serverStopWords": [
"a",
"about",
"above",
"after",
"again",
"against",
"all",
Expand All @@ -18,17 +17,12 @@
"be",
"because",
"been",
"before",
"being",
"below",
"between",
"both",
"but",
"by",
"can't",
"cannot",
"check",
"checks",
"could",
"couldn't",
"did",
Expand All @@ -40,7 +34,6 @@
"don't",
"down",
"during",
"each",
"few",
"for",
"from",
Expand Down Expand Up @@ -93,8 +86,6 @@
"of",
"off",
"on",
"once",
"only",
"or",
"other",
"ought",
Expand All @@ -106,7 +97,6 @@
"own",
"return",
"returns",
"same",
"shan't",
"she",
"she'd",
Expand Down Expand Up @@ -178,5 +168,124 @@
"yours",
"yourself",
"yourselves"
],
"clientStopWords": [
"a",
"am",
"an",
"are",
"aren't",
"be",
"been",
"being",
"by",
"can't",
"cannot",
"could",
"couldn't",
"did",
"didn't",
"do",
"does",
"doesn't",
"doing",
"don't",
"for",
"had",
"hadn't",
"has",
"hasn't",
"have",
"haven't",
"having",
"he",
"he'd",
"he'll",
"he's",
"her",
"hers",
"herself",
"him",
"himself",
"his",
"i",
"i'd",
"i'll",
"i'm",
"i've",
"if",
"is",
"isn't",
"it",
"it's",
"its",
"itself",
"let's",
"me",
"mustn't",
"my",
"myself",
"no",
"nor",
"of",
"or",
"other",
"ought",
"our",
"ours",
"ourselves",
"shan't",
"she",
"she'd",
"she'll",
"she's",
"should",
"shouldn't",
"so",
"such",
"that",
"that's",
"the",
"their",
"theirs",
"them",
"themselves",
"then",
"there",
"there's",
"these",
"they",
"they'd",
"they'll",
"they're",
"they've",
"those",
"through",
"too",
"was",
"wasn't",
"we",
"we'd",
"we'll",
"we're",
"we've",
"were",
"weren't",
"who",
"who's",
"whom",
"with",
"won't",
"would",
"wouldn't",
"you",
"you'd",
"you'll",
"you're",
"you've",
"your",
"yours",
"yourself",
"yourselves"
]
}
23 changes: 18 additions & 5 deletions src/engines/searchIndexingEngine.js
@@ -1,5 +1,5 @@
import searchEngineConfig from 'config/searchEngine';
const { stopWords } = searchEngineConfig;
const { serverStopWords, clientStopWords } = searchEngineConfig;

// Standard suffix manipulations.
const step2list = {
Expand Down Expand Up @@ -172,21 +172,34 @@ const stem = str => {
* Removes stop words from an array of words:
* - Use the list of stop words to remove stop words from the given array
*/
const cleanStopWords = words =>
const cleanStopWords = (stopWords, words) => {
  // Purpose: return a new array holding every entry of `words` that does not
  // appear in `stopWords`; order and duplicates of the kept words are preserved.
  // The stop word list is turned into a Set once, so each membership test is
  // a constant-time lookup instead of a scan of the whole list per token.
  const stopWordSet = new Set(stopWords);
  return words.filter(tkn => !stopWordSet.has(tkn));
};

/**
 * Removes the words listed in `serverStopWords` from an array of words.
 */
const cleanServerStopWords = words => {
  return cleanStopWords(serverStopWords, words);
};
/**
 * Removes the words listed in `clientStopWords` from an array of words.
 */
const cleanClientStopWords = words => {
  return cleanStopWords(clientStopWords, words);
};

/**
* Deduplicates a list of tokens.
*/
const deduplicateTokens = tokens => [...new Set(tokens)];
const deduplicateTokens = tokens => {
  // Drop a single trailing apostrophe or hyphen first, so that variants such
  // as "object'" and "object" collapse into the same token.
  const trimmedTokens = tokens.map(token => token.replace(/['-]$/, ''));
  // A Set keeps the first occurrence of each token in insertion order.
  return Array.from(new Set(trimmedTokens));
};

/**
* Given a string, produce a list of tokens.
* Given a string, produce a list of tokens (server-side variant).
*/
const parseTokens = str =>
deduplicateTokens(
cleanStopWords(tokenize(str)).map(tkn => stem(tkn))
cleanServerStopWords(tokenize(str)).map(tkn => stem(tkn))
).filter(tkn =>
!!tkn && tkn.length > 1 && !/^-?\d+$/i.test(tkn) && !/^[()[\]$^.;:|\\/%&*#@!%,"'~`\-+=]+$/i.test(tkn)
);

/**
 * Given a string, produce a list of tokens (client-side variant):
 * - Tokenize the string and remove the client stop words
 * - Stem each remaining word and deduplicate the results
 * - Discard empty or single-character tokens, integers and symbol-only tokens
 */
export const quickParseTokens = str => {
  const stemmedWords = cleanClientStopWords(tokenize(str)).map(word => stem(word));

  return deduplicateTokens(stemmedWords).filter(token => {
    // Empty and single-character tokens are dropped.
    if (!token || token.length <= 1) return false;
    // Plain (optionally negative) integers are dropped.
    if (/^-?\d+$/i.test(token)) return false;
    // Tokens made up exclusively of punctuation/symbol characters are dropped.
    return !/^[()[\]$^.;:|\\/%&*#@!%,"'~`\-+=]+$/i.test(token);
  });
};
Expand Down
12 changes: 10 additions & 2 deletions src/engines/searchIndexingEngine.test.js
@@ -1,9 +1,17 @@
import searchIndexingEngine from './searchIndexingEngine';
import searchIndexingEngine, { quickParseTokens as clientSearchEngine } from './searchIndexingEngine';

describe('searchIndexingEngine', () => {
it('parses the tokens from the given string', () => {
const str = 'Creates an object with the same values as the provided object and keys generated by running the provided function for each key. Use Object.keys(obj) to iterate over the object\'s keys. Use Array.prototype.reduce() to create a new object with the same values and mapped keys using fn. Additionally, yeet caress seed recreational practicality';
const result = ['creat', 'object', 'valu', 'provid', 'kei', 'gener', 'run', 'function', 'us', 'obj', 'iter', "object'", 'arrai', 'prototyp', 'reduc', 'new', 'map', 'fn', 'addition', 'yeet', 'caress', 'seed', 'recreat', 'practic'];
const result = ['creat', 'object', 'same', 'valu', 'provid', 'kei', 'gener', 'run', 'function', 'each', 'us', 'obj', 'iter', 'arrai', 'prototyp', 'reduc', 'new', 'map', 'fn', 'addition', 'yeet', 'caress', 'seed', 'recreat', 'practic'];
expect(searchIndexingEngine(str)).toEqual(result);
});
});

describe('clientSearchEngine', () => {
  // The expected output keeps words such as 'as', 'and', 'to' and 'over',
  // which are not part of the client stop word list.
  it('parses the tokens from the given string', () => {
    const input = 'Creates an object with the same values as the provided object and keys generated by running the provided function for each key. Use Object.keys(obj) to iterate over the object\'s keys. Use Array.prototype.reduce() to create a new object with the same values and mapped keys using fn. Additionally, yeet caress seed recreational practicality';
    const expectedTokens = [
      'creat', 'object', 'same', 'valu', 'as', 'provid', 'and', 'kei',
      'gener', 'run', 'function', 'each', 'us', 'obj', 'to', 'iter',
      'over', 'arrai', 'prototyp', 'reduc', 'new', 'map', 'fn',
      'addition', 'yeet', 'caress', 'seed', 'recreat', 'practic',
    ];
    expect(clientSearchEngine(input)).toEqual(expectedTokens);
  });
});

0 comments on commit 60cfdaa

Please sign in to comment.