Skip to content

Commit

Permalink
Add scoring algorithm for history suggestions (extracted from BSK)
Browse files Browse the repository at this point in the history
  • Loading branch information
CrisBarreiro committed Apr 10, 2024
1 parent 461d2f9 commit 5b68c24
Show file tree
Hide file tree
Showing 2 changed files with 197 additions and 0 deletions.
101 changes: 101 additions & 0 deletions app/src/main/java/com/duckduckgo/app/autocomplete/api/AutoComplete.kt
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@

package com.duckduckgo.app.autocomplete.api

import android.net.Uri
import androidx.core.net.toUri
import com.duckduckgo.app.autocomplete.api.AutoComplete.AutoCompleteResult
import com.duckduckgo.app.autocomplete.api.AutoComplete.AutoCompleteSuggestion.AutoCompleteBookmarkSuggestion
Expand All @@ -30,6 +31,7 @@ import com.duckduckgo.savedsites.api.models.SavedSite.Bookmark
import com.squareup.anvil.annotations.ContributesBinding
import io.reactivex.Observable
import javax.inject.Inject
import org.jetbrains.annotations.VisibleForTesting

interface AutoComplete {
fun autoComplete(query: String): Observable<AutoCompleteResult>
Expand Down Expand Up @@ -143,6 +145,105 @@ class AutoCompleteApi @Inject constructor(
return favorites.asSequence().sortByRank(query)
}

/**
 * Scores how well a history entry matches [query]. Higher is better; 0 means "no match".
 *
 * Matching is case-insensitive: the title is lowercased and compared against the lowercased
 * query, and URL / domain comparisons ignore case.
 *
 * Base score (first rule that applies wins):
 * - naked URL starts with the query: 300, plus 2000 if the URL is a root URL
 * - title starts with the query: 200, plus 2000 if the URL is a root URL
 * - query longer than 2 characters and contained in the domain: 150
 * - query longer than 2 characters and starting a word inside the title: 100
 * - more than one token, and every token starts the title, a word in the title, or the
 *   naked URL: 10, plus 70 if the first token starts the naked URL, otherwise plus 50 if it
 *   starts the title
 *
 * A non-zero base score is multiplied by 1000 and [visitCount] is added, so visit count acts
 * as a secondary sort key.
 *
 * @param title page title of the history entry; `null` is treated as an empty title
 * @param url URL of the history entry
 * @param visitCount number of visits, added to the final score of matching entries
 * @param query the text typed by the user
 * @param queryTokens optional precomputed tokens for [query]; expected to be lowercase, as
 * produced by [tokensFrom]. When `null` they are computed from [query].
 * @return the match score, or 0 when nothing matched
 */
@VisibleForTesting
fun score(
    title: String?,
    url: Uri,
    visitCount: Int,
    query: String,
    queryTokens: List<String>? = null,
): Int {
    // To optimize, query tokens can be precomputed
    val tokens = queryTokens ?: tokensFrom(query)

    // The title is matched in lowercase, so the query has to be normalised the same way;
    // otherwise a query containing an uppercase letter could never match a title.
    val lowercasedQuery = query.lowercase()
    val lowercasedTitle = title?.lowercase() ?: ""
    val isLongQuery = query.length > 2
    val nakedUrl = url.naked()
    val domain = url.host?.removePrefix("www.") ?: ""

    var score = 0
    when {
        // Full matches
        nakedUrl.startsWith(query, ignoreCase = true) -> {
            score += 300
            // Prioritize root URLs most
            if (url.isRoot()) score += 2000
        }
        lowercasedTitle.startsWith(lowercasedQuery) -> {
            score += 200
            if (url.isRoot()) score += 2000
        }
        isLongQuery && domain.contains(query, ignoreCase = true) -> {
            score += 150
        }
        // Exact match from the beginning of a word within the title.
        isLongQuery && lowercasedTitle.contains(" $lowercasedQuery") -> {
            score += 100
        }
        // Tokenized matches
        tokens.size > 1 -> {
            // Match only from the beginning of a word to avoid unintuitive matches.
            val matchesAllTokens = tokens.all { token ->
                lowercasedTitle.startsWith(token) ||
                    lowercasedTitle.contains(" $token") ||
                    nakedUrl.startsWith(token, ignoreCase = true)
            }

            if (matchesAllTokens) {
                score += 10

                // Boost the score if the first token matches. tokens.size > 1 guarantees it exists.
                val firstToken = tokens.first()
                if (nakedUrl.startsWith(firstToken, ignoreCase = true)) {
                    // Start of the naked URL - high score boost
                    score += 70
                } else if (lowercasedTitle.startsWith(firstToken)) {
                    // Beginning of the title - moderate score boost
                    score += 50
                }
            }
        }
    }

    if (score > 0) {
        // Second sort based on visitCount
        score *= 1000
        score += visitCount
    }

    return score
}

/**
 * Returns true when this Uri has no path (or only "/") and carries no query, fragment or
 * user info.
 */
private fun Uri.isRoot(): Boolean {
    val currentPath = path
    val hasPath = !currentPath.isNullOrEmpty() && currentPath != "/"
    if (hasPath) return false
    if (query != null) return false
    if (fragment != null) return false
    return userInfo == null
}

/**
 * Splits [query] on runs of whitespace and returns the non-empty pieces in lowercase,
 * in their original order.
 */
@VisibleForTesting
fun tokensFrom(query: String): List<String> {
    val whitespaceRun = Regex("\\s+")
    return whitespaceRun.split(query).mapNotNull { piece ->
        // Leading/trailing whitespace yields empty pieces, which are not tokens.
        if (piece.isEmpty()) null else piece.lowercase()
    }
}

/**
 * Returns this Uri as a string without its scheme, without a leading "www." on the host and
 * without a trailing "/" on the path. When the Uri has no host, its string form is returned
 * with only a leading "//" removed.
 */
private fun Uri.naked(): String {
    val hostName = host ?: return toString().removePrefix("//")
    val currentPath = path

    val stripped = buildUpon().apply {
        scheme(null)
        // NOTE(review): the authority is rebuilt from the host alone, so a port or user info
        // in the original authority presumably does not survive - confirm this is intended.
        authority(hostName.removePrefix("www."))
        if (currentPath != null && currentPath.endsWith('/')) {
            path(currentPath.dropLast(1))
        }
    }.build()

    // With the scheme cleared the string form starts with "//", which is not wanted here.
    return stripped.toString().removePrefix("//")
}

private fun Sequence<SavedSite>.sortByRank(query: String): List<SavedSite> {
return this.map { RankedBookmark(savedSite = it) }
.map { scoreTitle(it, query) }
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@

package com.duckduckgo.app.autocomplete.api

import androidx.core.net.toUri
import androidx.test.ext.junit.runners.AndroidJUnit4
import com.duckduckgo.app.autocomplete.api.AutoComplete.AutoCompleteResult
import com.duckduckgo.app.autocomplete.api.AutoComplete.AutoCompleteSuggestion
Expand Down Expand Up @@ -542,6 +543,101 @@ class AutoCompleteApiTest {
)
}

@Test
fun testWhenQueryIsJustWhitespaces_ThenTokensAreEmpty() {
    // A query made only of spaces, tabs and newlines must not produce any token.
    val whitespaceOnly = " \t\n\t\t \t \t \n\n\n "

    val result = testee.tokensFrom(whitespaceOnly)

    assertTrue(result.isEmpty())
}

@Test
fun testWhenQueryContainsTabsOrNewlines_ThenResultIsTheSameAsIfThereAreSpaces() {
    // Tabs and newlines separate tokens exactly like spaces do.
    val expected = listOf("testing", "query", "tokens")

    val fromSpaces = testee.tokensFrom("testing query tokens")
    val fromTabs = testee.tokensFrom("testing\tquery\ttokens")
    val fromNewlines = testee.tokensFrom("testing\nquery\ntokens")

    assertEquals(expected, fromSpaces)
    assertEquals(fromSpaces, fromTabs)
    assertEquals(fromSpaces, fromNewlines)
}

@Test
fun testWhenURLMatchesWithQuery_ThenScoreIsIncreased() {
    // The query is a prefix of the URL once the scheme and "www." are stripped.
    val result = testee.score(
        title = "Test case website",
        url = "https://www.testcase.com/notroot".toUri(),
        visitCount = 100,
        query = "testcase.com/no",
    )

    assertTrue(result > 0)
}

@Test
fun testWhenTitleMatchesFromTheBeginning_ThenScoreIsIncreased() {
    val searchTerm = "test"

    // Title begins with the query.
    val titlePrefixScore = testee.score(
        title = "Test case website",
        url = "https://www.website.com".toUri(),
        visitCount = 100,
        query = searchTerm,
    )

    // Query only appears as a later word of the title.
    val titleInnerWordScore = testee.score(
        title = "Case test website 2",
        url = "https://www.website2.com".toUri(),
        visitCount = 100,
        query = searchTerm,
    )

    assertTrue(titlePrefixScore > titleInnerWordScore)
}

@Test
fun testWhenDomainMatchesFromTheBeginning_ThenScoreIsIncreased() {
    val searchTerm = "test"

    // Domain begins with the query.
    val domainPrefixScore = testee.score(
        title = "Website",
        url = "https://www.test.com".toUri(),
        visitCount = 100,
        query = searchTerm,
    )

    // Domain merely contains the query somewhere in the middle.
    val domainContainsScore = testee.score(
        title = "Website 2",
        url = "https://www.websitetest.com".toUri(),
        visitCount = 100,
        query = searchTerm,
    )

    assertTrue(domainPrefixScore > domainContainsScore)
}

@Test
fun testWhenThereIsMoreVisitCount_ThenScoreIsIncreased() {
    val searchTerm = "website"

    val fewerVisitsScore = testee.score(
        title = "Website",
        url = "https://www.website.com".toUri(),
        visitCount = 100,
        query = searchTerm,
    )

    // Same kind of match, but visited once more.
    val moreVisitsScore = testee.score(
        title = "Website 2",
        url = "https://www.website2.com".toUri(),
        visitCount = 101,
        query = searchTerm,
    )

    assertTrue(fewerVisitsScore < moreVisitsScore)
}

private fun favorite(
id: String = UUID.randomUUID().toString(),
title: String = "title",
Expand Down

0 comments on commit 5b68c24

Please sign in to comment.