Skip to content

Commit

Permalink
Extract scoring logic out of Autocomplete
Browse files Browse the repository at this point in the history
  • Loading branch information
CrisBarreiro committed May 16, 2024
1 parent c2caede commit 38500c8
Show file tree
Hide file tree
Showing 7 changed files with 276 additions and 203 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,7 @@ import com.duckduckgo.app.autocomplete.api.AutoComplete.AutoCompleteSuggestion.A
import com.duckduckgo.app.autocomplete.api.AutoComplete.AutoCompleteSuggestion.AutoCompleteHistoryRelatedSuggestion.AutoCompleteHistorySearchSuggestion
import com.duckduckgo.app.autocomplete.api.AutoComplete.AutoCompleteSuggestion.AutoCompleteSearchSuggestion
import com.duckduckgo.app.autocomplete.api.AutoCompleteApi
import com.duckduckgo.app.autocomplete.api.AutoCompleteScorer
import com.duckduckgo.app.autocomplete.api.AutoCompleteService
import com.duckduckgo.app.browser.LongPressHandler.RequiredAction
import com.duckduckgo.app.browser.LongPressHandler.RequiredAction.DownloadFile
Expand Down Expand Up @@ -307,6 +308,9 @@ class BrowserTabViewModelTest {
@Mock
private lateinit var mockAutoCompleteService: AutoCompleteService

@Mock
private lateinit var mockAutoCompleteScorer: AutoCompleteScorer

@Mock
private lateinit var mockWidgetCapabilities: WidgetCapabilities

Expand Down Expand Up @@ -476,7 +480,7 @@ class BrowserTabViewModelTest {
fireproofWebsiteDao = db.fireproofWebsiteDao()
locationPermissionsDao = db.locationPermissionsDao()

mockAutoCompleteApi = AutoCompleteApi(mockAutoCompleteService, mockSavedSitesRepository, mockNavigationHistory)
mockAutoCompleteApi = AutoCompleteApi(mockAutoCompleteService, mockSavedSitesRepository, mockNavigationHistory, mockAutoCompleteScorer)
val fireproofWebsiteRepositoryImpl = FireproofWebsiteRepositoryImpl(
fireproofWebsiteDao,
coroutineRule.testDispatcherProvider,
Expand Down
108 changes: 2 additions & 106 deletions app/src/main/java/com/duckduckgo/app/autocomplete/api/AutoComplete.kt
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,6 @@

package com.duckduckgo.app.autocomplete.api

import android.net.Uri
import androidx.core.net.toUri
import com.duckduckgo.app.autocomplete.api.AutoComplete.AutoCompleteResult
import com.duckduckgo.app.autocomplete.api.AutoComplete.AutoCompleteSuggestion
Expand All @@ -42,7 +41,6 @@ import io.reactivex.Observable
import java.io.InterruptedIOException
import javax.inject.Inject
import kotlin.math.max
import org.jetbrains.annotations.VisibleForTesting

const val maximumNumberOfSuggestions = 12
const val maximumNumberOfTopHits = 2
Expand Down Expand Up @@ -90,6 +88,7 @@ class AutoCompleteApi @Inject constructor(
private val autoCompleteService: AutoCompleteService,
private val repository: SavedSitesRepository,
private val navigationHistory: NavigationHistory,
private val autoCompleteScorer: AutoCompleteScorer,
) : AutoComplete {

override fun autoComplete(query: String): Observable<AutoCompleteResult> {
Expand Down Expand Up @@ -227,105 +226,6 @@ class AutoCompleteApi @Inject constructor(
return history.asSequence().sortHistoryByRank(query)
}

/**
 * Scores how well an entry (title + URL) matches [query].
 *
 * Exactly one match bonus is added to [DEFAULT_SCORE]; the first matching tier wins:
 * URL prefix, title prefix, domain substring, word-start in title, then tokenized match.
 * A positive result is scaled by 1000 and [visitCount] is added as a secondary sort key.
 *
 * @param title entry title, may be null (treated as empty)
 * @param url entry URL
 * @param visitCount number of visits, used only to order entries with the same match tier
 * @param query text being matched
 * @param queryTokens optional precomputed tokens for [query]; computed via [tokensFrom] when null
 * @return the resulting score; [DEFAULT_SCORE] when nothing matched
 */
@VisibleForTesting
fun score(
    title: String?,
    url: Uri,
    visitCount: Int,
    query: String,
    queryTokens: List<String>? = null,
): Int {
    // Callers can hand in precomputed tokens so the query is not re-tokenized per entry.
    val searchTokens = queryTokens ?: tokensFrom(query)

    val titleLower = title?.lowercase().orEmpty()
    val allowsPartialMatch = query.length > 2
    val strippedUrl = url.naked()
    val hostWithoutWww = url.host?.removePrefix("www.").orEmpty()

    val bonus = when {
        // Full matches; root URLs are prioritized most.
        strippedUrl.startsWith(query) -> 300 + (if (url.isRoot()) 2000 else 0)
        titleLower.startsWith(query) -> 200 + (if (url.isRoot()) 2000 else 0)
        allowsPartialMatch && hostWithoutWww.contains(query) -> 150
        // Exact match from the beginning of a word within the title.
        allowsPartialMatch && titleLower.contains(" $query") -> 100
        // Tokenized matches: every token must match from the beginning of a word (or of the URL)
        // to avoid unintuitive matches.
        searchTokens.size > 1 && searchTokens.all { piece ->
            titleLower.startsWith(piece) || titleLower.contains(" $piece") || strippedUrl.startsWith(piece)
        } -> {
            val leadingToken = searchTokens.first()
            val leadingBoost = when {
                strippedUrl.startsWith(leadingToken) -> 70 // URL prefix - high boost
                titleLower.startsWith(leadingToken) -> 50 // beginning of the title - moderate boost
                else -> 0
            }
            10 + leadingBoost
        }
        else -> 0
    }

    val rawScore = DEFAULT_SCORE + bonus
    if (rawScore <= 0) return rawScore

    // Second sort based on visitCount
    return rawScore * 1000 + visitCount
}

/** Returns true when this URI has an empty or "/" path and no query, fragment or user info. */
private fun Uri.isRoot(): Boolean {
    val currentPath = path
    val hasNoPath = currentPath.isNullOrEmpty() || currentPath == "/"
    return hasNoPath && query == null && fragment == null && userInfo == null
}

/** Splits [query] on runs of whitespace and returns the non-empty pieces, lowercased. */
@VisibleForTesting
fun tokensFrom(query: String): List<String> =
    query.split(Regex("\\s+")).mapNotNull { piece ->
        piece.takeIf { it.isNotEmpty() }?.lowercase()
    }

/**
 * Returns this URI as a string without the scheme, without a leading "www." on the host,
 * and without a trailing slash on the path. URIs with no host only lose a leading "//".
 */
private fun Uri.naked(): String {
    val hostName = host ?: return toString().removePrefix("//")

    val trimmed = buildUpon()
        .scheme(null)
        .authority(hostName.removePrefix("www."))

    // Drop a single trailing slash from the path, if there is one.
    path?.takeIf { it.lastOrNull() == '/' }?.let { trimmed.path(it.dropLast(1)) }

    return trimmed.build().toString().removePrefix("//")
}

private fun Sequence<SavedSite>.sortByRank(query: String): List<RankedSuggestion<AutoCompleteBookmarkSuggestion>> {
return this.map { savedSite ->
RankedSuggestion(
Expand Down Expand Up @@ -362,7 +262,7 @@ class AutoCompleteApi @Inject constructor(
)
}
}.let { suggestion ->
RankedSuggestion(suggestion, score(entry.title, entry.url, entry.visits.size, query))
RankedSuggestion(suggestion, autoCompleteScorer.score(entry.title, entry.url, entry.visits.size, query))
}
}.filter { it.score > 0 }
.toList()
Expand Down Expand Up @@ -417,10 +317,6 @@ class AutoCompleteApi @Inject constructor(
return this.toUri().toStringDropScheme().removePrefix("www.")
}

companion object {
// Starting value for a score before any match bonus is added; also the default for RankedSuggestion.score.
private const val DEFAULT_SCORE = -1
}

private data class RankedSuggestion<T : AutoCompleteSuggestion> (
val suggestion: T,
val score: Int = DEFAULT_SCORE,
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,118 @@
/*
* Copyright (c) 2024 DuckDuckGo
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package com.duckduckgo.app.autocomplete.api

import android.net.Uri
import com.duckduckgo.di.scopes.AppScope
import com.squareup.anvil.annotations.ContributesBinding
import javax.inject.Inject

/**
 * Computes a relevance score for an entry (title + URL) against an autocomplete query.
 */
interface AutoCompleteScorer {
/**
 * Scores how well the given entry matches [query].
 *
 * @param title entry title, may be null
 * @param url entry URL
 * @param visitCount number of recorded visits for the entry
 * @param query text being matched
 * @param queryTokens optional precomputed tokens for [query]; when null the implementation derives them itself
 * @return the score for the entry
 */
fun score(
title: String?,
url: Uri,
visitCount: Int,
query: String,
queryTokens: List<String>? = null,
): Int
}

/**
 * Default [AutoCompleteScorer] implementation.
 *
 * Scoring starts from [DEFAULT_SCORE] and adds the bonus of the first tier that matches:
 * - the "naked" URL (no scheme, no `www.`, no trailing slash) starts with the query: +300, and +2000 more for root URLs
 * - the lowercased title starts with the query: +200, and +2000 more for root URLs
 * - the query is longer than 2 characters and the domain contains it: +150
 * - the query is longer than 2 characters and the title contains it preceded by a space: +100
 * - otherwise, when there is more than one token and every token matches the start of the title,
 *   a space-preceded position in the title, or the start of the naked URL: +10, plus +70 if the
 *   first token starts the URL or else +50 if it starts the title
 *
 * A positive score is then multiplied by 1000 and the visit count is added, so visits only
 * order entries within the same tier.
 */
@ContributesBinding(AppScope::class)
class RealAutoCompleteScorer @Inject constructor() : AutoCompleteScorer {

    /**
     * Scores how well the entry described by [title] and [url] matches [query].
     *
     * NOTE(review): [query] is compared as-is, while the title and the tokens are lowercased.
     * Presumably callers pass an already-lowercased query - confirm against the call sites.
     *
     * @param title entry title; null is treated as an empty title
     * @param url entry URL
     * @param visitCount number of visits, added to a positive score as a secondary sort key
     * @param query text being matched
     * @param queryTokens optional precomputed tokens; derived from [query] when null
     * @return the score, or [DEFAULT_SCORE] when no tier matched
     */
    override fun score(
        title: String?,
        url: Uri,
        visitCount: Int,
        query: String,
        queryTokens: List<String>?,
    ): Int {
        // To optimize, query tokens can be precomputed
        val tokens = queryTokens ?: query.tokensFrom()

        val lowercasedTitle = title?.lowercase().orEmpty()
        val isLongEnoughForPartialMatch = query.length > 2
        val nakedUrl = url.naked()
        val domain = url.host?.removePrefix("www.").orEmpty()

        val matchBonus = when {
            // Full matches
            nakedUrl.startsWith(query) -> 300 + rootBonus(url)
            lowercasedTitle.startsWith(query) -> 200 + rootBonus(url)
            isLongEnoughForPartialMatch && domain.contains(query) -> 150
            // Exact match from the beginning of the word within string.
            isLongEnoughForPartialMatch && lowercasedTitle.contains(" $query") -> 100
            else -> tokenizedMatchBonus(tokens, lowercasedTitle, nakedUrl)
        }

        val score = DEFAULT_SCORE + matchBonus

        // Second sort based on visitCount; only applied to entries that actually matched.
        return if (score > 0) score * 1000 + visitCount else score
    }

    /** Extra bonus that prioritizes root URLs most. */
    private fun rootBonus(url: Uri): Int = if (url.isRoot()) 2000 else 0

    /**
     * Bonus for multi-token queries where every token matches; 0 when there are fewer than
     * two tokens or any token fails to match.
     */
    private fun tokenizedMatchBonus(tokens: List<String>, lowercasedTitle: String, nakedUrl: String): Int {
        if (tokens.size <= 1) return 0

        // Match only from the beginning of the word to avoid unintuitive matches.
        val matchesAllTokens = tokens.all { token ->
            lowercasedTitle.startsWith(token) ||
                lowercasedTitle.contains(" $token") ||
                nakedUrl.startsWith(token)
        }
        if (!matchesAllTokens) return 0

        // Boost score if first token matches; tokens is known to be non-empty here.
        val firstToken = tokens.first()
        val firstTokenBoost = when {
            nakedUrl.startsWith(firstToken) -> 70 // nakedUrlString - high score boost
            lowercasedTitle.startsWith(firstToken) -> 50 // beginning of the title - moderate score boost
            else -> 0
        }
        return 10 + firstTokenBoost
    }

    /**
     * Returns this URI as a string without the scheme, without a leading "www." on the host,
     * and without a trailing slash on the path. URIs with no host only lose a leading "//".
     */
    private fun Uri.naked(): String {
        val currentHost = host ?: return toString().removePrefix("//")

        val builder = buildUpon()
            .scheme(null)
            .authority(currentHost.removePrefix("www."))

        val currentPath = path
        if (currentPath != null && currentPath.endsWith('/')) {
            builder.path(currentPath.dropLast(1))
        }

        return builder.build().toString().removePrefix("//")
    }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
/*
* Copyright (c) 2024 DuckDuckGo
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package com.duckduckgo.app.autocomplete.api

import android.net.Uri

/** Returns true when this URI has an empty or "/" path and no query, fragment or user info. */
fun Uri.isRoot(): Boolean {
    val currentPath = path
    val hasNoPath = currentPath.isNullOrEmpty() || currentPath == "/"
    return hasNoPath && query == null && fragment == null && userInfo == null
}

// Compiled once: tokensFrom() can run for every scored entry, so avoid rebuilding the pattern per call.
private val WHITESPACE_REGEX = Regex("\\s+")

/**
 * Splits this string on runs of whitespace and returns the non-empty pieces, lowercased.
 *
 * @return the tokens in their original order; empty when the string is empty or only whitespace
 */
fun String.tokensFrom(): List<String> {
    return this
        .split(WHITESPACE_REGEX)
        .filter { it.isNotEmpty() }
        .map { it.lowercase() }
}

/** Starting score before any match bonus is added; being negative, an entry with no match stays below the `score > 0` check. */
const val DEFAULT_SCORE = -1

0 comments on commit 38500c8

Please sign in to comment.