Skip to content

Commit

Permalink
Tokens in Evaluator Tests (#730)
Browse files Browse the repository at this point in the history
  • Loading branch information
javipacheco committed Apr 30, 2024
1 parent aab8e49 commit 2227379
Show file tree
Hide file tree
Showing 11 changed files with 123 additions and 193 deletions.
57 changes: 46 additions & 11 deletions core/src/commonMain/kotlin/com/xebia/functional/xef/llm/Chat.kt
Original file line number Diff line number Diff line change
Expand Up @@ -2,11 +2,17 @@ package com.xebia.functional.xef.llm

import com.xebia.functional.openai.generated.api.Chat
import com.xebia.functional.openai.generated.model.CreateChatCompletionRequest
import com.xebia.functional.openai.generated.model.CreateChatCompletionResponse
import com.xebia.functional.openai.generated.model.CreateChatCompletionResponseChoicesInner
import com.xebia.functional.xef.AIError
import com.xebia.functional.xef.conversation.AiDsl
import com.xebia.functional.xef.conversation.Conversation
import com.xebia.functional.xef.llm.models.MessageWithUsage
import com.xebia.functional.xef.llm.models.MessagesUsage
import com.xebia.functional.xef.llm.models.MessagesWithUsage
import com.xebia.functional.xef.prompt.Prompt
import com.xebia.functional.xef.prompt.PromptBuilder
import com.xebia.functional.xef.store.Memory
import kotlinx.coroutines.flow.*

@AiDsl
Expand Down Expand Up @@ -54,9 +60,34 @@ suspend fun Chat.promptMessage(prompt: Prompt, scope: Conversation = Conversatio
/**
 * Sends [prompt] to the model within [scope] and returns the textual content of every
 * returned choice, discarding usage metadata (see [promptMessagesAndUsage] to keep it).
 */
suspend fun Chat.promptMessages(
  prompt: Prompt,
  scope: Conversation = Conversation()
// The diff scrape kept both the removed and the added version of this line; only the
// new single-expression body is valid Kotlin, so the stale `): List<String> =` is dropped.
): List<String> = promptResponse(prompt, scope) { it.message.content }.first

/**
 * Sends [prompt] to the model and returns the first response message together with the
 * token usage of the call.
 *
 * @throws AIError.NoResponse if the model returned no messages.
 */
@AiDsl
suspend fun Chat.promptMessageAndUsage(
  prompt: Prompt,
  scope: Conversation = Conversation()
): MessageWithUsage {
  // Destructure the aggregate result; MessagesWithUsage is a data class, so
  // componentN maps to (messages, usage) in declaration order.
  val (messages, usage) = promptMessagesAndUsage(prompt, scope)
  val firstMessage = messages.firstOrNull() ?: throw AIError.NoResponse()
  return MessageWithUsage(firstMessage, usage)
}

/**
 * Sends [prompt] to the model and returns every response message's content together
 * with the token usage reported by the API (null when the provider reports none).
 */
@AiDsl
suspend fun Chat.promptMessagesAndUsage(
  prompt: Prompt,
  scope: Conversation = Conversation()
): MessagesWithUsage {
  val (messages, completion) = promptResponse(prompt, scope) { choice -> choice.message.content }
  // Adapt the generated CompletionUsage into xef's own usage model, if present.
  val usage = completion.usage?.let { MessagesUsage(it) }
  return MessagesWithUsage(messages, usage)
}

// Shared implementation behind the public prompt* entry points: runs the request inside a
// metrics span, adapts the prompt to the conversation and model, issues the chat
// completion, records the returned choices in conversation memory, and hands back both
// the mapped choices and the raw response so callers can read token usage from it.
//
// NOTE(review): this span is a diff scrape — the duplicated `promptMemories` declaration
// and the embedded "Expand All @@ ..." marker below are artifacts of the removed+added
// diff lines, and part of the request construction is hidden behind the collapsed hunk.
private suspend fun <T> Chat.promptResponse(
  prompt: Prompt,
  scope: Conversation = Conversation(),
  // Maps one completion choice to a caller-chosen value; null results are filtered out.
  block: suspend Chat.(CreateChatCompletionResponseChoicesInner) -> T?
): Pair<List<T>, CreateChatCompletionResponse> =
  scope.metric.promptSpan(prompt) {
    val promptMemories = prompt.messages.toMemory(scope)
    val promptMemories: List<Memory> = prompt.messages.toMemory(scope)
    val adaptedPrompt = PromptCalculator.adaptPromptToConversationAndModel(prompt, scope)

    adaptedPrompt.addMetrics(scope)
Expand All @@ -72,13 +103,17 @@ suspend fun Chat.promptMessages(
      seed = adaptedPrompt.configuration.seed,
    )

    // Keep a reference to the raw response: it is both the source of the choices below
    // and the second element of the returned Pair (callers read `usage` from it).
    val createResponse: CreateChatCompletionResponse = createChatCompletion(request)
    Pair(
      createResponse
        .addMetrics(scope)
        .choices
        // Persist choices into conversation memory only when the message policy allows it.
        .addChoiceToMemory(
          scope,
          promptMemories,
          prompt.configuration.messagePolicy.addMessagesToConversation
        )
        // Choices for which `block` yields null are dropped from the result list.
        .mapNotNull { block(it) },
      createResponse
    )
  }
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
package com.xebia.functional.xef.llm.models

import com.xebia.functional.openai.generated.model.CompletionUsage

/** Chat messages returned by the model plus the token usage of the call, when reported. */
data class MessagesWithUsage(val messages: List<String>, val usage: MessagesUsage?)

/** A single response message plus the token usage of the call, when reported. */
data class MessageWithUsage(val message: String, val usage: MessagesUsage?)

/** Token counts reported by the completions API for one request. */
data class MessagesUsage(val completionTokens: Int, val promptTokens: Int, val totalTokens: Int) {
  companion object {
    /** Adapts the generated [CompletionUsage] model into xef's own usage type. */
    operator fun invoke(usage: CompletionUsage): MessagesUsage =
      MessagesUsage(usage.completionTokens, usage.promptTokens, usage.totalTokens)
  }
}
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,7 @@ data class SuiteSpec(
output.description.value,
item.context,
output.value,
output.tokens,
classification,
success.contains(classification)
)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -56,10 +56,12 @@ value class Html(val value: String) {
const outputDiv = document.createElement('pre');
outputDiv.classList.add('output');
outputDiv.innerText = 'Output: ' + test.output;
outputDiv.addEventListener('click', function() {
this.classList.toggle('expanded');
});
blockDiv.appendChild(outputDiv);
const usageDiv = document.createElement('pre');
usageDiv.classList.add('output');
usageDiv.innerText = 'Usage: \n Completion Tokens: ' + test.usage?.completionTokens + '\n Prompt Tokens: ' + test.usage?.promptTokens + '\n Total Tokens: ' + test.usage?.totalTokens;
blockDiv.appendChild(usageDiv);
const result = document.createElement('div');
result.classList.add('score', test.success ? 'score-passed' : 'score-failed');
Expand Down Expand Up @@ -123,16 +125,11 @@ value class Html(val value: String) {
.output {
color: #666;
cursor: pointer;
white-space: nowrap;
white-space: normal;
overflow: hidden;
text-overflow: ellipsis;
}
.output.expanded {
white-space: normal;
}
.score {
font-weight: bold;
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ data class OutputResult<E>(
val description: String,
val contextDescription: String,
val output: String,
val usage: OutputTokens?,
val result: E,
val success: Boolean
) where E : AI.PromptClassifier, E : Enum<E>
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,16 @@ value class Markdown(val value: String) {
|<blockquote>
|${outputResult.output}
|</blockquote>
|- Usage:
|<blockquote>
|${outputResult.usage?.let { usage ->
"""
|Completion Tokens: ${usage.completionTokens}
|Prompt Tokens: ${usage.promptTokens}
|Total Tokens: ${usage.totalTokens}
""".trimMargin()
} ?: "No usage information available"}
|</blockquote>
|
|Result: ${if (outputResult.success) "✅ Success" else "❌ Failure"} (${outputResult.result})
""".trimMargin()
Expand Down
Original file line number Diff line number Diff line change
@@ -1,17 +1,39 @@
package com.xebia.functional.xef.evaluator.models

import com.xebia.functional.xef.llm.models.MessageWithUsage
import com.xebia.functional.xef.llm.models.MessagesUsage
import kotlin.jvm.JvmSynthetic
import kotlinx.serialization.Serializable

/** Serializable wrapper for the textual description of an expected output. */
@Serializable data class OutputDescription(val value: String)

/**
 * The response captured for one evaluator item: the output [description], the token
 * counts of the underlying LLM call (null when unavailable), and the model's answer
 * [value].
 *
 * Note: the scraped diff interleaved the pre-change declaration and invoke signature
 * with the new ones; this body keeps only the added (post-change) version, which is
 * the one consistent with [MessageWithUsage] and [OutputTokens].
 */
@Serializable
data class OutputResponse(
  val description: OutputDescription,
  val tokens: OutputTokens?,
  val value: String
) {
  companion object {
    /**
     * Runs [block] and wraps its [MessageWithUsage] result into an [OutputResponse],
     * converting the reported usage (if any) into serializable [OutputTokens].
     */
    @JvmSynthetic
    suspend operator fun invoke(
      description: OutputDescription,
      block: suspend () -> MessageWithUsage
    ): OutputResponse {
      val response = block()
      return OutputResponse(description, response.usage?.let { OutputTokens(it) }, response.message)
    }
  }
}

/** Serializable token counts for one LLM call; fields are null when not reported. */
@Serializable
data class OutputTokens(
  val promptTokens: Int? = null,
  val completionTokens: Int? = null,
  val totalTokens: Int? = null
) {
  companion object {
    /** Converts the runtime [MessagesUsage] into its serializable counterpart. */
    @JvmSynthetic
    operator fun invoke(usage: MessagesUsage): OutputTokens =
      OutputTokens(
        promptTokens = usage.promptTokens,
        completionTokens = usage.completionTokens,
        totalTokens = usage.totalTokens,
      )
  }
}
13 changes: 0 additions & 13 deletions evaluator/src/main/resources/web/index.html

This file was deleted.

65 changes: 0 additions & 65 deletions evaluator/src/main/resources/web/script.js

This file was deleted.

87 changes: 0 additions & 87 deletions evaluator/src/main/resources/web/style.css

This file was deleted.

0 comments on commit 2227379

Please sign in to comment.