From a1c74ae3b0d8d482d2fb29796a590543676bfed0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jose=CC=81=20Carlos=20Montan=CC=83ez?= Date: Thu, 21 Mar 2024 16:03:08 +0100 Subject: [PATCH 1/3] added classifier and sample metric --- .../kotlin/com/xebia/functional/xef/AI.kt | 39 +++++++++++++++++++ evaluator/build.gradle.kts | 1 + .../xef/evaluator/metrics/AnswerAccuracy.kt | 27 +++++++++++++ .../xef/evaluator/models/Metrics.kt | 7 ++++ examples/build.gradle.kts | 1 + .../xef/dsl/classify/AnswerAccuracy.kt | 19 +++++++++ 6 files changed, 94 insertions(+) create mode 100644 evaluator/src/main/kotlin/com/xebia/functional/xef/evaluator/metrics/AnswerAccuracy.kt create mode 100644 evaluator/src/main/kotlin/com/xebia/functional/xef/evaluator/models/Metrics.kt create mode 100644 examples/src/main/kotlin/com/xebia/functional/xef/dsl/classify/AnswerAccuracy.kt diff --git a/core/src/commonMain/kotlin/com/xebia/functional/xef/AI.kt b/core/src/commonMain/kotlin/com/xebia/functional/xef/AI.kt index 9ed01a68a..714311ead 100644 --- a/core/src/commonMain/kotlin/com/xebia/functional/xef/AI.kt +++ b/core/src/commonMain/kotlin/com/xebia/functional/xef/AI.kt @@ -9,6 +9,7 @@ import com.xebia.functional.xef.conversation.AiDsl import com.xebia.functional.xef.conversation.Conversation import com.xebia.functional.xef.llm.fromEnvironment import com.xebia.functional.xef.prompt.Prompt +import kotlin.coroutines.cancellation.CancellationException import kotlin.reflect.KClass import kotlin.reflect.KType import kotlin.reflect.typeOf @@ -20,6 +21,10 @@ import kotlinx.serialization.serializer sealed interface AI { + interface PromptClassifier { + fun template(input: String, output: String, context: String): String + } + companion object { fun chat( @@ -65,6 +70,40 @@ sealed interface AI { } .invoke(prompt) + /** + * Classify a prompt using a given enum. + * + * @param input The input to the model. + * @param output The output to the model. + * @param context The context to the model. + * @param model The model to use. + * @param target The target type to return. + * @param api The chat API to use. + * @param conversation The conversation to use. + * @return The classified enum. + * @throws IllegalArgumentException If no enum values are found. + */ + @AiDsl + @Throws(IllegalArgumentException::class, CancellationException::class) + suspend inline fun classify( + input: String, + output: String, + context: String, + model: CreateChatCompletionRequestModel = CreateChatCompletionRequestModel.gpt_4_1106_preview, + target: KType = typeOf(), + api: ChatApi = fromEnvironment(::ChatApi), + conversation: Conversation = Conversation() + ): E where E : PromptClassifier, E : Enum { + val value = enumValues().firstOrNull() ?: error("No enum values found") + return invoke( + prompt = value.template(input, output, context), + model = model, + target = target, + api = api, + conversation = conversation + ) + } + @AiDsl suspend inline operator fun invoke( prompt: String, diff --git a/evaluator/build.gradle.kts b/evaluator/build.gradle.kts index db9b3ab51..eb3355939 100644 --- a/evaluator/build.gradle.kts +++ b/evaluator/build.gradle.kts @@ -18,6 +18,7 @@ java { dependencies { api(libs.kotlinx.serialization.json) detektPlugins(project(":detekt-rules")) + implementation(projects.xefCore) } detekt { diff --git a/evaluator/src/main/kotlin/com/xebia/functional/xef/evaluator/metrics/AnswerAccuracy.kt b/evaluator/src/main/kotlin/com/xebia/functional/xef/evaluator/metrics/AnswerAccuracy.kt new file mode 100644 index 000000000..0a75c52e2 --- /dev/null +++ b/evaluator/src/main/kotlin/com/xebia/functional/xef/evaluator/metrics/AnswerAccuracy.kt @@ -0,0 +1,27 @@ +package com.xebia.functional.xef.evaluator.metrics + +import com.xebia.functional.xef.AI + +enum class AnswerAccuracy : AI.PromptClassifier { + yes, + no; + + override fun template(input: String, output: String, context: String): String { + return """| + |Return one of the following based on if the output is factual consistent or not with the given + | + | $input + | + | + | $output + | + | + | $context + | + |Return one of the following: + | - if `yes`: It's consistent + | - if `no`: It's inconsistent + """ + .trimMargin() + } +} diff --git a/evaluator/src/main/kotlin/com/xebia/functional/xef/evaluator/models/Metrics.kt b/evaluator/src/main/kotlin/com/xebia/functional/xef/evaluator/models/Metrics.kt new file mode 100644 index 000000000..592f32ad6 --- /dev/null +++ b/evaluator/src/main/kotlin/com/xebia/functional/xef/evaluator/models/Metrics.kt @@ -0,0 +1,7 @@ +package com.xebia.functional.xef.evaluator.models + +sealed interface MetricValues + +sealed interface Metric { + fun template(input: String, output: String, context: String, metricValues: MetricValues): String +} diff --git a/examples/build.gradle.kts b/examples/build.gradle.kts index b0139755c..b29a285e2 100644 --- a/examples/build.gradle.kts +++ b/examples/build.gradle.kts @@ -16,6 +16,7 @@ java { dependencies { implementation(projects.xefCore) + implementation(projects.xefEvaluator) implementation(projects.xefFilesystem) implementation(projects.xefPdf) implementation(projects.xefSql) diff --git a/examples/src/main/kotlin/com/xebia/functional/xef/dsl/classify/AnswerAccuracy.kt b/examples/src/main/kotlin/com/xebia/functional/xef/dsl/classify/AnswerAccuracy.kt new file mode 100644 index 000000000..4081c68cb --- /dev/null +++ b/examples/src/main/kotlin/com/xebia/functional/xef/dsl/classify/AnswerAccuracy.kt @@ -0,0 +1,19 @@ +package com.xebia.functional.xef.dsl.classify + +import com.xebia.functional.openai.models.CreateChatCompletionRequestModel +import com.xebia.functional.xef.AI +import com.xebia.functional.xef.evaluator.metrics.AnswerAccuracy + +suspend fun main() { + println( + AI.classify("Do I love Xef?", "I love Xef", "The answer responds the question") + ) + println( + AI.classify( + input = "Do I love Xef?", + output = "I have three opened PRs", + context = "The answer responds the question", + model = CreateChatCompletionRequestModel.gpt_3_5_turbo_0125 + ) + ) +} From cef473b9c3e471c4fd43294c6e000f694b3bc61b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jose=CC=81=20Carlos=20Montan=CC=83ez?= Date: Thu, 21 Mar 2024 16:17:17 +0100 Subject: [PATCH 2/3] added comments and updated example description --- .../functional/xef/evaluator/metrics/AnswerAccuracy.kt | 6 +++--- .../xebia/functional/xef/dsl/classify/AnswerAccuracy.kt | 8 ++++++++ 2 files changed, 11 insertions(+), 3 deletions(-) diff --git a/evaluator/src/main/kotlin/com/xebia/functional/xef/evaluator/metrics/AnswerAccuracy.kt b/evaluator/src/main/kotlin/com/xebia/functional/xef/evaluator/metrics/AnswerAccuracy.kt index 0a75c52e2..2fd0d7ca7 100644 --- a/evaluator/src/main/kotlin/com/xebia/functional/xef/evaluator/metrics/AnswerAccuracy.kt +++ b/evaluator/src/main/kotlin/com/xebia/functional/xef/evaluator/metrics/AnswerAccuracy.kt @@ -8,7 +8,7 @@ enum class AnswerAccuracy : AI.PromptClassifier { override fun template(input: String, output: String, context: String): String { return """| - |Return one of the following based on if the output is factual consistent or not with the given + |You are an expert en evaluating whether the `output` is consistent with the given `input` and `context`. | | $input | @@ -19,8 +19,8 @@ enum class AnswerAccuracy : AI.PromptClassifier { | $context | |Return one of the following: - | - if `yes`: It's consistent - | - if `no`: It's inconsistent + | - if the answer it's consistent: `yes` + | - if the answer it's not consistent: `no` """ .trimMargin() } diff --git a/examples/src/main/kotlin/com/xebia/functional/xef/dsl/classify/AnswerAccuracy.kt b/examples/src/main/kotlin/com/xebia/functional/xef/dsl/classify/AnswerAccuracy.kt index 4081c68cb..040570954 100644 --- a/examples/src/main/kotlin/com/xebia/functional/xef/dsl/classify/AnswerAccuracy.kt +++ b/examples/src/main/kotlin/com/xebia/functional/xef/dsl/classify/AnswerAccuracy.kt @@ -4,6 +4,14 @@ import com.xebia.functional.openai.models.CreateChatCompletionRequestModel import com.xebia.functional.xef.AI import com.xebia.functional.xef.evaluator.metrics.AnswerAccuracy +/** + * This is a simple example of how to use the `AI.classify` function to classify the accuracy of an + * answer. In this case, it's using the `AnswerAccuracy` enum class to classify if the answer is + * consistent or not. + * + * You can extend the `AI.PromptClassifier` interface to create your own classification. Override + * the `template` function to define the prompt to be used in the classification. + */ suspend fun main() { println( AI.classify("Do I love Xef?", "I love Xef", "The answer responds the question") From 9078d85733b735a8ea08853c6eef10bcfacfdd83 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jose=CC=81=20Carlos=20Montan=CC=83ez?= Date: Thu, 21 Mar 2024 17:06:09 +0100 Subject: [PATCH 3/3] removed non necessary object --- .../com/xebia/functional/xef/evaluator/models/Metrics.kt | 7 ------- 1 file changed, 7 deletions(-) delete mode 100644 evaluator/src/main/kotlin/com/xebia/functional/xef/evaluator/models/Metrics.kt diff --git a/evaluator/src/main/kotlin/com/xebia/functional/xef/evaluator/models/Metrics.kt b/evaluator/src/main/kotlin/com/xebia/functional/xef/evaluator/models/Metrics.kt deleted file mode 100644 index 592f32ad6..000000000 --- a/evaluator/src/main/kotlin/com/xebia/functional/xef/evaluator/models/Metrics.kt +++ /dev/null @@ -1,7 +0,0 @@ -package com.xebia.functional.xef.evaluator.models - -sealed interface MetricValues - -sealed interface Metric { - fun template(input: String, output: String, context: String, metricValues: MetricValues): String -}