Feat summarize audio topic (#594)

* Refactor AI commands * Fix JSON command * Fix extract-story command * Add optional topic summarization for audio
xiaolai · May 9, 2024 · 69a6f72 · 69a6f72
1 parent 5436b20
commit 69a6f72
Show file tree

Hide file tree

Showing 18 changed files with 245 additions and 232 deletions.
diff --git a/enjoy/src/commands/analyze.command.ts b/enjoy/src/commands/analyze.command.ts
@@ -1,5 +1,5 @@
-import { ChatOpenAI } from "@langchain/openai";
 import { ChatPromptTemplate } from "@langchain/core/prompts";
+import { textCommand } from "./text.command";
 
 export const analyzeCommand = async (
   text: string,
@@ -10,29 +10,14 @@ export const analyzeCommand = async (
     baseUrl?: string;
   }
 ): Promise<string> => {
-  const { key, temperature = 0, baseUrl } = options;
-  let { modelName = "gpt-4-turbo" } = options;
+  if (!text) throw new Error("Text is required");
 
-  const chatModel = new ChatOpenAI({
-    openAIApiKey: key,
-    modelName,
-    temperature,
-    configuration: {
-      baseURL: baseUrl,
-    },
-    cache: false,
-    verbose: true,
-    maxRetries: 2,
-  });
-
-  const prompt = ChatPromptTemplate.fromMessages([
+  const prompt = await ChatPromptTemplate.fromMessages([
     ["system", SYSTEM_PROMPT],
     ["human", text],
-  ]);
-
-  const response = await prompt.pipe(chatModel).invoke({});
+  ]).format({});
 
-  return response.text;
+  return textCommand(prompt, options);
 };
 
 const SYSTEM_PROMPT = `你是我的英语教练，我将提供英语文本，你将帮助我分析文本的句子结构、语法和词汇/短语，并对文本进行详细解释。请用中文回答，并按以下格式返回结果：

diff --git a/enjoy/src/commands/extract-story.command.ts b/enjoy/src/commands/extract-story.command.ts
@@ -1,73 +1,37 @@
-import { ChatOpenAI } from "@langchain/openai";
 import { ChatPromptTemplate } from "@langchain/core/prompts";
-import { zodToJsonSchema } from "zod-to-json-schema";
 import { z } from "zod";
-import { RESPONSE_JSON_FORMAT_MODELS } from "@/constants";
+import { jsonCommand } from "./json.command";
 
 export const extractStoryCommand = async (
-  content: string,
+  text: string,
   options: {
     key: string;
     modelName?: string;
     temperature?: number;
     baseUrl?: string;
   }
 ): Promise<{ words: string[]; idioms: string[] }> => {
-  const { key, temperature = 0, baseUrl } = options;
-  let { modelName = "gpt-4-turbo" } = options;
-
-  if (RESPONSE_JSON_FORMAT_MODELS.indexOf(modelName) === -1) {
-    modelName = "gpt-4-turbo";
-  }
-
-  const saveExtraction = z.object({
+  const schema = z.object({
     words: z.array(z.string().describe("extracted word")),
     idioms: z.array(z.string().describe("extracted idiom")),
   });
 
-  const chatModel = new ChatOpenAI({
-    openAIApiKey: key,
-    modelName,
-    temperature,
-    modelKwargs: {
-      response_format: {
-        type: "json_object",
-      },
-    },
-    configuration: {
-      baseURL: baseUrl,
-    },
-    cache: true,
-    verbose: true,
-    maxRetries: 2,
-  }).bind({
-    tools: [
-      {
-        type: "function",
-        function: {
-          name: "save_extraction",
-          description: "Save the extracted words and idioms from a text",
-          parameters: zodToJsonSchema(saveExtraction),
-        },
-      },
-    ],
-  });
-
-  const prompt = ChatPromptTemplate.fromMessages([
+  const prompt = await ChatPromptTemplate.fromMessages([
     ["system", EXTRACT_STORY_PROMPT],
     ["human", "{text}"],
-  ]);
-
-  const response = await prompt.pipe(chatModel).invoke({
+  ]).format({
     learning_language: "English",
-    text: content,
+    text,
   });
 
-  return JSON.parse(
-    response.additional_kwargs?.tool_calls?.[0]?.function?.arguments || "{}"
-  );
+  return jsonCommand(prompt, { ...options, schema });
 };
 
 const EXTRACT_STORY_PROMPT = `
-I am an {learning_language} beginner and only have a grasp of 500 high-frequency basic words. You are an {learning_language} learning assistant robot, and your task is to analyze the article I provide and extract all the meaningful words and idioms that I may not be familiar with. Specifically, it should include common words used in uncommon ways. Return in JSON format.
+I am an {learning_language} beginner and only have a grasp of 500 high-frequency basic words. You are an {learning_language} learning assistant robot, and your task is to analyze the article I provide and extract all the meaningful words and idioms that I may not be familiar with. Specifically, it should include common words used in uncommon ways. Return in JSON format like following:
+
+{{
+  words: ["word1", "word2", ...],
+  idioms: ["idiom1", "idiom2", ...]
+}}
 `;
diff --git a/enjoy/src/commands/index.ts b/enjoy/src/commands/index.ts
@@ -2,5 +2,8 @@ export * from "./extract-story.command";
 export * from "./lookup.command";
 export * from "./translate.command";
 export * from "./ipa.command";
+export * from "./json.command";
 export * from "./analyze.command";
 export * from "./punctuate.command";
+export * from "./summarize-topic.command";
+export * from "./text.command";
diff --git a/enjoy/src/commands/ipa.command.ts b/enjoy/src/commands/ipa.command.ts
@@ -1,11 +1,6 @@
-import { ChatOpenAI } from "@langchain/openai";
 import { ChatPromptTemplate } from "@langchain/core/prompts";
 import { z } from "zod";
-import {
-  StructuredOutputParser,
-  OutputFixingParser,
-} from "langchain/output_parsers";
-import { RESPONSE_JSON_FORMAT_MODELS } from "@/constants";
+import { jsonCommand } from "./json.command";
 
 export const ipaCommand = async (
   text: string,
@@ -16,14 +11,9 @@ export const ipaCommand = async (
     baseUrl?: string;
   }
 ): Promise<{ words?: { word?: string; ipa?: string }[] }> => {
-  const { key, temperature = 0, baseUrl } = options;
-  let { modelName = "gpt-4-turbo" } = options;
+  if (!text) throw new Error("Text is required");
 
-  if (RESPONSE_JSON_FORMAT_MODELS.indexOf(modelName) === -1) {
-    modelName = "gpt-4-turbo";
-  }
-
-  const responseSchema = z.object({
+  const schema = z.object({
     words: z.array(
       z.object({
         word: z.string().nonempty(),
@@ -32,51 +22,15 @@ export const ipaCommand = async (
     ),
   });
 
-  const parser = StructuredOutputParser.fromZodSchema(responseSchema);
-  const fixParser = OutputFixingParser.fromLLM(
-    new ChatOpenAI({
-      openAIApiKey: key,
-      modelName,
-      temperature: 0,
-      configuration: {
-        baseURL: baseUrl,
-      },
-    }),
-    parser
-  );
-
-  const chatModel = new ChatOpenAI({
-    openAIApiKey: key,
-    modelName,
-    temperature,
-    configuration: {
-      baseURL: baseUrl,
-    },
-    modelKwargs: {
-      response_format: {
-        type: "json_object",
-      },
-    },
-    cache: true,
-    verbose: true,
-    maxRetries: 2,
-  });
-
-  const prompt = ChatPromptTemplate.fromMessages([
+  const prompt = await ChatPromptTemplate.fromMessages([
     ["system", SYSTEM_PROMPT],
     ["human", "{text}"],
-  ]);
-
-  const response = await prompt.pipe(chatModel).invoke({
+  ]).format({
     learning_language: "English",
     text,
   });
 
-  try {
-    return await parser.parse(response.text);
-  } catch (e) {
-    return await fixParser.parse(response.text);
-  }
+  return jsonCommand(prompt, { ...options, schema });
 };
 
 const SYSTEM_PROMPT = `Generate an array of JSON objects for each {learning_language} word in the given text, with each object containing two keys: 'word' and 'ipa', where 'ipa' is the International Phonetic Alphabet (IPA) representation of the word. Return the array in JSON format only. The output should be structured like this:

diff --git a/enjoy/src/commands/json.command.ts b/enjoy/src/commands/json.command.ts
@@ -0,0 +1,48 @@
+import { ChatOpenAI } from "@langchain/openai";
+import { RESPONSE_JSON_FORMAT_MODELS } from "@/constants";
+import { zodToJsonSchema } from "zod-to-json-schema";
+
+export const jsonCommand = async (
+  prompt: string,
+  options: {
+    key: string;
+    modelName?: string;
+    temperature?: number;
+    baseUrl?: string;
+    schema: any;
+  }
+): Promise<any> => {
+  const { key, temperature = 0, baseUrl, schema } = options;
+  let { modelName = "gpt-4-turbo" } = options;
+
+  if (RESPONSE_JSON_FORMAT_MODELS.indexOf(modelName) === -1) {
+    modelName = "gpt-4-turbo";
+  }
+
+  const chatModel = new ChatOpenAI({
+    openAIApiKey: key,
+    modelName,
+    temperature,
+    modelKwargs: {
+      response_format: {
+        type: "json_object",
+      },
+    },
+    configuration: {
+      baseURL: baseUrl,
+    },
+    cache: true,
+    verbose: true,
+    maxRetries: 1,
+  });
+
+  const structuredOutput = chatModel.withStructuredOutput(
+    zodToJsonSchema(schema),
+    {
+      method: "jsonMode",
+    }
+  );
+
+  const response = await structuredOutput.invoke(prompt);
+  return response;
+};
diff --git a/enjoy/src/commands/lookup.command.ts b/enjoy/src/commands/lookup.command.ts
@@ -1,11 +1,6 @@
-import { ChatOpenAI } from "@langchain/openai";
 import { ChatPromptTemplate } from "@langchain/core/prompts";
 import { z } from "zod";
-import {
-  StructuredOutputParser,
-  OutputFixingParser,
-} from "langchain/output_parsers";
-import { RESPONSE_JSON_FORMAT_MODELS } from "@/constants";
+import { jsonCommand } from "./json.command";
 
 export const lookupCommand = async (
   params: {
@@ -29,16 +24,9 @@ export const lookupCommand = async (
   translation?: string;
   lemma?: string;
 }> => {
-  const { key, temperature = 0, baseUrl } = options;
-  let { modelName = "gpt-4-turbo" } = options;
-
-  if (RESPONSE_JSON_FORMAT_MODELS.indexOf(modelName) === -1) {
-    modelName = "gpt-4-turbo";
-  }
-
   const { word, context, meaningOptions } = params;
 
-  const responseSchema = z.object({
+  const schema = z.object({
     id: z.string().optional(),
     word: z.string().optional(),
     context_translation: z.string().optional(),
@@ -49,37 +37,10 @@ export const lookupCommand = async (
     lemma: z.string().optional(),
   });
 
-  const parser = StructuredOutputParser.fromZodSchema(responseSchema);
-  const fixParser = OutputFixingParser.fromLLM(
-    new ChatOpenAI({
-      openAIApiKey: key,
-      modelName,
-      temperature: 0,
-      configuration: {
-        baseURL: baseUrl,
-      },
-    }),
-    parser
-  );
-
-  const chatModel = new ChatOpenAI({
-    openAIApiKey: key,
-    modelName,
-    temperature,
-    configuration: {
-      baseURL: baseUrl,
-    },
-    cache: true,
-    verbose: true,
-    maxRetries: 2,
-  });
-
-  const prompt = ChatPromptTemplate.fromMessages([
+  const prompt = await ChatPromptTemplate.fromMessages([
     ["system", DICITIONARY_PROMPT],
     ["human", "{input}"],
-  ]);
-
-  const response = await prompt.pipe(chatModel).invoke({
+  ]).format({
     learning_language: "English",
     native_language: "Chinese",
     input: JSON.stringify({
@@ -89,11 +50,7 @@ export const lookupCommand = async (
     }),
   });
 
-  try {
-    return await parser.parse(response.text);
-  } catch (e) {
-    return await fixParser.parse(response.text);
-  }
+  return jsonCommand(prompt, { ...options, schema });
 };
 
 const DICITIONARY_PROMPT = `You are an {learning_language}-{native_language} dictionary. I will provide "word(it also maybe a phrase)" and "context" as input, you should return the "word", "lemma", "pronunciation", "pos(part of speech, maybe empty for phrase)", "definition", "translation" and "context_translation" as output. If I provide "definitions", you should try to select the appropriate one for the given context, and return the id of selected definition as "id". If none are suitable, generate a new definition for me. If no context is provided, return the most common definition. If you do not know the appropriate definition, return an empty string for "definition" and "translation".

diff --git a/enjoy/src/commands/punctuate.command.ts b/enjoy/src/commands/punctuate.command.ts
@@ -1,5 +1,5 @@
-import { ChatOpenAI } from "@langchain/openai";
 import { ChatPromptTemplate } from "@langchain/core/prompts";
+import { textCommand } from "./text.command";
 
 export const punctuateCommand = async (
   text: string,
@@ -10,29 +10,14 @@ export const punctuateCommand = async (
     baseUrl?: string;
   }
 ): Promise<string> => {
-  const { key, temperature = 0, baseUrl } = options;
-  let { modelName = "gpt-4-turbo" } = options;
+  if (!text) throw new Error("Text is required");
 
-  const chatModel = new ChatOpenAI({
-    openAIApiKey: key,
-    modelName,
-    temperature,
-    configuration: {
-      baseURL: baseUrl,
-    },
-    cache: false,
-    verbose: true,
-    maxRetries: 2,
-  });
-
-  const prompt = ChatPromptTemplate.fromMessages([
+  const prompt = await ChatPromptTemplate.fromMessages([
     ["system", SYSTEM_PROMPT],
     ["human", text],
-  ]);
-
-  const response = await prompt.pipe(chatModel).invoke({});
+  ]).format({});
 
-  return response.text;
+  return textCommand(prompt, options);
 };
 
 const SYSTEM_PROMPT = `Please add proper punctuation to the text I provide you. Return the corrected text only.`;