import { FastifyReply, FastifyRequest } from "fastify";
import { CreatePayload, OpenAI_NS } from "./tyeps";
import { client, system_fingerprint } from "./constants";
import { claude_stop_to_openai_stop } from "./utils";

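// GenerateReply bridges an OpenAI-style chat completion request to the
// Anthropic Messages API: run() dispatches to the streaming or non-streaming
// path and converts Claude's responses back into OpenAI response shapes.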
export class GenerateReply {
  constructor(
    public payload: CreatePayload,
    public request: FastifyRequest<{
      Body: OpenAI_NS.CompletionRequest;
    }>,
    public reply: FastifyReply
  ) {}

  async request_stream_api() {
    const { request, reply, payload } = this;
    const { model, created } = payload;
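    // `created` only belongs in the OpenAI-shaped response; it must not be
    // forwarded to the Anthropic API, so strip it from the payload.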
    delete (payload as any).created;

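    // Fallback chunk id; replaced by Claude's real message id on message_start.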
    const stream_id = Math.random().toString(36).substring(2, 15);
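    // build_streaming re-emits Anthropic stream events as OpenAI
    // chat.completion.chunk objects, tracking the message id, model and
    // input token count as they arrive.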
    async function* build_streaming() {
      const message_stream = await client.messages.create({
        ...payload,
        stream: true,
      });
      let input_tokens = 0;
      let current_id = stream_id;
      let current_model = model;
      for await (const data of message_stream) {
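        // Stop pulling from the upstream stream if the client disconnected.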
        if (request.socket.closed) {
          message_stream.controller.abort();
          break;
        }
        switch (data.type) {
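          // message_start carries the real message id, model and prompt
          // token count; emit the initial role-only chunk.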
          case "message_start": {
            const { content, id, model } = data.message;
            current_id = id;
            current_model = model;
            yield {
              id,
              object: "chat.completion.chunk",
              created,
              model,
              system_fingerprint,
              choices: [
                {
                  index: 0,
                  delta: {
                    role: "assistant",
                    content: "",
                  },
                  logprobs: null,
                  finish_reason: null,
                },
              ],
            };
            input_tokens = data.message.usage.input_tokens;
            break;
          }
          case "message_stop": {
            // Fires last; nothing needs to be emitted for it.
            break;
          }
          case "message_delta": {
            // Fires just before message_stop and carries the stop_reason
            // plus the output token count.
            yield {
              id: current_id,
              object: "chat.completion.chunk",
              created,
              model: current_model,
              system_fingerprint,
              choices: [
                {
                  index: 0,
                  delta: {
                    content: "",
                    role: "assistant",
                  },
                  logprobs: null,
                  finish_reason: data.delta.stop_reason
                    ? claude_stop_to_openai_stop(data.delta.stop_reason)
                    : null,
                },
              ],
              usage: {
                prompt_tokens: input_tokens,
                completion_tokens: data.usage.output_tokens,
                total_tokens: input_tokens + data.usage.output_tokens,
              },
            };
            break;
          }
          case "content_block_delta": {
            // The actual text delta for the response content.
            const block = data.delta;
            if (block.type !== "text_delta") {
              // Ignore non-text deltas; only text content is forwarded.
              break;
            }
            yield {
              id: current_id,
              object: "chat.completion.chunk",
              created,
              model: current_model,
              system_fingerprint,
              choices: [
                {
                  index: 0,
                  delta: {
                    role: "assistant",
                    content: block.text,
                  },
                  logprobs: null,
                  finish_reason: null,
                },
              ],
            };
            break;
          }
        }
      }
    }

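    // Standard Server-Sent Events response headers.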
    reply.raw.writeHead(200, {
      "Content-Type": "text/event-stream",
      "Cache-Control": "no-cache",
      Connection: "keep-alive",
      "Access-Control-Allow-Origin": "*",
      "Transfer-Encoding": "chunked",
    });
    // reply.raw.write("\n\n");
    try {
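      // Each chunk is framed as an SSE event: `data: <json>` followed by a
      // blank line.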
      for await (const data of build_streaming()) {
        // console.log(data);
        reply.raw.write("data: " + JSON.stringify(data));
        reply.raw.write("\n\n");
      }
    } catch (error) {
      console.error(error);
      console.error(JSON.stringify(error));
      throw error;
    }

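    // OpenAI clients expect a final `data: [DONE]` sentinel to end the stream.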
    reply.raw.write("data: [DONE]");
    reply.raw.write("\n\n");

    reply.raw.end();
  }

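  // Non-streaming path: a single Anthropic call converted into one
  // OpenAI chat.completion response.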
  async request_api() {
    const { request, reply, payload } = this;
    const { created, model } = payload;
    delete (payload as any).created;

    const result = await client.messages.create({
      ...payload,
      stream: false,
    });
    const openai_response = {
      id: result.id,
      object: "chat.completion",
      created: created,
      model: result.model,
      system_fingerprint: system_fingerprint,
      choices: [
        ...result.content.map((x: any, index) => ({
          index,
          message: {
            role: "assistant",
            // NOTE: x could be a tool_use block, but tools are not supported
            // here, so in practice every block is text.
            content: x.text ?? "",
          },
          logprobs: null,
          finish_reason: claude_stop_to_openai_stop(
            result.stop_reason ?? "stop"
          ),
        })),
      ],
      usage: {
        prompt_tokens: result.usage.input_tokens,
        completion_tokens: result.usage.output_tokens,
        total_tokens: result.usage.input_tokens + result.usage.output_tokens,
      },
    };

    return openai_response;
  }

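  // Entry point: picks the streaming or non-streaming path based on the
  // request body and maps failures to an OpenAI-style error payload.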
  async run() {
    const { request, reply } = this;
    try {
      if (request.body.stream) {
        return await this.request_stream_api();
      }
      return await this.request_api();
    } catch (error) {
      // If the streaming path already wrote response headers, a JSON error
      // can no longer be sent; just terminate the connection.
      if (reply.raw.headersSent) {
        reply.raw.end();
        return;
      }
      await reply.code(500).send({
        message:
          error instanceof Error ? error.message : "Internal Server Error",
        code: "internal_error",
      });
      return;
    }
  }
}
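// Usage sketch (illustrative only): how the class might be wired into a
// Fastify route. The route path and the build_create_payload helper are
// hypothetical, not part of this file.
//
//   app.post("/v1/chat/completions", async (request, reply) => {
//     const payload = build_create_payload(request.body); // hypothetical
//     return new GenerateReply(payload, request, reply).run();
//   });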