Remove KoboldSamplingParams
mgoin committed Apr 12, 2024
1 parent 00e2b56 commit 344699d
Showing 2 changed files with 1 addition and 19 deletions.
1 change: 1 addition & 0 deletions vllm/entrypoints/kobold/api_server.py
@@ -1,3 +1,4 @@
# UPSTREAM SYNC: keep up-to-date with ../openai/api_server.py
import asyncio
import importlib
import inspect
19 changes: 0 additions & 19 deletions vllm/entrypoints/kobold/protocol.py
@@ -5,25 +5,6 @@
# ========== KoboldAI ========== #


class KoboldSamplingParams(BaseModel):
n: int = Field(1, alias="n")
best_of: Optional[int] = Field(None, alias="best_of")
presence_penalty: float = Field(0.0, alias="presence_penalty")
frequency_penalty: float = Field(0.0, alias="rep_pen")
temperature: float = Field(1.0, alias="temperature")
top_p: float = Field(1.0, alias="top_p")
top_k: float = Field(-1, alias="top_k")
min_p: float = Field(0.0, alias="min_p")
use_beam_search: bool = Field(False, alias="use_beam_search")
length_penalty: float = Field(1.0, alias="length_penalty")
early_stopping: Union[bool, str] = Field(False, alias="early_stopping")
stop: Union[None, str, List[str]] = Field(None, alias="stop_sequence")
include_stop_str_in_output: Optional[bool] = False
ignore_eos: bool = Field(False, alias="ignore_eos")
max_tokens: int = Field(16, alias="max_length")
logprobs: Optional[int] = Field(None, alias="logprobs")


class KAIGenerationInputSchema(BaseModel):
genkey: Optional[str] = None
prompt: str
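The class removed above illustrates a common pattern: a Pydantic model whose field aliases translate KoboldAI request keys (rep_pen, max_length, stop_sequence) into vLLM's sampling vocabulary. As a rough, hypothetical sketch of how that alias mapping works (this is not the code the commit replaces it with), a trimmed-down model and conversion might look like:

```python
# Hypothetical sketch (not the actual vLLM code): a Pydantic model with field
# aliases, in the style of the removed KoboldSamplingParams, translating
# KoboldAI request keys (e.g. "rep_pen", "max_length", "stop_sequence") into
# keyword arguments in vLLM's naming scheme.
from typing import List, Optional, Union

from pydantic import BaseModel, Field


class KoboldStyleSampling(BaseModel):  # hypothetical stand-in for the removed class
    frequency_penalty: float = Field(0.0, alias="rep_pen")
    temperature: float = Field(1.0, alias="temperature")
    top_p: float = Field(1.0, alias="top_p")
    top_k: int = Field(-1, alias="top_k")
    stop: Union[None, str, List[str]] = Field(None, alias="stop_sequence")
    max_tokens: int = Field(16, alias="max_length")


# A KoboldAI-style request body uses the alias names...
payload = {"rep_pen": 1.1, "temperature": 0.7, "max_length": 80, "stop_sequence": ["\n"]}
params = KoboldStyleSampling.parse_obj(payload)

# ...while dumping by field name yields kwargs in vLLM's vocabulary,
# suitable for something like vllm.SamplingParams(**kwargs).
kwargs = params.dict()
print(kwargs)
```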

1 comment on commit 344699d

@github-actions

bigger_is_better

Benchmark suite: VLLM Engine throughput - synthetic (model: NousResearch/Llama-2-7b-chat-hf; max_model_len: 4096; input-len: 256; output-len: 128; num-prompts: 1000; GPU: NVIDIA A10G x 1; vllm_version: 0.2.0; Python 3.10.12; torch 2.2.1+cu121)

request_throughput: Current (344699d) 3.8182870993420073 prompts/s, Previous (788b4e5) 3.8194073015836993 prompts/s, Ratio 1.00
token_throughput: Current (344699d) 1466.222246147331 tokens/s, Previous (788b4e5) 1466.6524038081407 tokens/s, Ratio 1.00

This comment was automatically generated by workflow using github-action-benchmark.
