forked from vllm-project/vllm
-
Notifications
You must be signed in to change notification settings - Fork 8
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
268 changed files
with
10,240 additions
and
3,104 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,37 @@ | ||
# This script builds the Neuron docker image and runs the API server inside the container. | ||
# It serves as a sanity check for compilation and basic model usage. | ||
set -e | ||
|
||
# Try building the docker image | ||
# Log Docker in to the ECR registry first; the password is piped from the AWS | ||
# CLI via --password-stdin rather than passed on the command line. | ||
# NOTE(review): presumably Dockerfile.neuron pulls its base image from this | ||
# registry - confirm. | ||
aws ecr get-login-password --region us-west-2 | docker login --username AWS --password-stdin 763104351884.dkr.ecr.us-west-2.amazonaws.com | ||
docker build -t neuron -f Dockerfile.neuron . | ||
|
||
# Setup cleanup | ||
# Force-remove the container named "neuron"; `|| true` keeps a failed removal | ||
# from aborting the script under `set -e`. | ||
remove_docker_container() { docker rm -f neuron || true; } | ||
# Run the cleanup whenever the script exits. | ||
trap remove_docker_container EXIT | ||
# Also run it now, before a container with this name is started. | ||
remove_docker_container | ||
|
||
# Run the image: start the vLLM API server in a container named "neuron" (the | ||
# name remove_docker_container deletes), passing /dev/neuron0 and /dev/neuron1 | ||
# through and using the host network. The trailing & puts the container in the | ||
# background so the script can go on to poll the server. | ||
docker run --device=/dev/neuron0 --device=/dev/neuron1 --network host --name neuron neuron python3 -m vllm.entrypoints.api_server \ | ||
--model TinyLlama/TinyLlama-1.1B-Chat-v1.0 --max-num-seqs 8 --max-model-len 128 --block-size 128 --device neuron --tensor-parallel-size 2 & | ||
|
||
# Wait for the server to start | ||
#######################################
# Poll the API server's /health endpoint once per second until it answers
# with HTTP 200.
# Arguments: $1 - maximum number of seconds to wait (optional, default 300)
# Outputs:   writes "Timeout after N seconds" to stderr when the limit is hit
# Returns:   0 once the server reports healthy, 1 on timeout
#######################################
wait_for_server_to_start() {
  local timeout=${1:-300}
  local counter=0

  while [ "$(curl -s -o /dev/null -w '%{http_code}' localhost:8000/health)" != "200" ]; do
    sleep 1
    counter=$((counter + 1))
    if [ "$counter" -ge "$timeout" ]; then
      echo "Timeout after $timeout seconds" >&2
      # Report failure instead of falling through: the caller must not go on
      # to send requests to a server that never became healthy.
      return 1
    fi
  done
}
wait_for_server_to_start | ||
|
||
# Test a simple prompt
# --fail makes curl exit non-zero when the server answers with an HTTP error
# status (>= 400). As the last command, its status becomes the script's exit
# status, so a server-side failure fails this sanity check.
curl --fail -X POST -H "Content-Type: application/json" \
  localhost:8000/generate \
  -d '{"prompt": "San Francisco is a"}'
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,27 @@ | ||
# Composite action: activates a pyenv virtual environment and installs the
# wheel found in the workspace for the requested Python version.
name: install whl
description: 'installs found whl based on python version into specified venv'
inputs:
  python:
    description: 'python version, e.g. 3.10.12'
    required: true
  # NOTE(review): `venv` is declared, but the step below derives the
  # environment name from env.VENV_BASE and the commit SHA instead - confirm
  # which one is intended.
  venv:
    description: 'name for python virtual environment'
    required: true
runs:
  using: composite
  steps:
    - id: install_whl
      run: |
        # move source directories
        # NOTE(review): presumably so the installed wheel is imported rather
        # than the checked-out sources - confirm.
        mv vllm vllm-ignore
        mv csrc csrc-ignore
        # activate and install
        COMMIT=${{ github.sha }}
        # environment name is "<VENV_BASE>-<first 7 chars of the commit SHA>"
        VENV="${{ env.VENV_BASE }}-${COMMIT:0:7}"
        source "$(pyenv root)/versions/${{ inputs.python }}/envs/${VENV}/bin/activate"
        pip3 install -r requirements-dev.txt
        BASE=$(./.github/scripts/convert-version "${{ inputs.python }}")
        WHL=$(find . -type f -iname "*${BASE}*.whl")
        # quoted so the shell can neither glob-expand "[sparse]" nor
        # word-split the wheel path
        pip3 install "${WHL}[sparse]"
      shell: bash
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.
13a1f5b
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
bigger_is_better
{"name": "request_throughput", "description": "VLLM Engine throughput - synthetic\nmodel - NousResearch/Llama-2-7b-chat-hf\nmax_model_len - 4096\nbenchmark_throughput {\n \"use-all-available-gpus_\": \"\",\n \"input-len\": 256,\n \"output-len\": 128,\n \"num-prompts\": 1000\n}", "gpu_description": "NVIDIA A10G x 1", "vllm_version": "0.2.0", "python_version": "3.10.12 (main, Mar 7 2024, 18:39:53) [GCC 9.4.0]", "torch_version": "2.2.1+cu121"}
4.026795745265708 prompts/s
3.80234884054723 prompts/s
0.94
{"name": "token_throughput", "description": "VLLM Engine throughput - synthetic\nmodel - NousResearch/Llama-2-7b-chat-hf\nmax_model_len - 4096\nbenchmark_throughput {\n \"use-all-available-gpus_\": \"\",\n \"input-len\": 256,\n \"output-len\": 128,\n \"num-prompts\": 1000\n}", "gpu_description": "NVIDIA A10G x 1", "vllm_version": "0.2.0", "python_version": "3.10.12 (main, Mar 7 2024, 18:39:53) [GCC 9.4.0]", "torch_version": "2.2.1+cu121"}
1546.2895661820319 tokens/s
1460.1019547701362 tokens/s
0.94
This comment was automatically generated by workflow using github-action-benchmark.