# Less Sampler Allocations #95
#
# Workflow file for this run

# Workflow identity, triggers and concurrency policy for the benchmark run.
name: Benchmark Test

# Triggered by pushes to master and by pull requests targeting master.
on:
  push:
    branches:
      - master
  pull_request:
    branches:
      - master

# Runs are grouped by workflow name and git ref; starting a new run in a
# group cancels the one that is still in progress.
concurrency:
  group: ${{ github.workflow }}-${{ github.ref }}-benchmark
  cancel-in-progress: true
# Benchmark jobs. Both run on self-hosted GPU runners and are gated on the
# pull request carrying the 'benchmark' label.
# NOTE(review): the workflow also lists a `push` trigger; on push events
# `github.event.pull_request` is presumably empty, so these jobs would be
# skipped there - confirm that is intended.
jobs:
  # Linux benchmark, executed inside an NVIDIA CUDA devel container once per
  # matrix entry (cuda11, cuda12).
  linux-benchmark-cuda:
    if: contains(github.event.pull_request.labels.*.name, 'benchmark')
    runs-on: [self-hosted, linux, gpu]
    strategy:
      # Let the remaining matrix entries finish even if one of them fails.
      fail-fast: false
      matrix:
        build: [cuda11, cuda12]
        # Per-build container image and model directory.
        include:
          - build: cuda11
            image: nvidia/cuda:11.7.1-devel-ubuntu22.04
            modeldir: /llamasharp_ci/models_benchmark
          - build: cuda12
            image: nvidia/cuda:12.1.1-devel-ubuntu22.04
            modeldir: /llamasharp_ci/models_benchmark
    container:
      image: ${{ matrix.image }}
      env:
        BENCHMARK_MODEL_DIR: ${{ matrix.modeldir }}
      ports:
        - 80
      # Mount the host directory so the model directory is visible in the
      # container under the same path.
      volumes:
        - /llamasharp_ci:/llamasharp_ci
      options: --gpus=all --ipc=host --runtime=nvidia
    steps:
      - uses: actions/checkout@v4

      # Installs curl, ICU and wget, registers the Microsoft package feed and
      # installs the .NET 8 SDK from it.
      - name: Install libraries
        run: |
          apt update
          apt install -y curl libicu-dev
          # -y added: without it apt-get can stop at a confirmation prompt,
          # which aborts in a non-interactive CI shell.
          apt-get install -y wget
          wget https://packages.microsoft.com/config/ubuntu/22.04/packages-microsoft-prod.deb -O packages-microsoft-prod.deb
          dpkg -i packages-microsoft-prod.deb
          rm packages-microsoft-prod.deb
          apt-get update && apt-get install -y dotnet-sdk-8.0

      # Installs Python and huggingface_hub, then runs the repository's
      # download script against the model list.
      - name: Prepare models
        run: |
          apt-get update
          apt-get install -y python3.10 python3-pip
          python3 --version
          pip install huggingface_hub
          python3 .github/download_models.py \
            --model-dir ${{ matrix.modeldir }} \
            --model-list LLama.Benchmark/Assets/models.txt \
            --endpoint https://hf-mirror.com

      - name: Clear package cache
        run: dotnet clean LLamaSharp.sln && dotnet nuget locals all --clear

      - name: Restore packages
        run: dotnet restore LLamaSharp.sln

      - name: Build
        run: |
          dotnet clean
          dotnet build LLama/LLamaSharp.csproj -c Release --no-restore
          dotnet build LLama.Benchmark/LLama.Benchmark.csproj -c Release --no-restore

      - name: Run benchmark test
        run: dotnet run --project LLama.Benchmark/LLama.Benchmark.csproj -c Release --anyCategories LLama

      # Runs even when an earlier step failed. upload-artifact v3 is
      # deprecated; v4 rejects duplicate artifact names within one workflow
      # run, so the name carries the OS and the matrix build.
      - name: Upload artifacts
        if: always()
        uses: actions/upload-artifact@v4
        with:
          name: Benchmark_Results_linux_${{ matrix.build }}
          path: BenchmarkDotNet.Artifacts/results/*

  # Windows benchmark, executed directly on the self-hosted runner (no
  # container) for the cuda11 build only.
  windows-benchmark-cuda:
    if: contains(github.event.pull_request.labels.*.name, 'benchmark')
    runs-on: [self-hosted, windows, gpu]
    strategy:
      fail-fast: false
      matrix:
        build: [cuda11]
        include:
          - build: cuda11
            modeldir: 'F:\Models\LLamaSharpBenchmark'
    env:
      AGENT_TOOLSDIRECTORY: 'D:\Libs\github\runner-cache'
      BENCHMARK_MODEL_DIR: ${{ matrix.modeldir }}
    steps:
      # NOTE(review): every `run` step starts its own shell process, so the
      # variables assigned with `set` here most likely do not reach the later
      # steps. If the proxy is required, export it through the job-level
      # `env:` mapping or by appending to the GITHUB_ENV file - confirm the
      # intent before changing it.
      - name: Settings
        run: |
          set http_proxy=127.0.0.1:7891
          set https_proxy=127.0.0.1:7891

      - uses: actions/checkout@v4

      - name: Clear package cache
        run: dotnet clean LLamaSharp.sln && dotnet nuget locals all --clear

      - name: Restore packages
        run: dotnet restore LLamaSharp.sln

      - name: Build
        run: |
          dotnet clean
          dotnet build LLama/LLamaSharp.csproj -c Release --no-restore
          dotnet build LLama.Benchmark/LLama.Benchmark.csproj -c Release --no-restore

      - name: Run benchmark test
        run: dotnet run --project LLama.Benchmark/LLama.Benchmark.csproj -c Release --anyCategories LLama

      # Runs even when an earlier step failed; the artifact name is unique
      # per OS and build, as upload-artifact v4 requires.
      - name: Upload artifacts
        if: always()
        uses: actions/upload-artifact@v4
        with:
          name: Benchmark_Results_windows_${{ matrix.build }}
          path: BenchmarkDotNet.Artifacts/results/*