init🎉:

This commit is contained in:
arslantu 2024-04-06 10:10:57 +08:00
commit ccfb462fb8
6 changed files with 74 additions and 0 deletions

2
.gitignore vendored Normal file
View File

@ -0,0 +1,2 @@
# Directories excluded from version control. The trailing slash restricts each
# pattern to directories; with no leading slash they match at any depth.
# NOTE(review): presumably local test scripts and a local deployment
# checkout - confirm before relying on this.
tests/
qwen_deploy/

31
docker/Dockerfile Normal file
View File

@ -0,0 +1,31 @@
# Image that serves models through vLLM on top of the PyTorch CUDA devel image.
FROM pytorch/pytorch:2.1.2-cuda12.1-cudnn8-devel

# Build-time settings only (not present in the runtime environment):
#   PIP_SOURCE - PyPI index used to install requirements.txt
#   BUILD_TMP  - scratch directory that holds build inputs
ARG PIP_SOURCE=https://pypi.tuna.tsinghua.edu.cn/simple
ARG BUILD_TMP=/build_tmp

# install basic dependencies
# The APT mirror list is replaced first so the update below uses it. The
# package lists are removed in the same layer so they do not end up in the image.
COPY sources.list /etc/apt/sources.list
RUN apt-get clean all \
    && apt-get update \
    && DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends \
        curl \
        git \
        python3.8 \
        wget \
    && rm -rf /var/lib/apt/lists/*

# install python requirements
# flash-attn is built without build isolation, so it compiles against the
# torch already provided by the base image.
RUN pip install --no-cache-dir flash-attn==2.4.2 --no-build-isolation
# COPY creates $BUILD_TMP if it does not exist, so no separate mkdir is needed.
COPY requirements.txt $BUILD_TMP/
# The scratch directory is deleted in the layer that consumes it; removing it
# in a later RUN would not reduce the image size.
RUN pip install --no-cache-dir -i "$PIP_SOURCE" -r "$BUILD_TMP/requirements.txt" \
    && rm -rf "$BUILD_TMP"

# set locale
# key=value form is required here: the legacy "ENV LANG C.UTF-8 LC_ALL=C.UTF-8"
# form assigns the whole remainder of the line to LANG and never sets LC_ALL.
ENV LANG=C.UTF-8 LC_ALL=C.UTF-8

# initialize workspace
WORKDIR /workspace
# start.sh is not copied into the image; docker-compose.yml bind-mounts it to
# this path at run time.
CMD ["/bin/bash", "/workspace/start.sh"]

24
docker/docker-compose.yml Normal file
View File

@ -0,0 +1,24 @@
# Builds the image from the Dockerfile in this directory and runs the vLLM
# OpenAI-compatible API server on a single NVIDIA GPU.
version: "3"
services:
  api_server:
    build: .
    restart: unless-stopped
    # Host networking: the server listens directly on the host's HOST:PORT.
    network_mode: host
    volumes:
      # Launch script executed by the image's CMD.
      - ./start.sh:/workspace/start.sh
      # Host directory that contains one sub-directory per model. Set
      # MODELS_PATH in the shell or in a .env file next to this file. A bare
      # name here would be parsed as an undeclared named volume and rejected,
      # so the variable is required and Compose aborts with the message below
      # when it is missing.
      - "${MODELS_PATH:?set MODELS_PATH to the host directory containing your models}:/workspace/models"
    environment:
      # change "main" to your model name
      - MODEL_PATH=/workspace/models/main
      - MAX_MODEL_LEN=4096 # max model input length
      - HOST=127.0.0.1
      - PORT=9001 # change to your port
      # Override by exporting API_KEY (or via .env); the default is a
      # placeholder and should be changed for security.
      - API_KEY=${API_KEY:-token-123456}
    deploy:
      resources:
        reservations:
          devices:
            - driver: "nvidia"
              device_ids: ['0'] # gpu id, change to your gpu id
              capabilities: ["gpu"]

3
docker/requirements.txt Normal file
View File

@ -0,0 +1,3 @@
# Python packages installed into the image by the Dockerfile
# (pip install -r requirements.txt). All versions are pinned exactly.
# vllm provides the vllm.entrypoints.openai.api_server module run by start.sh.
# NOTE(review): accelerate and openai are not referenced by the files in this
# directory - presumably used by client or helper code; confirm they are needed.
accelerate==0.28.0
vllm==0.4.0.post1
openai==1.16.1

13
docker/sources.list Normal file
View File

@ -0,0 +1,13 @@
# APT sources copied to /etc/apt/sources.list by the Dockerfile; all entries
# point at the Tsinghua University (TUNA) Ubuntu mirror.
# NOTE(review): every entry targets the Ubuntu "focal" release - confirm this
# matches the Ubuntu release of the base image.
# Source-package (deb-src) entries are commented out by default to speed up
# apt update; uncomment them if you need them.
deb https://mirrors.tuna.tsinghua.edu.cn/ubuntu/ focal main restricted universe multiverse
# deb-src https://mirrors.tuna.tsinghua.edu.cn/ubuntu/ focal main restricted universe multiverse
deb https://mirrors.tuna.tsinghua.edu.cn/ubuntu/ focal-updates main restricted universe multiverse
# deb-src https://mirrors.tuna.tsinghua.edu.cn/ubuntu/ focal-updates main restricted universe multiverse
deb https://mirrors.tuna.tsinghua.edu.cn/ubuntu/ focal-backports main restricted universe multiverse
# deb-src https://mirrors.tuna.tsinghua.edu.cn/ubuntu/ focal-backports main restricted universe multiverse
deb https://mirrors.tuna.tsinghua.edu.cn/ubuntu/ focal-security main restricted universe multiverse
# deb-src https://mirrors.tuna.tsinghua.edu.cn/ubuntu/ focal-security main restricted universe multiverse
# Pre-release (proposed) repository; enabling it is not recommended.
# deb https://mirrors.tuna.tsinghua.edu.cn/ubuntu/ focal-proposed main restricted universe multiverse
# deb-src https://mirrors.tuna.tsinghua.edu.cn/ubuntu/ focal-proposed main restricted universe multiverse

1
docker/start.sh Normal file
View File

@ -0,0 +1 @@
#!/usr/bin/env bash
# Starts the vLLM OpenAI-compatible API server.
#
# Required environment variables (set by docker-compose.yml):
#   MODEL_PATH     - path of the model directory inside the container
#   MAX_MODEL_LEN  - value passed to --max-model-len
#   HOST, PORT     - address the server binds to
#   API_KEY        - key passed to --api-key
set -euo pipefail

# Fail early with a clear message instead of handing an empty argument to the
# server's command-line parser.
: "${MODEL_PATH:?MODEL_PATH must be set}"
: "${MAX_MODEL_LEN:?MAX_MODEL_LEN must be set}"
: "${HOST:?HOST must be set}"
: "${PORT:?PORT must be set}"
: "${API_KEY:?API_KEY must be set}"

# exec replaces the shell with the server process, so signals sent to the
# container (e.g. on "docker stop") reach the server directly instead of
# stopping at the wrapping bash process.
exec python -m vllm.entrypoints.openai.api_server \
    --dtype bfloat16 \
    --model "${MODEL_PATH}" \
    --max-model-len "${MAX_MODEL_LEN}" \
    --host "${HOST}" \
    --port "${PORT}" \
    --api-key "${API_KEY}"