init🎉:
This commit is contained in:
commit
ccfb462fb8
2
.gitignore
vendored
Normal file
2
.gitignore
vendored
Normal file
|
@ -0,0 +1,2 @@
|
||||||
|
tests/
|
||||||
|
qwen_deploy/
|
31
docker/Dockerfile
Normal file
31
docker/Dockerfile
Normal file
|
@ -0,0 +1,31 @@
|
||||||
|
# Base image: the official PyTorch 2.1.2 image, CUDA 12.1 / cuDNN 8, "devel" variant.
FROM pytorch/pytorch:2.1.2-cuda12.1-cudnn8-devel
|
||||||
|
|
||||||
|
# Package index URL passed to `pip install -i` for requirements.txt.
# Defaults to the TUNA (Tsinghua) PyPI mirror; override with --build-arg PIP_SOURCE=...
ARG PIP_SOURCE=https://pypi.tuna.tsinghua.edu.cn/simple
|
||||||
|
# Scratch directory used only during the build: requirements.txt is copied
# into it and the directory is removed again near the end of this file.
ARG BUILD_TMP=/build_tmp
|
||||||
|
|
||||||
|
# Create the build scratch directory. -p tolerates a path that already exists
# (for example when BUILD_TMP is overridden via --build-arg), and the quotes
# keep the path a single argument.
RUN mkdir -p "$BUILD_TMP"
|
||||||
|
|
||||||
|
# install basic dependencies
|
||||||
|
# Replace the image's APT sources with the bundled sources.list (TUNA mirror).
# NOTE(review): sources.list lists the "focal" suites - confirm that this
# matches the Ubuntu release of the base image.
COPY sources.list /etc/apt/sources.list
|
||||||
|
# Refresh the package index and install the base tools in a single layer.
# The index files under /var/lib/apt/lists are deleted in the same RUN so
# they are not stored in the image. Packages are listed alphabetically.
RUN apt-get clean && apt-get update && apt-get install -y --no-install-recommends \
        curl \
        git \
        python3.8 \
        wget \
    && rm -rf /var/lib/apt/lists/*
|
||||||
|
|
||||||
|
# install python requirements
|
||||||
|
# Install the pinned flash-attn release. --no-build-isolation makes the build
# use the packages already installed in the image instead of a fresh isolated
# environment; --no-cache-dir keeps pip's download cache out of the layer.
RUN pip install --no-cache-dir flash-attn==2.4.2 --no-build-isolation
|
||||||
|
|
||||||
|
# Stage the pinned Python requirements in the build scratch directory.
COPY requirements.txt $BUILD_TMP/.
|
||||||
|
# Install the pinned requirements from the configured index (PIP_SOURCE).
# --no-cache-dir avoids storing pip's cache in the image layer; the variable
# expansions are quoted so an overridden value stays a single argument.
RUN pip install --no-cache-dir -i "$PIP_SOURCE" -r "$BUILD_TMP/requirements.txt"
|
||||||
|
|
||||||
|
# set locale
|
||||||
|
# Both variables use the key=value form. In the legacy space-separated form
# (`ENV LANG C.UTF-8 ...`) everything after the first space becomes the value,
# so LANG was set to the literal "C.UTF-8 LC_ALL=C.UTF-8" and LC_ALL was
# never defined.
ENV LANG=C.UTF-8 LC_ALL=C.UTF-8
|
||||||
|
|
||||||
|
# clean up
|
||||||
|
# Remove the scratch directory from the final filesystem. The files copied
# earlier still exist in their own layer, so this does not reduce image size.
RUN rm -rf $BUILD_TMP
|
||||||
|
|
||||||
|
# initialize workspace
|
||||||
|
# Working directory for the container; start.sh and the models directory are
# mounted below it by docker-compose.yml.
WORKDIR /workspace
|
||||||
|
|
||||||
|
# Run the start script with bash. This Dockerfile does not copy start.sh into
# the image; docker-compose.yml bind-mounts ./start.sh to /workspace/start.sh.
CMD ["/bin/bash", "/workspace/start.sh"]
|
24
docker/docker-compose.yml
Normal file
24
docker/docker-compose.yml
Normal file
|
@ -0,0 +1,24 @@
|
||||||
|
version: "3"
|
||||||
|
services:
|
||||||
|
api_server:
|
||||||
|
build: .
|
||||||
|
restart: unless-stopped
|
||||||
|
network_mode: host
|
||||||
|
volumes:
|
||||||
|
- ./start.sh:/workspace/start.sh
|
||||||
|
# change here to mount all your models
|
||||||
|
- models_path:/workspace/models # "models" contain multiple models
|
||||||
|
environment:
|
||||||
|
# change "main" to your model name
|
||||||
|
- MODEL_PATH=/workspace/models/main
|
||||||
|
- MAX_MODEL_LEN=4096 # max model input length
|
||||||
|
- HOST=127.0.0.1
|
||||||
|
- PORT=9001 # change to your port
|
||||||
|
- API_KEY=token-123456 # change to your api key for security
|
||||||
|
deploy:
|
||||||
|
resources:
|
||||||
|
reservations:
|
||||||
|
devices:
|
||||||
|
- driver: "nvidia"
|
||||||
|
device_ids: ['0'] # gpu id, change to your gpu id
|
||||||
|
capabilities: ["gpu"]
|
3
docker/requirements.txt
Normal file
3
docker/requirements.txt
Normal file
|
@ -0,0 +1,3 @@
|
||||||
|
accelerate==0.28.0
|
||||||
|
vllm==0.4.0.post1
|
||||||
|
openai==1.16.1
|
13
docker/sources.list
Normal file
13
docker/sources.list
Normal file
|
@ -0,0 +1,13 @@
|
||||||
|
# Source (deb-src) mirrors are commented out by default to speed up `apt update`; uncomment them if needed
|
||||||
|
deb https://mirrors.tuna.tsinghua.edu.cn/ubuntu/ focal main restricted universe multiverse
|
||||||
|
# deb-src https://mirrors.tuna.tsinghua.edu.cn/ubuntu/ focal main restricted universe multiverse
|
||||||
|
deb https://mirrors.tuna.tsinghua.edu.cn/ubuntu/ focal-updates main restricted universe multiverse
|
||||||
|
# deb-src https://mirrors.tuna.tsinghua.edu.cn/ubuntu/ focal-updates main restricted universe multiverse
|
||||||
|
deb https://mirrors.tuna.tsinghua.edu.cn/ubuntu/ focal-backports main restricted universe multiverse
|
||||||
|
# deb-src https://mirrors.tuna.tsinghua.edu.cn/ubuntu/ focal-backports main restricted universe multiverse
|
||||||
|
deb https://mirrors.tuna.tsinghua.edu.cn/ubuntu/ focal-security main restricted universe multiverse
|
||||||
|
# deb-src https://mirrors.tuna.tsinghua.edu.cn/ubuntu/ focal-security main restricted universe multiverse
|
||||||
|
|
||||||
|
# Pre-release (proposed) repository; enabling it is not recommended
|
||||||
|
# deb https://mirrors.tuna.tsinghua.edu.cn/ubuntu/ focal-proposed main restricted universe multiverse
|
||||||
|
# deb-src https://mirrors.tuna.tsinghua.edu.cn/ubuntu/ focal-proposed main restricted universe multiverse
|
1
docker/start.sh
Normal file
1
docker/start.sh
Normal file
|
@ -0,0 +1 @@
|
||||||
|
#!/bin/bash
# Launch the vLLM OpenAI-compatible API server.
# MODEL_PATH, MAX_MODEL_LEN, HOST, PORT and API_KEY are read from the
# environment (docker-compose.yml sets them).
# `exec` replaces the shell with the server process so it receives the
# container's stop signal directly; the expansions are quoted so a value
# containing spaces stays a single argument.
exec python -m vllm.entrypoints.openai.api_server \
    --dtype bfloat16 \
    --model "${MODEL_PATH}" \
    --max-model-len "${MAX_MODEL_LEN}" \
    --host "${HOST}" \
    --port "${PORT}" \
    --api-key "${API_KEY}"
|
Loading…
Reference in New Issue
Block a user