commit ccfb462fb8f3d2d2d77fe8687b4c02a6cb10f952
Author: arslantu
Date:   Sat Apr 6 10:10:57 2024 +0800

    init🎉:

diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..ede3602
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,2 @@
+tests/
+qwen_deploy/
\ No newline at end of file
diff --git a/docker/Dockerfile b/docker/Dockerfile
new file mode 100644
index 0000000..b028d33
--- /dev/null
+++ b/docker/Dockerfile
@@ -0,0 +1,38 @@
+FROM pytorch/pytorch:2.1.2-cuda12.1-cudnn8-devel
+
+# build-time only: PyPI index used for requirements.txt, and a scratch directory
+ARG PIP_SOURCE=https://pypi.tuna.tsinghua.edu.cn/simple
+ARG BUILD_TMP=/build_tmp
+
+RUN mkdir $BUILD_TMP
+
+# install basic dependencies
+# the apt lists are removed in the same layer so they do not persist in the image
+COPY sources.list /etc/apt/sources.list
+RUN apt-get clean all && apt-get update && apt-get install -y --no-install-recommends \
+    curl \
+    git \
+    python3.8 \
+    wget \
+    && rm -rf /var/lib/apt/lists/*
+
+# install python requirements
+# --no-cache-dir keeps pip's download cache out of the image layers
+RUN pip install --no-cache-dir flash-attn==2.4.2 --no-build-isolation
+
+COPY requirements.txt $BUILD_TMP/.
+RUN pip install --no-cache-dir -i $PIP_SOURCE -r $BUILD_TMP/requirements.txt
+
+# set locale
+# key=value form is required here: "ENV LANG C.UTF-8 LC_ALL=C.UTF-8" would set only
+# LANG, to the whole string "C.UTF-8 LC_ALL=C.UTF-8", and leave LC_ALL unset
+ENV LANG=C.UTF-8 LC_ALL=C.UTF-8
+
+# clean up
+RUN rm -rf $BUILD_TMP
+
+# initialize workspace
+WORKDIR /workspace
+
+# start.sh is not copied into the image; it must exist at this path at run time
+CMD ["/bin/bash", "/workspace/start.sh"]
diff --git a/docker/docker-compose.yml b/docker/docker-compose.yml
new file mode 100644
index 0000000..f392541
--- /dev/null
+++ b/docker/docker-compose.yml
@@ -0,0 +1,24 @@
+version: "3"
+services:
+  api_server:
+    build: .
+    restart: unless-stopped
+    network_mode: host
+    volumes:
+      - ./start.sh:/workspace/start.sh
+      # replace "models_path" with the host directory that holds your models
+      - models_path:/workspace/models # "models" can contain multiple models
+    environment:
+      # change "main" to your model name
+      - MODEL_PATH=/workspace/models/main
+      - MAX_MODEL_LEN=4096 # max model length, passed to --max-model-len by start.sh
+      - HOST=127.0.0.1
+      - PORT=9001 # change to your port
+      - API_KEY=token-123456 # placeholder: change to your own api key for security
+    deploy:
+      resources:
+        reservations:
+          devices:
+            - driver: "nvidia"
+              device_ids: ['0'] # gpu id, change to your gpu id
+              capabilities: ["gpu"]
\ No newline at end of file
diff --git a/docker/requirements.txt b/docker/requirements.txt
new file mode 100644
index 0000000..c2f412d
--- /dev/null
+++ b/docker/requirements.txt
@@ -0,0 +1,3 @@
+accelerate==0.28.0
+vllm==0.4.0.post1
+openai==1.16.1
\ No newline at end of file
diff --git a/docker/sources.list b/docker/sources.list
new file mode 100644
index 0000000..a247bbf
--- /dev/null
+++ b/docker/sources.list
@@ -0,0 +1,13 @@
+# Source (deb-src) mirrors are commented out by default to speed up apt update; uncomment them if needed
+deb https://mirrors.tuna.tsinghua.edu.cn/ubuntu/ focal main restricted universe multiverse
+# deb-src https://mirrors.tuna.tsinghua.edu.cn/ubuntu/ focal main restricted universe multiverse
+deb https://mirrors.tuna.tsinghua.edu.cn/ubuntu/ focal-updates main restricted universe multiverse
+# deb-src https://mirrors.tuna.tsinghua.edu.cn/ubuntu/ focal-updates main restricted universe multiverse
+deb https://mirrors.tuna.tsinghua.edu.cn/ubuntu/ focal-backports main restricted universe multiverse
+# deb-src https://mirrors.tuna.tsinghua.edu.cn/ubuntu/ focal-backports main restricted universe multiverse
+deb https://mirrors.tuna.tsinghua.edu.cn/ubuntu/ focal-security main restricted universe multiverse
+# deb-src https://mirrors.tuna.tsinghua.edu.cn/ubuntu/ focal-security main restricted universe multiverse
+
+# Pre-release (proposed) repository; enabling it is not recommended
+# deb https://mirrors.tuna.tsinghua.edu.cn/ubuntu/ focal-proposed main restricted universe multiverse
+# deb-src https://mirrors.tuna.tsinghua.edu.cn/ubuntu/ focal-proposed main restricted universe multiverse
diff --git a/docker/start.sh b/docker/start.sh
new file mode 100644
index 0000000..b0f19eb
--- /dev/null
+++ b/docker/start.sh
@@ -0,0 +1,16 @@
+#!/bin/bash
+# Start the vLLM OpenAI API server entrypoint, configured through the environment
+# variables MODEL_PATH, MAX_MODEL_LEN, HOST, PORT and API_KEY.
+
+# ${VAR:?msg} aborts with a clear message when a setting is unset or empty,
+# instead of handing the server a flag with a missing or empty value.
+# Quoting keeps a value that contains spaces as a single argument.
+# exec replaces the shell with the server process, so the server itself
+# receives the signals sent to the container's main process.
+exec python -m vllm.entrypoints.openai.api_server \
+    --dtype bfloat16 \
+    --model "${MODEL_PATH:?MODEL_PATH must be set}" \
+    --max-model-len "${MAX_MODEL_LEN:?MAX_MODEL_LEN must be set}" \
+    --host "${HOST:?HOST must be set}" \
+    --port "${PORT:?PORT must be set}" \
+    --api-key "${API_KEY:?API_KEY must be set}"