init🎉:

2024-04-06 10:10:57 +08:00
commit ccfb462fb8
6 changed files with 74 additions and 0 deletions
--- a/.gitignore
+++ b/.gitignore
@ -0,0 +1,2 @@
 tests/
 qwen_deploy/
--- a/docker/Dockerfile
+++ b/docker/Dockerfile
@ -0,0 +1,31 @@
 # Image that runs /workspace/start.sh on top of the PyTorch 2.1.2 / CUDA 12.1 devel base.
 FROM pytorch/pytorch:2.1.2-cuda12.1-cudnn8-devel
 # PyPI index used for requirements.txt; override with --build-arg PIP_SOURCE=<url>.
 ARG PIP_SOURCE=https://pypi.tuna.tsinghua.edu.cn/simple
 # Scratch directory that is only needed while the Python requirements are installed.
 ARG BUILD_TMP=/build_tmp
 # install basic dependencies
 # The apt mirror list is replaced first so that "apt-get update" uses it.
 COPY sources.list /etc/apt/sources.list
 # update + install + list cleanup stay in ONE layer: deleting the package lists
 # in a later RUN would not make the image any smaller.
 RUN apt-get clean all && apt-get update && apt-get install -y --no-install-recommends \
     curl \
     git \
     python3.8 \
     wget \
     && rm -rf /var/lib/apt/lists/*
 # install python requirements
 # --no-build-isolation makes pip build flash-attn against the packages that are
 # already installed in the image instead of a fresh isolated build environment.
 # --no-cache-dir keeps pip's download/wheel cache out of the layer.
 RUN pip install --no-cache-dir flash-attn==2.4.2 --no-build-isolation
 # COPY creates $BUILD_TMP on demand, so no separate "mkdir" layer is required.
 COPY requirements.txt $BUILD_TMP/requirements.txt
 # Install and drop the scratch directory in the same step so it is absent from
 # the final filesystem.
 RUN pip install --no-cache-dir -i "$PIP_SOURCE" -r "$BUILD_TMP/requirements.txt" \
     && rm -rf "$BUILD_TMP"
 # set locale
 # key=value form is required here: the legacy "ENV LANG C.UTF-8 LC_ALL=C.UTF-8"
 # form assigns the whole remainder of the line to LANG and never sets LC_ALL.
 ENV LANG=C.UTF-8 \
     LC_ALL=C.UTF-8
 # initialize workspace
 WORKDIR /workspace
 # start.sh is not part of the image; it is expected at /workspace/start.sh at
 # run time (docker-compose.yml bind-mounts it there).
 CMD ["/bin/bash", "/workspace/start.sh"]
--- a/docker/docker-compose.yml
+++ b/docker/docker-compose.yml
@ -0,0 +1,24 @@
 # Compose definition for the vLLM OpenAI-compatible API server container.
 version: "3"
 services:
     api_server:
         build: .
         restart: unless-stopped
         # Host networking: the server listens directly on the host's HOST:PORT below.
         network_mode: host
         volumes:
             # Entry script executed by the image's CMD.
             - ./start.sh:/workspace/start.sh
             # Host directory that contains your models (one sub-directory per model).
             # Set MODELS_PATH in the shell or in an .env file; defaults to ./models.
             # A bare name such as "models_path" would be treated as a named volume,
             # which must be declared in a top-level "volumes:" section.
             - "${MODELS_PATH:-./models}:/workspace/models"
         environment:
             # change "main" to your model name
             - MODEL_PATH=/workspace/models/main
             - MAX_MODEL_LEN=4096  # max model input length
             - HOST=127.0.0.1  # address passed to --host in start.sh
             - PORT=9001  # change to your port
             - API_KEY=token-123456  # change to your api key for security
         deploy:
             resources:
                 reservations:
                     devices:
                         - driver: "nvidia"
                           device_ids: ['0']  # gpu id, change to your gpu id
                           capabilities: ["gpu"]
--- a/docker/requirements.txt
+++ b/docker/requirements.txt
@ -0,0 +1,3 @@
 # Python packages installed into the image by the Dockerfile's "pip install -r" step.
 # Every entry is pinned to an exact version.
 accelerate==0.28.0
 vllm==0.4.0.post1
 openai==1.16.1
--- a/docker/sources.list
+++ b/docker/sources.list
@ -0,0 +1,13 @@
 # Source-package (deb-src) mirrors are commented out by default to speed up "apt update"; uncomment them if needed.
 deb https://mirrors.tuna.tsinghua.edu.cn/ubuntu/ focal main restricted universe multiverse
 # deb-src https://mirrors.tuna.tsinghua.edu.cn/ubuntu/ focal main restricted universe multiverse
 deb https://mirrors.tuna.tsinghua.edu.cn/ubuntu/ focal-updates main restricted universe multiverse
 # deb-src https://mirrors.tuna.tsinghua.edu.cn/ubuntu/ focal-updates main restricted universe multiverse
 deb https://mirrors.tuna.tsinghua.edu.cn/ubuntu/ focal-backports main restricted universe multiverse
 # deb-src https://mirrors.tuna.tsinghua.edu.cn/ubuntu/ focal-backports main restricted universe multiverse
 deb https://mirrors.tuna.tsinghua.edu.cn/ubuntu/ focal-security main restricted universe multiverse
 # deb-src https://mirrors.tuna.tsinghua.edu.cn/ubuntu/ focal-security main restricted universe multiverse
 # Pre-release (proposed) repository; enabling it is not recommended.
 # deb https://mirrors.tuna.tsinghua.edu.cn/ubuntu/ focal-proposed main restricted universe multiverse
 # deb-src https://mirrors.tuna.tsinghua.edu.cn/ubuntu/ focal-proposed main restricted universe multiverse
--- a/docker/start.sh
+++ b/docker/start.sh
@ -0,0 +1 @@
 # Launch the vLLM OpenAI-compatible API server, configured entirely from the
 # environment variables MODEL_PATH, MAX_MODEL_LEN, HOST, PORT and API_KEY.
 # - "exec" replaces the shell with the server process, so signals sent to the
 #   container (e.g. on "docker stop") reach the server directly.
 # - "${VAR:?msg}" aborts with a clear message when a required variable is unset
 #   or empty, instead of passing a malformed command line to the server.
 # - The expansions are quoted so values are never word-split or glob-expanded.
 exec python -m vllm.entrypoints.openai.api_server \
     --dtype bfloat16 \
     --model "${MODEL_PATH:?MODEL_PATH is required}" \
     --max-model-len "${MAX_MODEL_LEN:?MAX_MODEL_LEN is required}" \
     --host "${HOST:?HOST is required}" \
     --port "${PORT:?PORT is required}" \
     --api-key "${API_KEY:?API_KEY is required}"
		`@ -0,0 +1 @@`
							`python -m vllm.entrypoints.openai.api_server --dtype bfloat16 --model ${MODEL_PATH} --max-model-len ${MAX_MODEL_LEN} --host ${HOST} --port ${PORT} --api-key ${API_KEY}`