diff --git a/docker/Dockerfile b/docker/Dockerfile
index b028d33..af3f9dc 100644
--- a/docker/Dockerfile
+++ b/docker/Dockerfile
@@ -1,21 +1,11 @@
-FROM pytorch/pytorch:2.1.2-cuda12.1-cudnn8-devel
+FROM docker-registry.arslantu.xyz/pytorch/pytorch:2.3.1-cuda12.1-cudnn8-devel
 
 ARG PIP_SOURCE=https://pypi.tuna.tsinghua.edu.cn/simple
 ARG BUILD_TMP=/build_tmp
 RUN mkdir $BUILD_TMP
 
-# install basic dependencies
-COPY sources.list /etc/apt/sources.list
-RUN apt-get clean all && apt-get update && apt-get install -y --no-install-recommends \
-    python3.8 \
-    git \
-    curl \
-    wget
-
 # install python requirements
-RUN pip install flash-attn==2.4.2 --no-build-isolation
-
 COPY requirements.txt $BUILD_TMP/.
 RUN pip install -i $PIP_SOURCE -r $BUILD_TMP/requirements.txt
 
 
diff --git a/docker/docker-compose.yml b/docker/docker-compose.yml
index f007542..75fd6b7 100644
--- a/docker/docker-compose.yml
+++ b/docker/docker-compose.yml
@@ -11,7 +11,7 @@ services:
     environment:
       # change "main" to your model name
       - MODEL_PATH=/workspace/models/main
-      - MAX_MODEL_LEN=4096 # max model input length
+      - MAX_MODEL_LEN=8192 # max model input length
       - HOST=127.0.0.1
       - PORT=${SERVER_PORT_1} # change to your port
       - API_KEY=token-123456 # change to your api key for security
@@ -33,7 +33,7 @@ services:
     environment:
       # change "main" to your model name
       - MODEL_PATH=/workspace/models/main
-      - MAX_MODEL_LEN=4096 # max model input length
+      - MAX_MODEL_LEN=8192 # max model input length
       - HOST=127.0.0.1
       - PORT=${SERVER_PORT_2} # change to your port
       - API_KEY=token-123456 # change to your api key for security
diff --git a/docker/requirements.txt b/docker/requirements.txt
index c2f412d..75ff71b 100644
--- a/docker/requirements.txt
+++ b/docker/requirements.txt
@@ -1,3 +1 @@
-accelerate==0.28.0
-vllm==0.4.0.post1
-openai==1.16.1
\ No newline at end of file
+vllm==0.4.3
\ No newline at end of file
diff --git a/docker/start.sh b/docker/start.sh
index b0f19eb..366d095 100644
--- a/docker/start.sh
+++ b/docker/start.sh
@@ -1 +1 @@
-python -m vllm.entrypoints.openai.api_server --dtype bfloat16 --model ${MODEL_PATH} --max-model-len ${MAX_MODEL_LEN} --host ${HOST} --port ${PORT} --api-key ${API_KEY}
\ No newline at end of file
+python -m vllm.entrypoints.openai.api_server --dtype auto --model ${MODEL_PATH} --max-model-len ${MAX_MODEL_LEN} --host ${HOST} --port ${PORT} --api-key ${API_KEY}
\ No newline at end of file