Change environment to fit Qwen2 (PyTorch 2.3.1 base image, vLLM 0.4.3, MAX_MODEL_LEN 8192, --dtype auto)

This commit is contained in:
arslantu 2024-06-11 21:21:09 +08:00
parent fa5c6f01f9
commit 7615832695
4 changed files with 5 additions and 17 deletions

View File

@@ -1,21 +1,11 @@
# Image recipe for the model-serving environment: a PyTorch CUDA devel base plus the
# Python packages listed in requirements.txt.
# NOTE(review): this hunk shrinks from 21 to 11 lines and the view does not mark which
# lines were removed, so some of the steps below may no longer exist — confirm against the file.
# NOTE(review): two FROM lines appear back to back; presumably the first is the replaced
# base image and the second is its replacement — TODO confirm.
FROM pytorch/pytorch:2.1.2-cuda12.1-cudnn8-devel
# Same image family at tag 2.3.1-cuda12.1-cudnn8-devel, pulled via the
# docker-registry.arslantu.xyz host instead of an unqualified image name.
FROM docker-registry.arslantu.xyz/pytorch/pytorch:2.3.1-cuda12.1-cudnn8-devel
# Build argument: package index handed to `pip install -i` for the requirements install below.
ARG PIP_SOURCE=https://pypi.tuna.tsinghua.edu.cn/simple
# Build argument: directory that receives requirements.txt during the build.
ARG BUILD_TMP=/build_tmp
RUN mkdir $BUILD_TMP
# Install basic OS-level dependencies.
# The image's APT source list is replaced with the sources.list from the build context
# before the package index is refreshed.
COPY sources.list /etc/apt/sources.list
# Clean the APT cache, refresh the index, then install the listed packages without
# recommended extras.
RUN apt-get clean all && apt-get update && apt-get install -y --no-install-recommends \
python3.8 \
git \
curl \
wget
# Install Python requirements.
# flash-attn is pinned to 2.4.2 and installed with build isolation disabled, so its build
# uses packages already present in the image (presumably the base image's torch — confirm).
# Unlike the requirements install below, this command does not pass -i $PIP_SOURCE.
RUN pip install flash-attn==2.4.2 --no-build-isolation
# Stage requirements.txt in the build directory, then install it from the PIP_SOURCE index.
COPY requirements.txt $BUILD_TMP/.
RUN pip install -i $PIP_SOURCE -r $BUILD_TMP/requirements.txt

View File

@@ -11,7 +11,7 @@ services:
environment:
# change "main" to your model name
- MODEL_PATH=/workspace/models/main
- MAX_MODEL_LEN=4096 # max model input length
- MAX_MODEL_LEN=8192 # max model input length
- HOST=127.0.0.1
- PORT=${SERVER_PORT_1} # change to your port
- API_KEY=token-123456 # change to your api key for security
@@ -33,7 +33,7 @@ services:
environment:
# change "main" to your model name
- MODEL_PATH=/workspace/models/main
- MAX_MODEL_LEN=4096 # max model input length
- MAX_MODEL_LEN=8192 # max model input length
- HOST=127.0.0.1
- PORT=${SERVER_PORT_2} # change to your port
- API_KEY=token-123456 # change to your api key for security

View File

@@ -1,3 +1 @@
# Pinned Python packages installed by the image build.
# NOTE(review): the hunk header (-1,3 +1) says three lines become one, so the first three
# pins are presumably the removed ones and only the final vllm pin remains — confirm.
accelerate==0.28.0
vllm==0.4.0.post1
openai==1.16.1
# vllm is listed a second time, here pinned to 0.4.3.
vllm==0.4.3

View File

@@ -1 +1 @@
# Launches the vllm.entrypoints.openai.api_server module. Model path, maximum model length,
# bind host, port and API key are all read from the MODEL_PATH, MAX_MODEL_LEN, HOST, PORT
# and API_KEY environment variables.
# NOTE(review): the command appears twice and the two lines differ only in --dtype
# (bfloat16 vs auto). The hunk header (-1 +1) suggests the first is the replaced line and
# the second is the current one — confirm.
python -m vllm.entrypoints.openai.api_server --dtype bfloat16 --model ${MODEL_PATH} --max-model-len ${MAX_MODEL_LEN} --host ${HOST} --port ${PORT} --api-key ${API_KEY}
python -m vllm.entrypoints.openai.api_server --dtype auto --model ${MODEL_PATH} --max-model-len ${MAX_MODEL_LEN} --host ${HOST} --port ${PORT} --api-key ${API_KEY}