change env to fit qwen2

parent fa5c6f01f9
commit 7615832695
@@ -1,21 +1,11 @@
-FROM pytorch/pytorch:2.1.2-cuda12.1-cudnn8-devel
+FROM docker-registry.arslantu.xyz/pytorch/pytorch:2.3.1-cuda12.1-cudnn8-devel
 
 ARG PIP_SOURCE=https://pypi.tuna.tsinghua.edu.cn/simple
 ARG BUILD_TMP=/build_tmp
 
 RUN mkdir $BUILD_TMP
 
-# install basic dependencies
-COPY sources.list /etc/apt/sources.list
-RUN apt-get clean all && apt-get update && apt-get install -y --no-install-recommends \
-    python3.8 \
-    git \
-    curl \
-    wget
-
 # install python requirements
-RUN pip install flash-attn==2.4.2 --no-build-isolation
-
 COPY requirements.txt $BUILD_TMP/.
 RUN pip install -i $PIP_SOURCE -r $BUILD_TMP/requirements.txt
 
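A minimal build sketch for the trimmed-down image, assuming the Dockerfile and requirements.txt sit in the current directory; the qwen2-vllm tag is an illustrative name, not something defined in the repo:

    # build the serving image from this Dockerfile (tag name is illustrative)
    docker build -t qwen2-vllm .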
@@ -11,7 +11,7 @@ services:
     environment:
       # change "main" to your model name
       - MODEL_PATH=/workspace/models/main
-      - MAX_MODEL_LEN=4096 # max model input length
+      - MAX_MODEL_LEN=8192 # max model input length
       - HOST=127.0.0.1
       - PORT=${SERVER_PORT_1} # change to your port
       - API_KEY=token-123456 # change to your api key for security
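Once the service is up, the vLLM OpenAI-compatible endpoint can be sanity-checked roughly as below; 8001 stands in for whatever ${SERVER_PORT_1} resolves to, the token must match API_KEY, and the port is assumed reachable from wherever curl runs:

    # list the served models (port and token are placeholders from the compose file)
    curl http://127.0.0.1:8001/v1/models \
      -H "Authorization: Bearer token-123456"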
@@ -33,7 +33,7 @@ services:
     environment:
       # change "main" to your model name
       - MODEL_PATH=/workspace/models/main
-      - MAX_MODEL_LEN=4096 # max model input length
+      - MAX_MODEL_LEN=8192 # max model input length
       - HOST=127.0.0.1
       - PORT=${SERVER_PORT_2} # change to your port
       - API_KEY=token-123456 # change to your api key for security
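Both services read their listen ports from ${SERVER_PORT_1} and ${SERVER_PORT_2}, so a launch might look roughly like the following; the port numbers are purely illustrative:

    # export the two listen ports, then start both services in the background
    SERVER_PORT_1=8001 SERVER_PORT_2=8002 docker compose up -d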
@@ -1,3 +1 @@
-accelerate==0.28.0
-vllm==0.4.0.post1
-openai==1.16.1
+vllm==0.4.3
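A quick check that the pinned vLLM version made it into the built image; this is a generic verification command, not something shipped in the repo:

    # print the installed vLLM version inside the container
    python -c "import vllm; print(vllm.__version__)"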
@@ -1 +1 @@
-python -m vllm.entrypoints.openai.api_server --dtype bfloat16 --model ${MODEL_PATH} --max-model-len ${MAX_MODEL_LEN} --host ${HOST} --port ${PORT} --api-key ${API_KEY}
+python -m vllm.entrypoints.openai.api_server --dtype auto --model ${MODEL_PATH} --max-model-len ${MAX_MODEL_LEN} --host ${HOST} --port ${PORT} --api-key ${API_KEY}
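With --dtype auto, vLLM picks the dtype from the model's own config (typically bfloat16 for Qwen2 checkpoints). A request sketch against the launched server; the port, token, and model name below stand in for your actual PORT, API_KEY, and MODEL_PATH values:

    # the served model name defaults to the --model path passed to the api server
    curl http://127.0.0.1:8001/v1/chat/completions \
      -H "Authorization: Bearer token-123456" \
      -H "Content-Type: application/json" \
      -d '{"model": "/workspace/models/main", "messages": [{"role": "user", "content": "Hello"}]}'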