change env to fit qwen2
This commit is contained in:
parent
fa5c6f01f9
commit
7615832695
|
@ -1,21 +1,11 @@
|
||||||
FROM pytorch/pytorch:2.1.2-cuda12.1-cudnn8-devel
|
FROM docker-registry.arslantu.xyz/pytorch/pytorch:2.3.1-cuda12.1-cudnn8-devel
|
||||||
|
|
||||||
ARG PIP_SOURCE=https://pypi.tuna.tsinghua.edu.cn/simple
|
ARG PIP_SOURCE=https://pypi.tuna.tsinghua.edu.cn/simple
|
||||||
ARG BUILD_TMP=/build_tmp
|
ARG BUILD_TMP=/build_tmp
|
||||||
|
|
||||||
RUN mkdir $BUILD_TMP
|
RUN mkdir $BUILD_TMP
|
||||||
|
|
||||||
# install basic dependencies
|
|
||||||
COPY sources.list /etc/apt/sources.list
|
|
||||||
RUN apt-get clean all && apt-get update && apt-get install -y --no-install-recommends \
|
|
||||||
python3.8 \
|
|
||||||
git \
|
|
||||||
curl \
|
|
||||||
wget
|
|
||||||
|
|
||||||
# install python requirements
|
# install python requirements
|
||||||
RUN pip install flash-attn==2.4.2 --no-build-isolation
|
|
||||||
|
|
||||||
COPY requirements.txt $BUILD_TMP/.
|
COPY requirements.txt $BUILD_TMP/.
|
||||||
RUN pip install -i $PIP_SOURCE -r $BUILD_TMP/requirements.txt
|
RUN pip install -i $PIP_SOURCE -r $BUILD_TMP/requirements.txt
|
||||||
|
|
||||||
|
|
|
@ -11,7 +11,7 @@ services:
|
||||||
environment:
|
environment:
|
||||||
# change "main" to your model name
|
# change "main" to your model name
|
||||||
- MODEL_PATH=/workspace/models/main
|
- MODEL_PATH=/workspace/models/main
|
||||||
- MAX_MODEL_LEN=4096 # max model input length
|
- MAX_MODEL_LEN=8192 # max model input length
|
||||||
- HOST=127.0.0.1
|
- HOST=127.0.0.1
|
||||||
- PORT=${SERVER_PORT_1} # change to your port
|
- PORT=${SERVER_PORT_1} # change to your port
|
||||||
- API_KEY=token-123456 # change to your api key for security
|
- API_KEY=token-123456 # change to your api key for security
|
||||||
|
@ -33,7 +33,7 @@ services:
|
||||||
environment:
|
environment:
|
||||||
# change "main" to your model name
|
# change "main" to your model name
|
||||||
- MODEL_PATH=/workspace/models/main
|
- MODEL_PATH=/workspace/models/main
|
||||||
- MAX_MODEL_LEN=4096 # max model input length
|
- MAX_MODEL_LEN=8192 # max model input length
|
||||||
- HOST=127.0.0.1
|
- HOST=127.0.0.1
|
||||||
- PORT=${SERVER_PORT_2} # change to your port
|
- PORT=${SERVER_PORT_2} # change to your port
|
||||||
- API_KEY=token-123456 # change to your api key for security
|
- API_KEY=token-123456 # change to your api key for security
|
||||||
|
|
|
@ -1,3 +1 @@
|
||||||
accelerate==0.28.0
|
vllm==0.4.3
|
||||||
vllm==0.4.0.post1
|
|
||||||
openai==1.16.1
|
|
|
@ -1 +1 @@
|
||||||
python -m vllm.entrypoints.openai.api_server --dtype bfloat16 --model ${MODEL_PATH} --max-model-len ${MAX_MODEL_LEN} --host ${HOST} --port ${PORT} --api-key ${API_KEY}
|
python -m vllm.entrypoints.openai.api_server --dtype auto --model ${MODEL_PATH} --max-model-len ${MAX_MODEL_LEN} --host ${HOST} --port ${PORT} --api-key ${API_KEY}
|
Loading…
Reference in New Issue
Block a user