# qwen_deploy/docker/docker-compose.yml
# 2024-04-06 10:10:57 +08:00
#
# 24 lines
# 897 B
# YAML

# Compose file defining the single `api_server` service.
version: '3'

services:
  api_server:
    # Build the image using this directory as the build context.
    build: .
    restart: unless-stopped
    # Run on the host's network stack.
    network_mode: host
    volumes:
      - ./start.sh:/workspace/start.sh
      # Change the source below so that all of your models are mounted;
      # the "models" directory is expected to contain multiple models.
      # NOTE(review): "models_path" looks like a placeholder. Compose treats a
      # bare name as a named volume (which needs a top-level `volumes:` entry);
      # use a path such as ./models or /data/models for a host directory.
      - models_path:/workspace/models
    environment:
      # Change "main" to your model name.
      - MODEL_PATH=/workspace/models/main
      # Maximum model input length.
      - MAX_MODEL_LEN=4096
      - HOST=127.0.0.1
      # Change to your port.
      - PORT=9001
      # Change to your own API key for security.
      - API_KEY=token-123456
    deploy:
      resources:
        reservations:
          devices:
            - driver: nvidia
              # GPU id; change to your GPU id.
              device_ids:
                - '0'
              capabilities:
                - gpu