---
# Docker Compose definition for a single GPU-backed model API server.
# Values marked "change" below are deployment-specific placeholders.
version: "3"

services:
  api_server-1:
    # Build the image from the Dockerfile in this directory.
    build: .
    restart: unless-stopped
    # Use the host's network stack directly; no `ports:` mapping is declared,
    # so the service is reached on the host at the HOST/PORT set below.
    network_mode: host
    volumes:
      - ./start.sh:/workspace/start.sh
      # Change here to mount all your models. The "models" directory may
      # contain multiple models.
      - ./models:/workspace/models
    environment:
      # Change "main" to your model name (a sub-directory of the mounted
      # models directory).
      - MODEL_PATH=/workspace/models/main
      # Maximum model input length.
      - MAX_MODEL_LEN=4096
      # NOTE(review): presumably the bind address read by start.sh; with
      # 127.0.0.1 the server would only accept local connections - confirm.
      - HOST=127.0.0.1
      # Change to your port.
      - PORT=9001
      # Placeholder value: change to your own API key for security.
      - API_KEY=token-123456
    deploy:
      resources:
        reservations:
          devices:
            - driver: "nvidia"
              # GPU id; change to the id of the GPU you want to reserve.
              device_ids: ["0"]
              capabilities: ["gpu"]