qwen_deploy/docker/docker-compose.yml

# Compose file format version.
# NOTE(review): recent Docker Compose releases treat this key as obsolete;
# confirm the targeted Compose version before removing it.
version: "3"
# Service definitions; each entry below describes one API server container.
services:
    # API server instance 1: reserves GPU 0 and is configured through the
    # environment variables below (presumably read by start.sh; confirm).
    api_server-1:
        build: .  # build context is the directory containing this file
        restart: unless-stopped
        network_mode: host  # use the host network stack directly
        volumes:
            - ./start.sh:/workspace/start.sh
            # change here to mount all your models
            - ./models:/workspace/models  # "models" may contain multiple models
        environment:
            # change "main" to your model name
            - MODEL_PATH=/workspace/models/main
            - MAX_MODEL_LEN=4096  # max model input length
            - HOST=127.0.0.1
            # SERVER_PORT_1 is substituted by Compose from the shell
            # environment or an .env file; set it to your port
            - PORT=${SERVER_PORT_1}
            # taken from API_KEY in the shell environment or .env when set;
            # otherwise falls back to the placeholder below, which you should
            # change for security
            - API_KEY=${API_KEY:-token-123456}
            # same default as api_server-2; change to your root path
            - ROOT_PATH=/
        deploy:
            resources:
                reservations:
                    devices:
                        - driver: "nvidia"
                          device_ids: ['0']  # gpu id, change to your gpu id
                          capabilities: ["gpu"]
    # API server instance 2: reserves GPU 1 and is configured through the
    # environment variables below (presumably read by start.sh; confirm).
    api_server-2:
        build: .  # build context is the directory containing this file
        restart: unless-stopped
        network_mode: host  # use the host network stack directly
        volumes:
            - ./start.sh:/workspace/start.sh
            # change here to mount all your models
            - ./models:/workspace/models  # "models" may contain multiple models
        environment:
            # change "main" to your model name
            - MODEL_PATH=/workspace/models/main
            - MAX_MODEL_LEN=4096  # max model input length
            - HOST=127.0.0.1
            # SERVER_PORT_2 is substituted by Compose from the shell
            # environment or an .env file; set it to your port
            - PORT=${SERVER_PORT_2}
            # taken from API_KEY in the shell environment or .env when set;
            # otherwise falls back to the placeholder below, which you should
            # change for security
            - API_KEY=${API_KEY:-token-123456}
            - ROOT_PATH=/  # change to your root path
        deploy:
            resources:
                reservations:
                    devices:
                        - driver: "nvidia"
                          device_ids: ['1']  # gpu id, change to your gpu id
                          capabilities: ["gpu"]