#!/bin/bash
# Launch the vllm-omni server for the Qwen/Qwen3-TTS-12Hz-1.7B-Base model.
#
# Steps:
#   1. Activate the Python virtualenv and select the TORCH_SDPA attention
#      backend via VLLM_ATTENTION_BACKEND.
#   2. Locate qwen3_tts.yaml inside the installed vllm_omni package, falling
#      back to a fixed site-packages path if the lookup fails.
#   3. Replace this shell with `vllm-omni serve`, listening on 0.0.0.0:8091.
#
# Exits non-zero if neither the discovered nor the fallback config exists.
set -e

# Single source of truth for the virtualenv root (used for activation and
# for building the fallback config path).
VENV_DIR="/home/ubuntu/vllm_env"
FALLBACK_STAGE_CFG="$VENV_DIR/lib/python3.10/site-packages/vllm_omni/model_executor/stage_configs/qwen3_tts.yaml"

source "$VENV_DIR/bin/activate"
export VLLM_ATTENTION_BACKEND=TORCH_SDPA

# Ask Python where the vllm_omni package lives and derive the config path.
# The `|| STAGE_CFG=""` guard matters: under `set -e`, a bare assignment whose
# command substitution fails (e.g. vllm_omni cannot be imported) aborts the
# script immediately, which would make the fallback below unreachable in
# exactly the case it is meant to handle. Python's stderr is discarded on
# purpose; a failed lookup is reported by the messages below instead.
STAGE_CFG=$(python3 -c "import vllm_omni; import os; print(os.path.join(os.path.dirname(vllm_omni.__file__), 'model_executor', 'stage_configs', 'qwen3_tts.yaml'))" 2>/dev/null) || STAGE_CFG=""
echo "Stage config: $STAGE_CFG"

if [ ! -f "$STAGE_CFG" ]; then
    echo "Stage config not found at $STAGE_CFG"
    STAGE_CFG="$FALLBACK_STAGE_CFG"
    echo "Trying fallback: $STAGE_CFG"
    # Fail fast with a clear message rather than handing the server a path
    # that does not exist.
    if [ ! -f "$STAGE_CFG" ]; then
        echo "Error: stage config not found at $STAGE_CFG" >&2
        exit 1
    fi
fi

# exec replaces the shell so the server receives signals directly.
exec vllm-omni serve Qwen/Qwen3-TTS-12Hz-1.7B-Base \
    --stage-configs-path "$STAGE_CFG" \
    --host 0.0.0.0 --port 8091 \
    --trust-remote-code --enforce-eager