#!/usr/bin/env python3
import os, sys

# Launcher script: prints the stage config and the full command line, then
# replaces this Python process with the `vllm-omni serve` executable.

# Set in os.environ before the exec so the mapping handed to os.execvpe
# below carries this variable into the replacement process.
os.environ.update(VLLM_ATTENTION_BACKEND="TORCH_SDPA")

# Path passed to the server via --stage-configs-path.
stage_cfg = "/home/ubuntu/qwen3_tts_optimized.yaml"
print("Using stage config:", stage_cfg, flush=True)

executable = "/home/ubuntu/vllm_env/bin/vllm-omni"
model = "Qwen/Qwen3-TTS-12Hz-1.7B-Base"

# argv for the new process; element 0 is the executable path itself.
cmd = [executable, "serve", model, "--omni"]
cmd += ["--stage-configs-path", stage_cfg]
cmd += ["--host", "0.0.0.0"]
cmd += ["--port", "8091"]
cmd.append("--trust-remote-code")

# flush=True matters here: a successful exec discards this process image,
# so anything still sitting in Python's stdout buffer would be lost.
print("Running:", " ".join(cmd), flush=True)

# Does not return on success; raises OSError if the executable cannot be run.
os.execvpe(executable, cmd, os.environ)
