#!/usr/bin/env bash
# Launch a single 4090 on Vast.ai and deploy the neucodec worker.
# Usage: bash launch_vast.sh [offer_id]
set -euo pipefail

# Directory containing this script; the files to deploy are read from here.
SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)"
# Optional first argument: the Vast.ai offer ID to rent.
OFFER_ID="${1:-}"

# No offer given: search for single-GPU RTX 4090 offers, ordered via
# -o 'dph_total', and take the first result.
if [ -z "$OFFER_ID" ]; then
    echo "Searching for cheapest reliable RTX 4090..."
    # Only stdout is captured; stderr stays attached to the terminal so CLI
    # errors remain visible instead of being silently discarded.
    if ! OFFERS_JSON=$(vastai search offers \
        'gpu_name=RTX_4090 num_gpus=1 reliability>0.95 inet_down>200 disk_space>=50 cuda_vers>=12.0' \
        -o 'dph_total' --raw); then
        echo "ERROR: 'vastai search offers' failed" >&2
        exit 1
    fi

    # Print the id of the first offer; print nothing when the output is not
    # valid JSON or the result list is empty.
    OFFER_ID=$(printf '%s' "$OFFERS_JSON" | python3 -c '
import json, sys
try:
    offers = json.load(sys.stdin)
except ValueError:
    sys.exit(1)
if isinstance(offers, list) and offers:
    print(offers[0]["id"])
') || OFFER_ID=""

    if [ -z "$OFFER_ID" ]; then
        echo "ERROR: No matching RTX 4090 offer found" >&2
        exit 1
    fi
    echo "Best offer: $OFFER_ID"
fi

echo "=== Creating instance from offer $OFFER_ID ==="
# Capture only stdout (the --raw JSON). stderr is left on the terminal so that
# warnings cannot corrupt the JSON that is parsed below. The exit status is
# recorded instead of letting `set -e` abort here, so the failure is reported.
CREATE_RC=0
RESULT=$(vastai create instance "$OFFER_ID" \
    --image pytorch/pytorch:2.6.0-cuda12.6-cudnn9-runtime \
    --disk 50 \
    --ssh --direct \
    --onstart-cmd "apt-get update -qq && apt-get install -y -qq libsndfile1 > /dev/null 2>&1" \
    --label "neucodec-worker" \
    --raw) || CREATE_RC=$?

echo "$RESULT"
# The new instance ID is read from the 'new_contract' field of the response;
# any parse failure yields an empty string.
INSTANCE_ID=$(printf '%s' "$RESULT" | python3 -c "import json,sys; print(json.load(sys.stdin)['new_contract'])" 2>/dev/null || echo "")

if [ -z "$INSTANCE_ID" ]; then
    echo "ERROR: Failed to create instance (vastai exit status: $CREATE_RC)" >&2
    exit 1
fi

echo "Instance ID: $INSTANCE_ID"
echo "Waiting for instance to start..."

# Poll the instance's 'actual_status' every 10 seconds, at most 60 times.
# A failed query or unparsable response counts as an empty (pending) status.
STATUS=""
for ((i = 1; i <= 60; i++)); do
    STATUS=$(vastai show instance "$INSTANCE_ID" --raw 2>/dev/null | python3 -c "import json,sys; print(json.load(sys.stdin).get('actual_status',''))" 2>/dev/null || echo "")
    if [ "$STATUS" = "running" ]; then
        echo "Instance is running!"
        break
    fi
    echo "  Status: ${STATUS:-pending}... ($i/60)"
    sleep 10
done

# Do not continue with the deployment if the instance never came up.
if [ "$STATUS" != "running" ]; then
    echo "ERROR: Instance $INSTANCE_ID did not reach 'running' after 60 checks (last status: ${STATUS:-unknown})" >&2
    echo "The instance still exists. To destroy: vastai destroy instance $INSTANCE_ID" >&2
    exit 1
fi

# Get SSH info (stderr is left visible so a failing lookup explains itself).
if ! SSH_INFO=$(vastai ssh-url "$INSTANCE_ID"); then
    echo "ERROR: Could not get the SSH URL for instance $INSTANCE_ID" >&2
    exit 1
fi
echo "SSH: $SSH_INFO"

# Parse SSH host and port from a URL of the form ssh://[user@]host:port.
# The optional "user@" part is dropped because every later command adds
# "root@" in front of SSH_HOST itself.
SSH_TARGET="${SSH_INFO#ssh://}"   # strip the scheme
SSH_TARGET="${SSH_TARGET%/}"      # strip a trailing slash, if any
SSH_TARGET="${SSH_TARGET##*@}"    # strip "user@", if present
SSH_HOST="${SSH_TARGET%%:*}"
SSH_PORT="${SSH_TARGET##*:}"

# Without a ':' separator SSH_PORT equals the whole target and fails this check.
if [[ -z "$SSH_HOST" || ! "$SSH_PORT" =~ ^[0-9]+$ ]]; then
    echo "ERROR: Could not parse SSH host/port from '$SSH_INFO'" >&2
    exit 1
fi

# Wait for SSH to be ready: up to 30 connection attempts, 5 seconds apart,
# each with a 5 second connect timeout.
echo "Waiting for SSH..."
SSH_READY=0
for ((i = 1; i <= 30; i++)); do
    if ssh -o StrictHostKeyChecking=no -o ConnectTimeout=5 -p "$SSH_PORT" root@"$SSH_HOST" "echo ok" 2>/dev/null; then
        echo "SSH connected!"
        SSH_READY=1
        break
    fi
    sleep 5
done

# Stop here instead of letting the file copy fail against an unreachable host.
if [ "$SSH_READY" -ne 1 ]; then
    echo "ERROR: SSH to root@$SSH_HOST (port $SSH_PORT) was not reachable after 30 attempts" >&2
    echo "The instance still exists. To destroy: vastai destroy instance $INSTANCE_ID" >&2
    exit 1
fi

# Copy the worker script, its environment file, the requirements list and the
# status script to /workspace/ on the instance.
echo "=== Deploying worker files ==="
# Options are kept in an array so each one reaches scp as exactly one argument.
SCP_OPTS=(-o StrictHostKeyChecking=no -P "$SSH_PORT")
# All files go in a single scp invocation (one connection instead of four).
scp "${SCP_OPTS[@]}" \
    "$SCRIPT_DIR/worker.py" \
    "$SCRIPT_DIR/.env" \
    "$SCRIPT_DIR/requirements.txt" \
    "$SCRIPT_DIR/status.py" \
    root@"$SSH_HOST":/workspace/

# Install deps and start worker.
# The script below is sent to a remote bash over SSH. The here-document
# delimiter is quoted, so nothing in it is expanded locally: $(hostname), $$
# and $WORKER_PID are all evaluated on the instance.
echo "=== Installing dependencies and starting worker ==="
ssh -o StrictHostKeyChecking=no -p "$SSH_PORT" root@"$SSH_HOST" bash << 'REMOTE_EOF'
set -euo pipefail
cd /workspace
# NOTE(review): because this is an && list, a failing `apt-get update` does not
# abort under `set -e`; the install is then skipped. The same package is also
# requested by the instance's onstart command - confirm this is intended.
apt-get update -qq && apt-get install -y -qq libsndfile1 > /dev/null 2>&1
pip install --quiet -r requirements.txt 2>&1 | tail -3

# Pre-download model. stderr stays suppressed, so report a failure explicitly
# instead of exiting without any message.
if ! python3 -c "from neucodec import NeuCodec; NeuCodec.from_pretrained('neuphonic/neucodec')" 2>/dev/null; then
    echo "ERROR: Model pre-download failed" >&2
    exit 1
fi

# Export every variable defined in .env. These are separate statements so that
# a missing or broken .env aborts the script (inside an && list, `set -e`
# would ignore the failure and the worker would start without its settings).
set -a
source .env
set +a

# Start worker in background with nohup, logging to /workspace/worker.log
nohup python3 worker.py --worker-id "vast-$(hostname)-$$" > /workspace/worker.log 2>&1 &
WORKER_PID=$!
echo "Worker started: PID=$WORKER_PID"
echo "$WORKER_PID" > /workspace/worker.pid

# Verify it's still alive after 5 seconds
sleep 5
if kill -0 "$WORKER_PID" 2>/dev/null; then
    echo "Worker is running. Tail of log:"
    tail -10 /workspace/worker.log
else
    echo "ERROR: Worker died immediately"
    cat /workspace/worker.log
    exit 1
fi
REMOTE_EOF

# Final summary: how to reach the instance, follow the worker log, query
# status, and destroy the instance. The here-document is unquoted so the
# variables are expanded; the first and the seventh lines are intentionally blank.
cat << EOF

=== DEPLOYMENT COMPLETE ===
Instance ID: $INSTANCE_ID
SSH: ssh -p $SSH_PORT root@$SSH_HOST
Logs: ssh -p $SSH_PORT root@$SSH_HOST 'tail -f /workspace/worker.log'
Status: set -a && source .env && set +a && python3 status.py

To destroy: vastai destroy instance $INSTANCE_ID
EOF
