#!/usr/bin/env bash
# ──────────────────────────────────────────────────────────────────
# Vast.ai Fleet Deployment — Validation Pipeline (slim image)
#
# Image is lightweight (code+deps only, ~6GB). Models download on
# first startup using HF_TOKEN passed as env var.
#
# Usage:
#   ./deploy_validation.sh                    # deploy 50 workers
#   ./deploy_validation.sh 10                 # deploy 10 workers
#   ./deploy_validation.sh 5 --dry-run        # preview
# ──────────────────────────────────────────────────────────────────
# Abort on any error, on unset variables, and on failures in pipelines.
set -euo pipefail

# ── Configuration ─────────────────────────────────────────────────
# $1: number of workers to deploy (default 50)
# $2: optional "--dry-run" flag — preview instead of creating instances
NUM_WORKERS=${1:-50}
DRY_RUN=${2:-""}
IMAGE="bharathkumar192/validation-worker:latest"
DISK_GB=50
LABEL_PREFIX="val"

# Validate early: a non-numeric or zero worker count would otherwise
# surface later as an arithmetic error or a silent no-op loop.
if ! [[ "$NUM_WORKERS" =~ ^[1-9][0-9]*$ ]]; then
  echo "ERROR: worker count must be a positive integer, got: $NUM_WORKERS" >&2
  exit 1
fi

# Load local secrets if present; set -a exports everything sourced so
# the env assertions and child processes below can see the values.
if [ -f ".env" ]; then
  set -a; source ".env"; set +a
fi

# Fail fast on missing configuration. Each ": ${VAR:?msg}" expansion
# aborts the script with msg when VAR is unset or empty (pairs with the
# set -u above). DATABASE_URL/R2_*/HF_TOKEN are forwarded to workers
# via ENV_COMMON; DOCKER_USERNAME/DOCKER_PAT feed `vastai --login`.
: "${DATABASE_URL:?Set DATABASE_URL}"
: "${R2_ENDPOINT_URL:?Set R2_ENDPOINT_URL}"
: "${R2_ACCESS_KEY_ID:?Set R2_ACCESS_KEY_ID}"
: "${R2_SECRET_ACCESS_KEY:?Set R2_SECRET_ACCESS_KEY}"
: "${HF_TOKEN:?Set HF_TOKEN}"
# NOTE(review): VASTAI_KEY is never referenced later in this script —
# presumably the vastai CLI reads it from the environment; confirm.
: "${VASTAI_KEY:?Set VASTAI_KEY}"
: "${DOCKER_USERNAME:?Set DOCKER_USERNAME}"
: "${DOCKER_PAT:?Set DOCKER_PAT}"

echo "=== Validation Fleet Deployment ==="
echo "Workers: $NUM_WORKERS | Image: $IMAGE"
echo ""

# Query Vast.ai for cheap, reliable single-GPU offers, cheapest first.
# We over-fetch (+20) so the round-robin assignment below has spares.
# stderr suppression and "|| true" are intentional: a failed or empty
# search is handled explicitly right after, with a clean message.
echo "Searching for GPU offers..."
OFFERS=$(vastai search offers \
    'gpu_name in [RTX_3090, RTX_4090, RTX_A6000] disk_space >= 50 inet_down >= 200 reliability > 0.95 num_gpus = 1' \
    --order 'dph_total' \
    --limit "$((NUM_WORKERS + 20))" \
    --raw 2>/dev/null || true)

if [ -z "$OFFERS" ]; then
    # Diagnostics belong on stderr so scripted callers can separate them.
    echo "ERROR: No suitable offers found." >&2
    exit 1
fi

# Human preview of the 20 cheapest offers (id, GPU, $/hr).
# "|| true" is required: under set -e/pipefail a preview failure (e.g.
# OFFERS is not valid JSON) would otherwise abort the whole deployment
# over a purely cosmetic step.
echo "$OFFERS" | python3 -c "
import json, sys
for o in json.load(sys.stdin)[:20]:
    print(f\"{o['id']} {o.get('gpu_name','?')} \${o.get('dph_total',0):.3f}/hr\")
" 2>/dev/null || true
echo ""

# Single-line env — all secrets passed at runtime, nothing baked.
# NOTE(review): values are interpolated unquoted into one flat string,
# so a secret containing spaces or shell metacharacters would corrupt
# the resulting --env argument — confirm all credentials are shell-safe.
# NOTE(review): secrets passed via argv are visible in local `ps`
# output for the duration of each `vastai create` call.
ENV_COMMON="-e DATABASE_URL=${DATABASE_URL} -e R2_ENDPOINT_URL=${R2_ENDPOINT_URL} -e R2_ACCESS_KEY_ID=${R2_ACCESS_KEY_ID} -e R2_SECRET_ACCESS_KEY=${R2_SECRET_ACCESS_KEY} -e HF_TOKEN=${HF_TOKEN} -e CONFORMER_BATCH_SIZE=32 -e MMS_BATCH_SIZE=64 -e VOX_BATCH_SIZE=64 -e WAV2VEC_BATCH_SIZE=32 -e PARQUET_SHARD_SIZE=50 -e R2_VALIDATION_SOURCE=transcribed -e R2_VALIDATION_OUTPUT=validation-results"

# Onstart: launch worker directly. Models download inside pipeline.load_models().
# tee to file + stdout so vastai logs can see progress.
# NOTE(review): the trailing '&' backgrounds the worker so the onstart
# script returns immediately; nothing restarts the worker if it exits —
# confirm that is acceptable for long-running fleet instances.
ONSTART='cd /app && python -m validations.main 2>&1 | tee /var/log/worker.log &'

CREATED=0

# Parse the ordered offer IDs into an array. mapfile -t avoids the
# word-splitting/globbing pitfalls of arr=( $(cmd) ) (ShellCheck SC2207);
# "|| true" keeps a parse failure from aborting before the guard below.
mapfile -t OFFER_LIST < <(echo "$OFFERS" | python3 -c "
import json, sys
for o in json.load(sys.stdin):
    print(o['id'])
" 2>/dev/null || true)

# Guard: an empty list would make the modulo in the worker loop divide
# by zero. Fail loudly with a real message instead.
if [ "${#OFFER_LIST[@]}" -eq 0 ]; then
    echo "ERROR: Could not parse any offer IDs from search results." >&2
    exit 1
fi

# Build an offer-id → GPU-name map ONCE instead of forking python3 per
# worker (the lookup is loop-invariant). Spaces in GPU names become '_'
# so the value is safe to pass as an env var.
declare -A GPU_BY_OFFER=()
while IFS=$'\t' read -r map_id map_gpu; do
    GPU_BY_OFFER["$map_id"]=$map_gpu
done < <(echo "$OFFERS" | python3 -c "
import json, sys
for o in json.load(sys.stdin):
    print(str(o['id']) + '\t' + o.get('gpu_name', 'unknown').replace(' ', '_'))
" 2>/dev/null || true)

for ((i = 1; i <= NUM_WORKERS; i++)); do
    # Round-robin over the offer list so workers spread across hosts.
    OFFER_IDX=$(( (i - 1) % ${#OFFER_LIST[@]} ))
    OFFER_ID="${OFFER_LIST[$OFFER_IDX]}"
    WORKER_ID="${LABEL_PREFIX}-$(printf '%03d' "$i")"
    # Fall back to "unknown" if the offer id is missing from the map.
    GPU_TYPE="${GPU_BY_OFFER[$OFFER_ID]:-unknown}"

    echo "[$i/$NUM_WORKERS] $WORKER_ID → offer $OFFER_ID ($GPU_TYPE)"

    if [ "$DRY_RUN" = "--dry-run" ]; then
        echo "  [DRY RUN]"
        continue
    fi

    # stderr suppressed on purpose — the WARN below is the operator-
    # facing failure signal; success increments the running total.
    if vastai create instance "$OFFER_ID" \
        --image "$IMAGE" \
        --disk "$DISK_GB" \
        --ssh --direct \
        --login "-u $DOCKER_USERNAME -p $DOCKER_PAT docker.io" \
        --env "$ENV_COMMON -e WORKER_ID=$WORKER_ID -e GPU_TYPE=$GPU_TYPE" \
        --onstart-cmd "$ONSTART" \
        --label "$WORKER_ID" \
        2>/dev/null; then
        CREATED=$((CREATED + 1))
    else
        echo "  WARN: Failed to create $WORKER_ID" >&2
    fi

    # Brief pause to avoid hammering the Vast.ai API.
    sleep 0.5
done

# Final summary plus an operator cheat-sheet for follow-up commands.
cat <<EOF

=== Deployed $CREATED / $NUM_WORKERS ===
Image: $IMAGE (slim — models download on startup)

Monitor:  vastai show instances
Logs:     vastai logs <instance_id>
Destroy:  vastai destroy instance <id>
EOF
