#!/usr/bin/env bash
# Strict mode: abort on a failing command, on use of an unset variable, and
# when any stage of a pipeline fails.
set -o errexit -o nounset -o pipefail

# The repository root is the parent of the directory that holds this script.
SCRIPT_DIR="$(dirname "${BASH_SOURCE[0]}")"
ROOT_DIR="$(cd "${SCRIPT_DIR}/.." && pwd)"
ENV_FILE="${ROOT_DIR}/.env"

# Load the optional .env file. allexport is switched on only while the file
# is sourced, so every variable it assigns is exported to child processes.
if [[ -f "${ENV_FILE}" ]]; then
    set -o allexport
    # shellcheck disable=SC1090
    source "${ENV_FILE}"
    set +o allexport
fi

# Deployment settings. Each one takes its value from the environment when
# that value is set and non-empty, otherwise from the fallback written here.
# The command-line options parsed further down may override them.

# Docker image to deploy and the recover queue table the workers consume.
IMAGE="${DOCKER_USERNAME:-bharathkumar192}/validation-worker:recover-20260307"
QUEUE_TABLE="${VALIDATION_RECOVER_QUEUE_TABLE:-validation_recover_queue_20260307}"

# Buckets. The reference bucket falls back to whatever the model bucket is.
OUTPUT_BUCKET="${R2_VALIDATION_OUTPUT:-validationsrecoverfinal}"
MODEL_BUCKET="${R2_VALIDATION_MODEL_BUCKET:-validation-results}"
REFERENCE_BUCKET="${R2_VALIDATION_REFERENCE_BUCKET:-${MODEL_BUCKET}}"

# Recover reference source and the object keys of its files.
REFERENCE_MODE="${RECOVER_REFERENCE_MODE:-database}"
TX_PARQUET_KEY="${RECOVER_TX_PARQUET_KEY:-reference-data/transcription_results_recover.parquet}"
FLAGS_PARQUET_KEY="${RECOVER_FLAGS_PARQUET_KEY:-reference-data/transcription_flags_recover.parquet}"
VALIDATED_PARQUET_KEY="${RECOVER_VALIDATED_PARQUET_KEY:-reference-data/validated_segment_ids.parquet}"
MANIFEST_KEY="${RECOVER_REFERENCE_MANIFEST_KEY:-reference-data/recover_reference_manifest.json}"

# Settings whose script variable and environment variable share a name:
# assign the fallback in place when the variable is unset or empty.
: "${MODELS:=all}"
: "${DISK:=80}"
: "${GPU_TYPE:=unknown}"
: "${WORKER_PREFIX:=recover}"
: "${MAX_VIDEOS:=0}"
: "${R2_SKIP_UPLOAD:=false}"

# Dry-run always starts disabled, regardless of the environment.
DRY_RUN=false

# Print the command-line help text to stdout.
# The heredoc delimiter is quoted, so the text is emitted literally with no
# variable or command expansion.
usage() {
    cat <<'EOF'
Usage:
  ./scripts/deploy_recover_validation.sh [options] <offer_id1> [offer_id2] ...

Options:
  --image <image>              Docker image to deploy
  --queue-table <table>        Recover queue table to consume
  --output-bucket <bucket>     Bucket for validation parquet shards
  --model-bucket <bucket>      Bucket that stores validation model tarballs
  --reference-bucket <bucket>  Bucket that stores recover reference parquet files
  --reference-mode <mode>      Recover reference source: database or parquet
  --tx-parquet-key <key>       R2 key for recover transcription parquet
  --flags-parquet-key <key>    R2 key for recover flags parquet
  --validated-parquet-key <key> R2 key for already-validated segment IDs parquet
  --models <models>            Model selection passed to validations.main
  --disk <gb>                  Disk size in GB
  --gpu-type <label>           Worker gpu_type label stored in DB
  --worker-prefix <prefix>     Prefix for WORKER_ID and Vast label
  --max-videos <n>             Optional per-worker max videos
  --dry-run                    Print commands without creating instances
  -h, --help                   Show this help and exit

Required env vars:
  VASTAI_KEY
  R2_ENDPOINT_URL
  R2_ACCESS_KEY_ID
  R2_SECRET_ACCESS_KEY
  R2_BUCKET
  DATABASE_URL
  DOCKER_USERNAME
  DOCKER_PAT

Example:
  ./scripts/deploy_recover_validation.sh \
    --queue-table validation_recover_queue_20260307 \
    --output-bucket validationsrecoverfinal \
    --model-bucket validation-results \
    --reference-mode parquet \
    12345678 23456789
EOF
}

# Abort the script unless the named variable holds a non-empty value.
# Arguments: $1 - name of the variable to check (looked up indirectly)
# Outputs:   an error message on stderr when the variable is unset or empty
# Returns:   0 when the variable is non-empty; otherwise exits with status 1
require_env() {
    local name="$1"
    if [[ -n "${!name:-}" ]]; then
        return 0
    fi
    printf 'Missing required env var: %s\n' "${name}" >&2
    exit 1
}

# Abort with a readable message when an option that takes a value is the last
# word on the command line. Without this check the later "$2" expansion fails
# under `set -u` with a bare "unbound variable" error.
# Arguments: $1 - the option being parsed
#            $2 - number of arguments remaining, including the option itself
require_option_value() {
    if [ "$2" -lt 2 ]; then
        echo "Option $1 requires a value" >&2
        exit 1
    fi
}

# Parse the command line. Recognised options overwrite the corresponding
# setting; every word that does not start with '-' is collected as an offer ID.
# An unrecognised option is rejected instead of being treated as an offer ID,
# so a mistyped flag cannot be handed on as an offer.
OFFERS=()
while [ $# -gt 0 ]; do
    case "$1" in
        --image)
            require_option_value "$1" "$#"
            IMAGE="$2"
            shift 2
            ;;
        --queue-table)
            require_option_value "$1" "$#"
            QUEUE_TABLE="$2"
            shift 2
            ;;
        --output-bucket)
            require_option_value "$1" "$#"
            OUTPUT_BUCKET="$2"
            shift 2
            ;;
        --model-bucket)
            require_option_value "$1" "$#"
            MODEL_BUCKET="$2"
            shift 2
            ;;
        --reference-bucket)
            require_option_value "$1" "$#"
            REFERENCE_BUCKET="$2"
            shift 2
            ;;
        --reference-mode)
            require_option_value "$1" "$#"
            REFERENCE_MODE="$2"
            shift 2
            ;;
        --tx-parquet-key)
            require_option_value "$1" "$#"
            TX_PARQUET_KEY="$2"
            shift 2
            ;;
        --flags-parquet-key)
            require_option_value "$1" "$#"
            FLAGS_PARQUET_KEY="$2"
            shift 2
            ;;
        --validated-parquet-key)
            require_option_value "$1" "$#"
            VALIDATED_PARQUET_KEY="$2"
            shift 2
            ;;
        --models)
            require_option_value "$1" "$#"
            MODELS="$2"
            shift 2
            ;;
        --disk)
            require_option_value "$1" "$#"
            DISK="$2"
            shift 2
            ;;
        --gpu-type)
            require_option_value "$1" "$#"
            GPU_TYPE="$2"
            shift 2
            ;;
        --worker-prefix)
            require_option_value "$1" "$#"
            WORKER_PREFIX="$2"
            shift 2
            ;;
        --max-videos)
            require_option_value "$1" "$#"
            MAX_VIDEOS="$2"
            shift 2
            ;;
        --dry-run)
            DRY_RUN="true"
            shift
            ;;
        -h|--help)
            usage
            exit 0
            ;;
        -*)
            echo "Unknown option: $1" >&2
            echo "Run with --help for usage." >&2
            exit 1
            ;;
        *)
            OFFERS+=("$1")
            shift
            ;;
    esac
done

# At least one offer ID is required; show the help text and fail otherwise.
if [ ${#OFFERS[@]} -eq 0 ]; then
    echo "No offer IDs given." >&2
    usage
    exit 1
fi

# Verify, in this order, that every variable the deployment depends on is set
# and non-empty. require_env exits the script at the first one that is not,
# which happens before any instance is created.
for REQUIRED_VAR in \
    VASTAI_KEY \
    R2_ENDPOINT_URL \
    R2_ACCESS_KEY_ID \
    R2_SECRET_ACCESS_KEY \
    R2_BUCKET \
    DATABASE_URL \
    DOCKER_USERNAME \
    DOCKER_PAT; do
    require_env "${REQUIRED_VAR}"
done

# Choose the batch-size flags from the GPU label. Patterns are grouped by GPU
# family; a trailing _* accepts the family name followed by an underscore and
# any suffix. A label that matches neither group leaves EXTRA_BATCH_ENVS
# empty, so no batch-size variables are passed to the container.
case "${GPU_TYPE}" in
    RTX_4090 | RTX_4090D | RTX_4090_* \
        | RTX_3090 | RTX_3090_Ti | RTX_3090_* \
        | RTX_A5000 | RTX_A5000_* \
        | RTX_A6000 | RTX_A6000_*)
        EXTRA_BATCH_ENVS="-e CONFORMER_BATCH_SIZE=4 -e VOX_BATCH_SIZE=8"
        ;;
    RTX_4080 | RTX_4080_* \
        | RTX_4070_Ti | RTX_4070_Ti_* \
        | RTX_3080 | RTX_3080_Ti | RTX_3080_* \
        | RTX_2080_Ti | RTX_2080_Ti_* \
        | RTX_A4000 | RTX_A4000_* \
        | A10 | A10G | A10_* \
        | A40 | A40_* \
        | L4 | L4_*)
        EXTRA_BATCH_ENVS="-e CONFORMER_BATCH_SIZE=2 -e VOX_BATCH_SIZE=4"
        ;;
    *)
        EXTRA_BATCH_ENVS=""
        ;;
esac

# Assemble the container environment as one string of "-e NAME=value" pairs
# separated by single spaces. The deploy loop later appends the per-worker
# variables and passes the result to `vastai ... --env`.

# Storage credentials and database connection.
ENV_STR="-e R2_ENDPOINT_URL=${R2_ENDPOINT_URL}"
ENV_STR+=" -e R2_ACCESS_KEY_ID=${R2_ACCESS_KEY_ID}"
ENV_STR+=" -e R2_SECRET_ACCESS_KEY=${R2_SECRET_ACCESS_KEY}"
ENV_STR+=" -e R2_BUCKET=${R2_BUCKET}"
ENV_STR+=" -e DATABASE_URL=${DATABASE_URL}"

# Buckets used by the validation run.
ENV_STR+=" -e R2_VALIDATION_OUTPUT=${OUTPUT_BUCKET}"
ENV_STR+=" -e R2_VALIDATION_MODEL_BUCKET=${MODEL_BUCKET}"
ENV_STR+=" -e R2_VALIDATION_REFERENCE_BUCKET=${REFERENCE_BUCKET}"

# Recover reference source and object keys.
ENV_STR+=" -e RECOVER_REFERENCE_MODE=${REFERENCE_MODE}"
ENV_STR+=" -e RECOVER_TX_PARQUET_KEY=${TX_PARQUET_KEY}"
ENV_STR+=" -e RECOVER_FLAGS_PARQUET_KEY=${FLAGS_PARQUET_KEY}"
ENV_STR+=" -e RECOVER_VALIDATED_PARQUET_KEY=${VALIDATED_PARQUET_KEY}"
ENV_STR+=" -e RECOVER_REFERENCE_MANIFEST_KEY=${MANIFEST_KEY}"

# Runtime tuning and queue selection.
ENV_STR+=" -e PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True"
ENV_STR+=" -e VALIDATION_RECOVER_QUEUE_TABLE=${QUEUE_TABLE}"
ENV_STR+=" -e R2_SKIP_UPLOAD=${R2_SKIP_UPLOAD}"
ENV_STR+=" -e MAX_VIDEOS=${MAX_VIDEOS}"

# The separator is appended even when no batch-size flags apply, which leaves
# a trailing space in that case.
ENV_STR+=" ${EXTRA_BATCH_ENVS}"

# Build the command each instance runs on start.
#
# MAX_VIDEOS is validated first. A non-integer value used to make the numeric
# test fail with an error, which `if` treats as false, so the script silently
# chose the restart-loop branch with a bogus value. Now it is rejected.
# Any integer (including zero, negative or signed) is still accepted.
# The `-0` fallback only applies when the variable is unset.
MAX_VIDEOS_VALUE="${MAX_VIDEOS-0}"
if ! [[ "${MAX_VIDEOS_VALUE}" =~ ^[+-]?[0-9]+$ ]]; then
    echo "Invalid max videos value (expected an integer): '${MAX_VIDEOS_VALUE}'" >&2
    exit 1
fi

# Shared pieces of both variants: create the log directory, append the
# environment to /etc/environment, change to /app, then run the worker with
# its output appended to the log file.
ONSTART_SETUP="mkdir -p /var/log/portal; printenv >> /etc/environment; cd /app"
ONSTART_WORKER="python -m validations.main --recover --models ${MODELS} 2>&1 | tee -a /var/log/portal/recover_validation.log"

# A positive limit runs the worker once. Otherwise the worker is restarted in
# a background loop with a 10 second pause between runs. The sign and digits
# are matched textually so that very large values cannot overflow a numeric
# comparison.
if [[ "${MAX_VIDEOS_VALUE}" =~ ^[+]?0*[1-9][0-9]*$ ]]; then
    ONSTART="${ONSTART_SETUP}; ${ONSTART_WORKER}"
else
    ONSTART="${ONSTART_SETUP}; while true; do ${ONSTART_WORKER}; echo 'Worker exited, restarting in 10s...'; sleep 10; done &"
fi

# Create one instance per offer ID.
#
# Each worker is identified as "<WORKER_PREFIX>-<offer id>", used both as the
# WORKER_ID passed to the container and as the instance label.
#
# A failed `vastai create instance` no longer aborts the whole batch under
# `set -e`. The failure is recorded, the remaining offers are still attempted,
# and the script exits with status 1 after printing a summary.
TOTAL=${#OFFERS[@]}
COUNT=0
FAILED_OFFERS=()

for OFFER_ID in "${OFFERS[@]}"; do
    COUNT=$((COUNT + 1))
    WORKER_ID="${WORKER_PREFIX}-${OFFER_ID}"
    LABEL="${WORKER_ID}"
    FULL_ENV="${ENV_STR} -e WORKER_ID=${WORKER_ID} -e GPU_TYPE=${GPU_TYPE}"

    echo "[${COUNT}/${TOTAL}] Creating recover worker ${WORKER_ID} from offer ${OFFER_ID}"

    # Kept as an array so every argument reaches vastai as a single word.
    CMD=(
        vastai --api-key "${VASTAI_KEY}" create instance "${OFFER_ID}"
        --image "${IMAGE}"
        --disk "${DISK}"
        --ssh --direct
        --env "${FULL_ENV}"
        --login "-u ${DOCKER_USERNAME} -p ${DOCKER_PAT} docker.io"
        --onstart-cmd "${ONSTART}"
        --label "${LABEL}"
    )

    if [ "${DRY_RUN}" = "true" ]; then
        # NOTE(review): the printed command contains the API key, the registry
        # token and the secrets in the env string verbatim; treat dry-run
        # output as sensitive.
        printf 'DRY RUN:'
        printf ' %q' "${CMD[@]}"
        printf '\n'
        continue
    fi

    # Running the command as an `if` condition keeps errexit from ending the
    # script on failure, so the remaining offers are still processed.
    if ! "${CMD[@]}"; then
        echo "Failed to create instance from offer ${OFFER_ID}; continuing with remaining offers." >&2
        FAILED_OFFERS+=("${OFFER_ID}")
    fi
done

if [ "${DRY_RUN}" = "true" ]; then
    echo "Dry run complete."
elif [ "${#FAILED_OFFERS[@]}" -eq 0 ]; then
    echo "Deployment complete. Monitor with:"
    echo "  vastai --api-key \"\$VASTAI_KEY\" show instances"
    echo "  vastai --api-key \"\$VASTAI_KEY\" logs <instance_id>"
else
    echo "Deployment finished with ${#FAILED_OFFERS[@]} of ${TOTAL} offer(s) failing: ${FAILED_OFFERS[*]}" >&2
    echo "Monitor the instances that were created with:" >&2
    echo "  vastai --api-key \"\$VASTAI_KEY\" show instances" >&2
    echo "  vastai --api-key \"\$VASTAI_KEY\" logs <instance_id>" >&2
    exit 1
fi
