"""Central configuration constants for Maya ASR."""

import hashlib
from pathlib import Path

# --- Filesystem layout -------------------------------------------------------
# PROJECT_ROOT is three directory levels above this file's resolved location.
PROJECT_ROOT = Path(__file__).resolve().parent.parent.parent
# Absolute location of the SFT data; the export shards live beneath it.
SFT_DATA_ROOT = Path("/root/sft_data")
FINAL_EXPORT_ROOT = SFT_DATA_ROOT.joinpath("final-export", "production", "shards")
EXPERIMENTS_DIR = PROJECT_ROOT.joinpath("experiments")

# --- Languages ---------------------------------------------------------------
# Two-letter language codes (presumably ISO 639-1 -- confirm with the data
# pipeline), kept in alphabetical order.
LANGUAGES = [
    "as",
    "bn",
    "en",
    "gu",
    "hi",
    "kn",
    "ml",
    "mr",
    "or",
    "pa",
    "ta",
    "te",
]

# --- Shard structure ---------------------------------------------------------
# File names associated with a shard (presumably one of each per shard
# directory -- verify against the export code).
SHARD_FILES = [
    "audio.tar",
    "audio_index.parquet",
    "manifest.json",
    "metadata.parquet",
]

# --- Model -------------------------------------------------------------------
# NOTE(review): judging by the names, these are the encoder width / depth and
# the Gemma hidden size -- confirm against the model definition.
ENCODER_DIM = 1024
ENCODER_LAYERS = 32
GEMMA_HIDDEN_DIM = 1536


def file_sha256(path: Path) -> str:
    """Return the SHA-256 digest of the file at *path* as a hex string.

    The file is opened in binary mode and read in 1 MiB pieces, so the whole
    file is never held in memory at once.

    Args:
        path: Location of the file to hash.

    Returns:
        The hexadecimal SHA-256 digest of the file's bytes.

    Raises:
        OSError: Propagated from ``open`` / ``read`` if the file cannot be
            opened or read.
    """
    block_size = 1 << 20  # 1 MiB per read
    digest = hashlib.sha256()
    with open(path, "rb") as stream:
        while True:
            piece = stream.read(block_size)
            if piece == b"":
                # An empty read signals end of file.
                break
            digest.update(piece)
    return digest.hexdigest()
