{
  "assamese": {
    "n_samples": 500,
    "wer_raw": 22.23,
    "wer_norm": 22.05,
    "wer_numcanon": 22.05,
    "space_norm_wer": 17.97,
    "mer": 9.38,
    "cer_norm": 8.87,
    "empty_hypotheses": 0,
    "normalization_delta": {
      "raw_to_norm": 0.18,
      "norm_to_numcanon": 0.0,
      "norm_to_space_norm": 4.08,
      "norm_to_mer": 12.67
    }
  },
  "bengali": {
    "n_samples": 500,
    "wer_raw": 19.56,
    "wer_norm": 19.03,
    "wer_numcanon": 19.03,
    "space_norm_wer": 12.83,
    "mer": 4.3,
    "cer_norm": 4.39,
    "empty_hypotheses": 0,
    "normalization_delta": {
      "raw_to_norm": 0.53,
      "norm_to_numcanon": 0.0,
      "norm_to_space_norm": 6.2,
      "norm_to_mer": 14.73
    }
  },
  "english": {
    "n_samples": 500,
    "wer_raw": 25.46,
    "wer_norm": 13.53,
    "wer_numcanon": 13.53,
    "space_norm_wer": 8.68,
    "mer": 8.89,
    "cer_norm": 8.73,
    "empty_hypotheses": 0,
    "normalization_delta": {
      "raw_to_norm": 11.94,
      "norm_to_numcanon": 0.0,
      "norm_to_space_norm": 4.84,
      "norm_to_mer": 4.64
    }
  },
  "gujarati": {
    "n_samples": 500,
    "wer_raw": 19.17,
    "wer_norm": 19.15,
    "wer_numcanon": 19.15,
    "space_norm_wer": 13.96,
    "mer": 4.95,
    "cer_norm": 4.94,
    "empty_hypotheses": 0,
    "normalization_delta": {
      "raw_to_norm": 0.02,
      "norm_to_numcanon": 0.0,
      "norm_to_space_norm": 5.19,
      "norm_to_mer": 14.2
    }
  },
  "hindi": {
    "n_samples": 500,
    "wer_raw": 14.73,
    "wer_norm": 14.71,
    "wer_numcanon": 14.71,
    "space_norm_wer": 10.64,
    "mer": 4.87,
    "cer_norm": 4.66,
    "empty_hypotheses": 0,
    "normalization_delta": {
      "raw_to_norm": 0.02,
      "norm_to_numcanon": 0.0,
      "norm_to_space_norm": 4.07,
      "norm_to_mer": 9.85
    }
  },
  "kannada": {
    "n_samples": 500,
    "wer_raw": 29.8,
    "wer_norm": 29.6,
    "wer_numcanon": 29.6,
    "space_norm_wer": 20.46,
    "mer": 6.04,
    "cer_norm": 6.41,
    "empty_hypotheses": 0,
    "normalization_delta": {
      "raw_to_norm": 0.2,
      "norm_to_numcanon": 0.0,
      "norm_to_space_norm": 9.13,
      "norm_to_mer": 23.55
    }
  },
  "malayalam": {
    "n_samples": 500,
    "wer_raw": 42.31,
    "wer_norm": 42.02,
    "wer_numcanon": 42.02,
    "space_norm_wer": 31.32,
    "mer": 7.88,
    "cer_norm": 7.99,
    "empty_hypotheses": 0,
    "normalization_delta": {
      "raw_to_norm": 0.29,
      "norm_to_numcanon": 0.0,
      "norm_to_space_norm": 10.7,
      "norm_to_mer": 34.14
    }
  },
  "marathi": {
    "n_samples": 500,
    "wer_raw": 25.29,
    "wer_norm": 25.27,
    "wer_numcanon": 25.27,
    "space_norm_wer": 17.94,
    "mer": 6.62,
    "cer_norm": 6.8,
    "empty_hypotheses": 0,
    "normalization_delta": {
      "raw_to_norm": 0.02,
      "norm_to_numcanon": 0.0,
      "norm_to_space_norm": 7.33,
      "norm_to_mer": 18.65
    }
  },
  "odia": {
    "n_samples": 500,
    "wer_raw": 26.26,
    "wer_norm": 26.26,
    "wer_numcanon": 26.26,
    "space_norm_wer": 16.05,
    "mer": 5.09,
    "cer_norm": 5.51,
    "empty_hypotheses": 0,
    "normalization_delta": {
      "raw_to_norm": 0.0,
      "norm_to_numcanon": 0.0,
      "norm_to_space_norm": 10.2,
      "norm_to_mer": 21.17
    }
  },
  "punjabi": {
    "n_samples": 500,
    "wer_raw": 19.07,
    "wer_norm": 18.73,
    "wer_numcanon": 18.73,
    "space_norm_wer": 14.67,
    "mer": 5.6,
    "cer_norm": 5.29,
    "empty_hypotheses": 0,
    "normalization_delta": {
      "raw_to_norm": 0.35,
      "norm_to_numcanon": 0.0,
      "norm_to_space_norm": 4.05,
      "norm_to_mer": 13.12
    }
  },
  "tamil": {
    "n_samples": 500,
    "wer_raw": 33.96,
    "wer_norm": 33.62,
    "wer_numcanon": 33.62,
    "space_norm_wer": 23.86,
    "mer": 7.02,
    "cer_norm": 7.14,
    "empty_hypotheses": 0,
    "normalization_delta": {
      "raw_to_norm": 0.33,
      "norm_to_numcanon": 0.0,
      "norm_to_space_norm": 9.76,
      "norm_to_mer": 26.61
    }
  },
  "telugu": {
    "n_samples": 500,
    "wer_raw": 27.2,
    "wer_norm": 26.75,
    "wer_numcanon": 26.75,
    "space_norm_wer": 17.1,
    "mer": 4.97,
    "cer_norm": 5.36,
    "empty_hypotheses": 0,
    "normalization_delta": {
      "raw_to_norm": 0.44,
      "norm_to_numcanon": 0.0,
      "norm_to_space_norm": 9.66,
      "norm_to_mer": 21.79
    }
  },
  "__overall__": {
    "n_samples": 6000,
    "wer_raw": 25.21,
    "wer_norm": 24.08,
    "wer_numcanon": 24.08,
    "space_norm_wer": 17.1,
    "mer": 6.42,
    "cer_norm": 6.47
  },
  "__macro_avg__": {
    "n_languages": 12,
    "wer_raw": 25.42,
    "wer_norm": 24.23,
    "wer_numcanon": 24.23,
    "space_norm_wer": 17.12,
    "mer": 6.3,
    "cer_norm": 6.34
  },
  "__meta__": {
    "checkpoint": "/home/ubuntu/training/checkpoints/qwen3-asr-mixed-v2-ckpt-50000",
    "checkpoint_name": "ckpt-50000",
    "model_id": "qwen3-asr-mixed-v2",
    "model_type": "qwen3-asr-1.7B-mixed-v2",
    "dataset": "BayAreaBoys/indic-asr-benchmark-6k",
    "batch_size": 64,
    "inference_time_sec": 871.4,
    "total_audio_sec": 40354.46,
    "rtf": 0.0216,
    "timestamp": "2026-04-03T21:35:03Z",
    "gpu": "NVIDIA A100-SXM4-80GB",
    "framework": "transformers",
    "normalization_version": "v1"
  }
}