{
  "assamese": {
    "n_samples": 500,
    "wer_raw": 18.18,
    "wer_norm": 18.01,
    "wer_numcanon": 18.01,
    "space_norm_wer": 15.16,
    "mer": 7.87,
    "cer_norm": 7.44,
    "empty_hypotheses": 0,
    "normalization_delta": {
      "raw_to_norm": 0.16,
      "norm_to_numcanon": 0.0,
      "norm_to_space_norm": 2.86,
      "norm_to_mer": 10.15
    }
  },
  "bengali": {
    "n_samples": 500,
    "wer_raw": 16.32,
    "wer_norm": 15.81,
    "wer_numcanon": 15.81,
    "space_norm_wer": 9.96,
    "mer": 2.91,
    "cer_norm": 3.1,
    "empty_hypotheses": 0,
    "normalization_delta": {
      "raw_to_norm": 0.51,
      "norm_to_numcanon": 0.0,
      "norm_to_space_norm": 5.85,
      "norm_to_mer": 12.9
    }
  },
  "english": {
    "n_samples": 500,
    "wer_raw": 28.4,
    "wer_norm": 12.09,
    "wer_numcanon": 12.09,
    "space_norm_wer": 7.64,
    "mer": 8.23,
    "cer_norm": 8.07,
    "empty_hypotheses": 0,
    "normalization_delta": {
      "raw_to_norm": 16.31,
      "norm_to_numcanon": 0.0,
      "norm_to_space_norm": 4.45,
      "norm_to_mer": 3.86
    }
  },
  "gujarati": {
    "n_samples": 500,
    "wer_raw": 15.72,
    "wer_norm": 15.72,
    "wer_numcanon": 15.72,
    "space_norm_wer": 10.96,
    "mer": 3.68,
    "cer_norm": 3.72,
    "empty_hypotheses": 0,
    "normalization_delta": {
      "raw_to_norm": 0.0,
      "norm_to_numcanon": 0.0,
      "norm_to_space_norm": 4.76,
      "norm_to_mer": 12.04
    }
  },
  "hindi": {
    "n_samples": 500,
    "wer_raw": 13.12,
    "wer_norm": 13.12,
    "wer_numcanon": 13.12,
    "space_norm_wer": 8.91,
    "mer": 3.71,
    "cer_norm": 3.66,
    "empty_hypotheses": 0,
    "normalization_delta": {
      "raw_to_norm": 0.0,
      "norm_to_numcanon": 0.0,
      "norm_to_space_norm": 4.21,
      "norm_to_mer": 9.41
    }
  },
  "kannada": {
    "n_samples": 500,
    "wer_raw": 24.75,
    "wer_norm": 24.7,
    "wer_numcanon": 24.7,
    "space_norm_wer": 15.89,
    "mer": 4.21,
    "cer_norm": 4.63,
    "empty_hypotheses": 0,
    "normalization_delta": {
      "raw_to_norm": 0.05,
      "norm_to_numcanon": 0.0,
      "norm_to_space_norm": 8.81,
      "norm_to_mer": 20.49
    }
  },
  "malayalam": {
    "n_samples": 500,
    "wer_raw": 39.1,
    "wer_norm": 39.1,
    "wer_numcanon": 39.1,
    "space_norm_wer": 29.53,
    "mer": 6.92,
    "cer_norm": 7.0,
    "empty_hypotheses": 0,
    "normalization_delta": {
      "raw_to_norm": 0.0,
      "norm_to_numcanon": 0.0,
      "norm_to_space_norm": 9.56,
      "norm_to_mer": 32.18
    }
  },
  "marathi": {
    "n_samples": 500,
    "wer_raw": 21.16,
    "wer_norm": 21.16,
    "wer_numcanon": 21.16,
    "space_norm_wer": 14.53,
    "mer": 5.12,
    "cer_norm": 5.34,
    "empty_hypotheses": 0,
    "normalization_delta": {
      "raw_to_norm": 0.0,
      "norm_to_numcanon": 0.0,
      "norm_to_space_norm": 6.63,
      "norm_to_mer": 16.04
    }
  },
  "odia": {
    "n_samples": 500,
    "wer_raw": 22.77,
    "wer_norm": 22.73,
    "wer_numcanon": 22.73,
    "space_norm_wer": 13.12,
    "mer": 4.07,
    "cer_norm": 4.54,
    "empty_hypotheses": 0,
    "normalization_delta": {
      "raw_to_norm": 0.04,
      "norm_to_numcanon": 0.0,
      "norm_to_space_norm": 9.61,
      "norm_to_mer": 18.67
    }
  },
  "punjabi": {
    "n_samples": 500,
    "wer_raw": 15.9,
    "wer_norm": 15.42,
    "wer_numcanon": 15.42,
    "space_norm_wer": 12.02,
    "mer": 4.45,
    "cer_norm": 4.18,
    "empty_hypotheses": 0,
    "normalization_delta": {
      "raw_to_norm": 0.49,
      "norm_to_numcanon": 0.0,
      "norm_to_space_norm": 3.4,
      "norm_to_mer": 10.97
    }
  },
  "tamil": {
    "n_samples": 500,
    "wer_raw": 27.76,
    "wer_norm": 27.74,
    "wer_numcanon": 27.74,
    "space_norm_wer": 18.52,
    "mer": 5.0,
    "cer_norm": 5.23,
    "empty_hypotheses": 0,
    "normalization_delta": {
      "raw_to_norm": 0.02,
      "norm_to_numcanon": 0.0,
      "norm_to_space_norm": 9.22,
      "norm_to_mer": 22.75
    }
  },
  "telugu": {
    "n_samples": 500,
    "wer_raw": 23.2,
    "wer_norm": 23.2,
    "wer_numcanon": 23.2,
    "space_norm_wer": 14.04,
    "mer": 3.84,
    "cer_norm": 4.25,
    "empty_hypotheses": 0,
    "normalization_delta": {
      "raw_to_norm": 0.0,
      "norm_to_numcanon": 0.0,
      "norm_to_space_norm": 9.16,
      "norm_to_mer": 19.36
    }
  },
  "__overall__": {
    "n_samples": 6000,
    "wer_raw": 21.96,
    "wer_norm": 20.57,
    "wer_numcanon": 20.57,
    "space_norm_wer": 14.17,
    "mer": 5.08,
    "cer_norm": 5.2
  },
  "__macro_avg__": {
    "n_languages": 12,
    "wer_raw": 22.2,
    "wer_norm": 20.73,
    "wer_numcanon": 20.73,
    "space_norm_wer": 14.19,
    "mer": 5.0,
    "cer_norm": 5.1
  },
  "__meta__": {
    "checkpoint": "/home/ubuntu/training/checkpoints/qwen3-asr-mixed-v2-ckpt-400000",
    "checkpoint_name": "ckpt-400000",
    "model_id": "qwen3-asr-mixed-v2",
    "model_type": "qwen3-asr-1.7B-mixed-v2",
    "dataset": "BayAreaBoys/indic-asr-benchmark-6k",
    "batch_size": 128,
    "inference_time_sec": 681.24,
    "total_audio_sec": 40354.46,
    "rtf": 0.0169,
    "timestamp": "2026-04-08T04:58:12Z",
    "gpu": "NVIDIA A100-SXM4-80GB",
    "framework": "transformers",
    "normalization_version": "v1"
  }
}