{
  "assamese": {
    "n_samples": 500,
    "wer_raw": 15.43,
    "wer_norm": 15.27,
    "wer_numcanon": 15.27,
    "space_norm_wer": 12.6,
    "mer": 6.35,
    "cer_norm": 6.01,
    "empty_hypotheses": 0,
    "normalization_delta": {
      "raw_to_norm": 0.16,
      "norm_to_numcanon": 0.0,
      "norm_to_space_norm": 2.66,
      "norm_to_mer": 8.92
    }
  },
  "bengali": {
    "n_samples": 500,
    "wer_raw": 15.94,
    "wer_norm": 15.44,
    "wer_numcanon": 15.44,
    "space_norm_wer": 9.63,
    "mer": 2.73,
    "cer_norm": 2.93,
    "empty_hypotheses": 0,
    "normalization_delta": {
      "raw_to_norm": 0.49,
      "norm_to_numcanon": 0.0,
      "norm_to_space_norm": 5.81,
      "norm_to_mer": 12.71
    }
  },
  "english": {
    "n_samples": 500,
    "wer_raw": 28.03,
    "wer_norm": 12.17,
    "wer_numcanon": 12.17,
    "space_norm_wer": 7.6,
    "mer": 8.19,
    "cer_norm": 8.04,
    "empty_hypotheses": 0,
    "normalization_delta": {
      "raw_to_norm": 15.86,
      "norm_to_numcanon": 0.0,
      "norm_to_space_norm": 4.56,
      "norm_to_mer": 3.98
    }
  },
  "gujarati": {
    "n_samples": 500,
    "wer_raw": 15.54,
    "wer_norm": 15.54,
    "wer_numcanon": 15.54,
    "space_norm_wer": 10.79,
    "mer": 3.63,
    "cer_norm": 3.67,
    "empty_hypotheses": 0,
    "normalization_delta": {
      "raw_to_norm": 0.0,
      "norm_to_numcanon": 0.0,
      "norm_to_space_norm": 4.74,
      "norm_to_mer": 11.91
    }
  },
  "hindi": {
    "n_samples": 500,
    "wer_raw": 13.08,
    "wer_norm": 13.08,
    "wer_numcanon": 13.08,
    "space_norm_wer": 9.07,
    "mer": 3.8,
    "cer_norm": 3.7,
    "empty_hypotheses": 0,
    "normalization_delta": {
      "raw_to_norm": 0.0,
      "norm_to_numcanon": 0.0,
      "norm_to_space_norm": 4.02,
      "norm_to_mer": 9.28
    }
  },
  "kannada": {
    "n_samples": 500,
    "wer_raw": 24.33,
    "wer_norm": 24.3,
    "wer_numcanon": 24.3,
    "space_norm_wer": 15.69,
    "mer": 4.15,
    "cer_norm": 4.58,
    "empty_hypotheses": 0,
    "normalization_delta": {
      "raw_to_norm": 0.04,
      "norm_to_numcanon": 0.0,
      "norm_to_space_norm": 8.61,
      "norm_to_mer": 20.14
    }
  },
  "malayalam": {
    "n_samples": 500,
    "wer_raw": 39.04,
    "wer_norm": 39.04,
    "wer_numcanon": 39.04,
    "space_norm_wer": 29.5,
    "mer": 6.84,
    "cer_norm": 6.94,
    "empty_hypotheses": 0,
    "normalization_delta": {
      "raw_to_norm": 0.0,
      "norm_to_numcanon": 0.0,
      "norm_to_space_norm": 9.54,
      "norm_to_mer": 32.2
    }
  },
  "marathi": {
    "n_samples": 500,
    "wer_raw": 20.96,
    "wer_norm": 20.96,
    "wer_numcanon": 20.96,
    "space_norm_wer": 14.3,
    "mer": 5.14,
    "cer_norm": 5.36,
    "empty_hypotheses": 0,
    "normalization_delta": {
      "raw_to_norm": 0.0,
      "norm_to_numcanon": 0.0,
      "norm_to_space_norm": 6.67,
      "norm_to_mer": 15.82
    }
  },
  "odia": {
    "n_samples": 500,
    "wer_raw": 22.09,
    "wer_norm": 22.03,
    "wer_numcanon": 22.03,
    "space_norm_wer": 12.73,
    "mer": 3.76,
    "cer_norm": 4.23,
    "empty_hypotheses": 0,
    "normalization_delta": {
      "raw_to_norm": 0.06,
      "norm_to_numcanon": 0.0,
      "norm_to_space_norm": 9.3,
      "norm_to_mer": 18.27
    }
  },
  "punjabi": {
    "n_samples": 500,
    "wer_raw": 16.01,
    "wer_norm": 15.54,
    "wer_numcanon": 15.54,
    "space_norm_wer": 12.06,
    "mer": 4.39,
    "cer_norm": 4.14,
    "empty_hypotheses": 0,
    "normalization_delta": {
      "raw_to_norm": 0.47,
      "norm_to_numcanon": 0.0,
      "norm_to_space_norm": 3.48,
      "norm_to_mer": 11.15
    }
  },
  "tamil": {
    "n_samples": 500,
    "wer_raw": 27.82,
    "wer_norm": 27.82,
    "wer_numcanon": 27.82,
    "space_norm_wer": 18.7,
    "mer": 5.03,
    "cer_norm": 5.26,
    "empty_hypotheses": 0,
    "normalization_delta": {
      "raw_to_norm": 0.0,
      "norm_to_numcanon": 0.0,
      "norm_to_space_norm": 9.11,
      "norm_to_mer": 22.79
    }
  },
  "telugu": {
    "n_samples": 500,
    "wer_raw": 22.83,
    "wer_norm": 22.83,
    "wer_numcanon": 22.83,
    "space_norm_wer": 13.69,
    "mer": 3.79,
    "cer_norm": 4.19,
    "empty_hypotheses": 0,
    "normalization_delta": {
      "raw_to_norm": 0.0,
      "norm_to_numcanon": 0.0,
      "norm_to_space_norm": 9.14,
      "norm_to_mer": 19.04
    }
  },
  "__overall__": {
    "n_samples": 6000,
    "wer_raw": 21.46,
    "wer_norm": 20.12,
    "wer_numcanon": 20.12,
    "space_norm_wer": 13.79,
    "mer": 4.89,
    "cer_norm": 5.01
  },
  "__macro_avg__": {
    "n_languages": 12,
    "wer_raw": 21.76,
    "wer_norm": 20.33,
    "wer_numcanon": 20.33,
    "space_norm_wer": 13.86,
    "mer": 4.82,
    "cer_norm": 4.92
  },
  "__meta__": {
    "checkpoint": "/home/ubuntu/training/checkpoints/qwen3-asr-mixed-v2-ckpt-500000",
    "checkpoint_name": "ckpt-500000",
    "model_id": "qwen3-asr-mixed-v2",
    "model_type": "qwen3-asr-1.7B-mixed-v2",
    "dataset": "BayAreaBoys/indic-asr-benchmark-6k",
    "batch_size": 128,
    "inference_time_sec": 723.92,
    "total_audio_sec": 40354.46,
    "rtf": 0.0179,
    "timestamp": "2026-04-09T17:18:13Z",
    "gpu": "NVIDIA A100-SXM4-80GB",
    "framework": "transformers",
    "normalization_version": "v1"
  }
}