{
  "title": "Train Dashboard",
  "uid": "rayTrainDashboard",
  "version": 1,
  "schemaVersion": 27,
  "style": "dark",
  "editable": true,
  "graphTooltip": 1,
  "refresh": false,
  "panels": [],

  "time": {
    "from": "now-30m",
    "to": "now"
  },

  "annotations": {
    "list": [
      {
        "builtIn": 1,
        "datasource": "-- Grafana --",
        "enable": true,
        "hide": true,
        "iconColor": "rgba(0, 211, 255, 1)",
        "name": "Annotations & Alerts",
        "type": "dashboard"
      }
    ]
  },

  "templating": {
    "list": [
      {
        "name": "datasource",
        "type": "datasource",
        "description": "Select the Prometheus data source that the dashboard queries run against.",
        "datasource": null,
        "query": "prometheus",
        "refresh": 1,
        "hide": 2,
        "includeAll": false,
        "multi": false,
        "current": {
          "selected": false
        }
      },

      {
        "name": "SessionName",
        "type": "query",
        "description": "Filter queries to specific Ray sessions.",
        "datasource": "${datasource}",
        "definition": "label_values(ray_train_worker_group_start_total_time_s{{{global_filters}}}, SessionName)",
        "query": {
          "query": "label_values(ray_train_worker_group_start_total_time_s{{{global_filters}}}, SessionName)",
          "refId": "StandardVariableQuery"
        },
        "refresh": 1,
        "hide": 0,
        "includeAll": true,
        "multi": false,
        "allValue": ".*",
        "sort": 2,
        "current": {
          "selected": true,
          "text": ["All"],
          "value": ["$__all"]
        }
      },

      {
        "name": "TrainRunName",
        "type": "query",
        "description": "Filter queries to specific Ray Train run names.",
        "datasource": "${datasource}",
        "definition": "label_values(ray_train_worker_group_start_total_time_s{{{global_filters}}}, ray_train_run_name)",
        "query": {
          "query": "label_values(ray_train_worker_group_start_total_time_s{{{global_filters}}}, ray_train_run_name)",
          "refId": "StandardVariableQuery"
        },
        "refresh": 1,
        "hide": 0,
        "includeAll": true,
        "multi": false,
        "allValue": ".*",
        "sort": 2,
        "current": {
          "selected": true,
          "text": ["All"],
          "value": ["$__all"]
        }
      },

      {
        "name": "TrainRunId",
        "type": "query",
        "description": "Filter queries to specific Ray Train run IDs.",
        "datasource": "${datasource}",
        "definition": "label_values(ray_train_worker_group_start_total_time_s{{{global_filters}}}, ray_train_run_id)",
        "query": {
          "query": "label_values(ray_train_worker_group_start_total_time_s{{{global_filters}}}, ray_train_run_id)",
          "refId": "StandardVariableQuery"
        },
        "refresh": 1,
        "hide": 2,
        "includeAll": true,
        "multi": false,
        "allValue": ".*",
        "sort": 2,
        "current": {
          "selected": true,
          "text": ["All"],
          "value": ["$__all"]
        }
      },

      {
        "name": "TrainWorkerWorldRank",
        "type": "query",
        "description": "Filter queries to specific Ray Train worker world ranks.",
        "datasource": "${datasource}",
        "definition": "label_values(ray_train_report_total_blocked_time_s{{{global_filters}}}, ray_train_worker_world_rank)",
        "query": {
          "query": "label_values(ray_train_report_total_blocked_time_s{{{global_filters}}}, ray_train_worker_world_rank)",
          "refId": "StandardVariableQuery"
        },
        "refresh": 1,
        "hide": 0,
        "includeAll": true,
        "multi": false,
        "allValue": ".*",
        "sort": 2,
        "current": {
          "selected": true,
          "text": ["All"],
          "value": ["$__all"]
        }
      },

      {
        "name": "TrainWorkerActorId",
        "type": "query",
        "description": "Filter queries to specific Ray Train worker actor IDs.",
        "datasource": "${datasource}",
        "definition": "label_values(ray_train_report_total_blocked_time_s{{{global_filters}}}, ray_train_worker_actor_id)",
        "query": {
          "query": "label_values(ray_train_report_total_blocked_time_s{{{global_filters}}}, ray_train_worker_actor_id)",
          "refId": "StandardVariableQuery"
        },
        "refresh": 1,
        "hide": 2,
        "includeAll": true,
        "multi": false,
        "allValue": ".*",
        "sort": 2,
        "current": {
          "selected": true,
          "text": ["All"],
          "value": ["$__all"]
        }
      },

      {
        "name": "Instance",
        "type": "query",
        "description": "Filter queries to specific node instances.",
        "datasource": "${datasource}",
        "definition": "label_values(ray_node_network_receive_speed{{{global_filters}}}, instance)",
        "query": {
          "query": "label_values(ray_node_network_receive_speed{{{global_filters}}}, instance)",
          "refId": "StandardVariableQuery"
        },
        "refresh": 1,
        "hide": 2,
        "includeAll": true,
        "multi": false,
        "allValue": ".*",
        "sort": 2,
        "current": {
          "selected": true,
          "text": ["All"],
          "value": ["$__all"]
        }
      },

      {
        "name": "GpuIndex",
        "type": "query",
        "description": "Filter queries to specific GPU indices.",
        "datasource": "${datasource}",
        "definition": "label_values(ray_node_gpus_utilization{{{global_filters}}}, GpuIndex)",
        "query": {
          "query": "label_values(ray_node_gpus_utilization{{{global_filters}}}, GpuIndex)",
          "refId": "StandardVariableQuery"
        },
        "refresh": 1,
        "hide": 2,
        "includeAll": true,
        "multi": true,
        "allValue": ".*",
        "sort": 2,
        "current": {
          "selected": true,
          "text": ["All"],
          "value": ["$__all"]
        }
      },

      {
        "name": "GpuDeviceName",
        "type": "query",
        "description": "Filter queries to specific GPU device names.",
        "datasource": "${datasource}",
        "definition": "label_values(ray_node_gpus_utilization{{{global_filters}}}, GpuDeviceName)",
        "query": {
          "query": "label_values(ray_node_gpus_utilization{{{global_filters}}}, GpuDeviceName)",
          "refId": "StandardVariableQuery"
        },
        "refresh": 1,
        "hide": 2,
        "includeAll": true,
        "multi": true,
        "allValue": ".*",
        "sort": 2,
        "current": {
          "selected": true,
          "text": ["All"],
          "value": ["$__all"]
        }
      },

      {
        "current": {
          "text": [
            "All"
          ],
          "value": [
            "$__all"
          ]
        },
        "description": "Filter queries to specific Ray node types (head or worker).",
        "includeAll": true,
        "multi": true,
        "name": "RayNodeType",
        "options": [
          {
            "selected": false,
            "text": "All",
            "value": "$__all"
          },
          {
            "selected": false,
            "text": "Head Node",
            "value": "head"
          },
          {
            "selected": false,
            "text": "Worker Node",
            "value": "worker"
          }
        ],
        "query": "head, worker",
        "type": "custom"
      }
    ]
  }
}
