o
    ߗi                  
   @   s  U d dl Z d dlZd dlZd dlmZ d dlmZ d dlmZm	Z	m
Z
mZmZmZ d dlZd dlmZ d dlmZ ddgZe s]d dlZG d	d
 d
Zdd Zeejd _eejd _dS d dlmZ d dlmZmZm Z m!Z!m"Z"m#Z#m$Z$m%Z%m&Z&m'Z'm(Z(m)Z) e *e+Z,erzd dl-m.Z. W n e/y   e,0d Y nw G dd dej1Z2e2 Z3e2e4d< d de5fddZ6G dd dZddde5dee7df de
ee5df  defddZdS )!    Nreduce)chain)DictListOptionalTupleTYPE_CHECKINGUnion)is_available)not_noneinit_device_mesh
DeviceMeshc                   @   s   e Zd ZdS )_DeviceMeshStubN)__name__
__module____qualname__ r   r   [/home/ubuntu/transcripts/venv/lib/python3.10/site-packages/torch/distributed/device_mesh.pyr      s    r   c                   C   s   d S Nr   r   r   r   r   _init_device_mesh_stub   s   r   ztorch.distributed.device_mesh)Backend)_find_pg_by_ranks_and_tag_get_default_group_get_group_tagget_backendget_process_group_ranksget_rankget_world_sizeinit_process_groupis_initialized	new_groupProcessGroupsplit_group)	ArrayLikezCDeviceMesh requires numpy >= 1.21 to be installed for type checkingc                	   @   s&  e Zd Zd%ddZd&ddZddd	eed
f deeed
f  ddfddZ		d'ddde
e ddfddZd(ddZddde
e fddZededefddZededefddZdddedefddZ	d'dedede
ej ddfdd Zdeeed
f  fd!d"Zdddeded fd#d$ZdS ))_MeshEnvreturnNc                 C   s"   g | _ i | _i | _i | _i | _d S r   )
mesh_stackchild_to_root_mappingmesh_dim_group_optionsroot_to_flatten_mappingflatten_name_to_root_dimsselfr   r   r   __init__C   s   z_MeshEnv.__init__r   c                 C   s    t | jdkrtd| jd S )Nr   z#No device mesh is currently active!)lenr'   RuntimeErrorr,   r   r   r   get_current_meshO   s   
z_MeshEnv.get_current_meshdevice_meshsubmesh_dim_names.submesh_dimsc                    sN   fdd|D } j }g }g }d}t||D ]K\}	}
t|	dkrO|j|	d | |	d | d}||	d |  |t|	d 7 }|| j  |
 jd  q||	d |  | j|	d   qtt|j	}|D ]}|
| ql|jg ||R  jdg|R  }  }|D ]}t j||dd}||v r|}q||_ | j|< |S )	Nc                    s    g | ]}t  fd d|dqS )c                    s   |  j | S r   )meshsize)xyr3   r   r   <lambda>`   s    z5_MeshEnv.create_sub_mesh.<locals>.<listcomp>.<lambda>   r   ).0mesh_dimr:   r   r   
<listcomp>^   s    
z,_MeshEnv.create_sub_mesh.<locals>.<listcomp>r   r<   r/   )	start_dimend_dimFmesh_dim_names_init_backend)r6   zipr0   flattenappendr*   _dim_group_infoslistrangendimremovepermutereshaper   r   device_typer(   )r-   r3   r4   r5   slice_dim_sizemesh_tensorslice_dim_idxslice_dim_group_infonum_dims_flattenmesh_dim_indicesmesh_dim_namemesh_dims_remained_idxidxpg_ranks_by_dimcur_rankmesh_ndsubmeshres_submeshr   r:   r   create_sub_meshT   sh   

	


z_MeshEnv.create_sub_meshrV   c                    sr  t |  fddt|jD }|sd fdd|D }| j i  tg tt j| j  	 R  }||v rLt
| d  dd| d | jv r_|| j  v r_| j  | S t|j }tt jj}|D ]}|| qq jjg ||R  d	|}  }	|D ]}
t j|
|fd
}|	|
v r|}q | j|< || j i |< t|| j  |< |S )Nc                    s   g | ]
}t  j|qS r   )r   rC   index)r=   flattened_mesh_dim_name	root_meshr   r   r?      s    z0_MeshEnv.create_flatten_mesh.<locals>.<listcomp>_c                    s   g | ]	}t  j| qS r   )r   rC   )r=   dimra   r   r   r?      s    z# already exists for submesh of the . z5The mesh_dim_names of submesh and flattened mesh are z-. Please specify another valid mesh_dim_name.r/   rC   )_mesh_resourcesget_root_meshr   rC   joinr+   
setdefaultr   rI   keysr1   r*   mathprodr6   r7   rJ   rK   rL   rM   rN   r   r   rO   r(   tuple)r-   r3   rV   flatten_dims_in_rootinvalid_dim_namesflattened_mesh_dim_sizeremained_dims_in_rootflatten_dim_in_rootrY   rZ   r[   flattened_meshres_flattened_meshr   ra   r   create_flatten_mesh   sb   






z_MeshEnv.create_flatten_meshc                 C   s   | j |d }|s|S |S r   )r(   get)r-   r3   rb   r   r   r   rh      s   z_MeshEnv.get_root_meshc                 C   sD   |  |}|j}|r |r t|dksJ d|d }| ||S dS )z
            Returns the index of the mesh dim in the root mesh.
            The device_mesh passed in needs to be sliced out from the root mesh
            or submesh of the root mesh.
            r<   z"The submesh can only be a 1D mesh.r   N)rh   rC   r0   get_mesh_dim_by_name)r-   r3   rb   child_mesh_dim_nameschild_mesh_dim_namer   r   r   get_root_mesh_dim   s   
z_MeshEnv.get_root_mesh_dimrO   c                 C   s   t |  S r   )_get_device_handledevice_countrO   r   r   r   num_devices_per_host      z_MeshEnv.num_devices_per_hostc                 C   s   t  t|  S r   )r   r%   r   r~   r   r   r   	num_hosts   s   z_MeshEnv.num_hostsc                 C   sT   |j d u st|j dkrtd||j vr"td| dd|j  t|j |S )Nr   zNo `mesh_dim_names` found.zMesh dimension 'z' does not exist.z.Available mesh dimensions are: mesh_dim_names=)rC   r0   KeyErrorr   r_   )r-   r3   rV   r   r   r   rx     s   



z_MeshEnv.get_mesh_dim_by_namerd   backend
pg_optionsc                 C   s   ||f| j |< d S r   )r)   )r-   rd   r   r   r   r   r   _set_mesh_dim_group_options  s   z$_MeshEnv._set_mesh_dim_group_optionsc           	         s   ||  |krtd| j|i  | j| }g |j| t fdd|D s4td| d  dd}g }|D ]4}||v rN|| }|d }|| n|j|}||f ||krltd| dd	| d
d|}q:|S )z
            Validate whether the mesh_dim_names is valid for slicing the given device_mesh.
            If valid, return dim indexes of the slice mesh in the device mesh.
            z'Cannot create a submesh from a submesh.c                 3   s    | ]}| v V  qd S r   r   )r=   rV   valid_mesh_dim_namesr   r   	<genexpr>2  s
    
z0_MeshEnv._get_slice_mesh_dims.<locals>.<genexpr>zInvalid mesh_dim_names z% specified. Valid mesh_dim_names are .r/   z specified. z!Found mesh dim indices to slice: re   z.Mesh dim indices should be in ascending order.)	rh   r1   r+   rj   rC   allr   rG   r_   )	r-   r3   rC   r+   curr_idxslice_mesh_dimsrV   mesh_indicesnext_idxr   r   r   _get_slice_mesh_dims  sD   


z_MeshEnv._get_slice_mesh_dimsc           	      C   s|   |  ||}|jd|d|j|}| }g }|D ]}t|j||fdd}||v r3|j| gng |_|	| q|S )z`
            Return all the submeshes of a given mesh dimension of the device mesh.
            r/   FrB   )
rx   r6   swapdimsrN   r7   r   r   rO   rH   rG   )	r-   r3   rV   r>   rY   rZ   res_submeshesmesh_1dr\   r   r   r   _get_all_submeshesS  s&   z_MeshEnv._get_all_submeshesr&   Nr&   r   r   )r3   r   r&   r   )r   r   r   r.   r2   r   strr   intr^   r   rv   rh   r{   staticmethodr   r   rx   C10dBackendOptionsr   r   r   r   r   r   r   r%   B   sj    



P

B


4r%   rg   cudarO   c                 C   s   t t| dS )a:  
        Get the module corresponding to the device_type which is cuda or cuda-like device.
        For example, when the device_type is cuda, the module `torch.cuda` is returned.
        Return None when there is no corresponding module for device_type, otherwise
        return the corresponding module.
        N)getattrtorchr~   r   r   r   r|   r  s   r|   c                   @   s  e Zd ZU dZeed< ejed< ee	edf  ed< ddddede
ejd	f dee	edf  d
eddf
ddZdd Zdd Zd7ddZd8ddZdefddZdd ZdedefddZde
ee	edf f dd fddZd9dee
eef  defd d!Zdee fd"d#Ze	d9dd$d%e
eee f dedee
ejd	f  dee	edf  dd f
d&d'Zd9dee defd(d)Zedefd*d+Zede	edf fd,d-Z defd.d/Z!d9dee
eef  defd0d1Z"deee  fd2d3Z#d9d4ee dd fd5d6Z$dS ):r   a  
        DeviceMesh represents a mesh of devices, where layout of devices could be
        represented as a n-d dimension array, and each value of the n-d dimensional
        array is the global id of the default process group ranks.

        DeviceMesh could be used to describe the layout of devices across the cluster,
        and serves as a proxy for communication among the device lists within the cluster.

        DeviceMesh can be used as a context manager.

        .. note::
            DeviceMesh follows SPMD programming model, which means the same PyTorch Python program
            is running on all processes/ranks in the cluster. Therefore, users need to make sure the
            `mesh` array (which describes the layout of devices) should be identical across all ranks.
            Inconsistent `mesh` will lead to silent hang.

        Args:
            device_type (str): The device type of the mesh. Currently supports: "cpu", "cuda/cuda-like".
            mesh (ndarray): A multi-dimensional array or an integer tensor describing the layout
                of devices, where the IDs are global IDs of the default process group.

        Returns:
            DeviceMesh: A :class:`DeviceMesh` object representing the device layout.

        The following program runs on each process/rank in an SPMD manner. In this example, we have 2
        hosts with 4 GPUs each.
        A reduction over the first dimension of mesh will reduce across
        columns (0, 4), .. and (3, 7), a reduction over the second dimension
        of mesh reduces across rows (0, 1, 2, 3) and (4, 5, 6, 7).

        Example::
            >>> # xdoctest: +SKIP("no rank")
            >>> from torch.distributed.device_mesh import DeviceMesh
            >>>
            >>> # Initialize device mesh as (2, 4) to represent the topology
            >>> # of cross-host(dim 0), and within-host (dim 1).
            >>> mesh = DeviceMesh(device_type="cuda", mesh=[[0, 1, 2, 3],[4, 5, 6, 7]])
        rO   r6   .rC   NTrB   r$   rD   r&   c                C   s
  || _ t|tjr|jjdkrtd| t|tjr%| jtj	dntj
|dtj	d| _|r5t|nd | _t| j  | _d | _|dkr|rS|   |   t r`t dkr`t | _| jt k }|ddv sqJ |ddkr~|d  nd | _d S d S )	Ncpuz!`mesh` must be a CPU tensor, got dtypedevicer   xlathreadedr   )r   r<   )rO   
isinstancer   Tensorr   type
ValueErrordetachtor   tensorr6   rn   rC   rF   tolist_flatten_mesh_list
_thread_id_get_or_create_default_group_init_process_groupsr    r   	threading	get_identr   nonzeror7   _coordinate_on_dim)r-   rO   r6   rC   rD   rank_coordsr   r   r   r.     s*   

zDeviceMesh.__init__c                 C   s   t  }|st  t }| j |kr td| d| j  dt| j}|sN|rN| }||krF|| dkrFtd| d| d| j d|	t
 |  t S )	Nz2Mesh should not be bigger than default world size z, but found z ranks!r   z8DeviceMesh only support homogeneous hardware, but found z ranks and  z	 devices!)r    r   r   r6   numelr1   r|   rO   r}   
set_devicer   r   )r-   default_initialized
world_sizedevice_handler   r   r   r   r     s0   
z'DeviceMesh._get_or_create_default_groupc                 C   s  g }t  }| jjdkr:| j t kr:ttt }tj	 r,t
|dkr,td|ddn|}|t|||jf nt| jjD ]}| jd|d| j|}|tjv r^tj| \}}nd\}}| jrmd| j|  nd	| }	d }t|d
d  }
d urt||| |	d}|D ]8}| }|
d u rt||||	d}|  |v rt||krtd|   d| d|tt|||jf qq@|| _d S )Nr<   gloozcpu:gloo,cuda:ncclmesh_default)r   ranks
group_descr/   )NNmesh_	mesh_dim_bound_device_id)	parent_pgr   split_ranksr   )r   r   r   r   zFEach device mesh dimension should get only one process group, but got z in !)r   r6   rK   r   r   rI   rJ   r   r   r   r   r!   rG   r   
group_namer   rN   r7   rg   r)   rC   r   r#   r   r   r0   r1   r   rH   )r-   dim_group_infosdefault_groupr   	dim_grouprd   rY   r   r   r   r   dim_meshsubgroup_ranksr   r   r   r     s   
	


zDeviceMesh._init_process_groupsc                 C   s   t j|  | S r   )rg   r'   rG   r,   r   r   r   	__enter__b  s   zDeviceMesh.__enter__c                 C   s   t j  d S r   )rg   r'   pop)r-   exc_type	exc_valueexc_tracebackr   r   r   __exit__h  s   zDeviceMesh.__exit__c                 C   sJ   | j sd| j d| j  d}|S d| j d| j  d| j  d}|S )NzDeviceMesh('z', )z, mesh_dim_names=)rC   rO   r6   r   )r-   device_mesh_reprr   r   r   __repr__l  s    zDeviceMesh.__repr__c                 C   s:   t | dd | _| jst| j| jj| j| j| jf| _| jS )N_hash)	r   r   hashr   r6   shaperO   rC   r   r,   r   r   r   __hash__t  s   	zDeviceMesh.__hash__otherc                 C   sb   t |tsdS t| t|krdS | j|jko0| jj|jjko0| j|jko0| j|jko0| j|jkS )NFT)	r   r   idr   r6   r   rO   rC   r   )r-   r   r   r   r   __eq__  s   



zDeviceMesh.__eq__c                 C   s|   | j stdt|tr|fn|}|| j kr| S t| |}tjj	  t
| ||}W d   |S 1 s7w   Y  |S )aU
  
            Slice the current DeviceMesh based on the mesh_dim_names given to create a submesh.
            The submesh created consists of the dimensions and the communicators indicated by
            ``mesh_dim_names``

            Args:
                mesh_dim_names (Union[str, Tuple[str]]): the name or the tuple of names of the
                mesh dimension of the DeviceMesh to create the submesh for.
            Returns:
                A :class:`DeviceMesh` object

            The following program runs on each process/rank in an SPMD manner in a world size of 8.
            In the first example:
                Calling mesh_2d["tp"] on rank 0, 1, 2, 3 returns a 1D submesh of DeviceMesh:([0, 1, 2, 3]).
                Calling mesh_2d["tp"] on rank 4, 5, 6, 7 returns a 1D submesh of  DeviceMesh:([4, 5, 6, 7]).
                Calling mesh_2d["dp"] on rank 0, 4 returns a 1D submesh of  DeviceMesh:([0, 4]).
                Calling mesh_2d["dp"] on rank 1, 5 returns a 1D submesh of  DeviceMesh:([1, 5]).
                Calling mesh_2d["dp"] on rank 2, 6 returns a 1D submesh of  DeviceMesh:([2, 6]).
                Calling mesh_2d["dp"] on rank 3, 7 returns a 1D submesh of  DeviceMesh:([3, 7]).

            In the second example:
                Calling mesh_3d["dp", "cp"] on rank 0, 1, 4, 5 returns a 2D submesh of DeviceMesh:([[0, 1], [4, 5]]).
                Calling mesh_3d["dp", "cp"] on rank 2, 3, 6, 7 returns a 2D submesh of DeviceMesh:([[2, 3], [6, 7]]).
                Calling mesh_3d["cp", "dp"] on rank 0, 1, 4, 5 returns a 2D submesh of DeviceMesh:([[0, 4], [1, 5]]).
                Calling mesh_3d["cp", "dp"] on rank 2, 3, 6, 7 returns a 2D submesh of DeviceMesh:([[2, 6], [3, 7]]).

            Example::
                >>> # xdoctest: +SKIP("no rank")
                >>> from torch.distributed.device_mesh import DeviceMesh
                >>>
                >>> # Initialize a 2D device mesh as (2, 4) to represent the topology
                >>> # of cross-host(dim 0), and within-host (dim 1).
                >>> mesh_2d = init_device_mesh(device_type="cuda", (2,4), mesh_dim_names=("dp", "tp"))
                >>> tp_mesh = mesh_2d["tp"]
                >>> dp_mesh = mesh_2d["dp"]
                >>>
                >>> # Initialize a 3D mesh.
                >>> mesh_3d = init_device_mesh(device_type="cuda", (2,2,2), mesh_dim_names=("dp", "pp", "cp"))
                >>> # The order of the mesh_dim_names provided deteremines the order of dimensions in the submesh.
                >>> dp_cp_mesh = mesh_3d["dp", "cp"]
                >>> cp_dp_mesh = mesh_3d["cp", "dp"]
            z1Cannot slice a DeviceMesh without mesh_dim_names!N)rC   r1   r   r   rg   r   r   _subclassesfake_tensorunset_fake_temporarilyr^   )r-   rC   r   r\   r   r   r   __getitem__  s"   -

zDeviceMesh.__getitem__r>   c                 C   s   t | ds	td| jjdkr|du rtd| jj ddd| jjdkr6|du r6tt| jd	 dd
  S t| }tj	
|d}|r[|| v r[|| jd	 dd
 }tt| S t|trft| |n|}tt| j| dd
  S )a  
            Returns the single ProcessGroup specified by mesh_dim, or, if mesh_dim is not specified and the
            DeviceMesh is 1-dimensional, returns the only ProcessGroup in the mesh.

            Args:
                mesh_dim (str/int, optional): it can be the name of the mesh dimension or the index
                of the mesh dimension. Default is None.

            Returns:
                A :class:`ProcessGroup` object.
            rH   z*DeviceMesh process groups not initialized!r<   NFound the DeviceMesh have  dimensionsJOptional kwarg `mesh_dim` needs to be specified when device_mesh.ndim > 1.zmIf you want to get the list of all the ProcessGroups in the DeviceMesh,please use `get_all_groups()` instead.r      )hasattrr1   r6   rK   r   r   rH   rg   rh   r*   rw   rk   r   r   rx   )r-   r>   rb   r*   r   r   r   r   	get_group  s4   

zDeviceMesh.get_groupc                    s    fddt  jjD S )z
            Returns a list of ProcessGroups for all mesh dimensions.

            Returns:
                A list of :class:`ProcessGroup` object.
            c                    s   g | ]}  |qS r   )r   )r=   ir,   r   r   r?     s    z-DeviceMesh.get_all_groups.<locals>.<listcomp>)rJ   r6   rK   r,   r   r,   r   get_all_groups  s   zDeviceMesh.get_all_groupsrf   groupc                C   sF  t | trLt| }t |tjr| |ks#|dur/t |tjs/||kr/tdt| d| tj|dtj	d}t
|||dd}t| || jfg|_|S t| }t|dkrZtd	|du rbtd
t |tjrr| jtj	ddntj|dtj	d}|jt|krtd|  dt| dt
|||dd}dd |D |_|S )aM  
            Constructs a :class:`DeviceMesh` with ``device_type`` from an
            existing :class:`ProcessGroup`.

            The constructed device mesh has number of dimensions equal to the
            number of groups passed. If more than one group is passed, then the
            ``mesh`` argument is required.
            NzInvalid mesh z for ProcessGroup with ranks r   r   FrB   r   z.Expects at least one ProcessGroup to be passedz0Must pass mesh if passing multiple ProcessGroups)r   r   zEExpects mesh with ndim equal to number of ProcessGroups but got mesh z and z ProcessGroupsc                 S   s    g | ]}t |t||jfqS r   )r   r   r   )r=   r   r   r   r   r?   J  s    z)DeviceMesh.from_group.<locals>.<listcomp>)r   r"   r   r   r   r   r   r   r   r   r   r   r   rH   rI   r0   r   r   rK   )r   rO   r6   rC   group_ranksr3   groupsr   r   r   
from_group  s\   



zDeviceMesh.from_groupc                 C   s   |d u r	| j  S | j |S r   )r6   r   r7   )r-   r>   r   r   r   r7   T  s   zDeviceMesh.sizec                 C   s   | j jS r   )r6   rK   r,   r   r   r   rK   W  s   zDeviceMesh.ndimc                 C   s   t | jjS r   )rn   r6   r   r,   r   r   r   r   [  r   zDeviceMesh.shapec                 C   s   t  S )z:
            Returns the current global rank.
            )r   r,   r   r   r   r   _  s   zDeviceMesh.get_rankc                 C   s`   | j dkr|du rtd| jj  dd|du rd}t| |}t|ts*J dtt|S )a{  
            Returns the local rank of the given mesh_dim of the DeviceMesh.

            Args:
                mesh_dim (str/int, optional): it can be the name of the mesh dimension or the index
                of the mesh dimension. Default is None.

            Returns:
                An integer denotes the local rank.

            The following program runs on each process/rank in an SPMD manner. In this example, we have 2
            hosts with 4 GPUs each.
            Calling mesh_2d.get_local_rank(mesh_dim=0) on rank 0, 1, 2, 3 would return 0.
            Calling mesh_2d.get_local_rank(mesh_dim=0) on rank 4, 5, 6, 7 would return 1.
            Calling mesh_2d.get_local_rank(mesh_dim=1) on rank 0, 4 would return 0.
            Calling mesh_2d.get_local_rank(mesh_dim=1) on rank 1, 5 would return 1.
            Calling mesh_2d.get_local_rank(mesh_dim=1) on rank 2, 6 would return 2.
            Calling mesh_2d.get_local_rank(mesh_dim=1) on rank 3, 7 would return 3.

            Example::
                >>> # xdoctest: +SKIP("no rank")
                >>> from torch.distributed.device_mesh import DeviceMesh
                >>>
                >>> # Initialize device mesh as (2, 4) to represent the topology
                >>> # of cross-host(dim 0), and within-host (dim 1).
                >>> mesh = DeviceMesh(device_type="cuda", mesh=[[0, 1, 2, 3],[4, 5, 6, 7]])
            r<   Nr   r   r   r   z1We expect ProcessGroup before calling `get_rank`!)rK   r1   r6   r   r   r   r"   r   )r-   r>   mesh_dim_groupr   r   r   get_local_ranke  s   zDeviceMesh.get_local_rankc                 C   s   | j r| j S dS )z
            Return the relative indices of this rank relative to all
            dimensions of the mesh. If this rank is not part of the mesh, return None.
            N)r   r,   r   r   r   get_coordinate  s   zDeviceMesh.get_coordinaterV   c                 C   s   | j stdt| |S )a\  
            Returns a 1D DeviceMesh by flattening the current DeviceMesh.

            If no mesh_dim_name is provided, the default is a string concatentaing the mesh_dim_names of the
            given submesh with each mesh_dim_name separated by "_". For example, if we have a 3D mesh
            DeviceMesh([[[0, 1], [2, 3]], [[4, 5], [6, 7]]], mesh_dim_names=("dp", "cp", "tp")), calling
            mesh_3d["dp", "cp"]._flatten() will create a 1D submesh DeviceMesh([0, 1, 2, 3], mesh_dim_names=("dp_cp",))
            on rank 0, 1, 2, 3 and a 1D submesh DeviceMesh([4, 5, 6, 7], mesh_dim_names=("dp_cp",)) on rank 4, 5, 6, 7.

            After the flattened dimension is created, to access the flattened dimesnion in mesh_3d, one can use the
            existing slicing method to obtain the flattened mesh through calling mesh_3d["dp_cp"].
            z3Cannot flatten a DeviceMesh without mesh_dim_names!)rC   r1   rg   rv   )r-   rV   r   r   r   _flatten  s
   zDeviceMesh._flattenr   r   r   )%r   r   r   __doc__r   __annotations__r   r   r   r   r
   boolr.   r   r   r   r   r   r   objectr   r   r   r"   r   r   r   r   r   r7   propertyrK   r   r   r   r   r   r   r   r   r   r   {  sp   
 '

*
t

 I.	B *rf   
mesh_shape.rC   r&   c                C   s   |dur.t t|t |krtdd| t |t |kr.tddt | dt | d| r=|  s=td|  d	d
td tjt|tj	d
|}W d   n1 s[w   Y  t| ||d}|S )a  
        Initializes a `DeviceMesh` based on `device_type`, `mesh_shape`, and `mesh_dim_names` parameters.

        This creates a DeviceMesh with an n-dimensional array layout, where `n` is the length of `mesh_shape`.
        If `mesh_dim_names` is provided, each dimension is labeled as `mesh_dim_names[i]`.

        .. note::
            `init_device_mesh` follows SPMD programming model, meaning the same PyTorch Python program
            runs on all processes/ranks in the cluster. Ensure `mesh_shape` (the dimensions of the nD array
            describing device layout) is identical across all ranks. Inconsistent `mesh_shape` may lead to hanging.

        .. note::
            If no process group is found, init_device_mesh will initialize distributed process group/groups
            required for distributed communications behind the scene.

        Args:
            device_type (str): The device type of the mesh. Currently supports: "cpu", "cuda/cuda-like".
                Passing in a device type with a GPU index, such as "cuda:0", is not allowed.
            mesh_shape (Tuple[int]): A tuple defining the dimensions of the multi-dimensional array
                describing the layout of devices.
            mesh_dim_names (Tuple[str], optional): A tuple of mesh dimension names to assign to each dimension
                of the multi-dimensional array describing the layout of devices. Its length must match the length
                of `mesh_shape`. Each string in `mesh_dim_names` must be unique.

        Returns:
            DeviceMesh: A :class:`DeviceMesh` object representing the device layout.

        Example::
            >>> # xdoctest: +SKIP("no rank")
            >>> from torch.distributed.device_mesh import init_device_mesh
            >>>
            >>> mesh_1d = init_device_mesh("cuda", mesh_shape=(8,))
            >>> mesh_2d = init_device_mesh("cuda", mesh_shape=(2, 8), mesh_dim_names=("dp", "tp"))

        Nz"Each mesh_dim_name must be unique.z/Found repeated mesh_dim_name in mesh_dim_names z6mesh_shape and mesh_dim_names should have same length!zFound len(mesh_dim_names): z and len(mesh_shape):r   z0Device type with index is not supported but got re   zUIf you maintained a 'torch.device' object, it's recommended to pass in 'device.type'.r   r   )rO   r6   rC   )r0   setr1   isalphar   r   arangerl   rm   r   viewr   )rO   r   rC   r6   r3   r   r   r   r     s2   )
)r   )8loggingrl   r   	functoolsr   	itertoolsr   typingr   r   r   r   r	   r
   r   torch.distributedr   torch.utils._typing_utilsr   __all__sysr   r   modulesr   r   torch._C._distributed_c10dr   r   "torch.distributed.distributed_c10dr   r   r   r   r   r   r   r   r    r!   r"   r#   	getLoggerr   loggernumpy.typingr$   ImportErrorwarninglocalr%   rg   r   r   r|   r   r   r   r   r   <module>   sf   
 8
  0	    7
