o
    "i}                  
   @   s  U d dl Z d dlZd dlZd dlmZmZmZmZmZm	Z	 d dl
Z
d dlmZ ddlmZ ddgZe sQd dlZG dd	 d	Zd
d Zeejd _eejd _dS d dlmZmZmZmZmZmZmZmZmZm Z  e !e"Z#erzd dl$m%Z% W n e&y   e#'d Y nw G dd dej(Z)e) Z*e)e+d< dde,fddZ-G dd dZddde,dee.df deee,df  defddZdS )    N)DictListOptionalTupleTYPE_CHECKINGUnion)is_available   )not_noneinit_device_mesh
DeviceMeshc                   @   s   e Zd ZdS )_DeviceMeshStubN)__name__
__module____qualname__ r   r   [/home/ubuntu/SoloSpeech/.venv/lib/python3.10/site-packages/torch/distributed/device_mesh.pyr      s    r   c                   C   s   d S Nr   r   r   r   r   _init_device_mesh_stub   s   r   ztorch.distributed.device_mesh)
_find_pg_by_ranks_and_tag_get_default_group_get_group_tagget_process_group_ranksget_rankget_world_sizeinit_process_groupis_initialized	new_groupProcessGroup)	ArrayLikezCDeviceMesh requires numpy >= 1.21 to be installed for type checkingc                	   @   s   e Zd ZdddZd ddZddd	eed
f ddfddZddded fddZ	dddee
 fddZedede
fddZedede
fddZdddede
fddZ	d!de
dedeej ddfddZdS )"_MeshEnvreturnNc                 C   s   g | _ i | _i | _d S r   )
mesh_stackchild_to_parent_mappingmesh_dim_group_optionsselfr   r   r   __init__?   s   z_MeshEnv.__init__r   c                 C   s    t | jdkrtd| jd S )Nr   z#No device mesh is currently active!)lenr"   RuntimeErrorr%   r   r   r   get_current_meshF   s   
z_MeshEnv.get_current_meshparent_meshsubmesh_dim_names.c                    s    fdd|D } fdd|D }t t jj}|D ]}|| q jjg ||R  jdg|R  }  }|D ]}	t j	|	|dd}
||	v rM|
}q< |_
 fdd|D |_ | j|< |S )Nc                    s   g | ]
}t  j|qS r   )r
   mesh_dim_namesindex.0mesh_dim_namer,   r   r   
<listcomp>O   s    z._MeshEnv.create_child_mesh.<locals>.<listcomp>c                    s   g | ]} j |qS r   )meshsizer1   mesh_dimr3   r   r   r4   S   s    r(   Fr.   _init_backendc                    s   g | ]} j | qS r   )_dim_group_infosr7   r3   r   r   r4   n   s    
)listranger5   ndimremovepermutereshaper   r   device_type_parent_meshr;   r#   )r&   r,   r-   submesh_dimssubmesh_dim_sizesmesh_dims_remainedsubmesh_dimpg_ranks_by_dimcur_rankmesh_ndsubmeshres_submeshr   r3   r   create_child_meshK   sF   



z_MeshEnv.create_child_meshdevice_meshc                 C   s   | j |d S r   )r#   get)r&   rN   r   r   r   get_parent_meshu   s   z_MeshEnv.get_parent_meshc                 C   sD   |  |}|j}|r |r t|dksJ d|d }| ||S dS )z
            Return the index of the mesh dim in the parent mesh.
            The device_mesh passed in needs to be sliced out from a parent mesh.
               z%The child mesh can only be a 1D mesh.r   N)rP   r.   r)   get_mesh_dim_by_name)r&   rN   r,   child_mesh_dim_nameschild_mesh_dim_namer   r   r   get_parent_mesh_dimx   s   
z_MeshEnv.get_parent_mesh_dimrB   c                 C   s   t |  S r   )_get_device_handledevice_countrB   r   r   r   num_devices_per_host      z_MeshEnv.num_devices_per_hostc                 C   s   t  t|  S r   )r   r    rY   rX   r   r   r   	num_hosts   s   z_MeshEnv.num_hostsr2   c                 C   sT   |j d u st|j dkrtd||j vr"td| dd|j  t|j |S )Nr   zNo `mesh_dim_names` found.zMesh dimension 'z' does not exist.z.Available mesh dimensions are: mesh_dim_names=)r.   r)   KeyErrorr
   r/   )r&   rN   r2   r   r   r   rR      s   



z_MeshEnv.get_mesh_dim_by_namedimbackend
pg_optionsc                 C   s   ||f| j |< d S r   )r$   )r&   r]   r^   r_   r   r   r   _set_mesh_dim_group_options   s   z$_MeshEnv._set_mesh_dim_group_optionsr!   Nr!   r   r   )r   r   r   r'   r+   r   strrM   r   rP   intrU   staticmethodrY   r[   rR   r   Optionsr`   r   r   r   r   r    >   sB    



*
r    _mesh_resourcescudarB   c                 C   s   t t| dS )a:  
        Get the module corresponding to the device_type which is cuda or cuda-like device.
        For example, when the device_type is cuda, the module `torch.cuda` is returned.
        Return None when there is no corresponding module for device_type, otherwise
        return the corresponding module.
        N)getattrtorchrX   r   r   r   rV      s   rV   c                   @   s  e Zd ZU dZeed< ejed< ee	edf  ed< ddddede
ejd	f dee	edf  d
eddf
ddZdd Zdd Zd4ddZd5ddZdefddZdd ZdedefddZde
ee	edf f dd fddZd6dee
eef  defd d!Zdee fd"d#Ze	d6dd$d%e
eee f dedee
ejd	f  dee	edf  dd f
d&d'Zd6dee defd(d)Zedefd*d+Zede	edf fd,d-Z defd.d/Z!d6dee
eef  defd0d1Z"deee  fd2d3Z#dS )7r   a  
        DeviceMesh represents a mesh of devices, where layout of devices could be
        represented as a n-d dimension array, and each value of the n-d dimensional
        array is the global id of the default process group ranks.

        DeviceMesh could be used to describe the layout of devices across the cluster,
        and serves as a proxy for communication among the device lists within the cluster.

        DeviceMesh can be used as a context manager.

        .. note::
            DeviceMesh follows SPMD programming model, which means the same PyTorch Python program
            is running on all processes/ranks in the cluster. Therefore, users need to make sure the
            `mesh` array (which describes the layout of devices) should be identical across all ranks.
            Inconsistent `mesh` will lead to silent hang.

        Args:
            device_type (str): The device type of the mesh. Currently supports: "cpu", "cuda/cuda-like".
            mesh (ndarray): A multi-dimensional array or an integer tensor describing the layout
                of devices, where the IDs are global IDs of the default process group.

        Returns:
            DeviceMesh: A :class:`DeviceMesh` object representing the device layout.

        The following program runs on each process/rank in an SPMD manner. In this example, we have 2
        hosts with 4 GPUs each.
        A reduction over the first dimension of mesh will reduce across
        columns (0, 4), .. and (3, 7), a reduction over the second dimension
        of mesh reduces across rows (0, 1, 2, 3) and (4, 5, 6, 7).

        Example::
            >>> # xdoctest: +SKIP("no rank")
            >>> from torch.distributed.device_mesh import DeviceMesh
            >>>
            >>> # Initialize device mesh as (2, 4) to represent the topology
            >>> # of cross-host(dim 0), and within-host (dim 1).
            >>> mesh = DeviceMesh(device_type="cuda", mesh=[[0, 1, 2, 3],[4, 5, 6, 7]])
        rB   r5   .r.   NTr9   r   r:   r!   c                C   s   || _ t|tjr|jjdkrtd| t|tjr%| jtj	dntj
|dtj	d| _|r5t|nd | _t| j  | _d | _t | _|dkr{|rX|   |   | jt k }|ddv siJ |ddkrv|d  nd | _d S d S )Ncpuz!`mesh` must be a CPU tensor, got dtypedevicerm   xlar   )r   rQ   )rB   
isinstancerj   Tensorro   type
ValueErrordetachtord   tensorr5   tupler.   flattentolist_flatten_mesh_listrC   	threading	get_ident
_thread_id_get_or_create_default_group_init_process_groupsr   nonzeror6   _coordinate_on_dim)r&   rB   r5   r.   r:   rank_coordsr   r   r   r'      s(   

zDeviceMesh.__init__c                 C   s   t  }|st  t }| j |krtd| j  dt| j}|sK|rK| }||krC|| dkrCtd| d| d| j d|	t
 |  t S )Nz=Mesh should not be bigger than default world size, but found z ranks!r   z8DeviceMesh only support homogeneous hardware, but found z ranks and  z	 devices!)r   r   r   r5   numelr*   rV   rB   rW   
set_devicer   r   )r&   default_initialized
world_sizedevice_handlerY   r   r   r   r   	  s0   
z'DeviceMesh._get_or_create_default_groupc           	      C   s  g }| j jdkr#| j  t kr#|tt ttt t j	f n^t| j jD ]W}| j 
d|d| j |}|D ]C}| }|tjv rOtj| \}}nd\}}t|||d}|  |v rt||krrtd| j d| d|tt|||j	f q<q)|| _d S )NrQ   r(   )NN)ranksr^   r_   zFEach device mesh dimension should get only one process group, but got z in !)r5   r>   r   r   appendr   r   r<   r=   
group_nameswapdimsrA   r6   rz   rg   r$   r   r   r)   r*   r
   r;   )	r&   dim_group_infosr]   rH   dim_meshsubgroup_ranksr^   r_   	dim_groupr   r   r   r   &  sR   	



$zDeviceMesh._init_process_groupsc                 C   s   t j|  | S r   )rg   r"   r   r%   r   r   r   	__enter__j  s   zDeviceMesh.__enter__c                 C   s   t j  d S r   )rg   r"   pop)r&   exc_type	exc_valueexc_tracebackr   r   r   __exit__p  s   zDeviceMesh.__exit__c                 C   s:   | j sd| j  d}|S d| j  d| j  d}|S )NzDeviceMesh()z, mesh_dim_names=)r.   r5   rz   )r&   device_mesh_reprr   r   r   __repr__t  s   zDeviceMesh.__repr__c                 C   s>   t | dd | _| jst| j| jj| j| j| j| j	f| _| jS )N_hash)
ri   r   hashr{   r5   shaperB   r.   rC   r~   r%   r   r   r   __hash__|  s   
zDeviceMesh.__hash__otherc                 C   sn   t |tsdS t| t|krdS | j|jko6| jj|jjko6| j|jko6| j|jko6| j|jko6| j	|j	kS )NFT)
rq   r   idr{   r5   r   rB   r.   rC   r~   )r&   r   r   r   r   __eq__  s   




zDeviceMesh.__eq__c                    s    j stdt|tr|fn|}d| d j  d}| j kr" S t|t j ks6t fdd|D s:t||d } j |}t| j |t| D ]\}}||kr\t|qPt	
 |}|S )a  
            Slice the current DeviceMesh based on the mesh_dim_name given to create a child
            DeviceMesh.

            Args:
                mesh_dim_name (Union[str, Tuple[str]]): the name or the tuple of names of the
                mesh dimension of the parent DeviceMesh to create the child DeviceMesh for.
            Returns:
                A :class:`DeviceMesh` object

            The following program runs on each process/rank in an SPMD manner. In this example, we have 2
            hosts with 4 GPUs each.
            Calling mesh["tp"] on rank 0, 1, 2, 3 would return a 1D child DeviceMesh:([0, 1, 2, 3]).
            Calling mesh["tp"] on rank 4, 5, 6, 7 would return a 1D child DeviceMesh:([4, 5, 6, 7]).
            Calling mesh["dp"] on rank 0, 4 would return a 1D child DeviceMesh:([0, 4]).
            Calling mesh["dp"] on rank 1, 5 would return a 1D child DeviceMesh:([1, 5]).
            Calling mesh["dp"] on rank 2, 6 would return a 1D child DeviceMesh:([2, 6]).
            Calling mesh["dp"] on rank 3, 7 would return a 1D child DeviceMesh:([3, 7]).

            Example::
                >>> # xdoctest: +SKIP("no rank")
                >>> from torch.distributed.device_mesh import DeviceMesh
                >>>
                >>> # Initialize device mesh as (2, 4) to represent the topology
                >>> # of cross-host(dim 0), and within-host (dim 1).
                >>> mesh = DeviceMesh(device_type="cuda", mesh=[[0, 1, 2, 3],[4, 5, 6, 7]])
            z1Cannot slice a DeviceMesh without mesh_dim_names!zInvalid mesh_dim_name zG specified. Valid mesh_dim_names should be a contiguous subsequence of .c                 3   s    | ]}| j v V  qd S r   r.   r0   r%   r   r   	<genexpr>  s    

z)DeviceMesh.__getitem__.<locals>.<genexpr>r   )r.   r*   rq   rc   r)   allr\   r/   ziprg   rM   )r&   r.   	error_msgoutermost_dim_nameoutermost_dim_idxijrK   r   r%   r   __getitem__  s2   
zDeviceMesh.__getitem__r8   c                 C   s   t | ds	td| jjdkr|du rtd| jj ddd| jjdkr,|du r,d	}nt|tr7t| |n|}tt	| j
| dd
  S )a  
            Returns the single ProcessGroup specified by mesh_dim, or, if mesh_dim is not specified and the
            DeviceMesh is 1-dimensional, returns the only ProcessGroup in the mesh.

            Args:
                mesh_dim (str/int, optional): it can be the name of the mesh dimension or the index
                of the mesh dimension. Default is None.

            Returns:
                A :class:`ProcessGroup` object.
            r;   z*DeviceMesh process groups not initialized!rQ   NFound the DeviceMesh have  dimensionsJOptional kwarg `mesh_dim` needs to be specified when device_mesh.ndim > 1.zmIf you want to get the list of all the ProcessGroups in the DeviceMesh,please use `get_all_groups()` instead.r   r	   )hasattrr*   r5   r>   rq   rc   rg   rR   r
   r   r;   r&   r8   r   r   r   	get_group  s"   
zDeviceMesh.get_groupc                    s    fddt  jjD S )z
            Returns a list of ProcessGroups for all mesh dimensions.

            Returns:
                A list of :class:`ProcessGroup` object.
            c                    s   g | ]}  |qS r   )r   )r1   r   r%   r   r   r4     s    z-DeviceMesh.get_all_groups.<locals>.<listcomp>)r=   r5   r>   r%   r   r%   r   get_all_groups  s   zDeviceMesh.get_all_groupsr   groupc                C   s:  t | trFt| }t |tjr| |ks|dur)||kr)tdt| d| tj|dtj	d}t
|||dd}t| || jfg|_|S t| }t|dkrTtd	|du r\td
t |tjrl| jtj	ddntj|dtj	d}|jt|krtd|  dt| dt
|||dd}dd |D |_|S )aN  
            Contstructs a :class:`DeviceMesh` with ``device_type`` from an
            existing :class:`ProcessGroup`.

            The constructed device mesh has number of dimensions equal to the
            number of groups passed. If more than one group is passed, then the
            ``mesh`` argument is required.
            NzInvalid mesh z for ProcessGroup with ranks rk   rn   Fr9   r   z.Expects at least one ProcessGroup to be passedz0Must pass mesh if passing multiple ProcessGroups)rm   ro   zEExpects mesh with ndim equal to number of ProcessGroups but got mesh z and z ProcessGroupsc                 S   s    g | ]}t |t||jfqS r   )r   r   r   )r1   r   r   r   r   r4   <  s    z)DeviceMesh.from_group.<locals>.<listcomp>)rq   r   r   rj   rr   rz   rt   rc   rw   rd   r   r   r   r;   r<   r)   ru   rv   r>   )r   rB   r5   r.   group_ranksrN   groupsr   r   r   
from_group  sV   


zDeviceMesh.from_groupc                 C   s   |d u r	| j  S | j |S r   )r5   r   r6   r   r   r   r   r6   F  s   zDeviceMesh.sizec                 C   s   | j jS r   )r5   r>   r%   r   r   r   r>   I  s   zDeviceMesh.ndimc                 C   s   t | jjS r   )rx   r5   r   r%   r   r   r   r   M  rZ   zDeviceMesh.shapec                 C   s   t  S )z:
            Returns the current global rank.
            )r   r%   r   r   r   r   Q  s   zDeviceMesh.get_rankc                 C   s`   | j dkr|du rtd| jj  dd|du rd}t| |}t|ts*J dtt|S )a{  
            Returns the local rank of the given mesh_dim of the DeviceMesh.

            Args:
                mesh_dim (str/int, optional): it can be the name of the mesh dimension or the index
                of the mesh dimension. Default is None.

            Returns:
                An integer denotes the local rank.

            The following program runs on each process/rank in an SPMD manner. In this example, we have 2
            hosts with 4 GPUs each.
            Calling mesh_2d.get_local_rank(mesh_dim=0) on rank 0, 1, 2, 3 would return 0.
            Calling mesh_2d.get_local_rank(mesh_dim=0) on rank 4, 5, 6, 7 would return 1.
            Calling mesh_2d.get_local_rank(mesh_dim=1) on rank 0, 4 would return 0.
            Calling mesh_2d.get_local_rank(mesh_dim=1) on rank 1, 5 would return 1.
            Calling mesh_2d.get_local_rank(mesh_dim=1) on rank 2, 6 would return 2.
            Calling mesh_2d.get_local_rank(mesh_dim=1) on rank 3, 7 would return 3.

            Example::
                >>> # xdoctest: +SKIP("no rank")
                >>> from torch.distributed.device_mesh import DeviceMesh
                >>>
                >>> # Initialize device mesh as (2, 4) to represent the topology
                >>> # of cross-host(dim 0), and within-host (dim 1).
                >>> mesh = DeviceMesh(device_type="cuda", mesh=[[0, 1, 2, 3],[4, 5, 6, 7]])
            rQ   Nr   r   r   r   z1We expect ProcessGroup before calling `get_rank`!)r>   r*   r5   r
   r   rq   r   r   )r&   r8   mesh_dim_groupr   r   r   get_local_rankW  s   zDeviceMesh.get_local_rankc                 C   s   | j r| j S dS )z
            Return the relative indices of this rank relative to all
            dimensions of the mesh. If this rank is not part of the mesh, return None.
            N)r   r%   r   r   r   get_coordinate  s   zDeviceMesh.get_coordinaterb   ra   r   )$r   r   r   __doc__rc   __annotations__rj   rr   r   r   r   boolr'   r   r   r   r   r   r   objectr   r   rd   r   r   r   r   re   r   r6   propertyr>   r   r   r   r   r   r   r   r   r      sn   
 '

(
D

 ?$	> *r   
mesh_shape.r.   r!   c                C   s   |dur.t t|t |krtdd| t |t |kr.tddt | dt | d| r=|  s=td|  d	d
td tjt|tj	d
|}W d   n1 s[w   Y  t| ||d}|S )a  
        Initializes a `DeviceMesh` based on `device_type`, `mesh_shape`, and `mesh_dim_names` parameters.

        This creates a DeviceMesh with an n-dimensional array layout, where `n` is the length of `mesh_shape`.
        If `mesh_dim_names` is provided, each dimension is labeled as `mesh_dim_names[i]`.

        .. note::
            `init_device_mesh` follows SPMD programming model, meaning the same PyTorch Python program
            runs on all processes/ranks in the cluster. Ensure `mesh_shape` (the dimensions of the nD array
            describing device layout) is identical across all ranks. Inconsistent `mesh_shape` may lead to hanging.

        .. note::
            If no process group is found, init_device_mesh will initialize distributed process group/groups
            required for distributed communications behind the scene.

        Args:
            device_type (str): The device type of the mesh. Currently supports: "cpu", "cuda/cuda-like".
                Passing in a device type with a GPU index, such as "cuda:0", is not allowed.
            mesh_shape (Tuple[int]): A tuple defining the dimensions of the multi-dimensional array
                describing the layout of devices.
            mesh_dim_names (Tuple[str], optional): A tuple of mesh dimension names to assign to each dimension
                of the multi-dimensional array describing the layout of devices. Its length must match the length
                of `mesh_shape`. Each string in `mesh_dim_names` must be unique.

        Returns:
            DeviceMesh: A :class:`DeviceMesh` object representing the device layout.

        Example::
            >>> # xdoctest: +SKIP("no rank")
            >>> from torch.distributed.device_mesh import init_device_mesh
            >>>
            >>> mesh_1d = init_device_mesh("cuda", mesh_shape=(8,))
            >>> mesh_2d = init_device_mesh("cuda", mesh_shape=(2, 8), mesh_dim_names=("dp", "tp"))

        Nz"Each mesh_dim_name must be unique.z/Found repeated mesh_dim_name in mesh_dim_names z6mesh_shape and mesh_dim_names should have same length!zFound len(mesh_dim_names): z and len(mesh_shape):r   z4Device type with GPU index is not supported but got z. zUIf you maintained a 'torch.device' object, it's recommended to pass in 'device.type'.rk   rl   )rB   r5   r.   )r)   setr*   isalpharj   ro   arangemathprodrd   viewr   )rB   r   r.   r5   rN   r   r   r   r     s2   )
)rh   )/loggingr   r|   typingr   r   r   r   r   r   rj   torch.distributedr   utils._typing_utilsr
   __all__sysr   r   modulesr   r   "torch.distributed.distributed_c10dr   r   r   r   r   r   r   r   r   r   	getLoggerr   loggernumpy.typingr   ImportErrorwarninglocalr    rg   r   rc   rV   rd   r   r   r   r   <module>   sZ   
 0
l	   Z
