o
    "i#                     @   s   d dl Z d dlmZmZmZmZmZmZmZm	Z	m
Z
 d dlZd dlmZ d dlmZ d dlmZ ddlmZmZ dZG dd	 d	eZd
ededefddZG dd dZeed	ddejdeeejj  dejfddZdejdefddZdS )    N)	AnycastDictIterableListNoReturnOptionalSetTuple)_State)DistributedDataParallel   )_get_registrycontract c                       s   e Zd Zd fddZefdejdeej deej de	ddf
d	d
Z
dddZdejdeej ddfddZdddZdddZdejdeedf dee	ef defddZdejdeej dejdejfddZ  ZS )_ReplicateStatereturnNc                    sN   t    t | _d| _t | _| j| _g | _d| _	d | _
i | _g | _d S )NF)super__init__nnParameterListmodulehas_initialized_param_list_orig_module_param_names_no_sync
_init_args_init_kwargs_comm_hook_argsself	__class__ e/home/ubuntu/SoloSpeech/.venv/lib/python3.10/site-packages/torch/distributed/_composable/replicate.pyr      s   



z_ReplicateState.__init__r   ignored_modulesignored_paramsprefixc           
      C   s   t |rd S ||v rd S |tkr| dnt}|jddD ]\}}||vr5| j| | j| |  q| D ]\}}	| j|	||| | d q:d S )N.F)recurse)r(   )_is_fully_sharded_ROOT_MODULE_PREFIXnamed_parametersr   appendr   named_children_collect_params)
r!   r   r&   r'   r(   recurse_prefixnpnamechild_moduler$   r$   r%   r0      s&   
z_ReplicateState._collect_paramsc                    s"   t jdd fdd}|  d S )NT)	recursivec                      s<    j d usJ  j j i  j    t  _ i  _d S N)r   initr   register_comm_hooktupler$   r    r$   r%   
_lazy_init?   s
   
z-_ReplicateState.lazy_init.<locals>._lazy_init)torch_disable_dynamo)r!   r;   r$   r    r%   	lazy_init>   s   

z_ReplicateState.lazy_initc                 K   s   | j rd S d| _ |dd }|| _dd |D }ddlm} || | ||| d|v rV|d d urM|d }t|tjrG|j	dkrGd |d	< n
|g|d	< nd |d	< |
d t| jfi || _t| jt| j_d S )
NTdevice_meshc                 S   s   h | ]}|  D ]}|qqS r$   )
parameters).0mr3   r$   r$   r%   	<setcomp>V   s    z'_ReplicateState.init.<locals>.<setcomp>r   )_localize_dtensor	device_idcpu
device_ids)r   getr   %torch.distributed.tensor.parallel.ddprD   r0   
isinstancer<   devicetypepopr   r   _ddpweakrefref	replicatestate_ddp_weakref)r!   r   r&   kwargsr?   r'   rD   rE   r$   r$   r%   r8   I   s&   

z_ReplicateState.initc                 C   s0   | j D ]\}}| jj|i | q| j   d S r7   )r   rN   r9   clear)r!   	comm_argscomm_kwargsr$   r$   r%   r9   r   s   z"_ReplicateState.register_comm_hookc                 O   s   || _ || _d S r7   )r   r   r!   argsrT   r$   r$   r%   record_init_argsw   s   
z _ReplicateState.record_init_argsrY   .rT   c                 C   s2   | j s| jr
|   | j | j_| jj|i |S r7   )r   r   r>   r   rN   require_backward_grad_sync_pre_forward)r!   r   rY   rT   r$   r$   r%   forward_pre_hook{   s   z _ReplicateState.forward_pre_hookinputoutputc                 C   s   | j |S r7   )rN   _post_forward)r!   r   r^   r_   r$   r$   r%   forward_post_hook   s   z!_ReplicateState.forward_post_hookr   N)__name__
__module____qualname__r   r,   r   Moduler	   	Parameterstrr0   r>   r8   r9   rZ   r
   r   r   r]   r<   Tensorra   __classcell__r$   r$   r"   r%   r      sT    

 

)



r   rY   rT   r   c                  O   s   t d)NzGDDP does not support deepcopy. Please use state dict for serialization.)AssertionError)rY   rT   r$   r$   r%   unimplemented_deepcopy   s   rl   c                   @   s0   e Zd Zdd ZdeddfddZd
dd	ZdS )DDPc                 O   s"   | j d }|j|g|R i |S )z
        Override ``__new__`` to remove the DDP class and directly construct
        the original class for cases like indexing into a container module.
           )__mro____new__)clsrY   rT   orig_clsr$   r$   r%   rp      s   
zDDP.__new__requires_gradient_syncr   Nc                 C   s   | t | _dS )a  
        Sets if the module should sync gradients. This can be used to implement
        gradient accumulation without communication.

        Args:
            requires_gradient_sync (bool): Whether to reduce gradients for the
                module's parameters.
        N)rQ   rR   r   )r!   rs   r$   r$   r%   set_requires_gradient_sync   s   	zDDP.set_requires_gradient_syncc                 O   s   t | j||f d S r7   )rQ   rR   r   r.   rX   r$   r$   r%   r9      s   zDDP.register_comm_hookrb   )rc   rd   re   rp   boolrt   r9   r$   r$   r$   r%   rm      s    
rm   )	state_clsr   r&   c                 K   s&  t jd d|v rt|d tt jfstdt|d  t| r'td|du r.i }nt	|}t
tt| }| j|jdd |dd}|durkd	d
lm} ||durkd	dlm}m} | | | | | |j |j| |fi | | j}dti}	td|j t|f|	}
|
| _| S )zReplicates a module

    Args:
        module (torch.nn.Module): module to replicate

    Example::
        >>> # xdoctest: +REQUIRES(module:torch._C._distributed_c10d)
        >>> module = nn.Linear(3, 3)
        >>> replicate(module)
    ztorch.distributed.replicaterE   z6Expected device_id to be int or torch.device, but got zGCannot apply `replicate()` on a Module already managed by `fully_shard`NT)with_kwargsr?   r   )_mesh_resources)rD   _reconstruct_dtensor__deepcopy__rm   )r<   _C_log_api_usage_oncerJ   intrK   RuntimeErrorrL   r+   setr   r   rQ   rR   register_forward_pre_hookr]   rH   torch.distributed.device_meshrx   get_parent_meshrI   rD   ry   register_forward_hookra   rZ   r#   rl   rc   rm   )r   r&   rT   rR   r?   rx   rD   ry   rq   dctnew_clsr$   r$   r%   rQ      s>   


rQ   c                 C   s   t | }|du r
dS d|v S )z+Check if module is marked with fully_shard.NFfully_shard)r   )r   registryr$   r$   r%   r+      s   r+   r7   )rO   typingr   r   r   r   r   r   r   r	   r
   r<   torch.nnr   #torch.distributed._composable_stater   torch.nn.parallelr   r   r   r,   r   rl   rm   rf   rQ   ru   r+   r$   r$   r$   r%   <module>   s*   ,}E