o
    ,i                     @   s   d dl Z d dlZd dlmZmZmZmZmZmZm	Z	m
Z
 ddlmZ d dlmZ d dlmZ ddgZd	e	ejee eed
f eeef f deej fddZ		ddee dee deeeeef   deeee	eejf    dee f
ddZdS )    N)AnyDictListOptionalSequenceTupleUnioncast   )Module_get_device_index)ExceptionWrapper	get_a_varparallel_applyobj.returnc                 C   sx   t | tjr| S t | ttfr!tt| D ]}t |tjr |  S qt | tr:tt|  D ]}t |tjr9|  S q-d S )N)	
isinstancetorchTensorlisttuplemapr   dictitems)r   result r   ^/home/ubuntu/SoloSpeech/.venv/lib/python3.10/site-packages/torch/nn/parallel/parallel_apply.pyr   
   s   
modulesinputs
kwargs_tupdevicesc           
         s  t | t |ksJ dt |  dt | |dur%t | t |ks$J nttttf i ft |  }|durBt | t |ksAJ ndgt |  }dd |D }dd |D }t i t t	 		ddt
dtd	td
tttf dttt
tjf  dttjj ddffdd t | dkr fddtt| ||||D }|D ]}|  q|D ]}|  qn d| d |d |d |d |d  g }tt |D ]}| }	t|	tr|	  ||	 q|S )a  Apply each `module` in :attr:`modules` in parallel on each of :attr:`devices`.

    Args:
        modules (Module): modules to be parallelized
        inputs (tensor): inputs to the modules
        devices (list of int or torch.device): CUDA devices

    :attr:`modules`, :attr:`inputs`, :attr:`kwargs_tup` (if given), and
    :attr:`devices` (if given) should all have same length. Moreover, each
    element of :attr:`inputs` can either be a single object as the only argument
    to a module, or a collection of positional arguments.
    zThe number of modules z& is not equal to the number of inputs Nc                 S   s   g | ]}t |d qS )Tr   .0xr   r   r   
<listcomp>3   s    z"parallel_apply.<locals>.<listcomp>c                 S   s   g | ]}t j|qS r   )r   cudacurrent_streamr"   r   r   r   r%   4   s    imoduleinputkwargsdevicestreamr   c                    s  t  |d u r5t|}|d u r1 td|  dd| < W d    d S 1 s*w   Y  d S | }|d u r?t j|}zpt j|G t j|1 t j	j
d d t|ttfsa|f}||i |}W d    n1 srw   Y  W d    n1 sw   Y  W d    n1 sw   Y   || < W d    W d S 1 sw   Y  W d S  ty    td|  d| d| < W d    Y d S 1 sw   Y  Y d S w )Nzin replica zQ, no device was provided and no tensor input was found; device cannot be resolved)wherer&   )enabledz on device )r   set_grad_enabledr   r   
get_devicer&   r'   r,   r-   ampautocastr   r   r   	Exception)r(   r)   r*   r+   r,   r-   toutput)autocast_enabledgrad_enabledlockresultsr   r   _worker9   sN   


  
&&zparallel_apply.<locals>._worker   c              
      s4   g | ]\}\}}}}}t j ||||||fd qS ))targetargs)	threadingThread)r#   r(   r)   r*   r+   r,   r-   )r;   r   r   r%   ]   s
    
r   NN)lenr	   r   strr   r?   Lockr   is_grad_enabledis_autocast_enabledintr   r   r   r,   r&   Stream	enumeratezipstartjoinranger   r   reraiseappend)
r   r   r    r!   streamsthreadsthreadoutputsr(   r6   r   )r;   r7   r8   r9   r:   r   r      s\   ,

#


&
rA   )r?   r   typingr   r   r   r   r   r   r   r	   r   r   torch.cuda._utilsr   torch._utilsr   __all__r   r   rC   rG   r,   r   r   r   r   r   <module>   s*    (: