o
    xi*T                     @  s   d Z ddlmZ ddlZddlmZ ddlmZ ddlm	Z	 ddl
Z
ddl
mZ ddlmZ dae	r>dd	lmZ dd
lmZ dddZed\ZZdddZdddZG dd dZG dd de
jjZdS )zPyTorch-specific functionality.    )annotationsN)reduce)mul)TYPE_CHECKING)util)Node)Tensor)Modulec                   s    du rt   t| drt|  S t| dr|   S t| dr%| jS  t|  z fddt| D W S  t	yC   g  Y S w )zFigure out the shape of tensors possibly embedded in tuples.

    for example:
    - [0,0] returns (2)
    - ([0,0], [0,0]) returns (2,2)
    - (([0,0], [0,0]),[0,0]) returns ((2,2),2).
    Nsize	get_shapeshapec                   s&   g | ]}t | vrt| nd qS )r   )idnested_shape).0itemseen W/home/ubuntu/.local/lib/python3.10/site-packages/wandb/integration/torch/wandb_torch.py
<listcomp>+   s    z nested_shape.<locals>.<listcomp>)
sethasattrlistr
   r   as_listr   addr   	TypeError)array_or_tupler   r   r   r   r      s    



r      log_freqintreturn	list[int]c                 C  s   ddg}| |t < |S )z3Create tracking structure used by log_track_update.r   )LOG_TRACK_THRESHOLD)r   	log_trackr   r   r   log_track_init8   s   r$   r#   boolc                 C  s0   | t   d7  < | t  | t k rdS d| t < dS )zmCount (log_track[0]) up to threshold (log_track[1]), reset count (log_track[0]) and return true when reached.   Fr   T)LOG_TRACK_COUNTr"   )r#   r   r   r   log_track_update?   s
   r(   c                   @  sx   e Zd ZdZdd Z			d%d&ddZ			d%d&ddZdd Zdd Zdd Z	dd Z
dd Zd'd d!Zd(d"d#Zd$S ))TorchHistoryz$History methods specific to PyTorch.c                 C  s,   t jddai | _d| _d | _tj| _d S )NtorchCould not import torch@   )	wandbr   
get_moduler*   _hook_handles	_num_bins_is_cuda_histc_supported
TorchGraph
hook_torchselfr   r   r   __init__K   s
   zTorchHistory.__init__ r   moduler	   namestrprefixr   r   r    Nonec              
     s   | t |dsg |_fddt| z| fdd}|jd < |jd  W d	S  tyO } ztd| d W Y d	}~d	S d	}~ww )
zThis instruments hooks into the pytorch module.

        log parameters after a forward pass
        log_freq - log gradients/parameters every N batches.
        _wandb_hook_namesc                   sT   t |sd S |  D ]\}}t|tjjr|j}n|}| d  |  q
d S )Nparameters/)	r(   named_parameters
isinstancer*   autogradVariabledatalog_tensor_statscpu)r8   input_outputr#   r9   	parameterrC   )r;   r5   r   r   parameter_log_hooke   s   z@TorchHistory.add_log_parameters_hook.<locals>.parameter_log_hookc                   s   | || S Nr   )modinpoutp)log_track_paramsrI   r   r   <lambda>s   s    z6TorchHistory.add_log_parameters_hook.<locals>.<lambda>r>   (Trying to register forward_hook failed (z ) - skipping parameter tracking.N)	r   r=   r$   register_forward_hookr/   appendRuntimeErrorr-   termwarn)r5   r8   r9   r;   r   hooker   )rN   rI   r;   r5   r   add_log_parameters_hookS   s"   

z$TorchHistory.add_log_parameters_hookc                 C  sf   || }t |dsg |_| D ] \}}|jr0t|}|jd| |  | |d| | | qdS )aQ  This instruments hooks into the PyTorch module slog gradients after a backward pass.

        Args:
            module: torch.nn.Module - the module to instrument
            name: str - the name of the module
            prefix: str - the prefix to add to the name
            log_freq: log gradients/parameters every N batches
        r=   z
gradients/N)r   r=   r?   requires_gradr$   rR   _hook_variable_gradient_stats)r5   r8   r9   r;   r   rH   log_track_gradr   r   r   add_log_gradients_hook~   s   
z#TorchHistory.add_log_gradients_hookc                 C  s"  t |ttfr8t |ttfr.t |d ttfr.dd |D }t |ttfr.t |d ttfstdd |D }|  }t|dsSt|}t	d|j
 d|j d}|jrl|  }| }| |  }|}|d	}|jr| jdu rz	|j| jd
 W n ty   d| _Y nw d| _| js| }nt |tjjtjjfs|tjj}|jst |tjtjfs|tj}| |rdS | |}|  }|  }|r|dkrdn|}|dk rdn|}||kr||}}||krt| g}|   }t||g}	n|j| j||d}|   }tj ||| jd d}	|r{|	! }
|! }d}t"|
d }t#|D ]'}|
| }|
|d  }|dkrW|dksc||d krg|dkrg|} nqA||  |7  < t|}t|
}	t$j%j&|t$j'|( |	( fdidd dS )zPAdd distribution statistics on a tensor's elements to the current History entry.r   c                 S  s   g | ]	}|D ]}|qqS r   r   )r   sublistr   r   r   r   r          z1TorchHistory.log_tensor_stats.<locals>.<listcomp>c                 S  s   g | ]}|   d qS ))detachclonereshape)r   tr   r   r   r      s    r   zExpected Tensor, not .Nr^   )binsFT)rd   minmaxr&   )steps)np_histogram)commit))r@   tupler   r*   catr_   r`   r   typer   
__module____name__	is_sparserE   coalesce_valuesnumelra   is_cudar1   histcr0   rS   cudaFloatTensorDoubleTensor_no_finite_values_remove_infs_nansre   r   rf   r   linspacenumpylenranger-   run_log	Histogramtolist)r5   tensorr9   clssparse_zerosbacking_valuesflattmintmaxrd   bins_np	tensor_npbin_idxnum_bucketsistartendr   r   r   rD      s   








,


zTorchHistory.log_tensor_statsc                   s   t |tjjst|}td|j d|j j	}|dur.
|r.td dfdd | fdd	}|j< |S )
zZLogs a Variable's gradient's distribution statistics next time backward() is called on it.zExpected torch.Variable, not rc   Nz(A hook has already been set under name ""c                   s   t |sd S | j  d S rJ   )r(   rD   rC   )gradr#   )r9   r5   r   r   	_callback  s   z=TorchHistory._hook_variable_gradient_stats.<locals>._callbackc                   s
    | S rJ   r   )r   )r   r#   r   r   rO     s   
 z<TorchHistory._hook_variable_gradient_stats.<locals>.<lambda>)r@   r*   rA   rB   rl   r   rm   rn   r/   get_torch_hook_handle_is_valid
ValueErrorregister_hook)r5   varr9   r#   r   handler   )r   r#   r9   r5   r   rY     s   
z*TorchHistory._hook_variable_gradient_statsc                 C  s"   | j  D ]}|  qi | _ d S rJ   )r/   valuesremove)r5   r   r   r   r   
unhook_all  s   

zTorchHistory.unhook_allc                 C  s   | j |}|  d S rJ   )r/   popr   )r5   r9   r   r   r   r   unhook     zTorchHistory.unhookc                 C  s   |  }|d u r
dS |j|v S )NF)hooks_dict_refr   )r5   r   dr   r   r   r   #  s   
z(TorchHistory._torch_hook_handle_is_validr   r   r%   c                 C  s&   |j tdgkpt|   S Nr   )r   r*   Sizeisfiniteallr   r5   r   r   r   r   rx   *  s   &zTorchHistory._no_finite_valuesc                 C  s    t | s|t | }|S rJ   )r*   r   r   r   r   r   r   ry   -  s   zTorchHistory._remove_infs_nansN)r7   r7   r   )
r8   r	   r9   r:   r;   r:   r   r   r    r<   )r   r   r    r%   )r   r   r    r   )rn   rm   __qualname____doc__r6   rW   r[   rD   rY   r   r   r   rx   ry   r   r   r   r   r)   H   s$    .j
r)   c                      sV   e Zd Z fddZedddZdd Z	dd	d
Zedd Zedd Z	  Z
S )r2   c                   s   t  d t | _d S )Nr*   )superr6   r   _graph_hooksr4   	__class__r   r   r6   5  r   zTorchGraph.__init__Nr   c                 C  s$   t d t }|j|||d |S )Nz<logging graph, to disable use `wandb.watch(log_graph=False)`)	graph_idx)r-   termlogr2   hook_torch_modules)r   model	criterionr   graphr   r   r   r3   9  s   
zTorchGraph.hook_torchc                   s     fdd}|S )Nc                   s   t | jvr	d S t|ts|f}dd |  D }tt | t| t||dd |D d}| jt | < | 	 D ]	}| jt |< q: 
|  jsvt|d drZ|d j _nt|d trv|d rvt|d d drv|d d j _ jt | h8  _jstjjd < d S d S )Nc                 S  s    g | ]\}}|t | fqS r   )r   r
   )r   pnameparamr   r   r   r   I  s    zNTorchGraph.create_forward_hook.<locals>.after_forward_hook.<locals>.<listcomp>c                 S  s   g | ]
\}}t t|d qS )r&   )r   r   )r   r   r
   r   r   r   r   T  s    )r   r9   
class_nameoutput_shape
parametersnum_parametersr   grad_fngraph_)r   r   r@   rj   r?   r   r:   r   nodes_by_idr   add_nodecriterion_passedr   r   r   r   r-   r~   summary)r8   inputrG   r   noder   r   r   r9   r5   r   r   after_forward_hookC  sB   

z:TorchGraph.create_forward_hook.<locals>.after_forward_hookr   )r5   r9   r   r   r   r   r   create_forward_hook@  s   (zTorchGraph.create_forward_hookc                   st  t dd d}| }t|dr|jrtdd|_|r!||_d|_| D ]\}}	|p.t|}|r7|d | }|d7 }t	|	 j
jsE d S  fd	d
dD }
|d u rT|}t	|	t|
rd| j|	||d q%|  jt|	hO  _z,|	| ||}|tjjjdtt| < t|dsg |_|jdtt|  W q% ty } ztjd| ddd W Y d }~q%d }~ww d S )Nr*   r+   r   _wandb_watch_calledzYou can only call `wandb.watch` once per model.  Pass a new instance of the model if you need to call wandb.watch again in your code.Trc   r&   c                   s$   g | ]}t  j|rt j|qS r   )r   nngetattr)r   module_classnamer*   r   r   r     s    

z1TorchGraph.hook_torch_modules.<locals>.<listcomp>)	Container
Sequential
ModuleList
ModuleDict)r;   parentz	topology/r=   rP   z) - skipping graph tracking.F)repeat)r   r.   r   r   r   r   r   named_childrenr:   r@   r   r	   rj   r   r   r   rQ   r   r-   r~   _torchr/   r=   rR   rS   rT   )r5   r8   r   r;   r   r   layersr   r9   
sub_modulemodule_types
graph_hookrV   r   r   r   r   m  sZ   




zTorchGraph.hook_torch_modulesc           $        s  t dd dd |jD } fdd|jD }dd |D }|d  }i }i }i }	| D ]H\}
}||
 }t|j jjs@q/|j}i }||t	|< t
 |	|j< |D ]#}|| }t|j jjrv|j}||t	|< |	|j |t	|  qSq/d	d |d  D }i }i } fd
d|jD D ]O}t	|j}
d}d}d}|jD ]4}t|j jjsq|j}|t	| }|
|v r|t	| }|du st||ft||fkr|}|}|}q|||
< |j||j< q|  }t }tt||d d}|| ||_i }dd |jD }| |\}}tt|d  D ]c\}} ||j	}t	|}
t| jjs-q|
|vr4qt	||
 j}!|!|v rE||! }"ntt|||! d }"||!< ||" |||" tt|||
 d}#|#||
< ||# ||"|# q|S )a  Recover something like neural net layers from PyTorch Module's and the compute graph from a Variable.

        Example output for a multi-layer RNN. We confusingly assign shared embedding values
        to the encoder, but ordered next to the decoder.

        rnns.0.linear.module.weight_raw rnns.0
        rnns.0.linear.module.bias rnns.0
        rnns.1.linear.module.weight_raw rnns.1
        rnns.1.linear.module.bias rnns.1
        rnns.2.linear.module.weight_raw rnns.2
        rnns.2.linear.module.bias rnns.2
        rnns.3.linear.module.weight_raw rnns.3
        rnns.3.linear.module.bias rnns.3
        decoder.weight encoder
        decoder.bias decoder
        r*   r+   c                 S  s   i | ]}t ||qS r   r   r   nr   r   r   
<dictcomp>  s    z0TorchGraph.from_torch_layers.<locals>.<dictcomp>c                   s    g | ]}t |j jjr|qS r   r@   objr   	Parameterr   r   r   r   r     s
    z0TorchGraph.from_torch_layers.<locals>.<listcomp>c                 S  s   i | ]	}t |j|jqS r   )r   r   r9   r   r   r   r   r     r]   r   c                 S  s   i | ]	\}}t ||qS r   r   )r   r   r   r   r   r   r     r]   c                 3  s$    | ]}t |j jjr|V  qd S rJ   r   r   r   r   r   	<genexpr>  s    
z/TorchGraph.from_torch_layers.<locals>.<genexpr>N)r   r   c                 S  s   i | ]}t |j|qS r   )r   r   r   r   r   r   r     s    )r   r.   nodesreachable_descendentsitemsr@   r   r   r	   r   r   r9   r   r   descendent_bfsr|   	itertoolscountr   nextr   rootfrom_torch_compute_graphreversedr   ancestor_bfsr   add_edge)$r   module_graphvariablemodule_nodes_by_hashmodule_parameter_nodesnames_by_pidreachable_param_nodesreachable_paramsmodule_reachable_paramsnamespidreachable_nodesr   r8   reachable_hash	reachabler   node_depthsparameter_module_namesparameter_modules
param_node	best_node
best_depthbest_reachable_paramsdepthreduced_module_graphrmg_idsrmg_rootrmg_nodes_by_pidmodule_nodes_by_pidcompute_graphcompute_node_vars_mid
rmg_module	rmg_paramr   r   r   from_torch_layers  s   











zTorchGraph.from_torch_layersc                 C  sV   t dd}t }||_d|_| D ]}| j|| 7  _qt	|j
|_|S )Nr{   zCould not import numpyr   )r   r.   r-   r   r   child_parametersr   prodr
   rl   rn   r   )r   nidr8   r{   r   rH   r   r   r   node_from_module!  s   zTorchGraph.node_from_moduler   )NNr   N)rn   rm   r   r6   classmethodr3   r   r   r  r  __classcell__r   r   r   r   r2   4  s    .
?
tr2   rJ   )r   r   r    r!   )r#   r   r    r%   )r   
__future__r   r   	functoolsr   operatorr   typingr   r-   r   wandb.data_typesr   r*   r   torch.nnr	   r   r}   r'   r"   r$   r(   r)   
data_typesGraphr2   r   r   r   r   <module>   s(    
 

	 m