o
    }oiG                  
   @   sn  d dl Z d dlmZ d dlmZ d dlmZmZmZm	Z	 d dl
Z
d dlZd dlmZ d dlm  mZ d dlmZmZmZ zd dlZdaW n eefyR   daY nw G dd	 d	eZejejejd
ZG dd dejZG dd dejZde fddZ!de de fddZ"dd Z#dd Z$dd Z%dd Z&dPdd Z'dPd!d"Z(dPd#d$Z)dPd%d&Z*dZ+zdd d'l,m-Z- d d(l.m/Z/ d d)l0m1Z1m2Z2 d d*l3m1Z4 d d+l5m6Z6 d d,l7m8Z8m9Z9 d-ejd.eej: fd/d0Z;d-ejfd1d2Z<d-ejd.eej= fd3d4Z>d-ejd.eej= fd5d6Z?e;e;e;e;e>e>e?e<d7Z@W n eAy' ZB z
i Z@dZ+W Y dZB[BndZB[Bww d8e	ej d9e	ej d.eejgeej f fd:d;ZCd-ejd.eej= fd<d=ZDd8e	ej d9e	ej d.eejgeej f fd>d?ZEd@ejdAee ejf fdBdCZF	dQd@ejdDee eejgeej f f d.ejfdEdFZGdGejfdHdIZHi ZId@ejd.ejfdJdKZJd@ejfdLdMZKdNdO ZLdS )R    N)nullcontext)Enum)CallableDictOptionalType)CastToFloatCastToFloatAllloggingTFc                   @   s   e Zd ZdZdZdZdS )ExportFormatzAWhich format to use when exporting a Neural Module for deployment      N)__name__
__module____qualname____doc__ONNXTORCHSCRIPT r   r   K/home/ubuntu/.local/lib/python3.10/site-packages/nemo/utils/export_utils.pyr   #   s    r   )z.ptz.tsz.onnxc                       s&   e Zd Zd fdd	Zdd Z  ZS )TorchRMSNormư>c                    s   t    || _|| _dS )z(
        LayerNorm without bias
        N)super__init__weightvariance_epsilon)selfr   eps	__class__r   r   r   2   s   

zTorchRMSNorm.__init__c                 C   s\   | tjdjddd}|t|| j  }| jjtj	tj
fv r)| | jj}| j| S )Nr   T)keepdim)totorchfloat32powmeanrsqrtr   r   dtypefloat16bfloat16)r   hidden_statesvariancer   r   r   forward:   s
   
zTorchRMSNorm.forward)r   r   r   r   r   r-   __classcell__r   r   r   r   r   1   s    r   c                       s&   e Zd Z fddZdddZ  ZS )LinearWithBiasSkipc                    s$   t t|   || _|| _|| _d S N)r   r0   r   biasr   skip_bias_add)r   r   r2   r3   r   r   r   r   E   s   
zLinearWithBiasSkip.__init__Nc                 C   s:   |d u r| j }| jrt||| jfS t||| jd fS r1   )r   r3   Flinearr2   )r   xr   r   r   r   r-   K   s
   zLinearWithBiasSkip.forwardr1   r.   r   r   r   r   r0   D   s    r0   filenamec                 C   s>   t j| \}}zt|  W S  ty   td|  dw )NzExport file z4 extension does not correspond to any export format!)ospathsplitext	_EXT_DICTlowerKeyError
ValueError)r7   _extr   r   r   get_export_formatS   s   rA   outputprependc                 C   s8   |dkr| S t j| \}}| d| }t j||S )Nr   -)r8   r9   splitjoin)rB   rC   r9   r7   r   r   r   augment_filename[   s
   rG   c                 C   s   t | dr| jS | jS Nforward_for_export)hasattrrI   r-   )r   r   r   r   forward_methodd   s   
rK   c                 C   s<   t | }d }t|dr|j}|j}||_||fS d }||fS rH   )typerJ   rI   r-   )r   tpold_forward_methodrK   r   r   r   wrap_forward_methodk   s   
rO   c                 C   s6   t | }i }t|d tr|d }|d d }||fS )Nr    )list
isinstancedict)input_example
input_list
input_dictr   r   r   parse_input_examplew   s   rV   c                 C   s   i }|s| |  t| |D ]\}}|  ||< q|S t|D ]+}d }||v r4||   }nt|dkrB|   }|| v rN|d urN|||< q#|S )Nr   )extendvalueszipcpunumpyreversedlenpop)ort_input_namesinput_namesrU   rT   odictkvvalr   r   r   to_onnxrt_input   s    re   {Gz?c              	   C   s   d}|D ]8}t |\}}tjjddd  | j|i |}tj|}	|o,t|	||||}W d    n1 s7w   Y  q|rAdnd}
t	d| d|
  |S )	NTcudaF)enabledSUCCESSFAILzTorchscript generated at z% verified with torchscript forward : )
rV   r#   ampautocastr-   jitloadrun_ts_and_comparer
   info)modelrB   input_examplescheck_toleranceall_goodrS   rT   rU   output_examplets_modelstatusr   r   r   verify_torchscript   s   
rx   c                 C   s   t |}dd |jjD }ts#td| d t jj|dd d S t	
 }t	jj|_t	j| |dgd}~d}	|D ](}
t|
\}}| j|i |}t|tsT|f}t||||}|	oct||||}	q<|	rid	nd
}td| d|  |	S )Nc                 S   s   g | ]}|j qS r   )name).0noder   r   r   
<listcomp>   s    z"verify_runtime.<locals>.<listcomp>zONNX generated at z9, not verified - please install onnxruntime_gpu package.
T)
full_checkCUDAExecutionProvider)sess_options	providersri   rj   z verified with onnxruntime : )onnxrn   graphinputort_availabler
   warningcheckercheck_modelonnxruntimeSessionOptionsGraphOptimizationLevelORT_ENABLE_BASICgraph_optimization_levelInferenceSessionSerializeToStringrV   r-   rQ   tuplere   run_ort_and_comparerp   )rq   rB   rr   r`   rs   
onnx_modelr_   onnx_session_optsessrt   rS   rT   rU   ru   	ort_inputrw   r   r   r   verify_runtime   s.   


r   c              	   C   s   | |i |}d}t |D ]K\}}|| }	t|	rX|d}
td| d|	j d d}ztj|
|	 ||ds<d}W n t	yH   d}Y nw |sXt
d|	 d	|
  d}q|S )
NTrZ   Checking output 	, shape: :
rtolatolFz%Results mismatch! PyTorch(expected):
z
TorchScript:
)	enumerater#   	is_tensorr"   r
   debugshapeallcloserZ   	Exceptionrp   )rv   ts_input_listts_input_dictru   rs   ts_outrt   ioutexpectedtout	this_goodr   r   r   ro      s(   

ro   c                 C   s   |  d |}d}t|D ]U\}}|| }t|rat|}	td| d|j d d}
ztj|	|	 |d| ds=d}
W n t
yI   d}
Y nw |
satd|j d	| d
|	j d|	  d}q|S )NTr   r   r   d   r   Fz0onnxruntime results mismatch! PyTorch(expected, z):
z
ONNXruntime, )runr   r#   r   
from_numpyr
   r   r   r   rZ   r   rp   )r   r   ru   rs   ort_outrt   r   r   r   r   r   r   r   r   r      s,   

r   )FastLayerNorm)MixedFusedRMSNorm)FusedLayerNormMixedFusedLayerNorm)r   )FusedScaleMaskSoftmax)ColumnParallelLinearRowParallelLinearnreturnc                 C   s   t |  }t| tst| tr| j| j| j}}}n$t| tr-| j	j
| jd}}}nt| tr>| j	j
| jd}}}ndS |  }tj||||j|jd}|j|dd |S )z
        Replaces Apex's FusedLayerNorm with nn.LayerNorm. This is required for ONNX export.
        Args:
           n: the FusedLayerNorm pytorch module to replace
        Returns:
           Equivalent LayerNorm module
        TN)r   elementwise_affinedevicer(   strict)next
parametersrQ   r   r   normalized_shaper   r   MCoreFusedLayerNormr   r   r   epsilon
state_dictnn	LayerNormr   r(   load_state_dict)r   pr   r   affinen_statemodr   r   r   replace_FusedLayerNorm   s   	

r   c                 C   s:   t |  }t| trt|  d | j|j}|S dS )z
        Replaces Apex's MixedFusedRMSNorm with equivalent Pytorch layer. This is required for ONNX export.
        Args:
           n: the MixedFusedRMSNorm pytorch module to replace
        Returns:
           Equivalent module
        r   N)	r   r   rQ   r   r   r   r   r"   r   )r   r   r   r   r   r   replace_MixedFusedRMSNorm  s
   	
r   c                 C   s\   t | tst | tstdt|  j}t| j| j	| j
|}|  }|j|dd |S )z
        Replaces Apex's ColumnParallelLinear or RowParallelLinear with nn.Linear
        Args:
           n: the nn.Module pytorch module to replace
        Returns:
           Equivalent Linear module
        zSThis function can only change the ColumnParallelLinear or RowParallelLinear module.Fr   )rQ   r   r   r>   r   r   r   r0   r   r2   r3   r"   r   r   )r   devr   r   r   r   r   replace_ParallelLinear%  s   r   c                 C   sD   t | tstd| j  | S t| j| j| jd| j| j	| j
}|S )z
        Replaces Apex's FusedScaleMaskSoftmax with nn.LayerNorm. This is required for ONNX export.
        Args:
           n: the FusedScaleMaskSoftmax module to replace
        Returns:
           Equivalent LayerNorm module
        zEThis function can only change the FusedScaleMaskSoftmax module, got: F)rQ   r   r
   r   r   input_in_fp16input_in_bf16attn_mask_type	mask_funcsoftmax_in_fp32scale)r   r   r   r   r   replace_FusedScaleMaskSoftmax7  s   
r   )r   r   r   r   r   r   r   r   BaseTDestTc                    s$   dt jdtt j f fdd}|S )a+  
    Generic function generator to replace BaseT module with DestT. BaseT and DestT should have same atrributes. No weights are copied.
    Args:
        BaseT : module type to replace
        DestT : destination module type
    Returns:
        swap function to replace BaseT module with DestT
    r   r   c                    s.   t  sd S  fdd jD }| }|S )Nc                    s   g | ]}t  |d qS r1   )getattr)rz   ry   r   r   r   r|   g  s    z8simple_replace.<locals>.expansion_fn.<locals>.<listcomp>)rQ   __constants__)r   argsr   r   r   r   r   expansion_fnd  s
   
z$simple_replace.<locals>.expansion_fnr   Moduler   r   r   r   r   r   r   simple_replaceZ  s    
r   c                 C   s0   ddl m} || j| j| jd| j| j| j}|S )z
    Replaces MatchedScaleMaskSoftmax with exportable softmax layer
    Args:
        n: module to replace
    Returns:
        exportable module
    r   )MatchedScaleMaskSoftmaxF):nemo.collections.nlp.modules.common.megatron.fused_softmaxr   r   r   r   r   r   r   )r   r   r   r   r   r   replace_MatchedScaleMaskSoftmaxn  s
   	r   c                    s"   dt jdtt j f fdd}|S )z
    Generic function generator to replace BaseT module with DestT wrapper.
    Args:
        BaseT : module type to replace
        DestT : destination module type
    Returns:
        swap function to replace BaseT module with DestT
    r   r   c                    s    | }|S r1   r   )r   r   r   r   r   r     s   z!wrap_module.<locals>.expansion_fnr   r   r   r   r   wrap_module  s   
r   rq   mappingc                 C   sN   |  D ] \}}|d}| }|dd D ]}|j| }q||j|d < q| S )a  
    This function swaps nested modules as specified by "dot paths" in mod with a desired replacement. This allows
    for swapping nested modules through arbitrary levels if children

    NOTE: This occurs in place, if you want to preserve model then make sure to copy it first.

    .Nr    )itemsrE   _modules)rq   r   r9   new_modexpanded_path
parent_modsub_pathr   r   r   swap_modules  s   
r   
expansionsc                 C   sp   i }|   D ]\}}t|j}||v r|| |}|r|||< qt|dkr1tdt| d t| | | S )a  
    Top-level function to replace modules in model, specified by class name with a desired replacement.
    NOTE: This occurs in place, if you want to preserve model then make sure to copy it first.
    Args:
        model : top level module
        expansions : replacement dictionary: module class name -> replacement function generator
    Returns:
        model, possibly modified in-place
    r   zSwapped z modules)named_modulesrL   r   r]   r
   rp   r   )rq   r   r   ry   mm_typeswappedr   r   r   replace_modules  s   

r   r   c                 C   s   t j| S r1   )r#   rm   script)r   r   r   r   script_module  s   r   c                 C   s0   dt dti}t| t t| | t| t dS )a,  
    Top-level function to replace 'default set' of modules in model, called from _prepare_for_export.
    NOTE: This occurs in place, if you want to preserve model then make sure to copy it first.
    Args:
        model : top level module
    Returns:
        model, possibly modified in-place
    r   N)r   r   r   default_Apex_replacementsscript_replacements)rq   default_replacementsr   r   r   replace_for_export  s
   



r   c                 C   sP   ddl m} ttjtttjtttjtttjtt|t	d}t
| | dS )a  
    Function to put additional to/from float32 casts around operations known to require full precision.
    It was used with an extra post-parse script to have TRT preserve extra precision when --fp16 needed.
    Should not be needed with TRT 8.6.1 or later.
    r   )MaskedInstanceNorm1d)BatchNorm1dBatchNorm2dr   InstanceNorm1dr   N)'nemo.collections.tts.modules.submodulesr   r   r   r   r   r   r   r   r	   r   )rq   r   default_cast_replacementsr   r   r   add_casts_around_norms  s   



r   c           
      C   s$  t | }i }t|jj|D ]	\}}|||j< qt|jj|D ]	\}}|||j< q|jjD ]8}tt	|jD ]}|j| |v rI||j|  |j|< q6tt	|jD ]}|j| |v rd||j|  |j|< qQq-tt	|D ]}	||	 |jj|	 _qltt	|D ]}	||	 |jj|	 _q~t 
||  d S r1   )r   rn   rY   r   r   ry   rB   r{   ranger]   save)
rB   r`   output_namesr   
rename_mapinpry   r   r   r   r   r   r   rename_onnx_io  s*   
r  )rf   r1   )Mr8   
contextlibr   enumr   typingr   r   r   r   r   r#   torch.nnr   torch.nn.functional
functionalr4   
nemo.utilsr   r	   r
   r   r   ImportErrorModuleNotFoundErrorr   r   r   r;   r   r   r0   strrA   rG   rK   rO   rV   re   rx   r   ro   r   apex_available"apex.contrib.layer_norm.layer_normr   apex.normalizationr   #apex.normalization.fused_layer_normr   r   &megatron.core.fusions.fused_layer_normr   #megatron.core.fusions.fused_softmaxr   $megatron.core.tensor_parallel.layersr   r   r   r   r   Linearr   r   r   r   er   r   r   r   r   r   r   r   r   r  r   r   r   r   <module>   s   	





44
