o
    }oi                     @   s  d dl Z d dlZd dlZd dlZd dlmZ d dlm  m	Z
 d dlmZ d dlmZ d dlmZ d dlmZ d dlmZ d dlmZ d dlmZmZ d	Zd
d ZeG dd dZdd ZG dd dejejej Z!e"dkrd dl#Z#e#$ Z%e%j&ddd e%j&de'dd e%( Z)e Z*eee*Z+e ,ddgddgD ]C\Z-Z.e-re.rqe!e*e-e.B dZ/e0ej1j2ddZ3ee-e.dZ4ee4Z5ej6j7e/e+ej8e)j9e)j:dddd d dd!de5gd"e3dd# qdS dS )$    N)
DataLoader)	lightning)llm)fn)io)track_io)	JitConfigJitTransformz#/home/TestData/lite/hf_cache/squad/c                    sD   t  d   fdd}tj| d jd}|j|ddg dd	 |S )
N	tokenizerc                    sX   d}| d }| d }| d d }t |tr|d }||||d } |}||dS )	NzM
    ### Instruction:
    {}

    ### Input:
    {}

    ### Response:
    {}contextquestionanswerstextr   z<eos>)	input_idslabels)
isinstancelistformattext_to_ids)examplespromptinstructioninputoutputr   tokensr
    Z/home/ubuntu/.local/lib/python3.10/site-packages/tests/collections/llm/test_nemo_jit_cb.pyfmt&   s   	


z"make_squad_hf_dataset.<locals>.fmtztrain[:100])splitpad_token_idF   )idtitler   r   r   )batched
batch_sizeremove_columns)getattrr   HFDatasetDataModuleeos_idmap)	data_pathr
   r   
datamoduler   r   r   make_squad_hf_dataset#   s   r-   c                   @   s0   e Zd Zddg dfddZdd Zdd	 Zd
S )OrdTokenizer0u     )bos_idr)   pad_idc                 C   s(   || _ || _|| _t| j|k sJ d S N)
vocab_sizenum_reserved_tokensspecial_token_nameslen)selfr4   r5   r6   r   r   r   __init__G   s   zOrdTokenizer.__init__c                 C   s:   || j di v r| j d |S || j v r| j | S t)Nr6   )__dict__getindexAttributeError)r8   namer   r   r   __getattr__M   s
   

zOrdTokenizer.__getattr__c                    s0   t t fddt |}t| jk sJ |S )Nc                    s    j t|  S r3   )r5   ord)xr8   r   r   <lambda>V   s    z*OrdTokenizer.text_to_ids.<locals>.<lambda>)r   r*   maxr4   )r8   r   	token_idsr   rB   r   r   U   s   zOrdTokenizer.text_to_idsN)__name__
__module____qualname__r9   r?   r   r   r   r   r   r.   E   s    r.   c                 C   s   |   } | jd }| jd |jd kr+| dd dd d f  } |ddd f  }n*| jd |jd d krE| dd dd d f  } ntdt|j d t| j | d||dfS )N.   z%Mismatched labels and logits shapes ( )floatshape
contiguous
ValueErrorstrview)logitsr   n_clsr   r   r   align_labels[   s   
 rU   c                       s<   e Zd Z		d fdd	ZdddZdd	 Zd
d Z  ZS )DummyJitModelNFc                    s   t    || _|| _d S r3   )superr9   has_jitr
   )r8   r
   rX   	__class__r   r   r9   i   s   

zDummyJitModel.__init__returnc              	   C   s@   t | dsttddtjdddddtdd| _d S d S )Nmoduler/   i      i   g?)dropout)hasattrnn
Sequential	EmbeddingTransformerEncoderLayerLinearr\   rB   r   r   r   configure_modelr   s   



zDummyJitModel.configure_modelc                 C   sr   | j di |}| jr| j jd usJ t| j jsJ n| j jd u s$J tjjjj}t	| j |s7J t
| j |S )Nr   )r\   rX   _compiled_call_implcallabletorchr`   modules	containerra   r   type)r8   batchr   expected_clsr   r   r   forwardz   s   zDummyJitModel.forwardc                 C   sz   | j rt| ds
J | jdksJ | jnt| drJ |d}|dd }| d|d i}t||\}}t||S )N	_compiledTr   	loss_maskr   r   )	rX   r_   ro   popr;   rn   rU   Fcross_entropy)r8   rl   r   rp   r   rS   r   r   r   training_step   s   
zDummyJitModel.training_step)NF)r[   N)rF   rG   rH   r9   re   rn   rt   __classcell__r   r   rY   r   rV   h   s    
	rV   __main__z	--devicesrK   )defaultz--max-steps)rk   rw   TF)r
   rX   gh㈵>)lr)	use_torchuse_thundergpuautog        g      ?)devices	max_stepsacceleratorstrategylog_every_n_stepslimit_val_batchesnum_sanity_val_stepsaccumulate_grad_batchesgradient_clip_valuse_distributed_sampler	callbacks)modeldatatraineroptimlog);	itertoolsfiddlefdlr   plrh   torch.nnr`   torch.nn.functional
functionalrr   torch.utils.datar   nemonlnemo.collectionsr   nemo.collections.llmr   nemo.lightningr   nemo.lightning.io.mixinr    nemo.lightning.pytorch.callbacksr   r	   	DATA_PATHr-   r.   rU   LightningModuleIOMixinFNMixinrV   rF   argparseArgumentParserparseradd_argumentint
parse_argsargsr
   r   productry   rz   r   buildsgdpytorch_sgd_with_flat_lrr   
jit_config	transformapifinetuneTrainerr}   r~   r   r   r   r   <module>   sn   "*
