o
    Xi                     @  s   d dl mZ d dlmZmZ d dlZd dlZ							d3d4ddZ											d5d6d$d%Z		&	'	(	)			d7d8d1d2Z
dS )9    )annotations)AnySequenceN            F
batch_sizeint
seq_length
vocab_sizetype_sequence_label_sizetype_vocab_size
num_labelsnum_choicesuse_input_maskbooluse_token_type_ids
use_labelsreturntuple[Any, ...]c
                 C  s   t jj| |g|}
d }|rtt| |}d }|r-|dks#J dt jj| |g|}d }d }d }|	ri|dks=J d|dksEJ d|dksMJ dt jj| g|}t jj| |g|}t jj| g|}|
|||||fS )Nr   ztype_vocab_size is nullz type_sequence_label_size is nullznum_labels is nullznum_choices is null)
onnxscripttoolstransformers_models
ids_tensortorchtrilones)r	   r   r   r   r   r   r   r   r   r   	input_ids
input_masktoken_type_idssequence_labelstoken_labelschoice_labels r$   `/home/ubuntu/.local/lib/python3.10/site-packages/onnxscript/tools/transformers_models/mistral.py_prepare_config_and_inputs   sF   r&   )      )   r)   )          c         eagerT
input_dimsSequence[tuple[int, int]]	with_mask0tuple[Any, list[tuple[torch.Tensor, ...]], dict]c              
     s  ddl m} ddlm  |||||||||d}ddddi}|
r+|ddddi |	r0|	|_ddd}|
r]G  fdddtjj}g }| D ]\}}|	|||||
 qG||||fS G  fdddtjj}g }| D ]\}}|	|||||
 qm||||fS )z
    Returns a model.
    See `MistralConfig
    <https://huggingface.co/docs/transformers/main/en/model_doc/mistral#transformers.MistralConfig>`_.
    The parameters are chosen for a unit test configuration.
    r   )MistralConfigMistralModel)num_hidden_layersr   hidden_sizeintermediate_sizemax_position_embeddingsnum_attention_headsnum_key_value_headssliding_windowbatchlength)r      rB   r
   seqr   r4   r   c                 S  s.   t | |||d\}}}}}}|r||fS |fS )N)r	   r   r   r   )r&   )r@   rC   r   r4   r   _r   r$   r$   r%   generate_example_inputsm   s    z2get_mistral_model.<locals>.generate_example_inputsc                      &   e Zd Z fddZdd Z  ZS )z6get_mistral_model.<locals>.MistralModelWrapperWithMaskc                      t     || _d S Nsuper__init__modelselfconfigr8   	__class__r$   r%   rK         
z?get_mistral_model.<locals>.MistralModelWrapperWithMask.__init__c                 S  s   | j ||dd}| S )NF)attention_mask	use_cacherL   to_tuple)rN   r   rS   model_outputr$   r$   r%   forward   s   z>get_mistral_model.<locals>.MistralModelWrapperWithMask.forward__name__
__module____qualname__rK   rX   __classcell__r$   r7   rQ   r%   MistralModelWrapperWithMask       r_   c                      rF   )z.get_mistral_model.<locals>.MistralModelWrapperc                   rG   rH   rI   rM   rP   r$   r%   rK      rR   z7get_mistral_model.<locals>.MistralModelWrapper.__init__c                 S  s   | j |dd}| S )NF)rT   rU   )rN   r   rW   r$   r$   r%   rX      s   z6get_mistral_model.<locals>.MistralModelWrapper.forwardrY   r$   r7   r^   r%   MistralModelWrapper   r`   ra   N)r@   r
   rC   r
   r   r
   r4   r   )
transformersr6   ,transformers.models.mistral.modeling_mistralr8   update_attn_implementationr   nnModuleappend)r2   r:   r9   r   r;   r<   r=   r>   r?   re   r4   r6   rO   dynamic_shapesrE   r_   example_args_collectionbsra   r$   r7   r%   get_mistral_modelE   s>   
	rm      
   smallrB   warmuprepeatrO   strr9   implementationri   c                 C  s   |dkrt tjj|| |d|ddddd||d
}n>|d	kr5t tjj|| |d
|d
d
ddd
d||d}n#|dv rPt tjj|| |d|dddddd||d}ntd|dtdi |S )a  
    Returns a model Phi to test or benchmark.

    Args:
        warmup: Number of inputs to generate.
        repeat: Number of inputs to generate for repeat.
        config: small, medium or large
        num_hidden_layers: number of hidden layers
        implementation: eager or sdpa
        with_mask: One or two inputs.
        dynamic_shapes: dynamic shapes or not

    Returns:
        Model and list of inputs.
    rp   r-   r.   r   r/   r   r   )
r2   r:   r9   r   r;   r<   r=   r>   re   r4   mediumi   r0   )r2   r:   r9   r   r;   r=   r>   r<   r?   re   r4   )largedefaulti }  i 8  r,   i   zUnexpected configuration .Nr$   )dictr   r   r   get_input_dims_for_llm
ValueErrorrm   )rq   rr   rO   r9   rt   ri   r4   	conf_dictr$   r$   r%   get_mistral_model_from_config   sb   r}   )r   r   r   r   FFF)r	   r
   r   r
   r   r
   r   r
   r   r
   r   r
   r   r
   r   r   r   r   r   r   r   r   )r'   r-   r   r.   r   r/   r   r   r0   r1   T)r2   r3   r4   r   r   r5   )rn   ro   rp   rB   r1   FT)rq   r
   rr   r
   rO   rs   r9   r
   rt   rs   ri   r   r4   r   r   r5   )
__future__r   typingr   r   r   $onnxscript.tools.transformers_modelsr   r&   rm   r}   r$   r$   r$   r%   <module>   s@   7`