o
    iV                     @   s  d dl Z d dlZd dlZd dlZd dlZd dlmZmZ d dlm	Z	 d dl
mZ d dlmZmZmZmZ ddlmZmZ ddlmZ dd	lmZ dd
lmZ ddlmZ ddlmZ ddlmZ ddl m!Z! ddl"m"Z" e rd dl#Z$d dl#m%Z% d dl#m&Z' G dd de%Z(e	) j*Z+ee,j-j-a.t.j-j-a/de+ d0 Z1G dd dZ2dee3ej4f de3de3fddZ5de2de3d e3d!e6e7e3e8f  fd"d#Z9d$e3d%e6e3 fd&d'Z:d(e3de3d e3fd)d*Z;d(e3de3d!e6e7e3e8f  fd+d,Z<d-e3d.e=fd/d0Z>d-e3de2d1e3d.e7e3e3e6f fd2d3Z?de2de3d!e6e7e3e8f  d.e3fd4d5Z@de2d!e6e7e3e8f  fd6d7ZAde2de3d e3d!e6e7e3e8f  d8e8f
d9d:ZB			dKd;e3d<ee3 d=ee d>ee3 d.ef
d?d@ZCdAe3d.e8fdBdCZDdDdE ZEdFefdGdHZFG dIdJ dJe!ZGdS )L    N)ArgumentParser	Namespace)date)Path)AnyCallableOptionalUnion   )CONFIG_MAPPING_NAMESMODEL_NAMES_MAPPING)FEATURE_EXTRACTOR_MAPPING_NAMES)IMAGE_PROCESSOR_MAPPING_NAMES)PROCESSOR_MAPPING_NAMES)TOKENIZER_MAPPING_NAMES)VIDEO_PROCESSOR_MAPPING_NAMES)is_libcst_available   )BaseTransformersCLICommand)add_fast_image_processor)
CSTVisitor)matchersc                   @   sL   e Zd ZdZdd ZdejddfddZdejfd	d
Zdej	fddZ
dS )ClassFinderzC
        A visitor to find all classes in a python module.
        c                 C   s   g | _ g | _d| _d S NF)classespublic_classesis_in_classself r   \/home/ubuntu/.local/lib/python3.10/site-packages/transformers/commands/add_new_model_like.py__init__/   s   
zClassFinder.__init__nodereturnNc                 C   s   | j |jj d| _dS )zwRecord class names. We assume classes always only appear at top-level (i.e. no class definition in function or similar)TN)r   appendnamevaluer   r   r"   r   r   r    visit_ClassDef4   s   
zClassFinder.visit_ClassDefc                 C   s
   d| _ d S r   )r   r'   r   r   r    leave_ClassDef9   s   
zClassFinder.leave_ClassDefc                 C   s   t jt jt jt  dgdgd}| js:t ||r<|jd jd j	j
}|dkr>|jd j
j}dd |D | _dS dS dS dS )	z:Record all public classes inside the `__all__` assignment.)target)targets)bodyr   __all__c                 S   s   g | ]}|j j qS r   )r&   ).0elementr   r   r    
<listcomp>E   s    z9ClassFinder.visit_SimpleStatementLine.<locals>.<listcomp>N)mSimpleStatementLineAssignAssignTargetNamer   matchesr,   r+   r*   r&   elementsr   )r   r"   !simple_top_level_assign_structureassigned_variabler7   r   r   r    visit_SimpleStatementLine<   s   z%ClassFinder.visit_SimpleStatementLine)__name__
__module____qualname____doc__r!   cstClassDefr(   r)   r2   r:   r   r   r   r    r   *   s    r   z
# coding=utf-8
# Copyright aN   the HuggingFace Team. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
c                   @   s   e Zd ZdZdefddZdS )
ModelInfoszI
    Retrieve the basic information about an existing model classes.
    lowercase_namec                 C   s   |  dddd| _| jtvr| jdd | jtvr%t| dt| j | _t| j | _| jdd| _| jt	v rSt	| j \| _
| _| jdkrNd n| j| _nd\| _
| _t| jd\| _| _t| jd | _t| jd | _t| jd | _d S )	N _-z is not a valid model nameConfig PreTrainedTokenizerFast)NN)lowerreplacerB   r   
ValueErrorr   
paper_nameconfig_classcamelcase_namer   tokenizer_classfast_tokenizer_classr   getimage_processor_classfast_image_processor_classr   video_processor_classr   feature_extractor_classr   processor_class)r   rB   r   r   r    r!   c   s&   


zModelInfos.__init__N)r;   r<   r=   r>   strr!   r   r   r   r    rA   ^   s    rA   	file_namenew_content	add_afterc                 C   s   t | ddd}| }W d   n1 sw   Y  ||d\}}|| | | }t | ddd}|| W d   dS 1 sCw   Y  dS )ac  
    A utility to add some content inside a given file.

    Args:
        file_name (`str` or `os.PathLike`):
            The name of the file in which we want to insert some content.
        new_content (`str`):
            The content to add.
       add_after (`str`):
           The new content is added just after the first instance matching it.
    rutf-8encodingNr   w)openreadsplitwrite)rX   rY   rZ   fold_contentbeforeafterr   r   r    add_content_to_file   s   
"rh   old_model_infosnew_lowercase_namenew_model_paper_namefilenames_to_addc              	      s  d dd |dddD }| j | j} fdd|dd	 D }g }|D ] \}}td
|rA|d \}	}
|	|
p<|f|d< q(|||f q(tt	d d d d| d| ddd tt	d d d d| d| ddd |D ]P\}}|r|dd}t
t	d d | }| }W d	   n1 sw   Y  td  d|tj}|D ]}tt	d d | | ||||d qqsd	S )a  
    Add a model to all the relevant mappings in the auto module.

    Args:
        old_model_infos (`ModelInfos`):
            The structure containing the class information of the old model.
        new_lowercase_name (`str`):
            The new lowercase model name.
        new_model_paper_name (`str`):
            The fully cased name (as in the official paper name) of the new model.
        filenames_to_add (`list[tuple[str, bool]]`):
            A list of tuples of all potential filenames to add for a new model, along a boolean flag describing if we
            should add this file or not. For example, [(`modeling_xxx.px`, True), (`configuration_xxx.py`, True), (`tokenization_xxx.py`, False),...]
    rG   c                 s       | ]}|  V  qd S Ntitler.   xr   r   r    	<genexpr>       z-add_model_to_auto_mappings.<locals>.<genexpr>rE   rD   c                    s    g | ]\}}|  d |fqS )autorJ   r.   filenameto_addold_lowercase_namer   r    r0      s    z.add_model_to_auto_mappings.<locals>.<listcomp>r   Nz2(?:tokenization)|(?:image_processing)_auto_fast.pymodelsru   zconfiguration_auto.pyz
        ("z", "z
Config"),
zOCONFIG_MAPPING_NAMES = OrderedDict[str, str](
    [
        # Add configs here
rY   rZ   z"),
zcMODEL_NAMES_MAPPING = OrderedDict[str, str](
    [
        # Add full (and cased) model names here
_fast.py.pyz( {8,12}\(\s*"z",.*?\),\n)(?: {4,12}\(|\]))joinrJ   rb   rB   rN   researchr$   rh   TRANSFORMERS_PATHr`   ra   findallDOTALL)ri   rj   rk   rl   new_cased_nameold_cased_namecorrected_filenames_to_addfilery   previous_fileprevious_to_addrx   rd   matching_linesmatchr   rz   r    add_model_to_auto_mappings   sR   "


r   new_paper_namer   c           	      C   s   d}t ddtdd| }td|  d|  d}g }|D ]}d	| d
| }d|v r1|d7 }|| qd|}|| | S )a4  
    Create a new doc file to fill for the new model.

    Args:
        new_paper_name (`str`):
            The fully cased name (as in the official paper name) of the new model.
        public_classes (`list[str]`):
            A list of all the public classes that the model will have in the library.
    u   

⚠️ Note that this file is in Markdown but contain specific syntax for our doc-builder (similar to MDX) that may not be rendered properly in your Markdown viewer.

-->

z# ?rG   zcoding=utf-8
z<!--z
        # z#

        ## Overview

        The a<   model was proposed in [<INSERT PAPER NAME HERE>](<INSERT PAPER LINK HERE>) by <INSERT AUTHORS HERE>.
        <INSERT SHORT SUMMARY HERE>

        The abstract from the paper is the following:

        <INSERT PAPER ABSTRACT HERE>

        Tips:

        <INSERT TIPS ABOUT MODEL HERE>

        This model was contributed by [INSERT YOUR HF USERNAME HERE](https://huggingface.co/<INSERT YOUR HF USERNAME HERE>).
        The original code can be found [here](<INSERT LINK TO GITHUB REPO HERE>).

        ## Usage examples

        <INSERT SOME NICE EXAMPLES HERE>

        z## z

[[autodoc]] Modelz
    - forward

)r   sub	COPYRIGHTrJ   textwrapdedentr$   r   )	r   r   
added_notecopyright_for_markdowndoc_templatedoc_for_classesclass_doc	class_docr   r   r    create_doc_file   s$   
r   r{   c                 C   s   t d d d d }t|d}| }W d   n1 sw   Y  td|  d|d	}d
| d| d}tt d d d d ||d dS )a  
    Insert the new model in the doc `_toctree.yaml`, in the same section as the old model.

    Args:
        old_lowercase_name (`str`):
            The old lowercase model name.
        new_lowercase_name (`str`):
            The old lowercase model name.
        new_model_paper_name (`str`):
            The fully cased name (as in the official paper name) of the new model.
    docssourceenz_toctree.ymlr[   Nz- local: model_doc/z\n {8}title: .*?\nr   z      - local: model_doc/z
        title: 
r~   )	REPO_PATHr`   ra   r   r   grouprh   )r{   rj   rk   toc_filerd   contentold_model_tocnew_tocr   r   r    insert_model_in_doc_toc  s   

r   c                    sB    fdd|D }d dd |D }ttd| d }|S )a/  
    Create the `__init__.py` file to add in the new model folder.

    Args:
        old_lowercase_name (`str`):
            The old lowercase model name.
        new_lowercase_name (`str`):
            The new lowercase model name.
        filenames_to_add (`list[tuple[str, bool]]`):
            A list of tuples of all potential filenames to add for a new model, along a boolean flag describing if we
            should add this file or not. For example, [(`modeling_xxx.px`, True), (`configuration_xxx.py`, True), (`tokenization_xxx.py`, False),...]
    c                    s(   g | ]\}}|   d d|fqS )r   rG   rv   rw   rj   r{   r   r    r0   5  s    z$create_init_file.<locals>.<listcomp>
            c                 s   s$    | ]\}}|rd | dV  qdS )zfrom .z	 import *Nr   )r.   r   ry   r   r   r    rs   9  s   " z#create_init_file.<locals>.<genexpr>z
        from typing import TYPE_CHECKING

        from ...utils import _LazyModule
        from ...utils.import_utils import define_import_structure


        if TYPE_CHECKING:
            z
        else:
            import sys

            _file = globals()["__file__"]
            sys.modules[__name__] = _LazyModule(__name__, _file, define_import_structure(_file), module_spec=__spec__)
        )r   r   r   r   )r{   rj   rl   imports	init_filer   r   r    create_init_file(  s   r   module_namer#   c                 C   s\   t | ddd}| }W d   n1 sw   Y  t|}t }|| |j|jfS )z
    Find the name of all classes defined in `module_name`, including public ones (defined in `__all__`).

    Args:
        module_name (`str`):
            The full path to the python module from which to extract classes.
    r[   r\   r]   N)r`   ra   r?   parse_moduler   visitr   r   )r   r   source_codemodulevisitorr   r   r    find_all_classes_from_fileN  s   


r   r   c                    s   t | \}}d| jdd dd}|jd| ddd	d
 |D  }d fdd
|D } fdd|D }|||fS )a  
    Extract the modular structure that will be needed to copy a file `module_name` using modular.

    Args:
        module_name (`str`):
            The full path to the python module to copy with modular.
        old_model_infos (`ModelInfos`):
            The structure containing the class information of the old model.
        new_cased_name (`str`):
            The new cased model name.
    .Nr   rG   zfrom ..z import z, c                 s   s    | ]}|V  qd S rn   r   r.   r   r   r   r    rs   o  s    z)find_modular_structure.<locals>.<genexpr>r   c                 3   s*    | ]}d |   d| dV  qdS )zclass (z):
    passNrv   r   r   r   r   r    rs   p  s    
c                    s   g | ]}|  qS r   rv   r   r   r   r    r0   s      z*find_modular_structure.<locals>.<listcomp>)r   r   partsrJ   rN   )r   ri   r   all_classesr   import_locationr   modular_classesr   r   r    find_modular_structure^  s    
r   c                 C   s   d dd |dddD }| j}td | }d}d}g }|D ]$\}	}
|
rFt||	 | |\}}}|d| 7 }|d| 7 }|| q"d	 d
d |D }td| d}t	| | | }dd |D }||fS )a  
    Create a new modular file which will copy the old model, based on the new name and the different filenames
    (modules) to add.

    Args:
        old_model_infos (`ModelInfos`):
            The structure containing the class information of the old model.
        new_lowercase_name (`str`):
            The new lowercase model name.
        filenames_to_add (`list[tuple[str, bool]]`):
            A list of tuples of all potential filenames to add for a new model, along a boolean flag describing if we
            should add this file or not. For example, [(`modeling_xxx.px`, True), (`configuration_xxx.py`, True), (`tokenization_xxx.py`, False),...]
    rG   c                 s   rm   rn   ro   rq   r   r   r    rs     rt   z&create_modular_file.<locals>.<genexpr>rE   rD   r}   r   r   r   c                 s   s    | ]}| d V  qdS ),Nr   r.   public_classr   r   r    rs     s    z"

        __all__ = [
            z
        ]
        c                 S   s   g | ]}| d dqS )"rG   rv   r   r   r   r    r0     r   z'create_modular_file.<locals>.<listcomp>)
r   rJ   rb   rB   r   r   extendr   r   r   )ri   rj   rl   r   r{   old_folder_rootall_imports
all_bodiesall_public_classesrx   ry   r   r,   r   public_classes_formattedall_statementmodular_filer   r   r    create_modular_filew  s0   "


	r   c              	      sj  d dd  dddD }| j| j} fdd|dd	 D }g }|D ]$\}}td
  d|rF|d \}}	||	pA|f|d< q)|||f q)i }
|D ]`\}}|r| }td d  | }|	 smqRt
|d}| }W d	   n1 sw   Y  |d}d}|| dr|d7 }|| dstd ||d	  }||||
|< qR|
S )a  
    Create the test files for the new model. It basically copies over the old test files and adjust the class names.

    Args:
        old_model_infos (`ModelInfos`):
            The structure containing the class information of the old model.
        new_lowercase_name (`str`):
            The new lowercase model name.
        filenames_to_add (`list[tuple[str, bool]]`):
            A list of tuples of all potential filenames to add for a new model, along a boolean flag describing if we
            should add this file or not. For example, [(`modeling_xxx.px`, True), (`configuration_xxx.py`, True), (`tokenization_xxx.py`, False),...]
    rG   c                 s   rm   rn   ro   rq   r   r   r    rs     rt   z$create_test_files.<locals>.<genexpr>rE   rD   c                    s$   g | ]\}}d |   |fqS )test_rv   rw   r   r   r    r0     s    z%create_test_files.<locals>.<listcomp>r   Nz+test_(?:tokenization)|(?:image_processing)_r   r|   testsr}   r[   r   r   #)r   rJ   rb   rB   rN   r   r   r$   r   is_filer`   ra   
startswithr   )ri   rj   rl   r   r   r   r   ry   r   r   
test_filesnew_fileoriginal_test_fileoriginal_test_pathrd   	test_code
test_linesidxr   r   r    create_test_files  s>   "


r   create_fast_image_processorc              	   C   s  t  std| j}td | }tj|dd t| ||\}}t|d| d d}	|	| W d   n1 s:w   Y  t	|||}
t|d	 d}	|	|
 W d   n1 s\w   Y  t
td d	 d
| ddd t| ||| td d | }tj|dd t|d	 d W d   n1 sw   Y  t| ||}| D ] \}}t|| d}	|	| W d   n1 sw   Y  qt||}ttd d d d | d d}	|	| W d   n1 sw   Y  t||| |rt|d td d	 }tjdd|||dgttjd tjdd|||gttjd tjg dttjd tjddgttjd tjdd|gttjd dS )a{  
    Creates a new model module like a given model of the Transformers library.

    Args:
        old_model_infos (`ModelInfos`):
            The structure containing the class information of the old model.
        new_lowercase_name (`str`):
            The new lowercase model name.
        new_model_paper_name (`str`):
            The fully cased name (as in the official paper name) of the new model.
        filenames_to_add (`list[tuple[str, bool]]`):
            A list of tuples of all potential filenames to add for a new model, along a boolean flag describing if we
            should add this file or not. For example, [(`modeling_xxx.px`, True), (`configuration_xxx.py`, True), (`tokenization_xxx.py`, False),...]
        create_fast_image_processor (`bool`):
            If it makes sense, whether to add a fast processor as well, even if the old model does not have one.
    zHYou need to install `libcst` to run this command -> `pip install libcst`r}   T)exist_okmodular_r   r_   Nz__init__.pyz
    from .z
 import *
zif TYPE_CHECKING:
r~   r   r   r   r   	model_docz.md)
model_nameruffcheckz--fix)cwdstdoutformat)pythonzutils/check_doc_toc.pyz--fix_and_overwriter   zutils/sort_auto_mappings.pyz utils/modular_model_converter.py)r   rK   rB   r   osmakedirsr   r`   rc   r   rh   r   r   r   itemsr   r   r   
subprocessrunDEVNULL)ri   rj   rk   rl   r   r{   new_module_folderr   r   rd   r   tests_folderr   rx   r   doc_filemodel_init_filer   r   r    create_new_model_like  sn   


&

r   questiondefault_value
convert_tofallback_messagec                 C   s   |  ds	| d } |dur|  d| d} d}|sKt| }|dur)t|dkr)|}|durAz||}d}W n ty@   d}Y nw d}|sIt| |r|S )a4  
    A utility function that asks a question to the user to get an answer, potentially looping until it gets a valid
    answer.

    Args:
        question (`str`):
            The question to ask the user.
        default_value (`str`, *optional*):
            A potential default value that will be used when the answer is empty.
        convert_to (`Callable`, *optional*):
            If set, the answer will be passed to this function. If this function raises an error on the provided
            answer, the question will be asked again.
        fallback_message (`str`, *optional*):
            A message that will be displayed each time the question is asked again to the user.

    Returns:
        `Any`: The answer provided by the user (or the default), passed through the potential conversion function.
    rC   Nz [z] Fr   T)endswithinputlen	Exceptionprint)r   r   r   r   valid_answeranswerr   r   r    get_user_field>  s*   
r   rr   c                 C   s.   |   dv rdS |   dv rdS t|  d)z&
    Converts a string to a bool.
    )1yyestrueT)0nnofalseFz0 is not a value that can be converted to a bool.)rI   rK   )rr   r   r   r    convert_to_boolo  s
   r  c                  C   s^  t t } d}|s=td}|| v rd}n&t| d t|| }t|dkr;t|dkr3d|}td| d |r
t	|}t
d	}t
d
ddd |dD d}d}d}d}	d}
d}d}d}|jdurtt
d| dtdd}|jdurt
d| dtdd}|jdurt
d| dtdd}	|jdurt
d| dtdd}
|jdurt
d| dtdd}|jdurt
d| dtdd}|jdurt
d| dtdd}|j}d| ddfd| ddfd| d|fd| d|fd | d|	fd | d|
fd!| d|fd"| d|fd#| d|ff	}d}|	r(|
s(t
d$td%d&d'}|||||fS )(zE
    Ask the user for the necessary inputs to add the new model.
    FzWWhat model would you like to duplicate? Please provide it as lowercase, e.g. `llama`): Tz is not a valid model type.r   z or zDid you mean ?zSWhat is the new model name? Please provide it as snake lowercase, e.g. `new_model`?zUWhat is the fully cased name you would like to appear in the doc (e.g. `NeW ModEl`)? rG   c                 s   rm   rn   ro   rq   r   r   r    rs     rt   z!get_user_input.<locals>.<genexpr>rD   )r   NzHDo you want to create a new tokenizer? If `no`, it will use the same as z (y/n)?z.Please answer yes/no, y/n, true/false or 1/0. )r   r   zMDo you want to create a new fast tokenizer? If `no`, it will use the same as zNDo you want to create a new image processor? If `no`, it will use the same as zSDo you want to create a new fast image processor? If `no`, it will use the same as zNDo you want to create a new video processor? If `no`, it will use the same as zPDo you want to create a new feature extractor? If `no`, it will use the same as zHDo you want to create a new processor? If `no`, it will use the same as configuration_r   	modeling_tokenization_r   image_processing_video_processing_feature_extraction_processing_zA fast image processor can be created from the slow one, but modifications might be needed. Should we add a fast image processor class for this model (recommended) (y/n)? r   z-Please answer yes/no, y/n, true/false or 1/0.)r   r   r   )listr   keysr   r   difflibget_close_matchesr   r   rA   r   rb   rO   r  rP   rR   rS   rT   rU   rV   rB   )model_typesvalid_model_typeold_model_typenear_choicesri   rj   rk   add_tokenizeradd_fast_tokenizeradd_image_processorr   add_video_processoradd_feature_extractoradd_processorr{   rl   r   r   r   r    get_user_inputz  s   














r  argsc                 C   s   t | jdS )N)path_to_repo)AddNewModelLikeCommandr  )r  r   r   r    "add_new_model_like_command_factory  s   r   c                   @   s0   e Zd ZedefddZd	ddZdd ZdS )
r  parserc                 C   s*   |  d}|jdtdd |jtd d S )Nzadd-new-model-likez--path_to_repozFWhen not using an editable install, the path to the Transformers repo.)typehelp)func)
add_parseradd_argumentrW   set_defaultsr   )r!  add_new_model_like_parserr   r   r    register_subcommand  s
   
z*AddNewModelLikeCommand.register_subcommandNc                 K   s$   t  \| _| _| _| _| _|| _d S rn   )r  ri   rj   rk   rl   r   r  )r   r  kwargsr   r   r    r!     s   
zAddNewModelLikeCommand.__init__c                 C   s@   | j d urt| j atd d at| j| j| j| j| j	d d S )Nsrctransformers)ri   rj   rk   rl   r   )
r  r   r   r   r   ri   rj   rk   rl   r   r   r   r   r    r      s   


zAddNewModelLikeCommand.runrn   )r;   r<   r=   staticmethodr   r)  r!   r   r   r   r   r    r    s
    

r  )NNN)Hr  r   r   r   r   argparser   r   datetimer   pathlibr   typingr   r   r   r	   models.auto.configuration_autor   r   #models.auto.feature_extraction_autor   !models.auto.image_processing_autor   models.auto.processing_autor   models.auto.tokenization_autor   !models.auto.video_processing_autor   utilsr   rG   r   r   libcstr?   r   r   r1   r   todayyearCURRENT_YEAR__file__parentr   r   lstripr   rA   rW   PathLikerh   r  tupleboolr   r   r   r   setr   r   r   r   r   r   r  r  r   r  r   r   r   r    <module>   s   
 "
C8"&

42
c
1o