o
    ¾e¦iÊ(  ã                   @   sX   d Z ddlmZ ddlmZ ddlmZ ddlmZ e 	e
¡ZG dd„ deƒZdgZd	S )
zDETR model configurationé   )Ú%consolidate_backbone_kwargs_to_config)ÚPreTrainedConfig)Úloggingé   )Ú
AutoConfigc                       sz   e Zd ZdZdZdeiZdgZdddœZ				
																											d‡ fdd„	Z	‡  Z
S )Ú
DetrConfiga_  
    This is the configuration class to store the configuration of a [`DetrModel`]. It is used to instantiate a DETR
    model according to the specified arguments, defining the model architecture. Instantiating a configuration with the
    defaults will yield a similar configuration to that of the DETR
    [facebook/detr-resnet-50](https://huggingface.co/facebook/detr-resnet-50) architecture.

    Configuration objects inherit from [`PreTrainedConfig`] and can be used to control the model outputs. Read the
    documentation from [`PreTrainedConfig`] for more information.

    Args:
        backbone_config (`Union[dict, "PreTrainedConfig"]`, *optional*, defaults to `ResNetConfig()`):
            The configuration of the backbone model. Only used in case `use_timm_backbone` is set to `False` in which
            case it will default to `ResNetConfig()`.
        num_channels (`int`, *optional*, defaults to 3):
            The number of input channels.
        num_queries (`int`, *optional*, defaults to 100):
            Number of object queries, i.e. detection slots. This is the maximal number of objects [`DetrModel`] can
            detect in a single image. For COCO, we recommend 100 queries.
        d_model (`int`, *optional*, defaults to 256):
            This parameter is a general dimension parameter, defining dimensions for components such as the encoder layer and projection parameters in the decoder layer, among others.
        encoder_layers (`int`, *optional*, defaults to 6):
            Number of encoder layers.
        decoder_layers (`int`, *optional*, defaults to 6):
            Number of decoder layers.
        encoder_attention_heads (`int`, *optional*, defaults to 8):
            Number of attention heads for each attention layer in the Transformer encoder.
        decoder_attention_heads (`int`, *optional*, defaults to 8):
            Number of attention heads for each attention layer in the Transformer decoder.
        decoder_ffn_dim (`int`, *optional*, defaults to 2048):
            Dimension of the "intermediate" (often named feed-forward) layer in decoder.
        encoder_ffn_dim (`int`, *optional*, defaults to 2048):
            Dimension of the "intermediate" (often named feed-forward) layer in decoder.
        activation_function (`str` or `function`, *optional*, defaults to `"relu"`):
            The non-linear activation function (function or string) in the encoder and pooler. If string, `"gelu"`,
            `"relu"`, `"silu"` and `"gelu_new"` are supported.
        dropout (`float`, *optional*, defaults to 0.1):
            The dropout probability for all fully connected layers in the embeddings, encoder, and pooler.
        attention_dropout (`float`, *optional*, defaults to 0.0):
            The dropout ratio for the attention probabilities.
        activation_dropout (`float`, *optional*, defaults to 0.0):
            The dropout ratio for activations inside the fully connected layer.
        init_std (`float`, *optional*, defaults to 0.02):
            The standard deviation of the truncated_normal_initializer for initializing all weight matrices.
        init_xavier_std (`float`, *optional*, defaults to 1):
            The scaling factor used for the Xavier initialization gain in the HM Attention map module.
        encoder_layerdrop (`float`, *optional*, defaults to 0.0):
            The LayerDrop probability for the encoder. See the [LayerDrop paper](see https://huggingface.co/papers/1909.11556)
            for more details.
        decoder_layerdrop (`float`, *optional*, defaults to 0.0):
            The LayerDrop probability for the decoder. See the [LayerDrop paper](see https://huggingface.co/papers/1909.11556)
            for more details.
        auxiliary_loss (`bool`, *optional*, defaults to `False`):
            Whether auxiliary decoding losses (loss at each decoder layer) are to be used.
        position_embedding_type (`str`, *optional*, defaults to `"sine"`):
            Type of position embeddings to be used on top of the image features. One of `"sine"` or `"learned"`.
        dilation (`bool`, *optional*, defaults to `False`):
            Whether to replace stride with dilation in the last convolutional block (DC5). Only supported when
            `use_timm_backbone` = `True`.
        class_cost (`float`, *optional*, defaults to 1):
            Relative weight of the classification error in the Hungarian matching cost.
        bbox_cost (`float`, *optional*, defaults to 5):
            Relative weight of the L1 error of the bounding box coordinates in the Hungarian matching cost.
        giou_cost (`float`, *optional*, defaults to 2):
            Relative weight of the generalized IoU loss of the bounding box in the Hungarian matching cost.
        mask_loss_coefficient (`float`, *optional*, defaults to 1):
            Relative weight of the Focal loss in the panoptic segmentation loss.
        dice_loss_coefficient (`float`, *optional*, defaults to 1):
            Relative weight of the DICE/F-1 loss in the panoptic segmentation loss.
        bbox_loss_coefficient (`float`, *optional*, defaults to 5):
            Relative weight of the L1 bounding box loss in the object detection loss.
        giou_loss_coefficient (`float`, *optional*, defaults to 2):
            Relative weight of the generalized IoU loss in the object detection loss.
        eos_coefficient (`float`, *optional*, defaults to 0.1):
            Relative classification weight of the 'no-object' class in the object detection loss.

    Examples:

    ```python
    >>> from transformers import DetrConfig, DetrModel

    >>> # Initializing a DETR facebook/detr-resnet-50 style configuration
    >>> configuration = DetrConfig()

    >>> # Initializing a model (with random weights) from the facebook/detr-resnet-50 style configuration
    >>> model = DetrModel(configuration)

    >>> # Accessing the model configuration
    >>> configuration = model.config
    ```ÚdetrÚbackbone_configÚpast_key_valuesÚd_modelÚencoder_attention_heads)Úhidden_sizeÚnum_attention_headsNr   éd   é   é   é   ç        TÚrelué   çš™™™™™¹?ç{®Gáz”?ç      ð?FÚsineé   é   r   c           "         s.  |  di ¡} |   d|¡dd|   dg d¢¡dœ}!|r!|   dd	¡|!d< td|d
dddgi|!dœ|¤Ž\}}|| _|| _|| _|| _|| _|| _|| _|| _	|| _
|	| _|| _|| _|| _|| _|| _|| _|
| _|| _|| _|| _|| _|| _|| _|| _|| _|| _|| _|| _|| _tƒ j dd|i|¤Ž d S )NÚbackbone_kwargsÚnum_channelsTFÚout_indices)r   r   r   é   )r   Úfeatures_onlyÚuse_pretrained_backboner   Úoutput_strideé   Úresnet50ÚresnetÚout_featuresÚstage4)r	   Údefault_backboneÚdefault_config_typeÚdefault_config_kwargsÚtimm_default_kwargsÚis_encoder_decoder© )!Úgetr   r	   r   Únum_queriesr   Úencoder_ffn_dimÚencoder_layersr   Údecoder_ffn_dimÚdecoder_layersÚdecoder_attention_headsÚdropoutÚattention_dropoutÚactivation_dropoutÚactivation_functionÚinit_stdÚinit_xavier_stdÚencoder_layerdropÚdecoder_layerdropÚnum_hidden_layersÚauxiliary_lossÚposition_embedding_typeÚ
class_costÚ	bbox_costÚ	giou_costÚmask_loss_coefficientÚdice_loss_coefficientÚbbox_loss_coefficientÚgiou_loss_coefficientÚeos_coefficientÚsuperÚ__init__)"Úselfr	   r   r/   r1   r0   r   r3   r2   r4   r;   r<   r,   r8   r   r5   r6   r7   r9   r:   r>   r?   Údilationr@   rA   rB   rC   rD   rE   rF   rG   Úkwargsr   r+   ©Ú	__class__r-   úi/home/ubuntu/transcripts/venv/lib/python3.10/site-packages/transformers/models/detr/configuration_detr.pyrI   |   s^   "
üû
ú	zDetrConfig.__init__)Nr   r   r   r   r   r   r   r   r   r   Tr   r   r   r   r   r   r   Fr   Fr   r   r   r   r   r   r   r   )Ú__name__Ú
__module__Ú__qualname__Ú__doc__Ú
model_typer   Úsub_configsÚkeys_to_ignore_at_inferenceÚattribute_maprI   Ú__classcell__r-   r-   rM   rO   r      sN    Zþár   N)rS   Úbackbone_utilsr   Úconfiguration_utilsr   Úutilsr   Úautor   Ú
get_loggerrP   Úloggerr   Ú__all__r-   r-   r-   rO   Ú<module>   s   
 
;