o
    ߥim                     @   s   d dl mZmZ d dlZd dlZd dlZd dlmZ d dl	m  m
Z d dlmZ d dlmZmZmZ G dd dejZ					dd	d
ZG dd dejZG dd deZG dd deedZG dd deZG dd dejZdS )    )ABCMetaabstractmethodN)
ConvModule)
BaseModule	auto_fp16
force_fp32c                       s$   e Zd Z fddZdd Z  ZS )LinearClassifierc                    s    t t|   t||| _d S N)superr   __init__nnLinear
classifier)selfin_channelsnum_classes	__class__ _/home/ubuntu/.local/lib/python3.10/site-packages/modelscope/models/cv/vision_middleware/head.pyr      s   zLinearClassifier.__init__c                 C   s   |  |d S )N)r   r   xr   r   r   forward   s   zLinearClassifier.forward__name__
__module____qualname__r   r   __classcell__r   r   r   r   r      s    r   nearestTc                 C   s   t | ||||S r	   )Finterpolate)inputsizescale_factormodealign_cornerswarningr   r   r   resize   s   r(   c                       s.   e Zd Z				d fdd	Zdd Z  ZS )UpsampleNr   c                    sV   t t|   || _t|trtdd |D | _n	|r t|nd | _|| _|| _	d S )Nc                 s   s    | ]}t |V  qd S r	   )float).0factorr   r   r   	<genexpr>/   s    z$Upsample.__init__.<locals>.<genexpr>)
r
   r)   r   r#   
isinstancetupler$   r*   r%   r&   )r   r#   r$   r%   r&   r   r   r   r   '   s   

zUpsample.__init__c                    s>    j s fdd|jdd  D }n j }t||d  j jS )Nc                    s   g | ]	}t | j qS r   )intr$   )r+   tr   r   r   
<listcomp>7   s    z$Upsample.forward.<locals>.<listcomp>)r#   shaper(   r%   r&   )r   r   r#   r   r2   r   r   5   s   zUpsample.forward)NNr   Nr   r   r   r   r   r)   %   s    r)   c                       sV   e Zd ZdZdddddddddeddedd	d
df fdd	Ze dd Z  ZS )FPNa
  Feature Pyramid Network.
    This neck is the implementation of `Feature Pyramid Networks for Object
    Detection <https://arxiv.org/abs/1612.03144>`_.
    Args:
        in_channels (list[int]): Number of input channels per scale.
        out_channels (int): Number of output channels (used at each scale).
        num_outs (int): Number of output scales.
        start_level (int): Index of the start input backbone level used to
            build the feature pyramid. Default: 0.
        end_level (int): Index of the end input backbone level (exclusive) to
            build the feature pyramid. Default: -1, which means the last level.
        add_extra_convs (bool | str): If bool, it decides whether to add conv
            layers on top of the original feature maps. Default to False.
            If True, its actual mode is specified by `extra_convs_on_inputs`.
            If str, it specifies the source feature map of the extra convs.
            Only the following options are allowed
            - 'on_input': Last feat map of neck inputs (i.e. backbone feature).
            - 'on_lateral': Last feature map after lateral convs.
            - 'on_output': The last output feature map after fpn convs.
        extra_convs_on_inputs (bool, deprecated): Whether to apply extra convs
            on the original feature from the backbone. If True,
            it is equivalent to `add_extra_convs='on_input'`. If False, it is
            equivalent to set `add_extra_convs='on_output'`. Default to True.
        relu_before_extra_convs (bool): Whether to apply relu before the extra
            conv. Default: False.
        no_norm_on_lateral (bool): Whether to apply norm on lateral.
            Default: False.
        conv_cfg (dict): Config dict for convolution layer. Default: None.
        norm_cfg (dict): Config dict for normalization layer. Default: None.
        act_cfg (dict): Config dict for activation layer in ConvModule.
            Default: None.
        upsample_cfg (dict): Config dict for interpolate layer.
            Default: dict(mode='nearest').
        init_cfg (dict or list[dict], optional): Initialization config dict.
    Example:
        >>> import torch
        >>> in_channels = [2, 3, 5, 7]
        >>> scales = [340, 170, 84, 43]
        >>> inputs = [torch.rand(1, c, s, s)
        ...           for c, s in zip(in_channels, scales)]
        >>> self = FPN(in_channels, 11, len(in_channels)).eval()
        >>> outputs = self.forward(inputs)
        >>> for i in range(len(outputs)):
        ...     print(f'outputs[{i}].shape = {outputs[i].shape}')
        outputs[0].shape = torch.Size([1, 11, 340, 340])
        outputs[1].shape = torch.Size([1, 11, 170, 170])
        outputs[2].shape = torch.Size([1, 11, 84, 84])
        outputs[3].shape = torch.Size([1, 11, 43, 43])
    r   r   FNr   )r%   XavierConv2duniform)typelayerdistributionc                    s  t t| | t|tsJ || _|| _t|| _|| _	|| _
|	| _d| _| | _|dkr=| j| _|| j| ks<J n|| _|t|ksHJ ||| ksPJ || _|| _|| _t|ttfsbJ t|trn|dv smJ n|ry|rvd| _nd| _t | _t | _t| j| jD ],}t|| |d|
| js|nd |dd}t||dd|
||dd	}| j| | j| q|| j | j }| jr|dkrt|D ]-}|d
kr| jdkr| j| jd  }n|}t||ddd|
||dd	}| j| qd S d S d S )NFr   )on_input
on_lateral	on_outputr=   r?      )conv_cfgnorm_cfgact_cfginplace   )paddingrA   rB   rC   rD   r      )striderF   rA   rB   rC   rD   )r
   r6   r   r.   listr   out_channelslennum_insnum_outsrelu_before_extra_convsno_norm_on_lateralfp16_enabledcopyupsample_cfgbackbone_end_levelstart_level	end_leveladd_extra_convsstrboolr   
ModuleListlateral_convs	fpn_convsranger   append)r   r   rJ   rM   rT   rU   rV   extra_convs_on_inputsrN   rO   rA   rB   rC   rR   init_cfgil_convfpn_convextra_levelsextra_fpn_convr   r   r   r   p   s   






zFPN.__init__c                    s  t  t jksJ  fddtjD t }t|d ddD ]?}djv rA|d  t| fi j |d < q$|d  jdd  }|d  t| fd|ij |d < q$fd	dt|D }jt |krj	stj| D ]}|
tj|d ddd
 qt|S j	dkr jd  }nj	dkrd }nj	dkr|d }nt|
j| | t|d jD ]!}jr|
j| t|d  q|
j| |d  qt|S )Nc                    s"   g | ]\}}| |j   qS r   )rT   )r+   r`   lateral_convinputsr   r   r   r3      s    zFPN.forward.<locals>.<listcomp>r@   r   r   r$   rG   r#   c                    s   g | ]}j |  | qS r   )r[   r+   r`   )lateralsr   r   r   r3      s    )rH   r=   r>   r?   )rK   r   	enumeraterZ   r\   rR   r(   r5   rM   rV   r]   r    
max_pool2drS   NotImplementedErrorr[   rN   relur/   )r   rg   used_backbone_levelsr`   
prev_shapeoutsextra_sourcer   )rg   ri   r   r   r      sR   





 zFPN.forward)	r   r   r   __doc__dictr   r   r   r   r   r   r   r   r6   =   s$    6_r6   c                       s   e Zd ZdZdddddeddddeddd	d
dddeddedddd fdd
Zdd Zdd Zdd Ze	 e
dd Zdd Zdd Zd d! Z  ZS )"BaseDecodeHeada  Base class for BaseDecodeHead.
    Args:
        in_channels (int|Sequence[int]): Input channels.
        channels (int): Channels after modules, before conv_seg.
        num_classes (int): Number of classes.
        out_channels (int): Output channels of conv_seg.
        threshold (float): Threshold for binary segmentation in the case of
            `out_channels==1`. Default: None.
        dropout_ratio (float): Ratio of dropout layer. Default: 0.1.
        conv_cfg (dict|None): Config of conv layers. Default: None.
        norm_cfg (dict|None): Config of norm layers. Default: None.
        act_cfg (dict): Config of activation layers.
            Default: dict(type='ReLU')
        in_index (int|Sequence[int]): Input feature index. Default: -1
        input_transform (str|None): Transformation type of input features.
            Options: 'resize_concat', 'multiple_select', None.
            'resize_concat': Multiple feature maps will be resize to the
                same size as first one and than concat together.
                Usually used in FCN head of HRNet.
            'multiple_select': Multiple feature maps will be bundle into
                a list and passed into decode head.
            None: Only one select feature map is allowed.
            Default: None.
        loss_decode (dict | Sequence[dict]): Config of decode loss.
            The `loss_name` is property of corresponding loss function which
            could be shown in training log. If you want this loss
            item to be included into the backward graph, `loss_` must be the
            prefix of the name. Defaults to 'loss_ce'.
             e.g. dict(type='CrossEntropyLoss'),
             [dict(type='CrossEntropyLoss', loss_name='loss_ce'),
              dict(type='DiceLoss', loss_name='loss_dice')]
            Default: dict(type='CrossEntropyLoss').
        ignore_index (int | None): The label index to be ignored. When using
            masked BCE loss, ignore_index should be set to None. Default: 255.
        sampler (dict|None): The config of segmentation map sampler.
            Default: None.
        align_corners (bool): align_corners argument of F.interpolate.
            Default: False.
        init_cfg (dict or list[dict], optional): Initialization config dict.
    N皙?ReLU)r:   r   CrossEntropyLossF      ?r:   use_sigmoidloss_weight   Normalg{Gz?conv_seg)name)r:   stdoverride)rJ   	thresholddropout_ratiorA   rB   rC   in_indexinput_transformloss_decodeignore_indexsamplerr&   r_   c                   s   t t| | | ||
| || _|| _|| _|| _|	| _|
| _	|| _
|| _|d u r6|dkr4td |}||krH|dkrHtd| d| |dkrW|d u rWd}td || _|| _|| _tj|| jdd| _|d	krut|| _nd | _d
| _d S )NrG   zFor binary segmentation, we suggest using`out_channels = 1` to define the outputchannels of segmentor, and use `threshold`to convert seg_logist into a predictionapplying a thresholdr@   zout_channels should be equal to num_classes,except binary segmentation set out_channels == 1 andnum_classes == 2, but got out_channels=zand num_classes=g333333?z7threshold is not defined for binary, and defaultsto 0.3)kernel_sizer   F)r
   rt   r   _init_inputschannelsr   rA   rB   rC   r   r   r&   warningswarn
ValueErrorr   rJ   r   r   r8   r~   	Dropout2ddropoutrP   )r   r   r   r   rJ   r   r   rA   rB   rC   r   r   r   r   r   r&   r_   r   r   r   r   /  sB   


zBaseDecodeHead.__init__c                 C   s    d| j  d| j d| j }|S )zExtra repr.zinput_transform=z, ignore_index=z, align_corners=)r   r   r&   )r   sr   r   r   
extra_reprp  s   
zBaseDecodeHead.extra_reprc                 C   s   |dur
|dv s
J || _ || _|dur@t|ttfsJ t|ttfs&J t|t|ks0J |dkr;t|| _dS || _dS t|tsGJ t|tsNJ || _dS )a  Check and initialize input transforms.
        The in_channels, in_index and input_transform must match.
        Specifically, when input_transform is None, only single feature map
        will be selected. So in_channels and in_index must be of type int.
        When input_transform
        Args:
            in_channels (int|Sequence[int]): Input channels.
            in_index (int|Sequence[int]): Input feature index.
            input_transform (str|None): Transformation type of input features.
                Options: 'resize_concat', 'multiple_select', None.
                'resize_concat': Multiple feature maps will be resize to the
                    same size as first one and than concat together.
                    Usually used in FCN head of HRNet.
                'multiple_select': Multiple feature maps will be bundle into
                    a list and passed into decode head.
                None: Only one select feature map is allowed.
        N)resize_concatmultiple_selectr   )	r   r   r.   rI   r/   rK   sumr   r0   )r   r   r   r   r   r   r   r   w  s   

zBaseDecodeHead._init_inputsc                    st   j dkr" fddjD   fdd D }tj|dd  S j dkr3 fddjD   S  j   S )	zTransform inputs for decoder.
        Args:
            inputs (list[Tensor]): List of multi-level img features.
        Returns:
            Tensor: The transformed inputs
        r   c                       g | ]} | qS r   r   rh   rg   r   r   r3         z4BaseDecodeHead._transform_inputs.<locals>.<listcomp>c                    s,   g | ]}t | d  jdd djdqS )r   rG   Nbilinear)r"   r#   r%   r&   )r(   r5   r&   )r+   r   rf   r   r   r3     s    r@   dimr   c                    r   r   r   rh   r   r   r   r3     r   )r   r   torchcat)r   rg   upsampled_inputsr   rf   r   _transform_inputs  s   


z BaseDecodeHead._transform_inputsc                 C   s   dS )z Placeholder of forward function.Nr   )r   rg   r   r   r   r     s   zBaseDecodeHead.forwardc                 C   s   | |}|  ||}|S )a  Forward function for training.
        Args:
            inputs (list[Tensor]): List of multi-level img features.
            img_metas (list[dict]): List of image info dict where each dict
                has: 'img_shape', 'scale_factor', 'flip', and may also contain
                'filename', 'ori_shape', 'pad_shape', and 'img_norm_cfg'.
                For details on the values of these keys see
                `mmseg/datasets/pipelines/formatting.py:Collect`.
            gt_semantic_seg (Tensor): Semantic segmentation masks
                used if the architecture supports semantic segmentation task.
            train_cfg (dict): The training config.
        Returns:
            dict[str, Tensor]: a dictionary of loss components
        )losses)r   rg   	img_metasgt_semantic_seg	train_cfg
seg_logitsr   r   r   r   forward_train  s   zBaseDecodeHead.forward_trainc                 C   s
   |  |S )aK  Forward function for testing.
        Args:
            inputs (list[Tensor]): List of multi-level img features.
            img_metas (list[dict]): List of image info dict where each dict
                has: 'img_shape', 'scale_factor', 'flip', and may also contain
                'filename', 'ori_shape', 'pad_shape', and 'img_norm_cfg'.
                For details on the values of these keys see
                `mmseg/datasets/pipelines/formatting.py:Collect`.
            test_cfg (dict): The testing config.
        Returns:
            Tensor: Output segmentation map.
        )r   )r   rg   r   test_cfgr   r   r   forward_test  s   
zBaseDecodeHead.forward_testc                 C   s"   | j dur
|  |}| |}|S )zClassify each pixel.N)r   r~   )r   featoutputr   r   r   cls_seg  s   


zBaseDecodeHead.cls_seg)r   r   r   rr   rs   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   rt     s>    .A$rt   )	metaclassc                       s(   e Zd ZdZ fddZdd Z  ZS )FPNHeadaO  Panoptic Feature Pyramid Networks.
    This head is the implementation of `Semantic FPN
    <https://arxiv.org/abs/1901.02446>`_.
    Args:
        feature_strides (tuple[int]): The strides for input feature maps.
            stack_lateral. All strides suppose to be power of 2. The first
            one is of largest resolution.
    c                    s  t t| jd
ddi| t|t| jksJ t||d ks"J || _t | _	t
t|D ]V}tdtt|| t|d  }g }t
|D ]1}|t|dkrY| j| n| j| jdd| j| j| jd || |d kr||tdd| jd	 qK| j	tj|  q0d S )Nr   r   r   r@   rE   )rF   rA   rB   rC   rG   r   )r$   r%   r&   r   )r
   r   r   rK   r   minfeature_stridesr   rY   scale_headsr\   maxr0   nplog2r]   r   r   rA   rB   rC   r)   r&   
Sequential)r   r   kwargsr`   head_length
scale_headkr   r   r   r     sL   
	zFPNHead.__init__c                 C   sn   |  |}| jd |d }tdt| jD ]}|t| j| || |jdd  d| jd }q| |}|S )Nr   r@   rG   r   )r#   r%   r&   )	r   r   r\   rK   r   r(   r5   r&   r   )r   rg   r   r   r`   r   r   r   r     s   


zFPNHead.forward)r   r   r   rr   r   r   r   r   r   r   r   r     s    	r   c                       s   e Zd ZdZdeg ddddeg dg dg d	d
ddeddddeddddd	f fdd	Z						dddZdd Z  ZS ) FPNSegmentorz
    Packed Sementor Head
    Args:
        fpn_layer_indices: tuple of the indices of layers
        neck_cfg: dict of FPN params
        head_cfg: dict of FPNHead params
    )rE            )   r   r   r         )r   rJ   rM   )r   r   r   r   )r   r@   rG   rE   )r                ru      BNT)r:   requires_gradFrw   rx   ry   )	r   r   r   r   r   r   rB   r&   r   c                    s   t t|   || _|d d }tttj||dddt|t	 tj||dddtj||dddt
 tjdddg| _td
i || _td
i || _|d | _tj }tjd tjjdd| jdfd	}tj| || _d S )Nr   r   rG   )r   rH   r   *   r|   rE   r#   r   )r
   r   r   fpn_layer_indicesr   rY   r   ConvTranspose2dBatchNorm2dGELUIdentity	MaxPool2dpre_fpnr6   fpn_neckr   fpn_headNUM_CLASSESr   random	get_stateseedrandint	set_statePALETTE)r   r   neck_cfghead_cfgwidthstatepaletter   r   r   r   '  s,   


zFPNSegmentor.__init__N r         ?c	                 C   s  t |}| }|}	|du r5| jdu r2tj }
tjd tjjdd| j	dfd}tj
|
 n| j}t|}|jd | j	ksDJ |jd dksMJ t|jdksVJ d|  k rad	ksdJ  J |	jd | j	ksnJ |	jd |jd ks|	jd |jd krt|	|jd |jd fdd
d}	|	d }	tj|	dd}	tj|	jd |	jd dftjd}t|D ]\}}|||	|kddf< q|ddddf }|d|  ||  }|tj}|durd}|rt ||| |durt || |s|std dS dS dS )a  Draw `result` over `img`.
        Args:
            img (str or Tensor): The image to be displayed.
            result (Tensor): The semantic segmentation results to draw over
                `img`.
            palette (list[list[int]]] | np.ndarray | None): The palette of
                segmentation map. If None is given, random palette will be
                generated. Default: None
            win_name (str): The window name.
            wait_time (int): Value of waitKey param.
                Default: 0.
            show (bool): Whether to show the image.
                Default: False.
            out_file (str or None): The filename to write the image.
                Default: None.
            opacity(float): Opacity of painted segmentation map.
                Default 0.5.
                Must be in (0, 1] range.
        Returns:
            img (Tensor): Only if not `show` or `out_file`
        Nr   r   r|   rE   r   r@   rG   rx   r   Tr   )dtype.r   FzMshow==False and out_file is not specified, only result image will be returned)mmcvimreadrQ   r   r   r   r   r   r   r   r   arrayr5   rK   r(   r   argmaxzerosuint8rj   astypeimshowimwriter   r   )r   imgresultr   win_nameshow	wait_timeout_fileopacitysegr   	color_seglabelcolorr   r   r   show_resultV  sL   



("

zFPNSegmentor.show_resultc                    sH   fdd j D  fddttD   S )Nc                    r   r   r   )r+   idx)r   r   r   r3     r   z(FPNSegmentor.forward.<locals>.<listcomp>c                    s   g | ]} j | | qS r   )r   rh   r   r   r   r3     s    )r   r\   rK   r   r   r   r   r   r   r     s
   

zFPNSegmentor.forward)Nr   Fr   Nr   )	r   r   r   rr   rs   r   r   r   r   r   r   r   r   r     s@    	
2
Sr   )NNr   NT)abcr   r   r   numpyr   r   torch.nnr   torch.nn.functional
functionalr    mmcv.cnnr   mmcv.runnerr   r   r   Moduler   r(   r)   r6   rt   r   r   r   r   r   r   <module>   s,   

 I `: