o
    ߥi                     @   s`   d dl Z d dlmZ d dlm  mZ G dd dejZG dd dejZG dd dejZ	dS )    Nc                       s$   e Zd Z fddZdd Z  ZS )	ConvLayerc              	      sF   t t|   ttdtj||dddt|tjdd| _	d S )N      r   kernel_sizepaddingT)inplace)
superr   __init__nn
SequentialReflectionPad2dConv2dBatchNorm2dReLUconv)selfin_chout_ch	__class__ c/home/ubuntu/.local/lib/python3.10/site-packages/modelscope/models/cv/image_body_reshaping/model.pyr
   	   s   
zConvLayer.__init__c                 C   s   |  |}|S )N)r   )r   xr   r   r   forward   s   
zConvLayer.forward)__name__
__module____qualname__r
   r   __classcell__r   r   r   r   r      s    r   c                       s,   e Zd Z fddZdd Zdd Z  ZS )SASAc                    s   t t|   || _tj||d dd| _tj||d dd| _tj||dd| _tjd|d dd| _	t
td| _tjdd| _t | _d S )N   r   )in_channelsout_channelsr          dim)r	   r   r
   	chanel_inr   r   
query_convkey_conv
value_convmag_conv	ParametertorchzerosgammaSoftmaxsoftmaxSigmoidsigmoid)r   in_dimr   r   r   r
      s"   


zSASA.__init__c           
      C   s  t j|d d ddd d d d f ddd}t |dd}t j|d d ddd d d d f ddd}t |dd}t j|d d ddd d d d f ddd}t |dd}|d d dd d d d f d}d| }t j|||||fdd	}	tj|	||fd
d}	|	S )Nr   r   T)r'   keepdimr      r       r&   area)sizemode)r.   sumclamp	unsqueezecatFinterpolate)
r   paf_magtarget_heighttarget_width
torso_mask	arms_mask	legs_maskfg_maskbg_maskYr   r   r   structure_encoder*   s   ,,,"zSASA.structure_encoderc                 C   s  |  \}}}}| |||}| ||d|| }t|ddd|}	|	t|	 }
| |
}| 	||d|| ddd}| 
||d|| }t||}|t| }| |}|| }| ||d|| }t||ddd}|||||}| j| | }||fS )a;  extract self-attention features.
        Args:
            X : input feature maps( B x C x H x W)
            PAF_mag : ( B x C x H x W), 1 denotes connectivity, 0 denotes non-connectivity

        Returns:
            out : self attention value + input feature
            Y: B X N X N (N is Width*Height)
        r%   r      r   )r:   rK   r,   viewr.   bmmpermutemeanr4   r)   r*   r+   r0   )r   XPAF_magm_batchsizeCheightwidthrJ   connectivity_mask_vecaffinityaffinity_centeredaffinity_sigmoid
proj_queryproj_keyselfatten_mapselfatten_centeredselfatten_sigmoidSASA_map
proj_valueoutr   r   r   r   ;   s6   


zSASA.forward)r   r   r   r
   rK   r   r   r   r   r   r   r      s    r   c                       s2   e Zd Zd fdd	ZdddZdd	d
Z  ZS )FlowGeneratorFc                    sd  t t|   || _tt|dtddtdtddtddtdtddtddtdtddtddtdtddtddtddtddtdd| _t	dd| _	ttddtj
ddd	d
tddtddtj
ddd	d
tddtddtddtddtddtjdddddt tj
ddd	d
| _d}tjj|dt|d d d| _d S )N@   rL         i   i   )r5   bilinearT)scale_factorr;   align_cornersr$   r   r   r   r7      )r   strider   )r	   rc   r
   deep_supervisionr   r   r   	MaxPool2dEncoderr   Upsampler   TanhDecoderr.   intdilation)r   
n_channelsrl   dilation_ksizer   r   r   r
   f   sV   zFlowGenerator.__init__rg   r/   皙?c                 C   s   |  \}}}}	tt|t|	g\}
}| |	d  d d }|
 |d  d d }
t|d|
dfdd}||j}|d| |  }t	j
||||d}|S )Nr   g       @r%   r   rL   )r;   padding_mode)r:   r.   meshgridarangefloatr?   r>   todevicer@   grid_sample)r   r   flowr;   rw   coffnchwyvxvgridgrid_xwarp_xr   r   r   warp   s   "zFlowGenerator.warpc                 C   s   t j||fdd}| |}| \}}}}| |d d }	| ||	\}
}| |
}|dddd}| j|||d}t j	|d	dd
}||fS )a  extract self-attention features.
        Args:
            img : input numpy image
            skeleton_map : skeleton map of input image
            coef: warp degree

        Returns:
            warp_x : warped image
            flow: predicted flow
        r   r&   g      ?g      ?r   rL   r   )r   g      )minmax)
r.   r?   rn   r:   rs   r   rq   rO   r   r=   )r   imgskeleton_mapcoef
img_concatrQ   _rU   rV   rR   rb   rJ   r~   r   r   r   r   r      s   

zFlowGenerator.forward)F)rg   r/   rv   )rv   )r   r   r   r
   r   r   r   r   r   r   r   rc   d   s    
0rc   )
r.   torch.nnr   torch.nn.functional
functionalr@   Moduler   r   rc   r   r   r   r   <module>   s   N