o
    ॵi*                     @   s   d dl Z d dlmZmZ d dlZd dlZd dlZd dlZd dl	Z	d dl
mZ d dlmZ d dlmZ d dlmZmZmZmZmZmZmZ d dlmZ d dlmZmZ d d	lmZ d d
lm Z  d dl!m"Z" d dl#m$Z$ e$ Z%ej&e"j'ej(dG dd deZ)dS )    N)AnyDict)Image)	Pipelines)DDIMSampler)	box2squre
box_in_boxexpand_bboxexpand_image_maskget_bbox_from_maskpad_to_squaresobel)
OutputKeys)InputPipeline)	PIPELINES)
load_image)Tasks)
get_logger)module_namec                       s   e Zd ZdZdef fddZedddZded	e	ee
f fd
dZ				dde	ee
f d	e	ee
f fddZde	ee
f d	e	ee
f fddZ  ZS )AnydoorPipelinea   AnyDoor Pipeline.

    Examples:

    >>> from modelscope.pipelines import pipeline
    >>> from modelscope.utils.constant import Tasks
    >>> from PIL import Image

    >>> ref_image = 'data/test/images/image_anydoor_fg.png'
    >>> ref_mask = 'data/test/images/image_anydoor_fg_mask.png'
    >>> bg_image = 'data/test/images/image_anydoor_bg.png'
    >>> bg_mask = 'data/test/images/image_anydoor_bg_mask.png'

    >>> anydoor_pipeline = pipeline(Tasks.image_to_image_generation, model='damo/AnyDoor')
    >>> out = anydoor_pipeline((ref_image, ref_mask, bg_image, bg_mask))
    >>> assert isinstance(out['output_img'], Image.Image)
    modelc                    sT   t  jdd|i| tj| jj| jjj}| j	| j
|dd t| j| _dS )z
        use `model` to create a action detection pipeline for prediction
        Args:
            model: model id on modelscope hub.
        r   cuda)locationN )super__init__ospathjoinr   	model_dircfg
model_pathload_state_dict_get_state_dictr   ddim_sampler)selfr   kwargs
model_ckpt	__class__r   \/home/ubuntu/.local/lib/python3.10/site-packages/modelscope/pipelines/cv/anydoor_pipeline.pyr   1   s   zAnydoorPipeline.__init__cpuc                 C   sr   dd }t j| \}}| dkrdd l}|jj| |d}n|tj| t|d}||}t	d|  d |S )	Nc                 S   s   |  d| S )N
state_dict)get)dr   r   r+   get_state_dictA   s   z7AnydoorPipeline._get_state_dict.<locals>.get_state_dictz.safetensorsr   )device)map_locationzLoaded state_dict from [])
r   r   splitextlowersafetensors.torchtorch	load_fileloadr1   print)	ckpt_pathr   r0   _	extensionsafetensorsr-   r   r   r+   r$   >   s   zAnydoorPipeline._get_state_dictinputsreturnc                 C   sD  |\}}}}t t|d}t t t|ddkddt j}t t|d}t t t|ddkddt j}t|}t |||gd}|| t 	|d d|   }|\}	}
}}||	|
||d d f }||	|
||f }t j
dd	d
 }t|||d\}}t |||gd}t|ddd}t|t jdt j}t|d ddd}t|t jdt j}|d d d d df }||}}t |||gd}t||d }t|}t||ddgd}t||ddgd}t||}|\}	}
}}||	|
||d d f }||	|
||f }t||}|\}	}
}}t|t j|| |
|	 f}t|t j|| |
|	 f}|dkt j}| }|||	|
||d d f< | d }d||	|
||d d f< t |||gd}|jd |jd }}t|dddt j}t|dddt j}t|dddt j}|jd |jd }}t|t jdt j}t|t jdt j}t|t jdt jdkt j}|d }|d d }|d d }t ||d d d d d df gd}t|| | | t ||||gt |d}|S )NRGBL      r            
   )ratioF)	pad_valuerandom)   rM   g?g333333?g?      @g              ?   rQ   g      ?     _@)	tar_imagerefjpghintextra_sizestar_box_yyxx_crop)npasarrayr   convertwhereastypeuint8r   stack	ones_likerL   randintr
   r   cv2resizer   r	   r   r   copyshapefloat32concatenatedictarray)r&   r?   	ref_imageref_maskrS   tar_maskref_box_yyxx
ref_mask_3masked_ref_imagey1y2x1x2rJ   masked_ref_image_composeref_mask_composeref_image_collagetar_box_yyxxrX   cropped_target_imagecropped_tar_maskcollagecollage_maskH1W1H2W2itemr   r   r+   
preprocessP   s  










&zAnydoorPipeline.preprocessrD   rO      rN   r   c              
      s  |d    }|d }|d }d}|  tjfddt|D ddtd		 |   tj fd
dt|D dd t d		  d\}	}
g| j
 gd}g| j
tdg| gd}d|	d |
d f}|gd | j
_| jj||||dd||d\}}| j
|}t|dd d    }|d d d d d d d df }t|dd}|d }t|dddd d d d d f }|d    }|d    }t||||dS )NrS   rT   rV   rD   c                       g | ]} qS r   r   .0r<   )controlr   r+   
<listcomp>       z+AnydoorPipeline.forward.<locals>.<listcomp>r   )dimzb h w c -> b c h wc                    r   r   r   r   )
clip_inputr   r+   r      r   rP   )c_concatc_crossattn)rD      rM   rM            F)verboseetaunconditional_guidance_scaleunconditional_conditioningzb c h w -> b h w crR   rE   rF   rW   rX   )predrS   sizesrX   )r,   numpyfloatr   r7   r_   rangeeinops	rearrangecloner   get_learned_conditioningzeroscontrol_scalesr%   sampledecode_first_stagerY   cliprh   )r&   r   num_samplesstrength
ddim_stepsscalerS   rT   rV   HWcondun_condre   samplesr<   	x_samplesresultr   r   rX   r   )r   r   r+   forward   sh    

 $zAnydoorPipeline.forwardc                 C   s  |d }|d }|d }|d }|\}}}}	|\}
}}}t ||	|f}d}||krj||| || f ||
| || || || d d f< t| ddd}|ddd }tj|d	d
}t	j
|iS ||	k rt|	| d }|	| | }|d d || d d f }nt|| d }|| | }||| d d d d f }| }||| || f ||
| || || || d d f< t|ddd}|ddd }tj|d	d
}t	j
|iS )Nr   rS   r   rX   r      r   rD   rA   )mode)rb   rc   r7   
from_numpyrd   permuter   r   	fromarrayr   
OUTPUT_IMGint)r&   r?   r   rS   rW   rX   r|   r}   r~   r   rp   rq   rr   rs   m	gen_imagepad1pad2r   r   r+   postprocess  s6   >
>
zAnydoorPipeline.postprocess)r,   )rD   rO   r   rN   )__name__
__module____qualname____doc__strr   staticmethodr$   r   r   r   r   r   r   __classcell__r   r   r)   r+   r      s     t


*?r   )*r   typingr   r   rb   r   r   rY   requestsr7   PILr   modelscope.metainfor   -modelscope.models.cv.anydoor.cldm.ddim_hackedr   0modelscope.models.cv.anydoor.datasets.data_utilsr   r   r	   r
   r   r   r   modelscope.outputsr   modelscope.pipelines.baser   r   modelscope.pipelines.builderr   modelscope.preprocessors.imager   modelscope.utils.constantr   modelscope.utils.loggerr   loggerregister_moduleimage_to_image_generationanydoorr   r   r   r   r+   <module>   s,   $