o
    ॵi+                     @   s  d dl Z d dlZd dlmZ d dlmZmZ d dlZd dl	Z
d dlZd dlZd dlmZ d dlmZ d dlmZ d dlmZmZ d dlmZ d dlmZ d d	lmZ d d
lmZmZ d dl m!Z! d dl"m#Z# ddl$m%Z%m&Z&m'Z'm(Z(m)Z)m*Z*m+Z+m,Z, ej-dkrd dl.Z/nd dl0m/Z/ ej-dkrej1j2Zej1j23  e# Z4dZ5dZ6dZ7g dZ8ej9j:j;Z;ej9j:<ddd ej9j:<ddd ej=ej>ej>dG dd deZ?dS )    N)AnyDict)	Pipelines)OCRDetection)
OutputKeys)InputPipeline)	PIPELINES)	LoadImage)Config)	ModelFileTasks)device_placement)
get_logger   )SegLinkDetectorboxes_from_bitmap	cal_widthcombine_segments_pythondecode_segments_links_python
nms_pythonpolygons_from_bitmaprboxes_to_polygonsz2.0)slim         )皙?r   r   r   r   r   node_thresholdg?zConfidence threshold for nodeslink_thresholdg333333?zConfidence threshold for links)module_namec                       s   e Zd ZdZdef fddZ fddZdedeee	f fd	d
Z
deee	f deee	f fddZdeee	f deee	f fddZ  ZS )OCRDetectionPipelinea   OCR Detection Pipeline.

    Example:

    ```python
    >>> from modelscope.pipelines import pipeline

    >>> ocr_detection = pipeline('ocr_detection', model='damo/cv_resnet18_ocr-detection-line-level_damo')
    >>> result = ocr_detection('https://modelscope.oss-cn-beijing.aliyuncs.com/test/images/ocr_detection.jpg')

        {'polygons': array([[220,  14, 780,  14, 780,  64, 220,  64],
       [196, 369, 604, 370, 604, 425, 196, 425],
       [ 21, 730, 425, 731, 425, 787,  21, 786],
       [421, 731, 782, 731, 782, 789, 421, 789],
       [  0, 121, 109,   0, 147,  35,  26, 159],
       [697, 160, 773, 160, 773, 197, 697, 198],
       [547, 205, 623, 205, 623, 244, 547, 244],
       [548, 161, 623, 161, 623, 199, 547, 199],
       [698, 206, 772, 206, 772, 244, 698, 244]])}
    ```
    note:
    model = damo/cv_resnet18_ocr-detection-line-level_damo, for general text line detection, based on SegLink++.
    model = damo/cv_resnet18_ocr-detection-word-level_damo, for general text word detection, based on SegLink++.
    model = damo/cv_resnet50_ocr-detection-vlpt, for toaltext dataset, based on VLPT_pretrained DBNet.
    model = damo/cv_resnet18_ocr-detection-db-line-level_damo, for general text line detection, based on DBNet.

    modelc                    s  t |ts	J dt jd"d|i| td|  ttj	
|tj}t|dr8t|jdr8|jj| _nd| _| jdkrT| j| j| _| j  td dS t  t
t
| jtjd}t | _tjd	d
}d	|j_tj|d| _| j ; t | j!| j" tj#tj$g ddd| _%i | _&tj'dtj(d tj)dg t*dtj+dd}tj,-d|}t. }|j/| j%dd}	g g g }
}}t0|	D ]d\}}|d |d |d }}}t1|t2}tj34t5|ddg}tj34t5|ddgddddf }tj34t5|ddgddddf }tj6||gdd}|
7| |7| |7| qt8| j%dd }t9||
||t:|j;d\}}}}t<|||\}}|| j&d< || j&d < W d   n	1 sjw   Y  | j }td!|  tj,=|> }|?|| W d   n	1 sw   Y  W d   n1 sw   Y  W d   dS W d   dS 1 sw   Y  dS )#z
        use `model` to create a OCR detection pipeline for prediction
        Args:
            model: model id on modelscope hub.
        zmodel must be a single strr"   zloading model from dir 
model_typez	SegLink++DBNetzloading model donezcheckpoint-80000T)allow_soft_placement)config)r      r'      input_images)shapename )reuseglobal_stepr   F)initializerdtype	trainablegCl?)is_trainingr         Naxisr(   )anchor_sizescombined_rboxescombined_countszloading model from  )@
isinstancestrsuper__init__loggerinfor   	from_fileospathjoinr   CONFIGURATIONhasattrr"   r#   todeviceocr_detectorevaltfreset_default_graphospTF_CHECKPOINT_FOLDERget_default_graph_graphConfigProtogpu_optionsallow_growthSession_session
as_defaultr   	frameworkdevice_nameplaceholderfloat32r)   outputvariable_scope
AUTO_REUSEget_variableconstant_initializerint64trainExponentialMovingAverager   build_model	enumeratemultiplyOFFSET_VARIANCEnnsoftmaxreshapeconcatappendr*   r   listr8   r   Savervariables_to_restorerestore)selfr"   kwargscfgs
model_pathr&   r.   variable_averagesdetectorall_maps	all_nodes	all_linksall_regimapscls_mapslnk_mapsreg_mapscls_problnk_prob_poslnk_prob_mutlnk_prob
image_sizesegmentsgroup_indicessegment_counts_r9   r:   sessmodel_loader	__class__r;   b/home/ubuntu/.local/lib/python3.10/site-packages/modelscope/pipelines/cv/ocr_detection_pipeline.pyr?   Q   s   







2 $zOCRDetectionPipeline.__init__c                    s   t  j|fi |S )aK  
        Detect text instance in the text image.

        Args:
            input (`Image`):
                The pipeline handles three types of images:

                - A string containing an HTTP link pointing to an image
                - A string containing a local path to an image
                - An image loaded in PIL or opencv directly

                The pipeline currently supports single image input.

        Return:
            An array of contour polygons of detected N text instances in image,
            every row is [x1, y1, x2, y2, x3, y3, x4, y4, ...].
        )r>   __call__)rq   inputrr   r   r;   r   r      s   zOCRDetectionPipeline.__call__r   returnc                 C   s   | j dkr| |}|S t|}|j\}}}tjt||t||dftjd}||d |d |d d f< d}t	
|||f}	t	|	t	j}	|	tjg dtjd }	| j & t||g}tt||t||g}
|
| jd< || jd< W d    n1 sw   Y  dtj|	d	d
i}|S )Nr$   r(   )r0   r'   )gQ^@gR1]@g\(Y@	orig_sizeresize_sizeimgr   r6   )r#   preprocessorr
   convert_to_ndarrayr*   npzerosmaxr[   cv2resizecvtColorCOLOR_RGB2BGRarrayrQ   rW   rL   stackr\   expand_dims)rq   r   resultr   hwcimg_padr   img_pad_resizer   r;   r;   r   
preprocess   s*   


"

zOCRDetectionPipeline.preprocessc              	   C   s   | j dkr| |}|S | j 7 | j ! | j|d i}| jj| j|d}|W  d    W  d    S 1 s;w   Y  W d    d S 1 sKw   Y  d S )Nr$   r   )	feed_dict)r#   rJ   rQ   rW   rV   r)   runr\   )rq   r   outputsr   sess_outputsr;   r;   r   forward   s   

"zOCRDetectionPipeline.forwardinputsc              
   C   st  | j dkrtj|d i}|S |d d }|d d }|dks%||jd k r)td|d |d d f }|d \}}|d \}}t|}	t|t| }
t|t| }tdt	|	d d d d d	f | |d
 |	d d d d d	f< tdt	|	d d d
d d	f |
 |d
 |	d d d
d d	f< t
|	tj}	dd |	 D }t|}tdd |D }tj|i}|S )Nr$   det_polygonsr9   r   r:   z"modelscope error: No text detectedr   r   r3   r   c                 S   s   g | ]	}|t |g qS r;   )r   .0or;   r;   r   
<listcomp>  s    z4OCRDetectionPipeline.postprocess.<locals>.<listcomp>c                 S   s   g | ]}|d d qS )Nr   r;   r   r;   r;   r   r     s    )r#   r   POLYGONSr*   	Exceptionr   floatr   maximumminimumroundastypeint32tolistr   r   )rq   r   r   rboxescountorig_horig_wresize_hresize_wpolygonsscale_yscale_xdt_n9dt_nmsdt_polygonsr;   r;   r   postprocess   s2   
&&
z OCRDetectionPipeline.postprocess)__name__
__module____qualname____doc__r=   r?   r   r   r   r   r   r   r   __classcell__r;   r;   r   r   r!   2   s    _"*r!   )@mathrC   os.pathrD   rN   typingr   r   r   numpyr   
tensorflowrL   torchmodelscope.metainfor   "modelscope.models.cv.ocr_detectionr   modelscope.outputsr   modelscope.pipelines.baser   r   modelscope.pipelines.builderr	   modelscope.preprocessorsr
   modelscope.utils.configr   modelscope.utils.constantr   r   modelscope.utils.devicer   modelscope.utils.loggerr   	ocr_utilsr   r   r   r   r   r   r   r   __version__tf_slimr   tensorflow.contribcompatv1disable_eager_executionr@   RBOX_DIM
OFFSET_DIMWORD_POLYGON_DIMrg   appflagsFLAGSDEFINE_floatregister_moduleocr_detectionr!   r;   r;   r;   r   <module>   sR   (



