o
    @@£iƒ  ã                   @   s    d Z ddlZddlZddlmZ ddlmZ ddlm	Z	 ddl
mZ ddlmZ ddlmZ dd	lmZ dd
lmZ ddlmZ ddlmZ G dd„ deƒZdS )zK
This implementation of EventListener extracts all Image objects on a Page
é    N)ÚImage)ÚCanvas)ÚCanvasStreamProcessor)ÚBeginPageEvent)ÚEndPageEvent)ÚEvent)ÚEventListener)ÚImageRenderEvent)ÚDocument)ÚPagec                   @   s‚   e Zd ZdZdd„ Zdefdd„Zddd„Zddd„Zd	e	j
ee	jej f fdd„Zeded	e	j
ee	jej f fdd„ƒZd
S )ÚImageExtractionzS
    This implementation of EventListener extracts all Image objects on a Page
    c                 C   s   i | _ d| _dS )z8
        Constructs a new SimpleImageExtraction
        éÿÿÿÿN)Ú_image_render_info_per_pageÚ_current_page©Úself© r   úa/home/ubuntu/transcripts/venv/lib/python3.10/site-packages/borb/toolkit/image/image_extraction.pyÚ__init__    s   
zImageExtraction.__init__Úpagec                 C   s   |  j d7  _ d S )Né   )r   )r   r   r   r   r   Ú_begin_page+   s   zImageExtraction._begin_pageÚeventr   ÚreturnNc                 C   s4   t |tƒr|  | ¡ ¡ t |tƒr|  |¡ d S d S ©N)Ú
isinstancer   r   Úget_pager	   Ú_render_image)r   r   r   r   r   Ú_event_occurred.   s
   

ÿzImageExtraction._event_occurredÚimage_render_eventr	   c                 C   s2   | j | jvrg | j| j < | j| j   | ¡ ¡ d S r   )r   r   ÚappendÚ	get_image)r   r   r   r   r   r   4   s
   ÿzImageExtraction._render_imagec                 C   s   | j S )zL
        This function returns a typing.List[Image] on a given page
        )r   r   r   r   r   Ú
get_imagesB   s   zImageExtraction.get_imagesÚpdfc                 C   s’   i }t |  ¡  ¡ p
dƒ}td|ƒD ]5}|  |¡}t |d d ¡}tƒ }| t	|ƒ¡ t
|tƒ g ƒ ||g¡ | t|ƒ¡ | ¡ d ||< q|S )zÂ
        This function returns the images used in a given PDF
        :param pdf:     the PDF to be analysed
        :return:        the images (typing.List[PIL.Image.Image]) in the PDF
        r   ÚContentsÚDecodedBytes)ÚintÚget_document_infoÚget_number_of_pagesÚranger   ÚioÚBytesIOr   r   r   r   r   Úreadr   r"   )r#   Úimages_of_each_pageÚnumber_of_pagesÚpage_nrr   Úpage_sourceÚcser   r   r   Úget_images_from_pdfH   s   	
z#ImageExtraction.get_images_from_pdf)r   r   r   N)r   r	   )Ú__name__Ú
__module__Ú__qualname__Ú__doc__r   r   r   r   r   ÚtypingÚDictr&   ÚListÚPILImageModuler   r"   Ústaticmethodr
   r2   r   r   r   r   r      s    

 ÿþr   )r6   r*   r7   ÚPILr   r:   Úborb.pdf.canvas.canvasr   Ú'borb.pdf.canvas.canvas_stream_processorr   Ú&borb.pdf.canvas.event.begin_page_eventr   Ú$borb.pdf.canvas.event.end_page_eventr   Ú$borb.pdf.canvas.event.event_listenerr   r   Ú(borb.pdf.canvas.event.image_render_eventr	   Úborb.pdf.document.documentr
   Úborb.pdf.page.pager   r   r   r   r   r   Ú<module>   s   