o
    f@                     @   s   d Z ddlZddlmZmZ ddlmZ ddlmZ ddlm	Z
 ddlmZ ddlmZ ddlmZ ddlmZ zddlZW n eyI   dZY nw G d	d
 d
ejZG dd deZeddZG dd deZdd ZdddZdd ZdS ))	PdfObjectPdfImage    N)c_uintc_float)Path)
namedtuple)PdfiumError)	PdfMatrix)	PdfBitmapc                       sZ   e Zd ZdZ fddZd fdd	Zedd	 Zd
d Zdd Z	dd Z
dd Z  ZS )r   a  
    Page object helper class.
    
    When constructing a :class:`.PdfObject`, an instance of a more specific subclass may be returned instead,
    depending on the object's :attr:`.type` (e. g. :class:`.PdfImage`).
    
    Attributes:
        raw (FPDF_PAGEOBJECT):
            The underlying PDFium pageobject handle.
        type (int):
            The object's type (:data:`FPDF_PAGEOBJ_*`).
        page (PdfPage):
            Reference to the page this pageobject belongs to. May be None if it does not belong to a page yet.
        pdf (PdfDocument):
            Reference to the document this pageobject belongs to. May be None if the object does not belong to a document yet.
            This attribute is always set if :attr:`.page` is set.
        level (int):
            Nesting level signifying the number of parent Form XObjects, at the time of construction.
            Zero if the object is not nested in a Form XObject.
    c                    s8   t |}|t jkrt t}nt t}||_|S N)pdfium_cFPDFPageObj_GetTypeFPDF_PAGEOBJ_IMAGEsuper__new__r   r   type)clsrawargskwargsr   instance	__class__ W/home/ubuntu/webapp/venv/lib/python3.10/site-packages/pypdfium2/_helpers/pageobjects.pyr   -   s   

zPdfObject.__new__Nr   c                    sf   ||||f\| _ | _| _| _|d ur&| jd u r|j| _n
| j|jur&tdt jtj|d u d d S )Nz;*page* must belong to *pdf* when constructing a pageobject.)
needs_free)	r   pagepdflevel
ValueErrorr   __init__r   FPDFPageObj_Destroy)selfr   r   r   r   r   r   r   r    9   s   

zPdfObject.__init__c                 C   s   | j d u r| jS | j S r   r   r   )r"   r   r   r   parentF   s   zPdfObject.parentc                 C   s`   | j du r	tdt t t t f\}}}}t| ||||}|s&td|j|j|j|jfS )z
        Get the position of the object on the page.
        
        Returns:
            A tuple of four :class:`float` coordinates for left, bottom, right, and top.
        Nz.Must not call get_pos() on a loose pageobject.zFailed to locate pageobject.)r   RuntimeErrorr   r   FPDFPageObj_GetBoundsr   value)r"   lbrtokr   r   r   get_posL   s   
zPdfObject.get_posc                 C   s*   t  }t | |}|stdt|S )z\
        Returns:
            PdfMatrix: The pageobject's current transform matrix.
        z#Failed to get matrix of pageobject.)r   	FS_MATRIXFPDFPageObj_GetMatrixr   r	   from_raw)r"   	fs_matrixr,   r   r   r   
get_matrix^   s
   
zPdfObject.get_matrixc                 C   s   t | |}|stddS )zs
        Parameters:
            matrix (PdfMatrix): Set this matrix as the pageobject's transform matrix.
        z#Failed to set matrix of pageobject.N)r   FPDFPageObj_SetMatrixr   )r"   matrixr,   r   r   r   
set_matrixj   s   zPdfObject.set_matrixc                 C   s   t j| g| R   dS )z
        Parameters:
            matrix (PdfMatrix): Multiply the page object's current transform matrix by this matrix.
        N)r   FPDFPageObj_Transformget)r"   r4   r   r   r   	transformt   s   zPdfObject.transform)NNr   )__name__
__module____qualname____doc__r   r    propertyr$   r-   r2   r5   r8   __classcell__r   r   r   r   r      s    

r   c                   @   sj   e Zd ZdZdZedd Zdd Zdd ZdddZ	dddZ
dddZdddZdddZdd Zd	S )r   zC
    Image object helper class (specific kind of page object).
    )ASCIIHexDecodeASCII85DecodeRunLengthDecodeFlateDecode	LZWDecodec                 C   s   t |}| |d|dS )a  
        Parameters:
            pdf (PdfDocument): The document to which the new image object shall be added.
        Returns:
            PdfImage: Handle to a new, empty image.
            Note that position and size of the image are defined by its matrix, which defaults to the identity matrix.
            This means that new images will appear as a tiny square of 1x1 units on the bottom left corner of the page.
            Use :class:`.PdfMatrix` and :meth:`.set_matrix` to adjust size and position.
        Nr#   )r   FPDFPageObj_NewImageObj)r   r   raw_imgr   r   r   new   s   
zPdfImage.newc                 C   s(   t  }t | | j|}|std|S )aB  
        Retrieve image metadata including DPI, bits per pixel, color space, and size.
        If the image does not belong to a page yet, bits per pixel and color space will be unset (0).
        
        Note:
            * The DPI values signify the resolution of the image on the PDF page, not the DPI metadata embedded in the image file.
            * Due to issues in PDFium, this function can be slow. If you only need image size, prefer the faster :meth:`.get_size` instead.
        
        Returns:
            FPDF_IMAGEOBJ_METADATA: Image metadata structure
        zFailed to get image metadata.)r   FPDF_IMAGEOBJ_METADATAFPDFImageObj_GetImageMetadatar   r   )r"   metadatar,   r   r   r   get_metadata   s
   zPdfImage.get_metadatac                 C   s4   t  t  }}t| ||}|std|j|jfS )z
        .. versionadded:: 4.8/5731
        
        Returns:
            (int, int): Image dimensions as a tuple of (width, height).
        zFailed to get image size.)r   r   FPDFImageObj_GetImagePixelSizer   r'   )r"   whr,   r   r   r   get_size   s
   zPdfImage.get_sizeNFTc                 C   s   t |ttfrt|d}d}nt|dr|}ntd| dt|\}}|r,tj	ntj
}t|\}	}
||	|
| |}|sCtd|rX|D ]}t| qG|rV|  dS dS | j j|7  _|rk| jj| dS dS )a  
        Set a JPEG as the image object's content.
        
        Parameters:
            source (str | pathlib.Path | typing.BinaryIO):
                Input JPEG, given as file path or readable byte buffer.
            pages (list[PdfPage] | None):
                If replacing an image, pass in a list of loaded pages that might contain it, to update their cache.
                (The same image may be shown multiple times in different transforms across a PDF.)
                May be None or an empty sequence if the image is not shared.
            inline (bool):
                Whether to load the image content into memory. If True, the buffer may be closed after this function call.
                Otherwise, the buffer needs to remain open until the PDF is closed.
            autoclose (bool):
                If the input is a buffer, whether it should be automatically closed once not needed by the PDF anymore.
        rbTr*   zCannot load JPEG from z" - not a file path or byte buffer.z&Failed to load JPEG into image object.N)
isinstancestrr   openpdfium_i	is_bufferr   get_bufreaderr   FPDFImageObj_LoadJpegFileInlineFPDFImageObj_LoadJpegFilepages_c_arrayr   idcloser   _data_holder_data_closerappend)r"   sourcepagesinline	autoclosebuffer	bufaccessto_holdloaderc_pages
page_countr,   datar   r   r   	load_jpeg   s,   

zPdfImage.load_jpegc                 C   s.   t |\}}t||| |}|stddS )a  
        Set a bitmap as the image object's content.
        The pixel data will be flate compressed (as of PDFium 5418).
        
        Parameters:
            bitmap (PdfBitmap):
                The bitmap to inject into the image object.
            pages (list[PdfPage] | None):
                A list of loaded pages that might contain the image object. See :meth:`.load_jpeg`.
        zFailed to set image to bitmap.N)rS   rX   r   FPDFImageObj_SetBitmapr   )r"   bitmapr_   rf   rg   r,   r   r   r   
set_bitmap   s
   zPdfImage.set_bitmapc                 C   sV   |r| j du rtdt| j | j| }nt| }|du r&td|  dt|S )a@  
        Get a bitmap rasterization of the image.
        
        Parameters:
            render (bool):
                Whether the image should be rendered, thereby applying possible transform matrices and alpha masks.
        Returns:
            PdfBitmap: Image bitmap (with a buffer allocated by PDFium).
        Nz0Cannot get rendered bitmap of loose page object.zFailed to get bitmap of image .)	r   r%   r   FPDFImageObj_GetRenderedBitmapr   FPDFImageObj_GetBitmapr   r
   r0   )r"   render
raw_bitmapr   r   r   
get_bitmap   s   


zPdfImage.get_bitmapc                 C   s8   |rt jnt j}|| dd}tj|  }|| || |S )aS  
        Parameters:
            decode_simple (bool):
                If True, apply simple filters, resulting in semi-decoded data (see :attr:`.SIMPLE_FILTERS`).
                Otherwise, the raw data will be returned.
        Returns:
            ctypes.Array: The data of the image stream (as :class:`~ctypes.c_ubyte` array).
        Nr   )r    FPDFImageObj_GetImageDataDecodedFPDFImageObj_GetImageDataRawctypesc_ubyte)r"   decode_simplefuncn_bytesrb   r   r   r   get_data  s
   	zPdfImage.get_datac                 C   sp   g }t | }t|D ]*}t | |dd}t|}t | ||| |jd}|r0|| jv r0q|	| q|S )z
        Parameters:
            skip_simple (bool):
                If True, exclude simple filters.
        Returns:
            list[str]: A list of image filters, to be applied in order (from lowest to highest index).
        Nr   zutf-8)
r    FPDFImageObj_GetImageFilterCountrangeFPDFImageObj_GetImageFilterru   create_string_bufferr'   decodeSIMPLE_FILTERSr]   )r"   skip_simplefilterscountilengthrb   fr   r   r   get_filters   s   	

zPdfImage.get_filtersc                 O   s   t | g|R i |}t|}t|ttfr8t| d| d}|| W d   dS 1 s1w   Y  dS t|drE|| dS t	d| d)a  
        Extract the image into an independently usable file or byte buffer.
        Where possible within PDFium's limited public API, it will be attempted to transfer the image data directly,
        avoiding an unnecessary layer of decoding and re-encoding.
        Otherwise, the fully decoded data will be retrieved and (re-)encoded using :mod:`PIL`.
        
        As PDFium does not expose all required information, only DCTDecode (JPEG) and JPXDecode (JPEG 2000) images can be extracted directly.
        For images with complex filters, the bitmap data is used. Otherwise, ``get_data(decode_simple=True)`` is used, which avoids lossy conversion for images whose bit depth or colour format is not supported by PDFium's bitmap implementation.
        
        Parameters:
            dest (str | io.BytesIO):
                File prefix or byte buffer to which the image shall be written.
            fb_format (str):
                The image format to use in case it is necessary to (re-)encode the data.
            fb_render (bool):
                Whether the image should be rendered if falling back to bitmap-based extraction.
        rm   wbNrL   zCannot extract to '')
_extract_smartnextrP   rQ   r   rR   sendrS   rT   r   )r"   destr   r   extraction_genformatbufr   r   r   extract8  s   "zPdfImage.extract)NFTr   )F)r9   r:   r;   r<   r   classmethodrF   rJ   rN   ri   rl   rr   rz   r   r   r   r   r   r   r   ~   s    


-


r   	ImageInfoz0format mode metadata all_filters complex_filtersc                   @   s   e Zd ZdS )ImageNotExtractableErrorN)r9   r:   r;   r   r   r   r   r   ]  s    r   c                 C   s:   | t jkr|dkrdS dS | t jkrdS | t jkrdS d S )N   1LRGBCMYK)r   FPDF_COLORSPACE_DEVICEGRAYFPDF_COLORSPACE_DEVICERGBFPDF_COLORSPACE_DEVICECMYK)
colorspacebppr   r   r   _get_pil_modea  s   


r   Fc           	   	   c   s    zt | \}}W n ty   | j|d }Y n#w d }|j}|dkr=|j}tj|j	|j
|jf| jddd|j	dd}|rL|rC|n|j	dkrJdnd	}|V }|rX|j||d
n
|| d V  d S  d V  d S )N)rp   r   Trw   r   r   r   tiffpng)r   )_extract_directr   rr   to_pilr   rI   PILImage
frombuffermodewidthheightrz   savewrite)		image_obj	fb_format	fb_renderrh   info	pil_imager   rI   rb   r   r   r   r   p  s.   




r   c           	      C   s   |   }dd |D }|  }t|j|j}t|dkr4|r'| jdd}d}nAtdtj	
|j dt|d	kr`|d }|d
krK| jdd}d}n|dkrX| jdd}d}ntd| dtd| dt|||||}||fS )Nc                 S   s   g | ]	}|t jvr|qS r   )r   r   ).0r   r   r   r   
<listcomp>  s    z#_extract_direct.<locals>.<listcomp>r   Tr   r   zUnhandled color space z  - don't know how to treat data.r   	DCTDecodejpg	JPXDecodejp2zUnhandled complex filter rm   z'Cannot handle multiple complex filters )r   rJ   r   r   bits_per_pixellenrz   r   rS   ColorspaceToStrr7   r   )	r   all_filterscomplex_filtersrI   r   out_data
out_formatr   r   r   r   r   r     s*   r   )NF) __all__ru   r   r   pathlibr   collectionsr   pypdfium2.rawr   r   pypdfium2.internalinternalrS   pypdfium2._helpers.miscr   pypdfium2._helpers.matrixr	   pypdfium2._helpers.bitmapr
   	PIL.Imager   ImportErrorAutoCloseabler   r   r   	Exceptionr   r   r   r   r   r   r   r   <module>   s.   h 
]
