o
    f                     @   s   d dl Z d dlZd dlmZmZmZmZmZmZm	Z	 d dl
mZ ddlmZ ddlmZmZmZ ddlmZ ddlmZ dd	lmZ dd
lmZ ddlmZ ddlmZ ddlmZ eeZedZedZ G dd dZ!dS )    N)BinaryIO	ContainerDictIteratorListOptionalTuple)Rect   )settings)PDFDocumentPDFTextExtractionNotAllowedPDFNoPageLabels)	PDFParser)PDFObjectNotFound)
dict_value)	int_value)
list_value)resolve1)LITPagePagesc                   @   s   e Zd ZdZdedededee ddf
dd	Zdefd
dZ	h dZ
ededed  fddZe					ddedeee  dededededed  fddZdS )PDFPageak  An object that holds the information about a page.

    A PDFPage object is merely a convenience class that has a set
    of keys and values, which describe the properties of a page
    and point to its contents.

    Attributes:
      doc: a PDFDocument object.
      pageid: any Python object that can uniquely identify the page.
      attrs: a dictionary of page attributes.
      contents: a list of PDFStream objects that represents the page content.
      lastmod: the last modified time of the page.
      resources: a dictionary of resources used by the page.
      mediabox: the physical size of the page.
      cropbox: the crop rectangle of the page.
      rotate: the page rotation (in degree).
      annots: the page annotations.
      beads: a chain that represents natural reading order.
      label: the page's label (typically, the logical page number).
    docpageidattrslabelreturnNc                 C   s   || _ || _t|| _|| _t| jd| _t| jdt | _	t| jd | _
d| jv r8t| jd | _n| j
| _t| jddd d | _| jd| _| jd	| _d
| jv ret| jd
 }ng }t|tso|g}|| _dS )zInitialize a page object.

        doc: a PDFDocument object.
        pageid: any Python object that can uniquely identify the page.
        attrs: a dictionary of page attributes.
        label: page label string.
        LastModified	ResourcesMediaBoxCropBoxRotater   ih  AnnotsBContentsN)r   r   r   r   r   r   getlastmoddict	resourcesmediaboxcropboxr   rotateannotsbeads
isinstancelistcontents)selfr   r   r   r   r1    r3   I/home/ubuntu/webapp/venv/lib/python3.10/site-packages/pdfminer/pdfpage.py__init__-   s*   





zPDFPage.__init__c                 C   s   d | j| jS )Nz(<PDFPage: Resources={!r}, MediaBox={!r}>)formatr)   r*   )r2   r3   r3   r4   __repr__O   s   zPDFPage.__repr__>   r"   r!   r    r   documentc           	      #   s"   dt dttt f dttttt tt t f f f  f fddz }W n ty6   t	d }Y nw d}dj
v rZj
d j
}|D ]\}} ||t|V  d}qI|sjD ]/}| D ](}z|}t|tr|d	tu r ||t|V  W qe ty   Y qew q_d S )
Nobjparentr   c                 3   s    t | tr| }t| }n	| j}t|  }| D ]\}}| jv r0||vr0|||< q|d}|d u rBt	j
sB|d}|tu red|v retd|d  t|d D ]
}||E d H  qXd S |tu rvtd| ||fV  d S d S )NTypetypeKidszPages: Kids=%rzPage: %r)r/   intr   getobjcopyobjiditemsINHERITABLE_ATTRSr&   r   STRICTLITERAL_PAGESlogdebugr   LITERAL_PAGE)r9   r:   rA   treekv	tree_typecclsr8   searchr3   r4   rP   X   s,   


z$PDFPage.create_pages.<locals>.searchFr   Tr;   )objectr   strr   r   r>   get_page_labelsr   	itertoolsrepeatcatalognextxrefs
get_objidsr?   r/   r(   r&   rH   r   )	rO   r8   page_labelspagesobjectsrA   rI   xrefr9   r3   rN   r4   create_pagesV   s@   



zPDFPage.create_pagesr    TFfppagenosmaxpagespasswordcachingcheck_extractablec                 c   s    t |}t|||d}|js"|rd| }	t|	d| }
t|
 t| |D ]\}}|r4||vr4q)|V  |rB||d krB d S q)d S )N)rc   rd   z"Text extraction is not allowed: %rzThe PDF %r contains a metadata field indicating that it should not allow text extraction. Ignoring this field and proceeding. Use the check_extractable if you want to raise an error in this caser
   )r   r   is_extractabler   rF   warning	enumerater^   )rO   r`   ra   rb   rc   rd   re   parserr   	error_msgwarning_msgpagenopager3   r3   r4   	get_pages   s*   
 zPDFPage.get_pages)Nr   r_   TF)__name__
__module____qualname____doc__r   rQ   r   rR   r5   r7   rC   classmethodr   r^   r   r   r>   boolrn   r3   r3   r3   r4   r      sL    
"3
r   )"rT   loggingtypingr   r   r   r   r   r   r   pdfminer.utilsr	   r_   r   pdfdocumentr   r   r   	pdfparserr   pdftypesr   r   r   r   r   psparserr   	getLoggerro   rF   rH   rE   r   r3   r3   r3   r4   <module>   s"    $
