o
    f                  	   @   s  d dl Z d dlmZ d dlmZmZmZmZ ddlm	Z	 ddl
mZ ddl
mZ ddl
mZ dd	l
mZ dd
l
mZ ddlmZ ddlmZ ddlmZ ddlmZ ddlmZ er`ddlmZ e eZG dd deZG dd deeeeedf  ZG dd deZdS )    N)BytesIO)BinaryIOTYPE_CHECKINGOptionalUnion   )settings)PDFException)	PDFObjRef)	PDFStream)
dict_value)	int_value)KWD)PSEOF)	PSKeyword)PSStackParser)PSSyntaxError)PDFDocumentc                   @   s   e Zd ZdS )PDFSyntaxErrorN)__name__
__module____qualname__ r   r   K/home/ubuntu/webapp/venv/lib/python3.10/site-packages/pdfminer/pdfparser.pyr      s    r   c                   @   sr   e Zd ZdZdeddfddZdd	d
ZedZedZ	edZ
edZedZedZdededdfddZdS )	PDFParsera  
    PDFParser fetch PDF objects from a file stream.
    It can handle indirect references by referring to
    a PDF document set by set_document method.
    It also reads XRefs at the end of every PDF file.

    Typical usage:
      parser = PDFParser(fp)
      parser.read_xref()
      parser.read_xref(fallback=True) # optional
      parser.set_document(doc)
      parser.seek(offset)
      parser.nextobject()

    fpreturnNc                 C   s   t | | d | _d| _d S )NF)r   __init__docfallback)selfr   r   r   r   r   -   s   
zPDFParser.__init__r   r   c                 C   s
   || _ dS )z0Associates the parser with a PDFDocument object.N)r   )r    r   r   r   r   set_document2   s   
zPDFParser.set_document   Rs   nulls   endobjs   streams   xrefs	   startxrefpostokenc              	   C   s  || j | jfv r| j| d  dS || ju r!| j| d  dS || ju r/| |df dS || ju rtt| j	dkrrz,| d\\}}\}}t
|t
|}}| jdusWJ t| j||}| ||f W dS  tyq   Y dS w dS || ju rS| d\\}}t|}d}| jszt|d }W n ty   tjrtd| Y nw | | z|  \}}	W n ty   tjrtdY dS w |t|	7 }| j| t| j|}
| ||  	 z|  \}}	W n ty   tjrtdY n+w d	|	v r|	d	}||7 }| jr|
|	d| 7 }
n|t|	7 }| jr#|
|	7 }
q| ||  td
||||
dd  | jdus@J t|t |
| jj!}| ||f dS | ||f dS )zHandles PDF-related keywords.r      N   r   Lengthz/Length is undefined: %rzUnexpected EOFs	   endstreamz-Stream: pos=%d, objlen=%d, dic=%r, data=%r...
   )"KEYWORD_XREFKEYWORD_STARTXREFadd_resultspopKEYWORD_ENDOBJKEYWORD_NULLpush	KEYWORD_Rlencurstackintr   r
   r   KEYWORD_STREAMr   r   r   KeyErrorr   STRICTr   seeknextliner   r   	bytearrayreadindexlogdebugr   bytesdecipher)r    r#   r$   _objidgennoobjdicobjlenlinedatalineposistreamr   r   r   
do_keyword=   s   


	



zPDFParser.do_keyword)r   r   r   N)r   r   r   __doc__r   r   r!   r   r0   r.   r-   r4   r)   r*   r3   r   rK   r   r   r   r   r      s    
r   c                   @   sJ   e Zd ZdZdeddfddZdddZed	Zd
e	de
ddfddZdS )PDFStreamParsera(  
    PDFStreamParser is used to parse PDF content streams
    that is contained in each page and has instructions
    for rendering the page. A reference to a PDF document is
    needed because a PDF content stream can also have
    indirect references to other objects in the same document.
    rG   r   Nc                 C   s   t | t| d S N)r   r   r   )r    rG   r   r   r   r      s   zPDFStreamParser.__init__c                 C   s   | j |    d S rN   )r+   popall)r    r   r   r   flush   s   zPDFStreamParser.flushs   objr#   r$   c                 C   s   || j u r5z%| d\\}}\}}t|t|}}t| j||}| ||f W d S  ty4   Y d S w || j| jfv rFt	j
rDtdd S | ||f d S )Nr&   zKeyword endobj found in stream)r0   r,   r3   r
   r   r/   r   KEYWORD_OBJr-   r   r6   r   )r    r#   r$   r@   rA   rB   rC   r   r   r   rK      s    
zPDFStreamParser.do_keyword)r   N)r   r   r   rL   r>   r   rP   r   rQ   r3   r   rK   r   r   r   r   rM      s    
rM   )loggingior   typingr   r   r   r    r   pdftypesr	   r
   r   r   r   psparserr   r   r   r   r   pdfdocumentr   	getLoggerr   r<   r   r   rM   r   r   r   r   <module>   s(    
 q