o
    f>                     @   s  d Z ddlZddlZddlZddlZddlZddlZddlZddlm	Z	m
Z
mZmZmZmZmZmZmZmZmZmZmZ ddlmZ ddlmZ ddlmZ ddlmZ dd	lmZ dd
lmZ ddlmZ ddlmZ ddl m!Z! ddl m"Z" e#e$Z%G dd de&Z'G dd dZ(G dd de(Z)G dd de(Z*G dd de*Z+G dd de(Z,G dd de,Z-G dd de)Z.G dd  d e,Z/G d!d" d"e)Z0G d#d$ d$e,Z1G d%d& d&Z2G d'd( d(ee Z3d)ee4 d*dfd+d,Z5e$d-kre5ej6 dS dS ).a   Adobe character mapping (CMap) support.

CMaps provide the mapping between character codes and Unicode
code-points to character ids (CIDs).

More information is available on the Adobe website:

  http://opensource.adobe.com/wiki/display/cmap/CMap+Resources

    N)AnyBinaryIODictIterableIteratorListMutableMappingOptionalTextIOTupleUnioncastSet   )name2unicode)KWD)PSEOF)	PSKeyword)	PSLiteral)PSStackParser)PSSyntaxError)literal_name)choplist)nunpackc                   @      e Zd ZdS )	CMapErrorN__name__
__module____qualname__ r    r    H/home/ubuntu/webapp/venv/lib/python3.10/site-packages/pdfminer/cmapdb.pyr   1       r   c                   @   s   e Zd ZdZdeddfddZdefddZd	ed
eddfddZ	dede
ddfddZde
deeee
f ddfddZdddZdedee
 fddZdS )CMapBaser   kwargsreturnNc                 K   s   |  | _d S N)copyattrsselfr$   r    r    r!   __init__9      zCMapBase.__init__c                 C   s   | j dddkS )NWModer   r(   getr*   r    r    r!   is_vertical<   s   zCMapBase.is_verticalkvc                 C   s   || j |< d S r&   )r(   )r*   r2   r3   r    r    r!   set_attr?   r,   zCMapBase.set_attrcodecidc                 C      d S r&   r    )r*   r5   r6   r    r    r!   add_code2cidB      zCMapBase.add_code2cidc                 C   r7   r&   r    )r*   r6   r5   r    r    r!   add_cid2unichrE   r9   zCMapBase.add_cid2unichrcmapc                 C   r7   r&   r    r*   r;   r    r    r!   use_cmapH   r9   zCMapBase.use_cmapc                 C   s   t r&   )NotImplementedError)r*   r5   r    r    r!   decodeK   r9   zCMapBase.decode)r;   r#   r%   N)r   r   r   debugobjectr+   boolr1   strr4   intr8   r   r   bytesr:   r=   r   r?   r    r    r    r!   r#   5   s     
r#   c                	   @   s   e Zd Zdeeef ddfddZdefddZdeddfd	d
Z	de
dee fddZejddfdedeeeef  deedf ddfddZdS )CMapr$   r%   Nc                 K      t j| fi | i | _d S r&   )r#   r+   code2cidr)   r    r    r!   r+   P      
zCMap.__init__c                 C      d| j d S )Nz
<CMap: %s>CMapNamer.   r0   r    r    r!   __repr__T      zCMap.__repr__r;   c                    sV   t |tsJ tt|dtttf dtttf dd f fdd  | j|j d S )Ndstsrcr%   c                    s@   |  D ]\}}t|tri }|| |<  || q|| |< qd S r&   )items
isinstancedict)rN   rO   r2   r3   dr'   r    r!   r'   Z   s   

zCMap.use_cmap.<locals>.copy)rQ   rF   rC   typer   rD   rA   rH   r<   r    rT   r!   r=   W   s   *	zCMap.use_cmapr5   c                 c   sj    t d| | | j}t|D ]#}||v r/|| }t|tr%|V  | j}qttttf |}q| j}qd S )Nzdecode: %r, %r)	logr@   rH   iterrQ   rD   r   r   rA   )r*   r5   rS   ixr    r    r!   r?   e   s   
zCMap.decoder    outrH   .c                 C   sr   |d u r	| j }d}t| D ]'\}}||f }t|tr'|d||f  q| j|tttt	f ||d qd S )Nr    zcode %r = cid %d
)rZ   rH   r5   )
rH   sortedrP   rQ   rD   writedumpr   r   rA   )r*   rZ   rH   r5   r2   r3   cr    r    r!   r]   s   s   

 z	CMap.dump)r   r   r   r   rC   rD   r+   rL   r#   r=   rE   r   r?   sysstdoutr
   r	   r   rA   r   r]   r    r    r    r!   rF   O   s"    
rF   c                   @   &   e Zd Zdedeedf fddZdS )IdentityCMapr5   r%   .c                 C   s$   t |d }|rtd| |S dS )N   z>%dHr    lenstructunpackr*   r5   nr    r    r!   r?      s   zIdentityCMap.decodeNr   r   r   rE   r   rD   r?   r    r    r    r!   rb          rb   c                   @   ra   )IdentityCMapByter5   r%   .c                 C   s    t |}|rtd| |S dS )Nz>%dBr    rd   rh   r    r    r!   r?      s   zIdentityCMapByte.decodeNrj   r    r    r    r!   rl      rk   rl   c                   @   s^   e Zd Zdeeef ddfddZdefddZdedefd	d
Ze	j
fdeddfddZdS )
UnicodeMapr$   r%   Nc                 K   rG   r&   )r#   r+   
cid2unichrr)   r    r    r!   r+      rI   zUnicodeMap.__init__c                 C   rJ   )Nz<UnicodeMap: %s>rK   r.   r0   r    r    r!   rL      rM   zUnicodeMap.__repr__r6   c                 C   s   t d| | | j| S )Nget_unichr: %r, %r)rV   r@   rn   r*   r6   r    r    r!   
get_unichr   s   
zUnicodeMap.get_unichrrZ   c                 C   s.   t | j D ]\}}|d||f  qd S )Nzcid %d = unicode %r
)r[   rn   rP   r\   )r*   rZ   r2   r3   r    r    r!   r]      s   zUnicodeMap.dump)r   r   r   r   rC   rD   r+   rL   rq   r_   r`   r
   r]   r    r    r    r!   rm      s
    rm   c                   @   s   e Zd ZdedefddZdS )IdentityUnicodeMapr6   r%   c                 C   s   t d| | t|S )z+Interpret character id as unicode codepointro   )rV   r@   chrrp   r    r    r!   rq      s   zIdentityUnicodeMap.get_unichrN)r   r   r   rD   rC   rq   r    r    r    r!   rr      s    rr   c                   @   s"   e Zd ZdededdfddZdS )FileCMapr5   r6   r%   Nc                 C   s   t |tr
t |tsJ tt|t|f| j}|d d D ]}t|}||v r5ttttf || }qi }|||< |}qt|d }|||< d S )N)	rQ   rC   rD   rU   rH   ordr   r   rA   )r*   r5   r6   rS   r^   citr    r    r!   r8      s   zFileCMap.add_code2cid)r   r   r   rC   rD   r8   r    r    r    r!   rt      s    rt   c                   @   s,   e Zd Zdedeeeef ddfddZdS )FileUnicodeMapr6   r5   r%   Nc                 C   s   t |tsJ tt|t |tr t |jtsJ t|j}nt |tr,|dd}nt |tr6t	|}nt
||dkrH| j|dkrHd S || j|< d S )NzUTF-16BEignore     )rQ   rD   rC   rU   r   namer   rE   r?   rs   	TypeErrorrn   r/   )r*   r6   r5   unichrr    r    r!   r:      s   



zFileUnicodeMap.add_cid2unichr)r   r   r   rD   r   r   rE   r:   r    r    r    r!   ry      s    $ry   c                       s*   e Zd Zdededdf fddZ  ZS )PyCMapr}   moduler%   Nc                    s.   t  j|d |j| _|jrd| jd< d S d S N)rK   r   r-   )superr+   CODE2CIDrH   IS_VERTICALr(   )r*   r}   r   	__class__r    r!   r+      s
   zPyCMap.__init__)r   r   r   rC   r   r+   __classcell__r    r    r   r!   r      s    "r   c                       s.   e Zd Zdedededdf fddZ  ZS )PyUnicodeMapr}   r   verticalr%   Nc                    s4   t  j|d |r|j| _d| jd< d S |j| _d S r   )r   r+   CID2UNICHR_Vrn   r(   CID2UNICHR_H)r*   r}   r   r   r   r    r!   r+      s
   zPyUnicodeMap.__init__)r   r   r   rC   r   rB   r+   r   r    r    r   r!   r      s    &r   c                   @   s   e Zd ZU i Zeeef ed< i Zeee	e
 f ed< G dd deZededefddZededefd	d
ZeddededefddZdS )CMapDB_cmap_cache_umap_cachec                   @   r   )zCMapDB.CMapNotFoundNr   r    r    r    r!   CMapNotFound   r"   r   r}   r%   c              	   C   s   | dd}d| }td| tjddtjtjt	df}|D ].}tj||}tj
|rRt|}ztt|dt| W |    S |  w q$t|)	N  z%s.pickle.gzzloading: %r	CMAP_PATHz/usr/share/pdfminer/r;   r    )replacerV   r@   osenvironr/   pathjoindirname__file__existsgzipopenrU   rC   pickleloadsreadcloser   r   )clsr}   filename
cmap_paths	directoryr   gzfiler    r    r!   
_load_data   s   

zCMapDB._load_datac                 C   s   |dkr	t ddS |dkrt ddS |dkrtddS |dkr$tddS z| j| W S  ty3   Y nw | |}t|| | j|< }|S )Nz
Identity-Hr   )r-   z
Identity-Vr   OneByteIdentityHOneByteIdentityV)rb   rl   r   KeyErrorr   r   )r   r}   datar;   r    r    r!   get_cmap  s    




zCMapDB.get_cmapFr   c                    sZ   z| j  | W S  ty   Y nw | d   fdddD | j < | j  | S )Nzto-unicode-%sc                    s   g | ]}t  |qS r    )r   ).0r3   r   r}   r    r!   
<listcomp>  s    z*CMapDB.get_unicode_map.<locals>.<listcomp>)FT)r   r   r   )r   r}   r   r    r   r!   get_unicode_map  s   zCMapDB.get_unicode_mapN)F)r   r   r   r   r   rC   r   __annotations__r   r   r   r   r   classmethodr   r   r#   r   rB   rm   r   r    r    r    r!   r      s   
 r   c                   @   s   e Zd ZdededdfddZd ddZed	Zed
Z	edZ
edZedZedZedZedZedZedZedZedZedZedZedZedZdededdfddZdeddfddZdS )!
CMapParserr;   fpr%   Nc                 C   s$   t | | || _d| _t | _d S )NT)r   r+   r;   _in_cmapset	_warnings)r*   r;   r   r    r    r!   r+   !  s
   zCMapParser.__init__c                 C   s$   z|    W d S  ty   Y d S w r&   )
nextobjectr   r0   r    r    r!   run)  s   
zCMapParser.runs	   begincmaps   endcmaps   usecmaps   defs   begincodespaceranges   endcodespaceranges   begincidranges   endcidranges   begincidchars
   endcidchars   beginbfranges
   endbfranges   beginbfchars	   endbfchars   beginnotdefranges   endnotdefrangepostokenc                 C   s*  || j u rd| _|   dS || ju rd| _dS | jsdS || ju rDz| d\\}}\}}| jt|| W dS  t	yC   Y dS w || j
u rtz| d\\}}| jtt| W dS  t	yi   Y dS  tjys   Y dS w || ju r|   dS || ju r|   dS || ju r|   dS || ju r2dd |  D }td|D ]\}}	}
t|ts| d	 qt|	ts| d
 qt|
ts| d qt|t|	kr| d q|dd }|	dd }||kr| d q|dd }|	dd }t|}t|}t|}t|| d D ]}|td|| | d  }| j|
| | qqdS || ju r>|   dS || ju rmdd |  D }td|D ]\}
}t|trit|
tri| j|
| qRdS || j u ry|   dS || j!u r7dd |  D }td|D ]\}}	}t|ts| d qt|	ts| d qt|t|	kr| d qt|}t|	}t|t"rt||| d kr| d t#t||d |D ]\}
}| j|
| qqt|tsJ |dd }t|}|dd }t|}t|| d D ]}|td|| | d  }| j|| | qqdS || j$u rC|   dS || j%u rtdd |  D }td|D ]\}
}t|
trpt|trp| jt|
| qWdS || j&u r|   dS || j'u r|   dS | (||f dS )z[ToUnicode CMaps

        See Section 5.9.2 - ToUnicode CMaps of the PDF Reference.
        TNFrc   r   c                 S      g | ]\}}|qS r    r    r   __objr    r    r!   r   p      z)CMapParser.do_keyword.<locals>.<listcomp>   z0The start object of begincidrange is not a byte.z.The end object of begincidrange is not a byte.z.The cid object of begincidrange is not a byte.z?The start and end byte of begincidrange have different lengths.zGThe prefix of the start and end byte of begincidrange are not the same.z>Lc                 S   r   r    r    r   r    r    r!   r     r   c                 S   r   r    r    r   r    r    r!   r     r   zThe start object is not a byte.zThe end object is not a byte.z.The start and end byte have different lengths.zPThe difference between the start and end offsets does not match the code length.c                 S   r   r    r    r   r    r    r!   r     r   ))KEYWORD_BEGINCMAPr   popallKEYWORD_ENDCMAPKEYWORD_DEFpopr;   r4   r   r   KEYWORD_USECMAPr=   r   r   r   KEYWORD_BEGINCODESPACERANGEKEYWORD_ENDCODESPACERANGEKEYWORD_BEGINCIDRANGEKEYWORD_ENDCIDRANGEr   rQ   rE   
_warn_oncerD   re   r   rangerf   packr:   KEYWORD_BEGINCIDCHARKEYWORD_ENDCIDCHARKEYWORD_BEGINBFRANGEKEYWORD_ENDBFRANGElistzipKEYWORD_BEGINBFCHARKEYWORD_ENDBFCHARKEYWORD_BEGINNOTDEFRANGEKEYWORD_ENDNOTDEFRANGEpush)r*   r   r   _r2   r3   cmapnameobjs
start_byteend_byter6   start_prefix
end_prefixsvarevarstartendvlenrX   rY   r5   unicode_valuevarbaseprefixr    r    r!   
do_keywordA  s  















zCMapParser.do_keywordmsgc                 C   s0   || j vr| j | d}t||  dS dS )z!Warn once for each unique messagezIgnoring (part of) ToUnicode map because the PDF data does not conform to the format. This could result in (cid) values in the output. N)r   addrV   warning)r*   r   base_msgr    r    r!   r     s   
zCMapParser._warn_once)r%   N)r   r   r   r#   r   r+   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   rD   r   r   rC   r   r    r    r    r!   r      s,    
 r   argvr%   c                 C   s^   ddl m} |dt | dd  }|D ]}t|d}t }t||  |  |  qd S )Nr   )warnzThe function main() from cmapdb.py will be removed in 2023. It was probably introduced for testing purposes a long time ago, and no longer relevant. Feel free to create a GitHub issue if you disagree.r   rb)	warningsr   DeprecationWarningr   ry   r   r   r   r]   )r   r   argsfnamer   r;   r    r    r!   main  s   

r   __main__)7__doc__r   loggingr   os.pathr   rf   r_   typingr   r   r   r   r   r   r   r	   r
   r   r   r   r   
encodingdbr   psparserr   r   r   r   r   r   r   utilsr   r   	getLoggerr   rV   	Exceptionr   r#   rF   rb   rl   rm   rr   rt   ry   r   r   r   r   rC   r   r   r    r    r    r!   <module>   sL    <
5		
9 E