o
    f                     @   s   d dl Z d dlZd dlmZmZmZmZ ddlmZ ddl	m
Z
 ddlmZ edZe eZded	efd
dZded	dfddZG dd dZdS )    N)DictIterableOptionalcast   )glyphname2unicode)ENCODING)	PSLiteralz[0-9a-fA-F]+namereturnc                    s8  t | tstd| t| f | dd } | d}t|dkr)dtt|S | t	v r1t	|  S | 
drk| d t rjt d dkrj fd	d
tdt dD }|D ]}t| qYtt|}d|S n+| 
dr| d}t|rdt|  krdkrn nt|dd}t| t|S td|  )a  Converts Adobe glyph names to Unicode numbers.

    In contrast to the specification, this raises a KeyError instead of return
    an empty string when the key is unknown.
    This way the caller must explicitly define what to do
    when there is not a match.

    Reference:
    https://github.com/adobe-type-tools/agl-specification#2-the-mapping

    :returns unicode character if name resembles something,
    otherwise a KeyError
    zcCould not convert unicode name "%s" to character because it should be of type str but is of type %s.r   _r    uni   c                    s$   g | ]}t  ||d   ddqS )r      base)int).0iname_without_uni L/home/ubuntu/webapp/venv/lib/python3.10/site-packages/pdfminer/encodingdb.py
<listcomp>0   s    z name2unicode.<locals>.<listcomp>u   r   r   zXCould not convert unicode name "%s" to character because it does not match specification)
isinstancestrKeyErrortypesplitlenjoinmapname2unicoder   
startswithstripHEXADECIMALmatchrange#raise_key_error_for_invalid_unicodechrr   )r
   
componentsunicode_digitsdigit
charactersname_without_uunicode_digitr   r   r   r&      sD   










&r&   r3   c                 C   s*   d|   k r
dk rn dS t d|  dS )zUnicode values should not be in the range D800 through DFFF because
    that is used for surrogate pairs in UTF-16

    :raises KeyError if unicode digit is invalid
    i  i   zHUnicode digit %d is invalid because it is in the range D800 through DFFFN)r    )r3   r   r   r   r,   G   s   r,   c                
   @   s   e Zd ZU i Zeeef ed< i Zeeef ed< i Z	eeef ed< i Z
eeef ed< eD ]#\ZZZZZeeZer@eee< erFeee< erLee	e< erRee
e< q/eee	e
dZe	ddedeee  d	eeef fd
dZdS )
EncodingDBstd2unicodemac2unicodewin2unicodepdf2unicode)StandardEncodingMacRomanEncodingWinAnsiEncodingPDFDocEncodingNr
   diffr   c                 C   s   | j || j}|rN| }d}|D ];}t|tr|}qt|trMzttt	|j
||< W n ttfyH } ztt	| W Y d }~nd }~ww |d7 }q|S )Nr   r   )	encodingsgetr5   copyr   r   r	   r&   r   r   r
   r    
ValueErrorlogdebug)clsr
   r=   cid2unicodecidxer   r   r   get_encodingl   s"   

zEncodingDB.get_encoding)N)__name__
__module____qualname__r5   r   r   r   __annotations__r6   r7   r8   r   r
   stdmacwinpdfr&   cr>   classmethodr   r   objectrI   r   r   r   r   r4   T   s<   
 

r4   )loggingretypingr   r   r   r   	glyphlistr   	latin_encr   psparserr	   compiler)   	getLoggerrJ   rB   r   r&   r   r,   r4   r   r   r   r   <module>   s    

9