o
    f܋                     @   s  d dl Z d dlZd dlZd dlmZmZmZmZmZm	Z	m
Z
mZmZmZmZ d dlmZ ddlmZ ddlmZ ddlmZmZmZ ddlmZ dd	lmZ dd
lmZ ddlmZ ddlmZ ddlmZ ddlmZ ddlm Z  ddlm!Z! ddlm"Z" ddlm#Z# ddlm$Z$ ddlm%Z% ddlm&Z& ddlm'Z' ddlm(Z( ddl)m*Z* ddl+m,Z, ddl+m-Z- ddl.m/Z/m0Z0 ddl1m2Z2 ddl3m4Z4 ddlm5Z5m6Z6m7Z7m8Z8m9Z9m:Z: ddlm;Z; dd lm<Z< dd!lm=Z= dd"lm>Z> e?e@ZAG d#d$ d$e*ZBG d%d& d&eBZCed'e
ee5ZDG d(d) d)eBeeD ZEG d*d+ d+eEe5 ZFG d,d- d-eEe5 ZGG d.d/ d/eEe5 ZHG d0d1 d1eEe5 ZIdS )2    N)BinaryIODictGenericListOptionalSequenceTextIOTupleTypeVarUnioncast)PDFColorSpace   )utils)ImageWriter)LAParamsLTComponentTextGroupElement)LTAnno)LTChar)LTContainer)LTCurve)LTFigure)LTImageLTItem)LTLayoutContainer)LTLine)LTPage)LTRect)LTText)	LTTextBox)LTTextBoxVertical)LTTextGroup)
LTTextLine)PDFTextDevice)PDFFont)PDFUnicodeNotDefined)PDFGraphicStatePDFResourceManager)PDFPage)	PDFStream)AnyIOPointMatrixRectPathSegmentmake_compat_str)apply_matrix_pt)bbox2str)enc)mult_matrixc                   @   s(  e Zd ZU eed< eed< 		d0dededee	 ddfd	d
Z
dededdfddZdeddfddZdedededdfddZdeddfddZdededdfddZdededededee ddfd d!Zded"ed#ed$ed%ed&ed'ed(edefd)d*Zd"ed&edefd+d,Zd-eddfd.d/ZdS )1PDFLayoutAnalyzercur_itemctmr   Nrsrcmgrpagenolaparamsreturnc                 C   s"   t | | || _|| _g | _d S N)r%   __init__r:   r;   _stackselfr9   r:   r;    rB   K/home/ubuntu/webapp/venv/lib/python3.10/site-packages/pdfminer/converter.pyr>   9   s   
zPDFLayoutAnalyzer.__init__pagec                 C   s`   |j \}}}}t|||f\}}t|||f\}}ddt|| t|| f}t| j|| _d S )Nr   )mediaboxr2   absr   r:   r7   )rA   rD   r8   x0y0x1y1rE   rB   rB   rC   
begin_pageD   s
   zPDFLayoutAnalyzer.begin_pagec                 C   sl   | j rJ tt| j t| jtsJ tt| j| jd ur'| j| j |  j	d7  _	| 
| j d S )Nr   )r?   strlen
isinstancer7   r   typer;   analyzer:   receive_layout)rA   rD   rB   rB   rC   end_pageK   s   
zPDFLayoutAnalyzer.end_pagenamebboxmatrixc                 C   s(   | j | j t||t|| j| _d S r=   )r?   appendr7   r   r5   r8   )rA   rS   rT   rU   rB   rB   rC   begin_figureS   s   zPDFLayoutAnalyzer.begin_figure_c                 C   s@   | j }t| j tsJ tt| j | j | _ | j | d S r=   )r7   rN   r   rL   rO   r?   popadd)rA   rX   figrB   rB   rC   
end_figureW   s   zPDFLayoutAnalyzer.end_figurestreamc                 C   sR   t | jtsJ tt| jt||| jj| jj| jj| jj	f}| j
| d S r=   )rN   r7   r   rL   rO   r   rG   rH   rI   rJ   rZ   )rA   rS   r]   itemrB   rB   rC   render_image]   s   zPDFLayoutAnalyzer.render_imagegstatestrokefillevenoddpathc                    s4  d dd  D }|dd dkrdS |ddkr;td|D ]} |d|d }||||| q!dS  fd	d
 D }	fdd
|	D }
dd
  D }fdd
 D }dd
 t||D }|dv rt|j	|
d |
d ||||j
|j||jd
}j| dS |dv r|
\\}}\}}\}}\}}}|
d |
d k}||ko||ko||ko||kp||ko||ko||ko||k}|r|rt|j	g |
d |
d R ||||j
|j||j	}j| dS t|j	|
||||j
|j||j	}j| dS t|j	|
||||j
|j||j	}j| dS )z@Paint paths described in section 4.4 of the PDF reference manual c                 s   s    | ]}|d  V  qdS )r   NrB   ).0xrB   rB   rC   	<genexpr>o   s    z/PDFLayoutAnalyzer.paint_path.<locals>.<genexpr>Nr   mzm[^m]+r   c                    s:   g | ]}t t|d  dkr|dd n d  dd qS )r   hN)r   r-   )rf   p)rd   rB   rC   
<listcomp>   s    .z0PDFLayoutAnalyzer.paint_path.<locals>.<listcomp>c                    s   g | ]}t  j|qS rB   )r2   r8   )rf   ptrA   rB   rC   rm          c                 S   s   g | ]}t |d  qS )r   )rL   rf   	operationrB   rB   rC   rm      rp   c              	      s8   g | ]} fd dt |ddd |ddd D qS )c                    s(   g | ]\}}t  jt|t|fqS rB   )r2   r8   float)rf   operand1operand2ro   rB   rC   rm      s    z;PDFLayoutAnalyzer.paint_path.<locals>.<listcomp>.<listcomp>r   N   )ziprq   ro   rB   rC   rm      s    
c                 S   s"   g | ]\}}t t|g|R qS rB   )r   r0   )rf   orl   rB   rB   rC   rm      s    >   mlmlh)original_pathdashing_style>   mlllhmllll   rv   )joincountrefinditerstartend
paint_pathrw   r   	linewidthscolorncolordashr7   rZ   r   r   )rA   r`   ra   rb   rc   rd   shaperi   subpathraw_ptspts	operatorstransformed_pointstransformed_pathlinerG   rH   rI   rJ   x2y2x3y3rX   is_closed_loophas_square_coordinatesrectcurverB   )rd   rA   rC   r   f   s   	


zPDFLayoutAnalyzer.paint_pathfontfontsizescalingrisecidncsgraphicstatec	                 C   s   z| |}	t|	tsJ tt|	W n ty#   | ||}	Y nw ||}
||}t||||||	|
|||
}| j	
| |jS r=   )	to_unichrrN   rL   rO   r'   handle_undefined_char
char_width	char_dispr   r7   rZ   adv)rA   rU   r   r   r   r   r   r   r   text	textwidthtextdispr^   rB   rB   rC   render_char   s,   


zPDFLayoutAnalyzer.render_charc                 C   s   t d|| d| S )Nzundefined: %r, %rz(cid:%d))logdebug)rA   r   r   rB   rB   rC   r      s   z'PDFLayoutAnalyzer.handle_undefined_charltpagec                 C      d S r=   rB   rA   r   rB   rB   rC   rQ     s   z PDFLayoutAnalyzer.receive_layoutr   N) __name__
__module____qualname__r   __annotations__r.   r)   intr   r   r>   r*   rK   rR   rL   r/   rW   r\   r+   r_   r(   boolr   r0   r   r&   rs   r   r   r   r   rQ   rB   rB   rB   rC   r6   5   sj   
 
	
v	

!r6   c                	   @   sP   e Zd Z		ddededee ddfddZd	eddfd
dZ	defddZ
dS )PDFPageAggregatorr   Nr9   r:   r;   r<   c                 C   s   t j| |||d d | _d S N)r:   r;   )r6   r>   resultr@   rB   rB   rC   r>     s   
zPDFPageAggregator.__init__r   c                 C   s
   || _ d S r=   r   r   rB   rB   rC   rQ     s   
z PDFPageAggregator.receive_layoutc                 C   s   | j d usJ | j S r=   r   ro   rB   rB   rC   
get_result  s   zPDFPageAggregator.get_resultr   )r   r   r   r)   r   r   r   r>   r   rQ   r   rB   rB   rB   rC   r     s    
	r   IOTypec                   @   sP   e Zd Z			ddededededee d	dfd
dZ	e
ded	efddZdS )PDFConverterutf-8r   Nr9   outfpcodecr:   r;   r<   c                 C   s0   t j| |||d || _|| _| | j| _d S r   )r6   r>   r   r   _is_binary_streamoutfp_binary)rA   r9   r   r   r:   r;   rB   rB   rC   r>     s   zPDFConverter.__init__c                 C   sV   dt | ddv r
dS t| drdS t| tjrdS t| tjr!dS t| tjr)dS dS )z"Test if an stream is binary or notbmodere   TF)getattrhasattrrN   ioBytesIOStringIO
TextIOBase)r   rB   rB   rC   r   )  s   
zPDFConverter._is_binary_stream)r   r   N)r   r   r   r)   r   rL   r   r   r   r>   staticmethodr,   r   r   rB   rB   rB   rC   r     s&    
r   c                       s   e Zd Z					d dedededed	ee d
e	dee
 ddf fddZdeddfddZdeddfddZdededdfddZdede	de	de	dee ddfddZ  ZS )!TextConverterr   r   NFr9   r   r   r:   r;   
showpagenoimagewriterr<   c                    s&   t  j|||||d || _|| _d S )Nr   r:   r;   )superr>   r   r   )rA   r9   r   r   r:   r;   r   r   	__class__rB   rC   r>   <  s   

zTextConverter.__init__r   c                 C   sF   t || jd}| jrtt| j|  d S tt	| j| d S )Nignore)
r   compatible_encode_methodr   r   r   r   r   writeencoder   rA   r   rB   rB   rC   
write_textJ  s   zTextConverter.write_textr   c                    sD   dt dd f fdd jrd|j   | d d S )Nr^   r<   c                    sz   t | tr| D ]} | qnt | tr|   t | tr'd d S t | tr9jd ur;j|  d S d S d S )N
)	rN   r   r    r   get_textr!   r   r   export_imager^   childrenderrA   rB   rC   r   R  s   





z,TextConverter.receive_layout.<locals>.renderzPage %s
)r   r   r   pageidr   rB   r   rC   rQ   Q  s
   zTextConverter.receive_layoutrS   r]   c                 C   s    | j d u rd S t| || d S r=   )r   r   r_   )rA   rS   r]   rB   rB   rC   r_   f  s   
zTextConverter.render_imager`   ra   rb   rc   rd   c                 C   r   r=   rB   )rA   r`   ra   rb   rc   rd   rB   rB   rC   r   l  s   zTextConverter.paint_path)r   r   NFN)r   r   r   r)   r,   rL   r   r   r   r   r   r>   r   r   rQ   r+   r_   r(   r   r0   r   __classcell__rB   rB   r   rC   r   ;  sN    	r   c                    @   s  e Zd ZdddddddZddd	Z	
											dMdedededede	e
 dededededede	e dede	eeef  de	eeef  d dfd!d"Zd#ed dfd$d%ZdNd&d'ZdNd(d)Zd#ed dfd*d+Zd,ed-ed.ed/ed0ed1ed dfd2d3Zd,ed-ed4ed dfd5d6Zd4ed-ed.ed/ed0ed1ed dfd7d8Zd,ed#ed.ed/ed9ed dfd:d;Z	<dOd,ed-ed.ed/ed0ed1ed=ed dfd>d?Zd,ed dfd@dAZd#edBedCed dfdDdEZdNdFdGZdHed dfdIdJZdNdKdLZ dS )PHTMLConverteryellowmagentacyanredblackgray)figuretextlinetextbox	textgroupr   rD   blue)r   charr   r   N      ?normalT2   r   r9   r   r   r:   r;   scale	fontscale
layoutmoder   
pagemarginr   r   rect_colorstext_colorsr<   c                 C   s   t j| |||||d | j| j krtd|d u rddi}|d u r'ddd}|| _|| _|| _|	| _|
| _	|| _
|| _|| _|rO| j| j | j| j | j	| _d | _g | _|   d S )Nr   )Codec is required for a binary I/O outputr   r   r   )r   rD   )r   r>   r   r   
ValueErrorr   r   r   r   r   r   r   r   updateRECT_COLORSTEXT_COLORS_yoffset_font
_fontstackwrite_header)rA   r9   r   r   r:   r;   r   r   r   r   r   r   r   r   r   rB   rB   rC   r>     s2   
zHTMLConverter.__init__r   c                 C   :   | j rtt| j|| j  d S tt| j| d S r=   r   r   r   r   r   r   r   r   rB   rB   rC   r     
   zHTMLConverter.writec                 C   s8   |  d | jrd| j }nd}|  | |  d d S )Nz<html><head>
zA<meta http-equiv="Content-Type" content="text/html; charset=%s">
z5<meta http-equiv="Content-Type" content="text/html">
z</head><body>
)r   r   )rA   srB   rB   rC   r    s   


zHTMLConverter.write_headerc                 C   s<   dd t d| jD }dd| }| | | d d S )Nc                 S   s   g | ]}d  ||qS )z<a href="#{}">{}</a>)format)rf   irB   rB   rC   rm     s    z.HTMLConverter.write_footer.<locals>.<listcomp>r   z8<div style="position:absolute; top:0px;">Page: %s</div>
z, z</body></html>
)ranger:   r   r   )rA   
page_linksr  rB   rB   rC   write_footer  s   


zHTMLConverter.write_footerc                 C   s   |  t| d S r=   )r   r4   r   rB   rB   rC   r     s   zHTMLConverter.write_textcolorborderwidthrg   ywrj   c           	      C   sT   | j |}|d ur(d|||| j | j| | j || j || j f }| | d S )Nzn<span style="position:absolute; border: %s %dpx solid; left:%dpx; top:%dpx; width:%dpx; height:%dpx;"></span>
)r   getr   r   r   )	rA   r  r  rg   r  r  rj   color2r  rB   rB   rC   
place_rect  s   
zHTMLConverter.place_rectr^   c                 C   s    |  |||j|j|j|j d S r=   )r  rG   rJ   widthheight)rA   r  r  r^   rB   rB   rC   place_border  s   zHTMLConverter.place_borderc           	      C   sZ   | j d ur+| j |}dt|||| j | j| | j || j || j f }| | d S )Nzd<img src="%s" border="%d" style="position:absolute; left:%dpx; top:%dpx;" width="%d" height="%d" />
)r   r   r4   r   r   r   )	rA   r^   r  rg   r  r  rj   rS   r  rB   rB   rC   place_image  s   

zHTMLConverter.place_imagesizec                 C   sd   | j |}|d ur0d||| j | j| | j || j | j f }| | | | | d d S )NzP<span style="position:absolute; color:%s; left:%dpx; top:%dpx; font-size:%dpx;"></span>
)r   r  r   r   r   r   r   )rA   r  r   rg   r  r  r  r  rB   rB   rC   
place_text  s   



zHTMLConverter.place_textFalsewriting_modec           	   	   C   sV   | j | j d | _d||||| j | j| | j || j || j f }| | d S )Nzv<div style="position:absolute; border: %s %dpx solid; writing-mode:%s; left:%dpx; top:%dpx; width:%dpx; height:%dpx;">)r   rV   r   r   r   r   )	rA   r  r  rg   r  r  rj   r  r  rB   rB   rC   	begin_div  s   

zHTMLConverter.begin_divc                 C   s.   | j d ur
| d | j | _ | d d S )N</span>z</div>)r   r   r   rY   )rA   r  rB   rB   rC   end_div5  s
   


zHTMLConverter.end_divfontnamer   c                 C   sf   ||f}|| j kr,| j d ur| d |dd }| d||| j | j f  || _ | | d S )Nr  +z.<span style="font-family: %s; font-size:%dpx">)r   r   splitr   r   r   )rA   r   r  r   r   fontname_without_subset_tagrB   rB   rC   put_text<  s   



zHTMLConverter.put_textc                 C      |  d d S )Nz<br>r   ro   rB   rB   rC   put_newlineK     
zHTMLConverter.put_newliner   c                    sV   dt ttf dd ffdddtdd f fdd  |  jj7  _d S )Nr^   r<   c                    s.   t | tr dd|  | D ]}| qd S )Nr   r   )rN   r#   r  r   rA   
show_grouprB   rC   r)  P  s
   

z0HTMLConverter.receive_layout.<locals>.show_groupc              
      s  t | trJ j| j7  _dd|  jr0dj| j j   d| j	| j	 | D ]} | q2| j
d urH| j
D ]}| qAd S t | trXdd|  d S t | tr{dd| j| j| j| j | D ]} | qmd d S t | tr| d| j| j| j| j d S jdkrt | trdd|  | D ]} | qd S t | trӈd	d|  d	t| jd | j| jd
 | D ]} | qd S t | trdd|  d|  | j| j| j d S t | tr
| D ]} | qjdkr  d S t | tr2d	d| j| j| j| j|   | D ]} | q#d	 d S t | trIt| j }!|  || j d S t | t"rV#|   d S )NrD   r   z*<div style="position:absolute; top:%dpx;">z<a name="{}">Page {}</a></div>
r   r   exactr   r      r   loose)$rN   r   r   rJ   r  r   r   r   r  r   groupsr   r   r  rG   r  r  r  r   r  r   r$   r!   r  rL   indexr   r   r  r&  get_writing_moder1   r  r#  r    r   )r^   r   groupr  r   rA   r)  rB   rC   r   W  s   




4
2


-
+


%



	

z,HTMLConverter.receive_layout.<locals>.render)r   r#   r   r   r   r   r   rB   r1  rC   rQ   O  s
    IzHTMLConverter.receive_layoutc                 C      |    d S r=   r
  ro   rB   rB   rC   close     zHTMLConverter.close)r   r   Nr   r   r   Tr   Nr   NNr<   N)r  )!r   r   r   r   r   r)   r,   rL   r   r   r   rs   r   r   r   r>   r   r  r
  r   r  r   r  r   r  r  r  r  r#  r&  r   rQ   r4  rB   rB   rB   rC   r   w  s    
		

/




	

Ur   c                   @   s   e Zd ZedZ					ddededed	e	d
e
e de
e deddfddZdeddfddZdddZdddZdeddfddZdeddfddZdddZdS ) XMLConverterz[ ---]r   r   NFr9   r   r   r:   r;   r   stripcontrolr<   c                 C   sD   t j| |||||d | j| j krtd|| _|| _|   d S )Nr   r   )r   r>   r   r   r   r   r8  r  )rA   r9   r   r   r:   r;   r   r8  rB   rB   rC   r>     s   
zXMLConverter.__init__r   c                 C   r  r=   r  r   rB   rB   rC   r     r  zXMLConverter.writec                 C   s0   | j r| d| j   n| d | d d S )Nz%<?xml version="1.0" encoding="%s" ?>
z<?xml version="1.0" ?>
z<pages>
r   r   ro   rB   rB   rC   r    s
   

zXMLConverter.write_headerc                 C   r$  )Nz	</pages>
r%  ro   rB   rB   rC   r
    r'  zXMLConverter.write_footerc                 C   s&   | j r
| jd|}| t| d S Nre   )r8  CONTROLsubr   r4   r   rB   rB   rC   r     s   zXMLConverter.write_textr   c                    s>   dt dd ffdddt dd f fdd  | d S )Nr^   r<   c                    sf   t | tr d| jt| jf  d S t | tr1 dt| j  | D ]}| q% d d S )Nz<textbox id="%d" bbox="%s" />
z<textgroup bbox="%s">
z</textgroup>
)rN   r!   r   r.  r3   rT   r#   r   r(  rB   rC   r)    s   
	


z/XMLConverter.receive_layout.<locals>.show_groupc                    s  t | tr?d| jt| j| jf }| | D ]} | q| jd ur8d | jD ]}| q,d d d S t | trUd| j	t| jf }| d S t | t
rkd| j	t| jf }| d S t | trd| j	t| j|  f }| d S t | trd| jt| jf }| | D ]} | qd	 d S t | trǈd
t| j  | D ]} | qd d S t | trd}t | trd}d| jt| j|f }| | D ]} | qd d S t | tr"dt| jt| j| jj| jj| jf }| |   d d S t | tr3d|    d S t | trbjd urUj| }dt|| j | j!f  d S d| j | j!f  d S J t"d| f)Nz%<page id="%s" bbox="%s" rotate="%d">
z	<layout>
z
</layout>
z</page>
z"<line linewidth="%d" bbox="%s" />
z"<rect linewidth="%d" bbox="%s" />
z+<curve linewidth="%d" bbox="%s" pts="%s"/>
z<figure name="%s" bbox="%s">
z
</figure>
z<textline bbox="%s">
z</textline>
re   z wmode="vertical"z<textbox id="%d" bbox="%s"%s>
z</textbox>
zD<text font="%s" bbox="%s" colourspace="%s" ncolour="%s" size="%.3f">z</text>
z<text>%s</text>
z*<image src="%s" width="%d" height="%d" />
z!<image width="%d" height="%d" />
F	Unhandled)#rN   r   r   r3   rT   rotater   r-  r   r   r   r   get_ptsr   rS   r$   r!   r"   r.  r   r4   r  r   r   r   r  r   r   r    r   r   r   r  r  rL   )r^   r  r   r0  wmoderS   r1  rB   rC   r     s   








J

D

>

7



1


,







z+XMLConverter.receive_layout.<locals>.renderr   r   rB   r1  rC   rQ     s   \zXMLConverter.receive_layoutc                 C   r2  r=   r3  ro   rB   rB   rC   r4  J  r5  zXMLConverter.close)r   r   NNFr6  )r   r   r   r   compiler;  r)   r,   rL   r   r   r   r   r   r>   r   r  r
  r   r   rQ   r4  rB   rB   rB   rC   r7    s<    
	


mr7  c                   @   s   e Zd ZdZedZ				d#deded	e	d
e
dee defddZdede	fddZde	ddfddZd$ddZd$ddZde	ddfddZd$ddZdeddfdd Zd$d!d"ZdS )%HOCRConverterzKExtract an hOCR representation from explicit text information within a PDF.z[\x00-\x08\x0b-\x0c\x0e-\x1f]utf8r   NFr9   r   r   r:   r;   r8  c                 C   s.   t j| |||||d || _d| _|   d S )Nr   F)r   r>   r8  within_charsr  )rA   r9   r   r   r:   r;   r8  rB   rB   rC   r>   b  s   	zHOCRConverter.__init__rT   r<   c           
      C   s\   |\}}}}t |}t | jd | }t |}t | jd | }	d| d| d| d|	 S )N   zbbox  )r   	page_bbox)
rA   rT   in_x0in_y0in_x1in_y1out_x0out_y0out_x1out_y1rB   rB   rC   	bbox_reprr  s   zHOCRConverter.bbox_reprr   c                 C   s>   | j r|| j }tt| j| d S tt| j| d S r=   )r   r   r   r   r   r   r   )rA   r   encoded_textrB   rB   rC   r   {  s   zHOCRConverter.writec                 C   sl   | j r| d| j   n| d | d | d | d | d | d | d | d	 d S )
NzQ<html xmlns='http://www.w3.org/1999/xhtml' xml:lang='en' lang='en' charset='%s'>
zD<html xmlns='http://www.w3.org/1999/xhtml' xml:lang='en' lang='en'>
z<head>
z<title></title>
zE<meta http-equiv='Content-Type' content='text/html;charset=utf-8' />
zA<meta name='ocr-system' content='pdfminer.six HOCR Converter' />
zR  <meta name='ocr-capabilities' content='ocr_page ocr_block ocr_line ocrx_word'/>
z</head>
z<body>
r9  ro   rB   rB   rC   r    s,   


zHOCRConverter.write_headerc                 C   s   |  d |  d d S )Nz0<!-- comment in the following line to debug -->
zD<!--script src='https://unpkg.com/hocrjs'></script--></body></html>
r%  ro   rB   rB   rC   r
    s   
zHOCRConverter.write_footerc                 C   s"   | j r
| jd|}| | d S r:  )r8  r;  r<  r   r   rB   rB   rC   r     s   zHOCRConverter.write_textc                 C   sn   t | jdkr2d}d| jv rd}d| jv r|d7 }| d| j| j|| | j| j| j| j f  d| _d S )	Nr   re   Italiczfont-style: italic; Boldzfont-weight: bold; zg<span style='font:"%s"; font-size:%d; %s' class='ocrx_word' title='%s; x_font %s; x_fsize %d'>%s</span>F)	rM   working_textworking_fontr   working_sizerP  working_bboxstriprD  )rA   bold_and_italic_stylesrB   rB   rC   
write_word  s&   



zHOCRConverter.write_wordr   c                    s$   dt dd f fdd  | d S )Nr^   r<   c                    s  j rt| tr  t| tr3| j_d| j	| jf  | D ]} | q%d d S t| t
rSd	| j  | D ]} | qEd d S t| trvd| j	| jf  | D ]} | qhd d S t| trj sd_ |  _| j_| j_| j_d S t|   dkr  |   d S jd | jd ksj| jksj| jkrш  | j_| j_| j_ j|  7  _jd jd | jd	 jd
 f_d S d S )Nz*<div class='ocr_page' id='%s' title='%s'>
z</div>
z"<span class='ocr_line' title='%s'>r  z+<div class='ocr_block' id='%d' title='%s'>
Tr   r   rv   rE  )rD  rN   r   rZ  r   rT   rG  r   r   rP  r$   r!   r.  r   r   rT  rW  r  rU  r  rV  rM   rX  )r^   r   
child_liner   rB   rC   r     sh   








z,HOCRConverter.receive_layout.<locals>.renderr   r   rB   r   rC   rQ     s   8zHOCRConverter.receive_layoutc                 C   r2  r=   r3  ro   rB   rB   rC   r4    s   zHOCRConverter.close)rC  r   NFr6  )r   r   r   __doc__r   rA  r;  r)   r,   rL   r   r   r   r   r>   r/   rP  r   r  r
  r   rZ  r   rQ   r4  rB   rB   rB   rC   rB  O  s8    

	


;rB  )Jr   loggingr   typingr   r   r   r   r   r   r   r	   r
   r   r   pdfminer.pdfcolorr   re   r   imager   layoutr   r   r   r   r   r   r   r   r   r   r   r   r   r   r    r!   r"   r#   r$   	pdfdevicer%   pdffontr&   r'   	pdfinterpr(   r)   pdfpager*   pdftypesr+   r,   r-   r.   r/   r0   r1   r2   r3   r4   r5   	getLoggerr   r   r6   r   r   r   r   r   r7  rB  rB   rB   rB   rC   <module>   s`    4 
 Q <  4 '