HEX

File: //usr/lib/python3/dist-packages/ocrmypdf/pdfinfo/__pycache__/layout.cpython-38.pyc
U

��Z^E$�@s`ddlZddlmZddlmZddlmZddlZddlZddl	Zddl
ZddlmZddl
mZddlmZmZmZmZddlmZdd	lmZmZmZdd
lmZddlmZmZdd
lm Z e�!d�Z"ej#dk�rdd�Z$e$ej%_$ej&Z'd$dd�Z(e(e_&ej&Z)dd�Z*e*e_&dd�Z+dd�Z,dd�Z-Gdd�de�Z.Gdd�de�Z/d d!�Z0d"d#�Z1dS)%�N)�copysign)�Path)�patch)�PDFLayoutAnalyzer)�glyphname2unicode)�LAParams�LTChar�LTPage�	LTTextBox)�PDFTextExtractionNotAllowed)�PDFFont�
PDFSimpleFont�PDFUnicodeNotDefined)�PDFPage)�bbox2str�
matrix2str�)�EncryptedPdfErrorz[0-9]+Z20181108cCs�|tkrt|S|�d�s$|�d�r,t|��|�d�rlztt|dd�d��WStk
rjt|��YnXt�|�}|s�t|��tt|�d���S)aYFix pdfminer's name2unicode function

        Font cids that are mapped to names of the form /g123 seem to be, by convention
        characters with no corresponding Unicode entry. These can be subsetted fonts
        or symbolic fonts. There seems to be no way to map /g123 fonts to Unicode,
        barring a ToUnicode data structure.
        �g�aZuni�N�r)	r�
startswith�KeyError�chr�int�
ValueError�
STRIP_NAME�search�group)�name�m�r"�9/usr/lib/python3/dist-packages/ocrmypdf/pdfinfo/layout.py�name2unicode-s

r$cCs&t||||�|jdkr"|j|_dS)Nr)�original_PDFFont_init�descent)�self�
descriptor�widthsZ
default_widthr"r"r#�PDFFont__init__Gs
r*cCs&t||||�|js"d|kr"i|_dS)NZEncoding)�original_PDFSimpleFont_initZunicode_mapZcid2unicode)r'r(r)�specr"r"r#�PDFSimpleFont__init__[sr-cCs8|jd|jd}|dkr(|j|j}|td|j�S)Nr�r��?)�bbox�ascentr&r�vscale)r'�hr"r"r#�!PDFType3Font__PScript5_get_heightnsr4cCs|jtd|j�S�Nr/)r&rr2�r'r"r"r#�"PDFType3Font__PScript5_get_descentusr7cCs|jtd|j�Sr5)r1rr2r6r"r"r#�!PDFType3Font__PScript5_get_ascentysr8cs<eZdZdZdZ�fdd�Zdd�Zdd�Zd	d
�Z�Z	S)�LTStateAwareCharzDA subclass of LTChar that tracks text render mode at time of drawing)�
rendermode�_text�matrix�fontname�advZupright�size�widthZheightr0Zx0Zx1Zy0Zy1cs*t��|||||||||	|
�
|j|_dS�N)�super�__init__Zrenderr:)r'r<�font�fontsize�scaling�rise�text�	textwidth�textdisp�ncs�graphicstate�	textstate��	__class__r"r#rC�s�zLTStateAwareChar.__init__c	Cstt|jt�ot|jt�}z<|r,|j|jkWS|j\}}|j\}}||koR|j|jkWSttfk
rnYdSXdS)a
Check if characters can be combined into a textline

        We consider characters compatible if:
            - the Unicode mapping is known, and both have the same render mode
            - the Unicode mapping is unknown but both are part of the same font
        FN)�
isinstancer;�strr:r�AttributeError)r'�objZboth_unicode_mappedZfont0�_Zfont1r"r"r#�
is_compatible�s

zLTStateAwareChar.is_compatiblecCst|jt�rdS|jS)Nu�)rPr;�tupler6r"r"r#�get_text�szLTStateAwareChar.get_textc	Cs0d|jjt|j�t|j�|j|j|j|�	�fS)Nz6<%s %s matrix=%s rendermode=%r font=%r adv=%s text=%r>)
rO�__name__rr0rr<r:r=r>rWr6r"r"r#�__repr__�s�zLTStateAwareChar.__repr__)
rX�
__module__�__qualname__�__doc__�	__slots__rCrUrWrY�
__classcell__r"r"rNr#r9}sr9csbeZdZdZd�fdd�	Z�fdd�Zdd	�Z�fd
d�Zdd
�Zdd�Z	dd�Z
dd�Z�ZS)�TextPositionTrackerz=A page layout analyzer that pays attention to text visibilityr.Ncs&t��|||�d|_d|_d|_dSrA)rBrCrM�result�cur_item)r'Zrsrcmgr�pageno�laparamsrNr"r#rC�szTextPositionTracker.__init__cs"t��||�t|j|j�|_dSrA)rB�
begin_pager	rbZmediaboxra)r'�pageZctmrNr"r#rd�szTextPositionTracker.begin_pagecCsl|jrttt|j����t|jt�s6ttt|j����|jdk	rN|j�	|j�|j
d7_
|�|j�dS)Nr.)Z_stack�AssertionErrorrQ�lenrPrar	�typercZanalyzerb�receive_layout)r'rer"r"r#�end_page�s
zTextPositionTracker.end_pagecs"|��|_t��|j|||�dSrA)�copyrMrB�
render_string)r'rM�seqrKrLrNr"r#rl�s
z!TextPositionTracker.render_stringc	
Cs�z(|�|�}	t|	t�s&ttt|	����Wn tk
rH|�||�}	YnX|�|�}
|�|�}t	||||||	|
||||j
�}|j�|�|j
SrA)Z	to_unichrrPrQrfrhr�handle_undefined_charZ
char_widthZ	char_dispr9rMra�addr>)
r'r<rDrErFrG�cidrKrLrHrIrJ�itemr"r"r#�render_char�s,


�
zTextPositionTracker.render_charcCs
|j|fSrA)r=)r'rDrpr"r"r#rnsz)TextPositionTracker.handle_undefined_charcCs
||_dSrA�r`)r'Zltpager"r"r#risz"TextPositionTracker.receive_layoutcCs|jSrArsr6r"r"r#�
get_result
szTextPositionTracker.get_result)r.N)
rXrZr[r\rCrdrjrlrrrnrirtr^r"r"rNr#r_�sr_c		Cs�tjjdd�}t|t�d�}tj�||�}|rJtjddtt	t
d�}|��z^z>t
|��d��&}tj||gdd�}|�t|��W5QRXWntk
r�t��YnXW5|r�|��X|��S)	NT)Zcaching)rczpdfminer.pdffont.PDFType3Font)r,Z
get_ascentZget_descentZ
get_height�rbr)ZpagenosZmaxpages)�pdfminerZ	pdfinterpZPDFResourceManagerr_rZPDFPageInterpreterrZmultipler8r7r4�start�stopr�openrZ	get_pagesZprocess_page�nextrrrt)	ZinfilerbZ
pscript5_modeZrmanZdevZinterpZpatcher�frer"r"r#�get_page_analysiss*�
r|c	csL|D]B}t|t�r|Vqzt|�EdHWqtk
rDYqYqXqdSrA)rPr
�get_text_boxes�	TypeError)rSZchildr"r"r#r}*s
r})N)2�reZmathr�pathlibrZ
unittest.mockrrvZpdfminer.encodingdbZpdfminer.pdfdeviceZpdfminer.pdfinterpZpdfminer.converterrZpdfminer.glyphlistrZpdfminer.layoutrrr	r
Zpdfminer.pdfdocumentrZpdfminer.pdffontrr
rZpdfminer.pdfpagerZpdfminer.utilsrr�
exceptionsr�compiler�__version__r$Z
encodingdbrCr%r*r+r-r4r7r8r9r_r|r}r"r"r"r#�<module>s@


S>