HEX
Server: Apache
System: Linux srv1.prosuiteplus.com 5.4.0-216-generic #236-Ubuntu SMP Fri Apr 11 19:53:21 UTC 2025 x86_64
User: prosuiteplus (1001)
PHP: 8.3.20
Disabled: NONE
Upload Files
File: //lib/python3/dist-packages/ocrmypdf/__pycache__/_sync.cpython-38.pyc
U

��Z^$:�@s�ddlZddlZddlZddlZddlZddlZddlZddlmZddl	m
Z
ddlmZddl
Z
ddlmZddlmZddlmZmZmZdd	lmZmZmZmZmZmZmZmZmZmZm Z m!Z!m"Z"m#Z#m$Z$m%Z%m&Z&m'Z'm(Z(m)Z)m*Z*m+Z+m,Z,dd
l-m.Z.m/Z/m0Z0ddl1m2Z2m3Z3ddl4m5Z5dd
l6m7Z7ddl8m9Z9edd�Z:dd�Z;dd�Z<dd�Z=dd�Z>dd�Z?dd�Z@dd�ZAGdd �d eB�ZCd!d"�ZDd)d$d%�ZEd*d'd(�ZFdS)+�N)�
namedtuple)�Path)�mkdtemp)�tqdm�)�
OcrGrafter)�
PDFContext�cleanup_working_files�make_logger)�convert_to_pdfa�
copy_final�create_ocr_image�create_pdf_page_from_image�create_visible_page_jpg�generate_postscript_stub�get_orientation_correction�get_pdfinfo�is_ocr_required�merge_sidecars�metadata_fixup�ocr_tesseract_hocr�ocr_tesseract_textonly_pdf�optimize_pdf�preprocess_clean�preprocess_deskew�preprocess_remove_background�	rasterize�rasterize_preview�render_hocr_page�!should_visible_page_image_use_jpg�triage�validate_pdfinfo_options)�check_requested_output_file�create_input_file�report_output_file_size)�ExitCode�ExitCodeException)�qpdf)�available_cpu_count)�file_claims_pdfa�
PageResultz>pageno, pdf_page_from_image, ocr, text, orientation_correctioncCs.|rt||�}|rt||�}|r*t||�}|S�N)rrr)�page_contextZimage�remove_background�deskew�clean�r0�0/usr/lib/python3/dist-packages/ocrmypdf/_sync.py�
preprocessJs


r2cCs`|j}d}d}d}d}t|��rL|jr<t|j|�}t||�}t|j||dd�}t|j|j	|j
g�s~t|||j|j
dd�}}	nV|js�t|||j|j
|j	d�}	|j
r�t|j||ddd�}
n|}
t||
|j|j
|jd�}t||�}d}|j�s|	}t|j��rt||�}t||�}|jdk�r2t||�\}
}t|
|�}|jd	k�rLt||�\}}t|j||||d
�S)NrF)�
correction�remove_vectors)r/TZ_ocr)r3r4Z
output_tagZhocrZsandwich)�pagenoZpdf_page_from_imageZocr�text�orientation_correction)�optionsrZrotate_pagesr�originrr�anyr/Zclean_finalr4r2r-r.Zlossless_reconstructionr
rZpageinforrZpdf_rendererrrrr*r5)r,r8r7Zpdf_page_from_image_outZocr_outZtext_outZrasterize_preview_outZ
rasterize_outZ	ocr_imageZpreprocess_outZrasterize_ocr_outZ
ocr_image_outZvisible_image_outZhocr_outr0r0r1�exec_page_syncTs�
������
��
��r;cCs:|}|jj�d�r&t|�}t|||�}t||�}t||�S)N�pdfa)r8�output_type�
startswithrrrr)Zpdf_file�contextZpdf_outZps_stub_outr0r0r1�post_process�s
r@cCs@t�tjtj�tj�|�}t��}g|_|�|�|tj	_
dS)z Initialize a process pool workerN)�signal�SIGINT�SIG_IGN�loggingZhandlersZQueueHandler�	getLogger�
addHandler�PIL�Image�MAX_IMAGE_PIXELS)�queue�
max_pixels�h�rootr0r0r1�worker_init�s
rNcCs|tj_dSr+)rGrHrI)Z_queuerKr0r0r1�worker_thread_init�srOcCsnz.|��}|dkrWqjt�|j�}|�|�Wqtk
rfddl}tdtj	d�|j
tj	d�YqXqdS)a�Listen to the worker processes and forward the messages to logging

    For simplicity this is a thread rather than a process. Only one process
    should actually write to sys.stderr or whatever we're using, so if this is
    made into a process the main application needs to be directed to it.

    See https://docs.python.org/3/howto/logging-cookbook.html#logging-to-a-single-file-from-multiple-processes
    NrzLogging problem)�file)�getrDrE�nameZhandle�	Exception�	traceback�print�sys�stderr�	print_exc)rJ�recordZloggerrTr0r0r1�log_listener�srZc
CsNtt|j�|jj�}|dkr*|j�d|�td|jj|�}|jjdkrVtj	�
�|j_|jj�dt|��|dkr�|j�d|�|jj
r�ddlm}t}n
tj}t}dgt|j�}t|�}t�d	�}tjt|fd
�}|��tdt|j�dd
d|jjd���}	||||tjjfd�}
z�zh|
�!t"|�#��}z2|�$�}|j%||j&<|	�'�|�(|�|	�'�Wnt)k
�rzY�q�YnX�q,WnNt*k
�r�|
�+��Yn0t,k
�r�tj	�-dd��s�|
�+��YnXW5|�d�|
��|
� �XW5QRX|� �|jj.�r(t/||�}
t0|
|jj.|�|�1�}t2||�}t0||jj3|�dS)z!Execute the pipeline concurrentlyrz&Start processing %d pages concurrently�NZOMP_THREAD_LIMITz&Using Tesseract OpenMP thread limit %dr)�Pool���)�target�args�ZOCRZpageg�?)ZtotalZdescZunitZ
unit_scale�disable)Z	processes�initializerZinitargs�PYTEST_CURRENT_TEST�)4�min�len�pdfinfor8�jobs�log�infoZ
tesseract_env�os�environ�copy�
setdefault�strZuse_threadsZmultiprocessing.dummyr\rO�multiprocessingrNrZQueue�	threadingZThreadrZ�startr�progress_barrGrHrIZ
put_nowait�close�joinZimap_unorderedr;Zget_page_contexts�nextr6r5�updateZ
graft_page�
StopIteration�KeyboardInterruptZ	terminaterSrQZsidecarrr�finalizer@�output_file)r?Zmax_workersZtess_threadsr\rbZsidecarsZocrgraftZ	log_queueZlistenerZpbarZpoolZresultsZpage_resultr6Zpdfr0r0r1�exec_concurrent�sv

�
�




r|c@seZdZdZdS)�
NeverRaisez!An exception that is never raisedN)�__name__�
__module__�__qualname__�__doc__r0r0r0r1r}Fsr}cCs$tjdkr||kStj�||�SdS)N�nt)rkrR�path�samefile)�f1�f2r0r0r1r�Ls
r�rdcCsBtj|dd�}|�tj�t�d�}|�|�t�|��|�|S)NT)Zdelayz7[%(asctime)s] - %(name)s - %(levelname)7s - %(message)s)rDZFileHandlerZsetLevel�DEBUGZ	FormatterZsetFormatterrErF)Zlog_filename�prefixZlog_file_handlerZ	formatterr0r0r1�configure_debug_loggingSs�
r�Fc
Cs�t|t�}|jst�|_tdd�}|js2|jdkrPtj�	dd�sPt
t|�d��z,�zt
|�t||�\}}t||tj�|d�||�}t||j|jd�}t||||�}t|�t|�|jd	kr�|�d
�n�t|jtj�r�n�|j�d��r0t|j�}	|	d�r|�d
|	d�n|�d|	d�t j!WW��RSt"�#|j|��sX|�d�t j$WW��*St%|||j�W�n|�sxt&nt'k
�r�}
z4|jdk�r�|�(d�n
|�)d�t j*WY�W��Sd}
~
XYn�|�s�t+nt'k
�r6}
zFt,|
��r|�)dt-|
�jt,|
��n|�)t-|
�j�|
j.WY�W�XSd}
~
XYnF|�sBt/nt'k
�rz}
z|�(d�t j0WY�W�Sd}
~
XYnXW5t||�Xt j1S)Nzcom.github.ocrmypdf.)r�rrcrdz	debug.logz
origin.pdf)Zdetailed_page_analysisZprogbar�-zOutput sent to stdoutr<�passz!Output file is a %s (as expected)Zconformancez5Output file is okay but is not PDF/A (seems to be %s)z)Output file: The generated PDF is INVALIDryz%s: %sz2An exception occurred while executing the pipeline)2r
r~rhr(rZkeep_temporary_files�verboserkrlrQr�rr	r"r#r r�rurZredo_ocrrsrr!r|r{rjr��devnullr=r>r)Zwarningr%Zpdfa_conversion_failedr'ZcheckZinvalid_output_pdfr$ryr}Z	exception�errorZctrl_cr&ro�typeZ	exit_coderSZother_error�ok)r8ZapiriZwork_folderZstart_input_fileZoriginal_filenameZ
origin_pdfrgr?Z	pdfa_info�er0r0r1�run_pipeline^s|

��	�


��



$r�)rd)F)GrDZlogging.handlersrprkrArVrq�collectionsr�pathlibrZtempfilerrGrZ_graftrZ_jobcontextrr	r
Z	_pipelinerrr
rrrrrrrrrrrrrrrrrrr r!Z_validationr"r#r$�
exceptionsr%r&�execr'Zhelpersr(r<r)r*r2r;r@rNrOrZr|rSr}r�r�r�r0r0r0r1�<module>sDd�
W
b