HEX
Server: Apache
System: Linux srv1.prosuiteplus.com 5.4.0-216-generic #236-Ubuntu SMP Fri Apr 11 19:53:21 UTC 2025 x86_64
User: prosuiteplus (1001)
PHP: 8.3.20
Disabled: NONE
Upload Files
File: //usr/lib/python3/dist-packages/ocrmypdf/__pycache__/optimize.cpython-38.pyc
U

��Z^+T�@srddlZddlZddlZddlmZddlmZddlm	Z	ddl
Z
ddl
mZmZddl
mZddlmZdd	lmZdd
lmZddlmZddlmZmZdd
lmZdZdZdd�Zdd�Zdd�Zdd�Z dd�Z!dd�Z"dd�Z#dd�Z$d d!�Z%d"d#�Z&d$d%�Z'd&d'�Z(d(d)�Z)d*d+�Z*d,d-�Z+d.d/�Z,d0d1�Z-d7d2d3�Z.e/d4k�rne.ej0dej0d5ej0d6�dS)8�N)�defaultdict��fspath)�Path)�
Dictionary�Name)�Image)�tqdm�)�	leptonica)�
PDFContext)�OutputFileAccessError)�jbig2enc�pngquant)�safe_symlink�K�FcCst||d�|���S)N�08dr)�root�xref�ext�r�3/usr/lib/python3/dist-packages/ocrmypdf/optimize.py�img_name(srcCst||d�S)N�.png�r�rrrrr�png_name,srcCst||d�S)N�.jpgrrrrr�jpg_name0srcCst||d�S)Nz.tifrrrrr�tif_name4sr cCs�|jtjkrdS|jdkr*|�d|�dSt�|�}t|j�dkrR|�d|�dS|jd}|j	dkrjdS|dtj
kr|dStj|kr�dS||fS)N�dzSkipping small image, xref %sr
z#Skipping multiply filtered, xref %sr�)ZSubtyperr�Length�debug�pikepdfZPdfImage�lenZfilter_decodeparms�bits_per_componentZ	JPXDecodeZDecode)�piker�log�imager�pim�filtdprrr�extract_image_filter8s"




r-c	Cs�t|||||�}|dkrdS|\}}|jdkr�|tjkr�t��r�zFt||d��}	|	�d��}
|j|
d�}W5QRX|	�	|	�
|��Wntjk
r�YdSX||fSdS)Nr
r�wb��stream)
r-r'r�JBIG2DecoderZ	availabler�open�
extract_to�rename�with_suffixr%�UnsupportedImageTypeError�r(rr)r*r�options�resultr+r,Zimgname�frrrr�extract_image_jbig2Rs&���r;c	CsHt|||||�}|dkrdS|\}}|jdkr2dSz
|jWntk
rRYdSX|dtjkr�|jdkr�zFt||d��}	|	�d��}
|j	|
d�}W5QRX|	�
|	�|��Wntj
k
r�YdSX||fS|j�r|j|jk�r|jdk�r|���t||��|dfS|j�sD|j|jk�rD|���t||��|dfSdS)	Nr
r�rr.r/�r)r-r'Zindexed�NotImplementedErrorr�	DCTDecode�optimizerr2r3r4r5r%r6Z
colorspaceZSIMPLE_COLORSPACESZas_pil_image�saverr7rrr�extract_image_generichs>

�
��rBccsPt�}t�}i}d}t|j�D]�\}	}
z|
jj}Wntk
rJYqYnXt|���D]\\}}
|
jddkrpqX|
jd}t	|
d�r�|
j
jd}|�|�|�|�||krX|	||<qXq||}|D]�}|�|df�}
z|||||
||d�}Wn>t
k
�r,}z|�d|t|��|d7}W5d}~XYq�X|r�|\}}||||fVq�dS)a�Extract image using extract_fn

    Enumerate images on each page, lookup their xref/ID number in the PDF.
    Exclude images that are soft masks (i.e. alpha transparency related).
    Record the page number on which an image is first used, since images may be
    used on multiple pages (or multiple times on the same page).

    Current we do not check Form XObjects or other objects that may contain
    images, and we don't evaluate alternate images or thumbnails.

    extract_fn must decide if wants to extract the image in this context. If
    it does a tuple should be returned: (xref, ext) where .ext is the file
    extension. extract_fn must also extract the file it finds interesting.
    rr
�SMask)r(rr)r*rr8zImage xref %s, error %sN)�set�	enumerateZpagesZ	ResourcesZXObject�AttributeError�dict�items�objgen�hasattrrC�add�
get_object�	Exceptionr$�repr)r(rr)r8Z
extract_fnZ
include_xrefsZ
exclude_xrefsZpageno_for_xref�errors�pagenoZpageZxobjsZ_imnamer*rZ
smask_xrefZ
working_xrefsr9�e�_rrrr�extract_images�sJ




�
rSc	Csvg}g}t||||t�D]>\}}}|�d||�|dkrD|�|�q|dkr|�|�q|�dt|�t|��||fS)z0Extract any >=2bpp image we think we can improvezxref = %s ext = %srrz&Optimizable images: JPEGs: %s PNGs: %s)rSrBr$�appendr&)	r(rr)r8�jpegs�pngsrRrrrrr�extract_images_generic�srWc	Cshtt�}t||||t�D]&\}}}||j}||�||f�qdd�|��D�}|�dt|�f�|S)z?Extract any bitonal image that we think we can improve as JBIG2cSs"i|]\}}t|�dkr||�qS)r)r&)�.0�groupZxrefsrrr�
<dictcomp>�sz(extract_images_jbig2.<locals>.<dictcomp>z$Optimizable images: JBIG2 groups: %s)	r�listrSr;�jbig2_page_group_sizerTrHr$r&)	r(rr)r8�jbig2_groupsrPrrrYrrr�extract_images_jbig2�s�
�r^c
Cs�dd�}dd�}|jdkr |}n|}tjj|jd��n}||||�}tt|�dd|jd	��>}	tj�|�D]*}
|
�	�}|j
r�|�|j
���|	�
�qhW5QRXW5QRXd
S)z&Produce JBIG2 images from their groupsc3sN|��D]@\}}d|d��}|jtjt���fdd�|D�|d�}|VqdS)NrYrc3s|]\}}t�||�VqdS)Nr)rXrr�rrr�	<genexpr>szE_produce_jbig2_images.<locals>.jbig2_group_futures.<locals>.<genexpr>)�cwdZinfilesZ
out_prefix)rH�submitrZ
convert_groupr)�executorr�groupsrY�	xref_exts�prefix�futurerr_r�jbig2_group_futures�s�z2_produce_jbig2_images.<locals>.jbig2_group_futurescssr|��D]d\}}d|d��}t|�D]F\}}|\}}	|jtjt|�t|||	�||�d|d��d�}
|
Vq$qdS)NrYr�.�04d)ra�infile�outfile)rHrErbrZconvert_singlerr)rcrrdrYrerf�n�xref_extrrrgrrr�jbig2_single_futuress
�z3_produce_jbig2_images.<locals>.jbig2_single_futuresr
�Zmax_workersZJBIG2�item)�total�desc�unit�disableN)r\�
concurrent�futures�ThreadPoolExecutor�jobsr	r&�progress_bar�as_completedr9�stderrr$�decode�update)r]rr)r8rhroZ
jbig2_futuresrcrw�pbarrg�procrrr�_produce_jbig2_images�s&
�r�cCs�t||||�|��D]�\}}d|d��}||d}|��r^|��}	t�||	�}
t|
d�}n|jdkrnd}nt|��t	|�D]J\}}
|
\}}||d|d��}|��}|�
|d	�}|j|tj
|d
�q~qdS)a�Convert images to JBIG2 and insert into PDF.

    When the JBIG2 page group size is > 1 we do several JBIG2 images at once
    and build a symbol dictionary that will span several pages. Each JBIG2
    image must reference to its symbol dictionary. If too many pages shared the
    same dictionary JBIG2 encoding becomes more expensive and less efficient.
    The default value of 10 was determined through testing. Currently this
    must be lossy encoding since jbig2enc does not support refinement coding.

    When the JBIG2 symbolic coder is not used, each JBIG2 stands on its own
    and needs no dictionary. Currently this must be lossless JBIG2.
    rYrz.sym)ZJBIG2Globalsr
Nrirjr��filterZdecode_parms)r�rH�existsZ
read_bytesr%�Streamrr\�FileNotFoundErrorrErL�writerr1)r(r]rr)r8rYrerfZ
jbig2_symfileZjbig2_globals_dataZ
jbig2_globalsZjbig2_globals_dictrmrnrrRZ
jbig2_im_fileZ
jbig2_im_data�im_objrrr�convert_to_jbig2(s*
�r�c
Cs�t|dd|jd�D]�}tt||��}|�d�}t�t|���}|jt|�d|j	d�W5QRX|�
�j|�
�jkr�|�d|�qt
j�|�}	|�|d�}
|
j|	��tjd	�qdS)
NZJPEGsr*)rsrtruz.opt.jpgT)r@Zqualityz!xref %s, jpeg, made larger - skipr)r�)r	rzrrr5rr2rrA�jpeg_quality�stat�st_sizer$r�CompressedDatarLr��readrr?)r(rUrr)r8rZin_jpgZopt_jpgZim�compdatar�rrr�transcode_jpegsNs �

 r�cCs�t�}|jdkr�td|jd�td|jd�f}tjj|jd���}g}	|D]L}
|�	|||
��|	�
|�tj
|||
�t||
�|d|d��|�|
�qJtdt|	�d|jd	�� }tj�|	�D]}|��q�W5QRXW5QRX|D�]}
|�|
d�}
zFtj�t||
��}|jd
k�r(|�tjjd�}ntj�t||
��}Wn8tjk
�rt}z|�|�WY�q�W5d}~XYnXt|�t |
j!j"�k�r�|�	dt|��dt |
j!j"����q�|j#tjj$k�r�t%||
||�S|j#tjjkr�t&||
||�Sq�dS)
Nr<�
r!rprr
ZPNGsr*)rsrrrtru�1z7pngquant: pngquant did not improve over original image z > )'rDr@�max�png_quality�minrvrwrxryr$rTrbrZquantizerrKr	r&rzr{r~rLrZPixr2�modeZgenerate_pdf_ci_dataZleptZL_G4_ENCODEr�ZLeptonicaError�error�intZstream_dictr#�typeZL_FLATE_ENCODE�rewrite_png�rewrite_png_as_g4)r(�imagesZ
image_name_fnrr)r8Zmodifiedr�rcrwrrZ_futurer�Zpixr�rQrrr�transcode_pngsesd
����	�

�r�cCsvd|_|j|_|j|_|�|���|�d|j���t	j
|krD|`
t	j|krR|`tdt
|j�|jd�|_t	j|_dS)Nr
z
PNG to G4 ���)�KZBlackIs1�Columns)�BitsPerComponent�w�Width�h�Heightr�r�r$rIr�	PredictorZDecodeParmsr�boolZ
minisblackZCCITTFaxDecodeZFilter)r(r�r�r)rrrr��s 

�r�c
Cs|jdkrdnd}t|d�}|dkr<|j|_|j|_|j|_|j|_|j|_|j	|_
|�d|j�d|j
�d|j�d|j���|j
dkr�|��}tj�|�}t�|t|��}tjtj|j
d|g}	|	}
n4|jdkr�tj}
n"|jd	kr�tj}
n|jd
kr�tj}
|
|_|j|��tj|d�dS)Nr�r
)r�zPNG z
: palette=z spp=z bps=r=�r�)�	predictorrZbpsr�ZsppZColorsr�r�r�r�r�r$rIZncolorsZget_palette_pdf_stringr%ZObject�parser��bytesrZIndexedZ	DeviceRGBZ
DeviceGrayZ
DeviceCMYKZ
ColorSpacer�r�ZFlateDecode)r(r�r�r)r�ZdparmsZpalette_pdf_stringZpalette_dataZpalette_streamZpaletteZcsrrrr��s4
"�



r�c	Cs�|j}|j}|jdkr$t||�dS|jdkrB|jdkr<tnd|_|jdkr`|jdkrZtnd|_|jdkrz|j	rtdnd|_t
j�|���}t
|�jd}|jdd	�t||||�\}}	t|||||�t||	t|||�t||||�}
t||
|||�t
|��d
�}|��|j|f|�W5QRXt
|���j}t
|���j}
|
dk�r\tdt���d���||
}d|
|}|�d
|d�dd|d�d��|dk�r�|�d�t
�|��}|��|j|f|�W5QRXn
t||�dS)Nrr=�(�r�r
r�T)�exist_okz.opt.pdfzeOutput file not created after optimizing. We probably ran out of disk space in the temporary folder: rizOptimize ratio: z.2fz
 savings: r!z.1f�%z7Image optimization did not improve the file - discarded)r)r8r@rr��DEFAULT_JPEG_QUALITYr��DEFAULT_PNG_QUALITYr\�jbig2_lossyr%ZPdfr2r�parent�mkdirrWr�r�rr^r�r5Zremove_unreferenced_resourcesrAr�r�r
�tempfileZ
gettempdir�info)�
input_fileZoutput_file�contextZ
save_settingsr)r8r(rrUrVr]Ztarget_fileZ
input_sizeZoutput_sizeZratioZsavingsrrrr@�sJ





� 

r@c
Cs�ddlm}ddlm}Gdd�d�}|||t|�dddd�}|��N}t|||d�}	t|�d}
t||
|	td	d	t	j
jd
��|t|
�t|��W5QRXdS)Nr)�TemporaryDirectory)�copyc@seZdZdZdd�ZdS)zmain.<locals>.OptimizeOptionszEmulate ocrmypdf's optionscSs:||_||_||_||_||_d|_||_d|_d|_dS)NrTF)	r�ryr@r�r�r\r��quietrz)�selfr�ry�	optimize_r�r��jb2lossyrrr�__init__'sz&main.<locals>.OptimizeOptions.__init__N)�__name__�
__module__�__qualname__�__doc__r�rrrr�OptimizeOptions$sr�F)r�ryr�r�r�r�zout.pdfT)Zcompress_streamsZ
preserve_pdfaZobject_stream_mode)
r�r�Zshutilr�r�rrr@rGr%ZObjectStreamModeZgenerater)rkrl�levelryr�r�r�r8Ztdr�Ztmpoutrrr�main s2�	��
r��__main__r<r=)r
)1Zconcurrent.futuresrv�sysr��collectionsr�osr�pathlibrr%rrZPILrr	�rZ_jobcontextr�
exceptionsr
�execrrZhelpersrr�r�rrrr r-r;rBrSrWr^r�r�r�r�r�r�r@r�r��argvrrrr�<module>sH:50&=55
-