HEX
Server: Apache
System: Linux srv1.prosuiteplus.com 5.4.0-216-generic #236-Ubuntu SMP Fri Apr 11 19:53:21 UTC 2025 x86_64
User: prosuiteplus (1001)
PHP: 8.3.20
Disabled: NONE
Upload Files
File: //usr/lib/python3/dist-packages/html5lib/__pycache__/_inputstream.cpython-38.pyc
U

63)Z�~�!@sddlmZmZmZddlmZmZddlmZm	Z	ddl
Z
ddlZddlZddl
mZmZmZmZddl
mZddlmZdd	lmZzdd
lmZWnek
r�eZYnXedd�eD��Zed
d�eD��Zedd�eD��Zeeddg�BZdZej�rFeddk�r"e� d�dk�s&t!�e�"edd�e#d�d�Z$n
e�"e�Z$e%dddddddddddd d!d"d#d$d%d&d'd(d)d*d+d,d-d.d/d0d1d2d3d4g �Z&e�"d5�Z'iZ(Gd6d7�d7e)�Z*d8d9�Z+Gd:d;�d;e)�Z,Gd<d=�d=e,�Z-Gd>d?�d?e.�Z/Gd@dA�dAe)�Z0GdBdC�dCe)�Z1dDdE�Z2dS)F�)�absolute_import�division�unicode_literals)�	text_type�binary_type)�http_client�urllibN�)�EOF�spaceCharacters�asciiLetters�asciiUppercase)�_ReparseException)�_utils)�StringIO)�BytesIOcCsg|]}|�d��qS��ascii��encode��.0�item�r�7/usr/lib/python3/dist-packages/html5lib/_inputstream.py�
<listcomp>srcCsg|]}|�d��qSrrrrrrrscCsg|]}|�d��qSrrrrrrrs�>�<u�[---Ÿ﷐-﷯￾￿🿾🿿𯿾𯿿𿿾𿿿񏿾񏿿񟿾񟿿񯿾񯿿񿿾񿿿򏿾򏿿򟿾򟿿򯿾򯿿򿿾򿿿󏿾󏿿󟿾󟿿󯿾󯿿󿿾󿿿􏿾􏿿]����]z"\uD800-\uDFFF"i��i��i��i��i��i��i��i��i��i��i��i��i��i��i��i��i��	i��	i��
i��
i��i��i��i��i��
i��
i��i��i��i��i��i��z[	-
 -/:-@\[-`{-~]c@sHeZdZdZdd�Zdd�Zdd�Zdd	�Zd
d�Zdd
�Z	dd�Z
dS)�BufferedStreamz�Buffering for streams that do not have buffering of their own

    The buffer is implemented as a list of chunks on the assumption that
    joining many strings will be slow since it is O(n**2)
    cCs||_g|_ddg|_dS)Nrr)�stream�buffer�position)�selfr!rrr�__init__@szBufferedStream.__init__cCs<d}|jd|jd�D]}|t|�7}q||jd7}|S�Nrr	)r"r#�len)r$�pos�chunkrrr�tellEs
zBufferedStream.tellcCsT||��kst�|}d}t|j|�|krF|t|j|�8}|d7}q||g|_dSr&)�_bufferedBytes�AssertionErrorr'r"r#)r$r(�offset�irrr�seekLs
zBufferedStream.seekcCsT|js|�|�S|jdt|j�krF|jdt|jd�krF|�|�S|�|�SdS)Nrr	r)r"�_readStreamr#r'�_readFromBuffer)r$�bytesrrr�readUs
�
zBufferedStream.readcCstdd�|jD��S)NcSsg|]}t|��qSr)r'rrrrr_sz1BufferedStream._bufferedBytes.<locals>.<listcomp>)�sumr"�r$rrrr+^szBufferedStream._bufferedBytescCs<|j�|�}|j�|�|jdd7<t|�|jd<|Sr&)r!r3r"�appendr#r')r$r2�datarrrr0as
zBufferedStream._readStreamcCs�|}g}|jd}|jd}|t|j�kr�|dkr�|dks>t�|j|}|t|�|krl|}|||g|_n"t|�|}|t|�g|_|d7}|�||||��||8}d}q|r�|�|�|��d�|�S)Nrr	�)r#r'r"r,r6r0�join)r$r2ZremainingBytes�rvZbufferIndexZbufferOffsetZbufferedDataZbytesToReadrrrr1hs&


zBufferedStream._readFromBufferN)�__name__�
__module__�__qualname__�__doc__r%r*r/r3r+r0r1rrrrr 9s		r cKs�t|tj�s(t|tjj�r.t|jtj�r.d}n&t|d�rJt|�d�t	�}n
t|t	�}|r�dd�|D�}|rvt
d|��t|f|�St|f|�SdS)NFr3rcSsg|]}|�d�r|�qS)Z	_encoding)�endswith)r�xrrrr�s
z#HTMLInputStream.<locals>.<listcomp>z3Cannot set an encoding with a unicode input, set %r)
�
isinstancerZHTTPResponserZresponseZaddbase�fp�hasattrr3r�	TypeError�HTMLUnicodeInputStream�HTMLBinaryInputStream)�source�kwargsZ	isUnicodeZ	encodingsrrr�HTMLInputStream�s��

rIc@speZdZdZdZdd�Zdd�Zdd�Zd	d
�Zdd�Z	d
d�Z
ddd�Zdd�Zdd�Z
ddd�Zdd�ZdS)rE��Provides a unicode stream of characters to the HTMLTokenizer.

    This class takes care of character encoding and removing or replacing
    incorrect byte-sequences and also provides column and line tracking.

    i(cCsZtjsd|_ntd�dkr$|j|_n|j|_dg|_td�df|_|�	|�|_
|��dS)�Initialises the HTMLInputStream.

        HTMLInputStream(source, [encoding]) -> Normalized stream from source
        for use by html5lib.

        source can be either a file-object, local filename or a string.

        The optional encoding parameter must be a string that indicates
        the encoding.  If specified, that encoding will be used,
        regardless of any BOM or later declaration (such as in a meta
        element)

        Nu􏿿r	r�utf-8�certain)r�supports_lone_surrogates�reportCharacterErrorsr'�characterErrorsUCS4�characterErrorsUCS2ZnewLines�lookupEncoding�charEncoding�
openStream�
dataStream�reset)r$rGrrrr%�s
zHTMLUnicodeInputStream.__init__cCs.d|_d|_d|_g|_d|_d|_d|_dS)N�r)r)�	chunkSize�chunkOffset�errors�prevNumLines�prevNumCols�_bufferedCharacterr5rrrrV�szHTMLUnicodeInputStream.resetcCst|d�r|}nt|�}|S�zvProduces a file object from source.

        source can be either a file object, local filename or a string.

        r3)rCr�r$rGr!rrrrT�s
z!HTMLUnicodeInputStream.openStreamcCsT|j}|�dd|�}|j|}|�dd|�}|dkr@|j|}n||d}||fS)N�
rrr	)r)�countr[�rfindr\)r$r-r)ZnLinesZpositionLineZlastLinePosZpositionColumnrrr�	_position�s
z HTMLUnicodeInputStream._positioncCs|�|j�\}}|d|fS)z:Returns (line, col) of the current position in the stream.r	)rcrY)r$�line�colrrrr#�szHTMLUnicodeInputStream.positioncCs6|j|jkr|��stS|j}|j|}|d|_|S)zo Read one character from the stream or queue if available. Return
            EOF when EOF is reached.
        r	)rYrX�	readChunkr
r))r$rY�charrrrrg�s

zHTMLUnicodeInputStream.charNcCs�|dkr|j}|�|j�\|_|_d|_d|_d|_|j�|�}|j	rX|j	|}d|_	n|s`dSt
|�dkr�t|d�}|dks�d|kr�dkr�nn|d|_	|dd�}|jr�|�|�|�
d	d
�}|�
dd
�}||_t
|�|_dS)
NrWrFr	r�
��i��z
r`�
T)�_defaultChunkSizercrXr[r\r)rYrUr3r]r'�ordrO�replace)r$rXr7Zlastvrrrrf�s0
 


z HTMLUnicodeInputStream.readChunkcCs(ttt�|���D]}|j�d�qdS)N�invalid-codepoint)�ranger'�invalid_unicode_re�findallrZr6)r$r7�_rrrrP%sz*HTMLUnicodeInputStream.characterErrorsUCS4cCs�d}t�|�D]�}|rqt|���}|��}t�|||d��rrt�|||d��}|tkrl|j	�
d�d}q|dkr�|dkr�|t|�dkr�|j	�
d�qd}|j	�
d�qdS)NF�rnTrii��r	)rp�finditerrl�group�startrZisSurrogatePairZsurrogatePairToCodepoint�non_bmp_invalid_codepointsrZr6r')r$r7�skip�matchZ	codepointr(Zchar_valrrrrQ)s"�z*HTMLUnicodeInputStream.characterErrorsUCS2Fc
Cszt||f}Wnhtk
rx|D]}t|�dks$t�q$d�dd�|D��}|sZd|}t�d|�}t||f<YnXg}|�|j|j	�}|dkr�|j	|j
kr�q�n0|��}||j
kr�|�|j|j	|��||_	q�|�|j|j	d��|�
�s~q�q~d�|�}	|	S)z� Returns a string of characters from the stream up to but not
        including any character in 'characters' or EOF. 'characters' must be
        a container that supports the 'in' method and iteration over its
        characters.
        �rWcSsg|]}dt|��qS)z\x%02x)rl)r�crrrrNsz5HTMLUnicodeInputStream.charsUntil.<locals>.<listcomp>z^%sz[%s]+N)�charsUntilRegEx�KeyErrorrlr,r9�re�compileryr)rYrX�endr6rf)
r$Z
charactersZopposite�charsr{Zregexr:�mr��rrrr�
charsUntil@s0 

z!HTMLUnicodeInputStream.charsUntilcCsT|dk	rP|jdkr.||j|_|jd7_n"|jd8_|j|j|ksPt�dSr&)rYr)rXr,)r$rgrrr�ungetos
zHTMLUnicodeInputStream.unget)N)F)r;r<r=r>rkr%rVrTrcr#rgrfrPrQr�r�rrrrrE�s 
&
/rEc@sLeZdZdZddd�Zdd�Zd	d
�Zddd�Zd
d�Zdd�Z	dd�Z
dS)rFrJN�windows-1252TcCsn|�|�|_t�||j�d|_d|_||_||_||_||_	||_
|�|�|_|jddk	sbt
�|��dS)rKi�drN)rT�	rawStreamrEr%�numBytesMeta�numBytesChardet�override_encoding�transport_encoding�same_origin_parent_encoding�likely_encoding�default_encoding�determineEncodingrSr,rV)r$rGr�r�r�r�r�Z
useChardetrrrr%�szHTMLBinaryInputStream.__init__cCs&|jdj�|jd�|_t�|�dS)Nrrm)rSZ
codec_info�streamreaderr�rUrErVr5rrrrV�szHTMLBinaryInputStream.resetcCsDt|d�r|}nt|�}z|�|���Wnt|�}YnX|Sr^)rCrr/r*r r_rrrrT�s
z HTMLBinaryInputStream.openStreamcCs�|��df}|ddk	r|St|j�df}|ddk	r:|St|j�df}|ddk	rX|S|��df}|ddk	rt|St|j�df}|ddk	r�|dj�d�s�|St|j�df}|ddk	r�|S|�rpzddl	m
}Wntk
r�Yn�Xg}|�}|j�s<|j
�|j�}t|t��st�|�s&�q<|�|�|�|�q�|��t|jd�}|j
�d�|dk	�rp|dfSt|j�df}|ddk	�r�|Std�dfS)NrMrZ	tentativezutf-16)�UniversalDetector�encodingr�)�	detectBOMrRr�r��detectEncodingMetar��name�
startswithr�Zchardet.universaldetectorr��ImportErrorZdoner�r3r�rAr2r,r6Zfeed�close�resultr/r�)r$ZchardetrSr�ZbuffersZdetectorr"r�rrrr��sR

z'HTMLBinaryInputStream.determineEncodingcCs�|jddkst�t|�}|dkr&dS|jdkrFtd�}|dk	s�t�nT||jdkrf|jddf|_n4|j�d�|df|_|��td|jd|f��dS)Nr	rM��utf-16be�utf-16lerLrzEncoding changed from %s to %s)rSr,rRr�r�r/rVr)r$ZnewEncodingrrr�changeEncodings

z$HTMLBinaryInputStream.changeEncodingc
Cs�tjdtjdtjdtjdtjdi}|j�d�}t|t	�s<t
�|�|dd��}d}|s~|�|�}d}|s~|�|dd	��}d	}|r�|j�|�t
|�S|j�d
�dSdS)z�Attempts to detect at BOM at the start of the stream. If
        an encoding can be determined from the BOM return the name of the
        encoding otherwise return NonerLr�r�zutf-32lezutf-32be�N�rsr)�codecs�BOM_UTF8�BOM_UTF16_LE�BOM_UTF16_BE�BOM_UTF32_LE�BOM_UTF32_BEr�r3rAr2r,�getr/rR)r$ZbomDict�stringr�r/rrrr�s4�
zHTMLBinaryInputStream.detectBOMcCsV|j�|j�}t|t�st�t|�}|j�d�|��}|dk	rR|j	dkrRt
d�}|S)z9Report the encoding declared by the meta element
        rNr�rL)r�r3r�rAr2r,�EncodingParserr/�getEncodingr�rR)r$r"�parserr�rrrr�9sz(HTMLBinaryInputStream.detectEncodingMeta)NNNNr�T)T)r;r<r=r>r%rVrTr�r�r�r�rrrrrF�s�
*
>"rFc@s�eZdZdZdd�Zdd�Zdd�Zdd	�Zd
d�Zdd
�Z	dd�Z
dd�Zeee
�Z
dd�Zee�Zefdd�Zdd�Zdd�Zdd�ZdS)�
EncodingBytesz�String-like object with an associated position and various extra methods
    If the position is ever greater than the string length then an exception is
    raisedcCst|t�st�t�||���S�N)rAr2r,�__new__�lower�r$�valuerrrr�LszEncodingBytes.__new__cCs
d|_dS)Nr)rcr�rrrr%PszEncodingBytes.__init__cCs|Sr�rr5rrr�__iter__TszEncodingBytes.__iter__cCs>|jd}|_|t|�kr"t�n|dkr.t�|||d�S)Nr	r�rcr'�
StopIterationrD�r$�prrr�__next__WszEncodingBytes.__next__cCs|��Sr�)r�r5rrr�next_szEncodingBytes.nextcCsB|j}|t|�krt�n|dkr$t�|d|_}|||d�Sr&r�r�rrr�previouscszEncodingBytes.previouscCs|jt|�krt�||_dSr��rcr'r�)r$r#rrr�setPositionlszEncodingBytes.setPositioncCs*|jt|�krt�|jdkr"|jSdSdS)Nrr�r5rrr�getPositionqs

zEncodingBytes.getPositioncCs||j|jd�S�Nr	)r#r5rrr�getCurrentByte{szEncodingBytes.getCurrentBytecCsH|j}|t|�kr>|||d�}||kr4||_|S|d7}q||_dS)zSkip past a list of charactersr	N�r#r'rc�r$r�r�r{rrrrx�s
zEncodingBytes.skipcCsH|j}|t|�kr>|||d�}||kr4||_|S|d7}q||_dSr�r�r�rrr�	skipUntil�s
zEncodingBytes.skipUntilcCs>|j}|||t|��}|�|�}|r:|jt|�7_|S)z�Look for a sequence of bytes at the start of a string. If the bytes
        are found return True and advance the position to the byte after the
        match. Otherwise return False and leave the position alone)r#r'r�)r$r2r�r7r:rrr�
matchBytes�s
zEncodingBytes.matchBytescCsR||jd��|�}|dkrJ|jdkr,d|_|j|t|�d7_dSt�dS)z�Look for the next sequence of bytes matching a given sequence. If
        a match is found advance the position to the last byte of the matchNrrr	T)r#�findrcr'r�)r$r2ZnewPositionrrr�jumpTo�s
zEncodingBytes.jumpToN)r;r<r=r>r�r%r�r�r�r�r�r��propertyr#r��currentByte�spaceCharactersBytesrxr�r�r�rrrrr�Hs 	
r�c@sXeZdZdZdd�Zdd�Zdd�Zdd	�Zd
d�Zdd
�Z	dd�Z
dd�Zdd�ZdS)r�z?Mini parser for detecting character encoding from meta elementscCst|�|_d|_dS)z3string - the data to work on for encoding detectionN)r�r7r��r$r7rrrr%�s
zEncodingParser.__init__c
Cs�d|jfd|jfd|jfd|jfd|jfd|jff}|jD]Z}d}|D]D\}}|j�|�rFz|�}Wq�WqFtk
r�d}Yq�YqFXqF|s:q�q:|jS)	Ns<!--s<metas</s<!s<?rTF)	�
handleComment�
handleMeta�handlePossibleEndTag�handleOther�handlePossibleStartTagr7r�r�r�)r$ZmethodDispatchrrZkeepParsing�key�methodrrrr��s(�

zEncodingParser.getEncodingcCs|j�d�S)zSkip over commentss-->�r7r�r5rrrr��szEncodingParser.handleCommentcCs�|jjtkrdSd}d}|��}|dkr,dS|ddkr\|ddk}|r�|dk	r�||_dSq|ddkr�|d}t|�}|dk	r�||_dSq|ddkrtt|d��}|��}|dk	rt|�}|dk	r|r�||_dS|}qdS)	NTFrs
http-equivr	scontent-type�charsetscontent)	r7r�r��getAttributer�rR�ContentAttrParserr��parse)r$Z	hasPragmaZpendingEncoding�attrZtentativeEncoding�codecZ
contentParserrrrr��s8zEncodingParser.handleMetacCs
|�d�S)NF)�handlePossibleTagr5rrrr��sz%EncodingParser.handlePossibleStartTagcCst|j�|�d�S)NT)r�r7r�r5rrrr��s
z#EncodingParser.handlePossibleEndTagcCsb|j}|jtkr(|r$|��|��dS|�t�}|dkrD|��n|��}|dk	r^|��}qLdS)NTr)r7r��asciiLettersBytesr�r�r��spacesAngleBracketsr�)r$ZendTagr7r{r�rrrr��s



z EncodingParser.handlePossibleTagcCs|j�d�S)Nrr�r5rrrr�szEncodingParser.handleOthercCs�|j}|�ttdg�B�}|dks2t|�dks2t�|dkr>dSg}g}|dkrV|rVq�nX|tkrj|��}q�nD|dkr�d�|�dfS|tkr�|�|�	��n|dkr�dS|�|�t
|�}qF|dkr�|��d�|�dfSt
|�|��}|dk�rJ|}t
|�}||k�r"t
|�d�|�d�|�fS|tk�r<|�|�	��q�|�|�q�nJ|d	k�rbd�|�dfS|tk�r||�|�	��n|dk�r�dS|�|�t
|�}|tk�r�d�|�d�|�fS|tk�r�|�|�	��n|dk�r�dS|�|��q�dS)
z_Return a name,value pair for the next attribute in the stream,
        if one is found, or None�/Nr	)rN�=)r�rr8)�'�"r)
r7rxr��	frozensetr'r,r9�asciiUppercaseBytesr6r�r�r�r�)r$r7r{ZattrNameZ	attrValueZ	quoteCharrrrr�sb











zEncodingParser.getAttributeN)
r;r<r=r>r%r�r�r�r�r�r�r�r�rrrrr��s$r�c@seZdZdd�Zdd�ZdS)r�cCst|t�st�||_dSr�)rAr2r,r7r�rrrr%fszContentAttrParser.__init__cCs�z�|j�d�|jjd7_|j��|jjdks<WdS|jjd7_|j��|jjdkr�|jj}|jjd7_|jj}|j�|�r�|j||jj�WSWdSnR|jj}z"|j�t�|j||jj�WWStk
�r�|j|d�YWSXWntk
�rYdSXdS)Nr�r	r�)r�r�)r7r�r#rxr�r�r�r�)r$Z	quoteMarkZoldPositionrrrr�js.

zContentAttrParser.parseN)r;r<r=r%r�rrrrr�esr�cCsft|t�r0z|�d�}Wntk
r.YdSX|dk	r^zt�|�WStk
rZYdSXndSdS)z{Return the python codec name corresponding to an encoding or None if the
    string doesn't correspond to a valid encoding.rN)rAr�decode�UnicodeDecodeError�webencodings�lookup�AttributeError)r�rrrrR�s

rR)3Z
__future__rrrZsixrrZ	six.movesrrr�r~r�Z	constantsr
rrr
rrWr�iorrr�r�r�r�r�r�Zinvalid_unicode_no_surrogaterNrar,r�evalrp�setrwZascii_punctuation_rer|�objectr rIrErFr2r�r�r�rRrrrr�<module>s�
"��
�
JgIh6'