
     fh-                         d dl mZ d dlmZ d dlmZ d dlmZmZm	Z	m
Z
mZmZmZ ddlmZ ddlmZmZmZ  G d d	          Z G d
 d          Zeeef         Ze
e         Z G d d          ZdS )    )aliases)sha256)dumps)AnyDictIteratorListOptionalTupleUnion   )TOO_BIG_SEQUENCE)	iana_nameis_multi_byte_encodingunicode_rangec                      e Zd Z	 d%dededededddee         fd	Zd
e	defdZ
d
e	defdZedefd            ZdefdZdefdZd&dZedefd            Zedee         fd            Zedefd            Zedefd            Zedee         fd            Zedefd            Zedefd            Zedefd            Zedefd            Zedefd            Zedefd            Zeded          fd            Zedefd            Zedee         fd            Zedee         fd             Z d'd"edefd#Z!edefd$            Z"dS )(CharsetMatchNpayloadguessed_encodingmean_mess_ratiohas_sig_or_bom	languagesCoherenceMatchesdecoded_payloadc                     || _         || _        || _        || _        || _        d | _        g | _        d| _        d | _        d | _	        || _
        d S )N        )_payload	_encoding_mean_mess_ratio
_languages_has_sig_or_bom_unicode_ranges_leaves_mean_coherence_ratio_output_payload_output_encoding_string)selfr   r   r   r   r   r   s          \/var/www/api.educacionweb.es/myenv/lib/python3.11/site-packages/charset_normalizer/models.py__init__zCharsetMatch.__init__   s[      '.'6,5%348+-,/"04/3&5    otherreturnc                     t          |t                    sGt          d                    t	          |j                  t	          | j                                      | j        |j        k    o| j        |j        k    S )Nz&__eq__ cannot be invoked on {} and {}.)
isinstancer   	TypeErrorformatstr	__class__encodingfingerprintr(   r,   s     r)   __eq__zCharsetMatch.__eq__$   ss    %.. 	8??((#dn*=*=   
 }.X43CuGX3XXr+   c                    t          |t                    st          t          | j        |j        z
            }t          | j        |j        z
            }|dk     r|dk    r| j        |j        k    S |dk     rC|dk    r=t          | j                  t          k    r| j        |j        k     S | j	        |j	        k    S | j        |j        k     S )zQ
        Implemented to make sorted available upon CharsetMatches items.
        g{Gz?g{Gz?)
r/   r   
ValueErrorabschaos	coherencelenr   r   multi_byte_usage)r(   r,   chaos_differencecoherence_differences       r)   __lt__zCharsetMatch.__lt__-   s     %.. 	"%dj5;&>"?"?&)$.5?*J&K&K d""';d'B'B>EO33$$)=)E)E 4=!!%555zEK//(5+AAAzEK''r+   c                 j    dt          t          |                     t          | j                  z  z
  S )Ng      ?)r=   r2   rawr(   s    r)   r>   zCharsetMatch.multi_byte_usageC   s&    c#d))nns48}}455r+   c                 ^    | j          t          | j        | j        d          | _         | j         S )Nstrict)r'   r2   r   r   rD   s    r)   __str__zCharsetMatch.__str__G   s)    <t}dnhGGDL|r+   c                 B    d                     | j        | j                  S )Nz<CharsetMatch '{}' bytes({})>)r1   r4   r5   rD   s    r)   __repr__zCharsetMatch.__repr__M   s    .55dmTEUVVVr+   c                     t          |t                    r|| k    r't          d                    |j                            d |_        | j                            |           d S )Nz;Unable to add instance <{}> as a submatch of a CharsetMatch)r/   r   r9   r1   r3   r'   r#   appendr6   s     r)   add_submatchzCharsetMatch.add_submatchP   sk    %.. 	%4--MTTO    E"""""r+   c                     | j         S N)r   rD   s    r)   r4   zCharsetMatch.encoding[   s
    ~r+   c                     g }t          j                    D ]F\  }}| j        |k    r|                    |           &| j        |k    r|                    |           G|S )z
        Encoding name are known by many name, using this could help when searching for IBM855 when it's listed as CP855.
        )r   itemsr4   rK   )r(   also_known_asups       r)   encoding_aliaseszCharsetMatch.encoding_aliases_   sn    
 $&MOO 	( 	(DAq}!!$$Q''''!##$$Q'''r+   c                     | j         S rN   r!   rD   s    r)   bomzCharsetMatch.boml       ##r+   c                     | j         S rN   rV   rD   s    r)   byte_order_markzCharsetMatch.byte_order_markp   rX   r+   c                 $    d | j         D             S )z
        Return the complete list of possible languages found in decoded sequence.
        Usually not really useful. Returned list may be empty even if 'language' property return something != 'Unknown'.
        c                     g | ]
}|d          S )r    ).0es     r)   
<listcomp>z*CharsetMatch.languages.<locals>.<listcomp>z   s    ...!...r+   r    rD   s    r)   r   zCharsetMatch.languagest   s     /.do....r+   c                    | j         shd| j        v rdS ddlm}m} t          | j                  r || j                  n || j                  }t          |          dk    sd|v rdS |d         S | j         d         d         S )z
        Most probable language found in decoded sequence. If none were detected or inferred, the property will return
        "Unknown".
        asciiEnglishr   )encoding_languagesmb_encoding_languageszLatin BasedUnknown)r    could_be_from_charsetcharset_normalizer.cdre   rf   r   r4   r=   )r(   re   rf   r   s       r)   languagezCharsetMatch.language|   s      	  $444 y XWWWWWWW *$-887%%dm444''66  9~~""my&@&@ yQ<q!!$$r+   c                     | j         S rN   )r   rD   s    r)   r;   zCharsetMatch.chaos   s    $$r+   c                 :    | j         sdS | j         d         d         S )Nr   r   r   ra   rD   s    r)   r<   zCharsetMatch.coherence   s#     	3q!!$$r+   c                 4    t          | j        dz  d          S Nd      )ndigits)roundr;   rD   s    r)   percent_chaoszCharsetMatch.percent_chaos   s    TZ#%q1111r+   c                 4    t          | j        dz  d          S rn   )rr   r<   rD   s    r)   percent_coherencezCharsetMatch.percent_coherence   s    T^c)15555r+   c                     | j         S )z+
        Original untouched bytes.
        )r   rD   s    r)   rC   zCharsetMatch.raw   s    
 }r+   c                     | j         S rN   )r#   rD   s    r)   submatchzCharsetMatch.submatch   s
    |r+   c                 2    t          | j                  dk    S Nr   )r=   r#   rD   s    r)   has_submatchzCharsetMatch.has_submatch   s    4<  1$$r+   c                     | j         | j         S d t          |           D             }t          t          d |D                                 | _         | j         S )Nc                 ,    g | ]}t          |          S r]   )r   )r^   chars     r)   r`   z*CharsetMatch.alphabets.<locals>.<listcomp>   s-     0
 0
 0
$(M$0
 0
 0
r+   c                     h | ]}||S r]   r]   )r^   rs     r)   	<setcomp>z)CharsetMatch.alphabets.<locals>.<setcomp>   s    +L+L+L!!+LA+L+L+Lr+   )r"   r2   sortedlist)r(   detected_rangess     r)   	alphabetszCharsetMatch.alphabets   sj    +''0
 0
,/II0
 0
 0
  &d+L+L+L+L+L&M&MNN##r+   c                 6    | j         gd | j        D             z   S )z
        The complete list of encoding that output the exact SAME str result and therefore could be the originating
        encoding.
        This list does include the encoding available in property 'encoding'.
        c                     g | ]	}|j         
S r]   )r4   )r^   ms     r)   r`   z6CharsetMatch.could_be_from_charset.<locals>.<listcomp>   s    "D"D"D!1:"D"D"Dr+   )r   r#   rD   s    r)   rh   z"CharsetMatch.could_be_from_charset   s%     "D"Dt|"D"D"DDDr+   utf_8r4   c                     | j         | j         |k    r/|| _         t          |                               |d          | _        | j        S )z
        Method to get re-encoded bytes payload using given target encoding. Default to UTF-8.
        Any errors will be simply ignored by the encoder NOT replaced.
        Nreplace)r&   r2   encoder%   )r(   r4   s     r)   outputzCharsetMatch.output   sI    
  (D,AX,M,M$,D!#&t99#3#3Hi#H#HD ##r+   c                 h    t          |                                                                           S )zw
        Retrieve the unique SHA256 computed using the transformed (re-encoded) payload. Not the original one.
        )r   r   	hexdigestrD   s    r)   r5   zCharsetMatch.fingerprint   s&    
 dkkmm$$..000r+   rN   )r,   r   r-   N)r   )#__name__
__module____qualname__bytesr2   floatboolr
   r*   objectr7   rA   propertyr>   rG   rI   rL   r4   r	   rT   rW   rZ   r   rj   r;   r<   rs   ru   rC   rx   r{   r   rh   r   r5   r]   r+   r)   r   r   
   s        *.6 66 6 	6
 6 &6 "#6 6 6 62YF Yt Y Y Y Y(F (t ( ( ( (, 6% 6 6 6 X6    W# W W W W	# 	# 	# 	# #    X 
$s) 
 
 
 X
 $T $ $ $ X$ $ $ $ $ X$ /49 / / / X/ %# % % % X%6 %u % % % X% %5 % % % X%
 2u 2 2 2 X2 65 6 6 6 X6 U    X $~.    X %d % % % X% 	$49 	$ 	$ 	$ X	$ EtCy E E E XE	$ 	$s 	$ 	$ 	$ 	$ 	$ 1S 1 1 1 X1 1 1r+   r   c                       e Zd ZdZddeee                  fdZdee         fdZ	de
eef         defdZdefd	Zdefd
ZdeddfdZded         fdZded         fdZdS )CharsetMatchesz
    Container with every CharsetMatch items ordered by default from most probable to the less one.
    Act like a list(iterable) but does not implements all related methods.
    Nresultsc                 6    |rt          |          ng | _        d S rN   )r   _results)r(   r   s     r)   r*   zCharsetMatches.__init__   s    ?F,NF7OOOBr+   r-   c              #   $   K   | j         E d {V  d S rN   r   rD   s    r)   __iter__zCharsetMatches.__iter__   s&      =         r+   itemc                     t          |t                    r| j        |         S t          |t                    r't	          |d          }| j        D ]}||j        v r|c S t          )z
        Retrieve a single item either by its position or encoding name (alias may be used here).
        Raise KeyError upon invalid index or encoding not present in results.
        F)r/   intr   r2   r   rh   KeyError)r(   r   results      r)   __getitem__zCharsetMatches.__getitem__   sv    
 dC   	'=&&dC   	"T5))D- " "6777!MMM 8r+   c                 *    t          | j                  S rN   r=   r   rD   s    r)   __len__zCharsetMatches.__len__   s    4=!!!r+   c                 2    t          | j                  dk    S rz   r   rD   s    r)   __bool__zCharsetMatches.__bool__   s    4=!!A%%r+   c                    t          |t                    s4t          d                    t	          |j                                      t          |j                  t          k    rB| j	        D ]:}|j
        |j
        k    r(|j        |j        k    r|                    |            dS ;| j	                            |           t          | j	                  | _	        dS )z~
        Insert a single match. Will be inserted accordingly to preserve sort.
        Can be inserted as a submatch.
        z-Cannot append instance '{}' to CharsetMatchesN)r/   r   r9   r1   r2   r3   r=   rC   r   r   r5   r;   rL   rK   r   )r(   r   matchs      r)   rK   zCharsetMatches.append  s    
 $-- 	?FF''    tx==,,,  $(888U[DJ=V=V&&t,,,FFT"""t}--r+   r   c                 .    | j         sdS | j         d         S )zQ
        Simply return the first match. Strict equivalent to matches[0].
        Nr   r   rD   s    r)   bestzCharsetMatches.best  s      } 	4}Qr+   c                 *    |                                  S )zP
        Redundant method, call the method best(). Kept for BC reasons.
        )r   rD   s    r)   firstzCharsetMatches.first  s     yy{{r+   rN   )r   r   r   __doc__r
   r	   r   r*   r   r   r   r   r2   r   r   r   r   rK   r   r   r]   r+   r)   r   r      s0        
O Ol); < O O O O!(<0 ! ! ! !c3h L    " " " " "&$ & & & &.< .D . . . .( h~.        x/      r+   r   c                       e Zd Zdedee         dee         dee         dedee         deded	ed
ee         defdZe	de
eef         fd            ZdefdZdS )CliDetectionResultpathr4   rT   alternative_encodingsrj   r   r   r;   r<   unicode_pathis_preferredc                     || _         |
| _        || _        || _        || _        || _        || _        || _        || _        |	| _	        || _
        d S rN   )r   r   r4   rT   r   rj   r   r   r;   r<   r   )r(   r   r4   rT   r   rj   r   r   r;   r<   r   r   s               r)   r*   zCliDetectionResult.__init__)  s\     	+7'/+;0E"%$-$2!
 )".r+   r-   c                     | j         | j        | j        | j        | j        | j        | j        | j        | j        | j	        | j
        dS )Nr   r4   rT   r   rj   r   r   r;   r<   r   r   r   rD   s    r)   __dict__zCliDetectionResult.__dict__C  sO     I $ 5%)%?"1Z - -
 
 	
r+   c                 0    t          | j        dd          S )NT   )ensure_asciiindent)r   r   rD   s    r)   to_jsonzCliDetectionResult.to_jsonS  s    T]a@@@@r+   N)r   r   r   r2   r
   r	   r   r   r*   r   r   r   r   r   r]   r+   r)   r   r   (  s        // 3-/ s)	/
  $Cy/ / 9/ / / / sm/ / / / /4 
$sCx. 
 
 
 X
A A A A A A Ar+   r   N)encodings.aliasesr   hashlibr   jsonr   typingr   r   r   r	   r
   r   r   constantr   utilsr   r   r   r   r   r2   r   CoherenceMatchr   r   r]   r+   r)   <module>r      sZ   % % % % % %             D D D D D D D D D D D D D D D D D D & & & & & & C C C C C C C C C CT1 T1 T1 T1 T1 T1 T1 T1n@ @ @ @ @ @ @ @F sEz"' ,A ,A ,A ,A ,A ,A ,A ,A ,A ,Ar+   