|
|
| __init__ (self, bytes payload, str guessed_encoding, float mean_mess_ratio, bool has_sig_or_bom, CoherenceMatches languages, str|None decoded_payload=None, str|None preemptive_declaration=None) |
| |
|
bool | __eq__ (self, object other) |
| |
| bool | __lt__ (self, object other) |
| |
|
float | multi_byte_usage (self) |
| |
|
str | __str__ (self) |
| |
|
str | __repr__ (self) |
| |
|
None | add_submatch (self, CharsetMatch other) |
| |
|
str | encoding (self) |
| |
| list[str] | encoding_aliases (self) |
| |
|
bool | bom (self) |
| |
|
bool | byte_order_mark (self) |
| |
| list[str] | languages (self) |
| |
| str | language (self) |
| |
|
float | chaos (self) |
| |
|
float | coherence (self) |
| |
|
float | percent_chaos (self) |
| |
|
float | percent_coherence (self) |
| |
| bytes | raw (self) |
| |
|
list[CharsetMatch] | submatch (self) |
| |
|
bool | has_submatch (self) |
| |
|
list[str] | alphabets (self) |
| |
| list[str] | could_be_from_charset (self) |
| |
| bytes | output (self, str encoding="utf_8") |
| |
| str | fingerprint (self) |
| |
|
|
| _payload |
| |
|
| _string |
| |
|
| _leaves |
| |
|
| _unicode_ranges |
| |
|
| _output_encoding |
| |
|
| _output_payload |
| |
◆ __lt__()
| bool charset_normalizer.models.CharsetMatch.__lt__ |
( |
|
self, |
|
|
object |
other |
|
) |
| |
Implemented so that sorted() can be used on CharsetMatches items.
◆ could_be_from_charset()
| list[str] charset_normalizer.models.CharsetMatch.could_be_from_charset |
( |
|
self | ) |
|
The complete list of encodings that output the exact SAME str result and therefore could be the originating
encoding.
This list includes the encoding available in the 'encoding' property.
◆ encoding_aliases()
| list[str] charset_normalizer.models.CharsetMatch.encoding_aliases |
( |
|
self | ) |
|
Encodings are known by many names; using this could help when searching for IBM855 when it's listed as CP855.
◆ fingerprint()
| str charset_normalizer.models.CharsetMatch.fingerprint |
( |
|
self | ) |
|
Retrieve the unique SHA256 computed using the transformed (re-encoded) payload, not the original one.
◆ language()
| str charset_normalizer.models.CharsetMatch.language |
( |
|
self | ) |
|
Most probable language found in decoded sequence. If none were detected or inferred, the property will return
"Unknown".
◆ languages()
| list[str] charset_normalizer.models.CharsetMatch.languages |
( |
|
self | ) |
|
Return the complete list of possible languages found in the decoded sequence.
Usually not really useful. The returned list may be empty even if the 'language' property returns something != 'Unknown'.
◆ output()
| bytes charset_normalizer.models.CharsetMatch.output |
( |
|
self, |
|
|
str |
encoding = "utf_8" |
|
) |
| |
Method to get the re-encoded bytes payload using the given target encoding. Defaults to UTF-8.
Any errors will simply be ignored by the encoder, NOT replaced.
◆ raw()
| bytes charset_normalizer.models.CharsetMatch.raw |
( |
|
self | ) |
|
Original untouched bytes.
The documentation for this class was generated from the following file:
- docs/help/help-venv/lib/python3.12/site-packages/charset_normalizer/models.py