|
| | __init__ (self, Optional[Iterable[Any]] parser_args=None, Optional[Dict[str, Any]] parser_kwargs=None, **Any kwargs) |
| |
| Iterable[Tuple[str, Optional[_Encoding], Optional[_Encoding], bool]] | prepare_markup (self, _RawMarkup markup, Optional[_Encoding] user_specified_encoding=None, Optional[_Encoding] document_declared_encoding=None, Optional[_Encodings] exclude_encodings=None) |
| |
| None | feed (self, _RawMarkup markup, type[BeautifulSoupHTMLParser] _parser_class=BeautifulSoupHTMLParser) |
| |
| bool | set_up_substitutions (self, Tag tag) |
| |
| None | initialize_soup (self, BeautifulSoup soup) |
| |
| None | reset (self) |
| |
| bool | can_be_empty_element (self, str tag_name) |
| |
| str | test_fragment_to_document (self, str fragment) |
| |
|
|
bool | is_xml = False |
| |
|
bool | picklable = True |
| |
|
str | NAME = HTMLPARSER |
| |
|
list | features = [NAME, HTML, STRICT] |
| |
|
Tuple | parser_args [Iterable[Any], Dict[str, Any]] |
| |
|
bool | TRACKS_LINE_NUMBERS = True |
| |
| Optional | DEFAULT_EMPTY_ELEMENT_TAGS |
| |
|
Set | DEFAULT_BLOCK_ELEMENTS |
| |
| dict | DEFAULT_STRING_CONTAINERS |
| |
| dict | DEFAULT_CDATA_LIST_ATTRIBUTES |
| |
|
set | DEFAULT_PRESERVE_WHITESPACE_TAGS = set(["pre", "textarea"]) |
| |
|
Any | USE_DEFAULT = object() |
| |
|
str | NAME = "[Unknown tree builder]" |
| |
|
list | ALTERNATE_NAMES = [] |
| |
|
list | features = [] |
| |
|
bool | is_xml = False |
| |
|
bool | picklable = False |
| |
|
Optional | soup [BeautifulSoup] |
| |
|
Optional | empty_element_tags = None |
| |
|
Dict | cdata_list_attributes [str, Set[str]] |
| |
|
Set | preserve_whitespace_tags [str] |
| |
|
Dict | string_containers [str, Type[NavigableString]] |
| |
|
bool | tracks_line_numbers |
| |
|
Dict | DEFAULT_CDATA_LIST_ATTRIBUTES = defaultdict(set) |
| |
|
Set | DEFAULT_PRESERVE_WHITESPACE_TAGS = set() |
| |
|
dict | DEFAULT_STRING_CONTAINERS = {} |
| |
|
Optional | DEFAULT_EMPTY_ELEMENT_TAGS = None |
| |
|
bool | TRACKS_LINE_NUMBERS = False |
| |
A Beautiful Soup `bs4.builder.TreeBuilder` that uses the
:py:class:`html.parser.HTMLParser` parser, found in the Python
standard library.
| Iterable[Tuple[str, Optional[_Encoding], Optional[_Encoding], bool]] bs4.builder._htmlparser.HTMLParserTreeBuilder.prepare_markup (self, _RawMarkup markup, Optional[_Encoding] user_specified_encoding=None, Optional[_Encoding] document_declared_encoding=None, Optional[_Encodings] exclude_encodings=None) |
| |
Run any preliminary steps necessary to make incoming markup
acceptable to the parser.
:param markup: Some markup -- probably a bytestring.
:param user_specified_encoding: The user asked to try this encoding.
:param document_declared_encoding: The markup itself claims to be
in this encoding.
:param exclude_encodings: The user asked _not_ to try any of
these encodings.
:yield: A series of 4-tuples: (markup, encoding, declared encoding,
has undergone character replacement).
Each 4-tuple represents a strategy for parsing the document.
This TreeBuilder uses Unicode, Dammit to convert the markup
into Unicode, so the ``markup`` element of the tuple will
always be a string.
Reimplemented from bs4.builder.TreeBuilder.