= Python Html Parser =
'''`html.parser`''' is a module for parsing HTML.
<<TableOfContents>>
----
== Usage ==
{{{
from html.parser import HTMLParser
def clean_attrs(attrs):
    """Clean raw attributes into a dictionary.

    ``HTMLParser.handle_starttag`` passes attributes as a list of
    ``(name, value)`` pairs, where ``value`` is ``None`` for an attribute
    written without a value (e.g. ``<input disabled>``).

    The ``style`` attribute is expanded into a nested dictionary mapping each
    CSS property name to its value; every other attribute is stored as-is.
    If an attribute name is repeated, the last occurrence wins.

    Args:
        attrs: Iterable of ``(name, value)`` pairs.

    Returns:
        A dict of attribute names to values, with ``"style"`` (when present)
        mapped to a dict of CSS properties.
    """
    clean = {}
    for key, value in attrs:
        if key != "style":
            clean[key] = value
            continue

        # Build the nested dict first; indexing clean["style"] before it
        # exists would raise KeyError.
        styles = {}
        # A bare ``style`` attribute has value None; treat it as empty.
        for declaration in (value or "").split(";"):
            # partition() splits on the first colon only, so values such as
            # ``url(http://example.com/a.png)`` stay intact.
            name, colon, style_value = declaration.partition(":")
            name = name.strip()
            if not colon or not name:
                # Empty segment (trailing ";") or malformed declaration.
                continue
            styles[name] = style_value.strip()
        clean[key] = styles
    return clean
class MyHTMLParser(HTMLParser):
    """Print the text content found inside ``<div>`` elements.

    Nesting is tracked with a depth counter so that text following a nested
    ``</div>`` but still inside an outer ``<div>`` is printed too. A plain
    boolean would be cleared by the first closing tag.
    """

    def __init__(self):
        super().__init__()
        # True while the parser is inside at least one open <div>.
        self.in_div = False
        # Number of currently open <div> elements.
        self._div_depth = 0

    def handle_starttag(self, tag, _attrs):
        """Record entry into a ``<div>``; attributes are cleaned for use here."""
        # The cleaned attributes are not needed for <div> tracking; the call
        # shows where they become available to handler code.
        attrs = clean_attrs(_attrs)
        if tag == "div":
            self._div_depth += 1
            self.in_div = True

    def handle_endtag(self, tag):
        """Record exit from a ``<div>``, ignoring unmatched closing tags."""
        if tag == "div" and self._div_depth > 0:
            self._div_depth -= 1
            self.in_div = self._div_depth > 0

    def handle_data(self, data):
        """Print text data when it occurs inside a ``<div>``."""
        if self.in_div:
            print(data)
def parse(string):
    """Feed ``string`` to a new ``MyHTMLParser`` and then close it.

    The parser is closed even when feeding raises.
    """
    html_parser = MyHTMLParser()
    try:
        html_parser.feed(string)
    finally:
        # Runs whether or not feed() raised.
        html_parser.close()
}}}
----
== See also ==
[[https://docs.python.org/3/library/html.parser.html|Python html.parser module documentation]]
----
CategoryRicottone