Adding Quotes to Attribute Values in HTML Documents : Parse HTML : Network PYTHON TUTORIALS


PYTHON TUTORIALS » Network » Parse HTML »

 

Adding Quotes to Attribute Values in HTML Documents


import HTMLParser
import urllib
import sys

class parseAttrs(HTMLParser.HTMLParser):
    """Re-emit an HTML document with every attribute value double-quoted.

    Each parser callback appends the textual form of the construct it was
    given to ``self.pieces``; ``parsed()`` joins those pieces back into one
    string.  Typical use::

        parser = parseAttrs()
        parser.feed(html_text)
        parser.close()          # flush buffered input before reading output
        result = parser.parsed()
    """

    def __init__(self, *args, **kwargs):
        """Create the parser and start with an empty output buffer.

        Any arguments are passed straight through to the base parser.
        """
        # Explicit base-class call (instead of super()) so this also works
        # where the base parser is an old-style class.
        HTMLParser.HTMLParser.__init__(self, *args, **kwargs)
        self.init_parser()

    def init_parser(self):
        """Discard any collected output so the instance can be reused."""
        self.pieces = []

    def _format_attrs(self, attrs):
        """Return *attrs* as text, each pair preceded by a single space.

        attrs is the list of ``(name, value)`` pairs given to the start-tag
        callbacks.  A value of ``None`` (an attribute written without
        ``=value``) is emitted as the bare name.  Other values are wrapped
        in double quotes.
        """
        rendered = []
        for name, value in attrs:
            if value is None:
                rendered.append(" %s" % (name,))
                continue
            # The base parser is documented to deliver values with their
            # quotes removed and entities replaced, so '&' and '"' have to
            # be escaped again or the quoted output could be malformed.
            # '&' goes first so the '&' in '&quot;' is not escaped twice.
            escaped = value.replace("&", "&amp;").replace('"', "&quot;")
            rendered.append(' %s="%s"' % (name, escaped))
        return "".join(rendered)

    def handle_starttag(self, tag, attrs):
        """Record an opening tag with all attribute values quoted."""
        self.pieces.append("<%s%s>" % (tag, self._format_attrs(attrs)))

    def handle_startendtag(self, tag, attrs):
        """Record a self-closing tag such as ``<br/>``.

        Without this override the base class calls handle_starttag() and
        then handle_endtag(), which would turn ``<br/>`` into ``<br></br>``.
        """
        self.pieces.append("<%s%s />" % (tag, self._format_attrs(attrs)))

    def handle_charref(self, name):
        """Record a numeric character reference, e.g. ``&#169;``."""
        self.pieces.append("&#%s;" % (name,))

    def handle_endtag(self, tag):
        """Record a closing tag."""
        self.pieces.append("</%s>" % (tag,))

    def handle_entityref(self, ref):
        """Record a named entity reference, e.g. ``&amp;``."""
        # The callback receives the name without '&' and ';', so both
        # delimiters are restored here.
        self.pieces.append("&%s;" % (ref,))

    def handle_data(self, text):
        """Record character data unchanged."""
        self.pieces.append(text)

    def handle_comment(self, text):
        """Record a comment, restoring its ``<!--`` / ``-->`` delimiters."""
        self.pieces.append("<!--%s-->" % (text,))

    def handle_pi(self, text):
        """Record a processing instruction, restoring ``<?`` and ``>``."""
        self.pieces.append("<?%s>" % (text,))

    def handle_decl(self, text):
        """Record a declaration such as a DOCTYPE, restoring ``<!`` and ``>``."""
        self.pieces.append("<!%s>" % (text,))

    def parsed(self):
        """Return everything collected so far as a single string."""
        return "".join(self.pieces)

# Demo driver: print test2.html as written, then the same document with
# every attribute value quoted.
# The file is read once and the handle is closed by the with-block; the same
# text is both parsed and echoed.
with open("test2.html") as source_file:
    original_html = source_file.read()

attrParser = parseAttrs()
attrParser.init_parser()
attrParser.feed(original_html)
# close() forces the parser to process any input it is still buffering, so
# it has to run before the collected output is read.
attrParser.close()
# Single-argument print(...) behaves the same as a statement or a function.
print(original_html)
print(attrParser.parsed())



Leave a Comment / Note


 
Verification is used to prevent unwanted posts (spam).


PYTHON TUTORIALS

 Navioo Network
» Parse HTML