Head line handler : ContentHandler : XML PYTHON TUTORIALS


PYTHON TUTORIALS » XML » ContentHandler »

 

Head line handler


from xml.sax.handler import ContentHandler
from xml.sax import parse

class HeadlineHandler(ContentHandler):

    in_headline = 0

    def __init__(self, headlines):
        ContentHandler.__init__(self)
        self.headlines = headlines
        self.data = []

    def startElement(self, name, attrs):
        if name == 'h1':
            self.in_headline = 1

    def endElement(self, name):
        if name == 'h1':
            text = ''.join(self.data)
            self.data = []
            self.headlines.append(text)
            self.in_headline = 0

    def characters(self, string):
        if self.in_headline:
            self.data.append(string)

headlines = []
parse('website.xml', HeadlineHandler(headlines))

print 'The following <h1> elements were found:'
for h in headlines:
    print h



Leave a Comment / Note


 
Verification is used to prevent unwanted posts (spam). .


PYTHON TUTORIALS

 Navioo XML
» ContentHandler