I am just getting started with Python/Jython and the SAX parser (xml.sax). I wrote a simple content handler as a test.
from __future__ import with_statement
from xml.sax import make_parser, handler
from xml.sax.handler import ContentHandler
class CountingHandler(ContentHandler):
def __init__(self):
self.counter = 0
def startElement(self, name, attrs):
self.counter += 1
def main(argv=sys.argv):
parser = make_parser()
h = CountingHandler()
parser.setContentHandler(h)
with open(argv[1], "r") as input:
parser.parse(input)
When I run this on some documents (not all), I get an error:
Traceback (most recent call last):
File "src/sciencenetworks/xmltools.py", line 93, in <module>
sys.exit(main())
File "src/sciencenetworks/xmltools.py", line 88, in main
parser.parse(input)
File "/amd.home/home/staudt/workspace/jython/Lib/xml/sax/drivers2/drv_javasax.py", line 141, in parse
self._parser.parse(JyInputSourceWrapper(source))
File "/amd.home/home/staudt/workspace/jython/Lib/xml/sax/drivers2/drv_javasax.py", line 90, in resolveEntity
return JyInputSourceWrapper(self._resolver.resolveEntity(pubId, sysId))
File "/amd.home/home/staudt/workspace/jython/Lib/xml/sax/drivers2/drv_javasax.py", line 75, in __init__
if source.getByteStream():
AttributeError: 'unicode' object has no attribute 'getByteStream'
When I look into the source code of drv_javasax.py, it seems like input is not recognized as a file like object, which it is.
Any ideas on how to fix this?