""" parsers.py: contains Parser class for zgw.py """ from web.net import htmlquote from pymarc import MARCReader, marc8_to_unicode from settings import IGNORE_UNICODE_ERRORS class ParseError(Exception): """Base class for exceptions from this module""" pass class Parser: """Base class for parsers used by zgw""" def to_html(self, result): """ Takes a result, converts it to Unicode (assumes result is MARC8), escapes characters for HTML, and adds linebreak tags where newlines occur. """ result = result.replace('\n', '
\n') sanitized = htmlquote(result) return sanitized.replace('<br/>', '
\n') def to_unicode(self): """Converts MARC8 encoded data to Unicode.""" result = self.__str__() result_html = Parser().to_html(result) try: result_out = marc8_to_unicode(result_html) except: if IGNORE_UNICODE_ERRORS == True: result_out = "NOTE: MARC8 to Unicode conversion failed on this \ record.
\n%s" % result else: raise return result_out