"""Look up VIAF authority headings for author names listed in a CSV file.

Usage: python SCRIPT INPUT_CSV OUTPUT_CSV

INPUT_CSV must have the columns ``Uniform_Author_Name`` and ``LOC``.  For
each input row the VIAF search service is queried for the author name, and
the first returned record whose ``001`` field starts with ``LC`` is written
to OUTPUT_CSV (and echoed to stdout) as
``InName, LOC, ParsedName, ParsedId``.

NOTE: this is Python 2 code (it relies on ``urllib2``,
``urllib.quote_plus``, ``urllib.urlopen`` and ``unicode``).
"""
import csv
import re
import time
import string
import sys
import urllib
import urllib2

import pymarc

# re.escape is required: unescaped, the backslash in string.punctuation
# escapes the following ']' inside the character class, so a literal
# backslash would never be matched.
PUNC_RE = re.compile('[%s]' % re.escape(string.punctuation))
VIAF_URL = 'http://orlabs.oclc.org/viaf/'
QUERY_BASE = VIAF_URL + 'search/VIAF?query=local.personalName+all+'
QUERY_PARAMS = '+&version=1.1&maximumRecords=100&operation=searchRetrieve&stylesheet=/viaf/xsl/Results.xsl&sortKeys=holdingscount&recordSchema=BriefMarcXML'


def normalize(instr):
    """Return *instr* with punctuation blanked out, stripped and URL-quoted.

    Every punctuation character is replaced by a space, surrounding
    whitespace is removed, and the result is encoded with
    ``urllib.quote_plus`` so it can be embedded in a query string.
    """
    return urllib.quote_plus(PUNC_RE.sub(' ', instr).strip())


def grab_response(name):
    """Open the VIAF personal-name search for *name*.

    The normalized name is wrapped in URL-encoded double quotes (``%22``)
    and placed between QUERY_BASE and QUERY_PARAMS.

    Returns the file-like object produced by ``urllib.urlopen``; the caller
    is responsible for closing it.
    """
    url = '%s%%22%s%%22%s' % (QUERY_BASE, normalize(name), QUERY_PARAMS)
    return urllib.urlopen(url)


def do_it(r):
    """Print the string form of *r*, encoded as UTF-8."""
    r_ = unicode(str(r))
    print(r_.encode("utf-8", 'ignore'))


def is_not_none(n):
    """Return True unless *n* is None."""
    return n is not None


def grab_auth(identifier):
    """Open the VIAF authority record at ``VIAF_URL + identifier``.

    Returns the file-like object produced by ``urllib.urlopen``; the caller
    is responsible for closing it.
    """
    return urllib.urlopen(VIAF_URL + identifier)


def _record_result(writer, inname, lc, pn, identifier):
    """Write one result row to the CSV and echo the same values to stdout."""
    writer.writerow((inname, lc, pn, identifier))
    print("%s, %s, %s, %s" % (inname, lc, pn, identifier))


def main(argv=None):
    """Run the lookup for every row of the input CSV.

    argv -- argument list shaped like ``sys.argv`` (program name, input CSV
            path, output CSV path); defaults to ``sys.argv``.

    Exits with a usage message when fewer than two paths are given.
    """
    if argv is None:
        argv = sys.argv
    if len(argv) < 3:
        sys.exit('usage: %s INPUT_CSV OUTPUT_CSV' % argv[0])

    # The files are closed by the with-statement; csv.writer objects have
    # no close() method of their own.
    with open(argv[1], 'rU') as infile, open(argv[2], 'wt') as outfile:
        reader = csv.DictReader(infile)
        writer = csv.writer(outfile)
        writer.writerow(('InName', 'LOC', 'ParsedName', 'ParsedId'))

        for row in reader:
            inname = row['Uniform_Author_Name']
            lc = row['LOC']

            handler = pymarc.XmlHandler()
            response = grab_response(inname)
            try:
                pymarc.parse_xml(response, handler)
            finally:
                response.close()

            # The original code filters None entries out of the parsed
            # records before using them; keep doing so.
            records = [rec for rec in handler.records if is_not_none(rec)]

            for rec in records:
                try:
                    identifier = rec['001'].data
                    if identifier.startswith('LC'):
                        pn = rec['100'].format_field()
                        _record_result(writer, inname, lc, pn, identifier)
                        # Only the first 'LC' record per name is reported.
                        break
                except Exception:
                    # Best effort: a record that cannot be read or written
                    # is logged as a failure and the scan continues.
                    # Catching Exception instead of using a bare except
                    # lets Ctrl-C and SystemExit stop the run.
                    _record_result(writer, inname, lc, 'Fail', 'Fail')

            # Pause one second between names before the next request.
            time.sleep(1)


if __name__ == '__main__':
    main()