diff options
-rw-r--r-- | src/fetch/scripts/dark_horse_comics.py | 60 | ||||
-rw-r--r-- | src/fetch/scripts/fr.allocine.py | 60 | ||||
-rw-r--r-- | src/fetch/scripts/ministerio_de_cultura.py | 92 | ||||
-rwxr-xr-x | src/translators/griffith2tellico.py | 52 |
4 files changed, 132 insertions, 132 deletions
diff --git a/src/fetch/scripts/dark_horse_comics.py b/src/fetch/scripts/dark_horse_comics.py index 4f3b651..22b3175 100644 --- a/src/fetch/scripts/dark_horse_comics.py +++ b/src/fetch/scripts/dark_horse_comics.py @@ -33,7 +33,7 @@ Update (checked) = %{title} """ import sys, os, re, md5, random, string -import urllib, urllib2, time, base64 +import urllib.request, urllib.parse, urllib.error, time, base64 import xml.dom.minidom XML_HEADER = """<?xml version="1.0" encoding="UTF-8"?>""" @@ -45,7 +45,7 @@ VERSION = "0.2" def genMD5(): """ - Generates and returns a random md5 string. Its main purpose is to allow random + Generates and returns a random md5 string. Its main purpose is to allow random image file name generation. """ obj = md5.new() @@ -62,7 +62,7 @@ class BasicTellicoDOM: self.__root = self.__doc.createElement('tellico') self.__root.setAttribute('xmlns', 'http://periapsis.org/tellico/') self.__root.setAttribute('syntaxVersion', '9') - + self.__collection = self.__doc.createElement('collection') self.__collection.setAttribute('title', 'My Comics') self.__collection.setAttribute('type', '6') @@ -78,7 +78,7 @@ class BasicTellicoDOM: def addEntry(self, movieData): """ - Add a comic entry. + Add a comic entry. Returns an entry node instance """ d = movieData @@ -86,7 +86,7 @@ class BasicTellicoDOM: entryNode.setAttribute('id', str(self.__currentId)) titleNode = self.__doc.createElement('title') - titleNode.appendChild(self.__doc.createTextNode(unicode(d['title'], 'latin-1').encode('utf-8'))) + titleNode.appendChild(self.__doc.createTextNode(str(d['title'], 'latin-1').encode('utf-8'))) yearNode = self.__doc.createElement('pub_year') yearNode.appendChild(self.__doc.createTextNode(d['pub_year'])) @@ -101,25 +101,25 @@ class BasicTellicoDOM: writersNode = self.__doc.createElement('writers') for g in d['writer']: writerNode = self.__doc.createElement('writer') - writerNode.appendChild(self.__doc.createTextNode(unicode(g, 'latin-1').encode('utf-8'))) + writerNode.appendChild(self.__doc.createTextNode(str(g, 'latin-1').encode('utf-8'))) writersNode.appendChild(writerNode) genresNode = self.__doc.createElement('genres') for g in d['genre']: genreNode = self.__doc.createElement('genre') - genreNode.appendChild(self.__doc.createTextNode(unicode(g, 'latin-1').encode('utf-8'))) + genreNode.appendChild(self.__doc.createTextNode(str(g, 'latin-1').encode('utf-8'))) genresNode.appendChild(genreNode) commentsNode = self.__doc.createElement('comments') #for g in d['comments']: # commentsNode.appendChild(self.__doc.createTextNode(unicode("%s\n\n" % g, 'latin-1').encode('utf-8'))) commentsData = string.join(d['comments'], '\n\n') - commentsNode.appendChild(self.__doc.createTextNode(unicode(commentsData, 'latin-1').encode('utf-8'))) + commentsNode.appendChild(self.__doc.createTextNode(str(commentsData, 'latin-1').encode('utf-8'))) artistsNode = self.__doc.createElement('artists') - for k, v in d['artist'].iteritems(): + for k, v in d['artist'].items(): artistNode = self.__doc.createElement('artist') - artistNode.appendChild(self.__doc.createTextNode(unicode(v, 'latin-1').encode('utf-8'))) + artistNode.appendChild(self.__doc.createTextNode(str(v, 'latin-1').encode('utf-8'))) artistsNode.appendChild(artistNode) pagesNode = self.__doc.createElement('pages') @@ -132,13 +132,13 @@ class BasicTellicoDOM: imageNode = self.__doc.createElement('image') imageNode.setAttribute('format', 'JPEG') imageNode.setAttribute('id', d['image'][0]) - imageNode.appendChild(self.__doc.createTextNode(unicode(d['image'][1], 'latin-1').encode('utf-8'))) + imageNode.appendChild(self.__doc.createTextNode(str(d['image'][1], 'latin-1').encode('utf-8'))) coverNode = self.__doc.createElement('cover') coverNode.appendChild(self.__doc.createTextNode(d['image'][0])) - for name in ( 'writersNode', 'genresNode', 'artistsNode', 'pagesNode', 'yearNode', - 'titleNode', 'issueNode', 'commentsNode', 'pubNode', 'langNode', + for name in ( 'writersNode', 'genresNode', 'artistsNode', 'pagesNode', 'yearNode', + 'titleNode', 'issueNode', 'commentsNode', 'pubNode', 'langNode', 'countryNode' ): entryNode.appendChild(eval(name)) @@ -147,7 +147,7 @@ class BasicTellicoDOM: self.__images.appendChild(imageNode) self.__collection.appendChild(entryNode) - + self.__currentId += 1 return entryNode @@ -156,17 +156,17 @@ class BasicTellicoDOM: Prints entry's XML content to stdout """ try: - print nEntry.toxml() + print(nEntry.toxml()) except: - print sys.stderr, "Error while outputing XML content from entry to Tellico" + print(sys.stderr, "Error while outputing XML content from entry to Tellico") def printXMLTree(self): """ Outputs XML content to stdout """ self.__collection.appendChild(self.__images) - print XML_HEADER; print DOCTYPE - print self.__root.toxml() + print(XML_HEADER); print(DOCTYPE) + print(self.__root.toxml()) class DarkHorseParser: @@ -189,11 +189,11 @@ class DarkHorseParser: 'colorist' : '<b>Colorist: *</b> *<a.*>(?P<colorist>.*)</a>', 'genre' : '<b>Genre: *</b> *<a.*?>(?P<genre>.*?)</a><br>', 'format' : '<b>Format: *</b> *(?P<format>.*?)<br>', - } + } # Compile patterns objects self.__regExpsPO = {} - for k, pattern in self.__regExps.iteritems(): + for k, pattern in self.__regExps.items(): self.__regExpsPO[k] = re.compile(pattern) self.__domTree = BasicTellicoDOM() @@ -211,18 +211,18 @@ class DarkHorseParser: """ Fetch HTML data from url """ - u = urllib2.urlopen(url) + u = urllib.request.urlopen(url) self.__data = u.read() u.close() def __fetchMovieLinks(self): """ - Retrieve all links related to the search. self.__data contains HTML content fetched by self.__getHTMLContent() + Retrieve all links related to the search. self.__data contains HTML content fetched by self.__getHTMLContent() that need to be parsed. """ matchList = re.findall("""<a *href="%s(?P<page>.*?)">(?P<title>.*?)</a>""" % self.__basePath.replace('?', '\?'), self.__data) if not matchList: return None - + return matchList def __fetchCover(self, path, delete = True): @@ -231,7 +231,7 @@ class DarkHorseParser: The image is deleted if delete is True """ md5 = genMD5() - imObj = urllib2.urlopen(path.strip()) + imObj = urllib.request.urlopen(path.strip()) img = imObj.read() imObj.close() imgPath = "/tmp/%s.jpeg" % md5 @@ -240,7 +240,7 @@ class DarkHorseParser: f.write(img) f.close() except: - print sys.stderr, "Error: could not write image into /tmp" + print(sys.stderr, "Error: could not write image into /tmp") b64data = (md5 + '.jpeg', base64.encodestring(img)) @@ -249,7 +249,7 @@ class DarkHorseParser: try: os.remove(imgPath) except: - print sys.stderr, "Error: could not delete temporary image /tmp/%s.jpeg" % md5 + print(sys.stderr, "Error: could not delete temporary image /tmp/%s.jpeg" % md5) return b64data @@ -286,7 +286,7 @@ class DarkHorseParser: data['image'] = b64img data['pub_year'] = NULLSTRING - for name, po in self.__regExpsPO.iteritems(): + for name, po in self.__regExpsPO.items(): data[name] = NULLSTRING if name == 'desc': matches[name] = re.findall(self.__regExps[name], self.__data, re.S | re.I) @@ -363,7 +363,7 @@ class DarkHorseParser: if not len(title): return self.__title = title - self.__getHTMLContent("%s%s" % (self.__baseURL, self.__searchURL % urllib.quote(self.__title))) + self.__getHTMLContent("%s%s" % (self.__baseURL, self.__searchURL % urllib.parse.quote(self.__title))) # Get all links links = self.__fetchMovieLinks() @@ -381,11 +381,11 @@ class DarkHorseParser: return None def halt(): - print "HALT." + print("HALT.") sys.exit(0) def showUsage(): - print "Usage: %s comic" % sys.argv[0] + print("Usage: %s comic" % sys.argv[0]) sys.exit(1) def main(): diff --git a/src/fetch/scripts/fr.allocine.py b/src/fetch/scripts/fr.allocine.py index 97a2247..83e41d0 100644 --- a/src/fetch/scripts/fr.allocine.py +++ b/src/fetch/scripts/fr.allocine.py @@ -15,20 +15,20 @@ # *************************************************************************** # Version 0.4: 2007-08-27 -# * Fixed parsing errors: some fields in allocine's HTML pages have changed recently. Multiple actors and genres +# * Fixed parsing errors: some fields in allocine's HTML pages have changed recently. Multiple actors and genres # could not be retrieved. Fixed bad http request error due to some changes in HTML code. # # Version 0.3: # * Fixed parsing: some fields in allocine's HTML pages have changed. Movie's image could not be fetched anymore. Fixed. -# +# # Version 0.2: # * Fixed parsing: allocine's HTML pages have changed. Movie's image could not be fetched anymore. -# +# # Version 0.1: # * Initial release. import sys, os, re, md5, random -import urllib, urllib2, time, base64 +import urllib.request, urllib.parse, urllib.error, time, base64 import xml.dom.minidom XML_HEADER = """<?xml version="1.0" encoding="UTF-8"?>""" @@ -48,16 +48,16 @@ class BasicTellicoDOM: self.__root = self.__doc.createElement('tellico') self.__root.setAttribute('xmlns', 'http://periapsis.org/tellico/') self.__root.setAttribute('syntaxVersion', '9') - + self.__collection = self.__doc.createElement('collection') self.__collection.setAttribute('title', 'My Movies') self.__collection.setAttribute('type', '3') - + self.__fields = self.__doc.createElement('fields') # Add all default (standard) fields self.__dfltField = self.__doc.createElement('field') self.__dfltField.setAttribute('name', '_default') - + # Add a custom 'Collection' field self.__customField = self.__doc.createElement('field') self.__customField.setAttribute('name', 'titre-original') @@ -67,7 +67,7 @@ class BasicTellicoDOM: self.__customField.setAttribute('format', '1') self.__customField.setAttribute('type', '1') self.__customField.setAttribute('i18n', 'yes') - + self.__fields.appendChild(self.__dfltField) self.__fields.appendChild(self.__customField) self.__collection.appendChild(self.__fields) @@ -90,23 +90,23 @@ class BasicTellicoDOM: entryNode.setAttribute('id', str(self.__currentId)) titleNode = self.__doc.createElement('title') - titleNode.appendChild(self.__doc.createTextNode(unicode(d['title'], 'latin-1').encode('utf-8'))) + titleNode.appendChild(self.__doc.createTextNode(str(d['title'], 'latin-1').encode('utf-8'))) otitleNode = self.__doc.createElement('titre-original') - otitleNode.appendChild(self.__doc.createTextNode(unicode(d['otitle'], 'latin-1').encode('utf-8'))) + otitleNode.appendChild(self.__doc.createTextNode(str(d['otitle'], 'latin-1').encode('utf-8'))) yearNode = self.__doc.createElement('year') - yearNode.appendChild(self.__doc.createTextNode(unicode(d['year'], 'latin-1').encode('utf-8'))) + yearNode.appendChild(self.__doc.createTextNode(str(d['year'], 'latin-1').encode('utf-8'))) genresNode = self.__doc.createElement('genres') for g in d['genres']: genreNode = self.__doc.createElement('genre') - genreNode.appendChild(self.__doc.createTextNode(unicode(g, 'latin-1').encode('utf-8'))) + genreNode.appendChild(self.__doc.createTextNode(str(g, 'latin-1').encode('utf-8'))) genresNode.appendChild(genreNode) natsNode = self.__doc.createElement('nationalitys') natNode = self.__doc.createElement('nat') - natNode.appendChild(self.__doc.createTextNode(unicode(d['nat'], 'latin-1').encode('utf-8'))) + natNode.appendChild(self.__doc.createTextNode(str(d['nat'], 'latin-1').encode('utf-8'))) natsNode.appendChild(natNode) castsNode = self.__doc.createElement('casts') @@ -114,7 +114,7 @@ class BasicTellicoDOM: castNode = self.__doc.createElement('cast') col1Node = self.__doc.createElement('column') col2Node = self.__doc.createElement('column') - col1Node.appendChild(self.__doc.createTextNode(unicode(g, 'latin-1').encode('utf-8'))) + col1Node.appendChild(self.__doc.createTextNode(str(g, 'latin-1').encode('utf-8'))) castNode.appendChild(col1Node) castNode.appendChild(col2Node) castsNode.appendChild(castNode) @@ -122,17 +122,17 @@ class BasicTellicoDOM: dirsNode = self.__doc.createElement('directors') for g in d['dirs']: dirNode = self.__doc.createElement('director') - dirNode.appendChild(self.__doc.createTextNode(unicode(g, 'latin-1').encode('utf-8'))) + dirNode.appendChild(self.__doc.createTextNode(str(g, 'latin-1').encode('utf-8'))) dirsNode.appendChild(dirNode) timeNode = self.__doc.createElement('running-time') - timeNode.appendChild(self.__doc.createTextNode(unicode(d['time'], 'latin-1').encode('utf-8'))) + timeNode.appendChild(self.__doc.createTextNode(str(d['time'], 'latin-1').encode('utf-8'))) - allocineNode = self.__doc.createElement(unicode('allociné-link', 'latin-1').encode('utf-8')) - allocineNode.appendChild(self.__doc.createTextNode(unicode(d['allocine'], 'latin-1').encode('utf-8'))) + allocineNode = self.__doc.createElement(str('allociné-link', 'latin-1').encode('utf-8')) + allocineNode.appendChild(self.__doc.createTextNode(str(d['allocine'], 'latin-1').encode('utf-8'))) plotNode = self.__doc.createElement('plot') - plotNode.appendChild(self.__doc.createTextNode(unicode(d['plot'], 'latin-1').encode('utf-8'))) + plotNode.appendChild(self.__doc.createTextNode(str(d['plot'], 'latin-1').encode('utf-8'))) if d['image']: imageNode = self.__doc.createElement('image') @@ -140,12 +140,12 @@ class BasicTellicoDOM: imageNode.setAttribute('id', d['image'][0]) imageNode.setAttribute('width', '120') imageNode.setAttribute('height', '160') - imageNode.appendChild(self.__doc.createTextNode(unicode(d['image'][1], 'latin-1').encode('utf-8'))) + imageNode.appendChild(self.__doc.createTextNode(str(d['image'][1], 'latin-1').encode('utf-8'))) coverNode = self.__doc.createElement('cover') coverNode.appendChild(self.__doc.createTextNode(d['image'][0])) - for name in ( 'titleNode', 'otitleNode', 'yearNode', 'genresNode', 'natsNode', + for name in ( 'titleNode', 'otitleNode', 'yearNode', 'genresNode', 'natsNode', 'castsNode', 'dirsNode', 'timeNode', 'allocineNode', 'plotNode' ): entryNode.appendChild(eval(name)) @@ -154,7 +154,7 @@ class BasicTellicoDOM: self.__images.appendChild(imageNode) self.__collection.appendChild(entryNode) - + self.__currentId += 1 def printXML(self): @@ -162,8 +162,8 @@ class BasicTellicoDOM: Outputs XML content to stdout """ self.__collection.appendChild(self.__images) - print XML_HEADER; print DOCTYPE - print self.__root.toxml() + print(XML_HEADER); print(DOCTYPE) + print(self.__root.toxml()) class AlloCineParser: @@ -185,7 +185,7 @@ class AlloCineParser: 'otitle' : 'Titre original *: *<i>(?P<otitle>.+?)</i>', 'plot' : """(?s)<td valign="top" style="padding:10 0 0 0"><div align="justify"><h4> *(?P<plot>.+?) *</h4>""", 'image' : """<td valign="top" width="120".*?<img src="(?P<image>.+?)" border"""} - + self.__domTree = BasicTellicoDOM() @@ -203,7 +203,7 @@ class AlloCineParser: Fetch HTML data from url """ - u = urllib2.urlopen(url) + u = urllib.request.urlopen(url) self.__data = u.read() u.close() @@ -224,7 +224,7 @@ class AlloCineParser: matches = data = {} - for name, regexp in self.__regExps.iteritems(): + for name, regexp in self.__regExps.items(): if name == 'image': matches[name] = re.findall(self.__regExps[name], self.__data, re.S | re.I) else: @@ -272,7 +272,7 @@ class AlloCineParser: elif name == 'image': # Save image to a temporary folder md5 = genMD5() - imObj = urllib2.urlopen(matches[name][0].strip()) + imObj = urllib.request.urlopen(matches[name][0].strip()) img = imObj.read() imObj.close() imgPath = "/tmp/%s.jpeg" % md5 @@ -303,7 +303,7 @@ class AlloCineParser: if not len(title): return self.__title = title - self.__getHTMLContent(self.__searchURL % urllib.quote(self.__title)) + self.__getHTMLContent(self.__searchURL % urllib.parse.quote(self.__title)) # Get all links links = self.__fetchMovieLinks() @@ -321,7 +321,7 @@ class AlloCineParser: def showUsage(): - print "Usage: %s movietitle" % sys.argv[0] + print("Usage: %s movietitle" % sys.argv[0]) sys.exit(1) def main(): diff --git a/src/fetch/scripts/ministerio_de_cultura.py b/src/fetch/scripts/ministerio_de_cultura.py index 8a768f9..19ea7be 100644 --- a/src/fetch/scripts/ministerio_de_cultura.py +++ b/src/fetch/scripts/ministerio_de_cultura.py @@ -37,10 +37,10 @@ ISBN (checked) = -i %1 UPC (checked) = -i %1 Update (checked) = %{title} -** Please note that this script is also part of the Tellico's distribution. +** Please note that this script is also part of the Tellico's distribution. ** You will always find the latest version in the SVN trunk of Tellico -SVN Version: +SVN Version: * Removes translators for Authors List * Adds translators to translator field * Change from "Collection" to "Series" @@ -85,7 +85,7 @@ Version 0.1: """ import sys, os, re, md5, random, string -import urllib, urllib2, time, base64 +import urllib.request, urllib.parse, urllib.error, time, base64 import xml.dom.minidom, types import socket @@ -95,7 +95,7 @@ NULLSTRING = '' VERSION = "0.3.2" -ISBN, AUTHOR, TITLE = range(3) +ISBN, AUTHOR, TITLE = list(range(3)) TRANSLATOR_STR = "tr." EDLIT_STR = "ed. lit." @@ -111,16 +111,16 @@ class BasicTellicoDOM: self.__root = self.__doc.createElement('tellico') self.__root.setAttribute('xmlns', 'http://periapsis.org/tellico/') self.__root.setAttribute('syntaxVersion', '9') - + self.__collection = self.__doc.createElement('collection') self.__collection.setAttribute('title', 'My Books') self.__collection.setAttribute('type', '2') - self.__fields = self.__doc.createElement('fields') + self.__fields = self.__doc.createElement('fields') # Add all default (standard) fields - self.__dfltField = self.__doc.createElement('field') - self.__dfltField.setAttribute('name', '_default') - + self.__dfltField = self.__doc.createElement('field') + self.__dfltField.setAttribute('name', '_default') + # Add a custom 'Collection' field (Left by reference for # the future) #self.__customCollectionField = self.__doc.createElement('field') @@ -146,18 +146,18 @@ class BasicTellicoDOM: def addEntry(self, movieData): """ - Add a comic entry. + Add a comic entry. Returns an entry node instance """ d = movieData # Convert all strings to UTF-8 - for i in d.keys(): - if type(d[i]) == types.ListType: - d[i] = [unicode(d[i][j], 'latin-1').encode('utf-8') for j in range(len(d[i]))] - elif type(d[i]) == types.StringType: - d[i] = unicode(d[i], 'latin-1').encode('utf-8') + for i in list(d.keys()): + if type(d[i]) == list: + d[i] = [str(d[i][j], 'latin-1').encode('utf-8') for j in range(len(d[i]))] + elif type(d[i]) == bytes: + d[i] = str(d[i], 'latin-1').encode('utf-8') entryNode = self.__doc.createElement('entry') entryNode.setAttribute('id', str(self.__currentId)) @@ -213,7 +213,7 @@ class BasicTellicoDOM: translatorNode = self.__doc.createElement('translator') translatorNode.appendChild(self.__doc.createTextNode(d['translator'])) - for name in ( 'title', 'year', 'pub', 'langs', 'keyword', 'ed', 'writers', + for name in ( 'title', 'year', 'pub', 'langs', 'keyword', 'ed', 'writers', 'comments', 'pages', 'isbn', 'price', 'series', 'seriesNum', 'translator' ): entryNode.appendChild(eval(name + 'Node')) @@ -228,17 +228,17 @@ class BasicTellicoDOM: """ try: - print nEntry.toxml() + print(nEntry.toxml()) except: - print sys.stderr, "Error while outputing XML content from entry to Tellico" + print(sys.stderr, "Error while outputing XML content from entry to Tellico") def printXMLTree(self): """ Outputs XML content to stdout """ - print XML_HEADER; print DOCTYPE - print self.__root.toxml() + print(XML_HEADER); print(DOCTYPE) + print(self.__root.toxml()) class MinisterioCulturaParser: @@ -264,11 +264,11 @@ class MinisterioCulturaParser: 'cdu' : '<th scope="row">CDU:.*?<td><span>(?P<cdu>.*?)</span></td>', 'encuadernacion': '<th scope="row">Encuadernación:.*?<td>.*?<span>(?P<encuadernacion>.*?)</span>', 'series' : '<th scope="row">Colección:.*?<td>.*?<span>(?P<series>.*?)</span>' - } + } # Compile patterns objects self.__regExpsPO = {} - for k, pattern in self.__regExps.iteritems(): + for k, pattern in self.__regExps.items(): self.__regExpsPO[k] = re.compile(pattern) self.__domTree = BasicTellicoDOM() @@ -296,10 +296,10 @@ class MinisterioCulturaParser: """ Fetch HTML data from url """ - + try: - u = urllib2.urlopen(url) - except Exception, e: + u = urllib.request.urlopen(url) + except Exception as e: u.close() sys.exit(""" Network error while getting HTML content. @@ -312,7 +312,7 @@ Tellico cannot connect to: http://www.mcu.es/comun/bases/isbn/ISBN.htm webpage: def __fetchBookLinks(self): """ - Retrieve all links related to the search. self.__data contains HTML content fetched by self.__getHTMLContent() + Retrieve all links related to the search. self.__data contains HTML content fetched by self.__getHTMLContent() that need to be parsed. """ @@ -333,10 +333,10 @@ Tellico cannot connect to: http://www.mcu.es/comun/bases/isbn/ISBN.htm webpage: data['comments'] = [] # Empty string if series not available - data['series_num'] = NULLSTRING + data['series_num'] = NULLSTRING data['translator'] = NULLSTRING - for name, po in self.__regExpsPO.iteritems(): + for name, po in self.__regExpsPO.items(): data[name] = NULLSTRING matches[name] = re.search(self.__regExps[name], self.__data, re.S | re.I) @@ -391,22 +391,22 @@ Tellico cannot connect to: http://www.mcu.es/comun/bases/isbn/ISBN.htm webpage: elif name == 'cdu': data['comments'].append('CDU: ' + matches[name].group('cdu').strip()) - + elif name == 'notas': data['comments'].append(matches[name].group('notas').strip()) - + elif name == 'series': d = matches[name].group('series').strip() d = re.sub(' ', ' ', d) data[name] = d # data[name] can contain something like 'Byblos, 162/24' - # Maybe better to add the reg exp to get seriesNum in self.__regExps + # Maybe better to add the reg exp to get seriesNum in self.__regExps p = re.compile('[0-9]+$') s = re.search(p, data[name]) if s: - # if series ends with a number, it seems that is a + # if series ends with a number, it seems that is a # number of the book inside the series. We save in seriesNum data['series_num'] = s.group() @@ -434,7 +434,7 @@ Tellico cannot connect to: http://www.mcu.es/comun/bases/isbn/ISBN.htm webpage: # Sometimes, the search engine outputs some image between a elements if d.strip()[:4] != '<img': data[name].append(d.strip()) - + # Move tr authors (translators) to translators list translator = self.__getSpecialRol(data[name], TRANSLATOR_STR) edlit = self.__getSpecialRol(data[name], EDLIT_STR) @@ -470,12 +470,12 @@ Tellico cannot connect to: http://www.mcu.es/comun/bases/isbn/ISBN.htm webpage: def __getBook(self, data, kind = ISBN): - if not len(data): - raise EngineError, "No data given. Unable to proceed." + if not len(data): + raise EngineError("No data given. Unable to proceed.") if kind == ISBN: self.__getHTMLContent("%s%s%s" % (self.__baseURL, self.__searchURL % \ - (urllib.quote(data), # ISBN + (urllib.parse.quote(data), # ISBN NULLSTRING, # AUTHOR NULLSTRING), # TITLE self.__suffixURL) @@ -483,7 +483,7 @@ Tellico cannot connect to: http://www.mcu.es/comun/bases/isbn/ISBN.htm webpage: elif kind == AUTHOR: self.__getHTMLContent("%s%s%s" % (self.__baseURL, self.__searchURL % \ (NULLSTRING, # ISBN - urllib.quote(data), # AUTHOR + urllib.parse.quote(data), # AUTHOR NULLSTRING), # TITLE self.__suffixURL) ) @@ -492,7 +492,7 @@ Tellico cannot connect to: http://www.mcu.es/comun/bases/isbn/ISBN.htm webpage: self.__getHTMLContent("%s%s%s" % (self.__baseURL, self.__searchURL % \ (NULLSTRING, # ISBN NULLSTRING, # AUTHOR - urllib.quote(data)), # TITLE + urllib.parse.quote(data)), # TITLE self.__suffixURL) ) @@ -519,12 +519,12 @@ Tellico cannot connect to: http://www.mcu.es/comun/bases/isbn/ISBN.htm webpage: if authors[j] == special: special_rol.append(authors[j-1]) j += 1 - + return special_rol def __removeSpecialsFromAuthors(self, authors, specials, string): """ - Receives a list with authors+translators and removes 'tr.' and + Receives a list with authors+translators and removes 'tr.' and authors from there. Example: authors: ['Stephen King','Lorenzo Cortina','tr.','Rosalía Vázquez','tr.'] translators: ['Lorenzo Cortina','Rosalía Vázquez'] @@ -551,16 +551,16 @@ Tellico cannot connect to: http://www.mcu.es/comun/bases/isbn/ISBN.htm webpage: return prefix + string.join(translators, '; ') def halt(): - print "HALT." + print("HALT.") sys.exit(0) def showUsage(): - print """Usage: %s options + print("""Usage: %s options Where options are: -t title -i (ISBN|UPC) -a author - -m filename (support for multiple ISBN/UPC search)""" % sys.argv[0] + -m filename (support for multiple ISBN/UPC search)""" % sys.argv[0]) sys.exit(1) def main(): @@ -573,7 +573,7 @@ def main(): isbnStringList = NULLSTRING opts = {'-t' : TITLE, '-i' : ISBN, '-a' : AUTHOR, '-m' : isbnStringList} - if sys.argv[1] not in opts.keys(): + if sys.argv[1] not in list(opts.keys()): showUsage() if sys.argv[1] == '-m': @@ -584,8 +584,8 @@ def main(): sys.argv[2] = string.join([d[:-1] for d in data], ';') sys.argv[1] = '-i' f.close() - except IOError, e: - print "Error: %s" % e + except IOError as e: + print("Error: %s" % e) sys.exit(1) parser = MinisterioCulturaParser() diff --git a/src/translators/griffith2tellico.py b/src/translators/griffith2tellico.py index 24bfb41..ccba70a 100755 --- a/src/translators/griffith2tellico.py +++ b/src/translators/griffith2tellico.py @@ -21,7 +21,7 @@ import xml.dom.minidom try: import sqlite3 except: - print sys.stderr, "The Python sqlite3 module is required to import Griffith databases." + print(sys.stderr, "The Python sqlite3 module is required to import Griffith databases.") exit(1) DB_PATH = os.environ['HOME'] + '/.griffith/griffith.db' @@ -36,7 +36,7 @@ class BasicTellicoDOM: self.__root = self.__doc.createElement('tellico') self.__root.setAttribute('xmlns', 'http://periapsis.org/tellico/') self.__root.setAttribute('syntaxVersion', '9') - + self.__collection = self.__doc.createElement('collection') self.__collection.setAttribute('title', 'Griffith Import') self.__collection.setAttribute('type', '3') @@ -45,7 +45,7 @@ class BasicTellicoDOM: # Add all default (standard) fields self.__dfltField = self.__doc.createElement('field') self.__dfltField.setAttribute('name', '_default') - + # change the rating to have a maximum of 10 self.__ratingField = self.__doc.createElement('field') self.__ratingField.setAttribute('name', 'rating') @@ -63,7 +63,7 @@ class BasicTellicoDOM: propNode.setAttribute('name', 'minimum') propNode.appendChild(self.__doc.createTextNode('1')) self.__ratingField.appendChild(propNode); - + # Add a custom 'Original Title' field self.__titleField = self.__doc.createElement('field') self.__titleField.setAttribute('name', 'orig-title') @@ -73,7 +73,7 @@ class BasicTellicoDOM: self.__titleField.setAttribute('format', '1') self.__titleField.setAttribute('type', '1') self.__titleField.setAttribute('i18n', 'yes') - + self.__keywordField = self.__doc.createElement('field') self.__keywordField.setAttribute('name', 'keyword') self.__keywordField.setAttribute('title', 'Keywords') @@ -98,13 +98,13 @@ class BasicTellicoDOM: self.__fields.appendChild(self.__keywordField) self.__fields.appendChild(self.__urlField) self.__collection.appendChild(self.__fields) - + self.__images = self.__doc.createElement('images') self.__root.appendChild(self.__collection) self.__doc.appendChild(self.__root) self.__fieldsMap = dict(country='nationality', - classification='certification', + classification='certification', runtime='running-time', o_title='orig-title', notes='comments', @@ -121,7 +121,7 @@ class BasicTellicoDOM: # make sure unique set = {} media = [set.setdefault(e,e) for e in orig_media if e not in set] - + mediaField = self.__doc.createElement('field') mediaField.setAttribute('name', 'medium') mediaField.setAttribute('title', 'Medium') @@ -140,17 +140,17 @@ class BasicTellicoDOM: entryNode = self.__doc.createElement('entry') entryNode.setAttribute('id', movieData['id']) - for key, values in movieData.iteritems(): + for key, values in movieData.items(): if key == 'id': continue - - if self.__fieldsMap.has_key(key): + + if key in self.__fieldsMap: field = self.__fieldsMap[key] else: field = key parentNode = self.__doc.createElement(field + 's') - + for value in values: if len(value) == 0: continue node = self.__doc.createElement(field) @@ -163,18 +163,18 @@ class BasicTellicoDOM: imageNode.appendChild(self.__doc.createTextNode(value[1])) self.__images.appendChild(imageNode) value = value[0] # value was (id, md5) - + if field == 'cast': for v in value: columnNode = self.__doc.createElement('column') columnNode.appendChild(self.__doc.createTextNode(v.strip())) node.appendChild(columnNode) - + else: node.appendChild(self.__doc.createTextNode(value.strip())) - + if node.hasChildNodes(): parentNode.appendChild(node) - + if parentNode.hasChildNodes(): entryNode.appendChild(parentNode) self.__collection.appendChild(entryNode) @@ -184,8 +184,8 @@ class BasicTellicoDOM: Outputs XML content to stdout """ self.__collection.appendChild(self.__images) - print XML_HEADER; print DOCTYPE - print self.__root.toxml() + print(XML_HEADER); print(DOCTYPE) + print(self.__root.toxml()) class GriffithParser: @@ -210,7 +210,7 @@ class GriffithParser: media = list([row[0].encode('utf-8') for row in c.fetchall()]) self.__domTree.addMedia(media) - + def __fetchMovieIds(self): """ Retrieve all movie ids @@ -230,7 +230,7 @@ class GriffithParser: 'country','genre','classification','plot', 'runtime','o_title','studio','notes','image', '[cast]','loaned','color','site') - + c = self.__conn.cursor() c.execute("SELECT %s FROM movies WHERE movie_id=%s" % (','.join(columns),id)) row = c.fetchone() @@ -240,14 +240,14 @@ class GriffithParser: for i in range(len(columns)): if row[i] == None : continue - + try: value = row[i].encode('utf-8') except: value = str(row[i]) - + col = columns[i].replace('[','').replace(']','') - + if col == 'genre' or col == 'studio': values = value.split('/') elif col == 'plot' or col == 'notes': @@ -280,19 +280,19 @@ class GriffithParser: media = list([row[0].encode('utf-8') for row in c.fetchall()]) if len(media) > 0: data['medium'] = media - + # get all tags c.execute("SELECT name FROM tags WHERE tag_id IN (SELECT tag_id FROM movie_tag WHERE movie_id=%s)" % id) tags = list([row[0].encode('utf-8') for row in c.fetchall()]) if len(tags) > 0: data['tag'] = tags - + # get all languages c.execute("SELECT name FROM languages WHERE lang_id IN (SELECT lang_id FROM movie_lang WHERE movie_id=%s)" % id) langs = list([row[0].encode('utf-8') for row in c.fetchall()]) if len(langs) > 0: data['language'] = langs - + return data |