diff options
Diffstat (limited to 'src/fetch/scripts/dark_horse_comics.py')
-rw-r--r-- | src/fetch/scripts/dark_horse_comics.py | 60 |
1 files changed, 30 insertions, 30 deletions
diff --git a/src/fetch/scripts/dark_horse_comics.py b/src/fetch/scripts/dark_horse_comics.py index 4f3b651..22b3175 100644 --- a/src/fetch/scripts/dark_horse_comics.py +++ b/src/fetch/scripts/dark_horse_comics.py @@ -33,7 +33,7 @@ Update (checked) = %{title} """ import sys, os, re, md5, random, string -import urllib, urllib2, time, base64 +import urllib.request, urllib.parse, urllib.error, time, base64 import xml.dom.minidom XML_HEADER = """<?xml version="1.0" encoding="UTF-8"?>""" @@ -45,7 +45,7 @@ VERSION = "0.2" def genMD5(): """ - Generates and returns a random md5 string. Its main purpose is to allow random + Generates and returns a random md5 string. Its main purpose is to allow random image file name generation. """ obj = md5.new() @@ -62,7 +62,7 @@ class BasicTellicoDOM: self.__root = self.__doc.createElement('tellico') self.__root.setAttribute('xmlns', 'http://periapsis.org/tellico/') self.__root.setAttribute('syntaxVersion', '9') - + self.__collection = self.__doc.createElement('collection') self.__collection.setAttribute('title', 'My Comics') self.__collection.setAttribute('type', '6') @@ -78,7 +78,7 @@ class BasicTellicoDOM: def addEntry(self, movieData): """ - Add a comic entry. + Add a comic entry. Returns an entry node instance """ d = movieData @@ -86,7 +86,7 @@ class BasicTellicoDOM: entryNode.setAttribute('id', str(self.__currentId)) titleNode = self.__doc.createElement('title') - titleNode.appendChild(self.__doc.createTextNode(unicode(d['title'], 'latin-1').encode('utf-8'))) + titleNode.appendChild(self.__doc.createTextNode(str(d['title'], 'latin-1').encode('utf-8'))) yearNode = self.__doc.createElement('pub_year') yearNode.appendChild(self.__doc.createTextNode(d['pub_year'])) @@ -101,25 +101,25 @@ class BasicTellicoDOM: writersNode = self.__doc.createElement('writers') for g in d['writer']: writerNode = self.__doc.createElement('writer') - writerNode.appendChild(self.__doc.createTextNode(unicode(g, 'latin-1').encode('utf-8'))) + writerNode.appendChild(self.__doc.createTextNode(str(g, 'latin-1').encode('utf-8'))) writersNode.appendChild(writerNode) genresNode = self.__doc.createElement('genres') for g in d['genre']: genreNode = self.__doc.createElement('genre') - genreNode.appendChild(self.__doc.createTextNode(unicode(g, 'latin-1').encode('utf-8'))) + genreNode.appendChild(self.__doc.createTextNode(str(g, 'latin-1').encode('utf-8'))) genresNode.appendChild(genreNode) commentsNode = self.__doc.createElement('comments') #for g in d['comments']: # commentsNode.appendChild(self.__doc.createTextNode(unicode("%s\n\n" % g, 'latin-1').encode('utf-8'))) commentsData = string.join(d['comments'], '\n\n') - commentsNode.appendChild(self.__doc.createTextNode(unicode(commentsData, 'latin-1').encode('utf-8'))) + commentsNode.appendChild(self.__doc.createTextNode(str(commentsData, 'latin-1').encode('utf-8'))) artistsNode = self.__doc.createElement('artists') - for k, v in d['artist'].iteritems(): + for k, v in d['artist'].items(): artistNode = self.__doc.createElement('artist') - artistNode.appendChild(self.__doc.createTextNode(unicode(v, 'latin-1').encode('utf-8'))) + artistNode.appendChild(self.__doc.createTextNode(str(v, 'latin-1').encode('utf-8'))) artistsNode.appendChild(artistNode) pagesNode = self.__doc.createElement('pages') @@ -132,13 +132,13 @@ class BasicTellicoDOM: imageNode = self.__doc.createElement('image') imageNode.setAttribute('format', 'JPEG') imageNode.setAttribute('id', d['image'][0]) - imageNode.appendChild(self.__doc.createTextNode(unicode(d['image'][1], 'latin-1').encode('utf-8'))) + imageNode.appendChild(self.__doc.createTextNode(str(d['image'][1], 'latin-1').encode('utf-8'))) coverNode = self.__doc.createElement('cover') coverNode.appendChild(self.__doc.createTextNode(d['image'][0])) - for name in ( 'writersNode', 'genresNode', 'artistsNode', 'pagesNode', 'yearNode', - 'titleNode', 'issueNode', 'commentsNode', 'pubNode', 'langNode', + for name in ( 'writersNode', 'genresNode', 'artistsNode', 'pagesNode', 'yearNode', + 'titleNode', 'issueNode', 'commentsNode', 'pubNode', 'langNode', 'countryNode' ): entryNode.appendChild(eval(name)) @@ -147,7 +147,7 @@ class BasicTellicoDOM: self.__images.appendChild(imageNode) self.__collection.appendChild(entryNode) - + self.__currentId += 1 return entryNode @@ -156,17 +156,17 @@ class BasicTellicoDOM: Prints entry's XML content to stdout """ try: - print nEntry.toxml() + print(nEntry.toxml()) except: - print sys.stderr, "Error while outputing XML content from entry to Tellico" + print(sys.stderr, "Error while outputing XML content from entry to Tellico") def printXMLTree(self): """ Outputs XML content to stdout """ self.__collection.appendChild(self.__images) - print XML_HEADER; print DOCTYPE - print self.__root.toxml() + print(XML_HEADER); print(DOCTYPE) + print(self.__root.toxml()) class DarkHorseParser: @@ -189,11 +189,11 @@ class DarkHorseParser: 'colorist' : '<b>Colorist: *</b> *<a.*>(?P<colorist>.*)</a>', 'genre' : '<b>Genre: *</b> *<a.*?>(?P<genre>.*?)</a><br>', 'format' : '<b>Format: *</b> *(?P<format>.*?)<br>', - } + } # Compile patterns objects self.__regExpsPO = {} - for k, pattern in self.__regExps.iteritems(): + for k, pattern in self.__regExps.items(): self.__regExpsPO[k] = re.compile(pattern) self.__domTree = BasicTellicoDOM() @@ -211,18 +211,18 @@ class DarkHorseParser: """ Fetch HTML data from url """ - u = urllib2.urlopen(url) + u = urllib.request.urlopen(url) self.__data = u.read() u.close() def __fetchMovieLinks(self): """ - Retrieve all links related to the search. self.__data contains HTML content fetched by self.__getHTMLContent() + Retrieve all links related to the search. self.__data contains HTML content fetched by self.__getHTMLContent() that need to be parsed. """ matchList = re.findall("""<a *href="%s(?P<page>.*?)">(?P<title>.*?)</a>""" % self.__basePath.replace('?', '\?'), self.__data) if not matchList: return None - + return matchList def __fetchCover(self, path, delete = True): @@ -231,7 +231,7 @@ class DarkHorseParser: The image is deleted if delete is True """ md5 = genMD5() - imObj = urllib2.urlopen(path.strip()) + imObj = urllib.request.urlopen(path.strip()) img = imObj.read() imObj.close() imgPath = "/tmp/%s.jpeg" % md5 @@ -240,7 +240,7 @@ class DarkHorseParser: f.write(img) f.close() except: - print sys.stderr, "Error: could not write image into /tmp" + print(sys.stderr, "Error: could not write image into /tmp") b64data = (md5 + '.jpeg', base64.encodestring(img)) @@ -249,7 +249,7 @@ class DarkHorseParser: try: os.remove(imgPath) except: - print sys.stderr, "Error: could not delete temporary image /tmp/%s.jpeg" % md5 + print(sys.stderr, "Error: could not delete temporary image /tmp/%s.jpeg" % md5) return b64data @@ -286,7 +286,7 @@ class DarkHorseParser: data['image'] = b64img data['pub_year'] = NULLSTRING - for name, po in self.__regExpsPO.iteritems(): + for name, po in self.__regExpsPO.items(): data[name] = NULLSTRING if name == 'desc': matches[name] = re.findall(self.__regExps[name], self.__data, re.S | re.I) @@ -363,7 +363,7 @@ class DarkHorseParser: if not len(title): return self.__title = title - self.__getHTMLContent("%s%s" % (self.__baseURL, self.__searchURL % urllib.quote(self.__title))) + self.__getHTMLContent("%s%s" % (self.__baseURL, self.__searchURL % urllib.parse.quote(self.__title))) # Get all links links = self.__fetchMovieLinks() @@ -381,11 +381,11 @@ class DarkHorseParser: return None def halt(): - print "HALT." + print("HALT.") sys.exit(0) def showUsage(): - print "Usage: %s comic" % sys.argv[0] + print("Usage: %s comic" % sys.argv[0]) sys.exit(1) def main(): |