summary refs log tree commit diff stats
path: root/src/fetch/scripts/dark_horse_comics.py
diff options
context:
space:
mode:
Diffstat (limited to 'src/fetch/scripts/dark_horse_comics.py')
-rw-r--r-- src/fetch/scripts/dark_horse_comics.py 60
1 files changed, 30 insertions, 30 deletions
diff --git a/src/fetch/scripts/dark_horse_comics.py b/src/fetch/scripts/dark_horse_comics.py
index 4f3b651..22b3175 100644
--- a/src/fetch/scripts/dark_horse_comics.py
+++ b/src/fetch/scripts/dark_horse_comics.py
@@ -33,7 +33,7 @@ Update (checked) = %{title}
"""
import sys, os, re, md5, random, string
-import urllib, urllib2, time, base64
+import urllib.request, urllib.parse, urllib.error, time, base64
import xml.dom.minidom
XML_HEADER = """<?xml version="1.0" encoding="UTF-8"?>"""
@@ -45,7 +45,7 @@ VERSION = "0.2"
def genMD5():
"""
- Generates and returns a random md5 string. Its main purpose is to allow random
+ Generates and returns a random md5 string. Its main purpose is to allow random
image file name generation.
"""
obj = md5.new()
@@ -62,7 +62,7 @@ class BasicTellicoDOM:
self.__root = self.__doc.createElement('tellico')
self.__root.setAttribute('xmlns', 'http://periapsis.org/tellico/')
self.__root.setAttribute('syntaxVersion', '9')
-
+
self.__collection = self.__doc.createElement('collection')
self.__collection.setAttribute('title', 'My Comics')
self.__collection.setAttribute('type', '6')
@@ -78,7 +78,7 @@ class BasicTellicoDOM:
def addEntry(self, movieData):
"""
- Add a comic entry.
+ Add a comic entry.
Returns an entry node instance
"""
d = movieData
@@ -86,7 +86,7 @@ class BasicTellicoDOM:
entryNode.setAttribute('id', str(self.__currentId))
titleNode = self.__doc.createElement('title')
- titleNode.appendChild(self.__doc.createTextNode(unicode(d['title'], 'latin-1').encode('utf-8')))
+ titleNode.appendChild(self.__doc.createTextNode(str(d['title'], 'latin-1').encode('utf-8')))
yearNode = self.__doc.createElement('pub_year')
yearNode.appendChild(self.__doc.createTextNode(d['pub_year']))
@@ -101,25 +101,25 @@ class BasicTellicoDOM:
writersNode = self.__doc.createElement('writers')
for g in d['writer']:
writerNode = self.__doc.createElement('writer')
- writerNode.appendChild(self.__doc.createTextNode(unicode(g, 'latin-1').encode('utf-8')))
+ writerNode.appendChild(self.__doc.createTextNode(str(g, 'latin-1').encode('utf-8')))
writersNode.appendChild(writerNode)
genresNode = self.__doc.createElement('genres')
for g in d['genre']:
genreNode = self.__doc.createElement('genre')
- genreNode.appendChild(self.__doc.createTextNode(unicode(g, 'latin-1').encode('utf-8')))
+ genreNode.appendChild(self.__doc.createTextNode(str(g, 'latin-1').encode('utf-8')))
genresNode.appendChild(genreNode)
commentsNode = self.__doc.createElement('comments')
#for g in d['comments']:
# commentsNode.appendChild(self.__doc.createTextNode(unicode("%s\n\n" % g, 'latin-1').encode('utf-8')))
commentsData = string.join(d['comments'], '\n\n')
- commentsNode.appendChild(self.__doc.createTextNode(unicode(commentsData, 'latin-1').encode('utf-8')))
+ commentsNode.appendChild(self.__doc.createTextNode(str(commentsData, 'latin-1').encode('utf-8')))
artistsNode = self.__doc.createElement('artists')
- for k, v in d['artist'].iteritems():
+ for k, v in d['artist'].items():
artistNode = self.__doc.createElement('artist')
- artistNode.appendChild(self.__doc.createTextNode(unicode(v, 'latin-1').encode('utf-8')))
+ artistNode.appendChild(self.__doc.createTextNode(str(v, 'latin-1').encode('utf-8')))
artistsNode.appendChild(artistNode)
pagesNode = self.__doc.createElement('pages')
@@ -132,13 +132,13 @@ class BasicTellicoDOM:
imageNode = self.__doc.createElement('image')
imageNode.setAttribute('format', 'JPEG')
imageNode.setAttribute('id', d['image'][0])
- imageNode.appendChild(self.__doc.createTextNode(unicode(d['image'][1], 'latin-1').encode('utf-8')))
+ imageNode.appendChild(self.__doc.createTextNode(str(d['image'][1], 'latin-1').encode('utf-8')))
coverNode = self.__doc.createElement('cover')
coverNode.appendChild(self.__doc.createTextNode(d['image'][0]))
- for name in ( 'writersNode', 'genresNode', 'artistsNode', 'pagesNode', 'yearNode',
- 'titleNode', 'issueNode', 'commentsNode', 'pubNode', 'langNode',
+ for name in ( 'writersNode', 'genresNode', 'artistsNode', 'pagesNode', 'yearNode',
+ 'titleNode', 'issueNode', 'commentsNode', 'pubNode', 'langNode',
'countryNode' ):
entryNode.appendChild(eval(name))
@@ -147,7 +147,7 @@ class BasicTellicoDOM:
self.__images.appendChild(imageNode)
self.__collection.appendChild(entryNode)
-
+
self.__currentId += 1
return entryNode
@@ -156,17 +156,17 @@ class BasicTellicoDOM:
Prints entry's XML content to stdout
"""
try:
- print nEntry.toxml()
+ print(nEntry.toxml())
except:
- print sys.stderr, "Error while outputing XML content from entry to Tellico"
+ print(sys.stderr, "Error while outputing XML content from entry to Tellico")
def printXMLTree(self):
"""
Outputs XML content to stdout
"""
self.__collection.appendChild(self.__images)
- print XML_HEADER; print DOCTYPE
- print self.__root.toxml()
+ print(XML_HEADER); print(DOCTYPE)
+ print(self.__root.toxml())
class DarkHorseParser:
@@ -189,11 +189,11 @@ class DarkHorseParser:
'colorist' : '<b>Colorist: *</b> *<a.*>(?P<colorist>.*)</a>',
'genre' : '<b>Genre: *</b> *<a.*?>(?P<genre>.*?)</a><br>',
'format' : '<b>Format: *</b> *(?P<format>.*?)<br>',
- }
+ }
# Compile patterns objects
self.__regExpsPO = {}
- for k, pattern in self.__regExps.iteritems():
+ for k, pattern in self.__regExps.items():
self.__regExpsPO[k] = re.compile(pattern)
self.__domTree = BasicTellicoDOM()
@@ -211,18 +211,18 @@ class DarkHorseParser:
"""
Fetch HTML data from url
"""
- u = urllib2.urlopen(url)
+ u = urllib.request.urlopen(url)
self.__data = u.read()
u.close()
def __fetchMovieLinks(self):
"""
- Retrieve all links related to the search. self.__data contains HTML content fetched by self.__getHTMLContent()
+ Retrieve all links related to the search. self.__data contains HTML content fetched by self.__getHTMLContent()
that need to be parsed.
"""
matchList = re.findall("""<a *href="%s(?P<page>.*?)">(?P<title>.*?)</a>""" % self.__basePath.replace('?', '\?'), self.__data)
if not matchList: return None
-
+
return matchList
def __fetchCover(self, path, delete = True):
@@ -231,7 +231,7 @@ class DarkHorseParser:
The image is deleted if delete is True
"""
md5 = genMD5()
- imObj = urllib2.urlopen(path.strip())
+ imObj = urllib.request.urlopen(path.strip())
img = imObj.read()
imObj.close()
imgPath = "/tmp/%s.jpeg" % md5
@@ -240,7 +240,7 @@ class DarkHorseParser:
f.write(img)
f.close()
except:
- print sys.stderr, "Error: could not write image into /tmp"
+ print(sys.stderr, "Error: could not write image into /tmp")
b64data = (md5 + '.jpeg', base64.encodestring(img))
@@ -249,7 +249,7 @@ class DarkHorseParser:
try:
os.remove(imgPath)
except:
- print sys.stderr, "Error: could not delete temporary image /tmp/%s.jpeg" % md5
+ print(sys.stderr, "Error: could not delete temporary image /tmp/%s.jpeg" % md5)
return b64data
@@ -286,7 +286,7 @@ class DarkHorseParser:
data['image'] = b64img
data['pub_year'] = NULLSTRING
- for name, po in self.__regExpsPO.iteritems():
+ for name, po in self.__regExpsPO.items():
data[name] = NULLSTRING
if name == 'desc':
matches[name] = re.findall(self.__regExps[name], self.__data, re.S | re.I)
@@ -363,7 +363,7 @@ class DarkHorseParser:
if not len(title): return
self.__title = title
- self.__getHTMLContent("%s%s" % (self.__baseURL, self.__searchURL % urllib.quote(self.__title)))
+ self.__getHTMLContent("%s%s" % (self.__baseURL, self.__searchURL % urllib.parse.quote(self.__title)))
# Get all links
links = self.__fetchMovieLinks()
@@ -381,11 +381,11 @@ class DarkHorseParser:
return None
def halt():
- print "HALT."
+ print("HALT.")
sys.exit(0)
def showUsage():
- print "Usage: %s comic" % sys.argv[0]
+ print("Usage: %s comic" % sys.argv[0])
sys.exit(1)
def main():