mirror of
https://github.com/noDRM/DeDRM_tools.git
synced 2024-11-16 19:06:09 +06:00
tools v1.5
This commit is contained in:
parent
6fb13373cf
commit
8e7d2657a4
|
@ -1,5 +1,5 @@
|
||||||
#! /usr/bin/python
|
#! /usr/bin/python
|
||||||
# For use in Topaz Scripts version 2.3
|
# For use in Topaz Scripts version 2.6
|
||||||
|
|
||||||
"""
|
"""
|
||||||
|
|
||||||
|
|
|
@ -1,5 +1,5 @@
|
||||||
#!/usr/bin/python
|
#!/usr/bin/python
|
||||||
# For use with Topaz Scripts Version 2.3
|
# For use with Topaz Scripts Version 2.6
|
||||||
|
|
||||||
class Unbuffered:
|
class Unbuffered:
|
||||||
def __init__(self, stream):
|
def __init__(self, stream):
|
||||||
|
|
|
@ -1,6 +1,6 @@
|
||||||
#! /usr/bin/python
|
#! /usr/bin/python
|
||||||
# vim:ts=4:sw=4:softtabstop=4:smarttab:expandtab
|
# vim:ts=4:sw=4:softtabstop=4:smarttab:expandtab
|
||||||
# For use with Topaz Scripts Version 2.4
|
# For use with Topaz Scripts Version 2.6
|
||||||
|
|
||||||
class Unbuffered:
|
class Unbuffered:
|
||||||
def __init__(self, stream):
|
def __init__(self, stream):
|
||||||
|
@ -315,6 +315,12 @@ class PageParser(object):
|
||||||
'version.findlists' : (1, 'scalar_text', 0, 0),
|
'version.findlists' : (1, 'scalar_text', 0, 0),
|
||||||
'version.page_num' : (1, 'scalar_text', 0, 0),
|
'version.page_num' : (1, 'scalar_text', 0, 0),
|
||||||
'version.page_type' : (1, 'scalar_text', 0, 0),
|
'version.page_type' : (1, 'scalar_text', 0, 0),
|
||||||
|
'version.bad_text' : (1, 'scalar_text', 0, 0),
|
||||||
|
'version.glyph_mismatch' : (1, 'scalar_text', 0, 0),
|
||||||
|
'version.margins' : (1, 'scalar_text', 0, 0),
|
||||||
|
'version.staggered_lines' : (1, 'scalar_text', 0, 0),
|
||||||
|
'version.paragraph_continuation' : (1, 'scalar_text', 0, 0),
|
||||||
|
'version.toc' : (1, 'scalar_text', 0, 0),
|
||||||
|
|
||||||
'stylesheet' : (1, 'snippets', 1, 0),
|
'stylesheet' : (1, 'snippets', 1, 0),
|
||||||
'style' : (1, 'snippets', 1, 0),
|
'style' : (1, 'snippets', 1, 0),
|
||||||
|
@ -662,16 +668,19 @@ class PageParser(object):
|
||||||
def process(self):
|
def process(self):
|
||||||
|
|
||||||
# peek at the first bytes to see what type of file it is
|
# peek at the first bytes to see what type of file it is
|
||||||
magic = self.fo.read(11)
|
magic = self.fo.read(9)
|
||||||
if (magic[0:1] == 'p') and (magic[2:10] == '__PAGE__'):
|
if (magic[0:1] == 'p') and (magic[2:9] == 'marker_'):
|
||||||
first_token = 'info'
|
first_token = 'info'
|
||||||
elif (magic[0:1] == 'g') and (magic[2:11] == '__GLYPH__'):
|
elif (magic[0:1] == 'p') and (magic[2:9] == '__PAGE_'):
|
||||||
skip = self.fo.read(1)
|
skip = self.fo.read(2)
|
||||||
|
first_token = 'info'
|
||||||
|
elif (magic[0:1] == 'g') and (magic[2:9] == '__GLYPH'):
|
||||||
|
skip = self.fo.read(3)
|
||||||
first_token = 'info'
|
first_token = 'info'
|
||||||
else :
|
else :
|
||||||
# other0.dat file
|
# other0.dat file
|
||||||
first_token = None
|
first_token = None
|
||||||
self.fo.seek(-11,1)
|
self.fo.seek(-9,1)
|
||||||
|
|
||||||
|
|
||||||
# main loop to read and build the document tree
|
# main loop to read and build the document tree
|
||||||
|
|
|
@ -1,6 +1,6 @@
|
||||||
#! /usr/bin/python
|
#! /usr/bin/python
|
||||||
# vim:ts=4:sw=4:softtabstop=4:smarttab:expandtab
|
# vim:ts=4:sw=4:softtabstop=4:smarttab:expandtab
|
||||||
# For use with Topaz Scripts Version 2.3
|
# For use with Topaz Scripts Version 2.6
|
||||||
|
|
||||||
import csv
|
import csv
|
||||||
import sys
|
import sys
|
||||||
|
|
|
@ -1,6 +1,6 @@
|
||||||
#! /usr/bin/python
|
#! /usr/bin/python
|
||||||
# vim:ts=4:sw=4:softtabstop=4:smarttab:expandtab
|
# vim:ts=4:sw=4:softtabstop=4:smarttab:expandtab
|
||||||
# For use with Topaz Scripts Version 2.3
|
# For use with Topaz Scripts Version 2.6
|
||||||
|
|
||||||
import sys
|
import sys
|
||||||
import csv
|
import csv
|
||||||
|
@ -32,6 +32,8 @@ class DocParser(object):
|
||||||
self.link_id = []
|
self.link_id = []
|
||||||
self.link_title = []
|
self.link_title = []
|
||||||
self.link_page = []
|
self.link_page = []
|
||||||
|
self.link_href = []
|
||||||
|
self.link_type = []
|
||||||
self.dehyphen_rootid = []
|
self.dehyphen_rootid = []
|
||||||
self.paracont_stemid = []
|
self.paracont_stemid = []
|
||||||
self.parastems_stemid = []
|
self.parastems_stemid = []
|
||||||
|
@ -197,6 +199,7 @@ class DocParser(object):
|
||||||
# get the class
|
# get the class
|
||||||
def getClass(self, pclass):
|
def getClass(self, pclass):
|
||||||
nclass = pclass
|
nclass = pclass
|
||||||
|
|
||||||
# class names are an issue given topaz may start them with numerals (not allowed),
|
# class names are an issue given topaz may start them with numerals (not allowed),
|
||||||
# use a mix of cases (which cause some browsers problems), and actually
|
# use a mix of cases (which cause some browsers problems), and actually
|
||||||
# attach numbers after "_reclustered*" to the end to deal classeses that inherit
|
# attach numbers after "_reclustered*" to the end to deal classeses that inherit
|
||||||
|
@ -206,7 +209,10 @@ class DocParser(object):
|
||||||
# so we clean this up by lowercasing, prepend 'cl-', and getting any baseclass
|
# so we clean this up by lowercasing, prepend 'cl-', and getting any baseclass
|
||||||
# that exists in the stylesheet first, and then adding this specific class
|
# that exists in the stylesheet first, and then adding this specific class
|
||||||
# after
|
# after
|
||||||
|
|
||||||
|
# also some class names have spaces in them so need to convert to dashes
|
||||||
if nclass != None :
|
if nclass != None :
|
||||||
|
nclass = nclass.replace(' ','-')
|
||||||
classres = ''
|
classres = ''
|
||||||
nclass = nclass.lower()
|
nclass = nclass.lower()
|
||||||
nclass = 'cl-' + nclass
|
nclass = 'cl-' + nclass
|
||||||
|
@ -334,7 +340,7 @@ class DocParser(object):
|
||||||
result.append(('svg', num))
|
result.append(('svg', num))
|
||||||
return pclass, result
|
return pclass, result
|
||||||
|
|
||||||
# this type of paragrph may be made up of multiple spans, inline
|
# this type of paragraph may be made up of multiple spans, inline
|
||||||
# word monograms (images), and words with semantic meaning,
|
# word monograms (images), and words with semantic meaning,
|
||||||
# plus glyphs used to form starting letter of first word
|
# plus glyphs used to form starting letter of first word
|
||||||
|
|
||||||
|
@ -391,6 +397,9 @@ class DocParser(object):
|
||||||
result.append(('img' + word_class, int(argres)))
|
result.append(('img' + word_class, int(argres)))
|
||||||
word_class = ''
|
word_class = ''
|
||||||
|
|
||||||
|
elif name.endswith('region.img.src'):
|
||||||
|
result.append(('img' + word_class, int(argres)))
|
||||||
|
|
||||||
if (sp_first != -1) and (sp_last != -1):
|
if (sp_first != -1) and (sp_last != -1):
|
||||||
for wordnum in xrange(sp_first, sp_last):
|
for wordnum in xrange(sp_first, sp_last):
|
||||||
result.append(('ocr', wordnum))
|
result.append(('ocr', wordnum))
|
||||||
|
@ -437,6 +446,8 @@ class DocParser(object):
|
||||||
if (type == 'end'):
|
if (type == 'end'):
|
||||||
parares += ' '
|
parares += ' '
|
||||||
|
|
||||||
|
lstart = len(parares)
|
||||||
|
|
||||||
cnt = len(pdesc)
|
cnt = len(pdesc)
|
||||||
|
|
||||||
for j in xrange( 0, cnt) :
|
for j in xrange( 0, cnt) :
|
||||||
|
@ -449,18 +460,24 @@ class DocParser(object):
|
||||||
|
|
||||||
if handle_links:
|
if handle_links:
|
||||||
link = self.link_id[num]
|
link = self.link_id[num]
|
||||||
if (link > 0):
|
if (link > 0):
|
||||||
|
linktype = self.link_type[link-1]
|
||||||
title = self.link_title[link-1]
|
title = self.link_title[link-1]
|
||||||
if (title == "") or (parares.rfind(title) < 0):
|
if (title == "") or (parares.rfind(title) < 0):
|
||||||
title='_link_'
|
title=parares[lstart:]
|
||||||
ptarget = self.link_page[link-1] - 1
|
if linktype == 'external' :
|
||||||
linkhtml = '<a href="#page%04d">' % ptarget
|
linkhref = self.link_href[link-1]
|
||||||
|
linkhtml = '<a href="%s">' % linkhref
|
||||||
|
else :
|
||||||
|
ptarget = self.link_page[link-1] - 1
|
||||||
|
linkhtml = '<a href="#page%04d">' % ptarget
|
||||||
linkhtml += title + '</a>'
|
linkhtml += title + '</a>'
|
||||||
pos = parares.rfind(title)
|
pos = parares.rfind(title)
|
||||||
if pos >= 0:
|
if pos >= 0:
|
||||||
parares = parares[0:pos] + linkhtml + parares[pos+len(title):]
|
parares = parares[0:pos] + linkhtml + parares[pos+len(title):]
|
||||||
else :
|
else :
|
||||||
parares += linkhtml
|
parares += linkhtml
|
||||||
|
lstart = len(parares)
|
||||||
if word == '_link_' : word = ''
|
if word == '_link_' : word = ''
|
||||||
elif (link < 0) :
|
elif (link < 0) :
|
||||||
if word == '_link_' : word = ''
|
if word == '_link_' : word = ''
|
||||||
|
@ -532,6 +549,14 @@ class DocParser(object):
|
||||||
# collect link destination page numbers
|
# collect link destination page numbers
|
||||||
self.link_page = self.getData('info.links.page',0,-1)
|
self.link_page = self.getData('info.links.page',0,-1)
|
||||||
|
|
||||||
|
# collect link types (container versus external)
|
||||||
|
(pos, argres) = self.findinDoc('info.links.type',0,-1)
|
||||||
|
if argres : self.link_type = argres.split('|')
|
||||||
|
|
||||||
|
# collect link destinations
|
||||||
|
(pos, argres) = self.findinDoc('info.links.href',0,-1)
|
||||||
|
if argres : self.link_href = argres.split('|')
|
||||||
|
|
||||||
# collect link titles
|
# collect link titles
|
||||||
(pos, argres) = self.findinDoc('info.links.title',0,-1)
|
(pos, argres) = self.findinDoc('info.links.title',0,-1)
|
||||||
if argres :
|
if argres :
|
||||||
|
@ -641,16 +666,18 @@ class DocParser(object):
|
||||||
htmlpage += self.buildParagraph(pclass, pdesc, ptype, regtype)
|
htmlpage += self.buildParagraph(pclass, pdesc, ptype, regtype)
|
||||||
|
|
||||||
|
|
||||||
elif (regtype == 'synth_fcvr.center') or (regtype == 'synth_text.center'):
|
elif (regtype == 'synth_fcvr.center'):
|
||||||
(pos, simgsrc) = self.findinDoc('img.src',start,end)
|
(pos, simgsrc) = self.findinDoc('img.src',start,end)
|
||||||
if simgsrc:
|
if simgsrc:
|
||||||
htmlpage += '<div class="graphic"><img src="img/img%04d.jpg" alt="" /></div>' % int(simgsrc)
|
htmlpage += '<div class="graphic"><img src="img/img%04d.jpg" alt="" /></div>' % int(simgsrc)
|
||||||
|
|
||||||
else :
|
else :
|
||||||
print 'Warning: region type', regtype
|
print ' Making region type', regtype,
|
||||||
(pos, temp) = self.findinDoc('paragraph',start,end)
|
(pos, temp) = self.findinDoc('paragraph',start,end)
|
||||||
if pos != -1:
|
(pos2, temp) = self.findinDoc('span',start,end)
|
||||||
print ' is a "text" region'
|
if pos != -1 or pos2 != -1:
|
||||||
|
print ' a "text" region'
|
||||||
|
orig_regtype = regtype
|
||||||
regtype = 'fixed'
|
regtype = 'fixed'
|
||||||
ptype = 'full'
|
ptype = 'full'
|
||||||
# check to see if this is a continution from the previous page
|
# check to see if this is a continution from the previous page
|
||||||
|
@ -658,6 +685,11 @@ class DocParser(object):
|
||||||
ptype = 'end'
|
ptype = 'end'
|
||||||
first_para_continued = False
|
first_para_continued = False
|
||||||
(pclass, pdesc) = self.getParaDescription(start,end, regtype)
|
(pclass, pdesc) = self.getParaDescription(start,end, regtype)
|
||||||
|
if not pclass:
|
||||||
|
if orig_regtype.endswith('.right') : pclass = 'cl-right'
|
||||||
|
elif orig_regtype.endswith('.center') : pclass = 'cl-center'
|
||||||
|
elif orig_regtype.endswith('.left') : pclass = 'cl-left'
|
||||||
|
elif orig_regtype.endswith('.justify') : pclass = 'cl-justify'
|
||||||
if pclass and (ptype == 'full') and (len(pclass) >= 6):
|
if pclass and (ptype == 'full') and (len(pclass) >= 6):
|
||||||
tag = 'p'
|
tag = 'p'
|
||||||
if pclass[3:6] == 'h1-' : tag = 'h4'
|
if pclass[3:6] == 'h1-' : tag = 'h4'
|
||||||
|
@ -669,7 +701,7 @@ class DocParser(object):
|
||||||
else :
|
else :
|
||||||
htmlpage += self.buildParagraph(pclass, pdesc, ptype, regtype)
|
htmlpage += self.buildParagraph(pclass, pdesc, ptype, regtype)
|
||||||
else :
|
else :
|
||||||
print ' is a "graphic" region'
|
print ' a "graphic" region'
|
||||||
(pos, simgsrc) = self.findinDoc('img.src',start,end)
|
(pos, simgsrc) = self.findinDoc('img.src',start,end)
|
||||||
if simgsrc:
|
if simgsrc:
|
||||||
htmlpage += '<div class="graphic"><img src="img/img%04d.jpg" alt="" /></div>' % int(simgsrc)
|
htmlpage += '<div class="graphic"><img src="img/img%04d.jpg" alt="" /></div>' % int(simgsrc)
|
||||||
|
|
|
@ -1,6 +1,6 @@
|
||||||
#! /usr/bin/python
|
#! /usr/bin/python
|
||||||
# vim:ts=4:sw=4:softtabstop=4:smarttab:expandtab
|
# vim:ts=4:sw=4:softtabstop=4:smarttab:expandtab
|
||||||
# For use with Topaz Scripts Version 2.3
|
# For use with Topaz Scripts Version 2.6
|
||||||
|
|
||||||
class Unbuffered:
|
class Unbuffered:
|
||||||
def __init__(self, stream):
|
def __init__(self, stream):
|
||||||
|
|
|
@ -1,6 +1,6 @@
|
||||||
#! /usr/bin/python
|
#! /usr/bin/python
|
||||||
# vim:ts=4:sw=4:softtabstop=4:smarttab:expandtab
|
# vim:ts=4:sw=4:softtabstop=4:smarttab:expandtab
|
||||||
# For use with Topaz Scripts Version 2.3
|
# For use with Topaz Scripts Version 2.6
|
||||||
|
|
||||||
class Unbuffered:
|
class Unbuffered:
|
||||||
def __init__(self, stream):
|
def __init__(self, stream):
|
||||||
|
|
|
@ -1,6 +1,6 @@
|
||||||
#! /usr/bin/python
|
#! /usr/bin/python
|
||||||
# vim:ts=4:sw=4:softtabstop=4:smarttab:expandtab
|
# vim:ts=4:sw=4:softtabstop=4:smarttab:expandtab
|
||||||
# For use with Topaz Scripts Version 2.3
|
# For use with Topaz Scripts Version 2.6
|
||||||
|
|
||||||
class Unbuffered:
|
class Unbuffered:
|
||||||
def __init__(self, stream):
|
def __init__(self, stream):
|
||||||
|
|
|
@ -1,6 +1,6 @@
|
||||||
#! /usr/bin/python
|
#! /usr/bin/python
|
||||||
# vim:ts=4:sw=4:softtabstop=4:smarttab:expandtab
|
# vim:ts=4:sw=4:softtabstop=4:smarttab:expandtab
|
||||||
# For use with Topaz Scripts Version 2.3
|
# For use with Topaz Scripts Version 2.6
|
||||||
|
|
||||||
import csv
|
import csv
|
||||||
import sys
|
import sys
|
||||||
|
|
|
@ -1,6 +1,6 @@
|
||||||
#! /usr/bin/python
|
#! /usr/bin/python
|
||||||
# vim:ts=4:sw=4:softtabstop=4:smarttab:expandtab
|
# vim:ts=4:sw=4:softtabstop=4:smarttab:expandtab
|
||||||
# For use with Topaz Scripts Version 2.3
|
# For use with Topaz Scripts Version 2.6
|
||||||
|
|
||||||
import csv
|
import csv
|
||||||
import sys
|
import sys
|
||||||
|
@ -85,7 +85,10 @@ class DocParser(object):
|
||||||
def process(self):
|
def process(self):
|
||||||
|
|
||||||
classlst = ''
|
classlst = ''
|
||||||
csspage = ''
|
csspage = '.cl-center { text-align: center; margin-left: auto; margin-right: auto; }\n'
|
||||||
|
csspage += '.cl-right { text-align: right; }\n'
|
||||||
|
csspage += '.cl-left { text-align: left; }\n'
|
||||||
|
csspage += '.cl-justify { text-align: justify; }\n'
|
||||||
|
|
||||||
# generate a list of each <style> starting point in the stylesheet
|
# generate a list of each <style> starting point in the stylesheet
|
||||||
styleList= self.posinDoc('book.stylesheet.style')
|
styleList= self.posinDoc('book.stylesheet.style')
|
||||||
|
@ -108,6 +111,7 @@ class DocParser(object):
|
||||||
# get the style class
|
# get the style class
|
||||||
(pos, sclass) = self.findinDoc('style.class',start,end)
|
(pos, sclass) = self.findinDoc('style.class',start,end)
|
||||||
if sclass != None:
|
if sclass != None:
|
||||||
|
sclass = sclass.replace(' ','-')
|
||||||
sclass = '.cl-' + sclass.lower()
|
sclass = '.cl-' + sclass.lower()
|
||||||
else :
|
else :
|
||||||
sclass = ''
|
sclass = ''
|
||||||
|
@ -115,6 +119,7 @@ class DocParser(object):
|
||||||
# check for any "after class" specifiers
|
# check for any "after class" specifiers
|
||||||
(pos, aftclass) = self.findinDoc('style._after_class',start,end)
|
(pos, aftclass) = self.findinDoc('style._after_class',start,end)
|
||||||
if aftclass != None:
|
if aftclass != None:
|
||||||
|
aftclass = aftclass.replace(' ','-')
|
||||||
aftclass = '.cl-' + aftclass.lower()
|
aftclass = '.cl-' + aftclass.lower()
|
||||||
else :
|
else :
|
||||||
aftclass = ''
|
aftclass = ''
|
||||||
|
@ -216,7 +221,8 @@ class DocParser(object):
|
||||||
if ctype == 'h3_' :
|
if ctype == 'h3_' :
|
||||||
csspage += 'h6' + cssline + '\n'
|
csspage += 'h6' + cssline + '\n'
|
||||||
|
|
||||||
csspage += self.stags[tag] + cssline + '\n'
|
if cssline != ' { }':
|
||||||
|
csspage += self.stags[tag] + cssline + '\n'
|
||||||
|
|
||||||
|
|
||||||
return csspage, classlst
|
return csspage, classlst
|
||||||
|
|
|
@ -1,4 +1,14 @@
|
||||||
Canges in 2.3
|
Changes in 2.6
|
||||||
|
- fix for many additional version tags
|
||||||
|
- fixes to generate better links
|
||||||
|
- fixes to handle external links
|
||||||
|
- now handles new "marker" page .dat files
|
||||||
|
- improved special region handling
|
||||||
|
- properly handle class names with spaces
|
||||||
|
- handle default alignment for synthetic regions
|
||||||
|
|
||||||
|
|
||||||
|
Changes in 2.3
|
||||||
- fix for use with non-latin1 based systems (thank you Tedd)
|
- fix for use with non-latin1 based systems (thank you Tedd)
|
||||||
- fixes for out of order tokens in xml
|
- fixes for out of order tokens in xml
|
||||||
|
|
||||||
|
|
|
@ -53,8 +53,9 @@
|
||||||
# 0.12 - Fix added to prevent lowercasing of image names when the pml code itself uses a different case in the link name.
|
# 0.12 - Fix added to prevent lowercasing of image names when the pml code itself uses a different case in the link name.
|
||||||
# 0.13 - change to unbuffered stdout for use with gui front ends
|
# 0.13 - change to unbuffered stdout for use with gui front ends
|
||||||
# 0.14 - contributed enhancement to support --make-pmlz switch
|
# 0.14 - contributed enhancement to support --make-pmlz switch
|
||||||
|
# 0.15 - enabled high-ascii to pml character encoding. DropBook now works on Mac.
|
||||||
|
|
||||||
__version__='0.14'
|
__version__='0.15'
|
||||||
|
|
||||||
# Import Psyco if available
|
# Import Psyco if available
|
||||||
try:
|
try:
|
||||||
|
@ -465,17 +466,6 @@ class EreaderProcessor(object):
|
||||||
data = sect[62:]
|
data = sect[62:]
|
||||||
return sanitizeFileName(name), data
|
return sanitizeFileName(name), data
|
||||||
|
|
||||||
def cleanPML(self,pml):
|
|
||||||
# Update old \b font tag with correct \B bold font tag
|
|
||||||
pml2 = pml.replace('\\b', '\\B')
|
|
||||||
# Convert special characters to proper PML code. High ASCII start at (\x82, \a130) and go up to (\xff, \a255)
|
|
||||||
for k in xrange(130,256):
|
|
||||||
# a2b_hex takes in a hexidecimal as a string and converts it
|
|
||||||
# to a binary ascii code that we search and replace for
|
|
||||||
badChar=binascii.a2b_hex('%02x' % k)
|
|
||||||
pml2 = pml2.replace(badChar, '\\a%03d' % k)
|
|
||||||
#end for k
|
|
||||||
return pml2
|
|
||||||
|
|
||||||
# def getChapterNamePMLOffsetData(self):
|
# def getChapterNamePMLOffsetData(self):
|
||||||
# cv = ''
|
# cv = ''
|
||||||
|
@ -564,6 +554,14 @@ class EreaderProcessor(object):
|
||||||
|
|
||||||
return r
|
return r
|
||||||
|
|
||||||
|
def cleanPML(pml):
|
||||||
|
# Convert special characters to proper PML code. High ASCII start at (\x80, \a128) and go up to (\xff, \a255)
|
||||||
|
pml2 = pml
|
||||||
|
for k in xrange(128,256):
|
||||||
|
badChar = chr(k)
|
||||||
|
pml2 = pml2.replace(badChar, '\\a%03d' % k)
|
||||||
|
return pml2
|
||||||
|
|
||||||
def convertEreaderToPml(infile, name, cc, outdir):
|
def convertEreaderToPml(infile, name, cc, outdir):
|
||||||
if not os.path.exists(outdir):
|
if not os.path.exists(outdir):
|
||||||
os.makedirs(outdir)
|
os.makedirs(outdir)
|
||||||
|
@ -585,7 +583,7 @@ def convertEreaderToPml(infile, name, cc, outdir):
|
||||||
print " Extracting pml"
|
print " Extracting pml"
|
||||||
pml_string = er.getText()
|
pml_string = er.getText()
|
||||||
pmlfilename = bookname + ".pml"
|
pmlfilename = bookname + ".pml"
|
||||||
file(os.path.join(outdir, pmlfilename),'wb').write(pml_string)
|
file(os.path.join(outdir, pmlfilename),'wb').write(cleanPML(pml_string))
|
||||||
|
|
||||||
# bkinfo = er.getBookInfo()
|
# bkinfo = er.getBookInfo()
|
||||||
# if bkinfo != '':
|
# if bkinfo != '':
|
||||||
|
@ -677,7 +675,7 @@ def main(argv=None):
|
||||||
search_time = end_time - start_time
|
search_time = end_time - start_time
|
||||||
print 'elapsed time: %.2f seconds' % (search_time, )
|
print 'elapsed time: %.2f seconds' % (search_time, )
|
||||||
if make_pmlz :
|
if make_pmlz :
|
||||||
print 'output in %s' % zipname
|
print 'output is %s' % zipname
|
||||||
else :
|
else :
|
||||||
print 'output in %s' % outdir
|
print 'output in %s' % outdir
|
||||||
print "done"
|
print "done"
|
||||||
|
|
Loading…
Reference in New Issue
Block a user