From 2819550411bdcce3da9c49802febed769a7642ac Mon Sep 17 00:00:00 2001 From: Apprentice Alf Date: Sun, 14 Feb 2010 15:47:48 +0000 Subject: [PATCH] tools v1.1 --- Adobe_EPUB_Tools/ineptepub.pyw | 25 ++++++----- Topaz_Tools/lib/flatxml2html.py | 78 ++++++++++++++++++++------------- 2 files changed, 61 insertions(+), 42 deletions(-) diff --git a/Adobe_EPUB_Tools/ineptepub.pyw b/Adobe_EPUB_Tools/ineptepub.pyw index 7f8cb45..14e51ff 100644 --- a/Adobe_EPUB_Tools/ineptepub.pyw +++ b/Adobe_EPUB_Tools/ineptepub.pyw @@ -62,7 +62,7 @@ class ASN1Parser(object): def __init__(self, bytes): self.bytes = bytes self.index = 0 - + def get(self, length): if self.index + length > len(self.bytes): raise ASN1Error("Error decoding ASN.1") @@ -72,22 +72,22 @@ class ASN1Parser(object): x |= self.bytes[self.index] self.index += 1 return x - + def getFixBytes(self, lengthBytes): bytes = self.bytes[self.index : self.index+lengthBytes] self.index += lengthBytes return bytes - + def getVarBytes(self, lengthLength): lengthBytes = self.get(lengthLength) return self.getFixBytes(lengthBytes) - + def getFixList(self, length, lengthList): l = [0] * lengthList for x in range(lengthList): l[x] = self.get(length) return l - + def getVarList(self, length, lengthLength): lengthList = self.get(lengthLength) if lengthList % length != 0: @@ -97,19 +97,19 @@ class ASN1Parser(object): for x in range(lengthList): l[x] = self.get(length) return l - + def startLengthCheck(self, lengthLength): self.lengthCheck = self.get(lengthLength) self.indexCheck = self.index - + def setLengthCheck(self, length): self.lengthCheck = length self.indexCheck = self.index - + def stopLengthCheck(self): if (self.index - self.indexCheck) != self.lengthCheck: raise ASN1Error("Error decoding ASN.1") - + def atLengthCheck(self): if (self.index - self.indexCheck) < self.lengthCheck: return False @@ -162,7 +162,7 @@ class Decryptor(object): path = elem.get('URI', None) if path is not None: encrypted.add(path) - + def decompress(self, bytes): dc = zlib.decompressobj(-15) bytes = dc.decompress(bytes) @@ -170,7 +170,7 @@ class Decryptor(object): if ex: bytes = bytes + ex return bytes - + def decrypt(self, path, data): if path in self._encrypted: data = self._aes.decrypt(data)[16:] @@ -336,5 +336,6 @@ def gui_main(): return 0 if __name__ == '__main__': - # sys.exit(cli_main()) + if len(sys.argv) > 1: + sys.exit(cli_main()) sys.exit(gui_main()) diff --git a/Topaz_Tools/lib/flatxml2html.py b/Topaz_Tools/lib/flatxml2html.py index 9e3080b..6047749 100644 --- a/Topaz_Tools/lib/flatxml2html.py +++ b/Topaz_Tools/lib/flatxml2html.py @@ -346,35 +346,40 @@ class DocParser(object): if end == -1 : end = self.docSize + # seems some xml has last* coming before first* so we have to + # handle any order + sp_first = -1 + sp_last = -1 + + gl_first = -1 + gl_last = -1 + + ws_first = -1 + ws_last = -1 + + word_class = '' + while (line < end) : (name, argres) = self.lineinDoc(line) - # handle both span and _span if name.endswith('span.firstWord') : - first = int(argres) - (name, argres) = self.lineinDoc(line+1) - if not name.endswith('span.lastWord'): - print 'Error: - incorrect _span ordering inside paragraph' - last = int(argres) - for wordnum in xrange(first, last): - result.append(('ocr', wordnum)) - line += 1 + sp_first = int(argres) + + elif name.endswith('span.lastWord') : + sp_last = int(argres) elif name.endswith('word.firstGlyph') : - first = int(argres) - (name, argres) = self.lineinDoc(line+1) - if not name.endswith('word.lastGlyph'): - print 'Error: - incorrect glyph ordering inside word in paragraph' - last = int(argres) - glyphList = [] - for glyphnum in xrange(first, last): - glyphList.append(glyphnum) - num = self.svgcount - self.glyphs_to_image(glyphList) - self.svgcount += 1 - result.append(('svg', num)) - line += 1 + gl_first = int(argres) + + elif name.endswith('word.lastGlyph') : + gl_last = int(argres) + + elif name.endswith('word_semantic.firstWord'): + ws_first = int(argres) + + elif name.endswith('word_semantic.lastWord'): + ws_last = int(argres) elif name.endswith('word.class'): (cname, space) = argres.split('-',1) @@ -386,15 +391,28 @@ class DocParser(object): result.append(('img' + word_class, int(argres))) word_class = '' - elif name.endswith('word_semantic.firstWord'): - first = int(argres) - (name, argres) = self.lineinDoc(line+1) - if not name.endswith('word_semantic.lastWord'): - print 'Error: - incorrect word_semantic ordering inside paragraph' - last = int(argres) - for wordnum in xrange(first, last): + if (sp_first != -1) and (sp_last != -1): + for wordnum in xrange(sp_first, sp_last): result.append(('ocr', wordnum)) - line += 1 + sp_first = -1 + sp_last = -1 + + if (gl_first != -1) and (gl_last != -1): + glyphList = [] + for glyphnum in xrange(gl_first, gl_last): + glyphList.append(glyphnum) + num = self.svgcount + self.glyphs_to_image(glyphList) + self.svgcount += 1 + result.append(('svg', num)) + gl_first = -1 + gl_last = -1 + + if (ws_first != -1) and (ws_last != -1): + for wordnum in xrange(ws_first, ws_last): + result.append(('ocr', wordnum)) + ws_first = -1 + ws_last = -1 line += 1