mirror of
https://github.com/noDRM/DeDRM_tools.git
synced 2024-11-16 19:06:09 +06:00
topazscripts 1.3 by some_updates
This commit is contained in:
parent
0a437510f6
commit
c1e5943471
|
@ -160,101 +160,159 @@ class PageParser(object):
|
||||||
# tag : (number of arguments, argument type, subtags present, special case of subtags presents when escaped)
|
# tag : (number of arguments, argument type, subtags present, special case of subtags presents when escaped)
|
||||||
|
|
||||||
token_tags = {
|
token_tags = {
|
||||||
'book' : (1, 'snippets', 1, 0),
|
'x' : (1, 'scalar_number', 0, 0),
|
||||||
'version' : (1, 'snippets', 1, 0),
|
'y' : (1, 'scalar_number', 0, 0),
|
||||||
'stylesheet' : (1, 'snippets', 1, 0),
|
|
||||||
'links' : (0, 'number', 0, 1),
|
|
||||||
'pages' : (0, 'number', 0, 1),
|
|
||||||
'page' : (1, 'snippets', 1, 0),
|
|
||||||
'group' : (1, 'snippets', 1, 0),
|
|
||||||
'region' : (1, 'snippets', 1, 0),
|
|
||||||
'reflow' : (1, 'number', 1, 0),
|
|
||||||
'img' : (1, 'snippets', 1, 0),
|
|
||||||
'paragraph' : (1, 'snippets', 1, 0),
|
|
||||||
'extratokens' : (1, 'snippets', 1, 0),
|
|
||||||
'style' : (1, 'snippets', 1, 0),
|
|
||||||
'rule' : (1, 'snippets', 1, 0),
|
|
||||||
'_span' : (1, 'snippets', 1, 0),
|
|
||||||
'word_semantic': (1, 'snippets', 1, 1),
|
|
||||||
'value' : (1, 'scalar_text', 0, 0),
|
|
||||||
'h' : (1, 'scalar_number', 0, 0),
|
'h' : (1, 'scalar_number', 0, 0),
|
||||||
'w' : (1, 'scalar_number', 0, 0),
|
'w' : (1, 'scalar_number', 0, 0),
|
||||||
'firstWord' : (1, 'scalar_number', 0, 0),
|
'firstWord' : (1, 'scalar_number', 0, 0),
|
||||||
'lastWord' : (1, 'scalar_number', 0, 0),
|
'lastWord' : (1, 'scalar_number', 0, 0),
|
||||||
'x' : (1, 'number', 0, 0),
|
'rootID' : (1, 'scalar_number', 0, 0),
|
||||||
'y' : (1, 'number', 0, 0),
|
'stemID' : (1, 'scalar_number', 0, 0),
|
||||||
|
'type' : (1, 'scalar_text', 0, 0),
|
||||||
|
|
||||||
|
'info' : (0, 'number', 1, 0),
|
||||||
|
|
||||||
|
'info.word' : (0, 'number', 1, 1),
|
||||||
|
'info.word.ocrText' : (1, 'text', 0, 0),
|
||||||
|
'info.word.firstGlyph' : (1, 'raw', 0, 0),
|
||||||
|
'info.word.lastGlyph' : (1, 'raw', 0, 0),
|
||||||
|
'info.word.bl' : (1, 'raw', 0, 0),
|
||||||
|
'info.word.link_id' : (1, 'number', 0, 0),
|
||||||
|
|
||||||
|
'glyph' : (0, 'number', 1, 1),
|
||||||
|
'glyph.x' : (1, 'number', 0, 0),
|
||||||
|
'glyph.y' : (1, 'number', 0, 0),
|
||||||
|
'glyph.glyphID' : (1, 'number', 0, 0),
|
||||||
|
|
||||||
|
'dehyphen' : (0, 'number', 1, 1),
|
||||||
|
'dehyphen.rootID' : (1, 'number', 0, 0),
|
||||||
|
'dehyphen.stemID' : (1, 'number', 0, 0),
|
||||||
|
'dehyphen.stemPage' : (1, 'number', 0, 0),
|
||||||
|
'dehyphen.sh' : (1, 'number', 0, 0),
|
||||||
|
|
||||||
|
'links' : (0, 'number', 1, 1),
|
||||||
'links.page' : (1, 'number', 0, 0),
|
'links.page' : (1, 'number', 0, 0),
|
||||||
'link_id' : (1, 'number', 0, 0),
|
'links.rel' : (1, 'number', 0, 0),
|
||||||
'glyph' : (0, 'number', 1, 1),
|
'links.row' : (1, 'number', 0, 0),
|
||||||
|
'links.title' : (1, 'text', 0, 0),
|
||||||
|
'links.href' : (1, 'text', 0, 0),
|
||||||
|
'links.type' : (1, 'text', 0, 0),
|
||||||
|
|
||||||
|
'paraCont' : (0, 'number', 1, 1),
|
||||||
|
'paraCont.rootID' : (1, 'number', 0, 0),
|
||||||
|
'paraCont.stemID' : (1, 'number', 0, 0),
|
||||||
|
'paraCont.stemPage' : (1, 'number', 0, 0),
|
||||||
|
|
||||||
|
'paraStems' : (0, 'number', 1, 1),
|
||||||
|
'paraStems.stemID' : (1, 'number', 0, 0),
|
||||||
|
|
||||||
|
'wordStems' : (0, 'number', 1, 1),
|
||||||
|
'wordStems.stemID' : (1, 'number', 0, 0),
|
||||||
|
|
||||||
|
'page' : (1, 'snippets', 1, 0),
|
||||||
|
'page.pageid' : (1, 'scalar_text', 0, 0),
|
||||||
|
'page.pagelabel' : (1, 'scalar_text', 0, 0),
|
||||||
|
'page.type' : (1, 'scalar_text', 0, 0),
|
||||||
|
'page.h' : (1, 'scalar_number', 0, 0),
|
||||||
|
'page.w' : (1, 'scalar_number', 0, 0),
|
||||||
|
'page.startID' : (1, 'scalar_number', 0, 0),
|
||||||
|
|
||||||
|
'group' : (1, 'snippets', 1, 0),
|
||||||
|
'group.type' : (1, 'scalar_text', 0, 0),
|
||||||
|
|
||||||
|
'region' : (1, 'snippets', 1, 0),
|
||||||
|
'region.type' : (1, 'scalar_text', 0, 0),
|
||||||
|
'region.x' : (1, 'scalar_number', 0, 0),
|
||||||
|
'region.y' : (1, 'scalar_number', 0, 0),
|
||||||
|
'region.h' : (1, 'scalar_number', 0, 0),
|
||||||
|
'region.w' : (1, 'scalar_number', 0, 0),
|
||||||
|
|
||||||
|
'img' : (1, 'snippets', 1, 0),
|
||||||
|
'img.x' : (1, 'scalar_number', 0, 0),
|
||||||
|
'img.y' : (1, 'scalar_number', 0, 0),
|
||||||
|
'img.h' : (1, 'scalar_number', 0, 0),
|
||||||
|
'img.w' : (1, 'scalar_number', 0, 0),
|
||||||
|
'img.src' : (1, 'scalar_number', 0, 0),
|
||||||
|
|
||||||
|
'paragraph' : (1, 'snippets', 1, 0),
|
||||||
|
'paragraph.class' : (1, 'scalar_text', 0, 0),
|
||||||
|
'paragraph.firstWord' : (1, 'scalar_number', 0, 0),
|
||||||
|
'paragraph.lastWord' : (1, 'scalar_number', 0, 0),
|
||||||
|
|
||||||
|
'word_semantic' : (1, 'snippets', 1, 1),
|
||||||
|
'word_semantic.type' : (1, 'scalar_text', 0, 0),
|
||||||
|
'word_semantic.firstWord' : (1, 'scalar_number', 0, 0),
|
||||||
|
'word_semantic.lastWord' : (1, 'scalar_number', 0, 0),
|
||||||
|
|
||||||
|
'word' : (1, 'snippets', 1, 0),
|
||||||
|
'word.type' : (1, 'scalar_text', 0, 0),
|
||||||
|
'word.class' : (1, 'scalar_text', 0, 0),
|
||||||
|
|
||||||
|
'_span' : (1, 'snippets', 1, 0),
|
||||||
|
'_span.firstWord' : (1, 'scalar_number', 0, 0),
|
||||||
|
'-span.lastWord' : (1, 'scalar_number', 0, 0),
|
||||||
|
|
||||||
|
'extratokens' : (1, 'snippets', 1, 0),
|
||||||
|
'extratokens.type' : (1, 'scalar_text', 0, 0),
|
||||||
|
'extratokens.firstGlyph' : (1, 'scalar_number', 0, 0),
|
||||||
|
'extratokens.lastGlyph' : (1, 'scalar_number', 0, 0),
|
||||||
|
|
||||||
'glyph.h' : (1, 'number', 0, 0),
|
'glyph.h' : (1, 'number', 0, 0),
|
||||||
'glyph.w' : (1, 'number', 0, 0),
|
'glyph.w' : (1, 'number', 0, 0),
|
||||||
'sh' : (1, 'number', 0, 0),
|
'glyph.use' : (1, 'number', 0, 0),
|
||||||
'word' : (0, 'number', 1, 1),
|
'glyph.vtx' : (1, 'number', 0, 1),
|
||||||
'src' : (1, 'scalar_number', 0, 0),
|
'glyph.len' : (1, 'number', 0, 1),
|
||||||
'rel' : (1, 'number', 0, 0),
|
'glyph.dpi' : (1, 'number', 0, 0),
|
||||||
'row' : (1, 'number', 0, 0),
|
'vtx' : (0, 'number', 1, 1),
|
||||||
'startID' : (1, 'number', 0, 1),
|
'vtx.x' : (1, 'number', 0, 0),
|
||||||
|
'vtx.y' : (1, 'number', 0, 0),
|
||||||
|
'len' : (0, 'number', 1, 1),
|
||||||
|
'len.n' : (1, 'number', 0, 0),
|
||||||
|
|
||||||
|
'book' : (1, 'snippets', 1, 0),
|
||||||
|
'version' : (1, 'snippets', 1, 0),
|
||||||
|
'version.FlowEdit_1_id' : (1, 'scalar_text', 0, 0),
|
||||||
|
'version.FlowEdit_1_version' : (1, 'scalar_text', 0, 0),
|
||||||
|
'version.Schema_id' : (1, 'scalar_text', 0, 0),
|
||||||
|
'version.Schema_version' : (1, 'scalar_text', 0, 0),
|
||||||
|
'version.Topaz_version' : (1, 'scalar_text', 0, 0),
|
||||||
|
'version.WordDetailEdit_1_id' : (1, 'scalar_text', 0, 0),
|
||||||
|
'version.WordDetailEdit_1_version' : (1, 'scalar_text', 0, 0),
|
||||||
|
'version.ZoneEdit_1_id' : (1, 'scalar_text', 0, 0),
|
||||||
|
'version.ZoneEdit_1_version' : (1, 'scalar_text', 0, 0),
|
||||||
|
'version.chapterheaders' : (1, 'scalar_text', 0, 0),
|
||||||
|
'version.creation_date' : (1, 'scalar_text', 0, 0),
|
||||||
|
'version.header_footer' : (1, 'scalar_text', 0, 0),
|
||||||
|
'version.init_from_ocr' : (1, 'scalar_text', 0, 0),
|
||||||
|
'version.letter_insertion' : (1, 'scalar_text', 0, 0),
|
||||||
|
'version.xmlinj_convert' : (1, 'scalar_text', 0, 0),
|
||||||
|
'version.xmlinj_reflow' : (1, 'scalar_text', 0, 0),
|
||||||
|
'version.xmlinj_transform' : (1, 'scalar_text', 0, 0),
|
||||||
|
'version.findlists' : (1, 'scalar_text', 0, 0),
|
||||||
|
'version.page_num' : (1, 'scalar_text', 0, 0),
|
||||||
|
'version.page_type' : (1, 'scalar_text', 0, 0),
|
||||||
|
|
||||||
|
'stylesheet' : (1, 'snippets', 1, 0),
|
||||||
|
'style' : (1, 'snippets', 1, 0),
|
||||||
|
'style._tag' : (1, 'scalar_text', 0, 0),
|
||||||
|
'style.type' : (1, 'scalar_text', 0, 0),
|
||||||
|
'style._parent_type' : (1, 'scalar_text', 0, 0),
|
||||||
|
'style.class' : (1, 'scalar_text', 0, 0),
|
||||||
|
'style._after_class' : (1, 'scalar_text', 0, 0),
|
||||||
|
'rule' : (1, 'snippets', 1, 0),
|
||||||
|
'rule.attr' : (1, 'scalar_text', 0, 0),
|
||||||
|
'rule.value' : (1, 'scalar_text', 0, 0),
|
||||||
|
|
||||||
|
'original' : (0, 'number', 1, 1),
|
||||||
|
'original.pnum' : (1, 'number', 0, 0),
|
||||||
|
'original.pid' : (1, 'text', 0, 0),
|
||||||
|
'pages' : (0, 'number', 1, 1),
|
||||||
|
'pages.ref' : (1, 'number', 0, 0),
|
||||||
|
'pages.id' : (1, 'number', 0, 0),
|
||||||
|
'startID' : (0, 'number', 1, 1),
|
||||||
'startID.page' : (1, 'number', 0, 0),
|
'startID.page' : (1, 'number', 0, 0),
|
||||||
'glyphID' : (1, 'number', 0, 0),
|
'startID.id' : (1, 'number', 0, 0),
|
||||||
'rootID' : (1, 'number', 0, 0),
|
|
||||||
'stemID' : (1, 'number', 0, 0),
|
|
||||||
'margin-top' : (1, 'number', 0, 0),
|
|
||||||
'stemPage' : (1, 'number', 0, 0),
|
|
||||||
'dehyphen' : (1, 'number', 1, 1),
|
|
||||||
'rootID' : (1, 'number', 0, 0),
|
|
||||||
'paraCont' : (1, 'number', 1, 1),
|
|
||||||
'paraStems' : (1, 'number', 1, 1),
|
|
||||||
'wordStems' : (1, 'number', 1, 1),
|
|
||||||
'original' : (0, 'number', 0, 1),
|
|
||||||
'use' : (1, 'number', 0, 0),
|
|
||||||
'vtx' : (1, 'number', 0, 1),
|
|
||||||
'len' : (1, 'number', 0, 1),
|
|
||||||
'dpi' : (1, 'number', 0, 0),
|
|
||||||
'n' : (1, 'number', 0, 0),
|
|
||||||
'id' : (1, 'number', 0, 0),
|
|
||||||
'ref' : (1, 'number', 0, 0),
|
|
||||||
'pnum' : (1, 'number', 0, 0),
|
|
||||||
'pid' : (1, 'text', 0, 0),
|
|
||||||
'info' : (0, 'number', 1, 0),
|
|
||||||
'bl' : (1, 'raw', 0, 0),
|
|
||||||
'firstGlyph' : (1, 'raw', 0, 0),
|
|
||||||
'lastGlyph' : (1, 'raw', 0, 0),
|
|
||||||
'ocrText' : (1, 'text', 0, 0),
|
|
||||||
'title' : (1, 'text', 0, 0),
|
|
||||||
'href' : (1, 'text', 0, 0),
|
|
||||||
'_parent_type' : (1, 'text', 0, 0),
|
|
||||||
'attr' : (1, 'scalar_text', 0, 0),
|
|
||||||
'justify' : (1, 'scalar_text', 0, 0),
|
|
||||||
'align' : (1, 'scalar_text', 0, 0),
|
|
||||||
'layout' : (1, 'scalar_text', 0, 0),
|
|
||||||
'pageid' : (1, 'scalar_text', 0, 0),
|
|
||||||
'pagelabel' : (1, 'scalar_text', 0, 0),
|
|
||||||
'type' : (1, 'text', 0, 0),
|
|
||||||
'class' : (1, 'scalar_text', 0, 0),
|
|
||||||
'container' : (1, 'scalar_text', 0, 0),
|
|
||||||
'_after_class' : (1, 'scalar_text', 0, 0),
|
|
||||||
'_tag' : (1, 'scalar_text', 0, 0),
|
|
||||||
'pos' : (1, 'scalar_text', 0, 0),
|
|
||||||
'page_num' : (1, 'scalar_text', 0, 0),
|
|
||||||
'page_type' : (1, 'scalar_text', 0, 0),
|
|
||||||
'findlists' : (1, 'scalar_text', 0, 0),
|
|
||||||
'FlowEdit_1_id' : (1, 'scalar_text', 0, 0),
|
|
||||||
'FlowEdit_1_version' : (1, 'scalar_text', 0, 0),
|
|
||||||
'Schema_id' : (1, 'scalar_text', 0, 0),
|
|
||||||
'Schema_version' : (1, 'scalar_text', 0, 0),
|
|
||||||
'Topaz_version' : (1, 'scalar_text', 0, 0),
|
|
||||||
'WordDetailEdit_1_id' : (1, 'scalar_text', 0, 0),
|
|
||||||
'WordDetailEdit_1_version' : (1, 'scalar_text', 0, 0),
|
|
||||||
'ZoneEdit_1_id' : (1, 'scalar_text', 0, 0),
|
|
||||||
'ZoneEdit_1_version' : (1, 'scalar_text', 0, 0),
|
|
||||||
'chapterheaders' : (1, 'scalar_text', 0, 0),
|
|
||||||
'creation_date' : (1, 'scalar_text', 0, 0),
|
|
||||||
'header_footer' : (1, 'scalar_text', 0, 0),
|
|
||||||
'init_from_ocr' : (1, 'scalar_text', 0, 0),
|
|
||||||
'letter_insertion' : (1, 'scalar_text', 0, 0),
|
|
||||||
'xmlinj_convert' : (1, 'scalar_text', 0, 0),
|
|
||||||
'xmlinj_reflow' : (1, 'scalar_text', 0, 0),
|
|
||||||
'xmlinj_transform' : (1, 'scalar_text', 0, 0),
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
@ -404,101 +462,25 @@ class PageParser(object):
|
||||||
return
|
return
|
||||||
|
|
||||||
|
|
||||||
# loop: pass though values unchanged
|
|
||||||
# DO NOT CHANGE - this has proven to be correct
|
# general loop code graciously submitted by "skindle" - thank you!
|
||||||
def doLoop76Mode0(self, argtype, cnt):
|
def doLoop76Mode(self, argtype, cnt, mode):
|
||||||
result = []
|
result = []
|
||||||
|
adj = 0
|
||||||
|
if mode & 1:
|
||||||
|
adj = readEncodedNumber(self.fo)
|
||||||
|
mode = mode >> 1
|
||||||
|
x = []
|
||||||
for i in xrange(cnt):
|
for i in xrange(cnt):
|
||||||
result.append(self.formatArg(readEncodedNumber(self.fo), argtype))
|
x.append(readEncodedNumber(self.fo) - adj)
|
||||||
return result
|
for i in xrange(mode):
|
||||||
|
for j in xrange(1, cnt):
|
||||||
|
x[j] = x[j] + x[j - 1]
|
||||||
# loop generating values relative to the *negative*
|
|
||||||
# of the offset - don't ask why - it just is
|
|
||||||
# DO NOT CHANGE - this has proven to be correct
|
|
||||||
def doLoop76Mode1(self, argtype, cnt):
|
|
||||||
result = []
|
|
||||||
offset = -readEncodedNumber(self.fo)
|
|
||||||
for i in xrange(cnt):
|
for i in xrange(cnt):
|
||||||
val = readEncodedNumber(self.fo) + offset
|
result.append(self.formatArg(x[i],argtype))
|
||||||
result.append(self.formatArg(val, argtype))
|
|
||||||
return result
|
return result
|
||||||
|
|
||||||
|
|
||||||
# loop generating values with starting value and accumulation
|
|
||||||
# DO NOT CHANGE - this has proven to be the correct
|
|
||||||
def doLoop76Mode2(self, argtype, cnt):
|
|
||||||
result = []
|
|
||||||
ptr = readEncodedNumber(self.fo)
|
|
||||||
result.append(self.formatArg(ptr, argtype))
|
|
||||||
for i in xrange(cnt-1):
|
|
||||||
ptr = ptr + readEncodedNumber(self.fo)
|
|
||||||
result.append(self.formatArg(ptr, argtype))
|
|
||||||
return result
|
|
||||||
|
|
||||||
|
|
||||||
# loop generating values with starting value and accumulation
|
|
||||||
# **after** subtracting adjustment value from each
|
|
||||||
# DO NOT CHANGE - this has been proven to be correct
|
|
||||||
def doLoop76Mode3(self, argtype, cnt):
|
|
||||||
result = []
|
|
||||||
adj = readEncodedNumber(self.fo)
|
|
||||||
ptr = readEncodedNumber(self.fo)
|
|
||||||
ptr = ptr - adj
|
|
||||||
result.append(self.formatArg(ptr, argtype))
|
|
||||||
for i in xrange(cnt-1):
|
|
||||||
ptr = ptr + readEncodedNumber(self.fo) - adj
|
|
||||||
result.append(self.formatArg(ptr,argtype))
|
|
||||||
return result
|
|
||||||
|
|
||||||
|
|
||||||
# loop using runing sum of data values and starting value
|
|
||||||
# with accumulation to get new value
|
|
||||||
# Again, don't ask it took me forever to figure this out
|
|
||||||
# DO NOT CHANGE - this has been proven to be correct
|
|
||||||
def doLoop76Mode4(self, argtype, cnt):
|
|
||||||
result = []
|
|
||||||
val = readEncodedNumber(self.fo)
|
|
||||||
runsum = val
|
|
||||||
ptr = val
|
|
||||||
result.append(self.formatArg(ptr, argtype))
|
|
||||||
for i in xrange(cnt-1):
|
|
||||||
runsum += readEncodedNumber(self.fo)
|
|
||||||
ptr = ptr + runsum
|
|
||||||
result.append(self.formatArg(ptr,argtype))
|
|
||||||
return result
|
|
||||||
|
|
||||||
|
|
||||||
# loop using and extra value as an adjustment
|
|
||||||
# and a running sum of the values after subtracting
|
|
||||||
# the adjustment, added to a ptr to get a new pointer
|
|
||||||
def doLoop76Mode5(self, argtype, cnt):
|
|
||||||
result = []
|
|
||||||
adj = readEncodedNumber(self.fo)
|
|
||||||
ptr = 0
|
|
||||||
runsum = 0
|
|
||||||
for i in xrange(cnt):
|
|
||||||
val = readEncodedNumber(self.fo)
|
|
||||||
runsum += (val - adj)
|
|
||||||
ptr = ptr +runsum
|
|
||||||
result.append(self.formatArg(ptr,argtype))
|
|
||||||
return result
|
|
||||||
|
|
||||||
|
|
||||||
# FIXME: I have only 4 points to work this out with inside my book
|
|
||||||
# So may be wrong but it is correct for my 4 points
|
|
||||||
def doLoop76Mode6(self, argtype, cnt):
|
|
||||||
result = []
|
|
||||||
oldval = 0
|
|
||||||
for i in xrange(cnt):
|
|
||||||
val = readEncodedNumber(self.fo)
|
|
||||||
ptr= (3 * oldval) + val + 1
|
|
||||||
result.append(self.formatArg(ptr,argtype))
|
|
||||||
oldval = val
|
|
||||||
return result
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
# dispatches loop commands bytes with various modes
|
# dispatches loop commands bytes with various modes
|
||||||
# The 0x76 style loops are used to build vectors
|
# The 0x76 style loops are used to build vectors
|
||||||
|
|
||||||
|
@ -507,57 +489,20 @@ class PageParser(object):
|
||||||
# since they did not appear in the test cases
|
# since they did not appear in the test cases
|
||||||
|
|
||||||
def decodeCMD(self, cmd, argtype):
|
def decodeCMD(self, cmd, argtype):
|
||||||
|
|
||||||
# if (cmd == 0x72):
|
|
||||||
# self.doLoop72(argtype)
|
|
||||||
# result =[]
|
|
||||||
# return result
|
|
||||||
|
|
||||||
if (cmd == 0x76):
|
if (cmd == 0x76):
|
||||||
|
|
||||||
# loop with cnt, and mode to control loop styles
|
# loop with cnt, and mode to control loop styles
|
||||||
cnt = readEncodedNumber(self.fo)
|
cnt = readEncodedNumber(self.fo)
|
||||||
mode = readEncodedNumber(self.fo)
|
mode = readEncodedNumber(self.fo)
|
||||||
|
|
||||||
if self.debug : print 'Loop for', cnt, 'with mode', mode, ': '
|
if self.debug : print 'Loop for', cnt, 'with mode', mode, ': '
|
||||||
|
return self.doLoop76Mode(argtype, cnt, mode)
|
||||||
if (mode == 0x00):
|
|
||||||
return self.doLoop76Mode0(argtype, cnt)
|
|
||||||
|
|
||||||
elif (mode == 0x01):
|
|
||||||
return self.doLoop76Mode1(argtype, cnt)
|
|
||||||
|
|
||||||
elif (mode == 0x02):
|
|
||||||
return self.doLoop76Mode2(argtype, cnt)
|
|
||||||
|
|
||||||
elif (mode == 0x03):
|
|
||||||
return self.doLoop76Mode3(argtype, cnt)
|
|
||||||
|
|
||||||
elif (mode == 0x04):
|
|
||||||
return self.doLoop76Mode4(argtype, cnt)
|
|
||||||
|
|
||||||
elif (mode == 0x05):
|
|
||||||
return self.doLoop76Mode5(argtype, cnt)
|
|
||||||
|
|
||||||
elif (mode == 0x06):
|
|
||||||
return self.doLoop76Mode6(argtype, cnt)
|
|
||||||
|
|
||||||
else:
|
|
||||||
|
|
||||||
if self.debug :
|
|
||||||
# try to mark any unknown loop comands
|
|
||||||
# if they exist, unless they are used to process
|
|
||||||
# text or some other known list, we won't be able to prove them correct
|
|
||||||
print '*** Unknown Loop 0x%x %d %d :' % (cmd, cnt, mode)
|
|
||||||
for i in xrange(cnt):
|
|
||||||
val = readEncodedNumber(self.fo)
|
|
||||||
print ' 0x%x' % val,
|
|
||||||
print ' '
|
|
||||||
result = []
|
|
||||||
return result
|
|
||||||
|
|
||||||
if self.dbug: print "Unknown command", cmd
|
if self.dbug: print "Unknown command", cmd
|
||||||
result = []
|
result = []
|
||||||
return result
|
return result
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
# add full tag path to injected snippets
|
# add full tag path to injected snippets
|
||||||
def updateName(self, tag, prefix):
|
def updateName(self, tag, prefix):
|
||||||
|
@ -727,7 +672,7 @@ class PageParser(object):
|
||||||
self.doc.append(tag)
|
self.doc.append(tag)
|
||||||
else:
|
else:
|
||||||
if self.debug:
|
if self.debug:
|
||||||
print "Mina Loop: Unknown value: %x" % v
|
print "Main Loop: Unknown value: %x" % v
|
||||||
|
|
||||||
|
|
||||||
# now do snippet injection
|
# now do snippet injection
|
||||||
|
|
|
@ -11,9 +11,16 @@ from struct import unpack
|
||||||
|
|
||||||
|
|
||||||
class DocParser(object):
|
class DocParser(object):
|
||||||
def __init__(self, flatxml, fileid):
|
def __init__(self, flatxml, classlst, fileid):
|
||||||
self.id = os.path.basename(fileid).replace('.dat','')
|
self.id = os.path.basename(fileid).replace('.dat','')
|
||||||
self.flatdoc = flatxml.split('\n')
|
self.flatdoc = flatxml.split('\n')
|
||||||
|
self.classList = {}
|
||||||
|
tmpList = classlst.split('\n')
|
||||||
|
for pclass in tmpList:
|
||||||
|
if pclass != '':
|
||||||
|
# remove the leading period from the css name
|
||||||
|
cname = pclass[1:]
|
||||||
|
self.classList[cname] = True
|
||||||
self.ocrtext = []
|
self.ocrtext = []
|
||||||
self.link_id = []
|
self.link_id = []
|
||||||
self.link_title = []
|
self.link_title = []
|
||||||
|
@ -22,6 +29,18 @@ class DocParser(object):
|
||||||
self.paracont_stemid = []
|
self.paracont_stemid = []
|
||||||
self.parastems_stemid = []
|
self.parastems_stemid = []
|
||||||
|
|
||||||
|
# return the tag name and argument found at line pos of the flat document
|
||||||
|
def lineinDoc(self, pos) :
|
||||||
|
docList = self.flatdoc
|
||||||
|
cnt = len(docList)
|
||||||
|
if (pos >= 0) and (pos < cnt) :
|
||||||
|
item = docList[pos]
|
||||||
|
if item.find('=') >= 0:
|
||||||
|
(name, argres) = item.split('=',1)
|
||||||
|
else :
|
||||||
|
name = item
|
||||||
|
argres = ''
|
||||||
|
return name, argres
|
||||||
|
|
||||||
|
|
||||||
# find tag if within pos to end inclusive
|
# find tag if within pos to end inclusive
|
||||||
|
@ -61,91 +80,161 @@ class DocParser(object):
|
||||||
return startpos
|
return startpos
|
||||||
|
|
||||||
|
|
||||||
# get a description of the paragraph
|
# build a description of the paragraph
|
||||||
def getParaDescription(self, start, end):
|
def getParaDescription(self, start, end):
|
||||||
|
|
||||||
|
result = []
|
||||||
|
|
||||||
# normal paragraph
|
# normal paragraph
|
||||||
(pos, pclass) = self.findinDoc('paragraph.class',start,end)
|
(pos, pclass) = self.findinDoc('paragraph.class',start,end)
|
||||||
|
|
||||||
# class names are an issue given topaz starts them with numerals (not allowed)
|
# class names are an issue given topaz may start them with numerals (not allowed),
|
||||||
# use a mix of cases, (which cause some browsers problems), and actually
|
# use a mix of cases (which cause some browsers problems), and actually
|
||||||
# attach numbers after "reclustered*" to the end to deal with reflow issues
|
# attach numbers after "_reclustered*" to the end to deal with reflow issues
|
||||||
# so we clean this up by lowercasing, prepend 'cl_', and remove all end pieces after reclustered
|
# but then does not actually provide all of these _reclustered classes in the stylesheet!
|
||||||
|
|
||||||
|
# so we clean this up by lowercasing, prepend 'cl_', and if not in the class
|
||||||
|
# list from the stylesheet, trying once more with "_reclustered*" removed
|
||||||
|
# if still not in stylesheet, let it pass as is
|
||||||
pclass = pclass.lower()
|
pclass = pclass.lower()
|
||||||
pclass = 'cl_' + pclass
|
pclass = 'cl_' + pclass
|
||||||
p = pclass.find('reclustered')
|
if pclass not in self.classList:
|
||||||
if p > 0 : pclass = pclass[0:p+11]
|
p = pclass.find('_reclustered')
|
||||||
|
if p > 0 :
|
||||||
|
baseclass = pclass[0:p]
|
||||||
|
if baseclass in self.classList:
|
||||||
|
pclass = baseclass
|
||||||
|
|
||||||
|
# build up a description of the paragraph in result and return it
|
||||||
|
# first check for the basic - all words paragraph
|
||||||
(pos, sfirst) = self.findinDoc('paragraph.firstWord',start,end)
|
(pos, sfirst) = self.findinDoc('paragraph.firstWord',start,end)
|
||||||
(pos, slast) = self.findinDoc('paragraph.lastWord',start,end)
|
(pos, slast) = self.findinDoc('paragraph.lastWord',start,end)
|
||||||
if (sfirst != None) and (slast != None) :
|
if (sfirst != None) and (slast != None) :
|
||||||
return pclass, int(sfirst), int(slast)
|
first = int(sfirst)
|
||||||
|
last = int(slast)
|
||||||
|
for wordnum in xrange(first, last):
|
||||||
|
result.append(('ocr', wordnum))
|
||||||
|
return pclass, result
|
||||||
|
|
||||||
# some paragraphs are instead split into multiple spans and some even have word_semantic tags as well
|
# this type of paragraph may be made up of multiple _spans, inline
|
||||||
# so walk through this region keeping track of the first firstword, and the last lastWord
|
# word monograms (images) and words with semantic meaning
|
||||||
# on any items that have it
|
|
||||||
(pos, sfirst) = self.findinDoc('firstWord',start, end)
|
# need to parse this type line by line
|
||||||
first = int(sfirst)
|
line = start + 1
|
||||||
last = -1
|
word_class = ''
|
||||||
for i in xrange(pos+1,end):
|
|
||||||
(pos, slast) = self.findinDoc('lastWord',i,i+1)
|
|
||||||
if slast != None:
|
|
||||||
last = int(slast)
|
|
||||||
return pclass, first, last
|
|
||||||
|
|
||||||
|
while (line < end) :
|
||||||
|
|
||||||
def buildParagraph(self, cname, first, last, type, regtype) :
|
(name, argres) = self.lineinDoc(line)
|
||||||
|
|
||||||
|
if name.endswith('_span.firstWord') :
|
||||||
|
first = int(argres)
|
||||||
|
(name, argres) = self.lineinDoc(line+1)
|
||||||
|
if not name.endswith('_span.lastWord'):
|
||||||
|
print 'Error: - incorrect _span ordering inside paragraph'
|
||||||
|
last = int(argres)
|
||||||
|
for wordnum in xrange(first, last):
|
||||||
|
result.append(('ocr', wordnum))
|
||||||
|
line += 1
|
||||||
|
|
||||||
|
elif name.endswith('word.class'):
|
||||||
|
(cname, space) = argres.split('-',1)
|
||||||
|
if cname == 'spaceafter':
|
||||||
|
word_class = 'sa'
|
||||||
|
|
||||||
|
elif name.endswith('word.img.src'):
|
||||||
|
result.append(('img' + word_class, int(argres)))
|
||||||
|
word_class = ''
|
||||||
|
|
||||||
|
elif name.endswith('word_semantic.firstWord'):
|
||||||
|
first = int(argres)
|
||||||
|
(name, argres) = self.lineinDoc(line+1)
|
||||||
|
if not name.endswith('word_semantic.lastWord'):
|
||||||
|
print 'Error: - incorrect word_semantic ordering inside paragraph'
|
||||||
|
last = int(argres)
|
||||||
|
for wordnum in xrange(first, last):
|
||||||
|
result.append(('ocr', wordnum))
|
||||||
|
line += 1
|
||||||
|
|
||||||
|
line += 1
|
||||||
|
|
||||||
|
return pclass, result
|
||||||
|
|
||||||
|
|
||||||
|
def buildParagraph(self, cname, pdesc, type, regtype) :
|
||||||
parares = ''
|
parares = ''
|
||||||
sep =''
|
sep =''
|
||||||
|
|
||||||
br_lb = False
|
br_lb = False
|
||||||
if (regtype == 'fixed') or (regtype == 'chapterheading') :
|
if (regtype == 'fixed') or (regtype == 'chapterheading') :
|
||||||
br_lb = True
|
br_lb = True
|
||||||
|
|
||||||
handle_links = False
|
handle_links = False
|
||||||
if len(self.link_id) > 0:
|
if len(self.link_id) > 0:
|
||||||
handle_links = True
|
handle_links = True
|
||||||
|
|
||||||
if (type == 'full') or (type == 'begin') :
|
if (type == 'full') or (type == 'begin') :
|
||||||
parares += '<p class="' + cname + '">'
|
parares += '<p class="' + cname + '">'
|
||||||
|
|
||||||
if (type == 'end'):
|
if (type == 'end'):
|
||||||
parares += ' '
|
parares += ' '
|
||||||
for j in xrange(first, last) :
|
|
||||||
word = self.ocrtext[j]
|
|
||||||
sep = ' '
|
|
||||||
|
|
||||||
if handle_links:
|
cnt = len(pdesc)
|
||||||
link = self.link_id[j]
|
|
||||||
if (link > 0):
|
for j in xrange( 0, cnt) :
|
||||||
title = self.link_title[link-1]
|
|
||||||
if title == "": title='_link_'
|
(wtype, num) = pdesc[j]
|
||||||
ptarget = self.link_page[link-1] - 1
|
|
||||||
linkhtml = '<a href="#page%04d">' % ptarget
|
if wtype == 'ocr' :
|
||||||
linkhtml += title + '</a>'
|
word = self.ocrtext[num]
|
||||||
pos = parares.rfind(title)
|
sep = ' '
|
||||||
if pos >= 0:
|
|
||||||
parares = parares[0:pos] + linkhtml + parares[pos+len(title):]
|
if handle_links:
|
||||||
|
link = self.link_id[num]
|
||||||
|
if (link > 0):
|
||||||
|
title = self.link_title[link-1]
|
||||||
|
if title == "": title='_link_'
|
||||||
|
ptarget = self.link_page[link-1] - 1
|
||||||
|
linkhtml = '<a href="#page%04d">' % ptarget
|
||||||
|
linkhtml += title + '</a>'
|
||||||
|
pos = parares.rfind(title)
|
||||||
|
if pos >= 0:
|
||||||
|
parares = parares[0:pos] + linkhtml + parares[pos+len(title):]
|
||||||
|
else :
|
||||||
|
parares += linkhtml
|
||||||
|
if word == '_link_' : word = ''
|
||||||
|
elif (link < 0) :
|
||||||
|
if word == '_link_' : word = ''
|
||||||
|
|
||||||
|
if word == '_lb_':
|
||||||
|
if (num-1) in self.dehyphen_rootid :
|
||||||
|
word = ''
|
||||||
|
sep = ''
|
||||||
|
elif handle_links :
|
||||||
|
word = ''
|
||||||
|
sep = ''
|
||||||
|
elif br_lb :
|
||||||
|
word = '<br />\n'
|
||||||
|
sep = ''
|
||||||
else :
|
else :
|
||||||
parares += linkhtml
|
word = '\n'
|
||||||
if word == '_link_' : word = ''
|
sep = ''
|
||||||
elif (link < 0) :
|
|
||||||
if word == '_link_' : word = ''
|
|
||||||
|
|
||||||
if word == '_lb_':
|
if num in self.dehyphen_rootid :
|
||||||
if (j-1) in self.dehyphen_rootid :
|
word = word[0:-1]
|
||||||
word = ''
|
|
||||||
sep = ''
|
|
||||||
elif handle_links :
|
|
||||||
word = ''
|
|
||||||
sep = ''
|
|
||||||
elif br_lb :
|
|
||||||
word = '<br />\n'
|
|
||||||
sep = ''
|
|
||||||
else :
|
|
||||||
word = '\n'
|
|
||||||
sep = ''
|
sep = ''
|
||||||
|
|
||||||
if j in self.dehyphen_rootid :
|
parares += word + sep
|
||||||
word = word[0:-1]
|
|
||||||
|
elif wtype == 'img' :
|
||||||
sep = ''
|
sep = ''
|
||||||
|
parares += '<img src="img/img%04d.jpg" alt="" />' % num
|
||||||
|
parares += sep
|
||||||
|
|
||||||
parares += word + sep
|
elif wtype == 'imgsa' :
|
||||||
|
sep = ' '
|
||||||
|
parares += '<img src="img/img%04d.jpg" alt="" />' % num
|
||||||
|
parares += sep
|
||||||
|
|
||||||
if len(sep) > 0 : parares = parares[0:-1]
|
if len(sep) > 0 : parares = parares[0:-1]
|
||||||
if (type == 'full') or (type == 'end') :
|
if (type == 'full') or (type == 'end') :
|
||||||
|
@ -222,7 +311,7 @@ class DocParser(object):
|
||||||
htmlpage += '<div class="graphic"><img src="img/img%04d.jpg" alt="" /></div>' % int(simgsrc)
|
htmlpage += '<div class="graphic"><img src="img/img%04d.jpg" alt="" /></div>' % int(simgsrc)
|
||||||
|
|
||||||
elif regtype == 'chapterheading' :
|
elif regtype == 'chapterheading' :
|
||||||
(pclass, first, last) = self.getParaDescription(start,end)
|
(pclass, pdesc) = self.getParaDescription(start,end)
|
||||||
if not breakSet:
|
if not breakSet:
|
||||||
htmlpage += '<div style="page-break-after: always;"> </div>\n'
|
htmlpage += '<div style="page-break-after: always;"> </div>\n'
|
||||||
breakSet = True
|
breakSet = True
|
||||||
|
@ -234,7 +323,7 @@ class DocParser(object):
|
||||||
if pclass[3:7] == 'ch2-' : tag = 'h2'
|
if pclass[3:7] == 'ch2-' : tag = 'h2'
|
||||||
if pclass[3:7] == 'ch3-' : tag = 'h3'
|
if pclass[3:7] == 'ch3-' : tag = 'h3'
|
||||||
htmlpage += '<' + tag + ' class="' + pclass + '">'
|
htmlpage += '<' + tag + ' class="' + pclass + '">'
|
||||||
htmlpage += self.buildParagraph(pclass,first,last,'middle', regtype)
|
htmlpage += self.buildParagraph(pclass, pdesc, 'middle', regtype)
|
||||||
htmlpage += '</' + tag + '>'
|
htmlpage += '</' + tag + '>'
|
||||||
|
|
||||||
elif (regtype == 'text') or (regtype == 'fixed') or (regtype == 'insert') :
|
elif (regtype == 'text') or (regtype == 'fixed') or (regtype == 'insert') :
|
||||||
|
@ -247,17 +336,17 @@ class DocParser(object):
|
||||||
if not anchorSet:
|
if not anchorSet:
|
||||||
htmlpage += '<div id="' + self.id + '" class="page_' + pagetype + '"> </div>\n'
|
htmlpage += '<div id="' + self.id + '" class="page_' + pagetype + '"> </div>\n'
|
||||||
anchorSet = True
|
anchorSet = True
|
||||||
(pclass, first, last) = self.getParaDescription(start,end)
|
(pclass, pdesc) = self.getParaDescription(start,end)
|
||||||
if ptype == 'full' :
|
if ptype == 'full' :
|
||||||
tag = 'p'
|
tag = 'p'
|
||||||
if pclass[3:6] == 'h1-' : tag = 'h4'
|
if pclass[3:6] == 'h1-' : tag = 'h4'
|
||||||
if pclass[3:6] == 'h2-' : tag = 'h5'
|
if pclass[3:6] == 'h2-' : tag = 'h5'
|
||||||
if pclass[3:6] == 'h3-' : tag = 'h6'
|
if pclass[3:6] == 'h3-' : tag = 'h6'
|
||||||
htmlpage += '<' + tag + ' class="' + pclass + '">'
|
htmlpage += '<' + tag + ' class="' + pclass + '">'
|
||||||
htmlpage += self.buildParagraph(pclass, first, last, 'middle', regtype)
|
htmlpage += self.buildParagraph(pclass, pdesc, 'middle', regtype)
|
||||||
htmlpage += '</' + tag + '>'
|
htmlpage += '</' + tag + '>'
|
||||||
else :
|
else :
|
||||||
htmlpage += self.buildParagraph(pclass, first, last, ptype, regtype)
|
htmlpage += self.buildParagraph(pclass, pdesc, ptype, regtype)
|
||||||
|
|
||||||
|
|
||||||
elif (regtype == 'tocentry') :
|
elif (regtype == 'tocentry') :
|
||||||
|
@ -271,12 +360,43 @@ class DocParser(object):
|
||||||
if not anchorSet:
|
if not anchorSet:
|
||||||
htmlpage += '<div id="' + self.id + '" class="page_' + pagetype + '"> </div>\n'
|
htmlpage += '<div id="' + self.id + '" class="page_' + pagetype + '"> </div>\n'
|
||||||
anchorSet = True
|
anchorSet = True
|
||||||
(pclass, first, last) = self.getParaDescription(start,end)
|
(pclass, pdesc) = self.getParaDescription(start,end)
|
||||||
htmlpage += self.buildParagraph(pclass, first, last, ptype, regtype)
|
htmlpage += self.buildParagraph(pclass, pdesc, ptype, regtype)
|
||||||
|
|
||||||
|
elif regtype == 'synth_fcvr.center' :
|
||||||
|
if not anchorSet:
|
||||||
|
htmlpage += '<div id="' + self.id + '" class="page_' + pagetype + '"> </div>\n'
|
||||||
|
anchorSet = True
|
||||||
|
(pos, simgsrc) = self.findinDoc('img.src',start,end)
|
||||||
|
if simgsrc:
|
||||||
|
htmlpage += '<div class="graphic"><img src="img/img%04d.jpg" alt="" /></div>' % int(simgsrc)
|
||||||
|
|
||||||
else :
|
else :
|
||||||
print 'Unknown region type', regtype
|
print 'Warning: Unknown region type', regtype
|
||||||
print 'Warning: skipping this region'
|
print 'Treating this like a "fixed" region'
|
||||||
|
regtype = 'fixed'
|
||||||
|
ptype = 'full'
|
||||||
|
# check to see if this is a continuation from the previous page
|
||||||
|
if (len(self.parastems_stemid) > 0):
|
||||||
|
ptype = 'end'
|
||||||
|
self.parastems_stemid=[]
|
||||||
|
else:
|
||||||
|
if not anchorSet:
|
||||||
|
htmlpage += '<div id="' + self.id + '" class="page_' + pagetype + '"> </div>\n'
|
||||||
|
anchorSet = True
|
||||||
|
(pclass, desc) = self.getParaDescription(start,end)
|
||||||
|
if ptype == 'full' :
|
||||||
|
tag = 'p'
|
||||||
|
if pclass[3:6] == 'h1-' : tag = 'h4'
|
||||||
|
if pclass[3:6] == 'h2-' : tag = 'h5'
|
||||||
|
if pclass[3:6] == 'h3-' : tag = 'h6'
|
||||||
|
htmlpage += '<' + tag + ' class="' + pclass + '">'
|
||||||
|
htmlpage += self.buildParagraph(pclass, pdesc, 'middle', regtype)
|
||||||
|
htmlpage += '</' + tag + '>'
|
||||||
|
else :
|
||||||
|
htmlpage += self.buildParagraph(pclass, pdesc, ptype, regtype)
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
if len(self.paracont_stemid) > 0 :
|
if len(self.paracont_stemid) > 0 :
|
||||||
if htmlpage[-4:] == '</p>':
|
if htmlpage[-4:] == '</p>':
|
||||||
|
@ -289,10 +409,10 @@ class DocParser(object):
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
def convert2HTML(flatxml, fileid):
|
def convert2HTML(flatxml, classlst, fileid):
|
||||||
|
|
||||||
# create a document parser
|
# create a document parser
|
||||||
dp = DocParser(flatxml, fileid)
|
dp = DocParser(flatxml, classlst, fileid)
|
||||||
|
|
||||||
htmlpage = dp.process()
|
htmlpage = dp.process()
|
||||||
|
|
||||||
|
|
|
@ -95,22 +95,27 @@ def main(argv):
|
||||||
htmlstr += '<meta name="Author" content="' + meta_array['Authors'] + '" />\n'
|
htmlstr += '<meta name="Author" content="' + meta_array['Authors'] + '" />\n'
|
||||||
htmlstr += '<meta name="Title" content="' + meta_array['Title'] + '" />\n'
|
htmlstr += '<meta name="Title" content="' + meta_array['Title'] + '" />\n'
|
||||||
|
|
||||||
|
# get some scaling info from metadata to use while processing styles
|
||||||
|
fontsize = '135'
|
||||||
|
if 'fontSize' in meta_array:
|
||||||
|
fontsize = meta_array['fontSize']
|
||||||
|
|
||||||
print ' ', 'other0000.dat'
|
print ' ', 'other0000.dat'
|
||||||
fname = os.path.join(bookDir,'other0000.dat')
|
fname = os.path.join(bookDir,'other0000.dat')
|
||||||
xname = os.path.join(bookDir, 'style.css')
|
xname = os.path.join(bookDir, 'style.css')
|
||||||
xmlstr = convert2xml.main('convert2xml.py --flat-xml ' + dictFile + ' ' + fname)
|
xmlstr = convert2xml.main('convert2xml.py --flat-xml ' + dictFile + ' ' + fname)
|
||||||
cssstr = '<style>\n'
|
htmlstr += '<style>\n'
|
||||||
cssstr += stylexml2css.convert2CSS(xmlstr)
|
cssstr , classlst = stylexml2css.convert2CSS(xmlstr, fontsize)
|
||||||
cssstr += '</style>\n'
|
|
||||||
file(xname, 'wb').write(cssstr)
|
file(xname, 'wb').write(cssstr)
|
||||||
htmlstr += cssstr
|
htmlstr += cssstr
|
||||||
|
htmlstr += '</style>\n'
|
||||||
htmlstr += '</head>\n<body>\n'
|
htmlstr += '</head>\n<body>\n'
|
||||||
|
|
||||||
for filename in filenames:
|
for filename in filenames:
|
||||||
print ' ', filename
|
print ' ', filename
|
||||||
fname = os.path.join(pageDir,filename)
|
fname = os.path.join(pageDir,filename)
|
||||||
flat_xml = convert2xml.main('convert2xml.py --flat-xml ' + dictFile + ' ' + fname)
|
flat_xml = convert2xml.main('convert2xml.py --flat-xml ' + dictFile + ' ' + fname)
|
||||||
htmlstr += flatxml2html.convert2HTML(flat_xml, fname)
|
htmlstr += flatxml2html.convert2HTML(flat_xml, classlst, fname)
|
||||||
|
|
||||||
htmlstr += '</body>\n</html>\n'
|
htmlstr += '</body>\n</html>\n'
|
||||||
|
|
||||||
|
|
|
@ -10,286 +10,301 @@ import decode_meta
|
||||||
|
|
||||||
|
|
||||||
class GParser(object):
|
class GParser(object):
|
||||||
def __init__(self, flatxml):
|
def __init__(self, flatxml):
|
||||||
self.flatdoc = flatxml.split('\n')
|
self.flatdoc = flatxml.split('\n')
|
||||||
self.dpi = 1440
|
self.dpi = 1440
|
||||||
self.gh = self.getData('info.glyph.h')
|
self.gh = self.getData('info.glyph.h')
|
||||||
self.gw = self.getData('info.glyph.w')
|
self.gw = self.getData('info.glyph.w')
|
||||||
self.guse = self.getData('info.glyph.use')
|
self.guse = self.getData('info.glyph.use')
|
||||||
self.count = len(self.guse)
|
self.count = len(self.guse)
|
||||||
self.gvtx = self.getData('info.glyph.vtx')
|
self.gvtx = self.getData('info.glyph.vtx')
|
||||||
self.glen = self.getData('info.glyph.len')
|
self.glen = self.getData('info.glyph.len')
|
||||||
self.gdpi = self.getData('info.glyph.dpi')
|
self.gdpi = self.getData('info.glyph.dpi')
|
||||||
self.vx = self.getData('info.vtx.x')
|
self.vx = self.getData('info.vtx.x')
|
||||||
self.vy = self.getData('info.vtx.y')
|
self.vy = self.getData('info.vtx.y')
|
||||||
self.vlen = self.getData('info.len.n')
|
self.vlen = self.getData('info.len.n')
|
||||||
self.glen.append(len(self.vlen))
|
self.glen.append(len(self.vlen))
|
||||||
self.gvtx.append(len(self.vx))
|
self.gvtx.append(len(self.vx))
|
||||||
|
|
||||||
def getData(self, path):
|
def getData(self, path):
|
||||||
result = None
|
result = None
|
||||||
cnt = len(self.flatdoc)
|
cnt = len(self.flatdoc)
|
||||||
for j in xrange(cnt):
|
for j in xrange(cnt):
|
||||||
item = self.flatdoc[j]
|
item = self.flatdoc[j]
|
||||||
if item.find('=') >= 0:
|
if item.find('=') >= 0:
|
||||||
(name, argt) = item.split('=')
|
(name, argt) = item.split('=')
|
||||||
argres = argt.split('|')
|
argres = argt.split('|')
|
||||||
else:
|
else:
|
||||||
name = item
|
name = item
|
||||||
argres = []
|
argres = []
|
||||||
if (name == path):
|
if (name == path):
|
||||||
result = argres
|
result = argres
|
||||||
break
|
break
|
||||||
if (len(argres) > 0) :
|
if (len(argres) > 0) :
|
||||||
for j in xrange(0,len(argres)):
|
for j in xrange(0,len(argres)):
|
||||||
argres[j] = int(argres[j])
|
argres[j] = int(argres[j])
|
||||||
return result
|
return result
|
||||||
|
|
||||||
def getPath(self, gly):
|
def getPath(self, gly):
|
||||||
path = ''
|
path = ''
|
||||||
if (gly < 0) or (gly >= self.count):
|
if (gly < 0) or (gly >= self.count):
|
||||||
return path
|
return path
|
||||||
tx = self.vx[self.gvtx[gly]:self.gvtx[gly+1]-1]
|
tx = self.vx[self.gvtx[gly]:self.gvtx[gly+1]-1]
|
||||||
ty = self.vy[self.gvtx[gly]:self.gvtx[gly+1]-1]
|
ty = self.vy[self.gvtx[gly]:self.gvtx[gly+1]-1]
|
||||||
p = 0
|
p = 0
|
||||||
for k in xrange(self.glen[gly], self.glen[gly+1]):
|
for k in xrange(self.glen[gly], self.glen[gly+1]):
|
||||||
if (p == 0):
|
if (p == 0):
|
||||||
zx = tx[0:self.vlen[k]+1]
|
zx = tx[0:self.vlen[k]+1]
|
||||||
zy = ty[0:self.vlen[k]+1]
|
zy = ty[0:self.vlen[k]+1]
|
||||||
else:
|
else:
|
||||||
zx = tx[self.vlen[k-1]+1:self.vlen[k]+1]
|
zx = tx[self.vlen[k-1]+1:self.vlen[k]+1]
|
||||||
zy = ty[self.vlen[k-1]+1:self.vlen[k]+1]
|
zy = ty[self.vlen[k-1]+1:self.vlen[k]+1]
|
||||||
p += 1
|
p += 1
|
||||||
for j in xrange(0, len(zx)):
|
j = 0
|
||||||
if (j == 0):
|
while ( j < len(zx) ):
|
||||||
path += 'M %d %d ' % (zx[j] * self.dpi / self.gdpi[gly], zy[j] * self.dpi / self.gdpi[gly])
|
if (j == 0):
|
||||||
else:
|
# Start Position.
|
||||||
path += 'L %d %d ' % (zx[j] * self.dpi / self.gdpi[gly], zy[j] * self.dpi / self.gdpi[gly])
|
path += 'M %d %d ' % (zx[j] * self.dpi / self.gdpi[gly], zy[j] * self.dpi / self.gdpi[gly])
|
||||||
path += 'z'
|
elif (j <= len(zx)-3):
|
||||||
return path
|
# Cubic Bezier Curve
|
||||||
|
path += 'C %d %d %d %d %d %d ' % (zx[j] * self.dpi / self.gdpi[gly], zy[j] * self.dpi / self.gdpi[gly], zx[j+1] * self.dpi / self.gdpi[gly], zy[j+1] * self.dpi / self.gdpi[gly], zx[j+2] * self.dpi / self.gdpi[gly], zy[j+2] * self.dpi / self.gdpi[gly])
|
||||||
|
j += 2
|
||||||
|
elif (j == len(zx)-2):
|
||||||
|
# Cubic Bezier Curve to Start Position
|
||||||
|
path += 'C %d %d %d %d %d %d ' % (zx[j] * self.dpi / self.gdpi[gly], zy[j] * self.dpi / self.gdpi[gly], zx[j+1] * self.dpi / self.gdpi[gly], zy[j+1] * self.dpi / self.gdpi[gly], zx[0] * self.dpi / self.gdpi[gly], zy[0] * self.dpi / self.gdpi[gly])
|
||||||
|
j += 1
|
||||||
|
elif (j == len(zx)-1):
|
||||||
|
# Quadratic Bezier Curve to Start Position
|
||||||
|
path += 'Q %d %d %d %d ' % (zx[j] * self.dpi / self.gdpi[gly], zy[j] * self.dpi / self.gdpi[gly], zx[0] * self.dpi / self.gdpi[gly], zy[0] * self.dpi / self.gdpi[gly])
|
||||||
|
|
||||||
|
j += 1
|
||||||
|
path += 'z'
|
||||||
|
return path
|
||||||
|
|
||||||
class PParser(object):
|
class PParser(object):
|
||||||
def __init__(self, flatxml):
|
def __init__(self, flatxml):
|
||||||
self.flatdoc = flatxml.split('\n')
|
self.flatdoc = flatxml.split('\n')
|
||||||
self.temp = []
|
self.temp = []
|
||||||
self.ph = self.getData('page.h')[0]
|
foo = self.getData('page.h') or self.getData('book.h')
|
||||||
self.pw = self.getData('page.w')[0]
|
self.ph = foo[0]
|
||||||
self.gx = self.getData('info.glyph.x')
|
foo = self.getData('page.w') or self.getData('book.w')
|
||||||
self.gy = self.getData('info.glyph.y')
|
self.pw = foo[0]
|
||||||
self.gid = self.getData('info.glyph.glyphID')
|
self.gx = self.getData('info.glyph.x')
|
||||||
|
self.gy = self.getData('info.glyph.y')
|
||||||
|
self.gid = self.getData('info.glyph.glyphID')
|
||||||
|
|
||||||
def getData(self, path):
|
def getData(self, path):
|
||||||
result = None
|
result = None
|
||||||
cnt = len(self.flatdoc)
|
cnt = len(self.flatdoc)
|
||||||
for j in xrange(cnt):
|
for j in xrange(cnt):
|
||||||
item = self.flatdoc[j]
|
item = self.flatdoc[j]
|
||||||
if item.find('=') >= 0:
|
if item.find('=') >= 0:
|
||||||
(name, argt) = item.split('=')
|
(name, argt) = item.split('=')
|
||||||
argres = argt.split('|')
|
argres = argt.split('|')
|
||||||
else:
|
else:
|
||||||
name = item
|
name = item
|
||||||
argres = []
|
argres = []
|
||||||
if (name.endswith(path)):
|
if (name.endswith(path)):
|
||||||
result = argres
|
result = argres
|
||||||
break
|
break
|
||||||
if (len(argres) > 0) :
|
if (len(argres) > 0) :
|
||||||
for j in xrange(0,len(argres)):
|
for j in xrange(0,len(argres)):
|
||||||
argres[j] = int(argres[j])
|
argres[j] = int(argres[j])
|
||||||
return result
|
return result
|
||||||
|
|
||||||
def getDataTemp(self, path):
|
def getDataTemp(self, path):
|
||||||
result = None
|
result = None
|
||||||
cnt = len(self.temp)
|
cnt = len(self.temp)
|
||||||
for j in xrange(cnt):
|
for j in xrange(cnt):
|
||||||
item = self.temp[j]
|
item = self.temp[j]
|
||||||
if item.find('=') >= 0:
|
if item.find('=') >= 0:
|
||||||
(name, argt) = item.split('=')
|
(name, argt) = item.split('=')
|
||||||
argres = argt.split('|')
|
argres = argt.split('|')
|
||||||
else:
|
else:
|
||||||
name = item
|
name = item
|
||||||
argres = []
|
argres = []
|
||||||
if (name.endswith(path)):
|
if (name.endswith(path)):
|
||||||
result = argres
|
result = argres
|
||||||
self.temp.pop(j)
|
self.temp.pop(j)
|
||||||
break
|
break
|
||||||
if (len(argres) > 0) :
|
if (len(argres) > 0) :
|
||||||
for j in xrange(0,len(argres)):
|
for j in xrange(0,len(argres)):
|
||||||
argres[j] = int(argres[j])
|
argres[j] = int(argres[j])
|
||||||
return result
|
return result
|
||||||
|
|
||||||
def getImages(self):
|
def getImages(self):
|
||||||
result = []
|
result = []
|
||||||
self.temp = self.flatdoc
|
self.temp = self.flatdoc
|
||||||
while (self.getDataTemp('region.img') != None):
|
while (self.getDataTemp('img') != None):
|
||||||
h = self.getDataTemp('region.img.h')[0]
|
h = self.getDataTemp('img.h')[0]
|
||||||
w = self.getDataTemp('region.img.w')[0]
|
w = self.getDataTemp('img.w')[0]
|
||||||
x = self.getDataTemp('region.img.x')[0]
|
x = self.getDataTemp('img.x')[0]
|
||||||
y = self.getDataTemp('region.img.y')[0]
|
y = self.getDataTemp('img.y')[0]
|
||||||
src = self.getDataTemp('region.img.src')[0]
|
src = self.getDataTemp('img.src')[0]
|
||||||
result.append('<image xlink:href="../img/img%04d.jpg" x="%d" y="%d" width="%d" height="%d" />\n' % (src, x, y, w, h))
|
result.append('<image xlink:href="../img/img%04d.jpg" x="%d" y="%d" width="%d" height="%d" />\n' % (src, x, y, w, h))
|
||||||
return result
|
return result
|
||||||
|
|
||||||
def getGlyphs(self,glyfname):
|
def getGlyphs(self,glyfname):
|
||||||
result = []
|
result = []
|
||||||
if (self.gid != None) and (len(self.gid) > 0):
|
if (self.gid != None) and (len(self.gid) > 0):
|
||||||
glyphs = []
|
glyphs = []
|
||||||
for j in set(self.gid):
|
for j in set(self.gid):
|
||||||
glyphs.append(j)
|
glyphs.append(j)
|
||||||
glyphs.sort()
|
glyphs.sort()
|
||||||
gfile = open(glyfname, 'r')
|
gfile = open(glyfname, 'r')
|
||||||
j = 0
|
j = 0
|
||||||
while True :
|
while True :
|
||||||
inp = gfile.readline()
|
inp = gfile.readline()
|
||||||
if (inp == ''):
|
if (inp == ''):
|
||||||
break
|
break
|
||||||
id='id="gl%d"' % glyphs[j]
|
id='id="gl%d"' % glyphs[j]
|
||||||
if (inp.find(id) > 0):
|
if (inp.find(id) > 0):
|
||||||
result.append(inp)
|
result.append(inp)
|
||||||
j += 1
|
j += 1
|
||||||
if (j == len(glyphs)):
|
if (j == len(glyphs)):
|
||||||
break
|
break
|
||||||
gfile.close()
|
gfile.close()
|
||||||
return result
|
return result
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
def usage():
|
def usage():
|
||||||
print 'Usage: '
|
print 'Usage: '
|
||||||
print ' '
|
print ' '
|
||||||
print ' gensvg.py unencryptedBookDir'
|
print ' gensvg.py unencryptedBookDir'
|
||||||
print ' '
|
print ' '
|
||||||
|
|
||||||
|
|
||||||
def main(argv):
|
def main(argv):
|
||||||
bookDir = ''
|
bookDir = ''
|
||||||
|
|
||||||
if len(argv) == 0:
|
if len(argv) == 0:
|
||||||
argv = sys.argv
|
argv = sys.argv
|
||||||
else :
|
else :
|
||||||
argv = argv.split()
|
argv = argv.split()
|
||||||
|
|
||||||
try:
|
try:
|
||||||
opts, args = getopt.getopt(argv[1:], "h:")
|
opts, args = getopt.getopt(argv[1:], "h:")
|
||||||
|
|
||||||
except getopt.GetoptError, err:
|
except getopt.GetoptError, err:
|
||||||
print str(err)
|
print str(err)
|
||||||
usage()
|
usage()
|
||||||
sys.exit(2)
|
sys.exit(2)
|
||||||
|
|
||||||
if len(opts) == 0 and len(args) == 0 :
|
if len(opts) == 0 and len(args) == 0 :
|
||||||
usage()
|
usage()
|
||||||
sys.exit(2)
|
sys.exit(2)
|
||||||
|
|
||||||
for o, a in opts:
|
for o, a in opts:
|
||||||
if o =="-h":
|
if o =="-h":
|
||||||
usage()
|
usage()
|
||||||
sys.exit(0)
|
sys.exit(0)
|
||||||
|
|
||||||
bookDir = args[0]
|
bookDir = args[0]
|
||||||
|
|
||||||
if not os.path.exists(bookDir) :
|
if not os.path.exists(bookDir) :
|
||||||
print "Can not find directory with unencrypted book"
|
print "Can not find directory with unencrypted book"
|
||||||
sys.exit(-1)
|
sys.exit(-1)
|
||||||
|
|
||||||
dictFile = os.path.join(bookDir,'dict0000.dat')
|
dictFile = os.path.join(bookDir,'dict0000.dat')
|
||||||
|
|
||||||
if not os.path.exists(dictFile) :
|
if not os.path.exists(dictFile) :
|
||||||
print "Can not find dict0000.dat file"
|
print "Can not find dict0000.dat file"
|
||||||
sys.exit(-1)
|
sys.exit(-1)
|
||||||
|
|
||||||
pageDir = os.path.join(bookDir,'page')
|
pageDir = os.path.join(bookDir,'page')
|
||||||
if not os.path.exists(pageDir) :
|
if not os.path.exists(pageDir) :
|
||||||
print "Can not find page directory in unencrypted book"
|
print "Can not find page directory in unencrypted book"
|
||||||
sys.exit(-1)
|
sys.exit(-1)
|
||||||
|
|
||||||
imgDir = os.path.join(bookDir,'img')
|
imgDir = os.path.join(bookDir,'img')
|
||||||
if not os.path.exists(imgDir) :
|
if not os.path.exists(imgDir) :
|
||||||
print "Can not find image directory in unencrypted book"
|
print "Can not find image directory in unencrypted book"
|
||||||
sys.exit(-1)
|
sys.exit(-1)
|
||||||
|
|
||||||
glyphsDir = os.path.join(bookDir,'glyphs')
|
glyphsDir = os.path.join(bookDir,'glyphs')
|
||||||
if not os.path.exists(glyphsDir) :
|
if not os.path.exists(glyphsDir) :
|
||||||
print "Can not find glyphs directory in unencrypted book"
|
print "Can not find glyphs directory in unencrypted book"
|
||||||
sys.exit(-1)
|
sys.exit(-1)
|
||||||
|
|
||||||
metaFile = os.path.join(bookDir,'metadata0000.dat')
|
metaFile = os.path.join(bookDir,'metadata0000.dat')
|
||||||
if not os.path.exists(metaFile) :
|
if not os.path.exists(metaFile) :
|
||||||
print "Can not find metadata0000.dat in unencrypted book"
|
print "Can not find metadata0000.dat in unencrypted book"
|
||||||
sys.exit(-1)
|
sys.exit(-1)
|
||||||
|
|
||||||
svgDir = os.path.join(bookDir,'svg')
|
svgDir = os.path.join(bookDir,'svg')
|
||||||
if not os.path.exists(svgDir) :
|
if not os.path.exists(svgDir) :
|
||||||
os.makedirs(svgDir)
|
os.makedirs(svgDir)
|
||||||
|
|
||||||
|
|
||||||
print 'Processing Meta Data ... '
|
print 'Processing Meta Data ... '
|
||||||
|
|
||||||
print ' ', 'metadata0000.dat'
|
print ' ', 'metadata0000.dat'
|
||||||
fname = os.path.join(bookDir,'metadata0000.dat')
|
fname = os.path.join(bookDir,'metadata0000.dat')
|
||||||
metadata = decode_meta.getMetaArray(fname)
|
metadata = decode_meta.getMetaArray(fname)
|
||||||
|
|
||||||
print 'Processing Glyphs ... '
|
print 'Processing Glyphs ... '
|
||||||
|
|
||||||
filenames = os.listdir(glyphsDir)
|
filenames = os.listdir(glyphsDir)
|
||||||
filenames = sorted(filenames)
|
filenames = sorted(filenames)
|
||||||
|
|
||||||
glyfname = os.path.join(svgDir,'glyphs.svg')
|
glyfname = os.path.join(svgDir,'glyphs.svg')
|
||||||
glyfile = open(glyfname, 'w')
|
glyfile = open(glyfname, 'w')
|
||||||
glyfile.write('<?xml version="1.0" standalone="no"?>\n')
|
glyfile.write('<?xml version="1.0" standalone="no"?>\n')
|
||||||
glyfile.write('<!DOCTYPE svg PUBLIC "-//W3C/DTD SVG 1.1//EN" "http://www.w3.org/Graphics/SVG/1.1/DTD/svg11.dtd">\n')
|
glyfile.write('<!DOCTYPE svg PUBLIC "-//W3C/DTD SVG 1.1//EN" "http://www.w3.org/Graphics/SVG/1.1/DTD/svg11.dtd">\n')
|
||||||
glyfile.write('<svg width="512" height="512" viewBox="0 0 511 511" xmlns="http://www.w3.org/2000/svg" version="1.1">\n')
|
glyfile.write('<svg width="512" height="512" viewBox="0 0 511 511" xmlns="http://www.w3.org/2000/svg" version="1.1">\n')
|
||||||
glyfile.write('<title>Glyphs for %s</title>\n' % metadata['Title'])
|
glyfile.write('<title>Glyphs for %s</title>\n' % metadata['Title'])
|
||||||
glyfile.write('<defs>\n')
|
glyfile.write('<defs>\n')
|
||||||
counter = 0
|
counter = 0
|
||||||
for filename in filenames:
|
for filename in filenames:
|
||||||
print ' ', filename
|
print ' ', filename
|
||||||
fname = os.path.join(glyphsDir,filename)
|
fname = os.path.join(glyphsDir,filename)
|
||||||
flat_xml = convert2xml.main('convert2xml.py --flat-xml ' + dictFile + ' ' + fname)
|
flat_xml = convert2xml.main('convert2xml.py --flat-xml ' + dictFile + ' ' + fname)
|
||||||
gp = GParser(flat_xml)
|
gp = GParser(flat_xml)
|
||||||
for i in xrange(0, gp.count):
|
for i in xrange(0, gp.count):
|
||||||
path = gp.getPath(i)
|
path = gp.getPath(i)
|
||||||
glyfile.write('<path id="gl%d" d="%s" fill="black" />\n' % (counter * 256 + i, path))
|
glyfile.write('<path id="gl%d" d="%s" fill="black" />\n' % (counter * 256 + i, path))
|
||||||
counter += 1
|
counter += 1
|
||||||
glyfile.write('</defs>\n')
|
glyfile.write('</defs>\n')
|
||||||
glyfile.write('</svg>\n')
|
glyfile.write('</svg>\n')
|
||||||
glyfile.close()
|
glyfile.close()
|
||||||
|
|
||||||
print 'Processing Pages ... '
|
print 'Processing Pages ... '
|
||||||
|
|
||||||
scaledpi = 720
|
scaledpi = 720
|
||||||
filenames = os.listdir(pageDir)
|
filenames = os.listdir(pageDir)
|
||||||
filenames = sorted(filenames)
|
filenames = sorted(filenames)
|
||||||
counter = 0
|
counter = 0
|
||||||
for filename in filenames:
|
for filename in filenames:
|
||||||
print ' ', filename
|
print ' ', filename
|
||||||
fname = os.path.join(pageDir,filename)
|
fname = os.path.join(pageDir,filename)
|
||||||
flat_xml = convert2xml.main('convert2xml.py --flat-xml ' + dictFile + ' ' + fname)
|
flat_xml = convert2xml.main('convert2xml.py --flat-xml ' + dictFile + ' ' + fname)
|
||||||
pp = PParser(flat_xml)
|
pp = PParser(flat_xml)
|
||||||
pfile = open(os.path.join(svgDir,filename.replace('.dat','.svg')), 'w')
|
pfile = open(os.path.join(svgDir,filename.replace('.dat','.svg')), 'w')
|
||||||
pfile.write('<?xml version="1.0" standalone="no"?>\n')
|
pfile.write('<?xml version="1.0" standalone="no"?>\n')
|
||||||
pfile.write('<!DOCTYPE svg PUBLIC "-//W3C/DTD SVG 1.1//EN" "http://www.w3.org/Graphics/SVG/1.1/DTD/svg11.dtd">\n')
|
pfile.write('<!DOCTYPE svg PUBLIC "-//W3C/DTD SVG 1.1//EN" "http://www.w3.org/Graphics/SVG/1.1/DTD/svg11.dtd">\n')
|
||||||
pfile.write('<svg width="%fin" height="%fin" viewBox="0 0 %d %d" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" version="1.1">\n' % (pp.pw / scaledpi, pp.ph / scaledpi, pp.pw -1, pp.ph -1))
|
pfile.write('<svg width="%fin" height="%fin" viewBox="0 0 %d %d" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" version="1.1">\n' % (pp.pw / scaledpi, pp.ph / scaledpi, pp.pw -1, pp.ph -1))
|
||||||
pfile.write('<title>Page %d - %s by %s</title>\n' % (counter, metadata['Title'],metadata['Authors']))
|
pfile.write('<title>Page %d - %s by %s</title>\n' % (counter, metadata['Title'],metadata['Authors']))
|
||||||
if (pp.gid != None):
|
if (pp.gid != None):
|
||||||
pfile.write('<defs>\n')
|
pfile.write('<defs>\n')
|
||||||
gdefs = pp.getGlyphs(glyfname)
|
gdefs = pp.getGlyphs(glyfname)
|
||||||
for j in xrange(0,len(gdefs)):
|
for j in xrange(0,len(gdefs)):
|
||||||
pfile.write(gdefs[j])
|
pfile.write(gdefs[j])
|
||||||
pfile.write('</defs>\n')
|
pfile.write('</defs>\n')
|
||||||
for j in xrange(0,len(pp.gid)):
|
for j in xrange(0,len(pp.gid)):
|
||||||
pfile.write('<use xlink:href="#gl%d" x="%d" y="%d" />\n' % (pp.gid[j], pp.gx[j], pp.gy[j]))
|
pfile.write('<use xlink:href="#gl%d" x="%d" y="%d" />\n' % (pp.gid[j], pp.gx[j], pp.gy[j]))
|
||||||
img = pp.getImages()
|
img = pp.getImages()
|
||||||
if (img != None):
|
if (img != None):
|
||||||
for j in xrange(0,len(img)):
|
for j in xrange(0,len(img)):
|
||||||
pfile.write(img[j])
|
pfile.write(img[j])
|
||||||
pfile.write('</svg>')
|
pfile.write('</svg>')
|
||||||
pfile.close()
|
pfile.close()
|
||||||
counter += 1
|
counter += 1
|
||||||
|
|
||||||
print 'Processing Complete'
|
print 'Processing Complete'
|
||||||
|
|
||||||
return 0
|
return 0
|
||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
sys.exit(main(''))
|
sys.exit(main(''))
|
|
@ -1,3 +1,13 @@
|
||||||
|
Contributors:
|
||||||
|
cmbtc - removal of drm which made all of this possible
|
||||||
|
clarknova - for all of the svg and glyph generation and many other bug fixes and improvements
|
||||||
|
skindle - for figuring out the general case for the mode loops
|
||||||
|
some updates - for conversion to xml, basic html
|
||||||
|
DiapDealer - for extensive testing and feedback
|
||||||
|
|
||||||
|
and others for posting, feedback and testing
|
||||||
|
|
||||||
|
|
||||||
This is experimental and it will probably not work for you but...
|
This is experimental and it will probably not work for you but...
|
||||||
|
|
||||||
ALSO: Please do not use any of this to steal. Theft is wrong.
|
ALSO: Please do not use any of this to steal. Theft is wrong.
|
||||||
|
|
|
@ -11,8 +11,9 @@ from struct import unpack
|
||||||
|
|
||||||
|
|
||||||
class DocParser(object):
|
class DocParser(object):
|
||||||
def __init__(self, flatxml):
|
def __init__(self, flatxml, fontsize):
|
||||||
self.flatdoc = flatxml.split('\n')
|
self.flatdoc = flatxml.split('\n')
|
||||||
|
self.fontsize = int(fontsize)
|
||||||
|
|
||||||
stags = {
|
stags = {
|
||||||
'paragraph' : 'p',
|
'paragraph' : 'p',
|
||||||
|
@ -20,14 +21,14 @@ class DocParser(object):
|
||||||
}
|
}
|
||||||
|
|
||||||
attr_val_map = {
|
attr_val_map = {
|
||||||
'hang' : ('text-indent: ', 135),
|
'hang' : 'text-indent: ',
|
||||||
'indent' : ('text-indent: ', 135),
|
'indent' : 'text-indent: ',
|
||||||
'line-space' : ('line-height: ', 190),
|
'line-space' : 'line-height: ',
|
||||||
'margin-bottom' : ('margin-bottom: ', 135),
|
'margin-bottom' : 'margin-bottom: ',
|
||||||
'margin-left' : ('margin-left: ', 135),
|
'margin-left' : 'margin-left: ',
|
||||||
'margin-right' : ('margin-right: ', 135),
|
'margin-right' : 'margin-right: ',
|
||||||
'margin-top' : ('margin-top: ', 135),
|
'margin-top' : 'margin-top: ',
|
||||||
'space-after' : ('padding-bottom: ', 135),
|
'space-after' : 'padding-bottom: ',
|
||||||
}
|
}
|
||||||
|
|
||||||
attr_str_map = {
|
attr_str_map = {
|
||||||
|
@ -55,7 +56,7 @@ class DocParser(object):
|
||||||
for j in xrange(pos, end):
|
for j in xrange(pos, end):
|
||||||
item = docList[j]
|
item = docList[j]
|
||||||
if item.find('=') >= 0:
|
if item.find('=') >= 0:
|
||||||
(name, argres) = item.split('=')
|
(name, argres) = item.split('=',1)
|
||||||
else :
|
else :
|
||||||
name = item
|
name = item
|
||||||
argres = ''
|
argres = ''
|
||||||
|
@ -81,6 +82,7 @@ class DocParser(object):
|
||||||
|
|
||||||
def process(self):
|
def process(self):
|
||||||
|
|
||||||
|
classlst = ''
|
||||||
csspage = ''
|
csspage = ''
|
||||||
|
|
||||||
# generate a list of each <style> starting point in the stylesheet
|
# generate a list of each <style> starting point in the stylesheet
|
||||||
|
@ -132,23 +134,19 @@ class DocParser(object):
|
||||||
else :
|
else :
|
||||||
# handle value based attributes
|
# handle value based attributes
|
||||||
if attr in self.attr_val_map :
|
if attr in self.attr_val_map :
|
||||||
(name, scale) = self.attr_val_map[attr]
|
name = self.attr_val_map[attr]
|
||||||
|
scale = self.fontsize
|
||||||
|
if attr == 'line-space': scale = scale * 1.41
|
||||||
if not ((attr == 'hang') and (int(val) == 0)) :
|
if not ((attr == 'hang') and (int(val) == 0)) :
|
||||||
ems = int(val)/scale
|
ems = int(val)/scale
|
||||||
cssargs[attr] = (self.attr_val_map[attr][0], ems)
|
cssargs[attr] = (self.attr_val_map[attr], ems)
|
||||||
keep = True
|
keep = True
|
||||||
|
|
||||||
start = pos + 1
|
start = pos + 1
|
||||||
|
|
||||||
# disable all of the after class tags until I figure out how to handle them
|
# disable all of the after class tags until I figure out how to handle them
|
||||||
# remove all numerals after the "reclustered"
|
|
||||||
|
|
||||||
if aftclass != "" : keep = False
|
if aftclass != "" : keep = False
|
||||||
|
|
||||||
p = sclass.find('reclustered')
|
|
||||||
if p >= 0:
|
|
||||||
sclass = sclass[0:p+11]
|
|
||||||
|
|
||||||
if keep :
|
if keep :
|
||||||
# make sure line-space does not go below 1em
|
# make sure line-space does not go below 1em
|
||||||
if 'line-space' in cssargs:
|
if 'line-space' in cssargs:
|
||||||
|
@ -156,7 +154,7 @@ class DocParser(object):
|
||||||
val = cssargs['line-space'][1]
|
val = cssargs['line-space'][1]
|
||||||
if val < 1.0: val = 1.0
|
if val < 1.0: val = 1.0
|
||||||
del cssargs['line-space']
|
del cssargs['line-space']
|
||||||
cssargs['line-space'] = (self.attr_val_map['line-space'][0], val)
|
cssargs['line-space'] = (self.attr_val_map['line-space'], val)
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
@ -165,7 +163,7 @@ class DocParser(object):
|
||||||
hseg = cssargs['hang'][0]
|
hseg = cssargs['hang'][0]
|
||||||
hval = cssargs['hang'][1]
|
hval = cssargs['hang'][1]
|
||||||
del cssargs['hang']
|
del cssargs['hang']
|
||||||
cssargs['hang'] = (self.attr_val_map['hang'][0], -hval)
|
cssargs['hang'] = (self.attr_val_map['hang'], -hval)
|
||||||
mval = 0
|
mval = 0
|
||||||
mseg = 'margin-left: '
|
mseg = 'margin-left: '
|
||||||
if 'margin-left' in cssargs:
|
if 'margin-left' in cssargs:
|
||||||
|
@ -188,6 +186,9 @@ class DocParser(object):
|
||||||
|
|
||||||
cssline += '}'
|
cssline += '}'
|
||||||
|
|
||||||
|
if sclass != '' :
|
||||||
|
classlst += sclass + '\n'
|
||||||
|
|
||||||
# handle special case of paragraph class used inside chapter heading
|
# handle special case of paragraph class used inside chapter heading
|
||||||
# and non-chapter headings
|
# and non-chapter headings
|
||||||
if sclass != '' :
|
if sclass != '' :
|
||||||
|
@ -206,15 +207,16 @@ class DocParser(object):
|
||||||
csspage += 'h6' + cssline + '\n'
|
csspage += 'h6' + cssline + '\n'
|
||||||
|
|
||||||
csspage += self.stags[tag] + cssline + '\n'
|
csspage += self.stags[tag] + cssline + '\n'
|
||||||
|
|
||||||
return csspage
|
|
||||||
|
return csspage, classlst
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
def convert2CSS(flatxml):
|
def convert2CSS(flatxml, fontsize):
|
||||||
|
|
||||||
# create a document parser
|
# create a document parser
|
||||||
dp = DocParser(flatxml)
|
dp = DocParser(flatxml, fontsize)
|
||||||
|
|
||||||
csspage = dp.process()
|
csspage = dp.process()
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue
Block a user