mirror of
https://github.com/noDRM/DeDRM_tools.git
synced 2024-11-16 19:06:09 +06:00
topazscripts 1.5
This commit is contained in:
parent
c1e5943471
commit
a1fec0b54d
20
Topaz_Tools/lib/changes.txt
Normal file
20
Topaz_Tools/lib/changes.txt
Normal file
|
@ -0,0 +1,20 @@
|
||||||
|
Changes in version 1.5
|
||||||
|
- completely reworked generation of styles to use actual page heights and widths
|
||||||
|
- added new script getpagedim.py to support the above
|
||||||
|
- style names with underscores in them are now properly paired with their base class
|
||||||
|
- fixed hanging indents that did not ever set a left margin
|
||||||
|
- added support for a number of not previously known region types
|
||||||
|
- added support for a previously unknown snippet - <empty></empty>
|
||||||
|
- corrected a bug that caused unknown regions to abort the program
|
||||||
|
- added code to make the handling of unknown regions better in general
|
||||||
|
- corrected a bug that caused the last link on a page to be missing (if it was the last thing on the page)
|
||||||
|
|
||||||
|
Changes in version 1.3
|
||||||
|
- font generation by gensvg.py is now greatly improved with support for contour points added
|
||||||
|
- support for more region types
|
||||||
|
- support for inline images in paragraphs or text fields (ie. initial graphics for the first letter of a word)
|
||||||
|
- greatly improved dtd information used for the xml to prevent parsing mistakes
|
||||||
|
|
||||||
|
Version 1.0
|
||||||
|
- initial release
|
||||||
|
|
|
@ -93,7 +93,7 @@ def convert(i):
|
||||||
for j in xrange(len(val)):
|
for j in xrange(len(val)):
|
||||||
c = ord(val[j:j+1])
|
c = ord(val[j:j+1])
|
||||||
result += '%02x' % c
|
result += '%02x' % c
|
||||||
return result
|
return result
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
@ -209,6 +209,8 @@ class PageParser(object):
|
||||||
'wordStems' : (0, 'number', 1, 1),
|
'wordStems' : (0, 'number', 1, 1),
|
||||||
'wordStems.stemID' : (1, 'number', 0, 0),
|
'wordStems.stemID' : (1, 'number', 0, 0),
|
||||||
|
|
||||||
|
'empty' : (1, 'snippets', 1, 0),
|
||||||
|
|
||||||
'page' : (1, 'snippets', 1, 0),
|
'page' : (1, 'snippets', 1, 0),
|
||||||
'page.pageid' : (1, 'scalar_text', 0, 0),
|
'page.pageid' : (1, 'scalar_text', 0, 0),
|
||||||
'page.pagelabel' : (1, 'scalar_text', 0, 0),
|
'page.pagelabel' : (1, 'scalar_text', 0, 0),
|
||||||
|
@ -750,6 +752,7 @@ def main(argv):
|
||||||
|
|
||||||
# read in the string table dictionary
|
# read in the string table dictionary
|
||||||
dict = Dictionary(dictFile)
|
dict = Dictionary(dictFile)
|
||||||
|
# dict.dumpDict()
|
||||||
|
|
||||||
# create a page parser
|
# create a page parser
|
||||||
pp = PageParser(pageFile, dict, debug, flat_xml)
|
pp = PageParser(pageFile, dict, debug, flat_xml)
|
||||||
|
|
|
@ -90,20 +90,23 @@ class DocParser(object):
|
||||||
|
|
||||||
# class names are an issue given topaz may start them with numerals (not allowed),
|
# class names are an issue given topaz may start them with numerals (not allowed),
|
||||||
# use a mix of cases (which cause some browsers problems), and actually
|
# use a mix of cases (which cause some browsers problems), and actually
|
||||||
# attach numbers after "_reclustered*" to the end to deal with reflow issues
|
# attach numbers after "_reclustered*" to the end to deal with classes that inherit
|
||||||
# but then not actually provide all of these _reclustereed classes in the stylesheet!
|
# from a base class (but then not actually provide all of these _reclustered
|
||||||
|
# classes in the stylesheet!
|
||||||
|
|
||||||
# so we clean this up by lowercasing, prepend 'cl_', and if not in the class
|
# so we clean this up by lowercasing, prepend 'cl_', and getting any baseclass
|
||||||
# list from the stylesheet, trying once more with "_reclustered*" removed
|
# that exists in the stylesheet first, and then adding this specific class
|
||||||
# if still not in stylesheet, let it pass as is
|
# after
|
||||||
|
classres = ''
|
||||||
pclass = pclass.lower()
|
pclass = pclass.lower()
|
||||||
pclass = 'cl_' + pclass
|
pclass = 'cl-' + pclass
|
||||||
if pclass not in self.classList:
|
p = pclass.find('_')
|
||||||
p = pclass.find('_reclustered')
|
if p > 0 :
|
||||||
if p > 0 :
|
baseclass = pclass[0:p]
|
||||||
baseclass = pclass[0:p]
|
if baseclass in self.classList:
|
||||||
if baseclass in self.classList:
|
classres += baseclass + ' '
|
||||||
pclass = baseclass
|
classres += pclass
|
||||||
|
pclass = classres
|
||||||
|
|
||||||
# build up a description of the paragraph in result and return it
|
# build up a description of the paragraph in result and return it
|
||||||
# first check for the basic - all words paragraph
|
# first check for the basic - all words paragraph
|
||||||
|
@ -123,6 +126,12 @@ class DocParser(object):
|
||||||
line = start + 1
|
line = start + 1
|
||||||
word_class = ''
|
word_class = ''
|
||||||
|
|
||||||
|
# if end is -1 then we must search to end of document
|
||||||
|
if end == -1 :
|
||||||
|
docList = self.flatdoc
|
||||||
|
cnt = len(docList)
|
||||||
|
end = cnt
|
||||||
|
|
||||||
while (line < end) :
|
while (line < end) :
|
||||||
|
|
||||||
(name, argres) = self.lineinDoc(line)
|
(name, argres) = self.lineinDoc(line)
|
||||||
|
@ -139,7 +148,8 @@ class DocParser(object):
|
||||||
|
|
||||||
elif name.endswith('word.class'):
|
elif name.endswith('word.class'):
|
||||||
(cname, space) = argres.split('-',1)
|
(cname, space) = argres.split('-',1)
|
||||||
if cname == 'spaceafter':
|
if space == '' : space = '0'
|
||||||
|
if (cname == 'spaceafter') and (int(space) > 0) :
|
||||||
word_class = 'sa'
|
word_class = 'sa'
|
||||||
|
|
||||||
elif name.endswith('word.img.src'):
|
elif name.endswith('word.img.src'):
|
||||||
|
@ -166,7 +176,7 @@ class DocParser(object):
|
||||||
sep =''
|
sep =''
|
||||||
|
|
||||||
br_lb = False
|
br_lb = False
|
||||||
if (regtype == 'fixed') or (regtype == 'chapterheading') :
|
if (regtype == 'fixed') or (regtype == 'chapterheading'):
|
||||||
br_lb = True
|
br_lb = True
|
||||||
|
|
||||||
handle_links = False
|
handle_links = False
|
||||||
|
@ -193,7 +203,8 @@ class DocParser(object):
|
||||||
link = self.link_id[num]
|
link = self.link_id[num]
|
||||||
if (link > 0):
|
if (link > 0):
|
||||||
title = self.link_title[link-1]
|
title = self.link_title[link-1]
|
||||||
if title == "": title='_link_'
|
if (title == "") or (parares.rfind(title) < 0):
|
||||||
|
title='_link_'
|
||||||
ptarget = self.link_page[link-1] - 1
|
ptarget = self.link_page[link-1] - 1
|
||||||
linkhtml = '<a href="#page%04d">' % ptarget
|
linkhtml = '<a href="#page%04d">' % ptarget
|
||||||
linkhtml += title + '</a>'
|
linkhtml += title + '</a>'
|
||||||
|
@ -326,7 +337,7 @@ class DocParser(object):
|
||||||
htmlpage += self.buildParagraph(pclass, pdesc, 'middle', regtype)
|
htmlpage += self.buildParagraph(pclass, pdesc, 'middle', regtype)
|
||||||
htmlpage += '</' + tag + '>'
|
htmlpage += '</' + tag + '>'
|
||||||
|
|
||||||
elif (regtype == 'text') or (regtype == 'fixed') or (regtype == 'insert') :
|
elif (regtype == 'text') or (regtype == 'fixed') or (regtype == 'insert') or (regtype == 'listitem') :
|
||||||
ptype = 'full'
|
ptype = 'full'
|
||||||
# check to see if this is a continution from the previous page
|
# check to see if this is a continuation from the previous page
|
||||||
if (len(self.parastems_stemid) > 0):
|
if (len(self.parastems_stemid) > 0):
|
||||||
|
@ -348,7 +359,6 @@ class DocParser(object):
|
||||||
else :
|
else :
|
||||||
htmlpage += self.buildParagraph(pclass, pdesc, ptype, regtype)
|
htmlpage += self.buildParagraph(pclass, pdesc, ptype, regtype)
|
||||||
|
|
||||||
|
|
||||||
elif (regtype == 'tocentry') :
|
elif (regtype == 'tocentry') :
|
||||||
ptype = 'full'
|
ptype = 'full'
|
||||||
# check to see if this is a continution from the previous page
|
# check to see if this is a continuation from the previous page
|
||||||
|
@ -363,7 +373,7 @@ class DocParser(object):
|
||||||
(pclass, pdesc) = self.getParaDescription(start,end)
|
(pclass, pdesc) = self.getParaDescription(start,end)
|
||||||
htmlpage += self.buildParagraph(pclass, pdesc, ptype, regtype)
|
htmlpage += self.buildParagraph(pclass, pdesc, ptype, regtype)
|
||||||
|
|
||||||
elif regtype == 'synth_fcvr.center' :
|
elif (regtype == 'synth_fcvr.center') or (regtype == 'synth_text.center'):
|
||||||
if not anchorSet:
|
if not anchorSet:
|
||||||
htmlpage += '<div id="' + self.id + '" class="page_' + pagetype + '"> </div>\n'
|
htmlpage += '<div id="' + self.id + '" class="page_' + pagetype + '"> </div>\n'
|
||||||
anchorSet = True
|
anchorSet = True
|
||||||
|
@ -373,30 +383,38 @@ class DocParser(object):
|
||||||
|
|
||||||
else :
|
else :
|
||||||
print 'Warning: Unknown region type', regtype
|
print 'Warning: Unknown region type', regtype
|
||||||
print 'Treating this like a "fixed" region'
|
(pos, temp) = self.findinDoc('paragraph',start,end)
|
||||||
regtype = 'fixed'
|
if temp:
|
||||||
ptype = 'full'
|
print 'Treating this like a "text" region'
|
||||||
# check to see if this is a continution from the previous page
|
regtype = 'fixed'
|
||||||
if (len(self.parastems_stemid) > 0):
|
ptype = 'full'
|
||||||
ptype = 'end'
|
# check to see if this is a continuation from the previous page
|
||||||
self.parastems_stemid=[]
|
if (len(self.parastems_stemid) > 0):
|
||||||
else:
|
ptype = 'end'
|
||||||
|
self.parastems_stemid=[]
|
||||||
|
else:
|
||||||
|
if not anchorSet:
|
||||||
|
htmlpage += '<div id="' + self.id + '" class="page_' + pagetype + '"> </div>\n'
|
||||||
|
anchorSet = True
|
||||||
|
(pclass, pdesc) = self.getParaDescription(start,end)
|
||||||
|
if ptype == 'full' :
|
||||||
|
tag = 'p'
|
||||||
|
if pclass[3:6] == 'h1-' : tag = 'h4'
|
||||||
|
if pclass[3:6] == 'h2-' : tag = 'h5'
|
||||||
|
if pclass[3:6] == 'h3-' : tag = 'h6'
|
||||||
|
htmlpage += '<' + tag + ' class="' + pclass + '">'
|
||||||
|
htmlpage += self.buildParagraph(pclass, pdesc, 'middle', regtype)
|
||||||
|
htmlpage += '</' + tag + '>'
|
||||||
|
else :
|
||||||
|
htmlpage += self.buildParagraph(pclass, pdesc, ptype, regtype)
|
||||||
|
else :
|
||||||
|
print 'Treating this like a "image" region'
|
||||||
if not anchorSet:
|
if not anchorSet:
|
||||||
htmlpage += '<div id="' + self.id + '" class="page_' + pagetype + '"> </div>\n'
|
htmlpage += '<div id="' + self.id + '" class="page_' + pagetype + '"> </div>\n'
|
||||||
anchorSet = True
|
anchorSet = True
|
||||||
(pclass, desc) = self.getParaDescription(start,end)
|
(pos, simgsrc) = self.findinDoc('img.src',start,end)
|
||||||
if ptype == 'full' :
|
if simgsrc:
|
||||||
tag = 'p'
|
htmlpage += '<div class="graphic"><img src="img/img%04d.jpg" alt="" /></div>' % int(simgsrc)
|
||||||
if pclass[3:6] == 'h1-' : tag = 'h4'
|
|
||||||
if pclass[3:6] == 'h2-' : tag = 'h5'
|
|
||||||
if pclass[3:6] == 'h3-' : tag = 'h6'
|
|
||||||
htmlpage += '<' + tag + ' class="' + pclass + '">'
|
|
||||||
htmlpage += self.buildParagraph(pclass, pdesc, 'middle', regtype)
|
|
||||||
htmlpage += '</' + tag + '>'
|
|
||||||
else :
|
|
||||||
htmlpage += self.buildParagraph(pclass, pdesc, ptype, regtype)
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
if len(self.paracont_stemid) > 0 :
|
if len(self.paracont_stemid) > 0 :
|
||||||
if htmlpage[-4:] == '</p>':
|
if htmlpage[-4:] == '</p>':
|
||||||
|
|
|
@ -8,7 +8,7 @@ import convert2xml
|
||||||
import flatxml2html
|
import flatxml2html
|
||||||
import decode_meta
|
import decode_meta
|
||||||
import stylexml2css
|
import stylexml2css
|
||||||
|
import getpagedim
|
||||||
|
|
||||||
def usage():
|
def usage():
|
||||||
print 'Usage: '
|
print 'Usage: '
|
||||||
|
@ -86,6 +86,7 @@ def main(argv):
|
||||||
|
|
||||||
htmlstr += '<head>\n'
|
htmlstr += '<head>\n'
|
||||||
|
|
||||||
|
# process metadata and retrieve fontSize info
|
||||||
print ' ', 'metadata0000.dat'
|
print ' ', 'metadata0000.dat'
|
||||||
fname = os.path.join(bookDir,'metadata0000.dat')
|
fname = os.path.join(bookDir,'metadata0000.dat')
|
||||||
xname = os.path.join(bookDir, 'metadata.txt')
|
xname = os.path.join(bookDir, 'metadata.txt')
|
||||||
|
@ -100,12 +101,27 @@ def main(argv):
|
||||||
if 'fontSize' in meta_array:
|
if 'fontSize' in meta_array:
|
||||||
fontsize = meta_array['fontSize']
|
fontsize = meta_array['fontSize']
|
||||||
|
|
||||||
|
# also get the size of a normal text page
|
||||||
|
spage = '1'
|
||||||
|
if 'firstTextPage' in meta_array:
|
||||||
|
spage = meta_array['firstTextPage']
|
||||||
|
pnum = int(spage)
|
||||||
|
|
||||||
|
# get page height and width from first text page for use in stylesheet scaling
|
||||||
|
pname = 'page%04d.dat' % pnum
|
||||||
|
fname = os.path.join(pageDir,pname)
|
||||||
|
flat_xml = convert2xml.main('convert2xml.py --flat-xml ' + dictFile + ' ' + fname)
|
||||||
|
(ph, pw) = getpagedim.getPageDim(flat_xml)
|
||||||
|
if (ph == '-1') : ph = 11000
|
||||||
|
if (pw == '-1') : pw = 8500
|
||||||
|
|
||||||
|
# now build up the style sheet
|
||||||
print ' ', 'other0000.dat'
|
print ' ', 'other0000.dat'
|
||||||
fname = os.path.join(bookDir,'other0000.dat')
|
fname = os.path.join(bookDir,'other0000.dat')
|
||||||
xname = os.path.join(bookDir, 'style.css')
|
xname = os.path.join(bookDir, 'style.css')
|
||||||
xmlstr = convert2xml.main('convert2xml.py --flat-xml ' + dictFile + ' ' + fname)
|
xmlstr = convert2xml.main('convert2xml.py --flat-xml ' + dictFile + ' ' + fname)
|
||||||
htmlstr += '<style>\n'
|
htmlstr += '<style>\n'
|
||||||
cssstr , classlst = stylexml2css.convert2CSS(xmlstr, fontsize)
|
cssstr , classlst = stylexml2css.convert2CSS(xmlstr, fontsize, ph, pw)
|
||||||
file(xname, 'wb').write(cssstr)
|
file(xname, 'wb').write(cssstr)
|
||||||
htmlstr += cssstr
|
htmlstr += cssstr
|
||||||
htmlstr += '</style>\n'
|
htmlstr += '</style>\n'
|
||||||
|
|
53
Topaz_Tools/lib/getpagedim.py
Normal file
53
Topaz_Tools/lib/getpagedim.py
Normal file
|
@ -0,0 +1,53 @@
|
||||||
|
#! /usr/bin/python
|
||||||
|
# vim:ts=4:sw=4:softtabstop=4:smarttab:expandtab
|
||||||
|
|
||||||
|
from __future__ import with_statement
|
||||||
|
import csv
|
||||||
|
import sys
|
||||||
|
import os
|
||||||
|
import getopt
|
||||||
|
from struct import pack
|
||||||
|
from struct import unpack
|
||||||
|
|
||||||
|
|
||||||
|
class DocParser(object):
    """Look up page dimensions in a "flattened" xml page description.

    The flattened xml is handled as one entry per line, each either
    ``tag.path=value`` or a bare ``tag.path`` with no value.
    """

    def __init__(self, flatxml):
        """Split the flattened xml text *flatxml* into a list of lines."""
        self.flatdoc = flatxml.split('\n')

    def findinDoc(self, tagpath, pos, end):
        """Find the first line whose tag name ends with *tagpath*.

        Lines ``pos`` up to, but not including, ``end`` are scanned; an
        ``end`` of -1 means "scan to the end of the document" and any other
        ``end`` is clamped to the document length.

        Returns a ``(foundat, result)`` tuple: the index of the matching
        line and the text after its ``=`` (``''`` when the line has no
        ``=``), or ``(-1, None)`` when nothing matches.
        """
        docList = self.flatdoc
        cnt = len(docList)
        if end == -1:
            end = cnt
        else:
            end = min(cnt, end)
        # range (rather than xrange) keeps this usable on Python 2 and 3
        for j in range(pos, end):
            item = docList[j]
            if '=' in item:
                # split on the first '=' only, so a value that itself
                # contains '=' no longer raises a ValueError on unpacking
                (name, argres) = item.split('=', 1)
            else:
                name = item
                argres = ''
            if name.endswith(tagpath):
                return j, argres
        return -1, None

    def process(self):
        """Return ``(height, width)`` of the page as strings.

        The values come from the first ``page.h`` and ``page.w`` entries in
        the document; a missing entry is reported as the string ``'-1'``.
        """
        (pos, sph) = self.findinDoc('page.h', 0, -1)
        (pos, spw) = self.findinDoc('page.w', 0, -1)
        if sph is None:
            sph = '-1'
        if spw is None:
            spw = '-1'
        return sph, spw
|
||||||
|
|
||||||
|
|
||||||
|
def getPageDim(flatxml):
    """Return the (height, width) pair found in the flattened xml *flatxml*.

    The text is handed to a DocParser, and the two values produced by its
    process() method are returned unchanged.
    """
    # let a document parser do the lookup of the page dimensions
    parser = DocParser(flatxml)
    page_height, page_width = parser.process()
    return page_height, page_width
|
|
@ -3,7 +3,8 @@ Contributors:
|
||||||
clarknova - for all of the svg and glyph generation and many other bug fixes and improvements
|
clarknova - for all of the svg and glyph generation and many other bug fixes and improvements
|
||||||
skindle - for figuing out the general case for the mode loops
|
skindle - for figuring out the general case for the mode loops
|
||||||
some updates - for conversion to xml, basic html
|
some updates - for conversion to xml, basic html
|
||||||
DiapDealer - for extensive testing and feeback
|
DiapDealer - for extensive testing and feedback
|
||||||
|
stewball - for extensive testing and feedback
|
||||||
|
|
||||||
and others for posting, feedback and testing
|
and others for posting, feedback and testing
|
||||||
|
|
||||||
|
@ -23,12 +24,13 @@ decode_meta.py - converts metadata0000.dat to human readable text (for the most
|
||||||
convert2xml.py - converts page*.dat, other*.dat, and glyphs*.dat files to pseudo xml descriptions
|
convert2xml.py - converts page*.dat, other*.dat, and glyphs*.dat files to pseudo xml descriptions
|
||||||
flatxml2html.py - converts a "flattened" xml description to html using the ocrtext
|
flatxml2html.py - converts a "flattened" xml description to html using the ocrtext
|
||||||
stylexml2css.py - converts stylesheet "flattened" xml into css (as best it can)
|
stylexml2css.py - converts stylesheet "flattened" xml into css (as best it can)
|
||||||
|
getpagedim.py - extracts the page height and width parameters from the "flattened" xml of a page (genhtml.py passes it the first text page)
|
||||||
genxml.py - main program to convert everything to xml
|
genxml.py - main program to convert everything to xml
|
||||||
genhtml.py - main program to generate "book.html"
|
genhtml.py - main program to generate "book.html"
|
||||||
gensvg.py - (author: clarknova) main program to create an svg grpahic of each page
|
gensvg.py - (author: clarknova) main program to create an svg graphic of each page
|
||||||
|
|
||||||
Please note, gensvg.py, genhtml.py, and genxml.py import and use
|
Please note, gensvg.py, genhtml.py, and genxml.py import and use
|
||||||
decode_meta.py, convert2xml.py, flatxml2html.py, and stylexml2css.py
|
decode_meta.py, convert2xml.py, flatxml2html.py, getpagedim.py and stylexml2css.py
|
||||||
so please keep all of these python scripts together in the same place.
|
so please keep all of these python scripts together in the same place.
|
||||||
|
|
||||||
|
|
||||||
|
|
|
@ -11,9 +11,11 @@ from struct import unpack
|
||||||
|
|
||||||
|
|
||||||
class DocParser(object):
|
class DocParser(object):
|
||||||
def __init__(self, flatxml, fontsize):
|
def __init__(self, flatxml, fontsize, ph, pw):
|
||||||
self.flatdoc = flatxml.split('\n')
|
self.flatdoc = flatxml.split('\n')
|
||||||
self.fontsize = int(fontsize)
|
self.fontsize = int(fontsize)
|
||||||
|
self.ph = int(ph) * 1.0
|
||||||
|
self.pw = int(pw) * 1.0
|
||||||
|
|
||||||
stags = {
|
stags = {
|
||||||
'paragraph' : 'p',
|
'paragraph' : 'p',
|
||||||
|
@ -106,14 +108,14 @@ class DocParser(object):
|
||||||
# get the style class
|
# get the style class
|
||||||
(pos, sclass) = self.findinDoc('style.class',start,end)
|
(pos, sclass) = self.findinDoc('style.class',start,end)
|
||||||
if sclass != None:
|
if sclass != None:
|
||||||
sclass = '.cl_' + sclass.lower()
|
sclass = '.cl-' + sclass.lower()
|
||||||
else :
|
else :
|
||||||
sclass = ''
|
sclass = ''
|
||||||
|
|
||||||
# check for any "after class" specifiers
|
# check for any "after class" specifiers
|
||||||
(pos, aftclass) = self.findinDoc('style._after_class',start,end)
|
(pos, aftclass) = self.findinDoc('style._after_class',start,end)
|
||||||
if aftclass != None:
|
if aftclass != None:
|
||||||
aftclass = '.cl_' + aftclass.lower()
|
aftclass = '.cl-' + aftclass.lower()
|
||||||
else :
|
else :
|
||||||
aftclass = ''
|
aftclass = ''
|
||||||
|
|
||||||
|
@ -121,8 +123,8 @@ class DocParser(object):
|
||||||
|
|
||||||
while True :
|
while True :
|
||||||
|
|
||||||
(pos, attr) = self.findinDoc('style.rule.attr', start, end)
|
(pos1, attr) = self.findinDoc('style.rule.attr', start, end)
|
||||||
(pos, val) = self.findinDoc('style.rule.value', start, end)
|
(pos2, val) = self.findinDoc('style.rule.value', start, end)
|
||||||
|
|
||||||
if attr == None : break
|
if attr == None : break
|
||||||
|
|
||||||
|
@ -135,28 +137,34 @@ class DocParser(object):
|
||||||
# handle value based attributes
|
# handle value based attributes
|
||||||
if attr in self.attr_val_map :
|
if attr in self.attr_val_map :
|
||||||
name = self.attr_val_map[attr]
|
name = self.attr_val_map[attr]
|
||||||
scale = self.fontsize
|
if attr in ('margin-bottom', 'margin-top', 'space-after') :
|
||||||
if attr == 'line-space': scale = scale * 1.41
|
scale = self.ph
|
||||||
|
elif attr in ('margin-right', 'indent', 'margin-left', 'hang') :
|
||||||
|
scale = self.pw
|
||||||
|
elif attr == 'line-space':
|
||||||
|
scale = self.fontsize * 2.0
|
||||||
|
|
||||||
if not ((attr == 'hang') and (int(val) == 0)) :
|
if not ((attr == 'hang') and (int(val) == 0)) :
|
||||||
ems = int(val)/scale
|
pv = float(val)/scale
|
||||||
cssargs[attr] = (self.attr_val_map[attr], ems)
|
cssargs[attr] = (self.attr_val_map[attr], pv)
|
||||||
keep = True
|
keep = True
|
||||||
|
|
||||||
start = pos + 1
|
start = max(pos1, pos2) + 1
|
||||||
|
|
||||||
# disable all of the after class tags until I figure out how to handle them
|
# disable all of the after class tags until I figure out how to handle them
|
||||||
if aftclass != "" : keep = False
|
if aftclass != "" : keep = False
|
||||||
|
|
||||||
if keep :
|
if keep :
|
||||||
# make sure line-space does not go below 1em
|
# make sure line-space does not go below 100% or above 300% since
|
||||||
|
# it can be wacky in some styles
|
||||||
if 'line-space' in cssargs:
|
if 'line-space' in cssargs:
|
||||||
seg = cssargs['line-space'][0]
|
seg = cssargs['line-space'][0]
|
||||||
val = cssargs['line-space'][1]
|
val = cssargs['line-space'][1]
|
||||||
if val < 1.0: val = 1.0
|
if val < 1.0: val = 1.0
|
||||||
|
if val > 3.0: val = 3.0
|
||||||
del cssargs['line-space']
|
del cssargs['line-space']
|
||||||
cssargs['line-space'] = (self.attr_val_map['line-space'], val)
|
cssargs['line-space'] = (self.attr_val_map['line-space'], val)
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
# handle modifications for css style hanging indents
|
# handle modifications for css style hanging indents
|
||||||
if 'hang' in cssargs:
|
if 'hang' in cssargs:
|
||||||
|
@ -166,11 +174,13 @@ class DocParser(object):
|
||||||
cssargs['hang'] = (self.attr_val_map['hang'], -hval)
|
cssargs['hang'] = (self.attr_val_map['hang'], -hval)
|
||||||
mval = 0
|
mval = 0
|
||||||
mseg = 'margin-left: '
|
mseg = 'margin-left: '
|
||||||
|
mval = hval
|
||||||
if 'margin-left' in cssargs:
|
if 'margin-left' in cssargs:
|
||||||
mseg = cssargs['margin-left'][0]
|
mseg = cssargs['margin-left'][0]
|
||||||
mval = cssargs['margin-left'][1]
|
mval = cssargs['margin-left'][1]
|
||||||
|
if mval < 0: mval = 0
|
||||||
mval = hval + mval
|
mval = hval + mval
|
||||||
cssargs['margin-left'] = (mseg, mval)
|
cssargs['margin-left'] = (mseg, mval)
|
||||||
if 'indent' in cssargs:
|
if 'indent' in cssargs:
|
||||||
del cssargs['indent']
|
del cssargs['indent']
|
||||||
|
|
||||||
|
@ -181,7 +191,7 @@ class DocParser(object):
|
||||||
if mval == '':
|
if mval == '':
|
||||||
cssline += mseg + ' '
|
cssline += mseg + ' '
|
||||||
else :
|
else :
|
||||||
aseg = mseg + '%.1fem;' % mval
|
aseg = mseg + '%.1f%%;' % (mval * 100.0)
|
||||||
cssline += aseg + ' '
|
cssline += aseg + ' '
|
||||||
|
|
||||||
cssline += '}'
|
cssline += '}'
|
||||||
|
@ -213,10 +223,14 @@ class DocParser(object):
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
def convert2CSS(flatxml, fontsize):
|
def convert2CSS(flatxml, fontsize, ph, pw):
|
||||||
|
|
||||||
|
print ' ', 'Using font size:',fontsize
|
||||||
|
print ' ', 'Using page height:', ph
|
||||||
|
print ' ', 'Using page width:', pw
|
||||||
|
|
||||||
# create a document parser
|
# create a document parser
|
||||||
dp = DocParser(flatxml, fontsize)
|
dp = DocParser(flatxml, fontsize, ph, pw)
|
||||||
|
|
||||||
csspage = dp.process()
|
csspage = dp.process()
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue
Block a user