DeDRM_tools/DeDRM_plugin/stylexml2css.py

291 lines
11 KiB
Python
Raw Permalink Normal View History

2012-11-20 19:28:12 +06:00
#! /usr/bin/python
# vim:ts=4:sw=4:softtabstop=4:smarttab:expandtab
# For use with Topaz Scripts Version 2.6
2012-11-20 19:28:12 +06:00
import csv
import sys
import os
import getopt
import re
from struct import pack
from struct import unpack
2012-12-27 05:17:56 +06:00
# Module-wide switch: when True, the parser and convert2CSS print progress
# and per-style diagnostic output.
debug = False
2012-11-20 19:28:12 +06:00
class DocParser(object):
    """Convert the style entries of a flattened stylesheet document into CSS.

    The document is held as a list of byte-string lines of the form
    ``tag.path=value`` (or just ``tag.path``).  Tag names and values stay
    ``bytes`` internally; everything returned by :meth:`process` is ``str``.
    """

    def __init__(self, flatxml, fontsize, ph, pw):
        """Split the flattened document into lines and store the page metrics.

        flatxml  -- the flattened document as bytes (str is encoded as UTF-8)
        fontsize -- base font size, used to scale ``line-space`` values
        ph, pw   -- page height / width, used to scale vertical / horizontal
                    measurements into fractions of the page
        """
        if isinstance(flatxml, str):
            flatxml = flatxml.encode('utf-8')
        self.flatdoc = flatxml.split(b'\n')
        self.fontsize = int(fontsize)
        self.ph = int(ph) * 1.0
        self.pw = int(pw) * 1.0

    # style tag/type values we know how to convert -> CSS selector prefix
    stags = {
        b'paragraph' : 'p',
        b'graphic' : '.graphic'
    }

    # numeric rule attributes -> CSS property prefix
    attr_val_map = {
        b'hang' : 'text-indent: ',
        b'indent' : 'text-indent: ',
        b'line-space' : 'line-height: ',
        b'margin-bottom' : 'margin-bottom: ',
        b'margin-left' : 'margin-left: ',
        b'margin-right' : 'margin-right: ',
        b'margin-top' : 'margin-top: ',
        b'space-after' : 'padding-bottom: ',
    }

    # "<attr>-<value>" text rules -> complete CSS declaration(s)
    attr_str_map = {
        b'align-center' : 'text-align: center; margin-left: auto; margin-right: auto;',
        b'align-left' : 'text-align: left;',
        b'align-right' : 'text-align: right;',
        b'align-justify' : 'text-align: justify;',
        b'display-inline' : 'display: inline;',
        b'pos-left' : 'text-align: left;',
        b'pos-right' : 'text-align: right;',
        b'pos-center' : 'text-align: center; margin-left: auto; margin-right: auto;',
    }

    # characters 4..6 of the '.cl-xxx' selector -> extra heading selector.
    # NOTE(review): 'h3_' breaks the 'h1-'/'h2-' pattern; kept as found -
    # confirm against real stylesheets before changing it.
    _heading_tags = {
        'ch1' : 'h1',
        'ch2' : 'h2',
        'ch3' : 'h3',
        'h1-' : 'h4',
        'h2-' : 'h5',
        'h3_' : 'h6',
    }

    # find tag if within pos to end inclusive
    def findinDoc(self, tagpath, pos, end) :
        """Find the first line in [pos, end) whose tag name ends with tagpath.

        tagpath may be bytes or str; ``end == -1`` means "to the end of the
        document".  Returns ``(line_index, value)`` where value is the bytes
        after the first ``=`` (``b''`` when the line has none), or
        ``(-1, None)`` when nothing matches.
        """
        # convert once up front instead of on every scanned line
        if isinstance(tagpath, str):
            tagpath = tagpath.encode('utf-8')
        docList = self.flatdoc
        cnt = len(docList)
        end = cnt if end == -1 else min(cnt, end)
        for j in range(pos, end):
            name, _, argres = docList[j].partition(b'=')
            if name.endswith(tagpath):
                return j, argres
        return -1, None

    # return list of start positions for the tagpath
    def posinDoc(self, tagpath):
        """Return the line index of every line matching tagpath, in order."""
        startpos = []
        pos = 0
        while True:
            (foundpos, res) = self.findinDoc(tagpath, pos, -1)
            if res is None:
                return startpos
            startpos.append(foundpos)
            pos = foundpos + 1

    # returns a vector of integers for the tagpath
    def getData(self, tagpath, pos, end, clean=False):
        """Return the ``|``-separated value of the first tagpath match as ints.

        With ``clean=True`` each field is reduced to its first run of digits
        before conversion.  Returns ``[]`` when the tag is missing or has an
        empty value.  Raises ValueError if a field is not an integer literal.
        """
        (foundat, argt) = self.findinDoc(tagpath, pos, end)
        if not argt:
            return []
        digits_only = re.compile(rb'([0-9]+)') if clean else None
        argres = []
        for strval in argt.split(b'|'):
            if clean:
                m = digits_only.search(strval)
                if m is not None:
                    strval = m.group()
            argres.append(int(strval))
        return argres

    def _class_selector(self, tagpath, start, end):
        """Return '.cl-<name>' (str) for the class stored at tagpath, or ''."""
        (pos, name) = self.findinDoc(tagpath, start, end)
        if name is None:
            return ''
        # spaces are not valid inside a CSS class name
        name = name.replace(b' ', b'-').lower()
        return '.cl-' + name.decode('utf-8', 'replace')

    @staticmethod
    def _to_number(val):
        """Parse a rule value as float; missing or unparsable values give 0.0."""
        if not val:
            return 0.0
        if isinstance(val, (bytes, bytearray)):
            val = val.decode('ascii', 'replace')
        try:
            return float(val)
        except (TypeError, ValueError):
            print("Warning: unrecognised val, ignoring")
            return 0.0

    def _scale_for(self, attr):
        """Return the divisor that turns a raw value of attr into a fraction."""
        if attr in (b'margin-bottom', b'margin-top', b'space-after'):
            return self.ph
        if attr in (b'margin-right', b'indent', b'margin-left', b'hang'):
            return self.pw
        if attr == b'line-space':
            return self.fontsize * 2.0
        print("Scale not defined!")
        return 1.0

    def _collect_rules(self, start, end):
        """Gather the convertible rules of one style.

        Returns ``(cssargs, keep)``.  cssargs maps the rule key (bytes) to
        ``(css_text, value)`` where value is a float fraction for numeric
        rules and None for text rules whose css_text is already complete.
        keep is True once at least one numeric rule has been stored.
        """
        cssargs = {}
        # NOTE(review): only numeric rules mark a style as worth keeping, so
        # a style holding nothing but align/pos/display rules is not emitted.
        keep = False
        while True:
            (pos1, attr) = self.findinDoc(b'style.rule.attr', start, end)
            (pos2, val) = self.findinDoc(b'style.rule.value', start, end)
            if debug: print('attr', attr)
            if debug: print('val', val)
            if attr is None:
                break
            if attr in (b'display', b'pos', b'align'):
                # handle text based attributes; without a value there is
                # nothing to look up
                if val is not None:
                    key = attr + b'-' + val
                    if key in self.attr_str_map:
                        cssargs[key] = (self.attr_str_map[key], None)
            elif attr in self.attr_val_map:
                # handle value based attributes
                scale = self._scale_for(attr)
                number = self._to_number(val)
                # a zero hanging indent carries no information
                if not (attr == b'hang' and number == 0):
                    cssargs[attr] = (self.attr_val_map[attr], number / scale)
                    keep = True
            # continue the scan after whichever of the pair came last
            start = max(pos1, pos2) + 1
        return cssargs, keep

    def _adjust_rules(self, cssargs):
        """Clamp line spacing and rewrite hanging indents, in place."""
        # make sure line-space does not go below 100% or above 300% since
        # it can be wacky in some styles
        if b'line-space' in cssargs:
            val = cssargs.pop(b'line-space')[1]
            val = min(max(val, 1.0), 3.0)
            cssargs[b'line-space'] = (self.attr_val_map[b'line-space'], val)

        # handle modifications for css style hanging indents: a negative
        # text-indent combined with a left margin widened by the same amount
        if b'hang' in cssargs:
            hval = cssargs.pop(b'hang')[1]
            cssargs[b'hang'] = (self.attr_val_map[b'hang'], -hval)
            mseg = 'margin-left: '
            mval = hval
            if b'margin-left' in cssargs:
                (mseg, mval) = cssargs[b'margin-left']
                if mval < 0: mval = 0
                mval = hval + mval
            cssargs[b'margin-left'] = (mseg, mval)
            # 'hang' and 'indent' both map to text-indent; hang wins
            cssargs.pop(b'indent', None)

    def process(self):
        """Translate every convertible style of the document into CSS.

        Returns ``(csspage, classlst)``: the CSS text and a newline-terminated
        list of the ``.cl-...`` class selectors that were emitted, both str.
        """
        classlst = ''
        csspage = '.cl-center { text-align: center; margin-left: auto; margin-right: auto; }\n'
        csspage += '.cl-right { text-align: right; }\n'
        csspage += '.cl-left { text-align: left; }\n'
        csspage += '.cl-justify { text-align: justify; }\n'

        # generate a list of each <style> starting point in the stylesheet
        styleList = self.posinDoc(b'book.stylesheet.style')
        stylecnt = len(styleList)
        # sentinel: the last style runs to the end of the document
        styleList.append(-1)

        # process each style converting what you can
        if debug: print(' ', 'Processing styles.')
        for j in range(stylecnt):
            if debug: print(' ', 'Processing style %d' %(j))
            start = styleList[j]
            end = styleList[j+1]

            (pos, tag) = self.findinDoc(b'style._tag', start, end)
            if tag is None:
                (pos, tag) = self.findinDoc(b'style.type', start, end)

            # Is this something we know how to convert to css
            if tag not in self.stags:
                continue

            # get the style class
            sclass = self._class_selector(b'style.class', start, end)
            if debug: print('sclass', sclass)

            # check for any "after class" specifiers
            aftclass = self._class_selector(b'style._after_class', start, end)
            if debug: print('aftclass', aftclass)

            cssargs, keep = self._collect_rules(start, end)

            # disable all of the after class tags until I figure out how to handle them
            if aftclass != '':
                keep = False
            if not keep:
                continue
            if debug: print('keeping style')

            self._adjust_rules(cssargs)

            cssline = sclass + ' { '
            for (mseg, mval) in cssargs.values():
                if mval is None:
                    # text rule: the declaration is already complete
                    cssline += mseg + ' '
                else:
                    cssline += mseg + '%.1f%%;' % (mval * 100.0) + ' '
            cssline += '}'

            if sclass != '':
                classlst += sclass + '\n'
                # handle special case of paragraph class used inside chapter
                # heading and non-chapter headings
                heading = self._heading_tags.get(sclass[4:7])
                if heading is not None:
                    csspage += heading + cssline + '\n'

            if cssline != ' { }':
                csspage += self.stags[tag] + cssline + '\n'

        return csspage, classlst
def convert2CSS(flatxml, fontsize, ph, pw):
    """Report the page metrics, then run a DocParser over flatxml.

    Returns whatever DocParser.process() returns for the given document,
    font size, page height and page width.
    """
    settings = (
        ('Using font size:', fontsize),
        ('Using page height:', ph),
        ('Using page width:', pw),
    )
    for label, value in settings:
        print(' ', label, value)

    # create a document parser
    parser = DocParser(flatxml, fontsize, ph, pw)
    if debug:
        print(' ', 'Created DocParser.')

    result = parser.process()
    if debug:
        print(' ', 'Processed DocParser.')
    return result
def getpageIDMap(flatxml):
    """Return the integers stored under 'info.original.pid' in flatxml.

    The page metrics are irrelevant for this lookup, so the parser is
    created with zeros for font size, page height and page width.
    """
    parser = DocParser(flatxml, 0, 0, 0)
    return parser.getData('info.original.pid', 0, -1, True)