#! /usr/bin/python
# vim:ts=4:sw=4:softtabstop=4:smarttab:expandtab
import sys
import csv
import os
import getopt
from struct import pack
from struct import unpack
class PParser(object):
def __init__(self, gd, flatxml, meta_array):
self.gd = gd
self.flatdoc = flatxml.split('\n')
self.docSize = len(self.flatdoc)
self.temp = []
self.ph = -1
self.pw = -1
startpos = self.posinDoc('page.h') or self.posinDoc('book.h')
for p in startpos:
(name, argres) = self.lineinDoc(p)
self.ph = max(self.ph, int(argres))
startpos = self.posinDoc('page.w') or self.posinDoc('book.w')
for p in startpos:
(name, argres) = self.lineinDoc(p)
self.pw = max(self.pw, int(argres))
if self.ph <= 0:
self.ph = int(meta_array.get('pageHeight', '11000'))
if self.pw <= 0:
self.pw = int(meta_array.get('pageWidth', '8500'))
res = []
startpos = self.posinDoc('info.glyph.x')
for p in startpos:
argres = self.getDataatPos('info.glyph.x', p)
res.extend(argres)
self.gx = res
res = []
startpos = self.posinDoc('info.glyph.y')
for p in startpos:
argres = self.getDataatPos('info.glyph.y', p)
res.extend(argres)
self.gy = res
res = []
startpos = self.posinDoc('info.glyph.glyphID')
for p in startpos:
argres = self.getDataatPos('info.glyph.glyphID', p)
res.extend(argres)
self.gid = res
# return tag at line pos in document
def lineinDoc(self, pos) :
if (pos >= 0) and (pos < self.docSize) :
item = self.flatdoc[pos]
if item.find('=') >= 0:
(name, argres) = item.split('=',1)
else :
name = item
argres = ''
return name, argres
# find tag in doc if within pos to end inclusive
def findinDoc(self, tagpath, pos, end) :
result = None
if end == -1 :
end = self.docSize
else:
end = min(self.docSize, end)
foundat = -1
for j in xrange(pos, end):
item = self.flatdoc[j]
if item.find('=') >= 0:
(name, argres) = item.split('=',1)
else :
name = item
argres = ''
if name.endswith(tagpath) :
result = argres
foundat = j
break
return foundat, result
# return list of start positions for the tagpath
def posinDoc(self, tagpath):
startpos = []
pos = 0
res = ""
while res != None :
(foundpos, res) = self.findinDoc(tagpath, pos, -1)
if res != None :
startpos.append(foundpos)
pos = foundpos + 1
return startpos
def getData(self, path):
result = None
cnt = len(self.flatdoc)
for j in xrange(cnt):
item = self.flatdoc[j]
if item.find('=') >= 0:
(name, argt) = item.split('=')
argres = argt.split('|')
else:
name = item
argres = []
if (name.endswith(path)):
result = argres
break
if (len(argres) > 0) :
for j in xrange(0,len(argres)):
argres[j] = int(argres[j])
return result
def getDataatPos(self, path, pos):
result = None
item = self.flatdoc[pos]
if item.find('=') >= 0:
(name, argt) = item.split('=')
argres = argt.split('|')
else:
name = item
argres = []
if (len(argres) > 0) :
for j in xrange(0,len(argres)):
argres[j] = int(argres[j])
if (name.endswith(path)):
result = argres
return result
def getDataTemp(self, path):
result = None
cnt = len(self.temp)
for j in xrange(cnt):
item = self.temp[j]
if item.find('=') >= 0:
(name, argt) = item.split('=')
argres = argt.split('|')
else:
name = item
argres = []
if (name.endswith(path)):
result = argres
self.temp.pop(j)
break
if (len(argres) > 0) :
for j in xrange(0,len(argres)):
argres[j] = int(argres[j])
return result
def getImages(self):
result = []
self.temp = self.flatdoc
while (self.getDataTemp('img') != None):
h = self.getDataTemp('img.h')[0]
w = self.getDataTemp('img.w')[0]
x = self.getDataTemp('img.x')[0]
y = self.getDataTemp('img.y')[0]
src = self.getDataTemp('img.src')[0]
result.append('\n' % (src, x, y, w, h))
return result
def getGlyphs(self):
result = []
if (self.gid != None) and (len(self.gid) > 0):
glyphs = []
for j in set(self.gid):
glyphs.append(j)
glyphs.sort()
for gid in glyphs:
id='id="gl%d"' % gid
path = self.gd.lookup(id)
if path:
result.append(id + ' ' + path)
return result
def convert2SVG(gdict, flat_xml, pageid, previd, nextid, svgDir, raw, meta_array, scaledpi):
ml = ''
pp = PParser(gdict, flat_xml, meta_array)
ml += '\n'
if (raw):
ml += '\n'
ml += '