#! /usr/bin/python
# vim:ts=4:sw=4:softtabstop=4:smarttab:expandtab
"""Turn the flattened XML description of one page into SVG markup.

``PParser`` extracts page size, glyph placements and images from the
flattened text; ``convert2SVG`` assembles the output document from them.
"""

import sys
import csv
import os
import getopt
from struct import pack
from struct import unpack


def _split_tag(item):
    """Split one flattened line into ``(name, value)`` at the first '='.

    ``value`` is None when the line contains no '=' at all, which lets
    callers tell "no value" apart from "empty value".  Only the first '='
    separates name from value, so values may themselves contain '='.
    """
    name, sep, value = item.partition('=')
    if not sep:
        return item, None
    return name, value


def _int_values(value):
    """Convert a '|'-separated value string into a list of ints.

    None (line without '=') yields an empty list.  A non-numeric or empty
    field raises ValueError, exactly as ``int()`` does.
    """
    if value is None:
        return []
    return [int(part) for part in value.split('|')]


def _nav_arrow(svg_id, handler, points):
    """Build the small clickable SVG shown beside the page in XHTML mode.

    ``points`` is the polygon outline of the arrow, or None for an empty
    placeholder of the same size (used when there is no page to go to).
    """
    polygon = ''
    if points:
        polygon = '<polygon points="%s" fill="#AAAAAA" />' % points
    return ('<a href="javascript:%s();"><svg id="%s" viewBox="0 0 100 300" '
            'xmlns="http://www.w3.org/2000/svg" version="1.1" '
            'style="background-color:#777">%s</svg></a>\n'
            % (handler, svg_id, polygon))


class PParser(object):
    """Parser for the flattened, one-tag-per-line description of a page.

    Each line has the form ``tag.path`` or ``tag.path=v1|v2|...``.  Tag
    paths are always matched by suffix (``name.endswith(path)``).

    Attributes set by the constructor:
        gd       -- glyph dictionary; only its ``lookup(key)`` is called
        flatdoc  -- list of lines of the flattened document
        docSize  -- number of lines in ``flatdoc``
        temp     -- scratch list consumed by ``getDataTemp``
        ph, pw   -- page height and width
        gx, gy   -- glyph x and y positions (lists of int)
        gid      -- glyph ids, parallel to ``gx``/``gy``
    """

    def __init__(self, gd, flatxml, meta_array):
        """Parse ``flatxml`` and cache page size and glyph placement data.

        The page size is the largest 'page.h'/'page.w' value, or the
        largest 'book.h'/'book.w' when the page tags are absent.  When no
        positive size is found, ``meta_array`` supplies 'pageHeight' /
        'pageWidth', defaulting to 11000 x 8500.
        """
        self.gd = gd
        self.flatdoc = flatxml.split('\n')
        self.docSize = len(self.flatdoc)
        self.temp = []

        self.ph = self._largestValue('page.h', 'book.h')
        self.pw = self._largestValue('page.w', 'book.w')
        if self.ph <= 0:
            self.ph = int(meta_array.get('pageHeight', '11000'))
        if self.pw <= 0:
            self.pw = int(meta_array.get('pageWidth', '8500'))

        self.gx = self._collect('info.glyph.x')
        self.gy = self._collect('info.glyph.y')
        self.gid = self._collect('info.glyph.glyphID')

    def _largestValue(self, primary, fallback):
        """Return the largest int value of ``primary`` tags, else of
        ``fallback`` tags, else -1 when neither occurs."""
        best = -1
        for pos in self.posinDoc(primary) or self.posinDoc(fallback):
            best = max(best, int(self.lineinDoc(pos)[1]))
        return best

    def _collect(self, path):
        """Concatenate the int values of every line whose tag ends with
        ``path``, in document order."""
        values = []
        for pos in self.posinDoc(path):
            values.extend(self.getDataatPos(path, pos))
        return values

    def lineinDoc(self, pos):
        """Return ``(name, value)`` for the line at index ``pos``.

        ``value`` is the raw text after the first '=' ('' when absent).
        Raises IndexError when ``pos`` is outside the document.
        """
        if not 0 <= pos < self.docSize:
            raise IndexError('line %r is outside the document' % (pos,))
        name, value = _split_tag(self.flatdoc[pos])
        return name, value or ''

    def findinDoc(self, tagpath, pos, end):
        """Find the first line in ``[pos, end)`` whose tag ends with ``tagpath``.

        ``end`` is exclusive; -1 means "to the end of the document" and
        larger values are clamped to the document size.  Returns
        ``(index, value)`` on success and ``(-1, None)`` otherwise; a tag
        without '=' is reported with value ''.
        """
        end = self.docSize if end == -1 else min(self.docSize, end)
        for index in range(pos, end):
            name, value = _split_tag(self.flatdoc[index])
            if name.endswith(tagpath):
                return index, value or ''
        return -1, None

    def posinDoc(self, tagpath):
        """Return the indexes of all lines whose tag ends with ``tagpath``."""
        positions = []
        pos = 0
        while True:
            foundpos, res = self.findinDoc(tagpath, pos, -1)
            if res is None:
                break
            positions.append(foundpos)
            pos = foundpos + 1
        return positions

    def getData(self, path):
        """Return the int values of the first line whose tag ends with
        ``path``, or None when no line matches."""
        for item in self.flatdoc:
            name, value = _split_tag(item)
            if name.endswith(path):
                # Convert only the matching line; unrelated lines may hold
                # non-numeric text.
                return _int_values(value)
        return None

    def getDataatPos(self, path, pos):
        """Return the int values of line ``pos`` if its tag ends with
        ``path``, otherwise None."""
        name, value = _split_tag(self.flatdoc[pos])
        if not name.endswith(path):
            return None
        return _int_values(value)

    def getDataTemp(self, path):
        """Remove the first line of ``self.temp`` whose tag ends with
        ``path`` and return its int values; None when nothing matches."""
        for index, item in enumerate(self.temp):
            name, value = _split_tag(item)
            if name.endswith(path):
                self.temp.pop(index)
                return _int_values(value)
        return None

    def getImages(self):
        """Return one SVG ``<image>`` element per 'img' tag in the document.

        Every 'img' tag is expected to be accompanied by 'img.h', 'img.w',
        'img.x', 'img.y' and 'img.src' lines; the first value of each is
        used.
        NOTE(review): the href pattern "../img/imgNNNN.jpg" was
        reconstructed -- confirm against the files the caller writes.
        """
        result = []
        # Work on a copy: getDataTemp() consumes lines, and the parsed
        # document itself must stay intact for later queries.
        self.temp = list(self.flatdoc)
        while self.getDataTemp('img') is not None:
            h = self.getDataTemp('img.h')[0]
            w = self.getDataTemp('img.w')[0]
            x = self.getDataTemp('img.x')[0]
            y = self.getDataTemp('img.y')[0]
            src = self.getDataTemp('img.src')[0]
            result.append(
                '<image xlink:href="../img/img%04d.jpg" x="%d" y="%d" '
                'width="%d" height="%d" />\n' % (src, x, y, w, h))
        return result

    def getGlyphs(self):
        """Return a definition string for each distinct glyph id on the
        page, in ascending id order.

        Each entry is the key ``id="glN"`` followed by a space and whatever
        ``self.gd.lookup(key)`` returned; ids with a falsy lookup result
        are skipped.
        """
        result = []
        if self.gid:
            for gid in sorted(set(self.gid)):
                key = 'id="gl%d"' % gid
                path = self.gd.lookup(key)
                if path:
                    result.append(key + ' ' + path)
        return result


def convert2SVG(gdict, flat_xml, pageid, previd, nextid, svgDir, raw, meta_array, scaledpi):
    """Return the markup for one page as a single string.

    Arguments:
        gdict      -- glyph dictionary handed to ``PParser``
        flat_xml   -- flattened page description (newline separated)
        pageid     -- page number, shown in the title
        previd     -- previous page number, or None on the first page
        nextid     -- next page number, or None on the last page
        svgDir     -- accepted for interface compatibility; not used here
        raw        -- true: stand-alone SVG; false: XHTML viewer page
        meta_array -- mapping providing 'Title' and 'Authors' (required),
                      optionally 'pageHeight', 'pageWidth' and 'fontSize'
        scaledpi   -- page units per inch, used to size the output

    Raises KeyError when 'Title' or 'Authors' is missing.

    NOTE(review): the literal SVG/XHTML/JavaScript text in this function
    was reconstructed so that every format argument is consumed and the
    output is well formed -- verify it against a reference rendering.
    NOTE(review): assumes previd/nextid are ints when not None.
    """
    pp = PParser(gdict, flat_xml, meta_array)
    title = '<title>Page %d - %s by %s</title>\n' % (
        pageid, meta_array['Title'], meta_array['Authors'])

    mlst = ['<?xml version="1.0" standalone="no"?>\n']
    if raw:
        mlst.append('<!DOCTYPE svg PUBLIC "-//W3C/DTD SVG 1.1//EN" '
                    '"http://www.w3.org/Graphics/SVG/1.1/DTD/svg11.dtd">\n')
        mlst.append('<svg width="%fin" height="%fin" viewBox="0 0 %d %d" '
                    'xmlns="http://www.w3.org/2000/svg" '
                    'xmlns:xlink="http://www.w3.org/1999/xlink" version="1.1">\n'
                    % (pp.pw / scaledpi, pp.ph / scaledpi, pp.pw - 1, pp.ph - 1))
        mlst.append(title)
    else:
        mlst.append('<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en">\n')
        mlst.append('<head>\n')
        mlst.append(title)
        # Viewer script: zooming, and navigation that carries the current
        # dpi to the neighbouring page through the query string.
        mlst.append('<script><![CDATA[\n')
        mlst.append('function gd(){var p=window.location.href.replace('
                    '/^.*\\?dpi=(\\d+).*$/i,"$1");return p;}\n')
        mlst.append('var dpi=%d;\n' % scaledpi)
        if previd is not None:
            mlst.append('var prevpage="page%04d.xhtml";\n' % previd)
        if nextid is not None:
            mlst.append('var nextpage="page%04d.xhtml";\n' % nextid)
        mlst.append('var pw=%d;var ph=%d;' % (pp.pw, pp.ph))
        mlst.append('function zoomin(){dpi=dpi*(0.8);setsize();}\n')
        mlst.append('function zoomout(){dpi=dpi*1.25;setsize();}\n')
        mlst.append('function setsize(){'
                    'var svg=document.getElementById("svgimg");'
                    'var prev=document.getElementById("prevsvg");'
                    'var next=document.getElementById("nextsvg");'
                    'var width=(pw/dpi)+"in";'
                    'var height=(ph/dpi)+"in";'
                    'svg.setAttribute("width",width);'
                    'svg.setAttribute("height",height);'
                    'prev.setAttribute("height",height);'
                    'prev.setAttribute("width","50px");'
                    'next.setAttribute("height",height);'
                    'next.setAttribute("width","50px");}\n')
        mlst.append('function ppage(){window.location.href=prevpage+"?dpi="+Math.round(dpi);}\n')
        mlst.append('function npage(){window.location.href=nextpage+"?dpi="+Math.round(dpi);}\n')
        mlst.append('var gt=gd();if(gt>0){dpi=gt;}\n')
        mlst.append('window.onload=setsize;\n')
        mlst.append(']]></script>\n')
        mlst.append('</head>\n')
        mlst.append('<body onLoad="setsize();" '
                    'style="background-color:#777;text-align:center;">\n')
        mlst.append('<div style="white-space:nowrap;">\n')
        if previd is None:
            mlst.append(_nav_arrow('prevsvg', 'ppage', None))
        else:
            mlst.append(_nav_arrow('prevsvg', 'ppage', '5,150,95,5,95,295'))
        # No trailing newline: the page content follows directly.
        mlst.append('<a href="javascript:npage();"><svg id="svgimg" '
                    'viewBox="0 0 %d %d" xmlns="http://www.w3.org/2000/svg" '
                    'xmlns:xlink="http://www.w3.org/1999/xlink" version="1.1" '
                    'style="background-color:#FFF;border:1px solid black;">'
                    % (pp.pw, pp.ph))

    if pp.gid is not None:
        mlst.append('<defs>\n')
        mlst.extend(pp.getGlyphs())
        mlst.append('</defs>\n')

    img = pp.getImages()
    if img is not None:
        mlst.extend(img)

    if pp.gid is not None:
        # gx/gy are indexed rather than zipped so that a length mismatch
        # fails loudly instead of silently dropping glyphs.
        for j, gid in enumerate(pp.gid):
            mlst.append('<use xlink:href="#gl%d" x="%d" y="%d" />\n'
                        % (gid, pp.gx[j], pp.gy[j]))

    if not img and not pp.gid:
        xpos = "%d" % (pp.pw // 3)
        ypos = "%d" % (pp.ph // 3)
        # NOTE(review): 'fontSize' is optional here; confirm whether the
        # caller always provides it.
        font_size = meta_array.get('fontSize')
        size_attr = ' font-size="%s"' % font_size if font_size else ''
        mlst.append('<text x="%s" y="%s"%s font-family="Helvetica" '
                    'stroke="black">This page intentionally left blank.</text>\n'
                    % (xpos, ypos, size_attr))

    if raw:
        mlst.append('</svg>')
    else:
        mlst.append('</svg></a>\n')
        if nextid is None:
            mlst.append(_nav_arrow('nextsvg', 'npage', None))
        else:
            mlst.append(_nav_arrow('nextsvg', 'npage', '5,5,5,295,95,150'))
        mlst.append('</div>\n')
        mlst.append('<div><a href="javascript:zoomin();">zoom in</a> - '
                    '<a href="javascript:zoomout();">zoom out</a></div>\n')
        mlst.append('</body>\n')
        mlst.append('</html>\n')
    return "".join(mlst)