# Wrap a stream so that output gets flushed immediately
# and also make sure that any unicode strings get
# encoded using "replace" before writing them.
class SafeUnbuffered:
    """Write-through wrapper around a stream.

    Text passed to write() is encoded with the stream's encoding using the
    "replace" error handler, written, and flushed immediately.  Every other
    attribute access is forwarded to the wrapped stream.
    """

    def __init__(self, stream):
        """Wrap *stream*; fall back to UTF-8 when it reports no encoding."""
        self.stream = stream
        # Not every stream object exposes ``encoding`` (e.g. io.BytesIO).
        self.encoding = getattr(stream, "encoding", None)
        if self.encoding is None:
            self.encoding = "utf-8"

    def write(self, data):
        """Write *data* (str or bytes) to the wrapped stream and flush."""
        if isinstance(data, str):
            data = data.encode(self.encoding, "replace")
        raw = getattr(self.stream, "buffer", None)
        if raw is not None:
            # Text stream layered over a binary buffer: write the bytes there.
            raw.write(data)
            raw.flush()
            return
        # No underlying buffer: the stream is either binary itself or a
        # text-only object such as io.StringIO.
        try:
            self.stream.write(data)
        except TypeError:
            # Text-only stream: hand back text that has already been through
            # the "replace" round trip.
            self.stream.write(data.decode(self.encoding, "replace"))
        self.stream.flush()

    def __getattr__(self, attr):
        """Forward unknown attributes to the wrapped stream."""
        if attr == "stream":
            # Only reached when ``stream`` was never set; looking it up again
            # through self would recurse forever.
            raise AttributeError(attr)
        return getattr(self.stream, attr)


class TpzDRMError(Exception):
    """Error raised by the Topaz processing code in this module."""
    pass
# global switch
buildXML = False


# Get a 7 bit encoded number from a file
def readEncodedNumber(file):
    """Read one variable-length encoded integer from *file*.

    As implemented here: an optional leading 0xFF byte marks the value as
    negative; the value itself is stored in 7-bit groups, most significant
    group first, and every byte except the last has its high bit (0x80) set.

    Returns the decoded int, or None if the file ends before the number is
    complete.
    """
    flag = False
    c = file.read(1)
    if len(c) == 0:
        return None
    data = ord(c)
    if data == 0xFF:
        flag = True
        c = file.read(1)
        if len(c) == 0:
            return None
        data = ord(c)
    if data >= 0x80:
        datax = data & 0x7F
        while data >= 0x80:
            c = file.read(1)
            if len(c) == 0:
                return None
            data = ord(c)
            datax = (datax << 7) + (data & 0x7F)
        data = datax
    if flag:
        data = -data
    return data


# Prefix a string with its encoded length
def lengthPrefixString(data):
    """Return *data* preceded by its encoded length."""
    # NOTE(review): encodeNumber is not defined in the part of the file
    # visible here -- confirm it is provided elsewhere before calling this.
    return encodeNumber(len(data))+data


# Get a length prefixed string from the file
def readString(file):
    """Read one length-prefixed string from *file*.

    Returns None when the length prefix cannot be read, "" when fewer bytes
    than announced are available, and otherwise the string as text.
    """
    stringLength = readEncodedNumber(file)
    if stringLength is None:
        return None
    sv = file.read(stringLength)
    if len(sv) != stringLength:
        return ""
    if isinstance(sv, bytes):
        # The rest of this module works on str (str.replace patterns, str
        # dictionary keys), so decode once here at the I/O boundary.
        # NOTE(review): UTF-8 is assumed for the stored strings -- confirm.
        sv = sv.decode("utf-8", "replace")
    return sv


def getMetaArray(metaFile):
    """Parse the metadata file into a dict of tag -> value strings.

    The file starts with an encoded entry count followed by that many
    (tag, value) pairs of length-prefixed strings.  An empty or truncated
    file yields the entries that could be read.
    """
    result = {}
    with open(metaFile, 'rb') as fo:
        size = readEncodedNumber(fo)
        for _ in range(size or 0):
            tag = readString(fo)
            if tag is None:
                # Ran out of data before the announced number of entries.
                break
            value = readString(fo)
            result[tag] = "" if value is None else value
    return result


# dictionary of all text strings by index value
class Dictionary(object):
    """String table loaded from a dictionary file, addressed by index."""

    def __init__(self, dictFile):
        """Load every string from *dictFile*, escaping each for XML use."""
        self.filename = dictFile
        self.size = 0
        self.stable = []
        self.fo = open(dictFile, 'rb')
        try:
            self.size = readEncodedNumber(self.fo) or 0
            for _ in range(self.size):
                # A missing entry (file ended early) is stored as "" so the
                # table still holds ``size`` items.
                self.stable.append(self.escapestr(readString(self.fo) or ""))
        finally:
            # The table is fully in memory; do not keep the handle open.
            self.fo.close()
        self.pos = 0

    def escapestr(self, str):
        """Return *str* with &, <, > and = replaced by XML entities."""
        # '&' must be handled first so the entities added below are not
        # escaped a second time.
        str = str.replace('&', '&amp;')
        str = str.replace('<', '&lt;')
        str = str.replace('>', '&gt;')
        str = str.replace('=', '&#61;')
        return str

    def lookup(self, val):
        """Return string number *val* and remember it as the current position.

        Raises TpzDRMError when *val* is outside the table.
        """
        if ((val >= 0) and (val < self.size)):
            self.pos = val
            return self.stable[self.pos]
        else:
            print("Error: %d outside of string table limits" % val)
            raise TpzDRMError('outside or string table limits')

    def getSize(self):
        """Return the number of entries announced by the dictionary file."""
        return self.size

    def getPos(self):
        """Return the index of the most recent successful lookup."""
        return self.pos
class PageDimParser(object):
    """Extracts the page height and width from a flattened page document."""

    def __init__(self, flatxml):
        """Split *flatxml* into one ``name=value`` entry per line."""
        self.flatdoc = flatxml.split('\n')

    # find tag if within pos (inclusive) to end (exclusive); end == -1 means
    # search to the end of the document
    def findinDoc(self, tagpath, pos, end):
        """Find the first entry in [pos, end) whose name ends with *tagpath*.

        Returns (index, value); value is '' for an entry without '='.
        Returns (-1, None) when nothing matches.
        """
        docList = self.flatdoc
        cnt = len(docList)
        end = cnt if end == -1 else min(cnt, end)
        for j in range(pos, end):
            # partition() splits on the first '=' only, so a value that
            # itself contains '=' no longer breaks the tuple unpacking.
            name, _, argres = docList[j].partition('=')
            if name.endswith(tagpath):
                return j, argres
        return -1, None

    def process(self):
        """Return (height, width) as strings, '-1' for a missing value."""
        (pos, sph) = self.findinDoc('page.h', 0, -1)
        (pos, spw) = self.findinDoc('page.w', 0, -1)
        if sph is None:
            sph = '-1'
        if spw is None:
            spw = '-1'
        return sph, spw


def getPageDim(flatxml):
    """Return the (height, width) strings found in *flatxml*."""
    # create a document parser
    dp = PageDimParser(flatxml)
    (ph, pw) = dp.process()
    return ph, pw


class GParser(object):
    """Reads glyph tables from a flattened document and builds path strings."""

    def __init__(self, flatxml):
        """Load the glyph and vertex tables named ``info.*`` from *flatxml*."""
        self.flatdoc = flatxml.split('\n')
        self.dpi = 1440
        self.gh = self.getData('info.glyph.h')
        self.gw = self.getData('info.glyph.w')
        self.guse = self.getData('info.glyph.use')
        if self.guse:
            self.count = len(self.guse)
        else:
            self.count = 0
        self.gvtx = self.getData('info.glyph.vtx')
        self.glen = self.getData('info.glyph.len')
        self.gdpi = self.getData('info.glyph.dpi')
        self.vx = self.getData('info.vtx.x')
        self.vy = self.getData('info.vtx.y')
        self.vlen = self.getData('info.len.n')
        # Append an end sentinel to each offset table so that entry gly+1
        # exists for the last glyph (getPath slices with [gly] and [gly+1]).
        if self.vlen:
            self.glen.append(len(self.vlen))
        elif self.glen:
            self.glen.append(0)
        if self.vx:
            self.gvtx.append(len(self.vx))
        elif self.gvtx:
            self.gvtx.append(0)

    def getData(self, path):
        """Return the '|'-separated integers stored under the name *path*.

        Returns [] when the entry exists but has no '=', and None when no
        entry is named *path*.  Only the matching entry is converted to
        ints, so unrelated non-numeric lines cannot raise here.
        """
        for item in self.flatdoc:
            name, sep, argt = item.partition('=')
            if name != path:
                continue
            if not sep:
                return []
            return [int(v) for v in argt.split('|')]
        return None

    def getGlyphDim(self, gly):
        """Return (height, width) of glyph *gly* scaled to self.dpi.

        Returns (0, 0) when the glyph's own dpi entry is 0.
        """
        if self.gdpi[gly] == 0:
            return 0, 0
        maxh = (self.gh[gly] * self.dpi) / self.gdpi[gly]
        maxw = (self.gw[gly] * self.dpi) / self.gdpi[gly]
        return maxh, maxw

    def getPath(self, gly):
        """Return the path string for glyph *gly*, '' if out of range."""
        path = ''
        if (gly < 0) or (gly >= self.count):
            return path

        def scaled(v):
            # Same expression and evaluation order as used for every
            # coordinate: value * target dpi / glyph dpi.
            return v * self.dpi / self.gdpi[gly]

        tx = self.vx[self.gvtx[gly]:self.gvtx[gly+1]]
        ty = self.vy[self.gvtx[gly]:self.gvtx[gly+1]]
        pieces = []
        for p, k in enumerate(range(self.glen[gly], self.glen[gly+1])):
            # vlen[k] is the index of the last vertex of this contour; the
            # first contour starts at 0, later ones after the previous end.
            start = 0 if p == 0 else self.vlen[k-1] + 1
            stop = self.vlen[k] + 1
            zx = tx[start:stop]
            zy = ty[start:stop]
            n = len(zx)
            j = 0
            while j < n:
                if j == 0:
                    # Start Position.
                    pieces.append('M %d %d ' % (scaled(zx[j]), scaled(zy[j])))
                elif j <= n - 3:
                    # Cubic Bezier Curve
                    pieces.append('C %d %d %d %d %d %d ' % (
                        scaled(zx[j]), scaled(zy[j]),
                        scaled(zx[j+1]), scaled(zy[j+1]),
                        scaled(zx[j+2]), scaled(zy[j+2])))
                    j += 2
                elif j == n - 2:
                    # Cubic Bezier Curve to Start Position
                    pieces.append('C %d %d %d %d %d %d ' % (
                        scaled(zx[j]), scaled(zy[j]),
                        scaled(zx[j+1]), scaled(zy[j+1]),
                        scaled(zx[0]), scaled(zy[0])))
                    j += 1
                elif j == n - 1:
                    # Quadratic Bezier Curve to Start Position
                    pieces.append('Q %d %d %d %d ' % (
                        scaled(zx[j]), scaled(zy[j]),
                        scaled(zx[0]), scaled(zy[0])))
                j += 1
        pieces.append('z')
        return ''.join(pieces)


# dictionary of glyph paths keyed by their id attribute string
class GlyphDict(object):
    """Maps strings of the form ``id="gl<N>"`` to glyph path strings."""

    def __init__(self):
        self.gdict = {}

    def lookup(self, id):
        """Return the path stored under *id* (e.g. 'id="gl7"'), else None."""
        return self.gdict.get(id)

    def addGlyph(self, val, path):
        """Store *path* under the key built from glyph number *val*."""
        self.gdict['id="gl%d"' % val] = path
os.path.exists(pageDir) : print("Can not find page directory in unencrypted book") return 1 imgDir = os.path.join(bookDir,'img') if not os.path.exists(imgDir) : print("Can not find image directory in unencrypted book") return 1 glyphsDir = os.path.join(bookDir,'glyphs') if not os.path.exists(glyphsDir) : print("Can not find glyphs directory in unencrypted book") return 1 metaFile = os.path.join(bookDir,'metadata0000.dat') if not os.path.exists(metaFile) : print("Can not find metadata0000.dat in unencrypted book") return 1 svgDir = os.path.join(bookDir,'svg') if not os.path.exists(svgDir) : os.makedirs(svgDir) if buildXML: xmlDir = os.path.join(bookDir,'xml') if not os.path.exists(xmlDir) : os.makedirs(xmlDir) otherFile = os.path.join(bookDir,'other0000.dat') if not os.path.exists(otherFile) : print("Can not find other0000.dat in unencrypted book") return 1 print("Updating to color images if available") spath = os.path.join(bookDir,'color_img') dpath = os.path.join(bookDir,'img') filenames = os.listdir(spath) filenames = sorted(filenames) for filename in filenames: imgname = filename.replace('color','img') sfile = os.path.join(spath,filename) dfile = os.path.join(dpath,imgname) imgdata = open(sfile,'rb').read() open(dfile,'wb').write(imgdata) print("Creating cover.jpg") isCover = False cpath = os.path.join(bookDir,'img') cpath = os.path.join(cpath,'img0000.jpg') if os.path.isfile(cpath): cover = open(cpath, 'rb').read() cpath = os.path.join(bookDir,'cover.jpg') open(cpath, 'wb').write(cover) isCover = True print('Processing Dictionary') dict = Dictionary(dictFile) print('Processing Meta Data and creating OPF') meta_array = getMetaArray(metaFile) # replace special chars in title and authors like & < > title = meta_array.get('Title','No Title Provided') title = title.replace('&','&') title = title.replace('<','<') title = title.replace('>','>') meta_array['Title'] = title authors = meta_array.get('Authors','No Authors Provided') authors = authors.replace('&','&') 
authors = authors.replace('<','<') authors = authors.replace('>','>') meta_array['Authors'] = authors if buildXML: xname = os.path.join(xmlDir, 'metadata.xml') mlst = [] for key in meta_array: mlst.append('\n') metastr = "".join(mlst) mlst = None open(xname, 'wb').write(metastr) print('Processing StyleSheet') # get some scaling info from metadata to use while processing styles # and first page info fontsize = '135' if 'fontSize' in meta_array: fontsize = meta_array['fontSize'] # also get the size of a normal text page # get the total number of pages unpacked as a safety check filenames = os.listdir(pageDir) numfiles = len(filenames) spage = '1' if 'firstTextPage' in meta_array: spage = meta_array['firstTextPage'] pnum = int(spage) if pnum >= numfiles or pnum < 0: # metadata is wrong so just select a page near the front # 10% of the book to get a normal text page pnum = int(0.10 * numfiles) # print "first normal text page is", spage # get page height and width from first text page for use in stylesheet scaling pname = 'page%04d.dat' % (pnum - 1) fname = os.path.join(pageDir,pname) flat_xml = convert2xml.fromData(dict, fname) (ph, pw) = getPageDim(flat_xml) if (ph == '-1') or (ph == '0') : ph = '11000' if (pw == '-1') or (pw == '0') : pw = '8500' meta_array['pageHeight'] = ph meta_array['pageWidth'] = pw if 'fontSize' not in meta_array.keys(): meta_array['fontSize'] = fontsize # process other.dat for css info and for map of page files to svg images # this map is needed because some pages actually are made up of multiple # pageXXXX.xml files xname = os.path.join(bookDir, 'style.css') flat_xml = convert2xml.fromData(dict, otherFile) # extract info.original.pid to get original page information pageIDMap = {} pageidnums = stylexml2css.getpageIDMap(flat_xml) if len(pageidnums) == 0: filenames = os.listdir(pageDir) numfiles = len(filenames) for k in range(numfiles): pageidnums.append(k) # create a map from page ids to list of page file nums to process for that page for i 
in range(len(pageidnums)): id = pageidnums[i] if id in pageIDMap.keys(): pageIDMap[id].append(i) else: pageIDMap[id] = [i] # now get the css info cssstr , classlst = stylexml2css.convert2CSS(flat_xml, fontsize, ph, pw) open(xname, 'wb').write(cssstr) if buildXML: xname = os.path.join(xmlDir, 'other0000.xml') open(xname, 'wb').write(convert2xml.getXML(dict, otherFile)) print('Processing Glyphs') gd = GlyphDict() filenames = os.listdir(glyphsDir) filenames = sorted(filenames) glyfname = os.path.join(svgDir,'glyphs.svg') glyfile = open(glyfname, 'w') glyfile.write('\n') glyfile.write('\n') glyfile.write('\n') glyfile.close() print(" ") # start up the html # also build up tocentries while processing html htmlFileName = "book.html" hlst = [] hlst.append('\n') hlst.append('\n') hlst.append('\n') hlst.append('
\n') hlst.append('\n') hlst.append('