diff --git a/.gitignore b/.gitignore index 436624b..67cac1d 100644 --- a/.gitignore +++ b/.gitignore @@ -2,9 +2,6 @@ __pycache__/ *.pyc -# C extensions -*.so - # Distribution / packaging .Python env/ diff --git a/Calibre_Plugins/K4MobiDeDRM_plugin/__init__.py b/Calibre_Plugins/K4MobiDeDRM_plugin/__init__.py index 233b462..a2afd66 100644 --- a/Calibre_Plugins/K4MobiDeDRM_plugin/__init__.py +++ b/Calibre_Plugins/K4MobiDeDRM_plugin/__init__.py @@ -19,8 +19,8 @@ class K4DeDRM(FileTypePlugin): description = 'Removes DRM from Mobipocket, Kindle/Mobi, Kindle/Topaz and Kindle/Print Replica files. Provided by the work of many including DiapDealer, SomeUpdates, IHeartCabbages, CMBDTC, Skindle, DarkReverser, ApprenticeAlf, etc.' supported_platforms = ['osx', 'windows', 'linux'] # Platforms this plugin will run on author = 'DiapDealer, SomeUpdates' # The author of this plugin - version = (0, 4, 1) # The version number of this plugin - file_types = set(['prc','mobi','azw','azw1','azw4','tpz']) # The file types that this plugin will be applied to + version = (0, 4, 2) # The version number of this plugin + file_types = set(['prc','mobi','azw','azw1','azw3','azw4','tpz']) # The file types that this plugin will be applied to on_import = True # Run this plugin during the import priority = 210 # run this plugin before mobidedrm, k4pcdedrm, k4dedrm minimum_calibre_version = (0, 7, 55) @@ -140,6 +140,9 @@ class K4DeDRM(FileTypePlugin): if mb.getPrintReplica(): of = self.temporary_file(bookname+'.azw4') print 'K4MobiDeDRM v%s: Print Replica format detected.' % plug_ver + elif mb.getMobiVersion() >= 8: + print 'K4MobiDeDRM v%s: Stand-alone KF8 format detected.' % plug_ver + of = self.temporary_file(bookname+'.azw3') else: of = self.temporary_file(bookname+'.mobi') mb.getMobiFile(of.name) diff --git a/Calibre_Plugins/K4MobiDeDRM_plugin/aescbc.py b/Calibre_Plugins/K4MobiDeDRM_plugin/aescbc.py new file mode 100644 index 0000000..5667511 --- /dev/null +++ b/Calibre_Plugins/K4MobiDeDRM_plugin/aescbc.py @@ -0,0 +1,568 @@ +#! /usr/bin/env python + +""" + Routines for doing AES CBC in one file + + Modified by some_updates to extract + and combine only those parts needed for AES CBC + into one simple to add python file + + Original Version + Copyright (c) 2002 by Paul A. Lambert + Under: + CryptoPy Artisitic License Version 1.0 + See the wonderful pure python package cryptopy-1.2.5 + and read its LICENSE.txt for complete license details. +""" + +class CryptoError(Exception): + """ Base class for crypto exceptions """ + def __init__(self,errorMessage='Error!'): + self.message = errorMessage + def __str__(self): + return self.message + +class InitCryptoError(CryptoError): + """ Crypto errors during algorithm initialization """ +class BadKeySizeError(InitCryptoError): + """ Bad key size error """ +class EncryptError(CryptoError): + """ Error in encryption processing """ +class DecryptError(CryptoError): + """ Error in decryption processing """ +class DecryptNotBlockAlignedError(DecryptError): + """ Error in decryption processing """ + +def xorS(a,b): + """ XOR two strings """ + assert len(a)==len(b) + x = [] + for i in range(len(a)): + x.append( chr(ord(a[i])^ord(b[i]))) + return ''.join(x) + +def xor(a,b): + """ XOR two strings """ + x = [] + for i in range(min(len(a),len(b))): + x.append( chr(ord(a[i])^ord(b[i]))) + return ''.join(x) + +""" + Base 'BlockCipher' and Pad classes for cipher instances. + BlockCipher supports automatic padding and type conversion. The BlockCipher + class was written to make the actual algorithm code more readable and + not for performance. +""" + +class BlockCipher: + """ Block ciphers """ + def __init__(self): + self.reset() + + def reset(self): + self.resetEncrypt() + self.resetDecrypt() + def resetEncrypt(self): + self.encryptBlockCount = 0 + self.bytesToEncrypt = '' + def resetDecrypt(self): + self.decryptBlockCount = 0 + self.bytesToDecrypt = '' + + def encrypt(self, plainText, more = None): + """ Encrypt a string and return a binary string """ + self.bytesToEncrypt += plainText # append plainText to any bytes from prior encrypt + numBlocks, numExtraBytes = divmod(len(self.bytesToEncrypt), self.blockSize) + cipherText = '' + for i in range(numBlocks): + bStart = i*self.blockSize + ctBlock = self.encryptBlock(self.bytesToEncrypt[bStart:bStart+self.blockSize]) + self.encryptBlockCount += 1 + cipherText += ctBlock + if numExtraBytes > 0: # save any bytes that are not block aligned + self.bytesToEncrypt = self.bytesToEncrypt[-numExtraBytes:] + else: + self.bytesToEncrypt = '' + + if more == None: # no more data expected from caller + finalBytes = self.padding.addPad(self.bytesToEncrypt,self.blockSize) + if len(finalBytes) > 0: + ctBlock = self.encryptBlock(finalBytes) + self.encryptBlockCount += 1 + cipherText += ctBlock + self.resetEncrypt() + return cipherText + + def decrypt(self, cipherText, more = None): + """ Decrypt a string and return a string """ + self.bytesToDecrypt += cipherText # append to any bytes from prior decrypt + + numBlocks, numExtraBytes = divmod(len(self.bytesToDecrypt), self.blockSize) + if more == None: # no more calls to decrypt, should have all the data + if numExtraBytes != 0: + raise DecryptNotBlockAlignedError, 'Data not block aligned on decrypt' + + # hold back some bytes in case last decrypt has zero len + if (more != None) and (numExtraBytes == 0) and (numBlocks >0) : + numBlocks -= 1 + numExtraBytes = self.blockSize + + plainText = '' + for i in range(numBlocks): + bStart = i*self.blockSize + ptBlock = self.decryptBlock(self.bytesToDecrypt[bStart : bStart+self.blockSize]) + self.decryptBlockCount += 1 + plainText += ptBlock + + if numExtraBytes > 0: # save any bytes that are not block aligned + self.bytesToEncrypt = self.bytesToEncrypt[-numExtraBytes:] + else: + self.bytesToEncrypt = '' + + if more == None: # last decrypt remove padding + plainText = self.padding.removePad(plainText, self.blockSize) + self.resetDecrypt() + return plainText + + +class Pad: + def __init__(self): + pass # eventually could put in calculation of min and max size extension + +class padWithPadLen(Pad): + """ Pad a binary string with the length of the padding """ + + def addPad(self, extraBytes, blockSize): + """ Add padding to a binary string to make it an even multiple + of the block size """ + blocks, numExtraBytes = divmod(len(extraBytes), blockSize) + padLength = blockSize - numExtraBytes + return extraBytes + padLength*chr(padLength) + + def removePad(self, paddedBinaryString, blockSize): + """ Remove padding from a binary string """ + if not(0 6 and i%Nk == 4 : + temp = [ Sbox[byte] for byte in temp ] # SubWord(temp) + w.append( [ w[i-Nk][byte]^temp[byte] for byte in range(4) ] ) + return w + +Rcon = (0,0x01,0x02,0x04,0x08,0x10,0x20,0x40,0x80,0x1b,0x36, # note extra '0' !!! + 0x6c,0xd8,0xab,0x4d,0x9a,0x2f,0x5e,0xbc,0x63,0xc6, + 0x97,0x35,0x6a,0xd4,0xb3,0x7d,0xfa,0xef,0xc5,0x91) + +#------------------------------------- +def AddRoundKey(algInstance, keyBlock): + """ XOR the algorithm state with a block of key material """ + for column in range(algInstance.Nb): + for row in range(4): + algInstance.state[column][row] ^= keyBlock[column][row] +#------------------------------------- + +def SubBytes(algInstance): + for column in range(algInstance.Nb): + for row in range(4): + algInstance.state[column][row] = Sbox[algInstance.state[column][row]] + +def InvSubBytes(algInstance): + for column in range(algInstance.Nb): + for row in range(4): + algInstance.state[column][row] = InvSbox[algInstance.state[column][row]] + +Sbox = (0x63,0x7c,0x77,0x7b,0xf2,0x6b,0x6f,0xc5, + 0x30,0x01,0x67,0x2b,0xfe,0xd7,0xab,0x76, + 0xca,0x82,0xc9,0x7d,0xfa,0x59,0x47,0xf0, + 0xad,0xd4,0xa2,0xaf,0x9c,0xa4,0x72,0xc0, + 0xb7,0xfd,0x93,0x26,0x36,0x3f,0xf7,0xcc, + 0x34,0xa5,0xe5,0xf1,0x71,0xd8,0x31,0x15, + 0x04,0xc7,0x23,0xc3,0x18,0x96,0x05,0x9a, + 0x07,0x12,0x80,0xe2,0xeb,0x27,0xb2,0x75, + 0x09,0x83,0x2c,0x1a,0x1b,0x6e,0x5a,0xa0, + 0x52,0x3b,0xd6,0xb3,0x29,0xe3,0x2f,0x84, + 0x53,0xd1,0x00,0xed,0x20,0xfc,0xb1,0x5b, + 0x6a,0xcb,0xbe,0x39,0x4a,0x4c,0x58,0xcf, + 0xd0,0xef,0xaa,0xfb,0x43,0x4d,0x33,0x85, + 0x45,0xf9,0x02,0x7f,0x50,0x3c,0x9f,0xa8, + 0x51,0xa3,0x40,0x8f,0x92,0x9d,0x38,0xf5, + 0xbc,0xb6,0xda,0x21,0x10,0xff,0xf3,0xd2, + 0xcd,0x0c,0x13,0xec,0x5f,0x97,0x44,0x17, + 0xc4,0xa7,0x7e,0x3d,0x64,0x5d,0x19,0x73, + 0x60,0x81,0x4f,0xdc,0x22,0x2a,0x90,0x88, + 0x46,0xee,0xb8,0x14,0xde,0x5e,0x0b,0xdb, + 0xe0,0x32,0x3a,0x0a,0x49,0x06,0x24,0x5c, + 0xc2,0xd3,0xac,0x62,0x91,0x95,0xe4,0x79, + 0xe7,0xc8,0x37,0x6d,0x8d,0xd5,0x4e,0xa9, + 0x6c,0x56,0xf4,0xea,0x65,0x7a,0xae,0x08, + 0xba,0x78,0x25,0x2e,0x1c,0xa6,0xb4,0xc6, + 0xe8,0xdd,0x74,0x1f,0x4b,0xbd,0x8b,0x8a, + 0x70,0x3e,0xb5,0x66,0x48,0x03,0xf6,0x0e, + 0x61,0x35,0x57,0xb9,0x86,0xc1,0x1d,0x9e, + 0xe1,0xf8,0x98,0x11,0x69,0xd9,0x8e,0x94, + 0x9b,0x1e,0x87,0xe9,0xce,0x55,0x28,0xdf, + 0x8c,0xa1,0x89,0x0d,0xbf,0xe6,0x42,0x68, + 0x41,0x99,0x2d,0x0f,0xb0,0x54,0xbb,0x16) + +InvSbox = (0x52,0x09,0x6a,0xd5,0x30,0x36,0xa5,0x38, + 0xbf,0x40,0xa3,0x9e,0x81,0xf3,0xd7,0xfb, + 0x7c,0xe3,0x39,0x82,0x9b,0x2f,0xff,0x87, + 0x34,0x8e,0x43,0x44,0xc4,0xde,0xe9,0xcb, + 0x54,0x7b,0x94,0x32,0xa6,0xc2,0x23,0x3d, + 0xee,0x4c,0x95,0x0b,0x42,0xfa,0xc3,0x4e, + 0x08,0x2e,0xa1,0x66,0x28,0xd9,0x24,0xb2, + 0x76,0x5b,0xa2,0x49,0x6d,0x8b,0xd1,0x25, + 0x72,0xf8,0xf6,0x64,0x86,0x68,0x98,0x16, + 0xd4,0xa4,0x5c,0xcc,0x5d,0x65,0xb6,0x92, + 0x6c,0x70,0x48,0x50,0xfd,0xed,0xb9,0xda, + 0x5e,0x15,0x46,0x57,0xa7,0x8d,0x9d,0x84, + 0x90,0xd8,0xab,0x00,0x8c,0xbc,0xd3,0x0a, + 0xf7,0xe4,0x58,0x05,0xb8,0xb3,0x45,0x06, + 0xd0,0x2c,0x1e,0x8f,0xca,0x3f,0x0f,0x02, + 0xc1,0xaf,0xbd,0x03,0x01,0x13,0x8a,0x6b, + 0x3a,0x91,0x11,0x41,0x4f,0x67,0xdc,0xea, + 0x97,0xf2,0xcf,0xce,0xf0,0xb4,0xe6,0x73, + 0x96,0xac,0x74,0x22,0xe7,0xad,0x35,0x85, + 0xe2,0xf9,0x37,0xe8,0x1c,0x75,0xdf,0x6e, + 0x47,0xf1,0x1a,0x71,0x1d,0x29,0xc5,0x89, + 0x6f,0xb7,0x62,0x0e,0xaa,0x18,0xbe,0x1b, + 0xfc,0x56,0x3e,0x4b,0xc6,0xd2,0x79,0x20, + 0x9a,0xdb,0xc0,0xfe,0x78,0xcd,0x5a,0xf4, + 0x1f,0xdd,0xa8,0x33,0x88,0x07,0xc7,0x31, + 0xb1,0x12,0x10,0x59,0x27,0x80,0xec,0x5f, + 0x60,0x51,0x7f,0xa9,0x19,0xb5,0x4a,0x0d, + 0x2d,0xe5,0x7a,0x9f,0x93,0xc9,0x9c,0xef, + 0xa0,0xe0,0x3b,0x4d,0xae,0x2a,0xf5,0xb0, + 0xc8,0xeb,0xbb,0x3c,0x83,0x53,0x99,0x61, + 0x17,0x2b,0x04,0x7e,0xba,0x77,0xd6,0x26, + 0xe1,0x69,0x14,0x63,0x55,0x21,0x0c,0x7d) + +#------------------------------------- +""" For each block size (Nb), the ShiftRow operation shifts row i + by the amount Ci. Note that row 0 is not shifted. + Nb C1 C2 C3 + ------------------- """ +shiftOffset = { 4 : ( 0, 1, 2, 3), + 5 : ( 0, 1, 2, 3), + 6 : ( 0, 1, 2, 3), + 7 : ( 0, 1, 2, 4), + 8 : ( 0, 1, 3, 4) } +def ShiftRows(algInstance): + tmp = [0]*algInstance.Nb # list of size Nb + for r in range(1,4): # row 0 reamains unchanged and can be skipped + for c in range(algInstance.Nb): + tmp[c] = algInstance.state[(c+shiftOffset[algInstance.Nb][r]) % algInstance.Nb][r] + for c in range(algInstance.Nb): + algInstance.state[c][r] = tmp[c] +def InvShiftRows(algInstance): + tmp = [0]*algInstance.Nb # list of size Nb + for r in range(1,4): # row 0 reamains unchanged and can be skipped + for c in range(algInstance.Nb): + tmp[c] = algInstance.state[(c+algInstance.Nb-shiftOffset[algInstance.Nb][r]) % algInstance.Nb][r] + for c in range(algInstance.Nb): + algInstance.state[c][r] = tmp[c] +#------------------------------------- +def MixColumns(a): + Sprime = [0,0,0,0] + for j in range(a.Nb): # for each column + Sprime[0] = mul(2,a.state[j][0])^mul(3,a.state[j][1])^mul(1,a.state[j][2])^mul(1,a.state[j][3]) + Sprime[1] = mul(1,a.state[j][0])^mul(2,a.state[j][1])^mul(3,a.state[j][2])^mul(1,a.state[j][3]) + Sprime[2] = mul(1,a.state[j][0])^mul(1,a.state[j][1])^mul(2,a.state[j][2])^mul(3,a.state[j][3]) + Sprime[3] = mul(3,a.state[j][0])^mul(1,a.state[j][1])^mul(1,a.state[j][2])^mul(2,a.state[j][3]) + for i in range(4): + a.state[j][i] = Sprime[i] + +def InvMixColumns(a): + """ Mix the four bytes of every column in a linear way + This is the opposite operation of Mixcolumn """ + Sprime = [0,0,0,0] + for j in range(a.Nb): # for each column + Sprime[0] = mul(0x0E,a.state[j][0])^mul(0x0B,a.state[j][1])^mul(0x0D,a.state[j][2])^mul(0x09,a.state[j][3]) + Sprime[1] = mul(0x09,a.state[j][0])^mul(0x0E,a.state[j][1])^mul(0x0B,a.state[j][2])^mul(0x0D,a.state[j][3]) + Sprime[2] = mul(0x0D,a.state[j][0])^mul(0x09,a.state[j][1])^mul(0x0E,a.state[j][2])^mul(0x0B,a.state[j][3]) + Sprime[3] = mul(0x0B,a.state[j][0])^mul(0x0D,a.state[j][1])^mul(0x09,a.state[j][2])^mul(0x0E,a.state[j][3]) + for i in range(4): + a.state[j][i] = Sprime[i] + +#------------------------------------- +def mul(a, b): + """ Multiply two elements of GF(2^m) + needed for MixColumn and InvMixColumn """ + if (a !=0 and b!=0): + return Alogtable[(Logtable[a] + Logtable[b])%255] + else: + return 0 + +Logtable = ( 0, 0, 25, 1, 50, 2, 26, 198, 75, 199, 27, 104, 51, 238, 223, 3, + 100, 4, 224, 14, 52, 141, 129, 239, 76, 113, 8, 200, 248, 105, 28, 193, + 125, 194, 29, 181, 249, 185, 39, 106, 77, 228, 166, 114, 154, 201, 9, 120, + 101, 47, 138, 5, 33, 15, 225, 36, 18, 240, 130, 69, 53, 147, 218, 142, + 150, 143, 219, 189, 54, 208, 206, 148, 19, 92, 210, 241, 64, 70, 131, 56, + 102, 221, 253, 48, 191, 6, 139, 98, 179, 37, 226, 152, 34, 136, 145, 16, + 126, 110, 72, 195, 163, 182, 30, 66, 58, 107, 40, 84, 250, 133, 61, 186, + 43, 121, 10, 21, 155, 159, 94, 202, 78, 212, 172, 229, 243, 115, 167, 87, + 175, 88, 168, 80, 244, 234, 214, 116, 79, 174, 233, 213, 231, 230, 173, 232, + 44, 215, 117, 122, 235, 22, 11, 245, 89, 203, 95, 176, 156, 169, 81, 160, + 127, 12, 246, 111, 23, 196, 73, 236, 216, 67, 31, 45, 164, 118, 123, 183, + 204, 187, 62, 90, 251, 96, 177, 134, 59, 82, 161, 108, 170, 85, 41, 157, + 151, 178, 135, 144, 97, 190, 220, 252, 188, 149, 207, 205, 55, 63, 91, 209, + 83, 57, 132, 60, 65, 162, 109, 71, 20, 42, 158, 93, 86, 242, 211, 171, + 68, 17, 146, 217, 35, 32, 46, 137, 180, 124, 184, 38, 119, 153, 227, 165, + 103, 74, 237, 222, 197, 49, 254, 24, 13, 99, 140, 128, 192, 247, 112, 7) + +Alogtable= ( 1, 3, 5, 15, 17, 51, 85, 255, 26, 46, 114, 150, 161, 248, 19, 53, + 95, 225, 56, 72, 216, 115, 149, 164, 247, 2, 6, 10, 30, 34, 102, 170, + 229, 52, 92, 228, 55, 89, 235, 38, 106, 190, 217, 112, 144, 171, 230, 49, + 83, 245, 4, 12, 20, 60, 68, 204, 79, 209, 104, 184, 211, 110, 178, 205, + 76, 212, 103, 169, 224, 59, 77, 215, 98, 166, 241, 8, 24, 40, 120, 136, + 131, 158, 185, 208, 107, 189, 220, 127, 129, 152, 179, 206, 73, 219, 118, 154, + 181, 196, 87, 249, 16, 48, 80, 240, 11, 29, 39, 105, 187, 214, 97, 163, + 254, 25, 43, 125, 135, 146, 173, 236, 47, 113, 147, 174, 233, 32, 96, 160, + 251, 22, 58, 78, 210, 109, 183, 194, 93, 231, 50, 86, 250, 21, 63, 65, + 195, 94, 226, 61, 71, 201, 64, 192, 91, 237, 44, 116, 156, 191, 218, 117, + 159, 186, 213, 100, 172, 239, 42, 126, 130, 157, 188, 223, 122, 142, 137, 128, + 155, 182, 193, 88, 232, 35, 101, 175, 234, 37, 111, 177, 200, 67, 197, 84, + 252, 31, 33, 99, 165, 244, 7, 9, 27, 45, 119, 153, 176, 203, 70, 202, + 69, 207, 74, 222, 121, 139, 134, 145, 168, 227, 62, 66, 198, 81, 243, 14, + 18, 54, 90, 238, 41, 123, 141, 140, 143, 138, 133, 148, 167, 242, 13, 23, + 57, 75, 221, 124, 132, 151, 162, 253, 28, 36, 108, 180, 199, 82, 246, 1) + + + + +""" + AES Encryption Algorithm + The AES algorithm is just Rijndael algorithm restricted to the default + blockSize of 128 bits. +""" + +class AES(Rijndael): + """ The AES algorithm is the Rijndael block cipher restricted to block + sizes of 128 bits and key sizes of 128, 192 or 256 bits + """ + def __init__(self, key = None, padding = padWithPadLen(), keySize=16): + """ Initialize AES, keySize is in bytes """ + if not (keySize == 16 or keySize == 24 or keySize == 32) : + raise BadKeySizeError, 'Illegal AES key size, must be 16, 24, or 32 bytes' + + Rijndael.__init__( self, key, padding=padding, keySize=keySize, blockSize=16 ) + + self.name = 'AES' + + +""" + CBC mode of encryption for block ciphers. + This algorithm mode wraps any BlockCipher to make a + Cipher Block Chaining mode. +""" +from random import Random # should change to crypto.random!!! + + +class CBC(BlockCipher): + """ The CBC class wraps block ciphers to make cipher block chaining (CBC) mode + algorithms. The initialization (IV) is automatic if set to None. Padding + is also automatic based on the Pad class used to initialize the algorithm + """ + def __init__(self, blockCipherInstance, padding = padWithPadLen()): + """ CBC algorithms are created by initializing with a BlockCipher instance """ + self.baseCipher = blockCipherInstance + self.name = self.baseCipher.name + '_CBC' + self.blockSize = self.baseCipher.blockSize + self.keySize = self.baseCipher.keySize + self.padding = padding + self.baseCipher.padding = noPadding() # baseCipher should NOT pad!! + self.r = Random() # for IV generation, currently uses + # mediocre standard distro version <---------------- + import time + newSeed = time.ctime()+str(self.r) # seed with instance location + self.r.seed(newSeed) # to make unique + self.reset() + + def setKey(self, key): + self.baseCipher.setKey(key) + + # Overload to reset both CBC state and the wrapped baseCipher + def resetEncrypt(self): + BlockCipher.resetEncrypt(self) # reset CBC encrypt state (super class) + self.baseCipher.resetEncrypt() # reset base cipher encrypt state + + def resetDecrypt(self): + BlockCipher.resetDecrypt(self) # reset CBC state (super class) + self.baseCipher.resetDecrypt() # reset base cipher decrypt state + + def encrypt(self, plainText, iv=None, more=None): + """ CBC encryption - overloads baseCipher to allow optional explicit IV + when iv=None, iv is auto generated! + """ + if self.encryptBlockCount == 0: + self.iv = iv + else: + assert(iv==None), 'IV used only on first call to encrypt' + + return BlockCipher.encrypt(self,plainText, more=more) + + def decrypt(self, cipherText, iv=None, more=None): + """ CBC decryption - overloads baseCipher to allow optional explicit IV + when iv=None, iv is auto generated! + """ + if self.decryptBlockCount == 0: + self.iv = iv + else: + assert(iv==None), 'IV used only on first call to decrypt' + + return BlockCipher.decrypt(self, cipherText, more=more) + + def encryptBlock(self, plainTextBlock): + """ CBC block encryption, IV is set with 'encrypt' """ + auto_IV = '' + if self.encryptBlockCount == 0: + if self.iv == None: + # generate IV and use + self.iv = ''.join([chr(self.r.randrange(256)) for i in range(self.blockSize)]) + self.prior_encr_CT_block = self.iv + auto_IV = self.prior_encr_CT_block # prepend IV if it's automatic + else: # application provided IV + assert(len(self.iv) == self.blockSize ),'IV must be same length as block' + self.prior_encr_CT_block = self.iv + """ encrypt the prior CT XORed with the PT """ + ct = self.baseCipher.encryptBlock( xor(self.prior_encr_CT_block, plainTextBlock) ) + self.prior_encr_CT_block = ct + return auto_IV+ct + + def decryptBlock(self, encryptedBlock): + """ Decrypt a single block """ + + if self.decryptBlockCount == 0: # first call, process IV + if self.iv == None: # auto decrypt IV? + self.prior_CT_block = encryptedBlock + return '' + else: + assert(len(self.iv)==self.blockSize),"Bad IV size on CBC decryption" + self.prior_CT_block = self.iv + + dct = self.baseCipher.decryptBlock(encryptedBlock) + """ XOR the prior decrypted CT with the prior CT """ + dct_XOR_priorCT = xor( self.prior_CT_block, dct ) + + self.prior_CT_block = encryptedBlock + + return dct_XOR_priorCT + + +""" + AES_CBC Encryption Algorithm +""" + +class AES_CBC(CBC): + """ AES encryption in CBC feedback mode """ + def __init__(self, key=None, padding=padWithPadLen(), keySize=16): + CBC.__init__( self, AES(key, noPadding(), keySize), padding) + self.name = 'AES_CBC' diff --git a/Calibre_Plugins/K4MobiDeDRM_plugin/alfcrypto.dll b/Calibre_Plugins/K4MobiDeDRM_plugin/alfcrypto.dll new file mode 100644 index 0000000..26d740d Binary files /dev/null and b/Calibre_Plugins/K4MobiDeDRM_plugin/alfcrypto.dll differ diff --git a/Calibre_Plugins/K4MobiDeDRM_plugin/alfcrypto.py b/Calibre_Plugins/K4MobiDeDRM_plugin/alfcrypto.py new file mode 100644 index 0000000..e25a0c8 --- /dev/null +++ b/Calibre_Plugins/K4MobiDeDRM_plugin/alfcrypto.py @@ -0,0 +1,290 @@ +#! /usr/bin/env python + +import sys, os +import hmac +from struct import pack +import hashlib + + +# interface to needed routines libalfcrypto +def _load_libalfcrypto(): + import ctypes + from ctypes import CDLL, byref, POINTER, c_void_p, c_char_p, c_int, c_long, \ + Structure, c_ulong, create_string_buffer, addressof, string_at, cast, sizeof + + pointer_size = ctypes.sizeof(ctypes.c_voidp) + name_of_lib = None + if sys.platform.startswith('darwin'): + name_of_lib = 'libalfcrypto.dylib' + elif sys.platform.startswith('win'): + if pointer_size == 4: + name_of_lib = 'alfcrypto.dll' + else: + name_of_lib = 'alfcrypto64.dll' + else: + if pointer_size == 4: + name_of_lib = 'libalfcrypto32.so' + else: + name_of_lib = 'libalfcrypto64.so' + + libalfcrypto = sys.path[0] + os.sep + name_of_lib + + if not os.path.isfile(libalfcrypto): + raise Exception('libalfcrypto not found') + + libalfcrypto = CDLL(libalfcrypto) + + c_char_pp = POINTER(c_char_p) + c_int_p = POINTER(c_int) + + + def F(restype, name, argtypes): + func = getattr(libalfcrypto, name) + func.restype = restype + func.argtypes = argtypes + return func + + # aes cbc decryption + # + # struct aes_key_st { + # unsigned long rd_key[4 *(AES_MAXNR + 1)]; + # int rounds; + # }; + # + # typedef struct aes_key_st AES_KEY; + # + # int AES_set_decrypt_key(const unsigned char *userKey, const int bits, AES_KEY *key); + # + # + # void AES_cbc_encrypt(const unsigned char *in, unsigned char *out, + # const unsigned long length, const AES_KEY *key, + # unsigned char *ivec, const int enc); + + AES_MAXNR = 14 + + class AES_KEY(Structure): + _fields_ = [('rd_key', c_long * (4 * (AES_MAXNR + 1))), ('rounds', c_int)] + + AES_KEY_p = POINTER(AES_KEY) + AES_cbc_encrypt = F(None, 'AES_cbc_encrypt',[c_char_p, c_char_p, c_ulong, AES_KEY_p, c_char_p, c_int]) + AES_set_decrypt_key = F(c_int, 'AES_set_decrypt_key',[c_char_p, c_int, AES_KEY_p]) + + + + # Pukall 1 Cipher + # unsigned char *PC1(const unsigned char *key, unsigned int klen, const unsigned char *src, + # unsigned char *dest, unsigned int len, int decryption); + + PC1 = F(c_char_p, 'PC1', [c_char_p, c_ulong, c_char_p, c_char_p, c_ulong, c_ulong]) + + # Topaz Encryption + # typedef struct _TpzCtx { + # unsigned int v[2]; + # } TpzCtx; + # + # void topazCryptoInit(TpzCtx *ctx, const unsigned char *key, int klen); + # void topazCryptoDecrypt(const TpzCtx *ctx, const unsigned char *in, unsigned char *out, int len); + + class TPZ_CTX(Structure): + _fields_ = [('v', c_long * 2)] + + TPZ_CTX_p = POINTER(TPZ_CTX) + topazCryptoInit = F(None, 'topazCryptoInit', [TPZ_CTX_p, c_char_p, c_ulong]) + topazCryptoDecrypt = F(None, 'topazCryptoDecrypt', [TPZ_CTX_p, c_char_p, c_char_p, c_ulong]) + + + class AES_CBC(object): + def __init__(self): + self._blocksize = 0 + self._keyctx = None + self._iv = 0 + + def set_decrypt_key(self, userkey, iv): + self._blocksize = len(userkey) + if (self._blocksize != 16) and (self._blocksize != 24) and (self._blocksize != 32) : + raise Exception('AES CBC improper key used') + return + keyctx = self._keyctx = AES_KEY() + self._iv = iv + rv = AES_set_decrypt_key(userkey, len(userkey) * 8, keyctx) + if rv < 0: + raise Exception('Failed to initialize AES CBC key') + + def decrypt(self, data): + out = create_string_buffer(len(data)) + mutable_iv = create_string_buffer(self._iv, len(self._iv)) + rv = AES_cbc_encrypt(data, out, len(data), self._keyctx, mutable_iv, 0) + if rv == 0: + raise Exception('AES CBC decryption failed') + return out.raw + + class Pukall_Cipher(object): + def __init__(self): + self.key = None + + def PC1(self, key, src, decryption=True): + self.key = key + out = create_string_buffer(len(src)) + de = 0 + if decryption: + de = 1 + rv = PC1(key, len(key), src, out, len(src), de) + return out.raw + + class Topaz_Cipher(object): + def __init__(self): + self._ctx = None + + def ctx_init(self, key): + tpz_ctx = self._ctx = TPZ_CTX() + topazCryptoInit(tpz_ctx, key, len(key)) + return tpz_ctx + + def decrypt(self, data, ctx=None): + if ctx == None: + ctx = self._ctx + out = create_string_buffer(len(data)) + topazCryptoDecrypt(ctx, data, out, len(data)) + return out.raw + + print "Using Library AlfCrypto DLL/DYLIB/SO" + return (AES_CBC, Pukall_Cipher, Topaz_Cipher) + + +def _load_python_alfcrypto(): + + import aescbc + + class Pukall_Cipher(object): + def __init__(self): + self.key = None + + def PC1(self, key, src, decryption=True): + sum1 = 0; + sum2 = 0; + keyXorVal = 0; + if len(key)!=16: + print "Bad key length!" + return None + wkey = [] + for i in xrange(8): + wkey.append(ord(key[i*2])<<8 | ord(key[i*2+1])) + dst = "" + for i in xrange(len(src)): + temp1 = 0; + byteXorVal = 0; + for j in xrange(8): + temp1 ^= wkey[j] + sum2 = (sum2+j)*20021 + sum1 + sum1 = (temp1*346)&0xFFFF + sum2 = (sum2+sum1)&0xFFFF + temp1 = (temp1*20021+1)&0xFFFF + byteXorVal ^= temp1 ^ sum2 + curByte = ord(src[i]) + if not decryption: + keyXorVal = curByte * 257; + curByte = ((curByte ^ (byteXorVal >> 8)) ^ byteXorVal) & 0xFF + if decryption: + keyXorVal = curByte * 257; + for j in xrange(8): + wkey[j] ^= keyXorVal; + dst+=chr(curByte) + return dst + + class Topaz_Cipher(object): + def __init__(self): + self._ctx = None + + def ctx_init(self, key): + ctx1 = 0x0CAFFE19E + for keyChar in key: + keyByte = ord(keyChar) + ctx2 = ctx1 + ctx1 = ((((ctx1 >>2) * (ctx1 >>7))&0xFFFFFFFF) ^ (keyByte * keyByte * 0x0F902007)& 0xFFFFFFFF ) + self._ctx = [ctx1, ctx2] + return [ctx1,ctx2] + + def decrypt(self, data, ctx=None): + if ctx == None: + ctx = self._ctx + ctx1 = ctx[0] + ctx2 = ctx[1] + plainText = "" + for dataChar in data: + dataByte = ord(dataChar) + m = (dataByte ^ ((ctx1 >> 3) &0xFF) ^ ((ctx2<<3) & 0xFF)) &0xFF + ctx2 = ctx1 + ctx1 = (((ctx1 >> 2) * (ctx1 >> 7)) &0xFFFFFFFF) ^((m * m * 0x0F902007) &0xFFFFFFFF) + plainText += chr(m) + return plainText + + class AES_CBC(object): + def __init__(self): + self._key = None + self._iv = None + self.aes = None + + def set_decrypt_key(self, userkey, iv): + self._key = userkey + self._iv = iv + self.aes = aescbc.AES_CBC(userkey, aescbc.noPadding(), len(userkey)) + + def decrypt(self, data): + iv = self._iv + cleartext = self.aes.decrypt(iv + data) + return cleartext + + return (AES_CBC, Pukall_Cipher, Topaz_Cipher) + + +def _load_crypto(): + AES_CBC = Pukall_Cipher = Topaz_Cipher = None + cryptolist = (_load_libalfcrypto, _load_python_alfcrypto) + for loader in cryptolist: + try: + AES_CBC, Pukall_Cipher, Topaz_Cipher = loader() + break + except (ImportError, Exception): + pass + return AES_CBC, Pukall_Cipher, Topaz_Cipher + +AES_CBC, Pukall_Cipher, Topaz_Cipher = _load_crypto() + + +class KeyIVGen(object): + # this only exists in openssl so we will use pure python implementation instead + # PKCS5_PBKDF2_HMAC_SHA1 = F(c_int, 'PKCS5_PBKDF2_HMAC_SHA1', + # [c_char_p, c_ulong, c_char_p, c_ulong, c_ulong, c_ulong, c_char_p]) + def pbkdf2(self, passwd, salt, iter, keylen): + + def xorstr( a, b ): + if len(a) != len(b): + raise Exception("xorstr(): lengths differ") + return ''.join((chr(ord(x)^ord(y)) for x, y in zip(a, b))) + + def prf( h, data ): + hm = h.copy() + hm.update( data ) + return hm.digest() + + def pbkdf2_F( h, salt, itercount, blocknum ): + U = prf( h, salt + pack('>i',blocknum ) ) + T = U + for i in range(2, itercount+1): + U = prf( h, U ) + T = xorstr( T, U ) + return T + + sha = hashlib.sha1 + digest_size = sha().digest_size + # l - number of output blocks to produce + l = keylen / digest_size + if keylen % digest_size != 0: + l += 1 + h = hmac.new( passwd, None, sha ) + T = "" + for i in range(1, l+1): + T += pbkdf2_F( h, salt, iter, i ) + return T[0: keylen] + + diff --git a/Calibre_Plugins/K4MobiDeDRM_plugin/alfcrypto64.dll b/Calibre_Plugins/K4MobiDeDRM_plugin/alfcrypto64.dll new file mode 100644 index 0000000..7bef68e Binary files /dev/null and b/Calibre_Plugins/K4MobiDeDRM_plugin/alfcrypto64.dll differ diff --git a/Calibre_Plugins/K4MobiDeDRM_plugin/alfcrypto_src.zip b/Calibre_Plugins/K4MobiDeDRM_plugin/alfcrypto_src.zip new file mode 100644 index 0000000..269810c Binary files /dev/null and b/Calibre_Plugins/K4MobiDeDRM_plugin/alfcrypto_src.zip differ diff --git a/Calibre_Plugins/K4MobiDeDRM_plugin/cmbtc_v2.2.py b/Calibre_Plugins/K4MobiDeDRM_plugin/cmbtc_v2.2.py new file mode 100644 index 0000000..7be7a8a --- /dev/null +++ b/Calibre_Plugins/K4MobiDeDRM_plugin/cmbtc_v2.2.py @@ -0,0 +1,899 @@ +#! /usr/bin/python + +""" + +Comprehensive Mazama Book DRM with Topaz Cryptography V2.2 + +-----BEGIN PUBLIC KEY----- +MIGfMA0GCSqGSIb3DQEBAQUAA4GNADCBiQKBgQDdBHJ4CNc6DNFCw4MRCw4SWAK6 +M8hYfnNEI0yQmn5Ti+W8biT7EatpauE/5jgQMPBmdNrDr1hbHyHBSP7xeC2qlRWC +B62UCxeu/fpfnvNHDN/wPWWH4jynZ2M6cdcnE5LQ+FfeKqZn7gnG2No1U9h7oOHx +y2/pHuYme7U1TsgSjwIDAQAB +-----END PUBLIC KEY----- + +""" + +from __future__ import with_statement + +import csv +import sys +import os +import getopt +import zlib +from struct import pack +from struct import unpack +from ctypes import windll, c_char_p, c_wchar_p, c_uint, POINTER, byref, \ + create_unicode_buffer, create_string_buffer, CFUNCTYPE, addressof, \ + string_at, Structure, c_void_p, cast +import _winreg as winreg +import Tkinter +import Tkconstants +import tkMessageBox +import traceback +import hashlib + +MAX_PATH = 255 + +kernel32 = windll.kernel32 +advapi32 = windll.advapi32 +crypt32 = windll.crypt32 + +global kindleDatabase +global bookFile +global bookPayloadOffset +global bookHeaderRecords +global bookMetadata +global bookKey +global command + +# +# Various character maps used to decrypt books. Probably supposed to act as obfuscation +# + +charMap1 = "n5Pr6St7Uv8Wx9YzAb0Cd1Ef2Gh3Jk4M" +charMap2 = "AaZzB0bYyCc1XxDdW2wEeVv3FfUuG4g-TtHh5SsIiR6rJjQq7KkPpL8lOoMm9Nn_" +charMap3 = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/" +charMap4 = "ABCDEFGHIJKLMNPQRSTUVWXYZ123456789" + +# +# Exceptions for all the problems that might happen during the script +# + +class CMBDTCError(Exception): + pass + +class CMBDTCFatal(Exception): + pass + +# +# Stolen stuff +# + +class DataBlob(Structure): + _fields_ = [('cbData', c_uint), + ('pbData', c_void_p)] +DataBlob_p = POINTER(DataBlob) + +def GetSystemDirectory(): + GetSystemDirectoryW = kernel32.GetSystemDirectoryW + GetSystemDirectoryW.argtypes = [c_wchar_p, c_uint] + GetSystemDirectoryW.restype = c_uint + def GetSystemDirectory(): + buffer = create_unicode_buffer(MAX_PATH + 1) + GetSystemDirectoryW(buffer, len(buffer)) + return buffer.value + return GetSystemDirectory +GetSystemDirectory = GetSystemDirectory() + + +def GetVolumeSerialNumber(): + GetVolumeInformationW = kernel32.GetVolumeInformationW + GetVolumeInformationW.argtypes = [c_wchar_p, c_wchar_p, c_uint, + POINTER(c_uint), POINTER(c_uint), + POINTER(c_uint), c_wchar_p, c_uint] + GetVolumeInformationW.restype = c_uint + def GetVolumeSerialNumber(path): + vsn = c_uint(0) + GetVolumeInformationW(path, None, 0, byref(vsn), None, None, None, 0) + return vsn.value + return GetVolumeSerialNumber +GetVolumeSerialNumber = GetVolumeSerialNumber() + + +def GetUserName(): + GetUserNameW = advapi32.GetUserNameW + GetUserNameW.argtypes = [c_wchar_p, POINTER(c_uint)] + GetUserNameW.restype = c_uint + def GetUserName(): + buffer = create_unicode_buffer(32) + size = c_uint(len(buffer)) + while not GetUserNameW(buffer, byref(size)): + buffer = create_unicode_buffer(len(buffer) * 2) + size.value = len(buffer) + return buffer.value.encode('utf-16-le')[::2] + return GetUserName +GetUserName = GetUserName() + + +def CryptUnprotectData(): + _CryptUnprotectData = crypt32.CryptUnprotectData + _CryptUnprotectData.argtypes = [DataBlob_p, c_wchar_p, DataBlob_p, + c_void_p, c_void_p, c_uint, DataBlob_p] + _CryptUnprotectData.restype = c_uint + def CryptUnprotectData(indata, entropy): + indatab = create_string_buffer(indata) + indata = DataBlob(len(indata), cast(indatab, c_void_p)) + entropyb = create_string_buffer(entropy) + entropy = DataBlob(len(entropy), cast(entropyb, c_void_p)) + outdata = DataBlob() + if not _CryptUnprotectData(byref(indata), None, byref(entropy), + None, None, 0, byref(outdata)): + raise CMBDTCFatal("Failed to Unprotect Data") + return string_at(outdata.pbData, outdata.cbData) + return CryptUnprotectData +CryptUnprotectData = CryptUnprotectData() + +# +# Returns the MD5 digest of "message" +# + +def MD5(message): + ctx = hashlib.md5() + ctx.update(message) + return ctx.digest() + +# +# Returns the MD5 digest of "message" +# + +def SHA1(message): + ctx = hashlib.sha1() + ctx.update(message) + return ctx.digest() + +# +# Open the book file at path +# + +def openBook(path): + try: + return open(path,'rb') + except: + raise CMBDTCFatal("Could not open book file: " + path) +# +# Encode the bytes in data with the characters in map +# + +def encode(data, map): + result = "" + for char in data: + value = ord(char) + Q = (value ^ 0x80) // len(map) + R = value % len(map) + result += map[Q] + result += map[R] + return result + +# +# Hash the bytes in data and then encode the digest with the characters in map +# + +def encodeHash(data,map): + return encode(MD5(data),map) + +# +# Decode the string in data with the characters in map. Returns the decoded bytes +# + +def decode(data,map): + result = "" + for i in range (0,len(data),2): + high = map.find(data[i]) + low = map.find(data[i+1]) + value = (((high * 0x40) ^ 0x80) & 0xFF) + low + result += pack("B",value) + return result + +# +# Locate and open the Kindle.info file (Hopefully in the way it is done in the Kindle application) +# + +def openKindleInfo(): + regkey = winreg.OpenKey(winreg.HKEY_CURRENT_USER, "Software\\Microsoft\\Windows\\CurrentVersion\\Explorer\\Shell Folders\\") + path = winreg.QueryValueEx(regkey, 'Local AppData')[0] + return open(path+'\\Amazon\\Kindle For PC\\{AMAwzsaPaaZAzmZzZQzgZCAkZ3AjA_AY}\\kindle.info','r') + +# +# Parse the Kindle.info file and return the records as a list of key-values +# + +def parseKindleInfo(): + DB = {} + infoReader = openKindleInfo() + infoReader.read(1) + data = infoReader.read() + items = data.split('{') + + for item in items: + splito = item.split(':') + DB[splito[0]] =splito[1] + return DB + +# +# Find if the original string for a hashed/encoded string is known. If so return the original string othwise return an empty string. (Totally not optimal) +# + +def findNameForHash(hash): + names = ["kindle.account.tokens","kindle.cookie.item","eulaVersionAccepted","login_date","kindle.token.item","login","kindle.key.item","kindle.name.info","kindle.device.info", "MazamaRandomNumber"] + result = "" + for name in names: + if hash == encodeHash(name, charMap2): + result = name + break + return name + +# +# Print all the records from the kindle.info file (option -i) +# + +def printKindleInfo(): + for record in kindleDatabase: + name = findNameForHash(record) + if name != "" : + print (name) + print ("--------------------------\n") + else : + print ("Unknown Record") + print getKindleInfoValueForHash(record) + print "\n" +# +# Get a record from the Kindle.info file for the key "hashedKey" (already hashed and encoded). Return the decoded and decrypted record +# + +def getKindleInfoValueForHash(hashedKey): + global kindleDatabase + encryptedValue = decode(kindleDatabase[hashedKey],charMap2) + return CryptUnprotectData(encryptedValue,"") + +# +# Get a record from the Kindle.info file for the string in "key" (plaintext). Return the decoded and decrypted record +# + +def getKindleInfoValueForKey(key): + return getKindleInfoValueForHash(encodeHash(key,charMap2)) + +# +# Get a 7 bit encoded number from the book file +# + +def bookReadEncodedNumber(): + flag = False + data = ord(bookFile.read(1)) + + if data == 0xFF: + flag = True + data = ord(bookFile.read(1)) + + if data >= 0x80: + datax = (data & 0x7F) + while data >= 0x80 : + data = ord(bookFile.read(1)) + datax = (datax <<7) + (data & 0x7F) + data = datax + + if flag: + data = -data + return data + +# +# Encode a number in 7 bit format +# + +def encodeNumber(number): + result = "" + negative = False + flag = 0 + + if number < 0 : + number = -number + 1 + negative = True + + while True: + byte = number & 0x7F + number = number >> 7 + byte += flag + result += chr(byte) + flag = 0x80 + if number == 0 : + if (byte == 0xFF and negative == False) : + result += chr(0x80) + break + + if negative: + result += chr(0xFF) + + return result[::-1] + +# +# Get a length prefixed string from the file +# + +def bookReadString(): + stringLength = bookReadEncodedNumber() + return unpack(str(stringLength)+"s",bookFile.read(stringLength))[0] + +# +# Returns a length prefixed string +# + +def lengthPrefixString(data): + return encodeNumber(len(data))+data + + +# +# Read and return the data of one header record at the current book file position [[offset,compressedLength,decompressedLength],...] +# + +def bookReadHeaderRecordData(): + nbValues = bookReadEncodedNumber() + values = [] + for i in range (0,nbValues): + values.append([bookReadEncodedNumber(),bookReadEncodedNumber(),bookReadEncodedNumber()]) + return values + +# +# Read and parse one header record at the current book file position and return the associated data [[offset,compressedLength,decompressedLength],...] +# + +def parseTopazHeaderRecord(): + if ord(bookFile.read(1)) != 0x63: + raise CMBDTCFatal("Parse Error : Invalid Header") + + tag = bookReadString() + record = bookReadHeaderRecordData() + return [tag,record] + +# +# Parse the header of a Topaz file, get all the header records and the offset for the payload +# + +def parseTopazHeader(): + global bookHeaderRecords + global bookPayloadOffset + magic = unpack("4s",bookFile.read(4))[0] + + if magic != 'TPZ0': + raise CMBDTCFatal("Parse Error : Invalid Header, not a Topaz file") + + nbRecords = bookReadEncodedNumber() + bookHeaderRecords = {} + + for i in range (0,nbRecords): + result = parseTopazHeaderRecord() + bookHeaderRecords[result[0]] = result[1] + + if ord(bookFile.read(1)) != 0x64 : + raise CMBDTCFatal("Parse Error : Invalid Header") + + bookPayloadOffset = bookFile.tell() + +# +# Get a record in the book payload, given its name and index. If necessary the record is decrypted. The record is not decompressed +# + +def getBookPayloadRecord(name, index): + encrypted = False + + try: + recordOffset = bookHeaderRecords[name][index][0] + except: + raise CMBDTCFatal("Parse Error : Invalid Record, record not found") + + bookFile.seek(bookPayloadOffset + recordOffset) + + tag = bookReadString() + if tag != name : + raise CMBDTCFatal("Parse Error : Invalid Record, record name doesn't match") + + recordIndex = bookReadEncodedNumber() + + if recordIndex < 0 : + encrypted = True + recordIndex = -recordIndex -1 + + if recordIndex != index : + raise CMBDTCFatal("Parse Error : Invalid Record, index doesn't match") + + if bookHeaderRecords[name][index][2] != 0 : + record = bookFile.read(bookHeaderRecords[name][index][2]) + else: + record = bookFile.read(bookHeaderRecords[name][index][1]) + + if encrypted: + ctx = topazCryptoInit(bookKey) + record = topazCryptoDecrypt(record,ctx) + + return record + +# +# Extract, decrypt and decompress a book record indicated by name and index and print it or save it in "filename" +# + +def extractBookPayloadRecord(name, index, filename): + compressed = False + + try: + compressed = bookHeaderRecords[name][index][2] != 0 + record = getBookPayloadRecord(name,index) + except: + print("Could not find record") + + if compressed: + try: + record = zlib.decompress(record) + except: + raise CMBDTCFatal("Could not decompress record") + + if filename != "": + try: + file = open(filename,"wb") + file.write(record) + file.close() + except: + raise CMBDTCFatal("Could not write to destination file") + else: + print(record) + +# +# return next record [key,value] from the book metadata from the current book position +# + +def readMetadataRecord(): + return [bookReadString(),bookReadString()] + +# +# Parse the metadata record from the book payload and return a list of [key,values] +# + +def parseMetadata(): + global bookHeaderRecords + global bookPayloadAddress + global bookMetadata + bookMetadata = {} + bookFile.seek(bookPayloadOffset + bookHeaderRecords["metadata"][0][0]) + tag = bookReadString() + if tag != "metadata" : + raise CMBDTCFatal("Parse Error : Record Names Don't Match") + + flags = ord(bookFile.read(1)) + nbRecords = ord(bookFile.read(1)) + + for i in range (0,nbRecords) : + record =readMetadataRecord() + bookMetadata[record[0]] = record[1] + +# +# Returns two bit at offset from a bit field +# + +def getTwoBitsFromBitField(bitField,offset): + byteNumber = offset // 4 + bitPosition = 6 - 2*(offset % 4) + + return ord(bitField[byteNumber]) >> bitPosition & 3 + +# +# Returns the six bits at offset from a bit field +# + +def getSixBitsFromBitField(bitField,offset): + offset *= 3 + value = (getTwoBitsFromBitField(bitField,offset) <<4) + (getTwoBitsFromBitField(bitField,offset+1) << 2) +getTwoBitsFromBitField(bitField,offset+2) + return value + +# +# 8 bits to six bits encoding from hash to generate PID string +# + +def encodePID(hash): + global charMap3 + PID = "" + for position in range (0,8): + PID += charMap3[getSixBitsFromBitField(hash,position)] + return PID + +# +# Context initialisation for the Topaz Crypto +# + +def topazCryptoInit(key): + ctx1 = 0x0CAFFE19E + + for keyChar in key: + keyByte = ord(keyChar) + ctx2 = ctx1 + ctx1 = ((((ctx1 >>2) * (ctx1 >>7))&0xFFFFFFFF) ^ (keyByte * keyByte * 0x0F902007)& 0xFFFFFFFF ) + return [ctx1,ctx2] + +# +# decrypt data with the context prepared by topazCryptoInit() +# + +def topazCryptoDecrypt(data, ctx): + ctx1 = ctx[0] + ctx2 = ctx[1] + + plainText = "" + + for dataChar in data: + dataByte = ord(dataChar) + m = (dataByte ^ ((ctx1 >> 3) &0xFF) ^ ((ctx2<<3) & 0xFF)) &0xFF + ctx2 = ctx1 + ctx1 = (((ctx1 >> 2) * (ctx1 >> 7)) &0xFFFFFFFF) ^((m * m * 0x0F902007) &0xFFFFFFFF) + plainText += chr(m) + + return plainText + +# +# Decrypt a payload record with the PID +# + +def decryptRecord(data,PID): + ctx = topazCryptoInit(PID) + return topazCryptoDecrypt(data, ctx) + +# +# Try to decrypt a dkey record (contains the book PID) +# + +def decryptDkeyRecord(data,PID): + record = decryptRecord(data,PID) + fields = unpack("3sB8sB8s3s",record) + + if fields[0] != "PID" or fields[5] != "pid" : + raise CMBDTCError("Didn't find PID magic numbers in record") + elif fields[1] != 8 or fields[3] != 8 : + raise CMBDTCError("Record didn't contain correct length fields") + elif fields[2] != PID : + raise CMBDTCError("Record didn't contain PID") + + return fields[4] + +# +# Decrypt all the book's dkey records (contain the book PID) +# + +def decryptDkeyRecords(data,PID): + nbKeyRecords = ord(data[0]) + records = [] + data = data[1:] + for i in range (0,nbKeyRecords): + length = ord(data[0]) + try: + key = decryptDkeyRecord(data[1:length+1],PID) + records.append(key) + except CMBDTCError: + pass + data = data[1+length:] + + return records + +# +# Encryption table used to generate the device PID +# + +def generatePidEncryptionTable() : + table = [] + for counter1 in range (0,0x100): + value = counter1 + for counter2 in range (0,8): + if (value & 1 == 0) : + value = value >> 1 + else : + value = value >> 1 + value = value ^ 0xEDB88320 + table.append(value) + return table + +# +# Seed value used to generate the device PID +# + +def generatePidSeed(table,dsn) : + value = 0 + for counter in range (0,4) : + index = (ord(dsn[counter]) ^ value) &0xFF + value = (value >> 8) ^ table[index] + return value + +# +# Generate the device PID +# + +def generateDevicePID(table,dsn,nbRoll): + seed = generatePidSeed(table,dsn) + pidAscii = "" + pid = [(seed >>24) &0xFF,(seed >> 16) &0xff,(seed >> 8) &0xFF,(seed) & 0xFF,(seed>>24) & 0xFF,(seed >> 16) &0xff,(seed >> 8) &0xFF,(seed) & 0xFF] + index = 0 + + for counter in range (0,nbRoll): + pid[index] = pid[index] ^ ord(dsn[counter]) + index = (index+1) %8 + + for counter in range (0,8): + index = ((((pid[counter] >>5) & 3) ^ pid[counter]) & 0x1f) + (pid[counter] >> 7) + pidAscii += charMap4[index] + return pidAscii + +# +# Create decrypted book payload +# + +def createDecryptedPayload(payload): + + # store data to be able to create the header later + headerData= [] + currentOffset = 0 + + # Add social DRM to decrypted files + + try: + data = getKindleInfoValueForKey("kindle.name.info")+":"+ getKindleInfoValueForKey("login") + if payload!= None: + payload.write(lengthPrefixString("sdrm")) + payload.write(encodeNumber(0)) + payload.write(data) + else: + currentOffset += len(lengthPrefixString("sdrm")) + currentOffset += len(encodeNumber(0)) + currentOffset += len(data) + except: + pass + + for headerRecord in bookHeaderRecords: + name = headerRecord + newRecord = [] + + if name != "dkey" : + + for index in range (0,len(bookHeaderRecords[name])) : + offset = currentOffset + + if payload != None: + # write tag + payload.write(lengthPrefixString(name)) + # write data + payload.write(encodeNumber(index)) + payload.write(getBookPayloadRecord(name, index)) + + else : + currentOffset += len(lengthPrefixString(name)) + currentOffset += len(encodeNumber(index)) + currentOffset += len(getBookPayloadRecord(name, index)) + newRecord.append([offset,bookHeaderRecords[name][index][1],bookHeaderRecords[name][index][2]]) + + headerData.append([name,newRecord]) + + + + return headerData + +# +# Create decrypted book +# + +def createDecryptedBook(outputFile): + outputFile = open(outputFile,"wb") + # Write the payload in a temporary file + headerData = createDecryptedPayload(None) + outputFile.write("TPZ0") + outputFile.write(encodeNumber(len(headerData))) + + for header in headerData : + outputFile.write(chr(0x63)) + outputFile.write(lengthPrefixString(header[0])) + outputFile.write(encodeNumber(len(header[1]))) + for numbers in header[1] : + outputFile.write(encodeNumber(numbers[0])) + outputFile.write(encodeNumber(numbers[1])) + outputFile.write(encodeNumber(numbers[2])) + + outputFile.write(chr(0x64)) + createDecryptedPayload(outputFile) + outputFile.close() + +# +# Set the command to execute by the programm according to cmdLine parameters +# + +def setCommand(name) : + global command + if command != "" : + raise CMBDTCFatal("Invalid command line parameters") + else : + command = name + +# +# Program usage +# + +def usage(): + print("\nUsage:") + print("\nCMBDTC.py [options] bookFileName\n") + print("-p Adds a PID to the list of PIDs that are tried to decrypt the book key (can be used several times)") + print("-d Saves a decrypted copy of the book") + print("-r Prints or writes to disk a record indicated in the form name:index (e.g \"img:0\")") + print("-o Output file name to write records and decrypted books") + print("-v Verbose (can be used several times)") + print("-i Prints kindle.info database") + +# +# Main +# + +def main(argv=sys.argv): + global kindleDatabase + global bookMetadata + global bookKey + global bookFile + global command + + progname = os.path.basename(argv[0]) + + verbose = 0 + recordName = "" + recordIndex = 0 + outputFile = "" + PIDs = [] + kindleDatabase = None + command = "" + + + try: + opts, args = getopt.getopt(sys.argv[1:], "vdir:o:p:") + except getopt.GetoptError, err: + # print help information and exit: + print str(err) # will print something like "option -a not recognized" + usage() + sys.exit(2) + + if len(opts) == 0 and len(args) == 0 : + usage() + sys.exit(2) + + for o, a in opts: + if o == "-v": + verbose+=1 + if o == "-i": + setCommand("printInfo") + if o =="-o": + if a == None : + raise CMBDTCFatal("Invalid parameter for -o") + outputFile = a + if o =="-r": + setCommand("printRecord") + try: + recordName,recordIndex = a.split(':') + except: + raise CMBDTCFatal("Invalid parameter for -r") + if o =="-p": + PIDs.append(a) + if o =="-d": + setCommand("doit") + + if command == "" : + raise CMBDTCFatal("No action supplied on command line") + + # + # Read the encrypted database + # + + try: + kindleDatabase = parseKindleInfo() + except Exception, message: + if verbose>0: + print(message) + + if kindleDatabase != None : + if command == "printInfo" : + printKindleInfo() + + # + # Compute the DSN + # + + # Get the Mazama Random number + MazamaRandomNumber = getKindleInfoValueForKey("MazamaRandomNumber") + + # Get the HDD serial + encodedSystemVolumeSerialNumber = encodeHash(str(GetVolumeSerialNumber(GetSystemDirectory().split('\\')[0] + '\\')),charMap1) + + # Get the current user name + encodedUsername = encodeHash(GetUserName(),charMap1) + + # concat, hash and encode + DSN = encode(SHA1(MazamaRandomNumber+encodedSystemVolumeSerialNumber+encodedUsername),charMap1) + + if verbose >1: + print("DSN: " + DSN) + + # + # Compute the device PID + # + + table = generatePidEncryptionTable() + devicePID = generateDevicePID(table,DSN,4) + PIDs.append(devicePID) + + if verbose > 0: + print("Device PID: " + devicePID) + + # + # Open book and parse metadata + # + + if len(args) == 1: + + bookFile = openBook(args[0]) + parseTopazHeader() + parseMetadata() + + # + # Compute book PID + # + + # Get the account token + + if kindleDatabase != None: + kindleAccountToken = getKindleInfoValueForKey("kindle.account.tokens") + + if verbose >1: + print("Account Token: " + kindleAccountToken) + + keysRecord = bookMetadata["keys"] + keysRecordRecord = bookMetadata[keysRecord] + + pidHash = SHA1(DSN+kindleAccountToken+keysRecord+keysRecordRecord) + + bookPID = encodePID(pidHash) + PIDs.append(bookPID) + + if verbose > 0: + print ("Book PID: " + bookPID ) + + # + # Decrypt book key + # + + dkey = getBookPayloadRecord('dkey', 0) + + bookKeys = [] + for PID in PIDs : + bookKeys+=decryptDkeyRecords(dkey,PID) + + if len(bookKeys) == 0 : + if verbose > 0 : + print ("Book key could not be found. Maybe this book is not registered with this device.") + else : + bookKey = bookKeys[0] + if verbose > 0: + print("Book key: " + bookKey.encode('hex')) + + + + if command == "printRecord" : + extractBookPayloadRecord(recordName,int(recordIndex),outputFile) + if outputFile != "" and verbose>0 : + print("Wrote record to file: "+outputFile) + elif command == "doit" : + if outputFile!="" : + createDecryptedBook(outputFile) + if verbose >0 : + print ("Decrypted book saved. Don't pirate!") + elif verbose > 0: + print("Output file name was not supplied.") + + return 0 + +if __name__ == '__main__': + sys.exit(main()) diff --git a/Calibre_Plugins/K4MobiDeDRM_plugin/convert2xml.py b/Calibre_Plugins/K4MobiDeDRM_plugin/convert2xml.py index 5667511..5312a38 100644 --- a/Calibre_Plugins/K4MobiDeDRM_plugin/convert2xml.py +++ b/Calibre_Plugins/K4MobiDeDRM_plugin/convert2xml.py @@ -1,568 +1,844 @@ -#! /usr/bin/env python +#! /usr/bin/python +# vim:ts=4:sw=4:softtabstop=4:smarttab:expandtab +# For use with Topaz Scripts Version 2.6 -""" - Routines for doing AES CBC in one file +class Unbuffered: + def __init__(self, stream): + self.stream = stream + def write(self, data): + self.stream.write(data) + self.stream.flush() + def __getattr__(self, attr): + return getattr(self.stream, attr) - Modified by some_updates to extract - and combine only those parts needed for AES CBC - into one simple to add python file +import sys +sys.stdout=Unbuffered(sys.stdout) - Original Version - Copyright (c) 2002 by Paul A. Lambert - Under: - CryptoPy Artisitic License Version 1.0 - See the wonderful pure python package cryptopy-1.2.5 - and read its LICENSE.txt for complete license details. -""" +import csv +import os +import getopt +from struct import pack +from struct import unpack -class CryptoError(Exception): - """ Base class for crypto exceptions """ - def __init__(self,errorMessage='Error!'): - self.message = errorMessage - def __str__(self): - return self.message +class TpzDRMError(Exception): + pass -class InitCryptoError(CryptoError): - """ Crypto errors during algorithm initialization """ -class BadKeySizeError(InitCryptoError): - """ Bad key size error """ -class EncryptError(CryptoError): - """ Error in encryption processing """ -class DecryptError(CryptoError): - """ Error in decryption processing """ -class DecryptNotBlockAlignedError(DecryptError): - """ Error in decryption processing """ +# Get a 7 bit encoded number from string. The most +# significant byte comes first and has the high bit (8th) set -def xorS(a,b): - """ XOR two strings """ - assert len(a)==len(b) - x = [] - for i in range(len(a)): - x.append( chr(ord(a[i])^ord(b[i]))) - return ''.join(x) +def readEncodedNumber(file): + flag = False + c = file.read(1) + if (len(c) == 0): + return None + data = ord(c) -def xor(a,b): - """ XOR two strings """ - x = [] - for i in range(min(len(a),len(b))): - x.append( chr(ord(a[i])^ord(b[i]))) - return ''.join(x) + if data == 0xFF: + flag = True + c = file.read(1) + if (len(c) == 0): + return None + data = ord(c) -""" - Base 'BlockCipher' and Pad classes for cipher instances. - BlockCipher supports automatic padding and type conversion. The BlockCipher - class was written to make the actual algorithm code more readable and - not for performance. -""" + if data >= 0x80: + datax = (data & 0x7F) + while data >= 0x80 : + c = file.read(1) + if (len(c) == 0): + return None + data = ord(c) + datax = (datax <<7) + (data & 0x7F) + data = datax -class BlockCipher: - """ Block ciphers """ - def __init__(self): - self.reset() + if flag: + data = -data + return data - def reset(self): - self.resetEncrypt() - self.resetDecrypt() - def resetEncrypt(self): - self.encryptBlockCount = 0 - self.bytesToEncrypt = '' - def resetDecrypt(self): - self.decryptBlockCount = 0 - self.bytesToDecrypt = '' - def encrypt(self, plainText, more = None): - """ Encrypt a string and return a binary string """ - self.bytesToEncrypt += plainText # append plainText to any bytes from prior encrypt - numBlocks, numExtraBytes = divmod(len(self.bytesToEncrypt), self.blockSize) - cipherText = '' - for i in range(numBlocks): - bStart = i*self.blockSize - ctBlock = self.encryptBlock(self.bytesToEncrypt[bStart:bStart+self.blockSize]) - self.encryptBlockCount += 1 - cipherText += ctBlock - if numExtraBytes > 0: # save any bytes that are not block aligned - self.bytesToEncrypt = self.bytesToEncrypt[-numExtraBytes:] +# returns a binary string that encodes a number into 7 bits +# most significant byte first which has the high bit set + +def encodeNumber(number): + result = "" + negative = False + flag = 0 + + if number < 0 : + number = -number + 1 + negative = True + + while True: + byte = number & 0x7F + number = number >> 7 + byte += flag + result += chr(byte) + flag = 0x80 + if number == 0 : + if (byte == 0xFF and negative == False) : + result += chr(0x80) + break + + if negative: + result += chr(0xFF) + + return result[::-1] + + + +# create / read a length prefixed string from the file + +def lengthPrefixString(data): + return encodeNumber(len(data))+data + +def readString(file): + stringLength = readEncodedNumber(file) + if (stringLength == None): + return "" + sv = file.read(stringLength) + if (len(sv) != stringLength): + return "" + return unpack(str(stringLength)+"s",sv)[0] + + +# convert a binary string generated by encodeNumber (7 bit encoded number) +# to the value you would find inside the page*.dat files to be processed + +def convert(i): + result = '' + val = encodeNumber(i) + for j in xrange(len(val)): + c = ord(val[j:j+1]) + result += '%02x' % c + return result + + + +# the complete string table used to store all book text content +# as well as the xml tokens and values that make sense out of it + +class Dictionary(object): + def __init__(self, dictFile): + self.filename = dictFile + self.size = 0 + self.fo = file(dictFile,'rb') + self.stable = [] + self.size = readEncodedNumber(self.fo) + for i in xrange(self.size): + self.stable.append(self.escapestr(readString(self.fo))) + self.pos = 0 + + def escapestr(self, str): + str = str.replace('&','&') + str = str.replace('<','<') + str = str.replace('>','>') + str = str.replace('=','=') + return str + + def lookup(self,val): + if ((val >= 0) and (val < self.size)) : + self.pos = val + return self.stable[self.pos] else: - self.bytesToEncrypt = '' + print "Error - %d outside of string table limits" % val + raise TpzDRMError('outside of string table limits') + # sys.exit(-1) - if more == None: # no more data expected from caller - finalBytes = self.padding.addPad(self.bytesToEncrypt,self.blockSize) - if len(finalBytes) > 0: - ctBlock = self.encryptBlock(finalBytes) - self.encryptBlockCount += 1 - cipherText += ctBlock - self.resetEncrypt() - return cipherText + def getSize(self): + return self.size - def decrypt(self, cipherText, more = None): - """ Decrypt a string and return a string """ - self.bytesToDecrypt += cipherText # append to any bytes from prior decrypt + def getPos(self): + return self.pos - numBlocks, numExtraBytes = divmod(len(self.bytesToDecrypt), self.blockSize) - if more == None: # no more calls to decrypt, should have all the data - if numExtraBytes != 0: - raise DecryptNotBlockAlignedError, 'Data not block aligned on decrypt' + def dumpDict(self): + for i in xrange(self.size): + print "%d %s %s" % (i, convert(i), self.stable[i]) + return - # hold back some bytes in case last decrypt has zero len - if (more != None) and (numExtraBytes == 0) and (numBlocks >0) : - numBlocks -= 1 - numExtraBytes = self.blockSize +# parses the xml snippets that are represented by each page*.dat file. +# also parses the other0.dat file - the main stylesheet +# and information used to inject the xml snippets into page*.dat files - plainText = '' - for i in range(numBlocks): - bStart = i*self.blockSize - ptBlock = self.decryptBlock(self.bytesToDecrypt[bStart : bStart+self.blockSize]) - self.decryptBlockCount += 1 - plainText += ptBlock +class PageParser(object): + def __init__(self, filename, dict, debug, flat_xml): + self.fo = file(filename,'rb') + self.id = os.path.basename(filename).replace('.dat','') + self.dict = dict + self.debug = debug + self.flat_xml = flat_xml + self.tagpath = [] + self.doc = [] + self.snippetList = [] - if numExtraBytes > 0: # save any bytes that are not block aligned - self.bytesToEncrypt = self.bytesToEncrypt[-numExtraBytes:] + + # hash table used to enable the decoding process + # This has all been developed by trial and error so it may still have omissions or + # contain errors + # Format: + # tag : (number of arguments, argument type, subtags present, special case of subtags presents when escaped) + + token_tags = { + 'x' : (1, 'scalar_number', 0, 0), + 'y' : (1, 'scalar_number', 0, 0), + 'h' : (1, 'scalar_number', 0, 0), + 'w' : (1, 'scalar_number', 0, 0), + 'firstWord' : (1, 'scalar_number', 0, 0), + 'lastWord' : (1, 'scalar_number', 0, 0), + 'rootID' : (1, 'scalar_number', 0, 0), + 'stemID' : (1, 'scalar_number', 0, 0), + 'type' : (1, 'scalar_text', 0, 0), + + 'info' : (0, 'number', 1, 0), + + 'info.word' : (0, 'number', 1, 1), + 'info.word.ocrText' : (1, 'text', 0, 0), + 'info.word.firstGlyph' : (1, 'raw', 0, 0), + 'info.word.lastGlyph' : (1, 'raw', 0, 0), + 'info.word.bl' : (1, 'raw', 0, 0), + 'info.word.link_id' : (1, 'number', 0, 0), + + 'glyph' : (0, 'number', 1, 1), + 'glyph.x' : (1, 'number', 0, 0), + 'glyph.y' : (1, 'number', 0, 0), + 'glyph.glyphID' : (1, 'number', 0, 0), + + 'dehyphen' : (0, 'number', 1, 1), + 'dehyphen.rootID' : (1, 'number', 0, 0), + 'dehyphen.stemID' : (1, 'number', 0, 0), + 'dehyphen.stemPage' : (1, 'number', 0, 0), + 'dehyphen.sh' : (1, 'number', 0, 0), + + 'links' : (0, 'number', 1, 1), + 'links.page' : (1, 'number', 0, 0), + 'links.rel' : (1, 'number', 0, 0), + 'links.row' : (1, 'number', 0, 0), + 'links.title' : (1, 'text', 0, 0), + 'links.href' : (1, 'text', 0, 0), + 'links.type' : (1, 'text', 0, 0), + + 'paraCont' : (0, 'number', 1, 1), + 'paraCont.rootID' : (1, 'number', 0, 0), + 'paraCont.stemID' : (1, 'number', 0, 0), + 'paraCont.stemPage' : (1, 'number', 0, 0), + + 'paraStems' : (0, 'number', 1, 1), + 'paraStems.stemID' : (1, 'number', 0, 0), + + 'wordStems' : (0, 'number', 1, 1), + 'wordStems.stemID' : (1, 'number', 0, 0), + + 'empty' : (1, 'snippets', 1, 0), + + 'page' : (1, 'snippets', 1, 0), + 'page.pageid' : (1, 'scalar_text', 0, 0), + 'page.pagelabel' : (1, 'scalar_text', 0, 0), + 'page.type' : (1, 'scalar_text', 0, 0), + 'page.h' : (1, 'scalar_number', 0, 0), + 'page.w' : (1, 'scalar_number', 0, 0), + 'page.startID' : (1, 'scalar_number', 0, 0), + + 'group' : (1, 'snippets', 1, 0), + 'group.type' : (1, 'scalar_text', 0, 0), + 'group._tag' : (1, 'scalar_text', 0, 0), + + 'region' : (1, 'snippets', 1, 0), + 'region.type' : (1, 'scalar_text', 0, 0), + 'region.x' : (1, 'scalar_number', 0, 0), + 'region.y' : (1, 'scalar_number', 0, 0), + 'region.h' : (1, 'scalar_number', 0, 0), + 'region.w' : (1, 'scalar_number', 0, 0), + 'region.orientation' : (1, 'scalar_number', 0, 0), + + 'empty_text_region' : (1, 'snippets', 1, 0), + + 'img' : (1, 'snippets', 1, 0), + 'img.x' : (1, 'scalar_number', 0, 0), + 'img.y' : (1, 'scalar_number', 0, 0), + 'img.h' : (1, 'scalar_number', 0, 0), + 'img.w' : (1, 'scalar_number', 0, 0), + 'img.src' : (1, 'scalar_number', 0, 0), + 'img.color_src' : (1, 'scalar_number', 0, 0), + + 'paragraph' : (1, 'snippets', 1, 0), + 'paragraph.class' : (1, 'scalar_text', 0, 0), + 'paragraph.firstWord' : (1, 'scalar_number', 0, 0), + 'paragraph.lastWord' : (1, 'scalar_number', 0, 0), + 'paragraph.lastWord' : (1, 'scalar_number', 0, 0), + 'paragraph.gridSize' : (1, 'scalar_number', 0, 0), + 'paragraph.gridBottomCenter' : (1, 'scalar_number', 0, 0), + 'paragraph.gridTopCenter' : (1, 'scalar_number', 0, 0), + 'paragraph.gridBeginCenter' : (1, 'scalar_number', 0, 0), + 'paragraph.gridEndCenter' : (1, 'scalar_number', 0, 0), + + + 'word_semantic' : (1, 'snippets', 1, 1), + 'word_semantic.type' : (1, 'scalar_text', 0, 0), + 'word_semantic.firstWord' : (1, 'scalar_number', 0, 0), + 'word_semantic.lastWord' : (1, 'scalar_number', 0, 0), + + 'word' : (1, 'snippets', 1, 0), + 'word.type' : (1, 'scalar_text', 0, 0), + 'word.class' : (1, 'scalar_text', 0, 0), + 'word.firstGlyph' : (1, 'scalar_number', 0, 0), + 'word.lastGlyph' : (1, 'scalar_number', 0, 0), + + '_span' : (1, 'snippets', 1, 0), + '_span.firstWord' : (1, 'scalar_number', 0, 0), + '_span.lastWord' : (1, 'scalar_number', 0, 0), + '_span.gridSize' : (1, 'scalar_number', 0, 0), + '_span.gridBottomCenter' : (1, 'scalar_number', 0, 0), + '_span.gridTopCenter' : (1, 'scalar_number', 0, 0), + '_span.gridBeginCenter' : (1, 'scalar_number', 0, 0), + '_span.gridEndCenter' : (1, 'scalar_number', 0, 0), + + 'span' : (1, 'snippets', 1, 0), + 'span.firstWord' : (1, 'scalar_number', 0, 0), + 'span.lastWord' : (1, 'scalar_number', 0, 0), + 'span.gridSize' : (1, 'scalar_number', 0, 0), + 'span.gridBottomCenter' : (1, 'scalar_number', 0, 0), + 'span.gridTopCenter' : (1, 'scalar_number', 0, 0), + 'span.gridBeginCenter' : (1, 'scalar_number', 0, 0), + 'span.gridEndCenter' : (1, 'scalar_number', 0, 0), + + 'extratokens' : (1, 'snippets', 1, 0), + 'extratokens.type' : (1, 'scalar_text', 0, 0), + 'extratokens.firstGlyph' : (1, 'scalar_number', 0, 0), + 'extratokens.lastGlyph' : (1, 'scalar_number', 0, 0), + + 'glyph.h' : (1, 'number', 0, 0), + 'glyph.w' : (1, 'number', 0, 0), + 'glyph.use' : (1, 'number', 0, 0), + 'glyph.vtx' : (1, 'number', 0, 1), + 'glyph.len' : (1, 'number', 0, 1), + 'glyph.dpi' : (1, 'number', 0, 0), + 'vtx' : (0, 'number', 1, 1), + 'vtx.x' : (1, 'number', 0, 0), + 'vtx.y' : (1, 'number', 0, 0), + 'len' : (0, 'number', 1, 1), + 'len.n' : (1, 'number', 0, 0), + + 'book' : (1, 'snippets', 1, 0), + 'version' : (1, 'snippets', 1, 0), + 'version.FlowEdit_1_id' : (1, 'scalar_text', 0, 0), + 'version.FlowEdit_1_version' : (1, 'scalar_text', 0, 0), + 'version.Schema_id' : (1, 'scalar_text', 0, 0), + 'version.Schema_version' : (1, 'scalar_text', 0, 0), + 'version.Topaz_version' : (1, 'scalar_text', 0, 0), + 'version.WordDetailEdit_1_id' : (1, 'scalar_text', 0, 0), + 'version.WordDetailEdit_1_version' : (1, 'scalar_text', 0, 0), + 'version.ZoneEdit_1_id' : (1, 'scalar_text', 0, 0), + 'version.ZoneEdit_1_version' : (1, 'scalar_text', 0, 0), + 'version.chapterheaders' : (1, 'scalar_text', 0, 0), + 'version.creation_date' : (1, 'scalar_text', 0, 0), + 'version.header_footer' : (1, 'scalar_text', 0, 0), + 'version.init_from_ocr' : (1, 'scalar_text', 0, 0), + 'version.letter_insertion' : (1, 'scalar_text', 0, 0), + 'version.xmlinj_convert' : (1, 'scalar_text', 0, 0), + 'version.xmlinj_reflow' : (1, 'scalar_text', 0, 0), + 'version.xmlinj_transform' : (1, 'scalar_text', 0, 0), + 'version.findlists' : (1, 'scalar_text', 0, 0), + 'version.page_num' : (1, 'scalar_text', 0, 0), + 'version.page_type' : (1, 'scalar_text', 0, 0), + 'version.bad_text' : (1, 'scalar_text', 0, 0), + 'version.glyph_mismatch' : (1, 'scalar_text', 0, 0), + 'version.margins' : (1, 'scalar_text', 0, 0), + 'version.staggered_lines' : (1, 'scalar_text', 0, 0), + 'version.paragraph_continuation' : (1, 'scalar_text', 0, 0), + 'version.toc' : (1, 'scalar_text', 0, 0), + + 'stylesheet' : (1, 'snippets', 1, 0), + 'style' : (1, 'snippets', 1, 0), + 'style._tag' : (1, 'scalar_text', 0, 0), + 'style.type' : (1, 'scalar_text', 0, 0), + 'style._parent_type' : (1, 'scalar_text', 0, 0), + 'style.class' : (1, 'scalar_text', 0, 0), + 'style._after_class' : (1, 'scalar_text', 0, 0), + 'rule' : (1, 'snippets', 1, 0), + 'rule.attr' : (1, 'scalar_text', 0, 0), + 'rule.value' : (1, 'scalar_text', 0, 0), + + 'original' : (0, 'number', 1, 1), + 'original.pnum' : (1, 'number', 0, 0), + 'original.pid' : (1, 'text', 0, 0), + 'pages' : (0, 'number', 1, 1), + 'pages.ref' : (1, 'number', 0, 0), + 'pages.id' : (1, 'number', 0, 0), + 'startID' : (0, 'number', 1, 1), + 'startID.page' : (1, 'number', 0, 0), + 'startID.id' : (1, 'number', 0, 0), + + } + + + # full tag path record keeping routines + def tag_push(self, token): + self.tagpath.append(token) + def tag_pop(self): + if len(self.tagpath) > 0 : + self.tagpath.pop() + def tagpath_len(self): + return len(self.tagpath) + def get_tagpath(self, i): + cnt = len(self.tagpath) + if i < cnt : result = self.tagpath[i] + for j in xrange(i+1, cnt) : + result += '.' + self.tagpath[j] + return result + + + # list of absolute command byte values values that indicate + # various types of loop meachanisms typically used to generate vectors + + cmd_list = (0x76, 0x76) + + # peek at and return 1 byte that is ahead by i bytes + def peek(self, aheadi): + c = self.fo.read(aheadi) + if (len(c) == 0): + return None + self.fo.seek(-aheadi,1) + c = c[-1:] + return ord(c) + + + # get the next value from the file being processed + def getNext(self): + nbyte = self.peek(1); + if (nbyte == None): + return None + val = readEncodedNumber(self.fo) + return val + + + # format an arg by argtype + def formatArg(self, arg, argtype): + if (argtype == 'text') or (argtype == 'scalar_text') : + result = self.dict.lookup(arg) + elif (argtype == 'raw') or (argtype == 'number') or (argtype == 'scalar_number') : + result = arg + elif (argtype == 'snippets') : + result = arg + else : + print "Error Unknown argtype %s" % argtype + sys.exit(-2) + return result + + + # process the next tag token, recursively handling subtags, + # arguments, and commands + def procToken(self, token): + + known_token = False + self.tag_push(token) + + if self.debug : print 'Processing: ', self.get_tagpath(0) + cnt = self.tagpath_len() + for j in xrange(cnt): + tkn = self.get_tagpath(j) + if tkn in self.token_tags : + num_args = self.token_tags[tkn][0] + argtype = self.token_tags[tkn][1] + subtags = self.token_tags[tkn][2] + splcase = self.token_tags[tkn][3] + ntags = -1 + known_token = True + break + + if known_token : + + # handle subtags if present + subtagres = [] + if (splcase == 1): + # this type of tag uses of escape marker 0x74 indicate subtag count + if self.peek(1) == 0x74: + skip = readEncodedNumber(self.fo) + subtags = 1 + num_args = 0 + + if (subtags == 1): + ntags = readEncodedNumber(self.fo) + if self.debug : print 'subtags: ' + token + ' has ' + str(ntags) + for j in xrange(ntags): + val = readEncodedNumber(self.fo) + subtagres.append(self.procToken(self.dict.lookup(val))) + + # arguments can be scalars or vectors of text or numbers + argres = [] + if num_args > 0 : + firstarg = self.peek(1) + if (firstarg in self.cmd_list) and (argtype != 'scalar_number') and (argtype != 'scalar_text'): + # single argument is a variable length vector of data + arg = readEncodedNumber(self.fo) + argres = self.decodeCMD(arg,argtype) + else : + # num_arg scalar arguments + for i in xrange(num_args): + argres.append(self.formatArg(readEncodedNumber(self.fo), argtype)) + + # build the return tag + result = [] + tkn = self.get_tagpath(0) + result.append(tkn) + result.append(subtagres) + result.append(argtype) + result.append(argres) + self.tag_pop() + return result + + # all tokens that need to be processed should be in the hash + # table if it may indicate a problem, either new token + # or an out of sync condition else: - self.bytesToEncrypt = '' - - if more == None: # last decrypt remove padding - plainText = self.padding.removePad(plainText, self.blockSize) - self.resetDecrypt() - return plainText + result = [] + if (self.debug): + print 'Unknown Token:', token + self.tag_pop() + return result -class Pad: - def __init__(self): - pass # eventually could put in calculation of min and max size extension - -class padWithPadLen(Pad): - """ Pad a binary string with the length of the padding """ - - def addPad(self, extraBytes, blockSize): - """ Add padding to a binary string to make it an even multiple - of the block size """ - blocks, numExtraBytes = divmod(len(extraBytes), blockSize) - padLength = blockSize - numExtraBytes - return extraBytes + padLength*chr(padLength) - - def removePad(self, paddedBinaryString, blockSize): - """ Remove padding from a binary string """ - if not(0> 1 + x = [] + for i in xrange(cnt): + x.append(readEncodedNumber(self.fo) - adj) + for i in xrange(mode): + for j in xrange(1, cnt): + x[j] = x[j] + x[j - 1] + for i in xrange(cnt): + result.append(self.formatArg(x[i],argtype)) + return result - def _toBString(self, block): - """ Convert block (array of bytes) to binary string """ - l = [] - for col in block: - for rowElement in col: - l.append(chr(rowElement)) - return ''.join(l) -#------------------------------------- -""" Number of rounds Nr = NrTable[Nb][Nk] - Nb Nk=4 Nk=5 Nk=6 Nk=7 Nk=8 - ------------------------------------- """ -NrTable = {4: {4:10, 5:11, 6:12, 7:13, 8:14}, - 5: {4:11, 5:11, 6:12, 7:13, 8:14}, - 6: {4:12, 5:12, 6:12, 7:13, 8:14}, - 7: {4:13, 5:13, 6:13, 7:13, 8:14}, - 8: {4:14, 5:14, 6:14, 7:14, 8:14}} -#------------------------------------- -def keyExpansion(algInstance, keyString): - """ Expand a string of size keySize into a larger array """ - Nk, Nb, Nr = algInstance.Nk, algInstance.Nb, algInstance.Nr # for readability - key = [ord(byte) for byte in keyString] # convert string to list - w = [[key[4*i],key[4*i+1],key[4*i+2],key[4*i+3]] for i in range(Nk)] - for i in range(Nk,Nb*(Nr+1)): - temp = w[i-1] # a four byte column - if (i%Nk) == 0 : - temp = temp[1:]+[temp[0]] # RotWord(temp) - temp = [ Sbox[byte] for byte in temp ] - temp[0] ^= Rcon[i/Nk] - elif Nk > 6 and i%Nk == 4 : - temp = [ Sbox[byte] for byte in temp ] # SubWord(temp) - w.append( [ w[i-Nk][byte]^temp[byte] for byte in range(4) ] ) - return w + # dispatches loop commands bytes with various modes + # The 0x76 style loops are used to build vectors -Rcon = (0,0x01,0x02,0x04,0x08,0x10,0x20,0x40,0x80,0x1b,0x36, # note extra '0' !!! - 0x6c,0xd8,0xab,0x4d,0x9a,0x2f,0x5e,0xbc,0x63,0xc6, - 0x97,0x35,0x6a,0xd4,0xb3,0x7d,0xfa,0xef,0xc5,0x91) + # This was all derived by trial and error and + # new loop types may exist that are not handled here + # since they did not appear in the test cases -#------------------------------------- -def AddRoundKey(algInstance, keyBlock): - """ XOR the algorithm state with a block of key material """ - for column in range(algInstance.Nb): - for row in range(4): - algInstance.state[column][row] ^= keyBlock[column][row] -#------------------------------------- + def decodeCMD(self, cmd, argtype): + if (cmd == 0x76): -def SubBytes(algInstance): - for column in range(algInstance.Nb): - for row in range(4): - algInstance.state[column][row] = Sbox[algInstance.state[column][row]] + # loop with cnt, and mode to control loop styles + cnt = readEncodedNumber(self.fo) + mode = readEncodedNumber(self.fo) -def InvSubBytes(algInstance): - for column in range(algInstance.Nb): - for row in range(4): - algInstance.state[column][row] = InvSbox[algInstance.state[column][row]] + if self.debug : print 'Loop for', cnt, 'with mode', mode, ': ' + return self.doLoop76Mode(argtype, cnt, mode) -Sbox = (0x63,0x7c,0x77,0x7b,0xf2,0x6b,0x6f,0xc5, - 0x30,0x01,0x67,0x2b,0xfe,0xd7,0xab,0x76, - 0xca,0x82,0xc9,0x7d,0xfa,0x59,0x47,0xf0, - 0xad,0xd4,0xa2,0xaf,0x9c,0xa4,0x72,0xc0, - 0xb7,0xfd,0x93,0x26,0x36,0x3f,0xf7,0xcc, - 0x34,0xa5,0xe5,0xf1,0x71,0xd8,0x31,0x15, - 0x04,0xc7,0x23,0xc3,0x18,0x96,0x05,0x9a, - 0x07,0x12,0x80,0xe2,0xeb,0x27,0xb2,0x75, - 0x09,0x83,0x2c,0x1a,0x1b,0x6e,0x5a,0xa0, - 0x52,0x3b,0xd6,0xb3,0x29,0xe3,0x2f,0x84, - 0x53,0xd1,0x00,0xed,0x20,0xfc,0xb1,0x5b, - 0x6a,0xcb,0xbe,0x39,0x4a,0x4c,0x58,0xcf, - 0xd0,0xef,0xaa,0xfb,0x43,0x4d,0x33,0x85, - 0x45,0xf9,0x02,0x7f,0x50,0x3c,0x9f,0xa8, - 0x51,0xa3,0x40,0x8f,0x92,0x9d,0x38,0xf5, - 0xbc,0xb6,0xda,0x21,0x10,0xff,0xf3,0xd2, - 0xcd,0x0c,0x13,0xec,0x5f,0x97,0x44,0x17, - 0xc4,0xa7,0x7e,0x3d,0x64,0x5d,0x19,0x73, - 0x60,0x81,0x4f,0xdc,0x22,0x2a,0x90,0x88, - 0x46,0xee,0xb8,0x14,0xde,0x5e,0x0b,0xdb, - 0xe0,0x32,0x3a,0x0a,0x49,0x06,0x24,0x5c, - 0xc2,0xd3,0xac,0x62,0x91,0x95,0xe4,0x79, - 0xe7,0xc8,0x37,0x6d,0x8d,0xd5,0x4e,0xa9, - 0x6c,0x56,0xf4,0xea,0x65,0x7a,0xae,0x08, - 0xba,0x78,0x25,0x2e,0x1c,0xa6,0xb4,0xc6, - 0xe8,0xdd,0x74,0x1f,0x4b,0xbd,0x8b,0x8a, - 0x70,0x3e,0xb5,0x66,0x48,0x03,0xf6,0x0e, - 0x61,0x35,0x57,0xb9,0x86,0xc1,0x1d,0x9e, - 0xe1,0xf8,0x98,0x11,0x69,0xd9,0x8e,0x94, - 0x9b,0x1e,0x87,0xe9,0xce,0x55,0x28,0xdf, - 0x8c,0xa1,0x89,0x0d,0xbf,0xe6,0x42,0x68, - 0x41,0x99,0x2d,0x0f,0xb0,0x54,0xbb,0x16) + if self.dbug: print "Unknown command", cmd + result = [] + return result -InvSbox = (0x52,0x09,0x6a,0xd5,0x30,0x36,0xa5,0x38, - 0xbf,0x40,0xa3,0x9e,0x81,0xf3,0xd7,0xfb, - 0x7c,0xe3,0x39,0x82,0x9b,0x2f,0xff,0x87, - 0x34,0x8e,0x43,0x44,0xc4,0xde,0xe9,0xcb, - 0x54,0x7b,0x94,0x32,0xa6,0xc2,0x23,0x3d, - 0xee,0x4c,0x95,0x0b,0x42,0xfa,0xc3,0x4e, - 0x08,0x2e,0xa1,0x66,0x28,0xd9,0x24,0xb2, - 0x76,0x5b,0xa2,0x49,0x6d,0x8b,0xd1,0x25, - 0x72,0xf8,0xf6,0x64,0x86,0x68,0x98,0x16, - 0xd4,0xa4,0x5c,0xcc,0x5d,0x65,0xb6,0x92, - 0x6c,0x70,0x48,0x50,0xfd,0xed,0xb9,0xda, - 0x5e,0x15,0x46,0x57,0xa7,0x8d,0x9d,0x84, - 0x90,0xd8,0xab,0x00,0x8c,0xbc,0xd3,0x0a, - 0xf7,0xe4,0x58,0x05,0xb8,0xb3,0x45,0x06, - 0xd0,0x2c,0x1e,0x8f,0xca,0x3f,0x0f,0x02, - 0xc1,0xaf,0xbd,0x03,0x01,0x13,0x8a,0x6b, - 0x3a,0x91,0x11,0x41,0x4f,0x67,0xdc,0xea, - 0x97,0xf2,0xcf,0xce,0xf0,0xb4,0xe6,0x73, - 0x96,0xac,0x74,0x22,0xe7,0xad,0x35,0x85, - 0xe2,0xf9,0x37,0xe8,0x1c,0x75,0xdf,0x6e, - 0x47,0xf1,0x1a,0x71,0x1d,0x29,0xc5,0x89, - 0x6f,0xb7,0x62,0x0e,0xaa,0x18,0xbe,0x1b, - 0xfc,0x56,0x3e,0x4b,0xc6,0xd2,0x79,0x20, - 0x9a,0xdb,0xc0,0xfe,0x78,0xcd,0x5a,0xf4, - 0x1f,0xdd,0xa8,0x33,0x88,0x07,0xc7,0x31, - 0xb1,0x12,0x10,0x59,0x27,0x80,0xec,0x5f, - 0x60,0x51,0x7f,0xa9,0x19,0xb5,0x4a,0x0d, - 0x2d,0xe5,0x7a,0x9f,0x93,0xc9,0x9c,0xef, - 0xa0,0xe0,0x3b,0x4d,0xae,0x2a,0xf5,0xb0, - 0xc8,0xeb,0xbb,0x3c,0x83,0x53,0x99,0x61, - 0x17,0x2b,0x04,0x7e,0xba,0x77,0xd6,0x26, - 0xe1,0x69,0x14,0x63,0x55,0x21,0x0c,0x7d) -#------------------------------------- -""" For each block size (Nb), the ShiftRow operation shifts row i - by the amount Ci. Note that row 0 is not shifted. - Nb C1 C2 C3 - ------------------- """ -shiftOffset = { 4 : ( 0, 1, 2, 3), - 5 : ( 0, 1, 2, 3), - 6 : ( 0, 1, 2, 3), - 7 : ( 0, 1, 2, 4), - 8 : ( 0, 1, 3, 4) } -def ShiftRows(algInstance): - tmp = [0]*algInstance.Nb # list of size Nb - for r in range(1,4): # row 0 reamains unchanged and can be skipped - for c in range(algInstance.Nb): - tmp[c] = algInstance.state[(c+shiftOffset[algInstance.Nb][r]) % algInstance.Nb][r] - for c in range(algInstance.Nb): - algInstance.state[c][r] = tmp[c] -def InvShiftRows(algInstance): - tmp = [0]*algInstance.Nb # list of size Nb - for r in range(1,4): # row 0 reamains unchanged and can be skipped - for c in range(algInstance.Nb): - tmp[c] = algInstance.state[(c+algInstance.Nb-shiftOffset[algInstance.Nb][r]) % algInstance.Nb][r] - for c in range(algInstance.Nb): - algInstance.state[c][r] = tmp[c] -#------------------------------------- -def MixColumns(a): - Sprime = [0,0,0,0] - for j in range(a.Nb): # for each column - Sprime[0] = mul(2,a.state[j][0])^mul(3,a.state[j][1])^mul(1,a.state[j][2])^mul(1,a.state[j][3]) - Sprime[1] = mul(1,a.state[j][0])^mul(2,a.state[j][1])^mul(3,a.state[j][2])^mul(1,a.state[j][3]) - Sprime[2] = mul(1,a.state[j][0])^mul(1,a.state[j][1])^mul(2,a.state[j][2])^mul(3,a.state[j][3]) - Sprime[3] = mul(3,a.state[j][0])^mul(1,a.state[j][1])^mul(1,a.state[j][2])^mul(2,a.state[j][3]) - for i in range(4): - a.state[j][i] = Sprime[i] -def InvMixColumns(a): - """ Mix the four bytes of every column in a linear way - This is the opposite operation of Mixcolumn """ - Sprime = [0,0,0,0] - for j in range(a.Nb): # for each column - Sprime[0] = mul(0x0E,a.state[j][0])^mul(0x0B,a.state[j][1])^mul(0x0D,a.state[j][2])^mul(0x09,a.state[j][3]) - Sprime[1] = mul(0x09,a.state[j][0])^mul(0x0E,a.state[j][1])^mul(0x0B,a.state[j][2])^mul(0x0D,a.state[j][3]) - Sprime[2] = mul(0x0D,a.state[j][0])^mul(0x09,a.state[j][1])^mul(0x0E,a.state[j][2])^mul(0x0B,a.state[j][3]) - Sprime[3] = mul(0x0B,a.state[j][0])^mul(0x0D,a.state[j][1])^mul(0x09,a.state[j][2])^mul(0x0E,a.state[j][3]) - for i in range(4): - a.state[j][i] = Sprime[i] + # add full tag path to injected snippets + def updateName(self, tag, prefix): + name = tag[0] + subtagList = tag[1] + argtype = tag[2] + argList = tag[3] + nname = prefix + '.' + name + nsubtaglist = [] + for j in subtagList: + nsubtaglist.append(self.updateName(j,prefix)) + ntag = [] + ntag.append(nname) + ntag.append(nsubtaglist) + ntag.append(argtype) + ntag.append(argList) + return ntag -#------------------------------------- -def mul(a, b): - """ Multiply two elements of GF(2^m) - needed for MixColumn and InvMixColumn """ - if (a !=0 and b!=0): - return Alogtable[(Logtable[a] + Logtable[b])%255] - else: + + + # perform depth first injection of specified snippets into this one + def injectSnippets(self, snippet): + snipno, tag = snippet + name = tag[0] + subtagList = tag[1] + argtype = tag[2] + argList = tag[3] + nsubtagList = [] + if len(argList) > 0 : + for j in argList: + asnip = self.snippetList[j] + aso, atag = self.injectSnippets(asnip) + atag = self.updateName(atag, name) + nsubtagList.append(atag) + argtype='number' + argList=[] + if len(nsubtagList) > 0 : + subtagList.extend(nsubtagList) + tag = [] + tag.append(name) + tag.append(subtagList) + tag.append(argtype) + tag.append(argList) + snippet = [] + snippet.append(snipno) + snippet.append(tag) + return snippet + + + + # format the tag for output + def formatTag(self, node): + name = node[0] + subtagList = node[1] + argtype = node[2] + argList = node[3] + fullpathname = name.split('.') + nodename = fullpathname.pop() + ilvl = len(fullpathname) + indent = ' ' * (3 * ilvl) + rlst = [] + rlst.append(indent + '<' + nodename + '>') + if len(argList) > 0: + alst = [] + for j in argList: + if (argtype == 'text') or (argtype == 'scalar_text') : + alst.append(j + '|') + else : + alst.append(str(j) + ',') + argres = "".join(alst) + argres = argres[0:-1] + if argtype == 'snippets' : + rlst.append('snippets:' + argres) + else : + rlst.append(argres) + if len(subtagList) > 0 : + rlst.append('\n') + for j in subtagList: + if len(j) > 0 : + rlst.append(self.formatTag(j)) + rlst.append(indent + '\n') + else: + rlst.append('\n') + return "".join(rlst) + + + # flatten tag + def flattenTag(self, node): + name = node[0] + subtagList = node[1] + argtype = node[2] + argList = node[3] + rlst = [] + rlst.append(name) + if (len(argList) > 0): + alst = [] + for j in argList: + if (argtype == 'text') or (argtype == 'scalar_text') : + alst.append(j + '|') + else : + alst.append(str(j) + '|') + argres = "".join(alst) + argres = argres[0:-1] + if argtype == 'snippets' : + rlst.append('.snippets=' + argres) + else : + rlst.append('=' + argres) + rlst.append('\n') + for j in subtagList: + if len(j) > 0 : + rlst.append(self.flattenTag(j)) + return "".join(rlst) + + + # reduce create xml output + def formatDoc(self, flat_xml): + rlst = [] + for j in self.doc : + if len(j) > 0: + if flat_xml: + rlst.append(self.flattenTag(j)) + else: + rlst.append(self.formatTag(j)) + result = "".join(rlst) + if self.debug : print result + return result + + + + # main loop - parse the page.dat files + # to create structured document and snippets + + # FIXME: value at end of magic appears to be a subtags count + # but for what? For now, inject an 'info" tag as it is in + # every dictionary and seems close to what is meant + # The alternative is to special case the last _ "0x5f" to mean something + + def process(self): + + # peek at the first bytes to see what type of file it is + magic = self.fo.read(9) + if (magic[0:1] == 'p') and (magic[2:9] == 'marker_'): + first_token = 'info' + elif (magic[0:1] == 'p') and (magic[2:9] == '__PAGE_'): + skip = self.fo.read(2) + first_token = 'info' + elif (magic[0:1] == 'p') and (magic[2:8] == '_PAGE_'): + first_token = 'info' + elif (magic[0:1] == 'g') and (magic[2:9] == '__GLYPH'): + skip = self.fo.read(3) + first_token = 'info' + else : + # other0.dat file + first_token = None + self.fo.seek(-9,1) + + + # main loop to read and build the document tree + while True: + + if first_token != None : + # use "inserted" first token 'info' for page and glyph files + tag = self.procToken(first_token) + if len(tag) > 0 : + self.doc.append(tag) + first_token = None + + v = self.getNext() + if (v == None): + break + + if (v == 0x72): + self.doLoop72('number') + elif (v > 0) and (v < self.dict.getSize()) : + tag = self.procToken(self.dict.lookup(v)) + if len(tag) > 0 : + self.doc.append(tag) + else: + if self.debug: + print "Main Loop: Unknown value: %x" % v + if (v == 0): + if (self.peek(1) == 0x5f): + skip = self.fo.read(1) + first_token = 'info' + + # now do snippet injection + if len(self.snippetList) > 0 : + if self.debug : print 'Injecting Snippets:' + snippet = self.injectSnippets(self.snippetList[0]) + snipno = snippet[0] + tag_add = snippet[1] + if self.debug : print self.formatTag(tag_add) + if len(tag_add) > 0: + self.doc.append(tag_add) + + # handle generation of xml output + xmlpage = self.formatDoc(self.flat_xml) + + return xmlpage + + +def fromData(dict, fname): + flat_xml = True + debug = False + pp = PageParser(fname, dict, debug, flat_xml) + xmlpage = pp.process() + return xmlpage + +def getXML(dict, fname): + flat_xml = False + debug = False + pp = PageParser(fname, dict, debug, flat_xml) + xmlpage = pp.process() + return xmlpage + +def usage(): + print 'Usage: ' + print ' convert2xml.py dict0000.dat infile.dat ' + print ' ' + print ' Options:' + print ' -h print this usage help message ' + print ' -d turn on debug output to check for potential errors ' + print ' --flat-xml output the flattened xml page description only ' + print ' ' + print ' This program will attempt to convert a page*.dat file or ' + print ' glyphs*.dat file, using the dict0000.dat file, to its xml description. ' + print ' ' + print ' Use "cmbtc_dump.py" first to unencrypt, uncompress, and dump ' + print ' the *.dat files from a Topaz format e-book.' + +# +# Main +# + +def main(argv): + dictFile = "" + pageFile = "" + debug = False + flat_xml = False + printOutput = False + if len(argv) == 0: + printOutput = True + argv = sys.argv + + try: + opts, args = getopt.getopt(argv[1:], "hd", ["flat-xml"]) + + except getopt.GetoptError, err: + + # print help information and exit: + print str(err) # will print something like "option -a not recognized" + usage() + sys.exit(2) + + if len(opts) == 0 and len(args) == 0 : + usage() + sys.exit(2) + + for o, a in opts: + if o =="-d": + debug=True + if o =="-h": + usage() + sys.exit(0) + if o =="--flat-xml": + flat_xml = True + + dictFile, pageFile = args[0], args[1] + + # read in the string table dictionary + dict = Dictionary(dictFile) + # dict.dumpDict() + + # create a page parser + pp = PageParser(pageFile, dict, debug, flat_xml) + + xmlpage = pp.process() + + if printOutput: + print xmlpage return 0 -Logtable = ( 0, 0, 25, 1, 50, 2, 26, 198, 75, 199, 27, 104, 51, 238, 223, 3, - 100, 4, 224, 14, 52, 141, 129, 239, 76, 113, 8, 200, 248, 105, 28, 193, - 125, 194, 29, 181, 249, 185, 39, 106, 77, 228, 166, 114, 154, 201, 9, 120, - 101, 47, 138, 5, 33, 15, 225, 36, 18, 240, 130, 69, 53, 147, 218, 142, - 150, 143, 219, 189, 54, 208, 206, 148, 19, 92, 210, 241, 64, 70, 131, 56, - 102, 221, 253, 48, 191, 6, 139, 98, 179, 37, 226, 152, 34, 136, 145, 16, - 126, 110, 72, 195, 163, 182, 30, 66, 58, 107, 40, 84, 250, 133, 61, 186, - 43, 121, 10, 21, 155, 159, 94, 202, 78, 212, 172, 229, 243, 115, 167, 87, - 175, 88, 168, 80, 244, 234, 214, 116, 79, 174, 233, 213, 231, 230, 173, 232, - 44, 215, 117, 122, 235, 22, 11, 245, 89, 203, 95, 176, 156, 169, 81, 160, - 127, 12, 246, 111, 23, 196, 73, 236, 216, 67, 31, 45, 164, 118, 123, 183, - 204, 187, 62, 90, 251, 96, 177, 134, 59, 82, 161, 108, 170, 85, 41, 157, - 151, 178, 135, 144, 97, 190, 220, 252, 188, 149, 207, 205, 55, 63, 91, 209, - 83, 57, 132, 60, 65, 162, 109, 71, 20, 42, 158, 93, 86, 242, 211, 171, - 68, 17, 146, 217, 35, 32, 46, 137, 180, 124, 184, 38, 119, 153, 227, 165, - 103, 74, 237, 222, 197, 49, 254, 24, 13, 99, 140, 128, 192, 247, 112, 7) + return xmlpage -Alogtable= ( 1, 3, 5, 15, 17, 51, 85, 255, 26, 46, 114, 150, 161, 248, 19, 53, - 95, 225, 56, 72, 216, 115, 149, 164, 247, 2, 6, 10, 30, 34, 102, 170, - 229, 52, 92, 228, 55, 89, 235, 38, 106, 190, 217, 112, 144, 171, 230, 49, - 83, 245, 4, 12, 20, 60, 68, 204, 79, 209, 104, 184, 211, 110, 178, 205, - 76, 212, 103, 169, 224, 59, 77, 215, 98, 166, 241, 8, 24, 40, 120, 136, - 131, 158, 185, 208, 107, 189, 220, 127, 129, 152, 179, 206, 73, 219, 118, 154, - 181, 196, 87, 249, 16, 48, 80, 240, 11, 29, 39, 105, 187, 214, 97, 163, - 254, 25, 43, 125, 135, 146, 173, 236, 47, 113, 147, 174, 233, 32, 96, 160, - 251, 22, 58, 78, 210, 109, 183, 194, 93, 231, 50, 86, 250, 21, 63, 65, - 195, 94, 226, 61, 71, 201, 64, 192, 91, 237, 44, 116, 156, 191, 218, 117, - 159, 186, 213, 100, 172, 239, 42, 126, 130, 157, 188, 223, 122, 142, 137, 128, - 155, 182, 193, 88, 232, 35, 101, 175, 234, 37, 111, 177, 200, 67, 197, 84, - 252, 31, 33, 99, 165, 244, 7, 9, 27, 45, 119, 153, 176, 203, 70, 202, - 69, 207, 74, 222, 121, 139, 134, 145, 168, 227, 62, 66, 198, 81, 243, 14, - 18, 54, 90, 238, 41, 123, 141, 140, 143, 138, 133, 148, 167, 242, 13, 23, - 57, 75, 221, 124, 132, 151, 162, 253, 28, 36, 108, 180, 199, 82, 246, 1) - - - - -""" - AES Encryption Algorithm - The AES algorithm is just Rijndael algorithm restricted to the default - blockSize of 128 bits. -""" - -class AES(Rijndael): - """ The AES algorithm is the Rijndael block cipher restricted to block - sizes of 128 bits and key sizes of 128, 192 or 256 bits - """ - def __init__(self, key = None, padding = padWithPadLen(), keySize=16): - """ Initialize AES, keySize is in bytes """ - if not (keySize == 16 or keySize == 24 or keySize == 32) : - raise BadKeySizeError, 'Illegal AES key size, must be 16, 24, or 32 bytes' - - Rijndael.__init__( self, key, padding=padding, keySize=keySize, blockSize=16 ) - - self.name = 'AES' - - -""" - CBC mode of encryption for block ciphers. - This algorithm mode wraps any BlockCipher to make a - Cipher Block Chaining mode. -""" -from random import Random # should change to crypto.random!!! - - -class CBC(BlockCipher): - """ The CBC class wraps block ciphers to make cipher block chaining (CBC) mode - algorithms. The initialization (IV) is automatic if set to None. Padding - is also automatic based on the Pad class used to initialize the algorithm - """ - def __init__(self, blockCipherInstance, padding = padWithPadLen()): - """ CBC algorithms are created by initializing with a BlockCipher instance """ - self.baseCipher = blockCipherInstance - self.name = self.baseCipher.name + '_CBC' - self.blockSize = self.baseCipher.blockSize - self.keySize = self.baseCipher.keySize - self.padding = padding - self.baseCipher.padding = noPadding() # baseCipher should NOT pad!! - self.r = Random() # for IV generation, currently uses - # mediocre standard distro version <---------------- - import time - newSeed = time.ctime()+str(self.r) # seed with instance location - self.r.seed(newSeed) # to make unique - self.reset() - - def setKey(self, key): - self.baseCipher.setKey(key) - - # Overload to reset both CBC state and the wrapped baseCipher - def resetEncrypt(self): - BlockCipher.resetEncrypt(self) # reset CBC encrypt state (super class) - self.baseCipher.resetEncrypt() # reset base cipher encrypt state - - def resetDecrypt(self): - BlockCipher.resetDecrypt(self) # reset CBC state (super class) - self.baseCipher.resetDecrypt() # reset base cipher decrypt state - - def encrypt(self, plainText, iv=None, more=None): - """ CBC encryption - overloads baseCipher to allow optional explicit IV - when iv=None, iv is auto generated! - """ - if self.encryptBlockCount == 0: - self.iv = iv - else: - assert(iv==None), 'IV used only on first call to encrypt' - - return BlockCipher.encrypt(self,plainText, more=more) - - def decrypt(self, cipherText, iv=None, more=None): - """ CBC decryption - overloads baseCipher to allow optional explicit IV - when iv=None, iv is auto generated! - """ - if self.decryptBlockCount == 0: - self.iv = iv - else: - assert(iv==None), 'IV used only on first call to decrypt' - - return BlockCipher.decrypt(self, cipherText, more=more) - - def encryptBlock(self, plainTextBlock): - """ CBC block encryption, IV is set with 'encrypt' """ - auto_IV = '' - if self.encryptBlockCount == 0: - if self.iv == None: - # generate IV and use - self.iv = ''.join([chr(self.r.randrange(256)) for i in range(self.blockSize)]) - self.prior_encr_CT_block = self.iv - auto_IV = self.prior_encr_CT_block # prepend IV if it's automatic - else: # application provided IV - assert(len(self.iv) == self.blockSize ),'IV must be same length as block' - self.prior_encr_CT_block = self.iv - """ encrypt the prior CT XORed with the PT """ - ct = self.baseCipher.encryptBlock( xor(self.prior_encr_CT_block, plainTextBlock) ) - self.prior_encr_CT_block = ct - return auto_IV+ct - - def decryptBlock(self, encryptedBlock): - """ Decrypt a single block """ - - if self.decryptBlockCount == 0: # first call, process IV - if self.iv == None: # auto decrypt IV? - self.prior_CT_block = encryptedBlock - return '' - else: - assert(len(self.iv)==self.blockSize),"Bad IV size on CBC decryption" - self.prior_CT_block = self.iv - - dct = self.baseCipher.decryptBlock(encryptedBlock) - """ XOR the prior decrypted CT with the prior CT """ - dct_XOR_priorCT = xor( self.prior_CT_block, dct ) - - self.prior_CT_block = encryptedBlock - - return dct_XOR_priorCT - - -""" - AES_CBC Encryption Algorithm -""" - -class AES_CBC(CBC): - """ AES encryption in CBC feedback mode """ - def __init__(self, key=None, padding=padWithPadLen(), keySize=16): - CBC.__init__( self, AES(key, noPadding(), keySize), padding) - self.name = 'AES_CBC' +if __name__ == '__main__': + sys.exit(main('')) diff --git a/Calibre_Plugins/K4MobiDeDRM_plugin/flatxml2html.py b/Calibre_Plugins/K4MobiDeDRM_plugin/flatxml2html.py index 26d740d..e5647f4 100644 Binary files a/Calibre_Plugins/K4MobiDeDRM_plugin/flatxml2html.py and b/Calibre_Plugins/K4MobiDeDRM_plugin/flatxml2html.py differ diff --git a/Calibre_Plugins/K4MobiDeDRM_plugin/flatxml2svg.py b/Calibre_Plugins/K4MobiDeDRM_plugin/flatxml2svg.py index e25a0c8..4dfd6c7 100644 --- a/Calibre_Plugins/K4MobiDeDRM_plugin/flatxml2svg.py +++ b/Calibre_Plugins/K4MobiDeDRM_plugin/flatxml2svg.py @@ -1,290 +1,249 @@ -#! /usr/bin/env python +#! /usr/bin/python +# vim:ts=4:sw=4:softtabstop=4:smarttab:expandtab -import sys, os -import hmac +import sys +import csv +import os +import getopt from struct import pack -import hashlib +from struct import unpack -# interface to needed routines libalfcrypto -def _load_libalfcrypto(): - import ctypes - from ctypes import CDLL, byref, POINTER, c_void_p, c_char_p, c_int, c_long, \ - Structure, c_ulong, create_string_buffer, addressof, string_at, cast, sizeof +class PParser(object): + def __init__(self, gd, flatxml, meta_array): + self.gd = gd + self.flatdoc = flatxml.split('\n') + self.docSize = len(self.flatdoc) + self.temp = [] - pointer_size = ctypes.sizeof(ctypes.c_voidp) - name_of_lib = None - if sys.platform.startswith('darwin'): - name_of_lib = 'libalfcrypto.dylib' - elif sys.platform.startswith('win'): - if pointer_size == 4: - name_of_lib = 'alfcrypto.dll' + self.ph = -1 + self.pw = -1 + startpos = self.posinDoc('page.h') or self.posinDoc('book.h') + for p in startpos: + (name, argres) = self.lineinDoc(p) + self.ph = max(self.ph, int(argres)) + startpos = self.posinDoc('page.w') or self.posinDoc('book.w') + for p in startpos: + (name, argres) = self.lineinDoc(p) + self.pw = max(self.pw, int(argres)) + + if self.ph <= 0: + self.ph = int(meta_array.get('pageHeight', '11000')) + if self.pw <= 0: + self.pw = int(meta_array.get('pageWidth', '8500')) + + res = [] + startpos = self.posinDoc('info.glyph.x') + for p in startpos: + argres = self.getDataatPos('info.glyph.x', p) + res.extend(argres) + self.gx = res + + res = [] + startpos = self.posinDoc('info.glyph.y') + for p in startpos: + argres = self.getDataatPos('info.glyph.y', p) + res.extend(argres) + self.gy = res + + res = [] + startpos = self.posinDoc('info.glyph.glyphID') + for p in startpos: + argres = self.getDataatPos('info.glyph.glyphID', p) + res.extend(argres) + self.gid = res + + + # return tag at line pos in document + def lineinDoc(self, pos) : + if (pos >= 0) and (pos < self.docSize) : + item = self.flatdoc[pos] + if item.find('=') >= 0: + (name, argres) = item.split('=',1) + else : + name = item + argres = '' + return name, argres + + # find tag in doc if within pos to end inclusive + def findinDoc(self, tagpath, pos, end) : + result = None + if end == -1 : + end = self.docSize else: - name_of_lib = 'alfcrypto64.dll' + end = min(self.docSize, end) + foundat = -1 + for j in xrange(pos, end): + item = self.flatdoc[j] + if item.find('=') >= 0: + (name, argres) = item.split('=',1) + else : + name = item + argres = '' + if name.endswith(tagpath) : + result = argres + foundat = j + break + return foundat, result + + # return list of start positions for the tagpath + def posinDoc(self, tagpath): + startpos = [] + pos = 0 + res = "" + while res != None : + (foundpos, res) = self.findinDoc(tagpath, pos, -1) + if res != None : + startpos.append(foundpos) + pos = foundpos + 1 + return startpos + + def getData(self, path): + result = None + cnt = len(self.flatdoc) + for j in xrange(cnt): + item = self.flatdoc[j] + if item.find('=') >= 0: + (name, argt) = item.split('=') + argres = argt.split('|') + else: + name = item + argres = [] + if (name.endswith(path)): + result = argres + break + if (len(argres) > 0) : + for j in xrange(0,len(argres)): + argres[j] = int(argres[j]) + return result + + def getDataatPos(self, path, pos): + result = None + item = self.flatdoc[pos] + if item.find('=') >= 0: + (name, argt) = item.split('=') + argres = argt.split('|') + else: + name = item + argres = [] + if (len(argres) > 0) : + for j in xrange(0,len(argres)): + argres[j] = int(argres[j]) + if (name.endswith(path)): + result = argres + return result + + def getDataTemp(self, path): + result = None + cnt = len(self.temp) + for j in xrange(cnt): + item = self.temp[j] + if item.find('=') >= 0: + (name, argt) = item.split('=') + argres = argt.split('|') + else: + name = item + argres = [] + if (name.endswith(path)): + result = argres + self.temp.pop(j) + break + if (len(argres) > 0) : + for j in xrange(0,len(argres)): + argres[j] = int(argres[j]) + return result + + def getImages(self): + result = [] + self.temp = self.flatdoc + while (self.getDataTemp('img') != None): + h = self.getDataTemp('img.h')[0] + w = self.getDataTemp('img.w')[0] + x = self.getDataTemp('img.x')[0] + y = self.getDataTemp('img.y')[0] + src = self.getDataTemp('img.src')[0] + result.append('\n' % (src, x, y, w, h)) + return result + + def getGlyphs(self): + result = [] + if (self.gid != None) and (len(self.gid) > 0): + glyphs = [] + for j in set(self.gid): + glyphs.append(j) + glyphs.sort() + for gid in glyphs: + id='id="gl%d"' % gid + path = self.gd.lookup(id) + if path: + result.append(id + ' ' + path) + return result + + +def convert2SVG(gdict, flat_xml, pageid, previd, nextid, svgDir, raw, meta_array, scaledpi): + mlst = [] + pp = PParser(gdict, flat_xml, meta_array) + mlst.append('\n') + if (raw): + mlst.append('\n') + mlst.append('\n' % (pp.pw / scaledpi, pp.ph / scaledpi, pp.pw -1, pp.ph -1)) + mlst.append('Page %d - %s by %s\n' % (pageid, meta_array['Title'],meta_array['Authors'])) else: - if pointer_size == 4: - name_of_lib = 'libalfcrypto32.so' + mlst.append('\n') + mlst.append('\n') + mlst.append('Page %d - %s by %s\n' % (pageid, meta_array['Title'],meta_array['Authors'])) + mlst.append('\n') + mlst.append('\n') + mlst.append('\n') + mlst.append('
\n') + if previd == None: + mlst.append('\n') else: - name_of_lib = 'libalfcrypto64.so' - - libalfcrypto = sys.path[0] + os.sep + name_of_lib - - if not os.path.isfile(libalfcrypto): - raise Exception('libalfcrypto not found') - - libalfcrypto = CDLL(libalfcrypto) - - c_char_pp = POINTER(c_char_p) - c_int_p = POINTER(c_int) - - - def F(restype, name, argtypes): - func = getattr(libalfcrypto, name) - func.restype = restype - func.argtypes = argtypes - return func - - # aes cbc decryption - # - # struct aes_key_st { - # unsigned long rd_key[4 *(AES_MAXNR + 1)]; - # int rounds; - # }; - # - # typedef struct aes_key_st AES_KEY; - # - # int AES_set_decrypt_key(const unsigned char *userKey, const int bits, AES_KEY *key); - # - # - # void AES_cbc_encrypt(const unsigned char *in, unsigned char *out, - # const unsigned long length, const AES_KEY *key, - # unsigned char *ivec, const int enc); - - AES_MAXNR = 14 - - class AES_KEY(Structure): - _fields_ = [('rd_key', c_long * (4 * (AES_MAXNR + 1))), ('rounds', c_int)] - - AES_KEY_p = POINTER(AES_KEY) - AES_cbc_encrypt = F(None, 'AES_cbc_encrypt',[c_char_p, c_char_p, c_ulong, AES_KEY_p, c_char_p, c_int]) - AES_set_decrypt_key = F(c_int, 'AES_set_decrypt_key',[c_char_p, c_int, AES_KEY_p]) - - - - # Pukall 1 Cipher - # unsigned char *PC1(const unsigned char *key, unsigned int klen, const unsigned char *src, - # unsigned char *dest, unsigned int len, int decryption); - - PC1 = F(c_char_p, 'PC1', [c_char_p, c_ulong, c_char_p, c_char_p, c_ulong, c_ulong]) - - # Topaz Encryption - # typedef struct _TpzCtx { - # unsigned int v[2]; - # } TpzCtx; - # - # void topazCryptoInit(TpzCtx *ctx, const unsigned char *key, int klen); - # void topazCryptoDecrypt(const TpzCtx *ctx, const unsigned char *in, unsigned char *out, int len); - - class TPZ_CTX(Structure): - _fields_ = [('v', c_long * 2)] - - TPZ_CTX_p = POINTER(TPZ_CTX) - topazCryptoInit = F(None, 'topazCryptoInit', [TPZ_CTX_p, c_char_p, c_ulong]) - topazCryptoDecrypt = F(None, 'topazCryptoDecrypt', [TPZ_CTX_p, c_char_p, c_char_p, c_ulong]) - - - class AES_CBC(object): - def __init__(self): - self._blocksize = 0 - self._keyctx = None - self._iv = 0 - - def set_decrypt_key(self, userkey, iv): - self._blocksize = len(userkey) - if (self._blocksize != 16) and (self._blocksize != 24) and (self._blocksize != 32) : - raise Exception('AES CBC improper key used') - return - keyctx = self._keyctx = AES_KEY() - self._iv = iv - rv = AES_set_decrypt_key(userkey, len(userkey) * 8, keyctx) - if rv < 0: - raise Exception('Failed to initialize AES CBC key') - - def decrypt(self, data): - out = create_string_buffer(len(data)) - mutable_iv = create_string_buffer(self._iv, len(self._iv)) - rv = AES_cbc_encrypt(data, out, len(data), self._keyctx, mutable_iv, 0) - if rv == 0: - raise Exception('AES CBC decryption failed') - return out.raw - - class Pukall_Cipher(object): - def __init__(self): - self.key = None - - def PC1(self, key, src, decryption=True): - self.key = key - out = create_string_buffer(len(src)) - de = 0 - if decryption: - de = 1 - rv = PC1(key, len(key), src, out, len(src), de) - return out.raw - - class Topaz_Cipher(object): - def __init__(self): - self._ctx = None - - def ctx_init(self, key): - tpz_ctx = self._ctx = TPZ_CTX() - topazCryptoInit(tpz_ctx, key, len(key)) - return tpz_ctx - - def decrypt(self, data, ctx=None): - if ctx == None: - ctx = self._ctx - out = create_string_buffer(len(data)) - topazCryptoDecrypt(ctx, data, out, len(data)) - return out.raw - - print "Using Library AlfCrypto DLL/DYLIB/SO" - return (AES_CBC, Pukall_Cipher, Topaz_Cipher) - - -def _load_python_alfcrypto(): - - import aescbc - - class Pukall_Cipher(object): - def __init__(self): - self.key = None - - def PC1(self, key, src, decryption=True): - sum1 = 0; - sum2 = 0; - keyXorVal = 0; - if len(key)!=16: - print "Bad key length!" - return None - wkey = [] - for i in xrange(8): - wkey.append(ord(key[i*2])<<8 | ord(key[i*2+1])) - dst = "" - for i in xrange(len(src)): - temp1 = 0; - byteXorVal = 0; - for j in xrange(8): - temp1 ^= wkey[j] - sum2 = (sum2+j)*20021 + sum1 - sum1 = (temp1*346)&0xFFFF - sum2 = (sum2+sum1)&0xFFFF - temp1 = (temp1*20021+1)&0xFFFF - byteXorVal ^= temp1 ^ sum2 - curByte = ord(src[i]) - if not decryption: - keyXorVal = curByte * 257; - curByte = ((curByte ^ (byteXorVal >> 8)) ^ byteXorVal) & 0xFF - if decryption: - keyXorVal = curByte * 257; - for j in xrange(8): - wkey[j] ^= keyXorVal; - dst+=chr(curByte) - return dst - - class Topaz_Cipher(object): - def __init__(self): - self._ctx = None - - def ctx_init(self, key): - ctx1 = 0x0CAFFE19E - for keyChar in key: - keyByte = ord(keyChar) - ctx2 = ctx1 - ctx1 = ((((ctx1 >>2) * (ctx1 >>7))&0xFFFFFFFF) ^ (keyByte * keyByte * 0x0F902007)& 0xFFFFFFFF ) - self._ctx = [ctx1, ctx2] - return [ctx1,ctx2] - - def decrypt(self, data, ctx=None): - if ctx == None: - ctx = self._ctx - ctx1 = ctx[0] - ctx2 = ctx[1] - plainText = "" - for dataChar in data: - dataByte = ord(dataChar) - m = (dataByte ^ ((ctx1 >> 3) &0xFF) ^ ((ctx2<<3) & 0xFF)) &0xFF - ctx2 = ctx1 - ctx1 = (((ctx1 >> 2) * (ctx1 >> 7)) &0xFFFFFFFF) ^((m * m * 0x0F902007) &0xFFFFFFFF) - plainText += chr(m) - return plainText - - class AES_CBC(object): - def __init__(self): - self._key = None - self._iv = None - self.aes = None - - def set_decrypt_key(self, userkey, iv): - self._key = userkey - self._iv = iv - self.aes = aescbc.AES_CBC(userkey, aescbc.noPadding(), len(userkey)) - - def decrypt(self, data): - iv = self._iv - cleartext = self.aes.decrypt(iv + data) - return cleartext - - return (AES_CBC, Pukall_Cipher, Topaz_Cipher) - - -def _load_crypto(): - AES_CBC = Pukall_Cipher = Topaz_Cipher = None - cryptolist = (_load_libalfcrypto, _load_python_alfcrypto) - for loader in cryptolist: - try: - AES_CBC, Pukall_Cipher, Topaz_Cipher = loader() - break - except (ImportError, Exception): - pass - return AES_CBC, Pukall_Cipher, Topaz_Cipher - -AES_CBC, Pukall_Cipher, Topaz_Cipher = _load_crypto() - - -class KeyIVGen(object): - # this only exists in openssl so we will use pure python implementation instead - # PKCS5_PBKDF2_HMAC_SHA1 = F(c_int, 'PKCS5_PBKDF2_HMAC_SHA1', - # [c_char_p, c_ulong, c_char_p, c_ulong, c_ulong, c_ulong, c_char_p]) - def pbkdf2(self, passwd, salt, iter, keylen): - - def xorstr( a, b ): - if len(a) != len(b): - raise Exception("xorstr(): lengths differ") - return ''.join((chr(ord(x)^ord(y)) for x, y in zip(a, b))) - - def prf( h, data ): - hm = h.copy() - hm.update( data ) - return hm.digest() - - def pbkdf2_F( h, salt, itercount, blocknum ): - U = prf( h, salt + pack('>i',blocknum ) ) - T = U - for i in range(2, itercount+1): - U = prf( h, U ) - T = xorstr( T, U ) - return T - - sha = hashlib.sha1 - digest_size = sha().digest_size - # l - number of output blocks to produce - l = keylen / digest_size - if keylen % digest_size != 0: - l += 1 - h = hmac.new( passwd, None, sha ) - T = "" - for i in range(1, l+1): - T += pbkdf2_F( h, salt, iter, i ) - return T[0: keylen] - + mlst.append('\n') + mlst.append('' % (pp.pw, pp.ph)) + if (pp.gid != None): + mlst.append('\n') + gdefs = pp.getGlyphs() + for j in xrange(0,len(gdefs)): + mlst.append(gdefs[j]) + mlst.append('\n') + img = pp.getImages() + if (img != None): + for j in xrange(0,len(img)): + mlst.append(img[j]) + if (pp.gid != None): + for j in xrange(0,len(pp.gid)): + mlst.append('\n' % (pp.gid[j], pp.gx[j], pp.gy[j])) + if (img == None or len(img) == 0) and (pp.gid == None or len(pp.gid) == 0): + xpos = "%d" % (pp.pw // 3) + ypos = "%d" % (pp.ph // 3) + mlst.append('This page intentionally left blank.\n') + if (raw) : + mlst.append('') + else : + mlst.append('\n') + if nextid == None: + mlst.append('\n') + else : + mlst.append('\n') + mlst.append('
\n') + mlst.append('
zoom in - zoom out
\n') + mlst.append('\n') + mlst.append('\n') + return "".join(mlst) diff --git a/Calibre_Plugins/K4MobiDeDRM_plugin/genbook.py b/Calibre_Plugins/K4MobiDeDRM_plugin/genbook.py index 7bef68e..9733887 100644 Binary files a/Calibre_Plugins/K4MobiDeDRM_plugin/genbook.py and b/Calibre_Plugins/K4MobiDeDRM_plugin/genbook.py differ diff --git a/Calibre_Plugins/K4MobiDeDRM_plugin/k4mobidedrm_orig.py b/Calibre_Plugins/K4MobiDeDRM_plugin/k4mobidedrm_orig.py index 269810c..a889564 100644 Binary files a/Calibre_Plugins/K4MobiDeDRM_plugin/k4mobidedrm_orig.py and b/Calibre_Plugins/K4MobiDeDRM_plugin/k4mobidedrm_orig.py differ diff --git a/Calibre_Plugins/K4MobiDeDRM_plugin/kgenpids.py b/Calibre_Plugins/K4MobiDeDRM_plugin/kgenpids.py index e5647f4..c4e45be 100644 --- a/Calibre_Plugins/K4MobiDeDRM_plugin/kgenpids.py +++ b/Calibre_Plugins/K4MobiDeDRM_plugin/kgenpids.py @@ -1,793 +1,270 @@ -#! /usr/bin/python -# vim:ts=4:sw=4:softtabstop=4:smarttab:expandtab -# For use with Topaz Scripts Version 2.6 +#!/usr/bin/env python +from __future__ import with_statement import sys -import csv -import os -import math -import getopt -from struct import pack -from struct import unpack +import os, csv +import binascii +import zlib +import re +from struct import pack, unpack, unpack_from + +class DrmException(Exception): + pass + +global charMap1 +global charMap3 +global charMap4 + +if 'calibre' in sys.modules: + inCalibre = True +else: + inCalibre = False + +if inCalibre: + if sys.platform.startswith('win'): + from calibre_plugins.k4mobidedrm.k4pcutils import getKindleInfoFiles, getDBfromFile, GetUserName, GetIDString + + if sys.platform.startswith('darwin'): + from calibre_plugins.k4mobidedrm.k4mutils import getKindleInfoFiles, getDBfromFile, GetUserName, GetIDString +else: + if sys.platform.startswith('win'): + from k4pcutils import getKindleInfoFiles, getDBfromFile, GetUserName, GetIDString + + if sys.platform.startswith('darwin'): + from k4mutils import getKindleInfoFiles, getDBfromFile, GetUserName, GetIDString -class DocParser(object): - def __init__(self, flatxml, classlst, fileid, bookDir, gdict, fixedimage): - self.id = os.path.basename(fileid).replace('.dat','') - self.svgcount = 0 - self.docList = flatxml.split('\n') - self.docSize = len(self.docList) - self.classList = {} - self.bookDir = bookDir - self.gdict = gdict - tmpList = classlst.split('\n') - for pclass in tmpList: - if pclass != '': - # remove the leading period from the css name - cname = pclass[1:] - self.classList[cname] = True - self.fixedimage = fixedimage - self.ocrtext = [] - self.link_id = [] - self.link_title = [] - self.link_page = [] - self.link_href = [] - self.link_type = [] - self.dehyphen_rootid = [] - self.paracont_stemid = [] - self.parastems_stemid = [] +charMap1 = "n5Pr6St7Uv8Wx9YzAb0Cd1Ef2Gh3Jk4M" +charMap3 = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/" +charMap4 = "ABCDEFGHIJKLMNPQRSTUVWXYZ123456789" + +# crypto digestroutines +import hashlib + +def MD5(message): + ctx = hashlib.md5() + ctx.update(message) + return ctx.digest() + +def SHA1(message): + ctx = hashlib.sha1() + ctx.update(message) + return ctx.digest() - def getGlyph(self, gid): - result = '' - id='id="gl%d"' % gid - return self.gdict.lookup(id) +# Encode the bytes in data with the characters in map +def encode(data, map): + result = "" + for char in data: + value = ord(char) + Q = (value ^ 0x80) // len(map) + R = value % len(map) + result += map[Q] + result += map[R] + return result - def glyphs_to_image(self, glyphList): +# Hash the bytes in data and then encode the digest with the characters in map +def encodeHash(data,map): + return encode(MD5(data),map) - def extract(path, key): - b = path.find(key) + len(key) - e = path.find(' ',b) - return int(path[b:e]) +# Decode the string in data with the characters in map. Returns the decoded bytes +def decode(data,map): + result = "" + for i in range (0,len(data)-1,2): + high = map.find(data[i]) + low = map.find(data[i+1]) + if (high == -1) or (low == -1) : + break + value = (((high * len(map)) ^ 0x80) & 0xFF) + low + result += pack("B",value) + return result - svgDir = os.path.join(self.bookDir,'svg') +# +# PID generation routines +# - imgDir = os.path.join(self.bookDir,'img') - imgname = self.id + '_%04d.svg' % self.svgcount - imgfile = os.path.join(imgDir,imgname) +# Returns two bit at offset from a bit field +def getTwoBitsFromBitField(bitField,offset): + byteNumber = offset // 4 + bitPosition = 6 - 2*(offset % 4) + return ord(bitField[byteNumber]) >> bitPosition & 3 - # get glyph information - gxList = self.getData('info.glyph.x',0,-1) - gyList = self.getData('info.glyph.y',0,-1) - gidList = self.getData('info.glyph.glyphID',0,-1) +# Returns the six bits at offset from a bit field +def getSixBitsFromBitField(bitField,offset): + offset *= 3 + value = (getTwoBitsFromBitField(bitField,offset) <<4) + (getTwoBitsFromBitField(bitField,offset+1) << 2) +getTwoBitsFromBitField(bitField,offset+2) + return value - gids = [] - maxws = [] - maxhs = [] - xs = [] - ys = [] - gdefs = [] +# 8 bits to six bits encoding from hash to generate PID string +def encodePID(hash): + global charMap3 + PID = "" + for position in range (0,8): + PID += charMap3[getSixBitsFromBitField(hash,position)] + return PID - # get path defintions, positions, dimensions for each glyph - # that makes up the image, and find min x and min y to reposition origin - minx = -1 - miny = -1 - for j in glyphList: - gid = gidList[j] - gids.append(gid) - - xs.append(gxList[j]) - if minx == -1: minx = gxList[j] - else : minx = min(minx, gxList[j]) - - ys.append(gyList[j]) - if miny == -1: miny = gyList[j] - else : miny = min(miny, gyList[j]) - - path = self.getGlyph(gid) - gdefs.append(path) - - maxws.append(extract(path,'width=')) - maxhs.append(extract(path,'height=')) - - - # change the origin to minx, miny and calc max height and width - maxw = maxws[0] + xs[0] - minx - maxh = maxhs[0] + ys[0] - miny - for j in xrange(0, len(xs)): - xs[j] = xs[j] - minx - ys[j] = ys[j] - miny - maxw = max( maxw, (maxws[j] + xs[j]) ) - maxh = max( maxh, (maxhs[j] + ys[j]) ) - - # open the image file for output - ifile = open(imgfile,'w') - ifile.write('\n') - ifile.write('\n') - ifile.write('\n' % (math.floor(maxw/10), math.floor(maxh/10), maxw, maxh)) - ifile.write('\n') - for j in xrange(0,len(gdefs)): - ifile.write(gdefs[j]) - ifile.write('\n') - for j in xrange(0,len(gids)): - ifile.write('\n' % (gids[j], xs[j], ys[j])) - ifile.write('') - ifile.close() - - return 0 - - - - # return tag at line pos in document - def lineinDoc(self, pos) : - if (pos >= 0) and (pos < self.docSize) : - item = self.docList[pos] - if item.find('=') >= 0: - (name, argres) = item.split('=',1) +# Encryption table used to generate the device PID +def generatePidEncryptionTable() : + table = [] + for counter1 in range (0,0x100): + value = counter1 + for counter2 in range (0,8): + if (value & 1 == 0) : + value = value >> 1 else : - name = item - argres = '' - return name, argres - - - # find tag in doc if within pos to end inclusive - def findinDoc(self, tagpath, pos, end) : - result = None - if end == -1 : - end = self.docSize - else: - end = min(self.docSize, end) - foundat = -1 - for j in xrange(pos, end): - item = self.docList[j] - if item.find('=') >= 0: - (name, argres) = item.split('=',1) - else : - name = item - argres = '' - if name.endswith(tagpath) : - result = argres - foundat = j - break - return foundat, result - - - # return list of start positions for the tagpath - def posinDoc(self, tagpath): - startpos = [] - pos = 0 - res = "" - while res != None : - (foundpos, res) = self.findinDoc(tagpath, pos, -1) - if res != None : - startpos.append(foundpos) - pos = foundpos + 1 - return startpos - - - # returns a vector of integers for the tagpath - def getData(self, tagpath, pos, end): - argres=[] - (foundat, argt) = self.findinDoc(tagpath, pos, end) - if (argt != None) and (len(argt) > 0) : - argList = argt.split('|') - argres = [ int(strval) for strval in argList] - return argres - - - # get the class - def getClass(self, pclass): - nclass = pclass - - # class names are an issue given topaz may start them with numerals (not allowed), - # use a mix of cases (which cause some browsers problems), and actually - # attach numbers after "_reclustered*" to the end to deal classeses that inherit - # from a base class (but then not actually provide all of these _reclustereed - # classes in the stylesheet! - - # so we clean this up by lowercasing, prepend 'cl-', and getting any baseclass - # that exists in the stylesheet first, and then adding this specific class - # after - - # also some class names have spaces in them so need to convert to dashes - if nclass != None : - nclass = nclass.replace(' ','-') - classres = '' - nclass = nclass.lower() - nclass = 'cl-' + nclass - baseclass = '' - # graphic is the base class for captions - if nclass.find('cl-cap-') >=0 : - classres = 'graphic' + ' ' - else : - # strip to find baseclass - p = nclass.find('_') - if p > 0 : - baseclass = nclass[0:p] - if baseclass in self.classList: - classres += baseclass + ' ' - classres += nclass - nclass = classres - return nclass - - - # develop a sorted description of the starting positions of - # groups and regions on the page, as well as the page type - def PageDescription(self): - - def compare(x, y): - (xtype, xval) = x - (ytype, yval) = y - if xval > yval: - return 1 - if xval == yval: - return 0 - return -1 - - result = [] - (pos, pagetype) = self.findinDoc('page.type',0,-1) - - groupList = self.posinDoc('page.group') - groupregionList = self.posinDoc('page.group.region') - pageregionList = self.posinDoc('page.region') - # integrate into one list - for j in groupList: - result.append(('grpbeg',j)) - for j in groupregionList: - result.append(('gregion',j)) - for j in pageregionList: - result.append(('pregion',j)) - result.sort(compare) - - # insert group end and page end indicators - inGroup = False - j = 0 - while True: - if j == len(result): break - rtype = result[j][0] - rval = result[j][1] - if not inGroup and (rtype == 'grpbeg') : - inGroup = True - j = j + 1 - elif inGroup and (rtype in ('grpbeg', 'pregion')): - result.insert(j,('grpend',rval)) - inGroup = False - else: - j = j + 1 - if inGroup: - result.append(('grpend',-1)) - result.append(('pageend', -1)) - return pagetype, result - - - - # build a description of the paragraph - def getParaDescription(self, start, end, regtype): - - result = [] - - # paragraph - (pos, pclass) = self.findinDoc('paragraph.class',start,end) - - pclass = self.getClass(pclass) - - # if paragraph uses extratokens (extra glyphs) then make it fixed - (pos, extraglyphs) = self.findinDoc('paragraph.extratokens',start,end) - - # build up a description of the paragraph in result and return it - # first check for the basic - all words paragraph - (pos, sfirst) = self.findinDoc('paragraph.firstWord',start,end) - (pos, slast) = self.findinDoc('paragraph.lastWord',start,end) - if (sfirst != None) and (slast != None) : - first = int(sfirst) - last = int(slast) - - makeImage = (regtype == 'vertical') or (regtype == 'table') - makeImage = makeImage or (extraglyphs != None) - if self.fixedimage: - makeImage = makeImage or (regtype == 'fixed') - - if (pclass != None): - makeImage = makeImage or (pclass.find('.inverted') >= 0) - if self.fixedimage : - makeImage = makeImage or (pclass.find('cl-f-') >= 0) - - # before creating an image make sure glyph info exists - gidList = self.getData('info.glyph.glyphID',0,-1) - - makeImage = makeImage & (len(gidList) > 0) - - if not makeImage : - # standard all word paragraph - for wordnum in xrange(first, last): - result.append(('ocr', wordnum)) - return pclass, result - - # convert paragraph to svg image - # translate first and last word into first and last glyphs - # and generate inline image and include it - glyphList = [] - firstglyphList = self.getData('word.firstGlyph',0,-1) - gidList = self.getData('info.glyph.glyphID',0,-1) - firstGlyph = firstglyphList[first] - if last < len(firstglyphList): - lastGlyph = firstglyphList[last] - else : - lastGlyph = len(gidList) - - # handle case of white sapce paragraphs with no actual glyphs in them - # by reverting to text based paragraph - if firstGlyph >= lastGlyph: - # revert to standard text based paragraph - for wordnum in xrange(first, last): - result.append(('ocr', wordnum)) - return pclass, result - - for glyphnum in xrange(firstGlyph, lastGlyph): - glyphList.append(glyphnum) - # include any extratokens if they exist - (pos, sfg) = self.findinDoc('extratokens.firstGlyph',start,end) - (pos, slg) = self.findinDoc('extratokens.lastGlyph',start,end) - if (sfg != None) and (slg != None): - for glyphnum in xrange(int(sfg), int(slg)): - glyphList.append(glyphnum) - num = self.svgcount - self.glyphs_to_image(glyphList) - self.svgcount += 1 - result.append(('svg', num)) - return pclass, result - - # this type of paragraph may be made up of multiple spans, inline - # word monograms (images), and words with semantic meaning, - # plus glyphs used to form starting letter of first word - - # need to parse this type line by line - line = start + 1 - word_class = '' - - # if end is -1 then we must search to end of document - if end == -1 : - end = self.docSize - - # seems some xml has last* coming before first* so we have to - # handle any order - sp_first = -1 - sp_last = -1 - - gl_first = -1 - gl_last = -1 - - ws_first = -1 - ws_last = -1 - - word_class = '' - - word_semantic_type = '' - - while (line < end) : - - (name, argres) = self.lineinDoc(line) - - if name.endswith('span.firstWord') : - sp_first = int(argres) - - elif name.endswith('span.lastWord') : - sp_last = int(argres) - - elif name.endswith('word.firstGlyph') : - gl_first = int(argres) - - elif name.endswith('word.lastGlyph') : - gl_last = int(argres) - - elif name.endswith('word_semantic.firstWord'): - ws_first = int(argres) - - elif name.endswith('word_semantic.lastWord'): - ws_last = int(argres) - - elif name.endswith('word.class'): - (cname, space) = argres.split('-',1) - if space == '' : space = '0' - if (cname == 'spaceafter') and (int(space) > 0) : - word_class = 'sa' - - elif name.endswith('word.img.src'): - result.append(('img' + word_class, int(argres))) - word_class = '' - - elif name.endswith('region.img.src'): - result.append(('img' + word_class, int(argres))) - - if (sp_first != -1) and (sp_last != -1): - for wordnum in xrange(sp_first, sp_last): - result.append(('ocr', wordnum)) - sp_first = -1 - sp_last = -1 - - if (gl_first != -1) and (gl_last != -1): - glyphList = [] - for glyphnum in xrange(gl_first, gl_last): - glyphList.append(glyphnum) - num = self.svgcount - self.glyphs_to_image(glyphList) - self.svgcount += 1 - result.append(('svg', num)) - gl_first = -1 - gl_last = -1 - - if (ws_first != -1) and (ws_last != -1): - for wordnum in xrange(ws_first, ws_last): - result.append(('ocr', wordnum)) - ws_first = -1 - ws_last = -1 - - line += 1 - - return pclass, result - - - def buildParagraph(self, pclass, pdesc, type, regtype) : - parares = '' - sep ='' - - classres = '' - if pclass : - classres = ' class="' + pclass + '"' - - br_lb = (regtype == 'fixed') or (regtype == 'chapterheading') or (regtype == 'vertical') - - handle_links = len(self.link_id) > 0 - - if (type == 'full') or (type == 'begin') : - parares += '' - - if (type == 'end'): - parares += ' ' - - lstart = len(parares) - - cnt = len(pdesc) - - for j in xrange( 0, cnt) : - - (wtype, num) = pdesc[j] - - if wtype == 'ocr' : - word = self.ocrtext[num] - sep = ' ' - - if handle_links: - link = self.link_id[num] - if (link > 0): - linktype = self.link_type[link-1] - title = self.link_title[link-1] - if (title == "") or (parares.rfind(title) < 0): - title=parares[lstart:] - if linktype == 'external' : - linkhref = self.link_href[link-1] - linkhtml = '' % linkhref - else : - if len(self.link_page) >= link : - ptarget = self.link_page[link-1] - 1 - linkhtml = '' % ptarget - else : - # just link to the current page - linkhtml = '' - linkhtml += title + '' - pos = parares.rfind(title) - if pos >= 0: - parares = parares[0:pos] + linkhtml + parares[pos+len(title):] - else : - parares += linkhtml - lstart = len(parares) - if word == '_link_' : word = '' - elif (link < 0) : - if word == '_link_' : word = '' - - if word == '_lb_': - if ((num-1) in self.dehyphen_rootid ) or handle_links: - word = '' - sep = '' - elif br_lb : - word = '
\n' - sep = '' - else : - word = '\n' - sep = '' - - if num in self.dehyphen_rootid : - word = word[0:-1] - sep = '' - - parares += word + sep - - elif wtype == 'img' : - sep = '' - parares += '' % num - parares += sep - - elif wtype == 'imgsa' : - sep = ' ' - parares += '' % num - parares += sep - - elif wtype == 'svg' : - sep = '' - parares += '' % num - parares += sep - - if len(sep) > 0 : parares = parares[0:-1] - if (type == 'full') or (type == 'end') : - parares += '

' - return parares - - - def buildTOCEntry(self, pdesc) : - parares = '' - sep ='' - tocentry = '' - handle_links = len(self.link_id) > 0 - - lstart = 0 - - cnt = len(pdesc) - for j in xrange( 0, cnt) : - - (wtype, num) = pdesc[j] - - if wtype == 'ocr' : - word = self.ocrtext[num] - sep = ' ' - - if handle_links: - link = self.link_id[num] - if (link > 0): - linktype = self.link_type[link-1] - title = self.link_title[link-1] - title = title.rstrip('. ') - alt_title = parares[lstart:] - alt_title = alt_title.strip() - # now strip off the actual printed page number - alt_title = alt_title.rstrip('01234567890ivxldIVXLD-.') - alt_title = alt_title.rstrip('. ') - # skip over any external links - can't have them in a books toc - if linktype == 'external' : - title = '' - alt_title = '' - linkpage = '' - else : - if len(self.link_page) >= link : - ptarget = self.link_page[link-1] - 1 - linkpage = '%04d' % ptarget - else : - # just link to the current page - linkpage = self.id[4:] - if len(alt_title) >= len(title): - title = alt_title - if title != '' and linkpage != '': - tocentry += title + '|' + linkpage + '\n' - lstart = len(parares) - if word == '_link_' : word = '' - elif (link < 0) : - if word == '_link_' : word = '' - - if word == '_lb_': - word = '' - sep = '' - - if num in self.dehyphen_rootid : - word = word[0:-1] - sep = '' - - parares += word + sep - - else : - continue - - return tocentry - - - - - # walk the document tree collecting the information needed - # to build an html page using the ocrText - - def process(self): - - tocinfo = '' - hlst = [] - - # get the ocr text - (pos, argres) = self.findinDoc('info.word.ocrText',0,-1) - if argres : self.ocrtext = argres.split('|') - - # get information to dehyphenate the text - self.dehyphen_rootid = self.getData('info.dehyphen.rootID',0,-1) - - # determine if first paragraph is continued from previous page - (pos, self.parastems_stemid) = self.findinDoc('info.paraStems.stemID',0,-1) - first_para_continued = (self.parastems_stemid != None) - - # determine if last paragraph is continued onto the next page - (pos, self.paracont_stemid) = self.findinDoc('info.paraCont.stemID',0,-1) - last_para_continued = (self.paracont_stemid != None) - - # collect link ids - self.link_id = self.getData('info.word.link_id',0,-1) - - # collect link destination page numbers - self.link_page = self.getData('info.links.page',0,-1) - - # collect link types (container versus external) - (pos, argres) = self.findinDoc('info.links.type',0,-1) - if argres : self.link_type = argres.split('|') - - # collect link destinations - (pos, argres) = self.findinDoc('info.links.href',0,-1) - if argres : self.link_href = argres.split('|') - - # collect link titles - (pos, argres) = self.findinDoc('info.links.title',0,-1) - if argres : - self.link_title = argres.split('|') - else: - self.link_title.append('') - - # get a descriptions of the starting points of the regions - # and groups on the page - (pagetype, pageDesc) = self.PageDescription() - regcnt = len(pageDesc) - 1 - - anchorSet = False - breakSet = False - inGroup = False - - # process each region on the page and convert what you can to html - - for j in xrange(regcnt): - - (etype, start) = pageDesc[j] - (ntype, end) = pageDesc[j+1] - - - # set anchor for link target on this page - if not anchorSet and not first_para_continued: - hlst.append('\n') - anchorSet = True - - # handle groups of graphics with text captions - if (etype == 'grpbeg'): - (pos, grptype) = self.findinDoc('group.type', start, end) - if grptype != None: - if grptype == 'graphic': - gcstr = ' class="' + grptype + '"' - hlst.append('') - inGroup = True - - elif (etype == 'grpend'): - if inGroup: - hlst.append('\n') - inGroup = False - - else: - (pos, regtype) = self.findinDoc('region.type',start,end) - - if regtype == 'graphic' : - (pos, simgsrc) = self.findinDoc('img.src',start,end) - if simgsrc: - if inGroup: - hlst.append('' % int(simgsrc)) - else: - hlst.append('
' % int(simgsrc)) - - elif regtype == 'chapterheading' : - (pclass, pdesc) = self.getParaDescription(start,end, regtype) - if not breakSet: - hlst.append('
 
\n') - breakSet = True - tag = 'h1' - if pclass and (len(pclass) >= 7): - if pclass[3:7] == 'ch1-' : tag = 'h1' - if pclass[3:7] == 'ch2-' : tag = 'h2' - if pclass[3:7] == 'ch3-' : tag = 'h3' - hlst.append('<' + tag + ' class="' + pclass + '">') - else: - hlst.append('<' + tag + '>') - hlst.append(self.buildParagraph(pclass, pdesc, 'middle', regtype)) - hlst.append('') - - elif (regtype == 'text') or (regtype == 'fixed') or (regtype == 'insert') or (regtype == 'listitem'): - ptype = 'full' - # check to see if this is a continution from the previous page - if first_para_continued : - ptype = 'end' - first_para_continued = False - (pclass, pdesc) = self.getParaDescription(start,end, regtype) - if pclass and (len(pclass) >= 6) and (ptype == 'full'): - tag = 'p' - if pclass[3:6] == 'h1-' : tag = 'h4' - if pclass[3:6] == 'h2-' : tag = 'h5' - if pclass[3:6] == 'h3-' : tag = 'h6' - hlst.append('<' + tag + ' class="' + pclass + '">') - hlst.append(self.buildParagraph(pclass, pdesc, 'middle', regtype)) - hlst.append('') - else : - hlst.append(self.buildParagraph(pclass, pdesc, ptype, regtype)) - - elif (regtype == 'tocentry') : - ptype = 'full' - if first_para_continued : - ptype = 'end' - first_para_continued = False - (pclass, pdesc) = self.getParaDescription(start,end, regtype) - tocinfo += self.buildTOCEntry(pdesc) - hlst.append(self.buildParagraph(pclass, pdesc, ptype, regtype)) - - elif (regtype == 'vertical') or (regtype == 'table') : - ptype = 'full' - if inGroup: - ptype = 'middle' - if first_para_continued : - ptype = 'end' - first_para_continued = False - (pclass, pdesc) = self.getParaDescription(start, end, regtype) - hlst.append(self.buildParagraph(pclass, pdesc, ptype, regtype)) - - - elif (regtype == 'synth_fcvr.center'): - (pos, simgsrc) = self.findinDoc('img.src',start,end) - if simgsrc: - hlst.append('
' % int(simgsrc)) - - else : - print ' Making region type', regtype, - (pos, temp) = self.findinDoc('paragraph',start,end) - (pos2, temp) = self.findinDoc('span',start,end) - if pos != -1 or pos2 != -1: - print ' a "text" region' - orig_regtype = regtype - regtype = 'fixed' - ptype = 'full' - # check to see if this is a continution from the previous page - if first_para_continued : - ptype = 'end' - first_para_continued = False - (pclass, pdesc) = self.getParaDescription(start,end, regtype) - if not pclass: - if orig_regtype.endswith('.right') : pclass = 'cl-right' - elif orig_regtype.endswith('.center') : pclass = 'cl-center' - elif orig_regtype.endswith('.left') : pclass = 'cl-left' - elif orig_regtype.endswith('.justify') : pclass = 'cl-justify' - if pclass and (ptype == 'full') and (len(pclass) >= 6): - tag = 'p' - if pclass[3:6] == 'h1-' : tag = 'h4' - if pclass[3:6] == 'h2-' : tag = 'h5' - if pclass[3:6] == 'h3-' : tag = 'h6' - hlst.append('<' + tag + ' class="' + pclass + '">') - hlst.append(self.buildParagraph(pclass, pdesc, 'middle', regtype)) - hlst.append('') - else : - hlst.append(self.buildParagraph(pclass, pdesc, ptype, regtype)) - else : - print ' a "graphic" region' - (pos, simgsrc) = self.findinDoc('img.src',start,end) - if simgsrc: - hlst.append('
' % int(simgsrc)) - - - htmlpage = "".join(hlst) - if last_para_continued : - if htmlpage[-4:] == '

': - htmlpage = htmlpage[0:-4] - last_para_continued = False - - return htmlpage, tocinfo - - -def convert2HTML(flatxml, classlst, fileid, bookDir, gdict, fixedimage): - # create a document parser - dp = DocParser(flatxml, classlst, fileid, bookDir, gdict, fixedimage) - htmlpage, tocinfo = dp.process() - return htmlpage, tocinfo + value = value >> 1 + value = value ^ 0xEDB88320 + table.append(value) + return table + +# Seed value used to generate the device PID +def generatePidSeed(table,dsn) : + value = 0 + for counter in range (0,4) : + index = (ord(dsn[counter]) ^ value) &0xFF + value = (value >> 8) ^ table[index] + return value + +# Generate the device PID +def generateDevicePID(table,dsn,nbRoll): + global charMap4 + seed = generatePidSeed(table,dsn) + pidAscii = "" + pid = [(seed >>24) &0xFF,(seed >> 16) &0xff,(seed >> 8) &0xFF,(seed) & 0xFF,(seed>>24) & 0xFF,(seed >> 16) &0xff,(seed >> 8) &0xFF,(seed) & 0xFF] + index = 0 + for counter in range (0,nbRoll): + pid[index] = pid[index] ^ ord(dsn[counter]) + index = (index+1) %8 + for counter in range (0,8): + index = ((((pid[counter] >>5) & 3) ^ pid[counter]) & 0x1f) + (pid[counter] >> 7) + pidAscii += charMap4[index] + return pidAscii + +def crc32(s): + return (~binascii.crc32(s,-1))&0xFFFFFFFF + +# convert from 8 digit PID to 10 digit PID with checksum +def checksumPid(s): + global charMap4 + crc = crc32(s) + crc = crc ^ (crc >> 16) + res = s + l = len(charMap4) + for i in (0,1): + b = crc & 0xff + pos = (b // l) ^ (b % l) + res += charMap4[pos%l] + crc >>= 8 + return res + + +# old kindle serial number to fixed pid +def pidFromSerial(s, l): + global charMap4 + crc = crc32(s) + arr1 = [0]*l + for i in xrange(len(s)): + arr1[i%l] ^= ord(s[i]) + crc_bytes = [crc >> 24 & 0xff, crc >> 16 & 0xff, crc >> 8 & 0xff, crc & 0xff] + for i in xrange(l): + arr1[i] ^= crc_bytes[i&3] + pid = "" + for i in xrange(l): + b = arr1[i] & 0xff + pid+=charMap4[(b >> 7) + ((b >> 5 & 3) ^ (b & 0x1f))] + return pid + + +# Parse the EXTH header records and use the Kindle serial number to calculate the book pid. +def getKindlePid(pidlst, rec209, token, serialnum): + # Compute book PID + pidHash = SHA1(serialnum+rec209+token) + bookPID = encodePID(pidHash) + bookPID = checksumPid(bookPID) + pidlst.append(bookPID) + + # compute fixed pid for old pre 2.5 firmware update pid as well + bookPID = pidFromSerial(serialnum, 7) + "*" + bookPID = checksumPid(bookPID) + pidlst.append(bookPID) + + return pidlst + + +# parse the Kindleinfo file to calculate the book pid. + +keynames = ["kindle.account.tokens","kindle.cookie.item","eulaVersionAccepted","login_date","kindle.token.item","login","kindle.key.item","kindle.name.info","kindle.device.info", "MazamaRandomNumber"] + +def getK4Pids(pidlst, rec209, token, kInfoFile): + global charMap1 + kindleDatabase = None + try: + kindleDatabase = getDBfromFile(kInfoFile) + except Exception, message: + print(message) + kindleDatabase = None + pass + + if kindleDatabase == None : + return pidlst + + try: + # Get the Mazama Random number + MazamaRandomNumber = kindleDatabase["MazamaRandomNumber"] + + # Get the kindle account token + kindleAccountToken = kindleDatabase["kindle.account.tokens"] + except KeyError: + print "Keys not found in " + kInfoFile + return pidlst + + # Get the ID string used + encodedIDString = encodeHash(GetIDString(),charMap1) + + # Get the current user name + encodedUsername = encodeHash(GetUserName(),charMap1) + + # concat, hash and encode to calculate the DSN + DSN = encode(SHA1(MazamaRandomNumber+encodedIDString+encodedUsername),charMap1) + + # Compute the device PID (for which I can tell, is used for nothing). + table = generatePidEncryptionTable() + devicePID = generateDevicePID(table,DSN,4) + devicePID = checksumPid(devicePID) + pidlst.append(devicePID) + + # Compute book PIDs + + # book pid + pidHash = SHA1(DSN+kindleAccountToken+rec209+token) + bookPID = encodePID(pidHash) + bookPID = checksumPid(bookPID) + pidlst.append(bookPID) + + # variant 1 + pidHash = SHA1(kindleAccountToken+rec209+token) + bookPID = encodePID(pidHash) + bookPID = checksumPid(bookPID) + pidlst.append(bookPID) + + # variant 2 + pidHash = SHA1(DSN+rec209+token) + bookPID = encodePID(pidHash) + bookPID = checksumPid(bookPID) + pidlst.append(bookPID) + + return pidlst + +def getPidList(md1, md2, k4, pids, serials, kInfoFiles): + pidlst = [] + if kInfoFiles is None: + kInfoFiles = [] + if k4: + kInfoFiles = getKindleInfoFiles(kInfoFiles) + for infoFile in kInfoFiles: + pidlst = getK4Pids(pidlst, md1, md2, infoFile) + for serialnum in serials: + pidlst = getKindlePid(pidlst, md1, md2, serialnum) + for pid in pids: + pidlst.append(pid) + return pidlst diff --git a/Calibre_Plugins/K4MobiDeDRM_plugin/libalfcrypto.dylib b/Calibre_Plugins/K4MobiDeDRM_plugin/libalfcrypto.dylib new file mode 100644 index 0000000..01c348c Binary files /dev/null and b/Calibre_Plugins/K4MobiDeDRM_plugin/libalfcrypto.dylib differ diff --git a/Calibre_Plugins/K4MobiDeDRM_plugin/libalfcrypto32.so b/Calibre_Plugins/K4MobiDeDRM_plugin/libalfcrypto32.so new file mode 100644 index 0000000..9a5a442 Binary files /dev/null and b/Calibre_Plugins/K4MobiDeDRM_plugin/libalfcrypto32.so differ diff --git a/Calibre_Plugins/K4MobiDeDRM_plugin/libalfcrypto64.so b/Calibre_Plugins/K4MobiDeDRM_plugin/libalfcrypto64.so new file mode 100644 index 0000000..a08ac28 Binary files /dev/null and b/Calibre_Plugins/K4MobiDeDRM_plugin/libalfcrypto64.so differ diff --git a/Calibre_Plugins/K4MobiDeDRM_plugin/pbkdf2.py b/Calibre_Plugins/K4MobiDeDRM_plugin/pbkdf2.py new file mode 100644 index 0000000..65220a9 --- /dev/null +++ b/Calibre_Plugins/K4MobiDeDRM_plugin/pbkdf2.py @@ -0,0 +1,68 @@ +# A simple implementation of pbkdf2 using stock python modules. See RFC2898 +# for details. Basically, it derives a key from a password and salt. + +# Copyright 2004 Matt Johnston +# Copyright 2009 Daniel Holth +# This code may be freely used and modified for any purpose. + +# Revision history +# v0.1 October 2004 - Initial release +# v0.2 8 March 2007 - Make usable with hashlib in Python 2.5 and use +# v0.3 "" the correct digest_size rather than always 20 +# v0.4 Oct 2009 - Rescue from chandler svn, test and optimize. + +import sys +import hmac +from struct import pack +try: + # only in python 2.5 + import hashlib + sha = hashlib.sha1 + md5 = hashlib.md5 + sha256 = hashlib.sha256 +except ImportError: # pragma: NO COVERAGE + # fallback + import sha + import md5 + +# this is what you want to call. +def pbkdf2( password, salt, itercount, keylen, hashfn = sha ): + try: + # depending whether the hashfn is from hashlib or sha/md5 + digest_size = hashfn().digest_size + except TypeError: # pragma: NO COVERAGE + digest_size = hashfn.digest_size + # l - number of output blocks to produce + l = keylen / digest_size + if keylen % digest_size != 0: + l += 1 + + h = hmac.new( password, None, hashfn ) + + T = "" + for i in range(1, l+1): + T += pbkdf2_F( h, salt, itercount, i ) + + return T[0: keylen] + +def xorstr( a, b ): + if len(a) != len(b): + raise ValueError("xorstr(): lengths differ") + return ''.join((chr(ord(x)^ord(y)) for x, y in zip(a, b))) + +def prf( h, data ): + hm = h.copy() + hm.update( data ) + return hm.digest() + +# Helper as per the spec. h is a hmac which has been created seeded with the +# password, it will be copy()ed and not modified. +def pbkdf2_F( h, salt, itercount, blocknum ): + U = prf( h, salt + pack('>i',blocknum ) ) + T = U + + for i in range(2, itercount+1): + U = prf( h, U ) + T = xorstr( T, U ) + + return T diff --git a/Calibre_Plugins/K4MobiDeDRM_plugin/plugin-import-name-k4mobidedrm.txt b/Calibre_Plugins/K4MobiDeDRM_plugin/plugin-import-name-k4mobidedrm.txt index e66e9f3..e69de29 100644 --- a/Calibre_Plugins/K4MobiDeDRM_plugin/plugin-import-name-k4mobidedrm.txt +++ b/Calibre_Plugins/K4MobiDeDRM_plugin/plugin-import-name-k4mobidedrm.txt @@ -1,726 +0,0 @@ -# standlone set of Mac OSX specific routines needed for KindleBooks - -from __future__ import with_statement - -import sys -import os -import os.path -import re -import copy -import subprocess -from struct import pack, unpack, unpack_from - -class DrmException(Exception): - pass - - -# interface to needed routines in openssl's libcrypto -def _load_crypto_libcrypto(): - from ctypes import CDLL, byref, POINTER, c_void_p, c_char_p, c_int, c_long, \ - Structure, c_ulong, create_string_buffer, addressof, string_at, cast - from ctypes.util import find_library - - libcrypto = find_library('crypto') - if libcrypto is None: - raise DrmException('libcrypto not found') - libcrypto = CDLL(libcrypto) - - # From OpenSSL's crypto aes header - # - # AES_ENCRYPT 1 - # AES_DECRYPT 0 - # AES_MAXNR 14 (in bytes) - # AES_BLOCK_SIZE 16 (in bytes) - # - # struct aes_key_st { - # unsigned long rd_key[4 *(AES_MAXNR + 1)]; - # int rounds; - # }; - # typedef struct aes_key_st AES_KEY; - # - # int AES_set_decrypt_key(const unsigned char *userKey, const int bits, AES_KEY *key); - # - # note: the ivec string, and output buffer are both mutable - # void AES_cbc_encrypt(const unsigned char *in, unsigned char *out, - # const unsigned long length, const AES_KEY *key, unsigned char *ivec, const int enc); - - AES_MAXNR = 14 - c_char_pp = POINTER(c_char_p) - c_int_p = POINTER(c_int) - - class AES_KEY(Structure): - _fields_ = [('rd_key', c_long * (4 * (AES_MAXNR + 1))), ('rounds', c_int)] - AES_KEY_p = POINTER(AES_KEY) - - def F(restype, name, argtypes): - func = getattr(libcrypto, name) - func.restype = restype - func.argtypes = argtypes - return func - - AES_cbc_encrypt = F(None, 'AES_cbc_encrypt',[c_char_p, c_char_p, c_ulong, AES_KEY_p, c_char_p,c_int]) - - AES_set_decrypt_key = F(c_int, 'AES_set_decrypt_key',[c_char_p, c_int, AES_KEY_p]) - - # From OpenSSL's Crypto evp/p5_crpt2.c - # - # int PKCS5_PBKDF2_HMAC_SHA1(const char *pass, int passlen, - # const unsigned char *salt, int saltlen, int iter, - # int keylen, unsigned char *out); - - PKCS5_PBKDF2_HMAC_SHA1 = F(c_int, 'PKCS5_PBKDF2_HMAC_SHA1', - [c_char_p, c_ulong, c_char_p, c_ulong, c_ulong, c_ulong, c_char_p]) - - class LibCrypto(object): - def __init__(self): - self._blocksize = 0 - self._keyctx = None - self._iv = 0 - - def set_decrypt_key(self, userkey, iv): - self._blocksize = len(userkey) - if (self._blocksize != 16) and (self._blocksize != 24) and (self._blocksize != 32) : - raise DrmException('AES improper key used') - return - keyctx = self._keyctx = AES_KEY() - self._iv = iv - self._userkey = userkey - rv = AES_set_decrypt_key(userkey, len(userkey) * 8, keyctx) - if rv < 0: - raise DrmException('Failed to initialize AES key') - - def decrypt(self, data): - out = create_string_buffer(len(data)) - mutable_iv = create_string_buffer(self._iv, len(self._iv)) - keyctx = self._keyctx - rv = AES_cbc_encrypt(data, out, len(data), keyctx, mutable_iv, 0) - if rv == 0: - raise DrmException('AES decryption failed') - return out.raw - - def keyivgen(self, passwd, salt, iter, keylen): - saltlen = len(salt) - passlen = len(passwd) - out = create_string_buffer(keylen) - rv = PKCS5_PBKDF2_HMAC_SHA1(passwd, passlen, salt, saltlen, iter, keylen, out) - return out.raw - return LibCrypto - -def _load_crypto(): - LibCrypto = None - try: - LibCrypto = _load_crypto_libcrypto() - except (ImportError, DrmException): - pass - return LibCrypto - -LibCrypto = _load_crypto() - -# -# Utility Routines -# - -# crypto digestroutines -import hashlib - -def MD5(message): - ctx = hashlib.md5() - ctx.update(message) - return ctx.digest() - -def SHA1(message): - ctx = hashlib.sha1() - ctx.update(message) - return ctx.digest() - -def SHA256(message): - ctx = hashlib.sha256() - ctx.update(message) - return ctx.digest() - -# Various character maps used to decrypt books. Probably supposed to act as obfuscation -charMap1 = "n5Pr6St7Uv8Wx9YzAb0Cd1Ef2Gh3Jk4M" -charMap2 = "ZB0bYyc1xDdW2wEV3Ff7KkPpL8UuGA4gz-Tme9Nn_tHh5SvXCsIiR6rJjQaqlOoM" - -# For kinf approach of K4Mac 1.6.X or later -# On K4PC charMap5 = "AzB0bYyCeVvaZ3FfUuG4g-TtHh5SsIiR6rJjQq7KkPpL8lOoMm9Nn_c1XxDdW2wE" -# For Mac they seem to re-use charMap2 here -charMap5 = charMap2 - -# new in K4M 1.9.X -testMap8 = "YvaZ3FfUm9Nn_c1XuG4yCAzB0beVg-TtHh5SsIiR6rJjQdW2wEq7KkPpL8lOoMxD" - - -def encode(data, map): - result = "" - for char in data: - value = ord(char) - Q = (value ^ 0x80) // len(map) - R = value % len(map) - result += map[Q] - result += map[R] - return result - -# Hash the bytes in data and then encode the digest with the characters in map -def encodeHash(data,map): - return encode(MD5(data),map) - -# Decode the string in data with the characters in map. Returns the decoded bytes -def decode(data,map): - result = "" - for i in range (0,len(data)-1,2): - high = map.find(data[i]) - low = map.find(data[i+1]) - if (high == -1) or (low == -1) : - break - value = (((high * len(map)) ^ 0x80) & 0xFF) + low - result += pack("B",value) - return result - -# For K4M 1.6.X and later -# generate table of prime number less than or equal to int n -def primes(n): - if n==2: return [2] - elif n<2: return [] - s=range(3,n+1,2) - mroot = n ** 0.5 - half=(n+1)/2-1 - i=0 - m=3 - while m <= mroot: - if s[i]: - j=(m*m-3)/2 - s[j]=0 - while j 7: - return mungedmac - sernum = GetVolumeSerialNumber() - if len(sernum) > 7: - return sernum - diskpart = GetUserHomeAppSupKindleDirParitionName() - uuidnum = GetDiskPartitionUUID(diskpart) - if len(uuidnum) > 7: - return uuidnum - mungedmac = GetMACAddressMunged() - if len(mungedmac) > 7: - return mungedmac - return '9999999999' - - -# implements an Pseudo Mac Version of Windows built-in Crypto routine -# used by Kindle for Mac versions < 1.6.0 -class CryptUnprotectData(object): - def __init__(self): - sernum = GetVolumeSerialNumber() - if sernum == '': - sernum = '9999999999' - sp = sernum + '!@#' + GetUserName() - passwdData = encode(SHA256(sp),charMap1) - salt = '16743' - self.crp = LibCrypto() - iter = 0x3e8 - keylen = 0x80 - key_iv = self.crp.keyivgen(passwdData, salt, iter, keylen) - self.key = key_iv[0:32] - self.iv = key_iv[32:48] - self.crp.set_decrypt_key(self.key, self.iv) - - def decrypt(self, encryptedData): - cleartext = self.crp.decrypt(encryptedData) - cleartext = decode(cleartext,charMap1) - return cleartext - - -# implements an Pseudo Mac Version of Windows built-in Crypto routine -# used for Kindle for Mac Versions >= 1.6.0 -class CryptUnprotectDataV2(object): - def __init__(self): - sp = GetUserName() + ':&%:' + GetIDString() - passwdData = encode(SHA256(sp),charMap5) - # salt generation as per the code - salt = 0x0512981d * 2 * 1 * 1 - salt = str(salt) + GetUserName() - salt = encode(salt,charMap5) - self.crp = LibCrypto() - iter = 0x800 - keylen = 0x400 - key_iv = self.crp.keyivgen(passwdData, salt, iter, keylen) - self.key = key_iv[0:32] - self.iv = key_iv[32:48] - self.crp.set_decrypt_key(self.key, self.iv) - - def decrypt(self, encryptedData): - cleartext = self.crp.decrypt(encryptedData) - cleartext = decode(cleartext, charMap5) - return cleartext - - -# unprotect the new header blob in .kinf2011 -# used in Kindle for Mac Version >= 1.9.0 -def UnprotectHeaderData(encryptedData): - passwdData = 'header_key_data' - salt = 'HEADER.2011' - iter = 0x80 - keylen = 0x100 - crp = LibCrypto() - key_iv = crp.keyivgen(passwdData, salt, iter, keylen) - key = key_iv[0:32] - iv = key_iv[32:48] - crp.set_decrypt_key(key,iv) - cleartext = crp.decrypt(encryptedData) - return cleartext - - -# implements an Pseudo Mac Version of Windows built-in Crypto routine -# used for Kindle for Mac Versions >= 1.9.0 -class CryptUnprotectDataV3(object): - def __init__(self, entropy): - sp = GetUserName() + '+@#$%+' + GetIDString() - passwdData = encode(SHA256(sp),charMap2) - salt = entropy - self.crp = LibCrypto() - iter = 0x800 - keylen = 0x400 - key_iv = self.crp.keyivgen(passwdData, salt, iter, keylen) - self.key = key_iv[0:32] - self.iv = key_iv[32:48] - self.crp.set_decrypt_key(self.key, self.iv) - - def decrypt(self, encryptedData): - cleartext = self.crp.decrypt(encryptedData) - cleartext = decode(cleartext, charMap2) - return cleartext - - -# Locate the .kindle-info files -def getKindleInfoFiles(kInfoFiles): - # first search for current .kindle-info files - home = os.getenv('HOME') - cmdline = 'find "' + home + '/Library/Application Support" -name ".kindle-info"' - cmdline = cmdline.encode(sys.getfilesystemencoding()) - p1 = subprocess.Popen(cmdline, shell=True, stdin=None, stdout=subprocess.PIPE, stderr=subprocess.PIPE, close_fds=False) - out1, out2 = p1.communicate() - reslst = out1.split('\n') - kinfopath = 'NONE' - found = False - for resline in reslst: - if os.path.isfile(resline): - kInfoFiles.append(resline) - found = True - # add any .rainier*-kinf files - cmdline = 'find "' + home + '/Library/Application Support" -name ".rainier*-kinf"' - cmdline = cmdline.encode(sys.getfilesystemencoding()) - p1 = subprocess.Popen(cmdline, shell=True, stdin=None, stdout=subprocess.PIPE, stderr=subprocess.PIPE, close_fds=False) - out1, out2 = p1.communicate() - reslst = out1.split('\n') - for resline in reslst: - if os.path.isfile(resline): - kInfoFiles.append(resline) - found = True - # add any .kinf2011 files - cmdline = 'find "' + home + '/Library/Application Support" -name ".kinf2011"' - cmdline = cmdline.encode(sys.getfilesystemencoding()) - p1 = subprocess.Popen(cmdline, shell=True, stdin=None, stdout=subprocess.PIPE, stderr=subprocess.PIPE, close_fds=False) - out1, out2 = p1.communicate() - reslst = out1.split('\n') - for resline in reslst: - if os.path.isfile(resline): - kInfoFiles.append(resline) - found = True - if not found: - print('No kindle-info files have been found.') - return kInfoFiles - -# determine type of kindle info provided and return a -# database of keynames and values -def getDBfromFile(kInfoFile): - names = ["kindle.account.tokens","kindle.cookie.item","eulaVersionAccepted","login_date","kindle.token.item","login","kindle.key.item","kindle.name.info","kindle.device.info", "MazamaRandomNumber", "max_date", "SIGVERIF"] - DB = {} - cnt = 0 - infoReader = open(kInfoFile, 'r') - hdr = infoReader.read(1) - data = infoReader.read() - - if data.find('[') != -1 : - - # older style kindle-info file - cud = CryptUnprotectData() - items = data.split('[') - for item in items: - if item != '': - keyhash, rawdata = item.split(':') - keyname = "unknown" - for name in names: - if encodeHash(name,charMap2) == keyhash: - keyname = name - break - if keyname == "unknown": - keyname = keyhash - encryptedValue = decode(rawdata,charMap2) - cleartext = cud.decrypt(encryptedValue) - DB[keyname] = cleartext - cnt = cnt + 1 - if cnt == 0: - DB = None - return DB - - if hdr == '/': - - # else newer style .kinf file used by K4Mac >= 1.6.0 - # the .kinf file uses "/" to separate it into records - # so remove the trailing "/" to make it easy to use split - data = data[:-1] - items = data.split('/') - cud = CryptUnprotectDataV2() - - # loop through the item records until all are processed - while len(items) > 0: - - # get the first item record - item = items.pop(0) - - # the first 32 chars of the first record of a group - # is the MD5 hash of the key name encoded by charMap5 - keyhash = item[0:32] - keyname = "unknown" - - # the raw keyhash string is also used to create entropy for the actual - # CryptProtectData Blob that represents that keys contents - # "entropy" not used for K4Mac only K4PC - # entropy = SHA1(keyhash) - - # the remainder of the first record when decoded with charMap5 - # has the ':' split char followed by the string representation - # of the number of records that follow - # and make up the contents - srcnt = decode(item[34:],charMap5) - rcnt = int(srcnt) - - # read and store in rcnt records of data - # that make up the contents value - edlst = [] - for i in xrange(rcnt): - item = items.pop(0) - edlst.append(item) - - keyname = "unknown" - for name in names: - if encodeHash(name,charMap5) == keyhash: - keyname = name - break - if keyname == "unknown": - keyname = keyhash - - # the charMap5 encoded contents data has had a length - # of chars (always odd) cut off of the front and moved - # to the end to prevent decoding using charMap5 from - # working properly, and thereby preventing the ensuing - # CryptUnprotectData call from succeeding. - - # The offset into the charMap5 encoded contents seems to be: - # len(contents) - largest prime number less than or equal to int(len(content)/3) - # (in other words split "about" 2/3rds of the way through) - - # move first offsets chars to end to align for decode by charMap5 - encdata = "".join(edlst) - contlen = len(encdata) - - # now properly split and recombine - # by moving noffset chars from the start of the - # string to the end of the string - noffset = contlen - primes(int(contlen/3))[-1] - pfx = encdata[0:noffset] - encdata = encdata[noffset:] - encdata = encdata + pfx - - # decode using charMap5 to get the CryptProtect Data - encryptedValue = decode(encdata,charMap5) - cleartext = cud.decrypt(encryptedValue) - DB[keyname] = cleartext - cnt = cnt + 1 - - if cnt == 0: - DB = None - return DB - - # the latest .kinf2011 version for K4M 1.9.1 - # put back the hdr char, it is needed - data = hdr + data - data = data[:-1] - items = data.split('/') - - # the headerblob is the encrypted information needed to build the entropy string - headerblob = items.pop(0) - encryptedValue = decode(headerblob, charMap1) - cleartext = UnprotectHeaderData(encryptedValue) - - # now extract the pieces in the same way - # this version is different from K4PC it scales the build number by multipying by 735 - pattern = re.compile(r'''\[Version:(\d+)\]\[Build:(\d+)\]\[Cksum:([^\]]+)\]\[Guid:([\{\}a-z0-9\-]+)\]''', re.IGNORECASE) - for m in re.finditer(pattern, cleartext): - entropy = str(int(m.group(2)) * 0x2df) + m.group(4) - - cud = CryptUnprotectDataV3(entropy) - - # loop through the item records until all are processed - while len(items) > 0: - - # get the first item record - item = items.pop(0) - - # the first 32 chars of the first record of a group - # is the MD5 hash of the key name encoded by charMap5 - keyhash = item[0:32] - keyname = "unknown" - - # unlike K4PC the keyhash is not used in generating entropy - # entropy = SHA1(keyhash) + added_entropy - # entropy = added_entropy - - # the remainder of the first record when decoded with charMap5 - # has the ':' split char followed by the string representation - # of the number of records that follow - # and make up the contents - srcnt = decode(item[34:],charMap5) - rcnt = int(srcnt) - - # read and store in rcnt records of data - # that make up the contents value - edlst = [] - for i in xrange(rcnt): - item = items.pop(0) - edlst.append(item) - - keyname = "unknown" - for name in names: - if encodeHash(name,testMap8) == keyhash: - keyname = name - break - if keyname == "unknown": - keyname = keyhash - - # the testMap8 encoded contents data has had a length - # of chars (always odd) cut off of the front and moved - # to the end to prevent decoding using testMap8 from - # working properly, and thereby preventing the ensuing - # CryptUnprotectData call from succeeding. - - # The offset into the testMap8 encoded contents seems to be: - # len(contents) - largest prime number less than or equal to int(len(content)/3) - # (in other words split "about" 2/3rds of the way through) - - # move first offsets chars to end to align for decode by testMap8 - encdata = "".join(edlst) - contlen = len(encdata) - - # now properly split and recombine - # by moving noffset chars from the start of the - # string to the end of the string - noffset = contlen - primes(int(contlen/3))[-1] - pfx = encdata[0:noffset] - encdata = encdata[noffset:] - encdata = encdata + pfx - - # decode using testMap8 to get the CryptProtect Data - encryptedValue = decode(encdata,testMap8) - cleartext = cud.decrypt(encryptedValue) - # print keyname - # print cleartext - DB[keyname] = cleartext - cnt = cnt + 1 - - if cnt == 0: - DB = None - return DB diff --git a/Calibre_Plugins/K4MobiDeDRM_plugin/scrolltextwidget.py b/Calibre_Plugins/K4MobiDeDRM_plugin/scrolltextwidget.py new file mode 100644 index 0000000..98b4147 --- /dev/null +++ b/Calibre_Plugins/K4MobiDeDRM_plugin/scrolltextwidget.py @@ -0,0 +1,27 @@ +#!/usr/bin/env python +# vim:ts=4:sw=4:softtabstop=4:smarttab:expandtab + +import Tkinter +import Tkconstants + +# basic scrolled text widget +class ScrolledText(Tkinter.Text): + def __init__(self, master=None, **kw): + self.frame = Tkinter.Frame(master) + self.vbar = Tkinter.Scrollbar(self.frame) + self.vbar.pack(side=Tkconstants.RIGHT, fill=Tkconstants.Y) + kw.update({'yscrollcommand': self.vbar.set}) + Tkinter.Text.__init__(self, self.frame, **kw) + self.pack(side=Tkconstants.LEFT, fill=Tkconstants.BOTH, expand=True) + self.vbar['command'] = self.yview + # Copy geometry methods of self.frame without overriding Text + # methods = hack! + text_meths = vars(Tkinter.Text).keys() + methods = vars(Tkinter.Pack).keys() + vars(Tkinter.Grid).keys() + vars(Tkinter.Place).keys() + methods = set(methods).difference(text_meths) + for m in methods: + if m[0] != '_' and m != 'config' and m != 'configure': + setattr(self, m, getattr(self.frame, m)) + + def __str__(self): + return str(self.frame) diff --git a/Calibre_Plugins/K4MobiDeDRM_plugin/stylexml2css.py b/Calibre_Plugins/K4MobiDeDRM_plugin/stylexml2css.py index a412a7b..adbac49 100644 --- a/Calibre_Plugins/K4MobiDeDRM_plugin/stylexml2css.py +++ b/Calibre_Plugins/K4MobiDeDRM_plugin/stylexml2css.py @@ -1,134 +1,51 @@ #! /usr/bin/python # vim:ts=4:sw=4:softtabstop=4:smarttab:expandtab - -class Unbuffered: - def __init__(self, stream): - self.stream = stream - def write(self, data): - self.stream.write(data) - self.stream.flush() - def __getattr__(self, attr): - return getattr(self.stream, attr) - -import sys -sys.stdout=Unbuffered(sys.stdout) +# For use with Topaz Scripts Version 2.6 import csv +import sys import os import getopt +import re from struct import pack from struct import unpack -class TpzDRMError(Exception): - pass -# local support routines -if 'calibre' in sys.modules: - inCalibre = True -else: - inCalibre = False - -if inCalibre : - from calibre_plugins.k4mobidedrm import convert2xml - from calibre_plugins.k4mobidedrm import flatxml2html - from calibre_plugins.k4mobidedrm import flatxml2svg - from calibre_plugins.k4mobidedrm import stylexml2css -else : - import convert2xml - import flatxml2html - import flatxml2svg - import stylexml2css - -# global switch -buildXML = False - -# Get a 7 bit encoded number from a file -def readEncodedNumber(file): - flag = False - c = file.read(1) - if (len(c) == 0): - return None - data = ord(c) - if data == 0xFF: - flag = True - c = file.read(1) - if (len(c) == 0): - return None - data = ord(c) - if data >= 0x80: - datax = (data & 0x7F) - while data >= 0x80 : - c = file.read(1) - if (len(c) == 0): - return None - data = ord(c) - datax = (datax <<7) + (data & 0x7F) - data = datax - if flag: - data = -data - return data - -# Get a length prefixed string from the file -def lengthPrefixString(data): - return encodeNumber(len(data))+data - -def readString(file): - stringLength = readEncodedNumber(file) - if (stringLength == None): - return None - sv = file.read(stringLength) - if (len(sv) != stringLength): - return "" - return unpack(str(stringLength)+"s",sv)[0] - -def getMetaArray(metaFile): - # parse the meta file - result = {} - fo = file(metaFile,'rb') - size = readEncodedNumber(fo) - for i in xrange(size): - tag = readString(fo) - value = readString(fo) - result[tag] = value - # print tag, value - fo.close() - return result - - -# dictionary of all text strings by index value -class Dictionary(object): - def __init__(self, dictFile): - self.filename = dictFile - self.size = 0 - self.fo = file(dictFile,'rb') - self.stable = [] - self.size = readEncodedNumber(self.fo) - for i in xrange(self.size): - self.stable.append(self.escapestr(readString(self.fo))) - self.pos = 0 - def escapestr(self, str): - str = str.replace('&','&') - str = str.replace('<','<') - str = str.replace('>','>') - str = str.replace('=','=') - return str - def lookup(self,val): - if ((val >= 0) and (val < self.size)) : - self.pos = val - return self.stable[self.pos] - else: - print "Error - %d outside of string table limits" % val - raise TpzDRMError('outside or string table limits') - # sys.exit(-1) - def getSize(self): - return self.size - def getPos(self): - return self.pos - - -class PageDimParser(object): - def __init__(self, flatxml): +class DocParser(object): + def __init__(self, flatxml, fontsize, ph, pw): self.flatdoc = flatxml.split('\n') + self.fontsize = int(fontsize) + self.ph = int(ph) * 1.0 + self.pw = int(pw) * 1.0 + + stags = { + 'paragraph' : 'p', + 'graphic' : '.graphic' + } + + attr_val_map = { + 'hang' : 'text-indent: ', + 'indent' : 'text-indent: ', + 'line-space' : 'line-height: ', + 'margin-bottom' : 'margin-bottom: ', + 'margin-left' : 'margin-left: ', + 'margin-right' : 'margin-right: ', + 'margin-top' : 'margin-top: ', + 'space-after' : 'padding-bottom: ', + } + + attr_str_map = { + 'align-center' : 'text-align: center; margin-left: auto; margin-right: auto;', + 'align-left' : 'text-align: left;', + 'align-right' : 'text-align: right;', + 'align-justify' : 'text-align: justify;', + 'display-inline' : 'display: inline;', + 'pos-left' : 'text-align: left;', + 'pos-right' : 'text-align: right;', + 'pos-center' : 'text-align: center; margin-left: auto; margin-right: auto;', + } + + # find tag if within pos to end inclusive def findinDoc(self, tagpath, pos, end) : result = None @@ -142,7 +59,7 @@ class PageDimParser(object): for j in xrange(pos, end): item = docList[j] if item.find('=') >= 0: - (name, argres) = item.split('=') + (name, argres) = item.split('=',1) else : name = item argres = '' @@ -151,559 +68,196 @@ class PageDimParser(object): foundat = j break return foundat, result + + + # return list of start positions for the tagpath + def posinDoc(self, tagpath): + startpos = [] + pos = 0 + res = "" + while res != None : + (foundpos, res) = self.findinDoc(tagpath, pos, -1) + if res != None : + startpos.append(foundpos) + pos = foundpos + 1 + return startpos + + # returns a vector of integers for the tagpath + def getData(self, tagpath, pos, end, clean=False): + if clean: + digits_only = re.compile(r'''([0-9]+)''') + argres=[] + (foundat, argt) = self.findinDoc(tagpath, pos, end) + if (argt != None) and (len(argt) > 0) : + argList = argt.split('|') + for strval in argList: + if clean: + m = re.search(digits_only, strval) + if m != None: + strval = m.group() + argres.append(int(strval)) + return argres + def process(self): - (pos, sph) = self.findinDoc('page.h',0,-1) - (pos, spw) = self.findinDoc('page.w',0,-1) - if (sph == None): sph = '-1' - if (spw == None): spw = '-1' - return sph, spw -def getPageDim(flatxml): + classlst = '' + csspage = '.cl-center { text-align: center; margin-left: auto; margin-right: auto; }\n' + csspage += '.cl-right { text-align: right; }\n' + csspage += '.cl-left { text-align: left; }\n' + csspage += '.cl-justify { text-align: justify; }\n' + + # generate a list of each