From 90910ab106d9a06659240c2dbbb6aeacf65dcb0d Mon Sep 17 00:00:00 2001 From: NoDRM Date: Tue, 16 Nov 2021 11:09:03 +0100 Subject: [PATCH] Add back Python2 support (ADEPT) --- DeDRM_plugin/__init__.py | 17 +- DeDRM_plugin/adobekey.py | 16 +- DeDRM_plugin/androidkindlekey.py | 16 +- DeDRM_plugin/argv_utils.py | 2 +- DeDRM_plugin/convert2xml.py | 14 +- DeDRM_plugin/epubtest.py | 16 +- DeDRM_plugin/erdr2pml.py | 15 +- DeDRM_plugin/genbook.py | 14 +- DeDRM_plugin/ignobleepub.py | 15 +- DeDRM_plugin/ignoblekey.py | 16 +- DeDRM_plugin/ignoblekeyfetch.py | 16 +- DeDRM_plugin/ignoblekeygen.py | 16 +- DeDRM_plugin/ignoblepdf.py | 424 +++++++++++++++++-------------- DeDRM_plugin/ineptepub.py | 16 +- DeDRM_plugin/ineptpdf.py | 94 ++++--- DeDRM_plugin/k4mobidedrm.py | 16 +- DeDRM_plugin/kindlekey.py | 16 +- DeDRM_plugin/kindlepid.py | 16 +- DeDRM_plugin/mobidedrm.py | 16 +- DeDRM_plugin/topazextract.py | 16 +- DeDRM_plugin/zipfilerugged.py | 4 +- 21 files changed, 481 insertions(+), 310 deletions(-) diff --git a/DeDRM_plugin/__init__.py b/DeDRM_plugin/__init__.py index 8bdf1d2..9ee3a39 100644 --- a/DeDRM_plugin/__init__.py +++ b/DeDRM_plugin/__init__.py @@ -115,14 +115,17 @@ class SafeUnbuffered: if self.encoding == None: self.encoding = "utf-8" def write(self, data): - if isinstance(data,str): + if isinstance(data,str) or isinstance(data,unicode): + # str for Python3, unicode for Python2 data = data.encode(self.encoding,"replace") try: - self.stream.buffer.write(data) - self.stream.buffer.flush() + buffer = getattr(self.stream, 'buffer', self.stream) + # self.stream.buffer for Python3, self.stream for Python2 + buffer.write(data) + buffer.flush() except: # We can do nothing if a write fails - pass + raise def __getattr__(self, attr): return getattr(self.stream, attr) @@ -132,7 +135,8 @@ class DeDRM(FileTypePlugin): supported_platforms = ['linux', 'osx', 'windows'] author = "Apprentice Alf, Apprentice Harper, NoDRM, The Dark Reverser and i♥cabbages" version = PLUGIN_VERSION_TUPLE - minimum_calibre_version = (5, 0, 0) # Python 3. + #minimum_calibre_version = (5, 0, 0) # Python 3. + minimum_calibre_version = (2, 0, 0) # Needs Calibre 1.0 minimum. 1.X untested. file_types = set(['epub','pdf','pdb','prc','mobi','pobi','azw','azw1','azw3','azw4','azw8','tpz','kfx','kfx-zip']) on_import = True on_preprocess = True @@ -151,6 +155,7 @@ class DeDRM(FileTypePlugin): The extraction only happens once per version of the plugin Also perform upgrade of preferences once per version """ + try: self.pluginsdir = os.path.join(config_dir,"plugins") if not os.path.exists(self.pluginsdir): @@ -237,7 +242,7 @@ class DeDRM(FileTypePlugin): fr.fix() except Exception as e: print("{0} v{1}: Error \'{2}\' when checking zip archive".format(PLUGIN_NAME, PLUGIN_VERSION, e.args[0])) - raise Exception(e) + raise # import the decryption keys import calibre_plugins.dedrm.prefs as prefs diff --git a/DeDRM_plugin/adobekey.py b/DeDRM_plugin/adobekey.py index 5412b86..fea2533 100644 --- a/DeDRM_plugin/adobekey.py +++ b/DeDRM_plugin/adobekey.py @@ -51,11 +51,17 @@ class SafeUnbuffered: if self.encoding == None: self.encoding = "utf-8" def write(self, data): - if isinstance(data, str): + if isinstance(data,str) or isinstance(data,unicode): + # str for Python3, unicode for Python2 data = data.encode(self.encoding,"replace") - self.stream.buffer.write(data) - self.stream.buffer.flush() - + try: + buffer = getattr(self.stream, 'buffer', self.stream) + # self.stream.buffer for Python3, self.stream for Python2 + buffer.write(data) + buffer.flush() + except: + # We can do nothing if a write fails + raise def __getattr__(self, attr): return getattr(self.stream, attr) @@ -99,7 +105,7 @@ def unicode_argv(): return ["adobekey.py"] else: argvencoding = sys.stdin.encoding or "utf-8" - return [arg if isinstance(arg, str) else str(arg, argvencoding) for arg in sys.argv] + return [arg if (isinstance(arg, str) or isinstance(arg,unicode)) else str(arg, argvencoding) for arg in sys.argv] class ADEPTError(Exception): pass diff --git a/DeDRM_plugin/androidkindlekey.py b/DeDRM_plugin/androidkindlekey.py index e4b6cbe..da34c1d 100755 --- a/DeDRM_plugin/androidkindlekey.py +++ b/DeDRM_plugin/androidkindlekey.py @@ -45,11 +45,17 @@ class SafeUnbuffered: if self.encoding == None: self.encoding = "utf-8" def write(self, data): - if isinstance(data,str): + if isinstance(data,str) or isinstance(data,unicode): + # str for Python3, unicode for Python2 data = data.encode(self.encoding,"replace") - self.stream.buffer.write(data) - self.stream.buffer.flush() - + try: + buffer = getattr(self.stream, 'buffer', self.stream) + # self.stream.buffer for Python3, self.stream for Python2 + buffer.write(data) + buffer.flush() + except: + # We can do nothing if a write fails + raise def __getattr__(self, attr): return getattr(self.stream, attr) @@ -93,7 +99,7 @@ def unicode_argv(): return ["kindlekey.py"] else: argvencoding = sys.stdin.encoding or "utf-8" - return [arg if isinstance(arg, str) else str(arg, argvencoding) for arg in sys.argv] + return [arg if (isinstance(arg, str) or isinstance(arg,unicode)) else str(arg, argvencoding) for arg in sys.argv] class DrmException(Exception): pass diff --git a/DeDRM_plugin/argv_utils.py b/DeDRM_plugin/argv_utils.py index fd4e03b..37bfb89 100644 --- a/DeDRM_plugin/argv_utils.py +++ b/DeDRM_plugin/argv_utils.py @@ -41,7 +41,7 @@ def unicode_argv(): return ["DeDRM.py"] else: argvencoding = sys.stdin.encoding or "utf-8" - return [arg if isinstance(arg, str) else str(arg, argvencoding) for arg in sys.argv] + return [arg if (isinstance(arg, str) or isinstance(arg,unicode)) else str(arg, argvencoding) for arg in sys.argv] def add_cp65001_codec(): diff --git a/DeDRM_plugin/convert2xml.py b/DeDRM_plugin/convert2xml.py index abdaeb3..fe33eca 100644 --- a/DeDRM_plugin/convert2xml.py +++ b/DeDRM_plugin/convert2xml.py @@ -15,11 +15,17 @@ class SafeUnbuffered: if self.encoding == None: self.encoding = "utf-8" def write(self, data): - if isinstance(data, str): + if isinstance(data,str) or isinstance(data,unicode): + # str for Python3, unicode for Python2 data = data.encode(self.encoding,"replace") - self.stream.buffer.write(data) - self.stream.buffer.flush() - + try: + buffer = getattr(self.stream, 'buffer', self.stream) + # self.stream.buffer for Python3, self.stream for Python2 + buffer.write(data) + buffer.flush() + except: + # We can do nothing if a write fails + raise def __getattr__(self, attr): return getattr(self.stream, attr) diff --git a/DeDRM_plugin/epubtest.py b/DeDRM_plugin/epubtest.py index ebae4fb..561b053 100644 --- a/DeDRM_plugin/epubtest.py +++ b/DeDRM_plugin/epubtest.py @@ -66,11 +66,17 @@ class SafeUnbuffered: if self.encoding == None: self.encoding = "utf-8" def write(self, data): - if isinstance(data, str): + if isinstance(data,str) or isinstance(data,unicode): + # str for Python3, unicode for Python2 data = data.encode(self.encoding,"replace") - self.stream.buffer.write(data) - self.stream.buffer.flush() - + try: + buffer = getattr(self.stream, 'buffer', self.stream) + # self.stream.buffer for Python3, self.stream for Python2 + buffer.write(data) + buffer.flush() + except: + # We can do nothing if a write fails + raise def __getattr__(self, attr): return getattr(self.stream, attr) @@ -114,7 +120,7 @@ def unicode_argv(): return ["epubtest.py"] else: argvencoding = sys.stdin.encoding or "utf-8" - return [arg if isinstance(arg, str) else str(arg, argvencoding) for arg in sys.argv] + return [arg if (isinstance(arg, str) or isinstance(arg,unicode)) else str(arg, argvencoding) for arg in sys.argv] _FILENAME_LEN_OFFSET = 26 _EXTRA_LEN_OFFSET = 28 diff --git a/DeDRM_plugin/erdr2pml.py b/DeDRM_plugin/erdr2pml.py index a7ad95e..c32431a 100755 --- a/DeDRM_plugin/erdr2pml.py +++ b/DeDRM_plugin/erdr2pml.py @@ -85,10 +85,17 @@ class SafeUnbuffered: if self.encoding == None: self.encoding = "utf-8" def write(self, data): - if isinstance(data,str): + if isinstance(data,str) or isinstance(data,unicode): + # str for Python3, unicode for Python2 data = data.encode(self.encoding,"replace") - self.stream.buffer.write(data) - self.stream.buffer.flush() + try: + buffer = getattr(self.stream, 'buffer', self.stream) + # self.stream.buffer for Python3, self.stream for Python2 + buffer.write(data) + buffer.flush() + except: + # We can do nothing if a write fails + raise def __getattr__(self, attr): return getattr(self.stream, attr) @@ -129,7 +136,7 @@ def unicode_argv(): return ["mobidedrm.py"] else: argvencoding = sys.stdin.encoding or "utf-8" - return [arg if isinstance(arg, str) else str(arg, argvencoding) for arg in sys.argv] + return [arg if (isinstance(arg, str) or isinstance(arg,unicode)) else str(arg, argvencoding) for arg in sys.argv] Des = None if iswindows: diff --git a/DeDRM_plugin/genbook.py b/DeDRM_plugin/genbook.py index 915bd30..6f3f57d 100644 --- a/DeDRM_plugin/genbook.py +++ b/DeDRM_plugin/genbook.py @@ -14,11 +14,17 @@ class SafeUnbuffered: if self.encoding == None: self.encoding = "utf-8" def write(self, data): - if isinstance(data, str): + if isinstance(data,str) or isinstance(data,unicode): + # str for Python3, unicode for Python2 data = data.encode(self.encoding,"replace") - self.stream.buffer.write(data) - self.stream.buffer.flush() - + try: + buffer = getattr(self.stream, 'buffer', self.stream) + # self.stream.buffer for Python3, self.stream for Python2 + buffer.write(data) + buffer.flush() + except: + # We can do nothing if a write fails + raise def __getattr__(self, attr): return getattr(self.stream, attr) diff --git a/DeDRM_plugin/ignobleepub.py b/DeDRM_plugin/ignobleepub.py index 72e22f9..e1cd88f 100644 --- a/DeDRM_plugin/ignobleepub.py +++ b/DeDRM_plugin/ignobleepub.py @@ -52,10 +52,17 @@ class SafeUnbuffered: if self.encoding == None: self.encoding = "utf-8" def write(self, data): - if isinstance(data,str): + if isinstance(data,str) or isinstance(data,unicode): + # str for Python3, unicode for Python2 data = data.encode(self.encoding,"replace") - self.stream.buffer.write(data) - self.stream.buffer.flush() + try: + buffer = getattr(self.stream, 'buffer', self.stream) + # self.stream.buffer for Python3, self.stream for Python2 + buffer.write(data) + buffer.flush() + except: + # We can do nothing if a write fails + raise def __getattr__(self, attr): return getattr(self.stream, attr) @@ -97,7 +104,7 @@ def unicode_argv(): return ["ineptepub.py"] else: argvencoding = sys.stdin.encoding or "utf-8" - return [arg if isinstance(arg, str) else str(arg, argvencoding) for arg in sys.argv] + return [arg if (isinstance(arg, str) or isinstance(arg,unicode)) else str(arg, argvencoding) for arg in sys.argv] class IGNOBLEError(Exception): diff --git a/DeDRM_plugin/ignoblekey.py b/DeDRM_plugin/ignoblekey.py index 5e33e33..0c3662e 100644 --- a/DeDRM_plugin/ignoblekey.py +++ b/DeDRM_plugin/ignoblekey.py @@ -37,11 +37,17 @@ class SafeUnbuffered: if self.encoding == None: self.encoding = "utf-8" def write(self, data): - if isinstance(data, str): + if isinstance(data,str) or isinstance(data,unicode): + # str for Python3, unicode for Python2 data = data.encode(self.encoding,"replace") - self.stream.buffer.write(data) - self.stream.buffer.flush() - + try: + buffer = getattr(self.stream, 'buffer', self.stream) + # self.stream.buffer for Python3, self.stream for Python2 + buffer.write(data) + buffer.flush() + except: + # We can do nothing if a write fails + raise def __getattr__(self, attr): return getattr(self.stream, attr) @@ -85,7 +91,7 @@ def unicode_argv(): return ["ignoblekey.py"] else: argvencoding = sys.stdin.encoding or "utf-8" - return [arg if isinstance(arg, str) else str(arg, argvencoding) for arg in sys.argv] + return [arg if (isinstance(arg, str) or isinstance(arg,unicode)) else str(arg, argvencoding) for arg in sys.argv] class DrmException(Exception): pass diff --git a/DeDRM_plugin/ignoblekeyfetch.py b/DeDRM_plugin/ignoblekeyfetch.py index a0375f9..25c18f6 100644 --- a/DeDRM_plugin/ignoblekeyfetch.py +++ b/DeDRM_plugin/ignoblekeyfetch.py @@ -44,11 +44,17 @@ class SafeUnbuffered: if self.encoding == None: self.encoding = "utf-8" def write(self, data): - if isinstance(data, str): + if isinstance(data,str) or isinstance(data,unicode): + # str for Python3, unicode for Python2 data = data.encode(self.encoding,"replace") - self.stream.buffer.write(data) - self.stream.buffer.flush() - + try: + buffer = getattr(self.stream, 'buffer', self.stream) + # self.stream.buffer for Python3, self.stream for Python2 + buffer.write(data) + buffer.flush() + except: + # We can do nothing if a write fails + raise def __getattr__(self, attr): return getattr(self.stream, attr) @@ -92,7 +98,7 @@ def unicode_argv(): return ["ignoblekeyfetch.py"] else: argvencoding = sys.stdin.encoding or "utf-8" - return [arg if isinstance(arg, str) else str(arg, argvencoding) for arg in sys.argv] + return [arg if (isinstance(arg, str) or isinstance(arg,unicode)) else str(arg, argvencoding) for arg in sys.argv] class IGNOBLEError(Exception): diff --git a/DeDRM_plugin/ignoblekeygen.py b/DeDRM_plugin/ignoblekeygen.py index 07bfb51..5893553 100644 --- a/DeDRM_plugin/ignoblekeygen.py +++ b/DeDRM_plugin/ignoblekeygen.py @@ -54,11 +54,17 @@ class SafeUnbuffered: if self.encoding == None: self.encoding = "utf-8" def write(self, data): - if isinstance(data, str): + if isinstance(data,str) or isinstance(data,unicode): + # str for Python3, unicode for Python2 data = data.encode(self.encoding,"replace") - self.stream.buffer.write(data) - self.stream.buffer.flush() - + try: + buffer = getattr(self.stream, 'buffer', self.stream) + # self.stream.buffer for Python3, self.stream for Python2 + buffer.write(data) + buffer.flush() + except: + # We can do nothing if a write fails + raise def __getattr__(self, attr): return getattr(self.stream, attr) @@ -102,7 +108,7 @@ def unicode_argv(): return ["ignoblekeygen.py"] else: argvencoding = sys.stdin.encoding or "utf-8" - return [arg if isinstance(arg, str) else str(arg, argvencoding) for arg in sys.argv] + return [arg if (isinstance(arg, str) or isinstance(arg,unicode)) else str(arg, argvencoding) for arg in sys.argv] class IGNOBLEError(Exception): diff --git a/DeDRM_plugin/ignoblepdf.py b/DeDRM_plugin/ignoblepdf.py index 365eae2..9f2c4db 100644 --- a/DeDRM_plugin/ignoblepdf.py +++ b/DeDRM_plugin/ignoblepdf.py @@ -14,6 +14,7 @@ # Revision history: # 0.1 - Initial alpha testing release 2020 by Pu D. Pud # 0.2 - Python 3 for calibre 5.0 (in testing) +# 0.3 - More Python3 fixes """ @@ -21,7 +22,7 @@ Decrypts Barnes & Noble encrypted PDF files. """ __license__ = 'GPL v3' -__version__ = "0.2" +__version__ = "0.3" import sys import os @@ -29,8 +30,9 @@ import re import zlib import struct import hashlib -from decimal import * -from itertools import chain, islice +from io import BytesIO +from decimal import Decimal +import itertools import xml.etree.ElementTree as etree # Wrap a stream so that output gets flushed immediately @@ -43,11 +45,17 @@ class SafeUnbuffered: if self.encoding == None: self.encoding = "utf-8" def write(self, data): - if isinstance(data, str): + if isinstance(data,str) or isinstance(data,unicode): + # str for Python3, unicode for Python2 data = data.encode(self.encoding,"replace") - self.stream.buffer.write(data) - self.stream.buffer.flush() - + try: + buffer = getattr(self.stream, 'buffer', self.stream) + # self.stream.buffer for Python3, self.stream for Python2 + buffer.write(data) + buffer.flush() + except: + # We can do nothing if a write fails + raise def __getattr__(self, attr): return getattr(self.stream, attr) @@ -86,7 +94,7 @@ def unicode_argv(): return ["ignoblepdf.py"] else: argvencoding = sys.stdin.encoding or "utf-8" - return [arg if isinstance(arg, str) else str(arg, argvencoding) for arg in sys.argv] + return [arg if (isinstance(arg, str) or isinstance(arg,unicode)) else str(arg, argvencoding) for arg in sys.argv] class IGNOBLEError(Exception): @@ -236,7 +244,6 @@ def _load_crypto(): ARC4, AES = _load_crypto() -from io import BytesIO # Do we generate cross reference streams on output? @@ -273,7 +280,7 @@ def nunpack(s, default=0): elif l == 2: return struct.unpack('>H', s)[0] elif l == 3: - return struct.unpack('>L', '\x00'+s)[0] + return struct.unpack('>L', bytes([0]) + s)[0] elif l == 4: return struct.unpack('>L', s)[0] else: @@ -324,7 +331,7 @@ class PSKeyword(PSObject): Use PSKeywordTable.intern() instead. ''' def __init__(self, name): - self.name = name + self.name = name.decode('utf-8') return def __repr__(self): @@ -354,12 +361,12 @@ PSLiteralTable = PSSymbolTable(PSLiteral) PSKeywordTable = PSSymbolTable(PSKeyword) LIT = PSLiteralTable.intern KWD = PSKeywordTable.intern -KEYWORD_BRACE_BEGIN = KWD('{') -KEYWORD_BRACE_END = KWD('}') -KEYWORD_ARRAY_BEGIN = KWD('[') -KEYWORD_ARRAY_END = KWD(']') -KEYWORD_DICT_BEGIN = KWD('<<') -KEYWORD_DICT_END = KWD('>>') +KEYWORD_BRACE_BEGIN = KWD(b'{') +KEYWORD_BRACE_END = KWD(b'}') +KEYWORD_ARRAY_BEGIN = KWD(b'[') +KEYWORD_ARRAY_END = KWD(b']') +KEYWORD_DICT_BEGIN = KWD(b'<<') +KEYWORD_DICT_END = KWD(b'>>') def literal_name(x): @@ -381,18 +388,18 @@ def keyword_name(x): ## PSBaseParser ## -EOL = re.compile(r'[\r\n]') -SPC = re.compile(r'\s') -NONSPC = re.compile(r'\S') -HEX = re.compile(r'[0-9a-fA-F]') -END_LITERAL = re.compile(r'[#/%\[\]()<>{}\s]') -END_HEX_STRING = re.compile(r'[^\s0-9a-fA-F]') -HEX_PAIR = re.compile(r'[0-9a-fA-F]{2}|.') -END_NUMBER = re.compile(r'[^0-9]') -END_KEYWORD = re.compile(r'[#/%\[\]()<>{}\s]') -END_STRING = re.compile(r'[()\134]') -OCT_STRING = re.compile(r'[0-7]') -ESC_STRING = { 'b':8, 't':9, 'n':10, 'f':12, 'r':13, '(':40, ')':41, '\\':92 } +EOL = re.compile(br'[\r\n]') +SPC = re.compile(br'\s') +NONSPC = re.compile(br'\S') +HEX = re.compile(br'[0-9a-fA-F]') +END_LITERAL = re.compile(br'[#/%\[\]()<>{}\s]') +END_HEX_STRING = re.compile(br'[^\s0-9a-fA-F]') +HEX_PAIR = re.compile(br'[0-9a-fA-F]{2}|.') +END_NUMBER = re.compile(br'[^0-9]') +END_KEYWORD = re.compile(br'[#/%\[\]()<>{}\s]') +END_STRING = re.compile(br'[()\\]') +OCT_STRING = re.compile(br'[0-7]') +ESC_STRING = { b'b':8, b't':9, b'n':10, b'f':12, b'r':13, b'(':40, b')':41, b'\\':92 } class PSBaseParser(object): @@ -435,7 +442,7 @@ class PSBaseParser(object): self.fp.seek(pos) # reset the status for nextline() self.bufpos = pos - self.buf = '' + self.buf = b'' self.charpos = 0 # reset the status for nexttoken() self.parse1 = self.parse_main @@ -457,32 +464,37 @@ class PSBaseParser(object): if not m: return (self.parse_main, len(s)) j = m.start(0) - c = s[j] + if isinstance(s[j], str): + # Python 2 + c = s[j] + else: + # Python 3 + c = bytes([s[j]]) self.tokenstart = self.bufpos+j - if c == '%': - self.token = '%' + if c == b'%': + self.token = c return (self.parse_comment, j+1) - if c == '/': - self.token = '' + if c == b'/': + self.token = b'' return (self.parse_literal, j+1) - if c in '-+' or c.isdigit(): + if c in b'-+' or c.isdigit(): self.token = c return (self.parse_number, j+1) - if c == '.': + if c == b'.': self.token = c return (self.parse_decimal, j+1) if c.isalpha(): self.token = c return (self.parse_keyword, j+1) - if c == '(': - self.token = '' + if c == b'(': + self.token = b'' self.paren = 1 return (self.parse_string, j+1) - if c == '<': - self.token = '' + if c == b'<': + self.token = b'' return (self.parse_wopen, j+1) - if c == '>': - self.token = '' + if c == b'>': + self.token = b'' return (self.parse_wclose, j+1) self.add_token(KWD(c)) return (self.parse_main, j+1) @@ -509,20 +521,26 @@ class PSBaseParser(object): return (self.parse_literal, len(s)) j = m.start(0) self.token += s[i:j] - c = s[j] - if c == '#': - self.hex = '' + if isinstance(s[j], str): + c = s[j] + else: + c = bytes([s[j]]) + if c == b'#': + self.hex = b'' return (self.parse_literal_hex, j+1) - self.add_token(LIT(self.token)) + self.add_token(PSLiteralTable.intern(self.token)) return (self.parse_main, j) def parse_literal_hex(self, s, i): - c = s[i] + if isinstance(s[i], str): + c = s[i] + else: + c = bytes([s[i]]) if HEX.match(c) and len(self.hex) < 2: self.hex += c return (self.parse_literal_hex, i+1) if self.hex: - self.token += chr(int(self.hex, 16)) + self.token += bytes([int(self.hex, 16)]) return (self.parse_literal, i) def parse_number(self, s, i): @@ -532,8 +550,11 @@ class PSBaseParser(object): return (self.parse_number, len(s)) j = m.start(0) self.token += s[i:j] - c = s[j] - if c == '.': + if isinstance(s[j], str): + c = s[j] + else: + c = bytes([s[j]]) + if c == b'.': self.token += c return (self.parse_decimal, j+1) try: @@ -549,7 +570,7 @@ class PSBaseParser(object): return (self.parse_decimal, len(s)) j = m.start(0) self.token += s[i:j] - self.add_token(Decimal(self.token)) + self.add_token(Decimal(self.token.decode('utf-8'))) return (self.parse_main, j) def parse_keyword(self, s, i): @@ -575,15 +596,18 @@ class PSBaseParser(object): return (self.parse_string, len(s)) j = m.start(0) self.token += s[i:j] - c = s[j] - if c == '\\': + if isinstance(s[j], str): + c = s[j] + else: + c = bytes([s[j]]) + if c == b'\\': self.oct = '' return (self.parse_string_1, j+1) - if c == '(': + if c == b'(': self.paren += 1 self.token += c return (self.parse_string, j+1) - if c == ')': + if c == b')': self.paren -= 1 if self.paren: self.token += c @@ -591,42 +615,51 @@ class PSBaseParser(object): self.add_token(self.token) return (self.parse_main, j+1) def parse_string_1(self, s, i): - c = s[i] + if isinstance(s[i], str): + c = s[i] + else: + c = bytes([s[i]]) if OCT_STRING.match(c) and len(self.oct) < 3: self.oct += c return (self.parse_string_1, i+1) if self.oct: - self.token += chr(int(self.oct, 8)) + self.token += bytes([int(self.oct, 8)]) return (self.parse_string, i) if c in ESC_STRING: - self.token += chr(ESC_STRING[c]) + self.token += bytes([ESC_STRING[c]]) return (self.parse_string, i+1) def parse_wopen(self, s, i): - c = s[i] + if isinstance(s[i], str): + c = s[i] + else: + c = bytes([s[i]]) if c.isspace() or HEX.match(c): return (self.parse_hexstring, i) - if c == '<': + if c == b'<': self.add_token(KEYWORD_DICT_BEGIN) i += 1 return (self.parse_main, i) def parse_wclose(self, s, i): - c = s[i] - if c == '>': + if isinstance(s[i], str): + c = s[i] + else: + c = bytes([s[i]]) + if c == b'>': self.add_token(KEYWORD_DICT_END) i += 1 return (self.parse_main, i) def parse_hexstring(self, s, i): - m = END_HEX_STRING.search(s, i) - if not m: + m1 = END_HEX_STRING.search(s, i) + if not m1: self.token += s[i:] return (self.parse_hexstring, len(s)) - j = m.start(0) + j = m1.start(0) self.token += s[i:j] - token = HEX_PAIR.sub(lambda m: chr(int(m.group(0), 16)), - SPC.sub('', self.token)) + token = HEX_PAIR.sub(lambda m2: bytes([int(m2.group(0), 16)]), + SPC.sub(b'', self.token)) self.add_token(token) return (self.parse_main, j) @@ -641,15 +674,15 @@ class PSBaseParser(object): ''' Fetches a next line that ends either with \\r or \\n. ''' - linebuf = '' + linebuf = b'' linepos = self.bufpos + self.charpos eol = False while 1: self.fillbuf() if eol: - c = self.buf[self.charpos] + c = bytes([self.buf[self.charpos]]) # handle '\r\n' - if c == '\n': + if c == b'\n': linebuf += c self.charpos += 1 break @@ -657,7 +690,7 @@ class PSBaseParser(object): if m: linebuf += self.buf[self.charpos:m.end(0)] self.charpos = m.end(0) - if linebuf[-1] == '\r': + if bytes([linebuf[-1]]) == b'\r': eol = True else: break @@ -673,7 +706,7 @@ class PSBaseParser(object): ''' self.fp.seek(0, 2) pos = self.fp.tell() - buf = '' + buf = b'' while 0 < pos: prevpos = pos pos = max(0, pos-self.BUFSIZ) @@ -681,13 +714,13 @@ class PSBaseParser(object): s = self.fp.read(prevpos-pos) if not s: break while 1: - n = max(s.rfind('\r'), s.rfind('\n')) + n = max(s.rfind(b'\r'), s.rfind(b'\n')) if n == -1: buf = s + buf break yield s[n:]+buf s = s[:n] - buf = '' + buf = b'' return @@ -743,7 +776,7 @@ class PSStackParser(PSBaseParser): def nextobject(self, direct=False): ''' - Yields a list of objects: keywords, literals, strings, + Yields a list of objects: keywords, literals, strings (byte arrays), numbers, arrays and dictionaries. Arrays and dictionaries are represented as Python sequence and dictionaries. ''' @@ -753,6 +786,8 @@ class PSStackParser(PSBaseParser): if (isinstance(token, int) or isinstance(token, Decimal) or isinstance(token, bool) or + isinstance(token, bytearray) or + isinstance(token, bytes) or isinstance(token, str) or isinstance(token, PSLiteral)): # normal token @@ -796,10 +831,10 @@ class PSStackParser(PSBaseParser): return obj -LITERAL_CRYPT = PSLiteralTable.intern('Crypt') -LITERALS_FLATE_DECODE = (PSLiteralTable.intern('FlateDecode'), PSLiteralTable.intern('Fl')) -LITERALS_LZW_DECODE = (PSLiteralTable.intern('LZWDecode'), PSLiteralTable.intern('LZW')) -LITERALS_ASCII85_DECODE = (PSLiteralTable.intern('ASCII85Decode'), PSLiteralTable.intern('A85')) +LITERAL_CRYPT = PSLiteralTable.intern(b'Crypt') +LITERALS_FLATE_DECODE = (PSLiteralTable.intern(b'FlateDecode'), PSLiteralTable.intern(b'Fl')) +LITERALS_LZW_DECODE = (PSLiteralTable.intern(b'LZWDecode'), PSLiteralTable.intern(b'LZW')) +LITERALS_ASCII85_DECODE = (PSLiteralTable.intern(b'ASCII85Decode'), PSLiteralTable.intern(b'A85')) ## PDF Objects @@ -853,7 +888,7 @@ def resolve_all(x): if isinstance(x, list): x = [ resolve_all(v) for v in x ] elif isinstance(x, dict): - for (k,v) in x.iteritems(): + for (k,v) in iter(x.items()): x[k] = resolve_all(v) return x @@ -861,13 +896,13 @@ def decipher_all(decipher, objid, genno, x): ''' Recursively decipher X. ''' - if isinstance(x, str): + if isinstance(x, bytearray) or isinstance(x,bytes) or isinstance(x,str): return decipher(objid, genno, x) decf = lambda v: decipher_all(decipher, objid, genno, v) if isinstance(x, list): x = [decf(v) for v in x] elif isinstance(x, dict): - x = dict((k, decf(v)) for (k, v) in x.iteritems()) + x = dict((k, decf(v)) for (k, v) in iter(x.items())) return x @@ -898,7 +933,7 @@ def num_value(x): def str_value(x): x = resolve1(x) - if not isinstance(x, str): + if not (isinstance(x, bytearray) or isinstance(x, bytes) or isinstance(x, str)): if STRICT: raise PDFTypeError('String required: %r' % x) return '' @@ -931,18 +966,18 @@ def stream_value(x): # ascii85decode(data) def ascii85decode(data): n = b = 0 - out = '' + out = b'' for c in data: - if '!' <= c and c <= 'u': + if b'!' <= c and c <= b'u': n += 1 - b = b*85+(ord(c)-33) + b = b*85+(c-33) if n == 5: out += struct.pack('>L',b) n = b = 0 - elif c == 'z': + elif c == b'z': assert n == 0 - out += '\0\0\0\0' - elif c == '~': + out += b'\0\0\0\0' + elif c == b'~': if n: for _ in range(5-n): b = b*85+84 @@ -963,7 +998,7 @@ class PDFStream(PDFObject): cutdiv = len(rawdata) // 16 rawdata = rawdata[:16*cutdiv] else: - if eol in ('\r', '\n', '\r\n'): + if eol in (b'\r', b'\n', b'\r\n'): rawdata = rawdata[:length] self.dic = dic @@ -1009,7 +1044,7 @@ class PDFStream(PDFObject): # will get errors if the document is encrypted. data = zlib.decompress(data) elif f in LITERALS_LZW_DECODE: - data = ''.join(LZWDecoder(BytesIO(data)).run()) + data = b''.join(LZWDecoder(BytesIO(data)).run()) elif f in LITERALS_ASCII85_DECODE: data = ascii85decode(data) elif f == LITERAL_CRYPT: @@ -1031,14 +1066,14 @@ class PDFStream(PDFObject): raise PDFValueError( 'Columns undefined for predictor=12') columns = int_value(params['Columns']) - buf = '' - ent0 = '\x00' * columns + buf = b'' + ent0 = b'\x00' * columns for i in range(0, len(data), columns+1): pred = data[i] ent1 = data[i+1:i+1+columns] - if pred == '\x02': - ent1 = ''.join(chr((ord(a)+ord(b)) & 255) \ - for (a,b) in zip(ent0,ent1)) + if pred == 2: + ent1 = b''.join(bytes([(a+b) & 255]) \ + for (a,b) in zip(ent0,ent1)) buf += ent1 ent0 = ent1 data = buf @@ -1072,11 +1107,11 @@ class PDFEncryptionError(PDFException): pass class PDFPasswordIncorrect(PDFEncryptionError): pass # some predefined literals and keywords. -LITERAL_OBJSTM = PSLiteralTable.intern('ObjStm') -LITERAL_XREF = PSLiteralTable.intern('XRef') -LITERAL_PAGE = PSLiteralTable.intern('Page') -LITERAL_PAGES = PSLiteralTable.intern('Pages') -LITERAL_CATALOG = PSLiteralTable.intern('Catalog') +LITERAL_OBJSTM = PSLiteralTable.intern(b'ObjStm') +LITERAL_XREF = PSLiteralTable.intern(b'XRef') +LITERAL_PAGE = PSLiteralTable.intern(b'Page') +LITERAL_PAGES = PSLiteralTable.intern(b'Pages') +LITERAL_CATALOG = PSLiteralTable.intern(b'Catalog') ## XRefs @@ -1094,7 +1129,7 @@ class PDFXRef(object): return '' % len(self.offsets) def objids(self): - return self.offsets.iterkeys() + return iter(self.offsets.keys()) def load(self, parser): self.offsets = {} @@ -1105,10 +1140,10 @@ class PDFXRef(object): raise PDFNoValidXRef('Unexpected EOF - file corrupted?') if not line: raise PDFNoValidXRef('Premature eof: %r' % parser) - if line.startswith('trailer'): + if line.startswith(b'trailer'): parser.seek(pos) break - f = line.strip().split(' ') + f = line.strip().split(b' ') if len(f) != 2: raise PDFNoValidXRef('Trailer not found: %r: line=%r' % (parser, line)) try: @@ -1120,16 +1155,17 @@ class PDFXRef(object): (_, line) = parser.nextline() except PSEOF: raise PDFNoValidXRef('Unexpected EOF - file corrupted?') - f = line.strip().split(' ') + f = line.strip().split(b' ') if len(f) != 3: raise PDFNoValidXRef('Invalid XRef format: %r, line=%r' % (parser, line)) (pos, genno, use) = f - if use != 'n': continue - self.offsets[objid] = (int(genno), int(pos)) + if use != b'n': + continue + self.offsets[objid] = (int(genno.decode('utf-8')), int(pos.decode('utf-8'))) self.load_trailer(parser) return - KEYWORD_TRAILER = PSKeywordTable.intern('trailer') + KEYWORD_TRAILER = PSKeywordTable.intern(b'trailer') def load_trailer(self, parser): try: (_,kwd) = parser.nexttoken() @@ -1180,8 +1216,8 @@ class PDFXRefStream(object): raise PDFNoValidXRef('Invalid PDF stream spec.') size = stream.dic['Size'] index = stream.dic.get('Index', (0,size)) - self.index = zip(islice(index, 0, None, 2), - islice(index, 1, None, 2)) + self.index = list(zip(itertools.islice(index, 0, None, 2), + itertools.islice(index, 1, None, 2))) (self.fl1, self.fl2, self.fl3) = stream.dic['W'] self.data = stream.get_data() self.entlen = self.fl1+self.fl2+self.fl3 @@ -1234,7 +1270,8 @@ class PDFDocument(object): # set_parser(parser) # Associates the document with an (already initialized) parser object. def set_parser(self, parser): - if self.parser: return + if self.parser: + return self.parser = parser # The document is set to be temporarily ready during collecting # all the basic information about the document, e.g. @@ -1256,13 +1293,13 @@ class PDFDocument(object): dict_value(trailer['Encrypt'])) # fix for bad files except: - self.encryption = ('ffffffffffffffffffffffffffffffffffff', + self.encryption = (b'ffffffffffffffffffffffffffffffffffff', dict_value(trailer['Encrypt'])) if 'Root' in trailer: self.set_root(dict_value(trailer['Root'])) break - else: - raise PDFSyntaxError('No /Root object! - Is this really a PDF?') + else: + raise PDFSyntaxError('No /Root object! - Is this really a PDF?') # The document is set to be non-ready again, until all the # proper initialization (asking the password key and # verifying the access permission, so on) is finished. @@ -1283,7 +1320,7 @@ class PDFDocument(object): # Perform the initialization with a given password. # This step is mandatory even if there's no password associated # with the document. - def initialize(self, password=''): + def initialize(self, password=b''): if not self.encryption: self.is_printable = self.is_modifiable = self.is_extractable = True self.ready = True @@ -1310,14 +1347,14 @@ class PDFDocument(object): def genkey_adobe_ps(self, param): # nice little offline principal keys dictionary # global static principal key for German Onleihe / Bibliothek Digital - principalkeys = { 'bibliothek-digital.de': 'rRwGv2tbpKov1krvv7PO0ws9S436/lArPlfipz5Pqhw='.decode('base64')} + principalkeys = { b'bibliothek-digital.de': codecs.decode(b'rRwGv2tbpKov1krvv7PO0ws9S436/lArPlfipz5Pqhw=','base64')} self.is_printable = self.is_modifiable = self.is_extractable = True - length = int_value(param.get('Length', 0)) / 8 + length = int_value(param.get('Length', 0)) // 8 edcdata = str_value(param.get('EDCData')).decode('base64') pdrllic = str_value(param.get('PDRLLic')).decode('base64') pdrlpol = str_value(param.get('PDRLPol')).decode('base64') edclist = [] - for pair in edcdata.split('\n'): + for pair in edcdata.split(b'\n'): edclist.append(pair) # principal key request for key in principalkeys: @@ -1326,20 +1363,20 @@ class PDFDocument(object): else: raise IGNOBLEError('Cannot find principal key for this pdf') shakey = SHA256(principalkey) - ivector = 16 * chr(0) + ivector = bytes(16) plaintext = AES.new(shakey,AES.MODE_CBC,ivector).decrypt(edclist[9].decode('base64')) - if plaintext[-16:] != 16 * chr(16): + if plaintext[-16:] != bytearray(b'\0x10')*16: raise IGNOBLEError('Offlinekey cannot be decrypted, aborting ...') pdrlpol = AES.new(plaintext[16:32],AES.MODE_CBC,edclist[2].decode('base64')).decrypt(pdrlpol) - if ord(pdrlpol[-1]) < 1 or ord(pdrlpol[-1]) > 16: + if pdrlpol[-1] < 1 or pdrlpol[-1] > 16: raise IGNOBLEError('Could not decrypt PDRLPol, aborting ...') else: - cutter = -1 * ord(pdrlpol[-1]) + cutter = -1 * pdrlpol[-1] pdrlpol = pdrlpol[:cutter] return plaintext[:16] - PASSWORD_PADDING = '(\xbfN^Nu\x8aAd\x00NV\xff\xfa\x01\x08..' \ - '\x00\xb6\xd0h>\x80/\x0c\xa9\xfedSiz' + PASSWORD_PADDING = b'(\xbfN^Nu\x8aAd\x00NV\xff\xfa\x01\x08..' \ + b'\x00\xb6\xd0h>\x80/\x0c\xa9\xfedSiz' # experimental aes pw support def initialize_standard(self, password, docid, param): # copy from a global variable @@ -1356,7 +1393,7 @@ class PDFDocument(object): try: EncMetadata = str_value(param['EncryptMetadata']) except: - EncMetadata = 'True' + EncMetadata = b'True' self.is_printable = bool(P & 4) self.is_modifiable = bool(P & 8) self.is_extractable = bool(P & 16) @@ -1373,12 +1410,12 @@ class PDFDocument(object): hash.update(docid[0]) # 5 # aes special handling if metadata isn't encrypted if EncMetadata == ('False' or 'false'): - hash.update('ffffffff'.decode('hex')) + hash.update(codecs.decode(b'ffffffff','hex')) if 5 <= R: # 8 for _ in range(50): - hash = hashlib.md5(hash.digest()[:length/8]) - key = hash.digest()[:length/8] + hash = hashlib.md5(hash.digest()[:length//8]) + key = hash.digest()[:length//8] if R == 2: # Algorithm 3.4 u1 = ARC4.new(key).decrypt(password) @@ -1388,7 +1425,7 @@ class PDFDocument(object): hash.update(docid[0]) # 3 x = ARC4.new(key).decrypt(hash.digest()[:16]) # 4 for i in range(1,19+1): - k = ''.join( chr(ord(c) ^ i) for c in key ) + k = b''.join(bytes([c ^ i]) for c in key ) x = ARC4.new(k).decrypt(x) u1 = x+x # 32bytes total if R == 2: @@ -1410,9 +1447,9 @@ class PDFDocument(object): if V != 4: self.decipher = self.decipher_rc4 # XXX may be AES # aes - elif V == 4 and Length == 128: + elif V == 4 and length == 128: elf.decipher = self.decipher_aes - elif V == 4 and Length == 256: + elif V == 4 and length == 256: raise PDFNotImplementedError('AES256 encryption is currently unsupported') self.ready = True return @@ -1429,6 +1466,9 @@ class PDFDocument(object): bookkey = ''.join(rights.findtext(expr)).decode('base64') bookkey = aes.decrypt(bookkey) bookkey = bookkey[:-ord(bookkey[-1])] + # todo: Take a look at this. + # This seems to be the only function that's different between ignoblepdf and ineptpdf. + # A ton of useless duplicated code ..... bookkey = bookkey[-16:] ebx_V = int_value(param.get('V', 4)) ebx_type = int_value(param.get('EBX_ENCRYPTIONTYPE', 6)) @@ -1440,12 +1480,12 @@ class PDFDocument(object): else: V = 2 elif len(bookkey) == length + 1: - V = ord(bookkey[0]) + V = bookkey[0] bookkey = bookkey[1:] else: print("ebx_V is %d and ebx_type is %d" % (ebx_V, ebx_type)) print("length is %d and len(bookkey) is %d" % (length, len(bookkey))) - print("bookkey[0] is %d" % ord(bookkey[0])) + print("bookkey[0] is %d" % bookkey[0]) raise IGNOBLEError('error decrypting book session key - mismatched length') else: # proper length unknown try with whatever you have @@ -1475,7 +1515,7 @@ class PDFDocument(object): objid = struct.pack('' - KEYWORD_R = PSKeywordTable.intern('R') - KEYWORD_ENDOBJ = PSKeywordTable.intern('endobj') - KEYWORD_STREAM = PSKeywordTable.intern('stream') - KEYWORD_XREF = PSKeywordTable.intern('xref') - KEYWORD_STARTXREF = PSKeywordTable.intern('startxref') + KEYWORD_R = PSKeywordTable.intern(b'R') + KEYWORD_ENDOBJ = PSKeywordTable.intern(b'endobj') + KEYWORD_STREAM = PSKeywordTable.intern(b'stream') + KEYWORD_XREF = PSKeywordTable.intern(b'xref') + KEYWORD_STARTXREF = PSKeywordTable.intern(b'startxref') def do_keyword(self, pos, token): if token in (self.KEYWORD_XREF, self.KEYWORD_STARTXREF): self.add_results(*self.pop(1)) @@ -1675,8 +1713,8 @@ class PDFParser(PSStackParser): if STRICT: raise PDFSyntaxError('Unexpected EOF') break - if 'endstream' in line: - i = line.index('endstream') + if b'endstream' in line: + i = line.index(b'endstream') objlen += i data += line[:i] break @@ -1696,7 +1734,7 @@ class PDFParser(PSStackParser): prev = None for line in self.revreadlines(): line = line.strip() - if line == 'startxref': break + if line == b'startxref': break if line: prev = line else: @@ -1748,7 +1786,7 @@ class PDFParser(PSStackParser): except PDFNoValidXRef: # fallback self.seek(0) - pat = re.compile(r'^(\d+)\s+(\d+)\s+obj\b') + pat = re.compile(b'^(\\d+)\\s+(\\d+)\\s+obj\\b') offsets = {} xref = PDFXRef() while 1: @@ -1756,7 +1794,7 @@ class PDFParser(PSStackParser): (pos, line) = self.nextline() except PSEOF: break - if line.startswith('trailer'): + if line.startswith(b'trailer'): trailerpos = pos # remember last trailer m = pat.match(line) if not m: continue @@ -1783,7 +1821,7 @@ class PDFObjStrmParser(PDFParser): self.add_results(*self.popall()) return - KEYWORD_R = KWD('R') + KEYWORD_R = KWD(b'R') def do_keyword(self, pos, token): if token is self.KEYWORD_R: # reference to indirect object @@ -1826,7 +1864,7 @@ class PDFSerializer(object): def dump(self, outf): self.outf = outf self.write(self.version) - self.write('\n%\xe2\xe3\xcf\xd3\n') + self.write(b'\n%\xe2\xe3\xcf\xd3\n') doc = self.doc objids = self.objids xrefs = {} @@ -1848,18 +1886,18 @@ class PDFSerializer(object): startxref = self.tell() if not gen_xref_stm: - self.write('xref\n') - self.write('0 %d\n' % (maxobj + 1,)) + self.write(b'xref\n') + self.write(b'0 %d\n' % (maxobj + 1,)) for objid in range(0, maxobj + 1): if objid in xrefs: # force the genno to be 0 - self.write("%010d 00000 n \n" % xrefs[objid][0]) + self.write(b"%010d 00000 n \n" % xrefs[objid][0]) else: - self.write("%010d %05d f \n" % (0, 65535)) + self.write(b"%010d %05d f \n" % (0, 65535)) - self.write('trailer\n') + self.write(b'trailer\n') self.serialize_object(trailer) - self.write('\nstartxref\n%d\n%%%%EOF' % startxref) + self.write(b'\nstartxref\n%d\n%%%%EOF' % startxref) else: # Generate crossref stream. @@ -1908,7 +1946,7 @@ class PDFSerializer(object): data.append(struct.pack('>L', f2)[-fl2:]) data.append(struct.pack('>L', f3)[-fl3:]) index.extend((first, prev - first + 1)) - data = zlib.compress(''.join(data)) + data = zlib.compress(b''.join(data)) dic = {'Type': LITERAL_XREF, 'Size': prev + 1, 'Index': index, 'W': [1, fl2, fl3], 'Length': len(data), 'Filter': LITERALS_FLATE_DECODE[0], @@ -1917,7 +1955,7 @@ class PDFSerializer(object): dic['Info'] = trailer['Info'] xrefstm = PDFStream(dic, data) self.serialize_indirect(maxobj, xrefstm) - self.write('startxref\n%d\n%%%%EOF' % startxref) + self.write(b'startxref\n%d\n%%%%EOF' % startxref) def write(self, data): self.outf.write(data) self.last = data[-1:] @@ -1926,13 +1964,10 @@ class PDFSerializer(object): return self.outf.tell() def escape_string(self, string): - string = string.replace('\\', '\\\\') - string = string.replace('\n', r'\n') - string = string.replace('(', r'\(') - string = string.replace(')', r'\)') - # get rid of ciando id - regularexp = re.compile(r'http://www.ciando.com/index.cfm/intRefererID/\d{5}') - if regularexp.match(string): return ('http://www.ciando.com') + string = string.replace(b'\\', b'\\\\') + string = string.replace(b'\n', b'\\n') + string = string.replace(b'(', b'\\(') + string = string.replace(b')', b'\\)') return string def serialize_object(self, obj): @@ -1943,34 +1978,38 @@ class PDFSerializer(object): obj['Subtype'] = obj['Type'] del obj['Type'] # end - hope this doesn't have bad effects - self.write('<<') + self.write(b'<<') for key, val in obj.items(): - self.write('/%s' % key) + self.write(str(PSLiteralTable.intern(key.encode('utf-8'))).encode('utf-8')) self.serialize_object(val) - self.write('>>') + self.write(b'>>') elif isinstance(obj, list): - self.write('[') + self.write(b'[') for val in obj: self.serialize_object(val) - self.write(']') + self.write(b']') + elif isinstance(obj, bytearray): + self.write(b'(%s)' % self.escape_string(obj)) + elif isinstance(obj, bytes): + self.write(b'(%s)' % self.escape_string(obj)) elif isinstance(obj, str): - self.write('(%s)' % self.escape_string(obj)) + self.write(b'(%s)' % self.escape_string(obj.encode('utf-8'))) elif isinstance(obj, bool): if self.last.isalnum(): - self.write(' ') - self.write(str(obj).lower()) + self.write(b' ') + self.write(str(obj).lower().encode('utf-8')) elif isinstance(obj, (int, long)): if self.last.isalnum(): - self.write(' ') - self.write(str(obj)) + self.write(b' ') + self.write(str(obj).encode('utf-8')) elif isinstance(obj, Decimal): if self.last.isalnum(): - self.write(' ') - self.write(str(obj)) + self.write(b' ') + self.write(str(obj).encode('utf-8')) elif isinstance(obj, PDFObjRef): if self.last.isalnum(): - self.write(' ') - self.write('%d %d R' % (obj.objid, 0)) + self.write(b' ') + self.write(b'%d %d R' % (obj.objid, 0)) elif isinstance(obj, PDFStream): ### If we don't generate cross ref streams the object streams ### are no longer useful, as we have extracted all objects from @@ -1980,21 +2019,21 @@ class PDFSerializer(object): else: data = obj.get_decdata() self.serialize_object(obj.dic) - self.write('stream\n') + self.write(b'stream\n') self.write(data) - self.write('\nendstream') + self.write(b'\nendstream') else: - data = str(obj) - if data[0].isalnum() and self.last.isalnum(): - self.write(' ') + data = str(obj).encode('utf-8') + if bytes([data[0]]).isalnum() and self.last.isalnum(): + self.write(b' ') self.write(data) def serialize_indirect(self, objid, obj): - self.write('%d 0 obj' % (objid,)) + self.write(b'%d 0 obj' % (objid,)) self.serialize_object(obj) if self.last.isalnum(): - self.write('\n') - self.write('endobj\n') + self.write(b'\n') + self.write(b'endobj\n') @@ -2003,12 +2042,7 @@ def decryptBook(userkey, inpath, outpath): if AES is None: raise IGNOBLEError("PyCrypto or OpenSSL must be installed.") with open(inpath, 'rb') as inf: - #try: serializer = PDFSerializer(inf, userkey) - #except: - # print("Error serializing pdf {0}. Probably wrong key.".format(os.path.basename(inpath))) - # return 2 - # hope this will fix the 'bad file descriptor' problem with open(outpath, 'wb') as outf: # help construct to make sure the method runs to the end try: diff --git a/DeDRM_plugin/ineptepub.py b/DeDRM_plugin/ineptepub.py index 3f01a7c..759a606 100644 --- a/DeDRM_plugin/ineptepub.py +++ b/DeDRM_plugin/ineptepub.py @@ -58,11 +58,17 @@ class SafeUnbuffered: if self.encoding == None: self.encoding = "utf-8" def write(self, data): - if isinstance(data, str): + if isinstance(data,str) or isinstance(data,unicode): + # str for Python3, unicode for Python2 data = data.encode(self.encoding,"replace") - self.stream.buffer.write(data) - self.stream.buffer.flush() - + try: + buffer = getattr(self.stream, 'buffer', self.stream) + # self.stream.buffer for Python3, self.stream for Python2 + buffer.write(data) + buffer.flush() + except: + # We can do nothing if a write fails + raise def __getattr__(self, attr): return getattr(self.stream, attr) @@ -104,7 +110,7 @@ def unicode_argv(): return ["ineptepub.py"] else: argvencoding = sys.stdin.encoding or "utf-8" - return [arg if isinstance(arg, str) else str(arg, argvencoding) for arg in sys.argv] + return [arg if (isinstance(arg, str) or isinstance(arg,unicode)) else str(arg, argvencoding) for arg in sys.argv] class ADEPTError(Exception): diff --git a/DeDRM_plugin/ineptpdf.py b/DeDRM_plugin/ineptpdf.py index 7dc6e83..f8a5592 100755 --- a/DeDRM_plugin/ineptpdf.py +++ b/DeDRM_plugin/ineptpdf.py @@ -76,11 +76,17 @@ class SafeUnbuffered: if self.encoding == None: self.encoding = "utf-8" def write(self, data): - if isinstance(data, str): + if isinstance(data,str) or isinstance(data,unicode): + # str for Python3, unicode for Python2 data = data.encode(self.encoding,"replace") - self.stream.buffer.write(data) - self.stream.buffer.flush() - + try: + buffer = getattr(self.stream, 'buffer', self.stream) + # self.stream.buffer for Python3, self.stream for Python2 + buffer.write(data) + buffer.flush() + except: + # We can do nothing if a write fails + raise def __getattr__(self, attr): return getattr(self.stream, attr) @@ -119,7 +125,7 @@ def unicode_argv(): return ["ineptpdf.py"] else: argvencoding = sys.stdin.encoding or "utf-8" - return [arg if isinstance(arg, str) else str(arg, argvencoding) for arg in sys.argv] + return [arg if (isinstance(arg, str) or isinstance(arg,unicode)) else str(arg, argvencoding) for arg in sys.argv] class ADEPTError(Exception): @@ -553,17 +559,17 @@ def keyword_name(x): ## PSBaseParser ## -EOL = re.compile(rb'[\r\n]') -SPC = re.compile(rb'\s') -NONSPC = re.compile(rb'\S') -HEX = re.compile(rb'[0-9a-fA-F]') -END_LITERAL = re.compile(rb'[#/%\[\]()<>{}\s]') -END_HEX_STRING = re.compile(rb'[^\s0-9a-fA-F]') -HEX_PAIR = re.compile(rb'[0-9a-fA-F]{2}|.') -END_NUMBER = re.compile(rb'[^0-9]') -END_KEYWORD = re.compile(rb'[#/%\[\]()<>{}\s]') -END_STRING = re.compile(rb'[()\\]') -OCT_STRING = re.compile(rb'[0-7]') +EOL = re.compile(br'[\r\n]') +SPC = re.compile(br'\s') +NONSPC = re.compile(br'\S') +HEX = re.compile(br'[0-9a-fA-F]') +END_LITERAL = re.compile(br'[#/%\[\]()<>{}\s]') +END_HEX_STRING = re.compile(br'[^\s0-9a-fA-F]') +HEX_PAIR = re.compile(br'[0-9a-fA-F]{2}|.') +END_NUMBER = re.compile(br'[^0-9]') +END_KEYWORD = re.compile(br'[#/%\[\]()<>{}\s]') +END_STRING = re.compile(br'[()\\]') +OCT_STRING = re.compile(br'[0-7]') ESC_STRING = { b'b':8, b't':9, b'n':10, b'f':12, b'r':13, b'(':40, b')':41, b'\\':92 } class PSBaseParser(object): @@ -628,7 +634,12 @@ class PSBaseParser(object): if not m: return (self.parse_main, len(s)) j = m.start(0) - c = bytes([s[j]]) + if isinstance(s[j], str): + # Python 2 + c = s[j] + else: + # Python 3 + c = bytes([s[j]]) self.tokenstart = self.bufpos+j if c == b'%': self.token = c @@ -680,7 +691,10 @@ class PSBaseParser(object): return (self.parse_literal, len(s)) j = m.start(0) self.token += s[i:j] - c = bytes([s[j]]) + if isinstance(s[j], str): + c = s[j] + else: + c = bytes([s[j]]) if c == b'#': self.hex = b'' return (self.parse_literal_hex, j+1) @@ -688,7 +702,10 @@ class PSBaseParser(object): return (self.parse_main, j) def parse_literal_hex(self, s, i): - c = bytes([s[i]]) + if isinstance(s[i], str): + c = s[i] + else: + c = bytes([s[i]]) if HEX.match(c) and len(self.hex) < 2: self.hex += c return (self.parse_literal_hex, i+1) @@ -703,7 +720,10 @@ class PSBaseParser(object): return (self.parse_number, len(s)) j = m.start(0) self.token += s[i:j] - c = bytes([s[j]]) + if isinstance(s[j], str): + c = s[j] + else: + c = bytes([s[j]]) if c == b'.': self.token += c return (self.parse_decimal, j+1) @@ -746,7 +766,10 @@ class PSBaseParser(object): return (self.parse_string, len(s)) j = m.start(0) self.token += s[i:j] - c = bytes([s[j]]) + if isinstance(s[j], str): + c = s[j] + else: + c = bytes([s[j]]) if c == b'\\': self.oct = '' return (self.parse_string_1, j+1) @@ -763,7 +786,10 @@ class PSBaseParser(object): return (self.parse_main, j+1) def parse_string_1(self, s, i): - c = bytes([s[i]]) + if isinstance(s[i], str): + c = s[i] + else: + c = bytes([s[i]]) if OCT_STRING.match(c) and len(self.oct) < 3: self.oct += c return (self.parse_string_1, i+1) @@ -775,7 +801,10 @@ class PSBaseParser(object): return (self.parse_string, i+1) def parse_wopen(self, s, i): - c = bytes([s[i]]) + if isinstance(s[i], str): + c = s[i] + else: + c = bytes([s[i]]) if c.isspace() or HEX.match(c): return (self.parse_hexstring, i) if c == b'<': @@ -784,7 +813,10 @@ class PSBaseParser(object): return (self.parse_main, i) def parse_wclose(self, s, i): - c = bytes([s[i]]) + if isinstance(s[i], str): + c = s[i] + else: + c = bytes([s[i]]) if c == b'>': self.add_token(KEYWORD_DICT_END) i += 1 @@ -926,6 +958,7 @@ class PSStackParser(PSBaseParser): isinstance(token, bool) or isinstance(token, bytearray) or isinstance(token, bytes) or + isinstance(token, str) or isinstance(token, PSLiteral)): # normal token self.push((pos, token)) @@ -1033,7 +1066,7 @@ def decipher_all(decipher, objid, genno, x): ''' Recursively decipher X. ''' - if isinstance(x, bytearray) or isinstance(x,bytes): + if isinstance(x, bytearray) or isinstance(x,bytes) or isinstance(x,str): return decipher(objid, genno, x) decf = lambda v: decipher_all(decipher, objid, genno, v) if isinstance(x, list): @@ -1070,7 +1103,7 @@ def num_value(x): def str_value(x): x = resolve1(x) - if not (isinstance(x, bytearray) or isinstance(x, bytes)): + if not (isinstance(x, bytearray) or isinstance(x, bytes) or isinstance(x, str)): if STRICT: raise PDFTypeError('String required: %r' % x) return '' @@ -1420,7 +1453,6 @@ class PDFDocument(object): for xref in self.xrefs: trailer = xref.trailer if not trailer: continue - # If there's an encryption info, remember it. if 'Encrypt' in trailer: #assert not self.encryption @@ -1953,7 +1985,7 @@ class PDFParser(PSStackParser): except PDFNoValidXRef: # fallback self.seek(0) - pat = re.compile(rb'^(\d+)\s+(\d+)\s+obj\b') + pat = re.compile(b'^(\\d+)\\s+(\\d+)\\s+obj\\b') offsets = {} xref = PDFXRef() while 1: @@ -2158,9 +2190,9 @@ class PDFSerializer(object): def escape_string(self, string): string = string.replace(b'\\', b'\\\\') - string = string.replace(b'\n', rb'\n') - string = string.replace(b'(', rb'\(') - string = string.replace(b')', rb'\)') + string = string.replace(b'\n', b'\\n') + string = string.replace(b'(', b'\\(') + string = string.replace(b')', b'\\)') return string def serialize_object(self, obj): diff --git a/DeDRM_plugin/k4mobidedrm.py b/DeDRM_plugin/k4mobidedrm.py index cede191..4a30535 100644 --- a/DeDRM_plugin/k4mobidedrm.py +++ b/DeDRM_plugin/k4mobidedrm.py @@ -103,11 +103,17 @@ class SafeUnbuffered: if self.encoding == None: self.encoding = "utf-8" def write(self, data): - if isinstance(data, str): + if isinstance(data,str) or isinstance(data,unicode): + # str for Python3, unicode for Python2 data = data.encode(self.encoding,"replace") - self.stream.buffer.write(data) - self.stream.buffer.flush() - + try: + buffer = getattr(self.stream, 'buffer', self.stream) + # self.stream.buffer for Python3, self.stream for Python2 + buffer.write(data) + buffer.flush() + except: + # We can do nothing if a write fails + raise def __getattr__(self, attr): return getattr(self.stream, attr) @@ -148,7 +154,7 @@ def unicode_argv(): return ["mobidedrm.py"] else: argvencoding = sys.stdin.encoding or "utf-8" - return [arg if isinstance(arg, str) else str(arg, argvencoding) for arg in sys.argv] + return [arg if (isinstance(arg, str) or isinstance(arg,unicode)) else str(arg, argvencoding) for arg in sys.argv] # cleanup unicode filenames # borrowed from calibre from calibre/src/calibre/__init__.py diff --git a/DeDRM_plugin/kindlekey.py b/DeDRM_plugin/kindlekey.py index 35baa46..68cea6b 100644 --- a/DeDRM_plugin/kindlekey.py +++ b/DeDRM_plugin/kindlekey.py @@ -61,11 +61,17 @@ class SafeUnbuffered: if self.encoding == None: self.encoding = "utf-8" def write(self, data): - if isinstance(data, str): + if isinstance(data,str) or isinstance(data,unicode): + # str for Python3, unicode for Python2 data = data.encode(self.encoding,"replace") - self.stream.buffer.write(data) - self.stream.buffer.flush() - + try: + buffer = getattr(self.stream, 'buffer', self.stream) + # self.stream.buffer for Python3, self.stream for Python2 + buffer.write(data) + buffer.flush() + except: + # We can do nothing if a write fails + raise def __getattr__(self, attr): return getattr(self.stream, attr) @@ -109,7 +115,7 @@ def unicode_argv(): return ["kindlekey.py"] else: argvencoding = sys.stdin.encoding or "utf-8" - return [arg if isinstance(arg, str) else str(arg, argvencoding) for arg in sys.argv] + return [arg if (isinstance(arg, str) or isinstance(arg,unicode)) else str(arg, argvencoding) for arg in sys.argv] class DrmException(Exception): pass diff --git a/DeDRM_plugin/kindlepid.py b/DeDRM_plugin/kindlepid.py index d640306..ba80e9b 100644 --- a/DeDRM_plugin/kindlepid.py +++ b/DeDRM_plugin/kindlepid.py @@ -26,11 +26,17 @@ class SafeUnbuffered: if self.encoding == None: self.encoding = "utf-8" def write(self, data): - if isinstance(data, str): + if isinstance(data,str) or isinstance(data,unicode): + # str for Python3, unicode for Python2 data = data.encode(self.encoding,"replace") - self.stream.buffer.write(data) - self.stream.buffer.flush() - + try: + buffer = getattr(self.stream, 'buffer', self.stream) + # self.stream.buffer for Python3, self.stream for Python2 + buffer.write(data) + buffer.flush() + except: + # We can do nothing if a write fails + raise def __getattr__(self, attr): return getattr(self.stream, attr) @@ -71,7 +77,7 @@ def unicode_argv(): return ["kindlepid.py"] else: argvencoding = sys.stdin.encoding or "utf-8" - return [arg if isinstance(arg, str) else str(arg, argvencoding) for arg in sys.argv] + return [arg if (isinstance(arg, str) or isinstance(arg,unicode)) else str(arg, argvencoding) for arg in sys.argv] letters = 'ABCDEFGHIJKLMNPQRSTUVWXYZ123456789' diff --git a/DeDRM_plugin/mobidedrm.py b/DeDRM_plugin/mobidedrm.py index e5a212b..ec03fdb 100755 --- a/DeDRM_plugin/mobidedrm.py +++ b/DeDRM_plugin/mobidedrm.py @@ -94,11 +94,17 @@ class SafeUnbuffered: if self.encoding == None: self.encoding = "utf-8" def write(self, data): - if isinstance(data, str): + if isinstance(data,str) or isinstance(data,unicode): + # str for Python3, unicode for Python2 data = data.encode(self.encoding,"replace") - self.stream.buffer.write(data) - self.stream.buffer.flush() - + try: + buffer = getattr(self.stream, 'buffer', self.stream) + # self.stream.buffer for Python3, self.stream for Python2 + buffer.write(data) + buffer.flush() + except: + # We can do nothing if a write fails + raise def __getattr__(self, attr): return getattr(self.stream, attr) @@ -139,7 +145,7 @@ def unicode_argv(): return ["mobidedrm.py"] else: argvencoding = sys.stdin.encoding or "utf-8" - return [arg if isinstance(arg, str) else str(arg, argvencoding) for arg in sys.argv] + return [arg if (isinstance(arg, str) or isinstance(arg,unicode)) else str(arg, argvencoding) for arg in sys.argv] class DrmException(Exception): diff --git a/DeDRM_plugin/topazextract.py b/DeDRM_plugin/topazextract.py index 5125d62..98db615 100644 --- a/DeDRM_plugin/topazextract.py +++ b/DeDRM_plugin/topazextract.py @@ -32,11 +32,17 @@ class SafeUnbuffered: if self.encoding == None: self.encoding = "utf-8" def write(self, data): - if isinstance(data, str): + if isinstance(data,str) or isinstance(data,unicode): + # str for Python3, unicode for Python2 data = data.encode(self.encoding,"replace") - self.stream.buffer.write(data) - self.stream.buffer.flush() - + try: + buffer = getattr(self.stream, 'buffer', self.stream) + # self.stream.buffer for Python3, self.stream for Python2 + buffer.write(data) + buffer.flush() + except: + # We can do nothing if a write fails + raise def __getattr__(self, attr): return getattr(self.stream, attr) @@ -77,7 +83,7 @@ def unicode_argv(): return ["mobidedrm.py"] else: argvencoding = sys.stdin.encoding or "utf-8" - return [arg if isinstance(arg, str) else str(arg, argvencoding) for arg in sys.argv] + return [arg if (isinstance(arg, str) or isinstance(arg,unicode)) else str(arg, argvencoding) for arg in sys.argv] #global switch debug = False diff --git a/DeDRM_plugin/zipfilerugged.py b/DeDRM_plugin/zipfilerugged.py index b2f3762..aef9ea3 100755 --- a/DeDRM_plugin/zipfilerugged.py +++ b/DeDRM_plugin/zipfilerugged.py @@ -209,6 +209,7 @@ def _EndRecData(fpin): fpin.seek(-sizeEndCentDir, 2) except IOError: return None + data = fpin.read() if data[0:4] == stringEndArchive and data[-2:] == "\000\000": # the signature is correct and there's no comment, unpack structure @@ -662,7 +663,8 @@ class ZipFile: self.comment = b'' # Check if we were passed a file-like object - if isinstance(file, str): + # "str" is python3, "unicode" is python2 + if isinstance(file, str) or isinstance(file, unicode): self._filePassed = 0 self.filename = file modeDict = {'r' : 'rb', 'w': 'wb', 'a' : 'r+b'}