From 88b09669617d9e56f8085717d4a2cc64e6783bee Mon Sep 17 00:00:00 2001 From: NoDRM Date: Sun, 7 Aug 2022 15:58:01 +0200 Subject: [PATCH] Fix tons of PDF-related issues --- CHANGELOG.md | 3 +- DeDRM_plugin/ineptpdf.py | 78 ++++++++++++++++++++++++++++++---------- 2 files changed, 62 insertions(+), 19 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index da8525a..a259940 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -73,8 +73,9 @@ List of changes since the fork of Apprentice Harper's repository: - Fix some more Calibre-6 bugs in the Obok plugin (should fix #114). - Fix a bug where invalid Adobe keys could cause the plugin to stop trying subsequent keys (partially fixes #109). - Fix DRM removal sometimes resetting the ZIP's internal "external_attr" value on Calibre 5 and newer. -- Fix PDF decryption issues on Calibre 4 (hopefully fixes #104). +- Fix tons of PDF decryption issues (hopefully fixes #104 and other PDF-related issues). - Small Python 2 / Calibre 4 bugfix for Obok. - Removing ancient AlfCrypto machine code libraries, moving all encryption / decryption to Python code. - General cleanup and removal of dead code. - Fix a bug where ADE account keys weren't automatically imported from the DeACSM plugin when importing a PDF file. +- Re-enable Xrefs in exported PDF files since the file corruption bug is hopefully fixed. Please open bug reports if you encounter new issues with PDF files. diff --git a/DeDRM_plugin/ineptpdf.py b/DeDRM_plugin/ineptpdf.py index b34b38c..621416c 100755 --- a/DeDRM_plugin/ineptpdf.py +++ b/DeDRM_plugin/ineptpdf.py @@ -117,7 +117,7 @@ def SHA256(message): # 1 = only if present in input # 2 = always -GEN_XREF_STM = 0 +GEN_XREF_STM = 1 # This is the value for the current document gen_xref_stm = False # will be set in PDFSerializer @@ -565,7 +565,11 @@ class PSBaseParser(object): while 1: self.fillbuf() if eol: - c = bytes([self.buf[self.charpos]]) + if sys.version_info[0] == 2: + c = self.buf[self.charpos] + else: + c = bytes([self.buf[self.charpos]]) + # handle '\r\n' if c == b'\n': linebuf += c @@ -575,10 +579,17 @@ class PSBaseParser(object): if m: linebuf += self.buf[self.charpos:m.end(0)] self.charpos = m.end(0) - if bytes([linebuf[-1]]) == b'\r': - eol = True - else: - break + if sys.version_info[0] == 2: + if linebuf[-1] == b'\r': + eol = True + else: + break + else: + if bytes([linebuf[-1]]) == b'\r': + eol = True + else: + break + else: linebuf += self.buf[self.charpos:] self.charpos = len(self.buf) @@ -954,9 +965,14 @@ class PDFStream(PDFObject): for i in range(0, len(data), columns+1): pred = data[i] ent1 = data[i+1:i+1+columns] - if pred == 2: - ent1 = b''.join(bytes([(a+b) & 255]) \ - for (a,b) in zip(ent0,ent1)) + if sys.version_info[0] == 2: + if pred == '\x02': + ent1 = ''.join(chr((ord(a)+ord(b)) & 255) \ + for (a,b) in zip(ent0,ent1)) + else: + if pred == 2: + ent1 = b''.join(bytes([(a+b) & 255]) \ + for (a,b) in zip(ent0,ent1)) buf += ent1 ent0 = ent1 data = buf @@ -1070,8 +1086,6 @@ class PDFXRef(object): return (None, pos) -## PDFXRefStream -## class PDFXRefStream(object): def __init__(self): @@ -1404,7 +1418,10 @@ class PDFDocument(object): x = ARC4.new(hash).decrypt(Odata) # 4 if R >= 3: for i in range(1,19+1): - k = b''.join(bytes([c ^ i]) for c in hash ) + if sys.version_info[0] == 2: + k = b''.join(chr(ord(c) ^ i) for c in hash ) + else: + k = b''.join(bytes([c ^ i]) for c in hash ) x = ARC4.new(k).decrypt(x) @@ -1462,7 +1479,10 @@ class PDFDocument(object): hash.update(docid[0]) # 3 x = ARC4.new(key).decrypt(hash.digest()[:16]) # 4 for i in range(1,19+1): - k = b''.join(bytes([c ^ i]) for c in key ) + if sys.version_info[0] == 2: + k = b''.join(chr(ord(c) ^ i) for c in key ) + else: + k = b''.join(bytes([c ^ i]) for c in key ) x = ARC4.new(k).decrypt(x) u1 = x+x # 32bytes total if R == 2: @@ -1490,8 +1510,8 @@ class PDFDocument(object): # check owner pass: retval = self.check_owner_password(password, docid, param) - if retval is True or retval is not None: - #print("Owner pass is valid - " + str(retval)) + if retval is True or (retval is not False and retval is not None): + #print("Owner pass is valid") if retval is True: self.decrypt_key = self.recover_encryption_key_with_password(password, docid, param) else: @@ -1500,7 +1520,7 @@ class PDFDocument(object): if self.decrypt_key is None or self.decrypt_key is True or self.decrypt_key is False: # That's not the owner password. Check if it's the user password. retval = self.check_user_password(password, docid, param) - if retval is True or retval is not None: + if retval is True or (retval is not False and retval is not None): #print("User pass is valid") if retval is True: self.decrypt_key = self.recover_encryption_key_with_password(password, docid, param) @@ -1723,7 +1743,11 @@ class PDFDocument(object): data = data[16:] plaintext = AES.new(key,AES.MODE_CBC,ivector).decrypt(data) # remove pkcs#5 aes padding - cutter = -1 * plaintext[-1] + if sys.version_info[0] == 2: + cutter = -1 * ord(plaintext[-1]) + else: + cutter = -1 * plaintext[-1] + plaintext = plaintext[:cutter] return plaintext @@ -2199,7 +2223,11 @@ class PDFSerializer(object): elif isinstance(obj, bytearray): self.write(b'(%s)' % self.escape_string(obj)) elif isinstance(obj, bytes): - self.write(b'(%s)' % self.escape_string(obj)) + # I'm not 100% sure if this is correct, but it seems to fix some PDFs ... + # If needed, revert that change. + self.write(b'<%s>' % binascii.hexlify(obj).upper()) + print("ineptpdf.py: Unknown bytes element found - guessing.") + print("If this PDF is corrupted and/or doesn't work, please open a bug report.") elif isinstance(obj, str): self.write(b'(%s)' % self.escape_string(obj.encode('utf-8'))) elif isinstance(obj, bool): @@ -2226,6 +2254,20 @@ class PDFSerializer(object): self.write(b'(deleted)') else: data = obj.get_decdata() + + # Fix length: + # We've decompressed and then recompressed the PDF stream. + # Depending on the algorithm, the implementation, and the compression level, + # the resulting recompressed stream is unlikely to have the same length as the original. + # So we need to update the PDF object to contain the new proper length. + + # Without this change, all PDFs exported by this plugin are slightly corrupted - + # even though most if not all PDF readers can correct that on-the-fly. + + if 'Length' in obj.dic: + obj.dic['Length'] = len(data) + + self.serialize_object(obj.dic) self.write(b'stream\n') self.write(data)