v0.0.8: PDF fixes & ACSM file type config

2024-12-22 17:29:56 +06:00 · 2021-10-03 10:30:45 +02:00 · 2021-10-03 10:30:45 +02:00 · 36b28765dd
commit 36b28765dd
parent 61bebdd906
4 changed files with 309 additions and 132 deletions
--- a/calibre-plugin/init.py
+++ b/calibre-plugin/init.py
@ -12,10 +12,11 @@
 # v0.0.5: Bugfix: DeDRM plugin was also executed if it's installed but disabled.
 # v0.0.6: First PDF support, allow importing previously exported activation data.
 # v0.0.7: More PDF logging, PDF reading in latin-1, MacOS locale bugfix
 # v0.0.8: More PDF bugfixes, support unlimited PDF file sizes, tell Calibre ACSMs are books.
 from calibre.customize import FileTypePlugin        # type: ignore
-__version__ = '0.0.7'
+__version__ = '0.0.8'
 PLUGIN_NAME = "DeACSM"
 PLUGIN_VERSION_TUPLE = tuple([int(x) for x in __version__.split(".")])
@ -24,7 +25,7 @@ PLUGIN_VERSION = ".".join([str(x)for x in PLUGIN_VERSION_TUPLE])
 from calibre.utils.config import config_dir         # type: ignore
-import os, shutil, traceback, sys
+import os, shutil, traceback, sys, time
 import zipfile
 from lxml import etree
@ -49,6 +50,18 @@ class DeACSM(FileTypePlugin):
        """
        try:
            # Patch Calibre to consider "ACSM" a book. This makes ACSM files show up
            # in the "Add Book" file selection, and it also makes the auto-add feature useable.
            try: 
                from calibre.ebooks import BOOK_EXTENSIONS
                if ("acsm" not in BOOK_EXTENSIONS):
                    BOOK_EXTENSIONS.append("acsm")
            except:
                print("{0} v{1}: Couldn't add ACSM to book extension list:".format(PLUGIN_NAME, PLUGIN_VERSION))
                traceback.print_exc()
            self.pluginsdir = os.path.join(config_dir,"plugins")
            if not os.path.exists(self.pluginsdir):
                os.mkdir(self.pluginsdir)
@ -167,21 +180,21 @@ class DeACSM(FileTypePlugin):
        try: 
-            from calibre_plugins.deacsm.libadobe import sendHTTPRequest
+            from calibre_plugins.deacsm.libadobe import sendHTTPRequest_DL2FILE
            from calibre_plugins.deacsm.libadobeFulfill import buildRights, fulfill
        except: 
            try: 
-                from libadobe import sendHTTPRequest
+                from libadobe import sendHTTPRequest_DL2FILE
                from libadobeFulfill import buildRights, fulfill
            except: 
                print("{0} v{1}: Error while importing Fulfillment stuff".format(PLUGIN_NAME, PLUGIN_VERSION))
                traceback.print_exc()
        try:
-            from calibre_plugins.deacsm.libpdf import patch_drm_into_pdf, prepare_string_from_xml
+            from calibre_plugins.deacsm.libpdf import patch_drm_into_pdf
        except: 
            try: 
-                from libpdf import patch_drm_into_pdf, prepare_string_from_xml
+                from libpdf import patch_drm_into_pdf
            except: 
                print("{0} v{1}: Error while importing PDF patch".format(PLUGIN_NAME, PLUGIN_VERSION))
                traceback.print_exc()
@ -190,9 +203,7 @@ class DeACSM(FileTypePlugin):
        adobe_fulfill_response = etree.fromstring(replyData)
        NSMAP = { "adept" : "http://ns.adobe.com/adept" }
        adNS = lambda tag: '{%s}%s' % ('http://ns.adobe.com/adept', tag)
        adDC = lambda tag: '{%s}%s' % ('http://purl.org/dc/elements/1.1/', tag)
        metadata_node = adobe_fulfill_response.find("./%s/%s/%s" % (adNS("fulfillmentResult"), adNS("resourceItemInfo"), adNS("metadata")))
        download_url = adobe_fulfill_response.find("./%s/%s/%s" % (adNS("fulfillmentResult"), adNS("resourceItemInfo"), adNS("src"))).text
        license_token_node = adobe_fulfill_response.find("./%s/%s/%s" % (adNS("fulfillmentResult"), adNS("resourceItemInfo"), adNS("licenseToken")))
@ -205,9 +216,25 @@ class DeACSM(FileTypePlugin):
        # Download eBook: 
        print("{0} v{1}: Loading book from {2}".format(PLUGIN_NAME, PLUGIN_VERSION, download_url))
-        book_content = sendHTTPRequest(download_url)
+        filename_tmp = self.temporary_file(".blob").name
        dl_start_time = int(time.time() * 1000)
        ret = sendHTTPRequest_DL2FILE(download_url, filename_tmp)
        dl_end_time = int(time.time() * 1000)
        print("Download took %d ms (HTTP %d)" % (dl_end_time - dl_start_time, ret))
        if (ret != 200):
            print("{0} v{1}: Download failed with error {2}".format(PLUGIN_NAME, PLUGIN_VERSION, ret))
            return None            
        filetype = ".bin"
-        
+
        book_content = None
        with open(filename_tmp, "rb") as f:
            book_content = f.read(10)
        if (book_content.startswith(b"PK")):
            print("That's a ZIP file -> EPUB")
            filetype = ".epub"
@ -217,23 +244,9 @@ class DeACSM(FileTypePlugin):
        filename = self.temporary_file(filetype).name
-        author = "None"
+        # Move to file name with matching extension
-        title = "None"
+        shutil.move(filename_tmp, filename)
        try: 
            title = metadata_node.find("./%s" % (adDC("title"))).text
            author = metadata_node.find("./%s" % (adDC("creator"))).text
            title = title.replace("(", "").replace(")", "").replace("/", "")
            author = author.replace("(", "").replace(")", "").replace("/", "")
        except:
            pass
        # Store book:
        f = open(filename, "wb")
        f.write(book_content)
        f.close()
        if filetype == ".epub":
            # Store EPUB rights / encryption stuff
@ -244,10 +257,19 @@ class DeACSM(FileTypePlugin):
            return filename
        elif filetype == ".pdf":
            adobe_fulfill_response = etree.fromstring(rights_xml_str)
            NSMAP = { "adept" : "http://ns.adobe.com/adept" }
            adNS = lambda tag: '{%s}%s' % ('http://ns.adobe.com/adept', tag)
            resource = adobe_fulfill_response.find("./%s/%s" % (adNS("licenseToken"), adNS("resource"))).text
            print("{0} v{1}: Downloaded PDF, adding encryption config ...".format(PLUGIN_NAME, PLUGIN_VERSION))
            pdf_tmp_file = self.temporary_file(filetype).name
-            patch_drm_into_pdf(filename, prepare_string_from_xml(rights_xml_str, title, author), pdf_tmp_file)
+            ret = patch_drm_into_pdf(filename, rights_xml_str, pdf_tmp_file, resource)
-            print("{0} v{1}: File successfully fulfilled ...".format(PLUGIN_NAME, PLUGIN_VERSION))
+            if (ret):
                print("{0} v{1}: File successfully fulfilled ...".format(PLUGIN_NAME, PLUGIN_VERSION))
            else:
                print("{0} v{1}: There was an error patching the PDF file.".format(PLUGIN_NAME, PLUGIN_VERSION))
            return pdf_tmp_file
        else: 
            print("{0} v{1}: Error: Unsupported file type ...".format(PLUGIN_NAME, PLUGIN_VERSION))
--- a/calibre-plugin/fulfill.py
+++ b/calibre-plugin/fulfill.py
@ -7,7 +7,7 @@ This is an experimental Python version of libgourou.
 # pyright: reportUndefinedVariable=false
-import sys, os
+import sys, os, time, shutil
 if sys.version_info[0] < 3:
    print("This script requires Python 3.")
    exit(1)
@ -15,9 +15,9 @@ if sys.version_info[0] < 3:
 import zipfile
 from lxml import etree
-from libadobe import sendHTTPRequest
+from libadobe import sendHTTPRequest_DL2FILE
 from libadobeFulfill import buildRights, fulfill
-from libpdf import patch_drm_into_pdf, prepare_string_from_xml
+from libpdf import patch_drm_into_pdf
 FILE_DEVICEKEY = "devicesalt"
 FILE_DEVICEXML = "device.xml"
@ -48,28 +48,31 @@ def download(replyData):
        exit(1)
    book_name = None
-    author = "None"
+
    title = "None"
    try: 
        book_name = metadata_node.find("./%s" % (adDC("title"))).text
    except: 
        book_name = "Book"
    try: 
        title = metadata_node.find("./%s" % (adDC("title"))).text
        author = metadata_node.find("./%s" % (adDC("creator"))).text
        title = title.replace("(", "").replace(")", "").replace("/", "")
        author = author.replace("(", "").replace(")", "").replace("/", "")
    except:
        pass
    # Download eBook: 
    print(download_url)
-    book_content = sendHTTPRequest(download_url)
+    filename_tmp = book_name + ".tmp"
    dl_start_time = int(time.time() * 1000)
    ret = sendHTTPRequest_DL2FILE(download_url, filename_tmp)
    dl_end_time = int(time.time() * 1000)
    print("Download took %d milliseconds" % (dl_end_time - dl_start_time))
    if (ret != 200):
        print("Download failed with error %d" % (ret))
        exit()
    with open(filename_tmp, "rb") as f:
        book_content = f.read(10)
    filetype = ".bin"
    if (book_content.startswith(b"PK")):
@ -80,11 +83,7 @@ def download(replyData):
        filetype = ".pdf"
    filename = book_name + filetype
-
+    shutil.move(filename_tmp, filename)
    # Store book:
    f = open(filename, "wb")
    f.write(book_content)
    f.close()
    if filetype == ".epub":
        # Store EPUB rights / encryption stuff
@ -97,11 +96,20 @@ def download(replyData):
    elif filetype == ".pdf":
        print("Successfully downloaded PDF, patching encryption ...")
        adobe_fulfill_response = etree.fromstring(rights_xml_str)
        NSMAP = { "adept" : "http://ns.adobe.com/adept" }
        adNS = lambda tag: '{%s}%s' % ('http://ns.adobe.com/adept', tag)
        resource = adobe_fulfill_response.find("./%s/%s" % (adNS("licenseToken"), adNS("resource"))).text
        os.rename(filename, "tmp_" + filename)
-        patch_drm_into_pdf("tmp_" + filename, prepare_string_from_xml(rights_xml_str, author, title), filename)
+        ret = patch_drm_into_pdf("tmp_" + filename, rights_xml_str, filename, resource)
        os.remove("tmp_" + filename)
-        print("File successfully fulfilled to " + filename)
+        if (ret):
            print("File successfully fulfilled to " + filename)
        else: 
            print("Errors occurred while patching " + filename)
            exit(1)
        exit(0)
    else: 
        print("Error: Weird filetype")
--- a/calibre-plugin/libadobe.py
+++ b/calibre-plugin/libadobe.py
@ -121,6 +121,40 @@ def makeFingerprint(serial: str):
 ############################################## HTTP stuff:
 def sendHTTPRequest_DL2FILE(URL: str, outputfile: str):
    """Download URL with a GET request and stream the body into outputfile.

    The body is copied in 16 KiB chunks so the whole book never has to fit
    into memory.

    Args:
        URL: Address of the resource to download.
        outputfile: Path of the file that receives the response body. It is
            only created/overwritten when the server answers with HTTP 200.

    Returns:
        The HTTP status code of the final response. 200 means the body was
        written to outputfile; any other value means nothing was written.

    Raises:
        urllib.error.URLError: If no HTTP response could be obtained at all
            (DNS failure, refused connection, ...).
    """
    import urllib.error
    import urllib.parse

    headers = {
        "Accept": "*/*",
        "User-Agent": "book2png",
    }
    chunksize = 16 * 1024
    # Upper bound for manually followed "Location" hops, so a server that
    # keeps redirecting cannot trap us in an endless loop.
    max_redirects = 10
    ret_code = None

    for _ in range(max_redirects + 1):
        req = urllib.request.Request(url=URL, headers=headers)
        try:
            handler = urllib.request.urlopen(req)
        except urllib.error.HTTPError as err:
            # urlopen raises for 4xx/5xx responses; callers expect the status
            # code as return value so they can report "download failed".
            return err.code

        # The context manager releases the connection on every exit path.
        with handler:
            ret_code = handler.getcode()
            if ret_code == 200:
                with open(outputfile, "wb") as f:
                    while True:
                        chunk = handler.read(chunksize)
                        if not chunk:
                            break
                        f.write(chunk)
                return 200
            # "Location" is a *response* header. urlopen already follows the
            # usual 3xx redirects itself, this only covers the leftovers.
            loc = handler.headers.get("Location")

        if loc is None:
            return ret_code
        # The header may be relative; resolve it against the current URL.
        URL = urllib.parse.urljoin(URL, loc)

    return ret_code
 def sendHTTPRequest_getSimple(URL: str):
    headers = {
--- a/calibre-plugin/libpdf.py
+++ b/calibre-plugin/libpdf.py
@ -1,112 +1,221 @@
-import os, zlib, base64
+import os, zlib, base64, time
 from lxml import etree
 class BackwardReader:
-def read_reverse_order(file_name):
+    def __init__(self, file):
-    # Open file for reading in binary mode
+        self.file = file
-    with open(file_name, 'rb') as read_obj:
+
-        # Move the cursor to the end of the file
+    def readlines(self):
-        read_obj.seek(0, os.SEEK_END)
+        BLKSIZE = 4096
-        # Get the current position of pointer i.e eof
+        # Move reader to the end of file
-        pointer_location = read_obj.tell()
+        self.file.seek(0, os.SEEK_END)
        # Create a buffer to keep the last read line
        buffer = bytearray()
-        # Loop till pointer reaches the top of the file
+
-        while pointer_location >= 0:
+        while True:
-            # Move the file pointer to the location pointed by pointer_location
+            pos_newline = buffer.rfind(bytes([0x0a]))
-            read_obj.seek(pointer_location)
+            # Get the current position of the reader
-            # Shift pointer location by -1
+            current_pos = self.file.tell()
-            pointer_location = pointer_location -1
+            if pos_newline != -1:
-            # read that byte / character
+                # Newline is found
-            new_byte = read_obj.read(1)
+                line = buffer[pos_newline+1:]
-            # If the read byte is new line character then it means one line is read
+                buffer = buffer[:pos_newline]
-            if new_byte == b'\n':
+                yield line.decode("latin-1")
-                # Fetch the line from buffer and yield it
+            elif current_pos:
-                yield buffer.decode("latin-1")[::-1]
+                # Need to fill the buffer
-                # Reinitialize the byte array to save next line
+                to_read = min(BLKSIZE, current_pos)
-                buffer = bytearray()
+                self.file.seek(current_pos-to_read, 0)
                buffer = self.file.read(to_read) + buffer
                self.file.seek(current_pos-to_read, 0)
                if current_pos is to_read:
                    buffer = bytes([0x0a]) + buffer
            else:
-                # If last read character is not eol then add it in buffer
+                # Start of file
-                buffer.extend(new_byte)
+                return
-        # As file is read completely, if there is still data in buffer, then its the first line.
+
-        if len(buffer) > 0:
+
-            # Yield the first line too
+
-            yield buffer.decode("latin-1")[::-1]
+def trim_encrypt_string(encrypt):
    string_list = list(encrypt)
    strlen = len(encrypt)
    i = 0
    bracket_count = 0
    while (i < strlen):
        if string_list[i] == "<" and string_list[i+1] == "<":
            bracket_count += 1
        if string_list[i] == ">" and string_list[i+1] == ">":
            bracket_count -= 1
        if bracket_count == 0: 
            break
        i = i + 1
    len_to_use = i+2
    return encrypt[0:len_to_use]
 def cleanup_encrypt_element(element):
    """Normalize the whitespace of one "/"-separated trailer element.

    Elements starting with "ID[<" get a space inserted between adjacent
    "><" pairs. Afterwards every run of whitespace is collapsed to a single
    space (leading/trailing whitespace is dropped) and the space following
    "[" or "]" is removed.
    """
    spaced = element.replace("><", "> <") if element.startswith("ID[<") else element
    collapsed = " ".join(spaced.split())
    for padded, tight in (("[ ", "["), ("] ", "]")):
        collapsed = collapsed.replace(padded, tight)
    return collapsed
 def deflate_and_base64_encode( string_val ):
    """Compress bytes with zlib, strip the zlib framing and base64-encode.

    The first 2 bytes (zlib header) and the last 4 bytes (checksum trailer)
    of the compressed stream are removed, leaving only the raw deflate data.

    Args:
        string_val: The bytes to compress.

    Returns:
        The base64 encoding of the raw deflate data, as bytes.
    """
    header_len = 2
    trailer_len = 4
    wrapped = zlib.compress(string_val)
    raw_deflate = wrapped[header_len:len(wrapped) - trailer_len]
    return base64.b64encode(raw_deflate)
-def prepare_string_from_xml(xmlstring, title, author):
+def update_ebx_with_keys(ebx_data, adept_license, ebx_bookid):
    b64data = deflate_and_base64_encode(xmlstring.encode("utf-8")).decode("utf-8")
-    adobe_fulfill_response = etree.fromstring(xmlstring)
+    b64data = deflate_and_base64_encode(adept_license.encode("utf-8")).decode("utf-8")
    NSMAP = { "adept" : "http://ns.adobe.com/adept" }
    adNS = lambda tag: '{%s}%s' % ('http://ns.adobe.com/adept', tag)
    resource = adobe_fulfill_response.find("./%s/%s" % (adNS("licenseToken"), adNS("resource"))).text
-    return "<</Length 128/EBX_TITLE(%s)/Filter/EBX_HANDLER/EBX_AUTHOR(%s)/V 4/ADEPT_ID(%s)/EBX_BOOKID(%s)/ADEPT_LICENSE(%s)>>" % (title, author, resource, resource, b64data)
+    ebx_new = ebx_data[:-2]
    ebx_new += "/EBX_BOOKID(%s)/ADEPT_LICENSE(%s)>>" % (ebx_bookid, b64data)
-def patch_drm_into_pdf(filename_in, drm_string, filename_out):
+    return ebx_new
-    ORIG_FILE = filename_in
+
 def find_ebx(filename_in):
    """Search filename_in from its end for the line with the EBX handler.

    Args:
        filename_in: Path of the PDF file to scan.

    Returns:
        The first line (counted from the end of the file) that contains
        "/EBX_HANDLER/", or None if no line does.
    """
    find_ebx_start = int(time.time() * 1000)
    # The context manager closes the handle on every return path; the reader
    # is a lazy generator, so the file has to stay open while iterating.
    with open(filename_in, "rb") as fl:
        for i, line in enumerate(BackwardReader(fl).readlines(), start=1):
            if "/EBX_HANDLER/" in line:
                find_ebx_end = int(time.time() * 1000)
                print("Found EBX after %d attempts - took %d ms" % (i, find_ebx_end - find_ebx_start))
                return line
    find_ebx_end = int(time.time() * 1000)
    print("Error: Did not find EBX_HANDLER - took %d ms" % (find_ebx_end - find_ebx_start))
    return None
 def find_enc(filename_in):
    """Search filename_in from its end for the line referencing /Encrypt.

    Args:
        filename_in: Path of the PDF file to scan.

    Returns:
        The first line (counted from the end of the file) that contains both
        "R/Encrypt" and "R/ID", or None if no line does.
    """
    find_enc_start = int(time.time() * 1000)
    # The context manager closes the handle on every return path; the reader
    # is a lazy generator, so the file has to stay open while iterating.
    with open(filename_in, "rb") as fl:
        for i, line in enumerate(BackwardReader(fl).readlines(), start=1):
            if "R/Encrypt" in line and "R/ID" in line:
                find_enc_end = int(time.time() * 1000)
                print("Found ENC after %d attempts - took %d ms" % (i, find_enc_end - find_enc_start))
                return line
    find_enc_end = int(time.time() * 1000)
    print("Error: Did not find ENC - took %d ms" % (find_enc_end - find_enc_start))
    return None
 def patch_drm_into_pdf(filename_in, adept_license_string, filename_out, ebx_bookid):
    drm_start_time = int(time.time() * 1000)
    trailer = ""
    trailer_idx = 0
-    print("DRM data is %s" % (drm_string))
+    startxref_offset = 0
    prevline = ""
-    for line in read_reverse_order(ORIG_FILE):
+
    fl = open(filename_in, "rb")
    br = BackwardReader(fl)
    print("Searching for startxref ...")
    for line in br.readlines():
        trailer_idx += 1
        trailer = line + "\n" + trailer
-        print("DEBUG: pdfdata[%d] = %s" % (trailer_idx, line))
+
-        if (trailer_idx == 20):
+        print ("LINE: " + line)
-            print("trailer_idx is very large (%d). Usually it's 10 or less. File might be corrupted." % trailer_idx)
+
-        if (line == "trailer"): 
+        if (trailer_idx > 10):
-            print("Found trailer at idx %d" % (trailer_idx))
+            print("Took more than 10 attempts to find startxref ...")
            return False
        if (line == "startxref"):
            startxref_offset = int(prevline)
            print("Got startxref: %d" % (startxref_offset))            
            break
        prevline = line
    r_encrypt_offs1 = 0
    r_encrypt_offs2 = 0
    root_str = None
    next_startxref = False
    startxref = None
-    for line in trailer.split('\n'):
+    encrypt = None
-        #print(line)
+
-        if ("R/Encrypt" in line):
+
-            root_str = line
+    encrypt = find_enc(filename_in)
-            line_split = line.split(' ')
+    if encrypt is None:
        print("Error, enc not found")
        return False
    line_split = encrypt.split(' ')
    next = 0
    for element in line_split:
        if element == "R/Encrypt":
            next = 2
            continue
        if next == 2:
            r_encrypt_offs1 = element
            next = 1
            continue
        if next == 1: 
            r_encrypt_offs2 = element
            next = 0
            for element in line_split:
                if element == "R/Encrypt":
                    next = 2
                    continue
                if next == 2:
                    r_encrypt_offs1 = element
                    next = 1
                    continue
                if next == 1: 
                    r_encrypt_offs2 = element
                    next = 0
                    continue
        if "startxref" in line: 
            next_startxref = True
            continue
        if next_startxref:
            startxref = line
            next_startxref = False
            continue
    # read EBX element:
    ebx_elem = find_ebx(filename_in)
    if (ebx_elem is None):
        print("Err: EBX is None")
        return False
    print("")
    print("")
    print("Encryption handler:")
    print(encrypt)
    print("EBX handler:")
    print(ebx_elem)
    encrypt = trim_encrypt_string(encrypt)
    print("Trimmed encryption handler:")
    print(encrypt)
    ebx_elem = update_ebx_with_keys(ebx_elem, adept_license_string, ebx_bookid)
    print("Updated EBX handler not logged due to sensitive data")
    #print(ebx_elem)
-    filesize_str = str(os.path.getsize(ORIG_FILE))
+    filesize_str = str(os.path.getsize(filename_in))
    filesize_pad = filesize_str.zfill(10)
    additional_data = "\r"
    additional_data += r_encrypt_offs1 + " " + r_encrypt_offs2 + " " + "obj" + "\r"
-    additional_data += drm_string
+    additional_data += ebx_elem
    additional_data += "\r"
    additional_data += "endobj"
@ -117,36 +226,40 @@ def patch_drm_into_pdf(filename_in, drm_string, filename_out):
    additional_data += "trailer"
    additional_data += "\r"
-    arr_root_str = root_str.split('/')
+    arr_root_str = encrypt.split('/')
    did_prev = False
    for elem in arr_root_str: 
        if elem.startswith("Prev"):
            did_prev = True
-            additional_data += "Prev " + startxref
+            additional_data += "Prev " + str(startxref_offset)
            #print("Replacing prev from '%s' to '%s'" % (elem, "Prev " + startxref))
        elif elem.startswith("ID[<"):
            additional_data += elem.replace("><", "> <")
        else:
-            additional_data += elem
+            additional_data += cleanup_encrypt_element(elem)
        additional_data += "/"
    if not did_prev:
        # remove two >> at end
        additional_data = additional_data[:-3]
-        additional_data += "/Prev " + startxref + ">>" + "/"
+        additional_data += "/Prev " + str(startxref_offset) + ">>" + "/"
        #print("Faking Prev %s" % startxref)
    additional_data = additional_data[:-1]
    additional_data += "\r" + "startxref\r" + str(ptr) + "\r" + "%%EOF"
-    print("Appending DRM data: %s" % (additional_data))
+    #print("Appending DRM data: %s" % (additional_data))
-    inp = open(ORIG_FILE, "rb")
+    inp = open(filename_in, "rb")
    out = open(filename_out, "wb")
    out.write(inp.read())
    out.write(additional_data.encode("latin-1"))
    inp.close()
    out.close()
    drm_end_time = int(time.time() * 1000)
    print("Whole DRM patching took %d milliseconds." % (drm_end_time - drm_start_time))
    return True