acsm-calibre-plugin/calibre-plugin/libpdf.py

import os, zlib, base64
from lxml import etree


def read_reverse_order(file_name, chunk_size=4096):
    """Yield the lines of a file one by one, starting with the last line.

    The file is opened in binary mode and split on ``b'\\n'`` only; every
    line is decoded as latin-1 and yielded without its newline. A carriage
    return that precedes the newline is kept as part of the line.

    If the file ends with a newline, the first value yielded is an empty
    string (the empty "line" after that final newline). The first line of
    the file is only yielded when it is non-empty, so an empty file yields
    nothing.

    The file is read backwards in blocks of ``chunk_size`` bytes instead of
    issuing one seek and one read per byte. Reading stays lazy: a caller
    that stops iterating early only pays for the chunks it consumed.

    Args:
        file_name: Path of the file to read.
        chunk_size: Number of bytes fetched per read; must be at least 1.

    Yields:
        str: The next line, moving from the end of the file to its start.

    Raises:
        ValueError: If ``chunk_size`` is smaller than 1 (raised when
            iteration starts, because this is a generator).
    """
    if chunk_size < 1:
        raise ValueError("chunk_size must be at least 1")

    with open(file_name, 'rb') as read_obj:
        # Start at the end of the file and walk towards offset 0.
        read_obj.seek(0, os.SEEK_END)
        position = read_obj.tell()

        # Newline-free pieces of the line currently being assembled. They
        # are collected latest-in-file first because we read backwards, and
        # joined only once the line is complete to avoid repeated copying.
        pending = []

        while position > 0:
            step = min(chunk_size, position)
            position -= step
            read_obj.seek(position)
            chunk = read_obj.read(step)

            head, newline, rest = chunk.partition(b"\n")
            if not newline:
                # The whole chunk belongs to the line being assembled.
                pending.append(chunk)
                continue

            # Everything after the first newline consists of complete lines,
            # except that the last of them continues into the pieces that
            # were read earlier (i.e. later in the file).
            lines = rest.split(b"\n")
            lines[-1] += b"".join(reversed(pending))
            for raw_line in reversed(lines):
                yield raw_line.decode("latin-1")

            # The bytes before the first newline start a new pending line
            # that extends further towards the beginning of the file.
            pending = [head]

        # Whatever is left is the first line of the file.
        first_line = b"".join(reversed(pending))
        if first_line:
            yield first_line.decode("latin-1")

def deflate_and_base64_encode( string_val ):
    """Compress bytes with zlib and return the base64 of the bare deflate data.

    ``zlib.compress`` wraps the deflate data in a zlib container; the first
    two bytes (header) and the last four bytes (checksum) of that container
    are cut off before base64-encoding, leaving only the raw deflate stream.

    Args:
        string_val: The bytes to compress.

    Returns:
        bytes: Base64 encoding of the raw deflate stream.
    """
    return base64.b64encode(zlib.compress(string_val)[2:-4])

def _escape_pdf_literal(value):
    """Format *value* with ``%s`` and escape it for use inside a PDF ``(...)`` string."""
    text = "%s" % (value,)
    # Escape the backslash first so the backslashes added for the
    # parentheses below are not themselves doubled.
    return text.replace("\\", "\\\\").replace("(", "\\(").replace(")", "\\)")


def prepare_string_from_xml(xmlstring, title, author):
    """Build the PDF dictionary text that carries the license data.

    The complete XML text is UTF-8 encoded, deflated and base64-encoded and
    stored under ``ADEPT_LICENSE``. The text of the
    ``licenseToken/resource`` element (Adobe ADEPT namespace) is stored
    under both ``ADEPT_ID`` and ``EBX_BOOKID``.

    ``title`` and ``author`` end up inside PDF literal strings, which are
    delimited by parentheses. Backslashes and parentheses in them are
    therefore backslash-escaped; otherwise a title such as ``"Part 1)"``
    would terminate the string early and corrupt the dictionary.

    Args:
        xmlstring: The XML document as a ``str``; it is parsed with
            ``etree.fromstring``.
        title: Book title written to ``EBX_TITLE``.
        author: Book author written to ``EBX_AUTHOR``.

    Returns:
        str: The dictionary text, starting with ``<<`` and ending with ``>>``.

    Raises:
        AttributeError: If the XML has no ``licenseToken/resource`` element
            (``find`` then returns ``None``, which has no ``text``).
    """
    b64data = deflate_and_base64_encode(xmlstring.encode("utf-8")).decode("utf-8")

    adept_ns = "http://ns.adobe.com/adept"
    resource_path = "./{%s}licenseToken/{%s}resource" % (adept_ns, adept_ns)
    resource = etree.fromstring(xmlstring).find(resource_path).text

    return "<</Length 128/EBX_TITLE(%s)/Filter/EBX_HANDLER/EBX_AUTHOR(%s)/V 4/ADEPT_ID(%s)/EBX_BOOKID(%s)/ADEPT_LICENSE(%s)>>" % (
        _escape_pdf_literal(title),
        _escape_pdf_literal(author),
        resource,
        resource,
        b64data,
    )

def patch_drm_into_pdf(filename_in, drm_string, filename_out):
    """Write a copy of a PDF with an appended object, xref section and trailer.

    The end of ``filename_in`` is read backwards, line by line, up to and
    including the line that is exactly ``trailer``. From those lines the
    function takes:

    * the line containing ``R/Encrypt`` (treated as the trailer dictionary),
      and from it the two space-separated tokens that follow the
      ``R/Encrypt`` token (object and generation number of the object that
      will hold ``drm_string``);
    * the line following the ``startxref`` line (offset of the existing
      cross-reference section).

    It then appends, using ``\\r`` as line separator: the object containing
    ``drm_string``, an ``xref`` section with one entry whose offset is the
    size of the input file, a copy of the trailer dictionary whose ``Prev``
    entry holds the old ``startxref`` value (replaced if present, added
    otherwise), and a new ``startxref`` / ``%%EOF`` footer.

    Progress and debug information is printed to standard output.

    Args:
        filename_in: Path of the PDF to read.
        drm_string: Text placed between ``obj`` and ``endobj``.
        filename_out: Path of the file to write. The input is read
            completely before the output is opened, so the same path may be
            used for both.

    Raises:
        ValueError: If no usable ``R/Encrypt`` reference or ``startxref``
            value was found in the examined lines, or if the generation
            number is not an integer. Nothing is written in that case.
        UnicodeEncodeError: If the appended text cannot be encoded as
            latin-1. Nothing is written in that case.
    """
    print("DRM data is %s" % (drm_string))

    # Gather the tail of the file, last line first, until "trailer" shows up.
    tail_lines = []
    for line in read_reverse_order(filename_in):
        tail_lines.append(line)
        trailer_idx = len(tail_lines)
        print("DEBUG: pdfdata[%d] = %s" % (trailer_idx, line))
        if trailer_idx == 20:
            print("trailer_idx is very large (%d). Usually it's 10 or less. File might be corrupted." % trailer_idx)
        if line == "trailer":
            print("Found trailer at idx %d" % (trailer_idx))
            break
    # Back to file order for parsing.
    tail_lines.reverse()

    encrypt_obj_num = None
    encrypt_gen_num = None
    root_str = None
    startxref = None
    expect_startxref = False

    for line in tail_lines:
        if "R/Encrypt" in line:
            root_str = line
            # Small state machine: once the "R/Encrypt" token is seen, the
            # next two tokens are the object and the generation number.
            tokens_wanted = 0
            for element in line.split(' '):
                if element == "R/Encrypt":
                    tokens_wanted = 2
                elif tokens_wanted == 2:
                    encrypt_obj_num = element
                    tokens_wanted = 1
                elif tokens_wanted == 1:
                    encrypt_gen_num = element
                    tokens_wanted = 0
        if "startxref" in line:
            # The offset is on the line after the keyword.
            expect_startxref = True
        elif expect_startxref:
            startxref = line
            expect_startxref = False

    # Fail with a clear message instead of an incidental TypeError from
    # concatenating a placeholder with a string further down.
    if root_str is None or encrypt_obj_num is None or encrypt_gen_num is None:
        raise ValueError("Could not find an 'R/Encrypt <obj> <gen>' reference in the PDF trailer")
    if not startxref:
        raise ValueError("Could not find the startxref value in the PDF trailer")

    filesize = os.path.getsize(filename_in)
    filesize_pad = str(filesize).zfill(10)

    # The new object, appended directly after the original file content.
    encrypt_object = "".join([
        "\r",
        encrypt_obj_num, " ", encrypt_gen_num, " ", "obj", "\r",
        drm_string,
        "\r",
        "endobj",
    ])

    # Offset written after the new "startxref": input size plus the length
    # of the object text, i.e. the position of the "\r" right before "xref".
    # One character is one byte because the text is encoded as latin-1.
    ptr = filesize + len(encrypt_object)

    xref_section = "\rxref\r" + encrypt_obj_num + " " + str((int(encrypt_gen_num) + 1)) + "\r"
    xref_section += filesize_pad + " 00000 n" + "\r\n"

    # Rebuild the trailer dictionary key by key (keys are separated by "/").
    dict_parts = []
    did_prev = False
    for elem in root_str.split('/'):
        if elem.startswith("Prev"):
            did_prev = True
            replacement = "Prev " + startxref
            if elem.endswith(">>"):
                # "Prev" was the last key: keep the dictionary terminator
                # that is attached to it.
                replacement += ">>"
            dict_parts.append(replacement)
        elif elem.startswith("ID[<"):
            dict_parts.append(elem.replace("><", "> <"))
        else:
            dict_parts.append(elem)
    trailer_dict = "/".join(dict_parts)

    if not did_prev:
        # Drop the last two characters (expected to be the closing ">>"),
        # add the Prev entry and close the dictionary again.
        # NOTE(review): assumes the trailer line ends with ">>" - confirm
        # for files with trailing whitespace or multi-line trailers.
        trailer_dict = trailer_dict[:-2] + "/Prev " + startxref + ">>"

    additional_data = (
        encrypt_object
        + xref_section
        + "trailer" + "\r"
        + trailer_dict
        + "\r" + "startxref\r" + str(ptr) + "\r" + "%%EOF"
    )

    print("Appending DRM data: %s" % (additional_data))

    # Encode before touching the output so that an encoding error cannot
    # leave a half-written file behind.
    payload = additional_data.encode("latin-1")

    with open(filename_in, "rb") as inp:
        original_bytes = inp.read()

    with open(filename_out, "wb") as out:
        out.write(original_bytes)
        out.write(payload)
v0.0.6: PDF support & importing activation backup support 2021-09-28 22:43:14 +06:00			`import os, zlib, base64`
			`from lxml import etree`


			`def read_reverse_order(file_name):`
			`# Open file for reading in binary mode`
			`with open(file_name, 'rb') as read_obj:`
			`# Move the cursor to the end of the file`
			`read_obj.seek(0, os.SEEK_END)`
			`# Get the current position of pointer i.e eof`
			`pointer_location = read_obj.tell()`
			`# Create a buffer to keep the last read line`
			`buffer = bytearray()`
			`# Loop till pointer reaches the top of the file`
			`while pointer_location >= 0:`
			`# Move the file pointer to the location pointed by pointer_location`
			`read_obj.seek(pointer_location)`
			`# Shift pointer location by -1`
			`pointer_location = pointer_location -1`
			`# read that byte / character`
			`new_byte = read_obj.read(1)`
			`# If the read byte is new line character then it means one line is read`
			`if new_byte == b'\n':`
			`# Fetch the line from buffer and yield it`
v0.0.7: PDF bugfixes, more PDF logging, MacOS locale bugfix 2021-10-01 18:32:46 +06:00			`yield buffer.decode("latin-1")[::-1]`
v0.0.6: PDF support & importing activation backup support 2021-09-28 22:43:14 +06:00			`# Reinitialize the byte array to save next line`
			`buffer = bytearray()`
			`else:`
			`# If last read character is not eol then add it in buffer`
			`buffer.extend(new_byte)`
			`# As file is read completely, if there is still data in buffer, then its the first line.`
			`if len(buffer) > 0:`
			`# Yield the first line too`
v0.0.7: PDF bugfixes, more PDF logging, MacOS locale bugfix 2021-10-01 18:32:46 +06:00			`yield buffer.decode("latin-1")[::-1]`
v0.0.6: PDF support & importing activation backup support 2021-09-28 22:43:14 +06:00
			`def deflate_and_base64_encode( string_val ):`
			`zlibbed_str = zlib.compress( string_val )`
			`compressed_string = zlibbed_str[2:-4]`
			`return base64.b64encode( compressed_string )`

			`def prepare_string_from_xml(xmlstring, title, author):`
			`b64data = deflate_and_base64_encode(xmlstring.encode("utf-8")).decode("utf-8")`

			`adobe_fulfill_response = etree.fromstring(xmlstring)`
			`NSMAP = { "adept" : "http://ns.adobe.com/adept" }`
			`adNS = lambda tag: '{%s}%s' % ('http://ns.adobe.com/adept', tag)`
			`resource = adobe_fulfill_response.find("./%s/%s" % (adNS("licenseToken"), adNS("resource"))).text`

			`return "<</Length 128/EBX_TITLE(%s)/Filter/EBX_HANDLER/EBX_AUTHOR(%s)/V 4/ADEPT_ID(%s)/EBX_BOOKID(%s)/ADEPT_LICENSE(%s)>>" % (title, author, resource, resource, b64data)`

			`def patch_drm_into_pdf(filename_in, drm_string, filename_out):`

			`ORIG_FILE = filename_in`

			`trailer = ""`
			`trailer_idx = 0`

v0.0.7: PDF bugfixes, more PDF logging, MacOS locale bugfix 2021-10-01 18:32:46 +06:00			`print("DRM data is %s" % (drm_string))`

v0.0.6: PDF support & importing activation backup support 2021-09-28 22:43:14 +06:00			`for line in read_reverse_order(ORIG_FILE):`
			`trailer_idx += 1`
			`trailer = line + "\n" + trailer`
v0.0.7: PDF bugfixes, more PDF logging, MacOS locale bugfix 2021-10-01 18:32:46 +06:00			`print("DEBUG: pdfdata[%d] = %s" % (trailer_idx, line))`
			`if (trailer_idx == 20):`
			`print("trailer_idx is very large (%d). Usually it's 10 or less. File might be corrupted." % trailer_idx)`
v0.0.6: PDF support & importing activation backup support 2021-09-28 22:43:14 +06:00			`if (line == "trailer"):`
v0.0.7: PDF bugfixes, more PDF logging, MacOS locale bugfix 2021-10-01 18:32:46 +06:00			`print("Found trailer at idx %d" % (trailer_idx))`
v0.0.6: PDF support & importing activation backup support 2021-09-28 22:43:14 +06:00			`break`

			`r_encrypt_offs1 = 0`
			`r_encrypt_offs2 = 0`
			`root_str = None`
			`next_startxref = False`
			`startxref = None`

			`for line in trailer.split('\n'):`
			`#print(line)`
			`if ("R/Encrypt" in line):`
			`root_str = line`
			`line_split = line.split(' ')`
			`next = 0`
			`for element in line_split:`
			`if element == "R/Encrypt":`
			`next = 2`
			`continue`
			`if next == 2:`
			`r_encrypt_offs1 = element`
			`next = 1`
			`continue`
			`if next == 1:`
			`r_encrypt_offs2 = element`
			`next = 0`
			`continue`
			`if "startxref" in line:`
			`next_startxref = True`
			`continue`
			`if next_startxref:`
			`startxref = line`
			`next_startxref = False`
			`continue`


			`filesize_str = str(os.path.getsize(ORIG_FILE))`
			`filesize_pad = filesize_str.zfill(10)`


			`additional_data = "\r"`
			`additional_data += r_encrypt_offs1 + " " + r_encrypt_offs2 + " " + "obj" + "\r"`
			`additional_data += drm_string`
			`additional_data += "\r"`
			`additional_data += "endobj"`

			`ptr = int(filesize_str) + len(additional_data)`

			`additional_data += "\rxref\r" + r_encrypt_offs1 + " " + str((int(r_encrypt_offs2) + 1)) + "\r"`
			`additional_data += filesize_pad + " 00000 n" + "\r\n"`
			`additional_data += "trailer"`
			`additional_data += "\r"`

			`arr_root_str = root_str.split('/')`
			`did_prev = False`
			`for elem in arr_root_str:`
			`if elem.startswith("Prev"):`
			`did_prev = True`
			`additional_data += "Prev " + startxref`
			`#print("Replacing prev from '%s' to '%s'" % (elem, "Prev " + startxref))`
			`elif elem.startswith("ID[<"):`
			`additional_data += elem.replace("><", "> <")`
			`else:`
			`additional_data += elem`
			`additional_data += "/"`

			`if not did_prev:`
			`# remove two >> at end`
			`additional_data = additional_data[:-3]`
			`additional_data += "/Prev " + startxref + ">>" + "/"`
			`#print("Faking Prev %s" % startxref)`

			`additional_data = additional_data[:-1]`

			`additional_data += "\r" + "startxref\r" + str(ptr) + "\r" + "%%EOF"`

v0.0.7: PDF bugfixes, more PDF logging, MacOS locale bugfix 2021-10-01 18:32:46 +06:00			`print("Appending DRM data: %s" % (additional_data))`

v0.0.6: PDF support & importing activation backup support 2021-09-28 22:43:14 +06:00
			`inp = open(ORIG_FILE, "rb")`

			`out = open(filename_out, "wb")`
			`out.write(inp.read())`
			`out.write(additional_data.encode("latin-1"))`
			`inp.close()`
			`out.close()`