acsm-calibre-plugin/calibre-plugin/libpdf.py

153 lines
5.3 KiB
Python

import os, zlib, base64
from lxml import etree
def read_reverse_order(file_name):
# Open file for reading in binary mode
with open(file_name, 'rb') as read_obj:
# Move the cursor to the end of the file
read_obj.seek(0, os.SEEK_END)
# Get the current position of pointer i.e eof
pointer_location = read_obj.tell()
# Create a buffer to keep the last read line
buffer = bytearray()
# Loop till pointer reaches the top of the file
while pointer_location >= 0:
# Move the file pointer to the location pointed by pointer_location
read_obj.seek(pointer_location)
# Shift pointer location by -1
pointer_location = pointer_location -1
# read that byte / character
new_byte = read_obj.read(1)
# If the read byte is new line character then it means one line is read
if new_byte == b'\n':
# Fetch the line from buffer and yield it
yield buffer.decode("latin-1")[::-1]
# Reinitialize the byte array to save next line
buffer = bytearray()
else:
# If last read character is not eol then add it in buffer
buffer.extend(new_byte)
# As file is read completely, if there is still data in buffer, then its the first line.
if len(buffer) > 0:
# Yield the first line too
yield buffer.decode("latin-1")[::-1]
def deflate_and_base64_encode( string_val ):
zlibbed_str = zlib.compress( string_val )
compressed_string = zlibbed_str[2:-4]
return base64.b64encode( compressed_string )
def prepare_string_from_xml(xmlstring, title, author):
b64data = deflate_and_base64_encode(xmlstring.encode("utf-8")).decode("utf-8")
adobe_fulfill_response = etree.fromstring(xmlstring)
NSMAP = { "adept" : "http://ns.adobe.com/adept" }
adNS = lambda tag: '{%s}%s' % ('http://ns.adobe.com/adept', tag)
resource = adobe_fulfill_response.find("./%s/%s" % (adNS("licenseToken"), adNS("resource"))).text
return "<</Length 128/EBX_TITLE(%s)/Filter/EBX_HANDLER/EBX_AUTHOR(%s)/V 4/ADEPT_ID(%s)/EBX_BOOKID(%s)/ADEPT_LICENSE(%s)>>" % (title, author, resource, resource, b64data)
def patch_drm_into_pdf(filename_in, drm_string, filename_out):
ORIG_FILE = filename_in
trailer = ""
trailer_idx = 0
print("DRM data is %s" % (drm_string))
for line in read_reverse_order(ORIG_FILE):
trailer_idx += 1
trailer = line + "\n" + trailer
print("DEBUG: pdfdata[%d] = %s" % (trailer_idx, line))
if (trailer_idx == 20):
print("trailer_idx is very large (%d). Usually it's 10 or less. File might be corrupted." % trailer_idx)
if (line == "trailer"):
print("Found trailer at idx %d" % (trailer_idx))
break
r_encrypt_offs1 = 0
r_encrypt_offs2 = 0
root_str = None
next_startxref = False
startxref = None
for line in trailer.split('\n'):
#print(line)
if ("R/Encrypt" in line):
root_str = line
line_split = line.split(' ')
next = 0
for element in line_split:
if element == "R/Encrypt":
next = 2
continue
if next == 2:
r_encrypt_offs1 = element
next = 1
continue
if next == 1:
r_encrypt_offs2 = element
next = 0
continue
if "startxref" in line:
next_startxref = True
continue
if next_startxref:
startxref = line
next_startxref = False
continue
filesize_str = str(os.path.getsize(ORIG_FILE))
filesize_pad = filesize_str.zfill(10)
additional_data = "\r"
additional_data += r_encrypt_offs1 + " " + r_encrypt_offs2 + " " + "obj" + "\r"
additional_data += drm_string
additional_data += "\r"
additional_data += "endobj"
ptr = int(filesize_str) + len(additional_data)
additional_data += "\rxref\r" + r_encrypt_offs1 + " " + str((int(r_encrypt_offs2) + 1)) + "\r"
additional_data += filesize_pad + " 00000 n" + "\r\n"
additional_data += "trailer"
additional_data += "\r"
arr_root_str = root_str.split('/')
did_prev = False
for elem in arr_root_str:
if elem.startswith("Prev"):
did_prev = True
additional_data += "Prev " + startxref
#print("Replacing prev from '%s' to '%s'" % (elem, "Prev " + startxref))
elif elem.startswith("ID[<"):
additional_data += elem.replace("><", "> <")
else:
additional_data += elem
additional_data += "/"
if not did_prev:
# remove two >> at end
additional_data = additional_data[:-3]
additional_data += "/Prev " + startxref + ">>" + "/"
#print("Faking Prev %s" % startxref)
additional_data = additional_data[:-1]
additional_data += "\r" + "startxref\r" + str(ptr) + "\r" + "%%EOF"
print("Appending DRM data: %s" % (additional_data))
inp = open(ORIG_FILE, "rb")
out = open(filename_out, "wb")
out.write(inp.read())
out.write(additional_data.encode("latin-1"))
inp.close()
out.close()