v0.0.8: PDF fixes & ACSM file type config

This commit is contained in:
Florian Bach 2021-10-03 10:30:45 +02:00
parent 61bebdd906
commit 36b28765dd
4 changed files with 309 additions and 132 deletions

View File

@ -12,10 +12,11 @@
# v0.0.5: Bugfix: DeDRM plugin was also executed if it's installed but disabled. # v0.0.5: Bugfix: DeDRM plugin was also executed if it's installed but disabled.
# v0.0.6: First PDF support, allow importing previously exported activation data. # v0.0.6: First PDF support, allow importing previously exported activation data.
# v0.0.7: More PDF logging, PDF reading in latin-1, MacOS locale bugfix # v0.0.7: More PDF logging, PDF reading in latin-1, MacOS locale bugfix
# v0.0.8: More PDF bugfixes, support unlimited PDF file sizes, tell Calibre ACSMs are books.
from calibre.customize import FileTypePlugin # type: ignore from calibre.customize import FileTypePlugin # type: ignore
__version__ = '0.0.7' __version__ = '0.0.8'
PLUGIN_NAME = "DeACSM" PLUGIN_NAME = "DeACSM"
PLUGIN_VERSION_TUPLE = tuple([int(x) for x in __version__.split(".")]) PLUGIN_VERSION_TUPLE = tuple([int(x) for x in __version__.split(".")])
@ -24,7 +25,7 @@ PLUGIN_VERSION = ".".join([str(x)for x in PLUGIN_VERSION_TUPLE])
from calibre.utils.config import config_dir # type: ignore from calibre.utils.config import config_dir # type: ignore
import os, shutil, traceback, sys import os, shutil, traceback, sys, time
import zipfile import zipfile
from lxml import etree from lxml import etree
@ -49,6 +50,18 @@ class DeACSM(FileTypePlugin):
""" """
try: try:
# Patch Calibre to consider "ACSM" a book. This makes ACSM files show up
# in the "Add Book" file selection, and it also makes the auto-add feature useable.
try:
from calibre.ebooks import BOOK_EXTENSIONS
if ("acsm" not in BOOK_EXTENSIONS):
BOOK_EXTENSIONS.append("acsm")
except:
print("{0} v{1}: Couldn't add ACSM to book extension list:".format(PLUGIN_NAME, PLUGIN_VERSION))
traceback.print_exc()
self.pluginsdir = os.path.join(config_dir,"plugins") self.pluginsdir = os.path.join(config_dir,"plugins")
if not os.path.exists(self.pluginsdir): if not os.path.exists(self.pluginsdir):
os.mkdir(self.pluginsdir) os.mkdir(self.pluginsdir)
@ -167,21 +180,21 @@ class DeACSM(FileTypePlugin):
try: try:
from calibre_plugins.deacsm.libadobe import sendHTTPRequest from calibre_plugins.deacsm.libadobe import sendHTTPRequest_DL2FILE
from calibre_plugins.deacsm.libadobeFulfill import buildRights, fulfill from calibre_plugins.deacsm.libadobeFulfill import buildRights, fulfill
except: except:
try: try:
from libadobe import sendHTTPRequest from libadobe import sendHTTPRequest_DL2FILE
from libadobeFulfill import buildRights, fulfill from libadobeFulfill import buildRights, fulfill
except: except:
print("{0} v{1}: Error while importing Fulfillment stuff".format(PLUGIN_NAME, PLUGIN_VERSION)) print("{0} v{1}: Error while importing Fulfillment stuff".format(PLUGIN_NAME, PLUGIN_VERSION))
traceback.print_exc() traceback.print_exc()
try: try:
from calibre_plugins.deacsm.libpdf import patch_drm_into_pdf, prepare_string_from_xml from calibre_plugins.deacsm.libpdf import patch_drm_into_pdf
except: except:
try: try:
from libpdf import patch_drm_into_pdf, prepare_string_from_xml from libpdf import patch_drm_into_pdf
except: except:
print("{0} v{1}: Error while importing PDF patch".format(PLUGIN_NAME, PLUGIN_VERSION)) print("{0} v{1}: Error while importing PDF patch".format(PLUGIN_NAME, PLUGIN_VERSION))
traceback.print_exc() traceback.print_exc()
@ -190,9 +203,7 @@ class DeACSM(FileTypePlugin):
adobe_fulfill_response = etree.fromstring(replyData) adobe_fulfill_response = etree.fromstring(replyData)
NSMAP = { "adept" : "http://ns.adobe.com/adept" } NSMAP = { "adept" : "http://ns.adobe.com/adept" }
adNS = lambda tag: '{%s}%s' % ('http://ns.adobe.com/adept', tag) adNS = lambda tag: '{%s}%s' % ('http://ns.adobe.com/adept', tag)
adDC = lambda tag: '{%s}%s' % ('http://purl.org/dc/elements/1.1/', tag)
metadata_node = adobe_fulfill_response.find("./%s/%s/%s" % (adNS("fulfillmentResult"), adNS("resourceItemInfo"), adNS("metadata")))
download_url = adobe_fulfill_response.find("./%s/%s/%s" % (adNS("fulfillmentResult"), adNS("resourceItemInfo"), adNS("src"))).text download_url = adobe_fulfill_response.find("./%s/%s/%s" % (adNS("fulfillmentResult"), adNS("resourceItemInfo"), adNS("src"))).text
license_token_node = adobe_fulfill_response.find("./%s/%s/%s" % (adNS("fulfillmentResult"), adNS("resourceItemInfo"), adNS("licenseToken"))) license_token_node = adobe_fulfill_response.find("./%s/%s/%s" % (adNS("fulfillmentResult"), adNS("resourceItemInfo"), adNS("licenseToken")))
@ -205,9 +216,25 @@ class DeACSM(FileTypePlugin):
# Download eBook: # Download eBook:
print("{0} v{1}: Loading book from {2}".format(PLUGIN_NAME, PLUGIN_VERSION, download_url)) print("{0} v{1}: Loading book from {2}".format(PLUGIN_NAME, PLUGIN_VERSION, download_url))
book_content = sendHTTPRequest(download_url) filename_tmp = self.temporary_file(".blob").name
dl_start_time = int(time.time() * 1000)
ret = sendHTTPRequest_DL2FILE(download_url, filename_tmp)
dl_end_time = int(time.time() * 1000)
print("Download took %d ms (HTTP %d)" % (dl_end_time - dl_start_time, ret))
if (ret != 200):
print("{0} v{1}: Download failed with error {2}".format(PLUGIN_NAME, PLUGIN_VERSION, ret))
return None
filetype = ".bin" filetype = ".bin"
book_content = None
with open(filename_tmp, "rb") as f:
book_content = f.read(10)
if (book_content.startswith(b"PK")): if (book_content.startswith(b"PK")):
print("That's a ZIP file -> EPUB") print("That's a ZIP file -> EPUB")
filetype = ".epub" filetype = ".epub"
@ -217,23 +244,9 @@ class DeACSM(FileTypePlugin):
filename = self.temporary_file(filetype).name filename = self.temporary_file(filetype).name
author = "None" # Move to file name with matching extension
title = "None" shutil.move(filename_tmp, filename)
try:
title = metadata_node.find("./%s" % (adDC("title"))).text
author = metadata_node.find("./%s" % (adDC("creator"))).text
title = title.replace("(", "").replace(")", "").replace("/", "")
author = author.replace("(", "").replace(")", "").replace("/", "")
except:
pass
# Store book:
f = open(filename, "wb")
f.write(book_content)
f.close()
if filetype == ".epub": if filetype == ".epub":
# Store EPUB rights / encryption stuff # Store EPUB rights / encryption stuff
@ -244,10 +257,19 @@ class DeACSM(FileTypePlugin):
return filename return filename
elif filetype == ".pdf": elif filetype == ".pdf":
adobe_fulfill_response = etree.fromstring(rights_xml_str)
NSMAP = { "adept" : "http://ns.adobe.com/adept" }
adNS = lambda tag: '{%s}%s' % ('http://ns.adobe.com/adept', tag)
resource = adobe_fulfill_response.find("./%s/%s" % (adNS("licenseToken"), adNS("resource"))).text
print("{0} v{1}: Downloaded PDF, adding encryption config ...".format(PLUGIN_NAME, PLUGIN_VERSION)) print("{0} v{1}: Downloaded PDF, adding encryption config ...".format(PLUGIN_NAME, PLUGIN_VERSION))
pdf_tmp_file = self.temporary_file(filetype).name pdf_tmp_file = self.temporary_file(filetype).name
patch_drm_into_pdf(filename, prepare_string_from_xml(rights_xml_str, title, author), pdf_tmp_file) ret = patch_drm_into_pdf(filename, rights_xml_str, pdf_tmp_file, resource)
print("{0} v{1}: File successfully fulfilled ...".format(PLUGIN_NAME, PLUGIN_VERSION)) if (ret):
print("{0} v{1}: File successfully fulfilled ...".format(PLUGIN_NAME, PLUGIN_VERSION))
else:
print("{0} v{1}: There was an error patching the PDF file.".format(PLUGIN_NAME, PLUGIN_VERSION))
return pdf_tmp_file return pdf_tmp_file
else: else:
print("{0} v{1}: Error: Unsupported file type ...".format(PLUGIN_NAME, PLUGIN_VERSION)) print("{0} v{1}: Error: Unsupported file type ...".format(PLUGIN_NAME, PLUGIN_VERSION))

View File

@ -7,7 +7,7 @@ This is an experimental Python version of libgourou.
# pyright: reportUndefinedVariable=false # pyright: reportUndefinedVariable=false
import sys, os import sys, os, time, shutil
if sys.version_info[0] < 3: if sys.version_info[0] < 3:
print("This script requires Python 3.") print("This script requires Python 3.")
exit(1) exit(1)
@ -15,9 +15,9 @@ if sys.version_info[0] < 3:
import zipfile import zipfile
from lxml import etree from lxml import etree
from libadobe import sendHTTPRequest from libadobe import sendHTTPRequest_DL2FILE
from libadobeFulfill import buildRights, fulfill from libadobeFulfill import buildRights, fulfill
from libpdf import patch_drm_into_pdf, prepare_string_from_xml from libpdf import patch_drm_into_pdf
FILE_DEVICEKEY = "devicesalt" FILE_DEVICEKEY = "devicesalt"
FILE_DEVICEXML = "device.xml" FILE_DEVICEXML = "device.xml"
@ -48,28 +48,31 @@ def download(replyData):
exit(1) exit(1)
book_name = None book_name = None
author = "None"
title = "None"
try: try:
book_name = metadata_node.find("./%s" % (adDC("title"))).text book_name = metadata_node.find("./%s" % (adDC("title"))).text
except: except:
book_name = "Book" book_name = "Book"
try:
title = metadata_node.find("./%s" % (adDC("title"))).text
author = metadata_node.find("./%s" % (adDC("creator"))).text
title = title.replace("(", "").replace(")", "").replace("/", "")
author = author.replace("(", "").replace(")", "").replace("/", "")
except:
pass
# Download eBook: # Download eBook:
print(download_url) print(download_url)
book_content = sendHTTPRequest(download_url) filename_tmp = book_name + ".tmp"
dl_start_time = int(time.time() * 1000)
ret = sendHTTPRequest_DL2FILE(download_url, filename_tmp)
dl_end_time = int(time.time() * 1000)
print("Download took %d milliseconds" % (dl_end_time - dl_start_time))
if (ret != 200):
print("Download failed with error %d" % (ret))
exit()
with open(filename_tmp, "rb") as f:
book_content = f.read(10)
filetype = ".bin" filetype = ".bin"
if (book_content.startswith(b"PK")): if (book_content.startswith(b"PK")):
@ -80,11 +83,7 @@ def download(replyData):
filetype = ".pdf" filetype = ".pdf"
filename = book_name + filetype filename = book_name + filetype
shutil.move(filename_tmp, filename)
# Store book:
f = open(filename, "wb")
f.write(book_content)
f.close()
if filetype == ".epub": if filetype == ".epub":
# Store EPUB rights / encryption stuff # Store EPUB rights / encryption stuff
@ -97,11 +96,20 @@ def download(replyData):
elif filetype == ".pdf": elif filetype == ".pdf":
print("Successfully downloaded PDF, patching encryption ...") print("Successfully downloaded PDF, patching encryption ...")
adobe_fulfill_response = etree.fromstring(rights_xml_str)
NSMAP = { "adept" : "http://ns.adobe.com/adept" }
adNS = lambda tag: '{%s}%s' % ('http://ns.adobe.com/adept', tag)
resource = adobe_fulfill_response.find("./%s/%s" % (adNS("licenseToken"), adNS("resource"))).text
os.rename(filename, "tmp_" + filename) os.rename(filename, "tmp_" + filename)
patch_drm_into_pdf("tmp_" + filename, prepare_string_from_xml(rights_xml_str, author, title), filename) ret = patch_drm_into_pdf("tmp_" + filename, rights_xml_str, filename, resource)
os.remove("tmp_" + filename) os.remove("tmp_" + filename)
print("File successfully fulfilled to " + filename) if (ret):
print("File successfully fulfilled to " + filename)
else:
print("Errors occurred while patching " + filename)
exit(1)
exit(0) exit(0)
else: else:
print("Error: Weird filetype") print("Error: Weird filetype")

View File

@ -121,6 +121,40 @@ def makeFingerprint(serial: str):
############################################## HTTP stuff: ############################################## HTTP stuff:
def sendHTTPRequest_DL2FILE(URL: str, outputfile: str):
    """Stream the resource at ``URL`` into ``outputfile`` and return the HTTP status.

    The body is copied in 16 KiB chunks so the download never has to fit in
    memory.

    Args:
        URL: Address to fetch with a GET request.
        outputfile: Path of the file to (over)write with the response body.

    Returns:
        200 when the body was written to ``outputfile``; otherwise the HTTP
        status code reported by the server (nothing is written in that case).

    Raises:
        urllib.error.URLError: for failures that carry no HTTP status
            (DNS errors, refused connections, ...).
        OSError: if ``outputfile`` cannot be written.
    """
    # Function-scope imports keep this block self-contained.
    import urllib.error
    import urllib.request

    headers = {
        "Accept": "*/*",
        "User-Agent": "book2png",
    }
    chunksize = 16 * 1024
    # urlopen() already follows ordinary 3xx redirects on its own; this cap only
    # bounds the manual "Location" handling below so it can never loop forever.
    max_location_hops = 5

    url = URL
    ret_code = None
    for _ in range(max_location_hops):
        req = urllib.request.Request(url=url, headers=headers)
        try:
            handler = urllib.request.urlopen(req)
        except urllib.error.HTTPError as err:
            # urlopen() raises for 4xx/5xx; callers compare the return value
            # against 200, so hand them the status code instead of an exception.
            err.close()
            return err.code

        # The response is a context manager: the connection is released on
        # every exit path.
        with handler:
            ret_code = handler.getcode()

            if ret_code == 200:
                with open(outputfile, "wb") as f:
                    while True:
                        chunk = handler.read(chunksize)
                        if not chunk:
                            break
                        f.write(chunk)
                return 200

            # The Location header lives on the *response*; the request object
            # only holds the headers we sent.
            loc = handler.headers.get("Location")
            if loc is None:
                return ret_code

            # Retry against the new location, keeping the same output file.
            url = loc

    return ret_code
def sendHTTPRequest_getSimple(URL: str): def sendHTTPRequest_getSimple(URL: str):
headers = { headers = {

View File

@ -1,112 +1,221 @@
import os, zlib, base64 import os, zlib, base64, time
from lxml import etree
class BackwardReader:
def read_reverse_order(file_name): def __init__(self, file):
# Open file for reading in binary mode self.file = file
with open(file_name, 'rb') as read_obj:
# Move the cursor to the end of the file def readlines(self):
read_obj.seek(0, os.SEEK_END) BLKSIZE = 4096
# Get the current position of pointer i.e eof # Move reader to the end of file
pointer_location = read_obj.tell() self.file.seek(0, os.SEEK_END)
# Create a buffer to keep the last read line
buffer = bytearray() buffer = bytearray()
# Loop till pointer reaches the top of the file
while pointer_location >= 0: while True:
# Move the file pointer to the location pointed by pointer_location pos_newline = buffer.rfind(bytes([0x0a]))
read_obj.seek(pointer_location) # Get the current position of the reader
# Shift pointer location by -1 current_pos = self.file.tell()
pointer_location = pointer_location -1 if pos_newline != -1:
# read that byte / character # Newline is found
new_byte = read_obj.read(1) line = buffer[pos_newline+1:]
# If the read byte is new line character then it means one line is read buffer = buffer[:pos_newline]
if new_byte == b'\n': yield line.decode("latin-1")
# Fetch the line from buffer and yield it elif current_pos:
yield buffer.decode("latin-1")[::-1] # Need to fill the buffer
# Reinitialize the byte array to save next line to_read = min(BLKSIZE, current_pos)
buffer = bytearray() self.file.seek(current_pos-to_read, 0)
buffer = self.file.read(to_read) + buffer
self.file.seek(current_pos-to_read, 0)
if current_pos is to_read:
buffer = bytes([0x0a]) + buffer
else: else:
# If last read character is not eol then add it in buffer # Start of file
buffer.extend(new_byte) return
# As file is read completely, if there is still data in buffer, then its the first line.
if len(buffer) > 0:
# Yield the first line too
def trim_encrypt_string(encrypt):
    """Cut ``encrypt`` off right after the ``>>`` that balances its opening ``<<``.

    The string is scanned left to right while counting ``<<`` (+1) and ``>>``
    (-1) pairs. Scanning stops at the first position where the running count is
    zero, and everything up to and including the two characters at that
    position is returned.

    Args:
        encrypt: Text of the line holding the PDF dictionary.

    Returns:
        The leading part of ``encrypt`` up to the balancing ``>>``. If the
        brackets never balance, the whole string is returned.
    """
    # NOTE(review): the indentation of this function was reconstructed from a
    # flattened listing; the zero-count check is assumed to run on every loop
    # iteration, which means an input that does not start with "<<" is cut to
    # its first two characters. Confirm against the real file.
    # NOTE(review): pairs are matched at every index, so runs such as ">>>>"
    # are counted with overlap. That behaviour is kept unchanged here.
    strlen = len(encrypt)
    i = 0
    bracket_count = 0

    while i < strlen:
        # Slicing (instead of indexing i + 1) cannot run past the end of the
        # string, so a trailing lone "<" or ">" no longer raises IndexError.
        pair = encrypt[i:i + 2]
        if pair == "<<":
            bracket_count += 1
        elif pair == ">>":
            bracket_count -= 1

        if bracket_count == 0:
            break

        i += 1

    return encrypt[:i + 2]
def cleanup_encrypt_element(element):
    """Normalise the whitespace of one ``/``-separated piece of the trailer.

    For a piece starting with ``ID[<`` a space is inserted between adjacent
    ``><`` so the two hex strings are separated. Every run of whitespace is
    then collapsed to a single space (leading/trailing whitespace is dropped),
    and a space directly after ``[`` or ``]`` is removed.
    """
    # NOTE(review): indentation was reconstructed from a flattened listing;
    # only the "><" replacement is assumed to be conditional.
    spaced = element.replace("><", "> <") if element.startswith("ID[<") else element
    collapsed = ' '.join(spaced.split())
    without_open_gap = collapsed.replace("[ ", "[")
    return without_open_gap.replace("] ", "]")
def deflate_and_base64_encode( string_val ): def deflate_and_base64_encode( string_val ):
zlibbed_str = zlib.compress( string_val ) zlibbed_str = zlib.compress( string_val )
compressed_string = zlibbed_str[2:-4] compressed_string = zlibbed_str[2:-4]
return base64.b64encode( compressed_string ) return base64.b64encode( compressed_string )
def prepare_string_from_xml(xmlstring, title, author): def update_ebx_with_keys(ebx_data, adept_license, ebx_bookid):
b64data = deflate_and_base64_encode(xmlstring.encode("utf-8")).decode("utf-8")
adobe_fulfill_response = etree.fromstring(xmlstring) b64data = deflate_and_base64_encode(adept_license.encode("utf-8")).decode("utf-8")
NSMAP = { "adept" : "http://ns.adobe.com/adept" }
adNS = lambda tag: '{%s}%s' % ('http://ns.adobe.com/adept', tag)
resource = adobe_fulfill_response.find("./%s/%s" % (adNS("licenseToken"), adNS("resource"))).text
return "<</Length 128/EBX_TITLE(%s)/Filter/EBX_HANDLER/EBX_AUTHOR(%s)/V 4/ADEPT_ID(%s)/EBX_BOOKID(%s)/ADEPT_LICENSE(%s)>>" % (title, author, resource, resource, b64data) ebx_new = ebx_data[:-2]
ebx_new += "/EBX_BOOKID(%s)/ADEPT_LICENSE(%s)>>" % (ebx_bookid, b64data)
def patch_drm_into_pdf(filename_in, drm_string, filename_out): return ebx_new
ORIG_FILE = filename_in
def find_ebx(filename_in):
    """Return the line of ``filename_in`` that contains ``/EBX_HANDLER/``.

    The file is walked with ``BackwardReader.readlines()`` and the first line
    it yields that contains the marker is returned. The number of lines
    inspected and the elapsed time in milliseconds are printed either way.

    Args:
        filename_in: Path of the PDF file to search.

    Returns:
        The matching line as a string, or ``None`` if no line contains the
        marker.
    """
    find_ebx_start = int(time.time() * 1000)

    # The context manager closes the file on both the early return and the
    # not-found path; previously the handle was never closed.
    with open(filename_in, "rb") as fl:
        for i, line in enumerate(BackwardReader(fl).readlines(), start=1):
            if "/EBX_HANDLER/" in line:
                find_ebx_end = int(time.time() * 1000)
                print("Found EBX after %d attempts - took %d ms" % (i, find_ebx_end - find_ebx_start))
                return line

    find_ebx_end = int(time.time() * 1000)
    print("Error: Did not find EBX_HANDLER - took %d ms" % (find_ebx_end - find_ebx_start))
    return None
def find_enc(filename_in):
    """Return the line of ``filename_in`` that references the Encrypt object.

    The file is walked with ``BackwardReader.readlines()`` and the first line
    it yields that contains both ``R/Encrypt`` and ``R/ID`` is returned. The
    number of lines inspected and the elapsed time in milliseconds are printed
    either way.

    Args:
        filename_in: Path of the PDF file to search.

    Returns:
        The matching line as a string, or ``None`` if no line contains both
        markers.
    """
    find_enc_start = int(time.time() * 1000)

    # The context manager closes the file on both the early return and the
    # not-found path; previously the handle was never closed.
    with open(filename_in, "rb") as fl:
        for i, line in enumerate(BackwardReader(fl).readlines(), start=1):
            if "R/Encrypt" in line and "R/ID" in line:
                find_enc_end = int(time.time() * 1000)
                print("Found ENC after %d attempts - took %d ms" % (i, find_enc_end - find_enc_start))
                return line

    find_enc_end = int(time.time() * 1000)
    print("Error: Did not find ENC - took %d ms" % (find_enc_end - find_enc_start))
    return None
def patch_drm_into_pdf(filename_in, adept_license_string, filename_out, ebx_bookid):
drm_start_time = int(time.time() * 1000)
trailer = "" trailer = ""
trailer_idx = 0 trailer_idx = 0
print("DRM data is %s" % (drm_string)) startxref_offset = 0
prevline = ""
for line in read_reverse_order(ORIG_FILE):
fl = open(filename_in, "rb")
br = BackwardReader(fl)
print("Searching for startxref ...")
for line in br.readlines():
trailer_idx += 1 trailer_idx += 1
trailer = line + "\n" + trailer trailer = line + "\n" + trailer
print("DEBUG: pdfdata[%d] = %s" % (trailer_idx, line))
if (trailer_idx == 20): print ("LINE: " + line)
print("trailer_idx is very large (%d). Usually it's 10 or less. File might be corrupted." % trailer_idx)
if (line == "trailer"): if (trailer_idx > 10):
print("Found trailer at idx %d" % (trailer_idx)) print("Took more than 10 attempts to find startxref ...")
return False
if (line == "startxref"):
startxref_offset = int(prevline)
print("Got startxref: %d" % (startxref_offset))
break break
prevline = line
r_encrypt_offs1 = 0 r_encrypt_offs1 = 0
r_encrypt_offs2 = 0 r_encrypt_offs2 = 0
root_str = None
next_startxref = False
startxref = None
for line in trailer.split('\n'): encrypt = None
#print(line)
if ("R/Encrypt" in line):
root_str = line encrypt = find_enc(filename_in)
line_split = line.split(' ') if encrypt is None:
print("Error, enc not found")
return False
line_split = encrypt.split(' ')
next = 0
for element in line_split:
if element == "R/Encrypt":
next = 2
continue
if next == 2:
r_encrypt_offs1 = element
next = 1
continue
if next == 1:
r_encrypt_offs2 = element
next = 0 next = 0
for element in line_split:
if element == "R/Encrypt":
next = 2
continue
if next == 2:
r_encrypt_offs1 = element
next = 1
continue
if next == 1:
r_encrypt_offs2 = element
next = 0
continue
if "startxref" in line:
next_startxref = True
continue
if next_startxref:
startxref = line
next_startxref = False
continue continue
# read EBX element:
ebx_elem = find_ebx(filename_in)
if (ebx_elem is None):
print("Err: EBX is None")
return False
print("")
print("")
print("Encryption handler:")
print(encrypt)
print("EBX handler:")
print(ebx_elem)
encrypt = trim_encrypt_string(encrypt)
print("Trimmed encryption handler:")
print(encrypt)
ebx_elem = update_ebx_with_keys(ebx_elem, adept_license_string, ebx_bookid)
print("Updated EBX handler not logged due to sensitive data")
#print(ebx_elem)
filesize_str = str(os.path.getsize(ORIG_FILE)) filesize_str = str(os.path.getsize(filename_in))
filesize_pad = filesize_str.zfill(10) filesize_pad = filesize_str.zfill(10)
additional_data = "\r" additional_data = "\r"
additional_data += r_encrypt_offs1 + " " + r_encrypt_offs2 + " " + "obj" + "\r" additional_data += r_encrypt_offs1 + " " + r_encrypt_offs2 + " " + "obj" + "\r"
additional_data += drm_string additional_data += ebx_elem
additional_data += "\r" additional_data += "\r"
additional_data += "endobj" additional_data += "endobj"
@ -117,36 +226,40 @@ def patch_drm_into_pdf(filename_in, drm_string, filename_out):
additional_data += "trailer" additional_data += "trailer"
additional_data += "\r" additional_data += "\r"
arr_root_str = root_str.split('/') arr_root_str = encrypt.split('/')
did_prev = False did_prev = False
for elem in arr_root_str: for elem in arr_root_str:
if elem.startswith("Prev"): if elem.startswith("Prev"):
did_prev = True did_prev = True
additional_data += "Prev " + startxref additional_data += "Prev " + str(startxref_offset)
#print("Replacing prev from '%s' to '%s'" % (elem, "Prev " + startxref)) #print("Replacing prev from '%s' to '%s'" % (elem, "Prev " + startxref))
elif elem.startswith("ID[<"):
additional_data += elem.replace("><", "> <")
else: else:
additional_data += elem additional_data += cleanup_encrypt_element(elem)
additional_data += "/" additional_data += "/"
if not did_prev: if not did_prev:
# remove two >> at end # remove two >> at end
additional_data = additional_data[:-3] additional_data = additional_data[:-3]
additional_data += "/Prev " + startxref + ">>" + "/" additional_data += "/Prev " + str(startxref_offset) + ">>" + "/"
#print("Faking Prev %s" % startxref) #print("Faking Prev %s" % startxref)
additional_data = additional_data[:-1] additional_data = additional_data[:-1]
additional_data += "\r" + "startxref\r" + str(ptr) + "\r" + "%%EOF" additional_data += "\r" + "startxref\r" + str(ptr) + "\r" + "%%EOF"
print("Appending DRM data: %s" % (additional_data)) #print("Appending DRM data: %s" % (additional_data))
inp = open(ORIG_FILE, "rb") inp = open(filename_in, "rb")
out = open(filename_out, "wb") out = open(filename_out, "wb")
out.write(inp.read()) out.write(inp.read())
out.write(additional_data.encode("latin-1")) out.write(additional_data.encode("latin-1"))
inp.close() inp.close()
out.close() out.close()
drm_end_time = int(time.time() * 1000)
print("Whole DRM patching took %d milliseconds." % (drm_end_time - drm_start_time))
return True