DeDRM_tools/DeDRM_plugin/zipfix.py

194 lines
6.3 KiB
Python
Raw Normal View History

#!/usr/bin/env python3
# -*- coding: utf-8 -*-

# zipfix.py
# Copyright © 2010-2020 by Apprentice Harper et al.

# Released under the terms of the GNU General Public Licence, version 3
# <http://www.gnu.org/licenses/>

# Revision history:
# 1.0 - Initial release
# 1.1 - Updated to handle zip file metadata correctly
# 2.0 - Python 3 for calibre 5.0

"""
Re-write zip (or ePub) fixing problems with file names (and mimetype entry).
"""
__license__ = 'GPL v3'
2013-10-03 00:59:40 +06:00
__version__ = "1.1"
import sys, os
# Calibre stuff - so we can import from our ZIP without absolute module name
sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))
2013-10-03 00:59:40 +06:00
import zlib
import zipfilerugged
2013-10-03 00:59:40 +06:00
import getopt
from struct import unpack
# Byte offsets inside a zip local file header, relative to the header's start.
# Position of the 2-byte little-endian file name length.
_FILENAME_LEN_OFFSET = 26
# Position of the 2-byte little-endian extra field length.
_EXTRA_LEN_OFFSET = 28
# Position of the file name itself; the member data follows the name and
# the extra field.
_FILENAME_OFFSET = 30
# Largest slice of compressed data handed to zlib in one decompress call.
_MAX_SIZE = 64 * 1024
# Content written to the 'mimetype' entry when the output is an ePub.
_MIMETYPE = 'application/epub+zip'
class ZipInfo(zipfilerugged.ZipInfo):
    """zipfilerugged.ZipInfo that also accepts ``compress_type`` as a keyword.

    All other positional and keyword arguments are forwarded unchanged to
    the base class constructor.
    """

    def __init__(self, *args, **kwargs):
        """Create the entry, applying ``compress_type`` only when it was given.

        The keyword is removed before calling the base constructor, which
        is not handed it. When the keyword is absent the value chosen by
        the base class is left untouched (previously this case raised
        UnboundLocalError).
        """
        has_compress_type = 'compress_type' in kwargs
        compress_type = kwargs.pop('compress_type', None)
        super(ZipInfo, self).__init__(*args, **kwargs)
        if has_compress_type:
            self.compress_type = compress_type
class fixZip:
    """Copy a zip (or ePub) archive into a new archive, repairing entries.

    Members whose data cannot be read through the archive reader are
    recovered straight from the raw file using the local file header, and
    are written out under the name stored in that local header. For an
    ePub, an uncompressed 'mimetype' entry is written first.
    """

    # Per-entry metadata copied verbatim from an input entry to its output entry.
    _META_FIELDS = ('comment', 'extra', 'internal_attr', 'external_attr',
                    'create_system')

    def __init__(self, zinput, zoutput):
        """Open the input archive (parsed and raw) and the output archive.

        zinput  -- path of the archive to repair
        zoutput -- path of the repaired archive to create
        """
        # any path containing '.epub' (case-insensitively) is treated as an ePub
        self.ztype = 'epub' if '.epub' in zinput.lower() else 'zip'
        self.inzip = zipfilerugged.ZipFile(zinput, 'r')
        self.outzip = zipfilerugged.ZipFile(zoutput, 'w')
        # open the input zip for reading only as a raw file
        self.bzf = open(zinput, 'rb')

    def close(self):
        """Close the raw input file, the input archive and the output archive.

        Each handle is closed even if closing an earlier one raises.
        """
        try:
            self.bzf.close()
        finally:
            try:
                self.inzip.close()
            finally:
                self.outzip.close()

    @staticmethod
    def _is_mimetype(name):
        """Return True if *name* is the ePub 'mimetype' entry name.

        NOTE(review): both bytes and str spellings are accepted because the
        type the archive reader uses for names is not visible here - confirm
        against zipfilerugged.
        """
        if isinstance(name, bytes):
            return name == b'mimetype'
        return name == 'mimetype'

    def _read_u16(self, offset):
        """Return the little-endian unsigned 16-bit value at *offset* in the raw file."""
        self.bzf.seek(offset)
        value, = unpack('<H', self.bzf.read(2))
        return value

    def getlocalname(self, zi):
        """Return the raw file name stored in the local file header of *zi*."""
        local_header_offset = zi.header_offset
        local_name_length = self._read_u16(local_header_offset + _FILENAME_LEN_OFFSET)
        self.bzf.seek(local_header_offset + _FILENAME_OFFSET)
        return self.bzf.read(local_name_length)

    def uncompress(self, cmpdata):
        """Inflate the raw deflate stream *cmpdata* and return the result as bytes."""
        # negative wbits: raw deflate data without a zlib header or trailer
        dc = zlib.decompressobj(-15)
        # feed the input in bounded slices, then flush exactly once at the end
        pieces = [dc.decompress(cmpdata[start:start + _MAX_SIZE])
                  for start in range(0, len(cmpdata), _MAX_SIZE)]
        pieces.append(dc.flush())
        return b''.join(pieces)

    def getfiledata(self, zi):
        """Read the data of member *zi* directly from the raw input file.

        Returns the uncompressed bytes for stored and deflated members, and
        None for any other compression type.
        """
        # get file name length and extra data length to find start of file data
        local_header_offset = zi.header_offset
        local_name_length = self._read_u16(local_header_offset + _FILENAME_LEN_OFFSET)
        extra_field_length = self._read_u16(local_header_offset + _EXTRA_LEN_OFFSET)
        self.bzf.seek(local_header_offset + _FILENAME_OFFSET
                      + local_name_length + extra_field_length)

        # if not compressed we are good to go
        if zi.compress_type == zipfilerugged.ZIP_STORED:
            return self.bzf.read(zi.file_size)

        # if compressed we must decompress it using zlib
        if zi.compress_type == zipfilerugged.ZIP_DEFLATED:
            return self.uncompress(self.bzf.read(zi.compress_size))

        return None

    def _find_mimetype(self):
        """Return the input archive's info for the 'mimetype' entry, or None."""
        for name in (b'mimetype', 'mimetype'):
            try:
                return self.inzip.getinfo(name)
            except Exception:
                # best effort: a failed lookup only means there is no
                # existing metadata to carry over
                continue
        return None

    def _write_mimetype(self):
        """Write the uncompressed 'mimetype' entry to the output archive."""
        # first get a ZipInfo with current time and no compression
        mimeinfo = ZipInfo(b'mimetype', compress_type=zipfilerugged.ZIP_STORED)
        mimeinfo.internal_attr = 1  # text file
        # if the mimetype is present, copy its info, including time-stamp
        oldmimeinfo = self._find_mimetype()
        if oldmimeinfo is not None:
            for field in ('date_time',) + self._META_FIELDS:
                setattr(mimeinfo, field, getattr(oldmimeinfo, field))
        self.outzip.writestr(mimeinfo, _MIMETYPE.encode('ascii'))

    def _copy_member(self, zinfo):
        """Copy one member to the output archive, repairing its name if needed."""
        try:
            data = self.inzip.read(zinfo.filename)
        except (zipfilerugged.BadZipfile, zipfilerugged.error):
            # the reader rejected the entry: recover the data from the raw
            # file and use the name recorded in the local header instead
            local_name = self.getlocalname(zinfo)
            data = self.getfiledata(zinfo)
            zinfo.filename = local_name

        # create new ZipInfo with only the useful attributes from the old info
        nzinfo = ZipInfo(zinfo.filename, zinfo.date_time,
                         compress_type=zinfo.compress_type)
        for field in self._META_FIELDS:
            setattr(nzinfo, field, getattr(zinfo, field))
        nzinfo.flag_bits = zinfo.flag_bits & 0x800  # preserve UTF-8 flag
        self.outzip.writestr(nzinfo, data)

    def fix(self):
        """Copy every member of the input archive into the output archive.

        If the archive is an ePub, the mimetype entry is written first with
        no compression and the input's own mimetype entry is skipped.
        Members with local vs central file name problems are repaired on
        the way. All three file handles are closed when this returns,
        whether it succeeds or raises.
        """
        try:
            is_epub = self.ztype == 'epub'
            if is_epub:
                self._write_mimetype()

            # write the rest of the files
            for zinfo in self.inzip.infolist():
                if is_epub and self._is_mimetype(zinfo.filename):
                    continue
                self._copy_member(zinfo)
        finally:
            self.close()
def usage():
    """Print the command-line usage summary to standard output."""
    # built from explicit pieces so no stray text can slip into the message
    print(
        "usage: zipfix.py inputzip outputzip\n"
        "inputzip is the source zipfile to fix\n"
        "outputzip is the fixed zip archive\n"
    )
2013-10-03 00:59:40 +06:00
def repairBook(infile, outfile):
    """Repair the archive at *infile*, writing the result to *outfile*.

    Returns 0 on success, 1 if *infile* does not exist, and 2 if the
    repair raised an exception (which is reported on standard output).
    """
    input_present = os.path.exists(infile)
    if not input_present:
        print("Error: Input Zip File does not exist")
        return 1

    try:
        fixer = fixZip(infile, outfile)
        fixer.fix()
    except Exception as problem:
        print("Error Occurred ", problem)
        return 2
    else:
        return 0
def main(argv=sys.argv):
    """Command-line entry point; returns the process exit status.

    Expects exactly two arguments after the program name: the input
    archive and the output archive. Anything else prints the usage text
    and returns 1; otherwise the result of repairBook() is returned.
    """
    if len(argv) == 3:
        return repairBook(argv[1], argv[2])
    usage()
    return 1


# run as a script: exit with main()'s status
if __name__ == '__main__':
    sys.exit(main())