ineptpdf 7.4

This commit is contained in:
Apprentice Alf 2010-03-10 07:58:08 +00:00
parent ab9d585190
commit b7de1dcea5

View File

@ -1,7 +1,7 @@
#! /usr/bin/python #! /usr/bin/python
# ineptpdf73.pyw # ineptpdf74.pyw
# ineptpdf, version 7.3 # ineptpdf, version 7.4
# To run this program install Python 2.6 from http://www.python.org/download/ # To run this program install Python 2.6 from http://www.python.org/download/
# and PyCrypto from http://www.voidspace.org.uk/python/modules.shtml#pycrypto # and PyCrypto from http://www.voidspace.org.uk/python/modules.shtml#pycrypto
@ -24,7 +24,9 @@
# - Support for cross ref streams on output (decreases file size) # - Support for cross ref streams on output (decreases file size)
# 7.3 - Correct bug in trailer with cross ref stream that caused the error # 7.3 - Correct bug in trailer with cross ref stream that caused the error
# "The root object is missing or invalid" in Adobe Reader. # "The root object is missing or invalid" in Adobe Reader.
# 7.4 - Force all generation numbers in output file to be 0, like in v6.
# Fallback code for wrong xref improved (search till last trailer
# instead of first)
""" """
Decrypt Adobe ADEPT-encrypted PDF files. Decrypt Adobe ADEPT-encrypted PDF files.
""" """
@ -741,7 +743,7 @@ class PDFObjRef(PDFObject):
return return
def __repr__(self): def __repr__(self):
return '<PDFObjRef:%d>' % (self.objid) return '<PDFObjRef:%d %d>' % (self.objid, self.genno)
def resolve(self): def resolve(self):
return self.doc.getobj(self.objid) return self.doc.getobj(self.objid)
@ -1528,6 +1530,7 @@ class PDFParser(PSStackParser):
# read xref tables and trailers # read xref tables and trailers
def read_xref(self): def read_xref(self):
xrefs = [] xrefs = []
trailerpos = None
try: try:
pos = self.find_xref() pos = self.find_xref()
self.read_xref_from(pos, xrefs) self.read_xref_from(pos, xrefs)
@ -1542,14 +1545,16 @@ class PDFParser(PSStackParser):
(pos, line) = self.nextline() (pos, line) = self.nextline()
except PSEOF: except PSEOF:
break break
if line.startswith('trailer'): break if line.startswith('trailer'):
trailerpos = pos # remember last trailer
m = pat.match(line) m = pat.match(line)
if not m: continue if not m: continue
(objid, genno) = m.groups() (objid, genno) = m.groups()
offsets[int(objid)] = (0, pos) offsets[int(objid)] = (0, pos)
if not offsets: raise if not offsets: raise
xref.offsets = offsets xref.offsets = offsets
self.seek(pos) if trailerpos:
self.seek(trailerpos)
xref.load_trailer(self) xref.load_trailer(self)
xrefs.append(xref) xrefs.append(xref)
return xrefs return xrefs
@ -1628,7 +1633,7 @@ class PDFSerializer(object):
except AttributeError: except AttributeError:
genno = 0 genno = 0
xrefs[objid] = (self.tell(), genno) xrefs[objid] = (self.tell(), genno)
self.serialize_indirect(objid, genno, obj) self.serialize_indirect(objid, obj)
startxref = self.tell() startxref = self.tell()
if not gen_xref_stm: if not gen_xref_stm:
@ -1636,7 +1641,8 @@ class PDFSerializer(object):
self.write('0 %d\n' % (maxobj + 1,)) self.write('0 %d\n' % (maxobj + 1,))
for objid in xrange(0, maxobj + 1): for objid in xrange(0, maxobj + 1):
if objid in xrefs: if objid in xrefs:
self.write("%010d %05d n \n" % xrefs[objid]) # force the genno to be 0
self.write("%010d 00000 n \n" % xrefs[objid][0])
else: else:
self.write("%010d %05d f \n" % (0, 65535)) self.write("%010d %05d f \n" % (0, 65535))
@ -1649,7 +1655,6 @@ class PDFSerializer(object):
# Calculate size of entries # Calculate size of entries
maxoffset = max(startxref, maxobj) maxoffset = max(startxref, maxobj)
maxindex = PDFObjStmRef.maxindex maxindex = PDFObjStmRef.maxindex
# TODO - max genno should also be taken into account
fl2 = 2 fl2 = 2
power = 65536 power = 65536
while maxoffset >= power: while maxoffset >= power:
@ -1684,7 +1689,9 @@ class PDFSerializer(object):
else: else:
f1 = 1 f1 = 1
f2 = objref[0] f2 = objref[0]
f3 = objref[1] # we force all generation numbers to be 0
# f3 = objref[1]
f3 = 0
data.append(struct.pack('>B', f1)) data.append(struct.pack('>B', f1))
data.append(struct.pack('>L', f2)[-fl2:]) data.append(struct.pack('>L', f2)[-fl2:])
@ -1698,7 +1705,7 @@ class PDFSerializer(object):
if 'Info' in trailer: if 'Info' in trailer:
dic['Info'] = trailer['Info'] dic['Info'] = trailer['Info']
xrefstm = PDFStream(dic, data) xrefstm = PDFStream(dic, data)
self.serialize_indirect(maxobj, 0, xrefstm) self.serialize_indirect(maxobj, xrefstm)
self.write('startxref\n%d\n%%%%EOF' % startxref) self.write('startxref\n%d\n%%%%EOF' % startxref)
def write(self, data): def write(self, data):
@ -1751,10 +1758,9 @@ class PDFSerializer(object):
self.write(' ') self.write(' ')
self.write('%d %d R' % (obj.objid, 0)) self.write('%d %d R' % (obj.objid, 0))
elif isinstance(obj, PDFStream): elif isinstance(obj, PDFStream):
### For now, we have extracted all objects from an Object Stream, ### If we don't generate cross ref streams the object streams
### so we don't need these any more. Therefore leave them out ### are no longer useful, as we have extracted all objects from
### of the output. Later we could try to use object streams in ### them. Therefore leave them out from the output.
### the output again to get smaller output.
if obj.dic.get('Type') == LITERAL_OBJSTM and not gen_xref_stm: if obj.dic.get('Type') == LITERAL_OBJSTM and not gen_xref_stm:
self.write('(deleted)') self.write('(deleted)')
else: else:
@ -1769,8 +1775,8 @@ class PDFSerializer(object):
self.write(' ') self.write(' ')
self.write(data) self.write(data)
def serialize_indirect(self, objid, genno, obj): def serialize_indirect(self, objid, obj):
self.write('%d %d obj' % (objid, genno)) self.write('%d 0 obj' % (objid,))
self.serialize_object(obj) self.serialize_object(obj)
if self.last.isalnum(): if self.last.isalnum():
self.write('\n') self.write('\n')