#! /usr/bin/python
2.
3.
# ineptpdf8.4.51.pyw
4.
# ineptpdf, version 8.4.51
5.
6.
# To run this program install Python 2.7 from http://www.python.org/download/
7.
#
8.
# PyCrypto from http://www.voidspace.org.uk/python/modules.shtml#pycrypto
9.
#
10.
# and PyWin Extension (Win32API module) from
11.
# http://sourceforge.net/projects/pywin32/files/
12.
#
13.
# Make sure to install the dedicated versions for Python 2.7.
14.
#
15.
# It's recommended to use the 32-Bit Python Windows versions (even with a 64-bit
16.
# Windows system).
17.
#
18.
# Save this script file as
19.
# ineptpdf8.4.51.pyw and double-click on it to run it.
20.
21.
# Revision history:
22.
#
1 - Initial release
23.
7 - Get cross reference streams and object streams working for input.
30.
Not yet supported on output but this only effects file size,
31.
32.
# 7.1 - Correct a problem when an old trailer is not followed by startxref (an
on2)
33.
#
- Support for cross ref streams on output (decreases file size) (anon2)
35.
# 7.3 - Correct bug in trailer with cross ref stream that caused the error (an
on2)
36.
#
Fallback code for wrong xref improved (search till last trailer
39.
8.3.1 - fix for the "specified key file does not exist" error (Tetrachroma)
45.
57.
# 8.4.10 - line feed processing, non c system drive patch, nrbook support (Tet
rachroma)
58.
#
60.
#
80.
#
handling (Tetrachroma)
84.
8.4.43 - fix for user names longer than 13 characters and special
100.
#
102.
8.4.45 - 2nd fix for ident4d problem (Tetrachroma)
103.
8.4.46 - script cleanup and optimizations (Tetrachroma)
104.
8.4.47 - script identification change to Adobe Reader (Tetrachroma)
105.
8.4.48 - improved tolerance for false file/registry entries (Tetrachroma)
106.
8.4.49 - improved username encryption (Tetrachroma)
107.
8.4.50 - improved (experimental) APS support (Tetrachroma & Neisklar)
108.
8.4.51 - automatic APS offline key retrieval (works only for
109.
#
111.
"""
112.
Decrypts Adobe ADEPT-encrypted and Fileopen PDF files.
113.
"""
114.
115.
from __future__ import with_statement
116.
117.
__license__ = 'GPL v3'
118.
119.
import sys
120.
import os
121.
import re
122.
import zlib
123.
import struct
124.
import hashlib
125.
from itertools import chain, islice
126.
import xml.etree.ElementTree as etree
127.
import Tkinter
128.
import Tkconstants
129.
import tkFileDialog
130.
import tkMessageBox
131.
# added for fileopen support
132.
import urllib
133.
import urlparse
134.
import time
135.
import socket
136.
import string
137.
import uuid
138.
import subprocess
139.
import time
140.
import getpass
141.
from ctypes import *
142.
import traceback
143.
import inspect
144.
import tempfile
145.
import sqlite3
146.
import httplib
147.
try:
148.
from Crypto.Cipher import ARC4
149.
# needed for newer pdfs
150.
from Crypto.Cipher import AES
151.
from Crypto.Hash import SHA256
152.
from Crypto.PublicKey import RSA
153.
154.
except ImportError:
155.
ARC4 = None
156.
RSA = None
157.
try:
158.
from cStringIO import StringIO
159.
except ImportError:
160.
from StringIO import StringIO
161.
162.
class ADEPTError(Exception):
    """Raised when ADEPT/Fileopen decryption cannot proceed."""
164.
165.
# global variable (needed for fileopen and password decryption)
166.
INPUTFILEPATH = ''
167.
KEYFILEPATH = ''
168.
PASSWORD = ''
169.
DEBUG_MODE = False
170.
IVERSION = '8.4.51'
171.
172.
# Do we generate cross reference streams on output?
173.
# 0 = never
174.
# 1 = only if present in input
175.
# 2 = always
176.
177.
GEN_XREF_STM = 1
178.
179.
# This is the value for the current document
180.
gen_xref_stm = False # will be set in PDFSerializer
181.
182.
###
183.
### ASN.1 parsing code from tlslite
184.
185.
def bytesToNumber(bytes):
    """Convert a big-endian sequence of byte values into an integer.

    Args:
        bytes: iterable of ints in the range 0-255, most significant first.
               (Parameter name kept for compatibility; it shadows the builtin.)

    Returns:
        The arbitrary-precision integer encoded by the bytes; 0 for empty input.
    """
    # Fix: the original seeded with the long literal 0L, which is unnecessary
    # in Python 2 (ints auto-promote to long) and a syntax error in Python 3.
    total = 0
    for byte in bytes:
        total = (total << 8) + byte
    return total
190.
191.
class ASN1Error(Exception):
    """Raised when an ASN.1 structure cannot be decoded."""
193.
194.
class ASN1Parser(object):
    """Minimal DER/ASN.1 reader (adapted from tlslite).

    Wraps one TLV element: the constructor consumes the type octet and the
    length, leaving the payload in self.value (a slice of the input byte
    sequence).  Children of a constructed element are fetched with
    getChild().
    """

    class Parser(object):
        """Forward-only cursor over a sequence of integer byte values."""

        def __init__(self, bytes):
            self.bytes = bytes
            self.index = 0

        def get(self, length):
            """Read `length` bytes as a single big-endian unsigned integer."""
            if self.index + length > len(self.bytes):
                raise ASN1Error("Error decoding ASN.1")
            value = 0
            for _ in range(length):
                value = (value << 8) | self.bytes[self.index]
                self.index += 1
            return value

        def getFixBytes(self, lengthBytes):
            """Return the next `lengthBytes` bytes and advance past them."""
            chunk = self.bytes[self.index : self.index + lengthBytes]
            self.index += lengthBytes
            return chunk

        def getVarBytes(self, lengthLength):
            """Read a length prefix, then return that many bytes."""
            return self.getFixBytes(self.get(lengthLength))

        def getFixList(self, length, lengthList):
            """Read `lengthList` integers of `length` bytes each."""
            return [self.get(length) for _ in range(lengthList)]

        def getVarList(self, length, lengthLength):
            """Read a byte-count prefix, then a list of `length`-byte ints."""
            totalBytes = self.get(lengthLength)
            if totalBytes % length != 0:
                raise ASN1Error("Error decoding ASN.1")
            count = int(totalBytes / length)
            return [self.get(length) for _ in range(count)]

        def startLengthCheck(self, lengthLength):
            """Read a length prefix and mark the position for later checks."""
            self.lengthCheck = self.get(lengthLength)
            self.indexCheck = self.index

        def setLengthCheck(self, length):
            """Record an externally supplied length and mark the position."""
            self.lengthCheck = length
            self.indexCheck = self.index

        def stopLengthCheck(self):
            """Verify exactly lengthCheck bytes were consumed since the mark."""
            if (self.index - self.indexCheck) != self.lengthCheck:
                raise ASN1Error("Error decoding ASN.1")

        def atLengthCheck(self):
            """Return True exactly at the checked boundary, False before it."""
            consumed = self.index - self.indexCheck
            if consumed < self.lengthCheck:
                return False
            if consumed == self.lengthCheck:
                return True
            raise ASN1Error("Error decoding ASN.1")

    def __init__(self, bytes):
        cursor = self.Parser(bytes)
        cursor.get(1)  # skip the type octet
        self.length = self._getASN1Length(cursor)
        self.value = cursor.getFixBytes(self.length)

    def getChild(self, which):
        """Return the zero-based `which`-th child element as an ASN1Parser."""
        cursor = self.Parser(self.value)
        for _ in range(which + 1):
            start = cursor.index
            cursor.get(1)  # type octet
            cursor.getFixBytes(self._getASN1Length(cursor))
        return ASN1Parser(cursor.bytes[start:cursor.index])

    def _getASN1Length(self, p):
        """Decode a DER length: short form (<= 127) or long form."""
        firstLength = p.get(1)
        if firstLength <= 127:
            return firstLength
        return p.get(firstLength & 0x7F)
277.
278.
###
279.
### PDF parsing routines from pdfminer, with changes for EBX_HANDLER
280.
281.
## Utilities
282.
##
283.
def choplist(n, seq):
    '''Yield successive n-tuples taken from seq.

    A trailing group shorter than n elements is discarded, matching the
    behaviour of accumulating into a list and only yielding full groups.
    '''
    it = iter(seq)
    chunk = tuple(islice(it, n))
    # stop as soon as a full group of n can no longer be formed
    while n > 0 and len(chunk) == n:
        yield chunk
        chunk = tuple(islice(it, n))
292.
293.
def nunpack(s, default=0):
    '''Unpack up to 4 big-endian bytes into an unsigned integer.

    Args:
        s: byte string of length 0 to 4.
        default: value returned when s is empty.

    Returns:
        The unsigned integer the bytes encode.

    Raises:
        TypeError: if s is longer than 4 bytes.
    '''
    l = len(s)
    if not l:
        return default
    elif l == 1:
        return ord(s)
    elif l == 2:
        return struct.unpack('>H', s)[0]
    elif l == 3:
        # pad to 4 bytes so the '>L' format can be reused
        return struct.unpack('>L', b'\x00' + s)[0]
    elif l == 4:
        return struct.unpack('>L', s)[0]
    else:
        # Fix: the original *returned* the TypeError instance instead of
        # raising it, so callers silently received an exception object.
        raise TypeError('invalid length: %d' % l)
308.
309.
310.
STRICT = 0  # when nonzero, the PS parsers raise instead of coping silently


## PS Exceptions
##
class PSException(Exception):
    """Root of the PostScript parser exception hierarchy."""

class PSEOF(PSException):
    """Raised when the parser runs out of input."""

class PSSyntaxError(PSException):
    """Raised on malformed PostScript syntax."""

class PSTypeError(PSException):
    """Raised when an object is not of the expected PostScript type."""

class PSValueError(PSException):
    """Raised on an invalid PostScript value."""


## Basic PostScript Types
##

# PSLiteral
class PSObject(object):
    """Common base class for PostScript objects."""
327.
328.
class PSLiteral(PSObject):
    '''
    A PostScript literal such as "/Name".

    Caution: never construct these directly; obtain them through
    PSLiteralTable.intern() so equal names share a single object.
    '''

    def __init__(self, name):
        self.name = name

    def __repr__(self):
        # Non-alphanumeric characters are rendered as '#xx' hex escapes,
        # mirroring the PDF name-escaping convention.
        escaped = ''.join(
            ch if ch.isalnum() else '#%02x' % ord(ch)
            for ch in self.name)
        return '/%s' % escaped
345.
346.
# PSKeyword
347.
class PSKeyword(PSObject):
    '''
    A PostScript keyword (e.g. "showpage").

    Caution: never construct these directly; obtain them through
    PSKeywordTable.intern() so equal names share a single object.
    '''

    def __init__(self, name):
        self.name = name

    def __repr__(self):
        return self.name
359.
360.
# PSSymbolTable
361.
class PSSymbolTable(object):
    '''
    Interning table that stores PSLiteral or PSKeyword instances.

    intern() guarantees that the same name always yields the same object,
    so interned symbols can be compared by identity.
    '''

    def __init__(self, classe):
        self.dic = {}
        self.classe = classe

    def intern(self, name):
        try:
            return self.dic[name]
        except KeyError:
            obj = self.classe(name)
            self.dic[name] = obj
            return obj
379.
380.
PSLiteralTable = PSSymbolTable(PSLiteral)
381.
PSKeywordTable = PSSymbolTable(PSKeyword)
382.
LIT = PSLiteralTable.intern
383.
KWD = PSKeywordTable.intern
384.
KEYWORD_BRACE_BEGIN = KWD('{')
385.
KEYWORD_BRACE_END = KWD('}')
386.
KEYWORD_ARRAY_BEGIN = KWD('[')
387.
KEYWORD_ARRAY_END = KWD(']')
388.
KEYWORD_DICT_BEGIN = KWD('<<')
389.
KEYWORD_DICT_END = KWD('>>')
390.
391.
392.
def literal_name(x):
    """Return the name of a PSLiteral.

    In non-strict mode any other object is coerced with str(); in strict
    mode a PSTypeError is raised instead.
    """
    if isinstance(x, PSLiteral):
        return x.name
    if STRICT:
        raise PSTypeError('Literal required: %r' % x)
    return str(x)
399.
400.
def keyword_name(x):
    """Return the name of a PSKeyword.

    In non-strict mode any other object is coerced with str(); in strict
    mode a PSTypeError is raised instead.
    """
    if isinstance(x, PSKeyword):
        return x.name
    if STRICT:
        raise PSTypeError('Keyword required: %r' % x)
    return str(x)
407.
408.
409.
## PSBaseParser
410.
##
411.
EOL = re.compile(r'[\r\n]')
412.
SPC = re.compile(r'\s')
413.
NONSPC = re.compile(r'\S')
414.
HEX = re.compile(r'[0-9a-fA-F]')
415.
END_LITERAL = re.compile(r'[#/%\[\]()<>{}\s]')
416.
END_HEX_STRING = re.compile(r'[^\s0-9a-fA-F]')
417.
HEX_PAIR = re.compile(r'[0-9a-fA-F]{2}|.')
418.
END_NUMBER = re.compile(r'[^0-9]')
419.
END_KEYWORD = re.compile(r'[#/%\[\]()<>{}\s]')
420.
END_STRING = re.compile(r'[()\134]')
421.
OCT_STRING = re.compile(r'[0-7]')
422.
ESC_STRING = { 'b':8, 't':9, 'n':10, 'f':12, 'r':13, '(':40, ')':41, '\\':92 }
423.
424.
class PSBaseParser(object):

    '''
    Most basic PostScript parser that performs only basic tokenization.

    The tokenizer is a state machine: self.parse1 always holds the state
    function for whatever is currently being scanned, and each state
    function receives (buffer, index) and returns the next
    (state function, index) pair.  Finished tokens are queued on
    self.tokens as (file position, token) pairs.
    '''
    BUFSIZ = 4096  # bytes fetched per read from the underlying file

    def __init__(self, fp):
        # fp: a seekable file-like object.
        self.fp = fp
        self.seek(0)
        return

    def __repr__(self):
        return '<PSBaseParser: %r, bufpos=%d>' % (self.fp, self.bufpos)

    def flush(self):
        # No-op here; close() calls it so subclasses can flush state.
        return

    def close(self):
        self.flush()
        return

    def tell(self):
        # Absolute file position of the next character to be parsed.
        return self.bufpos+self.charpos

    def poll(self, pos=None, n=80):
        # Debugging helper.  The actual dump (commented print below) is
        # disabled, so this currently only saves and restores the file
        # position.  Note pos=0 is treated the same as None ("if not pos").
        pos0 = self.fp.tell()
        if not pos:
            pos = self.bufpos+self.charpos
        self.fp.seek(pos)
        ##print >>sys.stderr, 'poll(%d): %r' % (pos, self.fp.read(n))
        self.fp.seek(pos0)
        return

    def seek(self, pos):
        '''
        Seeks the parser to the given position and resets all parser state.
        '''
        self.fp.seek(pos)
        # reset the status for nextline()
        self.bufpos = pos
        self.buf = ''
        self.charpos = 0
        # reset the status for nexttoken()
        self.parse1 = self.parse_main
        self.tokens = []
        return

    def fillbuf(self):
        # Ensure at least one unread character is available in self.buf.
        if self.charpos < len(self.buf): return
        # fetch next chunk.
        self.bufpos = self.fp.tell()
        self.buf = self.fp.read(self.BUFSIZ)
        if not self.buf:
            raise PSEOF('Unexpected EOF')
        self.charpos = 0
        return

    def parse_main(self, s, i):
        # Default state: skip whitespace, then dispatch on the first
        # significant character to the state for that token type.
        m = NONSPC.search(s, i)
        if not m:
            return (self.parse_main, len(s))
        j = m.start(0)
        c = s[j]
        self.tokenstart = self.bufpos+j
        if c == '%':                      # comment
            self.token = '%'
            return (self.parse_comment, j+1)
        if c == '/':                      # name literal
            self.token = ''
            return (self.parse_literal, j+1)
        if c in '-+' or c.isdigit():      # number (may turn into a float)
            self.token = c
            return (self.parse_number, j+1)
        if c == '.':                      # float with no integer part
            self.token = c
            return (self.parse_float, j+1)
        if c.isalpha():                   # keyword, or true/false
            self.token = c
            return (self.parse_keyword, j+1)
        if c == '(':                      # literal string
            self.token = ''
            self.paren = 1
            return (self.parse_string, j+1)
        if c == '<':                      # '<<' dict-open or <hex> string
            self.token = ''
            return (self.parse_wopen, j+1)
        if c == '>':                      # '>>' dict-close
            self.token = ''
            return (self.parse_wclose, j+1)
        # any other single character becomes a keyword token on its own
        self.add_token(KWD(c))
        return (self.parse_main, j+1)

    def add_token(self, obj):
        # Queue a finished token together with its starting file position.
        self.tokens.append((self.tokenstart, obj))
        return

    def parse_comment(self, s, i):
        # Consume characters up to the end of line.
        m = EOL.search(s, i)
        if not m:
            self.token += s[i:]
            return (self.parse_comment, len(s))
        j = m.start(0)
        self.token += s[i:j]
        # We ignore comments.
        #self.tokens.append(self.token)
        return (self.parse_main, j)

    def parse_literal(self, s, i):
        # Accumulate a /name literal until a delimiter is found.
        m = END_LITERAL.search(s, i)
        if not m:
            self.token += s[i:]
            return (self.parse_literal, len(s))
        j = m.start(0)
        self.token += s[i:j]
        c = s[j]
        if c == '#':
            # '#xx' hex escape inside a name
            self.hex = ''
            return (self.parse_literal_hex, j+1)
        self.add_token(LIT(self.token))
        return (self.parse_main, j)

    def parse_literal_hex(self, s, i):
        # Collect up to two hex digits after '#', then decode them into
        # a single character and resume scanning the literal.
        c = s[i]
        if HEX.match(c) and len(self.hex) < 2:
            self.hex += c
            return (self.parse_literal_hex, i+1)
        if self.hex:
            self.token += chr(int(self.hex, 16))
        return (self.parse_literal, i)

    def parse_number(self, s, i):
        m = END_NUMBER.search(s, i)
        if not m:
            self.token += s[i:]
            return (self.parse_number, len(s))
        j = m.start(0)
        self.token += s[i:j]
        c = s[j]
        if c == '.':
            # decimal point: continue scanning as a float
            self.token += c
            return (self.parse_float, j+1)
        try:
            self.add_token(int(self.token))
        except ValueError:
            # NOTE: a malformed number (e.g. a lone '-') is silently dropped
            pass
        return (self.parse_main, j)

    def parse_float(self, s, i):
        m = END_NUMBER.search(s, i)
        if not m:
            self.token += s[i:]
            return (self.parse_float, len(s))
        j = m.start(0)
        self.token += s[i:j]
        self.add_token(float(self.token))
        return (self.parse_main, j)

    def parse_keyword(self, s, i):
        m = END_KEYWORD.search(s, i)
        if not m:
            self.token += s[i:]
            return (self.parse_keyword, len(s))
        j = m.start(0)
        self.token += s[i:j]
        # 'true'/'false' become Python booleans; anything else a keyword
        if self.token == 'true':
            token = True
        elif self.token == 'false':
            token = False
        else:
            token = KWD(self.token)
        self.add_token(token)
        return (self.parse_main, j)

    def parse_string(self, s, i):
        # Accumulate a (...) string, tracking nested parentheses in
        # self.paren; backslash escapes are handled by parse_string_1.
        m = END_STRING.search(s, i)
        if not m:
            self.token += s[i:]
            return (self.parse_string, len(s))
        j = m.start(0)
        self.token += s[i:j]
        c = s[j]
        if c == '\\':
            self.oct = ''
            return (self.parse_string_1, j+1)
        if c == '(':
            self.paren += 1
            self.token += c
            return (self.parse_string, j+1)
        if c == ')':
            self.paren -= 1
            if self.paren:
                # still inside nested parens: the ')' is part of the string
                self.token += c
                return (self.parse_string, j+1)
        self.add_token(self.token)
        return (self.parse_main, j+1)

    def parse_string_1(self, s, i):
        # State entered after a backslash inside a (...) string: either
        # up to three octal digits (\ddd) or a named escape from ESC_STRING.
        c = s[i]
        if OCT_STRING.match(c) and len(self.oct) < 3:
            self.oct += c
            return (self.parse_string_1, i+1)
        if self.oct:
            self.token += chr(int(self.oct, 8))
            return (self.parse_string, i)
        if c in ESC_STRING:
            self.token += chr(ESC_STRING[c])
        return (self.parse_string, i+1)

    def parse_wopen(self, s, i):
        # After '<': a second '<' opens a dictionary; whitespace or a hex
        # digit starts a hex string.
        c = s[i]
        if c.isspace() or HEX.match(c):
            return (self.parse_hexstring, i)
        if c == '<':
            self.add_token(KEYWORD_DICT_BEGIN)
            i += 1
        return (self.parse_main, i)

    def parse_wclose(self, s, i):
        # After '>': a second '>' closes a dictionary.
        c = s[i]
        if c == '>':
            self.add_token(KEYWORD_DICT_END)
            i += 1
        return (self.parse_main, i)

    def parse_hexstring(self, s, i):
        m = END_HEX_STRING.search(s, i)
        if not m:
            self.token += s[i:]
            return (self.parse_hexstring, len(s))
        j = m.start(0)
        self.token += s[i:j]
        # strip whitespace, then decode each hex digit pair to a character
        token = HEX_PAIR.sub(lambda m: chr(int(m.group(0), 16)),
                             SPC.sub('', self.token))
        self.add_token(token)
        return (self.parse_main, j)

    def nexttoken(self):
        # Drive the state machine until at least one token is queued, then
        # return the oldest one as a (file position, token) pair.
        # Propagates PSEOF from fillbuf() at end of input.
        while not self.tokens:
            self.fillbuf()
            (self.parse1, self.charpos) = self.parse1(self.buf, self.charpos)
        token = self.tokens.pop(0)
        return token

    def nextline(self):
        '''
        Fetches a next line that ends either with \\r or \\n.
        Returns (line start position, line text including its EOL).
        '''
        linebuf = ''
        linepos = self.bufpos + self.charpos
        eol = False
        while 1:
            self.fillbuf()
            if eol:
                c = self.buf[self.charpos]
                # handle '\r\n'
                if c == '\n':
                    linebuf += c
                    self.charpos += 1
                break
            m = EOL.search(self.buf, self.charpos)
            if m:
                linebuf += self.buf[self.charpos:m.end(0)]
                self.charpos = m.end(0)
                if linebuf[-1] == '\r':
                    # might be the first half of '\r\n': peek at next char
                    eol = True
                else:
                    break
            else:
                linebuf += self.buf[self.charpos:]
                self.charpos = len(self.buf)
        return (linepos, linebuf)

    def revreadlines(self):
        '''
        Fetches lines backward, starting from the end of the file.
        This is used to locate the trailers at the end of a file.
        '''
        self.fp.seek(0, 2)
        pos = self.fp.tell()
        buf = ''
        while 0 < pos:
            prevpos = pos
            pos = max(0, pos-self.BUFSIZ)
            self.fp.seek(pos)
            s = self.fp.read(prevpos-pos)
            if not s: break
            while 1:
                n = max(s.rfind('\r'), s.rfind('\n'))
                if n == -1:
                    # no EOL in this chunk: prepend it and read further back
                    buf = s + buf
                    break
                yield s[n:]+buf
                s = s[:n]
                buf = ''
        return
718.
719.
720.
## PSStackParser
721.
##
722.
class PSStackParser(PSBaseParser):
723.
724.
def __init__(self, fp):
725.
PSBaseParser.__init__(self, fp)
726.
self.reset()
727.
return
728.
729.
def reset(self):
730.
self.context = []
731.
self.curtype = None
732.
self.curstack = []
733.
self.results = []
734.
return
735.
736.
def seek(self, pos):
737.
PSBaseParser.seek(self, pos)
738.
self.reset()
739.
return
740.
741.
def push(self, *objs):