From c5b8c5e0f9cdf3ac6bf48cc07dfd388a1431b71f Mon Sep 17 00:00:00 2001 From: vnik Date: Tue, 15 Sep 2015 17:05:24 +1000 Subject: [PATCH] added PDF support --- README.md | 21 +-- crackqcli.py | 69 +++++---- thirdparty/__init__.py | 0 thirdparty/pdf2john.py | 315 ++++++++++++++++++++++++++++++++++++++++ thirdparty/termcolor.py | 168 +++++++++++++++++++++ 5 files changed, 527 insertions(+), 46 deletions(-) create mode 100644 thirdparty/__init__.py create mode 100644 thirdparty/pdf2john.py create mode 100644 thirdparty/termcolor.py diff --git a/README.md b/README.md index 5d3e156..1fdc252 100755 --- a/README.md +++ b/README.md @@ -18,6 +18,7 @@ Hash Formats Currently, the following algorithms are supported: +* Password protected PDF files (v1.4 - v1.6) * NTLM * MD5 * SHA1 @@ -32,23 +33,3 @@ Submitting Hashes Refer to our FAQ for detailed instructions on submitting hashes (https://hashcrack.org/crackq_faq). - -``` -$ ./crackqcli.py -h -Crackq client 0.3.1 -support@hashcrack.org - -./crackqcli.py [-t|--type hash_type] [hash|file_path] --t --type see supported hash types below --h --help help - -Supported hash types: -md5 Unsalted MD5 hashes -ntlm Windows NTLM hashes -sha1 Unsalted SHA1 hashes -wpa WPA/WPA2 handshakes -md5crypt MD5CRYPT / FreeBSD MD5 / Cisco IOS MD5 / MD5(Unix) -descrypt DESCRYPT / DES(Unix) -ike_md5 VPN IPSec IKE (MD5) preshared keys -phpass phpass (Wordpress, Joomla and phpBB3) -``` diff --git a/crackqcli.py b/crackqcli.py index 52334a2..624bed4 100755 --- a/crackqcli.py +++ b/crackqcli.py @@ -10,6 +10,8 @@ import base64 import re from urllib2 import Request, urlopen, URLError, HTTPError +from thirdparty.termcolor import cprint +from thirdparty.pdf2john import PdfParser SERVER = 'https://hashcrack.org' CONFIG_PATH = None @@ -19,12 +21,12 @@ 'client_ver' : '/crackq/v0.1/client_ver' } API_KEY = None -MYVER = '0.3.1' -HASH_TYPES = ['wpa', 'descrypt', 'md5crypt', 'md5', 'ntlm', 'sha1', 'ike_md5', 'phpass'] +MYVER = '0.3.2' +HASH_TYPES = ['wpa', 'descrypt', 
'md5crypt', 'md5', 'ntlm', 'sha1', 'pdf', 'phpass'] def banner(): - sys.stdout.write('Crackq client %s\n' % MYVER) - sys.stdout.write('support@hashcrack.org\n\n') + cprint('Crackq client %s' % MYVER, 'green') + cprint('support@hashcrack.org\n', 'green') def usage(argv0): sys.stdout.write('%s [-t|--type hash_type] [hash|file_path]\n' % argv0) @@ -37,7 +39,7 @@ def usage(argv0): sys.stdout.write('wpa WPA/WPA2 handshakes\n') sys.stdout.write('md5crypt MD5CRYPT / FreeBSD MD5 / Cisco IOS MD5 / MD5(Unix)\n') sys.stdout.write('descrypt DESCRYPT / DES(Unix)\n') - sys.stdout.write('ike_md5 VPN IPSec IKE (MD5) preshared keys\n') + sys.stdout.write('pdf PDF 1.4 - 1.6\n') sys.stdout.write('phpass phpass (Wordpress, Joomla and phpBB3)\n') def validate_hash(_hash, _hash_type): @@ -130,28 +132,29 @@ def load_config(): if o in ('-t', '--type'): _type = a - # check for updates - if urlopen(SERVER + ENDPOINTS['client_ver']).read() != MYVER: - sys.stdout.write('[-] WARNING: NEW CLIENT VERSION IS AVAILABLE. PLEASE UPDATE.\n') - sys.exit(-1) + try: + # check for updates + sys.stdout.write('[+] Checking the current client version...\n') + if urlopen(SERVER + ENDPOINTS['client_ver']).read() != MYVER: + cprint('[-] WARNING: NEW CLIENT VERSION IS AVAILABLE. 
PLEASE UPDATE.', 'red') + sys.exit(-1) - if len(args) != 1: - usage(sys.argv[0]) - sys.exit(-1) + if len(args) != 1: + usage(sys.argv[0]) + sys.exit(-1) - _content = args[0] - - if not _type or _type not in HASH_TYPES: - sys.stdout.write('[-] ERROR: INVALID HASH TYPE\n') - sys.exit(-1) + _content = args[0] + + if not _type or _type not in HASH_TYPES: + sys.stdout.write('[-] ERROR: INVALID HASH TYPE\n') + sys.exit(-1) + + if (_type != 'wpa' and _type != 'pdf') and not validate_hash(_content, _type): + sys.stdout.write('[-] ERROR: INVALID HASH FORMAT\n') + sys.exit(-1) + + load_config() - if (_type != 'wpa' and _type != 'ike_md5') and not validate_hash(_content, _type): - sys.stdout.write('[-] ERROR: INVALID HASH FORMAT\n') - sys.exit(-1) - - load_config() - - try: headers = {'Content-type': 'application/json', 'Accept': 'text/plain'} data = {'key': API_KEY} @@ -169,7 +172,7 @@ def load_config(): sys.stdout.write('[-] ERROR: NO QUEUE SUBMISSIONS LEFT. PURCHASE SUBMISSION QUOTA AT https://hashcrack.org/crackq_buy\n') sys.exit(-1) - if _type == 'wpa' or _type == 'ike_md5': + if _type == 'wpa': try: f = open(_content, 'r') except IOError: @@ -183,6 +186,20 @@ def load_config(): _content = base64.b64encode(zlib.compress(_raw)) f.close() + + if _type == 'pdf': + parser = PdfParser(_content) + + if not parser.supported(): + print 'This PDF format is not supported' + sys.exit(-1) + try: + pdf_hash = parser.parse() + except RuntimeError: + e = sys.exc_info()[1] + sys.stderr.write("%s : %s\n" % (filename, str(e))) + sys.exit(-1) + _content = pdf_hash data = {'key': API_KEY, 'content': _content, 'type': _type, 'q': 'privq'} req = Request(SERVER + ENDPOINTS['submit']) @@ -193,5 +210,5 @@ def load_config(): sys.stdout.write('[-] ERROR: HTTP %d - %s\n' % (e.code, json.load(e)['msg'])) sys.exit(-1) except URLError as e: - sys.stdout.write('[-] ERROR: UNREACHABLE - %s\n' % e.reason) + cprint('[-] ERROR: UNREACHABLE - %s' % e.reason, 'red') sys.exit(-1) diff --git 
a/thirdparty/__init__.py b/thirdparty/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/thirdparty/pdf2john.py b/thirdparty/pdf2john.py new file mode 100644 index 0000000..4befa55 --- /dev/null +++ b/thirdparty/pdf2john.py @@ -0,0 +1,315 @@ +# Copyright (c) 2013 Shane Quigley, < shane at softwareontheside.info > + +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: + +# The above copyright notice and this permission notice shall be included in all +# copies or substantial portions of the Software. + +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. 
import re
import sys
import os
from xml.dom import minidom

PY3 = sys.version_info[0] == 3


class PdfParser:
    """Extract a John-the-Ripper style ``$pdf$`` hash from an encrypted PDF.

    The whole file is read into memory as bytes (``self.encrypted``) and
    inspected with regular expressions; no real PDF object parsing is done.

    Attributes:
        file_name: path given to the constructor.
        encrypted: raw bytes of the file.
        process:   False when no ``PDF-x.y`` marker was found.
        pdf_spec:  the ``PDF-x.y`` marker as bytes (``b''`` when not found).
    """

    # Single-character escapes of PDF literal strings. Any other character
    # following a backslash stands for itself (see unescape()).
    _ESCAPES = {'n': "\n", 'r': "\r", 't': "\t", 'b': "\b", 'f': "\f",
                '(': "(", ')': ")", '\\': "\\"}

    def __init__(self, file_name):
        """Read *file_name* and detect its ``PDF-x.y`` version marker.

        A missing marker is reported on stderr and recorded in
        ``self.process``; it does not raise.
        """
        self.file_name = file_name
        with open(file_name, 'rb') as f:
            self.encrypted = f.read()
        self.process = True
        # Always defined, so supported() and get_passwords_for_JtR() are
        # safe to call even when the file is not a PDF.
        self.pdf_spec = b''
        marker = re.search(br'PDF-\d\.\d', self.encrypted)
        if marker is None:
            sys.stderr.write("%s is not a PDF file!\n" % file_name)
            self.process = False
        else:
            self.pdf_spec = marker.group(0)

    def supported(self):
        """Return True when the detected version is PDF 1.4, 1.5 or 1.6."""
        # pdf_spec is bytes, so it has to be compared against bytes literals.
        return self.pdf_spec in (b'PDF-1.4', b'PDF-1.5', b'PDF-1.6')

    def parse(self):
        """Build the ``$pdf$`` hash line for the loaded file.

        Returns:
            The hash as a UTF-8 byte string (a plain ``str`` on Python 2).
            When the encryption dictionary says metadata is not encrypted,
            ``:::`` and the XMP metadata summary are appended.
            ``None`` when the file is not a PDF or no usable trailer was
            found (the reason is written to stderr).

        Raises:
            RuntimeError: a required entry (Encrypt object, /V, /R, /P or
                /ID) could not be located.
        """
        if not self.process:
            return None

        try:
            trailer = self.get_trailer()
        except RuntimeError as e:
            sys.stderr.write("%s : %s\n" % (self.file_name, str(e)))
            return None

        object_id = self.get_object_id(b'Encrypt', trailer)
        if not object_id:
            raise RuntimeError("Could not find object id")
        encryption_dictionary = self.get_encryption_dictionary(object_id)

        v = self._require(br'\/V (\d)', encryption_dictionary,
                          "Could not find /V")
        r = self._require(br'\/R (\d)', encryption_dictionary,
                          "Could not find /R")

        # /Length is optional; when absent the key length defaults to 40.
        # If several /Length entries are present the largest one is used.
        length = b'40'
        longest = 0
        for candidate in re.findall(br'\/Length (\d+)', encryption_dictionary):
            if int(candidate) > longest:
                longest = int(candidate)
                length = candidate

        p = self._require(br'\/P (-?\d+)', encryption_dictionary,
                          "Could not find /P")
        meta_encrypted = self.is_meta_data_encrypted(encryption_dictionary)
        i_d = self._find_document_id(trailer)
        passwords = self.get_passwords_for_JtR(encryption_dictionary)

        output = '*'.join([
            '$pdf$' + v.decode('ascii'),
            r.decode('ascii'),
            length.decode('ascii'),
            p.decode('ascii'),
            '1' if meta_encrypted else '0',
            str(len(i_d) // 2),
            i_d,
            passwords,
        ])
        encoded = output.encode('UTF-8')
        if meta_encrypted:
            return encoded
        gecos = self.parse_meta_data(trailer)
        # Concatenate bytes rather than %-formatting them: on Python 3 the
        # latter would embed "b'...'" reprs in the result.
        return encoded + b':::' + gecos.encode('UTF-8')

    @staticmethod
    def _require(pattern, data, message):
        """Return group 1 of the first *pattern* match in *data*.

        Raises RuntimeError(*message*) when there is no match.
        """
        found = re.search(pattern, data)
        if found is None:
            raise RuntimeError(message)
        return found.group(1)

    @staticmethod
    def _to_hex(raw):
        """Return the lowercase two-digit-per-byte hex form of *raw*."""
        return ''.join('%02x' % byte for byte in bytearray(raw))

    def _find_document_id(self, trailer):
        """Return the first element of the trailer's /ID array as hex text.

        Only the /ID array itself is examined, so an unrelated ``<hex>``
        string earlier in the trailer cannot be mistaken for the ID.
        """
        hex_form = re.search(br'\/ID\s*\[\s*<(\w+)>\s*<\w+>\s*\]', trailer)
        if hex_form is not None:
            digits = hex_form.group(1).lower()
            return digits.decode('ascii') if PY3 else digits
        # some pdf files use () instead of <>; those hold the raw ID bytes,
        # which have to be hex-encoded for the hash line.
        literal_form = re.search(br'\/ID\s*\[\s*\((\w+)\)\s*\(\w+\)\s*\]',
                                 trailer)
        if literal_form is not None:
            return self._to_hex(literal_form.group(1))
        raise RuntimeError("Could not find /ID tag")

    def get_passwords_for_JtR(self, encryption_dictionary):
        """Return the /U and /O entries as ``len*hex`` fields joined by ``*``.

        For a PDF 1.7 marker /UE and /OE are included as well. Each entry
        may be a literal string ``(...)`` or a hex string ``<...>``; entries
        that are missing are skipped.
        """
        letters = [b"U", b"O"]
        if b"1.7" in self.pdf_spec:
            letters = [b"U", b"O", b"UE", b"OE"]
        parts = []
        for let in letters:
            # A literal string ends at the first ')' that is not part of a
            # backslash escape; "\\." consumes escape pairs so that both
            # "\)" and "\\" followed by the real ')' are handled.
            literal = re.search(
                br'\/' + let + br'\s*\((?:\\.|[^\\)])+\)',
                encryption_dictionary, re.DOTALL)
            if literal is not None:
                parts.append(
                    self.get_password_from_byte_string(literal.group(0)))
                continue
            hex_form = re.search(br'\/' + let + br'\s*<(\w+)>',
                                 encryption_dictionary)
            if hex_form is None:
                continue
            digits = hex_form.group(1).lower()
            if PY3:
                digits = digits.decode('ascii')
            parts.append(str(len(digits) // 2) + '*' + digits)
        return "*".join(parts)

    def is_meta_data_encrypted(self, encryption_dictionary):
        """Return False only when /EncryptMetadata is explicitly ``false``."""
        flag = re.search(br'\/EncryptMetadata\s+(\w+)', encryption_dictionary)
        return flag is None or flag.group(1) != b"false"

    def parse_meta_data(self, trailer):
        """Return the XMP metadata summary of the document, or ``""``.

        Follows /Root from *trailer* and /Metadata from the root object.
        """
        root_object_id = self.get_object_id(b'Root', trailer)
        if not root_object_id:
            return ""
        root_object = self.get_pdf_object(root_object_id)
        object_id = self.get_object_id(b'Metadata', root_object)
        if not object_id:
            return ""
        xmp_metadata_object = self.get_pdf_object(object_id)
        return self.get_xmp_values(xmp_metadata_object)

    def get_xmp_values(self, xmp_metadata_object):
        """Summarise an XMP metadata object as one space-separated string.

        The fields are dc:title, dc:creator, dc:description, dc:subject and
        the first four characters of xmp:CreateDate; colons are removed from
        the result. Returns ``""`` when the stream is not parseable XML.
        """
        payload = xmp_metadata_object.partition(b"stream")[2]
        payload = payload.partition(b"endstream")[0]
        try:
            xml_metadata = minidom.parseString(payload)
        except Exception:
            # Metadata is optional extra information: an unparseable stream
            # must not abort hash extraction.
            return ""
        values = [self.get_dc_value(tag, xml_metadata)
                  for tag in ("title", "creator", "description", "subject")]
        created = xml_metadata.getElementsByTagName("xmp:CreateDate")
        if created:
            text = getattr(created[0].firstChild, "data", None)
            if text is not None:
                values.append(str(text[0:4]))
        return " ".join(values).replace(":", "")

    def get_dc_value(self, value, xml_metadata):
        """Return the text of the first rdf:li inside ``dc:<value>``, or ``""``."""
        containers = xml_metadata.getElementsByTagName("dc:" + value)
        if not containers:
            return ""
        items = containers[0].getElementsByTagName("rdf:li")
        if not items or items[0].firstChild is None:
            return ""
        return getattr(items[0].firstChild, "data", "")

    def get_encryption_dictionary(self, object_id):
        """Return the raw bytes of the object *object_id* (e.g. ``b"5 0"``)."""
        encryption_dictionary = self.get_pdf_object(object_id)
        for o in encryption_dictionary.split(b"endobj"):
            if object_id + b" obj" in o:
                encryption_dictionary = o
        return encryption_dictionary

    def get_object_id(self, name, trailer):
        """Return the ``b"<num> <gen>"`` id referenced by ``/<name> n g R``.

        Returns ``b""`` when *trailer* holds no such reference. The id is
        normalised to a single space so it matches the ``n g obj`` header
        that get_pdf_object() searches for.
        """
        found = re.search(
            br'\/' + re.escape(name) + br'\s+(\d+)\s+(\d+)\s+R', trailer)
        if found is None:
            return b""
        return found.group(1) + b" " + found.group(2)

    def get_pdf_object(self, object_id):
        """Return the bytes from ``<object_id> obj`` through ``endobj``.

        The object header is looked up after a CR first, then after an LF.
        When it is not found the result is just the header and ``endobj``.
        """
        header = object_id + b" obj"
        output = header + self.encrypted.partition(b"\r" + header)[2]
        if output == header:
            output = header + self.encrypted.partition(b"\n" + header)[2]
        return output.partition(b"endobj")[0] + b"endobj"

    def get_trailer(self):
        """Return the trailer text that references the encryption dictionary.

        Falls back to the span from ``DecodeParms`` to ``stream`` when no
        classic ``trailer`` section containing /ID exists.

        Raises:
            RuntimeError: no trailer was found, or it has no Encrypt entry.
        """
        trailer = self.get_data_between(b"trailer", b">>", b"/ID")
        if not trailer:
            trailer = self.get_data_between(b"DecodeParms", b"stream", b"")
            if not trailer:
                raise RuntimeError("Can't find trailer")
        if b"Encrypt" not in trailer:
            raise RuntimeError("File not encrypted")
        return trailer

    def get_data_between(self, s1, s2, tag):
        """Return the lines from one containing *s1* to one containing *s2*.

        Lines are split on CR/LF and joined without separators. When *tag*
        is non-empty, a candidate span that does not contain it is dropped
        and the search continues with the following lines.
        """
        collected = bytearray()
        inside_first = False
        for line in re.split(br'\n|\r', self.encrypted):
            inside_first = inside_first or s1 in line
            if not inside_first:
                continue
            collected += line
            if s2 in line:
                if not tag or tag in collected:
                    break
                del collected[:]
                inside_first = False
        return bytes(collected)

    def get_hex_byte(self, o_or_u, i):
        """Return byte *i* of *o_or_u* as hex digits without zero padding."""
        byte = o_or_u[i]
        if not isinstance(byte, int):
            # Python 2: indexing a byte string yields a 1-char string.
            byte = ord(byte)
        return '%x' % byte

    def get_password_from_byte_string(self, o_or_u):
        """Convert a literal-string entry such as ``/U(...)`` to ``len*hex``.

        *o_or_u* is the whole matched entry: the name, optional whitespace,
        and the parenthesised string including its closing ``)``. Escape
        sequences are decoded before hex-encoding, and the length is the
        number of decoded bytes.
        """
        opener = o_or_u.find(b'(')
        if opener == -1:
            # No parenthesis at all: keep the historical fixed-width prefix.
            opener = 3
        decoded = self._decode_literal_string(o_or_u[opener + 1:-1])
        return str(len(decoded)) + '*' + self._to_hex(decoded)

    def _decode_literal_string(self, body):
        """Decode backslash escapes in the inside of a PDF literal string."""
        raw = bytearray(body)
        decoded = bytearray()
        i = 0
        n = len(raw)
        while i < n:
            byte = raw[i]
            i += 1
            if byte != 0x5c:
                decoded.append(byte)
                continue
            if i >= n:
                break  # dangling backslash: nothing left to escape
            nxt = raw[i]
            if 0x30 <= nxt <= 0x37:
                # \ddd: one to three octal digits, high-order overflow dropped
                value = 0
                digits = 0
                while i < n and digits < 3 and 0x30 <= raw[i] <= 0x37:
                    value = value * 8 + (raw[i] - 0x30)
                    i += 1
                    digits += 1
                decoded.append(value & 0xFF)
            elif nxt in (0x0a, 0x0d):
                # backslash + end-of-line is a line continuation
                i += 1
                if nxt == 0x0d and i < n and raw[i] == 0x0a:
                    i += 1
            else:
                decoded.append(ord(self.unescape("\\" + chr(nxt))))
                i += 1
        return decoded

    def unescape(self, esc):
        """Return the character that the two-character escape *esc* denotes.

        Escapes that are not defined for PDF literal strings yield the
        character after the backslash unchanged.
        """
        return self._ESCAPES.get(esc[1:], esc[1:])


# --- patch text that shared the last source line with the class above ---
# diff --git a/thirdparty/termcolor.py b/thirdparty/termcolor.py
# new file mode 100644
# index 0000000..f11b824
# --- /dev/null
# +++ b/thirdparty/termcolor.py
# @@ -0,0 +1,168 @@
# coding: utf-8
# Copyright (c) 2008-2011 Volvox Development Team
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and
# associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in
# all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
# THE SOFTWARE.
#
# Author: Konstantin Lepa

"""ANSI color formatting for output in terminal."""

from __future__ import print_function
import os


__all__ = ['colored', 'cprint']
# The list used to be published under this misspelled name, which Python
# ignores for star-imports; kept as an alias for code that reads it.
__ALL__ = __all__

VERSION = (1, 1, 0)

# Attribute name -> SGR code. Codes 3 and 6 have no name here: the empty
# placeholders keep the numbering aligned and are removed right after.
ATTRIBUTES = dict(zip(
    [
        'bold',
        'dark',
        '',
        'underline',
        'blink',
        '',
        'reverse',
        'concealed',
    ],
    range(1, 9)
))
del ATTRIBUTES['']


# Background colour name -> SGR code (40-47).
HIGHLIGHTS = dict(zip(
    [
        'on_grey',
        'on_red',
        'on_green',
        'on_yellow',
        'on_blue',
        'on_magenta',
        'on_cyan',
        'on_white',
    ],
    range(40, 48)
))


# Foreground colour name -> SGR code (30-37).
COLORS = dict(zip(
    [
        'grey',
        'red',
        'green',
        'yellow',
        'blue',
        'magenta',
        'cyan',
        'white',
    ],
    range(30, 38)
))


RESET = '\033[0m'


def colored(text, color=None, on_color=None, attrs=None):
    """Colorize text.

    Available text colors:
        grey, red, green, yellow, blue, magenta, cyan, white.

    Available text highlights:
        on_grey, on_red, on_green, on_yellow, on_blue, on_magenta, on_cyan,
        on_white.

    Available attributes:
        bold, dark, underline, blink, reverse, concealed.

    When the environment variable ANSI_COLORS_DISABLED is set (to any
    value), *text* is returned unchanged. Otherwise the escape codes are
    prepended (colour innermost, attributes outermost) and a single reset
    code is appended.

    Raises KeyError for a colour, highlight or attribute name that is not
    listed above.

    Example:
        colored('Hello, World!', 'red', 'on_grey', ['bold', 'blink'])
        colored('Hello, World!', 'green')
    """
    if os.getenv('ANSI_COLORS_DISABLED') is None:
        fmt_str = '\033[%dm%s'
        if color is not None:
            text = fmt_str % (COLORS[color], text)

        if on_color is not None:
            text = fmt_str % (HIGHLIGHTS[on_color], text)

        if attrs is not None:
            for attr in attrs:
                text = fmt_str % (ATTRIBUTES[attr], text)

        # Format instead of "+=" so that non-string text (e.g. a number)
        # with no styling applied does not raise TypeError.
        text = '%s%s' % (text, RESET)
    return text


def cprint(text, color=None, on_color=None, attrs=None, **kwargs):
    """Print colorized text.

    Extra keyword arguments (sep, end, file, ...) are passed to print().
    """
    print(colored(text, color, on_color, attrs), **kwargs)


def _demo():
    """Print a sample of every colour, highlight and attribute."""
    print('Current terminal type: %s' % os.getenv('TERM'))
    print('Test basic colors:')
    cprint('Grey color', 'grey')
    cprint('Red color', 'red')
    cprint('Green color', 'green')
    cprint('Yellow color', 'yellow')
    cprint('Blue color', 'blue')
    cprint('Magenta color', 'magenta')
    cprint('Cyan color', 'cyan')
    cprint('White color', 'white')
    print('-' * 78)

    print('Test highlights:')
    cprint('On grey color', on_color='on_grey')
    cprint('On red color', on_color='on_red')
    cprint('On green color', on_color='on_green')
    cprint('On yellow color', on_color='on_yellow')
    cprint('On blue color', on_color='on_blue')
    cprint('On magenta color', on_color='on_magenta')
    cprint('On cyan color', on_color='on_cyan')
    cprint('On white color', color='grey', on_color='on_white')
    print('-' * 78)

    print('Test attributes:')
    cprint('Bold grey color', 'grey', attrs=['bold'])
    cprint('Dark red color', 'red', attrs=['dark'])
    cprint('Underline green color', 'green', attrs=['underline'])
    cprint('Blink yellow color', 'yellow', attrs=['blink'])
    cprint('Reversed blue color', 'blue', attrs=['reverse'])
    cprint('Concealed Magenta color', 'magenta', attrs=['concealed'])
    cprint('Bold underline reverse cyan color', 'cyan',
           attrs=['bold', 'underline', 'reverse'])
    cprint('Dark blink concealed white color', 'white',
           attrs=['dark', 'blink', 'concealed'])
    print('-' * 78)

    print('Test mixing:')
    cprint('Underline red on grey color', 'red', 'on_grey',
           ['underline'])
    cprint('Reversed green on red color', 'green', 'on_red', ['reverse'])


if __name__ == '__main__':
    _demo()