From 80d8344a3d9769af94933dc4a0f30ed770c7a601 Mon Sep 17 00:00:00 2001 From: Rohit kumar Date: Thu, 18 Jul 2024 13:08:46 +0530 Subject: [PATCH] Releasing exclude_columns support in mysql --- fingerprint.yml | 2 +- hawk_scanner/commands/couchdb.py | 4 ++-- hawk_scanner/commands/firebase.py | 4 ++-- hawk_scanner/commands/gcs.py | 4 ++-- hawk_scanner/commands/gdrive.py | 6 +++--- hawk_scanner/commands/gdrive_workspace.py | 6 +++--- hawk_scanner/commands/mongodb.py | 8 ++++---- hawk_scanner/commands/mysql.py | 11 +++++++---- hawk_scanner/commands/postgresql.py | 10 +++++----- hawk_scanner/commands/redis.py | 4 ++-- hawk_scanner/commands/s3.py | 4 ++-- hawk_scanner/commands/slack.py | 8 ++++---- hawk_scanner/commands/text.py | 4 ++-- setup.py | 2 +- 14 files changed, 40 insertions(+), 37 deletions(-) diff --git a/fingerprint.yml b/fingerprint.yml index e7345d7..2b80bfb 100644 --- a/fingerprint.yml +++ b/fingerprint.yml @@ -1,5 +1,5 @@ Email: "\\b[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\\.[A-Za-z]{2,}\\b" -Phone: "\\b\\+\\d{1,3}[-.]?\\d{3}[-.]?\\d{4}\\b" +Phone Number: "^\\(\\+\\d{1,2}\\s\\)?\\(?\\d{3}\\)?[\\s.-]\\d{3}[\\s.-]\\d{4}$" Aadhar: "\\b\\d{4}[-.]?\\d{4}[-.]?\\d{4}\\b" PAN Number: "[A-Z]{5}[0-9]{4}[A-Z]{1}" Amazon MWS Auth Token: "amzn\\.mws\\.[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}" diff --git a/hawk_scanner/commands/couchdb.py b/hawk_scanner/commands/couchdb.py index d4d6592..94ec308 100644 --- a/hawk_scanner/commands/couchdb.py +++ b/hawk_scanner/commands/couchdb.py @@ -5,7 +5,7 @@ console = Console() -def connect_couchdb(host, port, username, password, database): +def connect_couchdb(args, host, port, username, password, database): try: server = couchdb.Server(f"http://{username}:{password}@{host}:{port}/") if database not in server: @@ -67,7 +67,7 @@ def execute(args): system.print_error(args, f"Incomplete CouchDB configuration for key: {key}") continue - db = connect_couchdb(host, port, username, password, database) + db = 
connect_couchdb(args, host, port, username, password, database) if db: results += check_data_patterns(db, patterns, key, database) else: diff --git a/hawk_scanner/commands/firebase.py b/hawk_scanner/commands/firebase.py index c159d76..66b0eb9 100644 --- a/hawk_scanner/commands/firebase.py +++ b/hawk_scanner/commands/firebase.py @@ -5,7 +5,7 @@ from hawk_scanner.internals import system import os -def connect_firebase(credentials_file, bucket_name): +def connect_firebase(args, credentials_file, bucket_name): try: cred = credentials.Certificate(credentials_file) firebase_admin.initialize_app(cred) @@ -31,7 +31,7 @@ def execute(args): exclude_patterns = config.get(key, {}).get('exclude_patterns', []) if credentials_file and bucket_name: - bucket = connect_firebase(credentials_file, bucket_name) + bucket = connect_firebase(args, credentials_file, bucket_name) if bucket: for blob in bucket.list_blobs(): file_name = blob.name diff --git a/hawk_scanner/commands/gcs.py b/hawk_scanner/commands/gcs.py index 059fbe5..b10482f 100644 --- a/hawk_scanner/commands/gcs.py +++ b/hawk_scanner/commands/gcs.py @@ -7,7 +7,7 @@ import time import yaml -def connect_google_cloud(bucket_name, credentials_file): +def connect_google_cloud(args, bucket_name, credentials_file): try: ## connect using credentials file os.environ['GOOGLE_APPLICATION_CREDENTIALS'] = credentials_file @@ -38,7 +38,7 @@ def execute(args): credentials_file = config.get('credentials_file') if bucket_name: - bucket = connect_google_cloud(bucket_name, credentials_file) + bucket = connect_google_cloud(args, bucket_name, credentials_file) if bucket: for blob in bucket.list_blobs(): file_name = blob.name diff --git a/hawk_scanner/commands/gdrive.py b/hawk_scanner/commands/gdrive.py index d6c3cd2..8e04c7b 100644 --- a/hawk_scanner/commands/gdrive.py +++ b/hawk_scanner/commands/gdrive.py @@ -5,7 +5,7 @@ from hawk_scanner.internals import system from pydrive2.fs import GDriveFileSystem -def 
connect_google_drive(credentials_file): +def connect_google_drive(args, credentials_file): credentials = open(credentials_file, 'r').read() credentials = json.loads(credentials) ## if installed key is in the credentials file, use it @@ -23,7 +23,7 @@ def connect_google_drive(credentials_file): print(f"Failed to connect to Google Drive: {e}") os.system("rm -rf client_secrets.json") -def download_file(drive, file_obj, base_path): +def download_file(args, drive, file_obj, base_path): try: file_name = file_obj['title'] file_id = file_obj['id'] @@ -84,7 +84,7 @@ def execute(args): folder_name = config.get('folder_name') exclude_patterns = config.get(key, {}).get('exclude_patterns', []) is_cache_enabled = config.get('cache', False) - drive = connect_google_drive(credentials_file) + drive = connect_google_drive(args, credentials_file) if not os.path.exists("data/google_drive"): os.makedirs("data/google_drive") if drive: diff --git a/hawk_scanner/commands/gdrive_workspace.py b/hawk_scanner/commands/gdrive_workspace.py index 26f86d4..746272b 100644 --- a/hawk_scanner/commands/gdrive_workspace.py +++ b/hawk_scanner/commands/gdrive_workspace.py @@ -22,7 +22,7 @@ def connect_google_drive(credentials_file, impersonate_user=None): except Exception as e: print(f"Failed to connect to Google Drive: {e}") -def download_file(drive, file_obj, base_path): +def download_file(args, drive, file_obj, base_path): print(f"Downloading file: {file_obj['name']} to {base_path}") try: file_name = file_obj['name'] @@ -47,7 +47,7 @@ def download_file(drive, file_obj, base_path): os.makedirs(folder_path) folder_files = drive.files().list(q=f"'{file_id}' in parents").execute().get('files', []) for folder_file in folder_files: - download_file(drive, folder_file, folder_path) + download_file(args, drive, folder_file, folder_path) else: try: # Check if the file is a Google Docs type @@ -126,7 +126,7 @@ def execute(args): is_cache_enabled = True if is_cache_enabled: - download_file(drive, file_obj, 
"data/google_drive/") + download_file(args, drive, file_obj, "data/google_drive/") matches = system.read_match_strings(args, file_path, 'gdrive_workspace') file_name = file_name.replace('-runtime.pdf', '') diff --git a/hawk_scanner/commands/mongodb.py b/hawk_scanner/commands/mongodb.py index f532cc4..f8a3207 100644 --- a/hawk_scanner/commands/mongodb.py +++ b/hawk_scanner/commands/mongodb.py @@ -4,7 +4,7 @@ console = Console() -def connect_mongodb(host, port, username, password, database, uri=None): +def connect_mongodb(args, host, port, username, password, database, uri=None): try: if uri: client = pymongo.MongoClient(uri) @@ -23,7 +23,7 @@ def connect_mongodb(host, port, username, password, database, uri=None): return None -def check_data_patterns(db, patterns, profile_name, database_name, limit_start=0, limit_end=500, whitelisted_collections=None): +def check_data_patterns(args, db, patterns, profile_name, database_name, limit_start=0, limit_end=500, whitelisted_collections=None): results = [] all_collections = db.list_collection_names() @@ -34,7 +34,7 @@ def check_data_patterns(db, patterns, profile_name, database_name, limit_start=0 for collection_name in collections_to_scan: if collection_name not in all_collections: - system.print_warning(f"Collection {collection_name} not found in the database. Skipping.") + system.print_error(args, f"Collection {collection_name} not found in the database. 
Skipping.") continue collection = db[collection_name] @@ -92,7 +92,7 @@ def execute(args): - db = connect_mongodb(host, port, username, password, database, uri) + db = connect_mongodb(args, host, port, username, password, database, uri) if db: - results += check_data_patterns(db, patterns, key, database, limit_start=limit_start, limit_end=limit_end, whitelisted_collections=collections) + results += check_data_patterns(args, db, patterns, key, database, limit_start=limit_start, limit_end=limit_end, whitelisted_collections=collections) else: system.print_error(args, "No MongoDB connection details found in connection.yml") else: diff --git a/hawk_scanner/commands/mysql.py b/hawk_scanner/commands/mysql.py index fe43886..eda66fc 100644 --- a/hawk_scanner/commands/mysql.py +++ b/hawk_scanner/commands/mysql.py @@ -4,7 +4,7 @@ console = Console() -def connect_mysql(host, port, user, password, database): +def connect_mysql(args, host, port, user, password, database): try: conn = pymysql.connect( host=host, @@ -19,7 +19,7 @@ def connect_mysql(host, port, user, password, database): except Exception as e: system.print_error(args, f"Failed to connect to MySQL database at {host} with error: {e}") -def check_data_patterns(conn, patterns, profile_name, database_name, limit_start=0, limit_end=500, whitelisted_tables=None): +def check_data_patterns(args, conn, patterns, profile_name, database_name, limit_start=0, limit_end=500, whitelisted_tables=None, exclude_columns=None): cursor = conn.cursor() # Get the list of tables to scan @@ -40,6 +40,8 @@ def check_data_patterns(conn, patterns, profile_name, database_name, limit_start data_count = 1 for row in cursor.fetchall(): for column, value in zip(columns, row): + if exclude_columns and column in exclude_columns: + continue if value: value_str = str(value) matches = system.match_strings(args, value_str) @@ -84,12 +86,13 @@ def execute(args): limit_start = config.get('limit_start', 0) limit_end = config.get('limit_end', 500) tables = config.get('tables', []) + exclude_columns = config.get('exclude_columns', []) 
if host and user and database: system.print_info(args, f"Checking MySQL Profile {key} and database {database}") - conn = connect_mysql(host, port, user, password, database) + conn = connect_mysql(args, host, port, user, password, database) if conn: - results += check_data_patterns(conn, patterns, key, database, limit_start=limit_start, limit_end=limit_end, whitelisted_tables=tables) + results += check_data_patterns(args, conn, patterns, key, database, limit_start=limit_start, limit_end=limit_end, whitelisted_tables=tables, exclude_columns=exclude_columns) conn.close() else: system.print_error(args, f"Incomplete MySQL configuration for key: {key}") diff --git a/hawk_scanner/commands/postgresql.py b/hawk_scanner/commands/postgresql.py index 141edbd..423e48d 100644 --- a/hawk_scanner/commands/postgresql.py +++ b/hawk_scanner/commands/postgresql.py @@ -4,7 +4,7 @@ console = Console() -def connect_postgresql(host, port, user, password, database): +def connect_postgresql(args, host, port, user, password, database): try: conn = psycopg2.connect( host=host, @@ -19,7 +19,7 @@ def connect_postgresql(host, port, user, password, database): except Exception as e: system.print_error(args, f"Failed to connect to PostgreSQL database at {host} with error: {e}") -def check_data_patterns(conn, patterns, profile_name, database_name, limit_start=0, limit_end=500, whitelisted_tables=None): +def check_data_patterns(args, conn, patterns, profile_name, database_name, limit_start=0, limit_end=500, whitelisted_tables=None): cursor = conn.cursor() # Get the list of tables to scan @@ -35,7 +35,7 @@ def check_data_patterns(conn, patterns, profile_name, database_name, limit_start results = [] for table in tables_to_scan: if table not in all_tables: - system.print_warning(f"Table {table} not found in the database. Skipping.") + system.print_error(args, f"Table {table} not found in the database. 
Skipping.") continue cursor.execute(f"SELECT * FROM {table} LIMIT {limit_end} OFFSET {limit_start}") @@ -92,9 +92,9 @@ def execute(args): if host and user and password and database: system.print_info(args, f"Checking PostgreSQL Profile {key}, database {database}") - conn = connect_postgresql(host, port, user, password, database) + conn = connect_postgresql(args, host, port, user, password, database) if conn: - results += check_data_patterns(conn, patterns, key, database, limit_start=limit_start, limit_end=limit_end, whitelisted_tables=tables) + results += check_data_patterns(args, conn, patterns, key, database, limit_start=limit_start, limit_end=limit_end, whitelisted_tables=tables) conn.close() else: system.print_error(args, f"Incomplete PostgreSQL configuration for key: {key}") diff --git a/hawk_scanner/commands/redis.py b/hawk_scanner/commands/redis.py index 03dc11c..1ec5031 100644 --- a/hawk_scanner/commands/redis.py +++ b/hawk_scanner/commands/redis.py @@ -6,7 +6,7 @@ console = Console() -def connect_redis(host, port, password=None): +def connect_redis(args, host, port, password=None): try: r = redis.Redis(host=host, port=port, password=password) if r.ping(): @@ -61,7 +61,7 @@ def execute(args): password = config.get('password') if host: - redis_instance = connect_redis(host, port, password) + redis_instance = connect_redis(args, host, port, password) if redis_instance: results = check_data_patterns(redis_instance, patterns, profile_name, host) redis_instance.close() diff --git a/hawk_scanner/commands/s3.py b/hawk_scanner/commands/s3.py index d3b21ef..704d4cd 100644 --- a/hawk_scanner/commands/s3.py +++ b/hawk_scanner/commands/s3.py @@ -7,7 +7,7 @@ console = Console() -def connect_s3(access_key, secret_key, bucket_name): +def connect_s3(args, access_key, secret_key, bucket_name): try: session = boto3.Session( aws_access_key_id=access_key, @@ -50,7 +50,7 @@ def execute(args): system.print_info(args, f"Checking S3 profile: '{key}' with bucket '{bucket_name}'") 
profile_name = key if access_key and secret_key and bucket_name: - bucket = connect_s3(access_key, secret_key, bucket_name) + bucket = connect_s3(args, access_key, secret_key, bucket_name) if bucket: for obj in bucket.objects.all(): diff --git a/hawk_scanner/commands/slack.py b/hawk_scanner/commands/slack.py index 1e71a7f..d23a16b 100644 --- a/hawk_scanner/commands/slack.py +++ b/hawk_scanner/commands/slack.py @@ -6,7 +6,7 @@ console = Console() -def connect_slack(token): +def connect_slack(args, token): try: client = WebClient(token=token) # Test the connection by making an API call @@ -21,7 +21,7 @@ def connect_slack(token): system.print_error(args, f"Failed to connect to Slack with error: {e.response['error']}") return None -def check_slack_messages(client, patterns, profile_name, channel_types, channel_names=None): +def check_slack_messages(args, client, patterns, profile_name, channel_types, channel_names=None): results = [] try: team_info = client.team_info() @@ -90,9 +90,9 @@ def execute(args): system.print_error(args, f"Incomplete Slack configuration for key: {key}") continue - client = connect_slack(token) + client = connect_slack(args, token) if client: - results += check_slack_messages(client, patterns, key, channel_types, channel_names) + results += check_slack_messages(args, client, patterns, key, channel_types, channel_names) else: system.print_error(args, "No Slack connection details found in connection.yml") else: diff --git a/hawk_scanner/commands/text.py b/hawk_scanner/commands/text.py index 33c64aa..f128a49 100644 --- a/hawk_scanner/commands/text.py +++ b/hawk_scanner/commands/text.py @@ -3,7 +3,7 @@ console = Console() -def check_data_patterns(value, patterns, profile_name): +def check_data_patterns(args, value, patterns, profile_name): value_str = str(value) matches = system.match_strings(args, value_str) results = [] @@ -30,7 +30,7 @@ def execute(args): if text_config: for key, config in text_config.items(): text = config.get('text', None) - 
results += check_data_patterns(text, patterns, key) + results += check_data_patterns(args, text, patterns, key) else: system.print_error(args, "No text connection details found in connection.yml") else: diff --git a/setup.py b/setup.py index 0032376..165cc9a 100644 --- a/setup.py +++ b/setup.py @@ -1,4 +1,4 @@ -VERSION = "0.3.16" +VERSION = "0.3.17" from setuptools import setup, find_packages