Skip to content

Commit

Permalink
Releasing exclude_columns support in mysql
Browse files: browse the repository at this point in the history
  • Loading branch information
rohitcoder committed Jul 18, 2024
1 parent 7db9e1a commit 80d8344
Show file tree
Hide file tree
Showing 14 changed files with 40 additions and 37 deletions.
2 changes: 1 addition & 1 deletion fingerprint.yml
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
Email: "\\b[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\\.[A-Za-z]{2,}\\b"
Phone: "\\b\\+\\d{1,3}[-.]?\\d{3}[-.]?\\d{4}\\b"
Phone Number: "^\\(\\+\\d{1,2}\\s\\)?\\(?\\d{3}\\)?[\\s.-]\\d{3}[\\s.-]\\d{4}$"
Aadhar: "\\b\\d{4}[-.]?\\d{4}[-.]?\\d{4}\\b"
PAN Number: "[A-Z]{5}[0-9]{4}[A-Z]{1}"
Amazon MWS Auth Token: "amzn\\.mws\\.[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}"
Expand Down
4 changes: 2 additions & 2 deletions hawk_scanner/commands/couchdb.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@

console = Console()

def connect_couchdb(host, port, username, password, database):
def connect_couchdb(args, host, port, username, password, database):
try:
server = couchdb.Server(f"http://{username}:{password}@{host}:{port}/")
if database not in server:
Expand Down Expand Up @@ -67,7 +67,7 @@ def execute(args):
system.print_error(args, f"Incomplete CouchDB configuration for key: {key}")
continue

db = connect_couchdb(host, port, username, password, database)
db = connect_couchdb(args, host, port, username, password, database)
if db:
results += check_data_patterns(db, patterns, key, database)
else:
Expand Down
4 changes: 2 additions & 2 deletions hawk_scanner/commands/firebase.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
from hawk_scanner.internals import system
import os

def connect_firebase(credentials_file, bucket_name):
def connect_firebase(args, credentials_file, bucket_name):
try:
cred = credentials.Certificate(credentials_file)
firebase_admin.initialize_app(cred)
Expand All @@ -31,7 +31,7 @@ def execute(args):
exclude_patterns = config.get(key, {}).get('exclude_patterns', [])

if credentials_file and bucket_name:
bucket = connect_firebase(credentials_file, bucket_name)
bucket = connect_firebase(args, credentials_file, bucket_name)
if bucket:
for blob in bucket.list_blobs():
file_name = blob.name
Expand Down
4 changes: 2 additions & 2 deletions hawk_scanner/commands/gcs.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
import time
import yaml

def connect_google_cloud(bucket_name, credentials_file):
def connect_google_cloud(args, bucket_name, credentials_file):
try:
## connect using credentials file
os.environ['GOOGLE_APPLICATION_CREDENTIALS'] = credentials_file
Expand Down Expand Up @@ -38,7 +38,7 @@ def execute(args):
credentials_file = config.get('credentials_file')

if bucket_name:
bucket = connect_google_cloud(bucket_name, credentials_file)
bucket = connect_google_cloud(args, bucket_name, credentials_file)
if bucket:
for blob in bucket.list_blobs():
file_name = blob.name
Expand Down
6 changes: 3 additions & 3 deletions hawk_scanner/commands/gdrive.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
from hawk_scanner.internals import system
from pydrive2.fs import GDriveFileSystem

def connect_google_drive(credentials_file):
def connect_google_drive(args, credentials_file):
credentials = open(credentials_file, 'r').read()
credentials = json.loads(credentials)
## if installed key is in the credentials file, use it
Expand All @@ -23,7 +23,7 @@ def connect_google_drive(credentials_file):
print(f"Failed to connect to Google Drive: {e}")
os.system("rm -rf client_secrets.json")

def download_file(drive, file_obj, base_path):
def download_file(args, drive, file_obj, base_path):
try:
file_name = file_obj['title']
file_id = file_obj['id']
Expand Down Expand Up @@ -84,7 +84,7 @@ def execute(args):
folder_name = config.get('folder_name')
exclude_patterns = config.get(key, {}).get('exclude_patterns', [])
is_cache_enabled = config.get('cache', False)
drive = connect_google_drive(credentials_file)
drive = connect_google_drive(args, credentials_file)
if not os.path.exists("data/google_drive"):
os.makedirs("data/google_drive")
if drive:
Expand Down
6 changes: 3 additions & 3 deletions hawk_scanner/commands/gdrive_workspace.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ def connect_google_drive(credentials_file, impersonate_user=None):
except Exception as e:
print(f"Failed to connect to Google Drive: {e}")

def download_file(drive, file_obj, base_path):
def download_file(args, drive, file_obj, base_path):
print(f"Downloading file: {file_obj['name']} to {base_path}")
try:
file_name = file_obj['name']
Expand All @@ -47,7 +47,7 @@ def download_file(drive, file_obj, base_path):
os.makedirs(folder_path)
folder_files = drive.files().list(q=f"'{file_id}' in parents").execute().get('files', [])
for folder_file in folder_files:
download_file(drive, folder_file, folder_path)
download_file(args, drive, folder_file, folder_path)
else:
try:
# Check if the file is a Google Docs type
Expand Down Expand Up @@ -126,7 +126,7 @@ def execute(args):
is_cache_enabled = True

if is_cache_enabled:
download_file(drive, file_obj, "data/google_drive/")
download_file(args, drive, file_obj, "data/google_drive/")

matches = system.read_match_strings(args, file_path, 'gdrive_workspace')
file_name = file_name.replace('-runtime.pdf', '')
Expand Down
8 changes: 4 additions & 4 deletions hawk_scanner/commands/mongodb.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@

console = Console()

def connect_mongodb(host, port, username, password, database, uri=None):
def connect_mongodb(args, host, port, username, password, database, uri=None):
try:
if uri:
client = pymongo.MongoClient(uri)
Expand All @@ -23,7 +23,7 @@ def connect_mongodb(host, port, username, password, database, uri=None):
return None


def check_data_patterns(db, patterns, profile_name, database_name, limit_start=0, limit_end=500, whitelisted_collections=None):
def check_data_patterns(args, db, patterns, profile_name, database_name, limit_start=0, limit_end=500, whitelisted_collections=None):
results = []
all_collections = db.list_collection_names()

Expand All @@ -34,7 +34,7 @@ def check_data_patterns(db, patterns, profile_name, database_name, limit_start=0

for collection_name in collections_to_scan:
if collection_name not in all_collections:
system.print_warning(f"Collection {collection_name} not found in the database. Skipping.")
system.print_error(args, f"Collection {collection_name} not found in the database. Skipping.")
continue

collection = db[collection_name]
Expand Down Expand Up @@ -92,7 +92,7 @@ def execute(args):

db = connect_mongodb(host, port, username, password, database, uri)
if db:
results += check_data_patterns(db, patterns, key, database, limit_start=limit_start, limit_end=limit_end, whitelisted_collections=collections)
results += check_data_patterns(args, db, patterns, key, database, limit_start=limit_start, limit_end=limit_end, whitelisted_collections=collections)
else:
system.print_error(args, "No MongoDB connection details found in connection.yml")
else:
Expand Down
11 changes: 7 additions & 4 deletions hawk_scanner/commands/mysql.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@

console = Console()

def connect_mysql(host, port, user, password, database):
def connect_mysql(args, host, port, user, password, database):
try:
conn = pymysql.connect(
host=host,
Expand All @@ -19,7 +19,7 @@ def connect_mysql(host, port, user, password, database):
except Exception as e:
system.print_error(args, f"Failed to connect to MySQL database at {host} with error: {e}")

def check_data_patterns(conn, patterns, profile_name, database_name, limit_start=0, limit_end=500, whitelisted_tables=None):
def check_data_patterns(args, conn, patterns, profile_name, database_name, limit_start=0, limit_end=500, whitelisted_tables=None, exclude_columns=None):
cursor = conn.cursor()

# Get the list of tables to scan
Expand All @@ -40,6 +40,8 @@ def check_data_patterns(conn, patterns, profile_name, database_name, limit_start
data_count = 1
for row in cursor.fetchall():
for column, value in zip(columns, row):
if exclude_columns and column in exclude_columns:
continue
if value:
value_str = str(value)
matches = system.match_strings(args, value_str)
Expand Down Expand Up @@ -84,12 +86,13 @@ def execute(args):
limit_start = config.get('limit_start', 0)
limit_end = config.get('limit_end', 500)
tables = config.get('tables', [])
exclude_columns = config.get('exclude_columns', [])

if host and user and database:
system.print_info(args, f"Checking MySQL Profile {key} and database {database}")
conn = connect_mysql(host, port, user, password, database)
conn = connect_mysql(args, host, port, user, password, database)
if conn:
results += check_data_patterns(conn, patterns, key, database, limit_start=limit_start, limit_end=limit_end, whitelisted_tables=tables)
results += check_data_patterns(args, conn, patterns, key, database, limit_start=limit_start, limit_end=limit_end, whitelisted_tables=tables, exclude_columns=exclude_columns)
conn.close()
else:
system.print_error(args, f"Incomplete MySQL configuration for key: {key}")
Expand Down
10 changes: 5 additions & 5 deletions hawk_scanner/commands/postgresql.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@

console = Console()

def connect_postgresql(host, port, user, password, database):
def connect_postgresql(args, host, port, user, password, database):
try:
conn = psycopg2.connect(
host=host,
Expand All @@ -19,7 +19,7 @@ def connect_postgresql(host, port, user, password, database):
except Exception as e:
system.print_error(args, f"Failed to connect to PostgreSQL database at {host} with error: {e}")

def check_data_patterns(conn, patterns, profile_name, database_name, limit_start=0, limit_end=500, whitelisted_tables=None):
def check_data_patterns(args, conn, patterns, profile_name, database_name, limit_start=0, limit_end=500, whitelisted_tables=None):
cursor = conn.cursor()

# Get the list of tables to scan
Expand All @@ -35,7 +35,7 @@ def check_data_patterns(conn, patterns, profile_name, database_name, limit_start
results = []
for table in tables_to_scan:
if table not in all_tables:
system.print_warning(f"Table {table} not found in the database. Skipping.")
system.print_error(args, f"Table {table} not found in the database. Skipping.")
continue

cursor.execute(f"SELECT * FROM {table} LIMIT {limit_end} OFFSET {limit_start}")
Expand Down Expand Up @@ -92,9 +92,9 @@ def execute(args):

if host and user and password and database:
system.print_info(args, f"Checking PostgreSQL Profile {key}, database {database}")
conn = connect_postgresql(host, port, user, password, database)
conn = connect_postgresql(args, host, port, user, password, database)
if conn:
results += check_data_patterns(conn, patterns, key, database, limit_start=limit_start, limit_end=limit_end, whitelisted_tables=tables)
results += check_data_patterns(args, conn, patterns, key, database, limit_start=limit_start, limit_end=limit_end, whitelisted_tables=tables)
conn.close()
else:
system.print_error(args, f"Incomplete PostgreSQL configuration for key: {key}")
Expand Down
4 changes: 2 additions & 2 deletions hawk_scanner/commands/redis.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@

console = Console()

def connect_redis(host, port, password=None):
def connect_redis(args, host, port, password=None):
try:
r = redis.Redis(host=host, port=port, password=password)
if r.ping():
Expand Down Expand Up @@ -61,7 +61,7 @@ def execute(args):
password = config.get('password')

if host:
redis_instance = connect_redis(host, port, password)
redis_instance = connect_redis(args, host, port, password)
if redis_instance:
results = check_data_patterns(redis_instance, patterns, profile_name, host)
redis_instance.close()
Expand Down
4 changes: 2 additions & 2 deletions hawk_scanner/commands/s3.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@

console = Console()

def connect_s3(access_key, secret_key, bucket_name):
def connect_s3(args, access_key, secret_key, bucket_name):
try:
session = boto3.Session(
aws_access_key_id=access_key,
Expand Down Expand Up @@ -50,7 +50,7 @@ def execute(args):
system.print_info(args, f"Checking S3 profile: '{key}' with bucket '{bucket_name}'")
profile_name = key
if access_key and secret_key and bucket_name:
bucket = connect_s3(access_key, secret_key, bucket_name)
bucket = connect_s3(args, access_key, secret_key, bucket_name)
if bucket:

for obj in bucket.objects.all():
Expand Down
8 changes: 4 additions & 4 deletions hawk_scanner/commands/slack.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@

console = Console()

def connect_slack(token):
def connect_slack(args, token):
try:
client = WebClient(token=token)
# Test the connection by making an API call
Expand All @@ -21,7 +21,7 @@ def connect_slack(token):
system.print_error(args, f"Failed to connect to Slack with error: {e.response['error']}")
return None

def check_slack_messages(client, patterns, profile_name, channel_types, channel_names=None):
def check_slack_messages(args, client, patterns, profile_name, channel_types, channel_names=None):
results = []
try:
team_info = client.team_info()
Expand Down Expand Up @@ -90,9 +90,9 @@ def execute(args):
system.print_error(args, f"Incomplete Slack configuration for key: {key}")
continue

client = connect_slack(token)
client = connect_slack(args, token)
if client:
results += check_slack_messages(client, patterns, key, channel_types, channel_names)
results += check_slack_messages(args, client, patterns, key, channel_types, channel_names)
else:
system.print_error(args, "No Slack connection details found in connection.yml")
else:
Expand Down
4 changes: 2 additions & 2 deletions hawk_scanner/commands/text.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@

console = Console()

def check_data_patterns(value, patterns, profile_name):
def check_data_patterns(args, value, patterns, profile_name):
value_str = str(value)
matches = system.match_strings(args, value_str)
results = []
Expand All @@ -30,7 +30,7 @@ def execute(args):
if text_config:
for key, config in text_config.items():
text = config.get('text', None)
results += check_data_patterns(text, patterns, key)
results += check_data_patterns(args, text, patterns, key)
else:
system.print_error(args, "No text connection details found in connection.yml")
else:
Expand Down
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
VERSION = "0.3.16"
VERSION = "0.3.17"

from setuptools import setup, find_packages

Expand Down

0 comments on commit 80d8344

Please sign in to comment.