Skip to content

Commit

Permalink
Added programmatic support in all modules
Browse files (browse the repository at this point in the history)
  • Loading branch information
rohitcoder committed Jan 10, 2024
1 parent 5c1d8d9 commit 94d6fbd
Show file tree
Hide file tree
Showing 16 changed files with 644 additions and 606 deletions.
57 changes: 30 additions & 27 deletions hawk_scanner/commands/couchdb.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -25,7 +25,7 @@ def check_data_patterns(db, patterns, profile_name, database_name):
for field_name, field_value in document.items():
if field_value:
value_str = str(field_value)
matches = system.match_strings(value_str)
matches = system.analyze_strings(value_str, 'couchdb')
if matches:
for match in matches:
results.append({
Expand All @@ -42,36 +42,39 @@ def check_data_patterns(db, patterns, profile_name, database_name):

return results

def execute(args):
results = []
system.print_info(f"Running Checks for CouchDB Sources")
connections = system.get_connection()
def execute(args, programmatic=False):
try:
results = []
system.print_info(f"Running Checks for CouchDB Sources")
connections = system.get_connection(args, programmatic)

if 'sources' in connections:
sources_config = connections['sources']
couchdb_config = sources_config.get('couchdb')
if 'sources' in connections:
sources_config = connections['sources']
couchdb_config = sources_config.get('couchdb')

if couchdb_config:
patterns = system.get_fingerprint_file()
if couchdb_config:
patterns = system.get_fingerprint_file(args, programmatic)

for key, config in couchdb_config.items():
host = config.get('host')
port = config.get('port', 5984) # default CouchDB port
username = config.get('username')
password = config.get('password')
database = config.get('database')
for key, config in couchdb_config.items():
host = config.get('host')
port = config.get('port', 5984) # default CouchDB port
username = config.get('username')
password = config.get('password')
database = config.get('database')

if host and username and password and database:
system.print_info(f"Checking CouchDB Profile {key} with host and authentication")
else:
system.print_error(f"Incomplete CouchDB configuration for key: {key}")
continue
if host and username and password and database:
system.print_info(f"Checking CouchDB Profile {key} with host and authentication")
else:
system.print_error(f"Incomplete CouchDB configuration for key: {key}")
continue

db = connect_couchdb(host, port, username, password, database)
if db:
results += check_data_patterns(db, patterns, key, database)
db = connect_couchdb(host, port, username, password, database)
if db:
results += check_data_patterns(db, patterns, key, database)
else:
system.print_error("No CouchDB connection details found in connection.yml")
else:
system.print_error("No CouchDB connection details found in connection.yml")
else:
system.print_error("No 'sources' section found in connection.yml")
system.print_error("No 'sources' section found in connection.yml")
except Exception as e:
system.print_error(f"Failed to execute CouchDB checks with error: {e}")
return results
124 changes: 64 additions & 60 deletions hawk_scanner/commands/firebase.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -15,75 +15,79 @@ def connect_firebase(credentials_file, bucket_name):
except Exception as e:
print(f"Failed to connect to Firebase bucket: {e}")

def execute(args):
results = []
shouldDownload = True
connections = system.get_connection()
def execute(args, programmatic=False):
try:
results = []
shouldDownload = True
connections = system.get_connection(args, programmatic)
fingerprints = system.get_fingerprint_file(args, programmatic)

if 'sources' in connections:
sources_config = connections['sources']
firebase_config = sources_config.get('firebase')
if 'sources' in connections:
sources_config = connections['sources']
firebase_config = sources_config.get('firebase')

if firebase_config:
for key, config in firebase_config.items():
credentials_file = config.get('credentials_file')
bucket_name = config.get('bucket_name')
exclude_patterns = config.get(key, {}).get('exclude_patterns', [])
if firebase_config:
for key, config in firebase_config.items():
credentials_file = config.get('credentials_file')
bucket_name = config.get('bucket_name')
exclude_patterns = config.get(key, {}).get('exclude_patterns', [])

if credentials_file and bucket_name:
bucket = connect_firebase(credentials_file, bucket_name)
if bucket:
for blob in bucket.list_blobs():
file_name = blob.name
## get unique etag or hash of file
remote_etag = blob.etag
system.print_debug(f"Remote etag: {remote_etag}")
if credentials_file and bucket_name:
bucket = connect_firebase(credentials_file, bucket_name)
if bucket:
for blob in bucket.list_blobs():
file_name = blob.name
## get unique etag or hash of file
remote_etag = blob.etag
system.print_debug(f"Remote etag: {remote_etag}")

if system.should_exclude_file(file_name, exclude_patterns):
continue
if system.should_exclude_file(file_name, exclude_patterns):
continue

file_path = f"data/firebase/{remote_etag}-{file_name}"
os.makedirs(os.path.dirname(file_path), exist_ok=True)
file_path = f"data/firebase/{remote_etag}-{file_name}"
os.makedirs(os.path.dirname(file_path), exist_ok=True)

if config.get("cache") == True:
if os.path.exists(file_path):
shouldDownload = False
local_etag = file_path.split('/')[-1].split('-')[0]
system.print_debug(f"Local etag: {local_etag}")
system.print_debug(f"File already exists in cache, using it. You can disable cache by setting 'cache: false' in connection.yml")
if remote_etag != local_etag:
system.print_debug(f"File in firebase bucket has changed, downloading it again...")
shouldDownload = True
else:
if config.get("cache") == True:
if os.path.exists(file_path):
shouldDownload = False
local_etag = file_path.split('/')[-1].split('-')[0]
system.print_debug(f"Local etag: {local_etag}")
system.print_debug(f"File already exists in cache, using it. You can disable cache by setting 'cache: false' in connection.yml")
if remote_etag != local_etag:
system.print_debug(f"File in firebase bucket has changed, downloading it again...")
shouldDownload = True
else:
shouldDownload = False

if shouldDownload:
file_path = f"data/firebase/{remote_etag}-{file_name}"
system.print_debug(f"Downloading file: {file_name} to {file_path}...")
blob.download_to_filename(file_path)

matches = system.read_match_strings(file_path, 'google_cloud_storage')
if matches:
for match in matches:
results.append({
'bucket': bucket_name,
'file_path': file_name,
'pattern_name': match['pattern_name'],
'matches': match['matches'],
'sample_text': match['sample_text'],
'profile': key,
'data_source': 'firebase'
})
if shouldDownload:
file_path = f"data/firebase/{remote_etag}-{file_name}"
system.print_debug(f"Downloading file: {file_name} to {file_path}...")
blob.download_to_filename(file_path)
matches = system.analyze_file(file_path, 'google_cloud_storage', connections, fingerprints, programmatic=programmatic)
if matches:
for match in matches:
results.append({
'bucket': bucket_name,
'file_path': file_name,
'pattern_name': match['pattern_name'],
'matches': match['matches'],
'sample_text': match['sample_text'],
'profile': key,
'data_source': 'firebase'
})

else:
system.print_error(f"Failed to connect to Firebase bucket: {bucket_name}")
else:
system.print_error(f"Failed to connect to Firebase bucket: {bucket_name}")
else:
system.print_error(f"Incomplete Firebase configuration for key: {key}")
system.print_error(f"Incomplete Firebase configuration for key: {key}")
else:
system.print_error("No Firebase connection details found in connection file")
else:
system.print_error("No Firebase connection details found in connection file")
else:
system.print_error("No 'sources' section found in connection.yml")

if config.get("cache") == False:
os.system("rm -rf data/firebase")
system.print_error("No 'sources' section found in connection.yml")
if config.get("cache") == False:
os.system("rm -rf data/firebase")
except Exception as e:
print(f"Failed to connect to Firebase bucket: {e}")
return results
82 changes: 43 additions & 39 deletions hawk_scanner/commands/fs.py
Original file line number | Diff line number | Diff line change
@@ -1,14 +1,12 @@
import argparse
import argparse, os, concurrent.futures, time
from google.cloud import storage
from rich.console import Console
from hawk_scanner.internals import system
import os
import concurrent.futures
import time

def process_file(file_path, key, results):
matches = system.read_match_strings(file_path, 'fs')
def process_file(file_path, key, connections, fingerprints, programmatic=False):
matches = system.analyze_file(file_path, 'fs', connections, fingerprints, programmatic=programmatic)
file_data = system.getFileData(file_path)
results = []
if matches:
for match in matches:
results.append({
Expand All @@ -21,43 +19,49 @@ def process_file(file_path, key, results):
'data_source': 'fs',
'file_data': file_data
})
return results

def execute(args):
results = []
connections = system.get_connection()
def execute(args, programmatic=False):
try:
results = []
connections = system.get_connection(args, programmatic)
fingerprints = system.get_fingerprint_file(args, programmatic)

if 'sources' in connections:
sources_config = connections['sources']
fs_config = sources_config.get('fs')
if fs_config:
for key, config in fs_config.items():
if 'path' not in config:
system.print_error(f"Path not found in fs profile '{key}'")
continue
path = config.get('path')
if not os.path.exists(path):
system.print_error(f"Path '{path}' does not exist")

exclude_patterns = fs_config.get(key, {}).get('exclude_patterns', [])
start_time = time.time()
files = system.list_all_files_iteratively(path, exclude_patterns)

# Use ThreadPoolExecutor for parallel processing
file_count = 0
with concurrent.futures.ThreadPoolExecutor() as executor:
futures = []
for file_path in files:
file_count += 1
futures.append(executor.submit(process_file, file_path, key, results))
if 'sources' in connections:
sources_config = connections['sources']
fs_config = sources_config.get('fs')
if fs_config:
for key, config in fs_config.items():
if 'path' not in config:
system.print_error(f"Path not found in fs profile '{key}'")
continue
path = config.get('path')
if not os.path.exists(path):
system.print_error(f"Path '{path}' does not exist")

exclude_patterns = fs_config.get(key, {}).get('exclude_patterns', [])
start_time = time.time()
files = system.list_all_files_iteratively(path, exclude_patterns)

# Wait for all tasks to complete
concurrent.futures.wait(futures)
end_time = time.time()
system.print_info(f"Time taken to analyze {file_count} files: {end_time - start_time} seconds")
# Use ThreadPoolExecutor for parallel processing
file_count = 0
with concurrent.futures.ThreadPoolExecutor() as executor:
futures = []
for file_path in files:
file_count += 1
results += process_file(file_path, key, connections, fingerprints, programmatic=programmatic)

# Wait for all tasks to complete
concurrent.futures.wait(futures)
end_time = time.time()
elapsed_time = round(end_time - start_time, 2)
system.print_info(f"Time taken to analyze {file_count} files: {elapsed_time} seconds")
else:
system.print_error("No filesystem 'fs' connection details found in connection.yml")
else:
system.print_error("No filesystem 'fs' connection details found in connection.yml")
else:
system.print_error("No 'sources' section found in connection.yml")
system.print_error("No 'sources' section found in connection.yml")
except Exception as e:
system.print_error(f"Error in executing filesystem checks: {e}")
return results

if __name__ == "__main__":
Expand Down
Loading

0 comments on commit 94d6fbd

Please sign in to comment.