From 76bbd94e8f9871660f095af0fb4938e2a4ab1ba9 Mon Sep 17 00:00:00 2001 From: Jason McFarland Date: Thu, 17 Jan 2019 14:07:25 -0600 Subject: [PATCH 001/257] Complete rewrite of yara connector Initial commit --- README.md | 156 ++++--- analysis_result.py | 22 + binary_database.py | 54 +++ cb-yara-connector.spec | 26 -- cbopensource/__init__.py | 1 - cbopensource/connectors/__init__.py | 1 - cbopensource/connectors/yara/__init__.py | 1 - cbopensource/connectors/yara/bridge.py | 142 ------ feed.py | 410 ++++++++++++++++++ globals.py | 27 ++ main.py | 394 +++++++++++++++++ main.spec | 34 ++ requirements.txt | 39 +- .../integrations/yara/connector.conf.example | 75 ---- root/etc/init.d/cb-yara-connector | 99 ----- .../cb/integrations/yara/carbonblack.png | Bin 6767 -> 0 bytes .../integrations/yara/example_rules/AAR.yar | 21 - .../yara/example_rules/Ap0calypse.yar | 20 - .../integrations/yara/example_rules/Arcom.yar | 20 - .../yara/example_rules/Bandook.yar | 27 -- .../yara/example_rules/BlackNix.yar | 20 - .../yara/example_rules/BlackShades.yar | 16 - .../yara/example_rules/BlueBanana.yar | 21 - .../integrations/yara/example_rules/Bozok.yar | 19 - .../yara/example_rules/ClientMesh.yar | 20 - .../yara/example_rules/CyberGate.yar | 23 - .../yara/example_rules/DarkComet.yar | 27 -- .../yara/example_rules/DarkRAT.yar | 21 - .../yara/example_rules/Greame.yar | 24 - .../yara/example_rules/Imminent3.yar | 28 -- .../yara/example_rules/Infinity.yar | 22 - .../yara/example_rules/LostDoor.yar | 24 - .../yara/example_rules/LuxNet.yar | 20 - .../yara/example_rules/NanoCore.yar | 26 -- .../yara/example_rules/NetWire.yar | 19 - .../yara/example_rules/Pandora.yar | 27 -- .../yara/example_rules/Paradox.yar | 21 - .../yara/example_rules/PoisonIvy.yar | 19 - .../yara/example_rules/Punisher.yar | 21 - .../yara/example_rules/PythoRAT.yar | 22 - .../yara/example_rules/ShadowTech.yar | 21 - .../yara/example_rules/SmallNet.yar | 19 - .../yara/example_rules/SpyGate.yar | 26 -- .../yara/example_rules/Sub7Nation.yar | 28 -- .../yara/example_rules/Vertex.yar | 23 - .../yara/example_rules/VirusRat.yar | 26 -- .../yara/example_rules/Xtreme.yar | 20 - .../yara/example_rules/adWind.yar | 18 - .../integrations/yara/example_rules/jRat.yar | 23 - .../integrations/yara/example_rules/njRat.yar | 24 - .../yara/example_rules/unrecom.yar | 19 - .../integrations/yara/example_rules/xRAT.yar | 28 -- .../share/cb/integrations/yara/yara-logo.png | Bin 3123 -> 0 bytes scripts/cb-yara-connector | 29 -- setup.py | 175 -------- singleton.py | 147 +++++++ tasks.py | 169 ++++++++ tests/__init__.py | 1 - .../00af22b51f217dc4c536f6039577b28c | 1 - .../00af22b51f217dc4c536f6039577b28c.json | 1 - .../0ab74f7d94cdec551db81954f51cc95e | 1 - .../0ab74f7d94cdec551db81954f51cc95e.json | 1 - .../0b25fbbb6c94c1246381a527ed418f04 | 1 - .../0b25fbbb6c94c1246381a527ed418f04.json | 1 - .../0c0f5d1428d00def0fe16eba9edaa4d5 | 1 - .../0c0f5d1428d00def0fe16eba9edaa4d5.json | 1 - tests/data/daemon.conf | 4 - tests/data/yara_rules/rules.yar | 15 - tests/test_yara.py | 76 ---- tests/utils/__init__.py | 1 - tests/utils/mock_server.py | 97 ----- 71 files changed, 1379 insertions(+), 1627 deletions(-) create mode 100644 analysis_result.py create mode 100644 binary_database.py delete mode 100644 cb-yara-connector.spec delete mode 100644 cbopensource/__init__.py delete mode 100644 cbopensource/connectors/__init__.py delete mode 100644 cbopensource/connectors/yara/__init__.py delete mode 100644 cbopensource/connectors/yara/bridge.py create mode 100644 feed.py create mode 
100644 globals.py create mode 100644 main.py create mode 100644 main.spec delete mode 100644 root/etc/cb/integrations/yara/connector.conf.example delete mode 100755 root/etc/init.d/cb-yara-connector delete mode 100644 root/usr/share/cb/integrations/yara/carbonblack.png delete mode 100644 root/usr/share/cb/integrations/yara/example_rules/AAR.yar delete mode 100644 root/usr/share/cb/integrations/yara/example_rules/Ap0calypse.yar delete mode 100644 root/usr/share/cb/integrations/yara/example_rules/Arcom.yar delete mode 100644 root/usr/share/cb/integrations/yara/example_rules/Bandook.yar delete mode 100644 root/usr/share/cb/integrations/yara/example_rules/BlackNix.yar delete mode 100644 root/usr/share/cb/integrations/yara/example_rules/BlackShades.yar delete mode 100644 root/usr/share/cb/integrations/yara/example_rules/BlueBanana.yar delete mode 100644 root/usr/share/cb/integrations/yara/example_rules/Bozok.yar delete mode 100644 root/usr/share/cb/integrations/yara/example_rules/ClientMesh.yar delete mode 100644 root/usr/share/cb/integrations/yara/example_rules/CyberGate.yar delete mode 100644 root/usr/share/cb/integrations/yara/example_rules/DarkComet.yar delete mode 100644 root/usr/share/cb/integrations/yara/example_rules/DarkRAT.yar delete mode 100644 root/usr/share/cb/integrations/yara/example_rules/Greame.yar delete mode 100644 root/usr/share/cb/integrations/yara/example_rules/Imminent3.yar delete mode 100644 root/usr/share/cb/integrations/yara/example_rules/Infinity.yar delete mode 100644 root/usr/share/cb/integrations/yara/example_rules/LostDoor.yar delete mode 100644 root/usr/share/cb/integrations/yara/example_rules/LuxNet.yar delete mode 100644 root/usr/share/cb/integrations/yara/example_rules/NanoCore.yar delete mode 100644 root/usr/share/cb/integrations/yara/example_rules/NetWire.yar delete mode 100644 root/usr/share/cb/integrations/yara/example_rules/Pandora.yar delete mode 100644 root/usr/share/cb/integrations/yara/example_rules/Paradox.yar delete mode 100644 root/usr/share/cb/integrations/yara/example_rules/PoisonIvy.yar delete mode 100644 root/usr/share/cb/integrations/yara/example_rules/Punisher.yar delete mode 100644 root/usr/share/cb/integrations/yara/example_rules/PythoRAT.yar delete mode 100644 root/usr/share/cb/integrations/yara/example_rules/ShadowTech.yar delete mode 100644 root/usr/share/cb/integrations/yara/example_rules/SmallNet.yar delete mode 100644 root/usr/share/cb/integrations/yara/example_rules/SpyGate.yar delete mode 100644 root/usr/share/cb/integrations/yara/example_rules/Sub7Nation.yar delete mode 100644 root/usr/share/cb/integrations/yara/example_rules/Vertex.yar delete mode 100644 root/usr/share/cb/integrations/yara/example_rules/VirusRat.yar delete mode 100644 root/usr/share/cb/integrations/yara/example_rules/Xtreme.yar delete mode 100644 root/usr/share/cb/integrations/yara/example_rules/adWind.yar delete mode 100644 root/usr/share/cb/integrations/yara/example_rules/jRat.yar delete mode 100644 root/usr/share/cb/integrations/yara/example_rules/njRat.yar delete mode 100644 root/usr/share/cb/integrations/yara/example_rules/unrecom.yar delete mode 100644 root/usr/share/cb/integrations/yara/example_rules/xRAT.yar delete mode 100644 root/usr/share/cb/integrations/yara/yara-logo.png delete mode 100644 scripts/cb-yara-connector delete mode 100644 setup.py create mode 100644 singleton.py create mode 100644 tasks.py delete mode 100644 tests/__init__.py delete mode 100644 tests/data/binary_data/00af22b51f217dc4c536f6039577b28c delete mode 100644 
tests/data/binary_data/00af22b51f217dc4c536f6039577b28c.json delete mode 100644 tests/data/binary_data/0ab74f7d94cdec551db81954f51cc95e delete mode 100644 tests/data/binary_data/0ab74f7d94cdec551db81954f51cc95e.json delete mode 100644 tests/data/binary_data/0b25fbbb6c94c1246381a527ed418f04 delete mode 100644 tests/data/binary_data/0b25fbbb6c94c1246381a527ed418f04.json delete mode 100644 tests/data/binary_data/0c0f5d1428d00def0fe16eba9edaa4d5 delete mode 100644 tests/data/binary_data/0c0f5d1428d00def0fe16eba9edaa4d5.json delete mode 100644 tests/data/daemon.conf delete mode 100644 tests/data/yara_rules/rules.yar delete mode 100644 tests/test_yara.py delete mode 100644 tests/utils/__init__.py delete mode 100644 tests/utils/mock_server.py diff --git a/README.md b/README.md index 7edb517..3e596b9 100644 --- a/README.md +++ b/README.md @@ -1,70 +1,88 @@ -# Carbon Black - Yara Connector -[Yara](http://plusvic.github.io/yara/) is the linga franca of malware analysts. -With a robust language to define byte strings and clean, well-designed interfaces, -many IR and security operations shops keep the results of their analysis in a local -repository of yara rules. - -However, monitoring activity across your network for matches to your yara rules is -difficult. If possible at all, it usually involves infrequent, time-consuming scans. -Since Carbon Black collects all executed binaries and has a robust API, it is possible -to configure your Carbon Black server to act as a "Yara Monitor" and automatically trigger -notification for any binary executed across your network matching any of your Yara rules. - -## Installation Quickstart - -As root on your Carbon Black or other RPM based 64-bit Linux distribution server: -``` -cd /etc/yum.repos.d -curl -O https://opensource.carbonblack.com/release/x86_64/CbOpenSource.repo -yum install python-cb-yara-connector -``` - -Once the software is installed via YUM, copy the `/etc/cb/integrations/yara/connector.conf.example` file to -`/etc/cb/integrations/yara/connector.conf`. Edit this file and place your Carbon Black API key into the -`carbonblack_server_token` variable and your Carbon Black server's base URL into the `carbonblack_server_url` variable. -Also, point the Yara connector to a directory of yara rule files by editing the `yara_rule_directory` variable. A set -of example rules are included in the `/usr/share/cb/integrations/yara/example_rules` directory. - -To start the service, run `service cb-yara-connector start` as root. Any errors will be logged into `/var/log/cb/integrations/yara/yara.log`. - -## Troubleshooting - -If you suspect a problem, please first look at the Yara connector logs found here: `/var/log/cb/integrations/yara/yara.log` -(There might be multiple files as the logger "rolls over" when the log file hits a certain size). - -If you want to re-run the analysis across your binaries: - -1. Stop the service: `service cb-yara-connector stop` -2. Remove the database file: `rm /usr/share/cb/integrations/yara/db/sqlite.db` -3. Remove the feed from your Cb server's Threat Intelligence page -4. Restart the service: `service cb-yara-connector start` - -## Building yara-python with crypto - -This is only needed if you are building the connector from scratch. - -1. `git clone --recursive https://github.com/VirusTotal/yara-python` -2. `cd yara-python/yara` -3. `./bootstrap.sh` -3. `./configure --with-crypto` -4. `make` -5. `mktmpenv` -6. `python setup.py build --dynamic-linking` -7. 
`python setup.py install`
-
-## Contacting Carbon Black Developer Relations Support
-
-Web: https://developer.carbonblack.com
-E-mail: dev-support@carbonblack.com
-
-### Reporting Problems
-
-When you contact Bit9 Developer Relations Technical Support with an issue, please provide the following:
-
-* Your name, company name, telephone number, and e-mail address
-* Product name/version, CB Server version, CB Sensor version
-* Hardware configuration of the Carbon Black Server or computer (processor, memory, and RAM)
-* For documentation issues, specify the version of the manual you are using.
-* Action causing the problem, error message returned, and event log output (as appropriate)
-* Problem severity
+# Prerequisites
+
+    yum install zlib openssl-devel sqlite-devel
+
+# Running the Agent
+
+    mkdir -p /usr/share/cb/integrations/yara/yara_rules
+    wget -O /usr/share/cb/integrations/yara/yara_agent <agent download URL>
+
+## Sample Yara Agent Config
+
+    [general]
+
+    ;
+    ; either run a single worker locally or remotely
+    ; valid types are 'local' or 'remote'
+    ;
+    worker_type=local
+
+    ;
+    ; ONLY for worker_type of remote
+    ; IP address of the workers if worker_type is remote
+    ;
+
+    ;worker_ip=127.0.0.1
+
+    ;
+    ; ONLY for worker_type of local
+    ; Cb Response server settings for scanning locally.
+    ; For remote scanning, set these parameters in the yara worker config file instead.
+    ; Default: https://127.0.0.1
+    ;
+    cb_server_url=
+    cb_server_token=
+
+    ;
+    ; path to the directory containing yara rules
+    ;
+    yara_rules_dir=yara_rules
+
+    ;
+    ; Cb Response Postgres database settings
+    ;
+    postgres_host=
+    postgres_username=
+    postgres_password=
+    postgres_db=
+    postgres_port=
+
+    ;
+    ; nice value used for this script
+    ;
+    niceness=1
+
+* Copy the above config to `/etc/cb/integrations/yara/yara_agent.conf`
+
+# Example Cron Entry
+
+The agent is meant to be run periodically. One illustrative crontab entry (adjust the schedule and paths to taste) would scan hourly:
+
+    0 * * * * /usr/share/cb/integrations/yara/yara_agent --config-file=/etc/cb/integrations/yara/yara_agent.conf
+
+# CentOS 6 Build Instructions
+
+## Install Python 3.6
+
+    ./configure --prefix=/usr/local --enable-shared LDFLAGS="-Wl,-rpath /usr/local/lib"
+    make
+    make altinstall
+
+## Create VirtualEnv
+
+    python3.6 -m venv venv-build
+    source ./venv-build/bin/activate
+    pip install -r requirements.txt
+
+## Create Executable
+
+    pyinstaller main.spec
+
+# CentOS 7 Build Instructions
+
+## Install Python 3.6
+
+## Create VirtualEnv
+
+## Create Executable
diff --git a/analysis_result.py b/analysis_result.py
new file mode 100644
index 0000000..644b043
--- /dev/null
+++ b/analysis_result.py
@@ -0,0 +1,22 @@
+class AnalysisResult(object):
+    def __init__(self,
+                 md5,
+                 score=0,
+                 short_result='',
+                 long_result='',
+                 last_scan_date=None,
+                 last_error_msg='',
+                 last_error_date=None,
+                 stop_future_scans=False,
+                 binary_not_available=False,
+                 misc=''):
+        self.md5 = md5
+        self.short_result = short_result
+        self.long_result = long_result
+        self.last_error_msg = last_error_msg
+        self.last_error_date = last_error_date
+        self.last_scan_date = last_scan_date
+        self.score = score
+        self.stop_future_scans = stop_future_scans
+        self.binary_not_available = binary_not_available
+        self.misc = misc
diff --git a/binary_database.py b/binary_database.py
new file mode 100644
index 0000000..2cfa896
--- /dev/null
+++ b/binary_database.py
@@ -0,0 +1,54 @@
+import logging
+
+from peewee import *
+from playhouse.sqliteq import SqliteQueueDatabase
+
+logger = logging.getLogger(__name__)
+logger.setLevel(logging.DEBUG)
+
+#
+# autostart must be False if we intend to dynamically create the database.
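+# The Proxy() is initialized with a real SqliteDatabase in main.py once the configuration is known.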
+# +db = Proxy() + + +class BinaryDetonationResult(Model): + md5 = CharField(index=True, unique=True) + last_scan_date = DateTimeField(null=True) + last_success_msg = CharField(default='', null=True) + + last_error_msg = CharField(default='', null=True) + last_error_date = DateTimeField(null=True) + + score = IntegerField(default=0) + + scan_count = IntegerField(default=0) + + # + # If There was a permanent error then set this to True + # + stop_future_scans = BooleanField(default=False) + + # + # if we could not download the binary then set this to False + # We will need to wait for alliance download + # + binary_not_available = BooleanField(null=True) + + # + # Last attempt to scan this binary. Which could have thrown an error if the binary was not available to download + # + last_scan_attempt = DateTimeField(null=True) + + # + # + # + num_attempts = IntegerField(default=0) + + # + # Misc use for connectors + # + misc = CharField(default='') + + class Meta: + database = db diff --git a/cb-yara-connector.spec b/cb-yara-connector.spec deleted file mode 100644 index 6cd1e08..0000000 --- a/cb-yara-connector.spec +++ /dev/null @@ -1,26 +0,0 @@ -# -*- mode: python -*- -a = Analysis(['scripts/cb-yara-connector'], - pathex=['.'], - hiddenimports=['unicodedata'], - datas=[ (HOMEPATH + '/cbapi/response/models/*', 'cbapi/response/models/'), - (HOMEPATH + '/cbapi/protection/models/*', 'cbapi/protection/models/'), - (HOMEPATH + '/cbapi/defense/models/*', 'cbapi/defense/models/') ], - hookspath=None, - runtime_hooks=None) -pyz = PYZ(a.pure) -exe = EXE(pyz, - a.scripts, - exclude_binaries=True, - name='cb-yara-connector', - debug=False, - strip=None, - upx=True, - console=True ) -coll = COLLECT(exe, - a.binaries, - a.zipfiles, - a.datas, - strip=None, - upx=True, - name='cb-yara-connector') - diff --git a/cbopensource/__init__.py b/cbopensource/__init__.py deleted file mode 100644 index 6d75c54..0000000 --- a/cbopensource/__init__.py +++ /dev/null @@ -1 +0,0 @@ -__author__ = 'jgarman' diff --git a/cbopensource/connectors/__init__.py b/cbopensource/connectors/__init__.py deleted file mode 100644 index 6d75c54..0000000 --- a/cbopensource/connectors/__init__.py +++ /dev/null @@ -1 +0,0 @@ -__author__ = 'jgarman' diff --git a/cbopensource/connectors/yara/__init__.py b/cbopensource/connectors/yara/__init__.py deleted file mode 100644 index 757979f..0000000 --- a/cbopensource/connectors/yara/__init__.py +++ /dev/null @@ -1 +0,0 @@ -__author__ = 'jgarman' \ No newline at end of file diff --git a/cbopensource/connectors/yara/bridge.py b/cbopensource/connectors/yara/bridge.py deleted file mode 100644 index 47494a1..0000000 --- a/cbopensource/connectors/yara/bridge.py +++ /dev/null @@ -1,142 +0,0 @@ -from cbint.utils.detonation import DetonationDaemon, ConfigurationError -from cbint.utils.detonation.binary_analysis import (BinaryAnalysisProvider, AnalysisPermanentError, - AnalysisTemporaryError, AnalysisResult) -import cbint.utils.feed -import yara -import time -import logging - -import os -import pprint - -log = logging.getLogger(__name__) -log.setLevel(logging.INFO) - - -class YaraProvider(BinaryAnalysisProvider): - def __init__(self, name, yara_rule_directory): - super(YaraProvider, self).__init__(name) - self.yara_rules = self.compile_rules(yara_rule_directory) - - def compile_rules(self, pathname): - rule_map = {} - for fn in os.listdir(pathname): - fullpath = os.path.join(pathname, fn) - if not os.path.isfile(fullpath): - continue - - last_dot = fn.rfind('.') - if last_dot != -1: - namespace = 
fn[:last_dot] - else: - namespace = fn - rule_map[namespace] = fullpath - - log.info("Yara Rules to be compiled:") - log.info(pprint.pformat(rule_map)) - - return yara.compile(filepaths=rule_map) - - # take default definition of check_result_for (return None) - def check_result_for(self, md5sum): - return None - - def analyze_binary(self, md5sum, binary_file_stream): - log.debug("%s: in analyze_binary" % md5sum) - d = binary_file_stream.read() - - try: - start_analyze_time = time.time() - matches = self.yara_rules.match(data=d, timeout=60) - end_analyze_time = time.time() - log.debug("%s: Took %0.3f seconds to analyze the file" % (md5sum, end_analyze_time - start_analyze_time)) - except yara.TimeoutError: - raise AnalysisPermanentError(message="Analysis timed out after 60 seconds") - except yara.Error: - raise AnalysisTemporaryError(message="Yara exception", retry_in=10) - else: - if matches: - score = self.getHighScore(matches) - return AnalysisResult(message="Matched yara rules: %s" % ', '.join([match.rule for match in matches]), - extended_message="%s" % ', '.join([match.rule for match in matches]), - analysis_version=1, score=score) - else: - return AnalysisResult(score=0) - - def getHighScore(self, matches): - score = 0 - for match in matches: - if match.meta.get('score') > score: - score = match.meta.get('score') - if score == 0: - return 100 - else: - return score - - -class YaraConnector(DetonationDaemon): - @property - def integration_name(self): - return 'Cb Yara Connector 1.3.5' - - @property - def filter_spec(self): - filters = [] - additional_filter_requirements = self.get_config_string("binary_filter_query", None) - if additional_filter_requirements: - log.info("Binary Filter Query: {0}".format(additional_filter_requirements)) - filters.append(additional_filter_requirements) - - return ' '.join(filters) - - @property - def num_quick_scan_threads(self): - return 0 - - @property - def num_deep_scan_threads(self): - yara_num_threads = self.get_config_integer("yara_num_threads", 4) - log.info("Number of deep scan threads: {0}".format(yara_num_threads)) - return yara_num_threads - - @property - def up_to_date_rate_limiter(self): - return 0 - - @property - def historical_rate_limiter(self): - return 0 - - def get_provider(self): - yara_provider = YaraProvider(self.name, self.yara_rule_directory) - return yara_provider - - def get_metadata(self): - return cbint.utils.feed.generate_feed(self.name, summary="Scan binaries collected by Carbon Black with Yara.", - tech_data="There are no requirements to share any data with Carbon Black to use this feed.", - provider_url="http://plusvic.github.io/yara/", - icon_path='/usr/share/cb/integrations/yara/yara-logo.png', - display_name="Yara", category="Connectors") - - def validate_config(self): - super(YaraConnector, self).validate_config() - - self.yara_rule_directory = self.get_config_string("yara_rule_directory", None) - if not self.yara_rule_directory: - raise ConfigurationError("A yara_rule_directory stanza is required in the configuration file") - - return True - - -if __name__ == '__main__': - import logging - - logging.basicConfig(level=logging.DEBUG) - - my_path = os.path.dirname(os.path.abspath(__file__)) - temp_directory = "/tmp/yara" - - config_path = os.path.join(my_path, "testing.conf") - daemon = YaraConnector('yaratest', configfile=config_path, work_directory=temp_directory, - logfile=os.path.join(temp_directory, 'test.log'), debug=True) - daemon.start() diff --git a/feed.py b/feed.py new file mode 100644 index 0000000..7a171f2 
--- /dev/null
+++ b/feed.py
@@ -0,0 +1,410 @@
+import base64
+import json
+import os
+import re
+import time
+import logging
+
+
+class CbException(Exception):
+    pass
+
+
+class CbIconError(CbException):
+    pass
+
+
+class CbInvalidFeed(CbException):
+    pass
+
+
+class CbInvalidReport(CbException):
+    pass
+
+
+logger = logging.getLogger(__name__)
+
+
+class CbJSONEncoder(json.JSONEncoder):
+    def default(self, o):
+        return o.dump()
+
+
+class CbFeed(object):
+    def __init__(self, feedinfo, reports):
+        self.data = {'feedinfo': feedinfo,
+                     'reports': reports}
+
+    def dump(self, validate=True):
+        '''
+        dumps the feed data
+        :param validate: if set, validates the feed before dumping
+        :return: json string of the feed data
+        '''
+        if validate:
+            self.validate()
+
+        return json.dumps(self.data, cls=CbJSONEncoder, indent=2)
+
+    def dumpjson(self, validate=True):
+        '''
+        dumps the feed data and parses it back into a Python object
+        :param validate: if set, validates the feed before dumping
+        :return: the feed data as a parsed JSON object
+        '''
+        if validate:
+            self.validate()
+
+        return json.loads(self.dump(validate))
+
+    def __repr__(self):
+        return repr(self.data)
+
+    def __str__(self):
+        return "CbFeed(%s)" % (self.data.get('feedinfo', "unknown"))
+
+    def iter_iocs(self):
+        '''
+        yields all iocs in the feed
+        '''
+
+        data = json.loads(self.dump(validate=False))
+        for report in data["reports"]:
+            for md5 in report.get("iocs", {}).get("md5", []):
+                yield {"type": "md5", "ioc": md5, "report_id": report.get("id", "")}
+            for ip in report.get("iocs", {}).get("ipv4", []):
+                yield {"type": "ipv4", "ioc": ip, "report_id": report.get("id", "")}
+            for domain in report.get("iocs", {}).get("dns", []):
+                yield {"type": "dns", "ioc": domain, "report_id": report.get("id", "")}
+
+    def validate_report_list(self, reports):
+        '''
+        validates the reports as a set, beyond the standalone validation of each report
+        :param reports: list of reports
+        '''
+
+        reportids = set()
+
+        # verify that no two reports have the same report id
+        # see CBAPI-17
+        for report in reports:
+            if report['id'] in reportids:
+                raise CbInvalidFeed("duplicate report id '%s'" % report['id'])
+            reportids.add(report['id'])
+
+    def validate(self, pedantic=False, serialized_data=None):
+        '''
+        validates the feed
+        :param pedantic: when set, perform strict validation
+        :param serialized_data: serialized data for the feed
+        '''
+        if not serialized_data:
+            # this should be identity, but just to be safe.
+ serialized_data = self.dump(validate=False) + + data = json.loads(serialized_data) + + if not "feedinfo" in data: + raise CbInvalidFeed("Feed missing 'feedinfo' data") + + if not 'reports' in data: + raise CbInvalidFeed("Feed missing 'reports' structure") + + # validate the feed info + fi = CbFeedInfo(**data["feedinfo"]) + fi.validate(pedantic=pedantic) + + # validate each report individually + for rep in data["reports"]: + report = CbReport(**rep) + report.validate(pedantic=pedantic) + + # validate the reports as a whole + self.validate_report_list(data["reports"]) + + +class CbFeedInfo(object): + def __init__(self, **kwargs): + # these fields are required in every feed descriptor + self.required = ["name", "display_name", + "summary", "tech_data", "provider_url"] + self.optional = ["category", "icon", "version", "icon_small"] + self.noemptystrings = ["name", "display_name", "summary", "tech_data", "category"] + self.data = kwargs + + # if they are present, set the icon fields of the data to hold + # the base64 encoded file data from their path + for icon_field in ["icon", "icon_small"]: + if icon_field in self.data and os.path.exists(self.data[icon_field]): + icon_path = self.data.pop(icon_field) + try: + self.data[icon_field] = base64.b64encode(open(icon_path, "rb").read()).decode('utf-8') + except Exception as err: + raise CbIconError(f"Unknown error reading/encoding icon data: {err}") + + def dump(self): + ''' + validates, then dumps the feed info data + :return: the feed info data + ''' + self.validate() + return self.data + + def validate(self, pedantic=False): + """ a set of checks to validate data before we export the feed""" + + if not all([x in self.data.keys() for x in self.required]): + missing_fields = ", ".join(set(self.required).difference(set(self.data.keys()))) + raise CbInvalidFeed("FeedInfo missing required field(s): %s" % missing_fields) + + # verify no non-supported keys are present + for key in self.data.keys(): + if key not in self.required and key not in self.optional: + raise CbInvalidFeed("FeedInfo includes extraneous key '%s'" % key) + + # check to see if icon_field can be base64 decoded + for icon_field in ["icon", "icon_small"]: + try: + base64.b64decode(self.data[icon_field]) + except TypeError as err: + raise CbIconError(f"Icon must either be path or base64 data. 
\ + Path does not exist and base64 decode failed with: {err}") + except KeyError as err: + # we don't want to cause a ruckus if the icon is missing + pass + + # all fields in feedinfo must be strings + for key in self.data.keys(): + if not isinstance(self.data[key], str): + raise CbInvalidFeed("FeedInfo field %s must be of type %s, the field \ + %s is of type %s " % (key, "unicode", key, type(self.data[key]))) + + # certain fields, when present, must not be empty strings + for key in self.data.keys(): + if key in self.noemptystrings and self.data[key] == "": + raise CbInvalidFeed("The '%s' field must not be an empty string" % key) + + # validate shortname of this field is just a-z and 0-9, with at least one character + if not self.data["name"].isalnum(): + raise CbInvalidFeed( + "Feed name %s may only contain a-z, A-Z, 0-9 and must have one character" % self.data["name"]) + + return True + + def __str__(self): + return "CbFeed(%s)" % (self.data.get("name", "unnamed")) + + def __repr__(self): + return repr(self.data) + + +class CbReport(object): + def __init__(self, allow_negative_scores=False, **kwargs): + + # negative scores introduced in CB 4.2 + # negative scores indicate a measure of "goodness" versus "badness" + self.allow_negative_scores = allow_negative_scores + + # these fields are required in every report + self.required = ["iocs", "timestamp", "link", "title", "id", "score"] + + # these fields must be of type string + self.typestring = ["link", "title", "id", "description"] + + # these fields must be of type int + self.typeint = ["timestamp", "score"] + + # these fields are optional + self.optional = ["tags", "description"] + + # valid IOC types are "md5", "ipv4", "dns", "query" + self.valid_ioc_types = ["md5", "ipv4", "dns", "query"] + + # valid index_type options for "query" IOC + self.valid_query_ioc_types = ["events", "modules"] + + if "timestamp" not in kwargs: + kwargs["timestamp"] = int(time.mktime(time.gmtime())) + + self.data = kwargs + + def dump(self): + self.validate() + return self.data + + def is_valid_query(self, q, reportid): + """ + make a determination as to if this is a valid query + """ + # the query itself must be percent-encoded + # verify there are only non-reserved characters present + # no logic to detect unescaped '%' characters + for c in q: + if c not in "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789-_.~%*()": + raise CbInvalidReport( + "Unescaped non-reserved character '%s' found in query for report %s; use percent-encoding" % ( + c, reportid)) + + def validate(self, pedantic=False): + """ a set of checks to validate the report""" + + # validate we have all required keys + global ip + if not all([x in self.data.keys() for x in self.required]): + missing_fields = ", ".join(set(self.required).difference(set(self.data.keys()))) + raise CbInvalidReport("Report missing required field(s): %s" % missing_fields) + + # validate that no extra keys are present + for key in self.data.keys(): + if key not in self.required and key not in self.optional: + raise CbInvalidReport("Report contains extra key '%s'" % key) + + # (pedantically) validate only required keys are present + if pedantic and len(self.data.keys()) > len(self.required): + raise CbInvalidReport("Report contains extra keys: %s" % + (set(self.data.keys()) - set(self.required))) + + # CBAPI-36 + # verify that all fields that should be strings are strings + for key in self.typestring: + if key in self.data.keys(): + if not isinstance(self.data[key], str): + raise CbInvalidReport("Report 
field '%s' must be a string" % key) + + # verify that all fields that should be ints are ints + for key in self.typeint: + if key in self.data.keys(): + if not isinstance(self.data[key], int): + raise CbInvalidReport("Report field '%s' must be an int" % key) + + # validate that tags is a list of alphanumeric strings + if "tags" in self.data.keys(): + if type(self.data["tags"]) != type([]): + raise CbInvalidReport("Tags must be a list") + for tag in self.data["tags"]: + if not str(tag).isalnum(): + raise CbInvalidReport("Tag '%s' is not alphanumeric" % tag) + if len(tag) > 32: + raise CbInvalidReport("Tags must be 32 characters or fewer") + + # validate score is integer between -100 (if so specified) or 0 and 100 + try: + int(self.data["score"]) + except ValueError: + raise CbInvalidReport( + "Report has non-integer score %s in report %s" % (self.data["score"], self.data["id"])) + + if self.data["score"] < -100 or self.data["score"] > 100: + raise CbInvalidReport( + "Report score %s out of range -100 to 100 in report %s" % (self.data["score"], self.data["id"])) + + if not self.allow_negative_scores and self.data["score"] < 0: + raise CbInvalidReport( + "Report score %s out of range 0 to 100 in report %s" % (self.data["score"], self.data["id"])) + + # validate id of this report is just a-z and 0-9 and - and ., with at least one character + if not re.match("^[a-zA-Z0-9-_.]+$", self.data["id"]): + raise CbInvalidReport( + "Report ID %s may only contain a-z, A-Z, 0-9, - and must have one character" % self.data["id"]) + + # validate there is at least one IOC for each report and each IOC entry has at least one entry + if not all([len(self.data["iocs"][ioc]) >= 1 for ioc in self.data['iocs']]): + raise CbInvalidReport("Report IOC list with zero length in report %s" % (self.data["id"])) + + # convenience variable + iocs = self.data['iocs'] + + # validate that there are at least one type of ioc present + if len(iocs.keys()) == 0: + raise CbInvalidReport("Report with no IOCs in report %s" % (self.data["id"])) + + # (pedantically) validate that no extra keys are present + if pedantic and len(set(iocs.keys()) - set(self.valid_ioc_types)) > 0: + raise CbInvalidReport( + "Report IOCs section contains extra keys: %s" % (set(iocs.keys()) - set(self.valid_ioc_types))) + + # Let us check and make sure that for "query" ioc type does not contain other types of ioc + query_ioc = "query" in iocs.keys() + if query_ioc and len(iocs.keys()) > 1: + raise CbInvalidReport( + "Report IOCs section for \"query\" contains extra keys: %s for report %s" % + (set(iocs.keys()), self.data["id"])) + + if query_ioc: + iocs_query = iocs["query"][0] + + # validate that the index_type field exists + if "index_type" not in iocs_query.keys(): + raise CbInvalidReport("Query IOC section for report %s missing index_type" % self.data["id"]) + + # validate that the index_type is a valid value + if not iocs_query.get("index_type", None) in self.valid_query_ioc_types: + raise CbInvalidReport( + "Report IOCs section for \"query\" contains invalid index_type: %s for report %s" % + (iocs_query.get("index_type", None), self.data["id"])) + + # validate that the search_query field exists + if "search_query" not in iocs_query.keys(): + raise CbInvalidReport("Query IOC for report %s missing 'search_query'" % self.data["id"]) + + # validate that the search_query field is at least minimally valid + # in particular, we are looking for a "q=" or "cb.q." 
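+            # prefix somewhere in the search_query value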
+ # this is by no means a complete validation, but it does provide a protection + # against leaving the actual query unqualified + if "q=" not in iocs_query["search_query"] and "cb.q." not in iocs_query["search_query"]: + raise CbInvalidReport("Query IOC for report %s missing q= on query" % self.data["id"]) + + for kvpair in iocs_query["search_query"].split('&'): + if 2 != len(kvpair.split('=')): + continue + if kvpair.split('=')[0] == 'q': + self.is_valid_query(kvpair.split('=')[1], self.data["id"]) + + # validate all md5 fields are 32 characters, just alphanumeric, and + # do not include [g-z] and [G-Z] meet the alphanumeric criteria but are not valid in a md5 + for md5 in iocs.get("md5", []): + if 32 != len(md5): + raise CbInvalidReport("Invalid md5 length for md5 (%s) for report %s" % (md5, self.data["id"])) + if not md5.isalnum(): + raise CbInvalidReport("Malformed md5 (%s) in IOC list for report %s" % (md5, self.data["id"])) + for c in "ghijklmnopqrstuvwxyz": + if c in md5 or c.upper() in md5: + raise CbInvalidReport("Malformed md5 (%s) in IOC list for report %s" % (md5, self.data["id"])) + + # validate all IPv4 fields pass socket.inet_ntoa() + import socket + + try: + [socket.inet_aton(ip) for ip in iocs.get("ipv4", [])] + except socket.error: + raise CbInvalidReport("Malformed IPv4 (%s) addr in IOC list for report %s" % (ip, self.data["id"])) + + # validate all lowercased domains have just printable ascii + import string + # 255 chars allowed in dns; all must be printables, sans control characters + # hostnames can only be A-Z, 0-9 and - but labels can be any printable. See + # O'Reilly's DNS and Bind Chapter 4 Section 5: + # "Names that are not host names can consist of any printable ASCII character." + allowed_chars = string.printable[:-6] + for domain in iocs.get("dns", []): + if len(domain) > 255: + raise CbInvalidReport( + "Excessively long domain name (%s) in IOC list for report %s" % (domain, self.data["id"])) + if not all([c in allowed_chars for c in domain]): + raise CbInvalidReport( + "Malformed domain name (%s) in IOC list for report %s" % (domain, self.data["id"])) + labels = domain.split('.') + if 0 == len(labels): + raise CbInvalidReport("Empty domain name in IOC list for report %s" % (self.data["id"])) + for label in labels: + if len(label) < 1 or len(label) > 63: + raise CbInvalidReport("Invalid label length (%s) in domain name (%s) for report %s" % ( + label, domain, self.data["id"])) + + return True + + def __str__(self): + return "CbReport(%s)" % (self.data.get("title", self.data.get("id", ''))) + + def __repr__(self): + return repr(self.data) diff --git a/globals.py b/globals.py new file mode 100644 index 0000000..d34ded1 --- /dev/null +++ b/globals.py @@ -0,0 +1,27 @@ +g_redis_url = '' +g_config = {} + +g_cb_server_url = 'https://127.0.0.1' +g_cb_server_token = '' + +worker_ip = "127.0.0.1" + +g_yara_rules_dir = 'yara_rules' +output_file = 'yara_feed.json' + +g_remote = False +g_yara_rule_map = {} +g_yara_rule_map_hash_list = list() + +g_postgres_host = '127.0.0.1' +g_postgres_username = 'cb' +g_postgres_password = '' +g_postgres_port = 5002 +g_postgres_db = 'cb' + +MAX_HASHES = 2 + +g_num_binaries_not_available = 0 +g_num_binaries_analyzed = 0 + +g_num_save_results = 0 diff --git a/main.py b/main.py new file mode 100644 index 0000000..31c042e --- /dev/null +++ b/main.py @@ -0,0 +1,394 @@ +import os +import traceback +import logging +import time +import threading +import humanfriendly +import psycopg2 +import json +from datetime import datetime +from peewee 
import SqliteDatabase
+from tasks import analyze_binary, update_yara_rules_remote, generate_rule_map
+import globals
+import argparse
+import configparser
+import hashlib
+
+from feed import CbFeed, CbFeedInfo, CbReport
+from celery import group
+from binary_database import db, BinaryDetonationResult
+import singleton
+
+logging.basicConfig(format='%(asctime)s - %(name)s - %(levelname)s - %(message)s')
+
+logger = logging.getLogger()
+logger.setLevel(logging.INFO)
+
+celery_logger = logging.getLogger('celery.app.trace')
+celery_logger.setLevel(logging.ERROR)
+
+
+def generate_feed_from_db():
+    query = BinaryDetonationResult.select().where(BinaryDetonationResult.score > 0)
+    reports = list()
+
+    for binary in query:
+        fields = {'iocs': {'md5': [binary.md5]},
+                  'score': binary.score,
+                  'timestamp': int(time.mktime(time.gmtime())),
+                  'link': '',
+                  'id': f'binary_{binary.md5}',
+                  'title': '',
+                  'description': binary.last_success_msg
+                  }
+
+        reports.append(CbReport(**fields))
+
+    feedinfo = {'name': 'yara',
+                'display_name': "Yara",
+                'provider_url': "http://plusvic.github.io/yara/",
+                'summary': "Scan binaries collected by Carbon Black with Yara.",
+                'tech_data': "There are no requirements to share any data with Carbon Black to use this feed.",
+                'icon': 'yara-logo.png',
+                'category': "Connectors",
+                }
+
+    feedinfo = CbFeedInfo(**feedinfo)
+    feed = CbFeed(feedinfo, reports)
+    logger.debug("dumping feed...")
+    created_feed = feed.dump()
+
+    logger.debug("Writing out feed to disk")
+    with open(globals.output_file, 'w') as fp:
+        fp.write(created_feed)
+
+
+def generate_yara_rule_map_hash(yara_rule_path):
+    temp_list = list()
+
+    for fn in os.listdir(yara_rule_path):
+        with open(os.path.join(yara_rule_path, fn), 'rb') as fp:
+            data = fp.read()
+        # hash each rule file individually so that, once sorted below, the
+        # resulting list does not depend on the order os.listdir() returns
+        md5 = hashlib.md5()
+        md5.update(data)
+        temp_list.append(str(md5.hexdigest()))
+
+    globals.g_yara_rule_map_hash_list = temp_list
+    globals.g_yara_rule_map_hash_list.sort()
+
+
+def generate_rule_map_remote(yara_rule_path):
+    ret_dict = dict()
+    for fn in os.listdir(yara_rule_path):
+        if fn.lower().endswith(".yar"):
+            ret_dict[fn] = open(os.path.join(yara_rule_path, fn), 'rb').read()
+
+    result = update_yara_rules_remote.delay(ret_dict)
+    globals.g_yara_rule_map = ret_dict
+    while not result.ready():
+        time.sleep(.1)
+
+
+def analyze_binaries(md5_hashes, local):
+    if local:
+        try:
+            results = list()
+            for md5_hash in md5_hashes:
+                results.append(analyze_binary(md5_hash))
+        except:
+            logger.error(traceback.format_exc())
+            time.sleep(5)
+            return
+        else:
+            return results
+    else:
+        try:
+            scan_group = list()
+            for md5_hash in md5_hashes:
+                scan_group.append(analyze_binary.s(md5_hash))
+            job = group(scan_group)
+
+            result = job.apply_async()
+
+            time_waited = 0
+            while not result.ready():
+                if time_waited == 100:
+                    break
+                else:
+                    time.sleep(.1)
+                    time_waited += 1
+
+        except:
+            logger.error(traceback.format_exc())
+            time.sleep(5)
+            return
+        else:
+            if result.successful():
+                return result.get(timeout=30)
+
+
+def save_results(analysis_results):
+    if not analysis_results:
+        # analyze_binaries() returns None when the scan failed or timed out
+        return
+
+    for analysis_result in analysis_results:
+        if analysis_result.binary_not_available:
+            globals.g_num_binaries_not_available += 1
+            continue
+        try:
+            bdr = BinaryDetonationResult()
+            bdr.md5 = analysis_result.md5
+            bdr.last_scan_date = datetime.now()
+            bdr.score = analysis_result.score
+            bdr.last_error_msg = analysis_result.last_error_msg
+            bdr.last_success_msg = analysis_result.short_result
+            bdr.misc = json.dumps(globals.g_yara_rule_map_hash_list)
+            bdr.save()
+            globals.g_num_binaries_analyzed += 1
+        except:
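+            # a failed database write for one result should not abort the rest of the batch
+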
logger.error("Error saving to database") + logger.error(traceback.format_exc()) + else: + if analysis_result.score > 0: + generate_feed_from_db() + + +def queue_save_results(md5_hashes): + try: + scan_group = list() + for md5_hash in md5_hashes: + scan_group.append(analyze_binary.s(md5_hash)) + job = group(scan_group) + + result = job.apply_async() + + time_waited = 0 + while not result.ready(): + if time_waited == 100: + break + else: + time.sleep(.1) + time_waited += 1 + + if result.successful(): + for analysis_result in result.get(timeout=30): + if analysis_result.binary_not_available: + globals.g_num_binaries_not_available += 1 + continue + try: + bdr = BinaryDetonationResult() + bdr.md5 = analysis_result.md5 + bdr.last_scan_date = datetime.now() + bdr.score = analysis_result.score + bdr.last_error_msg = analysis_result.last_error_msg + bdr.last_success_msg = analysis_result.short_result + bdr.misc = json.dumps(globals.g_yara_rule_map_hash_list) + bdr.save() + + except: + logger.error("Error saving to database") + logger.error(traceback.format_exc()) + if analysis_result.score > 0: + fields = {'iocs': {'md5': [analysis_result.md5]}, + 'score': analysis_result.score, + 'timestamp': int(time.mktime(time.gmtime())), + 'link': '', + 'id': f'binary_{analysis_result.md5}', + 'title': '', + 'description': analysis_result.short_result + } + + globals.g_reports.append(CbReport(**fields)) + else: + logger.error(result.traceback()) + except: + logger.error(traceback.format_exc()) + time.sleep(5) + + +def print_statistics(): + pass + + +def main(yara_rule_dir): + if globals.g_remote: + logger.info("Uploading yara rules to workers...") + generate_rule_map_remote(yara_rule_dir) + + num_total_binaries = 0 + num_binaries_skipped = 0 + num_binaries_queued = 0 + md5_hashes = list() + + start_time = time.time() + + logger.info("Connecting to Postgres database...") + try: + conn = psycopg2.connect(host=globals.g_postgres_host, + database=globals.g_postgres_db, + user=globals.g_postgres_username, + password=globals.g_postgres_password, + port=globals.g_postgres_port) + cur = conn.cursor() + cur.execute("SELECT md5hash FROM storefiles WHERE present_locally = TRUE") + except: + logger.error("Failed to connect to Postgres database") + logger.error(traceback.format_exc()) + return + + logger.info("Enumerating modulestore...") + + while True: + rows = cur.fetchmany() + if len(rows) == 0: + break + + for row in rows: + num_total_binaries += 1 + md5_hash = row[0].hex() + + try: + # + # see if we have already seen this file before. 
+                # we need to check which yara rules we have already scanned it with
+                #
+                bdr = BinaryDetonationResult.get(BinaryDetonationResult.md5 == md5_hash)
+            except:
+                #
+                # Not found, so we have to scan
+                #
+                pass
+            else:
+                try:
+                    scanned_hash_list = json.loads(bdr.misc)
+                    if scanned_hash_list == globals.g_yara_rule_map_hash_list:
+                        num_binaries_skipped += 1
+                        #
+                        # If it is the same then we don't need to scan again
+                        #
+                        continue
+                    else:
+                        #
+                        # Yara rules were updated, so let's scan
+                        #
+                        pass
+                except:
+                    logger.error("Unable to decode yara rule map hash from database")
+                    pass
+
+            num_binaries_queued += 1
+            md5_hashes.append(md5_hash)
+
+            if len(md5_hashes) >= globals.MAX_HASHES:
+                analysis_results = analyze_binaries(md5_hashes, local=(not globals.g_remote))
+                save_results(analysis_results)
+                md5_hashes = list()
+
+            if num_total_binaries % 1000 == 0:
+                elapsed_time = time.time() - start_time
+                logger.info("elapsed time: {0}".format(humanfriendly.format_timespan(elapsed_time)))
+                logger.info("number binaries scanned: {0}".format(globals.g_num_binaries_analyzed))
+                logger.info("number binaries already scanned: {0}".format(num_binaries_skipped))
+                logger.info("number binaries unavailable: {0}".format(globals.g_num_binaries_not_available))
+                logger.info("total binaries: {0}".format(num_total_binaries))
+                logger.info("binaries per second: {0}".format(round(num_total_binaries / elapsed_time, 2)))
+                logger.info("num binaries score greater than 0: {0}".format(
+                    len(BinaryDetonationResult.select().where(BinaryDetonationResult.score > 0))))
+                logger.info("")
+
+    conn.close()
+
+    analysis_results = analyze_binaries(md5_hashes, local=(not globals.g_remote))
+    save_results(analysis_results)
+    md5_hashes = list()
+
+    elapsed_time = time.time() - start_time
+    logger.info("elapsed time: {0}".format(humanfriendly.format_timespan(elapsed_time)))
+    logger.info("number binaries scanned: {0}".format(globals.g_num_binaries_analyzed))
+    logger.info("number binaries skipped: {0}".format(num_binaries_skipped))
+    logger.info("number binaries unavailable: {0}".format(globals.g_num_binaries_not_available))
+    logger.info("total binaries: {0}".format(num_total_binaries))
+    logger.info("binaries per second: {0}".format(num_total_binaries / elapsed_time))
+
+    generate_feed_from_db()
+
+
+def verify_config(config_file, output_file):
+    config = configparser.ConfigParser()
+    config.read(config_file)
+
+    globals.output_file = output_file
+
+    if not config.has_section('general'):
+        logger.error("Config file does not have a \'general\' section")
+        return False
+
+    if 'worker_type' in config['general']:
+        if config['general']['worker_type'] == 'local':
+            globals.g_remote = False
+        elif config['general']['worker_type'] == 'remote':
+            globals.g_remote = True
+            if 'worker_ip' in config['general']:
+                globals.worker_ip = config['general']['worker_ip']
+        else:
+            logger.error("invalid worker_type specified.
Must be \'local\' or \'remote\'") + + if 'yara_rules_dir' in config['general']: + globals.g_yara_rules_dir = config['general']['yara_rules_dir'] + + if 'postgres_host' in config['general']: + globals.g_postgres_host = config['general']['postgres_host'] + + if 'postgres_username' in config['general']: + globals.g_postgres_username = config['general']['postgres_username'] + + if 'postgres_password' in config['general']: + globals.g_postgres_password = config['general']['postgres_password'] + + if 'postgres_db' in config['general']: + globals.g_postgres_db = config['general']['postgres_db'] + + if 'cb_server_url' in config['general']: + globals.g_cb_server_url = config['general']['cb_server_url'] + + if 'cb_server_token' in config['general']: + globals.g_cb_server_token = config['general']['cb_server_token'] + + if 'niceness' in config['general']: + os.nice(int(config['general']['niceness'])) + + return True + + +if __name__ == "__main__": + try: + me = singleton.SingleInstance() + except: + logger.error("Only one instance of this script is allowed to run at a time") + else: + parser = argparse.ArgumentParser(description='Yara Agent for Yara Connector') + parser.add_argument('--config-file', + required=True, + default='yara_agent.conf', + help='Location of the config file') + parser.add_argument('--log-file', + default='yara_agent.log', + help='Log file output') + parser.add_argument('--output-file', + default='yara_feed.json', + help='output feed file') + parser.add_argument('--debug') + + args = parser.parse_args() + + if verify_config(args.config_file, args.output_file): + try: + g_yara_rule_map = generate_rule_map(globals.g_yara_rules_dir) + generate_yara_rule_map_hash(globals.g_yara_rules_dir) + database = SqliteDatabase('binary.db') + db.initialize(database) + db.connect() + db.create_tables([BinaryDetonationResult]) + main('yara_rules') + except: + logger.error(traceback.format_exc()) diff --git a/main.spec b/main.spec new file mode 100644 index 0000000..cb2172f --- /dev/null +++ b/main.spec @@ -0,0 +1,34 @@ +# -*- mode: python -*- + +block_cipher = None + + +a = Analysis(['main.py'], + pathex=['.'], + binaries=[], + datas=[ (HOMEPATH + '/cbapi/response/models/*', 'cbapi/response/models/'), + (HOMEPATH + '/cbapi/protection/models/*', 'cbapi/protection/models/'), + (HOMEPATH + '/cbapi/defense/models/*', 'cbapi/defense/models/') ], + hiddenimports=['celery.fixups', 'celery.fixups.django', 'celery.loaders.app'], + hookspath=[], + runtime_hooks=[], + excludes=[], + win_no_prefer_redirects=False, + win_private_assemblies=False, + cipher=block_cipher, + noarchive=False) +pyz = PYZ(a.pure, a.zipped_data, + cipher=block_cipher) +exe = EXE(pyz, + a.scripts, + a.binaries, + a.zipfiles, + a.datas, + [], + name='yara_agent', + debug=False, + bootloader_ignore_signals=False, + strip=False, + upx=False, + runtime_tmpdir=None, + console=True ) diff --git a/requirements.txt b/requirements.txt index fdb6b05..4e2bdb2 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,4 +1,35 @@ -flask==0.12.3 -python-dateutil>=2.4.2 -cbint>=0.9.2 -pyinstaller==3.1.1 +altgraph==0.16.1 +amqp==2.4.0 +attrdict==2.0.0 +billiard==3.5.0.5 +cachetools==3.0.0 +cbapi==1.3.6 +celery==4.2.1 +certifi==2018.11.29 +chardet==3.0.4 +future==0.17.1 +humanfriendly==4.17 +idna==2.8 +kombu==4.2.2.post1 +macholib==1.11 +pbr==5.1.1 +peewee==3.8.1 +pefile==2018.8.8 +pika==0.12.0 +prompt-toolkit==2.0.7 +protobuf==3.6.1 +psycopg2==2.7.6.1 +psycopg2-binary==2.7.6.1 +Pygments==2.3.1 +PyInstaller==3.4 +python-dateutil==2.7.5 +pytz==2018.9 
+PyYAML==3.13 +requests==2.21.0 +six==1.12.0 +solrq==1.1.1 +tendo==0.2.12 +urllib3==1.24.1 +vine==1.2.0 +wcwidth==0.1.7 +yara-python==3.8.1 diff --git a/root/etc/cb/integrations/yara/connector.conf.example b/root/etc/cb/integrations/yara/connector.conf.example deleted file mode 100644 index 47b83b1..0000000 --- a/root/etc/cb/integrations/yara/connector.conf.example +++ /dev/null @@ -1,75 +0,0 @@ -[bridge] - -; -; core configuration options -; - -; -; listener_port -; port to listen for incoming feed requests -; -listener_port=7000 - -; -; listener_address -; ipv4 address to listen; defaults to 127.0.0.1 -; 0.0.0.0 binds to all interfaces -; -listener_address=0.0.0.0 - -; -; feed_host_address -; ipv4 address feed is on; defaults to 127.0.0.1 -; 0.0.0.0 binds to all interfaces -; -feed_host=0.0.0.0 - -; -; binary_filter_query -; Additional query options to filter only binaries of interest to the Yara connector -; -; Default=None -; -binary_filter_query=is_executable_image:true -(digsig_publisher:"Microsoft Corporation" and digsig_result:"Signed") - -; -; Number of concurrent threads -; -; Default=4 -; -yara_num_threads=4 - -; -; yara_rule_directory -; directory where yara rules live. -; -yara_rule_directory=/usr/share/cb/integrations/yara/example_rules - -; -; Carbon Black Enterprise Server options -; - -; -; Carbon Black Enterprise Server URL -; -carbonblack_server_url=https://localhost/ - -; -; Carbon Black Enterprise Server API Token -; -carbonblack_server_token= - -; -; Carbon Black Enterprise Server SSL Verfication -; -carbonblack_server_sslverify=0 - - -; -; debugging options -; - -; debug -; enables various debug output -; -debug=1 diff --git a/root/etc/init.d/cb-yara-connector b/root/etc/init.d/cb-yara-connector deleted file mode 100755 index 3f91bb9..0000000 --- a/root/etc/init.d/cb-yara-connector +++ /dev/null @@ -1,99 +0,0 @@ -#!/bin/sh -# -# cb-yara-bridge -# -# chkconfig: 2345 55 25 -# description: The Carbon Black Yara Bridge is a Carbon Black component \ -# that integrates with Yara to provide \ -# a Carbon Black feed of Yara hits against binaries run in your environment. - -# Source function library. -. /etc/rc.d/init.d/functions - -exec="/usr/share/cb/integrations/yara/bin/cb-yara-connector" -prog="cb-yara-connector" -pidfile="/var/run/cb/integrations/yara.pid" - -lockfile=/var/lock/subsys/$prog - -start() { - [ -x $exec ] || exit 5 - - echo -n $"Starting $prog: " - $exec start - retval=$? - echo - [ $retval -eq 0 ] && touch $lockfile - return $retval -} - -stop() { - [ -x $exec ] || exit 5 - - echo -n $"Stopping $prog: " - $exec stop - retval=$? - echo - [ $retval -eq 0 ] && rm -f $lockfile - return $retval -} - -restart() { - [ -x $exec ] || exit 5 - - echo -n $"Restarting $prog: " - $exec restart - retval=$? - echo - [ $retval -eq 0 ] && rm -f $lockfile - return $retval -} - -reload() { - restart -} - -force_reload() { - restart -} - -rh_status() { - # run checks to determine if the service is running or use generic status - status -p $pidfile $prog -} - -rh_status_q() { - rh_status >/dev/null 2>&1 -} - - -case "$1" in - start) - rh_status_q && exit 0 - $1 - ;; - stop) - $1 - ;; - restart) - $1 - ;; - reload) - rh_status_q || exit 7 - $1 - ;; - force-reload) - force_reload - ;; - status) - rh_status - ;; - condrestart|try-restart) - rh_status_q || exit 0 - restart - ;; - *) - echo $"Usage: $0 {start|stop|status|restart|condrestart|try-restart|reload|force-reload}" - exit 2 -esac -exit $? 
diff --git a/root/usr/share/cb/integrations/yara/carbonblack.png b/root/usr/share/cb/integrations/yara/carbonblack.png
deleted file mode 100644
index 3062778faee2395e4ae6bee1b44f1f3d18055c1e..0000000000000000000000000000000000000000
Binary files a/root/usr/share/cb/integrations/yara/carbonblack.png and /dev/null differ
diff --git a/root/usr/share/cb/integrations/yara/example_rules/AAR.yar b/root/usr/share/cb/integrations/yara/example_rules/AAR.yar
deleted file mode 100644
index c61a617..0000000
--- a/root/usr/share/cb/integrations/yara/example_rules/AAR.yar
+++ /dev/null
@@ -1,21 +0,0 @@
-rule AAR
-{
-    meta:
-        author = " Kevin Breen "
-        date = "2014/04"
-        ref = "http://malwareconfig.com/stats/AAR"
-        maltype = "Remote Access Trojan"
-        filetype = "exe"
-
-    strings:
-        $a = "Hashtable"
-        $b = "get_IsDisposed"
-        $c = "TripleDES"
-        $d = "testmemory.FRMMain.resources"
-        $e = "$this.Icon" wide
-        $f = "{11111-22222-20001-00001}" wide
-        $g = "@@@@@"
-
-    condition:
-        all of them
-}
\ No newline at end of file
diff --git a/root/usr/share/cb/integrations/yara/example_rules/Ap0calypse.yar b/root/usr/share/cb/integrations/yara/example_rules/Ap0calypse.yar
deleted file mode 100644
index 163da17..0000000
--- a/root/usr/share/cb/integrations/yara/example_rules/Ap0calypse.yar
+++ /dev/null
@@ -1,20 +0,0 @@
-rule Ap0calypse
-{
-    meta:
-        author = " Kevin Breen "
-        date = "2014/04"
-        ref = "http://malwareconfig.com/stats/Ap0calypse"
-        maltype = "Remote Access Trojan"
-        filetype = "exe"
-
-    strings:
-        $a = "Ap0calypse"
-        $b = "Sifre"
-        $c = "MsgGoster"
-        $d = "Baslik"
-        $e = "Dosyalars"
-        $f = "Injecsiyon"
-
-    condition:
-        all of them
-}
diff --git a/root/usr/share/cb/integrations/yara/example_rules/Arcom.yar b/root/usr/share/cb/integrations/yara/example_rules/Arcom.yar
deleted file mode 100644
index 473c1f2..0000000
--- a/root/usr/share/cb/integrations/yara/example_rules/Arcom.yar
+++ /dev/null
@@ -1,20 +0,0 @@
-rule Arcom
-{
-    meta:
-        author = " Kevin Breen "
-        date = "2014/04"
-        ref = "http://malwareconfig.com/stats/Arcom"
-        maltype = "Remote Access Trojan"
-        filetype = "exe"
-
-    strings:
-        $a1 = "CVu3388fnek3W(3ij3fkp0930di"
-        $a2 = "ZINGAWI2"
-        $a3 = "clWebLightGoldenrodYellow"
-        $a4 = "Ancestor for '%s' not found" wide
-        $a5 = "Control-C hit" wide
-        $a6 = {A3 24 25 21}
-
-    condition:
-        all of them
-}
\ No newline at end of file
diff --git a/root/usr/share/cb/integrations/yara/example_rules/Bandook.yar b/root/usr/share/cb/integrations/yara/example_rules/Bandook.yar
deleted file mode 100644
index 9f1c233..0000000
--- a/root/usr/share/cb/integrations/yara/example_rules/Bandook.yar
+++ /dev/null
@@ -1,27 +0,0 @@
-rule
Bandook -{ - - meta: - author = " Kevin Breen " - date = "2014/04" - ref = "http://malwareconfig.com/stats/bandook" - maltype = "Remote Access Trojan" - filetype = "exe" - - strings: - $a = "aaaaaa1|" - $b = "aaaaaa2|" - $c = "aaaaaa3|" - $d = "aaaaaa4|" - $e = "aaaaaa5|" - $f = "%s%d.exe" - $g = "astalavista" - $h = "givemecache" - $i = "%s\\system32\\drivers\\blogs\\*" - $j = "bndk13me" - - - - condition: - all of them -} diff --git a/root/usr/share/cb/integrations/yara/example_rules/BlackNix.yar b/root/usr/share/cb/integrations/yara/example_rules/BlackNix.yar deleted file mode 100644 index c22f5e7..0000000 --- a/root/usr/share/cb/integrations/yara/example_rules/BlackNix.yar +++ /dev/null @@ -1,20 +0,0 @@ -rule BlackNix -{ - meta: - author = " Kevin Breen " - date = "2014/04" - ref = "http://malwareconfig.com/stats/BlackNix" - maltype = "Remote Access Trojan" - filetype = "exe" - - strings: - $a1 = "SETTINGS" wide - $a2 = "Mark Adler" - $a3 = "Random-Number-Here" - $a4 = "RemoteShell" - $a5 = "SystemInfo" - - - condition: - all of them -} \ No newline at end of file diff --git a/root/usr/share/cb/integrations/yara/example_rules/BlackShades.yar b/root/usr/share/cb/integrations/yara/example_rules/BlackShades.yar deleted file mode 100644 index 7fb90ee..0000000 --- a/root/usr/share/cb/integrations/yara/example_rules/BlackShades.yar +++ /dev/null @@ -1,16 +0,0 @@ -rule BlackShades -{ - meta: - author = "Brian Wallace (@botnet_hunter)" - date = "2014/04" - ref = "http://malwareconfig.com/stats/PoisonIvy" - ref = "http://blog.cylance.com/a-study-in-bots-blackshades-net" - family = "blackshades" - - strings: - $string1 = "bss_server" - $string2 = "txtChat" - $string3 = "UDPFlood" - condition: - all of them -} \ No newline at end of file diff --git a/root/usr/share/cb/integrations/yara/example_rules/BlueBanana.yar b/root/usr/share/cb/integrations/yara/example_rules/BlueBanana.yar deleted file mode 100644 index d26e6c1..0000000 --- a/root/usr/share/cb/integrations/yara/example_rules/BlueBanana.yar +++ /dev/null @@ -1,21 +0,0 @@ -rule BlueBanana -{ - meta: - author = " Kevin Breen " - date = "2014/04" - ref = "http://malwareconfig.com/stats/BlueBanana" - maltype = "Remote Access Trojan" - filetype = "Java" - - strings: - $meta = "META-INF" - $conf = "config.txt" - $a = "a/a/a/a/f.class" - $b = "a/a/a/a/l.class" - $c = "a/a/a/b/q.class" - $d = "a/a/a/b/v.class" - - - condition: - all of them -} diff --git a/root/usr/share/cb/integrations/yara/example_rules/Bozok.yar b/root/usr/share/cb/integrations/yara/example_rules/Bozok.yar deleted file mode 100644 index ac2c055..0000000 --- a/root/usr/share/cb/integrations/yara/example_rules/Bozok.yar +++ /dev/null @@ -1,19 +0,0 @@ -rule Bozok -{ - meta: - author = " Kevin Breen " - date = "2014/04" - ref = "http://malwareconfig.com/stats/Bozok" - maltype = "Remote Access Trojan" - filetype = "exe" - - strings: - $a = "getVer" nocase - $b = "StartVNC" nocase - $c = "SendCamList" nocase - $d = "untPlugin" nocase - $e = "gethostbyname" nocase - - condition: - all of them -} \ No newline at end of file diff --git a/root/usr/share/cb/integrations/yara/example_rules/ClientMesh.yar b/root/usr/share/cb/integrations/yara/example_rules/ClientMesh.yar deleted file mode 100644 index 19d8f99..0000000 --- a/root/usr/share/cb/integrations/yara/example_rules/ClientMesh.yar +++ /dev/null @@ -1,20 +0,0 @@ -rule ClientMesh -{ - meta: - author = "Kevin Breen " - date = "2014/06" - ref = "http://malwareconfig.com/stats/ClientMesh" - family = "torct" - - strings: - $string1 = 
"machinedetails" - $string2 = "MySettings" - $string3 = "sendftppasswords" - $string4 = "sendbrowserpasswords" - $string5 = "arma2keyMass" - $string6 = "keylogger" - $conf = {00 00 00 00 00 00 00 00 00 7E} - - condition: - all of them -} diff --git a/root/usr/share/cb/integrations/yara/example_rules/CyberGate.yar b/root/usr/share/cb/integrations/yara/example_rules/CyberGate.yar deleted file mode 100644 index 47de0fc..0000000 --- a/root/usr/share/cb/integrations/yara/example_rules/CyberGate.yar +++ /dev/null @@ -1,23 +0,0 @@ -rule CyberGate -{ - - meta: - author = " Kevin Breen " - date = "2014/04" - ref = "http://malwareconfig.com/stats/CyberGate" - maltype = "Remote Access Trojan" - filetype = "exe" - - strings: - $string1 = {23 23 23 23 40 23 23 23 23 E8 EE E9 F9 23 23 23 23 40 23 23 23 23} - $string2 = {23 23 23 23 40 23 23 23 23 FA FD F0 EF F9 23 23 23 23 40 23 23 23 23} - $string3 = "EditSvr" - $string4 = "TLoader" - $string5 = "Stroks" - $string6 = "####@####" - $res1 = "XX-XX-XX-XX" - $res2 = "CG-CG-CG-CG" - - condition: - all of ($string*) and any of ($res*) -} diff --git a/root/usr/share/cb/integrations/yara/example_rules/DarkComet.yar b/root/usr/share/cb/integrations/yara/example_rules/DarkComet.yar deleted file mode 100644 index b27b11e..0000000 --- a/root/usr/share/cb/integrations/yara/example_rules/DarkComet.yar +++ /dev/null @@ -1,27 +0,0 @@ -rule DarkComet -{ - meta: - author = " Kevin Breen " - date = "2014/04" - ref = "http://malwareconfig.com/stats/DarkComet" - maltype = "Remote Access Trojan" - filetype = "exe" - - strings: - // Versions 2x - $a1 = "#BOT#URLUpdate" - $a2 = "Command successfully executed!" - $a3 = "MUTEXNAME" wide - $a4 = "NETDATA" wide - // Versions 3x & 4x & 5x - $b1 = "FastMM Borland Edition" - $b2 = "%s, ClassID: %s" - $b3 = "I wasn't able to open the hosts file" - $b4 = "#BOT#VisitUrl" - $b5 = "#KCMDDC" - - - - condition: - all of ($a*) or all of ($b*) -} diff --git a/root/usr/share/cb/integrations/yara/example_rules/DarkRAT.yar b/root/usr/share/cb/integrations/yara/example_rules/DarkRAT.yar deleted file mode 100644 index ff86958..0000000 --- a/root/usr/share/cb/integrations/yara/example_rules/DarkRAT.yar +++ /dev/null @@ -1,21 +0,0 @@ -rule DarkRAT -{ - meta: - author = " Kevin Breen " - date = "2014/04" - ref = "http://malwareconfig.com/stats/DarkRAT" - maltype = "Remote Access Trojan" - filetype = "exe" - - strings: - $a = "@1906dark1996coder@" - $b = "SHEmptyRecycleBinA" - $c = "mciSendStringA" - $d = "add_Shutdown" - $e = "get_SaveMySettingsOnExit" - $f = "get_SpecialDirectories" - $g = "Client.My" - - condition: - all of them -} \ No newline at end of file diff --git a/root/usr/share/cb/integrations/yara/example_rules/Greame.yar b/root/usr/share/cb/integrations/yara/example_rules/Greame.yar deleted file mode 100644 index a861ebb..0000000 --- a/root/usr/share/cb/integrations/yara/example_rules/Greame.yar +++ /dev/null @@ -1,24 +0,0 @@ -rule Greame -{ - meta: - author = " Kevin Breen " - date = "2014/04" - ref = "http://malwareconfig.com/stats/Greame" - maltype = "Remote Access Trojan" - filetype = "exe" - - strings: - $a = {23 23 23 23 40 23 23 23 23 E8 EE E9 F9 23 23 23 23 40 23 23 23 23} - $b = {23 23 23 23 40 23 23 23 23 FA FD F0 EF F9 23 23 23 23 40 23 23 23 23} - $c = "EditSvr" - $d = "TLoader" - $e = "Stroks" - $f = "Avenger by NhT" - $g = "####@####" - $h = "GREAME" - - - - condition: - all of them -} \ No newline at end of file diff --git a/root/usr/share/cb/integrations/yara/example_rules/Imminent3.yar 
b/root/usr/share/cb/integrations/yara/example_rules/Imminent3.yar deleted file mode 100644 index 0c3cc41..0000000 --- a/root/usr/share/cb/integrations/yara/example_rules/Imminent3.yar +++ /dev/null @@ -1,28 +0,0 @@ -rule Imminent -{ - meta: - author = " Kevin Breen " - date = "2014/04" - ref = "http://malwareconfig.com/stats/Imminent" - maltype = "Remote Access Trojan" - filetype = "exe" - - strings: - $v1a = "DecodeProductKey" - $v1b = "StartHTTPFlood" - $v1c = "CodeKey" - $v1d = "MESSAGEBOX" - $v1e = "GetFilezillaPasswords" - $v1f = "DataIn" - $v1g = "UDPzSockets" - $v1h = {52 00 54 00 5F 00 52 00 43 00 44 00 41 00 54 00 41} - - $v2a = "k__BackingField" - $v2b = "k__BackingField" - $v2c = "DownloadAndExecute" - $v2d = "-CHECK & PING -n 2 127.0.0.1 & EXIT" wide - $v2e = "england.png" wide - $v2f = "Showed Messagebox" wide - condition: - all of ($v1*) or all of ($v2*) -} diff --git a/root/usr/share/cb/integrations/yara/example_rules/Infinity.yar b/root/usr/share/cb/integrations/yara/example_rules/Infinity.yar deleted file mode 100644 index 8ea1efe..0000000 --- a/root/usr/share/cb/integrations/yara/example_rules/Infinity.yar +++ /dev/null @@ -1,22 +0,0 @@ -rule Infinity -{ - meta: - author = " Kevin Breen " - date = "2014/04" - ref = "http://malwareconfig.com/stats/Infinity" - maltype = "Remote Access Trojan" - filetype = "exe" - - strings: - $a = "CRYPTPROTECT_PROMPTSTRUCT" - $b = "discomouse" - $c = "GetDeepInfo" - $d = "AES_Encrypt" - $e = "StartUDPFlood" - $f = "BATScripting" wide - $g = "FBqINhRdpgnqATxJ.html" wide - $i = "magic_key" wide - - condition: - all of them -} diff --git a/root/usr/share/cb/integrations/yara/example_rules/LostDoor.yar b/root/usr/share/cb/integrations/yara/example_rules/LostDoor.yar deleted file mode 100644 index ce26972..0000000 --- a/root/usr/share/cb/integrations/yara/example_rules/LostDoor.yar +++ /dev/null @@ -1,24 +0,0 @@ -rule LostDoor -{ - meta: - author = " Kevin Breen " - date = "2014/04" - ref = "http://malwareconfig.com/stats/LostDoor" - maltype = "Remote Access Trojan" - filetype = "exe" - - strings: - $a0 = {0D 0A 2A 45 44 49 54 5F 53 45 52 56 45 52 2A 0D 0A} - $a1 = "*mlt* = %" - $a2 = "*ip* = %" - $a3 = "*victimo* = %" - $a4 = "*name* = %" - $b5 = "[START]" - $b6 = "[DATA]" - $b7 = "We Control Your Digital World" wide ascii - $b8 = "RC4Initialize" wide ascii - $b9 = "RC4Decrypt" wide ascii - - condition: - all of ($a*) or all of ($b*) -} \ No newline at end of file diff --git a/root/usr/share/cb/integrations/yara/example_rules/LuxNet.yar b/root/usr/share/cb/integrations/yara/example_rules/LuxNet.yar deleted file mode 100644 index 8f498ac..0000000 --- a/root/usr/share/cb/integrations/yara/example_rules/LuxNet.yar +++ /dev/null @@ -1,20 +0,0 @@ -rule LuxNet -{ - meta: - author = " Kevin Breen " - date = "2014/04" - ref = "http://malwareconfig.com/stats/LuxNet" - maltype = "Remote Access Trojan" - filetype = "exe" - - strings: - $a = "GetHashCode" - $b = "Activator" - $c = "WebClient" - $d = "op_Equality" - $e = "dickcursor.cur" wide - $f = "{0}|{1}|{2}" wide - - condition: - all of them -} diff --git a/root/usr/share/cb/integrations/yara/example_rules/NanoCore.yar b/root/usr/share/cb/integrations/yara/example_rules/NanoCore.yar deleted file mode 100644 index ff63242..0000000 --- a/root/usr/share/cb/integrations/yara/example_rules/NanoCore.yar +++ /dev/null @@ -1,26 +0,0 @@ -rule NanoCore -{ - meta: - author = " Kevin Breen " - date = "2014/04" - ref = "http://malwareconfig.com/stats/NanoCore" - maltype = "Remote Access Trojan" - filetype = "exe" 
- - strings: - $a = "NanoCore" - $b = "ClientPlugin" - $c = "ProjectData" - $d = "DESCrypto" - $e = "KeepAlive" - $f = "IPNETROW" - $g = "LogClientMessage" - $h = "|ClientHost" - $i = "get_Connected" - $j = "#=q" - $key = {43 6f 24 cb 95 30 38 39} - - - condition: - 6 of them -} diff --git a/root/usr/share/cb/integrations/yara/example_rules/NetWire.yar b/root/usr/share/cb/integrations/yara/example_rules/NetWire.yar deleted file mode 100644 index a294fa7..0000000 --- a/root/usr/share/cb/integrations/yara/example_rules/NetWire.yar +++ /dev/null @@ -1,19 +0,0 @@ -rule NetWire -{ - meta: - author = " Kevin Breen " - date = "2014/04" - ref = "http://malwareconfig.com/stats/NetWire" - maltype = "Remote Access Trojan" - filetype = "exe" - - strings: - $string1 = "[Scroll Lock]" - $string2 = "[Shift Lock]" - $string3 = "200 OK" - $string4 = "%s.Identifier" - $string5 = "sqlite3_column_text" - $string6 = "[%s] - [%.2d/%.2d/%d %.2d:%.2d:%.2d]" - condition: - all of them -} diff --git a/root/usr/share/cb/integrations/yara/example_rules/Pandora.yar b/root/usr/share/cb/integrations/yara/example_rules/Pandora.yar deleted file mode 100644 index 3b321c0..0000000 --- a/root/usr/share/cb/integrations/yara/example_rules/Pandora.yar +++ /dev/null @@ -1,27 +0,0 @@ -rule Pandora -{ - meta: - author = " Kevin Breen " - date = "2014/04" - ref = "http://malwareconfig.com/stats/Pandora" - maltype = "Remote Access Trojan" - filetype = "exe" - - strings: - $a = "Can't get the Windows version" - $b = "=M=Q=U=Y=]=a=e=i=m=q=u=y=}=" - $c = "JPEG error #%d" wide - $d = "Cannot assign a %s to a %s" wide - $g = "%s, ProgID:" - $h = "clave" - $i = "Shell_TrayWnd" - $j = "melt.bat" - $k = "\\StubPath" - $l = "\\logs.dat" - $m = "1027|Operation has been canceled!" - $n = "466|You need to plug-in! Double click to install... 
|" - $0 = "33|[Keylogger Not Activated!]" - - condition: - all of them -} \ No newline at end of file diff --git a/root/usr/share/cb/integrations/yara/example_rules/Paradox.yar b/root/usr/share/cb/integrations/yara/example_rules/Paradox.yar deleted file mode 100644 index 8521665..0000000 --- a/root/usr/share/cb/integrations/yara/example_rules/Paradox.yar +++ /dev/null @@ -1,21 +0,0 @@ -rule Paradox -{ - meta: - author = " Kevin Breen " - date = "2014/04" - ref = "http://malwareconfig.com/stats/Paradox" - maltype = "Remote Access Trojan" - filetype = "exe" - - strings: - $a = "ParadoxRAT" - $b = "Form1" - $c = "StartRMCam" - $d = "Flooders" - $e = "SlowLaris" - $f = "SHITEMID" - $g = "set_Remote_Chat" - - condition: - all of them -} diff --git a/root/usr/share/cb/integrations/yara/example_rules/PoisonIvy.yar b/root/usr/share/cb/integrations/yara/example_rules/PoisonIvy.yar deleted file mode 100644 index 920635c..0000000 --- a/root/usr/share/cb/integrations/yara/example_rules/PoisonIvy.yar +++ /dev/null @@ -1,19 +0,0 @@ -rule PoisonIvy -{ - meta: - author = " Kevin Breen " - date = "2014/04" - ref = "http://malwareconfig.com/stats/PoisonIvy" - maltype = "Remote Access Trojan" - filetype = "exe" - - strings: - $stub = {04 08 00 53 74 75 62 50 61 74 68 18 04} - $string1 = "CONNECT %s:%i HTTP/1.0" - $string2 = "ws2_32" - $string3 = "cks=u" - $string4 = "thj@h" - $string5 = "advpack" - condition: - $stub at 0x1620 and all of ($string*) or (all of them) -} diff --git a/root/usr/share/cb/integrations/yara/example_rules/Punisher.yar b/root/usr/share/cb/integrations/yara/example_rules/Punisher.yar deleted file mode 100644 index 087695a..0000000 --- a/root/usr/share/cb/integrations/yara/example_rules/Punisher.yar +++ /dev/null @@ -1,21 +0,0 @@ -rule Punisher -{ - meta: - author = " Kevin Breen " - date = "2014/04" - ref = "http://malwareconfig.com/stats/Punisher" - maltype = "Remote Access Trojan" - filetype = "exe" - - strings: - $a = "abccba" - $b = {5C 00 68 00 66 00 68 00 2E 00 76 00 62 00 73} - $c = {5C 00 73 00 63 00 2E 00 76 00 62 00 73} - $d = "SpyTheSpy" wide ascii - $e = "wireshark" wide - $f = "apateDNS" wide - $g = "abccbaDanabccb" - - condition: - all of them -} diff --git a/root/usr/share/cb/integrations/yara/example_rules/PythoRAT.yar b/root/usr/share/cb/integrations/yara/example_rules/PythoRAT.yar deleted file mode 100644 index 7d678f4..0000000 --- a/root/usr/share/cb/integrations/yara/example_rules/PythoRAT.yar +++ /dev/null @@ -1,22 +0,0 @@ -rule PythoRAT -{ - meta: - author = " Kevin Breen " - date = "2014/04" - ref = "http://malwareconfig.com/stats/PythoRAT" - maltype = "Remote Access Trojan" - filetype = "exe" - - strings: - $a = "TKeylogger" - $b = "uFileTransfer" - $c = "TTDownload" - $d = "SETTINGS" - $e = "Unknown" wide - $f = "#@#@#" - $g = "PluginData" - $i = "OnPluginMessage" - - condition: - all of them -} diff --git a/root/usr/share/cb/integrations/yara/example_rules/ShadowTech.yar b/root/usr/share/cb/integrations/yara/example_rules/ShadowTech.yar deleted file mode 100644 index 47eacb2..0000000 --- a/root/usr/share/cb/integrations/yara/example_rules/ShadowTech.yar +++ /dev/null @@ -1,21 +0,0 @@ -rule ShadowTech -{ - meta: - author = " Kevin Breen " - date = "2014/04" - ref = "http://malwareconfig.com/stats/ShadowTech" - maltype = "Remote Access Trojan" - filetype = "exe" - - strings: - $a = "ShadowTech" nocase - $b = "DownloadContainer" - $c = "MySettings" - $d = "System.Configuration" - $newline = "#-@NewLine@-#" wide - $split = "pSIL" wide - $key = "ESIL" wide - - 
condition: - 4 of them -} \ No newline at end of file diff --git a/root/usr/share/cb/integrations/yara/example_rules/SmallNet.yar b/root/usr/share/cb/integrations/yara/example_rules/SmallNet.yar deleted file mode 100644 index 20cf70e..0000000 --- a/root/usr/share/cb/integrations/yara/example_rules/SmallNet.yar +++ /dev/null @@ -1,19 +0,0 @@ -rule SmallNet -{ - meta: - author = " Kevin Breen " - date = "2014/04" - ref = "http://malwareconfig.com/stats/SmallNet" - maltype = "Remote Access Trojan" - filetype = "exe" - - strings: - $split1 = "!!<3SAFIA<3!!" - $split2 = "!!ElMattadorDz!!" - $a1 = "stub_2.Properties" - $a2 = "stub.exe" wide - $a3 = "get_CurrentDomain" - - condition: - ($split1 or $split2) and (all of ($a*)) -} diff --git a/root/usr/share/cb/integrations/yara/example_rules/SpyGate.yar b/root/usr/share/cb/integrations/yara/example_rules/SpyGate.yar deleted file mode 100644 index f7a9fd9..0000000 --- a/root/usr/share/cb/integrations/yara/example_rules/SpyGate.yar +++ /dev/null @@ -1,26 +0,0 @@ -rule SpyGate -{ - meta: - author = " Kevin Breen " - date = "2014/04" - ref = "http://malwareconfig.com/stats/SpyGate" - maltype = "Remote Access Trojan" - filetype = "exe" - - strings: - $split = "abccba" - $a1 = "abccbaSpyGateRATabccba" //$a = Version 0.2.6 - $a2 = "StubX.pdb" - $a3 = "abccbaDanabccb" - $b1 = "monikerString" nocase //$b = Version 2.0 - $b2 = "virustotal1" - $b3 = "get_CurrentDomain" - $c1 = "shutdowncomputer" wide //$c = Version 2.9 - $c2 = "shutdown -r -t 00" wide - $c3 = "set cdaudio door closed" wide - $c4 = "FileManagerSplit" wide - $c5 = "Chating With >> [~Hacker~]" wide - - condition: - (all of ($a*) and #split > 40) or (all of ($b*) and #split > 10) or (all of ($c*)) -} diff --git a/root/usr/share/cb/integrations/yara/example_rules/Sub7Nation.yar b/root/usr/share/cb/integrations/yara/example_rules/Sub7Nation.yar deleted file mode 100644 index 1a1b8c6..0000000 --- a/root/usr/share/cb/integrations/yara/example_rules/Sub7Nation.yar +++ /dev/null @@ -1,28 +0,0 @@ -rule Sub7Nation -{ - meta: - author = " Kevin Breen " - date = "2014/04" - ref = "http://malwareconfig.com/stats/Sub7Nation" - maltype = "Remote Access Trojan" - filetype = "exe" - - strings: - $a = "EnableLUA /t REG_DWORD /d 0 /f" - $b = "*A01*" - $c = "*A02*" - $d = "*A03*" - $e = "*A04*" - $f = "*A05*" - $g = "*A06*" - $h = "#@#@#" - $i = "HostSettings" - $verSpecific1 = "sevane.tmp" - $verSpecific2 = "cmd_.bat" - $verSpecific3 = "a2b7c3d7e4" - $verSpecific4 = "cmd.dll" - - - condition: - all of them -} \ No newline at end of file diff --git a/root/usr/share/cb/integrations/yara/example_rules/Vertex.yar b/root/usr/share/cb/integrations/yara/example_rules/Vertex.yar deleted file mode 100644 index d8daf96..0000000 --- a/root/usr/share/cb/integrations/yara/example_rules/Vertex.yar +++ /dev/null @@ -1,23 +0,0 @@ -rule Vertex -{ - - meta: - author = " Kevin Breen " - date = "2014/04" - ref = "http://malwareconfig.com/stats/Vertex" - maltype = "Remote Access Trojan" - filetype = "exe" - - strings: - $string1 = "DEFPATH" - $string2 = "HKNAME" - $string3 = "HPORT" - $string4 = "INSTALL" - $string5 = "IPATH" - $string6 = "MUTEX" - $res1 = "PANELPATH" - $res2 = "ROOTURL" - - condition: - all of them -} diff --git a/root/usr/share/cb/integrations/yara/example_rules/VirusRat.yar b/root/usr/share/cb/integrations/yara/example_rules/VirusRat.yar deleted file mode 100644 index 4f4ed52..0000000 --- a/root/usr/share/cb/integrations/yara/example_rules/VirusRat.yar +++ /dev/null @@ -1,26 +0,0 @@ -rule VirusRat -{ - meta: - 
author = " Kevin Breen " - date = "2014/04" - ref = "http://malwareconfig.com/stats/VirusRat" - maltype = "Remote Access Trojan" - filetype = "exe" - - strings: - $string0 = "virustotal" - $string1 = "virusscan" - $string2 = "abccba" - $string3 = "pronoip" - $string4 = "streamWebcam" - $string5 = "DOMAIN_PASSWORD" - $string6 = "Stub.Form1.resources" - $string7 = "ftp://{0}@{1}" wide - $string8 = "SELECT * FROM moz_logins" wide - $string9 = "SELECT * FROM moz_disabledHosts" wide - $string10 = "DynDNS\\Updater\\config.dyndns" wide - $string11 = "|BawaneH|" wide - - condition: - all of them -} diff --git a/root/usr/share/cb/integrations/yara/example_rules/Xtreme.yar b/root/usr/share/cb/integrations/yara/example_rules/Xtreme.yar deleted file mode 100644 index 8238492..0000000 --- a/root/usr/share/cb/integrations/yara/example_rules/Xtreme.yar +++ /dev/null @@ -1,20 +0,0 @@ -rule Xtreme -{ - meta: - author = " Kevin Breen " - date = "2014/04" - ref = "http://malwareconfig.com/stats/Xtreme" - maltype = "Remote Access Trojan" - filetype = "exe" - ver = "2.9, 3.1, 3.2, 3.5" - - strings: - $a = "XTREME" wide - $b = "ServerStarted" wide - $c = "XtremeKeylogger" wide - $d = "x.html" wide - $e = "Xtreme RAT" wide - - condition: - all of them -} diff --git a/root/usr/share/cb/integrations/yara/example_rules/adWind.yar b/root/usr/share/cb/integrations/yara/example_rules/adWind.yar deleted file mode 100644 index 125fe9c..0000000 --- a/root/usr/share/cb/integrations/yara/example_rules/adWind.yar +++ /dev/null @@ -1,18 +0,0 @@ -rule adWind -{ - meta: - author = " Kevin Breen " - date = "2014/04" - ref = "http://malwareconfig.com/stats/AAR" - maltype = "Remote Access Trojan" - filetype = "exe" - - strings: - $meta = "META-INF" - $conf = "config.xml" - $a = "Adwind.class" - $b = "Principal.adwind" - - condition: - all of them -} \ No newline at end of file diff --git a/root/usr/share/cb/integrations/yara/example_rules/jRat.yar b/root/usr/share/cb/integrations/yara/example_rules/jRat.yar deleted file mode 100644 index e5a41c9..0000000 --- a/root/usr/share/cb/integrations/yara/example_rules/jRat.yar +++ /dev/null @@ -1,23 +0,0 @@ -rule jRat -{ - meta: - author = " Kevin Breen " - date = "2014/04" - ref = "http://malwareconfig.com/stats/jRat" - maltype = "Remote Access Trojan" - filetype = "Java" - - strings: - $meta = "META-INF" - $key = "key.dat" - $conf = "config.dat" - $jra1 = "enc.dat" - $jra2 = "a.class" - $jra3 = "b.class" - $jra4 = "c.class" - $reClass1 = /[a-z]\.class/ - $reClass2 = /[a-z][a-f]\.class/ - - condition: - ($meta and $key and $conf and #reClass1 > 10 and #reClass2 > 10) or ($meta and $key and all of ($jra*)) -} diff --git a/root/usr/share/cb/integrations/yara/example_rules/njRat.yar b/root/usr/share/cb/integrations/yara/example_rules/njRat.yar deleted file mode 100644 index 2226b44..0000000 --- a/root/usr/share/cb/integrations/yara/example_rules/njRat.yar +++ /dev/null @@ -1,24 +0,0 @@ -rule njRat -{ - meta: - author = " Kevin Breen " - date = "2014/04" - ref = "http://malwareconfig.com/stats/njRat" - maltype = "Remote Access Trojan" - filetype = "exe" - - strings: - - $s1 = {7C 00 27 00 7C 00 27 00 7C} // |'|'| - $s2 = "netsh firewall add allowedprogram" wide - $s3 = "Software\\Microsoft\\Windows\\CurrentVersion\\Run" wide - $s4 = "yyyy-MM-dd" wide - - $v1 = "cmd.exe /k ping 0 & del" wide - $v2 = "cmd.exe /c ping 127.0.0.1 & del" wide - $v3 = "cmd.exe /c ping 0 -n 2 & del" wide - - - condition: - all of ($s*) and any of ($v*) -} diff --git 
a/root/usr/share/cb/integrations/yara/example_rules/unrecom.yar b/root/usr/share/cb/integrations/yara/example_rules/unrecom.yar deleted file mode 100644 index 4520aae..0000000 --- a/root/usr/share/cb/integrations/yara/example_rules/unrecom.yar +++ /dev/null @@ -1,19 +0,0 @@ -rule unrecom -{ - meta: - author = " Kevin Breen " - date = "2014/04" - ref = "http://malwareconfig.com/stats/AAR" - maltype = "Remote Access Trojan" - filetype = "exe" - - strings: - $meta = "META-INF" - $conf = "load/ID" - $a = "load/JarMain.class" - $b = "load/MANIFEST.MF" - $c = "plugins/UnrecomServer.class" - - condition: - all of them -} diff --git a/root/usr/share/cb/integrations/yara/example_rules/xRAT.yar b/root/usr/share/cb/integrations/yara/example_rules/xRAT.yar deleted file mode 100644 index 35b8b8b..0000000 --- a/root/usr/share/cb/integrations/yara/example_rules/xRAT.yar +++ /dev/null @@ -1,28 +0,0 @@ -rule xRAT -{ - meta: - author = " Kevin Breen " - date = "2014/04" - ref = "http://malwareconfig.com/stats/xRat" - maltype = "Remote Access Trojan" - filetype = "exe" - - strings: - $v1a = "DecodeProductKey" - $v1b = "StartHTTPFlood" - $v1c = "CodeKey" - $v1d = "MESSAGEBOX" - $v1e = "GetFilezillaPasswords" - $v1f = "DataIn" - $v1g = "UDPzSockets" - $v1h = {52 00 54 00 5F 00 52 00 43 00 44 00 41 00 54 00 41} - - $v2a = "k__BackingField" - $v2b = "k__BackingField" - $v2c = "DownloadAndExecute" - $v2d = "-CHECK & PING -n 2 127.0.0.1 & EXIT" wide - $v2e = "england.png" wide - $v2f = "Showed Messagebox" wide - condition: - all of ($v1*) or all of ($v2*) -} \ No newline at end of file diff --git a/root/usr/share/cb/integrations/yara/yara-logo.png b/root/usr/share/cb/integrations/yara/yara-logo.png deleted file mode 100644 index 8d57f97f4975046ce7523cc63ce6a9dcf7b7c97b..0000000000000000000000000000000000000000 GIT binary patch [binary image data omitted]
diff --git a/setup.py b/setup.py deleted file mode 100644 --- a/setup.py +++ /dev/null @@ -1,175 +0,0 @@ -def get_data_files(rootdir): - results = [] - for root, dirs, files in os.walk(rootdir): - if len(files) > 0: - dirname = os.path.relpath(root, rootdir) - flist = [os.path.join(root, f) for f in files] - results.append(("/%s" % dirname, flist)) - - return results - -data_files = get_data_files("root") -data_files.append('cb-yara-connector.spec') -data_files.append('scripts/cb-yara-connector') -scripts = { - 'cb-yara-connector': { - 'spec': 'cb-yara-connector.spec', - 'dest': '/usr/share/cb/integrations/yara/bin/' - } -} - -setup( - name='python-cb-yara-connector', - version='1.3', - packages=['cbopensource', 'cbopensource.connectors', 'cbopensource.connectors.yara'], - url='https://github.com/carbonblack/cb-yara-connector', - license='MIT', - author='Carbon Black Developer Network', - author_email='dev-support@carbonblack.com', - description= - 'Connector between Carbon Black and Yara', - data_files=data_files, - classifiers=[ - 'Development Status :: 4 - Beta', - - # Indicate who your project is intended for - 'Intended Audience :: Developers', - - # Pick your license as you wish (should match "license" above) - 'License :: OSI Approved :: MIT License', - - # Specify the Python versions you support here. In particular, ensure - # that you indicate whether you support Python 2, Python 3 or both. - 'Programming Language :: Python :: 2', - 'Programming Language :: Python :: 2.6', - 'Programming Language :: Python :: 2.7', - ], - keywords='carbonblack bit9', - cmdclass={'install_cb': install_cb, 'bdist_binaryrpm': bdist_binaryrpm} -) \ No newline at end of file diff --git a/singleton.py b/singleton.py new file mode 100644 index 0000000..96df95f --- /dev/null +++ b/singleton.py @@ -0,0 +1,147 @@ +#! /usr/bin/env python + +import logging +from multiprocessing import Process +import os +import sys +import tempfile +import unittest + + +class SingleInstanceException(BaseException): + pass + + +class SingleInstance(object): + """Class that can be instantiated only once per machine. + + If you want to prevent your script from running in parallel, just instantiate the SingleInstance() class. If another instance is already running, it raises a `SingleInstanceException`. + + >>> from singleton import SingleInstance + ... me = SingleInstance() + + This option is very useful if you have scripts executed by crontab at short intervals. + + Remember that this works by creating a lock file with a filename based on the full path to the script file. + + Providing a flavor_id will augment the filename with the provided flavor_id, allowing you to create multiple singleton instances from the same file. This is particularly useful if you want specific functions to have their own singleton instances.
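+ + A minimal sketch of the flavor_id behavior (the flavor names here are illustrative only, not part of the API): + + >>> worker = SingleInstance(flavor_id="worker") + >>> scheduler = SingleInstance(flavor_id="scheduler") # different lock file, so no exception is raised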
+ """ + + def __init__(self, flavor_id="", lockfile=""): + import sys + self.initialized = False + if lockfile: + self.lockfile = lockfile + else: + basename = os.path.splitext(os.path.abspath(sys.argv[0]))[0].replace( + "/", "-").replace(":", "").replace("\\", "-") + '-%s' % flavor_id + '.lock' + self.lockfile = os.path.normpath( + tempfile.gettempdir() + '/' + basename) + + logger.debug("SingleInstance lockfile: " + self.lockfile) + if sys.platform == 'win32': + try: + # file already exists, we try to remove (in case previous + # execution was interrupted) + if os.path.exists(self.lockfile): + os.unlink(self.lockfile) + self.fd = os.open( + self.lockfile, os.O_CREAT | os.O_EXCL | os.O_RDWR) + except OSError: + type, e, tb = sys.exc_info() + if e.errno == 13: + logger.error( + "Another instance is already running, quitting.") + raise SingleInstanceException() + print(e.errno) + raise + else: # non Windows + import fcntl + self.fp = open(self.lockfile, 'w') + self.fp.flush() + try: + fcntl.lockf(self.fp, fcntl.LOCK_EX | fcntl.LOCK_NB) + except IOError: + logger.warning( + "Another instance is already running, quitting.") + raise SingleInstanceException() + self.initialized = True + + def __del__(self): + import os + import sys + if not self.initialized: + return + try: + if sys.platform == 'win32': + if hasattr(self, 'fd'): + os.close(self.fd) + os.unlink(self.lockfile) + else: + import fcntl + fcntl.lockf(self.fp, fcntl.LOCK_UN) + # os.close(self.fp) + if os.path.isfile(self.lockfile): + os.unlink(self.lockfile) + except Exception as e: + if logger: + logger.warning(e) + else: + print("Unloggable error: %s" % e) + sys.exit(-1) + + +def f(name): + tmp = logger.level + logger.setLevel(logging.CRITICAL) # we do not want to see the warning + try: + me2 = SingleInstance(flavor_id=name) # noqa + except SingleInstanceException: + sys.exit(-1) + logger.setLevel(tmp) + pass + + +class testSingleton(unittest.TestCase): + + def test_1(self): + me = SingleInstance(flavor_id="test-1") + del me # now the lock should be removed + assert True + + def test_2(self): + p = Process(target=f, args=("test-2",)) + p.start() + p.join() + # the called function should succeed + assert p.exitcode == 0, "%s != 0" % p.exitcode + + def test_3(self): + me = SingleInstance(flavor_id="test-3") # noqa -- me should still kept + p = Process(target=f, args=("test-3",)) + p.start() + p.join() + # the called function should fail because we already have another + # instance running + assert p.exitcode != 0, "%s != 0 (2nd execution)" % p.exitcode + # note, we return -1 but this translates to 255 meanwhile we'll + # consider that anything different from 0 is good + p = Process(target=f, args=("test-3",)) + p.start() + p.join() + # the called function should fail because we already have another + # instance running + assert p.exitcode != 0, "%s != 0 (3rd execution)" % p.exitcode + + def test_4(self): + lockfile = '/tmp/foo.lock' + me = SingleInstance(lockfile=lockfile) + assert me.lockfile == lockfile + + +logger = logging.getLogger("tendo.singleton") +logger.addHandler(logging.StreamHandler()) + +if __name__ == "__main__": + logger.setLevel(logging.DEBUG) + unittest.main() diff --git a/tasks.py b/tasks.py new file mode 100644 index 0000000..df2f533 --- /dev/null +++ b/tasks.py @@ -0,0 +1,169 @@ +from celery import Celery, bootsteps + +app = Celery('tasks', backend='redis://localhost', broker='redis://localhost') +app.conf.task_serializer = "pickle" +app.conf.result_serializer = "pickle" +app.conf.accept_content = {"pickle"} + 
+import yara +import logging +import traceback +import datetime +import configparser +import os +import hashlib +from analysis_result import AnalysisResult +from cbapi.response.models import Binary +from cbapi.response.rest_api import CbResponseAPI +import globals + +logger = logging.getLogger(__name__) +logger.setLevel(logging.INFO) + +g_config = dict() +g_yara_rules_dir = "" + + +def add_worker_arguments(parser): + parser.add_argument('--config-file', default='yara_worker.conf', help='Yara Worker Config') + parser.add_argument('--yara-rules-dir', default='yara_rules', help='Yara Rules Directory') + + +app.user_options['worker'].add(add_worker_arguments) + + +class MyBootstep(bootsteps.Step): + + def __init__(self, worker, config_file='yara_worker.conf', yara_rules_dir='yara_rules', **options): + super().__init__(self) + global g_config + global g_yara_rules_dir + g_config = configparser.ConfigParser() + g_config.read(config_file) + g_yara_rules_dir = yara_rules_dir + + +app.steps['worker'].add(MyBootstep) + + +def generate_rule_map(yara_rule_path): + rule_map = {} + for fn in os.listdir(yara_rule_path): + if fn.lower().endswith(".yar"): + fullpath = os.path.join(yara_rule_path, fn) + if not os.path.isfile(fullpath): + continue + + last_dot = fn.rfind('.') + if last_dot != -1: + namespace = fn[:last_dot] + else: + namespace = fn + rule_map[namespace] = fullpath + + return rule_map + + +def generate_yara_rule_map_hash(yara_rule_path): + md5 = hashlib.md5() + + temp_list = list() + + for fn in os.listdir(yara_rule_path): + with open(os.path.join(yara_rule_path, fn), 'rb') as fp: + data = fp.read() + md5.update(data) + temp_list.append(str(md5.hexdigest())) + + temp_list.sort() + return temp_list + + +@app.task +def update_yara_rules_remote(yara_rules): + try: + for key in yara_rules: + with open(os.path.join(g_yara_rules_dir, key), 'wb') as fp: + fp.write(yara_rules[key]) + except: + logger.error(traceback.format_exc()) + + +@app.task +def analyze_binary(md5sum): + logger.debug("{}: in analyze_binary".format(md5sum)) + analysis_result = AnalysisResult(md5sum) + + try: + analysis_result.last_scan_date = datetime.datetime.now() + + cb = CbResponseAPI(url=globals.g_cb_server_url, + token=globals.g_cb_server_token, + ssl_verify=False) + + binary_query = cb.select(Binary).where(f"md5:{md5sum}") + + if binary_query: + try: + binary_data = binary_query[0].file.read() + except: + analysis_result.binary_not_available = True + return analysis_result + + yara_rule_map = generate_rule_map(globals.g_yara_rules_dir) + yara_rules = yara.compile(filepaths=yara_rule_map) + + try: + matches = yara_rules.match(data=binary_data, timeout=30) + except yara.TimeoutError: + # + # yara timed out + # + analysis_result.last_error_msg = "Analysis timed out after 30 seconds" + analysis_result.stop_future_scans = True + except yara.Error: + # + # Yara errored while trying to scan binary + # + analysis_result.last_error_msg = "Yara exception" + except: + analysis_result.last_error_msg = traceback.format_exc() + else: + if matches: + score = getHighScore(matches) + analysis_result.score = score + analysis_result.short_result = "Matched yara rules: %s" % ', '.join([match.rule for match in matches]) + analysis_result.misc = generate_yara_rule_map_hash(globals.g_yara_rules_dir) + else: + analysis_result.score = 0 + + else:
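+            # no binary with this md5 was found on the Cb Response server,
+            # so there is nothing to scan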
analysis_result.binary_not_available = True + return analysis_result + except: + error = traceback.format_exc() + logger.error(error) + analysis_result.last_error_msg = error + return analysis_result + + +def getHighScore(matches): + score = 0 + for match in matches: + if match.meta.get('score', 0) > score: + score = match.meta.get('score') + if score == 0: + return 100 + else: + return score diff --git a/tests/__init__.py b/tests/__init__.py deleted file mode 100644 index 6d75c54..0000000 --- a/tests/__init__.py +++ /dev/null @@ -1 +0,0 @@ -__author__ = 'jgarman' diff --git a/tests/data/binary_data/00af22b51f217dc4c536f6039577b28c b/tests/data/binary_data/00af22b51f217dc4c536f6039577b28c deleted file mode 100644 index d54dcfc..0000000 --- a/tests/data/binary_data/00af22b51f217dc4c536f6039577b28c +++ /dev/null @@ -1 +0,0 @@ -{"host_count": 1, "digsig_result": "Signed", "observed_filename": ["c:\\windows\\syswow64\\dxgi.dll"], "product_version": "6.3.9600.16421", "digsig_issuer": "Microsoft Windows Production PCA 2011", "digsig_sign_time": "2014-01-10T03:32:00Z", "is_executable_image": false, "orig_mod_len": 406400, "is_64bit": false, "digsig_subject": "Microsoft Windows", "digsig_publisher": "Microsoft Corporation", "group": ["Default Group"], "file_version": "6.3.9600.16421 (winblue_gdr.131004-2100)", "company_name": "Microsoft Corporation", "internal_name": "dxgi.dll", "product_name": "Microsoft\u00ae Windows\u00ae Operating System", "digsig_result_code": "0", "copied_mod_len": 406400, "server_added_timestamp": "2015-06-24T14:20:30.856Z", "digsig_prog_name": "Microsoft Windows", "watchlist_3": "2015-06-24T14:30:04.49Z", "md5": "00AF22B51F217DC4C536F6039577B28C", "endpoint": ["JASON-WIN81-VM|1"], "legal_copyright": "\u00a9 Microsoft Corporation.
All rights reserved.", "original_filename": "dxgi.dll", "cb_version": 500, "os_type": "Windows", "file_desc": "DirectX Graphics Infrastructure", "last_seen": "2015-06-24T14:30:05.728Z"} \ No newline at end of file diff --git a/tests/data/binary_data/0ab74f7d94cdec551db81954f51cc95e b/tests/data/binary_data/0ab74f7d94cdec551db81954f51cc95e deleted file mode 100644 index d98de82..0000000 --- a/tests/data/binary_data/0ab74f7d94cdec551db81954f51cc95e +++ /dev/null @@ -1 +0,0 @@ -{"host_count": 1, "digsig_result": "Signed", "observed_filename": ["c:\\windows\\syswow64\\d2d1.dll"], "product_version": "6.2.9200.16384", "digsig_sign_time": "2012-07-26T09:09:00Z", "is_executable_image": false, "orig_mod_len": 3295232, "is_64bit": false, "digsig_publisher": "Microsoft Corporation", "group": ["Default Group"], "file_version": "6.2.9200.16384 (win8_rtm.120725-1247)", "company_name": "Microsoft Corporation", "internal_name": "d2d1", "product_name": "Microsoft\u00ae Windows\u00ae Operating System", "digsig_result_code": "0", "copied_mod_len": 3295232, "server_added_timestamp": "2015-07-10T20:24:35.322Z", "watchlist_3": "2015-07-10T20:30:04.499Z", "md5": "0AB74F7D94CDEC551DB81954F51CC95E", "endpoint": ["WIN-IA9NQ1GN8OI|4"], "legal_copyright": "\u00a9 Microsoft Corporation. All rights reserved.", "original_filename": "d2d1", "cb_version": 510, "os_type": "Windows", "file_desc": "Microsoft D2D Library", "last_seen": "2015-07-10T20:30:04.937Z"} \ No newline at end of file diff --git a/tests/data/binary_data/0ab74f7d94cdec551db81954f51cc95e.json b/tests/data/binary_data/0ab74f7d94cdec551db81954f51cc95e.json deleted file mode 100644 index d98de82..0000000 --- a/tests/data/binary_data/0ab74f7d94cdec551db81954f51cc95e.json +++ /dev/null @@ -1 +0,0 @@ -{"host_count": 1, "digsig_result": "Signed", "observed_filename": ["c:\\windows\\syswow64\\d2d1.dll"], "product_version": "6.2.9200.16384", "digsig_sign_time": "2012-07-26T09:09:00Z", "is_executable_image": false, "orig_mod_len": 3295232, "is_64bit": false, "digsig_publisher": "Microsoft Corporation", "group": ["Default Group"], "file_version": "6.2.9200.16384 (win8_rtm.120725-1247)", "company_name": "Microsoft Corporation", "internal_name": "d2d1", "product_name": "Microsoft\u00ae Windows\u00ae Operating System", "digsig_result_code": "0", "copied_mod_len": 3295232, "server_added_timestamp": "2015-07-10T20:24:35.322Z", "watchlist_3": "2015-07-10T20:30:04.499Z", "md5": "0AB74F7D94CDEC551DB81954F51CC95E", "endpoint": ["WIN-IA9NQ1GN8OI|4"], "legal_copyright": "\u00a9 Microsoft Corporation. 
All rights reserved.", "original_filename": "d2d1", "cb_version": 510, "os_type": "Windows", "file_desc": "Microsoft D2D Library", "last_seen": "2015-07-10T20:30:04.937Z"} \ No newline at end of file diff --git a/tests/data/binary_data/0b25fbbb6c94c1246381a527ed418f04 b/tests/data/binary_data/0b25fbbb6c94c1246381a527ed418f04 deleted file mode 100644 index 69f9bc3..0000000 --- a/tests/data/binary_data/0b25fbbb6c94c1246381a527ed418f04 +++ /dev/null @@ -1 +0,0 @@ -{"host_count": 1, "digsig_result": "Signed", "observed_filename": ["c:\\windows\\syswow64\\windowscodecs.dll"], "product_version": "6.2.9200.16384", "digsig_sign_time": "2012-07-26T09:09:00Z", "is_executable_image": false, "orig_mod_len": 1319424, "is_64bit": false, "digsig_publisher": "Microsoft Corporation", "group": ["Default Group"], "file_version": "6.2.9200.16384 (win8_rtm.120725-1247)", "company_name": "Microsoft Corporation", "internal_name": "WindowsCodecs", "product_name": "Microsoft\u00ae Windows\u00ae Operating System", "digsig_result_code": "0", "copied_mod_len": 1319424, "server_added_timestamp": "2015-07-10T20:24:35.326Z", "watchlist_3": "2015-07-10T20:30:04.499Z", "md5": "0B25FBBB6C94C1246381A527ED418F04", "endpoint": ["WIN-IA9NQ1GN8OI|4"], "legal_copyright": "\u00a9 Microsoft Corporation. All rights reserved.", "original_filename": "WindowsCodecs", "cb_version": 510, "os_type": "Windows", "file_desc": "Microsoft Windows Codecs Library", "last_seen": "2015-07-10T20:30:04.81Z"} \ No newline at end of file diff --git a/tests/data/binary_data/0b25fbbb6c94c1246381a527ed418f04.json b/tests/data/binary_data/0b25fbbb6c94c1246381a527ed418f04.json deleted file mode 100644 index 69f9bc3..0000000 --- a/tests/data/binary_data/0b25fbbb6c94c1246381a527ed418f04.json +++ /dev/null @@ -1 +0,0 @@ -{"host_count": 1, "digsig_result": "Signed", "observed_filename": ["c:\\windows\\syswow64\\windowscodecs.dll"], "product_version": "6.2.9200.16384", "digsig_sign_time": "2012-07-26T09:09:00Z", "is_executable_image": false, "orig_mod_len": 1319424, "is_64bit": false, "digsig_publisher": "Microsoft Corporation", "group": ["Default Group"], "file_version": "6.2.9200.16384 (win8_rtm.120725-1247)", "company_name": "Microsoft Corporation", "internal_name": "WindowsCodecs", "product_name": "Microsoft\u00ae Windows\u00ae Operating System", "digsig_result_code": "0", "copied_mod_len": 1319424, "server_added_timestamp": "2015-07-10T20:24:35.326Z", "watchlist_3": "2015-07-10T20:30:04.499Z", "md5": "0B25FBBB6C94C1246381A527ED418F04", "endpoint": ["WIN-IA9NQ1GN8OI|4"], "legal_copyright": "\u00a9 Microsoft Corporation. 
All rights reserved.", "original_filename": "WindowsCodecs", "cb_version": 510, "os_type": "Windows", "file_desc": "Microsoft Windows Codecs Library", "last_seen": "2015-07-10T20:30:04.81Z"} \ No newline at end of file diff --git a/tests/data/binary_data/0c0f5d1428d00def0fe16eba9edaa4d5 b/tests/data/binary_data/0c0f5d1428d00def0fe16eba9edaa4d5 deleted file mode 100644 index 6802f4d..0000000 --- a/tests/data/binary_data/0c0f5d1428d00def0fe16eba9edaa4d5 +++ /dev/null @@ -1 +0,0 @@ -{"host_count": 1, "digsig_result": "Signed", "observed_filename": ["c:\\program files (x86)\\google\\chrome\\application\\43.0.2357.130\\chrome.dll"], "product_version": "43.0.2357.130", "digsig_issuer": "VeriSign Class 3 Code Signing 2010 CA", "digsig_sign_time": "2015-06-20T09:46:00Z", "is_executable_image": false, "orig_mod_len": 31533896, "is_64bit": false, "digsig_subject": "Google Inc", "digsig_publisher": "Google Inc", "group": ["Default Group"], "file_version": "43.0.2357.130", "company_name": "Google Inc.", "internal_name": "chrome_dll", "md5": "0C0F5D1428D00DEF0FE16EBA9EDAA4D5", "product_name": "Google Chrome", "digsig_result_code": "0", "copied_mod_len": 26214400, "server_added_timestamp": "2015-06-25T04:13:18.783Z", "watchlist_3": "2015-06-25T04:20:02.951Z", "icon": "aVZCT1J3MEtHZ29BQUFBTlNVaEVVZ0FBQURBQUFBQXdDQVlBQUFCWEF2bUhBQUFBQVhOU1IwSUFyczRjNlFBQUFBUm5RVTFCQUFDeGp3djhZUVVBQUFBSmNFaFpjd0FBRHNNQUFBN0RBY2R2cUdRQUFBMjJTVVJCVkdoRDdaZDVWTlRsR3NmNzQ5N2Jkc3UwWE1KazJFVmtCNUY5bTJGWU5WUklTYXhjNkhyTjl0STJjaWx0TDgxU0swMVRVU0FURUN4UlUzWVVCRVZSQkdVZDloMEV5YnJuZk8venZETURnOHdBNWgrZGU4NTl6L21lK2MzdjkvN2U5L3Q1bnVkOTM1bTc4RC9lL2cvd1Y3YzdCcmpycnJ0UUhPYUhpb1hCcUYwY2l2cWxjOUVjTlcrSW1wYk9RZTFUczFDeklCRFg1c3B3SWNSTHZIdW43VStQd0pPWHp2SkJUWmdjRFJGQmFGd1VncFlsYzlENmJCZzZWeXhBNTNNUlN0RjF4L0w1YUk4S1EvTlRzOUg0WkREcTZaMjZVQ21xZ3IxUUluTzlJNURiZnBNbnUrenJqT29nVDlUTjlrVmp1RDlhRjg1QzIxT1BvMjN4SExRdm5ZZjJaV0ZrT0Z5WWJsOUczNWZNUmRzem9XaGROQnV0VHhMb0U0Rm9udWVQK2hBS2dKOGJLbjJjY2RIRjRVK0JqUG9OSGp6WHlSWlhQWjFRUTFHckQvSkc4eHcvTklmNW8yVitJRm9pZ2dVSW14UXdLZ25Ua1hTZmpTOElRa3Q0QUpybnl0SDB1QXlOd1Q1b0RQQkNyYThycXR3Y1VlcGtoMXhyaTlzQ0dWVlBIdkNzdlJYS1hSMVE1ZUdFQnJrbkdnTzkwUmppSzR3SUVJb29tK1BvdHN3bm8yeVdQeGt1bkNKT29NSjRxQXhOc3loekZJQUdmMC9VeTl4UjcrT0tPazluVk15d3hUVTdhK1JQbXpwcWlCRjc4VUFGbHROUTRXQ0RhbWNIMUhtNW9ON1hEUTAwTVJ0UWd2aVFLYW1BWVlNQ1NLMVFQK1g5MlZJQnpNWTU2ZzErSG1LY09tOFgxSG5NUkszckROVE9kRUNsclJXdVdWZ2d6OVIwVkJERDl1QUI4czNNVUc0MUhWWDJ0cWgxZGtTdG14Tk42Q3dtWmdNY3dRYTVCeG9aaG93SmcwRlVHdjJpNzRGMDM1OU1VK1lZdk44NFJaM0g0M0VWVGc1UU9OaWh4dFlHRlFSdzFkUU1lUkxERVNGMFB1VVhzdzJNY05Wc0tpb3RwNlBHemhZS1J6dlUwa1MxTGhRdEFUSlRtS2p6cGpWQlpTQ0FwR1NRb2Rnb2k3L3pmUzRUbFdrUmNUYk80N0J4R3BmSHI3R3hScldsRmFvdExGRnVNaFZsRW1OazZ1a1BDNkgxQ2Ird2UveEVsQmdZbzl4MEtxcW5XNkxheWtwRXA0WXl3Uk55eEdwblVrYlloQ3VaY1NNWWR4Sm5oeVdNcWtUM2hXSHV4LzNwUGNVTWUyWEVoWEVxVHhwZnpHTk8yVGFiaGlvVGMxelZOMGJKWTRhSW02Q25FMElud0pKM0hYR082Q3ROYWJDcEZxaWVObjBBaENJbEptWVlNaUhNTUpBYTZsWnhsTG1QbzhvMHZjZkJxTGFtaUt1TjAvZzhUeFhOVjJsc2prb2p5anlwbEFDS0podU1Ib0E3ZXUxeWh6elJGNTh2c1VjeHBaRUg1SUVGQ0VXSVV5eFN6Wk9UQ1k2Z3lJNGRTd2tteE5jc2ZzYmlFbUhUL0s3S2RMVTVtZWFJbTVKcERlT2FZb2pNUjdXWGtsWUEyVTgrQ0R3aVIxaXNET25XWmdLaWdnZGpFRXF0bUZEQXFESmpvWUppVTVhV3FIR21CUjRaaWVhMWE0VDRXaEZLNTBmd05KSTVhb0tvTEFQTlNLWWtFMVFIc0l4Um8xYWdFUlNzSUtXcS9DVW9EZEJlUm9QdWNBZVBIUlQ5SkNsbUhRMFVpbjU5Smk3b0d3MUFxR1dzQWNUWklhaTZpSVhvK1BWWDlIYTI0MFpuQi9xNk90SFh6ZXBDMy9WdWROV2NSRlBtWE5RZjBVUER6NVBRZUhRaW1vOU5RTXZ4OFdqOWRUemFUajZDOXJTSDBaRStEcDJaWTlHZE5SYlhjeDVDeitreDZNeStHNlVwZng4Q01RUkFGay9SVDVIM0E0U2srQ1BSYTVwMkNKV3FiQnpSbXBTTW51czl1RmJmZ2YwNWRmanlSQTNXSHE3RXV1
UktiRG1wUUVKQkl4cmFlOURYMTRkdVJTSmEwNHpRZGtwbE9HTWN1Z1laZmhCOStRL2dadUg5K09QOGZmalBoWHR4ODl3LzBKWTlEQUEvc0ZsbERYbUNGQ0cvS00ycnRmSUROd0dnRFlMTnQrWGxvN210Q3pIWkNxemNXNEtYWTh1dzZzZXJlUE9uY3J4NXFCeXJEMTdEcTNGbGVDSG1DZzdrMXFHdHF4YzlyZm5vUGkwaHcyTzBHdGFtM3Z5L1kvOUhmeHNFTVFqQTR4czNKWUNHZWFHZkE3QWpaRHFLcGlnaFNneE0rZ0dhRTVMUTJOS0J0K05Lc0d6SFJTemZmVWxBdkxpL0ZDOGZLTVVyQlBNU2ZUNi83d3IrL2NObFJIMWZqT2lEcFdqdDdFRnY0MEg4VWFUYjhLMWlnSktrd1ZrWUJPRHpneWNDa3Z5R0FwQ2UvTW9EV1VhR09EL0ZzQjlDc1RnSzdSMmQySmg0QlF1L1BvZW52eW5DVW9KNGRsZXhNTHRpajFKOC9hOWRsN0JzNTBVcy92WUNGbTA3ajUybnF0RFQyNHZmeTJSYXpXcFRYOEhkcUR0K3QyNEEzMzFlQ0R3c0gxSkNhbjBRYm9rOE9oc0tIek1RMmFoUFBZYnN5L1dZODBVK252aXlBRThTeEZOa2prMHUrZTZDZ09HczhDZmZlM3A3RVNLM25zT0NMWVdZdCtrczhxODJvN2NsVmF2Wm9ib1B2eFhlZzZaVDkrZ0drTVg3MHFLbGhhc0RJQ2hlaWhSVENVNFR4UGtaTG1ocGJjV0dRNWNSL01rWmhINmVqN0ROWnpGL0M0TVVDcU9MdHA0WDRtdk9FQnNQSjFBR252VnBIajVLS2tWMzkzWDhjZGxNaStGYmRSOStQLzlQWE04ZEFTQjRHQURXeXVVMlNKODRHWVd6UXRIYzBvTDVYK1pCL2tFdUFqOCtJMHd4eUZ5S0xzT0VieTRRaHNQcG1pUE94bWQvbG9jZ0F2Yi84RFFpdHB3bGdHN2NMSk5xTVR4WXZGWVlvRHZuM21FQTRnZ2dtUUIrSm1reHp3cE84TVBPNlZOd2NjMDZORFUxdzNWZEpyemV5NFowWXc3a0h4TElSNmRGUmtJSWhzVlEvTW4zR0pLTnl6Ym13dWY5YkxpdnowSlhWeGQ2cTkvUmFscFRBdURjQStqS3ZrODNnUFFBQVJ3T0dCYUF0ZkF0UnhSR3IwRkRZeU9jMXFURGVXMG0zTWdNZy9odXlDR0RPZkNqckdpSzcvRXpiekx1UVgxZENOeHBiUVk2T2p2Uld6VWFnUHZ4ZStFRGRNRGRQd3hBakErQ2tnamd5UEJsRkpMc2o4K2VuNGU2K2daNHZKY0d4M2ZUQk1oTU1zVEczTlpsaWVpeVViVVlrSjh4ck5PYURNeWdkNEkreVVKYmV3ZCtLL1hWYWxvdGRmbmNMQmhEcC9RRHVnRjg5eEpBUWtEL09oZzJDekh6b0tpdFE5UzNPYkIrOHhqczNqNEJoK2lUQW1ZR3diRFlxRkxLNzJ6YUlmb1U3Tjc1RlRadkhjZnIrd3JFUnZEN0pWT3R4dFVTMGFmeTZjdC9pRTd2TWJvQnZMN3hoRHplVDFsR0kyU0JsVldjaGVUY0s1ajZXaklzVnYyTTZhdVB3b3BnMkp3dEFiRVlqRC81SG9OYXZwRktmWCtCK1dzcFNDMm9RRnQ5bWxiVEExSkgvMEYwWjQ5QlZmSkR1Z0hjTnJuQ2I3OFVRWW1qeThMN3VldFJYYVBBbkUrT3dlakZRekI1T1JGbXJ4NFdRT2F2cDJEYTYwZUV6RmxrZUNvOU0zMGxDY1l2SmVDWnI5UFFTSnZBamRMSHRaZ2VrRHI2djFIME85UEhvamgyR0FDSGFBYzZqZW5IM0U4RU1Nb3NITHQ4SE1XbEZaQ3VTWUpreFFFWXJJeUR3Zk0vd3ZBRjFzRis4VDJEbGZHUVBCY0wyYnBrbEZYV29yVTZVYXRwdFFacS8wSDBuaDZIbGhOamNYekxNQUNTSUFtOGQzb2pJTjVmdVJaRzJGSlpUeHliaTh5U1RGeThVbzdaNnhNd1pla3U2RWY5QU1tenJEMERvbnY2eTNZajlQMGtYQ212d2VXeWRQUVVUZEpxbk1YbS96aXYzSG42OHVoWGF2WjROQjE5QkM4dEdHWVhZcmx2ZG9kc3J3eUJCd2xDWFVxamdFZ3RUa1ZGUlNWMi9aS0hlUnNUTUc3UmRqeXNra0hVVG9SOWtJaUVqQ0xVS0dxUlduUUNaOUpNdEJwWGlzeHJsRTVQRHYzc3B2OExwZkhqKzMycTJ4QUE2eGR0NFAwZFpTSFdYMWxLdEsyT1pqMncxdWVzUS9ybERKUVRTRVZsbFZCbFZYVy9NaTVsNFoyamEvQkdyQzF1bkx0ZmkvRmJ6SjhkZzk3Y2g5R1ZNUkVOUjhZajdyMkhkUU53VXo5MC84SUQwbDFTQk1SUkZnNXBRSXlRQ2JXZVBoR0p0ekpYNCt2OExhU3ZzUHJrS2tRa1JrQVc0d2YvM2I0b3loaXZ4YmlxYkRUTlU5MTNaMDVFUytva2xNVk9HbUtlbTFhQTZTdXM0TG5WQy9LOThrRVE0b2NlUTR5d3NEVVZRdjhsQWhQOGFYdVdDNEN2RHB2ck5NOExsbXVleTRZajM1MDVnZjZpNmtHUk9Bbjdvb2VXRDdmQjM2aXBPemx0Y0lIbnRnR0l3RnN6TVlxU1lnV25CQ0NBQWVMa1dIREFBdzE1RHc0eHJveTZjcmZweTZPYVY1V04ybnpCRHVVZitsdk5jOU1KTU5GTkR5NmZlY0I3dTQ4U2d0ZkV3UURsd3FZdDlsWVFYVEJCaC8wUmNFZ0prSEpDb2pMT3BsWEcxVkdua3JseFpxeFlzRjNwRTlGK1lqTHFEazlHV2N4a1JNcVZQeDlHQmNCTjNka2swaHd1dEI1OHR2dEN2a2NPLy8wRThhTXlHd0lrV1FORURhTUJ4TmVCU1VxQU41SWNjWjJNOXB1bWlMUHhtMlNjdDBsUk1sa1QwSm4yS0ZxUFBZYmFwQ200ZG1BS05xM1VYanJxTml3QXkzU3hCWndwRTE1YmZlRzNpeUQycWJLaENjSVpvVE5Ed1BEaHB3SUtJa0F1bjlCRE1oUmxUaEtMVTVpbVV1RTZGeEVuNDlmSmVGZjZKTm9xSjZQNXFENFVDZnE0R3FPUGJTOVBHT1JGVzlOK2w1cm1peVpQVzhEcEV3KzRiL0dHYkljZjVEOFFTQXhGVmcxQzJ5MGZmQUtHMXdrREhhYm9VK1FENkR6WmZNUkcvQkRqU044NE0wNUVtdzhtWHFTZGFXeGNqM2FhS1doSU1VQmx2QVJYOWhoZzY0c1RCM25RMVhRL29hWTVnQ1RjRFBZYjNFUTJmTGJMbENDN2xSbmgwbUlZUHNINUFHUWdsbitzSEJIeFBxaE8wMU1hcGtoMzArSmsweDBuOWRCMm5DT3VORjZiWUlTeWZSSVVmV2VBRDVjT2xBMXJ1RGI4VTJxYUE0Mnp
td0NyTjV6ZytLRTdYRGQ1dzJlYkROSnYvZUMzazBRd3ZFN2tERVRaa2NmUXRrbVoycDlvS2N4eWxOa3cxemVYU2VNUkNlcVRES0U0Wkl4cis0MXc2WHREcEgycWoxRFhnUVhMR3FtTjNJT2E1b0FzZ3doeldFYzd3MkdqTysxVVh2RGNJb1gzVmhsOEtUTU14Tm1SZnVlSHFPL2RvVWcyRUJHdXAwODJ6Skd1K2NrRVZYR21LTnRyak9JZGhqaTkyUkJmTEI5Y01xelJ0Tkgxb25icjRDekpFK2IwSDhBSnR1dmRZTC9SRFU0ZmUxS0plY0hqU3luY04vbmk2QjVMVkpMUmlsZ3psQjh3UTlrZVk1VHNNc2JGYncxeGZwc1IwajQyd0dkUkF5ZXNwa2JiUnQ5VDFiUk45cERWZUJoRVdzRDhCWHRZRUpEMVdsY3MyMmlQd3ExR09NZjYyZ2lGWHhuajFFZUdTRjRyd2RibjlCRG9PTGhVMUxyZGR2dHZxSnEyeWU5RWY3YjkrVGRWVFp1WjI5R2R0anNmNFM5dHdIOEJWMDkraW83Umh2NEFBQUFBU1VWT1JLNUNZSUk9", "endpoint": ["JASON-WIN81-VM|1"], "legal_copyright": "Copyright 2012 Google Inc. All rights reserved.", "original_filename": "chrome.dll", "cb_version": 500, "os_type": "Windows", "file_desc": "Google Chrome", "last_seen": "2015-06-25T04:20:03.351Z"} \ No newline at end of file diff --git a/tests/data/binary_data/0c0f5d1428d00def0fe16eba9edaa4d5.json b/tests/data/binary_data/0c0f5d1428d00def0fe16eba9edaa4d5.json deleted file mode 100644 index 6802f4d..0000000 --- a/tests/data/binary_data/0c0f5d1428d00def0fe16eba9edaa4d5.json +++ /dev/null @@ -1 +0,0 @@ -{"host_count": 1, "digsig_result": "Signed", "observed_filename": ["c:\\program files (x86)\\google\\chrome\\application\\43.0.2357.130\\chrome.dll"], "product_version": "43.0.2357.130", "digsig_issuer": "VeriSign Class 3 Code Signing 2010 CA", "digsig_sign_time": "2015-06-20T09:46:00Z", "is_executable_image": false, "orig_mod_len": 31533896, "is_64bit": false, "digsig_subject": "Google Inc", "digsig_publisher": "Google Inc", "group": ["Default Group"], "file_version": "43.0.2357.130", "company_name": "Google Inc.", "internal_name": "chrome_dll", "md5": "0C0F5D1428D00DEF0FE16EBA9EDAA4D5", "product_name": "Google Chrome", "digsig_result_code": "0", "copied_mod_len": 26214400, "server_added_timestamp": "2015-06-25T04:13:18.783Z", "watchlist_3": "2015-06-25T04:20:02.951Z", "icon": 
"aVZCT1J3MEtHZ29BQUFBTlNVaEVVZ0FBQURBQUFBQXdDQVlBQUFCWEF2bUhBQUFBQVhOU1IwSUFyczRjNlFBQUFBUm5RVTFCQUFDeGp3djhZUVVBQUFBSmNFaFpjd0FBRHNNQUFBN0RBY2R2cUdRQUFBMjJTVVJCVkdoRDdaZDVWTlRsR3NmNzQ5N2Jkc3UwWE1KazJFVmtCNUY5bTJGWU5WUklTYXhjNkhyTjl0STJjaWx0TDgxU0swMVRVU0FURUN4UlUzWVVCRVZSQkdVZDloMEV5YnJuZk8venZETURnOHdBNWgrZGU4NTl6L21lK2MzdjkvN2U5L3Q1bnVkOTM1bTc4RC9lL2cvd1Y3YzdCcmpycnJ0UUhPYUhpb1hCcUYwY2l2cWxjOUVjTlcrSW1wYk9RZTFUczFDeklCRFg1c3B3SWNSTHZIdW43VStQd0pPWHp2SkJUWmdjRFJGQmFGd1VncFlsYzlENmJCZzZWeXhBNTNNUlN0RjF4L0w1YUk4S1EvTlRzOUg0WkREcTZaMjZVQ21xZ3IxUUluTzlJNURiZnBNbnUrenJqT29nVDlUTjlrVmp1RDlhRjg1QzIxT1BvMjN4SExRdm5ZZjJaV0ZrT0Z5WWJsOUczNWZNUmRzem9XaGROQnV0VHhMb0U0Rm9udWVQK2hBS2dKOGJLbjJjY2RIRjRVK0JqUG9OSGp6WHlSWlhQWjFRUTFHckQvSkc4eHcvTklmNW8yVitJRm9pZ2dVSW14UXdLZ25Ua1hTZmpTOElRa3Q0QUpybnl0SDB1QXlOd1Q1b0RQQkNyYThycXR3Y1VlcGtoMXhyaTlzQ0dWVlBIdkNzdlJYS1hSMVE1ZUdFQnJrbkdnTzkwUmppSzR3SUVJb29tK1BvdHN3bm8yeVdQeGt1bkNKT29NSjRxQXhOc3loekZJQUdmMC9VeTl4UjcrT0tPazluVk15d3hUVTdhK1JQbXpwcWlCRjc4VUFGbHROUTRXQ0RhbWNIMUhtNW9ON1hEUTAwTVJ0UWd2aVFLYW1BWVlNQ1NLMVFQK1g5MlZJQnpNWTU2ZzErSG1LY09tOFgxSG5NUkszckROVE9kRUNsclJXdVdWZ2d6OVIwVkJERDl1QUI4czNNVUc0MUhWWDJ0cWgxZGtTdG14Tk42Q3dtWmdNY3dRYTVCeG9aaG93SmcwRlVHdjJpNzRGMDM1OU1VK1lZdk44NFJaM0g0M0VWVGc1UU9OaWh4dFlHRlFSdzFkUU1lUkxERVNGMFB1VVhzdzJNY05Wc0tpb3RwNlBHemhZS1J6dlUwa1MxTGhRdEFUSlRtS2p6cGpWQlpTQ0FwR1NRb2Rnb2k3L3pmUzRUbFdrUmNUYk80N0J4R3BmSHI3R3hScldsRmFvdExGRnVNaFZsRW1OazZ1a1BDNkgxQ2Ird2UveEVsQmdZbzl4MEtxcW5XNkxheWtwRXA0WXl3Uk55eEdwblVrYlloQ3VaY1NNWWR4Sm5oeVdNcWtUM2hXSHV4LzNwUGNVTWUyWEVoWEVxVHhwZnpHTk8yVGFiaGlvVGMxelZOMGJKWTRhSW02Q25FMElud0pKM0hYR082Q3ROYWJDcEZxaWVObjBBaENJbEptWVlNaUhNTUpBYTZsWnhsTG1QbzhvMHZjZkJxTGFtaUt1TjAvZzhUeFhOVjJsc2prb2p5anlwbEFDS0podU1Ib0E3ZXUxeWh6elJGNTh2c1VjeHBaRUg1SUVGQ0VXSVV5eFN6Wk9UQ1k2Z3lJNGRTd2tteE5jc2ZzYmlFbUhUL0s3S2RMVTVtZWFJbTVKcERlT2FZb2pNUjdXWGtsWUEyVTgrQ0R3aVIxaXNET25XWmdLaWdnZGpFRXF0bUZEQXFESmpvWUppVTVhV3FIR21CUjRaaWVhMWE0VDRXaEZLNTBmd05KSTVhb0tvTEFQTlNLWWtFMVFIc0l4Um8xYWdFUlNzSUtXcS9DVW9EZEJlUm9QdWNBZVBIUlQ5SkNsbUhRMFVpbjU5Smk3b0d3MUFxR1dzQWNUWklhaTZpSVhvK1BWWDlIYTI0MFpuQi9xNk90SFh6ZXBDMy9WdWROV2NSRlBtWE5RZjBVUER6NVBRZUhRaW1vOU5RTXZ4OFdqOWRUemFUajZDOXJTSDBaRStEcDJaWTlHZE5SYlhjeDVDeitreDZNeStHNlVwZng4Q01RUkFGay9SVDVIM0E0U2srQ1BSYTVwMkNKV3FiQnpSbXBTTW51czl1RmJmZ2YwNWRmanlSQTNXSHE3RXV1UktiRG1wUUVKQkl4cmFlOURYMTRkdVJTSmEwNHpRZGtwbE9HTWN1Z1laZmhCOStRL2dadUg5K09QOGZmalBoWHR4ODl3LzBKWTlEQUEvc0ZsbERYbUNGQ0cvS00ycnRmSUROd0dnRFlMTnQrWGxvN210Q3pIWkNxemNXNEtYWTh1dzZzZXJlUE9uY3J4NXFCeXJEMTdEcTNGbGVDSG1DZzdrMXFHdHF4YzlyZm5vUGkwaHcyTzBHdGFtM3Z5L1kvOUhmeHNFTVFqQTR4czNKWUNHZWFHZkE3QWpaRHFLcGlnaFNneE0rZ0dhRTVMUTJOS0J0K05Lc0d6SFJTemZmVWxBdkxpL0ZDOGZLTVVyQlBNU2ZUNi83d3IrL2NObFJIMWZqT2lEcFdqdDdFRnY0MEg4VWFUYjhLMWlnSktrd1ZrWUJPRHpneWNDa3Z5R0FwQ2UvTW9EV1VhR09EL0ZzQjlDc1RnSzdSMmQySmg0QlF1L1BvZW52eW5DVW9KNGRsZXhNTHRpajFKOC9hOWRsN0JzNTBVcy92WUNGbTA3ajUybnF0RFQyNHZmeTJSYXpXcFRYOEhkcUR0K3QyNEEzMzFlQ0R3c0gxSkNhbjBRYm9rOE9oc0tIek1RMmFoUFBZYnN5L1dZODBVK252aXlBRThTeEZOa2prMHUrZTZDZ09HczhDZmZlM3A3RVNLM25zT0NMWVdZdCtrczhxODJvN2NsVmF2Wm9ib1B2eFhlZzZaVDkrZ0drTVg3MHFLbGhhc0RJQ2hlaWhSVENVNFR4UGtaTG1ocGJjV0dRNWNSL01rWmhINmVqN0ROWnpGL0M0TVVDcU9MdHA0WDRtdk9FQnNQSjFBR252VnBIajVLS2tWMzkzWDhjZGxNaStGYmRSOStQLzlQWE04ZEFTQjRHQURXeXVVMlNKODRHWVd6UXRIYzBvTDVYK1pCL2tFdUFqOCtJMHd4eUZ5S0xzT0VieTRRaHNQcG1pUE94bWQvbG9jZ0F2Yi84RFFpdHB3bGdHN2NMSk5xTVR4WXZGWVlvRHZuM21FQTRnZ2dtUUIrSm1reHp3cE84TVBPNlZOd2NjMDZORFUxdzNWZEpyemV5NFowWXc3a0h4TElSNmRGUmtJSWhzVlEvTW4zR0pLTnl6Ym13dWY5YkxpdnowSlhWeGQ2cTkvUmFscFRBdURjQStqS3ZrODNnUFFBQVJ3T0dCYUF0ZkF0UnhSR3IwRkRZeU9jMXFURGVXMG0zTWdNZy9odXlDR0RPZkNqck
dpSzcvRXpiekx1UVgxZENOeHBiUVk2T2p2Uld6VWFnUHZ4ZStFRGRNRGRQd3hBakErQ2tnamd5UEJsRkpMc2o4K2VuNGU2K2daNHZKY0d4M2ZUQk1oTU1zVEczTlpsaWVpeVViVVlrSjh4ck5PYURNeWdkNEkreVVKYmV3ZCtLL1hWYWxvdGRmbmNMQmhEcC9RRHVnRjg5eEpBUWtEL09oZzJDekh6b0tpdFE5UzNPYkIrOHhqczNqNEJoK2lUQW1ZR3diRFlxRkxLNzJ6YUlmb1U3Tjc1RlRadkhjZnIrd3JFUnZEN0pWT3R4dFVTMGFmeTZjdC9pRTd2TWJvQnZMN3hoRHplVDFsR0kyU0JsVldjaGVUY0s1ajZXaklzVnYyTTZhdVB3b3BnMkp3dEFiRVlqRC81SG9OYXZwRktmWCtCK1dzcFNDMm9RRnQ5bWxiVEExSkgvMEYwWjQ5QlZmSkR1Z0hjTnJuQ2I3OFVRWW1qeThMN3VldFJYYVBBbkUrT3dlakZRekI1T1JGbXJ4NFdRT2F2cDJEYTYwZUV6RmxrZUNvOU0zMGxDY1l2SmVDWnI5UFFTSnZBamRMSHRaZ2VrRHI2djFIME85UEhvamgyR0FDSGFBYzZqZW5IM0U4RU1Nb3NITHQ4SE1XbEZaQ3VTWUpreFFFWXJJeUR3Zk0vd3ZBRjFzRis4VDJEbGZHUVBCY0wyYnBrbEZYV29yVTZVYXRwdFFacS8wSDBuaDZIbGhOamNYekxNQUNTSUFtOGQzb2pJTjVmdVJaRzJGSlpUeHliaTh5U1RGeThVbzdaNnhNd1pla3U2RWY5QU1tenJEMERvbnY2eTNZajlQMGtYQ212d2VXeWRQUVVUZEpxbk1YbS96aXYzSG42OHVoWGF2WjROQjE5QkM4dEdHWVhZcmx2ZG9kc3J3eUJCd2xDWFVxamdFZ3RUa1ZGUlNWMi9aS0hlUnNUTUc3UmRqeXNra0hVVG9SOWtJaUVqQ0xVS0dxUlduUUNaOUpNdEJwWGlzeHJsRTVQRHYzc3B2OExwZkhqKzMycTJ4QUE2eGR0NFAwZFpTSFdYMWxLdEsyT1pqMncxdWVzUS9ybERKUVRTRVZsbFZCbFZYVy9NaTVsNFoyamEvQkdyQzF1bkx0ZmkvRmJ6SjhkZzk3Y2g5R1ZNUkVOUjhZajdyMkhkUU53VXo5MC84SUQwbDFTQk1SUkZnNXBRSXlRQ2JXZVBoR0p0ekpYNCt2OExhU3ZzUHJrS2tRa1JrQVc0d2YvM2I0b3loaXZ4YmlxYkRUTlU5MTNaMDVFUytva2xNVk9HbUtlbTFhQTZTdXM0TG5WQy9LOThrRVE0b2NlUTR5d3NEVVZRdjhsQWhQOGFYdVdDNEN2RHB2ck5NOExsbXVleTRZajM1MDVnZjZpNmtHUk9Bbjdvb2VXRDdmQjM2aXBPemx0Y0lIbnRnR0l3RnN6TVlxU1lnV25CQ0NBQWVMa1dIREFBdzE1RHc0eHJveTZjcmZweTZPYVY1V04ybnpCRHVVZitsdk5jOU1KTU5GTkR5NmZlY0I3dTQ4U2d0ZkV3UURsd3FZdDlsWVFYVEJCaC8wUmNFZ0prSEpDb2pMT3BsWEcxVkdua3JseFpxeFlzRjNwRTlGK1lqTHFEazlHV2N4a1JNcVZQeDlHQmNCTjNka2swaHd1dEI1OHR2dEN2a2NPLy8wRThhTXlHd0lrV1FORURhTUJ4TmVCU1VxQU41SWNjWjJNOXB1bWlMUHhtMlNjdDBsUk1sa1QwSm4yS0ZxUFBZYmFwQ200ZG1BS05xM1VYanJxTml3QXkzU3hCWndwRTE1YmZlRzNpeUQycWJLaENjSVpvVE5Ed1BEaHB3SUtJa0F1bjlCRE1oUmxUaEtMVTVpbVV1RTZGeEVuNDlmSmVGZjZKTm9xSjZQNXFENFVDZnE0R3FPUGJTOVBHT1JGVzlOK2w1cm1peVpQVzhEcEV3KzRiL0dHYkljZjVEOFFTQXhGVmcxQzJ5MGZmQUtHMXdrREhhYm9VK1FENkR6WmZNUkcvQkRqU044NE0wNUVtdzhtWHFTZGFXeGNqM2FhS1doSU1VQmx2QVJYOWhoZzY0c1RCM25RMVhRL29hWTVnQ1RjRFBZYjNFUTJmTGJMbENDN2xSbmgwbUlZUHNINUFHUWdsbitzSEJIeFBxaE8wMU1hcGtoMzArSmsweDBuOWRCMm5DT3VORjZiWUlTeWZSSVVmV2VBRDVjT2xBMXJ1RGI4VTJxYUE0Mnptd0NyTjV6ZytLRTdYRGQ1dzJlYkROSnYvZUMzazBRd3ZFN2tERVRaa2NmUXRrbVoycDlvS2N4eWxOa3cxemVYU2VNUkNlcVRES0U0Wkl4cis0MXc2WHREcEgycWoxRFhnUVhMR3FtTjNJT2E1b0FzZ3doeldFYzd3MkdqTysxVVh2RGNJb1gzVmhsOEtUTU14Tm1SZnVlSHFPL2RvVWcyRUJHdXAwODJ6Skd1K2NrRVZYR21LTnRyak9JZGhqaTkyUkJmTEI5Y01xelJ0Tkgxb25icjRDekpFK2IwSDhBSnR1dmRZTC9SRFU0ZmUxS0plY0hqU3luY04vbmk2QjVMVkpMUmlsZ3psQjh3UTlrZVk1VHNNc2JGYncxeGZwc1IwajQyd0dkUkF5ZXNwa2JiUnQ5VDFiUk45cERWZUJoRVdzRDhCWHRZRUpEMVdsY3MyMmlQd3ExR09NZjYyZ2lGWHhuajFFZUdTRjRyd2RibjlCRG9PTGhVMUxyZGR2dHZxSnEyeWU5RWY3YjkrVGRWVFp1WjI5R2R0anNmNFM5dHdIOEJWMDkraW83Umh2NEFBQUFBU1VWT1JLNUNZSUk9", "endpoint": ["JASON-WIN81-VM|1"], "legal_copyright": "Copyright 2012 Google Inc. 
All rights reserved.", "original_filename": "chrome.dll", "cb_version": 500, "os_type": "Windows", "file_desc": "Google Chrome", "last_seen": "2015-06-25T04:20:03.351Z"} \ No newline at end of file diff --git a/tests/data/daemon.conf b/tests/data/daemon.conf deleted file mode 100644 index 8f2c867..0000000 --- a/tests/data/daemon.conf +++ /dev/null @@ -1,4 +0,0 @@ -[bridge] -carbonblack_server_url=http://localhost:7982 -carbonblack_server_token=super_sekret -yara_rule_directory=/tmp \ No newline at end of file diff --git a/tests/data/yara_rules/rules.yar b/tests/data/yara_rules/rules.yar deleted file mode 100644 index 45f74f9..0000000 --- a/tests/data/yara_rules/rules.yar +++ /dev/null @@ -1,15 +0,0 @@ -rule test -{ - meta: - author = "Bit9 + Carbon Black " - date = "2015/08" - filetype = "exe" - testing = "yep" - - strings: - $a = "win8_rtm.120725-1247" - - condition: - all of them -} - diff --git a/tests/test_yara.py b/tests/test_yara.py deleted file mode 100644 index 58ad0a0..0000000 --- a/tests/test_yara.py +++ /dev/null @@ -1,76 +0,0 @@ -__author__ = 'jgarman' - -import unittest -from cbint.utils.detonation import DetonationDaemon, CbAPIProducerThread -from cbint.utils.detonation.binary_analysis import DeepAnalysisThread -from cbopensource.connectors.yara.bridge import YaraConnector, YaraProvider -import os -import sys -import tempfile -from time import sleep -import multiprocessing -import socket -import threading - - -sys.path.append(os.path.dirname(os.path.abspath(__file__))) -from utils.mock_server import get_mocked_server - -test_dir = os.path.dirname(os.path.abspath(__file__)) - - -class ServerNeverWokeUpError(Exception): - pass - - -def sleep_till_available(conn_tuple): - num_retries = 5 - while num_retries: - s = socket.socket() - try: - s.connect(conn_tuple) - except socket.error: - num_retries -= 1 - sleep(.1) - else: - return - - raise ServerNeverWokeUpError(conn_tuple) - - -class YaraTest(unittest.TestCase): - def setUp(self): - self.temp_directory = tempfile.mkdtemp() - config_path = os.path.join(test_dir, "data", "daemon.conf") - - mydir = os.path.dirname(os.path.abspath(__file__)) - binaries_dir = os.path.join(mydir, 'data', 'binary_data') - self.mock_server = get_mocked_server(binaries_dir) - self.mock_server_thread = threading.Thread(target=self.mock_server.run, args=['127.0.0.1', 7982]) - self.mock_server_thread.daemon = True - self.mock_server_thread.start() - sleep_till_available(('127.0.0.1', 7982)) - - self.daemon = YaraConnector('yara-test', configfile=config_path, work_directory=self.temp_directory, - logfile=os.path.join(self.temp_directory, 'test.log'), debug=True) - self.daemon.validate_config() - - self.daemon.initialize_queue() - - def test_yara(self): - CbAPIProducerThread(self.daemon.work_queue, self.daemon.cb, self.daemon.name, rate_limiter=0, - stop_when_done=True).run() - - yara_provider = YaraProvider('yara-test', os.path.join(test_dir, 'data', 'yara_rules')) - dirty_flag = threading.Event() - t = DeepAnalysisThread(self.daemon.work_queue, self.daemon.cb, yara_provider, dirty_event=dirty_flag) - t.start() - - unanalyzed = self.daemon.work_queue.number_unanalyzed() - while unanalyzed: - print unanalyzed - sleep(.1) - unanalyzed = self.daemon.work_queue.number_unanalyzed() - - t.stop() - t.join() diff --git a/tests/utils/__init__.py b/tests/utils/__init__.py deleted file mode 100644 index 6d75c54..0000000 --- a/tests/utils/__init__.py +++ /dev/null @@ -1 +0,0 @@ -__author__ = 'jgarman' diff --git a/tests/utils/mock_server.py b/tests/utils/mock_server.py 
deleted file mode 100644 index d9bd21d..0000000 --- a/tests/utils/mock_server.py +++ /dev/null @@ -1,97 +0,0 @@ -import logging -import os - -try: - import simplejson as json -except ImportError: - import json - -from flask import Flask, request, make_response, Response -from cStringIO import StringIO -import zipfile - - -def get_mocked_server(binary_directory): - mocked_cb_server = Flask('cb') - - files = os.listdir(binary_directory) - - @mocked_cb_server.route('/api/v1/binary', methods=['GET', 'POST']) - def binary_search_endpoint(): - if request.method == 'GET': - query_string = request.args.get('q', '') - rows = int(request.args.get('rows', 10)) - start = int(request.args.get('start', 0)) - elif request.method == 'POST': - parsed_data = json.loads(request.data) - if 'q' in parsed_data: - query_string = parsed_data['q'] - else: - query_string = '' - - if 'rows' in parsed_data: - rows = int(parsed_data['rows']) - else: - rows = 10 - - if 'start' in parsed_data: - start = int(parsed_data['start']) - else: - start = 0 - else: - return make_response('Invalid Request', 500) - - return Response(response=json.dumps(binary_search(query_string, rows, start)), - mimetype='application/json') - - def binary_search(q, rows, start): - return { - 'results': - [json.load(open(os.path.join(binary_directory, fn), 'r')) for fn in files[start:start+rows]], - 'terms': '', - 'total_results': len(files), - 'start': start, - 'elapsed': 0.1, - 'highlights': [], - 'facets': {} - } - - @mocked_cb_server.route('/api/v1/binary//summary') - def get_binary_summary(md5sum): - filepath = os.path.join(binary_directory, '%s.json' % md5sum.lower()) - if not os.path.exists(filepath): - return Response("File not found", 404) - - binary_data = open(filepath, 'r').read() - return Response(response=binary_data, mimetype='application/json') - - @mocked_cb_server.route('/api/v1/binary/') - def get_binary(md5sum): - metadata_filepath = os.path.join(binary_directory, '%s.json' % md5sum.lower()) - content_filepath = os.path.join(binary_directory, '%s' % md5sum.lower()) - - for filepath in [metadata_filepath, content_filepath]: - if not os.path.exists(filepath): - return Response("File not found", 404) - - zipfile_contents = StringIO() - zf = zipfile.ZipFile(zipfile_contents, 'w', zipfile.ZIP_DEFLATED, False) - zf.writestr('filedata', open(content_filepath, 'r').read()) - zf.writestr('metadata', open(metadata_filepath, 'r').read()) - zf.close() - - return Response(response=zipfile_contents.getvalue(), mimetype='application/zip') - - @mocked_cb_server.route('/api/info') - def info(): - return Response(response=json.dumps({"version": "5.1.0"}), mimetype='application/json') - - return mocked_cb_server - - -if __name__ == '__main__': - mydir = os.path.dirname(os.path.abspath(__file__)) - binaries_dir = os.path.join(mydir, '..', 'data', 'binary_data') - - mock_server = get_mocked_server(binaries_dir) - mock_server.run('127.0.0.1', 7982, debug=True) From ae5220144d8eaf670abe729c50de6642f4e3ef34 Mon Sep 17 00:00:00 2001 From: Jason McFarland Date: Thu, 17 Jan 2019 14:07:38 -0600 Subject: [PATCH 002/257] deleting files not needed --- LICENSE | 22 -------------- MANIFEST.in | 5 --- post_install | 6 ---- pre_uninstall | 5 --- python-cb-yara-connector.spec | 57 ----------------------------------- 5 files changed, 95 deletions(-) delete mode 100644 LICENSE delete mode 100644 MANIFEST.in delete mode 100644 post_install delete mode 100644 pre_uninstall delete mode 100644 python-cb-yara-connector.spec diff --git a/LICENSE b/LICENSE deleted file 
mode 100644 index 299dfcd..0000000 --- a/LICENSE +++ /dev/null @@ -1,22 +0,0 @@ -The MIT License (MIT) - -Copyright (c) 2015 Bit9 + Carbon Black - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in all -copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -SOFTWARE. - diff --git a/MANIFEST.in b/MANIFEST.in deleted file mode 100644 index f08ebe5..0000000 --- a/MANIFEST.in +++ /dev/null @@ -1,5 +0,0 @@ -recursive-include cbopensource * -recursive-include root/etc * -recursive-include root/usr * -include cb-yara-connector.spec -recursive-include scripts \ No newline at end of file diff --git a/post_install b/post_install deleted file mode 100644 index c7816fe..0000000 --- a/post_install +++ /dev/null @@ -1,6 +0,0 @@ -#!/bin/sh - -mkdir -f /usr/share/cb/integrations/yara/db -chkconfig --add cb-yara-connector -chkconfig --level 345 cb-yara-connector on - diff --git a/pre_uninstall b/pre_uninstall deleted file mode 100644 index 1e36a36..0000000 --- a/pre_uninstall +++ /dev/null @@ -1,5 +0,0 @@ -#!/bin/sh - -/etc/init.d/cb-yara-connector stop - -chkconfig --del cb-yara-connector diff --git a/python-cb-yara-connector.spec b/python-cb-yara-connector.spec deleted file mode 100644 index 5f9698c..0000000 --- a/python-cb-yara-connector.spec +++ /dev/null @@ -1,57 +0,0 @@ -%define name python-cb-yara-connector -%define version 1.3 -%define unmangled_version 1.3 -%define release 5 -%global _enable_debug_package 0 -%global debug_package %{nil} -%global __os_install_post /usr/lib/rpm/brp-compress %{nil} - -Summary: Carbon Black yara Bridge -Name: %{name} -Version: %{version} -Release: %{release} -Source0: %{name}-%{unmangled_version}.tar.gz -License: Commercial -Group: Development/Libraries -BuildRoot: %{_tmppath}/%{name}-%{version}-%{release}-buildroot -Prefix: %{_prefix} -BuildArch: x86_64 -Vendor: Carbon Black -Url: http://www.carbonblack.com/ - -%description -UNKNOWN - -%prep -%setup -n %{name}-%{unmangled_version} - -%build -pyinstaller cb-yara-connector.spec - -%install -python setup.py install_cb --root=$RPM_BUILD_ROOT --record=INSTALLED_FILES - -%clean -rm -rf $RPM_BUILD_ROOT - -%posttrans -mkdir -p /usr/share/cb/integrations/yara/db -chkconfig --add cb-yara-connector -chkconfig --level 345 cb-yara-connector on - -# not auto-starting because conf needs to be updated -#/etc/init.d/cb-yara-connector start - -%preun -/etc/init.d/cb-yara-connector stop - -# only delete the chkconfig entry when we uninstall for the last time, -# not on upgrades -if [ "X$1" = "X0" ] -then - chkconfig --del cb-yara-connector -fi - - -%files -f INSTALLED_FILES -%defattr(-,root,root) From 
dd28cc380805a840ce836f08b0eac2d08a9cc08c Mon Sep 17 00:00:00 2001 From: Jason McFarland Date: Fri, 18 Jan 2019 09:17:37 -0600 Subject: [PATCH 003/257] working to get remote scans working with pyinstaller and remote workers from instructions --- README.md | 124 +++++++++++++++++++++++++++++++++++++++-------- globals.py | 2 +- main.spec | 3 +- requirements.txt | 1 + tasks.py | 39 +++++++++++---- 5 files changed, 139 insertions(+), 30 deletions(-) diff --git a/README.md b/README.md index 3e596b9..424c00a 100644 --- a/README.md +++ b/README.md @@ -1,14 +1,22 @@ -install zlib -install openssl-devel -install sqlite-devel -# Running the agent -mkdir -p /usr/share/cb/integrations/yara/yara_rules` -wget <> /usr/share/cb/integrations/yara/yara_agent +# Installing Yara Agent (Centos/RHEL 6) -## Sample Yara Agent Config +* Create directories + + + mkdir -p /usr/share/cb/integrations/yara/yara_rules + +* Download Yara Agent + + + wget <> /usr/share/cb/integrations/yara/yara_agent + +* Create Yara Agent Config File + + +#### Sample Yara Agent Config [general] @@ -53,15 +61,101 @@ wget <> /usr/share/cb/integrations/yara/yara_agent ; niceness=1 -* copy the above config to `/etc/cb/integrations/yara/yara_agent.conf` +* copy and modify the above config to `/etc/cb/integrations/yara/yara_agent.conf` + +#### Run Yara Agent Manually -# Example Cron Entry + ./yara_agent --config-file=/etc/cb/integrations/yara/yara_agent.conf -## +#### Example Cron Entry +# Remote Worker Installation (Centos/RHEL 7) +* Install Python 3.6 -# Centos 6 Build Instructions + + sudo yum install epel-release + sudo yum install python36 + sudo yum install python36-devel + +* Install Redis + + + sudo yum install redis + sudo systemctl start redis + sudo systemctl enable redis + +* Install Supervisord + + + sudo yum install supervisor + +* Install Yara Worker + + + git clone https://github.com/carbonblack/cb-yara-connector.git + cd cb-yara-connector + git checkout yara_version2 + python3.6 -m venv venv + source ./venv/bin/activate + pip install -r requirements.txt + mkdir yara_rules_remote + +* Create Yara Worker Config File + +#### Example Yara Worker Config File + + [general] + + ; + ; Cb Response Server Configuration + ; Used for downloading binaries + ; + cb_server_url= + cb_server_token= + + ; + ; Directory for temporary yara rules storage + ; WARNING: Put your yara rules with the yara agent. This is just temporary storage. 
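A worker's yara_rules_dir is only a staging area because the agent re-pushes its rule files on every run; below is a minimal sketch of that push, assuming the update_yara_rules_remote Celery task that tasks.py defines in this patch series (everything else is illustrative).

```
# Minimal sketch, assuming update_yara_rules_remote is the Celery task from
# tasks.py; each worker writes the received bytes into its own yara_rules_dir.
import os

def generate_rule_map_remote(yara_rule_path):
    ret_dict = dict()
    for fn in os.listdir(yara_rule_path):
        if fn.lower().endswith(".yar"):
            # ship the raw rule bytes keyed by filename
            ret_dict[fn] = open(os.path.join(yara_rule_path, fn), 'rb').read()
    update_yara_rules_remote.delay(ret_dict)
```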
+ ; + yara_rules_dir=yara_rules_remote + +* Copy and modify the above + +#### Run Yara Worker Manually + + celery -A tasks worker --concurrency=10 --loglevel=info + +#### Example Supervisor Config + + [program:yara_workers] + stdout_logfile=/var/log/yara_worker.log + stderr_logfile=/var/log/yara_worker.log + user= + directory=/home//cb-yara-connector + command=/home//cb-yara-connector/venv/bin/celery -A tasks worker --config-file=yara_worker.conf --concurrency=10 --loglevel=info + autostart=true + autorestart=true + +* Copy the above, modify and add to `/etc/supervisord.conf` + +* Enabled Supervisor + + + systemctl enable supervisord + +* Restart Supervisor + + + systemctl restart supervisord + +# Centos 6 Build Instructions (Development) + +## Install Dependencies + +* zlib-devel +* openssl-devel +* sqlite-devel ## Install Python 3.6 @@ -78,11 +172,3 @@ wget <> /usr/share/cb/integrations/yara/yara_agent ## Create Executable pyinstaller main.spec - -# Centos 7 Build Instructions - -## Install Python 3.6 - -## Create VirtualEnv - -## Create Executable diff --git a/globals.py b/globals.py index d34ded1..4f00d6b 100644 --- a/globals.py +++ b/globals.py @@ -19,7 +19,7 @@ g_postgres_port = 5002 g_postgres_db = 'cb' -MAX_HASHES = 2 +MAX_HASHES = 8 g_num_binaries_not_available = 0 g_num_binaries_analyzed = 0 diff --git a/main.spec b/main.spec index cb2172f..0515560 100644 --- a/main.spec +++ b/main.spec @@ -9,7 +9,8 @@ a = Analysis(['main.py'], datas=[ (HOMEPATH + '/cbapi/response/models/*', 'cbapi/response/models/'), (HOMEPATH + '/cbapi/protection/models/*', 'cbapi/protection/models/'), (HOMEPATH + '/cbapi/defense/models/*', 'cbapi/defense/models/') ], - hiddenimports=['celery.fixups', 'celery.fixups.django', 'celery.loaders.app'], + hiddenimports=['celery.fixups', 'celery.fixups.django', 'celery.loaders.app', + 'celery.app.amqp', 'kombu.transport.redis'], hookspath=[], runtime_hooks=[], excludes=[], diff --git a/requirements.txt b/requirements.txt index 4e2bdb2..0bb99d4 100644 --- a/requirements.txt +++ b/requirements.txt @@ -33,3 +33,4 @@ urllib3==1.24.1 vine==1.2.0 wcwidth==0.1.7 yara-python==3.8.1 +redis==3.0.1 diff --git a/tasks.py b/tasks.py index df2f533..795a0e4 100644 --- a/tasks.py +++ b/tasks.py @@ -24,9 +24,28 @@ g_yara_rules_dir = "" +def verify_config(config_file): + config = configparser.ConfigParser() + config.read(config_file) + + if not config.has_section('general'): + logger.error("Config file does not have a \'general\' section") + return False + + if 'yara_rules_dir' in config['general']: + globals.g_yara_rules_dir = config['general']['yara_rules_dir'] + + if 'cb_server_url' in config['general']: + globals.g_cb_server_url = config['general']['cb_server_url'] + + if 'cb_server_token' in config['general']: + globals.g_cb_server_token = config['general']['cb_server_token'] + + return True + + def add_worker_arguments(parser): parser.add_argument('--config-file', default='yara_worker.conf', help='Yara Worker Config') - parser.add_argument('--yara-rules-dir', default='yara_rules', help='Yara Rules Directory') app.user_options['worker'].add(add_worker_arguments) @@ -34,13 +53,13 @@ def add_worker_arguments(parser): class MyBootstep(bootsteps.Step): - def __init__(self, worker, config_file='yara_worker.conf', yara_rules_dir='yara_rules', **options): + def __init__(self, worker, config_file='yara_worker.conf', **options): super().__init__(self) global g_config global g_yara_rules_dir - g_config = configparser.ConfigParser() - g_config.read(config_file) - g_yara_rules_dir = 
yara_rules_dir + verify_config(config_file) + + # g_yara_rules_dir = yara_rules_dir app.steps['worker'].add(MyBootstep) @@ -103,7 +122,8 @@ def analyze_binary(md5sum): cb = CbResponseAPI(url=globals.g_cb_server_url, token=globals.g_cb_server_token, - ssl_verify=False) + ssl_verify=False, + timeout=5) binary_query = cb.select(Binary).where(f"md5:{md5sum}") @@ -118,7 +138,7 @@ def analyze_binary(md5sum): yara_rules = yara.compile(filepaths=yara_rule_map) try: - #matches = "debug" + # matches = "debug" matches = yara_rules.match(data=binary_data, timeout=30) except yara.TimeoutError: # @@ -137,8 +157,9 @@ def analyze_binary(md5sum): if matches: score = getHighScore(matches) analysis_result.score = score - analysis_result.short_result = "Matched yara rules: %s" % ', '.join([match.rule for match in matches]) - #analysis_result.short_result = "Matched yara rules: debug" + analysis_result.short_result = "Matched yara rules: %s" % ', '.join( + [match.rule for match in matches]) + # analysis_result.short_result = "Matched yara rules: debug" analysis_result.long_result = analysis_result.long_result analysis_result.misc = generate_yara_rule_map_hash(globals.g_yara_rules_dir) else: From a0aa233c5e0c3ef17fb427cf8bdca8de2bb625d6 Mon Sep 17 00:00:00 2001 From: Jason McFarland Date: Fri, 18 Jan 2019 09:17:54 -0600 Subject: [PATCH 004/257] fixes to detecting remote in config --- main.py | 65 +++++++++------------------------------------------------ 1 file changed, 10 insertions(+), 55 deletions(-) diff --git a/main.py b/main.py index 31c042e..33e6896 100644 --- a/main.py +++ b/main.py @@ -151,59 +151,6 @@ def save_results(analysis_results): generate_feed_from_db() -def queue_save_results(md5_hashes): - try: - scan_group = list() - for md5_hash in md5_hashes: - scan_group.append(analyze_binary.s(md5_hash)) - job = group(scan_group) - - result = job.apply_async() - - time_waited = 0 - while not result.ready(): - if time_waited == 100: - break - else: - time.sleep(.1) - time_waited += 1 - - if result.successful(): - for analysis_result in result.get(timeout=30): - if analysis_result.binary_not_available: - globals.g_num_binaries_not_available += 1 - continue - try: - bdr = BinaryDetonationResult() - bdr.md5 = analysis_result.md5 - bdr.last_scan_date = datetime.now() - bdr.score = analysis_result.score - bdr.last_error_msg = analysis_result.last_error_msg - bdr.last_success_msg = analysis_result.short_result - bdr.misc = json.dumps(globals.g_yara_rule_map_hash_list) - bdr.save() - - except: - logger.error("Error saving to database") - logger.error(traceback.format_exc()) - if analysis_result.score > 0: - fields = {'iocs': {'md5': [analysis_result.md5]}, - 'score': analysis_result.score, - 'timestamp': int(time.mktime(time.gmtime())), - 'link': '', - 'id': f'binary_{analysis_result.md5}', - 'title': '', - 'description': analysis_result.short_result - } - - globals.g_reports.append(CbReport(**fields)) - else: - logger.error(result.traceback()) - except: - logger.error(traceback.format_exc()) - time.sleep(5) - - def print_statistics(): pass @@ -280,7 +227,11 @@ def main(yara_rule_dir): if len(md5_hashes) >= globals.MAX_HASHES: analysis_results = analyze_binaries(md5_hashes, local=(not globals.g_remote)) - save_results(analysis_results) + if analysis_results: + save_results(analysis_results) + else: + logger.error(traceback.format_exc()) + logger.error("analysis_results is None") md5_hashes = list() if num_total_binaries % 1000 == 0: @@ -322,7 +273,8 @@ def verify_config(config_file, output_file): 
logger.error("Config file does not have a \'general\' section") return False - if 'worker_type' not in config['general']: + if 'worker_type' in config['general']: + logger.info(config['general']['worker_type']) if config['general']['worker_type'] == 'local': globals.g_remote = False @@ -332,6 +284,8 @@ def verify_config(config_file, output_file): globals.worker_ip = config['general']['worker_ip'] else: logger.error("invalid worker_type specified. Must be \'local\' or \'remote\'") + else: + logger.warn("Config file does not specify worker_type, assuming local") if 'yara_rules_dir' in config['general']: globals.g_yara_rules_dir = config['general']['yara_rules_dir'] @@ -389,6 +343,7 @@ def verify_config(config_file, output_file): db.initialize(database) db.connect() db.create_tables([BinaryDetonationResult]) + generate_feed_from_db() main('yara_rules') except: logger.error(traceback.format_exc()) From 669d164baf7b176e067d00b84f752dbc94ac35d2 Mon Sep 17 00:00:00 2001 From: Jason McFarland Date: Fri, 18 Jan 2019 09:18:01 -0600 Subject: [PATCH 005/257] adding yara logo --- yara-logo.png | Bin 0 -> 3123 bytes 1 file changed, 0 insertions(+), 0 deletions(-) create mode 100644 yara-logo.png diff --git a/yara-logo.png b/yara-logo.png new file mode 100644 index 0000000000000000000000000000000000000000..8d57f97f4975046ce7523cc63ce6a9dcf7b7c97b GIT binary patch literal 3123 zcmV-349xS1P)5P}9u*k}U2%p{mCK?6k74>m$E;un`tzJQS^g~Sgv5-^hB8e^-*jV2h2At?qD zW^2GmQmCZtg0#*^*HSB;b_%7GOqspU?d>wl0-bx$xp(f%|Ce0H0Qc?u@BQEBygkR4 zOeP5c0Ib5#|2a`y?5dku(bEHE0kE^_XjCfOA1yiQv}rjH@8AEI!eFq{mz|ws4vC=rj-t&tJ6%9b;II%O%px0JvC z_xtZdJpjCHIyr#=O`bDH=oraFf_|y5r|u6wq@-dr005ZIJ;{IM5#eL}YvV@hKYA3Z z0N`SzwdqV!A{`O>nww!b0Kjz8hsWoLaOKb;r~v>>$2x}E+o1*kFrD<_Mqm+cq2U++ zn2sJW>l*-&4$^wBMg&dA|6l=)P?xDUJJ;uf;=pWz%2uvzsYJCc?6m^vykqePX{K`{ z5)s-zw{$iQoolz^fd~rhA2#u!5xurX?V+BCn!CIZc7V%uK-t<%JV%{L`}5qN4chNS z_`I%(yhx|3OC3pUbh2lj;J`v1FZNCeLfG-z!fBe$20?Q(i`08>Qno|d?|@0?kYPHU zr@KM>6~R-QNbS86>4AD+*)jy?AT@H}m$_g2wPO^((a9MJhT}O+q?*pv)29vZJ9Wtt zylUWHFLy+XG74>UO%2e*m?lz9XYhv~4DXv)S}Hui5V#r+^^e;ivhcd$2x}tMbmHHC zFEn1KVEJ;$2<}IP_K??P==@xz*b5oNG?8jLqwzRh`|(HPrt{>JkPu8np%!Um!g%)D zsEX%UO{7nQ=s$kk@V?VRAtIz91?j;LdC=l>u0^qnYho+Y;iZOA#R59`8j*+dG?8vP z*IHX?q`TYj{?k{kgls@hSicj+;4Do{nPuL8?3m$wdCe?Tv49S0$)4kQpox{HbLH@1 z!~1eFFjTRCPPU*3oTG^;Aq6K@=Y;D1f&xSP2db(l-qHfu%wa2KwjCs>Y)iCXS=X^v zyDziZ1x?bX=3bhZQ;$g`D0cKH1zvc;(C$U6R#B(6>I0BYTZYdcPO1CXYF|Xr2c1Lt z3JdP74w|G*;k`65Epy<2Kra?g8ZDBvoDHxo1KLZxRMJa_?reu>Vw&m11R9goXy@5y z@d$W>sIrAfd^R~I?>R&h(@aMloh&fxoCgYKS+FKLu+0}%v`wZQ#pn!8%uCGf_Y1Yj zWaZ^iA*MxmiAKTwD^P^a&_ufFaC9=``R8fLkt6iriFlx0(iBe=By21qT92Mlj9K=r3#IWi3a&uLK;St1aII@f;)o@};v;+Wf z7rIS{qnbH8cdAvWY?B&#Z}#?5=eBKh{rq_-1c28~XU^{3R9syx{X&K#5p^x|J|(&t zJatMPU^JS$M?gUcwj`Xku5>nXH(OUO2Z7C-&+) zd{~_bn#7vrb|F(KKY_cmlF%G1U-YjEI{1q;&t zw?4n09^Sv7=I+@;4?Ol59xHo&O<_3D#2Jy!l=<^@9h`F>b59pbK87SF1!k z1!CrlFVa-Wv%vEgE~K&@JLtkIui%lC%Ld8EdOrSG z-MC6Lv$~odTC>J*681YmHOsWGs;mclE15Tw`)LoEj%PIy%BREYmh;5SQ;QeV#VuRt zYHO?T1=)O*a+w1k?s7>FO(Tuv&J7cQCYdswad?z*@0VZFSta7Re(|Evv3d_4wEM&2 z#0OlIC|R?_imnrAqbtS6^9q z&xc6P1iPTNMKvRf3)ZvE>W+^}I!?J}v~5}$n$f1SYs?s2YG*G1Ham$Q#M32DGp1Izj_8G_jlMaMenLU*3M(4!u=I!reJqK z1Wn9l!ofCEmn~Bhri(;5u_H&ET0T)N@)@U`ZVz1#NMw2rEW}*P2DG!9~YF*GK_c`Fw=&sJ;RWDa)GGX0ri zGjy(9U9rJz`i~#aM1pp=uoXoBTw^+8(N44J7r|94VAyaFdA*A5pY9Kds?!*En`cFh{<+_p_hS9%-`$agauj%GHdm>Em=l+zxXwl5vl9K6qPJI2sh3@`i$GAU!RO0=_ z()c$~vPU=bKT?dQjs7<)Apow$mrN$ Date: Fri, 18 Jan 2019 10:26:55 -0600 Subject: [PATCH 006/257] fixing bugs when specifing remote brokers --- globals.py | 2 +- main.py | 8 +++++--- main.spec 
| 3 ++- tasks.py | 10 +++++++--- 4 files changed, 15 insertions(+), 8 deletions(-) diff --git a/globals.py b/globals.py index 4f00d6b..fe5532a 100644 --- a/globals.py +++ b/globals.py @@ -4,7 +4,7 @@ g_cb_server_url = 'https://127.0.0.1' g_cb_server_token = '' -worker_ip = "127.0.0.1" +broker_url = '' g_yara_rules_dir = 'yara_rules' output_file = 'yara_feed.json' diff --git a/main.py b/main.py index 33e6896..0c13d66 100644 --- a/main.py +++ b/main.py @@ -8,7 +8,7 @@ import json from datetime import datetime from peewee import SqliteDatabase -from tasks import analyze_binary, update_yara_rules_remote, generate_rule_map +from tasks import analyze_binary, update_yara_rules_remote, generate_rule_map, app import globals import argparse import configparser @@ -280,8 +280,10 @@ def verify_config(config_file, output_file): globals.g_remote = False elif config['general']['worker_type'] == 'remote': globals.g_remote = True - if 'worker_ip' in config['general']: - globals.worker_ip = config['general']['worker_ip'] + if 'broker_url' in config['general']: + app.conf.update( + broker_url=config['general']['broker_url'], + result_backend=config['general']['broker_url']) else: logger.error("invalid worker_type specified. Must be \'local\' or \'remote\'") else: diff --git a/main.spec b/main.spec index 0515560..730fe7a 100644 --- a/main.spec +++ b/main.spec @@ -10,7 +10,8 @@ a = Analysis(['main.py'], (HOMEPATH + '/cbapi/protection/models/*', 'cbapi/protection/models/'), (HOMEPATH + '/cbapi/defense/models/*', 'cbapi/defense/models/') ], hiddenimports=['celery.fixups', 'celery.fixups.django', 'celery.loaders.app', - 'celery.app.amqp', 'kombu.transport.redis'], + 'celery.app.amqp', 'kombu.transport.redis', 'redis', 'celery.backends', + 'celery.backends.redis', 'celery.app.events', 'celery.events'], hookspath=[], runtime_hooks=[], excludes=[], diff --git a/tasks.py b/tasks.py index 795a0e4..1e88bbf 100644 --- a/tasks.py +++ b/tasks.py @@ -1,6 +1,7 @@ from celery import Celery, bootsteps +import globals -app = Celery('tasks', backend='redis://localhost', broker='redis://localhost') +app = Celery() app.conf.task_serializer = "pickle" app.conf.result_serializer = "pickle" app.conf.accept_content = {"pickle"} @@ -41,6 +42,11 @@ def verify_config(config_file): if 'cb_server_token' in config['general']: globals.g_cb_server_token = config['general']['cb_server_token'] + if 'broker_url' in config['general']: + app.conf.update( + broker_url=config['general']['broker_url'], + result_backend=config['general']['broker_url']) + return True @@ -55,8 +61,6 @@ class MyBootstep(bootsteps.Step): def __init__(self, worker, config_file='yara_worker.conf', **options): super().__init__(self) - global g_config - global g_yara_rules_dir verify_config(config_file) # g_yara_rules_dir = yara_rules_dir From a86b738b0d6a6ffbcc65895cbe0d39f1c58a730c Mon Sep 17 00:00:00 2001 From: Jason McFarland Date: Fri, 18 Jan 2019 10:49:01 -0600 Subject: [PATCH 007/257] configuration file changes --- README.md | 45 ++++++++++++++++++++++++--------------------- globals.py | 3 --- 2 files changed, 24 insertions(+), 24 deletions(-) diff --git a/README.md b/README.md index 424c00a..5c5c287 100644 --- a/README.md +++ b/README.md @@ -1,6 +1,3 @@ - - - # Installing Yara Agent (Centos/RHEL 6) * Create directories @@ -11,7 +8,7 @@ * Download Yara Agent - wget <> /usr/share/cb/integrations/yara/yara_agent + wget -O /usr/share/cb/integrations/yara/yara_agent * Create Yara Agent Config File @@ -19,7 +16,7 @@ #### Sample Yara Agent Config [general] - + ; ; either 
run a single worker locally or remotely ; valid types are 'local' or 'remote' @@ -30,17 +27,7 @@ ; ONLY for worker_type of remote ; IP Address of workers if worker_type is remote ; - - ;worker_ip=127.0.0.1 - - ; - ; ONLY for worker_type of local - ; Cb Response Server settings for scanning locally. - ; For remote scanning please set these parameters in the yara worker config file - ; Default: https://127.0.0.1 - ; - cb_server_url= - cb_server_token= + broker_url=redis:// ; ; path to directory containing yara rules @@ -56,10 +43,20 @@ postgres_db= postgres_port= + ; + ; ONLY for worker_type of local + ; Cb Response Server settings for scanning locally. + ; For remote scanning please set these parameters in the yara worker config file + ; Default: https://127.0.0.1 + ; + cb_server_url= + cb_server_token= + ; ; nice value used for this script ; niceness=1 + * copy and modify the above config to `/etc/cb/integrations/yara/yara_agent.conf` @@ -99,13 +96,19 @@ python3.6 -m venv venv source ./venv/bin/activate pip install -r requirements.txt - mkdir yara_rules_remote + mkdir yara_rules -* Create Yara Worker Config File +* Create Yara Worker Config File `yara_worker.conf` #### Example Yara Worker Config File [general] + + ; + ; Python Celery Broker Url. Set this full url string for Redis + ; Example: redis:// + ; + broker_url=redis://127.0.0.1 ; ; Cb Response Server Configuration @@ -118,13 +121,13 @@ ; Directory for temporary yara rules storage ; WARNING: Put your yara rules with the yara agent. This is just temporary storage. ; - yara_rules_dir=yara_rules_remote + yara_rules_dir=yara_rules -* Copy and modify the above +* Copy, modify and save to `yara_worker.conf` #### Run Yara Worker Manually - celery -A tasks worker --concurrency=10 --loglevel=info + celery -A tasks worker --config-file=yara_worker.conf --concurrency=10 --loglevel=info #### Example Supervisor Config diff --git a/globals.py b/globals.py index fe5532a..831ceb9 100644 --- a/globals.py +++ b/globals.py @@ -1,4 +1,3 @@ -g_redis_url = '' g_config = {} g_cb_server_url = 'https://127.0.0.1' @@ -23,5 +22,3 @@ g_num_binaries_not_available = 0 g_num_binaries_analyzed = 0 - -g_num_save_results = 0 From d560e9085d7256213e84cb12a1a4f2127ae54428 Mon Sep 17 00:00:00 2001 From: Jason McFarland Date: Fri, 18 Jan 2019 10:52:10 -0600 Subject: [PATCH 008/257] readme changes --- README.md | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 5c5c287..bb5eeaf 100644 --- a/README.md +++ b/README.md @@ -1,15 +1,19 @@ # Installing Yara Agent (Centos/RHEL 6) +The Yara agent must be installed on the same system as Cb Response. 
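The broker_url value above is what both the agent and the worker hand to Celery; a minimal sketch of that wiring, assuming the [general] config layout from the samples in this README (the patches reuse the same Redis URL as broker and result backend):

```
# Minimal sketch, assuming a config file with a [general] section that
# contains broker_url, as in the sample configs in this README.
import configparser
from celery import Celery

app = Celery()
config = configparser.ConfigParser()
config.read('yara_worker.conf')
broker = config['general']['broker_url']  # e.g. redis://127.0.0.1

# one Redis instance serves as both the broker and the result backend
app.conf.update(broker_url=broker, result_backend=broker)
```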
+ * Create directories mkdir -p /usr/share/cb/integrations/yara/yara_rules + * Download Yara Agent wget -O /usr/share/cb/integrations/yara/yara_agent + * Create Yara Agent Config File @@ -27,7 +31,7 @@ ; ONLY for worker_type of remote ; IP Address of workers if worker_type is remote ; - broker_url=redis:// + ;broker_url=redis:// ; ; path to directory containing yara rules @@ -60,7 +64,7 @@ * copy and modify the above config to `/etc/cb/integrations/yara/yara_agent.conf` -#### Run Yara Agent Manually +#### Running Yara Agent Manually ./yara_agent --config-file=/etc/cb/integrations/yara/yara_agent.conf @@ -75,6 +79,7 @@ sudo yum install python36 sudo yum install python36-devel + * Install Redis @@ -82,11 +87,13 @@ sudo systemctl start redis sudo systemctl enable redis + * Install Supervisord sudo yum install supervisor + * Install Yara Worker @@ -98,6 +105,7 @@ pip install -r requirements.txt mkdir yara_rules + * Create Yara Worker Config File `yara_worker.conf` #### Example Yara Worker Config File @@ -147,10 +155,12 @@ systemctl enable supervisord + * Restart Supervisor systemctl restart supervisord + # Centos 6 Build Instructions (Development) From 285eb50fae0bffc49cf74424fd55b3405bdb210d Mon Sep 17 00:00:00 2001 From: Jason McFarland Date: Fri, 18 Jan 2019 10:55:25 -0600 Subject: [PATCH 009/257] readme formatting --- README.md | 34 +++++++++++++++++++++------------- 1 file changed, 21 insertions(+), 13 deletions(-) diff --git a/README.md b/README.md index bb5eeaf..3f9ab4b 100644 --- a/README.md +++ b/README.md @@ -4,15 +4,15 @@ The Yara agent must be installed on the same system as Cb Response. * Create directories - + ``` mkdir -p /usr/share/cb/integrations/yara/yara_rules - + ``` * Download Yara Agent - + ``` wget -O /usr/share/cb/integrations/yara/yara_agent - + ``` * Create Yara Agent Config File @@ -74,29 +74,30 @@ The Yara agent must be installed on the same system as Cb Response. * Install Python 3.6 - + ``` sudo yum install epel-release sudo yum install python36 sudo yum install python36-devel - + ``` * Install Redis - + ``` sudo yum install redis sudo systemctl start redis sudo systemctl enable redis + ``` * Install Supervisord - + ``` sudo yum install supervisor - + ``` * Install Yara Worker - + ``` git clone https://github.com/carbonblack/cb-yara-connector.git cd cb-yara-connector git checkout yara_version2 @@ -104,6 +105,7 @@ The Yara agent must be installed on the same system as Cb Response. source ./venv/bin/activate pip install -r requirements.txt mkdir yara_rules + ``` * Create Yara Worker Config File `yara_worker.conf` @@ -152,14 +154,15 @@ The Yara agent must be installed on the same system as Cb Response. * Enabled Supervisor - + ``` systemctl enable supervisord - + ``` * Restart Supervisor - + ``` systemctl restart supervisord + ``` # Centos 6 Build Instructions (Development) @@ -172,16 +175,21 @@ The Yara agent must be installed on the same system as Cb Response. 
## Install Python 3.6 + ./configure --prefix=/usr/local --enable-shared LDFLAGS="-Wl,-rpath /usr/local/lib" make make altinstall + ## Create VirtualEnv + python3.6 -m venv venv-build source ./venv-build/bin/activate pip install -r requirements.txt + ## Create Executable + pyinstaller main.spec From daa3100449ea67c082b4bd8c8195200e469525f4 Mon Sep 17 00:00:00 2001 From: Jason McFarland Date: Fri, 18 Jan 2019 13:56:51 -0600 Subject: [PATCH 010/257] added logging to file and debug mode --- main.py | 41 ++++++++++++++++++++++++++++------------- 1 file changed, 28 insertions(+), 13 deletions(-) diff --git a/main.py b/main.py index 0c13d66..d4cc915 100644 --- a/main.py +++ b/main.py @@ -19,7 +19,8 @@ from binary_database import db, BinaryDetonationResult import singleton -logging.basicConfig(format='%(asctime)s - %(name)s - %(levelname)s - %(message)s') +logging_format = '%(asctime)s - %(name)s - %(levelname)s - %(message)s' +logging.basicConfig(format=logging_format) logger = logging.getLogger() logger.setLevel(logging.INFO) @@ -228,6 +229,9 @@ def main(yara_rule_dir): if len(md5_hashes) >= globals.MAX_HASHES: analysis_results = analyze_binaries(md5_hashes, local=(not globals.g_remote)) if analysis_results: + for analysis_result in analysis_results: + if analysis_result.last_error_msg: + logger.error(analysis_result.last_error_msg) save_results(analysis_results) else: logger.error(traceback.format_exc()) @@ -237,12 +241,12 @@ def main(yara_rule_dir): if num_total_binaries % 1000 == 0: elapsed_time = time.time() - start_time logger.info("elapsed time: {0}".format(humanfriendly.format_timespan(elapsed_time))) - logger.info("number binaries scanned: {0}".format(globals.g_num_binaries_analyzed)) - logger.info("number binaries already scanned: {0}".format(num_binaries_skipped)) - logger.info("number binaries unavailable: {0}".format(globals.g_num_binaries_not_available)) + logger.debug("number binaries scanned: {0}".format(globals.g_num_binaries_analyzed)) + logger.debug("number binaries already scanned: {0}".format(num_binaries_skipped)) + logger.debug("number binaries unavailable: {0}".format(globals.g_num_binaries_not_available)) logger.info("total binaries: {0}".format(num_total_binaries)) - logger.info("binaries per second: {0}:".format(round(num_total_binaries / elapsed_time, 2))) - logger.info("num binaries score greater than 0: {0}".format( + logger.debug("binaries per second: {0}:".format(round(num_total_binaries / elapsed_time, 2))) + logger.info("num binaries score greater than zero: {0}".format( len(BinaryDetonationResult.select().where(BinaryDetonationResult.score > 0)))) logger.info("") @@ -254,11 +258,14 @@ def main(yara_rule_dir): elapsed_time = time.time() - start_time logger.info("elapsed time: {0}".format(humanfriendly.format_timespan(elapsed_time))) - logger.info("number binaries scanned: {0}".format(globals.g_num_binaries_analyzed)) - logger.info("number binaries skipped: {0}".format(num_binaries_skipped)) - logger.info("number binaries unavailable: {0}".format(globals.g_num_binaries_not_available)) + logger.debug("number binaries scanned: {0}".format(globals.g_num_binaries_analyzed)) + logger.debug("number binaries already scanned: {0}".format(num_binaries_skipped)) + logger.debug("number binaries unavailable: {0}".format(globals.g_num_binaries_not_available)) logger.info("total binaries: {0}".format(num_total_binaries)) - logger.info("binaries per second: {0}:".format(num_total_binaries / elapsed_time)) + logger.debug("binaries per second: {0}:".format(round(num_total_binaries 
/ elapsed_time, 2))) + logger.info("num binaries score greater than zero: {0}".format( + len(BinaryDetonationResult.select().where(BinaryDetonationResult.score > 0)))) + logger.info("") generate_feed_from_db() @@ -274,8 +281,6 @@ def verify_config(config_file, output_file): return False if 'worker_type' in config['general']: - logger.info(config['general']['worker_type']) - if config['general']['worker_type'] == 'local': globals.g_remote = False elif config['general']['worker_type'] == 'remote': @@ -333,10 +338,20 @@ def verify_config(config_file, output_file): parser.add_argument('--output-file', default='yara_feed.json', help='output feed file') - parser.add_argument('--debug') + parser.add_argument('--debug', action='store_true') args = parser.parse_args() + if args.debug: + logger = logging.getLogger(__name__) + logger.setLevel(logging.DEBUG) + + if args.log_file: + formatter = logging.Formatter(logging_format) + handler = logging.handlers.RotatingFileHandler(args.log_file, maxBytes=10 * 1000000, backupCount=10) + handler.setFormatter(formatter) + logger.addHandler(handler) + if verify_config(args.config_file, args.output_file): try: g_yara_rule_map = generate_rule_map(globals.g_yara_rules_dir) From fbb91be8041c18f07f55e101c9f69f436ec35e70 Mon Sep 17 00:00:00 2001 From: Jason McFarland Date: Fri, 18 Jan 2019 14:17:29 -0600 Subject: [PATCH 011/257] minor logging changes create a message even if no matches occur --- main.py | 6 +++--- tasks.py | 1 + 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/main.py b/main.py index d4cc915..fd2930f 100644 --- a/main.py +++ b/main.py @@ -19,7 +19,7 @@ from binary_database import db, BinaryDetonationResult import singleton -logging_format = '%(asctime)s - %(name)s - %(levelname)s - %(message)s' +logging_format = '%(asctime)s-%(name)s-%(lineno)d-%(levelname)s-%(message)s' logging.basicConfig(format=logging_format) logger = logging.getLogger() @@ -244,7 +244,7 @@ def main(yara_rule_dir): logger.debug("number binaries scanned: {0}".format(globals.g_num_binaries_analyzed)) logger.debug("number binaries already scanned: {0}".format(num_binaries_skipped)) logger.debug("number binaries unavailable: {0}".format(globals.g_num_binaries_not_available)) - logger.info("total binaries: {0}".format(num_total_binaries)) + logger.info("total binaries from db: {0}".format(num_total_binaries)) logger.debug("binaries per second: {0}:".format(round(num_total_binaries / elapsed_time, 2))) logger.info("num binaries score greater than zero: {0}".format( len(BinaryDetonationResult.select().where(BinaryDetonationResult.score > 0)))) @@ -261,7 +261,7 @@ def main(yara_rule_dir): logger.debug("number binaries scanned: {0}".format(globals.g_num_binaries_analyzed)) logger.debug("number binaries already scanned: {0}".format(num_binaries_skipped)) logger.debug("number binaries unavailable: {0}".format(globals.g_num_binaries_not_available)) - logger.info("total binaries: {0}".format(num_total_binaries)) + logger.info("total binaries from db: {0}".format(num_total_binaries)) logger.debug("binaries per second: {0}:".format(round(num_total_binaries / elapsed_time, 2))) logger.info("num binaries score greater than zero: {0}".format( len(BinaryDetonationResult.select().where(BinaryDetonationResult.score > 0)))) diff --git a/tasks.py b/tasks.py index 1e88bbf..f7793d8 100644 --- a/tasks.py +++ b/tasks.py @@ -168,6 +168,7 @@ def analyze_binary(md5sum): analysis_result.misc = generate_yara_rule_map_hash(globals.g_yara_rules_dir) else: analysis_result.score = 0 + 
analysis_result.short_result = "No Matches" else: analysis_result.binary_not_available = True From 62f71771e8b7fa6a4ff5378f3bc19d2dd36809ef Mon Sep 17 00:00:00 2001 From: Jason McFarland Date: Fri, 18 Jan 2019 14:36:16 -0600 Subject: [PATCH 012/257] updates to README --- README.md | 14 +++++++------- main.py | 3 +-- 2 files changed, 8 insertions(+), 9 deletions(-) diff --git a/README.md b/README.md index 3f9ab4b..6997387 100644 --- a/README.md +++ b/README.md @@ -11,7 +11,7 @@ The Yara agent must be installed on the same system as Cb Response. * Download Yara Agent ``` - wget -O /usr/share/cb/integrations/yara/yara_agent + wget -O /usr/share/cb/integrations/yara/yara_agent https://github.com/carbonblack/cb-yara-connector/releases/download/2.0.0/yara_agent ``` * Create Yara Agent Config File @@ -163,17 +163,17 @@ The Yara agent must be installed on the same system as Cb Response. ``` systemctl restart supervisord ``` - +# Development Notes -# Centos 6 Build Instructions (Development) +## Yara Agent Build Instructions (Centos 6) -## Install Dependencies +### Install Dependencies * zlib-devel * openssl-devel * sqlite-devel -## Install Python 3.6 +### Install Python 3.6 ./configure --prefix=/usr/local --enable-shared LDFLAGS="-Wl,-rpath /usr/local/lib" @@ -181,7 +181,7 @@ The Yara agent must be installed on the same system as Cb Response. make altinstall -## Create VirtualEnv +### Create VirtualEnv python3.6 -m venv venv-build @@ -189,7 +189,7 @@ The Yara agent must be installed on the same system as Cb Response. pip install -r requirements.txt -## Create Executable +### Create Executable pyinstaller main.spec diff --git a/main.py b/main.py index fd2930f..b86a5a3 100644 --- a/main.py +++ b/main.py @@ -234,8 +234,7 @@ def main(yara_rule_dir): logger.error(analysis_result.last_error_msg) save_results(analysis_results) else: - logger.error(traceback.format_exc()) - logger.error("analysis_results is None") + pass md5_hashes = list() if num_total_binaries % 1000 == 0: From 8aed6d1e34617b28a9065342b6a4e5aa8f7fd9f0 Mon Sep 17 00:00:00 2001 From: Jason McFarland Date: Tue, 29 Jan 2019 21:17:12 -0600 Subject: [PATCH 013/257] updates to spec and readme file --- README.md | 6 ++++++ main.spec | 3 ++- 2 files changed, 8 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index 6997387..69d51b0 100644 --- a/README.md +++ b/README.md @@ -14,6 +14,12 @@ The Yara agent must be installed on the same system as Cb Response. 
wget -O /usr/share/cb/integrations/yara/yara_agent https://github.com/carbonblack/cb-yara-connector/releases/download/2.0.0/yara_agent ``` +* Download Yara Logo + + ``` + wget -O /usr/share/cb/integrations/yara/yara-logo.png https://github.com/carbonblack/cb-yara-connector/releases/download/2.0.0/yara-logo.png + ``` + * Create Yara Agent Config File diff --git a/main.spec b/main.spec index 730fe7a..24eef6d 100644 --- a/main.spec +++ b/main.spec @@ -11,7 +11,8 @@ a = Analysis(['main.py'], (HOMEPATH + '/cbapi/defense/models/*', 'cbapi/defense/models/') ], hiddenimports=['celery.fixups', 'celery.fixups.django', 'celery.loaders.app', 'celery.app.amqp', 'kombu.transport.redis', 'redis', 'celery.backends', - 'celery.backends.redis', 'celery.app.events', 'celery.events'], + 'celery.backends.redis', 'celery.app.events', 'celery.events', + 'kombu.transport.pyamqp'], hookspath=[], runtime_hooks=[], excludes=[], From 0cb2592cca3cb8a4c3ee961c1c19b68bc38ebd99 Mon Sep 17 00:00:00 2001 From: Jason McFarland Date: Mon, 4 Feb 2019 12:56:20 -0600 Subject: [PATCH 014/257] updated readme --- README.md | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/README.md b/README.md index 69d51b0..03a785b 100644 --- a/README.md +++ b/README.md @@ -78,6 +78,13 @@ The Yara agent must be installed on the same system as Cb Response. # Remote Worker Installation (Centos/RHEL 7) +* Install Git and GCC + + ``` + sudo yum install git + sudo yum install gcc + ``` + * Install Python 3.6 ``` @@ -110,6 +117,7 @@ The Yara agent must be installed on the same system as Cb Response. python3.6 -m venv venv source ./venv/bin/activate pip install -r requirements.txt + deactivate mkdir yara_rules ``` From 86ae219816597b1ebca8322f244ec8c7baf2be6a Mon Sep 17 00:00:00 2001 From: Jason McFarland Date: Thu, 7 Feb 2019 10:21:53 -0600 Subject: [PATCH 015/257] validate_yara_rules + 3.4 python --- main.py | 50 ++++++++++++++++++++++++++++++++++++-------------- 1 file changed, 36 insertions(+), 14 deletions(-) diff --git a/main.py b/main.py index b86a5a3..ee27f71 100644 --- a/main.py +++ b/main.py @@ -13,6 +13,7 @@ import argparse import configparser import hashlib +import yara from feed import CbFeed, CbFeedInfo, CbReport from celery import group @@ -38,7 +39,7 @@ def generate_feed_from_db(): 'score': binary.score, 'timestamp': int(time.mktime(time.gmtime())), 'link': '', - 'id': f'binary_{binary.md5}', + 'id': 'binary_{0}'.format(binary.md5), 'title': '', 'description': binary.last_success_msg } @@ -156,7 +157,7 @@ def print_statistics(): pass -def main(yara_rule_dir): +def perform(yara_rule_dir): if globals.g_remote: logger.info("Uploading yara rules to workers...") generate_rule_map_remote(yara_rule_dir) @@ -319,8 +320,9 @@ def verify_config(config_file, output_file): return True +def main(): + global logger -if __name__ == "__main__": try: me = singleton.SingleInstance() except: @@ -337,6 +339,9 @@ def verify_config(config_file, output_file): parser.add_argument('--output-file', default='yara_feed.json', help='output feed file') + parser.add_argument('--validate-yara-rules', + action='store_true', + help='ONLY validate yara rules in a specified directory') parser.add_argument('--debug', action='store_true') args = parser.parse_args() @@ -352,14 +357,31 @@ def verify_config(config_file, output_file): logger.addHandler(handler) if verify_config(args.config_file, args.output_file): - try: - g_yara_rule_map = generate_rule_map(globals.g_yara_rules_dir) - generate_yara_rule_map_hash(globals.g_yara_rules_dir) - database = 
SqliteDatabase('binary.db') - db.initialize(database) - db.connect() - db.create_tables([BinaryDetonationResult]) - generate_feed_from_db() - main('yara_rules') - except: - logger.error(traceback.format_exc()) + + if args.validate_yara_rules: + logger.info("Validating yara rules in directory: {0}".format(globals.g_yara_rules_dir)) + yara_rule_map = generate_rule_map(globals.g_yara_rules_dir) + try: + yara.compile(filepaths=yara_rule_map) + except: + logger.error("There were errors compiling yara rules") + logger.error(traceback.format_exc()) + else: + logger.info("All yara rules compiled successfully") + else: + try: + globals.g_yara_rule_map = generate_rule_map(globals.g_yara_rules_dir) + generate_yara_rule_map_hash(globals.g_yara_rules_dir) + database = SqliteDatabase('binary.db') + db.initialize(database) + db.connect() + db.create_tables([BinaryDetonationResult]) + generate_feed_from_db() + perform(args.yara_rules_dir) + except: + logger.error(traceback.format_exc()) + + +if __name__ == "__main__": + main() + From e2362b7894b1fcae655fb5155ca3f0b99e3bc6e1 Mon Sep 17 00:00:00 2001 From: Jason McFarland Date: Thu, 28 Feb 2019 23:17:13 -0600 Subject: [PATCH 016/257] more sanity checks around database entries --- main.py | 26 ++++++++++---------------- 1 file changed, 10 insertions(+), 16 deletions(-) diff --git a/main.py b/main.py index ee27f71..4b5e46b 100644 --- a/main.py +++ b/main.py @@ -135,8 +135,10 @@ def save_results(analysis_results): if analysis_result.binary_not_available: globals.g_num_binaries_not_available += 1 continue + + bdr, created = BinaryDetonationResult.get_or_create(md5=analysis_result.md5) + try: - bdr = BinaryDetonationResult() bdr.md5 = analysis_result.md5 bdr.last_scan_date = datetime.now() bdr.score = analysis_result.score @@ -194,20 +196,13 @@ def perform(yara_rule_dir): num_total_binaries += 1 md5_hash = row[0].hex() - try: - # - # see if we have already seen this file before. 
- # we need to check to see what yara rules we have scanned with - # - bdr = BinaryDetonationResult.get(BinaryDetonationResult.md5 == md5_hash) - except: - - # - # Not found so we have to scan - # - pass - else: + # + # Check if query returns any rows + # + query = BinaryDetonationResult.select().where(BinaryDetonationResult.md5 == md5_hash) + if query.exists(): try: + bdr = BinaryDetonationResult.get(BinaryDetonationResult.md5 == md5_hash) scanned_hash_list = json.loads(bdr.misc) if scanned_hash_list == globals.g_yara_rule_map_hash_list: num_binaries_skipped += 1 @@ -222,7 +217,6 @@ def perform(yara_rule_dir): pass except: logger.error("Unable to decode yara rule map hash from database") - pass num_binaries_queued += 1 md5_hashes.append(md5_hash) @@ -377,7 +371,7 @@ def main(): db.connect() db.create_tables([BinaryDetonationResult]) generate_feed_from_db() - perform(args.yara_rules_dir) + perform(globals.g_yara_rules_dir) except: logger.error(traceback.format_exc()) From 9653a10ef78ace52c78def955e9f65f3f48504c0 Mon Sep 17 00:00:00 2001 From: Jason McFarland Date: Thu, 28 Feb 2019 23:56:29 -0600 Subject: [PATCH 017/257] bugfix on yara_rules_dir when updating yara_rules --- tasks.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/tasks.py b/tasks.py index f7793d8..40ddae7 100644 --- a/tasks.py +++ b/tasks.py @@ -22,7 +22,6 @@ logger.setLevel(logging.INFO) g_config = dict() -g_yara_rules_dir = "" def verify_config(config_file): @@ -110,7 +109,7 @@ def generate_yara_rule_map_hash(yara_rule_path): def update_yara_rules_remote(yara_rules): try: for key in yara_rules: - open(os.path.join(g_yara_rules_dir, key), 'wb').write(yara_rules[key]) + open(os.path.join(globals.g_yara_rules_dir, key), 'wb').write(yara_rules[key]) except: logger.error(traceback.format_exc()) From 763a9a191a956f11d6841d1244bc5960f472e3d5 Mon Sep 17 00:00:00 2001 From: Jason McFarland Date: Wed, 6 Mar 2019 10:44:47 -0600 Subject: [PATCH 018/257] adding support for .yara and adding to threat report title --- main.py | 4 ++-- tasks.py | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/main.py b/main.py index 4b5e46b..7218d16 100644 --- a/main.py +++ b/main.py @@ -40,7 +40,7 @@ def generate_feed_from_db(): 'timestamp': int(time.mktime(time.gmtime())), 'link': '', 'id': 'binary_{0}'.format(binary.md5), - 'title': '', + 'title': binary.last_success_msg, 'description': binary.last_success_msg } @@ -83,7 +83,7 @@ def generate_yara_rule_map_hash(yara_rule_path): def generate_rule_map_remote(yara_rule_path): ret_dict = dict() for fn in os.listdir(yara_rule_path): - if fn.lower().endswith(".yar"): + if fn.lower().endswith(".yar") or fn.lower().endswith(".yara"): ret_dict[fn] = open(os.path.join(yara_rule_path, fn), 'rb').read() result = update_yara_rules_remote.delay(ret_dict) diff --git a/tasks.py b/tasks.py index 40ddae7..049ebca 100644 --- a/tasks.py +++ b/tasks.py @@ -73,7 +73,7 @@ def generate_rule_map(yara_rule_path): rule_map = {} for fn in os.listdir(yara_rule_path): - if fn.lower().endswith(".yar"): + if fn.lower().endswith(".yar") or fn.lower().endswith(".yara"): fullpath = os.path.join(yara_rule_path, fn) if not os.path.isfile(fullpath): continue From 9420a0b55add5cd6f8c4ec4f5defcb9a2ea32196 Mon Sep 17 00:00:00 2001 From: Jason McFarland Date: Wed, 6 Mar 2019 13:52:19 -0600 Subject: [PATCH 019/257] updates to README --- README.md | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 03a785b..f83482a 100644 --- a/README.md +++ b/README.md 
@@ -11,13 +11,13 @@ The Yara agent must be installed on the same system as Cb Response. * Download Yara Agent ``` - wget -O /usr/share/cb/integrations/yara/yara_agent https://github.com/carbonblack/cb-yara-connector/releases/download/2.0.0/yara_agent + wget -O /usr/share/cb/integrations/yara/yara_agent https://github.com/carbonblack/cb-yara-connector/releases/download/2.0.1/yara_agent ``` * Download Yara Logo ``` - wget -O /usr/share/cb/integrations/yara/yara-logo.png https://github.com/carbonblack/cb-yara-connector/releases/download/2.0.0/yara-logo.png + wget -O /usr/share/cb/integrations/yara/yara-logo.png https://github.com/carbonblack/cb-yara-connector/releases/download/2.0.1/yara-logo.png ``` * Create Yara Agent Config File @@ -78,6 +78,12 @@ The Yara agent must be installed on the same system as Cb Response. # Remote Worker Installation (Centos/RHEL 7) +* Make sure openssl-devel is installed + + ``` + sudo yum install openssl-devel + ``` + * Install Git and GCC ``` From 084901c9e9aae6e21b989a1bf555452ad92a8c67 Mon Sep 17 00:00:00 2001 From: Jason McFarland Date: Wed, 13 Mar 2019 13:15:25 -0500 Subject: [PATCH 020/257] added the ability to modify concurrent_hashes for additional throughput --- main.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/main.py b/main.py index 7218d16..5f757f9 100644 --- a/main.py +++ b/main.py @@ -215,8 +215,9 @@ def perform(yara_rule_dir): # Yara rules were updated, so lets scan # pass - except: + except Exception as e: logger.error("Unable to decode yara rule map hash from database") + logger.error(str(e)) num_binaries_queued += 1 md5_hashes.append(md5_hash) @@ -312,6 +313,9 @@ def verify_config(config_file, output_file): if 'niceness' in config['general']: os.nice(int(config['general']['niceness'])) + if 'concurrent_hashes' in config['general']: + globals.MAX_HASHES = int(config['general']['concurrent_hashes']) + return True def main(): From 45806d083237ad822681dab1d6045abab4e5e3f9 Mon Sep 17 00:00:00 2001 From: Jason McFarland Date: Wed, 13 Mar 2019 15:03:24 -0500 Subject: [PATCH 021/257] updating readme for concurrent_hashes --- README.md | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/README.md b/README.md index f83482a..3f6963f 100644 --- a/README.md +++ b/README.md @@ -66,6 +66,11 @@ The Yara agent must be installed on the same system as Cb Response. ; nice value used for this script ; niceness=1 + ; + ; Number of hashes to send to the workers concurrently. Defaults to 8. + ; Recommend setting to the number of workers on the remote system. 
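For reference, concurrent_hashes becomes globals.MAX_HASHES and sizes each batch of hashes fanned out to the workers; a minimal sketch of that batching, assuming the analyze_binary task from tasks.py (fetch_md5_hashes is a hypothetical stand-in for the agent's Postgres cursor loop):

```
# Minimal sketch of the batching driven by concurrent_hashes/MAX_HASHES.
# fetch_md5_hashes() is hypothetical; the real agent iterates a Postgres
# cursor over the storefiles table.
from celery import group

def scan_in_batches(max_hashes=8):
    md5_hashes = []
    for md5_hash in fetch_md5_hashes():
        md5_hashes.append(md5_hash)
        if len(md5_hashes) >= max_hashes:
            scan_group = [analyze_binary.s(md5) for md5 in md5_hashes]
            result = group(scan_group).apply_async()   # one task per hash
            analysis_results = result.get(timeout=30)  # AnalysisResult list
            save_results(analysis_results)             # as in main.py
            md5_hashes = []
```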
+ ; + concurrent_hashes=8 * copy and modify the above config to `/etc/cb/integrations/yara/yara_agent.conf` From 46ba2cee0757d6c3ebe6d12e021fe3ace95e7f2d Mon Sep 17 00:00:00 2001 From: Jason McFarland Date: Fri, 22 Mar 2019 10:41:32 -0500 Subject: [PATCH 022/257] added support to never rescan and prioritize newer binaries --- globals.py | 2 ++ main.py | 13 +++++++------ 2 files changed, 9 insertions(+), 6 deletions(-) diff --git a/globals.py b/globals.py index 831ceb9..bc3f523 100644 --- a/globals.py +++ b/globals.py @@ -22,3 +22,5 @@ g_num_binaries_not_available = 0 g_num_binaries_analyzed = 0 + +g_disable_rescan = False diff --git a/main.py b/main.py index 5f757f9..90d73d4 100644 --- a/main.py +++ b/main.py @@ -179,7 +179,7 @@ def perform(yara_rule_dir): password=globals.g_postgres_password, port=globals.g_postgres_port) cur = conn.cursor() - cur.execute("SELECT md5hash FROM storefiles WHERE present_locally = TRUE") + cur.execute("SELECT md5hash FROM storefiles WHERE present_locally = TRUE ORDER BY timestamp DESC") except: logger.error("Failed to connect to Postgres database") logger.error(traceback.format_exc()) @@ -204,17 +204,15 @@ def perform(yara_rule_dir): try: bdr = BinaryDetonationResult.get(BinaryDetonationResult.md5 == md5_hash) scanned_hash_list = json.loads(bdr.misc) + if globals.g_disable_rescan and bdr.misc: + continue + if scanned_hash_list == globals.g_yara_rule_map_hash_list: num_binaries_skipped += 1 # # If it is the same then we don't need to scan again # continue - else: - # - # Yara rules were updated, so lets scan - # - pass except Exception as e: logger.error("Unable to decode yara rule map hash from database") logger.error(str(e)) @@ -316,6 +314,9 @@ def verify_config(config_file, output_file): if 'concurrent_hashes' in config['general']: globals.MAX_HASHES = int(config['general']['concurrent_hashes']) + if 'disable_rescan' in config['general']: + globals.g_disable_rescan = bool(config['general']['disable_rescan']) + return True def main(): From 23a5e73c88d0c283759d83cbf7ee3c2ab59ecd3c Mon Sep 17 00:00:00 2001 From: Jason McFarland Date: Fri, 22 Mar 2019 11:09:41 -0500 Subject: [PATCH 023/257] added support to pull binaries from the last N number of days --- globals.py | 2 ++ main.py | 17 ++++++++++++----- 2 files changed, 14 insertions(+), 5 deletions(-) diff --git a/globals.py b/globals.py index bc3f523..02db14b 100644 --- a/globals.py +++ b/globals.py @@ -24,3 +24,5 @@ g_num_binaries_analyzed = 0 g_disable_rescan = False + +g_num_days_binaries = 365 diff --git a/main.py b/main.py index 90d73d4..ebb354f 100644 --- a/main.py +++ b/main.py @@ -6,7 +6,7 @@ import humanfriendly import psycopg2 import json -from datetime import datetime +from datetime import datetime, timedelta from peewee import SqliteDatabase from tasks import analyze_binary, update_yara_rules_remote, generate_rule_map, app import globals @@ -57,10 +57,10 @@ def generate_feed_from_db(): feedinfo = CbFeedInfo(**feedinfo) feed = CbFeed(feedinfo, reports) - logger.debug("dumping feed...") + #logger.debug("dumping feed...") created_feed = feed.dump() - logger.debug("Writing out feed to disk") + #logger.debug("Writing out feed to disk") with open(globals.output_file, 'w') as fp: fp.write(created_feed) @@ -179,7 +179,11 @@ def perform(yara_rule_dir): password=globals.g_postgres_password, port=globals.g_postgres_port) cur = conn.cursor() - cur.execute("SELECT md5hash FROM storefiles WHERE present_locally = TRUE ORDER BY timestamp DESC") + + start_date_binaries = datetime.now() - 
timedelta(days=globals.g_num_days_binaries)
+        cur.execute("SELECT md5hash FROM storefiles WHERE present_locally = TRUE AND timestamp >= '{0}' "
+                    "ORDER BY timestamp DESC".format(start_date_binaries))
+
     except:
         logger.error("Failed to connect to Postgres database")
         logger.error(traceback.format_exc())
@@ -317,8 +321,12 @@ def verify_config(config_file, output_file):
     if 'disable_rescan' in config['general']:
         globals.g_disable_rescan = bool(config['general']['disable_rescan'])
 
+    if 'num_days_binaries' in config['general']:
+        globals.g_num_days_binaries = int(config['general']['num_days_binaries'])
+
     return True
 
+
 def main():
     global logger
@@ -383,4 +391,3 @@ def main():
 
 if __name__ == "__main__":
     main()
-

From 470d48ef686766b580ef8279f59867cd0bfb3d93 Mon Sep 17 00:00:00 2001
From: Jason McFarland
Date: Wed, 27 Mar 2019 12:05:55 -0500
Subject: [PATCH 024/257] named cursor for improved Postgres performance and
 updated README with new configuration file example

---
 README.md | 14 ++++++++++++++
 main.py   | 18 +++++++++++++-----
 2 files changed, 27 insertions(+), 5 deletions(-)

diff --git a/README.md b/README.md
index 3f6963f..a1573d9 100644
--- a/README.md
+++ b/README.md
@@ -66,11 +66,25 @@ The Yara agent must be installed on the same system as Cb Response.
     ; nice value used for this script
     ; niceness=1
 
+    ;
     ; Number of hashes to send to the workers concurrently. Defaults to 8.
     ; Recommend setting to the number of workers on the remote system.
     ;
     concurrent_hashes=8
+
+    ;
+    ; If you don't want binaries to be rescanned more than once, regardless of the rules used, set this to True
+    ; Default: False
+    ;
+    disable_rescan=False
+
+    ;
+    ; The agent will only pull binaries with a timestamp within the configured number of days. For example,
+    ; 365 will pull all binaries from the last year
+    ; Default: 365
+    ;
+    num_days_binaries=365
 
 * copy and modify the above config to `/etc/cb/integrations/yara/yara_agent.conf`

diff --git a/main.py b/main.py
index ebb354f..3db9026 100644
--- a/main.py
+++ b/main.py
@@ -101,7 +101,7 @@ def analyze_binaries(md5_hashes, local):
         except:
             logger.error(traceback.format_exc())
             time.sleep(5)
-            return
+            return None
         else:
             return results
     else:
@@ -115,11 +115,11 @@ def analyze_binaries(md5_hashes, local):
 
             time_waited = 0
             while not result.ready():
-                if time_waited == 100:
+                if time_waited >= 100:
                     break
                 else:
                     time.sleep(.1)
-                    time_waited += 1
+                    time_waited += .1
 
         except:
             logger.error(traceback.format_exc())
@@ -128,6 +128,8 @@ def analyze_binaries(md5_hashes, local):
         else:
             if result.successful():
                 return result.get(timeout=30)
+            else:
+                return None
 
 
 def save_results(analysis_results):
@@ -178,7 +180,7 @@ def perform(yara_rule_dir):
                                 user=globals.g_postgres_username,
                                 password=globals.g_postgres_password,
                                 port=globals.g_postgres_port)
-        cur = conn.cursor()
+        cur = conn.cursor(name="yara_agent")
 
         start_date_binaries = datetime.now() - timedelta(days=globals.g_num_days_binaries)
         cur.execute("SELECT md5hash FROM storefiles WHERE present_locally = TRUE AND timestamp >= '{0}' "
                     "ORDER BY timestamp DESC".format(start_date_binaries))
@@ -250,7 +252,11 @@ def perform(yara_rule_dir):
     conn.close()
 
     analysis_results = analyze_binaries(md5_hashes, local=(not globals.g_remote))
-    save_results(analysis_results)
+    if analysis_results:
+        for analysis_result in analysis_results:
+            if analysis_result.last_error_msg:
+                logger.error(analysis_result.last_error_msg)
+        save_results(analysis_results)
     md5_hashes = list()
 
     elapsed_time = time.time() - start_time
@@ -320,9 +326,11 @@ def verify_config(config_file, output_file):
 
     if 'disable_rescan' in config['general']:
globals.g_disable_rescan = bool(config['general']['disable_rescan']) + logger.debug("Disable Rescan: {}".format(globals.g_disable_rescan)) if 'num_days_binaries' in config['general']: globals.g_num_days_binaries = int(config['general']['num_days_binaries']) + logger.debug("Number of days for binaries: {}".format(globals.g_num_days_binaries)) return True From 95544c8752724fc2baa8722c2d0b8906a5872da0 Mon Sep 17 00:00:00 2001 From: Zachary Estep Date: Mon, 9 Sep 2019 08:28:53 -0400 Subject: [PATCH 025/257] Fix rule persistence on remote worker --- tasks.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tasks.py b/tasks.py index 049ebca..cc9b527 100644 --- a/tasks.py +++ b/tasks.py @@ -12,6 +12,7 @@ import datetime import configparser import os +import shutil import hashlib from analysis_result import AnalysisResult from cbapi.response.models import Binary @@ -108,6 +109,7 @@ def generate_yara_rule_map_hash(yara_rule_path): @app.task def update_yara_rules_remote(yara_rules): try: + shutil.rmtree(globals.g_yara_rules_dir) for key in yara_rules: open(os.path.join(globals.g_yara_rules_dir, key), 'wb').write(yara_rules[key]) except: From f0b83c823988eece3bbf1222576b956946e45c9c Mon Sep 17 00:00:00 2001 From: Zachary Estep Date: Wed, 18 Sep 2019 13:01:34 -0400 Subject: [PATCH 026/257] Allow control of feed_db location, fix timeout values --- globals.py | 2 ++ main.py | 12 ++++++++++-- tasks.py | 2 +- 3 files changed, 13 insertions(+), 3 deletions(-) diff --git a/globals.py b/globals.py index 02db14b..8238976 100644 --- a/globals.py +++ b/globals.py @@ -8,6 +8,8 @@ g_yara_rules_dir = 'yara_rules' output_file = 'yara_feed.json' +g_database_path = "./" + g_remote = False g_yara_rule_map = {} g_yara_rule_map_hash_list = list() diff --git a/main.py b/main.py index 3db9026..bfa8007 100644 --- a/main.py +++ b/main.py @@ -265,6 +265,7 @@ def perform(yara_rule_dir): logger.debug("number binaries already scanned: {0}".format(num_binaries_skipped)) logger.debug("number binaries unavailable: {0}".format(globals.g_num_binaries_not_available)) logger.info("total binaries from db: {0}".format(num_total_binaries)) + logger.info(f"number of binaries queued to be scanned: {num_binaries_queued}") logger.debug("binaries per second: {0}:".format(round(num_total_binaries / elapsed_time, 2))) logger.info("num binaries score greater than zero: {0}".format( len(BinaryDetonationResult.select().where(BinaryDetonationResult.score > 0)))) @@ -300,11 +301,17 @@ def verify_config(config_file, output_file): if 'yara_rules_dir' in config['general']: globals.g_yara_rules_dir = config['general']['yara_rules_dir'] + if 'feed_database_path' in config['general']: + globals.g_database_path = config['general']['feed_database_path'] + if 'postgres_host' in config['general']: globals.g_postgres_host = config['general']['postgres_host'] + if 'postgres_port' in config['general']: + globals.g_postgres_port = config['general']['postgres_port'] + if 'postgres_username' in config['general']: - globals.g_postgres_username = config['general']['postgres_username'] + globals.g_postgres_username = config['general']['postgres_username'] if 'postgres_password' in config['general']: globals.g_postgres_password = config['general']['postgres_password'] @@ -387,7 +394,8 @@ def main(): try: globals.g_yara_rule_map = generate_rule_map(globals.g_yara_rules_dir) generate_yara_rule_map_hash(globals.g_yara_rules_dir) - database = SqliteDatabase('binary.db') + database_path = os.path.join(globals.g_database_path,'binary.db') + database = 
SqliteDatabase(database_path)
         db.initialize(database)
         db.connect()
         db.create_tables([BinaryDetonationResult])
diff --git a/tasks.py b/tasks.py
index cc9b527..040dc78 100644
--- a/tasks.py
+++ b/tasks.py
@@ -149,7 +149,7 @@ def analyze_binary(md5sum):
                     #
                     # yara timed out
                     #
-                    analysis_result.last_error_msg = "Analysis timed out after 60 seconds"
+                    analysis_result.last_error_msg = "Analysis timed out after 30 seconds"
                     analysis_result.stop_future_scans = True
                 except yara.Error:
                     #

From da5af50de79413cff50a679bd4088b27978a2924 Mon Sep 17 00:00:00 2001
From: Zachary Estep
Date: Wed, 18 Sep 2019 14:29:14 -0400
Subject: [PATCH 027/257] Making the icon optional, truly

---
 feed.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/feed.py b/feed.py
index 7a171f2..d1c42c7 100644
--- a/feed.py
+++ b/feed.py
@@ -140,7 +140,8 @@ def __init__(self, **kwargs):
                 try:
                     self.data[icon_field] = base64.b64encode(open(icon_path, "rb").read()).decode('utf-8')
                 except Exception as err:
-                    raise CbIconError(f"Unknown error reading/encoding icon data: {err}")
+                    pass
+                    #raise CbIconError(f"Unknown error reading/encoding icon data: {err}")
 
     def dump(self):
         '''

From ede63f79c42a0844abd9209a6966d7b0c287eaa6 Mon Sep 17 00:00:00 2001
From: Zachary Estep
Date: Thu, 19 Sep 2019 14:12:38 -0400
Subject: [PATCH 028/257] feed decode error suppression

---
 feed.py | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/feed.py b/feed.py
index d1c42c7..89ed0a7 100644
--- a/feed.py
+++ b/feed.py
@@ -168,11 +168,13 @@ def validate(self, pedantic=False):
             try:
                 base64.b64decode(self.data[icon_field])
             except TypeError as err:
-                raise CbIconError(f"Icon must either be path or base64 data. \
-                    Path does not exist and base64 decode failed with: {err}")
+                #raise CbIconError(f"Icon must either be path or base64 data. \
+                #    Path does not exist and base64 decode failed with: {err}")
             except KeyError as err:
                 # we don't want to cause a ruckus if the icon is missing
                 pass
+            except:  # suppress all icon related errors; cbr will accept no/bad icon information
+                pass
 
         # all fields in feedinfo must be strings
         for key in self.data.keys():

From bbd64c205bc2a527d638e1ebe05e32e6ca06478d Mon Sep 17 00:00:00 2001
From: Zachary Estep
Date: Thu, 19 Sep 2019 18:32:34 -0400
Subject: [PATCH 029/257] Revert "feed decode error suppression"

This reverts commit ede63f79c42a0844abd9209a6966d7b0c287eaa6.

---
 feed.py | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/feed.py b/feed.py
index 89ed0a7..d1c42c7 100644
--- a/feed.py
+++ b/feed.py
@@ -168,13 +168,11 @@ def validate(self, pedantic=False):
             try:
                 base64.b64decode(self.data[icon_field])
             except TypeError as err:
-                #raise CbIconError(f"Icon must either be path or base64 data. \
-                #    Path does not exist and base64 decode failed with: {err}")
+                raise CbIconError(f"Icon must either be path or base64 data. \
+                    Path does not exist and base64 decode failed with: {err}")
             except KeyError as err:
                 # we don't want to cause a ruckus if the icon is missing
                 pass
-            except:  # suppress all icon related errors; cbr will accept no/bad icon information
-                pass
 
         # all fields in feedinfo must be strings
         for key in self.data.keys():

From af937e0489fba68efe3336a6ed92bfd9c3ba21e5 Mon Sep 17 00:00:00 2001
From: Zachary Estep
Date: Thu, 19 Sep 2019 18:32:41 -0400
Subject: [PATCH 030/257] Revert "Making the icon optional, truly"

This reverts commit da5af50de79413cff50a679bd4088b27978a2924.
--- feed.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/feed.py b/feed.py index d1c42c7..7a171f2 100644 --- a/feed.py +++ b/feed.py @@ -140,8 +140,7 @@ def __init__(self, **kwargs): try: self.data[icon_field] = base64.b64encode(open(icon_path, "rb").read()).decode('utf-8') except Exception as err: - pass - #raise CbIconError(f"Unknown error reading/encoding icon data: {err}") + raise CbIconError(f"Unknown error reading/encoding icon data: {err}") def dump(self): ''' From aae8e7598c134eed82770261b95bb64513d1956e Mon Sep 17 00:00:00 2001 From: Zachary Estep Date: Thu, 19 Sep 2019 18:32:43 -0400 Subject: [PATCH 031/257] Revert "Allow control of feed_db location, fix timeout values" This reverts commit f0b83c823988eece3bbf1222576b956946e45c9c. --- globals.py | 2 -- main.py | 12 ++---------- tasks.py | 2 +- 3 files changed, 3 insertions(+), 13 deletions(-) diff --git a/globals.py b/globals.py index 8238976..02db14b 100644 --- a/globals.py +++ b/globals.py @@ -8,8 +8,6 @@ g_yara_rules_dir = 'yara_rules' output_file = 'yara_feed.json' -g_database_path = "./" - g_remote = False g_yara_rule_map = {} g_yara_rule_map_hash_list = list() diff --git a/main.py b/main.py index bfa8007..3db9026 100644 --- a/main.py +++ b/main.py @@ -265,7 +265,6 @@ def perform(yara_rule_dir): logger.debug("number binaries already scanned: {0}".format(num_binaries_skipped)) logger.debug("number binaries unavailable: {0}".format(globals.g_num_binaries_not_available)) logger.info("total binaries from db: {0}".format(num_total_binaries)) - logger.info(f"number of binaries queued to be scanned: {num_binaries_queued}") logger.debug("binaries per second: {0}:".format(round(num_total_binaries / elapsed_time, 2))) logger.info("num binaries score greater than zero: {0}".format( len(BinaryDetonationResult.select().where(BinaryDetonationResult.score > 0)))) @@ -301,17 +300,11 @@ def verify_config(config_file, output_file): if 'yara_rules_dir' in config['general']: globals.g_yara_rules_dir = config['general']['yara_rules_dir'] - if 'feed_database_path' in config['general']: - globals.g_database_path = config['general']['feed_database_path'] - if 'postgres_host' in config['general']: globals.g_postgres_host = config['general']['postgres_host'] - if 'postgres_port' in config['general']: - globals.g_postgres_port = config['general']['postgres_port'] - if 'postgres_username' in config['general']: - globals.g_postgres_username = config['general']['postgres_username'] + globals.g_postgres_username = config['general']['postgres_username'] if 'postgres_password' in config['general']: globals.g_postgres_password = config['general']['postgres_password'] @@ -394,8 +387,7 @@ def main(): try: globals.g_yara_rule_map = generate_rule_map(globals.g_yara_rules_dir) generate_yara_rule_map_hash(globals.g_yara_rules_dir) - database_path = os.path.join(globals.g_database_path,'binary.db') - database = SqliteDatabase(database_path) + database = SqliteDatabase('binary.db') db.initialize(database) db.connect() db.create_tables([BinaryDetonationResult]) diff --git a/tasks.py b/tasks.py index 040dc78..cc9b527 100644 --- a/tasks.py +++ b/tasks.py @@ -149,7 +149,7 @@ def analyze_binary(md5sum): # # yara timed out # - analysis_result.last_error_msg = "Analysis timed out after 30 seconds" + analysis_result.last_error_msg = "Analysis timed out after 60 seconds" analysis_result.stop_future_scans = True except yara.Error: # From b2752553b4b6776c620c6e345deaeb1d0af2e2a0 Mon Sep 17 00:00:00 2001 From: Zachary Estep Date: Thu, 19 Sep 2019 18:32:46 
-0400 Subject: [PATCH 032/257] Revert "Fix rule persistence on remote worker" This reverts commit 95544c8752724fc2baa8722c2d0b8906a5872da0. --- tasks.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/tasks.py b/tasks.py index cc9b527..049ebca 100644 --- a/tasks.py +++ b/tasks.py @@ -12,7 +12,6 @@ import datetime import configparser import os -import shutil import hashlib from analysis_result import AnalysisResult from cbapi.response.models import Binary @@ -109,7 +108,6 @@ def generate_yara_rule_map_hash(yara_rule_path): @app.task def update_yara_rules_remote(yara_rules): try: - shutil.rmtree(globals.g_yara_rules_dir) for key in yara_rules: open(os.path.join(globals.g_yara_rules_dir, key), 'wb').write(yara_rules[key]) except: From 648573c083323f42692eb814f7c8a8fcf3e23f29 Mon Sep 17 00:00:00 2001 From: Zachary Estep Date: Mon, 23 Sep 2019 11:32:26 -0400 Subject: [PATCH 033/257] Adding more robust feed icon error prevention --- feed.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/feed.py b/feed.py index 89ed0a7..df33f30 100644 --- a/feed.py +++ b/feed.py @@ -140,6 +140,7 @@ def __init__(self, **kwargs): try: self.data[icon_field] = base64.b64encode(open(icon_path, "rb").read()).decode('utf-8') except Exception as err: + del self.data[icon_field] pass #raise CbIconError(f"Unknown error reading/encoding icon data: {err}") @@ -170,6 +171,7 @@ def validate(self, pedantic=False): except TypeError as err: #raise CbIconError(f"Icon must either be path or base64 data. \ # Path does not exist and base64 decode failed with: {err}") + pass except KeyError as err: # we don't want to cause a ruckus if the icon is missing pass From 8fc003fcdd18480d1c2bc41c2f8fc6c2da69c35f Mon Sep 17 00:00:00 2001 From: Zachary Estep Date: Tue, 24 Sep 2019 12:15:49 -0400 Subject: [PATCH 034/257] Removing f-style format strings for python34 support --- feed.py | 2 +- main.py | 6 +++++- tasks.py | 2 +- 3 files changed, 7 insertions(+), 3 deletions(-) diff --git a/feed.py b/feed.py index df33f30..9541bfa 100644 --- a/feed.py +++ b/feed.py @@ -140,7 +140,7 @@ def __init__(self, **kwargs): try: self.data[icon_field] = base64.b64encode(open(icon_path, "rb").read()).decode('utf-8') except Exception as err: - del self.data[icon_field] + #del self.data[icon_field] pass #raise CbIconError(f"Unknown error reading/encoding icon data: {err}") diff --git a/main.py b/main.py index bfa8007..e1cdc6a 100644 --- a/main.py +++ b/main.py @@ -93,6 +93,7 @@ def generate_rule_map_remote(yara_rule_path): def analyze_binaries(md5_hashes, local): + #logger.debug(f"md5hashes = {len(md5_hashes)}") if local: try: results = list() @@ -201,6 +202,7 @@ def perform(yara_rule_dir): for row in rows: num_total_binaries += 1 md5_hash = row[0].hex() + #logger.debug(md5_hash) # # Check if query returns any rows @@ -230,6 +232,7 @@ def perform(yara_rule_dir): analysis_results = analyze_binaries(md5_hashes, local=(not globals.g_remote)) if analysis_results: for analysis_result in analysis_results: + #logger.debug(f"Analysis result is {analysis_result.md5} {analysis_result.binary_not_available} {analysis_result.long_result} {analysis_result.last_error_msg}") if analysis_result.last_error_msg: logger.error(analysis_result.last_error_msg) save_results(analysis_results) @@ -254,6 +257,7 @@ def perform(yara_rule_dir): analysis_results = analyze_binaries(md5_hashes, local=(not globals.g_remote)) if analysis_results: for analysis_result in analysis_results: + #logger.debug(f"Analysis result is {analysis_result.md5} {analysis_result.binary_not_available} 
{analysis_result.long_result} {analysis_result.last_error_msg}") if analysis_result.last_error_msg: logger.error(analysis_result.last_error_msg) save_results(analysis_results) @@ -265,7 +269,7 @@ def perform(yara_rule_dir): logger.debug("number binaries already scanned: {0}".format(num_binaries_skipped)) logger.debug("number binaries unavailable: {0}".format(globals.g_num_binaries_not_available)) logger.info("total binaries from db: {0}".format(num_total_binaries)) - logger.info(f"number of binaries queued to be scanned: {num_binaries_queued}") + #logger.info(f"number of binaries queued to be scanned: {num_binaries_queued}") logger.debug("binaries per second: {0}:".format(round(num_total_binaries / elapsed_time, 2))) logger.info("num binaries score greater than zero: {0}".format( len(BinaryDetonationResult.select().where(BinaryDetonationResult.score > 0)))) diff --git a/tasks.py b/tasks.py index 040dc78..84a15de 100644 --- a/tasks.py +++ b/tasks.py @@ -130,7 +130,7 @@ def analyze_binary(md5sum): ssl_verify=False, timeout=5) - binary_query = cb.select(Binary).where(f"md5:{md5sum}") + binary_query = cb.select(Binary).where("md5:{0}".format(md5sum)) if binary_query: try: From 5dbc46f228cadc61653bd4c7ac30b001039e4914 Mon Sep 17 00:00:00 2001 From: Zachary Estep Date: Thu, 10 Oct 2019 15:42:53 -0400 Subject: [PATCH 035/257] Fixing spec file for distutils on centos6 --- main.spec | 3 +++ 1 file changed, 3 insertions(+) diff --git a/main.spec b/main.spec index 24eef6d..65e94a9 100644 --- a/main.spec +++ b/main.spec @@ -1,4 +1,7 @@ # -*- mode: python -*- +import distutils +if distutils.distutils_path.endswith('__init__.py'): + distutils.distutils_path = os.path.dirname(distutils.distutils_path) block_cipher = None From ecdcf80096cd66c01f0acf1510438f8c3a0fa41f Mon Sep 17 00:00:00 2001 From: Burton Choinski Date: Thu, 17 Oct 2019 16:38:01 -0400 Subject: [PATCH 036/257] Saved work --- .travis.yml | 2 +- binary_database.py | 3 +- feed.py | 599 +++++++++++++++++++++--------------- globals.py | 6 + main.py | 209 +++++++------ requirements.txt | 2 +- singleton.py | 136 +++----- src/README.md | 1 + src/__init__.py | 0 tasks.py | 132 +++++--- test/__init__.py | 0 test/test_cbFeed.py | 100 ++++++ test/test_cbFeedInfo.py | 116 +++++++ test/test_cbReport.py | 307 ++++++++++++++++++ test/test_singleInstance.py | 82 +++++ 15 files changed, 1216 insertions(+), 479 deletions(-) create mode 100644 src/README.md create mode 100644 src/__init__.py create mode 100644 test/__init__.py create mode 100644 test/test_cbFeed.py create mode 100644 test/test_cbFeedInfo.py create mode 100644 test/test_cbReport.py create mode 100644 test/test_singleInstance.py diff --git a/.travis.yml b/.travis.yml index 6711857..71fc9ec 100644 --- a/.travis.yml +++ b/.travis.yml @@ -1,6 +1,6 @@ language: python python: - - "2.7" + - "3.7" # command to install dependencies install: - "pip install nose" diff --git a/binary_database.py b/binary_database.py index 2cfa896..2ea8a17 100644 --- a/binary_database.py +++ b/binary_database.py @@ -1,7 +1,6 @@ import logging from peewee import * -from playhouse.sqliteq import SqliteQueueDatabase logger = logging.getLogger(__name__) logger.setLevel(logging.DEBUG) @@ -12,6 +11,7 @@ db = Proxy() +# noinspection PyUnusedName class BinaryDetonationResult(Model): md5 = CharField(index=True, unique=True) last_scan_date = DateTimeField(null=True) @@ -50,5 +50,6 @@ class BinaryDetonationResult(Model): # misc = CharField(default='') + # noinspection PyUnusedClass,PyUnusedName class Meta: database = db diff --git 
a/feed.py b/feed.py index 7a171f2..c66e67e 100644 --- a/feed.py +++ b/feed.py @@ -1,11 +1,22 @@ import base64 +import binascii import json +import logging import os import re +import socket +import string import time -import logging +from typing import List + +# noinspection PyUnusedName +logger = logging.getLogger(__name__) +################################################################################ +# Exception Classes +################################################################################ + class CbException(Exception): pass @@ -22,203 +33,165 @@ class CbInvalidReport(CbException): pass -logger = logging.getLogger(__name__) - +################################################################################ +# Working Code Classes +################################################################################ class CbJSONEncoder(json.JSONEncoder): - def default(self, o): - return o.dump() - - -class CbFeed(object): - def __init__(self, feedinfo, reports): - self.data = {'feedinfo': feedinfo, - 'reports': reports} - - def dump(self, validate=True): - ''' - dumps the feed data - :param validate: is set, validates feed before dumping - :return: json string of feed data - ''' - if validate: - self.validate() - - return json.dumps(self.data, cls=CbJSONEncoder, indent=2) - - def dumpjson(self, validate=True): - ''' - dumps the feed data - :param validate: is set, validates feed before dumping - :return: json string of feed data - ''' - if validate: - self.validate() - - return json.loads(self.dump(validate)) - - def __repr__(self): - return repr(self.data) - - def __str__(self): - return "CbFeed(%s)" % (self.data.get('feedinfo', "unknown")) - - def iter_iocs(self): - ''' - yields all iocs in the feed - ''' - - data = json.loads(self.dump(validate=False)) - for report in data["reports"]: - for md5 in report.get("iocs", {}).get("md5", []): - yield {"type": "md5", "ioc": md5, "report_id": report.get("id", "")} - for ip in report.get("iocs", {}).get("ipv4", []): - yield {"type": "ipv4", "ioc": ip, "report_id": report.get("id", "")} - for domain in report.get("iocs", {}).get("dns", []): - yield {"type": "dns", "ioc": domain, "report_id": report.get("id", "")} - - def validate_report_list(self, reports): - ''' - validates reports as a set, as compared to each report as a standalone entity - :param reports: list of reports - ''' - - reportids = set() + def default(self, obj): + return obj.data - # verify that no two reports have the same feed id - # see CBAPI-17 - for report in reports: - if report['id'] in reportids: - raise CbInvalidFeed("duplicate report id '%s'" % report['id']) - reportids.add(report['id']) - - def validate(self, pedantic=False, serialized_data=None): - ''' - validates the feed - :param pedantic: when set, perform strict validation - :param serialized_data: serialized data for the feed - ''' - if not serialized_data: - # this should be identity, but just to be safe. 
-            serialized_data = self.dump(validate=False)
-
-        data = json.loads(serialized_data)
-
-        if not "feedinfo" in data:
-            raise CbInvalidFeed("Feed missing 'feedinfo' data")
-
-        if not 'reports' in data:
-            raise CbInvalidFeed("Feed missing 'reports' structure")
-
-        # validate the feed info
-        fi = CbFeedInfo(**data["feedinfo"])
-        fi.validate(pedantic=pedantic)
-
-        # validate each report individually
-        for rep in data["reports"]:
-            report = CbReport(**rep)
-            report.validate(pedantic=pedantic)
-
-        # validate the reports as a whole
-        self.validate_report_list(data["reports"])
 
+class CbFeedInfo(object):
+    """
+    Contains data relating to feed information.
+    """
+
+    def __init__(self, strict_validation: bool = False, **kwargs):
+        """
+        Initialize the feed info object.
+
+        :param strict_validation: If True, validate data on every reference (default False)
+        :param kwargs:
+        """
+        self._strict = strict_validation
 
-class CbFeedInfo(object):
-    def __init__(self, **kwargs):
         # these fields are required in every feed descriptor
-        self.required = ["name", "display_name",
-                         "summary", "tech_data", "provider_url"]
-        self.optional = ["category", "icon", "version", "icon_small"]
+        self.required = {
+            "display_name": str,
+            "provider_url": str,
+            "name": str,
+            "summary": str,
+            "tech_data": str
+        }
+
+        # these fields are optional
+        self.optional = {
+            "category": str,
+            "icon": str,
+            "icon_small": str,
+            "version": int
+        }
+
+        # these string fields cannot be empty strings
         self.noemptystrings = ["name", "display_name", "summary", "tech_data", "category"]
-        self.data = kwargs
 
-        # if they are present, set the icon fields of the data to hold
-        # the base64 encoded file data from their path
+        self._data = kwargs
+
+        # if they are present, the icon and icon_small parameters represent either actual base64 encoded data
+        # or a path to a local file containing the icon data, which must be read and encoded
         for icon_field in ["icon", "icon_small"]:
-            if icon_field in self.data and os.path.exists(self.data[icon_field]):
-                icon_path = self.data.pop(icon_field)
+            if icon_field in self._data:
                 try:
-                    self.data[icon_field] = base64.b64encode(open(icon_path, "rb").read()).decode('utf-8')
-                except Exception as err:
-                    raise CbIconError(f"Unknown error reading/encoding icon data: {err}")
+                    base64.b64decode(self._data[icon_field])
+                    continue  # yes, is actual base64 encoded data
+                except (binascii.Error, TypeError):
+                    pass  # No, must be a path; try processing as such
+
+                if os.path.exists(self._data[icon_field]):
+                    icon_path = self._data.pop(icon_field)
+                    try:
+                        with open(icon_path, "rb") as fp:
+                            self._data[icon_field] = base64.b64encode(fp.read()).decode('utf-8')
+                    except Exception as err:
+                        raise CbIconError(f"Unknown error reading/encoding icon data: {err}")
+                else:
+                    raise CbIconError("No such icon file at '{0}'".format(self._data[icon_field]))
+
+        if self._strict:
+            self.validate()
+
+    def __str__(self):
+        return "CbFeed(%s)" % (self._data.get("name", "unnamed"))
+
+    def __repr__(self):
+        return repr(self._data)
 
-    def dump(self):
-        '''
-        validates, then dumps the feed info data
-        :return: the feed info data
-        '''
-        self.validate()
-        return self.data
+    # --------------------------------------------------------------------------------
 
-    def validate(self, pedantic=False):
-        """ a set of checks to validate data before we export the feed"""
+    @property
+    def data(self) -> dict:
+        if self._strict:
+            self.validate()
+        return self._data
 
-        if not all([x in self.data.keys() for x in self.required]):
-            missing_fields = ", ".join(set(self.required).difference(set(self.data.keys())))
-            raise CbInvalidFeed("FeedInfo missing required field(s): %s" % missing_fields)
+    def validate(self) -> None:
+        """
+        A set of checks to validate the internal data.
+
+        :raises CbInvalidFeed:
+        :raises CbIconError:
+        """
+        if not all([x in self._data.keys() for x in self.required.keys()]):
+            missing_fields = ", ".join(set(self.required).difference(set(self._data.keys())))
+            raise CbInvalidFeed(f"FeedInfo missing required field(s): {missing_fields}")
 
         # verify no non-supported keys are present
-        for key in self.data.keys():
+        for key in self._data.keys():
             if key not in self.required and key not in self.optional:
-                raise CbInvalidFeed("FeedInfo includes extraneous key '%s'" % key)
+                raise CbInvalidFeed(f"FeedInfo includes extraneous key '{key}'")
 
         # check to see if icon_field can be base64 decoded
         for icon_field in ["icon", "icon_small"]:
             try:
-                base64.b64decode(self.data[icon_field])
+                base64.b64decode(self._data[icon_field])
+            except binascii.Error as err:
+                raise CbIconError(f"Icon must be base64 data; decode failed with: {err}")
             except TypeError as err:
-                raise CbIconError(f"Icon must either be path or base64 data. \
-                    Path does not exist and base64 decode failed with: {err}")
-            except KeyError as err:
+                raise CbIconError(f"Icon must be base64 data; decode failed with: {err}")
+            except KeyError:
                 # we don't want to cause a ruckus if the icon is missing
                 pass
 
-        # all fields in feedinfo must be strings
-        for key in self.data.keys():
-            if not isinstance(self.data[key], str):
-                raise CbInvalidFeed("FeedInfo field %s must be of type %s, the field \
-                    %s is of type %s " % (key, "unicode", key, type(self.data[key])))
+        # all fields in feedinfo must be the correct type
+        for key in self._data.keys():
+            needed = self.required.get(key, self.optional.get(key, None))
+            if not isinstance(self._data[key], needed):
+                raise CbInvalidFeed(
+                    "FeedInfo field '{0}' must be of type '{1}'; we see type '{2}'".format(key, needed,
+                                                                                           type(self._data[key])))
 
         # certain fields, when present, must not be empty strings
-        for key in self.data.keys():
-            if key in self.noemptystrings and self.data[key] == "":
-                raise CbInvalidFeed("The '%s' field must not be an empty string" % key)
+        for key in self._data.keys():
+            if key in self.noemptystrings and self._data[key] == "":
+                raise CbInvalidFeed(f"The '{key}' field must not be an empty string")
 
         # validate shortname of this field is just a-z and 0-9, with at least one character
-        if not self.data["name"].isalnum():
+        if not self._data["name"].isalnum():
             raise CbInvalidFeed(
-                "Feed name %s may only contain a-z, A-Z, 0-9 and must have one character" % self.data["name"])
-
-        return True
-
-    def __str__(self):
-        return "CbFeed(%s)" % (self.data.get("name", "unnamed"))
-
-    def __repr__(self):
-        return repr(self.data)
+                "Feed name '{0}' may only contain a-z, A-Z, 0-9 and must have one character".format(self._data["name"]))
 
 
 class CbReport(object):
-    def __init__(self, allow_negative_scores=False, **kwargs):
+    def __init__(self, strict_validation: bool = False, allow_negative_scores: bool = False, **kwargs):
+        """
+        Contains the data for a single report.
+
+        :param strict_validation: If True, validate data on every reference (default False)
+        :param allow_negative_scores: If True, allow negative score values (default False)
+        :param kwargs:
+        """
+        self._strict = strict_validation
 
-        # negative scores introduced in CB 4.2
-        # negative scores indicate a measure of "goodness" versus "badness"
+        # negative scores introduced in CB 4.2; a measure of "goodness" versus "badness"
         self.allow_negative_scores = allow_negative_scores
 
         # these fields are required in every report
-        self.required = ["iocs", "timestamp", "link", "title", "id", "score"]
-
-        # these fields must be of type string
-        self.typestring = ["link", "title", "id", "description"]
-
-        # these fields must be of type int
-        self.typeint = ["timestamp", "score"]
+        self.required = {
+            "id": str,
+            "iocs": dict,
+            "link": str,
+            "score": int,
+            "timestamp": int,
+            "title": str
+        }
 
         # these fields are optional
-        self.optional = ["tags", "description"]
+        self.optional = {
+            "description": str,
+            "tags": list
+        }
 
-        # valid IOC types are "md5", "ipv4", "dns", "query"
-        self.valid_ioc_types = ["md5", "ipv4", "dns", "query"]
+        # valid IOC types are "sha256", "md5", "ipv4", "dns", "query"
+        self.valid_ioc_types = ["sha256", "md5", "ipv4", "dns", "query"]
 
         # valid index_type options for "query" IOC
         self.valid_query_ioc_types = ["events", "modules"]
@@ -226,185 +199,315 @@ def __init__(self, allow_negative_scores=False, **kwargs):
         if "timestamp" not in kwargs:
             kwargs["timestamp"] = int(time.mktime(time.gmtime()))
 
-        self.data = kwargs
+        self._data = kwargs
+
+        if self._strict:
+            self.validate()
+
+    def __str__(self):
+        return "CbReport(%s)" % (self._data.get("title", self._data.get("id", '')))
+
+    def __repr__(self):
+        return repr(self._data)
+
+    # --------------------------------------------------------------------------------
 
-    def dump(self):
-        self.validate()
-        return self.data
+    @property
+    def data(self) -> dict:
+        if self._strict:
+            self.validate()
+        return self._data
 
-    def is_valid_query(self, q, reportid):
+    @staticmethod
+    def is_valid_query(query: str, reportid: str):
         """
-        make a determination as to if this is a valid query
+        Determine whether this is a valid query.
+
+        :param query: An ioc query
+        :param reportid: the report id
+        :raises CbInvalidReport:
         """
-        # the query itself must be percent-encoded
-        # verify there are only non-reserved characters present
-        # no logic to detect unescaped '%' characters
-        for c in q:
+        # the query itself must be percent-encoded; verify there are only non-reserved characters present
+        # -- no logic to detect unescaped '%' characters
+        for c in query:
             if c not in "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789-_.~%*()":
-                raise CbInvalidReport(
-                    "Unescaped non-reserved character '%s' found in query for report %s; use percent-encoding" % (
-                        c, reportid))
+                raise CbInvalidReport(f"Unescaped non-reserved character '{c}' "
+                                      f"found in query for report {reportid}; use percent-encoding")
 
-    def validate(self, pedantic=False):
-        """ a set of checks to validate the report"""
+    def validate(self, pedantic: bool = False) -> None:
+        """
+        A set of checks to validate the report.
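+
+        Checks include: required fields and their types, tag shape, score range,
+        report id character set, and per-type validation of the md5/sha256/ipv4/dns/query
+        IOC entries. A report built like the following sketch passes validation
+        (illustrative values; the timestamp is auto-filled at construction):
+
+        >>> report = CbReport(id="binary_0123", link="", title="example report", score=50,
+        ...                   iocs={"md5": ["d41d8cd98f00b204e9800998ecf8427e"]})
+        >>> report.validate()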
+
+        :param pedantic: If True, limit to required fields (default False)
+        :raises CbInvalidReport:
+        """
         # validate we have all required keys
-        global ip
-        if not all([x in self.data.keys() for x in self.required]):
-            missing_fields = ", ".join(set(self.required).difference(set(self.data.keys())))
-            raise CbInvalidReport("Report missing required field(s): %s" % missing_fields)
+        if not all([x in self._data.keys() for x in self.required.keys()]):
+            missing_fields = ", ".join(set(self.required).difference(set(self._data.keys())))
+            raise CbInvalidReport(f"Report missing required field(s): {missing_fields}")
+
+        # if we get here, create convenience variable for later use
+        rid = self._data['id']
 
         # validate that no extra keys are present
-        for key in self.data.keys():
+        for key in self._data.keys():
             if key not in self.required and key not in self.optional:
-                raise CbInvalidReport("Report contains extra key '%s'" % key)
-
-        # (pedantically) validate only required keys are present
-        if pedantic and len(self.data.keys()) > len(self.required):
-            raise CbInvalidReport("Report contains extra keys: %s" %
-                                  (set(self.data.keys()) - set(self.required)))
+                raise CbInvalidReport(f"Report contains extra key '{key}'")
+            if pedantic and key not in self.required:
+                raise CbInvalidReport(f"Report contains non-required key '{key}'")
 
         # CBAPI-36
-        # verify that all fields that should be strings are strings
-        for key in self.typestring:
-            if key in self.data.keys():
-                if not isinstance(self.data[key], str):
-                    raise CbInvalidReport("Report field '%s' must be a string" % key)
-
-        # verify that all fields that should be ints are ints
-        for key in self.typeint:
-            if key in self.data.keys():
-                if not isinstance(self.data[key], int):
-                    raise CbInvalidReport("Report field '%s' must be an int" % key)
+        # all fields in the report must be the correct type
+        for key in self._data.keys():
+            needed = self.required.get(key, self.optional.get(key, None))
+            if not isinstance(self._data[key], needed):
+                raise CbInvalidReport(
+                    "Report field '{0}' must be of type '{1}'; we see type '{2}'".format(key, needed,
+                                                                                         type(self._data[key])))
 
         # validate that tags is a list of alphanumeric strings
-        if "tags" in self.data.keys():
-            if type(self.data["tags"]) != type([]):
+        if "tags" in self._data.keys():
+            if not isinstance(self._data["tags"], list):
                 raise CbInvalidReport("Tags must be a list")
-            for tag in self.data["tags"]:
+            for tag in self._data["tags"]:
                 if not str(tag).isalnum():
-                    raise CbInvalidReport("Tag '%s' is not alphanumeric" % tag)
+                    raise CbInvalidReport(f"Tag '{tag}' is not alphanumeric")
                 if len(tag) > 32:
                     raise CbInvalidReport("Tags must be 32 characters or fewer")
 
-        # validate score is integer between -100 (if so specified) or 0 and 100
+        # validate score is integer between -100 (if so specified) or 0 and 100
         try:
-            int(self.data["score"])
+            int(self._data["score"])
         except ValueError:
             raise CbInvalidReport(
-                "Report has non-integer score %s in report %s" % (self.data["score"], self.data["id"]))
+                "Report has non-integer score {0} in report '{1}'".format(self._data["score"], rid))
 
-        if self.data["score"] < -100 or self.data["score"] > 100:
+        if self._data["score"] < -100 or self._data["score"] > 100:
             raise CbInvalidReport(
-                "Report score %s out of range -100 to 100 in report %s" % (self.data["score"], self.data["id"]))
+                "Report score {0} out of range -100 to 100 in report '{1}'".format(self._data["score"], rid))
 
-        if not self.allow_negative_scores and self.data["score"] < 0:
+        if not self.allow_negative_scores and self._data["score"] < 0:
            raise CbInvalidReport(
-                "Report score %s out of range 0 to 100 in report %s" % (self.data["score"], self.data["id"]))
+                "Report score {0} out of range 0 to 100 in report '{1}'".format(self._data["score"], rid))
 
         # validate id of this report is just a-z and 0-9 and - and ., with at least one character
-        if not re.match("^[a-zA-Z0-9-_.]+$", self.data["id"]):
-            raise CbInvalidReport(
-                "Report ID %s may only contain a-z, A-Z, 0-9, - and must have one character" % self.data["id"])
+        if not re.match("^[a-zA-Z0-9-_.]+$", rid):
+            raise CbInvalidReport(f"Report ID '{rid}' may only contain a-z, A-Z, 0-9, - and must have one character")
 
         # validate there is at least one IOC for each report and each IOC entry has at least one entry
-        if not all([len(self.data["iocs"][ioc]) >= 1 for ioc in self.data['iocs']]):
-            raise CbInvalidReport("Report IOC list with zero length in report %s" % (self.data["id"]))
+        if not all([len(self._data["iocs"][ioc]) >= 1 for ioc in self._data['iocs']]):
+            raise CbInvalidReport(f"Report IOC list with zero length in report '{rid}'")
 
         # convenience variable
-        iocs = self.data['iocs']
+        iocs = self._data['iocs']
 
         # validate that there are at least one type of ioc present
         if len(iocs.keys()) == 0:
-            raise CbInvalidReport("Report with no IOCs in report %s" % (self.data["id"]))
+            raise CbInvalidReport(f"Report with no IOCs in report '{rid}'")
 
-        # (pedantically) validate that no extra keys are present
+        # (pedantically) validate that no extra iocs are present
         if pedantic and len(set(iocs.keys()) - set(self.valid_ioc_types)) > 0:
             raise CbInvalidReport(
-                "Report IOCs section contains extra keys: %s" % (set(iocs.keys()) - set(self.valid_ioc_types)))
+                "Report IOCs section contains extra keys: {0}".format(set(iocs.keys()) - set(self.valid_ioc_types)))
 
         # Let us check and make sure that for "query" ioc type does not contain other types of ioc
         query_ioc = "query" in iocs.keys()
         if query_ioc and len(iocs.keys()) > 1:
             raise CbInvalidReport(
-                "Report IOCs section for \"query\" contains extra keys: %s for report %s" %
-                (set(iocs.keys()), self.data["id"]))
+                "Report IOCs section for \"query\" contains extra keys: {0} for report '{1}'".format(set(iocs.keys()),
+                                                                                                     rid))
 
         if query_ioc:
             iocs_query = iocs["query"][0]
 
+            if not isinstance(iocs_query, dict):
+                raise CbInvalidReport("Query IOC section is not a dict structure")
+
             # validate that the index_type field exists
             if "index_type" not in iocs_query.keys():
-                raise CbInvalidReport("Query IOC section for report %s missing index_type" % self.data["id"])
+                raise CbInvalidReport(f"Query IOC section for report '{rid}' missing index_type")
 
             # validate that the index_type is a valid value
             if not iocs_query.get("index_type", None) in self.valid_query_ioc_types:
                 raise CbInvalidReport(
-                    "Report IOCs section for \"query\" contains invalid index_type: %s for report %s" %
-                    (iocs_query.get("index_type", None), self.data["id"]))
+                    "Report IOCs section for 'query' contains invalid index_type: {0} for report '{1}'".format(
+                        iocs_query.get("index_type", None), rid))
 
             # validate that the search_query field exists
             if "search_query" not in iocs_query.keys():
-                raise CbInvalidReport("Query IOC for report %s missing 'search_query'" % self.data["id"])
+                raise CbInvalidReport(f"Query IOC for report {rid} missing 'search_query'")
 
             # validate that the search_query field is at least minimally valid
             # in particular, we are looking for a "q=" or "cb.q."
            # this is by no means a complete validation, but it does provide a protection
            # against leaving the actual query unqualified
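+            # e.g. a percent-encoded "q=process_name%3Acmd.exe" passes these checks, while
+            # an unencoded "q=process_name:cmd.exe" fails on the ':' (illustrative queries only)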
            if "q=" not in iocs_query["search_query"] and "cb.q." not in iocs_query["search_query"]:
-                raise CbInvalidReport("Query IOC for report %s missing q= on query" % self.data["id"])
+                raise CbInvalidReport(f"Query IOC for report {rid} missing q= on query")
 
             for kvpair in iocs_query["search_query"].split('&'):
                 if 2 != len(kvpair.split('=')):
                     continue
                 if kvpair.split('=')[0] == 'q':
-                    self.is_valid_query(kvpair.split('=')[1], self.data["id"])
+                    self.is_valid_query(kvpair.split('=')[1], rid)
+
+        hex_digits = "0123456789ABCDEF"
 
-        # validate all md5 fields are 32 characters, just alphanumeric, and
-        # do not include [g-z] and [G-Z] meet the alphanumeric criteria but are not valid in a md5
+        # validate all md5 fields are 32 hex (0-F) characters
         for md5 in iocs.get("md5", []):
             if 32 != len(md5):
-                raise CbInvalidReport("Invalid md5 length for md5 (%s) for report %s" % (md5, self.data["id"]))
-            if not md5.isalnum():
-                raise CbInvalidReport("Malformed md5 (%s) in IOC list for report %s" % (md5, self.data["id"]))
-            for c in "ghijklmnopqrstuvwxyz":
-                if c in md5 or c.upper() in md5:
-                    raise CbInvalidReport("Malformed md5 (%s) in IOC list for report %s" % (md5, self.data["id"]))
-
-        # validate all IPv4 fields pass socket.inet_ntoa()
-        import socket
-
+                raise CbInvalidReport(f"Invalid md5 length for md5 ({md5}) for report '{rid}'")
+            for c in md5.upper():
+                if c not in hex_digits:
+                    raise CbInvalidReport(f"Malformed md5 ({md5}) in IOC list for report '{rid}'")
+
+        # validate all sha256 fields are 64 hex (0-F) characters
+        for sha256 in iocs.get("sha256", []):
+            if 64 != len(sha256):
+                raise CbInvalidReport(f"Invalid sha256 length for sha256 ({sha256}) for report '{rid}'")
+            for c in sha256.upper():
+                if c not in hex_digits:
+                    raise CbInvalidReport(f"Malformed sha256 ({sha256}) in IOC list for report '{rid}'")
+
+        # validate all IPv4 fields pass socket.inet_ntoa()
         try:
             [socket.inet_aton(ip) for ip in iocs.get("ipv4", [])]
-        except socket.error:
-            raise CbInvalidReport("Malformed IPv4 (%s) addr in IOC list for report %s" % (ip, self.data["id"]))
+        except socket.error as err:
+            raise CbInvalidReport(f"Malformed IPv4 addr in IOC list for report '{rid}': {err}")
 
         # validate all lowercased domains have just printable ascii
-        import string
         # 255 chars allowed in dns; all must be printables, sans control characters
         # hostnames can only be A-Z, 0-9 and - but labels can be any printable. See
         # O'Reilly's DNS and Bind Chapter 4 Section 5:
         # "Names that are not host names can consist of any printable ASCII character."
-        allowed_chars = string.printable[:-6]
+        allowed_chars = string.printable[:-6]  # all but whitespace
         for domain in iocs.get("dns", []):
             if len(domain) > 255:
                 raise CbInvalidReport(
-                    "Excessively long domain name (%s) in IOC list for report %s" % (domain, self.data["id"]))
+                    f"Excessively long domain name ({domain}) in IOC list for report '{rid}'")
             if not all([c in allowed_chars for c in domain]):
                 raise CbInvalidReport(
-                    "Malformed domain name (%s) in IOC list for report %s" % (domain, self.data["id"]))
+                    f"Malformed domain name ({domain}) in IOC list for report '{rid}'")
             labels = domain.split('.')
             if 0 == len(labels):
-                raise CbInvalidReport("Empty domain name in IOC list for report %s" % (self.data["id"]))
+                raise CbInvalidReport(f"Empty domain name in IOC list for report '{rid}'")
             for label in labels:
                 if len(label) < 1 or len(label) > 63:
-                    raise CbInvalidReport("Invalid label length (%s) in domain name (%s) for report %s" % (
-                        label, domain, self.data["id"]))
+                    raise CbInvalidReport(
+                        f"Invalid label length ({label}) in domain name ({domain}) for report '{rid}'")
 
-        return True
 
-    def __str__(self):
-        return "CbReport(%s)" % (self.data.get("title", self.data.get("id", '')))
+class CbFeed(object):
+    def __init__(self, feedinfo: CbFeedInfo, reports: List[CbReport], strict_validation: bool = False):
+        """
+        Contains the feed information and the list of reports that make up a feed.
+
+        :param feedinfo: the CbFeedInfo object describing the feed
+        :param reports: list of CbReport objects, one for each report in the feed
+        :param strict_validation: If True, validate data on every reference (default False)
+        """
+        self._strict = strict_validation
+
+        self._data = {'feedinfo': feedinfo, 'reports': reports}
+
+        if self._strict:
+            self.validate()
 
     def __repr__(self):
-        return repr(self.data)
+        return repr(self._data)
+
+    def __str__(self):
+        return "CbFeed(%s)" % (self._data.get('feedinfo', "unknown"))
+
+    # --------------------------------------------------------------------------------
+
+    @property
+    def data(self) -> dict:
+        if self._strict:
+            self.validate()
+        return self._data
+
+    @staticmethod
+    def load(serialized_data: str, strict_validation: bool = False) -> 'CbFeed':
+        """
+        Take in a feed description as a JSON string and convert it to a CbFeed object.
+
+        :param serialized_data: source JSON string
+        :param strict_validation: If True, validate data on every reference (default False)
+        :return:
+        """
+        raw_data = json.loads(serialized_data)
+
+        if "feedinfo" not in raw_data:
+            raise CbInvalidFeed("Feed missing 'feedinfo' data")
+
+        if 'reports' not in raw_data:
+            raise CbInvalidFeed("Feed missing 'reports' structure")
+
+        fi = CbFeedInfo(**raw_data["feedinfo"])
+        rpt = [CbReport(**rp) for rp in raw_data["reports"]]
+
+        new_feed = CbFeed(fi, rpt, strict_validation=strict_validation)
+        new_feed.validate()
+        return new_feed
+
+    def dump(self) -> str:
+        """
+        Dumps the feed data as a JSON string.
+
+        :return: json string of the feed data
+        """
+        return json.dumps(self.data, cls=CbJSONEncoder, sort_keys=True, indent=2)
+
+    def dumpjson(self) -> dict:
+        """
+        Dumps the feed data as a JSON-compatible dict.
+
+        :return: the feed data as a dict
+        """
+        return json.loads(self.dump())
+
+    def iter_iocs(self):
+        """
+        Yields all iocs in the feed.
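+
+        A usage sketch (hypothetical feed assembled as above):
+
+        >>> feed = CbFeed(feedinfo, reports)
+        >>> for ioc in feed.iter_iocs():
+        ...     print(ioc["type"], ioc["ioc"], ioc["report_id"])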
+ """ + + for report in self._data["reports"]: + for sha256 in report.data.get("iocs", {}).get("sha256", []): + yield {"type": "sha256", "ioc": sha256, "report_id": report.data.get("id", "")} + for md5 in report.data.get("iocs", {}).get("md5", []): + yield {"type": "md5", "ioc": md5, "report_id": report.data.get("id", "")} + for ip in report.data.get("iocs", {}).get("ipv4", []): + yield {"type": "ipv4", "ioc": ip, "report_id": report.data.get("id", "")} + for domain in report.data.get("iocs", {}).get("dns", []): + yield {"type": "dns", "ioc": domain, "report_id": report.data.get("id", "")} + + @staticmethod + def validate_report_list(reports: List[CbReport]) -> None: + """ + Validates reports as a set, as compared to each report as a standalone entity. + + :param reports: list of reports + :raises CbInvalidFeed: + """ + reportids = set() + + # verify that no two reports have the same report id + # see CBAPI-17 + for report in reports: + if report.data['id'] in reportids: + raise CbInvalidFeed("duplicate report id '{0}'".format(report.data['id'])) + reportids.add(report.data['id']) + + def validate(self, pedantic: bool = False) -> None: + """ + Validates the feed. + + :param pedantic: when set, perform strict validation on reports + """ + self._data['feedinfo'].validate() + + # validate each report individually + for rep in self._data['reports']: + rep.validate(pedantic=pedantic) + + # validate the reports as a whole + self.validate_report_list(self._data['reports']) diff --git a/globals.py b/globals.py index 02db14b..bcf2625 100644 --- a/globals.py +++ b/globals.py @@ -1,8 +1,14 @@ +################################################################################ +# This module contains global variables used by a single instance. +################################################################################ + +# noinspection PyUnusedName g_config = {} g_cb_server_url = 'https://127.0.0.1' g_cb_server_token = '' +# noinspection PyUnusedName broker_url = '' g_yara_rules_dir = 'yara_rules' diff --git a/main.py b/main.py index 3db9026..d7ae1e1 100644 --- a/main.py +++ b/main.py @@ -1,39 +1,46 @@ -import os -import traceback +import argparse +import configparser +import hashlib +import json import logging +import logging.handlers +import os import time -import threading +import traceback +from datetime import datetime, timedelta +from typing import List + import humanfriendly import psycopg2 -import json -from datetime import datetime, timedelta -from peewee import SqliteDatabase -from tasks import analyze_binary, update_yara_rules_remote, generate_rule_map, app -import globals -import argparse -import configparser -import hashlib +# noinspection PyPackageRequirements import yara - -from feed import CbFeed, CbFeedInfo, CbReport from celery import group -from binary_database import db, BinaryDetonationResult +from peewee import SqliteDatabase + +import globals import singleton +from binary_database import BinaryDetonationResult, db +from feed import CbFeed, CbFeedInfo, CbReport +from tasks import analyze_binary, app, generate_rule_map, update_yara_rules_remote logging_format = '%(asctime)s-%(name)s-%(lineno)d-%(levelname)s-%(message)s' logging.basicConfig(format=logging_format) -logger = logging.getLogger() +logger = logging.getLogger(__name__) logger.setLevel(logging.INFO) celery_logger = logging.getLogger('celery.app.trace') celery_logger.setLevel(logging.ERROR) -def generate_feed_from_db(): +def generate_feed_from_db() -> None: + """ + Creates a feed based on specific database information. 
+    :return:
+    """
     query = BinaryDetonationResult.select().where(BinaryDetonationResult.score > 0)
 
-    reports = list()
+    reports = []
     for binary in query:
         fields = {'iocs': {'md5': [binary.md5]},
                   'score': binary.score,
@@ -43,7 +50,6 @@ def generate_feed_from_db():
                   'title': binary.last_success_msg,
                   'description': binary.last_success_msg
                   }
-
         reports.append(CbReport(**fields))
 
     feedinfo = {'name': 'yara',
@@ -54,37 +60,50 @@ def generate_feed_from_db():
                 'icon': 'yara-logo.png',
                 'category': "Connectors",
                 }
-
     feedinfo = CbFeedInfo(**feedinfo)
     feed = CbFeed(feedinfo, reports)
-    #logger.debug("dumping feed...")
-    created_feed = feed.dump()
-    #logger.debug("Writing out feed to disk")
+    logger.debug("Writing out feed '{0}' to disk".format(feedinfo.data['name']))
     with open(globals.output_file, 'w') as fp:
-        fp.write(created_feed)
+        fp.write(feed.dump())
 
 
-def generate_yara_rule_map_hash(yara_rule_path):
-    md5 = hashlib.md5()
+def generate_yara_rule_map_hash(yara_rule_path: str) -> None:
+    """
+    Create a list of hashes for each yara rule.
 
-    temp_list = list()
+    :param yara_rule_path: the path to where the yara rules are stored.
+    :return:
+    """
+    md5 = hashlib.md5()
+    temp_list = []
     for fn in os.listdir(yara_rule_path):
-        with open(os.path.join(yara_rule_path, fn), 'rb') as fp:
-            data = fp.read()
-            md5.update(data)
-            temp_list.append(str(md5.hexdigest()))
-
+        if fn.lower().endswith(".yar") or fn.lower().endswith(".yara"):
+            with open(os.path.join(yara_rule_path, fn), 'rb') as fp:
+                data = fp.read()
+                # reset the hash for each file; the original logic did not do this, which
+                # produced a cumulative hash linking each file to the ones before it
+                md5 = hashlib.md5()
+                md5.update(data)
+                temp_list.append(str(md5.hexdigest()))
+
+    # FUTURE: Would this be better served as a map keyed by md5, with the value being the rule text, as for the
+    # following method?
    globals.g_yara_rule_map_hash_list = temp_list
    globals.g_yara_rule_map_hash_list.sort()
 
 
-def generate_rule_map_remote(yara_rule_path):
-    ret_dict = dict()
+def generate_rule_map_remote(yara_rule_path) -> None:
+    """
+    Read the local rules, push them to the remote workers, and store them in an
+    internal map keyed by file name.
 
+    :param yara_rule_path: path to where the rules are stored
+    :return:
+    """
+    ret_dict = {}
     for fn in os.listdir(yara_rule_path):
         if fn.lower().endswith(".yar") or fn.lower().endswith(".yara"):
-            ret_dict[fn] = open(os.path.join(yara_rule_path, fn), 'rb').read()
+            with open(os.path.join(yara_rule_path, fn), 'rb') as fp:
+                ret_dict[fn] = fp.read()
 
     result = update_yara_rules_remote.delay(ret_dict)
     globals.g_yara_rule_map = ret_dict
@@ -92,14 +111,23 @@ def generate_rule_map_remote(yara_rule_path):
         time.sleep(.1)
 
 
-def analyze_binaries(md5_hashes, local):
+def analyze_binaries(md5_hashes: List[str], local: bool):
+    """
+    Analyze binaries.
+
+    TODO: determine return typing!
+
+    :param md5_hashes: list of hashes to check.
+ :param local: True if local + :return: None if there is a problem; results otherwise + """ if local: try: - results = list() + results = [] for md5_hash in md5_hashes: results.append(analyze_binary(md5_hash)) - except: - logger.error(traceback.format_exc()) + except Exception as err: + logger.error("{0}".format(err)) time.sleep(5) return None else: @@ -113,18 +141,17 @@ def analyze_binaries(md5_hashes, local): result = job.apply_async() - time_waited = 0 + start = time.time() while not result.ready(): - if time_waited >= 100: + if time.time() - start >= 120: # 2 minute timeout break else: time.sleep(.1) - time_waited += .1 - - except: + except Exception as err: + logger.error("Error when analyzing: {0}".format(err)) logger.error(traceback.format_exc()) time.sleep(5) - return + return None else: if result.successful(): return result.get(timeout=30) @@ -132,7 +159,15 @@ def analyze_binaries(md5_hashes, local): return None -def save_results(analysis_results): +def save_results(analysis_results: List) -> None: + """ + Save the current analysis results. + + TODO: figure out typing! + + :param analysis_results: + :return: + """ for analysis_result in analysis_results: if analysis_result.binary_not_available: globals.g_num_binaries_not_available += 1 @@ -149,18 +184,14 @@ def save_results(analysis_results): bdr.misc = json.dumps(globals.g_yara_rule_map_hash_list) bdr.save() globals.g_num_binaries_analyzed += 1 - except: - logger.error("Error saving to database") + except Exception as err: + logger.error("Error saving to database: {0}".format(err)) logger.error(traceback.format_exc()) else: if analysis_result.score > 0: generate_feed_from_db() -def print_statistics(): - pass - - def perform(yara_rule_dir): if globals.g_remote: logger.info("Uploading yara rules to workers...") @@ -169,7 +200,7 @@ def perform(yara_rule_dir): num_total_binaries = 0 num_binaries_skipped = 0 num_binaries_queued = 0 - md5_hashes = list() + md5_hashes = [] start_time = time.time() @@ -183,16 +214,15 @@ def perform(yara_rule_dir): cur = conn.cursor(name="yara_agent") start_date_binaries = datetime.now() - timedelta(days=globals.g_num_days_binaries) + # noinspection SqlDialectInspection,SqlNoDataSourceInspection cur.execute("SELECT md5hash FROM storefiles WHERE present_locally = TRUE AND timestamp >= '{0}' " "ORDER BY timestamp DESC".format(start_date_binaries)) - - except: - logger.error("Failed to connect to Postgres database") + except Exception as err: + logger.error("Failed to connect to Postgres database: {0}".format(err)) logger.error(traceback.format_exc()) return logger.info("Enumerating modulestore...") - while True: rows = cur.fetchmany() if len(rows) == 0: @@ -220,8 +250,7 @@ def perform(yara_rule_dir): # continue except Exception as e: - logger.error("Unable to decode yara rule map hash from database") - logger.error(str(e)) + logger.error("Unable to decode yara rule map hash from database: {0}".format(e)) num_binaries_queued += 1 md5_hashes.append(md5_hash) @@ -235,19 +264,11 @@ def perform(yara_rule_dir): save_results(analysis_results) else: pass - md5_hashes = list() + md5_hashes = [] + # throw us a bone every 1000 binaries processed if num_total_binaries % 1000 == 0: - elapsed_time = time.time() - start_time - logger.info("elapsed time: {0}".format(humanfriendly.format_timespan(elapsed_time))) - logger.debug("number binaries scanned: {0}".format(globals.g_num_binaries_analyzed)) - logger.debug("number binaries already scanned: {0}".format(num_binaries_skipped)) - logger.debug("number binaries 
unavailable: {0}".format(globals.g_num_binaries_not_available)) - logger.info("total binaries from db: {0}".format(num_total_binaries)) - logger.debug("binaries per second: {0}:".format(round(num_total_binaries / elapsed_time, 2))) - logger.info("num binaries score greater than zero: {0}".format( - len(BinaryDetonationResult.select().where(BinaryDetonationResult.score > 0)))) - logger.info("") + _rule_logging(start_time, num_binaries_skipped, num_total_binaries) conn.close() @@ -257,23 +278,38 @@ def perform(yara_rule_dir): if analysis_result.last_error_msg: logger.error(analysis_result.last_error_msg) save_results(analysis_results) - md5_hashes = list() + _rule_logging(start_time, num_binaries_skipped, num_total_binaries) + generate_feed_from_db() + + +def _rule_logging(start_time: float, num_binaries_skipped: int, num_total_binaries: int) -> None: + """ + Simple method to log yara work. + :param start_time: start time for the work + :param num_binaries_skipped: + :param num_total_binaries: + :return: + """ elapsed_time = time.time() - start_time logger.info("elapsed time: {0}".format(humanfriendly.format_timespan(elapsed_time))) - logger.debug("number binaries scanned: {0}".format(globals.g_num_binaries_analyzed)) - logger.debug("number binaries already scanned: {0}".format(num_binaries_skipped)) - logger.debug("number binaries unavailable: {0}".format(globals.g_num_binaries_not_available)) + logger.debug(" number binaries scanned: {0}".format(globals.g_num_binaries_analyzed)) + logger.debug(" number binaries already scanned: {0}".format(num_binaries_skipped)) + logger.debug(" number binaries unavailable: {0}".format(globals.g_num_binaries_not_available)) logger.info("total binaries from db: {0}".format(num_total_binaries)) - logger.debug("binaries per second: {0}:".format(round(num_total_binaries / elapsed_time, 2))) + logger.debug(" binaries per second: {0}:".format(round(num_total_binaries / elapsed_time, 2))) logger.info("num binaries score greater than zero: {0}".format( len(BinaryDetonationResult.select().where(BinaryDetonationResult.score > 0)))) logger.info("") - generate_feed_from_db() - -def verify_config(config_file, output_file): +def verify_config(config_file: str, output_file: str) -> bool: + """ + Validate the config file. + :param config_file: + :param output_file: + :return: True if configuration file is good + """ config = configparser.ConfigParser() config.read(config_file) @@ -294,8 +330,9 @@ def verify_config(config_file, output_file): result_backend=config['general']['broker_url']) else: logger.error("invalid worker_type specified. 
Must be \'local\' or \'remote\'") + return False else: - logger.warn("Config file does not specify worker_type, assuming local") + logger.warning("Config file does not specify worker_type, assuming local") if 'yara_rules_dir' in config['general']: globals.g_yara_rules_dir = config['general']['yara_rules_dir'] @@ -339,9 +376,10 @@ def main(): global logger try: - me = singleton.SingleInstance() - except: - logger.error("Only one instance of this script is allowed to run at a time") + # check for single operation + singleton.SingleInstance() + except Exception as err: + logger.error(f"Only one instance of this script is allowed to run at a time: {err}") else: parser = argparse.ArgumentParser(description='Yara Agent for Yara Connector') parser.add_argument('--config-file', @@ -362,7 +400,6 @@ def main(): args = parser.parse_args() if args.debug: - logger = logging.getLogger(__name__) logger.setLevel(logging.DEBUG) if args.log_file: @@ -372,14 +409,13 @@ def main(): logger.addHandler(handler) if verify_config(args.config_file, args.output_file): - if args.validate_yara_rules: logger.info("Validating yara rules in directory: {0}".format(globals.g_yara_rules_dir)) yara_rule_map = generate_rule_map(globals.g_yara_rules_dir) try: yara.compile(filepaths=yara_rule_map) - except: - logger.error("There were errors compiling yara rules") + except Exception as err: + logger.error(f"There were errors compiling yara rules: {err}") logger.error(traceback.format_exc()) else: logger.info("All yara rules compiled successfully") @@ -393,7 +429,8 @@ def main(): db.create_tables([BinaryDetonationResult]) generate_feed_from_db() perform(globals.g_yara_rules_dir) - except: + except Exception as err: + logger.error(f"There were errors executing yara rules: {err}") logger.error(traceback.format_exc()) diff --git a/requirements.txt b/requirements.txt index 0bb99d4..95748b2 100644 --- a/requirements.txt +++ b/requirements.txt @@ -25,6 +25,7 @@ PyInstaller==3.4 python-dateutil==2.7.5 pytz==2018.9 PyYAML==3.13 +redis==3.0.1 requests==2.21.0 six==1.12.0 solrq==1.1.1 @@ -33,4 +34,3 @@ urllib3==1.24.1 vine==1.2.0 wcwidth==0.1.7 yara-python==3.8.1 -redis==3.0.1 diff --git a/singleton.py b/singleton.py index 96df95f..67a0c18 100644 --- a/singleton.py +++ b/singleton.py @@ -1,147 +1,99 @@ -#! /usr/bin/env python +#!/usr/bin/env python +import fcntl import logging -from multiprocessing import Process import os import sys import tempfile -import unittest + +logger = logging.getLogger(__name__) + + +################################################################################ +# Exception Classes +################################################################################ class SingleInstanceException(BaseException): pass +################################################################################ +# Primary Classes +################################################################################ + + class SingleInstance(object): """Class that can be instantiated only once per machine. - If you want to prevent your script from running in parallel just instantiate SingleInstance() class. If is there another instance already running it will throw a `SingleInstanceException`. + If you want to prevent your script from running in parallel just instantiate SingleInstance() class. If is there + another instance already running it will throw a `SingleInstanceException`. - >>> import tendo - ... me = SingleInstance() + >>> import singleton + ... 
singleton.SingleInstance() This option is very useful if you have scripts executed by crontab at small amounts of time. Remember that this works by creating a lock file with a filename based on the full path to the script file. - Providing a flavor_id will augment the filename with the provided flavor_id, allowing you to create multiple singleton instances from the same file. This is particularly useful if you want specific functions to have their own singleton instances. + Providing a flavor_id will augment the filename with the provided flavor_id, allowing you to create multiple + singleton instances from the same file. This is particularly useful if you want specific functions to have their + own singleton instances. """ - def __init__(self, flavor_id="", lockfile=""): - import sys + def __init__(self, flavor_id: str = None, lockfile: str = None): self.initialized = False - if lockfile: + + # define the lockfile + if lockfile is not None: self.lockfile = lockfile else: - basename = os.path.splitext(os.path.abspath(sys.argv[0]))[0].replace( - "/", "-").replace(":", "").replace("\\", "-") + '-%s' % flavor_id + '.lock' + converted = os.path.splitext(os.path.abspath(sys.argv[0]))[0].replace( + "/", "-").replace(":", "").replace("\\", "-") + if flavor_id is not None: + converted += f"-{flavor_id}" + converted += '.lock' self.lockfile = os.path.normpath( - tempfile.gettempdir() + '/' + basename) + tempfile.gettempdir() + '/' + converted) + logger.debug("SingleInstance lockfile: `{0}`".format(self.lockfile)) - logger.debug("SingleInstance lockfile: " + self.lockfile) if sys.platform == 'win32': try: # file already exists, we try to remove (in case previous # execution was interrupted) if os.path.exists(self.lockfile): os.unlink(self.lockfile) - self.fd = os.open( - self.lockfile, os.O_CREAT | os.O_EXCL | os.O_RDWR) - except OSError: - type, e, tb = sys.exc_info() + self.fd = os.open(self.lockfile, os.O_CREAT | os.O_EXCL | os.O_RDWR) + except OSError as err: + the_type, e, tb = sys.exc_info() if e.errno == 13: - logger.error( - "Another instance is already running, quitting.") - raise SingleInstanceException() - print(e.errno) - raise + raise SingleInstanceException("Another instance is already running, quitting.") + raise RuntimeError("[{0}] An error prevented creation of the lockfile: {1}".format(e.errno, err)) else: # non Windows - import fcntl self.fp = open(self.lockfile, 'w') self.fp.flush() try: fcntl.lockf(self.fp, fcntl.LOCK_EX | fcntl.LOCK_NB) except IOError: - logger.warning( - "Another instance is already running, quitting.") - raise SingleInstanceException() + raise SingleInstanceException("Another instance is already running, quitting.") + + # ready to go! 
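+        # (the lock is held simply by keeping self.fp open; __del__ unlocks it and removes the lockfile)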
self.initialized = True def __del__(self): - import os - import sys if not self.initialized: return + try: if sys.platform == 'win32': if hasattr(self, 'fd'): os.close(self.fd) os.unlink(self.lockfile) else: - import fcntl fcntl.lockf(self.fp, fcntl.LOCK_UN) - # os.close(self.fp) + self.fp.close() if os.path.isfile(self.lockfile): os.unlink(self.lockfile) - except Exception as e: - if logger: - logger.warning(e) - else: - print("Unloggable error: %s" % e) + except Exception as err: + logger.warning(f"Unable to remove lockfile: {err}") sys.exit(-1) - - -def f(name): - tmp = logger.level - logger.setLevel(logging.CRITICAL) # we do not want to see the warning - try: - me2 = SingleInstance(flavor_id=name) # noqa - except SingleInstanceException: - sys.exit(-1) - logger.setLevel(tmp) - pass - - -class testSingleton(unittest.TestCase): - - def test_1(self): - me = SingleInstance(flavor_id="test-1") - del me # now the lock should be removed - assert True - - def test_2(self): - p = Process(target=f, args=("test-2",)) - p.start() - p.join() - # the called function should succeed - assert p.exitcode == 0, "%s != 0" % p.exitcode - - def test_3(self): - me = SingleInstance(flavor_id="test-3") # noqa -- me should still kept - p = Process(target=f, args=("test-3",)) - p.start() - p.join() - # the called function should fail because we already have another - # instance running - assert p.exitcode != 0, "%s != 0 (2nd execution)" % p.exitcode - # note, we return -1 but this translates to 255 meanwhile we'll - # consider that anything different from 0 is good - p = Process(target=f, args=("test-3",)) - p.start() - p.join() - # the called function should fail because we already have another - # instance running - assert p.exitcode != 0, "%s != 0 (3rd execution)" % p.exitcode - - def test_4(self): - lockfile = '/tmp/foo.lock' - me = SingleInstance(lockfile=lockfile) - assert me.lockfile == lockfile - - -logger = logging.getLogger("tendo.singleton") -logger.addHandler(logging.StreamHandler()) - -if __name__ == "__main__": - logger.setLevel(logging.DEBUG) - unittest.main() diff --git a/src/README.md b/src/README.md new file mode 100644 index 0000000..c7c813a --- /dev/null +++ b/src/README.md @@ -0,0 +1 @@ +Future home of the source code. 
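The reworked singleton module above is what main.py relies on to guarantee a single running copy. A minimal usage sketch (the flavor_id value here is illustrative, not taken from the patch):

    import sys
    from singleton import SingleInstance, SingleInstanceException

    try:
        instance = SingleInstance(flavor_id="yara-agent")  # flavor_id is optional; it namespaces the lockfile
    except SingleInstanceException:
        sys.exit(1)  # another copy already holds the lockfile
    # ... do the real work; the lock is released when `instance` is deleted or the process exits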
diff --git a/src/__init__.py b/src/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/tasks.py b/tasks.py index 049ebca..d30a7f2 100644 --- a/tasks.py +++ b/tasks.py @@ -1,30 +1,38 @@ -from celery import Celery, bootsteps +import configparser +import datetime +import hashlib +import logging +import os +import traceback +from typing import List + +# noinspection PyPackageRequirements +import yara +from cbapi.response.models import Binary +from cbapi.response.rest_api import CbResponseAPI +from celery import bootsteps, Celery + import globals +from analysis_result import AnalysisResult app = Celery() +# noinspection PyUnusedName app.conf.task_serializer = "pickle" +# noinspection PyUnusedName app.conf.result_serializer = "pickle" +# noinspection PyUnusedName app.conf.accept_content = {"pickle"} -import yara -import logging -import traceback -import datetime -import configparser -import os -import hashlib -from analysis_result import AnalysisResult -from cbapi.response.models import Binary -from cbapi.response.rest_api import CbResponseAPI -import globals - logger = logging.getLogger(__name__) logger.setLevel(logging.INFO) -g_config = dict() - -def verify_config(config_file): +def verify_config(config_file: str) -> bool: + """ + Read and validate the current config file. + :param config_file: path to the config file + :return: True if valid + """ config = configparser.ConfigParser() config.read(config_file) @@ -58,6 +66,7 @@ def add_worker_arguments(parser): class MyBootstep(bootsteps.Step): + # noinspection PyUnusedLocal def __init__(self, worker, config_file='yara_worker.conf', **options): super().__init__(self) verify_config(config_file) @@ -68,9 +77,12 @@ def __init__(self, worker, config_file='yara_worker.conf', **options): app.steps['worker'].add(MyBootstep) -def generate_rule_map(yara_rule_path): - global yara_rule_map_hash - +def generate_rule_map(yara_rule_path: str) -> dict: + """ + Create a dictionary keyed by filename containing file paths + :param yara_rule_path: location of yara rules + :return: + """ rule_map = {} for fn in os.listdir(yara_rule_path): if fn.lower().endswith(".yar") or fn.lower().endswith(".yara"): @@ -88,39 +100,58 @@ def generate_rule_map(yara_rule_path): return rule_map -def generate_yara_rule_map_hash(yara_rule_path): - global g_yara_rule_map_hash_list +# noinspection DuplicatedCode +def generate_yara_rule_map_hash(yara_rule_path: str) -> List: + """ + Create a list of md5 hashes based on rule file contents. + :param yara_rule_path: location of the yara rules + :return: + """ md5 = hashlib.md5() - temp_list = list() + temp_list = [] for fn in os.listdir(yara_rule_path): - with open(os.path.join(yara_rule_path, fn), 'rb') as fp: - data = fp.read() - md5.update(data) - temp_list.append(str(md5.hexdigest())) + if fn.lower().endswith(".yar") or fn.lower().endswith(".yara"): + with open(os.path.join(yara_rule_path, fn), 'rb') as fp: + data = fp.read() + # TODO: Original logic did not have this, resulting in a cumulative hash for each file (linking them) + md5.new() + md5.update(data) + temp_list.append(str(md5.hexdigest())) temp_list.sort() return temp_list @app.task -def update_yara_rules_remote(yara_rules): +def update_yara_rules_remote(yara_rules: dict) -> None: + """ + Update remote yara rules. 
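+
+    Example payload (hypothetical rule text; the caller hands the whole dict to .delay(), as generate_rule_map_remote() does):
+        update_yara_rules_remote.delay({"example.yar": b"rule Example { condition: false }"})
+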
+ :param yara_rules: dict of rules, keyed by file name + :return: + """ try: for key in yara_rules: - open(os.path.join(globals.g_yara_rules_dir, key), 'wb').write(yara_rules[key]) - except: + with open(os.path.join(globals.g_yara_rules_dir, key), 'wb') as fp: + fp.write(yara_rules[key]) + except Exception as err: + logger.error(f"Error writing rule file: {err}") logger.error(traceback.format_exc()) @app.task -def analyze_binary(md5sum): - logger.debug("{}: in analyze_binary".format(md5sum)) +def analyze_binary(md5sum: str) -> AnalysisResult: + """ + Analyze binary information. + :param md5sum: md5 of the binary to check + :return: AnalysisResult instance + """ + logger.debug(f"{md5sum}: in analyze_binary") analysis_result = AnalysisResult(md5sum) try: - analysis_result.last_scan_date = datetime.datetime.now() cb = CbResponseAPI(url=globals.g_cb_server_url, @@ -133,7 +164,8 @@ def analyze_binary(md5sum): if binary_query: try: binary_data = binary_query[0].file.read() - except: + except Exception as err: + logger.debug(f"No binary available for {md5sum}: {err}") analysis_result.binary_not_available = True return analysis_result @@ -144,21 +176,18 @@ def analyze_binary(md5sum): # matches = "debug" matches = yara_rules.match(data=binary_data, timeout=30) except yara.TimeoutError: - # # yara timed out - # analysis_result.last_error_msg = "Analysis timed out after 60 seconds" analysis_result.stop_future_scans = True - except yara.Error: - # + except yara.Error as err: # Yara errored while trying to scan binary - # - analysis_result.last_error_msg = "Yara exception" - except: - analysis_result.last_error_msg = traceback.format_exc() + analysis_result.last_error_msg = f"Yara exception: {err}" + except Exception as err: + analysis_result.last_error_msg = f"Other exception while matching rules: {err}\n" + \ + traceback.format_exc() else: if matches: - score = getHighScore(matches) + score = get_high_score(matches) analysis_result.score = score analysis_result.short_result = "Matched yara rules: %s" % ', '.join( [match.rule for match in matches]) @@ -172,18 +201,21 @@ def analyze_binary(md5sum): else: analysis_result.binary_not_available = True return analysis_result - except: - error = traceback.format_exc() - logger.error(traceback.format_exc()) + except Exception as err: + error = f"Unexpected error: {err}\n" + traceback.format_exc() + logger.error(error) analysis_result.last_error_msg = error return analysis_result -def getHighScore(matches): - ####### - # if str(matches) == "debug": - # return 100 - ####### +def get_high_score(matches) -> int: + """ + Find the highest match score. + + NOTE: if str(matches) == "debug", return 100 + :param matches: List of rule matches.
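+    e.g. a match whose meta contains {'score': 80} contributes 80; matches without a 'score' meta entry count as 0, so the highest declared score wins.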
+ :return: + """ score = 0 for match in matches: if match.meta.get('score', 0) > score: diff --git a/test/__init__.py b/test/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/test/test_cbFeed.py b/test/test_cbFeed.py new file mode 100644 index 0000000..86ff855 --- /dev/null +++ b/test/test_cbFeed.py @@ -0,0 +1,100 @@ +from unittest import TestCase + +from feed import CbFeed + + +class TestCbFeed(TestCase): + SOURCE = """{ + "feedinfo": { + "category": "Local Feed QA Feed BBI893963562", + "display_name": "QA Feed BBI893963562", + "icon": "/9j/4AAQSkZJRgABAQEAYABgAAD/2wBDAAMCAgMCAgMDAwMEAwMEBQgFBQQEBQoHBwYIDAoMDAsKCwsNDhIQDQ4RDgsLEBYQERMUFRUVDA8XGBYUGBIUFRT/2wBDAQMEBAUEBQkFBQkUDQsNFBQUFBQUFBQUFBQUFBQUFBQUFBQUFBQUFBQUFBQUFBQUFBQUFBQUFBQUFBQUFBQUFBT/wAARCAAyADIDASIAAhEBAxEB/8QAHwAAAQUBAQEBAQEAAAAAAAAAAAECAwQFBgcICQoL/8QAtRAAAgEDAwIEAwUFBAQAAAF9AQIDAAQRBRIhMUEGE1FhByJxFDKBkaEII0KxwRVS0fAkM2JyggkKFhcYGRolJicoKSo0NTY3ODk6Q0RFRkdISUpTVFVWV1hZWmNkZWZnaGlqc3R1dnd4eXqDhIWGh4iJipKTlJWWl5iZmqKjpKWmp6ipqrKztLW2t7i5usLDxMXGx8jJytLT1NXW19jZ2uHi4+Tl5ufo6erx8vP09fb3+Pn6/8QAHwEAAwEBAQEBAQEBAQAAAAAAAAECAwQFBgcICQoL/8QAtREAAgECBAQDBAcFBAQAAQJ3AAECAxEEBSExBhJBUQdhcRMiMoEIFEKRobHBCSMzUvAVYnLRChYkNOEl8RcYGRomJygpKjU2Nzg5OkNERUZHSElKU1RVVldYWVpjZGVmZ2hpanN0dXZ3eHl6goOEhYaHiImKkpOUlZaXmJmaoqOkpaanqKmqsrO0tba3uLm6wsPExcbHyMnK0tPU1dbX2Nna4uPk5ebn6Onq8vP09fb3+Pn6/9oADAMBAAIRAxEAPwD9MKhvL630+3ee6uIraBPvSzOEUfUnpWZ401afw/4O13VLYKbmysLi5iDDI3JGzDI9Mivj66nsNY8L6f4w+Iet6xrtxqU00djpVmyoo8ogMWY/Ki5YcIoPI98AH07qfx48AaTIUn8UWbsvX7NunH5xhqr2f7Q/w7vpAkfiaFSf+e0E0Q/NkFfJrfEvQLE7dJ+HuhxR9jqck945+pLqP0pq/FPT7g7b3wD4XmiPUW9vLA/4MsnH5UAfdGjeI9K8RQmbStStNSiHVrWdZAPrtPFaNfEvhOy8H+OtU8jw5/a3gbxQIpJrXyro3Fs5RC5UPxIhwp5yRwetfQf7NXjbV/HXw8lu9auPtd3a30los7KAzoEjYFsdT85GfYd6APV6KKKAM3xNpp1rw3q2nqMm7tJoB/wNCv8AWvhu+jbVPgbp8m0+boutTQSL3SOeNWBPoN0bD8q+9q+XV0XTfAXx11nwxr1pHP4U8WFZYklyI/ML74+R02yb06/xAnigD550Pw7qniW7FrpOnXWo3HeO1iaQj3OBwPc16r4f/ZT8Z6rGJtRNjoUGNzfapt7geu1Mj8yK6Xx3+0jd+ELy98NeD/D9r4cgsZntzJLCN+5SQWWMYVenfdmvEfEnjrxD4vkL6zrN5qAJz5c0pMY+ifdH4CgR7/ofwh8N/DDSfEHiaHxbDr+q6VptxiG1MarFJJE0a7gGZuSxAyR1rvf2VdLbT/hHazMu37bdTXA9xkR5/wDIf8q+fV0O60H4faV4XtYi3iXxlcw3MsH8UdorYgVvTe5L/RRmvs3wr4fg8KeG9M0e35hsbdIA3TdtUAsfcnn8TQM1aKKKACvO/jb8K4vij4V8iErDrNmTNYztwN3dCeytgfQgHtXolFAHxJq2ky/FVHs7uP8As/4l6Wv2e4tbjCHVUQYBBP8Ay3UDGD94DI9szw34Ht/BNovibxxaSW8EbH7BoU6lJ9QlXpuU8rED95iOegzmvqb4sfBHSviZGt5HIdK8QQAeRqUI5OOivj7w9DnI7elc18P/ANneWz1weIfHWq/8JRrUZAhSR2lhj2/dYl+XI7AgAe9AFf4E/DrVNS1q5+Ivi5c6zqGTZW7rjyIyMbsfw/L8qjsufXj3WiigAooooAKKKKACiiigAooooAKKKKAP/9k=", + "name": "qafeedbbi893963562", + "provider_url": "https://confluence.carbonblack.local/display/CB/CB+Response+QA", + "summary": "Feed generated by QA Framework (QA Feed BBI893963562)", + "tech_data": "Will trigger on MD5 hash of [c296a66022a990b22f734935a66b876a] ", + "version": 2 + }, + "reports": [ + { + "description": "MD5 hash [c296a66022a990b22f734935a66b876a] ", + "id": "ID70170133447789278768703804369333656.exe", + "iocs": { + "md5": [ + "c296a66022a990b22f734935a66b876a" + ] + }, + "link": "http://www.carbonblack.com", + "score": 99, + "tags": [ + "md5" + ], + "timestamp": 1571143956, + "title": "CB Response QA ID70170133447789278768703804369333656" + }, + { + "description": "MD5 hash [58ce99ab4ca124973fe2bfee428862a0] ", + "id": "ID36724710133780394307691457860616137.exe", + "iocs": { + "md5": [ + "58ce99ab4ca124973fe2bfee428862a0" + ] + }, + "link": "http://www.carbonblack.com", + "score": 99, + "tags": [ + "md5" + ], + "timestamp": 1571143956, + "title": "CB Response QA 
ID36724710133780394307691457860616137" + } + ] +}""" + + def test_load_and_dump(self): + """ + Ensure that the load functionality works as expected. + """ + feed = CbFeed.load(self.SOURCE) + + fi = feed.data['feedinfo'].data + self.assertEqual('qafeedbbi893963562', fi['name']) + self.assertEqual('QA Feed BBI893963562', fi['display_name']) + + rpts = feed.data['reports'] + self.assertEqual(2, len(rpts)) + + check = feed.dump() + self.assertEqual(self.SOURCE, check) + + def test_dumpjson(self): + feed = CbFeed.load(self.SOURCE) + json = feed.dumpjson() + + fi = feed.data['feedinfo'].data + for key in fi.keys(): + self.assertEqual(fi[key], json['feedinfo'][key]) + + check = {} + for rpt in feed.data['reports']: + check[rpt.data['id']] = rpt.data + + for rpt in json['reports']: + entry = rpt['id'] + for key in rpt: + self.assertEqual(check[entry][key], rpt[key]) + + def test_iter_iocs(self): + feed = CbFeed.load(self.SOURCE) + + check = { + "ID36724710133780394307691457860616137.exe": "58ce99ab4ca124973fe2bfee428862a0", + "ID70170133447789278768703804369333656.exe": "c296a66022a990b22f734935a66b876a" + } + for x in feed.iter_iocs(): + if x['report_id'] in check: + del check[x['report_id']] + else: + self.fail("Saw unexepcted ioc: {0}".format(x)) + if len(check) > 0: + self.fail("Did not see the following reports: {0}".format(check)) diff --git a/test/test_cbFeedInfo.py b/test/test_cbFeedInfo.py new file mode 100644 index 0000000..2ef5bba --- /dev/null +++ b/test/test_cbFeedInfo.py @@ -0,0 +1,116 @@ +from unittest import TestCase + +from feed import CbFeedInfo, CbIconError, CbInvalidFeed + + +class TestCbFeedInfo(TestCase): + + @staticmethod + def core(**kwargs) -> dict: + """ + Create default required fields. + :return: + """ + data = { + 'display_name': "Simple Test 123", + 'provider_url': "https://qa.carbonblack.com", + 'name': "simpletest123", + 'summary': "Unit test for feed info", + 'tech_data': "Unit test for feed info", + } + if len(kwargs) > 0: + for key, value in kwargs.items(): + data[key] = value + return data + + def test_fields_minimum(self): + """ + Ensure minimum required fields. + """ + data = self.core() + fi = CbFeedInfo(**data) + fi.validate() + + def test_fields_all(self): + """ + Ensure all required fields. + + # TODO: update icon paths when we move source files + """ + data = self.core() + data['category'] = "Basic" + data['icon'] = "../yara-logo.png" + data['icon_small'] = "../yara-logo.png" + data['version'] = 1 + + fi = CbFeedInfo(**data) + fi.validate() + + def test_missing_required_field(self): + """ + Ensure minimum required fields. + """ + data = self.core() + del data['display_name'] + + with self.assertRaises(CbInvalidFeed) as err: + fi = CbFeedInfo(**data) + fi.validate() + assert "FeedInfo missing required field" in "{0}".format(err.exception.args[0]) + + def test_extra_field(self): + """ + Ensure no unexpected fields. + """ + data = self.core() + data['bogus'] = "foobar" + + with self.assertRaises(CbInvalidFeed) as err: + fi = CbFeedInfo(**data) + fi.validate() + assert "FeedInfo includes extraneous key 'bogus'" in "{0}".format(err.exception.args[0]) + + def test_field_wrong_type(self): + """ + Ensure fields have expected type. 
+ """ + data = self.core() + data['display_name'] = 5 + + with self.assertRaises(CbInvalidFeed) as err: + fi = CbFeedInfo(**data) + fi.validate() + assert "FeedInfo field 'display_name' must be of type" in "{0}".format(err.exception.args[0]) + + def test_field_empty_string(self): + """ + Ensure fields that are not allowed to be empty are caught. + """ + data = self.core() + data['display_name'] = "" + + with self.assertRaises(CbInvalidFeed) as err: + fi = CbFeedInfo(**data) + fi.validate() + assert "The 'display_name' field must not be an empty string" in "{0}".format(err.exception.args[0]) + + def test_bad_icon(self): + """ + Ensure we trap bad icon data. + """ + data = self.core() + + with self.assertRaises(CbIconError) as err: + fi = CbFeedInfo(**data) + fi._data['icon'] = "BOGUS" + fi.validate() + assert "Icon must be base64 data; decode failed with: Incorrect padding" in "{0}".format(err.exception.args[0]) + + def test_bad_icon_missing_path(self): + """ + Ensure we trap bad icon data. + """ + with self.assertRaises(CbIconError) as err: + data = self.core(icon="nonesuch.png") + CbFeedInfo(**data) + assert "No such icon file at 'nonesuch.png'" in "{0}".format(err.exception.args[0]) diff --git a/test/test_cbReport.py b/test/test_cbReport.py new file mode 100644 index 0000000..2851ef3 --- /dev/null +++ b/test/test_cbReport.py @@ -0,0 +1,307 @@ +import time +from unittest import TestCase + +from feed import CbInvalidReport, CbReport + + +# noinspection DuplicatedCode +class TestCbReport(TestCase): + + @staticmethod + def core(**kwargs) -> dict: + """ + Create default required fields. + fields = {'iocs': {'md5': [binary.md5]}, + 'score': binary.score, + 'timestamp': int(time.mktime(time.gmtime())), + 'link': '', + 'id': 'binary_{0}'.format(binary.md5), + 'title': binary.last_success_msg, + 'description': binary.last_success_msg + } + :return: + """ + iocs = { + 'md5': ["00000000001111111111222222222233", "11111111112222222222333333333344"] + } + + data = { + 'id': "RepId1", + 'iocs': iocs, + 'link': "https://qa.carbonblack.com", + 'score': 22, + 'timestamp': int(time.time()), + 'title': "Unit test for report", + } + if len(kwargs) > 0: + for key, value in kwargs.items(): + data[key] = value + return data + + def test_fields_minimum(self): + """ + Ensure minimum required fields. + """ + data = self.core() + rpt = CbReport(**data) + rpt.validate() + + def test_fields_all(self): + """ + Ensure all required fields. + """ + data = self.core() + data['description'] = "The Decription" + data['tags'] = ["md5"] + + rpt = CbReport(**data) + rpt.validate() + + def test_fields_all_required_only(self): + """ + Ensure all required fields. + """ + data = self.core() + data['description'] = "The Decription" + data['tags'] = ["md5"] + + rpt = CbReport(**data) + rpt.validate() + + with self.assertRaises(CbInvalidReport) as err: + rpt.validate(pedantic=True) + assert "Report contains non-required key 'description'" in "{0}".format(err.exception.args[0]) + + def test_fields_with_sha256(self): + """ + Ensure sha256 ioc can be added. + """ + iocs = { + 'sha256': ["0000000000111111111122222222223333333333444444444455555555556666"] + } + + data = self.core(iocs=iocs) + data['description'] = "The Decription" + data['tags'] = ["sha256"] + + rpt = CbReport(**data) + rpt.validate() + + def test_fields_with_ipv4(self): + """ + Ensure ipv4 ioc can be added. 
+ """ + iocs = { + 'ipv4': ["12.34.56.78"] + } + + data = self.core(iocs=iocs) + data['description'] = "The Decription" + data['tags'] = ["ipv4"] + + rpt = CbReport(**data) + rpt.validate() + + def test_fields_with_query(self): + """ + Ensure query ioc can be added. + """ + iocs = { + 'query': [{ + 'index_type': "events", + 'search_query': "cb.q.commandline=foo.txt" + }] + } + data = self.core(iocs=iocs) + data['description'] = "The Decription" + data['tags'] = ["query"] + + rpt = CbReport(**data) + rpt.validate() + + def test_fields_with_malformed_md5(self): + """ + Ensure invalid md5 is caught. + """ + iocs = { + 'md5': ["Bogus!!!Bogus!!!Bogus!!!Bogus!!!"] + } + + data = self.core(iocs=iocs) + data['description'] = "The Decription" + + with self.assertRaises(CbInvalidReport) as err: + rpt = CbReport(**data) + rpt.validate() + assert "Malformed md5" in "{0}".format(err.exception.args[0]) + + def test_fields_with_short_md5(self): + """ + Ensure short md5 is caught. + """ + iocs = { + 'md5': ["11111111112222222222"] + } + + data = self.core(iocs=iocs) + data['description'] = "The Decription" + + with self.assertRaises(CbInvalidReport) as err: + rpt = CbReport(**data) + rpt.validate() + assert "Invalid md5 length" in "{0}".format(err.exception.args[0]) + + def test_fields_with_long_md5(self): + """ + Ensure long md5 is caught. + """ + iocs = { + 'md5': ["1111111111222222222233333333334444444444"] + } + + data = self.core(iocs=iocs) + data['description'] = "The Decription" + + with self.assertRaises(CbInvalidReport) as err: + rpt = CbReport(**data) + rpt.validate() + assert "Invalid md5 length" in "{0}".format(err.exception.args[0]) + + def test_fields_with_malformed_sha256(self): + """ + Ensure invalid sha256 is caught. + """ + iocs = { + 'sha256': ["Bogus!!!Bogus!!!Bogus!!!Bogus!!!Bogus!!!Bogus!!!Bogus!!!Bogus!!!"] + } + + data = self.core(iocs=iocs) + data['description'] = "The Decription" + + with self.assertRaises(CbInvalidReport) as err: + rpt = CbReport(**data) + rpt.validate() + assert "Malformed sha256" in "{0}".format(err.exception.args[0]) + + def test_fields_with_short_sha256(self): + """ + Ensure short sha256 is caught. + """ + iocs = { + 'sha256': ["11111111112222222222"] + } + + data = self.core(iocs=iocs) + data['description'] = "The Decription" + + with self.assertRaises(CbInvalidReport) as err: + rpt = CbReport(**data) + rpt.validate() + assert "Invalid sha256 length" in "{0}".format(err.exception.args[0]) + + def test_fields_with_long_sha256(self): + """ + Ensure long md5 is caught. + """ + iocs = { + 'md5': ["1111111111222222222233333333334444444444555555555566666666667777777777"] + } + + data = self.core(iocs=iocs) + data['description'] = "The Decription" + + with self.assertRaises(CbInvalidReport) as err: + rpt = CbReport(**data) + rpt.validate() + assert "Invalid md5 length" in "{0}".format(err.exception.args[0]) + + def test_fields_with_malformed_ipv4(self): + """ + Ensure invalid ipv4 is caught. + """ + iocs = { + 'ipv4': ["Bogus"] + } + + data = self.core(iocs=iocs) + data['description'] = "The Decription" + + with self.assertRaises(CbInvalidReport) as err: + rpt = CbReport(**data) + rpt.validate() + assert "Malformed IPv4 addr" in "{0}".format(err.exception.args[0]) + + def test_fields_with_query_missing_index_type(self): + """ + Ensure query with missing index type is caught. 
+ """ + iocs = { + 'query': [{ + 'search_query': "cb.q.commandline=foo.txt" + }] + } + data = self.core(iocs=iocs) + data['description'] = "The Decription" + data['tags'] = ["query"] + + with self.assertRaises(CbInvalidReport) as err: + rpt = CbReport(**data) + rpt.validate() + assert "Query IOC section for report 'RepId1' missing index_type" in "{0}".format(err.exception.args[0]) + + def test_fields_with_query_invalid_index_type(self): + """ + Ensure query with bogus index type is caught. + """ + iocs = { + 'query': [{ + 'index_type': "BOGUS", + 'search_query': "cb.q.commandline=foo.txt" + }] + } + data = self.core(iocs=iocs) + data['description'] = "The Decription" + data['tags'] = ["query"] + + with self.assertRaises(CbInvalidReport) as err: + rpt = CbReport(**data) + rpt.validate() + assert "Report IOCs section for 'query' contains invalid index_type: BOGUS" in "{0}".format( + err.exception.args[0]) + + def test_fields_with_query_missing_query(self): + """ + Ensure query with missing query is caught. + """ + iocs = { + 'query': [{ + 'index_type': "events", + }] + } + data = self.core(iocs=iocs) + data['description'] = "The Decription" + data['tags'] = ["query"] + + with self.assertRaises(CbInvalidReport) as err: + rpt = CbReport(**data) + rpt.validate() + assert "Query IOC for report RepId1 missing 'search_query'" in "{0}".format(err.exception.args[0]) + + def test_fields_with_query_bogus_query(self): + """ + Ensure query with missing query is caught. + """ + iocs = { + 'query': [{ + 'index_type': "events", + 'search_query': "BOGUS" + }] + } + data = self.core(iocs=iocs) + data['description'] = "The Decription" + data['tags'] = ["query"] + + with self.assertRaises(CbInvalidReport) as err: + rpt = CbReport(**data) + rpt.validate() + assert "Query IOC for report RepId1 missing q= on query" in "{0}".format(err.exception.args[0]) diff --git a/test/test_singleInstance.py b/test/test_singleInstance.py new file mode 100644 index 0000000..4d0453b --- /dev/null +++ b/test/test_singleInstance.py @@ -0,0 +1,82 @@ +import logging +import os +import sys +from multiprocessing import Process +from unittest import TestCase + +from singleton import SingleInstance, SingleInstanceException + +logger = logging.getLogger(__name__) + + +def f(flavor: str = None): + tmp = logger.level + logger.setLevel(logging.CRITICAL) # we do not want to see the warning + try: + SingleInstance(flavor_id=flavor) # noqa + except SingleInstanceException: + sys.exit(1) + finally: + logger.setLevel(tmp) + + +class TestSingleInstance(TestCase): + + def test_01_unflavored(self): + si = SingleInstance() + logger.info("Lockfile: {0}".format(si.lockfile)) + self.assertTrue(os.path.exists(si.lockfile)) + + lock = si.lockfile + del si # now the lock should be removed + self.assertFalse(os.path.exists(lock)) + + def test_02_flavored(self): + si = SingleInstance(flavor_id="test-1") + logger.info("Lockfile: {0}".format(si.lockfile)) + self.assertTrue(os.path.exists(si.lockfile)) + try: + assert "test-1" in si.lockfile + except AssertionError: + del si + raise + + lock = si.lockfile + del si # now the lock should be removed + self.assertFalse(os.path.exists(lock)) + + def test_03_specified(self): + lockfile = '/tmp/foo.lock' + si = SingleInstance(lockfile=lockfile) + logger.info("Lockfile: {0}".format(si.lockfile)) + self.assertTrue(os.path.exists(lockfile)) + + del si # now the lock should be removed + self.assertFalse(os.path.exists(lockfile)) + + def test_04_as_process(self): + p = Process(target=f, args=("as-process",)) + p.start() + 
p.join() + # the called function should succeed + assert p.exitcode == 0, "%s != 0" % p.exitcode + + def test_05_as_process_multi_invoke(self): + # get an instance running + si = SingleInstance(flavor_id="test-05") + + p = Process(target=f, args=("test-05",)) + p.start() + p.join() + # the called function should fail because we already have another instance running + assert p.exitcode != 0, "%s != 0 (2nd execution)" % p.exitcode + + # try a different flavor + p = Process(target=f, args=("test-05a",)) + p.start() + p.join() + # the called function should succeed because the new + # flavor differs from the running instance + assert p.exitcode == 0, "%s != 0 (new flavor)" % p.exitcode + + del si From 41e9f919e62e538428f1dec57262fd7ae7098b41 Mon Sep 17 00:00:00 2001 From: Burton Choinski Date: Mon, 21 Oct 2019 08:39:52 -0400 Subject: [PATCH 037/257] Intellij cleanup --- main.py | 1 + tasks.py | 1 - 2 files changed, 1 insertion(+), 1 deletion(-) diff --git a/main.py b/main.py index d7ae1e1..686359d 100644 --- a/main.py +++ b/main.py @@ -68,6 +68,7 @@ def generate_feed_from_db() -> None: fp.write(feed.dump()) +# noinspection DuplicatedCode def generate_yara_rule_map_hash(yara_rule_path: str) -> None: """ Create a list of hashes for each yara rule. diff --git a/tasks.py b/tasks.py index d30a7f2..50520ec 100644 --- a/tasks.py +++ b/tasks.py @@ -111,7 +111,6 @@ def generate_yara_rule_map_hash(yara_rule_path: str) -> List: md5 = hashlib.md5() temp_list = [] - for fn in os.listdir(yara_rule_path): if fn.lower().endswith(".yar") or fn.lower().endswith(".yara"): with open(os.path.join(yara_rule_path, fn), 'rb') as fp: From b6db1c9b4b622eacd9f325dc79cc7550632546f7 Mon Sep 17 00:00:00 2001 From: Burton Choinski Date: Mon, 21 Oct 2019 09:51:58 -0400 Subject: [PATCH 038/257] Propagated safety code --- main.py | 6 ++++++ tasks.py | 3 +++ 2 files changed, 9 insertions(+) diff --git a/main.py b/main.py index 686359d..3d5b4a6 100644 --- a/main.py +++ b/main.py @@ -81,6 +81,9 @@ def generate_yara_rule_map_hash(yara_rule_path: str) -> None: temp_list = [] for fn in os.listdir(yara_rule_path): if fn.lower().endswith(".yar") or fn.lower().endswith(".yara"): + fullpath = os.path.join(yara_rule_path, fn) + if not os.path.isfile(fullpath): + continue with open(os.path.join(yara_rule_path, fn), 'rb') as fp: data = fp.read() # TODO: Original logic did not have this, resulting in a cumulative hash for each file (linking them) @@ -103,6 +106,9 @@ def generate_rule_map_remote(yara_rule_path) -> None: ret_dict = {} for fn in os.listdir(yara_rule_path): if fn.lower().endswith(".yar") or fn.lower().endswith(".yara"): + fullpath = os.path.join(yara_rule_path, fn) + if not os.path.isfile(fullpath): + continue with open(os.path.join(yara_rule_path, fn), 'rb') as fp: ret_dict[fn] = fp.read() diff --git a/tasks.py b/tasks.py index 50520ec..bf8f16a 100644 --- a/tasks.py +++ b/tasks.py @@ -113,6 +113,9 @@ def generate_yara_rule_map_hash(yara_rule_path: str) -> List: temp_list = [] for fn in os.listdir(yara_rule_path): if fn.lower().endswith(".yar") or fn.lower().endswith(".yara"): + fullpath = os.path.join(yara_rule_path, fn) + if not os.path.isfile(fullpath): + continue with open(os.path.join(yara_rule_path, fn), 'rb') as fp: data = fp.read() # TODO: Original logic did not have this, resulting in a cumulative hash for each file (linking them) From 48d53ac627a360848008ef7fc14a34319a654a11 Mon Sep 17 00:00:00 2001 From: Burton Choinski Date: Mon, 21 Oct 2019 10:02:42 -0400 Subject: [PATCH 039/257] Added duplicate report id test ---
test/test_cbFeed.py | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/test/test_cbFeed.py b/test/test_cbFeed.py index 86ff855..8ba980b 100644 --- a/test/test_cbFeed.py +++ b/test/test_cbFeed.py @@ -1,6 +1,6 @@ from unittest import TestCase -from feed import CbFeed +from feed import CbFeed, CbInvalidFeed class TestCbFeed(TestCase): @@ -67,6 +67,18 @@ def test_load_and_dump(self): check = feed.dump() self.assertEqual(self.SOURCE, check) + def test_duplicate_report_ids(self): + """ + Ensure that report ids cannot be the same. + """ + feed = CbFeed.load(self.SOURCE) + reps = feed.data['reports'] + reps[1].data['id'] = reps[0].data['id'] + + with self.assertRaises(CbInvalidFeed) as err: + feed.validate_report_list(reps) + assert "duplicate report id" in "{0}".format(err.exception.args[0]) + def test_dumpjson(self): feed = CbFeed.load(self.SOURCE) json = feed.dumpjson() From e163fcac923324db8a4c98b18068f7472028034d Mon Sep 17 00:00:00 2001 From: Burton Choinski Date: Mon, 21 Oct 2019 10:14:54 -0400 Subject: [PATCH 040/257] attempt cleanup --- test/test_singleInstance.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/test/test_singleInstance.py b/test/test_singleInstance.py index 4d0453b..cc0969c 100644 --- a/test/test_singleInstance.py +++ b/test/test_singleInstance.py @@ -12,11 +12,14 @@ def f(flavor: str = None): tmp = logger.level logger.setLevel(logging.CRITICAL) # we do not want to see the warning + si = None try: - SingleInstance(flavor_id=flavor) # noqa + si = SingleInstance(flavor_id=flavor) # noqa except SingleInstanceException: sys.exit(1) finally: + if si is not None: + del si logger.setLevel(tmp) From bfb95d719392b2f6fe1ffd457984c895b112b6c4 Mon Sep 17 00:00:00 2001 From: Burton Choinski Date: Mon, 21 Oct 2019 10:28:53 -0400 Subject: [PATCH 041/257] more cleanups --- main.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/main.py b/main.py index 3d5b4a6..6553e2d 100644 --- a/main.py +++ b/main.py @@ -8,7 +8,7 @@ import time import traceback from datetime import datetime, timedelta -from typing import List +from typing import List, Optional import humanfriendly import psycopg2 @@ -118,7 +118,7 @@ def generate_rule_map_remote(yara_rule_path) -> None: time.sleep(.1) -def analyze_binaries(md5_hashes: List[str], local: bool): +def analyze_binaries(md5_hashes: List[str], local: bool) -> Optional: """ Analyze binaries. @@ -141,7 +141,7 @@ def analyze_binaries(md5_hashes: List[str], local: bool): return results else: try: - scan_group = list() + scan_group = [] for md5_hash in md5_hashes: scan_group.append(analyze_binary.s(md5_hash)) job = group(scan_group) From 81df3f6382a82f8351dd424acb377d0635cae8f2 Mon Sep 17 00:00:00 2001 From: Zachary Estep Date: Mon, 21 Oct 2019 14:12:00 -0400 Subject: [PATCH 042/257] cleanup, initial pyinstaller spec file --- main.py | 10 ---------- main.spec | 27 +++++++++++++-------------- 2 files changed, 13 insertions(+), 24 deletions(-) diff --git a/main.py b/main.py index 4c80f7c..d2611b4 100644 --- a/main.py +++ b/main.py @@ -118,7 +118,6 @@ def generate_rule_map_remote(yara_rule_path) -> None: time.sleep(.1) -<<<<<<< HEAD def analyze_binaries(md5_hashes: List[str], local: bool) -> Optional: """ Analyze binaries. 
@@ -129,10 +128,6 @@ def analyze_binaries(md5_hashes: List[str], local: bool) -> Optional: :param local: True if local :return: None if there is a problem; results otherwise """ -======= -def analyze_binaries(md5_hashes, local): - #logger.debug(f"md5hashes = {len(md5_hashes)}") ->>>>>>> origin/yara_v2_development if local: try: results = [] @@ -312,12 +307,7 @@ def _rule_logging(start_time: float, num_binaries_skipped: int, num_total_binari logger.debug(" number binaries already scanned: {0}".format(num_binaries_skipped)) logger.debug(" number binaries unavailable: {0}".format(globals.g_num_binaries_not_available)) logger.info("total binaries from db: {0}".format(num_total_binaries)) -<<<<<<< HEAD logger.debug(" binaries per second: {0}:".format(round(num_total_binaries / elapsed_time, 2))) -======= - #logger.info(f"number of binaries queued to be scanned: {num_binaries_queued}") - logger.debug("binaries per second: {0}:".format(round(num_total_binaries / elapsed_time, 2))) ->>>>>>> origin/yara_v2_development logger.info("num binaries score greater than zero: {0}".format( len(BinaryDetonationResult.select().where(BinaryDetonationResult.score > 0)))) logger.info("") diff --git a/main.spec b/main.spec index 24eef6d..4bc3f81 100644 --- a/main.spec +++ b/main.spec @@ -4,15 +4,10 @@ block_cipher = None a = Analysis(['main.py'], - pathex=['.'], + pathex=['/home/zestep/projects/src/github.com/carbonblack/cb-yara-connector'], binaries=[], - datas=[ (HOMEPATH + '/cbapi/response/models/*', 'cbapi/response/models/'), - (HOMEPATH + '/cbapi/protection/models/*', 'cbapi/protection/models/'), - (HOMEPATH + '/cbapi/defense/models/*', 'cbapi/defense/models/') ], - hiddenimports=['celery.fixups', 'celery.fixups.django', 'celery.loaders.app', - 'celery.app.amqp', 'kombu.transport.redis', 'redis', 'celery.backends', - 'celery.backends.redis', 'celery.app.events', 'celery.events', - 'kombu.transport.pyamqp'], + datas=[], + hiddenimports=[], hookspath=[], runtime_hooks=[], excludes=[], @@ -24,14 +19,18 @@ pyz = PYZ(a.pure, a.zipped_data, cipher=block_cipher) exe = EXE(pyz, a.scripts, - a.binaries, - a.zipfiles, - a.datas, [], - name='yara_agent', + exclude_binaries=True, + name='main', debug=False, bootloader_ignore_signals=False, strip=False, - upx=False, - runtime_tmpdir=None, + upx=True, console=True ) +coll = COLLECT(exe, + a.binaries, + a.zipfiles, + a.datas, + strip=False, + upx=True, + name='main') From b32d945909038780b0e15c40cdb8a621027317c6 Mon Sep 17 00:00:00 2001 From: Zachary Estep Date: Mon, 21 Oct 2019 14:19:17 -0400 Subject: [PATCH 043/257] updates --- feed.py | 42 ++++-------------------------------------- main.py | 10 ---------- main.spec | 27 +++++++++++++-------------- 3 files changed, 17 insertions(+), 62 deletions(-) diff --git a/feed.py b/feed.py index 12b0086..3458295 100644 --- a/feed.py +++ b/feed.py @@ -114,7 +114,6 @@ def data(self) -> dict: self.validate() return self._data -<<<<<<< HEAD def validate(self) -> None: """ A set of checks to validate the internal data. 
@@ -124,34 +123,6 @@ def validate(self) -> None: if not all([x in self._data.keys() for x in self.required.keys()]): missing_fields = ", ".join(set(self.required).difference(set(self._data.keys()))) raise CbInvalidFeed(f"FeedInfo missing required field(s): {missing_fields}") -======= - # if they are present, set the icon fields of the data to hold - # the base64 encoded file data from their path - for icon_field in ["icon", "icon_small"]: - if icon_field in self.data and os.path.exists(self.data[icon_field]): - icon_path = self.data.pop(icon_field) - try: - self.data[icon_field] = base64.b64encode(open(icon_path, "rb").read()).decode('utf-8') - except Exception as err: - #del self.data[icon_field] - pass - #raise CbIconError(f"Unknown error reading/encoding icon data: {err}") - - def dump(self): - ''' - validates, then dumps the feed info data - :return: the feed info data - ''' - self.validate() - return self.data - - def validate(self, pedantic=False): - """ a set of checks to validate data before we export the feed""" - - if not all([x in self.data.keys() for x in self.required]): - missing_fields = ", ".join(set(self.required).difference(set(self.data.keys()))) - raise CbInvalidFeed("FeedInfo missing required field(s): %s" % missing_fields) ->>>>>>> origin/yara_v2_development # verify no non-supported keys are present for key in self._data.keys(): @@ -163,17 +134,12 @@ def validate(self, pedantic=False): try: base64.b64decode(self._data[icon_field]) except binascii.Error as err: - raise CbIconError(f"Icon must be base64 data; decode failed with: {err}") + #raise CbIconError(f"Icon must be base64 data; decode failed with: {err}") + pass except TypeError as err: -<<<<<<< HEAD - raise CbIconError(f"Icon must be base64 data; decode failed with: {err}") + #raise CbIconError(f"Icon must be base64 data; decode failed with: {err}") + pass except KeyError: -======= - #raise CbIconError(f"Icon must either be path or base64 data. \ - # Path does not exist and base64 decode failed with: {err}") - pass - except KeyError as err: ->>>>>>> origin/yara_v2_development # we don't want to cause a ruckus if the icon is missing pass diff --git a/main.py b/main.py index 4c80f7c..d2611b4 100644 --- a/main.py +++ b/main.py @@ -118,7 +118,6 @@ def generate_rule_map_remote(yara_rule_path) -> None: time.sleep(.1) -<<<<<<< HEAD def analyze_binaries(md5_hashes: List[str], local: bool) -> Optional: """ Analyze binaries. 
@@ -129,10 +128,6 @@ def analyze_binaries(md5_hashes: List[str], local: bool) -> Optional: :param local: True if local :return: None if there is a problem; results otherwise """ -======= -def analyze_binaries(md5_hashes, local): - #logger.debug(f"md5hashes = {len(md5_hashes)}") ->>>>>>> origin/yara_v2_development if local: try: results = [] @@ -312,12 +307,7 @@ def _rule_logging(start_time: float, num_binaries_skipped: int, num_total_binari logger.debug(" number binaries already scanned: {0}".format(num_binaries_skipped)) logger.debug(" number binaries unavailable: {0}".format(globals.g_num_binaries_not_available)) logger.info("total binaries from db: {0}".format(num_total_binaries)) -<<<<<<< HEAD logger.debug(" binaries per second: {0}:".format(round(num_total_binaries / elapsed_time, 2))) -======= - #logger.info(f"number of binaries queued to be scanned: {num_binaries_queued}") - logger.debug("binaries per second: {0}:".format(round(num_total_binaries / elapsed_time, 2))) ->>>>>>> origin/yara_v2_development logger.info("num binaries score greater than zero: {0}".format( len(BinaryDetonationResult.select().where(BinaryDetonationResult.score > 0)))) logger.info("") diff --git a/main.spec b/main.spec index 24eef6d..4bc3f81 100644 --- a/main.spec +++ b/main.spec @@ -4,15 +4,10 @@ block_cipher = None a = Analysis(['main.py'], - pathex=['.'], + pathex=['/home/zestep/projects/src/github.com/carbonblack/cb-yara-connector'], binaries=[], - datas=[ (HOMEPATH + '/cbapi/response/models/*', 'cbapi/response/models/'), - (HOMEPATH + '/cbapi/protection/models/*', 'cbapi/protection/models/'), - (HOMEPATH + '/cbapi/defense/models/*', 'cbapi/defense/models/') ], - hiddenimports=['celery.fixups', 'celery.fixups.django', 'celery.loaders.app', - 'celery.app.amqp', 'kombu.transport.redis', 'redis', 'celery.backends', - 'celery.backends.redis', 'celery.app.events', 'celery.events', - 'kombu.transport.pyamqp'], + datas=[], + hiddenimports=[], hookspath=[], runtime_hooks=[], excludes=[], @@ -24,14 +19,18 @@ pyz = PYZ(a.pure, a.zipped_data, cipher=block_cipher) exe = EXE(pyz, a.scripts, - a.binaries, - a.zipfiles, - a.datas, [], - name='yara_agent', + exclude_binaries=True, + name='main', debug=False, bootloader_ignore_signals=False, strip=False, - upx=False, - runtime_tmpdir=None, + upx=True, console=True ) +coll = COLLECT(exe, + a.binaries, + a.zipfiles, + a.datas, + strip=False, + upx=True, + name='main') From bfe276fef7575695b2fcad6acb9c09149dcbf03d Mon Sep 17 00:00:00 2001 From: Zachary Estep Date: Mon, 21 Oct 2019 14:23:25 -0400 Subject: [PATCH 044/257] updates to main.spec --- main.spec | 33 ++++++++++++++++++--------------- 1 file changed, 18 insertions(+), 15 deletions(-) diff --git a/main.spec b/main.spec index 4bc3f81..3615129 100644 --- a/main.spec +++ b/main.spec @@ -1,13 +1,20 @@ -# -*- mode: python -*- +import distutils +if distutils.distutils_path.endswith('__init__.py'): + distutils.distutils_path = os.path.dirname(distutils.distutils_path) block_cipher = None a = Analysis(['main.py'], - pathex=['/home/zestep/projects/src/github.com/carbonblack/cb-yara-connector'], + pathex=['.'], binaries=[], - datas=[], - hiddenimports=[], + datas=[ (HOMEPATH + '/cbapi/response/models/*', 'cbapi/response/models/'), + (HOMEPATH + '/cbapi/protection/models/*', 'cbapi/protection/models/'), + (HOMEPATH + '/cbapi/defense/models/*', 'cbapi/defense/models/') ], + hiddenimports=['celery.fixups', 'celery.fixups.django', 'celery.loaders.app', + 'celery.app.amqp', 'kombu.transport.redis', 'redis', 'celery.backends', + 
'celery.backends.redis', 'celery.app.events', 'celery.events', + 'kombu.transport.pyamqp'], hookspath=[], runtime_hooks=[], excludes=[], @@ -19,18 +26,14 @@ pyz = PYZ(a.pure, a.zipped_data, cipher=block_cipher) exe = EXE(pyz, a.scripts, + a.binaries, + a.zipfiles, + a.datas, [], - exclude_binaries=True, - name='main', + name='yara_agent', debug=False, bootloader_ignore_signals=False, strip=False, - upx=True, - console=True ) -coll = COLLECT(exe, - a.binaries, - a.zipfiles, - a.datas, - strip=False, - upx=True, - name='main') + upx=False, + runtime_tmpdir=None, + console=True ) \ No newline at end of file From b5d7e7f5fe9734b65d8519abb94e67879ce2aff2 Mon Sep 17 00:00:00 2001 From: Burton Choinski Date: Tue, 22 Oct 2019 13:43:03 -0400 Subject: [PATCH 045/257] Initial push with new tests --- feed.py | 6 ++-- main.py | 20 ++++++------- tasks.py | 8 ++--- test/config/no_worker.conf | 17 +++++++++++ test/config/sample.conf | 60 ++++++++++++++++++++++++++++++++++++++ test/rules/README.md | 1 + test/rules/test.yara | 14 +++++++++ test/test_cbFeedInfo.py | 16 ++++++---- test/test_main.py | 28 ++++++++++++++++++ test/test_tasks.py | 14 +++++++++ 10 files changed, 159 insertions(+), 25 deletions(-) create mode 100644 test/config/no_worker.conf create mode 100644 test/config/sample.conf create mode 100644 test/rules/README.md create mode 100644 test/rules/test.yara create mode 100644 test/test_main.py create mode 100644 test/test_tasks.py diff --git a/feed.py b/feed.py index 3458295..d4fe607 100644 --- a/feed.py +++ b/feed.py @@ -134,11 +134,9 @@ def validate(self) -> None: try: base64.b64decode(self._data[icon_field]) except binascii.Error as err: - #raise CbIconError(f"Icon must be base64 data; decode failed with: {err}") - pass + logger.debug("Feed '{0}' has incorrect {1} data: {2}".format(self._data['name'], icon_field, err)) except TypeError as err: - #raise CbIconError(f"Icon must be base64 data; decode failed with: {err}") - pass + logger.debug("Feed '{0}' has incorrect {1} data: {2}".format(self._data['name'], icon_field, err)) except KeyError: # we don't want to cause a ruckus if the icon is missing pass diff --git a/main.py b/main.py index d2611b4..abfb2f7 100644 --- a/main.py +++ b/main.py @@ -76,8 +76,6 @@ def generate_yara_rule_map_hash(yara_rule_path: str) -> None: :param yara_rule_path: the path to where the yara rules are stored. 
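     Each entry is the hex md5 digest of one rule file's contents, e.g. "191cc0ea3f9ef90ed1850a3650cd38ed" for the sample rule exercised by test_main.py.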
:return: """ - md5 = hashlib.md5() - temp_list = [] for fn in os.listdir(yara_rule_path): if fn.lower().endswith(".yar") or fn.lower().endswith(".yara"): @@ -86,8 +84,8 @@ def generate_yara_rule_map_hash(yara_rule_path: str) -> None: continue with open(os.path.join(yara_rule_path, fn), 'rb') as fp: data = fp.read() - # TODO: Original logic did not have this, resulting in a cumulative hash for each file (linking them) - md5.new() + # NOTE: Original logic resulted in a cumulative hash for each file (linking them) + md5 = hashlib.md5() md5.update(data) temp_list.append(str(md5.hexdigest())) @@ -238,7 +236,6 @@ def perform(yara_rule_dir): for row in rows: num_total_binaries += 1 md5_hash = row[0].hex() - #logger.debug(md5_hash) # # Check if query returns any rows @@ -267,7 +264,8 @@ def perform(yara_rule_dir): analysis_results = analyze_binaries(md5_hashes, local=(not globals.g_remote)) if analysis_results: for analysis_result in analysis_results: - #logger.debug(f"Analysis result is {analysis_result.md5} {analysis_result.binary_not_available} {analysis_result.long_result} {analysis_result.last_error_msg}") + logger.debug((f"Analysis result is {analysis_result.md5} {analysis_result.binary_not_available}" + f" {analysis_result.long_result} {analysis_result.last_error_msg}")) if analysis_result.last_error_msg: logger.error(analysis_result.last_error_msg) save_results(analysis_results) @@ -284,7 +282,8 @@ def perform(yara_rule_dir): analysis_results = analyze_binaries(md5_hashes, local=(not globals.g_remote)) if analysis_results: for analysis_result in analysis_results: - #logger.debug(f"Analysis result is {analysis_result.md5} {analysis_result.binary_not_available} {analysis_result.long_result} {analysis_result.last_error_msg}") + logger.debug((f"Analysis result is {analysis_result.md5} {analysis_result.binary_not_available}" + f" {analysis_result.long_result} {analysis_result.last_error_msg}")) if analysis_result.last_error_msg: logger.error(analysis_result.last_error_msg) save_results(analysis_results) @@ -316,7 +315,7 @@ def _rule_logging(start_time: float, num_binaries_skipped: int, num_total_binari def verify_config(config_file: str, output_file: str) -> bool: """ Validate the config file. - :param config_file: + :param config_file: The config file to validate :param output_file: :return: True if configuration file is good """ @@ -326,7 +325,7 @@ def verify_config(config_file: str, output_file: str) -> bool: globals.output_file = output_file if not config.has_section('general'): - logger.error("Config file does not have a \'general\' section") + logger.error("Config file does not have a 'general' section") return False if 'worker_type' in config['general']: @@ -342,7 +341,8 @@ def verify_config(config_file: str, output_file: str) -> bool: logger.error("invalid worker_type specified. 
Must be \'local\' or \'remote\'") return False else: - logger.warning("Config file does not specify worker_type, assuming local") + globals.g_remote = False + logger.warning("Config file does not specify 'worker_type', assuming local") if 'yara_rules_dir' in config['general']: globals.g_yara_rules_dir = config['general']['yara_rules_dir'] diff --git a/tasks.py b/tasks.py index 0ab09e9..d18d82b 100644 --- a/tasks.py +++ b/tasks.py @@ -108,8 +108,6 @@ def generate_yara_rule_map_hash(yara_rule_path: str) -> List: :param yara_rule_path: location of the yara rules :return: """ - md5 = hashlib.md5() - temp_list = [] for fn in os.listdir(yara_rule_path): if fn.lower().endswith(".yar") or fn.lower().endswith(".yara"): @@ -118,8 +116,8 @@ def generate_yara_rule_map_hash(yara_rule_path: str) -> List: continue with open(os.path.join(yara_rule_path, fn), 'rb') as fp: data = fp.read() - # TODO: Original logic did not have this, resulting in a cumulative hash for each file (linking them) - md5.new() + # NOTE: Original logic resulted in a cumulative hash for each file (linking them) + md5 = hashlib.md5() md5.update(data) temp_list.append(str(md5.hexdigest())) @@ -161,7 +159,7 @@ def analyze_binary(md5sum: str) -> AnalysisResult: ssl_verify=False, timeout=5) - binary_query = cb.select(Binary).where("md5:{0}".format(md5sum)) + binary_query = cb.select(Binary).where(f"md5:{md5sum}") if binary_query: try: diff --git a/test/config/no_worker.conf b/test/config/no_worker.conf new file mode 100644 index 0000000..a327eaf --- /dev/null +++ b/test/config/no_worker.conf @@ -0,0 +1,17 @@ + [general] + ; worker_type missing, should default to local + yara_rules_dir=yara_rules/ + + postgres_host=localhost + postgres_username=cb + postgres_password=6PGcbuwlQnIuPqOF + postgres_db=cb + postgres_port=5002 + + cb_server_url=https://127.0.0.1:443 + cb_server_token=wFC4he6OBd20IBbB0XkhE5ZsjBQUj1GB + + niceness=1 + concurrent_hashes=8 + disable_rescan=False + num_days_binaries=365 diff --git a/test/config/sample.conf b/test/config/sample.conf new file mode 100644 index 0000000..cdda9ed --- /dev/null +++ b/test/config/sample.conf @@ -0,0 +1,60 @@ + [general] + + ; + ; either run a single worker locally or remotely + ; valid types are 'local' or 'remote' + ; + worker_type=local + + ; + ; ONLY for worker_type of remote + ; IP Address of workers if worker_type is remote + ; + ;broker_url=redis:// + + ; + ; path to directory containing yara rules + ; + yara_rules_dir=yara_rules/ + + ; + ; Cb Response postgres Database settings + ; + postgres_host=localhost + postgres_username=cb + postgres_password=6PGcbuwlQnIuPqOF + postgres_db=cb + postgres_port=5002 + + ; + ; ONLY for worker_type of local + ; Cb Response Server settings for scanning locally. + ; For remote scanning please set these parameters in the yara worker config file + ; Default: https://127.0.0.1 + ; + cb_server_url=https://127.0.0.1:443 + cb_server_token=wFC4he6OBd20IBbB0XkhE5ZsjBQUj1GB + + ; + ; nice value used for this script + ; + niceness=1 + + ; + ; Number of hashes to send to the workers concurrently. Defaults to 8. + ; Recommend setting to the number of workers on the remote system. + ; + concurrent_hashes=8 + + ; + ; If you don't want binaries to be rescanned more than once, regardless of the rules used, set this to True + ; Default: False + ; + disable_rescan=False + + ; + ; The agent will pull binaries up to the configured number of days. 
For example, 365 will pull all binaries with + ; a timestamp within the last year + ; Default: 365 + ; + num_days_binaries=365 diff --git a/test/rules/README.md b/test/rules/README.md new file mode 100644 index 0000000..14d1c9e --- /dev/null +++ b/test/rules/README.md @@ -0,0 +1 @@ +This file should not be picked up as a yara rules file! diff --git a/test/rules/test.yara b/test/rules/test.yara new file mode 100644 index 0000000..87420d5 --- /dev/null +++ b/test/rules/test.yara @@ -0,0 +1,14 @@ +rule test +{ + meta: + author = "Bit9 + Carbon Black " + date = "2015/08" + filetype = "exe" + testing = "yep" + + strings: + $a = "win8_rtm.120725-1247" + + condition: + all of them +} diff --git a/test/test_cbFeedInfo.py b/test/test_cbFeedInfo.py index 2ef5bba..9b3e261 100644 --- a/test/test_cbFeedInfo.py +++ b/test/test_cbFeedInfo.py @@ -96,15 +96,19 @@ def test_field_empty_string(self): def test_bad_icon(self): """ - Ensure we trap bad icon data. + Ensure we trap bad icon data (only raises a logger message) """ data = self.core() - with self.assertRaises(CbIconError) as err: - fi = CbFeedInfo(**data) - fi._data['icon'] = "BOGUS" - fi.validate() - assert "Icon must be base64 data; decode failed with: Incorrect padding" in "{0}".format(err.exception.args[0]) + fi = CbFeedInfo(**data) + fi._data['icon'] = "BOGUS" + fi.validate() + + data = self.core() + + fi = CbFeedInfo(**data) + fi._data['icon_small'] = "BOGUS" + fi.validate() def test_bad_icon_missing_path(self): """ diff --git a/test/test_main.py b/test/test_main.py new file mode 100644 index 0000000..672209c --- /dev/null +++ b/test/test_main.py @@ -0,0 +1,28 @@ +import os +from unittest import TestCase + +import globals +from main import generate_yara_rule_map_hash, verify_config + +TESTS = os.path.abspath(os.path.dirname(__file__)) + + +class TestMain(TestCase): + + def test_generate_yara_rule_map_hash(self): + globals.g_yara_rule_map_hash_list = [] + generate_yara_rule_map_hash(os.path.join(TESTS, "rules")) + self.assertEqual(1, len(globals.g_yara_rule_map_hash_list)) + self.assertEqual("191cc0ea3f9ef90ed1850a3650cd38ed", globals.g_yara_rule_map_hash_list[0]) + + def test_validate_config(self): + globals.output_file = None + ok = verify_config(os.path.join(TESTS, "config", "sample.conf"), "Sample.conf") + self.assertTrue(ok) + self.assertEqual("Sample.conf", globals.output_file) + + def test_validate_config_missing_worker(self): + globals.g_remote = None + ok = verify_config(os.path.join(TESTS, "config", "no_worker.conf"), "Sample.conf") + self.assertTrue(ok) + self.assertFalse(globals.g_remote) diff --git a/test/test_tasks.py b/test/test_tasks.py new file mode 100644 index 0000000..73a349c --- /dev/null +++ b/test/test_tasks.py @@ -0,0 +1,14 @@ +import os +from unittest import TestCase + +from tasks import generate_yara_rule_map_hash + +TESTS = os.path.abspath(os.path.dirname(__file__)) + + +class TestTasks(TestCase): + + def test_generate_yara_rule_map_hash(self): + the_list = generate_yara_rule_map_hash(os.path.join(TESTS, "rules")) + self.assertEqual(1, len(the_list)) + self.assertEqual("191cc0ea3f9ef90ed1850a3650cd38ed", the_list[0]) From b70360332e6f415f152cb40114aa16b52ca95abc Mon Sep 17 00:00:00 2001 From: Zachary Estep Date: Tue, 22 Oct 2019 14:57:37 -0400 Subject: [PATCH 046/257] updates for feature-cb-28796 --- globals.py | 2 ++ main.py | 9 +++++++++ 2 files changed, 11 insertions(+) diff --git a/globals.py b/globals.py index 02db14b..5a122b7 100644 --- a/globals.py +++ b/globals.py @@ -26,3 +26,5 @@ g_disable_rescan = False
g_num_days_binaries = 365 +g_vacuum_seconds = 3600 +g_vacuum_script = './vacuumtables.sh' diff --git a/main.py b/main.py index 3db9026..47c31c7 100644 --- a/main.py +++ b/main.py @@ -172,6 +172,7 @@ def perform(yara_rule_dir): md5_hashes = list() start_time = time.time() + start_datetime = datetime.now() logger.info("Connecting to Postgres database...") try: @@ -194,11 +195,19 @@ def perform(yara_rule_dir): logger.info("Enumerating modulestore...") while True: + if cur.closed: + cur = conn.cursor(name="yara_agent") rows = cur.fetchmany() if len(rows) == 0: break for row in rows: + seconds_since_start = (datetime.now() - start_datetime).seconds + if seconds_since_start >= globals.g_vacuum_seconds: + cur.close() + os.system(globals.g_vacuum_script) + break + num_total_binaries += 1 md5_hash = row[0].hex() From 5f3909beef27a4d3fbe0bd59e2a1eda71a5b151a Mon Sep 17 00:00:00 2001 From: Burton Choinski Date: Wed, 23 Oct 2019 10:47:27 -0400 Subject: [PATCH 047/257] More unit tests and test files --- main.py | 204 +++++++----- test/config/bogus_concurrent_hashes.conf | 19 ++ test/config/bogus_disable_rescan.conf | 19 ++ test/config/bogus_niceness.conf | 19 ++ test/config/bogus_num_days_binaries.conf | 19 ++ test/config/bogus_postgres_port.conf | 19 ++ test/config/bogus_rules_dir.conf | 26 ++ test/config/bogus_worker.conf | 26 ++ test/config/invalid_header.conf | 19 ++ test/config/local_worker_no_server_token.conf | 22 ++ .../config/local_worker_no_server_token2.conf | 23 ++ test/config/local_worker_no_server_url.conf | 22 ++ test/config/local_worker_no_server_url2.conf | 23 ++ test/config/missing_concurrent_hashes.conf | 20 ++ test/config/missing_disable_rescan.conf | 20 ++ test/config/missing_header.conf | 19 ++ test/config/missing_niceness.conf | 20 ++ test/config/missing_num_days_binaries.conf | 20 ++ test/config/missing_postgres_db.conf | 19 ++ test/config/missing_postgres_db2.conf | 20 ++ test/config/missing_postgres_host.conf | 20 ++ test/config/missing_postgres_host2.conf | 21 ++ test/config/missing_postgres_password.conf | 19 ++ test/config/missing_postgres_password2.conf | 20 ++ test/config/missing_postgres_port.conf | 19 ++ test/config/missing_postgres_port2.conf | 20 ++ test/config/missing_postgres_username.conf | 19 ++ test/config/missing_postgres_username2.conf | 20 ++ test/config/missing_rules_dir.conf | 26 ++ test/config/missing_worker.conf | 18 ++ test/config/missing_worker2.conf | 19 ++ test/config/no_rules_dir.conf | 19 ++ test/config/no_rules_dir2.conf | 20 ++ test/config/no_worker.conf | 17 - test/config/remote_worker_no_broker_url.conf | 17 + test/config/remote_worker_no_broker_url2.conf | 17 + test/config/sample.conf | 60 ---- test/config/valid.conf | 19 ++ test/config/valid2.conf | 18 ++ test/test_main.py | 297 +++++++++++++++++- 40 files changed, 1145 insertions(+), 158 deletions(-) create mode 100644 test/config/bogus_concurrent_hashes.conf create mode 100644 test/config/bogus_disable_rescan.conf create mode 100644 test/config/bogus_niceness.conf create mode 100644 test/config/bogus_num_days_binaries.conf create mode 100644 test/config/bogus_postgres_port.conf create mode 100644 test/config/bogus_rules_dir.conf create mode 100644 test/config/bogus_worker.conf create mode 100644 test/config/invalid_header.conf create mode 100644 test/config/local_worker_no_server_token.conf create mode 100644 test/config/local_worker_no_server_token2.conf create mode 100644 test/config/local_worker_no_server_url.conf create mode 100644 test/config/local_worker_no_server_url2.conf create mode 
100644 test/config/missing_concurrent_hashes.conf create mode 100644 test/config/missing_disable_rescan.conf create mode 100644 test/config/missing_header.conf create mode 100644 test/config/missing_niceness.conf create mode 100644 test/config/missing_num_days_binaries.conf create mode 100644 test/config/missing_postgres_db.conf create mode 100644 test/config/missing_postgres_db2.conf create mode 100644 test/config/missing_postgres_host.conf create mode 100644 test/config/missing_postgres_host2.conf create mode 100644 test/config/missing_postgres_password.conf create mode 100644 test/config/missing_postgres_password2.conf create mode 100644 test/config/missing_postgres_port.conf create mode 100644 test/config/missing_postgres_port2.conf create mode 100644 test/config/missing_postgres_username.conf create mode 100644 test/config/missing_postgres_username2.conf create mode 100644 test/config/missing_rules_dir.conf create mode 100644 test/config/missing_worker.conf create mode 100644 test/config/missing_worker2.conf create mode 100644 test/config/no_rules_dir.conf create mode 100644 test/config/no_rules_dir2.conf delete mode 100644 test/config/no_worker.conf create mode 100644 test/config/remote_worker_no_broker_url.conf create mode 100644 test/config/remote_worker_no_broker_url2.conf delete mode 100644 test/config/sample.conf create mode 100644 test/config/valid.conf create mode 100644 test/config/valid2.conf diff --git a/main.py b/main.py index abfb2f7..4bce79c 100644 --- a/main.py +++ b/main.py @@ -5,6 +5,7 @@ import logging import logging.handlers import os +import sys import time import traceback from datetime import datetime, timedelta @@ -33,6 +34,18 @@ celery_logger.setLevel(logging.ERROR) +################################################################################ +# Exception Classes +################################################################################ + +class CbInvalidConfig(Exception): + pass + + +################################################################################ +# Exception Classes +################################################################################ + def generate_feed_from_db() -> None: """ Creates a feed based on specific database information. @@ -312,79 +325,121 @@ def _rule_logging(start_time: float, num_binaries_skipped: int, num_total_binari logger.info("") -def verify_config(config_file: str, output_file: str) -> bool: +def verify_config(config_file: str, output_file: str = None) -> None: """ Validate the config file. 
:param config_file: The config file to validate - :param output_file: - :return: True if configuration file is good + :param output_file: the output file; if not specified equals config file plus ".json" """ + abs_config = os.path.abspath(config_file) + config = configparser.ConfigParser() - config.read(config_file) + if not os.path.exists(config_file): + raise CbInvalidConfig(f"Config file '{abs_config}' does not exist!") - globals.output_file = output_file + try: + config.read(config_file) + except Exception as err: + raise CbInvalidConfig(err) + logger.debug(f"NOTE: using config file '{abs_config}'") if not config.has_section('general'): - logger.error("Config file does not have a 'general' section") - return False - - if 'worker_type' in config['general']: - if config['general']['worker_type'] == 'local': - globals.g_remote = False - elif config['general']['worker_type'] == 'remote': - globals.g_remote = True - if 'broker_url' in config['general']: - app.conf.update( - broker_url=config['general']['broker_url'], - result_backend=config['general']['broker_url']) - else: - logger.error("invalid worker_type specified. Must be \'local\' or \'remote\'") - return False + raise CbInvalidConfig(f"Config file does not have a 'general' section") + + globals.output_file = output_file if output_file is not None else config_file.strip() + ".json" + logger.debug(f"NOTE: output file will be '{globals.output_file}'") + + the_config = config['general'] + if 'worker_type' in the_config: + if the_config['worker_type'] == 'local' or the_config['worker_type'].strip() == "": + globals.g_remote = False # 'local' or empty definition + elif the_config['worker_type'] == 'remote': + globals.g_remote = True # 'remote' + else: # anything else + raise CbInvalidConfig( + f"Invalid worker_type '{the_config['worker_type']}' specified; must be 'local' or 'remote'") else: globals.g_remote = False logger.warning("Config file does not specify 'worker_type', assuming local") - if 'yara_rules_dir' in config['general']: - globals.g_yara_rules_dir = config['general']['yara_rules_dir'] - - if 'postgres_host' in config['general']: - globals.g_postgres_host = config['general']['postgres_host'] + # local/remote configuration data + if not globals.g_remote: + if 'cb_server_url' in the_config and the_config['cb_server_url'].strip() != "": + globals.g_cb_server_url = the_config['cb_server_url'] + else: + raise CbInvalidConfig(f"Local worker configuration missing 'cb_server_url'") + if 'cb_server_token' in the_config and the_config['cb_server_token'].strip() != "": + globals.g_cb_server_token = the_config['cb_server_token'] + else: + raise CbInvalidConfig(f"Local worker configuration missing 'cb_server_token'") + # TODO: validate url & token with test call? + else: + if 'broker_url' in the_config and the_config['broker_url'].strip() != "": + app.conf.update(broker_url=the_config['broker_url'], result_backend=the_config['broker_url']) + else: + raise CbInvalidConfig(f"Remote worker configuration missing 'broker_url'") + # TODO: validate broker with test call? 
+ + if 'yara_rules_dir' in the_config and the_config['yara_rules_dir'].strip() != "": + check = os.path.abspath(the_config['yara_rules_dir']) + if os.path.exists(check): + if os.path.isdir(check): + globals.g_yara_rules_dir = check + else: + raise CbInvalidConfig("Rules dir '{0}' is not actually a directory".format(check)) + else: + raise CbInvalidConfig("Rules dir '{0}' does not exist".format(check)) + else: + raise CbInvalidConfig("You must specify a yara rules directory in your configuration") - if 'postgres_username' in config['general']: - globals.g_postgres_username = config['general']['postgres_username'] + # NOTE: postgres_host has a default value in globals; use and warn if not defined + if 'postgres_host' in the_config and the_config['postgres_host'].strip() != "": + globals.g_postgres_host = the_config['postgres_host'] + else: + logger.warning(f"No defined 'postgres_host'; using default of {globals.g_postgres_host}") - if 'postgres_password' in config['general']: - globals.g_postgres_password = config['general']['postgres_password'] + # NOTE: postgres_username has a default value in globals; use and warn if not defined + if 'postgres_username' in the_config and the_config['postgres_username'].strip() != "": + globals.g_postgres_username = the_config['postgres_username'] + else: + logger.warning(f"No defined 'postgres_username'; using default of {globals.g_postgres_username}") - if 'postgres_db' in config['general']: - globals.g_postgres_db = config['general']['postgres_db'] + if 'postgres_password' in the_config and the_config['postgres_password'].strip() != "": + globals.g_postgres_password = the_config['postgres_password'] + else: + raise CbInvalidConfig("No 'postgres_password' defined in the configuration") - if 'cb_server_url' in config['general']: - globals.g_cb_server_url = config['general']['cb_server_url'] + # NOTE: postgres_db has a default value in globals; use and warn if not defined + if 'postgres_db' in the_config and the_config['postgres_db'].strip() != "": + globals.g_postgres_db = the_config['postgres_db'] + else: + logger.warning(f"No defined 'postgres_db'; using default of {globals.g_postgres_db}") - if 'cb_server_token' in config['general']: - globals.g_cb_server_token = config['general']['cb_server_token'] + # NOTE: postgres_port has a default value in globals; use and warn if not defined + if 'postgres_port' in the_config: + globals.g_postgres_port = int(the_config['postgres_port']) + else: + logger.warning(f"No defined 'postgres_port'; using default of {globals.g_postgres_port}") - if 'niceness' in config['general']: - os.nice(int(config['general']['niceness'])) + # TODO: validate postgres connection with supplied information?
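[Editor's note: a minimal sketch of the connection check the TODO above suggests, assuming psycopg2 (already used elsewhere in main.py) and the globals populated just above; the helper name is hypothetical and not part of this patch.]

    def _test_postgres_connection() -> None:
        # Open and immediately close a connection; a bad host, port, credential
        # or database name raises psycopg2.OperationalError here, long before
        # perform() starts enumerating the modulestore.
        conn = psycopg2.connect(host=globals.g_postgres_host,
                                port=globals.g_postgres_port,
                                user=globals.g_postgres_username,
                                password=globals.g_postgres_password,
                                dbname=globals.g_postgres_db)
        conn.close()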
- if 'concurrent_hashes' in config['general']: - globals.MAX_HASHES = int(config['general']['concurrent_hashes']) + if 'niceness' in the_config: + os.nice(int(the_config['niceness'])) - if 'disable_rescan' in config['general']: - globals.g_disable_rescan = bool(config['general']['disable_rescan']) - logger.debug("Disable Rescan: {}".format(globals.g_disable_rescan)) + if 'concurrent_hashes' in the_config: + globals.MAX_HASHES = int(the_config['concurrent_hashes']) + logger.debug("Concurrent Hashes: {0}".format(globals.MAX_HASHES)) - if 'num_days_binaries' in config['general']: - globals.g_num_days_binaries = int(config['general']['num_days_binaries']) - logger.debug("Number of days for binaries: {}".format(globals.g_num_days_binaries)) + if 'disable_rescan' in the_config: + globals.g_disable_rescan = bool(the_config['disable_rescan']) + logger.debug("Disable Rescan: {0}".format(globals.g_disable_rescan)) - return True + if 'num_days_binaries' in the_config: + globals.g_num_days_binaries = int(the_config['num_days_binaries']) + logger.debug("Number of days for binaries: {0}".format(globals.g_num_days_binaries)) def main(): - global logger - try: # check for single operation singleton.SingleInstance() @@ -418,30 +473,35 @@ def main(): handler.setFormatter(formatter) logger.addHandler(handler) - if verify_config(args.config_file, args.output_file): - if args.validate_yara_rules: - logger.info("Validating yara rules in directory: {0}".format(globals.g_yara_rules_dir)) - yara_rule_map = generate_rule_map(globals.g_yara_rules_dir) - try: - yara.compile(filepaths=yara_rule_map) - except Exception as err: - logger.error(f"There were errors compiling yara rules: {err}") - logger.error(traceback.format_exc()) - else: - logger.info("All yara rules compiled successfully") - else: - try: - globals.g_yara_rule_map = generate_rule_map(globals.g_yara_rules_dir) - generate_yara_rule_map_hash(globals.g_yara_rules_dir) - database = SqliteDatabase('binary.db') - db.initialize(database) - db.connect() - db.create_tables([BinaryDetonationResult]) - generate_feed_from_db() - perform(globals.g_yara_rules_dir) - except Exception as err: - logger.error(f"There were errors executing yara rules: {err}") - logger.error(traceback.format_exc()) + # Verify the configuration file and load up important global variables + try: + verify_config(args.config_file, args.output_file) + except Exception as err: + logger.error(f"Unable to continue due to a configuration problem: {err}") + sys.exit(1) + + if args.validate_yara_rules: + logger.info("Validating yara rules in directory: {0}".format(globals.g_yara_rules_dir)) + yara_rule_map = generate_rule_map(globals.g_yara_rules_dir) + try: + yara.compile(filepaths=yara_rule_map) + logger.info("All yara rules compiled successfully") + except Exception as err: + logger.error(f"There were errors compiling yara rules: {err}") + logger.error(traceback.format_exc()) + else: + try: + globals.g_yara_rule_map = generate_rule_map(globals.g_yara_rules_dir) + generate_yara_rule_map_hash(globals.g_yara_rules_dir) + database = SqliteDatabase('binary.db') + db.initialize(database) + db.connect() + db.create_tables([BinaryDetonationResult]) + generate_feed_from_db() + perform(globals.g_yara_rules_dir) + except Exception as err: + logger.error(f"There were errors executing yara rules: {err}") + logger.error(traceback.format_exc()) if __name__ == "__main__": diff --git a/test/config/bogus_concurrent_hashes.conf b/test/config/bogus_concurrent_hashes.conf new file mode 100644 index 0000000..5978db1 ---
/dev/null +++ b/test/config/bogus_concurrent_hashes.conf @@ -0,0 +1,19 @@ +[general] +worker_type=local + +; ONLY for worker_type of local +cb_server_url=https://127.0.0.1:443 +cb_server_token=wFC4he6OBd20IBbB0XkhE5ZsjBQUj1GB + +yara_rules_dir=./rules + +postgres_host=localhost +postgres_username=cb +postgres_password=6PGcbuwlQnIuPqOF +postgres_db=cb +postgres_port=5002 + +niceness=1 +concurrent_hashes=BOGUS +disable_rescan=False +num_days_binaries=365 diff --git a/test/config/bogus_disable_rescan.conf b/test/config/bogus_disable_rescan.conf new file mode 100644 index 0000000..43bb864 --- /dev/null +++ b/test/config/bogus_disable_rescan.conf @@ -0,0 +1,19 @@ +[general] +worker_type=local + +; ONLY for worker_type of local +cb_server_url=https://127.0.0.1:443 +cb_server_token=wFC4he6OBd20IBbB0XkhE5ZsjBQUj1GB + +yara_rules_dir=./rules + +postgres_host=localhost +postgres_username=cb +postgres_password=6PGcbuwlQnIuPqOF +postgres_db=cb +postgres_port=5002 + +niceness=1 +concurrent_hashes=8 +disable_rescan=BOGUS +num_days_binaries=365 diff --git a/test/config/bogus_niceness.conf b/test/config/bogus_niceness.conf new file mode 100644 index 0000000..487e1f5 --- /dev/null +++ b/test/config/bogus_niceness.conf @@ -0,0 +1,19 @@ +[general] +worker_type=local + +; ONLY for worker_type of local +cb_server_url=https://127.0.0.1:443 +cb_server_token=wFC4he6OBd20IBbB0XkhE5ZsjBQUj1GB + +yara_rules_dir=./rules + +postgres_host=localhost +postgres_username=cb +postgres_password=6PGcbuwlQnIuPqOF +postgres_db=cb +postgres_port=5002 + +niceness=BOGUS +concurrent_hashes=8 +disable_rescan=False +num_days_binaries=365 diff --git a/test/config/bogus_num_days_binaries.conf b/test/config/bogus_num_days_binaries.conf new file mode 100644 index 0000000..59149e4 --- /dev/null +++ b/test/config/bogus_num_days_binaries.conf @@ -0,0 +1,19 @@ +[general] +worker_type=local + +; ONLY for worker_type of local +cb_server_url=https://127.0.0.1:443 +cb_server_token=wFC4he6OBd20IBbB0XkhE5ZsjBQUj1GB + +yara_rules_dir=./rules + +postgres_host=localhost +postgres_username=cb +postgres_password=6PGcbuwlQnIuPqOF +postgres_db=cb +postgres_port=5002 + +niceness=1 +concurrent_hashes=8 +disable_rescan=False +num_days_binaries=BOGUS diff --git a/test/config/bogus_postgres_port.conf b/test/config/bogus_postgres_port.conf new file mode 100644 index 0000000..7b64f50 --- /dev/null +++ b/test/config/bogus_postgres_port.conf @@ -0,0 +1,19 @@ +[general] +worker_type=local + +; ONLY for worker_type of local +cb_server_url=https://127.0.0.1:443 +cb_server_token=wFC4he6OBd20IBbB0XkhE5ZsjBQUj1GB + +yara_rules_dir=./rules + +postgres_host=localhost +postgres_username=cb +postgres_password=6PGcbuwlQnIuPqOF +postgres_db=cb +postgres_port=BOGUS + +niceness=1 +concurrent_hashes=8 +disable_rescan=False +num_days_binaries=365 diff --git a/test/config/bogus_rules_dir.conf b/test/config/bogus_rules_dir.conf new file mode 100644 index 0000000..f7b1bef --- /dev/null +++ b/test/config/bogus_rules_dir.conf @@ -0,0 +1,26 @@ +[general] +worker_type=local + +; ONLY for worker_type of remote +; IP Address of workers if worker_type is remote +;broker_url=redis:// + +yara_rules_dir=./rules/README.md + +postgres_host=localhost +postgres_username=cb +postgres_password=6PGcbuwlQnIuPqOF +postgres_db=cb +postgres_port=5002 + +; ONLY for worker_type of local +; Cb Response Server settings for scanning locally. 
+; For remote scanning please set these parameters in the yara worker config file +; Default: https://127.0.0.1 +cb_server_url=https://127.0.0.1:443 +cb_server_token=wFC4he6OBd20IBbB0XkhE5ZsjBQUj1GB + +niceness=1 +concurrent_hashes=8 +disable_rescan=False +num_days_binaries=365 diff --git a/test/config/bogus_worker.conf b/test/config/bogus_worker.conf new file mode 100644 index 0000000..01167f0 --- /dev/null +++ b/test/config/bogus_worker.conf @@ -0,0 +1,26 @@ +[general] +worker_type=bogus + +; ONLY for worker_type of remote +; IP Address of workers if worker_type is remote +;broker_url=redis:// + +yara_rules_dir=./rules + +postgres_host=localhost +postgres_username=cb +postgres_password=6PGcbuwlQnIuPqOF +postgres_db=cb +postgres_port=5002 + +; ONLY for worker_type of local +; Cb Response Server settings for scanning locally. +; For remote scanning please set these parameters in the yara worker config file +; Default: https://127.0.0.1 +cb_server_url=https://127.0.0.1:443 +cb_server_token=wFC4he6OBd20IBbB0XkhE5ZsjBQUj1GB + +niceness=1 +concurrent_hashes=8 +disable_rescan=False +num_days_binaries=365 diff --git a/test/config/invalid_header.conf b/test/config/invalid_header.conf new file mode 100644 index 0000000..622fb5b --- /dev/null +++ b/test/config/invalid_header.conf @@ -0,0 +1,19 @@ +[bogus] +worker_type=local + +; ONLY for worker_type of local +cb_server_url=https://127.0.0.1:443 +cb_server_token=wFC4he6OBd20IBbB0XkhE5ZsjBQUj1GB + +yara_rules_dir=./rules + +postgres_host=localhost +postgres_username=cb +postgres_password=6PGcbuwlQnIuPqOF +postgres_db=cb +postgres_port=5002 + +niceness=1 +concurrent_hashes=8 +disable_rescan=False +num_days_binaries=365 diff --git a/test/config/local_worker_no_server_token.conf b/test/config/local_worker_no_server_token.conf new file mode 100644 index 0000000..db60931 --- /dev/null +++ b/test/config/local_worker_no_server_token.conf @@ -0,0 +1,22 @@ +[general] +worker_type=local + +; ONLY for worker_type of local +; Cb Response Server settings for scanning locally. +; For remote scanning please set these parameters in the yara worker config file +; Default: https://127.0.0.1 +cb_server_url=https://127.0.0.1:443 +; MISSING: cb_server_token + +yara_rules_dir=./rules + +postgres_host=localhost +postgres_username=cb +postgres_password=6PGcbuwlQnIuPqOF +postgres_db=cb +postgres_port=5002 + +niceness=1 +concurrent_hashes=8 +disable_rescan=False +num_days_binaries=365 diff --git a/test/config/local_worker_no_server_token2.conf b/test/config/local_worker_no_server_token2.conf new file mode 100644 index 0000000..38cb5d3 --- /dev/null +++ b/test/config/local_worker_no_server_token2.conf @@ -0,0 +1,23 @@ +[general] +worker_type=local + +; ONLY for worker_type of local +; Cb Response Server settings for scanning locally. +; For remote scanning please set these parameters in the yara worker config file +; Default: https://127.0.0.1 +cb_server_url=https://127.0.0.1:443 +; undefined +cb_server_token= + +yara_rules_dir=./rules + +postgres_host=localhost +postgres_username=cb +postgres_password=6PGcbuwlQnIuPqOF +postgres_db=cb +postgres_port=5002 + +niceness=1 +concurrent_hashes=8 +disable_rescan=False +num_days_binaries=365 diff --git a/test/config/local_worker_no_server_url.conf b/test/config/local_worker_no_server_url.conf new file mode 100644 index 0000000..48556f8 --- /dev/null +++ b/test/config/local_worker_no_server_url.conf @@ -0,0 +1,22 @@ +[general] +worker_type=local + +; ONLY for worker_type of local +; Cb Response Server settings for scanning locally. 
+; For remote scanning please set these parameters in the yara worker config file +; Default: https://127.0.0.1 +; MISSING: cb_server_url +cb_server_token=wFC4he6OBd20IBbB0XkhE5ZsjBQUj1GB + +yara_rules_dir=./rules + +postgres_host=localhost +postgres_username=cb +postgres_password=6PGcbuwlQnIuPqOF +postgres_db=cb +postgres_port=5002 + +niceness=1 +concurrent_hashes=8 +disable_rescan=False +num_days_binaries=365 diff --git a/test/config/local_worker_no_server_url2.conf b/test/config/local_worker_no_server_url2.conf new file mode 100644 index 0000000..64d03a5 --- /dev/null +++ b/test/config/local_worker_no_server_url2.conf @@ -0,0 +1,23 @@ +[general] +worker_type=local + +; ONLY for worker_type of local +; Cb Response Server settings for scanning locally. +; For remote scanning please set these parameters in the yara worker config file +; Default: https://127.0.0.1 +; undefined +cb_server_url= +cb_server_token=wFC4he6OBd20IBbB0XkhE5ZsjBQUj1GB + +yara_rules_dir=./rules + +postgres_host=localhost +postgres_username=cb +postgres_password=6PGcbuwlQnIuPqOF +postgres_db=cb +postgres_port=5002 + +niceness=1 +concurrent_hashes=8 +disable_rescan=False +num_days_binaries=365 diff --git a/test/config/missing_concurrent_hashes.conf b/test/config/missing_concurrent_hashes.conf new file mode 100644 index 0000000..5d87506 --- /dev/null +++ b/test/config/missing_concurrent_hashes.conf @@ -0,0 +1,20 @@ +[general] +worker_type=local + +; ONLY for worker_type of local +cb_server_url=https://127.0.0.1:443 +cb_server_token=wFC4he6OBd20IBbB0XkhE5ZsjBQUj1GB + +yara_rules_dir=./rules + +postgres_host=localhost +postgres_username=cb +postgres_password=6PGcbuwlQnIuPqOF +postgres_db=cb +postgres_port=5002 + +niceness=1 +; undefined +concurrent_hashes= +disable_rescan=False +num_days_binaries=365 diff --git a/test/config/missing_disable_rescan.conf b/test/config/missing_disable_rescan.conf new file mode 100644 index 0000000..4a5078d --- /dev/null +++ b/test/config/missing_disable_rescan.conf @@ -0,0 +1,20 @@ +[general] +worker_type=local + +; ONLY for worker_type of local +cb_server_url=https://127.0.0.1:443 +cb_server_token=wFC4he6OBd20IBbB0XkhE5ZsjBQUj1GB + +yara_rules_dir=./rules + +postgres_host=localhost +postgres_username=cb +postgres_password=6PGcbuwlQnIuPqOF +postgres_db=cb +postgres_port=5002 + +niceness=1 +concurrent_hashes=8 +; undefined +disable_rescan= +num_days_binaries=365 diff --git a/test/config/missing_header.conf b/test/config/missing_header.conf new file mode 100644 index 0000000..c6cfe47 --- /dev/null +++ b/test/config/missing_header.conf @@ -0,0 +1,19 @@ +; MISSING: [general] +worker_type=local + +; ONLY for worker_type of local +cb_server_url=https://127.0.0.1:443 +cb_server_token=wFC4he6OBd20IBbB0XkhE5ZsjBQUj1GB + +yara_rules_dir=./rules + +postgres_host=localhost +postgres_username=cb +postgres_password=6PGcbuwlQnIuPqOF +postgres_db=cb +postgres_port=5002 + +niceness=1 +concurrent_hashes=8 +disable_rescan=False +num_days_binaries=365 diff --git a/test/config/missing_niceness.conf b/test/config/missing_niceness.conf new file mode 100644 index 0000000..36d1715 --- /dev/null +++ b/test/config/missing_niceness.conf @@ -0,0 +1,20 @@ +[general] +worker_type=local + +; ONLY for worker_type of local +cb_server_url=https://127.0.0.1:443 +cb_server_token=wFC4he6OBd20IBbB0XkhE5ZsjBQUj1GB + +yara_rules_dir=./rules + +postgres_host=localhost +postgres_username=cb +postgres_password=6PGcbuwlQnIuPqOF +postgres_db=cb +postgres_port=5002 + +; undefined +niceness= +concurrent_hashes=8 +disable_rescan=False 
+num_days_binaries=365 diff --git a/test/config/missing_num_days_binaries.conf b/test/config/missing_num_days_binaries.conf new file mode 100644 index 0000000..1cc21fa --- /dev/null +++ b/test/config/missing_num_days_binaries.conf @@ -0,0 +1,20 @@ +[general] +worker_type=local + +; ONLY for worker_type of local +cb_server_url=https://127.0.0.1:443 +cb_server_token=wFC4he6OBd20IBbB0XkhE5ZsjBQUj1GB + +yara_rules_dir=./rules + +postgres_host=localhost +postgres_username=cb +postgres_password=6PGcbuwlQnIuPqOF +postgres_db=cb +postgres_port=5002 + +niceness=1 +concurrent_hashes=8 +disable_rescan=False +; undefined +num_days_binaries= diff --git a/test/config/missing_postgres_db.conf b/test/config/missing_postgres_db.conf new file mode 100644 index 0000000..04b0589 --- /dev/null +++ b/test/config/missing_postgres_db.conf @@ -0,0 +1,19 @@ +[general] +worker_type=local + +; ONLY for worker_type of local +cb_server_url=https://127.0.0.1:443 +cb_server_token=wFC4he6OBd20IBbB0XkhE5ZsjBQUj1GB + +yara_rules_dir=./rules + +postgres_host=localhost +postgres_username=cb +postgres_password=6PGcbuwlQnIuPqOF +; MISSING: postgres_db +postgres_port=5002 + +niceness=1 +concurrent_hashes=8 +disable_rescan=False +num_days_binaries=365 diff --git a/test/config/missing_postgres_db2.conf b/test/config/missing_postgres_db2.conf new file mode 100644 index 0000000..cd02280 --- /dev/null +++ b/test/config/missing_postgres_db2.conf @@ -0,0 +1,20 @@ +[general] +worker_type=local + +; ONLY for worker_type of local +cb_server_url=https://127.0.0.1:443 +cb_server_token=wFC4he6OBd20IBbB0XkhE5ZsjBQUj1GB + +yara_rules_dir=./rules + +postgres_host=localhost +postgres_username=cb +postgres_password=6PGcbuwlQnIuPqOF +; undefined +postgres_db= +postgres_port=5002 + +niceness=1 +concurrent_hashes=8 +disable_rescan=False +num_days_binaries=365 diff --git a/test/config/missing_postgres_host.conf b/test/config/missing_postgres_host.conf new file mode 100644 index 0000000..cc4b323 --- /dev/null +++ b/test/config/missing_postgres_host.conf @@ -0,0 +1,20 @@ +[general] +worker_type=local + +; ONLY for worker_type of local +cb_server_url=https://127.0.0.1:443 +cb_server_token=wFC4he6OBd20IBbB0XkhE5ZsjBQUj1GB + +yara_rules_dir=./rules + +; MISSING: postgres_host +postgres_username=cb +postgres_password=6PGcbuwlQnIuPqOF +postgres_db=cb +postgres_port=5002 + + +niceness=1 +concurrent_hashes=8 +disable_rescan=False +num_days_binaries=365 diff --git a/test/config/missing_postgres_host2.conf b/test/config/missing_postgres_host2.conf new file mode 100644 index 0000000..4581a39 --- /dev/null +++ b/test/config/missing_postgres_host2.conf @@ -0,0 +1,21 @@ +[general] +worker_type=local + +; ONLY for worker_type of local +cb_server_url=https://127.0.0.1:443 +cb_server_token=wFC4he6OBd20IBbB0XkhE5ZsjBQUj1GB + +yara_rules_dir=./rules + +; undefined +postgres_host= +postgres_username=cb +postgres_password=6PGcbuwlQnIuPqOF +postgres_db=cb +postgres_port=5002 + + +niceness=1 +concurrent_hashes=8 +disable_rescan=False +num_days_binaries=365 diff --git a/test/config/missing_postgres_password.conf b/test/config/missing_postgres_password.conf new file mode 100644 index 0000000..13c68a6 --- /dev/null +++ b/test/config/missing_postgres_password.conf @@ -0,0 +1,19 @@ +[general] +worker_type=local + +; ONLY for worker_type of local +cb_server_url=https://127.0.0.1:443 +cb_server_token=wFC4he6OBd20IBbB0XkhE5ZsjBQUj1GB + +yara_rules_dir=./rules + +postgres_host=localhost +postgres_username=cb +; MISSING: postgres_password +postgres_db=cb +postgres_port=5002 + 
+niceness=1 +concurrent_hashes=8 +disable_rescan=False +num_days_binaries=365 diff --git a/test/config/missing_postgres_password2.conf b/test/config/missing_postgres_password2.conf new file mode 100644 index 0000000..2ae51b5 --- /dev/null +++ b/test/config/missing_postgres_password2.conf @@ -0,0 +1,20 @@ +[general] +worker_type=local + +; ONLY for worker_type of local +cb_server_url=https://127.0.0.1:443 +cb_server_token=wFC4he6OBd20IBbB0XkhE5ZsjBQUj1GB + +yara_rules_dir=./rules + +postgres_host=localhost +postgres_username=cb +; undefined +postgres_password= +postgres_db=cb +postgres_port=5002 + +niceness=1 +concurrent_hashes=8 +disable_rescan=False +num_days_binaries=365 diff --git a/test/config/missing_postgres_port.conf b/test/config/missing_postgres_port.conf new file mode 100644 index 0000000..8b69a87 --- /dev/null +++ b/test/config/missing_postgres_port.conf @@ -0,0 +1,19 @@ +[general] +worker_type=local + +; ONLY for worker_type of local +cb_server_url=https://127.0.0.1:443 +cb_server_token=wFC4he6OBd20IBbB0XkhE5ZsjBQUj1GB + +yara_rules_dir=./rules + +postgres_host=localhost +postgres_username=cb +postgres_password=6PGcbuwlQnIuPqOF +postgres_db=cb +; MISSING: postgres_port + +niceness=1 +concurrent_hashes=8 +disable_rescan=False +num_days_binaries=365 diff --git a/test/config/missing_postgres_port2.conf b/test/config/missing_postgres_port2.conf new file mode 100644 index 0000000..e88c40f --- /dev/null +++ b/test/config/missing_postgres_port2.conf @@ -0,0 +1,20 @@ +[general] +worker_type=local + +; ONLY for worker_type of local +cb_server_url=https://127.0.0.1:443 +cb_server_token=wFC4he6OBd20IBbB0XkhE5ZsjBQUj1GB + +yara_rules_dir=./rules + +postgres_host=localhost +postgres_username=cb +postgres_password=6PGcbuwlQnIuPqOF +postgres_db=cb +; undefined +postgres_port= + +niceness=1 +concurrent_hashes=8 +disable_rescan=False +num_days_binaries=365 diff --git a/test/config/missing_postgres_username.conf b/test/config/missing_postgres_username.conf new file mode 100644 index 0000000..f3b2a50 --- /dev/null +++ b/test/config/missing_postgres_username.conf @@ -0,0 +1,19 @@ +[general] +worker_type=local + +; ONLY for worker_type of local +cb_server_url=https://127.0.0.1:443 +cb_server_token=wFC4he6OBd20IBbB0XkhE5ZsjBQUj1GB + +yara_rules_dir=./rules + +postgres_host=localhost +; MISSING: postgres_username +postgres_password=6PGcbuwlQnIuPqOF +postgres_db=cb +postgres_port=5002 + +niceness=1 +concurrent_hashes=8 +disable_rescan=False +num_days_binaries=365 diff --git a/test/config/missing_postgres_username2.conf b/test/config/missing_postgres_username2.conf new file mode 100644 index 0000000..12e121e --- /dev/null +++ b/test/config/missing_postgres_username2.conf @@ -0,0 +1,20 @@ +[general] +worker_type=local + +; ONLY for worker_type of local +cb_server_url=https://127.0.0.1:443 +cb_server_token=wFC4he6OBd20IBbB0XkhE5ZsjBQUj1GB + +yara_rules_dir=./rules + +postgres_host=localhost +; undefined +postgres_username= +postgres_password=6PGcbuwlQnIuPqOF +postgres_db=cb +postgres_port=5002 + +niceness=1 +concurrent_hashes=8 +disable_rescan=False +num_days_binaries=365 diff --git a/test/config/missing_rules_dir.conf b/test/config/missing_rules_dir.conf new file mode 100644 index 0000000..78742aa --- /dev/null +++ b/test/config/missing_rules_dir.conf @@ -0,0 +1,26 @@ +[general] +worker_type=local + +; ONLY for worker_type of remote +; IP Address of workers if worker_type is remote +;broker_url=redis:// + +yara_rules_dir=./rules_not_here + +postgres_host=localhost +postgres_username=cb 
+postgres_password=6PGcbuwlQnIuPqOF +postgres_db=cb +postgres_port=5002 + +; ONLY for worker_type of local +; Cb Response Server settings for scanning locally. +; For remote scanning please set these parameters in the yara worker config file +; Default: https://127.0.0.1 +cb_server_url=https://127.0.0.1:443 +cb_server_token=wFC4he6OBd20IBbB0XkhE5ZsjBQUj1GB + +niceness=1 +concurrent_hashes=8 +disable_rescan=False +num_days_binaries=365 diff --git a/test/config/missing_worker.conf b/test/config/missing_worker.conf new file mode 100644 index 0000000..ed07cae --- /dev/null +++ b/test/config/missing_worker.conf @@ -0,0 +1,18 @@ +[general] +; MISSING: worker_type + +cb_server_url=https://127.0.0.1:443 +cb_server_token=wFC4he6OBd20IBbB0XkhE5ZsjBQUj1GB + +yara_rules_dir=./rules + +postgres_host=localhost +postgres_username=cb +postgres_password=6PGcbuwlQnIuPqOF +postgres_db=cb +postgres_port=5002 + +niceness=1 +concurrent_hashes=8 +disable_rescan=False +num_days_binaries=365 diff --git a/test/config/missing_worker2.conf b/test/config/missing_worker2.conf new file mode 100644 index 0000000..123fb8e --- /dev/null +++ b/test/config/missing_worker2.conf @@ -0,0 +1,19 @@ +[general] +; undefined +worker_type= + +cb_server_url=https://127.0.0.1:443 +cb_server_token=wFC4he6OBd20IBbB0XkhE5ZsjBQUj1GB + +yara_rules_dir=./rules + +postgres_host=localhost +postgres_username=cb +postgres_password=6PGcbuwlQnIuPqOF +postgres_db=cb +postgres_port=5002 + +niceness=1 +concurrent_hashes=8 +disable_rescan=False +num_days_binaries=365 diff --git a/test/config/no_rules_dir.conf b/test/config/no_rules_dir.conf new file mode 100644 index 0000000..f77f803 --- /dev/null +++ b/test/config/no_rules_dir.conf @@ -0,0 +1,19 @@ +[general] +worker_type=local + +; ONLY for worker_type of local +cb_server_url=https://127.0.0.1:443 +cb_server_token=wFC4he6OBd20IBbB0XkhE5ZsjBQUj1GB + +; MISSING: yara_rules_dir + +postgres_host=localhost +postgres_username=cb +postgres_password=6PGcbuwlQnIuPqOF +postgres_db=cb +postgres_port=5002 + +niceness=1 +concurrent_hashes=8 +disable_rescan=False +num_days_binaries=365 diff --git a/test/config/no_rules_dir2.conf b/test/config/no_rules_dir2.conf new file mode 100644 index 0000000..4978913 --- /dev/null +++ b/test/config/no_rules_dir2.conf @@ -0,0 +1,20 @@ +[general] +worker_type=local + +; ONLY for worker_type of local +cb_server_url=https://127.0.0.1:443 +cb_server_token=wFC4he6OBd20IBbB0XkhE5ZsjBQUj1GB + +; undefined +yara_rules_dir= + +postgres_host=localhost +postgres_username=cb +postgres_password=6PGcbuwlQnIuPqOF +postgres_db=cb +postgres_port=5002 + +niceness=1 +concurrent_hashes=8 +disable_rescan=False +num_days_binaries=365 diff --git a/test/config/no_worker.conf b/test/config/no_worker.conf deleted file mode 100644 index a327eaf..0000000 --- a/test/config/no_worker.conf +++ /dev/null @@ -1,17 +0,0 @@ - [general] - ; worker_type missing, should default to local - yara_rules_dir=yara_rules/ - - postgres_host=localhost - postgres_username=cb - postgres_password=6PGcbuwlQnIuPqOF - postgres_db=cb - postgres_port=5002 - - cb_server_url=https://127.0.0.1:443 - cb_server_token=wFC4he6OBd20IBbB0XkhE5ZsjBQUj1GB - - niceness=1 - concurrent_hashes=8 - disable_rescan=False - num_days_binaries=365 diff --git a/test/config/remote_worker_no_broker_url.conf b/test/config/remote_worker_no_broker_url.conf new file mode 100644 index 0000000..9f5314c --- /dev/null +++ b/test/config/remote_worker_no_broker_url.conf @@ -0,0 +1,17 @@ +[general] +worker_type=remote + +; MISSING: broker_url + 
+yara_rules_dir=./rules + +postgres_host=localhost +postgres_username=cb +postgres_password=6PGcbuwlQnIuPqOF +postgres_db=cb +postgres_port=5002 + +niceness=1 +concurrent_hashes=8 +disable_rescan=False +num_days_binaries=365 diff --git a/test/config/remote_worker_no_broker_url2.conf b/test/config/remote_worker_no_broker_url2.conf new file mode 100644 index 0000000..b399874 --- /dev/null +++ b/test/config/remote_worker_no_broker_url2.conf @@ -0,0 +1,17 @@ +[general] +worker_type=remote + +broker_url= + +yara_rules_dir=./rules + +postgres_host=localhost +postgres_username=cb +postgres_password=6PGcbuwlQnIuPqOF +postgres_db=cb +postgres_port=5002 + +niceness=1 +concurrent_hashes=8 +disable_rescan=False +num_days_binaries=365 diff --git a/test/config/sample.conf b/test/config/sample.conf deleted file mode 100644 index cdda9ed..0000000 --- a/test/config/sample.conf +++ /dev/null @@ -1,60 +0,0 @@ - [general] - - ; - ; either run a single worker locally or remotely - ; valid types are 'local' or 'remote' - ; - worker_type=local - - ; - ; ONLY for worker_type of remote - ; IP Address of workers if worker_type is remote - ; - ;broker_url=redis:// - - ; - ; path to directory containing yara rules - ; - yara_rules_dir=yara_rules/ - - ; - ; Cb Response postgres Database settings - ; - postgres_host=localhost - postgres_username=cb - postgres_password=6PGcbuwlQnIuPqOF - postgres_db=cb - postgres_port=5002 - - ; - ; ONLY for worker_type of local - ; Cb Response Server settings for scanning locally. - ; For remote scanning please set these parameters in the yara worker config file - ; Default: https://127.0.0.1 - ; - cb_server_url=https://127.0.0.1:443 - cb_server_token=wFC4he6OBd20IBbB0XkhE5ZsjBQUj1GB - - ; - ; nice value used for this script - ; - niceness=1 - - ; - ; Number of hashes to send to the workers concurrently. Defaults to 8. - ; Recommend setting to the number of workers on the remote system. - ; - concurrent_hashes=8 - - ; - ; If you don't want binaries to be rescanned more than once, regardless of the rules used, set this to True - ; Default: False - ; - disable_rescan=False - - ; - ; The agent will pull binaries up to the configured number of days. 
For example, 365 will pull all binaries with - ; a timestamp within the last year - ; Default: 365 - ; - num_days_binaries=365 diff --git a/test/config/valid.conf b/test/config/valid.conf new file mode 100644 index 0000000..cdbdea9 --- /dev/null +++ b/test/config/valid.conf @@ -0,0 +1,19 @@ +[general] +worker_type=local + +; ONLY for worker_type of local +cb_server_url=https://127.0.0.1:443 +cb_server_token=wFC4he6OBd20IBbB0XkhE5ZsjBQUj1GB + +yara_rules_dir=./rules + +postgres_host=localhost +postgres_username=cb +postgres_password=6PGcbuwlQnIuPqOF +postgres_db=cb +postgres_port=5002 + +niceness=1 +concurrent_hashes=8 +disable_rescan=False +num_days_binaries=365 diff --git a/test/config/valid2.conf b/test/config/valid2.conf new file mode 100644 index 0000000..5907483 --- /dev/null +++ b/test/config/valid2.conf @@ -0,0 +1,18 @@ +[general] +worker_type=remote + +; ONLY for worker_type of remote +broker_url=redis:// + +yara_rules_dir=./rules + +postgres_host=localhost +postgres_username=cb +postgres_password=6PGcbuwlQnIuPqOF +postgres_db=cb +postgres_port=5002 + +niceness=1 +concurrent_hashes=8 +disable_rescan=False +num_days_binaries=365 diff --git a/test/test_main.py b/test/test_main.py index 672209c..47ebd3c 100644 --- a/test/test_main.py +++ b/test/test_main.py @@ -2,27 +2,306 @@ from unittest import TestCase import globals -from main import generate_yara_rule_map_hash, verify_config +from main import CbInvalidConfig, generate_yara_rule_map_hash, verify_config TESTS = os.path.abspath(os.path.dirname(__file__)) class TestMain(TestCase): - def test_generate_yara_rule_map_hash(self): + def test_01_generate_yara_rule_map_hash(self): globals.g_yara_rule_map_hash_list = [] generate_yara_rule_map_hash(os.path.join(TESTS, "rules")) self.assertEqual(1, len(globals.g_yara_rule_map_hash_list)) self.assertEqual("191cc0ea3f9ef90ed1850a3650cd38ed", globals.g_yara_rule_map_hash_list[0]) - def test_validate_config(self): + def test_02a_validate_config(self): + # valid local globals.output_file = None - ok = verify_config(os.path.join(TESTS, "config", "sample.conf"), "Sample.conf") - self.assertTrue(ok) - self.assertEqual("Sample.conf", globals.output_file) + globals.g_remote = None + verify_config(os.path.join(TESTS, "config", "valid.conf")) + self.assertTrue(globals.output_file.endswith("valid.conf.json")) + self.assertFalse(globals.g_remote) + + # valid remote + globals.g_remote = None + verify_config(os.path.join(TESTS, "config", "valid2.conf"), "sample.json") + self.assertTrue(globals.output_file.endswith("sample.json")) + self.assertTrue(globals.g_remote) + + def test_02b_missing_config(self): + """ + Ensure a missing config file is detected. + """ + with self.assertRaises(CbInvalidConfig) as err: + verify_config(os.path.join(TESTS, "config", "no-such-config.conf")) + assert "does not exist!" in "{0}".format(err.exception.args[0]) + + def test_03a_config_missing_header(self): + """ + Ensure we detect a configuration file with no section header. + """ + with self.assertRaises(CbInvalidConfig) as err: + verify_config(os.path.join(TESTS, "config", "missing_header.conf")) + assert "File contains no section headers" in "{0}".format(err.exception.args[0]) + + def test_03b_config_invalid_header(self): + """ + Ensure we detect a configuration file with no "[general]" section header.
+ """ + with self.assertRaises(CbInvalidConfig) as err: + verify_config(os.path.join(TESTS, "config", "invalid_header.conf")) + assert "Config file does not have a 'general' section" in "{0}".format(err.exception.args[0]) - def test_validate_config_missing_worker(self): + def test_04a_config_missing_worker(self): + """ + Ensure that config lacking worker information defaults to local. + """ + # not defined in file globals.g_remote = None - ok = verify_config(os.path.join(TESTS, "config", "no_worker.conf"), "Sample.conf") - self.assertTrue(ok) + verify_config(os.path.join(TESTS, "config", "missing_worker.conf")) self.assertFalse(globals.g_remote) + + # defined as "worker_type=" + globals.g_remote = None + verify_config(os.path.join(TESTS, "config", "missing_worker2.conf")) + self.assertFalse(globals.g_remote) + + def test_04b_config_bogus_worker(self): + """ + Ensure that config with bogus worker is detected. + """ + with self.assertRaises(CbInvalidConfig) as err: + verify_config(os.path.join(TESTS, "config", "bogus_worker.conf")) + assert "Invalid worker_type" in "{0}".format(err.exception.args[0]) + + def test_05a_config_local_worker_missing_server_url(self): + """ + Ensure that local worker config with missing server url is detected. + """ + # not defined in file + with self.assertRaises(CbInvalidConfig) as err: + verify_config(os.path.join(TESTS, "config", "local_worker_no_server_url.conf")) + assert "Local worker configuration missing 'cb_server_url'" in "{0}".format(err.exception.args[0]) + + # defined as "cb_server_url=" + with self.assertRaises(CbInvalidConfig) as err: + verify_config(os.path.join(TESTS, "config", "local_worker_no_server_url2.conf")) + assert "Local worker configuration missing 'cb_server_url'" in "{0}".format(err.exception.args[0]) + + def test_05b_config_local_worker_missing_server_token(self): + """ + Ensure that local worker config with missing server token is detected. + """ + # not defined in file + with self.assertRaises(CbInvalidConfig) as err: + verify_config(os.path.join(TESTS, "config", "local_worker_no_server_token.conf")) + assert "Local worker configuration missing 'cb_server_token'" in "{0}".format(err.exception.args[0]) + + # defined as "cb_server_token=" + with self.assertRaises(CbInvalidConfig) as err: + verify_config(os.path.join(TESTS, "config", "local_worker_no_server_token2.conf")) + assert "Local worker configuration missing 'cb_server_token'" in "{0}".format(err.exception.args[0]) + + def test_06_config_remote_worker_missing_server_token(self): + """ + Ensure that remote worker config with missing broker url is detected. + """ + # not defined in file + with self.assertRaises(CbInvalidConfig) as err: + verify_config(os.path.join(TESTS, "config", "remote_worker_no_broker_url.conf")) + assert "Remote worker configuration missing 'broker_url'" in "{0}".format(err.exception.args[0]) + + # defined as "broker_url=" + with self.assertRaises(CbInvalidConfig) as err: + verify_config(os.path.join(TESTS, "config", "remote_worker_no_broker_url2.conf")) + assert "Remote worker configuration missing 'broker_url'" in "{0}".format(err.exception.args[0]) + + def test_07a_config_missing_yara_rules_dir(self): + """ + Ensure that config with missing yara rules directory is detected. 
+ """ + # not defined in file + with self.assertRaises(CbInvalidConfig) as err: + verify_config(os.path.join(TESTS, "config", "no_rules_dir.conf")) + assert "You must specify a yara rules directory in your configuration" in "{0}".format(err.exception.args[0]) + + # defined as "yara_rules_dir=" + with self.assertRaises(CbInvalidConfig) as err: + verify_config(os.path.join(TESTS, "config", "no_rules_dir2.conf")) + assert "You must specify a yara rules directory in your configuration" in "{0}".format(err.exception.args[0]) + + def test_07b_config_yara_rules_dir_not_exists(self): + """ + Ensure that config with yara rules directory that does not exist is detected. + """ + with self.assertRaises(CbInvalidConfig) as err: + verify_config(os.path.join(TESTS, "config", "missing_rules_dir.conf")) + assert "does not exist" in "{0}".format(err.exception.args[0]) + + def test_07c_config_yara_rules_dir_not_directory(self): + """ + Ensure that config with yara rules directory that is not a directory is detected. + """ + with self.assertRaises(CbInvalidConfig) as err: + verify_config(os.path.join(TESTS, "config", "bogus_rules_dir.conf")) + assert "is not actualy a directory" in "{0}".format(err.exception.args[0]) + + def test_08a_config_missing_postgres_host(self): + """ + Ensure that config with missing postgres_host uses defaults. + """ + check = globals.g_postgres_host + + # undefined, use default in globals + verify_config(os.path.join(TESTS, "config", "missing_postgres_host.conf")) + self.assertEqual(check, globals.g_postgres_host) + + # defined as "postgres_host=" + verify_config(os.path.join(TESTS, "config", "missing_postgres_host2.conf")) + self.assertEqual(check, globals.g_postgres_host) + + # TODO: test_08b_config_invalid_postgres_host + + def test_09a_config_missing_postgres_username(self): + """ + Ensure that config with missing postgres_username uses defaults. + """ + check = globals.g_postgres_username + + # undefined, use default in globals + verify_config(os.path.join(TESTS, "config", "missing_postgres_username.conf")) + self.assertEqual(check, globals.g_postgres_username) + + # defined as "postgres_username=" + verify_config(os.path.join(TESTS, "config", "missing_postgres_username2.conf")) + self.assertEqual(check, globals.g_postgres_username) + + # TODO: test_09b_config_invalid_postgres_username + + def test_10a_config_missing_postgres_password(self): + """ + Ensure that config with missing postgres_password is detected. + """ + # undefined + with self.assertRaises(CbInvalidConfig) as err: + verify_config(os.path.join(TESTS, "config", "missing_postgres_password.conf")) + assert "No 'postgres_password' defined in the configuration" in "{0}".format(err.exception.args[0]) + + # defined as "postgres_password=" + with self.assertRaises(CbInvalidConfig) as err: + verify_config(os.path.join(TESTS, "config", "missing_postgres_password2.conf")) + assert "No 'postgres_password' defined in the configuration" in "{0}".format(err.exception.args[0]) + + # TODO: test_10a_config_invalid_postgres_password + + def test_11a_config_missing_postgres_db(self): + """ + Ensure that config with missing postgres_db is detected. 
+ """ + check = globals.g_postgres_db + + # undefined, use default in globals + verify_config(os.path.join(TESTS, "config", "missing_postgres_db.conf")) + self.assertEqual(check, globals.g_postgres_db) + + # defined as "postgres_db=" + verify_config(os.path.join(TESTS, "config", "missing_postgres_db2.conf")) + self.assertEqual(check, globals.g_postgres_db) + + # TODO: test_11b_config_invalid_postgres_db + + def test_12a_config_missing_postgres_port(self): + """ + Ensure that config with missing postgres_port is detected. + """ + check = globals.g_postgres_port + + # undefined, use default in globals + verify_config(os.path.join(TESTS, "config", "missing_postgres_port.conf")) + self.assertEqual(check, globals.g_postgres_port) + + # defined as "postgres_port=" + with self.assertRaises(ValueError) as err: + verify_config(os.path.join(TESTS, "config", "missing_postgres_port2.conf")) + assert "invalid literal for int" in "{0}".format(err.exception.args[0]) + + def test_12b_config_bogus_postgres_port(self): + """ + Ensure that config with bogus (non-int) postgres_port is detected. + """ + with self.assertRaises(ValueError) as err: + verify_config(os.path.join(TESTS, "config", "bogus_postgres_port.conf")) + assert "invalid literal for int" in "{0}".format(err.exception.args[0]) + + # TODO: test_12c_config_invalid_postgres_port + + def test_13a_config_missing_niceness(self): + """ + Ensure that config with missing niceness is detected. + """ + # defined as "niceness=" + with self.assertRaises(ValueError) as err: + verify_config(os.path.join(TESTS, "config", "missing_niceness.conf")) + assert "invalid literal for int" in "{0}".format(err.exception.args[0]) + + def test_13b_config_bogus_niceness(self): + """ + Ensure that config with bogus (non-int) niceness is detected. + """ + with self.assertRaises(ValueError) as err: + verify_config(os.path.join(TESTS, "config", "bogus_niceness.conf")) + assert "invalid literal for int" in "{0}".format(err.exception.args[0]) + + def test_14a_config_missing_concurrent_hashes(self): + """ + Ensure that config with missing concurrent_hashes is detected. + """ + # defined as "concurrent_hashes=" + with self.assertRaises(ValueError) as err: + verify_config(os.path.join(TESTS, "config", "missing_concurrent_hashes.conf")) + assert "invalid literal for int" in "{0}".format(err.exception.args[0]) + + def test_14b_config_bogus_concurrent_hashes(self): + """ + Ensure that config with bogus (non-int) concurrent_hashes is detected. + """ + with self.assertRaises(ValueError) as err: + verify_config(os.path.join(TESTS, "config", "bogus_concurrent_hashes.conf")) + assert "invalid literal for int" in "{0}".format(err.exception.args[0]) + + def test_15a_config_missing_disable_rescan(self): + """ + Ensure that config with missing disable_rescan is detected. + """ + globals.g_disable_rescan = None + + # defined as "disable_rescan=" + verify_config(os.path.join(TESTS, "config", "missing_disable_rescan.conf")) + self.assertFalse(globals.g_disable_rescan) + + def test_15b_config_bogus_disable_rescan(self): + """ + Ensure that config with bogus (non-bool) disable_rescan is detected. + """ + globals.g_disable_rescan = None + + verify_config(os.path.join(TESTS, "config", "bogus_disable_rescan.conf")) + self.assertTrue(globals.g_disable_rescan) + + def test_16a_config_missing_num_days_binaries(self): + """ + Ensure that config with missing num_days_binaries is detected. 
+ """ + # defined as "num_days_binaries=" + with self.assertRaises(ValueError) as err: + verify_config(os.path.join(TESTS, "config", "missing_num_days_binaries.conf")) + assert "invalid literal for int" in "{0}".format(err.exception.args[0]) + + def test_16b_config_bogus_num_days_binaries(self): + """ + Ensure that config with bogus (non-int) num_days_binaries is detected. + """ + with self.assertRaises(ValueError) as err: + verify_config(os.path.join(TESTS, "config", "bogus_num_days_binaries.conf")) + assert "invalid literal for int" in "{0}".format(err.exception.args[0]) From ec2b35ad80757438cd9f3921dcbf31d30baf0d5c Mon Sep 17 00:00:00 2001 From: Zachary Estep Date: Wed, 23 Oct 2019 12:48:24 -0400 Subject: [PATCH 048/257] adding psql vacuum script --- vacuumscript.sh | 5 +++++ 1 file changed, 5 insertions(+) create mode 100644 vacuumscript.sh diff --git a/vacuumscript.sh b/vacuumscript.sh new file mode 100644 index 0000000..e348da6 --- /dev/null +++ b/vacuumscript.sh @@ -0,0 +1,5 @@ +#!/bin/bash +psql -p 5002 -d cb -c "vacuum (full,analyze, verbose) storefiles;" +psql -p 5002 -d cb -c "vacuum (full,analyze, verbose) binary_status;" +psql -p 5002 -d cb -c "vacuum (full,analyze, verbose) sensor_registrations;" +psql -p 5002 -d cb -c "vacuum (full,analyze, verbose) vt_write_events;" \ No newline at end of file From a86007404487101d79a6b3c12b6f8010207b7203 Mon Sep 17 00:00:00 2001 From: Zachary Estep Date: Wed, 23 Oct 2019 17:14:05 -0400 Subject: [PATCH 049/257] fixing logic --- globals.py | 4 ++-- main.py | 23 +++++++++++++++++++++-- vacuumscript.sh | 0 3 files changed, 23 insertions(+), 4 deletions(-) mode change 100644 => 100755 vacuumscript.sh diff --git a/globals.py b/globals.py index 5a122b7..3659f65 100644 --- a/globals.py +++ b/globals.py @@ -26,5 +26,5 @@ g_disable_rescan = False g_num_days_binaries = 365 -g_vacuum_seconds = 3600 -g_vacuum_script = './vacuumtables.sh' +g_vacuum_seconds = 1 +g_vacuum_script = 'vacuumscript.sh' diff --git a/main.py b/main.py index 47c31c7..a321095 100644 --- a/main.py +++ b/main.py @@ -197,7 +197,15 @@ def perform(yara_rule_dir): while True: if cur.closed: cur = conn.cursor(name="yara_agent") - rows = cur.fetchmany() + cur.execute("SELECT md5hash FROM storefiles WHERE present_locally = TRUE AND timestamp >= '{0}' " + "ORDER BY timestamp DESC".format(start_date_binaries)) + try: + rows = cur.fetchmany() + except psycopg2.OperationalError: + cur = conn.cursor(name="yara_agent") + cur.execute("SELECT md5hash FROM storefiles WHERE present_locally = TRUE AND timestamp >= '{0}' " + "ORDER BY timestamp DESC".format(start_date_binaries)) + rows = cur.fetchmany() if len(rows) == 0: break @@ -205,7 +213,8 @@ def perform(yara_rule_dir): seconds_since_start = (datetime.now() - start_datetime).seconds if seconds_since_start >= globals.g_vacuum_seconds: cur.close() - os.system(globals.g_vacuum_script) + os.system(os.path.join(os.getcwd(),globals.g_vacuum_script)) + start_datetime = datetime.now() break num_total_binaries += 1 @@ -312,6 +321,9 @@ def verify_config(config_file, output_file): if 'postgres_host' in config['general']: globals.g_postgres_host = config['general']['postgres_host'] + if 'postgres_port' in config['general']: + globals.g_postgres_port = config['general']['postgres_port'] + if 'postgres_username' in config['general']: globals.g_postgres_username = config['general']['postgres_username'] @@ -341,6 +353,13 @@ def verify_config(config_file, output_file): globals.g_num_days_binaries = int(config['general']['num_days_binaries']) logger.debug("Number 
of days for binaries: {}".format(globals.g_num_days_binaries)) + if 'vacuum_seconds' in config['general']: + globals.g_vacuum_seconds = int(config['general']['vacuum_seconds']) + + if 'vacuum_script' in config['general']: + globals.g_vacuum_script = config['general']['vacuum_script'] + + return True diff --git a/vacuumscript.sh b/vacuumscript.sh old mode 100644 new mode 100755 From 30ab80aeabac234454813ab36295762cfc23f2ee Mon Sep 17 00:00:00 2001 From: Burton Choinski Date: Thu, 24 Oct 2019 08:27:46 -0400 Subject: [PATCH 050/257] * Normalization of exception messages * moved exceptions to their own module --- exceptions.py | 29 ++++++++++++ feed.py | 22 +--------- main.py | 49 +++++++++------------ tasks.py | 72 ++++++++++++++++++++++-------- test/test_main.py | 31 ++++++------- test/test_tasks.py | 107 ++++++++++++++++++++++++++++++++++++++++++++- 6 files changed, 226 insertions(+), 84 deletions(-) create mode 100644 exceptions.py diff --git a/exceptions.py b/exceptions.py new file mode 100644 index 0000000..8bc53a9 --- /dev/null +++ b/exceptions.py @@ -0,0 +1,29 @@ +################################################################################ +# Exception Classes +################################################################################ + + +class CbException(Exception): + """ + Root exception for this connector. + """ + pass + + +class CbInvalidConfig(CbException): + """ + Exception raised on an invalid configuration file. + """ + pass + + +class CbIconError(CbException): + pass + + +class CbInvalidFeed(CbException): + pass + + +class CbInvalidReport(CbException): + pass diff --git a/feed.py b/feed.py index d4fe607..d01eb40 100644 --- a/feed.py +++ b/feed.py @@ -9,30 +9,12 @@ import time from typing import List +from exceptions import CbIconError, CbInvalidFeed, CbInvalidReport + # noinspection PyUnusedName logger = logging.getLogger(__name__) -################################################################################ -# Exception Classes -################################################################################ - -class CbException(Exception): - pass - - -class CbIconError(CbException): - pass - - -class CbInvalidFeed(CbException): - pass - - -class CbInvalidReport(CbException): - pass - - ################################################################################ # Working Code Classes ################################################################################ diff --git a/main.py b/main.py index 4bce79c..dafa46a 100644 --- a/main.py +++ b/main.py @@ -20,7 +20,9 @@ import globals import singleton +from analysis_result import AnalysisResult from binary_database import BinaryDetonationResult, db +from exceptions import CbInvalidConfig from feed import CbFeed, CbFeedInfo, CbReport from tasks import analyze_binary, app, generate_rule_map, update_yara_rules_remote @@ -34,18 +36,6 @@ celery_logger.setLevel(logging.ERROR) -################################################################################ -# Exception Classes -################################################################################ - -class CbInvalidConfig(Exception): - pass - - -################################################################################ -# Exception Classes -################################################################################ - def generate_feed_from_db() -> None: """ Creates a feed based on specific database information. 
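
With the exception classes centralized in exceptions.py, feed.py and main.py now share a single hierarchy rooted at CbException. The following is a minimal sketch of what that buys a caller; it assumes the modules shown above are importable as-is, and the run_connector wrapper is hypothetical, not part of this patch:

# Sketch only: one except clause can now trap any connector-defined error,
# while specific subclasses (CbInvalidConfig, CbInvalidFeed, ...) remain
# catchable on their own. `run_connector` is a hypothetical wrapper.
import sys

from exceptions import CbException, CbInvalidConfig
from main import verify_config


def run_connector(config_file: str) -> int:
    try:
        verify_config(config_file)
    except CbInvalidConfig as err:
        sys.stderr.write(f"Bad config: {err}\n")
        return 1
    except CbException as err:
        # any other connector-defined failure (feed, report, icon, ...)
        sys.stderr.write(f"Connector error: {err}\n")
        return 2
    return 0

The design point is that third-party errors (psycopg2, celery, yara) still propagate separately, so callers can distinguish connector misconfiguration from infrastructure failures.
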
@@ -177,7 +167,7 @@ def analyze_binaries(md5_hashes: List[str], local: bool) -> Optional: return None -def save_results(analysis_results: List) -> None: +def save_results(analysis_results: List[AnalysisResult]) -> None: """ Save the current analysis results. @@ -325,6 +315,7 @@ def _rule_logging(start_time: float, num_binaries_skipped: int, num_total_binari logger.info("") +# noinspection DuplicatedCode def verify_config(config_file: str, output_file: str = None) -> None: """ Validate the config file. @@ -332,10 +323,11 @@ def verify_config(config_file: str, output_file: str = None) -> None: :param output_file: the output file; if not specified equals config file plus ".json" """ abs_config = os.path.abspath(config_file) + header = f"Config file '{abs_config}'" config = configparser.ConfigParser() if not os.path.exists(config_file): - raise CbInvalidConfig(f"Config file '{abs_config}' does not exist!") + raise CbInvalidConfig(f"{header} does not exist!") try: config.read(config_file) @@ -344,7 +336,7 @@ def verify_config(config_file: str, output_file: str = None) -> None: logger.debug(f"NOTE: using config file '{abs_config}'") if not config.has_section('general'): - raise CbInvalidConfig(f"Config file does not have a 'general' section") + raise CbInvalidConfig(f"{header} does not have a 'general' section") globals.output_file = output_file if output_file is not None else config_file.strip() + ".json" logger.debug(f"NOTE: output file will be '{globals.output_file}'") @@ -356,28 +348,27 @@ def verify_config(config_file: str, output_file: str = None) -> None: elif the_config['worker_type'] == 'remote': globals.g_remote = True # 'remote' else: # anything else - raise CbInvalidConfig( - f"Invalid worker_type '{the_config['worker_type']}' specified; must be 'local' or 'remote'") + raise CbInvalidConfig(f"{header} has an invalid 'worker_type' ({the_config['worker_type']})") else: globals.g_remote = False - logger.warning("Config file does not specify 'worker_type', assuming local") + logger.warning(f"{header} does not specify 'worker_type', assuming local") # local/remote configuration data if not globals.g_remote: if 'cb_server_url' in the_config and the_config['cb_server_url'].strip() != "": globals.g_cb_server_url = the_config['cb_server_url'] else: - raise CbInvalidConfig(f"Local worker configuration missing 'cb_server_url'") + raise CbInvalidConfig(f"{header} is 'local' and missing 'cb_server_url'") if 'cb_server_token' in the_config and the_config['cb_server_token'].strip() != "": globals.g_cb_server_token = the_config['cb_server_token'] else: - raise CbInvalidConfig(f"Local worker configuration missing 'cb_server_token'") + raise CbInvalidConfig(f"{header} is 'local' and missing 'cb_server_token'") # TODO: validate url & token with test call? else: if 'broker_url' in the_config and the_config['broker_url'].strip() != "": app.conf.update(broker_url=the_config['broker_url'], result_backend=the_config['broker_url']) else: - raise CbInvalidConfig(f"Remote worker configuration missing 'broker_url'") + raise CbInvalidConfig(f"{header} is 'remote' and missing 'broker_url'") # TODO: validate broker with test call? 
if 'yara_rules_dir' in the_config and the_config['yara_rules_dir'].strip() != "": @@ -386,40 +377,40 @@ def verify_config(config_file: str, output_file: str = None) -> None: if os.path.isdir(check): globals.g_yara_rules_dir = check else: - raise CbInvalidConfig("Rules dir '{0}' is not actualy a directory".format(check)) + raise CbInvalidConfig(f"{header} specified 'yara_rules_dir' ({check}) is not a directory") else: - raise CbInvalidConfig("Rules dir '{0}' does not exist".format(check)) + raise CbInvalidConfig(f"{header} specified 'yara_rules_dir' ({check}) does not exist") else: - raise CbInvalidConfig("You must specify a yara rules directory in your configuration") + raise CbInvalidConfig(f"{header} has no 'yara_rules_dir' definition") # NOTE: postgres_host has a default value in globals; use and warn if not defined if 'postgres_host' in the_config and the_config['postgres_host'].strip() != "": globals.g_postgres_host = the_config['postgres_host'] else: - logger.warning(f"No defined 'postgres_host'; using default of {globals.g_postgres_host}") + logger.warning(f"{header} has no defined 'postgres_host'; using default of '{globals.g_postgres_host}'") # NOTE: postgres_username has a default value in globals; use and warn if not defined if 'postgres_username' in the_config and the_config['postgres_username'].strip() != "": globals.g_postgres_username = the_config['postgres_username'] else: - logger.warning(f"No defined 'postgres_username'; using default of {globals.g_postgres_username}") + logger.warning(f"{header} has no defined 'postgres_username'; using default of '{globals.g_postgres_username}'") if 'postgres_password' in the_config and the_config['postgres_password'].strip() != "": globals.g_postgres_password = the_config['postgres_password'] else: - raise CbInvalidConfig("No 'postgres_password' defined in the configuration") + raise CbInvalidConfig(f"{header} has no 'postgres_password' defined") # NOTE: postgres_db has a default value in globals; use and warn if not defined if 'postgres_db' in the_config and the_config['postgres_db'].strip() != "": globals.g_postgres_db = the_config['postgres_db'] else: - logger.warning(f"No defined 'postgres_db'; using default of {globals.g_postgres_db}") + logger.warning(f"{header} has no defined 'postgres_db'; using default of '{globals.g_postgres_db}'") # NOTE: postgres_port has a default value in globals; use and warn if not defined if 'postgres_port' in the_config: globals.g_postgres_port = int(the_config['postgres_port']) else: - logger.warning(f"No defined 'postgres_port'; using default of {globals.g_postgres_port}") + logger.warning(f"{header} has no defined 'postgres_port'; using default of '{globals.g_postgres_port}'") # TODO: validate postgres connection with supplied information? diff --git a/tasks.py b/tasks.py index d18d82b..9af437b 100644 --- a/tasks.py +++ b/tasks.py @@ -14,6 +14,7 @@ import globals from analysis_result import AnalysisResult +from exceptions import CbInvalidConfig app = Celery() # noinspection PyUnusedName @@ -27,34 +28,69 @@ logger.setLevel(logging.INFO) -def verify_config(config_file: str) -> bool: +# noinspection DuplicatedCode +def verify_config(config_file: str) -> None: """ Read and validate the current config file. 
+ + NOTE: Replicates, to a smaller degree, the function in main.py; it is presumed that more detailed checks are there :param config_file: path to the config file - :return: True if valid """ - config = configparser.ConfigParser() - config.read(config_file) + abs_config = os.path.abspath(config_file) + header = f"Config file '{abs_config}'" - if not config.has_section('general'): - logger.error("Config file does not have a \'general\' section") - return False + config = configparser.ConfigParser() + if not os.path.exists(config_file): + raise CbInvalidConfig(f"{header} does not exist!") - if 'yara_rules_dir' in config['general']: - globals.g_yara_rules_dir = config['general']['yara_rules_dir'] + try: + config.read(config_file) + except Exception as err: + raise CbInvalidConfig(err) - if 'cb_server_url' in config['general']: - globals.g_cb_server_url = config['general']['cb_server_url'] + logger.debug(f"NOTE: using config file '{abs_config}'") + if not config.has_section('general'): + raise CbInvalidConfig(f"{header} does not have a 'general' section") - if 'cb_server_token' in config['general']: - globals.g_cb_server_token = config['general']['cb_server_token'] + the_config = config['general'] - if 'broker_url' in config['general']: - app.conf.update( - broker_url=config['general']['broker_url'], - result_backend=config['general']['broker_url']) + if 'yara_rules_dir' in the_config and the_config['yara_rules_dir'].strip() != "": + check = os.path.abspath(the_config['yara_rules_dir']) + if os.path.exists(check): + if os.path.isdir(check): + globals.g_yara_rules_dir = check + else: + raise CbInvalidConfig(f"{header} specified 'yara_rules_dir' ({check}) is not a directory") + else: + raise CbInvalidConfig(f"{header} specified 'yara_rules_dir' ({check}) does not exist") + else: + raise CbInvalidConfig(f"{header} has no 'yara_rules_dir' definition") + + if 'worker_type' in the_config: + if the_config['worker_type'] == 'local' or the_config['worker_type'].strip() == "": + remote = False + elif the_config['worker_type'] == 'remote': + remote = True + else: # anything else + raise CbInvalidConfig(f"{header} has an invalid 'worker_type' ({the_config['worker_type']})") + else: + remote = False - return True + # local/remote configuration data + if not remote: + if 'cb_server_url' in the_config and the_config['cb_server_url'].strip() != "": + globals.g_cb_server_url = the_config['cb_server_url'] + else: + raise CbInvalidConfig(f"{header} is 'local' and missing 'cb_server_url'") + if 'cb_server_token' in the_config and the_config['cb_server_token'].strip() != "": + globals.g_cb_server_token = the_config['cb_server_token'] + else: + raise CbInvalidConfig(f"{header} is 'local' and missing 'cb_server_token'") + else: + if 'broker_url' in the_config and the_config['broker_url'].strip() != "": + app.conf.update(broker_url=the_config['broker_url'], result_backend=the_config['broker_url']) + else: + raise CbInvalidConfig(f"{header} is 'remote' and missing 'broker_url'") def add_worker_arguments(parser): diff --git a/test/test_main.py b/test/test_main.py index 47ebd3c..884ece2 100644 --- a/test/test_main.py +++ b/test/test_main.py @@ -2,7 +2,8 @@ from unittest import TestCase import globals -from main import CbInvalidConfig, generate_yara_rule_map_hash, verify_config +from exceptions import CbInvalidConfig +from main import generate_yara_rule_map_hash, verify_config TESTS = os.path.abspath(os.path.dirname(__file__)) @@ -51,7 +52,7 @@ def test_03b_config_invalid_header(self): """ with 
self.assertRaises(CbInvalidConfig) as err: verify_config(os.path.join(TESTS, "config", "invalid_header.conf")) - assert "Config file does not have a 'general' section" in "{0}".format(err.exception.args[0]) + assert "does not have a 'general' section" in "{0}".format(err.exception.args[0]) def test_04a_config_missing_worker(self): """ @@ -73,7 +74,7 @@ def test_04b_config_bogus_worker(self): """ with self.assertRaises(CbInvalidConfig) as err: verify_config(os.path.join(TESTS, "config", "bogus_worker.conf")) - assert "Invalid worker_type" in "{0}".format(err.exception.args[0]) + assert "invalid 'worker_type'" in "{0}".format(err.exception.args[0]) def test_05a_config_local_worker_missing_server_url(self): """ @@ -82,12 +83,12 @@ def test_05a_config_local_worker_missing_server_url(self): # not defined in file with self.assertRaises(CbInvalidConfig) as err: verify_config(os.path.join(TESTS, "config", "local_worker_no_server_url.conf")) - assert "Local worker configuration missing 'cb_server_url'" in "{0}".format(err.exception.args[0]) + assert "is 'local' and missing 'cb_server_url'" in "{0}".format(err.exception.args[0]) # defined as "cb_server_url=" with self.assertRaises(CbInvalidConfig) as err: verify_config(os.path.join(TESTS, "config", "local_worker_no_server_url2.conf")) - assert "Local worker configuration missing 'cb_server_url'" in "{0}".format(err.exception.args[0]) + assert "is 'local' and missing 'cb_server_url'" in "{0}".format(err.exception.args[0]) def test_05b_config_local_worker_missing_server_token(self): """ @@ -96,26 +97,26 @@ def test_05b_config_local_worker_missing_server_token(self): # not defined in file with self.assertRaises(CbInvalidConfig) as err: verify_config(os.path.join(TESTS, "config", "local_worker_no_server_token.conf")) - assert "Local worker configuration missing 'cb_server_token'" in "{0}".format(err.exception.args[0]) + assert "is 'local' and missing 'cb_server_token'" in "{0}".format(err.exception.args[0]) # defined as "cb_server_token=" with self.assertRaises(CbInvalidConfig) as err: verify_config(os.path.join(TESTS, "config", "local_worker_no_server_token2.conf")) - assert "Local worker configuration missing 'cb_server_token'" in "{0}".format(err.exception.args[0]) + assert "is 'local' and missing 'cb_server_token'" in "{0}".format(err.exception.args[0]) - def test_06_config_remote_worker_missing_server_token(self): + def test_06_config_remote_worker_missing_broker_url(self): """ Ensure that remote worker config with missing broker url is detected. 
""" # not defined in file with self.assertRaises(CbInvalidConfig) as err: verify_config(os.path.join(TESTS, "config", "remote_worker_no_broker_url.conf")) - assert "Remote worker configuration missing 'broker_url'" in "{0}".format(err.exception.args[0]) + assert "is 'remote' and missing 'broker_url'" in "{0}".format(err.exception.args[0]) # defined as "broker_url=" with self.assertRaises(CbInvalidConfig) as err: verify_config(os.path.join(TESTS, "config", "remote_worker_no_broker_url2.conf")) - assert "Remote worker configuration missing 'broker_url'" in "{0}".format(err.exception.args[0]) + assert "is 'remote' and missing 'broker_url'" in "{0}".format(err.exception.args[0]) def test_07a_config_missing_yara_rules_dir(self): """ @@ -124,12 +125,12 @@ def test_07a_config_missing_yara_rules_dir(self): # not defined in file with self.assertRaises(CbInvalidConfig) as err: verify_config(os.path.join(TESTS, "config", "no_rules_dir.conf")) - assert "You must specify a yara rules directory in your configuration" in "{0}".format(err.exception.args[0]) + assert "has no 'yara_rules_dir' definition" in "{0}".format(err.exception.args[0]) # defined as "yara_rules_dir=" with self.assertRaises(CbInvalidConfig) as err: verify_config(os.path.join(TESTS, "config", "no_rules_dir2.conf")) - assert "You must specify a yara rules directory in your configuration" in "{0}".format(err.exception.args[0]) + assert "has no 'yara_rules_dir' definition" in "{0}".format(err.exception.args[0]) def test_07b_config_yara_rules_dir_not_exists(self): """ @@ -145,7 +146,7 @@ def test_07c_config_yara_rules_dir_not_directory(self): """ with self.assertRaises(CbInvalidConfig) as err: verify_config(os.path.join(TESTS, "config", "bogus_rules_dir.conf")) - assert "is not actualy a directory" in "{0}".format(err.exception.args[0]) + assert "is not a directory" in "{0}".format(err.exception.args[0]) def test_08a_config_missing_postgres_host(self): """ @@ -186,12 +187,12 @@ def test_10a_config_missing_postgres_password(self): # undefined with self.assertRaises(CbInvalidConfig) as err: verify_config(os.path.join(TESTS, "config", "missing_postgres_password.conf")) - assert "No 'postgres_password' defined in the configuration" in "{0}".format(err.exception.args[0]) + assert "has no 'postgres_password' defined" in "{0}".format(err.exception.args[0]) # defined as "postgres_password=" with self.assertRaises(CbInvalidConfig) as err: verify_config(os.path.join(TESTS, "config", "missing_postgres_password2.conf")) - assert "No 'postgres_password' defined in the configuration" in "{0}".format(err.exception.args[0]) + assert "has no 'postgres_password' defined" in "{0}".format(err.exception.args[0]) # TODO: test_10a_config_invalid_postgres_password diff --git a/test/test_tasks.py b/test/test_tasks.py index 73a349c..ae8009e 100644 --- a/test/test_tasks.py +++ b/test/test_tasks.py @@ -1,14 +1,117 @@ import os from unittest import TestCase -from tasks import generate_yara_rule_map_hash +from exceptions import CbInvalidConfig +from tasks import generate_rule_map, generate_yara_rule_map_hash, verify_config TESTS = os.path.abspath(os.path.dirname(__file__)) class TestTasks(TestCase): - def test_generate_yara_rule_map_hash(self): + def test_01a_generate_yara_rule_map(self): + the_dict = generate_rule_map(os.path.join(TESTS, "rules")) + self.assertEqual(1, len(the_dict)) + self.assertTrue("test" in the_dict) + self.assertTrue(the_dict["test"].endswith("test/rules/test.yara")) + + def test_01b_generate_yara_rule_map_hash(self): the_list = 
generate_yara_rule_map_hash(os.path.join(TESTS, "rules")) self.assertEqual(1, len(the_list)) self.assertEqual("191cc0ea3f9ef90ed1850a3650cd38ed", the_list[0]) + + def test_02a_missing_config(self): + """ + Ensure a missing config file is detected. + """ + with self.assertRaises(CbInvalidConfig) as err: + verify_config(os.path.join(TESTS, "config", "no-such-config.conf")) + assert "does not exist!" in "{0}".format(err.exception.args[0]) + + def test_03a_config_missing_header(self): + """ + Ensure we detect a configuration file with no section header. + """ + with self.assertRaises(CbInvalidConfig) as err: + verify_config(os.path.join(TESTS, "config", "missing_header.conf")) + assert "File contains no section headers" in "{0}".format(err.exception.args[0]) + + def test_03b_config_invalid_header(self): + """ + Ensure we detect a configuration file with no "[general]" section header. + """ + with self.assertRaises(CbInvalidConfig) as err: + verify_config(os.path.join(TESTS, "config", "invalid_header.conf")) + assert "does not have a 'general' section" in "{0}".format(err.exception.args[0]) + + def test_04a_config_local_worker_missing_server_url(self): + """ + Ensure that local worker config with missing server url is detected. + """ + # not defined in file + with self.assertRaises(CbInvalidConfig) as err: + verify_config(os.path.join(TESTS, "config", "local_worker_no_server_url.conf")) + assert "is 'local' and missing 'cb_server_url'" in "{0}".format(err.exception.args[0]) + + # defined as "cb_server_url=" + with self.assertRaises(CbInvalidConfig) as err: + verify_config(os.path.join(TESTS, "config", "local_worker_no_server_url2.conf")) + assert "is 'local' and missing 'cb_server_url'" in "{0}".format(err.exception.args[0]) + + def test_04b_config_local_worker_missing_server_token(self): + """ + Ensure that local worker config with missing server token is detected. + """ + # not defined in file + with self.assertRaises(CbInvalidConfig) as err: + verify_config(os.path.join(TESTS, "config", "local_worker_no_server_token.conf")) + assert "is 'local' and missing 'cb_server_token'" in "{0}".format(err.exception.args[0]) + + # defined as "cb_server_token=" + with self.assertRaises(CbInvalidConfig) as err: + verify_config(os.path.join(TESTS, "config", "local_worker_no_server_token2.conf")) + assert "is 'local' and missing 'cb_server_token'" in "{0}".format(err.exception.args[0]) + + def test_05_config_remote_worker_missing_broker_url(self): + """ + Ensure that remote worker config with missing broker url is detected. + """ + # not defined in file + with self.assertRaises(CbInvalidConfig) as err: + verify_config(os.path.join(TESTS, "config", "remote_worker_no_broker_url.conf")) + assert "is 'remote' and missing 'broker_url'" in "{0}".format(err.exception.args[0]) + + # defined as "broker_url=" + with self.assertRaises(CbInvalidConfig) as err: + verify_config(os.path.join(TESTS, "config", "remote_worker_no_broker_url2.conf")) + assert "is 'remote' and missing 'broker_url'" in "{0}".format(err.exception.args[0]) + + def test_06a_config_missing_yara_rules_dir(self): + """ + Ensure that config with missing yara rules directory is detected. 
+ """ + # not defined in file + with self.assertRaises(CbInvalidConfig) as err: + verify_config(os.path.join(TESTS, "config", "no_rules_dir.conf")) + assert "has no 'yara_rules_dir' definition" in "{0}".format(err.exception.args[0]) + + # defined as "yara_rules_dir=" + with self.assertRaises(CbInvalidConfig) as err: + verify_config(os.path.join(TESTS, "config", "no_rules_dir2.conf")) + assert "has no 'yara_rules_dir' definition" in "{0}".format(err.exception.args[0]) + + def test_06b_config_yara_rules_dir_not_exists(self): + """ + Ensure that config with yara rules directory that does not exist is detected. + """ + with self.assertRaises(CbInvalidConfig) as err: + verify_config(os.path.join(TESTS, "config", "missing_rules_dir.conf")) + assert "does not exist" in "{0}".format(err.exception.args[0]) + + def test_06c_config_yara_rules_dir_not_directory(self): + """ + Ensure that config with yara rules directory that is not a directory is detected. + """ + with self.assertRaises(CbInvalidConfig) as err: + verify_config(os.path.join(TESTS, "config", "bogus_rules_dir.conf")) + assert "is not a directory" in "{0}".format(err.exception.args[0]) From d6bdf0721af4e6162dfe047d431605bcb3ff8312 Mon Sep 17 00:00:00 2001 From: Zachary Estep Date: Thu, 24 Oct 2019 11:18:13 -0400 Subject: [PATCH 051/257] make new feature opt-in and add warning message --- globals.py | 2 +- main.py | 9 ++++----- 2 files changed, 5 insertions(+), 6 deletions(-) diff --git a/globals.py b/globals.py index 3659f65..84121e5 100644 --- a/globals.py +++ b/globals.py @@ -26,5 +26,5 @@ g_disable_rescan = False g_num_days_binaries = 365 -g_vacuum_seconds = 1 +g_vacuum_seconds = -1 g_vacuum_script = 'vacuumscript.sh' diff --git a/main.py b/main.py index a321095..4306da2 100644 --- a/main.py +++ b/main.py @@ -211,7 +211,7 @@ def perform(yara_rule_dir): for row in rows: seconds_since_start = (datetime.now() - start_datetime).seconds - if seconds_since_start >= globals.g_vacuum_seconds: + if seconds_since_start >= globals.g_vacuum_seconds and globals.g_vacuum_seconds > 0: cur.close() os.system(os.path.join(os.getcwd(),globals.g_vacuum_script)) start_datetime = datetime.now() @@ -355,10 +355,9 @@ def verify_config(config_file, output_file): if 'vacuum_seconds' in config['general']: globals.g_vacuum_seconds = int(config['general']['vacuum_seconds']) - - if 'vacuum_script' in config['general']: - globals.g_vacuum_script = config['general']['vacuum_script'] - + if 'vacuum_script' in config['general'] and globals.g_vacuum_seconds > 0: + globals.g_vacuum_script = config['general']['vacuum_script'] + logger.warn("WARNING: Vacuum Script is enabled --- use this advanced feature at your own digression ---") return True From 8347ebd18156c3dcb4da72a3f41f120f87504b0d Mon Sep 17 00:00:00 2001 From: Burton Choinski Date: Thu, 24 Oct 2019 12:08:36 -0400 Subject: [PATCH 052/257] * added test conf for main --- test/config/from_main.conf | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) create mode 100644 test/config/from_main.conf diff --git a/test/config/from_main.conf b/test/config/from_main.conf new file mode 100644 index 0000000..d660107 --- /dev/null +++ b/test/config/from_main.conf @@ -0,0 +1,19 @@ +[general] +worker_type=local + +; ONLY for worker_type of local +cb_server_url=https://127.0.0.1:443 +cb_server_token=wFC4he6OBd20IBbB0XkhE5ZsjBQUj1GB + +yara_rules_dir=./test/rules + +postgres_host=localhost +postgres_username=cb +postgres_password=6PGcbuwlQnIuPqOF +postgres_db=cb +postgres_port=5002 + +niceness=1 +concurrent_hashes=8 
+disable_rescan=False +num_days_binaries=365 From 1e25133382469a790a527728bc2541f3b5ee30f3 Mon Sep 17 00:00:00 2001 From: Zachary Estep Date: Thu, 24 Oct 2019 16:26:32 -0400 Subject: [PATCH 053/257] Brushing up warning messages --- main.py | 3 ++- yara.conf | 58 +++++++++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 60 insertions(+), 1 deletion(-) create mode 100644 yara.conf diff --git a/main.py b/main.py index 4306da2..ce658b1 100644 --- a/main.py +++ b/main.py @@ -213,6 +213,7 @@ def perform(yara_rule_dir): seconds_since_start = (datetime.now() - start_datetime).seconds if seconds_since_start >= globals.g_vacuum_seconds and globals.g_vacuum_seconds > 0: cur.close() + logger.warning("!!!Executing vacuum script!!!") os.system(os.path.join(os.getcwd(),globals.g_vacuum_script)) start_datetime = datetime.now() break @@ -357,7 +358,7 @@ def verify_config(config_file, output_file): globals.g_vacuum_seconds = int(config['general']['vacuum_seconds']) if 'vacuum_script' in config['general'] and globals.g_vacuum_seconds > 0: globals.g_vacuum_script = config['general']['vacuum_script'] - logger.warn("WARNING: Vacuum Script is enabled --- use this advanced feature at your own digression ---") + logger.warn("!!! Vacuum Script is enabled --- use this advanced feature at your own discretion --- !!!") return True diff --git a/yara.conf b/yara.conf new file mode 100644 index 0000000..c54d3d4 --- /dev/null +++ b/yara.conf @@ -0,0 +1,58 @@ +[general] +num_days_binaries=1 +; valid types are 'local' or 'remote' +; +worker_type=local + +; +; ONLY for worker_type of remote +; IP Address of workers if worker_type is remote +; +;broker_url=redis://127.0.0.1 + +; +; path to directory containing yara rules +; +yara_rules_dir=./yara_rules + +; +; Cb Response postgres Database settings +; +postgres_host=localhost +postgres_username=cb +postgres_password= +postgres_db=cb +postgres_port=5002 + +; +; ONLY for worker_type of local +; Cb Response Server settings for scanning locally. +; For remote scanning please set these parameters in the yara worker config file +; Default: https://127.0.0.1 +; +cb_server_url=https://127.0.0.1 +cb_server_token= + +; +; nice value used for this script +; +;niceness=1 + +; +; Number of hashes to send to the workers concurrently. Defaults to 8. +; Recommend setting to the number of workers on the remote system. +; +concurrent_hashes=8 + +; +; If you don't want binaries to be rescanned more than once, regardless of the rules used, set this to True +; Default: False +; +disable_rescan=True + +; +; The agent will pull binaries up to the configured number of days. 
For example, 365 will pull all binaries with +; a timestamp within the last year +; Default: 365 +; +;num_days_binaries=365 From aeb5dfa763b459a33e2b927abebda52aa3ffbd5b Mon Sep 17 00:00:00 2001 From: Zachary Estep Date: Thu, 24 Oct 2019 16:35:18 -0400 Subject: [PATCH 054/257] updates --- main.py | 1 + 1 file changed, 1 insertion(+) diff --git a/main.py b/main.py index ce658b1..a71b670 100644 --- a/main.py +++ b/main.py @@ -216,6 +216,7 @@ def perform(yara_rule_dir): logger.warning("!!!Executing vacuum script!!!") os.system(os.path.join(os.getcwd(),globals.g_vacuum_script)) start_datetime = datetime.now() + logger.warning("!!!Done Executing vacuum script!!!") break num_total_binaries += 1 From 54aa7eda933daaf6ed59cb82fa128c2e7dd0707f Mon Sep 17 00:00:00 2001 From: Zachary Estep Date: Thu, 24 Oct 2019 16:49:37 -0400 Subject: [PATCH 055/257] updates --- main.py | 2 +- requirements.txt | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/main.py b/main.py index a71b670..4f0e021 100644 --- a/main.py +++ b/main.py @@ -214,7 +214,7 @@ def perform(yara_rule_dir): if seconds_since_start >= globals.g_vacuum_seconds and globals.g_vacuum_seconds > 0: cur.close() logger.warning("!!!Executing vacuum script!!!") - os.system(os.path.join(os.getcwd(),globals.g_vacuum_script)) + os.system(f".{os.path.join(os.getcwd(), globals.g_vacuum_script)}") start_datetime = datetime.now() logger.warning("!!!Done Executing vacuum script!!!") break diff --git a/requirements.txt b/requirements.txt index 0bb99d4..9a716f7 100644 --- a/requirements.txt +++ b/requirements.txt @@ -34,3 +34,4 @@ vine==1.2.0 wcwidth==0.1.7 yara-python==3.8.1 redis==3.0.1 +humanfriendly From e31fc0cafc3928489b223515bcf81613836d495f Mon Sep 17 00:00:00 2001 From: Zachary Estep Date: Thu, 24 Oct 2019 16:57:09 -0400 Subject: [PATCH 056/257] updates --- main.py | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/main.py b/main.py index 4f0e021..74264f2 100644 --- a/main.py +++ b/main.py @@ -14,6 +14,7 @@ import configparser import hashlib import yara +import subprocess from feed import CbFeed, CbFeedInfo, CbReport from celery import group @@ -214,7 +215,13 @@ def perform(yara_rule_dir): if seconds_since_start >= globals.g_vacuum_seconds and globals.g_vacuum_seconds > 0: cur.close() logger.warning("!!!Executing vacuum script!!!") - os.system(f".{os.path.join(os.getcwd(), globals.g_vacuum_script)}") + target = os.path.join(os.getcwd(), globals.g_vacuum_script) + prog = subprocess.Popen(target, stdout=subprocess.PIPE, stderr=subprocess.PIPE) + stdout, stderr = prog.communicate() # Returns (stdoutdata, stderrdata): stdout and stderr are ignored, here + logger.info(stdout) + logger.error(stderr) + if prog.returncode: + logger.warning('program returned error code {0}'.format(prog.returncode)) start_datetime = datetime.now() logger.warning("!!!Done Executing vacuum script!!!") break From e8e858f74a2f67f5f75e7bfdb1961332a4625cb6 Mon Sep 17 00:00:00 2001 From: Zachary Estep Date: Fri, 25 Oct 2019 07:30:54 -0400 Subject: [PATCH 057/257] Adding shell=True to subprocess.call --- main.py | 348 ++++++++++++++++++++++++++++++++++++-------------------- 1 file changed, 227 insertions(+), 121 deletions(-) diff --git a/main.py b/main.py index f603845..55b7b89 100644 --- a/main.py +++ b/main.py @@ -13,6 +13,7 @@ import humanfriendly import psycopg2 + # noinspection PyPackageRequirements import yara import subprocess @@ -27,13 +28,13 @@ from feed import CbFeed, CbFeedInfo, CbReport from tasks import analyze_binary, app, 
generate_rule_map, update_yara_rules_remote -logging_format = '%(asctime)s-%(name)s-%(lineno)d-%(levelname)s-%(message)s' +logging_format = "%(asctime)s-%(name)s-%(lineno)d-%(levelname)s-%(message)s" logging.basicConfig(format=logging_format) logger = logging.getLogger(__name__) logger.setLevel(logging.INFO) -celery_logger = logging.getLogger('celery.app.trace') +celery_logger = logging.getLogger("celery.app.trace") celery_logger.setLevel(logging.ERROR) @@ -41,6 +42,7 @@ # Exception Classes ################################################################################ + class CbInvalidConfig(Exception): pass @@ -49,6 +51,7 @@ class CbInvalidConfig(Exception): # Exception Classes ################################################################################ + def generate_feed_from_db() -> None: """ Creates a feed based on specific database information. @@ -58,29 +61,31 @@ def generate_feed_from_db() -> None: reports = [] for binary in query: - fields = {'iocs': {'md5': [binary.md5]}, - 'score': binary.score, - 'timestamp': int(time.mktime(time.gmtime())), - 'link': '', - 'id': 'binary_{0}'.format(binary.md5), - 'title': binary.last_success_msg, - 'description': binary.last_success_msg - } + fields = { + "iocs": {"md5": [binary.md5]}, + "score": binary.score, + "timestamp": int(time.mktime(time.gmtime())), + "link": "", + "id": "binary_{0}".format(binary.md5), + "title": binary.last_success_msg, + "description": binary.last_success_msg, + } reports.append(CbReport(**fields)) - feedinfo = {'name': 'yara', - 'display_name': "Yara", - 'provider_url': "http://plusvic.github.io/yara/", - 'summary': "Scan binaries collected by Carbon Black with Yara.", - 'tech_data': "There are no requirements to share any data with Carbon Black to use this feed.", - 'icon': 'yara-logo.png', - 'category': "Connectors", - } + feedinfo = { + "name": "yara", + "display_name": "Yara", + "provider_url": "http://plusvic.github.io/yara/", + "summary": "Scan binaries collected by Carbon Black with Yara.", + "tech_data": "There are no requirements to share any data with Carbon Black to use this feed.", + "icon": "yara-logo.png", + "category": "Connectors", + } feedinfo = CbFeedInfo(**feedinfo) feed = CbFeed(feedinfo, reports) - logger.debug("Writing out feed '{0}' to disk".format(feedinfo.data['name'])) - with open(globals.output_file, 'w') as fp: + logger.debug("Writing out feed '{0}' to disk".format(feedinfo.data["name"])) + with open(globals.output_file, "w") as fp: fp.write(feed.dump()) @@ -98,7 +103,7 @@ def generate_yara_rule_map_hash(yara_rule_path: str) -> None: fullpath = os.path.join(yara_rule_path, fn) if not os.path.isfile(fullpath): continue - with open(os.path.join(yara_rule_path, fn), 'rb') as fp: + with open(os.path.join(yara_rule_path, fn), "rb") as fp: data = fp.read() # NOTE: Original logic resulted in a cumulative hash for each file (linking them) md5 = hashlib.md5() @@ -123,13 +128,13 @@ def generate_rule_map_remote(yara_rule_path) -> None: fullpath = os.path.join(yara_rule_path, fn) if not os.path.isfile(fullpath): continue - with open(os.path.join(yara_rule_path, fn), 'rb') as fp: + with open(os.path.join(yara_rule_path, fn), "rb") as fp: ret_dict[fn] = fp.read() result = update_yara_rules_remote.delay(ret_dict) globals.g_yara_rule_map = ret_dict while not result.ready(): - time.sleep(.1) + time.sleep(0.1) def analyze_binaries(md5_hashes: List[str], local: bool) -> Optional: @@ -167,7 +172,7 @@ def analyze_binaries(md5_hashes: List[str], local: bool) -> Optional: if time.time() - start >= 120: # 
2 minute timeout break else: - time.sleep(.1) + time.sleep(0.1) except Exception as err: logger.error("Error when analyzing: {0}".format(err)) logger.error(traceback.format_exc()) @@ -228,17 +233,23 @@ def perform(yara_rule_dir): logger.info("Connecting to Postgres database...") try: - conn = psycopg2.connect(host=globals.g_postgres_host, - database=globals.g_postgres_db, - user=globals.g_postgres_username, - password=globals.g_postgres_password, - port=globals.g_postgres_port) + conn = psycopg2.connect( + host=globals.g_postgres_host, + database=globals.g_postgres_db, + user=globals.g_postgres_username, + password=globals.g_postgres_password, + port=globals.g_postgres_port, + ) cur = conn.cursor(name="yara_agent") - start_date_binaries = datetime.now() - timedelta(days=globals.g_num_days_binaries) + start_date_binaries = datetime.now() - timedelta( + days=globals.g_num_days_binaries + ) # noinspection SqlDialectInspection,SqlNoDataSourceInspection - cur.execute("SELECT md5hash FROM storefiles WHERE present_locally = TRUE AND timestamp >= '{0}' " - "ORDER BY timestamp DESC".format(start_date_binaries)) + cur.execute( + "SELECT md5hash FROM storefiles WHERE present_locally = TRUE AND timestamp >= '{0}' " + "ORDER BY timestamp DESC".format(start_date_binaries) + ) except Exception as err: logger.error("Failed to connect to Postgres database: {0}".format(err)) logger.error(traceback.format_exc()) @@ -248,30 +259,43 @@ def perform(yara_rule_dir): while True: if cur.closed: cur = conn.cursor(name="yara_agent") - cur.execute("SELECT md5hash FROM storefiles WHERE present_locally = TRUE AND timestamp >= '{0}' " - "ORDER BY timestamp DESC".format(start_date_binaries)) - try: + cur.execute( + "SELECT md5hash FROM storefiles WHERE present_locally = TRUE AND timestamp >= '{0}' " + "ORDER BY timestamp DESC".format(start_date_binaries) + ) + try: rows = cur.fetchmany() except psycopg2.OperationalError: cur = conn.cursor(name="yara_agent") - cur.execute("SELECT md5hash FROM storefiles WHERE present_locally = TRUE AND timestamp >= '{0}' " - "ORDER BY timestamp DESC".format(start_date_binaries)) + cur.execute( + "SELECT md5hash FROM storefiles WHERE present_locally = TRUE AND timestamp >= '{0}' " + "ORDER BY timestamp DESC".format(start_date_binaries) + ) rows = cur.fetchmany() if len(rows) == 0: break for row in rows: seconds_since_start = (datetime.now() - start_datetime).seconds - if seconds_since_start >= globals.g_vacuum_seconds and globals.g_vacuum_seconds > 0: + if ( + seconds_since_start >= globals.g_vacuum_seconds + and globals.g_vacuum_seconds > 0 + ): cur.close() logger.warning("!!!Executing vacuum script!!!") target = os.path.join(os.getcwd(), globals.g_vacuum_script) - prog = subprocess.Popen(target, stdout=subprocess.PIPE, stderr=subprocess.PIPE) - stdout, stderr = prog.communicate() # Returns (stdoutdata, stderrdata): stdout and stderr are ignored, here + prog = subprocess.Popen( + target, stdout=subprocess.PIPE, stderr=subprocess.PIPE, shell=True + ) + stdout, stderr = ( + prog.communicate() + ) # Returns (stdoutdata, stderrdata): stdout and stderr are ignored, here logger.info(stdout) logger.error(stderr) if prog.returncode: - logger.warning('program returned error code {0}'.format(prog.returncode)) + logger.warning( + "program returned error code {0}".format(prog.returncode) + ) start_datetime = datetime.now() logger.warning("!!!Done Executing vacuum script!!!") break @@ -282,10 +306,14 @@ def perform(yara_rule_dir): # # Check if query returns any rows # - query = 
BinaryDetonationResult.select().where(BinaryDetonationResult.md5 == md5_hash) + query = BinaryDetonationResult.select().where( + BinaryDetonationResult.md5 == md5_hash + ) if query.exists(): try: - bdr = BinaryDetonationResult.get(BinaryDetonationResult.md5 == md5_hash) + bdr = BinaryDetonationResult.get( + BinaryDetonationResult.md5 == md5_hash + ) scanned_hash_list = json.loads(bdr.misc) if globals.g_disable_rescan and bdr.misc: continue @@ -297,17 +325,27 @@ def perform(yara_rule_dir): # continue except Exception as e: - logger.error("Unable to decode yara rule map hash from database: {0}".format(e)) + logger.error( + "Unable to decode yara rule map hash from database: {0}".format( + e + ) + ) num_binaries_queued += 1 md5_hashes.append(md5_hash) if len(md5_hashes) >= globals.MAX_HASHES: - analysis_results = analyze_binaries(md5_hashes, local=(not globals.g_remote)) + analysis_results = analyze_binaries( + md5_hashes, local=(not globals.g_remote) + ) if analysis_results: for analysis_result in analysis_results: - logger.debug((f"Analysis result is {analysis_result.md5} {analysis_result.binary_not_available}" - f" {analysis_result.long_result} {analysis_result.last_error_msg}")) + logger.debug( + ( + f"Analysis result is {analysis_result.md5} {analysis_result.binary_not_available}" + f" {analysis_result.long_result} {analysis_result.last_error_msg}" + ) + ) if analysis_result.last_error_msg: logger.error(analysis_result.last_error_msg) save_results(analysis_results) @@ -324,8 +362,12 @@ def perform(yara_rule_dir): analysis_results = analyze_binaries(md5_hashes, local=(not globals.g_remote)) if analysis_results: for analysis_result in analysis_results: - logger.debug((f"Analysis result is {analysis_result.md5} {analysis_result.binary_not_available}" - f" {analysis_result.long_result} {analysis_result.last_error_msg}")) + logger.debug( + ( + f"Analysis result is {analysis_result.md5} {analysis_result.binary_not_available}" + f" {analysis_result.long_result} {analysis_result.last_error_msg}" + ) + ) if analysis_result.last_error_msg: logger.error(analysis_result.last_error_msg) save_results(analysis_results) @@ -334,7 +376,9 @@ def perform(yara_rule_dir): generate_feed_from_db() -def _rule_logging(start_time: float, num_binaries_skipped: int, num_total_binaries: int) -> None: +def _rule_logging( + start_time: float, num_binaries_skipped: int, num_total_binaries: int +) -> None: """ Simple method to log yara work. 
:param start_time: start time for the work @@ -344,13 +388,26 @@ def _rule_logging(start_time: float, num_binaries_skipped: int, num_total_binari """ elapsed_time = time.time() - start_time logger.info("elapsed time: {0}".format(humanfriendly.format_timespan(elapsed_time))) - logger.debug(" number binaries scanned: {0}".format(globals.g_num_binaries_analyzed)) + logger.debug( + " number binaries scanned: {0}".format(globals.g_num_binaries_analyzed) + ) logger.debug(" number binaries already scanned: {0}".format(num_binaries_skipped)) - logger.debug(" number binaries unavailable: {0}".format(globals.g_num_binaries_not_available)) + logger.debug( + " number binaries unavailable: {0}".format( + globals.g_num_binaries_not_available + ) + ) logger.info("total binaries from db: {0}".format(num_total_binaries)) - logger.debug(" binaries per second: {0}:".format(round(num_total_binaries / elapsed_time, 2))) - logger.info("num binaries score greater than zero: {0}".format( - len(BinaryDetonationResult.select().where(BinaryDetonationResult.score > 0)))) + logger.debug( + " binaries per second: {0}:".format( + round(num_total_binaries / elapsed_time, 2) + ) + ) + logger.info( + "num binaries score greater than zero: {0}".format( + len(BinaryDetonationResult.select().where(BinaryDetonationResult.score > 0)) + ) + ) logger.info("") @@ -372,131 +429,174 @@ def verify_config(config_file: str, output_file: str = None) -> None: raise CbInvalidConfig(err) logger.debug(f"NOTE: using config file '{abs_config}'") - if not config.has_section('general'): + if not config.has_section("general"): raise CbInvalidConfig(f"Config file does not have a 'general' section") - globals.output_file = output_file if output_file is not None else config_file.strip() + ".json" + globals.output_file = ( + output_file if output_file is not None else config_file.strip() + ".json" + ) logger.debug(f"NOTE: output file will be '{globals.output_file}'") - the_config = config['general'] - if 'worker_type' in the_config: - if the_config['worker_type'] == 'local' or the_config['worker_type'].strip() == "": + the_config = config["general"] + if "worker_type" in the_config: + if ( + the_config["worker_type"] == "local" + or the_config["worker_type"].strip() == "" + ): globals.g_remote = False # 'local' or empty definition - elif the_config['worker_type'] == 'remote': + elif the_config["worker_type"] == "remote": globals.g_remote = True # 'remote' else: # anything else raise CbInvalidConfig( - f"Invalid worker_type '{the_config['worker_type']}' specified; must be 'local' or 'remote'") + f"Invalid worker_type '{the_config['worker_type']}' specified; must be 'local' or 'remote'" + ) else: globals.g_remote = False logger.warning("Config file does not specify 'worker_type', assuming local") # local/remote configuration data if not globals.g_remote: - if 'cb_server_url' in the_config and the_config['cb_server_url'].strip() != "": - globals.g_cb_server_url = the_config['cb_server_url'] + if "cb_server_url" in the_config and the_config["cb_server_url"].strip() != "": + globals.g_cb_server_url = the_config["cb_server_url"] else: raise CbInvalidConfig(f"Local worker configuration missing 'cb_server_url'") - if 'cb_server_token' in the_config and the_config['cb_server_token'].strip() != "": - globals.g_cb_server_token = the_config['cb_server_token'] + if ( + "cb_server_token" in the_config + and the_config["cb_server_token"].strip() != "" + ): + globals.g_cb_server_token = the_config["cb_server_token"] else: - raise CbInvalidConfig(f"Local worker 
configuration missing 'cb_server_token'") + raise CbInvalidConfig( + f"Local worker configuration missing 'cb_server_token'" + ) # TODO: validate url & token with test call? else: - if 'broker_url' in the_config and the_config['broker_url'].strip() != "": - app.conf.update(broker_url=the_config['broker_url'], result_backend=the_config['broker_url']) + if "broker_url" in the_config and the_config["broker_url"].strip() != "": + app.conf.update( + broker_url=the_config["broker_url"], + result_backend=the_config["broker_url"], + ) else: raise CbInvalidConfig(f"Remote worker configuration missing 'broker_url'") # TODO: validate broker with test call? - if 'yara_rules_dir' in the_config and the_config['yara_rules_dir'].strip() != "": - check = os.path.abspath(the_config['yara_rules_dir']) + if "yara_rules_dir" in the_config and the_config["yara_rules_dir"].strip() != "": + check = os.path.abspath(the_config["yara_rules_dir"]) if os.path.exists(check): if os.path.isdir(check): globals.g_yara_rules_dir = check else: - raise CbInvalidConfig("Rules dir '{0}' is not actualy a directory".format(check)) + raise CbInvalidConfig( + "Rules dir '{0}' is not actualy a directory".format(check) + ) else: raise CbInvalidConfig("Rules dir '{0}' does not exist".format(check)) else: - raise CbInvalidConfig("You must specify a yara rules directory in your configuration") + raise CbInvalidConfig( + "You must specify a yara rules directory in your configuration" + ) # NOTE: postgres_host has a default value in globals; use and warn if not defined - if 'postgres_host' in the_config and the_config['postgres_host'].strip() != "": - globals.g_postgres_host = the_config['postgres_host'] + if "postgres_host" in the_config and the_config["postgres_host"].strip() != "": + globals.g_postgres_host = the_config["postgres_host"] else: - logger.warning(f"No defined 'postgres_host'; using default of {globals.g_postgres_host}") + logger.warning( + f"No defined 'postgres_host'; using default of {globals.g_postgres_host}" + ) # NOTE: postgres_username has a default value in globals; use and warn if not defined - if 'postgres_username' in the_config and the_config['postgres_username'].strip() != "": - globals.g_postgres_username = the_config['postgres_username'] + if ( + "postgres_username" in the_config + and the_config["postgres_username"].strip() != "" + ): + globals.g_postgres_username = the_config["postgres_username"] else: - logger.warning(f"No defined 'postgres_username'; using default of {globals.g_postgres_username}") - - if 'postgres_password' in the_config and the_config['postgres_password'].strip() != "": - globals.g_postgres_password = the_config['postgres_password'] + logger.warning( + f"No defined 'postgres_username'; using default of {globals.g_postgres_username}" + ) + + if ( + "postgres_password" in the_config + and the_config["postgres_password"].strip() != "" + ): + globals.g_postgres_password = the_config["postgres_password"] else: raise CbInvalidConfig("No 'postgres_password' defined in the configuration") # NOTE: postgres_db has a default value in globals; use and warn if not defined - if 'postgres_db' in the_config and the_config['postgres_db'].strip() != "": - globals.g_postgres_db = the_config['postgres_db'] + if "postgres_db" in the_config and the_config["postgres_db"].strip() != "": + globals.g_postgres_db = the_config["postgres_db"] else: - logger.warning(f"No defined 'postgres_db'; using default of {globals.g_postgres_db}") + logger.warning( + f"No defined 'postgres_db'; using default of 
{globals.g_postgres_db}" + ) # NOTE: postgres_port has a default value in globals; use and warn if not defined - if 'postgres_port' in the_config: - globals.g_postgres_port = int(the_config['postgres_port']) + if "postgres_port" in the_config: + globals.g_postgres_port = int(the_config["postgres_port"]) else: - logger.warning(f"No defined 'postgres_port'; using default of {globals.g_postgres_port}") + logger.warning( + f"No defined 'postgres_port'; using default of {globals.g_postgres_port}" + ) # TODO: validate postgres connection with supplied information? - if 'niceness' in the_config: - os.nice(int(the_config['niceness'])) + if "niceness" in the_config: + os.nice(int(the_config["niceness"])) - if 'concurrent_hashes' in the_config: - globals.MAX_HASHES = int(the_config['concurrent_hashes']) + if "concurrent_hashes" in the_config: + globals.MAX_HASHES = int(the_config["concurrent_hashes"]) logger.debug("Consurrent Hashes: {0}".format(globals.MAX_HASHES)) - if 'disable_rescan' in the_config: - globals.g_disable_rescan = bool(the_config['disable_rescan']) + if "disable_rescan" in the_config: + globals.g_disable_rescan = bool(the_config["disable_rescan"]) logger.debug("Disable Rescan: {0}".format(globals.g_disable_rescan)) - if 'num_days_binaries' in the_config: - globals.g_num_days_binaries = int(the_config['num_days_binaries']) - logger.debug("Number of days for binaries: {0}".format(globals.g_num_days_binaries)) + if "num_days_binaries" in the_config: + globals.g_num_days_binaries = int(the_config["num_days_binaries"]) + logger.debug( + "Number of days for binaries: {0}".format(globals.g_num_days_binaries) + ) - if 'vacuum_seconds' in config['general']: - globals.g_vacuum_seconds = int(config['general']['vacuum_seconds']) - if 'vacuum_script' in config['general'] and globals.g_vacuum_seconds > 0: - globals.g_vacuum_script = config['general']['vacuum_script'] - logger.warn("!!! Vacuum Script is enabled --- use this advanced feature at your own discretion --- !!!") + if "vacuum_seconds" in config["general"]: + globals.g_vacuum_seconds = int(config["general"]["vacuum_seconds"]) + if "vacuum_script" in config["general"] and globals.g_vacuum_seconds > 0: + globals.g_vacuum_script = config["general"]["vacuum_script"] + logger.warn( + "!!! Vacuum Script is enabled --- use this advanced feature at your own discretion --- !!!" 
+ ) return True + def main(): try: # check for single operation singleton.SingleInstance() except Exception as err: - logger.error(f"Only one instance of this script is allowed to run at a time: {err}") + logger.error( + f"Only one instance of this script is allowed to run at a time: {err}" + ) else: - parser = argparse.ArgumentParser(description='Yara Agent for Yara Connector') - parser.add_argument('--config-file', - required=True, - default='yara_agent.conf', - help='Location of the config file') - parser.add_argument('--log-file', - default='yara_agent.log', - help='Log file output') - parser.add_argument('--output-file', - default='yara_feed.json', - help='output feed file') - parser.add_argument('--validate-yara-rules', - action='store_true', - help='ONLY validate yara rules in a specified directory') - parser.add_argument('--debug', action='store_true') + parser = argparse.ArgumentParser(description="Yara Agent for Yara Connector") + parser.add_argument( + "--config-file", + required=True, + default="yara_agent.conf", + help="Location of the config file", + ) + parser.add_argument( + "--log-file", default="yara_agent.log", help="Log file output" + ) + parser.add_argument( + "--output-file", default="yara_feed.json", help="output feed file" + ) + parser.add_argument( + "--validate-yara-rules", + action="store_true", + help="ONLY validate yara rules in a specified directory", + ) + parser.add_argument("--debug", action="store_true") args = parser.parse_args() @@ -505,7 +605,9 @@ def main(): if args.log_file: formatter = logging.Formatter(logging_format) - handler = logging.handlers.RotatingFileHandler(args.log_file, maxBytes=10 * 1000000, backupCount=10) + handler = logging.handlers.RotatingFileHandler( + args.log_file, maxBytes=10 * 1000000, backupCount=10 + ) handler.setFormatter(formatter) logger.addHandler(handler) @@ -517,7 +619,11 @@ def main(): sys.exit(1) if args.validate_yara_rules: - logger.info("Validating yara rules in directory: {0}".format(globals.g_yara_rules_dir)) + logger.info( + "Validating yara rules in directory: {0}".format( + globals.g_yara_rules_dir + ) + ) yara_rule_map = generate_rule_map(globals.g_yara_rules_dir) try: yara.compile(filepaths=yara_rule_map) @@ -529,7 +635,7 @@ def main(): try: globals.g_yara_rule_map = generate_rule_map(globals.g_yara_rules_dir) generate_yara_rule_map_hash(globals.g_yara_rules_dir) - database = SqliteDatabase('binary.db') + database = SqliteDatabase("binary.db") db.initialize(database) db.connect() db.create_tables([BinaryDetonationResult]) From c736acb683c6fc7a6ccfc46883e85efe82d11032 Mon Sep 17 00:00:00 2001 From: Zachary Estep Date: Fri, 25 Oct 2019 07:36:03 -0400 Subject: [PATCH 058/257] check_call --- main.py | 21 +++++++-------------- 1 file changed, 7 insertions(+), 14 deletions(-) diff --git a/main.py b/main.py index 55b7b89..a6f99da 100644 --- a/main.py +++ b/main.py @@ -284,20 +284,13 @@ def perform(yara_rule_dir): cur.close() logger.warning("!!!Executing vacuum script!!!") target = os.path.join(os.getcwd(), globals.g_vacuum_script) - prog = subprocess.Popen( - target, stdout=subprocess.PIPE, stderr=subprocess.PIPE, shell=True - ) - stdout, stderr = ( - prog.communicate() - ) # Returns (stdoutdata, stderrdata): stdout and stderr are ignored, here - logger.info(stdout) - logger.error(stderr) - if prog.returncode: - logger.warning( - "program returned error code {0}".format(prog.returncode) - ) - start_datetime = datetime.now() - logger.warning("!!!Done Executing vacuum script!!!") + try: + ret = 
subprocess.check_call(target, shell=True) + except subprocess.CalledProcessError: + logger.error(f"Failed to call {target} return code {ret}") + finally: + start_datetime = datetime.now() + logger.warning("!!!Done Executing vacuum script!!!") break num_total_binaries += 1 From 93c4de2c9a19f71935f80582282c25a6945cdd27 Mon Sep 17 00:00:00 2001 From: Burton Choinski Date: Fri, 25 Oct 2019 08:55:56 -0400 Subject: [PATCH 059/257] * Merger of old branch code * added new unit tests --- main.py | 67 +++++++++++------------ test/config/bogus_vacuum_seconds.conf | 21 +++++++ test/config/negative_vacuum_seconds.conf | 21 +++++++ test/config/no_such_vacuum_script.conf | 22 ++++++++ test/config/vacuum_script_dir.conf | 22 ++++++++ test/config/vacuum_script_enabled.conf | 23 ++++++++ test/config/vacuum_script_no_seconds.conf | 23 ++++++++ test/test_main.py | 52 ++++++++++++++++++ 8 files changed, 217 insertions(+), 34 deletions(-) create mode 100644 test/config/bogus_vacuum_seconds.conf create mode 100644 test/config/negative_vacuum_seconds.conf create mode 100644 test/config/no_such_vacuum_script.conf create mode 100644 test/config/vacuum_script_dir.conf create mode 100644 test/config/vacuum_script_enabled.conf create mode 100644 test/config/vacuum_script_no_seconds.conf diff --git a/main.py b/main.py index f24b9c7..91ee625 100644 --- a/main.py +++ b/main.py @@ -5,6 +5,7 @@ import logging import logging.handlers import os +import subprocess import sys import time import traceback @@ -15,8 +16,6 @@ import psycopg2 # noinspection PyPackageRequirements import yara -import subprocess - from celery import group from peewee import SqliteDatabase @@ -245,6 +244,7 @@ def perform(yara_rule_dir): while True: if cur.closed: cur = conn.cursor(name="yara_agent") + # noinspection SqlDialectInspection cur.execute( "SELECT md5hash FROM storefiles WHERE present_locally = TRUE AND timestamp >= '{0}' " "ORDER BY timestamp DESC".format(start_date_binaries) @@ -253,6 +253,7 @@ def perform(yara_rule_dir): rows = cur.fetchmany() except psycopg2.OperationalError: cur = conn.cursor(name="yara_agent") + # noinspection SqlDialectInspection cur.execute( "SELECT md5hash FROM storefiles WHERE present_locally = TRUE AND timestamp >= '{0}' " "ORDER BY timestamp DESC".format(start_date_binaries) @@ -263,17 +264,15 @@ def perform(yara_rule_dir): for row in rows: seconds_since_start = (datetime.now() - start_datetime).seconds - if ( - seconds_since_start >= globals.g_vacuum_seconds - and globals.g_vacuum_seconds > 0 - ): + if seconds_since_start >= globals.g_vacuum_seconds > 0: cur.close() logger.warning("!!!Executing vacuum script!!!") target = os.path.join(os.getcwd(), globals.g_vacuum_script) try: ret = subprocess.check_call(target, shell=True) - except subprocess.CalledProcessError: - logger.error(f"Failed to call {target} return code {ret}") + logger.debug(f"Subprocess return code: {ret}") + except subprocess.CalledProcessError as err: + logger.error(f"Failed to call {target}: {err}") finally: start_datetime = datetime.now() logger.warning("!!!Done Executing vacuum script!!!") @@ -356,7 +355,7 @@ def perform(yara_rule_dir): def _rule_logging( - start_time: float, num_binaries_skipped: int, num_total_binaries: int + start_time: float, num_binaries_skipped: int, num_total_binaries: int ) -> None: """ Simple method to log yara work. 
@@ -397,7 +396,7 @@ def verify_config(config_file: str, output_file: str = None) -> None: :param config_file: The config file to validate :param output_file: the output file; if not specified equals config file plus ".json" """ - abs_config = os.path.abspath(config_file) + abs_config = os.path.abspath(os.path.expanduser(config_file)) header = f"Config file '{abs_config}'" config = configparser.ConfigParser() @@ -413,16 +412,15 @@ def verify_config(config_file: str, output_file: str = None) -> None: if not config.has_section('general'): raise CbInvalidConfig(f"{header} does not have a 'general' section") - globals.output_file = ( - output_file if output_file is not None else config_file.strip() + ".json" - ) + globals.output_file = output_file if output_file is not None else config_file.strip() + ".json" + globals.output_file = os.path.abspath(os.path.expanduser(globals.output_file)) logger.debug(f"NOTE: output file will be '{globals.output_file}'") the_config = config["general"] if "worker_type" in the_config: if ( - the_config["worker_type"] == "local" - or the_config["worker_type"].strip() == "" + the_config["worker_type"] == "local" + or the_config["worker_type"].strip() == "" ): globals.g_remote = False # 'local' or empty definition elif the_config["worker_type"] == "remote": @@ -474,8 +472,8 @@ def verify_config(config_file: str, output_file: str = None) -> None: # NOTE: postgres_username has a default value in globals; use and warn if not defined if ( - "postgres_username" in the_config - and the_config["postgres_username"].strip() != "" + "postgres_username" in the_config + and the_config["postgres_username"].strip() != "" ): globals.g_postgres_username = the_config["postgres_username"] else: @@ -512,20 +510,23 @@ def verify_config(config_file: str, output_file: str = None) -> None: logger.debug("Disable Rescan: {0}".format(globals.g_disable_rescan)) if "num_days_binaries" in the_config: - globals.g_num_days_binaries = int(the_config["num_days_binaries"]) - logger.debug( - "Number of days for binaries: {0}".format(globals.g_num_days_binaries) - ) - - if "vacuum_seconds" in config["general"]: - globals.g_vacuum_seconds = int(config["general"]["vacuum_seconds"]) - if "vacuum_script" in config["general"] and globals.g_vacuum_seconds > 0: - globals.g_vacuum_script = config["general"]["vacuum_script"] - logger.warn( - "!!! Vacuum Script is enabled --- use this advanced feature at your own discretion --- !!!" 
- ) - - return True + globals.g_num_days_binaries = max(int(the_config["num_days_binaries"]), 1) + logger.debug("Number of days for binaries: {0}".format(globals.g_num_days_binaries)) + + if "vacuum_seconds" in the_config: + globals.g_vacuum_seconds = max(int(the_config["vacuum_seconds"]), 0) + if "vacuum_script" in the_config and the_config["vacuum_seconds"].strip() != "": + if globals.g_vacuum_seconds > 0: + check = os.path.abspath(the_config["vacuum_script"]) + if os.path.exists(check): + if os.path.isdir(check): + raise CbInvalidConfig(f"{header} specified 'vacuum_script' ({check}) is a directory") + else: + raise CbInvalidConfig(f"{header} specified 'vacuum_script' ({check}) does not exist") + globals.g_vacuum_script = check + logger.warning(f"Vacuum Script '{check}' is enabled; use this advanced feature at your own discretion!") + else: + logger.debug(f"{header} has 'vacuum_script' defined, but it is disabled") def main(): @@ -533,9 +534,7 @@ def main(): # check for single operation singleton.SingleInstance() except Exception as err: - logger.error( - f"Only one instance of this script is allowed to run at a time: {err}" - ) + logger.error(f"Only one instance of this script is allowed to run at a time: {err}") else: parser = argparse.ArgumentParser(description="Yara Agent for Yara Connector") parser.add_argument( diff --git a/test/config/bogus_vacuum_seconds.conf b/test/config/bogus_vacuum_seconds.conf new file mode 100644 index 0000000..b77c8d9 --- /dev/null +++ b/test/config/bogus_vacuum_seconds.conf @@ -0,0 +1,21 @@ +[general] +worker_type=local + +; ONLY for worker_type of local +cb_server_url=https://127.0.0.1:443 +cb_server_token=wFC4he6OBd20IBbB0XkhE5ZsjBQUj1GB + +yara_rules_dir=./rules + +postgres_host=localhost +postgres_username=cb +postgres_password=6PGcbuwlQnIuPqOF +postgres_db=cb +postgres_port=5002 + +niceness=1 +concurrent_hashes=8 +disable_rescan=False +num_days_binaries=365 + +vacuum_seconds=BOGUS diff --git a/test/config/negative_vacuum_seconds.conf b/test/config/negative_vacuum_seconds.conf new file mode 100644 index 0000000..9376351 --- /dev/null +++ b/test/config/negative_vacuum_seconds.conf @@ -0,0 +1,21 @@ +[general] +worker_type=local + +; ONLY for worker_type of local +cb_server_url=https://127.0.0.1:443 +cb_server_token=wFC4he6OBd20IBbB0XkhE5ZsjBQUj1GB + +yara_rules_dir=./rules + +postgres_host=localhost +postgres_username=cb +postgres_password=6PGcbuwlQnIuPqOF +postgres_db=cb +postgres_port=5002 + +niceness=1 +concurrent_hashes=8 +disable_rescan=False +num_days_binaries=365 + +vacuum_seconds=-20 diff --git a/test/config/no_such_vacuum_script.conf b/test/config/no_such_vacuum_script.conf new file mode 100644 index 0000000..43a9ec8 --- /dev/null +++ b/test/config/no_such_vacuum_script.conf @@ -0,0 +1,22 @@ +[general] +worker_type=local + +; ONLY for worker_type of local +cb_server_url=https://127.0.0.1:443 +cb_server_token=wFC4he6OBd20IBbB0XkhE5ZsjBQUj1GB + +yara_rules_dir=./rules + +postgres_host=localhost +postgres_username=cb +postgres_password=6PGcbuwlQnIuPqOF +postgres_db=cb +postgres_port=5002 + +niceness=1 +concurrent_hashes=8 +disable_rescan=False +num_days_binaries=365 + +vacuum_seconds=3600 +vacuum_script=no-such-script.sh diff --git a/test/config/vacuum_script_dir.conf b/test/config/vacuum_script_dir.conf new file mode 100644 index 0000000..c72ac9d --- /dev/null +++ b/test/config/vacuum_script_dir.conf @@ -0,0 +1,22 @@ +[general] +worker_type=local + +; ONLY for worker_type of local +cb_server_url=https://127.0.0.1:443 
+cb_server_token=wFC4he6OBd20IBbB0XkhE5ZsjBQUj1GB
+
+yara_rules_dir=./rules
+
+postgres_host=localhost
+postgres_username=cb
+postgres_password=6PGcbuwlQnIuPqOF
+postgres_db=cb
+postgres_port=5002
+
+niceness=1
+concurrent_hashes=8
+disable_rescan=False
+num_days_binaries=365
+
+vacuum_seconds=3600
+vacuum_script=./rules
diff --git a/test/config/vacuum_script_enabled.conf b/test/config/vacuum_script_enabled.conf
new file mode 100644
index 0000000..19b8544
--- /dev/null
+++ b/test/config/vacuum_script_enabled.conf
@@ -0,0 +1,23 @@
+[general]
+worker_type=local
+
+; ONLY for worker_type of local
+cb_server_url=https://127.0.0.1:443
+cb_server_token=wFC4he6OBd20IBbB0XkhE5ZsjBQUj1GB
+
+yara_rules_dir=./rules
+
+postgres_host=localhost
+postgres_username=cb
+postgres_password=6PGcbuwlQnIuPqOF
+postgres_db=cb
+postgres_port=5002
+
+niceness=1
+concurrent_hashes=8
+disable_rescan=False
+num_days_binaries=365
+
+vacuum_seconds=3600
+; we just need a valid file
+vacuum_script=./config/valid.conf
diff --git a/test/config/vacuum_script_no_seconds.conf b/test/config/vacuum_script_no_seconds.conf
new file mode 100644
index 0000000..fce3e7f
--- /dev/null
+++ b/test/config/vacuum_script_no_seconds.conf
@@ -0,0 +1,23 @@
+[general]
+worker_type=local
+
+; ONLY for worker_type of local
+cb_server_url=https://127.0.0.1:443
+cb_server_token=wFC4he6OBd20IBbB0XkhE5ZsjBQUj1GB
+
+yara_rules_dir=./rules
+
+postgres_host=localhost
+postgres_username=cb
+postgres_password=6PGcbuwlQnIuPqOF
+postgres_db=cb
+postgres_port=5002
+
+niceness=1
+concurrent_hashes=8
+disable_rescan=False
+num_days_binaries=365
+
+vacuum_seconds=0
+; we just need a valid file
+vacuum_script=./config/valid.conf
diff --git a/test/test_main.py b/test/test_main.py
index 884ece2..103f287 100644
--- a/test/test_main.py
+++ b/test/test_main.py
@@ -306,3 +306,55 @@ def test_16b_config_bogus_num_days_binaries(self):
         with self.assertRaises(ValueError) as err:
             verify_config(os.path.join(TESTS, "config", "bogus_num_days_binaries.conf"))
         assert "invalid literal for int" in "{0}".format(err.exception.args[0])
+
+    def test_17a_config_bogus_vacuum_seconds(self):
+        """
+        Ensure that config with bogus (non-int) vacuum_seconds is detected.
+        """
+        with self.assertRaises(ValueError) as err:
+            verify_config(os.path.join(TESTS, "config", "bogus_vacuum_seconds.conf"))
+        assert "invalid literal for int" in "{0}".format(err.exception.args[0])
+
+    def test_17b_config_negative_vacuum_seconds(self):
+        """
+        Ensure that config with negative vacuum_seconds is clamped to zero.
+        """
+        globals.g_vacuum_seconds = None
+        verify_config(os.path.join(TESTS, "config", "negative_vacuum_seconds.conf"))
+        self.assertEqual(0, globals.g_vacuum_seconds)
+
+    def test_18a_config_missing_vacuum_script(self):
+        """
+        Ensure that config with missing vacuum_script is detected.
+        """
+        with self.assertRaises(CbInvalidConfig) as err:
+            verify_config(os.path.join(TESTS, "config", "no_such_vacuum_script.conf"))
+        assert "does not exist" in "{0}".format(err.exception.args[0])
+
+    def test_18b_config_bogus_vacuum_script_is_dir(self):
+        """
+        Ensure that config with vacuum_script as directory is detected.
+        """
+        with self.assertRaises(CbInvalidConfig) as err:
+            verify_config(os.path.join(TESTS, "config", "vacuum_script_dir.conf"))
+        assert "is a directory" in "{0}".format(err.exception.args[0])
+
+    def test_19a_config_vacuum_script_enabled(self):
+        """
+        Ensure that config with vacuum_script and vacuum_seconds is ready to go.
+ """ + globals.g_vacuum_seconds = None + globals.g_vacuum_script = None + verify_config(os.path.join(TESTS, "config", "vacuum_script_enabled.conf")) + self.assertEqual(3600, globals.g_vacuum_seconds) + self.assertTrue(globals.g_vacuum_script.endswith("/config/valid.conf")) + + def test_19a_config_vacuum_script_and_no_vacuum_seconds(self): + """ + Ensure that config with vacuum_script but vacuum_seconds == 0 has it disabled. + """ + globals.g_vacuum_seconds = None + globals.g_vacuum_script = None + verify_config(os.path.join(TESTS, "config", "vacuum_script_no_seconds.conf")) + self.assertEqual(0, globals.g_vacuum_seconds) + self.assertIsNone(globals.g_vacuum_script) From 4a31a2577e2859ba116f2b05dc5ab57495356f14 Mon Sep 17 00:00:00 2001 From: Burton Choinski Date: Fri, 25 Oct 2019 09:05:19 -0400 Subject: [PATCH 060/257] * moved script to consolidated scripts directory --- vacuumscript.sh => scripts/vacuumscript.sh | 0 test/config/vacuum_script_enabled.conf | 3 +-- test/config/vacuum_script_no_seconds.conf | 3 +-- test/test_main.py | 2 +- 4 files changed, 3 insertions(+), 5 deletions(-) rename vacuumscript.sh => scripts/vacuumscript.sh (100%) diff --git a/vacuumscript.sh b/scripts/vacuumscript.sh similarity index 100% rename from vacuumscript.sh rename to scripts/vacuumscript.sh diff --git a/test/config/vacuum_script_enabled.conf b/test/config/vacuum_script_enabled.conf index 19b8544..885bd30 100644 --- a/test/config/vacuum_script_enabled.conf +++ b/test/config/vacuum_script_enabled.conf @@ -19,5 +19,4 @@ disable_rescan=False num_days_binaries=365 vacuum_seconds=3600 -; we just need a valid file -vacuum_script=./config/valid.conf +vacuum_script=../scripts/vacuumscript.sh diff --git a/test/config/vacuum_script_no_seconds.conf b/test/config/vacuum_script_no_seconds.conf index fce3e7f..06316e3 100644 --- a/test/config/vacuum_script_no_seconds.conf +++ b/test/config/vacuum_script_no_seconds.conf @@ -19,5 +19,4 @@ disable_rescan=False num_days_binaries=365 vacuum_seconds=0 -; we just need a valid file -vacuum_script=./config/valid.conf +vacuum_script=../scripts/vacuumscript.sh diff --git a/test/test_main.py b/test/test_main.py index 103f287..96d13bb 100644 --- a/test/test_main.py +++ b/test/test_main.py @@ -347,7 +347,7 @@ def test_19a_config_vacuum_script_enabled(self): globals.g_vacuum_script = None verify_config(os.path.join(TESTS, "config", "vacuum_script_enabled.conf")) self.assertEqual(3600, globals.g_vacuum_seconds) - self.assertTrue(globals.g_vacuum_script.endswith("/config/valid.conf")) + self.assertTrue(globals.g_vacuum_script.endswith("/scripts/vacuumscript.sh")) def test_19a_config_vacuum_script_and_no_vacuum_seconds(self): """ From dfd86b10153b88fe633ba69e0e0a35dc32017745 Mon Sep 17 00:00:00 2001 From: Burton Choinski Date: Fri, 25 Oct 2019 09:45:03 -0400 Subject: [PATCH 061/257] * updated README * Added `{HERE}` placeholder for pathing --- README.md | 117 +++++++++++-------------- main.py | 14 ++- test/config/vacuum_script_enabled.conf | 2 +- utilities.py | 20 +++++ 4 files changed, 80 insertions(+), 73 deletions(-) create mode 100644 utilities.py diff --git a/README.md b/README.md index a1573d9..ff18caf 100644 --- a/README.md +++ b/README.md @@ -24,70 +24,59 @@ The Yara agent must be installed on the same system as Cb Response. 
 #### Sample Yara Agent Config
-
-    [general]
-
-    ;
-    ; either run a single worker locally or remotely
-    ; valid types are 'local' or 'remote'
-    ;
-    worker_type=local
-
-    ;
-    ; ONLY for worker_type of remote
-    ; IP Address of workers if worker_type is remote
-    ;
-    ;broker_url=redis://
-
-    ;
-    ; path to directory containing yara rules
-    ;
-    yara_rules_dir=yara_rules
-
-    ;
-    ; Cb Response postgres Database settings
-    ;
-    postgres_host=
-    postgres_username=
-    postgres_password=
-    postgres_db=
-    postgres_port=
-
-    ;
-    ; ONLY for worker_type of local
-    ; Cb Response Server settings for scanning locally.
-    ; For remote scanning please set these parameters in the yara worker config file
-    ; Default: https://127.0.0.1
-    ;
-    cb_server_url=
-    cb_server_token=
-
-    ;
-    ; nice value used for this script
-    ;
-    niceness=1
-
-    ;
-    ; Number of hashes to send to the workers concurrently. Defaults to 8.
-    ; Recommend setting to the number of workers on the remote system.
-    ;
-    concurrent_hashes=8
-
-    ;
-    ; If you don't want binaries to be rescanned more than once, regardless of the rules used, set this to True
-    ; Default: False
-    ;
-    disable_rescan=False
-
-    ;
-    ; The agent will pull binaries up to the configured number of days. For exmaple, 365 will pull all binaries with
-    ; a timestamp within the last year
-    ; Default: 365
-    ;
-    num_days_binaries=365
-
-
-* copy and modify the above config to `/etc/cb/integrations/yara/yara_agent.conf`
+Copy and modify the following config to `/etc/cb/integrations/yara/yara_agent.conf`
+
+```ini
+[general]
+; either run a single worker locally or remotely
+; valid types are 'local' or 'remote'
+worker_type=local
+
+; ONLY used for worker_type of local
+; Cb Response Server settings for scanning locally.
+; For remote scanning please set these parameters in the yara worker config file
+; Default: https://127.0.0.1
+cb_server_url=
+cb_server_token=
+
+; ONLY used for worker_type of remote
+; IP Address of workers if worker_type is remote
+;broker_url=redis://
+
+; path to directory containing yara rules
+yara_rules_dir=yara_rules
+
+; Cb Response postgres Database settings
+postgres_host=
+postgres_username=
+postgres_password=
+postgres_db=
+postgres_port=
+
+; os nice value used for this script
+niceness=1
+
+; Number of hashes to send to the workers concurrently. Defaults to 8.
+; Recommend setting to the number of workers on the remote system.
+concurrent_hashes=8
+
+; If you don't want binaries to be rescanned more than once, regardless of the rules used, set this to True
+disable_rescan=True
+
+; The agent will pull binaries up to the configured number of days. For example, 365 will pull all binaries with
+; a timestamp within the last year
+; Default: 365
+num_days_binaries=365
+
+; ADVANCED: A vacuum script can be enabled to "clean" the database and prevent fragmentation
+; This can be disabled if the seconds value is 0 or less
+vacuum_seconds=0
+vacuum_script={HERE}/scripts/vacuumscript.sh
+```
+
+> NOTES:
+> 1) The use of `{HERE}` is a placeholder representing the location of this package's `main.py` file,
+> allowing for the use of relative paths to the package itself.
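As a quick way to check how the validator will interpret a config written from this sample, the following standalone sketch (not part of the connector; the path argument is illustrative) reports the effective vacuum settings:

```python
import configparser


def summarize_vacuum_settings(path: str) -> None:
    """Report whether the advanced vacuum feature would be active."""
    config = configparser.ConfigParser()
    config.read(path)
    general = config["general"]
    # Values of 0 or less disable the vacuum script entirely
    seconds = max(general.getint("vacuum_seconds", fallback=0), 0)
    script = general.get("vacuum_script", fallback="").strip()
    if seconds > 0 and script:
        print(f"vacuum: every {seconds}s via '{script}'")
    else:
        print("vacuum: disabled")


summarize_vacuum_settings("/etc/cb/integrations/yara/yara_agent.conf")
```

A bogus, non-integer `vacuum_seconds` surfaces here as the same `ValueError` the agent's own verification raises.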
#### Running Yara Agent Manually diff --git a/main.py b/main.py index 91ee625..a566dc0 100644 --- a/main.py +++ b/main.py @@ -26,6 +26,7 @@ from exceptions import CbInvalidConfig from feed import CbFeed, CbFeedInfo, CbReport from tasks import analyze_binary, app, generate_rule_map, update_yara_rules_remote +from utilities import placehold logging_format = "%(asctime)s-%(name)s-%(lineno)d-%(levelname)s-%(message)s" logging.basicConfig(format=logging_format) @@ -396,7 +397,7 @@ def verify_config(config_file: str, output_file: str = None) -> None: :param config_file: The config file to validate :param output_file: the output file; if not specified equals config file plus ".json" """ - abs_config = os.path.abspath(os.path.expanduser(config_file)) + abs_config = os.path.abspath(os.path.expanduser(placehold(config_file))) header = f"Config file '{abs_config}'" config = configparser.ConfigParser() @@ -413,15 +414,12 @@ def verify_config(config_file: str, output_file: str = None) -> None: raise CbInvalidConfig(f"{header} does not have a 'general' section") globals.output_file = output_file if output_file is not None else config_file.strip() + ".json" - globals.output_file = os.path.abspath(os.path.expanduser(globals.output_file)) + globals.output_file = os.path.abspath(os.path.expanduser(placehold(globals.output_file))) logger.debug(f"NOTE: output file will be '{globals.output_file}'") the_config = config["general"] if "worker_type" in the_config: - if ( - the_config["worker_type"] == "local" - or the_config["worker_type"].strip() == "" - ): + if the_config["worker_type"] == "local" or the_config["worker_type"].strip() == "": globals.g_remote = False # 'local' or empty definition elif the_config["worker_type"] == "remote": globals.g_remote = True # 'remote' @@ -453,7 +451,7 @@ def verify_config(config_file: str, output_file: str = None) -> None: # TODO: validate broker with test call? 
if "yara_rules_dir" in the_config and the_config["yara_rules_dir"].strip() != "": - check = os.path.abspath(the_config["yara_rules_dir"]) + check = os.path.abspath(placehold(the_config["yara_rules_dir"])) if os.path.exists(check): if os.path.isdir(check): globals.g_yara_rules_dir = check @@ -517,7 +515,7 @@ def verify_config(config_file: str, output_file: str = None) -> None: globals.g_vacuum_seconds = max(int(the_config["vacuum_seconds"]), 0) if "vacuum_script" in the_config and the_config["vacuum_seconds"].strip() != "": if globals.g_vacuum_seconds > 0: - check = os.path.abspath(the_config["vacuum_script"]) + check = os.path.abspath(placehold(the_config["vacuum_script"])) if os.path.exists(check): if os.path.isdir(check): raise CbInvalidConfig(f"{header} specified 'vacuum_script' ({check}) is a directory") diff --git a/test/config/vacuum_script_enabled.conf b/test/config/vacuum_script_enabled.conf index 885bd30..6b914c8 100644 --- a/test/config/vacuum_script_enabled.conf +++ b/test/config/vacuum_script_enabled.conf @@ -19,4 +19,4 @@ disable_rescan=False num_days_binaries=365 vacuum_seconds=3600 -vacuum_script=../scripts/vacuumscript.sh +vacuum_script={HERE}/scripts/vacuumscript.sh diff --git a/utilities.py b/utilities.py new file mode 100644 index 0000000..92788fb --- /dev/null +++ b/utilities.py @@ -0,0 +1,20 @@ +################################################################################ +# This file contains various package-wide utility functions +################################################################################ + +import os + +__all__ = ["HERE", "placehold"] + +# self location for the package +HERE = os.path.dirname(__file__) + + +def placehold(source: str) -> str: + """ + Locate any important string placeholders and substitute live values for them. + :param source: source string to convert + :return: converted string + """ + source = source.replace("{HERE}", HERE) + return source From 100d2afc68fecd9487cdbce8457bd768a97ccc8c Mon Sep 17 00:00:00 2001 From: Burton Choinski Date: Fri, 25 Oct 2019 09:56:58 -0400 Subject: [PATCH 062/257] * added local rules folder and README --- .gitignore | 3 +++ yara_rules/README.md | 8 ++++++++ 2 files changed, 11 insertions(+) create mode 100644 yara_rules/README.md diff --git a/.gitignore b/.gitignore index 35221c3..adf06dc 100644 --- a/.gitignore +++ b/.gitignore @@ -38,3 +38,6 @@ nosetests.xml .idea config.ini + +# created local DB +#binary.db diff --git a/yara_rules/README.md b/yara_rules/README.md new file mode 100644 index 0000000..3f4a2c9 --- /dev/null +++ b/yara_rules/README.md @@ -0,0 +1,8 @@ +# yara_rules +This folder can be used a convenient location to locate your Yara rules. It can be defined +in your configuration file as: +```ini +yara_rules_dir={HERE}/yara_rules +``` + +It is suggested that subfolders be used to organize any complex and differing rule sets. 
From 02557094723974bd4151e77f10831586797af1f2 Mon Sep 17 00:00:00 2001
From: Burton Choinski
Date: Fri, 25 Oct 2019 09:57:41 -0400
Subject: [PATCH 063/257] * added binary.db to .gitignore

---
 .gitignore | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.gitignore b/.gitignore
index adf06dc..bb9e8b3 100644
--- a/.gitignore
+++ b/.gitignore
@@ -40,4 +40,4 @@ nosetests.xml
 config.ini
 
 # created local DB
-#binary.db
+binary.db

From 685b1d3010a026327a2b82e91f4ee8422ab7c325 Mon Sep 17 00:00:00 2001
From: Zachary Estep
Date: Fri, 25 Oct 2019 10:01:50 -0400
Subject: [PATCH 064/257] Back to popen

---
 main.py | 22 ++++++++++++++--------
 1 file changed, 14 insertions(+), 8 deletions(-)

diff --git a/main.py b/main.py
index a566dc0..54b7902 100644
--- a/main.py
+++ b/main.py
@@ -269,14 +269,20 @@ def perform(yara_rule_dir):
                 cur.close()
                 logger.warning("!!!Executing vacuum script!!!")
                 target = os.path.join(os.getcwd(), globals.g_vacuum_script)
-                try:
-                    ret = subprocess.check_call(target, shell=True)
-                    logger.debug(f"Subprocess return code: {ret}")
-                except subprocess.CalledProcessError as err:
-                    logger.error(f"Failed to call {target}: {err}")
-                finally:
-                    start_datetime = datetime.now()
-                    logger.warning("!!!Done Executing vacuum script!!!")
+                prog = subprocess.Popen(
+                    target, stdout=subprocess.PIPE, stderr=subprocess.PIPE, shell=True
+                )
+                stdout, stderr = (
+                    prog.communicate()
+                )  # Returns (stdoutdata, stderrdata): stdout and stderr are ignored, here
+                logger.info(stdout)
+                logger.error(stderr)
+                if prog.returncode:
+                    logger.warning(
+                        "program returned error code {0}".format(prog.returncode)
+                    )
+                start_datetime = datetime.now()
+                logger.warning("!!!Done Executing vacuum script!!!")
                 break
 
             num_total_binaries += 1

From 475ef70b0c13deb245bd909f4887343a798b5ad8 Mon Sep 17 00:00:00 2001
From: Burton Choinski
Date: Fri, 25 Oct 2019 10:13:09 -0400
Subject: [PATCH 065/257] Changes to tasks.

---
 README.md | 1 +
 main.py   | 9 +++------
 tasks.py  | 5 +++--
 3 files changed, 7 insertions(+), 8 deletions(-)

diff --git a/README.md b/README.md
index ff18caf..23d839e 100644
--- a/README.md
+++ b/README.md
@@ -77,6 +77,7 @@ vacuum_script={HERE}/scripts/vacuumscript.sh
 > NOTES:
 > 1) The use of `{HERE}` is a placeholder representing the location of this package's `main.py` file,
 > allowing for the use of relative paths to the package itself.
+> 2) Paths can use `~` to access your home directory, so you can locate files there as well.
 
 #### Running Yara Agent Manually
 
diff --git a/main.py b/main.py
index 54b7902..1c51362 100644
--- a/main.py
+++ b/main.py
@@ -457,7 +457,7 @@ def verify_config(config_file: str, output_file: str = None) -> None:
         # TODO: validate broker with test call?
if "yara_rules_dir" in the_config and the_config["yara_rules_dir"].strip() != "": - check = os.path.abspath(placehold(the_config["yara_rules_dir"])) + check = os.path.abspath(os.path.expanduser(placehold(the_config["yara_rules_dir"]))) if os.path.exists(check): if os.path.isdir(check): globals.g_yara_rules_dir = check @@ -475,10 +475,7 @@ def verify_config(config_file: str, output_file: str = None) -> None: logger.warning(f"{header} has no defined 'postgres_host'; using default of '{globals.g_postgres_host}'") # NOTE: postgres_username has a default value in globals; use and warn if not defined - if ( - "postgres_username" in the_config - and the_config["postgres_username"].strip() != "" - ): + if "postgres_username" in the_config and the_config["postgres_username"].strip() != "": globals.g_postgres_username = the_config["postgres_username"] else: logger.warning(f"{header} has no defined 'postgres_username'; using default of '{globals.g_postgres_username}'") @@ -521,7 +518,7 @@ def verify_config(config_file: str, output_file: str = None) -> None: globals.g_vacuum_seconds = max(int(the_config["vacuum_seconds"]), 0) if "vacuum_script" in the_config and the_config["vacuum_seconds"].strip() != "": if globals.g_vacuum_seconds > 0: - check = os.path.abspath(placehold(the_config["vacuum_script"])) + check = os.path.abspath(os.path.expanduser(placehold(the_config["vacuum_script"]))) if os.path.exists(check): if os.path.isdir(check): raise CbInvalidConfig(f"{header} specified 'vacuum_script' ({check}) is a directory") diff --git a/tasks.py b/tasks.py index 9af437b..f5b54a2 100644 --- a/tasks.py +++ b/tasks.py @@ -15,6 +15,7 @@ import globals from analysis_result import AnalysisResult from exceptions import CbInvalidConfig +from utilities import placehold app = Celery() # noinspection PyUnusedName @@ -36,7 +37,7 @@ def verify_config(config_file: str) -> None: NOTE: Replicates, to a smaller degree, the function in main.py; it is presumed that more detailed checks are there :param config_file: path to the config file """ - abs_config = os.path.abspath(config_file) + abs_config = os.path.abspath(os.path.expanduser(placehold(config_file))) header = f"Config file '{abs_config}'" config = configparser.ConfigParser() @@ -55,7 +56,7 @@ def verify_config(config_file: str) -> None: the_config = config['general'] if 'yara_rules_dir' in the_config and the_config['yara_rules_dir'].strip() != "": - check = os.path.abspath(the_config['yara_rules_dir']) + check = os.path.abspath(os.path.expanduser(placehold(the_config["yara_rules_dir"]))) if os.path.exists(check): if os.path.isdir(check): globals.g_yara_rules_dir = check From 7b8d8c692dd79a89507d22ece71b9ab5d8a7e38d Mon Sep 17 00:00:00 2001 From: Zachary Estep Date: Fri, 25 Oct 2019 10:15:43 -0400 Subject: [PATCH 066/257] Adding feed_database_path config option --- globals.py | 4 +++- main.py | 8 +++++++- 2 files changed, 10 insertions(+), 2 deletions(-) diff --git a/globals.py b/globals.py index 76414f8..b7f3337 100644 --- a/globals.py +++ b/globals.py @@ -33,4 +33,6 @@ g_num_days_binaries = 365 g_vacuum_seconds = -1 -g_vacuum_script = 'vacuumscript.sh' +g_vacuum_script = 'scripts/vacuumscript.sh' + +g_feed_database_path = "." 
diff --git a/main.py b/main.py index 1c51362..a41cc39 100644 --- a/main.py +++ b/main.py @@ -529,6 +529,12 @@ def verify_config(config_file: str, output_file: str = None) -> None: else: logger.debug(f"{header} has 'vacuum_script' defined, but it is disabled") + if 'feed_database_path' in the_config: + globals.feed_database_path = the_config['feed_database_path'] + check = os.path.abspath(placehold(the_config["feed_database_path"])) + if not(os.path.exists(check) and os.path.isdir(check)): + raise CbInvalidConfig("Invalid database path specified") + def main(): try: @@ -594,7 +600,7 @@ def main(): try: globals.g_yara_rule_map = generate_rule_map(globals.g_yara_rules_dir) generate_yara_rule_map_hash(globals.g_yara_rules_dir) - database = SqliteDatabase("binary.db") + database = SqliteDatabase(os.path.join(globals.g_feed_database_path,"binary.db")) db.initialize(database) db.connect() db.create_tables([BinaryDetonationResult]) From 58c0fd47988038eacc4bf77417bdad198979e529 Mon Sep 17 00:00:00 2001 From: Zachary Estep Date: Fri, 25 Oct 2019 10:17:24 -0400 Subject: [PATCH 067/257] Fixing default for feed_db-Path --- globals.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/globals.py b/globals.py index b7f3337..5c6eb94 100644 --- a/globals.py +++ b/globals.py @@ -35,4 +35,4 @@ g_vacuum_seconds = -1 g_vacuum_script = 'scripts/vacuumscript.sh' -g_feed_database_path = "." +g_feed_database_path = "./" From 3b8d26ab525c1b829c943f7d1610826c6c371305 Mon Sep 17 00:00:00 2001 From: Zachary Estep Date: Fri, 25 Oct 2019 10:24:06 -0400 Subject: [PATCH 068/257] updates --- main.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/main.py b/main.py index a41cc39..a8ef23c 100644 --- a/main.py +++ b/main.py @@ -270,7 +270,7 @@ def perform(yara_rule_dir): logger.warning("!!!Executing vacuum script!!!") target = os.path.join(os.getcwd(), globals.g_vacuum_script) prog = subprocess.Popen( - target, stdout=subprocess.PIPE, stderr=subprocess.PIPE, shell=True + target, stdout=subprocess.PIPE, stderr=subprocess.PIPE, shell=True, env = os.environ.copy() ) stdout, stderr = ( prog.communicate() From ee284557e0273050979c2c0c097b105fe9d931c0 Mon Sep 17 00:00:00 2001 From: Zachary Estep Date: Fri, 25 Oct 2019 10:44:31 -0400 Subject: [PATCH 069/257] update pgpassword --- main.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/main.py b/main.py index a8ef23c..fd488a8 100644 --- a/main.py +++ b/main.py @@ -269,8 +269,10 @@ def perform(yara_rule_dir): cur.close() logger.warning("!!!Executing vacuum script!!!") target = os.path.join(os.getcwd(), globals.g_vacuum_script) + envdict = dict(os.environ) + envdict["PGPASSWORD"] = globals.g_postgres_password prog = subprocess.Popen( - target, stdout=subprocess.PIPE, stderr=subprocess.PIPE, shell=True, env = os.environ.copy() + target, stdout=subprocess.PIPE, stderr=subprocess.PIPE, shell=True, env = envdict ) stdout, stderr = ( prog.communicate() From 32ef9c0b70dab20ffdb8b8731d47a6eddeb4f5e0 Mon Sep 17 00:00:00 2001 From: Zachary Estep Date: Fri, 25 Oct 2019 10:52:53 -0400 Subject: [PATCH 070/257] updates to envars for postgres --- main.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/main.py b/main.py index fd488a8..538a265 100644 --- a/main.py +++ b/main.py @@ -271,6 +271,10 @@ def perform(yara_rule_dir): target = os.path.join(os.getcwd(), globals.g_vacuum_script) envdict = dict(os.environ) envdict["PGPASSWORD"] = globals.g_postgres_password + envdict["PGUSERNAME"] = globals.g_postgres_username + envdict['PGHOST'] = 
globals.g_postgres_host + envdict["PGDATABASE"] = globals.g_postgres_db + envdict["PGPORT"] = globals.g_postgres_port prog = subprocess.Popen( target, stdout=subprocess.PIPE, stderr=subprocess.PIPE, shell=True, env = envdict ) From d1f6b1c55cbf991bc3fc7ed2f897b872418f0b65 Mon Sep 17 00:00:00 2001 From: Burton Choinski Date: Fri, 25 Oct 2019 14:43:56 -0400 Subject: [PATCH 071/257] tweaks --- main.py | 20 +++++++------------- 1 file changed, 7 insertions(+), 13 deletions(-) diff --git a/main.py b/main.py index 538a265..659d390 100644 --- a/main.py +++ b/main.py @@ -268,25 +268,19 @@ def perform(yara_rule_dir): if seconds_since_start >= globals.g_vacuum_seconds > 0: cur.close() logger.warning("!!!Executing vacuum script!!!") - target = os.path.join(os.getcwd(), globals.g_vacuum_script) envdict = dict(os.environ) envdict["PGPASSWORD"] = globals.g_postgres_password envdict["PGUSERNAME"] = globals.g_postgres_username envdict['PGHOST'] = globals.g_postgres_host envdict["PGDATABASE"] = globals.g_postgres_db - envdict["PGPORT"] = globals.g_postgres_port - prog = subprocess.Popen( - target, stdout=subprocess.PIPE, stderr=subprocess.PIPE, shell=True, env = envdict - ) - stdout, stderr = ( - prog.communicate() - ) # Returns (stdoutdata, stderrdata): stdout and stderr are ignored, here + envdict["PGPORT"] = str(globals.g_postgres_port) + prog = subprocess.Popen(globals.g_vacuum_script, stdout=subprocess.PIPE, stderr=subprocess.PIPE, + shell=True, env=envdict) + stdout, stderr = (prog.communicate()) # Returns (stdoutdata, stderrdata) logger.info(stdout) logger.error(stderr) if prog.returncode: - logger.warning( - "program returned error code {0}".format(prog.returncode) - ) + logger.warning("program returned error code {0}".format(prog.returncode)) start_datetime = datetime.now() logger.warning("!!!Done Executing vacuum script!!!") break @@ -538,7 +532,7 @@ def verify_config(config_file: str, output_file: str = None) -> None: if 'feed_database_path' in the_config: globals.feed_database_path = the_config['feed_database_path'] check = os.path.abspath(placehold(the_config["feed_database_path"])) - if not(os.path.exists(check) and os.path.isdir(check)): + if not (os.path.exists(check) and os.path.isdir(check)): raise CbInvalidConfig("Invalid database path specified") @@ -606,7 +600,7 @@ def main(): try: globals.g_yara_rule_map = generate_rule_map(globals.g_yara_rules_dir) generate_yara_rule_map_hash(globals.g_yara_rules_dir) - database = SqliteDatabase(os.path.join(globals.g_feed_database_path,"binary.db")) + database = SqliteDatabase(os.path.join(globals.g_feed_database_path, "binary.db")) db.initialize(database) db.connect() db.create_tables([BinaryDetonationResult]) From 788142938b5536eb53a77a3e1ab01ceb26982694 Mon Sep 17 00:00:00 2001 From: Burton Choinski Date: Fri, 25 Oct 2019 16:32:59 -0400 Subject: [PATCH 072/257] supposedly working --- main.py | 5 +++-- scripts/vacuumscript.sh | 10 ++++++---- 2 files changed, 9 insertions(+), 6 deletions(-) diff --git a/main.py b/main.py index 659d390..7bd673b 100644 --- a/main.py +++ b/main.py @@ -268,14 +268,15 @@ def perform(yara_rule_dir): if seconds_since_start >= globals.g_vacuum_seconds > 0: cur.close() logger.warning("!!!Executing vacuum script!!!") + target = ["/bin/sh", globals.g_vacuum_script] + envdict = dict(os.environ) envdict["PGPASSWORD"] = globals.g_postgres_password envdict["PGUSERNAME"] = globals.g_postgres_username envdict['PGHOST'] = globals.g_postgres_host envdict["PGDATABASE"] = globals.g_postgres_db envdict["PGPORT"] = 
str(globals.g_postgres_port) - prog = subprocess.Popen(globals.g_vacuum_script, stdout=subprocess.PIPE, stderr=subprocess.PIPE, - shell=True, env=envdict) + prog = subprocess.Popen(target, shell=False, env=envdict, universal_newlines=True) stdout, stderr = (prog.communicate()) # Returns (stdoutdata, stderrdata) logger.info(stdout) logger.error(stderr) diff --git a/scripts/vacuumscript.sh b/scripts/vacuumscript.sh index e348da6..fe39a8a 100755 --- a/scripts/vacuumscript.sh +++ b/scripts/vacuumscript.sh @@ -1,5 +1,7 @@ #!/bin/bash -psql -p 5002 -d cb -c "vacuum (full,analyze, verbose) storefiles;" -psql -p 5002 -d cb -c "vacuum (full,analyze, verbose) binary_status;" -psql -p 5002 -d cb -c "vacuum (full,analyze, verbose) sensor_registrations;" -psql -p 5002 -d cb -c "vacuum (full,analyze, verbose) vt_write_events;" \ No newline at end of file +set -x +# commented out; hangs! +#psql -p 5002 -d $PGDATABASE -U $PGUSERNAME -c "vacuum (full,analyze, verbose) storefiles;" +psql -p 5002 -d $PGDATABASE -U $PGUSERNAME -c "vacuum (full,analyze, verbose) binary_status;" +psql -p 5002 -d $PGDATABASE -U $PGUSERNAME -c "vacuum (full,analyze, verbose) sensor_registrations;" +psql -p 5002 -d $PGDATABASE -U $PGUSERNAME -c "vacuum (full,analyze, verbose) vt_write_events;" From 0a7819c171d76d5f1f384c39b04f28b49ab8d6f7 Mon Sep 17 00:00:00 2001 From: Burton Choinski Date: Fri, 25 Oct 2019 16:36:48 -0400 Subject: [PATCH 073/257] ignore updates --- .gitignore | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/.gitignore b/.gitignore index bb9e8b3..5f47e59 100644 --- a/.gitignore +++ b/.gitignore @@ -41,3 +41,7 @@ config.ini # created local DB binary.db + +# local rules +yara_rules/*.yara +yara_rules/*.yar From 834fa0f45def928991cd2903a119893c6573cfa8 Mon Sep 17 00:00:00 2001 From: Burton Choinski Date: Mon, 28 Oct 2019 07:56:41 -0400 Subject: [PATCH 074/257] * Swapped script order * added user interrupt trap --- main.py | 4 ++++ scripts/vacuumscript.sh | 4 ++-- 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/main.py b/main.py index 7bd673b..6fda00d 100644 --- a/main.py +++ b/main.py @@ -607,9 +607,13 @@ def main(): db.create_tables([BinaryDetonationResult]) generate_feed_from_db() perform(globals.g_yara_rules_dir) + except KeyboardInterrupt: + logger.info("\n\n##### Intterupted by User!\n") + sys.exit(2) except Exception as err: logger.error(f"There were errors executing yara rules: {err}") logger.error(traceback.format_exc()) + sys.exit(1) if __name__ == "__main__": diff --git a/scripts/vacuumscript.sh b/scripts/vacuumscript.sh index fe39a8a..673aa61 100755 --- a/scripts/vacuumscript.sh +++ b/scripts/vacuumscript.sh @@ -1,7 +1,7 @@ #!/bin/bash set -x -# commented out; hangs! -#psql -p 5002 -d $PGDATABASE -U $PGUSERNAME -c "vacuum (full,analyze, verbose) storefiles;" psql -p 5002 -d $PGDATABASE -U $PGUSERNAME -c "vacuum (full,analyze, verbose) binary_status;" psql -p 5002 -d $PGDATABASE -U $PGUSERNAME -c "vacuum (full,analyze, verbose) sensor_registrations;" psql -p 5002 -d $PGDATABASE -U $PGUSERNAME -c "vacuum (full,analyze, verbose) vt_write_events;" +# commented out; hangs! 
+psql -p 5002 -d $PGDATABASE -U $PGUSERNAME -c "vacuum (full,analyze, verbose) storefiles;" From 36596a96436d46f8e6e68c826a3e20b0676311df Mon Sep 17 00:00:00 2001 From: Zachary Estep Date: Mon, 28 Oct 2019 08:03:21 -0400 Subject: [PATCH 075/257] fixing bugs --- main.py | 132 ++++++++++++++++++++++++++++++++++++++++---------------- 1 file changed, 94 insertions(+), 38 deletions(-) diff --git a/main.py b/main.py index 6fda00d..472be90 100644 --- a/main.py +++ b/main.py @@ -14,6 +14,7 @@ import humanfriendly import psycopg2 + # noinspection PyPackageRequirements import yara from celery import group @@ -267,21 +268,28 @@ def perform(yara_rule_dir): seconds_since_start = (datetime.now() - start_datetime).seconds if seconds_since_start >= globals.g_vacuum_seconds > 0: cur.close() + conn.commit() logger.warning("!!!Executing vacuum script!!!") - target = ["/bin/sh", globals.g_vacuum_script] + + target = os.path.join(os.getcwd(), globals.g_vacuum_script) envdict = dict(os.environ) envdict["PGPASSWORD"] = globals.g_postgres_password - envdict["PGUSERNAME"] = globals.g_postgres_username - envdict['PGHOST'] = globals.g_postgres_host + envdict["PGUSER"] = globals.g_postgres_username + envdict["PGHOST"] = globals.g_postgres_host envdict["PGDATABASE"] = globals.g_postgres_db envdict["PGPORT"] = str(globals.g_postgres_port) - prog = subprocess.Popen(target, shell=False, env=envdict, universal_newlines=True) - stdout, stderr = (prog.communicate()) # Returns (stdoutdata, stderrdata) + + prog = subprocess.Popen( + target, shell=True, env=envdict, universal_newlines=True + ) + stdout, stderr = prog.communicate() # Returns (stdoutdata, stderrdata) logger.info(stdout) logger.error(stderr) if prog.returncode: - logger.warning("program returned error code {0}".format(prog.returncode)) + logger.warning( + "program returned error code {0}".format(prog.returncode) + ) start_datetime = datetime.now() logger.warning("!!!Done Executing vacuum script!!!") break @@ -363,7 +371,7 @@ def perform(yara_rule_dir): def _rule_logging( - start_time: float, num_binaries_skipped: int, num_total_binaries: int + start_time: float, num_binaries_skipped: int, num_total_binaries: int ) -> None: """ Simple method to log yara work. 
@@ -417,21 +425,30 @@ def verify_config(config_file: str, output_file: str = None) -> None: raise CbInvalidConfig(err) logger.debug(f"NOTE: using config file '{abs_config}'") - if not config.has_section('general'): + if not config.has_section("general"): raise CbInvalidConfig(f"{header} does not have a 'general' section") - globals.output_file = output_file if output_file is not None else config_file.strip() + ".json" - globals.output_file = os.path.abspath(os.path.expanduser(placehold(globals.output_file))) + globals.output_file = ( + output_file if output_file is not None else config_file.strip() + ".json" + ) + globals.output_file = os.path.abspath( + os.path.expanduser(placehold(globals.output_file)) + ) logger.debug(f"NOTE: output file will be '{globals.output_file}'") the_config = config["general"] if "worker_type" in the_config: - if the_config["worker_type"] == "local" or the_config["worker_type"].strip() == "": + if ( + the_config["worker_type"] == "local" + or the_config["worker_type"].strip() == "" + ): globals.g_remote = False # 'local' or empty definition elif the_config["worker_type"] == "remote": globals.g_remote = True # 'remote' else: # anything else - raise CbInvalidConfig(f"{header} has an invalid 'worker_type' ({the_config['worker_type']})") + raise CbInvalidConfig( + f"{header} has an invalid 'worker_type' ({the_config['worker_type']})" + ) else: globals.g_remote = False logger.warning(f"{header} does not specify 'worker_type', assuming local") @@ -442,8 +459,11 @@ def verify_config(config_file: str, output_file: str = None) -> None: globals.g_cb_server_url = the_config["cb_server_url"] else: raise CbInvalidConfig(f"{header} is 'local' and missing 'cb_server_url'") - if 'cb_server_token' in the_config and the_config['cb_server_token'].strip() != "": - globals.g_cb_server_token = the_config['cb_server_token'] + if ( + "cb_server_token" in the_config + and the_config["cb_server_token"].strip() != "" + ): + globals.g_cb_server_token = the_config["cb_server_token"] else: raise CbInvalidConfig(f"{header} is 'local' and missing 'cb_server_token'") # TODO: validate url & token with test call? @@ -458,14 +478,20 @@ def verify_config(config_file: str, output_file: str = None) -> None: # TODO: validate broker with test call? 
if "yara_rules_dir" in the_config and the_config["yara_rules_dir"].strip() != "": - check = os.path.abspath(os.path.expanduser(placehold(the_config["yara_rules_dir"]))) + check = os.path.abspath( + os.path.expanduser(placehold(the_config["yara_rules_dir"])) + ) if os.path.exists(check): if os.path.isdir(check): globals.g_yara_rules_dir = check else: - raise CbInvalidConfig(f"{header} specified 'yara_rules_dir' ({check}) is not a directory") + raise CbInvalidConfig( + f"{header} specified 'yara_rules_dir' ({check}) is not a directory" + ) else: - raise CbInvalidConfig(f"{header} specified 'yara_rules_dir' ({check}) does not exist") + raise CbInvalidConfig( + f"{header} specified 'yara_rules_dir' ({check}) does not exist" + ) else: raise CbInvalidConfig(f"{header} has no 'yara_rules_dir' definition") @@ -473,30 +499,44 @@ def verify_config(config_file: str, output_file: str = None) -> None: if "postgres_host" in the_config and the_config["postgres_host"].strip() != "": globals.g_postgres_host = the_config["postgres_host"] else: - logger.warning(f"{header} has no defined 'postgres_host'; using default of '{globals.g_postgres_host}'") + logger.warning( + f"{header} has no defined 'postgres_host'; using default of '{globals.g_postgres_host}'" + ) # NOTE: postgres_username has a default value in globals; use and warn if not defined - if "postgres_username" in the_config and the_config["postgres_username"].strip() != "": + if ( + "postgres_username" in the_config + and the_config["postgres_username"].strip() != "" + ): globals.g_postgres_username = the_config["postgres_username"] else: - logger.warning(f"{header} has no defined 'postgres_username'; using default of '{globals.g_postgres_username}'") + logger.warning( + f"{header} has no defined 'postgres_username'; using default of '{globals.g_postgres_username}'" + ) - if 'postgres_password' in the_config and the_config['postgres_password'].strip() != "": - globals.g_postgres_password = the_config['postgres_password'] + if ( + "postgres_password" in the_config + and the_config["postgres_password"].strip() != "" + ): + globals.g_postgres_password = the_config["postgres_password"] else: raise CbInvalidConfig(f"{header} has no 'postgres_password' defined") # NOTE: postgres_db has a default value in globals; use and warn if not defined - if 'postgres_db' in the_config and the_config['postgres_db'].strip() != "": - globals.g_postgres_db = the_config['postgres_db'] + if "postgres_db" in the_config and the_config["postgres_db"].strip() != "": + globals.g_postgres_db = the_config["postgres_db"] else: - logger.warning(f"{header} has no defined 'postgres_db'; using default of '{globals.g_postgres_db}'") + logger.warning( + f"{header} has no defined 'postgres_db'; using default of '{globals.g_postgres_db}'" + ) # NOTE: postgres_port has a default value in globals; use and warn if not defined - if 'postgres_port' in the_config: - globals.g_postgres_port = int(the_config['postgres_port']) + if "postgres_port" in the_config: + globals.g_postgres_port = int(the_config["postgres_port"]) else: - logger.warning(f"{header} has no defined 'postgres_port'; using default of '{globals.g_postgres_port}'") + logger.warning( + f"{header} has no defined 'postgres_port'; using default of '{globals.g_postgres_port}'" + ) # TODO: validate postgres connection with supplied information? 
@@ -513,25 +553,37 @@ def verify_config(config_file: str, output_file: str = None) -> None: if "num_days_binaries" in the_config: globals.g_num_days_binaries = max(int(the_config["num_days_binaries"]), 1) - logger.debug("Number of days for binaries: {0}".format(globals.g_num_days_binaries)) + logger.debug( + "Number of days for binaries: {0}".format(globals.g_num_days_binaries) + ) if "vacuum_seconds" in the_config: globals.g_vacuum_seconds = max(int(the_config["vacuum_seconds"]), 0) if "vacuum_script" in the_config and the_config["vacuum_seconds"].strip() != "": if globals.g_vacuum_seconds > 0: - check = os.path.abspath(os.path.expanduser(placehold(the_config["vacuum_script"]))) + check = os.path.abspath( + os.path.expanduser(placehold(the_config["vacuum_script"])) + ) if os.path.exists(check): if os.path.isdir(check): - raise CbInvalidConfig(f"{header} specified 'vacuum_script' ({check}) is a directory") + raise CbInvalidConfig( + f"{header} specified 'vacuum_script' ({check}) is a directory" + ) else: - raise CbInvalidConfig(f"{header} specified 'vacuum_script' ({check}) does not exist") + raise CbInvalidConfig( + f"{header} specified 'vacuum_script' ({check}) does not exist" + ) globals.g_vacuum_script = check - logger.warning(f"Vacuum Script '{check}' is enabled; use this advanced feature at your own discretion!") + logger.warning( + f"Vacuum Script '{check}' is enabled; use this advanced feature at your own discretion!" + ) else: - logger.debug(f"{header} has 'vacuum_script' defined, but it is disabled") + logger.debug( + f"{header} has 'vacuum_script' defined, but it is disabled" + ) - if 'feed_database_path' in the_config: - globals.feed_database_path = the_config['feed_database_path'] + if "feed_database_path" in the_config: + globals.feed_database_path = the_config["feed_database_path"] check = os.path.abspath(placehold(the_config["feed_database_path"])) if not (os.path.exists(check) and os.path.isdir(check)): raise CbInvalidConfig("Invalid database path specified") @@ -542,7 +594,9 @@ def main(): # check for single operation singleton.SingleInstance() except Exception as err: - logger.error(f"Only one instance of this script is allowed to run at a time: {err}") + logger.error( + f"Only one instance of this script is allowed to run at a time: {err}" + ) else: parser = argparse.ArgumentParser(description="Yara Agent for Yara Connector") parser.add_argument( @@ -601,7 +655,9 @@ def main(): try: globals.g_yara_rule_map = generate_rule_map(globals.g_yara_rules_dir) generate_yara_rule_map_hash(globals.g_yara_rules_dir) - database = SqliteDatabase(os.path.join(globals.g_feed_database_path, "binary.db")) + database = SqliteDatabase( + os.path.join(globals.g_feed_database_path, "binary.db") + ) db.initialize(database) db.connect() db.create_tables([BinaryDetonationResult]) From 26323c262ebc8799907c46d4c34cadb39822b639 Mon Sep 17 00:00:00 2001 From: Zachary Estep Date: Mon, 28 Oct 2019 08:04:16 -0400 Subject: [PATCH 076/257] script edit --- scripts/vacuumscript.sh | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/scripts/vacuumscript.sh b/scripts/vacuumscript.sh index 673aa61..225618d 100755 --- a/scripts/vacuumscript.sh +++ b/scripts/vacuumscript.sh @@ -1,7 +1,7 @@ #!/bin/bash set -x -psql -p 5002 -d $PGDATABASE -U $PGUSERNAME -c "vacuum (full,analyze, verbose) binary_status;" -psql -p 5002 -d $PGDATABASE -U $PGUSERNAME -c "vacuum (full,analyze, verbose) sensor_registrations;" -psql -p 5002 -d $PGDATABASE -U $PGUSERNAME -c "vacuum (full,analyze, verbose) 
vt_write_events;" +psql -p 5002 -d $PGDATABASE -U $PGUSER -c "vacuum (full,analyze, verbose) binary_status;" +psql -p 5002 -d $PGDATABASE -U $PGUSER -c "vacuum (full,analyze, verbose) sensor_registrations;" +psql -p 5002 -d $PGDATABASE -U $PGUSER -c "vacuum (full,analyze, verbose) vt_write_events;" # commented out; hangs! -psql -p 5002 -d $PGDATABASE -U $PGUSERNAME -c "vacuum (full,analyze, verbose) storefiles;" +psql -p 5002 -d $PGDATABASE -U $PGUSER -c "vacuum (full,analyze, verbose) storefiles;" From 6cf77f4d1fcd11841eea8c1f1c60f12aee418756 Mon Sep 17 00:00:00 2001 From: Burton Choinski Date: Mon, 28 Oct 2019 08:08:07 -0400 Subject: [PATCH 077/257] Code cleanup --- main.py | 2 +- scripts/vacuumscript.sh | 9 ++++----- 2 files changed, 5 insertions(+), 6 deletions(-) diff --git a/main.py b/main.py index 472be90..9f0fe8a 100644 --- a/main.py +++ b/main.py @@ -664,7 +664,7 @@ def main(): generate_feed_from_db() perform(globals.g_yara_rules_dir) except KeyboardInterrupt: - logger.info("\n\n##### Intterupted by User!\n") + logger.info("\n\n##### Interupted by User!\n") sys.exit(2) except Exception as err: logger.error(f"There were errors executing yara rules: {err}") diff --git a/scripts/vacuumscript.sh b/scripts/vacuumscript.sh index 225618d..a8c64b8 100755 --- a/scripts/vacuumscript.sh +++ b/scripts/vacuumscript.sh @@ -1,7 +1,6 @@ #!/bin/bash set -x -psql -p 5002 -d $PGDATABASE -U $PGUSER -c "vacuum (full,analyze, verbose) binary_status;" -psql -p 5002 -d $PGDATABASE -U $PGUSER -c "vacuum (full,analyze, verbose) sensor_registrations;" -psql -p 5002 -d $PGDATABASE -U $PGUSER -c "vacuum (full,analyze, verbose) vt_write_events;" -# commented out; hangs! -psql -p 5002 -d $PGDATABASE -U $PGUSER -c "vacuum (full,analyze, verbose) storefiles;" +psql -p 5002 -d "$PGDATABASE" -U "$PGUSERNAME" -c "vacuum (full,analyze, verbose) storefiles;" +psql -p 5002 -d "$PGDATABASE" -U "$PGUSERNAME" -c "vacuum (full,analyze, verbose) binary_status;" +psql -p 5002 -d "$PGDATABASE" -U "$PGUSERNAME" -c "vacuum (full,analyze, verbose) sensor_registrations;" +psql -p 5002 -d "$PGDATABASE" -U "$PGUSERNAME" -c "vacuum (full,analyze, verbose) vt_write_events;" From eb79ae68502a1c51c3eeb5409cb9c32816bec4d4 Mon Sep 17 00:00:00 2001 From: Zachary Estep Date: Mon, 28 Oct 2019 09:04:17 -0400 Subject: [PATCH 078/257] Removing crud --- main.py | 221 +++++++++++++++++----------------------- scripts/vacuumscript.sh | 8 +- 2 files changed, 96 insertions(+), 133 deletions(-) diff --git a/main.py b/main.py index 9f0fe8a..f293a27 100644 --- a/main.py +++ b/main.py @@ -205,6 +205,45 @@ def save_results(analysis_results: List[AnalysisResult]) -> None: generate_feed_from_db() +def get_database_conn(): + logger.info("Connecting to Postgres database...") + conn = psycopg2.connect( + host=globals.g_postgres_host, + database=globals.g_postgres_db, + user=globals.g_postgres_username, + password=globals.g_postgres_password, + port=globals.g_postgres_port, + ) + + return conn + + +def get_cursor(conn, start_date_binaries): + cur = conn.cursor(name="yara_agent") + + # noinspection SqlDialectInspection,SqlNoDataSourceInspection + cur.execute( + "SELECT md5hash FROM storefiles WHERE present_locally = TRUE AND timestamp >= '{0}' " + "ORDER BY timestamp DESC".format(start_date_binaries) + ) + + return cur + + +def execute_script(): + logger.warning("!!!Executing vacuum script!!!") + + target = os.path.join(os.getcwd(), globals.g_vacuum_script) + + prog = subprocess.Popen(target, shell=True, universal_newlines=True) + stdout, stderr = 
prog.communicate() + logger.info(stdout) + logger.error(stderr) + if prog.returncode: + logger.warning("program returned error code {0}".format(prog.returncode)) + logger.warning("!!!Done Executing vacuum script!!!") + + def perform(yara_rule_dir): if globals.g_remote: logger.info("Uploading yara rules to workers...") @@ -216,144 +255,69 @@ def perform(yara_rule_dir): md5_hashes = [] start_time = time.time() - start_datetime = datetime.now() - logger.info("Connecting to Postgres database...") - try: - conn = psycopg2.connect( - host=globals.g_postgres_host, - database=globals.g_postgres_db, - user=globals.g_postgres_username, - password=globals.g_postgres_password, - port=globals.g_postgres_port, - ) - cur = conn.cursor(name="yara_agent") + conn = get_database_conn() - start_date_binaries = datetime.now() - timedelta( - days=globals.g_num_days_binaries - ) - # noinspection SqlDialectInspection,SqlNoDataSourceInspection - cur.execute( - "SELECT md5hash FROM storefiles WHERE present_locally = TRUE AND timestamp >= '{0}' " - "ORDER BY timestamp DESC".format(start_date_binaries) - ) - except Exception as err: - logger.error("Failed to connect to Postgres database: {0}".format(err)) - logger.error(traceback.format_exc()) - return + start_date_binaries = datetime.now() - timedelta(days=globals.g_num_days_binaries) + + cur = get_cursor(conn, start_date_binaries) + + rows = cur.fetchmany() + + conn.commit() + + conn.close() logger.info("Enumerating modulestore...") - while True: - if cur.closed: - cur = conn.cursor(name="yara_agent") - # noinspection SqlDialectInspection - cur.execute( - "SELECT md5hash FROM storefiles WHERE present_locally = TRUE AND timestamp >= '{0}' " - "ORDER BY timestamp DESC".format(start_date_binaries) - ) - try: - rows = cur.fetchmany() - except psycopg2.OperationalError: - cur = conn.cursor(name="yara_agent") - # noinspection SqlDialectInspection - cur.execute( - "SELECT md5hash FROM storefiles WHERE present_locally = TRUE AND timestamp >= '{0}' " - "ORDER BY timestamp DESC".format(start_date_binaries) - ) - rows = cur.fetchmany() - if len(rows) == 0: - break - - for row in rows: - seconds_since_start = (datetime.now() - start_datetime).seconds - if seconds_since_start >= globals.g_vacuum_seconds > 0: - cur.close() - conn.commit() - logger.warning("!!!Executing vacuum script!!!") - - target = os.path.join(os.getcwd(), globals.g_vacuum_script) - - envdict = dict(os.environ) - envdict["PGPASSWORD"] = globals.g_postgres_password - envdict["PGUSER"] = globals.g_postgres_username - envdict["PGHOST"] = globals.g_postgres_host - envdict["PGDATABASE"] = globals.g_postgres_db - envdict["PGPORT"] = str(globals.g_postgres_port) - - prog = subprocess.Popen( - target, shell=True, env=envdict, universal_newlines=True - ) - stdout, stderr = prog.communicate() # Returns (stdoutdata, stderrdata) - logger.info(stdout) - logger.error(stderr) - if prog.returncode: - logger.warning( - "program returned error code {0}".format(prog.returncode) - ) - start_datetime = datetime.now() - logger.warning("!!!Done Executing vacuum script!!!") - break - - num_total_binaries += 1 - md5_hash = row[0].hex() - - # - # Check if query returns any rows - # - query = BinaryDetonationResult.select().where( - BinaryDetonationResult.md5 == md5_hash - ) - if query.exists(): - try: - bdr = BinaryDetonationResult.get( - BinaryDetonationResult.md5 == md5_hash - ) - scanned_hash_list = json.loads(bdr.misc) - if globals.g_disable_rescan and bdr.misc: - continue - - if scanned_hash_list == 
globals.g_yara_rule_map_hash_list: - num_binaries_skipped += 1 - # - # If it is the same then we don't need to scan again - # - continue - except Exception as e: - logger.error( - "Unable to decode yara rule map hash from database: {0}".format( - e - ) - ) + for row in rows: + + num_total_binaries += 1 + md5_hash = row[0].hex() - num_binaries_queued += 1 + num_binaries_queued += 1 + + if _check_hash_against_feed(md5_hash): md5_hashes.append(md5_hash) - if len(md5_hashes) >= globals.MAX_HASHES: - analysis_results = analyze_binaries( - md5_hashes, local=(not globals.g_remote) - ) - if analysis_results: - for analysis_result in analysis_results: - logger.debug( - ( - f"Analysis result is {analysis_result.md5} {analysis_result.binary_not_available}" - f" {analysis_result.long_result} {analysis_result.last_error_msg}" - ) - ) - if analysis_result.last_error_msg: - logger.error(analysis_result.last_error_msg) - save_results(analysis_results) - else: - pass - md5_hashes = [] + if len(md5_hashes) >= globals.MAX_HASHES: + _analyze_save_and_log( + md5_hashes, start_time, num_binaries_skipped, num_total_binaries + ) + md5_hashes = [] + + _analyze_save_and_log( + md5_hashes, start_time, num_binaries_skipped, num_total_binaries + ) - # throw us a bone every 1000 binaries processed - if num_total_binaries % 1000 == 0: - _rule_logging(start_time, num_binaries_skipped, num_total_binaries) + generate_feed_from_db() - conn.close() - analysis_results = analyze_binaries(md5_hashes, local=(not globals.g_remote)) +def _check_hash_against_feed(md5_hash): + query = BinaryDetonationResult.select().where( + BinaryDetonationResult.md5 == md5_hash + ) + if query.exists(): + try: + bdr = BinaryDetonationResult.get(BinaryDetonationResult.md5 == md5_hash) + scanned_hash_list = json.loads(bdr.misc) + if globals.g_disable_rescan and bdr.misc: + return False + + if scanned_hash_list == globals.g_yara_rule_map_hash_list: + # + # If it is the same then we don't need to scan again + # + return False + except Exception as e: + logger.error( + "Unable to decode yara rule map hash from database: {0}".format(e) + ) + return False + return True + + +def _analyze_save_and_log(hashes, start_time, num_binaries_skipped, num_total_binaries): + analysis_results = analyze_binaries(hashes, local=(not globals.g_remote)) if analysis_results: for analysis_result in analysis_results: logger.debug( @@ -367,7 +331,6 @@ def perform(yara_rule_dir): save_results(analysis_results) _rule_logging(start_time, num_binaries_skipped, num_total_binaries) - generate_feed_from_db() def _rule_logging( diff --git a/scripts/vacuumscript.sh b/scripts/vacuumscript.sh index a8c64b8..efdf3b9 100755 --- a/scripts/vacuumscript.sh +++ b/scripts/vacuumscript.sh @@ -1,6 +1,6 @@ #!/bin/bash set -x -psql -p 5002 -d "$PGDATABASE" -U "$PGUSERNAME" -c "vacuum (full,analyze, verbose) storefiles;" -psql -p 5002 -d "$PGDATABASE" -U "$PGUSERNAME" -c "vacuum (full,analyze, verbose) binary_status;" -psql -p 5002 -d "$PGDATABASE" -U "$PGUSERNAME" -c "vacuum (full,analyze, verbose) sensor_registrations;" -psql -p 5002 -d "$PGDATABASE" -U "$PGUSERNAME" -c "vacuum (full,analyze, verbose) vt_write_events;" +psql -p 5002 -d cb -c "vacuum (full,analyze, verbose) binary_status;" +psql -p 5002 -d cb -c "vacuum (full,analyze, verbose) sensor_registrations;" +psql -p 5002 -d cb -c "vacuum (full,analyze, verbose) vt_write_events;" +psql -p 5002 -d cb -c "vacuum (full,analyze, verbose) storefiles;" From 9eecbb3e464f8151c449329dda4def0c00c67e4e Mon Sep 17 00:00:00 2001 From: Zachary Estep 
Date: Mon, 28 Oct 2019 09:13:11 -0400 Subject: [PATCH 079/257] updates --- main.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/main.py b/main.py index f293a27..1f1276c 100644 --- a/main.py +++ b/main.py @@ -262,7 +262,7 @@ def perform(yara_rule_dir): cur = get_cursor(conn, start_date_binaries) - rows = cur.fetchmany() + rows = cur.fetchall() conn.commit() From 754cde993d6b63064f19337b41ea302b4949435c Mon Sep 17 00:00:00 2001 From: Zachary Estep Date: Mon, 28 Oct 2019 11:29:50 -0400 Subject: [PATCH 080/257] Putting back script executor --- main.py | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/main.py b/main.py index 1f1276c..6e1d3ec 100644 --- a/main.py +++ b/main.py @@ -259,6 +259,7 @@ def perform(yara_rule_dir): conn = get_database_conn() start_date_binaries = datetime.now() - timedelta(days=globals.g_num_days_binaries) + start_datetime = datetime.now() cur = get_cursor(conn, start_date_binaries) @@ -268,8 +269,13 @@ def perform(yara_rule_dir): conn.close() - logger.info("Enumerating modulestore...") + logger.info(f"Enumerating modulestore...found {len(rows)} resident binaries") + for row in rows: + seconds_since_start = (datetime.now() - start_datetime).seconds + if seconds_since_start >= globals.g_vacuum_seconds > 0: + execute_script() + start_datetime = datetime.now() num_total_binaries += 1 md5_hash = row[0].hex() @@ -278,6 +284,8 @@ def perform(yara_rule_dir): if _check_hash_against_feed(md5_hash): md5_hashes.append(md5_hash) + else: + num_binaries_skipped += 1 if len(md5_hashes) >= globals.MAX_HASHES: _analyze_save_and_log( From 5a9067e9381a7f80fc566440d5b952242454571d Mon Sep 17 00:00:00 2001 From: Burton Choinski Date: Mon, 28 Oct 2019 11:31:15 -0400 Subject: [PATCH 081/257] * Change to config class * updated unit tests --- config_handling.py | 223 ++++++++++++ main.py | 197 +---------- tasks.py | 88 +---- test/config/bogus_feed_database_dir.conf | 22 ++ test/config/missing_feed_database_dir.conf | 21 ++ test/test_configInit.py | 388 +++++++++++++++++++++ test/test_main.py | 349 +----------------- test/test_tasks.py | 102 +----- 8 files changed, 675 insertions(+), 715 deletions(-) create mode 100644 config_handling.py create mode 100644 test/config/bogus_feed_database_dir.conf create mode 100644 test/config/missing_feed_database_dir.conf create mode 100644 test/test_configInit.py diff --git a/config_handling.py b/config_handling.py new file mode 100644 index 0000000..45a1ce4 --- /dev/null +++ b/config_handling.py @@ -0,0 +1,223 @@ +# coding: utf-8 +# Copyright © 2018-2019 VMware, Inc. All Rights Reserved.
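+# +# This module consolidates the Celery app definition and the configuration loading/validation +# that were previously duplicated in the verify_config() functions of main.py and tasks.py +# (both removed later in this patch).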
+ +import configparser +import logging +import os + +from celery import Celery + +import globals +from exceptions import CbInvalidConfig +from utilities import placehold + +logger = logging.getLogger(__name__) + +__all__ = ["ConfigurationInit", "app"] + +################################################################################ +# Celery app +################################################################################ + +app = Celery() +# noinspection PyUnusedName +app.conf.task_serializer = "pickle" +# noinspection PyUnusedName +app.conf.result_serializer = "pickle" +# noinspection PyUnusedName +app.conf.accept_content = {"pickle"} + + +################################################################################ +# Configuration reader/validator +################################################################################ + +class ConfigurationInit(object): + """ + Class to deal with all configuration loading and validation. + """ + + def __init__(self, config_file: str, output_file: str = None) -> None: + """ + Validate the config file. + :param config_file: The config file to validate + :param output_file: the output file; if not specified assume we are a task worker (simplified validation) + """ + abs_config = os.path.abspath(os.path.expanduser(placehold(config_file))) + header = f"Config file '{abs_config}'" + + config = configparser.ConfigParser() + if not os.path.exists(config_file): + raise CbInvalidConfig(f"{header} does not exist!") + + try: + config.read(config_file) + except Exception as err: + raise CbInvalidConfig(err) + + logger.debug(f"NOTE: using config file '{abs_config}'") + if not config.has_section("general"): + raise CbInvalidConfig(f"{header} does not have a 'general' section") + + if output_file is not None: + globals.output_file = os.path.abspath(os.path.expanduser(placehold(output_file))) + logger.debug(f"NOTE: output file will be '{globals.output_file}'") + + the_config = config["general"] + if "worker_type" in the_config: + if ( + the_config["worker_type"] == "local" + or the_config["worker_type"].strip() == "" + ): + globals.g_remote = False # 'local' or empty definition + elif the_config["worker_type"] == "remote": + globals.g_remote = True # 'remote' + else: # anything else + raise CbInvalidConfig( + f"{header} has an invalid 'worker_type' ({the_config['worker_type']})" + ) + else: + globals.g_remote = False + logger.warning(f"{header} does not specify 'worker_type', assuming local") + + if "yara_rules_dir" in the_config and the_config["yara_rules_dir"].strip() != "": + check = os.path.abspath(os.path.expanduser(placehold(the_config["yara_rules_dir"]))) + if os.path.exists(check): + if os.path.isdir(check): + globals.g_yara_rules_dir = check + else: + raise CbInvalidConfig( + f"{header} specified 'yara_rules_dir' ({check}) is not a directory" + ) + else: + raise CbInvalidConfig( + f"{header} specified 'yara_rules_dir' ({check}) does not exist" + ) + else: + raise CbInvalidConfig(f"{header} has no 'yara_rules_dir' definition") + + # local/remote configuration data + if not globals.g_remote: + if "cb_server_url" in the_config and the_config["cb_server_url"].strip() != "": + globals.g_cb_server_url = the_config["cb_server_url"] + else: + raise CbInvalidConfig(f"{header} is 'local' and missing 'cb_server_url'") + if ( + "cb_server_token" in the_config + and the_config["cb_server_token"].strip() != "" + ): + globals.g_cb_server_token = the_config["cb_server_token"] + else: + raise CbInvalidConfig(f"{header} is 'local' and missing 
'cb_server_token'") + else: + if "broker_url" in the_config and the_config["broker_url"].strip() != "": + app.conf.update( + broker_url=the_config["broker_url"], + result_backend=the_config["broker_url"], + ) + else: + raise CbInvalidConfig(f"{header} is 'remote' and missing 'broker_url'") + + # done with minimal task worker validation + if output_file is None: + return + + # TODO: validate url & token with test call (if local) + # TODO: validate broker with test call (if remote) + + # NOTE: postgres_host has a default value in globals; use and warn if not defined + if "postgres_host" in the_config and the_config["postgres_host"].strip() != "": + globals.g_postgres_host = the_config["postgres_host"] + else: + logger.warning( + f"{header} has no defined 'postgres_host'; using default of '{globals.g_postgres_host}'" + ) + + # NOTE: postgres_username has a default value in globals; use and warn if not defined + if ( + "postgres_username" in the_config + and the_config["postgres_username"].strip() != "" + ): + globals.g_postgres_username = the_config["postgres_username"] + else: + logger.warning( + f"{header} has no defined 'postgres_username'; using default of '{globals.g_postgres_username}'" + ) + + if ( + "postgres_password" in the_config + and the_config["postgres_password"].strip() != "" + ): + globals.g_postgres_password = the_config["postgres_password"] + else: + raise CbInvalidConfig(f"{header} has no 'postgres_password' defined") + + # NOTE: postgres_db has a default value in globals; use and warn if not defined + if "postgres_db" in the_config and the_config["postgres_db"].strip() != "": + globals.g_postgres_db = the_config["postgres_db"] + else: + logger.warning( + f"{header} has no defined 'postgres_db'; using default of '{globals.g_postgres_db}'" + ) + + # NOTE: postgres_port has a default value in globals; use and warn if not defined + if "postgres_port" in the_config: + globals.g_postgres_port = int(the_config["postgres_port"]) + else: + logger.warning( + f"{header} has no defined 'postgres_port'; using default of '{globals.g_postgres_port}'" + ) + + # TODO: validate postgres connection with supplied information? + + if "niceness" in the_config: + os.nice(int(the_config["niceness"])) + + if "concurrent_hashes" in the_config: + globals.MAX_HASHES = int(the_config["concurrent_hashes"]) + logger.debug("Concurrent Hashes: {0}".format(globals.MAX_HASHES)) + + if "disable_rescan" in the_config: + globals.g_disable_rescan = bool(the_config["disable_rescan"]) + logger.debug("Disable Rescan: {0}".format(globals.g_disable_rescan)) + + if "num_days_binaries" in the_config: + globals.g_num_days_binaries = max(int(the_config["num_days_binaries"]), 1) + logger.debug( + "Number of days for binaries: {0}".format(globals.g_num_days_binaries) + ) + + if "vacuum_seconds" in the_config: + globals.g_vacuum_seconds = max(int(the_config["vacuum_seconds"]), 0) + if "vacuum_script" in the_config and the_config["vacuum_script"].strip() != "": + if globals.g_vacuum_seconds > 0: + check = os.path.abspath( + os.path.expanduser(placehold(the_config["vacuum_script"])) + ) + if os.path.exists(check): + if os.path.isdir(check): + raise CbInvalidConfig( + f"{header} specified 'vacuum_script' ({check}) is a directory" + ) + else: + raise CbInvalidConfig( + f"{header} specified 'vacuum_script' ({check}) does not exist" + ) + globals.g_vacuum_script = check + logger.warning( + f"Vacuum Script '{check}' is enabled; use this advanced feature at your own discretion!"
+ ) + else: + logger.debug( + f"{header} has 'vacuum_script' defined, but it is disabled" + ) + + if "feed_database_dir" in the_config and the_config["feed_database_dir"].strip() != "": + check = os.path.abspath(os.path.expanduser(placehold(the_config["feed_database_dir"]))) + if os.path.exists(check): + if not os.path.isdir(check): + raise CbInvalidConfig(f"{header} specified 'feed_database_dir' ({check}) is not a directory") + else: + globals.feed_database_dir = check + else: + raise CbInvalidConfig(f"{header} specified 'feed_database_dir' ({check}) does not exist") diff --git a/main.py b/main.py index 1f1276c..70ff843 100644 --- a/main.py +++ b/main.py @@ -1,5 +1,4 @@ import argparse -import configparser import hashlib import json import logging @@ -14,7 +13,6 @@ import humanfriendly import psycopg2 - # noinspection PyPackageRequirements import yara from celery import group @@ -24,10 +22,9 @@ import singleton from analysis_result import AnalysisResult from binary_database import BinaryDetonationResult, db -from exceptions import CbInvalidConfig +from config_handling import ConfigurationInit from feed import CbFeed, CbFeedInfo, CbReport -from tasks import analyze_binary, app, generate_rule_map, update_yara_rules_remote -from utilities import placehold +from tasks import analyze_binary, generate_rule_map, update_yara_rules_remote logging_format = "%(asctime)s-%(name)s-%(lineno)d-%(levelname)s-%(message)s" logging.basicConfig(format=logging_format) @@ -334,7 +331,7 @@ def _analyze_save_and_log(hashes, start_time, num_binaries_skipped, num_total_bi def _rule_logging( - start_time: float, num_binaries_skipped: int, num_total_binaries: int + start_time: float, num_binaries_skipped: int, num_total_binaries: int ) -> None: """ Simple method to log yara work. @@ -368,189 +365,9 @@ def _rule_logging( logger.info("") -# noinspection DuplicatedCode -def verify_config(config_file: str, output_file: str = None) -> None: - """ - Validate the config file.
- :param config_file: The config file to validate - :param output_file: the output file; if not specified equals config file plus ".json" - """ - abs_config = os.path.abspath(os.path.expanduser(placehold(config_file))) - header = f"Config file '{abs_config}'" - - config = configparser.ConfigParser() - if not os.path.exists(config_file): - raise CbInvalidConfig(f"{header} does not exist!") - - try: - config.read(config_file) - except Exception as err: - raise CbInvalidConfig(err) - - logger.debug(f"NOTE: using config file '{abs_config}'") - if not config.has_section("general"): - raise CbInvalidConfig(f"{header} does not have a 'general' section") - - globals.output_file = ( - output_file if output_file is not None else config_file.strip() + ".json" - ) - globals.output_file = os.path.abspath( - os.path.expanduser(placehold(globals.output_file)) - ) - logger.debug(f"NOTE: output file will be '{globals.output_file}'") - - the_config = config["general"] - if "worker_type" in the_config: - if ( - the_config["worker_type"] == "local" - or the_config["worker_type"].strip() == "" - ): - globals.g_remote = False # 'local' or empty definition - elif the_config["worker_type"] == "remote": - globals.g_remote = True # 'remote' - else: # anything else - raise CbInvalidConfig( - f"{header} has an invalid 'worker_type' ({the_config['worker_type']})" - ) - else: - globals.g_remote = False - logger.warning(f"{header} does not specify 'worker_type', assuming local") - - # local/remote configuration data - if not globals.g_remote: - if "cb_server_url" in the_config and the_config["cb_server_url"].strip() != "": - globals.g_cb_server_url = the_config["cb_server_url"] - else: - raise CbInvalidConfig(f"{header} is 'local' and missing 'cb_server_url'") - if ( - "cb_server_token" in the_config - and the_config["cb_server_token"].strip() != "" - ): - globals.g_cb_server_token = the_config["cb_server_token"] - else: - raise CbInvalidConfig(f"{header} is 'local' and missing 'cb_server_token'") - # TODO: validate url & token with test call? - else: - if "broker_url" in the_config and the_config["broker_url"].strip() != "": - app.conf.update( - broker_url=the_config["broker_url"], - result_backend=the_config["broker_url"], - ) - else: - raise CbInvalidConfig(f"{header} is 'remote' and missing 'broker_url'") - # TODO: validate broker with test call? 
- - if "yara_rules_dir" in the_config and the_config["yara_rules_dir"].strip() != "": - check = os.path.abspath( - os.path.expanduser(placehold(the_config["yara_rules_dir"])) - ) - if os.path.exists(check): - if os.path.isdir(check): - globals.g_yara_rules_dir = check - else: - raise CbInvalidConfig( - f"{header} specified 'yara_rules_dir' ({check}) is not a directory" - ) - else: - raise CbInvalidConfig( - f"{header} specified 'yara_rules_dir' ({check}) does not exist" - ) - else: - raise CbInvalidConfig(f"{header} has no 'yara_rules_dir' definition") - - # NOTE: postgres_host has a default value in globals; use and warn if not defined - if "postgres_host" in the_config and the_config["postgres_host"].strip() != "": - globals.g_postgres_host = the_config["postgres_host"] - else: - logger.warning( - f"{header} has no defined 'postgres_host'; using default of '{globals.g_postgres_host}'" - ) - - # NOTE: postgres_username has a default value in globals; use and warn if not defined - if ( - "postgres_username" in the_config - and the_config["postgres_username"].strip() != "" - ): - globals.g_postgres_username = the_config["postgres_username"] - else: - logger.warning( - f"{header} has no defined 'postgres_username'; using default of '{globals.g_postgres_username}'" - ) - - if ( - "postgres_password" in the_config - and the_config["postgres_password"].strip() != "" - ): - globals.g_postgres_password = the_config["postgres_password"] - else: - raise CbInvalidConfig(f"{header} has no 'postgres_password' defined") - - # NOTE: postgres_db has a default value in globals; use and warn if not defined - if "postgres_db" in the_config and the_config["postgres_db"].strip() != "": - globals.g_postgres_db = the_config["postgres_db"] - else: - logger.warning( - f"{header} has no defined 'postgres_db'; using default of '{globals.g_postgres_db}'" - ) - - # NOTE: postgres_port has a default value in globals; use and warn if not defined - if "postgres_port" in the_config: - globals.g_postgres_port = int(the_config["postgres_port"]) - else: - logger.warning( - f"{header} has no defined 'postgres_port'; using default of '{globals.g_postgres_port}'" - ) - - # TODO: validate postgres connection with supplied information? 
- - if "niceness" in the_config: - os.nice(int(the_config["niceness"])) - - if "concurrent_hashes" in the_config: - globals.MAX_HASHES = int(the_config["concurrent_hashes"]) - logger.debug("Consurrent Hashes: {0}".format(globals.MAX_HASHES)) - - if "disable_rescan" in the_config: - globals.g_disable_rescan = bool(the_config["disable_rescan"]) - logger.debug("Disable Rescan: {0}".format(globals.g_disable_rescan)) - - if "num_days_binaries" in the_config: - globals.g_num_days_binaries = max(int(the_config["num_days_binaries"]), 1) - logger.debug( - "Number of days for binaries: {0}".format(globals.g_num_days_binaries) - ) - - if "vacuum_seconds" in the_config: - globals.g_vacuum_seconds = max(int(the_config["vacuum_seconds"]), 0) - if "vacuum_script" in the_config and the_config["vacuum_seconds"].strip() != "": - if globals.g_vacuum_seconds > 0: - check = os.path.abspath( - os.path.expanduser(placehold(the_config["vacuum_script"])) - ) - if os.path.exists(check): - if os.path.isdir(check): - raise CbInvalidConfig( - f"{header} specified 'vacuum_script' ({check}) is a directory" - ) - else: - raise CbInvalidConfig( - f"{header} specified 'vacuum_script' ({check}) does not exist" - ) - globals.g_vacuum_script = check - logger.warning( - f"Vacuum Script '{check}' is enabled; use this advanced feature at your own discretion!" - ) - else: - logger.debug( - f"{header} has 'vacuum_script' defined, but it is disabled" - ) - - if "feed_database_path" in the_config: - globals.feed_database_path = the_config["feed_database_path"] - check = os.path.abspath(placehold(the_config["feed_database_path"])) - if not (os.path.exists(check) and os.path.isdir(check)): - raise CbInvalidConfig("Invalid database path specified") - +################################################################################ +# Main entrypoint +################################################################################ def main(): try: @@ -596,7 +413,7 @@ def main(): # Verify the configuration file and load up important global variables try: - verify_config(args.config_file, args.output_file) + ConfigurationInit(args.config_file, args.output_file) except Exception as err: logger.error(f"Unable to continue due to a configuration problem: {err}") sys.exit(1) diff --git a/tasks.py b/tasks.py index f5b54a2..be1e435 100644 --- a/tasks.py +++ b/tasks.py @@ -1,4 +1,3 @@ -import configparser import datetime import hashlib import logging @@ -10,90 +9,16 @@ import yara from cbapi.response.models import Binary from cbapi.response.rest_api import CbResponseAPI -from celery import bootsteps, Celery +from celery import bootsteps import globals from analysis_result import AnalysisResult -from exceptions import CbInvalidConfig -from utilities import placehold - -app = Celery() -# noinspection PyUnusedName -app.conf.task_serializer = "pickle" -# noinspection PyUnusedName -app.conf.result_serializer = "pickle" -# noinspection PyUnusedName -app.conf.accept_content = {"pickle"} +from config_handling import app, ConfigurationInit logger = logging.getLogger(__name__) logger.setLevel(logging.INFO) -# noinspection DuplicatedCode -def verify_config(config_file: str) -> None: - """ - Read and validate the current config file. 
- - NOTE: Replicates, to a smaller degree, the function in main.py; it is presumed that more detailed checks are there - :param config_file: path to the config file - """ - abs_config = os.path.abspath(os.path.expanduser(placehold(config_file))) - header = f"Config file '{abs_config}'" - - config = configparser.ConfigParser() - if not os.path.exists(config_file): - raise CbInvalidConfig(f"{header} does not exist!") - - try: - config.read(config_file) - except Exception as err: - raise CbInvalidConfig(err) - - logger.debug(f"NOTE: using config file '{abs_config}'") - if not config.has_section('general'): - raise CbInvalidConfig(f"{header} does not have a 'general' section") - - the_config = config['general'] - - if 'yara_rules_dir' in the_config and the_config['yara_rules_dir'].strip() != "": - check = os.path.abspath(os.path.expanduser(placehold(the_config["yara_rules_dir"]))) - if os.path.exists(check): - if os.path.isdir(check): - globals.g_yara_rules_dir = check - else: - raise CbInvalidConfig(f"{header} specified 'yara_rules_dir' ({check}) is not a directory") - else: - raise CbInvalidConfig(f"{header} specified 'yara_rules_dir' ({check}) does not exist") - else: - raise CbInvalidConfig(f"{header} has no 'yara_rules_dir' definition") - - if 'worker_type' in the_config: - if the_config['worker_type'] == 'local' or the_config['worker_type'].strip() == "": - remote = False - elif the_config['worker_type'] == 'remote': - remote = True - else: # anything else - raise CbInvalidConfig(f"{header} has an invalid 'worker_type' ({the_config['worker_type']})") - else: - remote = False - - # local/remote configuration data - if not remote: - if 'cb_server_url' in the_config and the_config['cb_server_url'].strip() != "": - globals.g_cb_server_url = the_config['cb_server_url'] - else: - raise CbInvalidConfig(f"{header} is 'local' and missing 'cb_server_url'") - if 'cb_server_token' in the_config and the_config['cb_server_token'].strip() != "": - globals.g_cb_server_token = the_config['cb_server_token'] - else: - raise CbInvalidConfig(f"{header} is 'local' and missing 'cb_server_token'") - else: - if 'broker_url' in the_config and the_config['broker_url'].strip() != "": - app.conf.update(broker_url=the_config['broker_url'], result_backend=the_config['broker_url']) - else: - raise CbInvalidConfig(f"{header} is 'remote' and missing 'broker_url'") - - def add_worker_arguments(parser): parser.add_argument('--config-file', default='yara_worker.conf', help='Yara Worker Config') @@ -102,13 +27,12 @@ def add_worker_arguments(parser): class MyBootstep(bootsteps.Step): - - # noinspection PyUnusedLocal + """ + Define the bootstrap task. 
+ """ def __init__(self, worker, config_file='yara_worker.conf', **options): super().__init__(self) - verify_config(config_file) - - # g_yara_rules_dir = yara_rules_dir + ConfigurationInit(config_file, None) app.steps['worker'].add(MyBootstep) diff --git a/test/config/bogus_feed_database_dir.conf b/test/config/bogus_feed_database_dir.conf new file mode 100644 index 0000000..dafef3d --- /dev/null +++ b/test/config/bogus_feed_database_dir.conf @@ -0,0 +1,22 @@ +[general] +worker_type=local + +; ONLY for worker_type of local +cb_server_url=https://127.0.0.1:443 +cb_server_token=wFC4he6OBd20IBbB0XkhE5ZsjBQUj1GB + +yara_rules_dir=./rules + +postgres_host=localhost +postgres_username=cb +postgres_password=6PGcbuwlQnIuPqOF +postgres_db=cb +postgres_port=5002 + +niceness=1 +concurrent_hashes=8 +disable_rescan=False +num_days_binaries=365 + +; file, not dir +feed_database_dir={HERE}/README.md diff --git a/test/config/missing_feed_database_dir.conf b/test/config/missing_feed_database_dir.conf new file mode 100644 index 0000000..c1921f6 --- /dev/null +++ b/test/config/missing_feed_database_dir.conf @@ -0,0 +1,21 @@ +[general] +worker_type=local + +; ONLY for worker_type of local +cb_server_url=https://127.0.0.1:443 +cb_server_token=wFC4he6OBd20IBbB0XkhE5ZsjBQUj1GB + +yara_rules_dir=./rules + +postgres_host=localhost +postgres_username=cb +postgres_password=6PGcbuwlQnIuPqOF +postgres_db=cb +postgres_port=5002 + +niceness=1 +concurrent_hashes=8 +disable_rescan=False +num_days_binaries=365 + +feed_database_dir={HERE}/no-such-directory diff --git a/test/test_configInit.py b/test/test_configInit.py new file mode 100644 index 0000000..822f024 --- /dev/null +++ b/test/test_configInit.py @@ -0,0 +1,388 @@ +# coding: utf-8 +# Copyright © 2018-2019 VMware, Inc. All Rights Reserved. + +import os +from unittest import TestCase + +import globals +from config_handling import ConfigurationInit +from exceptions import CbInvalidConfig + +TESTS = os.path.abspath(os.path.dirname(__file__)) + + +class TestConfigurationInit(TestCase): + + def test_01_missing_config(self): + """ + Ensure a missing config file is detected. + """ + with self.assertRaises(CbInvalidConfig) as err: + ConfigurationInit(os.path.join(TESTS, "config", "no-such-config.conf")) + assert "does not exist!" in "{0}".format(err.exception.args[0]) + + # ----- Full validation (main) + + def test_02_validate_config(self): + # valid local + globals.output_file = None + globals.g_remote = None + ConfigurationInit(os.path.join(TESTS, "config", "valid.conf"), "sample.json") + self.assertTrue(globals.output_file.endswith("sample.json")) + self.assertFalse(globals.g_remote) + + # valid remote + globals.g_remote = None + ConfigurationInit(os.path.join(TESTS, "config", "valid2.conf"), "sample2.json") + self.assertTrue(globals.output_file.endswith("sample2.json")) + self.assertTrue(globals.g_remote) + + def test_03a_config_missing_header(self): + """ + Ensure we detect a configuration file with no section header. + """ + with self.assertRaises(CbInvalidConfig) as err: + ConfigurationInit(os.path.join(TESTS, "config", "missing_header.conf"), "sample.json") + assert "File contains no section headers" in "{0}".format(err.exception.args[0]) + + def test_03b_config_invalid_header(self): + """ + Ensure we detect a configuration file with no "[general]" section header. 
+ """ + with self.assertRaises(CbInvalidConfig) as err: + ConfigurationInit(os.path.join(TESTS, "config", "invalid_header.conf"), "sample.json") + assert "does not have a 'general' section" in "{0}".format(err.exception.args[0]) + + def test_04a_config_missing_worker(self): + """ + Ensure that config lacking worker information defaults to local. + """ + # not defined in file + globals.g_remote = None + ConfigurationInit(os.path.join(TESTS, "config", "missing_worker.conf"), "sample.json") + self.assertFalse(globals.g_remote) + + # defined as "worker_type=" + globals.g_remote = None + ConfigurationInit(os.path.join(TESTS, "config", "missing_worker2.conf"), "sample.json") + self.assertFalse(globals.g_remote) + + def test_04b_config_bogus_worker(self): + """ + Ensure that config with bogus worker is detected. + """ + with self.assertRaises(CbInvalidConfig) as err: + ConfigurationInit(os.path.join(TESTS, "config", "bogus_worker.conf"), "sample.json") + assert "invalid 'worker_type'" in "{0}".format(err.exception.args[0]) + + def test_05a_config_local_worker_missing_server_url(self): + """ + Ensure that local worker config with missing server url is detected. + """ + # not defined in file + with self.assertRaises(CbInvalidConfig) as err: + ConfigurationInit(os.path.join(TESTS, "config", "local_worker_no_server_url.conf"), "sample.json") + assert "is 'local' and missing 'cb_server_url'" in "{0}".format(err.exception.args[0]) + + # defined as "cb_server_url=" + with self.assertRaises(CbInvalidConfig) as err: + ConfigurationInit(os.path.join(TESTS, "config", "local_worker_no_server_url2.conf"), "sample.json") + assert "is 'local' and missing 'cb_server_url'" in "{0}".format(err.exception.args[0]) + + def test_05b_config_local_worker_missing_server_token(self): + """ + Ensure that local worker config with missing server token is detected. + """ + # not defined in file + with self.assertRaises(CbInvalidConfig) as err: + ConfigurationInit(os.path.join(TESTS, "config", "local_worker_no_server_token.conf"), "sample.json") + assert "is 'local' and missing 'cb_server_token'" in "{0}".format(err.exception.args[0]) + + # defined as "cb_server_token=" + with self.assertRaises(CbInvalidConfig) as err: + ConfigurationInit(os.path.join(TESTS, "config", "local_worker_no_server_token2.conf"), "sample.json") + assert "is 'local' and missing 'cb_server_token'" in "{0}".format(err.exception.args[0]) + + def test_06_config_remote_worker_missing_broker_url(self): + """ + Ensure that remote worker config with missing broker url is detected. + """ + # not defined in file + with self.assertRaises(CbInvalidConfig) as err: + ConfigurationInit(os.path.join(TESTS, "config", "remote_worker_no_broker_url.conf"), "sample.json") + assert "is 'remote' and missing 'broker_url'" in "{0}".format(err.exception.args[0]) + + # defined as "broker_url=" + with self.assertRaises(CbInvalidConfig) as err: + ConfigurationInit(os.path.join(TESTS, "config", "remote_worker_no_broker_url2.conf"), "sample.json") + assert "is 'remote' and missing 'broker_url'" in "{0}".format(err.exception.args[0]) + + def test_07a_config_missing_yara_rules_dir(self): + """ + Ensure that config with missing yara rules directory is detected. 
+ """ + # not defined in file + with self.assertRaises(CbInvalidConfig) as err: + ConfigurationInit(os.path.join(TESTS, "config", "no_rules_dir.conf"), "sample.json") + assert "has no 'yara_rules_dir' definition" in "{0}".format(err.exception.args[0]) + + # defined as "yara_rules_dir=" + with self.assertRaises(CbInvalidConfig) as err: + ConfigurationInit(os.path.join(TESTS, "config", "no_rules_dir2.conf"), "sample.json") + assert "has no 'yara_rules_dir' definition" in "{0}".format(err.exception.args[0]) + + def test_07b_config_yara_rules_dir_not_exists(self): + """ + Ensure that config with yara rules directory that does not exist is detected. + """ + with self.assertRaises(CbInvalidConfig) as err: + ConfigurationInit(os.path.join(TESTS, "config", "missing_rules_dir.conf"), "sample.json") + assert "does not exist" in "{0}".format(err.exception.args[0]) + + def test_07c_config_yara_rules_dir_not_directory(self): + """ + Ensure that config with yara rules directory that is not a directory is detected. + """ + with self.assertRaises(CbInvalidConfig) as err: + ConfigurationInit(os.path.join(TESTS, "config", "bogus_rules_dir.conf"), "sample.json") + assert "is not a directory" in "{0}".format(err.exception.args[0]) + + def test_08a_config_missing_postgres_host(self): + """ + Ensure that config with missing postgres_host uses defaults. + """ + check = globals.g_postgres_host + + # undefined, use default in globals + ConfigurationInit(os.path.join(TESTS, "config", "missing_postgres_host.conf"), "sample.json") + self.assertEqual(check, globals.g_postgres_host) + + # defined as "postgres_host=" + ConfigurationInit(os.path.join(TESTS, "config", "missing_postgres_host2.conf"), "sample.json") + self.assertEqual(check, globals.g_postgres_host) + + # TODO: test_08b_config_invalid_postgres_host + + def test_09a_config_missing_postgres_username(self): + """ + Ensure that config with missing postgres_username uses defaults. + """ + check = globals.g_postgres_username + + # undefined, use default in globals + ConfigurationInit(os.path.join(TESTS, "config", "missing_postgres_username.conf"), "sample.json") + self.assertEqual(check, globals.g_postgres_username) + + # defined as "postgres_username=" + ConfigurationInit(os.path.join(TESTS, "config", "missing_postgres_username2.conf"), "sample.json") + self.assertEqual(check, globals.g_postgres_username) + + # TODO: test_09b_config_invalid_postgres_username + + def test_10a_config_missing_postgres_password(self): + """ + Ensure that config with missing postgres_password is detected. + """ + # undefined + with self.assertRaises(CbInvalidConfig) as err: + ConfigurationInit(os.path.join(TESTS, "config", "missing_postgres_password.conf"), "sample.json") + assert "has no 'postgres_password' defined" in "{0}".format(err.exception.args[0]) + + # defined as "postgres_password=" + with self.assertRaises(CbInvalidConfig) as err: + ConfigurationInit(os.path.join(TESTS, "config", "missing_postgres_password2.conf"), "sample.json") + assert "has no 'postgres_password' defined" in "{0}".format(err.exception.args[0]) + + # TODO: test_10a_config_invalid_postgres_password + + def test_11a_config_missing_postgres_db(self): + """ + Ensure that config with missing postgres_db is detected. 
+ """ + check = globals.g_postgres_db + + # undefined, use default in globals + ConfigurationInit(os.path.join(TESTS, "config", "missing_postgres_db.conf"), "sample.json") + self.assertEqual(check, globals.g_postgres_db) + + # defined as "postgres_db=" + ConfigurationInit(os.path.join(TESTS, "config", "missing_postgres_db2.conf"), "sample.json") + self.assertEqual(check, globals.g_postgres_db) + + # TODO: test_11b_config_invalid_postgres_db + + def test_12a_config_missing_postgres_port(self): + """ + Ensure that config with missing postgres_port is detected. + """ + check = globals.g_postgres_port + + # undefined, use default in globals + ConfigurationInit(os.path.join(TESTS, "config", "missing_postgres_port.conf"), "sample.json") + self.assertEqual(check, globals.g_postgres_port) + + # defined as "postgres_port=" + with self.assertRaises(ValueError) as err: + ConfigurationInit(os.path.join(TESTS, "config", "missing_postgres_port2.conf"), "sample.json") + assert "invalid literal for int" in "{0}".format(err.exception.args[0]) + + def test_12b_config_bogus_postgres_port(self): + """ + Ensure that config with bogus (non-int) postgres_port is detected. + """ + with self.assertRaises(ValueError) as err: + ConfigurationInit(os.path.join(TESTS, "config", "bogus_postgres_port.conf"), "sample.json") + assert "invalid literal for int" in "{0}".format(err.exception.args[0]) + + # TODO: test_12c_config_invalid_postgres_port + + def test_13a_config_missing_niceness(self): + """ + Ensure that config with missing niceness is detected. + """ + # defined as "niceness=" + with self.assertRaises(ValueError) as err: + ConfigurationInit(os.path.join(TESTS, "config", "missing_niceness.conf"), "sample.json") + assert "invalid literal for int" in "{0}".format(err.exception.args[0]) + + def test_13b_config_bogus_niceness(self): + """ + Ensure that config with bogus (non-int) niceness is detected. + """ + with self.assertRaises(ValueError) as err: + ConfigurationInit(os.path.join(TESTS, "config", "bogus_niceness.conf"), "sample.json") + assert "invalid literal for int" in "{0}".format(err.exception.args[0]) + + def test_14a_config_missing_concurrent_hashes(self): + """ + Ensure that config with missing concurrent_hashes is detected. + """ + # defined as "concurrent_hashes=" + with self.assertRaises(ValueError) as err: + ConfigurationInit(os.path.join(TESTS, "config", "missing_concurrent_hashes.conf"), "sample.json") + assert "invalid literal for int" in "{0}".format(err.exception.args[0]) + + def test_14b_config_bogus_concurrent_hashes(self): + """ + Ensure that config with bogus (non-int) concurrent_hashes is detected. + """ + with self.assertRaises(ValueError) as err: + ConfigurationInit(os.path.join(TESTS, "config", "bogus_concurrent_hashes.conf"), "sample.json") + assert "invalid literal for int" in "{0}".format(err.exception.args[0]) + + def test_15a_config_missing_disable_rescan(self): + """ + Ensure that config with missing disable_rescan is detected. + """ + globals.g_disable_rescan = None + + # defined as "disable_rescan=" + ConfigurationInit(os.path.join(TESTS, "config", "missing_disable_rescan.conf"), "sample.json") + self.assertFalse(globals.g_disable_rescan) + + def test_15b_config_bogus_disable_rescan(self): + """ + Ensure that config with bogus (non-bool) disable_rescan is detected. 
+ """ + globals.g_disable_rescan = None + + ConfigurationInit(os.path.join(TESTS, "config", "bogus_disable_rescan.conf"), "sample.json") + self.assertTrue(globals.g_disable_rescan) + + def test_16a_config_missing_num_days_binaries(self): + """ + Ensure that config with missing num_days_binaries is detected. + """ + # defined as "num_days_binaries=" + with self.assertRaises(ValueError) as err: + ConfigurationInit(os.path.join(TESTS, "config", "missing_num_days_binaries.conf"), "sample.json") + assert "invalid literal for int" in "{0}".format(err.exception.args[0]) + + def test_16b_config_bogus_num_days_binaries(self): + """ + Ensure that config with bogus (non-int) num_days_binaries is detected. + """ + with self.assertRaises(ValueError) as err: + ConfigurationInit(os.path.join(TESTS, "config", "bogus_num_days_binaries.conf"), "sample.json") + assert "invalid literal for int" in "{0}".format(err.exception.args[0]) + + def test_17a_config_bogus_vacuum_seconds(self): + """ + Ensure that config with bogus (non-int) vacuum_seconds is detected. + """ + with self.assertRaises(ValueError) as err: + ConfigurationInit(os.path.join(TESTS, "config", "bogus_vacuum_seconds.conf"), "sample.json") + assert "invalid literal for int" in "{0}".format(err.exception.args[0]) + + def test_17b_config_negative_vacuum_seconds(self): + """ + Ensure that config with bogus (non-int) vacuum_seconds is detected. + """ + globals.g_vacuum_seconds = None + ConfigurationInit(os.path.join(TESTS, "config", "negative_vacuum_seconds.conf"), "sample.json") + self.assertEqual(0, globals.g_vacuum_seconds) + + def test_18a_config_missing_vacuum_script(self): + """ + Ensure that config with missing vacuum_script is detected. + """ + with self.assertRaises(CbInvalidConfig) as err: + ConfigurationInit(os.path.join(TESTS, "config", "no_such_vacuum_script.conf"), "sample.json") + assert "does not exist" in "{0}".format(err.exception.args[0]) + + def test_18b_config_bogus_vacuum_script_is_dir(self): + """ + Ensure that config with vacuum_script as directory is detected. + """ + with self.assertRaises(CbInvalidConfig) as err: + ConfigurationInit(os.path.join(TESTS, "config", "vacuum_script_dir.conf"), "sample.json") + assert "is a directory" in "{0}".format(err.exception.args[0]) + + def test_19a_config_vacuum_script_enabled(self): + """ + Ensure that config with vacuum_script and vacuum_seconds is ready to go. + """ + globals.g_vacuum_seconds = None + globals.g_vacuum_script = None + ConfigurationInit(os.path.join(TESTS, "config", "vacuum_script_enabled.conf"), "sample.json") + self.assertEqual(3600, globals.g_vacuum_seconds) + self.assertTrue(globals.g_vacuum_script.endswith("/scripts/vacuumscript.sh")) + + def test_19a_config_vacuum_script_and_no_vacuum_seconds(self): + """ + Ensure that config with vacuum_script but vacuum_seconds == 0 has it disabled. + """ + globals.g_vacuum_seconds = None + globals.g_vacuum_script = None + ConfigurationInit(os.path.join(TESTS, "config", "vacuum_script_no_seconds.conf"), "sample.json") + self.assertEqual(0, globals.g_vacuum_seconds) + self.assertIsNone(globals.g_vacuum_script) + + # TODO: 20a feed databasepath not exist + # TODO: 20b feed databasepath not dir + + def test_20a_config_feed_database_dir_not_exists(self): + """ + Ensure that config with feed database directory that does not exist is detected. 
+ """ + with self.assertRaises(CbInvalidConfig) as err: + ConfigurationInit(os.path.join(TESTS, "config", "missing_feed_database_dir.conf"), "sample.json") + assert "does not exist" in "{0}".format(err.exception.args[0]) + + def test_20b_config_feed_database_dir_not_directory(self): + """ + Ensure that config with eed database directory that is not a directory is detected. + """ + with self.assertRaises(CbInvalidConfig) as err: + ConfigurationInit(os.path.join(TESTS, "config", "bogus_feed_database_dir.conf"), "sample.json") + assert "is not a directory" in "{0}".format(err.exception.args[0]) + + # ----- Minimal validation (worker) + + def test_90_minimal_validation_effects(self): + """ + Ensure that minimal caonfiguration does not set extra globals + """ + globals.g_postgres_host = None + ConfigurationInit(os.path.join(TESTS, "config", "valid.conf")) + self.assertIsNone(globals.g_postgres_host) diff --git a/test/test_main.py b/test/test_main.py index 96d13bb..1440d63 100644 --- a/test/test_main.py +++ b/test/test_main.py @@ -1,9 +1,11 @@ +# coding: utf-8 +# Copyright © 2018-2019 VMware, Inc. All Rights Reserved. + import os from unittest import TestCase import globals -from exceptions import CbInvalidConfig -from main import generate_yara_rule_map_hash, verify_config +from main import generate_yara_rule_map_hash TESTS = os.path.abspath(os.path.dirname(__file__)) @@ -15,346 +17,3 @@ def test_01_generate_yara_rule_map_hash(self): generate_yara_rule_map_hash(os.path.join(TESTS, "rules")) self.assertEqual(1, len(globals.g_yara_rule_map_hash_list)) self.assertEqual("191cc0ea3f9ef90ed1850a3650cd38ed", globals.g_yara_rule_map_hash_list[0]) - - def test_02a_validate_config(self): - # valid local - globals.output_file = None - globals.g_remote = None - verify_config(os.path.join(TESTS, "config", "valid.conf")) - self.assertTrue(globals.output_file.endswith("valid.conf.json")) - self.assertFalse(globals.g_remote) - - # valid remote - globals.g_remote = None - verify_config(os.path.join(TESTS, "config", "valid2.conf"), "sample.json") - self.assertTrue(globals.output_file.endswith("sample.json")) - self.assertTrue(globals.g_remote) - - def test_02b_missing_config(self): - """ - Ensure a missing config file is detected. - """ - with self.assertRaises(CbInvalidConfig) as err: - verify_config(os.path.join(TESTS, "config", "no-such-config.conf")) - assert "does not exist!" in "{0}".format(err.exception.args[0]) - - def test_03a_config_missing_header(self): - """ - Ensure we detect a configuration file with no section header. - """ - with self.assertRaises(CbInvalidConfig) as err: - verify_config(os.path.join(TESTS, "config", "missing_header.conf")) - assert "File contains no section headers" in "{0}".format(err.exception.args[0]) - - def test_03b_config_invalid_header(self): - """ - Ensure we detect a configuration file with no "[general]" section header. - """ - with self.assertRaises(CbInvalidConfig) as err: - verify_config(os.path.join(TESTS, "config", "invalid_header.conf")) - assert "does not have a 'general' section" in "{0}".format(err.exception.args[0]) - - def test_04a_config_missing_worker(self): - """ - Ensure that config lacking worker information defaults to local. 
- """ - # not defined in file - globals.g_remote = None - verify_config(os.path.join(TESTS, "config", "missing_worker.conf")) - self.assertFalse(globals.g_remote) - - # defined as "worker_type=" - globals.g_remote = None - verify_config(os.path.join(TESTS, "config", "missing_worker2.conf")) - self.assertFalse(globals.g_remote) - - def test_04b_config_bogus_worker(self): - """ - Ensure that config with bogus worker is detected. - """ - with self.assertRaises(CbInvalidConfig) as err: - verify_config(os.path.join(TESTS, "config", "bogus_worker.conf")) - assert "invalid 'worker_type'" in "{0}".format(err.exception.args[0]) - - def test_05a_config_local_worker_missing_server_url(self): - """ - Ensure that local worker config with missing server url is detected. - """ - # not defined in file - with self.assertRaises(CbInvalidConfig) as err: - verify_config(os.path.join(TESTS, "config", "local_worker_no_server_url.conf")) - assert "is 'local' and missing 'cb_server_url'" in "{0}".format(err.exception.args[0]) - - # defined as "cb_server_url=" - with self.assertRaises(CbInvalidConfig) as err: - verify_config(os.path.join(TESTS, "config", "local_worker_no_server_url2.conf")) - assert "is 'local' and missing 'cb_server_url'" in "{0}".format(err.exception.args[0]) - - def test_05b_config_local_worker_missing_server_token(self): - """ - Ensure that local worker config with missing server token is detected. - """ - # not defined in file - with self.assertRaises(CbInvalidConfig) as err: - verify_config(os.path.join(TESTS, "config", "local_worker_no_server_token.conf")) - assert "is 'local' and missing 'cb_server_token'" in "{0}".format(err.exception.args[0]) - - # defined as "cb_server_token=" - with self.assertRaises(CbInvalidConfig) as err: - verify_config(os.path.join(TESTS, "config", "local_worker_no_server_token2.conf")) - assert "is 'local' and missing 'cb_server_token'" in "{0}".format(err.exception.args[0]) - - def test_06_config_remote_worker_missing_broker_url(self): - """ - Ensure that remote worker config with missing broker url is detected. - """ - # not defined in file - with self.assertRaises(CbInvalidConfig) as err: - verify_config(os.path.join(TESTS, "config", "remote_worker_no_broker_url.conf")) - assert "is 'remote' and missing 'broker_url'" in "{0}".format(err.exception.args[0]) - - # defined as "broker_url=" - with self.assertRaises(CbInvalidConfig) as err: - verify_config(os.path.join(TESTS, "config", "remote_worker_no_broker_url2.conf")) - assert "is 'remote' and missing 'broker_url'" in "{0}".format(err.exception.args[0]) - - def test_07a_config_missing_yara_rules_dir(self): - """ - Ensure that config with missing yara rules directory is detected. - """ - # not defined in file - with self.assertRaises(CbInvalidConfig) as err: - verify_config(os.path.join(TESTS, "config", "no_rules_dir.conf")) - assert "has no 'yara_rules_dir' definition" in "{0}".format(err.exception.args[0]) - - # defined as "yara_rules_dir=" - with self.assertRaises(CbInvalidConfig) as err: - verify_config(os.path.join(TESTS, "config", "no_rules_dir2.conf")) - assert "has no 'yara_rules_dir' definition" in "{0}".format(err.exception.args[0]) - - def test_07b_config_yara_rules_dir_not_exists(self): - """ - Ensure that config with yara rules directory that does not exist is detected. 
- """ - with self.assertRaises(CbInvalidConfig) as err: - verify_config(os.path.join(TESTS, "config", "missing_rules_dir.conf")) - assert "does not exist" in "{0}".format(err.exception.args[0]) - - def test_07c_config_yara_rules_dir_not_directory(self): - """ - Ensure that config with yara rules directory that is not a directory is detected. - """ - with self.assertRaises(CbInvalidConfig) as err: - verify_config(os.path.join(TESTS, "config", "bogus_rules_dir.conf")) - assert "is not a directory" in "{0}".format(err.exception.args[0]) - - def test_08a_config_missing_postgres_host(self): - """ - Ensure that config with missing postgres_host uses defaults. - """ - check = globals.g_postgres_host - - # undefined, use default in globals - verify_config(os.path.join(TESTS, "config", "missing_postgres_host.conf")) - self.assertEqual(check, globals.g_postgres_host) - - # defined as "postgres_host=" - verify_config(os.path.join(TESTS, "config", "missing_postgres_host2.conf")) - self.assertEqual(check, globals.g_postgres_host) - - # TODO: test_08b_config_invalid_postgres_host - - def test_09a_config_missing_postgres_username(self): - """ - Ensure that config with missing postgres_username uses defaults. - """ - check = globals.g_postgres_username - - # undefined, use default in globals - verify_config(os.path.join(TESTS, "config", "missing_postgres_username.conf")) - self.assertEqual(check, globals.g_postgres_username) - - # defined as "postgres_username=" - verify_config(os.path.join(TESTS, "config", "missing_postgres_username2.conf")) - self.assertEqual(check, globals.g_postgres_username) - - # TODO: test_09b_config_invalid_postgres_username - - def test_10a_config_missing_postgres_password(self): - """ - Ensure that config with missing postgres_password is detected. - """ - # undefined - with self.assertRaises(CbInvalidConfig) as err: - verify_config(os.path.join(TESTS, "config", "missing_postgres_password.conf")) - assert "has no 'postgres_password' defined" in "{0}".format(err.exception.args[0]) - - # defined as "postgres_password=" - with self.assertRaises(CbInvalidConfig) as err: - verify_config(os.path.join(TESTS, "config", "missing_postgres_password2.conf")) - assert "has no 'postgres_password' defined" in "{0}".format(err.exception.args[0]) - - # TODO: test_10a_config_invalid_postgres_password - - def test_11a_config_missing_postgres_db(self): - """ - Ensure that config with missing postgres_db is detected. - """ - check = globals.g_postgres_db - - # undefined, use default in globals - verify_config(os.path.join(TESTS, "config", "missing_postgres_db.conf")) - self.assertEqual(check, globals.g_postgres_db) - - # defined as "postgres_db=" - verify_config(os.path.join(TESTS, "config", "missing_postgres_db2.conf")) - self.assertEqual(check, globals.g_postgres_db) - - # TODO: test_11b_config_invalid_postgres_db - - def test_12a_config_missing_postgres_port(self): - """ - Ensure that config with missing postgres_port is detected. - """ - check = globals.g_postgres_port - - # undefined, use default in globals - verify_config(os.path.join(TESTS, "config", "missing_postgres_port.conf")) - self.assertEqual(check, globals.g_postgres_port) - - # defined as "postgres_port=" - with self.assertRaises(ValueError) as err: - verify_config(os.path.join(TESTS, "config", "missing_postgres_port2.conf")) - assert "invalid literal for int" in "{0}".format(err.exception.args[0]) - - def test_12b_config_bogus_postgres_port(self): - """ - Ensure that config with bogus (non-int) postgres_port is detected. 
- """ - with self.assertRaises(ValueError) as err: - verify_config(os.path.join(TESTS, "config", "bogus_postgres_port.conf")) - assert "invalid literal for int" in "{0}".format(err.exception.args[0]) - - # TODO: test_12c_config_invalid_postgres_port - - def test_13a_config_missing_niceness(self): - """ - Ensure that config with missing niceness is detected. - """ - # defined as "niceness=" - with self.assertRaises(ValueError) as err: - verify_config(os.path.join(TESTS, "config", "missing_niceness.conf")) - assert "invalid literal for int" in "{0}".format(err.exception.args[0]) - - def test_13b_config_bogus_niceness(self): - """ - Ensure that config with bogus (non-int) niceness is detected. - """ - with self.assertRaises(ValueError) as err: - verify_config(os.path.join(TESTS, "config", "bogus_niceness.conf")) - assert "invalid literal for int" in "{0}".format(err.exception.args[0]) - - def test_14a_config_missing_concurrent_hashes(self): - """ - Ensure that config with missing concurrent_hashes is detected. - """ - # defined as "concurrent_hashes=" - with self.assertRaises(ValueError) as err: - verify_config(os.path.join(TESTS, "config", "missing_concurrent_hashes.conf")) - assert "invalid literal for int" in "{0}".format(err.exception.args[0]) - - def test_14b_config_bogus_concurrent_hashes(self): - """ - Ensure that config with bogus (non-int) concurrent_hashes is detected. - """ - with self.assertRaises(ValueError) as err: - verify_config(os.path.join(TESTS, "config", "bogus_concurrent_hashes.conf")) - assert "invalid literal for int" in "{0}".format(err.exception.args[0]) - - def test_15a_config_missing_disable_rescan(self): - """ - Ensure that config with missing disable_rescan is detected. - """ - globals.g_disable_rescan = None - - # defined as "disable_rescan=" - verify_config(os.path.join(TESTS, "config", "missing_disable_rescan.conf")) - self.assertFalse(globals.g_disable_rescan) - - def test_15b_config_bogus_disable_rescan(self): - """ - Ensure that config with bogus (non-bool) disable_rescan is detected. - """ - globals.g_disable_rescan = None - - verify_config(os.path.join(TESTS, "config", "bogus_disable_rescan.conf")) - self.assertTrue(globals.g_disable_rescan) - - def test_16a_config_missing_num_days_binaries(self): - """ - Ensure that config with missing num_days_binaries is detected. - """ - # defined as "num_days_binaries=" - with self.assertRaises(ValueError) as err: - verify_config(os.path.join(TESTS, "config", "missing_num_days_binaries.conf")) - assert "invalid literal for int" in "{0}".format(err.exception.args[0]) - - def test_16b_config_bogus_num_days_binaries(self): - """ - Ensure that config with bogus (non-int) num_days_binaries is detected. - """ - with self.assertRaises(ValueError) as err: - verify_config(os.path.join(TESTS, "config", "bogus_num_days_binaries.conf")) - assert "invalid literal for int" in "{0}".format(err.exception.args[0]) - - def test_17a_config_bogus_vacuum_seconds(self): - """ - Ensure that config with bogus (non-int) vacuum_seconds is detected. - """ - with self.assertRaises(ValueError) as err: - verify_config(os.path.join(TESTS, "config", "bogus_vacuum_seconds.conf")) - assert "invalid literal for int" in "{0}".format(err.exception.args[0]) - - def test_17b_config_negative_vacuum_seconds(self): - """ - Ensure that config with bogus (non-int) vacuum_seconds is detected. 
- """ - globals.g_vacuum_seconds = None - verify_config(os.path.join(TESTS, "config", "negative_vacuum_seconds.conf")) - self.assertEqual(0, globals.g_vacuum_seconds) - - def test_18a_config_missing_vacuum_script(self): - """ - Ensure that config with missing vacuum_script is detected. - """ - with self.assertRaises(CbInvalidConfig) as err: - verify_config(os.path.join(TESTS, "config", "no_such_vacuum_script.conf")) - assert "does not exist" in "{0}".format(err.exception.args[0]) - - def test_18b_config_bogus_vacuum_script_is_dir(self): - """ - Ensure that config with vacuum_script as directory is detected. - """ - with self.assertRaises(CbInvalidConfig) as err: - verify_config(os.path.join(TESTS, "config", "vacuum_script_dir.conf")) - assert "is a directory" in "{0}".format(err.exception.args[0]) - - def test_19a_config_vacuum_script_enabled(self): - """ - Ensure that config with vacuum_script and vacuum_seconds is ready to go. - """ - globals.g_vacuum_seconds = None - globals.g_vacuum_script = None - verify_config(os.path.join(TESTS, "config", "vacuum_script_enabled.conf")) - self.assertEqual(3600, globals.g_vacuum_seconds) - self.assertTrue(globals.g_vacuum_script.endswith("/scripts/vacuumscript.sh")) - - def test_19a_config_vacuum_script_and_no_vacuum_seconds(self): - """ - Ensure that config with vacuum_script but vacuum_seconds == 0 has it disabled. - """ - globals.g_vacuum_seconds = None - globals.g_vacuum_script = None - verify_config(os.path.join(TESTS, "config", "vacuum_script_no_seconds.conf")) - self.assertEqual(0, globals.g_vacuum_seconds) - self.assertIsNone(globals.g_vacuum_script) diff --git a/test/test_tasks.py b/test/test_tasks.py index ae8009e..b3fe9f2 100644 --- a/test/test_tasks.py +++ b/test/test_tasks.py @@ -1,8 +1,10 @@ +# coding: utf-8 +# Copyright © 2018-2019 VMware, Inc. All Rights Reserved. + import os from unittest import TestCase -from exceptions import CbInvalidConfig -from tasks import generate_rule_map, generate_yara_rule_map_hash, verify_config +from tasks import generate_rule_map, generate_yara_rule_map_hash TESTS = os.path.abspath(os.path.dirname(__file__)) @@ -19,99 +21,3 @@ def test_01b_generate_yara_rule_map_hash(self): the_list = generate_yara_rule_map_hash(os.path.join(TESTS, "rules")) self.assertEqual(1, len(the_list)) self.assertEqual("191cc0ea3f9ef90ed1850a3650cd38ed", the_list[0]) - - def test_02a_missing_config(self): - """ - Ensure a missing config file is detected. - """ - with self.assertRaises(CbInvalidConfig) as err: - verify_config(os.path.join(TESTS, "config", "no-such-config.conf")) - assert "does not exist!" in "{0}".format(err.exception.args[0]) - - def test_03a_config_missing_header(self): - """ - Ensure we detect a configuration file with no section header. - """ - with self.assertRaises(CbInvalidConfig) as err: - verify_config(os.path.join(TESTS, "config", "missing_header.conf")) - assert "File contains no section headers" in "{0}".format(err.exception.args[0]) - - def test_03b_config_invalid_header(self): - """ - Ensure we detect a configuration file with no "[general]" section header. - """ - with self.assertRaises(CbInvalidConfig) as err: - verify_config(os.path.join(TESTS, "config", "invalid_header.conf")) - assert "does not have a 'general' section" in "{0}".format(err.exception.args[0]) - - def test_04a_config_local_worker_missing_server_url(self): - """ - Ensure that local worker config with missing server url is detected. 
- """ - # not defined in file - with self.assertRaises(CbInvalidConfig) as err: - verify_config(os.path.join(TESTS, "config", "local_worker_no_server_url.conf")) - assert "is 'local' and missing 'cb_server_url'" in "{0}".format(err.exception.args[0]) - - # defined as "cb_server_url=" - with self.assertRaises(CbInvalidConfig) as err: - verify_config(os.path.join(TESTS, "config", "local_worker_no_server_url2.conf")) - assert "is 'local' and missing 'cb_server_url'" in "{0}".format(err.exception.args[0]) - - def test_04b_config_local_worker_missing_server_token(self): - """ - Ensure that local worker config with missing server token is detected. - """ - # not defined in file - with self.assertRaises(CbInvalidConfig) as err: - verify_config(os.path.join(TESTS, "config", "local_worker_no_server_token.conf")) - assert "is 'local' and missing 'cb_server_token'" in "{0}".format(err.exception.args[0]) - - # defined as "cb_server_token=" - with self.assertRaises(CbInvalidConfig) as err: - verify_config(os.path.join(TESTS, "config", "local_worker_no_server_token2.conf")) - assert "is 'local' and missing 'cb_server_token'" in "{0}".format(err.exception.args[0]) - - def test_05_config_remote_worker_missing_broker_url(self): - """ - Ensure that remote worker config with missing broker url is detected. - """ - # not defined in file - with self.assertRaises(CbInvalidConfig) as err: - verify_config(os.path.join(TESTS, "config", "remote_worker_no_broker_url.conf")) - assert "is 'remote' and missing 'broker_url'" in "{0}".format(err.exception.args[0]) - - # defined as "broker_url=" - with self.assertRaises(CbInvalidConfig) as err: - verify_config(os.path.join(TESTS, "config", "remote_worker_no_broker_url2.conf")) - assert "is 'remote' and missing 'broker_url'" in "{0}".format(err.exception.args[0]) - - def test_06a_config_missing_yara_rules_dir(self): - """ - Ensure that config with missing yara rules directory is detected. - """ - # not defined in file - with self.assertRaises(CbInvalidConfig) as err: - verify_config(os.path.join(TESTS, "config", "no_rules_dir.conf")) - assert "has no 'yara_rules_dir' definition" in "{0}".format(err.exception.args[0]) - - # defined as "yara_rules_dir=" - with self.assertRaises(CbInvalidConfig) as err: - verify_config(os.path.join(TESTS, "config", "no_rules_dir2.conf")) - assert "has no 'yara_rules_dir' definition" in "{0}".format(err.exception.args[0]) - - def test_06b_config_yara_rules_dir_not_exists(self): - """ - Ensure that config with yara rules directory that does not exist is detected. - """ - with self.assertRaises(CbInvalidConfig) as err: - verify_config(os.path.join(TESTS, "config", "missing_rules_dir.conf")) - assert "does not exist" in "{0}".format(err.exception.args[0]) - - def test_06c_config_yara_rules_dir_not_directory(self): - """ - Ensure that config with yara rules directory that is not a directory is detected. 
- """ - with self.assertRaises(CbInvalidConfig) as err: - verify_config(os.path.join(TESTS, "config", "bogus_rules_dir.conf")) - assert "is not a directory" in "{0}".format(err.exception.args[0]) From c881c699bdf28796436b1de4e2e7e34dc03e4d7f Mon Sep 17 00:00:00 2001 From: Zachary Estep Date: Mon, 28 Oct 2019 11:41:45 -0400 Subject: [PATCH 082/257] Fix script execution --- main.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/main.py b/main.py index 6e1d3ec..37dafde 100644 --- a/main.py +++ b/main.py @@ -256,10 +256,11 @@ def perform(yara_rule_dir): start_time = time.time() + start_datetime = datetime.now() + conn = get_database_conn() - start_date_binaries = datetime.now() - timedelta(days=globals.g_num_days_binaries) - start_datetime = datetime.now() + start_date_binaries = start_datetime - timedelta(days=globals.g_num_days_binaries) cur = get_cursor(conn, start_date_binaries) @@ -273,7 +274,7 @@ def perform(yara_rule_dir): for row in rows: seconds_since_start = (datetime.now() - start_datetime).seconds - if seconds_since_start >= globals.g_vacuum_seconds > 0: + if seconds_since_start >= globals.g_vacuum_seconds and globals.g_vacuum_seconds > 0: execute_script() start_datetime = datetime.now() From 66b4272b10f47faeceb056ade7237ae9d870aa79 Mon Sep 17 00:00:00 2001 From: Burton Choinski Date: Mon, 28 Oct 2019 13:16:39 -0400 Subject: [PATCH 083/257] * mnerged and cleaned up --- README.md | 60 +++++--------------------------------- globals.py | 4 +-- main.py | 2 +- samples/sample_local.conf | 48 ++++++++++++++++++++++++++++++ samples/sample_remote.conf | 48 ++++++++++++++++++++++++++++++ tasks.py | 2 ++ yara.conf | 58 ------------------------------------ 7 files changed, 109 insertions(+), 113 deletions(-) create mode 100644 samples/sample_local.conf create mode 100644 samples/sample_remote.conf delete mode 100644 yara.conf diff --git a/README.md b/README.md index 23d839e..e26b67b 100644 --- a/README.md +++ b/README.md @@ -24,64 +24,20 @@ The Yara agent must be installed on the same system as Cb Response. #### Sample Yara Agent Config -Copy and modify the following config to `/etc/cb/integrations/yara/yara_agent.conf` - -```ini -[general] -; either run a single worker locally or remotely -; valid types are 'local' or 'remote' -worker_type=local - -; ONLY used for worker_type of local -; Cb Response Server settings for scanning locally. -; For remote scanning please set these parameters in the yara worker config file -; Default: https://127.0.0.1 -cb_server_url= -cb_server_token= - -; ONLY used for worker_type of remote -; IP Address of workers if worker_type is remote -;broker_url=redis:// - -; path to directory containing yara rules -yara_rules_dir=yara_rules - -; Cb Response postgres Database settings -postgres_host= -postgres_username= -postgres_password= -postgres_db= -postgres_port= - -; os nice value used for this script -niceness=1 - -; Number of hashes to send to the workers concurrently. Defaults to 8. -; Recommend setting to the number of workers on the remote system. -concurrent_hashes=8 - -; If you don't want binaries to be rescanned more than once, regardless of the rules used, set this to True -disable_rescan=True - -; The agent will pull binaries up to the configured number of days. 
For exmaple, 365 will pull all binaries with -; a timestamp within the last year -; Default: 365 -num_days_binaries=365 - -; ADVANCED: A vacuum script can be enabled to "clean" the database and prevent fragmentation -; This can be disabled if the seconds value is 0 or less -vacuum_seconds=0 -vacuum_script={HERE}/scripts/vacuumscript.sh -``` +Copy and modify either `sample_local.conf` or `sample_remote.conf` from the `samples` folder +to your desired location. + > NOTES: -> 1) The use of `{HERE}` is a placeholder representing the location of this package's `main.py` file, +> 1) The use of `{HERE}` is a placeholder representing the location of the yara package's `main.py` file, > allowing for the use of relative paths to the package itself. -> 2) Paths can use `~` to access your home directory, so you locate files there as well. +> 2) All paths can use `~` to access your home directory, so you can locate files there as well. #### Running Yara Agent Manually - ./yara_agent --config-file=/etc/cb/integrations/yara/yara_agent.conf +```shell script +./yara_agent --config-file= +``` #### Example Cron Entry diff --git a/globals.py b/globals.py index 5c6eb94..bd1c622 100644 --- a/globals.py +++ b/globals.py @@ -16,7 +16,7 @@ g_remote = False g_yara_rule_map = {} -g_yara_rule_map_hash_list = list() +g_yara_rule_map_hash_list = [] g_postgres_host = '127.0.0.1' g_postgres_username = 'cb' @@ -29,7 +29,7 @@ g_num_binaries_not_available = 0 g_num_binaries_analyzed = 0 -g_disable_rescan = False +g_disable_rescan = True g_num_days_binaries = 365 g_vacuum_seconds = -1 diff --git a/main.py b/main.py index 8b713c9..2b7eeae 100644 --- a/main.py +++ b/main.py @@ -271,7 +271,7 @@ def perform(yara_rule_dir): for row in rows: seconds_since_start = (datetime.now() - start_datetime).seconds - if seconds_since_start >= globals.g_vacuum_seconds and globals.g_vacuum_seconds > 0: + if seconds_since_start >= globals.g_vacuum_seconds > 0: execute_script() start_datetime = datetime.now() diff --git a/samples/sample_local.conf b/samples/sample_local.conf new file mode 100644 index 0000000..96d50bf --- /dev/null +++ b/samples/sample_local.conf @@ -0,0 +1,48 @@ +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; Sample local worker config file +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +[general] +worker_type=local + +; +; ONLY for worker_type of local; add a valid CBR user api token for `cb_server_token` +; +cb_server_url=https://127.0.0.1 +cb_server_token= + +; +; path to directory containing yara rules +; +yara_rules_dir={HERE}/yara_rules + +; +; Cb Response postgres Database settings; insert posgres password as used in cb.conf for `postgres_password` +; +postgres_host=localhost +postgres_username=cb +postgres_password= +postgres_db=cb +postgres_port=5002 + +; +; os.nice value used for this script, if desired +; +;niceness=1 + +; +; Number of hashes to send to the workers concurrently. +; Recommend setting to the number of workers on the remote system. +; +concurrent_hashes=8 + +; +; If you want binaries to be rescanned more than once, regardless of the rules used, set this to False +; +disable_rescan=True + +; +; The agent will pull binaries up to the configured number of days. 
For exmaple, 365 will pull all binaries with +; a timestamp within the last year +; +num_days_binaries=365 diff --git a/samples/sample_remote.conf b/samples/sample_remote.conf new file mode 100644 index 0000000..8ca8af7 --- /dev/null +++ b/samples/sample_remote.conf @@ -0,0 +1,48 @@ +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; Sample remote worker config file +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +[general] +worker_type=local + +; +; ONLY for worker_type of remote +; IP Address of workers if worker_type is remote +; +broker_url=redis://127.0.0.1 + +; +; path to directory containing yara rules +; +yara_rules_dir={HERE}/yara_rules + +; +; Cb Response postgres Database settings; insert posgres password as used in cb.conf for `postgres_password` +; +postgres_host=localhost +postgres_username=cb +postgres_password= +postgres_db=cb +postgres_port=5002 + +; +; os.nice value used for this script, if desired +; +;niceness=1 + +; +; Number of hashes to send to the workers concurrently. +; Recommend setting to the number of workers on the remote system. +; +concurrent_hashes=8 + +; +; If you want binaries to be rescanned more than once, regardless of the rules used, set this to False +; +disable_rescan=True + +; +; The agent will pull binaries up to the configured number of days. For exmaple, 365 will pull all binaries with +; a timestamp within the last year +; +num_days_binaries=365 diff --git a/tasks.py b/tasks.py index be1e435..72a08ee 100644 --- a/tasks.py +++ b/tasks.py @@ -30,6 +30,8 @@ class MyBootstep(bootsteps.Step): """ Define the bootstrap task. """ + + # noinspection PyUnusedLocal def __init__(self, worker, config_file='yara_worker.conf', **options): super().__init__(self) ConfigurationInit(config_file, None) diff --git a/yara.conf b/yara.conf deleted file mode 100644 index c54d3d4..0000000 --- a/yara.conf +++ /dev/null @@ -1,58 +0,0 @@ -[general] -num_days_binaries=1 -; valid types are 'local' or 'remote' -; -worker_type=local - -; -; ONLY for worker_type of remote -; IP Address of workers if worker_type is remote -; -;broker_url=redis://127.0.0.1 - -; -; path to directory containing yara rules -; -yara_rules_dir=./yara_rules - -; -; Cb Response postgres Database settings -; -postgres_host=localhost -postgres_username=cb -postgres_password= -postgres_db=cb -postgres_port=5002 - -; -; ONLY for worker_type of local -; Cb Response Server settings for scanning locally. -; For remote scanning please set these parameters in the yara worker config file -; Default: https://127.0.0.1 -; -cb_server_url=https://127.0.0.1 -cb_server_token= - -; -; nice value used for this script -; -;niceness=1 - -; -; Number of hashes to send to the workers concurrently. Defaults to 8. -; Recommend setting to the number of workers on the remote system. -; -concurrent_hashes=8 - -; -; If you don't want binaries to be rescanned more than once, regardless of the rules used, set this to True -; Default: False -; -disable_rescan=True - -; -; The agent will pull binaries up to the configured number of days. 
For exmaple, 365 will pull all binaries with -; a timestamp within the last year -; Default: 365 -; -;num_days_binaries=365 From 831a7d31a39f0707d5276c1e4046e1093d5f5b0e Mon Sep 17 00:00:00 2001 From: Burton Choinski Date: Mon, 28 Oct 2019 13:26:05 -0400 Subject: [PATCH 084/257] ready to merge down --- test/test_configInit.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/test/test_configInit.py b/test/test_configInit.py index 822f024..d2d4e50 100644 --- a/test/test_configInit.py +++ b/test/test_configInit.py @@ -358,9 +358,6 @@ def test_19a_config_vacuum_script_and_no_vacuum_seconds(self): self.assertEqual(0, globals.g_vacuum_seconds) self.assertIsNone(globals.g_vacuum_script) - # TODO: 20a feed databasepath not exist - # TODO: 20b feed databasepath not dir - def test_20a_config_feed_database_dir_not_exists(self): """ Ensure that config with feed database directory that does not exist is detected. From 76312e13eb3ca57e05e97590252a4c08f2aaeb66 Mon Sep 17 00:00:00 2001 From: Burton Choinski Date: Mon, 28 Oct 2019 14:22:33 -0400 Subject: [PATCH 085/257] * cleaned up globals names * normalized feed db path * bug fixes --- config_handling.py | 70 +++++++++++++++++------------------------ globals.py | 19 ++++++----- main.py | 4 +-- test/test_configInit.py | 6 ++-- 4 files changed, 44 insertions(+), 55 deletions(-) diff --git a/config_handling.py b/config_handling.py index 45a1ce4..7a8efab 100644 --- a/config_handling.py +++ b/config_handling.py @@ -60,8 +60,8 @@ def __init__(self, config_file: str, output_file: str = None) -> None: raise CbInvalidConfig(f"{header} does not have a 'general' section") if output_file is not None: - globals.output_file = os.path.abspath(os.path.expanduser(placehold(output_file))) - logger.debug(f"NOTE: output file will be '{globals.output_file}'") + globals.g_output_file = os.path.abspath(os.path.expanduser(placehold(output_file))) + logger.debug(f"NOTE: output file will be '{globals.g_output_file}'") the_config = config["general"] if "worker_type" in the_config: @@ -134,20 +134,13 @@ def __init__(self, config_file: str, output_file: str = None) -> None: ) # NOTE: postgres_username has a default value in globals; use and warn if not defined - if ( - "postgres_username" in the_config - and the_config["postgres_username"].strip() != "" - ): + if "postgres_username" in the_config and the_config["postgres_username"].strip() != "": globals.g_postgres_username = the_config["postgres_username"] else: logger.warning( - f"{header} has no defined 'postgres_username'; using default of '{globals.g_postgres_username}'" - ) + f"{header} has no defined 'postgres_username'; using default of '{globals.g_postgres_username}'") - if ( - "postgres_password" in the_config - and the_config["postgres_password"].strip() != "" - ): + if "postgres_password" in the_config and the_config["postgres_password"].strip() != "": globals.g_postgres_password = the_config["postgres_password"] else: raise CbInvalidConfig(f"{header} has no 'postgres_password' defined") @@ -156,17 +149,13 @@ def __init__(self, config_file: str, output_file: str = None) -> None: if "postgres_db" in the_config and the_config["postgres_db"].strip() != "": globals.g_postgres_db = the_config["postgres_db"] else: - logger.warning( - f"{header} has no defined 'postgres_db'; using default of '{globals.g_postgres_db}'" - ) + logger.warning(f"{header} has no defined 'postgres_db'; using default of '{globals.g_postgres_db}'") # NOTE: postgres_port has a default value in globals; use and warn if not defined if "postgres_port" in 
the_config:
             globals.g_postgres_port = int(the_config["postgres_port"])
         else:
-            logger.warning(
-                f"{header} has no defined 'postgres_port'; using default of '{globals.g_postgres_port}'"
-            )
+            logger.warning(f"{header} has no defined 'postgres_port'; using default of '{globals.g_postgres_port}'")
         # TODO: validate postgres connection with supplied information?
@@ -174,8 +163,8 @@ def __init__(self, config_file: str, output_file: str = None) -> None:
             os.nice(int(the_config["niceness"]))
         if "concurrent_hashes" in the_config:
-            globals.MAX_HASHES = int(the_config["concurrent_hashes"])
-            logger.debug("Consurrent Hashes: {0}".format(globals.MAX_HASHES))
+            globals.g_max_hashes = int(the_config["concurrent_hashes"])
+            logger.debug("Concurrent Hashes: {0}".format(globals.g_max_hashes))
         if "disable_rescan" in the_config:
             globals.g_disable_rescan = bool(the_config["disable_rescan"])
@@ -189,28 +178,22 @@ def __init__(self, config_file: str, output_file: str = None) -> None:
         if "vacuum_seconds" in the_config:
             globals.g_vacuum_seconds = max(int(the_config["vacuum_seconds"]), 0)
-        if "vacuum_script" in the_config and the_config["vacuum_seconds"].strip() != "":
-            if globals.g_vacuum_seconds > 0:
-                check = os.path.abspath(
-                    os.path.expanduser(placehold(the_config["vacuum_script"]))
-                )
-                if os.path.exists(check):
-                    if os.path.isdir(check):
-                        raise CbInvalidConfig(
-                            f"{header} specified 'vacuum_script' ({check}) is a directory"
-                        )
-                else:
-                    raise CbInvalidConfig(
-                        f"{header} specified 'vacuum_script' ({check}) does not exist"
-                    )
-                globals.g_vacuum_script = check
-                logger.warning(
-                    f"Vacuum Script '{check}' is enabled; use this advanced feature at your own discretion!"
-                )
+
+        if "vacuum_script" in the_config and the_config["vacuum_script"].strip() != "":
+            check = os.path.abspath(os.path.expanduser(placehold(the_config["vacuum_script"])))
+        else:
+            check = os.path.abspath(os.path.expanduser(placehold(globals.g_vacuum_script)))
+
+        if globals.g_vacuum_seconds > 0:
+            if os.path.exists(check):
+                if os.path.isdir(check):
+                    raise CbInvalidConfig(f"{header} specified 'vacuum_script' ({check}) is a directory")
             else:
-                logger.debug(
-                    f"{header} has 'vacuum_script' defined, but it is disabled"
-                )
+                raise CbInvalidConfig(f"{header} specified 'vacuum_script' ({check}) does not exist")
+            globals.g_vacuum_script = check
+            logger.warning(f"Vacuum Script '{check}' is enabled; use this advanced feature at your own discretion!")
+        else:
+            logger.debug(f"{header} has 'vacuum_script' defined, but it is disabled")
         if "feed_database_dir" in the_config and the_config["feed_database_dir"].strip() != "":
             check = os.path.abspath(os.path.expanduser(placehold(the_config["feed_database_dir"])))
@@ -218,6 +201,9 @@ def __init__(self, config_file: str, output_file: str = None) -> None:
                 if not os.path.isdir(check):
                     raise CbInvalidConfig(f"{header} specified 'feed_database_dir' ({check}) is not a directory")
                 else:
-                    globals.feed_database_dir = the_config["feed_database_dir"]
+                    globals.g_feed_database_dir = check
             else:
                 raise CbInvalidConfig(f"{header} specified 'feed_database_dir' ({check}) does not exist")
+        else:
+            # we assume the default is correct, sanitize
+            globals.g_feed_database_dir = os.path.abspath(os.path.expanduser(placehold(globals.g_feed_database_dir)))
diff --git a/globals.py b/globals.py
index bd1c622..a79870b 100644
--- a/globals.py
+++ b/globals.py
@@ -4,17 +4,19 @@
 # noinspection PyUnusedName
 g_config = {}
+g_output_file = './yara_feed.json'
+g_remote = False
+
+# local info
 g_cb_server_url = 'https://127.0.0.1'
g_cb_server_token = '' +# remote info # noinspection PyUnusedName -broker_url = '' - -g_yara_rules_dir = 'yara_rules' -output_file = 'yara_feed.json' +g_broker_url = '' -g_remote = False +g_yara_rules_dir = './yara_rules' g_yara_rule_map = {} g_yara_rule_map_hash_list = [] @@ -24,7 +26,7 @@ g_postgres_port = 5002 g_postgres_db = 'cb' -MAX_HASHES = 8 +g_max_hashes = 8 g_num_binaries_not_available = 0 g_num_binaries_analyzed = 0 @@ -32,7 +34,8 @@ g_disable_rescan = True g_num_days_binaries = 365 + g_vacuum_seconds = -1 -g_vacuum_script = 'scripts/vacuumscript.sh' +g_vacuum_script = './scripts/vacuumscript.sh' -g_feed_database_path = "./" +g_feed_database_dir = "./" diff --git a/main.py b/main.py index 2b7eeae..e9cdfc0 100644 --- a/main.py +++ b/main.py @@ -69,7 +69,7 @@ def generate_feed_from_db() -> None: feed = CbFeed(feedinfo, reports) logger.debug("Writing out feed '{0}' to disk".format(feedinfo.data["name"])) - with open(globals.output_file, "w") as fp: + with open(globals.g_output_file, "w") as fp: fp.write(feed.dump()) @@ -285,7 +285,7 @@ def perform(yara_rule_dir): else: num_binaries_skipped += 1 - if len(md5_hashes) >= globals.MAX_HASHES: + if len(md5_hashes) >= globals.g_max_hashes: _analyze_save_and_log( md5_hashes, start_time, num_binaries_skipped, num_total_binaries ) diff --git a/test/test_configInit.py b/test/test_configInit.py index d2d4e50..e267350 100644 --- a/test/test_configInit.py +++ b/test/test_configInit.py @@ -25,16 +25,16 @@ def test_01_missing_config(self): def test_02_validate_config(self): # valid local - globals.output_file = None + globals.g_output_file = None globals.g_remote = None ConfigurationInit(os.path.join(TESTS, "config", "valid.conf"), "sample.json") - self.assertTrue(globals.output_file.endswith("sample.json")) + self.assertTrue(globals.g_output_file.endswith("sample.json")) self.assertFalse(globals.g_remote) # valid remote globals.g_remote = None ConfigurationInit(os.path.join(TESTS, "config", "valid2.conf"), "sample2.json") - self.assertTrue(globals.output_file.endswith("sample2.json")) + self.assertTrue(globals.g_output_file.endswith("sample2.json")) self.assertTrue(globals.g_remote) def test_03a_config_missing_header(self): From 8a1b8985b55be7df5d786490242d2807cd93e3dd Mon Sep 17 00:00:00 2001 From: Burton Choinski Date: Mon, 28 Oct 2019 14:26:00 -0400 Subject: [PATCH 086/257] fix missed typo --- main.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/main.py b/main.py index e9cdfc0..9113173 100644 --- a/main.py +++ b/main.py @@ -445,7 +445,7 @@ def main(): globals.g_yara_rule_map = generate_rule_map(globals.g_yara_rules_dir) generate_yara_rule_map_hash(globals.g_yara_rules_dir) database = SqliteDatabase( - os.path.join(globals.g_feed_database_path, "binary.db") + os.path.join(globals.g_feed_database_dir, "binary.db") ) db.initialize(database) db.connect() From db79d89df1e01a83efa55ce5e87214c17951e38d Mon Sep 17 00:00:00 2001 From: Burton Choinski Date: Mon, 28 Oct 2019 20:20:37 -0400 Subject: [PATCH 087/257] * clarified config handling with utility methods * fixed unit tests to match --- README.md | 2 +- config_handling.py | 274 +++++++++++---------- globals.py | 6 +- samples/sample_local.conf | 2 +- samples/sample_remote.conf | 2 +- test/config/bogus_feed_database_dir.conf | 2 +- test/config/missing_feed_database_dir.conf | 2 +- test/config/vacuum_script_dir.conf | 2 +- test/config/vacuum_script_enabled.conf | 2 +- test/test_configInit.py | 51 ++-- utilities.py | 6 +- yara_rules/README.md | 2 +- 12 files changed, 188 
insertions(+), 165 deletions(-) diff --git a/README.md b/README.md index e26b67b..7a3f7bd 100644 --- a/README.md +++ b/README.md @@ -29,7 +29,7 @@ to your desired location. > NOTES: -> 1) The use of `{HERE}` is a placeholder representing the location of the yara package's `main.py` file, +> 1) The use of `{YARA}` is a placeholder representing the location of the yara package's `main.py` file, > allowing for the use of relative paths to the package itself. > 2) All paths can use `~` to access your home directory, so you can locate files there as well. diff --git a/config_handling.py b/config_handling.py index 7a8efab..84a7a50 100644 --- a/config_handling.py +++ b/config_handling.py @@ -4,6 +4,7 @@ import configparser import logging import os +from typing import Optional from celery import Celery @@ -43,167 +44,186 @@ def __init__(self, config_file: str, output_file: str = None) -> None: :param config_file: The config file to validate :param output_file: the output file; if not specified assume we are a task worker (simplified validation) """ - abs_config = os.path.abspath(os.path.expanduser(placehold(config_file))) - header = f"Config file '{abs_config}'" + self.abs_config = os.path.abspath(os.path.expanduser(placehold(config_file))) + self.source = f"Config file '{self.abs_config}'" config = configparser.ConfigParser() if not os.path.exists(config_file): - raise CbInvalidConfig(f"{header} does not exist!") + raise CbInvalidConfig(f"{self.source} does not exist!") try: config.read(config_file) except Exception as err: raise CbInvalidConfig(err) - logger.debug(f"NOTE: using config file '{abs_config}'") + logger.debug(f"NOTE: using config file '{self.abs_config}'") if not config.has_section("general"): - raise CbInvalidConfig(f"{header} does not have a 'general' section") + raise CbInvalidConfig(f"{self.source} does not have a 'general' section") + self.the_config = config["general"] + + self._worker_check() if output_file is not None: globals.g_output_file = os.path.abspath(os.path.expanduser(placehold(output_file))) logger.debug(f"NOTE: output file will be '{globals.g_output_file}'") + self._extended_check() - the_config = config["general"] - if "worker_type" in the_config: - if ( - the_config["worker_type"] == "local" - or the_config["worker_type"].strip() == "" - ): - globals.g_remote = False # 'local' or empty definition - elif the_config["worker_type"] == "remote": - globals.g_remote = True # 'remote' - else: # anything else - raise CbInvalidConfig( - f"{header} has an invalid 'worker_type' ({the_config['worker_type']})" - ) - else: + def _worker_check(self) -> None: + """ + Validate entries used by task workers as well as the main process. 
+ + :raises CbInvalidConfig: + """ + value = self._as_str("worker_type", default="local") + if value == "local": globals.g_remote = False - logger.warning(f"{header} does not specify 'worker_type', assuming local") - - if "yara_rules_dir" in the_config and the_config["yara_rules_dir"].strip() != "": - check = os.path.abspath(os.path.expanduser(placehold(the_config["yara_rules_dir"]))) - if os.path.exists(check): - if os.path.isdir(check): - globals.g_yara_rules_dir = check - else: - raise CbInvalidConfig( - f"{header} specified 'yara_rules_dir' ({check}) is not a directory" - ) - else: - raise CbInvalidConfig( - f"{header} specified 'yara_rules_dir' ({check}) does not exist" - ) + elif value == "remote": + globals.g_remote = True else: - raise CbInvalidConfig(f"{header} has no 'yara_rules_dir' definition") + raise CbInvalidConfig(f"{self.source} has an invalid 'worker_type' ({value})") + + globals.g_yara_rules_dir = self._as_path("yara_rules_dir", required=True, exists=True, is_dir=True) # local/remote configuration data if not globals.g_remote: - if "cb_server_url" in the_config and the_config["cb_server_url"].strip() != "": - globals.g_cb_server_url = the_config["cb_server_url"] - else: - raise CbInvalidConfig(f"{header} is 'local' and missing 'cb_server_url'") - if ( - "cb_server_token" in the_config - and the_config["cb_server_token"].strip() != "" - ): - globals.g_cb_server_token = the_config["cb_server_token"] - else: - raise CbInvalidConfig(f"{header} is 'local' and missing 'cb_server_token'") + globals.g_cb_server_url = self._as_str("cb_server_url", required=True) + globals.g_cb_server_token = self._as_str("cb_server_token", required=True) else: - if "broker_url" in the_config and the_config["broker_url"].strip() != "": - app.conf.update( - broker_url=the_config["broker_url"], - result_backend=the_config["broker_url"], - ) - else: - raise CbInvalidConfig(f"{header} is 'remote' and missing 'broker_url'") + value = self._as_str("broker_url", required=True) + app.conf.update(broker_url=value, result_backend=value) + + def _extended_check(self) -> None: + """ + Validate entries used by the main process. 
- # done with minimal task worker validation - if output_file is None: - return + :raises CbInvalidConfig: + :raises ValueError: + """ # TODO: validate url & token with test call (if local) # TODO: validate broker with test call (if remote) - # NOTE: postgres_host has a default value in globals; use and warn if not defined - if "postgres_host" in the_config and the_config["postgres_host"].strip() != "": - globals.g_postgres_host = the_config["postgres_host"] - else: - logger.warning( - f"{header} has no defined 'postgres_host'; using default of '{globals.g_postgres_host}'" - ) - - # NOTE: postgres_username has a default value in globals; use and warn if not defined - if "postgres_username" in the_config and the_config["postgres_username"].strip() != "": - globals.g_postgres_username = the_config["postgres_username"] - else: - logger.warning( - f"{header} has no defined 'postgres_username'; using default of '{globals.g_postgres_username}'") - - if "postgres_password" in the_config and the_config["postgres_password"].strip() != "": - globals.g_postgres_password = the_config["postgres_password"] - else: - raise CbInvalidConfig(f"{header} has no 'postgres_password' defined") + globals.g_postgres_host = self._as_str("postgres_host", default=globals.g_postgres_host) + globals.g_postgres_username = self._as_str("postgres_username", default=globals.g_postgres_username) + globals.g_postgres_password = self._as_str("postgres_password", required=True) + globals.g_postgres_db = self._as_str("postgres_db", default=globals.g_postgres_username) + globals.g_postgres_port = self._as_int("postgres_port", default=globals.g_postgres_port) - # NOTE: postgres_db has a default value in globals; use and warn if not defined - if "postgres_db" in the_config and the_config["postgres_db"].strip() != "": - globals.g_postgres_db = the_config["postgres_db"] - else: - logger.warning(f"{header} has no defined 'postgres_db'; using default of '{globals.g_postgres_db}'") + # TODO: validate postgres connection with supplied information? - # NOTE: postgres_port has a default value in globals; use and warn if not defined - if "postgres_port" in the_config: - globals.g_postgres_port = int(the_config["postgres_port"]) + value = self._as_int("niceness") + if value: + os.nice(value) + + globals.g_max_hashes = self._as_int("concurrent_hashes", default=globals.g_max_hashes) + globals.g_disable_rescan = self._as_bool("disable_rescan", default=globals.g_disable_rescan) + globals.g_num_days_binaries = self._as_int("num_days_binaries", default=globals.g_num_days_binaries, + min_value=1) + + globals.g_vacuum_seconds = self._as_int("vacuum_seconds", default=globals.g_vacuum_seconds, min_value=0) + if globals.g_vacuum_seconds > 0: + globals.g_vacuum_script = self._as_path("vacuum_script", required=True, is_dir=False, + default=globals.g_vacuum_script) + logger.warning((f"Vacuum Script '{globals.g_vacuum_script}' is enabled; ", + "use this advanced feature at your own discretion!")) else: - logger.warning(f"{header} has no defined 'postgres_port'; using default of '{globals.g_postgres_port}'") + if self._as_path("vacuum_script", required=False, default=globals.g_vacuum_script): + logger.debug(f"{self.source} has 'vacuum_script' defined, but it is disabled") - # TODO: validate postgres connection with supplied information? 
+        globals.g_feed_database_dir = self._as_path("feed_database_dir", required=True, is_dir=True,
+                                                    default=globals.g_feed_database_dir)
-        if "niceness" in the_config:
-            os.nice(int(the_config["niceness"]))
+    # ----- Type Handlers
-        if "concurrent_hashes" in the_config:
-            globals.g_max_hashes = int(the_config["concurrent_hashes"])
-            logger.debug("Concurrent Hashes: {0}".format(globals.g_max_hashes))
+    def _as_str(self, param: str, required: bool = False, default: str = None) -> Optional[str]:
+        """
+        Get a string parameter from the configuration.
-        if "disable_rescan" in the_config:
-            globals.g_disable_rescan = bool(the_config["disable_rescan"])
-            logger.debug("Disable Rescan: {0}".format(globals.g_disable_rescan))
+        :param param: Name of the configuration parameter
+        :param required: True if this must be specified in the configuration
+        :param default: If not required, default value if not supplied
+        :return: the string value, or None/default if not required and no exception
+        :raises CbInvalidConfig:
+        """
+        value = self.the_config.get(param, None)
+        if value is not None:
+            value = value.strip()
+        if (value is None or value == "") and default is not None:
+            value = default
+            logger.warning(f"{self.source} has no defined '{param}'; using default of '{default}'")
+        if required and (value is None or value == ""):
+            raise CbInvalidConfig(f"{self.source} has no '{param}' definition")
+        return value
+
+    def _as_path(self, param: str, required: bool = False, exists: bool = True, is_dir: bool = False,
+                 default: str = None) -> Optional[str]:
+        """
+        Get a string parameter from the configuration and treat it as a path, performing normalization
+        to produce an absolute path. A "~" at the beginning will be treated as the current user's home
+        directory; the placeholder "{YARA}" will be treated as the location of your yara package directory.
+
+        :param param: Name of the configuration parameter
+        :param required: True if this must be specified in the configuration
+        :param exists: if True and required, check for existence as well
+        :param is_dir: if exists and True, source must be a directory
+        :param default: If not required, default value if not supplied
+        :return: the path value, or None if not required and no exception
+        :raises CbInvalidConfig:
+        """
+        value = self._as_str(param, required, default=default)
+        if value is None:
+            return value
+
+        value = os.path.abspath(os.path.expanduser(placehold(value)))
+        if exists:
+            if not os.path.exists(value):
+                raise CbInvalidConfig(f"{self.source} specified path parameter '{param}' ({value}) does not exist")
+            if is_dir:
+                if not os.path.isdir(value):
+                    raise CbInvalidConfig(f"{self.source} specified path '{param}' ({value}) is not a directory")
+            else:
+                if os.path.isdir(value):
+                    raise CbInvalidConfig(f"{self.source} specified path '{param}' ({value}) is a directory")
-        if "num_days_binaries" in the_config:
-            globals.g_num_days_binaries = max(int(the_config["num_days_binaries"]), 1)
-            logger.debug(
-                "Number of days for binaries: {0}".format(globals.g_num_days_binaries)
-            )
+        return value
-        if "vacuum_seconds" in the_config:
-            globals.g_vacuum_seconds = max(int(the_config["vacuum_seconds"]), 0)
+    def _as_int(self, param: str, required: bool = False, default: int = None, min_value: int = -1) -> Optional[int]:
+        """
+        Get an integer configuration parameter from the configuration. A parameter that cannot be converted
+        to an int will raise a ValueError.
+
+        :param param: Name of the configuration parameter
+        :param required: True if this must be specified in the configuration
+        :param default: If not required, default value if not supplied
+        :param min_value: minimum value allowed
+        :return: the integer value, or None/default if not required and no exception
+        :raises CbInvalidConfig:
+        :raises ValueError:
+        """
+        value = self._as_str(param, required)
+        if (value is None or value == "") and default is not None:
+            logger.warning(f"{self.source} has no defined '{param}'; using default of '{default}'")
+            return default
+        else:
+            return None if (value is None or value == "") else max(int(value), min_value)
-        if "vacuum_script" in the_config and the_config["vacuum_script"].strip() != "":
-            check = os.path.abspath(os.path.expanduser(placehold(the_config["vacuum_script"])))
-        else:
-            check = os.path.abspath(os.path.expanduser(placehold(globals.g_vacuum_script)))
-
-        if globals.g_vacuum_seconds > 0:
-            if os.path.exists(check):
-                if os.path.isdir(check):
-                    raise CbInvalidConfig(f"{header} specified 'vacuum_script' ({check}) is a directory")
+    # noinspection PySameParameterValue
+    def _as_bool(self, param: str, required: bool = False, default: bool = None) -> Optional[bool]:
+        """
+        Get a boolean configuration parameter from the configuration. A parameter that is not one of
+        ["true", "yes", "false", "no"] will raise a ValueError.
+
+        :param param: Name of the configuration parameter
+        :param required: True if this must be specified in the configuration
+        :return: the boolean value, or None if not required and no exception
+        :raises CbInvalidConfig:
+        :raises ValueError:
+        """
+        value = self._as_str(param, required)
+        if value is not None and value.lower() not in ["true", "yes", "false", "no", ""]:
+            raise ValueError(f"{self.source} parameter '{param}' is not a valid boolean value")
+        if value is None and default is not None:
+            logger.warning(f"{self.source} has no defined '{param}'; using default of '{default}'")
+            return default
         else:
-            raise CbInvalidConfig(f"{header} specified 'vacuum_script' ({check}) does not exist")
-        globals.g_vacuum_script = check
-        logger.warning(f"Vacuum Script '{check}' is enabled; use this advanced feature at your own discretion!")
-        else:
-            logger.debug(f"{header} has 'vacuum_script' defined, but it is disabled")
-
-        if "feed_database_dir" in the_config and the_config["feed_database_dir"].strip() != "":
-            check = os.path.abspath(os.path.expanduser(placehold(the_config["feed_database_dir"])))
-            if os.path.exists(check):
-                if not os.path.isdir(check):
-                    raise CbInvalidConfig(f"{header} specified 'feed_database_dir' ({check}) is not a directory")
-                else:
-                    globals.g_feed_database_dir = check
-            else:
-                raise CbInvalidConfig(f"{header} specified 'feed_database_dir' ({check}) does not exist")
+            return value if value is None else value.lower() in ["true", "yes"]
-        else:
-            # we assume the default is correct, sanitize
-            globals.g_feed_database_dir = os.path.abspath(os.path.expanduser(placehold(globals.g_feed_database_dir)))
diff --git a/globals.py b/globals.py
index a79870b..66827ad 100644
--- a/globals.py
+++ b/globals.py
@@ -1,5 +1,7 @@
 ################################################################################
 # This module contains global variables used by a single instance.
+
+# A placeholder of "{YARA}" represents the location of this yara package
 ################################################################################
 # noinspection PyUnusedName
@@ -16,7 +18,7 @@
 # noinspection PyUnusedName
 g_broker_url = ''
-g_yara_rules_dir = './yara_rules'
+g_yara_rules_dir = '{YARA}/yara_rules'
 g_yara_rule_map = {}
 g_yara_rule_map_hash_list = []
@@ -36,6 +38,6 @@
 g_num_days_binaries = 365
 g_vacuum_seconds = -1
-g_vacuum_script = './scripts/vacuumscript.sh'
+g_vacuum_script = '{YARA}/scripts/vacuumscript.sh'
 g_feed_database_dir = "./"
diff --git a/samples/sample_local.conf b/samples/sample_local.conf
index 96d50bf..d078fea 100644
--- a/samples/sample_local.conf
+++ b/samples/sample_local.conf
@@ -14,7 +14,7 @@ cb_server_token=
 ;
 ; path to directory containing yara rules
 ;
-yara_rules_dir={HERE}/yara_rules
+yara_rules_dir={YARA}/yara_rules
 ;
 ; Cb Response postgres Database settings; insert postgres password as used in cb.conf for `postgres_password`
diff --git a/samples/sample_remote.conf b/samples/sample_remote.conf
index 8ca8af7..65ede2c 100644
--- a/samples/sample_remote.conf
+++ b/samples/sample_remote.conf
@@ -14,7 +14,7 @@ broker_url=redis://127.0.0.1
 ;
 ; path to directory containing yara rules
 ;
-yara_rules_dir={HERE}/yara_rules
+yara_rules_dir={YARA}/yara_rules
 ;
 ; Cb Response postgres Database settings; insert postgres password as used in cb.conf for `postgres_password`
diff --git a/test/config/bogus_feed_database_dir.conf b/test/config/bogus_feed_database_dir.conf
index dafef3d..1c69eae 100644
--- a/test/config/bogus_feed_database_dir.conf
+++ b/test/config/bogus_feed_database_dir.conf
@@ -19,4 +19,4 @@ disable_rescan=False
 num_days_binaries=365
 ; file, not dir
-feed_database_dir={HERE}/README.md
+feed_database_dir={YARA}/README.md
diff --git a/test/config/missing_feed_database_dir.conf b/test/config/missing_feed_database_dir.conf
index c1921f6..9969c52 100644
--- a/test/config/missing_feed_database_dir.conf
+++ b/test/config/missing_feed_database_dir.conf
@@ -18,4 +18,4 @@ concurrent_hashes=8
 disable_rescan=False
 num_days_binaries=365
-feed_database_dir={HERE}/no-such-directory
+feed_database_dir={YARA}/no-such-directory
diff --git a/test/config/vacuum_script_dir.conf b/test/config/vacuum_script_dir.conf
index c72ac9d..edd47ba 100644
--- a/test/config/vacuum_script_dir.conf
+++ b/test/config/vacuum_script_dir.conf
@@ -19,4 +19,4 @@ disable_rescan=False
 num_days_binaries=365
 vacuum_seconds=3600
-vacuum_script=./rules
+vacuum_script={YARA}/test/rules
diff --git a/test/config/vacuum_script_enabled.conf b/test/config/vacuum_script_enabled.conf
index 6b914c8..cd7c967 100644
--- a/test/config/vacuum_script_enabled.conf
+++ b/test/config/vacuum_script_enabled.conf
@@ -19,4 +19,4 @@ disable_rescan=False
 num_days_binaries=365
 vacuum_seconds=3600
-vacuum_script={HERE}/scripts/vacuumscript.sh
+vacuum_script={YARA}/scripts/vacuumscript.sh
diff --git a/test/test_configInit.py b/test/test_configInit.py
index e267350..94c5700 100644
--- a/test/test_configInit.py
+++ b/test/test_configInit.py
@@ -82,12 +82,12 @@ def test_05a_config_local_worker_missing_server_url(self):
         # not defined in file
         with self.assertRaises(CbInvalidConfig) as err:
             ConfigurationInit(os.path.join(TESTS, "config", "local_worker_no_server_url.conf"), "sample.json")
-        assert "is 'local' and missing 'cb_server_url'" in "{0}".format(err.exception.args[0])
+        assert "has no 'cb_server_url' definition" in "{0}".format(err.exception.args[0])
         # defined as "cb_server_url="
         with
self.assertRaises(CbInvalidConfig) as err: ConfigurationInit(os.path.join(TESTS, "config", "local_worker_no_server_url2.conf"), "sample.json") - assert "is 'local' and missing 'cb_server_url'" in "{0}".format(err.exception.args[0]) + assert "has no 'cb_server_url' definition" in "{0}".format(err.exception.args[0]) def test_05b_config_local_worker_missing_server_token(self): """ @@ -96,12 +96,12 @@ def test_05b_config_local_worker_missing_server_token(self): # not defined in file with self.assertRaises(CbInvalidConfig) as err: ConfigurationInit(os.path.join(TESTS, "config", "local_worker_no_server_token.conf"), "sample.json") - assert "is 'local' and missing 'cb_server_token'" in "{0}".format(err.exception.args[0]) + assert "has no 'cb_server_token' definition" in "{0}".format(err.exception.args[0]) # defined as "cb_server_token=" with self.assertRaises(CbInvalidConfig) as err: ConfigurationInit(os.path.join(TESTS, "config", "local_worker_no_server_token2.conf"), "sample.json") - assert "is 'local' and missing 'cb_server_token'" in "{0}".format(err.exception.args[0]) + assert "has no 'cb_server_token' definition" in "{0}".format(err.exception.args[0]) def test_06_config_remote_worker_missing_broker_url(self): """ @@ -110,12 +110,12 @@ def test_06_config_remote_worker_missing_broker_url(self): # not defined in file with self.assertRaises(CbInvalidConfig) as err: ConfigurationInit(os.path.join(TESTS, "config", "remote_worker_no_broker_url.conf"), "sample.json") - assert "is 'remote' and missing 'broker_url'" in "{0}".format(err.exception.args[0]) + assert "has no 'broker_url' definition" in "{0}".format(err.exception.args[0]) # defined as "broker_url=" with self.assertRaises(CbInvalidConfig) as err: ConfigurationInit(os.path.join(TESTS, "config", "remote_worker_no_broker_url2.conf"), "sample.json") - assert "is 'remote' and missing 'broker_url'" in "{0}".format(err.exception.args[0]) + assert "has no 'broker_url' definition" in "{0}".format(err.exception.args[0]) def test_07a_config_missing_yara_rules_dir(self): """ @@ -186,12 +186,12 @@ def test_10a_config_missing_postgres_password(self): # undefined with self.assertRaises(CbInvalidConfig) as err: ConfigurationInit(os.path.join(TESTS, "config", "missing_postgres_password.conf"), "sample.json") - assert "has no 'postgres_password' defined" in "{0}".format(err.exception.args[0]) + assert "has no 'postgres_password' definition" in "{0}".format(err.exception.args[0]) # defined as "postgres_password=" with self.assertRaises(CbInvalidConfig) as err: ConfigurationInit(os.path.join(TESTS, "config", "missing_postgres_password2.conf"), "sample.json") - assert "has no 'postgres_password' defined" in "{0}".format(err.exception.args[0]) + assert "has no 'postgres_password' definition" in "{0}".format(err.exception.args[0]) # TODO: test_10a_config_invalid_postgres_password @@ -222,9 +222,8 @@ def test_12a_config_missing_postgres_port(self): self.assertEqual(check, globals.g_postgres_port) # defined as "postgres_port=" - with self.assertRaises(ValueError) as err: - ConfigurationInit(os.path.join(TESTS, "config", "missing_postgres_port2.conf"), "sample.json") - assert "invalid literal for int" in "{0}".format(err.exception.args[0]) + ConfigurationInit(os.path.join(TESTS, "config", "missing_postgres_port2.conf"), "sample.json") + self.assertEqual(check, globals.g_postgres_port) def test_12b_config_bogus_postgres_port(self): """ @@ -238,12 +237,10 @@ def test_12b_config_bogus_postgres_port(self): def test_13a_config_missing_niceness(self): """ - Ensure that 
config with missing niceness is not a problem.
         """
         # defined as "niceness="
-        with self.assertRaises(ValueError) as err:
-            ConfigurationInit(os.path.join(TESTS, "config", "missing_niceness.conf"), "sample.json")
-        assert "invalid literal for int" in "{0}".format(err.exception.args[0])
+        ConfigurationInit(os.path.join(TESTS, "config", "missing_niceness.conf"), "sample.json")
     def test_13b_config_bogus_niceness(self):
         """
@@ -255,12 +252,13 @@ def test_14a_config_missing_concurrent_hashes(self):
         """
-        Ensure that config with missing concurrent_hashes is detected.
+        Ensure that config with missing concurrent_hashes uses default.
         """
+        check = globals.g_max_hashes
+
         # defined as "concurrent_hashes="
-        with self.assertRaises(ValueError) as err:
-            ConfigurationInit(os.path.join(TESTS, "config", "missing_concurrent_hashes.conf"), "sample.json")
-        assert "invalid literal for int" in "{0}".format(err.exception.args[0])
+        ConfigurationInit(os.path.join(TESTS, "config", "missing_concurrent_hashes.conf"), "sample.json")
+        self.assertEqual(check, globals.g_max_hashes)
     def test_14b_config_bogus_concurrent_hashes(self):
         """
@@ -286,17 +284,20 @@ def test_15b_config_bogus_disable_rescan(self):
         """
         globals.g_disable_rescan = None
-        ConfigurationInit(os.path.join(TESTS, "config", "bogus_disable_rescan.conf"), "sample.json")
-        self.assertTrue(globals.g_disable_rescan)
+        # Not true, false, yes, no
+        with self.assertRaises(ValueError) as err:
+            ConfigurationInit(os.path.join(TESTS, "config", "bogus_disable_rescan.conf"), "sample.json")
+        assert "is not a valid boolean value" in "{0}".format(err.exception.args[0])
     def test_16a_config_missing_num_days_binaries(self):
         """
-        Ensure that config with missing num_days_binaries is detected.
+        Ensure that config with missing num_days_binaries reverts to default
         """
+        check = globals.g_num_days_binaries
+
         # defined as "num_days_binaries="
-        with self.assertRaises(ValueError) as err:
-            ConfigurationInit(os.path.join(TESTS, "config", "missing_num_days_binaries.conf"), "sample.json")
-        assert "invalid literal for int" in "{0}".format(err.exception.args[0])
+        ConfigurationInit(os.path.join(TESTS, "config", "missing_num_days_binaries.conf"), "sample.json")
+        self.assertEqual(check, globals.g_num_days_binaries)
     def test_16b_config_bogus_num_days_binaries(self):
         """
diff --git a/utilities.py b/utilities.py
index 92788fb..dba95be 100644
--- a/utilities.py
+++ b/utilities.py
@@ -4,10 +4,10 @@
 import os
-__all__ = ["HERE", "placehold"]
+__all__ = ["YARAHOME", "placehold"]
 # self location for the package
-HERE = os.path.dirname(__file__)
+YARAHOME = os.path.dirname(__file__)
 def placehold(source: str) -> str:
@@ -16,5 +16,5 @@ def placehold(source: str) -> str:
     :param source: source string to convert
     :return: converted string
     """
-    source = source.replace("{HERE}", HERE)
+    source = source.replace("{YARA}", YARAHOME)
     return source
diff --git a/yara_rules/README.md b/yara_rules/README.md
index 3f4a2c9..cbc04e9 100644
--- a/yara_rules/README.md
+++ b/yara_rules/README.md
@@ -2,7 +2,7 @@
 This folder can be used as a convenient location to locate your Yara rules. It can be defined in your
 configuration file as:
 ```ini
-yara_rules_dir={HERE}/yara_rules
+yara_rules_dir={YARA}/yara_rules
 ```
 It is suggested that subfolders be used to organize any complex and differing rule sets.
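[Editor's note: the `_as_str`/`_as_path`/`_as_int`/`_as_bool` helpers introduced in PATCH 087 collapse the repeated inline `if "x" in the_config` blocks into a single validation path per type. The following is a minimal, self-contained sketch of that pattern; `SimpleConfig` and `InvalidConfig` are illustrative stand-ins, not the connector's actual `ConfigurationInit`/`CbInvalidConfig` classes.]

```python
# Minimal sketch of the typed-getter pattern adopted in PATCH 087 above.
# SimpleConfig and InvalidConfig are illustrative names only.
import configparser
from typing import Optional


class InvalidConfig(Exception):
    pass


class SimpleConfig:
    def __init__(self, path: str) -> None:
        parser = configparser.ConfigParser()
        if not parser.read(path):  # read() returns [] for a missing file
            raise InvalidConfig(f"Config file '{path}' does not exist!")
        if not parser.has_section("general"):
            raise InvalidConfig(f"Config file '{path}' does not have a 'general' section")
        self._section = parser["general"]

    def as_str(self, param: str, required: bool = False, default: str = None) -> Optional[str]:
        # One shared place for the strip / empty / default / required logic.
        value = (self._section.get(param) or "").strip()
        if not value and default is not None:
            return default
        if required and not value:
            raise InvalidConfig(f"'{param}' has no definition")
        return value or None

    def as_int(self, param: str, required: bool = False, default: int = None,
               min_value: int = -1) -> Optional[int]:
        # Built on as_str(), so every type shares identical missing-value behavior.
        value = self.as_str(param, required)
        if value is None:
            return default
        return max(int(value), min_value)  # int() raises ValueError on bad input


if __name__ == "__main__":
    import os
    import tempfile

    with tempfile.NamedTemporaryFile("w", suffix=".conf", delete=False) as fp:
        fp.write("[general]\nworker_type=local\nconcurrent_hashes=8\n")
    cfg = SimpleConfig(fp.name)
    assert cfg.as_str("worker_type", required=True) == "local"
    assert cfg.as_int("concurrent_hashes", default=4) == 8
    assert cfg.as_int("niceness") is None  # missing and no default -> None
    os.unlink(fp.name)
```

Funneling every parameter through one string getter is what lets the reworked unit tests in this series assert a single "has no '...' definition" message for each missing required value, instead of a different message per parameter.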
From 3722357308bc8043d4307ac5ff7c8332433810a0 Mon Sep 17 00:00:00 2001 From: Burton Choinski Date: Tue, 29 Oct 2019 09:13:20 -0400 Subject: [PATCH 088/257] * added copyright * unit test cleanup; added setUp to revert globals --- analysis_result.py | 22 +++++++++++----------- binary_database.py | 3 +++ config_handling.py | 13 +++++++------ exceptions.py | 21 ++++++++++++++++++--- feed.py | 3 +++ globals.py | 3 +++ main.py | 3 +++ singleton.py | 19 ++++--------------- tasks.py | 3 +++ test/test_cbFeed.py | 3 +++ test/test_cbFeedInfo.py | 3 +++ test/test_cbReport.py | 3 +++ test/test_configInit.py | 26 +++++++++++++++++++++++++- test/test_main.py | 2 +- test/test_singleInstance.py | 6 +++++- test/test_tasks.py | 2 +- utilities.py | 5 ++++- 17 files changed, 100 insertions(+), 40 deletions(-) diff --git a/analysis_result.py b/analysis_result.py index 644b043..dc63a94 100644 --- a/analysis_result.py +++ b/analysis_result.py @@ -1,15 +1,15 @@ +# coding: utf-8 +# Copyright © 2014-2019 VMware, Inc. All Rights Reserved. + + class AnalysisResult(object): - def __init__(self, - md5, - score=0, - short_result='', - long_result='', - last_scan_date=None, - last_error_msg='', - last_error_date=None, - stop_future_scans=False, - binary_not_available=False, - misc=''): + """ + This class holds binary analysis results information. + """ + + def __init__(self, md5: str, score: int = 0, short_result: str = '', long_result: str = '', last_scan_date=None, + last_error_msg: str = '', last_error_date=None, stop_future_scans: bool = False, + binary_not_available: bool = False, misc: str = ''): self.md5 = md5 self.short_result = short_result self.long_result = long_result diff --git a/binary_database.py b/binary_database.py index 2ea8a17..ad30611 100644 --- a/binary_database.py +++ b/binary_database.py @@ -1,3 +1,6 @@ +# coding: utf-8 +# Copyright © 2014-2019 VMware, Inc. All Rights Reserved. + import logging from peewee import * diff --git a/config_handling.py b/config_handling.py index 84a7a50..8935982 100644 --- a/config_handling.py +++ b/config_handling.py @@ -1,5 +1,5 @@ # coding: utf-8 -# Copyright © 2018-2019 VMware, Inc. All Rights Reserved. +# Copyright © 2014-2019 VMware, Inc. All Rights Reserved. 
import configparser
 import logging
@@ -124,8 +124,8 @@ def _extended_check(self) -> None:
         if globals.g_vacuum_seconds > 0:
             globals.g_vacuum_script = self._as_path("vacuum_script", required=True, is_dir=False,
                                                     default=globals.g_vacuum_script)
-            logger.warning((f"Vacuum Script '{globals.g_vacuum_script}' is enabled; ",
-                            "use this advanced feature at your own discretion!"))
+            logger.warning(f"Vacuum Script '{globals.g_vacuum_script}' is enabled; " +
+                           "use this advanced feature at your own discretion!")
         else:
             if self._as_path("vacuum_script", required=False, default=globals.g_vacuum_script):
                 logger.debug(f"{self.source} has 'vacuum_script' defined, but it is disabled")
@@ -201,9 +201,10 @@ def _as_int(self, param: str, required: bool = False, default: int = None, min_v
         :raises ValueError:
         """
         value = self._as_str(param, required)
-        if (value is None or value == "") and default is not None:
-            logger.warning(f"{self.source} has no defined '{param}'; using default of '{default}'")
-            return default
+        use_default = default if default is None else max(default, min_value)
+        if (value is None or value == "") and use_default is not None:
+            logger.warning(f"{self.source} has no defined '{param}'; using default of '{use_default}'")
+            return use_default
         else:
             return None if (value is None or value == "") else max(int(value), min_value)
diff --git a/exceptions.py b/exceptions.py
index 8bc53a9..dd4011f 100644
--- a/exceptions.py
+++ b/exceptions.py
@@ -1,6 +1,5 @@
-################################################################################
-# Exception Classes
-################################################################################
+# coding: utf-8
+# Copyright © 2014-2019 VMware, Inc. All Rights Reserved.
 class CbException(Exception):
@@ -18,12 +17,28 @@ class CbInvalidConfig(CbException):
 class CbIconError(CbException):
+    """
+    Exception raised if supplied icon is bad.
+    """
     pass
 class CbInvalidFeed(CbException):
+    """
+    Exception raised if supplied Feed data is invalid.
+    """
     pass
 class CbInvalidReport(CbException):
+    """
+    Exception raised if supplied Report data is invalid.
+    """
+    pass
+
+
+class SingleInstanceException(BaseException):
+    """
+    Exception raised if you attempt to run more than one instance.
+    """
     pass
diff --git a/feed.py b/feed.py
index d01eb40..ad88142 100644
--- a/feed.py
+++ b/feed.py
@@ -1,3 +1,6 @@
+# coding: utf-8
+# Copyright © 2014-2019 VMware, Inc. All Rights Reserved.
+
 import base64
 import binascii
 import json
diff --git a/globals.py b/globals.py
index 66827ad..21730e6 100644
--- a/globals.py
+++ b/globals.py
@@ -1,3 +1,6 @@
+# coding: utf-8
+# Copyright © 2014-2019 VMware, Inc. All Rights Reserved.
+
 ################################################################################
 # This module contains global variables used by a single instance.
 #
diff --git a/main.py b/main.py
index 9113173..bb2f461 100644
--- a/main.py
+++ b/main.py
@@ -1,3 +1,6 @@
+# coding: utf-8
+# Copyright © 2014-2019 VMware, Inc. All Rights Reserved.
+
 import argparse
 import hashlib
 import json
diff --git a/singleton.py b/singleton.py
index 67a0c18..07d2c25 100644
--- a/singleton.py
+++ b/singleton.py
@@ -1,4 +1,5 @@
-#!/usr/bin/env python
+# coding: utf-8
+# Copyright © 2014-2019 VMware, Inc. All Rights Reserved.
import fcntl import logging @@ -6,21 +7,9 @@ import sys import tempfile -logger = logging.getLogger(__name__) - - -################################################################################ -# Exception Classes -################################################################################ - +from exceptions import SingleInstanceException -class SingleInstanceException(BaseException): - pass - - -################################################################################ -# Primary Classes -################################################################################ +logger = logging.getLogger(__name__) class SingleInstance(object): diff --git a/tasks.py b/tasks.py index 72a08ee..a0f7618 100644 --- a/tasks.py +++ b/tasks.py @@ -1,3 +1,6 @@ +# coding: utf-8 +# Copyright © 2014-2019 VMware, Inc. All Rights Reserved. + import datetime import hashlib import logging diff --git a/test/test_cbFeed.py b/test/test_cbFeed.py index 8ba980b..5c9d1bc 100644 --- a/test/test_cbFeed.py +++ b/test/test_cbFeed.py @@ -1,3 +1,6 @@ +# coding: utf-8 +# Copyright © 2014-2019 VMware, Inc. All Rights Reserved. + from unittest import TestCase from feed import CbFeed, CbInvalidFeed diff --git a/test/test_cbFeedInfo.py b/test/test_cbFeedInfo.py index 9b3e261..40a11e1 100644 --- a/test/test_cbFeedInfo.py +++ b/test/test_cbFeedInfo.py @@ -1,3 +1,6 @@ +# coding: utf-8 +# Copyright © 2014-2019 VMware, Inc. All Rights Reserved. + from unittest import TestCase from feed import CbFeedInfo, CbIconError, CbInvalidFeed diff --git a/test/test_cbReport.py b/test/test_cbReport.py index 2851ef3..9e3a7db 100644 --- a/test/test_cbReport.py +++ b/test/test_cbReport.py @@ -1,3 +1,6 @@ +# coding: utf-8 +# Copyright © 2014-2019 VMware, Inc. All Rights Reserved. + import time from unittest import TestCase diff --git a/test/test_configInit.py b/test/test_configInit.py index 94c5700..5d06e09 100644 --- a/test/test_configInit.py +++ b/test/test_configInit.py @@ -1,5 +1,5 @@ # coding: utf-8 -# Copyright © 2018-2019 VMware, Inc. All Rights Reserved. +# Copyright © 2014-2019 VMware, Inc. All Rights Reserved. import os from unittest import TestCase @@ -13,6 +13,30 @@ class TestConfigurationInit(TestCase): + def setUp(self): + globals.g_config = {} + globals.g_output_file = './yara_feed.json' + globals.g_remote = False + globals.g_cb_server_url = 'https://127.0.0.1' + globals.g_cb_server_token = '' + globals.g_broker_url = '' + globals.g_yara_rules_dir = '{YARA}/yara_rules' + globals.g_yara_rule_map = {} + globals.g_yara_rule_map_hash_list = [] + globals.g_postgres_host = '127.0.0.1' + globals.g_postgres_username = 'cb' + globals.g_postgres_password = '' + globals.g_postgres_port = 5002 + globals.g_postgres_db = 'cb' + globals.g_max_hashes = 8 + globals.g_num_binaries_not_available = 0 + globals.g_num_binaries_analyzed = 0 + globals.g_disable_rescan = True + globals.g_num_days_binaries = 365 + globals.g_vacuum_seconds = -1 + globals.g_vacuum_script = '{YARA}/scripts/vacuumscript.sh' + globals.g_feed_database_dir = "./" + def test_01_missing_config(self): """ Ensure a missing config file is detected. diff --git a/test/test_main.py b/test/test_main.py index 1440d63..064099c 100644 --- a/test/test_main.py +++ b/test/test_main.py @@ -1,5 +1,5 @@ # coding: utf-8 -# Copyright © 2018-2019 VMware, Inc. All Rights Reserved. +# Copyright © 2014-2019 VMware, Inc. All Rights Reserved. 
import os from unittest import TestCase diff --git a/test/test_singleInstance.py b/test/test_singleInstance.py index cc0969c..9c719c8 100644 --- a/test/test_singleInstance.py +++ b/test/test_singleInstance.py @@ -1,10 +1,14 @@ +# coding: utf-8 +# Copyright © 2014-2019 VMware, Inc. All Rights Reserved. + import logging import os import sys from multiprocessing import Process from unittest import TestCase -from singleton import SingleInstance, SingleInstanceException +from exceptions import SingleInstanceException +from singleton import SingleInstance logger = logging.getLogger(__name__) diff --git a/test/test_tasks.py b/test/test_tasks.py index b3fe9f2..f141d44 100644 --- a/test/test_tasks.py +++ b/test/test_tasks.py @@ -1,5 +1,5 @@ # coding: utf-8 -# Copyright © 2018-2019 VMware, Inc. All Rights Reserved. +# Copyright © 2014-2019 VMware, Inc. All Rights Reserved. import os from unittest import TestCase diff --git a/utilities.py b/utilities.py index dba95be..d64f60f 100644 --- a/utilities.py +++ b/utilities.py @@ -1,3 +1,6 @@ +# coding: utf-8 +# Copyright © 2014-2019 VMware, Inc. All Rights Reserved. + ################################################################################ # This file contains various package-wide utility functions ################################################################################ @@ -6,7 +9,7 @@ __all__ = ["YARAHOME", "placehold"] -# self location for the package +# self location for the package; remember to update this if this file is moved! YARAHOME = os.path.dirname(__file__) From 24396e825d62284b02f9d8f7f8d39049b1c8d725 Mon Sep 17 00:00:00 2001 From: Burton Choinski Date: Wed, 30 Oct 2019 07:55:57 -0400 Subject: [PATCH 089/257] * added local dump folder for local use --- local/README.md | 7 +++++++ 1 file changed, 7 insertions(+) create mode 100644 local/README.md diff --git a/local/README.md b/local/README.md new file mode 100644 index 0000000..29d571d --- /dev/null +++ b/local/README.md @@ -0,0 +1,7 @@ +# Yara Package Local Storage +This folder is here for storing any locally created artifacts. It is marked with `.gitignore` +so that any files here are not pushed to the remote repository. + +If you wish to use this folder in any configuration files, start your path with +`{YARA}/local/` -- it is suggested that you create subfolders to organize your usage +of this location. From d78a29a6d6ea016f01c164966d2cdbf285704bec Mon Sep 17 00:00:00 2001 From: Burton Choinski Date: Wed, 30 Oct 2019 08:21:49 -0400 Subject: [PATCH 090/257] * doc cleanup, prior to .gitignore lockdown --- .gitignore | 6 +++--- README.md | 15 +++------------ globals.py | 4 ++-- local/README.md | 6 +++--- {yara_rules => local/yara_rules}/README.md | 4 ++-- main.py | 4 +--- samples/sample_local.conf | 2 +- samples/sample_remote.conf | 2 +- test/test_configInit.py | 4 ++-- 9 files changed, 18 insertions(+), 29 deletions(-) rename {yara_rules => local/yara_rules}/README.md (79%) diff --git a/.gitignore b/.gitignore index 5f47e59..77feac6 100644 --- a/.gitignore +++ b/.gitignore @@ -42,6 +42,6 @@ config.ini # created local DB binary.db -# local rules -yara_rules/*.yara -yara_rules/*.yar +# local storage +#local.yara_rules/*.yara +#local.yara_rules/*.yar diff --git a/README.md b/README.md index 7a3f7bd..66fdc22 100644 --- a/README.md +++ b/README.md @@ -1,12 +1,6 @@ # Installing Yara Agent (Centos/RHEL 6) The Yara agent must be installed on the same system as Cb Response. 
- -* Create directories - - ``` - mkdir -p /usr/share/cb/integrations/yara/yara_rules - ``` * Download Yara Agent @@ -20,10 +14,7 @@ The Yara agent must be installed on the same system as Cb Response. wget -O /usr/share/cb/integrations/yara/yara-logo.png https://github.com/carbonblack/cb-yara-connector/releases/download/2.0.1/yara-logo.png ``` -* Create Yara Agent Config File - - -#### Sample Yara Agent Config +## Create Yara Agent Config Copy and modify either `sample_local.conf` or `sample_remote.conf` from the `samples` folder to your desired location. @@ -40,6 +31,7 @@ to your desired location. ``` #### Example Cron Entry +_[TBD]_ # Remote Worker Installation (Centos/RHEL 7) @@ -89,7 +81,6 @@ to your desired location. source ./venv/bin/activate pip install -r requirements.txt deactivate - mkdir yara_rules ``` @@ -116,7 +107,7 @@ to your desired location. ; Directory for temporary yara rules storage ; WARNING: Put your yara rules with the yara agent. This is just temporary storage. ; - yara_rules_dir=yara_rules + yara_rules_dir={YARA}/local/yara_rules * Copy, modify and save to `yara_worker.conf` diff --git a/globals.py b/globals.py index 21730e6..d17aabd 100644 --- a/globals.py +++ b/globals.py @@ -21,7 +21,7 @@ # noinspection PyUnusedName g_broker_url = '' -g_yara_rules_dir = '{YARA}/yara_rules' +g_yara_rules_dir = '{YARA}/local/yara_rules' g_yara_rule_map = {} g_yara_rule_map_hash_list = [] @@ -43,4 +43,4 @@ g_vacuum_seconds = -1 g_vacuum_script = '{YARA}/scripts/vacuumscript.sh' -g_feed_database_dir = "./" +g_feed_database_dir = "{YARA}/local" diff --git a/local/README.md b/local/README.md index 29d571d..45d2f49 100644 --- a/local/README.md +++ b/local/README.md @@ -2,6 +2,6 @@ This folder is here for storing any locally created artifacts. It is marked with `.gitignore` so that any files here are not pushed to the remote repository. -If you wish to use this folder in any configuration files, start your path with -`{YARA}/local/` -- it is suggested that you create subfolders to organize your usage -of this location. +Enter `{YARA}/local` for any configuration path that that you wish to use this location, +using subfolders for clearer organization. One subfolder, `yara_rules` is already provided +for local rule files. diff --git a/yara_rules/README.md b/local/yara_rules/README.md similarity index 79% rename from yara_rules/README.md rename to local/yara_rules/README.md index cbc04e9..2c16b18 100644 --- a/yara_rules/README.md +++ b/local/yara_rules/README.md @@ -1,8 +1,8 @@ -# yara_rules +# local.yara_rules This folder can be used a convenient location to locate your Yara rules. It can be defined in your configuration file as: ```ini -yara_rules_dir={YARA}/yara_rules +yara_rules_dir={YARA}/local/yara_rules ``` It is suggested that subfolders be used to organize any complex and differing rule sets. 
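Since several defaults now lean on the `{YARA}` placeholder, here is a minimal sketch of the expansion performed by `utilities.placehold`. The `str.replace` body is an assumption (this series only shows `YARAHOME` and the function's name), but the final line is exactly how `config_handling` consumes it:

```python
import os

# package root; utilities.py computes this from its own file location
YARAHOME = os.path.dirname(os.path.abspath(__file__))


def placehold(source: str) -> str:
    """Expand the {YARA} token to the package location (sketch)."""
    return source.replace("{YARA}", YARAHOME) if source else source


# how the config handler normalizes a configured path to an absolute one
value = os.path.abspath(os.path.expanduser(placehold("{YARA}/local/yara_rules")))
```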
diff --git a/main.py b/main.py index bb2f461..4c5b0e9 100644 --- a/main.py +++ b/main.py @@ -447,9 +447,7 @@ def main(): try: globals.g_yara_rule_map = generate_rule_map(globals.g_yara_rules_dir) generate_yara_rule_map_hash(globals.g_yara_rules_dir) - database = SqliteDatabase( - os.path.join(globals.g_feed_database_dir, "binary.db") - ) + database = SqliteDatabase(os.path.join(globals.g_feed_database_dir, "binary.db")) db.initialize(database) db.connect() db.create_tables([BinaryDetonationResult]) diff --git a/samples/sample_local.conf b/samples/sample_local.conf index d078fea..e1bfbca 100644 --- a/samples/sample_local.conf +++ b/samples/sample_local.conf @@ -14,7 +14,7 @@ cb_server_token= ; ; path to directory containing yara rules ; -yara_rules_dir={YARA}/yara_rules +yara_rules_dir={YARA}/local/yara_rules ; ; Cb Response postgres Database settings; insert posgres password as used in cb.conf for `postgres_password` diff --git a/samples/sample_remote.conf b/samples/sample_remote.conf index 65ede2c..226a2be 100644 --- a/samples/sample_remote.conf +++ b/samples/sample_remote.conf @@ -14,7 +14,7 @@ broker_url=redis://127.0.0.1 ; ; path to directory containing yara rules ; -yara_rules_dir={YARA}/yara_rules +yara_rules_dir={YARA}/local/yara_rules ; ; Cb Response postgres Database settings; insert posgres password as used in cb.conf for `postgres_password` diff --git a/test/test_configInit.py b/test/test_configInit.py index 5d06e09..c2c9c0d 100644 --- a/test/test_configInit.py +++ b/test/test_configInit.py @@ -20,7 +20,7 @@ def setUp(self): globals.g_cb_server_url = 'https://127.0.0.1' globals.g_cb_server_token = '' globals.g_broker_url = '' - globals.g_yara_rules_dir = '{YARA}/yara_rules' + globals.g_yara_rules_dir = '{YARA}/local/yara_rules' globals.g_yara_rule_map = {} globals.g_yara_rule_map_hash_list = [] globals.g_postgres_host = '127.0.0.1' @@ -35,7 +35,7 @@ def setUp(self): globals.g_num_days_binaries = 365 globals.g_vacuum_seconds = -1 globals.g_vacuum_script = '{YARA}/scripts/vacuumscript.sh' - globals.g_feed_database_dir = "./" + globals.g_feed_database_dir = "{YARA}/local" def test_01_missing_config(self): """ From 674b93de6524d2b8e972cf379c4fa945feb2d6be Mon Sep 17 00:00:00 2001 From: Burton Choinski Date: Wed, 30 Oct 2019 08:57:53 -0400 Subject: [PATCH 091/257] Code cleanup --- .gitignore | 5 +- main.py | 134 ++++++++++++++++++++++++++--------------------------- 2 files changed, 68 insertions(+), 71 deletions(-) diff --git a/.gitignore b/.gitignore index 77feac6..8b11bd0 100644 --- a/.gitignore +++ b/.gitignore @@ -42,6 +42,5 @@ config.ini # created local DB binary.db -# local storage -#local.yara_rules/*.yara -#local.yara_rules/*.yar +# local storage not to be pushed up +local diff --git a/main.py b/main.py index 4c5b0e9..38aab42 100644 --- a/main.py +++ b/main.py @@ -97,13 +97,11 @@ def generate_yara_rule_map_hash(yara_rule_path: str) -> None: md5.update(data) temp_list.append(str(md5.hexdigest())) - # FUTURE: Would this be better served as a map keyed by md5, with the value being the rule text, as for the - # following method? globals.g_yara_rule_map_hash_list = temp_list globals.g_yara_rule_map_hash_list.sort() -def generate_rule_map_remote(yara_rule_path) -> None: +def generate_rule_map_remote(yara_rule_path: str) -> None: """ Get remote rules and store into an internal map keyed by file name. :param yara_rule_path: path to wheer thr rules are stored @@ -176,10 +174,7 @@ def save_results(analysis_results: List[AnalysisResult]) -> None: """ Save the current analysis results. 
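`save_results` is the single sink for scan output; in this series it is always fed from `_analyze_save_and_log`, roughly as below (a sketch assembled from the surrounding hunks, not new behavior):

```python
# hashes: List[str] of md5s pulled from the modulestore query
analysis_results = analyze_binaries(hashes, local=(not globals.g_remote))
if analysis_results:
    for analysis_result in analysis_results:
        # surface per-binary scan errors before persisting
        if analysis_result.last_error_msg:
            logger.error(analysis_result.last_error_msg)
    save_results(analysis_results)
```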
- TODO: figure out typing! - - :param analysis_results: - :return: + :param analysis_results: list of AnalysisResult objects """ for analysis_result in analysis_results: if analysis_result.binary_not_available: @@ -206,6 +201,10 @@ def save_results(analysis_results: List[AnalysisResult]) -> None: def get_database_conn(): + """ + Generate a database connection. + :return: + """ logger.info("Connecting to Postgres database...") conn = psycopg2.connect( host=globals.g_postgres_host, @@ -218,7 +217,13 @@ def get_database_conn(): return conn -def get_cursor(conn, start_date_binaries): +def get_cursor(conn, start_date_binaries: datetime): + """ + Get a query qursor into the database. + :param conn: database connection + :param start_date_binaries: Timestamp representing the earliest time to check for binaries + :return: cursor pointing to the query results + """ cur = conn.cursor(name="yara_agent") # noinspection SqlDialectInspection,SqlNoDataSourceInspection @@ -230,8 +235,11 @@ def get_cursor(conn, start_date_binaries): return cur -def execute_script(): - logger.warning("!!!Executing vacuum script!!!") +def execute_script() -> None: + """ + Execute a external maintenence script (vacuum script). + """ + logger.info("!!!Executing vacuum script!!!") target = os.path.join(os.getcwd(), globals.g_vacuum_script) @@ -241,10 +249,14 @@ def execute_script(): logger.error(stderr) if prog.returncode: logger.warning("program returned error code {0}".format(prog.returncode)) - logger.warning("!!!Done Executing vacuum script!!!") + logger.info("!!!Done Executing vacuum script!!!") -def perform(yara_rule_dir): +def perform(yara_rule_dir: str) -> None: + """ + Perform a yara search. + :param yara_rule_dir: location of the rules directory + """ if globals.g_remote: logger.info("Uploading yara rules to workers...") generate_rule_map_remote(yara_rule_dir) @@ -256,22 +268,18 @@ def perform(yara_rule_dir): start_time = time.time() + # Determine our binaries window (date forward) start_datetime = datetime.now() - - conn = get_database_conn() - start_date_binaries = start_datetime - timedelta(days=globals.g_num_days_binaries) + # make the connection, get the info, get out + conn = get_database_conn() cur = get_cursor(conn, start_date_binaries) - rows = cur.fetchall() - conn.commit() - conn.close() logger.info(f"Enumerating modulestore...found {len(rows)} resident binaries") - for row in rows: seconds_since_start = (datetime.now() - start_datetime).seconds if seconds_since_start >= globals.g_vacuum_seconds > 0: @@ -288,23 +296,27 @@ def perform(yara_rule_dir): else: num_binaries_skipped += 1 + # if we hit our hash chunking limit, save and reset if len(md5_hashes) >= globals.g_max_hashes: - _analyze_save_and_log( - md5_hashes, start_time, num_binaries_skipped, num_total_binaries - ) + _analyze_save_and_log(md5_hashes, start_time, num_binaries_skipped, num_total_binaries) md5_hashes = [] - _analyze_save_and_log( - md5_hashes, start_time, num_binaries_skipped, num_total_binaries - ) + # any finishup work + if len(md5_hashes) > 0: + _analyze_save_and_log(md5_hashes, start_time, num_binaries_skipped, num_total_binaries) generate_feed_from_db() -def _check_hash_against_feed(md5_hash): - query = BinaryDetonationResult.select().where( - BinaryDetonationResult.md5 == md5_hash - ) +def _check_hash_against_feed(md5_hash: str) -> bool: + """ + Check if the found hash matches our feed criteria. 
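The boolean contract here is easy to invert mentally: `True` means "not seen before, scan it." The call site in `perform` uses it as a gate, approximately as below (a sketch; only the `else` branch is visible in this hunk's context lines):

```python
if _check_hash_against_feed(md5_hash):
    # new or changed binary: queue it for scanning
    md5_hashes.append(md5_hash)
else:
    num_binaries_skipped += 1
```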
+ + :param md5_hash: + :return: True if the binary does not exist + """ + query = BinaryDetonationResult.select().where(BinaryDetonationResult.md5 == md5_hash) + if query.exists(): try: bdr = BinaryDetonationResult.get(BinaryDetonationResult.md5 == md5_hash) @@ -312,29 +324,30 @@ def _check_hash_against_feed(md5_hash): if globals.g_disable_rescan and bdr.misc: return False + # If it is the same then we don't need to scan again if scanned_hash_list == globals.g_yara_rule_map_hash_list: - # - # If it is the same then we don't need to scan again - # return False - except Exception as e: - logger.error( - "Unable to decode yara rule map hash from database: {0}".format(e) - ) + except Exception as err: + logger.error(f"Unable to decode yara rule map hash from database: {err}") return False return True -def _analyze_save_and_log(hashes, start_time, num_binaries_skipped, num_total_binaries): +def _analyze_save_and_log(hashes: List[str], start_time: float, num_binaries_skipped: int, + num_total_binaries: int) -> None: + """ + Analyise and save any found binaries. + + :param hashes: List of hashes + :param start_time: start time of the operation (python time) + :param num_binaries_skipped: numb er of binaries skipped for any reason + :param num_total_binaries: numb er of binaries seen + """ analysis_results = analyze_binaries(hashes, local=(not globals.g_remote)) if analysis_results: for analysis_result in analysis_results: - logger.debug( - ( - f"Analysis result is {analysis_result.md5} {analysis_result.binary_not_available}" - f" {analysis_result.long_result} {analysis_result.last_error_msg}" - ) - ) + logger.debug((f"Analysis result is {analysis_result.md5} {analysis_result.binary_not_available}" + f" {analysis_result.long_result} {analysis_result.last_error_msg}")) if analysis_result.last_error_msg: logger.error(analysis_result.last_error_msg) save_results(analysis_results) @@ -342,37 +355,22 @@ def _analyze_save_and_log(hashes, start_time, num_binaries_skipped, num_total_bi _rule_logging(start_time, num_binaries_skipped, num_total_binaries) -def _rule_logging( - start_time: float, num_binaries_skipped: int, num_total_binaries: int -) -> None: +def _rule_logging(start_time: float, num_binaries_skipped: int, num_total_binaries: int) -> None: """ Simple method to log yara work. 
:param start_time: start time for the work - :param num_binaries_skipped: - :param num_total_binaries: - :return: + :param num_binaries_skipped: numb er of binaries skipped for any reason + :param num_total_binaries: numb er of binaries seen """ elapsed_time = time.time() - start_time logger.info("elapsed time: {0}".format(humanfriendly.format_timespan(elapsed_time))) - logger.debug( - " number binaries scanned: {0}".format(globals.g_num_binaries_analyzed) - ) - logger.debug(" number binaries already scanned: {0}".format(num_binaries_skipped)) - logger.debug( - " number binaries unavailable: {0}".format( - globals.g_num_binaries_not_available - ) - ) - logger.info("total binaries from db: {0}".format(num_total_binaries)) - logger.debug( - " binaries per second: {0}:".format( - round(num_total_binaries / elapsed_time, 2) - ) - ) - logger.info( - "num binaries score greater than zero: {0}".format( - len(BinaryDetonationResult.select().where(BinaryDetonationResult.score > 0)) - ) + logger.debug(f" number binaries scanned: {globals.g_num_binaries_analyzed}") + logger.debug(f" number binaries already scanned: {num_binaries_skipped}") + logger.debug(f" number binaries unavailable: {globals.g_num_binaries_not_available}") + logger.info(f"total binaries from db: {num_total_binaries}") + logger.debug(" binaries per second: {0}:".format(round(num_total_binaries / elapsed_time, 2))) + logger.info("num binaries score greater than zero: {0}".format( + len(BinaryDetonationResult.select().where(BinaryDetonationResult.score > 0))) ) logger.info("") From a70a2539ac19de831898589aea0a780b5e83a18f Mon Sep 17 00:00:00 2001 From: Burton Choinski Date: Wed, 30 Oct 2019 09:43:24 -0400 Subject: [PATCH 092/257] More cleanup --- config_handling.py | 6 -- globals.py | 25 +++--- main.py | 204 ++++++++++++++++++++++++--------------------- 3 files changed, 121 insertions(+), 114 deletions(-) diff --git a/config_handling.py b/config_handling.py index 8935982..2d45368 100644 --- a/config_handling.py +++ b/config_handling.py @@ -99,18 +99,12 @@ def _extended_check(self) -> None: :raises CbInvalidConfig: :raises ValueError: """ - - # TODO: validate url & token with test call (if local) - # TODO: validate broker with test call (if remote) - globals.g_postgres_host = self._as_str("postgres_host", default=globals.g_postgres_host) globals.g_postgres_username = self._as_str("postgres_username", default=globals.g_postgres_username) globals.g_postgres_password = self._as_str("postgres_password", required=True) globals.g_postgres_db = self._as_str("postgres_db", default=globals.g_postgres_username) globals.g_postgres_port = self._as_int("postgres_port", default=globals.g_postgres_port) - # TODO: validate postgres connection with supplied information? 
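The postgres pre-flight check mentioned in the deleted TODO was never implemented in this part of the series; a minimal, purely hypothetical version could reuse the values parsed just above:

```python
import psycopg2


def _verify_postgres_reachable() -> None:
    """Hypothetical pre-flight check; not part of this patch set."""
    try:
        conn = psycopg2.connect(
            host=globals.g_postgres_host,
            database=globals.g_postgres_db,
            user=globals.g_postgres_username,
            password=globals.g_postgres_password,
            port=globals.g_postgres_port,
        )
        conn.close()
    except psycopg2.Error as err:
        raise CbInvalidConfig(f"cannot reach postgres: {err}")
```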
- value = self._as_int("niceness") if value: os.nice(value) diff --git a/globals.py b/globals.py index d17aabd..6d2f1a4 100644 --- a/globals.py +++ b/globals.py @@ -7,40 +7,35 @@ # A placeholder of "{YARA}" represents the location of this yara package ################################################################################ -# noinspection PyUnusedName g_config = {} -g_output_file = './yara_feed.json' +g_output_file = "" g_remote = False # local info -g_cb_server_url = 'https://127.0.0.1' -g_cb_server_token = '' +g_cb_server_url = "https://127.0.0.1" +g_cb_server_token = "" # remote info -# noinspection PyUnusedName -g_broker_url = '' +g_broker_url = "" -g_yara_rules_dir = '{YARA}/local/yara_rules' +g_yara_rules_dir = "{YARA}/local/yara_rules" g_yara_rule_map = {} g_yara_rule_map_hash_list = [] -g_postgres_host = '127.0.0.1' -g_postgres_username = 'cb' -g_postgres_password = '' +g_postgres_host = "127.0.0.1" +g_postgres_db = "cb" +g_postgres_username = "cb" +g_postgres_password = "" g_postgres_port = 5002 -g_postgres_db = 'cb' g_max_hashes = 8 - g_num_binaries_not_available = 0 g_num_binaries_analyzed = 0 - g_disable_rescan = True - g_num_days_binaries = 365 g_vacuum_seconds = -1 -g_vacuum_script = '{YARA}/scripts/vacuumscript.sh' +g_vacuum_script = "{YARA}/scripts/vacuumscript.sh" g_feed_database_dir = "{YARA}/local" diff --git a/main.py b/main.py index 38aab42..bd1569a 100644 --- a/main.py +++ b/main.py @@ -26,8 +26,10 @@ from analysis_result import AnalysisResult from binary_database import BinaryDetonationResult, db from config_handling import ConfigurationInit +from exceptions import SingleInstanceException from feed import CbFeed, CbFeedInfo, CbReport from tasks import analyze_binary, generate_rule_map, update_yara_rules_remote +from utilities import placehold logging_format = "%(asctime)s-%(name)s-%(lineno)d-%(levelname)s-%(message)s" logging.basicConfig(format=logging_format) @@ -42,7 +44,6 @@ def generate_feed_from_db() -> None: """ Creates a feed based on specific database information. - :return: """ query = BinaryDetonationResult.select().where(BinaryDetonationResult.score > 0) @@ -76,13 +77,11 @@ def generate_feed_from_db() -> None: fp.write(feed.dump()) -# noinspection DuplicatedCode def generate_yara_rule_map_hash(yara_rule_path: str) -> None: """ Create a list of hashes for each yara rule. :param yara_rule_path: the path to where the yara rules are stored. - :return: """ temp_list = [] for fn in os.listdir(yara_rule_path): @@ -92,7 +91,6 @@ def generate_yara_rule_map_hash(yara_rule_path: str) -> None: continue with open(os.path.join(yara_rule_path, fn), "rb") as fp: data = fp.read() - # NOTE: Original logic resulted in a cumulative hash for each file (linking them) md5 = hashlib.md5() md5.update(data) temp_list.append(str(md5.hexdigest())) @@ -104,8 +102,8 @@ def generate_yara_rule_map_hash(yara_rule_path: str) -> None: def generate_rule_map_remote(yara_rule_path: str) -> None: """ Get remote rules and store into an internal map keyed by file name. - :param yara_rule_path: path to wheer thr rules are stored - :return: + + :param yara_rule_path: path to where the rules are stored """ ret_dict = {} for fn in os.listdir(yara_rule_path): @@ -202,8 +200,9 @@ def save_results(analysis_results: List[AnalysisResult]) -> None: def get_database_conn(): """ - Generate a database connection. - :return: + Generate a database connection based on our postgres settings. 
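Connection and cursor creation are split across two helpers; the intended pairing in `perform` is (taken straight from the surrounding hunks):

```python
# look back num_days_binaries from now for candidate binaries
start_date_binaries = datetime.now() - timedelta(days=globals.g_num_days_binaries)

# make the connection, get the info, get out
conn = get_database_conn()
cur = get_cursor(conn, start_date_binaries)
rows = cur.fetchall()
conn.commit()
conn.close()
```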
+ + :return: database connection """ logger.info("Connecting to Postgres database...") conn = psycopg2.connect( @@ -220,6 +219,7 @@ def get_database_conn(): def get_cursor(conn, start_date_binaries: datetime): """ Get a query qursor into the database. + :param conn: database connection :param start_date_binaries: Timestamp representing the earliest time to check for binaries :return: cursor pointing to the query results @@ -241,20 +241,21 @@ def execute_script() -> None: """ logger.info("!!!Executing vacuum script!!!") - target = os.path.join(os.getcwd(), globals.g_vacuum_script) - - prog = subprocess.Popen(target, shell=True, universal_newlines=True) + prog = subprocess.Popen(globals.g_vacuum_script, shell=True, universal_newlines=True) stdout, stderr = prog.communicate() - logger.info(stdout) - logger.error(stderr) + if len(stdout.strip()) > 0: + logger.info(stdout) + if len(stderr.strip()) > 0: + logger.error(stderr) if prog.returncode: - logger.warning("program returned error code {0}".format(prog.returncode)) + logger.warning(f"program returned error code {prog.returncode}") logger.info("!!!Done Executing vacuum script!!!") def perform(yara_rule_dir: str) -> None: """ Perform a yara search. + :param yara_rule_dir: location of the rules directory """ if globals.g_remote: @@ -312,7 +313,7 @@ def _check_hash_against_feed(md5_hash: str) -> bool: """ Check if the found hash matches our feed criteria. - :param md5_hash: + :param md5_hash: hash to be checked :return: True if the binary does not exist """ query = BinaryDetonationResult.select().where(BinaryDetonationResult.md5 == md5_hash) @@ -341,7 +342,7 @@ def _analyze_save_and_log(hashes: List[str], start_time: float, num_binaries_ski :param hashes: List of hashes :param start_time: start time of the operation (python time) :param num_binaries_skipped: numb er of binaries skipped for any reason - :param num_total_binaries: numb er of binaries seen + :param num_total_binaries: number of binaries seen """ analysis_results = analyze_binaries(hashes, local=(not globals.g_remote)) if analysis_results: @@ -358,9 +359,10 @@ def _analyze_save_and_log(hashes: List[str], start_time: float, num_binaries_ski def _rule_logging(start_time: float, num_binaries_skipped: int, num_total_binaries: int) -> None: """ Simple method to log yara work. 
+ :param start_time: start time for the work :param num_binaries_skipped: numb er of binaries skipped for any reason - :param num_total_binaries: numb er of binaries seen + :param num_total_binaries: number of binaries seen """ elapsed_time = time.time() - start_time logger.info("elapsed time: {0}".format(humanfriendly.format_timespan(elapsed_time))) @@ -369,95 +371,111 @@ def _rule_logging(start_time: float, num_binaries_skipped: int, num_total_binari logger.debug(f" number binaries unavailable: {globals.g_num_binaries_not_available}") logger.info(f"total binaries from db: {num_total_binaries}") logger.debug(" binaries per second: {0}:".format(round(num_total_binaries / elapsed_time, 2))) - logger.info("num binaries score greater than zero: {0}".format( - len(BinaryDetonationResult.select().where(BinaryDetonationResult.score > 0))) - ) - logger.info("") + overzero = len(BinaryDetonationResult.select().where(BinaryDetonationResult.score > 0)) + logger.info(f"num binaries score greater than zero: {overzero}\n") ################################################################################ # Main entrypoint ################################################################################ +def handle_arguments(): + """ + Setup the main program options. + + :return: parsed arguments + """ + parser = argparse.ArgumentParser(description="Yara Agent for Yara Connector") + parser.add_argument( + "--config-file", + required=True, + default="yara_agent.conf", + help="Location of the config file", + ) + parser.add_argument( + "--log-file", + default="{YARA}/local/yara_agent.log", + help="Log file output (defaults to `local` folder)" + ) + parser.add_argument( + "--output-file", + default="{YARA}/local/yara_feed.json", + help="output feed file (defaults to `local` folder)" + ) + parser.add_argument( + "--validate-yara-rules", + action="store_true", + help="ONLY validate yara rules in a specified directory", + ) + parser.add_argument( + "--debug", + action="store_true", + help="Provide additional logging" + ) + + return parser.parse_args() + + def main(): + """ + Main execution function. 
Script will exit with a non-zero value based on the following: + 1: Not the only instance running + 2: Configuration problem + 3: User interrupt + 4: Unexpected Yara scan exception + 5: Yara rule validation problem + """ try: - # check for single operation singleton.SingleInstance() + except SingleInstanceException as err: + logger.error(f"Only one instance of this script is allowed to run at a time: {err}") + sys.exit(1) + + args = handle_arguments() + if args.debug: + logger.setLevel(logging.DEBUG) + + if args.log_file: + use_log_file = os.path.abspath(os.path.expanduser(placehold(args.log_file))) + formatter = logging.Formatter(logging_format) + handler = logging.handlers.RotatingFileHandler(use_log_file, maxBytes=10 * 1000000, backupCount=10) + handler.setFormatter(formatter) + logger.addHandler(handler) + else: + use_log_file = None + + # Verify the configuration file and load up important global variables + try: + ConfigurationInit(args.config_file, use_log_file) except Exception as err: - logger.error( - f"Only one instance of this script is allowed to run at a time: {err}" - ) + logger.error(f"Unable to continue due to a configuration problem: {err}") + sys.exit(2) + + if args.validate_yara_rules: + logger.info(f"Validating yara rules in directory: {globals.g_yara_rules_dir}") + yara_rule_map = generate_rule_map(globals.g_yara_rules_dir) + try: + yara.compile(filepaths=yara_rule_map) + logger.info("All yara rules compiled successfully") + except Exception as err: + logger.error(f"There were errors compiling yara rules: {err}\n{traceback.format_exc()}") + sys.exit(5) else: - parser = argparse.ArgumentParser(description="Yara Agent for Yara Connector") - parser.add_argument( - "--config-file", - required=True, - default="yara_agent.conf", - help="Location of the config file", - ) - parser.add_argument( - "--log-file", default="yara_agent.log", help="Log file output" - ) - parser.add_argument( - "--output-file", default="yara_feed.json", help="output feed file" - ) - parser.add_argument( - "--validate-yara-rules", - action="store_true", - help="ONLY validate yara rules in a specified directory", - ) - parser.add_argument("--debug", action="store_true") - - args = parser.parse_args() - - if args.debug: - logger.setLevel(logging.DEBUG) - - if args.log_file: - formatter = logging.Formatter(logging_format) - handler = logging.handlers.RotatingFileHandler( - args.log_file, maxBytes=10 * 1000000, backupCount=10 - ) - handler.setFormatter(formatter) - logger.addHandler(handler) - - # Verify the configuration file and load up important global variables try: - ConfigurationInit(args.config_file, args.output_file) + globals.g_yara_rule_map = generate_rule_map(globals.g_yara_rules_dir) + generate_yara_rule_map_hash(globals.g_yara_rules_dir) + database = SqliteDatabase(os.path.join(globals.g_feed_database_dir, "binary.db")) + db.initialize(database) + db.connect() + db.create_tables([BinaryDetonationResult]) + generate_feed_from_db() + perform(globals.g_yara_rules_dir) + except KeyboardInterrupt: + logger.info("\n\n##### Interupted by User!\n") + sys.exit(3) except Exception as err: - logger.error(f"Unable to continue due to a configuration problem: {err}") - sys.exit(1) - - if args.validate_yara_rules: - logger.info( - "Validating yara rules in directory: {0}".format( - globals.g_yara_rules_dir - ) - ) - yara_rule_map = generate_rule_map(globals.g_yara_rules_dir) - try: - yara.compile(filepaths=yara_rule_map) - logger.info("All yara rules compiled successfully") - except Exception as err: - 
logger.error(f"There were errors compiling yara rules: {err}") - logger.error(traceback.format_exc()) - else: - try: - globals.g_yara_rule_map = generate_rule_map(globals.g_yara_rules_dir) - generate_yara_rule_map_hash(globals.g_yara_rules_dir) - database = SqliteDatabase(os.path.join(globals.g_feed_database_dir, "binary.db")) - db.initialize(database) - db.connect() - db.create_tables([BinaryDetonationResult]) - generate_feed_from_db() - perform(globals.g_yara_rules_dir) - except KeyboardInterrupt: - logger.info("\n\n##### Interupted by User!\n") - sys.exit(2) - except Exception as err: - logger.error(f"There were errors executing yara rules: {err}") - logger.error(traceback.format_exc()) - sys.exit(1) + logger.error(f"There were errors executing yara rules: {err}\n{traceback.format_exc()}") + sys.exit(4) if __name__ == "__main__": From 755f14924ab15cedb8f55cc19e69ed2fe3d09879 Mon Sep 17 00:00:00 2001 From: Burton Choinski Date: Wed, 30 Oct 2019 09:47:06 -0400 Subject: [PATCH 093/257] fixed logging glitch --- main.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/main.py b/main.py index bd1569a..fe738a1 100644 --- a/main.py +++ b/main.py @@ -243,9 +243,9 @@ def execute_script() -> None: prog = subprocess.Popen(globals.g_vacuum_script, shell=True, universal_newlines=True) stdout, stderr = prog.communicate() - if len(stdout.strip()) > 0: + if stdout is not None and len(stdout.strip()) > 0: logger.info(stdout) - if len(stderr.strip()) > 0: + if stderr is not None and len(stderr.strip()) > 0: logger.error(stderr) if prog.returncode: logger.warning(f"program returned error code {prog.returncode}") From 0b4820028b9a86343a1309c28ce16ed5511414b2 Mon Sep 17 00:00:00 2001 From: Burton Choinski Date: Wed, 30 Oct 2019 09:48:49 -0400 Subject: [PATCH 094/257] removed script verbosity --- scripts/vacuumscript.sh | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/scripts/vacuumscript.sh b/scripts/vacuumscript.sh index efdf3b9..39c9f0d 100755 --- a/scripts/vacuumscript.sh +++ b/scripts/vacuumscript.sh @@ -1,6 +1,5 @@ #!/bin/bash -set -x +psql -p 5002 -d cb -c "vacuum (full,analyze, verbose) storefiles;" psql -p 5002 -d cb -c "vacuum (full,analyze, verbose) binary_status;" psql -p 5002 -d cb -c "vacuum (full,analyze, verbose) sensor_registrations;" psql -p 5002 -d cb -c "vacuum (full,analyze, verbose) vt_write_events;" -psql -p 5002 -d cb -c "vacuum (full,analyze, verbose) storefiles;" From 8561e929186f983840f2cd60ebb9bdb35165c8be Mon Sep 17 00:00:00 2001 From: Burton Choinski Date: Wed, 30 Oct 2019 09:53:27 -0400 Subject: [PATCH 095/257] cleaned up script logging output --- main.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/main.py b/main.py index fe738a1..11be011 100644 --- a/main.py +++ b/main.py @@ -239,8 +239,7 @@ def execute_script() -> None: """ Execute a external maintenence script (vacuum script). 
""" - logger.info("!!!Executing vacuum script!!!") - + logger.info("----- Executing vacuum script ----------------------------------------") prog = subprocess.Popen(globals.g_vacuum_script, shell=True, universal_newlines=True) stdout, stderr = prog.communicate() if stdout is not None and len(stdout.strip()) > 0: @@ -249,7 +248,7 @@ def execute_script() -> None: logger.error(stderr) if prog.returncode: logger.warning(f"program returned error code {prog.returncode}") - logger.info("!!!Done Executing vacuum script!!!") + logger.info("---------------------------------------- Vacuum script completed -----\n") def perform(yara_rule_dir: str) -> None: From 9e1419659755f31171f89df6b5f191d9133a1550 Mon Sep 17 00:00:00 2001 From: Burton Choinski Date: Thu, 31 Oct 2019 08:30:57 -0400 Subject: [PATCH 096/257] * changed `vacuum_seconds` to `vacuum_interval` and consider the value to be a number of minutes in operations. * updated unit tests * changed `feed_database_dir` so that it will create the directory if it does not exist * updated documentation and comments regarding the vacuum script feature --- README.md | 55 +++++++++++++++++++ config_handling.py | 23 ++++++-- globals.py | 6 +- main.py | 15 +++-- samples/sample_local.conf | 21 +++++++ samples/sample_remote.conf | 22 ++++++++ tasks.py | 2 +- ...econds.conf => bogus_vacuum_interval.conf} | 2 +- test/config/malformed_param.conf | 20 +++++++ test/config/missing_feed_database_dir.conf | 2 +- ...nds.conf => negative_vacuum_interval.conf} | 2 +- test/config/no_such_vacuum_script.conf | 2 +- test/config/vacuum_script_dir.conf | 2 +- test/config/vacuum_script_enabled.conf | 2 +- ...ds.conf => vacuum_script_no_interval.conf} | 2 +- test/test_configInit.py | 53 +++++++++++------- 16 files changed, 189 insertions(+), 42 deletions(-) rename test/config/{bogus_vacuum_seconds.conf => bogus_vacuum_interval.conf} (94%) create mode 100644 test/config/malformed_param.conf rename test/config/{negative_vacuum_seconds.conf => negative_vacuum_interval.conf} (94%) rename test/config/{vacuum_script_no_seconds.conf => vacuum_script_no_interval.conf} (95%) diff --git a/README.md b/README.md index 66fdc22..a564324 100644 --- a/README.md +++ b/README.md @@ -30,6 +30,39 @@ to your desired location. ./yara_agent --config-file= ``` +##### Command-line Options +```text +usage: main.py [-h] --config-file CONFIG_FILE [--log-file LOG_FILE] + [--output-file OUTPUT_FILE] [--validate-yara-rules] [--debug] + +Yara Agent for Yara Connector + +optional arguments: + -h, --help show this help message and exit + --config-file CONFIG_FILE + Location of the config file + --log-file LOG_FILE Log file output (defaults to `local` folder) + --output-file OUTPUT_FILE + output feed file (defaults to `local` folder) + --validate-yara-rules + ONLY validate yara rules in a specified directory + --debug Provide additional logging + +``` +###### --config-file +Provides the path of the configuration file to be used _**(REQUIRED)**_ + +###### --log-file +Provides the path of the yara log file. If not supplied, defaults to `local/yara_agent.log` +within the current yara package. + +###### --output-file +Provides the path containing the feed description file. If not supplied, defaults to +`/local/yara_feed.json` within the current yara package. + +###### --validate-yara-rules +If supplied, yara rules will be validated and the script will exit. 
+ #### Example Cron Entry _[TBD]_ @@ -141,6 +174,28 @@ _[TBD]_ ``` # Development Notes +## Vacuum Script +Included with this version is a feature for discretionary use by advanced users and +should be used with caution. + +When `vacuum_interval` is defined with a value greater than 0, it represents the interval +in minutes at which the yara agent will pause its work and execute and external +shell script, defined by default as `vacuumscript.sh` within the `scripts` folder +of the current Yara conector installation. After execution, the Yara aganet continues with +its work. + +```ini +; +; The use of the vacuum script is an ADVANCED FEATURE and should be used with caution! +; +; If "vacuum_interval" is greater than 0 it represents the interval in minutes after which the yara connector will +; pause to execute a shell script for databse maintenance. This can present risks. Be careful what you allow the +; script to do, and use this option at your own discretion. +; +vacuum_interval=-1 +vacuum_script={YARA}/scripts/vacuumscript.sh +``` + ## Yara Agent Build Instructions (Centos 6) ### Install Dependencies diff --git a/config_handling.py b/config_handling.py index 2d45368..ac66600 100644 --- a/config_handling.py +++ b/config_handling.py @@ -114,8 +114,8 @@ def _extended_check(self) -> None: globals.g_num_days_binaries = self._as_int("num_days_binaries", default=globals.g_num_days_binaries, min_value=1) - globals.g_vacuum_seconds = self._as_int("vacuum_seconds", default=globals.g_vacuum_seconds, min_value=0) - if globals.g_vacuum_seconds > 0: + globals.g_vacuum_interval = self._as_int("vacuum_interval", default=globals.g_vacuum_interval, min_value=0) + if globals.g_vacuum_interval > 0: globals.g_vacuum_script = self._as_path("vacuum_script", required=True, is_dir=False, default=globals.g_vacuum_script) logger.warning(f"Vacuum Script '{globals.g_vacuum_script}' is enabled; " + @@ -125,7 +125,7 @@ def _extended_check(self) -> None: logger.debug(f"{self.source} has 'vacuum_script' defined, but it is disabled") globals.g_feed_database_dir = self._as_path("feed_database_dir", required=True, is_dir=True, - default=globals.g_feed_database_dir) + default=globals.g_feed_database_dir, create_if_needed=True) # ----- Type Handlers @@ -139,7 +139,11 @@ def _as_str(self, param: str, required: bool = False, default: str = None) -> Op :return: the string value, or None/default if not required and no exception :raises CbInvalidConfig: """ - value = self.the_config.get(param, None) + try: + value = self.the_config.get(param, None) + except Exception as err: + raise CbInvalidConfig(f"{self.source} parameter '{param}' cannot be parsed: {err}") + if value is not None: value = value.strip() if (value is None or value == "") and default is not None: @@ -150,7 +154,7 @@ def _as_str(self, param: str, required: bool = False, default: str = None) -> Op return value def _as_path(self, param: str, required: bool = False, exists: bool = True, is_dir: bool = False, - default: str = None) -> Optional[str]: + default: str = None, create_if_needed: bool = False) -> Optional[str]: """ Get an string parameter from the configuration and treat it as a path, performing normalization to produce an absolute path. 
a "~" at the beginning will be treated as the current user's home @@ -161,6 +165,7 @@ def _as_path(self, param: str, required: bool = False, exists: bool = True, is_d :param exists: if True and required, check for existance as well :param is_dir: if exists and True, source must be a directory :param default: If not required, default value if not supplied + :param create_if_needed: if True, create any directory if it does not exist :return: the integer value, or None if not required and no exception :raises CbInvalidConfig: """ @@ -171,7 +176,13 @@ def _as_path(self, param: str, required: bool = False, exists: bool = True, is_d value = os.path.abspath(os.path.expanduser(placehold(value))) if exists: if not os.path.exists(value): - raise CbInvalidConfig(f"{self.source} specified path parameter '{param}' ({value}) does not exist") + if create_if_needed and is_dir: + try: + os.makedirs(value) + except Exception as err: + raise CbInvalidConfig(f"{self.source} unable to create '{value}' for '{param}': {err}") + else: + raise CbInvalidConfig(f"{self.source} specified path parameter '{param}' ({value}) does not exist") if is_dir: if not os.path.isdir(value): raise CbInvalidConfig(f"{self.source} specified path '{param}' ({value}) is not a directory") diff --git a/globals.py b/globals.py index 6d2f1a4..6a7494a 100644 --- a/globals.py +++ b/globals.py @@ -35,7 +35,9 @@ g_disable_rescan = True g_num_days_binaries = 365 -g_vacuum_seconds = -1 +# the vacuum interval, if 1 or greater, is the number of minutes between invocations of the +# configured vacuum script +g_vacuum_interval = -1 g_vacuum_script = "{YARA}/scripts/vacuumscript.sh" -g_feed_database_dir = "{YARA}/local" +g_feed_database_dir = "{YARA}/local/feed_db" diff --git a/main.py b/main.py index 11be011..1204ae5 100644 --- a/main.py +++ b/main.py @@ -269,8 +269,10 @@ def perform(yara_rule_dir: str) -> None: start_time = time.time() # Determine our binaries window (date forward) - start_datetime = datetime.now() - start_date_binaries = start_datetime - timedelta(days=globals.g_num_days_binaries) + start_date_binaries = datetime.now() - timedelta(days=globals.g_num_days_binaries) + + # vacuum script window start + vacuum_window_start = datetime.now() # make the connection, get the info, get out conn = get_database_conn() @@ -281,10 +283,11 @@ def perform(yara_rule_dir: str) -> None: logger.info(f"Enumerating modulestore...found {len(rows)} resident binaries") for row in rows: - seconds_since_start = (datetime.now() - start_datetime).seconds - if seconds_since_start >= globals.g_vacuum_seconds > 0: - execute_script() - start_datetime = datetime.now() + if globals.g_vacuum_interval > 0: + seconds_since_start = (datetime.now() - vacuum_window_start).seconds + if seconds_since_start >= globals.g_vacuum_interval * 60: + execute_script() + vacuum_window_start = datetime.now() num_total_binaries += 1 md5_hash = row[0].hex() diff --git a/samples/sample_local.conf b/samples/sample_local.conf index e1bfbca..b6d4b26 100644 --- a/samples/sample_local.conf +++ b/samples/sample_local.conf @@ -1,5 +1,9 @@ ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;; Sample local worker config file +;; +;; Where seen, the placeholder {YARA} will be replaced by the script with +;; the location of yara package being used. 
You may also use "~" if you wish +;; to locate files or directories in your home folder ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; [general] @@ -46,3 +50,20 @@ disable_rescan=True ; a timestamp within the last year ; num_days_binaries=365 + +; +; The feed database directory is where locata database work files are stored. If the directory does not exist +; it will be created. +; +feed_database_dir={YARA}/local/feed_db + + +; +; The use of the vacuum script is an ADVANCED FEATURE and should be used with caution! +; +; If "vacuum_interval" is greater than 0 it represents the interval in minutes after which the yara connector will +; pause to execute a shell script for databse maintenance. This can present risks. Be careful what you allow the +; script to do, and use this option at your own discretion. +; +vacuum_interval=-1 +vacuum_script={YARA}/scripts/vacuumscript.sh diff --git a/samples/sample_remote.conf b/samples/sample_remote.conf index 226a2be..25930bd 100644 --- a/samples/sample_remote.conf +++ b/samples/sample_remote.conf @@ -1,5 +1,9 @@ ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;; Sample remote worker config file +;; +;; Where seen, the placeholder {YARA} will be replaced by the script with +;; the location of yara package being used. You may also use "~" if you wish +;; to locate files or directories in your home folder ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; [general] @@ -46,3 +50,21 @@ disable_rescan=True ; a timestamp within the last year ; num_days_binaries=365 + + +; +; The feed database directory is where local database work files are stored. If the directory does not exist +; it will be created. +; +feed_database_dir={YARA}/local/feed_db + + +; +; The use of the vacuum script is an ADVANCED FEATURE and should be used with caution! +; +; If "vacuum_interval" is greater than 0 it represents the interval in minutes after which the yara connector will +; pause to execute a shell script for databse maintenance. This can present risks. Be careful what you allow the +; script to do, and use this option at your own discretion. 
+; +vacuum_interval=-1 +vacuum_script={YARA}/scripts/vacuumscript.sh diff --git a/tasks.py b/tasks.py index a0f7618..6d8e9e1 100644 --- a/tasks.py +++ b/tasks.py @@ -143,7 +143,7 @@ def analyze_binary(md5sum: str) -> AnalysisResult: matches = yara_rules.match(data=binary_data, timeout=30) except yara.TimeoutError: # yara timed out - analysis_result.last_error_msg = "Analysis timed out after 60 seconds" + analysis_result.last_error_msg = "Analysis timed out after 30 seconds" analysis_result.stop_future_scans = True except yara.Error as err: # Yara errored while trying to scan binary diff --git a/test/config/bogus_vacuum_seconds.conf b/test/config/bogus_vacuum_interval.conf similarity index 94% rename from test/config/bogus_vacuum_seconds.conf rename to test/config/bogus_vacuum_interval.conf index b77c8d9..0a91c28 100644 --- a/test/config/bogus_vacuum_seconds.conf +++ b/test/config/bogus_vacuum_interval.conf @@ -18,4 +18,4 @@ concurrent_hashes=8 disable_rescan=False num_days_binaries=365 -vacuum_seconds=BOGUS +vacuum_interval=BOGUS diff --git a/test/config/malformed_param.conf b/test/config/malformed_param.conf new file mode 100644 index 0000000..7fa81e9 --- /dev/null +++ b/test/config/malformed_param.conf @@ -0,0 +1,20 @@ +[general] +worker_type=local + +; ONLY for worker_type of local +cb_server_url=https://127.0.0.1:443 +cb_server_token=wFC4he6OBd20IBbB0XkhE5ZsjBQUj1GB + +yara_rules_dir=./rules + +postgres_host=localhost +postgres_username=cb +postgres_password=6PGcbuwlQnIuPqOF +postgres_db=cb +postgres_port=5002 + +; bad value +niceness=1% +concurrent_hashes=8 +disable_rescan=False +num_days_binaries=365 diff --git a/test/config/missing_feed_database_dir.conf b/test/config/missing_feed_database_dir.conf index 9969c52..6fb4932 100644 --- a/test/config/missing_feed_database_dir.conf +++ b/test/config/missing_feed_database_dir.conf @@ -18,4 +18,4 @@ concurrent_hashes=8 disable_rescan=False num_days_binaries=365 -feed_database_dir={YARA}/no-such-directory +feed_database_dir={YARA}/local/no-such-directory diff --git a/test/config/negative_vacuum_seconds.conf b/test/config/negative_vacuum_interval.conf similarity index 94% rename from test/config/negative_vacuum_seconds.conf rename to test/config/negative_vacuum_interval.conf index 9376351..03d182a 100644 --- a/test/config/negative_vacuum_seconds.conf +++ b/test/config/negative_vacuum_interval.conf @@ -18,4 +18,4 @@ concurrent_hashes=8 disable_rescan=False num_days_binaries=365 -vacuum_seconds=-20 +vacuum_interval=-20 diff --git a/test/config/no_such_vacuum_script.conf b/test/config/no_such_vacuum_script.conf index 43a9ec8..12b1fab 100644 --- a/test/config/no_such_vacuum_script.conf +++ b/test/config/no_such_vacuum_script.conf @@ -18,5 +18,5 @@ concurrent_hashes=8 disable_rescan=False num_days_binaries=365 -vacuum_seconds=3600 +vacuum_interval=36 vacuum_script=no-such-script.sh diff --git a/test/config/vacuum_script_dir.conf b/test/config/vacuum_script_dir.conf index edd47ba..38e5704 100644 --- a/test/config/vacuum_script_dir.conf +++ b/test/config/vacuum_script_dir.conf @@ -18,5 +18,5 @@ concurrent_hashes=8 disable_rescan=False num_days_binaries=365 -vacuum_seconds=3600 +vacuum_interval=360 vacuum_script={YARA}/test/rules diff --git a/test/config/vacuum_script_enabled.conf b/test/config/vacuum_script_enabled.conf index cd7c967..45d1f79 100644 --- a/test/config/vacuum_script_enabled.conf +++ b/test/config/vacuum_script_enabled.conf @@ -18,5 +18,5 @@ concurrent_hashes=8 disable_rescan=False num_days_binaries=365 -vacuum_seconds=3600 
+vacuum_interval=360 vacuum_script={YARA}/scripts/vacuumscript.sh diff --git a/test/config/vacuum_script_no_seconds.conf b/test/config/vacuum_script_no_interval.conf similarity index 95% rename from test/config/vacuum_script_no_seconds.conf rename to test/config/vacuum_script_no_interval.conf index 06316e3..2dd8511 100644 --- a/test/config/vacuum_script_no_seconds.conf +++ b/test/config/vacuum_script_no_interval.conf @@ -18,5 +18,5 @@ concurrent_hashes=8 disable_rescan=False num_days_binaries=365 -vacuum_seconds=0 +vacuum_interval=0 vacuum_script=../scripts/vacuumscript.sh diff --git a/test/test_configInit.py b/test/test_configInit.py index c2c9c0d..a2fa44d 100644 --- a/test/test_configInit.py +++ b/test/test_configInit.py @@ -7,6 +7,7 @@ import globals from config_handling import ConfigurationInit from exceptions import CbInvalidConfig +from utilities import placehold TESTS = os.path.abspath(os.path.dirname(__file__)) @@ -33,7 +34,7 @@ def setUp(self): globals.g_num_binaries_analyzed = 0 globals.g_disable_rescan = True globals.g_num_days_binaries = 365 - globals.g_vacuum_seconds = -1 + globals.g_vacuum_interval = -1 globals.g_vacuum_script = '{YARA}/scripts/vacuumscript.sh' globals.g_feed_database_dir = "{YARA}/local" @@ -331,21 +332,21 @@ def test_16b_config_bogus_num_days_binaries(self): ConfigurationInit(os.path.join(TESTS, "config", "bogus_num_days_binaries.conf"), "sample.json") assert "invalid literal for int" in "{0}".format(err.exception.args[0]) - def test_17a_config_bogus_vacuum_seconds(self): + def test_17a_config_bogus_vacuum_interval(self): """ - Ensure that config with bogus (non-int) vacuum_seconds is detected. + Ensure that config with bogus (non-int) vacuum_interval is detected. """ with self.assertRaises(ValueError) as err: - ConfigurationInit(os.path.join(TESTS, "config", "bogus_vacuum_seconds.conf"), "sample.json") + ConfigurationInit(os.path.join(TESTS, "config", "bogus_vacuum_interval.conf"), "sample.json") assert "invalid literal for int" in "{0}".format(err.exception.args[0]) - def test_17b_config_negative_vacuum_seconds(self): + def test_17b_config_negative_vacuum_interval(self): """ - Ensure that config with bogus (non-int) vacuum_seconds is detected. + Ensure that config with bogus (non-int) vacuum_interval is detected. """ - globals.g_vacuum_seconds = None - ConfigurationInit(os.path.join(TESTS, "config", "negative_vacuum_seconds.conf"), "sample.json") - self.assertEqual(0, globals.g_vacuum_seconds) + globals.g_vacuum_interval = None + ConfigurationInit(os.path.join(TESTS, "config", "negative_vacuum_interval.conf"), "sample.json") + self.assertEqual(0, globals.g_vacuum_interval) def test_18a_config_missing_vacuum_script(self): """ @@ -365,31 +366,35 @@ def test_18b_config_bogus_vacuum_script_is_dir(self): def test_19a_config_vacuum_script_enabled(self): """ - Ensure that config with vacuum_script and vacuum_seconds is ready to go. + Ensure that config with vacuum_script and vacuum_interval is ready to go. 
""" - globals.g_vacuum_seconds = None + globals.g_vacuum_interval = None globals.g_vacuum_script = None ConfigurationInit(os.path.join(TESTS, "config", "vacuum_script_enabled.conf"), "sample.json") - self.assertEqual(3600, globals.g_vacuum_seconds) + self.assertEqual(360, globals.g_vacuum_interval) self.assertTrue(globals.g_vacuum_script.endswith("/scripts/vacuumscript.sh")) - def test_19a_config_vacuum_script_and_no_vacuum_seconds(self): + def test_19a_config_vacuum_script_and_no_vacuum_interval(self): """ - Ensure that config with vacuum_script but vacuum_seconds == 0 has it disabled. + Ensure that config with vacuum_script but vacuum_interval == 0 has it disabled. """ - globals.g_vacuum_seconds = None + globals.g_vacuum_interval = None globals.g_vacuum_script = None - ConfigurationInit(os.path.join(TESTS, "config", "vacuum_script_no_seconds.conf"), "sample.json") - self.assertEqual(0, globals.g_vacuum_seconds) + ConfigurationInit(os.path.join(TESTS, "config", "vacuum_script_no_interval.conf"), "sample.json") + self.assertEqual(0, globals.g_vacuum_interval) self.assertIsNone(globals.g_vacuum_script) def test_20a_config_feed_database_dir_not_exists(self): """ - Ensure that config with feed database directory that does not exist is detected. + Ensure that config with feed database directory that does not exist will create that directory. """ - with self.assertRaises(CbInvalidConfig) as err: + path = os.path.abspath(placehold("{YARA}/local/no-such-directory")) + self.assertFalse(os.path.exists(path)) + try: ConfigurationInit(os.path.join(TESTS, "config", "missing_feed_database_dir.conf"), "sample.json") - assert "does not exist" in "{0}".format(err.exception.args[0]) + self.assertTrue(os.path.exists(path)) + finally: + os.rmdir(path) def test_20b_config_feed_database_dir_not_directory(self): """ @@ -399,6 +404,14 @@ def test_20b_config_feed_database_dir_not_directory(self): ConfigurationInit(os.path.join(TESTS, "config", "bogus_feed_database_dir.conf"), "sample.json") assert "is not a directory" in "{0}".format(err.exception.args[0]) + def test_21_config_malformed_parameter(self): + """ + Ensure that config with malformed parameter is detected + """ + with self.assertRaises(CbInvalidConfig) as err: + ConfigurationInit(os.path.join(TESTS, "config", "malformed_param.conf"), "sample.json") + assert "cannot be parsed" in "{0}".format(err.exception.args[0]) + # ----- Minimal validation (worker) def test_90_minimal_validation_effects(self): From 2001a1733cf6d337266995bcd38c68bb0a94b773 Mon Sep 17 00:00:00 2001 From: Burton Choinski Date: Thu, 31 Oct 2019 08:38:10 -0400 Subject: [PATCH 097/257] fixed doc typos --- README.md | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/README.md b/README.md index a564324..1c6b02a 100644 --- a/README.md +++ b/README.md @@ -179,9 +179,9 @@ Included with this version is a feature for discretionary use by advanced users should be used with caution. When `vacuum_interval` is defined with a value greater than 0, it represents the interval -in minutes at which the yara agent will pause its work and execute and external +in minutes at which the yara agent will pause its work and execute an external shell script, defined by default as `vacuumscript.sh` within the `scripts` folder -of the current Yara conector installation. After execution, the Yara aganet continues with +of the current Yara connector installation. After execution, the Yara agent continues with its work. ```ini @@ -189,7 +189,7 @@ its work. 
; The use of the vacuum script is an ADVANCED FEATURE and should be used with caution! ; ; If "vacuum_interval" is greater than 0 it represents the interval in minutes after which the yara connector will -; pause to execute a shell script for databse maintenance. This can present risks. Be careful what you allow the +; pause to execute a shell script for database maintenance. This can present risks. Be careful what you allow the ; script to do, and use this option at your own discretion. ; vacuum_interval=-1 From 48bdf42ec8ef98ea2de0577e8450e18aa791d620 Mon Sep 17 00:00:00 2001 From: Burton Choinski Date: Thu, 31 Oct 2019 09:11:13 -0400 Subject: [PATCH 098/257] future prep work --- requirements.txt | 4 +--- src/__init__.py | 7 +++++++ 2 files changed, 8 insertions(+), 3 deletions(-) diff --git a/requirements.txt b/requirements.txt index af341cd..f62086b 100644 --- a/requirements.txt +++ b/requirements.txt @@ -7,8 +7,7 @@ cbapi==1.3.6 celery==4.2.1 certifi==2018.11.29 chardet==3.0.4 -future==0.17.1 -humanfriendly==4.17 +humanfriendly==4.18 idna==2.8 kombu==4.2.2.post1 macholib==1.11 @@ -34,4 +33,3 @@ urllib3==1.24.1 vine==1.2.0 wcwidth==0.1.7 yara-python==3.8.1 -humanfriendly diff --git a/src/__init__.py b/src/__init__.py index e69de29..3c7bbe9 100644 --- a/src/__init__.py +++ b/src/__init__.py @@ -0,0 +1,7 @@ +# coding: utf-8 +# Copyright © 2014-2019 VMware, Inc. All Rights Reserved. + +# noinspection PyUnusedName +__author__ = "Carbon Black" +# noinspection PyUnusedName +__version__ = "2.1.0" From e8b325c2230bf8707dc4447c0e0e2fad0db57446 Mon Sep 17 00:00:00 2001 From: Zachary Estep Date: Mon, 28 Oct 2019 10:11:17 -0400 Subject: [PATCH 099/257] A deamonized yara connector for cbr --- MANIFEST | 4 + cb-yara-connector.rpm.spec | 23 + main.spec => cb-yara-connector.spec | 13 +- init-scripts/yaraconnector.conf | 13 + makefile | 19 + requirements.txt | 44 +- analysis_result.py => src/analysis_result.py | 11 + binary_database.py => src/binary_database.py | 0 exceptions.py => src/exceptions.py | 0 feed.py => src/feed.py | 0 globals.py => src/globals.py | 4 + main.py => src/main.py | 623 +++++++++++++------ singleton.py => src/singleton.py | 0 src/tasks.py | 372 +++++++++++ utilities.py => src/utilities.py | 0 tasks.py | 263 -------- 16 files changed, 897 insertions(+), 492 deletions(-) create mode 100644 MANIFEST create mode 100644 cb-yara-connector.rpm.spec rename main.spec => cb-yara-connector.spec (67%) create mode 100644 init-scripts/yaraconnector.conf create mode 100644 makefile rename analysis_result.py => src/analysis_result.py (74%) rename binary_database.py => src/binary_database.py (100%) rename exceptions.py => src/exceptions.py (100%) rename feed.py => src/feed.py (100%) rename globals.py => src/globals.py (94%) rename main.py => src/main.py (51%) rename singleton.py => src/singleton.py (100%) create mode 100644 src/tasks.py rename utilities.py => src/utilities.py (100%) delete mode 100644 tasks.py diff --git a/MANIFEST b/MANIFEST new file mode 100644 index 0000000..9c66302 --- /dev/null +++ b/MANIFEST @@ -0,0 +1,4 @@ +/usr/share/cb/integrations/yaraconnector/yaraconnector +%dir /usr/share/cb/integrations/yaraconnector +%dir /var/log/cb/integrations/yaraconnector +/etc/init/yaraconnector.conf \ No newline at end of file diff --git a/cb-yara-connector.rpm.spec b/cb-yara-connector.rpm.spec new file mode 100644 index 0000000..d9c90f7 --- /dev/null +++ b/cb-yara-connector.rpm.spec @@ -0,0 +1,23 @@ +Name: python-cb-yara-connector +Version: 2.0 +Release: 2 +Summary: Carbon Black Yara Agent 
+License: MIT +Requires: redis + +%description +Carbon Black Yara Agent - Scans binaries with configured yara rules + +%build +pyinstaller %{_sourcedir}/cb-yara-connector.spec + +%install +mkdir -p ${RPM_BUILD_ROOT}/var/log/cb/integrations/yaraconnector +mkdir -p ${RPM_BUILD_ROOT}/usr/share/cb/integrations/yaraconnector +mkdir -p ${RPM_BUILD_ROOT}/etc/cb/integrations/yaraconnector +mkdir -p ${RPM_BUILD_ROOT}/etc/init +cp yara.conf ${RPMB_BUILD_ROOT}/etc/cb/integrations/yaraconnector/yaraconnector.conf.example +install -m 0755 init-scripts/yaraconnector.conf ${RPM_BUILD_ROOT}/etc/init/yaraconnector.conf +install -m 0755 dist/yaraconnector ${RPM_BUILD_ROOT}/usr/share/cb/integrations/yaraconnector/yaraconnector + +%files -f MANIFEST \ No newline at end of file diff --git a/main.spec b/cb-yara-connector.spec similarity index 67% rename from main.spec rename to cb-yara-connector.spec index 134ba85..caba33f 100644 --- a/main.spec +++ b/cb-yara-connector.spec @@ -1,16 +1,13 @@ -import distutils -if distutils.distutils_path.endswith('__init__.py'): - distutils.distutils_path = os.path.dirname(distutils.distutils_path) +#import distutils +#if distutils.distutils_path.endswith('__init__.py'): +# distutils.distutils_path = os.path.dirname(distutils.distutils_path) block_cipher = None -a = Analysis(['main.py'], +a = Analysis(['src/main.py'], pathex=['.'], binaries=[], - datas=[ (HOMEPATH + '/cbapi/response/models/*', 'cbapi/response/models/'), - (HOMEPATH + '/cbapi/protection/models/*', 'cbapi/protection/models/'), - (HOMEPATH + '/cbapi/defense/models/*', 'cbapi/defense/models/') ], hiddenimports=['celery.fixups', 'celery.fixups.django', 'celery.loaders.app', 'celery.app.amqp', 'kombu.transport.redis', 'redis', 'celery.backends', 'celery.backends.redis', 'celery.app.events', 'celery.events', @@ -30,7 +27,7 @@ exe = EXE(pyz, a.zipfiles, a.datas, [], - name='yara_agent', + name='yaraconnector', debug=False, bootloader_ignore_signals=False, strip=False, diff --git a/init-scripts/yaraconnector.conf b/init-scripts/yaraconnector.conf new file mode 100644 index 0000000..cd8dd52 --- /dev/null +++ b/init-scripts/yaraconnector.conf @@ -0,0 +1,13 @@ +description "Cb Response Yara Connector" +author "zestep@vmware.com" + +start on (started network) +stop on runlevel [!2345] + +respawn + +pre-start script + /usr/share/cb/integrations/yaraconnector/yaraconnector --lock-file /var/run/yaraconnector.pid --validate-rules --config-file /etc/cb/integrations/yaraconnector/yaraconnector.conf &> /var/log/cb/integrations/yaraconnector/yaraconnector_config_check.log +end script + +exec /usr/share/cb/integrations/yaraconnector/yaraconnector --lock-file /var/run/yaraconnector.pid --config-file /etc/cb/integrations/yaraconnector/yaraconnector.conf &> /var/log/cb/integrations/yaraconnector/yaraconnector.log \ No newline at end of file diff --git a/makefile b/makefile new file mode 100644 index 0000000..b80a0ec --- /dev/null +++ b/makefile @@ -0,0 +1,19 @@ +SOURCEDIR = ~/rpmbuild/SOURCES +BUILDDIR = ~/rpmbuild/BUILD + +clean: + rm -rf ${SOURCEDIR} + rm -rf ${BUILDDIR} + rm -rf dist +rpm: + mkdir -p ${SOURCEDIR} + mkdir -p ${BUILDDIR} + mkdir -p ${SOURCEDIR}/src + mkdir -p ${BUILDDIR}/init-scripts + cp -rp src/* ${SOURCEDIR}/src + cp -rp init-scripts/* ${BUILDDIR}/init-scripts + cp yara.conf ${BUILDDIR} + cp MANIFEST ${BUILDDIR} + cp cb-yara-connector.spec ${SOURCEDIR}/cb-yara-connector.spec + ls ${SOURCEDIR} + rpmbuild -ba cb-yara-connector.rpm.spec \ No newline at end of file diff --git a/requirements.txt b/requirements.txt index 
af341cd..e40e7b3 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,37 +1,9 @@ -altgraph==0.16.1 -amqp==2.4.0 -attrdict==2.0.0 -billiard==3.5.0.5 -cachetools==3.0.0 -cbapi==1.3.6 -celery==4.2.1 -certifi==2018.11.29 -chardet==3.0.4 -future==0.17.1 -humanfriendly==4.17 -idna==2.8 -kombu==4.2.2.post1 -macholib==1.11 -pbr==5.1.1 -peewee==3.8.1 -pefile==2018.8.8 -pika==0.12.0 -prompt-toolkit==2.0.7 -protobuf==3.6.1 -psycopg2==2.7.6.1 -psycopg2-binary==2.7.6.1 -Pygments==2.3.1 -PyInstaller==3.4 -python-dateutil==2.7.5 -pytz==2018.9 -PyYAML==3.13 -redis==3.0.1 -requests==2.21.0 -six==1.12.0 -solrq==1.1.1 -tendo==0.2.12 -urllib3==1.24.1 -vine==1.2.0 -wcwidth==0.1.7 -yara-python==3.8.1 +celery +redis humanfriendly +lockfile +peewee +psycopg2-binary +python-daemon +requests +yara-python diff --git a/analysis_result.py b/src/analysis_result.py similarity index 74% rename from analysis_result.py rename to src/analysis_result.py index 644b043..00fc679 100644 --- a/analysis_result.py +++ b/src/analysis_result.py @@ -1,3 +1,6 @@ +import json +from datetime import datetime + class AnalysisResult(object): def __init__(self, md5, @@ -20,3 +23,11 @@ def __init__(self, self.stop_future_scans = stop_future_scans self.binary_not_available = binary_not_available self.misc = misc + + def toJSON(self): + dict_rep = self.__dict__ + for key, value in dict_rep.items(): + if isinstance(value, datetime): + dict_rep[key] = value.timetuple() + + return json.dumps(dict_rep) \ No newline at end of file diff --git a/binary_database.py b/src/binary_database.py similarity index 100% rename from binary_database.py rename to src/binary_database.py diff --git a/exceptions.py b/src/exceptions.py similarity index 100% rename from exceptions.py rename to src/exceptions.py diff --git a/feed.py b/src/feed.py similarity index 100% rename from feed.py rename to src/feed.py diff --git a/globals.py b/src/globals.py similarity index 94% rename from globals.py rename to src/globals.py index 5c6eb94..0f2a89a 100644 --- a/globals.py +++ b/src/globals.py @@ -36,3 +36,7 @@ g_vacuum_script = 'scripts/vacuumscript.sh' g_feed_database_path = "./" + +g_scanning_interval = 360 + +g_worker_network_timeout=5 diff --git a/main.py b/src/main.py similarity index 51% rename from main.py rename to src/main.py index 1f1276c..11a7762 100644 --- a/main.py +++ b/src/main.py @@ -8,25 +8,40 @@ import subprocess import sys import time +import signal import traceback +from daemon import daemon +import lockfile +from functools import partial from datetime import datetime, timedelta from typing import List, Optional +import threading +from threading import Thread, Event, Barrier +from queue import Queue, Empty + import humanfriendly import psycopg2 +import sched # noinspection PyPackageRequirements import yara from celery import group +from celery.bin import worker from peewee import SqliteDatabase import globals -import singleton from analysis_result import AnalysisResult from binary_database import BinaryDetonationResult, db from exceptions import CbInvalidConfig from feed import CbFeed, CbFeedInfo, CbReport -from tasks import analyze_binary, app, generate_rule_map, update_yara_rules_remote +from tasks import ( + analyze_binary, + app, + generate_rule_map, + update_yara_rules_remote, + analyze_bins, +) from utilities import placehold logging_format = "%(asctime)s-%(name)s-%(lineno)d-%(levelname)s-%(message)s" @@ -39,6 +54,55 @@ celery_logger.setLevel(logging.ERROR) +def promise_worker(exit_event, scanning_promise_queue, scanning_results_queue): + while 
not (exit_event.is_set()): + if not (scanning_promise_queue.empty()): + try: + promise = scanning_promise_queue.get(timeout=1.0) + result = promise.get(disable_sync_subtasks=False) + scanning_results_queue.put(result) + except Empty: + exit_event.wait(1) + else: + exit_event.wait(1) + + logger.debug("PROMISE WORKING EXITING") + + +""" Sqlite is not meant to be thread-safe + +This single-worker-thread writes the result(s) to the configured sqlite file to hold the feed-metadata and seen binaries/results from scans +""" + + +def results_worker(exit_event, results_queue): + while not (exit_event.is_set()): + if not (results_queue.empty()): + try: + result = results_queue.get() + save_result(result) + except Empty: + exit_event.wait(1) + else: + exit_event.wait(1) + + logger.debug("Results worker thread exiting") + + +def results_worker_chunked(exit_event, results_queue): + while not (exit_event.is_set()): + if not (results_queue.empty()): + try: + results = results_queue.get() + save_results(results) + except Empty: + exit_event.wait(1) + else: + exit_event.wait(1) + + logger.debug("Results worker thread exiting") + + def generate_feed_from_db() -> None: """ Creates a feed based on specific database information. @@ -124,52 +188,27 @@ def generate_rule_map_remote(yara_rule_path) -> None: time.sleep(0.1) -def analyze_binaries(md5_hashes: List[str], local: bool) -> Optional: - """ - Analyze binaries. +def analyze_binary_and_queue(scanning_promise_queue, md5sum): + """ Analyze Binary And Queue """ + promise = analyze_binary.delay(md5sum) + scanning_promise_queue.put(promise) - TODO: determine return typing! - :param md5_hashes: list of hashes to check. - :param local: True if local - :return: None if there is a problem; results otherwise - """ - if local: - try: - results = [] - for md5_hash in md5_hashes: - results.append(analyze_binary(md5_hash)) - except Exception as err: - logger.error("{0}".format(err)) - time.sleep(5) - return None - else: - return results - else: - try: - scan_group = [] - for md5_hash in md5_hashes: - scan_group.append(analyze_binary.s(md5_hash)) - job = group(scan_group) +def analyze_binaries_and_queue(scanning_promise_queue, md5_hashes): + """ Analyze each binary and enqueue """ + for h in md5_hashes: + analyze_binary_and_queue(scanning_promise_queue, h) - result = job.apply_async() - start = time.time() - while not result.ready(): - if time.time() - start >= 120: # 2 minute timeout - break - else: - time.sleep(0.1) - except Exception as err: - logger.error("Error when analyzing: {0}".format(err)) - logger.error(traceback.format_exc()) - time.sleep(5) - return None - else: - if result.successful(): - return result.get(timeout=30) - else: - return None +def analyze_binaries_and_queue_chunked(scanning_promise_queue, md5_hashes): + """ + Attempts to do work in parrallelized chunks of MAX_HASHES grouped + """ + promise = analyze_binary.chunks( + [(mh,) for mh in md5_hashes], globals.MAX_HASHES + ).apply_async() + for prom in promise.children: + scanning_promise_queue.put(prom) def save_results(analysis_results: List[AnalysisResult]) -> None: @@ -182,27 +221,31 @@ def save_results(analysis_results: List[AnalysisResult]) -> None: :return: """ for analysis_result in analysis_results: - if analysis_result.binary_not_available: - globals.g_num_binaries_not_available += 1 - continue + save_result(analysis_result) - bdr, created = BinaryDetonationResult.get_or_create(md5=analysis_result.md5) - try: - bdr.md5 = analysis_result.md5 - bdr.last_scan_date = datetime.now() - bdr.score = 
analysis_result.score - bdr.last_error_msg = analysis_result.last_error_msg - bdr.last_success_msg = analysis_result.short_result - bdr.misc = json.dumps(globals.g_yara_rule_map_hash_list) - bdr.save() - globals.g_num_binaries_analyzed += 1 - except Exception as err: - logger.error("Error saving to database: {0}".format(err)) - logger.error(traceback.format_exc()) - else: - if analysis_result.score > 0: - generate_feed_from_db() +def save_result(analysis_result): + if analysis_result.binary_not_available: + globals.g_num_binaries_not_available += 1 + return + + bdr, created = BinaryDetonationResult.get_or_create(md5=analysis_result.md5) + + try: + bdr.md5 = analysis_result.md5 + bdr.last_scan_date = datetime.now() + bdr.score = analysis_result.score + bdr.last_error_msg = analysis_result.last_error_msg + bdr.last_success_msg = analysis_result.short_result + bdr.misc = json.dumps(globals.g_yara_rule_map_hash_list) + bdr.save() + globals.g_num_binaries_analyzed += 1 + except Exception as err: + logger.error("Error saving to database: {0}".format(err)) + logger.error(traceback.format_exc()) + else: + if analysis_result.score > 0: + generate_feed_from_db() def get_database_conn(): @@ -218,19 +261,25 @@ def get_database_conn(): return conn -def get_cursor(conn, start_date_binaries): +def get_binary_file_cursor(conn, start_date_binaries): + logger.debug("Getting database cursor...") + cur = conn.cursor(name="yara_agent") # noinspection SqlDialectInspection,SqlNoDataSourceInspection - cur.execute( - "SELECT md5hash FROM storefiles WHERE present_locally = TRUE AND timestamp >= '{0}' " - "ORDER BY timestamp DESC".format(start_date_binaries) + query = "SELECT md5hash FROM storefiles WHERE present_locally = TRUE AND timestamp >= '{0}' ORDER BY timestamp DESC".format( + start_date_binaries ) + logger.debug(query) + + cur.execute(query) + return cur def execute_script(): + """ Execute the configured shell script """ logger.warning("!!!Executing vacuum script!!!") target = os.path.join(os.getcwd(), globals.g_vacuum_script) @@ -244,7 +293,9 @@ def execute_script(): logger.warning("!!!Done Executing vacuum script!!!") -def perform(yara_rule_dir): +def perform(yara_rule_dir, conn, scanning_promises_queue): + """ Main routine - checks the cbr modulestore/storfiles table for new hashes + by comparing the sliding-window (now - globals.g_num_days_binaries) with the contents of the feed database on disk""" if globals.g_remote: logger.info("Uploading yara rules to workers...") generate_rule_map_remote(yara_rule_dir) @@ -256,68 +307,46 @@ def perform(yara_rule_dir): start_time = time.time() - conn = get_database_conn() + start_datetime = datetime.now() - start_date_binaries = datetime.now() - timedelta(days=globals.g_num_days_binaries) + start_date_binaries = start_datetime - timedelta(days=globals.g_num_days_binaries) - cur = get_cursor(conn, start_date_binaries) + cur = get_binary_file_cursor(conn, start_date_binaries) rows = cur.fetchall() conn.commit() - conn.close() + num_total_binaries = len(rows) - logger.info("Enumerating modulestore...") - for row in rows: - - num_total_binaries += 1 - md5_hash = row[0].hex() - - num_binaries_queued += 1 - - if _check_hash_against_feed(md5_hash): - md5_hashes.append(md5_hash) + logger.info( + f"Enumerating modulestore...found {num_total_binaries} resident binaries" + ) - if len(md5_hashes) >= globals.MAX_HASHES: - _analyze_save_and_log( - md5_hashes, start_time, num_binaries_skipped, num_total_binaries - ) - md5_hashes = [] + md5_hashes = filter(_check_hash_against_feed, 
(row[0].hex() for row in rows)) - _analyze_save_and_log( - md5_hashes, start_time, num_binaries_skipped, num_total_binaries - ) + analyze_binaries_and_queue_chunked(scanning_promises_queue, md5_hashes) - generate_feed_from_db() + logger.debug("Exit PERFORM") def _check_hash_against_feed(md5_hash): + query = BinaryDetonationResult.select().where( BinaryDetonationResult.md5 == md5_hash ) + if query.exists(): - try: - bdr = BinaryDetonationResult.get(BinaryDetonationResult.md5 == md5_hash) - scanned_hash_list = json.loads(bdr.misc) - if globals.g_disable_rescan and bdr.misc: - return False - - if scanned_hash_list == globals.g_yara_rule_map_hash_list: - # - # If it is the same then we don't need to scan again - # - return False - except Exception as e: - logger.error( - "Unable to decode yara rule map hash from database: {0}".format(e) - ) - return False + return False + return True -def _analyze_save_and_log(hashes, start_time, num_binaries_skipped, num_total_binaries): - analysis_results = analyze_binaries(hashes, local=(not globals.g_remote)) +def save_and_log( + analysis_results, start_time, num_binaries_skipped, num_total_binaries +): + + logger.debug(analysis_results) if analysis_results: for analysis_result in analysis_results: logger.debug( @@ -400,6 +429,20 @@ def verify_config(config_file: str, output_file: str = None) -> None: logger.debug(f"NOTE: output file will be '{globals.output_file}'") the_config = config["general"] + + if "mode" in config["general"]: + operating_mode = the_config["mode"].lower() + if operating_mode in ["master", "slave"]: + globals.g_mode = operating_mode + else: + raise CbInvalidConfig( + f"{header} does not specify a valid operating mode (slave/master)" + ) + else: + raise CbInvalidConfig( + f"{header} does not specify a valid operating mode (slave/master)" + ) + if "worker_type" in the_config: if ( the_config["worker_type"] == "local" @@ -430,15 +473,13 @@ def verify_config(config_file: str, output_file: str = None) -> None: else: raise CbInvalidConfig(f"{header} is 'local' and missing 'cb_server_token'") # TODO: validate url & token with test call? - else: - if "broker_url" in the_config and the_config["broker_url"].strip() != "": - app.conf.update( - broker_url=the_config["broker_url"], - result_backend=the_config["broker_url"], - ) - else: - raise CbInvalidConfig(f"{header} is 'remote' and missing 'broker_url'") - # TODO: validate broker with test call? 
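Editor's note: the chunked queueing introduced above replaces the old batch-and-poll loop. `analyze_binary.chunks([(mh,) for mh in md5_hashes], globals.MAX_HASHES).apply_async()` turns the hash stream into one Celery message per group of `MAX_HASHES` hashes, and `perform()` then pushes each per-chunk promise onto the scanning queue via `promise.children`. A minimal sketch of the same primitive, assuming a throwaway Celery app against a local Redis broker (the URL is illustrative, not the connector's configured `broker_url`):

    from celery import Celery

    app = Celery("demo", broker="redis://localhost:6379/0",
                 backend="redis://localhost:6379/0")

    @app.task
    def analyze(md5sum):
        # stand-in for the real yara scanning task
        return md5sum.upper()

    hashes = ["aa" * 16, "bb" * 16, "cc" * 16]
    # one single-argument tuple per hash, grouped two hashes per message
    promise = analyze.chunks([(h,) for h in hashes], 2).apply_async()
    for child in promise.children:  # one promise per chunk, as in perform()
        print(child.get(disable_sync_subtasks=False))

Each chunk resolves to a list of task results, which is why the results side of the connector deals in `List[AnalysisResult]` rather than single objects.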
+ + if "broker_url" in the_config and the_config["broker_url"].strip() != "": + app.conf.update( + broker_url=the_config["broker_url"], result_backend=the_config["broker_url"] + ) + elif globals.g_remote: + raise CbInvalidConfig(f"{header} is 'remote' and missing 'broker_url'") if "yara_rules_dir" in the_config and the_config["yara_rules_dir"].strip() != "": check = os.path.abspath( @@ -551,88 +592,300 @@ def verify_config(config_file: str, output_file: str = None) -> None: if not (os.path.exists(check) and os.path.isdir(check)): raise CbInvalidConfig("Invalid database path specified") + if "database_sweep_interval" in the_config: + globals.g_scanning_interval = the_config["database_sweep_interval"] + def main(): + parser = argparse.ArgumentParser(description="Yara Agent for Yara Connector") + + parser.add_argument( + "--config-file", + required=True, + default="yaraconnector.conf", + help="Location of the config file", + ) + + parser.add_argument( + "--log-file", default="yaraconnector.log", help="Log file output" + ) + + parser.add_argument( + "--output-file", default="yara_feed.json", help="output feed file" + ) + + parser.add_argument( + "--working-dir", default=".", help="working directory", required=False + ) + + parser.add_argument( + "--lock-file", default="./yaraconnector.lock", help="lock file", required=False + ) + + parser.add_argument( + "--validate-yara-rules", + action="store_true", + help="ONLY validate yara rules in a specified directory", + ) + + parser.add_argument("--debug", action="store_true") + + args = parser.parse_args() + + if args.debug: + logger.setLevel(logging.DEBUG) + + if args.log_file: + formatter = logging.Formatter(logging_format) + handler = logging.handlers.RotatingFileHandler( + args.log_file, maxBytes=10 * 1000000, backupCount=10 + ) + handler.setFormatter(formatter) + logger.addHandler(handler) + + # Verify the configuration file and load up important global variables try: - # check for single operation - singleton.SingleInstance() + verify_config(args.config_file, args.output_file) except Exception as err: - logger.error( - f"Only one instance of this script is allowed to run at a time: {err}" + logger.error(f"Unable to continue due to a configuration problem: {err}") + sys.exit(1) + + if args.validate_yara_rules: + logger.info( + "Validating yara rules in directory: {0}".format(globals.g_yara_rules_dir) ) + yara_rule_map = generate_rule_map(globals.g_yara_rules_dir) + try: + yara.compile(filepaths=yara_rule_map) + logger.info("All yara rules compiled successfully") + except Exception as err: + logger.error(f"There were errors compiling yara rules: {err}") + logger.error(traceback.format_exc()) else: - parser = argparse.ArgumentParser(description="Yara Agent for Yara Connector") - parser.add_argument( - "--config-file", - required=True, - default="yara_agent.conf", - help="Location of the config file", - ) - parser.add_argument( - "--log-file", default="yara_agent.log", help="Log file output" - ) - parser.add_argument( - "--output-file", default="yara_feed.json", help="output feed file" - ) - parser.add_argument( - "--validate-yara-rules", - action="store_true", - help="ONLY validate yara rules in a specified directory", - ) - parser.add_argument("--debug", action="store_true") - args = parser.parse_args() + EXIT_EVENT = Event() + + try: + + working_dir = args.working_dir + + lock_file = lockfile.FileLock(args.lock_file) - if args.debug: - logger.setLevel(logging.DEBUG) + files_preserve = getLogFileHandles(logger) + 
files_preserve.extend([args.lock_file, args.log_file, args.output_file]) - if args.log_file: - formatter = logging.Formatter(logging_format) - handler = logging.handlers.RotatingFileHandler( - args.log_file, maxBytes=10 * 1000000, backupCount=10 + # defauls to piping to /dev/null + context = daemon.DaemonContext( + working_directory=working_dir, + pidfile=lock_file, + files_preserve=files_preserve, ) - handler.setFormatter(formatter) - logger.addHandler(handler) - # Verify the configuration file and load up important global variables - try: - verify_config(args.config_file, args.output_file) + run_as_master = globals.g_mode == "master" + + scanning_promise_queue = Queue() + scanning_results_queue = Queue() + + sig_handler = partial(handle_sig, EXIT_EVENT) + + context.signal_map = { + signal.SIGTERM: sig_handler, + signal.SIGQUIT: sig_handler, + } + + with context: + # only connect to cbr if we're the master + if run_as_master: + init_local_resources() + start_workers( + EXIT_EVENT, scanning_promise_queue, scanning_results_queue + ) + # start local celery if working mode is local + if not globals.g_remote: + start_celery_worker_thread(args.config_file) + else: + # otherwise, we must start a worker since we are not the master + start_celery_worker_thread(args.config_file) + + # run until the service/daemon gets a quitting sig + run_to_exit_signal(EXIT_EVENT) + wait_all_worker_exit() + logger.Info("Yara connector shutdown OK") + + except KeyboardInterrupt: + logger.info("\n\n##### Interupted by User!\n") + EXIT_EVENT.set() + sys.exit(2) except Exception as err: - logger.error(f"Unable to continue due to a configuration problem: {err}") + logger.error(f"There were errors executing yara rules: {err}") + logger.error(traceback.format_exc()) + EXIT_EVENT.set() sys.exit(1) - if args.validate_yara_rules: - logger.info( - "Validating yara rules in directory: {0}".format( - globals.g_yara_rules_dir - ) + +def getLogFileHandles(logger): + """ Get a list of filehandle numbers from logger + to be handed to DaemonContext.files_preserve + """ + handles = [] + for handler in logger.handlers: + handles.append(handler.stream.fileno()) + if logger.parent: + handles += getLogFileHandles(logger.parent) + return handles + + +def handle_sig(exit_event, sig, frame): + """ + Signal handler - handle the signal and mark exit if its an exiting signal + """ + exit_sigs = (signal.SIGTERM, signal.SIGQUIT, signal.SIGKILL) + if sig in exit_sigs: + exit_event.set() + logger.debug("Sig handler set exit event") + + +# +# wait until the exit_event has been set by the signal handler +# +def run_to_exit_signal(exit_event): + exit_event.wait() + logger.debug("Begin graceful shutdown...") + + +def init_local_resources(): + """ + Initialize the local resources required to get module information + from cbr module store as well as local storage of module and scanning + metadata in sqlite 'binary.db' - generate an initial fead from the + database + + generate yara_rule_set metadata + """ + globals.g_yara_rule_map = generate_rule_map(globals.g_yara_rules_dir) + generate_yara_rule_map_hash(globals.g_yara_rules_dir) + database = SqliteDatabase(os.path.join(globals.g_feed_database_path, "binary.db")) + db.initialize(database) + db.connect() + db.create_tables([BinaryDetonationResult]) + generate_feed_from_db() + + +def wait_all_worker_exit(): + """ Await the exit of our worker threads """ + threadcount = 2 + while threadcount > 1: + threads = list( + filter( + lambda running_thread: not running_thread.daemon + if hasattr(running_thread, 
"daemon") + else True, + threading.enumerate(), ) - yara_rule_map = generate_rule_map(globals.g_yara_rules_dir) - try: - yara.compile(filepaths=yara_rule_map) - logger.info("All yara rules compiled successfully") - except Exception as err: - logger.error(f"There were errors compiling yara rules: {err}") - logger.error(traceback.format_exc()) - else: - try: - globals.g_yara_rule_map = generate_rule_map(globals.g_yara_rules_dir) - generate_yara_rule_map_hash(globals.g_yara_rules_dir) - database = SqliteDatabase( - os.path.join(globals.g_feed_database_path, "binary.db") - ) - db.initialize(database) - db.connect() - db.create_tables([BinaryDetonationResult]) - generate_feed_from_db() - perform(globals.g_yara_rules_dir) - except KeyboardInterrupt: - logger.info("\n\n##### Interupted by User!\n") - sys.exit(2) - except Exception as err: - logger.error(f"There were errors executing yara rules: {err}") - logger.error(traceback.format_exc()) - sys.exit(1) + ) + threadcount = len(threads) + logger.debug( + f"Main thread Waiting on {threadcount} live worker-threads (exluding deamons)..." + ) + logger.debug(f"Live threads (excluding daemons): {threads}") + time.sleep(0.1) + pass + + logger.debug("Main thread going to exit...") + + +# starts worker-threads (not celery workers) +# worker threads do work until they get the exit_event signal +def start_workers(exit_event, scanning_promises_queue, scanning_results_queue): + logger.debug("Starting perf thread") + + perf_thread = DatabaseScanningThread( + globals.g_scanning_interval, scanning_promises_queue, exit_event + ) + perf_thread.start() + + logger.debug("Starting promise thread(s)") + + for _ in range(2): + promise_worker_thread = Thread( + target=promise_worker, + args=(exit_event, scanning_promises_queue, scanning_results_queue), + ) + promise_worker_thread.start() + + logger.debug("Starting results saver thread") + results_worker_thread = Thread( + target=results_worker_chunked, args=(exit_event, scanning_results_queue) + ) + + results_worker_thread.start() + + +class DatabaseScanningThread(Thread): + + """ + A worker thread that scans over the database for new hashes ever INTERVAL seconds + Pushes work to scanning_promises_queue , exits when the event is triggered + by the signal handler + """ + + def __init__(self, interval, scanning_promises_queue, exit_event, *args, **kwargs): + super().__init__(*args, **kwargs) + self._args = args + self._kwargs = kwargs + self.exit_event = exit_event + self._conn = get_database_conn() + self._interval = interval + self._scanning_promises_queue = scanning_promises_queue + self._target = self.scan_until_exit + + def scan_until_exit(self): + # TODO DRIFT + self.do_db_scan() + while not self.exit_event.is_set(): + self.exit_event.wait(timeout=self._interval) + if self.exit_event.is_set(): + break + else: + self.do_db_scan() + logger.Debug("Database Scanning Thread told to exit") + return + + def do_db_scan(self): + logger.Debug("START database sweep") + try: + perform(globals.g_yara_rules_dir, self._conn, self._scanning_promises_queue) + except Exception as e: + logger.error(f"Something went wrong sweeping the CbR module store...{str(e)} \n {traceback.format_exc()}") + + def run(self): + """ Represents the lifetime of the thread """ + + try: + if self._target: + self._target(*self._args, **self._kwargs) + finally: + # Avoid a refcycle if the thread is running a function with + # an argument that has a member that points to the thread. 
+ # shutdown database connection + self._conn.close() + del self._target, self._args, self._kwargs + logger.debug("Database scanning Thread Exiting gracefully") + + +# Start celery worker in a daemon-thread +# TODO - Aggresive autoscaling config options +def start_celery_worker_thread(config_file): + t = Thread(target=launch_celery_worker, kwargs={"config_file": config_file}) + t.daemon = True + t.start() + + +# launch a celery worker using the imported app context +def launch_celery_worker(config_file=None): + localworker = worker.worker(app=app) + localworker.run(config_file=config_file) + logger.debug("CELERY WORKER LAUNCHING THREAD EXITED") if __name__ == "__main__": diff --git a/singleton.py b/src/singleton.py similarity index 100% rename from singleton.py rename to src/singleton.py diff --git a/src/tasks.py b/src/tasks.py new file mode 100644 index 0000000..c771389 --- /dev/null +++ b/src/tasks.py @@ -0,0 +1,372 @@ +import configparser +import datetime +import hashlib +import logging +import os +import traceback +from typing import List + +# noinspection PyPackageRequirements +import yara +import requests +import io +import zipfile +from celery import bootsteps, Celery, group +from celery.result import ResultSet + +import globals +from analysis_result import AnalysisResult +from exceptions import CbInvalidConfig +from utilities import placehold +import multiprocessing + +app = Celery() +# noinspection PyUnusedName +app.conf.task_serializer = "pickle" +# noinspection PyUnusedName +app.conf.result_serializer = "pickle" +# noinspection PyUnusedName +app.conf.accept_content = {"pickle"} + +logger = logging.getLogger(__name__) +logger.setLevel(logging.DEBUG) + + +class ReadWriteLock: + """ A lock object that allows many simultaneous "read locks", but + only one "write lock." """ + + def __init__(self): + self._read_ready = multiprocessing.Condition(multiprocessing.Lock()) + self._readers = 0 + + def acquire_read(self): + """ Acquire a read lock. Blocks only if a thread has + acquired the write lock. """ + self._read_ready.acquire() + try: + self._readers += 1 + finally: + self._read_ready.release() + + def release_read(self): + """ Release a read lock. """ + self._read_ready.acquire() + try: + self._readers -= 1 + if not self._readers: + self._read_ready.notify_all() + finally: + self._read_ready.release() + + def acquire_write(self): + """ Acquire a write lock. Blocks until there are no + acquired read or write locks. """ + self._read_ready.acquire() + while self._readers > 0: + self._read_ready.wait() + + def release_write(self): + """ Release a write lock. """ + self._read_ready.release() + + +compiled_yara_rules = None +compiled_rules_lock = ReadWriteLock() + + +# noinspection DuplicatedCode +def verify_config(config_file: str) -> None: + """ + Read and validate the current config file. 
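Editor's note: the `ReadWriteLock` above exists so that every worker thread can scan with the cached, compiled ruleset concurrently, while a rule refresh must wait until all in-flight readers drain (`acquire_write` loops on the condition while `_readers > 0`). A runnable thread-based demo of the pattern follows; it restates the lock with `threading.Condition` for brevity, whereas the connector builds it on `multiprocessing.Condition`:

    import threading
    import time

    class ReadWriteLock:
        # same algorithm as above, restated with threading primitives
        def __init__(self):
            self._read_ready = threading.Condition(threading.Lock())
            self._readers = 0

        def acquire_read(self):
            with self._read_ready:
                self._readers += 1

        def release_read(self):
            with self._read_ready:
                self._readers -= 1
                if not self._readers:
                    self._read_ready.notify_all()

        def acquire_write(self):
            self._read_ready.acquire()
            while self._readers > 0:
                self._read_ready.wait()

        def release_write(self):
            self._read_ready.release()

    rules, lock = {"version": 1}, ReadWriteLock()

    def scan():
        lock.acquire_read()
        try:
            time.sleep(0.1)  # many readers may sit here at once
            print("scanned with rule set", rules["version"])
        finally:
            lock.release_read()

    readers = [threading.Thread(target=scan) for _ in range(4)]
    for t in readers:
        t.start()
    lock.acquire_write()  # blocks until every reader has released
    rules["version"] = 2  # safe swap: no reader can be mid-scan
    lock.release_write()
    for t in readers:
        t.join()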
+ + NOTE: Replicates, to a smaller degree, the function in main.py; it is presumed that more detailed checks are there + :param config_file: path to the config file + """ + abs_config = os.path.abspath(os.path.expanduser(placehold(config_file))) + header = f"Config file '{abs_config}'" + + config = configparser.ConfigParser() + if not os.path.exists(config_file): + raise CbInvalidConfig(f"{header} does not exist!") + + try: + config.read(config_file) + except Exception as err: + raise CbInvalidConfig(err) + + logger.debug(f"NOTE: using config file '{abs_config}'") + if not config.has_section("general"): + raise CbInvalidConfig(f"{header} does not have a 'general' section") + + the_config = config["general"] + + if "yara_rules_dir" in the_config and the_config["yara_rules_dir"].strip() != "": + check = os.path.abspath( + os.path.expanduser(placehold(the_config["yara_rules_dir"])) + ) + if os.path.exists(check): + if os.path.isdir(check): + globals.g_yara_rules_dir = check + else: + raise CbInvalidConfig( + f"{header} specified 'yara_rules_dir' ({check}) is not a directory" + ) + else: + raise CbInvalidConfig( + f"{header} specified 'yara_rules_dir' ({check}) does not exist" + ) + else: + raise CbInvalidConfig(f"{header} has no 'yara_rules_dir' definition") + + + if "worker_network_timeout" in the_config: + globals.g_worker_network_timeout = int(the_config['worker_network_timeout']) + + if "worker_type" in the_config: + if ( + the_config["worker_type"] == "local" + ): + remote = False + elif the_config["worker_type"] == "remote": + remote = True + else: # anything else + raise CbInvalidConfig( + f"{header} has an invalid 'worker_type' ({the_config['worker_type']})" + ) + else: + remote = False + + # local/remote configuration data + if not remote: + if "cb_server_url" in the_config and the_config["cb_server_url"].strip() != "": + globals.g_cb_server_url = the_config["cb_server_url"] + else: + raise CbInvalidConfig(f"{header} is 'local' and missing 'cb_server_url'") + if ( + "cb_server_token" in the_config + and the_config["cb_server_token"].strip() != "" + ): + globals.g_cb_server_token = the_config["cb_server_token"] + else: + raise CbInvalidConfig(f"{header} is 'local' and missing 'cb_server_token'") + + if "broker_url" in the_config and the_config["broker_url"].strip() != "": + app.conf.update(broker_url=the_config["broker_url"], results_backend=the_config['broker_url']) + elif remote: + raise CbInvalidConfig(f"{header} is 'remote' and missing 'broker_url'") + + +def add_worker_arguments(parser): + parser.add_argument( + "--config-file", default="yara_worker.conf", help="Yara Worker Config" + ) + + +app.user_options["worker"].add(add_worker_arguments) + + +class MyBootstep(bootsteps.Step): + + # noinspection PyUnusedLocal + def __init__(self, worker, config_file="yara_worker.conf", **options): + super().__init__(self) + print(options) + verify_config(config_file) + + # g_yara_rules_dir = yara_rules_dir + + +app.steps["worker"].add(MyBootstep) + + +def generate_rule_map(yara_rule_path: str) -> dict: + """ + Create a dictionary keyed by filename containing file paths + :param yara_rule_path: location of yara rules + :return: + """ + rule_map = {} + for fn in os.listdir(yara_rule_path): + if fn.lower().endswith(".yar") or fn.lower().endswith(".yara"): + fullpath = os.path.join(yara_rule_path, fn) + if not os.path.isfile(fullpath): + continue + + last_dot = fn.rfind(".") + if last_dot != -1: + namespace = fn[:last_dot] + else: + namespace = fn + rule_map[namespace] = fullpath + + return 
rule_map + + +# noinspection DuplicatedCode +def generate_yara_rule_map_hash(yara_rule_path: str) -> List: + """ + Create a list of md5 hashes based on rule file contents. + + :param yara_rule_path: location of the yara rules + :return: + """ + temp_list = [] + for fn in os.listdir(yara_rule_path): + if fn.lower().endswith(".yar") or fn.lower().endswith(".yara"): + fullpath = os.path.join(yara_rule_path, fn) + if not os.path.isfile(fullpath): + continue + with open(os.path.join(yara_rule_path, fn), "rb") as fp: + data = fp.read() + # NOTE: Original logic resulted in a cumulative hash for each file (linking them) + md5 = hashlib.md5() + md5.update(data) + temp_list.append(str(md5.hexdigest())) + + temp_list.sort() + return temp_list + + +@app.task +def update_yara_rules_remote(yara_rules: dict) -> None: + """ + Update remote yara rules. + :param yara_rules: dict of rules, keyed by file name + :return: + """ + try: + for key in yara_rules: + with open(os.path.join(globals.g_yara_rules_dir, key), "wb") as fp: + fp.write(yara_rules[key]) + except Exception as err: + logger.error(f"Error writing rule file: {err}") + logger.error(traceback.format_exc()) + + +def update_yara_rules(): + global compiled_yara_rules + global compiled_rules_lock + compiled_rules_lock.acquire_read() + if compiled_yara_rules: + logger.debug("Reading the Compiled rules") + return + else: + logger.debug("Updating yara rules in worker(s)") + yara_rule_map = generate_rule_map(globals.g_yara_rules_dir) + new_rules_object = yara.compile(filepaths=yara_rule_map) + compiled_rules_lock.release_read() + compiled_rules_lock.acquire_write() + compiled_yara_rules = new_rules_object + logger.debug("Succesfully updated yara rules") + compiled_rules_lock.release_write() + compiled_rules_lock.acquire_read() + return + + +def get_binary_by_hash(url, hsum, token): + """ + do a binary-retrival-by hash (husm) api call against + the configured server-by (url) using (token) + """ + headers = {"X-Auth-Token": token} + request_url = f"{url}/api/v1/binary/{hsum}" + response = requests.get(request_url, headers=headers, stream=True, verify=False, timeout=globals.g_worker_network_timeout) + if response: + with zipfile.ZipFile(io.BytesIO(response.content)) as the_binary_zip: + #the response contains the file ziped in 'filedata' + fp = the_binary_zip.open("filedata") + the_binary_zip.close() + return fp + else: + #otherwise return None which will be interpreted correctly in analyze_binary as haven failed to lookup the hash + return None + + +@app.task +def analyze_bins(hashes): + return group(analyze_binary.s(h) for h in hashes).apply_async() + + +@app.task +def analyze_binary(md5sum: str) -> AnalysisResult: + """ + Analyze binary information. 
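Editor's note: `get_binary_by_hash` above is what replaced the worker's `cbapi` dependency: a single authenticated GET against `/api/v1/binary/<MD5>`, whose response body is a zip archive carrying the module as a member named `filedata`. A sketch of the same call written with context managers (server URL and token are placeholders; `verify=False` and the configurable timeout mirror the connector's own request):

    import io
    import zipfile

    import requests

    def fetch_binary(url, md5sum, token, timeout=5):
        """Return raw module bytes for md5sum, or None if not on the server."""
        resp = requests.get(f"{url}/api/v1/binary/{md5sum.upper()}",
                            headers={"X-Auth-Token": token},
                            stream=True,
                            verify=False,  # the connector also skips TLS verification
                            timeout=timeout)
        if not resp.ok:
            return None
        with zipfile.ZipFile(io.BytesIO(resp.content)) as zf:
            with zf.open("filedata") as member:  # the zip wraps the binary
                return member.read()

    # usage with hypothetical values:
    # data = fetch_binary("https://cbr.example.com",
    #                     "d41d8cd98f00b204e9800998ecf8427e", "API_TOKEN")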
+ :param md5sum: md5 binary to check + :return: AnalysisResult instance + """ + global compiled_yara_rules + global compiled_rules_lock + + logger.debug(f"{md5sum}: in analyze_binary") + analysis_result = AnalysisResult(md5sum) + + try: + analysis_result.last_scan_date = datetime.datetime.now() + + binary_data = get_binary_by_hash( + globals.g_cb_server_url, md5sum.upper(), globals.g_cb_server_token + ) + + if not binary_data: + logger.debug(f"No binary agailable for {md5sum}") + analysis_result.binary_not_available = True + return analysis_result + + try: + # matches = "debug" + update_yara_rules() + matches = compiled_yara_rules.match(data=binary_data.read(), timeout=30) + if matches: + score = get_high_score(matches) + analysis_result.score = score + analysis_result.short_result = "Matched yara rules: %s" % ", ".join( + [match.rule for match in matches] + ) + # analysis_result.short_result = "Matched yara rules: debug" + analysis_result.long_result = analysis_result.long_result + analysis_result.misc = generate_yara_rule_map_hash( + globals.g_yara_rules_dir + ) + else: + analysis_result.score = 0 + analysis_result.short_result = "No Matches" + except yara.TimeoutError: + # yara timed out + analysis_result.last_error_msg = "Analysis timed out after 60 seconds" + analysis_result.stop_future_scans = True + except yara.Error as err: + # Yara errored while trying to scan binary + analysis_result.last_error_msg = f"Yara exception: {err}" + except Exception as err: + analysis_result.last_error_msg = ( + f"Other exception while matching rules: {err}\n" + + traceback.format_exc() + ) + finally: + compiled_rules_lock.release_read() + binary_data.close() + return analysis_result + except Exception as err: + error = f"Unexpected error: {err}\n" + traceback.format_exc() + logger.error(error) + analysis_result.last_error_msg = error + return analysis_result + + +def get_high_score(matches) -> int: + """ + Find the higest match score. + + NOTE: if str(matches) == "debug", return 100 + :param matches: List of rule matches. + :return: + """ + score = 0 + for match in matches: + if match.meta.get("score", 0) > score: + score = match.meta.get("score") + if score == 0: + return 100 + else: + return score diff --git a/utilities.py b/src/utilities.py similarity index 100% rename from utilities.py rename to src/utilities.py diff --git a/tasks.py b/tasks.py deleted file mode 100644 index f5b54a2..0000000 --- a/tasks.py +++ /dev/null @@ -1,263 +0,0 @@ -import configparser -import datetime -import hashlib -import logging -import os -import traceback -from typing import List - -# noinspection PyPackageRequirements -import yara -from cbapi.response.models import Binary -from cbapi.response.rest_api import CbResponseAPI -from celery import bootsteps, Celery - -import globals -from analysis_result import AnalysisResult -from exceptions import CbInvalidConfig -from utilities import placehold - -app = Celery() -# noinspection PyUnusedName -app.conf.task_serializer = "pickle" -# noinspection PyUnusedName -app.conf.result_serializer = "pickle" -# noinspection PyUnusedName -app.conf.accept_content = {"pickle"} - -logger = logging.getLogger(__name__) -logger.setLevel(logging.INFO) - - -# noinspection DuplicatedCode -def verify_config(config_file: str) -> None: - """ - Read and validate the current config file. 
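Editor's note: the scoring above relies on a convention rather than an API. Each rule is expected to carry a `meta: score = N` field, which `yara-python` surfaces as `match.meta`, and `get_high_score` takes the maximum across all matches. A self-contained example of a scored rule and that lookup, compiled from an in-memory source instead of the connector's rules directory:

    import yara

    RULE = """
    rule demo_scored
    {
        meta:
            score = 75
        strings:
            $marker = "EVIL"
        condition:
            $marker
    }
    """

    rules = yara.compile(source=RULE)
    matches = rules.match(data=b"xx EVIL xx", timeout=30)
    for m in matches:
        # the same per-match lookup get_high_score() performs
        print(m.rule, m.meta.get("score", 0))  # -> demo_scored 75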
- - NOTE: Replicates, to a smaller degree, the function in main.py; it is presumed that more detailed checks are there - :param config_file: path to the config file - """ - abs_config = os.path.abspath(os.path.expanduser(placehold(config_file))) - header = f"Config file '{abs_config}'" - - config = configparser.ConfigParser() - if not os.path.exists(config_file): - raise CbInvalidConfig(f"{header} does not exist!") - - try: - config.read(config_file) - except Exception as err: - raise CbInvalidConfig(err) - - logger.debug(f"NOTE: using config file '{abs_config}'") - if not config.has_section('general'): - raise CbInvalidConfig(f"{header} does not have a 'general' section") - - the_config = config['general'] - - if 'yara_rules_dir' in the_config and the_config['yara_rules_dir'].strip() != "": - check = os.path.abspath(os.path.expanduser(placehold(the_config["yara_rules_dir"]))) - if os.path.exists(check): - if os.path.isdir(check): - globals.g_yara_rules_dir = check - else: - raise CbInvalidConfig(f"{header} specified 'yara_rules_dir' ({check}) is not a directory") - else: - raise CbInvalidConfig(f"{header} specified 'yara_rules_dir' ({check}) does not exist") - else: - raise CbInvalidConfig(f"{header} has no 'yara_rules_dir' definition") - - if 'worker_type' in the_config: - if the_config['worker_type'] == 'local' or the_config['worker_type'].strip() == "": - remote = False - elif the_config['worker_type'] == 'remote': - remote = True - else: # anything else - raise CbInvalidConfig(f"{header} has an invalid 'worker_type' ({the_config['worker_type']})") - else: - remote = False - - # local/remote configuration data - if not remote: - if 'cb_server_url' in the_config and the_config['cb_server_url'].strip() != "": - globals.g_cb_server_url = the_config['cb_server_url'] - else: - raise CbInvalidConfig(f"{header} is 'local' and missing 'cb_server_url'") - if 'cb_server_token' in the_config and the_config['cb_server_token'].strip() != "": - globals.g_cb_server_token = the_config['cb_server_token'] - else: - raise CbInvalidConfig(f"{header} is 'local' and missing 'cb_server_token'") - else: - if 'broker_url' in the_config and the_config['broker_url'].strip() != "": - app.conf.update(broker_url=the_config['broker_url'], result_backend=the_config['broker_url']) - else: - raise CbInvalidConfig(f"{header} is 'remote' and missing 'broker_url'") - - -def add_worker_arguments(parser): - parser.add_argument('--config-file', default='yara_worker.conf', help='Yara Worker Config') - - -app.user_options['worker'].add(add_worker_arguments) - - -class MyBootstep(bootsteps.Step): - - # noinspection PyUnusedLocal - def __init__(self, worker, config_file='yara_worker.conf', **options): - super().__init__(self) - verify_config(config_file) - - # g_yara_rules_dir = yara_rules_dir - - -app.steps['worker'].add(MyBootstep) - - -def generate_rule_map(yara_rule_path: str) -> dict: - """ - Create a dictionary keyed by filename containing file paths - :param yara_rule_path: location of yara rules - :return: - """ - rule_map = {} - for fn in os.listdir(yara_rule_path): - if fn.lower().endswith(".yar") or fn.lower().endswith(".yara"): - fullpath = os.path.join(yara_rule_path, fn) - if not os.path.isfile(fullpath): - continue - - last_dot = fn.rfind('.') - if last_dot != -1: - namespace = fn[:last_dot] - else: - namespace = fn - rule_map[namespace] = fullpath - - return rule_map - - -# noinspection DuplicatedCode -def generate_yara_rule_map_hash(yara_rule_path: str) -> List: - """ - Create a list of md5 hashes based on rule 
file contents. - - :param yara_rule_path: location of the yara rules - :return: - """ - temp_list = [] - for fn in os.listdir(yara_rule_path): - if fn.lower().endswith(".yar") or fn.lower().endswith(".yara"): - fullpath = os.path.join(yara_rule_path, fn) - if not os.path.isfile(fullpath): - continue - with open(os.path.join(yara_rule_path, fn), 'rb') as fp: - data = fp.read() - # NOTE: Original logic resulted in a cumulative hash for each file (linking them) - md5 = hashlib.md5() - md5.update(data) - temp_list.append(str(md5.hexdigest())) - - temp_list.sort() - return temp_list - - -@app.task -def update_yara_rules_remote(yara_rules: dict) -> None: - """ - Update remote yara rules. - :param yara_rules: dict of rules, keyed by file name - :return: - """ - try: - for key in yara_rules: - with open(os.path.join(globals.g_yara_rules_dir, key), 'wb') as fp: - fp.write(yara_rules[key]) - except Exception as err: - logger.error(f"Error writing rule file: {err}") - logger.error(traceback.format_exc()) - - -@app.task -def analyze_binary(md5sum: str) -> AnalysisResult: - """ - Analyze binary information. - :param md5sum: md5 binary to check - :return: AnalysisResult instance - """ - logger.debug(f"{md5sum}: in analyze_binary") - analysis_result = AnalysisResult(md5sum) - - try: - analysis_result.last_scan_date = datetime.datetime.now() - - cb = CbResponseAPI(url=globals.g_cb_server_url, - token=globals.g_cb_server_token, - ssl_verify=False, - timeout=5) - - binary_query = cb.select(Binary).where(f"md5:{md5sum}") - - if binary_query: - try: - binary_data = binary_query[0].file.read() - except Exception as err: - logger.debug(f"No binary agailable for {md5sum}: {err}") - analysis_result.binary_not_available = True - return analysis_result - - yara_rule_map = generate_rule_map(globals.g_yara_rules_dir) - yara_rules = yara.compile(filepaths=yara_rule_map) - - try: - # matches = "debug" - matches = yara_rules.match(data=binary_data, timeout=30) - except yara.TimeoutError: - # yara timed out - analysis_result.last_error_msg = "Analysis timed out after 60 seconds" - analysis_result.stop_future_scans = True - except yara.Error as err: - # Yara errored while trying to scan binary - analysis_result.last_error_msg = f"Yara exception: {err}" - except Exception as err: - analysis_result.last_error_msg = f"Other exception while matching rules: {err}\n" + \ - traceback.format_exc() - else: - if matches: - score = get_high_score(matches) - analysis_result.score = score - analysis_result.short_result = "Matched yara rules: %s" % ', '.join( - [match.rule for match in matches]) - # analysis_result.short_result = "Matched yara rules: debug" - analysis_result.long_result = analysis_result.long_result - analysis_result.misc = generate_yara_rule_map_hash(globals.g_yara_rules_dir) - else: - analysis_result.score = 0 - analysis_result.short_result = "No Matches" - - else: - analysis_result.binary_not_available = True - return analysis_result - except Exception as err: - error = f"Unexpected error: {err}\n" + traceback.format_exc() - logger.error(error) - analysis_result.last_error_msg = error - return analysis_result - - -def get_high_score(matches) -> int: - """ - Find the higest match score. - - NOTE: if str(matches) == "debug", return 100 - :param matches: List of rule matches. 
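Editor's note: the legacy task quoted above calls `generate_rule_map` and `yara.compile` for every single binary, which is the main per-scan cost the rewritten worker removes by caching `compiled_yara_rules` behind the read/write lock. The difference is easy to measure; a rough sketch (absolute numbers will vary with the size of the rule set):

    import time

    import yara

    RULE = 'rule r { strings: $a = "A" condition: $a }'
    data = b"AAAA"

    start = time.perf_counter()
    for _ in range(100):
        yara.compile(source=RULE).match(data=data)  # legacy: compile per scan
    per_scan = time.perf_counter() - start

    rules = yara.compile(source=RULE)               # rewrite: compile once
    start = time.perf_counter()
    for _ in range(100):
        rules.match(data=data)
    cached = time.perf_counter() - start

    print(f"compile-per-scan {per_scan:.3f}s vs compile-once {cached:.3f}s")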
- :return: - """ - score = 0 - for match in matches: - if match.meta.get('score', 0) > score: - score = match.meta.get('score') - if score == 0: - return 100 - else: - return score From 9b9144f67e87471cb626637b6c862bc14cf51522 Mon Sep 17 00:00:00 2001 From: Zachary Estep Date: Sat, 2 Nov 2019 16:45:44 -0400 Subject: [PATCH 100/257] Fixup logging messages --- src/main.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/src/main.py b/src/main.py index 11a7762..5af149c 100644 --- a/src/main.py +++ b/src/main.py @@ -710,7 +710,7 @@ def main(): # run until the service/daemon gets a quitting sig run_to_exit_signal(EXIT_EVENT) wait_all_worker_exit() - logger.Info("Yara connector shutdown OK") + logger.info("Yara connector shutdown OK") except KeyboardInterrupt: logger.info("\n\n##### Interupted by User!\n") @@ -848,11 +848,11 @@ def scan_until_exit(self): break else: self.do_db_scan() - logger.Debug("Database Scanning Thread told to exit") + logger.debug("Database Scanning Thread told to exit") return def do_db_scan(self): - logger.Debug("START database sweep") + logger.debug("START database sweep") try: perform(globals.g_yara_rules_dir, self._conn, self._scanning_promises_queue) except Exception as e: @@ -871,6 +871,7 @@ def run(self): self._conn.close() del self._target, self._args, self._kwargs logger.debug("Database scanning Thread Exiting gracefully") + self.exit_event.set() # Start celery worker in a daemon-thread From 7c7f3c5c249bb7b223d0fa54749e6a246b9116b2 Mon Sep 17 00:00:00 2001 From: Zachary Estep Date: Sat, 2 Nov 2019 21:11:22 -0400 Subject: [PATCH 101/257] Updates for large result sets --- src/main.py | 70 ++++++++++++++++++++++++++++++++++++++++++++--------- 1 file changed, 58 insertions(+), 12 deletions(-) diff --git a/src/main.py b/src/main.py index 5af149c..db96dbc 100644 --- a/src/main.py +++ b/src/main.py @@ -80,7 +80,7 @@ def results_worker(exit_event, results_queue): if not (results_queue.empty()): try: result = results_queue.get() - save_result(result) + save_results_with_logging(result) except Empty: exit_event.wait(1) else: @@ -313,21 +313,50 @@ def perform(yara_rule_dir, conn, scanning_promises_queue): cur = get_binary_file_cursor(conn, start_date_binaries) - rows = cur.fetchall() - - conn.commit() + rows = cur.fetchmany() num_total_binaries = len(rows) - logger.info( - f"Enumerating modulestore...found {num_total_binaries} resident binaries" - ) + while num_total_binaries > 0: + + logger.info( + f"Enumerating modulestore...found {num_total_binaries} resident binaries" + ) + + md5_hashes = filter(_check_hash_against_feed, (row[0].hex() for row in rows)) - md5_hashes = filter(_check_hash_against_feed, (row[0].hex() for row in rows)) + logger.debug(f"After filtering...found new {len(md5_hashes)} hashes to scan") - analyze_binaries_and_queue_chunked(scanning_promises_queue, md5_hashes) + analyze_binaries_and_queue_chunked(scanning_promises_queue, md5_hashes) + + elapsed_time = datetime.now() - start_datetime + + """ + Holding the named-cursor through a large historical result set + will cause storefiles table fragmentation + After a configurable amount of time - use the configured + script to vacuum the table by hand before continuing + """ + + if elapsed_time > globals.g_vacuum_seconds and globals.g_vacuum_seconds > 0: + # Make sure the cursor is closed, and we are commited() + # to release SHARED access to the table + cur.close() + conn.commit() + # execute the configured script + execute_script() + # restore cursor + cur = 
get_binary_file_cursor(conn, start_date_binaries) + + rows = cur.fetchmany() + + num_total_binaries = len(rows) + + cur.close() + + conn.commit() - logger.debug("Exit PERFORM") + logger.debug("Exiting database sweep routine") def _check_hash_against_feed(md5_hash): @@ -342,6 +371,21 @@ def _check_hash_against_feed(md5_hash): return True +def save_results_with_logging(analysis_results): + logger.debug(analysis_results) + if analysis_results: + for analysis_result in analysis_results: + logger.debug( + ( + f"Analysis result is {analysis_result.md5} {analysis_result.binary_not_available}" + f" {analysis_result.long_result} {analysis_result.last_error_msg}" + ) + ) + if analysis_result.last_error_msg: + logger.error(analysis_result.last_error_msg) + save_results(analysis_results) + + def save_and_log( analysis_results, start_time, num_binaries_skipped, num_total_binaries ): @@ -619,7 +663,7 @@ def main(): ) parser.add_argument( - "--lock-file", default="./yaraconnector.lock", help="lock file", required=False + "--lock-file", default="./yaraconnector", help="lock file", required=False ) parser.add_argument( @@ -856,7 +900,9 @@ def do_db_scan(self): try: perform(globals.g_yara_rules_dir, self._conn, self._scanning_promises_queue) except Exception as e: - logger.error(f"Something went wrong sweeping the CbR module store...{str(e)} \n {traceback.format_exc()}") + logger.error( + f"Something went wrong sweeping the CbR module store...{str(e)} \n {traceback.format_exc()}" + ) def run(self): """ Represents the lifetime of the thread """ From a1f302074e9300b0998c303661045fd0091b8590 Mon Sep 17 00:00:00 2001 From: Zachary Estep Date: Sat, 2 Nov 2019 21:25:44 -0400 Subject: [PATCH 102/257] Fixup --- src/main.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/src/main.py b/src/main.py index db96dbc..e88a80f 100644 --- a/src/main.py +++ b/src/main.py @@ -325,11 +325,11 @@ def perform(yara_rule_dir, conn, scanning_promises_queue): md5_hashes = filter(_check_hash_against_feed, (row[0].hex() for row in rows)) - logger.debug(f"After filtering...found new {len(md5_hashes)} hashes to scan") + #logger.debug(f"After filtering...found new {len(md5_hashes)} hashes to scan") analyze_binaries_and_queue_chunked(scanning_promises_queue, md5_hashes) - elapsed_time = datetime.now() - start_datetime + elapsed_time = (datetime.now() - start_datetime).total_seconds() """ Holding the named-cursor through a large historical result set @@ -345,6 +345,8 @@ def perform(yara_rule_dir, conn, scanning_promises_queue): conn.commit() # execute the configured script execute_script() + + start_datetime = datetime.now() # restore cursor cur = get_binary_file_cursor(conn, start_date_binaries) From 02f24c9ba62660854bc4ac1f19ffadd595fc6bd8 Mon Sep 17 00:00:00 2001 From: Zachary Estep Date: Sat, 2 Nov 2019 21:48:24 -0400 Subject: [PATCH 103/257] updates to workers --- src/main.py | 69 +++++++++++++++++++++++++++++++---------------------- 1 file changed, 40 insertions(+), 29 deletions(-) diff --git a/src/main.py b/src/main.py index e88a80f..4889ca1 100644 --- a/src/main.py +++ b/src/main.py @@ -55,16 +55,19 @@ def promise_worker(exit_event, scanning_promise_queue, scanning_results_queue): - while not (exit_event.is_set()): - if not (scanning_promise_queue.empty()): - try: - promise = scanning_promise_queue.get(timeout=1.0) - result = promise.get(disable_sync_subtasks=False) - scanning_results_queue.put(result) - except Empty: + try: + while not (exit_event.is_set()): + if not (scanning_promise_queue.empty()): + try: + 
promise = scanning_promise_queue.get(timeout=1.0) + result = promise.get(disable_sync_subtasks=False) + scanning_results_queue.put(result) + except Empty: + exit_event.wait(1) + else: exit_event.wait(1) - else: - exit_event.wait(1) + finally: + exit_event.set() logger.debug("PROMISE WORKING EXITING") @@ -76,29 +79,35 @@ def promise_worker(exit_event, scanning_promise_queue, scanning_results_queue): def results_worker(exit_event, results_queue): - while not (exit_event.is_set()): - if not (results_queue.empty()): - try: - result = results_queue.get() - save_results_with_logging(result) - except Empty: + try: + while not (exit_event.is_set()): + if not (results_queue.empty()): + try: + result = results_queue.get() + save_results_with_logging(result) + except Empty: + exit_event.wait(1) + else: exit_event.wait(1) - else: - exit_event.wait(1) + finally: + exit_event.set() logger.debug("Results worker thread exiting") def results_worker_chunked(exit_event, results_queue): - while not (exit_event.is_set()): - if not (results_queue.empty()): - try: - results = results_queue.get() - save_results(results) - except Empty: + try: + while not (exit_event.is_set()): + if not (results_queue.empty()): + try: + results = results_queue.get() + save_results(results) + except Empty: + exit_event.wait(1) + else: exit_event.wait(1) - else: - exit_event.wait(1) + finally: + exit_event.set() logger.debug("Results worker thread exiting") @@ -313,7 +322,7 @@ def perform(yara_rule_dir, conn, scanning_promises_queue): cur = get_binary_file_cursor(conn, start_date_binaries) - rows = cur.fetchmany() + rows = cur.fetchmany(2000) num_total_binaries = len(rows) @@ -325,7 +334,7 @@ def perform(yara_rule_dir, conn, scanning_promises_queue): md5_hashes = filter(_check_hash_against_feed, (row[0].hex() for row in rows)) - #logger.debug(f"After filtering...found new {len(md5_hashes)} hashes to scan") + # logger.debug(f"After filtering...found new {len(md5_hashes)} hashes to scan") analyze_binaries_and_queue_chunked(scanning_promises_queue, md5_hashes) @@ -345,15 +354,17 @@ def perform(yara_rule_dir, conn, scanning_promises_queue): conn.commit() # execute the configured script execute_script() - + # restore start for elapsed_time start_datetime = datetime.now() # restore cursor cur = get_binary_file_cursor(conn, start_date_binaries) - rows = cur.fetchmany() + rows = cur.fetchmany(2000) num_total_binaries = len(rows) + # Closing since there are no more binaries of interest to scan + cur.close() conn.commit() From 8dc9418e02954a5a5fb35b3387fddd4713b99223 Mon Sep 17 00:00:00 2001 From: Burton Choinski Date: Mon, 4 Nov 2019 07:24:46 -0500 Subject: [PATCH 104/257] cleanup --- tasks.py | 1 - 1 file changed, 1 deletion(-) diff --git a/tasks.py b/tasks.py index 6d8e9e1..e724f69 100644 --- a/tasks.py +++ b/tasks.py @@ -82,7 +82,6 @@ def generate_yara_rule_map_hash(yara_rule_path: str) -> List: continue with open(os.path.join(yara_rule_path, fn), 'rb') as fp: data = fp.read() - # NOTE: Original logic resulted in a cumulative hash for each file (linking them) md5 = hashlib.md5() md5.update(data) temp_list.append(str(md5.hexdigest())) From 04bd5e8a9965eab356271602c11ad6c17788cb30 Mon Sep 17 00:00:00 2001 From: Burton Choinski Date: Mon, 4 Nov 2019 08:29:11 -0500 Subject: [PATCH 105/257] requirements cleanup --- requirements.txt | 49 ++++++++++++++---------------------------------- 1 file changed, 14 insertions(+), 35 deletions(-) diff --git a/requirements.txt b/requirements.txt index f62086b..9e5f2e8 100644 --- a/requirements.txt +++ 
b/requirements.txt @@ -1,35 +1,14 @@ -altgraph==0.16.1 -amqp==2.4.0 -attrdict==2.0.0 -billiard==3.5.0.5 -cachetools==3.0.0 -cbapi==1.3.6 -celery==4.2.1 -certifi==2018.11.29 -chardet==3.0.4 -humanfriendly==4.18 -idna==2.8 -kombu==4.2.2.post1 -macholib==1.11 -pbr==5.1.1 -peewee==3.8.1 -pefile==2018.8.8 -pika==0.12.0 -prompt-toolkit==2.0.7 -protobuf==3.6.1 -psycopg2==2.7.6.1 -psycopg2-binary==2.7.6.1 -Pygments==2.3.1 -PyInstaller==3.4 -python-dateutil==2.7.5 -pytz==2018.9 -PyYAML==3.13 -redis==3.0.1 -requests==2.21.0 -six==1.12.0 -solrq==1.1.1 -tendo==0.2.12 -urllib3==1.24.1 -vine==1.2.0 -wcwidth==0.1.7 -yara-python==3.8.1 +################################################################################ +# Package requirements +# All versions are latest viable at date of package version release +################################################################################ + +celery==4.3.0 # Mar 31, 2019 +humanfriendly==4.18 # Feb 21, 2019 +lockfile==0.12.2 # Nov 25, 2015 +peewee==3.11.2 # Sep 24, 2019 +psycopg2-binary==0.1.1 # Jul 16, 2018 +python-daemon==2.2.4 # Oct 27, 2019 +redis==3.3.11 # Oct 13, 2019 +requests==2.22.0 # May 16, 2019 +yara-python==3.11.0 # Oct 10, 2019 From 43096d6640f8c5044abfac76d88d609de742eeb9 Mon Sep 17 00:00:00 2001 From: Burton Choinski Date: Mon, 4 Nov 2019 08:35:10 -0500 Subject: [PATCH 106/257] requirements cleanup --- requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements.txt b/requirements.txt index 9e5f2e8..7fa083a 100644 --- a/requirements.txt +++ b/requirements.txt @@ -7,7 +7,7 @@ celery==4.3.0 # Mar 31, 2019 humanfriendly==4.18 # Feb 21, 2019 lockfile==0.12.2 # Nov 25, 2015 peewee==3.11.2 # Sep 24, 2019 -psycopg2-binary==0.1.1 # Jul 16, 2018 +psycopg2-binary==2.8.4 # Oct 20, 2019 python-daemon==2.2.4 # Oct 27, 2019 redis==3.3.11 # Oct 13, 2019 requests==2.22.0 # May 16, 2019 From e94b124e9fe2a6e657dfcf6bfda7441b599e478a Mon Sep 17 00:00:00 2001 From: Burton Choinski Date: Mon, 4 Nov 2019 08:52:22 -0500 Subject: [PATCH 107/257] save work --- src/analysis_result.py | 1 + config_handling.py => src/config_handling.py | 0 src/utilities.py | 2 +- 3 files changed, 2 insertions(+), 1 deletion(-) rename config_handling.py => src/config_handling.py (100%) diff --git a/src/analysis_result.py b/src/analysis_result.py index 2be695b..d66fe2e 100644 --- a/src/analysis_result.py +++ b/src/analysis_result.py @@ -1,5 +1,6 @@ # coding: utf-8 # Copyright © 2014-2019 VMware, Inc. All Rights Reserved. + import json from datetime import datetime diff --git a/config_handling.py b/src/config_handling.py similarity index 100% rename from config_handling.py rename to src/config_handling.py diff --git a/src/utilities.py b/src/utilities.py index d64f60f..3a2b927 100644 --- a/src/utilities.py +++ b/src/utilities.py @@ -10,7 +10,7 @@ __all__ = ["YARAHOME", "placehold"] # self location for the package; remember to update this if this file is moved! 
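Editor's note: the `utilities.py` hunk just below grows `YARAHOME` by one `dirname` because the module now lives a level deeper, under `src/`, so the package root is the parent of the module's own directory. Illustrated with a hypothetical install path:

    import os

    # hypothetical location of the module after the move into src/
    module_file = "/opt/cb-yara-connector/src/utilities.py"

    print(os.path.dirname(module_file))                   # /opt/cb-yara-connector/src
    print(os.path.dirname(os.path.dirname(module_file)))  # /opt/cb-yara-connector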
-YARAHOME = os.path.dirname(__file__) +YARAHOME = os.path.dirname(os.path.dirname(__file__)) def placehold(source: str) -> str: From fc6c1cefe81ddd7a6a21dd145f941e217aced0df Mon Sep 17 00:00:00 2001 From: Burton Choinski Date: Mon, 4 Nov 2019 09:09:14 -0500 Subject: [PATCH 108/257] save work --- requirements.txt | 24 +++++++++++++++--------- 1 file changed, 15 insertions(+), 9 deletions(-) diff --git a/requirements.txt b/requirements.txt index e40e7b3..3d97721 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,9 +1,15 @@ -celery -redis -humanfriendly -lockfile -peewee -psycopg2-binary -python-daemon -requests -yara-python +################################################################################ +# Package requirements +# All versions are latest viable at date of package version release +################################################################################ + +celery==4.3.0 # Mar 31, 2019 +humanfriendly==4.18 # Feb 21, 2019 +lockfile==0.12.2 # Nov 25, 2015 +peewee==3.11.2 # Sep 24, 2019 +psycopg2-binary==2.8.4 # Oct 20, 2019 +python-daemon==2.2.4 # Oct 27, 2019 +redis==3.3.11 # Oct 13, 2019 +requests==2.22.0 # May 16, 2019 +simplejson==3.16.0 # Jun 28, 2018 +yara-python==3.11.0 # Oct 10, 2019 From 82046240c87e3db48b455c06705a93c3fa251ef1 Mon Sep 17 00:00:00 2001 From: Burton Choinski Date: Mon, 4 Nov 2019 09:40:03 -0500 Subject: [PATCH 109/257] save work --- .gitignore | 2 +- README.md | 8 +++---- config_handling.py | 13 ++++++------ globals.py | 8 +++---- main.py | 7 +++---- requirements.txt | 1 + test/config/bogus_feed_database_dir.conf | 5 ++++- test/config/missing_concurrent_hashes.conf | 3 +++ test/config/missing_disable_rescan.conf | 3 +++ test/config/missing_feed_database_dir.conf | 6 +++++- test/config/missing_niceness.conf | 3 +++ test/config/missing_num_days_binaries.conf | 3 +++ test/config/missing_postgres_db.conf | 3 +++ test/config/missing_postgres_db2.conf | 3 +++ test/config/missing_postgres_host.conf | 3 +++ test/config/missing_postgres_host2.conf | 3 +++ test/config/missing_postgres_port.conf | 3 +++ test/config/missing_postgres_port2.conf | 3 +++ test/config/missing_postgres_username.conf | 3 +++ test/config/missing_postgres_username2.conf | 3 +++ test/config/missing_worker.conf | 3 +++ test/config/missing_worker2.conf | 3 +++ test/config/negative_vacuum_interval.conf | 1 + test/config/vacuum_script_dir.conf | 3 ++- test/config/vacuum_script_enabled.conf | 2 +- test/config/valid.conf | 3 +++ test/config/valid2.conf | 3 +++ test/test_configInit.py | 15 +++++++------- utilities.py | 23 --------------------- 29 files changed, 86 insertions(+), 56 deletions(-) delete mode 100644 utilities.py diff --git a/.gitignore b/.gitignore index 8b11bd0..442ae8c 100644 --- a/.gitignore +++ b/.gitignore @@ -43,4 +43,4 @@ config.ini binary.db # local storage not to be pushed up -local +# local diff --git a/README.md b/README.md index 1c6b02a..a6807de 100644 --- a/README.md +++ b/README.md @@ -20,9 +20,7 @@ to your desired location. > NOTES: -> 1) The use of `{YARA}` is a placeholder representing the location of the yara package's `main.py` file, -> allowing for the use of relative paths to the package itself. -> 2) All paths can use `~` to access your home directory, so you can locate files there as well. +> 1) All paths can use `~/` to allow the use of the user's home directory. #### Running Yara Agent Manually @@ -140,7 +138,7 @@ _[TBD]_ ; Directory for temporary yara rules storage ; WARNING: Put your yara rules with the yara agent. 
This is just temporary storage. ; - yara_rules_dir={YARA}/local/yara_rules + yara_rules_dir=./yara_rules * Copy, modify and save to `yara_worker.conf` @@ -193,7 +191,7 @@ its work. ; script to do, and use this option at your own discretion. ; vacuum_interval=-1 -vacuum_script={YARA}/scripts/vacuumscript.sh +vacuum_script=./scripts/vacuumscript.sh ``` ## Yara Agent Build Instructions (Centos 6) diff --git a/config_handling.py b/config_handling.py index ac66600..558137c 100644 --- a/config_handling.py +++ b/config_handling.py @@ -10,7 +10,6 @@ import globals from exceptions import CbInvalidConfig -from utilities import placehold logger = logging.getLogger(__name__) @@ -44,7 +43,7 @@ def __init__(self, config_file: str, output_file: str = None) -> None: :param config_file: The config file to validate :param output_file: the output file; if not specified assume we are a task worker (simplified validation) """ - self.abs_config = os.path.abspath(os.path.expanduser(placehold(config_file))) + self.abs_config = os.path.abspath(os.path.expanduser(config_file)) self.source = f"Config file '{self.abs_config}'" config = configparser.ConfigParser() @@ -64,7 +63,7 @@ def __init__(self, config_file: str, output_file: str = None) -> None: self._worker_check() if output_file is not None: - globals.g_output_file = os.path.abspath(os.path.expanduser(placehold(output_file))) + globals.g_output_file = os.path.abspath(os.path.expanduser(output_file)) logger.debug(f"NOTE: output file will be '{globals.g_output_file}'") self._extended_check() @@ -156,9 +155,9 @@ def _as_str(self, param: str, required: bool = False, default: str = None) -> Op def _as_path(self, param: str, required: bool = False, exists: bool = True, is_dir: bool = False, default: str = None, create_if_needed: bool = False) -> Optional[str]: """ - Get an string parameter from the configuration and treat it as a path, performing normalization - to produce an absolute path. a "~" at the beginning will be treated as the current user's home - directory; the placeholder "{YARA}" will be treated as the location of your yara package directory. + Get a string parameter from the configuration and treat it as a path, performing normalization + to produce an absolute path. a "~/" at the beginning will be treated as the current user's home + directory. :param param: Name of the configuration parameter :param required: True if this must be specified in the configuration @@ -173,7 +172,7 @@ def _as_path(self, param: str, required: bool = False, exists: bool = True, is_d if value is None: return value - value = os.path.abspath(os.path.expanduser(placehold(value))) + value = os.path.abspath(os.path.expanduser(value)) if exists: if not os.path.exists(value): if create_if_needed and is_dir: diff --git a/globals.py b/globals.py index 6a7494a..80270e1 100644 --- a/globals.py +++ b/globals.py @@ -3,8 +3,6 @@ ################################################################################ # This module contains global variables used by a single instance. 
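The `_as_path` behavior described in the docstring above boils down to `expanduser` plus `abspath`, with optional existence and directory checks. A standalone sketch of the same pattern (the function name and error type here are illustrative, not the connector's API):

```python
import os


def normalize_path(value: str, is_dir: bool = False, create_if_needed: bool = False) -> str:
    # "~/rules" expands to the user's home; bare relative paths resolve against the CWD
    value = os.path.abspath(os.path.expanduser(value))
    if not os.path.exists(value):
        if create_if_needed and is_dir:
            os.makedirs(value)
        else:
            raise ValueError(f"'{value}' does not exist")
    if is_dir and not os.path.isdir(value):
        raise ValueError(f"'{value}' is not a directory")
    return value
```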
-# -# A placeholder of "{YARA}" represents the location of this yara package ################################################################################ g_config = {} @@ -19,7 +17,7 @@ # remote info g_broker_url = "" -g_yara_rules_dir = "{YARA}/local/yara_rules" +g_yara_rules_dir = "./yara_rules" g_yara_rule_map = {} g_yara_rule_map_hash_list = [] @@ -38,6 +36,6 @@ # the vacuum interval, if 1 or greater, is the number of minutes between invocations of the # configured vacuum script g_vacuum_interval = -1 -g_vacuum_script = "{YARA}/scripts/vacuumscript.sh" +g_vacuum_script = "../scripts/vacuumscript.sh" -g_feed_database_dir = "{YARA}/local/feed_db" +g_feed_database_dir = "./feed_db" diff --git a/main.py b/main.py index 1204ae5..98972ae 100644 --- a/main.py +++ b/main.py @@ -29,7 +29,6 @@ from exceptions import SingleInstanceException from feed import CbFeed, CbFeedInfo, CbReport from tasks import analyze_binary, generate_rule_map, update_yara_rules_remote -from utilities import placehold logging_format = "%(asctime)s-%(name)s-%(lineno)d-%(levelname)s-%(message)s" logging.basicConfig(format=logging_format) @@ -396,12 +395,12 @@ def handle_arguments(): ) parser.add_argument( "--log-file", - default="{YARA}/local/yara_agent.log", + default="./yara_agent.log", help="Log file output (defaults to `local` folder)" ) parser.add_argument( "--output-file", - default="{YARA}/local/yara_feed.json", + default="./yara_feed.json", help="output feed file (defaults to `local` folder)" ) parser.add_argument( @@ -438,7 +437,7 @@ def main(): logger.setLevel(logging.DEBUG) if args.log_file: - use_log_file = os.path.abspath(os.path.expanduser(placehold(args.log_file))) + use_log_file = os.path.abspath(os.path.expanduser(args.log_file)) formatter = logging.Formatter(logging_format) handler = logging.handlers.RotatingFileHandler(use_log_file, maxBytes=10 * 1000000, backupCount=10) handler.setFormatter(formatter) diff --git a/requirements.txt b/requirements.txt index 7fa083a..c72cbdf 100644 --- a/requirements.txt +++ b/requirements.txt @@ -3,6 +3,7 @@ # All versions are latest viable at date of package version release ################################################################################ +cbapi==1.5.4 # Oct 24, 2019 celery==4.3.0 # Mar 31, 2019 humanfriendly==4.18 # Feb 21, 2019 lockfile==0.12.2 # Nov 25, 2015 diff --git a/test/config/bogus_feed_database_dir.conf b/test/config/bogus_feed_database_dir.conf index 1c69eae..0d8cef4 100644 --- a/test/config/bogus_feed_database_dir.conf +++ b/test/config/bogus_feed_database_dir.conf @@ -18,5 +18,8 @@ concurrent_hashes=8 disable_rescan=False num_days_binaries=365 +vacuum_interval=360 +vacuum_script=../scripts/vacuumscript.sh + ; file, not dir -feed_database_dir={YARA}/README.md +feed_database_dir=./__init__.py diff --git a/test/config/missing_concurrent_hashes.conf b/test/config/missing_concurrent_hashes.conf index 5d87506..284d424 100644 --- a/test/config/missing_concurrent_hashes.conf +++ b/test/config/missing_concurrent_hashes.conf @@ -18,3 +18,6 @@ niceness=1 concurrent_hashes= disable_rescan=False num_days_binaries=365 + +vacuum_interval=360 +vacuum_script=../scripts/vacuumscript.sh diff --git a/test/config/missing_disable_rescan.conf b/test/config/missing_disable_rescan.conf index 4a5078d..887cfa4 100644 --- a/test/config/missing_disable_rescan.conf +++ b/test/config/missing_disable_rescan.conf @@ -18,3 +18,6 @@ concurrent_hashes=8 ; undefined disable_rescan= num_days_binaries=365 + +vacuum_interval=360 +vacuum_script=../scripts/vacuumscript.sh diff 
--git a/test/config/missing_feed_database_dir.conf b/test/config/missing_feed_database_dir.conf index 6fb4932..143bb87 100644 --- a/test/config/missing_feed_database_dir.conf +++ b/test/config/missing_feed_database_dir.conf @@ -18,4 +18,8 @@ concurrent_hashes=8 disable_rescan=False num_days_binaries=365 -feed_database_dir={YARA}/local/no-such-directory +vacuum_interval=360 +vacuum_script=../scripts/vacuumscript.sh + +; invalid path +feed_database_dir=./no-such-directory diff --git a/test/config/missing_niceness.conf b/test/config/missing_niceness.conf index 36d1715..c220bd1 100644 --- a/test/config/missing_niceness.conf +++ b/test/config/missing_niceness.conf @@ -18,3 +18,6 @@ niceness= concurrent_hashes=8 disable_rescan=False num_days_binaries=365 + +vacuum_interval=360 +vacuum_script=../scripts/vacuumscript.sh diff --git a/test/config/missing_num_days_binaries.conf b/test/config/missing_num_days_binaries.conf index 1cc21fa..5ba6694 100644 --- a/test/config/missing_num_days_binaries.conf +++ b/test/config/missing_num_days_binaries.conf @@ -18,3 +18,6 @@ concurrent_hashes=8 disable_rescan=False ; undefined num_days_binaries= + +vacuum_interval=360 +vacuum_script=../scripts/vacuumscript.sh diff --git a/test/config/missing_postgres_db.conf b/test/config/missing_postgres_db.conf index 04b0589..a6f6efa 100644 --- a/test/config/missing_postgres_db.conf +++ b/test/config/missing_postgres_db.conf @@ -17,3 +17,6 @@ niceness=1 concurrent_hashes=8 disable_rescan=False num_days_binaries=365 + +vacuum_interval=360 +vacuum_script=../scripts/vacuumscript.sh diff --git a/test/config/missing_postgres_db2.conf b/test/config/missing_postgres_db2.conf index cd02280..4c22cb3 100644 --- a/test/config/missing_postgres_db2.conf +++ b/test/config/missing_postgres_db2.conf @@ -18,3 +18,6 @@ niceness=1 concurrent_hashes=8 disable_rescan=False num_days_binaries=365 + +vacuum_interval=360 +vacuum_script=../scripts/vacuumscript.sh diff --git a/test/config/missing_postgres_host.conf b/test/config/missing_postgres_host.conf index cc4b323..60b2cd6 100644 --- a/test/config/missing_postgres_host.conf +++ b/test/config/missing_postgres_host.conf @@ -18,3 +18,6 @@ niceness=1 concurrent_hashes=8 disable_rescan=False num_days_binaries=365 + +vacuum_interval=360 +vacuum_script=../scripts/vacuumscript.sh diff --git a/test/config/missing_postgres_host2.conf b/test/config/missing_postgres_host2.conf index 4581a39..23b8cb1 100644 --- a/test/config/missing_postgres_host2.conf +++ b/test/config/missing_postgres_host2.conf @@ -19,3 +19,6 @@ niceness=1 concurrent_hashes=8 disable_rescan=False num_days_binaries=365 + +vacuum_interval=360 +vacuum_script=../scripts/vacuumscript.sh diff --git a/test/config/missing_postgres_port.conf b/test/config/missing_postgres_port.conf index 8b69a87..9b471ed 100644 --- a/test/config/missing_postgres_port.conf +++ b/test/config/missing_postgres_port.conf @@ -17,3 +17,6 @@ niceness=1 concurrent_hashes=8 disable_rescan=False num_days_binaries=365 + +vacuum_interval=360 +vacuum_script=../scripts/vacuumscript.sh diff --git a/test/config/missing_postgres_port2.conf b/test/config/missing_postgres_port2.conf index e88c40f..fe0ab83 100644 --- a/test/config/missing_postgres_port2.conf +++ b/test/config/missing_postgres_port2.conf @@ -18,3 +18,6 @@ niceness=1 concurrent_hashes=8 disable_rescan=False num_days_binaries=365 + +vacuum_interval=360 +vacuum_script=../scripts/vacuumscript.sh diff --git a/test/config/missing_postgres_username.conf b/test/config/missing_postgres_username.conf index f3b2a50..e809df5 100644 
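Each of these worker configs now carries the pair `vacuum_interval=360` and `vacuum_script=../scripts/vacuumscript.sh`. The interval is in minutes, and any value below 1 leaves the script disabled; a sketch of the gating check, mirroring the logic `perform()` adopts in patch 112:

```python
from datetime import datetime

g_vacuum_interval = 360  # minutes between vacuum runs; -1 disables the script
vacuum_window_start = datetime.now()


def vacuum_due() -> bool:
    # Fire only when vacuuming is enabled AND the configured window has elapsed.
    if g_vacuum_interval <= 0:
        return False
    seconds_since_start = (datetime.now() - vacuum_window_start).seconds
    return seconds_since_start >= g_vacuum_interval * 60
```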
--- a/test/config/missing_postgres_username.conf +++ b/test/config/missing_postgres_username.conf @@ -17,3 +17,6 @@ niceness=1 concurrent_hashes=8 disable_rescan=False num_days_binaries=365 + +vacuum_interval=360 +vacuum_script=../scripts/vacuumscript.sh diff --git a/test/config/missing_postgres_username2.conf b/test/config/missing_postgres_username2.conf index 12e121e..f3a51cf 100644 --- a/test/config/missing_postgres_username2.conf +++ b/test/config/missing_postgres_username2.conf @@ -18,3 +18,6 @@ niceness=1 concurrent_hashes=8 disable_rescan=False num_days_binaries=365 + +vacuum_interval=360 +vacuum_script=../scripts/vacuumscript.sh diff --git a/test/config/missing_worker.conf b/test/config/missing_worker.conf index ed07cae..25d19e5 100644 --- a/test/config/missing_worker.conf +++ b/test/config/missing_worker.conf @@ -16,3 +16,6 @@ niceness=1 concurrent_hashes=8 disable_rescan=False num_days_binaries=365 + +vacuum_interval=360 +vacuum_script=../scripts/vacuumscript.sh diff --git a/test/config/missing_worker2.conf b/test/config/missing_worker2.conf index 123fb8e..80a3132 100644 --- a/test/config/missing_worker2.conf +++ b/test/config/missing_worker2.conf @@ -17,3 +17,6 @@ niceness=1 concurrent_hashes=8 disable_rescan=False num_days_binaries=365 + +vacuum_interval=360 +vacuum_script=../scripts/vacuumscript.sh diff --git a/test/config/negative_vacuum_interval.conf b/test/config/negative_vacuum_interval.conf index 03d182a..0690263 100644 --- a/test/config/negative_vacuum_interval.conf +++ b/test/config/negative_vacuum_interval.conf @@ -19,3 +19,4 @@ disable_rescan=False num_days_binaries=365 vacuum_interval=-20 +vacuum_script=../scripts/vacuumscript.sh diff --git a/test/config/vacuum_script_dir.conf b/test/config/vacuum_script_dir.conf index 38e5704..393d058 100644 --- a/test/config/vacuum_script_dir.conf +++ b/test/config/vacuum_script_dir.conf @@ -19,4 +19,5 @@ disable_rescan=False num_days_binaries=365 vacuum_interval=360 -vacuum_script={YARA}/test/rules +; invalid, is dir +vacuum_script=./rules diff --git a/test/config/vacuum_script_enabled.conf b/test/config/vacuum_script_enabled.conf index 45d1f79..dcedd0f 100644 --- a/test/config/vacuum_script_enabled.conf +++ b/test/config/vacuum_script_enabled.conf @@ -19,4 +19,4 @@ disable_rescan=False num_days_binaries=365 vacuum_interval=360 -vacuum_script={YARA}/scripts/vacuumscript.sh +vacuum_script=../scripts/vacuumscript.sh diff --git a/test/config/valid.conf b/test/config/valid.conf index cdbdea9..dcedd0f 100644 --- a/test/config/valid.conf +++ b/test/config/valid.conf @@ -17,3 +17,6 @@ niceness=1 concurrent_hashes=8 disable_rescan=False num_days_binaries=365 + +vacuum_interval=360 +vacuum_script=../scripts/vacuumscript.sh diff --git a/test/config/valid2.conf b/test/config/valid2.conf index 5907483..e5c748f 100644 --- a/test/config/valid2.conf +++ b/test/config/valid2.conf @@ -16,3 +16,6 @@ niceness=1 concurrent_hashes=8 disable_rescan=False num_days_binaries=365 + +vacuum_interval=360 +vacuum_script=../scripts/vacuumscript.sh diff --git a/test/test_configInit.py b/test/test_configInit.py index a2fa44d..acb11f7 100644 --- a/test/test_configInit.py +++ b/test/test_configInit.py @@ -7,7 +7,6 @@ import globals from config_handling import ConfigurationInit from exceptions import CbInvalidConfig -from utilities import placehold TESTS = os.path.abspath(os.path.dirname(__file__)) @@ -21,7 +20,7 @@ def setUp(self): globals.g_cb_server_url = 'https://127.0.0.1' globals.g_cb_server_token = '' globals.g_broker_url = '' - globals.g_yara_rules_dir = 
'{YARA}/local/yara_rules' + globals.g_yara_rules_dir = './yara_rules' globals.g_yara_rule_map = {} globals.g_yara_rule_map_hash_list = [] globals.g_postgres_host = '127.0.0.1' @@ -35,8 +34,8 @@ def setUp(self): globals.g_disable_rescan = True globals.g_num_days_binaries = 365 globals.g_vacuum_interval = -1 - globals.g_vacuum_script = '{YARA}/scripts/vacuumscript.sh' - globals.g_feed_database_dir = "{YARA}/local" + globals.g_vacuum_script = './scripts/vacuumscript.sh' + globals.g_feed_database_dir = "./feed_db" def test_01_missing_config(self): """ @@ -388,13 +387,15 @@ def test_20a_config_feed_database_dir_not_exists(self): """ Ensure that config with feed database directory that does not exist will create that directory. """ - path = os.path.abspath(placehold("{YARA}/local/no-such-directory")) - self.assertFalse(os.path.exists(path)) + path = os.path.abspath("./no-such-directory") + if os.path.exists(path): + os.rmdir(path) try: ConfigurationInit(os.path.join(TESTS, "config", "missing_feed_database_dir.conf"), "sample.json") self.assertTrue(os.path.exists(path)) finally: - os.rmdir(path) + if os.path.exists(path): + os.rmdir(path) def test_20b_config_feed_database_dir_not_directory(self): """ diff --git a/utilities.py b/utilities.py deleted file mode 100644 index d64f60f..0000000 --- a/utilities.py +++ /dev/null @@ -1,23 +0,0 @@ -# coding: utf-8 -# Copyright © 2014-2019 VMware, Inc. All Rights Reserved. - -################################################################################ -# This file contains various package-wide utility functions -################################################################################ - -import os - -__all__ = ["YARAHOME", "placehold"] - -# self location for the package; remember to update this if this file is moved! -YARAHOME = os.path.dirname(__file__) - - -def placehold(source: str) -> str: - """ - Locate any important string placeholders and substitute live values for them. - :param source: source string to convert - :return: converted string - """ - source = source.replace("{YARA}", YARAHOME) - return source From dd8df4ff31d3c6b70c3e6c4d36be327c82154f7a Mon Sep 17 00:00:00 2001 From: Burton Choinski Date: Mon, 4 Nov 2019 09:44:36 -0500 Subject: [PATCH 110/257] Removed placeholders code and comments referencing them. --- .gitignore | 6 +++--- local/README.md | 7 ------- local/yara_rules/README.md | 8 -------- samples/sample_local.conf | 13 ++++++------- samples/sample_remote.conf | 11 +++++------ 5 files changed, 14 insertions(+), 31 deletions(-) delete mode 100644 local/README.md delete mode 100644 local/yara_rules/README.md diff --git a/.gitignore b/.gitignore index 442ae8c..c0f6501 100644 --- a/.gitignore +++ b/.gitignore @@ -40,7 +40,7 @@ nosetests.xml config.ini # created local DB -binary.db +feed_db -# local storage not to be pushed up -# local +# created local rules +yara_rules diff --git a/local/README.md b/local/README.md deleted file mode 100644 index 45d2f49..0000000 --- a/local/README.md +++ /dev/null @@ -1,7 +0,0 @@ -# Yara Package Local Storage -This folder is here for storing any locally created artifacts. It is marked with `.gitignore` -so that any files here are not pushed to the remote repository. - -Enter `{YARA}/local` for any configuration path that that you wish to use this location, -using subfolders for clearer organization. One subfolder, `yara_rules` is already provided -for local rule files. 
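Dropping `placehold()` means any config written for the old layout keeps its literal `{YARA}` tokens and will now fail path validation. A one-off migration sketch; the token syntax comes from the deleted helper above, while the install location is an assumption to adjust:

```python
import os

PACKAGE_ROOT = os.path.expanduser("~/cb-yara-connector")  # assumed old package location


def migrate_config(path: str) -> None:
    # Replace the retired "{YARA}" placeholder with a concrete absolute path.
    with open(path) as fp:
        text = fp.read()
    with open(path, "w") as fp:
        fp.write(text.replace("{YARA}", PACKAGE_ROOT))
```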
diff --git a/local/yara_rules/README.md b/local/yara_rules/README.md deleted file mode 100644 index 2c16b18..0000000 --- a/local/yara_rules/README.md +++ /dev/null @@ -1,8 +0,0 @@ -# local.yara_rules -This folder can be used a convenient location to locate your Yara rules. It can be defined -in your configuration file as: -```ini -yara_rules_dir={YARA}/local/yara_rules -``` - -It is suggested that subfolders be used to organize any complex and differing rule sets. diff --git a/samples/sample_local.conf b/samples/sample_local.conf index b6d4b26..4756138 100644 --- a/samples/sample_local.conf +++ b/samples/sample_local.conf @@ -1,9 +1,8 @@ ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;; Sample local worker config file ;; -;; Where seen, the placeholder {YARA} will be replaced by the script with -;; the location of yara package being used. You may also use "~" if you wish -;; to locate files or directories in your home folder +;; You may also use "~" if you wish to locate files or directories in your +;; home folder ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; [general] @@ -18,7 +17,7 @@ cb_server_token= ; ; path to directory containing yara rules ; -yara_rules_dir={YARA}/local/yara_rules +yara_rules_dir=./yara_rules ; ; Cb Response postgres Database settings; insert posgres password as used in cb.conf for `postgres_password` @@ -52,10 +51,10 @@ disable_rescan=True num_days_binaries=365 ; -; The feed database directory is where locata database work files are stored. If the directory does not exist +; The feed database directory is where local database work files are stored. If the directory does not exist ; it will be created. ; -feed_database_dir={YARA}/local/feed_db +feed_database_dir=./feed_db ; @@ -66,4 +65,4 @@ feed_database_dir={YARA}/local/feed_db ; script to do, and use this option at your own discretion. ; vacuum_interval=-1 -vacuum_script={YARA}/scripts/vacuumscript.sh +vacuum_script=./scripts/vacuumscript.sh diff --git a/samples/sample_remote.conf b/samples/sample_remote.conf index 25930bd..1666d4b 100644 --- a/samples/sample_remote.conf +++ b/samples/sample_remote.conf @@ -1,9 +1,8 @@ ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;; Sample remote worker config file ;; -;; Where seen, the placeholder {YARA} will be replaced by the script with -;; the location of yara package being used. You may also use "~" if you wish -;; to locate files or directories in your home folder +;; You may also use "~" if you wish to locate files or directories in your +;; home folder ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; [general] @@ -18,7 +17,7 @@ broker_url=redis://127.0.0.1 ; ; path to directory containing yara rules ; -yara_rules_dir={YARA}/local/yara_rules +yara_rules_dir=./yara_rules ; ; Cb Response postgres Database settings; insert posgres password as used in cb.conf for `postgres_password` @@ -56,7 +55,7 @@ num_days_binaries=365 ; The feed database directory is where local database work files are stored. If the directory does not exist ; it will be created. ; -feed_database_dir={YARA}/local/feed_db +feed_database_dir=./feed_db ; @@ -67,4 +66,4 @@ feed_database_dir={YARA}/local/feed_db ; script to do, and use this option at your own discretion. 
; vacuum_interval=-1 -vacuum_script={YARA}/scripts/vacuumscript.sh +vacuum_script=./scripts/vacuumscript.sh From 23a6cc215244923a551b9949b671ae0f7318ac57 Mon Sep 17 00:00:00 2001 From: Burton Choinski Date: Mon, 4 Nov 2019 09:59:38 -0500 Subject: [PATCH 111/257] saved work for branch swap --- README.md | 8 +-- src/analysis_result.py | 11 --- src/config_handling.py | 11 ++- src/globals.py | 8 +-- src/main.py | 74 ++++++++------------- src/utilities.py | 23 ------- test/config/bogus_feed_database_dir.conf | 5 +- test/config/missing_concurrent_hashes.conf | 3 + test/config/missing_disable_rescan.conf | 3 + test/config/missing_feed_database_dir.conf | 6 +- test/config/missing_niceness.conf | 3 + test/config/missing_num_days_binaries.conf | 3 + test/config/missing_postgres_db.conf | 3 + test/config/missing_postgres_db2.conf | 3 + test/config/missing_postgres_host.conf | 3 + test/config/missing_postgres_host2.conf | 3 + test/config/missing_postgres_port.conf | 3 + test/config/missing_postgres_port2.conf | 3 + test/config/missing_postgres_username.conf | 3 + test/config/missing_postgres_username2.conf | 3 + test/config/missing_worker.conf | 3 + test/config/missing_worker2.conf | 3 + test/config/negative_vacuum_interval.conf | 1 + test/config/vacuum_script_dir.conf | 3 +- test/config/vacuum_script_enabled.conf | 2 +- test/config/valid.conf | 3 + test/config/valid2.conf | 3 + test/test_configInit.py | 15 +++-- 28 files changed, 108 insertions(+), 107 deletions(-) delete mode 100644 src/utilities.py diff --git a/README.md b/README.md index 1c6b02a..a6807de 100644 --- a/README.md +++ b/README.md @@ -20,9 +20,7 @@ to your desired location. > NOTES: -> 1) The use of `{YARA}` is a placeholder representing the location of the yara package's `main.py` file, -> allowing for the use of relative paths to the package itself. -> 2) All paths can use `~` to access your home directory, so you can locate files there as well. +> 1) All paths can use `~/` to allow the use of the user's home directory. #### Running Yara Agent Manually @@ -140,7 +138,7 @@ _[TBD]_ ; Directory for temporary yara rules storage ; WARNING: Put your yara rules with the yara agent. This is just temporary storage. ; - yara_rules_dir={YARA}/local/yara_rules + yara_rules_dir=./yara_rules * Copy, modify and save to `yara_worker.conf` @@ -193,7 +191,7 @@ its work. ; script to do, and use this option at your own discretion. ; vacuum_interval=-1 -vacuum_script={YARA}/scripts/vacuumscript.sh +vacuum_script=./scripts/vacuumscript.sh ``` ## Yara Agent Build Instructions (Centos 6) diff --git a/src/analysis_result.py b/src/analysis_result.py index d66fe2e..dc63a94 100644 --- a/src/analysis_result.py +++ b/src/analysis_result.py @@ -1,9 +1,6 @@ # coding: utf-8 # Copyright © 2014-2019 VMware, Inc. All Rights Reserved. 
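The `broker_url` in the remote sample above is handed straight to Celery, which uses the same URL for both the broker and the result backend (the `app.conf.update` call visible in `_worker_check()` later in this series). A minimal sketch of that wiring, assuming the Redis broker from the sample:

```python
import configparser

from celery import Celery

app = Celery("yaraconnector")

config = configparser.ConfigParser()
config.read("yara_worker.conf")

# One URL serves as both broker and result backend, as in _worker_check().
broker = config["general"]["broker_url"]  # e.g. redis://127.0.0.1
app.conf.update(broker_url=broker, result_backend=broker)
```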
-import json -from datetime import datetime - class AnalysisResult(object): """ @@ -23,11 +20,3 @@ def __init__(self, md5: str, score: int = 0, short_result: str = '', long_result self.stop_future_scans = stop_future_scans self.binary_not_available = binary_not_available self.misc = misc - - def toJSON(self): - dict_rep = self.__dict__ - for key, value in dict_rep.items(): - if isinstance(value, datetime): - dict_rep[key] = value.timetuple() - - return json.dumps(dict_rep) diff --git a/src/config_handling.py b/src/config_handling.py index ac66600..9f3046a 100644 --- a/src/config_handling.py +++ b/src/config_handling.py @@ -10,7 +10,6 @@ import globals from exceptions import CbInvalidConfig -from utilities import placehold logger = logging.getLogger(__name__) @@ -44,7 +43,7 @@ def __init__(self, config_file: str, output_file: str = None) -> None: :param config_file: The config file to validate :param output_file: the output file; if not specified assume we are a task worker (simplified validation) """ - self.abs_config = os.path.abspath(os.path.expanduser(placehold(config_file))) + self.abs_config = os.path.abspath(os.path.expanduser(config_file)) self.source = f"Config file '{self.abs_config}'" config = configparser.ConfigParser() @@ -64,7 +63,7 @@ def __init__(self, config_file: str, output_file: str = None) -> None: self._worker_check() if output_file is not None: - globals.g_output_file = os.path.abspath(os.path.expanduser(placehold(output_file))) + globals.g_output_file = os.path.abspath(os.path.expanduser(output_file)) logger.debug(f"NOTE: output file will be '{globals.g_output_file}'") self._extended_check() @@ -157,8 +156,8 @@ def _as_path(self, param: str, required: bool = False, exists: bool = True, is_d default: str = None, create_if_needed: bool = False) -> Optional[str]: """ Get an string parameter from the configuration and treat it as a path, performing normalization - to produce an absolute path. a "~" at the beginning will be treated as the current user's home - directory; the placeholder "{YARA}" will be treated as the location of your yara package directory. + to produce an absolute path. a "~/" at the beginning will be treated as the current user's home + directory. :param param: Name of the configuration parameter :param required: True if this must be specified in the configuration @@ -173,7 +172,7 @@ def _as_path(self, param: str, required: bool = False, exists: bool = True, is_d if value is None: return value - value = os.path.abspath(os.path.expanduser(placehold(value))) + value = os.path.abspath(os.path.expanduser(value)) if exists: if not os.path.exists(value): if create_if_needed and is_dir: diff --git a/src/globals.py b/src/globals.py index 4fe2d52..e1a3c64 100644 --- a/src/globals.py +++ b/src/globals.py @@ -3,8 +3,6 @@ ################################################################################ # This module contains global variables used by a single instance. 
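The deleted `toJSON()` above existed to make `datetime` attributes JSON-safe by converting them to time tuples. Callers that still need that output can apply the same conversion externally; a sketch equivalent to the removed method:

```python
import json
from datetime import datetime


def analysis_result_to_json(result) -> str:
    # Mirror the deleted AnalysisResult.toJSON(): datetimes become time tuples,
    # which json serializes as plain arrays.
    rep = dict(result.__dict__)
    for key, value in rep.items():
        if isinstance(value, datetime):
            rep[key] = value.timetuple()
    return json.dumps(rep)
```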
-# -# A placeholder of "{YARA}" represents the location of this yara package ################################################################################ g_config = {} @@ -19,7 +17,7 @@ # remote info g_broker_url = "" -g_yara_rules_dir = "{YARA}/local/yara_rules" +g_yara_rules_dir = "./yara_rules" g_yara_rule_map = {} g_yara_rule_map_hash_list = [] @@ -35,10 +33,10 @@ g_disable_rescan = True g_num_days_binaries = 365 -g_feed_database_path = "./" +g_feed_database_dir = "./feed_db" g_scanning_interval = 360 g_worker_network_timeout=5 -g_vacuum_seconds = -1 +g_vacuum_interval = -1 g_vacuum_script = "./scripts/vacuumscript.sh" diff --git a/src/main.py b/src/main.py index 7f22e51..b802e3f 100644 --- a/src/main.py +++ b/src/main.py @@ -5,29 +5,25 @@ import logging import logging.handlers import os +import signal import subprocess import sys +import threading import time -import signal import traceback -from daemon import daemon -import lockfile -from functools import partial from datetime import datetime, timedelta -from typing import List, Optional - -import threading -from threading import Thread, Event, Barrier -from queue import Queue, Empty +from functools import partial +from queue import Empty, Queue +from threading import Event, Thread +from typing import List import humanfriendly +import lockfile import psycopg2 -import sched - # noinspection PyPackageRequirements import yara -from celery import group from celery.bin import worker +from daemon import daemon from peewee import SqliteDatabase import globals @@ -35,14 +31,7 @@ from binary_database import BinaryDetonationResult, db from exceptions import CbInvalidConfig from feed import CbFeed, CbFeedInfo, CbReport -from tasks import ( - analyze_binary, - app, - generate_rule_map, - update_yara_rules_remote, - analyze_bins, -) -from utilities import placehold +from tasks import analyze_binary, app, generate_rule_map, update_yara_rules_remote logging_format = "%(asctime)s-%(name)s-%(lineno)d-%(levelname)s-%(message)s" logging.basicConfig(format=logging_format) @@ -67,18 +56,17 @@ def promise_worker(exit_event, scanning_promise_queue, scanning_results_queue): else: exit_event.wait(1) finally: - exit_event.set() + exit_event.set() logger.debug("PROMISE WORKING EXITING") -""" Sqlite is not meant to be thread-safe - -This single-worker-thread writes the result(s) to the configured sqlite file to hold the feed-metadata and seen binaries/results from scans -""" - - def results_worker(exit_event, results_queue): + """ + Sqlite is not meant to be thread-safe. + + This single-worker-thread writes the result(s) to the configured sqlite file to hold the feed-metadata and seen binaries/results from scans + """ try: while not (exit_event.is_set()): if not (results_queue.empty()): @@ -145,7 +133,7 @@ def generate_feed_from_db() -> None: feed = CbFeed(feedinfo, reports) logger.debug("Writing out feed '{0}' to disk".format(feedinfo.data["name"])) - with open(globals.output_file, "w") as fp: + with open(globals.g_output_file, "w") as fp: fp.write(feed.dump()) @@ -165,13 +153,10 @@ def generate_yara_rule_map_hash(yara_rule_path: str) -> None: continue with open(os.path.join(yara_rule_path, fn), "rb") as fp: data = fp.read() - # NOTE: Original logic resulted in a cumulative hash for each file (linking them) md5 = hashlib.md5() md5.update(data) temp_list.append(str(md5.hexdigest())) - # FUTURE: Would this be better served as a map keyed by md5, with the value being the rule text, as for the - # following method? 
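The `generate_yara_rule_map_hash()` fragment around this hunk computes one md5 per rule file and keeps a sorted list as a fingerprint of the rule set. The same idea in isolation, a sketch assuming a flat directory of `.yar` files:

```python
import hashlib
import os


def rule_map_hash_list(yara_rule_path: str) -> list:
    # One md5 per rule file; sorting makes the fingerprint order-independent.
    hashes = []
    for fn in os.listdir(yara_rule_path):
        if not fn.lower().endswith(".yar"):
            continue
        with open(os.path.join(yara_rule_path, fn), "rb") as fp:
            hashes.append(hashlib.md5(fp.read()).hexdigest())
    return sorted(hashes)
```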
globals.g_yara_rule_map_hash_list = temp_list globals.g_yara_rule_map_hash_list.sort() @@ -214,7 +199,7 @@ def analyze_binaries_and_queue_chunked(scanning_promise_queue, md5_hashes): Attempts to do work in parrallelized chunks of MAX_HASHES grouped """ promise = analyze_binary.chunks( - [(mh,) for mh in md5_hashes], globals.MAX_HASHES + [(mh,) for mh in md5_hashes], globals.g_max_hashes ).apply_async() for prom in promise.children: scanning_promise_queue.put(prom) @@ -347,7 +332,7 @@ def perform(yara_rule_dir, conn, scanning_promises_queue): script to vacuum the table by hand before continuing """ - if elapsed_time > globals.g_vacuum_seconds and globals.g_vacuum_seconds > 0: + if elapsed_time > globals.g_vacuum_interval and globals.g_vacuum_seconds > 0: # Make sure the cursor is closed, and we are commited() # to release SHARED access to the table cur.close() @@ -373,7 +358,6 @@ def perform(yara_rule_dir, conn, scanning_promises_queue): def _check_hash_against_feed(md5_hash): - query = BinaryDetonationResult.select().where( BinaryDetonationResult.md5 == md5_hash ) @@ -400,9 +384,8 @@ def save_results_with_logging(analysis_results): def save_and_log( - analysis_results, start_time, num_binaries_skipped, num_total_binaries + analysis_results, start_time, num_binaries_skipped, num_total_binaries ): - logger.debug(analysis_results) if analysis_results: for analysis_result in analysis_results: @@ -420,7 +403,7 @@ def save_and_log( def _rule_logging( - start_time: float, num_binaries_skipped: int, num_total_binaries: int + start_time: float, num_binaries_skipped: int, num_total_binaries: int ) -> None: """ Simple method to log yara work. @@ -502,8 +485,8 @@ def verify_config(config_file: str, output_file: str = None) -> None: if "worker_type" in the_config: if ( - the_config["worker_type"] == "local" - or the_config["worker_type"].strip() == "" + the_config["worker_type"] == "local" + or the_config["worker_type"].strip() == "" ): globals.g_remote = False # 'local' or empty definition elif the_config["worker_type"] == "remote": @@ -523,8 +506,8 @@ def verify_config(config_file: str, output_file: str = None) -> None: else: raise CbInvalidConfig(f"{header} is 'local' and missing 'cb_server_url'") if ( - "cb_server_token" in the_config - and the_config["cb_server_token"].strip() != "" + "cb_server_token" in the_config + and the_config["cb_server_token"].strip() != "" ): globals.g_cb_server_token = the_config["cb_server_token"] else: @@ -566,8 +549,8 @@ def verify_config(config_file: str, output_file: str = None) -> None: # NOTE: postgres_username has a default value in globals; use and warn if not defined if ( - "postgres_username" in the_config - and the_config["postgres_username"].strip() != "" + "postgres_username" in the_config + and the_config["postgres_username"].strip() != "" ): globals.g_postgres_username = the_config["postgres_username"] else: @@ -576,8 +559,8 @@ def verify_config(config_file: str, output_file: str = None) -> None: ) if ( - "postgres_password" in the_config - and the_config["postgres_password"].strip() != "" + "postgres_password" in the_config + and the_config["postgres_password"].strip() != "" ): globals.g_postgres_password = the_config["postgres_password"] else: @@ -879,7 +862,6 @@ def start_workers(exit_event, scanning_promises_queue, scanning_results_queue): class DatabaseScanningThread(Thread): - """ A worker thread that scans over the database for new hashes ever INTERVAL seconds Pushes work to scanning_promises_queue , exits when the event is triggered diff --git 
a/src/utilities.py b/src/utilities.py deleted file mode 100644 index 3a2b927..0000000 --- a/src/utilities.py +++ /dev/null @@ -1,23 +0,0 @@ -# coding: utf-8 -# Copyright © 2014-2019 VMware, Inc. All Rights Reserved. - -################################################################################ -# This file contains various package-wide utility functions -################################################################################ - -import os - -__all__ = ["YARAHOME", "placehold"] - -# self location for the package; remember to update this if this file is moved! -YARAHOME = os.path.dirname(os.path.dirname(__file__)) - - -def placehold(source: str) -> str: - """ - Locate any important string placeholders and substitute live values for them. - :param source: source string to convert - :return: converted string - """ - source = source.replace("{YARA}", YARAHOME) - return source diff --git a/test/config/bogus_feed_database_dir.conf b/test/config/bogus_feed_database_dir.conf index 1c69eae..0d8cef4 100644 --- a/test/config/bogus_feed_database_dir.conf +++ b/test/config/bogus_feed_database_dir.conf @@ -18,5 +18,8 @@ concurrent_hashes=8 disable_rescan=False num_days_binaries=365 +vacuum_interval=360 +vacuum_script=../scripts/vacuumscript.sh + ; file, not dir -feed_database_dir={YARA}/README.md +feed_database_dir=./__init__.py diff --git a/test/config/missing_concurrent_hashes.conf b/test/config/missing_concurrent_hashes.conf index 5d87506..284d424 100644 --- a/test/config/missing_concurrent_hashes.conf +++ b/test/config/missing_concurrent_hashes.conf @@ -18,3 +18,6 @@ niceness=1 concurrent_hashes= disable_rescan=False num_days_binaries=365 + +vacuum_interval=360 +vacuum_script=../scripts/vacuumscript.sh diff --git a/test/config/missing_disable_rescan.conf b/test/config/missing_disable_rescan.conf index 4a5078d..887cfa4 100644 --- a/test/config/missing_disable_rescan.conf +++ b/test/config/missing_disable_rescan.conf @@ -18,3 +18,6 @@ concurrent_hashes=8 ; undefined disable_rescan= num_days_binaries=365 + +vacuum_interval=360 +vacuum_script=../scripts/vacuumscript.sh diff --git a/test/config/missing_feed_database_dir.conf b/test/config/missing_feed_database_dir.conf index 6fb4932..143bb87 100644 --- a/test/config/missing_feed_database_dir.conf +++ b/test/config/missing_feed_database_dir.conf @@ -18,4 +18,8 @@ concurrent_hashes=8 disable_rescan=False num_days_binaries=365 -feed_database_dir={YARA}/local/no-such-directory +vacuum_interval=360 +vacuum_script=../scripts/vacuumscript.sh + +; invalid path +feed_database_dir=./no-such-directory diff --git a/test/config/missing_niceness.conf b/test/config/missing_niceness.conf index 36d1715..c220bd1 100644 --- a/test/config/missing_niceness.conf +++ b/test/config/missing_niceness.conf @@ -18,3 +18,6 @@ niceness= concurrent_hashes=8 disable_rescan=False num_days_binaries=365 + +vacuum_interval=360 +vacuum_script=../scripts/vacuumscript.sh diff --git a/test/config/missing_num_days_binaries.conf b/test/config/missing_num_days_binaries.conf index 1cc21fa..5ba6694 100644 --- a/test/config/missing_num_days_binaries.conf +++ b/test/config/missing_num_days_binaries.conf @@ -18,3 +18,6 @@ concurrent_hashes=8 disable_rescan=False ; undefined num_days_binaries= + +vacuum_interval=360 +vacuum_script=../scripts/vacuumscript.sh diff --git a/test/config/missing_postgres_db.conf b/test/config/missing_postgres_db.conf index 04b0589..a6f6efa 100644 --- a/test/config/missing_postgres_db.conf +++ b/test/config/missing_postgres_db.conf @@ -17,3 +17,6 @@ niceness=1 
concurrent_hashes=8 disable_rescan=False num_days_binaries=365 + +vacuum_interval=360 +vacuum_script=../scripts/vacuumscript.sh diff --git a/test/config/missing_postgres_db2.conf b/test/config/missing_postgres_db2.conf index cd02280..4c22cb3 100644 --- a/test/config/missing_postgres_db2.conf +++ b/test/config/missing_postgres_db2.conf @@ -18,3 +18,6 @@ niceness=1 concurrent_hashes=8 disable_rescan=False num_days_binaries=365 + +vacuum_interval=360 +vacuum_script=../scripts/vacuumscript.sh diff --git a/test/config/missing_postgres_host.conf b/test/config/missing_postgres_host.conf index cc4b323..60b2cd6 100644 --- a/test/config/missing_postgres_host.conf +++ b/test/config/missing_postgres_host.conf @@ -18,3 +18,6 @@ niceness=1 concurrent_hashes=8 disable_rescan=False num_days_binaries=365 + +vacuum_interval=360 +vacuum_script=../scripts/vacuumscript.sh diff --git a/test/config/missing_postgres_host2.conf b/test/config/missing_postgres_host2.conf index 4581a39..23b8cb1 100644 --- a/test/config/missing_postgres_host2.conf +++ b/test/config/missing_postgres_host2.conf @@ -19,3 +19,6 @@ niceness=1 concurrent_hashes=8 disable_rescan=False num_days_binaries=365 + +vacuum_interval=360 +vacuum_script=../scripts/vacuumscript.sh diff --git a/test/config/missing_postgres_port.conf b/test/config/missing_postgres_port.conf index 8b69a87..9b471ed 100644 --- a/test/config/missing_postgres_port.conf +++ b/test/config/missing_postgres_port.conf @@ -17,3 +17,6 @@ niceness=1 concurrent_hashes=8 disable_rescan=False num_days_binaries=365 + +vacuum_interval=360 +vacuum_script=../scripts/vacuumscript.sh diff --git a/test/config/missing_postgres_port2.conf b/test/config/missing_postgres_port2.conf index e88c40f..fe0ab83 100644 --- a/test/config/missing_postgres_port2.conf +++ b/test/config/missing_postgres_port2.conf @@ -18,3 +18,6 @@ niceness=1 concurrent_hashes=8 disable_rescan=False num_days_binaries=365 + +vacuum_interval=360 +vacuum_script=../scripts/vacuumscript.sh diff --git a/test/config/missing_postgres_username.conf b/test/config/missing_postgres_username.conf index f3b2a50..e809df5 100644 --- a/test/config/missing_postgres_username.conf +++ b/test/config/missing_postgres_username.conf @@ -17,3 +17,6 @@ niceness=1 concurrent_hashes=8 disable_rescan=False num_days_binaries=365 + +vacuum_interval=360 +vacuum_script=../scripts/vacuumscript.sh diff --git a/test/config/missing_postgres_username2.conf b/test/config/missing_postgres_username2.conf index 12e121e..f3a51cf 100644 --- a/test/config/missing_postgres_username2.conf +++ b/test/config/missing_postgres_username2.conf @@ -18,3 +18,6 @@ niceness=1 concurrent_hashes=8 disable_rescan=False num_days_binaries=365 + +vacuum_interval=360 +vacuum_script=../scripts/vacuumscript.sh diff --git a/test/config/missing_worker.conf b/test/config/missing_worker.conf index ed07cae..25d19e5 100644 --- a/test/config/missing_worker.conf +++ b/test/config/missing_worker.conf @@ -16,3 +16,6 @@ niceness=1 concurrent_hashes=8 disable_rescan=False num_days_binaries=365 + +vacuum_interval=360 +vacuum_script=../scripts/vacuumscript.sh diff --git a/test/config/missing_worker2.conf b/test/config/missing_worker2.conf index 123fb8e..80a3132 100644 --- a/test/config/missing_worker2.conf +++ b/test/config/missing_worker2.conf @@ -17,3 +17,6 @@ niceness=1 concurrent_hashes=8 disable_rescan=False num_days_binaries=365 + +vacuum_interval=360 +vacuum_script=../scripts/vacuumscript.sh diff --git a/test/config/negative_vacuum_interval.conf b/test/config/negative_vacuum_interval.conf index 
03d182a..0690263 100644 --- a/test/config/negative_vacuum_interval.conf +++ b/test/config/negative_vacuum_interval.conf @@ -19,3 +19,4 @@ disable_rescan=False num_days_binaries=365 vacuum_interval=-20 +vacuum_script=../scripts/vacuumscript.sh diff --git a/test/config/vacuum_script_dir.conf b/test/config/vacuum_script_dir.conf index 38e5704..393d058 100644 --- a/test/config/vacuum_script_dir.conf +++ b/test/config/vacuum_script_dir.conf @@ -19,4 +19,5 @@ disable_rescan=False num_days_binaries=365 vacuum_interval=360 -vacuum_script={YARA}/test/rules +; invalid, is dir +vacuum_script=./rules diff --git a/test/config/vacuum_script_enabled.conf b/test/config/vacuum_script_enabled.conf index 45d1f79..dcedd0f 100644 --- a/test/config/vacuum_script_enabled.conf +++ b/test/config/vacuum_script_enabled.conf @@ -19,4 +19,4 @@ disable_rescan=False num_days_binaries=365 vacuum_interval=360 -vacuum_script={YARA}/scripts/vacuumscript.sh +vacuum_script=../scripts/vacuumscript.sh diff --git a/test/config/valid.conf b/test/config/valid.conf index cdbdea9..dcedd0f 100644 --- a/test/config/valid.conf +++ b/test/config/valid.conf @@ -17,3 +17,6 @@ niceness=1 concurrent_hashes=8 disable_rescan=False num_days_binaries=365 + +vacuum_interval=360 +vacuum_script=../scripts/vacuumscript.sh diff --git a/test/config/valid2.conf b/test/config/valid2.conf index 5907483..e5c748f 100644 --- a/test/config/valid2.conf +++ b/test/config/valid2.conf @@ -16,3 +16,6 @@ niceness=1 concurrent_hashes=8 disable_rescan=False num_days_binaries=365 + +vacuum_interval=360 +vacuum_script=../scripts/vacuumscript.sh diff --git a/test/test_configInit.py b/test/test_configInit.py index a2fa44d..acb11f7 100644 --- a/test/test_configInit.py +++ b/test/test_configInit.py @@ -7,7 +7,6 @@ import globals from config_handling import ConfigurationInit from exceptions import CbInvalidConfig -from utilities import placehold TESTS = os.path.abspath(os.path.dirname(__file__)) @@ -21,7 +20,7 @@ def setUp(self): globals.g_cb_server_url = 'https://127.0.0.1' globals.g_cb_server_token = '' globals.g_broker_url = '' - globals.g_yara_rules_dir = '{YARA}/local/yara_rules' + globals.g_yara_rules_dir = './yara_rules' globals.g_yara_rule_map = {} globals.g_yara_rule_map_hash_list = [] globals.g_postgres_host = '127.0.0.1' @@ -35,8 +34,8 @@ def setUp(self): globals.g_disable_rescan = True globals.g_num_days_binaries = 365 globals.g_vacuum_interval = -1 - globals.g_vacuum_script = '{YARA}/scripts/vacuumscript.sh' - globals.g_feed_database_dir = "{YARA}/local" + globals.g_vacuum_script = './scripts/vacuumscript.sh' + globals.g_feed_database_dir = "./feed_db" def test_01_missing_config(self): """ @@ -388,13 +387,15 @@ def test_20a_config_feed_database_dir_not_exists(self): """ Ensure that config with feed database directory that does not exist will create that directory. 
""" - path = os.path.abspath(placehold("{YARA}/local/no-such-directory")) - self.assertFalse(os.path.exists(path)) + path = os.path.abspath("./no-such-directory") + if os.path.exists(path): + os.rmdir(path) try: ConfigurationInit(os.path.join(TESTS, "config", "missing_feed_database_dir.conf"), "sample.json") self.assertTrue(os.path.exists(path)) finally: - os.rmdir(path) + if os.path.exists(path): + os.rmdir(path) def test_20b_config_feed_database_dir_not_directory(self): """ From 042ffdb99dec512f055b7031b122a5f44bc92260 Mon Sep 17 00:00:00 2001 From: Burton Choinski Date: Mon, 4 Nov 2019 10:54:02 -0500 Subject: [PATCH 112/257] doc cleanup --- src/config_handling.py | 2 + src/globals.py | 4 +- src/main.py | 538 +++++++++++++---------------------------- src/singleton.py | 88 ------- src/tasks.py | 93 +------ 5 files changed, 173 insertions(+), 552 deletions(-) delete mode 100644 src/singleton.py diff --git a/src/config_handling.py b/src/config_handling.py index 9f3046a..12b2114 100644 --- a/src/config_handling.py +++ b/src/config_handling.py @@ -91,6 +91,8 @@ def _worker_check(self) -> None: value = self._as_str("broker_url", required=True) app.conf.update(broker_url=value, result_backend=value) + globals.g_worker_network_timeout = self._as_int("worker_network_timeout") + def _extended_check(self) -> None: """ Validate entries used by the main process. diff --git a/src/globals.py b/src/globals.py index e1a3c64..5390c01 100644 --- a/src/globals.py +++ b/src/globals.py @@ -9,6 +9,7 @@ g_output_file = "" g_remote = False +g_mode = "" # local info g_cb_server_url = "https://127.0.0.1" @@ -37,6 +38,7 @@ g_scanning_interval = 360 -g_worker_network_timeout=5 g_vacuum_interval = -1 g_vacuum_script = "./scripts/vacuumscript.sh" + +g_worker_network_timeout = 5 diff --git a/src/main.py b/src/main.py index b802e3f..2672ad7 100644 --- a/src/main.py +++ b/src/main.py @@ -1,5 +1,4 @@ import argparse -import configparser import hashlib import json import logging @@ -29,7 +28,7 @@ import globals from analysis_result import AnalysisResult from binary_database import BinaryDetonationResult, db -from exceptions import CbInvalidConfig +from config_handling import ConfigurationInit from feed import CbFeed, CbFeedInfo, CbReport from tasks import analyze_binary, app, generate_rule_map, update_yara_rules_remote @@ -272,24 +271,31 @@ def get_binary_file_cursor(conn, start_date_binaries): return cur -def execute_script(): - """ Execute the configured shell script """ - logger.warning("!!!Executing vacuum script!!!") - - target = os.path.join(os.getcwd(), globals.g_vacuum_script) - - prog = subprocess.Popen(target, shell=True, universal_newlines=True) +def execute_script() -> None: + """ + Execute a external maintenence script (vacuum script). 
+ """ + logger.info("----- Executing vacuum script ----------------------------------------") + prog = subprocess.Popen(globals.g_vacuum_script, shell=True, universal_newlines=True) stdout, stderr = prog.communicate() - logger.info(stdout) - logger.error(stderr) + if stdout is not None and len(stdout.strip()) > 0: + logger.info(stdout) + if stderr is not None and len(stderr.strip()) > 0: + logger.error(stderr) if prog.returncode: - logger.warning("program returned error code {0}".format(prog.returncode)) - logger.warning("!!!Done Executing vacuum script!!!") + logger.warning(f"program returned error code {prog.returncode}") + logger.info("---------------------------------------- Vacuum script completed -----\n") -def perform(yara_rule_dir, conn, scanning_promises_queue): - """ Main routine - checks the cbr modulestore/storfiles table for new hashes - by comparing the sliding-window (now - globals.g_num_days_binaries) with the contents of the feed database on disk""" +def perform(yara_rule_dir: str, conn, scanning_promises_queue: Queue): + """ + Main routine - checks the cbr modulestore/storfiles table for new hashes by comparing the sliding-window + with the contents of the feed database on disk. + + :param yara_rule_dir: location of the rules directory + :param conn: The connection (TODO: type) + :param scanning_promises_queue: + """ if globals.g_remote: logger.info("Uploading yara rules to workers...") generate_rule_map_remote(yara_rule_dir) @@ -301,21 +307,18 @@ def perform(yara_rule_dir, conn, scanning_promises_queue): start_time = time.time() - start_datetime = datetime.now() + # Determine our binaries window (date forward) + start_date_binaries = datetime.now() - timedelta(days=globals.g_num_days_binaries) - start_date_binaries = start_datetime - timedelta(days=globals.g_num_days_binaries) + # vacuum script window start + vacuum_window_start = datetime.now() cur = get_binary_file_cursor(conn, start_date_binaries) - rows = cur.fetchmany(2000) - num_total_binaries = len(rows) while num_total_binaries > 0: - - logger.info( - f"Enumerating modulestore...found {num_total_binaries} resident binaries" - ) + logger.info(f"Enumerating modulestore...found {len(rows)} resident binaries") md5_hashes = filter(_check_hash_against_feed, (row[0].hex() for row in rows)) @@ -323,8 +326,6 @@ def perform(yara_rule_dir, conn, scanning_promises_queue): analyze_binaries_and_queue_chunked(scanning_promises_queue, md5_hashes) - elapsed_time = (datetime.now() - start_datetime).total_seconds() - """ Holding the named-cursor through a large historical result set will cause storefiles table fragmentation @@ -332,26 +333,24 @@ def perform(yara_rule_dir, conn, scanning_promises_queue): script to vacuum the table by hand before continuing """ - if elapsed_time > globals.g_vacuum_interval and globals.g_vacuum_seconds > 0: - # Make sure the cursor is closed, and we are commited() - # to release SHARED access to the table - cur.close() - conn.commit() - # execute the configured script - execute_script() - # restore start for elapsed_time - start_datetime = datetime.now() - # restore cursor - cur = get_binary_file_cursor(conn, start_date_binaries) + if globals.g_vacuum_interval > 0: + seconds_since_start = (datetime.now() - vacuum_window_start).seconds + if seconds_since_start >= globals.g_vacuum_interval * 60: + # close connection + cur.close() + conn.commit() - rows = cur.fetchmany(2000) + execute_script() + vacuum_window_start = datetime.now() + + # get the connection back + cur = get_binary_file_cursor(conn, 
start_date_binaries) + rows = cur.fetchmany(2000) num_total_binaries = len(rows) # Closing since there are no more binaries of interest to scan - cur.close() - conn.commit() logger.debug("Exiting database sweep routine") @@ -383,9 +382,8 @@ def save_results_with_logging(analysis_results): save_results(analysis_results) -def save_and_log( - analysis_results, start_time, num_binaries_skipped, num_total_binaries -): +# noinspection PyUnusedFunction +def save_and_log(analysis_results, start_time, num_binaries_skipped, num_total_binaries): logger.debug(analysis_results) if analysis_results: for analysis_result in analysis_results: @@ -402,9 +400,7 @@ def save_and_log( _rule_logging(start_time, num_binaries_skipped, num_total_binaries) -def _rule_logging( - start_time: float, num_binaries_skipped: int, num_total_binaries: int -) -> None: +def _rule_logging(start_time: float, num_binaries_skipped: int, num_total_binaries: int) -> None: """ Simple method to log yara work. :param start_time: start time for the work @@ -437,333 +433,7 @@ def _rule_logging( logger.info("") -# noinspection DuplicatedCode -def verify_config(config_file: str, output_file: str = None) -> None: - """ - Validate the config file. - :param config_file: The config file to validate - :param output_file: the output file; if not specified equals config file plus ".json" - """ - abs_config = os.path.abspath(os.path.expanduser(placehold(config_file))) - header = f"Config file '{abs_config}'" - - config = configparser.ConfigParser() - if not os.path.exists(config_file): - raise CbInvalidConfig(f"{header} does not exist!") - - try: - config.read(config_file) - except Exception as err: - raise CbInvalidConfig(err) - - logger.debug(f"NOTE: using config file '{abs_config}'") - if not config.has_section("general"): - raise CbInvalidConfig(f"{header} does not have a 'general' section") - - globals.output_file = ( - output_file if output_file is not None else config_file.strip() + ".json" - ) - globals.output_file = os.path.abspath( - os.path.expanduser(placehold(globals.output_file)) - ) - logger.debug(f"NOTE: output file will be '{globals.output_file}'") - - the_config = config["general"] - - if "mode" in config["general"]: - operating_mode = the_config["mode"].lower() - if operating_mode in ["master", "slave"]: - globals.g_mode = operating_mode - else: - raise CbInvalidConfig( - f"{header} does not specify a valid operating mode (slave/master)" - ) - else: - raise CbInvalidConfig( - f"{header} does not specify a valid operating mode (slave/master)" - ) - - if "worker_type" in the_config: - if ( - the_config["worker_type"] == "local" - or the_config["worker_type"].strip() == "" - ): - globals.g_remote = False # 'local' or empty definition - elif the_config["worker_type"] == "remote": - globals.g_remote = True # 'remote' - else: # anything else - raise CbInvalidConfig( - f"{header} has an invalid 'worker_type' ({the_config['worker_type']})" - ) - else: - globals.g_remote = False - logger.warning(f"{header} does not specify 'worker_type', assuming local") - - # local/remote configuration data - if not globals.g_remote: - if "cb_server_url" in the_config and the_config["cb_server_url"].strip() != "": - globals.g_cb_server_url = the_config["cb_server_url"] - else: - raise CbInvalidConfig(f"{header} is 'local' and missing 'cb_server_url'") - if ( - "cb_server_token" in the_config - and the_config["cb_server_token"].strip() != "" - ): - globals.g_cb_server_token = the_config["cb_server_token"] - else: - raise CbInvalidConfig(f"{header} is 
'local' and missing 'cb_server_token'") - # TODO: validate url & token with test call? - - if "broker_url" in the_config and the_config["broker_url"].strip() != "": - app.conf.update( - broker_url=the_config["broker_url"], result_backend=the_config["broker_url"] - ) - elif globals.g_remote: - raise CbInvalidConfig(f"{header} is 'remote' and missing 'broker_url'") - - if "yara_rules_dir" in the_config and the_config["yara_rules_dir"].strip() != "": - check = os.path.abspath( - os.path.expanduser(placehold(the_config["yara_rules_dir"])) - ) - if os.path.exists(check): - if os.path.isdir(check): - globals.g_yara_rules_dir = check - else: - raise CbInvalidConfig( - f"{header} specified 'yara_rules_dir' ({check}) is not a directory" - ) - else: - raise CbInvalidConfig( - f"{header} specified 'yara_rules_dir' ({check}) does not exist" - ) - else: - raise CbInvalidConfig(f"{header} has no 'yara_rules_dir' definition") - - # NOTE: postgres_host has a default value in globals; use and warn if not defined - if "postgres_host" in the_config and the_config["postgres_host"].strip() != "": - globals.g_postgres_host = the_config["postgres_host"] - else: - logger.warning( - f"{header} has no defined 'postgres_host'; using default of '{globals.g_postgres_host}'" - ) - - # NOTE: postgres_username has a default value in globals; use and warn if not defined - if ( - "postgres_username" in the_config - and the_config["postgres_username"].strip() != "" - ): - globals.g_postgres_username = the_config["postgres_username"] - else: - logger.warning( - f"{header} has no defined 'postgres_username'; using default of '{globals.g_postgres_username}'" - ) - - if ( - "postgres_password" in the_config - and the_config["postgres_password"].strip() != "" - ): - globals.g_postgres_password = the_config["postgres_password"] - else: - raise CbInvalidConfig(f"{header} has no 'postgres_password' defined") - - # NOTE: postgres_db has a default value in globals; use and warn if not defined - if "postgres_db" in the_config and the_config["postgres_db"].strip() != "": - globals.g_postgres_db = the_config["postgres_db"] - else: - logger.warning( - f"{header} has no defined 'postgres_db'; using default of '{globals.g_postgres_db}'" - ) - - # NOTE: postgres_port has a default value in globals; use and warn if not defined - if "postgres_port" in the_config: - globals.g_postgres_port = int(the_config["postgres_port"]) - else: - logger.warning( - f"{header} has no defined 'postgres_port'; using default of '{globals.g_postgres_port}'" - ) - - # TODO: validate postgres connection with supplied information? 
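The deleted `verify_config()` carries a TODO about validating the postgres settings it collects; a cheap connect-and-release probe at configuration time would close it. A sketch of one option (the values shown are illustrative, and this is not something the connector currently does):

```python
import psycopg2


def probe_postgres(host: str, port: int, username: str, password: str, db: str) -> None:
    # Fail fast at config time instead of during the first database sweep.
    conn = psycopg2.connect(host=host, port=port, user=username,
                            password=password, dbname=db, connect_timeout=5)
    conn.close()
```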
- - if "niceness" in the_config: - os.nice(int(the_config["niceness"])) - - if "concurrent_hashes" in the_config: - globals.MAX_HASHES = int(the_config["concurrent_hashes"]) - logger.debug("Consurrent Hashes: {0}".format(globals.MAX_HASHES)) - - if "disable_rescan" in the_config: - globals.g_disable_rescan = bool(the_config["disable_rescan"]) - logger.debug("Disable Rescan: {0}".format(globals.g_disable_rescan)) - - if "num_days_binaries" in the_config: - globals.g_num_days_binaries = max(int(the_config["num_days_binaries"]), 1) - logger.debug( - "Number of days for binaries: {0}".format(globals.g_num_days_binaries) - ) - - if "vacuum_seconds" in the_config: - globals.g_vacuum_seconds = max(int(the_config["vacuum_seconds"]), 0) - if "vacuum_script" in the_config and the_config["vacuum_seconds"].strip() != "": - if globals.g_vacuum_seconds > 0: - check = os.path.abspath( - os.path.expanduser(placehold(the_config["vacuum_script"])) - ) - if os.path.exists(check): - if os.path.isdir(check): - raise CbInvalidConfig( - f"{header} specified 'vacuum_script' ({check}) is a directory" - ) - else: - raise CbInvalidConfig( - f"{header} specified 'vacuum_script' ({check}) does not exist" - ) - globals.g_vacuum_script = check - logger.warning( - f"Vacuum Script '{check}' is enabled; use this advanced feature at your own discretion!" - ) - else: - logger.debug( - f"{header} has 'vacuum_script' defined, but it is disabled" - ) - - if "feed_database_path" in the_config: - globals.feed_database_path = the_config["feed_database_path"] - check = os.path.abspath(placehold(the_config["feed_database_path"])) - if not (os.path.exists(check) and os.path.isdir(check)): - raise CbInvalidConfig("Invalid database path specified") - - if "database_sweep_interval" in the_config: - globals.g_scanning_interval = int(the_config["database_sweep_interval"]) - - -def main(): - parser = argparse.ArgumentParser(description="Yara Agent for Yara Connector") - - parser.add_argument( - "--config-file", - required=True, - default="yaraconnector.conf", - help="Location of the config file", - ) - - parser.add_argument( - "--log-file", default="yaraconnector.log", help="Log file output" - ) - - parser.add_argument( - "--output-file", default="yara_feed.json", help="output feed file" - ) - - parser.add_argument( - "--working-dir", default=".", help="working directory", required=False - ) - - parser.add_argument( - "--lock-file", default="./yaraconnector", help="lock file", required=False - ) - - parser.add_argument( - "--validate-yara-rules", - action="store_true", - help="ONLY validate yara rules in a specified directory", - ) - - parser.add_argument("--debug", action="store_true") - - args = parser.parse_args() - - if args.debug: - logger.setLevel(logging.DEBUG) - - if args.log_file: - formatter = logging.Formatter(logging_format) - handler = logging.handlers.RotatingFileHandler( - args.log_file, maxBytes=10 * 1000000, backupCount=10 - ) - handler.setFormatter(formatter) - logger.addHandler(handler) - - # Verify the configuration file and load up important global variables - try: - verify_config(args.config_file, args.output_file) - except Exception as err: - logger.error(f"Unable to continue due to a configuration problem: {err}") - sys.exit(1) - - if args.validate_yara_rules: - logger.info( - "Validating yara rules in directory: {0}".format(globals.g_yara_rules_dir) - ) - yara_rule_map = generate_rule_map(globals.g_yara_rules_dir) - try: - yara.compile(filepaths=yara_rule_map) - logger.info("All yara rules compiled successfully") - 
except Exception as err: - logger.error(f"There were errors compiling yara rules: {err}") - logger.error(traceback.format_exc()) - else: - - EXIT_EVENT = Event() - - try: - - working_dir = args.working_dir - - lock_file = lockfile.FileLock(args.lock_file) - - files_preserve = getLogFileHandles(logger) - files_preserve.extend([args.lock_file, args.log_file, args.output_file]) - - # defauls to piping to /dev/null - context = daemon.DaemonContext( - working_directory=working_dir, - pidfile=lock_file, - files_preserve=files_preserve, - ) - - run_as_master = globals.g_mode == "master" - - scanning_promise_queue = Queue() - scanning_results_queue = Queue() - - sig_handler = partial(handle_sig, EXIT_EVENT) - - context.signal_map = { - signal.SIGTERM: sig_handler, - signal.SIGQUIT: sig_handler, - } - - with context: - # only connect to cbr if we're the master - if run_as_master: - init_local_resources() - start_workers( - EXIT_EVENT, scanning_promise_queue, scanning_results_queue - ) - # start local celery if working mode is local - if not globals.g_remote: - start_celery_worker_thread(args.config_file) - else: - # otherwise, we must start a worker since we are not the master - start_celery_worker_thread(args.config_file) - - # run until the service/daemon gets a quitting sig - run_to_exit_signal(EXIT_EVENT) - wait_all_worker_exit() - logger.info("Yara connector shutdown OK") - - except KeyboardInterrupt: - logger.info("\n\n##### Interupted by User!\n") - EXIT_EVENT.set() - sys.exit(2) - except Exception as err: - logger.error(f"There were errors executing yara rules: {err}") - logger.error(traceback.format_exc()) - EXIT_EVENT.set() - sys.exit(1) - - -def getLogFileHandles(logger): +def get_log_file_handles(logger): """ Get a list of filehandle numbers from logger to be handed to DaemonContext.files_preserve """ @@ -771,7 +441,7 @@ def getLogFileHandles(logger): for handler in logger.handlers: handles.append(handler.stream.fileno()) if logger.parent: - handles += getLogFileHandles(logger.parent) + handles += get_log_file_handles(logger.parent) return handles @@ -904,6 +574,7 @@ def run(self): try: if self._target: + # noinspection PyArgumentList self._target(*self._args, **self._kwargs) finally: # Avoid a refcycle if the thread is running a function with @@ -930,5 +601,124 @@ def launch_celery_worker(config_file=None): logger.debug("CELERY WORKER LAUNCHING THREAD EXITED") +################################################################################ +# Main entrypoint +################################################################################ + +def handle_arguments(): + """ + Setup the main program options. + + :return: parsed arguments + """ + parser = argparse.ArgumentParser(description="Yara Agent for Yara Connector") + + parser.add_argument("--config-file", required=True, default="yaraconnector.conf", + help="Location of the config file") + parser.add_argument("--log-file", default="yaraconnector.log", help="Log file output") + parser.add_argument("--output-file", default="yara_feed.json", help="output feed file") + parser.add_argument("--working-dir", default=".", help="working directory", required=False) + parser.add_argument("--lock-file", default="./yaraconnector", help="lock file", required=False) + parser.add_argument("--validate-yara-rules", action="store_true", help="Only validate yara rules, then exit") + parser.add_argument("--debug", action="store_true") + + return parser.parse_args() + + +def main(): + """ + Main execution function. 
Script will exit with a non-zero value based on the following:
+        1: Not the only instance running
+        2: Configuration problem
+        3: User interrupt
+        4: Unexpected Yara scan exception
+        5: Yara rule validation problem
+    """
+    args = handle_arguments()
+
+    if args.debug:
+        logger.setLevel(logging.DEBUG)
+
+    if args.log_file:
+        use_log_file = os.path.abspath(os.path.expanduser(args.log_file))
+        formatter = logging.Formatter(logging_format)
+        handler = logging.handlers.RotatingFileHandler(use_log_file, maxBytes=10 * 1000000, backupCount=10)
+        handler.setFormatter(formatter)
+        logger.addHandler(handler)
+    else:
+        use_log_file = None
+
+    # Verify the configuration file and load up important global variables
+    try:
+        ConfigurationInit(args.config_file, use_log_file)
+    except Exception as err:
+        logger.error(f"Unable to continue due to a configuration problem: {err}")
+        sys.exit(2)
+
+    if args.validate_yara_rules:
+        logger.info(f"Validating yara rules in directory: {globals.g_yara_rules_dir}")
+        yara_rule_map = generate_rule_map(globals.g_yara_rules_dir)
+        try:
+            yara.compile(filepaths=yara_rule_map)
+            logger.info("All yara rules compiled successfully")
+        except Exception as err:
+            logger.error(f"There were errors compiling yara rules: {err}\n{traceback.format_exc()}")
+            sys.exit(5)
+    else:
+        exit_event = Event()
+
+        try:
+            working_dir = os.path.abspath(os.path.expanduser(args.working_dir))
+
+            lock_file = lockfile.FileLock(args.lock_file)
+
+            files_preserve = get_log_file_handles(logger)
+            files_preserve.extend([args.lock_file, args.log_file, args.output_file])
+
+            # defaults to piping to /dev/null
+            context = daemon.DaemonContext(working_directory=working_dir, pidfile=lock_file,
+                                           files_preserve=files_preserve)
+
+            run_as_master = globals.g_mode == "master"
+
+            scanning_promise_queue = Queue()
+            scanning_results_queue = Queue()
+
+            sig_handler = partial(handle_sig, exit_event)
+
+            context.signal_map = {
+                signal.SIGTERM: sig_handler,
+                signal.SIGQUIT: sig_handler,
+            }
+
+            with context:
+                # only connect to cbr if we're the master
+                if run_as_master:
+                    init_local_resources()
+                    start_workers(
+                        exit_event, scanning_promise_queue, scanning_results_queue
+                    )
+                    # start local celery if working mode is local
+                    if not globals.g_remote:
+                        start_celery_worker_thread(args.config_file)
+                else:
+                    # otherwise, we must start a worker since we are not the master
+                    start_celery_worker_thread(args.config_file)
+
+                # run until the service/daemon gets a quitting sig
+                run_to_exit_signal(exit_event)
+                wait_all_worker_exit()
+                logger.info("Yara connector shutdown OK")
+
+        except KeyboardInterrupt:
+            logger.info("\n\n##### Interrupted by User!\n")
+            exit_event.set()
+            sys.exit(3)
+        except Exception as err:
+            logger.error(f"There were errors executing yara rules: {err}\n{traceback.format_exc()}")
+            exit_event.set()
+            sys.exit(4)
+
+
 if __name__ == "__main__":
     main()
diff --git a/src/singleton.py b/src/singleton.py
deleted file mode 100644
index 07d2c25..0000000
--- a/src/singleton.py
+++ /dev/null
@@ -1,88 +0,0 @@
-# coding: utf-8
-# Copyright © 2014-2019 VMware, Inc. All Rights Reserved.
-
-import fcntl
-import logging
-import os
-import sys
-import tempfile
-
-from exceptions import SingleInstanceException
-
-logger = logging.getLogger(__name__)
-
-
-class SingleInstance(object):
-    """Class that can be instantiated only once per machine.
-
-    If you want to prevent your script from running in parallel just instantiate SingleInstance() class.
If is there - another instance already running it will throw a `SingleInstanceException`. - - >>> import singleton - ... singleton.SingleInstance() - - This option is very useful if you have scripts executed by crontab at small amounts of time. - - Remember that this works by creating a lock file with a filename based on the full path to the script file. - - Providing a flavor_id will augment the filename with the provided flavor_id, allowing you to create multiple - singleton instances from the same file. This is particularly useful if you want specific functions to have their - own singleton instances. - """ - - def __init__(self, flavor_id: str = None, lockfile: str = None): - self.initialized = False - - # define the lockfile - if lockfile is not None: - self.lockfile = lockfile - else: - converted = os.path.splitext(os.path.abspath(sys.argv[0]))[0].replace( - "/", "-").replace(":", "").replace("\\", "-") - if flavor_id is not None: - converted += f"-{flavor_id}" - converted += '.lock' - self.lockfile = os.path.normpath( - tempfile.gettempdir() + '/' + converted) - logger.debug("SingleInstance lockfile: `{0}`".format(self.lockfile)) - - if sys.platform == 'win32': - try: - # file already exists, we try to remove (in case previous - # execution was interrupted) - if os.path.exists(self.lockfile): - os.unlink(self.lockfile) - self.fd = os.open(self.lockfile, os.O_CREAT | os.O_EXCL | os.O_RDWR) - except OSError as err: - the_type, e, tb = sys.exc_info() - if e.errno == 13: - raise SingleInstanceException("Another instance is already running, quitting.") - raise RuntimeError("[{0}] An error prevented creation of the lockfile: {1}".format(e.errno, err)) - else: # non Windows - self.fp = open(self.lockfile, 'w') - self.fp.flush() - try: - fcntl.lockf(self.fp, fcntl.LOCK_EX | fcntl.LOCK_NB) - except IOError: - raise SingleInstanceException("Another instance is already running, quitting.") - - # ready to go! - self.initialized = True - - def __del__(self): - if not self.initialized: - return - - try: - if sys.platform == 'win32': - if hasattr(self, 'fd'): - os.close(self.fd) - os.unlink(self.lockfile) - else: - fcntl.lockf(self.fp, fcntl.LOCK_UN) - self.fp.close() - if os.path.isfile(self.lockfile): - os.unlink(self.lockfile) - except Exception as err: - logger.warning(f"Unable to remove lockfile: {err}") - sys.exit(-1) diff --git a/src/tasks.py b/src/tasks.py index c771389..dce79c0 100644 --- a/src/tasks.py +++ b/src/tasks.py @@ -1,24 +1,20 @@ -import configparser import datetime import hashlib +import io import logging +import multiprocessing import os import traceback +import zipfile from typing import List +import requests # noinspection PyPackageRequirements import yara -import requests -import io -import zipfile from celery import bootsteps, Celery, group -from celery.result import ResultSet import globals from analysis_result import AnalysisResult -from exceptions import CbInvalidConfig -from utilities import placehold -import multiprocessing app = Celery() # noinspection PyUnusedName @@ -75,87 +71,6 @@ def release_write(self): compiled_rules_lock = ReadWriteLock() -# noinspection DuplicatedCode -def verify_config(config_file: str) -> None: - """ - Read and validate the current config file. 
- - NOTE: Replicates, to a smaller degree, the function in main.py; it is presumed that more detailed checks are there - :param config_file: path to the config file - """ - abs_config = os.path.abspath(os.path.expanduser(placehold(config_file))) - header = f"Config file '{abs_config}'" - - config = configparser.ConfigParser() - if not os.path.exists(config_file): - raise CbInvalidConfig(f"{header} does not exist!") - - try: - config.read(config_file) - except Exception as err: - raise CbInvalidConfig(err) - - logger.debug(f"NOTE: using config file '{abs_config}'") - if not config.has_section("general"): - raise CbInvalidConfig(f"{header} does not have a 'general' section") - - the_config = config["general"] - - if "yara_rules_dir" in the_config and the_config["yara_rules_dir"].strip() != "": - check = os.path.abspath( - os.path.expanduser(placehold(the_config["yara_rules_dir"])) - ) - if os.path.exists(check): - if os.path.isdir(check): - globals.g_yara_rules_dir = check - else: - raise CbInvalidConfig( - f"{header} specified 'yara_rules_dir' ({check}) is not a directory" - ) - else: - raise CbInvalidConfig( - f"{header} specified 'yara_rules_dir' ({check}) does not exist" - ) - else: - raise CbInvalidConfig(f"{header} has no 'yara_rules_dir' definition") - - - if "worker_network_timeout" in the_config: - globals.g_worker_network_timeout = int(the_config['worker_network_timeout']) - - if "worker_type" in the_config: - if ( - the_config["worker_type"] == "local" - ): - remote = False - elif the_config["worker_type"] == "remote": - remote = True - else: # anything else - raise CbInvalidConfig( - f"{header} has an invalid 'worker_type' ({the_config['worker_type']})" - ) - else: - remote = False - - # local/remote configuration data - if not remote: - if "cb_server_url" in the_config and the_config["cb_server_url"].strip() != "": - globals.g_cb_server_url = the_config["cb_server_url"] - else: - raise CbInvalidConfig(f"{header} is 'local' and missing 'cb_server_url'") - if ( - "cb_server_token" in the_config - and the_config["cb_server_token"].strip() != "" - ): - globals.g_cb_server_token = the_config["cb_server_token"] - else: - raise CbInvalidConfig(f"{header} is 'local' and missing 'cb_server_token'") - - if "broker_url" in the_config and the_config["broker_url"].strip() != "": - app.conf.update(broker_url=the_config["broker_url"], results_backend=the_config['broker_url']) - elif remote: - raise CbInvalidConfig(f"{header} is 'remote' and missing 'broker_url'") - def add_worker_arguments(parser): parser.add_argument( From 968a1f38c3e65f4790fef4fd864f22a923bb3aaa Mon Sep 17 00:00:00 2001 From: Burton Choinski Date: Mon, 4 Nov 2019 11:49:11 -0500 Subject: [PATCH 113/257] more cleanup --- src/README.md | 1 - src/exceptions.py | 7 -- src/main.py | 39 ++++---- src/tasks.py | 32 ++++--- test/config/bogus_worker_network_timeout.conf | 24 +++++ .../missing_worker_network_timeout.conf | 24 +++++ test/test_configInit.py | 18 ++++ test/test_singleInstance.py | 89 ------------------- 8 files changed, 109 insertions(+), 125 deletions(-) delete mode 100644 src/README.md create mode 100644 test/config/bogus_worker_network_timeout.conf create mode 100644 test/config/missing_worker_network_timeout.conf delete mode 100644 test/test_singleInstance.py diff --git a/src/README.md b/src/README.md deleted file mode 100644 index c7c813a..0000000 --- a/src/README.md +++ /dev/null @@ -1 +0,0 @@ -Future home of the source code. 
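The patches above and below finish retiring the old single-instance machinery: src/singleton.py was dropped in favor of the daemon's lockfile/pidfile handling, and the SingleInstanceException it relied on is removed from src/exceptions.py next. For reference, the heart of the deleted SingleInstance class was the POSIX advisory-lock idiom sketched here. This is a condensed illustration of that pattern only, not connector code; the lock path is a hypothetical example.

    import fcntl
    import sys

    def acquire_single_instance_lock(lock_path: str = "/tmp/yaraconnector.lock"):
        """Hold a non-blocking exclusive lock for the life of the process."""
        fp = open(lock_path, "w")
        try:
            # LOCK_NB makes lockf raise immediately if another process holds the lock
            fcntl.lockf(fp, fcntl.LOCK_EX | fcntl.LOCK_NB)
        except IOError:
            sys.exit("Another instance is already running, quitting.")
        return fp  # keep the handle referenced so the lock is not released

The daemon's FileLock-based pidfile gives the same one-instance guarantee without a hand-rolled class, which is why both the class and its exception could go.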
diff --git a/src/exceptions.py b/src/exceptions.py index dd4011f..6bb11a4 100644 --- a/src/exceptions.py +++ b/src/exceptions.py @@ -35,10 +35,3 @@ class CbInvalidReport(CbException): Excepion raised if supplied Report data is invalid. """ pass - - -class SingleInstanceException(BaseException): - """ - Excepion raised if you attempt to run more than one instance.. - """ - pass diff --git a/src/main.py b/src/main.py index 2672ad7..3c06490 100644 --- a/src/main.py +++ b/src/main.py @@ -1,3 +1,6 @@ +# coding: utf-8 +# Copyright © 2014-2019 VMware, Inc. All Rights Reserved. + import argparse import hashlib import json @@ -22,6 +25,7 @@ # noinspection PyPackageRequirements import yara from celery.bin import worker +# noinspection PyPackageRequirements from daemon import daemon from peewee import SqliteDatabase @@ -43,6 +47,13 @@ def promise_worker(exit_event, scanning_promise_queue, scanning_results_queue): + """ + + :param exit_event: + :param scanning_promise_queue: + :param scanning_results_queue: + :return: + """ try: while not (exit_event.is_set()): if not (scanning_promise_queue.empty()): @@ -60,11 +71,13 @@ def promise_worker(exit_event, scanning_promise_queue, scanning_results_queue): logger.debug("PROMISE WORKING EXITING") +# noinspection PyUnusedFunction def results_worker(exit_event, results_queue): """ Sqlite is not meant to be thread-safe. - This single-worker-thread writes the result(s) to the configured sqlite file to hold the feed-metadata and seen binaries/results from scans + This single-worker-thread writes the result(s) to the configured sqlite file to hold the feed-metadata and + seen binaries/results from scans """ try: while not (exit_event.is_set()): @@ -187,6 +200,7 @@ def analyze_binary_and_queue(scanning_promise_queue, md5sum): scanning_promise_queue.put(promise) +# noinspection PyUnusedFunction def analyze_binaries_and_queue(scanning_promise_queue, md5_hashes): """ Analyze each binary and enqueue """ for h in md5_hashes: @@ -260,9 +274,8 @@ def get_binary_file_cursor(conn, start_date_binaries): cur = conn.cursor(name="yara_agent") # noinspection SqlDialectInspection,SqlNoDataSourceInspection - query = "SELECT md5hash FROM storefiles WHERE present_locally = TRUE AND timestamp >= '{0}' ORDER BY timestamp DESC".format( - start_date_binaries - ) + query = "SELECT md5hash FROM storefiles WHERE present_locally = TRUE AND " + \ + "timestamp >= '{0}' ORDER BY timestamp DESC".format(start_date_binaries) logger.debug(query) @@ -300,13 +313,6 @@ def perform(yara_rule_dir: str, conn, scanning_promises_queue: Queue): logger.info("Uploading yara rules to workers...") generate_rule_map_remote(yara_rule_dir) - num_total_binaries = 0 - num_binaries_skipped = 0 - num_binaries_queued = 0 - md5_hashes = [] - - start_time = time.time() - # Determine our binaries window (date forward) start_date_binaries = datetime.now() - timedelta(days=globals.g_num_days_binaries) @@ -433,18 +439,19 @@ def _rule_logging(start_time: float, num_binaries_skipped: int, num_total_binari logger.info("") -def get_log_file_handles(logger): +def get_log_file_handles(use_logger): """ Get a list of filehandle numbers from logger to be handed to DaemonContext.files_preserve """ handles = [] - for handler in logger.handlers: + for handler in use_logger.handlers: handles.append(handler.stream.fileno()) - if logger.parent: - handles += get_log_file_handles(logger.parent) + if use_logger.parent: + handles += get_log_file_handles(use_logger.parent) return handles +# noinspection PyUnusedLocal def handle_sig(exit_event, 
sig, frame):
     """
     Signal handler - handle the signal and mark exit if it's an exiting signal
@@ -474,7 +481,7 @@ def init_local_resources():
     """
     globals.g_yara_rule_map = generate_rule_map(globals.g_yara_rules_dir)
     generate_yara_rule_map_hash(globals.g_yara_rules_dir)
-    database = SqliteDatabase(os.path.join(globals.g_feed_database_path, "binary.db"))
+    database = SqliteDatabase(os.path.join(globals.g_feed_database_dir, "binary.db"))
     db.initialize(database)
     db.connect()
     db.create_tables([BinaryDetonationResult])
diff --git a/src/tasks.py b/src/tasks.py
index dce79c0..bd2ae79 100644
--- a/src/tasks.py
+++ b/src/tasks.py
@@ -1,3 +1,6 @@
+# coding: utf-8
+# Copyright © 2014-2019 VMware, Inc. All Rights Reserved.
+
 import datetime
 import hashlib
 import io
@@ -15,6 +18,7 @@
 
 import globals
 from analysis_result import AnalysisResult
+from config_handling import ConfigurationInit
 
 app = Celery()
 # noinspection PyUnusedName
@@ -71,7 +75,6 @@ def release_write(self):
 
 compiled_rules_lock = ReadWriteLock()
 
-
 def add_worker_arguments(parser):
     parser.add_argument(
         "--config-file", default="yara_worker.conf", help="Yara Worker Config"
     )
@@ -82,14 +85,14 @@
 
 
 class MyBootstep(bootsteps.Step):
+    """
+    Define the bootstrap task.
+    """
     # noinspection PyUnusedLocal
-    def __init__(self, worker, config_file="yara_worker.conf", **options):
+    def __init__(self, worker, config_file='yara_worker.conf', **options):
         super().__init__(self)
-        print(options)
-        verify_config(config_file)
-
-        # g_yara_rules_dir = yara_rules_dir
+        ConfigurationInit(config_file, None)
 
 
 app.steps["worker"].add(MyBootstep)
@@ -186,18 +189,20 @@ def get_binary_by_hash(url, hsum, token):
     """
     headers = {"X-Auth-Token": token}
     request_url = f"{url}/api/v1/binary/{hsum}"
-    response = requests.get(request_url, headers=headers, stream=True, verify=False, timeout=globals.g_worker_network_timeout)
+    response = requests.get(request_url, headers=headers, stream=True, verify=False,
+                            timeout=globals.g_worker_network_timeout)
     if response:
         with zipfile.ZipFile(io.BytesIO(response.content)) as the_binary_zip:
-            #the response contains the file ziped in 'filedata'
+            # the response contains the file zipped in 'filedata'
             fp = the_binary_zip.open("filedata")
             the_binary_zip.close()
             return fp
     else:
-        #otherwise return None which will be interpreted correctly in analyze_binary as haven failed to lookup the hash
+        # otherwise return None, which analyze_binary interprets as having failed to look up the hash
        return None
 
 
+# noinspection PyUnusedFunction
 @app.task
 def analyze_bins(hashes):
     return group(analyze_binary.s(h) for h in hashes).apply_async()
@@ -255,8 +260,8 @@ def analyze_binary(md5sum: str) -> AnalysisResult:
             analysis_result.last_error_msg = f"Yara exception: {err}"
         except Exception as err:
             analysis_result.last_error_msg = (
-                f"Other exception while matching rules: {err}\n"
-                + traceback.format_exc()
+                    f"Other exception while matching rules: {err}\n"
+                    + traceback.format_exc()
             )
         finally:
             compiled_rules_lock.release_read()
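Worth noting between these hunks is how tasks.py serializes access to the compiled rules: analyze_binary (above) brackets its yara matching with compiled_rules_lock.acquire_read()/release_read(), while a rule update takes the write side of the same module-level ReadWriteLock before swapping in a freshly compiled ruleset. A condensed sketch of the pattern follows; compiled_yara_rules is a stand-in name for the module-level cache, not the connector's exact variable.

    def scan_with_rules(data: bytes):
        compiled_rules_lock.acquire_read()
        try:
            # many scans may hold the read side concurrently
            return compiled_yara_rules.match(data=data)
        finally:
            compiled_rules_lock.release_read()

    def swap_in_rules(rule_map: dict):
        global compiled_yara_rules
        compiled_rules_lock.acquire_write()
        try:
            # the writer is exclusive, so no scan ever sees a half-built ruleset
            compiled_yara_rules = yara.compile(filepaths=rule_map)
        finally:
            compiled_rules_lock.release_write()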
@@ -273,10 +278,13 @@ def get_high_score(matches) -> int:
     """
     Find the highest match score.
-    NOTE: if str(matches) == "debug", return 100
     :param matches: List of rule matches.
     :return:
     """
+    # NOTE: if str(matches) == "debug", return 100
+    if matches == "debug":
+        return 100
+
     score = 0
     for match in matches:
         if match.meta.get("score", 0) > score:
diff --git a/test/config/bogus_worker_network_timeout.conf b/test/config/bogus_worker_network_timeout.conf
new file mode 100644
index 0000000..a51349b
--- /dev/null
+++ b/test/config/bogus_worker_network_timeout.conf
@@ -0,0 +1,24 @@
+[general]
+worker_type=local
+
+; ONLY for worker_type of local
+cb_server_url=https://127.0.0.1:443
+cb_server_token=wFC4he6OBd20IBbB0XkhE5ZsjBQUj1GB
+
+yara_rules_dir=./rules
+
+postgres_host=localhost
+postgres_username=cb
+postgres_password=6PGcbuwlQnIuPqOF
+postgres_db=cb
+postgres_port=5002
+
+niceness=1
+concurrent_hashes=8
+disable_rescan=False
+num_days_binaries=365
+
+vacuum_interval=360
+vacuum_script=../scripts/vacuumscript.sh
+
+worker_network_timeout=BOGUS
diff --git a/test/config/missing_worker_network_timeout.conf b/test/config/missing_worker_network_timeout.conf
new file mode 100644
index 0000000..e018ec4
--- /dev/null
+++ b/test/config/missing_worker_network_timeout.conf
@@ -0,0 +1,24 @@
+[general]
+worker_type=local
+
+; ONLY for worker_type of local
+cb_server_url=https://127.0.0.1:443
+cb_server_token=wFC4he6OBd20IBbB0XkhE5ZsjBQUj1GB
+
+yara_rules_dir=./rules
+
+postgres_host=localhost
+postgres_username=cb
+postgres_password=6PGcbuwlQnIuPqOF
+postgres_db=cb
+postgres_port=5002
+
+niceness=1
+concurrent_hashes=8
+disable_rescan=False
+num_days_binaries=365
+
+vacuum_interval=360
+vacuum_script=../scripts/vacuumscript.sh
+
+worker_network_timeout=
diff --git a/test/test_configInit.py b/test/test_configInit.py
index acb11f7..550adcd 100644
--- a/test/test_configInit.py
+++ b/test/test_configInit.py
@@ -413,6 +413,24 @@ def test_21_config_malformed_parameter(self):
             ConfigurationInit(os.path.join(TESTS, "config", "malformed_param.conf"), "sample.json")
         assert "cannot be parsed" in "{0}".format(err.exception.args[0])
 
+    def test_22a_config_missing_worker_network_timeout(self):
+        """
+        Ensure that config with missing worker_network_timeout reverts to default.
+        """
+        check = globals.g_worker_network_timeout
+
+        # defined as "worker_network_timeout=" (no value)
+        ConfigurationInit(os.path.join(TESTS, "config", "missing_worker_network_timeout.conf"), "sample.json")
+        self.assertEqual(check, globals.g_worker_network_timeout)
+
+    def test_22b_config_bogus_worker_network_timeout(self):
+        """
+        Ensure that config with bogus (non-int) worker_network_timeout is detected.
+        """
+        with self.assertRaises(ValueError) as err:
+            ConfigurationInit(os.path.join(TESTS, "config", "bogus_worker_network_timeout.conf"), "sample.json")
+        assert "invalid literal for int" in "{0}".format(err.exception.args[0])
+
     # ----- Minimal validation (worker)
 
     def test_90_minimal_validation_effects(self):
diff --git a/test/test_singleInstance.py b/test/test_singleInstance.py
deleted file mode 100644
index 9c719c8..0000000
--- a/test/test_singleInstance.py
+++ /dev/null
@@ -1,89 +0,0 @@
-# coding: utf-8
-# Copyright © 2014-2019 VMware, Inc. All Rights Reserved.
- -import logging -import os -import sys -from multiprocessing import Process -from unittest import TestCase - -from exceptions import SingleInstanceException -from singleton import SingleInstance - -logger = logging.getLogger(__name__) - - -def f(flavor: str = None): - tmp = logger.level - logger.setLevel(logging.CRITICAL) # we do not want to see the warning - si = None - try: - si = SingleInstance(flavor_id=flavor) # noqa - except SingleInstanceException: - sys.exit(1) - finally: - if si is not None: - del si - logger.setLevel(tmp) - - -class TestSingleInstance(TestCase): - - def test_01_unflavored(self): - si = SingleInstance() - logger.info("Lockfile: {0}".format(si.lockfile)) - self.assertTrue(os.path.exists(si.lockfile)) - - lock = si.lockfile - del si # now the lock should be removed - self.assertFalse(os.path.exists(lock)) - - def test_02_flavored(self): - si = SingleInstance(flavor_id="test-1") - logger.info("Lockfile: {0}".format(si.lockfile)) - self.assertTrue(os.path.exists(si.lockfile)) - try: - assert "test-1" in si.lockfile - except AssertionError: - del si - raise - - lock = si.lockfile - del si # now the lock should be removed - self.assertFalse(os.path.exists(lock)) - - def test_03_specified(self): - lockfile = '/tmp/foo.lock' - si = SingleInstance(lockfile=lockfile) - logger.info("Lockfile: {0}".format(si.lockfile)) - self.assertTrue(os.path.exists(lockfile)) - - del si # now the lock should be removed - self.assertFalse(os.path.exists(lockfile)) - - def test_04_as_process(self): - p = Process(target=f, args=("as-process",)) - p.start() - p.join() - # the called function should succeed - assert p.exitcode == 0, "%s != 0" % p.exitcode - - def test_05_as_process_multi_invoke(self): - # get an instance running - si = SingleInstance(flavor_id="test-05") - - p = Process(target=f, args=("test-05",)) - p.start() - p.join() - # the called function should fail because we already have another instance running - assert p.exitcode != 0, "%s != 0 (2nd execution)" % p.exitcode - - # try a different flavor - p = Process(target=f, args=("test-05a",)) - p.start() - p.join() - # the called function should fail because we already have another - # instance running - assert p.exitcode == 0, "%s != 0 (new flavor)" % p.exitcode - - del si From 721037269b1761aa250326ff699a82221e3031f1 Mon Sep 17 00:00:00 2001 From: Zachary Estep Date: Mon, 4 Nov 2019 12:07:51 -0500 Subject: [PATCH 114/257] Fixing config handling and debug mode --- src/config_handling.py | 24 ++++++++++++++++++------ src/globals.py | 2 +- 2 files changed, 19 insertions(+), 7 deletions(-) diff --git a/src/config_handling.py b/src/config_handling.py index 12b2114..d2aea96 100644 --- a/src/config_handling.py +++ b/src/config_handling.py @@ -60,6 +60,19 @@ def __init__(self, config_file: str, output_file: str = None) -> None: raise CbInvalidConfig(f"{self.source} does not have a 'general' section") self.the_config = config["general"] + if "mode" in self.the_config["general"]: + operating_mode = self.the_config["mode"].lower() + if operating_mode in ["master", "slave"]: + globals.g_mode = operating_mode + else: + raise CbInvalidConfig( + f"{self.source} does not specify a valid operating mode (slave/master)" + ) + else: + raise CbInvalidConfig( + f"{self.source} does not specify a valid operating mode (slave/master)" + ) + self._worker_check() if output_file is not None: @@ -84,12 +97,11 @@ def _worker_check(self) -> None: globals.g_yara_rules_dir = self._as_path("yara_rules_dir", required=True, exists=True, is_dir=True) # 
local/remote configuration data - if not globals.g_remote: - globals.g_cb_server_url = self._as_str("cb_server_url", required=True) - globals.g_cb_server_token = self._as_str("cb_server_token", required=True) - else: - value = self._as_str("broker_url", required=True) - app.conf.update(broker_url=value, result_backend=value) + globals.g_cb_server_url = self._as_str("cb_server_url", required=True) + globals.g_cb_server_token = self._as_str("cb_server_token", required=True) + + value = self._as_str("broker_url", required=True) + app.conf.update(broker_url=value, result_backend=value) globals.g_worker_network_timeout = self._as_int("worker_network_timeout") diff --git a/src/globals.py b/src/globals.py index 5390c01..4dd53c7 100644 --- a/src/globals.py +++ b/src/globals.py @@ -9,7 +9,7 @@ g_output_file = "" g_remote = False -g_mode = "" +g_mode = "master" # local info g_cb_server_url = "https://127.0.0.1" From e74bd81683ed4849f06b0a06b27a4ba39d085558 Mon Sep 17 00:00:00 2001 From: Burton Choinski Date: Mon, 4 Nov 2019 12:08:03 -0500 Subject: [PATCH 115/257] more cleanup --- src/main.py | 58 ++++++++++++++++++++++++++++++----------------------- 1 file changed, 33 insertions(+), 25 deletions(-) diff --git a/src/main.py b/src/main.py index 3c06490..43ddc1a 100644 --- a/src/main.py +++ b/src/main.py @@ -95,7 +95,13 @@ def results_worker(exit_event, results_queue): logger.debug("Results worker thread exiting") -def results_worker_chunked(exit_event, results_queue): +def results_worker_chunked(exit_event, results_queue: Queue): + """ + + :param exit_event: + :param results_queue: + :return: + """ try: while not (exit_event.is_set()): if not (results_queue.empty()): @@ -511,42 +517,46 @@ def wait_all_worker_exit(): logger.debug("Main thread going to exit...") -# starts worker-threads (not celery workers) -# worker threads do work until they get the exit_event signal -def start_workers(exit_event, scanning_promises_queue, scanning_results_queue): +def start_workers(exit_event: Event, scanning_promises_queue: Queue, scanning_results_queue: Queue) -> None: + """ + Starts worker-threads (not celery workers). 
Worker threads do work until they get the exit_event signal
+    :param exit_event: event signaller
+    :param scanning_promises_queue: promises queue
+    :param scanning_results_queue: results queue
+    """
     logger.debug("Starting perf thread")
-
-    perf_thread = DatabaseScanningThread(
-        globals.g_scanning_interval, scanning_promises_queue, exit_event
-    )
+    perf_thread = DatabaseScanningThread(globals.g_scanning_interval, scanning_promises_queue, exit_event)
     perf_thread.start()
 
     logger.debug("Starting promise thread(s)")
-
     for _ in range(2):
-        promise_worker_thread = Thread(
-            target=promise_worker,
-            args=(exit_event, scanning_promises_queue, scanning_results_queue),
-        )
+        promise_worker_thread = Thread(target=promise_worker, args=(exit_event, scanning_promises_queue,
+                                                                    scanning_results_queue))
         promise_worker_thread.start()
 
     logger.debug("Starting results saver thread")
-    results_worker_thread = Thread(
-        target=results_worker_chunked, args=(exit_event, scanning_results_queue)
-    )
-
+    results_worker_thread = Thread(target=results_worker_chunked, args=(exit_event, scanning_results_queue))
     results_worker_thread.start()
 
 
 class DatabaseScanningThread(Thread):
     """
-    A worker thread that scans over the database for new hashes ever INTERVAL seconds
-    Pushes work to scanning_promises_queue , exits when the event is triggered
-    by the signal handler
+    A worker thread that scans over the database for new hashes every INTERVAL seconds.
+    Pushes work to scanning_promises_queue, exits when the event is triggered
+    by the signal handler.
     """
 
-    def __init__(self, interval, scanning_promises_queue, exit_event, *args, **kwargs):
+    def __init__(self, interval: int, scanning_promises_queue: Queue, exit_event: Event, *args, **kwargs):
+        """
+
+        :param interval:
+        :param scanning_promises_queue: promises queue
+        :param exit_event: event signaller
+        :param args: optional arguments
+        :param kwargs: optional keyword arguments
+        """
         super().__init__(*args, **kwargs)
+
         self._args = args
         self._kwargs = kwargs
         self.exit_event = exit_event
@@ -556,7 +566,7 @@ def __init__(self, interval, scanning_promises_queue, exit_event, *args, **kwarg
         self._target = self.scan_until_exit
 
     def scan_until_exit(self):
-        # TODO DRIFT
+        # TODO: DRIFT
         self.do_db_scan()
         while not self.exit_event.is_set():
             self.exit_event.wait(timeout=self._interval)
@@ -702,9 +712,7 @@ def main():
                 # only connect to cbr if we're the master
                 if run_as_master:
                     init_local_resources()
-                    start_workers(
-                        exit_event, scanning_promise_queue, scanning_results_queue
-                    )
+                    start_workers(exit_event, scanning_promise_queue, scanning_results_queue)
                     # start local celery if working mode is local
                     if not globals.g_remote:
                         start_celery_worker_thread(args.config_file)
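The DatabaseScanningThread rework above leaves the timing logic intact: scan_until_exit runs one sweep immediately, then uses Event.wait as an interruptible sleep, so a SIGTERM-driven exit_event.set() wakes the thread at once instead of after a full interval. The # TODO: DRIFT note flags that each cycle lasts interval plus the sweep itself, so sweep start times slowly drift later. A minimal sketch of the loop shape, with do_db_scan standing in for the thread's sweep method:

    from threading import Event

    def scan_until_exit(exit_event: Event, interval: int, do_db_scan) -> None:
        do_db_scan()  # initial sweep on startup
        while not exit_event.is_set():
            # wait() returns True as soon as the event is set, False on timeout
            if exit_event.wait(timeout=interval):
                break
            do_db_scan()  # next sweep starts interval + sweep-duration after the last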
"SELECT md5hash FROM storefiles WHERE present_locally = TRUE AND " + \ - "timestamp >= '{0}' ORDER BY timestamp DESC".format(start_date_binaries) + query = ( + "SELECT md5hash FROM storefiles WHERE present_locally = TRUE AND " + + "timestamp >= '{0}' ORDER BY timestamp DESC".format(start_date_binaries) + ) logger.debug(query) @@ -294,8 +298,12 @@ def execute_script() -> None: """ Execute a external maintenence script (vacuum script). """ - logger.info("----- Executing vacuum script ----------------------------------------") - prog = subprocess.Popen(globals.g_vacuum_script, shell=True, universal_newlines=True) + logger.info( + "----- Executing vacuum script ----------------------------------------" + ) + prog = subprocess.Popen( + globals.g_vacuum_script, shell=True, universal_newlines=True + ) stdout, stderr = prog.communicate() if stdout is not None and len(stdout.strip()) > 0: logger.info(stdout) @@ -303,7 +311,9 @@ def execute_script() -> None: logger.error(stderr) if prog.returncode: logger.warning(f"program returned error code {prog.returncode}") - logger.info("---------------------------------------- Vacuum script completed -----\n") + logger.info( + "---------------------------------------- Vacuum script completed -----\n" + ) def perform(yara_rule_dir: str, conn, scanning_promises_queue: Queue): @@ -395,7 +405,9 @@ def save_results_with_logging(analysis_results): # noinspection PyUnusedFunction -def save_and_log(analysis_results, start_time, num_binaries_skipped, num_total_binaries): +def save_and_log( + analysis_results, start_time, num_binaries_skipped, num_total_binaries +): logger.debug(analysis_results) if analysis_results: for analysis_result in analysis_results: @@ -412,7 +424,9 @@ def save_and_log(analysis_results, start_time, num_binaries_skipped, num_total_b _rule_logging(start_time, num_binaries_skipped, num_total_binaries) -def _rule_logging(start_time: float, num_binaries_skipped: int, num_total_binaries: int) -> None: +def _rule_logging( + start_time: float, num_binaries_skipped: int, num_total_binaries: int +) -> None: """ Simple method to log yara work. :param start_time: start time for the work @@ -622,6 +636,7 @@ def launch_celery_worker(config_file=None): # Main entrypoint ################################################################################ + def handle_arguments(): """ Setup the main program options. 
@@ -630,13 +645,29 @@ def handle_arguments():
     parser = argparse.ArgumentParser(description="Yara Agent for Yara Connector")
 
-    parser.add_argument("--config-file", required=True, default="yaraconnector.conf",
-                        help="Location of the config file")
-    parser.add_argument("--log-file", default="yaraconnector.log", help="Log file output")
-    parser.add_argument("--output-file", default="yara_feed.json", help="output feed file")
-    parser.add_argument("--working-dir", default=".", help="working directory", required=False)
-    parser.add_argument("--lock-file", default="./yaraconnector", help="lock file", required=False)
-    parser.add_argument("--validate-yara-rules", action="store_true", help="Only validate yara rules, then exit")
+    parser.add_argument(
+        "--config-file",
+        required=True,
+        default="yaraconnector.conf",
+        help="Location of the config file",
+    )
+    parser.add_argument(
+        "--log-file", default="yaraconnector.log", help="Log file output"
+    )
+    parser.add_argument(
+        "--output-file", default="yara_feed.json", help="output feed file"
+    )
+    parser.add_argument(
+        "--working-dir", default=".", help="working directory", required=False
+    )
+    parser.add_argument(
+        "--lock-file", default="./yaraconnector", help="lock file", required=False
+    )
+    parser.add_argument(
+        "--validate-yara-rules",
+        action="store_true",
+        help="Only validate yara rules, then exit",
+    )
     parser.add_argument("--debug", action="store_true")
 
     return parser.parse_args()
@@ -659,7 +690,9 @@ def main():
     if args.log_file:
         use_log_file = os.path.abspath(os.path.expanduser(args.log_file))
         formatter = logging.Formatter(logging_format)
-        handler = logging.handlers.RotatingFileHandler(use_log_file, maxBytes=10 * 1000000, backupCount=10)
+        handler = logging.handlers.RotatingFileHandler(
+            use_log_file, maxBytes=10 * 1000000, backupCount=10
+        )
         handler.setFormatter(formatter)
         logger.addHandler(handler)
     else:
@@ -679,7 +712,9 @@ def main():
             yara.compile(filepaths=yara_rule_map)
             logger.info("All yara rules compiled successfully")
         except Exception as err:
-            logger.error(f"There were errors compiling yara rules: {err}\n{traceback.format_exc()}")
+            logger.error(
+                f"There were errors compiling yara rules: {err}\n{traceback.format_exc()}"
+            )
             sys.exit(5)
     else:
         exit_event = Event()
@@ -693,8 +728,15 @@ def main():
             files_preserve = get_log_file_handles(logger)
             files_preserve.extend([args.lock_file, args.log_file, args.output_file])
 
             # defaults to piping to /dev/null
-            context = daemon.DaemonContext(working_directory=working_dir, pidfile=lock_file,
-                                           files_preserve=files_preserve)
+
+            daemon_kwargs = {
+                "working_directory": working_dir,
+                "pidfile": lock_file,
+                "files_preserve": files_preserve,
+            }
+            if args.debug:
+                daemon_kwargs.update({"stdout": sys.stdout, "stderr": sys.stderr})
+            context = daemon.DaemonContext(**daemon_kwargs)
 
             run_as_master = globals.g_mode == "master"

From 807db39eec06404f45268c98fe2f8caf60053983 Mon Sep 17 00:00:00 2001
From: Zachary Estep
Date: Mon, 4 Nov 2019 12:10:50 -0500
Subject: [PATCH 117/257] updates

---
 src/config_handling.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/config_handling.py b/src/config_handling.py
index d2aea96..391aa09 100644
--- a/src/config_handling.py
+++ b/src/config_handling.py
@@ -60,7 +60,7 @@ def
__init__(self, config_file: str, output_file: str = None) -> None: raise CbInvalidConfig(f"{self.source} does not have a 'general' section") self.the_config = config["general"] - if "mode" in self.the_config["general"]: + if "mode" in self.the_config: operating_mode = self.the_config["mode"].lower() if operating_mode in ["master", "slave"]: globals.g_mode = operating_mode From b906efd317a9e5f2371da4d27c2bf4069a73b050 Mon Sep 17 00:00:00 2001 From: Zachary Estep Date: Mon, 4 Nov 2019 15:42:55 -0500 Subject: [PATCH 118/257] Remove extra app context" --- src/config_handling.py | 166 +++++++++++++++++++++++++++++------------ 1 file changed, 117 insertions(+), 49 deletions(-) diff --git a/src/config_handling.py b/src/config_handling.py index 391aa09..1cdaea6 100644 --- a/src/config_handling.py +++ b/src/config_handling.py @@ -11,27 +11,18 @@ import globals from exceptions import CbInvalidConfig +from tasks import app + logger = logging.getLogger(__name__) __all__ = ["ConfigurationInit", "app"] -################################################################################ -# Celery app -################################################################################ - -app = Celery() -# noinspection PyUnusedName -app.conf.task_serializer = "pickle" -# noinspection PyUnusedName -app.conf.result_serializer = "pickle" -# noinspection PyUnusedName -app.conf.accept_content = {"pickle"} - ################################################################################ # Configuration reader/validator ################################################################################ + class ConfigurationInit(object): """ Class to deal with all configuration loading and validation. @@ -92,14 +83,18 @@ def _worker_check(self) -> None: elif value == "remote": globals.g_remote = True else: - raise CbInvalidConfig(f"{self.source} has an invalid 'worker_type' ({value})") + raise CbInvalidConfig( + f"{self.source} has an invalid 'worker_type' ({value})" + ) - globals.g_yara_rules_dir = self._as_path("yara_rules_dir", required=True, exists=True, is_dir=True) + globals.g_yara_rules_dir = self._as_path( + "yara_rules_dir", required=True, exists=True, is_dir=True + ) # local/remote configuration data globals.g_cb_server_url = self._as_str("cb_server_url", required=True) globals.g_cb_server_token = self._as_str("cb_server_token", required=True) - + value = self._as_str("broker_url", required=True) app.conf.update(broker_url=value, result_backend=value) @@ -112,37 +107,69 @@ def _extended_check(self) -> None: :raises CbInvalidConfig: :raises ValueError: """ - globals.g_postgres_host = self._as_str("postgres_host", default=globals.g_postgres_host) - globals.g_postgres_username = self._as_str("postgres_username", default=globals.g_postgres_username) + globals.g_postgres_host = self._as_str( + "postgres_host", default=globals.g_postgres_host + ) + globals.g_postgres_username = self._as_str( + "postgres_username", default=globals.g_postgres_username + ) globals.g_postgres_password = self._as_str("postgres_password", required=True) - globals.g_postgres_db = self._as_str("postgres_db", default=globals.g_postgres_username) - globals.g_postgres_port = self._as_int("postgres_port", default=globals.g_postgres_port) + globals.g_postgres_db = self._as_str( + "postgres_db", default=globals.g_postgres_username + ) + globals.g_postgres_port = self._as_int( + "postgres_port", default=globals.g_postgres_port + ) value = self._as_int("niceness") if value: os.nice(value) - globals.g_max_hashes = self._as_int("concurrent_hashes", 
default=globals.g_max_hashes) - globals.g_disable_rescan = self._as_bool("disable_rescan", default=globals.g_disable_rescan) - globals.g_num_days_binaries = self._as_int("num_days_binaries", default=globals.g_num_days_binaries, - min_value=1) - - globals.g_vacuum_interval = self._as_int("vacuum_interval", default=globals.g_vacuum_interval, min_value=0) + globals.g_max_hashes = self._as_int( + "concurrent_hashes", default=globals.g_max_hashes + ) + globals.g_disable_rescan = self._as_bool( + "disable_rescan", default=globals.g_disable_rescan + ) + globals.g_num_days_binaries = self._as_int( + "num_days_binaries", default=globals.g_num_days_binaries, min_value=1 + ) + + globals.g_vacuum_interval = self._as_int( + "vacuum_interval", default=globals.g_vacuum_interval, min_value=0 + ) if globals.g_vacuum_interval > 0: - globals.g_vacuum_script = self._as_path("vacuum_script", required=True, is_dir=False, - default=globals.g_vacuum_script) - logger.warning(f"Vacuum Script '{globals.g_vacuum_script}' is enabled; " + - "use this advanced feature at your own discretion!") + globals.g_vacuum_script = self._as_path( + "vacuum_script", + required=True, + is_dir=False, + default=globals.g_vacuum_script, + ) + logger.warning( + f"Vacuum Script '{globals.g_vacuum_script}' is enabled; " + + "use this advanced feature at your own discretion!" + ) else: - if self._as_path("vacuum_script", required=False, default=globals.g_vacuum_script): - logger.debug(f"{self.source} has 'vacuum_script' defined, but it is disabled") + if self._as_path( + "vacuum_script", required=False, default=globals.g_vacuum_script + ): + logger.debug( + f"{self.source} has 'vacuum_script' defined, but it is disabled" + ) - globals.g_feed_database_dir = self._as_path("feed_database_dir", required=True, is_dir=True, - default=globals.g_feed_database_dir, create_if_needed=True) + globals.g_feed_database_dir = self._as_path( + "feed_database_dir", + required=True, + is_dir=True, + default=globals.g_feed_database_dir, + create_if_needed=True, + ) # ----- Type Handlers - def _as_str(self, param: str, required: bool = False, default: str = None) -> Optional[str]: + def _as_str( + self, param: str, required: bool = False, default: str = None + ) -> Optional[str]: """ Get a string parameter from the configuration. 
@@ -155,19 +182,30 @@ def _as_str(self, param: str, required: bool = False, default: str = None) -> Op try: value = self.the_config.get(param, None) except Exception as err: - raise CbInvalidConfig(f"{self.source} parameter '{param}' cannot be parsed: {err}") + raise CbInvalidConfig( + f"{self.source} parameter '{param}' cannot be parsed: {err}" + ) if value is not None: value = value.strip() if (value is None or value == "") and default is not None: value = default - logger.warning(f"{self.source} has no defined '{param}'; using default of '{default}'") + logger.warning( + f"{self.source} has no defined '{param}'; using default of '{default}'" + ) if required and (value is None or value == ""): raise CbInvalidConfig(f"{self.source} has no '{param}' definition") return value - def _as_path(self, param: str, required: bool = False, exists: bool = True, is_dir: bool = False, - default: str = None, create_if_needed: bool = False) -> Optional[str]: + def _as_path( + self, + param: str, + required: bool = False, + exists: bool = True, + is_dir: bool = False, + default: str = None, + create_if_needed: bool = False, + ) -> Optional[str]: """ Get an string parameter from the configuration and treat it as a path, performing normalization to produce an absolute path. a "~/" at the beginning will be treated as the current user's home @@ -193,19 +231,33 @@ def _as_path(self, param: str, required: bool = False, exists: bool = True, is_d try: os.makedirs(value) except Exception as err: - raise CbInvalidConfig(f"{self.source} unable to create '{value}' for '{param}': {err}") + raise CbInvalidConfig( + f"{self.source} unable to create '{value}' for '{param}': {err}" + ) else: - raise CbInvalidConfig(f"{self.source} specified path parameter '{param}' ({value}) does not exist") + raise CbInvalidConfig( + f"{self.source} specified path parameter '{param}' ({value}) does not exist" + ) if is_dir: if not os.path.isdir(value): - raise CbInvalidConfig(f"{self.source} specified path '{param}' ({value}) is not a directory") + raise CbInvalidConfig( + f"{self.source} specified path '{param}' ({value}) is not a directory" + ) else: if os.path.isdir(value): - raise CbInvalidConfig(f"{self.source} specified path '{param}' ({value}) is a directory") + raise CbInvalidConfig( + f"{self.source} specified path '{param}' ({value}) is a directory" + ) return value - def _as_int(self, param: str, required: bool = False, default: int = None, min_value: int = -1) -> Optional[int]: + def _as_int( + self, + param: str, + required: bool = False, + default: int = None, + min_value: int = -1, + ) -> Optional[int]: """ Get an integer configuration parameter from the configuration. A parameter that cannot be converted to an int will return a ValueError. 
@@ -221,13 +273,19 @@ def _as_int(self, param: str, required: bool = False, default: int = None, min_v value = self._as_str(param, required) use_default = default if default is None else max(default, min_value) if (value is None or value == "") and use_default is not None: - logger.warning(f"{self.source} has no defined '{param}'; using default of '{use_default}'") + logger.warning( + f"{self.source} has no defined '{param}'; using default of '{use_default}'" + ) return use_default else: - return None if (value is None or value == "") else max(int(value), min_value) + return ( + None if (value is None or value == "") else max(int(value), min_value) + ) # noinspection PySameParameterValue - def _as_bool(self, param: str, required: bool = False, default: bool = None) -> Optional[bool]: + def _as_bool( + self, param: str, required: bool = False, default: bool = None + ) -> Optional[bool]: """ Get a boolean configuration parameter from the configuration. A parameter not one of ["true", "yes", "false", "no"] will return a ValueError. @@ -239,10 +297,20 @@ def _as_bool(self, param: str, required: bool = False, default: bool = None) -> :raises ValueError: """ value = self._as_str(param, required) - if value is not None and value.lower() not in ["true", "yes", "false", "no", ""]: - raise ValueError(f"{self.source} parameter '{param}' is not a valid boolean value") + if value is not None and value.lower() not in [ + "true", + "yes", + "false", + "no", + "", + ]: + raise ValueError( + f"{self.source} parameter '{param}' is not a valid boolean value" + ) if value is None and default is not None: - logger.warning(f"{self.source} has no defined '{param}'; using default of '{default}'") + logger.warning( + f"{self.source} has no defined '{param}'; using default of '{default}'" + ) return default else: return value if value is None else value.lower() in ["true", "yes"] From df23858edb11013bac7bc5a6c147c91a81a42e55 Mon Sep 17 00:00:00 2001 From: Zachary Estep Date: Mon, 4 Nov 2019 15:50:16 -0500 Subject: [PATCH 119/257] Trying to fix app import cycle --- src/celery_app.py | 10 ++++++++++ src/config_handling.py | 5 ++--- src/main.py | 3 ++- src/tasks.py | 8 +------- 4 files changed, 15 insertions(+), 11 deletions(-) create mode 100644 src/celery_app.py diff --git a/src/celery_app.py b/src/celery_app.py new file mode 100644 index 0000000..9840418 --- /dev/null +++ b/src/celery_app.py @@ -0,0 +1,10 @@ +from celery import Celery + + +app = Celery() +# noinspection PyUnusedName +app.conf.task_serializer = "pickle" +# noinspection PyUnusedName +app.conf.result_serializer = "pickle" +# noinspection PyUnusedName +app.conf.accept_content = {"pickle"} \ No newline at end of file diff --git a/src/config_handling.py b/src/config_handling.py index 1cdaea6..68f5f1e 100644 --- a/src/config_handling.py +++ b/src/config_handling.py @@ -10,12 +10,11 @@ import globals from exceptions import CbInvalidConfig - -from tasks import app +from celery_app import app logger = logging.getLogger(__name__) -__all__ = ["ConfigurationInit", "app"] +__all__ = ["ConfigurationInit"] ################################################################################ diff --git a/src/main.py b/src/main.py index c240f30..452440b 100644 --- a/src/main.py +++ b/src/main.py @@ -36,7 +36,8 @@ from binary_database import BinaryDetonationResult, db from config_handling import ConfigurationInit from feed import CbFeed, CbFeedInfo, CbReport -from tasks import analyze_binary, app, generate_rule_map, update_yara_rules_remote +from tasks import 
analyze_binary, generate_rule_map, update_yara_rules_remote +from celery_app import app logging_format = "%(asctime)s-%(name)s-%(lineno)d-%(levelname)s-%(message)s" logging.basicConfig(format=logging_format) diff --git a/src/tasks.py b/src/tasks.py index bd2ae79..191f040 100644 --- a/src/tasks.py +++ b/src/tasks.py @@ -20,13 +20,7 @@ from analysis_result import AnalysisResult from config_handling import ConfigurationInit -app = Celery() -# noinspection PyUnusedName -app.conf.task_serializer = "pickle" -# noinspection PyUnusedName -app.conf.result_serializer = "pickle" -# noinspection PyUnusedName -app.conf.accept_content = {"pickle"} +from celery_app import app logger = logging.getLogger(__name__) logger.setLevel(logging.DEBUG) From 71b81489bff1b3c1bc4cfe5ba6a7fb16f62527f6 Mon Sep 17 00:00:00 2001 From: Zachary Estep Date: Mon, 4 Nov 2019 15:55:48 -0500 Subject: [PATCH 120/257] Using fetchall --- src/main.py | 72 ++++++++++++++++++++++++++--------------------------- 1 file changed, 36 insertions(+), 36 deletions(-) diff --git a/src/main.py b/src/main.py index 452440b..51acfd5 100644 --- a/src/main.py +++ b/src/main.py @@ -337,45 +337,28 @@ def perform(yara_rule_dir: str, conn, scanning_promises_queue: Queue): vacuum_window_start = datetime.now() cur = get_binary_file_cursor(conn, start_date_binaries) - rows = cur.fetchmany(2000) + rows = cur.fetchall() num_total_binaries = len(rows) - while num_total_binaries > 0: - logger.info(f"Enumerating modulestore...found {len(rows)} resident binaries") + logger.info(f"Enumerating modulestore...found {len(rows)} resident binaries") - md5_hashes = filter(_check_hash_against_feed, (row[0].hex() for row in rows)) + md5_hashes = filter(_check_hash_against_feed, (row[0].hex() for row in rows)) - # logger.debug(f"After filtering...found new {len(md5_hashes)} hashes to scan") + # logger.debug(f"After filtering...found new {len(md5_hashes)} hashes to scan") - analyze_binaries_and_queue_chunked(scanning_promises_queue, md5_hashes) - - """ - Holding the named-cursor through a large historical result set - will cause storefiles table fragmentation - After a configurable amount of time - use the configured - script to vacuum the table by hand before continuing - """ - - if globals.g_vacuum_interval > 0: - seconds_since_start = (datetime.now() - vacuum_window_start).seconds - if seconds_since_start >= globals.g_vacuum_interval * 60: - # close connection - cur.close() - conn.commit() - - execute_script() - vacuum_window_start = datetime.now() - - # get the connection back - cur = get_binary_file_cursor(conn, start_date_binaries) - - rows = cur.fetchmany(2000) - num_total_binaries = len(rows) + analyze_binaries_and_queue_chunked(scanning_promises_queue, md5_hashes) # Closing since there are no more binaries of interest to scan cur.close() conn.commit() + if globals.g_vacuum_interval > 0: + seconds_since_start = (datetime.now() - vacuum_window_start).seconds + if seconds_since_start >= globals.g_vacuum_interval * 60: + # close connection + execute_script() + vacuum_window_start = datetime.now() + logger.debug("Exiting database sweep routine") @@ -532,7 +515,9 @@ def wait_all_worker_exit(): logger.debug("Main thread going to exit...") -def start_workers(exit_event: Event, scanning_promises_queue: Queue, scanning_results_queue: Queue) -> None: +def start_workers( + exit_event: Event, scanning_promises_queue: Queue, scanning_results_queue: Queue +) -> None: """ Starts worker-threads (not celery workers). 
Worker threads do work until they get the exit_event signal :param exit_event: event signaller @@ -540,17 +525,23 @@ def start_workers(exit_event: Event, scanning_promises_queue: Queue, scanning_re :param scanning_results_queue: results queue """ logger.debug("Starting perf thread") - perf_thread = DatabaseScanningThread(globals.g_scanning_interval, scanning_promises_queue, exit_event) + perf_thread = DatabaseScanningThread( + globals.g_scanning_interval, scanning_promises_queue, exit_event + ) perf_thread.start() logger.debug("Starting promise thread(s)") for _ in range(2): - promise_worker_thread = Thread(target=promise_worker, args=(exit_event, scanning_promises_queue, - scanning_results_queue)) + promise_worker_thread = Thread( + target=promise_worker, + args=(exit_event, scanning_promises_queue, scanning_results_queue), + ) promise_worker_thread.start() logger.debug("Starting results saver thread") - results_worker_thread = Thread(target=results_worker_chunked, args=(exit_event, scanning_results_queue)) + results_worker_thread = Thread( + target=results_worker_chunked, args=(exit_event, scanning_results_queue) + ) results_worker_thread.start() @@ -561,7 +552,14 @@ class DatabaseScanningThread(Thread): by the signal handler """ - def __init__(self, interval: int, scanning_promises_queue: Queue, exit_event: Event, *args, **kwargs): + def __init__( + self, + interval: int, + scanning_promises_queue: Queue, + exit_event: Event, + *args, + **kwargs, + ): """ :param interval: @@ -755,7 +753,9 @@ def main(): # only connect to cbr if we're the master if run_as_master: init_local_resources() - start_workers(exit_event, scanning_promise_queue, scanning_results_queue) + start_workers( + exit_event, scanning_promise_queue, scanning_results_queue + ) # start local celery if working mode is local if not globals.g_remote: start_celery_worker_thread(args.config_file) From c4fb76b100f86afc3460e8043a5a70e3521c2d8b Mon Sep 17 00:00:00 2001 From: Zachary Estep Date: Mon, 4 Nov 2019 15:57:21 -0500 Subject: [PATCH 121/257] using fetchall --- src/main.py | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/src/main.py b/src/main.py index 51acfd5..81a7be6 100644 --- a/src/main.py +++ b/src/main.py @@ -338,27 +338,26 @@ def perform(yara_rule_dir: str, conn, scanning_promises_queue: Queue): cur = get_binary_file_cursor(conn, start_date_binaries) rows = cur.fetchall() + # Closing since there are no more binaries of interest to scan + cur.close() + conn.commit() num_total_binaries = len(rows) logger.info(f"Enumerating modulestore...found {len(rows)} resident binaries") md5_hashes = filter(_check_hash_against_feed, (row[0].hex() for row in rows)) - # logger.debug(f"After filtering...found new {len(md5_hashes)} hashes to scan") + logger.debug(f"After filtering...found new {len(md5_hashes)} hashes to scan") analyze_binaries_and_queue_chunked(scanning_promises_queue, md5_hashes) - # Closing since there are no more binaries of interest to scan - cur.close() - conn.commit() - if globals.g_vacuum_interval > 0: seconds_since_start = (datetime.now() - vacuum_window_start).seconds if seconds_since_start >= globals.g_vacuum_interval * 60: # close connection execute_script() vacuum_window_start = datetime.now() - + logger.debug("Exiting database sweep routine") From 9e3410bb28f65f9a0660d78ffb62dddccfb59334 Mon Sep 17 00:00:00 2001 From: Zachary Estep Date: Mon, 4 Nov 2019 16:00:26 -0500 Subject: [PATCH 122/257] updates to not use traceback .format_exc --- src/main.py | 18 ++++++------------ 1 file 
changed, 6 insertions(+), 12 deletions(-) diff --git a/src/main.py b/src/main.py index 81a7be6..62b4427 100644 --- a/src/main.py +++ b/src/main.py @@ -12,7 +12,6 @@ import sys import threading import time -import traceback from datetime import datetime, timedelta from functools import partial from queue import Empty, Queue @@ -257,8 +256,7 @@ def save_result(analysis_result): bdr.save() globals.g_num_binaries_analyzed += 1 except Exception as err: - logger.error("Error saving to database: {0}".format(err)) - logger.error(traceback.format_exc()) + logger.exception("Error saving to database: {0}".format(err)) else: if analysis_result.score > 0: generate_feed_from_db() @@ -594,12 +592,12 @@ def do_db_scan(self): try: perform(globals.g_yara_rules_dir, self._conn, self._scanning_promises_queue) except Exception as e: - logger.error( - f"Something went wrong sweeping the CbR module store...{str(e)} \n {traceback.format_exc()}" + logger.exception( + f"Something went wrong sweeping the CbR module store...{str(e)} " ) def run(self): - """ Represents the lifetime of the thread """ + """ Represents the lifetime of the thread """ try: if self._target: @@ -710,9 +708,7 @@ def main(): yara.compile(filepaths=yara_rule_map) logger.info("All yara rules compiled successfully") except Exception as err: - logger.error( - f"There were errors compiling yara rules: {err}\n{traceback.format_exc()}" - ) + logger.exception(f"There were errors compiling yara rules: {err}") sys.exit(5) else: exit_event = Event() @@ -772,9 +768,7 @@ def main(): exit_event.set() sys.exit(3) except Exception as err: - logger.error( - f"There were errors executing yara rules: {err}\n{traceback.format_exc()}" - ) + logger.exception(f"There were errors executing yara rules: {err}") exit_event.set() sys.exit(4) From 7580db410fffb6a9eb09e6ed830862247183e20f Mon Sep 17 00:00:00 2001 From: Zachary Estep Date: Mon, 4 Nov 2019 16:05:27 -0500 Subject: [PATCH 123/257] fixup --- src/main.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/main.py b/src/main.py index 62b4427..25ddea2 100644 --- a/src/main.py +++ b/src/main.py @@ -345,7 +345,7 @@ def perform(yara_rule_dir: str, conn, scanning_promises_queue: Queue): md5_hashes = filter(_check_hash_against_feed, (row[0].hex() for row in rows)) - logger.debug(f"After filtering...found new {len(md5_hashes)} hashes to scan") + #logger.debug(f"After filtering...found new {len(md5_hashes)} hashes to scan") analyze_binaries_and_queue_chunked(scanning_promises_queue, md5_hashes) From 81e929096929d353807c512022c80b518d98c6b5 Mon Sep 17 00:00:00 2001 From: Zachary Estep Date: Mon, 4 Nov 2019 16:07:47 -0500 Subject: [PATCH 124/257] Fixing rpm build --- MANIFEST | 3 ++- cb-yara-connector.rpm.spec | 2 +- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/MANIFEST b/MANIFEST index 9c66302..5f8315c 100644 --- a/MANIFEST +++ b/MANIFEST @@ -1,4 +1,5 @@ /usr/share/cb/integrations/yaraconnector/yaraconnector %dir /usr/share/cb/integrations/yaraconnector %dir /var/log/cb/integrations/yaraconnector -/etc/init/yaraconnector.conf \ No newline at end of file +/etc/init/yaraconnector.conf +/etc/cb/integrations/yaraconnector/yaraconnector.conf.example \ No newline at end of file diff --git a/cb-yara-connector.rpm.spec b/cb-yara-connector.rpm.spec index d9c90f7..e9507a3 100644 --- a/cb-yara-connector.rpm.spec +++ b/cb-yara-connector.rpm.spec @@ -16,7 +16,7 @@ mkdir -p ${RPM_BUILD_ROOT}/var/log/cb/integrations/yaraconnector mkdir -p ${RPM_BUILD_ROOT}/usr/share/cb/integrations/yaraconnector mkdir 
-p ${RPM_BUILD_ROOT}/etc/cb/integrations/yaraconnector mkdir -p ${RPM_BUILD_ROOT}/etc/init -cp yara.conf ${RPMB_BUILD_ROOT}/etc/cb/integrations/yaraconnector/yaraconnector.conf.example +cp yara.conf ${RPM_BUILD_ROOT}/etc/cb/integrations/yaraconnector/yaraconnector.conf.example install -m 0755 init-scripts/yaraconnector.conf ${RPM_BUILD_ROOT}/etc/init/yaraconnector.conf install -m 0755 dist/yaraconnector ${RPM_BUILD_ROOT}/usr/share/cb/integrations/yaraconnector/yaraconnector From 5361ed13114526a0c5ed004c915711d55743d846 Mon Sep 17 00:00:00 2001 From: Zachary Estep Date: Mon, 4 Nov 2019 16:21:22 -0500 Subject: [PATCH 125/257] Addressing review comments as per dseidel-cb --- config_handling.py | 14 +++--- globals.py | 4 +- main.py | 105 +++++++++++++++++++++++++++++---------------- 3 files changed, 78 insertions(+), 45 deletions(-) diff --git a/config_handling.py b/config_handling.py index 558137c..d1c938e 100644 --- a/config_handling.py +++ b/config_handling.py @@ -113,15 +113,15 @@ def _extended_check(self) -> None: globals.g_num_days_binaries = self._as_int("num_days_binaries", default=globals.g_num_days_binaries, min_value=1) - globals.g_vacuum_interval = self._as_int("vacuum_interval", default=globals.g_vacuum_interval, min_value=0) - if globals.g_vacuum_interval > 0: - globals.g_vacuum_script = self._as_path("vacuum_script", required=True, is_dir=False, - default=globals.g_vacuum_script) - logger.warning(f"Vacuum Script '{globals.g_vacuum_script}' is enabled; " + + globals.g_utility_interval = self._as_int("utility_interval", default=globals.g_utility_interval, min_value=0) + if globals.g_utility_interval > 0: + globals.g_utility_script = self._as_path("utility_script", required=True, is_dir=False, + default=globals.g_utility_script) + logger.warning(f" Script '{globals.g_utility_script}' is enabled; " + "use this advanced feature at your own discretion!") else: - if self._as_path("vacuum_script", required=False, default=globals.g_vacuum_script): - logger.debug(f"{self.source} has 'vacuum_script' defined, but it is disabled") + if self._as_path("utility_script", required=False, default=globals.g_utility_script): + logger.debug(f"{self.source} has 'utility_script' defined, but it is disabled") globals.g_feed_database_dir = self._as_path("feed_database_dir", required=True, is_dir=True, default=globals.g_feed_database_dir, create_if_needed=True) diff --git a/globals.py b/globals.py index 80270e1..fd3501e 100644 --- a/globals.py +++ b/globals.py @@ -35,7 +35,7 @@ # the vacuum interval, if 1 or greater, is the number of minutes between invocations of the # configured vacuum script -g_vacuum_interval = -1 -g_vacuum_script = "../scripts/vacuumscript.sh" +g_utility_interval = -1 +g_utility_script = "../scripts/vacuumscript.sh" g_feed_database_dir = "./feed_db" diff --git a/main.py b/main.py index 98972ae..6f78894 100644 --- a/main.py +++ b/main.py @@ -10,12 +10,12 @@ import subprocess import sys import time -import traceback from datetime import datetime, timedelta from typing import List, Optional import humanfriendly import psycopg2 + # noinspection PyPackageRequirements import yara from celery import group @@ -156,8 +156,7 @@ def analyze_binaries(md5_hashes: List[str], local: bool) -> Optional: else: time.sleep(0.1) except Exception as err: - logger.error("Error when analyzing: {0}".format(err)) - logger.error(traceback.format_exc()) + logger.exception("Error when analyzing: {0}".format(err)) time.sleep(5) return None else: @@ -190,8 +189,7 @@ def save_results(analysis_results: 
List[AnalysisResult]) -> None: bdr.save() globals.g_num_binaries_analyzed += 1 except Exception as err: - logger.error("Error saving to database: {0}".format(err)) - logger.error(traceback.format_exc()) + logger.exception("Error saving to database: {0}".format(err)) else: if analysis_result.score > 0: generate_feed_from_db() @@ -236,10 +234,14 @@ def get_cursor(conn, start_date_binaries: datetime): def execute_script() -> None: """ - Execute an external maintenance script (vacuum script). + Execute an external maintenance script (utility script). """ - logger.info("----- Executing vacuum script ----------------------------------------") - prog = subprocess.Popen(globals.g_vacuum_script, shell=True, universal_newlines=True) + logger.info( + "----- Executing utility script ----------------------------------------" + ) + prog = subprocess.Popen( + globals.g_utility_script, shell=True, universal_newlines=True + ) stdout, stderr = prog.communicate() if stdout is not None and len(stdout.strip()) > 0: logger.info(stdout) @@ -247,7 +249,9 @@ def execute_script() -> None: logger.error(stderr) if prog.returncode: logger.warning(f"program returned error code {prog.returncode}") - logger.info("---------------------------------------- Vacuum script completed -----\n") + logger.info( + "---------------------------------------- utility script completed -----\n" + ) def perform(yara_rule_dir: str) -> None: @@ -270,8 +274,8 @@ def perform(yara_rule_dir: str) -> None: # Determine our binaries window (date forward) start_date_binaries = datetime.now() - timedelta(days=globals.g_num_days_binaries) - # vacuum script window start - vacuum_window_start = datetime.now() + # utility script window start + utility_window_start = datetime.now() # make the connection, get the info, get out conn = get_database_conn() @@ -282,11 +286,11 @@ def perform(yara_rule_dir: str) -> None: logger.info(f"Enumerating modulestore...found {len(rows)} resident binaries") for row in rows: - if globals.g_vacuum_interval > 0: - seconds_since_start = (datetime.now() - vacuum_window_start).seconds - if seconds_since_start >= globals.g_vacuum_interval * 60: + if globals.g_utility_interval > 0: + seconds_since_start = (datetime.now() - utility_window_start).seconds + if seconds_since_start >= globals.g_utility_interval * 60: execute_script() - vacuum_window_start = datetime.now() + utility_window_start = datetime.now() num_total_binaries += 1 md5_hash = row[0].hex() @@ -300,12 +304,16 @@ def perform(yara_rule_dir: str) -> None: # if we hit our hash chunking limit, save and reset if len(md5_hashes) >= globals.g_max_hashes: - _analyze_save_and_log(md5_hashes, start_time, num_binaries_skipped, num_total_binaries) + _analyze_save_and_log( + md5_hashes, start_time, num_binaries_skipped, num_total_binaries + ) md5_hashes = [] # any finishup work if len(md5_hashes) > 0: - _analyze_save_and_log(md5_hashes, start_time, num_binaries_skipped, num_total_binaries) + _analyze_save_and_log( + md5_hashes, start_time, num_binaries_skipped, num_total_binaries + ) generate_feed_from_db() @@ -317,7 +325,9 @@ def _check_hash_against_feed(md5_hash: str) -> bool: :param md5_hash: hash to be checked :return: True if the binary does not exist """ - query = BinaryDetonationResult.select().where(BinaryDetonationResult.md5 == md5_hash) + query = BinaryDetonationResult.select().where( + BinaryDetonationResult.md5 == md5_hash + ) if query.exists(): try: @@ -335,8 +345,12 @@ def _check_hash_against_feed(md5_hash: str) -> bool: return True -def _analyze_save_and_log(hashes:
List[str], start_time: float, num_binaries_skipped: int, - num_total_binaries: int) -> None: +def _analyze_save_and_log( + hashes: List[str], + start_time: float, + num_binaries_skipped: int, + num_total_binaries: int, +) -> None: """ Analyze and save any found binaries. @@ -348,8 +362,12 @@ def _analyze_save_and_log( analysis_results = analyze_binaries(hashes, local=(not globals.g_remote)) if analysis_results: for analysis_result in analysis_results: - logger.debug((f"Analysis result is {analysis_result.md5} {analysis_result.binary_not_available}" - f" {analysis_result.long_result} {analysis_result.last_error_msg}")) + logger.debug( + ( + f"Analysis result is {analysis_result.md5} {analysis_result.binary_not_available}" + f" {analysis_result.long_result} {analysis_result.last_error_msg}" + ) + ) if analysis_result.last_error_msg: logger.error(analysis_result.last_error_msg) save_results(analysis_results) @@ -357,7 +375,9 @@ def _analyze_save_and_log( _rule_logging(start_time, num_binaries_skipped, num_total_binaries) -def _rule_logging(start_time: float, num_binaries_skipped: int, num_total_binaries: int) -> None: +def _rule_logging( + start_time: float, num_binaries_skipped: int, num_total_binaries: int +) -> None: """ Simple method to log yara work. @@ -369,10 +389,18 @@ def _rule_logging(start_time: float, num_binaries_skipped: int, num_total_binari logger.info("elapsed time: {0}".format(humanfriendly.format_timespan(elapsed_time))) logger.debug(f" number binaries scanned: {globals.g_num_binaries_analyzed}") logger.debug(f" number binaries already scanned: {num_binaries_skipped}") - logger.debug(f" number binaries unavailable: {globals.g_num_binaries_not_available}") + logger.debug( + f" number binaries unavailable: {globals.g_num_binaries_not_available}" + ) logger.info(f"total binaries from db: {num_total_binaries}") - logger.debug(" binaries per second: {0}:".format(round(num_total_binaries / elapsed_time, 2))) - overzero = len(BinaryDetonationResult.select().where(BinaryDetonationResult.score > 0)) + logger.debug( + " binaries per second: {0}:".format( + round(num_total_binaries / elapsed_time, 2) + ) + ) + overzero = len( + BinaryDetonationResult.select().where(BinaryDetonationResult.score > 0) + ) logger.info(f"num binaries score greater than zero: {overzero}\n") @@ -380,6 +408,7 @@ def _rule_logging(start_time: float, num_binaries_skipped: int, num_total_binari # Main entrypoint ################################################################################ + def handle_arguments(): """ Setup the main program options.
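For reference, the interval gate that this patch renames from vacuum to utility reduces to a few lines. A self-contained sketch (the function name and signature are illustrative; only the shell invocation and the `.seconds` arithmetic mirror the patch):

```python
import subprocess
from datetime import datetime


def maybe_run_script(window_start: datetime, interval_minutes: int, script: str) -> datetime:
    """Run `script` if at least `interval_minutes` have elapsed; return the new window start."""
    if interval_minutes > 0 and (datetime.now() - window_start).seconds >= interval_minutes * 60:
        prog = subprocess.Popen(script, shell=True, universal_newlines=True)
        prog.communicate()  # block until the script finishes before resuming the sweep
        return datetime.now()
    return window_start
```

Note that `timedelta.seconds`, as used here, wraps every 24 hours; `total_seconds()` would avoid that edge case, but the sketch mirrors the code as written.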
@@ -396,12 +425,12 @@ def handle_arguments(): parser.add_argument( "--log-file", default="./yara_agent.log", - help="Log file output (defaults to `local` folder)" + help="Log file output (defaults to `local` folder)", ) parser.add_argument( "--output-file", default="./yara_feed.json", - help="output feed file (defaults to `local` folder)" + help="output feed file (defaults to `local` folder)", ) parser.add_argument( "--validate-yara-rules", @@ -409,9 +438,7 @@ def handle_arguments(): help="ONLY validate yara rules in a specified directory", ) parser.add_argument( - "--debug", - action="store_true", - help="Provide additional logging" + "--debug", action="store_true", help="Provide additional logging" ) return parser.parse_args() @@ -429,7 +456,9 @@ def main(): try: singleton.SingleInstance() except SingleInstanceException as err: - logger.error(f"Only one instance of this script is allowed to run at a time: {err}") + logger.error( + f"Only one instance of this script is allowed to run at a time: {err}" + ) sys.exit(1) args = handle_arguments() @@ -439,7 +468,9 @@ def main(): if args.log_file: use_log_file = os.path.abspath(os.path.expanduser(args.log_file)) formatter = logging.Formatter(logging_format) - handler = logging.handlers.RotatingFileHandler(use_log_file, maxBytes=10 * 1000000, backupCount=10) + handler = logging.handlers.RotatingFileHandler( + use_log_file, maxBytes=10 * 1000000, backupCount=10 + ) handler.setFormatter(formatter) logger.addHandler(handler) else: @@ -459,13 +490,15 @@ def main(): yara.compile(filepaths=yara_rule_map) logger.info("All yara rules compiled successfully") except Exception as err: - logger.error(f"There were errors compiling yara rules: {err}\n{traceback.format_exc()}") + logger.exception(f"There were errors compiling yara rules: {err}") sys.exit(5) else: try: globals.g_yara_rule_map = generate_rule_map(globals.g_yara_rules_dir) generate_yara_rule_map_hash(globals.g_yara_rules_dir) - database = SqliteDatabase(os.path.join(globals.g_feed_database_dir, "binary.db")) + database = SqliteDatabase( + os.path.join(globals.g_feed_database_dir, "binary.db") + ) db.initialize(database) db.connect() db.create_tables([BinaryDetonationResult]) @@ -475,7 +508,7 @@ def main(): logger.info("\n\n##### Interrupted by User!\n") sys.exit(3) except Exception as err: - logger.error(f"There were errors executing yara rules: {err}\n{traceback.format_exc()}") + logger.error(f"There were errors executing yara rules: {err}") sys.exit(4) From 39f4bc4b75de6be27100ac8bbc0265e1c7f10444 Mon Sep 17 00:00:00 2001 From: Zachary Estep Date: Mon, 4 Nov 2019 16:29:14 -0500 Subject: [PATCH 126/257] More MR comments addressed --- README.md | 14 +++++++------- main.py | 10 +++++----- 2 files changed, 12 insertions(+), 12 deletions(-) diff --git a/README.md b/README.md index a6807de..da56c7c 100644 --- a/README.md +++ b/README.md @@ -172,11 +172,11 @@ _[TBD]_ ``` # Development Notes -## Vacuum Script +## Utility Script Included with this version is a feature for discretionary use by advanced users and should be used with caution. -When `vacuum_interval` is defined with a value greater than 0, it represents the interval +When `utility_interval` is defined with a value greater than 0, it represents the interval in minutes at which the yara agent will pause its work and execute an external shell script, defined by default as `vacuumscript.sh` within the `scripts` folder of the current Yara connector installation. After execution, the Yara agent continues with
```ini ; -; The use of the vacuum script is an ADVANCED FEATURE and should be used with caution! +; The use of the utility script is an ADVANCED FEATURE and should be used with caution! ; -; If "vacuum_interval" is greater than 0 it represents the interval in minutes after which the yara connector will -; pause to execute a shell script for database maintenance. This can present risks. Be careful what you allow the +; If "utility_interval" is greater than 0 it represents the interval in minutes after which the yara connector will +; pause to execute a shell script for general maintenance. This can present risks. Be careful what you allow the ; script to do, and use this option at your own discretion. ; -vacuum_interval=-1 -vacuum_script=./scripts/vacuumscript.sh +utility_interval=-1 +utility_script=./scripts/vacuumscript.sh ``` ## Yara Agent Build Instructions (Centos 6) diff --git a/main.py b/main.py index 6f78894..a7f960a 100644 --- a/main.py +++ b/main.py @@ -135,7 +135,7 @@ def analyze_binaries(md5_hashes: List[str], local: bool) -> Optional: for md5_hash in md5_hashes: results.append(analyze_binary(md5_hash)) except Exception as err: - logger.error("{0}".format(err)) + logger.exception("{0}".format(err)) time.sleep(5) return None else: @@ -340,7 +340,7 @@ def _check_hash_against_feed(md5_hash: str) -> bool: if scanned_hash_list == globals.g_yara_rule_map_hash_list: return False except Exception as err: - logger.error(f"Unable to decode yara rule map hash from database: {err}") + logger.exception(f"Unable to decode yara rule map hash from database: {err}") return False return True @@ -456,7 +456,7 @@ def main(): try: singleton.SingleInstance() except SingleInstanceException as err: - logger.error( + logger.exception( f"Only one instance of this script is allowed to run at a time: {err}" ) sys.exit(1) @@ -480,7 +480,7 @@ def main(): try: ConfigurationInit(args.config_file, use_log_file) except Exception as err: - logger.error(f"Unable to continue due to a configuration problem: {err}") + logger.exception(f"Unable to continue due to a configuration problem: {err}") sys.exit(2) if args.validate_yara_rules: @@ -508,7 +508,7 @@ def main(): logger.info("\n\n##### Interupted by User!\n") sys.exit(3) except Exception as err: - logger.error(f"There were errors executing yara rules: {err}") + logger.exception(f"There were errors executing yara rules: {err}") sys.exit(4) From ab3fc87207717f3e2ffbe917e993468938a56e62 Mon Sep 17 00:00:00 2001 From: Zachary Estep Date: Mon, 4 Nov 2019 16:32:08 -0500 Subject: [PATCH 127/257] updates --- singleton.py | 5 +---- tasks.py | 5 ++--- 2 files changed, 3 insertions(+), 7 deletions(-) diff --git a/singleton.py b/singleton.py index 07d2c25..0399d93 100644 --- a/singleton.py +++ b/singleton.py @@ -1,6 +1,3 @@ -# coding: utf-8 -# Copyright © 2014-2019 VMware, Inc. All Rights Reserved. 
- import fcntl import logging import os @@ -34,7 +31,7 @@ def __init__(self, flavor_id: str = None, lockfile: str = None): self.initialized = False # define the lockfile - if lockfile is not None: + if lockfile: self.lockfile = lockfile else: converted = os.path.splitext(os.path.abspath(sys.argv[0]))[0].replace( diff --git a/tasks.py b/tasks.py index e724f69..a029112 100644 --- a/tasks.py +++ b/tasks.py @@ -102,8 +102,7 @@ def update_yara_rules_remote(yara_rules: dict) -> None: with open(os.path.join(globals.g_yara_rules_dir, key), 'wb') as fp: fp.write(yara_rules[key]) except Exception as err: - logger.error(f"Error writing rule file: {err}") - logger.error(traceback.format_exc()) + logger.exception(f"Error writing rule file: {err}") @app.task @@ -168,7 +167,7 @@ def analyze_binary(md5sum: str) -> AnalysisResult: return analysis_result except Exception as err: error = f"Unexpected error: {err}\n" + traceback.format_exc() - logger.error(error) + logger.exception(error) analysis_result.last_error_msg = error return analysis_result From 756428a1da21b0147e7f5925ad7da920206bf900 Mon Sep 17 00:00:00 2001 From: Burton Choinski Date: Mon, 4 Nov 2019 10:10:30 -0500 Subject: [PATCH 128/257] doc cleanup --- main.py | 80 +++++++++++++++++---------------------------------------- 1 file changed, 24 insertions(+), 56 deletions(-) diff --git a/main.py b/main.py index a7f960a..ebec04a 100644 --- a/main.py +++ b/main.py @@ -239,9 +239,7 @@ def execute_script() -> None: logger.info( "----- Executing utility script ----------------------------------------" ) - prog = subprocess.Popen( - globals.g_utility_script, shell=True, universal_newlines=True - ) + prog = subprocess.Popen(globals.g_utility_script, shell=True, universal_newlines=True) stdout, stderr = prog.communicate() if stdout is not None and len(stdout.strip()) > 0: logger.info(stdout) @@ -249,9 +247,7 @@ def execute_script() -> None: logger.error(stderr) if prog.returncode: logger.warning(f"program returned error code {prog.returncode}") - logger.info( - "---------------------------------------- utility script completed -----\n" - ) + logger.info("---------------------------------------- utility script completed -----\n") def perform(yara_rule_dir: str) -> None: @@ -304,16 +300,12 @@ def perform(yara_rule_dir: str) -> None: # if we hit our hash chunking limit, save and reset if len(md5_hashes) >= globals.g_max_hashes: - _analyze_save_and_log( - md5_hashes, start_time, num_binaries_skipped, num_total_binaries - ) + _analyze_save_and_log(md5_hashes, start_time, num_binaries_skipped, num_total_binaries) md5_hashes = [] # any finishup work if len(md5_hashes) > 0: - _analyze_save_and_log( - md5_hashes, start_time, num_binaries_skipped, num_total_binaries - ) + _analyze_save_and_log(md5_hashes, start_time, num_binaries_skipped, num_total_binaries) generate_feed_from_db() @@ -325,9 +317,7 @@ def _check_hash_against_feed(md5_hash: str) -> bool: :param md5_hash: hash to be checked :return: True if the binary does not exist """ - query = BinaryDetonationResult.select().where( - BinaryDetonationResult.md5 == md5_hash - ) + query = BinaryDetonationResult.select().where(BinaryDetonationResult.md5 == md5_hash) if query.exists(): try: @@ -345,12 +335,8 @@ def _check_hash_against_feed(md5_hash: str) -> bool: return True -def _analyze_save_and_log( - hashes: List[str], - start_time: float, - num_binaries_skipped: int, - num_total_binaries: int, -) -> None: +def _analyze_save_and_log(hashes: List[str], start_time: float, num_binaries_skipped: int, + 
num_total_binaries: int) -> None: """ Analyze and save any found binaries. @@ -362,12 +348,8 @@ def _analyze_save_and_log( analysis_results = analyze_binaries(hashes, local=(not globals.g_remote)) if analysis_results: for analysis_result in analysis_results: - logger.debug( - ( - f"Analysis result is {analysis_result.md5} {analysis_result.binary_not_available}" - f" {analysis_result.long_result} {analysis_result.last_error_msg}" - ) - ) + logger.debug((f"Analysis result is {analysis_result.md5} {analysis_result.binary_not_available}" + f" {analysis_result.long_result} {analysis_result.last_error_msg}")) if analysis_result.last_error_msg: logger.error(analysis_result.last_error_msg) save_results(analysis_results) @@ -375,9 +357,7 @@ def _analyze_save_and_log( _rule_logging(start_time, num_binaries_skipped, num_total_binaries) -def _rule_logging( - start_time: float, num_binaries_skipped: int, num_total_binaries: int -) -> None: +def _rule_logging(start_time: float, num_binaries_skipped: int, num_total_binaries: int) -> None: """ Simple method to log yara work. @@ -389,18 +369,10 @@ def _rule_logging( logger.info("elapsed time: {0}".format(humanfriendly.format_timespan(elapsed_time))) logger.debug(f" number binaries scanned: {globals.g_num_binaries_analyzed}") logger.debug(f" number binaries already scanned: {num_binaries_skipped}") - logger.debug( - f" number binaries unavailable: {globals.g_num_binaries_not_available}" - ) + logger.debug(f" number binaries unavailable: {globals.g_num_binaries_not_available}") logger.info(f"total binaries from db: {num_total_binaries}") - logger.debug( - " binaries per second: {0}:".format( - round(num_total_binaries / elapsed_time, 2) - ) - ) - overzero = len( - BinaryDetonationResult.select().where(BinaryDetonationResult.score > 0) - ) + logger.debug(" binaries per second: {0}:".format(round(num_total_binaries / elapsed_time, 2))) + overzero = len(BinaryDetonationResult.select().where(BinaryDetonationResult.score > 0)) logger.info(f"num binaries score greater than zero: {overzero}\n") @@ -424,21 +396,23 @@ def handle_arguments(): ) parser.add_argument( "--log-file", - default="./yara_agent.log", - help="Log file output (defaults to `local` folder)", + default="yara_agent.log", + help="Log file output" ) parser.add_argument( "--output-file", - default="./yara_feed.json", - help="output feed file (defaults to `local` folder)", + default="yara_feed.json", + help="output feed file" ) parser.add_argument( "--validate-yara-rules", action="store_true", - help="ONLY validate yara rules in a specified directory", + help="Only validate yara rules, then exit", ) parser.add_argument( - "--debug", action="store_true", help="Provide additional logging" + "--debug", + action="store_true", + help="Provide additional logging" ) return parser.parse_args() @@ -456,9 +430,7 @@ def main(): try: singleton.SingleInstance() except SingleInstanceException as err: - logger.exception( - f"Only one instance of this script is allowed to run at a time: {err}" - ) + logger.exception(f"Only one instance of this script is allowed to run at a time: {err}") sys.exit(1) args = handle_arguments() @@ -468,9 +440,7 @@ def main(): if args.log_file: use_log_file = os.path.abspath(os.path.expanduser(args.log_file)) formatter = logging.Formatter(logging_format) - handler = logging.handlers.RotatingFileHandler( - use_log_file, maxBytes=10 * 1000000, backupCount=10 - ) + handler = logging.handlers.RotatingFileHandler(use_log_file, maxBytes=10 * 1000000, backupCount=10)
handler.setFormatter(formatter) logger.addHandler(handler) else: @@ -496,9 +466,7 @@ def main(): try: globals.g_yara_rule_map = generate_rule_map(globals.g_yara_rules_dir) generate_yara_rule_map_hash(globals.g_yara_rules_dir) - database = SqliteDatabase( - os.path.join(globals.g_feed_database_dir, "binary.db") - ) + database = SqliteDatabase(os.path.join(globals.g_feed_database_dir, "binary.db")) db.initialize(database) db.connect() db.create_tables([BinaryDetonationResult]) From 06011be39522fefaf81029c0eefcd3ecebbae2b6 Mon Sep 17 00:00:00 2001 From: Burton Choinski Date: Tue, 5 Nov 2019 07:05:43 -0500 Subject: [PATCH 129/257] Core review changes --- README.md | 9 ++- config_handling.py | 26 ++++--- globals.py | 6 +- main.py | 4 +- samples/sample_local.conf | 10 +-- samples/sample_remote.conf | 10 +-- test/config/bogus_feed_database_dir.conf | 3 - ...l.conf => bogus_maintenance_interval.conf} | 2 +- ...ipt.conf => empty_maintenance_script.conf} | 4 +- ...t_dir.conf => maintenance_script_dir.conf} | 4 +- test/config/maintenance_script_enabled.conf | 22 ++++++ ...l.conf => maintenance_script_missing.conf} | 4 +- .../maintenance_script_no_interval.conf | 22 ++++++ test/config/missing_concurrent_hashes.conf | 3 - test/config/missing_disable_rescan.conf | 3 - test/config/missing_feed_database_dir.conf | 3 - test/config/missing_niceness.conf | 3 - test/config/missing_num_days_binaries.conf | 3 - test/config/missing_postgres_db.conf | 3 - test/config/missing_postgres_db2.conf | 3 - test/config/missing_postgres_host.conf | 3 - test/config/missing_postgres_host2.conf | 3 - test/config/missing_postgres_port.conf | 3 - test/config/missing_postgres_port2.conf | 3 - test/config/missing_postgres_username.conf | 3 - test/config/missing_postgres_username2.conf | 3 - test/config/missing_worker.conf | 3 - test/config/missing_worker2.conf | 3 - .../config/negative_maintenance_interval.conf | 22 ++++++ ...l.conf => no_such_maintenance_script.conf} | 4 +- test/config/vacuum_script_enabled.conf | 22 ------ test/config/valid.conf | 3 - test/config/valid2.conf | 3 - test/test_configInit.py | 68 +++++++++++-------- 34 files changed, 151 insertions(+), 142 deletions(-) rename test/config/{bogus_vacuum_interval.conf => bogus_maintenance_interval.conf} (93%) rename test/config/{no_such_vacuum_script.conf => empty_maintenance_script.conf} (87%) rename test/config/{vacuum_script_dir.conf => maintenance_script_dir.conf} (87%) create mode 100644 test/config/maintenance_script_enabled.conf rename test/config/{vacuum_script_no_interval.conf => maintenance_script_missing.conf} (85%) create mode 100644 test/config/maintenance_script_no_interval.conf create mode 100644 test/config/negative_maintenance_interval.conf rename test/config/{negative_vacuum_interval.conf => no_such_maintenance_script.conf} (85%) delete mode 100644 test/config/vacuum_script_enabled.conf diff --git a/README.md b/README.md index da56c7c..b78e31c 100644 --- a/README.md +++ b/README.md @@ -177,11 +177,14 @@ Included with this version is a feature for discretionary use by advanced users should be used with caution. When `utility_interval` is defined with a value greater than 0, it represents the interval -in minutes at which the yara agent will pause its work and execute an external -shell script, defined by default as `vacuumscript.sh` within the `scripts` folder -of the current Yara connector installation. After execution, the Yara agent continues with +in minutes at which the yara connector will pause its work and execute an external +shell script. 
A sample script, `vacuumscript.sh` is provided within the `scripts` folder +of the current Yara connector installation. After execution, the Yara connector continues with its work. +> _**NOTE:** As a safety for this feature, if an interval is defined but no script is defined, nothing is done. +> By default, no script is defined._ + ```ini ; ; The use of the utility script is an ADVANCED FEATURE and should be used with caution! diff --git a/config_handling.py b/config_handling.py index d1c938e..fe94855 100644 --- a/config_handling.py +++ b/config_handling.py @@ -113,12 +113,19 @@ def _extended_check(self) -> None: globals.g_num_days_binaries = self._as_int("num_days_binaries", default=globals.g_num_days_binaries, min_value=1) - globals.g_utility_interval = self._as_int("utility_interval", default=globals.g_utility_interval, min_value=0) - if globals.g_utility_interval > 0: - globals.g_utility_script = self._as_path("utility_script", required=True, is_dir=False, - default=globals.g_utility_script) - logger.warning(f" Script '{globals.g_utility_script}' is enabled; " + - "use this advanced feature at your own discretion!") + globals.g_maintenance_interval = self._as_int("maintenance_interval", default=globals.g_maintenance_interval, + min_value=0, quiet=True) + if globals.g_maintenance_interval > 0: + if self._as_str("maintenance_script", default=globals.g_maintenance_script) == "": + logger.warning( + f"{self.source} 'maintenance_interval' supplied but no script defined -- feature disabled") + globals.g_maintenance_interval = 0 + globals.g_maintenance_script = "" + else: + globals.g_maintenance_script = self._as_path("maintenance_script", required=False, is_dir=False, + default=globals.g_maintenance_script) + logger.warning(f"{self.source} maintenance Script '{globals.g_maintenance_script}' is enabled; " + + "use this advanced feature at your own discretion!") else: if self._as_path("utility_script", required=False, default=globals.g_utility_script): logger.debug(f"{self.source} has 'utility_script' defined, but it is disabled") @@ -191,7 +198,8 @@ def _as_path(self, param: str, required: bool = False, exists: bool = True, is_d return value - def _as_int(self, param: str, required: bool = False, default: int = None, min_value: int = -1) -> Optional[int]: + def _as_int(self, param: str, required: bool = False, default: int = None, min_value: int = -1, + quiet: bool = False) -> Optional[int]: """ Get an integer configuration parameter from the configuration. A parameter that cannot be converted to an int will return a ValueError. 
@@ -200,6 +208,7 @@ def _as_int(self, param: str, required: bool = False, default: int = None, min_v :param required: True if this must be specified in the configuration :param default: If not required, default value if not supplied :param min_value: minimum value allowed + :param quiet: if True, don't give warning about using default :return: the integer value, or None/default if not required and no exception :raises CbInvalidConfig: :raises ValueError: @@ -207,7 +216,8 @@ def _as_int(self, param: str, required: bool = False, default: int = None, min_v value = self._as_str(param, required) use_default = default if default is None else max(default, min_value) if (value is None or value == "") and use_default is not None: - logger.warning(f"{self.source} has no defined '{param}'; using default of '{use_default}'") + if not quiet: + logger.warning(f"{self.source} has no defined '{param}'; using default of '{use_default}'") return use_default else: return None if (value is None or value == "") else max(int(value), min_value) diff --git a/globals.py b/globals.py index fd3501e..318c8a8 100644 --- a/globals.py +++ b/globals.py @@ -33,9 +33,9 @@ g_disable_rescan = True g_num_days_binaries = 365 -# the vacuum interval, if 1 or greater, is the number of minutes between invocations of the -# configured vacuum script +# the utility interval, if 1 or greater, is the number of minutes between invocations of the +# configured utility script g_utility_interval = -1 -g_utility_script = "../scripts/vacuumscript.sh" +g_utility_script = None g_feed_database_dir = "./feed_db" diff --git a/main.py b/main.py index ebec04a..71dc266 100644 --- a/main.py +++ b/main.py @@ -236,9 +236,7 @@ def execute_script() -> None: """ Execute an external maintenance script (utility script). """ - logger.info( - "----- Executing utility script ----------------------------------------" - ) + logger.info("----- Executing utility script ----------------------------------------") prog = subprocess.Popen(globals.g_utility_script, shell=True, universal_newlines=True) stdout, stderr = prog.communicate() if stdout is not None and len(stdout.strip()) > 0: diff --git a/samples/sample_local.conf b/samples/sample_local.conf index 4756138..e655bf8 100644 --- a/samples/sample_local.conf +++ b/samples/sample_local.conf @@ -58,11 +58,11 @@ feed_database_dir=./feed_db ; -; The use of the vacuum script is an ADVANCED FEATURE and should be used with caution! +; The use of the maintenance script is an ADVANCED FEATURE and should be used with caution! ; -; If "vacuum_interval" is greater than 0 it represents the interval in minutes after which the yara connector will -; pause to execute a shell script for databse maintenance. This can present risks. Be careful what you allow the +; If "maintenance_interval" is greater than 0 it represents the interval in minutes after which the yara connector will +; pause to execute a shell script for database maintenance. This can present risks. Be careful what you allow the ; script to do, and use this option at your own discretion. ; -vacuum_interval=-1 -vacuum_script=./scripts/vacuumscript.sh +maintenance_interval=-1 +maintenance_script=./scripts/vacuumscript.sh diff --git a/samples/sample_remote.conf b/samples/sample_remote.conf index 1666d4b..00e1e3f 100644 --- a/samples/sample_remote.conf +++ b/samples/sample_remote.conf @@ -59,11 +59,11 @@ feed_database_dir=./feed_db ; -; The use of the vacuum script is an ADVANCED FEATURE and should be used with caution!
+; The use of the maintenance script is an ADVANCED FEATURE and should be used with caution! ; -; If "vacuum_interval" is greater than 0 it represents the interval in minutes after which the yara connector will -; pause to execute a shell script for databse maintenance. This can present risks. Be careful what you allow the +; If "maintenance_interval" is greater than 0 it represents the interval in minutes after which the yara connector will +; pause to execute a shell script for database maintenance. This can present risks. Be careful what you allow the ; script to do, and use this option at your own discretion. ; -vacuum_interval=-1 -vacuum_script=./scripts/vacuumscript.sh +maintenance_interval=-1 +maintenance_script=./scripts/vacuumscript.sh diff --git a/test/config/bogus_feed_database_dir.conf b/test/config/bogus_feed_database_dir.conf index 0d8cef4..230c319 100644 --- a/test/config/bogus_feed_database_dir.conf +++ b/test/config/bogus_feed_database_dir.conf @@ -18,8 +18,5 @@ concurrent_hashes=8 disable_rescan=False num_days_binaries=365 -vacuum_interval=360 -vacuum_script=../scripts/vacuumscript.sh - ; file, not dir feed_database_dir=./__init__.py diff --git a/test/config/bogus_vacuum_interval.conf b/test/config/bogus_maintenance_interval.conf similarity index 93% rename from test/config/bogus_vacuum_interval.conf rename to test/config/bogus_maintenance_interval.conf index 0a91c28..7d1a321 100644 --- a/test/config/bogus_vacuum_interval.conf +++ b/test/config/bogus_maintenance_interval.conf @@ -18,4 +18,4 @@ concurrent_hashes=8 disable_rescan=False num_days_binaries=365 -vacuum_interval=BOGUS +maintenance_interval=BOGUS diff --git a/test/config/no_such_vacuum_script.conf b/test/config/empty_maintenance_script.conf similarity index 87% rename from test/config/no_such_vacuum_script.conf rename to test/config/empty_maintenance_script.conf index 12b1fab..8077ede 100644 --- a/test/config/no_such_vacuum_script.conf +++ b/test/config/empty_maintenance_script.conf @@ -18,5 +18,5 @@ concurrent_hashes=8 disable_rescan=False num_days_binaries=365 -vacuum_interval=36 -vacuum_script=no-such-script.sh +maintenance_interval=36 +maintenance_script= diff --git a/test/config/vacuum_script_dir.conf b/test/config/maintenance_script_dir.conf similarity index 87% rename from test/config/vacuum_script_dir.conf rename to test/config/maintenance_script_dir.conf index 393d058..b9e104b 100644 --- a/test/config/vacuum_script_dir.conf +++ b/test/config/maintenance_script_dir.conf @@ -18,6 +18,6 @@ concurrent_hashes=8 disable_rescan=False num_days_binaries=365 -vacuum_interval=360 +maintenance_interval=360 ; invalid, is dir -vacuum_script=./rules +maintenance_script=./rules diff --git a/test/config/maintenance_script_enabled.conf b/test/config/maintenance_script_enabled.conf new file mode 100644 index 0000000..e770125 --- /dev/null +++ b/test/config/maintenance_script_enabled.conf @@ -0,0 +1,22 @@ +[general] +worker_type=local + +; ONLY for worker_type of local +cb_server_url=https://127.0.0.1:443 +cb_server_token=wFC4he6OBd20IBbB0XkhE5ZsjBQUj1GB + +yara_rules_dir=./rules + +postgres_host=localhost +postgres_username=cb +postgres_password=6PGcbuwlQnIuPqOF +postgres_db=cb +postgres_port=5002 + +niceness=1 +concurrent_hashes=8 +disable_rescan=False +num_days_binaries=365 + +maintenance_interval=360 +maintenance_script=../scripts/vacuumscript.sh diff --git a/test/config/vacuum_script_no_interval.conf b/test/config/maintenance_script_missing.conf similarity index 85% rename from 
test/config/vacuum_script_no_interval.conf rename to test/config/maintenance_script_missing.conf index 2dd8511..10013d4 100644 --- a/test/config/vacuum_script_no_interval.conf +++ b/test/config/maintenance_script_missing.conf @@ -18,5 +18,5 @@ concurrent_hashes=8 disable_rescan=False num_days_binaries=365 -vacuum_interval=0 -vacuum_script=../scripts/vacuumscript.sh +maintenance_interval=360 +# MISSING: maintenance_script diff --git a/test/config/maintenance_script_no_interval.conf b/test/config/maintenance_script_no_interval.conf new file mode 100644 index 0000000..c609a3d --- /dev/null +++ b/test/config/maintenance_script_no_interval.conf @@ -0,0 +1,22 @@ +[general] +worker_type=local + +; ONLY for worker_type of local +cb_server_url=https://127.0.0.1:443 +cb_server_token=wFC4he6OBd20IBbB0XkhE5ZsjBQUj1GB + +yara_rules_dir=./rules + +postgres_host=localhost +postgres_username=cb +postgres_password=6PGcbuwlQnIuPqOF +postgres_db=cb +postgres_port=5002 + +niceness=1 +concurrent_hashes=8 +disable_rescan=False +num_days_binaries=365 + +maintenance_interval=0 +maintenance_script=../scripts/vacuumscript.sh diff --git a/test/config/missing_concurrent_hashes.conf b/test/config/missing_concurrent_hashes.conf index 284d424..5d87506 100644 --- a/test/config/missing_concurrent_hashes.conf +++ b/test/config/missing_concurrent_hashes.conf @@ -18,6 +18,3 @@ niceness=1 concurrent_hashes= disable_rescan=False num_days_binaries=365 - -vacuum_interval=360 -vacuum_script=../scripts/vacuumscript.sh diff --git a/test/config/missing_disable_rescan.conf b/test/config/missing_disable_rescan.conf index 887cfa4..4a5078d 100644 --- a/test/config/missing_disable_rescan.conf +++ b/test/config/missing_disable_rescan.conf @@ -18,6 +18,3 @@ concurrent_hashes=8 ; undefined disable_rescan= num_days_binaries=365 - -vacuum_interval=360 -vacuum_script=../scripts/vacuumscript.sh diff --git a/test/config/missing_feed_database_dir.conf b/test/config/missing_feed_database_dir.conf index 143bb87..847c491 100644 --- a/test/config/missing_feed_database_dir.conf +++ b/test/config/missing_feed_database_dir.conf @@ -18,8 +18,5 @@ concurrent_hashes=8 disable_rescan=False num_days_binaries=365 -vacuum_interval=360 -vacuum_script=../scripts/vacuumscript.sh - ; invalid path feed_database_dir=./no-such-directory diff --git a/test/config/missing_niceness.conf b/test/config/missing_niceness.conf index c220bd1..36d1715 100644 --- a/test/config/missing_niceness.conf +++ b/test/config/missing_niceness.conf @@ -18,6 +18,3 @@ niceness= concurrent_hashes=8 disable_rescan=False num_days_binaries=365 - -vacuum_interval=360 -vacuum_script=../scripts/vacuumscript.sh diff --git a/test/config/missing_num_days_binaries.conf b/test/config/missing_num_days_binaries.conf index 5ba6694..1cc21fa 100644 --- a/test/config/missing_num_days_binaries.conf +++ b/test/config/missing_num_days_binaries.conf @@ -18,6 +18,3 @@ concurrent_hashes=8 disable_rescan=False ; undefined num_days_binaries= - -vacuum_interval=360 -vacuum_script=../scripts/vacuumscript.sh diff --git a/test/config/missing_postgres_db.conf b/test/config/missing_postgres_db.conf index a6f6efa..04b0589 100644 --- a/test/config/missing_postgres_db.conf +++ b/test/config/missing_postgres_db.conf @@ -17,6 +17,3 @@ niceness=1 concurrent_hashes=8 disable_rescan=False num_days_binaries=365 - -vacuum_interval=360 -vacuum_script=../scripts/vacuumscript.sh diff --git a/test/config/missing_postgres_db2.conf b/test/config/missing_postgres_db2.conf index 4c22cb3..cd02280 100644 --- 
a/test/config/missing_postgres_db2.conf +++ b/test/config/missing_postgres_db2.conf @@ -18,6 +18,3 @@ niceness=1 concurrent_hashes=8 disable_rescan=False num_days_binaries=365 - -vacuum_interval=360 -vacuum_script=../scripts/vacuumscript.sh diff --git a/test/config/missing_postgres_host.conf b/test/config/missing_postgres_host.conf index 60b2cd6..cc4b323 100644 --- a/test/config/missing_postgres_host.conf +++ b/test/config/missing_postgres_host.conf @@ -18,6 +18,3 @@ niceness=1 concurrent_hashes=8 disable_rescan=False num_days_binaries=365 - -vacuum_interval=360 -vacuum_script=../scripts/vacuumscript.sh diff --git a/test/config/missing_postgres_host2.conf b/test/config/missing_postgres_host2.conf index 23b8cb1..4581a39 100644 --- a/test/config/missing_postgres_host2.conf +++ b/test/config/missing_postgres_host2.conf @@ -19,6 +19,3 @@ niceness=1 concurrent_hashes=8 disable_rescan=False num_days_binaries=365 - -vacuum_interval=360 -vacuum_script=../scripts/vacuumscript.sh diff --git a/test/config/missing_postgres_port.conf b/test/config/missing_postgres_port.conf index 9b471ed..8b69a87 100644 --- a/test/config/missing_postgres_port.conf +++ b/test/config/missing_postgres_port.conf @@ -17,6 +17,3 @@ niceness=1 concurrent_hashes=8 disable_rescan=False num_days_binaries=365 - -vacuum_interval=360 -vacuum_script=../scripts/vacuumscript.sh diff --git a/test/config/missing_postgres_port2.conf b/test/config/missing_postgres_port2.conf index fe0ab83..e88c40f 100644 --- a/test/config/missing_postgres_port2.conf +++ b/test/config/missing_postgres_port2.conf @@ -18,6 +18,3 @@ niceness=1 concurrent_hashes=8 disable_rescan=False num_days_binaries=365 - -vacuum_interval=360 -vacuum_script=../scripts/vacuumscript.sh diff --git a/test/config/missing_postgres_username.conf b/test/config/missing_postgres_username.conf index e809df5..f3b2a50 100644 --- a/test/config/missing_postgres_username.conf +++ b/test/config/missing_postgres_username.conf @@ -17,6 +17,3 @@ niceness=1 concurrent_hashes=8 disable_rescan=False num_days_binaries=365 - -vacuum_interval=360 -vacuum_script=../scripts/vacuumscript.sh diff --git a/test/config/missing_postgres_username2.conf b/test/config/missing_postgres_username2.conf index f3a51cf..12e121e 100644 --- a/test/config/missing_postgres_username2.conf +++ b/test/config/missing_postgres_username2.conf @@ -18,6 +18,3 @@ niceness=1 concurrent_hashes=8 disable_rescan=False num_days_binaries=365 - -vacuum_interval=360 -vacuum_script=../scripts/vacuumscript.sh diff --git a/test/config/missing_worker.conf b/test/config/missing_worker.conf index 25d19e5..ed07cae 100644 --- a/test/config/missing_worker.conf +++ b/test/config/missing_worker.conf @@ -16,6 +16,3 @@ niceness=1 concurrent_hashes=8 disable_rescan=False num_days_binaries=365 - -vacuum_interval=360 -vacuum_script=../scripts/vacuumscript.sh diff --git a/test/config/missing_worker2.conf b/test/config/missing_worker2.conf index 80a3132..123fb8e 100644 --- a/test/config/missing_worker2.conf +++ b/test/config/missing_worker2.conf @@ -17,6 +17,3 @@ niceness=1 concurrent_hashes=8 disable_rescan=False num_days_binaries=365 - -vacuum_interval=360 -vacuum_script=../scripts/vacuumscript.sh diff --git a/test/config/negative_maintenance_interval.conf b/test/config/negative_maintenance_interval.conf new file mode 100644 index 0000000..f62287b --- /dev/null +++ b/test/config/negative_maintenance_interval.conf @@ -0,0 +1,22 @@ +[general] +worker_type=local + +; ONLY for worker_type of local +cb_server_url=https://127.0.0.1:443 
+cb_server_token=wFC4he6OBd20IBbB0XkhE5ZsjBQUj1GB + +yara_rules_dir=./rules + +postgres_host=localhost +postgres_username=cb +postgres_password=6PGcbuwlQnIuPqOF +postgres_db=cb +postgres_port=5002 + +niceness=1 +concurrent_hashes=8 +disable_rescan=False +num_days_binaries=365 + +maintenance_interval=-20 +maintenance_script=../scripts/vacuumscript.sh diff --git a/test/config/negative_vacuum_interval.conf b/test/config/no_such_maintenance_script.conf similarity index 85% rename from test/config/negative_vacuum_interval.conf rename to test/config/no_such_maintenance_script.conf index 0690263..09f70b5 100644 --- a/test/config/negative_vacuum_interval.conf +++ b/test/config/no_such_maintenance_script.conf @@ -18,5 +18,5 @@ concurrent_hashes=8 disable_rescan=False num_days_binaries=365 -vacuum_interval=-20 -vacuum_script=../scripts/vacuumscript.sh +maintenance_interval=36 +maintenance_script=no-such-script.sh diff --git a/test/config/vacuum_script_enabled.conf b/test/config/vacuum_script_enabled.conf deleted file mode 100644 index dcedd0f..0000000 --- a/test/config/vacuum_script_enabled.conf +++ /dev/null @@ -1,22 +0,0 @@ -[general] -worker_type=local - -; ONLY for worker_type of local -cb_server_url=https://127.0.0.1:443 -cb_server_token=wFC4he6OBd20IBbB0XkhE5ZsjBQUj1GB - -yara_rules_dir=./rules - -postgres_host=localhost -postgres_username=cb -postgres_password=6PGcbuwlQnIuPqOF -postgres_db=cb -postgres_port=5002 - -niceness=1 -concurrent_hashes=8 -disable_rescan=False -num_days_binaries=365 - -vacuum_interval=360 -vacuum_script=../scripts/vacuumscript.sh diff --git a/test/config/valid.conf b/test/config/valid.conf index dcedd0f..cdbdea9 100644 --- a/test/config/valid.conf +++ b/test/config/valid.conf @@ -17,6 +17,3 @@ niceness=1 concurrent_hashes=8 disable_rescan=False num_days_binaries=365 - -vacuum_interval=360 -vacuum_script=../scripts/vacuumscript.sh diff --git a/test/config/valid2.conf b/test/config/valid2.conf index e5c748f..5907483 100644 --- a/test/config/valid2.conf +++ b/test/config/valid2.conf @@ -16,6 +16,3 @@ niceness=1 concurrent_hashes=8 disable_rescan=False num_days_binaries=365 - -vacuum_interval=360 -vacuum_script=../scripts/vacuumscript.sh diff --git a/test/test_configInit.py b/test/test_configInit.py index acb11f7..91bfd07 100644 --- a/test/test_configInit.py +++ b/test/test_configInit.py @@ -33,8 +33,8 @@ def setUp(self): globals.g_num_binaries_analyzed = 0 globals.g_disable_rescan = True globals.g_num_days_binaries = 365 - globals.g_vacuum_interval = -1 - globals.g_vacuum_script = './scripts/vacuumscript.sh' + globals.g_maintenance_interval = -1 + globals.g_maintenance_script = None globals.g_feed_database_dir = "./feed_db" def test_01_missing_config(self): @@ -331,57 +331,65 @@ def test_16b_config_bogus_num_days_binaries(self): ConfigurationInit(os.path.join(TESTS, "config", "bogus_num_days_binaries.conf"), "sample.json") assert "invalid literal for int" in "{0}".format(err.exception.args[0]) - def test_17a_config_bogus_vacuum_interval(self): + def test_17a_config_bogus_maintenance_interval(self): """ - Ensure that config with bogus (non-int) vacuum_interval is detected. + Ensure that config with bogus (non-int) maintenance_interval is detected. 
""" with self.assertRaises(ValueError) as err: - ConfigurationInit(os.path.join(TESTS, "config", "bogus_vacuum_interval.conf"), "sample.json") + ConfigurationInit(os.path.join(TESTS, "config", "bogus_maintenance_interval.conf"), "sample.json") assert "invalid literal for int" in "{0}".format(err.exception.args[0]) - def test_17b_config_negative_vacuum_interval(self): + def test_17b_config_negative_maintenance_interval(self): """ - Ensure that config with bogus (non-int) vacuum_interval is detected. + Ensure that config with bogus (non-int) maintenance_interval is detected. """ - globals.g_vacuum_interval = None - ConfigurationInit(os.path.join(TESTS, "config", "negative_vacuum_interval.conf"), "sample.json") - self.assertEqual(0, globals.g_vacuum_interval) + globals.g_maintenance_interval = None + ConfigurationInit(os.path.join(TESTS, "config", "negative_maintenance_interval.conf"), "sample.json") + self.assertEqual(0, globals.g_maintenance_interval) - def test_18a_config_missing_vacuum_script(self): + def test_18a_config_missing_maintenance_script(self): """ - Ensure that config with missing vacuum_script is detected. + Ensure that config with non-existing maintenance_script is detected. """ with self.assertRaises(CbInvalidConfig) as err: - ConfigurationInit(os.path.join(TESTS, "config", "no_such_vacuum_script.conf"), "sample.json") + ConfigurationInit(os.path.join(TESTS, "config", "no_such_maintenance_script.conf"), "sample.json") assert "does not exist" in "{0}".format(err.exception.args[0]) - def test_18b_config_bogus_vacuum_script_is_dir(self): + def test_18b_config_bogus_maintenance_script_is_dir(self): """ - Ensure that config with vacuum_script as directory is detected. + Ensure that config with maintenance_script as directory is detected. """ with self.assertRaises(CbInvalidConfig) as err: - ConfigurationInit(os.path.join(TESTS, "config", "vacuum_script_dir.conf"), "sample.json") + ConfigurationInit(os.path.join(TESTS, "config", "maintenance_script_dir.conf"), "sample.json") assert "is a directory" in "{0}".format(err.exception.args[0]) - def test_19a_config_vacuum_script_enabled(self): + def test_18c_config_empty_maintenance_script(self): """ - Ensure that config with vacuum_script and vacuum_interval is ready to go. + Ensure that config with missing maintenance_script is detected. """ - globals.g_vacuum_interval = None - globals.g_vacuum_script = None - ConfigurationInit(os.path.join(TESTS, "config", "vacuum_script_enabled.conf"), "sample.json") - self.assertEqual(360, globals.g_vacuum_interval) - self.assertTrue(globals.g_vacuum_script.endswith("/scripts/vacuumscript.sh")) + ConfigurationInit(os.path.join(TESTS, "config", "empty_maintenance_script.conf"), "sample.json") + self.assertEqual(0, globals.g_maintenance_interval) + self.assertEqual("", globals.g_maintenance_script) - def test_19a_config_vacuum_script_and_no_vacuum_interval(self): + def test_19a_config_maintenance_script_enabled(self): """ - Ensure that config with vacuum_script but vacuum_interval == 0 has it disabled. + Ensure that config with maintenance_script and maintenance_interval is ready to go. 
""" - globals.g_vacuum_interval = None - globals.g_vacuum_script = None - ConfigurationInit(os.path.join(TESTS, "config", "vacuum_script_no_interval.conf"), "sample.json") - self.assertEqual(0, globals.g_vacuum_interval) - self.assertIsNone(globals.g_vacuum_script) + globals.g_maintenance_interval = None + globals.g_maintenance_script = None + ConfigurationInit(os.path.join(TESTS, "config", "maintenance_script_enabled.conf"), "sample.json") + self.assertEqual(360, globals.g_maintenance_interval) + self.assertTrue(globals.g_maintenance_script.endswith("/scripts/vacuumscript.sh")) + + def test_19a_config_maintenance_script_and_no_maintenance_interval(self): + """ + Ensure that config with maintenance_script but maintenance_interval == 0 has it disabled. + """ + globals.g_maintenance_interval = None + globals.g_maintenance_script = None + ConfigurationInit(os.path.join(TESTS, "config", "maintenance_script_no_interval.conf"), "sample.json") + self.assertEqual(0, globals.g_maintenance_interval) + self.assertIsNone(globals.g_maintenance_script) def test_20a_config_feed_database_dir_not_exists(self): """ From f598b7b7e5e15b452e59d30c12a26f9aab8334f6 Mon Sep 17 00:00:00 2001 From: Burton Choinski Date: Tue, 5 Nov 2019 07:56:25 -0500 Subject: [PATCH 130/257] Merge fixes --- config_handling.py | 21 +++--- ...erval.conf => bogus_utility_interval.conf} | 2 +- ..._script.conf => empty_utility_script.conf} | 4 +- test/config/maintenance_script_enabled.conf | 22 ------ .../maintenance_script_no_interval.conf | 22 ------ .../config/negative_maintenance_interval.conf | 22 ------ test/config/negative_utility_interval.conf | 22 ++++++ ...ssing.conf => no_such_utility_script.conf} | 4 +- ...cript_dir.conf => utility_script_dir.conf} | 4 +- test/config/utility_script_enabled.conf | 22 ++++++ ...cript.conf => utility_script_missing.conf} | 4 +- test/config/utility_script_no_interval.conf | 22 ++++++ test/test_configInit.py | 70 +++++++++---------- 13 files changed, 120 insertions(+), 121 deletions(-) rename test/config/{bogus_maintenance_interval.conf => bogus_utility_interval.conf} (93%) rename test/config/{empty_maintenance_script.conf => empty_utility_script.conf} (89%) delete mode 100644 test/config/maintenance_script_enabled.conf delete mode 100644 test/config/maintenance_script_no_interval.conf delete mode 100644 test/config/negative_maintenance_interval.conf create mode 100644 test/config/negative_utility_interval.conf rename test/config/{maintenance_script_missing.conf => no_such_utility_script.conf} (86%) rename test/config/{maintenance_script_dir.conf => utility_script_dir.conf} (87%) create mode 100644 test/config/utility_script_enabled.conf rename test/config/{no_such_maintenance_script.conf => utility_script_missing.conf} (85%) create mode 100644 test/config/utility_script_no_interval.conf diff --git a/config_handling.py b/config_handling.py index fe94855..b2fde9f 100644 --- a/config_handling.py +++ b/config_handling.py @@ -113,18 +113,17 @@ def _extended_check(self) -> None: globals.g_num_days_binaries = self._as_int("num_days_binaries", default=globals.g_num_days_binaries, min_value=1) - globals.g_maintenance_interval = self._as_int("maintenance_interval", default=globals.g_maintenance_interval, - min_value=0, quiet=True) - if globals.g_maintenance_interval > 0: - if self._as_str("maintenance_script", default=globals.g_maintenance_script) == "": - logger.warning( - f"{self.source} 'maintenance_interval' supplied but no script defined -- feature disabled") - globals.g_maintenance_interval = 0 - 
globals.g_maintenance_script = "" + globals.g_utility_interval = self._as_int("utility_interval", default=globals.g_utility_interval, + min_value=0, quiet=True) + if globals.g_utility_interval > 0: + if self._as_str("utility_script", default=globals.g_utility_script) == "": + logger.warning(f"{self.source} 'utility_interval' supplied but no script defined -- feature disabled") + globals.g_utility_interval = 0 + globals.g_utility_script = "" else: - globals.g_maintenance_script = self._as_path("maintenance_script", required=False, is_dir=False, - default=globals.g_maintenance_script) - logger.warning(f"{self.source} maintenance Script '{globals.g_maintenance_script}' is enabled; " + + globals.g_utility_script = self._as_path("utility_script", required=True, is_dir=False, + default=globals.g_utility_script) + logger.warning(f"{self.source} utility script '{globals.g_utility_script}' is enabled; " + "use this advanced feature at your own discretion!") else: if self._as_path("utility_script", required=False, default=globals.g_utility_script): diff --git a/test/config/bogus_maintenance_interval.conf b/test/config/bogus_utility_interval.conf similarity index 93% rename from test/config/bogus_maintenance_interval.conf rename to test/config/bogus_utility_interval.conf index 7d1a321..0ae2f8a 100644 --- a/test/config/bogus_maintenance_interval.conf +++ b/test/config/bogus_utility_interval.conf @@ -18,4 +18,4 @@ concurrent_hashes=8 disable_rescan=False num_days_binaries=365 -maintenance_interval=BOGUS +utility_interval=BOGUS diff --git a/test/config/empty_maintenance_script.conf b/test/config/empty_utility_script.conf similarity index 89% rename from test/config/empty_maintenance_script.conf rename to test/config/empty_utility_script.conf index 8077ede..e517457 100644 --- a/test/config/empty_maintenance_script.conf +++ b/test/config/empty_utility_script.conf @@ -18,5 +18,5 @@ concurrent_hashes=8 disable_rescan=False num_days_binaries=365 -maintenance_interval=36 -maintenance_script= +utility_interval=36 +utility_script= diff --git a/test/config/maintenance_script_enabled.conf b/test/config/maintenance_script_enabled.conf deleted file mode 100644 index e770125..0000000 --- a/test/config/maintenance_script_enabled.conf +++ /dev/null @@ -1,22 +0,0 @@ -[general] -worker_type=local - -; ONLY for worker_type of local -cb_server_url=https://127.0.0.1:443 -cb_server_token=wFC4he6OBd20IBbB0XkhE5ZsjBQUj1GB - -yara_rules_dir=./rules - -postgres_host=localhost -postgres_username=cb -postgres_password=6PGcbuwlQnIuPqOF -postgres_db=cb -postgres_port=5002 - -niceness=1 -concurrent_hashes=8 -disable_rescan=False -num_days_binaries=365 - -maintenance_interval=360 -maintenance_script=../scripts/vacuumscript.sh diff --git a/test/config/maintenance_script_no_interval.conf b/test/config/maintenance_script_no_interval.conf deleted file mode 100644 index c609a3d..0000000 --- a/test/config/maintenance_script_no_interval.conf +++ /dev/null @@ -1,22 +0,0 @@ -[general] -worker_type=local - -; ONLY for worker_type of local -cb_server_url=https://127.0.0.1:443 -cb_server_token=wFC4he6OBd20IBbB0XkhE5ZsjBQUj1GB - -yara_rules_dir=./rules - -postgres_host=localhost -postgres_username=cb -postgres_password=6PGcbuwlQnIuPqOF -postgres_db=cb -postgres_port=5002 - -niceness=1 -concurrent_hashes=8 -disable_rescan=False -num_days_binaries=365 - -maintenance_interval=0 -maintenance_script=../scripts/vacuumscript.sh diff --git a/test/config/negative_maintenance_interval.conf b/test/config/negative_maintenance_interval.conf deleted file mode 
100644 index f62287b..0000000 --- a/test/config/negative_maintenance_interval.conf +++ /dev/null @@ -1,22 +0,0 @@ -[general] -worker_type=local - -; ONLY for worker_type of local -cb_server_url=https://127.0.0.1:443 -cb_server_token=wFC4he6OBd20IBbB0XkhE5ZsjBQUj1GB - -yara_rules_dir=./rules - -postgres_host=localhost -postgres_username=cb -postgres_password=6PGcbuwlQnIuPqOF -postgres_db=cb -postgres_port=5002 - -niceness=1 -concurrent_hashes=8 -disable_rescan=False -num_days_binaries=365 - -maintenance_interval=-20 -maintenance_script=../scripts/vacuumscript.sh diff --git a/test/config/negative_utility_interval.conf b/test/config/negative_utility_interval.conf new file mode 100644 index 0000000..c7ce7d4 --- /dev/null +++ b/test/config/negative_utility_interval.conf @@ -0,0 +1,22 @@ +[general] +worker_type=local + +; ONLY for worker_type of local +cb_server_url=https://127.0.0.1:443 +cb_server_token=wFC4he6OBd20IBbB0XkhE5ZsjBQUj1GB + +yara_rules_dir=./rules + +postgres_host=localhost +postgres_username=cb +postgres_password=6PGcbuwlQnIuPqOF +postgres_db=cb +postgres_port=5002 + +niceness=1 +concurrent_hashes=8 +disable_rescan=False +num_days_binaries=365 + +utility_interval=-20 +utility_script=../scripts/vacuumscript.sh diff --git a/test/config/maintenance_script_missing.conf b/test/config/no_such_utility_script.conf similarity index 86% rename from test/config/maintenance_script_missing.conf rename to test/config/no_such_utility_script.conf index 10013d4..c68a878 100644 --- a/test/config/maintenance_script_missing.conf +++ b/test/config/no_such_utility_script.conf @@ -18,5 +18,5 @@ concurrent_hashes=8 disable_rescan=False num_days_binaries=365 -maintenance_interval=360 -# MISSING: maintenance_script +utility_interval=36 +utility_script=no-such-script.sh diff --git a/test/config/maintenance_script_dir.conf b/test/config/utility_script_dir.conf similarity index 87% rename from test/config/maintenance_script_dir.conf rename to test/config/utility_script_dir.conf index b9e104b..fb7cb61 100644 --- a/test/config/maintenance_script_dir.conf +++ b/test/config/utility_script_dir.conf @@ -18,6 +18,6 @@ concurrent_hashes=8 disable_rescan=False num_days_binaries=365 -maintenance_interval=360 +utility_interval=360 ; invalid, is dir -maintenance_script=./rules +utility_script=./rules diff --git a/test/config/utility_script_enabled.conf b/test/config/utility_script_enabled.conf new file mode 100644 index 0000000..8af7c91 --- /dev/null +++ b/test/config/utility_script_enabled.conf @@ -0,0 +1,22 @@ +[general] +worker_type=local + +; ONLY for worker_type of local +cb_server_url=https://127.0.0.1:443 +cb_server_token=wFC4he6OBd20IBbB0XkhE5ZsjBQUj1GB + +yara_rules_dir=./rules + +postgres_host=localhost +postgres_username=cb +postgres_password=6PGcbuwlQnIuPqOF +postgres_db=cb +postgres_port=5002 + +niceness=1 +concurrent_hashes=8 +disable_rescan=False +num_days_binaries=365 + +utility_interval=360 +utility_script=../scripts/vacuumscript.sh diff --git a/test/config/no_such_maintenance_script.conf b/test/config/utility_script_missing.conf similarity index 85% rename from test/config/no_such_maintenance_script.conf rename to test/config/utility_script_missing.conf index 09f70b5..86c9e34 100644 --- a/test/config/no_such_maintenance_script.conf +++ b/test/config/utility_script_missing.conf @@ -18,5 +18,5 @@ concurrent_hashes=8 disable_rescan=False num_days_binaries=365 -maintenance_interval=36 -maintenance_script=no-such-script.sh +utility_interval=360 +# MISSING: utility_script diff --git 
a/test/config/utility_script_no_interval.conf b/test/config/utility_script_no_interval.conf new file mode 100644 index 0000000..02b986c --- /dev/null +++ b/test/config/utility_script_no_interval.conf @@ -0,0 +1,22 @@ +[general] +worker_type=local + +; ONLY for worker_type of local +cb_server_url=https://127.0.0.1:443 +cb_server_token=wFC4he6OBd20IBbB0XkhE5ZsjBQUj1GB + +yara_rules_dir=./rules + +postgres_host=localhost +postgres_username=cb +postgres_password=6PGcbuwlQnIuPqOF +postgres_db=cb +postgres_port=5002 + +niceness=1 +concurrent_hashes=8 +disable_rescan=False +num_days_binaries=365 + +utility_interval=0 +utility_script=../scripts/vacuumscript.sh diff --git a/test/test_configInit.py b/test/test_configInit.py index 91bfd07..f9892ee 100644 --- a/test/test_configInit.py +++ b/test/test_configInit.py @@ -33,8 +33,8 @@ def setUp(self): globals.g_num_binaries_analyzed = 0 globals.g_disable_rescan = True globals.g_num_days_binaries = 365 - globals.g_maintenance_interval = -1 - globals.g_maintenance_script = None + globals.g_utility_interval = -1 + globals.g_utility_script = None globals.g_feed_database_dir = "./feed_db" def test_01_missing_config(self): @@ -331,65 +331,65 @@ def test_16b_config_bogus_num_days_binaries(self): ConfigurationInit(os.path.join(TESTS, "config", "bogus_num_days_binaries.conf"), "sample.json") assert "invalid literal for int" in "{0}".format(err.exception.args[0]) - def test_17a_config_bogus_maintenance_interval(self): + def test_17a_config_bogus_utility_interval(self): """ - Ensure that config with bogus (non-int) maintenance_interval is detected. + Ensure that config with bogus (non-int) utility_interval is detected. """ with self.assertRaises(ValueError) as err: - ConfigurationInit(os.path.join(TESTS, "config", "bogus_maintenance_interval.conf"), "sample.json") + ConfigurationInit(os.path.join(TESTS, "config", "bogus_utility_interval.conf"), "sample.json") assert "invalid literal for int" in "{0}".format(err.exception.args[0]) - def test_17b_config_negative_maintenance_interval(self): + def test_17b_config_negative_utility_interval(self): """ - Ensure that config with a negative maintenance_interval is forced to 0. + Ensure that config with a negative utility_interval is forced to 0. """ - globals.g_maintenance_interval = None - ConfigurationInit(os.path.join(TESTS, "config", "negative_maintenance_interval.conf"), "sample.json") - self.assertEqual(0, globals.g_maintenance_interval) + globals.g_utility_interval = None + ConfigurationInit(os.path.join(TESTS, "config", "negative_utility_interval.conf"), "sample.json") + self.assertEqual(0, globals.g_utility_interval) - def test_18a_config_missing_maintenance_script(self): + def test_18a_config_missing_utility_script(self): """ - Ensure that config with a non-existent maintenance_script is detected. + Ensure that config with a non-existent utility_script is detected. """ with self.assertRaises(CbInvalidConfig) as err: - ConfigurationInit(os.path.join(TESTS, "config", "no_such_maintenance_script.conf"), "sample.json") + ConfigurationInit(os.path.join(TESTS, "config", "no_such_utility_script.conf"), "sample.json") assert "does not exist" in "{0}".format(err.exception.args[0]) - def test_18b_config_bogus_maintenance_script_is_dir(self): + def test_18b_config_bogus_utility_script_is_dir(self): """ - Ensure that config with maintenance_script as directory is detected. + Ensure that config with utility_script as directory is detected.
""" with self.assertRaises(CbInvalidConfig) as err: - ConfigurationInit(os.path.join(TESTS, "config", "maintenance_script_dir.conf"), "sample.json") + ConfigurationInit(os.path.join(TESTS, "config", "utility_script_dir.conf"), "sample.json") assert "is a directory" in "{0}".format(err.exception.args[0]) - def test_18c_config_empty_maintenance_script(self): + def test_18c_config_empty_utility_script(self): """ - Ensure that config with missing maintenance_script is detected. + Ensure that config with missing utility_script is detected. """ - ConfigurationInit(os.path.join(TESTS, "config", "empty_maintenance_script.conf"), "sample.json") - self.assertEqual(0, globals.g_maintenance_interval) - self.assertEqual("", globals.g_maintenance_script) + ConfigurationInit(os.path.join(TESTS, "config", "empty_utility_script.conf"), "sample.json") + self.assertEqual(0, globals.g_utility_interval) + self.assertEqual("", globals.g_utility_script) - def test_19a_config_maintenance_script_enabled(self): + def test_19a_config_utility_script_enabled(self): """ - Ensure that config with maintenance_script and maintenance_interval is ready to go. + Ensure that config with utility_script and utility_interval is ready to go. """ - globals.g_maintenance_interval = None - globals.g_maintenance_script = None - ConfigurationInit(os.path.join(TESTS, "config", "maintenance_script_enabled.conf"), "sample.json") - self.assertEqual(360, globals.g_maintenance_interval) - self.assertTrue(globals.g_maintenance_script.endswith("/scripts/vacuumscript.sh")) + globals.g_utility_interval = None + globals.g_utility_script = None + ConfigurationInit(os.path.join(TESTS, "config", "utility_script_enabled.conf"), "sample.json") + self.assertEqual(360, globals.g_utility_interval) + self.assertTrue(globals.g_utility_script.endswith("/scripts/vacuumscript.sh")) - def test_19a_config_maintenance_script_and_no_maintenance_interval(self): + def test_19a_config_utility_script_and_no_utility_interval(self): """ - Ensure that config with maintenance_script but maintenance_interval == 0 has it disabled. + Ensure that config with utility_script but utility_interval == 0 has it disabled. """ - globals.g_maintenance_interval = None - globals.g_maintenance_script = None - ConfigurationInit(os.path.join(TESTS, "config", "maintenance_script_no_interval.conf"), "sample.json") - self.assertEqual(0, globals.g_maintenance_interval) - self.assertIsNone(globals.g_maintenance_script) + globals.g_utility_interval = None + globals.g_utility_script = None + ConfigurationInit(os.path.join(TESTS, "config", "utility_script_no_interval.conf"), "sample.json") + self.assertEqual(0, globals.g_utility_interval) + self.assertIsNone(globals.g_utility_script) def test_20a_config_feed_database_dir_not_exists(self): """ From c32f3c5ca512f912a98e31ea447fe0114261106a Mon Sep 17 00:00:00 2001 From: Burton Choinski Date: Tue, 5 Nov 2019 08:39:09 -0500 Subject: [PATCH 131/257] Removed errand logger config --- binary_database.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/binary_database.py b/binary_database.py index ad30611..4d1f2d4 100644 --- a/binary_database.py +++ b/binary_database.py @@ -5,8 +5,8 @@ from peewee import * +# noinspection PyUnusedName logger = logging.getLogger(__name__) -logger.setLevel(logging.DEBUG) # # autostart must be False if we intend to dynamically create the database. 
From 72d501cdb3799eaacd1712329cc4f26ec0388d41 Mon Sep 17 00:00:00 2001 From: Burton Choinski Date: Tue, 5 Nov 2019 08:52:44 -0500 Subject: [PATCH 132/257] Backed out some logger exceptions; fixed some typing --- main.py | 13 +++++-------- 1 file changed, 5 insertions(+), 8 deletions(-) diff --git a/main.py b/main.py index 71dc266..f0d2c6d 100644 --- a/main.py +++ b/main.py @@ -15,7 +15,6 @@ import humanfriendly import psycopg2 - # noinspection PyPackageRequirements import yara from celery import group @@ -119,12 +118,10 @@ def generate_rule_map_remote(yara_rule_path: str) -> None: time.sleep(0.1) -def analyze_binaries(md5_hashes: List[str], local: bool) -> Optional: +def analyze_binaries(md5_hashes: List[str], local: bool) -> Optional[List[AnalysisResult]]: """ Analyze binaries. - TODO: determine return typing! - :param md5_hashes: list of hashes to check. :param local: True if local :return: None if there is a problem; results otherwise @@ -428,7 +425,7 @@ def main(): try: singleton.SingleInstance() except SingleInstanceException as err: - logger.exception(f"Only one instance of this script is allowed to run at a time: {err}") + logger.error(f"Only one instance of this script is allowed to run at a time: {err}") sys.exit(1) args = handle_arguments() @@ -448,7 +445,7 @@ def main(): try: ConfigurationInit(args.config_file, use_log_file) except Exception as err: - logger.exception(f"Unable to continue due to a configuration problem: {err}") + logger.error(f"Unable to continue due to a configuration problem: {err}") sys.exit(2) if args.validate_yara_rules: @@ -458,7 +455,7 @@ def main(): yara.compile(filepaths=yara_rule_map) logger.info("All yara rules compiled successfully") except Exception as err: - logger.exception(f"There were errors compiling yara rules: {err}") + logger.error(f"There were errors compiling yara rules: {err}") sys.exit(5) else: try: @@ -474,7 +471,7 @@ def main(): logger.info("\n\n##### Interrupted by User!\n") sys.exit(3) except Exception as err: - logger.exception(f"There were errors executing yara rules: {err}") + logger.error(f"There were errors executing yara rules: {err}") sys.exit(4) From 14f6dca9bc30c392b81897207c3fdb2901bd5ecf Mon Sep 17 00:00:00 2001 From: Zachary Estep Date: Tue, 5 Nov 2019 09:07:35 -0500 Subject: [PATCH 133/257] updates to fix rpmbuild --- cb-yara-connector.rpm.spec | 2 +- example-conf/yara.conf | 67 ++++++++++++++++++++++++++++++++++++++ makefile | 2 +- 3 files changed, 69 insertions(+), 2 deletions(-) create mode 100644 example-conf/yara.conf diff --git a/cb-yara-connector.rpm.spec b/cb-yara-connector.rpm.spec index e9507a3..b303c41 100644 --- a/cb-yara-connector.rpm.spec +++ b/cb-yara-connector.rpm.spec @@ -16,7 +16,7 @@ mkdir -p ${RPM_BUILD_ROOT}/var/log/cb/integrations/yaraconnector mkdir -p ${RPM_BUILD_ROOT}/usr/share/cb/integrations/yaraconnector mkdir -p ${RPM_BUILD_ROOT}/etc/cb/integrations/yaraconnector mkdir -p ${RPM_BUILD_ROOT}/etc/init -cp yara.conf ${RPM_BUILD_ROOT}/etc/cb/integrations/yaraconnector/yaraconnector.conf.example +cp example-conf/yara.conf ${RPM_BUILD_ROOT}/etc/cb/integrations/yaraconnector/yaraconnector.conf.example install -m 0755 init-scripts/yaraconnector.conf ${RPM_BUILD_ROOT}/etc/init/yaraconnector.conf install -m 0755 dist/yaraconnector ${RPM_BUILD_ROOT}/usr/share/cb/integrations/yaraconnector/yaraconnector diff --git a/example-conf/yara.conf b/example-conf/yara.conf new file mode 100644 index 0000000..f71e75f --- /dev/null +++ b/example-conf/yara.conf @@ -0,0 +1,67 @@ +[general] + +; +; either run a single 
worker locally or remotely +; valid types are 'local' or 'remote' +; +worker_type=local + +; +; ONLY for worker_type of remote +; IP Address of workers if worker_type is remote +; +broker_url=redis://127.0.0.1 + +mode=master + +; +; path to directory containing yara rules +; +yara_rules_dir= + +; +; Cb Response postgres Database settings +; +postgres_host=127.0.0.1 +postgres_username=cb +postgres_password= +postgres_db=cb +postgres_port=5002 + +; +; ONLY for worker_type of local +; Cb Response Server settings for scanning locally. +; For remote scanning please set these parameters in the yara worker config file +; Default: https://127.0.0.1 +; +cb_server_url=https://127.0.0.1 +cb_server_token= + +; +; nice value used for this script +; +niceness=1 + +; +; Number of hashes to send to the workers concurrently. Defaults to 8. +; Recommend setting to the number of workers on the remote system. +; +concurrent_hashes=8 + +; +; If you don't want binaries to be rescanned more than once, regardless of the rules used, set this to True +; Default: False +; +disable_rescan=False + +; +; The agent will pull binaries up to the configured number of days. For example, 365 will pull all binaries with +; a timestamp within the last year +; Default: 365 +; +num_days_binaries=365 + + +utility_interval=60 +utility_script=scripts/vacuumscript.sh +database_scanning_interval=60 \ No newline at end of file diff --git a/makefile b/makefile index b80a0ec..90b4419 100644 --- a/makefile +++ b/makefile @@ -12,7 +12,7 @@ rpm: mkdir -p ${BUILDDIR}/init-scripts cp -rp src/* ${SOURCEDIR}/src cp -rp init-scripts/* ${BUILDDIR}/init-scripts - cp yara.conf ${BUILDDIR} + cp example-conf/yara.conf ${BUILDDIR} cp MANIFEST ${BUILDDIR} cp cb-yara-connector.spec ${SOURCEDIR}/cb-yara-connector.spec ls ${SOURCEDIR} From e038ab4f5004e631be40fc44a06dcdf9e2cb35b3 Mon Sep 17 00:00:00 2001 From: Zachary Estep Date: Tue, 5 Nov 2019 11:21:07 -0500 Subject: [PATCH 134/257] change interval to seconds --- main.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/main.py b/main.py index f0d2c6d..04b91b2 100644 --- a/main.py +++ b/main.py @@ -279,7 +279,7 @@ def perform(yara_rule_dir: str) -> None: for row in rows: if globals.g_utility_interval > 0: seconds_since_start = (datetime.now() - utility_window_start).seconds - if seconds_since_start >= globals.g_utility_interval * 60: + if seconds_since_start >= globals.g_utility_interval: execute_script() utility_window_start = datetime.now() From 4e997ff31cd154527651a7fff9ac5c45daa02484 Mon Sep 17 00:00:00 2001 From: Burton Choinski Date: Tue, 5 Nov 2019 16:53:00 -0500 Subject: [PATCH 135/257] Unit tests working (*whew*) --- README.md | 23 +- src/config_handling.py | 292 ++++---- src/globals.py | 17 +- src/main.py | 22 +- test/config/bogus_concurrent_hashes.conf | 19 - test/config/bogus_disable_rescan.conf | 19 - test/config/bogus_feed_database_dir.conf | 25 - test/config/bogus_niceness.conf | 19 - test/config/bogus_num_days_binaries.conf | 19 - test/config/bogus_postgres_port.conf | 19 - test/config/bogus_rules_dir.conf | 26 - test/config/bogus_vacuum_interval.conf | 21 - test/config/bogus_worker.conf | 26 - test/config/bogus_worker_network_timeout.conf | 24 - test/config/from_main.conf | 19 - test/config/invalid_header.conf | 19 - test/config/local_worker_no_server_token.conf | 22 - .../config/local_worker_no_server_token2.conf | 23 - test/config/local_worker_no_server_url.conf | 22 - test/config/local_worker_no_server_url2.conf | 23 - test/config/malformed_param.conf | 20 -
test/config/missing_concurrent_hashes.conf | 23 - test/config/missing_disable_rescan.conf | 23 - test/config/missing_feed_database_dir.conf | 25 - test/config/missing_header.conf | 19 - test/config/missing_niceness.conf | 23 - test/config/missing_num_days_binaries.conf | 23 - test/config/missing_postgres_db.conf | 22 - test/config/missing_postgres_db2.conf | 23 - test/config/missing_postgres_host.conf | 23 - test/config/missing_postgres_host2.conf | 24 - test/config/missing_postgres_password.conf | 19 - test/config/missing_postgres_password2.conf | 20 - test/config/missing_postgres_port.conf | 22 - test/config/missing_postgres_port2.conf | 23 - test/config/missing_postgres_username.conf | 22 - test/config/missing_postgres_username2.conf | 23 - test/config/missing_rules_dir.conf | 26 - test/config/missing_worker.conf | 21 - test/config/missing_worker2.conf | 22 - .../missing_worker_network_timeout.conf | 24 - test/config/negative_vacuum_interval.conf | 22 - test/config/no_rules_dir.conf | 19 - test/config/no_rules_dir2.conf | 20 - test/config/no_such_vacuum_script.conf | 22 - test/config/remote_worker_no_broker_url.conf | 17 - test/config/remote_worker_no_broker_url2.conf | 17 - test/config/vacuum_script_dir.conf | 23 - test/config/vacuum_script_enabled.conf | 22 - test/config/vacuum_script_no_interval.conf | 22 - test/config/valid.conf | 22 - test/config/valid2.conf | 21 - test/test_configInit.py | 684 +++++++++++++----- 53 files changed, 667 insertions(+), 1413 deletions(-) delete mode 100644 test/config/bogus_concurrent_hashes.conf delete mode 100644 test/config/bogus_disable_rescan.conf delete mode 100644 test/config/bogus_feed_database_dir.conf delete mode 100644 test/config/bogus_niceness.conf delete mode 100644 test/config/bogus_num_days_binaries.conf delete mode 100644 test/config/bogus_postgres_port.conf delete mode 100644 test/config/bogus_rules_dir.conf delete mode 100644 test/config/bogus_vacuum_interval.conf delete mode 100644 test/config/bogus_worker.conf delete mode 100644 test/config/bogus_worker_network_timeout.conf delete mode 100644 test/config/from_main.conf delete mode 100644 test/config/invalid_header.conf delete mode 100644 test/config/local_worker_no_server_token.conf delete mode 100644 test/config/local_worker_no_server_token2.conf delete mode 100644 test/config/local_worker_no_server_url.conf delete mode 100644 test/config/local_worker_no_server_url2.conf delete mode 100644 test/config/malformed_param.conf delete mode 100644 test/config/missing_concurrent_hashes.conf delete mode 100644 test/config/missing_disable_rescan.conf delete mode 100644 test/config/missing_feed_database_dir.conf delete mode 100644 test/config/missing_header.conf delete mode 100644 test/config/missing_niceness.conf delete mode 100644 test/config/missing_num_days_binaries.conf delete mode 100644 test/config/missing_postgres_db.conf delete mode 100644 test/config/missing_postgres_db2.conf delete mode 100644 test/config/missing_postgres_host.conf delete mode 100644 test/config/missing_postgres_host2.conf delete mode 100644 test/config/missing_postgres_password.conf delete mode 100644 test/config/missing_postgres_password2.conf delete mode 100644 test/config/missing_postgres_port.conf delete mode 100644 test/config/missing_postgres_port2.conf delete mode 100644 test/config/missing_postgres_username.conf delete mode 100644 test/config/missing_postgres_username2.conf delete mode 100644 test/config/missing_rules_dir.conf delete mode 100644 test/config/missing_worker.conf delete mode 100644 
test/config/missing_worker2.conf delete mode 100644 test/config/missing_worker_network_timeout.conf delete mode 100644 test/config/negative_vacuum_interval.conf delete mode 100644 test/config/no_rules_dir.conf delete mode 100644 test/config/no_rules_dir2.conf delete mode 100644 test/config/no_such_vacuum_script.conf delete mode 100644 test/config/remote_worker_no_broker_url.conf delete mode 100644 test/config/remote_worker_no_broker_url2.conf delete mode 100644 test/config/vacuum_script_dir.conf delete mode 100644 test/config/vacuum_script_enabled.conf delete mode 100644 test/config/vacuum_script_no_interval.conf delete mode 100644 test/config/valid.conf delete mode 100644 test/config/valid2.conf diff --git a/README.md b/README.md index a6807de..b78e31c 100644 --- a/README.md +++ b/README.md @@ -172,26 +172,29 @@ _[TBD]_ ``` # Development Notes -## Vacuum Script +## Utility Script Included with this version is a feature intended for discretionary use by advanced users; it should be used with caution. -When `vacuum_interval` is defined with a value greater than 0, it represents the interval -in minutes at which the yara agent will pause its work and execute an external -shell script, defined by default as `vacuumscript.sh` within the `scripts` folder -of the current Yara connector installation. After execution, the Yara agent continues with +When `utility_interval` is defined with a value greater than 0, it represents the interval +in minutes at which the yara connector will pause its work and execute an external +shell script. A sample script, `vacuumscript.sh`, is provided within the `scripts` folder +of the current Yara connector installation. After execution, the Yara connector continues with its work. +> _**NOTE:** As a safeguard for this feature, if an interval is defined but no script is defined, nothing is done. +> By default, no script is defined._ + ```ini ; -; The use of the vacuum script is an ADVANCED FEATURE and should be used with caution! +; The use of the utility script is an ADVANCED FEATURE and should be used with caution! ; -; If "vacuum_interval" is greater than 0 it represents the interval in minutes after which the yara connector will -; pause to execute a shell script for database maintenance. This can present risks. Be careful what you allow the +; If "utility_interval" is greater than 0 it represents the interval in minutes after which the yara connector will +; pause to execute a shell script for general maintenance. This can present risks. Be careful what you allow the ; script to do, and use this option at your own discretion. ; -vacuum_interval=-1 -vacuum_script=./scripts/vacuumscript.sh +utility_interval=-1 +utility_script=./scripts/vacuumscript.sh ``` ## Yara Agent Build Instructions (Centos 6) diff --git a/src/config_handling.py b/src/config_handling.py index 68f5f1e..7b52611 100644 --- a/src/config_handling.py +++ b/src/config_handling.py @@ -4,23 +4,44 @@ import configparser import logging import os -from typing import Optional - -from celery import Celery +from typing import List, Optional import globals -from exceptions import CbInvalidConfig from celery_app import app +from exceptions import CbInvalidConfig logger = logging.getLogger(__name__) __all__ = ["ConfigurationInit"] - ################################################################################ # Configuration reader/validator ################################################################################ +# Known parameters -- flag others as potential typos! 
+KNOWN = [ + "broker_url", + "cb_server_token", + "cb_server_url", + "concurrent_hashes", + "disable_rescan", + "feed_database_dir", + "mode", + "niceness", + "num_days_binaries", + "postgres_db", + "postgres_host", + "postgres_password", + "postgres_port", + "postgres_username", + "utility_debug", # dev use only! + "utility_interval", + "utility_script", + "worker_network_timeout", + "worker_type", + "yara_rules_dir", +] class ConfigurationInit(object): """ @@ -37,11 +58,13 @@ def __init__(self, config_file: str, output_file: str = None) -> None: self.source = f"Config file '{self.abs_config}'" config = configparser.ConfigParser() - if not os.path.exists(config_file): + if not os.path.exists(self.abs_config): raise CbInvalidConfig(f"{self.source} does not exist!") + if os.path.isdir(self.abs_config): + raise CbInvalidConfig(f"{self.source} is a directory!") try: - config.read(config_file) + config.read(self.abs_config) except Exception as err: raise CbInvalidConfig(err) @@ -50,22 +73,21 @@ def __init__(self, config_file: str, output_file: str = None) -> None: raise CbInvalidConfig(f"{self.source} does not have a 'general' section") self.the_config = config["general"] - if "mode" in self.the_config: - operating_mode = self.the_config["mode"].lower() - if operating_mode in ["master", "slave"]: - globals.g_mode = operating_mode - else: - raise CbInvalidConfig( - f"{self.source} does not specify a valid operating mode (slave/master)" - ) - else: - raise CbInvalidConfig( - f"{self.source} does not specify a valid operating mode (slave/master)" - ) - + # reject unknown parameters -- possible typos + extras = [] + try: + for item in config.items("general"): + if item[0] not in KNOWN: + extras.append(item[0]) + if len(extras) > 0: + raise CbInvalidConfig(f"{self.source} has unknown parameters: {extras}") + except configparser.InterpolationSyntaxError as err: + raise CbInvalidConfig(f"{self.source} cannot be parsed: {err}") + + # do the config checks self._worker_check() - if output_file is not None: + if output_file is not None and output_file != "": globals.g_output_file = os.path.abspath(os.path.expanduser(output_file)) logger.debug(f"NOTE: output file will be '{globals.g_output_file}'") self._extended_check() @@ -76,28 +98,26 @@ def _worker_check(self) -> None: :raises CbInvalidConfig: """ - value = self._as_str("worker_type", default="local") + globals.g_mode = self._as_str("mode", required=True, allowed=["master", "slave"]) + + value = self._as_str("worker_type", default="local", allowed=["local", "remote"]) if value == "local": globals.g_remote = False - elif value == "remote": - globals.g_remote = True else: - raise CbInvalidConfig( - f"{self.source} has an invalid 'worker_type' ({value})" - ) + globals.g_remote = True - globals.g_yara_rules_dir = self._as_path( - "yara_rules_dir", required=True, exists=True, is_dir=True - ) + globals.g_yara_rules_dir = self._as_path("yara_rules_dir", required=True, exists=True, is_dir=True) # local/remote configuration data - globals.g_cb_server_url = self._as_str("cb_server_url", required=True) - globals.g_cb_server_token = self._as_str("cb_server_token", required=True) + cb_req = not (globals.g_mode == "master" and globals.g_remote) + globals.g_cb_server_url = self._as_str("cb_server_url", required=cb_req) + globals.g_cb_server_token = self._as_str("cb_server_token", required=cb_req) value = self._as_str("broker_url", required=True) app.conf.update(broker_url=value, result_backend=value) - globals.g_worker_network_timeout = 
self._as_int("worker_network_timeout") + globals.g_worker_network_timeout = self._as_int("worker_network_timeout", + default=globals.g_worker_network_timeout) def _extended_check(self) -> None: """ @@ -106,107 +126,74 @@ def _extended_check(self) -> None: :raises CbInvalidConfig: :raises ValueError: """ - globals.g_postgres_host = self._as_str( - "postgres_host", default=globals.g_postgres_host - ) - globals.g_postgres_username = self._as_str( - "postgres_username", default=globals.g_postgres_username - ) + globals.g_postgres_host = self._as_str("postgres_host", default=globals.g_postgres_host) + globals.g_postgres_username = self._as_str("postgres_username", default=globals.g_postgres_username) globals.g_postgres_password = self._as_str("postgres_password", required=True) - globals.g_postgres_db = self._as_str( - "postgres_db", default=globals.g_postgres_username - ) - globals.g_postgres_port = self._as_int( - "postgres_port", default=globals.g_postgres_port - ) - - value = self._as_int("niceness") - if value: - os.nice(value) - - globals.g_max_hashes = self._as_int( - "concurrent_hashes", default=globals.g_max_hashes - ) - globals.g_disable_rescan = self._as_bool( - "disable_rescan", default=globals.g_disable_rescan - ) - globals.g_num_days_binaries = self._as_int( - "num_days_binaries", default=globals.g_num_days_binaries, min_value=1 - ) - - globals.g_vacuum_interval = self._as_int( - "vacuum_interval", default=globals.g_vacuum_interval, min_value=0 - ) - if globals.g_vacuum_interval > 0: - globals.g_vacuum_script = self._as_path( - "vacuum_script", - required=True, - is_dir=False, - default=globals.g_vacuum_script, - ) - logger.warning( - f"Vacuum Script '{globals.g_vacuum_script}' is enabled; " - + "use this advanced feature at your own discretion!" 
- ) + globals.g_postgres_db = self._as_str("postgres_db", default=globals.g_postgres_username) + globals.g_postgres_port = self._as_int("postgres_port", default=globals.g_postgres_port) + + value = self._as_str("niceness") + if value != "": + os.nice(self._as_int("niceness", min_value=0)) + + globals.g_max_hashes = self._as_int("concurrent_hashes", default=globals.g_max_hashes) + globals.g_disable_rescan = self._as_bool("disable_rescan", default=globals.g_disable_rescan) + globals.g_num_days_binaries = self._as_int("num_days_binaries", default=globals.g_num_days_binaries, + min_value=1) + + globals.g_utility_interval = self._as_int("utility_interval", default=globals.g_utility_interval, + min_value=0) + if globals.g_utility_interval > 0: + if self._as_str("utility_script", default=globals.g_utility_script) == "": + logger.warning(f"{self.source} 'utility_interval' supplied but no script defined -- feature disabled") + globals.g_utility_interval = 0 + globals.g_utility_script = "" + else: + globals.g_utility_script = self._as_path("utility_script", required=True, is_dir=False, + default=globals.g_utility_script) + logger.warning(f"{self.source} utility script '{globals.g_utility_script}' is enabled; " + + "use this advanced feature at your own discretion!") else: - if self._as_path( - "vacuum_script", required=False, default=globals.g_vacuum_script - ): - logger.debug( - f"{self.source} has 'vacuum_script' defined, but it is disabled" - ) - - globals.g_feed_database_dir = self._as_path( - "feed_database_dir", - required=True, - is_dir=True, - default=globals.g_feed_database_dir, - create_if_needed=True, - ) - - # ----- Type Handlers - - def _as_str( - self, param: str, required: bool = False, default: str = None - ) -> Optional[str]: + if self._as_path("utility_script", required=False, default=globals.g_utility_script): + logger.debug(f"{self.source} has 'utility_script' defined, but it is disabled") + + globals.g_feed_database_dir = self._as_path("feed_database_dir", required=True, is_dir=True, + default=globals.g_feed_database_dir, create_if_needed=True) + + # ----- Type Handlers ------------------------------------------------------------ + + def _as_str(self, param: str, required: bool = False, default: str = "", allowed: List[str] = None) -> str: """ Get a string parameter from the configuration. + NOTE: This is the base for all other parameter getting functions, so changes here will affect them as well! 
+ :param param: Name of the configuration parameter :param required: True if this must be specified in the configuration - :param default: If not required, default value if not supplied - :return: the string value, or None/default if not required and no exception + :param default: default value if not supplied + :return: the string value, or default if not required and no exception :raises CbInvalidConfig: """ try: - value = self.the_config.get(param, None) + value = self.the_config.get(param, default) + value = "" if value is None else value.strip() + if value == "": + value = default # patch for supplied empty string except Exception as err: - raise CbInvalidConfig( - f"{self.source} parameter '{param}' cannot be parsed: {err}" - ) - - if value is not None: - value = value.strip() - if (value is None or value == "") and default is not None: - value = default - logger.warning( - f"{self.source} has no defined '{param}'; using default of '{default}'" - ) - if required and (value is None or value == ""): + raise CbInvalidConfig(f"{self.source} parameter '{param}' cannot be parsed: {err}") + + if required and value == "": raise CbInvalidConfig(f"{self.source} has no '{param}' definition") + + if allowed is not None and value not in allowed: + raise CbInvalidConfig(f"{self.source} does not specify an allowed value: {allowed}") + return value - def _as_path( - self, - param: str, - required: bool = False, - exists: bool = True, - is_dir: bool = False, - default: str = None, - create_if_needed: bool = False, - ) -> Optional[str]: + def _as_path(self, param: str, required: bool = False, exists: bool = True, is_dir: bool = False, + default: str = "", create_if_needed: bool = False) -> str: """ - Get an string parameter from the configuration and treat it as a path, performing normalization + Get a string parameter from the configuration and treat it as a path, performing normalization to produce an absolute path. a "~/" at the beginning will be treated as the current user's home directory. @@ -220,9 +207,6 @@ def _as_path( :raises CbInvalidConfig: """ value = self._as_str(param, required, default=default) - if value is None: - return value - value = os.path.abspath(os.path.expanduser(value)) if exists: if not os.path.exists(value): @@ -230,33 +214,20 @@ def _as_path( try: os.makedirs(value) except Exception as err: - raise CbInvalidConfig( - f"{self.source} unable to create '{value}' for '{param}': {err}" - ) + raise CbInvalidConfig(f"{self.source} unable to create '{value}' for '{param}': {err}") else: - raise CbInvalidConfig( - f"{self.source} specified path parameter '{param}' ({value}) does not exist" - ) + raise CbInvalidConfig(f"{self.source} specified path parameter '{param}' ({value}) does not exist") if is_dir: if not os.path.isdir(value): - raise CbInvalidConfig( - f"{self.source} specified path '{param}' ({value}) is not a directory" - ) + raise CbInvalidConfig(f"{self.source} specified path '{param}' ({value}) is not a directory") else: if os.path.isdir(value): - raise CbInvalidConfig( - f"{self.source} specified path '{param}' ({value}) is a directory" - ) + raise CbInvalidConfig(f"{self.source} specified path '{param}' ({value}) is a directory") return value - def _as_int( - self, - param: str, - required: bool = False, - default: int = None, - min_value: int = -1, - ) -> Optional[int]: + def _as_int(self, param: str, required: bool = False, default: int = -1, min_value: int = None, + ) -> int: """ Get an integer configuration parameter from the configuration. 
A parameter that cannot be converted to an int will raise a ValueError. @@ -265,26 +236,17 @@ :param param: Name of the configuration parameter :param required: True if this must be specified in the configuration :param default: If not required, default value if not supplied :param min_value: minimum value allowed - :return: the integer value, or None/default if not required and no exception + :return: the integer value, or default if not required and no exception :raises CbInvalidConfig: :raises ValueError: """ - value = self._as_str(param, required) - use_default = default if default is None else max(default, min_value) - if (value is None or value == "") and use_default is not None: - logger.warning( - f"{self.source} has no defined '{param}'; using default of '{use_default}'" - ) - return use_default - else: - return ( - None if (value is None or value == "") else max(int(value), min_value) - ) + value = int(self._as_str(param, required=required, default=str(default))) + if min_value is not None and value < min_value: + raise CbInvalidConfig(f"{self.source} '{param}' must be greater than or equal to {min_value}") + return value # noinspection PySameParameterValue - def _as_bool( - self, param: str, required: bool = False, default: bool = None - ) -> Optional[bool]: + def _as_bool(self, param: str, required: bool = False, default: bool = None) -> Optional[bool]: """ Get a boolean configuration parameter from the configuration. A parameter not one of ["true", "yes", "false", "no"] will raise a ValueError. @@ -295,21 +257,11 @@ def _as_bool( :raises CbInvalidConfig: :raises ValueError: """ - value = self._as_str(param, required) - if value is not None and value.lower() not in [ - "true", - "yes", - "false", - "no", - "", - ]: - raise ValueError( - f"{self.source} parameter '{param}' is not a valid boolean value" - ) + value = self._as_str(param, required=required, default=str(default)) + if value is not None and value.lower() not in ["true", "yes", "false", "no"]: + raise ValueError(f"{self.source} parameter '{param}' is not a valid boolean value") if value is None and default is not None: - logger.warning( - f"{self.source} has no defined '{param}'; using default of '{default}'" - ) + logger.warning(f"{self.source} has no defined '{param}'; using default of '{default}'") return default else: return value if value is None else value.lower() in ["true", "yes"] diff --git a/src/globals.py b/src/globals.py index 4dd53c7..43b36ff 100644 --- a/src/globals.py +++ b/src/globals.py @@ -5,22 +5,21 @@ # This module contains global variables used by a single instance. 
################################################################################ +# used by the agent g_config = {} g_output_file = "" +g_yara_rule_map = {} +g_yara_rule_map_hash_list = [] +# configuration g_remote = False -g_mode = "master" +g_mode = "" -# local info -g_cb_server_url = "https://127.0.0.1" +g_cb_server_url = "" g_cb_server_token = "" - -# remote info g_broker_url = "" g_yara_rules_dir = "./yara_rules" -g_yara_rule_map = {} -g_yara_rule_map_hash_list = [] g_postgres_host = "127.0.0.1" g_postgres_db = "cb" @@ -38,7 +37,7 @@ g_scanning_interval = 360 -g_vacuum_interval = -1 -g_vacuum_script = "./scripts/vacuumscript.sh" +g_utility_interval = 0 +g_utility_script = "" g_worker_network_timeout = 5 diff --git a/src/main.py b/src/main.py index 25ddea2..57c38dd 100644 --- a/src/main.py +++ b/src/main.py @@ -21,11 +21,9 @@ import humanfriendly import lockfile import psycopg2 - # noinspection PyPackageRequirements import yara from celery.bin import worker - # noinspection PyPackageRequirements from daemon import daemon from peewee import SqliteDatabase @@ -33,10 +31,10 @@ import globals from analysis_result import AnalysisResult from binary_database import BinaryDetonationResult, db +from celery_app import app from config_handling import ConfigurationInit from feed import CbFeed, CbFeedInfo, CbReport from tasks import analyze_binary, generate_rule_map, update_yara_rules_remote -from celery_app import app logging_format = "%(asctime)s-%(name)s-%(lineno)d-%(levelname)s-%(message)s" logging.basicConfig(format=logging_format) @@ -301,7 +299,7 @@ def execute_script() -> None: "----- Executing vacuum script ----------------------------------------" ) prog = subprocess.Popen( - globals.g_vacuum_script, shell=True, universal_newlines=True + globals.g_utility_script, shell=True, universal_newlines=True ) stdout, stderr = prog.communicate() if stdout is not None and len(stdout.strip()) > 0: @@ -331,8 +329,8 @@ def perform(yara_rule_dir: str, conn, scanning_promises_queue: Queue): # Determine our binaries window (date forward) start_date_binaries = datetime.now() - timedelta(days=globals.g_num_days_binaries) - # vacuum script window start - vacuum_window_start = datetime.now() + # utility script window start + utility_window_start = datetime.now() cur = get_binary_file_cursor(conn, start_date_binaries) rows = cur.fetchall() @@ -349,12 +347,12 @@ def perform(yara_rule_dir: str, conn, scanning_promises_queue: Queue): analyze_binaries_and_queue_chunked(scanning_promises_queue, md5_hashes) - if globals.g_vacuum_interval > 0: - seconds_since_start = (datetime.now() - vacuum_window_start).seconds - if seconds_since_start >= globals.g_vacuum_interval * 60: + if globals.g_utility_interval > 0: + seconds_since_start = (datetime.now() - utility_window_start).seconds + if seconds_since_start >= globals.g_utility_interval * 60: # close connection execute_script() - vacuum_window_start = datetime.now() + utility_window_start = datetime.now() logger.debug("Exiting database sweep routine") @@ -708,7 +706,7 @@ def main(): yara.compile(filepaths=yara_rule_map) logger.info("All yara rules compiled successfully") except Exception as err: - logger.exception(f"There were errors compiling yara rules: {err}") + logger.error(f"There were errors compiling yara rules: {err}") sys.exit(5) else: exit_event = Event() @@ -768,7 +766,7 @@ def main(): exit_event.set() sys.exit(3) except Exception as err: - logger.exception(f"There were errors executing yara rules: {err}") + logger.error(f"There were errors executing yara 
rules: {err}") exit_event.set() sys.exit(4) diff --git a/test/config/bogus_concurrent_hashes.conf b/test/config/bogus_concurrent_hashes.conf deleted file mode 100644 index 5978db1..0000000 --- a/test/config/bogus_concurrent_hashes.conf +++ /dev/null @@ -1,19 +0,0 @@ -[general] -worker_type=local - -; ONLY for worker_type of local -cb_server_url=https://127.0.0.1:443 -cb_server_token=wFC4he6OBd20IBbB0XkhE5ZsjBQUj1GB - -yara_rules_dir=./rules - -postgres_host=localhost -postgres_username=cb -postgres_password=6PGcbuwlQnIuPqOF -postgres_db=cb -postgres_port=5002 - -niceness=1 -concurrent_hashes=BOGUS -disable_rescan=False -num_days_binaries=365 diff --git a/test/config/bogus_disable_rescan.conf b/test/config/bogus_disable_rescan.conf deleted file mode 100644 index 43bb864..0000000 --- a/test/config/bogus_disable_rescan.conf +++ /dev/null @@ -1,19 +0,0 @@ -[general] -worker_type=local - -; ONLY for worker_type of local -cb_server_url=https://127.0.0.1:443 -cb_server_token=wFC4he6OBd20IBbB0XkhE5ZsjBQUj1GB - -yara_rules_dir=./rules - -postgres_host=localhost -postgres_username=cb -postgres_password=6PGcbuwlQnIuPqOF -postgres_db=cb -postgres_port=5002 - -niceness=1 -concurrent_hashes=8 -disable_rescan=BOGUS -num_days_binaries=365 diff --git a/test/config/bogus_feed_database_dir.conf b/test/config/bogus_feed_database_dir.conf deleted file mode 100644 index 0d8cef4..0000000 --- a/test/config/bogus_feed_database_dir.conf +++ /dev/null @@ -1,25 +0,0 @@ -[general] -worker_type=local - -; ONLY for worker_type of local -cb_server_url=https://127.0.0.1:443 -cb_server_token=wFC4he6OBd20IBbB0XkhE5ZsjBQUj1GB - -yara_rules_dir=./rules - -postgres_host=localhost -postgres_username=cb -postgres_password=6PGcbuwlQnIuPqOF -postgres_db=cb -postgres_port=5002 - -niceness=1 -concurrent_hashes=8 -disable_rescan=False -num_days_binaries=365 - -vacuum_interval=360 -vacuum_script=../scripts/vacuumscript.sh - -; file, not dir -feed_database_dir=./__init__.py diff --git a/test/config/bogus_niceness.conf b/test/config/bogus_niceness.conf deleted file mode 100644 index 487e1f5..0000000 --- a/test/config/bogus_niceness.conf +++ /dev/null @@ -1,19 +0,0 @@ -[general] -worker_type=local - -; ONLY for worker_type of local -cb_server_url=https://127.0.0.1:443 -cb_server_token=wFC4he6OBd20IBbB0XkhE5ZsjBQUj1GB - -yara_rules_dir=./rules - -postgres_host=localhost -postgres_username=cb -postgres_password=6PGcbuwlQnIuPqOF -postgres_db=cb -postgres_port=5002 - -niceness=BOGUS -concurrent_hashes=8 -disable_rescan=False -num_days_binaries=365 diff --git a/test/config/bogus_num_days_binaries.conf b/test/config/bogus_num_days_binaries.conf deleted file mode 100644 index 59149e4..0000000 --- a/test/config/bogus_num_days_binaries.conf +++ /dev/null @@ -1,19 +0,0 @@ -[general] -worker_type=local - -; ONLY for worker_type of local -cb_server_url=https://127.0.0.1:443 -cb_server_token=wFC4he6OBd20IBbB0XkhE5ZsjBQUj1GB - -yara_rules_dir=./rules - -postgres_host=localhost -postgres_username=cb -postgres_password=6PGcbuwlQnIuPqOF -postgres_db=cb -postgres_port=5002 - -niceness=1 -concurrent_hashes=8 -disable_rescan=False -num_days_binaries=BOGUS diff --git a/test/config/bogus_postgres_port.conf b/test/config/bogus_postgres_port.conf deleted file mode 100644 index 7b64f50..0000000 --- a/test/config/bogus_postgres_port.conf +++ /dev/null @@ -1,19 +0,0 @@ -[general] -worker_type=local - -; ONLY for worker_type of local -cb_server_url=https://127.0.0.1:443 -cb_server_token=wFC4he6OBd20IBbB0XkhE5ZsjBQUj1GB - -yara_rules_dir=./rules - 
-postgres_host=localhost -postgres_username=cb -postgres_password=6PGcbuwlQnIuPqOF -postgres_db=cb -postgres_port=BOGUS - -niceness=1 -concurrent_hashes=8 -disable_rescan=False -num_days_binaries=365 diff --git a/test/config/bogus_rules_dir.conf b/test/config/bogus_rules_dir.conf deleted file mode 100644 index f7b1bef..0000000 --- a/test/config/bogus_rules_dir.conf +++ /dev/null @@ -1,26 +0,0 @@ -[general] -worker_type=local - -; ONLY for worker_type of remote -; IP Address of workers if worker_type is remote -;broker_url=redis:// - -yara_rules_dir=./rules/README.md - -postgres_host=localhost -postgres_username=cb -postgres_password=6PGcbuwlQnIuPqOF -postgres_db=cb -postgres_port=5002 - -; ONLY for worker_type of local -; Cb Response Server settings for scanning locally. -; For remote scanning please set these parameters in the yara worker config file -; Default: https://127.0.0.1 -cb_server_url=https://127.0.0.1:443 -cb_server_token=wFC4he6OBd20IBbB0XkhE5ZsjBQUj1GB - -niceness=1 -concurrent_hashes=8 -disable_rescan=False -num_days_binaries=365 diff --git a/test/config/bogus_vacuum_interval.conf b/test/config/bogus_vacuum_interval.conf deleted file mode 100644 index 0a91c28..0000000 --- a/test/config/bogus_vacuum_interval.conf +++ /dev/null @@ -1,21 +0,0 @@ -[general] -worker_type=local - -; ONLY for worker_type of local -cb_server_url=https://127.0.0.1:443 -cb_server_token=wFC4he6OBd20IBbB0XkhE5ZsjBQUj1GB - -yara_rules_dir=./rules - -postgres_host=localhost -postgres_username=cb -postgres_password=6PGcbuwlQnIuPqOF -postgres_db=cb -postgres_port=5002 - -niceness=1 -concurrent_hashes=8 -disable_rescan=False -num_days_binaries=365 - -vacuum_interval=BOGUS diff --git a/test/config/bogus_worker.conf b/test/config/bogus_worker.conf deleted file mode 100644 index 01167f0..0000000 --- a/test/config/bogus_worker.conf +++ /dev/null @@ -1,26 +0,0 @@ -[general] -worker_type=bogus - -; ONLY for worker_type of remote -; IP Address of workers if worker_type is remote -;broker_url=redis:// - -yara_rules_dir=./rules - -postgres_host=localhost -postgres_username=cb -postgres_password=6PGcbuwlQnIuPqOF -postgres_db=cb -postgres_port=5002 - -; ONLY for worker_type of local -; Cb Response Server settings for scanning locally. 
-; For remote scanning please set these parameters in the yara worker config file -; Default: https://127.0.0.1 -cb_server_url=https://127.0.0.1:443 -cb_server_token=wFC4he6OBd20IBbB0XkhE5ZsjBQUj1GB - -niceness=1 -concurrent_hashes=8 -disable_rescan=False -num_days_binaries=365 diff --git a/test/config/bogus_worker_network_timeout.conf b/test/config/bogus_worker_network_timeout.conf deleted file mode 100644 index a51349b..0000000 --- a/test/config/bogus_worker_network_timeout.conf +++ /dev/null @@ -1,24 +0,0 @@ -[general] -worker_type=local - -; ONLY for worker_type of local -cb_server_url=https://127.0.0.1:443 -cb_server_token=wFC4he6OBd20IBbB0XkhE5ZsjBQUj1GB - -yara_rules_dir=./rules - -postgres_host=localhost -postgres_username=cb -postgres_password=6PGcbuwlQnIuPqOF -postgres_db=cb -postgres_port=5002 - -niceness=1 -concurrent_hashes=8 -disable_rescan=False -num_days_binaries=365 - -vacuum_interval=360 -vacuum_script=../scripts/vacuumscript.sh - -worker_network_timeout=BOGUS diff --git a/test/config/from_main.conf b/test/config/from_main.conf deleted file mode 100644 index d660107..0000000 --- a/test/config/from_main.conf +++ /dev/null @@ -1,19 +0,0 @@ -[general] -worker_type=local - -; ONLY for worker_type of local -cb_server_url=https://127.0.0.1:443 -cb_server_token=wFC4he6OBd20IBbB0XkhE5ZsjBQUj1GB - -yara_rules_dir=./test/rules - -postgres_host=localhost -postgres_username=cb -postgres_password=6PGcbuwlQnIuPqOF -postgres_db=cb -postgres_port=5002 - -niceness=1 -concurrent_hashes=8 -disable_rescan=False -num_days_binaries=365 diff --git a/test/config/invalid_header.conf b/test/config/invalid_header.conf deleted file mode 100644 index 622fb5b..0000000 --- a/test/config/invalid_header.conf +++ /dev/null @@ -1,19 +0,0 @@ -[bogus] -worker_type=local - -; ONLY for worker_type of local -cb_server_url=https://127.0.0.1:443 -cb_server_token=wFC4he6OBd20IBbB0XkhE5ZsjBQUj1GB - -yara_rules_dir=./rules - -postgres_host=localhost -postgres_username=cb -postgres_password=6PGcbuwlQnIuPqOF -postgres_db=cb -postgres_port=5002 - -niceness=1 -concurrent_hashes=8 -disable_rescan=False -num_days_binaries=365 diff --git a/test/config/local_worker_no_server_token.conf b/test/config/local_worker_no_server_token.conf deleted file mode 100644 index db60931..0000000 --- a/test/config/local_worker_no_server_token.conf +++ /dev/null @@ -1,22 +0,0 @@ -[general] -worker_type=local - -; ONLY for worker_type of local -; Cb Response Server settings for scanning locally. -; For remote scanning please set these parameters in the yara worker config file -; Default: https://127.0.0.1 -cb_server_url=https://127.0.0.1:443 -; MISSING: cb_server_token - -yara_rules_dir=./rules - -postgres_host=localhost -postgres_username=cb -postgres_password=6PGcbuwlQnIuPqOF -postgres_db=cb -postgres_port=5002 - -niceness=1 -concurrent_hashes=8 -disable_rescan=False -num_days_binaries=365 diff --git a/test/config/local_worker_no_server_token2.conf b/test/config/local_worker_no_server_token2.conf deleted file mode 100644 index 38cb5d3..0000000 --- a/test/config/local_worker_no_server_token2.conf +++ /dev/null @@ -1,23 +0,0 @@ -[general] -worker_type=local - -; ONLY for worker_type of local -; Cb Response Server settings for scanning locally. 
-; For remote scanning please set these parameters in the yara worker config file -; Default: https://127.0.0.1 -cb_server_url=https://127.0.0.1:443 -; undefined -cb_server_token= - -yara_rules_dir=./rules - -postgres_host=localhost -postgres_username=cb -postgres_password=6PGcbuwlQnIuPqOF -postgres_db=cb -postgres_port=5002 - -niceness=1 -concurrent_hashes=8 -disable_rescan=False -num_days_binaries=365 diff --git a/test/config/local_worker_no_server_url.conf b/test/config/local_worker_no_server_url.conf deleted file mode 100644 index 48556f8..0000000 --- a/test/config/local_worker_no_server_url.conf +++ /dev/null @@ -1,22 +0,0 @@ -[general] -worker_type=local - -; ONLY for worker_type of local -; Cb Response Server settings for scanning locally. -; For remote scanning please set these parameters in the yara worker config file -; Default: https://127.0.0.1 -; MISSING: cb_server_url -cb_server_token=wFC4he6OBd20IBbB0XkhE5ZsjBQUj1GB - -yara_rules_dir=./rules - -postgres_host=localhost -postgres_username=cb -postgres_password=6PGcbuwlQnIuPqOF -postgres_db=cb -postgres_port=5002 - -niceness=1 -concurrent_hashes=8 -disable_rescan=False -num_days_binaries=365 diff --git a/test/config/local_worker_no_server_url2.conf b/test/config/local_worker_no_server_url2.conf deleted file mode 100644 index 64d03a5..0000000 --- a/test/config/local_worker_no_server_url2.conf +++ /dev/null @@ -1,23 +0,0 @@ -[general] -worker_type=local - -; ONLY for worker_type of local -; Cb Response Server settings for scanning locally. -; For remote scanning please set these parameters in the yara worker config file -; Default: https://127.0.0.1 -; undefined -cb_server_url= -cb_server_token=wFC4he6OBd20IBbB0XkhE5ZsjBQUj1GB - -yara_rules_dir=./rules - -postgres_host=localhost -postgres_username=cb -postgres_password=6PGcbuwlQnIuPqOF -postgres_db=cb -postgres_port=5002 - -niceness=1 -concurrent_hashes=8 -disable_rescan=False -num_days_binaries=365 diff --git a/test/config/malformed_param.conf b/test/config/malformed_param.conf deleted file mode 100644 index 7fa81e9..0000000 --- a/test/config/malformed_param.conf +++ /dev/null @@ -1,20 +0,0 @@ -[general] -worker_type=local - -; ONLY for worker_type of local -cb_server_url=https://127.0.0.1:443 -cb_server_token=wFC4he6OBd20IBbB0XkhE5ZsjBQUj1GB - -yara_rules_dir=./rules - -postgres_host=localhost -postgres_username=cb -postgres_password=6PGcbuwlQnIuPqOF -postgres_db=cb -postgres_port=5002 - -; bad value -niceness=1% -concurrent_hashes=8 -disable_rescan=False -num_days_binaries=365 diff --git a/test/config/missing_concurrent_hashes.conf b/test/config/missing_concurrent_hashes.conf deleted file mode 100644 index 284d424..0000000 --- a/test/config/missing_concurrent_hashes.conf +++ /dev/null @@ -1,23 +0,0 @@ -[general] -worker_type=local - -; ONLY for worker_type of local -cb_server_url=https://127.0.0.1:443 -cb_server_token=wFC4he6OBd20IBbB0XkhE5ZsjBQUj1GB - -yara_rules_dir=./rules - -postgres_host=localhost -postgres_username=cb -postgres_password=6PGcbuwlQnIuPqOF -postgres_db=cb -postgres_port=5002 - -niceness=1 -; undefined -concurrent_hashes= -disable_rescan=False -num_days_binaries=365 - -vacuum_interval=360 -vacuum_script=../scripts/vacuumscript.sh diff --git a/test/config/missing_disable_rescan.conf b/test/config/missing_disable_rescan.conf deleted file mode 100644 index 887cfa4..0000000 --- a/test/config/missing_disable_rescan.conf +++ /dev/null @@ -1,23 +0,0 @@ -[general] -worker_type=local - -; ONLY for worker_type of local -cb_server_url=https://127.0.0.1:443 
-cb_server_token=wFC4he6OBd20IBbB0XkhE5ZsjBQUj1GB - -yara_rules_dir=./rules - -postgres_host=localhost -postgres_username=cb -postgres_password=6PGcbuwlQnIuPqOF -postgres_db=cb -postgres_port=5002 - -niceness=1 -concurrent_hashes=8 -; undefined -disable_rescan= -num_days_binaries=365 - -vacuum_interval=360 -vacuum_script=../scripts/vacuumscript.sh diff --git a/test/config/missing_feed_database_dir.conf b/test/config/missing_feed_database_dir.conf deleted file mode 100644 index 143bb87..0000000 --- a/test/config/missing_feed_database_dir.conf +++ /dev/null @@ -1,25 +0,0 @@ -[general] -worker_type=local - -; ONLY for worker_type of local -cb_server_url=https://127.0.0.1:443 -cb_server_token=wFC4he6OBd20IBbB0XkhE5ZsjBQUj1GB - -yara_rules_dir=./rules - -postgres_host=localhost -postgres_username=cb -postgres_password=6PGcbuwlQnIuPqOF -postgres_db=cb -postgres_port=5002 - -niceness=1 -concurrent_hashes=8 -disable_rescan=False -num_days_binaries=365 - -vacuum_interval=360 -vacuum_script=../scripts/vacuumscript.sh - -; invalid path -feed_database_dir=./no-such-directory diff --git a/test/config/missing_header.conf b/test/config/missing_header.conf deleted file mode 100644 index c6cfe47..0000000 --- a/test/config/missing_header.conf +++ /dev/null @@ -1,19 +0,0 @@ -; MISSING: [general] -worker_type=local - -; ONLY for worker_type of local -cb_server_url=https://127.0.0.1:443 -cb_server_token=wFC4he6OBd20IBbB0XkhE5ZsjBQUj1GB - -yara_rules_dir=./rules - -postgres_host=localhost -postgres_username=cb -postgres_password=6PGcbuwlQnIuPqOF -postgres_db=cb -postgres_port=5002 - -niceness=1 -concurrent_hashes=8 -disable_rescan=False -num_days_binaries=365 diff --git a/test/config/missing_niceness.conf b/test/config/missing_niceness.conf deleted file mode 100644 index c220bd1..0000000 --- a/test/config/missing_niceness.conf +++ /dev/null @@ -1,23 +0,0 @@ -[general] -worker_type=local - -; ONLY for worker_type of local -cb_server_url=https://127.0.0.1:443 -cb_server_token=wFC4he6OBd20IBbB0XkhE5ZsjBQUj1GB - -yara_rules_dir=./rules - -postgres_host=localhost -postgres_username=cb -postgres_password=6PGcbuwlQnIuPqOF -postgres_db=cb -postgres_port=5002 - -; undefined -niceness= -concurrent_hashes=8 -disable_rescan=False -num_days_binaries=365 - -vacuum_interval=360 -vacuum_script=../scripts/vacuumscript.sh diff --git a/test/config/missing_num_days_binaries.conf b/test/config/missing_num_days_binaries.conf deleted file mode 100644 index 5ba6694..0000000 --- a/test/config/missing_num_days_binaries.conf +++ /dev/null @@ -1,23 +0,0 @@ -[general] -worker_type=local - -; ONLY for worker_type of local -cb_server_url=https://127.0.0.1:443 -cb_server_token=wFC4he6OBd20IBbB0XkhE5ZsjBQUj1GB - -yara_rules_dir=./rules - -postgres_host=localhost -postgres_username=cb -postgres_password=6PGcbuwlQnIuPqOF -postgres_db=cb -postgres_port=5002 - -niceness=1 -concurrent_hashes=8 -disable_rescan=False -; undefined -num_days_binaries= - -vacuum_interval=360 -vacuum_script=../scripts/vacuumscript.sh diff --git a/test/config/missing_postgres_db.conf b/test/config/missing_postgres_db.conf deleted file mode 100644 index a6f6efa..0000000 --- a/test/config/missing_postgres_db.conf +++ /dev/null @@ -1,22 +0,0 @@ -[general] -worker_type=local - -; ONLY for worker_type of local -cb_server_url=https://127.0.0.1:443 -cb_server_token=wFC4he6OBd20IBbB0XkhE5ZsjBQUj1GB - -yara_rules_dir=./rules - -postgres_host=localhost -postgres_username=cb -postgres_password=6PGcbuwlQnIuPqOF -; MISSING: postgres_db -postgres_port=5002 - -niceness=1 
-concurrent_hashes=8 -disable_rescan=False -num_days_binaries=365 - -vacuum_interval=360 -vacuum_script=../scripts/vacuumscript.sh diff --git a/test/config/missing_postgres_db2.conf b/test/config/missing_postgres_db2.conf deleted file mode 100644 index 4c22cb3..0000000 --- a/test/config/missing_postgres_db2.conf +++ /dev/null @@ -1,23 +0,0 @@ -[general] -worker_type=local - -; ONLY for worker_type of local -cb_server_url=https://127.0.0.1:443 -cb_server_token=wFC4he6OBd20IBbB0XkhE5ZsjBQUj1GB - -yara_rules_dir=./rules - -postgres_host=localhost -postgres_username=cb -postgres_password=6PGcbuwlQnIuPqOF -; undefined -postgres_db= -postgres_port=5002 - -niceness=1 -concurrent_hashes=8 -disable_rescan=False -num_days_binaries=365 - -vacuum_interval=360 -vacuum_script=../scripts/vacuumscript.sh diff --git a/test/config/missing_postgres_host.conf b/test/config/missing_postgres_host.conf deleted file mode 100644 index 60b2cd6..0000000 --- a/test/config/missing_postgres_host.conf +++ /dev/null @@ -1,23 +0,0 @@ -[general] -worker_type=local - -; ONLY for worker_type of local -cb_server_url=https://127.0.0.1:443 -cb_server_token=wFC4he6OBd20IBbB0XkhE5ZsjBQUj1GB - -yara_rules_dir=./rules - -; MISSING: postgres_host -postgres_username=cb -postgres_password=6PGcbuwlQnIuPqOF -postgres_db=cb -postgres_port=5002 - - -niceness=1 -concurrent_hashes=8 -disable_rescan=False -num_days_binaries=365 - -vacuum_interval=360 -vacuum_script=../scripts/vacuumscript.sh diff --git a/test/config/missing_postgres_host2.conf b/test/config/missing_postgres_host2.conf deleted file mode 100644 index 23b8cb1..0000000 --- a/test/config/missing_postgres_host2.conf +++ /dev/null @@ -1,24 +0,0 @@ -[general] -worker_type=local - -; ONLY for worker_type of local -cb_server_url=https://127.0.0.1:443 -cb_server_token=wFC4he6OBd20IBbB0XkhE5ZsjBQUj1GB - -yara_rules_dir=./rules - -; undefined -postgres_host= -postgres_username=cb -postgres_password=6PGcbuwlQnIuPqOF -postgres_db=cb -postgres_port=5002 - - -niceness=1 -concurrent_hashes=8 -disable_rescan=False -num_days_binaries=365 - -vacuum_interval=360 -vacuum_script=../scripts/vacuumscript.sh diff --git a/test/config/missing_postgres_password.conf b/test/config/missing_postgres_password.conf deleted file mode 100644 index 13c68a6..0000000 --- a/test/config/missing_postgres_password.conf +++ /dev/null @@ -1,19 +0,0 @@ -[general] -worker_type=local - -; ONLY for worker_type of local -cb_server_url=https://127.0.0.1:443 -cb_server_token=wFC4he6OBd20IBbB0XkhE5ZsjBQUj1GB - -yara_rules_dir=./rules - -postgres_host=localhost -postgres_username=cb -; MISSING: postgres_password -postgres_db=cb -postgres_port=5002 - -niceness=1 -concurrent_hashes=8 -disable_rescan=False -num_days_binaries=365 diff --git a/test/config/missing_postgres_password2.conf b/test/config/missing_postgres_password2.conf deleted file mode 100644 index 2ae51b5..0000000 --- a/test/config/missing_postgres_password2.conf +++ /dev/null @@ -1,20 +0,0 @@ -[general] -worker_type=local - -; ONLY for worker_type of local -cb_server_url=https://127.0.0.1:443 -cb_server_token=wFC4he6OBd20IBbB0XkhE5ZsjBQUj1GB - -yara_rules_dir=./rules - -postgres_host=localhost -postgres_username=cb -; undefined -postgres_password= -postgres_db=cb -postgres_port=5002 - -niceness=1 -concurrent_hashes=8 -disable_rescan=False -num_days_binaries=365 diff --git a/test/config/missing_postgres_port.conf b/test/config/missing_postgres_port.conf deleted file mode 100644 index 9b471ed..0000000 --- a/test/config/missing_postgres_port.conf +++ /dev/null @@ -1,22 
+0,0 @@ -[general] -worker_type=local - -; ONLY for worker_type of local -cb_server_url=https://127.0.0.1:443 -cb_server_token=wFC4he6OBd20IBbB0XkhE5ZsjBQUj1GB - -yara_rules_dir=./rules - -postgres_host=localhost -postgres_username=cb -postgres_password=6PGcbuwlQnIuPqOF -postgres_db=cb -; MISSING: postgres_port - -niceness=1 -concurrent_hashes=8 -disable_rescan=False -num_days_binaries=365 - -vacuum_interval=360 -vacuum_script=../scripts/vacuumscript.sh diff --git a/test/config/missing_postgres_port2.conf b/test/config/missing_postgres_port2.conf deleted file mode 100644 index fe0ab83..0000000 --- a/test/config/missing_postgres_port2.conf +++ /dev/null @@ -1,23 +0,0 @@ -[general] -worker_type=local - -; ONLY for worker_type of local -cb_server_url=https://127.0.0.1:443 -cb_server_token=wFC4he6OBd20IBbB0XkhE5ZsjBQUj1GB - -yara_rules_dir=./rules - -postgres_host=localhost -postgres_username=cb -postgres_password=6PGcbuwlQnIuPqOF -postgres_db=cb -; undefined -postgres_port= - -niceness=1 -concurrent_hashes=8 -disable_rescan=False -num_days_binaries=365 - -vacuum_interval=360 -vacuum_script=../scripts/vacuumscript.sh diff --git a/test/config/missing_postgres_username.conf b/test/config/missing_postgres_username.conf deleted file mode 100644 index e809df5..0000000 --- a/test/config/missing_postgres_username.conf +++ /dev/null @@ -1,22 +0,0 @@ -[general] -worker_type=local - -; ONLY for worker_type of local -cb_server_url=https://127.0.0.1:443 -cb_server_token=wFC4he6OBd20IBbB0XkhE5ZsjBQUj1GB - -yara_rules_dir=./rules - -postgres_host=localhost -; MISSING: postgres_username -postgres_password=6PGcbuwlQnIuPqOF -postgres_db=cb -postgres_port=5002 - -niceness=1 -concurrent_hashes=8 -disable_rescan=False -num_days_binaries=365 - -vacuum_interval=360 -vacuum_script=../scripts/vacuumscript.sh diff --git a/test/config/missing_postgres_username2.conf b/test/config/missing_postgres_username2.conf deleted file mode 100644 index f3a51cf..0000000 --- a/test/config/missing_postgres_username2.conf +++ /dev/null @@ -1,23 +0,0 @@ -[general] -worker_type=local - -; ONLY for worker_type of local -cb_server_url=https://127.0.0.1:443 -cb_server_token=wFC4he6OBd20IBbB0XkhE5ZsjBQUj1GB - -yara_rules_dir=./rules - -postgres_host=localhost -; undefined -postgres_username= -postgres_password=6PGcbuwlQnIuPqOF -postgres_db=cb -postgres_port=5002 - -niceness=1 -concurrent_hashes=8 -disable_rescan=False -num_days_binaries=365 - -vacuum_interval=360 -vacuum_script=../scripts/vacuumscript.sh diff --git a/test/config/missing_rules_dir.conf b/test/config/missing_rules_dir.conf deleted file mode 100644 index 78742aa..0000000 --- a/test/config/missing_rules_dir.conf +++ /dev/null @@ -1,26 +0,0 @@ -[general] -worker_type=local - -; ONLY for worker_type of remote -; IP Address of workers if worker_type is remote -;broker_url=redis:// - -yara_rules_dir=./rules_not_here - -postgres_host=localhost -postgres_username=cb -postgres_password=6PGcbuwlQnIuPqOF -postgres_db=cb -postgres_port=5002 - -; ONLY for worker_type of local -; Cb Response Server settings for scanning locally. 
-; For remote scanning please set these parameters in the yara worker config file -; Default: https://127.0.0.1 -cb_server_url=https://127.0.0.1:443 -cb_server_token=wFC4he6OBd20IBbB0XkhE5ZsjBQUj1GB - -niceness=1 -concurrent_hashes=8 -disable_rescan=False -num_days_binaries=365 diff --git a/test/config/missing_worker.conf b/test/config/missing_worker.conf deleted file mode 100644 index 25d19e5..0000000 --- a/test/config/missing_worker.conf +++ /dev/null @@ -1,21 +0,0 @@ -[general] -; MISSING: worker_type - -cb_server_url=https://127.0.0.1:443 -cb_server_token=wFC4he6OBd20IBbB0XkhE5ZsjBQUj1GB - -yara_rules_dir=./rules - -postgres_host=localhost -postgres_username=cb -postgres_password=6PGcbuwlQnIuPqOF -postgres_db=cb -postgres_port=5002 - -niceness=1 -concurrent_hashes=8 -disable_rescan=False -num_days_binaries=365 - -vacuum_interval=360 -vacuum_script=../scripts/vacuumscript.sh diff --git a/test/config/missing_worker2.conf b/test/config/missing_worker2.conf deleted file mode 100644 index 80a3132..0000000 --- a/test/config/missing_worker2.conf +++ /dev/null @@ -1,22 +0,0 @@ -[general] -; undefined -worker_type= - -cb_server_url=https://127.0.0.1:443 -cb_server_token=wFC4he6OBd20IBbB0XkhE5ZsjBQUj1GB - -yara_rules_dir=./rules - -postgres_host=localhost -postgres_username=cb -postgres_password=6PGcbuwlQnIuPqOF -postgres_db=cb -postgres_port=5002 - -niceness=1 -concurrent_hashes=8 -disable_rescan=False -num_days_binaries=365 - -vacuum_interval=360 -vacuum_script=../scripts/vacuumscript.sh diff --git a/test/config/missing_worker_network_timeout.conf b/test/config/missing_worker_network_timeout.conf deleted file mode 100644 index e018ec4..0000000 --- a/test/config/missing_worker_network_timeout.conf +++ /dev/null @@ -1,24 +0,0 @@ -[general] -worker_type=local - -; ONLY for worker_type of local -cb_server_url=https://127.0.0.1:443 -cb_server_token=wFC4he6OBd20IBbB0XkhE5ZsjBQUj1GB - -yara_rules_dir=./rules - -postgres_host=localhost -postgres_username=cb -postgres_password=6PGcbuwlQnIuPqOF -postgres_db=cb -postgres_port=5002 - -niceness=1 -concurrent_hashes=8 -disable_rescan=False -num_days_binaries=365 - -vacuum_interval=360 -vacuum_script=../scripts/vacuumscript.sh - -worker_network_timeout= diff --git a/test/config/negative_vacuum_interval.conf b/test/config/negative_vacuum_interval.conf deleted file mode 100644 index 0690263..0000000 --- a/test/config/negative_vacuum_interval.conf +++ /dev/null @@ -1,22 +0,0 @@ -[general] -worker_type=local - -; ONLY for worker_type of local -cb_server_url=https://127.0.0.1:443 -cb_server_token=wFC4he6OBd20IBbB0XkhE5ZsjBQUj1GB - -yara_rules_dir=./rules - -postgres_host=localhost -postgres_username=cb -postgres_password=6PGcbuwlQnIuPqOF -postgres_db=cb -postgres_port=5002 - -niceness=1 -concurrent_hashes=8 -disable_rescan=False -num_days_binaries=365 - -vacuum_interval=-20 -vacuum_script=../scripts/vacuumscript.sh diff --git a/test/config/no_rules_dir.conf b/test/config/no_rules_dir.conf deleted file mode 100644 index f77f803..0000000 --- a/test/config/no_rules_dir.conf +++ /dev/null @@ -1,19 +0,0 @@ -[general] -worker_type=local - -; ONLY for worker_type of local -cb_server_url=https://127.0.0.1:443 -cb_server_token=wFC4he6OBd20IBbB0XkhE5ZsjBQUj1GB - -; MISSING: yara_rules_dir - -postgres_host=localhost -postgres_username=cb -postgres_password=6PGcbuwlQnIuPqOF -postgres_db=cb -postgres_port=5002 - -niceness=1 -concurrent_hashes=8 -disable_rescan=False -num_days_binaries=365 diff --git a/test/config/no_rules_dir2.conf b/test/config/no_rules_dir2.conf deleted 
file mode 100644 index 4978913..0000000 --- a/test/config/no_rules_dir2.conf +++ /dev/null @@ -1,20 +0,0 @@ -[general] -worker_type=local - -; ONLY for worker_type of local -cb_server_url=https://127.0.0.1:443 -cb_server_token=wFC4he6OBd20IBbB0XkhE5ZsjBQUj1GB - -; undefined -yara_rules_dir= - -postgres_host=localhost -postgres_username=cb -postgres_password=6PGcbuwlQnIuPqOF -postgres_db=cb -postgres_port=5002 - -niceness=1 -concurrent_hashes=8 -disable_rescan=False -num_days_binaries=365 diff --git a/test/config/no_such_vacuum_script.conf b/test/config/no_such_vacuum_script.conf deleted file mode 100644 index 12b1fab..0000000 --- a/test/config/no_such_vacuum_script.conf +++ /dev/null @@ -1,22 +0,0 @@ -[general] -worker_type=local - -; ONLY for worker_type of local -cb_server_url=https://127.0.0.1:443 -cb_server_token=wFC4he6OBd20IBbB0XkhE5ZsjBQUj1GB - -yara_rules_dir=./rules - -postgres_host=localhost -postgres_username=cb -postgres_password=6PGcbuwlQnIuPqOF -postgres_db=cb -postgres_port=5002 - -niceness=1 -concurrent_hashes=8 -disable_rescan=False -num_days_binaries=365 - -vacuum_interval=36 -vacuum_script=no-such-script.sh diff --git a/test/config/remote_worker_no_broker_url.conf b/test/config/remote_worker_no_broker_url.conf deleted file mode 100644 index 9f5314c..0000000 --- a/test/config/remote_worker_no_broker_url.conf +++ /dev/null @@ -1,17 +0,0 @@ -[general] -worker_type=remote - -; MISSING: broker_url - -yara_rules_dir=./rules - -postgres_host=localhost -postgres_username=cb -postgres_password=6PGcbuwlQnIuPqOF -postgres_db=cb -postgres_port=5002 - -niceness=1 -concurrent_hashes=8 -disable_rescan=False -num_days_binaries=365 diff --git a/test/config/remote_worker_no_broker_url2.conf b/test/config/remote_worker_no_broker_url2.conf deleted file mode 100644 index b399874..0000000 --- a/test/config/remote_worker_no_broker_url2.conf +++ /dev/null @@ -1,17 +0,0 @@ -[general] -worker_type=remote - -broker_url= - -yara_rules_dir=./rules - -postgres_host=localhost -postgres_username=cb -postgres_password=6PGcbuwlQnIuPqOF -postgres_db=cb -postgres_port=5002 - -niceness=1 -concurrent_hashes=8 -disable_rescan=False -num_days_binaries=365 diff --git a/test/config/vacuum_script_dir.conf b/test/config/vacuum_script_dir.conf deleted file mode 100644 index 393d058..0000000 --- a/test/config/vacuum_script_dir.conf +++ /dev/null @@ -1,23 +0,0 @@ -[general] -worker_type=local - -; ONLY for worker_type of local -cb_server_url=https://127.0.0.1:443 -cb_server_token=wFC4he6OBd20IBbB0XkhE5ZsjBQUj1GB - -yara_rules_dir=./rules - -postgres_host=localhost -postgres_username=cb -postgres_password=6PGcbuwlQnIuPqOF -postgres_db=cb -postgres_port=5002 - -niceness=1 -concurrent_hashes=8 -disable_rescan=False -num_days_binaries=365 - -vacuum_interval=360 -; invalid, is dir -vacuum_script=./rules diff --git a/test/config/vacuum_script_enabled.conf b/test/config/vacuum_script_enabled.conf deleted file mode 100644 index dcedd0f..0000000 --- a/test/config/vacuum_script_enabled.conf +++ /dev/null @@ -1,22 +0,0 @@ -[general] -worker_type=local - -; ONLY for worker_type of local -cb_server_url=https://127.0.0.1:443 -cb_server_token=wFC4he6OBd20IBbB0XkhE5ZsjBQUj1GB - -yara_rules_dir=./rules - -postgres_host=localhost -postgres_username=cb -postgres_password=6PGcbuwlQnIuPqOF -postgres_db=cb -postgres_port=5002 - -niceness=1 -concurrent_hashes=8 -disable_rescan=False -num_days_binaries=365 - -vacuum_interval=360 -vacuum_script=../scripts/vacuumscript.sh diff --git a/test/config/vacuum_script_no_interval.conf 
b/test/config/vacuum_script_no_interval.conf deleted file mode 100644 index 2dd8511..0000000 --- a/test/config/vacuum_script_no_interval.conf +++ /dev/null @@ -1,22 +0,0 @@ -[general] -worker_type=local - -; ONLY for worker_type of local -cb_server_url=https://127.0.0.1:443 -cb_server_token=wFC4he6OBd20IBbB0XkhE5ZsjBQUj1GB - -yara_rules_dir=./rules - -postgres_host=localhost -postgres_username=cb -postgres_password=6PGcbuwlQnIuPqOF -postgres_db=cb -postgres_port=5002 - -niceness=1 -concurrent_hashes=8 -disable_rescan=False -num_days_binaries=365 - -vacuum_interval=0 -vacuum_script=../scripts/vacuumscript.sh diff --git a/test/config/valid.conf b/test/config/valid.conf deleted file mode 100644 index dcedd0f..0000000 --- a/test/config/valid.conf +++ /dev/null @@ -1,22 +0,0 @@ -[general] -worker_type=local - -; ONLY for worker_type of local -cb_server_url=https://127.0.0.1:443 -cb_server_token=wFC4he6OBd20IBbB0XkhE5ZsjBQUj1GB - -yara_rules_dir=./rules - -postgres_host=localhost -postgres_username=cb -postgres_password=6PGcbuwlQnIuPqOF -postgres_db=cb -postgres_port=5002 - -niceness=1 -concurrent_hashes=8 -disable_rescan=False -num_days_binaries=365 - -vacuum_interval=360 -vacuum_script=../scripts/vacuumscript.sh diff --git a/test/config/valid2.conf b/test/config/valid2.conf deleted file mode 100644 index e5c748f..0000000 --- a/test/config/valid2.conf +++ /dev/null @@ -1,21 +0,0 @@ -[general] -worker_type=remote - -; ONLY for worker_type of remote -broker_url=redis:// - -yara_rules_dir=./rules - -postgres_host=localhost -postgres_username=cb -postgres_password=6PGcbuwlQnIuPqOF -postgres_db=cb -postgres_port=5002 - -niceness=1 -concurrent_hashes=8 -disable_rescan=False -num_days_binaries=365 - -vacuum_interval=360 -vacuum_script=../scripts/vacuumscript.sh diff --git a/test/test_configInit.py b/test/test_configInit.py index 550adcd..0015103 100644 --- a/test/test_configInit.py +++ b/test/test_configInit.py @@ -2,6 +2,7 @@ # Copyright © 2014-2019 VMware, Inc. All Rights Reserved. import os +from typing import List from unittest import TestCase import globals @@ -10,15 +11,50 @@ TESTS = os.path.abspath(os.path.dirname(__file__)) +TESTCONF = os.path.join(TESTS, "conf-testing.conf") +BASE = """[general] +mode=master +worker_type=local + +cb_server_url=https://127.0.0.1:443 +cb_server_token=abcdefghijklmnopqrstuvwxyz012345 +broker_url=redis:// + +yara_rules_dir=./rules + +postgres_host=localhost +postgres_username=cb +postgres_password=abcdefghijklmnop +postgres_db=cb +postgres_port=5002 + +niceness=1 +concurrent_hashes=8 +disable_rescan=False +num_days_binaries=365 + +utility_interval=360 +utility_script=../scripts/vacuumscript.sh + +feed_database_dir=./feed_db + +worker_network_timeout=5 +""" + class TestConfigurationInit(TestCase): - def setUp(self): + def setUp(self) -> None: + """ + Reset globals and recreate a base configuration. 
+ :return: + """ globals.g_config = {} - globals.g_output_file = './yara_feed.json' + globals.g_output_file = "" globals.g_remote = False - globals.g_cb_server_url = 'https://127.0.0.1' - globals.g_cb_server_token = '' + globals.g_mode = "" + globals.g_cb_server_url = "" + globals.g_cb_server_token = "" globals.g_broker_url = '' globals.g_yara_rules_dir = './yara_rules' globals.g_yara_rule_map = {} @@ -33,404 +69,712 @@ def setUp(self): globals.g_num_binaries_analyzed = 0 globals.g_disable_rescan = True globals.g_num_days_binaries = 365 - globals.g_vacuum_interval = -1 - globals.g_vacuum_script = './scripts/vacuumscript.sh' + globals.g_utility_interval = 0 + globals.g_utility_script = "" globals.g_feed_database_dir = "./feed_db" + globals.g_worker_network_timeout = 5 + + with open(TESTCONF, "w") as fp: + fp.write(BASE) - def test_01_missing_config(self): + def tearDown(self) -> None: """ - Ensure a missing config file is detected. + Cleanup after testing. """ - with self.assertRaises(CbInvalidConfig) as err: - ConfigurationInit(os.path.join(TESTS, "config", "no-such-config.conf")) - assert "does not exist!" in "{0}".format(err.exception.args[0]) + if os.path.exists(TESTCONF): + os.remove(TESTCONF) + + if os.path.exists(globals.g_feed_database_dir): + os.rmdir(globals.g_feed_database_dir) + + @staticmethod + def mangle(header: str = None, add: List[str] = None, change: dict = None): + """ + Mangle the base configuration file to produce the testing situation + :param header: mangle header entry, if specified + :param add: list of string entries to add to the end + :param change: dictionary of changes, keyed by parameter; a value of None removes the line + :return: + """ + with open(TESTCONF, "r") as fp: + original = fp.readlines() + + replace = [] + for line in original: + if header is not None and line.strip().startswith("[") and line.strip().endswith("]"): + line = header + "\n" + if change is not None: + for key, value in change.items(): + if line.startswith(key): + if value is None: + line = None + else: + line = f"{key}={value}\n" + break + if line is not None: + replace.append(line) + + if add is not None: + for item in add: + replace.append(item + "\n") - # ----- Full validation (main) + with open(TESTCONF, "w") as fp: + fp.writelines(replace) - def test_02_validate_config(self): - # valid local - globals.g_output_file = None - globals.g_remote = None - ConfigurationInit(os.path.join(TESTS, "config", "valid.conf"), "sample.json") + # ----- Begin Tests ---------------------------------------------------------------------- + + def test_00a_validate_config(self): + """ + Ensure our base configuration is valid. + """ + ConfigurationInit(TESTCONF, "sample.json") self.assertTrue(globals.g_output_file.endswith("sample.json")) self.assertFalse(globals.g_remote) - # valid remote - globals.g_remote = None - ConfigurationInit(os.path.join(TESTS, "config", "valid2.conf"), "sample2.json") - self.assertTrue(globals.g_output_file.endswith("sample2.json")) - self.assertTrue(globals.g_remote) + def test_00b_validate_config_worker(self): + """ + Ensure our base configuration is valid for worker types. + """ + ConfigurationInit(TESTCONF) + self.assertEquals("", globals.g_output_file) + self.assertFalse(globals.g_remote) - def test_03a_config_missing_header(self): + def test_01a_missing_config(self): """ - Ensure we detect a configuration file with no section header. + Ensure a missing config file is detected. 
""" with self.assertRaises(CbInvalidConfig) as err: - ConfigurationInit(os.path.join(TESTS, "config", "missing_header.conf"), "sample.json") + ConfigurationInit(os.path.join(TESTS, "config", "no-such-config.conf")) + assert "does not exist!" in "{0}".format(err.exception.args[0]) + + def test_01b_config_is_dir(self): + """ + Ensure a config path leading to a directory is detected. + """ + with self.assertRaises(CbInvalidConfig) as err: + ConfigurationInit(os.path.join(TESTS)) + assert "is a directory!" in "{0}".format(err.exception.args[0]) + + def test_02a_section_header_missing(self): + """ + Ensure we detect a configuration file without a "[general]" section header. + """ + self.mangle(change={"[general]": None}) + with self.assertRaises(CbInvalidConfig) as err: + ConfigurationInit(TESTCONF) assert "File contains no section headers" in "{0}".format(err.exception.args[0]) - def test_03b_config_invalid_header(self): + def test_02b_section_header_invalid(self): """ - Ensure we detect a configuration file with no "[general]" section header. + Ensure we detect a configuration file with a different section header than "[general]". """ + self.mangle(header="[foobar]") with self.assertRaises(CbInvalidConfig) as err: - ConfigurationInit(os.path.join(TESTS, "config", "invalid_header.conf"), "sample.json") + ConfigurationInit(TESTCONF) assert "does not have a 'general' section" in "{0}".format(err.exception.args[0]) - def test_04a_config_missing_worker(self): + def test_03a_mode_missing(self): + """ + Ensure we detect a configuration file without a required 'mode' definition. + """ + self.mangle(change={"mode": None}) + with self.assertRaises(CbInvalidConfig) as err: + ConfigurationInit(TESTCONF) + assert "has no 'mode' definition" in "{0}".format(err.exception.args[0]) + + def test_03b_mode_invalid(self): """ - Ensure that config lacking worker information defaults to local. + Ensure we detect a configuration file with an invalid 'mode' definition. """ - # not defined in file - globals.g_remote = None - ConfigurationInit(os.path.join(TESTS, "config", "missing_worker.conf"), "sample.json") + self.mangle(change={"mode": "bogus"}) + with self.assertRaises(CbInvalidConfig) as err: + ConfigurationInit(TESTCONF) + assert "does not specify an allowed value: ['master', 'slave']" in "{0}".format(err.exception.args[0]) + + def test_03c_mode_duplicated(self): + """ + Ensure we detect a configuration file with a duplicate 'mode' defintion (same logic applies + to all parameter duplicates). + """ + self.mangle(add=["mode=bogus"]) + with self.assertRaises(CbInvalidConfig) as err: + ConfigurationInit(TESTCONF) + assert "option 'mode' in section 'general' already exists" in "{0}".format(err.exception.args[0]) + + def test_04a_worker_missing(self): + """ + Ensure that lacking 'worker_type' information defaults to local. + """ + self.mangle(change={"worker_type": None}) + ConfigurationInit(TESTCONF) self.assertFalse(globals.g_remote) - # defined as "worker_type=" - globals.g_remote = None - ConfigurationInit(os.path.join(TESTS, "config", "missing_worker2.conf"), "sample.json") + def test_04b_worker_empty(self): + """ + Ensure that empty 'worker_type' information defaults to local. + """ + self.mangle(change={"worker_type": ""}) + ConfigurationInit(TESTCONF) self.assertFalse(globals.g_remote) - def test_04b_config_bogus_worker(self): + def test_04c_config_bogus_worker(self): """ - Ensure that config with bogus worker is detected. + Ensure that with bogus 'worker_type' is detected. 
""" + self.mangle(change={"worker_type": "BOGUS"}) with self.assertRaises(CbInvalidConfig) as err: - ConfigurationInit(os.path.join(TESTS, "config", "bogus_worker.conf"), "sample.json") - assert "invalid 'worker_type'" in "{0}".format(err.exception.args[0]) + ConfigurationInit(TESTCONF) + assert "does not specify an allowed value: ['local', 'remote']" in "{0}".format(err.exception.args[0]) - def test_05a_config_local_worker_missing_server_url(self): + def test_05a_cb_server_url_missing_for_master_and_remote(self): """ - Ensure that local worker config with missing server url is detected. + Ensure that 'cb_server_url' is not required if mode==slave and worker_type==remote """ - # not defined in file + self.mangle(change={"mode": "master", "worker_type": "remote", "cb_server_url": None}) + ConfigurationInit(TESTCONF) + self.assertEqual("", globals.g_cb_server_url) + + def test_05b_cb_server_url_empty_for_master_and_remote(self): + """ + Ensure that 'cb_server_url' is not required if mode==slave and worker_type==remote + """ + self.mangle(change={"mode": "master", "worker_type": "remote", "cb_server_url": ""}) + ConfigurationInit(TESTCONF) + self.assertEqual("", globals.g_cb_server_url) + + def test_05c_cb_server_url_missing_for_slave(self): + """ + Ensure that 'cb_server_url' is required and detected if mode=slave. + """ + self.mangle(change={"mode": "slave", "worker_type": "remote", "cb_server_url": None}) with self.assertRaises(CbInvalidConfig) as err: - ConfigurationInit(os.path.join(TESTS, "config", "local_worker_no_server_url.conf"), "sample.json") + ConfigurationInit(TESTCONF) assert "has no 'cb_server_url' definition" in "{0}".format(err.exception.args[0]) - # defined as "cb_server_url=" + def test_05d_cb_server_url_empty_for_slave(self): + """ + Ensure that 'cb_server_url' is required and detected if mode=slave. + """ + self.mangle(change={"mode": "slave", "worker_type": "remote", "cb_server_url": ""}) with self.assertRaises(CbInvalidConfig) as err: - ConfigurationInit(os.path.join(TESTS, "config", "local_worker_no_server_url2.conf"), "sample.json") + ConfigurationInit(TESTCONF) assert "has no 'cb_server_url' definition" in "{0}".format(err.exception.args[0]) - def test_05b_config_local_worker_missing_server_token(self): + def test_05e_cb_server_url_missing_for_local(self): """ - Ensure that local worker config with missing server token is detected. + Ensure that 'cb_server_url' is required and detected if worker_type=local. """ - # not defined in file + self.mangle(change={"mode": "master", "worker_type": "local", "cb_server_url": None}) with self.assertRaises(CbInvalidConfig) as err: - ConfigurationInit(os.path.join(TESTS, "config", "local_worker_no_server_token.conf"), "sample.json") + ConfigurationInit(TESTCONF) + assert "has no 'cb_server_url' definition" in "{0}".format(err.exception.args[0]) + + def test_05f_cb_server_url_empty_for_local(self): + """ + Ensure that 'cb_server_url' is required and detected if worker_type=local. 
+ """ + self.mangle(change={"mode": "master", "worker_type": "local", "cb_server_url": ""}) + with self.assertRaises(CbInvalidConfig) as err: + ConfigurationInit(TESTCONF) + assert "has no 'cb_server_url' definition" in "{0}".format(err.exception.args[0]) + + def test_06a_cb_server_token_missing_for_master_and_remote(self): + """ + Ensure that 'cb_server_token' is not required if mode==slave and worker_type==remote + """ + self.mangle(change={"mode": "master", "worker_type": "remote", "cb_server_token": None}) + ConfigurationInit(TESTCONF) + self.assertEqual("", globals.g_cb_server_token) + + def test_06b_cb_server_token_empty_for_master_and_remote(self): + """ + Ensure that 'cb_server_url' is not required if mode==slave and worker_type==remote + """ + self.mangle(change={"mode": "master", "worker_type": "remote", "cb_server_token": ""}) + ConfigurationInit(TESTCONF) + self.assertEqual("", globals.g_cb_server_token) + + def test_06c_cb_server_url_missing_for_slave(self): + """ + Ensure that 'cb_server_token' is required and detected if mode=slave. + """ + self.mangle(change={"mode": "slave", "worker_type": "remote", "cb_server_token": None}) + with self.assertRaises(CbInvalidConfig) as err: + ConfigurationInit(TESTCONF) + assert "has no 'cb_server_token' definition" in "{0}".format(err.exception.args[0]) + + def test_06d_cb_server_token_empty_for_slave(self): + """ + Ensure that 'cb_server_token' is required and detected if mode=slave. + """ + self.mangle(change={"mode": "slave", "worker_type": "remote", "cb_server_token": ""}) + with self.assertRaises(CbInvalidConfig) as err: + ConfigurationInit(TESTCONF) assert "has no 'cb_server_token' definition" in "{0}".format(err.exception.args[0]) - # defined as "cb_server_token=" + def test_06e_cb_server_token_missing_for_local(self): + """ + Ensure that 'cb_server_token' is required and detected if worker_type=local. + """ + self.mangle(change={"mode": "master", "worker_type": "local", "cb_server_token": None}) with self.assertRaises(CbInvalidConfig) as err: - ConfigurationInit(os.path.join(TESTS, "config", "local_worker_no_server_token2.conf"), "sample.json") + ConfigurationInit(TESTCONF) assert "has no 'cb_server_token' definition" in "{0}".format(err.exception.args[0]) - def test_06_config_remote_worker_missing_broker_url(self): + def test_06f_cb_server_token_empty_for_local(self): """ - Ensure that remote worker config with missing broker url is detected. + Ensure that 'cb_server_token' is required and detected if worker_type=local. """ - # not defined in file + self.mangle(change={"mode": "master", "worker_type": "local", "cb_server_token": ""}) with self.assertRaises(CbInvalidConfig) as err: - ConfigurationInit(os.path.join(TESTS, "config", "remote_worker_no_broker_url.conf"), "sample.json") + ConfigurationInit(TESTCONF) + assert "has no 'cb_server_token' definition" in "{0}".format(err.exception.args[0]) + + def test_06a_broker_url_missing(self): + """ + Ensure that missing broker_url is detected. + """ + self.mangle(change={"broker_url": None}) + with self.assertRaises(CbInvalidConfig) as err: + ConfigurationInit(TESTCONF) assert "has no 'broker_url' definition" in "{0}".format(err.exception.args[0]) - # defined as "broker_url=" + def test_06b_broker_url_empty(self): + """ + Ensure that empty broker_url is detected. 
+ """ + self.mangle(change={"broker_url": ""}) with self.assertRaises(CbInvalidConfig) as err: - ConfigurationInit(os.path.join(TESTS, "config", "remote_worker_no_broker_url2.conf"), "sample.json") + ConfigurationInit(TESTCONF) assert "has no 'broker_url' definition" in "{0}".format(err.exception.args[0]) - def test_07a_config_missing_yara_rules_dir(self): + def test_07a_yara_rules_dir_missing(self): """ Ensure that config with missing yara rules directory is detected. """ - # not defined in file + self.mangle(change={"yara_rules_dir": None}) with self.assertRaises(CbInvalidConfig) as err: - ConfigurationInit(os.path.join(TESTS, "config", "no_rules_dir.conf"), "sample.json") + ConfigurationInit(TESTCONF, "sample.json") assert "has no 'yara_rules_dir' definition" in "{0}".format(err.exception.args[0]) - # defined as "yara_rules_dir=" + def test_07b_yara_rules_dir_empty(self): + """ + Ensure that config with empty yara rules directory is detected. + """ + self.mangle(change={"yara_rules_dir": ""}) with self.assertRaises(CbInvalidConfig) as err: - ConfigurationInit(os.path.join(TESTS, "config", "no_rules_dir2.conf"), "sample.json") + ConfigurationInit(TESTCONF, "sample.json") assert "has no 'yara_rules_dir' definition" in "{0}".format(err.exception.args[0]) - def test_07b_config_yara_rules_dir_not_exists(self): + def test_07c_yara_rules_dir_not_exists(self): """ Ensure that config with yara rules directory that does not exist is detected. """ + self.mangle(change={"yara_rules_dir": "no-such-dir"}) with self.assertRaises(CbInvalidConfig) as err: - ConfigurationInit(os.path.join(TESTS, "config", "missing_rules_dir.conf"), "sample.json") + ConfigurationInit(TESTCONF, "sample.json") assert "does not exist" in "{0}".format(err.exception.args[0]) - def test_07c_config_yara_rules_dir_not_directory(self): + def test_07d_yara_rules_dir_not_directory(self): """ Ensure that config with yara rules directory that is not a directory is detected. """ + self.mangle(change={"yara_rules_dir": TESTCONF}) with self.assertRaises(CbInvalidConfig) as err: - ConfigurationInit(os.path.join(TESTS, "config", "bogus_rules_dir.conf"), "sample.json") + ConfigurationInit(TESTCONF, "sample.json") assert "is not a directory" in "{0}".format(err.exception.args[0]) - def test_08a_config_missing_postgres_host(self): + # ----- extended config, requires output_file with value ------------------------------ + + def test_08a_postgres_host_missing(self): """ Ensure that config with missing postgres_host uses defaults. """ check = globals.g_postgres_host - # undefined, use default in globals - ConfigurationInit(os.path.join(TESTS, "config", "missing_postgres_host.conf"), "sample.json") + self.mangle(change={"postgres_host": None}) + ConfigurationInit(TESTCONF, "sample.json") self.assertEqual(check, globals.g_postgres_host) - # defined as "postgres_host=" - ConfigurationInit(os.path.join(TESTS, "config", "missing_postgres_host2.conf"), "sample.json") - self.assertEqual(check, globals.g_postgres_host) + def test_08b_postgres_host_empty(self): + """ + Ensure that config with empty postgres_host uses defaults. + """ + check = globals.g_postgres_host - # TODO: test_08b_config_invalid_postgres_host + self.mangle(change={"postgres_host": ""}) + ConfigurationInit(TESTCONF, "sample.json") + self.assertEqual(check, globals.g_postgres_host) - def test_09a_config_missing_postgres_username(self): + def test_09a_postgres_username_missing(self): """ Ensure that config with missing postgres_username uses defaults. 
""" check = globals.g_postgres_username - # undefined, use default in globals - ConfigurationInit(os.path.join(TESTS, "config", "missing_postgres_username.conf"), "sample.json") + self.mangle(change={"postgres_host": None}) + ConfigurationInit(TESTCONF, "sample.json") self.assertEqual(check, globals.g_postgres_username) - # defined as "postgres_username=" - ConfigurationInit(os.path.join(TESTS, "config", "missing_postgres_username2.conf"), "sample.json") - self.assertEqual(check, globals.g_postgres_username) + def test_09b_postgres_username_empty(self): + """ + Ensure that config with empty postgres_username uses defaults. + """ + check = globals.g_postgres_username - # TODO: test_09b_config_invalid_postgres_username + self.mangle(change={"postgres_host": ""}) + ConfigurationInit(TESTCONF, "sample.json") + self.assertEqual(check, globals.g_postgres_username) - def test_10a_config_missing_postgres_password(self): + def test_10a_postgres_password_missing(self): """ Ensure that config with missing postgres_password is detected. """ - # undefined + self.mangle(change={"postgres_password": None}) with self.assertRaises(CbInvalidConfig) as err: - ConfigurationInit(os.path.join(TESTS, "config", "missing_postgres_password.conf"), "sample.json") + ConfigurationInit(TESTCONF, "sample.json") assert "has no 'postgres_password' definition" in "{0}".format(err.exception.args[0]) - # defined as "postgres_password=" + def test_10b_postgres_password_empty(self): + """ + Ensure that config with empty postgres_password is detected. + """ + self.mangle(change={"postgres_password": ""}) with self.assertRaises(CbInvalidConfig) as err: - ConfigurationInit(os.path.join(TESTS, "config", "missing_postgres_password2.conf"), "sample.json") + ConfigurationInit(TESTCONF, "sample.json") assert "has no 'postgres_password' definition" in "{0}".format(err.exception.args[0]) - # TODO: test_10a_config_invalid_postgres_password - - def test_11a_config_missing_postgres_db(self): + def test_11a_postgres_db_missing(self): """ Ensure that config with missing postgres_db is detected. """ check = globals.g_postgres_db - # undefined, use default in globals - ConfigurationInit(os.path.join(TESTS, "config", "missing_postgres_db.conf"), "sample.json") + self.mangle(change={"postgres_db": None}) + ConfigurationInit(TESTCONF, "sample.json") self.assertEqual(check, globals.g_postgres_db) - # defined as "postgres_db=" - ConfigurationInit(os.path.join(TESTS, "config", "missing_postgres_db2.conf"), "sample.json") - self.assertEqual(check, globals.g_postgres_db) + def test_11b_postgres_db_empty(self): + """ + Ensure that config with empty postgres_db is detected. + """ + check = globals.g_postgres_db - # TODO: test_11b_config_invalid_postgres_db + self.mangle(change={"postgres_db": ""}) + ConfigurationInit(TESTCONF, "sample.json") + self.assertEqual(check, globals.g_postgres_db) - def test_12a_config_missing_postgres_port(self): + def test_12a_postgres_port_missing(self): """ Ensure that config with missing postgres_port is detected. 
""" check = globals.g_postgres_port - # undefined, use default in globals - ConfigurationInit(os.path.join(TESTS, "config", "missing_postgres_port.conf"), "sample.json") + self.mangle(change={"postgres_port": None}) + ConfigurationInit(TESTCONF, "sample.json") self.assertEqual(check, globals.g_postgres_port) - # defined as "postgres_port=" - ConfigurationInit(os.path.join(TESTS, "config", "missing_postgres_port2.conf"), "sample.json") + def test_12b_postgres_port_empty(self): + """ + Ensure that config with empty postgres_port is detected. + """ + check = globals.g_postgres_port + + self.mangle(change={"postgres_port": ""}) + ConfigurationInit(TESTCONF, "sample.json") self.assertEqual(check, globals.g_postgres_port) - def test_12b_config_bogus_postgres_port(self): + def test_12c_postgres_port_bogus(self): """ Ensure that config with bogus (non-int) postgres_port is detected. """ + self.mangle(change={"postgres_port": "BOGUS"}) with self.assertRaises(ValueError) as err: - ConfigurationInit(os.path.join(TESTS, "config", "bogus_postgres_port.conf"), "sample.json") + ConfigurationInit(TESTCONF, "sample.json") assert "invalid literal for int" in "{0}".format(err.exception.args[0]) - # TODO: test_12c_config_invalid_postgres_port + def test_13a_niceness_missing(self): + """ + Ensure that config with missing niceness is not a problem. + """ + self.mangle(change={"niceness": None}) + ConfigurationInit(TESTCONF, "sample.json") - def test_13a_config_missing_niceness(self): + def test_13b_niceness_empty(self): """ Ensure that config with missing niceness is not a problem. """ - # defined as "niceness=" - ConfigurationInit(os.path.join(TESTS, "config", "missing_niceness.conf"), "sample.json") + self.mangle(change={"niceness": ""}) + ConfigurationInit(TESTCONF, "sample.json") - def test_13b_config_bogus_niceness(self): + def test_13c_niceness_bogus(self): """ Ensure that config with bogus (non-int) niceness is detected. """ + self.mangle(change={"niceness": "BOGUS"}) with self.assertRaises(ValueError) as err: - ConfigurationInit(os.path.join(TESTS, "config", "bogus_niceness.conf"), "sample.json") + ConfigurationInit(TESTCONF, "sample.json") assert "invalid literal for int" in "{0}".format(err.exception.args[0]) - def test_14a_config_missing_concurrent_hashes(self): + def test_13d_niceness_negative(self): + """ + Ensure that config with bogus (non-int) niceness is detected. + """ + self.mangle(change={"niceness": "-1"}) + with self.assertRaises(Exception) as err: + ConfigurationInit(TESTCONF, "sample.json") + assert "'niceness' must be greater or equal to 0" in "{0}".format(err.exception.args[0]) + + def test_14a_concurrent_hashes_missing(self): + """ + Ensure that config with missing concurrent_hashes uses default. + """ + check = globals.g_max_hashes + + self.mangle(change={"concurrent_hashes": None}) + ConfigurationInit(TESTCONF, "sample.json") + self.assertEqual(check, globals.g_max_hashes) + + def test_14b_concurrent_hashes_empty(self): """ Ensure that config with missing concurrent_hashes uses default. """ check = globals.g_max_hashes - # defined as "concurrent_hashes=" - ConfigurationInit(os.path.join(TESTS, "config", "missing_concurrent_hashes.conf"), "sample.json") + self.mangle(change={"concurrent_hashes": ""}) + ConfigurationInit(TESTCONF, "sample.json") self.assertEqual(check, globals.g_max_hashes) - def test_14b_config_bogus_concurrent_hashes(self): + def test_14c_concurrent_hashes_bogus(self): """ Ensure that config with bogus (non-int) concurrent_hashes is detected. 
""" + self.mangle(change={"concurrent_hashes": "BOGUS"}) with self.assertRaises(ValueError) as err: - ConfigurationInit(os.path.join(TESTS, "config", "bogus_concurrent_hashes.conf"), "sample.json") + ConfigurationInit(TESTCONF, "sample.json") assert "invalid literal for int" in "{0}".format(err.exception.args[0]) - def test_15a_config_missing_disable_rescan(self): + def test_15a_disable_rescan_missing(self): """ - Ensure that config with missing disable_rescan is detected. + Ensure that config with missing disable_rescan is replaced with default """ - globals.g_disable_rescan = None + check = globals.g_disable_rescan - # defined as "disable_rescan=" - ConfigurationInit(os.path.join(TESTS, "config", "missing_disable_rescan.conf"), "sample.json") - self.assertFalse(globals.g_disable_rescan) + self.mangle(change={"disable_rescan": None}) + ConfigurationInit(TESTCONF, "sample.json") + self.assertEqual(check, globals.g_disable_rescan) - def test_15b_config_bogus_disable_rescan(self): + def test_15b_disable_rescan_empty(self): """ - Ensure that config with bogus (non-bool) disable_rescan is detected. + Ensure that config with missing disable_rescan is replaced with default """ - globals.g_disable_rescan = None + check = globals.g_disable_rescan - # Not true, false, yes, no + self.mangle(change={"disable_rescan": None}) + ConfigurationInit(TESTCONF, "sample.json") + self.assertEqual(check, globals.g_disable_rescan) + + def test_15c_disable_rescan_bogus(self): + """ + Ensure that config with bogus (non-bool) disable_rescan is detected. + """ + self.mangle(change={"disable_rescan": "BOGUS"}) with self.assertRaises(ValueError) as err: - ConfigurationInit(os.path.join(TESTS, "config", "bogus_disable_rescan.conf"), "sample.json") + ConfigurationInit(TESTCONF, "sample.json") assert "is not a valid boolean value" in "{0}".format(err.exception.args[0]) - def test_16a_config_missing_num_days_binaries(self): + def test_16a_num_days_binaries_missing(self): """ Ensure that config with missing num_days_binaries reverts to default """ check = globals.g_num_days_binaries - # defined as "num_days_binaries=" - ConfigurationInit(os.path.join(TESTS, "config", "missing_num_days_binaries.conf"), "sample.json") + self.mangle(change={"num_days_binaries": None}) + ConfigurationInit(TESTCONF, "sample.json") self.assertEqual(check, globals.g_num_days_binaries) - def test_16b_config_bogus_num_days_binaries(self): + def test_16b_num_days_binaries_empty(self): + """ + Ensure that config with empty num_days_binaries reverts to default + """ + check = globals.g_num_days_binaries + + self.mangle(change={"num_days_binaries": ""}) + ConfigurationInit(TESTCONF, "sample.json") + self.assertEqual(check, globals.g_num_days_binaries) + + def test_16c_num_days_binaries_bogus(self): """ Ensure that config with bogus (non-int) num_days_binaries is detected. """ + self.mangle(change={"num_days_binaries": "BOGUS"}) with self.assertRaises(ValueError) as err: - ConfigurationInit(os.path.join(TESTS, "config", "bogus_num_days_binaries.conf"), "sample.json") + ConfigurationInit(TESTCONF, "sample.json") assert "invalid literal for int" in "{0}".format(err.exception.args[0]) - def test_17a_config_bogus_vacuum_interval(self): + def test_17a_utility_interval_missing(self): """ - Ensure that config with bogus (non-int) vacuum_interval is detected. + Ensure that missing utility_interval uses the default. 
""" + check = globals.g_utility_interval + + self.mangle(change={"utility_interval": None}) + ConfigurationInit(TESTCONF, "sample.json") + self.assertEqual(check, globals.g_utility_interval) + + def test_17b_utility_interval_empty(self): + """ + Ensure that empty utility_interval uses the default. + """ + check = globals.g_utility_interval + + self.mangle(change={"utility_interval": ""}) + ConfigurationInit(TESTCONF, "sample.json") + self.assertEqual(check, globals.g_utility_interval) + + def test_17c_utility_interval_bogus(self): + """ + Ensure that config with bogus (non-int) utility_interval is detected. + """ + self.mangle(change={"utility_interval": "BOGUS"}) with self.assertRaises(ValueError) as err: - ConfigurationInit(os.path.join(TESTS, "config", "bogus_vacuum_interval.conf"), "sample.json") + ConfigurationInit(TESTCONF, "sample.json") assert "invalid literal for int" in "{0}".format(err.exception.args[0]) - def test_17b_config_negative_vacuum_interval(self): + def test_17d_utility_interval_negative(self): """ - Ensure that config with bogus (non-int) vacuum_interval is detected. + Ensure that config with negative utility_interval is detected. """ - globals.g_vacuum_interval = None - ConfigurationInit(os.path.join(TESTS, "config", "negative_vacuum_interval.conf"), "sample.json") - self.assertEqual(0, globals.g_vacuum_interval) + self.mangle(change={"utility_interval": "-10"}) + with self.assertRaises(CbInvalidConfig) as err: + ConfigurationInit(TESTCONF, "sample.json") + assert "'utility_interval' must be greater or equal to 0" in "{0}".format(err.exception.args[0]) - def test_18a_config_missing_vacuum_script(self): + def test_18a_utility_script_not_exist(self): """ - Ensure that config with missing vacuum_script is detected. + Ensure that config with non-existing utility_script is detected. """ + self.mangle(change={"utility_script": "no-such-script.sh", "utility_interval": "10"}) with self.assertRaises(CbInvalidConfig) as err: - ConfigurationInit(os.path.join(TESTS, "config", "no_such_vacuum_script.conf"), "sample.json") + ConfigurationInit(TESTCONF, "sample.json") assert "does not exist" in "{0}".format(err.exception.args[0]) - def test_18b_config_bogus_vacuum_script_is_dir(self): + def test_18b_utility_script_is_dir(self): """ - Ensure that config with vacuum_script as directory is detected. + Ensure that config with utility_script as directory is detected. """ + self.mangle(change={"utility_script": TESTS, "utility_interval": "10"}) with self.assertRaises(CbInvalidConfig) as err: - ConfigurationInit(os.path.join(TESTS, "config", "vacuum_script_dir.conf"), "sample.json") + ConfigurationInit(TESTCONF, "sample.json") assert "is a directory" in "{0}".format(err.exception.args[0]) - def test_19a_config_vacuum_script_enabled(self): + def test_18c_utility_script_missing(self): + """ + Ensure that config with missing utility_script with positive interval is nullified. + """ + self.mangle(change={"utility_script": None, "utility_interval": "10"}) + ConfigurationInit(TESTCONF, "sample.json") + self.assertEqual(0, globals.g_utility_interval) + self.assertEqual("", globals.g_utility_script) + + def test_18d_utility_script_empty(self): + """ + Ensure that config with empty utility_script with positive interval is nullified. 
+ """ + self.mangle(change={"utility_script": "", "utility_interval": "10"}) + ConfigurationInit(TESTCONF, "sample.json") + self.assertEqual(0, globals.g_utility_interval) + self.assertEqual("", globals.g_utility_script) + + def test_19a_utility_script_enabled(self): """ - Ensure that config with vacuum_script and vacuum_interval is ready to go. + Ensure that config with utility_script and utility_interval is ready to go. """ - globals.g_vacuum_interval = None - globals.g_vacuum_script = None - ConfigurationInit(os.path.join(TESTS, "config", "vacuum_script_enabled.conf"), "sample.json") - self.assertEqual(360, globals.g_vacuum_interval) - self.assertTrue(globals.g_vacuum_script.endswith("/scripts/vacuumscript.sh")) + self.mangle(change={"utility_script": "../scripts/vacuumscript.sh", "utility_interval": "10"}) + ConfigurationInit(TESTCONF, "sample.json") + self.assertEqual(10, globals.g_utility_interval) + self.assertTrue(globals.g_utility_script.endswith("/scripts/vacuumscript.sh")) - def test_19a_config_vacuum_script_and_no_vacuum_interval(self): + def test_19b_utility_script_and_no_utility_interval(self): """ - Ensure that config with vacuum_script but vacuum_interval == 0 has it disabled. + Ensure that config with utility_script but utility_interval == 0 has it disabled. """ - globals.g_vacuum_interval = None - globals.g_vacuum_script = None - ConfigurationInit(os.path.join(TESTS, "config", "vacuum_script_no_interval.conf"), "sample.json") - self.assertEqual(0, globals.g_vacuum_interval) - self.assertIsNone(globals.g_vacuum_script) + self.mangle(change={"utility_script": "../scripts/vacuumscript.sh", "utility_interval": "0"}) + ConfigurationInit(TESTCONF, "sample.json") + self.assertEqual(0, globals.g_utility_interval) + self.assertEqual("", globals.g_utility_script) - def test_20a_config_feed_database_dir_not_exists(self): + def test_20a_feed_database_dir_not_exists(self): """ Ensure that config with feed database directory that does not exist will create that directory. """ - path = os.path.abspath("./no-such-directory") + path = os.path.abspath("./no-such-feed-directory") if os.path.exists(path): os.rmdir(path) try: - ConfigurationInit(os.path.join(TESTS, "config", "missing_feed_database_dir.conf"), "sample.json") + self.mangle(change={"feed_database_dir": path}) + ConfigurationInit(TESTCONF, "sample.json") self.assertTrue(os.path.exists(path)) finally: if os.path.exists(path): os.rmdir(path) - def test_20b_config_feed_database_dir_not_directory(self): + def test_20b_feed_database_dir_not_directory(self): """ - Ensure that config with eed database directory that is not a directory is detected. + Ensure that config with feed database directory that is not a directory is detected. 
""" + self.mangle(change={"feed_database_dir": TESTCONF}) with self.assertRaises(CbInvalidConfig) as err: - ConfigurationInit(os.path.join(TESTS, "config", "bogus_feed_database_dir.conf"), "sample.json") + ConfigurationInit(TESTCONF, "sample.json") assert "is not a directory" in "{0}".format(err.exception.args[0]) def test_21_config_malformed_parameter(self): """ Ensure that config with malformed parameter is detected """ + self.mangle(change={"utility_interval": "1%"}) with self.assertRaises(CbInvalidConfig) as err: - ConfigurationInit(os.path.join(TESTS, "config", "malformed_param.conf"), "sample.json") + ConfigurationInit(TESTCONF, "sample.json") assert "cannot be parsed" in "{0}".format(err.exception.args[0]) - def test_22a_config_missing_worker_network_timeout(self): + def test_22a_worker_network_timeout_missing(self): """ Ensure that config with missing worker_network_timeout reverts to default """ check = globals.g_worker_network_timeout - # defined as "num_days_binaries=" - ConfigurationInit(os.path.join(TESTS, "config", "missing_worker_network_timeout.conf"), "sample.json") + self.mangle(change={"worker_network_timeout": None}) + ConfigurationInit(TESTCONF, "sample.json") self.assertEqual(check, globals.g_worker_network_timeout) - def test_22b_config_bogus_worker_network_timeout(self): + def test_22b_worker_network_timeout_empty(self): + """ + Ensure that config with empty worker_network_timeout reverts to default + """ + check = globals.g_worker_network_timeout + + self.mangle(change={"worker_network_timeout": ""}) + ConfigurationInit(TESTCONF, "sample.json") + self.assertEqual(check, globals.g_worker_network_timeout) + + def test_22c_worker_network_timeout_bogus(self): """ Ensure that config with bogus (non-int) worker_network_timeout is detected. """ + self.mangle(change={"worker_network_timeout": "BOGUS"}) with self.assertRaises(ValueError) as err: - ConfigurationInit(os.path.join(TESTS, "config", "bogus_worker_network_timeout.conf"), "sample.json") + ConfigurationInit(TESTCONF, "sample.json") assert "invalid literal for int" in "{0}".format(err.exception.args[0]) + def test_23_config_unexpected_parameter(self): + """ + Ensure that config with unexpected parameter (typo?) 
is flagged + """ + self.mangle(add=["cb_server=https://localhost"]) # should be "cb_server_url" + with self.assertRaises(CbInvalidConfig) as err: + ConfigurationInit(TESTCONF, "sample.json") + assert "has unknown parameters: ['cb_server']" in "{0}".format(err.exception.args[0]) + # ----- Minimal validation (worker) def test_90_minimal_validation_effects(self): @@ -438,5 +782,5 @@ def test_90_minimal_validation_effects(self): """ Ensure that minimal configuration does not set extra globals """ globals.g_postgres_host = None - ConfigurationInit(os.path.join(TESTS, "config", "valid.conf")) + ConfigurationInit(TESTCONF) self.assertIsNone(globals.g_postgres_host) From 2a342418e7f76b1bbb0506c1175d0a7448500388 Mon Sep 17 00:00:00 2001 From: Burton Choinski Date: Tue, 5 Nov 2019 17:25:51 -0500 Subject: [PATCH 136/257] unit test tweaks, adding utility_debug change .gitignore to remove "local" --- .gitignore | 2 +- src/config_handling.py | 5 ++++- src/globals.py | 1 + src/main.py | 16 +++++----------- test/test_configInit.py | 38 ++++++++++++++++++++++++++++++++++++++ 5 files changed, 49 insertions(+), 13 deletions(-) diff --git a/.gitignore b/.gitignore index 8b11bd0..445a51e 100644 --- a/.gitignore +++ b/.gitignore @@ -43,4 +43,4 @@ config.ini binary.db # local storage not to be pushed up -local +#local diff --git a/src/config_handling.py b/src/config_handling.py index 7b52611..2ff3566 100644 --- a/src/config_handling.py +++ b/src/config_handling.py @@ -34,7 +34,7 @@ "postgres_password", "postgres_port", "postgres_username", - "utility_debug", # dev use only! + "utility_debug", # developer use only! "utility_interval", "utility_script", "worker_network_timeout", @@ -157,6 +157,9 @@ def _extended_check(self) -> None: if self._as_path("utility_script", required=False, default=globals.g_utility_script): logger.debug(f"{self.source} has 'utility_script' defined, but it is disabled") + # developer use only + globals.g_utility_debug = self._as_bool("utility_debug", default=False) + globals.g_feed_database_dir = self._as_path("feed_database_dir", required=True, is_dir=True, default=globals.g_feed_database_dir, create_if_needed=True) diff --git a/src/globals.py b/src/globals.py index 43b36ff..8aee961 100644 --- a/src/globals.py +++ b/src/globals.py @@ -39,5 +39,6 @@ g_utility_interval = 0 g_utility_script = "" +g_utility_debug = False # dev use only, reduces interval from minutes to seconds! g_worker_network_timeout = 5 diff --git a/src/main.py b/src/main.py index 57c38dd..537c35d 100644 --- a/src/main.py +++ b/src/main.py @@ -293,14 +293,10 @@ def get_binary_file_cursor(conn, start_date_binaries): def execute_script() -> None: """ - Execute a external maintenence script (vacuum script).
+ Execute an external utility script. """ - logger.info( - "----- Executing vacuum script ----------------------------------------" - ) - prog = subprocess.Popen( - globals.g_utility_script, shell=True, universal_newlines=True - ) + logger.info("----- Executing utility script ----------------------------------------") + prog = subprocess.Popen(globals.g_utility_script, shell=True, universal_newlines=True) stdout, stderr = prog.communicate() if stdout is not None and len(stdout.strip()) > 0: logger.info(stdout) @@ -308,9 +304,7 @@ def execute_script() -> None: if stderr is not None and len(stderr.strip()) > 0: logger.error(stderr) if prog.returncode: logger.warning(f"program returned error code {prog.returncode}") - logger.info( - "---------------------------------------- Vacuum script completed -----\n" - ) + logger.info("---------------------------------------- Utility script completed -----\n") def perform(yara_rule_dir: str, conn, scanning_promises_queue: Queue): @@ -349,7 +343,7 @@ def perform(yara_rule_dir: str, conn, scanning_promises_queue: Queue): if globals.g_utility_interval > 0: seconds_since_start = (datetime.now() - utility_window_start).seconds - if seconds_since_start >= globals.g_utility_interval * 60: + if seconds_since_start >= globals.g_utility_interval * (60 if not globals.g_utility_debug else 1): # close connection execute_script() utility_window_start = datetime.now() diff --git a/test/test_configInit.py b/test/test_configInit.py index 0015103..6829f1b 100644 --- a/test/test_configInit.py +++ b/test/test_configInit.py @@ -35,6 +35,7 @@ utility_interval=360 utility_script=../scripts/vacuumscript.sh +utility_debug=false feed_database_dir=./feed_db @@ -71,6 +72,7 @@ def setUp(self) -> None: globals.g_num_days_binaries = 365 globals.g_utility_interval = 0 globals.g_utility_script = "" + globals.g_utility_debug = False globals.g_feed_database_dir = "./feed_db" globals.g_worker_network_timeout = 5 @@ -775,6 +777,42 @@ def test_23_config_unexpected_parameter(self): ConfigurationInit(TESTCONF, "sample.json") assert "has unknown parameters: ['cb_server']" in "{0}".format(err.exception.args[0]) + def test_24a_utility_debug_missing(self): + """ + Ensure that config with missing utility_debug is always false. + """ + self.mangle(change={"utility_debug": None}) + ConfigurationInit(TESTCONF, "sample.json") + self.assertFalse(globals.g_utility_debug) + + def test_24b_utility_debug_empty(self): + """ + Ensure that config with empty utility_debug is always false. + """ + self.mangle(change={"utility_debug": ""}) + ConfigurationInit(TESTCONF, "sample.json") + self.assertFalse(globals.g_utility_debug) + + def test_24c_utility_debug_bogus(self): + """ + Ensure that config with bogus (non-bool) utility_debug is detected. + """ + self.mangle(change={"utility_debug": "BOGUS"}) + with self.assertRaises(ValueError) as err: + ConfigurationInit(TESTCONF, "sample.json") + assert "is not a valid boolean value" in "{0}".format(err.exception.args[0]) + + def test_24d_utility_debug_empty_global_changed(self): + """ + Ensure that config with empty utility_debug is always false, even if the globals are altered! 
+ """ + globals.g_utility_debug = True + + self.mangle(change={"utility_debug": ""}) + ConfigurationInit(TESTCONF, "sample.json") + self.assertFalse(globals.g_utility_debug) + + # ----- Minimal validation (worker) def test_90_minimal_validation_effects(self): From 29ee5ac1f28e6f48e01751415846b67de0f048b4 Mon Sep 17 00:00:00 2001 From: Burton Choinski Date: Tue, 5 Nov 2019 17:30:06 -0500 Subject: [PATCH 137/257] Removed unused "local" --- .gitignore | 3 --- local/README.md | 7 ------- local/yara_rules/README.md | 8 -------- src/celery_app.py | 6 +++++- src/main.py | 26 ++++++++++++-------------- src/tasks.py | 5 ++--- 6 files changed, 19 insertions(+), 36 deletions(-) delete mode 100644 local/README.md delete mode 100644 local/yara_rules/README.md diff --git a/.gitignore b/.gitignore index 445a51e..bb9e8b3 100644 --- a/.gitignore +++ b/.gitignore @@ -41,6 +41,3 @@ config.ini # created local DB binary.db - -# local storage not to be pushed up -#local diff --git a/local/README.md b/local/README.md deleted file mode 100644 index 45d2f49..0000000 --- a/local/README.md +++ /dev/null @@ -1,7 +0,0 @@ -# Yara Package Local Storage -This folder is here for storing any locally created artifacts. It is marked with `.gitignore` -so that any files here are not pushed to the remote repository. - -Enter `{YARA}/local` for any configuration path that that you wish to use this location, -using subfolders for clearer organization. One subfolder, `yara_rules` is already provided -for local rule files. diff --git a/local/yara_rules/README.md b/local/yara_rules/README.md deleted file mode 100644 index 2c16b18..0000000 --- a/local/yara_rules/README.md +++ /dev/null @@ -1,8 +0,0 @@ -# local.yara_rules -This folder can be used a convenient location to locate your Yara rules. It can be defined -in your configuration file as: -```ini -yara_rules_dir={YARA}/local/yara_rules -``` - -It is suggested that subfolders be used to organize any complex and differing rule sets. diff --git a/src/celery_app.py b/src/celery_app.py index 9840418..5fde742 100644 --- a/src/celery_app.py +++ b/src/celery_app.py @@ -1,3 +1,7 @@ +# coding: utf-8 +# Copyright © 2014-2019 VMware, Inc. All Rights Reserved. 
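+#
+# Shared Celery application instance: the worker tasks import this object
+# ("from celery_app import app" in tasks.py), and the settings below make
+# task arguments and results travel as pickles.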
+ + from celery import Celery @@ -7,4 +11,4 @@ # noinspection PyUnusedName app.conf.result_serializer = "pickle" # noinspection PyUnusedName -app.conf.accept_content = {"pickle"} \ No newline at end of file +app.conf.accept_content = {"pickle"} diff --git a/src/main.py b/src/main.py index 537c35d..7fb6606 100644 --- a/src/main.py +++ b/src/main.py @@ -280,8 +280,8 @@ def get_binary_file_cursor(conn, start_date_binaries): # noinspection SqlDialectInspection,SqlNoDataSourceInspection query = ( - "SELECT md5hash FROM storefiles WHERE present_locally = TRUE AND " - + "timestamp >= '{0}' ORDER BY timestamp DESC".format(start_date_binaries) + "SELECT md5hash FROM storefiles WHERE present_locally = TRUE AND " + + "timestamp >= '{0}' ORDER BY timestamp DESC".format(start_date_binaries) ) logger.debug(query) @@ -331,20 +331,18 @@ def perform(yara_rule_dir: str, conn, scanning_promises_queue: Queue): # Closing since there are no more binaries of interest to scan cur.close() conn.commit() - num_total_binaries = len(rows) logger.info(f"Enumerating modulestore...found {len(rows)} resident binaries") md5_hashes = filter(_check_hash_against_feed, (row[0].hex() for row in rows)) - #logger.debug(f"After filtering...found new {len(md5_hashes)} hashes to scan") + # logger.debug(f"After filtering...found new {len(md5_hashes)} hashes to scan") analyze_binaries_and_queue_chunked(scanning_promises_queue, md5_hashes) if globals.g_utility_interval > 0: seconds_since_start = (datetime.now() - utility_window_start).seconds if seconds_since_start >= globals.g_utility_interval * 60 if not globals.g_utility_debug else 1: - # close connection execute_script() utility_window_start = datetime.now() @@ -379,7 +377,7 @@ def save_results_with_logging(analysis_results): # noinspection PyUnusedFunction def save_and_log( - analysis_results, start_time, num_binaries_skipped, num_total_binaries + analysis_results, start_time, num_binaries_skipped, num_total_binaries ): logger.debug(analysis_results) if analysis_results: @@ -398,7 +396,7 @@ def save_and_log( def _rule_logging( - start_time: float, num_binaries_skipped: int, num_total_binaries: int + start_time: float, num_binaries_skipped: int, num_total_binaries: int ) -> None: """ Simple method to log yara work. @@ -505,7 +503,7 @@ def wait_all_worker_exit(): def start_workers( - exit_event: Event, scanning_promises_queue: Queue, scanning_results_queue: Queue + exit_event: Event, scanning_promises_queue: Queue, scanning_results_queue: Queue ) -> None: """ Starts worker-threads (not celery workers). 
Worker threads do work until they get the exit_event signal @@ -542,12 +540,12 @@ class DatabaseScanningThread(Thread): """ def __init__( - self, - interval: int, - scanning_promises_queue: Queue, - exit_event: Event, - *args, - **kwargs, + self, + interval: int, + scanning_promises_queue: Queue, + exit_event: Event, + *args, + **kwargs, ): """ diff --git a/src/tasks.py b/src/tasks.py index 191f040..49d62d9 100644 --- a/src/tasks.py +++ b/src/tasks.py @@ -14,13 +14,12 @@ import requests # noinspection PyPackageRequirements import yara -from celery import bootsteps, Celery, group +from celery import bootsteps, group import globals from analysis_result import AnalysisResult -from config_handling import ConfigurationInit - from celery_app import app +from config_handling import ConfigurationInit logger = logging.getLogger(__name__) logger.setLevel(logging.DEBUG) From 65327c7c1aab4b92e7d7c7da9517b66478270908 Mon Sep 17 00:00:00 2001 From: Zachary Estep Date: Tue, 5 Nov 2019 18:00:19 -0500 Subject: [PATCH 138/257] makefile updates for rpm build --- makefile | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/makefile b/makefile index 90b4419..3af21fe 100644 --- a/makefile +++ b/makefile @@ -10,9 +10,10 @@ rpm: mkdir -p ${BUILDDIR} mkdir -p ${SOURCEDIR}/src mkdir -p ${BUILDDIR}/init-scripts + mkdir -p ${BUILDDIR}/example-conf cp -rp src/* ${SOURCEDIR}/src cp -rp init-scripts/* ${BUILDDIR}/init-scripts - cp example-conf/yara.conf ${BUILDDIR} + cp example-conf/yara.conf ${BUILDDIR}/example-conf/yara.conf.example cp MANIFEST ${BUILDDIR} cp cb-yara-connector.spec ${SOURCEDIR}/cb-yara-connector.spec ls ${SOURCEDIR} From 7898039b03c24a2f5cf889eb6ba79f5800891f63 Mon Sep 17 00:00:00 2001 From: Zachary Estep Date: Wed, 6 Nov 2019 08:07:25 -0500 Subject: [PATCH 139/257] RPM build touchups --- MANIFEST | 8 ++++---- cb-yara-connector.rpm.spec | 10 +++++----- init-scripts/yaraconnector.conf | 4 ++-- makefile | 3 +-- 4 files changed, 12 insertions(+), 13 deletions(-) diff --git a/MANIFEST b/MANIFEST index 5f8315c..9cecb1b 100644 --- a/MANIFEST +++ b/MANIFEST @@ -1,5 +1,5 @@ -/usr/share/cb/integrations/yaraconnector/yaraconnector -%dir /usr/share/cb/integrations/yaraconnector -%dir /var/log/cb/integrations/yaraconnector +/usr/share/cb/integrations/cb-yara-connector/yaraconnector +%dir /usr/share/cb/integrations/cb-yara-connector +%dir /var/log/cb/integrations/cb-yara-connector /etc/init/yaraconnector.conf -/etc/cb/integrations/yaraconnector/yaraconnector.conf.example \ No newline at end of file +/etc/cb/integrations/cb-yara-connector/yaraconnector.conf.example \ No newline at end of file diff --git a/cb-yara-connector.rpm.spec b/cb-yara-connector.rpm.spec index b303c41..eabfa08 100644 --- a/cb-yara-connector.rpm.spec +++ b/cb-yara-connector.rpm.spec @@ -12,12 +12,12 @@ Carbon Black Yara Agent - Scans binaries with configured yara rules pyinstaller %{_sourcedir}/cb-yara-connector.spec %install -mkdir -p ${RPM_BUILD_ROOT}/var/log/cb/integrations/yaraconnector -mkdir -p ${RPM_BUILD_ROOT}/usr/share/cb/integrations/yaraconnector -mkdir -p ${RPM_BUILD_ROOT}/etc/cb/integrations/yaraconnector +mkdir -p ${RPM_BUILD_ROOT}/var/log/cb/integrations/cb-yara-connector +mkdir -p ${RPM_BUILD_ROOT}/usr/share/cb/integrations/cb-yara-connector +mkdir -p ${RPM_BUILD_ROOT}/etc/cb/integrations/cb-yara-connector mkdir -p ${RPM_BUILD_ROOT}/etc/init -cp example-conf/yara.conf ${RPM_BUILD_ROOT}/etc/cb/integrations/yaraconnector/yaraconnector.conf.example +cp example-conf/yara.conf 
${RPM_BUILD_ROOT}/etc/cb/integrations/cb-yara-connector/yaraconnector.conf.example
 install -m 0755 init-scripts/yaraconnector.conf ${RPM_BUILD_ROOT}/etc/init/yaraconnector.conf
-install -m 0755 dist/yaraconnector ${RPM_BUILD_ROOT}/usr/share/cb/integrations/yaraconnector/yaraconnector
+install -m 0755 dist/yaraconnector ${RPM_BUILD_ROOT}/usr/share/cb/integrations/cb-yara-connector/yaraconnector

 %files -f MANIFEST
\ No newline at end of file
diff --git a/init-scripts/yaraconnector.conf b/init-scripts/yaraconnector.conf
index cd8dd52..310e220 100644
--- a/init-scripts/yaraconnector.conf
+++ b/init-scripts/yaraconnector.conf
@@ -7,7 +7,7 @@ stop on runlevel [!2345]
 respawn

 pre-start script
-    /usr/share/cb/integrations/yaraconnector/yaraconnector --lock-file /var/run/yaraconnector.pid --validate-rules --config-file /etc/cb/integrations/yaraconnector/yaraconnector.conf &> /var/log/cb/integrations/yaraconnector/yaraconnector_config_check.log
+    /usr/share/cb/integrations/cb-yara-connector/yaraconnector --lock-file /var/run/yaraconnector.pid --validate-rules --config-file /etc/cb/integrations/cb-yara-connector/yaraconnector.conf &> /var/log/cb/integrations/cb-yara-connector/yaraconnector_config_check.log
 end script

-exec /usr/share/cb/integrations/yaraconnector/yaraconnector --lock-file /var/run/yaraconnector.pid --config-file /etc/cb/integrations/yaraconnector/yaraconnector.conf &> /var/log/cb/integrations/yaraconnector/yaraconnector.log
\ No newline at end of file
+exec /usr/share/cb/integrations/cb-yara-connector/yaraconnector --lock-file /var/run/yaraconnector.pid --config-file /etc/cb/integrations/cb-yara-connector/yaraconnector.conf &> /var/log/cb/integrations/cb-yara-connector/yaraconnector.log
\ No newline at end of file
diff --git a/makefile b/makefile
index 3af21fe..7f3c549 100644
--- a/makefile
+++ b/makefile
@@ -13,8 +13,7 @@ rpm:
 	mkdir -p ${BUILDDIR}/example-conf
 	cp -rp src/* ${SOURCEDIR}/src
 	cp -rp init-scripts/* ${BUILDDIR}/init-scripts
-	cp example-conf/yara.conf ${BUILDDIR}/example-conf/yara.conf.example
+	cp example-conf/yara.conf ${BUILDDIR}/example-conf/yara.conf
 	cp MANIFEST ${BUILDDIR}
 	cp cb-yara-connector.spec ${SOURCEDIR}/cb-yara-connector.spec
-	ls ${SOURCEDIR}
 	rpmbuild -ba cb-yara-connector.rpm.spec
\ No newline at end of file

From d6d1d874ab374b28929da06150c89362c4642c40 Mon Sep 17 00:00:00 2001
From: Zachary Estep
Date: Wed, 6 Nov 2019 08:39:42 -0500
Subject: [PATCH 140/257] fix inclusion of globals.py in pyinstaller spec file

---
 cb-yara-connector.spec | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/cb-yara-connector.spec b/cb-yara-connector.spec
index caba33f..6e747bd 100644
--- a/cb-yara-connector.spec
+++ b/cb-yara-connector.spec
@@ -6,7 +6,7 @@ block_cipher = None

 a = Analysis(['src/main.py'],
-             pathex=['.'],
+             pathex=['./src'],
              binaries=[],
              hiddenimports=['celery.fixups', 'celery.fixups.django', 'celery.loaders.app',
                             'celery.app.amqp', 'kombu.transport.redis', 'redis', 'celery.backends',

From e920adbd5e6a581876adb7b971e3130b30c36098 Mon Sep 17 00:00:00 2001
From: Zachary Estep
Date: Wed, 6 Nov 2019 08:44:22 -0500
Subject: [PATCH 141/257] fixing scanning interval handling in
 config_handling.py

---
 src/config_handling.py | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/src/config_handling.py b/src/config_handling.py
index 2ff3566..84e7f85 100644
--- a/src/config_handling.py
+++ b/src/config_handling.py
@@ -40,6 +40,7 @@
     "worker_network_timeout",
     "worker_type",
     "yara_rules_dir",
+    "database_scanning_interval",
 ]


@@ -84,6 +85,9 @@ def __init__(self, config_file: str, output_file: str = None) -> None:
         except configparser.InterpolationSyntaxError as err:
             raise CbInvalidConfig(f"{self.source} cannot be parsed: {err}")

+        if 'database_scanning_interval' in self.the_config:
+            globals.g_scanning_interval = max(int(self.the_config['database_scanning_interval']), globals.g_scanning_interval)
+
         # do the config checks
         self._worker_check()

From 7dfbd97995a76a8da85f16fb1c4354cc5e9b4ee8 Mon Sep 17 00:00:00 2001
From: Zachary Estep
Date: Wed, 6 Nov 2019 09:01:01 -0500
Subject: [PATCH 142/257] Fixup pyinstaller spec

---
 cb-yara-connector.spec | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/cb-yara-connector.spec b/cb-yara-connector.spec
index 6e747bd..9e6a2c4 100644
--- a/cb-yara-connector.spec
+++ b/cb-yara-connector.spec
@@ -8,10 +8,10 @@ block_cipher = None
 a = Analysis(['src/main.py'],
              pathex=['./src'],
              binaries=[],
-             hiddenimports=['celery.fixups', 'celery.fixups.django', 'celery.loaders.app',
-                            'celery.app.amqp', 'kombu.transport.redis', 'redis', 'celery.backends',
-                            'celery.backends.redis', 'celery.app.events', 'celery.events',
-                            'kombu.transport.pyamqp'],
+             hiddenimports=['celery.apps','celery.apps.worker','celery.fixups', 'celery.fixups.django', 'celery.loaders.app',
+                            'celery.app.amqp', 'kombu.transport.redis', 'redis', 'celery.backends',
+                            'celery.backends.redis', 'celery.app.events', 'celery.events','celery.redis',
+                            'kombu.transport.pyamqp'],
              hookspath=[],
              runtime_hooks=[],
              excludes=[],

From 154e608365fd95535646d56cb2bfdebe186d3ddc Mon Sep 17 00:00:00 2001
From: Burton Choinski
Date: Wed, 6 Nov 2019 08:27:14 -0500
Subject: [PATCH 143/257] Fixed logging glitch

---
 src/main.py | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/src/main.py b/src/main.py
index 7fb6606..c7ca391 100644
--- a/src/main.py
+++ b/src/main.py
@@ -681,12 +681,10 @@ def main():
         )
         handler.setFormatter(formatter)
         logger.addHandler(handler)
-    else:
-        use_log_file = None

     # Verify the configuration file and load up important global variables
     try:
-        ConfigurationInit(args.config_file, use_log_file)
+        ConfigurationInit(args.config_file, args.output_file)
     except Exception as err:
         logger.error(f"Unable to continue due to a configuration problem: {err}")
         sys.exit(2)

From dbf078da6854559bd212970b24e5fe003e971d19 Mon Sep 17 00:00:00 2001
From: Zachary Estep
Date: Wed, 6 Nov 2019 09:28:24 -0500
Subject: [PATCH 144/257] makefile updates for rpmbuild

---
 makefile | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/makefile b/makefile
index 7f3c549..f97274a 100644
--- a/makefile
+++ b/makefile
@@ -9,9 +9,11 @@ rpm:
 	mkdir -p ${SOURCEDIR}
 	mkdir -p ${BUILDDIR}
 	mkdir -p ${SOURCEDIR}/src
+	mkdir -p ${BUILDDIR}/src
 	mkdir -p ${BUILDDIR}/init-scripts
 	mkdir -p ${BUILDDIR}/example-conf
 	cp -rp src/* ${SOURCEDIR}/src
+	cp -rp src/* ${BUILDDIR}/src
 	cp -rp init-scripts/* ${BUILDDIR}/init-scripts
 	cp example-conf/yara.conf ${BUILDDIR}/example-conf/yara.conf
 	cp MANIFEST ${BUILDDIR}

From 893698c460d4d6cf2191e7dd0018b58a1d0c697a Mon Sep 17 00:00:00 2001
From: Zachary Estep
Date: Wed, 6 Nov 2019 09:29:58 -0500
Subject: [PATCH 145/257] fixing init script rule validation

---
 init-scripts/yaraconnector.conf | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/init-scripts/yaraconnector.conf b/init-scripts/yaraconnector.conf
index 310e220..21f6bb7 100644
--- a/init-scripts/yaraconnector.conf
+++ b/init-scripts/yaraconnector.conf
@@ -7,7 +7,7 @@ stop on runlevel [!2345]
 respawn

 pre-start script
-    /usr/share/cb/integrations/cb-yara-connector/yaraconnector --lock-file /var/run/yaraconnector.pid --validate-rules --config-file /etc/cb/integrations/cb-yara-connector/yaraconnector.conf &> /var/log/cb/integrations/cb-yara-connector/yaraconnector_config_check.log
+    /usr/share/cb/integrations/cb-yara-connector/yaraconnector --lock-file /var/run/yaraconnector.pid --validate-yara-rules --config-file /etc/cb/integrations/cb-yara-connector/yaraconnector.conf &> /var/log/cb/integrations/cb-yara-connector/yaraconnector_config_check.log
 end script

 exec /usr/share/cb/integrations/cb-yara-connector/yaraconnector --lock-file /var/run/yaraconnector.pid --config-file /etc/cb/integrations/cb-yara-connector/yaraconnector.conf &> /var/log/cb/integrations/cb-yara-connector/yaraconnector.log
\ No newline at end of file

From 7f4007fbc080da6081bb3833c275a429d0de1085 Mon Sep 17 00:00:00 2001
From: Burton Choinski
Date: Wed, 6 Nov 2019 09:48:31 -0500
Subject: [PATCH 146/257] Sample config text updates

---
 example-conf/yara.conf     |  3 ++-
 samples/sample_local.conf  | 26 ++++++++++++++----------
 samples/sample_remote.conf | 24 ++++++++++++----------
 src/config_handling.py     |  8 ++++----
 test/test_configInit.py    | 41 ++++++++++++++++++++++++++++++++++++++
 5 files changed, 76 insertions(+), 26 deletions(-)

diff --git a/example-conf/yara.conf b/example-conf/yara.conf
index f71e75f..e94145e 100644
--- a/example-conf/yara.conf
+++ b/example-conf/yara.conf
@@ -64,4 +64,5 @@ num_days_binaries=365
 utility_interval=60
 utility_script=scripts/vacuumscript.sh

-database_scanning_interval=60
\ No newline at end of file
+
+database_scanning_interval=360
diff --git a/samples/sample_local.conf b/samples/sample_local.conf
index b6d4b26..60421a9 100644
--- a/samples/sample_local.conf
+++ b/samples/sample_local.conf
@@ -1,9 +1,8 @@
 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
 ;; Sample local worker config file
 ;;
-;; Where seen, the placeholder {YARA} will be replaced by the script with
-;; the location of yara package being used.  You may also use "~" if you wish
-;; to locate files or directories in your home folder
+;; You may also use "~" if you wish to locate files or directories in your
+;; home folder
 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

 [general]
@@ -18,7 +17,7 @@ cb_server_token=
 ;
 ; path to directory containing yara rules
 ;
-yara_rules_dir={YARA}/local/yara_rules
+yara_rules_dir=./yara_rules

 ;
 ; Cb Response postgres Database settings; insert postgres password as used in cb.conf for `postgres_password`
@@ -52,18 +51,23 @@ disable_rescan=True
 num_days_binaries=365

 ;
-; The feed database directory is where locata database work files are stored. If the directory does not exist
+; The feed database directory is where local database work files are stored. If the directory does not exist
 ; it will be created.
 ;
-feed_database_dir={YARA}/local/feed_db
+feed_database_dir=./feed_db
+
+;
+; This can be used to adjust the interval (in seconds) at which the database is scanned.
+;
+database_scanning_interval=360

 ;
-; The use of the vacuum script is an ADVANCED FEATURE and should be used with caution!
+; The use of the maintenance script is an ADVANCED FEATURE and should be used with caution!
 ;
-; If "vacuum_interval" is greater than 0 it represents the interval in minutes after which the yara connector will
-; pause to execute a shell script for databse maintenance. This can present risks.  Be careful what you allow the
+; If "utility_interval" is greater than 0 it represents the interval in minutes after which the yara connector will
+; pause to execute a shell script for database maintenance. This can present risks.  Be careful what you allow the
 ; script to do, and use this option at your own discretion.
 ;
-vacuum_interval=-1
-vacuum_script={YARA}/scripts/vacuumscript.sh
+utility_interval=-1
+utility_script=./scripts/vacuumscript.sh
diff --git a/samples/sample_remote.conf b/samples/sample_remote.conf
index 25930bd..3a8c5df 100644
--- a/samples/sample_remote.conf
+++ b/samples/sample_remote.conf
@@ -1,9 +1,8 @@
 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
 ;; Sample remote worker config file
 ;;
-;; Where seen, the placeholder {YARA} will be replaced by the script with
-;; the location of yara package being used.  You may also use "~" if you wish
-;; to locate files or directories in your home folder
+;; You may also use "~" if you wish to locate files or directories in your
+;; home folder
 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

 [general]
@@ -18,7 +17,7 @@ broker_url=redis://127.0.0.1
 ;
 ; path to directory containing yara rules
 ;
-yara_rules_dir={YARA}/local/yara_rules
+yara_rules_dir=./yara_rules

 ;
 ; Cb Response postgres Database settings; insert postgres password as used in cb.conf for `postgres_password`
@@ -56,15 +55,20 @@ num_days_binaries=365
 ; The feed database directory is where local database work files are stored. If the directory does not exist
 ; it will be created.
 ;
-feed_database_dir={YARA}/local/feed_db
+feed_database_dir=./feed_db
+
+;
+; This can be used to adjust the interval (in seconds) at which the database is scanned.
+;
+database_scanning_interval=360

 ;
-; The use of the vacuum script is an ADVANCED FEATURE and should be used with caution!
+; The use of the maintenance script is an ADVANCED FEATURE and should be used with caution!
 ;
-; If "vacuum_interval" is greater than 0 it represents the interval in minutes after which the yara connector will
-; pause to execute a shell script for databse maintenance. This can present risks.  Be careful what you allow the
+; If "utility_interval" is greater than 0 it represents the interval in minutes after which the yara connector will
+; pause to execute a shell script for database maintenance. This can present risks.  Be careful what you allow the
 ; script to do, and use this option at your own discretion.
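+;
+; For example, to run the bundled vacuumscript.sh once an hour, one might set
+; (illustrative values only):
+;   utility_interval=60
+;   utility_script=./scripts/vacuumscript.sh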
; -vacuum_interval=-1 -vacuum_script={YARA}/scripts/vacuumscript.sh +utility_interval=-1 +utility_script=./scripts/vacuumscript.sh diff --git a/src/config_handling.py b/src/config_handling.py index 84e7f85..3e4a6d8 100644 --- a/src/config_handling.py +++ b/src/config_handling.py @@ -24,6 +24,7 @@ "cb_server_token", "cb_server_url", "concurrent_hashes", + "database_scanning_interval", "disable_rescan", "feed_database_dir", "mode", @@ -40,7 +41,6 @@ "worker_network_timeout", "worker_type", "yara_rules_dir", - "database_scanning_interval", ] @@ -85,9 +85,6 @@ def __init__(self, config_file: str, output_file: str = None) -> None: except configparser.InterpolationSyntaxError as err: raise CbInvalidConfig(f"{self.source} cannot be parsed: {err}") - if 'database_scanning_interval' in self.the_config: - globals.g_scanning_interval = max(int(self.the_config['database_scanning_interval']), globals.g_scanning_interval) - # do the config checks self._worker_check() @@ -167,6 +164,9 @@ def _extended_check(self) -> None: globals.g_feed_database_dir = self._as_path("feed_database_dir", required=True, is_dir=True, default=globals.g_feed_database_dir, create_if_needed=True) + globals.g_scanning_interval = self._as_int('database_scanning_interval', default=globals.g_scanning_interval, + min_value=360) + # ----- Type Handlers ------------------------------------------------------------ def _as_str(self, param: str, required: bool = False, default: str = "", allowed: List[str] = None) -> str: diff --git a/test/test_configInit.py b/test/test_configInit.py index 6829f1b..97a6845 100644 --- a/test/test_configInit.py +++ b/test/test_configInit.py @@ -40,6 +40,7 @@ feed_database_dir=./feed_db worker_network_timeout=5 +database_scanning_interval=360 """ @@ -75,6 +76,7 @@ def setUp(self) -> None: globals.g_utility_debug = False globals.g_feed_database_dir = "./feed_db" globals.g_worker_network_timeout = 5 + globals.g_scanning_interval = 360 with open(TESTCONF, "w") as fp: fp.write(BASE) @@ -812,6 +814,45 @@ def test_24d_utility_debug_empty_global_changed(self): ConfigurationInit(TESTCONF, "sample.json") self.assertFalse(globals.g_utility_debug) + def test_25a_database_scanning_interval_missing(self): + """ + Ensure that config with missing database_scanning_interval reverts to default + """ + check = globals.g_scanning_interval + + self.mangle(change={"database_scanning_interval": None}) + ConfigurationInit(TESTCONF, "sample.json") + self.assertEqual(check, globals.g_scanning_interval) + + def test_25b_database_scanning_interval_empty(self): + """ + Ensure that config with empty database_scanning_interval reverts to default + """ + check = globals.g_scanning_interval + + self.mangle(change={"database_scanning_interval": ""}) + ConfigurationInit(TESTCONF, "sample.json") + self.assertEqual(check, globals.g_scanning_interval) + + def test_25c_database_scanning_interval_bogus(self): + """ + Ensure that config with bogus (non-int) database_scanning_interval is detected. 
+        """
+        self.mangle(change={"database_scanning_interval": "BOGUS"})
+        with self.assertRaises(ValueError) as err:
+            ConfigurationInit(TESTCONF, "sample.json")
+        assert "invalid literal for int" in "{0}".format(err.exception.args[0])
+
+    def test_25d_database_scanning_interval_below_minimum(self):
+        """
+        Ensure that config with below-minimum database_scanning_interval is detected
+        """
+        check = globals.g_scanning_interval
+
+        self.mangle(change={"database_scanning_interval": "18"})
+        with self.assertRaises(CbInvalidConfig) as err:
+            ConfigurationInit(TESTCONF, "sample.json")
+        assert "'database_scanning_interval' must be greater or equal to 360" in "{0}".format(err.exception.args[0])

     # ----- Minimal validation (worker)

From 22b91a230718f5cf8b8aadf66ea5377d4bb3a935 Mon Sep 17 00:00:00 2001
From: Zachary Estep
Date: Wed, 6 Nov 2019 11:42:48 -0500
Subject: [PATCH 147/257] updates to the pyinstaller specfile for missed
 hidden imports

---
 cb-yara-connector.spec | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/cb-yara-connector.spec b/cb-yara-connector.spec
index 9e6a2c4..98645bf 100644
--- a/cb-yara-connector.spec
+++ b/cb-yara-connector.spec
@@ -8,7 +8,7 @@ block_cipher = None
 a = Analysis(['src/main.py'],
              pathex=['./src'],
              binaries=[],
-             hiddenimports=['celery.apps','celery.apps.worker','celery.fixups', 'celery.fixups.django', 'celery.loaders.app',
+             hiddenimports=['celery.apps','celery.apps.worker','celery.app.log','celery.fixups', 'celery.fixups.django', 'celery.loaders.app',
                             'celery.app.amqp', 'kombu.transport.redis', 'redis', 'celery.backends',
                             'celery.backends.redis', 'celery.app.events', 'celery.events','celery.redis',
                             'kombu.transport.pyamqp'],

From acfeae50c95d78719b6b2ff050f5fbf34c28e206 Mon Sep 17 00:00:00 2001
From: Zachary Estep
Date: Wed, 6 Nov 2019 15:52:16 -0500
Subject: [PATCH 148/257] Updates to support batch processing

---
 src/main.py | 168 ++++++++++++++++++++++++++++++++--------------------
 1 file changed, 104 insertions(+), 64 deletions(-)

diff --git a/src/main.py b/src/main.py
index c7ca391..e179478 100644
--- a/src/main.py
+++ b/src/main.py
@@ -21,9 +21,11 @@
 import humanfriendly
 import lockfile
 import psycopg2
+
 # noinspection PyPackageRequirements
 import yara
 from celery.bin import worker
+
 # noinspection PyPackageRequirements
 from daemon import daemon
 from peewee import SqliteDatabase
@@ -55,7 +57,7 @@ def promise_worker(exit_event, scanning_promise_queue, scanning_results_queue):
     :return:
     """
     try:
-        while not (exit_event.is_set()):
+        while not (exit_event.is_set()) and not (scanning_promise_queue.empty()):
             if not (scanning_promise_queue.empty()):
                 try:
                     promise = scanning_promise_queue.get(timeout=1.0)
@@ -80,7 +82,7 @@ def results_worker(exit_event, results_queue):
     seen binaries/results from scans
     """
     try:
-        while not (exit_event.is_set()):
+        while not (exit_event.is_set()) and not (results_queue.empty()):
             if not (results_queue.empty()):
                 try:
                     result = results_queue.get()
@@ -103,7 +105,7 @@ def results_worker_chunked(exit_event, results_queue: Queue):
     :return:
     """
     try:
-        while not (exit_event.is_set()):
+        while not (exit_event.is_set()) and not (results_queue.empty()):
             if not (results_queue.empty()):
                 try:
                     results = results_queue.get()
@@ -280,8 +282,8 @@ def get_binary_file_cursor(conn, start_date_binaries):

     # noinspection SqlDialectInspection,SqlNoDataSourceInspection
     query = (
-        "SELECT md5hash FROM
storefiles WHERE present_locally = TRUE AND " + + "timestamp >= '{0}' ORDER BY timestamp DESC".format(start_date_binaries) ) logger.debug(query) @@ -295,8 +297,12 @@ def execute_script() -> None: """ Execute a external utility script. """ - logger.info("----- Executing utility script ----------------------------------------") - prog = subprocess.Popen(globals.g_utility_script, shell=True, universal_newlines=True) + logger.info( + "----- Executing utility script ----------------------------------------" + ) + prog = subprocess.Popen( + globals.g_utility_script, shell=True, universal_newlines=True + ) stdout, stderr = prog.communicate() if stdout is not None and len(stdout.strip()) > 0: logger.info(stdout) @@ -304,7 +310,9 @@ def execute_script() -> None: logger.error(stderr) if prog.returncode: logger.warning(f"program returned error code {prog.returncode}") - logger.info("---------------------------------------- Utility script completed -----\n") + logger.info( + "---------------------------------------- Utility script completed -----\n" + ) def perform(yara_rule_dir: str, conn, scanning_promises_queue: Queue): @@ -342,7 +350,11 @@ def perform(yara_rule_dir: str, conn, scanning_promises_queue: Queue): if globals.g_utility_interval > 0: seconds_since_start = (datetime.now() - utility_window_start).seconds - if seconds_since_start >= globals.g_utility_interval * 60 if not globals.g_utility_debug else 1: + if ( + seconds_since_start >= globals.g_utility_interval * 60 + if not globals.g_utility_debug + else 1 + ): execute_script() utility_window_start = datetime.now() @@ -377,7 +389,7 @@ def save_results_with_logging(analysis_results): # noinspection PyUnusedFunction def save_and_log( - analysis_results, start_time, num_binaries_skipped, num_total_binaries + analysis_results, start_time, num_binaries_skipped, num_total_binaries ): logger.debug(analysis_results) if analysis_results: @@ -396,7 +408,7 @@ def save_and_log( def _rule_logging( - start_time: float, num_binaries_skipped: int, num_total_binaries: int + start_time: float, num_binaries_skipped: int, num_total_binaries: int ) -> None: """ Simple method to log yara work. @@ -503,7 +515,10 @@ def wait_all_worker_exit(): def start_workers( - exit_event: Event, scanning_promises_queue: Queue, scanning_results_queue: Queue + exit_event: Event, + scanning_promises_queue: Queue, + scanning_results_queue: Queue, + run_only_once=False, ) -> None: """ Starts worker-threads (not celery workers). 
Worker threads do work until they get the exit_event signal @@ -513,7 +528,7 @@ def start_workers( """ logger.debug("Starting perf thread") perf_thread = DatabaseScanningThread( - globals.g_scanning_interval, scanning_promises_queue, exit_event + globals.g_scanning_interval, scanning_promises_queue, exit_event, run_only_once ) perf_thread.start() @@ -540,12 +555,13 @@ class DatabaseScanningThread(Thread): """ def __init__( - self, - interval: int, - scanning_promises_queue: Queue, - exit_event: Event, - *args, - **kwargs, + self, + interval: int, + scanning_promises_queue: Queue, + exit_event: Event, + run_only_once, + *args, + **kwargs, ): """ @@ -563,7 +579,15 @@ def __init__( self._conn = get_database_conn() self._interval = interval self._scanning_promises_queue = scanning_promises_queue - self._target = self.scan_until_exit + self._run_only_once = run_only_once + if not (self._run_only_once): + self._target = self.scan_until_exit + else: + self._target = self.scan_once_and_exit + + def scan_once_and_exit(self): + self.do_db_scan() + self.exit_event.set() def scan_until_exit(self): # TODO: DRIFT @@ -649,6 +673,9 @@ def handle_arguments(): parser.add_argument( "--lock-file", default="./yaraconnector", help="lock file", required=False ) + parser.add_argument( + "--run-once", default=False, help="Run as batch mode or no", required=False + ) parser.add_argument( "--validate-yara-rules", action="store_true", @@ -702,54 +729,67 @@ def main(): exit_event = Event() try: - working_dir = os.path.abspath(os.path.expanduser(args.working_dir)) - - lock_file = lockfile.FileLock(args.lock_file) - - files_preserve = get_log_file_handles(logger) - files_preserve.extend([args.lock_file, args.log_file, args.output_file]) - - # defauls to piping to /dev/null - - deamon_kwargs = { - "working_directory": working_dir, - "pidfile": lock_file, - "files_preserve": files_preserve, - } - if args.debug: - deamon_kwargs.update({"stdout": sys.stdout, "stderr": sys.stderr}) - context = daemon.DaemonContext(**deamon_kwargs) - - run_as_master = globals.g_mode == "master" - - scanning_promise_queue = Queue() - scanning_results_queue = Queue() - - sig_handler = partial(handle_sig, exit_event) - - context.signal_map = { - signal.SIGTERM: sig_handler, - signal.SIGQUIT: sig_handler, - } - - with context: - # only connect to cbr if we're the master - if run_as_master: - init_local_resources() - start_workers( - exit_event, scanning_promise_queue, scanning_results_queue - ) - # start local celery if working mode is local - if not globals.g_remote: + if not args.run_once: + working_dir = os.path.abspath(os.path.expanduser(args.working_dir)) + + lock_file = lockfile.FileLock(args.lock_file) + + files_preserve = get_log_file_handles(logger) + files_preserve.extend([args.lock_file, args.log_file, args.output_file]) + + # defauls to piping to /dev/null + + deamon_kwargs = { + "working_directory": working_dir, + "pidfile": lock_file, + "files_preserve": files_preserve, + } + if args.debug: + deamon_kwargs.update({"stdout": sys.stdout, "stderr": sys.stderr}) + context = daemon.DaemonContext(**deamon_kwargs) + + run_as_master = globals.g_mode == "master" + + scanning_promise_queue = Queue() + scanning_results_queue = Queue() + + sig_handler = partial(handle_sig, exit_event) + + context.signal_map = { + signal.SIGTERM: sig_handler, + signal.SIGQUIT: sig_handler, + } + + with context: + # only connect to cbr if we're the master + if run_as_master: + init_local_resources() + start_workers( + exit_event, scanning_promise_queue, 
scanning_results_queue + ) + # start local celery if working mode is local + if not globals.g_remote: + start_celery_worker_thread(args.config_file) + else: + # otherwise, we must start a worker since we are not the master start_celery_worker_thread(args.config_file) - else: - # otherwise, we must start a worker since we are not the master - start_celery_worker_thread(args.config_file) - # run until the service/daemon gets a quitting sig + # run until the service/daemon gets a quitting sig + run_to_exit_signal(exit_event) + wait_all_worker_exit() + logger.info("Yara connector shutdown OK") + else: # Just do one batch + init_local_resources() + start_workers( + exit_event, + scanning_promise_queue, + scanning_results_queue, + run_only_once=True, + ) + if not globals.g_remote: + start_celery_worker_thread(args.config_file) run_to_exit_signal(exit_event) wait_all_worker_exit() - logger.info("Yara connector shutdown OK") except KeyboardInterrupt: logger.info("\n\n##### Interupted by User!\n") From 101d877e62688210fb6b487ebea1d295993cb61c Mon Sep 17 00:00:00 2001 From: Zachary Estep Date: Wed, 6 Nov 2019 15:59:51 -0500 Subject: [PATCH 149/257] Fixing boolean logic --- src/main.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/main.py b/src/main.py index e179478..dd469ac 100644 --- a/src/main.py +++ b/src/main.py @@ -57,7 +57,7 @@ def promise_worker(exit_event, scanning_promise_queue, scanning_results_queue): :return: """ try: - while not (exit_event.is_set()) and not (scanning_promise_queue.empty()): + while not (exit_event.is_set()) or not (scanning_promise_queue.empty()): if not (scanning_promise_queue.empty()): try: promise = scanning_promise_queue.get(timeout=1.0) @@ -82,7 +82,7 @@ def results_worker(exit_event, results_queue): seen binaries/results from scans """ try: - while not (exit_event.is_set()) and not (results_queue.empty()): + while not (exit_event.is_set()) or not (results_queue.empty()): if not (results_queue.empty()): try: result = results_queue.get() @@ -105,7 +105,7 @@ def results_worker_chunked(exit_event, results_queue: Queue): :return: """ try: - while not (exit_event.is_set()) and not (results_queue.empty()): + while not (exit_event.is_set()) or not (results_queue.empty()): if not (results_queue.empty()): try: results = results_queue.get() From cad498282b9ff18185c9d79d4dcac1d89d496d41 Mon Sep 17 00:00:00 2001 From: Zachary Estep Date: Wed, 6 Nov 2019 16:13:47 -0500 Subject: [PATCH 150/257] touch up --- src/main.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/main.py b/src/main.py index dd469ac..e03c6ae 100644 --- a/src/main.py +++ b/src/main.py @@ -727,6 +727,8 @@ def main(): sys.exit(5) else: exit_event = Event() + scanning_promise_queue = Queue() + scanning_results_queue = Queue() try: if not args.run_once: @@ -750,8 +752,7 @@ def main(): run_as_master = globals.g_mode == "master" - scanning_promise_queue = Queue() - scanning_results_queue = Queue() + sig_handler = partial(handle_sig, exit_event) From 616aff72065357d024b4e3c761bb245d66890798 Mon Sep 17 00:00:00 2001 From: Zachary Estep Date: Thu, 7 Nov 2019 08:34:12 -0500 Subject: [PATCH 151/257] updating --- src/main.py | 52 +++++++++++++++++++++++++++++++++++++++++++--------- 1 file changed, 43 insertions(+), 9 deletions(-) diff --git a/src/main.py b/src/main.py index e03c6ae..edc88c9 100644 --- a/src/main.py +++ b/src/main.py @@ -62,6 +62,7 @@ def promise_worker(exit_event, scanning_promise_queue, scanning_results_queue): try: promise = 
scanning_promise_queue.get(timeout=1.0)
                     result = promise.get(disable_sync_subtasks=False)
+                    scanning_promise_queue.task_done()
                     scanning_results_queue.put(result)
                 except Empty:
                     exit_event.wait(1)
@@ -87,6 +88,7 @@ def results_worker(exit_event, results_queue):
                 try:
                     result = results_queue.get()
                     save_results_with_logging(result)
+                    results_queue.task_done()
                 except Empty:
                     exit_event.wait(1)
             else:
@@ -110,6 +112,7 @@ def results_worker_chunked(exit_event, results_queue: Queue):
                 try:
                     results = results_queue.get()
                     save_results(results)
+                    results_queue.task_done()
                 except Empty:
                     exit_event.wait(1)
             else:
@@ -528,7 +531,7 @@ def start_workers(
     """
     logger.debug("Starting perf thread")
     perf_thread = DatabaseScanningThread(
-        globals.g_scanning_interval, scanning_promises_queue, exit_event, run_only_once
+        globals.g_scanning_interval, scanning_promises_queue, scanning_results_queue, exit_event, run_only_once
     )
     perf_thread.start()
@@ -558,8 +561,9 @@ def __init__(
         self,
         interval: int,
         scanning_promises_queue: Queue,
+        scanning_results_queue: Queue,
         exit_event: Event,
-        run_only_once,
+        run_only_once : bool,
         *args,
         **kwargs,
     ):
@@ -579,6 +583,7 @@ def __init__(
         self._conn = get_database_conn()
         self._interval = interval
         self._scanning_promises_queue = scanning_promises_queue
+        self._scanning_results_queue = scanning_results_queue
         self._run_only_once = run_only_once
         if not (self._run_only_once):
             self._target = self.scan_until_exit
@@ -587,6 +592,8 @@ def __init__(
     def scan_once_and_exit(self):
         self.do_db_scan()
+        self._scanning_promises_queue.join()
+        self._scanning_results_queue.join()
         self.exit_event.set()

     def scan_until_exit(self):
@@ -655,27 +662,36 @@ def handle_arguments():
     """
     parser = argparse.ArgumentParser(description="Yara Agent for Yara Connector")
+
+    #Controls config file (ini)
     parser.add_argument(
         "--config-file",
         required=True,
         default="yaraconnector.conf",
         help="Location of the config file",
     )
+
+    #Controls log file location+name
     parser.add_argument(
         "--log-file", default="yaraconnector.log", help="Log file output"
     )
+    #Controls the output feed location+name
     parser.add_argument(
         "--output-file", default="yara_feed.json", help="output feed file"
     )
+    #Controls the working directory
    parser.add_argument(
         "--working-dir", default=".", help="working directory", required=False
     )
+    #Controls the lock file
     parser.add_argument(
         "--lock-file", default="./yaraconnector", help="lock file", required=False
     )
+    #Controls batch vs continuous mode, defaults to batch processing
     parser.add_argument(
-        "--run-once", default=False, help="Run as batch mode or no", required=False
+        "--run-once", default=True, help="Run as batch mode or not", required=False
     )
+    #Validates the rules
     parser.add_argument(
         "--validate-yara-rules",
         action="store_true",
@@ -725,17 +741,25 @@ def main():
         except Exception as err:
             logger.error(f"There were errors compiling yara rules: {err}")
             sys.exit(5)
-    else:
+    else:  #Doing a real run
+
+        #Exit condition and queues for doing work
         exit_event = Event()
         scanning_promise_queue = Queue()
         scanning_results_queue = Queue()
+        #Lock file so this process is a singleton
+        lock_file = lockfile.FileLock(args.lock_file)

         try:
             if not args.run_once:
+
+                #Running as a daemon
+
+                #Get working dir setting
                 working_dir = os.path.abspath(os.path.expanduser(args.working_dir))

-                lock_file = lockfile.FileLock(args.lock_file)
+                #Mark files to be preserved
                 files_preserve = get_log_file_handles(logger)
                 files_preserve.extend([args.lock_file, args.log_file, args.output_file])

-            # defauls to piping to /dev/null
                 deamon_kwargs = {
                     "working_directory": working_dir,
                     "pidfile": lock_file,
                     "files_preserve": files_preserve,
                 }
+                #If in debug mode, make sure stdout and stderr don't go to /dev/null
                 if args.debug:
                     deamon_kwargs.update({"stdout": sys.stdout, "stderr": sys.stderr})
+
                 context = daemon.DaemonContext(**deamon_kwargs)
+
+                #Operating mode - are we the master or a worker?
                 run_as_master = globals.g_mode == "master"
-
+                #Signal handler partial function
                 sig_handler = partial(handle_sig, exit_event)
                 context.signal_map = {
                     signal.SIGTERM: sig_handler,
                     signal.SIGQUIT: sig_handler,
                 }

+                #Make sure we close the daemon context at the end
                 with context:
                     # only connect to cbr if we're the master
                     if run_as_master:
+                        #initialize local resources
                         init_local_resources()
+                        #start working threads
                         start_workers(
                             exit_event, scanning_promise_queue, scanning_results_queue
                         )
-                        # start local celery if working mode is local
+                        # start local celeryD worker if working mode is local
                         if not globals.g_remote:
                             start_celery_worker_thread(args.config_file)
                     else:
-                        # otherwise, we must start a worker since we are not the master
+                        # otherwise, we must start a celeryD worker since we are not the master
                         start_celery_worker_thread(args.config_file)

                     # run until the service/daemon gets a quitting sig
@@ -780,13 +811,16 @@ def main():
                     run_to_exit_signal(exit_event)
                     wait_all_worker_exit()
                     logger.info("Yara connector shutdown OK")
             else:  # Just do one batch
+                #init local resources
                 init_local_resources()
+                #start necessary worker threads
                 start_workers(
                     exit_event,
                     scanning_promise_queue,
                     scanning_results_queue,
                     run_only_once=True,
                 )
+                #Start a celery worker if we need one
                 if not globals.g_remote:
                     start_celery_worker_thread(args.config_file)
                 run_to_exit_signal(exit_event)
                 wait_all_worker_exit()

From beac612d4b799918e6a74ed99b83e7bc5d78bd40 Mon Sep 17 00:00:00 2001
From: Burton Choinski
Date: Thu, 7 Nov 2019 08:29:12 -0500
Subject: [PATCH 152/257] tweaks

---
 samples/sample_local.conf  | 2 +-
 samples/sample_remote.conf | 2 +-
 test/test_configInit.py    | 2 --
 3 files changed, 2 insertions(+), 4 deletions(-)

diff --git a/samples/sample_local.conf b/samples/sample_local.conf
index 60421a9..f55b613 100644
--- a/samples/sample_local.conf
+++ b/samples/sample_local.conf
@@ -69,5 +69,5 @@
 ; pause to execute a shell script for database maintenance. This can present risks.  Be careful what you allow the
 ; script to do, and use this option at your own discretion.
 ;
-utility_interval=-1
+utility_interval=0
 utility_script=./scripts/vacuumscript.sh
diff --git a/samples/sample_remote.conf b/samples/sample_remote.conf
index 3a8c5df..f83934f 100644
--- a/samples/sample_remote.conf
+++ b/samples/sample_remote.conf
@@ -70,5 +70,5 @@ database_scanning_interval=360
 ; pause to execute a shell script for database maintenance. This can present risks.  Be careful what you allow the
 ; script to do, and use this option at your own discretion.
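+; (Note: the connector only runs the script when utility_interval is greater
+; than 0, so the value of 0 below leaves this feature disabled.)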
; -utility_interval=-1 +utility_interval=0 utility_script=./scripts/vacuumscript.sh diff --git a/test/test_configInit.py b/test/test_configInit.py index 97a6845..55d66d4 100644 --- a/test/test_configInit.py +++ b/test/test_configInit.py @@ -847,8 +847,6 @@ def test_25d_database_scanning_interval_below_minimum(self): """ Ensure that config with missing database_scanning_interval reverts to default """ - check = globals.g_scanning_interval - self.mangle(change={"database_scanning_interval": "18"}) with self.assertRaises(CbInvalidConfig) as err: ConfigurationInit(TESTCONF, "sample.json") From 9db9bfefd1d89691ddfad892ea1e2cf5ed4aa9ab Mon Sep 17 00:00:00 2001 From: Burton Choinski Date: Thu, 7 Nov 2019 08:48:41 -0500 Subject: [PATCH 153/257] test re-arrange --- test/test_configInit.py | 36 +++++++++++++++++++----------------- 1 file changed, 19 insertions(+), 17 deletions(-) diff --git a/test/test_configInit.py b/test/test_configInit.py index 55d66d4..015fa7f 100644 --- a/test/test_configInit.py +++ b/test/test_configInit.py @@ -770,16 +770,7 @@ def test_22c_worker_network_timeout_bogus(self): ConfigurationInit(TESTCONF, "sample.json") assert "invalid literal for int" in "{0}".format(err.exception.args[0]) - def test_23_config_unexpected_parameter(self): - """ - Ensure that config with unexpected parameter (typo?) is flagged - """ - self.mangle(add=["cb_server=https://localhost"]) # should be "cb_server_url" - with self.assertRaises(CbInvalidConfig) as err: - ConfigurationInit(TESTCONF, "sample.json") - assert "has unknown parameters: ['cb_server']" in "{0}".format(err.exception.args[0]) - - def test_24a_utility_debug_missing(self): + def test_23a_utility_debug_missing(self): """ Ensure that config with missing utility_debug is always false. """ @@ -787,7 +778,7 @@ def test_24a_utility_debug_missing(self): ConfigurationInit(TESTCONF, "sample.json") self.assertFalse(globals.g_utility_debug) - def test_24b_utility_debug_empty(self): + def test_23b_utility_debug_empty(self): """ Ensure that config with empty utility_debug is always false. """ @@ -795,7 +786,7 @@ def test_24b_utility_debug_empty(self): ConfigurationInit(TESTCONF, "sample.json") self.assertFalse(globals.g_utility_debug) - def test_24c_utility_debug_bogus(self): + def test_23c_utility_debug_bogus(self): """ Ensure that config with bogus (non-bool) utility_debug is detected. """ @@ -804,7 +795,7 @@ def test_24c_utility_debug_bogus(self): ConfigurationInit(TESTCONF, "sample.json") assert "is not a valid boolean value" in "{0}".format(err.exception.args[0]) - def test_24d_utility_debug_empty_global_changed(self): + def test_23d_utility_debug_empty_global_changed(self): """ Ensure that config with empty utility_debug is always false, even if the globals are altered! 
""" @@ -814,7 +805,7 @@ def test_24d_utility_debug_empty_global_changed(self): ConfigurationInit(TESTCONF, "sample.json") self.assertFalse(globals.g_utility_debug) - def test_25a_database_scanning_interval_missing(self): + def test_24a_database_scanning_interval_missing(self): """ Ensure that config with missing database_scanning_interval reverts to default """ @@ -824,7 +815,7 @@ def test_25a_database_scanning_interval_missing(self): ConfigurationInit(TESTCONF, "sample.json") self.assertEqual(check, globals.g_scanning_interval) - def test_25b_database_scanning_interval_empty(self): + def test_24b_database_scanning_interval_empty(self): """ Ensure that config with empty database_scanning_interval reverts to default """ @@ -834,7 +825,7 @@ def test_25b_database_scanning_interval_empty(self): ConfigurationInit(TESTCONF, "sample.json") self.assertEqual(check, globals.g_scanning_interval) - def test_25c_database_scanning_interval_bogus(self): + def test_24c_database_scanning_interval_bogus(self): """ Ensure that config with bogus (non-int) database_scanning_interval is detected. """ @@ -843,7 +834,7 @@ def test_25c_database_scanning_interval_bogus(self): ConfigurationInit(TESTCONF, "sample.json") assert "invalid literal for int" in "{0}".format(err.exception.args[0]) - def test_25d_database_scanning_interval_below_minimum(self): + def test_24d_database_scanning_interval_below_minimum(self): """ Ensure that config with missing database_scanning_interval reverts to default """ @@ -852,6 +843,17 @@ def test_25d_database_scanning_interval_below_minimum(self): ConfigurationInit(TESTCONF, "sample.json") assert "'database_scanning_interval' must be greater or equal to 360" in "{0}".format(err.exception.args[0]) + # ----- Unknown configuration (typo detection) + + def test_80_unexpected_parameter(self): + """ + Ensure that config with unexpected parameter (typo?) is flagged + """ + self.mangle(add=["cb_server=https://localhost"]) # should be "cb_server_url" + with self.assertRaises(CbInvalidConfig) as err: + ConfigurationInit(TESTCONF, "sample.json") + assert "has unknown parameters: ['cb_server']" in "{0}".format(err.exception.args[0]) + # ----- Minimal validation (worker) def test_90_minimal_validation_effects(self): From ea962c7bf26a1fb518f7eb8cc2361eb9057d44ec Mon Sep 17 00:00:00 2001 From: Burton Choinski Date: Thu, 7 Nov 2019 16:48:08 -0500 Subject: [PATCH 154/257] Cleaned up added rules handling module updated tests --- .gitignore | 7 + src/main.py | 490 +++++++++++++++++++-------------------- src/rule_handling.py | 36 +++ src/tasks.py | 111 ++++----- test/test_main.py | 13 +- test/test_ruleHandler.py | 24 ++ test/test_tasks.py | 6 +- 7 files changed, 351 insertions(+), 336 deletions(-) create mode 100644 src/rule_handling.py create mode 100644 test/test_ruleHandler.py diff --git a/.gitignore b/.gitignore index bb9e8b3..8860d09 100644 --- a/.gitignore +++ b/.gitignore @@ -1,3 +1,7 @@ +################################################################################ +## Set of file patterns to prevent being checked into the repo +################################################################################ + *.py[cod] # C extensions @@ -41,3 +45,6 @@ config.ini # created local DB binary.db + +# incase inadvertantly left by unit testing +conf-testing.conf diff --git a/src/main.py b/src/main.py index edc88c9..01b7856 100644 --- a/src/main.py +++ b/src/main.py @@ -2,7 +2,6 @@ # Copyright © 2014-2019 VMware, Inc. All Rights Reserved. 
 import argparse
-import hashlib
 import json
 import logging
 import logging.handlers
@@ -16,16 +15,14 @@
 from functools import partial
 from queue import Empty, Queue
 from threading import Event, Thread
-from typing import List
+from typing import Iterator, List

 import humanfriendly
 import lockfile
 import psycopg2
-
 # noinspection PyPackageRequirements
 import yara
 from celery.bin import worker
-
 # noinspection PyPackageRequirements
 from daemon import daemon
 from peewee import SqliteDatabase
@@ -36,6 +33,7 @@
 from celery_app import app
 from config_handling import ConfigurationInit
 from feed import CbFeed, CbFeedInfo, CbReport
+from rule_handling import generate_yara_rule_map_hash
 from tasks import analyze_binary, generate_rule_map, update_yara_rules_remote

 logging_format = "%(asctime)s-%(name)s-%(lineno)d-%(levelname)s-%(message)s"
@@ -47,14 +45,17 @@
 celery_logger = logging.getLogger("celery.app.trace")
 celery_logger.setLevel(logging.ERROR)

+# number of promise worker threads to use
+PROMISE_THREADS = 2

-def promise_worker(exit_event, scanning_promise_queue, scanning_results_queue):
+
+def promise_worker(exit_event: Event, scanning_promise_queue: Queue, scanning_results_queue: Queue) -> None:
     """
+    The promise worker scanning function.

-    :param exit_event:
-    :param scanning_promise_queue:
-    :param scanning_results_queue:
-    :return:
+    :param exit_event: event signaller
+    :param scanning_promise_queue: the promises queue
+    :param scanning_results_queue: the results queue
     """
     try:
         while not (exit_event.is_set()) or not (scanning_promise_queue.empty()):
@@ -74,13 +75,17 @@ def promise_worker(exit_event, scanning_promise_queue, scanning_results_queue):
     logger.debug("PROMISE WORKING EXITING")

+# NOTE: function retained for possible future need.
 # noinspection PyUnusedFunction
-def results_worker(exit_event, results_queue):
+def results_worker(exit_event: Event, results_queue: Queue) -> None:
     """
     Sqlite is not meant to be thread-safe.

     This single-worker-thread writes the result(s) to the configured sqlite file to hold
     the feed-metadata and
-    seen binaries/results from scans
+    seen binaries/results from scans.
+
+    :param exit_event: event signaller
+    :param results_queue: the results queue
     """
     try:
         while not (exit_event.is_set()) or not (results_queue.empty()):
@@ -99,11 +104,12 @@
     logger.debug("Results worker thread exiting")

-def results_worker_chunked(exit_event, results_queue: Queue):
+def results_worker_chunked(exit_event: Event, results_queue: Queue) -> None:
     """
+    Process entries in the results queue in chunks.

-    :param exit_event:
-    :param results_queue:
+    :param exit_event: event signaller
+    :param results_queue: the results queue
     :return:
     """
     try:
         while not (exit_event.is_set()) or not (results_queue.empty()):
@@ -125,8 +131,7 @@

 def generate_feed_from_db() -> None:
     """
-    Creates a feed based on specific database information.
-    :return:
+    Creates a feed based on specific database information and saves it to our output file.
     """
     query = BinaryDetonationResult.select().where(BinaryDetonationResult.score > 0)
@@ -160,35 +165,11 @@
         fp.write(feed.dump())

-# noinspection DuplicatedCode
-def generate_yara_rule_map_hash(yara_rule_path: str) -> None:
-    """
-    Create a list of hashes for each yara rule.
-
-    :param yara_rule_path: the path to where the yara rules are stored.
-    :return:
-    """
-    temp_list = []
-    for fn in os.listdir(yara_rule_path):
-        if fn.lower().endswith(".yar") or fn.lower().endswith(".yara"):
-            fullpath = os.path.join(yara_rule_path, fn)
-            if not os.path.isfile(fullpath):
-                continue
-            with open(os.path.join(yara_rule_path, fn), "rb") as fp:
-                data = fp.read()
-                md5 = hashlib.md5()
-                md5.update(data)
-                temp_list.append(str(md5.hexdigest()))
-
-    globals.g_yara_rule_map_hash_list = temp_list
-    globals.g_yara_rule_map_hash_list.sort()
-
-
-def generate_rule_map_remote(yara_rule_path) -> None:
+def generate_rule_map_remote(yara_rule_path: str) -> None:
     """
     Get remote rules and store into an internal map keyed by file name.
-    :param yara_rule_path: path to wheer thr rules are stored
-    :return:
+
+    :param yara_rule_path: path to where the rules are stored
     """
     ret_dict = {}
     for fn in os.listdir(yara_rule_path):
@@ -205,44 +186,56 @@
         time.sleep(0.1)

-def analyze_binary_and_queue(scanning_promise_queue, md5sum):
-    """ Analyze Binary And Queue """
+def analyze_binary_and_queue(scanning_promise_queue: Queue, md5sum: str) -> None:
+    """
+    Analyze Binary for a given md5 and save any promises.

+    :param scanning_promise_queue: the promises queue
+    :param md5sum: md5 hash to look for
+    """
     promise = analyze_binary.delay(md5sum)
     scanning_promise_queue.put(promise)

+# NOTE: function retained for possible future need.
 # noinspection PyUnusedFunction
-def analyze_binaries_and_queue(scanning_promise_queue, md5_hashes):
-    """ Analyze each binary and enqueue """
-    for h in md5_hashes:
-        analyze_binary_and_queue(scanning_promise_queue, h)
+def analyze_binaries_and_queue(scanning_promise_queue: Queue, md5_hashes: List[str]) -> None:
+    """
+    Analyze each binary and enqueue.

+    :param scanning_promise_queue: the promise queue
+    :param md5_hashes: list of md5 hashes to scan
+    """
+    for md5 in md5_hashes:
+        analyze_binary_and_queue(scanning_promise_queue, md5)

-def analyze_binaries_and_queue_chunked(scanning_promise_queue, md5_hashes):
+def analyze_binaries_and_queue_chunked(scanning_promise_queue: Queue, md5_hashes: Iterator) -> None:
     """
-    Attempts to do work in parrallelized chunks of MAX_HASHES grouped
+    Attempts to do work in parallelized chunks of MAX_HASHES grouped.
+
+    :param scanning_promise_queue: the promise queue
+    :param md5_hashes: list of md5 hashes
     """
-    promise = analyze_binary.chunks(
-        [(mh,) for mh in md5_hashes], globals.g_max_hashes
-    ).apply_async()
+    promise = analyze_binary.chunks([(mh,) for mh in md5_hashes], globals.g_max_hashes).apply_async()
     for prom in promise.children:
         scanning_promise_queue.put(prom)

 def save_results(analysis_results: List[AnalysisResult]) -> None:
     """
-    Save the current analysis results.
-
-    TODO: figure out typing!
+    Save the current set of analysis results.

-    :param analysis_results:
-    :return:
+    :param analysis_results: list of current analysis results
     """
     for analysis_result in analysis_results:
         save_result(analysis_result)

-def save_result(analysis_result):
+def save_result(analysis_result: AnalysisResult) -> None:
+    """
+    Save an individual analysis result.
+
+    :param analysis_result: result to be saved
+    """
     if analysis_result.binary_not_available:
         globals.g_num_binaries_not_available += 1
         return
@@ -266,6 +259,11 @@ def save_result(analysis_result):

 def get_database_conn():
+    """
+    Get a postgres database connection.
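+    (The connection parameters are the postgres_* settings loaded into
+    globals from the configuration file; see the psycopg2.connect call below.)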
+ + :return: the connection + """ logger.info("Connecting to Postgres database...") conn = psycopg2.connect( host=globals.g_postgres_host, @@ -274,23 +272,28 @@ def get_database_conn(): password=globals.g_postgres_password, port=globals.g_postgres_port, ) - return conn -def get_binary_file_cursor(conn, start_date_binaries): +def get_binary_file_cursor(conn, start_date_binaries: datetime): + """ + Get the cursor index to the binaries. + + :param conn: the postgres connection + :param start_date_binaries: earliest start time for the search window (up to now) + :return: the results cursor + """ logger.debug("Getting database cursor...") cur = conn.cursor(name="yara_agent") # noinspection SqlDialectInspection,SqlNoDataSourceInspection query = ( - "SELECT md5hash FROM storefiles WHERE present_locally = TRUE AND " - + "timestamp >= '{0}' ORDER BY timestamp DESC".format(start_date_binaries) + "SELECT md5hash FROM storefiles WHERE present_locally = TRUE AND " + + "timestamp >= '{0}' ORDER BY timestamp DESC".format(start_date_binaries) ) logger.debug(query) - cur.execute(query) return cur @@ -298,11 +301,9 @@ def get_binary_file_cursor(conn, start_date_binaries): def execute_script() -> None: """ - Execute a external utility script. + Execute an external utility script. """ - logger.info( - "----- Executing utility script ----------------------------------------" - ) + logger.info("----- Executing utility script ----------------------------------------") prog = subprocess.Popen( globals.g_utility_script, shell=True, universal_newlines=True ) @@ -313,19 +314,17 @@ def execute_script() -> None: logger.error(stderr) if prog.returncode: logger.warning(f"program returned error code {prog.returncode}") - logger.info( - "---------------------------------------- Utility script completed -----\n" - ) + logger.info("---------------------------------------- Utility script completed -----\n") -def perform(yara_rule_dir: str, conn, scanning_promises_queue: Queue): +def perform(yara_rule_dir: str, conn, scanning_promises_queue: Queue) -> None: """ Main routine - checks the cbr modulestore/storfiles table for new hashes by comparing the sliding-window with the contents of the feed database on disk. 
    :param yara_rule_dir: location of the rules directory
-    :param conn: The connection (TODO: type)
-    :param scanning_promises_queue:
+    :param conn: The postgres connection
+    :param scanning_promises_queue: the promises queue
     """
     if globals.g_remote:
         logger.info("Uploading yara rules to workers...")
@@ -346,63 +345,63 @@ def perform(yara_rule_dir: str, conn, scanning_promises_queue: Queue):

     logger.info(f"Enumerating modulestore...found {len(rows)} resident binaries")

     md5_hashes = filter(_check_hash_against_feed, (row[0].hex() for row in rows))
-
-    # logger.debug(f"After filtering...found new {len(md5_hashes)} hashes to scan")
-
     analyze_binaries_and_queue_chunked(scanning_promises_queue, md5_hashes)

+    # if gathering and analysis took longer than our utility script interval window, kick it off
     if globals.g_utility_interval > 0:
         seconds_since_start = (datetime.now() - utility_window_start).seconds
-        if (
-            seconds_since_start >= globals.g_utility_interval * 60
-            if not globals.g_utility_debug
-            else 1
-        ):
+        if seconds_since_start >= globals.g_utility_interval * 60 if not globals.g_utility_debug else 1:
             execute_script()
-            utility_window_start = datetime.now()

     logger.debug("Exiting database sweep routine")


-def _check_hash_against_feed(md5_hash):
-    query = BinaryDetonationResult.select().where(
-        BinaryDetonationResult.md5 == md5_hash
-    )
+def _check_hash_against_feed(md5_hash: str) -> bool:
+    """
+    Check discovered hash against the current feed.
+    :param md5_hash: md5 hash
+    :return: True if the hash needs to be added
+    """
+    query = BinaryDetonationResult.select().where(BinaryDetonationResult.md5 == md5_hash)

-    if query.exists():
-        return False
+    return not query.exists()

-    return True

+def save_results_with_logging(analysis_results: List[AnalysisResult]) -> None:
+    """
+    Save all analysis results, with extended logging.

-def save_results_with_logging(analysis_results):
+    :param analysis_results: list of analysis results
+    """
     logger.debug(analysis_results)
     if analysis_results:
         for analysis_result in analysis_results:
-            logger.debug(
-                (
-                    f"Analysis result is {analysis_result.md5} {analysis_result.binary_not_available}"
-                    f" {analysis_result.long_result} {analysis_result.last_error_msg}"
-                )
-            )
+            logger.debug((f"Analysis result is {analysis_result.md5} {analysis_result.binary_not_available}"
+                          f" {analysis_result.long_result} {analysis_result.last_error_msg}"))
             if analysis_result.last_error_msg:
                 logger.error(analysis_result.last_error_msg)
     save_results(analysis_results)


+# NOTE: function retained for possible future need.
 # noinspection PyUnusedFunction
-def save_and_log(
-    analysis_results, start_time, num_binaries_skipped, num_total_binaries
-):
+def save_and_log(analysis_results: List[AnalysisResult], start_time: float, num_binaries_skipped: int,
+                 num_total_binaries: int) -> None:
+    """
+    Save and log analysis results.
+ + :param analysis_results: list of analysis results + :param start_time: starting time (seconds) + :param num_binaries_skipped: number of skipped binaries + :param num_total_binaries: total binary count + """ logger.debug(analysis_results) if analysis_results: for analysis_result in analysis_results: - logger.debug( - ( - f"Analysis result is {analysis_result.md5} {analysis_result.binary_not_available}" - f" {analysis_result.long_result} {analysis_result.last_error_msg}" - ) - ) + logger.debug(( + f"Analysis result is {analysis_result.md5} {analysis_result.binary_not_available}" + f" {analysis_result.long_result} {analysis_result.last_error_msg}")) + if analysis_result.last_error_msg: logger.error(analysis_result.last_error_msg) save_results(analysis_results) @@ -410,44 +409,31 @@ def save_and_log( _rule_logging(start_time, num_binaries_skipped, num_total_binaries) -def _rule_logging( - start_time: float, num_binaries_skipped: int, num_total_binaries: int -) -> None: +def _rule_logging(start_time: float, num_binaries_skipped: int, num_total_binaries: int) -> None: """ Simple method to log yara work. - :param start_time: start time for the work - :param num_binaries_skipped: - :param num_total_binaries: - :return: + + :param start_time: starting time (seconds) + :param num_binaries_skipped: number of skipped binaries + :param num_total_binaries: total binary count """ elapsed_time = time.time() - start_time logger.info("elapsed time: {0}".format(humanfriendly.format_timespan(elapsed_time))) - logger.debug( - " number binaries scanned: {0}".format(globals.g_num_binaries_analyzed) - ) + logger.debug(" number binaries scanned: {0}".format(globals.g_num_binaries_analyzed)) logger.debug(" number binaries already scanned: {0}".format(num_binaries_skipped)) - logger.debug( - " number binaries unavailable: {0}".format( - globals.g_num_binaries_not_available - ) - ) + logger.debug(" number binaries unavailable: {0}".format(globals.g_num_binaries_not_available)) logger.info("total binaries from db: {0}".format(num_total_binaries)) - logger.debug( - " binaries per second: {0}:".format( - round(num_total_binaries / elapsed_time, 2) - ) - ) - logger.info( - "num binaries score greater than zero: {0}".format( - len(BinaryDetonationResult.select().where(BinaryDetonationResult.score > 0)) - ) - ) - logger.info("") + logger.debug(" binaries per second: {0}:".format(round(num_total_binaries / elapsed_time, 2))) + logger.info("num binaries score greater than zero: {0}\n".format( + len(BinaryDetonationResult.select().where(BinaryDetonationResult.score > 0)))) -def get_log_file_handles(use_logger): - """ Get a list of filehandle numbers from logger - to be handed to DaemonContext.files_preserve +def get_log_file_handles(use_logger) -> List: + """ + Get a list of filehandle numbers from logger to be handed to DaemonContext.files_preserve. + + :param use_logger: logger to check + :return: List of file handlers """ handles = [] for handler in use_logger.handlers: @@ -457,10 +443,12 @@ def get_log_file_handles(use_logger): return handles -# noinspection PyUnusedLocal -def handle_sig(exit_event, sig, frame): +def handle_sig(exit_event: Event, sig: int) -> None: """ - Signal handler - handle the signal and mark exit if its an exiting signal + Signal handler - handle the signal and mark exit if its an exiting signal type. 
+
+    :param exit_event: the exit event to set
+    :param sig: the signal seen
     """
     exit_sigs = (signal.SIGTERM, signal.SIGQUIT, signal.SIGKILL)
     if sig in exit_sigs:
@@ -471,22 +459,27 @@
 #
 # wait until the exit_event has been set by the signal handler
 #
-def run_to_exit_signal(exit_event):
+def run_to_exit_signal(exit_event: Event) -> None:
+    """
+    Wait-until-exit polling loop function.
+    :param exit_event: the exit event to wait on
+    """
     exit_event.wait()
     logger.debug("Begin graceful shutdown...")


-def init_local_resources():
+def init_local_resources() -> None:
     """
-    Initialize the local resources required to get module information
-    from cbr module store as well as local storage of module and scanning
-    metadata in sqlite 'binary.db' - generate an initial fead from the
-    database
+    Initialize the local resources required to get module information
+    from cbr module store as well as local storage of module and scanning
+    metadata in sqlite 'binary.db' - generate an initial feed from the
+    database.

-    generate yara_rule_set metadata
+    generate yara_rule_set metadata
     """
     globals.g_yara_rule_map = generate_rule_map(globals.g_yara_rules_dir)
-    generate_yara_rule_map_hash(globals.g_yara_rules_dir)
+    generate_yara_rule_map_hash(globals.g_yara_rules_dir, return_list=False)  # save to globals
+
     database = SqliteDatabase(os.path.join(globals.g_feed_database_dir, "binary.db"))
     db.initialize(database)
     db.connect()
@@ -494,9 +487,11 @@

     generate_feed_from_db()


-def wait_all_worker_exit():
-    """ Await the exit of our worker threads """
-    threadcount = 2
+def wait_all_worker_exit() -> None:
+    """
+    Await the exit of our worker threads.
+    """
+    threadcount = PROMISE_THREADS
     while threadcount > 1:
         threads = list(
             filter(
@@ -507,46 +502,35 @@
             )
         )
         threadcount = len(threads)
-        logger.debug(
-            f"Main thread Waiting on {threadcount} live worker-threads (exluding deamons)..."
-        )
+        logger.debug(f"Main thread Waiting on {threadcount} live worker-threads (excluding daemons)...")
         logger.debug(f"Live threads (excluding daemons): {threads}")
         time.sleep(0.1)
-        pass

     logger.debug("Main thread going to exit...")
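
The wait loop above (its filter body is elided by the hunk) amounts to polling the set of live, non-daemon threads until only the main thread remains. A hedged sketch of the same idea; `wait_for_workers` is an illustrative name, not the connector's API:

```python
import threading
import time


def wait_for_workers() -> None:
    """Block until every non-daemon worker thread (besides main) has exited."""
    while True:
        workers = [t for t in threading.enumerate()
                   if not t.daemon and t is not threading.main_thread()]
        if not workers:
            break
        time.sleep(0.1)
```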
-def start_workers(
-    exit_event: Event,
-    scanning_promises_queue: Queue,
-    scanning_results_queue: Queue,
-    run_only_once=False,
-) -> None:
+def start_workers(exit_event: Event, scanning_promises_queue: Queue, scanning_results_queue: Queue,
+                  run_only_once=False) -> None:
     """
     Starts worker-threads (not celery workers).
     Worker threads do work until they get the exit_event signal
     :param exit_event: event signaller
     :param scanning_promises_queue: promises queue
     :param scanning_results_queue: results queue
+    :param run_only_once: if True, run once and exit (default False)
     """
     logger.debug("Starting perf thread")
-    perf_thread = DatabaseScanningThread(
-        globals.g_scanning_interval, scanning_promises_queue, scanning_results_queue, exit_event, run_only_once
-    )
+    perf_thread = DatabaseScanningThread(globals.g_scanning_interval, scanning_promises_queue, scanning_results_queue,
+                                         exit_event, run_only_once)
     perf_thread.start()

     logger.debug("Starting promise thread(s)")
-    for _ in range(2):
-        promise_worker_thread = Thread(
-            target=promise_worker,
-            args=(exit_event, scanning_promises_queue, scanning_results_queue),
-        )
+    for _ in range(PROMISE_THREADS):
+        promise_worker_thread = Thread(target=promise_worker,
+                                       args=(exit_event, scanning_promises_queue, scanning_results_queue))
         promise_worker_thread.start()

     logger.debug("Starting results saver thread")
-    results_worker_thread = Thread(
-        target=results_worker_chunked, args=(exit_event, scanning_results_queue)
-    )
+    results_worker_thread = Thread(target=results_worker_chunked, args=(exit_event, scanning_results_queue))
     results_worker_thread.start()

@@ -557,21 +541,16 @@ class DatabaseScanningThread(Thread):

     by the signal handler
     """

-    def __init__(
-        self,
-        interval: int,
-        scanning_promises_queue: Queue,
-        scanning_results_queue: Queue,
-        exit_event: Event,
-        run_only_once : bool,
-        *args,
-        **kwargs,
-    ):
+    def __init__(self, interval: int, scanning_promises_queue: Queue, scanning_results_queue: Queue, exit_event: Event,
+                 run_only_once: bool, *args, **kwargs):
         """
+        Create a new database scanning object.

-        :param interval:
+        :param interval: interval in seconds between scans
         :param scanning_promises_queue: promises queue
+        :param scanning_results_queue: results queue
         :param exit_event: event signaller
+        :param run_only_once: if True, run once and then exit
         :param args: optional arguments
         :param kwargs: optional keyword arguments
         """
@@ -585,18 +564,24 @@ def __init__(
         self._scanning_promises_queue = scanning_promises_queue
         self._scanning_results_queue = scanning_results_queue
         self._run_only_once = run_only_once
-        if not (self._run_only_once):
+        if not self._run_only_once:
             self._target = self.scan_until_exit
         else:
             self._target = self.scan_once_and_exit

-    def scan_once_and_exit(self):
+    def scan_once_and_exit(self) -> None:
+        """
+        Perform a database scan once, then exit.
+        """
        self.do_db_scan()
         self._scanning_promises_queue.join()
         self._scanning_results_queue.join()
         self.exit_event.set()

-    def scan_until_exit(self):
+    def scan_until_exit(self) -> None:
+        """
+        Continually scan the database until instructed to quit.
+        """
         # TODO: DRIFT
         self.do_db_scan()
         while not self.exit_event.is_set():
@@ -609,23 +594,26 @@ def scan_until_exit(self):
         return

     def do_db_scan(self):
+        """
+        Do the actual database scan, trapping any problems.
+        """
         logger.debug("START database sweep")
         try:
             perform(globals.g_yara_rules_dir, self._conn, self._scanning_promises_queue)
-        except Exception as e:
-            logger.exception(
-                f"Something went wrong sweeping the CbR module store...{str(e)} "
-            )
+        except Exception as err:
+            logger.exception(f"Something went wrong sweeping the CbR module store: {err} ")
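
`scan_until_exit()` above is an interval loop built on `Event.wait()`, which doubles as an interruptible sleep: it returns True as soon as the event is set and False on timeout, so a shutdown signal cuts the pause short. A minimal sketch of that loop shape (the drift noted in the TODO, where each cycle slips by the scan's duration, is ignored here):

```python
import threading


def scan_loop(exit_event: threading.Event, interval: float, do_scan) -> None:
    """Run do_scan() every `interval` seconds until exit_event is set."""
    do_scan()                                     # initial sweep
    while not exit_event.wait(timeout=interval):  # False -> timed out; keep scanning
        do_scan()
    # True -> exit_event was set; fall through to shutdown
```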
+ """ try: if self._target: # noinspection PyArgumentList self._target(*self._args, **self._kwargs) finally: - # Avoid a refcycle if the thread is running a function with + # Avoid a recycle if the thread is running a function with # an argument that has a member that points to the thread. # shutdown database connection self._conn.close() @@ -634,16 +622,25 @@ def run(self): self.exit_event.set() -# Start celery worker in a daemon-thread -# TODO - Aggresive autoscaling config options -def start_celery_worker_thread(config_file): +# +def start_celery_worker_thread(config_file: str) -> None: + """ + Start celery worker in a daemon-thread. + + TODO: - Aggresive autoscaling config options + :param config_file: path to the yara configuration file + :return: + """ t = Thread(target=launch_celery_worker, kwargs={"config_file": config_file}) t.daemon = True t.start() -# launch a celery worker using the imported app context -def launch_celery_worker(config_file=None): +def launch_celery_worker(config_file: str = None) -> None: + """ + Launch a celery worker using the imported app context + :param config_file: optional path to a configuration file + """ localworker = worker.worker(app=app) localworker.run(config_file=config_file) logger.debug("CELERY WORKER LAUNCHING THREAD EXITED") @@ -662,36 +659,35 @@ def handle_arguments(): """ parser = argparse.ArgumentParser(description="Yara Agent for Yara Connector") - - #Controls config file (ini) + # Controls config file (ini) parser.add_argument( "--config-file", required=True, default="yaraconnector.conf", help="Location of the config file", ) - - #Controls log file location+name + # Controls log file location+name parser.add_argument( "--log-file", default="yaraconnector.log", help="Log file output" ) - #Controls the output feed location+name + # Controls the output feed location+name parser.add_argument( "--output-file", default="yara_feed.json", help="output feed file" ) - #Controls the working directory + # Controls the working directory parser.add_argument( "--working-dir", default=".", help="working directory", required=False ) - #Controls the lock File + # Controls the lock File parser.add_argument( "--lock-file", default="./yaraconnector", help="lock file", required=False ) - #Controls batch vs continous mode , defaults to batch processing + # Controls batch vs continous mode , defaults to batch processing parser.add_argument( "--run-once", default=True, help="Run as batch mode or no", required=False ) - #Validates the rules + + # Validates the rules parser.add_argument( "--validate-yara-rules", action="store_true", @@ -705,23 +701,22 @@ def handle_arguments(): def main(): """ Main execution function. 
Script will exit with a non-zero value based on the following: - 1: Not the only instance running - 2: Configuration problem + 1: Configuration problem + 2: Yara rule validation problem 3: User interrupt 4: Unexpected Yara scan exception - 5: Yara rule validation problem """ args = handle_arguments() + # check for extended logging if args.debug: logger.setLevel(logging.DEBUG) + # check for additional log file if args.log_file: use_log_file = os.path.abspath(os.path.expanduser(args.log_file)) formatter = logging.Formatter(logging_format) - handler = logging.handlers.RotatingFileHandler( - use_log_file, maxBytes=10 * 1000000, backupCount=10 - ) + handler = logging.handlers.RotatingFileHandler(use_log_file, maxBytes=10 * 1000000, backupCount=10) handler.setFormatter(formatter) logger.addHandler(handler) @@ -730,7 +725,7 @@ def main(): ConfigurationInit(args.config_file, args.output_file) except Exception as err: logger.error(f"Unable to continue due to a configuration problem: {err}") - sys.exit(2) + sys.exit(1) if args.validate_yara_rules: logger.info(f"Validating yara rules in directory: {globals.g_yara_rules_dir}") @@ -740,65 +735,50 @@ def main(): logger.info("All yara rules compiled successfully") except Exception as err: logger.error(f"There were errors compiling yara rules: {err}") - sys.exit(5) - else: #Doing a real run - - #Exit condition and queues for doing work + sys.exit(2) + else: # Doing a real run + # Exit condition and queues for doing work exit_event = Event() scanning_promise_queue = Queue() scanning_results_queue = Queue() - #Lock file so this process is a singleton + # Lock file so this process is a singleton lock_file = lockfile.FileLock(args.lock_file) try: - if not args.run_once: - - #Running as a deamon - - #Get working dir setting + if not args.run_once: # Running as a deamon + # Get working dir setting working_dir = os.path.abspath(os.path.expanduser(args.working_dir)) - - #Mark files to be preserved + # Mark files to be preserved files_preserve = get_log_file_handles(logger) files_preserve.extend([args.lock_file, args.log_file, args.output_file]) - # defauls to piping to /dev/null + # defaults to piping to /dev/null + deamon_kwargs = {"working_directory": working_dir, "pidfile": lock_file, + "files_preserve": files_preserve} - deamon_kwargs = { - "working_directory": working_dir, - "pidfile": lock_file, - "files_preserve": files_preserve, - } - #If in debug mode, make sure stdout and stderr don't got to /dev/null + # If in debug mode, make sure stdout and stderr don't go to /dev/null if args.debug: deamon_kwargs.update({"stdout": sys.stdout, "stderr": sys.stderr}) - - context = daemon.DaemonContext(**deamon_kwargs) - - #Operating mode - are we the master a worker? + # Operating mode - are we the master a worker? 
run_as_master = globals.g_mode == "master" - - #Signal handler partial function + # Signal handler partial function sig_handler = partial(handle_sig, exit_event) - context.signal_map = { - signal.SIGTERM: sig_handler, - signal.SIGQUIT: sig_handler, - } + context.signal_map = {signal.SIGTERM: sig_handler, signal.SIGQUIT: sig_handler} - #Make sure we close the deamon context at the end + # Make sure we close the deamon context at the end with context: # only connect to cbr if we're the master if run_as_master: - #initialize local resources + # initialize local resources init_local_resources() - #start working threads - start_workers( - exit_event, scanning_promise_queue, scanning_results_queue - ) + + # start working threads + start_workers(exit_event, scanning_promise_queue, scanning_results_queue) + # start local celeryD worker if working mode is local if not globals.g_remote: start_celery_worker_thread(args.config_file) @@ -811,21 +791,17 @@ def main(): wait_all_worker_exit() logger.info("Yara connector shutdown OK") else: # Just do one batch - #init local resources + # init local resources init_local_resources() - #start necessary worker threads - start_workers( - exit_event, - scanning_promise_queue, - scanning_results_queue, - run_only_once=True, - ) - #Start a celery worker if we need one + + # start necessary worker threads + start_workers(exit_event, scanning_promise_queue, scanning_results_queue, run_only_once=True) + + # Start a celery worker if we need one if not globals.g_remote: start_celery_worker_thread(args.config_file) run_to_exit_signal(exit_event) wait_all_worker_exit() - except KeyboardInterrupt: logger.info("\n\n##### Interupted by User!\n") exit_event.set() diff --git a/src/rule_handling.py b/src/rule_handling.py new file mode 100644 index 0000000..47907fd --- /dev/null +++ b/src/rule_handling.py @@ -0,0 +1,36 @@ +# coding: utf-8 +# Copyright © 2014-2019 VMware, Inc. All Rights Reserved. + +import hashlib +import os +from typing import List, Optional + +import globals + + +def generate_yara_rule_map_hash(yara_rule_path: str, return_list: bool = False) -> Optional[List[str]]: + """ + Create a list of hashes for each yara rule. + + :param yara_rule_path: the path to where the yara rules are stored. + :param return_list: if True; return the list locally instead of saving to globals + """ + temp_list = [] + for fn in os.listdir(yara_rule_path): + if fn.lower().endswith(".yar") or fn.lower().endswith(".yara"): + fullpath = os.path.join(yara_rule_path, fn) + if not os.path.isfile(fullpath): + continue + with open(os.path.join(yara_rule_path, fn), "rb") as fp: + data = fp.read() + md5 = hashlib.md5() + md5.update(data) + temp_list.append(str(md5.hexdigest())) + + temp_list.sort() + + if not return_list: + globals.g_yara_rule_map_hash_list = temp_list + return None + else: + return temp_list diff --git a/src/tasks.py b/src/tasks.py index 49d62d9..ddc8654 100644 --- a/src/tasks.py +++ b/src/tasks.py @@ -2,7 +2,6 @@ # Copyright © 2014-2019 VMware, Inc. All Rights Reserved. 
import datetime -import hashlib import io import logging import multiprocessing @@ -20,30 +19,38 @@ from analysis_result import AnalysisResult from celery_app import app from config_handling import ConfigurationInit +from rule_handling import generate_yara_rule_map_hash logger = logging.getLogger(__name__) logger.setLevel(logging.DEBUG) +# ----- Lock Object Class ------------------------------------------------------------ + class ReadWriteLock: - """ A lock object that allows many simultaneous "read locks", but - only one "write lock." """ + """ + A lock object that allows many simultaneous "read locks", but + only one "write lock." + """ def __init__(self): self._read_ready = multiprocessing.Condition(multiprocessing.Lock()) self._readers = 0 - def acquire_read(self): - """ Acquire a read lock. Blocks only if a thread has - acquired the write lock. """ + def acquire_read(self) -> None: + """ + Acquire a read lock. Blocks only if a thread has acquired the write lock. + """ self._read_ready.acquire() try: self._readers += 1 finally: self._read_ready.release() - def release_read(self): - """ Release a read lock. """ + def release_read(self) -> None: + """ + Release a read lock. + """ self._read_ready.acquire() try: self._readers -= 1 @@ -52,26 +59,33 @@ def release_read(self): finally: self._read_ready.release() - def acquire_write(self): - """ Acquire a write lock. Blocks until there are no + def acquire_write(self) -> None: + """ + Acquire a write lock. Blocks until there are no acquired read or write locks. """ self._read_ready.acquire() while self._readers > 0: self._read_ready.wait() - def release_write(self): - """ Release a write lock. """ + def release_write(self) -> None: + """ + Release a write lock. + """ self._read_ready.release() +# ----- Actual task functions ------------------------------------------------------------ + compiled_yara_rules = None compiled_rules_lock = ReadWriteLock() -def add_worker_arguments(parser): - parser.add_argument( - "--config-file", default="yara_worker.conf", help="Yara Worker Config" - ) +def add_worker_arguments(parser) -> None: + """ + Add yara worker configuration option. + :param parser: option parser + """ + parser.add_argument("--config-file", default="yara_worker.conf", help="Yara Worker Config") app.user_options["worker"].add(add_worker_arguments) @@ -95,7 +109,7 @@ def generate_rule_map(yara_rule_path: str) -> dict: """ Create a dictionary keyed by filename containing file paths :param yara_rule_path: location of yara rules - :return: + :return: dict of paths keyed by namespace """ rule_map = {} for fn in os.listdir(yara_rule_path): @@ -114,54 +128,27 @@ def generate_rule_map(yara_rule_path: str) -> dict: return rule_map -# noinspection DuplicatedCode -def generate_yara_rule_map_hash(yara_rule_path: str) -> List: - """ - Create a list of md5 hashes based on rule file contents. - - :param yara_rule_path: location of the yara rules - :return: - """ - temp_list = [] - for fn in os.listdir(yara_rule_path): - if fn.lower().endswith(".yar") or fn.lower().endswith(".yara"): - fullpath = os.path.join(yara_rule_path, fn) - if not os.path.isfile(fullpath): - continue - with open(os.path.join(yara_rule_path, fn), "rb") as fp: - data = fp.read() - # NOTE: Original logic resulted in a cumulative hash for each file (linking them) - md5 = hashlib.md5() - md5.update(data) - temp_list.append(str(md5.hexdigest())) - - temp_list.sort() - return temp_list - - @app.task def update_yara_rules_remote(yara_rules: dict) -> None: """ Update remote yara rules. 
    :param yara_rules: dict of rules, keyed by file name
-    :return:
     """
     try:
         for key in yara_rules:
             with open(os.path.join(globals.g_yara_rules_dir, key), "wb") as fp:
                 fp.write(yara_rules[key])
     except Exception as err:
-        logger.error(f"Error writing rule file: {err}")
-        logger.error(traceback.format_exc())
+        logger.exception(f"Error writing rule file: {err}")


 def update_yara_rules():
     global compiled_yara_rules
     global compiled_rules_lock
+
     compiled_rules_lock.acquire_read()
     if compiled_yara_rules:
         logger.debug("Reading the Compiled rules")
-        return
     else:
         logger.debug("Updating yara rules in worker(s)")
         yara_rule_map = generate_rule_map(globals.g_yara_rules_dir)
@@ -172,13 +159,11 @@ def update_yara_rules():
         logger.debug("Succesfully updated yara rules")
         compiled_rules_lock.release_write()
         compiled_rules_lock.acquire_read()
-    return


-def get_binary_by_hash(url, hsum, token):
+def get_binary_by_hash(url: str, hsum: str, token: str):
     """
-    do a binary-retrival-by hash (husm) api call against
-    the configured server-by (url) using (token)
+    Do a binary-retrieval-by-hash (hsum) API call against the configured server (url) using (token).
     """
     headers = {"X-Auth-Token": token}
     request_url = f"{url}/api/v1/binary/{hsum}"
@@ -197,7 +182,12 @@

 # noinspection PyUnusedFunction
 @app.task
-def analyze_bins(hashes):
+def analyze_bins(hashes: List[str]) -> group:
+    """
+    Analyze any returned binaries.
+    :param hashes: list of hashes
+    :return: celery group
+    """
     return group(analyze_binary.s(h) for h in hashes).apply_async()


@@ -217,9 +207,7 @@ def analyze_binary(md5sum: str) -> AnalysisResult:
     try:
         analysis_result.last_scan_date = datetime.datetime.now()

-        binary_data = get_binary_by_hash(
-            globals.g_cb_server_url, md5sum.upper(), globals.g_cb_server_token
-        )
+        binary_data = get_binary_by_hash(globals.g_cb_server_url, md5sum.upper(), globals.g_cb_server_token)

         if not binary_data:
             logger.debug(f"No binary agailable for {md5sum}")
@@ -227,20 +215,19 @@

         try:
-            # matches = "debug"
             update_yara_rules()
             matches = compiled_yara_rules.match(data=binary_data.read(), timeout=30)
+
+            # NOTE: Below is for debugging use only
+            # matches = "debug"
+
             if matches:
                 score = get_high_score(matches)
                 analysis_result.score = score
-                analysis_result.short_result = "Matched yara rules: %s" % ", ".join(
-                    [match.rule for match in matches]
-                )
+                analysis_result.short_result = "Matched yara rules: %s" % ", ".join([match.rule for match in matches])
                 # analysis_result.short_result = "Matched yara rules: debug"
                 analysis_result.long_result = analysis_result.long_result
-                analysis_result.misc = generate_yara_rule_map_hash(
-                    globals.g_yara_rules_dir
-                )
+                analysis_result.misc = generate_yara_rule_map_hash(globals.g_yara_rules_dir, return_list=True)
             else:
                 analysis_result.score = 0
                 analysis_result.short_result = "No Matches"
@@ -272,7 +259,7 @@ def get_high_score(matches) -> int:
     Find the highest match score.

     :param matches: List of rule matches.
-    :return:
+    :return: highest score
     """
     # NOTE: if str(matches) == "debug", return 100
     if matches == "debug":
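
`get_high_score()` above settles the report score from rule matches; with yara-python, each `Match` carries its rule's metadata, so a numeric `score` field (as in the sample rule added later in this series, `meta: score = 10`) can drive it. A hedged sketch of that scoring — the body of `get_high_score()` is not fully shown here, so this is an assumed reading, with an illustrative function name:

```python
def highest_score(matches) -> int:
    """Return the largest numeric `score` metadata value across yara matches."""
    if matches == "debug":      # debugging stand-in used by the connector
        return 100
    score = 0
    for match in matches:       # yara.Match objects from rules.match(...)
        score = max(score, int(match.meta.get("score", 0)))
    return score
```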
diff --git a/test/test_main.py b/test/test_main.py
index 064099c..f1ad151 100644
--- a/test/test_main.py
+++ b/test/test_main.py
@@ -1,19 +1,8 @@
 # coding: utf-8
 # Copyright © 2014-2019 VMware, Inc. All Rights Reserved.

-import os
 from unittest import TestCase

-import globals
-from main import generate_yara_rule_map_hash
-
-TESTS = os.path.abspath(os.path.dirname(__file__))
-

 class TestMain(TestCase):
-
-    def test_01_generate_yara_rule_map_hash(self):
-        globals.g_yara_rule_map_hash_list = []
-        generate_yara_rule_map_hash(os.path.join(TESTS, "rules"))
-        self.assertEqual(1, len(globals.g_yara_rule_map_hash_list))
-        self.assertEqual("191cc0ea3f9ef90ed1850a3650cd38ed", globals.g_yara_rule_map_hash_list[0])
+    pass
diff --git a/test/test_ruleHandler.py b/test/test_ruleHandler.py
new file mode 100644
index 0000000..f7c2988
--- /dev/null
+++ b/test/test_ruleHandler.py
@@ -0,0 +1,24 @@
+# coding: utf-8
+# Copyright © 2014-2019 VMware, Inc. All Rights Reserved.
+
+import os
+from unittest import TestCase
+
+import globals
+from rule_handling import generate_yara_rule_map_hash
+
+TESTS = os.path.abspath(os.path.dirname(__file__))
+
+
+class TestRuleHandler(TestCase):
+    def test_01_generate_yara_rule_map_hash_in_globals(self):
+        globals.g_yara_rule_map_hash_list = []
+        check = generate_yara_rule_map_hash(os.path.join(TESTS, "rules"))
+        self.assertIsNone(check)
+        self.assertEqual(1, len(globals.g_yara_rule_map_hash_list))
+        self.assertEqual("191cc0ea3f9ef90ed1850a3650cd38ed", globals.g_yara_rule_map_hash_list[0])
+
+    def test_01b_generate_yara_rule_map_hash(self):
+        the_list = generate_yara_rule_map_hash(os.path.join(TESTS, "rules"), return_list=True)
+        self.assertEqual(1, len(the_list))
+        self.assertEqual("191cc0ea3f9ef90ed1850a3650cd38ed", the_list[0])
diff --git a/test/test_tasks.py b/test/test_tasks.py
index f141d44..9611d43 100644
--- a/test/test_tasks.py
+++ b/test/test_tasks.py
@@ -4,7 +4,7 @@
 import os
 from unittest import TestCase

-from tasks import generate_rule_map, generate_yara_rule_map_hash
+from tasks import generate_rule_map

 TESTS = os.path.abspath(os.path.dirname(__file__))

@@ -17,7 +17,3 @@ def test_01a_generate_yara_rule_map(self):
         self.assertTrue("test" in the_dict)
         self.assertTrue(the_dict["test"].endswith("test/rules/test.yara"))

-    def test_01b_generate_yara_rule_map_hash(self):
-        the_list = generate_yara_rule_map_hash(os.path.join(TESTS, "rules"))
-        self.assertEqual(1, len(the_list))
-        self.assertEqual("191cc0ea3f9ef90ed1850a3650cd38ed", the_list[0])

From 9d1c1910e4cd7421b9588e8dcfc4be4d780b7ce2 Mon Sep 17 00:00:00 2001
From: Zachary Estep
Date: Fri, 22 Nov 2019 10:26:07 -0500
Subject: [PATCH 155/257] Finishing up CB28003

---
 src/tasks.py | 47 ++++++++++++++++++++++++++++++++++++++---------
 1 file changed, 38 insertions(+), 9 deletions(-)

diff --git a/src/tasks.py b/src/tasks.py
index ddc8654..662c053 100644
--- a/src/tasks.py
+++ b/src/tasks.py
@@ -11,10 +11,12 @@
 from typing import List

 import requests
+import json
 # noinspection PyPackageRequirements
 import yara
 from celery import bootsteps, group
+from io import StringIO

 import globals
 from analysis_result import AnalysisResult
 from celery_app import app
 from config_handling import ConfigurationInit
@@ -77,6 +79,7 @@ def release_write(self) -> None:
 # ----- Actual task functions ------------------------------------------------------------

 compiled_yara_rules = None
+compiled_rules_hash = None
 compiled_rules_lock = ReadWriteLock()

     except Exception as err:
         logger.exception(f"Error writing rule file: {err}")

+# Caller is obliged to compiled_rules_lock.release_read()
 def update_yara_rules():
+    """
+    Gets read access on the in-memory set of yara rules, which allow multiple concurrent readers.
+    If there is no current in-memory reference to the current yara rules, this function attempts
+    to read the yara-rules directory on the worker and produce a new set of compiled rules.
+    The rules are written to disk so that other workers can load them from disk rather than
+    re-compiling them.
+    """
     global compiled_yara_rules
+    global compiled_rules_hash
     global compiled_rules_lock

     compiled_rules_lock.acquire_read()
@@ -152,18 +162,36 @@
     else:
         logger.debug("Updating yara rules in worker(s)")
         yara_rule_map = generate_rule_map(globals.g_yara_rules_dir)
-        new_rules_object = yara.compile(filepaths=yara_rule_map)
-        compiled_rules_lock.release_read()
-        compiled_rules_lock.acquire_write()
-        compiled_yara_rules = new_rules_object
-        logger.debug("Succesfully updated yara rules")
-        compiled_rules_lock.release_write()
+        rules_hash = generate_yara_rule_map_hash(
+            globals.g_yara_rules_dir
+        )
+        compiled_rules_filepath = os.path.join(globals.g_yara_rules_dir, ".YARA_RULES_{0}".format(rules_hash))
+        if not (os.path.exists(compiled_rules_filepath)):
+            new_rules_object = yara.compile(filepaths=yara_rule_map)
+            new_rules_object.save()
+            compiled_rules_lock.release_read()
+            compiled_rules_lock.acquire_write()
+            compiled_yara_rules = new_rules_object
+            compiled_rules_hash = rules_hash
+            logger.debug("Succesfully updated yara rules")
+            compiled_rules_lock.release_write()
+        else:  # Another worker has already written the rules to a file for this rule-hash
+            new_rules_object = yara.load(compiled_rules_filepath)
+            new_rules_object.save()
+            compiled_rules_lock.release_read()
+            compiled_rules_lock.acquire_write()
+            compiled_yara_rules = new_rules_object
+            compiled_rules_hash = rules_hash
+            logger.debug("Succesfully updated yara rules")
+            compiled_rules_lock.release_write()
     compiled_rules_lock.acquire_read()
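
The branch above introduces an on-disk cache of compiled rules, keyed by a hash of the ruleset, so sibling workers load instead of recompile. A minimal sketch of the scheme with yara-python's `compile`/`save`/`load` (using the save-to-path form that the later "fixing bug" commit settles on; the function name and arguments are illustrative):

```python
import os
import yara  # yara-python


def load_or_compile(rule_paths: dict, rules_dir: str, rules_hash: str):
    """Return compiled rules, reusing an on-disk cache keyed by the ruleset hash."""
    cache_path = os.path.join(rules_dir, ".YARA_RULES_{0}".format(rules_hash))
    if os.path.exists(cache_path):
        return yara.load(cache_path)        # another worker already compiled these
    rules = yara.compile(filepaths=rule_paths)
    rules.save(cache_path)                  # persist for sibling workers
    return rules
```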

 def get_binary_by_hash(url: str, hsum: str, token: str):
     """
-    Do a binary-retrieval-by-hash (hsum) API call against the configured server (url) using (token).
+
+    Do a binary-retrieval-by-hash (hsum) API call against
+    the configured server (url) using (token).
     """
     headers = {"X-Auth-Token": token}
     request_url = f"{url}/api/v1/binary/{hsum}"
@@ -199,6 +227,7 @@ def analyze_binary(md5sum: str) -> AnalysisResult:
     :return: AnalysisResult instance
     """
     global compiled_yara_rules
+    global compiled_rules_hash
     global compiled_rules_lock

     logger.debug(f"{md5sum}: in analyze_binary")
@@ -227,7 +256,7 @@
                 analysis_result.short_result = "Matched yara rules: %s" % ", ".join([match.rule for match in matches])
                 # analysis_result.short_result = "Matched yara rules: debug"
                 analysis_result.long_result = analysis_result.long_result
-                analysis_result.misc = generate_yara_rule_map_hash(globals.g_yara_rules_dir, return_list=True)
+                analysis_result.misc = compiled_rules_hash
             else:
                 analysis_result.score = 0
                 analysis_result.short_result = "No Matches"

From 6c60207f1e7544f4008a0b2025d2a997836bf468 Mon Sep 17 00:00:00 2001
From: Zachary Estep
Date: Fri, 22 Nov 2019 10:27:00 -0500
Subject: [PATCH 156/257] spec updates for rpm and pyinstaller

---
 cb-yara-connector.rpm.spec | 4 ++--
 cb-yara-connector.spec     | 2 +-
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/cb-yara-connector.rpm.spec b/cb-yara-connector.rpm.spec
index eabfa08..c7c0dd2 100644
--- a/cb-yara-connector.rpm.spec
+++ b/cb-yara-connector.rpm.spec
@@ -9,7 +9,7 @@ Requires: redis
 Carbon Black Yara Agent - Scans binaries with configured yara rules

 %build
-pyinstaller %{_sourcedir}/cb-yara-connector.spec
+cd {%_sourcedir} ; pyinstaller cb-yara-connector.spec

 %install
 mkdir -p ${RPM_BUILD_ROOT}/var/log/cb/integrations/cb-yara-connector
@@ -20,4 +20,4 @@ cp example-conf/yara.conf ${RPM_BUILD_ROOT}/etc/cb/integrations/cb-yara-connecto
 install -m 0755 init-scripts/yaraconnector.conf ${RPM_BUILD_ROOT}/etc/init/yaraconnector.conf
 install -m 0755 dist/yaraconnector ${RPM_BUILD_ROOT}/usr/share/cb/integrations/cb-yara-connector/yaraconnector

-%files -f MANIFEST
\ No newline at end of file
+%files -f MANIFEST
diff --git a/cb-yara-connector.spec b/cb-yara-connector.spec
index 98645bf..677fee9 100644
--- a/cb-yara-connector.spec
+++ b/cb-yara-connector.spec
@@ -8,7 +8,7 @@ block_cipher = None
 a = Analysis(['src/main.py'],
              pathex=['./src'],
              binaries=[],
-             hiddenimports=['celery.apps','celery.apps.worker','celery.app.log','celery.fixups', 'celery.fixups.django', 'celery.loaders.app',
+             hiddenimports=['lockfile','celery.apps','celery.apps.worker','celery.app.log','celery.fixups', 'celery.fixups.django', 'celery.loaders.app',
                             'celery.app.amqp', 'kombu.transport.redis', 'redis', 'celery.backends',
                             'celery.backends.redis', 'celery.app.events', 'celery.events','celery.redis',
                             'kombu.transport.pyamqp'],

From 16ddbe6489e9960d90007f534a201264efde2f84 Mon Sep 17 00:00:00 2001
From: Zachary Estep
Date: Mon, 25 Nov 2019 10:30:57 -0500
Subject: [PATCH 157/257] updates

---
 src/tasks.py | 15 ++++++++++++---
 1 file changed, 12 insertions(+), 3 deletions(-)

diff --git a/src/tasks.py b/src/tasks.py
index 662c053..c2b59ee 100644
--- a/src/tasks.py
+++ b/src/tasks.py
@@ -8,6 +8,7 @@
 import os
 import traceback
 import zipfile
+import hashlib
 from typing import List

 import requests
@@ -162,13 +163,19 @@
     else:
         logger.debug("Updating yara rules in worker(s)")
         yara_rule_map = generate_rule_map(globals.g_yara_rules_dir)
-        rules_hash = generate_yara_rule_map_hash(
+        generate_yara_rule_map_hash(
             globals.g_yara_rules_dir
         )
+        md5sum = hashlib.md5()
+        for h in
globals.g_yara_rule_map_hash_list: + md5sum.update(h.encode("utf-8")) + rules_hash = md5sum.hexdigest() + compiled_rules_filepath = os.path.join(globals.g_yara_rules_dir, ".YARA_RULES_{0}".format(rules_hash)) + logger.debug("yara rule path is {0}".format(compiled_rules_filepath)) if not (os.path.exists(compiled_rules_filepath)): new_rules_object = yara.compile(filepaths=yara_rule_map) - new_rules_object.save() + new_rules_object.save(compiled_rules_filepath) compiled_rules_lock.release_read() compiled_rules_lock.acquire_write() compiled_yara_rules = new_rules_object @@ -177,7 +184,7 @@ def update_yara_rules(): compiled_rules_lock.release_write() else: # Another worker has already written the rules to a file for this rule-hash new_rules_object = yara.load(compiled_rules_filepath) - new_rules_object.save() + new_rules_object.save(compiled_rules_filepath) compiled_rules_lock.release_read() compiled_rules_lock.acquire_write() compiled_yara_rules = new_rules_object @@ -236,6 +243,7 @@ def analyze_binary(md5sum: str) -> AnalysisResult: try: analysis_result.last_scan_date = datetime.datetime.now() + binary_data = get_binary_by_hash(globals.g_cb_server_url, md5sum.upper(), globals.g_cb_server_token) if not binary_data: @@ -245,6 +253,7 @@ def analyze_binary(md5sum: str) -> AnalysisResult: try: update_yara_rules() + matches = compiled_yara_rules.match(data=binary_data.read(), timeout=30) # NOTE: Below is for debugging use only From 7150f3be6ea25af2a4762432d63b3eb95c537486 Mon Sep 17 00:00:00 2001 From: Burton Choinski Date: Mon, 2 Dec 2019 12:24:12 -0500 Subject: [PATCH 158/257] Cleaned up --- src/tasks.py | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/src/tasks.py b/src/tasks.py index c2b59ee..5195736 100644 --- a/src/tasks.py +++ b/src/tasks.py @@ -2,22 +2,20 @@ # Copyright © 2014-2019 VMware, Inc. All Rights Reserved. 
import datetime +import hashlib import io import logging import multiprocessing import os import traceback import zipfile -import hashlib from typing import List import requests -import json # noinspection PyPackageRequirements import yara from celery import bootsteps, group -from io import StringIO import globals from analysis_result import AnalysisResult from celery_app import app @@ -145,6 +143,7 @@ def update_yara_rules_remote(yara_rules: dict) -> None: except Exception as err: logger.exception(f"Error writing rule file: {err}") + # Caller is obliged to compiled_rules_lock.release_read() def update_yara_rules(): """ @@ -164,8 +163,8 @@ def update_yara_rules(): logger.debug("Updating yara rules in worker(s)") yara_rule_map = generate_rule_map(globals.g_yara_rules_dir) generate_yara_rule_map_hash( - globals.g_yara_rules_dir - ) + globals.g_yara_rules_dir + ) md5sum = hashlib.md5() for h in globals.g_yara_rule_map_hash_list: md5sum.update(h.encode("utf-8")) @@ -182,7 +181,7 @@ def update_yara_rules(): compiled_rules_hash = rules_hash logger.debug("Succesfully updated yara rules") compiled_rules_lock.release_write() - else: # Another worker has already written the rules to a file for this rule-hash + else: # Another worker has already written the rules to a file for this rule-hash new_rules_object = yara.load(compiled_rules_filepath) new_rules_object.save(compiled_rules_filepath) compiled_rules_lock.release_read() @@ -243,7 +242,6 @@ def analyze_binary(md5sum: str) -> AnalysisResult: try: analysis_result.last_scan_date = datetime.datetime.now() - binary_data = get_binary_by_hash(globals.g_cb_server_url, md5sum.upper(), globals.g_cb_server_token) if not binary_data: From 8aad7344973e7b244725807ddb3277e53d9c3f7f Mon Sep 17 00:00:00 2001 From: Burton Choinski Date: Tue, 3 Dec 2019 10:32:31 -0500 Subject: [PATCH 159/257] added sample rule --- .gitignore | 2 ++ README.md | 52 +++++++++++++++++++++----------------- samples/matchover100kb.yar | 8 ++++++ 3 files changed, 39 insertions(+), 23 deletions(-) create mode 100644 samples/matchover100kb.yar diff --git a/.gitignore b/.gitignore index 8860d09..5dd3184 100644 --- a/.gitignore +++ b/.gitignore @@ -48,3 +48,5 @@ binary.db # incase inadvertantly left by unit testing conf-testing.conf + +# diff --git a/README.md b/README.md index b78e31c..c7dd16a 100644 --- a/README.md +++ b/README.md @@ -1,7 +1,11 @@ # Installing Yara Agent (Centos/RHEL 6) The Yara agent must be installed on the same system as Cb Response. - + +* Create installation area + ```shell script + mkdir -p /usr/share/cb/integrations/yara + ``` * Download Yara Agent ``` @@ -68,8 +72,8 @@ _[TBD]_ * Make sure openssl-devel is installed - ``` - sudo yum install openssl-devel + ``` + sudo yum install openssl-devel ``` * Install Git and GCC @@ -119,27 +123,29 @@ _[TBD]_ #### Example Yara Worker Config File - [general] +```ini +[general] + +; +; Python Celery Broker Url. Set this full url string for Redis +; Example: redis:// +; +broker_url=redis://127.0.0.1 + +; +; Cb Response Server Configuration +; Used for downloading binaries +; +cb_server_url= +cb_server_token= + +; +; Directory for temporary yara rules storage +; WARNING: Put your yara rules with the yara agent. This is just temporary storage. +; +yara_rules_dir=./yara_rules +``` - ; - ; Python Celery Broker Url. 
Set this full url string for Redis - ; Example: redis:// - ; - broker_url=redis://127.0.0.1 - - ; - ; Cb Response Server Configuration - ; Used for downloading binaries - ; - cb_server_url= - cb_server_token= - - ; - ; Directory for temporary yara rules storage - ; WARNING: Put your yara rules with the yara agent. This is just temporary storage. - ; - yara_rules_dir=./yara_rules - * Copy, modify and save to `yara_worker.conf` #### Run Yara Worker Manually diff --git a/samples/matchover100kb.yar b/samples/matchover100kb.yar new file mode 100644 index 0000000..889afc2 --- /dev/null +++ b/samples/matchover100kb.yar @@ -0,0 +1,8 @@ +// Sample rule to match binaries over 100kb in size + +rule matchover100kb { + meta: + score = 10 + condition: + filesize > 100KB +} From 6acf1c50aa676f9c8694877b8a0ab30740123735 Mon Sep 17 00:00:00 2001 From: Zachary Estep Date: Tue, 3 Dec 2019 12:35:25 -0500 Subject: [PATCH 160/257] fixing bug --- src/tasks.py | 64 ++++++++++++++++++++++++++++++---------------------- 1 file changed, 37 insertions(+), 27 deletions(-) diff --git a/src/tasks.py b/src/tasks.py index 5195736..9f684d2 100644 --- a/src/tasks.py +++ b/src/tasks.py @@ -12,6 +12,7 @@ from typing import List import requests + # noinspection PyPackageRequirements import yara from celery import bootsteps, group @@ -28,6 +29,7 @@ # ----- Lock Object Class ------------------------------------------------------------ + class ReadWriteLock: """ A lock object that allows many simultaneous "read locks", but @@ -87,7 +89,9 @@ def add_worker_arguments(parser) -> None: Add yara worker configuration option. :param parser: option parser """ - parser.add_argument("--config-file", default="yara_worker.conf", help="Yara Worker Config") + parser.add_argument( + "--config-file", default="yara_worker.conf", help="Yara Worker Config" + ) app.user_options["worker"].add(add_worker_arguments) @@ -99,7 +103,7 @@ class MyBootstep(bootsteps.Step): """ # noinspection PyUnusedLocal - def __init__(self, worker, config_file='yara_worker.conf', **options): + def __init__(self, worker, config_file="yara_worker.conf", **options): super().__init__(self) ConfigurationInit(config_file, None) @@ -162,35 +166,32 @@ def update_yara_rules(): else: logger.debug("Updating yara rules in worker(s)") yara_rule_map = generate_rule_map(globals.g_yara_rules_dir) - generate_yara_rule_map_hash( - globals.g_yara_rules_dir - ) + generate_yara_rule_map_hash(globals.g_yara_rules_dir) md5sum = hashlib.md5() for h in globals.g_yara_rule_map_hash_list: md5sum.update(h.encode("utf-8")) rules_hash = md5sum.hexdigest() - compiled_rules_filepath = os.path.join(globals.g_yara_rules_dir, ".YARA_RULES_{0}".format(rules_hash)) + compiled_rules_filepath = os.path.join( + globals.g_yara_rules_dir, ".YARA_RULES_{0}".format(rules_hash) + ) logger.debug("yara rule path is {0}".format(compiled_rules_filepath)) - if not (os.path.exists(compiled_rules_filepath)): + + rules_already_exist = os.path.exists(compiled_rules_filepath) + if not(rules_already_exist): new_rules_object = yara.compile(filepaths=yara_rule_map) - new_rules_object.save(compiled_rules_filepath) - compiled_rules_lock.release_read() - compiled_rules_lock.acquire_write() - compiled_yara_rules = new_rules_object - compiled_rules_hash = rules_hash - logger.debug("Succesfully updated yara rules") - compiled_rules_lock.release_write() - else: # Another worker has already written the rules to a file for this rule-hash + else: new_rules_object = yara.load(compiled_rules_filepath) + compiled_rules_lock.release_read() + 
compiled_rules_lock.acquire_write() + if not(rules_already_exist): new_rules_object.save(compiled_rules_filepath) - compiled_rules_lock.release_read() - compiled_rules_lock.acquire_write() - compiled_yara_rules = new_rules_object - compiled_rules_hash = rules_hash - logger.debug("Succesfully updated yara rules") - compiled_rules_lock.release_write() + compiled_yara_rules = new_rules_object + compiled_rules_hash = rules_hash + logger.debug("Succesfully updated yara rules") + compiled_rules_lock.release_write() compiled_rules_lock.acquire_read() + logger.debug("Exiting update routine ok") def get_binary_by_hash(url: str, hsum: str, token: str): @@ -201,8 +202,13 @@ def get_binary_by_hash(url: str, hsum: str, token: str): """ headers = {"X-Auth-Token": token} request_url = f"{url}/api/v1/binary/{hsum}" - response = requests.get(request_url, headers=headers, stream=True, verify=False, - timeout=globals.g_worker_network_timeout) + response = requests.get( + request_url, + headers=headers, + stream=True, + verify=False, + timeout=globals.g_worker_network_timeout, + ) if response: with zipfile.ZipFile(io.BytesIO(response.content)) as the_binary_zip: # the response contains the file ziped in 'filedata' @@ -242,7 +248,9 @@ def analyze_binary(md5sum: str) -> AnalysisResult: try: analysis_result.last_scan_date = datetime.datetime.now() - binary_data = get_binary_by_hash(globals.g_cb_server_url, md5sum.upper(), globals.g_cb_server_token) + binary_data = get_binary_by_hash( + globals.g_cb_server_url, md5sum.upper(), globals.g_cb_server_token + ) if not binary_data: logger.debug(f"No binary agailable for {md5sum}") @@ -260,7 +268,9 @@ def analyze_binary(md5sum: str) -> AnalysisResult: if matches: score = get_high_score(matches) analysis_result.score = score - analysis_result.short_result = "Matched yara rules: %s" % ", ".join([match.rule for match in matches]) + analysis_result.short_result = "Matched yara rules: %s" % ", ".join( + [match.rule for match in matches] + ) # analysis_result.short_result = "Matched yara rules: debug" analysis_result.long_result = analysis_result.long_result analysis_result.misc = compiled_rules_hash @@ -276,8 +286,8 @@ def analyze_binary(md5sum: str) -> AnalysisResult: analysis_result.last_error_msg = f"Yara exception: {err}" except Exception as err: analysis_result.last_error_msg = ( - f"Other exception while matching rules: {err}\n" - + traceback.format_exc() + f"Other exception while matching rules: {err}\n" + + traceback.format_exc() ) finally: compiled_rules_lock.release_read() From 0f12b8b928b7559d53c9ffc5f9d81a3d53b9e315 Mon Sep 17 00:00:00 2001 From: Zachary Estep Date: Tue, 3 Dec 2019 12:47:43 -0500 Subject: [PATCH 161/257] updates to config handling --- src/config_handling.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/config_handling.py b/src/config_handling.py index 3e4a6d8..7e45413 100644 --- a/src/config_handling.py +++ b/src/config_handling.py @@ -99,7 +99,7 @@ def _worker_check(self) -> None: :raises CbInvalidConfig: """ - globals.g_mode = self._as_str("mode", required=True, allowed=["master", "slave"]) + globals.g_mode = self._as_str("mode", required=False, default="master", allowed=["master", "slave"]) value = self._as_str("worker_type", default="local", allowed=["local", "remote"]) if value == "local": From 6717da921968220e9ceab5ebc1731246397615ed Mon Sep 17 00:00:00 2001 From: Burton Choinski Date: Tue, 3 Dec 2019 13:49:25 -0500 Subject: [PATCH 162/257] Tweaked sample config, removed outdated version --- README.md | 2 +- 
samples/sample_local.conf | 9 +++-- samples/sample_remote.conf | 74 -------------------------------------- 3 files changed, 8 insertions(+), 77 deletions(-) delete mode 100644 samples/sample_remote.conf diff --git a/README.md b/README.md index c7dd16a..f43ff06 100644 --- a/README.md +++ b/README.md @@ -19,7 +19,7 @@ The Yara agent must be installed on the same system as Cb Response. ``` ## Create Yara Agent Config -Copy and modify either `sample_local.conf` or `sample_remote.conf` from the `samples` folder +Copy and modify `sample_local.conf` from the `samples` folder to your desired location. diff --git a/samples/sample_local.conf b/samples/sample_local.conf index f55b613..2cde948 100644 --- a/samples/sample_local.conf +++ b/samples/sample_local.conf @@ -1,5 +1,5 @@ ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; -;; Sample local worker config file +;; Sample main and worker config file ;; ;; You may also use "~" if you wish to locate files or directories in your ;; home folder @@ -9,11 +9,16 @@ worker_type=local ; -; ONLY for worker_type of local; add a valid CBR user api token for `cb_server_token` +; dd a valid CBR user api token for `cb_server_token` ; cb_server_url=https://127.0.0.1 cb_server_token= +; +; IP Address of workers if worker_type is remote +; +broker_url=redis://127.0.0.1 + ; ; path to directory containing yara rules ; diff --git a/samples/sample_remote.conf b/samples/sample_remote.conf deleted file mode 100644 index f83934f..0000000 --- a/samples/sample_remote.conf +++ /dev/null @@ -1,74 +0,0 @@ -;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; -;; Sample remote worker config file -;; -;; You may also use "~" if you wish to locate files or directories in your -;; home folder -;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; - -[general] -worker_type=local - -; -; ONLY for worker_type of remote -; IP Address of workers if worker_type is remote -; -broker_url=redis://127.0.0.1 - -; -; path to directory containing yara rules -; -yara_rules_dir=./yara_rules - -; -; Cb Response postgres Database settings; insert posgres password as used in cb.conf for `postgres_password` -; -postgres_host=localhost -postgres_username=cb -postgres_password= -postgres_db=cb -postgres_port=5002 - -; -; os.nice value used for this script, if desired -; -;niceness=1 - -; -; Number of hashes to send to the workers concurrently. -; Recommend setting to the number of workers on the remote system. -; -concurrent_hashes=8 - -; -; If you want binaries to be rescanned more than once, regardless of the rules used, set this to False -; -disable_rescan=True - -; -; The agent will pull binaries up to the configured number of days. For exmaple, 365 will pull all binaries with -; a timestamp within the last year -; -num_days_binaries=365 - - -; -; The feed database directory is where local database work files are stored. If the directory does not exist -; it will be created. -; -feed_database_dir=./feed_db - -; -; This can be used to adjust the interval (in seconds) at which the database is scanned. -; -database_scanning_interval=360 - - -; -; The use of the maintenance script is an ADVANCED FEATURE and should be used with caution! -; -; If "utility_interval" is greater than 0 it represents the interval in minutes after which the yara connector will -; pause to execute a shell script for database maintenance. This can present risks. 
Be careful what you allow the -; script to do, and use this option at your own discretion. -; -utility_interval=0 -utility_script=./scripts/vacuumscript.sh From 654e3129a48ff273f7ce9195f42894601b8c446c Mon Sep 17 00:00:00 2001 From: Zachary Estep Date: Tue, 3 Dec 2019 14:44:27 -0500 Subject: [PATCH 163/257] updates to logging as per @bc --- src/main.py | 2 +- src/tasks.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/main.py b/src/main.py index 01b7856..5400997 100644 --- a/src/main.py +++ b/src/main.py @@ -160,7 +160,7 @@ def generate_feed_from_db() -> None: feedinfo = CbFeedInfo(**feedinfo) feed = CbFeed(feedinfo, reports) - logger.debug("Writing out feed '{0}' to disk".format(feedinfo.data["name"])) + #logger.debug("Writing out feed '{0}' to disk".format(feedinfo.data["name"])) with open(globals.g_output_file, "w") as fp: fp.write(feed.dump()) diff --git a/src/tasks.py b/src/tasks.py index 9f684d2..1f02f01 100644 --- a/src/tasks.py +++ b/src/tasks.py @@ -191,7 +191,7 @@ def update_yara_rules(): logger.debug("Succesfully updated yara rules") compiled_rules_lock.release_write() compiled_rules_lock.acquire_read() - logger.debug("Exiting update routine ok") + #logger.debug("Exiting update routine ok") def get_binary_by_hash(url: str, hsum: str, token: str): From 9c04fccb95d67b2ff138fa34657686c18942feb5 Mon Sep 17 00:00:00 2001 From: Zachary Estep Date: Wed, 4 Dec 2019 10:38:00 -0500 Subject: [PATCH 164/257] logging cleanup --- src/main.py | 4 ++-- src/tasks.py | 6 +++++- 2 files changed, 7 insertions(+), 3 deletions(-) diff --git a/src/main.py b/src/main.py index 5400997..f145cbd 100644 --- a/src/main.py +++ b/src/main.py @@ -43,7 +43,7 @@ logger.setLevel(logging.INFO) celery_logger = logging.getLogger("celery.app.trace") -celery_logger.setLevel(logging.ERROR) +celery_logger.setLevel(logging.INFO) # number of promise worker threads to use PROMISE_THREADS = 2 @@ -642,7 +642,7 @@ def launch_celery_worker(config_file: str = None) -> None: :param config_file: optional path to a configuration file """ localworker = worker.worker(app=app) - localworker.run(config_file=config_file) + localworker.run(loglevel=logging.ERROR, config_file=config_file) logger.debug("CELERY WORKER LAUNCHING THREAD EXITED") diff --git a/src/tasks.py b/src/tasks.py index 1f02f01..174f100 100644 --- a/src/tasks.py +++ b/src/tasks.py @@ -23,8 +23,12 @@ from config_handling import ConfigurationInit from rule_handling import generate_yara_rule_map_hash +import urllib3 +urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning) + logger = logging.getLogger(__name__) -logger.setLevel(logging.DEBUG) +logger.setLevel(logging.ERROR) + # ----- Lock Object Class ------------------------------------------------------------ From bff33b921e6b296d12e6bbb3a89716dd672eb1cd Mon Sep 17 00:00:00 2001 From: Zachary Estep Date: Wed, 4 Dec 2019 10:51:40 -0500 Subject: [PATCH 165/257] Remove old rulesets from disk --- src/tasks.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/tasks.py b/src/tasks.py index 174f100..873852b 100644 --- a/src/tasks.py +++ b/src/tasks.py @@ -22,6 +22,7 @@ from celery_app import app from config_handling import ConfigurationInit from rule_handling import generate_yara_rule_map_hash +import glob import urllib3 urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning) @@ -184,6 +185,9 @@ def update_yara_rules(): rules_already_exist = os.path.exists(compiled_rules_filepath) if not(rules_already_exist): new_rules_object = yara.compile(filepaths=yara_rule_map) + 
#remove old rule set files + for rulesetfp in glob.glob(os.path.join(globals.g_yara_rules_dir,".YARA_RULES_*")): + os.remove(rulesetfp) else: new_rules_object = yara.load(compiled_rules_filepath) compiled_rules_lock.release_read() From d711f09978a38f5beafa13e6e26f3c122553873a Mon Sep 17 00:00:00 2001 From: Zachary Estep Date: Wed, 4 Dec 2019 13:54:40 -0500 Subject: [PATCH 166/257] updates to logging --- src/main.py | 4 +++- src/tasks.py | 5 +++-- 2 files changed, 6 insertions(+), 3 deletions(-) diff --git a/src/main.py b/src/main.py index f145cbd..6e6647b 100644 --- a/src/main.py +++ b/src/main.py @@ -43,7 +43,7 @@ logger.setLevel(logging.INFO) celery_logger = logging.getLogger("celery.app.trace") -celery_logger.setLevel(logging.INFO) +celery_logger.setLevel(logging.CRITICAL) # number of promise worker threads to use PROMISE_THREADS = 2 @@ -67,6 +67,8 @@ def promise_worker(exit_event: Event, scanning_promise_queue: Queue, scanning_re scanning_results_queue.put(result) except Empty: exit_event.wait(1) + except: + exit_event.wait(0.1) else: exit_event.wait(1) finally: diff --git a/src/tasks.py b/src/tasks.py index 873852b..1d85b69 100644 --- a/src/tasks.py +++ b/src/tasks.py @@ -20,6 +20,7 @@ import globals from analysis_result import AnalysisResult from celery_app import app +from celery.utils.log import get_task_logger from config_handling import ConfigurationInit from rule_handling import generate_yara_rule_map_hash import glob @@ -27,8 +28,8 @@ import urllib3 urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning) -logger = logging.getLogger(__name__) -logger.setLevel(logging.ERROR) +logger = get_task_logger(__name__) +logger.setLevel(logging.CRITICAL) From 5bb7a01a463b9b38a6f8b33f9b6c63961349f8ed Mon Sep 17 00:00:00 2001 From: Zachary Estep Date: Wed, 4 Dec 2019 14:12:40 -0500 Subject: [PATCH 167/257] Add debug logging for analysis results --- src/main.py | 1 + 1 file changed, 1 insertion(+) diff --git a/src/main.py b/src/main.py index 6e6647b..f7943df 100644 --- a/src/main.py +++ b/src/main.py @@ -228,6 +228,7 @@ def save_results(analysis_results: List[AnalysisResult]) -> None: :param analysis_results: list of current analysis results """ + logger.debug(f"Saving {len(analysis_results)}...") for analysis_result in analysis_results: save_result(analysis_result) From da0cecee2a097d69c1cd49d893ca3f7da068de04 Mon Sep 17 00:00:00 2001 From: Zachary Estep Date: Wed, 4 Dec 2019 14:14:58 -0500 Subject: [PATCH 168/257] updated debug analysis logging --- src/main.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/main.py b/src/main.py index f7943df..74552c3 100644 --- a/src/main.py +++ b/src/main.py @@ -228,7 +228,7 @@ def save_results(analysis_results: List[AnalysisResult]) -> None: :param analysis_results: list of current analysis
results """ - logger.debug(f"Saving {len(list(filter(lambda ar: not(ar.binary_not_available) , analysis_results)))}...") + logger.debug(f"Saving {len(list(filter(lambda ar: not(ar.binary_not_available) , analysis_results)))} analysis results...") for analysis_result in analysis_results: save_result(analysis_result) @@ -347,7 +347,7 @@ def perform(yara_rule_dir: str, conn, scanning_promises_queue: Queue) -> None: logger.info(f"Enumerating modulestore...found {len(rows)} resident binaries") - md5_hashes = filter(_check_hash_against_feed, (row[0].hex() for row in rows)) + md5_hashes = list(filter(_check_hash_against_feed, (row[0].hex() for row in rows))) analyze_binaries_and_queue_chunked(scanning_promises_queue, md5_hashes) # if gathering and analysis took longer than out utility script interval windo, kick it off @@ -356,6 +356,8 @@ def perform(yara_rule_dir: str, conn, scanning_promises_queue: Queue) -> None: if seconds_since_start >= globals.g_utility_interval * 60 if not globals.g_utility_debug else 1: execute_script() + logger.info(f"Queued {len(md5_hashes)} binaries for analysis") + logger.debug("Exiting database sweep routine") @@ -366,7 +368,7 @@ def _check_hash_against_feed(md5_hash: str) -> bool: :return: True if the hash needs to be added """ query = BinaryDetonationResult.select().where(BinaryDetonationResult.md5 == md5_hash) - + #logger.debug(f"Hash = {md5_hash} exists = {query.exists()}") return not query.exists() @@ -467,7 +469,10 @@ def run_to_exit_signal(exit_event: Event) -> None: Wait-until-exit polling loop function. :param exit_event: the event handler """ - exit_event.wait() + while not(exit_event.is_set()): + exit_event.wait(30.0) + numbins = BinaryDetonationResult.select().count() + logger.info(f"Analyzed {numbins} binaries so far ... 
") logger.debug("Begin graceful shutdown...") From 18d7e06bf3b9adb4f725253b62e99d37d3d2cd77 Mon Sep 17 00:00:00 2001 From: Zachary Estep Date: Thu, 5 Dec 2019 13:50:08 -0500 Subject: [PATCH 170/257] Update to task to log ruleset changes --- src/tasks.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/src/tasks.py b/src/tasks.py index 1d85b69..3883e2a 100644 --- a/src/tasks.py +++ b/src/tasks.py @@ -31,7 +31,8 @@ logger = get_task_logger(__name__) logger.setLevel(logging.CRITICAL) - +rulelogger = logging.getLogger("yaraworker") +rulelogger.setLevel(logging.INFO) # ----- Lock Object Class ------------------------------------------------------------ @@ -186,14 +187,17 @@ def update_yara_rules(): rules_already_exist = os.path.exists(compiled_rules_filepath) if not(rules_already_exist): new_rules_object = yara.compile(filepaths=yara_rule_map) + rulelogger.info(f"Compiled new set of yara-rules - {rules_hash} - ") #remove old rule set files for rulesetfp in glob.glob(os.path.join(globals.g_yara_rules_dir,".YARA_RULES_*")): os.remove(rulesetfp) else: + rulelogger.info(f"Loaded compiled rule set from disk at {compiled_rules_filepath}") new_rules_object = yara.load(compiled_rules_filepath) compiled_rules_lock.release_read() compiled_rules_lock.acquire_write() if not(rules_already_exist): + rulelogger.info(f"Saved ruleset to disk {compiled_rules_filepath}") new_rules_object.save(compiled_rules_filepath) compiled_yara_rules = new_rules_object compiled_rules_hash = rules_hash From a559582d3b2f2538945c4df98bbcd40eea8be5bd Mon Sep 17 00:00:00 2001 From: Burton Choinski Date: Thu, 5 Dec 2019 18:02:31 -0500 Subject: [PATCH 171/257] Static analysis and polish work --- requirements.txt | 1 + src/main.py | 14 ++++++++------ src/tasks.py | 24 ++++++++++++------------ test/test_configInit.py | 2 +- 4 files changed, 22 insertions(+), 19 deletions(-) diff --git a/requirements.txt b/requirements.txt index 3d97721..265d4b6 100644 --- a/requirements.txt +++ b/requirements.txt @@ -12,4 +12,5 @@ python-daemon==2.2.4 # Oct 27, 2019 redis==3.3.11 # Oct 13, 2019 requests==2.22.0 # May 16, 2019 simplejson==3.16.0 # Jun 28, 2018 +urllib3==1.25.7 yara-python==3.11.0 # Oct 10, 2019 diff --git a/src/main.py b/src/main.py index abc5bfc..315743d 100644 --- a/src/main.py +++ b/src/main.py @@ -67,7 +67,8 @@ def promise_worker(exit_event: Event, scanning_promise_queue: Queue, scanning_re scanning_results_queue.put(result) except Empty: exit_event.wait(1) - except: + except Exception as err: + logger.debug(f"Exception in wait: {err}") exit_event.wait(0.1) else: exit_event.wait(1) @@ -162,7 +163,7 @@ def generate_feed_from_db() -> None: feedinfo = CbFeedInfo(**feedinfo) feed = CbFeed(feedinfo, reports) - #logger.debug("Writing out feed '{0}' to disk".format(feedinfo.data["name"])) + # logger.debug("Writing out feed '{0}' to disk".format(feedinfo.data["name"])) with open(globals.g_output_file, "w") as fp: fp.write(feed.dump()) @@ -228,7 +229,8 @@ def save_results(analysis_results: List[AnalysisResult]) -> None: :param analysis_results: list of current analysis results """ - logger.debug(f"Saving {len(list(filter(lambda ar: not(ar.binary_not_available) , analysis_results)))} analysis results...") + logger.debug( + f"Saving {len(list(filter(lambda ar: not ar.binary_not_available, analysis_results)))} analysis results...") for analysis_result in analysis_results: save_result(analysis_result) @@ -356,7 +358,7 @@ def perform(yara_rule_dir: str, conn, scanning_promises_queue: Queue) -> None: if seconds_since_start >= 
globals.g_utility_interval * 60 if not globals.g_utility_debug else 1: execute_script() - logger.info(f"Queued {len(md5_hashes)} binaries for analysis") + logger.info(f"Queued {len(md5_hashes)} binaries for analysis") logger.debug("Exiting database sweep routine") @@ -368,7 +370,7 @@ def _check_hash_against_feed(md5_hash: str) -> bool: :return: True if the hash needs to be added """ query = BinaryDetonationResult.select().where(BinaryDetonationResult.md5 == md5_hash) - #logger.debug(f"Hash = {md5_hash} exists = {query.exists()}") + # logger.debug(f"Hash = {md5_hash} exists = {query.exists()}") return not query.exists() @@ -469,7 +471,7 @@ def run_to_exit_signal(exit_event: Event) -> None: Wait-until-exit polling loop function. :param exit_event: the event handler """ - while not(exit_event.is_set()): + while not (exit_event.is_set()): exit_event.wait(30.0) numbins = BinaryDetonationResult.select().count() logger.info(f"Analyzed {numbins} binaries so far ... ") diff --git a/src/tasks.py b/src/tasks.py index 3883e2a..5e8bf6c 100644 --- a/src/tasks.py +++ b/src/tasks.py @@ -2,6 +2,7 @@ # Copyright © 2014-2019 VMware, Inc. All Rights Reserved. import datetime +import glob import hashlib import io import logging @@ -12,20 +13,18 @@ from typing import List import requests - +import urllib3 # noinspection PyPackageRequirements import yara from celery import bootsteps, group +from celery.utils.log import get_task_logger import globals from analysis_result import AnalysisResult from celery_app import app -from celery.utils.log import get_task_logger from config_handling import ConfigurationInit from rule_handling import generate_yara_rule_map_hash -import glob -import urllib3 urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning) logger = get_task_logger(__name__) @@ -34,6 +33,7 @@ rulelogger = logging.getLogger("yaraworker") rulelogger.setLevel(logging.INFO) + # ----- Lock Object Class ------------------------------------------------------------ @@ -184,19 +184,19 @@ def update_yara_rules(): ) logger.debug("yara rule path is {0}".format(compiled_rules_filepath)) - rules_already_exist = os.path.exists(compiled_rules_filepath) - if not(rules_already_exist): + rules_already_exist = os.path.exists(compiled_rules_filepath) + if not rules_already_exist: new_rules_object = yara.compile(filepaths=yara_rule_map) rulelogger.info(f"Compiled new set of yara-rules - {rules_hash} - ") - #remove old rule set files - for rulesetfp in glob.glob(os.path.join(globals.g_yara_rules_dir,".YARA_RULES_*")): + # remove old rule set files + for rulesetfp in glob.glob(os.path.join(globals.g_yara_rules_dir, ".YARA_RULES_*")): os.remove(rulesetfp) else: rulelogger.info(f"Loaded compiled rule set from disk at {compiled_rules_filepath}") new_rules_object = yara.load(compiled_rules_filepath) compiled_rules_lock.release_read() compiled_rules_lock.acquire_write() - if not(rules_already_exist): + if not rules_already_exist: rulelogger.info(f"Saved ruleset to disk {compiled_rules_filepath}") new_rules_object.save(compiled_rules_filepath) compiled_yara_rules = new_rules_object @@ -204,7 +204,7 @@ def update_yara_rules(): logger.debug("Succesfully updated yara rules") compiled_rules_lock.release_write() compiled_rules_lock.acquire_read() - #logger.debug("Exiting update routine ok") + # logger.debug("Exiting update routine ok") def get_binary_by_hash(url: str, hsum: str, token: str): @@ -299,8 +299,8 @@ def analyze_binary(md5sum: str) -> AnalysisResult: analysis_result.last_error_msg = f"Yara exception: {err}" except 
Exception as err: analysis_result.last_error_msg = ( - f"Other exception while matching rules: {err}\n" - + traceback.format_exc() + f"Other exception while matching rules: {err}\n" + + traceback.format_exc() ) finally: compiled_rules_lock.release_read() diff --git a/test/test_configInit.py b/test/test_configInit.py index 015fa7f..40132d7 100644 --- a/test/test_configInit.py +++ b/test/test_configInit.py @@ -140,7 +140,7 @@ def test_00b_validate_config_worker(self): Ensure our base configuration is valid for worker types. """ ConfigurationInit(TESTCONF) - self.assertEquals("", globals.g_output_file) + self.assertEqual("", globals.g_output_file) self.assertFalse(globals.g_remote) def test_01a_missing_config(self): From af147c8e2b6fefad292ec9f4e40cb3567cb05cf9 Mon Sep 17 00:00:00 2001 From: Zachary Estep Date: Thu, 26 Dec 2019 15:15:35 -0500 Subject: [PATCH 172/257] Updates for intermittent results_backend problems in redis-client --- src/main.py | 185 ++++++++++++---------------------------------------- 1 file changed, 40 insertions(+), 145 deletions(-) diff --git a/src/main.py b/src/main.py index 315743d..fce58bc 100644 --- a/src/main.py +++ b/src/main.py @@ -45,26 +45,32 @@ celery_logger = logging.getLogger("celery.app.trace") celery_logger.setLevel(logging.CRITICAL) -# number of promise worker threads to use -PROMISE_THREADS = 2 - - -def promise_worker(exit_event: Event, scanning_promise_queue: Queue, scanning_results_queue: Queue) -> None: +def analysis_worker(exit_event: Event, hash_queue: Queue, scanning_results_queue: Queue) -> None: """ The promise worker scanning function. :param exit_event: event signaller - :param scanning_promise_queue: the promises queue + :param hash_queue :param scanning_results_queue: the results queue """ try: - while not (exit_event.is_set()) or not (scanning_promise_queue.empty()): - if not (scanning_promise_queue.empty()): + while not (exit_event.is_set()) or not (hash_queue.empty()): + if not (hash_queue.empty()): try: - promise = scanning_promise_queue.get(timeout=1.0) - result = promise.get(disable_sync_subtasks=False) - scanning_promise_queue.task_done() - scanning_results_queue.put(result) + exit_set = False + md5_hashes = hash_queue.get() + promise = analyze_binary.chunks([(mh,) for mh in md5_hashes], globals.g_max_hashes).apply_async() + for prom in promise.children: + exit_set = exit_event.is_set() + if exit_set: + break + results = prom.get(disable_sync_subtasks=False) + scanning_results_queue.put(results) + if not exit_set: + promise.get(disable_sync_subtasks=False, timeout=1) + else: + promise.forget(disable_sync_subtasks=False, timeout=1) + hash_queue.task_done() except Empty: exit_event.wait(1) except Exception as err: @@ -73,38 +79,8 @@ def promise_worker(exit_event: Event, scanning_promise_queue: Queue, scanning_re else: exit_event.wait(1) finally: - exit_event.set() - - logger.debug("PROMISE WORKING EXITING") - - -# NOTE: function retained for possible future need. -# noinspection PyUnusedFunction -def results_worker(exit_event: Event, results_queue: Queue) -> None: - """ - Sqlite is not meant to be thread-safe. - - This single-worker-thread writes the result(s) to the configured sqlite file to hold the feed-metadata and - seen binaries/results from scans. 
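(The docstring being deleted here still states the design constraint accurately: SQLite writes are funneled through a single consumer thread so the connection is never shared between threads. A minimal sketch of that single-writer pattern, with `persist` standing in for the connector's save helper:

```python
from queue import Empty, Queue
from threading import Event, Thread


def writer_thread(exit_event: Event, results_queue: Queue, persist) -> None:
    # Drain until told to exit AND the queue is empty, so no result is dropped;
    # this is the only thread that ever touches the sqlite feed database.
    while not exit_event.is_set() or not results_queue.empty():
        try:
            results = results_queue.get(timeout=1.0)
            persist(results)  # assumed helper that writes the analysis results
            results_queue.task_done()
        except Empty:
            pass


exit_event, results_queue = Event(), Queue()
Thread(target=writer_thread, args=(exit_event, results_queue, print), daemon=True).start()
```
)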
- - :param exit_event: event signaller - :param results_queue: the results queue - """ - try: - while not (exit_event.is_set()) or not (results_queue.empty()): - if not (results_queue.empty()): - try: - result = results_queue.get() - save_results_with_logging(result) - results_queue.task_done() - except Empty: - exit_event.wait(1) - else: - exit_event.wait(1) - finally: - exit_event.set() - - logger.debug("Results worker thread exiting") + hash_queue.task_done() + logger.debug("ANALYSIS WORKER EXITING") def results_worker_chunked(exit_event: Event, results_queue: Queue) -> None: @@ -127,9 +103,7 @@ def results_worker_chunked(exit_event: Event, results_queue: Queue) -> None: else: exit_event.wait(1) finally: - exit_event.set() - - logger.debug("Results worker thread exiting") + logger.debug(f"Results worker thread exiting {exit_event.is_set()}") def generate_feed_from_db() -> None: @@ -189,40 +163,6 @@ def generate_rule_map_remote(yara_rule_path: str) -> None: time.sleep(0.1) -def analyze_binary_and_queue(scanning_promise_queue: Queue, md5sum: str) -> None: - """ - Analyze Binary for a given md5 and save any promises. - :param scanning_promise_queue: the promises queue - :param md5sum: md5 hash to look for - """ - promise = analyze_binary.delay(md5sum) - scanning_promise_queue.put(promise) - - -# NOTE: function retained for possible future need. -# noinspection PyUnusedFunction -def analyze_binaries_and_queue(scanning_promise_queue: Queue, md5_hashes: List[str]) -> None: - """ - Analyze each binary and enqueue. - :param scanning_promise_queue: the promise queue - :param md5_hashes: list of md5 hashes to scan - """ - for md5 in md5_hashes: - analyze_binary_and_queue(scanning_promise_queue, md5) - - -def analyze_binaries_and_queue_chunked(scanning_promise_queue: Queue, md5_hashes: Iterator) -> None: - """ - Attempts to do work in parrallelized chunks of MAX_HASHES grouped. - - :param scanning_promise_queue: the promise queue - :param md5_hashes: list of md5 hases - """ - promise = analyze_binary.chunks([(mh,) for mh in md5_hashes], globals.g_max_hashes).apply_async() - for prom in promise.children: - scanning_promise_queue.put(prom) - - def save_results(analysis_results: List[AnalysisResult]) -> None: """ Save the current set of analysis results. @@ -322,7 +262,7 @@ def execute_script() -> None: logger.info("---------------------------------------- Utility script completed -----\n") -def perform(yara_rule_dir: str, conn, scanning_promises_queue: Queue) -> None: +def perform(yara_rule_dir: str, conn, hash_queue: Queue) -> None: """ Main routine - checks the cbr modulestore/storfiles table for new hashes by comparing the sliding-window with the contents of the feed database on disk. 
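(Condensed to its essentials, each sweep boils down to the sketch below: query the module store for recent hashes, drop the ones the feed database has already recorded, and hand the remainder off for analysis. It reuses the `storefiles` query and the peewee existence check visible in the surrounding hunks; the open psycopg2 connection and the model import path are assumptions, and the query is parameterized here for illustration in place of the original string formatting:

```python
from datetime import datetime, timedelta

from binary_database import BinaryDetonationResult  # assumed import path for the peewee model


def sweep_new_hashes(conn, num_days_binaries: int) -> list:
    # Sliding window: only binaries stored within the configured horizon.
    start_date = datetime.now() - timedelta(days=num_days_binaries)
    cur = conn.cursor()  # assumes an open psycopg2 connection to the CbR database
    cur.execute(
        "SELECT md5hash FROM storefiles WHERE present_locally = TRUE "
        "AND timestamp >= %s ORDER BY timestamp DESC",
        (start_date,),
    )
    md5_hashes = (row[0].hex() for row in cur.fetchall())
    # Skip anything already present in the feed database.
    return [md5 for md5 in md5_hashes
            if not BinaryDetonationResult.select()
                   .where(BinaryDetonationResult.md5 == md5).exists()]
```
)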
@@ -350,7 +290,8 @@ def perform(yara_rule_dir: str, conn, scanning_promises_queue: Queue) -> None: logger.info(f"Enumerating modulestore...found {len(rows)} resident binaries") md5_hashes = list(filter(_check_hash_against_feed, (row[0].hex() for row in rows))) - analyze_binaries_and_queue_chunked(scanning_promises_queue, md5_hashes) + hash_queue.put(md5_hashes) + #analyze_binaries_and_queue_chunked(scanning_promises_queue, md5_hashes) # if gathering and analysis took longer than out utility script interval windo, kick it off if globals.g_utility_interval > 0: @@ -358,7 +299,7 @@ def perform(yara_rule_dir: str, conn, scanning_promises_queue: Queue) -> None: if seconds_since_start >= globals.g_utility_interval * 60 if not globals.g_utility_debug else 1: execute_script() - logger.info(f"Queued {len(md5_hashes)} binaries for analysis") + logger.info(f"Queued {len(md5_hashes)} new binaries for analysis") logger.debug("Exiting database sweep routine") @@ -389,52 +330,6 @@ def save_results_with_logging(analysis_results: List[AnalysisResult]) -> None: logger.error(analysis_result.last_error_msg) save_results(analysis_results) - -# NOTE: function retained for possible future need. -# noinspection PyUnusedFunction -def save_and_log(analysis_results: List[AnalysisResult], start_time: float, num_binaries_skipped: int, - num_total_binaries: int) -> None: - """ - Save and log analysis results. - - :param analysis_results: list of analysis results - :param start_time: starting time (seconds) - :param num_binaries_skipped: number of skipped binaries - :param num_total_binaries: total binary count - """ - logger.debug(analysis_results) - if analysis_results: - for analysis_result in analysis_results: - logger.debug(( - f"Analysis result is {analysis_result.md5} {analysis_result.binary_not_available}" - f" {analysis_result.long_result} {analysis_result.last_error_msg}")) - - if analysis_result.last_error_msg: - logger.error(analysis_result.last_error_msg) - save_results(analysis_results) - - _rule_logging(start_time, num_binaries_skipped, num_total_binaries) - - -def _rule_logging(start_time: float, num_binaries_skipped: int, num_total_binaries: int) -> None: - """ - Simple method to log yara work. - - :param start_time: starting time (seconds) - :param num_binaries_skipped: number of skipped binaries - :param num_total_binaries: total binary count - """ - elapsed_time = time.time() - start_time - logger.info("elapsed time: {0}".format(humanfriendly.format_timespan(elapsed_time))) - logger.debug(" number binaries scanned: {0}".format(globals.g_num_binaries_analyzed)) - logger.debug(" number binaries already scanned: {0}".format(num_binaries_skipped)) - logger.debug(" number binaries unavailable: {0}".format(globals.g_num_binaries_not_available)) - logger.info("total binaries from db: {0}".format(num_total_binaries)) - logger.debug(" binaries per second: {0}:".format(round(num_total_binaries / elapsed_time, 2))) - logger.info("num binaries score greater than zero: {0}\n".format( - len(BinaryDetonationResult.select().where(BinaryDetonationResult.score > 0)))) - - def get_log_file_handles(use_logger) -> List: """ Get a list of filehandle numbers from logger to be handed to DaemonContext.files_preserve. @@ -501,7 +396,7 @@ def wait_all_worker_exit() -> None: """ Await the exit of our worker threads. 
""" - threadcount = PROMISE_THREADS + threadcount = 2 while threadcount > 1: threads = list( filter( @@ -519,7 +414,7 @@ def wait_all_worker_exit() -> None: logger.debug("Main thread going to exit...") -def start_workers(exit_event: Event, scanning_promises_queue: Queue, scanning_results_queue: Queue, +def start_workers(exit_event: Event, hash_queue: Queue, scanning_results_queue: Queue, run_only_once=False) -> None: """ Starts worker-threads (not celery workers). Worker threads do work until they get the exit_event signal @@ -529,15 +424,14 @@ def start_workers(exit_event: Event, scanning_promises_queue: Queue, scanning_re :param run_only_once: if True, run once an exit (default False) """ logger.debug("Starting perf thread") - perf_thread = DatabaseScanningThread(globals.g_scanning_interval, scanning_promises_queue, scanning_results_queue, + perf_thread = DatabaseScanningThread(globals.g_scanning_interval, hash_queue, scanning_results_queue, exit_event, run_only_once) perf_thread.start() - logger.debug("Starting promise thread(s)") - for _ in range(PROMISE_THREADS): - promise_worker_thread = Thread(target=promise_worker, - args=(exit_event, scanning_promises_queue, scanning_results_queue)) - promise_worker_thread.start() + logger.debug("Starting analysis thread") + analysis_worker_thread = Thread(target=analysis_worker, + args=(exit_event, hash_queue, scanning_results_queue)) + analysis_worker_thread.start() logger.debug("Starting results saver thread") results_worker_thread = Thread(target=results_worker_chunked, args=(exit_event, scanning_results_queue)) @@ -551,7 +445,7 @@ class DatabaseScanningThread(Thread): by the signal handler """ - def __init__(self, interval: int, scanning_promises_queue: Queue, scanning_results_queue: Queue, exit_event: Event, + def __init__(self, interval: int, hash_queue: Queue, scanning_results_queue: Queue, exit_event: Event, run_only_once: bool, *args, **kwargs): """ Create a new database scanning object. @@ -571,7 +465,7 @@ def __init__(self, interval: int, scanning_promises_queue: Queue, scanning_resul self.exit_event = exit_event self._conn = get_database_conn() self._interval = interval - self._scanning_promises_queue = scanning_promises_queue + self._hash_queue = hash_queue self._scanning_results_queue = scanning_results_queue self._run_only_once = run_only_once if not self._run_only_once: @@ -584,7 +478,7 @@ def scan_once_and_exit(self) -> None: Perform a database scan one, then exit. 
""" self.do_db_scan() - self._scanning_promises_queue.join() + self._hash_queue.join() self._scanning_results_queue.join() self.exit_event.set() @@ -609,7 +503,8 @@ def do_db_scan(self): """ logger.debug("START database sweep") try: - perform(globals.g_yara_rules_dir, self._conn, self._scanning_promises_queue) + perform(globals.g_yara_rules_dir, self._conn, self._hash_queue) + except Exception as err: logger.exception(f"Something went wrong sweeping the CbR module store: {err} ") @@ -749,7 +644,7 @@ def main(): else: # Doing a real run # Exit condition and queues for doing work exit_event = Event() - scanning_promise_queue = Queue() + hash_queue = Queue() scanning_results_queue = Queue() # Lock file so this process is a singleton lock_file = lockfile.FileLock(args.lock_file) @@ -787,7 +682,7 @@ def main(): init_local_resources() # start working threads - start_workers(exit_event, scanning_promise_queue, scanning_results_queue) + start_workers(exit_event, hash_queue, scanning_results_queue) # start local celeryD worker if working mode is local if not globals.g_remote: @@ -805,7 +700,7 @@ def main(): init_local_resources() # start necessary worker threads - start_workers(exit_event, scanning_promise_queue, scanning_results_queue, run_only_once=True) + start_workers(exit_event, hash_queue, scanning_results_queue, run_only_once=True) # Start a celery worker if we need one if not globals.g_remote: From c855a801361f6ab9ecf432c8be54911e79936539 Mon Sep 17 00:00:00 2001 From: Zachary Estep Date: Fri, 27 Dec 2019 13:27:16 -0500 Subject: [PATCH 173/257] logic touchup and pyinstaller fixes --- cb-yara-connector.spec | 5 +---- src/main.py | 5 ++--- 2 files changed, 3 insertions(+), 7 deletions(-) diff --git a/cb-yara-connector.spec b/cb-yara-connector.spec index 677fee9..6d3e091 100644 --- a/cb-yara-connector.spec +++ b/cb-yara-connector.spec @@ -8,10 +8,7 @@ block_cipher = None a = Analysis(['src/main.py'], pathex=['./src'], binaries=[], - hiddenimports=['lockfile','celery.apps','celery.apps.worker','celery.app.log','celery.fixups', 'celery.fixups.django', 'celery.loaders.app', - 'celery.app.amqp', 'kombu.transport.redis', 'redis', 'celery.backends', - 'celery.backends.redis', 'celery.app.events', 'celery.events','celery.redis', - 'kombu.transport.pyamqp'], + hiddenimports=['lockfile','celery.app.control','celery.worker.strategy','celery.worker.consumer','celery.events.state','celery.worker.autoscale','celery.worker.components','celery.concurrency.prefork','celery.apps','celery.apps.worker','celery.app.log','celery.fixups', 'celery.fixups.django', 'celery.loaders.app','celery.app.amqp', 'kombu.transport.redis', 'redis', 'celery.backends','celery.backends.redis', 'celery.app.events', 'celery.events','celery.redis','kombu.transport.pyamqp'], hookspath=[], runtime_hooks=[], excludes=[], diff --git a/src/main.py b/src/main.py index fce58bc..62d40b4 100644 --- a/src/main.py +++ b/src/main.py @@ -69,7 +69,7 @@ def analysis_worker(exit_event: Event, hash_queue: Queue, scanning_results_queue if not exit_set: promise.get(disable_sync_subtasks=False, timeout=1) else: - promise.forget(disable_sync_subtasks=False, timeout=1) + promise.forget(timeout=1) hash_queue.task_done() except Empty: exit_event.wait(1) @@ -79,8 +79,7 @@ def analysis_worker(exit_event: Event, hash_queue: Queue, scanning_results_queue else: exit_event.wait(1) finally: - hash_queue.task_done() - logger.debug("ANALYSIS WORKER EXITING") + logger.debug("ANALYSIS WORKER EXITING") def results_worker_chunked(exit_event: Event, results_queue: Queue) 
-> None: From d06330ed0f905c96ca1d752f3bbf4c94ce84f2df Mon Sep 17 00:00:00 2001 From: Zachary Estep Date: Fri, 27 Dec 2019 13:56:08 -0500 Subject: [PATCH 174/257] Fixing up rpm build spec --- cb-yara-connector.rpm.spec | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/cb-yara-connector.rpm.spec b/cb-yara-connector.rpm.spec index c7c0dd2..a59765c 100644 --- a/cb-yara-connector.rpm.spec +++ b/cb-yara-connector.rpm.spec @@ -9,7 +9,7 @@ Requires: redis Carbon Black Yara Agent - Scans binaries with configured yara rules %build -cd {%_sourcedir} ; pyinstaller cb-yara-connector.spec +cd %_sourcedir ; pyinstaller cb-yara-connector.spec %install mkdir -p ${RPM_BUILD_ROOT}/var/log/cb/integrations/cb-yara-connector @@ -18,6 +18,6 @@ mkdir -p ${RPM_BUILD_ROOT}/etc/cb/integrations/cb-yara-connector mkdir -p ${RPM_BUILD_ROOT}/etc/init cp example-conf/yara.conf ${RPM_BUILD_ROOT}/etc/cb/integrations/cb-yara-connector/yaraconnector.conf.example install -m 0755 init-scripts/yaraconnector.conf ${RPM_BUILD_ROOT}/etc/init/yaraconnector.conf -install -m 0755 dist/yaraconnector ${RPM_BUILD_ROOT}/usr/share/cb/integrations/cb-yara-connector/yaraconnector +install -m 0755 ${RPM_SOURCE_DIR}/dist/yaraconnector ${RPM_BUILD_ROOT}/usr/share/cb/integrations/cb-yara-connector/ %files -f MANIFEST From 92e8376b2bacd191b0108e5201574d2afb61685a Mon Sep 17 00:00:00 2001 From: Zachary Estep Date: Fri, 27 Dec 2019 16:24:01 -0500 Subject: [PATCH 175/257] Allow users to fully configure celery worker options at config-time --- src/config_handling.py | 5 ++++ src/globals.py | 2 ++ src/main.py | 64 +++++++++++++++++++++++++++++++----------- 3 files changed, 55 insertions(+), 16 deletions(-) diff --git a/src/config_handling.py b/src/config_handling.py index 7e45413..7bcfba6 100644 --- a/src/config_handling.py +++ b/src/config_handling.py @@ -41,6 +41,7 @@ "worker_network_timeout", "worker_type", "yara_rules_dir", + "celery_worker_kwargs" ] @@ -120,6 +121,10 @@ def _worker_check(self) -> None: globals.g_worker_network_timeout = self._as_int("worker_network_timeout", default=globals.g_worker_network_timeout) + celeryworkerkwargs = self.the_config.get("celery_worker_kwargs", None) + if celeryworkerkwargs and len(celeryworkerkwargs) > 0 : + globals.g_celeryworkerkwargs = celeryworkerkwargs + def _extended_check(self) -> None: """ Validate entries used by the main process. diff --git a/src/globals.py b/src/globals.py index 8aee961..6475b04 100644 --- a/src/globals.py +++ b/src/globals.py @@ -42,3 +42,5 @@ g_utility_debug = False # dev use only, reduces interval from minutes to seconds! g_worker_network_timeout = 5 + +g_celeryworkerkwargs = None diff --git a/src/main.py b/src/main.py index 62d40b4..0afac65 100644 --- a/src/main.py +++ b/src/main.py @@ -11,6 +11,7 @@ import sys import threading import time +import psutil from datetime import datetime, timedelta from functools import partial from queue import Empty, Queue @@ -45,6 +46,7 @@ celery_logger = logging.getLogger("celery.app.trace") celery_logger.setLevel(logging.CRITICAL) + def analysis_worker(exit_event: Event, hash_queue: Queue, scanning_results_queue: Queue) -> None: """ The promise worker scanning function. 
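(Stripped of the queue plumbing these hunks adjust, the dispatch/collect cycle inside `analysis_worker` is essentially the following sketch, built from the same `chunks` calls that appear in this series; the task import is an assumption:

```python
from queue import Queue
from threading import Event

from tasks import analyze_binary  # assumed import: the celery task in src/tasks.py


def dispatch_batch(md5_hashes: list, chunk_size: int,
                   results_queue: Queue, exit_event: Event) -> None:
    # chunks() packs the single-hash signatures into sub-tasks of chunk_size
    # hashes each, so one broker message carries a whole batch.
    promise = analyze_binary.chunks([(mh,) for mh in md5_hashes],
                                    chunk_size).apply_async()
    for child in promise.children:  # one AsyncResult per dispatched chunk
        if exit_event.is_set():
            promise.forget()  # abandon any results still outstanding
            break
        # disable_sync_subtasks=False permits this blocking wait here.
        results_queue.put(child.get(disable_sync_subtasks=False))
```
)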
@@ -54,7 +56,7 @@ def analysis_worker(exit_event: Event, hash_queue: Queue, scanning_results_queue :param scanning_results_queue: the results queue """ try: - while not (exit_event.is_set()) or not (hash_queue.empty()): + while not (exit_event.is_set()): if not (hash_queue.empty()): try: exit_set = False @@ -69,7 +71,7 @@ def analysis_worker(exit_event: Event, hash_queue: Queue, scanning_results_queue if not exit_set: promise.get(disable_sync_subtasks=False, timeout=1) else: - promise.forget(timeout=1) + promise.forget() hash_queue.task_done() except Empty: exit_event.wait(1) @@ -79,7 +81,7 @@ def analysis_worker(exit_event: Event, hash_queue: Queue, scanning_results_queue else: exit_event.wait(1) finally: - logger.debug("ANALYSIS WORKER EXITING") + logger.debug(f"ANALYSIS WORKER EXITING {exit_event.is_set()}") def results_worker_chunked(exit_event: Event, results_queue: Queue) -> None: @@ -91,7 +93,7 @@ def results_worker_chunked(exit_event: Event, results_queue: Queue) -> None: :return: """ try: - while not (exit_event.is_set()) or not (results_queue.empty()): + while not (exit_event.is_set()): if not (results_queue.empty()): try: results = results_queue.get() @@ -527,7 +529,7 @@ def run(self): # -def start_celery_worker_thread(config_file: str) -> None: +def start_celery_worker_thread(worker, workerkwargs=None, config_file=None ) -> None: """ Start celery worker in a daemon-thread. @@ -535,20 +537,41 @@ def start_celery_worker_thread(config_file: str) -> None: :param config_file: path to the yara configuration file :return: """ - t = Thread(target=launch_celery_worker, kwargs={"config_file": config_file}) + t = Thread(target=launch_celery_worker, kwargs={"worker": worker , "workerkwargs": workerkwargs, "config_file": config_file}) t.daemon = True t.start() -def launch_celery_worker(config_file: str = None) -> None: +def launch_celery_worker(worker, workerkwargs=None,config_file: str = None) -> None: """ Launch a celery worker using the imported app context :param config_file: optional path to a configuration file """ - localworker = worker.worker(app=app) - localworker.run(loglevel=logging.ERROR, config_file=config_file) + logger.debug(f"Celery worker args are {workerkwargs} ") + if workerkwargs is None: + worker.run(loglevel=logging.ERROR, config_file=config_file, pidfile='/tmp/yaraconnectorceleryworker') + else: + worker.run(loglevel=logging.ERROR, config_file=config_file, pidfile='/tmp/yaraconnectorceleryworker', **workerkwargs) logger.debug("CELERY WORKER LAUNCHING THREAD EXITED") +def terminate_celery_worker(worker=None): + """ Attempt to use the pidfile to gracefully terminate celery workers if they exist + if the worker hasn't terminated gracefully after 5 seconds, kill it using the .die() command + """ + with open('/tmp/yaraconnectorceleryworker') as cworkerpidfile: + worker_pid = int(cworkerpidfile.readline()) + parent = psutil.Process(worker_pid) + children = parent.children(recursive=True) + for child in children: + logger.debug(f"Sending term sig to celery worker child - {worker_pid}") + os.kill(child.pid,signal.SIGTERM) + logger.debug(f"Sending term sig to celery worker - {worker_pid}") + os.kill(worker_pid, signal.SIGTERM) + + time.sleep(5.0) + if worker: + worker.die() + ################################################################################ # Main entrypoint @@ -647,6 +670,10 @@ def main(): scanning_results_queue = Queue() # Lock file so this process is a singleton lock_file = lockfile.FileLock(args.lock_file) + localworker = None + workerkwargs = 
json.loads(globals.g_celeryworkerkwargs) if globals.g_celeryworkerkwargs is not None else None + if len(workerkwargs) == 0: + workerkwargs = None try: @@ -685,14 +712,17 @@ def main(): # start local celeryD worker if working mode is local if not globals.g_remote: - start_celery_worker_thread(args.config_file) + localworker = worker.worker(app=app) + start_celery_worker_thread(localworker, workerkwargs, args.config_file) else: # otherwise, we must start a celeryD worker since we are not the master - start_celery_worker_thread(args.config_file) + localworker = worker.worker(app=app) + start_celery_worker_thread(localworker, workerkwargs, args.config_file) # run until the service/daemon gets a quitting sig run_to_exit_signal(exit_event) wait_all_worker_exit() + terminate_celery_worker(localworker) logger.info("Yara connector shutdown OK") else: # Just do one batch # init local resources @@ -703,18 +733,20 @@ def main(): # Start a celery worker if we need one if not globals.g_remote: - start_celery_worker_thread(args.config_file) + localworker = worker.worker(app=app) + start_celery_worker_thread(localworker, workerkwargs, args.config_file) run_to_exit_signal(exit_event) wait_all_worker_exit() + terminate_celery_worker(localworker) except KeyboardInterrupt: logger.info("\n\n##### Interupted by User!\n") - exit_event.set() - sys.exit(3) except Exception as err: logger.error(f"There were errors executing yara rules: {err}") + finally: exit_event.set() - sys.exit(4) + #wait_all_worker_exit() + terminate_celery_worker(localworker) if __name__ == "__main__": - main() + main() \ No newline at end of file From 79e2613e8e99b2eb3dc44b70736e0dc3c9365e6e Mon Sep 17 00:00:00 2001 From: Zachary Estep Date: Mon, 30 Dec 2019 16:45:49 -0500 Subject: [PATCH 176/257] updates for centos7 rpm build --- MANIFEST | 5 ++++- cb-yara-connector.rpm.spec | 6 +++++- example-conf/yara.conf | 2 +- makefile | 3 ++- src/config_handling.py | 6 +++--- src/main.py | 2 +- 6 files changed, 16 insertions(+), 8 deletions(-) diff --git a/MANIFEST b/MANIFEST index 9cecb1b..2ec3940 100644 --- a/MANIFEST +++ b/MANIFEST @@ -1,5 +1,8 @@ /usr/share/cb/integrations/cb-yara-connector/yaraconnector %dir /usr/share/cb/integrations/cb-yara-connector %dir /var/log/cb/integrations/cb-yara-connector -/etc/init/yaraconnector.conf +/var/log/cb/integrations/cb-yara-connector/yaraconnector.log +#/etc/init/yaraconnector.conf +/etc/systemd/system/cb-yara-connector.service +/etc/cb/integrations/cb-yara-connector/yara_rules /etc/cb/integrations/cb-yara-connector/yaraconnector.conf.example \ No newline at end of file diff --git a/cb-yara-connector.rpm.spec b/cb-yara-connector.rpm.spec index a59765c..f392713 100644 --- a/cb-yara-connector.rpm.spec +++ b/cb-yara-connector.rpm.spec @@ -15,9 +15,13 @@ cd %_sourcedir ; pyinstaller cb-yara-connector.spec mkdir -p ${RPM_BUILD_ROOT}/var/log/cb/integrations/cb-yara-connector mkdir -p ${RPM_BUILD_ROOT}/usr/share/cb/integrations/cb-yara-connector mkdir -p ${RPM_BUILD_ROOT}/etc/cb/integrations/cb-yara-connector +mkdir -p ${RPM_BUILD_ROOT}/etc/cb/integrations/cb-yara-connector/yara_rules mkdir -p ${RPM_BUILD_ROOT}/etc/init +mkdir -p ${RPM_BUILD_ROOT}/etc/systemd/system cp example-conf/yara.conf ${RPM_BUILD_ROOT}/etc/cb/integrations/cb-yara-connector/yaraconnector.conf.example -install -m 0755 init-scripts/yaraconnector.conf ${RPM_BUILD_ROOT}/etc/init/yaraconnector.conf +install -m 0644 cb-yara-connector.service ${RPM_BUILD_ROOT}/etc/systemd/system/cb-yara-connector.service
+#install -m 0755 init-scripts/yaraconnector.conf ${RPM_BUILD_ROOT}/etc/init/yaraconnector.conf install -m 0755 ${RPM_SOURCE_DIR}/dist/yaraconnector ${RPM_BUILD_ROOT}/usr/share/cb/integrations/cb-yara-connector/ %files -f MANIFEST +%config /etc/cb/integrations/cb-yara-connector/yaraconnector.conf.example diff --git a/example-conf/yara.conf b/example-conf/yara.conf index e94145e..5e7f4e0 100644 --- a/example-conf/yara.conf +++ b/example-conf/yara.conf @@ -17,7 +17,7 @@ mode=master ; ; path to directory containing yara rules ; -yara_rules_dir= +yara_rules_dir=/etc/cb/integrations/cb-yara-connector/yara_rules ; ; Cb Response postgres Database settings diff --git a/makefile b/makefile index f97274a..2da5178 100644 --- a/makefile +++ b/makefile @@ -17,5 +17,6 @@ rpm: cp -rp init-scripts/* ${BUILDDIR}/init-scripts cp example-conf/yara.conf ${BUILDDIR}/example-conf/yara.conf cp MANIFEST ${BUILDDIR} + cp cb-yara-connector.service ${BUILDDIR} cp cb-yara-connector.spec ${SOURCEDIR}/cb-yara-connector.spec - rpmbuild -ba cb-yara-connector.rpm.spec \ No newline at end of file + rpmbuild -ba cb-yara-connector.rpm.spec diff --git a/src/config_handling.py b/src/config_handling.py index 7bcfba6..35fc2f1 100644 --- a/src/config_handling.py +++ b/src/config_handling.py @@ -159,9 +159,9 @@ def _extended_check(self) -> None: default=globals.g_utility_script) logger.warning(f"{self.source} utility script '{globals.g_utility_script}' is enabled; " + "use this advanced feature at your own discretion!") - else: - if self._as_path("utility_script", required=False, default=globals.g_utility_script): - logger.debug(f"{self.source} has 'utility_script' defined, but it is disabled") + #else: + # if self._as_path("utility_script", required=False, default=globals.g_utility_script): + # logger.debug(f"{self.source} has 'utility_script' defined, but it is disabled") # developer use only globals.g_utility_debug = self._as_bool("utility_debug", default=False) diff --git a/src/main.py b/src/main.py index 0afac65..c0d7a67 100644 --- a/src/main.py +++ b/src/main.py @@ -672,7 +672,7 @@ def main(): lock_file = lockfile.FileLock(args.lock_file) localworker = None workerkwargs = json.loads(globals.g_celeryworkerkwargs) if globals.g_celeryworkerkwargs is not None else None - if len(workerkwargs) == 0: + if workerkwargs and len(workerkwargs) == 0: workerkwargs = None try: From 28a2d2d924c0deb26bbd1a875a9fe237c1045a25 Mon Sep 17 00:00:00 2001 From: Burton Choinski Date: Thu, 2 Jan 2020 14:37:00 -0500 Subject: [PATCH 177/257] Static analysis fixes --- requirements.txt | 1 + src/config_handling.py | 6 ++--- src/main.py | 56 ++++++++++++++++++++++++----------------- test/test_configInit.py | 7 +++--- 4 files changed, 40 insertions(+), 30 deletions(-) diff --git a/requirements.txt b/requirements.txt index 265d4b6..7fba336 100644 --- a/requirements.txt +++ b/requirements.txt @@ -14,3 +14,4 @@ requests==2.22.0 # May 16, 2019 simplejson==3.16.0 # Jun 28, 2018 urllib3==1.25.7 yara-python==3.11.0 # Oct 10, 2019 +psutil==5.6.7 diff --git a/src/config_handling.py b/src/config_handling.py index 35fc2f1..6a140b6 100644 --- a/src/config_handling.py +++ b/src/config_handling.py @@ -122,8 +122,8 @@ def _worker_check(self) -> None: default=globals.g_worker_network_timeout) celeryworkerkwargs = self.the_config.get("celery_worker_kwargs", None) - if celeryworkerkwargs and len(celeryworkerkwargs) > 0 : - globals.g_celeryworkerkwargs = celeryworkerkwargs + if celeryworkerkwargs and len(celeryworkerkwargs) > 0: + globals.g_celeryworkerkwargs = 
celeryworkerkwargs def _extended_check(self) -> None: """ @@ -159,7 +159,7 @@ def _extended_check(self) -> None: default=globals.g_utility_script) logger.warning(f"{self.source} utility script '{globals.g_utility_script}' is enabled; " + "use this advanced feature at your own discretion!") - #else: + # else: # if self._as_path("utility_script", required=False, default=globals.g_utility_script): # logger.debug(f"{self.source} has 'utility_script' defined, but it is disabled") diff --git a/src/main.py b/src/main.py index c0d7a67..bd49f37 100644 --- a/src/main.py +++ b/src/main.py @@ -11,15 +11,14 @@ import sys import threading import time -import psutil from datetime import datetime, timedelta from functools import partial from queue import Empty, Queue from threading import Event, Thread -from typing import Iterator, List +from typing import List -import humanfriendly import lockfile +import psutil import psycopg2 # noinspection PyPackageRequirements import yara @@ -68,7 +67,7 @@ def analysis_worker(exit_event: Event, hash_queue: Queue, scanning_results_queue break results = prom.get(disable_sync_subtasks=False) scanning_results_queue.put(results) - if not exit_set: + if not exit_set: promise.get(disable_sync_subtasks=False, timeout=1) else: promise.forget() @@ -270,7 +269,7 @@ def perform(yara_rule_dir: str, conn, hash_queue: Queue) -> None: :param yara_rule_dir: location of the rules directory :param conn: The postgres connection - :param scanning_promises_queue: the promises queue + :param hash_queue: the queue of hashes to handle """ if globals.g_remote: logger.info("Uploading yara rules to workers...") @@ -292,7 +291,7 @@ def perform(yara_rule_dir: str, conn, hash_queue: Queue) -> None: md5_hashes = list(filter(_check_hash_against_feed, (row[0].hex() for row in rows))) hash_queue.put(md5_hashes) - #analyze_binaries_and_queue_chunked(scanning_promises_queue, md5_hashes) + # analyze_binaries_and_queue_chunked(scanning_promises_queue, md5_hashes) # if gathering and analysis took longer than out utility script interval windo, kick it off if globals.g_utility_interval > 0: @@ -316,6 +315,7 @@ def _check_hash_against_feed(md5_hash: str) -> bool: return not query.exists() +# FIXME: Unused def save_results_with_logging(analysis_results: List[AnalysisResult]) -> None: """ Save all analysis results, with extended logging. @@ -331,6 +331,7 @@ def save_results_with_logging(analysis_results: List[AnalysisResult]) -> None: logger.error(analysis_result.last_error_msg) save_results(analysis_results) + def get_log_file_handles(use_logger) -> List: """ Get a list of filehandle numbers from logger to be handed to DaemonContext.files_preserve. @@ -420,7 +421,7 @@ def start_workers(exit_event: Event, hash_queue: Queue, scanning_results_queue: """ Starts worker-threads (not celery workers). 
Worker threads do work until they get the exit_event signal :param exit_event: event signaller - :param scanning_promises_queue: promises queue + :param hash_queue: promises queue :param scanning_results_queue: results queue :param run_only_once: if True, run once an exit (default False) """ @@ -431,7 +432,7 @@ def start_workers(exit_event: Event, hash_queue: Queue, scanning_results_queue: logger.debug("Starting analysis thread") analysis_worker_thread = Thread(target=analysis_worker, - args=(exit_event, hash_queue, scanning_results_queue)) + args=(exit_event, hash_queue, scanning_results_queue)) analysis_worker_thread.start() logger.debug("Starting results saver thread") @@ -528,35 +529,44 @@ def run(self): self.exit_event.set() -# -def start_celery_worker_thread(worker, workerkwargs=None, config_file=None ) -> None: +def start_celery_worker_thread(worker_obj: worker.worker, workerkwargs: dict = None, config_file: str = None) -> None: """ Start celery worker in a daemon-thread. TODO: - Aggresive autoscaling config options + :param worker_obj: worker object + :param workerkwargs: dictionary of arguments :param config_file: path to the yara configuration file :return: """ - t = Thread(target=launch_celery_worker, kwargs={"worker": worker , "workerkwargs": workerkwargs, "config_file": config_file}) + t = Thread(target=launch_celery_worker, + kwargs={"worker": worker_obj, "workerkwargs": workerkwargs, "config_file": config_file}) t.daemon = True t.start() -def launch_celery_worker(worker, workerkwargs=None,config_file: str = None) -> None: +def launch_celery_worker(worker_obj: worker.worker, workerkwargs=None, config_file: str = None) -> None: """ Launch a celery worker using the imported app context + :param worker_obj: worker object + :param workerkwargs: dictionary of arguments :param config_file: optional path to a configuration file """ logger.debug(f"Celery worker args are {workerkwargs} ") if workerkwargs is None: - worker.run(loglevel=logging.ERROR, config_file=config_file, pidfile='/tmp/yaraconnectorceleryworker') + worker_obj.run(loglevel=logging.ERROR, config_file=config_file, pidfile='/tmp/yaraconnectorceleryworker') else: - worker.run(loglevel=logging.ERROR, config_file=config_file, pidfile='/tmp/yaraconnectorceleryworker', **workerkwargs) + worker_obj.run(loglevel=logging.ERROR, config_file=config_file, pidfile='/tmp/yaraconnectorceleryworker', + **workerkwargs) logger.debug("CELERY WORKER LAUNCHING THREAD EXITED") -def terminate_celery_worker(worker=None): - """ Attempt to use the pidfile to gracefully terminate celery workers if they exist - if the worker hasn't terminated gracefully after 5 seconds, kill it using the .die() command + +def terminate_celery_worker(worker_obj: worker.worker = None): + """ + Attempt to use the pidfile to gracefully terminate celery workers if they exist + if the worker hasn't terminated gracefully after 5 seconds, kill it using the .die() command + + :param worker_obj: worker object """ with open('/tmp/yaraconnectorceleryworker') as cworkerpidfile: worker_pid = int(cworkerpidfile.readline()) @@ -564,13 +574,13 @@ def terminate_celery_worker(worker=None): children = parent.children(recursive=True) for child in children: logger.debug(f"Sending term sig to celery worker child - {worker_pid}") - os.kill(child.pid,signal.SIGTERM) + os.kill(child.pid, signal.SIGTERM) logger.debug(f"Sending term sig to celery worker - {worker_pid}") os.kill(worker_pid, signal.SIGTERM) - time.sleep(5.0) - if worker: - worker.die() + time.sleep(5.0) + if worker_obj: + 
worker_obj.die("Worker terminated") ################################################################################ @@ -744,9 +754,9 @@ def main(): logger.error(f"There were errors executing yara rules: {err}") finally: exit_event.set() - #wait_all_worker_exit() + # wait_all_worker_exit() terminate_celery_worker(localworker) if __name__ == "__main__": - main() \ No newline at end of file + main() diff --git a/test/test_configInit.py b/test/test_configInit.py index 40132d7..606a086 100644 --- a/test/test_configInit.py +++ b/test/test_configInit.py @@ -179,12 +179,11 @@ def test_02b_section_header_invalid(self): def test_03a_mode_missing(self): """ - Ensure we detect a configuration file without a required 'mode' definition. + Ensure we detect a configuration file without a 'mode' definition (defaults to "master") """ self.mangle(change={"mode": None}) - with self.assertRaises(CbInvalidConfig) as err: - ConfigurationInit(TESTCONF) - assert "has no 'mode' definition" in "{0}".format(err.exception.args[0]) + ConfigurationInit(TESTCONF) + self.assertEqual("master", globals.g_mode) def test_03b_mode_invalid(self): """ From a4e08500726c15f523cb3aa61ff2de785dcb8a61 Mon Sep 17 00:00:00 2001 From: Zachary Estep Date: Fri, 3 Jan 2020 16:24:36 -0500 Subject: [PATCH 178/257] updates for rpmbuild --- MANIFEST | 5 +- cb-yara-connector.rpm.spec | 11 +- cb-yara-connector.service | 1 + makefile | 3 +- src/main.py | 231 ++++++++++++++++++++++++++++--------- 5 files changed, 193 insertions(+), 58 deletions(-) create mode 100644 cb-yara-connector.service diff --git a/MANIFEST b/MANIFEST index 2ec3940..8960ff1 100644 --- a/MANIFEST +++ b/MANIFEST @@ -1,8 +1,9 @@ /usr/share/cb/integrations/cb-yara-connector/yaraconnector +/usr/share/cb/integrations/cb-yara-connector/yara-logo.png %dir /usr/share/cb/integrations/cb-yara-connector %dir /var/log/cb/integrations/cb-yara-connector /var/log/cb/integrations/cb-yara-connector/yaraconnector.log -#/etc/init/yaraconnector.conf /etc/systemd/system/cb-yara-connector.service /etc/cb/integrations/cb-yara-connector/yara_rules -/etc/cb/integrations/cb-yara-connector/yaraconnector.conf.example \ No newline at end of file +/etc/cb/integrations/cb-yara-connector/yaraconnector.conf.example +/tmp/yaraconnectorceleryworker diff --git a/cb-yara-connector.rpm.spec b/cb-yara-connector.rpm.spec index f392713..79c28ab 100644 --- a/cb-yara-connector.rpm.spec +++ b/cb-yara-connector.rpm.spec @@ -1,6 +1,6 @@ Name: python-cb-yara-connector -Version: 2.0 -Release: 2 +Version: 2.1 +Release: 0 Summary: Carbon Black Yara Agent License: MIT Requires: redis @@ -18,10 +18,15 @@ mkdir -p ${RPM_BUILD_ROOT}/etc/cb/integrations/cb-yara-connector mkdir -p ${RPM_BUILD_ROOT}/etc/cb/integrations/cb-yara-connector/yara_rules mkdir -p ${RPM_BUILD_ROOT}/etc/init mkdir -p ${RPM_BUILD_ROOT}/etc/systemd/system +mkdir -p ${RPM_BUILD_ROOT}/tmp +mkdir -p ${RPM_BUILD_ROOT}/var/run/ cp example-conf/yara.conf ${RPM_BUILD_ROOT}/etc/cb/integrations/cb-yara-connector/yaraconnector.conf.example install -m 0644 cb-yara-connector.service ${RPM_BUILD_ROOT}/etc/systemd/system/cb-yara-connector.service -#install -m 0755 init-scripts/yaraconnector.conf ${RPM_BUILD_ROOT}/etc/init/yaraconnector.conf install -m 0755 ${RPM_SOURCE_DIR}/dist/yaraconnector ${RPM_BUILD_ROOT}/usr/share/cb/integrations/cb-yara-connector/ +install ${RPM_SOURCE_DIR}/yara-logo.png ${RPM_BUILD_ROOT}/usr/share/cb/integrations/cb-yara-connector/yara-logo.png +touch ${RPM_BUILD_ROOT}/var/log/cb/integrations/cb-yara-connector/yaraconnector.log +touch 
${RPM_BUILD_ROOT}/tmp/yaraconnectorceleryworker +#touch ${RPM_BUILD_ROOT}/var/run/yaraconnector.pid %files -f MANIFEST %config /etc/cb/integrations/cb-yara-connector/yaraconnector.conf.example diff --git a/cb-yara-connector.service b/cb-yara-connector.service new file mode 100644 index 0000000..393eb55 --- /dev/null +++ b/cb-yara-connector.service @@ -0,0 +1 @@ +StandardErr=/var/log/cb/integrations/cb-yara-connector/yaraconnector.log diff --git a/makefile b/makefile index 2da5178..7023bc3 100644 --- a/makefile +++ b/makefile @@ -1,5 +1,5 @@ SOURCEDIR = ~/rpmbuild/SOURCES -BUILDDIR = ~/rpmbuild/BUILD +BUILDDIR = ~/pmbuild/BUILD clean: rm -rf ${SOURCEDIR} @@ -12,6 +12,7 @@ rpm: mkdir -p ${BUILDDIR}/src mkdir -p ${BUILDDIR}/init-scripts mkdir -p ${BUILDDIR}/example-conf + cp yara-logo.png ${SOURCEDIR}/yara-logo.png cp -rp src/* ${SOURCEDIR}/src cp -rp src/* ${BUILDDIR}/src cp -rp init-scripts/* ${BUILDDIR}/init-scripts diff --git a/src/main.py b/src/main.py index bd49f37..27e6071 100644 --- a/src/main.py +++ b/src/main.py @@ -20,9 +20,11 @@ import lockfile import psutil import psycopg2 + # noinspection PyPackageRequirements import yara from celery.bin import worker + # noinspection PyPackageRequirements from daemon import daemon from peewee import SqliteDatabase @@ -46,7 +48,9 @@ celery_logger.setLevel(logging.CRITICAL) -def analysis_worker(exit_event: Event, hash_queue: Queue, scanning_results_queue: Queue) -> None: +def analysis_worker( + exit_event: Event, hash_queue: Queue, scanning_results_queue: Queue +) -> None: """ The promise worker scanning function. @@ -60,7 +64,9 @@ def analysis_worker(exit_event: Event, hash_queue: Queue, scanning_results_queue try: exit_set = False md5_hashes = hash_queue.get() - promise = analyze_binary.chunks([(mh,) for mh in md5_hashes], globals.g_max_hashes).apply_async() + promise = analyze_binary.chunks( + [(mh,) for mh in md5_hashes], globals.g_max_hashes + ).apply_async() for prom in promise.children: exit_set = exit_event.is_set() if exit_set: @@ -131,7 +137,7 @@ def generate_feed_from_db() -> None: "provider_url": "http://plusvic.github.io/yara/", "summary": "Scan binaries collected by Carbon Black with Yara.", "tech_data": "There are no requirements to share any data with Carbon Black to use this feed.", - "icon": "yara-logo.png", + "icon": "./yara-logo.png", "category": "Connectors", } feedinfo = CbFeedInfo(**feedinfo) @@ -170,7 +176,8 @@ def save_results(analysis_results: List[AnalysisResult]) -> None: :param analysis_results: list of current analysis results """ logger.debug( - f"Saving {len(list(filter(lambda ar: not ar.binary_not_available, analysis_results)))} analysis results...") + f"Saving {len(list(filter(lambda ar: not ar.binary_not_available, analysis_results)))} analysis results..." + ) for analysis_result in analysis_results: save_result(analysis_result) @@ -234,8 +241,8 @@ def get_binary_file_cursor(conn, start_date_binaries: datetime): # noinspection SqlDialectInspection,SqlNoDataSourceInspection query = ( - "SELECT md5hash FROM storefiles WHERE present_locally = TRUE AND " - + "timestamp >= '{0}' ORDER BY timestamp DESC".format(start_date_binaries) + "SELECT md5hash FROM storefiles WHERE present_locally = TRUE AND " + + "timestamp >= '{0}' ORDER BY timestamp DESC".format(start_date_binaries) ) logger.debug(query) @@ -248,7 +255,9 @@ def execute_script() -> None: """ Execute an external utility script. 
""" - logger.info("----- Executing utility script ----------------------------------------") + logger.info( + "----- Executing utility script ----------------------------------------" + ) prog = subprocess.Popen( globals.g_utility_script, shell=True, universal_newlines=True ) @@ -259,7 +268,9 @@ def execute_script() -> None: logger.error(stderr) if prog.returncode: logger.warning(f"program returned error code {prog.returncode}") - logger.info("---------------------------------------- Utility script completed -----\n") + logger.info( + "---------------------------------------- Utility script completed -----\n" + ) def perform(yara_rule_dir: str, conn, hash_queue: Queue) -> None: @@ -296,7 +307,11 @@ def perform(yara_rule_dir: str, conn, hash_queue: Queue) -> None: # if gathering and analysis took longer than out utility script interval windo, kick it off if globals.g_utility_interval > 0: seconds_since_start = (datetime.now() - utility_window_start).seconds - if seconds_since_start >= globals.g_utility_interval * 60 if not globals.g_utility_debug else 1: + if ( + seconds_since_start >= globals.g_utility_interval * 60 + if not globals.g_utility_debug + else 1 + ): execute_script() logger.info(f"Queued {len(md5_hashes)} new binaries for analysis") @@ -310,7 +325,9 @@ def _check_hash_against_feed(md5_hash: str) -> bool: :param md5_hash: md5 hash :return: True if the hash needs to be added """ - query = BinaryDetonationResult.select().where(BinaryDetonationResult.md5 == md5_hash) + query = BinaryDetonationResult.select().where( + BinaryDetonationResult.md5 == md5_hash + ) # logger.debug(f"Hash = {md5_hash} exists = {query.exists()}") return not query.exists() @@ -325,8 +342,12 @@ def save_results_with_logging(analysis_results: List[AnalysisResult]) -> None: logger.debug(analysis_results) if analysis_results: for analysis_result in analysis_results: - logger.debug((f"Analysis result is {analysis_result.md5} {analysis_result.binary_not_available}" - f" {analysis_result.long_result} {analysis_result.last_error_msg}")) + logger.debug( + ( + f"Analysis result is {analysis_result.md5} {analysis_result.binary_not_available}" + f" {analysis_result.long_result} {analysis_result.last_error_msg}" + ) + ) if analysis_result.last_error_msg: logger.error(analysis_result.last_error_msg) save_results(analysis_results) @@ -385,7 +406,9 @@ def init_local_resources() -> None: generate yara_rule_set metadata """ globals.g_yara_rule_map = generate_rule_map(globals.g_yara_rules_dir) - generate_yara_rule_map_hash(globals.g_yara_rules_dir, return_list=False) # save to globals + generate_yara_rule_map_hash( + globals.g_yara_rules_dir, return_list=False + ) # save to globals database = SqliteDatabase(os.path.join(globals.g_feed_database_dir, "binary.db")) db.initialize(database) @@ -394,11 +417,32 @@ def init_local_resources() -> None: generate_feed_from_db() -def wait_all_worker_exit() -> None: +def wait_all_worker_exit_threads(threads, timeout=None): + """ return when all of the given threads + have exited (sans daemon threads) """ + living_threads_count = 2 + start = time.time() + while living_threads_count > 1: + living_threads_count = len( + list( + filter( + lambda t: t.isAlive() and not getattr(t, "daemon", True), threads + ) + ) + ) + time.sleep(0.1) + now = time.time() + elapsed = now - start + if timeout and elapsed >= timeout: + return + + +def wait_all_worker_exit(timeout=None) -> None: """ Await the exit of our worker threads. 
""" threadcount = 2 + start = time.time() while threadcount > 1: threads = list( filter( @@ -409,15 +453,28 @@ def wait_all_worker_exit() -> None: ) ) threadcount = len(threads) - logger.debug(f"Main thread Waiting on {threadcount} live worker-threads (exluding deamons)...") + logger.debug( + f"Main thread Waiting on {threadcount} live worker-threads (exluding deamons)..." + ) logger.debug(f"Live threads (excluding daemons): {threads}") time.sleep(0.1) + timenow = time.time() + elapsed = timenow - start + if timeout and elapsed >= timeout: + logger.debug( + f"Main thread exiting after workers failed to timetout in {timeout}" + ) + return logger.debug("Main thread going to exit...") -def start_workers(exit_event: Event, hash_queue: Queue, scanning_results_queue: Queue, - run_only_once=False) -> None: +def start_workers( + exit_event: Event, + hash_queue: Queue, + scanning_results_queue: Queue, + run_only_once=False, +) -> None: """ Starts worker-threads (not celery workers). Worker threads do work until they get the exit_event signal :param exit_event: event signaller @@ -426,19 +483,29 @@ def start_workers(exit_event: Event, hash_queue: Queue, scanning_results_queue: :param run_only_once: if True, run once an exit (default False) """ logger.debug("Starting perf thread") - perf_thread = DatabaseScanningThread(globals.g_scanning_interval, hash_queue, scanning_results_queue, - exit_event, run_only_once) + perf_thread = DatabaseScanningThread( + globals.g_scanning_interval, + hash_queue, + scanning_results_queue, + exit_event, + run_only_once, + ) perf_thread.start() logger.debug("Starting analysis thread") - analysis_worker_thread = Thread(target=analysis_worker, - args=(exit_event, hash_queue, scanning_results_queue)) + analysis_worker_thread = Thread( + target=analysis_worker, args=(exit_event, hash_queue, scanning_results_queue) + ) analysis_worker_thread.start() logger.debug("Starting results saver thread") - results_worker_thread = Thread(target=results_worker_chunked, args=(exit_event, scanning_results_queue)) + results_worker_thread = Thread( + target=results_worker_chunked, args=(exit_event, scanning_results_queue) + ) results_worker_thread.start() + return [perf_thread, results_worker_thread, analysis_worker_thread] + class DatabaseScanningThread(Thread): """ @@ -447,8 +514,16 @@ class DatabaseScanningThread(Thread): by the signal handler """ - def __init__(self, interval: int, hash_queue: Queue, scanning_results_queue: Queue, exit_event: Event, - run_only_once: bool, *args, **kwargs): + def __init__( + self, + interval: int, + hash_queue: Queue, + scanning_results_queue: Queue, + exit_event: Event, + run_only_once: bool, + *args, + **kwargs, + ): """ Create a new database scanning object. @@ -488,7 +563,6 @@ def scan_until_exit(self) -> None: """ Continually scan the database until instructed to quit. """ - # TODO: DRIFT self.do_db_scan() while not self.exit_event.is_set(): self.exit_event.wait(timeout=self._interval) @@ -501,14 +575,16 @@ def scan_until_exit(self) -> None: def do_db_scan(self): """ - Do the actual database scan, trappig ang problems. + Do the actual database scan, traping any problems. 
""" logger.debug("START database sweep") try: perform(globals.g_yara_rules_dir, self._conn, self._hash_queue) except Exception as err: - logger.exception(f"Something went wrong sweeping the CbR module store: {err} ") + logger.exception( + f"Something went wrong sweeping the CbR module store: {err} " + ) def run(self): """ @@ -530,6 +606,7 @@ def run(self): def start_celery_worker_thread(worker_obj: worker.worker, workerkwargs: dict = None, config_file: str = None) -> None: + """ Start celery worker in a daemon-thread. @@ -537,13 +614,20 @@ def start_celery_worker_thread(worker_obj: worker.worker, workerkwargs: dict = N :param worker_obj: worker object :param workerkwargs: dictionary of arguments :param config_file: path to the yara configuration file - :return: - """ - t = Thread(target=launch_celery_worker, - kwargs={"worker": worker_obj, "workerkwargs": workerkwargs, "config_file": config_file}) + :return the thread, started: + """ + t = Thread( + target=launch_celery_worker, + kwargs={ + "worker": worker, + "workerkwargs": workerkwargs, + "config_file": config_file, + }, + ) t.daemon = True t.start() + return t def launch_celery_worker(worker_obj: worker.worker, workerkwargs=None, config_file: str = None) -> None: """ @@ -653,7 +737,9 @@ def main(): if args.log_file: use_log_file = os.path.abspath(os.path.expanduser(args.log_file)) formatter = logging.Formatter(logging_format) - handler = logging.handlers.RotatingFileHandler(use_log_file, maxBytes=10 * 1000000, backupCount=10) + handler = logging.handlers.RotatingFileHandler( + use_log_file, maxBytes=10 * 1000000, backupCount=10 + ) handler.setFormatter(formatter) logger.addHandler(handler) @@ -665,6 +751,7 @@ def main(): sys.exit(1) if args.validate_yara_rules: + """ RULE VALIDATION MODE OF OPERATION """ logger.info(f"Validating yara rules in directory: {globals.g_yara_rules_dir}") yara_rule_map = generate_rule_map(globals.g_yara_rules_dir) try: @@ -673,7 +760,8 @@ def main(): except Exception as err: logger.error(f"There were errors compiling yara rules: {err}") sys.exit(2) - else: # Doing a real run + else: + # Doing a real run # Exit condition and queues for doing work exit_event = Event() hash_queue = Queue() @@ -681,11 +769,24 @@ def main(): # Lock file so this process is a singleton lock_file = lockfile.FileLock(args.lock_file) localworker = None - workerkwargs = json.loads(globals.g_celeryworkerkwargs) if globals.g_celeryworkerkwargs is not None else None + workerkwargs = ( + json.loads(globals.g_celeryworkerkwargs) + if globals.g_celeryworkerkwargs is not None + else None + ) if workerkwargs and len(workerkwargs) == 0: workerkwargs = None try: + """ + There are four principle modes of operation - + 1) master and worker + 2) remote and local + Support running as 1) just the binary-getting + 2) binary-getting and analysis locally + 3) binary-getting and analysis to happen on some worker on the same redis/amqp/backend broker + 4) Worker (either local to to the cbr machine or remote) + """ if not args.run_once: # Running as a deamon # Get working dir setting working_dir = os.path.abspath(os.path.expanduser(args.working_dir)) @@ -695,8 +796,11 @@ def main(): files_preserve.extend([args.lock_file, args.log_file, args.output_file]) # defaults to piping to /dev/null - deamon_kwargs = {"working_directory": working_dir, "pidfile": lock_file, - "files_preserve": files_preserve} + deamon_kwargs = { + "working_directory": working_dir, + "pidfile": lock_file, + "files_preserve": files_preserve, + } # If in debug mode, make sure stdout and 
stderr don't go to /dev/null if args.debug: @@ -706,11 +810,15 @@ def main(): # Operating mode - are we the master a worker? run_as_master = globals.g_mode == "master" - # Signal handler partial function + # Signal handler sig_handler = partial(handle_sig, exit_event) - context.signal_map = {signal.SIGTERM: sig_handler, signal.SIGQUIT: sig_handler} + context.signal_map = { + signal.SIGTERM: sig_handler, + signal.SIGQUIT: sig_handler, + } # Make sure we close the deamon context at the end + threads = [] with context: # only connect to cbr if we're the master if run_as_master: @@ -718,43 +826,62 @@ def main(): init_local_resources() # start working threads - start_workers(exit_event, hash_queue, scanning_results_queue) + threads = start_workers( + exit_event, hash_queue, scanning_results_queue + ) # start local celeryD worker if working mode is local if not globals.g_remote: localworker = worker.worker(app=app) - start_celery_worker_thread(localworker, workerkwargs, args.config_file) + threads.append( + start_celery_worker_thread( + localworker, workerkwargs, args.config_file + ) + ) else: # otherwise, we must start a celeryD worker since we are not the master localworker = worker.worker(app=app) - start_celery_worker_thread(localworker, workerkwargs, args.config_file) + threads.append( + start_celery_worker_thread( + localworker, workerkwargs, args.config_file + ) + ) # run until the service/daemon gets a quitting sig - run_to_exit_signal(exit_event) - wait_all_worker_exit() - terminate_celery_worker(localworker) - logger.info("Yara connector shutdown OK") - else: # Just do one batch - # init local resources + try: + run_to_exit_signal(exit_event) + finally: + try: + wait_all_worker_exit_threads(threads, timeout=10.0) + finally: + terminate_celery_worker(localworker) + logger.info("Yara connector shutdown") + + else: # | | | BATCH MODE | | | init_local_resources() # start necessary worker threads - start_workers(exit_event, hash_queue, scanning_results_queue, run_only_once=True) + threads = start_workers( + exit_event, hash_queue, scanning_results_queue, run_only_once=True + ) # Start a celery worker if we need one if not globals.g_remote: localworker = worker.worker(app=app) - start_celery_worker_thread(localworker, workerkwargs, args.config_file) + threads.append( + start_celery_worker_thread( + localworker, workerkwargs, args.config_file + ) + ) run_to_exit_signal(exit_event) - wait_all_worker_exit() - terminate_celery_worker(localworker) + wait_all_worker_exit_threads(threads,timeout=10.0) + # terminate_celery_worker(localworker) except KeyboardInterrupt: logger.info("\n\n##### Interupted by User!\n") except Exception as err: logger.error(f"There were errors executing yara rules: {err}") finally: exit_event.set() - # wait_all_worker_exit() terminate_celery_worker(localworker) From a27bfd51aa67924f6f22fca979a9bb00399a047c Mon Sep 17 00:00:00 2001 From: Zachary Estep Date: Fri, 3 Jan 2020 16:25:06 -0500 Subject: [PATCH 179/257] updates to service --- cb-yara-connector.service | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/cb-yara-connector.service b/cb-yara-connector.service index 393eb55..0a1589a 100644 --- a/cb-yara-connector.service +++ b/cb-yara-connector.service @@ -1 +1,18 @@ +[Unit] +Description=Carbon Black Response Yara Connector +After=syslog.target network.target + +[Service] +Type=forking +PIDFile=/var/run/yaraconnector.pid +ExecStart=C_FORCE_ROOT=1 /usr/share/cb/integrations/cb-yara-connector/yaraconnector --working-dir 
/usr/share/cb/integrations/cb-yara-connector --config-file /etc/cb/integrations/cb-yara-connector/yaraconnector.conf --lock-file /var/run/yaraconnector.pid --run-once False --log-file /var/log/cb/integrations/cb-yara-connector/yaraconnector.log +ExecStop=kill `cat /var/run/yaraconnector.pid` StandardErr=/var/log/cb/integrations/cb-yara-connector/yaraconnector.log +StandardOutput=/var/log/cb/integrations/cb-yara-connector/yaraconnector.log +WorkingDirectory=/usr/share/cb/integrations/cb-yara-connector +#User=cb +#Group=cb + + +[Install] +WantedBy=multi-user.target From 4dcfd98b4fc92ca13cc90c3fc88dd268750d1900 Mon Sep 17 00:00:00 2001 From: Zachary Estep Date: Fri, 3 Jan 2020 19:08:58 -0500 Subject: [PATCH 180/257] updates to rpm build --- cb-yara-connector.rpm.spec | 5 ++--- cb-yara-connector.service | 11 +++++----- makefile | 11 +++++++--- src/main.py | 42 ++++++++++++++++++++++---------------- 4 files changed, 39 insertions(+), 30 deletions(-) diff --git a/cb-yara-connector.rpm.spec b/cb-yara-connector.rpm.spec index 79c28ab..fbd2a9b 100644 --- a/cb-yara-connector.rpm.spec +++ b/cb-yara-connector.rpm.spec @@ -20,13 +20,12 @@ mkdir -p ${RPM_BUILD_ROOT}/etc/init mkdir -p ${RPM_BUILD_ROOT}/etc/systemd/system mkdir -p ${RPM_BUILD_ROOT}/tmp mkdir -p ${RPM_BUILD_ROOT}/var/run/ -cp example-conf/yara.conf ${RPM_BUILD_ROOT}/etc/cb/integrations/cb-yara-connector/yaraconnector.conf.example -install -m 0644 cb-yara-connector.service ${RPM_BUILD_ROOT}/etc/systemd/system/cb-yara-connector.service +cp ${RPM_SOURCE_DIR}/example-conf/yara.conf ${RPM_BUILD_ROOT}/etc/cb/integrations/cb-yara-connector/yaraconnector.conf.example +install -m 0644 ${RPM_SOURCE_DIR}/cb-yara-connector.service ${RPM_BUILD_ROOT}/etc/systemd/system/cb-yara-connector.service install -m 0755 ${RPM_SOURCE_DIR}/dist/yaraconnector ${RPM_BUILD_ROOT}/usr/share/cb/integrations/cb-yara-connector/ install ${RPM_SOURCE_DIR}/yara-logo.png ${RPM_BUILD_ROOT}/usr/share/cb/integrations/cb-yara-connector/yara-logo.png touch ${RPM_BUILD_ROOT}/var/log/cb/integrations/cb-yara-connector/yaraconnector.log touch ${RPM_BUILD_ROOT}/tmp/yaraconnectorceleryworker -#touch ${RPM_BUILD_ROOT}/var/run/yaraconnector.pid %files -f MANIFEST %config /etc/cb/integrations/cb-yara-connector/yaraconnector.conf.example diff --git a/cb-yara-connector.service b/cb-yara-connector.service index 0a1589a..0cf21ab 100644 --- a/cb-yara-connector.service +++ b/cb-yara-connector.service @@ -3,16 +3,15 @@ Description=Carbon Black Response Yara Connector After=syslog.target network.target [Service] +Environment=C_FORCE_ROOT=1 Type=forking -PIDFile=/var/run/yaraconnector.pid -ExecStart=C_FORCE_ROOT=1 /usr/share/cb/integrations/cb-yara-connector/yaraconnector --working-dir /usr/share/cb/integrations/cb-yara-connector --config-file /etc/cb/integrations/cb-yara-connector/yaraconnector.conf --lock-file /var/run/yaraconnector.pid --run-once False --log-file /var/log/cb/integrations/cb-yara-connector/yaraconnector.log -ExecStop=kill `cat /var/run/yaraconnector.pid` -StandardErr=/var/log/cb/integrations/cb-yara-connector/yaraconnector.log -StandardOutput=/var/log/cb/integrations/cb-yara-connector/yaraconnector.log +ExecStart=/usr/share/cb/integrations/cb-yara-connector/yaraconnector --debug --config-file /etc/cb/integrations/cb-yara-connector/yaraconnector.conf --lock-file /var/run/yaraconnector.pid --run-forever True +StandardError=file:/var/log/cb/integrations/cb-yara-connector/yaraconnector.log +StandardOutput=file:/var/log/cb/integrations/cb-yara-connector/yaraconnector.log 
WorkingDirectory=/usr/share/cb/integrations/cb-yara-connector #User=cb #Group=cb [Install] -WantedBy=multi-user.target +WantedBy=multi-user.target \ No newline at end of file diff --git a/makefile b/makefile index 7023bc3..9be8f58 100644 --- a/makefile +++ b/makefile @@ -1,9 +1,12 @@ SOURCEDIR = ~/rpmbuild/SOURCES -BUILDDIR = ~/pmbuild/BUILD +BUILDDIR = ~/rpmbuild/BUILD +RPMDIR = ~/rpmbuild/RPMS clean: rm -rf ${SOURCEDIR} rm -rf ${BUILDDIR} + rm -rf ${RPMDIR} + rm -rf ~/rpmbuild rm -rf dist rpm: mkdir -p ${SOURCEDIR} @@ -12,12 +15,14 @@ rpm: mkdir -p ${BUILDDIR}/src mkdir -p ${BUILDDIR}/init-scripts mkdir -p ${BUILDDIR}/example-conf + mkdir -p ${SOURCEDIR}/example-conf cp yara-logo.png ${SOURCEDIR}/yara-logo.png cp -rp src/* ${SOURCEDIR}/src cp -rp src/* ${BUILDDIR}/src cp -rp init-scripts/* ${BUILDDIR}/init-scripts cp example-conf/yara.conf ${BUILDDIR}/example-conf/yara.conf - cp MANIFEST ${BUILDDIR} - cp cb-yara-connector.service ${BUILDDIR} + cp -rp example-conf/yara.conf ${SOURCEDIR}/example-conf/yara.conf + cp -p MANIFEST ${BUILDDIR}/MANIFEST + cp cb-yara-connector.service ${SOURCEDIR}/cb-yara-connector.service cp cb-yara-connector.spec ${SOURCEDIR}/cb-yara-connector.spec rpmbuild -ba cb-yara-connector.rpm.spec diff --git a/src/main.py b/src/main.py index 27e6071..89b356d 100644 --- a/src/main.py +++ b/src/main.py @@ -23,7 +23,7 @@ # noinspection PyPackageRequirements import yara -from celery.bin import worker +from celery.bin.worker import worker # noinspection PyPackageRequirements from daemon import daemon @@ -605,7 +605,7 @@ def run(self): self.exit_event.set() -def start_celery_worker_thread(worker_obj: worker.worker, workerkwargs: dict = None, config_file: str = None) -> None: +def start_celery_worker_thread(worker_obj , workerkwargs: dict = None, config_file: str = None) -> None: """ Start celery worker in a daemon-thread. 
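For context on the import change above: in celery 4.x the worker command class lives in its own module, so `from celery.bin.worker import worker` picks up the class directly instead of the `celery.bin.worker` module. A minimal sketch of how such a worker object is built (the app name and broker URL are placeholders, not the connector's actual values; the connector's own app lives in its `celery_app` module):

```python
from celery import Celery
from celery.bin.worker import worker  # celery 4.x worker command class

app = Celery("tasks", broker="redis://127.0.0.1")  # placeholder app/broker
localworker = worker(app=app)  # same construction the later hunks use
# A call such as localworker.run(**workerkwargs) would then start the worker;
# the connector drives this from a daemon thread (see launch_celery_worker).
```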
@@ -619,7 +619,7 @@ def start_celery_worker_thread(worker_obj: worker.worker, workerkwargs: dict = N t = Thread( target=launch_celery_worker, kwargs={ - "worker": worker, + "worker_obj": worker_obj, "workerkwargs": workerkwargs, "config_file": config_file, }, @@ -629,7 +629,7 @@ def start_celery_worker_thread(worker_obj: worker.worker, workerkwargs: dict = N return t -def launch_celery_worker(worker_obj: worker.worker, workerkwargs=None, config_file: str = None) -> None: +def launch_celery_worker(worker_obj , workerkwargs=None, config_file: str = None) -> None: """ Launch a celery worker using the imported app context :param worker_obj: worker object @@ -645,7 +645,7 @@ def launch_celery_worker(worker_obj: worker.worker, workerkwargs=None, config_fi logger.debug("CELERY WORKER LAUNCHING THREAD EXITED") -def terminate_celery_worker(worker_obj: worker.worker = None): +def terminate_celery_worker(worker_obj: worker = None): """ Attempt to use the pidfile to gracefully terminate celery workers if they exist if the worker hasn't terminated gracefully after 5 seconds, kill it using the .die() command @@ -653,14 +653,18 @@ def terminate_celery_worker(worker_obj: worker.worker = None): :param worker_obj: worker object """ with open('/tmp/yaraconnectorceleryworker') as cworkerpidfile: - worker_pid = int(cworkerpidfile.readline()) - parent = psutil.Process(worker_pid) - children = parent.children(recursive=True) - for child in children: - logger.debug(f"Sending term sig to celery worker child - {worker_pid}") - os.kill(child.pid, signal.SIGTERM) - logger.debug(f"Sending term sig to celery worker - {worker_pid}") - os.kill(worker_pid, signal.SIGTERM) + worker_pid_str = cworkerpidfile.readline() + worker_pid = int(worker_pid_str) if len(worker_pid_str.strip()) > 0 else None + if worker_pid: + parent = psutil.Process(worker_pid) if worker_pid else psutil.Process() + children = parent.children(recursive=True) + for child in children: + logger.debug(f"Sending term sig to celery worker child - {worker_pid}") + os.kill(child.pid, signal.SIGTERM) + logger.debug(f"Sending term sig to celery worker - {worker_pid}") + os.kill(worker_pid, signal.SIGTERM) + else: + logger.debug("Didn't find a worker-pidfile to terminate on exit...") time.sleep(5.0) if worker_obj: @@ -705,7 +709,7 @@ def handle_arguments(): ) # Controls batch vs continous mode , defaults to batch processing parser.add_argument( - "--run-once", default=True, help="Run as batch mode or no", required=False + "--run-forever", default=False, help="Run as batch mode or no", required=False ) # Validates the rules @@ -787,7 +791,8 @@ def main(): 3) binary-getting and analysis to happen on some worker on the same redis/amqp/backend broker 4) Worker (either local to to the cbr machine or remote) """ - if not args.run_once: # Running as a deamon + if args.run_forever: # Running as a deamon + logger.debug("RUNNING AS DEMON") # Get working dir setting working_dir = os.path.abspath(os.path.expanduser(args.working_dir)) @@ -832,7 +837,7 @@ def main(): # start local celeryD worker if working mode is local if not globals.g_remote: - localworker = worker.worker(app=app) + localworker = worker(app=app) threads.append( start_celery_worker_thread( localworker, workerkwargs, args.config_file @@ -840,7 +845,7 @@ def main(): ) else: # otherwise, we must start a celeryD worker since we are not the master - localworker = worker.worker(app=app) + localworker = worker(app=app) threads.append( start_celery_worker_thread( localworker, workerkwargs, args.config_file @@ -858,6 
+863,7 @@ def main(): logger.info("Yara connector shutdown") else: # | | | BATCH MODE | | | + logger.debug("BATCH MODE") init_local_resources() # start necessary worker threads @@ -867,7 +873,7 @@ def main(): # Start a celery worker if we need one if not globals.g_remote: - localworker = worker.worker(app=app) + localworker = worker(app=app) threads.append( start_celery_worker_thread( localworker, workerkwargs, args.config_file From 6e3005c884bedb8efeeea9404e01fd338abc03ed Mon Sep 17 00:00:00 2001 From: Burton Choinski Date: Fri, 3 Jan 2020 10:30:13 -0500 Subject: [PATCH 181/257] * updated requirements * fixed _as_path logic --- requirements.txt | 20 ++++++++++---------- src/config_handling.py | 9 +++++---- 2 files changed, 15 insertions(+), 14 deletions(-) diff --git a/requirements.txt b/requirements.txt index 7fba336..2a8fb2b 100644 --- a/requirements.txt +++ b/requirements.txt @@ -3,15 +3,15 @@ # All versions are latest viable at date of package version release ################################################################################ -celery==4.3.0 # Mar 31, 2019 -humanfriendly==4.18 # Feb 21, 2019 -lockfile==0.12.2 # Nov 25, 2015 -peewee==3.11.2 # Sep 24, 2019 -psycopg2-binary==2.8.4 # Oct 20, 2019 -python-daemon==2.2.4 # Oct 27, 2019 -redis==3.3.11 # Oct 13, 2019 -requests==2.22.0 # May 16, 2019 -simplejson==3.16.0 # Jun 28, 2018 +celery==4.4.0 +humanfriendly==4.18 +lockfile==0.12.2 +peewee==3.13.1 +psycopg2-binary==2.8.4 +python-daemon==2.2.4 +redis==3.3.11 +requests==2.22.0 +simplejson==3.17.0 urllib3==1.25.7 -yara-python==3.11.0 # Oct 10, 2019 +yara-python==3.11.0 psutil==5.6.7 diff --git a/src/config_handling.py b/src/config_handling.py index 6a140b6..1005806 100644 --- a/src/config_handling.py +++ b/src/config_handling.py @@ -159,9 +159,10 @@ def _extended_check(self) -> None: default=globals.g_utility_script) logger.warning(f"{self.source} utility script '{globals.g_utility_script}' is enabled; " + "use this advanced feature at your own discretion!") - # else: - # if self._as_path("utility_script", required=False, default=globals.g_utility_script): - # logger.debug(f"{self.source} has 'utility_script' defined, but it is disabled") + else: + if self._as_path("utility_script", required=False, default=globals.g_utility_script): + logger.debug(f"{self.source} has 'utility_script' defined, but it is disabled") + globals.g_utility_script = "" # developer use only globals.g_utility_debug = self._as_bool("utility_debug", default=False) @@ -220,7 +221,7 @@ def _as_path(self, param: str, required: bool = False, exists: bool = True, is_d """ value = self._as_str(param, required, default=default) value = os.path.abspath(os.path.expanduser(value)) - if exists: + if exists and required: if not os.path.exists(value): if create_if_needed and is_dir: try: From 3494ce007d7dcb5de4217dde396f605efa701f8e Mon Sep 17 00:00:00 2001 From: Burton Choinski Date: Mon, 6 Jan 2020 08:31:12 -0500 Subject: [PATCH 182/257] Cleanups and added fixed hinting --- src/main.py | 58 +++++++++++++++++++++++++---------------------------- 1 file changed, 27 insertions(+), 31 deletions(-) diff --git a/src/main.py b/src/main.py index 89b356d..0d3aafa 100644 --- a/src/main.py +++ b/src/main.py @@ -20,11 +20,9 @@ import lockfile import psutil import psycopg2 - # noinspection PyPackageRequirements import yara from celery.bin.worker import worker - # noinspection PyPackageRequirements from daemon import daemon from peewee import SqliteDatabase @@ -49,7 +47,7 @@ def analysis_worker( - exit_event: Event, hash_queue: 
Queue, scanning_results_queue: Queue + exit_event: Event, hash_queue: Queue, scanning_results_queue: Queue ) -> None: """ The promise worker scanning function. @@ -241,8 +239,8 @@ def get_binary_file_cursor(conn, start_date_binaries: datetime): # noinspection SqlDialectInspection,SqlNoDataSourceInspection query = ( - "SELECT md5hash FROM storefiles WHERE present_locally = TRUE AND " - + "timestamp >= '{0}' ORDER BY timestamp DESC".format(start_date_binaries) + "SELECT md5hash FROM storefiles WHERE present_locally = TRUE AND " + + "timestamp >= '{0}' ORDER BY timestamp DESC".format(start_date_binaries) ) logger.debug(query) @@ -308,9 +306,9 @@ def perform(yara_rule_dir: str, conn, hash_queue: Queue) -> None: if globals.g_utility_interval > 0: seconds_since_start = (datetime.now() - utility_window_start).seconds if ( - seconds_since_start >= globals.g_utility_interval * 60 - if not globals.g_utility_debug - else 1 + seconds_since_start >= globals.g_utility_interval * 60 + if not globals.g_utility_debug + else 1 ): execute_script() @@ -437,6 +435,7 @@ def wait_all_worker_exit_threads(threads, timeout=None): return +# FIXME: Unused def wait_all_worker_exit(timeout=None) -> None: """ Await the exit of our worker threads. @@ -469,12 +468,8 @@ def wait_all_worker_exit(timeout=None) -> None: logger.debug("Main thread going to exit...") -def start_workers( - exit_event: Event, - hash_queue: Queue, - scanning_results_queue: Queue, - run_only_once=False, -) -> None: +def start_workers(exit_event: Event, hash_queue: Queue, scanning_results_queue: Queue, + run_only_once=False) -> List[Thread]: """ Starts worker-threads (not celery workers). Worker threads do work until they get the exit_event signal :param exit_event: event signaller @@ -515,14 +510,14 @@ class DatabaseScanningThread(Thread): """ def __init__( - self, - interval: int, - hash_queue: Queue, - scanning_results_queue: Queue, - exit_event: Event, - run_only_once: bool, - *args, - **kwargs, + self, + interval: int, + hash_queue: Queue, + scanning_results_queue: Queue, + exit_event: Event, + run_only_once: bool, + *args, + **kwargs, ): """ Create a new database scanning object. @@ -605,8 +600,7 @@ def run(self): self.exit_event.set() -def start_celery_worker_thread(worker_obj , workerkwargs: dict = None, config_file: str = None) -> None: - +def start_celery_worker_thread(worker_obj, workerkwargs: dict = None, config_file: str = None) -> Thread: """ Start celery worker in a daemon-thread. 
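The daemon-thread launch pattern that `start_celery_worker_thread` settles on in these hunks - create the thread, mark it daemonic, start it, and hand it back so the caller can track it - reduces to a small sketch (the helper name below is illustrative, not from the source):

```python
from threading import Thread

def start_in_daemon_thread(target, **kwargs) -> Thread:
    """Run target in a daemon thread; return the thread so callers can track it."""
    t = Thread(target=target, kwargs=kwargs)
    t.daemon = True  # daemonic: will not block interpreter exit at shutdown
    t.start()
    return t  # callers append this to the threads list used by the timed wait
```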
@@ -629,7 +623,8 @@ def start_celery_worker_thread(worker_obj , workerkwargs: dict = None, config_fi return t -def launch_celery_worker(worker_obj , workerkwargs=None, config_file: str = None) -> None: + +def launch_celery_worker(worker_obj, workerkwargs=None, config_file: str = None) -> None: """ Launch a celery worker using the imported app context :param worker_obj: worker object @@ -764,7 +759,7 @@ def main(): except Exception as err: logger.error(f"There were errors compiling yara rules: {err}") sys.exit(2) - else: + else: # Doing a real run # Exit condition and queues for doing work exit_event = Event() @@ -783,13 +778,14 @@ try: """ - There are four principal modes of operation - + There are four principal modes of operation - 1) master and worker 2) remote and local Support running as 1) just the binary-getting - 2) binary-getting and analysis locally - 3) binary-getting and analysis to happen on some worker on the same redis/amqp/backend broker - 4) Worker (either local to the cbr machine or remote) + 2) binary-getting and analysis locally + 3) binary-getting and analysis to happen on some worker on the same + redis/amqp/backend broker + 4) Worker (either local to the cbr machine or remote) """ if args.run_forever: # Running as a deamon logger.debug("RUNNING AS DEMON") @@ -880,7 +876,7 @@ def main(): ) ) run_to_exit_signal(exit_event) - wait_all_worker_exit_threads(threads,timeout=10.0) + wait_all_worker_exit_threads(threads, timeout=10.0) # terminate_celery_worker(localworker) except KeyboardInterrupt: logger.info("\n\n##### Interupted by User!\n") From 736bbeba700a2fa28410a3c5aab52af5c0558cde Mon Sep 17 00:00:00 2001 From: Zachary Estep Date: Mon, 6 Jan 2020 13:44:14 -0500 Subject: [PATCH 183/257] updates --- MANIFEST | 2 +- cb-yara-connector.service | 4 +--- cb-yara-connector.spec | 2 +- requirements.txt | 2 ++ src/main.py | 17 +++++++++--------- 5 files changed, 14 insertions(+), 13 deletions(-) diff --git a/MANIFEST b/MANIFEST index 8960ff1..a8d69cf 100644 --- a/MANIFEST +++ b/MANIFEST @@ -6,4 +6,4 @@ /etc/systemd/system/cb-yara-connector.service /etc/cb/integrations/cb-yara-connector/yara_rules /etc/cb/integrations/cb-yara-connector/yaraconnector.conf.example -/tmp/yaraconnectorceleryworker +/tmp/yaraconnectorceleryworker \ No newline at end of file diff --git a/cb-yara-connector.service b/cb-yara-connector.service index 0cf21ab..7ec1912 100644 --- a/cb-yara-connector.service +++ b/cb-yara-connector.service @@ -5,9 +5,7 @@ After=syslog.target network.target [Service] Environment=C_FORCE_ROOT=1 Type=forking -ExecStart=/usr/share/cb/integrations/cb-yara-connector/yaraconnector --debug --config-file /etc/cb/integrations/cb-yara-connector/yaraconnector.conf --lock-file /var/run/yaraconnector.pid --run-forever True -StandardError=file:/var/log/cb/integrations/cb-yara-connector/yaraconnector.log -StandardOutput=file:/var/log/cb/integrations/cb-yara-connector/yaraconnector.log +ExecStart=/usr/share/cb/integrations/cb-yara-connector/yaraconnector --debug --config-file /etc/cb/integrations/cb-yara-connector/yaraconnector.conf --run-forever True WorkingDirectory=/usr/share/cb/integrations/cb-yara-connector #User=cb #Group=cb diff --git a/cb-yara-connector.spec b/cb-yara-connector.spec index 6d3e091..2581a05 100644 --- a/cb-yara-connector.spec +++ b/cb-yara-connector.spec @@ -8,7 +8,7 @@ block_cipher = None a = Analysis(['src/main.py'], pathex=['./src'], binaries=[], -
hiddenimports=['lockfile','celery.app.control','celery.worker.strategy','celery.worker.consumer','celery.events.state','celery.worker.autoscale','celery.worker.components','celery.concurrency.prefork','celery.apps','celery.apps.worker','celery.app.log','celery.fixups', 'celery.fixups.django', 'celery.loaders.app','celery.app.amqp', 'kombu.transport.redis', 'redis', 'celery.backends','celery.backends.redis', 'celery.app.events', 'celery.events','celery.redis','kombu.transport.pyamqp'], + hiddenimports=['billiard','billiard.heap','lockfile','mmap','celery.app.control','celery.worker.strategy','celery.worker.consumer','celery.events.state','celery.worker.autoscale','celery.worker.components','celery.concurrency.prefork','celery.apps','celery.apps.worker','celery.app.log','celery.fixups', 'celery.fixups.django', 'celery.loaders.app','celery.app.amqp', 'kombu.transport.redis', 'redis', 'celery.backends','celery.backends.redis', 'celery.app.events', 'celery.events','celery.redis','kombu.transport.pyamqp'], hookspath=[], runtime_hooks=[], excludes=[], diff --git a/requirements.txt b/requirements.txt index 2a8fb2b..dace174 100644 --- a/requirements.txt +++ b/requirements.txt @@ -15,3 +15,5 @@ simplejson==3.17.0 urllib3==1.25.7 yara-python==3.11.0 psutil==5.6.7 +yara-python==3.11.0 # Oct 10, 2019 +psutil==5.6.7 diff --git a/src/main.py b/src/main.py index 0d3aafa..ef0fb46 100644 --- a/src/main.py +++ b/src/main.py @@ -19,6 +19,7 @@ import lockfile import psutil +import mmap import psycopg2 # noinspection PyPackageRequirements import yara @@ -366,7 +367,7 @@ def get_log_file_handles(use_logger) -> List: return handles -def handle_sig(exit_event: Event, sig: int) -> None: +def handle_sig(exit_event: Event, sig: int, frame) -> None: """ Signal handler - handle the signal and mark exit if its an exiting signal type. 
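The extra `frame` parameter added to `handle_sig` in the hunk above matches the way Python invokes signal handlers, as `handler(signum, frame)`, so a handler pre-bound with `functools.partial` over the exit event must accept both remaining arguments. A sketch of the wiring, simplified from the daemon context's `signal_map` seen in earlier patches:

```python
import signal
from functools import partial
from threading import Event

def handle_sig(exit_event: Event, sig: int, frame) -> None:
    # Mark exit on terminating signals; frame is unused but required
    # by the signal-handler calling convention.
    if sig in (signal.SIGTERM, signal.SIGQUIT):
        exit_event.set()

exit_event = Event()
sig_handler = partial(handle_sig, exit_event)  # binds the event, leaving (sig, frame)
signal.signal(signal.SIGTERM, sig_handler)
```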
@@ -647,7 +648,7 @@ def terminate_celery_worker(worker_obj: worker = None): :param worker_obj: worker object """ - with open('/tmp/yaraconnectorceleryworker') as cworkerpidfile: + """with open('/tmp/yaraconnectorceleryworker') as cworkerpidfile: worker_pid_str = cworkerpidfile.readline() worker_pid = int(worker_pid_str) if len(worker_pid_str.strip()) > 0 else None if worker_pid: @@ -661,7 +662,7 @@ def terminate_celery_worker(worker_obj: worker = None): else: logger.debug("Didn't find a worker-pidfile to terminate on exit...") - time.sleep(5.0) + time.sleep(5.0)""" if worker_obj: worker_obj.die("Worker terminated") @@ -853,9 +854,9 @@ def main(): run_to_exit_signal(exit_event) finally: try: - wait_all_worker_exit_threads(threads, timeout=10.0) + wait_all_worker_exit_threads(threads, timeout=4.0) finally: - terminate_celery_worker(localworker) + #terminate_celery_worker(localworker) logger.info("Yara connector shutdown") else: # | | | BATCH MODE | | | @@ -876,15 +877,15 @@ def main(): ) ) run_to_exit_signal(exit_event) - wait_all_worker_exit_threads(threads, timeout=10.0) - # terminate_celery_worker(localworker) + wait_all_worker_exit_threads(threads,timeout=4.0) + #terminate_celery_worker(localworker) except KeyboardInterrupt: logger.info("\n\n##### Interupted by User!\n") except Exception as err: logger.error(f"There were errors executing yara rules: {err}") finally: exit_event.set() - terminate_celery_worker(localworker) + #terminate_celery_worker(localworker) if __name__ == "__main__": From ca7c68de69379f4697c822fd883aa4674233318a Mon Sep 17 00:00:00 2001 From: Zachary Estep Date: Mon, 6 Jan 2020 15:04:22 -0500 Subject: [PATCH 184/257] updates --- cb-yara-connector.service | 6 +++--- src/main.py | 9 +++++++-- 2 files changed, 10 insertions(+), 5 deletions(-) diff --git a/cb-yara-connector.service b/cb-yara-connector.service index 7ec1912..cc3d389 100644 --- a/cb-yara-connector.service +++ b/cb-yara-connector.service @@ -7,9 +7,9 @@ Environment=C_FORCE_ROOT=1 Type=forking ExecStart=/usr/share/cb/integrations/cb-yara-connector/yaraconnector --debug --config-file /etc/cb/integrations/cb-yara-connector/yaraconnector.conf --run-forever True WorkingDirectory=/usr/share/cb/integrations/cb-yara-connector -#User=cb -#Group=cb +User=cb +Group=cb [Install] -WantedBy=multi-user.target \ No newline at end of file +WantedBy=multi-user.target diff --git a/src/main.py b/src/main.py index ef0fb46..abd8058 100644 --- a/src/main.py +++ b/src/main.py @@ -31,6 +31,7 @@ import globals from analysis_result import AnalysisResult from binary_database import BinaryDetonationResult, db +from celery.exceptions import WorkerLostError from celery_app import app from config_handling import ConfigurationInit from feed import CbFeed, CbFeedInfo, CbReport @@ -79,6 +80,10 @@ def analysis_worker( hash_queue.task_done() except Empty: exit_event.wait(1) + except WorkerLostError as we: + logger.debug(f"Lost connection to remote worker..exiting") + exit_event.set() + break except Exception as err: logger.debug(f"Exception in wait: {err}") exit_event.wait(0.1) @@ -648,7 +653,7 @@ def terminate_celery_worker(worker_obj: worker = None): :param worker_obj: worker object """ - """with open('/tmp/yaraconnectorceleryworker') as cworkerpidfile: + with open('/tmp/yaraconnectorceleryworker') as cworkerpidfile: worker_pid_str = cworkerpidfile.readline() worker_pid = int(worker_pid_str) if len(worker_pid_str.strip()) > 0 else None if worker_pid: @@ -662,7 +667,7 @@ def terminate_celery_worker(worker_obj: worker = None): else: 
logger.debug("Didn't find a worker-pidfile to terminate on exit...") - time.sleep(5.0)""" + time.sleep(1.0) if worker_obj: worker_obj.die("Worker terminated") From ac677398e27253826d60f9a755406380e0e192fb Mon Sep 17 00:00:00 2001 From: Zachary Estep Date: Mon, 6 Jan 2020 15:09:40 -0500 Subject: [PATCH 185/257] docker file for building rpms --- dockerfile | 27 +++++++++++++++++++++++++++ 1 file changed, 27 insertions(+) create mode 100644 dockerfile diff --git a/dockerfile b/dockerfile new file mode 100644 index 0000000..a07c931 --- /dev/null +++ b/dockerfile @@ -0,0 +1,27 @@ +FROM centos:7 +RUN yum -y install rpm-build +RUN yum -y install epel-release +RUN yum -y install python36 python36-devel +RUN yum -y install git +RUN yum -y install make +RUN yum -y install gcc gcc-devel +RUN yum -y install automake libtool make gcc +RUN groupadd -r cb && useradd --no-log-init -r -g cb cb +RUN mkdir /home/cb +RUN chown cb:cb /home/cb +RUN pip3 install virtualenv virtualenvwrapper +USER cb +WORKDIR /home/cb +RUN mkdir -p ~/rpmbuild/{BUILD,RPMS,SOURCES,SPECS,SRPMS} +RUN virtualenv yaraconnector +RUN source ./yaraconnector/bin/activate +RUN git clone https://github.com/carbonblack/cb-yara-connector +WORKDIR /home/cb/cb-yara-connector +RUN git checkout feature-cb-28268 +RUN pip3 install -r requirements.txt --user +RUN pip3 install pyinstaller==3.5.0 --user +ENV PATH $PATH:~/.local/bin +RUN make clean ; make rpm +USER root +#RUN yum install -y /home/cb/rpmbuild/RPMS/x86_64/python-cb-yara-connector.*.rpm +CMD ["/bin/bash","-c"] From 0b9a0a98f59a7943d9212c9290a60f98d49cdd86 Mon Sep 17 00:00:00 2001 From: Zachary Estep Date: Mon, 6 Jan 2020 16:37:09 -0500 Subject: [PATCH 186/257] Docker utils and readme updates --- README.md | 198 ++++++++++++++++++++++++++------------------ docker-build-rpm.sh | 9 ++ 2 files changed, 127 insertions(+), 80 deletions(-) create mode 100755 docker-build-rpm.sh diff --git a/README.md b/README.md index f43ff06..448abb1 100644 --- a/README.md +++ b/README.md @@ -1,36 +1,96 @@ -# Installing Yara Agent (Centos/RHEL 6) +# Installing Yara Agent (Centos/RHEL 7+) -The Yara agent must be installed on the same system as Cb Response. +The Yara agent has two parts - a master and a potentially remote worker. -* Create installation area - ```shell script - mkdir -p /usr/share/cb/integrations/yara - ``` -* Download Yara Agent +The task-master service must be installed on the same system as Cb Response. - ``` - wget -O /usr/share/cb/integrations/yara/yara_agent https://github.com/carbonblack/cb-yara-connector/releases/download/2.0.1/yara_agent - ``` - -* Download Yara Logo +You can download the latest RPM from the github releases page, here(https://github.com/carbonblack/cb-yara-connector/releases/download/untagged-0b4e650fa85727815eb2/python-cb-threatconnect-connector-2.1.0-1.x86_64.rpm). - ``` - wget -O /usr/share/cb/integrations/yara/yara-logo.png https://github.com/carbonblack/cb-yara-connector/releases/download/2.0.1/yara-logo.png - ``` +`yum install python-cb-yara-connector-.rpm` will install the connector from the downloaded RPM. + +The connector uses a configured directory containing yara rules, to efficiently scan binaries as they +are seen by the CB Response Server, and uses the generated threat information to produce an +intelligence feed for consumption by the CbR server. 
+ +The yara connector uses celery-queues to distribute work to remote workers - you will need to install and +configure a broker (probably redis - but any broker compatible with celery 4.x+ will do) that is accessible +to the master node and to any worker(s). + +# Dev install # + +`git clone https://github.com/carbonblack/cb-yara-connector` +create a virtual environment with python3.6+ and install the requirements from the requirements.txt file +`pip3 install -r requirements.txt` + +There is a docker file provided that sets up a basic dev/build environment for the connector. ## Create Yara Agent Config -Copy and modify `sample_local.conf` from the `samples` folder -to your desired location. +The installation process will create a sample configuration file +`/etc/cb/integrations/cb-yara-connector/yaraconnector.sample.conf` +use this and create the real configuration file: +`cp /etc/cb/integrations/cb-yara-connector/yaraconnector.sample.conf /etc/cb/integrations/cb-yara-connector/yaraconnector.conf` -> NOTES: -> 1) All paths can use `~/` to allow the use of the user's home directory. +You must configure the postgres connection information for your CBR server, and the REST API location and credentials as well. -#### Running Yara Agent Manually +~~~ +; +; Cb Response postgres Database settings +; +postgres_host=127.0.0.1 +postgres_username=cb +postgres_password= +postgres_db=cb +postgres_port=5002 +~~~ -```shell script -./yara_agent --config-file= -``` +You can find your postgres credentials in `/etc/cb/cb.conf`; the port, host, db, and user should be as above. + +You can find your API credential information in the UI settings pane of your Carbon Black Response server. + +~~~ +; +; ONLY for worker_type of local +; Cb Response Server settings for scanning locally. +; For remote scanning please set these parameters in the yara worker config file +; Default: https://127.0.0.1 +; +cb_server_url=https://localhost +cb_server_token= +~~~ + +You must configure `broker=` which sets the broker and results_backend for celery. You will set this appropriately as per the celery documentation, [here](https://docs.celeryproject.org/en/latest/getting-started/brokers/). + + +The yara-connector RPM contains a service that can be run in a few distinct configurations: +1) A local task master and remote worker(s) (RECOMMENDED) + +This is the preferred mode of operation, using a celery broker to distribute analysis-tasks to a remote worker from a local task-master that keeps track of binaries resident in the configured server. + +Install the connector on the cbr server as the task-master, and specify that the worker will be remote: + +Read through the configuration file and specify the mode of operation: `mode=master` and `worker_type=remote`. This represents the configuration for the task master, which will distribute work over the configured broker/backend to each configured worker. + +On the worker(s), install the rpm again, and in the configuration file specify the same CBR server information, but +configure `mode=slave` and `worker_type=local`. This configuration file doesn't need postgres credentials, but does require REST API access to the Carbon Black Response server in question. + +2) A local task master and local celery worker (NOT RECOMMENDED) +Read through the configuration file, specify the mode of operation as `mode=master` and `worker_type=local`. +A single daemon will scan binaries, local to the cbr instance.
This configuration requires both the postgres and REST API credentials from Carbon Black Response, in order to function correctly. + +## Input your yara rules + +The yara connector monitors the directory `/etc/cb/integrations/cb-yara-connector/yara_rules` for files (`.yar`) each specifying one or more yara rules. Your rules need to have a `metadata` section with a `score: [1-10]` tag to appropriately score matching binaries. This directory is configurable in your configuration file. + +The yara connector is bound by libyara.so's limitations for matched strings, number of compiled rules, etc. + +#### Running Yara Agent + +`systemctl start cb-yara-connector` will start the service using systemd. +`systemctl stop cb-yara-connector` will gracefully stop the yara-connector. +`systemctl status -l cb-yara-connector` will display logging information. + +These commands are identical for both the master and any remote workers. ##### Command-line Options ```text @@ -121,61 +181,61 @@ _[TBD]_ * Create Yara Worker Config File `yara_worker.conf` -#### Example Yara Worker Config File +#### Example Yara Connector Master configuration ```ini [general] ; -; Python Celery Broker Url. Set this full url string for Redis +; Python Celery Broker Url. Set this full url string ; Example: redis:// ; broker_url=redis://127.0.0.1 +mode=master + +worker_type=remote + ; ; Cb Response Server Configuration ; Used for downloading binaries ; -cb_server_url= -cb_server_token= +cb_server_url=https://localhost +cb_server_token=aafdasfdsafdsafdsa ; ; Directory for temporary yara rules storage ; WARNING: Put your yara rules with the yara agent. This is just temporary storage. ; -yara_rules_dir=./yara_rules +yara_rules_dir=/etc/cb/integrations/cb-yara-connector/yara-rules ``` -* Copy, modify and save to `yara_worker.conf` - -#### Run Yara Worker Manually + +### Example Remote Worker configuration - celery -A tasks worker --config-file=yara_worker.conf --concurrency=10 --loglevel=info -#### Example Supervisor Config - [program:yara_workers] - stdout_logfile=/var/log/yara_worker.log - stderr_logfile=/var/log/yara_worker.log - user= - directory=/home//cb-yara-connector - command=/home//cb-yara-connector/venv/bin/celery -A tasks worker --config-file=yara_worker.conf --concurrency=10 --loglevel=info - autostart=true - autorestart=true -* Copy the above, modify and add to `/etc/supervisord.conf` +```ini +[general] + +; +; Python Celery Broker Url. Set this full url string +; Example: redis:// +; +broker_url=redis://127.0.0.1 + +mode=slave + +worker_type=local + +; +; Cb Response Server Configuration +; Used for downloading binaries +; +cb_server_url=https://localhost +cb_server_token=aafdasfdsafdsafdsa + +``` -* Enabled Supervisor - ``` - systemctl enable supervisord - ``` - -* Restart Supervisor - ``` - systemctl restart supervisord - ``` # Development Notes ## Utility Script ... utility_interval=-1 utility_script=./scripts/vacuumscript.sh ``` -## Yara Agent Build Instructions (Centos 6) - -### Install Dependencies - -* zlib-devel -* openssl-devel -* sqlite-devel - -### Install Python 3.6 - - ./configure --prefix=/usr/local --enable-shared LDFLAGS="-Wl,-rpath /usr/local/lib" - make - make altinstall - -### Create VirtualEnv - - python3.6 -m venv venv-build - source ./venv-build/bin/activate - pip install -r requirements.txt - -### Create Executable +## Yara Agent Build Instructions +The dockerfile in the top-level of the repo contains a centos7 environment for running, building, and testing
- pyinstaller main.spec +The provided script `docker-build-rpm.sh` will use docker to build the project, and place the RPM(s) in $PWD/RPMS. diff --git a/docker-build-rpm.sh b/docker-build-rpm.sh new file mode 100755 index 0000000..d8818f5 --- /dev/null +++ b/docker-build-rpm.sh @@ -0,0 +1,9 @@ +#!/bin/bash +docker rmi yaraconnectorrpmbuild --force +docker rm yaraconnectorrpmbuild --force +docker build -t yaraconnectorrpmbuild . +docker run -d --name yaraconnectorrpmbuild -it yaraconnectorrpmbuild tail -f /dev/null +docker cp yaraconnectorrpmbuild:/home/cb/rpmbuild/RPMS . +docker stop yaraconnectorrpmbuild +docker rm yaraconnectorrpmbuild +docker rmi yaraconnectorrpmbuild --force From f271374aa17adae3d3e3392f345ad7100366e218 Mon Sep 17 00:00:00 2001 From: Burton Choinski Date: Tue, 7 Jan 2020 09:26:35 -0500 Subject: [PATCH 187/257] * Finished core unit tests. * changed utility logic to not require path on check --- src/config_handling.py | 74 +++++----- test/test_configCore.py | 319 ++++++++++++++++++++++++++++++++++++++++ 2 files changed, 360 insertions(+), 33 deletions(-) create mode 100644 test/test_configCore.py diff --git a/src/config_handling.py b/src/config_handling.py index 1005806..701a64d 100644 --- a/src/config_handling.py +++ b/src/config_handling.py @@ -50,7 +50,7 @@ class ConfigurationInit(object): Class to deal with all configuration loading and validation. """ - def __init__(self, config_file: str, output_file: str = None) -> None: + def __init__(self, config_file: str, output_file: str = None, **kwargs) -> None: """ Validate the config file. :param config_file: The config file to validate @@ -75,6 +75,10 @@ def __init__(self, config_file: str, output_file: str = None) -> None: raise CbInvalidConfig(f"{self.source} does not have a 'general' section") self.the_config = config["general"] + # if testing core methods, get out now + if "TESTING_ONLY" in kwargs: + return + # warn about unknown parameters -- typos? 
extras = [] try: @@ -108,7 +112,7 @@ def _worker_check(self) -> None: else: globals.g_remote = True - globals.g_yara_rules_dir = self._as_path("yara_rules_dir", required=True, exists=True, is_dir=True) + globals.g_yara_rules_dir = self._as_path("yara_rules_dir", required=True, check_exists=True, expect_dir=True) # local/remote configuration data cb_req = not (globals.g_mode == "master" and globals.g_remote) @@ -155,19 +159,19 @@ def _extended_check(self) -> None: globals.g_utility_interval = 0 globals.g_utility_script = "" else: - globals.g_utility_script = self._as_path("utility_script", required=True, is_dir=False, + globals.g_utility_script = self._as_path("utility_script", required=True, expect_dir=False, default=globals.g_utility_script) logger.warning(f"{self.source} utility script '{globals.g_utility_script}' is enabled; " + "use this advanced feature at your own discretion!") else: - if self._as_path("utility_script", required=False, default=globals.g_utility_script): + if self._as_str("utility_script", required=False, default=globals.g_utility_script) != "": logger.debug(f"{self.source} has 'utility_script' defined, but it is disabled") globals.g_utility_script = "" # developer use only globals.g_utility_debug = self._as_bool("utility_debug", default=False) - globals.g_feed_database_dir = self._as_path("feed_database_dir", required=True, is_dir=True, + globals.g_feed_database_dir = self._as_path("feed_database_dir", required=True, expect_dir=True, default=globals.g_feed_database_dir, create_if_needed=True) globals.g_scanning_interval = self._as_int('database_scanning_interval', default=globals.g_scanning_interval, @@ -203,7 +207,7 @@ def _as_str(self, param: str, required: bool = False, default: str = "", allowed return value - def _as_path(self, param: str, required: bool = False, exists: bool = True, is_dir: bool = False, + def _as_path(self, param: str, required: bool = False, check_exists: bool = True, expect_dir: bool = False, default: str = "", create_if_needed: bool = False) -> str: """ Get a string parameter from the configuration and treat it as a path, performing normalization @@ -212,35 +216,37 @@ def _as_path(self, param: str, required: bool = False, exists: bool = True, is_d :param param: Name of the configuration parameter :param required: True if this must be specified in the configuration - :param exists: if True and required, check for existance as well - :param is_dir: if exists and True, source must be a directory :param default: If not required, default value if not supplied - :param create_if_needed: if True, create any directory if it does not exist - :return: the integer value, or None if not required and no exception + :param check_exists: if True, check for existance + :param expect_dir: if exists and True, target must be a directory + :param create_if_needed: if True and we expect a directory, create if it does not exist + :return: the path value, or empty string if not required and no exception :raises CbInvalidConfig: """ - value = self._as_str(param, required, default=default) - value = os.path.abspath(os.path.expanduser(value)) - if exists and required: - if not os.path.exists(value): - if create_if_needed and is_dir: - try: - os.makedirs(value) - except Exception as err: - raise CbInvalidConfig(f"{self.source} unable to create '{value}' for '{param}': {err}") - else: - raise CbInvalidConfig(f"{self.source} specified path parameter '{param}' ({value}) does not exist") - if is_dir: - if not os.path.isdir(value): - raise CbInvalidConfig(f"{self.source} 
specified path '{param}' ({value}) is not a directory") - else: - if os.path.isdir(value): - raise CbInvalidConfig(f"{self.source} specified path '{param}' ({value}) is a directory") + value = self._as_str(param, required=required, default=default) - return value - - def _as_int(self, param: str, required: bool = False, default: int = -1, min_value: int = None, - ) -> int: + if value == "": # not required and not specified + return value + else: + value = os.path.abspath(os.path.expanduser(value)) + if check_exists: + if os.path.exists(value): # path exists + if expect_dir and not os.path.isdir(value): + raise CbInvalidConfig(f"{self.source} specified path '{param}' ({value}) is not a directory") + elif not expect_dir and os.path.isdir(value): + raise CbInvalidConfig(f"{self.source} specified path '{param}' ({value}) is a directory") + else: # does not exist + if create_if_needed and expect_dir: + try: + os.makedirs(value) + except Exception as err: + raise CbInvalidConfig(f"{self.source} unable to create '{value}' for '{param}': {err}") + else: + raise CbInvalidConfig( + f"{self.source} specified path parameter '{param}' ({value}) does not exist") + return value + + def _as_int(self, param: str, required: bool = False, default: int = -1, min_value: int = None) -> int: """ Get an integer configuration parameter from the configuration. A parameter that cannot be converted to an int will return a ValueError. @@ -253,23 +259,25 @@ def _as_int(self, param: str, required: bool = False, default: int = -1, min_val :raises CbInvalidConfig: :raises ValueError: """ + self._as_str(param, required=required) # required check value = int(self._as_str(param, required=required, default=str(default))) if min_value is not None and value < min_value: raise CbInvalidConfig(f"{self.source} '{param}' must be greater or equal to {min_value}") return value # noinspection PySameParameterValue - def _as_bool(self, param: str, required: bool = False, default: bool = None) -> Optional[bool]: + def _as_bool(self, param: str, required: bool = False, default: bool = False) -> Optional[bool]: """ Get a boolean configuration parameter from the configuration. A parameter not one of ["true", "yes", "false", "no"] will return a ValueError. :param param: Name of the configuration parameter :param required: True if this must be specified in the configuration - :return: the boolean value, or None if not required and no exception + :return: the boolean value, or False if not required and no exception :raises CbInvalidConfig: :raises ValueError: """ + self._as_str(param, required=required) # required check value = self._as_str(param, required=required, default=str(default)) if value is not None and value.lower() not in ["true", "yes", "false", "no"]: raise ValueError(f"{self.source} parameter '{param}' is not a valid boolean value") diff --git a/test/test_configCore.py b/test/test_configCore.py new file mode 100644 index 0000000..071873a --- /dev/null +++ b/test/test_configCore.py @@ -0,0 +1,319 @@ +# coding: utf-8 +# Copyright © 2014-2019 VMware, Inc. All Rights Reserved. + +import os +import shutil +from typing import Union +from unittest import TestCase + +from config_handling import ConfigurationInit +from exceptions import CbInvalidConfig + +TESTS = os.path.abspath(os.path.dirname(__file__)) +JUNK = os.path.join(TESTS, "test-artifacts") +TESTCONF = os.path.join(JUNK, "conf-testing.conf") + + +class TestConfigurationCore(TestCase): + + def setUp(self) -> None: + """ + Reset globals and recreate a base configuration. 
+ :return: + """ + if os.path.exists(JUNK): + shutil.rmtree(JUNK) + os.makedirs(JUNK) + + def tearDown(self) -> None: + """ + Cleanup after testing. + """ + if os.path.exists(JUNK): + shutil.rmtree(JUNK) + + @staticmethod + def config(value: Union[int, str, bool] = None) -> ConfigurationInit: + """ + Create a test config file with the TESTME test param. + :param value: value to be used; if None, it won't exist. + """ + with open(TESTCONF, "w") as fp: + fp.write("[general]\n") + if value is not None: + fp.write("TESTME={0}".format(value)) + + return ConfigurationInit(TESTCONF, TESTING_ONLY=True) + + @staticmethod + def makedir(name: str) -> str: + """ + Make directory for testing. + :param name: directory name to be created + """ + real_path = os.path.join(JUNK, name) + os.makedirs(real_path) + return real_path + + @staticmethod + def makefile(name: str) -> str: + """ + Make file for testing. + :param name: file name to be created + """ + real_path = os.path.join(JUNK, name) + with open(real_path, "w") as fp: + fp.write("OK") + fp.flush() + return real_path + + # ----- Begin Tests ---------------------------------------------------------------------- + + def test_01a_as_str_default(self): + """ + Validate _as_str when no value specified (use default) + """ + cfg = self.config() + + value = cfg._as_str("TESTME") + self.assertEqual("", value) + + def test_01b_as_str_changed_default(self): + """ + Validate _as_str when no value specified (use new default) + """ + cfg = self.config() + + value = cfg._as_str("TESTME", default="changed") + self.assertEqual("changed", value) + + def test_01c_as_str_value(self): + """ + Validate _as_str when value specified. + """ + cfg = self.config("ok") + + value = cfg._as_str("TESTME") + self.assertEqual("ok", value) + + def test_01d_as_str_required(self): + """ + Validate _as_str when no value specified but is required. + """ + cfg = self.config() + + with self.assertRaises(CbInvalidConfig): + cfg._as_str("TESTME", required=True) + + def test_01e_as_str_value_not_in_allowed(self): + """ + Validate _as_str when value specified but not in allowed values. + """ + cfg = self.config("okay") + + with self.assertRaises(CbInvalidConfig): + cfg._as_str("TESTME", allowed=["allowed"]) + + def test_01f_as_str_value_in_allowed(self): + """ + Validate _as_str when value specified. + """ + cfg = self.config("ok") + + value = cfg._as_str("TESTME", allowed=["okay", "ok"]) + self.assertEqual("ok", value) + + def test_02a_as_int_default(self): + """ + Validate _as_int when no value specified (use default) + """ + cfg = self.config() + + value = cfg._as_int("TESTME") + self.assertEqual(-1, value) + + def test_02b_as_int_changed_default(self): + """ + Validate _as_int when no value specified (use new default) + """ + cfg = self.config() + + value = cfg._as_int("TESTME", default=10) + self.assertEqual(10, value) + + def test_02c_as_int_value(self): + """ + Validate _as_int when value specified. + """ + cfg = self.config(20) + + value = cfg._as_int("TESTME") + self.assertEqual(20, value) + + def test_02d_as_int_required(self): + """ + Validate _as_int when no value specified but is required. + """ + cfg = self.config() + + with self.assertRaises(CbInvalidConfig): + cfg._as_int("TESTME", required=True) + + def test_02e_as_int_value_below_minimum(self): + """ + Validate _as_int when value specified but is below the allowed minimum. 
+ """ + cfg = self.config(5) + + with self.assertRaises(CbInvalidConfig): + cfg._as_int("TESTME", min_value=10) + + def test_02f_as_int_value_at_minimum(self): + """ + Validate _as_int when value specified but is at the allowed minimum. + """ + cfg = self.config(5) + + value = cfg._as_int("TESTME", min_value=5) + self.assertEqual(5, value) + + def test_02g_as_int_value_above_minimum(self): + """ + Validate _as_int when value specified but is above the allowed minimum. + """ + cfg = self.config(10) + + value = cfg._as_int("TESTME", min_value=5) + self.assertEqual(10, value) + + def test_03a_as_bool_default(self): + """ + Validate _as_bool when no value specified (use default) + """ + cfg = self.config() + + value = cfg._as_bool("TESTME") + self.assertEqual(False, value) + + def test_03b_as_bool_changed_default(self): + """ + Validate _as_bool when no value specified (use new default) + """ + cfg = self.config() + + value = cfg._as_bool("TESTME", default=True) + self.assertEqual(True, value) + + def test_03c_as_bool_value(self): + """ + Validate _as_bool when value specified. + """ + cfg = self.config(True) + + value = cfg._as_bool("TESTME") + self.assertEqual(True, value) + + def test_03d_as_bool_required(self): + """ + Validate _as_bool when no value specified but is required. + """ + cfg = self.config() + + with self.assertRaises(CbInvalidConfig): + cfg._as_bool("TESTME", required=True) + + def test_04a_as_path_default(self): + """ + Validate _as_path when no value specified (use default) + """ + cfg = self.config() + + value = cfg._as_path("TESTME") + self.assertEqual("", value) + + def test_04b_as_path_changed_default(self): + """ + Validate _as_path when no value specified (use new default) + """ + cfg = self.config() + + value = cfg._as_path("TESTME", default="/tmp", check_exists=False) + self.assertEqual("/tmp", value) + + def test_04c_as_path_required(self): + """ + Validate _as_path when no value specified but is required. + """ + cfg = self.config() + + with self.assertRaises(CbInvalidConfig): + cfg._as_bool("TESTME", required=True) + + def test_04d_as_path_specified_not_exists(self): + """ + Validate _as_path when supplied path does not exist + """ + cfg = self.config("NOSUCH") + + with self.assertRaises(CbInvalidConfig): + cfg._as_path("TESTME", check_exists=True) + + def test_04e_as_path_specified_check_exists_dir(self): + """ + Validate _as_path when supplied path resolves to a directory and we expect a directory + """ + path = self.makedir("CREATED-04e") + cfg = self.config(path) + + value = cfg._as_path("TESTME", check_exists=True, expect_dir=True) + self.assertEqual(path, value) + + def test_04f_as_path_specified_check_exists_dir_but_is_file(self): + """ + Validate _as_path when supplied path resolves to a file and we expect a directory + """ + path = self.makefile("CREATED-04f") + cfg = self.config(path) + + with self.assertRaises(CbInvalidConfig): + cfg._as_path("TESTME", check_exists=True, expect_dir=True) + + def test_04g_as_path_specified_check_exists_file(self): + """ + Validate _as_path when supplied path resolves to a file and we expect a file. 
+ """ + path = self.makefile("CREATED-04g") + cfg = self.config(path) + + value = cfg._as_path("TESTME", check_exists=True, expect_dir=False) + self.assertEqual(path, value) + + def test_04h_as_path_specified_check_exists_file_but_is_dir(self): + """ + Validate _as_path when supplied path resolves to a directory and we expect a file + """ + path = self.makedir("CREATED-04h") + cfg = self.config(path) + + with self.assertRaises(CbInvalidConfig): + cfg._as_path("TESTME", check_exists=True, expect_dir=False) + + def test_04i_as_path_specified_check_exists_create_if_needed_dir(self): + """ + Validate _as_path when supplied path does not exist but we specify create_if_needed. + """ + path = os.path.join(JUNK, "CREATED-04i") + cfg = self.config(path) + + value = cfg._as_path("TESTME", check_exists=True, expect_dir=True, create_if_needed=True) + self.assertEqual(path, value) + + def test_04j_as_path_specified_check_exists_create_if_needed_file(self): + """ + Validate _as_path when supplied path does not exist and cannot be created by create_if_needed when we are expecting a file. + """ + path = os.path.join(JUNK, "CREATED-04j") + cfg = self.config(path) + + with self.assertRaises(CbInvalidConfig): + cfg._as_path("TESTME", check_exists=True, expect_dir=False, create_if_needed=True) From 4a8663a4c0c7287c4772dad08d2d6380de8c52d1 Mon Sep 17 00:00:00 2001 From: Zachary Estep Date: Tue, 7 Jan 2020 10:29:15 -0500 Subject: [PATCH 188/257] README touchup --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 448abb1..4ea28fc 100644 --- a/README.md +++ b/README.md @@ -4,7 +4,7 @@ The Yara agent has two parts - a master and a potentially remote worker. The task-master service must be installed on the same system as Cb Response. -You can download the latest RPM from the github releases page, here(https://github.com/carbonblack/cb-yara-connector/releases/download/untagged-0b4e650fa85727815eb2/python-cb-threatconnect-connector-2.1.0-1.x86_64.rpm). +You can download the latest RPM from the github releases page, [here](https://github.com/carbonblack/cb-yara-connector/releases/download/untagged-0b4e650fa85727815eb2/python-cb-threatconnect-connector-2.1.0-1.x86_64.rpm). `yum install python-cb-yara-connector-.rpm` will install the connector from the downloaded RPM. From 538e2550039bd57e53471e378814620311f84f57 Mon Sep 17 00:00:00 2001 From: Zachary Estep Date: Tue, 7 Jan 2020 10:52:06 -0500 Subject: [PATCH 189/257] Update README.md --- README.md | 19 ++++++++++--------- 1 file changed, 10 insertions(+), 9 deletions(-) diff --git a/README.md b/README.md index 4ea28fc..93af354 100644 --- a/README.md +++ b/README.md @@ -1,16 +1,18 @@ # Installing Yara Agent (Centos/RHEL 7+) -The Yara agent has two parts - a master and a potentially remote worker. +The Yara agent has two parts a master and one or more workers. -The task-master service must be installed on the same system as Cb Response. +The master service must be installed on the same system as Cb Response. -You can download the latest RPM from the github releases page, [here](https://github.com/carbonblack/cb-yara-connector/releases/download/untagged-0b4e650fa85727815eb2/python-cb-threatconnect-connector-2.1.0-1.x86_64.rpm). +Download the latest RPM from the github releases page, [here](https://github.com/carbonblack/cb-yara-connector/releases/download/untagged-0b4e650fa85727815eb2/python-cb-threatconnect-connector-2.1.0-1.x86_64.rpm).
-`yum install python-cb-yara-connector-.rpm` will install the connector from the downloaded RPM. +The connector can be easily installed from an rpm: + +`yum install python-cb-yara-connector-.rpm` The connector uses a configured directory containing yara rules to efficiently scan binaries as they -are seen by the CB Response Server, and uses the generated threat information to produce an -intelligence feed for consumption by the CbR server. +are seen by the CB Response Server. The generated threat information is used to produce an +intelligence feed for ingestion back into the Cb Response Server. The yara connector uses celery-queues to distribute work to remote workers - you will need to install and configure a broker (probably redis - but any broker compatible with celery 4.x+ will do) that is accessible @@ -44,9 +46,8 @@ postgres_db=cb postgres_port=5002 ~~~ -You can find your postgres credentails in `/etc/cb/cb.conf`, the port, host, db, user should be as above. - -You can find your API credential information in UI settings pane of your Carbon Black Response server. +The postgres credentails in `/etc/cb/cb.conf`, the port, host, db, user should be as above. +REST API Credentials are available in the UI of your Carbon Black Response Server. ~~~ ; From 4f140658e1a529e27aa71b34ad73b0bd9664f170 Mon Sep 17 00:00:00 2001 From: Zachary Estep Date: Tue, 7 Jan 2020 10:58:33 -0500 Subject: [PATCH 190/257] Update README.md --- README.md | 142 ++++++++++++++++++------------------------ 1 file changed, 46 insertions(+), 96 deletions(-) diff --git a/README.md b/README.md index 93af354..27d52f7 100644 --- a/README.md +++ b/README.md @@ -20,17 +20,20 @@ to the master node and to any worker(s). # Dev install # +Use git to retrieve the project, create a new virtual environment using python3.6+ and use pip to install the requirements: + `git clone https://github.com/carbonblack/cb-yara-connector` -create a virtual environment with python3.6+ and install the requirements from the requirements.txt file `pip3 install -r requirements.txt` -There is a docker file provided that setups a basic dev/build envirionment for the connector. - ## Create Yara Agent Config -The installation process will create a sample configuration file +The connector is configured by a .ini formatted configuration file at `/etc/cb/integrations/cb-yara-connector/yaraconnector.conf`. + +The installation process will create a sample configuration file: `/etc/cb/integrations/cb-yara-connector/yaraconnector.sample.conf` -use this and create the real configuration file: + +Copy the sample configuration file, to edit to produce a working configuration for the connector: + `cp /etc/cb/integrations/cb-yara-connector/yaraconnector.sample.conf /etc/cb/integrations/cb-yara-connector/yaraconnector.conf` You must configure the postgres connection information for your CBR server , and the rest API location and credentails as well. @@ -91,97 +94,6 @@ The yara connector is boudn by libyara.so's limitations for matched strings, num `systemctl stop cb-yara-connector` will gracefully stop the yara-connector. `systemctl status -l cb-yara-connector` will display logging information.
- -##### Command-line Options -```text -usage: main.py [-h] --config-file CONFIG_FILE [--log-file LOG_FILE] - [--output-file OUTPUT_FILE] [--validate-yara-rules] [--debug] - -Yara Agent for Yara Connector - -optional arguments: - -h, --help show this help message and exit - --config-file CONFIG_FILE - Location of the config file - --log-file LOG_FILE Log file output (defaults to `local` folder) - --output-file OUTPUT_FILE - output feed file (defaults to `local` folder) - --validate-yara-rules - ONLY validate yara rules in a specified directory - --debug Provide additional logging - -``` -###### --config-file -Provides the path of the configuration file to be used _**(REQUIRED)**_ - -###### --log-file -Provides the path of the yara log file. If not supplied, defaults to `local/yara_agent.log` -within the current yara package. - -###### --output-file -Provides the path containing the feed description file. If not supplied, defaults to -`/local/yara_feed.json` within the current yara package. - -###### --validate-yara-rules -If supplied, yara rules will be validated and the script will exit. - -#### Example Cron Entry -_[TBD]_ - -# Remote Worker Installation (Centos/RHEL 7) - -* Make sure openssl-devel is installed - - ``` - sudo yum install openssl-devel - ``` - -* Install Git and GCC - - ``` - sudo yum install git - sudo yum install gcc - ``` - -* Install Python 3.6 - - ``` - sudo yum install epel-release - sudo yum install python36 - sudo yum install python36-devel - ``` - -* Install Redis - - ``` - sudo yum install redis - sudo systemctl start redis - sudo systemctl enable redis - ``` - - -* Install Supervisord - - ``` - sudo yum install supervisor - ``` - -* Install Yara Worker - - ``` - git clone https://github.com/carbonblack/cb-yara-connector.git - cd cb-yara-connector - git checkout yara_version2 - python3.6 -m venv venv - source ./venv/bin/activate - pip install -r requirements.txt - deactivate - ``` - - -* Create Yara Worker Config File `yara_worker.conf` - #### Example Yara Connector Master configuration ```ini @@ -270,3 +182,41 @@ The dockerfile in the top-level of the repo contains a centos7 environment for r the connector. The provided script `docker-build-rpm.sh` will use docker to build the project, and place the RPM(s) in $PWD/RPMS. + + + +##### Command-line Options +```text +usage: main.py [-h] --config-file CONFIG_FILE [--log-file LOG_FILE] + [--output-file OUTPUT_FILE] [--validate-yara-rules] [--debug] + +Yara Agent for Yara Connector + +optional arguments: + -h, --help show this help message and exit + --config-file CONFIG_FILE + Location of the config file + --log-file LOG_FILE Log file output (defaults to `local` folder) + --output-file OUTPUT_FILE + output feed file (defaults to `local` folder) + --validate-yara-rules + ONLY validate yara rules in a specified directory + --debug Provide additional logging + +``` +###### --config-file +Provides the path of the configuration file to be used _**(REQUIRED)**_ + +###### --log-file +Provides the path of the yara log file. If not supplied, defaults to `local/yara_agent.log` +within the current yara package. + +###### --output-file +Provides the path containing the feed description file. If not supplied, defaults to +`/local/yara_feed.json` within the current yara package. + +###### --validate-yara-rules +If supplied, yara rules will be validated and the script will exit. 
+ +#### Example Cron Entry +_[TBD]_ From 00628080eaef5703230a18620b9de02dfa653cb7 Mon Sep 17 00:00:00 2001 From: Zachary Estep Date: Tue, 7 Jan 2020 11:07:38 -0500 Subject: [PATCH 191/257] Update README.md --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 27d52f7..a8cf57c 100644 --- a/README.md +++ b/README.md @@ -4,7 +4,7 @@ The Yara agent has two parts a master and one or more workers. The master service must be installed on the same system as Cb Response. -Download the latest RPM from the github releases page, [here](https://github.com/carbonblack/cb-yara-connector/releases/download/untagged-0b4e650fa85727815eb2/python-cb-threatconnect-connector-2.1.0-1.x86_64.rpm). +Download the latest RPM from the github releases page, [here](https://github.com/carbonblack/cb-yara-connector/releases/download/untagged-543514324cea5461aa06/python-cb-yara-connector-2.1-0.x86_64.rpm). The connector can be easily installed from an rpm: From a7a52730c1672d8c1128ba3ffbbeb331c1100dc9 Mon Sep 17 00:00:00 2001 From: Zachary Estep Date: Tue, 7 Jan 2020 15:39:57 -0500 Subject: [PATCH 192/257] updates to operating mode --- example-conf/yara.conf | 6 ------ src/config_handling.py | 43 +++++++++++++++++++++++++++--------------- src/globals.py | 3 +-- src/main.py | 20 ++++++++------------ 4 files changed, 37 insertions(+), 35 deletions(-) diff --git a/example-conf/yara.conf b/example-conf/yara.conf index 5e7f4e0..6ffeee4 100644 --- a/example-conf/yara.conf +++ b/example-conf/yara.conf @@ -1,11 +1,5 @@ [general] -; -; either run a single worker locally or remotely -; valid types are 'local' or 'remote' -; -worker_type=local - ; ; ONLY for worker_type of remote ; IP Address of workers if worker_type is remote diff --git a/src/config_handling.py b/src/config_handling.py index 701a64d..ee31bb5 100644 --- a/src/config_handling.py +++ b/src/config_handling.py @@ -5,6 +5,7 @@ import logging import os from typing import List, Optional +import re import globals from celery_app import app @@ -104,20 +105,16 @@ def _worker_check(self) -> None: :raises CbInvalidConfig: """ - globals.g_mode = self._as_str("mode", required=False, default="master", allowed=["master", "slave"]) - - value = self._as_str("worker_type", default="local", allowed=["local", "remote"]) - if value == "local": - globals.g_remote = False - else: - globals.g_remote = True + globals.g_mode = self._as_str("mode", required=False, default="master", allowed=["master", "worker", "master+worker"]) + globals.g_yara_rules_dir = self._as_path("yara_rules_dir", required=True, check_exists=True, expect_dir=True) - # local/remote configuration data - cb_req = not (globals.g_mode == "master" and globals.g_remote) + #we need the cb_server_api information whenever required (ie, we are a worker) + cb_req = "worker" in globals.g_mode + globals.g_cb_server_url = self._as_str("cb_server_url", required=cb_req) - globals.g_cb_server_token = self._as_str("cb_server_token", required=cb_req) + globals.g_cb_server_token = self._as_str("cb_server_token", required=cb_req) value = self._as_str("broker_url", required=True) app.conf.update(broker_url=value, result_backend=value) @@ -136,11 +133,27 @@ def _extended_check(self) -> None: :raises CbInvalidConfig: :raises ValueError: """ - globals.g_postgres_host = self._as_str("postgres_host", default=globals.g_postgres_host) - globals.g_postgres_username = self._as_str("postgres_username", default=globals.g_postgres_username) - globals.g_postgres_password = 
self._as_str("postgres_password", required=True) - globals.g_postgres_db = self._as_str("postgres_db", default=globals.g_postgres_username) - globals.g_postgres_port = self._as_int("postgres_port", default=globals.g_postgres_port) + + config = configparser.ConfigParser() + if os.path.isfile('/etc/cb/cb.conf'): + try: + config.read_file(open('/etc/cb/cb.conf')) + dburl = config['DatabaseURL'].strip() + dbregex = "postgresql\+psycopg2:\/\/(.+):(.+)@localhost:(\d+)/(.+)" + matches = re.match(dbregex, dburl) + globals.g_postgres_user = "cb" + globals.g_postgres_password = matches.group(2) if matches else "NONE" + globals.g_postgres_port = 5002 + globals.g_postgres_db = "cb" + globals.g_postgres_host = "https://localhost" + except Exception: + logger.exception("Someting went wrong trying to parse /etc/cb/cb.conf for postgres details") + else: + globals.g_postgres_host = self._as_str("postgres_host", default=globals.g_postgres_host) + globals.g_postgres_username = self._as_str("postgres_username", default=globals.g_postgres_username) + globals.g_postgres_password = self._as_str("postgres_password", required=True) + globals.g_postgres_db = self._as_str("postgres_db", default=globals.g_postgres_username) + globals.g_postgres_port = self._as_int("postgres_port", default=globals.g_postgres_port) value = self._as_str("niceness") if value != "": diff --git a/src/globals.py b/src/globals.py index 6475b04..4ae67d8 100644 --- a/src/globals.py +++ b/src/globals.py @@ -12,8 +12,7 @@ g_yara_rule_map_hash_list = [] # configuiration -g_remote = False -g_mode = "" +g_mode = "master" g_cb_server_url = "" g_cb_server_token = "" diff --git a/src/main.py b/src/main.py index abd8058..10b74fa 100644 --- a/src/main.py +++ b/src/main.py @@ -286,7 +286,7 @@ def perform(yara_rule_dir: str, conn, hash_queue: Queue) -> None: :param conn: The postgres connection :param hash_queue: the queue of hashes to handle """ - if globals.g_remote: + if globals.g_mode == "master": logger.info("Uploading yara rules to workers...") generate_rule_map_remote(yara_rule_dir) @@ -784,14 +784,10 @@ def main(): try: """ - There are four principle modes of operation - - 1) master and worker - 2) remote and local - Support running as 1) just the binary-getting - 2) binary-getting and analysis locally - 3) binary-getting and analysis to happen on some worker on the same - redis/amqp/backend broker - 4) Worker (either local to to the cbr machine or remote) + 3 modes of operation + 1) task master + 2) standalone worker + 3) worker+master unit """ if args.run_forever: # Running as a deamon logger.debug("RUNNING AS DEMON") @@ -815,7 +811,7 @@ def main(): context = daemon.DaemonContext(**deamon_kwargs) # Operating mode - are we the master a worker? 
- run_as_master = globals.g_mode == "master" + run_as_master = "master" in globals.g_mode # Signal handler sig_handler = partial(handle_sig, exit_event) @@ -838,7 +834,7 @@ def main(): ) # start local celeryD worker if working mode is local - if not globals.g_remote: + if "worker" in globals.g_mode: localworker = worker(app=app) threads.append( start_celery_worker_thread( @@ -874,7 +870,7 @@ def main(): ) # Start a celery worker if we need one - if not globals.g_remote: + if "worker" in globals.g_mode: localworker = worker(app=app) threads.append( start_celery_worker_thread( From 46b4a8852bc7cf352ce3d122150b12aa3bfad89b Mon Sep 17 00:00:00 2001 From: Zachary Estep Date: Tue, 7 Jan 2020 16:01:48 -0500 Subject: [PATCH 193/257] README updates to reflect new operating modes --- README.md | 30 ++++++++---------------------- src/main.py | 3 +++ 2 files changed, 11 insertions(+), 22 deletions(-) diff --git a/README.md b/README.md index a8cf57c..e0f6a8e 100644 --- a/README.md +++ b/README.md @@ -30,15 +30,15 @@ Use git to retrieve the project, create a new virtual environment using python3. The connector is configured by a .ini formatted configuration file at `/etc/cb/integrations/cb-yara-connector/yaraconnector.conf`. The installation process will create a sample configuration file: -`/etc/cb/integrations/cb-yara-connector/yaraconnector.sample.conf` +`/etc/cb/integrations/cb-yara-connector/yaraconnector.conf.sample` Copy the sample configuration file, to edit to produce a working configuration for the connector: -`cp /etc/cb/integrations/cb-yara-connector/yaraconnector.sample.conf /etc/cb/integrations/cb-yara-connector/yaraconnector.conf` +`cp /etc/cb/integrations/cb-yara-connector/yaraconnector.conf.sample /etc/cb/integrations/cb-yara-connector/yaraconnector.conf` -You must configure the postgres connection information for your CBR server , and the rest API location and credentails as well. +The daemon will attempt to load the postgres credentails from disk, if available - optionally, configure the postgres connection information for your CBR server , and the rest API location and credentails as well using the `postgres_xxxx` keys in the configuration file. -~~~ +~~~ini ; ; Cb Response postgres Database settings ; @@ -49,10 +49,7 @@ postgres_db=cb postgres_port=5002 ~~~ -The postgres credentails in `/etc/cb/cb.conf`, the port, host, db, user should be as above. -REST API Credentials are available in the UI of your Carbon Black Response Server. - -~~~ +~~~ini ; ; ONLY for worker_type of local ; Cb Response Server settings for scanning locally. @@ -66,21 +63,11 @@ cb_server_token= You must configure `broker=` which sets the broker and results_backend for celery. You will set this appropriately as per the celery documentation - here (https://docs.celeryproject.org/en/latest/getting-started/brokers/). -The yarar-connector RPM contains a service that can be run in a few distinct configurations: -1) A local task master and remote worker(s) (RECOMMENDED) - -This is the prefered mode of operation, using a celery broker to distribute analysis-tasks to a remote worker from a local task-master that keeps track of binaries resident in the configured server. +The yara-connector RPM contains a service that is primarily intended to serve as a distributed system, with a master serving work to remote worker machine(s) for analysis and compiling a threat intelligence feed for Carbon Black Response EDR. 
-Install the connector on the cbr server - as the task-master and specify that the worker will be remote: +There are two operating modes to support the two roles: `mode=master` and `mode=worker`. -Read through the configuration file, specify the mode of operation, the `mode=master` and `worker_type=remote`. This represents the configuration for the task master, which will distribute work over the configured broker/backend to each configured worker. - -On the worker(s), install the rpm again, and in the configuration file specify the same CBR server information, but -configure `mode=slave` and `worker_type=local`. This configuration file doesn't need postgres credentials, but does require rest API access to the Carbon Black Response server in question. - -2) A local task master and local celery worker (NOTRECOMMENDED) -Read through the configuration file, specify the mode of operation as `mode=master` and `worker_type=local`. -A single daemon will scan binaries, local to the cbr instance. This configuration requires both the postgres , and REST API credentials from Carbon Black Response, in order to function correctly. +Install the connector on the cbr server, and configure it in master mode - set postgres credentials and a directory of monitored yara rules. In worker mode, configure REST API credentials. Both modes require a broker for celery communications. ## Input your yara rules @@ -184,7 +171,6 @@ the connector. The provided script `docker-build-rpm.sh` will use docker to build the project, and place the RPM(s) in $PWD/RPMS. - ##### Command-line Options ```text usage: main.py [-h] --config-file CONFIG_FILE [--log-file LOG_FILE] diff --git a/src/main.py b/src/main.py index 10b74fa..704c8a2 100644 --- a/src/main.py +++ b/src/main.py @@ -555,15 +555,18 @@ def scan_once_and_exit(self) -> None: """ Perform a database scan once, then exit. """ + logger.debug("Scanning once before exit (batch)") self.do_db_scan() self._hash_queue.join() self._scanning_results_queue.join() self.exit_event.set() + logger.debug("Batch done!") def scan_until_exit(self) -> None: """ Continually scan the database until instructed to quit. """ + logger.debug("Scanning until exit...(continuous)") self.do_db_scan() while not self.exit_event.is_set(): self.exit_event.wait(timeout=self._interval) From 62270dd63ecba8fcf279210155597b51e7acb316 Mon Sep 17 00:00:00 2001 From: Burton Choinski Date: Wed, 8 Jan 2020 10:12:49 -0500 Subject: [PATCH 194/257] unit test fixes --- README.md | 6 +- src/config_handling.py | 42 ++++++++---- src/main.py | 34 +++++----- test/test_configInit.py | 137 ++++++++++++---------------------------- 4 files changed, 88 insertions(+), 131 deletions(-) diff --git a/README.md b/README.md index e0f6a8e..27e85ed 100644 --- a/README.md +++ b/README.md @@ -22,8 +22,10 @@ Use git to retrieve the project, create a new virtual environment using python3. -`git clone https://github.com/carbonblack/cb-yara-connector` -`pip3 install -r requirements.txt` +``` +git clone https://github.com/carbonblack/cb-yara-connector +pip3 install -r requirements.txt +``` ## Create Yara Agent Config diff --git a/src/config_handling.py b/src/config_handling.py index ee31bb5..56087dd 100644 --- a/src/config_handling.py +++ b/src/config_handling.py @@ -2,10 +2,11 @@ # Copyright © 2014-2019 VMware, Inc. All Rights Reserved.
import configparser +import json import logging import os -from typing import List, Optional import re +from typing import List, Optional import globals from celery_app import app @@ -24,6 +25,7 @@ "broker_url", "cb_server_token", "cb_server_url", + "celery_worker_kwargs", "concurrent_hashes", "database_scanning_interval", "disable_rescan", @@ -40,9 +42,7 @@ "utility_interval", "utility_script", "worker_network_timeout", - "worker_type", "yara_rules_dir", - "celery_worker_kwargs" ] @@ -105,26 +105,23 @@ def _worker_check(self) -> None: :raises CbInvalidConfig: """ - globals.g_mode = self._as_str("mode", required=False, default="master", allowed=["master", "worker", "master+worker"]) - + globals.g_mode = self._as_str("mode", required=False, default="master", + allowed=["master", "worker", "master+worker"]) globals.g_yara_rules_dir = self._as_path("yara_rules_dir", required=True, check_exists=True, expect_dir=True) - #we need the cb_server_api information whenever required (ie, we are a worker) + # we need the cb_server_api information whenever required (ie, we are a worker) cb_req = "worker" in globals.g_mode globals.g_cb_server_url = self._as_str("cb_server_url", required=cb_req) - globals.g_cb_server_token = self._as_str("cb_server_token", required=cb_req) + globals.g_cb_server_token = self._as_str("cb_server_token", required=cb_req) value = self._as_str("broker_url", required=True) app.conf.update(broker_url=value, result_backend=value) globals.g_worker_network_timeout = self._as_int("worker_network_timeout", default=globals.g_worker_network_timeout) - - celeryworkerkwargs = self.the_config.get("celery_worker_kwargs", None) - if celeryworkerkwargs and len(celeryworkerkwargs) > 0: - globals.g_celeryworkerkwargs = celeryworkerkwargs + globals.g_celeryworkerkwargs = self._as_json("celery_worker_kwargs") def _extended_check(self) -> None: """ @@ -139,15 +136,15 @@ def _extended_check(self) -> None: try: config.read_file(open('/etc/cb/cb.conf')) dburl = config['DatabaseURL'].strip() - dbregex = "postgresql\+psycopg2:\/\/(.+):(.+)@localhost:(\d+)/(.+)" + dbregex = r"postgresql\+psycopg2:\/\/(.+):(.+)@localhost:(\d+)/(.+)" matches = re.match(dbregex, dburl) globals.g_postgres_user = "cb" globals.g_postgres_password = matches.group(2) if matches else "NONE" globals.g_postgres_port = 5002 globals.g_postgres_db = "cb" globals.g_postgres_host = "https://localhost" - except Exception: - logger.exception("Someting went wrong trying to parse /etc/cb/cb.conf for postgres details") + except Exception as err: + logger.exception(f"Something went wrong trying to parse /etc/cb/cb.conf for postgres details: {err}") else: globals.g_postgres_host = self._as_str("postgres_host", default=globals.g_postgres_host) globals.g_postgres_username = self._as_str("postgres_username", default=globals.g_postgres_username) globals.g_postgres_password = self._as_str("postgres_password", required=True) globals.g_postgres_db = self._as_str("postgres_db", default=globals.g_postgres_username) globals.g_postgres_port = self._as_int("postgres_port", default=globals.g_postgres_port) value = self._as_str("niceness") if value != "": @@ -299,3 +296,20 @@ def _as_bool(self, param: str, required: bool = False, default: bool = False) -> return default else: return value if value is None else value.lower() in ["true", "yes"] + + # noinspection PySameParameterValue + def _as_json(self, param: str, required: bool = False) -> Optional[dict]: + """ + Get a single-line JSON string and convert to a python dict for use as a kwargs.
+ :param param: Name of the configuration parameter + :param required: True if this must be specified in the configuration + :return: dictionary conversion, or None if not required and not supplied + """ + value = self._as_str(param, required=required) # required check + if value == "": + return None + + try: + return json.loads(value) + except Exception as err: + raise CbInvalidConfig(f"{self.source} '{param}' has invalid JSON: {err}") diff --git a/src/main.py b/src/main.py index 704c8a2..c78ca58 100644 --- a/src/main.py +++ b/src/main.py @@ -19,11 +19,11 @@ import lockfile import psutil -import mmap import psycopg2 # noinspection PyPackageRequirements import yara from celery.bin.worker import worker +from celery.exceptions import WorkerLostError # noinspection PyPackageRequirements from daemon import daemon from peewee import SqliteDatabase @@ -31,7 +31,6 @@ import globals from analysis_result import AnalysisResult from binary_database import BinaryDetonationResult, db -from celery.exceptions import WorkerLostError from celery_app import app from config_handling import ConfigurationInit from feed import CbFeed, CbFeedInfo, CbReport @@ -80,8 +79,8 @@ def analysis_worker( hash_queue.task_done() except Empty: exit_event.wait(1) - except WorkerLostError as we: - logger.debug(f"Lost connection to remote worker..exiting") + except WorkerLostError as err: + logger.debug(f"Lost connection to remote worker and exiting\n{err}") exit_event.set() break except Exception as err: @@ -372,12 +371,14 @@ def get_log_file_handles(use_logger) -> List: return handles +# noinspection PyUnusedLocal def handle_sig(exit_event: Event, sig: int, frame) -> None: """ Signal handler - handle the signal and mark exit if its an exiting signal type. :param exit_event: the event handler :param sig: the signal seen + :param frame: frame event (sent by DaemonContext, unused) """ exit_sigs = (signal.SIGTERM, signal.SIGQUIT, signal.SIGKILL) if sig in exit_sigs: exit_event.set() logger.debug("Sig handler set exit event") @@ -649,6 +650,7 @@ def launch_celery_worker(worker_obj, workerkwargs=None, config_file: str = None) logger.debug("CELERY WORKER LAUNCHING THREAD EXITED") +# FIXME: Unused def terminate_celery_worker(worker_obj: worker = None): """ Attempt to use the pidfile to gracefully terminate celery workers if they exist @@ -776,14 +778,10 @@ def main(): scanning_results_queue = Queue() # Lock file so this process is a singleton lock_file = lockfile.FileLock(args.lock_file) + + # noinspection PyUnusedLocal + # used for local worker handling in some scenarios localworker = None - workerkwargs = ( - json.loads(globals.g_celeryworkerkwargs) - if globals.g_celeryworkerkwargs is not None - else None - ) - if workerkwargs and len(workerkwargs) == 0: - workerkwargs = None try: """ @@ -841,7 +839,7 @@ def main(): localworker = worker(app=app) threads.append( start_celery_worker_thread( - localworker, workerkwargs, args.config_file + localworker, globals.g_celeryworkerkwargs, args.config_file ) ) else: @@ -849,7 +847,7 @@ def main(): localworker = worker(app=app) threads.append( start_celery_worker_thread( - localworker, 
workerkwargs, args.config_file + localworker, globals.g_celeryworkerkwargs, args.config_file ) ) run_to_exit_signal(exit_event) - wait_all_worker_exit_threads(threads,timeout=4.0) - #terminate_celery_worker(localworker) + wait_all_worker_exit_threads(threads, timeout=4.0) + # terminate_celery_worker(localworker) except KeyboardInterrupt: logger.info("\n\n##### Interupted by User!\n") except Exception as err: logger.error(f"There were errors executing yara rules: {err}") finally: exit_event.set() - #terminate_celery_worker(localworker) + # terminate_celery_worker(localworker) if __name__ == "__main__": diff --git a/test/test_configInit.py b/test/test_configInit.py index 606a086..a18cd2f 100644 --- a/test/test_configInit.py +++ b/test/test_configInit.py @@ -14,7 +14,6 @@ TESTCONF = os.path.join(TESTS, "conf-testing.conf") BASE = """[general] mode=master -worker_type=local cb_server_url=https://127.0.0.1:443 cb_server_token=abcdefghijklmnopqrstuvwxyz012345 @@ -41,6 +40,8 @@ worker_network_timeout=5 database_scanning_interval=360 + +celery_worker_kwargs={"autoscale":"4,4"} """ @@ -53,7 +54,6 @@ def setUp(self) -> None: """ globals.g_config = {} globals.g_output_file = "" - globals.g_remote = False globals.g_mode = "" globals.g_cb_server_url = "" globals.g_cb_server_token = "" @@ -77,6 +77,7 @@ def setUp(self) -> None: globals.g_feed_database_dir = "./feed_db" globals.g_worker_network_timeout = 5 globals.g_scanning_interval = 360 + globals.g_celeryworkerkwargs = None with open(TESTCONF, "w") as fp: fp.write(BASE) @@ -133,7 +134,6 @@ def test_00a_validate_config(self): """ ConfigurationInit(TESTCONF, "sample.json") self.assertTrue(globals.g_output_file.endswith("sample.json")) - self.assertFalse(globals.g_remote) def test_00b_validate_config_worker(self): """ @@ -141,7 +141,6 @@ def test_00b_validate_config_worker(self): """ ConfigurationInit(TESTCONF) self.assertEqual("", globals.g_output_file) - self.assertFalse(globals.g_remote) def test_01a_missing_config(self): """ @@ -192,7 +191,8 @@ def test_03b_mode_invalid(self): self.mangle(change={"mode": "bogus"}) with self.assertRaises(CbInvalidConfig) as err: ConfigurationInit(TESTCONF) - assert "does not specify an allowed value: ['master', 'slave']" in "{0}".format(err.exception.args[0]) + assert "does not specify an allowed value: ['master', 'worker', 'master+worker']" in "{0}".format( + err.exception.args[0]) def test_03c_mode_duplicated(self): """ @@ -204,135 +204,60 @@ def test_03c_mode_duplicated(self): ConfigurationInit(TESTCONF) assert "option 'mode' in section 'general' already exists" in "{0}".format(err.exception.args[0]) - def test_04a_worker_missing(self): - """ - Ensure that lacking 'worker_type' information defaults to local. - """ - self.mangle(change={"worker_type": None}) - ConfigurationInit(TESTCONF) - self.assertFalse(globals.g_remote) - - def test_04b_worker_empty(self): - """ - Ensure that empty 'worker_type' information defaults to local. - """ - self.mangle(change={"worker_type": ""}) - ConfigurationInit(TESTCONF) - self.assertFalse(globals.g_remote) - - def test_04c_config_bogus_worker(self): - """ - Ensure that with bogus 'worker_type' is detected. 
- """ - self.mangle(change={"worker_type": "BOGUS"}) - with self.assertRaises(CbInvalidConfig) as err: - ConfigurationInit(TESTCONF) - assert "does not specify an allowed value: ['local', 'remote']" in "{0}".format(err.exception.args[0]) + # test_04 worker_type removed - def test_05a_cb_server_url_missing_for_master_and_remote(self): + def test_05a_cb_server_url_missing_for_master(self): """ Ensure that 'cb_server_url' is not required if mode==slave and worker_type==remote """ - self.mangle(change={"mode": "master", "worker_type": "remote", "cb_server_url": None}) + self.mangle(change={"mode": "master", "cb_server_url": None}) ConfigurationInit(TESTCONF) self.assertEqual("", globals.g_cb_server_url) - def test_05b_cb_server_url_empty_for_master_and_remote(self): + def test_05b_cb_server_url_empty_for_master(self): """ Ensure that 'cb_server_url' is not required if mode==slave and worker_type==remote """ - self.mangle(change={"mode": "master", "worker_type": "remote", "cb_server_url": ""}) + self.mangle(change={"mode": "master", "cb_server_url": ""}) ConfigurationInit(TESTCONF) self.assertEqual("", globals.g_cb_server_url) - def test_05c_cb_server_url_missing_for_slave(self): + def test_05c_cb_server_url_missing_for_worker(self): """ - Ensure that 'cb_server_url' is required and detected if mode=slave. + Ensure that 'cb_server_url' is required and detected if mode=worker. """ - self.mangle(change={"mode": "slave", "worker_type": "remote", "cb_server_url": None}) + self.mangle(change={"mode": "worker", "cb_server_url": None}) with self.assertRaises(CbInvalidConfig) as err: ConfigurationInit(TESTCONF) assert "has no 'cb_server_url' definition" in "{0}".format(err.exception.args[0]) - def test_05d_cb_server_url_empty_for_slave(self): + def test_05d_cb_server_url_empty_for_worker(self): """ - Ensure that 'cb_server_url' is required and detected if mode=slave. + Ensure that 'cb_server_url' is required and detected if mode=worker. """ - self.mangle(change={"mode": "slave", "worker_type": "remote", "cb_server_url": ""}) + self.mangle(change={"mode": "worker", "cb_server_url": ""}) with self.assertRaises(CbInvalidConfig) as err: ConfigurationInit(TESTCONF) assert "has no 'cb_server_url' definition" in "{0}".format(err.exception.args[0]) - def test_05e_cb_server_url_missing_for_local(self): + def test_05e_cb_server_url_missing_for_worker(self): """ - Ensure that 'cb_server_url' is required and detected if worker_type=local. + Ensure that 'cb_server_url' is required and detected. """ - self.mangle(change={"mode": "master", "worker_type": "local", "cb_server_url": None}) + self.mangle(change={"mode": "worker", "cb_server_url": None}) with self.assertRaises(CbInvalidConfig) as err: ConfigurationInit(TESTCONF) assert "has no 'cb_server_url' definition" in "{0}".format(err.exception.args[0]) - def test_05f_cb_server_url_empty_for_local(self): + def test_05f_cb_server_url_empty_for_worker(self): """ - Ensure that 'cb_server_url' is required and detected if worker_type=local. + Ensure that 'cb_server_url' is required and detected. 
""" - self.mangle(change={"mode": "master", "worker_type": "local", "cb_server_url": ""}) + self.mangle(change={"mode": "worker", "cb_server_url": ""}) with self.assertRaises(CbInvalidConfig) as err: ConfigurationInit(TESTCONF) assert "has no 'cb_server_url' definition" in "{0}".format(err.exception.args[0]) - def test_06a_cb_server_token_missing_for_master_and_remote(self): - """ - Ensure that 'cb_server_token' is not required if mode==slave and worker_type==remote - """ - self.mangle(change={"mode": "master", "worker_type": "remote", "cb_server_token": None}) - ConfigurationInit(TESTCONF) - self.assertEqual("", globals.g_cb_server_token) - - def test_06b_cb_server_token_empty_for_master_and_remote(self): - """ - Ensure that 'cb_server_url' is not required if mode==slave and worker_type==remote - """ - self.mangle(change={"mode": "master", "worker_type": "remote", "cb_server_token": ""}) - ConfigurationInit(TESTCONF) - self.assertEqual("", globals.g_cb_server_token) - - def test_06c_cb_server_url_missing_for_slave(self): - """ - Ensure that 'cb_server_token' is required and detected if mode=slave. - """ - self.mangle(change={"mode": "slave", "worker_type": "remote", "cb_server_token": None}) - with self.assertRaises(CbInvalidConfig) as err: - ConfigurationInit(TESTCONF) - assert "has no 'cb_server_token' definition" in "{0}".format(err.exception.args[0]) - - def test_06d_cb_server_token_empty_for_slave(self): - """ - Ensure that 'cb_server_token' is required and detected if mode=slave. - """ - self.mangle(change={"mode": "slave", "worker_type": "remote", "cb_server_token": ""}) - with self.assertRaises(CbInvalidConfig) as err: - ConfigurationInit(TESTCONF) - assert "has no 'cb_server_token' definition" in "{0}".format(err.exception.args[0]) - - def test_06e_cb_server_token_missing_for_local(self): - """ - Ensure that 'cb_server_token' is required and detected if worker_type=local. - """ - self.mangle(change={"mode": "master", "worker_type": "local", "cb_server_token": None}) - with self.assertRaises(CbInvalidConfig) as err: - ConfigurationInit(TESTCONF) - assert "has no 'cb_server_token' definition" in "{0}".format(err.exception.args[0]) - - def test_06f_cb_server_token_empty_for_local(self): - """ - Ensure that 'cb_server_token' is required and detected if worker_type=local. - """ - self.mangle(change={"mode": "master", "worker_type": "local", "cb_server_token": ""}) - with self.assertRaises(CbInvalidConfig) as err: - ConfigurationInit(TESTCONF) - assert "has no 'cb_server_token' definition" in "{0}".format(err.exception.args[0]) - def test_06a_broker_url_missing(self): """ Ensure that missing broker_url is detected. 
@@ -842,6 +767,24 @@ def test_24d_database_scanning_interval_below_minimum(self): ConfigurationInit(TESTCONF, "sample.json") assert "'database_scanning_interval' must be greater or equal to 360" in "{0}".format(err.exception.args[0]) + def test_25a_celery_worker_config(self): + """ + Ensure that basic celery worker config is handled + """ + ConfigurationInit(TESTCONF, "sample.json") + self.assertEqual(1, len(globals.g_celeryworkerkwargs)) + self.assertTrue("autoscale" in globals.g_celeryworkerkwargs) + self.assertEqual("4,4", globals.g_celeryworkerkwargs['autoscale']) + + def test_25b_celery_worker_config_bad_json(self): + """ + Ensure that basic celery worker config is handled + """ + self.mangle(change={"celery_worker_kwargs": "{BOGUS}"}) + with self.assertRaises(CbInvalidConfig) as err: + ConfigurationInit(TESTCONF, "sample.json") + assert "invalid JSON" in err.exception.args[0] + # ----- Unknown configuration (typo detection) def test_80_unexpected_parameter(self): From 8b9aa8815ca8815b69590c92dbd94b3d9f06a79c Mon Sep 17 00:00:00 2001 From: Burton Choinski Date: Wed, 8 Jan 2020 10:21:26 -0500 Subject: [PATCH 195/257] added unit test --- test/test_configInit.py | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/test/test_configInit.py b/test/test_configInit.py index a18cd2f..5b11728 100644 --- a/test/test_configInit.py +++ b/test/test_configInit.py @@ -778,13 +778,20 @@ def test_25a_celery_worker_config(self): def test_25b_celery_worker_config_bad_json(self): """ - Ensure that basic celery worker config is handled + Ensure that basic celery worker config is handled with bad json. """ self.mangle(change={"celery_worker_kwargs": "{BOGUS}"}) with self.assertRaises(CbInvalidConfig) as err: ConfigurationInit(TESTCONF, "sample.json") assert "invalid JSON" in err.exception.args[0] + def test_25c_celery_worker_config_missing(self): + """ + Ensure that basic celery worker config is handled when missing + """ + self.mangle(change={"celery_worker_kwargs": None}) + self.assertEqual(None, globals.g_celeryworkerkwargs) + # ----- Unknown configuration (typo detection) def test_80_unexpected_parameter(self): From 0a3fce588f78bd433214fccd739fad3bad333c2a Mon Sep 17 00:00:00 2001 From: Zachary Estep Date: Wed, 8 Jan 2020 10:14:25 -0500 Subject: [PATCH 196/257] updates --- src/main.py | 17 +++++++---------- 1 file changed, 7 insertions(+), 10 deletions(-) diff --git a/src/main.py b/src/main.py index c78ca58..4dcab89 100644 --- a/src/main.py +++ b/src/main.py @@ -653,8 +653,7 @@ def launch_celery_worker(worker_obj, workerkwargs=None, config_file: str = None) # FIXME: Unused def terminate_celery_worker(worker_obj: worker = None): """ - Attempt to use the pidfile to gracefully terminate celery workers if they exist - if the worker hasn't terminated gracefully after 5 seconds, kill it using the .die() command + Attempt to use the pidfil to shutdown workers correctly, try .die() afterward :param worker_obj: worker object """ @@ -666,15 +665,13 @@ def terminate_celery_worker(worker_obj: worker = None): children = parent.children(recursive=True) for child in children: logger.debug(f"Sending term sig to celery worker child - {worker_pid}") - os.kill(child.pid, signal.SIGTERM) + os.kill(child.pid, signal.SIGQUIT) logger.debug(f"Sending term sig to celery worker - {worker_pid}") - os.kill(worker_pid, signal.SIGTERM) + os.kill(worker_pid, signal.SIGQUIT) else: - logger.debug("Didn't find a worker-pidfile to terminate on exit...") - - time.sleep(1.0) - if worker_obj: - worker_obj.die("Worker 
terminated") + logger.debug("Didn't find a worker-pidfile to terminate on exit.") + #if worker_obj: + # worker_obj.die("Worker terminated") ################################################################################ @@ -858,7 +855,7 @@ def main(): try: wait_all_worker_exit_threads(threads, timeout=4.0) finally: - # terminate_celery_worker(localworker) + terminate_celery_worker(localworker) logger.info("Yara connector shutdown") else: # | | | BATCH MODE | | | From deed0befdd76e784964e643200ee57874dd489db Mon Sep 17 00:00:00 2001 From: Zachary Estep Date: Wed, 8 Jan 2020 11:01:14 -0500 Subject: [PATCH 197/257] updates --- src/main.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/main.py b/src/main.py index 4dcab89..032a602 100644 --- a/src/main.py +++ b/src/main.py @@ -21,7 +21,7 @@ import psutil import psycopg2 # noinspection PyPackageRequirements -import yara +import mmap from celery.bin.worker import worker from celery.exceptions import WorkerLostError # noinspection PyPackageRequirements @@ -380,7 +380,7 @@ def handle_sig(exit_event: Event, sig: int, frame) -> None: :param sig: the signal seen :param frame: frame event (sent by DaemonContext, unused) """ - exit_sigs = (signal.SIGTERM, signal.SIGQUIT, signal.SIGKILL) + exit_sigs = (signal.SIGTERM, signal.SIGQUIT, signal.SIGKILL,signal.SIGQUIT) if sig in exit_sigs: exit_event.set() logger.debug("Sig handler set exit event") From 849d671a1cdb571458857a56a3ecce5588851e5f Mon Sep 17 00:00:00 2001 From: Zachary Estep Date: Wed, 8 Jan 2020 11:08:42 -0500 Subject: [PATCH 198/257] Update README.md --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 27e85ed..3ef935b 100644 --- a/README.md +++ b/README.md @@ -4,7 +4,7 @@ The Yara agent has two parts a master and one or more workers. The master service must be installed on the same system as Cb Response. -Download the latest RPM from the github releases page, [here](https://github.com/carbonblack/cb-yara-connector/releases/download/untagged-543514324cea5461aa06/python-cb-yara-connector-2.1-0.x86_64.rpm). +Download the latest RPM from the github releases page, [here](https://github.com/carbonblack/cb-yara-connector/releases/download/untagged-e5650883f82210808c8f/python-cb-yara-connector-2.1-0.x86_64.rpm). 
The connector can be easily installed from an rpm: From e46a445fd4c8f154685919fc623173da8ae9727c Mon Sep 17 00:00:00 2001 From: Burton Choinski Date: Wed, 8 Jan 2020 11:21:14 -0500 Subject: [PATCH 199/257] unit test and config tweaks --- src/config_handling.py | 2 +- test/test_configInit.py | 18 +++++++++--------- 2 files changed, 10 insertions(+), 10 deletions(-) diff --git a/src/config_handling.py b/src/config_handling.py index 56087dd..dee8e89 100644 --- a/src/config_handling.py +++ b/src/config_handling.py @@ -111,7 +111,7 @@ def _worker_check(self) -> None: globals.g_yara_rules_dir = self._as_path("yara_rules_dir", required=True, check_exists=True, expect_dir=True) # we need the cb_server_api information whenever required (ie, we are a worker) - cb_req = "worker" in globals.g_mode + cb_req = "worker" in globals.g_mode or "master+worker" in globals.g_mode globals.g_cb_server_url = self._as_str("cb_server_url", required=cb_req) globals.g_cb_server_token = self._as_str("cb_server_token", required=cb_req) diff --git a/test/test_configInit.py b/test/test_configInit.py index 5b11728..a74eae1 100644 --- a/test/test_configInit.py +++ b/test/test_configInit.py @@ -208,7 +208,7 @@ def test_03c_mode_duplicated(self): def test_05a_cb_server_url_missing_for_master(self): """ - Ensure that 'cb_server_url' is not required if mode==slave and worker_type==remote + Ensure that 'cb_server_url' is not required if mode==master """ self.mangle(change={"mode": "master", "cb_server_url": None}) ConfigurationInit(TESTCONF) self.assertEqual("", globals.g_cb_server_url) def test_05b_cb_server_url_empty_for_master(self): """ - Ensure that 'cb_server_url' is not required if mode==slave and worker_type==remote + Ensure that 'cb_server_url' is not required if mode==master """ self.mangle(change={"mode": "master", "cb_server_url": ""}) ConfigurationInit(TESTCONF) self.assertEqual("", globals.g_cb_server_url) @@ -240,27 +240,27 @@ def test_05d_cb_server_url_empty_for_worker(self): ConfigurationInit(TESTCONF) assert "has no 'cb_server_url' definition" in "{0}".format(err.exception.args[0]) - def test_05e_cb_server_url_missing_for_worker(self): + def test_05e_cb_server_url_missing_for_master_worker(self): """ - Ensure that 'cb_server_url' is required and detected. + Ensure that 'cb_server_url' is required if mode==master+worker """ - self.mangle(change={"mode": "worker", "cb_server_url": None}) + self.mangle(change={"mode": "master+worker", "cb_server_url": None}) with self.assertRaises(CbInvalidConfig) as err: ConfigurationInit(TESTCONF) assert "has no 'cb_server_url' definition" in "{0}".format(err.exception.args[0]) - def test_05f_cb_server_url_empty_for_worker(self): + def test_05f_cb_server_url_empty_for_master_worker(self): """ - Ensure that 'cb_server_url' is required and detected. + Ensure that 'cb_server_url' is required if mode==master+worker """ - self.mangle(change={"mode": "worker", "cb_server_url": ""}) + self.mangle(change={"mode": "master+worker", "cb_server_url": ""}) with self.assertRaises(CbInvalidConfig) as err: ConfigurationInit(TESTCONF) assert "has no 'cb_server_url' definition" in "{0}".format(err.exception.args[0]) def test_06a_broker_url_missing(self): """ - Ensure that missing broker_url is detected. + Ensure that missing broker_url is detected. 
""" self.mangle(change={"broker_url": None}) with self.assertRaises(CbInvalidConfig) as err: From 89d1362f385ea142012295fd005faf7d9460abb9 Mon Sep 17 00:00:00 2001 From: Zachary Estep Date: Wed, 8 Jan 2020 12:26:05 -0500 Subject: [PATCH 200/257] tasking update --- src/tasks.py | 24 +++++++++--------------- 1 file changed, 9 insertions(+), 15 deletions(-) diff --git a/src/tasks.py b/src/tasks.py index 5e8bf6c..a2032c3 100644 --- a/src/tasks.py +++ b/src/tasks.py @@ -16,7 +16,7 @@ import urllib3 # noinspection PyPackageRequirements import yara -from celery import bootsteps, group +from celery import bootsteps, group, Task from celery.utils.log import get_task_logger import globals @@ -33,6 +33,12 @@ rulelogger = logging.getLogger("yaraworker") rulelogger.setLevel(logging.INFO) +class MyTask(Task): + + def on_failure(self, exc, task_id, args, kwargs, einfo): + print('{0!r} failed: {1!r}'.format(task_id, exc)) + + # ----- Lock Object Class ------------------------------------------------------------ @@ -141,7 +147,7 @@ def generate_rule_map(yara_rule_path: str) -> dict: return rule_map -@app.task +@app.task(base=MyTask) def update_yara_rules_remote(yara_rules: dict) -> None: """ Update remote yara rules. @@ -232,19 +238,7 @@ def get_binary_by_hash(url: str, hsum: str, token: str): # otherwise return None which will be interpreted correctly in analyze_binary as haven failed to lookup the hash return None - -# noinspection PyUnusedFunction -@app.task -def analyze_bins(hashes: List[str]) -> group: - """ - Analize any returned binaries. - :param hashes: list of hashes - :return: celery group - """ - return group(analyze_binary.s(h) for h in hashes).apply_async() - - -@app.task +@app.task(base=MyTask) def analyze_binary(md5sum: str) -> AnalysisResult: """ Analyze binary information. From 6d4545a25a1a3346af3fa0dccf424b3939eb6cfa Mon Sep 17 00:00:00 2001 From: Zachary Estep Date: Wed, 8 Jan 2020 12:39:19 -0500 Subject: [PATCH 201/257] updates --- cb-yara-connector.service | 4 ++-- src/tasks.py | 3 ++- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/cb-yara-connector.service b/cb-yara-connector.service index cc3d389..ece133d 100644 --- a/cb-yara-connector.service +++ b/cb-yara-connector.service @@ -7,8 +7,8 @@ Environment=C_FORCE_ROOT=1 Type=forking ExecStart=/usr/share/cb/integrations/cb-yara-connector/yaraconnector --debug --config-file /etc/cb/integrations/cb-yara-connector/yaraconnector.conf --run-forever True WorkingDirectory=/usr/share/cb/integrations/cb-yara-connector -User=cb -Group=cb +#User=cb +#Group=cb [Install] diff --git a/src/tasks.py b/src/tasks.py index a2032c3..f565fc1 100644 --- a/src/tasks.py +++ b/src/tasks.py @@ -36,7 +36,8 @@ class MyTask(Task): def on_failure(self, exc, task_id, args, kwargs, einfo): - print('{0!r} failed: {1!r}'.format(task_id, exc)) + pass + #print('{0!r} failed: {1!r}'.format(task_id, exc)) From 2ffeed14785d4e0784c9b7658147e5fc143abafa Mon Sep 17 00:00:00 2001 From: Zachary Estep Date: Wed, 8 Jan 2020 13:09:53 -0500 Subject: [PATCH 202/257] Update README.md --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 3ef935b..8e0230d 100644 --- a/README.md +++ b/README.md @@ -4,7 +4,7 @@ The Yara agent has two parts a master and one or more workers. The master service must be installed on the same system as Cb Response. 
-Download the latest RPM from the github releases page, [here](https://github.com/carbonblack/cb-yara-connector/releases/download/untagged-e5650883f82210808c8f/python-cb-yara-connector-2.1-0.x86_64.rpm). +Download the latest RPM from the github releases page, [here](https://github.com/carbonblack/cb-yara-connector/releases/download/untagged-6cfa6daacf8dddb82b22/python-cb-yara-connector-2.1-0.x86_64.rpm). The connector can be easily installed from an rpm: From 5189d9cb3026df2adc11bf95a076409c2462ef91 Mon Sep 17 00:00:00 2001 From: Burton Choinski Date: Wed, 8 Jan 2020 13:18:22 -0500 Subject: [PATCH 203/257] analysis cleanup --- requirements.txt | 2 -- src/main.py | 7 ++++--- src/tasks.py | 9 +++++---- 3 files changed, 9 insertions(+), 9 deletions(-) diff --git a/requirements.txt b/requirements.txt index dace174..2a8fb2b 100644 --- a/requirements.txt +++ b/requirements.txt @@ -15,5 +15,3 @@ simplejson==3.17.0 urllib3==1.25.7 yara-python==3.11.0 psutil==5.6.7 -yara-python==3.11.0 # Oct 10, 2019 -psutil==5.6.7 diff --git a/src/main.py b/src/main.py index 032a602..79b15f7 100644 --- a/src/main.py +++ b/src/main.py @@ -21,7 +21,8 @@ import psutil import psycopg2 # noinspection PyPackageRequirements -import mmap +import yara +# noinspection PyPackageRequirements from celery.bin.worker import worker from celery.exceptions import WorkerLostError # noinspection PyPackageRequirements @@ -380,7 +381,7 @@ def handle_sig(exit_event: Event, sig: int, frame) -> None: :param sig: the signal seen :param frame: frame event (sent by DaemonContext, unused) """ - exit_sigs = (signal.SIGTERM, signal.SIGQUIT, signal.SIGKILL,signal.SIGQUIT) + exit_sigs = (signal.SIGTERM, signal.SIGQUIT, signal.SIGKILL, signal.SIGQUIT) if sig in exit_sigs: exit_event.set() logger.debug("Sig handler set exit event") @@ -670,7 +671,7 @@ def terminate_celery_worker(worker_obj: worker = None): os.kill(worker_pid, signal.SIGQUIT) else: logger.debug("Didn't find a worker-pidfile to terminate on exit.") - #if worker_obj: + # if worker_obj: # worker_obj.die("Worker terminated") diff --git a/src/tasks.py b/src/tasks.py index f565fc1..4bc1275 100644 --- a/src/tasks.py +++ b/src/tasks.py @@ -10,13 +10,12 @@ import os import traceback import zipfile -from typing import List import requests import urllib3 # noinspection PyPackageRequirements import yara -from celery import bootsteps, group, Task +from celery import bootsteps, Task from celery.utils.log import get_task_logger import globals @@ -33,12 +32,13 @@ rulelogger = logging.getLogger("yaraworker") rulelogger.setLevel(logging.INFO) + +# FIXME: Does not implement all abstract tasks class MyTask(Task): def on_failure(self, exc, task_id, args, kwargs, einfo): pass - #print('{0!r} failed: {1!r}'.format(task_id, exc)) - + # print('{0!r} failed: {1!r}'.format(task_id, exc)) # ----- Lock Object Class ------------------------------------------------------------ @@ -239,6 +239,7 @@ def get_binary_by_hash(url: str, hsum: str, token: str): # otherwise return None which will be interpreted correctly in analyze_binary as haven failed to lookup the hash return None + @app.task(base=MyTask) def analyze_binary(md5sum: str) -> AnalysisResult: """ From a89a20842441cbb382bc6829fe16fc974b2d8172 Mon Sep 17 00:00:00 2001 From: Zachary Estep Date: Wed, 8 Jan 2020 13:39:14 -0500 Subject: [PATCH 204/257] Removed unused functions --- src/main.py | 54 ----------------------------------------------------- 1 file changed, 54 deletions(-) diff --git a/src/main.py b/src/main.py index 79b15f7..495694c 100644 --- 
a/src/main.py +++ b/src/main.py @@ -336,27 +336,6 @@ def _check_hash_against_feed(md5_hash: str) -> bool: return not query.exists() -# FIXME: Unused -def save_results_with_logging(analysis_results: List[AnalysisResult]) -> None: - """ - Save all analysis results, with extended logging. - - :param analysis_results: list of analysis results - """ - logger.debug(analysis_results) - if analysis_results: - for analysis_result in analysis_results: - logger.debug( - ( - f"Analysis result is {analysis_result.md5} {analysis_result.binary_not_available}" - f" {analysis_result.long_result} {analysis_result.last_error_msg}" - ) - ) - if analysis_result.last_error_msg: - logger.error(analysis_result.last_error_msg) - save_results(analysis_results) - - def get_log_file_handles(use_logger) -> List: """ Get a list of filehandle numbers from logger to be handed to DaemonContext.files_preserve. @@ -443,39 +422,6 @@ def wait_all_worker_exit_threads(threads, timeout=None): return -# FIXME: Unused -def wait_all_worker_exit(timeout=None) -> None: - """ - Await the exit of our worker threads. - """ - threadcount = 2 - start = time.time() - while threadcount > 1: - threads = list( - filter( - lambda running_thread: not running_thread.daemon - if hasattr(running_thread, "daemon") - else True, - threading.enumerate(), - ) - ) - threadcount = len(threads) - logger.debug( - f"Main thread Waiting on {threadcount} live worker-threads (exluding deamons)..." - ) - logger.debug(f"Live threads (excluding daemons): {threads}") - time.sleep(0.1) - timenow = time.time() - elapsed = timenow - start - if timeout and elapsed >= timeout: - logger.debug( - f"Main thread exiting after workers failed to timetout in {timeout}" - ) - return - - logger.debug("Main thread going to exit...") - - def start_workers(exit_event: Event, hash_queue: Queue, scanning_results_queue: Queue, run_only_once=False) -> List[Thread]: """ From ba78e1a0c5fb4e125aa15b98b74f99171c4fce4a Mon Sep 17 00:00:00 2001 From: Burton Choinski Date: Wed, 8 Jan 2020 13:42:45 -0500 Subject: [PATCH 205/257] analysis cleanup --- src/tasks.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/tasks.py b/src/tasks.py index 4bc1275..fb1730c 100644 --- a/src/tasks.py +++ b/src/tasks.py @@ -15,6 +15,7 @@ import urllib3 # noinspection PyPackageRequirements import yara +# noinspection PyProtectedMember from celery import bootsteps, Task from celery.utils.log import get_task_logger @@ -33,7 +34,7 @@ rulelogger.setLevel(logging.INFO) -# FIXME: Does not implement all abstract tasks +# noinspection PyAbstractClass class MyTask(Task): def on_failure(self, exc, task_id, args, kwargs, einfo): From 8dcae05c28be7bd1de70120ec2a025767e8307b1 Mon Sep 17 00:00:00 2001 From: Zachary Estep Date: Wed, 8 Jan 2020 13:45:19 -0500 Subject: [PATCH 206/257] updates --- src/main.py | 27 --------------------------- 1 file changed, 27 deletions(-) diff --git a/src/main.py b/src/main.py index 495694c..4ed3651 100644 --- a/src/main.py +++ b/src/main.py @@ -597,30 +597,6 @@ def launch_celery_worker(worker_obj, workerkwargs=None, config_file: str = None) logger.debug("CELERY WORKER LAUNCHING THREAD EXITED") -# FIXME: Unused -def terminate_celery_worker(worker_obj: worker = None): - """ - Attempt to use the pidfil to shutdown workers correctly, try .die() afterward - - :param worker_obj: worker object - """ - with open('/tmp/yaraconnectorceleryworker') as cworkerpidfile: - worker_pid_str = cworkerpidfile.readline() - worker_pid = int(worker_pid_str) if len(worker_pid_str.strip()) > 0 
else None - if worker_pid: - parent = psutil.Process(worker_pid) if worker_pid else psutil.Process() - children = parent.children(recursive=True) - for child in children: - logger.debug(f"Sending term sig to celery worker child - {worker_pid}") - os.kill(child.pid, signal.SIGQUIT) - logger.debug(f"Sending term sig to celery worker - {worker_pid}") - os.kill(worker_pid, signal.SIGQUIT) - else: - logger.debug("Didn't find a worker-pidfile to terminate on exit.") - # if worker_obj: - # worker_obj.die("Worker terminated") - - ################################################################################ # Main entrypoint ################################################################################ @@ -802,7 +778,6 @@ def main(): try: wait_all_worker_exit_threads(threads, timeout=4.0) finally: - terminate_celery_worker(localworker) logger.info("Yara connector shutdown") else: # | | | BATCH MODE | | | @@ -824,14 +799,12 @@ ) run_to_exit_signal(exit_event) wait_all_worker_exit_threads(threads, timeout=4.0) - # terminate_celery_worker(localworker) except KeyboardInterrupt: logger.info("\n\n##### Interrupted by User!\n") except Exception as err: logger.error(f"There were errors executing yara rules: {err}") finally: exit_event.set() - # terminate_celery_worker(localworker) if __name__ == "__main__": From 434226a4d3f65b1d177710b3e77fd1c0e4a72835 Mon Sep 17 00:00:00 2001 From: Burton Choinski Date: Wed, 8 Jan 2020 13:53:07 -0500 Subject: [PATCH 207/257] analysis cleanup --- src/main.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/src/main.py b/src/main.py index 4ed3651..1261995 100644 --- a/src/main.py +++ b/src/main.py @@ -9,7 +9,6 @@ import signal import subprocess import sys -import threading import time from datetime import datetime, timedelta from functools import partial @@ -18,7 +17,6 @@ from typing import List import lockfile -import psutil import psycopg2 # noinspection PyPackageRequirements import yara From 6cebf5b424a151ada6c8ba8aaa5bed0fa7c13cf3 Mon Sep 17 00:00:00 2001 From: Zachary Estep Date: Wed, 8 Jan 2020 14:01:23 -0500 Subject: [PATCH 208/257] Update README.md --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 8e0230d..db9ba6c 100644 --- a/README.md +++ b/README.md @@ -4,7 +4,7 @@ The Yara agent has two parts a master and one or more workers. The master service must be installed on the same system as Cb Response. -Download the latest RPM from the github releases page, [here](https://github.com/carbonblack/cb-yara-connector/releases/download/untagged-6cfa6daacf8dddb82b22/python-cb-yara-connector-2.1-0.x86_64.rpm). +Download the latest RPM from the github releases page, [here](https://github.com/carbonblack/cb-yara-connector/releases/download/untagged-845bf75ee6de094b22f9/python-cb-yara-connector-2.1-0.x86_64.rpm).
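The patches above drop the unused `wait_all_worker_exit` helper but keep `wait_all_worker_exit_threads(threads, timeout=4.0)` on the shutdown path. That helper's body is not shown in this series, so the following is only a hypothetical sketch of a join-with-shared-deadline loop that matches the call signature seen in `main()`:

```python
# Hypothetical sketch (not the connector's actual helper): join every worker
# thread against one shared deadline, so the whole group waits at most
# `timeout` seconds in total.
import threading
import time
from typing import List, Optional


def wait_all_worker_exit_threads(threads: List[threading.Thread],
                                 timeout: Optional[float] = None) -> None:
    deadline = None if timeout is None else time.time() + timeout
    for t in threads:
        remaining = None if deadline is None else max(0.0, deadline - time.time())
        t.join(remaining)  # returns early as soon as the thread finishes
```

Threads that outlive the deadline are simply left running; the process still exits once the main thread returns.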
The connector can be easily installed from an rpm: From 1837b94ca25b81d5128b3ddc79ed49ee8e391e34 Mon Sep 17 00:00:00 2001 From: Zachary Estep Date: Wed, 8 Jan 2020 15:06:52 -0500 Subject: [PATCH 209/257] Update README.md --- README.md | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/README.md b/README.md index db9ba6c..58b685f 100644 --- a/README.md +++ b/README.md @@ -38,7 +38,7 @@ Copy the sample configuration file, to edit to produce a working configuration f `cp /etc/cb/integrations/cb-yara-connector/yaraconnector.conf.sample /etc/cb/integrations/cb-yara-connector/yaraconnector.conf` -The daemon will attempt to load the postgres credentails from disk, if available - optionally, configure the postgres connection information for your CBR server , and the rest API location and credentails as well using the `postgres_xxxx` keys in the configuration file. +The daemon will attempt to load the postgres credentails from disk, if available - optionally, configure the postgres connection information for your CBR server using the `postgres_xxxx` keys in the config. The REST API location and credentials are specified in the `cb_server_url` and `cb_server_token` keys, respectively. ~~~ini ; @@ -73,9 +73,7 @@ Install the connector on the cbr server, and config it with the master mode - co ## Input your yara rules -The yara connector monitors the directory `/etc/cb/integrations/cb-yara-connector/yara_rules` for files (`.yar`) each specifying one or more yara rule. Your rules need to have `metadata` section with a `score: [1-10]` tag to appropriately score matching binaries. This directory is configurable in your configuration file. - -The yara connector is boudn by libyara.so's limitations for matched strings, number of compiler rules, etc. +The yara connector monitors the directory `/etc/cb/integrations/cb-yara-connector/yara_rules` for files (`.yar`) each specifying one or more yara rule. Your rules need to have `metadata` section with a `score: [1-100]` tag to appropriately score matching binaries. This directory is configurable in your configuration file. 
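To illustrate how such a score tag is consumed, here is a minimal sketch using the `yara-python` package pinned in `requirements.txt`. The rules directory is the default from the configuration above; the function names and the score handling are assumptions for illustration, not the connector's own code:

```python
# Sketch: compile every .yar file in the monitored rules directory and score a
# binary as the highest 'score' meta value among its matching rules.
import os

import yara  # yara-python

RULES_DIR = "/etc/cb/integrations/cb-yara-connector/yara_rules"


def compile_rules(rules_dir: str = RULES_DIR) -> "yara.Rules":
    # Namespace each file by its filename so rule names may repeat across files.
    filepaths = {name: os.path.join(rules_dir, name)
                 for name in os.listdir(rules_dir) if name.endswith(".yar")}
    return yara.compile(filepaths=filepaths)


def score_binary(rules: "yara.Rules", data: bytes) -> int:
    # A binary with no matches scores 0; otherwise take the best rule score.
    matches = rules.match(data=data)
    return max((int(m.meta.get("score", 0)) for m in matches), default=0)
```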
#### Running Yara Agent @@ -136,8 +134,6 @@ cb_server_token=aafdasfdsafdsafdsa ``` - - # Development Notes ## Utility Script From bb1de749960034a20064053678584934a66007ea Mon Sep 17 00:00:00 2001 From: Zachary Estep Date: Wed, 8 Jan 2020 15:08:40 -0500 Subject: [PATCH 210/257] service update --- cb-yara-connector.service | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/cb-yara-connector.service b/cb-yara-connector.service index ece133d..cc3d389 100644 --- a/cb-yara-connector.service +++ b/cb-yara-connector.service @@ -7,8 +7,8 @@ Environment=C_FORCE_ROOT=1 Type=forking ExecStart=/usr/share/cb/integrations/cb-yara-connector/yaraconnector --debug --config-file /etc/cb/integrations/cb-yara-connector/yaraconnector.conf --run-forever True WorkingDirectory=/usr/share/cb/integrations/cb-yara-connector -#User=cb -#Group=cb +User=cb +Group=cb [Install] From 0dfe1e4cce48bbc5f0d69d2df7d3858c509e926f Mon Sep 17 00:00:00 2001 From: Zachary Estep Date: Wed, 8 Jan 2020 15:23:11 -0500 Subject: [PATCH 211/257] Put feed data in its own directory - /var/cb/data/cb-yara-connector` --- MANIFEST | 4 +++- cb-yara-connector.rpm.spec | 1 + cb-yara-connector.service | 2 +- example-conf/yara.conf | 4 ++++ src/main.py | 2 ++ 5 files changed, 11 insertions(+), 2 deletions(-) diff --git a/MANIFEST b/MANIFEST index a8d69cf..19a36b6 100644 --- a/MANIFEST +++ b/MANIFEST @@ -6,4 +6,6 @@ /etc/systemd/system/cb-yara-connector.service /etc/cb/integrations/cb-yara-connector/yara_rules /etc/cb/integrations/cb-yara-connector/yaraconnector.conf.example -/tmp/yaraconnectorceleryworker \ No newline at end of file +/tmp/yaraconnectorceleryworker +%dir /var/cb/data/cb-yara-connector +%dir /var/cb/data/cb-yara-connector/feed_db diff --git a/cb-yara-connector.rpm.spec b/cb-yara-connector.rpm.spec index fbd2a9b..38485f8 100644 --- a/cb-yara-connector.rpm.spec +++ b/cb-yara-connector.rpm.spec @@ -20,6 +20,7 @@ mkdir -p ${RPM_BUILD_ROOT}/etc/init mkdir -p ${RPM_BUILD_ROOT}/etc/systemd/system mkdir -p ${RPM_BUILD_ROOT}/tmp mkdir -p ${RPM_BUILD_ROOT}/var/run/ +mkdir -p ${RPM_BUILD_ROOT}/var/cb/data/cb-yara-connector/feed_db cp ${RPM_SOURCE_DIR}/example-conf/yara.conf ${RPM_BUILD_ROOT}/etc/cb/integrations/cb-yara-connector/yaraconnector.conf.example install -m 0644 ${RPM_SOURCE_DIR}/cb-yara-connector.service ${RPM_BUILD_ROOT}/etc/systemd/system/cb-yara-connector.service install -m 0755 ${RPM_SOURCE_DIR}/dist/yaraconnector ${RPM_BUILD_ROOT}/usr/share/cb/integrations/cb-yara-connector/ diff --git a/cb-yara-connector.service b/cb-yara-connector.service index cc3d389..374261e 100644 --- a/cb-yara-connector.service +++ b/cb-yara-connector.service @@ -5,7 +5,7 @@ After=syslog.target network.target [Service] Environment=C_FORCE_ROOT=1 Type=forking -ExecStart=/usr/share/cb/integrations/cb-yara-connector/yaraconnector --debug --config-file /etc/cb/integrations/cb-yara-connector/yaraconnector.conf --run-forever True +ExecStart=/usr/share/cb/integrations/cb-yara-connector/yaraconnector --debug --output-file /var/cb/data/cb-yara-connector/feed.json --config-file /etc/cb/integrations/cb-yara-connector/yaraconnector.conf --run-forever True WorkingDirectory=/usr/share/cb/integrations/cb-yara-connector User=cb Group=cb diff --git a/example-conf/yara.conf b/example-conf/yara.conf index 6ffeee4..60476b8 100644 --- a/example-conf/yara.conf +++ b/example-conf/yara.conf @@ -60,3 +60,7 @@ utility_interval=60 utility_script=scripts/vacuumscript.sh database_scanning_interval=360 + + + +feed_database_dir=/var/cb/data/cb-yara-connector/feed_db \ 
No newline at end of file diff --git a/src/main.py b/src/main.py index 1261995..e1d4704 100644 --- a/src/main.py +++ b/src/main.py @@ -20,6 +20,8 @@ import psycopg2 # noinspection PyPackageRequirements import yara +#DO NOT REMOVE NEEDED FOR RPM BUILD +import mmap # noinspection PyPackageRequirements from celery.bin.worker import worker from celery.exceptions import WorkerLostError From ce93eabdd7f072e0990d982d50707e9d70cc02fb Mon Sep 17 00:00:00 2001 From: Burton Choinski Date: Wed, 8 Jan 2020 15:29:33 -0500 Subject: [PATCH 212/257] analysis cleanup --- src/main.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/main.py b/src/main.py index e1d4704..5a56214 100644 --- a/src/main.py +++ b/src/main.py @@ -5,6 +5,8 @@ import json import logging import logging.handlers +# noinspection PyUnresolvedReferences +import mmap # NEEDED FOR RPM BUILD import os import signal import subprocess @@ -20,8 +22,6 @@ import psycopg2 # noinspection PyPackageRequirements import yara -#DO NOT REMOVE NEEDED FOR RPM BUILD -import mmap # noinspection PyPackageRequirements from celery.bin.worker import worker from celery.exceptions import WorkerLostError From 2f08560acfe5c5336255f868e97ca3ea6f29c495 Mon Sep 17 00:00:00 2001 From: Zachary Estep Date: Wed, 8 Jan 2020 15:32:05 -0500 Subject: [PATCH 213/257] updates --- MANIFEST | 2 +- cb-yara-connector.service | 3 ++- docker-build-rpm.sh | 2 +- 3 files changed, 4 insertions(+), 3 deletions(-) diff --git a/MANIFEST b/MANIFEST index 19a36b6..bb1ccd6 100644 --- a/MANIFEST +++ b/MANIFEST @@ -8,4 +8,4 @@ /etc/cb/integrations/cb-yara-connector/yaraconnector.conf.example /tmp/yaraconnectorceleryworker %dir /var/cb/data/cb-yara-connector -%dir /var/cb/data/cb-yara-connector/feed_db +%dir /var/cb/data/cb-yara-connector/feed_db \ No newline at end of file diff --git a/cb-yara-connector.service b/cb-yara-connector.service index 374261e..be5fe36 100644 --- a/cb-yara-connector.service +++ b/cb-yara-connector.service @@ -5,7 +5,8 @@ After=syslog.target network.target [Service] Environment=C_FORCE_ROOT=1 Type=forking -ExecStart=/usr/share/cb/integrations/cb-yara-connector/yaraconnector --debug --output-file /var/cb/data/cb-yara-connector/feed.json --config-file /etc/cb/integrations/cb-yara-connector/yaraconnector.conf --run-forever True +ExecStart=/usr/share/cb/integrations/cb-yara-connector/yaraconnector --debug --log-file /var/log/cb/integrations/cb-yara-connector/yaraconnector.log + --output-file /var/cb/data/cb-yara-connector/feed.json --config-file /etc/cb/integrations/cb-yara-connector/yaraconnector.conf --run-forever True WorkingDirectory=/usr/share/cb/integrations/cb-yara-connector User=cb Group=cb diff --git a/docker-build-rpm.sh b/docker-build-rpm.sh index d8818f5..eaa2d3d 100755 --- a/docker-build-rpm.sh +++ b/docker-build-rpm.sh @@ -1,7 +1,7 @@ #!/bin/bash docker rmi yaraconnectorrpmbuild --force docker rm yaraconnectorrpmbuild --force -docker build -t yaraconnectorrpmbuild . +docker build -t yaraconnectorrpmbuild . --no-cache docker run -d --name yaraconnectorrpmbuild -it yaraconnectorrpmbuild tail -f /dev/null docker cp yaraconnectorrpmbuild:/home/cb/rpmbuild/RPMS . 
docker stop yaraconnectorrpmbuild From 5e9fbaccda7bd22cf3318536e625926fbcef817b Mon Sep 17 00:00:00 2001 From: Zachary Estep Date: Wed, 8 Jan 2020 15:42:08 -0500 Subject: [PATCH 214/257] updates --- cb-yara-connector.service | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/cb-yara-connector.service b/cb-yara-connector.service index be5fe36..fa20387 100644 --- a/cb-yara-connector.service +++ b/cb-yara-connector.service @@ -5,8 +5,7 @@ After=syslog.target network.target [Service] Environment=C_FORCE_ROOT=1 Type=forking -ExecStart=/usr/share/cb/integrations/cb-yara-connector/yaraconnector --debug --log-file /var/log/cb/integrations/cb-yara-connector/yaraconnector.log - --output-file /var/cb/data/cb-yara-connector/feed.json --config-file /etc/cb/integrations/cb-yara-connector/yaraconnector.conf --run-forever True +ExecStart=/usr/share/cb/integrations/cb-yara-connector/yaraconnector --debug --output-file /var/cb/data/cb-yara-connector/feed.json --config-file /etc/cb/integrations/cb-yara-connector/yaraconnector.conf --run-forever True --log-file /var/log/cb/integrations/cb-yara-connector/yaraconnector.log WorkingDirectory=/usr/share/cb/integrations/cb-yara-connector User=cb Group=cb From 6e7bb9aa5b34baf13721d35fd07622bbf8ef105d Mon Sep 17 00:00:00 2001 From: Zachary Estep Date: Wed, 8 Jan 2020 15:47:46 -0500 Subject: [PATCH 215/257] updates before build --- cb-yara-connector.service | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/cb-yara-connector.service b/cb-yara-connector.service index fa20387..6d105c8 100644 --- a/cb-yara-connector.service +++ b/cb-yara-connector.service @@ -7,8 +7,8 @@ Environment=C_FORCE_ROOT=1 Type=forking ExecStart=/usr/share/cb/integrations/cb-yara-connector/yaraconnector --debug --output-file /var/cb/data/cb-yara-connector/feed.json --config-file /etc/cb/integrations/cb-yara-connector/yaraconnector.conf --run-forever True --log-file /var/log/cb/integrations/cb-yara-connector/yaraconnector.log WorkingDirectory=/usr/share/cb/integrations/cb-yara-connector -User=cb -Group=cb +#User=cb +#Group=cb [Install] From 49ff9f87428457fca89d59f4ff8addaf378027b6 Mon Sep 17 00:00:00 2001 From: Burton Choinski Date: Wed, 8 Jan 2020 15:55:47 -0500 Subject: [PATCH 216/257] doc cleanup --- README.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/README.md b/README.md index 58b685f..b725b44 100644 --- a/README.md +++ b/README.md @@ -78,7 +78,9 @@ The yara connector monitors the directory `/etc/cb/integrations/cb-yara-connecto #### Running Yara Agent `systemctl start cb-yara-connector` will up the service using systemD. + `systemctl stop cb-yara-connector` will gracefully stop the yara-connector. + `systemctl status -l cb-yara-connector` will display logging information. 
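For scripted health checks around these commands, a small illustrative helper (not part of the connector) can ask systemd for the unit's state:

```python
# Illustration only: returns True when the cb-yara-connector unit is active.
import subprocess


def unit_is_active(unit: str = "cb-yara-connector") -> bool:
    # 'systemctl is-active --quiet' exits 0 only for an active unit.
    result = subprocess.run(["systemctl", "is-active", "--quiet", unit], check=False)
    return result.returncode == 0
```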
#### Example Yara Connector Master configuration From 47ec1ce191c49e25da3d87e9c69c2aa743c98176 Mon Sep 17 00:00:00 2001 From: Burton Choinski Date: Thu, 9 Jan 2020 09:01:32 -0500 Subject: [PATCH 217/257] Moved feed source --- test/test_cbFeed.py | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) diff --git a/test/test_cbFeed.py b/test/test_cbFeed.py index 5c9d1bc..7f285b7 100644 --- a/test/test_cbFeed.py +++ b/test/test_cbFeed.py @@ -5,9 +5,7 @@ from feed import CbFeed, CbInvalidFeed - -class TestCbFeed(TestCase): - SOURCE = """{ +SOURCE = """{ "feedinfo": { "category": "Local Feed QA Feed BBI893963562", "display_name": "QA Feed BBI893963562", @@ -54,11 +52,14 @@ class TestCbFeed(TestCase): ] }""" + +class TestCbFeed(TestCase): + def test_load_and_dump(self): """ Ensure that the load functionality works as expected. """ - feed = CbFeed.load(self.SOURCE) + feed = CbFeed.load(SOURCE) fi = feed.data['feedinfo'].data self.assertEqual('qafeedbbi893963562', fi['name']) @@ -68,13 +69,13 @@ def test_load_and_dump(self): self.assertEqual(2, len(rpts)) check = feed.dump() - self.assertEqual(self.SOURCE, check) + self.assertEqual(SOURCE, check) def test_duplicate_report_ids(self): """ Ensure that report ids cannot be the same.. """ - feed = CbFeed.load(self.SOURCE) + feed = CbFeed.load(SOURCE) reps = feed.data['reports'] reps[1].data['id'] = reps[0].data['id'] @@ -83,7 +84,7 @@ def test_duplicate_report_ids(self): assert "duplicate report id" in "{0}".format(err.exception.args[0]) def test_dumpjson(self): - feed = CbFeed.load(self.SOURCE) + feed = CbFeed.load(SOURCE) json = feed.dumpjson() fi = feed.data['feedinfo'].data @@ -100,7 +101,7 @@ def test_dumpjson(self): self.assertEqual(check[entry][key], rpt[key]) def test_iter_iocs(self): - feed = CbFeed.load(self.SOURCE) + feed = CbFeed.load(SOURCE) check = { "ID36724710133780394307691457860616137.exe": "58ce99ab4ca124973fe2bfee428862a0", From 8d2aaa547058409b8a95c12d4bf9b66ec06d32db Mon Sep 17 00:00:00 2001 From: Burton Choinski Date: Thu, 9 Jan 2020 10:01:12 -0500 Subject: [PATCH 218/257] Added sample rule to README, removed unrequired folder --- README.md | 17 ++++++++- samples/matchover100kb.yar | 8 ---- samples/sample_local.conf | 78 -------------------------------------- 3 files changed, 16 insertions(+), 87 deletions(-) delete mode 100644 samples/matchover100kb.yar delete mode 100644 samples/sample_local.conf diff --git a/README.md b/README.md index b725b44..8700db3 100644 --- a/README.md +++ b/README.md @@ -73,7 +73,22 @@ Install the connector on the cbr server, and config it with the master mode - co ## Input your yara rules -The yara connector monitors the directory `/etc/cb/integrations/cb-yara-connector/yara_rules` for files (`.yar`) each specifying one or more yara rule. Your rules need to have `metadata` section with a `score: [1-100]` tag to appropriately score matching binaries. This directory is configurable in your configuration file. +The yara connector monitors the directory `/etc/cb/integrations/cb-yara-connector/yara_rules` for files (`.yar`) each +specifying one or more yara rule. Your rules need to have `metadata` section with a +`score: [1-100]` tag to appropriately score matching binaries. This directory is +configurable in your configuration file. 
+ +###### Sample Yara Rule File +``` +// Sample rule to match binaries over 100kb in size + +rule matchover100kb { + meta: + score = 10 + condition: + filesize > 100KB +} +``` #### Running Yara Agent diff --git a/samples/matchover100kb.yar b/samples/matchover100kb.yar deleted file mode 100644 index 889afc2..0000000 --- a/samples/matchover100kb.yar +++ /dev/null @@ -1,8 +0,0 @@ -// Sample rule to match binaries over 100kb in size - -rule matchover100kb { - meta: - score = 10 - condition: - filesize > 100KB -} diff --git a/samples/sample_local.conf b/samples/sample_local.conf deleted file mode 100644 index 2cde948..0000000 --- a/samples/sample_local.conf +++ /dev/null @@ -1,78 +0,0 @@ -;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; -;; Sample main and worker config file -;; -;; You may also use "~" if you wish to locate files or directories in your -;; home folder -;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; - -[general] -worker_type=local - -; -; dd a valid CBR user api token for `cb_server_token` -; -cb_server_url=https://127.0.0.1 -cb_server_token= - -; -; IP Address of workers if worker_type is remote -; -broker_url=redis://127.0.0.1 - -; -; path to directory containing yara rules -; -yara_rules_dir=./yara_rules - -; -; Cb Response postgres Database settings; insert posgres password as used in cb.conf for `postgres_password` -; -postgres_host=localhost -postgres_username=cb -postgres_password= -postgres_db=cb -postgres_port=5002 - -; -; os.nice value used for this script, if desired -; -;niceness=1 - -; -; Number of hashes to send to the workers concurrently. -; Recommend setting to the number of workers on the remote system. -; -concurrent_hashes=8 - -; -; If you want binaries to be rescanned more than once, regardless of the rules used, set this to False -; -disable_rescan=True - -; -; The agent will pull binaries up to the configured number of days. For exmaple, 365 will pull all binaries with -; a timestamp within the last year -; -num_days_binaries=365 - -; -; The feed database directory is where local database work files are stored. If the directory does not exist -; it will be created. -; -feed_database_dir=./feed_db - -; -; This can be used to adjust the interval (in seconds) at which the database is scanned. -; -database_scanning_interval=360 - - -; -; The use of the maintenance script is an ADVANCED FEATURE and should be used with caution! -; -; If "utility_interval" is greater than 0 it represents the interval in minutes after which the yara connector will -; pause to execute a shell script for database maintenance. This can present risks. Be careful what you allow the -; script to do, and use this option at your own discretion. 
-; -utility_interval=0 -utility_script=./scripts/vacuumscript.sh From 51cb5d5a04c4000f9fa09b8d635cdbaf97e0e5cb Mon Sep 17 00:00:00 2001 From: Zachary Estep Date: Thu, 9 Jan 2020 10:21:41 -0500 Subject: [PATCH 219/257] updates to work with cb.conf --- src/config_handling.py | 23 ++++++++++++++--------- src/main.py | 1 + 2 files changed, 15 insertions(+), 9 deletions(-) diff --git a/src/config_handling.py b/src/config_handling.py index dee8e89..a65e6ab 100644 --- a/src/config_handling.py +++ b/src/config_handling.py @@ -13,6 +13,7 @@ from exceptions import CbInvalidConfig logger = logging.getLogger(__name__) +logger.setLevel(logging.DEBUG) __all__ = ["ConfigurationInit"] @@ -133,19 +134,23 @@ def _extended_check(self) -> None: config = configparser.ConfigParser() if os.path.isfile('/etc/cb/cb.conf'): + logger.debug("FOUND CB.CONF") try: - config.read_file(open('/etc/cb/cb.conf')) - dburl = config['DatabaseURL'].strip() - dbregex = r"postgresql\+psycopg2:\/\/(.+):(.+)@localhost:(\d+)/(.+)" - matches = re.match(dbregex, dburl) - globals.g_postgres_user = "cb" - globals.g_postgres_password = matches.group(2) if matches else "NONE" - globals.g_postgres_port = 5002 - globals.g_postgres_db = "cb" - globals.g_postgres_host = "https://localhost" + with open('/etc/cb/cb.conf') as cbconffile: + for line in cbconffile.readlines(): + if line.startswith("DatabaseURL="): + dbregex = r"DatabaseURL=postgresql\+psycopg2:\/\/(.+):(.+)@localhost:(\d+)/(.+)" + matches = re.match(dbregex, line) + globals.g_postgres_user = "cb" + globals.g_postgres_password = matches.group(2) if matches else "NONE" + globals.g_postgres_port = 5002 + globals.g_postgres_db = "cb" + globals.g_postgres_host = "127.0.01" + break except Exception as err: logger.exception(f"Someting went wrong trying to parse /etc/cb/cb.conf for postgres details: {err}") else: + logger.debug("Couldn't find /etc/cb/cb.conf") globals.g_postgres_host = self._as_str("postgres_host", default=globals.g_postgres_host) globals.g_postgres_username = self._as_str("postgres_username", default=globals.g_postgres_username) globals.g_postgres_password = self._as_str("postgres_password", required=True) diff --git a/src/main.py b/src/main.py index 5a56214..1353314 100644 --- a/src/main.py +++ b/src/main.py @@ -672,6 +672,7 @@ def main(): ) handler.setFormatter(formatter) logger.addHandler(handler) + # Verify the configuration file and load up important global variables try: From ab6396eeed85dab072607069b09f5475a0af2146 Mon Sep 17 00:00:00 2001 From: Zachary Estep Date: Thu, 9 Jan 2020 10:34:39 -0500 Subject: [PATCH 220/257] Update README.md --- README.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 8700db3..0ca381a 100644 --- a/README.md +++ b/README.md @@ -4,7 +4,7 @@ The Yara agent has two parts a master and one or more workers. The master service must be installed on the same system as Cb Response. -Download the latest RPM from the github releases page, [here](https://github.com/carbonblack/cb-yara-connector/releases/download/untagged-845bf75ee6de094b22f9/python-cb-yara-connector-2.1-0.x86_64.rpm). +Download the latest RPM from the github releases page, [here](https://github.com/carbonblack/cb-yara-connector/releases/download/untagged-845bf75ee6de094b22f9/python-cb-yara-connector-2.1-0.x86_64.rp).
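Patch 219's `/etc/cb/cb.conf` handling can be distilled into a short sketch. The regular expression is copied from the diff above; the function wrapper and its error handling are illustrative assumptions:

```python
# Sketch: pull the postgres password out of cb.conf's DatabaseURL line, as in
# the _extended_check() change above. Returns None when nothing usable exists.
import re
from typing import Optional

DB_RE = re.compile(r"DatabaseURL=postgresql\+psycopg2:\/\/(.+):(.+)@localhost:(\d+)/(.+)")


def postgres_password_from_cbconf(path: str = "/etc/cb/cb.conf") -> Optional[str]:
    try:
        with open(path) as fp:
            for line in fp:
                match = DB_RE.match(line)
                if match:
                    return match.group(2)  # the second group is the password
    except OSError:
        pass  # no readable cb.conf; the caller falls back to the config keys
    return None
```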
The connector can be easily installed from an rpm: @@ -75,7 +75,7 @@ Install the connector on the cbr server, and config it with the master mode - co The yara connector monitors the directory `/etc/cb/integrations/cb-yara-connector/yara_rules` for files (`.yar`) each specifying one or more yara rule. Your rules need to have `metadata` section with a -`score: [1-100]` tag to appropriately score matching binaries. This directory is +`score= [1-10]` tag to appropriately score matching binaries. This directory is configurable in your configuration file. ###### Sample Yara Rule File From 0dc635c3816b6279ca9b996c4b4ad7c26d721893 Mon Sep 17 00:00:00 2001 From: Zachary Estep Date: Thu, 9 Jan 2020 12:04:42 -0500 Subject: [PATCH 221/257] Update README.md --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 0ca381a..23e351a 100644 --- a/README.md +++ b/README.md @@ -4,7 +4,7 @@ The Yara agent has two parts a master and one or more workers. The master service must be installed on the same system as Cb Response. -Download the latest RPM from the github releases page, [here](https://github.com/carbonblack/cb-yara-connector/releases/download/untagged-845bf75ee6de094b22f9/python-cb-yara-connector-2.1-0.x86_64.rp). +Download the latest RPM from the github releases page, [here](https://github.com/carbonblack/cb-yara-connector/releases/download/untagged-95171adc2cb55d3f96c5/python-cb-yara-connector-2.1-0.x86_64.rpm). The connector can be easily installed from an rpm: From 7bc82c098b260c5f3ec44746f8f6f46dfaef51a5 Mon Sep 17 00:00:00 2001 From: Zachary Estep Date: Fri, 10 Jan 2020 10:30:46 -0500 Subject: [PATCH 222/257] Update README.md --- README.md | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/README.md b/README.md index 23e351a..28b69b5 100644 --- a/README.md +++ b/README.md @@ -42,7 +42,7 @@ The daemon will attempt to load the postgres credentails from disk, if available ~~~ini ; -; Cb Response postgres Database settings +; Cb Response postgres Database settings (master) ; postgres_host=127.0.0.1 postgres_username=cb @@ -53,8 +53,8 @@ postgres_port=5002 ~~~ini ; -; ONLY for worker_type of local -; Cb Response Server settings for scanning locally. +; ONLY FOR WORKER(S) +; Cb Response Server settings for scanning locally ; For remote scanning please set these parameters in the yara worker config file ; Default: https://127.0.0.1 ; @@ -74,9 +74,9 @@ Install the connector on the cbr server, and config it with the master mode - co ## Input your yara rules The yara connector monitors the directory `/etc/cb/integrations/cb-yara-connector/yara_rules` for files (`.yar`) each -specifying one or more yara rule. Your rules need to have `metadata` section with a -`score= [1-10]` tag to appropriately score matching binaries. This directory is -configurable in your configuration file. +specifying one or more yara rule. Your rules must have `meta` section with a +`score = [1-10]` tag to appropriately score matching binaries. This directory is +configurable in your configuration file. C-style comments are supported. ###### Sample Yara Rule File ``` From 2fb8ac6b176ff4d00ce039cb70103bccc401c088 Mon Sep 17 00:00:00 2001 From: Burton Choinski Date: Fri, 10 Jan 2020 12:22:48 -0500 Subject: [PATCH 223/257] output location tweaks. 
--- cb-yara-connector.service | 2 +- example-conf/yara-dev.conf | 79 ++++++++++++++++++++++++++++++++++++++ example-conf/yara.conf | 22 ++++++++--- src/config_handling.py | 22 +++++++---- src/main.py | 3 +- test/test_configInit.py | 5 ++- 6 files changed, 115 insertions(+), 18 deletions(-) create mode 100644 example-conf/yara-dev.conf diff --git a/cb-yara-connector.service b/cb-yara-connector.service index 6d105c8..9e7164a 100644 --- a/cb-yara-connector.service +++ b/cb-yara-connector.service @@ -5,7 +5,7 @@ After=syslog.target network.target [Service] Environment=C_FORCE_ROOT=1 Type=forking -ExecStart=/usr/share/cb/integrations/cb-yara-connector/yaraconnector --debug --output-file /var/cb/data/cb-yara-connector/feed.json --config-file /etc/cb/integrations/cb-yara-connector/yaraconnector.conf --run-forever True --log-file /var/log/cb/integrations/cb-yara-connector/yaraconnector.log +ExecStart=/usr/share/cb/integrations/cb-yara-connector/yaraconnector --debug --config-file /etc/cb/integrations/cb-yara-connector/yaraconnector.conf --run-forever True --log-file /var/log/cb/integrations/cb-yara-connector/yaraconnector.log WorkingDirectory=/usr/share/cb/integrations/cb-yara-connector #User=cb #Group=cb diff --git a/example-conf/yara-dev.conf b/example-conf/yara-dev.conf new file mode 100644 index 0000000..28bb6f8 --- /dev/null +++ b/example-conf/yara-dev.conf @@ -0,0 +1,79 @@ +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; Sample main and worker config file +;; +;; You may also use "~" if you wish to locate files or directories in your +;; home folder +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +[general] +worker_type=local + +; +; dd a valid CBR user api token for `cb_server_token` +; +cb_server_url=https://127.0.0.1 +cb_server_token= + +; +; IP Address of workers if worker_type is remote +; +broker_url=redis://127.0.0.1 + +; +; path to directory containing yara rules +; +yara_rules_dir=./yara_rules + +; +; Cb Response postgres Database settings; insert posgres password as used in cb.conf for `postgres_password` +; +postgres_host=localhost +postgres_username=cb +postgres_password= +postgres_db=cb +postgres_port=5002 + +; +; os.nice value used for this script, if desired +; +;niceness=1 + +; +; Number of hashes to send to the workers concurrently. +; Recommend setting to the number of workers on the remote system. +; +concurrent_hashes=8 + +; +; If you want binaries to be rescanned more than once, regardless of the rules used, set this to False +; +disable_rescan=True + +; +; The agent will pull binaries up to the configured number of days. For exmaple, 365 will pull all binaries with +; a timestamp within the last year +; +num_days_binaries=365 + +; +; The feed database directory is where local database work files are stored. If the directory does not exist +; it will be created. +; +feed_database_dir=./feed_db + +; +; This can be used to adjust the interval (in seconds) at which the database is scanned. +; +database_scanning_interval=900 + + +; +; The use of the maintenance script is an ADVANCED FEATURE and should be used with caution! +; +; If "utility_interval" is greater than 0 it represents the interval in minutes after which the yara connector will +; pause to execute a shell script for database maintenance. This can present risks. Be careful what you allow the +; script to do, and use this option at your own discretion. 
+; +utility_interval=0 +utility_script=./scripts/vacuumscript.sh + diff --git a/example-conf/yara.conf b/example-conf/yara.conf index 60476b8..0da4f2b 100644 --- a/example-conf/yara.conf +++ b/example-conf/yara.conf @@ -56,11 +56,23 @@ disable_rescan=False num_days_binaries=365 -utility_interval=60 -utility_script=scripts/vacuumscript.sh - -database_scanning_interval=360 +; +; The use of the maintenance script is an ADVANCED FEATURE and should be used with caution! +; +; If "utility_interval" is greater than 0 it represents the interval in minutes after which the yara connector will +; pause to execute a user-created shell script designed for database maintenance, located with a "utility_script" +; definition that must be added. This can present risks. Be careful what you allow the script to do, and use this +; option at your own discretion. +; +utility_interval=0 +; +; This can be used to adjust the interval (in seconds) at which the database is scanned. +; +database_scanning_interval=900 -feed_database_dir=/var/cb/data/cb-yara-connector/feed_db \ No newline at end of file +; +; Location of the SQLite database and results JSON file, used to manage the searching. +; +feed_database_dir=/var/cb/data/cb-yara-connector/feed_db diff --git a/src/config_handling.py b/src/config_handling.py index a65e6ab..5b91ea0 100644 --- a/src/config_handling.py +++ b/src/config_handling.py @@ -92,13 +92,14 @@ def __init__(self, config_file: str, output_file: str = None, **kwargs) -> None: except configparser.InterpolationSyntaxError as err: raise CbInvalidConfig(f"{self.source} cannot be parsed: {err}") + globals.g_mode = self._as_str("mode", required=False, default="master", + allowed=["master", "worker", "master+worker"]) + # do the config checks self._worker_check() - if output_file is not None and output_file != "": - globals.g_output_file = os.path.abspath(os.path.expanduser(output_file)) - logger.debug(f"NOTE: output file will be '{globals.g_output_file}'") - self._extended_check() + if globals.g_mode in ["master", "master+worker"]: + self._master_check(output_file) def _worker_check(self) -> None: """ @@ -106,9 +107,6 @@ def _worker_check(self) -> None: :raises CbInvalidConfig: """ - globals.g_mode = self._as_str("mode", required=False, default="master", - allowed=["master", "worker", "master+worker"]) - globals.g_yara_rules_dir = self._as_path("yara_rules_dir", required=True, check_exists=True, expect_dir=True) # we need the cb_server_api information whenever required (ie, we are a worker) @@ -124,10 +122,11 @@ def _worker_check(self) -> None: default=globals.g_worker_network_timeout) globals.g_celeryworkerkwargs = self._as_json("celery_worker_kwargs") - def _extended_check(self) -> None: + def _master_check(self, output_file) -> None: """ Validate entries used by the main process. 
+ :param output_file: output file location from command line :raises CbInvalidConfig: :raises ValueError: """ @@ -189,6 +188,13 @@ def _extended_check(self) -> None: globals.g_feed_database_dir = self._as_path("feed_database_dir", required=True, expect_dir=True, default=globals.g_feed_database_dir, create_if_needed=True) + if output_file is not None and output_file != "": + globals.g_output_file = os.path.abspath(os.path.expanduser(output_file)) + else: # same location as feed db, called "feed.json" + globals.g_output_file = os.path.join(os.path.dirname(globals.g_feed_database_dir), "feed.json") + + logger.debug(f"NOTE: output file will be '{globals.g_output_file}'") + globals.g_scanning_interval = self._as_int('database_scanning_interval', default=globals.g_scanning_interval, min_value=360) diff --git a/src/main.py b/src/main.py index 1353314..bba8705 100644 --- a/src/main.py +++ b/src/main.py @@ -623,7 +623,7 @@ def handle_arguments(): ) # Controls the output feed location+name parser.add_argument( - "--output-file", default="yara_feed.json", help="output feed file" + "--output-file", default=None, help="output feed file" ) # Controls the working directory parser.add_argument( @@ -672,7 +672,6 @@ def main(): ) handler.setFormatter(formatter) logger.addHandler(handler) - # Verify the configuration file and load up important global variables try: diff --git a/test/test_configInit.py b/test/test_configInit.py index a74eae1..395f398 100644 --- a/test/test_configInit.py +++ b/test/test_configInit.py @@ -140,7 +140,7 @@ def test_00b_validate_config_worker(self): Ensure our base configuration is valid for worker types. """ ConfigurationInit(TESTCONF) - self.assertEqual("", globals.g_output_file) + self.assertTrue(globals.g_output_file.endswith("feed.json")) def test_01a_missing_config(self): """ @@ -807,8 +807,9 @@ def test_80_unexpected_parameter(self): def test_90_minimal_validation_effects(self): """ - Ensure that minimal caonfiguration does not set extra globals + Ensure that minimal configuration does not set extra globals """ + self.mangle(change={"mode": "worker"}) globals.g_postgres_host = None ConfigurationInit(TESTCONF) self.assertIsNone(globals.g_postgres_host) From 23d39366192f8a0dea967e68b8df2884fb600581 Mon Sep 17 00:00:00 2001 From: Zachary Estep Date: Fri, 10 Jan 2020 13:11:38 -0500 Subject: [PATCH 224/257] updates --- cb-yara-connector.service | 1 + src/main.py | 29 ++++++++++++++++++++++++++++- 2 files changed, 29 insertions(+), 1 deletion(-) diff --git a/cb-yara-connector.service b/cb-yara-connector.service index 9e7164a..d38f652 100644 --- a/cb-yara-connector.service +++ b/cb-yara-connector.service @@ -7,6 +7,7 @@ Environment=C_FORCE_ROOT=1 Type=forking ExecStart=/usr/share/cb/integrations/cb-yara-connector/yaraconnector --debug --config-file /etc/cb/integrations/cb-yara-connector/yaraconnector.conf --run-forever True --log-file /var/log/cb/integrations/cb-yara-connector/yaraconnector.log WorkingDirectory=/usr/share/cb/integrations/cb-yara-connector +KillSignal=SIGQUIT #User=cb #Group=cb diff --git a/src/main.py b/src/main.py index bba8705..79cf63e 100644 --- a/src/main.py +++ b/src/main.py @@ -20,6 +20,7 @@ import lockfile import psycopg2 +import psutil # noinspection PyPackageRequirements import yara # noinspection PyPackageRequirements @@ -81,7 +82,7 @@ def analysis_worker( except Empty: exit_event.wait(1) except WorkerLostError as err: - logger.debug(f"Lost connection to remote worker and exiting\n{err}") + logger.debug(f"Lost connection to remote worker and exiting \n 
{err}") exit_event.set() break except Exception as err: @@ -597,6 +598,30 @@ def launch_celery_worker(worker_obj, workerkwargs=None, config_file: str = None) logger.debug("CELERY WORKER LAUNCHING THREAD EXITED") +def terminate_celery_worker(worker_obj: worker = None): + """ + Attempt to use the pidfile to gracefully terminate celery workers if they exist + if the worker hasn't terminated gracefully after 5 seconds, kill it using the .die() command + :param worker_obj: worker object + """ + """with open('/tmp/yaraconnectorceleryworker') as cworkerpidfile: + worker_pid_str = cworkerpidfile.readline() + worker_pid = int(worker_pid_str) if len(worker_pid_str.strip()) > 0 else None + if worker_pid: + parent = psutil.Process(worker_pid) if worker_pid else psutil.Process() + children = parent.children(recursive=True) + for child in children: + logger.debug(f"Sending term sig to celery worker child - {worker_pid}") + os.kill(child.pid, signal.SIGQUIT) + logger.debug(f"Sending term sig to celery worker - {worker_pid}") + os.kill(worker_pid, signal.SIGQUIT) + else: + logger.debug("Didn't find a worker-pidfile to terminate on exit...") + + time.sleep(1.0) """ + if worker_obj: + worker_obj.die("Worker terminated") + ################################################################################ # Main entrypoint ################################################################################ @@ -773,11 +798,13 @@ def main(): # run until the service/daemon gets a quitting sig try: + logger.debug("Started as demon OK") run_to_exit_signal(exit_event) finally: try: wait_all_worker_exit_threads(threads, timeout=4.0) finally: + #terminate_celery_worker(localworker) logger.info("Yara connector shutdown") else: # | | | BATCH MODE | | | From fc2983f879c8b996d6c88af56645bd976731c31a Mon Sep 17 00:00:00 2001 From: Zachary Estep Date: Fri, 10 Jan 2020 13:25:37 -0500 Subject: [PATCH 225/257] Update README.md --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 28b69b5..a163baf 100644 --- a/README.md +++ b/README.md @@ -4,7 +4,7 @@ The Yara agent has two parts a master and one or more workers. The master service must be installed on the same system as Cb Response. -Download the latest RPM from the github releases page, [here](https://github.com/carbonblack/cb-yara-connector/releases/download/untagged-95171adc2cb55d3f96c5/python-cb-yara-connector-2.1-0.x86_64.rpm). +Download the latest RPM from the github releases page, [here](https://github.com/carbonblack/cb-yara-connector/releases/download/untagged-ac560575c782caf791c5/python-cb-yara-connector-2.1-0.x86_64.rpm). 
The connector can be easily installed from an rpm: From 6875fc082f4983dfc81cd90c919ad1ccc4834fe2 Mon Sep 17 00:00:00 2001 From: Zachary Estep Date: Fri, 10 Jan 2020 13:43:58 -0500 Subject: [PATCH 226/257] updates to example config --- example-conf/yara.conf | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/example-conf/yara.conf b/example-conf/yara.conf index 0da4f2b..e1af7f4 100644 --- a/example-conf/yara.conf +++ b/example-conf/yara.conf @@ -6,6 +6,11 @@ ; broker_url=redis://127.0.0.1 + +; +; Operating mode - task 'master' or remote 'worker +; options worker/master +; mode=master ; @@ -15,6 +20,7 @@ yara_rules_dir=/etc/cb/integrations/cb-yara-connector/yara_rules ; ; Cb Response postgres Database settings +; only for master mode ; postgres_host=127.0.0.1 postgres_username=cb @@ -23,7 +29,7 @@ postgres_db=cb postgres_port=5002 ; -; ONLY for worker_type of local +; ONLY for worker mode ; Cb Response Server settings for scanning locally. ; For remote scanning please set these parameters in the yara worker config file ; Default: https://127.0.0.1 From 84acfc9c38d591ac22cd60fa7be8514b70d3e348 Mon Sep 17 00:00:00 2001 From: Zachary Estep Date: Fri, 10 Jan 2020 15:12:50 -0500 Subject: [PATCH 227/257] fixing bug in worker mode --- src/main.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/src/main.py b/src/main.py index 79cf63e..277e0bc 100644 --- a/src/main.py +++ b/src/main.py @@ -377,8 +377,9 @@ def run_to_exit_signal(exit_event: Event) -> None: """ while not (exit_event.is_set()): exit_event.wait(30.0) - numbins = BinaryDetonationResult.select().count() - logger.info(f"Analyzed {numbins} binaries so far ... ") + if "master" in globals.g_mode: + numbins = BinaryDetonationResult.select().count() + logger.info(f"Analyzed {numbins} binaries so far ... ") logger.debug("Begin graceful shutdown...") @@ -800,6 +801,8 @@ def main(): try: logger.debug("Started as demon OK") run_to_exit_signal(exit_event) + except Exception as e: + logger.exception("Error starting {e}") finally: try: wait_all_worker_exit_threads(threads, timeout=4.0) From b35506d8cd6a317716b51bc5c54768ae9885c02f Mon Sep 17 00:00:00 2001 From: Burton Choinski Date: Fri, 10 Jan 2020 16:59:42 -0500 Subject: [PATCH 228/257] output location tweaks. --- example-conf/yara-dev.conf | 7 ++++--- example-conf/yara.conf | 20 ++++++++------------ 2 files changed, 12 insertions(+), 15 deletions(-) diff --git a/example-conf/yara-dev.conf b/example-conf/yara-dev.conf index 28bb6f8..9820d09 100644 --- a/example-conf/yara-dev.conf +++ b/example-conf/yara-dev.conf @@ -15,7 +15,7 @@ cb_server_url=https://127.0.0.1 cb_server_token= ; -; IP Address of workers if worker_type is remote +; URL of the redis server. ; broker_url=redis://127.0.0.1 @@ -71,8 +71,9 @@ database_scanning_interval=900 ; The use of the maintenance script is an ADVANCED FEATURE and should be used with caution! ; ; If "utility_interval" is greater than 0 it represents the interval in minutes after which the yara connector will -; pause to execute a shell script for database maintenance. This can present risks. Be careful what you allow the -; script to do, and use this option at your own discretion. +; pause to execute a user-created shell script designed for database maintenance, located with a "utility_script" +; definition that must be added. This can present risks. Be careful what you allow the script to do, and use this +; option at your own discretion. 
; utility_interval=0 utility_script=./scripts/vacuumscript.sh diff --git a/example-conf/yara.conf b/example-conf/yara.conf index e1af7f4..b15d7f6 100644 --- a/example-conf/yara.conf +++ b/example-conf/yara.conf @@ -1,14 +1,7 @@ [general] ; -; ONLY for worker_type of remote -; IP Address of workers if worker_type is remote -; -broker_url=redis://127.0.0.1 - - -; -; Operating mode - task 'master' or remote 'worker +; Operating mode - choose 'master' for the main system remote 'worker'. ; options worker/master ; mode=master @@ -29,14 +22,17 @@ postgres_db=cb postgres_port=5002 ; -; ONLY for worker mode -; Cb Response Server settings for scanning locally. -; For remote scanning please set these parameters in the yara worker config file -; Default: https://127.0.0.1 +; ONLY for worker mode for Cb Response Server scanning locally. +; Default: https://127.0.0.1 for the master system ; cb_server_url=https://127.0.0.1 cb_server_token= +; +; URL of the redis server. +; +broker_url=redis://127.0.0.1 + ; ; nice value used for this script ; From 75e18dc451129f51acae6a444fe657abbbc63da0 Mon Sep 17 00:00:00 2001 From: Zachary Estep Date: Mon, 13 Jan 2020 09:00:28 -0500 Subject: [PATCH 229/257] updates --- src/main.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/main.py b/src/main.py index 277e0bc..9b7afe2 100644 --- a/src/main.py +++ b/src/main.py @@ -377,7 +377,7 @@ def run_to_exit_signal(exit_event: Event) -> None: """ while not (exit_event.is_set()): exit_event.wait(30.0) - if "master" in globals.g_mode: + if "master" in globals.g_mode: numbins = BinaryDetonationResult.select().count() logger.info(f"Analyzed {numbins} binaries so far ... ") logger.debug("Begin graceful shutdown...") From 2d28b774e755cd195be2574b78102e1744b85441 Mon Sep 17 00:00:00 2001 From: Zachary Estep Date: Mon, 13 Jan 2020 09:14:50 -0500 Subject: [PATCH 230/257] service update --- cb-yara-connector.service | 2 -- 1 file changed, 2 deletions(-) diff --git a/cb-yara-connector.service b/cb-yara-connector.service index d38f652..d603c3a 100644 --- a/cb-yara-connector.service +++ b/cb-yara-connector.service @@ -8,8 +8,6 @@ Type=forking ExecStart=/usr/share/cb/integrations/cb-yara-connector/yaraconnector --debug --config-file /etc/cb/integrations/cb-yara-connector/yaraconnector.conf --run-forever True --log-file /var/log/cb/integrations/cb-yara-connector/yaraconnector.log WorkingDirectory=/usr/share/cb/integrations/cb-yara-connector KillSignal=SIGQUIT -#User=cb -#Group=cb [Install] From cc7deca5690d1bcfa4d96e56bd32d37f1d95da23 Mon Sep 17 00:00:00 2001 From: Zachary Estep Date: Mon, 13 Jan 2020 10:51:22 -0500 Subject: [PATCH 231/257] Service fix --- cb-yara-connector.service | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cb-yara-connector.service b/cb-yara-connector.service index d603c3a..f0174b0 100644 --- a/cb-yara-connector.service +++ b/cb-yara-connector.service @@ -5,7 +5,7 @@ After=syslog.target network.target [Service] Environment=C_FORCE_ROOT=1 Type=forking -ExecStart=/usr/share/cb/integrations/cb-yara-connector/yaraconnector --debug --config-file /etc/cb/integrations/cb-yara-connector/yaraconnector.conf --run-forever True --log-file /var/log/cb/integrations/cb-yara-connector/yaraconnector.log +ExecStart=/usr/share/cb/integrations/cb-yara-connector/yaraconnector --output-file /var/cb/data/cb-yara-connector/feed.json --debug --config-file /etc/cb/integrations/cb-yara-connector/yaraconnector.conf --run-forever True --log-file /var/log/cb/integrations/cb-yara-connector/yaraconnector.log 
WorkingDirectory=/usr/share/cb/integrations/cb-yara-connector KillSignal=SIGQUIT From 07efad104f1fc8ccc408b2523098cb83128acc67 Mon Sep 17 00:00:00 2001 From: Burton Choinski Date: Mon, 13 Jan 2020 11:48:39 -0500 Subject: [PATCH 232/257] doc changes --- README.md | 87 ++++++++++++++++++++++++------------------ example-conf/yara.conf | 21 +++++----- src/config_handling.py | 13 ++++--- 3 files changed, 69 insertions(+), 52 deletions(-) diff --git a/README.md b/README.md index a163baf..4e7f0a8 100644 --- a/README.md +++ b/README.md @@ -1,12 +1,17 @@ # Installing Yara Agent (Centos/RHEL 7+) -The Yara agent has two parts a master and one or more workers. +The Yara agent is made up of two parts -- a master and one or more workers. +The master service must be installed on the same system as Cb Response, while workers +are usually installed on other systems (but can also be on the master system, if so +desired). -The master service must be installed on the same system as Cb Response. +The yara connector itself uses celery-queues to distribute work to and remote (or local) workers - you will need to install and +configure a broker (probably, redis - but any broker compatible with celery 4.x+ will do) that is accessible +to the master node and to any worker(s). -Download the latest RPM from the github releases page, [here](https://github.com/carbonblack/cb-yara-connector/releases/download/untagged-ac560575c782caf791c5/python-cb-yara-connector-2.1-0.x86_64.rpm). +Download the latest RPM from the github releases page, [here](https://github.com/carbonblack/cb-yara-connector/releases/download/untagged-c64dc62eb602dc1b82df/python-cb-yara-connector-2.1-0.x86_64.rpm). -The connector can be easily installed from an rpm: +Once downloaded, the connector can be easily installed from the rpm: `yum install python-cb-yara-connector-.rpm` @@ -14,11 +19,8 @@ The connector uses a configured directory containing yara rules, to efficiently are seen by the CB Response Server. The generated threat information is used to produce an intelligence feed for ingest by the Cb Response Server again. -The yara connector uses celery-queues to distribute work to remote workers - you will need to install and -configure a broker (probbably, redis - but any broker compatible with celery 4.x+ will do) that is accessible -to the master node and to any worker(s). -# Dev install # +# Dev install Use git to retrieve the project, create a new virtual environment using python3.6+ and use pip to install the requirements: @@ -27,43 +29,58 @@ git clone https://github.com/carbonblack/cb-yara-connector pip3 install -r requirements.txt ``` -## Create Yara Agent Config - -The connector is configured by a .ini formatted configuration file at `/etc/cb/integrations/cb-yara-connector/yaraconnector.conf`. - -The installation process will create a sample configuration file: -`/etc/cb/integrations/cb-yara-connector/yaraconnector.conf.sample` - -Copy the sample configuration file, to edit to produce a working configuration for the connector: +# Create Yara Agent Config -`cp /etc/cb/integrations/cb-yara-connector/yaraconnector.conf.sample /etc/cb/integrations/cb-yara-connector/yaraconnector.conf` +The installation process will create a sample configuration file in the control directory +as `/etc/cb/integrations/cb-yara-connector/yaraconnector.conf.sample`. Simply copy +this sample template to `/etc/cb/integrations/cb-yara-connector/yaraconnector.conf`, +which is looked for by the yara connectory service. 
You will likely have to edit this +configuration file on each system (master and workers) to supply any missing +information: +* worker systems will need to change the mode to `worker`; if you plan to use the master +system to also run a worker (not suggested, but allowed), the mode must be `master+worker`. +* Remote worker systems will require the master's URL for `cb_server_url` (local workers need no modification); + they also require the token of a global admin user for `cb_server_token`. +* Remote workers will require the URL of the master's redis server -The daemon will attempt to load the postgres credentails from disk, if available - optionally, configure the postgres connection information for your CBR server using the `postgres_xxxx` keys in the config. The REST API location and credentials are specified in the `cb_server_url` and `cb_server_token` keys, respectively. +The daemon will attempt to load the postgres credentals from the response server's `cb.conf`, +if available, falling back to postgres connection information for your CBR server +in the master's configurration file using the `postgres_xxxx` keys in the config. The REST API location and credentials are specified in the `cb_server_url` and `cb_server_token` keys, respectively. -~~~ini +```ini ; -; Cb Response postgres Database settings (master) +; Cb Response postgres Database settings, required for 'master' and 'master+worker' systems +; The seever will attempt to read from local cb.conf file first and fall back +; to these settings if it cannot do so. ; postgres_host=127.0.0.1 postgres_username=cb -postgres_password= +postgres_password= postgres_db=cb postgres_port=5002 -~~~ +``` -~~~ini +```ini ; -; ONLY FOR WORKER(S) -; Cb Response Server settings for scanning locally -; For remote scanning please set these parameters in the yara worker config file -; Default: https://127.0.0.1 +; Cb Response Server settings, required for 'worker' and 'master+worker' systems +; For remote workers, the cb_server_url mus be that of the master ; -cb_server_url=https://localhost +cb_server_url=https://127.0.0.1 cb_server_token= -~~~ +``` -You must configure `broker=` which sets the broker and results_backend for celery. You will set this appropriately as per the celery documentation - here (https://docs.celeryproject.org/en/latest/getting-started/brokers/). +You must configure `broker=` which sets the broker and results_backend for celery. +You will set this appropriately as per the celery documentation - +here (https://docs.celeryproject.org/en/latest/getting-started/brokers/). +```ini +; +; URL of the redis server, defaulting to the local response server redis for the master. If this is a worker +; system, alter to point to the master system. If you are using a standalone redis server, both master and +; workers must point to the same server. +; +broker_url=redis://127.0.0.1 +``` The yara-connector RPM contains a service that is primarily intended to serve as a distributed system, with a master serving work to remote worker machine(s) for analysis and compiling a threat intelligence feed for Carbon Black Response EDR. @@ -111,8 +128,6 @@ broker_url=redis://127.0.0.1 mode=master -worker_type=remote - ; ; Cb Response Server Configuration ; Used for downloading binaries @@ -136,17 +151,15 @@ yara_rules_dir=/etc/cb/integrations/cb-yara-connector/yara-rules ; Python Celery Broker Url. 
Set this full url stringg ; Example: redis:// ; -broker_url=redis://127.0.0.1 +broker_url=redis://master.server.url mode=slave -worker_type=local - ; ; Cb Response Server Configuration ; Used for downloading binaries ; -cb_server_url=https://localhost +cb_server_url=https://master.server.url cb_server_token=aafdasfdsafdsafdsa ``` @@ -214,7 +227,7 @@ within the current yara package. ###### --output-file Provides the path containing the feed description file. If not supplied, defaults to -`/local/yara_feed.json` within the current yara package. +`feed.json` in the same location as the configured `feed_database_dir` folder. ###### --validate-yara-rules If supplied, yara rules will be validated and the script will exit. diff --git a/example-conf/yara.conf b/example-conf/yara.conf index b15d7f6..7dea10f 100644 --- a/example-conf/yara.conf +++ b/example-conf/yara.conf @@ -1,8 +1,8 @@ [general] ; -; Operating mode - choose 'master' for the main system remote 'worker'. -; options worker/master +; Operating mode - choose 'master' for the main system, 'worker' for a remote worker. +; If you wish to run worker threads locally (not suggested), use 'master+worker' ; mode=master @@ -12,24 +12,27 @@ mode=master yara_rules_dir=/etc/cb/integrations/cb-yara-connector/yara_rules ; -; Cb Response postgres Database settings -; only for master mode +; Cb Response postgres Database settings, required for 'master' and 'master+worker' systems +; The seever will attempt to read from local cb.conf file first and fall back +; to these settings if it cannot do so. ; postgres_host=127.0.0.1 postgres_username=cb -postgres_password= +postgres_password= postgres_db=cb postgres_port=5002 ; -; ONLY for worker mode for Cb Response Server scanning locally. -; Default: https://127.0.0.1 for the master system +; Cb Response Server settings, required for 'worker' and 'master+worker' systems +; For remote workers, the cb_server_url mus be that of the master ; cb_server_url=https://127.0.0.1 -cb_server_token= +cb_server_token= ; -; URL of the redis server. +; URL of the redis server, defaulting to the local response server redis for the master. If this is a worker +; system, alter to point to the master system. If you are using a standalone redis server, both master and +; workers must point to the same server. ; broker_url=redis://127.0.0.1 diff --git a/src/config_handling.py b/src/config_handling.py index 5b91ea0..da8b07b 100644 --- a/src/config_handling.py +++ b/src/config_handling.py @@ -130,10 +130,9 @@ def _master_check(self, output_file) -> None: :raises CbInvalidConfig: :raises ValueError: """ - - config = configparser.ConfigParser() + use_fallback = True if os.path.isfile('/etc/cb/cb.conf'): - logger.debug("FOUND CB.CONF") + logger.debug("Found local 'cb.conf', attempting to configure postgres from it...") try: with open('/etc/cb/cb.conf') as cbconffile: for line in cbconffile.readlines(): @@ -144,12 +143,14 @@ def _master_check(self, output_file) -> None: globals.g_postgres_password = matches.group(2) if matches else "NONE" globals.g_postgres_port = 5002 globals.g_postgres_db = "cb" - globals.g_postgres_host = "127.0.01" + globals.g_postgres_host = "127.0.0.1" break + use_fallback = False # we good! 
except Exception as err: logger.exception(f"Someting went wrong trying to parse /etc/cb/cb.conf for postgres details: {err}") - else: - logger.debug("Couldn't find /etc/cb/cb.conf") + + if use_fallback: + logger.debug("Falling back to config settings for postgres...") globals.g_postgres_host = self._as_str("postgres_host", default=globals.g_postgres_host) globals.g_postgres_username = self._as_str("postgres_username", default=globals.g_postgres_username) globals.g_postgres_password = self._as_str("postgres_password", required=True) From f4241d2ebf9cc02dc31680ee51b0d181fe3b748b Mon Sep 17 00:00:00 2001 From: Zachary Estep Date: Mon, 13 Jan 2020 14:02:49 -0500 Subject: [PATCH 233/257] Update README.md --- README.md | 36 +++++++++++++++++------------------- 1 file changed, 17 insertions(+), 19 deletions(-) diff --git a/README.md b/README.md index 4e7f0a8..03cb21e 100644 --- a/README.md +++ b/README.md @@ -1,13 +1,12 @@ # Installing Yara Agent (Centos/RHEL 7+) -The Yara agent is made up of two parts -- a master and one or more workers. +The Yara Integration is made up of two parts -- a master and one or more workers. The master service must be installed on the same system as Cb Response, while workers are usually installed on other systems (but can also be on the master system, if so -desired). +desired). -The yara connector itself uses celery-queues to distribute work to and remote (or local) workers - you will need to install and -configure a broker (probably, redis - but any broker compatible with celery 4.x+ will do) that is accessible -to the master node and to any worker(s). +The yara connector itself uses celery to distribute work to and remote (or local) workers - you will need to install and +configure a broker (ex, redis, postgres) that is accessible to both the task-master and the remote worker instance(s). Download the latest RPM from the github releases page, [here](https://github.com/carbonblack/cb-yara-connector/releases/download/untagged-c64dc62eb602dc1b82df/python-cb-yara-connector-2.1-0.x86_64.rpm). @@ -19,17 +18,7 @@ The connector uses a configured directory containing yara rules, to efficiently are seen by the CB Response Server. The generated threat information is used to produce an intelligence feed for ingest by the Cb Response Server again. - -# Dev install - -Use git to retrieve the project, create a new virtual environment using python3.6+ and use pip to install the requirements: - -``` -git clone https://github.com/carbonblack/cb-yara-connector -pip3 install -r requirements.txt -``` - -# Create Yara Agent Config +# Create Yara Connector Config The installation process will create a sample configuration file in the control directory as `/etc/cb/integrations/cb-yara-connector/yaraconnector.conf.sample`. Simply copy @@ -133,7 +122,7 @@ mode=master ; Used for downloading binaries ; cb_server_url=https://localhost -cb_server_token=aafdasfdsafdsafdsa +cb_server_token=12345678910 ; ; Directory for temporary yara rules storage @@ -153,14 +142,14 @@ yara_rules_dir=/etc/cb/integrations/cb-yara-connector/yara-rules ; broker_url=redis://master.server.url -mode=slave +mode=worker ; ; Cb Response Server Configuration ; Used for downloading binaries ; cb_server_url=https://master.server.url -cb_server_token=aafdasfdsafdsafdsa +cb_server_token=12345678910 ``` @@ -234,3 +223,12 @@ If supplied, yara rules will be validated and the script will exit. 
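The options documented above map onto an argparse setup along the lines of the sketch below; the `--output-file` default mirrors the fallback behavior described (writing `feed.json` beside `feed_database_dir`), but this is a sketch, not the connector's exact parser:

```python
# Sketch of a parser for the documented command-line options.
import argparse


def build_parser() -> argparse.ArgumentParser:
    parser = argparse.ArgumentParser(description="Yara connector")
    parser.add_argument("--config-file", required=True, help="configuration file location")
    parser.add_argument("--log-file", default=None, help="log file location")
    # When omitted, the feed lands as 'feed.json' next to feed_database_dir.
    parser.add_argument("--output-file", default=None, help="output feed file")
    parser.add_argument("--validate-yara-rules", action="store_true",
                        help="validate the rules directory, then exit")
    return parser
```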
#### Example Cron Entry _[TBD]_ + +# Dev install + +Use git to retrieve the project, create a new virtual environment using python3.6+ and use pip to install the requirements: + +``` +git clone https://github.com/carbonblack/cb-yara-connector +pip3 install -r requirements.txt +``` From c3b4ba29b4af730b63859e332c792db0ed105dcf Mon Sep 17 00:00:00 2001 From: Zachary Estep Date: Mon, 13 Jan 2020 14:26:03 -0500 Subject: [PATCH 234/257] Update README.md --- README.md | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/README.md b/README.md index 03cb21e..d1eb5c9 100644 --- a/README.md +++ b/README.md @@ -3,10 +3,9 @@ The Yara Integration is made up of two parts -- a master and one or more workers. The master service must be installed on the same system as Cb Response, while workers are usually installed on other systems (but can also be on the master system, if so -desired). - +desired). The yara connector itself uses celery to distribute work to and remote (or local) workers - you will need to install and -configure a broker (ex, redis, postgres) that is accessible to both the task-master and the remote worker instance(s). +configure a [broker](https://docs.celeryproject.org/en/latest/getting-started/brokers/) (ex. redis) that is accessible to both the task-master and the remote worker instance(s). Download the latest RPM from the github releases page, [here](https://github.com/carbonblack/cb-yara-connector/releases/download/untagged-c64dc62eb602dc1b82df/python-cb-yara-connector-2.1-0.x86_64.rpm). @@ -104,6 +103,8 @@ rule matchover100kb { `systemctl status -l cb-yara-connector` will display logging information. +`journalctl -u cb-yara-connector.service` - verbose logs. + #### Example Yara Connector Master configuration ```ini From f92f467337c7f1f4acdd2b118b63f50298636bbd Mon Sep 17 00:00:00 2001 From: Burton Choinski Date: Tue, 14 Jan 2020 09:58:57 -0500 Subject: [PATCH 235/257] doc changes, code cleanup --- README.md | 66 ++++++++--------------------------------------------- src/main.py | 45 +++++++++++------------------------- 2 files changed, 23 insertions(+), 88 deletions(-) diff --git a/README.md b/README.md index d1eb5c9..22f64fa 100644 --- a/README.md +++ b/README.md @@ -5,9 +5,10 @@ The master service must be installed on the same system as Cb Response, while wo are usually installed on other systems (but can also be on the master system, if so desired). The yara connector itself uses celery to distribute work to and remote (or local) workers - you will need to install and -configure a [broker](https://docs.celeryproject.org/en/latest/getting-started/brokers/) (ex. redis) that is accessible to both the task-master and the remote worker instance(s). +configure a [broker](https://docs.celeryproject.org/en/latest/getting-started/brokers/) (ex. redis) that is accessible + to both the task-master and the remote worker instance(s). -Download the latest RPM from the github releases page, [here](https://github.com/carbonblack/cb-yara-connector/releases/download/untagged-c64dc62eb602dc1b82df/python-cb-yara-connector-2.1-0.x86_64.rpm). +Download the latest RPM from the github releases page, [here](https://github.com/carbonblack/cb-yara-connector/releases/download/untagged-b39ed959488c9ec78055/python-cb-yara-connector-2.1-0.x86_64.rpm). Once downloaded, the connector can be easily installed from the rpm: @@ -22,7 +23,7 @@ intelligence feed for ingest by the Cb Response Server again. 
The installation process will create a sample configuration file in the control directory as `/etc/cb/integrations/cb-yara-connector/yaraconnector.conf.sample`. Simply copy this sample template to `/etc/cb/integrations/cb-yara-connector/yaraconnector.conf`, -which is looked for by the yara connectory service. You will likely have to edit this +which is looked for by the yara connector service. You will likely have to edit this configuration file on each system (master and workers) to supply any missing information: * worker systems will need to change the mode to `worker`; if you plan to use the master @@ -31,9 +32,9 @@ system to also run a worker (not suggested, but allowed), the mode must be `mast they also require the token of a global admin user for `cb_server_token`. * Remote workers will require the URL of the master's redis server -The daemon will attempt to load the postgres credentals from the response server's `cb.conf`, +The daemon will attempt to load the postgres credentials from the response server's `cb.conf`, if available, falling back to postgres connection information for your CBR server -in the master's configurration file using the `postgres_xxxx` keys in the config. The REST API location and credentials are specified in the `cb_server_url` and `cb_server_token` keys, respectively. +in the master's configuration file using the `postgres_xxxx` keys in the config. The REST API location and credentials are specified in the `cb_server_url` and `cb_server_token` keys, respectively. ```ini ; @@ -76,7 +77,7 @@ There are two operating modes to support the two roles: `mode=master` and `mode= Install the connector on the cbr server, and config it with the master mode - configure postgres credentials, and a directory of monitored yara rules. In worker mode, configure REST API credentials. Both modes require a broker for celery communications. -## Input your yara rules +## Create your yara rules The yara connector monitors the directory `/etc/cb/integrations/cb-yara-connector/yara_rules` for files (`.yar`) each specifying one or more yara rule. Your rules must have `meta` section with a @@ -95,7 +96,7 @@ rule matchover100kb { } ``` -#### Running Yara Agent +#### Controlling the Yara Agent `systemctl start cb-yara-connector` will up the service using systemD. @@ -105,54 +106,6 @@ rule matchover100kb { `journalctl -u cb-yara-connector.service` - verbose logs. -#### Example Yara Connector Master configuration - -```ini -[general] - -; -; Python Celery Broker Url. Set this full url stringg -; Example: redis:// -; -broker_url=redis://127.0.0.1 - -mode=master - -; -; Cb Response Server Configuration -; Used for downloading binaries -; -cb_server_url=https://localhost -cb_server_token=12345678910 - -; -; Directory for temporary yara rules storage -; WARNING: Put your yara rules with the yara agent. This is just temporary storage. -; -yara_rules_dir=/etc/cb/integrations/cb-yara-connector/yara-rules -``` - -### Example Remote Worker configuration - -```ini -[general] - -; -; Python Celery Broker Url. Set this full url stringg -; Example: redis:// -; -broker_url=redis://master.server.url - -mode=worker - -; -; Cb Response Server Configuration -; Used for downloading binaries -; -cb_server_url=https://master.server.url -cb_server_token=12345678910 - -``` # Development Notes @@ -186,7 +139,7 @@ utility_script=./scripts/vacuumscript.sh The dockerfile in the top-level of the repo contains a centos7 environment for running, building, and testing the connector. 
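When testing rule changes, note that `--validate-yara-rules` amounts to compiling every rule file up front. A minimal sketch of that check using the yara-python API already in `requirements.txt` (the helper name is hypothetical, not the connector's actual function):

```python
import os
import yara  # yara-python, already a connector dependency

def validate_rules(rules_dir="/etc/cb/integrations/cb-yara-connector/yara_rules"):
    """Compile all .yar files together; raises yara.SyntaxError on a bad rule."""
    sources = {}
    for name in os.listdir(rules_dir):
        if name.endswith(".yar"):
            with open(os.path.join(rules_dir, name)) as fp:
                sources[name] = fp.read()  # namespace -> rule text
    return yara.compile(sources=sources)
```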
-The provided script `docker-build-rpm.sh` will use docker to build the project, and place the RPM(s) in $PWD/RPMS. +The provided script `docker-build-rpm.sh` will use docker to build the project, and place the RPM(s) in ${PWD}/RPMS. ##### Command-line Options @@ -225,6 +178,7 @@ If supplied, yara rules will be validated and the script will exit. #### Example Cron Entry _[TBD]_ +--- # Dev install Use git to retrieve the project, create a new virtual environment using python3.6+ and use pip to install the requirements: diff --git a/src/main.py b/src/main.py index 9b7afe2..fa541ca 100644 --- a/src/main.py +++ b/src/main.py @@ -19,8 +19,9 @@ from typing import List import lockfile -import psycopg2 +# noinspection PyUnresolvedReferences import psutil +import psycopg2 # noinspection PyPackageRequirements import yara # noinspection PyPackageRequirements @@ -49,9 +50,7 @@ celery_logger.setLevel(logging.CRITICAL) -def analysis_worker( - exit_event: Event, hash_queue: Queue, scanning_results_queue: Queue -) -> None: +def analysis_worker(exit_event: Event, hash_queue: Queue, scanning_results_queue: Queue) -> None: """ The promise worker scanning function. @@ -372,14 +371,17 @@ def handle_sig(exit_event: Event, sig: int, frame) -> None: # def run_to_exit_signal(exit_event: Event) -> None: """ - Wait-until-exit polling loop function. + Wait-until-exit polling loop function. Spam reduced by only updating when count changes. :param exit_event: the event handler """ + last_numbins = 0 while not (exit_event.is_set()): exit_event.wait(30.0) if "master" in globals.g_mode: numbins = BinaryDetonationResult.select().count() - logger.info(f"Analyzed {numbins} binaries so far ... ") + if numbins != last_numbins: + logger.info(f"Analyzed {numbins} binaries so far ... ") + last_numbins = numbins logger.debug("Begin graceful shutdown...") @@ -599,30 +601,6 @@ def launch_celery_worker(worker_obj, workerkwargs=None, config_file: str = None) logger.debug("CELERY WORKER LAUNCHING THREAD EXITED") -def terminate_celery_worker(worker_obj: worker = None): - """ - Attempt to use the pidfile to gracefully terminate celery workers if they exist - if the worker hasn't terminated gracefully after 5 seconds, kill it using the .die() command - :param worker_obj: worker object - """ - """with open('/tmp/yaraconnectorceleryworker') as cworkerpidfile: - worker_pid_str = cworkerpidfile.readline() - worker_pid = int(worker_pid_str) if len(worker_pid_str.strip()) > 0 else None - if worker_pid: - parent = psutil.Process(worker_pid) if worker_pid else psutil.Process() - children = parent.children(recursive=True) - for child in children: - logger.debug(f"Sending term sig to celery worker child - {worker_pid}") - os.kill(child.pid, signal.SIGQUIT) - logger.debug(f"Sending term sig to celery worker - {worker_pid}") - os.kill(worker_pid, signal.SIGQUIT) - else: - logger.debug("Didn't find a worker-pidfile to terminate on exit...") - - time.sleep(1.0) """ - if worker_obj: - worker_obj.die("Worker terminated") - ################################################################################ # Main entrypoint ################################################################################ @@ -729,6 +707,7 @@ def main(): # used for local worker handling in some scenarios localworker = None + exit_rc = 0 try: """ 3 modes of operation @@ -802,12 +781,11 @@ def main(): logger.debug("Started as demon OK") run_to_exit_signal(exit_event) except Exception as e: - logger.exception("Error starting {e}") + logger.exception(f"Error while executing: {e}") 
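# wait_all_worker_exit_threads() is called below but never shown in this diff;
# assuming it simply joins each worker thread with a bounded wait (as the name
# suggests), a minimal sketch of it would be:
def wait_all_worker_exit_threads(threads: List[Thread], timeout: float = None):
    for thread in threads:
        thread.join(timeout=timeout)  # bounded wait so shutdown cannot hang
        if thread.is_alive():
            logger.warning(f"Thread {thread.name} did not exit within {timeout}s")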
finally: try: wait_all_worker_exit_threads(threads, timeout=4.0) finally: - #terminate_celery_worker(localworker) logger.info("Yara connector shutdown") else: # | | | BATCH MODE | | | @@ -831,10 +809,13 @@ def main(): wait_all_worker_exit_threads(threads, timeout=4.0) except KeyboardInterrupt: logger.info("\n\n##### Interupted by User!\n") + exit_rc = 3 except Exception as err: logger.error(f"There were errors executing yara rules: {err}") + exit_rc = 4 finally: exit_event.set() + sys.exit(exit_rc) if __name__ == "__main__": From d66484cd4f83a559534892dc84231f7eb8e193cb Mon Sep 17 00:00:00 2001 From: Burton Choinski Date: Tue, 14 Jan 2020 10:04:48 -0500 Subject: [PATCH 236/257] removed unused psutil --- src/main.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/src/main.py b/src/main.py index fa541ca..d31c271 100644 --- a/src/main.py +++ b/src/main.py @@ -19,8 +19,6 @@ from typing import List import lockfile -# noinspection PyUnresolvedReferences -import psutil import psycopg2 # noinspection PyPackageRequirements import yara From c827330946f67d4990dbd4e8cab0d380411e4c52 Mon Sep 17 00:00:00 2001 From: Burton Choinski Date: Tue, 14 Jan 2020 10:21:20 -0500 Subject: [PATCH 237/257] removed unused psutil --- requirements.txt | 1 - 1 file changed, 1 deletion(-) diff --git a/requirements.txt b/requirements.txt index 2a8fb2b..4910a08 100644 --- a/requirements.txt +++ b/requirements.txt @@ -14,4 +14,3 @@ requests==2.22.0 simplejson==3.17.0 urllib3==1.25.7 yara-python==3.11.0 -psutil==5.6.7 From 02b3b4170956cd0b803ce34af4f233e4853d9f36 Mon Sep 17 00:00:00 2001 From: Zachary Estep Date: Mon, 13 Jan 2020 11:37:53 -0500 Subject: [PATCH 238/257] updates --- cb-yara-connector.service | 2 +- cb-yara-connector.spec | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/cb-yara-connector.service b/cb-yara-connector.service index f0174b0..81a3fd7 100644 --- a/cb-yara-connector.service +++ b/cb-yara-connector.service @@ -5,7 +5,7 @@ After=syslog.target network.target [Service] Environment=C_FORCE_ROOT=1 Type=forking -ExecStart=/usr/share/cb/integrations/cb-yara-connector/yaraconnector --output-file /var/cb/data/cb-yara-connector/feed.json --debug --config-file /etc/cb/integrations/cb-yara-connector/yaraconnector.conf --run-forever True --log-file /var/log/cb/integrations/cb-yara-connector/yaraconnector.log +ExecStart=/usr/share/cb/integrations/cb-yara-connector/yaraconnector --debug --config-file /etc/cb/integrations/cb-yara-connector/yaraconnector.conf --run-forever True --log-file /var/log/cb/integrations/cb-yara-connector/yaraconnector.log --output-file /var/cb/data/cb-yara-connector/feed.json WorkingDirectory=/usr/share/cb/integrations/cb-yara-connector KillSignal=SIGQUIT diff --git a/cb-yara-connector.spec b/cb-yara-connector.spec index 2581a05..5e70c0a 100644 --- a/cb-yara-connector.spec +++ b/cb-yara-connector.spec @@ -8,7 +8,7 @@ block_cipher = None a = Analysis(['src/main.py'], pathex=['./src'], binaries=[], - hiddenimports=['billiard','billiard.heap','lockfile','mmap','celery.app.control','celery.worker.strategy','celery.worker.consumer','celery.events.state','celery.worker.autoscale','celery.worker.components','celery.concurrency.prefork','celery.apps','celery.apps.worker','celery.app.log','celery.fixups', 'celery.fixups.django', 'celery.loaders.app','celery.app.amqp', 'kombu.transport.redis', 'redis', 'celery.backends','celery.backends.redis', 'celery.app.events', 'celery.events','celery.redis','kombu.transport.pyamqp'], + 
hiddenimports=['psutil','billiard','billiard.heap','lockfile','mmap','celery.app.control','celery.worker.strategy','celery.worker.consumer','celery.events.state','celery.worker.autoscale','celery.worker.components','celery.concurrency.prefork','celery.apps','celery.apps.worker','celery.app.log','celery.fixups', 'celery.fixups.django', 'celery.loaders.app','celery.app.amqp', 'kombu.transport.redis', 'redis', 'celery.backends','celery.backends.redis', 'celery.app.events', 'celery.events','celery.redis','kombu.transport.pyamqp'], hookspath=[], runtime_hooks=[], excludes=[], From 334a29f3d281a9204375569abd8ea3ff01636c4c Mon Sep 17 00:00:00 2001 From: Zachary Estep Date: Tue, 14 Jan 2020 14:03:25 -0500 Subject: [PATCH 239/257] Initial init.d support for centos6 --- MANIFEST | 3 ++- cb-yara-connector | 33 +++++++++++++++++++++++++++++++++ cb-yara-connector.rpm.spec | 2 ++ makefile | 1 + 4 files changed, 38 insertions(+), 1 deletion(-) create mode 100644 cb-yara-connector diff --git a/MANIFEST b/MANIFEST index bb1ccd6..0b3e868 100644 --- a/MANIFEST +++ b/MANIFEST @@ -4,8 +4,9 @@ %dir /var/log/cb/integrations/cb-yara-connector /var/log/cb/integrations/cb-yara-connector/yaraconnector.log /etc/systemd/system/cb-yara-connector.service +/etc/init.d/cb-yara-connector /etc/cb/integrations/cb-yara-connector/yara_rules /etc/cb/integrations/cb-yara-connector/yaraconnector.conf.example /tmp/yaraconnectorceleryworker %dir /var/cb/data/cb-yara-connector -%dir /var/cb/data/cb-yara-connector/feed_db \ No newline at end of file +%dir /var/cb/data/cb-yara-connector/feed_db diff --git a/cb-yara-connector b/cb-yara-connector new file mode 100644 index 0000000..1aacd44 --- /dev/null +++ b/cb-yara-connector @@ -0,0 +1,33 @@ +#!/bin/bash +# +# /etc/rc.d/init.d/cb-yara-connector +# +. /etc/init.d/functions + + +start() { + echo -n "Starting cb-yara-connector: " + return /usr/share/cb/integrations/cb-yara-connector/yaraconnector --debug --working-dir /usr/share/cb/integrations/cb-yara-connector --config-file /etc/cb/integrations/cb-yara-connector/yaraconnector.conf --run-forever True --log-file /var/log/cb/integrations/cb-yara-connector/yaraconnector.log --output-file /var/cb/data/cb-yara-connector/feed.json +} + +stop() { + echo -n "Shutting down cb-yara-connector: " + kill -3 /usr/share/cb/integrations/cb-yara-connector/yaraconnector.lock +} + +case "$1" in + start) + start + ;; + stop) + stop + ;; + status) + echo 'running' + ;; + restart) + stop + start + ;; +esac +exit $? 
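Note that `stop()` in the init script above hands the lock-file *path* to `kill`, which expects a PID; patch 250 later in this series fixes this by keeping a real PID file and signaling the recorded process. A minimal sketch of that read-pidfile-and-signal pattern, with a hypothetical helper name:

```python
import os
import signal

def stop_from_pidfile(pidfile: str) -> bool:
    """Send SIGTERM to the PID recorded in pidfile; False if nothing to signal."""
    try:
        with open(pidfile) as fp:
            pid = int(fp.read().strip())
        os.kill(pid, signal.SIGTERM)
        return True
    except (OSError, ValueError):
        return False  # missing pidfile, garbage contents, or process already gone
```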
diff --git a/cb-yara-connector.rpm.spec b/cb-yara-connector.rpm.spec index 38485f8..26e50de 100644 --- a/cb-yara-connector.rpm.spec +++ b/cb-yara-connector.rpm.spec @@ -17,12 +17,14 @@ mkdir -p ${RPM_BUILD_ROOT}/usr/share/cb/integrations/cb-yara-connector mkdir -p ${RPM_BUILD_ROOT}/etc/cb/integrations/cb-yara-connector mkdir -p ${RPM_BUILD_ROOT}/etc/cb/integrations/cb-yara-connector/yara_rules mkdir -p ${RPM_BUILD_ROOT}/etc/init +mkdir -p ${RPM_BUILD_ROOT}/etc/init.d/ mkdir -p ${RPM_BUILD_ROOT}/etc/systemd/system mkdir -p ${RPM_BUILD_ROOT}/tmp mkdir -p ${RPM_BUILD_ROOT}/var/run/ mkdir -p ${RPM_BUILD_ROOT}/var/cb/data/cb-yara-connector/feed_db cp ${RPM_SOURCE_DIR}/example-conf/yara.conf ${RPM_BUILD_ROOT}/etc/cb/integrations/cb-yara-connector/yaraconnector.conf.example install -m 0644 ${RPM_SOURCE_DIR}/cb-yara-connector.service ${RPM_BUILD_ROOT}/etc/systemd/system/cb-yara-connector.service +install -m 644 ${RPM_SOURCE_DIR}/cb-yara-connector ${RPM_BUILD_ROOT}/etc/init.d/cb-yara-connector install -m 0755 ${RPM_SOURCE_DIR}/dist/yaraconnector ${RPM_BUILD_ROOT}/usr/share/cb/integrations/cb-yara-connector/ install ${RPM_SOURCE_DIR}/yara-logo.png ${RPM_BUILD_ROOT}/usr/share/cb/integrations/cb-yara-connector/yara-logo.png touch ${RPM_BUILD_ROOT}/var/log/cb/integrations/cb-yara-connector/yaraconnector.log diff --git a/makefile b/makefile index 9be8f58..29eeeaf 100644 --- a/makefile +++ b/makefile @@ -20,6 +20,7 @@ rpm: cp -rp src/* ${SOURCEDIR}/src cp -rp src/* ${BUILDDIR}/src cp -rp init-scripts/* ${BUILDDIR}/init-scripts + cp -rp cb-yara-connector ${SOURCEDIR}/cb-yara-connector cp example-conf/yara.conf ${BUILDDIR}/example-conf/yara.conf cp -rp example-conf/yara.conf ${SOURCEDIR}/example-conf/yara.conf cp -p MANIFEST ${BUILDDIR}/MANIFEST From ac8087c7870d21fbfda86f933a137a1f71a9af47 Mon Sep 17 00:00:00 2001 From: dseidel Date: Tue, 14 Jan 2020 14:42:46 -0500 Subject: [PATCH 240/257] CB-29781: revise README --- README.md | 99 +++++++++++++++++++++++-------------------------------- 1 file changed, 41 insertions(+), 58 deletions(-) diff --git a/README.md b/README.md index 22f64fa..1995fed 100644 --- a/README.md +++ b/README.md @@ -1,36 +1,29 @@ -# Installing Yara Agent (Centos/RHEL 7+) +# Installing YARA Agent (Centos/RHEL 7+) -The Yara Integration is made up of two parts -- a master and one or more workers. -The master service must be installed on the same system as Cb Response, while workers -are usually installed on other systems (but can also be on the master system, if so -desired). -The yara connector itself uses celery to distribute work to and remote (or local) workers - you will need to install and -configure a [broker](https://docs.celeryproject.org/en/latest/getting-started/brokers/) (ex. redis) that is accessible - to both the task-master and the remote worker instance(s). +[YARA](https://virustotal.github.io/yara/) Integration is made up of two parts -- a master and one or more workers. The master service must be installed on the same system as Cb Response, while workers are usually installed on other systems (but can also be on the master system, if so desired). The YARA connector itself uses [Celery](http://www.celeryproject.org/) to distribute work to and remote (or local) workers - you will need to install and configure a [broker](https://docs.celeryproject.org/en/latest/getting-started/brokers/) (e.g., [Redis](https://redis.io/)) that is accessible to both the task-master and the remote worker instance(s). 
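The master/worker split described above is plain Celery. A minimal sketch of the shape involved, assuming a local Redis broker; the app name and task below are hypothetical, not the connector's actual task definitions:

```python
from celery import Celery  # celery==4.4.0 per requirements.txt

# Master and workers point at the same broker; the master enqueues binary
# hashes and workers consume them, returning YARA match results.
app = Celery("yaraconnector",
             broker="redis://127.0.0.1:6379/0",
             backend="redis://127.0.0.1:6379/0")

@app.task
def analyze_binary(md5: str) -> dict:
    # Worker side: fetch the binary from the master, scan it, report (omitted).
    return {"md5": md5, "score": 0, "matches": []}
```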
-Download the latest RPM from the github releases page, [here](https://github.com/carbonblack/cb-yara-connector/releases/download/untagged-b39ed959488c9ec78055/python-cb-yara-connector-2.1-0.x86_64.rpm). +The connector uses a configured directory containing YARA rules, to efficiently scan binaries as they are seen by the CB Response Server. The generated threat information is used to produce an intelligence feed for ingest by the Cb Response Server again. -Once downloaded, the connector can be easily installed from the rpm: +1. Download the latest RPM from the [GitHub releases page](https://github.com/carbonblack/cb-yara-connector/releases/download/untagged-b39ed959488c9ec78055/python-cb-yara-connector-2.1-0.x86_64.rpm). +1. Install the RPM: -`yum install python-cb-yara-connector-.rpm` + `yum install python-cb-yara-connector-.rpm` -The connector uses a configured directory containing yara rules, to efficiently scan binaries as they -are seen by the CB Response Server. The generated threat information is used to produce an -intelligence feed for ingest by the Cb Response Server again. +1. Enable the service: -# Create Yara Connector Config + `systemctl enable cb-yara-connector` -The installation process will create a sample configuration file in the control directory -as `/etc/cb/integrations/cb-yara-connector/yaraconnector.conf.sample`. Simply copy +# Create YARA Connector Config + +The installation process creates a sample configuration file: `/etc/cb/integrations/cb-yara-connector/yaraconnector.conf.sample`. Copy this sample template to `/etc/cb/integrations/cb-yara-connector/yaraconnector.conf`, -which is looked for by the yara connector service. You will likely have to edit this +which is the filename and location that the connector expects. You will likely have to edit this configuration file on each system (master and workers) to supply any missing information: -* worker systems will need to change the mode to `worker`; if you plan to use the master -system to also run a worker (not suggested, but allowed), the mode must be `master+worker`. +* There are two operating modes to support the two roles: `mode=master` and `mode=worker`. Both modes require a broker for Celery communications. Worker systems will need to change the mode to `worker`; if you plan to use the master system to also run a worker (not suggested, but allowed), the mode must be `master+worker`. * Remote worker systems will require the master's URL for `cb_server_url` (local workers need no modification); they also require the token of a global admin user for `cb_server_token`. -* Remote workers will require the URL of the master's redis server +* Remote workers will require the URL of the master's Redis server The daemon will attempt to load the postgres credentials from the response server's `cb.conf`, if available, falling back to postgres connection information for your CBR server @@ -58,33 +51,25 @@ cb_server_url=https://127.0.0.1 cb_server_token= ``` -You must configure `broker=` which sets the broker and results_backend for celery. -You will set this appropriately as per the celery documentation - -here (https://docs.celeryproject.org/en/latest/getting-started/brokers/). +You must configure `broker=` which sets the broker and results_backend for Celery. +Set this appropriately as per the [Celery documentation](https://docs.celeryproject.org/en/latest/getting-started/brokers/). ```ini ; -; URL of the redis server, defaulting to the local response server redis for the master. 
If this is a worker -; system, alter to point to the master system. If you are using a standalone redis server, both master and +; URL of the Redis server, defaulting to the local response server Redis for the master. If this is a worker +; system, alter to point to the master system. If you are using a standalone Redis server, both master and ; workers must point to the same server. ; broker_url=redis://127.0.0.1 ``` +## Create your YARA rules -The yara-connector RPM contains a service that is primarily intended to serve as a distributed system, with a master serving work to remote worker machine(s) for analysis and compiling a threat intelligence feed for Carbon Black Response EDR. - -There are two operating modes to support the two roles: `mode=master` and `mode=worker`. - -Install the connector on the cbr server, and config it with the master mode - configure postgres credentials, and a directory of monitored yara rules. In worker mode, configure REST API credentials. Both modes require a broker for celery communications. - -## Create your yara rules - -The yara connector monitors the directory `/etc/cb/integrations/cb-yara-connector/yara_rules` for files (`.yar`) each -specifying one or more yara rule. Your rules must have `meta` section with a +The YARA connector monitors the directory `/etc/cb/integrations/cb-yara-connector/yara_rules` for files (`.yar`) each +specifying one or more YARA rule. Your rules must have `meta` section with a `score = [1-10]` tag to appropriately score matching binaries. This directory is configurable in your configuration file. C-style comments are supported. -###### Sample Yara Rule File +###### Sample YARA Rule File ``` // Sample rule to match binaries over 100kb in size @@ -96,16 +81,14 @@ rule matchover100kb { } ``` -#### Controlling the Yara Agent - -`systemctl start cb-yara-connector` will up the service using systemD. - -`systemctl stop cb-yara-connector` will gracefully stop the yara-connector. - -`systemctl status -l cb-yara-connector` will display logging information. - -`journalctl -u cb-yara-connector.service` - verbose logs. +#### Controlling the YARA Agent +| Action | Command | +| ------ | ------- | +| Start the service | `systemctl start cb-yara-connector` | +| Stop the service | `systemctl stop cb-yara-connector` | +| Display logging information | `systemctl status -l cb-yara-connector` | +| Displaying verbose logs | `journalctl -u cb-YARA-connector.service` | # Development Notes @@ -114,9 +97,9 @@ Included with this version is a feature for discretionary use by advanced users should be used with caution. When `utility_interval` is defined with a value greater than 0, it represents the interval -in minutes at which the yara connector will pause its work and execute an external +in minutes at which the YARA connector will pause its work and execute an external shell script. A sample script, `vacuumscript.sh` is provided within the `scripts` folder -of the current Yara connector installation. After execution, the Yara connector continues with +of the current YARA connector installation. After execution, the YARA connector continues with its work. > _**NOTE:** As a safety for this feature, if an interval is defined but no script is defined, nothing is done. @@ -126,7 +109,7 @@ its work. ; ; The use of the utility script is an ADVANCED FEATURE and should be used with caution! 
; -; If "utility_interval" is greater than 0 it represents the interval in minutes after which the yara connector will +; If "utility_interval" is greater than 0 it represents the interval in minutes after which the YARA connector will ; pause to execute a shell script for general maintenance. This can present risks. Be careful what you allow the ; script to do, and use this option at your own discretion. ; @@ -134,12 +117,12 @@ utility_interval=-1 utility_script=./scripts/vacuumscript.sh ``` -## Yara Agent Build Instructions +## YARA Agent Build Instructions -The dockerfile in the top-level of the repo contains a centos7 environment for running, building, and testing +The dockerfile in the top-level of the repo contains a CentOS 7 environment for running, building, and testing the connector. -The provided script `docker-build-rpm.sh` will use docker to build the project, and place the RPM(s) in ${PWD}/RPMS. +The provided script `docker-build-rpm.sh` will use docker to build the project, and place the RPM(s) in `${PWD}/RPMS`. ##### Command-line Options @@ -147,7 +130,7 @@ The provided script `docker-build-rpm.sh` will use docker to build the project, usage: main.py [-h] --config-file CONFIG_FILE [--log-file LOG_FILE] [--output-file OUTPUT_FILE] [--validate-yara-rules] [--debug] -Yara Agent for Yara Connector +YARA Agent for YARA Connector optional arguments: -h, --help show this help message and exit @@ -157,7 +140,7 @@ optional arguments: --output-file OUTPUT_FILE output feed file (defaults to `local` folder) --validate-yara-rules - ONLY validate yara rules in a specified directory + ONLY validate YARA rules in a specified directory --debug Provide additional logging ``` @@ -165,15 +148,15 @@ optional arguments: Provides the path of the configuration file to be used _**(REQUIRED)**_ ###### --log-file -Provides the path of the yara log file. If not supplied, defaults to `local/yara_agent.log` -within the current yara package. +Provides the path of the YARA log file. If not supplied, defaults to `local/yara_agent.log` +within the current YARA package. ###### --output-file Provides the path containing the feed description file. If not supplied, defaults to `feed.json` in the same location as the configured `feed_database_dir` folder. ###### --validate-yara-rules -If supplied, yara rules will be validated and the script will exit. +If supplied, YARA rules will be validated and the script will exit. #### Example Cron Entry _[TBD]_ @@ -181,7 +164,7 @@ _[TBD]_ --- # Dev install -Use git to retrieve the project, create a new virtual environment using python3.6+ and use pip to install the requirements: +Use Git to retrieve the project, create a new virtual environment using Python 3.6+, and use pip to install the requirements: ``` git clone https://github.com/carbonblack/cb-yara-connector From ff866d8962bb5e6ec64f9545eb398d1222a47eb7 Mon Sep 17 00:00:00 2001 From: dseidel Date: Tue, 14 Jan 2020 14:53:19 -0500 Subject: [PATCH 241/257] CB-29781: more README tweaks --- README.md | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) diff --git a/README.md b/README.md index 1995fed..8059f48 100644 --- a/README.md +++ b/README.md @@ -1,8 +1,8 @@ # Installing YARA Agent (Centos/RHEL 7+) -[YARA](https://virustotal.github.io/yara/) Integration is made up of two parts -- a master and one or more workers. The master service must be installed on the same system as Cb Response, while workers are usually installed on other systems (but can also be on the master system, if so desired). 
The YARA connector itself uses [Celery](http://www.celeryproject.org/) to distribute work to and remote (or local) workers - you will need to install and configure a [broker](https://docs.celeryproject.org/en/latest/getting-started/brokers/) (e.g., [Redis](https://redis.io/)) that is accessible to both the task-master and the remote worker instance(s). +[YARA](https://virustotal.github.io/yara/) Integration is made up of two parts -- a master and one or more workers. The master service must be installed on the same system as CB EDR, while workers are usually installed on other systems (but can also be on the master system, if so desired). The YARA connector itself uses [Celery](http://www.celeryproject.org/) to distribute work to and remote (or local) workers - you will need to install and configure a [broker](https://docs.celeryproject.org/en/latest/getting-started/brokers/) (e.g., [Redis](https://redis.io/)) that is accessible to both the task-master and the remote worker instance(s). -The connector uses a configured directory containing YARA rules, to efficiently scan binaries as they are seen by the CB Response Server. The generated threat information is used to produce an intelligence feed for ingest by the Cb Response Server again. +The connector reads YARA rules from a configured directory to efficiently scan binaries as they are seen by the CB EDR server. The generated threat information is used to produce an intelligence feed for ingest by the CB EDR Server. 1. Download the latest RPM from the [GitHub releases page](https://github.com/carbonblack/cb-yara-connector/releases/download/untagged-b39ed959488c9ec78055/python-cb-yara-connector-2.1-0.x86_64.rpm). 1. Install the RPM: @@ -25,13 +25,12 @@ information: they also require the token of a global admin user for `cb_server_token`. * Remote workers will require the URL of the master's Redis server -The daemon will attempt to load the postgres credentials from the response server's `cb.conf`, -if available, falling back to postgres connection information for your CBR server -in the master's configuration file using the `postgres_xxxx` keys in the config. The REST API location and credentials are specified in the `cb_server_url` and `cb_server_token` keys, respectively. +The daemon will attempt to load the PostgreSQL credentials from the CB EDR server's `cb.conf` file, +if available, falling back to the PostgreSQL connection information in the master's configuration file using the `postgres_xxxx` keys in the config. The REST API location and credentials are specified in the `cb_server_url` and `cb_server_token` keys, respectively. ```ini ; -; Cb Response postgres Database settings, required for 'master' and 'master+worker' systems +; Cb Response PostgreSQL Database settings, required for 'master' and 'master+worker' systems ; The seever will attempt to read from local cb.conf file first and fall back ; to these settings if it cannot do so. ; @@ -44,7 +43,7 @@ postgres_port=5002 ```ini ; -; Cb Response Server settings, required for 'worker' and 'master+worker' systems +; Cb EDR server settings, required for 'worker' and 'master+worker' systems ; For remote workers, the cb_server_url mus be that of the master ; cb_server_url=https://127.0.0.1 @@ -56,7 +55,7 @@ Set this appropriately as per the [Celery documentation](https://docs.celeryproj ```ini ; -; URL of the Redis server, defaulting to the local response server Redis for the master. 
If this is a worker +; URL of the Redis server, defaulting to the local CB EDR server Redis for the master. If this is a worker ; system, alter to point to the master system. If you are using a standalone Redis server, both master and ; workers must point to the same server. ; From da933d39518fbca54ce039fde974347d548bcfbd Mon Sep 17 00:00:00 2001 From: Zachary Estep Date: Tue, 14 Jan 2020 15:08:31 -0500 Subject: [PATCH 242/257] updates to centos6 install --- cb-yara-connector | 15 ++++++++++++--- cb-yara-connector.rpm.spec | 1 - 2 files changed, 12 insertions(+), 4 deletions(-) diff --git a/cb-yara-connector b/cb-yara-connector index 1aacd44..a56a973 100644 --- a/cb-yara-connector +++ b/cb-yara-connector @@ -7,12 +7,21 @@ start() { echo -n "Starting cb-yara-connector: " - return /usr/share/cb/integrations/cb-yara-connector/yaraconnector --debug --working-dir /usr/share/cb/integrations/cb-yara-connector --config-file /etc/cb/integrations/cb-yara-connector/yaraconnector.conf --run-forever True --log-file /var/log/cb/integrations/cb-yara-connector/yaraconnector.log --output-file /var/cb/data/cb-yara-connector/feed.json + /usr/share/cb/integrations/cb-yara-connector/yaraconnector --debug --working-dir /usr/share/cb/integrations/cb-yara-connector --config-file /etc/cb/integrations/cb-yara-connector/yaraconnector.conf --run-forever True --log-file /var/log/cb/integrations/cb-yara-connector/yaraconnector.log --output-file /var/cb/data/cb-yara-connector/feed.json + return $ } stop() { echo -n "Shutting down cb-yara-connector: " - kill -3 /usr/share/cb/integrations/cb-yara-connector/yaraconnector.lock + kill -3 /usr/share/cb/integrations/cb-yara-connector/yaraconnector.lock +} + +status() { + if [ -f "/usr/share/cb/integrations/cb-yara-connector/yaraconnector.lock" ]; then + echo 'running - (pidfile exists)' + else + echo 'stopped/dead' + fi } case "$1" in @@ -23,7 +32,7 @@ case "$1" in stop ;; status) - echo 'running' + status ;; restart) stop diff --git a/cb-yara-connector.rpm.spec b/cb-yara-connector.rpm.spec index 26e50de..2ee05a9 100644 --- a/cb-yara-connector.rpm.spec +++ b/cb-yara-connector.rpm.spec @@ -3,7 +3,6 @@ Version: 2.1 Release: 0 Summary: Carbon Black Yara Agent License: MIT -Requires: redis %description Carbon Black Yara Agent - Scans binaries with configured yara rules From 713b5eed9e288d55673d2a658f9f48839b95fb31 Mon Sep 17 00:00:00 2001 From: Zachary Estep Date: Tue, 14 Jan 2020 15:19:44 -0500 Subject: [PATCH 243/257] centos 6 build updates --- cb-yara-connector | 2 +- cb-yara-connector.rpm.spec | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/cb-yara-connector b/cb-yara-connector index a56a973..357302a 100644 --- a/cb-yara-connector +++ b/cb-yara-connector @@ -8,7 +8,7 @@ start() { echo -n "Starting cb-yara-connector: " /usr/share/cb/integrations/cb-yara-connector/yaraconnector --debug --working-dir /usr/share/cb/integrations/cb-yara-connector --config-file /etc/cb/integrations/cb-yara-connector/yaraconnector.conf --run-forever True --log-file /var/log/cb/integrations/cb-yara-connector/yaraconnector.log --output-file /var/cb/data/cb-yara-connector/feed.json - return $ + return $? 
} stop() { diff --git a/cb-yara-connector.rpm.spec b/cb-yara-connector.rpm.spec index 2ee05a9..06649f0 100644 --- a/cb-yara-connector.rpm.spec +++ b/cb-yara-connector.rpm.spec @@ -23,7 +23,7 @@ mkdir -p ${RPM_BUILD_ROOT}/var/run/ mkdir -p ${RPM_BUILD_ROOT}/var/cb/data/cb-yara-connector/feed_db cp ${RPM_SOURCE_DIR}/example-conf/yara.conf ${RPM_BUILD_ROOT}/etc/cb/integrations/cb-yara-connector/yaraconnector.conf.example install -m 0644 ${RPM_SOURCE_DIR}/cb-yara-connector.service ${RPM_BUILD_ROOT}/etc/systemd/system/cb-yara-connector.service -install -m 644 ${RPM_SOURCE_DIR}/cb-yara-connector ${RPM_BUILD_ROOT}/etc/init.d/cb-yara-connector +install -m 700 ${RPM_SOURCE_DIR}/cb-yara-connector ${RPM_BUILD_ROOT}/etc/init.d/cb-yara-connector install -m 0755 ${RPM_SOURCE_DIR}/dist/yaraconnector ${RPM_BUILD_ROOT}/usr/share/cb/integrations/cb-yara-connector/ install ${RPM_SOURCE_DIR}/yara-logo.png ${RPM_BUILD_ROOT}/usr/share/cb/integrations/cb-yara-connector/yara-logo.png touch ${RPM_BUILD_ROOT}/var/log/cb/integrations/cb-yara-connector/yaraconnector.log From 8baa420e323aac734b4bdeb2fd458ed3eafafe68 Mon Sep 17 00:00:00 2001 From: Zachary Estep Date: Tue, 14 Jan 2020 16:12:29 -0500 Subject: [PATCH 244/257] Update README.md --- README.md | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index 8059f48..a8f744c 100644 --- a/README.md +++ b/README.md @@ -1,4 +1,4 @@ -# Installing YARA Agent (Centos/RHEL 7+) +# Installing YARA Agent (Centos/RHEL 6+) [YARA](https://virustotal.github.io/yara/) Integration is made up of two parts -- a master and one or more workers. The master service must be installed on the same system as CB EDR, while workers are usually installed on other systems (but can also be on the master system, if so desired). The YARA connector itself uses [Celery](http://www.celeryproject.org/) to distribute work to and remote (or local) workers - you will need to install and configure a [broker](https://docs.celeryproject.org/en/latest/getting-started/brokers/) (e.g., [Redis](https://redis.io/)) that is accessible to both the task-master and the remote worker instance(s). @@ -89,6 +89,9 @@ rule matchover100kb { | Display logging information | `systemctl status -l cb-yara-connector` | | Displaying verbose logs | `journalctl -u cb-YARA-connector.service` | + +Use `service` commands instead of runnnig on Centos 6. SystemD is prefered. + # Development Notes ## Utility Script From a7b80d638a3edffe5002afb0143deaa94e9c802e Mon Sep 17 00:00:00 2001 From: dseidel Date: Tue, 14 Jan 2020 16:49:26 -0500 Subject: [PATCH 245/257] README tweaks --- README.md | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/README.md b/README.md index a8f744c..467624d 100644 --- a/README.md +++ b/README.md @@ -1,4 +1,4 @@ -# Installing YARA Agent (Centos/RHEL 6+) +# Installing YARA Agent (CentOS/RHEL 6/7) [YARA](https://virustotal.github.io/yara/) Integration is made up of two parts -- a master and one or more workers. The master service must be installed on the same system as CB EDR, while workers are usually installed on other systems (but can also be on the master system, if so desired). The YARA connector itself uses [Celery](http://www.celeryproject.org/) to distribute work to and remote (or local) workers - you will need to install and configure a [broker](https://docs.celeryproject.org/en/latest/getting-started/brokers/) (e.g., [Redis](https://redis.io/)) that is accessible to both the task-master and the remote worker instance(s). 
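On the worker side, "downloading binaries" amounts to calling the master's REST API with the configured `cb_server_url` and `cb_server_token`. A hedged sketch: the endpoint path and auth header below are typical of Cb Response but assumed here, and the helper name is hypothetical:

```python
import requests  # requests==2.22.0 per requirements.txt

def fetch_binary(md5: str, server_url: str, token: str) -> bytes:
    """Download one binary (a zip of the sample) from the master's REST API."""
    resp = requests.get(f"{server_url}/api/v1/binary/{md5}",
                        headers={"X-Auth-Token": token},
                        verify=False, timeout=60)  # self-signed certs are common
    resp.raise_for_status()
    return resp.content
```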
@@ -89,8 +89,7 @@ rule matchover100kb { | Display logging information | `systemctl status -l cb-yara-connector` | | Displaying verbose logs | `journalctl -u cb-YARA-connector.service` | - -Use `service` commands instead of runnnig on Centos 6. SystemD is prefered. +Use `service` commands instead if running on CentOS 6.x (systemd is preferred). # Development Notes From 17dc4b9e329d82fe0fad658b2c5641fe2cf0de80 Mon Sep 17 00:00:00 2001 From: John Capolino Date: Tue, 14 Jan 2020 16:58:13 -0600 Subject: [PATCH 246/257] Added EL version support to rpm. --- .gitignore | 5 ++++- cb-yara-connector.rpm.spec | 10 ++++++++-- dockerfile | 4 +--- 3 files changed, 13 insertions(+), 6 deletions(-) diff --git a/.gitignore b/.gitignore index 5dd3184..b7459b2 100644 --- a/.gitignore +++ b/.gitignore @@ -40,6 +40,7 @@ nosetests.xml # IDE exclusions .idea +*.iml config.ini @@ -49,4 +50,6 @@ binary.db # incase inadvertantly left by unit testing conf-testing.conf -# +# Build files +RPMS/ + diff --git a/cb-yara-connector.rpm.spec b/cb-yara-connector.rpm.spec index 06649f0..7a82116 100644 --- a/cb-yara-connector.rpm.spec +++ b/cb-yara-connector.rpm.spec @@ -1,8 +1,14 @@ +%define version 2.1.0 +%define release 1 + Name: python-cb-yara-connector -Version: 2.1 -Release: 0 +Version: %{version} +Release: %{release}%{?dist} Summary: Carbon Black Yara Agent License: MIT +BuildArch: x86_64 +Vendor: Carbon Black +Url: http://www.carbonblack.com/ %description Carbon Black Yara Agent - Scans binaries with configured yara rules diff --git a/dockerfile b/dockerfile index a07c931..7139fbf 100644 --- a/dockerfile +++ b/dockerfile @@ -15,13 +15,11 @@ WORKDIR /home/cb RUN mkdir -p ~/rpmbuild/{BUILD,RPMS,SOURCES,SPECS,SRPMS} RUN virtualenv yaraconnector RUN source ./yaraconnector/bin/activate -RUN git clone https://github.com/carbonblack/cb-yara-connector +COPY --chown=cb ./ /home/cb/cb-yara-connector/ WORKDIR /home/cb/cb-yara-connector -RUN git checkout feature-cb-28268 RUN pip3 install -r requirements.txt --user RUN pip3 install pyinstaller==3.5.0 --user ENV PATH $PATH:~/.local/bin RUN make clean ; make rpm USER root -#RUN yum install -y /home/cb/rpmbuild/RPMS/x86_64/python-cb-yara-connector.*.rpm CMD ["/bin/bash","-c"] From 44ef110ac881b6b312100499c78c671f59d053f1 Mon Sep 17 00:00:00 2001 From: Zachary Estep Date: Thu, 6 Feb 2020 16:43:30 -0500 Subject: [PATCH 247/257] Update yara.conf --- example-conf/yara.conf | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/example-conf/yara.conf b/example-conf/yara.conf index 7dea10f..b9dbad3 100644 --- a/example-conf/yara.conf +++ b/example-conf/yara.conf @@ -2,7 +2,6 @@ ; ; Operating mode - choose 'master' for the main system, 'worker' for a remote worker. -; If you wish to run worker threads locally (not suggested), use 'master+worker' ; mode=master @@ -12,7 +11,7 @@ mode=master yara_rules_dir=/etc/cb/integrations/cb-yara-connector/yara_rules ; -; Cb Response postgres Database settings, required for 'master' and 'master+worker' systems +; Cb Response postgres Database settings, required for 'master' systems ; The seever will attempt to read from local cb.conf file first and fall back ; to these settings if it cannot do so. 
; @@ -23,7 +22,7 @@ postgres_db=cb postgres_port=5002 ; -; Cb Response Server settings, required for 'worker' and 'master+worker' systems +; Cb Response Server settings, required for 'worker' systems ; For remote workers, the cb_server_url mus be that of the master ; cb_server_url=https://127.0.0.1 @@ -32,9 +31,9 @@ cb_server_token= ; ; URL of the redis server, defaulting to the local response server redis for the master. If this is a worker ; system, alter to point to the master system. If you are using a standalone redis server, both master and -; workers must point to the same server. +; workers must point to the same server ; -broker_url=redis://127.0.0.1 +broker_url=redis://localhost:6379 ; ; nice value used for this script From 1da21edfb44ae19761cb9b1a883c22ae18885c2a Mon Sep 17 00:00:00 2001 From: Zachary Estep Date: Thu, 6 Feb 2020 16:45:26 -0500 Subject: [PATCH 248/257] Update README.md --- README.md | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index 467624d..69d4b40 100644 --- a/README.md +++ b/README.md @@ -20,7 +20,8 @@ this sample template to `/etc/cb/integrations/cb-yara-connector/yaraconnector.co which is the filename and location that the connector expects. You will likely have to edit this configuration file on each system (master and workers) to supply any missing information: -* There are two operating modes to support the two roles: `mode=master` and `mode=worker`. Both modes require a broker for Celery communications. Worker systems will need to change the mode to `worker`; if you plan to use the master system to also run a worker (not suggested, but allowed), the mode must be `master+worker`. +* There are two operating modes to support the two roles: `mode=master` and `mode=worker`. Both modes require a broker for Celery communications. Worker systems will need to change the mode to `worker`; + * Remote worker systems will require the master's URL for `cb_server_url` (local workers need no modification); they also require the token of a global admin user for `cb_server_token`. 
* Remote workers will require the URL of the master's Redis server From 75639fd6619c41f7cf43851f427224fa2e88b959 Mon Sep 17 00:00:00 2001 From: Zachary Estep Date: Thu, 6 Feb 2020 16:46:15 -0500 Subject: [PATCH 249/257] Update cb-yara-connector.service --- cb-yara-connector.service | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cb-yara-connector.service b/cb-yara-connector.service index 81a3fd7..00b5a8f 100644 --- a/cb-yara-connector.service +++ b/cb-yara-connector.service @@ -5,7 +5,7 @@ After=syslog.target network.target [Service] Environment=C_FORCE_ROOT=1 Type=forking -ExecStart=/usr/share/cb/integrations/cb-yara-connector/yaraconnector --debug --config-file /etc/cb/integrations/cb-yara-connector/yaraconnector.conf --run-forever True --log-file /var/log/cb/integrations/cb-yara-connector/yaraconnector.log --output-file /var/cb/data/cb-yara-connector/feed.json +ExecStart=/usr/share/cb/integrations/cb-yara-connector/yaraconnector --config-file /etc/cb/integrations/cb-yara-connector/yaraconnector.conf --run-forever True --log-file /var/log/cb/integrations/cb-yara-connector/yaraconnector.log --output-file /var/cb/data/cb-yara-connector/feed.json WorkingDirectory=/usr/share/cb/integrations/cb-yara-connector KillSignal=SIGQUIT From 868aaee97354a26449f97e55e1a610a6a23a2686 Mon Sep 17 00:00:00 2001 From: John Capolino Date: Tue, 25 Feb 2020 23:48:27 -0600 Subject: [PATCH 250/257] WIP: CB-30152: Fixed critical issue with yara and centos 6/7 service commands. --- .gitignore | 3 +++ MANIFEST6 | 11 +++++++++ MANIFEST => MANIFEST7 | 5 ++--- cb-yara-connector | 40 ++++++++++++++++++++++++++------- cb-yara-connector.rpm.spec | 19 ++++++++++------ cb-yara-connector.spec | 2 +- docker-build-rpm.sh | 8 +++---- dockerfile | 23 ++++++++----------- init-scripts/yaraconnector.conf | 5 ++--- makefile | 22 +++++++++--------- requirements.txt | 1 - src/__init__.py | 2 +- src/main.py | 38 +++++++++++++++++-------------- 13 files changed, 108 insertions(+), 71 deletions(-) create mode 100644 MANIFEST6 rename MANIFEST => MANIFEST7 (94%) diff --git a/.gitignore b/.gitignore index b7459b2..1c222dc 100644 --- a/.gitignore +++ b/.gitignore @@ -33,6 +33,9 @@ nosetests.xml # Translations *.mo +# Virtual Environments +.venv + # Mr Developer .mr.developer.cfg .project diff --git a/MANIFEST6 b/MANIFEST6 new file mode 100644 index 0000000..95b7b91 --- /dev/null +++ b/MANIFEST6 @@ -0,0 +1,11 @@ +/usr/share/cb/integrations/cb-yara-connector/yaraconnector +/usr/share/cb/integrations/cb-yara-connector/yara-logo.png +%dir /usr/share/cb/integrations/cb-yara-connector +%dir /var/log/cb/integrations/cb-yara-connector +/var/log/cb/integrations/cb-yara-connector/yaraconnector.log +/etc/cb/integrations/cb-yara-connector/yara_rules +/etc/cb/integrations/cb-yara-connector/yaraconnector.conf.example +%dir /var/cb/data/cb-yara-connector +%dir /var/cb/data/cb-yara-connector/feed_db +/tmp/yaraconnectorceleryworker +/etc/init.d/cb-yara-connector \ No newline at end of file diff --git a/MANIFEST b/MANIFEST7 similarity index 94% rename from MANIFEST rename to MANIFEST7 index 0b3e868..275e5da 100644 --- a/MANIFEST +++ b/MANIFEST7 @@ -3,10 +3,9 @@ %dir /usr/share/cb/integrations/cb-yara-connector %dir /var/log/cb/integrations/cb-yara-connector /var/log/cb/integrations/cb-yara-connector/yaraconnector.log -/etc/systemd/system/cb-yara-connector.service -/etc/init.d/cb-yara-connector /etc/cb/integrations/cb-yara-connector/yara_rules /etc/cb/integrations/cb-yara-connector/yaraconnector.conf.example -/tmp/yaraconnectorceleryworker 
%dir /var/cb/data/cb-yara-connector %dir /var/cb/data/cb-yara-connector/feed_db +/tmp/yaraconnectorceleryworker +/etc/systemd/system/cb-yara-connector.service diff --git a/cb-yara-connector b/cb-yara-connector index 357302a..78527e4 100644 --- a/cb-yara-connector +++ b/cb-yara-connector @@ -4,23 +4,47 @@ # . /etc/init.d/functions +prog="cb-yara-connector" +piddir="/var/run/cb/integrations/$prog" +pidfile="$piddir/$prog.pid" +command="/usr/share/cb/integrations/cb-yara-connector/yaraconnector --pid-file $pidfile --working-dir /usr/share/cb/integrations/cb-yara-connector --config-file /etc/cb/integrations/cb-yara-connector/yaraconnector.conf --run-forever True --log-file /var/log/cb/integrations/cb-yara-connector/yaraconnector.log --output-file /var/cb/data/cb-yara-connector/feed.json" start() { - echo -n "Starting cb-yara-connector: " - /usr/share/cb/integrations/cb-yara-connector/yaraconnector --debug --working-dir /usr/share/cb/integrations/cb-yara-connector --config-file /etc/cb/integrations/cb-yara-connector/yaraconnector.conf --run-forever True --log-file /var/log/cb/integrations/cb-yara-connector/yaraconnector.log --output-file /var/cb/data/cb-yara-connector/feed.json - return $? + if [ -f "$pidfile" ] && kill -0 $(cat "$pidfile"); then + echo "Already started" + return 1 + fi + + mkdir -p $piddir + echo -n "Starting $prog: " + $command + result=$? + + if [ -f "$pidfile" ]; then + echo "Ok" + else + echo "Failed" + fi + return $result } stop() { - echo -n "Shutting down cb-yara-connector: " - kill -3 /usr/share/cb/integrations/cb-yara-connector/yaraconnector.lock + echo -n "Shutting down $prog: " + if [ ! -f "$pidfile" ] || ! kill -0 $(cat "$pidfile"); then + echo "Ok" + return 1 + fi + kill -s SIGTERM $(cat $pidfile) && rm -f $pidfile + echo "Ok" + return 0 } status() { - if [ -f "/usr/share/cb/integrations/cb-yara-connector/yaraconnector.lock" ]; then - echo 'running - (pidfile exists)' + echo -n "Status of $prog: " + if [ -f "$pidfile" ] && kill -0 $(cat "$pidfile"); then + echo "Running" else - echo 'stopped/dead' + echo "Stopped" fi } diff --git a/cb-yara-connector.rpm.spec b/cb-yara-connector.rpm.spec index 7a82116..5bc01ae 100644 --- a/cb-yara-connector.rpm.spec +++ b/cb-yara-connector.rpm.spec @@ -1,5 +1,5 @@ -%define version 2.1.0 -%define release 1 +%define version 2.1.1 +%define release 0 Name: python-cb-yara-connector Version: %{version} @@ -21,15 +21,20 @@ mkdir -p ${RPM_BUILD_ROOT}/var/log/cb/integrations/cb-yara-connector mkdir -p ${RPM_BUILD_ROOT}/usr/share/cb/integrations/cb-yara-connector mkdir -p ${RPM_BUILD_ROOT}/etc/cb/integrations/cb-yara-connector mkdir -p ${RPM_BUILD_ROOT}/etc/cb/integrations/cb-yara-connector/yara_rules -mkdir -p ${RPM_BUILD_ROOT}/etc/init -mkdir -p ${RPM_BUILD_ROOT}/etc/init.d/ -mkdir -p ${RPM_BUILD_ROOT}/etc/systemd/system mkdir -p ${RPM_BUILD_ROOT}/tmp mkdir -p ${RPM_BUILD_ROOT}/var/run/ mkdir -p ${RPM_BUILD_ROOT}/var/cb/data/cb-yara-connector/feed_db -cp ${RPM_SOURCE_DIR}/example-conf/yara.conf ${RPM_BUILD_ROOT}/etc/cb/integrations/cb-yara-connector/yaraconnector.conf.example -install -m 0644 ${RPM_SOURCE_DIR}/cb-yara-connector.service ${RPM_BUILD_ROOT}/etc/systemd/system/cb-yara-connector.service + +%if "%{?dist}" == ".el6" +mkdir -p ${RPM_BUILD_ROOT}/etc/init +mkdir -p ${RPM_BUILD_ROOT}/etc/init.d/ install -m 700 ${RPM_SOURCE_DIR}/cb-yara-connector ${RPM_BUILD_ROOT}/etc/init.d/cb-yara-connector +%else # EL7 and up +mkdir -p ${RPM_BUILD_ROOT}/etc/systemd/system +install -m 0644 ${RPM_SOURCE_DIR}/cb-yara-connector.service 
${RPM_BUILD_ROOT}/etc/systemd/system/cb-yara-connector.service +%endif + +cp ${RPM_SOURCE_DIR}/example-conf/yara.conf ${RPM_BUILD_ROOT}/etc/cb/integrations/cb-yara-connector/yaraconnector.conf.example install -m 0755 ${RPM_SOURCE_DIR}/dist/yaraconnector ${RPM_BUILD_ROOT}/usr/share/cb/integrations/cb-yara-connector/ install ${RPM_SOURCE_DIR}/yara-logo.png ${RPM_BUILD_ROOT}/usr/share/cb/integrations/cb-yara-connector/yara-logo.png touch ${RPM_BUILD_ROOT}/var/log/cb/integrations/cb-yara-connector/yaraconnector.log diff --git a/cb-yara-connector.spec b/cb-yara-connector.spec index 5e70c0a..3e2432f 100644 --- a/cb-yara-connector.spec +++ b/cb-yara-connector.spec @@ -8,7 +8,7 @@ block_cipher = None a = Analysis(['src/main.py'], pathex=['./src'], binaries=[], - hiddenimports=['psutil','billiard','billiard.heap','lockfile','mmap','celery.app.control','celery.worker.strategy','celery.worker.consumer','celery.events.state','celery.worker.autoscale','celery.worker.components','celery.concurrency.prefork','celery.apps','celery.apps.worker','celery.app.log','celery.fixups', 'celery.fixups.django', 'celery.loaders.app','celery.app.amqp', 'kombu.transport.redis', 'redis', 'celery.backends','celery.backends.redis', 'celery.app.events', 'celery.events','celery.redis','kombu.transport.pyamqp'], + hiddenimports=['psutil','billiard','billiard.heap','lockfile','mmap','pkg_resources.py2_warn','celery.app.control','celery.worker.strategy','celery.worker.consumer','celery.events.state','celery.worker.autoscale','celery.worker.components','celery.concurrency.prefork','celery.apps','celery.apps.worker','celery.app.log','celery.fixups', 'celery.fixups.django', 'celery.loaders.app','celery.app.amqp', 'kombu.transport.redis', 'redis', 'celery.backends','celery.backends.redis', 'celery.app.events', 'celery.events','celery.redis','kombu.transport.pyamqp'], hookspath=[], runtime_hooks=[], excludes=[], diff --git a/docker-build-rpm.sh b/docker-build-rpm.sh index eaa2d3d..af53ac5 100755 --- a/docker-build-rpm.sh +++ b/docker-build-rpm.sh @@ -1,9 +1,9 @@ #!/bin/bash -docker rmi yaraconnectorrpmbuild --force -docker rm yaraconnectorrpmbuild --force -docker build -t yaraconnectorrpmbuild . --no-cache +# docker rmi yaraconnectorrpmbuild --force +docker rm yaraconnectorrpmbuild +docker build --build-arg REBUILD_STEP=`date +%s` -t yaraconnectorrpmbuild . docker run -d --name yaraconnectorrpmbuild -it yaraconnectorrpmbuild tail -f /dev/null docker cp yaraconnectorrpmbuild:/home/cb/rpmbuild/RPMS . 
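# The REBUILD_STEP build-arg added above is a cache-buster: `date +%s` gives
# the ARG a fresh value on every run, invalidating Docker's layer cache from
# that instruction onward so the source copy and RPM build always rerun. The
# detached run and `docker cp` exist only to copy the finished RPMs out of the
# image before the container is stopped and removed below.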
docker stop yaraconnectorrpmbuild
docker rm yaraconnectorrpmbuild
-docker rmi yaraconnectorrpmbuild --force
+# docker rmi yaraconnectorrpmbuild --force
diff --git a/dockerfile b/dockerfile
index 7139fbf..1a1cafe 100644
--- a/dockerfile
+++ b/dockerfile
@@ -1,25 +1,20 @@
 FROM centos:7
-RUN yum -y install rpm-build
-RUN yum -y install epel-release
-RUN yum -y install python36 python36-devel
-RUN yum -y install git
-RUN yum -y install make
-RUN yum -y install gcc gcc-devel
-RUN yum -y install automake libtool make gcc
+RUN yum -y install rpm-build epel-release
+RUN yum -y install python36 python36-devel git make gcc gcc-devel automake libtool make
 RUN groupadd -r cb && useradd --no-log-init -r -g cb cb
-RUN mkdir /home/cb
-RUN chown cb:cb /home/cb
+RUN mkdir /home/cb && \
+    chown cb:cb /home/cb
 RUN pip3 install virtualenv virtualenvwrapper
 USER cb
 WORKDIR /home/cb
-RUN mkdir -p ~/rpmbuild/{BUILD,RPMS,SOURCES,SPECS,SRPMS}
+ENV PATH ~/yaraconnector/bin:$PATH:~/.local/bin
+ARG REBUILD_STEP=unknown
+RUN REBUILD_STEP=${REBUILD_STEP} mkdir -p ~/rpmbuild/{BUILD,RPMS,SOURCES,SPECS,SRPMS}
 RUN virtualenv yaraconnector
-RUN source ./yaraconnector/bin/activate
 COPY --chown=cb ./ /home/cb/cb-yara-connector/
 WORKDIR /home/cb/cb-yara-connector
-RUN pip3 install -r requirements.txt --user
-RUN pip3 install pyinstaller==3.5.0 --user
-ENV PATH $PATH:~/.local/bin
+RUN pip3 install -r requirements.txt
+RUN pip3 install pyinstaller==3.5.0
 RUN make clean ; make rpm
 USER root
 CMD ["/bin/bash","-c"]
diff --git a/init-scripts/yaraconnector.conf b/init-scripts/yaraconnector.conf
index 21f6bb7..d2a9aef 100644
--- a/init-scripts/yaraconnector.conf
+++ b/init-scripts/yaraconnector.conf
@@ -1,5 +1,4 @@
 description "Cb Response Yara Connector"
-author "zestep@vmware.com"
 
 start on (started network)
 stop on runlevel [!2345]
@@ -7,7 +6,7 @@ stop on runlevel [!2345]
 respawn
 
 pre-start script
-    /usr/share/cb/integrations/cb-yara-connector/yaraconnector --lock-file /var/run/yaraconnector.pid --validate-yara-rules --config-file /etc/cb/integrations/cb-yara-connector/yaraconnector.conf &> /var/log/cb/integrations/cb-yara-connector/yaraconnector_config_check.log
+    /usr/share/cb/integrations/cb-yara-connector/yaraconnector --validate-yara-rules --config-file /etc/cb/integrations/cb-yara-connector/yaraconnector.conf &> /var/log/cb/integrations/cb-yara-connector/yaraconnector_config_check.log
 end script
 
-exec /usr/share/cb/integrations/cb-yara-connector/yaraconnector --lock-file /var/run/yaraconnector.pid --config-file /etc/cb/integrations/cb-yara-connector/yaraconnector.conf &> /var/log/cb/integrations/cb-yara-connector/yaraconnector.log
\ No newline at end of file
+exec /usr/share/cb/integrations/cb-yara-connector/yaraconnector --config-file /etc/cb/integrations/cb-yara-connector/yaraconnector.conf &> /var/log/cb/integrations/cb-yara-connector/yaraconnector.log
\ No newline at end of file
diff --git a/makefile b/makefile
index 29eeeaf..c06d71b 100644
--- a/makefile
+++ b/makefile
@@ -1,29 +1,27 @@
 SOURCEDIR = ~/rpmbuild/SOURCES
 BUILDDIR = ~/rpmbuild/BUILD
 RPMDIR = ~/rpmbuild/RPMS
+EL_VERSION := $(shell rpm -E %{rhel})
 
 clean:
-	rm -rf ${SOURCEDIR}
-	rm -rf ${BUILDDIR}
-	rm -rf ${RPMDIR}
 	rm -rf ~/rpmbuild
 	rm -rf dist
 
 rpm:
+	# Source DIR Setup
 	mkdir -p ${SOURCEDIR}
-	mkdir -p ${BUILDDIR}
 	mkdir -p ${SOURCEDIR}/src
-	mkdir -p ${BUILDDIR}/src
-	mkdir -p ${BUILDDIR}/init-scripts
-	mkdir -p ${BUILDDIR}/example-conf
 	mkdir -p ${SOURCEDIR}/example-conf
+	cp yara-logo.png ${SOURCEDIR}/yara-logo.png
 	cp -rp src/* ${SOURCEDIR}/src
 	cp -rp src/*
${BUILDDIR}/src - cp -rp init-scripts/* ${BUILDDIR}/init-scripts - cp -rp cb-yara-connector ${SOURCEDIR}/cb-yara-connector - cp example-conf/yara.conf ${BUILDDIR}/example-conf/yara.conf cp -rp example-conf/yara.conf ${SOURCEDIR}/example-conf/yara.conf - cp -p MANIFEST ${BUILDDIR}/MANIFEST + cp -rp cb-yara-connector ${SOURCEDIR}/cb-yara-connector cp cb-yara-connector.service ${SOURCEDIR}/cb-yara-connector.service cp cb-yara-connector.spec ${SOURCEDIR}/cb-yara-connector.spec + cp -rp init-scripts/* ${SOURCEDIR}/init-scripts + + # Build DIR Setup + mkdir -p ${BUILDDIR} + cp -p MANIFEST${EL_VERSION} ${BUILDDIR}/MANIFEST + rpmbuild -ba cb-yara-connector.rpm.spec diff --git a/requirements.txt b/requirements.txt index 4910a08..89e5317 100644 --- a/requirements.txt +++ b/requirements.txt @@ -5,7 +5,6 @@ celery==4.4.0 humanfriendly==4.18 -lockfile==0.12.2 peewee==3.13.1 psycopg2-binary==2.8.4 python-daemon==2.2.4 diff --git a/src/__init__.py b/src/__init__.py index 3c7bbe9..d3d37e1 100644 --- a/src/__init__.py +++ b/src/__init__.py @@ -4,4 +4,4 @@ # noinspection PyUnusedName __author__ = "Carbon Black" # noinspection PyUnusedName -__version__ = "2.1.0" +__version__ = "2.1.1" diff --git a/src/main.py b/src/main.py index d31c271..f33aa8a 100644 --- a/src/main.py +++ b/src/main.py @@ -18,7 +18,6 @@ from threading import Event, Thread from typing import List -import lockfile import psycopg2 # noinspection PyPackageRequirements import yara @@ -631,9 +630,9 @@ def handle_arguments(): parser.add_argument( "--working-dir", default=".", help="working directory", required=False ) - # Controls the lock File + # Controls the pid File parser.add_argument( - "--lock-file", default="./yaraconnector", help="lock file", required=False + "--pid-file", default="", help="pid file location", required=False ) # Controls batch vs continous mode , defaults to batch processing parser.add_argument( @@ -651,6 +650,17 @@ def handle_arguments(): return parser.parse_args() +def write_pid_file(file_location: str): + if not file_location: + return + try: + with open(file_location, 'w+') as f: + f.write(str(os.getpid())) + except (IOError, OSError) as ex: + logger.error(F"Failed to write to PID file: {ex}") + exit(1) + + def main(): """ Main execution function. Script will exit with a non-zero value based on the following: @@ -698,8 +708,7 @@ def main(): exit_event = Event() hash_queue = Queue() scanning_results_queue = Queue() - # Lock file so this process is a singleton - lock_file = lockfile.FileLock(args.lock_file) + write_pid_file(args.pid_file) # noinspection PyUnusedLocal # used for local worker handling in some scenarios @@ -720,19 +729,14 @@ def main(): # Mark files to be preserved files_preserve = get_log_file_handles(logger) - files_preserve.extend([args.lock_file, args.log_file, args.output_file]) + files_preserve.extend([args.log_file, args.output_file]) - # defaults to piping to /dev/null - deamon_kwargs = { - "working_directory": working_dir, - "pidfile": lock_file, - "files_preserve": files_preserve, - } - - # If in debug mode, make sure stdout and stderr don't go to /dev/null - if args.debug: - deamon_kwargs.update({"stdout": sys.stdout, "stderr": sys.stderr}) - context = daemon.DaemonContext(**deamon_kwargs) + context = daemon.DaemonContext( + working_directory=working_dir, + files_preserve=files_preserve, + stdout=sys.stdout if args.debug else None, + stderr=sys.stderr if args.debug else None + ) # Operating mode - are we the master a worker? 
run_as_master = "master" in globals.g_mode From be30593d2fe37ae5478edb59f4a044cec45ca7b5 Mon Sep 17 00:00:00 2001 From: John Capolino Date: Wed, 26 Feb 2020 11:31:05 -0600 Subject: [PATCH 251/257] WIP: CB-30152: Fixed issues with PID file. --- cb-yara-connector | 2 +- cb-yara-connector.service | 8 +++++--- docker-build-rpm.sh | 2 -- src/main.py | 25 +++++++++++++++++++++++++ 4 files changed, 31 insertions(+), 6 deletions(-) diff --git a/cb-yara-connector b/cb-yara-connector index 78527e4..a17ff08 100644 --- a/cb-yara-connector +++ b/cb-yara-connector @@ -5,7 +5,7 @@ . /etc/init.d/functions prog="cb-yara-connector" -piddir="/var/run/cb/integrations/$prog" +piddir="/run/cb/integrations/$prog" pidfile="$piddir/$prog.pid" command="/usr/share/cb/integrations/cb-yara-connector/yaraconnector --pid-file $pidfile --working-dir /usr/share/cb/integrations/cb-yara-connector --config-file /etc/cb/integrations/cb-yara-connector/yaraconnector.conf --run-forever True --log-file /var/log/cb/integrations/cb-yara-connector/yaraconnector.log --output-file /var/cb/data/cb-yara-connector/feed.json" diff --git a/cb-yara-connector.service b/cb-yara-connector.service index 00b5a8f..eecaf8c 100644 --- a/cb-yara-connector.service +++ b/cb-yara-connector.service @@ -5,10 +5,12 @@ After=syslog.target network.target [Service] Environment=C_FORCE_ROOT=1 Type=forking -ExecStart=/usr/share/cb/integrations/cb-yara-connector/yaraconnector --config-file /etc/cb/integrations/cb-yara-connector/yaraconnector.conf --run-forever True --log-file /var/log/cb/integrations/cb-yara-connector/yaraconnector.log --output-file /var/cb/data/cb-yara-connector/feed.json +ExecStart=/usr/share/cb/integrations/cb-yara-connector/yaraconnector --pid-file /run/cb/integrations/cb-yara-connector/cb-yara-connector.pid --config-file /etc/cb/integrations/cb-yara-connector/yaraconnector.conf --run-forever True --log-file /var/log/cb/integrations/cb-yara-connector/yaraconnector.log --output-file /var/cb/data/cb-yara-connector/feed.json +PIDFile=/run/cb/integrations/cb-yara-connector/cb-yara-connector.pid WorkingDirectory=/usr/share/cb/integrations/cb-yara-connector -KillSignal=SIGQUIT - +KillSignal=SIGTERM +Restart=on-failure +RestartSec=10s [Install] WantedBy=multi-user.target diff --git a/docker-build-rpm.sh b/docker-build-rpm.sh index af53ac5..d0a2214 100755 --- a/docker-build-rpm.sh +++ b/docker-build-rpm.sh @@ -1,9 +1,7 @@ #!/bin/bash -# docker rmi yaraconnectorrpmbuild --force docker rm yaraconnectorrpmbuild docker build --build-arg REBUILD_STEP=`date +%s` -t yaraconnectorrpmbuild . docker run -d --name yaraconnectorrpmbuild -it yaraconnectorrpmbuild tail -f /dev/null docker cp yaraconnectorrpmbuild:/home/cb/rpmbuild/RPMS . docker stop yaraconnectorrpmbuild docker rm yaraconnectorrpmbuild -# docker rmi yaraconnectorrpmbuild --force diff --git a/src/main.py b/src/main.py index f33aa8a..aece2b9 100644 --- a/src/main.py +++ b/src/main.py @@ -228,6 +228,27 @@ def get_database_conn(): return conn +def test_database_conn() -> bool: + """ + Tests the connection to the postgres database. Closes the connection if successful. + :return: Returns True if connection to db was successful. 
+ """ + logger.info("Testing connection to Postgres database...") + try: + conn = psycopg2.connect( + host=globals.g_postgres_host, + database=globals.g_postgres_db, + user=globals.g_postgres_username, + password=globals.g_postgres_password, + port=globals.g_postgres_port, + ) + conn.close() + except psycopg2.DatabaseError as ex: + logger.error(F"Failed to connect to postgres database: {ex}") + return False + return True + + def get_binary_file_cursor(conn, start_date_binaries: datetime): """ Get the cursor index to the binaries. @@ -654,6 +675,7 @@ def write_pid_file(file_location: str): if not file_location: return try: + os.mkdir(os.path.dirname(file_location)) with open(file_location, 'w+') as f: f.write(str(os.getpid())) except (IOError, OSError) as ex: @@ -688,6 +710,8 @@ def main(): # Verify the configuration file and load up important global variables try: ConfigurationInit(args.config_file, args.output_file) + if not test_database_conn(): + sys.exit(1) except Exception as err: logger.error(f"Unable to continue due to a configuration problem: {err}") sys.exit(1) @@ -751,6 +775,7 @@ def main(): # Make sure we close the deamon context at the end threads = [] with context: + write_pid_file(args.pid_file) # only connect to cbr if we're the master if run_as_master: # initialize local resources From 4f0ac6139e1ce39ab26575e7cceaeb272a38ec36 Mon Sep 17 00:00:00 2001 From: John Capolino Date: Wed, 26 Feb 2020 11:44:30 -0600 Subject: [PATCH 252/257] WIP: CB-30152: Changed release version to 1. --- cb-yara-connector.rpm.spec | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cb-yara-connector.rpm.spec b/cb-yara-connector.rpm.spec index 5bc01ae..43ef83f 100644 --- a/cb-yara-connector.rpm.spec +++ b/cb-yara-connector.rpm.spec @@ -1,5 +1,5 @@ %define version 2.1.1 -%define release 0 +%define release 1 Name: python-cb-yara-connector Version: %{version} From f0cedbd3397b07fc94117df3c7a43703181878f3 Mon Sep 17 00:00:00 2001 From: John Capolino Date: Wed, 26 Feb 2020 13:02:39 -0600 Subject: [PATCH 253/257] WIP: CB-30152: Changed directory creation to makedirs for recursive creation. --- src/main.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/main.py b/src/main.py index aece2b9..4ab59cf 100644 --- a/src/main.py +++ b/src/main.py @@ -675,7 +675,7 @@ def write_pid_file(file_location: str): if not file_location: return try: - os.mkdir(os.path.dirname(file_location)) + os.makedirs(os.path.dirname(file_location), exist_ok=True) with open(file_location, 'w+') as f: f.write(str(os.getpid())) except (IOError, OSError) as ex: From cb08c2c2e85aff9666554902111a3af99399e8c6 Mon Sep 17 00:00:00 2001 From: John Capolino Date: Wed, 26 Feb 2020 16:24:26 -0600 Subject: [PATCH 254/257] WIP: CB-30152: Fixed service logging. Pushing to startup.log. 
--- cb-yara-connector | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/cb-yara-connector b/cb-yara-connector index a17ff08..cdd5b5e 100644 --- a/cb-yara-connector +++ b/cb-yara-connector @@ -7,7 +7,10 @@ prog="cb-yara-connector" piddir="/run/cb/integrations/$prog" pidfile="$piddir/$prog.pid" -command="/usr/share/cb/integrations/cb-yara-connector/yaraconnector --pid-file $pidfile --working-dir /usr/share/cb/integrations/cb-yara-connector --config-file /etc/cb/integrations/cb-yara-connector/yaraconnector.conf --run-forever True --log-file /var/log/cb/integrations/cb-yara-connector/yaraconnector.log --output-file /var/cb/data/cb-yara-connector/feed.json" +logdir="/var/log/cb/integrations/$prog" +logfile="/$prog.startup.log" + +command="/usr/share/cb/integrations/cb-yara-connector/yaraconnector --pid-file $pidfile --working-dir /usr/share/cb/integrations/cb-yara-connector --config-file /etc/cb/integrations/cb-yara-connector/yaraconnector.conf --run-forever True --log-file $logdir/yaraconnector.log --output-file /var/cb/data/cb-yara-connector/feed.json" start() { if [ -f "$pidfile" ] && kill -0 $(cat "$pidfile"); then @@ -17,7 +20,7 @@ start() { mkdir -p $piddir echo -n "Starting $prog: " - $command + $command &> $logfile result=$? if [ -f "$pidfile" ]; then From 49d66474c6ebc6ae91e149befe297505d20a980a Mon Sep 17 00:00:00 2001 From: John Capolino Date: Wed, 26 Feb 2020 20:05:55 -0600 Subject: [PATCH 255/257] WIP: CB-30152: Fixed service not exiting on service stop in centos 7. --- src/main.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/main.py b/src/main.py index 4ab59cf..4ff476b 100644 --- a/src/main.py +++ b/src/main.py @@ -680,7 +680,7 @@ def write_pid_file(file_location: str): f.write(str(os.getpid())) except (IOError, OSError) as ex: logger.error(F"Failed to write to PID file: {ex}") - exit(1) + sys.exit(1) def main(): @@ -814,6 +814,8 @@ def main(): wait_all_worker_exit_threads(threads, timeout=4.0) finally: logger.info("Yara connector shutdown") + # noinspection PyProtectedMember + os._exit(exit_rc) else: # | | | BATCH MODE | | | logger.debug("BATCH MODE") From 302bc4a78828299b07ed77b905b7c1d33e23ca54 Mon Sep 17 00:00:00 2001 From: John Capolino Date: Wed, 26 Feb 2020 21:27:59 -0600 Subject: [PATCH 256/257] CB-30152: Finalized changes for Yara 2.1.1 release. * Changed --run-forever option to --daemon * Cleaned up help output * Fixed missing hidden imports in pyinstaller definition --- README.md | 73 +++++++++++++++++++++++++++------------ cb-yara-connector | 2 +- cb-yara-connector.service | 2 +- cb-yara-connector.spec | 3 +- src/main.py | 36 +++++++++---------- 5 files changed, 70 insertions(+), 46 deletions(-) diff --git a/README.md b/README.md index 69d4b40..8c41777 100644 --- a/README.md +++ b/README.md @@ -4,14 +4,30 @@ The connector reads YARA rules from a configured directory to efficiently scan binaries as they are seen by the CB EDR server. The generated threat information is used to produce an intelligence feed for ingest by the CB EDR Server. -1. Download the latest RPM from the [GitHub releases page](https://github.com/carbonblack/cb-yara-connector/releases/download/untagged-b39ed959488c9ec78055/python-cb-yara-connector-2.1-0.x86_64.rpm). -1. Install the RPM: +1. Install the CbOpenSource repository if it isn't already present: + + ``` + cd /etc/yum.repos.d + curl -O https://opensource.carbonblack.com/CbOpenSource.repo + ``` - `yum install python-cb-yara-connector-.rpm` +1. 
Install the RPM: + ``` + yum install python-cb-yara-connector + ``` 1. Enable the service: - - `systemctl enable cb-yara-connector` + + 1. For CentOS/Red Hat 6: + + ``` + chkconfig cb-yara-connector on + ``` + 1. For CentOS/Red Hat 7: + + ``` + systemctl enable cb-yara-connector + ``` # Create YARA Connector Config @@ -69,7 +85,7 @@ specifying one or more YARA rule. Your rules must have `meta` section with a `score = [1-10]` tag to appropriately score matching binaries. This directory is configurable in your configuration file. C-style comments are supported. -###### Sample YARA Rule File +#### Sample YARA Rule File ``` // Sample rule to match binaries over 100kb in size @@ -81,16 +97,24 @@ rule matchover100kb { } } -#### Controlling the YARA Agent +## Controlling the YARA Agent + +#### CentOS / Red Hat 6 + +| Action | Command | +| ------ | ------- | +| Start the service | `service cb-yara-connector start` | +| Stop the service | `service cb-yara-connector stop` | +| Display service status | `service cb-yara-connector status` | + +#### CentOS / Red Hat 7 | Action | Command | | ------ | ------- | | Start the service | `systemctl start cb-yara-connector` | | Stop the service | `systemctl stop cb-yara-connector` | -| Display logging information | `systemctl status -l cb-yara-connector` | -| Displaying verbose logs | `journalctl -u cb-yara-connector.service` | - -Use `service` commands instead if running on CentOS 6.x (systemd is preferred). +| Display service status | `systemctl status -l cb-yara-connector` | +| Displaying verbose logs | `journalctl -u cb-yara-connector` | # Development Notes @@ -129,22 +153,28 @@ The provided script `docker-build-rpm.sh` will use docker to build the project, ##### Command-line Options ```text -usage: main.py [-h] --config-file CONFIG_FILE [--log-file LOG_FILE] - [--output-file OUTPUT_FILE] [--validate-yara-rules] [--debug] +usage: yaraconnector [-h] --config-file CONFIG_FILE [--log-file LOG_FILE] + [--output-file OUTPUT_FILE] [--working-dir WORKING_DIR] + [--pid-file PID_FILE] [--daemon] + [--validate-yara-rules] [--debug] -YARA Agent for YARA Connector +Yara Agent for Yara Connector optional arguments: -h, --help show this help message and exit --config-file CONFIG_FILE - Location of the config file - --log-file LOG_FILE Log file output (defaults to `local` folder) + location of the config file + --log-file LOG_FILE file location for log output --output-file OUTPUT_FILE - output feed file (defaults to `local` folder) + file location for feed file + --working-dir WORKING_DIR + working directory + --pid-file PID_FILE pid file location - if not supplied, will not write a + pid file + --daemon run in daemon mode (run as a service) --validate-yara-rules - ONLY validate YARA rules in a specified directory - --debug Provide additional logging - + only validate the yara rules, then exit + --debug enable debug level logging ``` ###### --config-file Provides the path of the configuration file to be used _**(REQUIRED)**_ ###### --log-file Provides the path of the yara connector log file. If not supplied, defaults to `local` folder ###### --output-file Provides the path containing the feed description file. If not supplied, defaults to `local` folder ###### --validate-yara-rules If supplied, YARA rules will be validated and the script will exit.
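As a rough sketch, validation amounts to a compile pass over the rules directory, reporting any file that fails — conceptually similar to the snippet below, which is an illustration under stated assumptions (the directory argument and the `.yar`/`.yara` naming convention are hypothetical; this is not the connector's actual implementation):

```python
import os

import yara  # yara-python


def validate_rules(rule_dir: str) -> bool:
    """Try to compile each rule file; print the error for any that fail."""
    ok = True
    for name in sorted(os.listdir(rule_dir)):
        if not name.endswith((".yar", ".yara")):  # assumed naming convention
            continue
        try:
            yara.compile(filepath=os.path.join(rule_dir, name))
        except yara.Error as err:  # yara.SyntaxError is a subclass of yara.Error
            print(f"{name}: {err}")
            ok = False
    return ok
```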
-#### Example Cron Entry -_[TBD]_ - --- # Dev install diff --git a/cb-yara-connector b/cb-yara-connector index cdd5b5e..0889363 100644 --- a/cb-yara-connector +++ b/cb-yara-connector @@ -10,7 +10,7 @@ pidfile="$piddir/$prog.pid" logdir="/var/log/cb/integrations/$prog" logfile="/$prog.startup.log" -command="/usr/share/cb/integrations/cb-yara-connector/yaraconnector --pid-file $pidfile --working-dir /usr/share/cb/integrations/cb-yara-connector --config-file /etc/cb/integrations/cb-yara-connector/yaraconnector.conf --run-forever True --log-file $logdir/yaraconnector.log --output-file /var/cb/data/cb-yara-connector/feed.json" +command="/usr/share/cb/integrations/cb-yara-connector/yaraconnector --pid-file $pidfile --working-dir /usr/share/cb/integrations/cb-yara-connector --config-file /etc/cb/integrations/cb-yara-connector/yaraconnector.conf --daemon --log-file $logdir/cb-yara-connector.log --output-file /var/cb/data/cb-yara-connector/feed.json" start() { if [ -f "$pidfile" ] && kill -0 $(cat "$pidfile"); then diff --git a/cb-yara-connector.service b/cb-yara-connector.service index eecaf8c..d8c0d22 100644 --- a/cb-yara-connector.service +++ b/cb-yara-connector.service @@ -5,7 +5,7 @@ After=syslog.target network.target [Service] Environment=C_FORCE_ROOT=1 Type=forking -ExecStart=/usr/share/cb/integrations/cb-yara-connector/yaraconnector --pid-file /run/cb/integrations/cb-yara-connector/cb-yara-connector.pid --config-file /etc/cb/integrations/cb-yara-connector/yaraconnector.conf --run-forever True --log-file /var/log/cb/integrations/cb-yara-connector/yaraconnector.log --output-file /var/cb/data/cb-yara-connector/feed.json +ExecStart=/usr/share/cb/integrations/cb-yara-connector/yaraconnector --pid-file /run/cb/integrations/cb-yara-connector/cb-yara-connector.pid --config-file /etc/cb/integrations/cb-yara-connector/yaraconnector.conf --daemon --log-file /var/log/cb/integrations/cb-yara-connector/cb-yara-connector.log --output-file /var/cb/data/cb-yara-connector/feed.json PIDFile=/run/cb/integrations/cb-yara-connector/cb-yara-connector.pid WorkingDirectory=/usr/share/cb/integrations/cb-yara-connector KillSignal=SIGTERM diff --git a/cb-yara-connector.spec b/cb-yara-connector.spec index 3e2432f..a5c467e 100644 --- a/cb-yara-connector.spec +++ b/cb-yara-connector.spec @@ -4,11 +4,10 @@ block_cipher = None - a = Analysis(['src/main.py'], pathex=['./src'], binaries=[], - hiddenimports=['psutil','billiard','billiard.heap','lockfile','mmap','pkg_resources.py2_warn','celery.app.control','celery.worker.strategy','celery.worker.consumer','celery.events.state','celery.worker.autoscale','celery.worker.components','celery.concurrency.prefork','celery.apps','celery.apps.worker','celery.app.log','celery.fixups', 'celery.fixups.django', 'celery.loaders.app','celery.app.amqp', 'kombu.transport.redis', 'redis', 'celery.backends','celery.backends.redis', 'celery.app.events', 'celery.events','celery.redis','kombu.transport.pyamqp'], + hiddenimports=['billiard','billiard.heap','lockfile','mmap','pkg_resources.py2_warn','celery.app.control','celery.worker.strategy','celery.worker.consumer','celery.events.state','celery.worker.autoscale','celery.worker.components','celery.concurrency.prefork','celery.apps','celery.apps.worker','celery.app.log','celery.fixups', 'celery.fixups.django', 'celery.loaders.app','celery.app.amqp', 'kombu.transport.redis', 'redis', 'celery.backends','celery.backends.redis', 'celery.app.events', 'celery.events', 'kombu.transport.pyamqp'], hookspath=[], runtime_hooks=[], excludes=[], diff --git 
a/src/main.py b/src/main.py index 4ff476b..d488378 100644 --- a/src/main.py +++ b/src/main.py @@ -632,41 +632,38 @@ def handle_arguments(): """ parser = argparse.ArgumentParser(description="Yara Agent for Yara Connector") - # Controls config file (ini) + # Controls config file (ini) parser.add_argument( - "--config-file", - required=True, - default="yaraconnector.conf", - help="Location of the config file", + "--config-file", default="yaraconnector.conf", help="location of the config file", required=True, ) # Controls log file location+name parser.add_argument( - "--log-file", default="yaraconnector.log", help="Log file output" + "--log-file", default="cb-yara-connector.log", help="file location for log output" ) # Controls the output feed location+name parser.add_argument( - "--output-file", default=None, help="output feed file" + "--output-file", default=None, help="file location for feed file" ) # Controls the working directory parser.add_argument( - "--working-dir", default=".", help="working directory", required=False + "--working-dir", default=".", help="working directory" ) # Controls the pid File parser.add_argument( - "--pid-file", default="", help="pid file location", required=False - ) - # Controls batch vs continous mode , defaults to batch processing - parser.add_argument( - "--run-forever", default=False, help="Run as batch mode or no", required=False + "--pid-file", default="", help="pid file location - if not supplied, will not write a pid file" ) + group = parser.add_mutually_exclusive_group() + # Controls if we run in daemon mode + group.add_argument( + "--daemon", action='store_true', help="run in daemon mode (run as a service)" + ) # Validates the rules - parser.add_argument( - "--validate-yara-rules", - action="store_true", - help="Only validate yara rules, then exit", + group.add_argument( + "--validate-yara-rules", action="store_true", help="only validate the yara rules, then exit" ) - parser.add_argument("--debug", action="store_true") + + parser.add_argument("--debug", action="store_true", help="enable debug level logging") return parser.parse_args() @@ -726,6 +723,7 @@ def main(): except Exception as err: logger.error(f"There were errors compiling yara rules: {err}") sys.exit(2) + sys.exit() else: # Doing a real run # Exit condition and queues for doing work @@ -746,7 +744,7 @@ def main(): 2) standalone worker 3) worker+master unit """ - if args.run_forever: # Running as a deamon + if args.daemon: logger.debug("RUNNING AS DEMON") # Get working dir setting working_dir = os.path.abspath(os.path.expanduser(args.working_dir)) From b28013ab8dcb8e9fa63016d638d1cdbb6a2fb8c3 Mon Sep 17 00:00:00 2001 From: John Capolino Date: Thu, 27 Feb 2020 10:54:25 -0600 Subject: [PATCH 257/257] CB-30152: Fixed automak -> automake in dockerfile. --- dockerfile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dockerfile b/dockerfile index 1a1cafe..8bfb51c 100644 --- a/dockerfile +++ b/dockerfile @@ -1,6 +1,6 @@ FROM centos:7 RUN yum -y install rpm-build epel-release -RUN yum -y install python36 python36-devel git make gcc gcc-devel automak libtool make +RUN yum -y install python36 python36-devel git make gcc gcc-devel automake libtool make RUN groupadd -r cb && useradd --no-log-init -r -g cb cb RUN mkdir /home/cb && \ chown cb:cb /home/cb
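Why PATCH 255 reaches for `os._exit(exit_rc)`: `sys.exit()` only raises SystemExit in the calling thread, and the interpreter then waits for every non-daemon thread to finish before the process can actually die — which is why `service stop` could hang on CentOS 7 whenever a worker thread failed to wind down. Calling `os._exit()` after the connector's own cleanup has run skips interpreter shutdown entirely. A minimal, self-contained illustration (the wedged thread is hypothetical, not connector code):

```python
import os
import threading
import time


def wedged_worker() -> None:
    while True:  # simulates a worker thread that ignores shutdown requests
        time.sleep(1)


threading.Thread(target=wedged_worker).start()  # non-daemon by default

# sys.exit(0) here would raise SystemExit in the main thread, but the process
# would linger until wedged_worker returned -- which it never does.
os._exit(0)  # terminates the process immediately, skipping interpreter shutdown
```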