From 2aa9ff1c7bb79738dc90af7c0c9a8b9013016612 Mon Sep 17 00:00:00 2001 From: miteshvp Date: Fri, 1 Mar 2019 18:20:02 +0530 Subject: [PATCH] add only unique stacks for training --- f8a_report/report_helper.py | 6 +++++- tests/data/collateddata.json | 3 ++- 2 files changed, 7 insertions(+), 2 deletions(-) diff --git a/f8a_report/report_helper.py b/f8a_report/report_helper.py index 5dda634..ddeafdb 100644 --- a/f8a_report/report_helper.py +++ b/f8a_report/report_helper.py @@ -228,12 +228,16 @@ def store_training_data(self, result): model_version = dt.now().strftime('%Y-%m-%d') for eco, stacks in result.items(): + unique_stacks = {} obj_key = '{eco}/{depl_prefix}/{model_version}/data/manifest.json'.format( eco=eco, depl_prefix=self.s3.deployment_prefix, model_version=model_version) package_list_for_eco = [] for packages, reccurrence_count in stacks.items(): package_list = [x.strip().split(' ')[0] for x in packages.split(',')] - package_list_for_eco.append(package_list) + stack_str = "".join(package_list) + if stack_str not in unique_stacks: + unique_stacks[stack_str] = 1 + package_list_for_eco.append(package_list) training_data = { 'ecosystem': eco, diff --git a/tests/data/collateddata.json b/tests/data/collateddata.json index e9c89ec..36e9395 100644 --- a/tests/data/collateddata.json +++ b/tests/data/collateddata.json @@ -7,6 +7,7 @@ "flask 1.2.3, sqlalchemy 1.4.3": 10 }, "maven": { - "io.vertx:vertx-core 3.4.2,io.vertx:vertx-web 3.4.2": 1 + "io.vertx:vertx-core 3.4.2,io.vertx:vertx-web 3.4.2": 1, + "io.vertx:vertx-core 3.4.1,io.vertx:vertx-web 3.4.1": 1 } } \ No newline at end of file