Skip to content

Commit

Permalink
add only unique stacks for training
Browse files Browse the repository at this point in the history
  • Loading branch information
miteshvp committed Mar 1, 2019
1 parent f431489 commit 2aa9ff1
Show file tree
Hide file tree
Showing 2 changed files with 7 additions and 2 deletions.
6 changes: 5 additions & 1 deletion f8a_report/report_helper.py
Original file line number Diff line number Diff line change
Expand Up @@ -228,12 +228,16 @@ def store_training_data(self, result):
  model_version = dt.now().strftime('%Y-%m-%d')

  for eco, stacks in result.items():
+ unique_stacks = {}
  obj_key = '{eco}/{depl_prefix}/{model_version}/data/manifest.json'.format(
  eco=eco, depl_prefix=self.s3.deployment_prefix, model_version=model_version)
  package_list_for_eco = []
  for packages, reccurrence_count in stacks.items():
  package_list = [x.strip().split(' ')[0] for x in packages.split(',')]
- package_list_for_eco.append(package_list)
+ stack_str = "".join(package_list)
+ if stack_str not in unique_stacks:
+ unique_stacks[stack_str] = 1
+ package_list_for_eco.append(package_list)

  training_data = {
  'ecosystem': eco,
Expand Down
3 changes: 2 additions & 1 deletion tests/data/collateddata.json
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
  "flask 1.2.3, sqlalchemy 1.4.3": 10
  },
  "maven": {
- "io.vertx:vertx-core 3.4.2,io.vertx:vertx-web 3.4.2": 1
+ "io.vertx:vertx-core 3.4.2,io.vertx:vertx-web 3.4.2": 1,
+ "io.vertx:vertx-core 3.4.1,io.vertx:vertx-web 3.4.1": 1
  }
  }

0 comments on commit 2aa9ff1

Please sign in to comment.