From 0bdc829f48388b0080ae21e4e0ba66d32295ff69 Mon Sep 17 00:00:00 2001 From: "lukas.laskowski" Date: Mon, 15 Mar 2021 22:37:26 +0100 Subject: [PATCH 1/4] making column label as not needed. still needed for preloaded experiments --- .../experiment/experimentProvider/file/formats/sigmod2021.ts | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/wrapper/src/api/providers/experiment/experimentProvider/file/formats/sigmod2021.ts b/wrapper/src/api/providers/experiment/experimentProvider/file/formats/sigmod2021.ts index a4fed685..0b4420ab 100644 --- a/wrapper/src/api/providers/experiment/experimentProvider/file/formats/sigmod2021.ts +++ b/wrapper/src/api/providers/experiment/experimentProvider/file/formats/sigmod2021.ts @@ -7,7 +7,6 @@ export class Sigmod2021ExperimentInserter extends CSVInserter { protected readonly requiredColumns: string[] = [ 'left_instance_id', 'right_instance_id', - 'label', ]; protected addRow(row: { @@ -20,7 +19,7 @@ export class Sigmod2021ExperimentInserter extends CSVInserter { row.left_instance_id!, // eslint-disable-next-line @typescript-eslint/no-non-null-assertion row.right_instance_id!, - row.label !== '0' + row.label ? row.label !== '0' : true ); } } From eea6e948a70a628684004809c63cb54d1f822490 Mon Sep 17 00:00:00 2001 From: Martin Graf Date: Tue, 16 Mar 2021 08:55:02 +0100 Subject: [PATCH 2/4] We do not need to check for undefined as undefined !== '0' --- .../experiment/experimentProvider/file/formats/sigmod2021.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/wrapper/src/api/providers/experiment/experimentProvider/file/formats/sigmod2021.ts b/wrapper/src/api/providers/experiment/experimentProvider/file/formats/sigmod2021.ts index 0b4420ab..1ed74e92 100644 --- a/wrapper/src/api/providers/experiment/experimentProvider/file/formats/sigmod2021.ts +++ b/wrapper/src/api/providers/experiment/experimentProvider/file/formats/sigmod2021.ts @@ -19,7 +19,7 @@ export class Sigmod2021ExperimentInserter extends CSVInserter { row.left_instance_id!, // eslint-disable-next-line @typescript-eslint/no-non-null-assertion row.right_instance_id!, - row.label ? row.label !== '0' : true + row.label !== '0' ); } } From 20003de7934b7991996f91d4dd744eb2b50fc025 Mon Sep 17 00:00:00 2001 From: flosld Date: Tue, 16 Mar 2021 09:23:18 +0100 Subject: [PATCH 3/4] Add some documentation on SIGMOD21 format --- docs/basic_usage/experiments.md | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/docs/basic_usage/experiments.md b/docs/basic_usage/experiments.md index 795809a8..e30e81a2 100644 --- a/docs/basic_usage/experiments.md +++ b/docs/basic_usage/experiments.md @@ -95,6 +95,24 @@ The open-source matching solution Magellan is widely used in research. We suppor For more information, see [here](../../sigmod2021). +```csv +left_instance_id,right_instance_id,label +http://store.com/42,http://otherstore.net/af82,1 +http://store.com/243,http://otherstore.net/cn82,0 +... +``` + +The label _(1=duplicate, 0=non-duplicate)_ is **optional**. A missing label will consider the pair as duplicates. + +Therefore, the following list would be interpreted as duplicates: + +```csv +left_instance_id,right_instance_id +http://store.com/42,http://otherstore.net/af82 +http://store.com/243,http://otherstore.net/cn82 +... +``` + ### Proprietary Formats We support a range of proprietary experiment formats. A list of those can be found here: From c41866dc179ec2ef2e50c1b9f58a7f902d298326 Mon Sep 17 00:00:00 2001 From: flosld Date: Tue, 16 Mar 2021 09:24:22 +0100 Subject: [PATCH 4/4] Make it clearer --- docs/basic_usage/experiments.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/basic_usage/experiments.md b/docs/basic_usage/experiments.md index e30e81a2..d62ec5f8 100644 --- a/docs/basic_usage/experiments.md +++ b/docs/basic_usage/experiments.md @@ -102,7 +102,7 @@ http://store.com/243,http://otherstore.net/cn82,0 ... ``` -The label _(1=duplicate, 0=non-duplicate)_ is **optional**. A missing label will consider the pair as duplicates. +The label _(1=duplicate, 0=non-duplicate)_ is **optional** in Snowman. A missing label will consider the pair as duplicates. Therefore, the following list would be interpreted as duplicates: