From 815bfe68235d740c8d152330b7f05409921d7f9d Mon Sep 17 00:00:00 2001 From: Language Team Date: Thu, 19 Oct 2023 21:37:01 +0000 Subject: [PATCH] Dataset release for EMNLP 2023 paper on reasoning with spatial prepositions PiperOrigin-RevId: 575005500 --- language/capwap/utils/checkpoint_utils.py | 2 +- language/gscan/data/world.py | 8 ++- language/quest/xattn/gen_training_examples.py | 2 +- language/spatial_prep/README.md | 52 +++++++++++++++++++ 4 files changed, 60 insertions(+), 4 deletions(-) create mode 100644 language/spatial_prep/README.md diff --git a/language/capwap/utils/checkpoint_utils.py b/language/capwap/utils/checkpoint_utils.py index 60ce7779..0700c8c6 100644 --- a/language/capwap/utils/checkpoint_utils.py +++ b/language/capwap/utils/checkpoint_utils.py @@ -24,7 +24,7 @@ def log_variables(name, var_names): tf.logging.info("%s (%d total): %s", name, len(var_names), - random.sample(var_names, min(len(var_names), 5))) + random.sample(list(var_names), min(len(var_names), 5))) def init_from_checkpoint(checkpoint_path, diff --git a/language/gscan/data/world.py b/language/gscan/data/world.py index 4fd2564b..71380611 100644 --- a/language/gscan/data/world.py +++ b/language/gscan/data/world.py @@ -62,7 +62,9 @@ def sample_nearby_position(self, position, exclude_locations=None): actual_available_positions = self.get_nearby_positions( position, exclude_locations=exclude_locations) if actual_available_positions: - sampled_position = random.sample(actual_available_positions, 1).pop() + sampled_position = random.sample( + list(actual_available_positions), 1 + ).pop() return world.Position(column=sampled_position[0], row=sampled_position[1]) return None @@ -76,7 +78,9 @@ def sample_non_nearby_position(self, position): for dir in DIR_TO_DIR_VEC.values()]) actual_available_positions = available_positions - nearby_positions if actual_available_positions: - sampled_position = random.sample(actual_available_positions, 1).pop() + sampled_position = random.sample( + 
list(actual_available_positions), 1 + ).pop() return world.Position(column=sampled_position[0], row=sampled_position[1]) return None diff --git a/language/quest/xattn/gen_training_examples.py b/language/quest/xattn/gen_training_examples.py index 549de439..31726554 100644 --- a/language/quest/xattn/gen_training_examples.py +++ b/language/quest/xattn/gen_training_examples.py @@ -122,7 +122,7 @@ def main(unused_argv): # Add additional random negatives. if FLAGS.random_negatives > 0: - random_titles = random.sample(doc_titles, FLAGS.random_negatives) + random_titles = random.sample(list(doc_titles), FLAGS.random_negatives) for doc_title in random_titles: if doc_title not in relevant_titles: new_example = xattn_utils.get_example( diff --git a/language/spatial_prep/README.md b/language/spatial_prep/README.md new file mode 100644 index 00000000..ca4e38f3 --- /dev/null +++ b/language/spatial_prep/README.md @@ -0,0 +1,52 @@ +# A Benchmark for Reasoning with Spatial Prepositions + +This folder contains the dataset described in the paper + "A Benchmark for Reasoning with Spatial Prepositions" (EMNLP 2023). + +## Download the data + +* [English dataset](https://storage.googleapis.com/spatial-prepositions-dataset/spatial_prepositions_benchmark_en.tsv) +* [Romanian dataset](https://storage.googleapis.com/spatial-prepositions-dataset/spatial_prepositions_benchmark_ro.tsv) + +## Data format + +The datasets are provided in TSV (tab-separated) format. + Each row contains an incongruent and a congruent example, both formed using + the same prepositions, with the following tab-separated elements: + `premise1_a`, `premise2_a`, `conclusion_a`, + `no` (language-specific, indicating that `conclusion_a` is invalid), + `premise1_b`, `premise2_b`, `conclusion_b`, + `yes` (language-specific, indicating that `conclusion_b` is valid). 
+ +## Building examples + +From each example, a question can be created by combining the two premises + and the conclusion, as follows: + "If `premise1` and `premise2`, does that imply that `conclusion`?" + +The examples with invalid conclusions are designed such that a wrong + interpretation of the spatial prepositions in the premises can make the + conclusion appear valid. For example: + +* If `John is in the crib` and `the crib is in the living room`, + does that imply that `John is in the living room`? -> `yes` + (congruent example with valid conclusion) + +* If `John is in the newspaper` and `the newspaper is in the kitchen`, + does that imply that `John is in the kitchen`? -> `no` + (incongruent example with invalid conclusion) + + +## Citation + +If you use this data, please cite: + +``` +@inproceedings{comsa2023, + author = {Comșa, Iulia-Maria and Narayanan, Srini}, + title = "{A Benchmark for Reasoning with Spatial Prepositions}", + publisher = "Association for Computational Linguistics", + booktitle = "Proceedings of the 2023 Conference on Empirical Methods in Natural Language Processing", + year = {2023}, +} +```