From 2775e346f4601f6b479db6a1029290661b94bc62 Mon Sep 17 00:00:00 2001 From: rgraber Date: Tue, 17 Dec 2024 09:57:26 -0500 Subject: [PATCH 1/2] feat: include backfilled answers in exports --- setup.py | 2 +- src/formpack/reporting/export.py | 33 ++++++---- tests/fixtures/backfilled_answers/__init__.py | 16 +++++ tests/fixtures/backfilled_answers/v1.json | 21 +++++++ tests/fixtures/backfilled_answers/v2.json | 23 +++++++ tests/test_exports.py | 60 +++++++++++++++++++ 6 files changed, 144 insertions(+), 11 deletions(-) create mode 100644 tests/fixtures/backfilled_answers/__init__.py create mode 100644 tests/fixtures/backfilled_answers/v1.json create mode 100644 tests/fixtures/backfilled_answers/v2.json diff --git a/setup.py b/setup.py index cf08a406..99fa6ff0 100644 --- a/setup.py +++ b/setup.py @@ -20,7 +20,7 @@ setup( name='formpack', - version='3.0.0', + version='3.1.0', description='Manipulation tools for KoBo forms', author='the formpack contributors (https://github.com/kobotoolbox/formpack/graphs/contributors)', url='https://github.com/kobotoolbox/formpack/', diff --git a/src/formpack/reporting/export.py b/src/formpack/reporting/export.py index c03fcf53..bbbfa212 100644 --- a/src/formpack/reporting/export.py +++ b/src/formpack/reporting/export.py @@ -171,14 +171,20 @@ def parse_one_submission(self, submission, version=None): submission = FormSubmission(submission) return self.format_one_submission([submission.data], section) - def parse_submissions(self, submissions): + def parse_submissions(self, submissions, version_uid=None): """ Return a generator yielding formatted 'chunks' for each submission from the data set + + Args: + version_uid (str): optional, explicit version uid to use for all + submissions instead of inferring the version from the submissions + themselves """ self.reset() + form_version = self.versions[version_uid] if version_uid else None for submission in submissions: - formatted_chunks = self.parse_one_submission(submission) + formatted_chunks
= self.parse_one_submission(submission, version=form_version) if not formatted_chunks: continue yield formatted_chunks @@ -542,7 +548,6 @@ def to_dict(self, submissions): """ d = OrderedDict() - for section, labels in self.labels.items(): d[section] = {'fields': list(labels), 'data': []} @@ -552,12 +557,19 @@ def to_dict(self, submissions): return d - def to_csv(self, submissions, sep=';', quote='"'): + def to_csv(self, submissions, version_uid=None, sep=';', quote='"'): """ Return a generator yielding csv lines. We don't use the csv module to avoid buffering the lines in memory. + + Args: + version_uid (str): optional, explicit version uid to use for all + submissions instead of inferring the version from the submissions + themselves + sep (str): optional, separator char, default ';' + quote (str): optional, quote char, default '"' """ sections = list(self.labels.items()) @@ -591,7 +603,7 @@ def format_line(line, sep, quote): for tag_row in tag_rows: yield format_line(tag_row, sep, quote) - for chunk in self.parse_submissions(submissions): + for chunk in self.parse_submissions(submissions, version_uid=version_uid): for section_name, rows in chunk.items(): if section == section_name: for row in rows: @@ -600,6 +612,7 @@ def format_line(line, sep, quote): def to_geojson( self, submissions: Iterator, + version_uid: str = None, flatten: bool = True, geo_question_name: Optional[str] = None, ) -> Generator: @@ -689,7 +702,7 @@ def to_geojson( # We need direct access to the field objects (available inside the # version) and the unformatted submission data - version = self.get_version_for_submission(submission) + version = self.versions[version_uid] if version_uid else self.get_version_for_submission(submission) formatted_chunks = self.parse_one_submission(submission, version) if not formatted_chunks: continue @@ -792,7 +805,7 @@ def to_table(self, submissions): return table - def to_xlsx(self, filename, submissions): + def to_xlsx(self, filename, submissions, 
version_uid=None): workbook = xlsxwriter.Workbook( filename, { @@ -845,7 +858,7 @@ def _append_row_to_sheet(sheet_, data): row_index += 1 sheet_row_positions[sheet_] = row_index - for chunk in self.parse_submissions(submissions): + for chunk in self.parse_submissions(submissions, version_uid=version_uid): for section_name, rows in chunk.items(): try: sheet_name = sheet_name_mapping[section_name] @@ -874,7 +887,7 @@ def _append_row_to_sheet(sheet_, data): workbook.close() - def to_html(self, submissions): + def to_html(self, submissions, version_uid=None): """ Yield lines of and HTML table strings. """ @@ -892,7 +905,7 @@ def to_html(self, submissions): yield '' - for chunk in self.parse_submissions(submissions): + for chunk in self.parse_submissions(submissions, version_uid=version_uid): for section_name, rows in chunk.items(): if section == section_name: for row in rows: diff --git a/tests/fixtures/backfilled_answers/__init__.py b/tests/fixtures/backfilled_answers/__init__.py new file mode 100644 index 00000000..c63246a8 --- /dev/null +++ b/tests/fixtures/backfilled_answers/__init__.py @@ -0,0 +1,16 @@ +""" +backfilled_answers: + +* has a question added in v2 with answers backfilled in some submissions +""" + +from ..load_fixture_json import load_fixture_json + +DATA = { + 'title': 'Backfilled answers', + 'id_string': 'backfilled_answers', + 'versions': [ + load_fixture_json('backfilled_answers/v1'), + load_fixture_json('backfilled_answers/v2'), + ], +} diff --git a/tests/fixtures/backfilled_answers/v1.json b/tests/fixtures/backfilled_answers/v1.json new file mode 100644 index 00000000..2bc437d9 --- /dev/null +++ b/tests/fixtures/backfilled_answers/v1.json @@ -0,0 +1,21 @@ +{ + "version": "v1", + "content": { + "survey": [ + { + "type": "text", + "name": "restaurant_name", + "label": "restaurant name" + } + ] + }, + "submissions": [ + { + "restaurant_name": "Potato Heaven" + }, + { + "restaurant_name": "Potato Purgatory", + "restaurant_location": "0 0 0 0" + } + ] 
+} diff --git a/tests/fixtures/backfilled_answers/v2.json b/tests/fixtures/backfilled_answers/v2.json new file mode 100644 index 00000000..78f7ff67 --- /dev/null +++ b/tests/fixtures/backfilled_answers/v2.json @@ -0,0 +1,23 @@ +{ + "version": "v2", + "content": { + "survey": [ + { + "type": "text", + "name": "restaurant_name", + "label": "restaurant name" + }, + { + "type": "geopoint", + "name": "restaurant_location", + "label": "restaurant location" + } + ] + }, + "submissions": [ + { + "restaurant_name": "Potato Limbo", + "restaurant_location": "0 1 0 0" + } + ] +} diff --git a/tests/test_exports.py b/tests/test_exports.py index 18468103..58ecb491 100644 --- a/tests/test_exports.py +++ b/tests/test_exports.py @@ -1716,6 +1716,24 @@ def test_csv_with_tag_headers_select_multiple_summary_or_details(self): ) assert rows[1] == ('"#loc+name";"#indicator+diet";"";"";""') + def test_csv_with_backfilled_data(self): + title, schemas, submissions = build_fixture('backfilled_answers') + fp = FormPack(schemas, title) + assert len(fp.versions) == 2 + + fp = FormPack(schemas, title) + export = fp.export(versions=fp.versions.keys()) + csv_lines = list(export.to_csv(submissions, version_uid='v2')) + + # Ensure the submission with backfilled data + # (i.e. 
added after initial submission under a new form version) + # makes it in to the export + expected_values = [['Potato Heaven', None, None, None, None, None], + ['Potato Purgatory', '0 0 0 0', '0', '0', '0', '0'], + ['Potato Limbo', '0 1 0 0', '0', '1', '0', '0']] + expected_lines = [';'.join(f'"{val or ""}"' for val in row) for row in expected_values] + assert csv_lines[1:] == expected_lines + # disabled for now # @raises(RuntimeError) # def test_csv_on_repeatable_groups(self): @@ -2068,6 +2086,28 @@ def test_xlsx_with_tag_headers(self): row_values = [cell.value for cell in sheet[2]] assert row_values == ['#beneficiary', None, None] + def test_xslx_with_backfilled_data(self): + title, schemas, submissions = build_fixture('backfilled_answers') + fp = FormPack(schemas, title) + assert len(fp.versions) == 2 + export = fp.export(versions=fp.versions.keys()) + temporary_xlsx = io.BytesIO() + export.to_xlsx(temporary_xlsx, submissions, version_uid='v2') + + # Ensure the submission with backfilled data + # (i.e. 
added after initial submission under a new form version) + # makes it in to the export + expected_rows = [['Potato Heaven',None, None,None,None,None], + ['Potato Purgatory', '0 0 0 0', '0', '0', '0', '0'], + ['Potato Limbo', '0 1 0 0', '0', '1', '0', '0']] + book = openpyxl.load_workbook(temporary_xlsx, read_only=True) + sheet = book[title] + rows = [row for row in sheet][1:] + + rows_as_lists = [[cell.value for cell in row] for row in rows] + + assert rows_as_lists == expected_rows + def test_force_index(self): title, schemas, submissions = customer_satisfaction @@ -3099,3 +3139,23 @@ def test_geojson_unflattened(self): ], }, ] + + def test_geojson_with_backfilled_data(self): + title, schemas, submissions = build_fixture('backfilled_answers') + fp = FormPack(schemas, title) + assert len(fp.versions) == 2 + + fp = FormPack(schemas, title) + export = fp.export(versions=fp.versions.keys()) + geojson_gen = export.to_geojson(submissions, version_uid='v2') + geojson_str = ''.join(geojson_gen) + + # Ensure the submission with backfilled geodata + # (i.e. 
added after initial submission under a new form version) + # makes it in to the export + geojson_obj = json.loads(geojson_str) + assert len(geojson_obj['features']) == 2 + answer1 = geojson_obj['features'][0] + assert answer1['properties']['restaurant_name'] == 'Potato Purgatory' + answer2 = geojson_obj['features'][1] + assert answer2['properties']['restaurant_name'] == 'Potato Limbo' From 7c0a6bce075e85ae64fbdc0ab83411c52f135046 Mon Sep 17 00:00:00 2001 From: rgraber Date: Tue, 17 Dec 2024 10:31:09 -0500 Subject: [PATCH 2/2] fixup!: format --- src/formpack/reporting/export.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/src/formpack/reporting/export.py b/src/formpack/reporting/export.py index bbbfa212..09692f5e 100644 --- a/src/formpack/reporting/export.py +++ b/src/formpack/reporting/export.py @@ -184,7 +184,9 @@ def parse_submissions(self, submissions, version_uid=None): self.reset() form_version = self.versions[version_uid] if version_uid else None for submission in submissions: - formatted_chunks = self.parse_one_submission(submission, version=form_version) + formatted_chunks = self.parse_one_submission( + submission, version=form_version + ) if not formatted_chunks: continue yield formatted_chunks @@ -702,7 +704,8 @@ def to_geojson( # We need direct access to the field objects (available inside the # version) and the unformatted submission data - version = self.versions[version_uid] if version_uid else self.get_version_for_submission(submission) + version = self.versions[version_uid] if version_uid \ + else self.get_version_for_submission(submission) formatted_chunks = self.parse_one_submission(submission, version) if not formatted_chunks: continue