kobotoolbox · rgraber · Dec 17, 2024 · Dec 17, 2024
diff --git a/setup.py b/setup.py
@@ -20,7 +20,7 @@
 
 setup(
     name='formpack',
-    version='3.0.0',
+    version='3.1.0',
     description='Manipulation tools for KoBo forms',
     author='the formpack contributors (https://github.com/kobotoolbox/formpack/graphs/contributors)',
     url='https://github.com/kobotoolbox/formpack/',

diff --git a/src/formpack/reporting/export.py b/src/formpack/reporting/export.py
@@ -171,14 +171,22 @@ def parse_one_submission(self, submission, version=None):
         submission = FormSubmission(submission)
         return self.format_one_submission([submission.data], section)
 
-    def parse_submissions(self, submissions):
+    def parse_submissions(self, submissions, version_uid=None):
         """
         Return a generator yielding formatted 'chunks' for each submission from
         the data set
+
+        Args:
+            version_uid (str): optional, explicit version uid to use for all
+                submissions instead of inferring the version from the submissions
+                themselves
         """
         self.reset()
+        form_version = self.versions[version_uid] if version_uid else None
         for submission in submissions:
-            formatted_chunks = self.parse_one_submission(submission)
+            formatted_chunks = self.parse_one_submission(
+                submission, version=form_version
+            )
             if not formatted_chunks:
                 continue
             yield formatted_chunks
@@ -542,7 +550,6 @@ def to_dict(self, submissions):
         """
 
         d = OrderedDict()
-
         for section, labels in self.labels.items():
             d[section] = {'fields': list(labels), 'data': []}
 
@@ -552,12 +559,19 @@ def to_dict(self, submissions):
 
         return d
 
-    def to_csv(self, submissions, sep=';', quote='"'):
+    def to_csv(self, submissions, version_uid=None, sep=';', quote='"'):
         """
         Return a generator yielding csv lines.
 
         We don't use the csv module to avoid buffering the lines
         in memory.
+
+        Args:
+            version_uid (str): optional, explicit version uid to use for all
+                submissions instead of inferring the version from the submissions
+                themselves
+            sep (str): optional, separator char, default ';'
+            quote (str): optional, quote char, default '"'
         """
 
         sections = list(self.labels.items())
@@ -591,7 +605,7 @@ def format_line(line, sep, quote):
         for tag_row in tag_rows:
             yield format_line(tag_row, sep, quote)
 
-        for chunk in self.parse_submissions(submissions):
+        for chunk in self.parse_submissions(submissions, version_uid=version_uid):
             for section_name, rows in chunk.items():
                 if section == section_name:
                     for row in rows:
@@ -600,6 +614,7 @@ def format_line(line, sep, quote):
     def to_geojson(
         self,
         submissions: Iterator,
+        version_uid: str = None,
         flatten: bool = True,
         geo_question_name: Optional[str] = None,
     ) -> Generator:
@@ -689,7 +704,8 @@ def to_geojson(
 
             # We need direct access to the field objects (available inside the
             # version) and the unformatted submission data
-            version = self.get_version_for_submission(submission)
+            version = self.versions[version_uid] if version_uid \
+                else self.get_version_for_submission(submission)
             formatted_chunks = self.parse_one_submission(submission, version)
             if not formatted_chunks:
                 continue
@@ -792,7 +808,7 @@ def to_table(self, submissions):
 
         return table
 
-    def to_xlsx(self, filename, submissions):
+    def to_xlsx(self, filename, submissions, version_uid=None):
         workbook = xlsxwriter.Workbook(
             filename,
             {
@@ -845,7 +861,7 @@ def _append_row_to_sheet(sheet_, data):
             row_index += 1
             sheet_row_positions[sheet_] = row_index
 
-        for chunk in self.parse_submissions(submissions):
+        for chunk in self.parse_submissions(submissions, version_uid=version_uid):
             for section_name, rows in chunk.items():
                 try:
                     sheet_name = sheet_name_mapping[section_name]
@@ -874,7 +890,7 @@ def _append_row_to_sheet(sheet_, data):
 
         workbook.close()
 
-    def to_html(self, submissions):
+    def to_html(self, submissions, version_uid=None):
         """
         Yield lines of and HTML table strings.
         """
@@ -892,7 +908,7 @@ def to_html(self, submissions):
 
         yield '<tbody>'
 
-        for chunk in self.parse_submissions(submissions):
+        for chunk in self.parse_submissions(submissions, version_uid=version_uid):
             for section_name, rows in chunk.items():
                 if section == section_name:
                     for row in rows:

diff --git a/tests/fixtures/backfilled_answers/__init__.py b/tests/fixtures/backfilled_answers/__init__.py
@@ -0,0 +1,16 @@
+"""
+backfilled_answers:
+
+* v2 adds `restaurant_location`; one v1 submission has a backfilled answer to it
+"""
+
+from ..load_fixture_json import load_fixture_json
+
+DATA = {
+    'title': 'Backfilled answers',
+    'id_string': 'backfilled_answers',
+    'versions': [
+        load_fixture_json('backfilled_answers/v1'),
+        load_fixture_json('backfilled_answers/v2'),
+    ],
+}
diff --git a/tests/fixtures/backfilled_answers/v1.json b/tests/fixtures/backfilled_answers/v1.json
@@ -0,0 +1,21 @@
+{
+  "version": "v1",
+    "content": {
+        "survey": [
+            {
+                "type": "text",
+                "name": "restaurant_name",
+                "label": "restaurant name"
+            }
+        ]
+    },
+    "submissions": [
+      {
+      "restaurant_name": "Potato Heaven"
+    },
+    {
+      "restaurant_name": "Potato Purgatory",
+      "restaurant_location": "0 0 0 0"
+    }
+    ]
+}
diff --git a/tests/fixtures/backfilled_answers/v2.json b/tests/fixtures/backfilled_answers/v2.json
@@ -0,0 +1,23 @@
+{
+    "version": "v2",
+    "content": {
+        "survey": [
+            {
+                "type": "text",
+                "name": "restaurant_name",
+                "label": "restaurant name"
+            },
+           {
+                "type": "geopoint",
+                "name": "restaurant_location",
+                "label": "restaurant location"
+           }
+        ]
+    },
+  "submissions": [
+        {
+      "restaurant_name": "Potato Limbo",
+      "restaurant_location": "0 1 0 0"
+    }
+  ]
+}
diff --git a/tests/test_exports.py b/tests/test_exports.py
@@ -1716,6 +1716,24 @@ def test_csv_with_tag_headers_select_multiple_summary_or_details(self):
         )
         assert rows[1] == ('"#loc+name";"#indicator+diet";"";"";""')
 
+    def test_csv_with_backfilled_data(self):
+        """CSV export keeps v2-only answers backfilled into v1 submissions."""
+        title, schemas, submissions = build_fixture('backfilled_answers')
+        fp = FormPack(schemas, title)
+        assert len(fp.versions) == 2
+
+        export = fp.export(versions=fp.versions.keys())
+        csv_lines = list(export.to_csv(submissions, version_uid='v2'))
+
+        # Ensure the submission with backfilled data
+        # (i.e. added after initial submission under a new form version)
+        # makes it in to the export
+        expected_values = [['Potato Heaven', None, None, None, None, None],
+                           ['Potato Purgatory', '0 0 0 0', '0', '0', '0', '0'],
+                           ['Potato Limbo', '0 1 0 0', '0', '1', '0', '0']]
+        expected_lines = [';'.join(f'"{val or ""}"' for val in row) for row in expected_values]
+        assert csv_lines[1:] == expected_lines
+
     # disabled for now
     # @raises(RuntimeError)
     # def test_csv_on_repeatable_groups(self):
@@ -2068,6 +2086,28 @@ def test_xlsx_with_tag_headers(self):
             row_values = [cell.value for cell in sheet[2]]
             assert row_values == ['#beneficiary', None, None]
 
+    def test_xslx_with_backfilled_data(self):
+        """XLSX export keeps v2-only answers backfilled into v1 submissions."""
+        title, schemas, submissions = build_fixture('backfilled_answers')
+        formpack = FormPack(schemas, title)
+        assert len(formpack.versions) == 2
+        xlsx_export = formpack.export(versions=formpack.versions.keys())
+        xlsx_buffer = io.BytesIO()
+        xlsx_export.to_xlsx(xlsx_buffer, submissions, version_uid='v2')
+
+        # Ensure the submission with backfilled data
+        # (i.e. added after initial submission under a new form version)
+        # makes it in to the export
+        expected_rows = [['Potato Heaven', None, None, None, None, None],
+                         ['Potato Purgatory', '0 0 0 0', '0', '0', '0', '0'],
+                         ['Potato Limbo', '0 1 0 0', '0', '1', '0', '0']]
+        workbook = openpyxl.load_workbook(xlsx_buffer, read_only=True)
+        worksheet = workbook[title]
+        data_rows = list(worksheet)[1:]  # the first row is not compared
+        actual_rows = [[cell.value for cell in row] for row in data_rows]
+
+        assert actual_rows == expected_rows
+
     def test_force_index(self):
         title, schemas, submissions = customer_satisfaction
 
@@ -3099,3 +3139,23 @@ def test_geojson_unflattened(self):
                 ],
             },
         ]
+
+    def test_geojson_with_backfilled_data(self):
+        """GeoJSON export keeps a geopoint backfilled into a v1 submission."""
+        title, schemas, submissions = build_fixture('backfilled_answers')
+        fp = FormPack(schemas, title)
+        assert len(fp.versions) == 2
+
+        export = fp.export(versions=fp.versions.keys())
+        geojson_gen = export.to_geojson(submissions, version_uid='v2')
+        geojson_str = ''.join(geojson_gen)
+
+        # Ensure the submission with backfilled geodata
+        # (i.e. added after initial submission under a new form version)
+        # makes it in to the export
+        geojson_obj = json.loads(geojson_str)
+        assert len(geojson_obj['features']) == 2
+        answer1 = geojson_obj['features'][0]
+        assert answer1['properties']['restaurant_name'] == 'Potato Purgatory'
+        answer2 = geojson_obj['features'][1]
+        assert answer2['properties']['restaurant_name'] == 'Potato Limbo'