Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: optionally specify version in exports TASK-1244 #325

Draft
wants to merge 2 commits into
base: main
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@

setup(
name='formpack',
version='3.0.0',
version='3.1.0',
description='Manipulation tools for KoBo forms',
author='the formpack contributors (https://github.com/kobotoolbox/formpack/graphs/contributors)',
url='https://github.com/kobotoolbox/formpack/',
Expand Down
36 changes: 26 additions & 10 deletions src/formpack/reporting/export.py
Original file line number Diff line number Diff line change
Expand Up @@ -171,14 +171,22 @@ def parse_one_submission(self, submission, version=None):
submission = FormSubmission(submission)
return self.format_one_submission([submission.data], section)

def parse_submissions(self, submissions, version_uid=None):
    """
    Return a generator yielding formatted 'chunks' for each submission from
    the data set

    Args:
        submissions (iterable): raw submissions; each one is passed to
            `parse_one_submission()`
        version_uid (str): optional, explicit version uid to use for all
            submissions instead of inferring the version from the
            submissions themselves

    Raises:
        A lookup error (`KeyError` if `self.versions` is dict-like) when
        `version_uid` is given but unknown. Because this is a generator,
        the error surfaces on first iteration, not at call time.
    """
    self.reset()
    # Resolve the explicit version once, before the loop. A falsy
    # `version_uid` leaves the choice to `parse_one_submission()`, which
    # receives `version=None`.
    form_version = self.versions[version_uid] if version_uid else None
    for submission in submissions:
        formatted_chunks = self.parse_one_submission(
            submission, version=form_version
        )
        # Submissions that format to nothing are dropped from the output
        if not formatted_chunks:
            continue
        yield formatted_chunks
Expand Down Expand Up @@ -542,7 +550,6 @@ def to_dict(self, submissions):
"""

d = OrderedDict()

for section, labels in self.labels.items():
d[section] = {'fields': list(labels), 'data': []}

Expand All @@ -552,12 +559,19 @@ def to_dict(self, submissions):

return d

def to_csv(self, submissions, sep=';', quote='"'):
def to_csv(self, submissions, version_uid=None, sep=';', quote='"'):
"""
Return a generator yielding csv lines.

We don't use the csv module to avoid buffering the lines
in memory.

Args:
version_uid (str): optional, explicit version uid to use for all
submissions instead of inferring the version from the submissions
themselves
sep (str): optional, separator char, default ';'
quote (str): optional, quote char, default '"'
"""

sections = list(self.labels.items())
Expand Down Expand Up @@ -591,7 +605,7 @@ def format_line(line, sep, quote):
for tag_row in tag_rows:
yield format_line(tag_row, sep, quote)

for chunk in self.parse_submissions(submissions):
for chunk in self.parse_submissions(submissions, version_uid=version_uid):
for section_name, rows in chunk.items():
if section == section_name:
for row in rows:
Expand All @@ -600,6 +614,7 @@ def format_line(line, sep, quote):
def to_geojson(
self,
submissions: Iterator,
version_uid: str = None,
flatten: bool = True,
geo_question_name: Optional[str] = None,
) -> Generator:
Expand Down Expand Up @@ -689,7 +704,8 @@ def to_geojson(

# We need direct access to the field objects (available inside the
# version) and the unformatted submission data
version = self.get_version_for_submission(submission)
version = self.versions[version_uid] if version_uid \
else self.get_version_for_submission(submission)
formatted_chunks = self.parse_one_submission(submission, version)
if not formatted_chunks:
continue
Expand Down Expand Up @@ -792,7 +808,7 @@ def to_table(self, submissions):

return table

def to_xlsx(self, filename, submissions):
def to_xlsx(self, filename, submissions, version_uid=None):
workbook = xlsxwriter.Workbook(
filename,
{
Expand Down Expand Up @@ -845,7 +861,7 @@ def _append_row_to_sheet(sheet_, data):
row_index += 1
sheet_row_positions[sheet_] = row_index

for chunk in self.parse_submissions(submissions):
for chunk in self.parse_submissions(submissions, version_uid=version_uid):
for section_name, rows in chunk.items():
try:
sheet_name = sheet_name_mapping[section_name]
Expand Down Expand Up @@ -874,7 +890,7 @@ def _append_row_to_sheet(sheet_, data):

workbook.close()

def to_html(self, submissions):
def to_html(self, submissions, version_uid=None):
"""
Yield the lines of an HTML table, as strings.
"""
Expand All @@ -892,7 +908,7 @@ def to_html(self, submissions):

yield '<tbody>'

for chunk in self.parse_submissions(submissions):
for chunk in self.parse_submissions(submissions, version_uid=version_uid):
for section_name, rows in chunk.items():
if section == section_name:
for row in rows:
Expand Down
16 changes: 16 additions & 0 deletions tests/fixtures/backfilled_answers/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
"""
backfilled_answers:

* has a question added in v2 with answers backfilled in some submissions
"""

from ..load_fixture_json import load_fixture_json

# Fixture definition: form metadata plus one loaded JSON document per form
# version, listed oldest (v1) first.
DATA = {
    'title': 'Backfilled answers',
    'id_string': 'backfilled_answers',
    'versions': [
        load_fixture_json(fixture_path)
        for fixture_path in (
            'backfilled_answers/v1',
            'backfilled_answers/v2',
        )
    ],
}
21 changes: 21 additions & 0 deletions tests/fixtures/backfilled_answers/v1.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
{
"version": "v1",
"content": {
"survey": [
{
"type": "text",
"name": "restaurant_name",
"label": "restaurant name"
}
]
},
"submissions": [
{
"restaurant_name": "Potato Heaven"
},
{
"restaurant_name": "Potato Purgatory",
"restaurant_location": "0 0 0 0"
}
]
}
23 changes: 23 additions & 0 deletions tests/fixtures/backfilled_answers/v2.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
{
"version": "v2",
"content": {
"survey": [
{
"type": "text",
"name": "restaurant_name",
"label": "restaurant name"
},
{
"type": "geopoint",
"name": "restaurant_location",
"label": "restaurant location"
}
]
},
"submissions": [
{
"restaurant_name": "Potato Limbo",
"restaurant_location": "0 1 0 0"
}
]
}
60 changes: 60 additions & 0 deletions tests/test_exports.py
Original file line number Diff line number Diff line change
Expand Up @@ -1716,6 +1716,24 @@ def test_csv_with_tag_headers_select_multiple_summary_or_details(self):
)
assert rows[1] == ('"#loc+name";"#indicator+diet";"";"";""')

def test_csv_with_backfilled_data(self):
    """
    CSV export with an explicit `version_uid` keeps backfilled answers.

    The 'backfilled_answers' fixture has a v1 submission that carries an
    answer to a question only defined in v2; forcing `version_uid='v2'`
    must export that answer instead of dropping it.
    """
    title, schemas, submissions = build_fixture('backfilled_answers')
    fp = FormPack(schemas, title)
    assert len(fp.versions) == 2

    export = fp.export(versions=fp.versions.keys())
    csv_lines = list(export.to_csv(submissions, version_uid='v2'))

    # Ensure the submission with backfilled data
    # (i.e. added after initial submission under a new form version)
    # makes it into the export
    expected_values = [
        ['Potato Heaven', None, None, None, None, None],
        ['Potato Purgatory', '0 0 0 0', '0', '0', '0', '0'],
        ['Potato Limbo', '0 1 0 0', '0', '1', '0', '0'],
    ]
    # Missing values are expected as empty quoted cells
    expected_lines = [
        ';'.join(f'"{val or ""}"' for val in row) for row in expected_values
    ]
    # The first line is skipped (presumably the header row)
    assert csv_lines[1:] == expected_lines

# disabled for now
# @raises(RuntimeError)
# def test_csv_on_repeatable_groups(self):
Expand Down Expand Up @@ -2068,6 +2086,28 @@ def test_xlsx_with_tag_headers(self):
row_values = [cell.value for cell in sheet[2]]
assert row_values == ['#beneficiary', None, None]

def test_xslx_with_backfilled_data(self):
    """
    XLSX export with an explicit `version_uid` keeps backfilled answers.

    The 'backfilled_answers' fixture has a v1 submission that carries an
    answer to a question only defined in v2; forcing `version_uid='v2'`
    must export that answer instead of dropping it.
    """
    title, schemas, submissions = build_fixture('backfilled_answers')
    fp = FormPack(schemas, title)
    assert len(fp.versions) == 2
    export = fp.export(versions=fp.versions.keys())
    temporary_xlsx = io.BytesIO()
    export.to_xlsx(temporary_xlsx, submissions, version_uid='v2')

    # Ensure the submission with backfilled data
    # (i.e. added after initial submission under a new form version)
    # makes it into the export
    expected_rows = [
        ['Potato Heaven', None, None, None, None, None],
        ['Potato Purgatory', '0 0 0 0', '0', '0', '0', '0'],
        ['Potato Limbo', '0 1 0 0', '0', '1', '0', '0'],
    ]
    book = openpyxl.load_workbook(temporary_xlsx, read_only=True)
    try:
        sheet = book[title]
        # The first row is skipped (presumably the header row)
        rows_as_lists = [
            [cell.value for cell in row] for row in list(sheet)[1:]
        ]
    finally:
        # Read-only workbooks load lazily and must be closed explicitly
        book.close()

    assert rows_as_lists == expected_rows

def test_force_index(self):
title, schemas, submissions = customer_satisfaction

Expand Down Expand Up @@ -3099,3 +3139,23 @@ def test_geojson_unflattened(self):
],
},
]

def test_geojson_with_backfilled_data(self):
    """
    GeoJSON export with an explicit `version_uid` keeps backfilled geodata.

    The 'backfilled_answers' fixture has a v1 submission that carries a
    geopoint answer to a question only defined in v2; forcing
    `version_uid='v2'` must turn that answer into a feature.
    """
    title, schemas, submissions = build_fixture('backfilled_answers')
    fp = FormPack(schemas, title)
    assert len(fp.versions) == 2

    export = fp.export(versions=fp.versions.keys())
    geojson_gen = export.to_geojson(submissions, version_uid='v2')
    geojson_str = ''.join(geojson_gen)

    # Ensure the submission with backfilled geodata
    # (i.e. added after initial submission under a new form version)
    # makes it into the export
    geojson_obj = json.loads(geojson_str)
    features = geojson_obj['features']
    # Only the two submissions that have a location yield a feature
    assert len(features) == 2
    answer1, answer2 = features
    assert answer1['properties']['restaurant_name'] == 'Potato Purgatory'
    assert answer2['properties']['restaurant_name'] == 'Potato Limbo'
Loading