diff --git a/src/formpack/constants.py b/src/formpack/constants.py index bdd8f30..1069d7a 100644 --- a/src/formpack/constants.py +++ b/src/formpack/constants.py @@ -51,6 +51,14 @@ 'hxl': '', } +# Which columns can contain media filenames? Do not confuse this with question +# types that can collect media! From https://xlsform.org/en/#media: +# You can include questions in your form that display images or that play +# video or audio files +# … +# Media is translatable in the same way as labels and hints… +MEDIA_COLUMN_NAMES = ('audio', 'image', 'video') + # Export Settings EXPORT_SETTING_FIELDS = 'fields' EXPORT_SETTING_FIELDS_FROM_ALL_VERSIONS = 'fields_from_all_versions' diff --git a/src/formpack/utils/bugfix.py b/src/formpack/utils/bugfix.py new file mode 100644 index 0000000..face387 --- /dev/null +++ b/src/formpack/utils/bugfix.py @@ -0,0 +1,78 @@ +# coding: utf-8 + +from copy import deepcopy + +from ..constants import UNTRANSLATED + + +def repair_file_column_content_in_place(content) -> bool: + """ + #321 introduced a bug where the `file` column gets renamed to `media::file` + and treated as a translatable column (see #322). This function repairs that + damage. + + This function is intended to be run by KPI, which should it to correct + `Asset` and `AssetVersion` content. + + Returns `True` if any change was made + """ + + # Store updates and apply them to `content` only of all conditions are met + updates = {} + + try: + for key in 'translated', 'translations', 'survey': + updates[key] = deepcopy(content[key]) + except KeyError: + # Do not proceed if content is incomplete + return False + + try: + updates['translated'].remove('media::file') + except ValueError: + # The invalid column `media::file` inside `translated` is a hallmark of + # the problem this method is intended to fix. Do not proceed if it was + # not found + return False + + max_label_list_length = 0 + any_row_fixed = False + for row in updates['survey']: + max_label_list_length = max( + max_label_list_length, len(row.get('label', [])) + ) + bad_file_col = row.get('media::file') + if not bad_file_col: + continue + if not isinstance(bad_file_col, list): + # All problems of our own making (#322) will result in + # `media::file` being a list (or array when JSON) + continue + for val in bad_file_col: + if val is not None: + row['file'] = val + del row['media::file'] + any_row_fixed = True + break + if not any_row_fixed: + return False + + # Multi-language forms need an additional fix to remove a superfluous null + # translation added by the bogus `media::file` column + if len(updates['translations']) > max_label_list_length: + labels_translations_mismatch = True + if len(updates['translations']) == max_label_list_length + 1: + try: + updates['translations'].remove(UNTRANSLATED) + except ValueError: + pass + else: + labels_translations_mismatch = False # Fixed it! + if labels_translations_mismatch: + # This form has uncorrected problems. Bail out instead of modifying + # it at all + return False + + # Success! Apply updates to the original content + content.update(updates) + return True diff --git a/src/formpack/utils/expand_content.py b/src/formpack/utils/expand_content.py index 1e0cbd9..852e50b 100644 --- a/src/formpack/utils/expand_content.py +++ b/src/formpack/utils/expand_content.py @@ -18,8 +18,9 @@ from .array_to_xpath import EXPANDABLE_FIELD_TYPES from .iterator import get_first_occurrence -from .replace_aliases import MEDIA_TYPES, META_TYPES, selects +from .replace_aliases import META_TYPES, selects from ..constants import ( + MEDIA_COLUMN_NAMES, OR_OTHER_COLUMN, TAG_COLUMNS_AND_SEPARATORS, UNTRANSLATED, @@ -186,7 +187,7 @@ def _get_known_translated_cols(translated_cols: List[str]) -> List[str]: _translated_cols = [] for col in translated_cols: - if col in MEDIA_TYPES: + if col in MEDIA_COLUMN_NAMES: col = f'media::{col}' _translated_cols.append(col) @@ -207,7 +208,7 @@ def _get_special_survey_cols( 'hint::English', For more examples, see tests. """ - RE_MEDIA_TYPES = '|'.join(MEDIA_TYPES) + RE_MEDIA_COLUMN_NAMES = '|'.join(MEDIA_COLUMN_NAMES) uniq_cols = OrderedDict() special = OrderedDict() @@ -238,14 +239,14 @@ def _mark_special(**kwargs: str) -> None: column=column_name, translation=UNTRANSLATED, ) - if ':' not in column_name and column_name not in MEDIA_TYPES: + if ':' not in column_name and column_name not in MEDIA_COLUMN_NAMES: continue if column_name.startswith('bind:'): continue if column_name.startswith('body:'): continue mtch = re.match( - rf'^(media\s*::?\s*)?({RE_MEDIA_TYPES})\s*::?\s*([^:]+)$', + rf'^(media\s*::?\s*)?({RE_MEDIA_COLUMN_NAMES})\s*::?\s*([^:]+)$', column_name, ) if mtch: @@ -260,7 +261,9 @@ def _mark_special(**kwargs: str) -> None: translation=translation, ) continue - mtch = re.match(rf'^(media\s*::?\s*)?({RE_MEDIA_TYPES})$', column_name) + mtch = re.match( + rf'^(media\s*::?\s*)?({RE_MEDIA_COLUMN_NAMES})$', column_name + ) if mtch: matched = mtch.groups() media_type = matched[1] diff --git a/tests/test_bugfix.py b/tests/test_bugfix.py new file mode 100644 index 0000000..8b98e6c --- /dev/null +++ b/tests/test_bugfix.py @@ -0,0 +1,32 @@ +# coding: utf-8 + +from formpack.utils.bugfix import repair_file_column_content_in_place + + +def test_repair_file_column(): + content = { + 'schema': '1', + 'settings': {}, + 'survey': [ + { + 'label': [ + "Введіть ім'я співробітника:", + "Enter interviewer's name", + ], + 'name': 'interviewer_name_text', + 'type': 'text', + }, + { + 'label': ['Область', 'Oblast'], + 'media::file': [None, None, 'oblast.csv'], + 'name': 'oblast', + 'type': 'select_one_from_file', + }, + ], + 'translated': ['label', 'media::file'], + 'translations': ['Ukrainian', 'English', None], + } + assert repair_file_column_content_in_place(content) + assert content['survey'][1]['file'] == 'oblast.csv' + assert content['translated'] == ['label'] + assert content['translations'] == ['Ukrainian', 'English'] diff --git a/tests/test_exports.py b/tests/test_exports.py index 70f299a..1846810 100644 --- a/tests/test_exports.py +++ b/tests/test_exports.py @@ -376,6 +376,45 @@ def test_translations_labels_mismatch(self): with self.assertRaises(TranslationError): FormPack(schemas, title) + def test_translations_with_select_multiple_from_file(self): + """ + Make sure that exports do not crash if `select_multiple_from_file` is + used in a multi-language form. See #322 + """ + + title, schemas, submissions = restaurant_profile + + # Add a `select_multiple_from_file` question to existing `rpV4` schema + assert schemas[-1]['version'] == 'rpV4' + schemas[-1]['content']['survey'].append( + { + 'type': 'select_multiple_from_file suppliers.csv', + 'name': 'suppliers', + 'label::english': 'suppliers', + 'label::french': 'fournisseurs', + } + ) + fp = FormPack(schemas[-1:], title) + + # Expect that a `file` column has been added; see + # `test_expand_content.test_expand_select_x_from_file()` + assert ( + fp.versions['rpV4'].schema['content']['survey'][-1]['file'] + == 'suppliers.csv' + ) + + # Feed this schema with `file` back into formpack again + fp = FormPack([fp.versions['rpV4'].schema], title) + + # At this point, issue #322 would have already caused + # `TranslationError: Mismatched labels and translations`, but perform a + # simple export anyway, adding a response for `suppliers` + submissions[-1]['suppliers'] = 'SFG Ocsys' + export = fp.export(versions=fp.versions) + actual_dict = export.to_dict(submissions) + assert actual_dict['Restaurant profile']['fields'][-1] == 'suppliers' + assert actual_dict['Restaurant profile']['data'][-1][-1] == 'SFG Ocsys' + def test_simple_nested_grouped_repeatable(self): title, schemas, submissions = build_fixture( 'simple_nested_grouped_repeatable'