Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Make deepcopying faster #217

Open
wants to merge 5 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
'cyordereddict',
'jsonschema',
'lxml',
'msgpack<1', # 1.0 drops to a python backup on Python 2
'path.py<12', # Pinned for Python 2 compatibility
'pyquery',
'pyxform',
Expand All @@ -38,6 +39,7 @@
'begins',
'jsonschema',
'lxml',
'msgpack',
'path.py',
'pyquery',
'pyxform',
Expand Down
6 changes: 4 additions & 2 deletions src/formpack/pack.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,8 @@

import difflib
import json
from copy import deepcopy

from .utils.fast_deepcopy import fast_deepcopy
from .version import FormVersion
from .utils import str_types
from .reporting import Export, AutoReport
Expand Down Expand Up @@ -112,7 +112,9 @@ def _stats(self):

def load_all_versions(self, versions):
for schema in versions:
self.load_version(deepcopy(schema))
# This is safe, because `schema` is JSON-compatible
copied_schema = fast_deepcopy(schema)
self.load_version(copied_schema)

def load_version(self, schema):
""" Load one version and attach it to this Formpack
Expand Down
5 changes: 3 additions & 2 deletions src/formpack/utils/expand_content.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,10 +5,10 @@
# the standardization step: expand_content_in_place(...)
from __future__ import (unicode_literals, print_function,
absolute_import, division)
from copy import deepcopy
import re

from .array_to_xpath import EXPANDABLE_FIELD_TYPES
from .fast_deepcopy import fast_deepcopy
from .future import iteritems, OrderedDict
from .iterator import get_first_occurrence
from .replace_aliases import META_TYPES
Expand Down Expand Up @@ -141,7 +141,8 @@ def expand_content(content, in_place=False):
expand_content_in_place(content)
return None
else:
content_copy = deepcopy(content)
# This is safe, because `content` is JSON-compatible
content_copy = fast_deepcopy(content)
expand_content_in_place(content_copy)
return content_copy

Expand Down
16 changes: 16 additions & 0 deletions src/formpack/utils/fast_deepcopy.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
import msgpack
import sys


def fast_deepcopy(obj):
    """
    Return a deep copy of `obj` by round-tripping it through msgpack.

    This only works with simple JSON-like structures (dicts with string
    keys, lists, strings, numbers, booleans and `None`). Anything else is
    not guaranteed to come back unchanged; e.g. msgpack has no tuple type,
    so tuples are expected to come back as lists (verify before relying on
    this for non-JSON data).

    :param obj: JSON-compatible object to copy.
    :return: a newly built object equal to `obj` that shares no mutable
        state with it.
    """

    if sys.version_info[0] == 2:
        # `use_bin_type=True` keeps byte strings and unicode strings apart
        # when packing; `encoding='utf-8'` decodes the latter back to
        # unicode when unpacking.
        return msgpack.unpackb(
            msgpack.packb(obj, use_bin_type=True),
            encoding='utf-8'
        )

    # Do not rely on msgpack's defaults here: the defaults for
    # `use_bin_type` and `raw` changed in msgpack 1.0, and with the older
    # defaults text strings would be returned as `bytes`. Spelling the
    # options out keeps the round trip lossless for `str` on any version
    # that accepts these keywords.
    return msgpack.unpackb(
        msgpack.packb(obj, use_bin_type=True),
        raw=False
    )
7 changes: 4 additions & 3 deletions src/formpack/utils/flatten_content.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,10 +4,10 @@

import re
from collections import defaultdict, OrderedDict
from copy import deepcopy
from functools import reduce

from .array_to_xpath import array_to_xpath
from .fast_deepcopy import fast_deepcopy
from .future import range
from .string import str_types
from ..constants import (UNTRANSLATED, OR_OTHER_COLUMN,
Expand Down Expand Up @@ -59,7 +59,8 @@ def flatten_content(survey_content, in_place=False, **opts):
flatten_content_in_place(survey_content, **opts)
return None
else:
survey_content_copy = deepcopy(survey_content)
# This is safe, because `survey_content` is JSON-compatible
survey_content_copy = fast_deepcopy(survey_content)
flatten_content_in_place(survey_content_copy, **opts)
return survey_content_copy

Expand Down Expand Up @@ -172,7 +173,7 @@ def _place_col_in_order(col_, base_col=None):
col_order.append(col_)
_placed_cols.update([col_])

o_row = deepcopy(row)
o_row = fast_deepcopy(row)
translations_range = list(range(0, len(translations)))
for key in (k for k in translated_cols if k in row):
items = row[key]
Expand Down
5 changes: 3 additions & 2 deletions src/formpack/utils/replace_aliases.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,12 +3,12 @@
absolute_import, division)

from collections import defaultdict
from copy import deepcopy
import json

from pyxform import aliases as pyxform_aliases
from pyxform.question_type_dictionary import QUESTION_TYPE_DICT

from .fast_deepcopy import fast_deepcopy
from .future import iteritems, OrderedDict
from .string import str_types

Expand Down Expand Up @@ -224,7 +224,8 @@ def replace_aliases(content, in_place=False, allowed_types=None):
replace_aliases_in_place(content, allowed_types=allowed_types)
return None
else:
_content = deepcopy(content)
# This is safe, because `content` is JSON-compatible
_content = fast_deepcopy(content)
replace_aliases_in_place(_content, allowed_types=allowed_types)
return _content

Expand Down
5 changes: 3 additions & 2 deletions src/formpack/utils/spreadsheet_content.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,8 @@
absolute_import, division)

import re
from copy import deepcopy

from .fast_deepcopy import fast_deepcopy
from ..constants import TAG_COLUMNS_AND_SEPARATORS
from .flatten_content import (_flatten_translated_fields, _flatten_survey_row,
_flatten_tags,
Expand Down Expand Up @@ -50,7 +50,8 @@ def flatten_to_spreadsheet_content(content,
if remove_sheets is None:
remove_sheets = []
if not in_place:
content = deepcopy(content)
# This is safe, because `content` is JSON-compatible
content = fast_deepcopy(content)

translations = content.pop('translations', [])
translated_cols = content.pop('translated', [])
Expand Down