Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix charging for selenocysteine #1082

Merged
merged 5 commits into from
Jun 9, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 0 additions & 9 deletions reconstruction/ecoli/dataclasses/getter_functions.py
Original file line number Diff line number Diff line change
Expand Up @@ -397,17 +397,8 @@ def _build_modified_rna_masses(self, raw_data):
modified_rna_id for rna in raw_data.rnas
for modified_rna_id in rna['modified_forms']}

# Get IDs of charging reactions that should be removed
removed_charging_reaction_ids = {
rxn['id'] for rxn in raw_data.trna_charging_reactions_removed
}

# Loop through each charging reaction
for rxn in raw_data.trna_charging_reactions:
# Skip removed reactions
if rxn['id'] in removed_charging_reaction_ids:
continue

# Find molecule IDs whose masses are still unknown
unknown_mol_ids = [
mol_id for mol_id in rxn['stoichiometry'].keys()
Expand Down
8 changes: 0 additions & 8 deletions reconstruction/ecoli/dataclasses/process/transcription.py
Original file line number Diff line number Diff line change
Expand Up @@ -540,19 +540,11 @@ def _build_charged_trna(self, raw_data, sim_data):
synthetase_names = []
synthetase_mapping_aa = []
synthetase_mapping_syn = []

# Get IDs of charging reactions that should be removed
removed_reaction_ids = {
rxn['id'] for rxn in raw_data.trna_charging_reactions_removed}

# Get IDs of all metabolites
metabolite_ids = {met['id'] for met in raw_data.metabolites}

# Create stoichiometry matrix for charging reactions
for reaction in raw_data.trna_charging_reactions:
if reaction['id'] in removed_reaction_ids:
continue

# Get uncharged tRNA name for the given reaction
trna = None
for mol_id in reaction['stoichiometry'].keys():
Expand Down
2 changes: 2 additions & 0 deletions reconstruction/ecoli/flat/trna_charging_reactions_added.tsv
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
"id" "stoichiometry" "catalyzed_by" "common_name" "_notes"
"SELENOCYSTEINE--TRNA-LIGASE-RXN_selC-tRNA" {"L-SELENOCYSTEINE":-1,"ATP":-1,"selC-tRNA":-1,"PPI":1,"AMP":1,"charged-selC-tRNA":1} ["CPLX0-1141"] "SELENOCYSTEIN SYNTHASE" "Simplified selenocysteine charging from two steps (RXN0-2161 and 2.9.1.1-RXN) into one to match modeling framework of other charging reactions"
11 changes: 1 addition & 10 deletions reconstruction/ecoli/flat/trna_charging_reactions_removed.tsv
Original file line number Diff line number Diff line change
@@ -1,11 +1,2 @@
"id" "_comments"
"GLUTRNAREDUCT-RXN" "Charging reaction does not use canonical amino acid (uses GLUTAMATE-1-SEMIALDEHYDE)"
"GLUTRNAREDUCT-R_1" "Charging reaction does not use canonical amino acid (uses GLUTAMATE-1-SEMIALDEHYDE)"
"GLUTRNAREDUCT-R_0" "Charging reaction does not use canonical amino acid (uses GLUTAMATE-1-SEMIALDEHYDE)"
"GLUTRNAREDUCT-R_2" "Charging reaction does not use canonical amino acid (uses GLUTAMATE-1-SEMIALDEHYDE)"
"RX_WC_METHIONYL-TRNA-FORMYLTRANSFERASE-RXN1" "Removed to allow only one charged tRNA per uncharged tRNA"
"RX_WC_METHIONYL-TRNA-FORMYLTRANSFERASE-RXN2" "Removed to allow only one charged tRNA per uncharged tRNA"
"RX_WC_METHIONYL-TRNA-FORMYLTRANSFERASE-RXN3" "Removed to allow only one charged tRNA per uncharged tRNA"
"RX_WC_METHIONYL-TRNA-FORMYLTRANSFERASE-RXN4" "Removed to allow only one charged tRNA per uncharged tRNA"
"RXN0-6434" "Reaction does not have both an uncharged and charged tRNA"
"2.9.1.1-R_0" "Reaction does not have both an uncharged and charged tRNA"
"RXN0-2161_selC-tRNA" "Ignored because two step Sel charging (Ser charging, Ser -> Sel) does not fit with modeling framework of other charging reactions. Replaced by SELENOCYSTEINE--TRNA-LIGASE-RXN_selC-tRNA."
45 changes: 37 additions & 8 deletions reconstruction/ecoli/knowledge_base_raw.py
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,7 @@
"tf_one_component_bound.tsv",
"translation_efficiency.tsv",
"trna_charging_reactions.tsv",
"trna_charging_reactions_added.tsv",
"trna_charging_reactions_removed.tsv",
"two_component_systems.tsv",
"two_component_system_templates.tsv",
Expand Down Expand Up @@ -108,6 +109,11 @@
# Maps the attribute name of a loaded dataset to the attribute name of the
# dataset listing rows to drop from it. Consumed by DataStore._prune_data,
# which removes every row of the first dataset whose values match a row of
# the second on all of the second dataset's loaded columns.
# NOTE(review): attribute names presumably mirror the flat .tsv file names
# without the extension - confirm against the loading code.
# TODO: add other removed files here and not handle removing in scripts
REMOVED_DATA = {
'metabolite_concentrations': 'metabolite_concentrations_removed',
'trna_charging_reactions': 'trna_charging_reactions_removed',
}
# Maps the attribute name of a loaded dataset to the attribute name of the
# dataset holding extra rows to append to it. Consumed by
# DataStore._join_data, which runs after _prune_data in DataStore.__init__,
# so added rows are never candidates for removal.
# TODO: move added rows from some flat files to new files and add here
ADDED_DATA = {
'trna_charging_reactions': 'trna_charging_reactions_added',
}

class DataStore(object):
Expand All @@ -128,6 +134,7 @@ def __init__(self):
self._load_parameters(os.path.join(FLAT_DIR, filename))

self._prune_data()
self._join_data()

self.genome_sequence = self._load_sequence(os.path.join(FLAT_DIR, SEQUENCE_FILE))

Expand Down Expand Up @@ -178,18 +185,40 @@ def _prune_data(self):
"""

# Check each pair of files to be removed
for data, to_remove in REMOVED_DATA.items():
for data_attr, attr_to_remove in REMOVED_DATA.items():
# Build the set of data to identify rows to be removed
attr_removed = getattr(self, to_remove)
removed_cols = list(attr_removed[0].keys())
data_to_remove = getattr(self, attr_to_remove)
removed_cols = list(data_to_remove[0].keys())
removed_ids = set()
for row in attr_removed:
for row in data_to_remove:
removed_ids.add(tuple([row[col] for col in removed_cols]))

# Remove any matching rows
attr_data = getattr(self, data)
n_entries = len(attr_data)
for i, row in enumerate(attr_data[::-1]):
data = getattr(self, data_attr)
n_entries = len(data)
for i, row in enumerate(data[::-1]):
checked_id = tuple([row[col] for col in removed_cols])
if checked_id in removed_ids:
attr_data.pop(n_entries - i - 1)
data.pop(n_entries - i - 1)

def _join_data(self):
    """
    Append rows that are specified in additional ("added") datasets.

    For every (base attribute, added attribute) pair in ADDED_DATA, the rows
    stored under the added attribute are appended, in order, to the list
    stored under the base attribute. The base list is modified in place and
    the row objects are shared with the added dataset, not copied.

    Rows are only added if the loaded columns of both datasets match. The
    check compares the keys of the first row of each dataset. An added
    dataset with no rows is skipped, and the column check is skipped when
    the base dataset has no rows (there is nothing to compare against),
    instead of failing with an IndexError.

    Raises:
        ValueError: if the two datasets do not have the same columns.
    """

    # Join data for each file with data to be added
    for data_attr, attr_to_add in ADDED_DATA.items():
        # Get datasets to join
        # NOTE(review): both are assumed to be lists of dict-like rows,
        # as they are used in _prune_data - confirm against the loader.
        data = getattr(self, data_attr)
        added_data = getattr(self, attr_to_add)

        # Nothing to add (e.g. the added file only contains a header row)
        if not added_data:
            continue

        # Check columns are the same for each dataset; this is only
        # possible when the base dataset still has at least one row
        if data:
            col_diff = set(data[0].keys()).symmetric_difference(added_data[0].keys())
            if col_diff:
                raise ValueError(f'Could not join datasets {data_attr} and {attr_to_add} '
                    f'because columns do not match (different columns: {col_diff}).')

        # Join datasets
        data.extend(added_data)
4 changes: 2 additions & 2 deletions reconstruction/spreadsheets.py
Original file line number Diff line number Diff line change
Expand Up @@ -140,10 +140,10 @@ def __init__(self, *args, **kwargs):
fieldnames = [fieldname.strip('"') for fieldname in fieldnames]
self.tsv_dict_reader.fieldnames = fieldnames

# Discard private field names that begin with underscore
# Discard private field names that begin with underscore and empty field names
self._fieldnames = [
fieldname for fieldname in fieldnames
if not fieldname.startswith('_')]
if not fieldname.startswith('_') and fieldname != '']

def __iter__(self):
return self
Expand Down