CovertLab · tahorst · Jun 9, 2021 · Jun 7, 2021 · Jun 7, 2021 · Jun 7, 2021
diff --git a/reconstruction/ecoli/dataclasses/getter_functions.py b/reconstruction/ecoli/dataclasses/getter_functions.py
@@ -397,17 +397,8 @@ def _build_modified_rna_masses(self, raw_data):
 			modified_rna_id for rna in raw_data.rnas
 			for modified_rna_id in rna['modified_forms']}
 
-		# Get IDs of charging reactions that should be removed
-		removed_charging_reaction_ids = {
-			rxn['id'] for rxn in raw_data.trna_charging_reactions_removed
-			}
-
 		# Loop through each charging reaction
 		for rxn in raw_data.trna_charging_reactions:
-			# Skip removed reactions
-			if rxn['id'] in removed_charging_reaction_ids:
-				continue
-
 			# Find molecule IDs whose masses are still unknown
 			unknown_mol_ids = [
 				mol_id for mol_id in rxn['stoichiometry'].keys()

diff --git a/reconstruction/ecoli/dataclasses/process/transcription.py b/reconstruction/ecoli/dataclasses/process/transcription.py
@@ -540,19 +540,11 @@ def _build_charged_trna(self, raw_data, sim_data):
 		synthetase_names = []
 		synthetase_mapping_aa = []
 		synthetase_mapping_syn = []
-
-		# Get IDs of charging reactions that should be removed
-		removed_reaction_ids = {
-			rxn['id'] for rxn in raw_data.trna_charging_reactions_removed}
-
 		# Get IDs of all metabolites
 		metabolite_ids = {met['id'] for met in raw_data.metabolites}
 
 		# Create stoichiometry matrix for charging reactions
 		for reaction in raw_data.trna_charging_reactions:
-			if reaction['id'] in removed_reaction_ids:
-				continue
-
 			# Get uncharged tRNA name for the given reaction
 			trna = None
 			for mol_id in reaction['stoichiometry'].keys():

diff --git a/reconstruction/ecoli/flat/trna_charging_reactions_added.tsv b/reconstruction/ecoli/flat/trna_charging_reactions_added.tsv
@@ -0,0 +1,2 @@
+"id"	"stoichiometry"	"catalyzed_by"	"common_name"	"_notes"
+"SELENOCYSTEINE--TRNA-LIGASE-RXN_selC-tRNA"	{"L-SELENOCYSTEINE":-1,"ATP":-1,"selC-tRNA":-1,"PPI":1,"AMP":1,"charged-selC-tRNA":1}	["CPLX0-1141"]	"SELENOCYSTEIN SYNTHASE"	"Simplified selenocysteine charging from two steps (RXN0-2161 and 2.9.1.1-RXN) into one to match modeling framework of other charging reactions"
diff --git a/reconstruction/ecoli/flat/trna_charging_reactions_removed.tsv b/reconstruction/ecoli/flat/trna_charging_reactions_removed.tsv
@@ -1,11 +1,2 @@
 "id"	"_comments"
-"GLUTRNAREDUCT-RXN"	"Charging reaction does not use canonical amino acid (uses GLUTAMATE-1-SEMIALDEHYDE)"
-"GLUTRNAREDUCT-R_1"	"Charging reaction does not use canonical amino acid (uses GLUTAMATE-1-SEMIALDEHYDE)"
-"GLUTRNAREDUCT-R_0"	"Charging reaction does not use canonical amino acid (uses GLUTAMATE-1-SEMIALDEHYDE)"
-"GLUTRNAREDUCT-R_2"	"Charging reaction does not use canonical amino acid (uses GLUTAMATE-1-SEMIALDEHYDE)"
-"RX_WC_METHIONYL-TRNA-FORMYLTRANSFERASE-RXN1"	"Removed to allow only one charged tRNA per uncharged tRNA"
-"RX_WC_METHIONYL-TRNA-FORMYLTRANSFERASE-RXN2"	"Removed to allow only one charged tRNA per uncharged tRNA"
-"RX_WC_METHIONYL-TRNA-FORMYLTRANSFERASE-RXN3"	"Removed to allow only one charged tRNA per uncharged tRNA"
-"RX_WC_METHIONYL-TRNA-FORMYLTRANSFERASE-RXN4"	"Removed to allow only one charged tRNA per uncharged tRNA"
-"RXN0-6434"	"Reaction does not have both an uncharged and charged tRNA"
-"2.9.1.1-R_0"	"Reaction does not have both an uncharged and charged tRNA"
+"RXN0-2161_selC-tRNA"	"Ignored because two step Sel charging (Ser charging, Ser -> Sel) does not fit with modeling framework of other charging reactions. Replaced by SELENOCYSTEINE--TRNA-LIGASE-RXN_selC-tRNA."
diff --git a/reconstruction/ecoli/knowledge_base_raw.py b/reconstruction/ecoli/knowledge_base_raw.py
@@ -58,6 +58,7 @@
 	"tf_one_component_bound.tsv",
 	"translation_efficiency.tsv",
 	"trna_charging_reactions.tsv",
+	"trna_charging_reactions_added.tsv",
 	"trna_charging_reactions_removed.tsv",
 	"two_component_systems.tsv",
 	"two_component_system_templates.tsv",
@@ -108,6 +109,11 @@
 # TODO: add other removed files here and not handle removing in scripts
 REMOVED_DATA = {
 	'metabolite_concentrations': 'metabolite_concentrations_removed',
+	'trna_charging_reactions': 'trna_charging_reactions_removed',
+	}
+# TODO: move rows that were added directly to other flat files into separate "_added" files and register them here
+ADDED_DATA = {
+	'trna_charging_reactions': 'trna_charging_reactions_added',
 	}
 
 class DataStore(object):
@@ -128,6 +134,7 @@ def __init__(self):
 			self._load_parameters(os.path.join(FLAT_DIR, filename))
 
 		self._prune_data()
+		self._join_data()
 
 		self.genome_sequence = self._load_sequence(os.path.join(FLAT_DIR, SEQUENCE_FILE))
 
@@ -178,18 +185,40 @@ def _prune_data(self):
 		"""
 
 		# Check each pair of files to be removed
-		for data, to_remove in REMOVED_DATA.items():
+		for data_attr, attr_to_remove in REMOVED_DATA.items():
 			# Build the set of data to identify rows to be removed
-			attr_removed = getattr(self, to_remove)
-			removed_cols = list(attr_removed[0].keys())
+			data_to_remove = getattr(self, attr_to_remove)
+			removed_cols = list(data_to_remove[0].keys())
 			removed_ids = set()
-			for row in attr_removed:
+			for row in data_to_remove:
 				removed_ids.add(tuple([row[col] for col in removed_cols]))
 
 			# Remove any matching rows
-			attr_data = getattr(self, data)
-			n_entries = len(attr_data)
-			for i, row in enumerate(attr_data[::-1]):
+			data = getattr(self, data_attr)
+			n_entries = len(data)
+			for i, row in enumerate(data[::-1]):
 				checked_id = tuple([row[col] for col in removed_cols])
 				if checked_id in removed_ids:
-					attr_data.pop(n_entries - i - 1)
+					data.pop(n_entries - i - 1)
+
+	def _join_data(self):
+		"""
+		Append the rows of each ADDED_DATA dataset to its base dataset in place.
+		Raises ValueError if the first rows of the two datasets differ in columns.
+		"""
+
+		for data_attr, attr_to_add in ADDED_DATA.items():
+			# Get datasets to join
+			data = getattr(self, data_attr)
+			added_data = getattr(self, attr_to_add)
+
+			# Columns are read from the first row of each dataset, so the check
+			# is skipped when either is empty instead of failing with IndexError
+			if data and added_data:
+				col_diff = set(data[0].keys()).symmetric_difference(added_data[0].keys())
+				if col_diff:
+					raise ValueError(f'Could not join datasets {data_attr} and {attr_to_add} '
+						f'because columns do not match (different columns: {col_diff}).')
+
+			# Join datasets (nothing is appended when there are no added rows)
+			data.extend(added_data)
diff --git a/reconstruction/spreadsheets.py b/reconstruction/spreadsheets.py
@@ -140,10 +140,10 @@ def __init__(self, *args, **kwargs):
 		fieldnames = [fieldname.strip('"') for fieldname in fieldnames]
 		self.tsv_dict_reader.fieldnames = fieldnames
 
-		# Discard private field names that begin with underscore
+		# Discard empty field names and private field names that begin with an underscore
 		self._fieldnames = [
 			fieldname for fieldname in fieldnames
-			if not fieldname.startswith('_')]
+			if not fieldname.startswith('_') and fieldname != '']
 
 	def __iter__(self):
 		return self
Original file line number	Diff line number	Diff line change
		@@ -0,0 +1,2 @@
		"id" "stoichiometry" "catalyzed_by" "common_name" "_notes"
		"SELENOCYSTEINE--TRNA-LIGASE-RXN_selC-tRNA" {"L-SELENOCYSTEINE":-1,"ATP":-1,"selC-tRNA":-1,"PPI":1,"AMP":1,"charged-selC-tRNA":1} ["CPLX0-1141"] "SELENOCYSTEIN SYNTHASE" "Simplified selenocysteine charging from two steps (RXN0-2161 and 2.9.1.1-RXN) into one to match modeling framework of other charging reactions"