diff --git a/.gitattributes b/.gitattributes index 012b630f..3e0ca4ab 100644 --- a/.gitattributes +++ b/.gitattributes @@ -5,6 +5,7 @@ # Mark as binary to prevent EOL changes. Also no need to track mafft.bat binary mafft/ binary +mafft binary # Files/folders that should not be included when downloading repo as ZIP .gitattributes export-ignore .gitignore export-ignore diff --git a/INIT/runINIT.m b/INIT/runINIT.m index 5221b858..9da20c4a 100644 --- a/INIT/runINIT.m +++ b/INIT/runINIT.m @@ -283,7 +283,7 @@ prob.b=zeros(size(prob.a,1), 1); prob.ub=[prob.bux; prob.buc]; prob.osense=1; -prob.csense=char(1,zeros(size(prob.a,1))); +prob.csense=char(zeros(1,size(prob.a,1))); prob.csense(:)='E'; %We still don't know which of the presentMets that can be produced. Go diff --git a/core/addRxns.m b/core/addRxns.m index 05172dfa..4dd9a94c 100755 --- a/core/addRxns.m +++ b/core/addRxns.m @@ -60,7 +60,7 @@ % interpreted % 1 - The metabolites are matched to model.mets. New % metabolites (if allowed) are added to -% "compartment" +% "compartment" (default) % 2 - The metabolites are matched to model.metNames and % all metabolites are assigned to "compartment". Any % new metabolites that are added will be assigned @@ -101,6 +101,16 @@ % Usage: newModel=addRxns(model,rxnsToAdd,eqnType,compartment,... % allowNewMets,allowNewGenes) +if nargin<3 + eqnType=1; +elseif ~isnumeric(eqnType) + EM='eqnType must be numeric'; + dispEM(EM); +elseif ~ismember(eqnType,[1 2 3]) + EM='eqnType must be 1, 2, or 3'; + dispEM(EM); +end + if nargin<4 compartment=[]; else @@ -139,17 +149,6 @@ return; end -%Check the input -if isfield(rxnsToAdd,'stoichCoeffs') - eqnType=1; -elseif ~isnumeric(eqnType) - EM='eqnType must be numeric'; - dispEM(EM); -elseif ~ismember(eqnType,[1 2 3]) - EM='eqnType must be 1, 2, or 3'; - dispEM(EM); -end - if eqnType==2 || (eqnType==1 && allowNewMets==true) compartment=char(compartment); if ~ismember(compartment,model.comps) @@ -518,18 +517,18 @@ if eqnType==1 [I, J]=ismember(mets,model.mets); if ~all(I) - if allowNewMets==true | isstr(allowNewMets) + if allowNewMets==true || ischar(allowNewMets) %Add the new mets metsToAdd.mets=mets(~I); metsToAdd.metNames=metsToAdd.mets; metsToAdd.compartments=compartment; - if isstr(allowNewMets) + if ischar(allowNewMets) newModel=addMets(newModel,metsToAdd,true,allowNewMets); else newModel=addMets(newModel,metsToAdd,true); end else - EM='One or more equations contain metabolites that are not in model.mets. Set allowNewMets to true to allow this function to add metabolites or use addMets to add them before calling this function'; + EM='One or more equations contain metabolites that are not in model.mets. Set allowNewMets to true to allow this function to add metabolites or use addMets to add them before calling this function. Are you sure that eqnType=1?'; dispEM(EM); end end @@ -553,11 +552,11 @@ [I, J]=ismember(t1,t2); if ~all(I) - if allowNewMets==true | isstr(allowNewMets) + if allowNewMets==true || ischar(allowNewMets) %Add the new mets metsToAdd.metNames=mets(~I); metsToAdd.compartments=compartment; - if isstr(allowNewMets) + if ischar(allowNewMets) newModel=addMets(newModel,metsToAdd,true,allowNewMets); else newModel=addMets(newModel,metsToAdd,true); @@ -603,11 +602,11 @@ [I, J]=ismember(t1,t2); if ~all(I) - if allowNewMets==true | isstr(allowNewMets) + if allowNewMets==true || ischar(allowNewMets) %Add the new mets metsToAdd.metNames=metNames(~I); metsToAdd.compartments=compartments(~I); - if isstr(allowNewMets) + if ischar(allowNewMets) newModel=addMets(newModel,metsToAdd,true,allowNewMets); else newModel=addMets(newModel,metsToAdd,true); diff --git a/core/changeRxns.m b/core/changeRxns.m index e4b068cc..aee91e85 100755 --- a/core/changeRxns.m +++ b/core/changeRxns.m @@ -13,7 +13,7 @@ % interpreted % 1 - The metabolites are matched to model.mets. New % metabolites (if allowed) are added to -% "compartment" +% "compartment" (default) % 2 - The metabolites are matched to model.metNames and % all metabolites are assigned to "compartment". Any % new metabolites that are added will be assigned @@ -53,7 +53,7 @@ % % Usage: model=changeRxns(model,rxns,equations,eqnType,compartment,allowNewMets) -if nargin<4 & isfield(equations,'stoichCoeffs') +if nargin<4 eqnType=1; end @@ -65,7 +65,6 @@ end rxns=convertCharArray(rxns); -equations=convertCharArray(equations); compartment=char(compartment); %Find the indexes of the reactions and throw an error if they aren't all @@ -86,7 +85,7 @@ rxnsToChange.mets=equations.mets; rxnsToChange.stoichCoeffs=equations.stoichCoeffs; else - rxnsToChange.equations=equations; + rxnsToChange.equations=convertCharArray(equations); end if isfield(model,'rxnNames') rxnsToChange.rxnNames=model.rxnNames(J); diff --git a/core/getGenesFromGrRules.m b/core/getGenesFromGrRules.m index 176ec081..7f84fa5f 100644 --- a/core/getGenesFromGrRules.m +++ b/core/getGenesFromGrRules.m @@ -1,9 +1,9 @@ -function [genes,rxnGeneMat] = getGenesFromGrRules(grRules) +function [genes,rxnGeneMat] = getGenesFromGrRules(grRules, originalGenes) %getGenesFromGrRules Extract gene list and rxnGeneMat from grRules array. % % USAGE: % -% [genes,rxnGeneMat] = getGenesFromGrRules(grRules); +% [genes,rxnGeneMat] = getGenesFromGrRules(grRules, originalGenes); % % INPUTS: % @@ -12,6 +12,7 @@ % NOTE: Boolean operators can be text ("and", "or") or % symbolic ("&", "|"), but there must be a space % between operators and gene names/IDs. +% originalGenes The original gene list from the model as reference % % OUTPUTS: % @@ -24,6 +25,11 @@ % +% handle input arguments +if nargin < 2 + originalGenes = []; +end + % check if the grRules use written or symbolic boolean operators if any(contains(grRules,{'&','|'})) % fix some potential missing spaces between parentheses and &/| @@ -50,10 +56,16 @@ nonEmpty = ~cellfun(@isempty,rxnGenes); genes = unique([rxnGenes{nonEmpty}]'); +if ~isempty(originalGenes) + if ~isequal(sort(originalGenes), sort(genes)) + error('The grRules and original gene list are inconsistent!'); + else + genes = originalGenes; + end +end + % construct new rxnGeneMat (if requested) if nargout > 1 rxnGeneCell = cellfun(@(rg) ismember(genes,rg),rxnGenes,'UniformOutput',false); rxnGeneMat = sparse(double(horzcat(rxnGeneCell{:})')); -end - - +end \ No newline at end of file diff --git a/doc/INIT/runINIT.html b/doc/INIT/runINIT.html index 63312a58..2b85ff7c 100644 --- a/doc/INIT/runINIT.html +++ b/doc/INIT/runINIT.html @@ -388,7 +388,7 @@

SOURCE CODE ^'E'; 0288 0289 %We still don't know which of the presentMets that can be produced. Go diff --git a/doc/core/addRxns.html b/doc/core/addRxns.html index 22f9d1d2..bdb55b20 100644 --- a/doc/core/addRxns.html +++ b/doc/core/addRxns.html @@ -88,7 +88,7 @@

DESCRIPTION ^SOURCE CODE ^% interpreted 0061 % 1 - The metabolites are matched to model.mets. New 0062 % metabolites (if allowed) are added to -0063 % "compartment" +0063 % "compartment" (default) 0064 % 2 - The metabolites are matched to model.metNames and 0065 % all metabolites are assigned to "compartment". Any 0066 % new metabolites that are added will be assigned @@ -245,561 +245,560 @@

SOURCE CODE ^% Usage: newModel=addRxns(model,rxnsToAdd,eqnType,compartment,... 0102 % allowNewMets,allowNewGenes) 0103 -0104 if nargin<4 -0105 compartment=[]; -0106 else -0107 compartment=char(compartment); -0108 end -0109 if nargin<5 -0110 allowNewMets=false; -0111 elseif ~islogical(allowNewMets) -0112 allowNewMets=char(allowNewMets); -0113 end -0114 if nargin<6 -0115 allowNewGenes=false; -0116 end -0117 -0118 if allowNewGenes & isfield(rxnsToAdd,'grRules') -0119 genesToAdd.genes = strjoin(convertCharArray(rxnsToAdd.grRules)); -0120 genesToAdd.genes = regexp(genesToAdd.genes,' |)|(|and|or','split'); % Remove all grRule punctuation -0121 genesToAdd.genes = genesToAdd.genes(~cellfun(@isempty,genesToAdd.genes)); % Remove spaces and empty genes -0122 genesToAdd.genes = setdiff(unique(genesToAdd.genes),model.genes); % Only keep new genes -0123 if isfield(model,'geneComps') -0124 genesToAdd.geneComps(1:numel(genesToAdd.genes)) = repmat(11,numel(genesToAdd.genes),1); -0125 end -0126 if ~isempty(genesToAdd.genes) -0127 fprintf('\nNew genes added to the model:\n') -0128 fprintf([strjoin(genesToAdd.genes,'\n') '\n']) -0129 newModel=addGenesRaven(model,genesToAdd); -0130 else -0131 newModel=model; -0132 end -0133 else -0134 newModel=model; -0135 end -0136 -0137 %If no reactions should be added -0138 if isempty(rxnsToAdd) -0139 return; -0140 end -0141 -0142 %Check the input -0143 if isfield(rxnsToAdd,'stoichCoeffs') -0144 eqnType=1; -0145 elseif ~isnumeric(eqnType) -0146 EM='eqnType must be numeric'; -0147 dispEM(EM); -0148 elseif ~ismember(eqnType,[1 2 3]) -0149 EM='eqnType must be 1, 2, or 3'; -0150 dispEM(EM); -0151 end -0152 -0153 if eqnType==2 || (eqnType==1 && allowNewMets==true) -0154 compartment=char(compartment); -0155 if ~ismember(compartment,model.comps) -0156 EM='compartment must match one of the compartments in model.comps'; -0157 dispEM(EM); -0158 end -0159 end -0160 -0161 if ~isfield(rxnsToAdd,'rxns') -0162 EM='rxns is a required field in rxnsToAdd'; -0163 dispEM(EM); -0164 else -0165 rxnsToAdd.rxns=convertCharArray(rxnsToAdd.rxns); -0166 %To fit with some later printing -0167 rxnsToAdd.rxns=rxnsToAdd.rxns(:); -0168 end -0169 if ~(isfield(rxnsToAdd,'equations') || (isfield(rxnsToAdd,'mets') && isfield(rxnsToAdd,'stoichCoeffs'))) -0170 EM='Either "equations" or "mets"+"stoichCoeffs" are required fields in rxnsToAdd'; -0171 dispEM(EM); -0172 end -0173 -0174 if any(ismember(rxnsToAdd.rxns,model.rxns)) -0175 EM='One or more reaction id was already present in the model. Use changeRxns or remove the existing reactions before adding new ones'; -0176 dispEM(EM); -0177 end -0178 -0179 %Normal case: equations provided -0180 if isfield(rxnsToAdd,'equations') -0181 rxnsToAdd.equations=convertCharArray(rxnsToAdd.equations); -0182 -0183 %Alternative case: mets+stoichiometry provided -0184 else -0185 %In the case of 1 rxn added (array of strings + vector), transform to -0186 %cells of length=1: -0187 if iscellstr(rxnsToAdd.mets) -0188 rxnsToAdd.mets={rxnsToAdd.mets}; -0189 end -0190 if isnumeric(rxnsToAdd.stoichCoeffs) -0191 rxnsToAdd.stoichCoeffs = {rxnsToAdd.stoichCoeffs}; -0192 end -0193 %Now both rxnsToAdd.mets and rxnsToAdd.stoichCoeffs should be cell -0194 %arrays & of the same size: -0195 if ~iscell(rxnsToAdd.mets) || ~iscell(rxnsToAdd.stoichCoeffs) -0196 EM='rxnsToAdd.mets & rxnsToAdd.stoichCoeffs must be cell arrays'; -0197 dispEM(EM); -0198 elseif length(rxnsToAdd.stoichCoeffs) ~= length(rxnsToAdd.mets) -0199 EM = 'rxnsToAdd.stoichCoeffs must have the same number of elements as rxnsToAdd.mets'; -0200 dispEM(EM); -0201 end -0202 %In this case we need lb to decide if the reaction is reversible or not: -0203 if ~isfield(rxnsToAdd,'lb') -0204 %Fill with standard if it doesn't exist -0205 rxnsToAdd.lb=-inf(size(rxnsToAdd.mets)); -0206 elseif ~isnumeric(rxnsToAdd.lb) -0207 EM = 'rxnsToAdd.lb must be a vector'; -0208 dispEM(EM); -0209 elseif length(rxnsToAdd.lb) ~= length(rxnsToAdd.mets) -0210 EM = 'rxnsToAdd.lb must have the same number of elements as rxnsToAdd.mets'; -0211 dispEM(EM); -0212 end -0213 %Now we construct equations, to comply with the rest of the script: -0214 rxnsToAdd.equations = cell(size(rxnsToAdd.mets)); -0215 for i = 1:length(rxnsToAdd.mets) -0216 mets = rxnsToAdd.mets{i}; -0217 stoichCoeffs = rxnsToAdd.stoichCoeffs{i}; -0218 isrev = rxnsToAdd.lb(i) < 0; -0219 rxnsToAdd.equations{i} = buildEquation(mets,stoichCoeffs,isrev); -0220 end -0221 end -0222 -0223 nRxns=numel(rxnsToAdd.rxns); -0224 nOldRxns=numel(model.rxns); -0225 filler=cell(nRxns,1); -0226 filler(:)={''}; -0227 cellfiller=cellfun(@(x) cell(0,0),filler,'UniformOutput',false); -0228 largeFiller=cell(nOldRxns,1); -0229 largeFiller(:)={''}; -0230 celllargefiller=cellfun(@(x) cell(0,0),largeFiller,'UniformOutput',false); -0231 -0232 %***Add everything to the model except for the equations. -0233 if numel(rxnsToAdd.equations)~=nRxns -0234 EM='rxnsToAdd.equations must have the same number of elements as rxnsToAdd.rxns'; -0235 dispEM(EM); -0236 end -0237 -0238 %Parse the equations. This is done at this early stage since I need the -0239 %reversibility info -0240 [S, mets, badRxns, reversible]=constructS(rxnsToAdd.equations); -0241 EM='The following equations have one or more metabolites both as substrate and product. Only the net equations will be added:'; -0242 dispEM(EM,false,rxnsToAdd.rxns(badRxns)); -0243 -0244 newModel.rev=[newModel.rev;reversible]; -0245 newModel.rxns=[newModel.rxns;rxnsToAdd.rxns(:)]; -0246 -0247 if isfield(rxnsToAdd,'rxnNames') -0248 rxnsToAdd.rxnNames=convertCharArray(rxnsToAdd.rxnNames); -0249 if numel(rxnsToAdd.rxnNames)~=nRxns -0250 EM='rxnsToAdd.rxnNames must have the same number of elements as rxnsToAdd.rxns'; -0251 dispEM(EM); -0252 end -0253 %Fill with standard if it doesn't exist -0254 if ~isfield(newModel,'rxnNames') -0255 newModel.rxnNames=largeFiller; -0256 end -0257 newModel.rxnNames=[newModel.rxnNames;rxnsToAdd.rxnNames(:)]; -0258 else -0259 %Fill with standard if it doesn't exist -0260 if isfield(newModel,'rxnNames') -0261 newModel.rxnNames=[newModel.rxnNames;filler]; -0262 end -0263 end -0264 -0265 if isfield(newModel,'annotation') & isfield(newModel.annotation,'defaultLB') -0266 newLb=newModel.annotation.defaultLB; -0267 else -0268 newLb=-inf; -0269 end -0270 -0271 if isfield(rxnsToAdd,'lb') -0272 if numel(rxnsToAdd.lb)~=nRxns -0273 EM='rxnsToAdd.lb must have the same number of elements as rxnsToAdd.rxns'; -0274 dispEM(EM); -0275 end -0276 %Fill with standard if it doesn't exist -0277 if ~isfield(newModel,'lb') -0278 newModel.lb=zeros(nOldRxns,1); -0279 newModel.lb(newModel.rev~=0)=newLb; -0280 end -0281 newModel.lb=[newModel.lb;rxnsToAdd.lb(:)]; -0282 else -0283 %Fill with standard if it doesn't exist -0284 if isfield(newModel,'lb') -0285 I=zeros(nRxns,1); -0286 I(reversible~=0)=newLb; -0287 newModel.lb=[newModel.lb;I]; -0288 end -0289 end -0290 -0291 if isfield(newModel,'annotation') & isfield(newModel.annotation,'defaultUB') -0292 newUb=newModel.annotation.defaultUB; -0293 else -0294 newUb=inf; -0295 end -0296 -0297 if isfield(rxnsToAdd,'ub') -0298 if numel(rxnsToAdd.ub)~=nRxns -0299 EM='rxnsToAdd.ub must have the same number of elements as rxnsToAdd.rxns'; -0300 dispEM(EM); -0301 end -0302 %Fill with standard if it doesn't exist -0303 if ~isfield(newModel,'ub') -0304 newModel.ub=repmat(newUb,nOldRxns,1); -0305 end -0306 newModel.ub=[newModel.ub;rxnsToAdd.ub(:)]; -0307 else -0308 %Fill with standard if it doesn't exist -0309 if isfield(newModel,'ub') -0310 newModel.ub=[newModel.ub;repmat(newUb,nRxns,1)]; -0311 end -0312 end -0313 -0314 if isfield(rxnsToAdd,'c') -0315 if numel(rxnsToAdd.c)~=nRxns -0316 EM='rxnsToAdd.c must have the same number of elements as rxnsToAdd.rxns'; -0317 dispEM(EM); -0318 end -0319 %Fill with standard if it doesn't exist -0320 if ~isfield(newModel,'c') -0321 newModel.c=zeros(nOldRxns,1); -0322 end -0323 newModel.c=[newModel.c;rxnsToAdd.c(:)]; -0324 else -0325 %Fill with standard if it doesn't exist -0326 if isfield(newModel,'c') -0327 newModel.c=[newModel.c;zeros(nRxns,1)]; -0328 end -0329 end -0330 -0331 if isfield(rxnsToAdd,'eccodes') -0332 rxnsToAdd.eccodes=convertCharArray(rxnsToAdd.eccodes); -0333 if numel(rxnsToAdd.eccodes)~=nRxns -0334 EM='rxnsToAdd.eccodes must have the same number of elements as rxnsToAdd.rxns'; -0335 dispEM(EM); -0336 end -0337 %Fill with standard if it doesn't exist -0338 if ~isfield(newModel,'eccodes') -0339 newModel.eccodes=largeFiller; -0340 end -0341 newModel.eccodes=[newModel.eccodes;rxnsToAdd.eccodes(:)]; -0342 else -0343 %Fill with standard if it doesn't exist -0344 if isfield(newModel,'eccodes') -0345 newModel.eccodes=[newModel.eccodes;filler]; -0346 end -0347 end -0348 -0349 if isfield(rxnsToAdd,'subSystems') -0350 if numel(rxnsToAdd.subSystems)~=nRxns -0351 EM='rxnsToAdd.subSystems must have the same number of elements as rxnsToAdd.rxns'; -0352 dispEM(EM); -0353 end -0354 for i=1:numel(rxnsToAdd.subSystems) -0355 if ischar(rxnsToAdd.subSystems{i}) -0356 rxnsToAdd.subSystems{i}=rxnsToAdd.subSystems(i); -0357 end -0358 end -0359 %Fill with standard if it doesn't exist -0360 if ~isfield(newModel,'subSystems') -0361 newModel.subSystems=celllargefiller; -0362 end -0363 newModel.subSystems=[newModel.subSystems;rxnsToAdd.subSystems]; -0364 else -0365 %Fill with standard if it doesn't exist -0366 if isfield(newModel,'subSystems') -0367 newModel.subSystems=[newModel.subSystems;cellfiller]; -0368 end -0369 end -0370 if isfield(rxnsToAdd,'rxnMiriams') -0371 if numel(rxnsToAdd.rxnMiriams)~=nRxns -0372 EM='rxnsToAdd.rxnMiriams must have the same number of elements as rxnsToAdd.rxns'; -0373 dispEM(EM); -0374 end -0375 %Fill with standard if it doesn't exist -0376 if ~isfield(newModel,'rxnMiriams') -0377 newModel.rxnMiriams=cell(nOldRxns,1); -0378 end -0379 newModel.rxnMiriams=[newModel.rxnMiriams;rxnsToAdd.rxnMiriams(:)]; -0380 else -0381 %Fill with standard if it doesn't exist -0382 if isfield(newModel,'rxnMiriams') -0383 newModel.rxnMiriams=[newModel.rxnMiriams;cell(nRxns,1)]; -0384 end -0385 end -0386 if isfield(rxnsToAdd,'rxnComps') -0387 if numel(rxnsToAdd.rxnComps)~=nRxns -0388 EM='rxnsToAdd.rxnComps must have the same number of elements as rxnsToAdd.rxns'; -0389 dispEM(EM); -0390 end -0391 %Fill with standard if it doesn't exist -0392 if ~isfield(newModel,'rxnComps') -0393 newModel.rxnComps=ones(nOldRxns,1); -0394 EM='Adding reactions with compartment information to a model without such information. All existing reactions will be assigned to the first compartment'; -0395 dispEM(EM,false); -0396 end -0397 newModel.rxnComps=[newModel.rxnComps;rxnsToAdd.rxnComps(:)]; -0398 else -0399 %Fill with standard if it doesn't exist -0400 if isfield(newModel,'rxnComps') -0401 newModel.rxnComps=[newModel.rxnComps;ones(nRxns,1)]; -0402 %fprintf('NOTE: The added reactions will be assigned to the first -0403 %compartment\n'); -0404 end -0405 end -0406 if isfield(rxnsToAdd,'grRules') -0407 rxnsToAdd.grRules=convertCharArray(rxnsToAdd.grRules); -0408 if numel(rxnsToAdd.grRules)~=nRxns -0409 EM='rxnsToAdd.grRules must have the same number of elements as rxnsToAdd.rxns'; -0410 dispEM(EM); -0411 end -0412 %Fill with standard if it doesn't exist -0413 if ~isfield(newModel,'grRules') -0414 newModel.grRules=largeFiller; -0415 end -0416 newModel.grRules=[newModel.grRules;rxnsToAdd.grRules(:)]; -0417 else -0418 %Fill with standard if it doesn't exist -0419 if isfield(newModel,'grRules') -0420 newModel.grRules=[newModel.grRules;filler]; -0421 end -0422 end -0423 -0424 if isfield(rxnsToAdd,'rxnFrom') -0425 rxnsToAdd.rxnFrom=convertCharArray(rxnsToAdd.rxnFrom); -0426 if numel(rxnsToAdd.rxnFrom)~=nRxns -0427 EM='rxnsToAdd.rxnFrom must have the same number of elements as rxnsToAdd.rxns'; -0428 dispEM(EM); -0429 end -0430 %Fill with standard if it doesn't exist -0431 if ~isfield(newModel,'rxnFrom') -0432 newModel.rxnFrom=largeFiller; -0433 end -0434 newModel.rxnFrom=[newModel.rxnFrom;rxnsToAdd.rxnFrom(:)]; -0435 else -0436 %Fill with standard if it doesn't exist -0437 if isfield(newModel,'rxnFrom') -0438 newModel.rxnFrom=[newModel.rxnFrom;filler]; -0439 end -0440 end -0441 -0442 if isfield(rxnsToAdd,'rxnNotes') -0443 rxnsToAdd.rxnNotes=convertCharArray(rxnsToAdd.rxnNotes); -0444 if numel(rxnsToAdd.rxnNotes)~=nRxns -0445 EM='rxnsToAdd.rxnNotes must have the same number of elements as rxnsToAdd.rxns'; -0446 dispEM(EM); -0447 end -0448 %Fill with standard if it doesn't exist -0449 if ~isfield(newModel,'rxnNotes') -0450 newModel.rxnNotes=largeFiller; -0451 end -0452 newModel.rxnNotes=[newModel.rxnNotes;rxnsToAdd.rxnNotes(:)]; -0453 else -0454 %Fill with standard if it doesn't exist -0455 if isfield(newModel,'rxnNotes') -0456 newModel.rxnNotes=[newModel.rxnNotes;filler]; -0457 end -0458 end -0459 -0460 if isfield(rxnsToAdd,'rxnReferences') -0461 rxnsToAdd.rxnReferences=convertCharArray(rxnsToAdd.rxnReferences); -0462 if numel(rxnsToAdd.rxnReferences)~=nRxns -0463 EM='rxnsToAdd.rxnReferences must have the same number of elements as rxnsToAdd.rxns'; -0464 dispEM(EM); -0465 end -0466 %Fill with standard if it doesn't exist -0467 if ~isfield(newModel,'rxnReferences') -0468 newModel.rxnReferences=largeFiller; -0469 end -0470 newModel.rxnReferences=[newModel.rxnReferences;rxnsToAdd.rxnReferences(:)]; -0471 else -0472 %Fill with standard if it doesn't exist -0473 if isfield(newModel,'rxnReferences') -0474 newModel.rxnReferences=[newModel.rxnReferences;filler]; -0475 end -0476 end -0477 -0478 if isfield(rxnsToAdd,'pwys') -0479 rxnsToAdd.pwys=convertCharArray(rxnsToAdd.pwys); -0480 if numel(rxnsToAdd.pwys)~=nRxns -0481 EM='rxnsToAdd.pwys must have the same number of elements as rxnsToAdd.rxns'; -0482 dispEM(EM); -0483 end -0484 %Fill with standard if it doesn't exist -0485 if ~isfield(newModel,'pwys') -0486 newModel.pwys=largeFiller; -0487 end -0488 newModel.pwys=[newModel.pwys;rxnsToAdd.pwys(:)]; -0489 else -0490 %Fill with standard if it doesn't exist -0491 if isfield(newModel,'pwys') -0492 newModel.pwys=[newModel.pwys;filler]; -0493 end -0494 end -0495 -0496 if isfield(rxnsToAdd,'rxnConfidenceScores') -0497 if numel(rxnsToAdd.rxnConfidenceScores)~=nRxns -0498 EM='rxnsToAdd.rxnConfidenceScores must have the same number of elements as rxnsToAdd.rxns'; -0499 dispEM(EM); -0500 end -0501 %Fill with standard if it doesn't exist -0502 if ~isfield(newModel,'rxnConfidenceScores') -0503 newModel.rxnConfidenceScores=NaN(nOldRxns,1); -0504 EM='Adding reactions with confidence scores without such information. All existing reactions will have confidence scores as NaNs'; -0505 dispEM(EM,false); -0506 end -0507 newModel.rxnConfidenceScores=[newModel.rxnConfidenceScores;rxnsToAdd.rxnConfidenceScores(:)]; -0508 else -0509 %Fill with standard if it doesn't exist -0510 if isfield(newModel,'rxnConfidenceScores') -0511 newModel.rxnConfidenceScores=[newModel.rxnConfidenceScores;NaN(nRxns,1)]; -0512 end -0513 end +0104 if nargin<3 +0105 eqnType=1; +0106 elseif ~isnumeric(eqnType) +0107 EM='eqnType must be numeric'; +0108 dispEM(EM); +0109 elseif ~ismember(eqnType,[1 2 3]) +0110 EM='eqnType must be 1, 2, or 3'; +0111 dispEM(EM); +0112 end +0113 +0114 if nargin<4 +0115 compartment=[]; +0116 else +0117 compartment=char(compartment); +0118 end +0119 if nargin<5 +0120 allowNewMets=false; +0121 elseif ~islogical(allowNewMets) +0122 allowNewMets=char(allowNewMets); +0123 end +0124 if nargin<6 +0125 allowNewGenes=false; +0126 end +0127 +0128 if allowNewGenes & isfield(rxnsToAdd,'grRules') +0129 genesToAdd.genes = strjoin(convertCharArray(rxnsToAdd.grRules)); +0130 genesToAdd.genes = regexp(genesToAdd.genes,' |)|(|and|or','split'); % Remove all grRule punctuation +0131 genesToAdd.genes = genesToAdd.genes(~cellfun(@isempty,genesToAdd.genes)); % Remove spaces and empty genes +0132 genesToAdd.genes = setdiff(unique(genesToAdd.genes),model.genes); % Only keep new genes +0133 if isfield(model,'geneComps') +0134 genesToAdd.geneComps(1:numel(genesToAdd.genes)) = repmat(11,numel(genesToAdd.genes),1); +0135 end +0136 if ~isempty(genesToAdd.genes) +0137 fprintf('\nNew genes added to the model:\n') +0138 fprintf([strjoin(genesToAdd.genes,'\n') '\n']) +0139 newModel=addGenesRaven(model,genesToAdd); +0140 else +0141 newModel=model; +0142 end +0143 else +0144 newModel=model; +0145 end +0146 +0147 %If no reactions should be added +0148 if isempty(rxnsToAdd) +0149 return; +0150 end +0151 +0152 if eqnType==2 || (eqnType==1 && allowNewMets==true) +0153 compartment=char(compartment); +0154 if ~ismember(compartment,model.comps) +0155 EM='compartment must match one of the compartments in model.comps'; +0156 dispEM(EM); +0157 end +0158 end +0159 +0160 if ~isfield(rxnsToAdd,'rxns') +0161 EM='rxns is a required field in rxnsToAdd'; +0162 dispEM(EM); +0163 else +0164 rxnsToAdd.rxns=convertCharArray(rxnsToAdd.rxns); +0165 %To fit with some later printing +0166 rxnsToAdd.rxns=rxnsToAdd.rxns(:); +0167 end +0168 if ~(isfield(rxnsToAdd,'equations') || (isfield(rxnsToAdd,'mets') && isfield(rxnsToAdd,'stoichCoeffs'))) +0169 EM='Either "equations" or "mets"+"stoichCoeffs" are required fields in rxnsToAdd'; +0170 dispEM(EM); +0171 end +0172 +0173 if any(ismember(rxnsToAdd.rxns,model.rxns)) +0174 EM='One or more reaction id was already present in the model. Use changeRxns or remove the existing reactions before adding new ones'; +0175 dispEM(EM); +0176 end +0177 +0178 %Normal case: equations provided +0179 if isfield(rxnsToAdd,'equations') +0180 rxnsToAdd.equations=convertCharArray(rxnsToAdd.equations); +0181 +0182 %Alternative case: mets+stoichiometry provided +0183 else +0184 %In the case of 1 rxn added (array of strings + vector), transform to +0185 %cells of length=1: +0186 if iscellstr(rxnsToAdd.mets) +0187 rxnsToAdd.mets={rxnsToAdd.mets}; +0188 end +0189 if isnumeric(rxnsToAdd.stoichCoeffs) +0190 rxnsToAdd.stoichCoeffs = {rxnsToAdd.stoichCoeffs}; +0191 end +0192 %Now both rxnsToAdd.mets and rxnsToAdd.stoichCoeffs should be cell +0193 %arrays & of the same size: +0194 if ~iscell(rxnsToAdd.mets) || ~iscell(rxnsToAdd.stoichCoeffs) +0195 EM='rxnsToAdd.mets & rxnsToAdd.stoichCoeffs must be cell arrays'; +0196 dispEM(EM); +0197 elseif length(rxnsToAdd.stoichCoeffs) ~= length(rxnsToAdd.mets) +0198 EM = 'rxnsToAdd.stoichCoeffs must have the same number of elements as rxnsToAdd.mets'; +0199 dispEM(EM); +0200 end +0201 %In this case we need lb to decide if the reaction is reversible or not: +0202 if ~isfield(rxnsToAdd,'lb') +0203 %Fill with standard if it doesn't exist +0204 rxnsToAdd.lb=-inf(size(rxnsToAdd.mets)); +0205 elseif ~isnumeric(rxnsToAdd.lb) +0206 EM = 'rxnsToAdd.lb must be a vector'; +0207 dispEM(EM); +0208 elseif length(rxnsToAdd.lb) ~= length(rxnsToAdd.mets) +0209 EM = 'rxnsToAdd.lb must have the same number of elements as rxnsToAdd.mets'; +0210 dispEM(EM); +0211 end +0212 %Now we construct equations, to comply with the rest of the script: +0213 rxnsToAdd.equations = cell(size(rxnsToAdd.mets)); +0214 for i = 1:length(rxnsToAdd.mets) +0215 mets = rxnsToAdd.mets{i}; +0216 stoichCoeffs = rxnsToAdd.stoichCoeffs{i}; +0217 isrev = rxnsToAdd.lb(i) < 0; +0218 rxnsToAdd.equations{i} = buildEquation(mets,stoichCoeffs,isrev); +0219 end +0220 end +0221 +0222 nRxns=numel(rxnsToAdd.rxns); +0223 nOldRxns=numel(model.rxns); +0224 filler=cell(nRxns,1); +0225 filler(:)={''}; +0226 cellfiller=cellfun(@(x) cell(0,0),filler,'UniformOutput',false); +0227 largeFiller=cell(nOldRxns,1); +0228 largeFiller(:)={''}; +0229 celllargefiller=cellfun(@(x) cell(0,0),largeFiller,'UniformOutput',false); +0230 +0231 %***Add everything to the model except for the equations. +0232 if numel(rxnsToAdd.equations)~=nRxns +0233 EM='rxnsToAdd.equations must have the same number of elements as rxnsToAdd.rxns'; +0234 dispEM(EM); +0235 end +0236 +0237 %Parse the equations. This is done at this early stage since I need the +0238 %reversibility info +0239 [S, mets, badRxns, reversible]=constructS(rxnsToAdd.equations); +0240 EM='The following equations have one or more metabolites both as substrate and product. Only the net equations will be added:'; +0241 dispEM(EM,false,rxnsToAdd.rxns(badRxns)); +0242 +0243 newModel.rev=[newModel.rev;reversible]; +0244 newModel.rxns=[newModel.rxns;rxnsToAdd.rxns(:)]; +0245 +0246 if isfield(rxnsToAdd,'rxnNames') +0247 rxnsToAdd.rxnNames=convertCharArray(rxnsToAdd.rxnNames); +0248 if numel(rxnsToAdd.rxnNames)~=nRxns +0249 EM='rxnsToAdd.rxnNames must have the same number of elements as rxnsToAdd.rxns'; +0250 dispEM(EM); +0251 end +0252 %Fill with standard if it doesn't exist +0253 if ~isfield(newModel,'rxnNames') +0254 newModel.rxnNames=largeFiller; +0255 end +0256 newModel.rxnNames=[newModel.rxnNames;rxnsToAdd.rxnNames(:)]; +0257 else +0258 %Fill with standard if it doesn't exist +0259 if isfield(newModel,'rxnNames') +0260 newModel.rxnNames=[newModel.rxnNames;filler]; +0261 end +0262 end +0263 +0264 if isfield(newModel,'annotation') & isfield(newModel.annotation,'defaultLB') +0265 newLb=newModel.annotation.defaultLB; +0266 else +0267 newLb=-inf; +0268 end +0269 +0270 if isfield(rxnsToAdd,'lb') +0271 if numel(rxnsToAdd.lb)~=nRxns +0272 EM='rxnsToAdd.lb must have the same number of elements as rxnsToAdd.rxns'; +0273 dispEM(EM); +0274 end +0275 %Fill with standard if it doesn't exist +0276 if ~isfield(newModel,'lb') +0277 newModel.lb=zeros(nOldRxns,1); +0278 newModel.lb(newModel.rev~=0)=newLb; +0279 end +0280 newModel.lb=[newModel.lb;rxnsToAdd.lb(:)]; +0281 else +0282 %Fill with standard if it doesn't exist +0283 if isfield(newModel,'lb') +0284 I=zeros(nRxns,1); +0285 I(reversible~=0)=newLb; +0286 newModel.lb=[newModel.lb;I]; +0287 end +0288 end +0289 +0290 if isfield(newModel,'annotation') & isfield(newModel.annotation,'defaultUB') +0291 newUb=newModel.annotation.defaultUB; +0292 else +0293 newUb=inf; +0294 end +0295 +0296 if isfield(rxnsToAdd,'ub') +0297 if numel(rxnsToAdd.ub)~=nRxns +0298 EM='rxnsToAdd.ub must have the same number of elements as rxnsToAdd.rxns'; +0299 dispEM(EM); +0300 end +0301 %Fill with standard if it doesn't exist +0302 if ~isfield(newModel,'ub') +0303 newModel.ub=repmat(newUb,nOldRxns,1); +0304 end +0305 newModel.ub=[newModel.ub;rxnsToAdd.ub(:)]; +0306 else +0307 %Fill with standard if it doesn't exist +0308 if isfield(newModel,'ub') +0309 newModel.ub=[newModel.ub;repmat(newUb,nRxns,1)]; +0310 end +0311 end +0312 +0313 if isfield(rxnsToAdd,'c') +0314 if numel(rxnsToAdd.c)~=nRxns +0315 EM='rxnsToAdd.c must have the same number of elements as rxnsToAdd.rxns'; +0316 dispEM(EM); +0317 end +0318 %Fill with standard if it doesn't exist +0319 if ~isfield(newModel,'c') +0320 newModel.c=zeros(nOldRxns,1); +0321 end +0322 newModel.c=[newModel.c;rxnsToAdd.c(:)]; +0323 else +0324 %Fill with standard if it doesn't exist +0325 if isfield(newModel,'c') +0326 newModel.c=[newModel.c;zeros(nRxns,1)]; +0327 end +0328 end +0329 +0330 if isfield(rxnsToAdd,'eccodes') +0331 rxnsToAdd.eccodes=convertCharArray(rxnsToAdd.eccodes); +0332 if numel(rxnsToAdd.eccodes)~=nRxns +0333 EM='rxnsToAdd.eccodes must have the same number of elements as rxnsToAdd.rxns'; +0334 dispEM(EM); +0335 end +0336 %Fill with standard if it doesn't exist +0337 if ~isfield(newModel,'eccodes') +0338 newModel.eccodes=largeFiller; +0339 end +0340 newModel.eccodes=[newModel.eccodes;rxnsToAdd.eccodes(:)]; +0341 else +0342 %Fill with standard if it doesn't exist +0343 if isfield(newModel,'eccodes') +0344 newModel.eccodes=[newModel.eccodes;filler]; +0345 end +0346 end +0347 +0348 if isfield(rxnsToAdd,'subSystems') +0349 if numel(rxnsToAdd.subSystems)~=nRxns +0350 EM='rxnsToAdd.subSystems must have the same number of elements as rxnsToAdd.rxns'; +0351 dispEM(EM); +0352 end +0353 for i=1:numel(rxnsToAdd.subSystems) +0354 if ischar(rxnsToAdd.subSystems{i}) +0355 rxnsToAdd.subSystems{i}=rxnsToAdd.subSystems(i); +0356 end +0357 end +0358 %Fill with standard if it doesn't exist +0359 if ~isfield(newModel,'subSystems') +0360 newModel.subSystems=celllargefiller; +0361 end +0362 newModel.subSystems=[newModel.subSystems;rxnsToAdd.subSystems]; +0363 else +0364 %Fill with standard if it doesn't exist +0365 if isfield(newModel,'subSystems') +0366 newModel.subSystems=[newModel.subSystems;cellfiller]; +0367 end +0368 end +0369 if isfield(rxnsToAdd,'rxnMiriams') +0370 if numel(rxnsToAdd.rxnMiriams)~=nRxns +0371 EM='rxnsToAdd.rxnMiriams must have the same number of elements as rxnsToAdd.rxns'; +0372 dispEM(EM); +0373 end +0374 %Fill with standard if it doesn't exist +0375 if ~isfield(newModel,'rxnMiriams') +0376 newModel.rxnMiriams=cell(nOldRxns,1); +0377 end +0378 newModel.rxnMiriams=[newModel.rxnMiriams;rxnsToAdd.rxnMiriams(:)]; +0379 else +0380 %Fill with standard if it doesn't exist +0381 if isfield(newModel,'rxnMiriams') +0382 newModel.rxnMiriams=[newModel.rxnMiriams;cell(nRxns,1)]; +0383 end +0384 end +0385 if isfield(rxnsToAdd,'rxnComps') +0386 if numel(rxnsToAdd.rxnComps)~=nRxns +0387 EM='rxnsToAdd.rxnComps must have the same number of elements as rxnsToAdd.rxns'; +0388 dispEM(EM); +0389 end +0390 %Fill with standard if it doesn't exist +0391 if ~isfield(newModel,'rxnComps') +0392 newModel.rxnComps=ones(nOldRxns,1); +0393 EM='Adding reactions with compartment information to a model without such information. All existing reactions will be assigned to the first compartment'; +0394 dispEM(EM,false); +0395 end +0396 newModel.rxnComps=[newModel.rxnComps;rxnsToAdd.rxnComps(:)]; +0397 else +0398 %Fill with standard if it doesn't exist +0399 if isfield(newModel,'rxnComps') +0400 newModel.rxnComps=[newModel.rxnComps;ones(nRxns,1)]; +0401 %fprintf('NOTE: The added reactions will be assigned to the first +0402 %compartment\n'); +0403 end +0404 end +0405 if isfield(rxnsToAdd,'grRules') +0406 rxnsToAdd.grRules=convertCharArray(rxnsToAdd.grRules); +0407 if numel(rxnsToAdd.grRules)~=nRxns +0408 EM='rxnsToAdd.grRules must have the same number of elements as rxnsToAdd.rxns'; +0409 dispEM(EM); +0410 end +0411 %Fill with standard if it doesn't exist +0412 if ~isfield(newModel,'grRules') +0413 newModel.grRules=largeFiller; +0414 end +0415 newModel.grRules=[newModel.grRules;rxnsToAdd.grRules(:)]; +0416 else +0417 %Fill with standard if it doesn't exist +0418 if isfield(newModel,'grRules') +0419 newModel.grRules=[newModel.grRules;filler]; +0420 end +0421 end +0422 +0423 if isfield(rxnsToAdd,'rxnFrom') +0424 rxnsToAdd.rxnFrom=convertCharArray(rxnsToAdd.rxnFrom); +0425 if numel(rxnsToAdd.rxnFrom)~=nRxns +0426 EM='rxnsToAdd.rxnFrom must have the same number of elements as rxnsToAdd.rxns'; +0427 dispEM(EM); +0428 end +0429 %Fill with standard if it doesn't exist +0430 if ~isfield(newModel,'rxnFrom') +0431 newModel.rxnFrom=largeFiller; +0432 end +0433 newModel.rxnFrom=[newModel.rxnFrom;rxnsToAdd.rxnFrom(:)]; +0434 else +0435 %Fill with standard if it doesn't exist +0436 if isfield(newModel,'rxnFrom') +0437 newModel.rxnFrom=[newModel.rxnFrom;filler]; +0438 end +0439 end +0440 +0441 if isfield(rxnsToAdd,'rxnNotes') +0442 rxnsToAdd.rxnNotes=convertCharArray(rxnsToAdd.rxnNotes); +0443 if numel(rxnsToAdd.rxnNotes)~=nRxns +0444 EM='rxnsToAdd.rxnNotes must have the same number of elements as rxnsToAdd.rxns'; +0445 dispEM(EM); +0446 end +0447 %Fill with standard if it doesn't exist +0448 if ~isfield(newModel,'rxnNotes') +0449 newModel.rxnNotes=largeFiller; +0450 end +0451 newModel.rxnNotes=[newModel.rxnNotes;rxnsToAdd.rxnNotes(:)]; +0452 else +0453 %Fill with standard if it doesn't exist +0454 if isfield(newModel,'rxnNotes') +0455 newModel.rxnNotes=[newModel.rxnNotes;filler]; +0456 end +0457 end +0458 +0459 if isfield(rxnsToAdd,'rxnReferences') +0460 rxnsToAdd.rxnReferences=convertCharArray(rxnsToAdd.rxnReferences); +0461 if numel(rxnsToAdd.rxnReferences)~=nRxns +0462 EM='rxnsToAdd.rxnReferences must have the same number of elements as rxnsToAdd.rxns'; +0463 dispEM(EM); +0464 end +0465 %Fill with standard if it doesn't exist +0466 if ~isfield(newModel,'rxnReferences') +0467 newModel.rxnReferences=largeFiller; +0468 end +0469 newModel.rxnReferences=[newModel.rxnReferences;rxnsToAdd.rxnReferences(:)]; +0470 else +0471 %Fill with standard if it doesn't exist +0472 if isfield(newModel,'rxnReferences') +0473 newModel.rxnReferences=[newModel.rxnReferences;filler]; +0474 end +0475 end +0476 +0477 if isfield(rxnsToAdd,'pwys') +0478 rxnsToAdd.pwys=convertCharArray(rxnsToAdd.pwys); +0479 if numel(rxnsToAdd.pwys)~=nRxns +0480 EM='rxnsToAdd.pwys must have the same number of elements as rxnsToAdd.rxns'; +0481 dispEM(EM); +0482 end +0483 %Fill with standard if it doesn't exist +0484 if ~isfield(newModel,'pwys') +0485 newModel.pwys=largeFiller; +0486 end +0487 newModel.pwys=[newModel.pwys;rxnsToAdd.pwys(:)]; +0488 else +0489 %Fill with standard if it doesn't exist +0490 if isfield(newModel,'pwys') +0491 newModel.pwys=[newModel.pwys;filler]; +0492 end +0493 end +0494 +0495 if isfield(rxnsToAdd,'rxnConfidenceScores') +0496 if numel(rxnsToAdd.rxnConfidenceScores)~=nRxns +0497 EM='rxnsToAdd.rxnConfidenceScores must have the same number of elements as rxnsToAdd.rxns'; +0498 dispEM(EM); +0499 end +0500 %Fill with standard if it doesn't exist +0501 if ~isfield(newModel,'rxnConfidenceScores') +0502 newModel.rxnConfidenceScores=NaN(nOldRxns,1); +0503 EM='Adding reactions with confidence scores without such information. All existing reactions will have confidence scores as NaNs'; +0504 dispEM(EM,false); +0505 end +0506 newModel.rxnConfidenceScores=[newModel.rxnConfidenceScores;rxnsToAdd.rxnConfidenceScores(:)]; +0507 else +0508 %Fill with standard if it doesn't exist +0509 if isfield(newModel,'rxnConfidenceScores') +0510 newModel.rxnConfidenceScores=[newModel.rxnConfidenceScores;NaN(nRxns,1)]; +0511 end +0512 end +0513 0514 -0515 -0516 %***Start parsing the equations and adding the info to the S matrix The -0517 %mets are matched to model.mets -0518 if eqnType==1 -0519 [I, J]=ismember(mets,model.mets); -0520 if ~all(I) -0521 if allowNewMets==true | isstr(allowNewMets) -0522 %Add the new mets -0523 metsToAdd.mets=mets(~I); -0524 metsToAdd.metNames=metsToAdd.mets; -0525 metsToAdd.compartments=compartment; -0526 if isstr(allowNewMets) -0527 newModel=addMets(newModel,metsToAdd,true,allowNewMets); -0528 else -0529 newModel=addMets(newModel,metsToAdd,true); -0530 end -0531 else -0532 EM='One or more equations contain metabolites that are not in model.mets. Set allowNewMets to true to allow this function to add metabolites or use addMets to add them before calling this function'; -0533 dispEM(EM); -0534 end -0535 end -0536 %Calculate the indexes of the metabolites and add the info -0537 metIndexes=J; -0538 metIndexes(~I)=numel(newModel.mets)-sum(~I)+1:numel(newModel.mets); -0539 end -0540 -0541 %Do some stuff that is the same for eqnType=2 and eqnType=3 -0542 if eqnType==2 || eqnType==3 -0543 %For later.. -0544 t2=strcat(model.metNames,'***',model.comps(model.metComps)); -0545 end -0546 -0547 %The mets are matched to model.metNames and assigned to "compartment" -0548 if eqnType==2 -0549 %%Check that the metabolite names aren't present in the same -0550 %%compartment. -0551 %Not the neatest way maybe.. -0552 t1=strcat(mets,'***',compartment); -0553 [I, J]=ismember(t1,t2); -0554 -0555 if ~all(I) -0556 if allowNewMets==true | isstr(allowNewMets) -0557 %Add the new mets -0558 metsToAdd.metNames=mets(~I); -0559 metsToAdd.compartments=compartment; -0560 if isstr(allowNewMets) -0561 newModel=addMets(newModel,metsToAdd,true,allowNewMets); -0562 else -0563 newModel=addMets(newModel,metsToAdd,true); -0564 end -0565 else -0566 EM='One or more equations contain metabolites that are not in model.mets. Set allowNewMets to true to allow this function to add metabolites or use addMets to add them before calling this function'; -0567 dispEM(EM); -0568 end -0569 end -0570 -0571 %Calculate the indexes of the metabolites -0572 metIndexes=J; -0573 metIndexes(~I)=numel(newModel.mets)-sum(~I)+1:numel(newModel.mets); -0574 end -0575 -0576 %The equations are on the form metNames[compName] -0577 if eqnType==3 -0578 %Parse the metabolite names -0579 metNames=cell(numel(mets),1); -0580 compartments=metNames; -0581 for i=1:numel(mets) -0582 starts=max(strfind(mets{i},'[')); -0583 ends=max(strfind(mets{i},']')); -0584 -0585 %Check that the formatting is correct -0586 if isempty(starts) || isempty(ends) || ends<numel(mets{i}) -0587 EM=['The metabolite ' mets{i} ' is not correctly formatted for eqnType=3']; -0588 dispEM(EM); -0589 end -0590 -0591 %Check that the compartment is correct -0592 compartments{i}=mets{i}(starts+1:ends-1); -0593 I=ismember(compartments(i),newModel.comps); -0594 if ~I -0595 EM=['The metabolite ' mets{i} ' has a compartment that is not in model.comps']; -0596 dispEM(EM); -0597 end -0598 metNames{i}=mets{i}(1:starts-1); -0599 end -0600 -0601 %Check if the metabolite exists already -0602 t1=strcat(metNames,'***',compartments); -0603 [I, J]=ismember(t1,t2); -0604 -0605 if ~all(I) -0606 if allowNewMets==true | isstr(allowNewMets) -0607 %Add the new mets -0608 metsToAdd.metNames=metNames(~I); -0609 metsToAdd.compartments=compartments(~I); -0610 if isstr(allowNewMets) -0611 newModel=addMets(newModel,metsToAdd,true,allowNewMets); -0612 else -0613 newModel=addMets(newModel,metsToAdd,true); -0614 end -0615 else -0616 EM='One or more equations contain metabolites that are not in model.metNames. Set allowNewMets to true to allow this function to add metabolites or use addMets to add them before calling this function'; -0617 dispEM(EM); -0618 end -0619 end -0620 -0621 %Calculate the indexes of the metabolites -0622 metIndexes=J; -0623 metIndexes(~I)=numel(newModel.mets)-sum(~I)+1:numel(newModel.mets); -0624 end -0625 -0626 %Add the info to the stoichiometric matrix -0627 newModel.S=[newModel.S sparse(size(newModel.S,1),nRxns)]; -0628 -0629 for i=1:nRxns -0630 newModel.S(metIndexes,nOldRxns+i)=S(:,i); -0631 %Parse the grRules and check whether all genes in grRules appear in -0632 %genes -0633 if isfield(newModel,'grRules') -0634 rule=newModel.grRules{nOldRxns+i}; -0635 rule=strrep(rule,'(',''); -0636 rule=strrep(rule,')',''); -0637 rule=strrep(rule,' or ',' '); -0638 rule=strrep(rule,' and ',' '); -0639 genes=regexp(rule,' ','split'); -0640 [I, J]=ismember(genes,newModel.genes); -0641 if ~all(I) && any(rule) -0642 EM=['Not all genes for reaction ' rxnsToAdd.rxns{i} ' were found in model.genes. If needed, add genes with addGenesRaven before calling this function, or set the ''allowNewGenes'' flag to true']; -0643 dispEM(EM); -0644 end -0645 end -0646 end -0647 -0648 %Make temporary minimal model structure with only new rxns, to parse to -0649 %standardizeGrRules -0650 newRxnsModel.genes=newModel.genes; -0651 newRxnsModel.grRules=newModel.grRules(length(model.rxns)+1:end); -0652 newRxnsModel.rxns=newModel.rxns(length(model.rxns)+1:end); -0653 -0654 %Fix grRules and reconstruct rxnGeneMat -0655 [grRules,rxnGeneMat] = standardizeGrRules(newRxnsModel,true); -0656 newModel.rxnGeneMat = [newModel.rxnGeneMat; rxnGeneMat]; -0657 newModel.grRules = [newModel.grRules(1:nOldRxns); grRules]; -0658 end +0515 %***Start parsing the equations and adding the info to the S matrix The +0516 %mets are matched to model.mets +0517 if eqnType==1 +0518 [I, J]=ismember(mets,model.mets); +0519 if ~all(I) +0520 if allowNewMets==true || ischar(allowNewMets) +0521 %Add the new mets +0522 metsToAdd.mets=mets(~I); +0523 metsToAdd.metNames=metsToAdd.mets; +0524 metsToAdd.compartments=compartment; +0525 if ischar(allowNewMets) +0526 newModel=addMets(newModel,metsToAdd,true,allowNewMets); +0527 else +0528 newModel=addMets(newModel,metsToAdd,true); +0529 end +0530 else +0531 EM='One or more equations contain metabolites that are not in model.mets. Set allowNewMets to true to allow this function to add metabolites or use addMets to add them before calling this function. Are you sure that eqnType=1?'; +0532 dispEM(EM); +0533 end +0534 end +0535 %Calculate the indexes of the metabolites and add the info +0536 metIndexes=J; +0537 metIndexes(~I)=numel(newModel.mets)-sum(~I)+1:numel(newModel.mets); +0538 end +0539 +0540 %Do some stuff that is the same for eqnType=2 and eqnType=3 +0541 if eqnType==2 || eqnType==3 +0542 %For later.. +0543 t2=strcat(model.metNames,'***',model.comps(model.metComps)); +0544 end +0545 +0546 %The mets are matched to model.metNames and assigned to "compartment" +0547 if eqnType==2 +0548 %%Check that the metabolite names aren't present in the same +0549 %%compartment. +0550 %Not the neatest way maybe.. +0551 t1=strcat(mets,'***',compartment); +0552 [I, J]=ismember(t1,t2); +0553 +0554 if ~all(I) +0555 if allowNewMets==true || ischar(allowNewMets) +0556 %Add the new mets +0557 metsToAdd.metNames=mets(~I); +0558 metsToAdd.compartments=compartment; +0559 if ischar(allowNewMets) +0560 newModel=addMets(newModel,metsToAdd,true,allowNewMets); +0561 else +0562 newModel=addMets(newModel,metsToAdd,true); +0563 end +0564 else +0565 EM='One or more equations contain metabolites that are not in model.mets. Set allowNewMets to true to allow this function to add metabolites or use addMets to add them before calling this function'; +0566 dispEM(EM); +0567 end +0568 end +0569 +0570 %Calculate the indexes of the metabolites +0571 metIndexes=J; +0572 metIndexes(~I)=numel(newModel.mets)-sum(~I)+1:numel(newModel.mets); +0573 end +0574 +0575 %The equations are on the form metNames[compName] +0576 if eqnType==3 +0577 %Parse the metabolite names +0578 metNames=cell(numel(mets),1); +0579 compartments=metNames; +0580 for i=1:numel(mets) +0581 starts=max(strfind(mets{i},'[')); +0582 ends=max(strfind(mets{i},']')); +0583 +0584 %Check that the formatting is correct +0585 if isempty(starts) || isempty(ends) || ends<numel(mets{i}) +0586 EM=['The metabolite ' mets{i} ' is not correctly formatted for eqnType=3']; +0587 dispEM(EM); +0588 end +0589 +0590 %Check that the compartment is correct +0591 compartments{i}=mets{i}(starts+1:ends-1); +0592 I=ismember(compartments(i),newModel.comps); +0593 if ~I +0594 EM=['The metabolite ' mets{i} ' has a compartment that is not in model.comps']; +0595 dispEM(EM); +0596 end +0597 metNames{i}=mets{i}(1:starts-1); +0598 end +0599 +0600 %Check if the metabolite exists already +0601 t1=strcat(metNames,'***',compartments); +0602 [I, J]=ismember(t1,t2); +0603 +0604 if ~all(I) +0605 if allowNewMets==true || ischar(allowNewMets) +0606 %Add the new mets +0607 metsToAdd.metNames=metNames(~I); +0608 metsToAdd.compartments=compartments(~I); +0609 if ischar(allowNewMets) +0610 newModel=addMets(newModel,metsToAdd,true,allowNewMets); +0611 else +0612 newModel=addMets(newModel,metsToAdd,true); +0613 end +0614 else +0615 EM='One or more equations contain metabolites that are not in model.metNames. Set allowNewMets to true to allow this function to add metabolites or use addMets to add them before calling this function'; +0616 dispEM(EM); +0617 end +0618 end +0619 +0620 %Calculate the indexes of the metabolites +0621 metIndexes=J; +0622 metIndexes(~I)=numel(newModel.mets)-sum(~I)+1:numel(newModel.mets); +0623 end +0624 +0625 %Add the info to the stoichiometric matrix +0626 newModel.S=[newModel.S sparse(size(newModel.S,1),nRxns)]; +0627 +0628 for i=1:nRxns +0629 newModel.S(metIndexes,nOldRxns+i)=S(:,i); +0630 %Parse the grRules and check whether all genes in grRules appear in +0631 %genes +0632 if isfield(newModel,'grRules') +0633 rule=newModel.grRules{nOldRxns+i}; +0634 rule=strrep(rule,'(',''); +0635 rule=strrep(rule,')',''); +0636 rule=strrep(rule,' or ',' '); +0637 rule=strrep(rule,' and ',' '); +0638 genes=regexp(rule,' ','split'); +0639 [I, J]=ismember(genes,newModel.genes); +0640 if ~all(I) && any(rule) +0641 EM=['Not all genes for reaction ' rxnsToAdd.rxns{i} ' were found in model.genes. If needed, add genes with addGenesRaven before calling this function, or set the ''allowNewGenes'' flag to true']; +0642 dispEM(EM); +0643 end +0644 end +0645 end +0646 +0647 %Make temporary minimal model structure with only new rxns, to parse to +0648 %standardizeGrRules +0649 newRxnsModel.genes=newModel.genes; +0650 newRxnsModel.grRules=newModel.grRules(length(model.rxns)+1:end); +0651 newRxnsModel.rxns=newModel.rxns(length(model.rxns)+1:end); +0652 +0653 %Fix grRules and reconstruct rxnGeneMat +0654 [grRules,rxnGeneMat] = standardizeGrRules(newRxnsModel,true); +0655 newModel.rxnGeneMat = [newModel.rxnGeneMat; rxnGeneMat]; +0656 newModel.grRules = [newModel.grRules(1:nOldRxns); grRules]; +0657 end
Generated by m2html © 2005
\ No newline at end of file diff --git a/doc/core/changeRxns.html b/doc/core/changeRxns.html index 9bbdc649..10c1310c 100644 --- a/doc/core/changeRxns.html +++ b/doc/core/changeRxns.html @@ -41,7 +41,7 @@

DESCRIPTION ^SOURCE CODE ^% interpreted 0014 % 1 - The metabolites are matched to model.mets. New 0015 % metabolites (if allowed) are added to -0016 % "compartment" +0016 % "compartment" (default) 0017 % 2 - The metabolites are matched to model.metNames and 0018 % all metabolites are assigned to "compartment". Any 0019 % new metabolites that are added will be assigned @@ -149,7 +149,7 @@

SOURCE CODE ^% 0054 % Usage: model=changeRxns(model,rxns,equations,eqnType,compartment,allowNewMets) 0055 -0056 if nargin<4 & isfield(equations,'stoichCoeffs') +0056 if nargin<4 0057 eqnType=1; 0058 end 0059 @@ -161,87 +161,86 @@

SOURCE CODE ^end 0066 0067 rxns=convertCharArray(rxns); -0068 equations=convertCharArray(equations); -0069 compartment=char(compartment); -0070 -0071 %Find the indexes of the reactions and throw an error if they aren't all -0072 %found -0073 [I, J]=ismember(rxns,model.rxns); -0074 if ~all(I) -0075 EM='All reaction ids must exist in the model'; -0076 dispEM(EM); -0077 end -0078 -0079 %The reactions are changed in the following manner. First create a -0080 %rxns-structure by copying info from the model. Then remove the old -0081 %reactions. Then add the updated ones using addRxns. Lastly, the model is -0082 %reordered to match the original order. This is done like this to make use -0083 %of the advanced parsing of equations that addRxns use. -0084 rxnsToChange.rxns=rxns; -0085 if isfield(equations,'mets') && isfield(equations,'stoichCoeffs') -0086 rxnsToChange.mets=equations.mets; -0087 rxnsToChange.stoichCoeffs=equations.stoichCoeffs; -0088 else -0089 rxnsToChange.equations=equations; -0090 end -0091 if isfield(model,'rxnNames') -0092 rxnsToChange.rxnNames=model.rxnNames(J); -0093 end -0094 if isfield(model,'lb') -0095 rxnsToChange.lb=model.lb(J); -0096 end -0097 if isfield(model,'ub') -0098 rxnsToChange.ub=model.ub(J); -0099 end -0100 if isfield(model,'c') -0101 rxnsToChange.c=model.c(J); -0102 end -0103 if isfield(model,'eccodes') -0104 rxnsToChange.eccodes=model.eccodes(J); -0105 end -0106 if isfield(model,'subSystems') -0107 rxnsToChange.subSystems=model.subSystems(J); -0108 end -0109 if isfield(model,'rxnComps') -0110 rxnsToChange.rxnComps=model.rxnComps(J); -0111 end -0112 if isfield(model,'grRules') -0113 rxnsToChange.grRules=model.grRules(J); -0114 end -0115 if isfield(model,'rxnFrom') -0116 rxnsToChange.rxnFrom=model.rxnFrom(J); -0117 end -0118 if isfield(model,'rxnScores') -0119 rxnsToChange.rxnScores=model.rxnScores(J); -0120 end -0121 if isfield(model,'rxnMiriams') -0122 rxnsToChange.rxnMiriams=model.rxnMiriams(J); -0123 end -0124 if isfield(model,'rxnNotes') -0125 rxnsToChange.rxnNotes=model.rxnNotes(J); -0126 end -0127 if isfield(model,'rxnReferences') -0128 rxnsToChange.rxnReferences=model.rxnReferences(J); -0129 end -0130 if isfield(model,'rxnConfidenceScores') -0131 rxnsToChange.rxnConfidenceScores=model.rxnConfidenceScores(J); -0132 end -0133 if isfield(model,'pwys') -0134 rxnsToChange.pwys=model.pwys(J); -0135 end -0136 -0137 %Calculate the new order of reactions -0138 order=ones(numel(model.rxns),1); -0139 order(J)=0; -0140 order=cumsum(order); -0141 order(J)=order(end)+1:order(end)+numel(rxns); -0142 -0143 %Remove the original reactions -0144 model=removeReactions(model,rxns); -0145 -0146 model=addRxns(model,rxnsToChange,eqnType,compartment,allowNewMets); -0147 model=permuteModel(model,order,'rxns'); -0148 end +0068 compartment=char(compartment); +0069 +0070 %Find the indexes of the reactions and throw an error if they aren't all +0071 %found +0072 [I, J]=ismember(rxns,model.rxns); +0073 if ~all(I) +0074 EM='All reaction ids must exist in the model'; +0075 dispEM(EM); +0076 end +0077 +0078 %The reactions are changed in the following manner. First create a +0079 %rxns-structure by copying info from the model. Then remove the old +0080 %reactions. Then add the updated ones using addRxns. Lastly, the model is +0081 %reordered to match the original order. This is done like this to make use +0082 %of the advanced parsing of equations that addRxns use. +0083 rxnsToChange.rxns=rxns; +0084 if isfield(equations,'mets') && isfield(equations,'stoichCoeffs') +0085 rxnsToChange.mets=equations.mets; +0086 rxnsToChange.stoichCoeffs=equations.stoichCoeffs; +0087 else +0088 rxnsToChange.equations=convertCharArray(equations); +0089 end +0090 if isfield(model,'rxnNames') +0091 rxnsToChange.rxnNames=model.rxnNames(J); +0092 end +0093 if isfield(model,'lb') +0094 rxnsToChange.lb=model.lb(J); +0095 end +0096 if isfield(model,'ub') +0097 rxnsToChange.ub=model.ub(J); +0098 end +0099 if isfield(model,'c') +0100 rxnsToChange.c=model.c(J); +0101 end +0102 if isfield(model,'eccodes') +0103 rxnsToChange.eccodes=model.eccodes(J); +0104 end +0105 if isfield(model,'subSystems') +0106 rxnsToChange.subSystems=model.subSystems(J); +0107 end +0108 if isfield(model,'rxnComps') +0109 rxnsToChange.rxnComps=model.rxnComps(J); +0110 end +0111 if isfield(model,'grRules') +0112 rxnsToChange.grRules=model.grRules(J); +0113 end +0114 if isfield(model,'rxnFrom') +0115 rxnsToChange.rxnFrom=model.rxnFrom(J); +0116 end +0117 if isfield(model,'rxnScores') +0118 rxnsToChange.rxnScores=model.rxnScores(J); +0119 end +0120 if isfield(model,'rxnMiriams') +0121 rxnsToChange.rxnMiriams=model.rxnMiriams(J); +0122 end +0123 if isfield(model,'rxnNotes') +0124 rxnsToChange.rxnNotes=model.rxnNotes(J); +0125 end +0126 if isfield(model,'rxnReferences') +0127 rxnsToChange.rxnReferences=model.rxnReferences(J); +0128 end +0129 if isfield(model,'rxnConfidenceScores') +0130 rxnsToChange.rxnConfidenceScores=model.rxnConfidenceScores(J); +0131 end +0132 if isfield(model,'pwys') +0133 rxnsToChange.pwys=model.pwys(J); +0134 end +0135 +0136 %Calculate the new order of reactions +0137 order=ones(numel(model.rxns),1); +0138 order(J)=0; +0139 order=cumsum(order); +0140 order(J)=order(end)+1:order(end)+numel(rxns); +0141 +0142 %Remove the original reactions +0143 model=removeReactions(model,rxns); +0144 +0145 model=addRxns(model,rxnsToChange,eqnType,compartment,allowNewMets); +0146 model=permuteModel(model,order,'rxns'); +0147 end
Generated by m2html © 2005
\ No newline at end of file diff --git a/doc/core/getGenesFromGrRules.html b/doc/core/getGenesFromGrRules.html index 892e652f..12835184 100644 --- a/doc/core/getGenesFromGrRules.html +++ b/doc/core/getGenesFromGrRules.html @@ -24,14 +24,14 @@

PURPOSE ^getGenesFromGrRules Extract gene list and rxnGeneMat from grRules array.

SYNOPSIS ^

-
function [genes,rxnGeneMat] = getGenesFromGrRules(grRules)
+
function [genes,rxnGeneMat] = getGenesFromGrRules(grRules, originalGenes)

DESCRIPTION ^

getGenesFromGrRules  Extract gene list and rxnGeneMat from grRules array.
 
  USAGE:
 
-   [genes,rxnGeneMat] = getGenesFromGrRules(grRules);
+   [genes,rxnGeneMat] = getGenesFromGrRules(grRules, originalGenes);
 
  INPUTS:
 
@@ -40,6 +40,7 @@ 

DESCRIPTION ^CROSS-REFERENCE INFORMATION ^
 
 
 <h2><a name=SOURCE CODE ^

-
0001 function [genes,rxnGeneMat] = getGenesFromGrRules(grRules)
+
0001 function [genes,rxnGeneMat] = getGenesFromGrRules(grRules, originalGenes)
 0002 %getGenesFromGrRules  Extract gene list and rxnGeneMat from grRules array.
 0003 %
 0004 % USAGE:
 0005 %
-0006 %   [genes,rxnGeneMat] = getGenesFromGrRules(grRules);
+0006 %   [genes,rxnGeneMat] = getGenesFromGrRules(grRules, originalGenes);
 0007 %
 0008 % INPUTS:
 0009 %
@@ -77,51 +78,63 @@ 

SOURCE CODE ^% NOTE: Boolean operators can be text ("and", "or") or 0013 % symbolic ("&", "|"), but there must be a space 0014 % between operators and gene names/IDs. -0015 % -0016 % OUTPUTS: -0017 % -0018 % genes A unique list of all gene IDs that appear in grRules. -0019 % -0020 % rxnGeneMat (Optional) A binary matrix indicating which genes -0021 % participate in each reaction, where rows correspond to -0022 % reactions (entries in grRules) and columns correspond to -0023 % genes. -0024 % -0025 +0015 % originalGenes The original gene list from the model as reference +0016 % +0017 % OUTPUTS: +0018 % +0019 % genes A unique list of all gene IDs that appear in grRules. +0020 % +0021 % rxnGeneMat (Optional) A binary matrix indicating which genes +0022 % participate in each reaction, where rows correspond to +0023 % reactions (entries in grRules) and columns correspond to +0024 % genes. +0025 % 0026 -0027 % check if the grRules use written or symbolic boolean operators -0028 if any(contains(grRules,{'&','|'})) -0029 % fix some potential missing spaces between parentheses and &/| -0030 grRules = regexprep(grRules,'\)&',') &'); % ")&" -> ") &" -0031 grRules = regexprep(grRules,'&\(','& ('); % "&(" -> "& (" -0032 grRules = regexprep(grRules,'\)\|',') |'); % ")|" -> ") |" -0033 grRules = regexprep(grRules,'\|\(','| ('); % "|(" -> "| (" -0034 else -0035 % fix some potential missing spaces between parentheses and AND/OR -0036 grRules = regexprep(grRules,'\)and',') and'); % ")and" -> ") and" -0037 grRules = regexprep(grRules,'and\(','and ('); % "and(" -> "and (" -0038 grRules = regexprep(grRules,'\)or',') or'); % ")or" -> ") or" -0039 grRules = regexprep(grRules,'or\(','or ('); % "or(" -> "or (" -0040 -0041 % convert "and" to "&" and "or" to "|" (easier to work with symbols) -0042 grRules = regexprep(grRules, ' or ', ' | '); -0043 grRules = regexprep(grRules, ' and ', ' & '); -0044 end -0045 -0046 % extract list of genes from each reaction -0047 rxnGenes = cellfun(@(r) unique(regexp(r,'[^&|\(\) ]+','match')),grRules,'UniformOutput',false); -0048 -0049 % construct new gene list -0050 nonEmpty = ~cellfun(@isempty,rxnGenes); -0051 genes = unique([rxnGenes{nonEmpty}]'); -0052 -0053 % construct new rxnGeneMat (if requested) -0054 if nargout > 1 -0055 rxnGeneCell = cellfun(@(rg) ismember(genes,rg),rxnGenes,'UniformOutput',false); -0056 rxnGeneMat = sparse(double(horzcat(rxnGeneCell{:})')); -0057 end +0027 +0028 % handle input arguments +0029 if nargin < 2 +0030 originalGenes = []; +0031 end +0032 +0033 % check if the grRules use written or symbolic boolean operators +0034 if any(contains(grRules,{'&','|'})) +0035 % fix some potential missing spaces between parentheses and &/| +0036 grRules = regexprep(grRules,'\)&',') &'); % ")&" -> ") &" +0037 grRules = regexprep(grRules,'&\(','& ('); % "&(" -> "& (" +0038 grRules = regexprep(grRules,'\)\|',') |'); % ")|" -> ") |" +0039 grRules = regexprep(grRules,'\|\(','| ('); % "|(" -> "| (" +0040 else +0041 % fix some potential missing spaces between parentheses and AND/OR +0042 grRules = regexprep(grRules,'\)and',') and'); % ")and" -> ") and" +0043 grRules = regexprep(grRules,'and\(','and ('); % "and(" -> "and (" +0044 grRules = regexprep(grRules,'\)or',') or'); % ")or" -> ") or" +0045 grRules = regexprep(grRules,'or\(','or ('); % "or(" -> "or (" +0046 +0047 % convert "and" to "&" and "or" to "|" (easier to work with symbols) +0048 grRules = regexprep(grRules, ' or ', ' | '); +0049 grRules = regexprep(grRules, ' and ', ' & '); +0050 end +0051 +0052 % extract list of genes from each reaction +0053 rxnGenes = cellfun(@(r) unique(regexp(r,'[^&|\(\) ]+','match')),grRules,'UniformOutput',false); +0054 +0055 % construct new gene list +0056 nonEmpty = ~cellfun(@isempty,rxnGenes); +0057 genes = unique([rxnGenes{nonEmpty}]'); 0058 -0059

+0059 if ~isempty(originalGenes) +0060 if ~isequal(sort(originalGenes), sort(genes)) +0061 error('The grRules and original gene list are inconsistent!'); +0062 else +0063 genes = originalGenes; +0064 end +0065 end +0066 +0067 % construct new rxnGeneMat (if requested) +0068 if nargout > 1 +0069 rxnGeneCell = cellfun(@(rg) ismember(genes,rg),rxnGenes,'UniformOutput',false); +0070 rxnGeneMat = sparse(double(horzcat(rxnGeneCell{:})')); +0071 end

Generated by m2html © 2005
\ No newline at end of file diff --git a/doc/external/kegg/constructMultiFasta.html b/doc/external/kegg/constructMultiFasta.html index 68996f41..bfca2a56 100644 --- a/doc/external/kegg/constructMultiFasta.html +++ b/doc/external/kegg/constructMultiFasta.html @@ -117,7 +117,7 @@

SOURCE CODE ^'NOTICE: If Matlab is freezing and does not provide any output in 30 minutes, consider increasing Java Heap Memory\n', ... 0060 'in MATLAB settings and start over with the new session\n']); -0061 fprintf('Mapping genes to the multi-FASTA source file... '); +0061 fprintf('Mapping genes to the multi-FASTA source file... 0%% complete'); 0062 %Now loop through the file to see which genes are present in the gene list 0063 %and save their position IN elementPositions! This is to enable a easy way 0064 %to get the distance to the following element @@ -150,83 +150,89 @@

SOURCE CODE ^end 0093 end -0094 end -0095 fprintf('COMPLETE\n'); -0096 -0097 fprintf('Generating the KEGG Orthology specific multi-FASTA files... 0%% complete'); -0098 %Loop through the reactions and print the corresponding sequences -0099 for i=1:numel(model.rxns) -0100 -0101 %Do not overwrite existing files -0102 if ~exist(fullfile(outputDir,[model.rxns{i} '.fa']), 'file') -0103 -0104 %Get the positions in elementPositions for the involved genes -0105 genesUsed=model.rxnGeneMat(i,:); -0106 -0107 %Open a file for this reaction. This saves empty files for KOs -0108 %without genes -0109 rxnfid=fopen(fullfile(outputDir,[model.rxns{i} '.fa']),'w'); -0110 -0111 if any(genesUsed) -0112 positions=genePositions(genesUsed~=0); -0113 -0114 %It could be that some genes were not found. Delete those -0115 %elements -0116 positions(positions==0)=[]; -0117 -0118 %Print each sequence -0119 for j=1:numel(positions) -0120 fseek(fid,elementPositions(positions(j)),-1); -0121 %Should check that it ends with a gene!!!! Check for eof -0122 if positions(j)<numel(elementPositions) -0123 str=fread(fid,[1 double(elementPositions(positions(j)+1))-double(elementPositions(positions(j)))-1],'*char'); -0124 -0125 %If the string does not end with a line feed character -0126 if str(end)~=10 -0127 str=[str fread(fid,[1 double(elementPositions(positions(j)+2))-double(elementPositions(positions(j)+1))],'*char')]; -0128 -0129 %This is if we still have not found a new gene. -0130 %Maximal unluck. This whole check should be done -0131 %when elementPositions are calculated! -0132 if str(end)~=10 -0133 %Skip this gene -0134 continue; -0135 end -0136 end -0137 else -0138 str=fread(fid,[1 inf],'*char'); -0139 end -0140 fwrite(rxnfid,['>' str]); -0141 end -0142 end -0143 fclose(rxnfid); -0144 end -0145 %Print the progress -0146 if rem(i-1,50) == 0 -0147 progress=num2str(i/numel(model.rxns)); -0148 progress=pad(progress,3,'left'); -0149 fprintf('\b\b\b\b\b\b\b\b\b\b\b\b\b%s%% complete',progress); +0094 %Print the progress +0095 if rem(i-1,350000) == 0 +0096 progress=num2str(floor(100*i/numel(elementPositions))); +0097 progress=pad(progress,3,'left'); +0098 fprintf('\b\b\b\b\b\b\b\b\b\b\b\b\b%s%% complete',progress); +0099 end +0100 end +0101 fprintf('\b\b\b\b\b\b\b\b\b\b\b\b\bCOMPLETE\n'); +0102 +0103 fprintf('Generating the KEGG Orthology specific multi-FASTA files... 0%% complete'); +0104 %Loop through the reactions and print the corresponding sequences +0105 for i=1:numel(model.rxns) +0106 +0107 %Do not overwrite existing files +0108 if ~exist(fullfile(outputDir,[model.rxns{i} '.fa']), 'file') +0109 +0110 %Get the positions in elementPositions for the involved genes +0111 genesUsed=model.rxnGeneMat(i,:); +0112 +0113 %Open a file for this reaction. This saves empty files for KOs +0114 %without genes +0115 rxnfid=fopen(fullfile(outputDir,[model.rxns{i} '.fa']),'w'); +0116 +0117 if any(genesUsed) +0118 positions=genePositions(genesUsed~=0); +0119 +0120 %It could be that some genes were not found. Delete those +0121 %elements +0122 positions(positions==0)=[]; +0123 +0124 %Print each sequence +0125 for j=1:numel(positions) +0126 fseek(fid,elementPositions(positions(j)),-1); +0127 %Should check that it ends with a gene!!!! Check for eof +0128 if positions(j)<numel(elementPositions) +0129 str=fread(fid,[1 double(elementPositions(positions(j)+1))-double(elementPositions(positions(j)))-1],'*char'); +0130 +0131 %If the string does not end with a line feed character +0132 if str(end)~=10 +0133 str=[str fread(fid,[1 double(elementPositions(positions(j)+2))-double(elementPositions(positions(j)+1))],'*char')]; +0134 +0135 %This is if we still have not found a new gene. +0136 %Maximal unluck. This whole check should be done +0137 %when elementPositions are calculated! +0138 if str(end)~=10 +0139 %Skip this gene +0140 continue; +0141 end +0142 end +0143 else +0144 str=fread(fid,[1 inf],'*char'); +0145 end +0146 fwrite(rxnfid,['>' str]); +0147 end +0148 end +0149 fclose(rxnfid); 0150 end -0151 end -0152 fprintf('\b\b\b\b\b\b\b\b\b\b\b\b\bCOMPLETE\n'); -0153 -0154 %Close the source file -0155 fclose(fid); -0156 end -0157 -0158 function files=listFiles(directory) -0159 %Supporter function to list the files in a directory and return them as a -0160 %cell array -0161 temp=dir(directory); -0162 files=cell(numel(temp),1); -0163 for i=1:numel(temp) -0164 files{i}=temp(i,1).name; -0165 end -0166 files=strrep(files,'.fa',''); -0167 files=strrep(files,'.hmm',''); -0168 files=strrep(files,'.out',''); -0169 files=strrep(files,'.faw',''); -0170 end

+0151 %Print the progress +0152 if rem(i-1,50) == 0 +0153 progress=num2str(floor(100*i/numel(model.rxns))); +0154 progress=pad(progress,3,'left'); +0155 fprintf('\b\b\b\b\b\b\b\b\b\b\b\b\b%s%% complete',progress); +0156 end +0157 end +0158 fprintf('\b\b\b\b\b\b\b\b\b\b\b\b\bCOMPLETE\n'); +0159 +0160 %Close the source file +0161 fclose(fid); +0162 end +0163 +0164 function files=listFiles(directory) +0165 %Supporter function to list the files in a directory and return them as a +0166 %cell array +0167 temp=dir(directory); +0168 files=cell(numel(temp),1); +0169 for i=1:numel(temp) +0170 files{i}=temp(i,1).name; +0171 end +0172 files=strrep(files,'.fa',''); +0173 files=strrep(files,'.hmm',''); +0174 files=strrep(files,'.out',''); +0175 files=strrep(files,'.faw',''); +0176 end
Generated by m2html © 2005
\ No newline at end of file diff --git a/doc/external/kegg/getGenesFromKEGG.html b/doc/external/kegg/getGenesFromKEGG.html index fd844f21..073e8e63 100644 --- a/doc/external/kegg/getGenesFromKEGG.html +++ b/doc/external/kegg/getGenesFromKEGG.html @@ -187,7 +187,7 @@

SOURCE CODE ^'The files ''ko'' and ''reaction'' cannot be located at ' strrep(keggPath,'\','/') '/ and should be downloaded from the KEGG FTP\n']); 0081 dispEM(EM); 0082 else -0083 fprintf('Generating keggGenes.mat file... '); +0083 fprintf('Generating keggGenes.mat file...\n'); 0084 %Get all KOs that are associated to reactions 0085 allKOs=getAllKOs(keggPath); 0086 @@ -368,81 +368,83 @@

SOURCE CODE ^end 0262 0263 %Only get the KOs in koList -0264 I=~ismember(model.rxns,koList); -0265 model=removeReactions(model,I,true,true); -0266 fprintf('COMPLETE\n'); +0264 if nargin>1 +0265 I=~ismember(model.rxns,koList); +0266 model=removeReactions(model,I,true,true); 0267 end -0268 -0269 function allKOs=getAllKOs(keggPath) -0270 %Retrieves all KOs that are associated to reactions. This is because the -0271 %number of genes in KEGG is very large so without this parsing it would -0272 %take many hours -0273 -0274 allKOs={}; +0268 fprintf('COMPLETE\n'); +0269 end +0270 +0271 function allKOs=getAllKOs(keggPath) +0272 %Retrieves all KOs that are associated to reactions. This is because the +0273 %number of genes in KEGG is very large so without this parsing it would +0274 %take many hours 0275 -0276 %First check if the reactions have already been parsed -0277 ravenPath=findRAVENroot; -0278 rxnsFile=fullfile(ravenPath,'external','kegg','keggRxns.mat'); -0279 if exist(rxnsFile, 'file') -0280 fprintf(['NOTE: Importing KEGG ORTHOLOGY list from ' strrep(rxnsFile,'\','/') '.\n']); -0281 load(rxnsFile,'model'); -0282 -0283 %Loop through the reactions and add the corresponding genes -0284 for i=1:numel(model.rxns) -0285 if isstruct(model.rxnMiriams{i}) -0286 %Get all KOs -0287 allKOs=[allKOs;model.rxnMiriams{i}.value(strcmpi(model.rxnMiriams{i}.name,'kegg.orthology'))]; -0288 end -0289 end -0290 else -0291 %Parse the reaction file instead First load information on reaction ID, -0292 %reaction name, KO, pathway and ec-number -0293 fid = fopen(fullfile(keggPath,'reaction'), 'r'); -0294 orthology=false; -0295 while 1 -0296 %Get the next line -0297 tline = fgetl(fid); -0298 -0299 %Abort at end of file -0300 if ~ischar(tline) -0301 break; -0302 end -0303 -0304 %Skip '///' -0305 if numel(tline)<12 -0306 continue; -0307 end -0308 -0309 %Check if it's a new reaction -0310 if strcmp(tline(1:12),'ENTRY ') -0311 orthology=false; -0312 end -0313 -0314 if strcmp(tline(1:9),'REFERENCE') -0315 orthology=false; -0316 end -0317 -0318 %Add KO-ids -0319 if numel(tline)>16 -0320 if strcmp(tline(1:16),'ORTHOLOGY KO: ') || strcmp(tline(1:16),' KO: ') || strcmp(tline(1:12),'ORTHOLOGY ') || orthology==true -0321 if strcmp(tline(13:16),'KO:') -0322 %This is in the old version -0323 allKOs=[allKOs;tline(17:22)]; -0324 elseif strcmp(tline(1:12),'ORTHOLOGY ') -0325 %This means that it found one KO in the new format and -0326 %that subsequent lines might be other KOs -0327 orthology=true; -0328 allKOs=[allKOs;tline(13:18)]; -0329 end -0330 end -0331 end -0332 end -0333 -0334 %Close the file -0335 fclose(fid); -0336 end -0337 allKOs=unique(allKOs); -0338 end +0276 allKOs={}; +0277 +0278 %First check if the reactions have already been parsed +0279 ravenPath=findRAVENroot; +0280 rxnsFile=fullfile(ravenPath,'external','kegg','keggRxns.mat'); +0281 if exist(rxnsFile, 'file') +0282 fprintf(['NOTE: Importing KEGG ORTHOLOGY list from ' strrep(rxnsFile,'\','/') '.\n']); +0283 load(rxnsFile,'model'); +0284 +0285 %Loop through the reactions and add the corresponding genes +0286 for i=1:numel(model.rxns) +0287 if isstruct(model.rxnMiriams{i}) +0288 %Get all KOs +0289 allKOs=[allKOs;model.rxnMiriams{i}.value(strcmpi(model.rxnMiriams{i}.name,'kegg.orthology'))]; +0290 end +0291 end +0292 else +0293 %Parse the reaction file instead First load information on reaction ID, +0294 %reaction name, KO, pathway and ec-number +0295 fid = fopen(fullfile(keggPath,'reaction'), 'r'); +0296 orthology=false; +0297 while 1 +0298 %Get the next line +0299 tline = fgetl(fid); +0300 +0301 %Abort at end of file +0302 if ~ischar(tline) +0303 break; +0304 end +0305 +0306 %Skip '///' +0307 if numel(tline)<12 +0308 continue; +0309 end +0310 +0311 %Check if it's a new reaction +0312 if strcmp(tline(1:12),'ENTRY ') +0313 orthology=false; +0314 end +0315 +0316 if strcmp(tline(1:9),'REFERENCE') +0317 orthology=false; +0318 end +0319 +0320 %Add KO-ids +0321 if numel(tline)>16 +0322 if strcmp(tline(1:16),'ORTHOLOGY KO: ') || strcmp(tline(1:16),' KO: ') || strcmp(tline(1:12),'ORTHOLOGY ') || orthology==true +0323 if strcmp(tline(13:16),'KO:') +0324 %This is in the old version +0325 allKOs=[allKOs;tline(17:22)]; +0326 elseif strcmp(tline(1:12),'ORTHOLOGY ') +0327 %This means that it found one KO in the new format and +0328 %that subsequent lines might be other KOs +0329 orthology=true; +0330 allKOs=[allKOs;tline(13:18)]; +0331 end +0332 end +0333 end +0334 end +0335 +0336 %Close the file +0337 fclose(fid); +0338 end +0339 allKOs=unique(allKOs); +0340 end
Generated by m2html © 2005
\ No newline at end of file diff --git a/doc/external/kegg/getKEGGModelForOrganism.html b/doc/external/kegg/getKEGGModelForOrganism.html index d6c84b28..77306bdf 100644 --- a/doc/external/kegg/getKEGGModelForOrganism.html +++ b/doc/external/kegg/getKEGGModelForOrganism.html @@ -67,7 +67,7 @@

DESCRIPTION ^DESCRIPTION ^SOURCE CODE ^% the HMMs were trained on pro- or eukaryotic 0043 % sequences, using a sequence similarity threshold of 0044 % XXX %, fitting the KEGG version YY. E.g. -0045 % euk90_kegg100. (opt, see note about fastaFile. Note +0045 % euk90_kegg102. (opt, see note about fastaFile. Note 0046 % that in order to rebuild the KEGG model from a 0047 % database dump, as opposed to using the version 0048 % supplied with RAVEN, you would still need to supply @@ -431,911 +427,907 @@

SOURCE CODE ^% should be used for constructing Hidden Markov models (HMMs), and 0141 % later for matching your sequences to. 0142 % The most common alternatives here would be to use sequences from -0143 % only eukaryotes, only prokaryotes or all sequences in KEGG. As -0144 % explained in the README.md file, various sets of pre-trained hidden -0145 % Markov models are available at <a href="matlab: -0146 % web('http://biomet-toolbox.chalmers.se/index.php?page=downtools-raven')">BioMet -0147 % Toolbox</a>. This is normally the most convenient way, but if you -0148 % would like to use, for example, only fungal sequences for training -0149 % the HMMs then you need to re-run this step. -0150 % 2b. KO-specific protein FASTA files are re-organised into -0151 % non-redundant protein sets with CD-HIT. The user can only set -0152 % seqIdentity parameter, which corresponds to '-c' parameter in -0153 % CD-HIT, described as "sequence identity threshold". CD-HIT suggsted -0154 % sequence identity specific word_length (-n) parameters are used. -0155 % 2c. Does a multi sequence alignment for multi-FASTA files obtained in -0156 % Step 2b for future use. MAFFT software with automatic selection of -0157 % alignment algorithm is used in this step ('--auto'). -0158 % 2d. Trains hidden Markov models using HMMer for each of the aligned -0159 % KO-specific FASTA files obtained in Step 2c. This is performed with -0160 % 'hmmbuild' using the default settings. -0161 % -0162 % Step 2 may be reasonable to be re-done if the user wants to tweak the -0163 % settings in proteins filtering, clustering, multi sequence alignment or -0164 % HMMs training steps. However, it requires to have KO-specific protein -0165 % FASTA files obtained in Step 1a. As such files are not provided in -0166 % RAVEN and BioMet ToolBox, the user can only generate these files from -0167 % KEGG FTP dump files, so KEGG FTP license is needed. -0168 % -0169 % 3a. Queries the HMMs with sequences for the organism you are making a -0170 % model for. This step uses both the output from step 1a and from 2d. -0171 % This is done with 'hmmsearch' function under default settings. The -0172 % significance threshold value set in 'cutOff' parameter is used -0173 % later when parsing '*.out' files to filter out KO hits with higher -0174 % value than 'cutOff' value. The results with passable E values are -0175 % summarised into KO-gene occurence matrix with E values in -0176 % intersections as 'koGeneMat'. The parameters 'minScoreRatioG' and -0177 % 'minScoreRatioKO' are then applied to 'prune' KO-gene associations -0178 % (see the function descriptions above for more details). The -0179 % intersection values for these 'prunable' associations are converted -0180 % to zeroes. -0181 % 3b. Constructs a model based on the pre-processed KO-gene association -0182 % matrix (koGeneMat). As the full KEGG model already has reaction-KO -0183 % relationships, KOs are converted into the query genes. The final -0184 % draft model contains only these reactions, which are associated -0185 % with KOs from koGeneMat. The reactions without the genes may also -0186 % be included, if the user set keepSpontaneous as 'true'. -0187 % -0188 % The Step 3 is specific to the organism for which the model is -0189 % reconstructed. -0190 % -0191 % In principle the function looks at which output that is already available -0192 % and runs only the parts that are required for step 3. This means -0193 % that (see the definition of the parameters for details): -0194 % -1a is only performed if there are no KEGG model files in the -0195 % RAVEN\external\kegg directory -0196 % -1b is only performed if not all required HMMs OR aligned FASTA files -0197 % OR multi-FASTA files exist in the defined dataDir. This means that this -0198 % step is skipped if the HMMs are downloaded from BioMet Toolbox instead -0199 % (see above). If not all files exist it will try to find -0200 % the KEGG database files in dataDir. -0201 % -2a is only performed if not all required HMMs OR aligned FASTA files -0202 % files exist in the defined dataDir. This means that this step is skipped -0203 % if the HMMs are downloaded from BioMet Toolbox instead (see above). -0204 % -2b is only performed if not all required HMMs exist in the defined -0205 % dataDir. This means that this step is skipped if the FASTA files or -0206 % HMMs are downloaded from BioMet Toolbox instead (see above). -0207 % -3a is performed for the required HMMs for which no corresponding .out -0208 % file exists in outDir. This is just a way to enable the function to be -0209 % run in parallel or to resume if interrupted. -0210 % -3b is always performed. -0211 % -0212 % These steps are specific to the organism for which you are -0213 % reconstructing the model. -0214 % -0215 % Regarding the whole pipeline, the function checks the output that is -0216 % already available and runs only the parts that are required for step 3. -0217 % This means that (see the definition of the parameters for details): -0218 % -1a is only performed if there are no KEGG model files in the -0219 % RAVEN\external\kegg directory. -0220 % -1b is only performed if any of required KOs do not have HMMs, aligned -0221 % FASTA files, clustered FASTA files and raw FASTA files in the defined -0222 % dataDir. This means that this step is skipped if the HMMs are -0223 % downloaded from BioMet Toolbox instead (see above). If not all files -0224 % exist it will try to find the KEGG database files in dataDir. -0225 % -2ab are only performed if any of required KOs do not have HMMs, -0226 % aligned FASTA files and clustered FASTA files in the defined dataDir. -0227 % This means that this step is skipped if the HMMs are downloaded from -0228 % BioMet Toolbox instead (see above). -0229 % -2c is only performed if any of required KOs do not have HMMs and -0230 % aligned FASTA files in the defined dataDir. This means that this step -0231 % is skipped if the HMMs are downloaded from BioMet Toolbox instead (see -0232 % above). -0233 % -2d is only performed if any of required KOs do not have HMMs exist in -0234 % the defined dataDir. This means that this step is skipped if the FASTA -0235 % files or HMMs are downloaded from BioMet Toolbox instead (see above). -0236 % -3a is performed for the required HMMs for which no corresponding .out -0237 % file exists in outDir. This is just a way to enable the function to be -0238 % run in parallel or to resume if interrupted. -0239 % -3b is always performed. -0240 % -0241 % NOTE: it is also possible to obtain draft model from KEGG without -0242 % providing protein FASTA file for the target organism. In such case the -0243 % organism three-four letter abbreviation set as 'organismID' must exist -0244 % in the local KEGG database. In such case, the program just fetches all -0245 % the reactions, which are associated with given 'organismID'. -0246 % -0247 % Usage: model=getKEGGModelForOrganism(organismID,fastaFile,dataDir,... -0248 % outDir,keepSpontaneous,keepUndefinedStoich,keepIncomplete,... -0249 % keepGeneral,cutOff,minScoreRatioKO,minScoreRatioG,maxPhylDist,... -0250 % nSequences,seqIdentity) -0251 -0252 if nargin<2 || isempty(fastaFile) -0253 fastaFile=[]; -0254 else -0255 fastaFile=char(fastaFile); -0256 end -0257 if nargin<3 -0258 dataDir=[]; -0259 else -0260 dataDir=char(dataDir); -0261 end -0262 if nargin<4 || isempty(outDir) -0263 outDir=tempdir; -0264 %Delete all *.out files if any exist -0265 delete(fullfile(outDir,'*.out')); -0266 else -0267 outDir=char(outDir); -0268 end -0269 if nargin<5 -0270 keepSpontaneous=true; -0271 end -0272 if nargin<6 -0273 keepUndefinedStoich=true; -0274 end -0275 if nargin<7 -0276 keepIncomplete=true; -0277 end -0278 if nargin<8 -0279 keepGeneral=false; -0280 end -0281 if nargin<9 -0282 cutOff=10^-50; -0283 end -0284 if nargin<10 -0285 minScoreRatioKO=0.3; -0286 end -0287 if nargin<11 -0288 minScoreRatioG=0.8; +0143 % only eukaryotes, only prokaryotes or all sequences in KEGG, but you +0144 % could also play around with the parameters to use e.g. only fungal +0145 % sequences. +0146 % 2b. KO-specific protein FASTA files are re-organised into +0147 % non-redundant protein sets with CD-HIT. The user can only set +0148 % seqIdentity parameter, which corresponds to '-c' parameter in +0149 % CD-HIT, described as "sequence identity threshold". CD-HIT suggsted +0150 % sequence identity specific word_length (-n) parameters are used. +0151 % 2c. Does a multi sequence alignment for multi-FASTA files obtained in +0152 % Step 2b for future use. MAFFT software with automatic selection of +0153 % alignment algorithm is used in this step ('--auto'). +0154 % 2d. Trains hidden Markov models using HMMer for each of the aligned +0155 % KO-specific FASTA files obtained in Step 2c. This is performed with +0156 % 'hmmbuild' using the default settings. +0157 % +0158 % Step 2 may be reasonable to be re-done if the user wants to tweak the +0159 % settings in proteins filtering, clustering, multi sequence alignment or +0160 % HMMs training steps. However, it requires to have KO-specific protein +0161 % FASTA files obtained in Step 1a. As such files are not provided in +0162 % RAVEN and BioMet ToolBox, the user can only generate these files from +0163 % KEGG FTP dump files, so KEGG FTP license is needed. +0164 % +0165 % 3a. Queries the HMMs with sequences for the organism you are making a +0166 % model for. This step uses both the output from step 1a and from 2d. +0167 % This is done with 'hmmsearch' function under default settings. The +0168 % significance threshold value set in 'cutOff' parameter is used +0169 % later when parsing '*.out' files to filter out KO hits with higher +0170 % value than 'cutOff' value. The results with passable E values are +0171 % summarised into KO-gene occurence matrix with E values in +0172 % intersections as 'koGeneMat'. The parameters 'minScoreRatioG' and +0173 % 'minScoreRatioKO' are then applied to 'prune' KO-gene associations +0174 % (see the function descriptions above for more details). The +0175 % intersection values for these 'prunable' associations are converted +0176 % to zeroes. +0177 % 3b. Constructs a model based on the pre-processed KO-gene association +0178 % matrix (koGeneMat). As the full KEGG model already has reaction-KO +0179 % relationships, KOs are converted into the query genes. The final +0180 % draft model contains only these reactions, which are associated +0181 % with KOs from koGeneMat. The reactions without the genes may also +0182 % be included, if the user set keepSpontaneous as 'true'. +0183 % +0184 % The Step 3 is specific to the organism for which the model is +0185 % reconstructed. +0186 % +0187 % In principle the function looks at which output that is already available +0188 % and runs only the parts that are required for step 3. This means +0189 % that (see the definition of the parameters for details): +0190 % -1a is only performed if there are no KEGG model files in the +0191 % RAVEN\external\kegg directory +0192 % -1b is only performed if not all required HMMs OR aligned FASTA files +0193 % OR multi-FASTA files exist in the defined dataDir. This means that this +0194 % step is skipped if the HMMs are downloaded from BioMet Toolbox instead +0195 % (see above). If not all files exist it will try to find +0196 % the KEGG database files in dataDir. +0197 % -2a is only performed if not all required HMMs OR aligned FASTA files +0198 % files exist in the defined dataDir. This means that this step is skipped +0199 % if the HMMs are downloaded from BioMet Toolbox instead (see above). +0200 % -2b is only performed if not all required HMMs exist in the defined +0201 % dataDir. This means that this step is skipped if the FASTA files or +0202 % HMMs are downloaded from BioMet Toolbox instead (see above). +0203 % -3a is performed for the required HMMs for which no corresponding .out +0204 % file exists in outDir. This is just a way to enable the function to be +0205 % run in parallel or to resume if interrupted. +0206 % -3b is always performed. +0207 % +0208 % These steps are specific to the organism for which you are +0209 % reconstructing the model. +0210 % +0211 % Regarding the whole pipeline, the function checks the output that is +0212 % already available and runs only the parts that are required for step 3. +0213 % This means that (see the definition of the parameters for details): +0214 % -1a is only performed if there are no KEGG model files in the +0215 % RAVEN\external\kegg directory. +0216 % -1b is only performed if any of required KOs do not have HMMs, aligned +0217 % FASTA files, clustered FASTA files and raw FASTA files in the defined +0218 % dataDir. This means that this step is skipped if the HMMs are +0219 % downloaded from BioMet Toolbox instead (see above). If not all files +0220 % exist it will try to find the KEGG database files in dataDir. +0221 % -2ab are only performed if any of required KOs do not have HMMs, +0222 % aligned FASTA files and clustered FASTA files in the defined dataDir. +0223 % This means that this step is skipped if the HMMs are downloaded from +0224 % BioMet Toolbox instead (see above). +0225 % -2c is only performed if any of required KOs do not have HMMs and +0226 % aligned FASTA files in the defined dataDir. This means that this step +0227 % is skipped if the HMMs are downloaded from BioMet Toolbox instead (see +0228 % above). +0229 % -2d is only performed if any of required KOs do not have HMMs exist in +0230 % the defined dataDir. This means that this step is skipped if the FASTA +0231 % files or HMMs are downloaded from BioMet Toolbox instead (see above). +0232 % -3a is performed for the required HMMs for which no corresponding .out +0233 % file exists in outDir. This is just a way to enable the function to be +0234 % run in parallel or to resume if interrupted. +0235 % -3b is always performed. +0236 % +0237 % NOTE: it is also possible to obtain draft model from KEGG without +0238 % providing protein FASTA file for the target organism. In such case the +0239 % organism three-four letter abbreviation set as 'organismID' must exist +0240 % in the local KEGG database. In such case, the program just fetches all +0241 % the reactions, which are associated with given 'organismID'. +0242 % +0243 % Usage: model=getKEGGModelForOrganism(organismID,fastaFile,dataDir,... +0244 % outDir,keepSpontaneous,keepUndefinedStoich,keepIncomplete,... +0245 % keepGeneral,cutOff,minScoreRatioKO,minScoreRatioG,maxPhylDist,... +0246 % nSequences,seqIdentity) +0247 +0248 if nargin<2 || isempty(fastaFile) +0249 fastaFile=[]; +0250 else +0251 fastaFile=char(fastaFile); +0252 end +0253 if nargin<3 +0254 dataDir=[]; +0255 else +0256 dataDir=char(dataDir); +0257 end +0258 if nargin<4 || isempty(outDir) +0259 outDir=tempdir; +0260 %Delete all *.out files if any exist +0261 delete(fullfile(outDir,'*.out')); +0262 else +0263 outDir=char(outDir); +0264 end +0265 if nargin<5 +0266 keepSpontaneous=true; +0267 end +0268 if nargin<6 +0269 keepUndefinedStoich=true; +0270 end +0271 if nargin<7 +0272 keepIncomplete=true; +0273 end +0274 if nargin<8 +0275 keepGeneral=false; +0276 end +0277 if nargin<9 +0278 cutOff=10^-50; +0279 end +0280 if nargin<10 +0281 minScoreRatioKO=0.3; +0282 end +0283 if nargin<11 +0284 minScoreRatioG=0.8; +0285 end +0286 if nargin<12 +0287 maxPhylDist=inf; +0288 %Include all sequences for each reaction 0289 end -0290 if nargin<12 -0291 maxPhylDist=inf; +0290 if nargin<13 +0291 nSequences=inf; 0292 %Include all sequences for each reaction 0293 end -0294 if nargin<13 -0295 nSequences=inf; -0296 %Include all sequences for each reaction -0297 end -0298 if nargin<14 -0299 seqIdentity=0.9; -0300 end -0301 -0302 if isempty(fastaFile) -0303 fprintf(['\n\n*** The model reconstruction from KEGG based on the annotation available for KEGG Species <strong>' organismID '</strong> ***\n\n']); -0304 else -0305 fprintf('\n\n*** The model reconstruction from KEGG based on the protein homology search against KEGG Orthology specific HMMs ***\n\n'); -0306 %Check if query fasta exists -0307 fastaFile=checkFileExistence(fastaFile,2); %Copy file to temp dir -0308 end -0309 -0310 %Run the external binaries multi-threaded to use all logical cores assigned -0311 %to MATLAB -0312 cores = evalc('feature(''numcores'')'); -0313 cores = strsplit(cores, 'MATLAB was assigned: '); -0314 cores = regexp(cores{2},'^\d*','match'); -0315 cores = cores{1}; -0316 -0317 %Get the directory for RAVEN Toolbox. -0318 ravenPath=findRAVENroot(); -0319 -0320 %Checking if dataDir is consistent. It must point to pre-trained HMMs set, -0321 %compatible with the the current RAVEN version. The user may have the -0322 %required zip file already in working directory or have it extracted. If -0323 %the zip file and directory is not here, it is downloaded from the cloud -0324 if ~isempty(dataDir) -0325 hmmOptions={'euk90_kegg100','prok90_kegg100'}; -0326 if ~endsWith(dataDir,hmmOptions) %Check if dataDir ends with any of the hmmOptions. -0327 %If not, then check whether the required folders exist anyway. -0328 if ~exist(fullfile(dataDir,'keggdb','genes.pep'),'file') && ... -0329 ~exist(fullfile(dataDir,'fasta'),'dir') && ... -0330 ~exist(fullfile(dataDir,'aligned'),'dir') && ... -0331 ~exist(fullfile(dataDir,'hmms'),'dir') -0332 error(['Pre-trained HMMs set is not recognised. If you want download RAVEN provided sets, it should match any of the following: ' strjoin(hmmOptions,' or ')]) -0333 end -0334 else -0335 if exist(dataDir,'dir') && exist(fullfile(dataDir,'hmms','K00844.hmm'),'file') -0336 fprintf(['NOTE: Found <strong>' dataDir '</strong> directory with pre-trained HMMs, it will therefore be used during reconstruction\n']); -0337 elseif ~exist(dataDir,'dir') && exist([dataDir,'.zip'],'file') -0338 fprintf('Extracting the HMMs archive file... '); -0339 unzip([dataDir,'.zip']); -0340 fprintf('COMPLETE\n'); -0341 else -0342 hmmIndex=strcmp(dataDir,hmmOptions); -0343 if ~any(hmmIndex) -0344 error(['Pre-trained HMMs are only provided with proteins clustered at 90% sequence identity (i.e. prok90_kegg100 and euk90_kegg100). ' ... -0345 'Use either of these datasets, or otherwise download the relevant sequence data from KEGG to train HMMs with your desired sequence identity']) -0346 else -0347 fprintf('Downloading the HMMs archive file... '); -0348 try -0349 websave([dataDir,'.zip'],['https://github.com/SysBioChalmers/RAVEN/releases/download/v2.6.0/',hmmOptions{hmmIndex},'.zip']); -0350 catch ME -0351 if strcmp(ME.identifier,'MATLAB:webservices:HTTP404StatusCodeError') -0352 error('Failed to download the HMMs archive file, the server returned a 404 error, try again later. If the problem persists please report it on the RAVEN GitHub Issues page: https://github.com/SysBioChalmers/RAVEN/issues') -0353 end -0354 end -0355 end -0356 -0357 fprintf('COMPLETE\n'); -0358 fprintf('Extracting the HMMs archive file... '); -0359 unzip([dataDir,'.zip']); -0360 fprintf('COMPLETE\n'); +0294 if nargin<14 +0295 seqIdentity=0.9; +0296 end +0297 +0298 if isempty(fastaFile) +0299 fprintf(['\n*** The model reconstruction from KEGG based on the annotation available for KEGG Species <strong>' organismID '</strong> ***\n\n']); +0300 else +0301 fprintf('\n*** The model reconstruction from KEGG based on the protein homology search against KEGG Orthology specific HMMs ***\n\n'); +0302 %Check if query fasta exists +0303 fastaFile=checkFileExistence(fastaFile,2); %Copy file to temp dir +0304 end +0305 +0306 %Run the external binaries multi-threaded to use all logical cores assigned +0307 %to MATLAB +0308 cores = evalc('feature(''numcores'')'); +0309 cores = strsplit(cores, 'MATLAB was assigned: '); +0310 cores = regexp(cores{2},'^\d*','match'); +0311 cores = cores{1}; +0312 +0313 %Get the directory for RAVEN Toolbox. +0314 ravenPath=findRAVENroot(); +0315 +0316 %Checking if dataDir is consistent. It must point to pre-trained HMMs set, +0317 %compatible with the the current RAVEN version. The user may have the +0318 %required zip file already in working directory or have it extracted. If +0319 %the zip file and directory is not here, it is downloaded from the cloud +0320 if ~isempty(dataDir) +0321 hmmOptions={'euk90_kegg102','prok90_kegg102'}; +0322 if ~endsWith(dataDir,hmmOptions) %Check if dataDir ends with any of the hmmOptions. +0323 %If not, then check whether the required folders exist anyway. +0324 if ~exist(fullfile(dataDir,'keggdb','genes.pep'),'file') && ... +0325 ~exist(fullfile(dataDir,'fasta'),'dir') && ... +0326 ~exist(fullfile(dataDir,'aligned'),'dir') && ... +0327 ~exist(fullfile(dataDir,'hmms'),'dir') +0328 error(['Pre-trained HMMs set is not recognised. If you want download RAVEN provided sets, it should match any of the following: ' strjoin(hmmOptions,' or ')]) +0329 end +0330 else +0331 if exist(dataDir,'dir') && exist(fullfile(dataDir,'hmms','K00844.hmm'),'file') +0332 fprintf(['NOTE: Found <strong>' dataDir '</strong> directory with pre-trained HMMs, it will therefore be used during reconstruction\n']); +0333 elseif ~exist(dataDir,'dir') && exist([dataDir,'.zip'],'file') +0334 fprintf('Extracting the HMMs archive file... '); +0335 unzip([dataDir,'.zip']); +0336 fprintf('COMPLETE\n'); +0337 else +0338 hmmIndex=strcmp(dataDir,hmmOptions); +0339 if ~any(hmmIndex) +0340 error(['Pre-trained HMMs are only provided with proteins clustered at 90% sequence identity (i.e. prok90_kegg102 and euk90_kegg102). ' ... +0341 'Use either of these datasets, or otherwise download the relevant sequence data from KEGG to train HMMs with your desired sequence identity']) +0342 else +0343 fprintf('Downloading the HMMs archive file... '); +0344 try +0345 websave([dataDir,'.zip'],['https://github.com/SysBioChalmers/RAVEN/releases/download/v2.7.4/',hmmOptions{hmmIndex},'.zip']); +0346 catch ME +0347 if strcmp(ME.identifier,'MATLAB:webservices:HTTP404StatusCodeError') +0348 error('Failed to download the HMMs archive file, the server returned a 404 error, try again later. If the problem persists please report it on the RAVEN GitHub Issues page: https://github.com/SysBioChalmers/RAVEN/issues') +0349 end +0350 end +0351 end +0352 +0353 fprintf('COMPLETE\n'); +0354 fprintf('Extracting the HMMs archive file... '); +0355 unzip([dataDir,'.zip']); +0356 fprintf('COMPLETE\n'); +0357 end +0358 %Check if HMMs are extracted +0359 if ~exist(fullfile(dataDir,'hmms','K00844.hmm'),'file') +0360 error(['The HMM files seem improperly extracted and not found in ',dataDir,'/hmms. Please remove ',dataDir,' folder and rerun getKEGGModelForOrganism']); 0361 end -0362 %Check if HMMs are extracted -0363 if ~exist(fullfile(dataDir,'hmms','K00844.hmm'),'file') -0364 error(['The HMM files seem improperly extracted and not found in ',dataDir,'/hmms. Please remove ',dataDir,' folder and rerun getKEGGModelForOrganism']); -0365 end -0366 end -0367 end -0368 -0369 %Check if the fasta-file contains '/' or'\'. If not then it's probably just -0370 %a file name. Expand to full path. -0371 if any(fastaFile) -0372 if ~any(strfind(fastaFile,'\')) && ~any(strfind(fastaFile,'/')) -0373 fastaFile=which(fastaFile); +0362 end +0363 end +0364 +0365 %Check if the fasta-file contains '/' or'\'. If not then it's probably just +0366 %a file name. Expand to full path. +0367 if any(fastaFile) +0368 if ~any(strfind(fastaFile,'\')) && ~any(strfind(fastaFile,'/')) +0369 fastaFile=which(fastaFile); +0370 end +0371 %Create the required sub-folders in dataDir if they dont exist +0372 if ~exist(fullfile(dataDir,'keggdb'),'dir') +0373 mkdir(dataDir,'keggdb'); 0374 end -0375 %Create the required sub-folders in dataDir if they dont exist -0376 if ~exist(fullfile(dataDir,'keggdb'),'dir') -0377 mkdir(dataDir,'keggdb'); -0378 end -0379 if ~exist(fullfile(dataDir,'fasta'),'dir') -0380 mkdir(dataDir,'fasta'); -0381 end -0382 if ~exist(fullfile(dataDir,'aligned'),'dir') -0383 mkdir(dataDir,'aligned'); -0384 end -0385 if ~exist(fullfile(dataDir,'hmms'),'dir') -0386 mkdir(dataDir,'hmms'); -0387 end -0388 if ~exist(outDir,'dir') -0389 mkdir(outDir); -0390 end -0391 end -0392 -0393 %First generate the full global KEGG model. Can be provided as input. -0394 %Otherwise, getModelFromKEGG is run. The dataDir must not be supplied as -0395 %there is also an internal RAVEN version available -0396 if nargin==15 -0397 model=globalModel.model; -0398 KOModel=globalModel.KOModel; -0399 elseif any(dataDir) -0400 [model, KOModel]=getModelFromKEGG(fullfile(dataDir,'keggdb'),keepSpontaneous,keepUndefinedStoich,keepIncomplete,keepGeneral); -0401 else -0402 [model, KOModel]=getModelFromKEGG([],keepSpontaneous,keepUndefinedStoich,keepIncomplete,keepGeneral); -0403 end -0404 model.id=organismID; -0405 model.c=zeros(numel(model.rxns),1); -0406 -0407 %If no FASTA file is supplied, then just remove all genes which are not for -0408 %the given organism ID -0409 if isempty(fastaFile) -0410 %Check if organismID can be found in KEGG species list or is -0411 %set to "eukaryotes" or "prokaryotes" -0412 phylDistsFull=getPhylDist(fullfile(dataDir,'keggdb'),true); -0413 if ~ismember(organismID,[phylDistsFull.ids 'eukaryotes' 'prokaryotes']) -0414 error('Provided organismID is incorrect. Only species abbreviations from KEGG Species List or "eukaryotes"/"prokaryotes" are allowed.'); -0415 end -0416 -0417 fprintf(['Pruning the model from <strong>non-' organismID '</strong> genes... ']); -0418 if ismember(organismID,{'eukaryotes','prokaryotes'}) -0419 phylDists=getPhylDist(fullfile(dataDir,'keggdb'),maxPhylDist==-1); -0420 if strcmp(organismID,'eukaryotes') -0421 proxyid='hsa'; -0422 %Use H. sapiens here -0423 else -0424 proxyid='eco'; -0425 %Use E. coli here -0426 end -0427 [~, phylDistId]=ismember(proxyid,phylDists.ids); -0428 idsToKeep=phylDists.ids(~isinf(phylDists.distMat(phylDistId,:))); -0429 taxIDs=cellfun(@(x) x{1},cellfun(@(x) strsplit(x,':'),model.genes,'UniformOutput',false),'UniformOutput',false); -0430 I=ismember(upper(taxIDs),upper(idsToKeep)); -0431 else -0432 %KEGG organism IDs may have three or four letters -0433 organismID=strcat(organismID,':'); -0434 %Add colon for accurate matching -0435 if length(organismID)==4 -0436 I=cellfun(@(x) strcmpi(x(1:4),organismID),model.genes); -0437 elseif length(organismID)==5 -0438 I=cellfun(@(x) strcmpi(x(1:5),organismID),model.genes); -0439 end -0440 end -0441 %Remove those genes -0442 model.genes=model.genes(I); -0443 model.rxnGeneMat=model.rxnGeneMat(:,I); -0444 fprintf('COMPLETE\n'); -0445 end -0446 -0447 %First remove all reactions without genes -0448 if keepSpontaneous==true -0449 fprintf('Removing non-spontaneous reactions without GPR rules... '); -0450 load(fullfile(ravenPath,'external','kegg','keggRxns.mat'),'isSpontaneous'); -0451 I=~any(model.rxnGeneMat,2)&~ismember(model.rxns,isSpontaneous); -0452 spontRxnsWithGenes=model.rxns(any(model.rxnGeneMat,2)&~ismember(model.rxns,isSpontaneous)); -0453 else -0454 fprintf('Removing reactions without GPR rules... '); -0455 I=~any(model.rxnGeneMat,2); -0456 end -0457 model=removeReactions(model,I,true); -0458 fprintf('COMPLETE\n'); -0459 -0460 %Clean gene names -0461 fprintf('Fixing gene names in the model... '); -0462 %Get rid of the prefix organism id -0463 model.genes=regexprep(model.genes,'^\w+?:',''); -0464 fprintf('COMPLETE\n'); -0465 -0466 %If no FASTA file is supplied, then we are done here -0467 if isempty(fastaFile) -0468 %Create grRules -0469 fprintf('Constructing GPR associations and annotations for the model... '); -0470 model.grRules=cell(numel(model.rxns),1); -0471 model.grRules(:)={''}; -0472 %Add the gene associations as 'or' -0473 for i=1:numel(model.rxns) -0474 %Find the involved genes -0475 I=find(model.rxnGeneMat(i,:)); -0476 if any(I) -0477 model.grRules{i}=['(' model.genes{I(1)}]; -0478 for j=2:numel(I) -0479 model.grRules{i}=[model.grRules{i} ' or ' model.genes{I(j)}]; -0480 end -0481 model.grRules{i}=[model.grRules{i} ')']; -0482 end -0483 end -0484 %Fix grRules and reconstruct rxnGeneMat -0485 [grRules,rxnGeneMat] = standardizeGrRules(model); %Give detailed output -0486 model.grRules = grRules; -0487 model.rxnGeneMat = rxnGeneMat; -0488 %Add geneMiriams, assuming that it follows the syntax -0489 %kegg.genes/organismID:geneName -0490 model.geneMiriams=''; -0491 for i=1:numel(model.genes) -0492 model.geneMiriams{i,1}.name{1,1}='kegg.genes'; -0493 model.geneMiriams{i,1}.value{1,1}=strcat(lower(organismID),model.genes{i,1}); -0494 end -0495 %Add the description to the reactions -0496 for i=1:numel(model.rxns) -0497 if ~isempty(model.rxnNotes{i}) -0498 model.rxnNotes(i)=strcat('Included by getKEGGModelForOrganism (without HMMs).',model.rxnNotes(i)); -0499 model.rxnNotes(i)=strrep(model.rxnNotes(i),'.','. '); -0500 else -0501 model.rxnNotes(i)={'Included by getKEGGModelForOrganism (without HMMs)'}; -0502 end -0503 end -0504 fprintf('COMPLETE\n\n'); -0505 fprintf('*** Model reconstruction complete ***\n'); -0506 return; -0507 end +0375 if ~exist(fullfile(dataDir,'fasta'),'dir') +0376 mkdir(dataDir,'fasta'); +0377 end +0378 if ~exist(fullfile(dataDir,'aligned'),'dir') +0379 mkdir(dataDir,'aligned'); +0380 end +0381 if ~exist(fullfile(dataDir,'hmms'),'dir') +0382 mkdir(dataDir,'hmms'); +0383 end +0384 if ~exist(outDir,'dir') +0385 mkdir(outDir); +0386 end +0387 end +0388 +0389 %First generate the full global KEGG model. Can be provided as input. +0390 %Otherwise, getModelFromKEGG is run. The dataDir must not be supplied as +0391 %there is also an internal RAVEN version available +0392 if nargin==15 +0393 model=globalModel.model; +0394 KOModel=globalModel.KOModel; +0395 elseif any(dataDir) +0396 [model, KOModel]=getModelFromKEGG(fullfile(dataDir,'keggdb'),keepSpontaneous,keepUndefinedStoich,keepIncomplete,keepGeneral); +0397 else +0398 [model, KOModel]=getModelFromKEGG([],keepSpontaneous,keepUndefinedStoich,keepIncomplete,keepGeneral); +0399 end +0400 model.id=organismID; +0401 model.c=zeros(numel(model.rxns),1); +0402 +0403 %If no FASTA file is supplied, then just remove all genes which are not for +0404 %the given organism ID +0405 if isempty(fastaFile) +0406 %Check if organismID can be found in KEGG species list or is +0407 %set to "eukaryotes" or "prokaryotes" +0408 phylDistsFull=getPhylDist(fullfile(dataDir,'keggdb'),true); +0409 if ~ismember(organismID,[phylDistsFull.ids 'eukaryotes' 'prokaryotes']) +0410 error('Provided organismID is incorrect. Only species abbreviations from KEGG Species List or "eukaryotes"/"prokaryotes" are allowed.'); +0411 end +0412 +0413 fprintf(['Pruning the model from <strong>non-' organismID '</strong> genes... ']); +0414 if ismember(organismID,{'eukaryotes','prokaryotes'}) +0415 phylDists=getPhylDist(fullfile(dataDir,'keggdb'),maxPhylDist==-1); +0416 if strcmp(organismID,'eukaryotes') +0417 proxyid='hsa'; +0418 %Use H. sapiens here +0419 else +0420 proxyid='eco'; +0421 %Use E. coli here +0422 end +0423 [~, phylDistId]=ismember(proxyid,phylDists.ids); +0424 idsToKeep=phylDists.ids(~isinf(phylDists.distMat(phylDistId,:))); +0425 taxIDs=cellfun(@(x) x{1},cellfun(@(x) strsplit(x,':'),model.genes,'UniformOutput',false),'UniformOutput',false); +0426 I=ismember(upper(taxIDs),upper(idsToKeep)); +0427 else +0428 %KEGG organism IDs may have three or four letters +0429 organismID=strcat(organismID,':'); +0430 %Add colon for accurate matching +0431 if length(organismID)==4 +0432 I=cellfun(@(x) strcmpi(x(1:4),organismID),model.genes); +0433 elseif length(organismID)==5 +0434 I=cellfun(@(x) strcmpi(x(1:5),organismID),model.genes); +0435 end +0436 end +0437 %Remove those genes +0438 model.genes=model.genes(I); +0439 model.rxnGeneMat=model.rxnGeneMat(:,I); +0440 fprintf('COMPLETE\n'); +0441 end +0442 +0443 %First remove all reactions without genes +0444 if keepSpontaneous==true +0445 fprintf('Removing non-spontaneous reactions without GPR rules... '); +0446 load(fullfile(ravenPath,'external','kegg','keggRxns.mat'),'isSpontaneous'); +0447 I=~any(model.rxnGeneMat,2)&~ismember(model.rxns,isSpontaneous); +0448 spontRxnsWithGenes=model.rxns(any(model.rxnGeneMat,2)&~ismember(model.rxns,isSpontaneous)); +0449 else +0450 fprintf('Removing reactions without GPR rules... '); +0451 I=~any(model.rxnGeneMat,2); +0452 end +0453 model=removeReactions(model,I,true); +0454 fprintf('COMPLETE\n'); +0455 +0456 %Clean gene names +0457 fprintf('Fixing gene names in the model... '); +0458 %Get rid of the prefix organism id +0459 model.genes=regexprep(model.genes,'^\w+?:',''); +0460 fprintf('COMPLETE\n'); +0461 +0462 %If no FASTA file is supplied, then we are done here +0463 if isempty(fastaFile) +0464 %Create grRules +0465 fprintf('Constructing GPR associations and annotations for the model... '); +0466 model.grRules=cell(numel(model.rxns),1); +0467 model.grRules(:)={''}; +0468 %Add the gene associations as 'or' +0469 for i=1:numel(model.rxns) +0470 %Find the involved genes +0471 I=find(model.rxnGeneMat(i,:)); +0472 if any(I) +0473 model.grRules{i}=['(' model.genes{I(1)}]; +0474 for j=2:numel(I) +0475 model.grRules{i}=[model.grRules{i} ' or ' model.genes{I(j)}]; +0476 end +0477 model.grRules{i}=[model.grRules{i} ')']; +0478 end +0479 end +0480 %Fix grRules and reconstruct rxnGeneMat +0481 [grRules,rxnGeneMat] = standardizeGrRules(model); %Give detailed output +0482 model.grRules = grRules; +0483 model.rxnGeneMat = rxnGeneMat; +0484 %Add geneMiriams, assuming that it follows the syntax +0485 %kegg.genes/organismID:geneName +0486 model.geneMiriams=''; +0487 for i=1:numel(model.genes) +0488 model.geneMiriams{i,1}.name{1,1}='kegg.genes'; +0489 model.geneMiriams{i,1}.value{1,1}=strcat(lower(organismID),model.genes{i,1}); +0490 end +0491 %Add the description to the reactions +0492 for i=1:numel(model.rxns) +0493 if ~isempty(model.rxnNotes{i}) +0494 model.rxnNotes(i)=strcat('Included by getKEGGModelForOrganism (without HMMs).',model.rxnNotes(i)); +0495 model.rxnNotes(i)=strrep(model.rxnNotes(i),'.','. '); +0496 else +0497 model.rxnNotes(i)={'Included by getKEGGModelForOrganism (without HMMs)'}; +0498 end +0499 end +0500 fprintf('COMPLETE\n\n'); +0501 fprintf('*** Model reconstruction complete ***\n'); +0502 return; +0503 end +0504 +0505 %Create a phylogenetic distance structure +0506 phylDistStruct=getPhylDist(fullfile(dataDir,'keggdb'),maxPhylDist==-1); +0507 [~, phylDistId]=ismember(model.id,phylDistStruct.ids); 0508 -0509 %Create a phylogenetic distance structure -0510 phylDistStruct=getPhylDist(fullfile(dataDir,'keggdb'),maxPhylDist==-1); -0511 [~, phylDistId]=ismember(model.id,phylDistStruct.ids); -0512 -0513 %Calculate the real maximal distance now. An abitary large number of 1000 -0514 %is used for the "all in kingdom" or "all sequences" options. This is a bit -0515 %inconvenient way to do it, but it is to make it fit with some older code -0516 if isinf(maxPhylDist) || maxPhylDist==-1 -0517 maxPhylDist=1000; -0518 end -0519 -0520 %Get the KO ids for which files have been generated. Maybe not the neatest -0521 %way.. -0522 fastaFiles=listFiles(fullfile(dataDir,'fasta','*.fa')); -0523 alignedFiles=listFiles(fullfile(dataDir,'aligned','*.fa')); -0524 alignedWorking=listFiles(fullfile(dataDir,'aligned','*.faw')); -0525 hmmFiles=listFiles(fullfile(dataDir,'hmms','*.hmm')); -0526 outFiles=listFiles(fullfile(outDir,'*.out')); -0527 -0528 %Check if multi-FASTA files should be generated. This should only be -0529 %performed if there are IDs in the KOModel structure that haven't been -0530 %parsed yet -0531 missingFASTA=setdiff(KOModel.rxns,[fastaFiles;alignedFiles;hmmFiles;outFiles]); -0532 -0533 if ~isempty(missingFASTA) -0534 if ~exist(fullfile(dataDir,'keggdb','genes.pep'),'file') -0535 EM=['The file ''genes.pep'' cannot be located at ' strrep(dataDir,'\','/') '/ and should be downloaded from the KEGG FTP.\n']; -0536 dispEM(EM); -0537 end -0538 %Only construct models for KOs which don't have files already -0539 fastaModel=removeReactions(KOModel,setdiff(KOModel.rxns,missingFASTA),true,true); -0540 %Permute the order of the KOs in the model so that constructMultiFasta -0541 %can be run on several processors at once -0542 fastaModel=permuteModel(fastaModel,randperm(RandStream.create('mrg32k3a','Seed',cputime()),numel(fastaModel.rxns)),'rxns'); -0543 constructMultiFasta(fastaModel,fullfile(dataDir,'keggdb','genes.pep'),fullfile(dataDir,'fasta')); -0544 else -0545 fprintf('Generating the KEGG Orthology specific multi-FASTA files... COMPLETE\n'); -0546 end -0547 -0548 if isunix -0549 if ismac -0550 binEnd='.mac'; -0551 else -0552 binEnd=''; -0553 end -0554 elseif ispc -0555 binEnd=''; -0556 else -0557 EM='Unknown OS, exiting.'; -0558 disp(EM); -0559 return -0560 end -0561 -0562 %Check if alignment of FASTA files should be performed -0563 missingAligned=setdiff(KOModel.rxns,[alignedFiles;hmmFiles;alignedWorking;outFiles]); -0564 if ~isempty(missingAligned) -0565 if seqIdentity==-1 -0566 fprintf('Performing the multiple alignment for KEGG Orthology specific protein sets... 0%% complete'); -0567 else -0568 fprintf('Performing clustering and multiple alignment for KEGG Orthology specific protein sets... 0%% complete'); -0569 end -0570 missingAligned=missingAligned(randperm(RandStream.create('mrg32k3a','Seed',cputime()),numel(missingAligned))); -0571 tmpFile=tempname; -0572 %On Windows, paths need to be translated to Unix before parsing it to WSL -0573 if ispc -0574 wslPath.tmpFile=getWSLpath(tmpFile); -0575 %mafft has problems writing to terminal (/dev/stderr) when running -0576 %on WSL via MATLAB, instead write and read progress file -0577 mafftOutput = tempname; -0578 wslPath.mafftOutput=getWSLpath(mafftOutput); -0579 wslPath.mafft=getWSLpath(fullfile(ravenPath,'software','mafft','mafft-linux64','mafft.bat')); -0580 wslPath.cdhit=getWSLpath(fullfile(ravenPath,'software','cd-hit','cd-hit')); -0581 end -0582 -0583 for i=1:numel(missingAligned) -0584 %This is checked here because it could be that it is created by a -0585 %parallel process. The faw-files are saved as temporary files to -0586 %kept track of which files are being worked on -0587 if ~exist(fullfile(dataDir,'aligned',[missingAligned{i} '.faw']),'file') &&... -0588 ~exist(fullfile(dataDir,'aligned',[missingAligned{i} '.fa']),'file') -0589 %Check that the multi-FASTA file exists. It should do so since -0590 %we are saving empty files as well. Print a warning and -0591 %continue if not -0592 if ~exist(fullfile(dataDir,'fasta',[missingAligned{i} '.fa']),'file') -0593 EM=['WARNING: The multi-FASTA file for ' missingAligned{i} ' does not exist']; -0594 dispEM(EM,false); -0595 continue; -0596 end -0597 -0598 %If the multi-FASTA file is empty then save an empty aligned -0599 %file and continue -0600 s=dir(fullfile(dataDir,'fasta',[missingAligned{i} '.fa'])); -0601 if s.bytes<=0 -0602 fid=fopen(fullfile(dataDir,'aligned',[missingAligned{i} '.fa']),'w'); -0603 fclose(fid); -0604 continue; -0605 end -0606 -0607 %Create an empty file to prevent other threads to start to work -0608 %on the same alignment -0609 fid=fopen(fullfile(dataDir,'aligned',[missingAligned{i} '.faw']),'w'); -0610 fclose(fid); -0611 -0612 %First load the FASTA file, then select up to nSequences -0613 %sequences of the most closely related species, apply any -0614 %constraints from maxPhylDist, and save it as a temporary file, -0615 %and create the model from that -0616 -0617 fastaStruct=fastaread(fullfile(dataDir,'fasta',[missingAligned{i} '.fa'])); -0618 phylDist=inf(numel(fastaStruct),1); -0619 for j=1:numel(fastaStruct) -0620 %Get the organism abbreviation -0621 index=strfind(fastaStruct(j).Header,':'); -0622 if any(index) -0623 abbrev=fastaStruct(j).Header(1:index(1)-1); -0624 [~, index]=ismember(abbrev,phylDistStruct.ids); -0625 if any(index) -0626 phylDist(j)=phylDistStruct.distMat(index(1),phylDistId); -0627 end -0628 end -0629 end -0630 -0631 %Inf means that it should not be included -0632 phylDist(phylDist>maxPhylDist)=[]; -0633 -0634 %Sort based on phylDist -0635 [~, order]=sort(phylDist); -0636 -0637 %Save the first nSequences hits to a temporary FASTA file -0638 if nSequences<=numel(fastaStruct) -0639 fastaStruct=fastaStruct(order(1:nSequences)); -0640 else -0641 fastaStruct=fastaStruct(order); -0642 end -0643 -0644 %Do the clustering and alignment if there are more than one -0645 %sequences, otherwise just save the sequence (or an empty file) -0646 if numel(fastaStruct)>1 -0647 if seqIdentity~=-1 -0648 cdhitInpCustom=tempname; -0649 fastawrite(cdhitInpCustom,fastaStruct); -0650 if seqIdentity<=1 && seqIdentity>0.7 -0651 nparam='5'; -0652 elseif seqIdentity>0.6 -0653 nparam='4'; -0654 elseif seqIdentity>0.5 -0655 nparam='3'; -0656 elseif seqIdentity>0.4 -0657 nparam='2'; -0658 else -0659 EM='The provided seqIdentity must be between 0 and 1\n'; -0660 dispEM(EM); -0661 end -0662 if ispc -0663 wslPath.cdhitInpCustom=getWSLpath(cdhitInpCustom); -0664 [status, output]=system(['wsl "' wslPath.cdhit '" -T "' num2str(cores) '" -i "' wslPath.cdhitInpCustom '" -o "' wslPath.tmpFile '" -c "' num2str(seqIdentity) '" -n ' nparam ' -M 2000']); -0665 elseif ismac || isunix -0666 [status, output]=system(['"' fullfile(ravenPath,'software','cd-hit',['cd-hit' binEnd]) '" -T "' num2str(cores) '" -i "' cdhitInpCustom '" -o "' tmpFile '" -c "' num2str(seqIdentity) '" -n ' nparam ' -M 2000']); +0509 %Calculate the real maximal distance now. An abitary large number of 1000 +0510 %is used for the "all in kingdom" or "all sequences" options. This is a bit +0511 %inconvenient way to do it, but it is to make it fit with some older code +0512 if isinf(maxPhylDist) || maxPhylDist==-1 +0513 maxPhylDist=1000; +0514 end +0515 +0516 %Get the KO ids for which files have been generated. Maybe not the neatest +0517 %way.. +0518 fastaFiles=listFiles(fullfile(dataDir,'fasta','*.fa')); +0519 alignedFiles=listFiles(fullfile(dataDir,'aligned','*.fa')); +0520 alignedWorking=listFiles(fullfile(dataDir,'aligned','*.faw')); +0521 hmmFiles=listFiles(fullfile(dataDir,'hmms','*.hmm')); +0522 outFiles=listFiles(fullfile(outDir,'*.out')); +0523 +0524 %Check if multi-FASTA files should be generated. This should only be +0525 %performed if there are IDs in the KOModel structure that haven't been +0526 %parsed yet +0527 missingFASTA=setdiff(KOModel.rxns,[fastaFiles;alignedFiles;hmmFiles;outFiles]); +0528 +0529 if ~isempty(missingFASTA) +0530 if ~exist(fullfile(dataDir,'keggdb','genes.pep'),'file') +0531 EM=['The file ''genes.pep'' cannot be located at ' strrep(dataDir,'\','/') '/ and should be downloaded from the KEGG FTP.\n']; +0532 dispEM(EM); +0533 end +0534 %Only construct models for KOs which don't have files already +0535 fastaModel=removeReactions(KOModel,setdiff(KOModel.rxns,missingFASTA),true,true); +0536 %Permute the order of the KOs in the model so that constructMultiFasta +0537 %can be run on several processors at once +0538 fastaModel=permuteModel(fastaModel,randperm(RandStream.create('mrg32k3a','Seed',cputime()),numel(fastaModel.rxns)),'rxns'); +0539 constructMultiFasta(fastaModel,fullfile(dataDir,'keggdb','genes.pep'),fullfile(dataDir,'fasta')); +0540 else +0541 fprintf('Generating the KEGG Orthology specific multi-FASTA files... COMPLETE\n'); +0542 end +0543 +0544 if isunix +0545 if ismac +0546 binEnd='.mac'; +0547 else +0548 binEnd=''; +0549 end +0550 elseif ispc +0551 binEnd=''; +0552 else +0553 EM='Unknown OS, exiting.'; +0554 disp(EM); +0555 return +0556 end +0557 +0558 %Check if alignment of FASTA files should be performed +0559 missingAligned=setdiff(KOModel.rxns,[alignedFiles;hmmFiles;alignedWorking;outFiles]); +0560 if ~isempty(missingAligned) +0561 if seqIdentity==-1 +0562 fprintf('Performing the multiple alignment for KEGG Orthology specific protein sets... 0%% complete'); +0563 else +0564 fprintf('Performing clustering and multiple alignment for KEGG Orthology specific protein sets... 0%% complete'); +0565 end +0566 missingAligned=missingAligned(randperm(RandStream.create('mrg32k3a','Seed',cputime()),numel(missingAligned))); +0567 tmpFile=tempname; +0568 %On Windows, paths need to be translated to Unix before parsing it to WSL +0569 if ispc +0570 wslPath.tmpFile=getWSLpath(tmpFile); +0571 %mafft has problems writing to terminal (/dev/stderr) when running +0572 %on WSL via MATLAB, instead write and read progress file +0573 mafftOutput = tempname; +0574 wslPath.mafftOutput=getWSLpath(mafftOutput); +0575 wslPath.mafft=getWSLpath(fullfile(ravenPath,'software','mafft','mafft-linux64','mafft.bat')); +0576 wslPath.cdhit=getWSLpath(fullfile(ravenPath,'software','cd-hit','cd-hit')); +0577 end +0578 +0579 for i=1:numel(missingAligned) +0580 %This is checked here because it could be that it is created by a +0581 %parallel process. The faw-files are saved as temporary files to +0582 %kept track of which files are being worked on +0583 if ~exist(fullfile(dataDir,'aligned',[missingAligned{i} '.faw']),'file') &&... +0584 ~exist(fullfile(dataDir,'aligned',[missingAligned{i} '.fa']),'file') +0585 %Check that the multi-FASTA file exists. It should do so since +0586 %we are saving empty files as well. Print a warning and +0587 %continue if not +0588 if ~exist(fullfile(dataDir,'fasta',[missingAligned{i} '.fa']),'file') +0589 EM=['WARNING: The multi-FASTA file for ' missingAligned{i} ' does not exist']; +0590 dispEM(EM,false); +0591 continue; +0592 end +0593 +0594 %If the multi-FASTA file is empty then save an empty aligned +0595 %file and continue +0596 s=dir(fullfile(dataDir,'fasta',[missingAligned{i} '.fa'])); +0597 if s.bytes<=0 +0598 fid=fopen(fullfile(dataDir,'aligned',[missingAligned{i} '.fa']),'w'); +0599 fclose(fid); +0600 continue; +0601 end +0602 +0603 %Create an empty file to prevent other threads to start to work +0604 %on the same alignment +0605 fid=fopen(fullfile(dataDir,'aligned',[missingAligned{i} '.faw']),'w'); +0606 fclose(fid); +0607 +0608 %First load the FASTA file, then select up to nSequences +0609 %sequences of the most closely related species, apply any +0610 %constraints from maxPhylDist, and save it as a temporary file, +0611 %and create the model from that +0612 +0613 fastaStruct=fastaread(fullfile(dataDir,'fasta',[missingAligned{i} '.fa'])); +0614 phylDist=inf(numel(fastaStruct),1); +0615 for j=1:numel(fastaStruct) +0616 %Get the organism abbreviation +0617 index=strfind(fastaStruct(j).Header,':'); +0618 if any(index) +0619 abbrev=fastaStruct(j).Header(1:index(1)-1); +0620 [~, index]=ismember(abbrev,phylDistStruct.ids); +0621 if any(index) +0622 phylDist(j)=phylDistStruct.distMat(index(1),phylDistId); +0623 end +0624 end +0625 end +0626 +0627 %Inf means that it should not be included +0628 phylDist(phylDist>maxPhylDist)=[]; +0629 +0630 %Sort based on phylDist +0631 [~, order]=sort(phylDist); +0632 +0633 %Save the first nSequences hits to a temporary FASTA file +0634 if nSequences<=numel(fastaStruct) +0635 fastaStruct=fastaStruct(order(1:nSequences)); +0636 else +0637 fastaStruct=fastaStruct(order); +0638 end +0639 +0640 %Do the clustering and alignment if there are more than one +0641 %sequences, otherwise just save the sequence (or an empty file) +0642 if numel(fastaStruct)>1 +0643 if seqIdentity~=-1 +0644 cdhitInpCustom=tempname; +0645 fastawrite(cdhitInpCustom,fastaStruct); +0646 if seqIdentity<=1 && seqIdentity>0.7 +0647 nparam='5'; +0648 elseif seqIdentity>0.6 +0649 nparam='4'; +0650 elseif seqIdentity>0.5 +0651 nparam='3'; +0652 elseif seqIdentity>0.4 +0653 nparam='2'; +0654 else +0655 EM='The provided seqIdentity must be between 0 and 1\n'; +0656 dispEM(EM); +0657 end +0658 if ispc +0659 wslPath.cdhitInpCustom=getWSLpath(cdhitInpCustom); +0660 [status, output]=system(['wsl "' wslPath.cdhit '" -T "' num2str(cores) '" -i "' wslPath.cdhitInpCustom '" -o "' wslPath.tmpFile '" -c "' num2str(seqIdentity) '" -n ' nparam ' -M 2000']); +0661 elseif ismac || isunix +0662 [status, output]=system(['"' fullfile(ravenPath,'software','cd-hit',['cd-hit' binEnd]) '" -T "' num2str(cores) '" -i "' cdhitInpCustom '" -o "' tmpFile '" -c "' num2str(seqIdentity) '" -n ' nparam ' -M 2000']); +0663 end +0664 if status~=0 +0665 EM=['Error when performing clustering of ' missingAligned{i} ':\n' output]; +0666 dispEM(EM); 0667 end -0668 if status~=0 -0669 EM=['Error when performing clustering of ' missingAligned{i} ':\n' output]; -0670 dispEM(EM); +0668 %Remove the old tempfile +0669 if exist(cdhitInpCustom, 'file') +0670 delete([cdhitInpCustom '*']); 0671 end -0672 %Remove the old tempfile -0673 if exist(cdhitInpCustom, 'file') -0674 delete([cdhitInpCustom '*']); -0675 end -0676 else -0677 %This means that CD-HIT should be skipped since -0678 %seqIdentity is equal to -1 -0679 fastawrite(tmpFile,fastaStruct); -0680 end -0681 %Do the alignment for this file -0682 if ismac -0683 [status, output]=system(['"' fullfile(ravenPath,'software','mafft','mafft-mac','mafft.bat') '" --auto --anysymbol --thread "' num2str(cores) '" "' tmpFile '" > "' fullfile(dataDir,'aligned',[missingAligned{i} '.faw']) '"']); -0684 elseif isunix -0685 [status, output]=system(['"' fullfile(ravenPath,'software','mafft','mafft-linux64','mafft.bat') '" --auto --anysymbol --thread "' num2str(cores) '" "' tmpFile '" > "' fullfile(dataDir,'aligned',[missingAligned{i} '.faw']) '"']); -0686 elseif ispc -0687 wslPath.fawFile=getWSLpath(fullfile(dataDir,'aligned',[missingAligned{i} '.faw'])); -0688 [status, ~]=system(['wsl "' wslPath.mafft '" --auto --anysymbol --progress "' wslPath.mafftOutput '" --thread "' num2str(cores) '" --out "' wslPath.fawFile '" "' wslPath.tmpFile '"']); -0689 output=fileread(mafftOutput); -0690 delete(mafftOutput); -0691 end -0692 if status~=0 -0693 %It could be that alignment failed because only one -0694 %sequence was left after clustering. If that is the -0695 %case, then the clustered file is just copied as 'faw' -0696 %file -0697 if any(regexp(output,'Only 1 sequence found')) -0698 movefile(tmpFile,fullfile(dataDir,'aligned',[missingAligned{i} '.faw']),'f'); -0699 else -0700 EM=['Error when performing alignment of ' missingAligned{i} ':\n' output]; -0701 dispEM(EM); -0702 end +0672 else +0673 %This means that CD-HIT should be skipped since +0674 %seqIdentity is equal to -1 +0675 fastawrite(tmpFile,fastaStruct); +0676 end +0677 %Do the alignment for this file +0678 if ismac +0679 [status, output]=system(['"' fullfile(ravenPath,'software','mafft','mafft-mac','mafft.bat') '" --auto --anysymbol --thread "' num2str(cores) '" "' tmpFile '" > "' fullfile(dataDir,'aligned',[missingAligned{i} '.faw']) '"']); +0680 elseif isunix +0681 [status, output]=system(['"' fullfile(ravenPath,'software','mafft','mafft-linux64','mafft.bat') '" --auto --anysymbol --thread "' num2str(cores) '" "' tmpFile '" > "' fullfile(dataDir,'aligned',[missingAligned{i} '.faw']) '"']); +0682 elseif ispc +0683 wslPath.fawFile=getWSLpath(fullfile(dataDir,'aligned',[missingAligned{i} '.faw'])); +0684 [status, ~]=system(['wsl "' wslPath.mafft '" --auto --anysymbol --progress "' wslPath.mafftOutput '" --thread "' num2str(cores) '" --out "' wslPath.fawFile '" "' wslPath.tmpFile '"']); +0685 output=fileread(mafftOutput); +0686 delete(mafftOutput); +0687 end +0688 if status~=0 +0689 %It could be that alignment failed because only one +0690 %sequence was left after clustering. If that is the +0691 %case, then the clustered file is just copied as 'faw' +0692 %file +0693 if any(regexp(output,'Only 1 sequence found')) +0694 movefile(tmpFile,fullfile(dataDir,'aligned',[missingAligned{i} '.faw']),'f'); +0695 else +0696 EM=['Error when performing alignment of ' missingAligned{i} ':\n' output]; +0697 dispEM(EM); +0698 end +0699 end +0700 %Remove the old tempfile +0701 if exist(tmpFile, 'file') +0702 delete([tmpFile '*']); 0703 end -0704 %Remove the old tempfile -0705 if exist(tmpFile, 'file') -0706 delete([tmpFile '*']); -0707 end -0708 else -0709 %If there is only one sequence then it's not possible to do -0710 %a multiple alignment. Just print the sequence instead. An -0711 %empty file was written previously so that doesn't have to -0712 %be dealt with -0713 if numel(fastaStruct)==1 -0714 warnState = warning; %Save the current warning state -0715 warning('off','Bioinfo:fastawrite:AppendToFile'); -0716 fastawrite(fullfile(dataDir,'aligned',[missingAligned{i} '.faw']),fastaStruct); -0717 warning(warnState) %Reset warning state to previous settings -0718 end -0719 end -0720 %Move the temporary file to the real one -0721 movefile(fullfile(dataDir,'aligned',[missingAligned{i} '.faw']),fullfile(dataDir,'aligned',[missingAligned{i} '.fa']),'f'); -0722 -0723 %Print the progress every 25 files -0724 if rem(i-1,25) == 0 -0725 progress=num2str(floor(100*numel(listFiles(fullfile(dataDir,'aligned','*.fa')))/numel(KOModel.rxns))); -0726 progress=pad(progress,3,'left'); -0727 fprintf('\b\b\b\b\b\b\b\b\b\b\b\b\b%s%% complete',progress); -0728 end -0729 end -0730 end -0731 fprintf('\b\b\b\b\b\b\b\b\b\b\b\b\bCOMPLETE\n'); -0732 else -0733 if seqIdentity==-1 -0734 fprintf('Performing the multiple alignment for KEGG Orthology specific protein sets... COMPLETE\n'); -0735 else -0736 fprintf('Performing clustering and multiple alignment for KEGG Orthology specific protein sets... COMPLETE\n'); -0737 end -0738 end -0739 -0740 %Check if training of Hidden Markov models should be performed -0741 missingHMMs=setdiff(KOModel.rxns,[hmmFiles;outFiles]); -0742 if ~isempty(missingHMMs) -0743 fprintf('Training the KEGG Orthology specific HMMs... 0%% complete'); -0744 missingHMMs=missingHMMs(randperm(RandStream.create('mrg32k3a','Seed',cputime()),numel(missingHMMs))); -0745 %Train models for all missing KOs -0746 for i=1:numel(missingHMMs) -0747 %This is checked here because it could be that it is created by a -0748 %parallel process -0749 if ~exist(fullfile(dataDir,'hmms',[missingHMMs{i} '.hmm']),'file') && ~exist(fullfile(dataDir,'hmms',[missingHMMs{i} '.hmw']),'file') -0750 %Check that the aligned FASTA file exists. It could be that it -0751 %is still being worked on by some other instance of the program -0752 %(the .faw file should then exist). This should not happen on a -0753 %single computer. It doesn't throw an error, because it should -0754 %finalize the ones it can -0755 if ~exist(fullfile(dataDir,'aligned',[missingHMMs{i} '.fa']),'file') -0756 EM=['The aligned FASTA file for ' missingHMMs{i} ' does not exist']; -0757 dispEM(EM,false); -0758 continue; -0759 end -0760 -0761 %If the multi-FASTA file is empty then save an empty aligned -0762 %file and continue -0763 s=dir(fullfile(dataDir,'aligned',[missingHMMs{i} '.fa'])); -0764 if s.bytes<=0 -0765 fid=fopen(fullfile(dataDir,'hmms',[missingHMMs{i} '.hmm']),'w'); -0766 fclose(fid); -0767 continue; -0768 end -0769 %Create a temporary file to indicate that it is working on the -0770 %KO. This is because hmmbuild cannot overwrite existing files -0771 fid=fopen(fullfile(dataDir,'hmms',[missingHMMs{i} '.hmw']),'w'); -0772 fclose(fid); -0773 -0774 %Create HMM -0775 [status, output]=system(['"' fullfile(ravenPath,'software','hmmer',['hmmbuild' binEnd]) '" --cpu "' num2str(cores) '" "' fullfile(dataDir,'hmms',[missingHMMs{i} '.hmm']) '" "' fullfile(dataDir,'aligned',[missingHMMs{i} '.fa']) '"']); -0776 if status~=0 -0777 EM=['Error when training HMM for ' missingHMMs{i} ':\n' output]; -0778 dispEM(EM); -0779 end -0780 -0781 %Delete the temporary file -0782 delete(fullfile(dataDir,'hmms',[missingHMMs{i} '.hmw'])); -0783 -0784 %Print the progress every 25 files -0785 if rem(i-1,25) == 0 -0786 progress=num2str(floor(100*numel(listFiles(fullfile(dataDir,'hmms','*.hmm')))/numel(KOModel.rxns))); -0787 progress=pad(progress,3,'left'); -0788 fprintf('\b\b\b\b\b\b\b\b\b\b\b\b\b%s%% complete',progress); -0789 end -0790 end -0791 end -0792 fprintf('\b\b\b\b\b\b\b\b\b\b\b\b\bCOMPLETE\n'); -0793 else -0794 fprintf('Training the KEGG Orthology specific HMMs... COMPLETE\n'); -0795 end -0796 -0797 %Check which new .out files that should be generated. Check if training of -0798 %Hidden Markov models should be performed -0799 missingOUT=setdiff(KOModel.rxns,outFiles); -0800 if ~isempty(missingOUT) -0801 fprintf('Querying the user-specified FASTA file against the KEGG Orthology specific HMMs... 0%% complete'); -0802 missingOUT=missingOUT(randperm(RandStream.create('mrg32k3a','Seed',cputime()),numel(missingOUT))); -0803 for i=1:numel(missingOUT) -0804 %This is checked here because it could be that it is created by a -0805 %parallel process -0806 if ~exist(fullfile(outDir,[missingOUT{i} '.out']),'file') -0807 %Check that the HMM file exists. It should do so since %we are -0808 %saving empty files as well. Print a warning and continue if -0809 %not -0810 if ~exist(fullfile(dataDir,'hmms',[missingOUT{i} '.hmm']),'file') -0811 EM=['The HMM file for ' missingOUT{i} ' does not exist']; -0812 dispEM(EM,false); -0813 continue; -0814 end -0815 -0816 %Save an empty file to prevent several threads working on the -0817 %same file -0818 fid=fopen(fullfile(outDir,[missingOUT{i} '.out']),'w'); -0819 fclose(fid); -0820 -0821 %If the HMM file is empty then save an out file and continue -0822 s=dir(fullfile(dataDir,'hmms',[missingOUT{i} '.hmm'])); -0823 if s.bytes<=0 -0824 continue; -0825 end -0826 -0827 %Check each gene in the input file against this model -0828 [status, output]=system(['"' fullfile(ravenPath,'software','hmmer',['hmmsearch' binEnd]) '" --cpu "' num2str(cores) '" "' fullfile(dataDir,'hmms',[missingOUT{i} '.hmm']) '" "' fastaFile '"']); -0829 if status~=0 -0830 EM=['Error when querying HMM for ' missingOUT{i} ':\n' output]; -0831 dispEM(EM); -0832 end -0833 -0834 %Save the output to a file -0835 fid=fopen(fullfile(outDir,[missingOUT{i} '.out']),'w'); -0836 fwrite(fid,output); -0837 fclose(fid); -0838 -0839 %Print the progress every 25 files -0840 if rem(i-1,25) == 0 -0841 progress=num2str(floor(100*numel(listFiles(fullfile(outDir,'*.out')))/numel(KOModel.rxns))); -0842 progress=pad(progress,3,'left'); -0843 fprintf('\b\b\b\b\b\b\b\b\b\b\b\b\b%s%% complete',progress); -0844 end -0845 end -0846 end -0847 fprintf('\b\b\b\b\b\b\b\b\b\b\b\b\bCOMPLETE\n'); -0848 else -0849 fprintf('Querying the user-specified FASTA file against the KEGG Orthology specific HMMs... COMPLETE\n'); -0850 end -0851 -0852 -0853 %***Begin retrieving the output and putting together the resulting model -0854 -0855 fprintf('Parsing the HMM search results... '); -0856 %Retrieve matched genes from the HMMs -0857 koGeneMat=zeros(numel(KOModel.rxns),3000); %Make room for 3000 genes -0858 genes=cell(3000,1); -0859 %Store the best score for a gene in a hash list (since it will be searching -0860 %many times) -0861 hTable = java.util.Hashtable; -0862 -0863 geneCounter=0; -0864 for i=1:numel(KOModel.rxns) -0865 if exist(fullfile(outDir,[KOModel.rxns{i} '.out']), 'file') -0866 fid=fopen(fullfile(outDir,[KOModel.rxns{i} '.out']),'r'); -0867 beginMatches=false; -0868 while 1 -0869 %Get the next line -0870 tline = fgetl(fid); -0871 -0872 %Abort at end of file -0873 if ~ischar(tline) +0704 else +0705 %If there is only one sequence then it's not possible to do +0706 %a multiple alignment. Just print the sequence instead. An +0707 %empty file was written previously so that doesn't have to +0708 %be dealt with +0709 if numel(fastaStruct)==1 +0710 warnState = warning; %Save the current warning state +0711 warning('off','Bioinfo:fastawrite:AppendToFile'); +0712 fastawrite(fullfile(dataDir,'aligned',[missingAligned{i} '.faw']),fastaStruct); +0713 warning(warnState) %Reset warning state to previous settings +0714 end +0715 end +0716 %Move the temporary file to the real one +0717 movefile(fullfile(dataDir,'aligned',[missingAligned{i} '.faw']),fullfile(dataDir,'aligned',[missingAligned{i} '.fa']),'f'); +0718 +0719 %Print the progress every 25 files +0720 if rem(i-1,25) == 0 +0721 progress=num2str(floor(100*numel(listFiles(fullfile(dataDir,'aligned','*.fa')))/numel(KOModel.rxns))); +0722 progress=pad(progress,3,'left'); +0723 fprintf('\b\b\b\b\b\b\b\b\b\b\b\b\b%s%% complete',progress); +0724 end +0725 end +0726 end +0727 fprintf('\b\b\b\b\b\b\b\b\b\b\b\b\bCOMPLETE\n'); +0728 else +0729 if seqIdentity==-1 +0730 fprintf('Performing the multiple alignment for KEGG Orthology specific protein sets... COMPLETE\n'); +0731 else +0732 fprintf('Performing clustering and multiple alignment for KEGG Orthology specific protein sets... COMPLETE\n'); +0733 end +0734 end +0735 +0736 %Check if training of Hidden Markov models should be performed +0737 missingHMMs=setdiff(KOModel.rxns,[hmmFiles;outFiles]); +0738 if ~isempty(missingHMMs) +0739 fprintf('Training the KEGG Orthology specific HMMs... 0%% complete'); +0740 missingHMMs=missingHMMs(randperm(RandStream.create('mrg32k3a','Seed',cputime()),numel(missingHMMs))); +0741 %Train models for all missing KOs +0742 for i=1:numel(missingHMMs) +0743 %This is checked here because it could be that it is created by a +0744 %parallel process +0745 if ~exist(fullfile(dataDir,'hmms',[missingHMMs{i} '.hmm']),'file') && ~exist(fullfile(dataDir,'hmms',[missingHMMs{i} '.hmw']),'file') +0746 %Check that the aligned FASTA file exists. It could be that it +0747 %is still being worked on by some other instance of the program +0748 %(the .faw file should then exist). This should not happen on a +0749 %single computer. It doesn't throw an error, because it should +0750 %finalize the ones it can +0751 if ~exist(fullfile(dataDir,'aligned',[missingHMMs{i} '.fa']),'file') +0752 EM=['The aligned FASTA file for ' missingHMMs{i} ' does not exist']; +0753 dispEM(EM,false); +0754 continue; +0755 end +0756 +0757 %If the multi-FASTA file is empty then save an empty aligned +0758 %file and continue +0759 s=dir(fullfile(dataDir,'aligned',[missingHMMs{i} '.fa'])); +0760 if s.bytes<=0 +0761 fid=fopen(fullfile(dataDir,'hmms',[missingHMMs{i} '.hmm']),'w'); +0762 fclose(fid); +0763 continue; +0764 end +0765 %Create a temporary file to indicate that it is working on the +0766 %KO. This is because hmmbuild cannot overwrite existing files +0767 fid=fopen(fullfile(dataDir,'hmms',[missingHMMs{i} '.hmw']),'w'); +0768 fclose(fid); +0769 +0770 %Create HMM +0771 [status, output]=system(['"' fullfile(ravenPath,'software','hmmer',['hmmbuild' binEnd]) '" --cpu "' num2str(cores) '" "' fullfile(dataDir,'hmms',[missingHMMs{i} '.hmm']) '" "' fullfile(dataDir,'aligned',[missingHMMs{i} '.fa']) '"']); +0772 if status~=0 +0773 EM=['Error when training HMM for ' missingHMMs{i} ':\n' output]; +0774 dispEM(EM); +0775 end +0776 +0777 %Delete the temporary file +0778 delete(fullfile(dataDir,'hmms',[missingHMMs{i} '.hmw'])); +0779 +0780 %Print the progress every 25 files +0781 if rem(i-1,25) == 0 +0782 progress=num2str(floor(100*numel(listFiles(fullfile(dataDir,'hmms','*.hmm')))/numel(KOModel.rxns))); +0783 progress=pad(progress,3,'left'); +0784 fprintf('\b\b\b\b\b\b\b\b\b\b\b\b\b%s%% complete',progress); +0785 end +0786 end +0787 end +0788 fprintf('\b\b\b\b\b\b\b\b\b\b\b\b\bCOMPLETE\n'); +0789 else +0790 fprintf('Training the KEGG Orthology specific HMMs... COMPLETE\n'); +0791 end +0792 +0793 %Check which new .out files that should be generated. Check if training of +0794 %Hidden Markov models should be performed +0795 missingOUT=setdiff(KOModel.rxns,outFiles); +0796 if ~isempty(missingOUT) +0797 fprintf('Querying the user-specified FASTA file against the KEGG Orthology specific HMMs... 0%% complete'); +0798 missingOUT=missingOUT(randperm(RandStream.create('mrg32k3a','Seed',cputime()),numel(missingOUT))); +0799 for i=1:numel(missingOUT) +0800 %This is checked here because it could be that it is created by a +0801 %parallel process +0802 if ~exist(fullfile(outDir,[missingOUT{i} '.out']),'file') +0803 %Check that the HMM file exists. It should do so since %we are +0804 %saving empty files as well. Print a warning and continue if +0805 %not +0806 if ~exist(fullfile(dataDir,'hmms',[missingOUT{i} '.hmm']),'file') +0807 EM=['The HMM file for ' missingOUT{i} ' does not exist']; +0808 dispEM(EM,false); +0809 continue; +0810 end +0811 +0812 %Save an empty file to prevent several threads working on the +0813 %same file +0814 fid=fopen(fullfile(outDir,[missingOUT{i} '.out']),'w'); +0815 fclose(fid); +0816 +0817 %If the HMM file is empty then save an out file and continue +0818 s=dir(fullfile(dataDir,'hmms',[missingOUT{i} '.hmm'])); +0819 if s.bytes<=0 +0820 continue; +0821 end +0822 +0823 %Check each gene in the input file against this model +0824 [status, output]=system(['"' fullfile(ravenPath,'software','hmmer',['hmmsearch' binEnd]) '" --cpu "' num2str(cores) '" "' fullfile(dataDir,'hmms',[missingOUT{i} '.hmm']) '" "' fastaFile '"']); +0825 if status~=0 +0826 EM=['Error when querying HMM for ' missingOUT{i} ':\n' output]; +0827 dispEM(EM); +0828 end +0829 +0830 %Save the output to a file +0831 fid=fopen(fullfile(outDir,[missingOUT{i} '.out']),'w'); +0832 fwrite(fid,output); +0833 fclose(fid); +0834 +0835 %Print the progress every 25 files +0836 if rem(i-1,25) == 0 +0837 progress=num2str(floor(100*numel(listFiles(fullfile(outDir,'*.out')))/numel(KOModel.rxns))); +0838 progress=pad(progress,3,'left'); +0839 fprintf('\b\b\b\b\b\b\b\b\b\b\b\b\b%s%% complete',progress); +0840 end +0841 end +0842 end +0843 fprintf('\b\b\b\b\b\b\b\b\b\b\b\b\bCOMPLETE\n'); +0844 else +0845 fprintf('Querying the user-specified FASTA file against the KEGG Orthology specific HMMs... COMPLETE\n'); +0846 end +0847 +0848 +0849 %***Begin retrieving the output and putting together the resulting model +0850 +0851 fprintf('Parsing the HMM search results... '); +0852 %Retrieve matched genes from the HMMs +0853 koGeneMat=zeros(numel(KOModel.rxns),3000); %Make room for 3000 genes +0854 genes=cell(3000,1); +0855 %Store the best score for a gene in a hash list (since it will be searching +0856 %many times) +0857 hTable = java.util.Hashtable; +0858 +0859 geneCounter=0; +0860 for i=1:numel(KOModel.rxns) +0861 if exist(fullfile(outDir,[KOModel.rxns{i} '.out']), 'file') +0862 fid=fopen(fullfile(outDir,[KOModel.rxns{i} '.out']),'r'); +0863 beginMatches=false; +0864 while 1 +0865 %Get the next line +0866 tline = fgetl(fid); +0867 +0868 %Abort at end of file +0869 if ~ischar(tline) +0870 break; +0871 end +0872 +0873 if and(beginMatches,strcmp(tline,' ------ inclusion threshold ------')) 0874 break; 0875 end 0876 -0877 if and(beginMatches,strcmp(tline,' ------ inclusion threshold ------')) -0878 break; -0879 end -0880 -0881 if beginMatches==false -0882 %This is how the listing of matches begins -0883 if any(strfind(tline,'E-value ')) -0884 %Read one more line that is only padding -0885 tline = fgetl(fid); -0886 beginMatches=true; -0887 end -0888 else -0889 %If matches should be read -0890 if ~strcmp(tline,' [No hits detected that satisfy reporting thresholds]') && ~isempty(tline) -0891 elements=regexp(tline,' ','split'); -0892 elements=elements(cellfun(@any,elements)); -0893 -0894 %Check if the match is below the treshhold -0895 score=str2double(elements{1}); -0896 gene=elements{9}; -0897 if score<=cutOff -0898 %If the score is exactly 0, change it to a very -0899 %small value to avoid NaN -0900 if score==0 -0901 score=10^-250; -0902 end -0903 %Check if the gene is added already and, is so, get -0904 %the best score for it -0905 I=hTable.get(gene); -0906 if any(I) -0907 koGeneMat(i,I)=score; -0908 else -0909 geneCounter=geneCounter+1; -0910 %The gene was not present yet so add it -0911 hTable.put(gene,geneCounter); -0912 genes{geneCounter}=gene; -0913 koGeneMat(i,geneCounter)=score; -0914 end -0915 end -0916 else -0917 break; -0918 end -0919 end -0920 end -0921 fclose(fid); -0922 end -0923 end -0924 fprintf('COMPLETE\n'); -0925 -0926 fprintf('Removing gene, KEGG Orthology associations below minScoreRatioKO, minScoreRatioG... '); -0927 koGeneMat=koGeneMat(:,1:geneCounter); -0928 -0929 %Remove the genes for each KO that are below minScoreRatioKO. -0930 for i=1:size(koGeneMat,1) -0931 J=find(koGeneMat(i,:)); -0932 if any(J) -0933 koGeneMat(i,J(log(koGeneMat(i,J))/log(min(koGeneMat(i,J)))<minScoreRatioKO))=0; -0934 end -0935 end -0936 -0937 %Remove the KOs for each gene that are below minScoreRatioG -0938 for i=1:size(koGeneMat,2) -0939 J=find(koGeneMat(:,i)); -0940 if any(J) -0941 koGeneMat(J(log(koGeneMat(J,i))/log(min(koGeneMat(J,i)))<minScoreRatioG),i)=0; -0942 end -0943 end -0944 fprintf('COMPLETE\n'); -0945 -0946 fprintf('Adding gene annotations to the model... '); -0947 %Create the new model -0948 model.genes=genes(1:geneCounter); -0949 model.grRules=cell(numel(model.rxns),1); -0950 model.grRules(:)={''}; -0951 model.rxnGeneMat=sparse(numel(model.rxns),numel(model.genes)); -0952 -0953 %Loop through the reactions and add the corresponding genes -0954 for i=1:numel(model.rxns) -0955 if isstruct(model.rxnMiriams{i}) -0956 %Get all KOs -0957 I=find(strcmpi(model.rxnMiriams{i}.name,'kegg.orthology')); -0958 KOs=model.rxnMiriams{i}.value(I); -0959 %Find the KOs and the corresponding genes -0960 J=ismember(KOModel.rxns,KOs); -0961 [~, K]=find(koGeneMat(J,:)); -0962 -0963 if any(K) -0964 model.rxnGeneMat(i,K)=1; -0965 %Also delete KOs for which no genes were found. If no genes at -0966 %all were matched to the reaction it will be deleted later -0967 L=sum(koGeneMat(J,:),2)==0; -0968 model.rxnMiriams{i}.value(I(L))=[]; -0969 model.rxnMiriams{i}.name(I(L))=[]; -0970 end -0971 end -0972 end -0973 fprintf('COMPLETE\n'); -0974 -0975 %Find and delete all reactions without genes. This also removes genes that -0976 %are not used (which could happen because minScoreRatioG and -0977 %minScoreRatioKO). If keepSpontaneous==true, the spontaneous reactions -0978 %without genes are kept in the model. Spontaneous reactions with original -0979 %gene associations are treated in the same way, like the rest of the -0980 %reactions - if gene associations were removed during HMM search, such -0981 %reactions are deleted from the model -0982 if keepSpontaneous==true -0983 %Not the most comprise way to delete reactions without genes, but this -0984 %makes the code easier to understand. Firstly the non-spontaneous -0985 %reactions without genes are removed. After that, the second deletion -0986 %step removes spontaneous reactions, which had gene associations before -0987 %HMM search, but no longer have after it -0988 fprintf('Removing non-spontaneous reactions which after HMM search no longer have GPR rules... '); -0989 I=~any(model.rxnGeneMat,2)&~ismember(model.rxns,isSpontaneous); -0990 model=removeReactions(model,I,true,true); -0991 I=~any(model.rxnGeneMat,2)&ismember(model.rxns,spontRxnsWithGenes); -0992 model=removeReactions(model,I,true,true); -0993 else -0994 %Just simply check for any new reactions without genes and remove -0995 %it -0996 fprintf('Removing reactions which after HMM search no longer have GPR rules... '); -0997 I=~any(model.rxnGeneMat,2); -0998 model=removeReactions(model,I,true,true); -0999 end -1000 fprintf('COMPLETE\n'); -1001 -1002 fprintf('Constructing GPR rules and finalizing the model... '); -1003 %Add the gene associations as 'or' -1004 for i=1:numel(model.rxns) -1005 %Find the involved genes -1006 I=find(model.rxnGeneMat(i,:)); -1007 if any(I) -1008 model.grRules{i}=['(' model.genes{I(1)}]; -1009 for j=2:numel(I) -1010 model.grRules{i}=[model.grRules{i} ' or ' model.genes{I(j)}]; -1011 end -1012 model.grRules{i}=[model.grRules{i} ')']; -1013 end -1014 end -1015 -1016 %Fix grRules and reconstruct rxnGeneMat -1017 [grRules,rxnGeneMat] = standardizeGrRules(model,false); %Give detailed output -1018 model.grRules = grRules; -1019 model.rxnGeneMat = rxnGeneMat; -1020 -1021 %Add the description to the reactions -1022 for i=1:numel(model.rxns) -1023 if ~isempty(model.rxnNotes{i}) -1024 model.rxnNotes(i)=strcat('Included by getKEGGModelForOrganism (using HMMs).',model.rxnNotes(i)); -1025 model.rxnNotes(i)=strrep(model.rxnNotes(i),'.','. '); -1026 else -1027 model.rxnNotes(i)={'Included by getKEGGModelForOrganism (using HMMs)'}; -1028 end +0877 if beginMatches==false +0878 %This is how the listing of matches begins +0879 if any(strfind(tline,'E-value ')) +0880 %Read one more line that is only padding +0881 tline = fgetl(fid); +0882 beginMatches=true; +0883 end +0884 else +0885 %If matches should be read +0886 if ~strcmp(tline,' [No hits detected that satisfy reporting thresholds]') && ~isempty(tline) +0887 elements=regexp(tline,' ','split'); +0888 elements=elements(cellfun(@any,elements)); +0889 +0890 %Check if the match is below the treshhold +0891 score=str2double(elements{1}); +0892 gene=elements{9}; +0893 if score<=cutOff +0894 %If the score is exactly 0, change it to a very +0895 %small value to avoid NaN +0896 if score==0 +0897 score=10^-250; +0898 end +0899 %Check if the gene is added already and, is so, get +0900 %the best score for it +0901 I=hTable.get(gene); +0902 if any(I) +0903 koGeneMat(i,I)=score; +0904 else +0905 geneCounter=geneCounter+1; +0906 %The gene was not present yet so add it +0907 hTable.put(gene,geneCounter); +0908 genes{geneCounter}=gene; +0909 koGeneMat(i,geneCounter)=score; +0910 end +0911 end +0912 else +0913 break; +0914 end +0915 end +0916 end +0917 fclose(fid); +0918 end +0919 end +0920 fprintf('COMPLETE\n'); +0921 +0922 fprintf('Removing gene, KEGG Orthology associations below minScoreRatioKO, minScoreRatioG... '); +0923 koGeneMat=koGeneMat(:,1:geneCounter); +0924 +0925 %Remove the genes for each KO that are below minScoreRatioKO. +0926 for i=1:size(koGeneMat,1) +0927 J=find(koGeneMat(i,:)); +0928 if any(J) +0929 koGeneMat(i,J(log(koGeneMat(i,J))/log(min(koGeneMat(i,J)))<minScoreRatioKO))=0; +0930 end +0931 end +0932 +0933 %Remove the KOs for each gene that are below minScoreRatioG +0934 for i=1:size(koGeneMat,2) +0935 J=find(koGeneMat(:,i)); +0936 if any(J) +0937 koGeneMat(J(log(koGeneMat(J,i))/log(min(koGeneMat(J,i)))<minScoreRatioG),i)=0; +0938 end +0939 end +0940 fprintf('COMPLETE\n'); +0941 +0942 fprintf('Adding gene annotations to the model... '); +0943 %Create the new model +0944 model.genes=genes(1:geneCounter); +0945 model.grRules=cell(numel(model.rxns),1); +0946 model.grRules(:)={''}; +0947 model.rxnGeneMat=sparse(numel(model.rxns),numel(model.genes)); +0948 +0949 %Loop through the reactions and add the corresponding genes +0950 for i=1:numel(model.rxns) +0951 if isstruct(model.rxnMiriams{i}) +0952 %Get all KOs +0953 I=find(strcmpi(model.rxnMiriams{i}.name,'kegg.orthology')); +0954 KOs=model.rxnMiriams{i}.value(I); +0955 %Find the KOs and the corresponding genes +0956 J=ismember(KOModel.rxns,KOs); +0957 [~, K]=find(koGeneMat(J,:)); +0958 +0959 if any(K) +0960 model.rxnGeneMat(i,K)=1; +0961 %Also delete KOs for which no genes were found. If no genes at +0962 %all were matched to the reaction it will be deleted later +0963 L=sum(koGeneMat(J,:),2)==0; +0964 model.rxnMiriams{i}.value(I(L))=[]; +0965 model.rxnMiriams{i}.name(I(L))=[]; +0966 end +0967 end +0968 end +0969 fprintf('COMPLETE\n'); +0970 +0971 %Find and delete all reactions without genes. This also removes genes that +0972 %are not used (which could happen because minScoreRatioG and +0973 %minScoreRatioKO). If keepSpontaneous==true, the spontaneous reactions +0974 %without genes are kept in the model. Spontaneous reactions with original +0975 %gene associations are treated in the same way, like the rest of the +0976 %reactions - if gene associations were removed during HMM search, such +0977 %reactions are deleted from the model +0978 if keepSpontaneous==true +0979 %Not the most comprise way to delete reactions without genes, but this +0980 %makes the code easier to understand. Firstly the non-spontaneous +0981 %reactions without genes are removed. After that, the second deletion +0982 %step removes spontaneous reactions, which had gene associations before +0983 %HMM search, but no longer have after it +0984 fprintf('Removing non-spontaneous reactions which after HMM search no longer have GPR rules... '); +0985 I=~any(model.rxnGeneMat,2)&~ismember(model.rxns,isSpontaneous); +0986 model=removeReactions(model,I,true,true); +0987 I=~any(model.rxnGeneMat,2)&ismember(model.rxns,spontRxnsWithGenes); +0988 model=removeReactions(model,I,true,true); +0989 else +0990 %Just simply check for any new reactions without genes and remove +0991 %it +0992 fprintf('Removing reactions which after HMM search no longer have GPR rules... '); +0993 I=~any(model.rxnGeneMat,2); +0994 model=removeReactions(model,I,true,true); +0995 end +0996 fprintf('COMPLETE\n'); +0997 +0998 fprintf('Constructing GPR rules and finalizing the model... '); +0999 %Add the gene associations as 'or' +1000 for i=1:numel(model.rxns) +1001 %Find the involved genes +1002 I=find(model.rxnGeneMat(i,:)); +1003 if any(I) +1004 model.grRules{i}=['(' model.genes{I(1)}]; +1005 for j=2:numel(I) +1006 model.grRules{i}=[model.grRules{i} ' or ' model.genes{I(j)}]; +1007 end +1008 model.grRules{i}=[model.grRules{i} ')']; +1009 end +1010 end +1011 +1012 %Fix grRules and reconstruct rxnGeneMat +1013 [grRules,rxnGeneMat] = standardizeGrRules(model,false); %Give detailed output +1014 model.grRules = grRules; +1015 model.rxnGeneMat = rxnGeneMat; +1016 +1017 %Add the description to the reactions +1018 for i=1:numel(model.rxns) +1019 if ~isempty(model.rxnNotes{i}) +1020 model.rxnNotes(i)=strcat('Included by getKEGGModelForOrganism (using HMMs).',model.rxnNotes(i)); +1021 model.rxnNotes(i)=strrep(model.rxnNotes(i),'.','. '); +1022 else +1023 model.rxnNotes(i)={'Included by getKEGGModelForOrganism (using HMMs)'}; +1024 end +1025 end +1026 %Remove the temp fasta file +1027 delete(fastaFile) +1028 fprintf('COMPLETE\n\n*** Model reconstruction complete ***\n'); 1029 end -1030 %Remove the temp fasta file -1031 delete(fastaFile) -1032 fprintf('COMPLETE\n\n*** Model reconstruction complete ***\n'); -1033 end -1034 -1035 function files=listFiles(directory) -1036 %Supporter function to list the files in a directory and return them as a -1037 %cell array -1038 temp=dir(directory); -1039 files=cell(numel(temp),1); -1040 for i=1:numel(temp) -1041 files{i}=temp(i,1).name; -1042 end -1043 files=strrep(files,'.fa',''); -1044 files=strrep(files,'.hmm',''); -1045 files=strrep(files,'.out',''); -1046 files=strrep(files,'.faw',''); -1047 end +1030 +1031 function files=listFiles(directory) +1032 %Supporter function to list the files in a directory and return them as a +1033 %cell array +1034 temp=dir(directory); +1035 files=cell(numel(temp),1); +1036 for i=1:numel(temp) +1037 files{i}=temp(i,1).name; +1038 end +1039 files=strrep(files,'.fa',''); +1040 files=strrep(files,'.hmm',''); +1041 files=strrep(files,'.out',''); +1042 files=strrep(files,'.faw',''); +1043 end
Generated by m2html © 2005
\ No newline at end of file diff --git a/doc/external/kegg/getMetsFromKEGG.html b/doc/external/kegg/getMetsFromKEGG.html index 659d38ef..d2be217b 100644 --- a/doc/external/kegg/getMetsFromKEGG.html +++ b/doc/external/kegg/getMetsFromKEGG.html @@ -188,11 +188,11 @@

SOURCE CODE ^'KEGG'; 0085 model.name='Automatically generated from KEGG database'; 0086 -0087 %Preallocate memory for 30000 metabolites -0088 model.mets=cell(30000,1); -0089 model.metNames=cell(30000,1); -0090 model.metFormulas=cell(30000,1); -0091 model.metMiriams=cell(30000,1); +0087 %Preallocate memory for 50000 metabolites +0088 model.mets=cell(50000,1); +0089 model.metNames=cell(50000,1); +0090 model.metFormulas=cell(50000,1); +0091 model.metMiriams=cell(50000,1); 0092 0093 %First load information on metabolite ID, metabolite name, 0094 %composition, and ChEBI diff --git a/doc/external/kegg/getPhylDist.html b/doc/external/kegg/getPhylDist.html index 71e5d44f..032424ed 100644 --- a/doc/external/kegg/getPhylDist.html +++ b/doc/external/kegg/getPhylDist.html @@ -106,7 +106,7 @@

SOURCE CODE ^'The file ''taxonomy'' cannot be located at ' strrep(keggPath,'\','/') '/ and should be downloaded from the KEGG FTP.\n']); 0044 dispEM(EM); 0045 else -0046 fprintf(['Generating the KEGG phylogenetic distance matrix from ' fullfile(keggPath,'taxonomy') '... ']); +0046 fprintf('Generating keggPhylDist.mat file... '); 0047 %Open the file that describes the naming of the species 0048 fid = fopen(fullfile(keggPath,'taxonomy'), 'r'); 0049 @@ -160,46 +160,47 @@

SOURCE CODE ^end 0098 end 0099 end -0100 -0101 %Generate a distance matrix (very straight forward here, not neat) -0102 phylDistStruct.distMat=zeros(numel(phylDistStruct.ids)); -0103 for i=1:numel(phylDistStruct.ids) -0104 for j=1:numel(phylDistStruct.ids) -0105 if onlyInKingdom==true -0106 if ~strcmp(orgCat{i}(1),orgCat{j}(1)) -0107 phylDistStruct.distMat(i,j)=Inf; -0108 continue; -0109 end -0110 end -0111 %Calculate the distance between then -0112 dist=numel(orgCat{i})-numel(orgCat{j}); -0113 if dist>0 -0114 aCat=orgCat{i}(1:end-dist); -0115 else -0116 aCat=orgCat{i}; -0117 end -0118 if dist<0 -0119 bCat=orgCat{j}(1:end+dist); -0120 else -0121 bCat=orgCat{j}; -0122 end -0123 -0124 %Loop through the categories and stop when they are the -0125 %same -0126 for k=numel(aCat):-1:1 -0127 if strcmp(aCat{k},bCat{k}) -0128 break; -0129 end -0130 end -0131 phylDistStruct.distMat(i,j)=dist+numel(aCat)-k; -0132 end -0133 end -0134 %Save the structure -0135 save(distFile,'phylDistStruct'); -0136 fprintf('COMPLETE\n'); -0137 end -0138 end -0139 end +0100 %Generate a distance matrix (very straight forward here, not neat) +0101 phylDistStruct.distMat=zeros(numel(phylDistStruct.ids)); +0102 phylDistStructOnlyInKingdom.distMat=zeros(numel(phylDistStruct.ids)); +0103 phylDistStructOnlyInKingdom.ids=phylDistStruct.ids; +0104 for i=1:numel(phylDistStruct.ids) +0105 for j=1:numel(phylDistStruct.ids) +0106 if ~strcmp(orgCat{i}(1),orgCat{j}(1)) +0107 phylDistStructOnlyInKingdom.distMat(i,j)=Inf; +0108 end +0109 %Calculate the distance between then +0110 dist=numel(orgCat{i})-numel(orgCat{j}); +0111 if dist>0 +0112 aCat=orgCat{i}(1:end-dist); +0113 else +0114 aCat=orgCat{i}; +0115 end +0116 if dist<0 +0117 bCat=orgCat{j}(1:end+dist); +0118 else +0119 bCat=orgCat{j}; +0120 end +0121 +0122 %Loop through the categories and stop when they are the +0123 %same +0124 for k=numel(aCat):-1:1 +0125 if strcmp(aCat{k},bCat{k}) +0126 break; +0127 end +0128 end +0129 phylDistStruct.distMat(i,j)=dist+numel(aCat)-k; +0130 end +0131 end +0132 %Save the structure +0133 save(distFile,'phylDistStruct','phylDistStructOnlyInKingdom'); +0134 fprintf('COMPLETE\n'); +0135 end +0136 end +0137 if onlyInKingdom==true +0138 phylDistStruct=phylDistStructOnlyInKingdom; +0139 end +0140 end
Generated by m2html © 2005
\ No newline at end of file diff --git a/doc/external/kegg/getRxnsFromKEGG.html b/doc/external/kegg/getRxnsFromKEGG.html index 5b48f54d..21191878 100644 --- a/doc/external/kegg/getRxnsFromKEGG.html +++ b/doc/external/kegg/getRxnsFromKEGG.html @@ -304,309 +304,308 @@

SOURCE CODE ^if strcmp(tline(1:12),'ENZYME ') 0196 model.eccodes{rxnCounter}=tline(13:end); 0197 model.eccodes{rxnCounter}=deblank(model.eccodes{rxnCounter}); -0198 model.eccodes{rxnCounter}=strcat('ec-code/',model.eccodes{rxnCounter}); -0199 model.eccodes{rxnCounter}=regexprep(model.eccodes{rxnCounter},'\s+',';ec-code/'); -0200 end -0201 if numel(tline)>8 -0202 if strcmp(tline(1:9),'REFERENCE') -0203 pathway=false; -0204 orthology=false; -0205 end -0206 end -0207 -0208 %Add module ids -0209 if numel(tline)>18 -0210 if strcmp(tline(1:12),'MODULE ') || module==true -0211 pathway=false; -0212 orthology=false; -0213 if isstruct(model.rxnMiriams{rxnCounter}) -0214 addToIndex=numel(model.rxnMiriams{rxnCounter}.name)+1; -0215 else -0216 addToIndex=1; -0217 end -0218 tempStruct=model.rxnMiriams{rxnCounter}; -0219 tempStruct.name{addToIndex,1}='kegg.module'; -0220 tempStruct.value{addToIndex,1}=tline(13:18); -0221 model.rxnMiriams{rxnCounter}=tempStruct; -0222 end -0223 end -0224 -0225 %Add RHEA id -0226 if numel(tline)>18 -0227 if strcmp(tline(1:18),'DBLINKS RHEA: ') -0228 pathway=false; -0229 orthology=false; -0230 module=false; -0231 if isstruct(model.rxnMiriams{rxnCounter}) -0232 addToIndex=numel(model.rxnMiriams{rxnCounter}.name)+1; -0233 else -0234 addToIndex=1; -0235 end -0236 tempStruct=model.rxnMiriams{rxnCounter}; -0237 tempStruct.name{addToIndex,1}='rhea'; -0238 tempStruct.value{addToIndex,1}=tline(19:end); -0239 model.rxnMiriams{rxnCounter}=tempStruct; -0240 end -0241 end -0242 -0243 %Add KO-ids -0244 if numel(tline)>16 -0245 if strcmp(tline(1:16),'ORTHOLOGY KO: ') || strcmp(tline(1:16),' KO: ') || strcmp(tline(1:12),'ORTHOLOGY ') || orthology==true -0246 pathway=false; -0247 module=false; -0248 %Check if KO has been added already (each reaction may -0249 %belong to several) -0250 if isstruct(model.rxnMiriams{rxnCounter}) -0251 addToIndex=numel(model.rxnMiriams{rxnCounter}.name)+1; -0252 else -0253 addToIndex=1; -0254 end -0255 -0256 tempStruct=model.rxnMiriams{rxnCounter}; -0257 tempStruct.name{addToIndex,1}='kegg.orthology'; -0258 if strcmp(tline(13:16),'KO:') -0259 %This is in the old version -0260 tempStruct.value{addToIndex,1}=tline(17:22); -0261 else -0262 %This means that it found one KO in the new format -0263 %and that subsequent lines might be other KOs -0264 orthology=true; -0265 tempStruct.value{addToIndex,1}=tline(13:18); -0266 end -0267 model.rxnMiriams{rxnCounter}=tempStruct; -0268 end -0269 end -0270 -0271 %Add pathways -0272 if numel(tline)>18 -0273 if strcmp(tline(1:18),'PATHWAY PATH: ') || strcmp(tline(1:18),' PATH: ') || strcmp(tline(1:12),'PATHWAY ') || pathway==true -0274 orthology=false; -0275 module=false; -0276 %Check if annotation has been added already -0277 if isstruct(model.rxnMiriams{rxnCounter}) -0278 addToIndex=numel(model.rxnMiriams{rxnCounter}.name)+1; -0279 else -0280 addToIndex=1; -0281 end -0282 -0283 tempStruct=model.rxnMiriams{rxnCounter}; -0284 tempStruct.name{addToIndex,1}='kegg.pathway'; -0285 %If it is the old version -0286 if strcmp(tline(14:17),'PATH:') -0287 tempStruct.value{addToIndex,1}=tline(19:25); -0288 else -0289 %If it is the new version -0290 tempStruct.value{addToIndex,1}=tline(13:19); -0291 pathway=true; -0292 end -0293 -0294 %Do not save global or overview pathways. The ids for -0295 %such pathways begin with rn011 or rn012 -0296 if ~strcmp('rn011',tempStruct.value{addToIndex,1}(1:5)) && ~strcmp('rn012',tempStruct.value{addToIndex,1}(1:5)) -0297 model.rxnMiriams{rxnCounter}=tempStruct; -0298 -0299 %Also save the subSystems names. For the old KEGG -0300 %format, only the first mentioned subsystem is -0301 %picked. Use the newer KEGG format to fetch all the -0302 %subsystems -0303 if strcmp(tline(14:17),'PATH:') -0304 %The old format -0305 model.subSystems{rxnCounter}=tline(28:end); -0306 else -0307 %The new format -0308 model.subSystems{rxnCounter,1}{1,numel(model.subSystems{rxnCounter,1})+1}=tline(22:end); -0309 end -0310 end -0311 end -0312 end -0313 end -0314 -0315 %Close the file -0316 fclose(fid); -0317 -0318 %This is done here since the the indexes won't match since some -0319 %reactions are removed along the way -0320 isIncomplete=model.rxns(isIncomplete); -0321 isGeneral=model.rxns(isGeneral); -0322 isSpontaneous=model.rxns(isSpontaneous); -0323 -0324 %If too much space was allocated, shrink the model -0325 model.rxns=model.rxns(1:rxnCounter); -0326 model.rxnNames=model.rxnNames(1:rxnCounter); -0327 model.eccodes=model.eccodes(1:rxnCounter); -0328 equations=equations(1:rxnCounter); -0329 model.rxnMiriams=model.rxnMiriams(1:rxnCounter); -0330 model.rxnNotes=model.rxnNotes(1:rxnCounter); -0331 model.subSystems=model.subSystems(1:rxnCounter); -0332 -0333 %Then load the equations from another file. This is because the -0334 %equations are easier to retrieve from there -0335 -0336 %The format is rxnID: equation The reactions should have been -0337 %loaded in the exact same order -0338 fid = fopen(fullfile(keggPath,'reaction.lst'), 'r'); -0339 -0340 %Loop through the file -0341 for i=1:rxnCounter -0342 %Get the next line -0343 tline = fgetl(fid); -0344 -0345 equations{i}=tline(9:end); -0346 end -0347 -0348 %Close the file -0349 fclose(fid); -0350 -0351 %Several equations may have two whitespaces between the last -0352 %reactant and the reversible arrow sign. The number of whitespaces -0353 %is thus reduced to one -0354 equations = regexprep(equations,' <=>', ' <=>'); -0355 -0356 %Construct the S matrix and list of metabolites -0357 [S, mets, badRxns]=constructS(equations); -0358 model.S=S; -0359 model.mets=mets; -0360 -0361 %There is some limited evidence for directionality in -0362 %reaction_mapformula.lst. The information there concerns how the -0363 %reactions are drawn in the KEGG maps. If a reaction is -0364 %irreversible in the same direction for all maps, then I consider -0365 %is irreversible, otherwise reversible. Also, not all reactions are -0366 %present in the maps, so not all will have directionality. They -0367 %will be considered to be reversible -0368 -0369 %The format is R00005: 00330: C01010 => C00011 Generate a -0370 %reversibility structure with the fields: *rxns: reaction ids -0371 %*product: one met id that is a product. This is because the -0372 %*reactions might be written in another direction compared to in -0373 % the reactions.lst file -0374 %*rev: 1 if reversible, otherwise 0 -0375 reversibility.rxns={}; -0376 reversibility.product={}; -0377 reversibility.rev=[]; -0378 -0379 fid = fopen(fullfile(keggPath,'reaction_mapformula.lst'), 'r'); -0380 while 1 -0381 %Get the next line -0382 tline = fgetl(fid); -0383 -0384 %Abort at end of file -0385 if ~ischar(tline) -0386 break; -0387 end -0388 -0389 rxn=tline(1:6); -0390 prod=tline(end-5:end); -0391 rev=any(strfind(tline,'<=>')); -0392 if isempty(reversibility.rxns) -0393 reversibility.rxns{1}=rxn; -0394 reversibility.product{1}=prod; -0395 reversibility.rev(1)=rev; -0396 elseif strcmp(reversibility.rxns(end),rxn) -0397 %Check if the reaction was added before. It's an ordered -0398 %list, so only check the last element If it's reversible in -0399 %the new reaction or reversible in the old reaction then -0400 %set (keep) to be reversible -0401 if rev==1 || reversibility.rev(end)==1 -0402 reversibility.rev(end)=1; -0403 else -0404 %This means that the reaction was already loaded, that -0405 %it was irreversible before and irreversible in the new -0406 %reaction. However, it could be that they are written -0407 %in diferent directions. If the product differ, then -0408 %set to be reversible. This assumes that the reactions -0409 %are written with the same metabolite as the last one -0410 %if they are in the same direction -0411 if ~strcmp(prod,reversibility.product(end)) -0412 reversibility.rev(end)=1; -0413 end -0414 end -0415 else -0416 reversibility.rxns=[reversibility.rxns;rxn]; -0417 reversibility.product=[reversibility.product;prod]; -0418 reversibility.rev=[reversibility.rev;rev]; -0419 end -0420 end -0421 fclose(fid); -0422 -0423 %Update the reversibility -0424 model.rev=ones(rxnCounter,1); -0425 %Match the reaction ids -0426 irrevIDs=find(reversibility.rev==0); -0427 [~, I]=ismember(reversibility.rxns(irrevIDs),model.rxns); -0428 [~, prodMetIDs]=ismember(reversibility.product(irrevIDs),model.mets); -0429 model.rev(I)=0; -0430 -0431 %See if the reactions are written in the same order in model.S -0432 linearInd=sub2ind(size(model.S), prodMetIDs, I); -0433 changeOrder=I(model.S(linearInd)<0); -0434 %Change the order of these reactions -0435 model.S(:,changeOrder)=model.S(:,changeOrder).*-1; -0436 -0437 %Add some stuff to get a correct model structure -0438 model.ub=ones(rxnCounter,1)*1000; -0439 model.lb=model.rev*-1000; -0440 model.c=zeros(rxnCounter,1); -0441 model.b=zeros(numel(model.mets),1); -0442 model=removeReactions(model,badRxns,true,true); -0443 -0444 %Identify reactions with undefined stoichiometry. Such -0445 %reactions involve metabolites with an ID containing the letter "n" -0446 %or "m" -0447 I=cellfun(@any,strfind(model.mets,'n')) | cellfun(@any,strfind(model.mets,'m')); -0448 [~, J]=find(model.S(I,:)); -0449 isUndefinedStoich=model.rxns(unique(J)); -0450 %Sort model that metabolites with undefined stoichiometry would -0451 %appear in the end of metabolites list -0452 metList=[model.mets(~I);model.mets(I)]; -0453 [~,metIndexes]=ismember(metList,model.mets); -0454 model=permuteModel(model,metIndexes,'mets'); -0455 -0456 %Sort model that i) spontaneous, ii) with undefined -0457 %stoichiometry, iii) incomplete and iv) general reactions would bve -0458 %ranked in the end of the model -0459 endRxnList=unique([model.rxns(ismember(model.rxns,isSpontaneous));model.rxns(ismember(model.rxns,isUndefinedStoich));model.rxns(ismember(model.rxns,isIncomplete));model.rxns(ismember(model.rxns,isGeneral))],'stable'); -0460 rxnList=[model.rxns(~ismember(model.rxns,endRxnList));endRxnList]; -0461 [~,rxnIndexes]=ismember(rxnList,model.rxns); -0462 model=permuteModel(model,rxnIndexes,'rxns'); -0463 -0464 %Add information in rxnNotes, whether reaction belongs to any of -0465 %type i-iv mentioned a few lines above -0466 for i=(numel(rxnList)-numel(endRxnList)+1):numel(model.rxns) -0467 if ismember(model.rxns(i),isSpontaneous) -0468 model.rxnNotes(i)=strcat(model.rxnNotes(i),'Spontaneous'); -0469 end -0470 if ismember(model.rxns(i),isUndefinedStoich) -0471 if isempty(model.rxnNotes{i}) -0472 model.rxnNotes(i)=strcat(model.rxnNotes(i),'With undefined stoichiometry'); -0473 else -0474 model.rxnNotes(i)=strcat(model.rxnNotes(i),', with undefined stoichiometry'); -0475 end -0476 end -0477 if ismember(model.rxns(i),isIncomplete) -0478 if isempty(model.rxnNotes{i}) -0479 model.rxnNotes(i)=strcat(model.rxnNotes(i),'Incomplete'); -0480 else -0481 model.rxnNotes(i)=strcat(model.rxnNotes(i),', incomplete'); -0482 end -0483 end -0484 if ismember(model.rxns(i),isGeneral) -0485 if isempty(model.rxnNotes{i}) -0486 model.rxnNotes(i)=strcat(model.rxnNotes(i),'General'); -0487 else -0488 model.rxnNotes(i)=strcat(model.rxnNotes(i),', general'); -0489 end -0490 end -0491 model.rxnNotes(i)=strcat(model.rxnNotes(i),' reaction'); -0492 end -0493 -0494 %Save the model structure -0495 save(rxnsFile,'model','isGeneral','isIncomplete','isUndefinedStoich','isSpontaneous'); -0496 end -0497 end -0498 fprintf('COMPLETE\n'); -0499 -0500 end +0198 model.eccodes{rxnCounter}=regexprep(model.eccodes{rxnCounter},'\s+',';'); +0199 end +0200 if numel(tline)>8 +0201 if strcmp(tline(1:9),'REFERENCE') +0202 pathway=false; +0203 orthology=false; +0204 end +0205 end +0206 +0207 %Add module ids +0208 if numel(tline)>18 +0209 if strcmp(tline(1:12),'MODULE ') || module==true +0210 pathway=false; +0211 orthology=false; +0212 if isstruct(model.rxnMiriams{rxnCounter}) +0213 addToIndex=numel(model.rxnMiriams{rxnCounter}.name)+1; +0214 else +0215 addToIndex=1; +0216 end +0217 tempStruct=model.rxnMiriams{rxnCounter}; +0218 tempStruct.name{addToIndex,1}='kegg.module'; +0219 tempStruct.value{addToIndex,1}=tline(13:18); +0220 model.rxnMiriams{rxnCounter}=tempStruct; +0221 end +0222 end +0223 +0224 %Add RHEA id +0225 if numel(tline)>18 +0226 if strcmp(tline(1:18),'DBLINKS RHEA: ') +0227 pathway=false; +0228 orthology=false; +0229 module=false; +0230 if isstruct(model.rxnMiriams{rxnCounter}) +0231 addToIndex=numel(model.rxnMiriams{rxnCounter}.name)+1; +0232 else +0233 addToIndex=1; +0234 end +0235 tempStruct=model.rxnMiriams{rxnCounter}; +0236 tempStruct.name{addToIndex,1}='rhea'; +0237 tempStruct.value{addToIndex,1}=tline(19:end); +0238 model.rxnMiriams{rxnCounter}=tempStruct; +0239 end +0240 end +0241 +0242 %Add KO-ids +0243 if numel(tline)>16 +0244 if strcmp(tline(1:16),'ORTHOLOGY KO: ') || strcmp(tline(1:16),' KO: ') || strcmp(tline(1:12),'ORTHOLOGY ') || orthology==true +0245 pathway=false; +0246 module=false; +0247 %Check if KO has been added already (each reaction may +0248 %belong to several) +0249 if isstruct(model.rxnMiriams{rxnCounter}) +0250 addToIndex=numel(model.rxnMiriams{rxnCounter}.name)+1; +0251 else +0252 addToIndex=1; +0253 end +0254 +0255 tempStruct=model.rxnMiriams{rxnCounter}; +0256 tempStruct.name{addToIndex,1}='kegg.orthology'; +0257 if strcmp(tline(13:16),'KO:') +0258 %This is in the old version +0259 tempStruct.value{addToIndex,1}=tline(17:22); +0260 else +0261 %This means that it found one KO in the new format +0262 %and that subsequent lines might be other KOs +0263 orthology=true; +0264 tempStruct.value{addToIndex,1}=tline(13:18); +0265 end +0266 model.rxnMiriams{rxnCounter}=tempStruct; +0267 end +0268 end +0269 +0270 %Add pathways +0271 if numel(tline)>18 +0272 if strcmp(tline(1:18),'PATHWAY PATH: ') || strcmp(tline(1:18),' PATH: ') || strcmp(tline(1:12),'PATHWAY ') || pathway==true +0273 orthology=false; +0274 module=false; +0275 %Check if annotation has been added already +0276 if isstruct(model.rxnMiriams{rxnCounter}) +0277 addToIndex=numel(model.rxnMiriams{rxnCounter}.name)+1; +0278 else +0279 addToIndex=1; +0280 end +0281 +0282 tempStruct=model.rxnMiriams{rxnCounter}; +0283 tempStruct.name{addToIndex,1}='kegg.pathway'; +0284 %If it is the old version +0285 if strcmp(tline(14:17),'PATH:') +0286 tempStruct.value{addToIndex,1}=tline(19:25); +0287 else +0288 %If it is the new version +0289 tempStruct.value{addToIndex,1}=tline(13:19); +0290 pathway=true; +0291 end +0292 +0293 %Do not save global or overview pathways. The ids for +0294 %such pathways begin with rn011 or rn012 +0295 if ~strcmp('rn011',tempStruct.value{addToIndex,1}(1:5)) && ~strcmp('rn012',tempStruct.value{addToIndex,1}(1:5)) +0296 model.rxnMiriams{rxnCounter}=tempStruct; +0297 +0298 %Also save the subSystems names. For the old KEGG +0299 %format, only the first mentioned subsystem is +0300 %picked. Use the newer KEGG format to fetch all the +0301 %subsystems +0302 if strcmp(tline(14:17),'PATH:') +0303 %The old format +0304 model.subSystems{rxnCounter}=tline(28:end); +0305 else +0306 %The new format +0307 model.subSystems{rxnCounter,1}{1,numel(model.subSystems{rxnCounter,1})+1}=tline(22:end); +0308 end +0309 end +0310 end +0311 end +0312 end +0313 +0314 %Close the file +0315 fclose(fid); +0316 +0317 %This is done here since the the indexes won't match since some +0318 %reactions are removed along the way +0319 isIncomplete=model.rxns(isIncomplete); +0320 isGeneral=model.rxns(isGeneral); +0321 isSpontaneous=model.rxns(isSpontaneous); +0322 +0323 %If too much space was allocated, shrink the model +0324 model.rxns=model.rxns(1:rxnCounter); +0325 model.rxnNames=model.rxnNames(1:rxnCounter); +0326 model.eccodes=model.eccodes(1:rxnCounter); +0327 equations=equations(1:rxnCounter); +0328 model.rxnMiriams=model.rxnMiriams(1:rxnCounter); +0329 model.rxnNotes=model.rxnNotes(1:rxnCounter); +0330 model.subSystems=model.subSystems(1:rxnCounter); +0331 +0332 %Then load the equations from another file. This is because the +0333 %equations are easier to retrieve from there +0334 +0335 %The format is rxnID: equation The reactions should have been +0336 %loaded in the exact same order +0337 fid = fopen(fullfile(keggPath,'reaction.lst'), 'r'); +0338 +0339 %Loop through the file +0340 for i=1:rxnCounter +0341 %Get the next line +0342 tline = fgetl(fid); +0343 +0344 equations{i}=tline(9:end); +0345 end +0346 +0347 %Close the file +0348 fclose(fid); +0349 +0350 %Several equations may have two whitespaces between the last +0351 %reactant and the reversible arrow sign. The number of whitespaces +0352 %is thus reduced to one +0353 equations = regexprep(equations,' <=>', ' <=>'); +0354 +0355 %Construct the S matrix and list of metabolites +0356 [S, mets, badRxns]=constructS(equations); +0357 model.S=S; +0358 model.mets=mets; +0359 +0360 %There is some limited evidence for directionality in +0361 %reaction_mapformula.lst. The information there concerns how the +0362 %reactions are drawn in the KEGG maps. If a reaction is +0363 %irreversible in the same direction for all maps, then I consider +0364 %is irreversible, otherwise reversible. Also, not all reactions are +0365 %present in the maps, so not all will have directionality. They +0366 %will be considered to be reversible +0367 +0368 %The format is R00005: 00330: C01010 => C00011 Generate a +0369 %reversibility structure with the fields: *rxns: reaction ids +0370 %*product: one met id that is a product. This is because the +0371 %*reactions might be written in another direction compared to in +0372 % the reactions.lst file +0373 %*rev: 1 if reversible, otherwise 0 +0374 reversibility.rxns={}; +0375 reversibility.product={}; +0376 reversibility.rev=[]; +0377 +0378 fid = fopen(fullfile(keggPath,'reaction_mapformula.lst'), 'r'); +0379 while 1 +0380 %Get the next line +0381 tline = fgetl(fid); +0382 +0383 %Abort at end of file +0384 if ~ischar(tline) +0385 break; +0386 end +0387 +0388 rxn=tline(1:6); +0389 prod=tline(end-5:end); +0390 rev=any(strfind(tline,'<=>')); +0391 if isempty(reversibility.rxns) +0392 reversibility.rxns{1}=rxn; +0393 reversibility.product{1}=prod; +0394 reversibility.rev(1)=rev; +0395 elseif strcmp(reversibility.rxns(end),rxn) +0396 %Check if the reaction was added before. It's an ordered +0397 %list, so only check the last element If it's reversible in +0398 %the new reaction or reversible in the old reaction then +0399 %set (keep) to be reversible +0400 if rev==1 || reversibility.rev(end)==1 +0401 reversibility.rev(end)=1; +0402 else +0403 %This means that the reaction was already loaded, that +0404 %it was irreversible before and irreversible in the new +0405 %reaction. However, it could be that they are written +0406 %in diferent directions. If the product differ, then +0407 %set to be reversible. This assumes that the reactions +0408 %are written with the same metabolite as the last one +0409 %if they are in the same direction +0410 if ~strcmp(prod,reversibility.product(end)) +0411 reversibility.rev(end)=1; +0412 end +0413 end +0414 else +0415 reversibility.rxns=[reversibility.rxns;rxn]; +0416 reversibility.product=[reversibility.product;prod]; +0417 reversibility.rev=[reversibility.rev;rev]; +0418 end +0419 end +0420 fclose(fid); +0421 +0422 %Update the reversibility +0423 model.rev=ones(rxnCounter,1); +0424 %Match the reaction ids +0425 irrevIDs=find(reversibility.rev==0); +0426 [~, I]=ismember(reversibility.rxns(irrevIDs),model.rxns); +0427 [~, prodMetIDs]=ismember(reversibility.product(irrevIDs),model.mets); +0428 model.rev(I)=0; +0429 +0430 %See if the reactions are written in the same order in model.S +0431 linearInd=sub2ind(size(model.S), prodMetIDs, I); +0432 changeOrder=I(model.S(linearInd)<0); +0433 %Change the order of these reactions +0434 model.S(:,changeOrder)=model.S(:,changeOrder).*-1; +0435 +0436 %Add some stuff to get a correct model structure +0437 model.ub=ones(rxnCounter,1)*1000; +0438 model.lb=model.rev*-1000; +0439 model.c=zeros(rxnCounter,1); +0440 model.b=zeros(numel(model.mets),1); +0441 model=removeReactions(model,badRxns,true,true); +0442 +0443 %Identify reactions with undefined stoichiometry. Such +0444 %reactions involve metabolites with an ID containing the letter "n" +0445 %or "m" +0446 I=cellfun(@any,strfind(model.mets,'n')) | cellfun(@any,strfind(model.mets,'m')); +0447 [~, J]=find(model.S(I,:)); +0448 isUndefinedStoich=model.rxns(unique(J)); +0449 %Sort model that metabolites with undefined stoichiometry would +0450 %appear in the end of metabolites list +0451 metList=[model.mets(~I);model.mets(I)]; +0452 [~,metIndexes]=ismember(metList,model.mets); +0453 model=permuteModel(model,metIndexes,'mets'); +0454 +0455 %Sort model that i) spontaneous, ii) with undefined +0456 %stoichiometry, iii) incomplete and iv) general reactions would bve +0457 %ranked in the end of the model +0458 endRxnList=unique([model.rxns(ismember(model.rxns,isSpontaneous));model.rxns(ismember(model.rxns,isUndefinedStoich));model.rxns(ismember(model.rxns,isIncomplete));model.rxns(ismember(model.rxns,isGeneral))],'stable'); +0459 rxnList=[model.rxns(~ismember(model.rxns,endRxnList));endRxnList]; +0460 [~,rxnIndexes]=ismember(rxnList,model.rxns); +0461 model=permuteModel(model,rxnIndexes,'rxns'); +0462 +0463 %Add information in rxnNotes, whether reaction belongs to any of +0464 %type i-iv mentioned a few lines above +0465 for i=(numel(rxnList)-numel(endRxnList)+1):numel(model.rxns) +0466 if ismember(model.rxns(i),isSpontaneous) +0467 model.rxnNotes(i)=strcat(model.rxnNotes(i),'Spontaneous'); +0468 end +0469 if ismember(model.rxns(i),isUndefinedStoich) +0470 if isempty(model.rxnNotes{i}) +0471 model.rxnNotes(i)=strcat(model.rxnNotes(i),'With undefined stoichiometry'); +0472 else +0473 model.rxnNotes(i)=strcat(model.rxnNotes(i),', with undefined stoichiometry'); +0474 end +0475 end +0476 if ismember(model.rxns(i),isIncomplete) +0477 if isempty(model.rxnNotes{i}) +0478 model.rxnNotes(i)=strcat(model.rxnNotes(i),'Incomplete'); +0479 else +0480 model.rxnNotes(i)=strcat(model.rxnNotes(i),', incomplete'); +0481 end +0482 end +0483 if ismember(model.rxns(i),isGeneral) +0484 if isempty(model.rxnNotes{i}) +0485 model.rxnNotes(i)=strcat(model.rxnNotes(i),'General'); +0486 else +0487 model.rxnNotes(i)=strcat(model.rxnNotes(i),', general'); +0488 end +0489 end +0490 model.rxnNotes(i)=strcat(model.rxnNotes(i),' reaction'); +0491 end +0492 +0493 %Save the model structure +0494 save(rxnsFile,'model','isGeneral','isIncomplete','isUndefinedStoich','isSpontaneous'); +0495 end +0496 end +0497 fprintf('COMPLETE\n'); +0498 +0499 end
Generated by m2html © 2005
\ No newline at end of file diff --git a/doc/installation/checkInstallation.html b/doc/installation/checkInstallation.html index f087c3bb..e3ce75a7 100644 --- a/doc/installation/checkInstallation.html +++ b/doc/installation/checkInstallation.html @@ -196,7 +196,7 @@

SOURCE CODE ^'\n=== Model solvers ===\n'); 0140 0141 %Get current solver. Set it to 'none', if it is not set -0142 fprintf(' > Checking for functional LP solvers\n') +0142 fprintf(' > Checking for LP solvers\n') 0143 res=runtests('solverTests.m','OutputDetail',0); 0144 0145 fprintf([myStr(' > glpk',40) '%f']) @@ -220,118 +220,120 @@

SOURCE CODE ^'Fail\n') 0164 end 0165 -0166 fprintf(' > Checking for functional MILP solvers\n') +0166 fprintf(' > Checking for MILP solvers\n') 0167 res=runtests('fillGapsSmallTests.m','OutputDetail',0); 0168 0169 fprintf([myStr(' > glpk',40) '%f']) 0170 if res(1).Passed == 1 0171 fprintf('Pass\n') -0172 else -0173 fprintf('Fail\n') -0174 end -0175 -0176 fprintf([myStr(' > gurobi',40) '%f']) -0177 if res(2).Passed == 1 -0178 fprintf('Pass\n') -0179 else -0180 fprintf('Fail\n') -0181 end -0182 -0183 fprintf([myStr(' > cobra',40) '%f']) -0184 if res(3).Passed == 1 -0185 fprintf('Pass\n') -0186 else -0187 fprintf('Fail\n') -0188 end -0189 -0190 fprintf([myStr(' > Set RAVEN solver',40) '%f']) -0191 try -0192 oldSolver=getpref('RAVEN','solver'); -0193 solverIdx=find(strcmp(oldSolver,{'glpk','gurobi','cobra'})); -0194 catch -0195 solverIdx=0; -0196 end -0197 % Do not change old solver if functional -0198 if solverIdx~=0 && res(solverIdx).Passed == 1 -0199 fprintf([oldSolver '\n']) -0200 % Order of preference: gurobi > glpk > cobra -0201 elseif res(2).Passed == 1 -0202 fprintf('gurobi\n') -0203 setRavenSolver('gurobi'); -0204 elseif res(1).Passed == 1 -0205 fprintf('glpk\n') -0206 setRavenSolver('glpk'); -0207 elseif res(3).Passed == 1 -0208 fprintf('cobra\n') -0209 setRavenSolver('cobra'); -0210 else -0211 fprintf('None, no functional solvers\n') -0212 fprintf(' The glpk should always be working, check your RAVEN installation to make sure all files are present\n') -0213 end -0214 -0215 fprintf('\n=== Essential binary executables ===\n'); -0216 fprintf([myStr(' > Checking BLAST+',40) '%f']) -0217 res=runtests('blastPlusTests.m','OutputDetail',0); -0218 res=interpretResults(res); -0219 if res==false -0220 fprintf(' This is essential to run getBlast()\n') -0221 end -0222 -0223 fprintf([myStr(' > Checking DIAMOND',40) '%f']) -0224 res=runtests('diamondTests.m','OutputDetail',0); -0225 res=interpretResults(res); -0226 if res==false -0227 fprintf(' This is essential to run the getDiamond()\n') -0228 end -0229 -0230 fprintf([myStr(' > Checking HMMER',40) '%f']) -0231 res=runtests('hmmerTests.m','OutputDetail',0); -0232 res=interpretResults(res); -0233 if res==false -0234 fprintf([' This is essential to run getKEGGModelFromHomology()\n'... -0235 ' when using a FASTA file as input\n']) -0236 end -0237 -0238 if develMode -0239 fprintf('\n=== Development binary executables ===\n'); -0240 fprintf('NOTE: These binaries are only required when using KEGG FTP dump files in getKEGGModelForOrganism\n'); -0241 -0242 fprintf([myStr(' > Checking CD-HIT',40) '%f']) -0243 res=runtests('cdhitTests.m','OutputDetail',0); -0244 interpretResults(res); -0245 -0246 fprintf([myStr(' > Checking MAFFT',40) '%f']) -0247 res=runtests('mafftTests.m','OutputDetail',0); -0248 interpretResults(res); -0249 end -0250 -0251 fprintf('\n=== Compatibility ===\n'); -0252 fprintf([myStr(' > Checking function uniqueness',40) '%f']) -0253 checkFunctionUniqueness(); -0254 -0255 fprintf('\n*** checkInstallation complete ***\n\n'); -0256 end -0257 -0258 function res=interpretResults(results) -0259 if results.Failed==0 && results.Incomplete==0 -0260 fprintf('Pass\n'); -0261 res=true; -0262 else -0263 fprintf('Fail\n') -0264 fprintf(' Download/compile the binary and rerun checkInstallation\n'); -0265 res=false; -0266 end -0267 end -0268 -0269 function str = myStr(InputStr,len) -0270 str=InputStr; -0271 lenDiff = len - length(str); -0272 if lenDiff < 0 -0273 warning('String too long'); -0274 else -0275 str = [str blanks(lenDiff)]; -0276 end -0277 end +0172 fprintf([' While passing here, we do not recommended glpk\n'... +0173 ' for MILPs due to occasional inconsistent results\n']) +0174 else +0175 fprintf('Fail\n') +0176 end +0177 +0178 fprintf([myStr(' > gurobi',40) '%f']) +0179 if res(2).Passed == 1 +0180 fprintf('Pass\n') +0181 else +0182 fprintf('Fail\n') +0183 end +0184 +0185 fprintf([myStr(' > cobra',40) '%f']) +0186 if res(3).Passed == 1 +0187 fprintf('Pass\n') +0188 else +0189 fprintf('Fail\n') +0190 end +0191 +0192 fprintf([myStr(' > Set RAVEN solver',40) '%f']) +0193 try +0194 oldSolver=getpref('RAVEN','solver'); +0195 solverIdx=find(strcmp(oldSolver,{'glpk','gurobi','cobra'})); +0196 catch +0197 solverIdx=0; +0198 end +0199 % Do not change old solver if functional +0200 if solverIdx~=0 && res(solverIdx).Passed == 1 +0201 fprintf([oldSolver '\n']) +0202 % Order of preference: gurobi > glpk > cobra +0203 elseif res(2).Passed == 1 +0204 fprintf('gurobi\n') +0205 setRavenSolver('gurobi'); +0206 elseif res(1).Passed == 1 +0207 fprintf('glpk\n') +0208 setRavenSolver('glpk'); +0209 elseif res(3).Passed == 1 +0210 fprintf('cobra\n') +0211 setRavenSolver('cobra'); +0212 else +0213 fprintf('None, no functional solvers\n') +0214 fprintf(' The glpk should always be working, check your RAVEN installation to make sure all files are present\n') +0215 end +0216 +0217 fprintf('\n=== Essential binary executables ===\n'); +0218 fprintf([myStr(' > Checking BLAST+',40) '%f']) +0219 res=runtests('blastPlusTests.m','OutputDetail',0); +0220 res=interpretResults(res); +0221 if res==false +0222 fprintf(' This is essential to run getBlast()\n') +0223 end +0224 +0225 fprintf([myStr(' > Checking DIAMOND',40) '%f']) +0226 res=runtests('diamondTests.m','OutputDetail',0); +0227 res=interpretResults(res); +0228 if res==false +0229 fprintf(' This is essential to run the getDiamond()\n') +0230 end +0231 +0232 fprintf([myStr(' > Checking HMMER',40) '%f']) +0233 res=runtests('hmmerTests.m','OutputDetail',0); +0234 res=interpretResults(res); +0235 if res==false +0236 fprintf([' This is essential to run getKEGGModelFromHomology()\n'... +0237 ' when using a FASTA file as input\n']) +0238 end +0239 +0240 if develMode +0241 fprintf('\n=== Development binary executables ===\n'); +0242 fprintf('NOTE: These binaries are only required when using KEGG FTP dump files in getKEGGModelForOrganism\n'); +0243 +0244 fprintf([myStr(' > Checking CD-HIT',40) '%f']) +0245 res=runtests('cdhitTests.m','OutputDetail',0); +0246 interpretResults(res); +0247 +0248 fprintf([myStr(' > Checking MAFFT',40) '%f']) +0249 res=runtests('mafftTests.m','OutputDetail',0); +0250 interpretResults(res); +0251 end +0252 +0253 fprintf('\n=== Compatibility ===\n'); +0254 fprintf([myStr(' > Checking function uniqueness',40) '%f']) +0255 checkFunctionUniqueness(); +0256 +0257 fprintf('\n*** checkInstallation complete ***\n\n'); +0258 end +0259 +0260 function res=interpretResults(results) +0261 if results.Failed==0 && results.Incomplete==0 +0262 fprintf('Pass\n'); +0263 res=true; +0264 else +0265 fprintf('Fail\n') +0266 fprintf(' Download/compile the binary and rerun checkInstallation\n'); +0267 res=false; +0268 end +0269 end +0270 +0271 function str = myStr(InputStr,len) +0272 str=InputStr; +0273 lenDiff = len - length(str); +0274 if lenDiff < 0 +0275 warning('String too long'); +0276 else +0277 str = [str blanks(lenDiff)]; +0278 end +0279 end
Generated by m2html © 2005
\ No newline at end of file diff --git a/doc/testing/unit_tests/modelCurationTests.html b/doc/testing/unit_tests/modelCurationTests.html index 0e68319f..6cf1a9f3 100644 --- a/doc/testing/unit_tests/modelCurationTests.html +++ b/doc/testing/unit_tests/modelCurationTests.html @@ -186,353 +186,358 @@

SOURCE CODE ^end 0141 -0142 -0143 function addMets_oneCompTest(testCase) -0144 sourceDir = fileparts(which(mfilename)); -0145 load([sourceDir,'/test_data/ecoli_textbook.mat'], 'model'); -0146 -0147 metsToAdd.metNames={'metaboliteName','3-Phospho-D-glycerate'}; -0148 metsToAdd.compartments={'e'}; -0149 metsToAdd.metCharges=[2,0]; -0150 metsToAdd.metNotes={'this is just a test','another one'}; -0151 metsToAdd.metMiriams{1}=struct('name',{{'testDB1';'testDB2'}},'value',... -0152 {{'testValue1';'testValue2'}}); -0153 metsToAdd.metMiriams{2}=struct('name',{{'testDB1'}},'value',... -0154 {{'testValue1'}}); -0155 -0156 evalc('modelNew=addMets(model,metsToAdd,true);'); -0157 -0158 %Perform the curation manually for comparison -0159 modelManual=model; -0160 modelManual.metNotes(1:numel(modelManual.mets),1)={''}; -0161 modelManual.metCharges(1:numel(modelManual.mets),1)=NaN(numel(modelManual.mets),1); -0162 modelManual.mets(end+1:end+2)={'m_0001','m_0002'}; -0163 modelManual.metNames(end+1:end+2)=metsToAdd.metNames; -0164 modelManual.metNotes(end+1:end+2)=metsToAdd.metNotes; -0165 modelManual.metMiriams(end+1:end+2)=metsToAdd.metMiriams; -0166 modelManual.metCharges(end+1:end+2)=[2,NaN]; -0167 modelManual.b(end+1:end+2)=[0,0]; -0168 modelManual.metFormulas(end+1:end+2)={'','C3H4O7P'}; -0169 modelManual.metComps(end+1:end+2)=2; -0170 modelManual.S(end+1:end+2,:)=sparse(zeros(2,numel(modelManual.rxns))); -0171 -0172 verifyEqual(testCase,modelManual,modelNew) -0173 end -0174 -0175 function removeMetsTest(testCase) -0176 sourceDir = fileparts(which(mfilename)); -0177 load([sourceDir,'/test_data/ecoli_textbook.mat'], 'model'); -0178 -0179 modelNew=removeMets(model,'Acetate',true,true,true,true); -0180 -0181 %Perform the curation manually for comparison -0182 modelManual=model; -0183 modelManual.mets(6:7)=[]; -0184 modelManual.metNames(6:7)=[]; -0185 modelManual.metMiriams(6:7)=[]; -0186 modelManual.b(6:7)=[]; -0187 modelManual.metFormulas(6:7)=[]; -0188 modelManual.metComps(6:7)=[]; -0189 modelManual.S(6:7,:)=[]; -0190 -0191 modelManual.rxnGeneMat(20,:)=[]; -0192 modelManual.grRules(20)=[]; -0193 modelManual.rxns(20)=[]; -0194 modelManual.rxnNames(20)=[]; -0195 modelManual.lb(20)=[]; -0196 modelManual.ub(20)=[]; -0197 modelManual.c(20)=[]; -0198 modelManual.rev(20)=[]; -0199 modelManual.S(:,20)=[]; -0200 modelManual.eccodes(20)=[]; -0201 verifyEqual(testCase,modelNew,modelManual) -0202 end -0203 -0204 function addRxnsTest(testCase) -0205 sourceDir = fileparts(which(mfilename)); -0206 load([sourceDir,'/test_data/ecoli_textbook.mat'], 'model'); -0207 rxnsToAdd.rxns='test1'; -0208 rxnsToAdd.equations='2-Oxoglutarate => TEST'; -0209 rxnsToAdd.rxnNames={'testRxn1'}; % test cell array -0210 rxnsToAdd.rxnMiriams{1}=struct('name',{{'testDB1';'testDB2'}},'value',... -0211 {{'testValue1';'testValue2'}}); -0212 rxnsToAdd.grRules="b0008"; % test string -0213 evalc('modelNew=addRxns(model,rxnsToAdd,2,''c'',true);'); -0214 -0215 rxnsToAdd.rxns='test2'; -0216 rxnsToAdd=rmfield(rxnsToAdd,'equations'); -0217 rxnsToAdd.mets={{'6pgl_c','atp_c'}}; -0218 rxnsToAdd.stoichCoeffs=[[-1,2]]; -0219 rxnsToAdd.lb=-1000; -0220 rxnsToAdd.grRules='test'; -0221 evalc('modelNew=addRxns(modelNew,rxnsToAdd,1,''c'',true,true);'); -0222 -0223 %Perform the curation manually for comparison -0224 modelManual=model; -0225 -0226 modelManual.rxnMiriams=cell(numel(modelManual.rxns),1); -0227 modelManual.rxns(end+1:end+2)={'test1';'test2'}; -0228 modelManual.mets(end+1)={'m_0001'}; -0229 modelManual.S(end+1,:)=zeros(1,numel(modelManual.rxns)-2); -0230 modelManual.S(:,end+1:end+2)=zeros(numel(modelManual.mets),2); -0231 modelManual.S(end,96)=1; -0232 modelManual.S([14,73],end-1)=[-1,1]; -0233 modelManual.S([5,17],end)=[-1,2]; -0234 modelManual.lb(end+1:end+2)=[0;-1000]; -0235 modelManual.ub(end+1:end+2)=[Inf;Inf]; -0236 modelManual.rev(end+1:end+2)=[0;1]; -0237 modelManual.c(end+1:end+2)=[0;0]; -0238 modelManual.b(end+1)=[0]; -0239 modelManual.rxnNames(end+1:end+2)={'testRxn1';'testRxn1'}; -0240 modelManual.grRules(end+1:end+2)={'b0008';'test'}; -0241 -0242 modelManual.rxnGeneMat(end+1:end+2,:)=zeros(2,numel(modelManual.genes)); -0243 modelManual.rxnGeneMat(:,end+1)=zeros(numel(modelManual.rxns),1); -0244 modelManual.rxnGeneMat(97,end)=1; -0245 modelManual.rxnGeneMat(end-1,1)=1; -0246 modelManual.rxnGeneMat(end,138)=1; -0247 -0248 modelManual.eccodes(end+1:end+2)={'';''}; -0249 modelManual.genes(end+1)={'test'}; -0250 modelManual.geneShortNames(end+1)={''}; -0251 modelManual.metNames(end+1)={'TEST'}; -0252 modelManual.metComps(end+1)=[1]; -0253 modelManual.metFormulas(end+1)={''}; -0254 modelManual.metMiriams(end+1)={[]}; -0255 modelManual.rxnMiriams(end+1:end+2)={struct('name',{{'testDB1';'testDB2'}},'value',... -0256 {{'testValue1';'testValue2'}})}; -0257 -0258 verifyEqual(testCase,modelManual,modelNew) -0259 end -0260 -0261 function removeReactionsTest(testCase) -0262 sourceDir = fileparts(which(mfilename)); -0263 load([sourceDir,'/test_data/ecoli_textbook.mat'], 'model'); -0264 -0265 modelNew=removeMets(model,'Acetate',true,true,true,true); -0266 -0267 %Perform the curation manually for comparison -0268 modelManual=model; -0269 modelManual.mets(6:7)=[]; -0270 modelManual.metNames(6:7)=[]; -0271 modelManual.metMiriams(6:7)=[]; -0272 modelManual.b(6:7)=[]; -0273 modelManual.metFormulas(6:7)=[]; -0274 modelManual.metComps(6:7)=[]; -0275 modelManual.S(6:7,:)=[]; -0276 -0277 modelManual.rxnGeneMat(20,:)=[]; -0278 modelManual.grRules(20)=[]; -0279 modelManual.rxns(20)=[]; -0280 modelManual.rxnNames(20)=[]; -0281 modelManual.lb(20)=[]; -0282 modelManual.ub(20)=[]; -0283 modelManual.c(20)=[]; -0284 modelManual.rev(20)=[]; -0285 modelManual.S(:,20)=[]; -0286 modelManual.eccodes(20)=[]; -0287 verifyEqual(testCase,modelNew,modelManual) -0288 end -0289 -0290 function getMetsInCompTest(testCase) -0291 %Load the model in RAVEN format -0292 sourceDir=fileparts(fileparts(fileparts(which(mfilename)))); -0293 load(fullfile(sourceDir,'testing','unit_tests','test_data','ecoli_textbook.mat'), 'model'); -0294 -0295 evalc('[testOut.I, testOut.metNames]=getMetsInComp(model,''e'');'); -0296 -0297 testCheck.I=[false;false;false;false;false;false;true;false;true;false;false;false;false;false;true;false;false;false;false;true;false;false;false;false;true;false;false;false;true;true;false;true;false;false;true;false;true;false;true;false;false;true;false;true;false;false;true;false;true;false;false;false;false;false;true;false;true;false;false;false;true;false;true;false;false;false;false;false;false;true;false;false]; -0298 testCheck.metNames={'Acetate';'Acetaldehyde';'2-Oxoglutarate';'CO2';'Ethanol';'Formate';'D-Fructose';'Fumarate';'D-Glucose';'L-Glutamine';'L-Glutamate';'H2O';'H+';'D-Lactate';'L-Malate';'Ammonium';'O2';'Phosphate';'Pyruvate';'Succinate'}; -0299 verifyEqual(testCase,testOut,testCheck) -0300 end -0301 -0302 function getRxnsInCompTest(testCase) -0303 %Load the model in RAVEN format -0304 sourceDir=fileparts(fileparts(fileparts(which(mfilename)))); -0305 load(fullfile(sourceDir,'testing','unit_tests','test_data','ecoli_textbook.mat'), 'model'); -0306 -0307 evalc('[testOut.I, testOut.rxnNames]=getRxnsInComp(model,''e'');'); -0308 -0309 testCheck.I=[20;21;22;23;24;25;26;27;28;29;30;31;32;33;34;35;36;37;38;39]; -0310 testCheck.rxnNames={'Acetate exchange';'Acetaldehyde exchange';'2-Oxoglutarate exchange';'CO2 exchange';'Ethanol exchange';'Formate exchange';'D-Fructose exchange';'Fumarate exchange';'D-Glucose exchange';'L-Glutamine exchange';'L-Glutamate exchange';'H+ exchange';'H2O exchange';'D-lactate exchange';'L-Malate exchange';'Ammonia exchange';'O2 exchange';'Phosphate exchange';'Pyruvate exchange';'Succinate exchange'}; -0311 verifyEqual(testCase,testOut,testCheck) -0312 end -0313 -0314 function getAllRxnsFromGenesTest(testCase) -0315 %Load the model in RAVEN format -0316 sourceDir=fileparts(fileparts(fileparts(which(mfilename)))); -0317 load(fullfile(sourceDir,'testing','unit_tests','test_data','ecoli_textbook.mat'), 'model'); -0318 evalc('testOut=getAllRxnsFromGenes(model,''ACONTa'');'); -0319 -0320 testCheck={'ACONTa';'ACONTb'}; -0321 verifyEqual(testCase,testOut,testCheck) -0322 end -0323 -0324 function changeRxnsTest(testCase) -0325 %Load the model in RAVEN format -0326 sourceDir=fileparts(fileparts(fileparts(which(mfilename)))); -0327 load(fullfile(sourceDir,'testing','unit_tests','test_data','ecoli_textbook.mat'), 'model'); -0328 -0329 evalc('modelNew=changeRxns(model,''ACKr'',''2-Oxoglutarate => TEST'',2,''c'',true);'); -0330 -0331 %Perform the curation manually for comparison -0332 modelManual=model; -0333 -0334 modelManual.mets(end+1)={'m_0001'}; -0335 modelManual.S(end+1,:)=zeros(1,numel(modelManual.rxns)); -0336 modelManual.S(:,3)=0; -0337 modelManual.S([14,73],3)=[-1,1]; -0338 -0339 modelManual.metNames(end+1)={'TEST'}; -0340 modelManual.metComps(end+1)=[1]; -0341 modelManual.metFormulas(end+1)={''}; -0342 modelManual.metMiriams(end+1)={[]}; -0343 modelManual.b(end+1)=[0]; -0344 modelManual.rev(3)=[0]; -0345 -0346 verifyEqual(testCase,modelNew,modelManual) -0347 end -0348 -0349 function removeBadRxnsTest(testCase) -0350 %Load the model in RAVEN format -0351 sourceDir=fileparts(fileparts(fileparts(which(mfilename)))); -0352 load(fullfile(sourceDir,'testing','unit_tests','test_data','ecoli_textbook.mat'), 'model'); +0142 function addMets_oneCompTest(testCase) +0143 sourceDir = fileparts(which(mfilename)); +0144 load([sourceDir,'/test_data/ecoli_textbook.mat'], 'model'); +0145 +0146 metsToAdd.metNames={'metaboliteName','3-Phospho-D-glycerate'}; +0147 metsToAdd.compartments={'e'}; +0148 metsToAdd.metCharges=[2,0]; +0149 metsToAdd.metNotes={'this is just a test','another one'}; +0150 metsToAdd.metMiriams{1}=struct('name',{{'testDB1';'testDB2'}},'value',... +0151 {{'testValue1';'testValue2'}}); +0152 metsToAdd.metMiriams{2}=struct('name',{{'testDB1'}},'value',... +0153 {{'testValue1'}}); +0154 +0155 evalc('modelNew=addMets(model,metsToAdd,true);'); +0156 +0157 %Perform the curation manually for comparison +0158 modelManual=model; +0159 modelManual.metNotes(1:numel(modelManual.mets),1)={''}; +0160 modelManual.metCharges(1:numel(modelManual.mets),1)=NaN(numel(modelManual.mets),1); +0161 modelManual.mets(end+1:end+2)={'m_0001','m_0002'}; +0162 modelManual.metNames(end+1:end+2)=metsToAdd.metNames; +0163 modelManual.metNotes(end+1:end+2)=metsToAdd.metNotes; +0164 modelManual.metMiriams(end+1:end+2)=metsToAdd.metMiriams; +0165 modelManual.metCharges(end+1:end+2)=[2,NaN]; +0166 modelManual.b(end+1:end+2)=[0,0]; +0167 modelManual.metFormulas(end+1:end+2)={'','C3H4O7P'}; +0168 modelManual.metComps(end+1:end+2)=2; +0169 modelManual.S(end+1:end+2,:)=sparse(zeros(2,numel(modelManual.rxns))); +0170 +0171 verifyEqual(testCase,modelManual,modelNew) +0172 end +0173 +0174 function removeMetsTest(testCase) +0175 sourceDir = fileparts(which(mfilename)); +0176 load([sourceDir,'/test_data/ecoli_textbook.mat'], 'model'); +0177 +0178 modelNew=removeMets(model,'Acetate',true,true,true,true); +0179 +0180 %Perform the curation manually for comparison +0181 modelManual=model; +0182 modelManual.mets(6:7)=[]; +0183 modelManual.metNames(6:7)=[]; +0184 modelManual.metMiriams(6:7)=[]; +0185 modelManual.b(6:7)=[]; +0186 modelManual.metFormulas(6:7)=[]; +0187 modelManual.metComps(6:7)=[]; +0188 modelManual.S(6:7,:)=[]; +0189 +0190 modelManual.rxnGeneMat(20,:)=[]; +0191 modelManual.grRules(20)=[]; +0192 modelManual.rxns(20)=[]; +0193 modelManual.rxnNames(20)=[]; +0194 modelManual.lb(20)=[]; +0195 modelManual.ub(20)=[]; +0196 modelManual.c(20)=[]; +0197 modelManual.rev(20)=[]; +0198 modelManual.S(:,20)=[]; +0199 modelManual.eccodes(20)=[]; +0200 verifyEqual(testCase,modelNew,modelManual) +0201 end +0202 +0203 function addRxnsTest(testCase) +0204 sourceDir = fileparts(which(mfilename)); +0205 load([sourceDir,'/test_data/ecoli_textbook.mat'], 'model'); +0206 rxnsToAdd.rxns='test1'; +0207 rxnsToAdd.equations='2-Oxoglutarate => TEST'; +0208 rxnsToAdd.rxnNames={'testRxn1'}; % test cell array +0209 rxnsToAdd.rxnMiriams{1}=struct('name',{{'testDB1';'testDB2'}},'value',... +0210 {{'testValue1';'testValue2'}}); +0211 rxnsToAdd.grRules="b0008"; % test string +0212 evalc('modelNew=addRxns(model,rxnsToAdd,2,''c'',true);'); +0213 +0214 rxnsToAdd.rxns='test2'; +0215 rxnsToAdd=rmfield(rxnsToAdd,'equations'); +0216 rxnsToAdd.mets={{'6pgl_c','atp_c'}}; +0217 rxnsToAdd.stoichCoeffs=[[-1,2]]; +0218 rxnsToAdd.lb=-1000; +0219 rxnsToAdd.grRules='test'; +0220 evalc('modelNew=addRxns(modelNew,rxnsToAdd,1,''c'',true,true);'); +0221 +0222 %Perform the curation manually for comparison +0223 modelManual=model; +0224 +0225 modelManual.rxnMiriams=cell(numel(modelManual.rxns),1); +0226 modelManual.rxns(end+1:end+2)={'test1';'test2'}; +0227 modelManual.mets(end+1)={'m_0001'}; +0228 modelManual.S(end+1,:)=zeros(1,numel(modelManual.rxns)-2); +0229 modelManual.S(:,end+1:end+2)=zeros(numel(modelManual.mets),2); +0230 modelManual.S(end,96)=1; +0231 modelManual.S([14,73],end-1)=[-1,1]; +0232 modelManual.S([5,17],end)=[-1,2]; +0233 modelManual.lb(end+1:end+2)=[0;-1000]; +0234 modelManual.ub(end+1:end+2)=[Inf;Inf]; +0235 modelManual.rev(end+1:end+2)=[0;1]; +0236 modelManual.c(end+1:end+2)=[0;0]; +0237 modelManual.b(end+1)=[0]; +0238 modelManual.rxnNames(end+1:end+2)={'testRxn1';'testRxn1'}; +0239 modelManual.grRules(end+1:end+2)={'b0008';'test'}; +0240 +0241 modelManual.rxnGeneMat(end+1:end+2,:)=zeros(2,numel(modelManual.genes)); +0242 modelManual.rxnGeneMat(:,end+1)=zeros(numel(modelManual.rxns),1); +0243 modelManual.rxnGeneMat(97,end)=1; +0244 modelManual.rxnGeneMat(end-1,1)=1; +0245 modelManual.rxnGeneMat(end,138)=1; +0246 +0247 modelManual.eccodes(end+1:end+2)={'';''}; +0248 modelManual.genes(end+1)={'test'}; +0249 modelManual.geneShortNames(end+1)={''}; +0250 modelManual.metNames(end+1)={'TEST'}; +0251 modelManual.metComps(end+1)=[1]; +0252 modelManual.metFormulas(end+1)={''}; +0253 modelManual.metMiriams(end+1)={[]}; +0254 modelManual.rxnMiriams(end+1:end+2)={struct('name',{{'testDB1';'testDB2'}},'value',... +0255 {{'testValue1';'testValue2'}})}; +0256 +0257 verifyEqual(testCase,modelManual,modelNew) +0258 end +0259 +0260 function removeReactionsTest(testCase) +0261 sourceDir = fileparts(which(mfilename)); +0262 load([sourceDir,'/test_data/ecoli_textbook.mat'], 'model'); +0263 +0264 modelNew=removeMets(model,'Acetate',true,true,true,true); +0265 +0266 %Perform the curation manually for comparison +0267 modelManual=model; +0268 modelManual.mets(6:7)=[]; +0269 modelManual.metNames(6:7)=[]; +0270 modelManual.metMiriams(6:7)=[]; +0271 modelManual.b(6:7)=[]; +0272 modelManual.metFormulas(6:7)=[]; +0273 modelManual.metComps(6:7)=[]; +0274 modelManual.S(6:7,:)=[]; +0275 +0276 modelManual.rxnGeneMat(20,:)=[]; +0277 modelManual.grRules(20)=[]; +0278 modelManual.rxns(20)=[]; +0279 modelManual.rxnNames(20)=[]; +0280 modelManual.lb(20)=[]; +0281 modelManual.ub(20)=[]; +0282 modelManual.c(20)=[]; +0283 modelManual.rev(20)=[]; +0284 modelManual.S(:,20)=[]; +0285 modelManual.eccodes(20)=[]; +0286 verifyEqual(testCase,modelNew,modelManual) +0287 end +0288 +0289 function getMetsInCompTest(testCase) +0290 %Load the model in RAVEN format +0291 sourceDir=fileparts(fileparts(fileparts(which(mfilename)))); +0292 load(fullfile(sourceDir,'testing','unit_tests','test_data','ecoli_textbook.mat'), 'model'); +0293 +0294 evalc('[testOut.I, testOut.metNames]=getMetsInComp(model,''e'');'); +0295 +0296 testCheck.I=[false;false;false;false;false;false;true;false;true;false;false;false;false;false;true;false;false;false;false;true;false;false;false;false;true;false;false;false;true;true;false;true;false;false;true;false;true;false;true;false;false;true;false;true;false;false;true;false;true;false;false;false;false;false;true;false;true;false;false;false;true;false;true;false;false;false;false;false;false;true;false;false]; +0297 testCheck.metNames={'Acetate';'Acetaldehyde';'2-Oxoglutarate';'CO2';'Ethanol';'Formate';'D-Fructose';'Fumarate';'D-Glucose';'L-Glutamine';'L-Glutamate';'H2O';'H+';'D-Lactate';'L-Malate';'Ammonium';'O2';'Phosphate';'Pyruvate';'Succinate'}; +0298 verifyEqual(testCase,testOut,testCheck) +0299 end +0300 +0301 function getRxnsInCompTest(testCase) +0302 %Load the model in RAVEN format +0303 sourceDir=fileparts(fileparts(fileparts(which(mfilename)))); +0304 load(fullfile(sourceDir,'testing','unit_tests','test_data','ecoli_textbook.mat'), 'model'); +0305 +0306 evalc('[testOut.I, testOut.rxnNames]=getRxnsInComp(model,''e'');'); +0307 +0308 testCheck.I=[20;21;22;23;24;25;26;27;28;29;30;31;32;33;34;35;36;37;38;39]; +0309 testCheck.rxnNames={'Acetate exchange';'Acetaldehyde exchange';'2-Oxoglutarate exchange';'CO2 exchange';'Ethanol exchange';'Formate exchange';'D-Fructose exchange';'Fumarate exchange';'D-Glucose exchange';'L-Glutamine exchange';'L-Glutamate exchange';'H+ exchange';'H2O exchange';'D-lactate exchange';'L-Malate exchange';'Ammonia exchange';'O2 exchange';'Phosphate exchange';'Pyruvate exchange';'Succinate exchange'}; +0310 verifyEqual(testCase,testOut,testCheck) +0311 end +0312 +0313 function getAllRxnsFromGenesTest(testCase) +0314 %Load the model in RAVEN format +0315 sourceDir=fileparts(fileparts(fileparts(which(mfilename)))); +0316 load(fullfile(sourceDir,'testing','unit_tests','test_data','ecoli_textbook.mat'), 'model'); +0317 evalc('testOut=getAllRxnsFromGenes(model,''ACONTa'');'); +0318 +0319 testCheck={'ACONTa';'ACONTb'}; +0320 verifyEqual(testCase,testOut,testCheck) +0321 end +0322 +0323 function changeRxnsTest(testCase) +0324 %Load the model in RAVEN format +0325 sourceDir=fileparts(fileparts(fileparts(which(mfilename)))); +0326 load(fullfile(sourceDir,'testing','unit_tests','test_data','ecoli_textbook.mat'), 'model'); +0327 +0328 evalc('modelNew=changeRxns(model,''ACKr'',''2-Oxoglutarate <=> TEST'',2,''c'',true);'); +0329 +0330 equations.mets={{'2-Oxoglutarate','TEST'}}; +0331 equations.stoichCoeffs={[-1,1]}; +0332 +0333 evalc('modelNewStruct=changeRxns(model,''ACKr'',equations,2,''c'',true);'); +0334 +0335 %Perform the curation manually for comparison +0336 modelManual=model; +0337 +0338 modelManual.mets(end+1)={'m_0001'}; +0339 modelManual.S(end+1,:)=zeros(1,numel(modelManual.rxns)); +0340 modelManual.S(:,3)=0; +0341 modelManual.S([14,73],3)=[-1,1]; +0342 +0343 modelManual.metNames(end+1)={'TEST'}; +0344 modelManual.metComps(end+1)=[1]; +0345 modelManual.metFormulas(end+1)={''}; +0346 modelManual.metMiriams(end+1)={[]}; +0347 modelManual.b(end+1)=[0]; +0348 modelManual.rev(3)=[1]; +0349 +0350 verifyEqual(testCase,modelNew,modelManual) +0351 verifyEqual(testCase,modelNewStruct,modelManual) +0352 end 0353 -0354 [~,exchRxns]=getExchangeRxns(model); -0355 modelNew=model; -0356 -0357 modelNew.lb(exchRxns)=[0]; -0358 modelNew.ub(exchRxns)=[0]; -0359 modelNew.lb(modelNew.lb>0)=0; -0360 modelNew.S(17,:)=1; -0361 [~, testOut]=removeBadRxns(modelNew); -0362 evalc('[~, testOut]=removeBadRxns(modelNew,3,modelNew.metComps==2);'); -0363 -0364 import matlab.unittest.constraints.AnyCellOf; -0365 import matlab.unittest.constraints.ContainsSubstring; -0366 verifyThat(testCase,AnyCellOf({'SUCDi','FRD7'}),ContainsSubstring(testOut{:}),'test') -0367 %Either SUCDi or FRD7 should be given as output -0368 end -0369 -0370 function changeGrRulesTest(testCase) -0371 sourceDir = fileparts(which(mfilename)); -0372 load([sourceDir,'/test_data/ecoli_textbook.mat'], 'model'); -0373 -0374 modelNew=changeGrRules(model,{'ACKr','ACONTa'},{'b2296 and b1849';'b1849'},true); -0375 modelNew=changeGrRules(modelNew,'ACONTa','b1849',false); -0376 -0377 %Perform the curation manually for comparison -0378 modelManual=model; -0379 modelManual.grRules(3:4)={'b2296 and b1849';'b1849 or b1849'}; -0380 modelManual.rxnGeneMat(3:4,:)=0; -0381 modelManual.rxnGeneMat(3,[57,76])=1; -0382 modelManual.rxnGeneMat(4,57)=1; -0383 -0384 verifyEqual(testCase,modelNew,modelManual) -0385 end -0386 -0387 function copyToCompsTest(testCase) -0388 sourceDir = fileparts(which(mfilename)); -0389 load([sourceDir,'/test_data/ecoli_textbook.mat'], 'model'); -0390 -0391 modelNew=copyToComps(model,{'p','e'},'ACKr'); -0392 -0393 %Perform the curation manually for comparison -0394 modelManual=model; +0354 function removeBadRxnsTest(testCase) +0355 %Load the model in RAVEN format +0356 sourceDir=fileparts(fileparts(fileparts(which(mfilename)))); +0357 load(fullfile(sourceDir,'testing','unit_tests','test_data','ecoli_textbook.mat'), 'model'); +0358 +0359 [~,exchRxns]=getExchangeRxns(model); +0360 modelNew=model; +0361 +0362 modelNew.lb(exchRxns)=[0]; +0363 modelNew.ub(exchRxns)=[0]; +0364 modelNew.lb(modelNew.lb>0)=0; +0365 modelNew.S(17,:)=1; +0366 [~, testOut]=removeBadRxns(modelNew); +0367 evalc('[~, testOut]=removeBadRxns(modelNew,3,modelNew.metComps==2);'); +0368 +0369 import matlab.unittest.constraints.AnyCellOf; +0370 import matlab.unittest.constraints.ContainsSubstring; +0371 verifyThat(testCase,AnyCellOf({'SUCDi','FRD7'}),ContainsSubstring(testOut{:}),'test') +0372 %Either SUCDi or FRD7 should be given as output +0373 end +0374 +0375 function changeGrRulesTest(testCase) +0376 sourceDir = fileparts(which(mfilename)); +0377 load([sourceDir,'/test_data/ecoli_textbook.mat'], 'model'); +0378 +0379 modelNew=changeGrRules(model,{'ACKr','ACONTa'},{'b2296 and b1849';'b1849'},true); +0380 modelNew=changeGrRules(modelNew,'ACONTa','b1849',false); +0381 +0382 %Perform the curation manually for comparison +0383 modelManual=model; +0384 modelManual.grRules(3:4)={'b2296 and b1849';'b1849 or b1849'}; +0385 modelManual.rxnGeneMat(3:4,:)=0; +0386 modelManual.rxnGeneMat(3,[57,76])=1; +0387 modelManual.rxnGeneMat(4,57)=1; +0388 +0389 verifyEqual(testCase,modelNew,modelManual) +0390 end +0391 +0392 function copyToCompsTest(testCase) +0393 sourceDir = fileparts(which(mfilename)); +0394 load([sourceDir,'/test_data/ecoli_textbook.mat'], 'model'); 0395 -0396 modelManual.rxns(end+1:end+2)={'ACKr_p';'ACKr_e'}; -0397 modelManual.mets(end+1:end+7)={'ac_c_p';'actp_c_p';'adp_c_p';'atp_c_p';'actp_c_e';'adp_c_e';'atp_c_e'}; -0398 modelManual.S(end+1:end+7,:)=zeros(7,numel(modelManual.rxns)-2); -0399 modelManual.S(:,end+1:end+2)=zeros(numel(modelManual.mets),2); +0396 modelNew=copyToComps(model,{'p','e'},'ACKr'); +0397 +0398 %Perform the curation manually for comparison +0399 modelManual=model; 0400 -0401 modelManual.S([73,74,75,76],end-1)=[1,-1,-1,1]; -0402 modelManual.S([7,77,78,79],end)=[1,-1,-1,1]; -0403 -0404 modelManual.lb(end+1:end+2)=[-1000;-1000]; -0405 modelManual.ub(end+1:end+2)=[1000;1000]; -0406 modelManual.rev(end+1:end+2)=[1;1]; -0407 modelManual.c(end+1:end+2)=[0;0]; -0408 modelManual.rxnNames(end+1:end+2)={'acetate kinase';'acetate kinase'}; -0409 modelManual.grRules(end+1:end+2)={'b1849 or b2296 or b3115';'b1849 or b2296 or b3115'}; -0410 modelManual.eccodes(end+1:end+2)={'2.7.2.1';'2.7.2.1'}; -0411 -0412 modelManual.rxnGeneMat(end+1:end+2,:)=zeros(2,numel(modelManual.genes)); -0413 modelManual.rxnGeneMat(end-1:end,[57,76,97])=1; -0414 -0415 modelManual.metNames(end+1:end+7)={'Acetate';'Acetyl phosphate';'ADP';'ATP';'Acetyl phosphate';'ADP';'ATP'}; -0416 modelManual.metComps(end+1:end+7)=[3;3;3;3;2;2;2]; -0417 modelManual.metFormulas(end+1:end+7)={'C2H3O2';'C2H3O5P';'C10H12N5O10P2';'C10H12N5O13P3';'C2H3O5P';'C10H12N5O10P2';'C10H12N5O13P3'}; -0418 modelManual.metMiriams(end+1:end+7)=modelManual.metMiriams([6,12,13,17,12,13,17]); -0419 modelManual.b(end+1:end+7)=[0]; -0420 -0421 modelManual.comps(end+1)={'p'}; -0422 modelManual.compNames(end+1)={'p'}; -0423 -0424 verifyEqual(testCase,modelNew,modelManual) -0425 end -0426 -0427 function setExchangeBoundsTest(testCase) -0428 sourceDir = fileparts(which(mfilename)); -0429 load([sourceDir,'/test_data/ecoli_textbook.mat'], 'model'); -0430 -0431 modelNew=setExchangeBounds(model,{'ac_e';'akg_e'},-500,500); -0432 -0433 %Perform the curation manually for comparison -0434 modelManual=model; +0401 modelManual.rxns(end+1:end+2)={'ACKr_p';'ACKr_e'}; +0402 modelManual.mets(end+1:end+7)={'ac_c_p';'actp_c_p';'adp_c_p';'atp_c_p';'actp_c_e';'adp_c_e';'atp_c_e'}; +0403 modelManual.S(end+1:end+7,:)=zeros(7,numel(modelManual.rxns)-2); +0404 modelManual.S(:,end+1:end+2)=zeros(numel(modelManual.mets),2); +0405 +0406 modelManual.S([73,74,75,76],end-1)=[1,-1,-1,1]; +0407 modelManual.S([7,77,78,79],end)=[1,-1,-1,1]; +0408 +0409 modelManual.lb(end+1:end+2)=[-1000;-1000]; +0410 modelManual.ub(end+1:end+2)=[1000;1000]; +0411 modelManual.rev(end+1:end+2)=[1;1]; +0412 modelManual.c(end+1:end+2)=[0;0]; +0413 modelManual.rxnNames(end+1:end+2)={'acetate kinase';'acetate kinase'}; +0414 modelManual.grRules(end+1:end+2)={'b1849 or b2296 or b3115';'b1849 or b2296 or b3115'}; +0415 modelManual.eccodes(end+1:end+2)={'2.7.2.1';'2.7.2.1'}; +0416 +0417 modelManual.rxnGeneMat(end+1:end+2,:)=zeros(2,numel(modelManual.genes)); +0418 modelManual.rxnGeneMat(end-1:end,[57,76,97])=1; +0419 +0420 modelManual.metNames(end+1:end+7)={'Acetate';'Acetyl phosphate';'ADP';'ATP';'Acetyl phosphate';'ADP';'ATP'}; +0421 modelManual.metComps(end+1:end+7)=[3;3;3;3;2;2;2]; +0422 modelManual.metFormulas(end+1:end+7)={'C2H3O2';'C2H3O5P';'C10H12N5O10P2';'C10H12N5O13P3';'C2H3O5P';'C10H12N5O10P2';'C10H12N5O13P3'}; +0423 modelManual.metMiriams(end+1:end+7)=modelManual.metMiriams([6,12,13,17,12,13,17]); +0424 modelManual.b(end+1:end+7)=[0]; +0425 +0426 modelManual.comps(end+1)={'p'}; +0427 modelManual.compNames(end+1)={'p'}; +0428 +0429 verifyEqual(testCase,modelNew,modelManual) +0430 end +0431 +0432 function setExchangeBoundsTest(testCase) +0433 sourceDir = fileparts(which(mfilename)); +0434 load([sourceDir,'/test_data/ecoli_textbook.mat'], 'model'); 0435 -0436 exchRxns=[21;23;24;25;26;27;28;29;30;31;32;33;34;35;36;37;38;39]; -0437 modelManual.lb(exchRxns)=0; -0438 modelManual.lb([20,22])=-500; -0439 modelManual.ub([20,22])=500; +0436 modelNew=setExchangeBounds(model,{'ac_e';'akg_e'},-500,500); +0437 +0438 %Perform the curation manually for comparison +0439 modelManual=model; 0440 -0441 verifyEqual(testCase,modelNew,modelManual) -0442 end -0443 -0444 function addRxnsGenesMetsTest(testCase) -0445 sourceDir = fileparts(which(mfilename)); -0446 load(fullfile(sourceDir,'test_data','ecoli_textbook.mat'), 'model'); -0447 -0448 sbmlFile=fullfile(sourceDir,'..','..','tutorial','empty.xml'); -0449 evalc('modelEmpty=importModel(sbmlFile)'); % Repress warnings -0450 -0451 evalc('modelNew=addRxnsGenesMets(model,modelEmpty,''r1'',true);'); +0441 exchRxns=[21;23;24;25;26;27;28;29;30;31;32;33;34;35;36;37;38;39]; +0442 modelManual.lb(exchRxns)=0; +0443 modelManual.lb([20,22])=-500; +0444 modelManual.ub([20,22])=500; +0445 +0446 verifyEqual(testCase,modelNew,modelManual) +0447 end +0448 +0449 function addRxnsGenesMetsTest(testCase) +0450 sourceDir = fileparts(which(mfilename)); +0451 load(fullfile(sourceDir,'test_data','ecoli_textbook.mat'), 'model'); 0452 -0453 %Perform the curation manually for comparison -0454 modelManual=model; +0453 sbmlFile=fullfile(sourceDir,'..','..','tutorial','empty.xml'); +0454 evalc('modelEmpty=importModel(sbmlFile)'); % Repress warnings 0455 -0456 modelManual.rxns(end+1)={'r1'}; -0457 modelManual.mets(end+1:end+3)={'m1';'m2';'m3'}; -0458 modelManual.S(end+1:end+3,:)=zeros(3,numel(modelManual.rxns)-1); -0459 modelManual.S(:,end+1)=zeros(numel(modelManual.mets),1); -0460 modelManual.S([42,73,74,75],end)=[-1,-1,1,1]; -0461 -0462 modelManual.lb(end+1)=[0]; -0463 modelManual.ub(end+1)=[1000]; -0464 modelManual.rev(end+1)=[0]; -0465 modelManual.c(end+1)=[0]; -0466 modelManual.rxnNames(end+1)={'Breakdown of sucrose (invertase)'}; -0467 modelManual.grRules(end+1)={'g1'}; -0468 modelManual.eccodes(end+1)={''}; -0469 modelManual.rxnNotes(1:numel(modelManual.rxns),1)={''}; -0470 modelManual.rxnNotes(end)={'Added via addRxnsGenesMets()'}; -0471 modelManual.rxnConfidenceScores(1:numel(modelManual.rxns),1)=NaN; -0472 modelManual.rxnConfidenceScores(end)=[0]; -0473 -0474 modelManual.genes(end+1)={'g1'}; -0475 modelManual.geneShortNames(end+1)={''}; -0476 -0477 modelManual.rxnGeneMat(end+1,:)=zeros(1,numel(modelManual.genes)-1); -0478 modelManual.rxnGeneMat(:,end+1)=zeros(numel(modelManual.rxns),1); -0479 modelManual.rxnGeneMat(end,end)=1; -0480 -0481 modelManual.metNames(end+1:end+3)={'sucrose';'glucose';'fructose'}; -0482 modelManual.metComps(end+1:end+3)=[2;2;2]; -0483 modelManual.metFormulas(end+1:end+3)={'C12H22O11';'C6H12O6';'C6H12O6'}; -0484 modelManual.metMiriams(end+1:end+3)={[];[];[]}; -0485 modelManual.b(end+1:end+3)=[0]; -0486 -0487 verifyEqual(testCase,modelNew,modelManual) -0488 end +0456 evalc('modelNew=addRxnsGenesMets(model,modelEmpty,''r1'',true);'); +0457 +0458 %Perform the curation manually for comparison +0459 modelManual=model; +0460 +0461 modelManual.rxns(end+1)={'r1'}; +0462 modelManual.mets(end+1:end+3)={'m1';'m2';'m3'}; +0463 modelManual.S(end+1:end+3,:)=zeros(3,numel(modelManual.rxns)-1); +0464 modelManual.S(:,end+1)=zeros(numel(modelManual.mets),1); +0465 modelManual.S([42,73,74,75],end)=[-1,-1,1,1]; +0466 +0467 modelManual.lb(end+1)=[0]; +0468 modelManual.ub(end+1)=[1000]; +0469 modelManual.rev(end+1)=[0]; +0470 modelManual.c(end+1)=[0]; +0471 modelManual.rxnNames(end+1)={'Breakdown of sucrose (invertase)'}; +0472 modelManual.grRules(end+1)={'g1'}; +0473 modelManual.eccodes(end+1)={''}; +0474 modelManual.rxnNotes(1:numel(modelManual.rxns),1)={''}; +0475 modelManual.rxnNotes(end)={'Added via addRxnsGenesMets()'}; +0476 modelManual.rxnConfidenceScores(1:numel(modelManual.rxns),1)=NaN; +0477 modelManual.rxnConfidenceScores(end)=[0]; +0478 +0479 modelManual.genes(end+1)={'g1'}; +0480 modelManual.geneShortNames(end+1)={''}; +0481 +0482 modelManual.rxnGeneMat(end+1,:)=zeros(1,numel(modelManual.genes)-1); +0483 modelManual.rxnGeneMat(:,end+1)=zeros(numel(modelManual.rxns),1); +0484 modelManual.rxnGeneMat(end,end)=1; +0485 +0486 modelManual.metNames(end+1:end+3)={'sucrose';'glucose';'fructose'}; +0487 modelManual.metComps(end+1:end+3)=[2;2;2]; +0488 modelManual.metFormulas(end+1:end+3)={'C12H22O11';'C6H12O6';'C6H12O6'}; +0489 modelManual.metMiriams(end+1:end+3)={[];[];[]}; +0490 modelManual.b(end+1:end+3)=[0]; +0491 +0492 verifyEqual(testCase,modelNew,modelManual) +0493 end
Generated by m2html © 2005
\ No newline at end of file diff --git a/external/kegg/constructMultiFasta.m b/external/kegg/constructMultiFasta.m index 1e924698..e3cb5ee9 100755 --- a/external/kegg/constructMultiFasta.m +++ b/external/kegg/constructMultiFasta.m @@ -58,7 +58,7 @@ function constructMultiFasta(model,sourceFile,outputDir) fprintf(['NOTICE: If Matlab is freezing and does not provide any output in 30 minutes, consider increasing Java Heap Memory\n', ... 'in MATLAB settings and start over with the new session\n']); -fprintf('Mapping genes to the multi-FASTA source file... '); +fprintf('Mapping genes to the multi-FASTA source file... 0%% complete'); %Now loop through the file to see which genes are present in the gene list %and save their position IN elementPositions! This is to enable a easy way %to get the distance to the following element @@ -91,8 +91,14 @@ function constructMultiFasta(model,sourceFile,outputDir) genePositions(id)=i; end end + %Print the progress + if rem(i-1,350000) == 0 + progress=num2str(floor(100*i/numel(elementPositions))); + progress=pad(progress,3,'left'); + fprintf('\b\b\b\b\b\b\b\b\b\b\b\b\b%s%% complete',progress); + end end -fprintf('COMPLETE\n'); +fprintf('\b\b\b\b\b\b\b\b\b\b\b\b\bCOMPLETE\n'); fprintf('Generating the KEGG Orthology specific multi-FASTA files... 0%% complete'); %Loop through the reactions and print the corresponding sequences @@ -144,7 +150,7 @@ function constructMultiFasta(model,sourceFile,outputDir) end %Print the progress if rem(i-1,50) == 0 - progress=num2str(i/numel(model.rxns)); + progress=num2str(floor(100*i/numel(model.rxns))); progress=pad(progress,3,'left'); fprintf('\b\b\b\b\b\b\b\b\b\b\b\b\b%s%% complete',progress); end diff --git a/external/kegg/getGenesFromKEGG.m b/external/kegg/getGenesFromKEGG.m index 43ef0905..76dc0ad1 100755 --- a/external/kegg/getGenesFromKEGG.m +++ b/external/kegg/getGenesFromKEGG.m @@ -80,7 +80,7 @@ EM=fprintf(['The files ''ko'' and ''reaction'' cannot be located at ' strrep(keggPath,'\','/') '/ and should be downloaded from the KEGG FTP\n']); dispEM(EM); else - fprintf('Generating keggGenes.mat file... '); + fprintf('Generating keggGenes.mat file...\n'); %Get all KOs that are associated to reactions allKOs=getAllKOs(keggPath); @@ -261,8 +261,10 @@ end %Only get the KOs in koList -I=~ismember(model.rxns,koList); -model=removeReactions(model,I,true,true); +if nargin>1 + I=~ismember(model.rxns,koList); + model=removeReactions(model,I,true,true); +end fprintf('COMPLETE\n'); end diff --git a/external/kegg/getKEGGModelForOrganism.m b/external/kegg/getKEGGModelForOrganism.m index 28fbf9d4..38d55508 100755 --- a/external/kegg/getKEGGModelForOrganism.m +++ b/external/kegg/getKEGGModelForOrganism.m @@ -42,7 +42,7 @@ % the HMMs were trained on pro- or eukaryotic % sequences, using a sequence similarity threshold of % XXX %, fitting the KEGG version YY. E.g. -% euk90_kegg100. (opt, see note about fastaFile. Note +% euk90_kegg102. (opt, see note about fastaFile. Note % that in order to rebuild the KEGG model from a % database dump, as opposed to using the version % supplied with RAVEN, you would still need to supply @@ -140,13 +140,9 @@ % should be used for constructing Hidden Markov models (HMMs), and % later for matching your sequences to. % The most common alternatives here would be to use sequences from -% only eukaryotes, only prokaryotes or all sequences in KEGG. As -% explained in the README.md file, various sets of pre-trained hidden -% Markov models are available at BioMet -% Toolbox. This is normally the most convenient way, but if you -% would like to use, for example, only fungal sequences for training -% the HMMs then you need to re-run this step. +% only eukaryotes, only prokaryotes or all sequences in KEGG, but you +% could also play around with the parameters to use e.g. only fungal +% sequences. % 2b. KO-specific protein FASTA files are re-organised into % non-redundant protein sets with CD-HIT. The user can only set % seqIdentity parameter, which corresponds to '-c' parameter in @@ -300,9 +296,9 @@ end if isempty(fastaFile) - fprintf(['\n\n*** The model reconstruction from KEGG based on the annotation available for KEGG Species ' organismID ' ***\n\n']); + fprintf(['\n*** The model reconstruction from KEGG based on the annotation available for KEGG Species ' organismID ' ***\n\n']); else - fprintf('\n\n*** The model reconstruction from KEGG based on the protein homology search against KEGG Orthology specific HMMs ***\n\n'); + fprintf('\n*** The model reconstruction from KEGG based on the protein homology search against KEGG Orthology specific HMMs ***\n\n'); %Check if query fasta exists fastaFile=checkFileExistence(fastaFile,2); %Copy file to temp dir end @@ -322,7 +318,7 @@ %required zip file already in working directory or have it extracted. If %the zip file and directory is not here, it is downloaded from the cloud if ~isempty(dataDir) - hmmOptions={'euk90_kegg100','prok90_kegg100'}; + hmmOptions={'euk90_kegg102','prok90_kegg102'}; if ~endsWith(dataDir,hmmOptions) %Check if dataDir ends with any of the hmmOptions. %If not, then check whether the required folders exist anyway. if ~exist(fullfile(dataDir,'keggdb','genes.pep'),'file') && ... @@ -341,12 +337,12 @@ else hmmIndex=strcmp(dataDir,hmmOptions); if ~any(hmmIndex) - error(['Pre-trained HMMs are only provided with proteins clustered at 90% sequence identity (i.e. prok90_kegg100 and euk90_kegg100). ' ... + error(['Pre-trained HMMs are only provided with proteins clustered at 90% sequence identity (i.e. prok90_kegg102 and euk90_kegg102). ' ... 'Use either of these datasets, or otherwise download the relevant sequence data from KEGG to train HMMs with your desired sequence identity']) else fprintf('Downloading the HMMs archive file... '); try - websave([dataDir,'.zip'],['https://github.com/SysBioChalmers/RAVEN/releases/download/v2.6.0/',hmmOptions{hmmIndex},'.zip']); + websave([dataDir,'.zip'],['https://github.com/SysBioChalmers/RAVEN/releases/download/v2.7.4/',hmmOptions{hmmIndex},'.zip']); catch ME if strcmp(ME.identifier,'MATLAB:webservices:HTTP404StatusCodeError') error('Failed to download the HMMs archive file, the server returned a 404 error, try again later. If the problem persists please report it on the RAVEN GitHub Issues page: https://github.com/SysBioChalmers/RAVEN/issues') diff --git a/external/kegg/getMetsFromKEGG.m b/external/kegg/getMetsFromKEGG.m index aeead0cd..d5046ad1 100755 --- a/external/kegg/getMetsFromKEGG.m +++ b/external/kegg/getMetsFromKEGG.m @@ -84,11 +84,11 @@ model.id='KEGG'; model.name='Automatically generated from KEGG database'; - %Preallocate memory for 30000 metabolites - model.mets=cell(30000,1); - model.metNames=cell(30000,1); - model.metFormulas=cell(30000,1); - model.metMiriams=cell(30000,1); + %Preallocate memory for 50000 metabolites + model.mets=cell(50000,1); + model.metNames=cell(50000,1); + model.metFormulas=cell(50000,1); + model.metMiriams=cell(50000,1); %First load information on metabolite ID, metabolite name, %composition, and ChEBI diff --git a/external/kegg/getPhylDist.m b/external/kegg/getPhylDist.m index 7b83afbd..bcc75067 100755 --- a/external/kegg/getPhylDist.m +++ b/external/kegg/getPhylDist.m @@ -43,7 +43,7 @@ EM=fprintf(['The file ''taxonomy'' cannot be located at ' strrep(keggPath,'\','/') '/ and should be downloaded from the KEGG FTP.\n']); dispEM(EM); else - fprintf(['Generating the KEGG phylogenetic distance matrix from ' fullfile(keggPath,'taxonomy') '... ']); + fprintf('Generating keggPhylDist.mat file... '); %Open the file that describes the naming of the species fid = fopen(fullfile(keggPath,'taxonomy'), 'r'); @@ -97,16 +97,14 @@ end end end - %Generate a distance matrix (very straight forward here, not neat) phylDistStruct.distMat=zeros(numel(phylDistStruct.ids)); + phylDistStructOnlyInKingdom.distMat=zeros(numel(phylDistStruct.ids)); + phylDistStructOnlyInKingdom.ids=phylDistStruct.ids; for i=1:numel(phylDistStruct.ids) for j=1:numel(phylDistStruct.ids) - if onlyInKingdom==true - if ~strcmp(orgCat{i}(1),orgCat{j}(1)) - phylDistStruct.distMat(i,j)=Inf; - continue; - end + if ~strcmp(orgCat{i}(1),orgCat{j}(1)) + phylDistStructOnlyInKingdom.distMat(i,j)=Inf; end %Calculate the distance between then dist=numel(orgCat{i})-numel(orgCat{j}); @@ -132,8 +130,11 @@ end end %Save the structure - save(distFile,'phylDistStruct'); + save(distFile,'phylDistStruct','phylDistStructOnlyInKingdom'); fprintf('COMPLETE\n'); end end +if onlyInKingdom==true + phylDistStruct=phylDistStructOnlyInKingdom; +end end diff --git a/external/kegg/getRxnsFromKEGG.m b/external/kegg/getRxnsFromKEGG.m index 01b52013..41ac1e6f 100755 --- a/external/kegg/getRxnsFromKEGG.m +++ b/external/kegg/getRxnsFromKEGG.m @@ -195,8 +195,7 @@ if strcmp(tline(1:12),'ENZYME ') model.eccodes{rxnCounter}=tline(13:end); model.eccodes{rxnCounter}=deblank(model.eccodes{rxnCounter}); - model.eccodes{rxnCounter}=strcat('ec-code/',model.eccodes{rxnCounter}); - model.eccodes{rxnCounter}=regexprep(model.eccodes{rxnCounter},'\s+',';ec-code/'); + model.eccodes{rxnCounter}=regexprep(model.eccodes{rxnCounter},'\s+',';'); end if numel(tline)>8 if strcmp(tline(1:9),'REFERENCE') diff --git a/external/kegg/keggGenes.mat b/external/kegg/keggGenes.mat index 809cdc14..50e6e726 100644 Binary files a/external/kegg/keggGenes.mat and b/external/kegg/keggGenes.mat differ diff --git a/external/kegg/keggMets.mat b/external/kegg/keggMets.mat index 1f86d788..c06f8a87 100644 Binary files a/external/kegg/keggMets.mat and b/external/kegg/keggMets.mat differ diff --git a/external/kegg/keggPhylDist.mat b/external/kegg/keggPhylDist.mat index f170ef86..b8456a22 100644 Binary files a/external/kegg/keggPhylDist.mat and b/external/kegg/keggPhylDist.mat differ diff --git a/external/kegg/keggRxns.mat b/external/kegg/keggRxns.mat index 8b413117..036fd78d 100644 Binary files a/external/kegg/keggRxns.mat and b/external/kegg/keggRxns.mat differ diff --git a/installation/checkInstallation.m b/installation/checkInstallation.m index 2bfa8adf..a64a8505 100755 --- a/installation/checkInstallation.m +++ b/installation/checkInstallation.m @@ -139,7 +139,7 @@ function checkInstallation(develMode) fprintf('\n=== Model solvers ===\n'); %Get current solver. Set it to 'none', if it is not set -fprintf(' > Checking for functional LP solvers\n') +fprintf(' > Checking for LP solvers\n') res=runtests('solverTests.m','OutputDetail',0); fprintf([myStr(' > glpk',40) '%f']) @@ -163,12 +163,14 @@ function checkInstallation(develMode) fprintf('Fail\n') end -fprintf(' > Checking for functional MILP solvers\n') +fprintf(' > Checking for MILP solvers\n') res=runtests('fillGapsSmallTests.m','OutputDetail',0); fprintf([myStr(' > glpk',40) '%f']) if res(1).Passed == 1 fprintf('Pass\n') + fprintf([' While passing here, we do not recommended glpk\n'... + ' for MILPs due to occasional inconsistent results\n']) else fprintf('Fail\n') end diff --git a/testing/unit_tests/modelCurationTests.m b/testing/unit_tests/modelCurationTests.m index 77554c22..90fa7ff6 100644 --- a/testing/unit_tests/modelCurationTests.m +++ b/testing/unit_tests/modelCurationTests.m @@ -139,7 +139,6 @@ function addMetsTest(testCase) verifyEqual(testCase,modelManual,modelNew) end - function addMets_oneCompTest(testCase) sourceDir = fileparts(which(mfilename)); load([sourceDir,'/test_data/ecoli_textbook.mat'], 'model'); @@ -326,7 +325,12 @@ function changeRxnsTest(testCase) sourceDir=fileparts(fileparts(fileparts(which(mfilename)))); load(fullfile(sourceDir,'testing','unit_tests','test_data','ecoli_textbook.mat'), 'model'); -evalc('modelNew=changeRxns(model,''ACKr'',''2-Oxoglutarate => TEST'',2,''c'',true);'); +evalc('modelNew=changeRxns(model,''ACKr'',''2-Oxoglutarate <=> TEST'',2,''c'',true);'); + +equations.mets={{'2-Oxoglutarate','TEST'}}; +equations.stoichCoeffs={[-1,1]}; + +evalc('modelNewStruct=changeRxns(model,''ACKr'',equations,2,''c'',true);'); %Perform the curation manually for comparison modelManual=model; @@ -341,9 +345,10 @@ function changeRxnsTest(testCase) modelManual.metFormulas(end+1)={''}; modelManual.metMiriams(end+1)={[]}; modelManual.b(end+1)=[0]; -modelManual.rev(3)=[0]; +modelManual.rev(3)=[1]; verifyEqual(testCase,modelNew,modelManual) +verifyEqual(testCase,modelNewStruct,modelManual) end function removeBadRxnsTest(testCase)