diff --git a/nesoni/CHANGES b/nesoni/CHANGES index 73935f9..eed2dea 100644 --- a/nesoni/CHANGES +++ b/nesoni/CHANGES @@ -1,4 +1,7 @@ +0.132 - link_up_annotations forgiving of duplicated ID if ID not used. + Execute can have a specified location for state and log files. + 0.131 - Log transformation is now performed using Varistran. Ignore spurious whitespace when loading GFF files. Default to norm factors of 1 if calcNormFactors fails diff --git a/nesoni/__init__.py b/nesoni/__init__.py index cde3d61..220b92a 100644 --- a/nesoni/__init__.py +++ b/nesoni/__init__.py @@ -1,4 +1,4 @@ -VERSION='0.131' +VERSION='0.132' #^ Note: this first line is read by the setup.py script to get the version import sys diff --git a/nesoni/annotation.py b/nesoni/annotation.py index 7ecb43c..988e97e 100644 --- a/nesoni/annotation.py +++ b/nesoni/annotation.py @@ -176,18 +176,23 @@ def get_seq(self, seq_dict): def link_up_annotations(annotations): """ Link up GFF3 annotations using parent/child relationships """ index = { } + bad = set() for item in annotations: item.children = [ ] if 'ID' not in item.attr: continue ID = item.attr['ID'] - assert ID not in index, 'Annotations contain a duplicated ID: '+ID + #assert ID not in index, 'Annotations contain a duplicated ID: '+ID + if ID in index: + bad.add(ID) index[ID] = item for item in annotations: if 'Parent' not in item.attr: item.parents = [ ] else: - item.parents = [ index[parent_id] for parent_id in item.attr['Parent'].split(',') ] + parent_ids = item.attr['Parent'].split(',') + assert bad.isdisjoint(parent_ids), "Duplicate ID used as Parent in GFF file: "+",".join(bad&set(parent_ids)) + item.parents = [ index[parent_id] for parent_id in parent_ids if parent_id in index ] for parent in item.parents: parent.children.append(item) @@ -388,6 +393,9 @@ def write_gff3_header(f): print >> f, '##gff-version 3' def write_gff3(filename, items): + # IGV likes to index large GFFs, and needs them to be sorted for this + items = sorted(items, key=lambda item: (item.seqid, item.start)) + with io.open_possibly_compressed_writer(filename) as f: write_gff3_header(f) for item in items: diff --git a/nesoni/legion.py b/nesoni/legion.py index 684c515..96d29e1 100644 --- a/nesoni/legion.py +++ b/nesoni/legion.py @@ -967,6 +967,7 @@ def process_make(action, stage=None): """) @config.Int_flag('cores','Advise how many cores the command will use.', affects_output=False) +@config.String_flag('prefix','Location of state and log files.') @config.Main_section('command','Command to execute', allow_flags=True, empty_is_ok=False) @config.Section('execution_options', 'Extra options to add to start of command, eg to set the number of cores to use. ' @@ -976,6 +977,19 @@ class Execute(config.Action_filter): cores = 1 command = [ ] execution_options = [ ] + prefix = None + + + def log_filename(self): + if self.prefix is None: + return None + return self.prefix + '_log.txt' + + def state_filename(self): + if self.prefix is None: + return super(Execute,self).state_filename() + return self.prefix + '.state' + def cores_required(self): return self.cores diff --git a/nesoni/nesoni-r/R/counts.R b/nesoni/nesoni-r/R/counts.R index 2fc1447..5a1ced5 100644 --- a/nesoni/nesoni-r/R/counts.R +++ b/nesoni/nesoni-r/R/counts.R @@ -51,7 +51,7 @@ read.counts <- function(filename, min.total=0, min.max=0, keep=NULL, norm.file=N if (!quiet && (min.total > 0 || min.max > 0)) cat(sprintf("%d genes after filtering\n", sum(good))) - result <- DGEList(counts=counts[good,], gene=gene[good,]) + result <- DGEList(counts=counts[good,], genes=gene[good,]) mean.lib.size <- exp(mean(log(result$samples$lib.size))) diff --git a/nesoni/nesoni-r/R/nesoni_version.R b/nesoni/nesoni-r/R/nesoni_version.R index b6bf564..48ee958 100644 --- a/nesoni/nesoni-r/R/nesoni_version.R +++ b/nesoni/nesoni-r/R/nesoni_version.R @@ -1,2 +1,2 @@ #Autogenerated -nesoni_version <- function() { '0.130' } +nesoni_version <- function() { '0.132' }