diff --git a/virmet/covplot.py b/virmet/covplot.py index 17ac9db..2f6a1a7 100644 --- a/virmet/covplot.py +++ b/virmet/covplot.py @@ -10,9 +10,9 @@ def best_species(orgs_file, organism): - orgs_list = pd.read_csv(orgs_file, sep=',', header=0) + orgs_list = pd.read_csv(orgs_file, sep='\t', header=0) # assert decreasing sorted - diff = orgs_list['0'] - orgs_list['0'].shift(1) + diff = orgs_list['reads'] - orgs_list['reads'].shift(1) assert (diff > 0).sum() == 0, diff # criterion is "startswith" # criterion = orgs_list['sscinames'].map(lambda x: x.startswith(organism)) @@ -20,7 +20,7 @@ def best_species(orgs_file, organism): matching_orgs = orgs_list[criterion] # organism matching that given on command line with most reads is the first # W.O. this assumes descending order of reads - return str(matching_orgs.iloc[0].sscinames) + return str(matching_orgs.iloc[0].organism) def main(args): @@ -37,10 +37,9 @@ def main(args): blast_file = os.path.join(outdir, 'unique.tsv.gz') unique = pd.read_csv(blast_file, sep='\t', header=0, compression='gzip') matching_reads = unique[unique['sscinames'] == best_spec] - best_seqids = matching_reads.groupby('sseqid').size().order(ascending=False) + best_seqids = matching_reads.groupby('sseqid').size().sort_values(ascending=False) - # TODO: upgrade for NCBI outphase of GI - gi, dsc, acc = str(best_seqids.index.tolist()[0]).split('|')[1:4] + dsc, acc = str(best_seqids.index.tolist()[0]).split('|')[:2] # copy single genome, index, align viral_reads os.chdir(outdir)