From 76ec343dec8a6b6489c6ef8fdd785d99d918e390 Mon Sep 17 00:00:00 2001
From: ozagordi
Date: Wed, 19 Apr 2017 10:32:36 +0200
Subject: [PATCH] Solve GI phase out and FutureDeprecation warning

This was (presumably) the last piece of code to change to make the whole
pipeline work without GIs after their phase out.
Further, `sort_values` is used instead of `order` from `pandas`, thus
squashing the warning that appeared.

Fix #16 and fix #17.
---
 virmet/covplot.py | 11 +++++------
 1 file changed, 5 insertions(+), 6 deletions(-)

diff --git a/virmet/covplot.py b/virmet/covplot.py
index 17ac9db..2f6a1a7 100644
--- a/virmet/covplot.py
+++ b/virmet/covplot.py
@@ -10,9 +10,9 @@
 
 
 def best_species(orgs_file, organism):
-    orgs_list = pd.read_csv(orgs_file, sep=',', header=0)
+    orgs_list = pd.read_csv(orgs_file, sep='\t', header=0)
     # assert decreasing sorted
-    diff = orgs_list['0'] - orgs_list['0'].shift(1)
+    diff = orgs_list['reads'] - orgs_list['reads'].shift(1)
     assert (diff > 0).sum() == 0, diff
     # criterion is "startswith"
     # criterion = orgs_list['sscinames'].map(lambda x: x.startswith(organism))
@@ -20,7 +20,7 @@ def best_species(orgs_file, organism):
     matching_orgs = orgs_list[criterion]
     # organism matching that given on command line with most reads is the first
     # W.O. this assumes descending order of reads
-    return str(matching_orgs.iloc[0].sscinames)
+    return str(matching_orgs.iloc[0].organism)
 
 
 def main(args):
@@ -37,10 +37,9 @@ def main(args):
     blast_file = os.path.join(outdir, 'unique.tsv.gz')
     unique = pd.read_csv(blast_file, sep='\t', header=0, compression='gzip')
     matching_reads = unique[unique['sscinames'] == best_spec]
-    best_seqids = matching_reads.groupby('sseqid').size().order(ascending=False)
+    best_seqids = matching_reads.groupby('sseqid').size().sort_values(ascending=False)
 
-    # TODO: upgrade for NCBI outphase of GI
-    gi, dsc, acc = str(best_seqids.index.tolist()[0]).split('|')[1:4]
+    dsc, acc = str(best_seqids.index.tolist()[0]).split('|')[:2]
 
     # copy single genome, index, align viral_reads
     os.chdir(outdir)