Skip to content

Commit

Permalink
Infer chromosome automatically
Browse files Browse the repository at this point in the history
  • Loading branch information
rnowling committed Nov 7, 2021
1 parent eba3ec4 commit 02b1e2b
Show file tree
Hide file tree
Showing 2 changed files with 18 additions and 10 deletions.
3 changes: 0 additions & 3 deletions bats_tests/test_localize.bats
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,6 @@ load model_setup_helper
--pca-associations-tsv "${TEST_TEMP_DIR}/pca_tests.tsv" \
--plot-fl "${TEST_TEMP_DIR}/manhattan_plot_comp2.png" \
--component 2 \
--chromosome 1 \
--n-windows -1

[ "$status" -eq 0 ]
Expand All @@ -68,7 +67,6 @@ load model_setup_helper
--pca-associations-tsv "${TEST_TEMP_DIR}/pca_tests_counts.tsv" \
--plot-fl "${TEST_TEMP_DIR}/manhattan_plot_comp1.png" \
--component 1 \
--chromosome 1 \
--n-windows -1

[ "$status" -eq 0 ]
Expand All @@ -79,7 +77,6 @@ load model_setup_helper
--pca-associations-tsv "${TEST_TEMP_DIR}/pca_tests_counts.tsv" \
--plot-fl "${TEST_TEMP_DIR}/manhattan_plot_comp2.png" \
--component 2 \
--chromosome 1 \
--n-windows -1

[ "$status" -eq 0 ]
Expand Down
25 changes: 18 additions & 7 deletions bin/asaph_detect_and_localize
Original file line number Diff line number Diff line change
Expand Up @@ -33,15 +33,28 @@ from asaph.vcf import VCFStreamer

ALPHA = 0.01

def read_snp_table(flname, component, chromosome):
def read_snp_table(flname, component, chromosome=None):
with open(flname) as fl:
df = pd.read_csv(fl, delim_whitespace=True)
df["chrom"] = df["chrom"].astype(str)
mask = (df["chrom"] == chromosome) & (df["component"] == component)

if chromosome == None:
chromosomes = set(df["chrom"])
if len(chromosomes) > 1:
print("SNPs for more than one chromosome are present in the association file. Use the --chromosome flag to indicate which chromosome should be plotted.")
sys.exit(1)

chromosome = next(iter(chromosomes))

mask = (df["chrom"] == chromosome) & (df["component"] == component)
df = df[mask]

if len(df) == 0:
print("No association tests for the given chromosome or component.")
sys.exit(1)

df = df.sort_values(by="pos")

return df

def mark_significant_snps(df, threshold=None):
Expand Down Expand Up @@ -218,7 +231,6 @@ def parseargs():
type=int)

plot_parser.add_argument("--chromosome",
required=True,
type=str)

plot_parser.add_argument("--y-limit",
Expand All @@ -240,7 +252,6 @@ def parseargs():
type=int)

boundary_parser.add_argument("--chromosome",
required=True,
type=str)

boundary_parser.add_argument("--n-windows",
Expand Down Expand Up @@ -281,7 +292,7 @@ if __name__ == "__main__":
if args.mode == "plot":
df = read_snp_table(args.pca_associations_tsv,
args.component,
args.chromosome)
chromosome=args.chromosome)

# avoid domain errors from trying to take the log of 0
df["pvalue"] = np.maximum(df["pvalue"], np.power(10., -300.))
Expand All @@ -303,7 +314,7 @@ if __name__ == "__main__":
elif args.mode == "detect-boundaries":
df = read_snp_table(args.pca_associations_tsv,
args.component,
args.chromosome)
chromosome=args.chromosome)

df = mark_significant_snps(df)

Expand Down

0 comments on commit 02b1e2b

Please sign in to comment.