-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
1 parent
19b8cc2
commit ebd1c32
Showing
37 changed files
with
6,250 additions
and
1,594 deletions.
There are no files selected for viewing
1,242 changes: 1,242 additions & 0 deletions
1,242
.ipynb_checkpoints/check_k3l_test-checkpoint.ipynb
Large diffs are not rendered by default.
Oops, something went wrong.
File renamed without changes.
Large diffs are not rendered by default.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,151 @@ | ||
{ | ||
"cells": [ | ||
{ | ||
"cell_type": "code", | ||
"execution_count": 1, | ||
"id": "08ba2028-5b81-4d9b-93fc-37efa999a608", | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"# imports\n", | ||
"import argparse\n", | ||
"from Bio.SeqUtils import GC\n", | ||
"from Bio.SeqUtils import MeltingTemp as mt\n", | ||
"from Bio.Seq import Seq\n", | ||
"from Bio import SeqIO\n", | ||
"import math\n", | ||
"import numpy as np\n", | ||
"import pandas as pd\n", | ||
"import main_package # my package\n", | ||
"\n", | ||
"# parse arguments\n", | ||
"parser = argparse.ArgumentParser()\n", | ||
"parser.add_argument(\"wt\", help=\"Genbank file path containing wild type (WT) sequence\", type=str)\n", | ||
"parser.add_argument(\"o\", help=\"Output prefix\", type=str)\n", | ||
"parser.add_argument(\"--vector\", help=\"Genbank file path containing vector sequence\", type=str, default=False)\n", | ||
"parser.add_argument(\"--codon_table\", help=\"Specify codon table to use\", type=str, default='Standard')\n", | ||
"parser.add_argument(\"--homo_len\", help=\"Length of homology arm in fwd primer\", type=int, default=20)\n", | ||
"parser.add_argument(\"--oligo_len\", help=\"Ideal max total length of oligo\", type=int, default=60)\n", | ||
"parser.add_argument(\"--melt_temp\", help=\"Melting temp of fwd primer\", type=int, default=50)\n", | ||
"parser.add_argument(\"--rev_melt_temp\", help=\"Melting temp of rev primer\", type=int, default=55)\n", | ||
"parser.add_argument(\"--syn_snp_rate\", help=\"Percentage of synonymous SNPs 0-1\", type=float, default=.05)\n", | ||
"parser.add_argument(\"--stop_rate\", help=\"Percentage of stop codon SNPs, default = keep 10% of stop SNPs\", type=float, default=.10)\n", | ||
"parser.add_argument(\"--rng_seed\", help=\"Set seed for repoducibly selecting synonymous codon sites\", type=int, default=42)\n", | ||
"parser.add_argument(\"--out_dir\", help='Local output directory e.g. \"data\"', type=str)\n", | ||
"args = parser.parse_args()\n", | ||
"\n", | ||
"# parse genbank files\n", | ||
"wt_file = SeqIO.read(args.wt, 'genbank')\n", | ||
"\n", | ||
"# check for vector file\n", | ||
"if not args.vector:\n", | ||
" args.vector = args.wt\n", | ||
"vector_file = SeqIO.read(vector_input, 'genbank')\n", | ||
" \n", | ||
"wt_seq = str(wt_file.seq.upper())\n", | ||
"vector_seq = str(vector_file.seq.upper())\n", | ||
"\n", | ||
"# ERROR CHECKS\n", | ||
"if len(wt_seq) != len(vector_seq):\n", | ||
" print('ERROR: WildType and Vector GenBank sequences are not of equal length')\n", | ||
" return\n", | ||
"# check for -20 bp homology\n", | ||
"# check that the strand is going forward\n", | ||
"\n", | ||
"# get start and stop of gene for codon positions\n", | ||
"for feature in wt_file.features:\n", | ||
" if feature.type == 'gene':\n", | ||
" gene_start = feature.location.start.position\n", | ||
" gene_end = feature.location.end.position\n", | ||
"\n", | ||
"# setup seq_data\n", | ||
"seq_data = {}\n", | ||
"seq_data['wt_seq'] = wt_seq\n", | ||
"seq_data['vector_seq'] = vector_seq\n", | ||
"seq_data['gene_start'] = gene_start\n", | ||
"seq_data['gene_end'] = gene_end\n", | ||
"seq_data['fasta_file'] = []\n", | ||
"seq_data['df'] = pd.DataFrame()\n", | ||
"seq_data['rng'] = np.random.RandomState(42)\n", | ||
"\n", | ||
"# this needs to be fixed (user input? yaml?)\n", | ||
"targ_windows = ['window_1', 'window_2', 'window_3']\n", | ||
"\n", | ||
"for feature in wt_file.features:\n", | ||
" if feature.type not in targ_windows:\n", | ||
" continue\n", | ||
" \n", | ||
" start_index = feature.location.start.position\n", | ||
" window_end = feature.location.end.position\n", | ||
" \n", | ||
" # loop for each sub_window\n", | ||
" sub_window_n = 1\n", | ||
" while start_index < window_end: # this could be an issue to toggle\n", | ||
" data_dict = {}\n", | ||
" data_dict['start_index'] = start_index\n", | ||
" data_dict['window_end'] = window_end\n", | ||
" data_dict['sub_window_name'] = {str(feature.type)}-{sub_window_n}\n", | ||
" \n", | ||
" # 1. homology arm\n", | ||
" data_dict = main_package.primer_design.homology_arm(seq_data, data_dict, args)\n", | ||
" \n", | ||
" # 2. reverse primer\n", | ||
" data_dict = main_package.primer_design.reverse_primer(seq_data, data_dict, args)\n", | ||
" \n", | ||
" # 3. forward primer\n", | ||
" data_dict = forward_primer(seq_data, data_dict, args)\n", | ||
" \n", | ||
" # 4. variant window\n", | ||
" seq_data, data_dict = main_package.primer_design.sub_window(seq_data, data_dict, args)\n", | ||
" \n", | ||
" # reset the start index for the next mini-window\n", | ||
" start_index = primer_start\n", | ||
" sub_window_n += 1 \n", | ||
"\n", | ||
"# setup .fa output, truncate if file exists\n", | ||
"file = open(f\"{output_prefix}.fa\",'w+')\n", | ||
"file.writelines(seq_data['fasta_file'])\n", | ||
"file.close()\n", | ||
"\n", | ||
"# polish dataframe\n", | ||
"df = seq_data['df']\n", | ||
"df['position'] = df['position'].astype(int)\n", | ||
"\n", | ||
"df['forward_primer_tm'] = df['forward_primer'].apply(lambda x: mt.Tm_NN(x)).round(1)\n", | ||
"df['forward_primer_gc'] = df['forward_primer'].apply(GC).round(1)\n", | ||
"df['forward_primer_len'] = df['forward_primer'].str.len()\n", | ||
"\n", | ||
"df['reverse_primer_tm'] = df['reverse_primer'].apply(lambda x: mt.Tm_NN(x)).round(1)\n", | ||
"df['reverse_primer_gc'] = df['reverse_primer'].apply(GC).round(1)\n", | ||
"df['reverse_primer_len'] = df['reverse_primer'].str.len()\n", | ||
"\n", | ||
"cols = ['name','sub_window_name','wt','position','iupac','codon_sub','synonymous_codons','no_stop_codons','primer','homology_arm','sub_window','forward_primer','forward_primer_tm','forward_primer_gc','forward_primer_len','reverse_primer','reverse_primer_name','reverse_primer_tm','reverse_primer_gc','reverse_primer_len']\n", | ||
"df = df[cols]\n", | ||
"\n", | ||
"# save dataframe as .tsv\n", | ||
"df.to_csv(f'{output_prefix}.tsv', index=False, sep='\\t')" | ||
] | ||
} | ||
], | ||
"metadata": { | ||
"kernelspec": { | ||
"display_name": "alignparse-environment", | ||
"language": "python", | ||
"name": "alignparse-environment" | ||
}, | ||
"language_info": { | ||
"codemirror_mode": { | ||
"name": "ipython", | ||
"version": 3 | ||
}, | ||
"file_extension": ".py", | ||
"mimetype": "text/x-python", | ||
"name": "python", | ||
"nbconvert_exporter": "python", | ||
"pygments_lexer": "ipython3", | ||
"version": "3.9.6" | ||
} | ||
}, | ||
"nbformat": 4, | ||
"nbformat_minor": 5 | ||
} |
Oops, something went wrong.