Skip to content

Commit

Permalink
need stop bool update
Browse files Browse the repository at this point in the history
  • Loading branch information
greenkidneybean committed Mar 26, 2022
1 parent 19b8cc2 commit ebd1c32
Show file tree
Hide file tree
Showing 37 changed files with 6,250 additions and 1,594 deletions.
Binary file added .DS_Store
Binary file not shown.
1,242 changes: 1,242 additions & 0 deletions .ipynb_checkpoints/check_k3l_test-checkpoint.ipynb

Large diffs are not rendered by default.

File renamed without changes.
436 changes: 97 additions & 339 deletions .ipynb_checkpoints/main-checkpoint.ipynb

Large diffs are not rendered by default.

151 changes: 151 additions & 0 deletions .ipynb_checkpoints/main-script-checkpoint.ipynb
Original file line number Diff line number Diff line change
@@ -0,0 +1,151 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"id": "08ba2028-5b81-4d9b-93fc-37efa999a608",
"metadata": {},
"outputs": [],
"source": [
"# Primer-design driver script.\n",
"#\n",
"# Reads a wild-type (and optionally a vector) GenBank record, walks every\n",
"# feature whose type is listed in `targ_windows`, and for each sub-window calls\n",
"# the main_package.primer_design helpers. The accumulated FASTA lines are\n",
"# written to '<o>.fa' and the per-variant table to '<o>.tsv'.\n",
"\n",
"# imports\n",
"import argparse\n",
"from Bio.SeqUtils import GC\n",
"from Bio.SeqUtils import MeltingTemp as mt\n",
"from Bio.Seq import Seq\n",
"from Bio import SeqIO\n",
"import math\n",
"import numpy as np\n",
"import pandas as pd\n",
"import main_package # my package\n",
"\n",
"# parse arguments\n",
"parser = argparse.ArgumentParser()\n",
"parser.add_argument(\"wt\", help=\"Genbank file path containing wild type (WT) sequence\", type=str)\n",
"parser.add_argument(\"o\", help=\"Output prefix\", type=str)\n",
"# default is None (falsy) so the 'no vector supplied' check below still works\n",
"parser.add_argument(\"--vector\", help=\"Genbank file path containing vector sequence\", type=str, default=None)\n",
"parser.add_argument(\"--codon_table\", help=\"Specify codon table to use\", type=str, default='Standard')\n",
"parser.add_argument(\"--homo_len\", help=\"Length of homology arm in fwd primer\", type=int, default=20)\n",
"parser.add_argument(\"--oligo_len\", help=\"Ideal max total length of oligo\", type=int, default=60)\n",
"parser.add_argument(\"--melt_temp\", help=\"Melting temp of fwd primer\", type=int, default=50)\n",
"parser.add_argument(\"--rev_melt_temp\", help=\"Melting temp of rev primer\", type=int, default=55)\n",
"parser.add_argument(\"--syn_snp_rate\", help=\"Percentage of synonymous SNPs 0-1\", type=float, default=.05)\n",
"# argparse %-formats help strings, so a literal percent sign must be written as '%%'\n",
"parser.add_argument(\"--stop_rate\", help=\"Percentage of stop codon SNPs, default = keep 10%% of stop SNPs\", type=float, default=.10)\n",
"parser.add_argument(\"--rng_seed\", help=\"Set seed for reproducibly selecting synonymous codon sites\", type=int, default=42)\n",
"parser.add_argument(\"--out_dir\", help='Local output directory e.g. \"data\"', type=str)\n",
"args = parser.parse_args()\n",
"\n",
"# parse genbank files\n",
"wt_file = SeqIO.read(args.wt, 'genbank')\n",
"\n",
"# check for vector file; fall back to the WT record when none is given\n",
"if not args.vector:\n",
"    args.vector = args.wt\n",
"vector_file = SeqIO.read(args.vector, 'genbank')\n",
"\n",
"wt_seq = str(wt_file.seq.upper())\n",
"vector_seq = str(vector_file.seq.upper())\n",
"\n",
"# ERROR CHECKS\n",
"if len(wt_seq) != len(vector_seq):\n",
"    print('ERROR: WildType and Vector GenBank sequences are not of equal length')\n",
"    # 'return' is a SyntaxError at module level; stop with a non-zero exit status\n",
"    raise SystemExit(1)\n",
"# TODO: check for -20 bp homology\n",
"# TODO: check that the strand is going forward\n",
"\n",
"# get start and stop of gene for codon positions\n",
"# (when several 'gene' features exist, the last one wins, as before)\n",
"gene_start = None\n",
"gene_end = None\n",
"for feature in wt_file.features:\n",
"    if feature.type == 'gene':\n",
"        # int() converts the Biopython position object without relying on `.position`\n",
"        gene_start = int(feature.location.start)\n",
"        gene_end = int(feature.location.end)\n",
"if gene_start is None:\n",
"    print('ERROR: no gene feature found in WildType GenBank file')\n",
"    raise SystemExit(1)\n",
"\n",
"# setup seq_data: shared state handed to every primer_design helper\n",
"seq_data = {\n",
"    'wt_seq': wt_seq,\n",
"    'vector_seq': vector_seq,\n",
"    'gene_start': gene_start,\n",
"    'gene_end': gene_end,\n",
"    'fasta_file': [],\n",
"    'df': pd.DataFrame(),\n",
"    # honour --rng_seed (default 42) instead of a hard-coded seed\n",
"    'rng': np.random.RandomState(args.rng_seed),\n",
"}\n",
"\n",
"# this needs to be fixed (user input? yaml?)\n",
"targ_windows = ['window_1', 'window_2', 'window_3']\n",
"\n",
"for feature in wt_file.features:\n",
"    if feature.type not in targ_windows:\n",
"        continue\n",
"\n",
"    start_index = int(feature.location.start)\n",
"    window_end = int(feature.location.end)\n",
"\n",
"    # loop for each sub_window\n",
"    sub_window_n = 1\n",
"    while start_index < window_end: # this could be an issue to toggle\n",
"        data_dict = {\n",
"            'start_index': start_index,\n",
"            'window_end': window_end,\n",
"            # e.g. 'window_1-1'; the f-prefix was missing, which made this a set subtraction\n",
"            'sub_window_name': f'{feature.type}-{sub_window_n}',\n",
"        }\n",
"\n",
"        # 1. homology arm\n",
"        data_dict = main_package.primer_design.homology_arm(seq_data, data_dict, args)\n",
"\n",
"        # 2. reverse primer\n",
"        data_dict = main_package.primer_design.reverse_primer(seq_data, data_dict, args)\n",
"\n",
"        # 3. forward primer\n",
"        data_dict = main_package.primer_design.forward_primer(seq_data, data_dict, args)\n",
"\n",
"        # 4. variant window\n",
"        seq_data, data_dict = main_package.primer_design.sub_window(seq_data, data_dict, args)\n",
"\n",
"        # reset the start index for the next mini-window\n",
"        # NOTE(review): `primer_start` was an undefined name here; this assumes the\n",
"        # primer_design helpers store it in data_dict under 'primer_start' - confirm.\n",
"        start_index = data_dict['primer_start']\n",
"        sub_window_n += 1\n",
"\n",
"# setup .fa output, truncate if file exists\n",
"with open(f\"{args.o}.fa\", 'w') as fasta_handle:\n",
"    fasta_handle.writelines(seq_data['fasta_file'])\n",
"\n",
"# polish dataframe\n",
"df = seq_data['df']\n",
"df['position'] = df['position'].astype(int)\n",
"\n",
"# melting temperature, GC content and length for both primers\n",
"for primer_col in ('forward_primer', 'reverse_primer'):\n",
"    df[f'{primer_col}_tm'] = df[primer_col].apply(mt.Tm_NN).round(1)\n",
"    df[f'{primer_col}_gc'] = df[primer_col].apply(GC).round(1)\n",
"    df[f'{primer_col}_len'] = df[primer_col].str.len()\n",
"\n",
"# fixed output column order\n",
"cols = [\n",
"    'name', 'sub_window_name', 'wt', 'position', 'iupac', 'codon_sub',\n",
"    'synonymous_codons', 'no_stop_codons', 'primer', 'homology_arm', 'sub_window',\n",
"    'forward_primer', 'forward_primer_tm', 'forward_primer_gc', 'forward_primer_len',\n",
"    'reverse_primer', 'reverse_primer_name', 'reverse_primer_tm', 'reverse_primer_gc', 'reverse_primer_len',\n",
"]\n",
"df = df[cols]\n",
"\n",
"# save dataframe as .tsv\n",
"df.to_csv(f'{args.o}.tsv', index=False, sep='\\t')"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "alignparse-environment",
"language": "python",
"name": "alignparse-environment"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.9.6"
}
},
"nbformat": 4,
"nbformat_minor": 5
}
Loading

0 comments on commit ebd1c32

Please sign in to comment.