-
Notifications
You must be signed in to change notification settings - Fork 2
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #3 from wlegrand91/main
Notebook for processing .dh files
- Loading branch information
Showing
2 changed files
with
218 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,64 @@ | ||
|
||
import pandas as pd | ||
|
||
def read_heats_file(dh_file: str, uncertainty: float, output_file: str) -> dict:
    """
    Read the heats file written out by the MicroCal/Origin ITC analysis
    package and write the per-injection heats to a CSV file.

    Parameters
    ----------
    dh_file : str
        name of .dh file written out by microcal software
    uncertainty : float
        user estimate of the uncertainty on each measured heat
    output_file : str
        name of file to write out data (CSV with columns ``injection``,
        ``heat`` and ``heat_stdev``)

    Returns
    -------
    meta_data : dict
        dictionary with metadata read from the third line of the file:

        - ``temperature``: first field, returned as read (no conversion)
        - ``cell_conc``: second field scaled by 1e-3
        - ``titrant_conc``: third field scaled by 1e-3
        - ``cell_volume``: fourth field scaled by 1e3

        NOTE(review): the scale factors suggest the file stores mM and mL
        and that the outputs are molar and microliters -- confirm against
        the .dh format specification.

    Raises
    ------
    IndexError
        if the file has fewer than three lines, or a metadata/data line has
        too few comma-separated fields
    ValueError
        if a field that should be numeric cannot be parsed as a float
    """

    # Read data file
    with open(dh_file, 'r') as f:
        lines = f.readlines()

    # The third line holds the comma-separated experiment metadata
    meta = lines[2].split(",")

    temperature = float(meta[0])
    stationary_cell_conc = float(meta[1]) * 1e-3
    titrant_syringe_conc = float(meta[2]) * 1e-3
    cell_volume = float(meta[3]) * 1e3

    # Rows 6-end hold the injections: first column is the shot, second
    # column is the measured heat
    shots = []
    heats = []
    for row in lines[5:]:
        # Tolerate blank lines (e.g. a trailing newline at the end of the
        # file) instead of crashing in float()
        if not row.strip():
            continue
        col = row.split(",")
        shots.append(float(col[0]))
        heats.append(float(col[1]))

    # Same user-supplied uncertainty for every observed heat
    heats_stdev = [uncertainty] * len(heats)

    # Construct dataframe with data and write out a spreadsheet
    df = pd.DataFrame({"injection": shots,
                       "heat": heats,
                       "heat_stdev": heats_stdev})
    df.to_csv(output_file, index=False)

    # Build dictionary holding meta data
    out = {
        "temperature": temperature,
        "cell_conc": stationary_cell_conc,
        "titrant_conc": titrant_syringe_conc,
        # Previously this key was mistakenly set to the titrant concentration
        "cell_volume": cell_volume,
    }

    return out
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,154 @@ | ||
{ | ||
"cells": [ | ||
{ | ||
"cell_type": "code", | ||
"execution_count": 27, | ||
"id": "decd9f42-6209-4a88-9a15-eb0b32cb2505", | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"\n", | ||
"import pandas as pd\n", | ||
"\n", | ||
"def read_heats_file(dh_file,uncertainty,output_file):\n", | ||
" \"\"\"\n", | ||
" Read the heats file written out by the MicroCal/Origin ITC analysis\n", | ||
" package.\n", | ||
"\n", | ||
" Parameters\n", | ||
" ----------\n", | ||
" dh_file : str\n", | ||
" name of .dh file written out by microcal software\n", | ||
" output_file : str\n", | ||
" name of file to write out data\n", | ||
" uncertainty : float\n", | ||
" user estimate of the uncertainty on each measured heat\n", | ||
"\n", | ||
" Returns\n", | ||
" -------\n", | ||
" meta_data : dict\n", | ||
" dictionary with metadata read from the top of the file: temperature\n", | ||
" in Kelvin, cell and titrant concentrations in molar, and cell_volume\n", | ||
" in microliters\n", | ||
" \"\"\"\n", | ||
"\n", | ||
" # Read data file\n", | ||
" with open(dh_file,'r') as f:\n", | ||
" lines = f.readlines()\n", | ||
"\n", | ||
" # Grab third line and split on \",\"\n", | ||
" meta = lines[2].split(\",\")\n", | ||
"\n", | ||
" # Parse meta data on the third line\n", | ||
" temperature = float(meta[0])\n", | ||
" stationary_cell_conc = float(meta[1])*1e-3\n", | ||
" titrant_syringe_conc = float(meta[2])*1e-3\n", | ||
" cell_volume = float(meta[3])*1e3\n", | ||
" \n", | ||
"    # Split rows 6-end on \",\" and grab first and second columns\n", | ||
" shots = []\n", | ||
" heats = []\n", | ||
" for l in lines[5:]:\n", | ||
" col = l.split(\",\")\n", | ||
" shots.append(float(col[0]))\n", | ||
" heats.append(float(col[1]))\n", | ||
"\n", | ||
" # Make a list of uncertainty repeated once for every observed heat\n", | ||
" heats_stdev = [uncertainty for i in range(len(heats))]\n", | ||
"\n", | ||
" # Construct dataframe with data and write out a spreadsheet\n", | ||
" to_df = {\"injection\":shots,\n", | ||
" \"heat\":heats,\n", | ||
" \"heat_stdev\":heats_stdev}\n", | ||
" df = pd.DataFrame(to_df)\n", | ||
" df.to_csv(output_file,index=False)\n", | ||
"\n", | ||
" # Build dictionary holding meta data\n", | ||
" out = {}\n", | ||
" out[\"temperature\"] = temperature\n", | ||
" out[\"cell_conc\"] = stationary_cell_conc\n", | ||
" out[\"titrant_conc\"] = titrant_syringe_conc\n", | ||
"    out[\"cell_volume\"] = cell_volume\n", | ||
"\n", | ||
"    return out\n" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": 30, | ||
"id": "3cb1902e-ad74-482a-b9a9-86b676fe6698", | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"import os\n", | ||
"import shutil\n", | ||
"## Define input directory that contains .dh files\n", | ||
"## Define output directory to put .csv files in\n", | ||
"\n", | ||
"## Running this script twice will overwrite any previous runs of the code\n", | ||
"\n", | ||
"inputdir = r\"C:/Users/willi/linkage/notebooks/rawdata\"\n", | ||
"outputdir = r\"C:/Users/willi/linkage/notebooks/processed_data\" # Specify your desired output directory\n", | ||
"\n", | ||
"def iterate_dh_to_csv(inputdir, outputdir):\n", | ||
" for dirpath, dirnames, filenames in os.walk(inputdir):\n", | ||
" for filename in filenames:\n", | ||
" if filename.lower().endswith('.dh'):\n", | ||
" filepath = os.path.join(dirpath, filename)\n", | ||
" \n", | ||
" output_filepath = os.path.splitext(filepath)[0] + \".csv\"\n", | ||
" read_heats_file(filepath, 0, output_filepath)\n", | ||
"\n", | ||
" # Calculate the relative path of the .csv file within the inputdir\n", | ||
" rel_path = os.path.relpath(output_filepath, inputdir)\n", | ||
"\n", | ||
" # Create the corresponding directory structure in the outputdir\n", | ||
" output_dirpath = os.path.join(outputdir, os.path.dirname(rel_path))\n", | ||
" os.makedirs(output_dirpath, exist_ok=True)\n", | ||
"\n", | ||
" # Copy the .csv file to the new location\n", | ||
" shutil.copy2(output_filepath, output_dirpath)\n", | ||
"\n", | ||
"\n", | ||
"\n", | ||
"iterate_dh_to_csv(inputdir, outputdir)" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"id": "9e905fab-5dbb-481f-aa71-3160c51d7c3e", | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"id": "f665fa30-63e2-4eb7-bc6e-86aaadd6e681", | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [] | ||
} | ||
], | ||
"metadata": { | ||
"kernelspec": { | ||
"display_name": "Python 3 (ipykernel)", | ||
"language": "python", | ||
"name": "python3" | ||
}, | ||
"language_info": { | ||
"codemirror_mode": { | ||
"name": "ipython", | ||
"version": 3 | ||
}, | ||
"file_extension": ".py", | ||
"mimetype": "text/x-python", | ||
"name": "python", | ||
"nbconvert_exporter": "python", | ||
"pygments_lexer": "ipython3", | ||
"version": "3.12.4" | ||
} | ||
}, | ||
"nbformat": 4, | ||
"nbformat_minor": 5 | ||
} |