"""
Helpers for converting MicroCal/Origin ``.dh`` heat files into ``.csv`` files.

Consolidated from the patch "Notebook for processing .dh files", which added
``notebooks/read_heats.py`` and the code cells of
``notebooks/readheatstest.ipynb``.
"""

import os
import shutil

import pandas as pd


def read_heats_file(dh_file: str, uncertainty: float, output_file: str) -> dict:
    """
    Read the heats file written out by the MicroCal/Origin ITC analysis
    package and write the per-injection heats to a csv file.

    Parameters
    ----------
    dh_file : str
        name of .dh file written out by microcal software
    uncertainty : float
        user estimate of the uncertainty on each measured heat; the same
        value is written for every injection
    output_file : str
        name of csv file to write, with columns ``injection``, ``heat`` and
        ``heat_stdev``

    Returns
    -------
    meta_data : dict
        dictionary with the metadata parsed from the third line of the file,
        under the keys ``temperature``, ``cell_conc``, ``titrant_conc`` and
        ``cell_volume``. The two concentrations are the file values times
        1e-3 and the cell volume is the file value times 1e3 (presumably
        mM -> M and mL -> uL). The temperature is passed through without
        conversion.
        NOTE(review): the original docstring says Kelvin; confirm the unit
        the instrument software actually writes.

    Raises
    ------
    IndexError
        if the file has fewer than three lines, the metadata line has fewer
        than four comma-separated fields, or a data row has fewer than two.
    ValueError
        if a metadata or data field cannot be parsed as a float.
    """

    # Read data file
    with open(dh_file, "r") as f:
        lines = f.readlines()

    # Parse the comma-separated meta data on the third line
    meta = lines[2].split(",")
    temperature = float(meta[0])
    stationary_cell_conc = float(meta[1]) * 1e-3
    titrant_syringe_conc = float(meta[2]) * 1e-3
    cell_volume = float(meta[3]) * 1e3

    # Rows 6-end hold the data: first column is the injection, second column
    # is the heat. Blank lines (e.g. a trailing newline at the end of the
    # file) are skipped instead of crashing float().
    shots = []
    heats = []
    for line in lines[5:]:
        if not line.strip():
            continue
        col = line.split(",")
        shots.append(float(col[0]))
        heats.append(float(col[1]))

    # Same user-supplied uncertainty for every observed heat
    heats_stdev = [uncertainty] * len(heats)

    # Construct dataframe with data and write out a spreadsheet
    df = pd.DataFrame(
        {
            "injection": shots,
            "heat": heats,
            "heat_stdev": heats_stdev,
        }
    )
    df.to_csv(output_file, index=False)

    # Meta data handed back to the caller. "cell_volume" previously held the
    # titrant concentration by mistake.
    return {
        "temperature": temperature,
        "cell_conc": stationary_cell_conc,
        "titrant_conc": titrant_syringe_conc,
        "cell_volume": cell_volume,
    }


## Define input directory that contains .dh files
## Define output directory to put .csv files in

## Running this script twice will overwrite any previous runs of the code

inputdir = r"C:/Users/willi/linkage/notebooks/rawdata"
outputdir = r"C:/Users/willi/linkage/notebooks/processed_data"  # Specify your desired output directory


def iterate_dh_to_csv(inputdir: str, outputdir: str, uncertainty: float = 0) -> dict:
    """
    Convert every ``.dh`` file under ``inputdir`` to csv and mirror the
    results into ``outputdir``.

    Each ``.dh`` file (extension matched case-insensitively) is converted
    with ``read_heats_file``. The csv is first written next to the source
    file and then copied into ``outputdir`` under the same relative
    sub-directory, which is created if needed. Existing csv files are
    overwritten.

    Parameters
    ----------
    inputdir : str
        directory that is walked recursively for .dh files
    outputdir : str
        directory that receives the copied csv files
    uncertainty : float, optional
        uncertainty passed to ``read_heats_file`` for every file
        (default 0, the value that was previously hard-coded)

    Returns
    -------
    meta_data : dict
        maps the path of each processed .dh file to the metadata dictionary
        returned by ``read_heats_file``; empty if no .dh file was found
    """

    meta_by_file = {}

    for dirpath, _dirnames, filenames in os.walk(inputdir):
        for filename in filenames:
            if not filename.lower().endswith(".dh"):
                continue

            filepath = os.path.join(dirpath, filename)

            # csv is written alongside the .dh file first
            output_filepath = os.path.splitext(filepath)[0] + ".csv"
            meta_by_file[filepath] = read_heats_file(
                filepath, uncertainty, output_filepath
            )

            # Calculate the relative path of the .csv file within the inputdir
            rel_path = os.path.relpath(output_filepath, inputdir)

            # Create the corresponding directory structure in the outputdir
            output_dirpath = os.path.join(outputdir, os.path.dirname(rel_path))
            os.makedirs(output_dirpath, exist_ok=True)

            # Copy the .csv file to the new location
            shutil.copy2(output_filepath, output_dirpath)

    return meta_by_file


if __name__ == "__main__":
    # The notebook cell called the undefined name `iterate_and_process`;
    # the function defined above is `iterate_dh_to_csv`.
    iterate_dh_to_csv(inputdir, outputdir)