Merge pull request #3 from wlegrand91/main

Notebook for processing .dh files
harmslab · Aug 6, 2024 · 46cc0cd · 46cc0cd
2 parents cebe6b4 + c50a35f
commit 46cc0cd
Show file tree

Hide file tree

Showing 2 changed files with 218 additions and 0 deletions.
diff --git a/notebooks/read_heats.py b/notebooks/read_heats.py
@@ -0,0 +1,64 @@
+
+import pandas as pd
+
def read_heats_file(dh_file,uncertainty,output_file):
    """
    Read the heats file written out by the MicroCal/Origin ITC analysis
    package, write the per-injection heats to a csv file, and return the
    experiment meta data.

    Parameters
    ----------
    dh_file : str
        name of .dh file written out by microcal software
    uncertainty : float
        user estimate of the uncertainty on each measured heat; the same
        value is written to the "heat_stdev" column for every injection
    output_file : str
        name of csv file to write out data (columns "injection", "heat",
        and "heat_stdev")

    Returns
    -------
    meta_data : dict
        dictionary with metadata read from the third line of the file, with
        keys "temperature", "cell_conc", "titrant_conc", and "cell_volume".
        The concentrations are the file values multiplied by 1e-3 and the
        cell volume is the file value multiplied by 1e3 (presumably
        mM -> M and mL -> uL; confirm against the instrument output).
        NOTE(review): the temperature is returned exactly as stored in the
        file with no unit conversion. An earlier version of this docstring
        said Kelvin; confirm which unit the .dh file actually uses.
    """

    # Read data file
    with open(dh_file,'r') as f:
        lines = f.readlines()

    # Grab third line and split on ","
    meta = lines[2].split(",")

    # Parse meta data on the third line
    temperature = float(meta[0])
    stationary_cell_conc = float(meta[1])*1e-3
    titrant_syringe_conc = float(meta[2])*1e-3
    cell_volume = float(meta[3])*1e3

    # Split rows 6-end on "," and grab first and second columns
    shots = []
    heats = []
    for line in lines[5:]:

        # Skip blank lines (e.g. trailing newlines at the end of the file)
        # rather than crashing on float("")
        if not line.strip():
            continue

        col = line.split(",")
        shots.append(float(col[0]))
        heats.append(float(col[1]))

    # Make a list of uncertainty repeated once for every observed heat
    heats_stdev = [uncertainty]*len(heats)

    # Construct dataframe with data and write out a spreadsheet
    to_df = {"injection":shots,
             "heat":heats,
             "heat_stdev":heats_stdev}
    df = pd.DataFrame(to_df)
    df.to_csv(output_file,index=False)

    # Build dictionary holding meta data
    out = {}
    out["temperature"] = temperature
    out["cell_conc"] = stationary_cell_conc
    out["titrant_conc"] = titrant_syringe_conc

    # Return the parsed cell volume (previously the titrant concentration
    # was stored under this key by mistake)
    out["cell_volume"] = cell_volume

    return out
diff --git a/notebooks/readheatstest.ipynb b/notebooks/readheatstest.ipynb
@@ -0,0 +1,154 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": 27,
+   "id": "decd9f42-6209-4a88-9a15-eb0b32cb2505",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "\n",
+    "import pandas as pd\n",
+    "\n",
+    "def read_heats_file(dh_file,uncertainty,output_file):\n",
+    "    \"\"\"\n",
+    "    Read the heats file written out by the MicroCal/Origin ITC analysis\n",
+    "    package.\n",
+    "\n",
+    "    Parameters\n",
+    "    ----------\n",
+    "    dh_file : str\n",
+    "        name of .dh file written out by microcal software\n",
+    "    output_file : str\n",
+    "        name of file to write out data\n",
+    "    uncertainty : float\n",
+    "        user estimate of the uncertainty on each measured heat\n",
+    "\n",
+    "    Returns\n",
+    "    -------\n",
+    "    meta_data : dict\n",
+    "        dictionary with metadata read from the top of the file: temperature\n",
+    "        in Kelvin, cell and titrant concentrations in molar, and cell_volume\n",
+    "        in microliters\n",
+    "    \"\"\"\n",
+    "\n",
+    "    # Read data file\n",
+    "    with open(dh_file,'r') as f:\n",
+    "        lines = f.readlines()\n",
+    "\n",
+    "    # Grab third line and split on \",\"\n",
+    "    meta = lines[2].split(\",\")\n",
+    "\n",
+    "    # Parse meta data on the third line\n",
+    "    temperature = float(meta[0])\n",
+    "    stationary_cell_conc = float(meta[1])*1e-3\n",
+    "    titrant_syringe_conc = float(meta[2])*1e-3\n",
+    "    cell_volume = float(meta[3])*1e3\n",
+    "    \n",
+    "    # Split rows 6-end on \",\" and grab first and second columns\n",
+    "    shots = []\n",
+    "    heats = []\n",
+    "    for l in lines[5:]:\n",
+    "        col = l.split(\",\")\n",
+    "        shots.append(float(col[0]))\n",
+    "        heats.append(float(col[1]))\n",
+    "\n",
+    "    # Make a list of uncertainty repeated once for every observed heat\n",
+    "    heats_stdev = [uncertainty for i in range(len(heats))]\n",
+    "\n",
+    "    # Construct dataframe with data and write out a spreadsheet\n",
+    "    to_df = {\"injection\":shots,\n",
+    "             \"heat\":heats,\n",
+    "             \"heat_stdev\":heats_stdev}\n",
+    "    df = pd.DataFrame(to_df)\n",
+    "    df.to_csv(output_file,index=False)\n",
+    "\n",
+    "    # Build dictionary holding meta data\n",
+    "    out = {}\n",
+    "    out[\"temperature\"] = temperature\n",
+    "    out[\"cell_conc\"] = stationary_cell_conc\n",
+    "    out[\"titrant_conc\"] = titrant_syringe_conc\n",
+    "    out[\"cell_volume\"] = titrant_syringe_conc\n",
+    "\n",
+    "\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 30,
+   "id": "3cb1902e-ad74-482a-b9a9-86b676fe6698",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import shutil\n",
+    "## Define input directory that contains .dh files\n",
+    "## Define output directory to put .csv files in\n",
+    "\n",
+    "## Running this script twice will overwrite any previous runs of the code\n",
+    "\n",
+    "inputdir = r\"C:/Users/willi/linkage/notebooks/rawdata\"\n",
+    "outputdir = r\"C:/Users/willi/linkage/notebooks/processed_data\" # Specify your desired output directory\n",
+    "\n",
+    "def iterate_dh_to_csv(inputdir, outputdir):\n",
+    "    for dirpath, dirnames, filenames in os.walk(inputdir):\n",
+    "        for filename in filenames:\n",
+    "            if filename.lower().endswith('.dh'):\n",
+    "                filepath = os.path.join(dirpath, filename)\n",
+    "                \n",
+    "                output_filepath = os.path.splitext(filepath)[0] + \".csv\"\n",
+    "                read_heats_file(filepath, 0, output_filepath)\n",
+    "\n",
+    "                # Calculate the relative path of the .csv file within the inputdir\n",
+    "                rel_path = os.path.relpath(output_filepath, inputdir)\n",
+    "\n",
+    "                # Create the corresponding directory structure in the outputdir\n",
+    "                output_dirpath = os.path.join(outputdir, os.path.dirname(rel_path))\n",
+    "                os.makedirs(output_dirpath, exist_ok=True)\n",
+    "\n",
+    "                # Copy the .csv file to the new location\n",
+    "                shutil.copy2(output_filepath, output_dirpath)\n",
+    "\n",
+    "\n",
+    "\n",
+    "iterate_and_process(inputdir, outputdir)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "9e905fab-5dbb-481f-aa71-3160c51d7c3e",
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "f665fa30-63e2-4eb7-bc6e-86aaadd6e681",
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3 (ipykernel)",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.12.4"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}