From 54bef34c1fedf701000808e61f871e5450e9090d Mon Sep 17 00:00:00 2001
From: Raven <raven.mukherjee@databricks.com>
Date: Tue, 2 Apr 2024 15:56:31 -0400
Subject: [PATCH 01/46] Raven's test notebook with a class to parse billed
 amounts and SBR segment values

---
 .../hls/test-notebooks/claim-test.ipynb       | 273 ++++++++++++++++++
 1 file changed, 273 insertions(+)
 create mode 100644 databricksx12/hls/test-notebooks/claim-test.ipynb

diff --git a/databricksx12/hls/test-notebooks/claim-test.ipynb b/databricksx12/hls/test-notebooks/claim-test.ipynb
new file mode 100644
index 0000000..481f558
--- /dev/null
+++ b/databricksx12/hls/test-notebooks/claim-test.ipynb
@@ -0,0 +1,273 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "['/Users/raven.mukherjee/edi-sol-accelerator/x12-edi-parser/databricksx12/hls/test-notebooks', '/opt/homebrew/Cellar/python@3.12/3.12.2_1/Frameworks/Python.framework/Versions/3.12/lib/python312.zip', '/opt/homebrew/Cellar/python@3.12/3.12.2_1/Frameworks/Python.framework/Versions/3.12/lib/python3.12', '/opt/homebrew/Cellar/python@3.12/3.12.2_1/Frameworks/Python.framework/Versions/3.12/lib/python3.12/lib-dynload', '', '/Users/raven.mukherjee/edi-sol-accelerator/edi-parse-env/lib/python3.12/site-packages']\n"
+     ]
+    }
+   ],
+   "source": [
+    "import sys\n",
+    "print(sys.path)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 2,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "{\n",
+      "    \"EDI.sender_tax_id\": \"ZZ\",\n",
+      "    \"list\": [\n",
+      "        {\n",
+      "            \"FunctionalGroup.receiver\": \"123456789\",\n",
+      "            \"FunctionalGroup.sender\": \"CLEARINGHOUSE\",\n",
+      "            \"FunctionalGroup.transaction_datetime\": \"20180508:0833\",\n",
+      "            \"FunctionalGroup.transaction_type\": \"222\",\n",
+      "            \"list\": [\n",
+      "                {\n",
+      "                    \"Transaction.transaction_type\": \"222\"\n",
+      "                },\n",
+      "                {\n",
+      "                    \"Transaction.transaction_type\": \"222\"\n",
+      "                },\n",
+      "                {\n",
+      "                    \"Transaction.transaction_type\": \"222\"\n",
+      "                },\n",
+      "                {\n",
+      "                    \"Transaction.transaction_type\": \"222\"\n",
+      "                },\n",
+      "                {\n",
+      "                    \"Transaction.transaction_type\": \"222\"\n",
+      "                }\n",
+      "            ]\n",
+      "        }\n",
+      "    ]\n",
+      "}\n"
+     ]
+    }
+   ],
+   "source": [
+    "from databricksx12.edi import *\n",
+    "x =  EDIManager(EDI(open(\"/Users/raven.mukherjee/solution_accelerators/x12-edi-parser/sampledata/837/CHPW_Claimdata.txt\", \"rb\").read().decode(\"utf-8\")))\n",
+    "\n",
+    "import json\n",
+    "print(json.dumps(x.flatten(x.data), indent=4))"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 3,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "sample_data_837i = open(\"/Users/raven.mukherjee/solution_accelerators/x12-edi-parser/sampledata/837/CC_837I_EDI.txt\", \"rb\").read().decode(\"utf-8\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 4,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# from databricksx12.edi import *\n",
+    "\n",
+    "# class extend_transaction(EDI):\n",
+    "#     def __init__(self, data, delim_cls=AnsiX12Delim):\n",
+    "#         super().__init__(data, delim_cls)\n",
+    "\n",
+    "#     @property\n",
+    "#     def full_transaction(self):\n",
+    "#         transaction_start_indexes = [i for i, segment in enumerate(self.data) if segment.segment_name() == \"ST\"]\n",
+    "#         transaction_end_indexes = [i for i, segment in enumerate(self.data) if segment.segment_name() == \"SE\"]\n",
+    "\n",
+    "#         transactions = []\n",
+    "#         for start, end in zip(transaction_start_indexes, transaction_end_indexes):\n",
+    "#             transaction_segments = self.data[start:end+1]\n",
+    "#             transactions.append(transaction_segments)\n",
+    "#         return transactions\n",
+    "\n",
+    "#     @property\n",
+    "#     def claim_identifier(self):\n",
+    "#         transactions = self.full_transaction\n",
+    "#         claim_identifiers = []\n",
+    "\n",
+    "#         for transaction_segments in transactions:\n",
+    "#             claim_id = None\n",
+    "#             for segment in transaction_segments:\n",
+    "#                 if segment.segment_name() == \"BHT\":\n",
+    "#                     claim_id = segment.element(3) #confirm\n",
+    "#                     break\n",
+    "#             claim_identifiers.append(claim_id)\n",
+    "\n",
+    "#         return claim_identifiers\n",
+    "\n",
+    "#     @property\n",
+    "#     def header_billing_amount(self):\n",
+    "#         transactions = self.full_transaction\n",
+    "#         billing_headers = []\n",
+    "\n",
+    "#         for transaction_segments in transactions:\n",
+    "#             for segment in transaction_segments:\n",
+    "#                 if segment.segment_name() == \"CLM\":\n",
+    "#                     bill_header = segment.element(1)\n",
+    "#                     billing_headers.append(bill_header)\n",
+    "#                     break  # one CLM segment per transaction?\n",
+    "\n",
+    "#         return billing_headers\n",
+    "\n",
+    "#     @property\n",
+    "#     def billed_amount(self):\n",
+    "#         transactions = self.full_transaction\n",
+    "#         billed_amounts = []\n",
+    "\n",
+    "#         for transaction_segments in transactions:\n",
+    "#             for segment in transaction_segments:\n",
+    "#                 if segment.segment_name() == \"CLM\":\n",
+    "#                     billed_amount = segment.element(2)  # Billed amount is the second element\n",
+    "#                     billed_amounts.append(billed_amount)\n",
+    "#                     break\n",
+    "\n",
+    "#         return billed_amounts\n",
+    "    \n",
+    "#     # @property\n",
+    "#     # def subscriber(self):\n",
+    "#     #     transactions = self.full_transaction\n",
+    "\n",
+    "\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 5,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# # use raw EDI data\n",
+    "# edi_object = extend_transaction(sample_data_837i)\n",
+    "\n",
+    "# # call  different vars\n",
+    "# transactions = edi_object.full_transaction\n",
+    "# claim_ids = edi_object.claim_identifier\n",
+    "# header = edi_object.header_billing_amount\n",
+    "# billed_amount = edi_object.billed_amount"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 12,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "#  identify elements functionally!\n",
+    "\n",
+    "from databricksx12.edi import *\n",
+    "\n",
+    "class extend_transaction(EDI):\n",
+    "    def __init__(self, data, delim_cls=AnsiX12Delim):\n",
+    "        super().__init__(data, delim_cls)\n",
+    "\n",
+    "        # Use map and lambda to populate billed amounts and subscribers\n",
+    "        self.billed_amounts = list(map(lambda x: x.element(2), self.segments_by_name(\"CLM\")))\n",
+    "        self.subscribers = list(map(lambda x: x.element(4), self.segments_by_name(\"SBR\")))\n",
+    "\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 13,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "sample_data_chpw_claimdata = open(\"/Users/raven.mukherjee/edi-sol-accelerator/x12-edi-parser/sampledata/837/CHPW_Claimdata.txt\", \"rb\").read().decode(\"utf-8\")\n",
+    "# use raw EDI data\n",
+    "edi_object = extend_transaction(sample_data_chpw_claimdata)\n",
+    "billed_amounts = edi_object.billed_amounts\n",
+    "subscribers = edi_object.subscribers\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 14,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "['20', '50.1', '11.64', '234', '20']"
+      ]
+     },
+     "execution_count": 14,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "billed_amounts"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 15,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "['COMMUNITY HLTH PLAN OF WASH',\n",
+       " 'COMMUNITY HLTH PLAN OF WASH',\n",
+       " 'COMMUNITY HLTH PLAN OF WASH',\n",
+       " 'COMMUNITY HLTH PLAN OF WASH',\n",
+       " 'COMMUNITY HLTH PLAN OF WASH']"
+      ]
+     },
+     "execution_count": 15,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "subscribers"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "edi-parse-kernel",
+   "language": "python",
+   "name": "edi-parse-env"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.12.2"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}

From ab0a584d9f8971af673e3e11bea1485dd1c0af5c Mon Sep 17 00:00:00 2001
From: Raven <raven.mukherjee@databricks.com>
Date: Thu, 4 Apr 2024 18:04:42 -0400
Subject: [PATCH 02/46] building a class to extract billing provider or
 subscriber names based on the loop requested

---
 databricksx12/hls/hierarchicalloop.py | 79 +++++++++++++++++++++++++++
 1 file changed, 79 insertions(+)
 create mode 100644 databricksx12/hls/hierarchicalloop.py

diff --git a/databricksx12/hls/hierarchicalloop.py b/databricksx12/hls/hierarchicalloop.py
new file mode 100644
index 0000000..f633cd2
--- /dev/null
+++ b/databricksx12/hls/hierarchicalloop.py
@@ -0,0 +1,79 @@
+from databricksx12.edi import *
+
+class LoopMapping:
+    def __init__(self):
+        self.mappings = {
+            '2000A': ('20', 'NM1', '3'),
+            '2000B': ('22', 'SBR', '4'),
+        }
+
+    def get_identifiers(self, loop_type):
+        return self.mappings.get(loop_type, (None, None))
+
+
+class HierarchicalLoop(EDI):
+    def __init__(self, data, delim_cls=AnsiX12Delim, Loop='2000B'):
+        super().__init__(data, delim_cls)
+        self.loop_mapping = LoopMapping()
+        self.target_element, self.target_segment_name, self.target_element_index = self.loop_mapping.get_identifiers(
+            Loop)
+
+        # find all HL segments along with the 3rd element that denotes 2000A or 2000B
+        self.hl_segments = self._hl_identifiers()
+
+        # find all HL segments along with the 3rd element that denotes 2000A or 2000B
+        self.clm_segments = self._clm_identifiers()
+
+        # Calculate ranges and then extract 2000A lines based on those ranges
+        self.ranges = self.select_range_of_interest(
+            self.hl_segments, self.clm_segments, self.target_element)
+        self.extracted_lines = self.extract_lines_based_on_ranges(
+            self.ranges, self.target_segment_name, self.target_element_index)
+
+    def _hl_identifiers(self):
+        # Find the segments where HL loop begins
+        indexed_HL_segments = self.segments_by_name_index("HL")
+        return [(i, x.element(3)) for i, x in indexed_HL_segments]
+
+    def _clm_identifiers(self):
+        # Find the segments where CLM loop begins
+        indexed_CLM_segments = self.segments_by_name_index("CLM")
+        return [(i, x.element(2)) for i, x in indexed_CLM_segments]
+
+    def select_range_of_interest(self, hl_indexes, clm_indexes, target_value):
+        ranges = []
+        start_index = None
+        last_index = None
+
+        for index, value in hl_indexes:
+            if value == target_value:
+                if start_index is not None:
+                    ranges.append((start_index+1, index))
+                start_index = index
+            elif start_index is not None:
+                ranges.append((start_index+1, index))
+                start_index = None
+        if clm_indexes:
+            last_index = clm_indexes[-1][0]
+        if last_index and start_index is not None:
+            ranges.append((start_index+1, last_index))
+        return ranges
+
+    def extract_lines_based_on_ranges(self, ranges, target_value, target_index):
+        extracted_elements = []
+        # Iterate through each range in the list
+        for start, end in ranges:
+            # Retrieve the segments within this range
+            segments_in_range = self.segments_by_position(start, end)
+
+            desired_elements = map(
+                lambda segment: segment.element(int(target_index)),
+                filter(
+                    lambda segment: segment.segment_name() == target_value and len(
+                        segment.data.split(segment.format_cls.ELEMENT_DELIM)) > int(target_index),
+                    segments_in_range
+                )
+            )
+            extracted_elements.extend(desired_elements)
+
+        return list(extracted_elements)

From 05518c0d789241c3de01f0332edb50a3e4e57695 Mon Sep 17 00:00:00 2001
From: Raven <raven.mukherjee@databricks.com>
Date: Thu, 4 Apr 2024 18:09:50 -0400
Subject: [PATCH 03/46] changed a few comments

---
 databricksx12/hls/hierarchicalloop.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/databricksx12/hls/hierarchicalloop.py b/databricksx12/hls/hierarchicalloop.py
index f633cd2..29e9223 100644
--- a/databricksx12/hls/hierarchicalloop.py
+++ b/databricksx12/hls/hierarchicalloop.py
@@ -18,13 +18,13 @@ def __init__(self, data, delim_cls=AnsiX12Delim, Loop='2000B'):
         self.target_element, self.target_segment_name, self.target_element_index = self.loop_mapping.get_identifiers(
             Loop)
 
-        # find all HL segments along with the 3rd element that denotes 2000A or 2000B
+        # find all HL segments along with the 3rd element that denotes 2000A (20) or 2000B (22)
         self.hl_segments = self._hl_identifiers()
 
-        # find all HL segments along with the 3rd element that denotes 2000A or 2000B
+        # find all CLM segments (important for indexing the last HL or SBR within a tx)
         self.clm_segments = self._clm_identifiers()
 
-        # Calculate ranges and then extract 2000A lines based on those ranges
+        # Calculate ranges and then extract 2000A/B lines based on those ranges
         self.ranges = self.select_range_of_interest(
             self.hl_segments, self.clm_segments, self.target_element)
         self.extracted_lines = self.extract_lines_based_on_ranges(

From 9c96544aeb74d059589002986b781f3dbac511a1 Mon Sep 17 00:00:00 2001
From: Aaron Z <aaron.zavora@databricks.com>
Date: Tue, 9 Apr 2024 10:33:16 -0400
Subject: [PATCH 04/46] today's discussion

---
 databricksx12/hls/hierarchicalloop.py | 67 +++++++++++++++++++++++++--
 1 file changed, 62 insertions(+), 5 deletions(-)

diff --git a/databricksx12/hls/hierarchicalloop.py b/databricksx12/hls/hierarchicalloop.py
index 29e9223..e443011 100644
--- a/databricksx12/hls/hierarchicalloop.py
+++ b/databricksx12/hls/hierarchicalloop.py
@@ -1,20 +1,38 @@
 from databricksx12.edi import *
 
 class LoopMapping:
+
+    def __init__(self, mappings=None):
+        self.mappings = (mappings if mappings is not None else {
+            '20': {
+                'description': 'Information Source',
+                'loop': '2000A'
+                },
+            '22': {
+                'description': 'Subscriber',
+                'loop': '2000B'
+                }
+            })
+
+
+   
+        
+    """
     def __init__(self):
         self.mappings = {
             '2000A': ('20', 'NM1', '3'),
             '2000B': ('22', 'SBR', '4'),
         }
 
-    def get_identifiers(self, loop_type):
-        return self.mappings.get(loop_type, (None, None))
+
+    ADZ want our key = (lookup value found in data), value = additional info needed 
+    """
 
 
 class HierarchicalLoop(EDI):
-    def __init__(self, data, delim_cls=AnsiX12Delim, Loop='2000B'):
+    def __init__(self, data, delim_cls=AnsiX12Delim, loop_mapping= LoopMapping.mappings):
         super().__init__(data, delim_cls)
-        self.loop_mapping = LoopMapping()
+        self.loop_mapping = loop_mapping
         self.target_element, self.target_segment_name, self.target_element_index = self.loop_mapping.get_identifiers(
             Loop)
 
@@ -32,7 +50,7 @@ def __init__(self, data, delim_cls=AnsiX12Delim, Loop='2000B'):
 
     def _hl_identifiers(self):
         # Find the segments where HL loop begins
-        indexed_HL_segments = self.segments_by_name_index("HL")
+        indexed_HL_segments = self.segments_by_nloop_object.extracted_linesame_index("HL")
         return [(i, x.element(3)) for i, x in indexed_HL_segments]
 
     def _clm_identifiers(self):
@@ -77,3 +95,42 @@ def extract_lines_based_on_ranges(self, ranges, target_value, target_index):
             extracted_elements.extend(desired_elements)
 
         return list(extracted_elements)
+
+
+    def parent_loops(self):
+        pass
+
+    def child_loops(self, parent_loop_num):
+        pass
+
+    """
+       @return
+         -index of each HL segment
+         -index of parent segments
+         -be able to answer "where is loop XYZ?" and "at this location, what looop am i in?"
+    """
+    def _hl_segment_indexes(self):
+        pass
+
+
+    self.hl_parents = {
+        parent:
+        { index_start : value
+          index_end : value
+           children : [
+               hl_child : {
+                   index_start: value
+                   index_end: value
+                   }
+               ]
+        }
+
+
+    self.hl = HL()
+    self.claim_start_index = segment(clm)
+
+    Who is my billing provider?  hl.get_loop(20)
+    Who is my subscriber?
+    Who is my patient? 
+    
+    

From 235f0f1abab0b687f133cc479bd7d155a8539568 Mon Sep 17 00:00:00 2001
From: Raven <raven.mukherjee@databricks.com>
Date: Thu, 11 Apr 2024 11:24:16 -0400
Subject: [PATCH 05/46] two classes in two files; one for finding the
 hierarchical loops and the other for asking complex questions about claims.
 Also a new edited sample data example with dependent sub-child in Line 161

---
 databricksx12/hls/build-func.py          | 138 ++++++++++++++
 databricksx12/hls/claim.py               | 184 +++++++++++++++----
 databricksx12/hls/hierarchicalloop.py    | 222 ++++++++++-------------
 sampledata/837/CHPW_Claimdata_edited.txt | 182 +++++++++++++++++++
 4 files changed, 558 insertions(+), 168 deletions(-)
 create mode 100644 databricksx12/hls/build-func.py
 create mode 100644 sampledata/837/CHPW_Claimdata_edited.txt

diff --git a/databricksx12/hls/build-func.py b/databricksx12/hls/build-func.py
new file mode 100644
index 0000000..2de4f64
--- /dev/null
+++ b/databricksx12/hls/build-func.py
@@ -0,0 +1,138 @@
+"""
+Apr 9 notes
+"""
+from databricksx12.edi import *
+
+class LoopMapping:
+
+    def __init__(self, mappings=None):
+        self.mappings = (mappings if mappings is not None else {
+            '20': {
+                'description': 'Information Source',
+                'loop': '2000A'
+                },
+            '22': {
+                'description': 'Subscriber',
+                'loop': '2000B'
+                }
+            })
+
+
+   
+        
+    """
+    def __init__(self):
+        self.mappings = {
+            '2000A': ('20', 'NM1', '3'),
+            '2000B': ('22', 'SBR', '4'),
+        }
+
+
+    ADZ want our key = (lookup value found in data), value = additional info needed 
+    """
+
+
+class HierarchicalLoop(EDI):
+    def __init__(self, data, delim_cls=AnsiX12Delim, loop_mapping= LoopMapping.mappings):
+        super().__init__(data, delim_cls)
+        self.loop_mapping = loop_mapping
+        self.target_element, self.target_segment_name, self.target_element_index = self.loop_mapping.get_identifiers(
+            Loop)
+
+        # find all HL segments along with the 3rd element that denotes 2000A (20) or 2000B (22)
+        self.hl_segments = self._hl_identifiers()
+
+        # find all CLM segments (important for indexing the last HL or SBR within a tx)
+        self.clm_segments = self._clm_identifiers()
+
+        # Calculate ranges and then extract 2000A/B lines based on those ranges
+        self.ranges = self.select_range_of_interest(
+            self.hl_segments, self.clm_segments, self.target_element)
+        self.extracted_lines = self.extract_lines_based_on_ranges(
+            self.ranges, self.target_segment_name, self.target_element_index)
+
+    def _hl_identifiers(self):
+        # Find the segments where HL loop begins
+        indexed_HL_segments = self.segments_by_nloop_object.extracted_linesame_index("HL")
+        return [(i, x.element(3)) for i, x in indexed_HL_segments]
+
+    def _clm_identifiers(self):
+        # Find the segments where CLM loop begins
+        indexed_CLM_segments = self.segments_by_name_index("CLM")
+        return [(i, x.element(2)) for i, x in indexed_CLM_segments]
+
+    def select_range_of_interest(self, hl_indexes, clm_indexes, target_value):
+        ranges = []
+        start_index = None
+        last_index = None
+
+        for index, value in hl_indexes:
+            if value == target_value:
+                if start_index is not None:
+                    ranges.append((start_index+1, index))
+                start_index = index
+            elif start_index is not None:
+                ranges.append((start_index+1, index))
+                start_index = None
+        if clm_indexes:
+            last_index = clm_indexes[-1][0]
+        if last_index and start_index is not None:
+            ranges.append((start_index+1, last_index))
+        return ranges
+
+    def extract_lines_based_on_ranges(self, ranges, target_value, target_index):
+        extracted_elements = []
+        # Iterate through each range in the list
+        for start, end in ranges:
+            # Retrieve the segments within this range
+            segments_in_range = self.segments_by_position(start, end)
+
+            desired_elements = map(
+                lambda segment: segment.element(int(target_index)),
+                filter(
+                    lambda segment: segment.segment_name() == target_value and len(
+                        segment.data.split(segment.format_cls.ELEMENT_DELIM)) > int(target_index),
+                    segments_in_range
+                )
+            )
+            extracted_elements.extend(desired_elements)
+
+        return list(extracted_elements)
+
+
+    def parent_loops(self):
+        pass
+
+    def child_loops(self, parent_loop_num):
+        pass
+
+    """
+       @return
+         -index of each HL segment
+         -index of parent segments
+         -be able to answer "where is loop XYZ?" and "at this location, what looop am i in?"
+    """
+    def _hl_segment_indexes(self):
+        pass
+
+
+    self.hl_parents = {
+        parent:
+        { index_start : value
+          index_end : value
+           children : [
+               hl_child : {
+                   index_start: value
+                   index_end: value
+                   }
+               ]
+        }
+
+
+    self.hl = HL()
+    self.claim_start_index = segment(clm)
+
+    Who is my billing provider?  hl.get_loop(20)
+    Who is my subscriber?
+    Who is my patient? 
+    
diff --git a/databricksx12/hls/claim.py b/databricksx12/hls/claim.py
index 94c21c3..88aa76f 100644
--- a/databricksx12/hls/claim.py
+++ b/databricksx12/hls/claim.py
@@ -1,59 +1,169 @@
 from databricksx12.edi import *
-
+from databricksx12.hls import hierarchicalloop
 #
 # Base claim class
 #
+from databricksx12.edi import *
+
+
+class LoopMapping:
+    def __init__(self, mappings=None):
+        self.mappings = (mappings if mappings is not None else {
+            '20': {
+                'description': 'Information Source',
+                'loop': '2000A'
+            },
+            '22': {
+                'description': 'Subscriber',
+                'loop': '2000B'
+            }
+        })
+
+    def get_mapping(self, element):
+        return self.mappings.get(element, None)
+
+
 class Claim(EDI):
+    def __init__(self, data, delim_cls=AnsiX12Delim, loop_mapping=LoopMapping(), element=None):
+        super().__init__(data, delim_cls)
+        self.loop_mapping = loop_mapping
+        self.target_element = element
+        self.loop_value = self.loop_mapping.get_mapping(
+            self.target_element).get('loop')
+        self.target_segment_name, self.target_element_index = self.get_reference_names_of_loop(
+            self.loop_value)
 
-    def __init__(self, segments, delim_cls = AnsiX12Delim):
-        self.data = segments
-        self.format_cls = delim_cls
-        #For Raven TODO marked
-        self.claim_identifier = None #TODO include both CH and RP values here 
-        self.claim_lines = None #TODO Maintain a list of claim lines using ClaimLine class
-        self.subscriber = None  #TODO selecting the subscriber
-        self.patient = None     #TODO selecting the patient info, maybe patient should be its own class? 
+        # find all CLM segments (important for indexing the last HL or SBR within a tx)
+        self.clm_segments = self._clm_identifiers()
 
+    # this feels misplaced; how to fix?
+    def get_reference_names_of_loop(self, loop):
+        identifiers = {
+            '2000A': ('NM1', '3'),
+            '2000B': ('SBR', '4'),
+        }
+        return identifiers.get(loop, (None, None))
 
-    #
-    # TODO total amount billed at the header of the claim
-    #
-    def header_total_billed_amount(self):
-        pass
+    def _clm_identifiers(self):
+        # Find the segments where CLM loop begins
+        indexed_CLM_segments = self.segments_by_name_index("CLM")
+        return [(i, x.element(2)) for i, x in indexed_CLM_segments]
+
+    def extract_lines_based_on_ranges(self, ranges, target_value, target_index):
+        extracted_elements = []
+        # Iterate through each range in the list
+        for start, end in ranges:
+            # Retrieve the segments within this range
+            segments_in_range = self.segments_by_position(start, end)
+
+            desired_elements = map(
+                lambda segment: segment.element(int(target_index)),
+                filter(
+                    lambda segment: segment.segment_name() == target_value and len(
+                        segment.data.split(segment.format_cls.ELEMENT_DELIM)) > int(target_index),
+                    segments_in_range
+                )
+            )
+            extracted_elements.extend(desired_elements)
+
+        return list(extracted_elements)
 
-    #
-    # TODO total amount billed across lines
-    #
-    def lines_total_billed_amount(self):
-        pass 
 
+class ClaimManager:
+    def __init__(self, data, delim_cls=AnsiX12Delim):
+        self.hlmanager = hierarchicalloop.HierarchicalLoopManager(data)
+        self.hl_summary = self.hlmanager.summary
+        # need to sort mapping dependency
+        self.claim = Claim(data, delim_cls, LoopMapping(), element='20')
+        self.claim_summaries = [{
+            'clm_ind': clm_index,
+            'parent_counter': self._find_claim_in_tx(self.hl_summary, clm_index)[0],
+            'child_start_index': self._find_claim_in_tx(self.hl_summary, clm_index)[1],
+            'parent_range': self.get_ranges(self.hl_summary, self._find_claim_in_tx(self.hl_summary, clm_index)[0])[0],
+            'children_ranges': self.get_ranges(self.hl_summary, self._find_claim_in_tx(self.hl_summary, clm_index)[0])[1]
+        } for clm_index in [i for (i, j) in self.claim.clm_segments]]
 
-    
-class ClaimLine(Segment):
+    def _find_claim_in_tx(self, tx_summary, clm_index):
+        for parent_counter, parent_info in tx_summary.items():
+            if parent_info['parent_index_start'] <= int(clm_index) <= parent_info['parent_index_end']:
+                for child_info in parent_info['children']:
+                    if child_info['child_index_start'] <= int(clm_index) <= child_info['child_index_stop']:
+                        return parent_counter, child_info['child_index_start']
+        return None, None
 
-    #
-    # TODO build out claim line uses (case class)
-    #
-    def __init__(self):
+    def get_ranges(self, hl_summary_dict, loop_counter):
+        info = hl_summary_dict.get(loop_counter, None)
+        if info is None:
+            return None
+
+        parent_range = (info['parent_index_start'], info['parent_index_end'])
+        children_ranges = [(child['child_index_start'], child['child_index_stop'])
+                           for child in info.get('children', [])]
+
+        return parent_range, children_ranges
+
+    def _find_billing_providers(self):
+        first_lines = []
+        for summary in self.claim_summaries:
+            lines = self.claim.extract_lines_based_on_ranges(
+                [summary['parent_range']
+                 ], self.claim.target_segment_name, self.claim.target_element_index
+            )
+            if lines:  # Check if any lines were extracted
+                # Append the first line of this iteration
+                first_lines.append(lines[0])
+        return first_lines
+
+    def _find_subscribers(self):
         pass
-        """
-        select fields: 
-          procedure code
-          procedure code type (HCPCS, CPT4, ICD10)
-          revenuce code
-          procedure modifier codes
-          billed amount 
-          
-        """
+
+
+"""
+claims = ClaimManager(sample_data_chpw_claimdata)
+claims.claim_summaries
+
+[{'clm_ind': 23,
+  'parent_counter': '1',
+  'child_start_index': 16,
+  'parent_range': (7, 35),
+  'children_ranges': [(16, 35)]},
+ {'clm_ind': 57,
+  'parent_counter': '63',
+  'child_start_index': 50,
+  'parent_range': (41, 69),
+  'children_ranges': [(50, 69)]},
+ {'clm_ind': 91,
+  'parent_counter': '49',
+  'child_start_index': 84,
+  'parent_range': (75, 103),
+  'children_ranges': [(84, 103)]},
+ {'clm_ind': 125,
+  'parent_counter': '75',
+  'child_start_index': 118,
+  'parent_range': (109, 138),
+  'children_ranges': [(118, 138)]},
+ {'clm_ind': 160,
+  'parent_counter': '79',
+  'child_start_index': 153,
+  'parent_range': (144, 172),
+  'children_ranges': [(153, 172)]}]
+
+  claims._find_billing_providers()
+  ['BH CLINIC OF VANCOUVER',
+ 'BH CLINIC OF VANCOUVER',
+ 'BH CLINIC OF VANCOUVER',
+ 'BH CLINIC OF VANCOUVER',
+ 'BH CLINIC OF VANCOUVER']
+"""
 
 
 class Claim837i(Claim):
 
     NAME = "837I"
 
-#Format of 837P https://www.dhs.wisconsin.gov/publications/p0/p00265.pdf
+# Format of 837P https://www.dhs.wisconsin.gov/publications/p0/p00265.pdf
+
+
 class Claim837p(Claim):
 
     NAME = "837P"
-
-    
diff --git a/databricksx12/hls/hierarchicalloop.py b/databricksx12/hls/hierarchicalloop.py
index e443011..6afcbda 100644
--- a/databricksx12/hls/hierarchicalloop.py
+++ b/databricksx12/hls/hierarchicalloop.py
@@ -1,136 +1,96 @@
 from databricksx12.edi import *
 
-class LoopMapping:
-
-    def __init__(self, mappings=None):
-        self.mappings = (mappings if mappings is not None else {
-            '20': {
-                'description': 'Information Source',
-                'loop': '2000A'
-                },
-            '22': {
-                'description': 'Subscriber',
-                'loop': '2000B'
-                }
-            })
-
-
-   
-        
-    """
-    def __init__(self):
-        self.mappings = {
-            '2000A': ('20', 'NM1', '3'),
-            '2000B': ('22', 'SBR', '4'),
-        }
-
-
-    ADZ want our key = (lookup value found in data), value = additional info needed 
-    """
-
 
 class HierarchicalLoop(EDI):
-    def __init__(self, data, delim_cls=AnsiX12Delim, loop_mapping= LoopMapping.mappings):
+    def __init__(self, data, delim_cls=AnsiX12Delim):
         super().__init__(data, delim_cls)
-        self.loop_mapping = loop_mapping
-        self.target_element, self.target_segment_name, self.target_element_index = self.loop_mapping.get_identifiers(
-            Loop)
-
-        # find all HL segments along with the 3rd element that denotes 2000A (20) or 2000B (22)
-        self.hl_segments = self._hl_identifiers()
-
-        # find all CLM segments (important for indexing the last HL or SBR within a tx)
-        self.clm_segments = self._clm_identifiers()
-
-        # Calculate ranges and then extract 2000A/B lines based on those ranges
-        self.ranges = self.select_range_of_interest(
-            self.hl_segments, self.clm_segments, self.target_element)
-        self.extracted_lines = self.extract_lines_based_on_ranges(
-            self.ranges, self.target_segment_name, self.target_element_index)
-
-    def _hl_identifiers(self):
-        # Find the segments where HL loop begins
-        indexed_HL_segments = self.segments_by_nloop_object.extracted_linesame_index("HL")
-        return [(i, x.element(3)) for i, x in indexed_HL_segments]
-
-    def _clm_identifiers(self):
-        # Find the segments where CLM loop begins
-        indexed_CLM_segments = self.segments_by_name_index("CLM")
-        return [(i, x.element(2)) for i, x in indexed_CLM_segments]
-
-    def select_range_of_interest(self, hl_indexes, clm_indexes, target_value):
-        ranges = []
-        start_index = None
-        last_index = None
-
-        for index, value in hl_indexes:
-            if value == target_value:
-                if start_index is not None:
-                    ranges.append((start_index+1, index))
-                start_index = index
-            elif start_index is not None:
-                ranges.append((start_index+1, index))
-                start_index = None
-        if clm_indexes:
-            last_index = clm_indexes[-1][0]
-        if last_index and start_index is not None:
-            ranges.append((start_index+1, last_index))
-        return ranges
-
-    def extract_lines_based_on_ranges(self, ranges, target_value, target_index):
-        extracted_elements = []
-        # Iterate through each range in the list
-        for start, end in ranges:
-            # Retrieve the segments within this range
-            segments_in_range = self.segments_by_position(start, end)
-
-            desired_elements = map(
-                lambda segment: segment.element(int(target_index)),
-                filter(
-                    lambda segment: segment.segment_name() == target_value and len(
-                        segment.data.split(segment.format_cls.ELEMENT_DELIM)) > int(target_index),
-                    segments_in_range
-                )
-            )
-            extracted_elements.extend(desired_elements)
-
-        return list(extracted_elements)
-
-
-    def parent_loops(self):
-        pass
-
-    def child_loops(self, parent_loop_num):
-        pass
-
-    """
-       @return
-         -index of each HL segment
-         -index of parent segments
-         -be able to answer "where is loop XYZ?" and "at this location, what looop am i in?"
-    """
-    def _hl_segment_indexes(self):
-        pass
-
-
-    self.hl_parents = {
-        parent:
-        { index_start : value
-          index_end : value
-           children : [
-               hl_child : {
-                   index_start: value
-                   index_end: value
-                   }
-               ]
-        }
-
-
-    self.hl = HL()
-    self.claim_start_index = segment(clm)
-
-    Who is my billing provider?  hl.get_loop(20)
-    Who is my subscriber?
-    Who is my patient? 
-    
-    
+
+        # find all HL and SE segments to find start and end of loops + CLM segments
+        self.indexed_HL_segments = self.segments_by_name_index("HL")
+        self.indexed_SE_segments = self.segments_by_name_index("SE")
+
+        # parent and children loops
+        self.parent_loops = self._parent_loops()
+        self.child_loops = self._child_loops(self.parent_loops)
+        self.subchild_loops = self._child_loops(self.child_loops)
+
+    def _parent_loops(self):
+        parent_start_loops = []
+        for i, segment in self.indexed_HL_segments:
+            # Check if the second element is empty (and the third element is '20' and the last element is '1')
+            if segment.element(2) == '':
+                # index of parent, counter, and if child
+                parent_start_loops.append(
+                    (i, segment.element(1), segment.element(-1)))
+
+        parent_end_loops = [i for i, x in self.indexed_SE_segments]
+        return [(tup + (j,)) for tup, j in zip(parent_start_loops, parent_end_loops)]
+
+    def _child_loops(self, parent_loops):
+        child_loops = []
+        for parent_start_index, counter, child_id, parent_stop_index in parent_loops:
+            if child_id == '1':
+                for i, segment in self.indexed_HL_segments:
+                    if segment.element(2) == counter:
+                        # index of child, parent/tx counter, and if sub-child
+                        child_loops.append(
+                            (i, counter, segment.element(-1), parent_stop_index))
+
+        # If child_id is greater than 1, recursively call the fn
+            if int(child_id) > 1:
+                child_loops.extend(self._child_loops(
+                    [(parent_start_index, counter, str(int(child_id) - 1), parent_stop_index)]))
+        return child_loops
+
+
+class HierarchicalLoopManager:
+    def __init__(self, data, delim_cls=AnsiX12Delim):
+        self.hl = HierarchicalLoop(data, delim_cls)
+        self.summary = {}
+        self.generate_summary()
+
+    def generate_summary(self):
+        for pl in self.hl.parent_loops:
+            parent_summary = {
+                'parent_index_start': pl[0],
+                'parent_index_end': pl[-1],
+                'children': []
+            }
+            # Find children loops for this parent tx
+            children = []
+            for cl in self.hl.child_loops:
+                if pl[0] < cl[0] < pl[-1]:
+                    children.append({
+                        'child_index_start': cl[0],
+                        'child_index_stop': cl[-1]
+                    })
+
+            # Add children to parent summary
+            parent_summary['children'] = children
+
+            # Add HL 1 counter to the summary as the key
+            self.summary[pl[1]] = parent_summary
+
+
+"""
+loop_manager = HierarchicalLoopManager(sample_data_837i_edited)  
+summary = loop_manager.summary 
+
+output:
+{'1': {'parent_index_start': 7,
+  'parent_index_end': 35,
+  'children': [{'child_index_start': 16, 'child_index_stop': 35}]},
+ '63': {'parent_index_start': 41,
+  'parent_index_end': 69,
+  'children': [{'child_index_start': 50, 'child_index_stop': 69}]},
+ '49': {'parent_index_start': 75,
+  'parent_index_end': 103,
+  'children': [{'child_index_start': 84, 'child_index_stop': 103}]},
+ '75': {'parent_index_start': 109,
+  'parent_index_end': 138,
+  'children': [{'child_index_start': 118, 'child_index_stop': 138}]},
+ '79': {'parent_index_start': 144,
+  'parent_index_end': 179,
+  'children': [{'child_index_start': 153, 'child_index_stop': 179},
+   {'child_index_start': 160, 'child_index_stop': 179}]}}
+"""
diff --git a/sampledata/837/CHPW_Claimdata_edited.txt b/sampledata/837/CHPW_Claimdata_edited.txt
new file mode 100644
index 0000000..72ecb44
--- /dev/null
+++ b/sampledata/837/CHPW_Claimdata_edited.txt
@@ -0,0 +1,182 @@
+ISA*00*          *00*          *01*987654321      *ZZ*123456789      *180508*0833*^*00501*697773230*1*P*:~
+GS*HC*CLEARINGHOUSE*123456789*20180508*0833*212950697*X*005010X222A1~
+ST*837*000000001*005010X222A1~
+BHT*0019*00*7349063984*20180508*0833*CH~
+NM1*41*2*CLEARINGHOUSE LLC*****46*987654321~
+PER*IC*CLEARINGHOUSE CLIENT SERVICES*TE*8005551212*FX*8005551212~
+NM1*40*2*123456789*****46*CHPWA~
+HL*1**20*1~
+NM1*85*2*BH CLINIC OF VANCOUVER*****XX*1122334455~
+N3*12345 MAIN ST~
+N4*VANCOUVER*WA*98662~
+REF*EI*720000000~
+PER*IC*CONTACT*TE*9185551212~
+NM1*87*2~
+N3*PO BOX 1234~
+N4*VANCOUVER*WA*986681234~
+HL*2*1*22*0~
+SBR*P*18**COMMUNITY HLTH PLAN OF WASH*****CI~
+NM1*IL*1*SUBSCRIBER*JOHN*J***MI*987321~
+N3*987 65TH PL~
+N4*VANCOUVER*WA*986640001~
+DMG*D8*19881225*M~
+NM1*PR*2*COMMUNITY HEALTH PLAN OF WASHINGTON*****PI*CHPWA~
+CLM*1805080AV3648339*20***57:B:1*Y*A*Y*Y~
+REF*D9*7349065509~
+HI*ABK:F1120~
+NM1*82*1*PROVIDER*JAMES****XX*1112223338~
+PRV*PE*PXC*261QR0405X~
+NM1*77*2*BH CLINIC OF VANCOUVER*****XX*1122334455~
+N3*12345 MAIN ST SUITE A1~
+N4*VANCOUVER*WA*98662~
+LX*1~
+SV1*HC:H0003*20*UN*1***1~
+DTP*472*D8*20180428~
+REF*6R*142671~
+SE*34*000000001~
+ST*837*000000002*005010X222A1~
+BHT*0019*00*7349063984*20180508*0833*CH~
+NM1*41*2*CLEARINGHOUSE LLC*****46*987654321~
+PER*IC*CLEARINGHOUSE CLIENT SERVICES*TE*8005551212*FX*8005551212~
+NM1*40*2*123456789*****46*CHPWA~
+HL*63**20*1~
+NM1*85*2*BH CLINIC OF VANCOUVER*****XX*1122334455~
+N3*12345 MAIN ST~
+N4*VANCOUVER*WA*98662~
+REF*EI*720000000~
+PER*IC*CONTACT*TE*9185551212~
+NM1*87*2~
+N3*PO BOX 1234~
+N4*VANCOUVER*WA*986681234~
+HL*64*63*22*0~
+SBR*P*18**COMMUNITY HLTH PLAN OF WASH*****CI~
+NM1*IL*1*PATIENT*SUSAN*E***MI*765123~
+N3*765 43RD ST~
+N4*VANCOUVER*WA*986640002~
+DMG*D8*19881031*F~
+NM1*PR*2*COMMUNITY HEALTH PLAN OF WASHINGTON*****PI*CHPWA~
+CLM*1805080AV3648347*50.1***57:B:1*Y*A*Y*Y~
+REF*D9*7349065730~
+HI*ABK:F1520*ABF:F1220~
+NM1*82*1*PROVIDER*SUSAN****XX*1112223346~
+PRV*PE*PXC*261QR0405X~
+NM1*77*2*BH CLINIC OF VANCOUVER*****XX*1122334455~
+N3*12345 MAIN ST SUITE A1~
+N4*VANCOUVER*WA*98662~
+LX*1~
+SV1*HC:96153:HF*50.1*UN*6***1:2~
+DTP*472*D8*20180426~
+REF*6R*143792~
+SE*34*000000002~
+ST*837*000000003*005010X222A1~
+BHT*0019*00*7349063984*20180508*0833*CH~
+NM1*41*2*CLEARINGHOUSE LLC*****46*987654321~
+PER*IC*CLEARINGHOUSE CLIENT SERVICES*TE*8005551212*FX*8005551212~
+NM1*40*2*123456789*****46*CHPWA~
+HL*49**20*1~
+NM1*85*2*BH CLINIC OF VANCOUVER*****XX*1122334455~
+N3*12345 MAIN ST~
+N4*VANCOUVER*WA*98662~
+REF*EI*720000000~
+PER*IC*CONTACT*TE*9185551212~
+NM1*87*2~
+N3*PO BOX 1234~
+N4*VANCOUVER*WA*986681234~
+HL*50*49*22*0~
+SBR*P*18**COMMUNITY HLTH PLAN OF WASH*****CI~
+NM1*IL*1*SUBSCRIBER*JOHN*J***MI*987321~
+N3*987 65TH PL~
+N4*VANCOUVER*WA*986640001~
+DMG*D8*19881225*M~
+NM1*PR*2*COMMUNITY HEALTH PLAN OF WASHINGTON*****PI*CHPWA~
+CLM*1805080AV3648340*11.64***57:B:1*Y*A*Y*Y~
+REF*D9*7349065492~
+HI*ABK:F1020*ABF:F1220~
+NM1*82*1*PROVIDER*SUSAN****XX*1112223346~
+PRV*PE*PXC*261QR0405X~
+NM1*77*2*BH CLINIC OF VANCOUVER*****XX*1122334455~
+N3*12345 MAIN ST SUITE A1~
+N4*VANCOUVER*WA*98662~
+LX*1~
+SV1*HC:T1017:HF*11.64*UN*1***1:2~
+DTP*472*D8*20180427~
+REF*6R*140976~
+SE*34*000000003~
+ST*837*000000004*005010X222A1~
+BHT*0019*00*7349063984*20180508*0833*CH~
+NM1*41*2*CLEARINGHOUSE LLC*****46*987654321~
+PER*IC*CLEARINGHOUSE CLIENT SERVICES*TE*8005551212*FX*8005551212~
+NM1*40*2*123456789*****46*CHPWA~
+HL*75**20*1~
+NM1*85*2*BH CLINIC OF VANCOUVER*****XX*1122334455~
+N3*12345 MAIN ST~
+N4*VANCOUVER*WA*98662~
+REF*EI*720000000~
+PER*IC*CONTACT*TE*9185551212~
+NM1*87*2~
+N3*PO BOX 1234~
+N4*VANCOUVER*WA*986681234~
+HL*76*75*22*0~
+SBR*P*18**COMMUNITY HLTH PLAN OF WASH*****CI~
+NM1*IL*1*PATIENT*SUSAN*E***MI*765123~
+N3*765 43RD ST~
+N4*VANCOUVER*WA*986640002~
+DMG*D8*19881031*F~
+NM1*PR*2*COMMUNITY HEALTH PLAN OF WASHINGTON*****PI*CHPWA~
+CLM*1805080AV3648353*234***53:B:1*Y*A*Y*Y~
+REF*D9*7349064290~
+HI*ABK:F251~
+NM1*82*1*PROVIDER*SUSAN****XX*1112223346~
+PRV*PE*PXC*251S00000X~
+NM1*77*2*BH CLINIC OF VANCOUVER*****XX*1122334455~
+N3*12345 MAIN ST SUITE A1~
+N4*VANCOUVER*WA*98662~
+LX*1~
+SV1*HC:90853*234*UN*120***1~
+DTP*472*D8*20180427~
+REF*6R*140787~
+NTE*ADD*05~
+SE*35*000000004~
+ST*837*000000005*005010X222A1~
+BHT*0019*00*7349063984*20180508*0833*CH~
+NM1*41*2*CLEARINGHOUSE LLC*****46*987654321~
+PER*IC*CLEARINGHOUSE CLIENT SERVICES*TE*8005551212*FX*8005551212~
+NM1*40*2*123456789*****46*CHPWA~
+HL*79**20*2~
+NM1*85*2*BH CLINIC OF VANCOUVER*****XX*1122334455~
+N3*12345 MAIN ST~
+N4*VANCOUVER*WA*98662~
+REF*EI*720000000~
+PER*IC*CONTACT*TE*9185551212~
+NM1*87*2~
+N3*PO BOX 1234~
+N4*VANCOUVER*WA*986681234~
+HL*80*79*22*0~
+SBR*P*18**COMMUNITY HLTH PLAN OF WASH*****CI~
+NM1*IL*1*SUBSCRIBER*JOHN*J***MI*987321~
+N3*987 65TH PL~
+N4*VANCOUVER*WA*986640001~
+DMG*D8*19881225*M~
+NM1*PR*2*COMMUNITY HEALTH PLAN OF WASHINGTON*****PI*CHPWA~
+HL*81*79*23*1~
+SBR*P*18**COMMUNITY HLTH PLAN OF WASH*****CI~
+NM1*IL*1*SUBSCRIBER*JOHN*J***MI*987321~
+N3*987 65TH PL~
+N4*VANCOUVER*WA*986640001~
+DMG*D8*19881225*M~
+NM1*PR*2*COMMUNITY HEALTH PLAN OF WASHINGTON*****PI*CHPWA~
+CLM*1805080AV3648355*20***57:B:1*Y*A*Y*Y~
+REF*D9*7349064036~
+HI*ABK:F1020*ABF:F1120~
+NM1*82*1*PROVIDER*JAMES****XX*1112223338~
+PRV*PE*PXC*261QR0405X~
+NM1*77*2*BH CLINIC OF VANCOUVER*****XX*1122334455~
+N3*12345 MAIN ST SUITE A1~
+N4*VANCOUVER*WA*98662~
+LX*1~
+SV1*HC:H0003*20*UN*1***1:2~
+DTP*472*D8*20180427~
+REF*6R*143907~
+SE*34*000000005~
+GE*5*212950697~
+IEA*1*697773230~

From 44cc70343ccc4fe489ca16e267ae129a45944751 Mon Sep 17 00:00:00 2001
From: Raven <raven.mukherjee@databricks.com>
Date: Wed, 17 Apr 2024 19:20:05 -0400
Subject: [PATCH 06/46] converted the functions to follow filter/map protocol
 for processing hierarchical loops

---
 databricksx12/hls/hierarchicalloop.py | 88 +++++++++++++--------------
 1 file changed, 41 insertions(+), 47 deletions(-)

diff --git a/databricksx12/hls/hierarchicalloop.py b/databricksx12/hls/hierarchicalloop.py
index 6afcbda..c078f3b 100644
--- a/databricksx12/hls/hierarchicalloop.py
+++ b/databricksx12/hls/hierarchicalloop.py
@@ -1,46 +1,40 @@
 from databricksx12.edi import *
+import functools
 
 
 class HierarchicalLoop(EDI):
     def __init__(self, data, delim_cls=AnsiX12Delim):
         super().__init__(data, delim_cls)
-
-        # find all HL and SE segments to find start and end of loops + CLM segments
-        self.indexed_HL_segments = self.segments_by_name_index("HL")
-        self.indexed_SE_segments = self.segments_by_name_index("SE")
-
-        # parent and children loops
+        self.parent_start_loops = self._parent_start_tup_loops()  # returns tuple; to check
+        self.parent_end_loops = self._parent_end_loops()
         self.parent_loops = self._parent_loops()
         self.child_loops = self._child_loops(self.parent_loops)
-        self.subchild_loops = self._child_loops(self.child_loops)
+        self.subchild_loops = self._child_loops(self.child_loops) # recursive cases
 
-    def _parent_loops(self):
-        parent_start_loops = []
-        for i, segment in self.indexed_HL_segments:
-            # Check if the second element is empty (and the third element is '20' and the last element is '1')
-            if segment.element(2) == '':
-                # index of parent, counter, and if child
-                parent_start_loops.append(
-                    (i, segment.element(1), segment.element(-1)))
+    def _parent_start_tup_loops(self):
+        # index of parent, counter, and if child
+        # TODO unit test to return tuple
+        return [(i, x.element(1), x.element(-1)) for i, x in self.segments_by_name_index("HL") if x.element(2) == ""]
 
-        parent_end_loops = [i for i, x in self.indexed_SE_segments]
-        return [(tup + (j,)) for tup, j in zip(parent_start_loops, parent_end_loops)]
+    def _parent_end_loops(self):
+        return [i for i, x in self.segments_by_name_index("SE")]
+
+    def _parent_loops(self):
+        return [(tup + (j,)) for tup, j in zip(self.parent_start_loops, self.parent_end_loops)]
 
     def _child_loops(self, parent_loops):
-        child_loops = []
-        for parent_start_index, counter, child_id, parent_stop_index in parent_loops:
-            if child_id == '1':
-                for i, segment in self.indexed_HL_segments:
-                    if segment.element(2) == counter:
-                        # index of child, parent/tx counter, and if sub-child
-                        child_loops.append(
-                            (i, counter, segment.element(-1), parent_stop_index))
+        child_loops = [(i, counter, segment.element(-1), parent_stop_index)
+                       for _, counter, child_id, parent_stop_index in parent_loops if int(child_id) == 1
+                       for i, segment in self.segments_by_name_index("HL") if segment.element(2) == counter]
 
-        # If child_id is greater than 1, recursively call the fn
-            if int(child_id) > 1:
-                child_loops.extend(self._child_loops(
-                    [(parent_start_index, counter, str(int(child_id) - 1), parent_stop_index)]))
-        return child_loops
+        # recursive cases where child_id is greater than 1 == sub_child
+        subchild_cases = filter(lambda x: int(x[2]) > 1, parent_loops)
+        subchild_loops = map(
+            lambda x: self._child_loops(
+                [(x[0], x[1], str(int(x[2]) - 1), x[3])]),
+            subchild_cases
+        )
+        return functools.reduce(lambda acc, lst: acc + lst, subchild_loops, child_loops)
 
 
 class HierarchicalLoopManager:
@@ -50,26 +44,26 @@ def __init__(self, data, delim_cls=AnsiX12Delim):
         self.generate_summary()
 
     def generate_summary(self):
-        for pl in self.hl.parent_loops:
+        def process_parent_loop(parent_loop):
+            # filter/map child loops within a parent loop
+            children = list(map(
+                lambda child_loop: {
+                    'child_index_start': child_loop[0],
+                    'child_index_stop': child_loop[-1]
+                },
+                filter(
+                    lambda child_loop: parent_loop[0] < child_loop[0] < parent_loop[-1], self.hl.child_loops)
+            ))
+            # summary dict for each parent
             parent_summary = {
-                'parent_index_start': pl[0],
-                'parent_index_end': pl[-1],
-                'children': []
+                'parent_index_start': parent_loop[0],
+                'parent_index_end': parent_loop[-1],
+                'children': children
             }
-            # Find children loops for this parent tx
-            children = []
-            for cl in self.hl.child_loops:
-                if pl[0] < cl[0] < pl[-1]:
-                    children.append({
-                        'child_index_start': cl[0],
-                        'child_index_stop': cl[-1]
-                    })
-
-            # Add children to parent summary
-            parent_summary['children'] = children
+            return (parent_loop[1], parent_summary)
 
-            # Add HL 1 counter to the summary as the key
-            self.summary[pl[1]] = parent_summary
+        # summarize all parent loops
+        self.summary = dict(map(process_parent_loop, self.hl.parent_loops))
 
 
 """

From 45a09fa35da47b0446a2d065a1c6d424dc98abe8 Mon Sep 17 00:00:00 2001
From: Raven <raven.mukherjee@databricks.com>
Date: Thu, 18 Apr 2024 19:12:23 -0400
Subject: [PATCH 07/46] removed claim classes and reorganized code to create a
 new loop class

---
 databricksx12/hls/claim.py | 154 +------------------------------------
 databricksx12/hls/loop.py  | 122 +++++++++++++++++++++++++++++
 2 files changed, 123 insertions(+), 153 deletions(-)
 create mode 100644 databricksx12/hls/loop.py

diff --git a/databricksx12/hls/claim.py b/databricksx12/hls/claim.py
index 88aa76f..2b8148c 100644
--- a/databricksx12/hls/claim.py
+++ b/databricksx12/hls/claim.py
@@ -1,160 +1,8 @@
 from databricksx12.edi import *
-from databricksx12.hls import hierarchicalloop
+
 #
 # Base claim class
 #
-from databricksx12.edi import *
-
-
-class LoopMapping:
-    def __init__(self, mappings=None):
-        self.mappings = (mappings if mappings is not None else {
-            '20': {
-                'description': 'Information Source',
-                'loop': '2000A'
-            },
-            '22': {
-                'description': 'Subscriber',
-                'loop': '2000B'
-            }
-        })
-
-    def get_mapping(self, element):
-        return self.mappings.get(element, None)
-
-
-class Claim(EDI):
-    def __init__(self, data, delim_cls=AnsiX12Delim, loop_mapping=LoopMapping(), element=None):
-        super().__init__(data, delim_cls)
-        self.loop_mapping = loop_mapping
-        self.target_element = element
-        self.loop_value = self.loop_mapping.get_mapping(
-            self.target_element).get('loop')
-        self.target_segment_name, self.target_element_index = self.get_reference_names_of_loop(
-            self.loop_value)
-
-        # find all CLM segments (important for indexing the last HL or SBR within a tx)
-        self.clm_segments = self._clm_identifiers()
-
-    # this feels misplaced; how to fix?
-    def get_reference_names_of_loop(self, loop):
-        identifiers = {
-            '2000A': ('NM1', '3'),
-            '2000B': ('SBR', '4'),
-        }
-        return identifiers.get(loop, (None, None))
-
-    def _clm_identifiers(self):
-        # Find the segments where CLM loop begins
-        indexed_CLM_segments = self.segments_by_name_index("CLM")
-        return [(i, x.element(2)) for i, x in indexed_CLM_segments]
-
-    def extract_lines_based_on_ranges(self, ranges, target_value, target_index):
-        extracted_elements = []
-        # Iterate through each range in the list
-        for start, end in ranges:
-            # Retrieve the segments within this range
-            segments_in_range = self.segments_by_position(start, end)
-
-            desired_elements = map(
-                lambda segment: segment.element(int(target_index)),
-                filter(
-                    lambda segment: segment.segment_name() == target_value and len(
-                        segment.data.split(segment.format_cls.ELEMENT_DELIM)) > int(target_index),
-                    segments_in_range
-                )
-            )
-            extracted_elements.extend(desired_elements)
-
-        return list(extracted_elements)
-
-
-class ClaimManager:
-    def __init__(self, data, delim_cls=AnsiX12Delim):
-        self.hlmanager = hierarchicalloop.HierarchicalLoopManager(data)
-        self.hl_summary = self.hlmanager.summary
-        # need to sort mapping dependency
-        self.claim = Claim(data, delim_cls, LoopMapping(), element='20')
-        self.claim_summaries = [{
-            'clm_ind': clm_index,
-            'parent_counter': self._find_claim_in_tx(self.hl_summary, clm_index)[0],
-            'child_start_index': self._find_claim_in_tx(self.hl_summary, clm_index)[1],
-            'parent_range': self.get_ranges(self.hl_summary, self._find_claim_in_tx(self.hl_summary, clm_index)[0])[0],
-            'children_ranges': self.get_ranges(self.hl_summary, self._find_claim_in_tx(self.hl_summary, clm_index)[0])[1]
-        } for clm_index in [i for (i, j) in self.claim.clm_segments]]
-
-    def _find_claim_in_tx(self, tx_summary, clm_index):
-        for parent_counter, parent_info in tx_summary.items():
-            if parent_info['parent_index_start'] <= int(clm_index) <= parent_info['parent_index_end']:
-                for child_info in parent_info['children']:
-                    if child_info['child_index_start'] <= int(clm_index) <= child_info['child_index_stop']:
-                        return parent_counter, child_info['child_index_start']
-        return None, None
-
-    def get_ranges(self, hl_summary_dict, loop_counter):
-        info = hl_summary_dict.get(loop_counter, None)
-        if info is None:
-            return None
-
-        parent_range = (info['parent_index_start'], info['parent_index_end'])
-        children_ranges = [(child['child_index_start'], child['child_index_stop'])
-                           for child in info.get('children', [])]
-
-        return parent_range, children_ranges
-
-    def _find_billing_providers(self):
-        first_lines = []
-        for summary in self.claim_summaries:
-            lines = self.claim.extract_lines_based_on_ranges(
-                [summary['parent_range']
-                 ], self.claim.target_segment_name, self.claim.target_element_index
-            )
-            if lines:  # Check if any lines were extracted
-                # Append the first line of this iteration
-                first_lines.append(lines[0])
-        return first_lines
-
-    def _find_subscribers(self):
-        pass
-
-
-"""
-claims = ClaimManager(sample_data_chpw_claimdata)
-claims.claim_summaries
-
-[{'clm_ind': 23,
-  'parent_counter': '1',
-  'child_start_index': 16,
-  'parent_range': (7, 35),
-  'children_ranges': [(16, 35)]},
- {'clm_ind': 57,
-  'parent_counter': '63',
-  'child_start_index': 50,
-  'parent_range': (41, 69),
-  'children_ranges': [(50, 69)]},
- {'clm_ind': 91,
-  'parent_counter': '49',
-  'child_start_index': 84,
-  'parent_range': (75, 103),
-  'children_ranges': [(84, 103)]},
- {'clm_ind': 125,
-  'parent_counter': '75',
-  'child_start_index': 118,
-  'parent_range': (109, 138),
-  'children_ranges': [(118, 138)]},
- {'clm_ind': 160,
-  'parent_counter': '79',
-  'child_start_index': 153,
-  'parent_range': (144, 172),
-  'children_ranges': [(153, 172)]}]
-
-  claims._find_billing_providers()
-  ['BH CLINIC OF VANCOUVER',
- 'BH CLINIC OF VANCOUVER',
- 'BH CLINIC OF VANCOUVER',
- 'BH CLINIC OF VANCOUVER',
- 'BH CLINIC OF VANCOUVER']
-"""
 
 
 class Claim837i(Claim):
diff --git a/databricksx12/hls/loop.py b/databricksx12/hls/loop.py
new file mode 100644
index 0000000..6d654eb
--- /dev/null
+++ b/databricksx12/hls/loop.py
@@ -0,0 +1,122 @@
+from databricksx12.edi import *
+from databricksx12.hls import hierarchicalloop
+
+class LoopMapping:
+    def __init__(self, mappings=None):
+        self.mappings = (mappings if mappings is not None else {
+            '20': {
+                'description': 'Information Source',
+                'loop': '2000A',
+                'reference_ids': ('NM1', '3'), ## might delete and use elsewhere
+                },
+            '22': {
+                'description': 'Subscriber',
+                'loop': '2000B',
+                'reference_ids': ('SBR', '4'), 
+                }
+        })
+    
+    def get_mapping(self, element):
+        return self.mappings.get(element, None)
+
+
+class Loop(EDI):
+    def __init__(self, data, delim_cls=AnsiX12Delim, loop_mapping=LoopMapping()):
+        super().__init__(data, delim_cls)
+        self.loop_mapping = loop_mapping
+        self.hlmanager = hierarchicalloop.HierarchicalLoopManager(data)
+        self.hl_summary = self.hlmanager.summary
+        self.clm_segments = self._clm_identifiers()
+
+    def _clm_identifiers(self):
+        return [(i, x.element(2)) for i, x in self.segments_by_name_index("CLM")]
+    
+    #
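+    # returns a flat list holding the element at target_index of every segment named target_value within the given ranges; segments whose segment_len() is not greater than target_index are skipped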
+    #
+    def _find_elements_based_on_ranges(self, ranges, target_value, target_index):
+        def process_range(range_tuple):
+            start, end = range_tuple
+            segments_in_range = self.segments_by_position(start, end) # find segments within range
+            return list(map(
+                lambda segment: segment.element(int(target_index)),
+                filter(
+                    lambda segment: segment.segment_name() == target_value and 
+                                    segment.segment_len() > int(target_index),
+                                    segments_in_range
+                )
+            ))
+        return functools.reduce(
+            lambda acc, lst: acc + lst, map(process_range, ranges),[]) # map to apply processing to each range and flatten 
+    
+    #
+    # given a claim index (int or numeric str), returns (parent counter, child_index_start) of the first Tx/child range in the Hierarchical loop summary that contains it, or (None, None) if none does
+    #
+    def _find_tx_from_clm(self, tx_summary, clm_index):
+        try:
+            return next(
+                (parent_counter, child['child_index_start'])
+                for parent_counter, parent_info in tx_summary.items()
+                if parent_info['parent_index_start'] <= int(clm_index) <= parent_info['parent_index_end']
+                for child in parent_info['children']
+                if child['child_index_start'] <= int(clm_index) <= child['child_index_stop']
+            )
+        except StopIteration:
+            return None, None
+    
+    #
+    # given a loop counter (a key of the Hierarchical loop summary), returns (parent_range, children_ranges) for that Tx, each range a (start, stop) tuple; returns None if the counter is not in the summary
+    #
+    def _get_ranges(self, tx_summary, loop_counter):
+        info = tx_summary.get(loop_counter, None)
+        if info is None:
+            return None 
+        parent_range = (info['parent_index_start'], info['parent_index_end'])
+        children_ranges = [(child['child_index_start'], child['child_index_stop']) for child in info.get('children', [])]
+        return parent_range, children_ranges
+
+    #
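+    # extracts the target elements for one claim by searching its Tx parent range; use_children is accepted but not used yet; returns None if the claim is not inside any Tx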
+    #
+    def _get_elements_from_claim(self, clm_segment, target_segment_name, target_element_index, use_children=False):
+        clm_index = clm_segment[0]
+        parent_counter, child_start_index = self._find_tx_from_clm(self.hl_summary, clm_index)
+        if not parent_counter:
+            return None
+        parent_range, children_range = self._get_ranges(self.hl_summary, parent_counter)
+        return self._find_elements_based_on_ranges([parent_range], target_segment_name, target_element_index)
+    
+    #
+    # applies _get_elements_from_claim to every CLM segment for the chosen loop key and returns the first element found per claim; an unknown loop key yields []
+    #
+    def find_reference_elements(self, loop_key):
+        loop_info = self.loop_mapping.get_mapping(loop_key)
+        if not loop_info:
+            return []
+        target_segment_name, target_element_index = loop_info['reference_ids']
+        use_children = loop_key == '22'  # Use children ranges for '22'
+        process_clm_segment = lambda clm_segment: self._get_elements_from_claim(clm_segment, target_segment_name, target_element_index, use_children)
+        reference_list = list(filter(None, map(process_clm_segment, self.clm_segments)))
+        return [summary[0] for summary in reference_list] if reference_list else [] # only first element or it generalizes to all segments in the range
+
+
+"""
+sample_data_837i_edited = open("/sampledata/837/CHPW_Claimdata_edited.txt", "rb").read().decode("utf-8")
+claims = Loop(sample_data_837i_edited)
+claims.find_reference_elements('20')
+Outputs:
+['BH CLINIC OF VANCOUVER',
+ 'BH CLINIC OF VANCOUVER',
+ 'BH CLINIC OF VANCOUVER',
+ 'BH CLINIC OF VANCOUVER',
+ 'BH CLINIC OF VANCOUVER']
+"""
+"""
+claims.find_reference_elements('22')
+Outputs:
+['COMMUNITY HLTH PLAN OF WASH',
+ 'COMMUNITY HLTH PLAN OF WASH',
+ 'COMMUNITY HLTH PLAN OF WASH',
+ 'COMMUNITY HLTH PLAN OF WASH',
+ 'COMMUNITY HLTH PLAN OF WASH']
+"""
\ No newline at end of file

From 0ec239ab8c8515621c66f213bf7ce717121fa8b7 Mon Sep 17 00:00:00 2001
From: Raven <raven.mukherjee@databricks.com>
Date: Fri, 26 Apr 2024 20:54:23 -0400
Subject: [PATCH 08/46] updates to Hierarchical Loop Manager to create list in
 lists for sub-children

---
 databricksx12/hls/hierarchicalloop.py | 75 +++++++++++++--------------
 1 file changed, 37 insertions(+), 38 deletions(-)

diff --git a/databricksx12/hls/hierarchicalloop.py b/databricksx12/hls/hierarchicalloop.py
index c078f3b..6833df8 100644
--- a/databricksx12/hls/hierarchicalloop.py
+++ b/databricksx12/hls/hierarchicalloop.py
@@ -40,30 +40,26 @@ def _child_loops(self, parent_loops):
 class HierarchicalLoopManager:
     def __init__(self, data, delim_cls=AnsiX12Delim):
         self.hl = HierarchicalLoop(data, delim_cls)
-        self.summary = {}
-        self.generate_summary()
+        self.summary = self.generate_summary()
 
-    def generate_summary(self):
-        def process_parent_loop(parent_loop):
-            # filter/map child loops within a parent loop
-            children = list(map(
-                lambda child_loop: {
-                    'child_index_start': child_loop[0],
-                    'child_index_stop': child_loop[-1]
-                },
-                filter(
-                    lambda child_loop: parent_loop[0] < child_loop[0] < parent_loop[-1], self.hl.child_loops)
-            ))
-            # summary dict for each parent
-            parent_summary = {
-                'parent_index_start': parent_loop[0],
-                'parent_index_end': parent_loop[-1],
-                'children': children
-            }
-            return (parent_loop[1], parent_summary)
+    def get_child_loops(self, parent_loop, loops):
+        return [loop for loop in loops if parent_loop[0] < loop[0] < parent_loop[-1]]
+
+    def process_loop(self, loop, level=0):
+        child_loops = self.get_child_loops(loop, self.hl.child_loops)
+        children = [self.process_loop(child, level + 1) for child in child_loops]
+        
+        loop_summary = {
+            'index_start': loop[0],
+            'index_end': loop[-1],
+            'children': children or None
+        }
+        return loop_summary
 
-        # summarize all parent loops
-        self.summary = dict(map(process_parent_loop, self.hl.parent_loops))
+    def generate_summary(self):
+        """Generate a hierarchical summary for each top-level parent loop."""
+        loop_processing = lambda loop: (str(loop[1]), self.process_loop(loop))
+        return dict(map(loop_processing, self.hl.parent_loops))
 
 
 """
@@ -71,20 +67,23 @@ def process_parent_loop(parent_loop):
 summary = loop_manager.summary 
 
 output:
-{'1': {'parent_index_start': 7,
-  'parent_index_end': 35,
-  'children': [{'child_index_start': 16, 'child_index_stop': 35}]},
- '63': {'parent_index_start': 41,
-  'parent_index_end': 69,
-  'children': [{'child_index_start': 50, 'child_index_stop': 69}]},
- '49': {'parent_index_start': 75,
-  'parent_index_end': 103,
-  'children': [{'child_index_start': 84, 'child_index_stop': 103}]},
- '75': {'parent_index_start': 109,
-  'parent_index_end': 138,
-  'children': [{'child_index_start': 118, 'child_index_stop': 138}]},
- '79': {'parent_index_start': 144,
-  'parent_index_end': 179,
-  'children': [{'child_index_start': 153, 'child_index_stop': 179},
-   {'child_index_start': 160, 'child_index_stop': 179}]}}
+{'1': {'index_start': 7,
+  'index_end': 35,
+  'children': [{'index_start': 16, 'index_end': 35, 'children': None}]},
+ '63': {'index_start': 41,
+  'index_end': 69,
+  'children': [{'index_start': 50, 'index_end': 69, 'children': None}]},
+ '49': {'index_start': 75,
+  'index_end': 103,
+  'children': [{'index_start': 84, 'index_end': 103, 'children': None}]},
+ '75': {'index_start': 109,
+  'index_end': 138,
+  'children': [{'index_start': 118, 'index_end': 138, 'children': None}]},
+ '79': {'index_start': 144,
+  'index_end': 179,
+  'children': [{'index_start': 153,
+    'index_end': 179,
+    'children': [{'index_start': 160, 'index_end': 179, 'children': None}]},
+   {'index_start': 160, 'index_end': 179, 'children': None}]}} 
+   # the last 'children' list, there is a repeat that is tricky to remove
 """

From 0caa0b3e47b557422066c2c4c000d3867a840aad Mon Sep 17 00:00:00 2001
From: Aaron Z <aaron.zavora@databricks.com>
Date: Tue, 30 Apr 2024 15:02:16 -0400
Subject: [PATCH 09/46] adding looping at the subscriber level

---
 sampledata/837/837p.txt | 45 +++++++++++++++++++++++++++++++++++++++++
 1 file changed, 45 insertions(+)
 create mode 100644 sampledata/837/837p.txt

diff --git a/sampledata/837/837p.txt b/sampledata/837/837p.txt
new file mode 100644
index 0000000..a69643f
--- /dev/null
+++ b/sampledata/837/837p.txt
@@ -0,0 +1,45 @@
+ISA*00*          *00*          *ZZ*1234567        *ZZ*11111          *170508*1141*^*00501*000000101*1*P*:~
+GS*HC*XXXXXXX*XXXXX*20170617*1741*101*X*005010X222A1~
+ST*837*1239*005010X222A1~
+BHT*0019*00*010*20170617*1741*CH~
+NM1*41*2*SUBMITTER*****46*ABC123~
+PER*IC*BOB SMITH*TE*4805551212~
+NM1*40*2*RECEIVER*****46*44556~
+HL*1**20*1~
+NM1*85*2*BILLING PROVIDER*****XX*1122334455~
+N3*1234 SOME ROAD~
+N4*CHICAGO*IL*606739999~
+REF*EI*999999999~
+HL*2*1*22*0~
+SBR*P*18*******12~
+NM1*IL*1*BLOGGS*JOE****MI*1234567890~
+N3*1 SOME BLVD~
+N4*CHICAGO*IL*606129998~
+DMG*D8*19570111*M~
+NM1*PR*2*PAYER*****PI*12345~
+N3*1 PAYER WAY~
+N4*ST LOUIS*MO*212441850~
+REF*2U*W1014~
+CLM*1000A*140***19:B:1*Y*A*Y*Y~
+HI*ABK:I10~
+LX*1~
+SV1*HC:99213*140*UN*1***1~
+DTP*472*D8*20151124~
+HL*3*1*22*0~
+SBR*P*18*******12~
+NM1*IL*1*BLOGGS*FRED****MI*9876543201~
+N3*1 ANOTHER STR~
+N4*CHICAGO*IL*606129998~
+DMG*D8*19700601*M~
+NM1*PR*2*PAYER*****PI*12345~
+N3*1 PAYER WAY~
+N4*ST LOUIS*MO*212441850~
+REF*2U*W1014~
+CLM*1001A*140***19:B:1*Y*A*Y*Y~
+HI*ABK:I10~
+LX*1~
+SV1*HC:99213*140*UN*1***1~
+DTP*472*D8*20151124~
+SE*41*1239~
+GE*1*101~
+IEA*1*000000101~

From 97a57c84276269ede3e1c11bd78af8e87c34c848 Mon Sep 17 00:00:00 2001
From: Aaron Z <aaron.zavora@databricks.com>
Date: Tue, 30 Apr 2024 15:13:54 -0400
Subject: [PATCH 10/46] claim sample code

---
 databricksx12/hls/claim.py      | 66 +++++++++++++++++++++++++++++++++
 databricksx12/hls/healthcare.py | 10 +----
 2 files changed, 68 insertions(+), 8 deletions(-)

diff --git a/databricksx12/hls/claim.py b/databricksx12/hls/claim.py
index 2b8148c..1436a75 100644
--- a/databricksx12/hls/claim.py
+++ b/databricksx12/hls/claim.py
@@ -3,6 +3,14 @@
 #
 # Base claim class
 #
+class Claim():
+
+    def __init__(self):
+        pass
+
+    @staticmethod
+    def from_dictionary(d):
+        pass
 
 
 class Claim837i(Claim):
@@ -15,3 +23,61 @@ class Claim837i(Claim):
 class Claim837p(Claim):
 
     NAME = "837P"
+
+
+#
+# Base claim builder (transaction -> 1 or more claims)
+#
+class ClaimBuilder(EDI):
+
+    #
+    # Given claim type (837i, 837p, etc), segments, and delim class, build claim level classes
+    #
+    def __init__(self, trnx_type, trnx_data, delim_cls):
+        self.trnx_type = trnx_type
+        self.data = trnx_data
+        self.delim_cls = delim_cls
+
+        pass #self.hl = ??? TODO 
+        
+    def claim_count():
+        return len(self.segments_by_name("CLM"))
+
+    #
+    # Returns a dictionary of "loop name" : "loop data" 
+    #
+    def build_claim(self, clm_index, clm_segment):
+        return {
+            "1000A": {
+                "desc": "Submitter Name",
+                "segments": "TODO"
+            },
+            "1000B": {
+                "desc": "Reciever Name",
+                "segments": "TODO"
+            },
+            "2000A": {
+                "desc": "Billing Provider HL Level"
+                "segments": "TODO"
+            },
+            "2000B": {
+                "desc": "Subscriber HL Level",
+                "segments": "TODO"
+            },
+            "2000C" : {
+                "desc": "Patient HL Level",
+                "segments": "TODO"
+            },
+            "2300": {
+                "desc": "Claim Information",
+                "segments": "TODO"
+            }
+        }
+    
+    #
+    # Given transaction type, transaction segments, and delim info, build out claims in the transaction
+    #  @return a list of Claim for each "clm" segment 
+    #
+    def build(self):
+        return [self.build_claim(i, x) for i,x in segments_by_name_index("CLM")]
+    
diff --git a/databricksx12/hls/healthcare.py b/databricksx12/hls/healthcare.py
index 91858de..efd60f5 100644
--- a/databricksx12/hls/healthcare.py
+++ b/databricksx12/hls/healthcare.py
@@ -28,12 +28,6 @@ def from_functional_group(self, fg):
     #  @mapping = mapping the GS08 segment to the type of healthcare transaction
     #
     def from_transaction(self, trnx):
-        type = self.mapping.get(trnx.transaction_type)
-        data = [x for x in trnx.data if x.segment_name() not in ['ST', 'SE']]
-        if type == "837P": 
-            return Claim837p(data, trnx.format_cls)
-        elif type == "837I":
-            return Claim837i(data, trnx.format_cls)
-        else:
-            return None #no mapping available
+        return ClaimBuilder(self.mapping.get(trnx.transaction_type),
+                            [x for x in trnx.data if x.segment_name() not in ['ST', 'SE']], trnx.delim_cls).build()
     

From 4c384ee20ebb70e044ee5a90503b480de679de2b Mon Sep 17 00:00:00 2001
From: Raven <raven.mukherjee@databricks.com>
Date: Tue, 30 Apr 2024 21:56:01 -0400
Subject: [PATCH 11/46] modified loop class to contain additional mappings such
 as IL and Payer

---
 databricksx12/hls/loop.py | 205 +++++++++++++++++++++-----------------
 1 file changed, 116 insertions(+), 89 deletions(-)

diff --git a/databricksx12/hls/loop.py b/databricksx12/hls/loop.py
index 6d654eb..c70ecb5 100644
--- a/databricksx12/hls/loop.py
+++ b/databricksx12/hls/loop.py
@@ -1,24 +1,47 @@
 from databricksx12.edi import *
+from functools import reduce
 from databricksx12.hls import hierarchicalloop
 
 class LoopMapping:
     def __init__(self, mappings=None):
-        self.mappings = (mappings if mappings is not None else {
+        self.mappings = mappings if mappings is not None else {
             '20': {
-                'description': 'Information Source',
-                'loop': '2000A',
-                'reference_ids': ('NM1', '3'), ## might delete and use elsewhere
-                },
+                'Information Source': {
+                    'loop': '2000A',
+                    'reference_ids': ('NM1', '3'),
+                    'secondary_reference': ('85', '1') 
+                }
+            },
             '22': {
-                'description': 'Subscriber',
-                'loop': '2000B',
-                'reference_ids': ('SBR', '4'), 
+                'Subscriber': {
+                    'loop': '2000B',
+                    'reference_ids': ('SBR', '4')
+                },
+                'Individual First Name': {
+                    'loop': '2010BA',
+                    'reference_ids': ('NM1', '4'), 
+                    'secondary_reference': ('IL', '1') 
+                },
+                'Individual Last Name': {
+                    'loop': '2010BA',
+                    'reference_ids': ('NM1', '3'), 
+                    'secondary_reference': ('IL', '1') 
+                },
+                'Payer Name': {
+                    'loop': '2010BB',
+                    'reference_ids': ('NM1', '3'), 
+                    'secondary_reference': ('PR', '1') 
                 }
-        })
+            }
+        }
+    
+    def get_mapping(self, element, description=None):
+        """ Returns a specific mapping based on element key and description. """
+        mappings = self.mappings.get(element, {})
+        if description:
+            return mappings.get(description, None)
+        return None
     
-    def get_mapping(self, element):
-        return self.mappings.get(element, None)
-
 
 class Loop(EDI):
     def __init__(self, data, delim_cls=AnsiX12Delim, loop_mapping=LoopMapping()):
@@ -26,97 +49,101 @@ def __init__(self, data, delim_cls=AnsiX12Delim, loop_mapping=LoopMapping()):
         self.loop_mapping = loop_mapping
         self.hlmanager = hierarchicalloop.HierarchicalLoopManager(data)
         self.hl_summary = self.hlmanager.summary
-        self.clm_segments = self._clm_identifiers()
-
-    def _clm_identifiers(self):
-        return [(i, x.element(2)) for i, x in self.segments_by_name_index("CLM")]
-    
-    #
-    # returns element of interest from a range based on first element and index from the line
-    #
-    def _find_elements_based_on_ranges(self, ranges, target_value, target_index):
-        def process_range(range_tuple):
-            start, end = range_tuple
-            segments_in_range = self.segments_by_position(start, end) # find segments within range
-            return list(map(
-                lambda segment: segment.element(int(target_index)),
-                filter(
-                    lambda segment: segment.segment_name() == target_value and 
-                                    segment.segment_len() > int(target_index),
-                                    segments_in_range
-                )
-            ))
-        return functools.reduce(
-            lambda acc, lst: acc + lst, map(process_range, ranges),[]) # map to apply processing to each range and flatten 
+        self.clm_segments = [(i, x.element(2)) for i, x in self.segments_by_name_index("CLM")]
     
-    #
-    # if a claim index (str), returns the parent counter and child_start_index of Tx from the Hierarchical loop summary
-    #
-    def _find_tx_from_clm(self, tx_summary, clm_index):
-        try:
-            return next(
-                (parent_counter, child['child_index_start'])
-                for parent_counter, parent_info in tx_summary.items()
-                if parent_info['parent_index_start'] <= int(clm_index) <= parent_info['parent_index_end']
-                for child in parent_info['children']
-                if child['child_index_start'] <= int(clm_index) <= child['child_index_stop']
-            )
-        except StopIteration:
-            return None, None
-    
-    #
-    # if a loop counter (str), returns the parent and children ranges (tuple) from the Tx of interest from the Hierarchical loop summary
-    #
-    def _get_ranges(self, tx_summary, loop_counter):
-        info = tx_summary.get(loop_counter, None)
-        if info is None:
-            return None 
-        parent_range = (info['parent_index_start'], info['parent_index_end'])
-        children_ranges = [(child['child_index_start'], child['child_index_stop']) for child in info.get('children', [])]
-        return parent_range, children_ranges
+    def get_transaction_info(self, tx_summary, clm_index):
+        """
+        retrieves transaction information for a claim from hierarchical summary
+        Eg., Getting the transaction range for claim index "x"
+        """
+        return next((info for _, info in tx_summary.items()
+                     if info['index_start'] <= int(clm_index) <= info['index_end']), None)
+
+    def get_ranges(self, tx_info, use_children=False):
+        """
+        extracts numeric ranges for parent and optionally children based on transaction
+        Eg., find ranges for parent and children loops for processing
+        """
+        parent_range = (tx_info['index_start'], tx_info['index_end'])
+        if use_children:
+            return [(child['index_start'], child['index_end']) for child in tx_info.get('children', [])]
+        return [parent_range]
 
-    #
-    # filters a claim's tx segment to extract its reference elements
-    #
-    def _get_elements_from_claim(self, clm_segment, target_segment_name, target_element_index, use_children=False):
+
+    def find_elements_based_on_ranges(self, ranges, target_segment_name, target_element_index, secondary_reference=None):
+        """
+        filters and maps EDI segments to extract required elements based on their position and type
+        """
+        process_range = lambda range_tuple: [
+            segment.element(int(target_element_index))
+            for segment in self.segments_by_position(range_tuple[0], range_tuple[1])
+            if segment.segment_name() == target_segment_name and 
+            segment.segment_len() > int(target_element_index) and
+            (secondary_reference is None or segment.element(int(secondary_reference[1])) == secondary_reference[0])
+        ]
+        return reduce(lambda acc, lst: acc + lst, map(process_range, ranges), [])
+
+    def extract_elements_from_claim(self, clm_segment, target_segment_name, target_element_index, use_children=False, secondary_reference=None):
+        """
+        a higher-level function that ties together the previous functions to get tx info, the ranges of interest, and elements from every range
+        """
         clm_index = clm_segment[0]
-        parent_counter, child_start_index = self._find_tx_from_clm(self.hl_summary, clm_index)
-        if not parent_counter:
+        tx_info = self.get_transaction_info(self.hl_summary, clm_index)
+        if not tx_info:
             return None
-        parent_range, children_range = self._get_ranges(self.hl_summary, parent_counter)
-        return self._find_elements_based_on_ranges([parent_range], target_segment_name, target_element_index)
-    
-    #
-    # map to apply the find_element function over all claim segments based on choice of loop
-    #
-    def find_reference_elements(self, loop_key):
-        loop_info = self.loop_mapping.get_mapping(loop_key)
+
+        ranges = self.get_ranges(tx_info, use_children)
+        return self.find_elements_based_on_ranges(ranges, target_segment_name, target_element_index, secondary_reference)
+
+
+    def find_reference_elements(self, loop_key, description=None):
+        """
+        extract reference elements from claims based on loop mapping and hierarchy and handles children separately if specified by loop key.
+        Eg., find billing provider names under loop '20' from an EDI transaction.
+        """
+
+        loop_info = self.loop_mapping.get_mapping(loop_key, description)
         if not loop_info:
             return []
+        
         target_segment_name, target_element_index = loop_info['reference_ids']
-        use_children = loop_key == '22'  # Use children ranges for '22'
-        process_clm_segment = lambda clm_segment: self._get_elements_from_claim(clm_segment, target_segment_name, target_element_index, use_children)
+        secondary_reference = loop_info.get('secondary_reference', None)
+
+        use_children = loop_key == '22'
+        process_clm_segment = lambda clm_segment: self.extract_elements_from_claim(clm_segment, 
+                                                                                   target_segment_name, 
+                                                                                   target_element_index, 
+                                                                                   use_children,
+                                                                                   secondary_reference)
         reference_list = list(filter(None, map(process_clm_segment, self.clm_segments)))
-        return [summary[0] for summary in reference_list] if reference_list else [] # only first element or it generalizes to all segments in the range
+        
+        return [summary for summary in reference_list] 
 
 
 """
 sample_data_837i_edited = open("/sampledata/837/CHPW_Claimdata_edited.txt", "rb").read().decode("utf-8")
 claims = Loop(sample_data_837i_edited)
-claims.find_reference_elements('20')
+claims.find_reference_elements('20', 'Information Source')
+Outputs:
+[['BH CLINIC OF VANCOUVER'],
+ ['BH CLINIC OF VANCOUVER'],
+ ['BH CLINIC OF VANCOUVER'],
+ ['BH CLINIC OF VANCOUVER'],
+ ['BH CLINIC OF VANCOUVER']]
+"""
+"""
+claims.find_reference_elements('22', 'Payer Name')
 Outputs:
-['BH CLINIC OF VANCOUVER',
- 'BH CLINIC OF VANCOUVER',
- 'BH CLINIC OF VANCOUVER',
- 'BH CLINIC OF VANCOUVER',
- 'BH CLINIC OF VANCOUVER']
+[['COMMUNITY HEALTH PLAN OF WASHINGTON'],
+ ['COMMUNITY HEALTH PLAN OF WASHINGTON'],
+ ['COMMUNITY HEALTH PLAN OF WASHINGTON'],
+ ['COMMUNITY HEALTH PLAN OF WASHINGTON'],
+ ['COMMUNITY HEALTH PLAN OF WASHINGTON',
+  'COMMUNITY HEALTH PLAN OF WASHINGTON',
+  'COMMUNITY HEALTH PLAN OF WASHINGTON']]
 """
 """
-claims.find_reference_elements('22')
+claims.find_reference_elements('22', 'Individual First Name')
 Outputs:
-['COMMUNITY HLTH PLAN OF WASH',
- 'COMMUNITY HLTH PLAN OF WASH',
- 'COMMUNITY HLTH PLAN OF WASH',
- 'COMMUNITY HLTH PLAN OF WASH',
- 'COMMUNITY HLTH PLAN OF WASH']
-"""
\ No newline at end of file
+[['JOHN'], ['SUSAN'], ['JOHN'], ['SUSAN'], ['JOHN', 'JOHN', 'JOHN']]
+  """
\ No newline at end of file

From 49ba286c9783589288ff1d3f379f1ec04bc238c9 Mon Sep 17 00:00:00 2001
From: Raven <raven.mukherjee@databricks.com>
Date: Tue, 30 Apr 2024 22:03:57 -0400
Subject: [PATCH 12/46] new loop mappings and clearer func names

---
 databricksx12/hls/loop.py | 7 +++----
 1 file changed, 3 insertions(+), 4 deletions(-)

diff --git a/databricksx12/hls/loop.py b/databricksx12/hls/loop.py
index c70ecb5..365b1ea 100644
--- a/databricksx12/hls/loop.py
+++ b/databricksx12/hls/loop.py
@@ -36,7 +36,6 @@ def __init__(self, mappings=None):
         }
     
     def get_mapping(self, element, description=None):
-        """ Returns a specific mapping based on element key and description. """
         mappings = self.mappings.get(element, {})
         if description:
             return mappings.get(description, None)
@@ -54,7 +53,7 @@ def __init__(self, data, delim_cls=AnsiX12Delim, loop_mapping=LoopMapping()):
     def get_transaction_info(self, tx_summary, clm_index):
         """
         retrieves transaction information for a claim from hierarchical summary
-        Eg., Getting the transaction range for claim index "x"
+        get the transaction range for claim index "x"
         """
         return next((info for _, info in tx_summary.items()
                      if info['index_start'] <= int(clm_index) <= info['index_end']), None)
@@ -62,7 +61,7 @@ def get_transaction_info(self, tx_summary, clm_index):
     def get_ranges(self, tx_info, use_children=False):
         """
         extracts numeric ranges for parent and optionally children based on transaction
-        Eg., find ranges for parent and children loops for processing
+        find ranges for parent and children loops for processing
         """
         parent_range = (tx_info['index_start'], tx_info['index_end'])
         if use_children:
@@ -99,7 +98,7 @@ def extract_elements_from_claim(self, clm_segment, target_segment_name, target_e
     def find_reference_elements(self, loop_key, description=None):
         """
         extract reference elements from claims based on loop mapping and hierarchy and handles children separately if specified by loop key.
-        Eg., find billing provider names under loop '20' from an EDI transaction.
+        find billing provider names under loop '20' from an EDI transaction.
         """
 
         loop_info = self.loop_mapping.get_mapping(loop_key, description)

From b8d23d47ad2f335cd0a2d7dc7e3aa4a8e26d07fd Mon Sep 17 00:00:00 2001
From: Raven <raven.mukherjee@databricks.com>
Date: Thu, 2 May 2024 01:49:14 -0400
Subject: [PATCH 13/46] adjusted Loop and LoopManager to process nested loops
 with modified claim data with children

---
 databricksx12/hls/hierarchicalloop.py    | 105 +++++++++++++++--------
 sampledata/837/CHPW_Claimdata_edited.txt |  11 ++-
 2 files changed, 80 insertions(+), 36 deletions(-)

diff --git a/databricksx12/hls/hierarchicalloop.py b/databricksx12/hls/hierarchicalloop.py
index 6833df8..af2e333 100644
--- a/databricksx12/hls/hierarchicalloop.py
+++ b/databricksx12/hls/hierarchicalloop.py
@@ -1,40 +1,41 @@
 from databricksx12.edi import *
-import functools
 
+import itertools
 
 class HierarchicalLoop(EDI):
     def __init__(self, data, delim_cls=AnsiX12Delim):
         super().__init__(data, delim_cls)
-        self.parent_start_loops = self._parent_start_tup_loops()  # returns tuple; to check
+
+        # parent and children loops
+        self.parent_start_loops = self._parent_start_tup_loops()
         self.parent_end_loops = self._parent_end_loops()
         self.parent_loops = self._parent_loops()
         self.child_loops = self._child_loops(self.parent_loops)
-        self.subchild_loops = self._child_loops(self.child_loops) # recursive cases
-
+        self.subchild_loops = self._subchild_loops(self.child_loops)
+    
     def _parent_start_tup_loops(self):
-        # index of parent, counter, and if child
-        # TODO unit test to return tuple
-        return [(i, x.element(1), x.element(-1)) for i, x in self.segments_by_name_index("HL") if x.element(2) == ""]
+        # index of parent, counter, and if child 
+        return [(i, x.element(1), x.element(-1)) for i, x in self.segments_by_name_index("HL") if x.element(2) == ""] # TODO unit test to return tuple
 
     def _parent_end_loops(self):
         return [i for i, x in self.segments_by_name_index("SE")]
-
+    
     def _parent_loops(self):
         return [(tup + (j,)) for tup, j in zip(self.parent_start_loops, self.parent_end_loops)]
 
+
     def _child_loops(self, parent_loops):
         child_loops = [(i, counter, segment.element(-1), parent_stop_index)
-                       for _, counter, child_id, parent_stop_index in parent_loops if int(child_id) == 1
-                       for i, segment in self.segments_by_name_index("HL") if segment.element(2) == counter]
+                    for _, counter, child_id, parent_stop_index in parent_loops
+                    for i, segment in self.segments_by_name_index("HL") if segment.element(2) == counter]
+        return child_loops
+
+    def _subchild_loops(self, child_loops):
+        it1, it2 = itertools.tee(child_loops)
+        next(it2, None) 
+        return [pair[1] for pair in zip(it1, it2) if int(pair[0][2]) == 1]
+
 
-        # recursive cases where child_id is greater than 1 == sub_child
-        subchild_cases = filter(lambda x: int(x[2]) > 1, parent_loops)
-        subchild_loops = map(
-            lambda x: self._child_loops(
-                [(x[0], x[1], str(int(x[2]) - 1), x[3])]),
-            subchild_cases
-        )
-        return functools.reduce(lambda acc, lst: acc + lst, subchild_loops, child_loops)
 
 
 class HierarchicalLoopManager:
@@ -43,23 +44,52 @@ def __init__(self, data, delim_cls=AnsiX12Delim):
         self.summary = self.generate_summary()
 
     def get_child_loops(self, parent_loop, loops):
-        return [loop for loop in loops if parent_loop[0] < loop[0] < parent_loop[-1]]
+        """Filter child loops that fall within the given parent loop's range."""
+        return list(filter(lambda x: parent_loop[0] < x[0] < parent_loop[3], loops))
 
-    def process_loop(self, loop, level=0):
-        child_loops = self.get_child_loops(loop, self.hl.child_loops)
-        children = [self.process_loop(child, level + 1) for child in child_loops]
+    def calculate_child_end_index(self, current_child, next_child, parent_end):
+        """Calculate the end index of a child, adjusting to avoid overlap with the next child."""
+        return min(current_child[3], next_child[0] - 1 if next_child else parent_end)
+    
+    def process_child(self, child, subchildren, parent_end):
+        """Map a single child loop to its dictionary representation, including subchildren if applicable."""
+        return {
+            'index_start': child[0],
+            'index_end': self.calculate_child_end_index(child, None, parent_end),  # No next_child directly handled here
+            'children': subchildren if subchildren else None
+        }
+    
+    def add_subchildren_to_children(self, children, subchild_loops, parent_end):
+        """Map function to add subchildren to corresponding children, exclude children that are subchildren."""
+        #subchild_ids = {sc[1] for sc in subchild_loops}  # Set of parent_ids for subchildren
+        children = [child for child in children if child not in subchild_loops]  # Filter out subchildren
+        # subchild_lookup = {sc[1]: sc for sc in subchild_loops}  # Lookup for subchildren by parent counter
         
-        loop_summary = {
+        # Process each child, include subchildren when applicable
+        return list(map(lambda child: self.process_child(
+            child,
+            [{ 'index_start': subchild[0],
+               'index_end': self.calculate_child_end_index(subchild, None, child[-1]),
+               'children': None } 
+             for subchild in subchild_loops if subchild[1] == child[1] and int(child[2]) == 1],
+            parent_end),
+            children))
+
+    def process_loop(self, loop):
+        child_loops = sorted(self.get_child_loops(loop, self.hl.child_loops), key=lambda x: x[0])
+        children = self.add_subchildren_to_children(child_loops, self.hl.subchild_loops, loop[3])
+        return {
             'index_start': loop[0],
-            'index_end': loop[-1],
+            'index_end': loop[3],
             'children': children or None
         }
-        return loop_summary
 
     def generate_summary(self):
-        """Generate a hierarchical summary for each top-level parent loop."""
-        loop_processing = lambda loop: (str(loop[1]), self.process_loop(loop))
-        return dict(map(loop_processing, self.hl.parent_loops))
+        return {str(loop[1]): self.process_loop(loop) for loop in self.hl.parent_loops}
+
+
+
+
 
 
 """
@@ -80,10 +110,17 @@ def generate_summary(self):
   'index_end': 138,
   'children': [{'index_start': 118, 'index_end': 138, 'children': None}]},
  '79': {'index_start': 144,
-  'index_end': 179,
-  'children': [{'index_start': 153,
-    'index_end': 179,
-    'children': [{'index_start': 160, 'index_end': 179, 'children': None}]},
-   {'index_start': 160, 'index_end': 179, 'children': None}]}} 
-   # the last 'children' list, there is a repeat that is tricky to remove
+  'index_end': 186,
+  'children': [{'index_start': 153, 'index_end': 186, 'children': None},
+   {'index_start': 160,
+    'index_end': 186,
+    'children': [{'index_start': 167, 'index_end': 186, 'children': None}]}]}}
+"""
+"""
+sample_data_837p = open("./sampledata/837/837p.txt", "rb").read().decode("utf-8").replace("\\n", "")
+HierarchicalLoopManager(sample_data_837p).summary
+{'1': {'index_start': 7,
+  'index_end': 42,
+  'children': [{'index_start': 12, 'index_end': 42, 'children': None},
+   {'index_start': 27, 'index_end': 42, 'children': None}]}}
 """
diff --git a/sampledata/837/CHPW_Claimdata_edited.txt b/sampledata/837/CHPW_Claimdata_edited.txt
index 72ecb44..176e84a 100644
--- a/sampledata/837/CHPW_Claimdata_edited.txt
+++ b/sampledata/837/CHPW_Claimdata_edited.txt
@@ -142,7 +142,7 @@ BHT*0019*00*7349063984*20180508*0833*CH~
 NM1*41*2*CLEARINGHOUSE LLC*****46*987654321~
 PER*IC*CLEARINGHOUSE CLIENT SERVICES*TE*8005551212*FX*8005551212~
 NM1*40*2*123456789*****46*CHPWA~
-HL*79**20*2~
+HL*79**20*1~
 NM1*85*2*BH CLINIC OF VANCOUVER*****XX*1122334455~
 N3*12345 MAIN ST~
 N4*VANCOUVER*WA*98662~
@@ -158,7 +158,14 @@ N3*987 65TH PL~
 N4*VANCOUVER*WA*986640001~
 DMG*D8*19881225*M~
 NM1*PR*2*COMMUNITY HEALTH PLAN OF WASHINGTON*****PI*CHPWA~
-HL*81*79*23*1~
+HL*81*79*22*1~
+SBR*P*18**COMMUNITY HLTH PLAN OF WASH*****CI~
+NM1*IL*1*SUBSCRIBER*JOHN*J***MI*987321~
+N3*987 65TH PL~
+N4*VANCOUVER*WA*986640001~
+DMG*D8*19881225*M~
+NM1*PR*2*COMMUNITY HEALTH PLAN OF MASS*****PI*CHPWA~
+HL*82*79*23*0~
 SBR*P*18**COMMUNITY HLTH PLAN OF WASH*****CI~
 NM1*IL*1*SUBSCRIBER*JOHN*J***MI*987321~
 N3*987 65TH PL~

From e677ef24783909204a0b57e3091ce7f67c9dfbd1 Mon Sep 17 00:00:00 2001
From: Raven <raven.mukherjee@databricks.com>
Date: Thu, 2 May 2024 20:00:15 -0400
Subject: [PATCH 14/46] claim builder v0 using hlmanager and loop

---
 databricksx12/hls/claim.py            | 109 ++++++++++++++++++-----
 databricksx12/hls/hierarchicalloop.py |  79 +++++++++--------
 databricksx12/hls/loop.py             | 121 ++++++++++++--------------
 3 files changed, 184 insertions(+), 125 deletions(-)

diff --git a/databricksx12/hls/claim.py b/databricksx12/hls/claim.py
index 1436a75..6a9e66e 100644
--- a/databricksx12/hls/claim.py
+++ b/databricksx12/hls/claim.py
@@ -1,8 +1,10 @@
 from databricksx12.edi import *
-
+from databricksx12.hls import loop
 #
 # Base claim class
 #
+
+
 class Claim():
 
     def __init__(self):
@@ -28,8 +30,8 @@ class Claim837p(Claim):
 #
 # Base claim builder (transaction -> 1 or more claims)
 #
-class ClaimBuilder(EDI):
 
+class ClaimBuilder(EDI):
     #
     # Given claim type (837i, 837p, etc), segments, and delim class, build claim level classes
     #
@@ -38,46 +40,105 @@ def __init__(self, trnx_type, trnx_data, delim_cls):
         self.data = trnx_data
         self.delim_cls = delim_cls
 
-        pass #self.hl = ??? TODO 
-        
-    def claim_count():
-        return len(self.segments_by_name("CLM"))
+        self.loop_summary = loop.Loop(trnx_data)
 
     #
-    # Returns a dictionary of "loop name" : "loop data" 
+    # Returns a dictionary of "loop name" : "loop data"
     #
-    def build_claim(self, clm_index, clm_segment):
+
+    def build_claim(self, clm_segment):
         return {
             "1000A": {
                 "desc": "Submitter Name",
-                "segments": "TODO"
+                "segments": self.loop_summary.sender
             },
             "1000B": {
-                "desc": "Reciever Name",
-                "segments": "TODO"
+                "desc": "Receiver Name",
+                "segments": self.loop_summary.receiver
             },
             "2000A": {
-                "desc": "Billing Provider HL Level"
-                "segments": "TODO"
+                "desc": "Billing Provider",
+                "segments": self.loop_summary.find_reference_element(clm_segment, '20', 'Information Source')
             },
             "2000B": {
-                "desc": "Subscriber HL Level",
-                "segments": "TODO"
+                "desc": "Subscriber",
+                "segments": self.loop_summary.find_reference_element(clm_segment, '22', 'Subscriber')
             },
-            "2000C" : {
-                "desc": "Patient HL Level",
-                "segments": "TODO"
+            "2010BA": {
+                "desc": "Patient",
+                "segments": (self.loop_summary.find_reference_element(clm_segment, '22', 'Individual First Name'),
+                             self.loop_summary.find_reference_element(clm_segment, '22', 'Individual Last Name'))
+
+            },
+            "2010BB": {
+                "desc": "Payer",
+                "segments": self.loop_summary.find_reference_element(clm_segment, '22', 'Payer Name'),
             },
             "2300": {
-                "desc": "Claim Information",
-                "segments": "TODO"
+                "desc": "Claim",
+                "segments": (self.loop_summary.find_reference_element(clm_segment, '22', 'Claim ID'),
+                             self.loop_summary.find_reference_element(clm_segment, '22', 'Claim Amount'))
             }
         }
-    
+
     #
     # Given transaction type, transaction segments, and delim info, build out claims in the transaction
-    #  @return a list of Claim for each "clm" segment 
+    #  @return a list of Claim for each "clm" segment
     #
     def build(self):
-        return [self.build_claim(i, x) for i,x in segments_by_name_index("CLM")]
-    
+        return [self.build_claim(seg) for seg in self.loop_summary.claim_segments()]
+
+
+"""
+sample_data_837i_edited = open("/sampledata/837/CHPW_Claimdata_edited.txt", "rb").read().decode("utf-8")
+claim_class = ClaimBuilder(trnx_type='837I', trnx_data=sample_data_837i_edited, delim_cls=AnsiX12Delim)
+claim_class.build()
+
+[{'1000A': {'desc': 'Submitter Name', 'segments': 'CLEARINGHOUSE'},
+  '1000B': {'desc': 'Receiver Name', 'segments': '123456789'},
+  '2000A': {'desc': 'Billing Provider',
+   'segments': ['BH CLINIC OF VANCOUVER']},
+  '2000B': {'desc': 'Subscriber', 'segments': ['COMMUNITY HLTH PLAN OF WASH']},
+  '2010BA': {'desc': 'Patient', 'segments': (['JOHN'], ['SUBSCRIBER'])},
+  '2010BB': {'desc': 'Payer',
+   'segments': ['COMMUNITY HEALTH PLAN OF WASHINGTON']},
+  '2300': {'desc': 'Claim', 'segments': (['1805080AV3648339'], ['20'])}},
+ {'1000A': {'desc': 'Submitter Name', 'segments': 'CLEARINGHOUSE'},
+  '1000B': {'desc': 'Receiver Name', 'segments': '123456789'},
+  '2000A': {'desc': 'Billing Provider',
+   'segments': ['BH CLINIC OF VANCOUVER']},
+  '2000B': {'desc': 'Subscriber', 'segments': ['COMMUNITY HLTH PLAN OF WASH']},
+  '2010BA': {'desc': 'Patient', 'segments': (['SUSAN'], ['PATIENT'])},
+  '2010BB': {'desc': 'Payer',
+   'segments': ['COMMUNITY HEALTH PLAN OF WASHINGTON']},
+  '2300': {'desc': 'Claim', 'segments': (['1805080AV3648347'], ['50.1'])}},
+ {'1000A': {'desc': 'Submitter Name', 'segments': 'CLEARINGHOUSE'},
+  '1000B': {'desc': 'Receiver Name', 'segments': '123456789'},
+  '2000A': {'desc': 'Billing Provider',
+   'segments': ['BH CLINIC OF VANCOUVER']},
+  '2000B': {'desc': 'Subscriber', 'segments': ['COMMUNITY HLTH PLAN OF WASH']},
+  '2010BA': {'desc': 'Patient', 'segments': (['JOHN'], ['SUBSCRIBER'])},
+  '2010BB': {'desc': 'Payer',
+   'segments': ['COMMUNITY HEALTH PLAN OF WASHINGTON']},
+  '2300': {'desc': 'Claim', 'segments': (['1805080AV3648340'], ['11.64'])}},
+ {'1000A': {'desc': 'Submitter Name', 'segments': 'CLEARINGHOUSE'},
+  '1000B': {'desc': 'Receiver Name', 'segments': '123456789'},
+  '2000A': {'desc': 'Billing Provider',
+   'segments': ['BH CLINIC OF VANCOUVER']},
+  '2000B': {'desc': 'Subscriber', 'segments': ['COMMUNITY HLTH PLAN OF WASH']},
+  '2010BA': {'desc': 'Patient', 'segments': (['SUSAN'], ['PATIENT'])},
+  '2010BB': {'desc': 'Payer',
+   'segments': ['COMMUNITY HEALTH PLAN OF WASHINGTON']},
+  '2300': {'desc': 'Claim', 'segments': (['1805080AV3648353'], ['234'])}},
+ {'1000A': {'desc': 'Submitter Name', 'segments': 'CLEARINGHOUSE'},
+  '1000B': {'desc': 'Receiver Name', 'segments': '123456789'},
+  '2000A': {'desc': 'Billing Provider',
+   'segments': ['BH CLINIC OF VANCOUVER']},
+  '2000B': {'desc': 'Subscriber', 'segments': ['COMMUNITY HLTH PLAN OF WASH']},
+  '2010BA': {'desc': 'Patient',
+   'segments': (['JOHN', 'JOHN'], ['SUBSCRIBER', 'SUBSCRIBER'])},
+  '2010BB': {'desc': 'Payer',
+   'segments': ['COMMUNITY HEALTH PLAN OF MASS',
+    'COMMUNITY HEALTH PLAN OF WASHINGTON']},
+  '2300': {'desc': 'Claim', 'segments': (['1805080AV3648355'], ['20'])}}]
+"""
diff --git a/databricksx12/hls/hierarchicalloop.py b/databricksx12/hls/hierarchicalloop.py
index af2e333..c047f89 100644
--- a/databricksx12/hls/hierarchicalloop.py
+++ b/databricksx12/hls/hierarchicalloop.py
@@ -2,6 +2,7 @@
 
 import itertools
 
+
 class HierarchicalLoop(EDI):
     def __init__(self, data, delim_cls=AnsiX12Delim):
         super().__init__(data, delim_cls)
@@ -12,32 +13,30 @@ def __init__(self, data, delim_cls=AnsiX12Delim):
         self.parent_loops = self._parent_loops()
         self.child_loops = self._child_loops(self.parent_loops)
         self.subchild_loops = self._subchild_loops(self.child_loops)
-    
+
     def _parent_start_tup_loops(self):
-        # index of parent, counter, and if child 
-        return [(i, x.element(1), x.element(-1)) for i, x in self.segments_by_name_index("HL") if x.element(2) == ""] # TODO unit test to return tuple
+        # index of parent, counter, and if child
+        # TODO unit test to return tuple
+        return [(i, x.element(1), x.element(-1)) for i, x in self.segments_by_name_index("HL") if x.element(2) == ""]
 
     def _parent_end_loops(self):
         return [i for i, x in self.segments_by_name_index("SE")]
-    
+
     def _parent_loops(self):
         return [(tup + (j,)) for tup, j in zip(self.parent_start_loops, self.parent_end_loops)]
 
-
     def _child_loops(self, parent_loops):
         child_loops = [(i, counter, segment.element(-1), parent_stop_index)
-                    for _, counter, child_id, parent_stop_index in parent_loops
-                    for i, segment in self.segments_by_name_index("HL") if segment.element(2) == counter]
+                       for _, counter, child_id, parent_stop_index in parent_loops
+                       for i, segment in self.segments_by_name_index("HL") if segment.element(2) == counter]
         return child_loops
 
     def _subchild_loops(self, child_loops):
         it1, it2 = itertools.tee(child_loops)
-        next(it2, None) 
+        next(it2, None)
         return [pair[1] for pair in zip(it1, it2) if int(pair[0][2]) == 1]
 
 
-
-
 class HierarchicalLoopManager:
     def __init__(self, data, delim_cls=AnsiX12Delim):
         self.hl = HierarchicalLoop(data, delim_cls)
@@ -50,34 +49,42 @@ def get_child_loops(self, parent_loop, loops):
     def calculate_child_end_index(self, current_child, next_child, parent_end):
         """Calculate the end index of a child, adjusting to avoid overlap with the next child."""
         return min(current_child[3], next_child[0] - 1 if next_child else parent_end)
-    
-    def process_child(self, child, subchildren, parent_end):
-        """Map a single child loop to its dictionary representation, including subchildren if applicable."""
+
+    def process_subchildren(self, child, subchild_loops, parent_end):
+        """Process subchildren for a given child."""
+        return [
+            {'index_start': subchild[0], 'index_end': self.calculate_child_end_index(
+                subchild, None, parent_end), 'children': None}
+            for subchild in subchild_loops if subchild[1] == child[1] and int(child[2]) == 1
+        ]
+
+    def process_child_entry(self, child, index, children, subchild_loops, parent_end):
+        """Helper function to process each child entry."""
+        next_child = children[index +
+                              1] if (index + 1) < len(children) else None
+        subchildren = self.process_subchildren(
+            child, subchild_loops, parent_end)
         return {
             'index_start': child[0],
-            'index_end': self.calculate_child_end_index(child, None, parent_end),  # No next_child directly handled here
-            'children': subchildren if subchildren else None
+            'index_end': self.calculate_child_end_index(child, next_child, parent_end),
+            'children': subchildren or None
         }
-    
-    def add_subchildren_to_children(self, children, subchild_loops, parent_end):
-        """Map function to add subchildren to corresponding children, exclude children that are subchildren."""
-        #subchild_ids = {sc[1] for sc in subchild_loops}  # Set of parent_ids for subchildren
-        children = [child for child in children if child not in subchild_loops]  # Filter out subchildren
-        # subchild_lookup = {sc[1]: sc for sc in subchild_loops}  # Lookup for subchildren by parent counter
-        
-        # Process each child, include subchildren when applicable
-        return list(map(lambda child: self.process_child(
-            child,
-            [{ 'index_start': subchild[0],
-               'index_end': self.calculate_child_end_index(subchild, None, child[-1]),
-               'children': None } 
-             for subchild in subchild_loops if subchild[1] == child[1] and int(child[2]) == 1],
-            parent_end),
-            children))
+
+    def process_children(self, children, subchild_loops, parent_end):
+        """Process all children, adjusting their end indices correctly, and add subchildren using functional programming."""
+        # Filter out subchildren from main children list
+        filtered_children = [
+            child for child in children if child not in subchild_loops]
+        # Apply processing to each child and collect the results
+        processed_children = list(map(lambda child: self.process_child_entry(child, filtered_children.index(
+            child), filtered_children, subchild_loops, parent_end), filtered_children))
+        return processed_children
 
     def process_loop(self, loop):
-        child_loops = sorted(self.get_child_loops(loop, self.hl.child_loops), key=lambda x: x[0])
-        children = self.add_subchildren_to_children(child_loops, self.hl.subchild_loops, loop[3])
+        child_loops = sorted(self.get_child_loops(
+            loop, self.hl.child_loops), key=lambda x: x[0])
+        children = self.process_children(
+            child_loops, self.hl.subchild_loops, loop[3])
         return {
             'index_start': loop[0],
             'index_end': loop[3],
@@ -88,10 +95,6 @@ def generate_summary(self):
         return {str(loop[1]): self.process_loop(loop) for loop in self.hl.parent_loops}
 
 
-
-
-
-
 """
 loop_manager = HierarchicalLoopManager(sample_data_837i_edited)  
 summary = loop_manager.summary 
@@ -111,7 +114,7 @@ def generate_summary(self):
   'children': [{'index_start': 118, 'index_end': 138, 'children': None}]},
  '79': {'index_start': 144,
   'index_end': 186,
-  'children': [{'index_start': 153, 'index_end': 186, 'children': None},
+  'children': [{'index_start': 153, 'index_end': 159, 'children': None},
    {'index_start': 160,
     'index_end': 186,
     'children': [{'index_start': 167, 'index_end': 186, 'children': None}]}]}}
diff --git a/databricksx12/hls/loop.py b/databricksx12/hls/loop.py
index 365b1ea..1b71442 100644
--- a/databricksx12/hls/loop.py
+++ b/databricksx12/hls/loop.py
@@ -2,6 +2,7 @@
 from functools import reduce
 from databricksx12.hls import hierarchicalloop
 
+
 class LoopMapping:
     def __init__(self, mappings=None):
         self.mappings = mappings if mappings is not None else {
@@ -9,8 +10,13 @@ def __init__(self, mappings=None):
                 'Information Source': {
                     'loop': '2000A',
                     'reference_ids': ('NM1', '3'),
-                    'secondary_reference': ('85', '1') 
+                    'secondary_reference': ('85', '1')
+                },
+                'Provider Address Line 1': {
+                    'loop': '2000AA',
+                    'reference_ids': ('N3', '1')
                 }
+
             },
             '22': {
                 'Subscriber': {
@@ -19,28 +25,37 @@ def __init__(self, mappings=None):
                 },
                 'Individual First Name': {
                     'loop': '2010BA',
-                    'reference_ids': ('NM1', '4'), 
-                    'secondary_reference': ('IL', '1') 
+                    'reference_ids': ('NM1', '4'),
+                    'secondary_reference': ('IL', '1')
                 },
                 'Individual Last Name': {
                     'loop': '2010BA',
-                    'reference_ids': ('NM1', '3'), 
-                    'secondary_reference': ('IL', '1') 
+                    'reference_ids': ('NM1', '3'),
+                    'secondary_reference': ('IL', '1')
                 },
                 'Payer Name': {
                     'loop': '2010BB',
-                    'reference_ids': ('NM1', '3'), 
-                    'secondary_reference': ('PR', '1') 
+                    'reference_ids': ('NM1', '3'),
+                    'secondary_reference': ('PR', '1')
+                },
+                'Claim ID': {
+                    'loop': '2300',
+                    'reference_ids': ('CLM', '1')
+                },
+                'Claim Amount': {
+                    'loop': '2300',
+                    'reference_ids': ('CLM', '2')
                 }
             }
         }
-    
+
     def get_mapping(self, element, description=None):
+        """ Returns a specific mapping based on element key and description. """
         mappings = self.mappings.get(element, {})
         if description:
             return mappings.get(description, None)
         return None
-    
+
 
 class Loop(EDI):
     def __init__(self, data, delim_cls=AnsiX12Delim, loop_mapping=LoopMapping()):
@@ -48,37 +63,46 @@ def __init__(self, data, delim_cls=AnsiX12Delim, loop_mapping=LoopMapping()):
         self.loop_mapping = loop_mapping
         self.hlmanager = hierarchicalloop.HierarchicalLoopManager(data)
         self.hl_summary = self.hlmanager.summary
-        self.clm_segments = [(i, x.element(2)) for i, x in self.segments_by_name_index("CLM")]
-    
+
+        self.sender = self.segments_by_name("GS")[0].element(2)
+        self.receiver = self.segments_by_name("GS")[0].element(3)
+
+    def claim_segments(self):
+        return [(i, x.element(2)) for i, x in self.segments_by_name_index("CLM")]
+
+    def claim_count(self):
+        return len(self.segments_by_name_index("CLM"))
+
     def get_transaction_info(self, tx_summary, clm_index):
         """
         retrieves transaction information for a claim from hierarchical summary
-        get the transaction range for claim index "x"
+        Eg., get the transaction range for claim index "x"
         """
         return next((info for _, info in tx_summary.items()
                      if info['index_start'] <= int(clm_index) <= info['index_end']), None)
 
-    def get_ranges(self, tx_info, use_children=False):
+    def get_ranges(self, tx_info, clm_index, use_children=False):
         """
-        extracts numeric ranges for parent and optionally children based on transaction
-        find ranges for parent and children loops for processing
+        extracts numeric ranges for parent and optionally children based on transaction but if children add an index to filter to the right one
+        Eg., find ranges for parent and children loops for processing
         """
-        parent_range = (tx_info['index_start'], tx_info['index_end'])
-        if use_children:
-            return [(child['index_start'], child['index_end']) for child in tx_info.get('children', [])]
-        return [parent_range]
-
+        if use_children and 'children' in tx_info:
+            return [(child['index_start'], child['index_end']) for child in tx_info['children']
+                    if child['index_start'] <= int(clm_index) <= child['index_end']]
+        else:
+            return [(tx_info['index_start'], tx_info['index_end'])]
 
     def find_elements_based_on_ranges(self, ranges, target_segment_name, target_element_index, secondary_reference=None):
         """
         filters and maps EDI segments to extract required elements based on their position and type
         """
-        process_range = lambda range_tuple: [
+        def process_range(range_tuple): return [
             segment.element(int(target_element_index))
             for segment in self.segments_by_position(range_tuple[0], range_tuple[1])
-            if segment.segment_name() == target_segment_name and 
+            if segment.segment_name() == target_segment_name and
             segment.segment_len() > int(target_element_index) and
-            (secondary_reference is None or segment.element(int(secondary_reference[1])) == secondary_reference[0])
+            (secondary_reference is None or segment.element(
+                int(secondary_reference[1])) == secondary_reference[0])
         ]
         return reduce(lambda acc, lst: acc + lst, map(process_range, ranges), [])
 
@@ -91,58 +115,29 @@ def extract_elements_from_claim(self, clm_segment, target_segment_name, target_e
         if not tx_info:
             return None
 
-        ranges = self.get_ranges(tx_info, use_children)
+        ranges = self.get_ranges(tx_info, clm_index, use_children)
         return self.find_elements_based_on_ranges(ranges, target_segment_name, target_element_index, secondary_reference)
 
-
-    def find_reference_elements(self, loop_key, description=None):
-        """
-        extract reference elements from claims based on loop mapping and hierarchy and handles children separately if specified by loop key.
-        find billing provider names under loop '20' from an EDI transaction.
-        """
-
+    def find_reference_element(self, clm_segment, loop_key, description=None):
         loop_info = self.loop_mapping.get_mapping(loop_key, description)
         if not loop_info:
             return []
-        
+
         target_segment_name, target_element_index = loop_info['reference_ids']
         secondary_reference = loop_info.get('secondary_reference', None)
-
         use_children = loop_key == '22'
-        process_clm_segment = lambda clm_segment: self.extract_elements_from_claim(clm_segment, 
-                                                                                   target_segment_name, 
-                                                                                   target_element_index, 
-                                                                                   use_children,
-                                                                                   secondary_reference)
-        reference_list = list(filter(None, map(process_clm_segment, self.clm_segments)))
-        
-        return [summary for summary in reference_list] 
+
+        return self.extract_elements_from_claim(clm_segment,
+                                                target_segment_name,
+                                                target_element_index,
+                                                use_children,
+                                                secondary_reference)
 
 
 """
 sample_data_837i_edited = open("/sampledata/837/CHPW_Claimdata_edited.txt", "rb").read().decode("utf-8")
 claims = Loop(sample_data_837i_edited)
-claims.find_reference_elements('20', 'Information Source')
-Outputs:
-[['BH CLINIC OF VANCOUVER'],
- ['BH CLINIC OF VANCOUVER'],
- ['BH CLINIC OF VANCOUVER'],
- ['BH CLINIC OF VANCOUVER'],
- ['BH CLINIC OF VANCOUVER']]
-"""
-"""
-claims.find_reference_elements('22', 'Payer Name')
+claims.find_reference_element(claims.claim_segments()[0], '22', 'Claim ID')
 Outputs:
-[['COMMUNITY HEALTH PLAN OF WASHINGTON'],
- ['COMMUNITY HEALTH PLAN OF WASHINGTON'],
- ['COMMUNITY HEALTH PLAN OF WASHINGTON'],
- ['COMMUNITY HEALTH PLAN OF WASHINGTON'],
- ['COMMUNITY HEALTH PLAN OF WASHINGTON',
-  'COMMUNITY HEALTH PLAN OF WASHINGTON',
-  'COMMUNITY HEALTH PLAN OF WASHINGTON']]
-"""
+['1805080AV3648339']
 """
-claims.find_reference_elements('22', 'Individual First Name')
-Outputs:
-[['JOHN'], ['SUSAN'], ['JOHN'], ['SUSAN'], ['JOHN', 'JOHN', 'JOHN']]
-  """
\ No newline at end of file

From df88ec9d8569a7d665b3b40e869994861c1a7a39 Mon Sep 17 00:00:00 2001
From: Raven <raven.mukherjee@databricks.com>
Date: Thu, 2 May 2024 20:08:04 -0400
Subject: [PATCH 15/46] sample data with patient dependents in the 5th tx

---
 sampledata/837/CHPW_Claimdata_edited.txt | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/sampledata/837/CHPW_Claimdata_edited.txt b/sampledata/837/CHPW_Claimdata_edited.txt
index 176e84a..5ec175a 100644
--- a/sampledata/837/CHPW_Claimdata_edited.txt
+++ b/sampledata/837/CHPW_Claimdata_edited.txt
@@ -166,7 +166,7 @@ N4*VANCOUVER*WA*986640001~
 DMG*D8*19881225*M~
 NM1*PR*2*COMMUNITY HEALTH PLAN OF MASS*****PI*CHPWA~
 HL*82*79*23*0~
-SBR*P*18**COMMUNITY HLTH PLAN OF WASH*****CI~
+PAT*P*18**DEPENDENT PATIENT*****CI~
 NM1*IL*1*SUBSCRIBER*JOHN*J***MI*987321~
 N3*987 65TH PL~
 N4*VANCOUVER*WA*986640001~

From d15ddef7fa7071e1529d702b56001b6a3eb3dd3e Mon Sep 17 00:00:00 2001
From: Aaron Z <aaron.zavora@databricks.com>
Date: Fri, 3 May 2024 00:02:26 -0400
Subject: [PATCH 16/46] traverse HLs wip

---
 databricksx12/hls/loop.py | 180 +++++++++++++++++---------------------
 1 file changed, 81 insertions(+), 99 deletions(-)

diff --git a/databricksx12/hls/loop.py b/databricksx12/hls/loop.py
index 1b71442..db6f588 100644
--- a/databricksx12/hls/loop.py
+++ b/databricksx12/hls/loop.py
@@ -1,51 +1,16 @@
 from databricksx12.edi import *
 from functools import reduce
-from databricksx12.hls import hierarchicalloop
-
 
 class LoopMapping:
     def __init__(self, mappings=None):
         self.mappings = mappings if mappings is not None else {
             '20': {
-                'Information Source': {
-                    'loop': '2000A',
-                    'reference_ids': ('NM1', '3'),
-                    'secondary_reference': ('85', '1')
-                },
-                'Provider Address Line 1': {
-                    'loop': '2000AA',
-                    'reference_ids': ('N3', '1')
-                }
-
+                'loop name': 'Information Source',
+                'loop': '2000A'
             },
             '22': {
-                'Subscriber': {
-                    'loop': '2000B',
-                    'reference_ids': ('SBR', '4')
-                },
-                'Individual First Name': {
-                    'loop': '2010BA',
-                    'reference_ids': ('NM1', '4'),
-                    'secondary_reference': ('IL', '1')
-                },
-                'Individual Last Name': {
-                    'loop': '2010BA',
-                    'reference_ids': ('NM1', '3'),
-                    'secondary_reference': ('IL', '1')
-                },
-                'Payer Name': {
-                    'loop': '2010BB',
-                    'reference_ids': ('NM1', '3'),
-                    'secondary_reference': ('PR', '1')
-                },
-                'Claim ID': {
-                    'loop': '2300',
-                    'reference_ids': ('CLM', '1')
-                },
-                'Claim Amount': {
-                    'loop': '2300',
-                    'reference_ids': ('CLM', '2')
-                }
+                'loop name': 'Subscriber',
+                'loop': '2000B'
             }
         }
 
@@ -58,82 +23,99 @@ def get_mapping(self, element, description=None):
 
 
 class Loop(EDI):
+
+    
     def __init__(self, data, delim_cls=AnsiX12Delim, loop_mapping=LoopMapping()):
         super().__init__(data, delim_cls)
         self.loop_mapping = loop_mapping
-        self.hlmanager = hierarchicalloop.HierarchicalLoopManager(data)
-        self.hl_summary = self.hlmanager.summary
-
-        self.sender = self.segments_by_name("GS")[0].element(2)
-        self.receiver = self.segments_by_name("GS")[0].element(3)
-
-    def claim_segments(self):
-        return [(i, x.element(2)) for i, x in self.segments_by_name_index("CLM")]
-
-    def claim_count(self):
-        return len(self.segments_by_name_index("CLM"))
-
-    def get_transaction_info(self, tx_summary, clm_index):
+        self._start_indexes = self._build_hierarchy_start_indexes()
+        self.loop_hierarchy = self.build_hierarchy()
         """
-        retrieves transaction information for a claim from hierarchical summary
-        Eg., get the transaction range for claim index "x"
+        loop_hierarchy = { unique_id : {
+            start_idx : ""
+            end_idx : ""
+            parent_id : ""
+            hl_code : ""
+            child_code: ""
+           }
+        }
         """
-        return next((info for _, info in tx_summary.items()
-                     if info['index_start'] <= int(clm_index) <= info['index_end']), None)
 
-    def get_ranges(self, tx_info, clm_index, use_children=False):
+    #
+    # Build a complete hierarchical view of all HL segments start and end positions 
+    #
+    def build_hierarchy(self):
         """
-        extracts numeric ranges for parent and optionally children based on transaction but if children add an index to filter to the right one
-        Eg., find ranges for parent and children loops for processing
+        Return a dict keyed by HL id; each value holds that loop's start_idx, end_idx, parent_id, hl_code and child_code
         """
-        if use_children and 'children' in tx_info:
-            return [(child['index_start'], child['index_end']) for child in tx_info['children']
-                    if child['index_start'] <= int(clm_index) <= child['index_end']]
-        else:
-            return [(tx_info['index_start'], tx_info['index_end'])]
+        return {
+            x[0]: {
+                "start_idx": x[1],
+                "end_idx": self._determine_end_index(x[1]),
+                "parent_id": x[2],
+                "hl_code": x[3],
+                "child_code": x[4]
+            }
+            for x in self._start_indexes
+        }
 
-    def find_elements_based_on_ranges(self, ranges, target_segment_name, target_element_index, secondary_reference=None):
-        """
-        filters and maps EDI segments to extract required elements based on their position and type
-        """
-        def process_range(range_tuple): return [
-            segment.element(int(target_element_index))
-            for segment in self.segments_by_position(range_tuple[0], range_tuple[1])
-            if segment.segment_name() == target_segment_name and
-            segment.segment_len() > int(target_element_index) and
-            (secondary_reference is None or segment.element(
-                int(secondary_reference[1])) == secondary_reference[0])
-        ]
-        return reduce(lambda acc, lst: acc + lst, map(process_range, ranges), [])
-
-    def extract_elements_from_claim(self, clm_segment, target_segment_name, target_element_index, use_children=False, secondary_reference=None):
-        """
-        a higher-level function that ties together the previous functions to get tx info, the ranges of interest, and elements from every range
-        """
-        clm_index = clm_segment[0]
-        tx_info = self.get_transaction_info(self.hl_summary, clm_index)
-        if not tx_info:
+    #
+    # Return a list with one tuple per HL segment: (id, start index, parent id, hl_code, child code)
+    #
+    def _build_hierarchy_start_indexes(self):
+        return [ ( x.element(1), #id
+                   i, # "start_idx"
+                   x.element(2), # "parent_id"
+                   x.element(3), # "hl_code"
+                   x.element(4))  # "child_code"
+         for i,x in self.segments_by_name_index("HL")]
+
+    #
+    # Determine the end index of an HL segment
+    #  @param start_idx - the start index of the existing HL segment
+    #  x[1] = start index from tuple in _build_hierarchy_start_indexes
+    #
+    def _determine_end_index(self, start_idx):
+        return min([x[1] for x in self._start_indexes if x[1] > start_idx] + [len(self.data)])
+
+    #
+    # Primary search function within HL
+    #   @param pos_idx - the reference point
+    #   @param hl_code - the hl code being searched for
+    #
+    #   @return - a tuple of the start and end position of the hl segment containing hl_code, otherwise None if not found
+    #
+    def find_hl_codes(self, pos_idx, hl_code):
+        return (self._filter_on_position(pos_idx, hl_code)[0] if self._filter_on_position(pos_idx, hl_code) else self.traverse_loops(pos_idx, hl_code))
+
+    def traverse_loops(self, pos_idx, hl_code, parent_idx = None):
+        if parent_idx == "":
             return None
+        elif parent_idx == None:
+            return traverse_loops(pos_idx, hl_code, parent_idx = self._filter_hl_on_position(pos_idx))
+        else:
+            return (temp[0] if (temp := self._filter_hl_on_parent(hl_code, parent_idx)) else traverse_loops(pos_idx, hl_code, ...???
+    
 
-        ranges = self.get_ranges(tx_info, clm_index, use_children)
-        return self.find_elements_based_on_ranges(ranges, target_segment_name, target_element_index, secondary_reference)
 
-    def find_reference_element(self, clm_segment, loop_key, description=None):
-        loop_info = self.loop_mapping.get_mapping(loop_key, description)
-        if not loop_info:
-            return []
+    def _filter_hl_on_position(self, pos_idx):
+        return (temp[0] if (temp := filter(lambda k,v: v if v['start_idx'] <= pos_idx <= v['end_idx'] ,self.loop_hierarchy)) else "")
 
-        target_segment_name, target_element_index = loop_info['reference_ids']
-        secondary_reference = loop_info.get('secondary_reference', None)
-        use_children = loop_key == '22'
+        
+    #
+    # Will only ever return one element or None
+    #
+    def _fitler_hl_on_position_and_code(self, pos_idx, hl_code):
+        return filter(lambda k,v: v if v['hl_code'] == hl_code and v['start_idx'] <= pos_idx <= v['end_idx'] ,self.loop_hierarchy)
 
-        return self.extract_elements_from_claim(clm_segment,
-                                                target_segment_name,
-                                                target_element_index,
-                                                use_children,
-                                                secondary_reference)
+    #
+    # Will only ever return one element or None
+    #                
+    def _filter_hl_on_parent(self, hl_code, parent_id):
+        return filter(lambda k,v: v if v['hl_code'] == hl_code and v['id'] == parent_id, self.loop_hierarchy)
 
 
+                
 """
 sample_data_837i_edited = open("/sampledata/837/CHPW_Claimdata_edited.txt", "rb").read().decode("utf-8")
 claims = Loop(sample_data_837i_edited)

From 143fc222ef8b94343a90624cfcbbe4b09b9c7eb3 Mon Sep 17 00:00:00 2001
From: Aaron Z <aaron.zavora@databricks.com>
Date: Sun, 5 May 2024 17:44:38 -0400
Subject: [PATCH 17/46] passing loop tests

---
 databricksx12/hls/loop.py                     | 36 +++++++--------
 ...ited.txt => CHPW_Claimdata_edited.txt.tmp} |  0
 tests/test_loop.py                            | 44 +++++++++++++++++++
 tests/test_pyspark.py                         |  7 ++-
 4 files changed, 66 insertions(+), 21 deletions(-)
 rename sampledata/837/{CHPW_Claimdata_edited.txt => CHPW_Claimdata_edited.txt.tmp} (100%)
 create mode 100644 tests/test_loop.py

diff --git a/databricksx12/hls/loop.py b/databricksx12/hls/loop.py
index db6f588..c0e0334 100644
--- a/databricksx12/hls/loop.py
+++ b/databricksx12/hls/loop.py
@@ -83,37 +83,35 @@ def _determine_end_index(self, start_idx):
     #   @param pos_idx - the reference point
     #   @param hl_code - the hl code being searched for
     #
-    #   @return - a tuple of the start and end position of the hl segment containing hl_code, otherwise None if not found
-    #
+    #   @returns None if not found, otherwise the value from loop_hierarchy
     def find_hl_codes(self, pos_idx, hl_code):
-        return (self._filter_on_position(pos_idx, hl_code)[0] if self._filter_on_position(pos_idx, hl_code) else self.traverse_loops(pos_idx, hl_code))
+        init_hl = self._filter_hl_on_position(pos_idx)
+        return (None if init_hl is None else self.traverse_loops(hl_code, init_hl))
+
 
-    def traverse_loops(self, pos_idx, hl_code, parent_idx = None):
-        if parent_idx == "":
+    #
+    # Go from child to parent searching for the specified hl_code
+    #
+    def traverse_loops(self, hl_code, loop):
+        if loop['hl_code'] == hl_code:
+            return loop 
+        elif loop['parent_id'] == "":
             return None
-        elif parent_idx == None:
-            return traverse_loops(pos_idx, hl_code, parent_idx = self._filter_hl_on_position(pos_idx))
         else:
-            return (temp[0] if (temp := self._filter_hl_on_parent(hl_code, parent_idx)) else traverse_loops(pos_idx, hl_code, ...???
-    
-
+            return self.traverse_loops(hl_code, self.loop_hierarchy.get(loop['parent_id']))
 
+    #
+    # 
+    #
     def _filter_hl_on_position(self, pos_idx):
-        return (temp[0] if (temp := filter(lambda k,v: v if v['start_idx'] <= pos_idx <= v['end_idx'] ,self.loop_hierarchy)) else "")
+        return (list(temp)[0] if (temp := filter(lambda v: v['start_idx'] <= pos_idx <= v['end_idx'], self.loop_hierarchy.values())) else None)
 
         
     #
     # Will only ever return one element or None
     #
     def _fitler_hl_on_position_and_code(self, pos_idx, hl_code):
-        return filter(lambda k,v: v if v['hl_code'] == hl_code and v['start_idx'] <= pos_idx <= v['end_idx'] ,self.loop_hierarchy)
-
-    #
-    # Will only ever return one element or None
-    #                
-    def _filter_hl_on_parent(self, hl_code, parent_id):
-        return filter(lambda k,v: v if v['hl_code'] == hl_code and v['id'] == parent_id, self.loop_hierarchy)
-
+        return (list(temp)[0] if (temp := filter(lambda v: v['hl_code'] == hl_code and v['start_idx'] <= pos_idx <= v['end_idx'] ,self.loop_hierarchy.values())) else None) 
 
                 
 """
diff --git a/sampledata/837/CHPW_Claimdata_edited.txt b/sampledata/837/CHPW_Claimdata_edited.txt.tmp
similarity index 100%
rename from sampledata/837/CHPW_Claimdata_edited.txt
rename to sampledata/837/CHPW_Claimdata_edited.txt.tmp
diff --git a/tests/test_loop.py b/tests/test_loop.py
new file mode 100644
index 0000000..a54737b
--- /dev/null
+++ b/tests/test_loop.py
@@ -0,0 +1,44 @@
+from test_spark_base import *
+from databricksx12.hls.loop import *
+import unittest, re
+
+
+class TestLoop(PysparkBaseTest):
+
+    data = open("sampledata/837/837p.txt", "rb").read().decode("utf-8")
+    loop = Loop(data)
+    
+    #
+    # Test Loop base info
+    #
+    def test_loop_hierarchy_build(self):
+        assert (set(TestLoop.loop.loop_hierarchy.keys()) == set({'1','2','3'}))
+        assert (TestLoop.loop.loop_hierarchy.get('1')['start_idx'] == 7)
+        assert (TestLoop.loop.loop_hierarchy.get('2')['start_idx'] == 12)
+        assert (TestLoop.loop.loop_hierarchy.get('3')['start_idx'] == 27)
+        assert (TestLoop.loop.loop_hierarchy.get('1')['end_idx'] == 12)
+        assert (TestLoop.loop.loop_hierarchy.get('2')['end_idx'] == 27)
+        assert (TestLoop.loop.loop_hierarchy.get('3')['end_idx'] == 45)
+        assert ([x.get('hl_code') for x in list(TestLoop.loop.loop_hierarchy.values())] == ['20','22','22'])
+        assert ([x.get('child_code') for x in list(TestLoop.loop.loop_hierarchy.values())] == ['1','0','0'])
+        
+
+    #
+    # Test traversing hierarchy 
+    #
+    def test_loop_hierarchy(self):
+        clms = TestLoop.loop.segments_by_name_index("CLM")
+        assert (clms[0][0] == 22)
+        assert (clms[1][0] == 37)
+        
+        assert (TestLoop.loop.find_hl_codes(22, '20') == TestLoop.loop.find_hl_codes(37, '20'))
+        assert (TestLoop.loop.find_hl_codes(22, '22') !=  TestLoop.loop.find_hl_codes(37, '22'))
+
+        assert  (TestLoop.loop.find_hl_codes(22, '20')['start_idx'] == 7)
+        assert  (TestLoop.loop.find_hl_codes(22, '22')['start_idx'] == 12)
+        assert  (TestLoop.loop.find_hl_codes(37, '22')['start_idx'] == 27)
+        
+if __name__ == '__main__':
+    unittest.main()        
+        
+
diff --git a/tests/test_pyspark.py b/tests/test_pyspark.py
index 36327d5..b7fcd3e 100644
--- a/tests/test_pyspark.py
+++ b/tests/test_pyspark.py
@@ -10,6 +10,9 @@ def test_transaction_count(self):
                 .map(lambda x: EDI(x))
                 .map(lambda x: {"transaction_count": x.num_transactions()})
                 ).toDF()
-        assert ( data.count() == 4) #4 rows
-        assert ( data.select(data.transaction_count).groupBy().sum().collect()[0]["sum(transaction_count)"] == 8) #8 ST/SE transactions
+        assert ( data.count() == 5) #5 rows
+        assert ( data.select(data.transaction_count).groupBy().sum().collect()[0]["sum(transaction_count)"] == 9) #9 ST/SE transactions
 
+
+if __name__ == '__main__':
+    unittest.main()

From 9a7b84bda27d0725d85764a011d64239cd8769d7 Mon Sep 17 00:00:00 2001
From: Aaron Z <aaron.zavora@databricks.com>
Date: Tue, 7 May 2024 15:11:33 -0400
Subject: [PATCH 18/46] bug and testing

---
 databricksx12/hls/loop.py | 54 +++++++++++++++++++++++++++++++++------
 1 file changed, 46 insertions(+), 8 deletions(-)

diff --git a/databricksx12/hls/loop.py b/databricksx12/hls/loop.py
index c0e0334..9c3a767 100644
--- a/databricksx12/hls/loop.py
+++ b/databricksx12/hls/loop.py
@@ -2,6 +2,10 @@
 from functools import reduce
 
 class LoopMapping:
+
+    #
+    # class to hold k,v of hl_code, loop
+    #
     def __init__(self, mappings=None):
         self.mappings = mappings if mappings is not None else {
             '20': {
@@ -14,6 +18,12 @@ def __init__(self, mappings=None):
             }
         }
 
+    #
+    # Get hl_code associated with the loop
+    #
+    def get_hl_code(self, loop):
+        return None if (temp := [hl_code for hl_code, v in self.mappings.items() if v['loop'] == loop]) == [] else temp[0]
+
     def get_mapping(self, element, description=None):
         """ Returns a specific mapping based on element key and description. """
         mappings = self.mappings.get(element, {})
@@ -27,7 +37,7 @@ class Loop(EDI):
     
     def __init__(self, data, delim_cls=AnsiX12Delim, loop_mapping=LoopMapping()):
         super().__init__(data, delim_cls)
-        self.loop_mapping = loop_mapping
+        self.mapping = loop_mapping
         self._start_indexes = self._build_hierarchy_start_indexes()
         self.loop_hierarchy = self.build_hierarchy()
         """
@@ -41,6 +51,16 @@ def __init__(self, data, delim_cls=AnsiX12Delim, loop_mapping=LoopMapping()):
         }
         """
 
+    #
+    # Get the specified loop based upon a position, else return None if does not exist
+    #  @param pos - the position of the data point
+    #  @param loop - the loop from the mapping that is being searched for
+    #
+    #  @return None if not found, otherwise value from loop_hierarchy
+    #
+    def get_loop(self, pos, loop):
+        return None if (temp := self.mapping.get_hl_code(loop)) is None else self.find_hl_codes(pos, temp)
+
     #
     # Build a complete hierarchical view of all HL segments start and end positions 
     #
@@ -54,7 +74,8 @@ def build_hierarchy(self):
                 "end_idx": self._determine_end_index(x[1]),
                 "parent_id": x[2],
                 "hl_code": x[3],
-                "child_code": x[4]
+                "child_code": x[4],
+                "subordinate_ind": self.subordinate_child_indicator(x[1]) #true if previous HL04=1 
             }
             for x in self._start_indexes
         }
@@ -98,21 +119,38 @@ def traverse_loops(self, hl_code, loop):
         elif loop['parent_id'] == "":
             return None
         else:
-            return self.traverse_loops(hl_code, self.loop_hierarchy.get(loop['parent_id']))
+            return self.traverse_loops(hl_code, self.determine_parent(loop))
 
     #
-    # 
+    # parent is either the parent_id or the previous HL segment if there was a child indicator section
+    #
+    def determine_parent(self, loop):
+        return loop['parent_id'] if loop['subordinate_ind'] == 0 else self.loop_hierarchy.get(self.determine_previous_hl(loop['start_idx'])[0])
+        
+    #
+    #  returns the first loop_hierarchy entry whose start_idx..end_idx range contains pos_idx
     #
     def _filter_hl_on_position(self, pos_idx):
         return (list(temp)[0] if (temp := filter(lambda v: v['start_idx'] <= pos_idx <= v['end_idx'], self.loop_hierarchy.values())) else None)
 
-        
+
+    #
+    # determine if the HL segment at pos is a subordinate child of a parent
+    #  i.e. (parent has child code =1) and parent is previous HL segment 
     #
-    # Will only ever return one element or None
     #
-    def _fitler_hl_on_position_and_code(self, pos_idx, hl_code):
-        return (list(temp)[0] if (temp := filter(lambda v: v['hl_code'] == hl_code and v['start_idx'] <= pos_idx <= v['end_idx'] ,self.loop_hierarchy.values())) else None) 
+    def subordinate_child_indicator(self, pos):
+        return 0 if self.determine_previous_hl(pos) is None else self.determine_previous_hl(pos)[4]
 
+    #
+    # Determine the previous HL segment based upon a position
+    #
+    def determine_previous_hl(self, pos):
+        try:
+            return reduce(lambda a,b: a if a[1] > b[1] else b,
+                          filter(lambda x: x[1] < pos, self._start_indexes))
+        except: 
+            return None #when there is no preceding hl segment
                 
 """
 sample_data_837i_edited = open("/sampledata/837/CHPW_Claimdata_edited.txt", "rb").read().decode("utf-8")

From af27bdf2db6f106e3d2fe0a3c1d1ed069570c7c7 Mon Sep 17 00:00:00 2001
From: Aaron Z <aaron.zavora@databricks.com>
Date: Tue, 7 May 2024 22:53:42 -0400
Subject: [PATCH 19/46] subordinate child looping tests

---
 databricksx12/hls/loop.py |  4 ++++
 tests/test_loop.py        | 11 +++++++++++
 2 files changed, 15 insertions(+)

diff --git a/databricksx12/hls/loop.py b/databricksx12/hls/loop.py
index 9c3a767..b6d6638 100644
--- a/databricksx12/hls/loop.py
+++ b/databricksx12/hls/loop.py
@@ -15,6 +15,10 @@ def __init__(self, mappings=None):
             '22': {
                 'loop name': 'Subscriber',
                 'loop': '2000B'
+            },
+            '23': {
+                'loop name': 'Patient',
+                'loop': '2000C'
             }
         }
 
diff --git a/tests/test_loop.py b/tests/test_loop.py
index a54737b..a831c47 100644
--- a/tests/test_loop.py
+++ b/tests/test_loop.py
@@ -37,6 +37,17 @@ def test_loop_hierarchy(self):
         assert  (TestLoop.loop.find_hl_codes(22, '20')['start_idx'] == 7)
         assert  (TestLoop.loop.find_hl_codes(22, '22')['start_idx'] == 12)
         assert  (TestLoop.loop.find_hl_codes(37, '22')['start_idx'] == 27)
+
+    def test_loop_hierarchy_child_codes(self):
+        data = open("./sampledata/837/CHPW_Claimdata_edited.txt.tmp", "rb").read().decode("utf-8")
+        loop = Loop(data)
+        assert(loop.find_hl_codes(174, '22')['start_idx'] == 160)
+
+    def test_loop_search_by_name(self):
+        assert(TestLoop.loop.get_loop(22, "2000A")['start_idx'] == 7)
+        assert(TestLoop.loop.get_loop(22, "2000B")['start_idx'] == 12)
+        assert(TestLoop.loop.get_loop(37, "2000A")['start_idx'] == 7)
+        assert(TestLoop.loop.get_loop(37, "2000B")['start_idx'] == 27)
         
 if __name__ == '__main__':
     unittest.main()        

From a6f0321f169d7e265e81bd6691ede1adf9d00fe8 Mon Sep 17 00:00:00 2001
From: Aaron Z <aaron.zavora@databricks.com>
Date: Thu, 9 May 2024 15:23:21 -0400
Subject: [PATCH 20/46] build data

---
 databricksx12/hls/build-func.py               | 138 ---------
 databricksx12/hls/claim.py                    | 142 +++++----
 databricksx12/hls/healthcare.py               |  12 +-
 databricksx12/hls/hierarchicalloop.py         | 129 ---------
 databricksx12/{ => hls}/hl7.py                |   0
 databricksx12/hls/loop.py                     |   9 +-
 .../hls/test-notebooks/claim-test.ipynb       | 273 ------------------
 tests/test_loop.py                            |  19 ++
 8 files changed, 123 insertions(+), 599 deletions(-)
 delete mode 100644 databricksx12/hls/build-func.py
 delete mode 100644 databricksx12/hls/hierarchicalloop.py
 rename databricksx12/{ => hls}/hl7.py (100%)
 delete mode 100644 databricksx12/hls/test-notebooks/claim-test.ipynb

diff --git a/databricksx12/hls/build-func.py b/databricksx12/hls/build-func.py
deleted file mode 100644
index 2de4f64..0000000
--- a/databricksx12/hls/build-func.py
+++ /dev/null
@@ -1,138 +0,0 @@
-"""
-Apr 9 notes
-"""
-from databricksx12.edi import *
-
-class LoopMapping:
-
-    def __init__(self, mappings=None):
-        self.mappings = (mappings if mappings is not None else {
-            '20': {
-                'description': 'Information Source',
-                'loop': '2000A'
-                },
-            '22': {
-                'description': 'Subscriber',
-                'loop': '2000B'
-                }
-            })
-
-
-   
-        
-    """
-    def __init__(self):
-        self.mappings = {
-            '2000A': ('20', 'NM1', '3'),
-            '2000B': ('22', 'SBR', '4'),
-        }
-
-
-    ADZ want our key = (lookup value found in data), value = additional info needed 
-    """
-
-
-class HierarchicalLoop(EDI):
-    def __init__(self, data, delim_cls=AnsiX12Delim, loop_mapping= LoopMapping.mappings):
-        super().__init__(data, delim_cls)
-        self.loop_mapping = loop_mapping
-        self.target_element, self.target_segment_name, self.target_element_index = self.loop_mapping.get_identifiers(
-            Loop)
-
-        # find all HL segments along with the 3rd element that denotes 2000A (20) or 2000B (22)
-        self.hl_segments = self._hl_identifiers()
-
-        # find all CLM segments (important for indexing the last HL or SBR within a tx)
-        self.clm_segments = self._clm_identifiers()
-
-        # Calculate ranges and then extract 2000A/B lines based on those ranges
-        self.ranges = self.select_range_of_interest(
-            self.hl_segments, self.clm_segments, self.target_element)
-        self.extracted_lines = self.extract_lines_based_on_ranges(
-            self.ranges, self.target_segment_name, self.target_element_index)
-
-    def _hl_identifiers(self):
-        # Find the segments where HL loop begins
-        indexed_HL_segments = self.segments_by_nloop_object.extracted_linesame_index("HL")
-        return [(i, x.element(3)) for i, x in indexed_HL_segments]
-
-    def _clm_identifiers(self):
-        # Find the segments where CLM loop begins
-        indexed_CLM_segments = self.segments_by_name_index("CLM")
-        return [(i, x.element(2)) for i, x in indexed_CLM_segments]
-
-    def select_range_of_interest(self, hl_indexes, clm_indexes, target_value):
-        ranges = []
-        start_index = None
-        last_index = None
-
-        for index, value in hl_indexes:
-            if value == target_value:
-                if start_index is not None:
-                    ranges.append((start_index+1, index))
-                start_index = index
-            elif start_index is not None:
-                ranges.append((start_index+1, index))
-                start_index = None
-        if clm_indexes:
-            last_index = clm_indexes[-1][0]
-        if last_index and start_index is not None:
-            ranges.append((start_index+1, last_index))
-        return ranges
-
-    def extract_lines_based_on_ranges(self, ranges, target_value, target_index):
-        extracted_elements = []
-        # Iterate through each range in the list
-        for start, end in ranges:
-            # Retrieve the segments within this range
-            segments_in_range = self.segments_by_position(start, end)
-
-            desired_elements = map(
-                lambda segment: segment.element(int(target_index)),
-                filter(
-                    lambda segment: segment.segment_name() == target_value and len(
-                        segment.data.split(segment.format_cls.ELEMENT_DELIM)) > int(target_index),
-                    segments_in_range
-                )
-            )
-            extracted_elements.extend(desired_elements)
-
-        return list(extracted_elements)
-
-
-    def parent_loops(self):
-        pass
-
-    def child_loops(self, parent_loop_num):
-        pass
-
-    """
-       @return
-         -index of each HL segment
-         -index of parent segments
-         -be able to answer "where is loop XYZ?" and "at this location, what looop am i in?"
-    """
-    def _hl_segment_indexes(self):
-        pass
-
-
-    self.hl_parents = {
-        parent:
-        { index_start : value
-          index_end : value
-           children : [
-               hl_child : {
-                   index_start: value
-                   index_end: value
-                   }
-               ]
-        }
-
-
-    self.hl = HL()
-    self.claim_start_index = segment(clm)
-
-    Who is my billing provider?  hl.get_loop(20)
-    Who is my subscriber?
-    Who is my patient? 
-    
diff --git a/databricksx12/hls/claim.py b/databricksx12/hls/claim.py
index 6a9e66e..8cada93 100644
--- a/databricksx12/hls/claim.py
+++ b/databricksx12/hls/claim.py
@@ -1,31 +1,88 @@
 from databricksx12.edi import *
-from databricksx12.hls import loop
+from databricksx12.hls.loop import *
+import itertools
+
+
 #
 # Base claim class
 #
 
 
-class Claim():
+class MedicalClaim(EDI):
+
+    def __init__(self,
+                 sender_loop = [],
+                 receiver_loop = [],
+                 billing_loop = [],
+                 subscriber_loop = [],
+                 patient_loop = [],
+                 claim_loop =  [],
+                 sl_loop = [] #service line loop
+                 ):
+        self.sender_loop = sender_loop
+        self.receiver_loop = receiver_loop
+        self.billing_loop = billing_loop
+        self.subscriber_loop = subscriber_loop
+        self.patient_loop = patient_loop
+        self.claim_loop = claim_loop
+        self.sl_loop = sl_loop
+        
+        self.build()
+
+    def billing_loop(self):
+        return {
+            "billing_prvdr_name": "TODO",
+            "billing_npi": "TODO",
+            "billing_street_address": "TODO",
+            "billing_zip_cd": "TODO",
+            "billing_state_cd": "TODO"
+            }
+    
+    def subscriber_loop(self):
+        return {
+            "TODO": "TODO"
+            }
+
+    #
+    #
+    #
+    def patient_loop(self):
+        #Note - if this doesn't exist then it's the same as subscriber loop
+        return {
+            "TODO": "TODO"
+            }
+    
+    def toJson(self):
+        {
+            **self.patient_loop(),
+            **self.subscriber_loop(),
+            **self.billing_loop()
+         }
 
-    def __init__(self):
-        pass
 
-    @staticmethod
-    def from_dictionary(d):
-        pass
+    #not sure if this should be here or not, but you get the idea
+    def build():
+        self.billing_info = self.billing_loop()
+        self.subscriber_info = self.subscriber_loop()
+        self.patient_info = self.subscriber_loop() if self.patient_loop = [] else self.patient_loop()
+        
 
 
-class Claim837i(Claim):
+class Claim837i(MedicalClaim):
 
     NAME = "837I"
 
-# Format of 837P https://www.dhs.wisconsin.gov/publications/p0/p00265.pdf
 
+# Format of 837P https://www.dhs.wisconsin.gov/publications/p0/p00265.pdf
 
-class Claim837p(Claim):
+class Claim837p(MedicalClaim):
 
     NAME = "837P"
 
+class Claim835(MedicalClaim):
+    
+    NAME = "835"
+
 
 #
 # Base claim builder (transaction -> 1 or more claims)
@@ -35,59 +92,38 @@ class ClaimBuilder(EDI):
     #
     # Given claim type (837i, 837p, etc), segments, and delim class, build claim level classes
     #
-    def __init__(self, trnx_type, trnx_data, delim_cls):
-        self.trnx_type = trnx_type
+    def __init__(self, trnx_type_cls, trnx_data, delim_cls=AnsiX12Delim):
         self.data = trnx_data
-        self.delim_cls = delim_cls
-
-        self.loop_summary = loop.Loop(trnx_data)
+        self.format_cls = delim_cls
+        self.trnx_cls = trnx_type_cls
+        self.loop = Loop(trnx_data)
+        
 
     #
-    # Returns a dictionary of "loop name" : "loop data"
+    # Builds a claim object from a CLM segment and its index in the data
     #
-
-    def build_claim(self, clm_segment):
-        return {
-            "1000A": {
-                "desc": "Submitter Name",
-                "segments": self.loop_summary.sender
-            },
-            "1000B": {
-                "desc": "Receiver Name",
-                "segments": self.loop_summary.receiver
-            },
-            "2000A": {
-                "desc": "Billing Provider",
-                "segments": self.loop_summary.find_reference_element(clm_segment, '20', 'Information Source')
-            },
-            "2000B": {
-                "desc": "Subscriber",
-                "segments": self.loop_summary.find_reference_element(clm_segment, '22', 'Subscriber')
-            },
-            "2010BA": {
-                "desc": "Patient",
-                "segments": (self.loop_summary.find_reference_element(clm_segment, '22', 'Individual First Name'),
-                             self.loop_summary.find_reference_element(clm_segment, '22', 'Individual Last Name'))
-
-            },
-            "2010BB": {
-                "desc": "Payer",
-                "segments": self.loop_summary.find_reference_element(clm_segment, '22', 'Payer Name'),
-            },
-            "2300": {
-                "desc": "Claim",
-                "segments": (self.loop_summary.find_reference_element(clm_segment, '22', 'Claim ID'),
-                             self.loop_summary.find_reference_element(clm_segment, '22', 'Claim Amount'))
-            }
-        }
+    # @param clm_segment - the claim segment of claim to build
+    # @param idx - the index of the claim segment in the data
+    #
+    #  @return the class containing the relevant claim information
+    #
+    def build_claim(self, clm_segment, idx):
+        return self.trnx_cls(
+            sender_loop = [],
+            receiver_loop = [],
+            billing_loop = self.loop.get_loop_segments(idx, "2000A"),
+            subscriber_loop = self.loop.get_loop_segments(idx, "2000B"),
+            patient_loop = self.loop.get_loop_segments(idx, "2000C"),
+            claim_loop =  [],
+            sl_loop = [] #service line loop
+        )
 
     #
     # Given transaction type, transaction segments, and delim info, build out claims in the transaction
     #  @return a list of Claim for each "clm" segment
     #
     def build(self):
-        return [self.build_claim(seg) for seg in self.loop_summary.claim_segments()]
-
+        return [self.build_claim(seg, i) for i, seg in self.segments_by_name_index("CLM")]
 
 """
 sample_data_837i_edited = open("/sampledata/837/CHPW_Claimdata_edited.txt", "rb").read().decode("utf-8")
diff --git a/databricksx12/hls/healthcare.py b/databricksx12/hls/healthcare.py
index efd60f5..00cc22e 100644
--- a/databricksx12/hls/healthcare.py
+++ b/databricksx12/hls/healthcare.py
@@ -6,9 +6,9 @@
 class HealthcareManager(EDI):
 
     def __init__(self, mapping = {
-            "222": "837P",
-            "223": "837I",
-            "221": "835"
+            "222": Claim837i,
+            "223": Claim837p,
+            "221": None # "835"
     }):
         self.mapping = mapping
 
@@ -17,8 +17,10 @@ def __init__(self, mapping = {
     # Given an EDI message, return a list of healthcare claims
     #
     def from_edi(self, edi):
-        return list(itertools.chain.from_iterable([self.from_functional_group(y) for y in edi.functional_segments()])) 
+        return self.flatmap(self.flatmap([self.from_functional_group(y) for y in edi.functional_segments()]))
 
+    def flatmap(self,x):
+        return list(itertools.chain.from_iterable(x))
 
     def from_functional_group(self, fg):
         return [self.from_transaction(x) for x in fg.transaction_segments()]
@@ -29,5 +31,5 @@ def from_functional_group(self, fg):
     #
     def from_transaction(self, trnx):
         return ClaimBuilder(self.mapping.get(trnx.transaction_type),
-                            [x for x in trnx.data if x.segment_name() not in ['ST', 'SE']], trnx.delim_cls).build()
+                            [x for x in trnx.data if x.segment_name() not in ['ST', 'SE']], trnx.format_cls).build()
     
diff --git a/databricksx12/hls/hierarchicalloop.py b/databricksx12/hls/hierarchicalloop.py
deleted file mode 100644
index c047f89..0000000
--- a/databricksx12/hls/hierarchicalloop.py
+++ /dev/null
@@ -1,129 +0,0 @@
-from databricksx12.edi import *
-
-import itertools
-
-
-class HierarchicalLoop(EDI):
-    def __init__(self, data, delim_cls=AnsiX12Delim):
-        super().__init__(data, delim_cls)
-
-        # parent and children loops
-        self.parent_start_loops = self._parent_start_tup_loops()
-        self.parent_end_loops = self._parent_end_loops()
-        self.parent_loops = self._parent_loops()
-        self.child_loops = self._child_loops(self.parent_loops)
-        self.subchild_loops = self._subchild_loops(self.child_loops)
-
-    def _parent_start_tup_loops(self):
-        # index of parent, counter, and if child
-        # TODO unit test to return tuple
-        return [(i, x.element(1), x.element(-1)) for i, x in self.segments_by_name_index("HL") if x.element(2) == ""]
-
-    def _parent_end_loops(self):
-        return [i for i, x in self.segments_by_name_index("SE")]
-
-    def _parent_loops(self):
-        return [(tup + (j,)) for tup, j in zip(self.parent_start_loops, self.parent_end_loops)]
-
-    def _child_loops(self, parent_loops):
-        child_loops = [(i, counter, segment.element(-1), parent_stop_index)
-                       for _, counter, child_id, parent_stop_index in parent_loops
-                       for i, segment in self.segments_by_name_index("HL") if segment.element(2) == counter]
-        return child_loops
-
-    def _subchild_loops(self, child_loops):
-        it1, it2 = itertools.tee(child_loops)
-        next(it2, None)
-        return [pair[1] for pair in zip(it1, it2) if int(pair[0][2]) == 1]
-
-
-class HierarchicalLoopManager:
-    def __init__(self, data, delim_cls=AnsiX12Delim):
-        self.hl = HierarchicalLoop(data, delim_cls)
-        self.summary = self.generate_summary()
-
-    def get_child_loops(self, parent_loop, loops):
-        """Filter child loops that fall within the given parent loop's range."""
-        return list(filter(lambda x: parent_loop[0] < x[0] < parent_loop[3], loops))
-
-    def calculate_child_end_index(self, current_child, next_child, parent_end):
-        """Calculate the end index of a child, adjusting to avoid overlap with the next child."""
-        return min(current_child[3], next_child[0] - 1 if next_child else parent_end)
-
-    def process_subchildren(self, child, subchild_loops, parent_end):
-        """Process subchildren for a given child."""
-        return [
-            {'index_start': subchild[0], 'index_end': self.calculate_child_end_index(
-                subchild, None, parent_end), 'children': None}
-            for subchild in subchild_loops if subchild[1] == child[1] and int(child[2]) == 1
-        ]
-
-    def process_child_entry(self, child, index, children, subchild_loops, parent_end):
-        """Helper function to process each child entry."""
-        next_child = children[index +
-                              1] if (index + 1) < len(children) else None
-        subchildren = self.process_subchildren(
-            child, subchild_loops, parent_end)
-        return {
-            'index_start': child[0],
-            'index_end': self.calculate_child_end_index(child, next_child, parent_end),
-            'children': subchildren or None
-        }
-
-    def process_children(self, children, subchild_loops, parent_end):
-        """Process all children, adjusting their end indices correctly, and add subchildren using functional programming."""
-        # Filter out subchildren from main children list
-        filtered_children = [
-            child for child in children if child not in subchild_loops]
-        # Apply processing to each child and collect the results
-        processed_children = list(map(lambda child: self.process_child_entry(child, filtered_children.index(
-            child), filtered_children, subchild_loops, parent_end), filtered_children))
-        return processed_children
-
-    def process_loop(self, loop):
-        child_loops = sorted(self.get_child_loops(
-            loop, self.hl.child_loops), key=lambda x: x[0])
-        children = self.process_children(
-            child_loops, self.hl.subchild_loops, loop[3])
-        return {
-            'index_start': loop[0],
-            'index_end': loop[3],
-            'children': children or None
-        }
-
-    def generate_summary(self):
-        return {str(loop[1]): self.process_loop(loop) for loop in self.hl.parent_loops}
-
-
-"""
-loop_manager = HierarchicalLoopManager(sample_data_837i_edited)  
-summary = loop_manager.summary 
-
-output:
-{'1': {'index_start': 7,
-  'index_end': 35,
-  'children': [{'index_start': 16, 'index_end': 35, 'children': None}]},
- '63': {'index_start': 41,
-  'index_end': 69,
-  'children': [{'index_start': 50, 'index_end': 69, 'children': None}]},
- '49': {'index_start': 75,
-  'index_end': 103,
-  'children': [{'index_start': 84, 'index_end': 103, 'children': None}]},
- '75': {'index_start': 109,
-  'index_end': 138,
-  'children': [{'index_start': 118, 'index_end': 138, 'children': None}]},
- '79': {'index_start': 144,
-  'index_end': 186,
-  'children': [{'index_start': 153, 'index_end': 159, 'children': None},
-   {'index_start': 160,
-    'index_end': 186,
-    'children': [{'index_start': 167, 'index_end': 186, 'children': None}]}]}}
-"""
-"""
-sample_data_837p = open("./sampledata/837/837p.txt", "rb").read().decode("utf-8").replace("\\n", "")
-HierarchicalLoopManager(sample_data_837p).summary
-{'1': {'index_start': 7,
-  'index_end': 42,
-  'children': [{'index_start': 12, 'index_end': 42, 'children': None},
-   {'index_start': 27, 'index_end': 42, 'children': None}]}}
-"""
diff --git a/databricksx12/hl7.py b/databricksx12/hls/hl7.py
similarity index 100%
rename from databricksx12/hl7.py
rename to databricksx12/hls/hl7.py
diff --git a/databricksx12/hls/loop.py b/databricksx12/hls/loop.py
index b6d6638..005b3dd 100644
--- a/databricksx12/hls/loop.py
+++ b/databricksx12/hls/loop.py
@@ -40,7 +40,8 @@ class Loop(EDI):
 
     
     def __init__(self, data, delim_cls=AnsiX12Delim, loop_mapping=LoopMapping()):
-        super().__init__(data, delim_cls)
+        self.data = data
+        self.format_cls = delim_cls
         self.mapping = loop_mapping
         self._start_indexes = self._build_hierarchy_start_indexes()
         self.loop_hierarchy = self.build_hierarchy()
@@ -65,6 +66,12 @@ def __init__(self, data, delim_cls=AnsiX12Delim, loop_mapping=LoopMapping()):
     def get_loop(self, pos, loop):
         return None if (temp := self.mapping.get_hl_code(loop)) is None else self.find_hl_codes(pos, temp)
 
+    #
+    # same as above, but only returns segment list
+    #
+    def get_loop_segments(self, pos, loop):
+        return [] if (temp := self.get_loop(pos, loop)) is None else self.data[temp['start_idx']:temp['end_idx']]
+
     #
     # Build a complete hierarchical view of all HL segments start and end positions 
     #
diff --git a/databricksx12/hls/test-notebooks/claim-test.ipynb b/databricksx12/hls/test-notebooks/claim-test.ipynb
deleted file mode 100644
index 481f558..0000000
--- a/databricksx12/hls/test-notebooks/claim-test.ipynb
+++ /dev/null
@@ -1,273 +0,0 @@
-{
- "cells": [
-  {
-   "cell_type": "code",
-   "execution_count": 1,
-   "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "['/Users/raven.mukherjee/edi-sol-accelerator/x12-edi-parser/databricksx12/hls/test-notebooks', '/opt/homebrew/Cellar/python@3.12/3.12.2_1/Frameworks/Python.framework/Versions/3.12/lib/python312.zip', '/opt/homebrew/Cellar/python@3.12/3.12.2_1/Frameworks/Python.framework/Versions/3.12/lib/python3.12', '/opt/homebrew/Cellar/python@3.12/3.12.2_1/Frameworks/Python.framework/Versions/3.12/lib/python3.12/lib-dynload', '', '/Users/raven.mukherjee/edi-sol-accelerator/edi-parse-env/lib/python3.12/site-packages']\n"
-     ]
-    }
-   ],
-   "source": [
-    "import sys\n",
-    "print(sys.path)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 2,
-   "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "{\n",
-      "    \"EDI.sender_tax_id\": \"ZZ\",\n",
-      "    \"list\": [\n",
-      "        {\n",
-      "            \"FunctionalGroup.receiver\": \"123456789\",\n",
-      "            \"FunctionalGroup.sender\": \"CLEARINGHOUSE\",\n",
-      "            \"FunctionalGroup.transaction_datetime\": \"20180508:0833\",\n",
-      "            \"FunctionalGroup.transaction_type\": \"222\",\n",
-      "            \"list\": [\n",
-      "                {\n",
-      "                    \"Transaction.transaction_type\": \"222\"\n",
-      "                },\n",
-      "                {\n",
-      "                    \"Transaction.transaction_type\": \"222\"\n",
-      "                },\n",
-      "                {\n",
-      "                    \"Transaction.transaction_type\": \"222\"\n",
-      "                },\n",
-      "                {\n",
-      "                    \"Transaction.transaction_type\": \"222\"\n",
-      "                },\n",
-      "                {\n",
-      "                    \"Transaction.transaction_type\": \"222\"\n",
-      "                }\n",
-      "            ]\n",
-      "        }\n",
-      "    ]\n",
-      "}\n"
-     ]
-    }
-   ],
-   "source": [
-    "from databricksx12.edi import *\n",
-    "x =  EDIManager(EDI(open(\"/Users/raven.mukherjee/solution_accelerators/x12-edi-parser/sampledata/837/CHPW_Claimdata.txt\", \"rb\").read().decode(\"utf-8\")))\n",
-    "\n",
-    "import json\n",
-    "print(json.dumps(x.flatten(x.data), indent=4))"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 3,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "sample_data_837i = open(\"/Users/raven.mukherjee/solution_accelerators/x12-edi-parser/sampledata/837/CC_837I_EDI.txt\", \"rb\").read().decode(\"utf-8\")"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 4,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# from databricksx12.edi import *\n",
-    "\n",
-    "# class extend_transaction(EDI):\n",
-    "#     def __init__(self, data, delim_cls=AnsiX12Delim):\n",
-    "#         super().__init__(data, delim_cls)\n",
-    "\n",
-    "#     @property\n",
-    "#     def full_transaction(self):\n",
-    "#         transaction_start_indexes = [i for i, segment in enumerate(self.data) if segment.segment_name() == \"ST\"]\n",
-    "#         transaction_end_indexes = [i for i, segment in enumerate(self.data) if segment.segment_name() == \"SE\"]\n",
-    "\n",
-    "#         transactions = []\n",
-    "#         for start, end in zip(transaction_start_indexes, transaction_end_indexes):\n",
-    "#             transaction_segments = self.data[start:end+1]\n",
-    "#             transactions.append(transaction_segments)\n",
-    "#         return transactions\n",
-    "\n",
-    "#     @property\n",
-    "#     def claim_identifier(self):\n",
-    "#         transactions = self.full_transaction\n",
-    "#         claim_identifiers = []\n",
-    "\n",
-    "#         for transaction_segments in transactions:\n",
-    "#             claim_id = None\n",
-    "#             for segment in transaction_segments:\n",
-    "#                 if segment.segment_name() == \"BHT\":\n",
-    "#                     claim_id = segment.element(3) #confirm\n",
-    "#                     break\n",
-    "#             claim_identifiers.append(claim_id)\n",
-    "\n",
-    "#         return claim_identifiers\n",
-    "\n",
-    "#     @property\n",
-    "#     def header_billing_amount(self):\n",
-    "#         transactions = self.full_transaction\n",
-    "#         billing_headers = []\n",
-    "\n",
-    "#         for transaction_segments in transactions:\n",
-    "#             for segment in transaction_segments:\n",
-    "#                 if segment.segment_name() == \"CLM\":\n",
-    "#                     bill_header = segment.element(1)\n",
-    "#                     billing_headers.append(bill_header)\n",
-    "#                     break  # one CLM segment per transaction?\n",
-    "\n",
-    "#         return billing_headers\n",
-    "\n",
-    "#     @property\n",
-    "#     def billed_amount(self):\n",
-    "#         transactions = self.full_transaction\n",
-    "#         billed_amounts = []\n",
-    "\n",
-    "#         for transaction_segments in transactions:\n",
-    "#             for segment in transaction_segments:\n",
-    "#                 if segment.segment_name() == \"CLM\":\n",
-    "#                     billed_amount = segment.element(2)  # Billed amount is the second element\n",
-    "#                     billed_amounts.append(billed_amount)\n",
-    "#                     break\n",
-    "\n",
-    "#         return billed_amounts\n",
-    "    \n",
-    "#     # @property\n",
-    "#     # def subscriber(self):\n",
-    "#     #     transactions = self.full_transaction\n",
-    "\n",
-    "\n"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 5,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# # use raw EDI data\n",
-    "# edi_object = extend_transaction(sample_data_837i)\n",
-    "\n",
-    "# # call  different vars\n",
-    "# transactions = edi_object.full_transaction\n",
-    "# claim_ids = edi_object.claim_identifier\n",
-    "# header = edi_object.header_billing_amount\n",
-    "# billed_amount = edi_object.billed_amount"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 12,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "#  identify elements functionally!\n",
-    "\n",
-    "from databricksx12.edi import *\n",
-    "\n",
-    "class extend_transaction(EDI):\n",
-    "    def __init__(self, data, delim_cls=AnsiX12Delim):\n",
-    "        super().__init__(data, delim_cls)\n",
-    "\n",
-    "        # Use map and lambda to populate billed amounts and subscribers\n",
-    "        self.billed_amounts = list(map(lambda x: x.element(2), self.segments_by_name(\"CLM\")))\n",
-    "        self.subscribers = list(map(lambda x: x.element(4), self.segments_by_name(\"SBR\")))\n",
-    "\n"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 13,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "sample_data_chpw_claimdata = open(\"/Users/raven.mukherjee/edi-sol-accelerator/x12-edi-parser/sampledata/837/CHPW_Claimdata.txt\", \"rb\").read().decode(\"utf-8\")\n",
-    "# use raw EDI data\n",
-    "edi_object = extend_transaction(sample_data_chpw_claimdata)\n",
-    "billed_amounts = edi_object.billed_amounts\n",
-    "subscribers = edi_object.subscribers\n"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 14,
-   "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "text/plain": [
-       "['20', '50.1', '11.64', '234', '20']"
-      ]
-     },
-     "execution_count": 14,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "billed_amounts"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 15,
-   "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "text/plain": [
-       "['COMMUNITY HLTH PLAN OF WASH',\n",
-       " 'COMMUNITY HLTH PLAN OF WASH',\n",
-       " 'COMMUNITY HLTH PLAN OF WASH',\n",
-       " 'COMMUNITY HLTH PLAN OF WASH',\n",
-       " 'COMMUNITY HLTH PLAN OF WASH']"
-      ]
-     },
-     "execution_count": 15,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "subscribers"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": []
-  }
- ],
- "metadata": {
-  "kernelspec": {
-   "display_name": "edi-parse-kernel",
-   "language": "python",
-   "name": "edi-parse-env"
-  },
-  "language_info": {
-   "codemirror_mode": {
-    "name": "ipython",
-    "version": 3
-   },
-   "file_extension": ".py",
-   "mimetype": "text/x-python",
-   "name": "python",
-   "nbconvert_exporter": "python",
-   "pygments_lexer": "ipython3",
-   "version": "3.12.2"
-  }
- },
- "nbformat": 4,
- "nbformat_minor": 2
-}
diff --git a/tests/test_loop.py b/tests/test_loop.py
index a831c47..e0dcd13 100644
--- a/tests/test_loop.py
+++ b/tests/test_loop.py
@@ -38,16 +38,35 @@ def test_loop_hierarchy(self):
         assert  (TestLoop.loop.find_hl_codes(22, '22')['start_idx'] == 12)
         assert  (TestLoop.loop.find_hl_codes(37, '22')['start_idx'] == 27)
 
+    #
+    # Test traversing hierarchy to find correct loops
+    #
     def test_loop_hierarchy_child_codes(self):
         data = open("./sampledata/837/CHPW_Claimdata_edited.txt.tmp", "rb").read().decode("utf-8")
         loop = Loop(data)
         assert(loop.find_hl_codes(174, '22')['start_idx'] == 160)
 
+    #
+    # Test getting all segments within a loop
+    #
+    def test_get_segments(self):
+        data = open("./sampledata/837/CHPW_Claimdata_edited.txt.tmp", "rb").read().decode("utf-8")
+        loop = Loop(data)
+        assert(loop.get_loop(174, '2000A')['start_idx'] ==  144 and loop.get_loop(174, '2000A')['end_idx'] == 153)
+        assert( len(loop.get_loop_segments(174, '2000A')) == 153 - 144)
+        assert(loop.get_loop_segments(174, '2000A')[0].element(0) == "HL")
+        assert( len([x.element(0) for x in loop.get_loop_segments(174, '2000A') if x.element(0) == "HL"]) == 1)
+
+    #
+    # Test loop start places by position using loop name search
+    #
     def test_loop_search_by_name(self):
         assert(TestLoop.loop.get_loop(22, "2000A")['start_idx'] == 7)
         assert(TestLoop.loop.get_loop(22, "2000B")['start_idx'] == 12)
         assert(TestLoop.loop.get_loop(37, "2000A")['start_idx'] == 7)
         assert(TestLoop.loop.get_loop(37, "2000B")['start_idx'] == 27)
+
+
         
 if __name__ == '__main__':
     unittest.main()        

From 6105e3b012a081313ae34c865bd78fba10c5113a Mon Sep 17 00:00:00 2001
From: Raven <raven.mukherjee@databricks.com>
Date: Fri, 10 May 2024 22:49:32 -0400
Subject: [PATCH 21/46] medical claim builder

---
 databricksx12/edi.py                          |   2 +
 databricksx12/hls/claim.py                    | 109 +++++++++---------
 databricksx12/hls/support_classes/__init__.py |   0
 .../hls/support_classes/identities.py         |  73 ++++++++++++
 setup.py                                      |   3 +-
 5 files changed, 129 insertions(+), 58 deletions(-)
 create mode 100644 databricksx12/hls/support_classes/__init__.py
 create mode 100644 databricksx12/hls/support_classes/identities.py

diff --git a/databricksx12/edi.py b/databricksx12/edi.py
index 550fe83..905c33d 100644
--- a/databricksx12/edi.py
+++ b/databricksx12/edi.py
@@ -124,6 +124,8 @@ def toRows(self):
     def header(self):
         return self.data[0]
 
+
+
 class Segment():
 
     #
diff --git a/databricksx12/hls/claim.py b/databricksx12/hls/claim.py
index 8cada93..2e15487 100644
--- a/databricksx12/hls/claim.py
+++ b/databricksx12/hls/claim.py
@@ -1,6 +1,7 @@
-from databricksx12.edi import *
-from databricksx12.hls.loop import *
-import itertools
+from databricksx12.edi import EDI, AnsiX12Delim
+from databricksx12.hls.loop import Loop
+from databricksx12.hls.support_classes.identities import BillingIdentity, SubscriberIdentity, PatientIdentity
+from typing import List, Dict
 
 
 #
@@ -10,77 +11,68 @@
 
 class MedicalClaim(EDI):
 
-    def __init__(self,
-                 sender_loop = [],
-                 receiver_loop = [],
-                 billing_loop = [],
-                 subscriber_loop = [],
-                 patient_loop = [],
-                 claim_loop =  [],
-                 sl_loop = [] #service line loop
-                 ):
-        self.sender_loop = sender_loop
+    def __init__(
+        self,
+        sender_loop: List = [],
+        receiver_loop: List = [],
+        billing_loop: List = [],
+        subscriber_loop: List = [],
+        patient_loop: List = [],
+        claim_loop: List = [],
+        sl_loop: List = [],  # service line loop
+    ):
+        self.sender_loop = sender_loop # is this a loop or does it only occur once in a document?
         self.receiver_loop = receiver_loop
         self.billing_loop = billing_loop
         self.subscriber_loop = subscriber_loop
         self.patient_loop = patient_loop
         self.claim_loop = claim_loop
         self.sl_loop = sl_loop
-        
+
         self.build()
 
-    def billing_loop(self):
-        return {
-            "billing_prvdr_name": "TODO",
-            "billing_npi": "TODO",
-            "billing_street_address": "TODO",
-            "billing_zip_cd": "TODO",
-            "billing_state_cd": "TODO"
-            }
-    
-    def subscriber_loop(self):
-        return {
-            "TODO": "TODO"
-            }
+    def _populate_billing_loop(self) -> Dict[str, str]:
+        return BillingIdentity(self.billing_loop)
+
+    def _populate_subscriber_loop(self) -> Dict[str, str]:
+        return SubscriberIdentity(self.subscriber_loop)
 
     #
     #
     #
-    def patient_loop(self):
-        #Note - if this doesn't exist then its the same as subscriber loop
-        return {
-            "TODO": "TODO"
-            }
-    
-    def toJson(self):
-        {
-            **self.patient_loop(),
-            **self.subscriber_loop(),
-            **self.billing_loop()
-         }
+    def _populate_patient_loop(self) -> Dict[str, str]:
+        # Note - if this doesn't exist then its the same as subscriber loop
+        # Note to include in loop: information about subscriber/dependent relationship is marked by Element 2
+        # 01 = Spouse; 18 = Self; 19 = Child; G8 = Other
+        return PatientIdentity(self.patient_loop)
 
-
-    #not sure if this should be here or not, but you get the idea
-    def build():
-        self.billing_info = self.billing_loop()
-        self.subscriber_info = self.subscriber_loop()
-        self.patient_info = self.subscriber_loop() if self.patient_loop = [] else self.patient_loop()
-        
+    def toJson(self):
+        {**self.patient_loop(), **self.subscriber_loop(), **self.billing_loop()}
+
+    # not sure if this should be here or not, but you get the idea
+    def build(self) -> None:
+        self.billing_info = self._populate_billing_loop()
+        self.subscriber_info = self._populate_subscriber_loop()
+        self.patient_info = (
+            self._populate_subscriber_loop() if self.patient_loop == [] else self._populate_patient_loop()
+        )
 
 
 class Claim837i(MedicalClaim):
 
     NAME = "837I"
-
+    # sender / receiver ?
 
 # Format of 837P https://www.dhs.wisconsin.gov/publications/p0/p00265.pdf
 
+
 class Claim837p(MedicalClaim):
 
     NAME = "837P"
 
+
 class Claim835(MedicalClaim):
-    
+
     NAME = "835"
 
 
@@ -88,6 +80,7 @@ class Claim835(MedicalClaim):
 # Base claim builder (transaction -> 1 or more claims)
 #
 
+
 class ClaimBuilder(EDI):
     #
     # Given claim type (837i, 837p, etc), segments, and delim class, build claim level classes
@@ -97,7 +90,6 @@ def __init__(self, trnx_type_cls, trnx_data, delim_cls=AnsiX12Delim):
         self.format_cls = delim_cls
         self.trnx_cls = trnx_type_cls
         self.loop = Loop(trnx_data)
-        
 
     #
     # Builds a claim object from
@@ -109,13 +101,13 @@ def __init__(self, trnx_type_cls, trnx_data, delim_cls=AnsiX12Delim):
     #
     def build_claim(self, clm_segment, idx):
         return self.trnx_cls(
-            sender_loop = [],
-            receiver_loop = [],
-            billing_loop = self.loop.get_loop_segments(idx, "2000A"),
-            subscriber_loop = self.loop.get_loop_segments(idx, "2000B"),
-            patient_loop = self.loop.get_loop_segments(idx, "2000C"),
-            claim_loop =  [],
-            sl_loop = [] #service line loop
+            sender_loop=[],
+            receiver_loop=[], # assuming this is true of all claim types check!
+            billing_loop=self.loop.get_loop_segments(idx, "2000A"),
+            subscriber_loop=self.loop.get_loop_segments(idx, "2000B"),
+            patient_loop=self.loop.get_loop_segments(idx, "2000C"),
+            claim_loop=[],
+            sl_loop=[],  # service line loop
         )
 
     #
@@ -123,7 +115,10 @@ def build_claim(self, clm_segment, idx):
     #  @return a list of Claim for each "clm" segment
     #
     def build(self):
-        return [self.build_claim(seg, i) for i, seg in self.segments_by_name_index("CLM")]
+        return [
+            self.build_claim(seg, i) for i, seg in self.segments_by_name_index("CLM")
+        ]
+
 
 """
 sample_data_837i_edited = open("/sampledata/837/CHPW_Claimdata_edited.txt", "rb").read().decode("utf-8")
diff --git a/databricksx12/hls/support_classes/__init__.py b/databricksx12/hls/support_classes/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/databricksx12/hls/support_classes/identities.py b/databricksx12/hls/support_classes/identities.py
new file mode 100644
index 0000000..ffc41ca
--- /dev/null
+++ b/databricksx12/hls/support_classes/identities.py
@@ -0,0 +1,73 @@
+from databricksx12.edi import Segment
+from typing import List
+
+
+class Identity:
+    def __init__(self, segments: List[Segment]):
+        self.name: str = None
+        self.street: str = None
+        self.type: str = None
+        self.city: str = None
+        self.state: str = None
+        self.zip: str = None
+        self.build(segments)
+
+    def build(self, billing_loop: List[Segment]):
+        for segment in billing_loop:
+            if segment.element(0) == 'N3':
+                self.street = segment.element(1)
+            elif segment.element(0) == 'N4':
+                self.city = segment.element(1)
+                self.state = segment.element(2)
+                self.zip = segment.element(3)
+
+    def to_dict(self):
+        return {k: v for k, v in self.__dict__.items() if v is not None}
+
+
+class BillingIdentity(Identity):
+    def __init__(self, billing_segments: List[Segment]):
+        super().__init__(billing_segments)
+        self.npi = None
+        self.build_billing(billing_segments)
+
+    def build_billing(self, billing_loop: List[Segment]):
+        for segment in billing_loop:
+            if segment.element(0) == 'NM1':
+                if segment.element(1) == '85':      # Hardcoded to 85 for Billing Providers
+                    self.type = 'Organization' if segment.element(2) == '2' else 'Individual'
+                    self.name = segment.element(3) if self.type == 'Organization' else ' '.join([segment.element(3), segment.element(4), segment.element(5)])
+                    self.npi = segment.element(9)
+
+
+class SubscriberIdentity(Identity):
+    def __init__(self, subscriber_segments: List[Segment]):
+        super().__init__(subscriber_segments)
+        self.id_code = None
+        self.relationship_to_insured = None
+        self.build_subscriber(subscriber_segments)
+
+    def build_subscriber(self, subscriber_loop: List[Segment]):
+        for segment in subscriber_loop:
+            if segment.element(0) == 'NM1':
+                if segment.element(1) == 'IL':      # Hardcoded to IL for Insured
+                    self.type = 'Entity' if segment.element(2) == '2' else 'Individual'
+                    self.name = segment.element(3) if segment.element(2) == '2' else ' '.join([segment.element(3), segment.element(4), segment.element(5)])  # entity (element 2 == '2'): name is element 3 only
+                    self.id_code = segment.element(9)
+            elif segment.element(0) == 'SBR':
+                self.relationship_to_insured = 'Self' if segment.element(2) == '18' else 'Dependent'     # information about subscriber/dependent 01 = Spouse; 18 = Self; 19 = Child; G8 = Other
+
+
+class PatientIdentity(Identity):
+    def __init__(self, patient_segments: List[Segment]):
+        super().__init__(patient_segments)
+        self.id_code = None
+        self.build_patient(patient_segments)
+
+    def build_patient(self, patient_loop: List[Segment]):
+        for segment in patient_loop:
+            if segment.element(0) == 'NM1':
+                if segment.element(1) == 'QC':      # Hardcoded to QC for Patient
+                    self.type = 'Patient'
+                    self.name = ' '.join([segment.element(3), segment.element(4), segment.element(5)])
+
diff --git a/setup.py b/setup.py
index f087d59..b95d690 100644
--- a/setup.py
+++ b/setup.py
@@ -10,7 +10,8 @@
 setup(
     name="databricksx12",
     version="0.0.1",
-    python_requires='>=3.9.*',
+    # python_requires='>=3.9.*',
+    python_requires='>=3.9',
     author="",
     author_email="aaron.zavora@databricks.com",
     description= "Parser for handling x12 EDI transactions in Spark",

From 4cca5f61c5b237b13f399f9887593eacc8d06ac1 Mon Sep 17 00:00:00 2001
From: Raven <raven.mukherjee@databricks.com>
Date: Sun, 12 May 2024 23:48:10 -0400
Subject: [PATCH 22/46] added claim and service lines to claim builder

---
 databricksx12/hls/claim.py |  8 ++++----
 databricksx12/hls/loop.py  | 39 +++++++++++++++++++++++++++++++++++++-
 2 files changed, 42 insertions(+), 5 deletions(-)

diff --git a/databricksx12/hls/claim.py b/databricksx12/hls/claim.py
index 2e15487..1cb713c 100644
--- a/databricksx12/hls/claim.py
+++ b/databricksx12/hls/claim.py
@@ -101,13 +101,13 @@ def __init__(self, trnx_type_cls, trnx_data, delim_cls=AnsiX12Delim):
     #
     def build_claim(self, clm_segment, idx):
         return self.trnx_cls(
-            sender_loop=[],
-            receiver_loop=[], # assuming this is true of all claim types check!
+            sender_loop=self.loop.get_sender(),
+            receiver_loop=self.loop.get_receiver(), # assuming this is true of all claim types check!
             billing_loop=self.loop.get_loop_segments(idx, "2000A"),
             subscriber_loop=self.loop.get_loop_segments(idx, "2000B"),
             patient_loop=self.loop.get_loop_segments(idx, "2000C"),
-            claim_loop=[],
-            sl_loop=[],  # service line loop
+            claim_loop=self.loop.get_claim_loop(idx),
+            sl_loop=self.loop.get_service_line_loop(idx),  # service line loop
         )
 
     #
diff --git a/databricksx12/hls/loop.py b/databricksx12/hls/loop.py
index 005b3dd..91c76e6 100644
--- a/databricksx12/hls/loop.py
+++ b/databricksx12/hls/loop.py
@@ -19,7 +19,7 @@ def __init__(self, mappings=None):
             '23': {
                 'loop name': 'Patient',
                 'loop': '2000C'
-            }
+            },
         }
 
     #
@@ -162,6 +162,43 @@ def determine_previous_hl(self, pos):
                           filter(lambda x: x[1] < pos, self._start_indexes))
         except: 
             return None #when there is no preceding hl segment
+        
+    #
+    # Determine claim loop: starts at the clm index and ends at LX segment, or CLM segment, or end of data
+    #
+    def get_claim_loop(self, clm_idx):
+        sl_start_indexes = list(map(lambda x: x[0], filter(lambda x: x[0] > clm_idx, self.segments_by_name_index("LX"))))
+        clm_indexes = list(map(lambda x: x[0], filter(lambda x: x[0] > clm_idx, self.segments_by_name_index("CLM"))))
+
+        # A claim ends at whichever comes first after it: its first service
+        # line (LX), the next claim (CLM), or the end of the data. Taking the
+        # minimum across all three keeps a claim that has no LX of its own from
+        # running on into a later claim's segments.
+        boundaries = sl_start_indexes + clm_indexes + [len(self.data)]
+        clm_end_idx = min(boundaries)
+        
+        return self.data[clm_idx:clm_end_idx]
+    
+    #
+    # Determine service line loop: starts at the first LX segment after the clm index and ends at the next SE segment, or end of data
+    #
+    def get_service_line_loop(self, clm_idx):
+        sl_start_indexes = list(map(lambda x: x[0], filter(lambda x: x[0] > clm_idx, self.segments_by_name_index("LX"))))
+        tx_end_indexes = list(map(lambda x: x[0], filter(lambda x: x[0] > clm_idx, self.segments_by_name_index("SE"))))
+        # Determine the end of the service line loop
+        if sl_start_indexes:
+            sl_end_idx = min(tx_end_indexes + [len(self.data)])
+            return self.data[min(sl_start_indexes):sl_end_idx]
+        return []
+    
+    def get_sender(self):
+        return [x.element(2) for i, x in self.segments_by_name_index("GS")] # same as ISA06
+    
+    def get_receiver(self):
+        return [x.element(3) for i, x in self.segments_by_name_index("GS")] 
+
+
+
                 
 """
 sample_data_837i_edited = open("/sampledata/837/CHPW_Claimdata_edited.txt", "rb").read().decode("utf-8")

From 6faf2cd51276d8bc01f4c41c34b060b2312ce113 Mon Sep 17 00:00:00 2001
From: Raven <raven.mukherjee@databricks.com>
Date: Mon, 13 May 2024 15:14:41 -0400
Subject: [PATCH 23/46] filled in claim lines in claim build

---
 databricksx12/hls/claim.py                    | 16 +++++++++-----
 databricksx12/hls/loop.py                     |  9 ++------
 .../hls/support_classes/identities.py         | 22 ++++++++++++++++---
 3 files changed, 31 insertions(+), 16 deletions(-)

diff --git a/databricksx12/hls/claim.py b/databricksx12/hls/claim.py
index 1cb713c..d39d4b2 100644
--- a/databricksx12/hls/claim.py
+++ b/databricksx12/hls/claim.py
@@ -1,6 +1,6 @@
 from databricksx12.edi import EDI, AnsiX12Delim
 from databricksx12.hls.loop import Loop
-from databricksx12.hls.support_classes.identities import BillingIdentity, SubscriberIdentity, PatientIdentity
+from databricksx12.hls.support_classes.identities import BillingIdentity, SubscriberIdentity, PatientIdentity, ClaimIdentity
 from typing import List, Dict
 
 
@@ -8,7 +8,6 @@
 # Base claim class
 #
 
-
 class MedicalClaim(EDI):
 
     def __init__(
@@ -21,7 +20,7 @@ def __init__(
         claim_loop: List = [],
         sl_loop: List = [],  # service line loop
     ):
-        self.sender_loop = sender_loop # is this a loop or does it only occur once in a document?
+        self.sender_loop = sender_loop
         self.receiver_loop = receiver_loop
         self.billing_loop = billing_loop
         self.subscriber_loop = subscriber_loop
@@ -31,6 +30,7 @@ def __init__(
 
         self.build()
 
+
     def _populate_billing_loop(self) -> Dict[str, str]:
         return BillingIdentity(self.billing_loop)
 
@@ -45,9 +45,12 @@ def _populate_patient_loop(self) -> Dict[str, str]:
         # Note to include in loop: information about subscriber/dependent relationship is marked by Element 2
         # 01 = Spouse; 18 = Self; 19 = Child; G8 = Other
         return PatientIdentity(self.patient_loop)
+    
+    def _populate_claim_loop(self) -> Dict[str, str]:
+        return ClaimIdentity(self.claim_loop)
 
     def toJson(self):
-        {**self.patient_loop(), **self.subscriber_loop(), **self.billing_loop()}
+        return {**self.claim_info.to_dict(), **self.patient_info.to_dict(), **self.subscriber_info.to_dict(), **self.billing_info.to_dict()}  # *_loop attributes are segment lists, not callables
 
     # not sure if this should be here or not, but you get the idea
     def build(self) -> None:
@@ -56,6 +59,7 @@ def build(self) -> None:
         self.patient_info = (
             self._populate_subscriber_loop() if self.patient_loop == [] else self._populate_patient_loop()
         )
+        self.claim_info = self._populate_claim_loop()
 
 
 class Claim837i(MedicalClaim):
@@ -101,8 +105,8 @@ def __init__(self, trnx_type_cls, trnx_data, delim_cls=AnsiX12Delim):
     #
     def build_claim(self, clm_segment, idx):
         return self.trnx_cls(
-            sender_loop=self.loop.get_sender(),
-            receiver_loop=self.loop.get_receiver(), # assuming this is true of all claim types check!
+            sender_loop=[],
+            receiver_loop=[], # assuming this is true of all claim types check!
             billing_loop=self.loop.get_loop_segments(idx, "2000A"),
             subscriber_loop=self.loop.get_loop_segments(idx, "2000B"),
             patient_loop=self.loop.get_loop_segments(idx, "2000C"),
diff --git a/databricksx12/hls/loop.py b/databricksx12/hls/loop.py
index 91c76e6..3e063e1 100644
--- a/databricksx12/hls/loop.py
+++ b/databricksx12/hls/loop.py
@@ -45,6 +45,7 @@ def __init__(self, data, delim_cls=AnsiX12Delim, loop_mapping=LoopMapping()):
         self.mapping = loop_mapping
         self._start_indexes = self._build_hierarchy_start_indexes()
         self.loop_hierarchy = self.build_hierarchy()
+
         """
         loop_hierarchy = { unique_id : {
             start_idx : ""
@@ -190,14 +191,8 @@ def get_service_line_loop(self, clm_idx):
             sl_end_idx = min(tx_end_indexes + [len(self.data)])
             return self.data[min(sl_start_indexes):sl_end_idx]
         return []
-    
-    def get_sender(self):
-        return [x.element(2) for i, x in self.segments_by_name_index("GS")] # same as ISA06
-    
-    def get_receiver(self):
-        return [x.element(3) for i, x in self.segments_by_name_index("GS")] 
-
 
+    
 
                 
 """
diff --git a/databricksx12/hls/support_classes/identities.py b/databricksx12/hls/support_classes/identities.py
index ffc41ca..56f9dcd 100644
--- a/databricksx12/hls/support_classes/identities.py
+++ b/databricksx12/hls/support_classes/identities.py
@@ -12,8 +12,8 @@ def __init__(self, segments: List[Segment]):
         self.zip: str = None
         self.build(segments)
 
-    def build(self, billing_loop: List[Segment]):
-        for segment in billing_loop:
+    def build(self, loop: List[Segment]):
+        for segment in loop:
             if segment.element(0) == 'N3':
                 self.street = segment.element(1)
             elif segment.element(0) == 'N4':
@@ -61,7 +61,6 @@ def build_subscriber(self, subscriber_loop: List[Segment]):
 class PatientIdentity(Identity):
     def __init__(self, patient_segments: List[Segment]):
         super().__init__(patient_segments)
-        self.id_code = None
         self.build_patient(patient_segments)
 
     def build_patient(self, patient_loop: List[Segment]):
@@ -71,3 +70,20 @@ def build_patient(self, patient_loop: List[Segment]):
                     self.type = 'Patient'
                     self.name = ' '.join([segment.element(3), segment.element(4), segment.element(5)])
 
+
+class ClaimIdentity(Identity):
+    def __init__(self, claim_segments: List[Segment]):
+        super().__init__(claim_segments)
+        self.id_code = None
+        self.facility_code = None
+        self.claim_amount = None
+        self.build_claim_lines(claim_segments)
+
+    def build_claim_lines(self, claim_loop: List[Segment]):
+        for segment in claim_loop:
+            if segment.element(0) == 'CLM':
+                self.id_code = segment.element(1) # submitter's identifier
+                self.claim_amount = segment.element(2)
+                if segment.element(5).split(':')[1] == 'B':
+                    self.facility_code = 'Outpatient Hospital' if segment.element(3).split(':')[0]== 22 else 'Other'
+                
\ No newline at end of file

From 18a73d4bc90cb941212ee73219ce26032188991c Mon Sep 17 00:00:00 2001
From: Raven <raven.mukherjee@databricks.com>
Date: Mon, 13 May 2024 15:48:00 -0400
Subject: [PATCH 24/46] fixed a claim line element

---
 databricksx12/hls/support_classes/identities.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/databricksx12/hls/support_classes/identities.py b/databricksx12/hls/support_classes/identities.py
index 56f9dcd..8c03336 100644
--- a/databricksx12/hls/support_classes/identities.py
+++ b/databricksx12/hls/support_classes/identities.py
@@ -85,5 +85,5 @@ def build_claim_lines(self, claim_loop: List[Segment]):
                 self.id_code = segment.element(1) # submitter's identifier
                 self.claim_amount = segment.element(2)
                 if segment.element(5).split(':')[1] == 'B':
-                    self.facility_code = 'Outpatient Hospital' if segment.element(3).split(':')[0]== 22 else 'Other'
+                    self.facility_code = 'Outpatient Hospital' if segment.element(5).split(':')[0]== 22 else 'Other'
                 
\ No newline at end of file

From 797d6e44c96fbce41be0094792ab07ca880ba05a Mon Sep 17 00:00:00 2001
From: Raven <raven.mukherjee@databricks.com>
Date: Tue, 14 May 2024 17:28:46 -0400
Subject: [PATCH 25/46] populated submitter and receiver info within claim

---
 databricksx12/hls/claim.py                    | 26 +++++----
 databricksx12/hls/loop.py                     | 12 ++++-
 .../hls/support_classes/identities.py         | 53 ++++++++++++++++++-
 3 files changed, 77 insertions(+), 14 deletions(-)

diff --git a/databricksx12/hls/claim.py b/databricksx12/hls/claim.py
index d39d4b2..03ab1d7 100644
--- a/databricksx12/hls/claim.py
+++ b/databricksx12/hls/claim.py
@@ -1,6 +1,6 @@
 from databricksx12.edi import EDI, AnsiX12Delim
 from databricksx12.hls.loop import Loop
-from databricksx12.hls.support_classes.identities import BillingIdentity, SubscriberIdentity, PatientIdentity, ClaimIdentity
+from databricksx12.hls.support_classes.identities import BillingIdentity, SubscriberIdentity, PatientIdentity, ClaimIdentity, SubmitterIdentity, ReceiverIdentity
 from typing import List, Dict
 
 
@@ -12,16 +12,14 @@ class MedicalClaim(EDI):
 
     def __init__(
         self,
-        sender_loop: List = [],
-        receiver_loop: List = [],
+        sender_receiver_loop: List = [],
         billing_loop: List = [],
         subscriber_loop: List = [],
         patient_loop: List = [],
         claim_loop: List = [],
-        sl_loop: List = [],  # service line loop
+        sl_loop: List = [], 
     ):
-        self.sender_loop = sender_loop
-        self.receiver_loop = receiver_loop
+        self.sender_receiver_loop = sender_receiver_loop # extracted together
         self.billing_loop = billing_loop
         self.subscriber_loop = subscriber_loop
         self.patient_loop = patient_loop
@@ -30,7 +28,12 @@ def __init__(
 
         self.build()
 
-
+    def _populate_submitter_loop(self) -> Dict[str, str]:
+        return SubmitterIdentity(self.sender_receiver_loop)
+    
+    def _populate_receiver_loop(self) -> Dict[str, str]:
+        return ReceiverIdentity(self.sender_receiver_loop)
+    
     def _populate_billing_loop(self) -> Dict[str, str]:
         return BillingIdentity(self.billing_loop)
 
@@ -48,12 +51,15 @@ def _populate_patient_loop(self) -> Dict[str, str]:
     
     def _populate_claim_loop(self) -> Dict[str, str]:
         return ClaimIdentity(self.claim_loop)
+    
 
     def toJson(self):
-        {**self.claim_loop(), **self.patient_loop(), **self.subscriber_loop(), **self.billing_loop()}
+        {**self.sender_receiver_loop(), **self.claim_loop(), **self.patient_loop(), **self.subscriber_loop(), **self.billing_loop()}
 
     # not sure if this should be here or not, but you get the idea
     def build(self) -> None:
+        self.submitter_info = self._populate_submitter_loop()
+        self.receiver_info = self._populate_receiver_loop()
         self.billing_info = self._populate_billing_loop()
         self.subscriber_info = self._populate_subscriber_loop()
         self.patient_info = (
@@ -62,6 +68,7 @@ def build(self) -> None:
         self.claim_info = self._populate_claim_loop()
 
 
+
 class Claim837i(MedicalClaim):
 
     NAME = "837I"
@@ -105,8 +112,7 @@ def __init__(self, trnx_type_cls, trnx_data, delim_cls=AnsiX12Delim):
     #
     def build_claim(self, clm_segment, idx):
         return self.trnx_cls(
-            sender_loop=[],
-            receiver_loop=[], # assuming this is true of all claim types check!
+            sender_receiver_loop=self.loop.get_submitter_receiver_loop(idx),
             billing_loop=self.loop.get_loop_segments(idx, "2000A"),
             subscriber_loop=self.loop.get_loop_segments(idx, "2000B"),
             patient_loop=self.loop.get_loop_segments(idx, "2000C"),
diff --git a/databricksx12/hls/loop.py b/databricksx12/hls/loop.py
index 3e063e1..8654b22 100644
--- a/databricksx12/hls/loop.py
+++ b/databricksx12/hls/loop.py
@@ -186,13 +186,21 @@ def get_claim_loop(self, clm_idx):
     def get_service_line_loop(self, clm_idx):
         sl_start_indexes = list(map(lambda x: x[0], filter(lambda x: x[0] > clm_idx, self.segments_by_name_index("LX"))))
         tx_end_indexes = list(map(lambda x: x[0], filter(lambda x: x[0] > clm_idx, self.segments_by_name_index("SE"))))
-        # Determine the end of the service line loop
         if sl_start_indexes:
             sl_end_idx = min(tx_end_indexes + [len(self.data)])
             return self.data[min(sl_start_indexes):sl_end_idx]
         return []
 
-    
+    def get_submitter_receiver_loop(self, clm_idx):
+        bht_start_indexes = list(map(lambda x: x[0], filter(lambda x: x[0] < clm_idx, self.segments_by_name_index("BHT"))))
+        bht_end_indexes = list(map(lambda x: x[0], filter(lambda x: x[0] < clm_idx and x[1].element(3) == '20', self.segments_by_name_index("HL"))))
+        if bht_start_indexes:
+            sub_rec_start_idx = max(bht_start_indexes)
+            sub_rec_end_idx = max(bht_end_indexes)
+
+            return self.data[sub_rec_start_idx:sub_rec_end_idx]
+        return []
+
 
                 
 """
diff --git a/databricksx12/hls/support_classes/identities.py b/databricksx12/hls/support_classes/identities.py
index 8c03336..98dacd4 100644
--- a/databricksx12/hls/support_classes/identities.py
+++ b/databricksx12/hls/support_classes/identities.py
@@ -51,7 +51,7 @@ def build_subscriber(self, subscriber_loop: List[Segment]):
         for segment in subscriber_loop:
             if segment.element(0) == 'NM1':
                 if segment.element(1) == 'IL':      # Hardcoded to IL for Insured
-                    self.type = 'Entity' if segment.element(2) == '2' else 'Individual'
+                    self.type = 'Organization' if segment.element(2) == '2' else 'Individual'
                     self.name = segment.element(3) if self.type == 'Organization' else ' '.join([segment.element(3), segment.element(4), segment.element(5)])
                     self.id_code = segment.element(9)
             elif segment.element(0) == 'SBR':
@@ -86,4 +86,53 @@ def build_claim_lines(self, claim_loop: List[Segment]):
                 self.claim_amount = segment.element(2)
                 if segment.element(5).split(':')[1] == 'B':
                     self.facility_code = 'Outpatient Hospital' if segment.element(5).split(':')[0]== 22 else 'Other'
-                
\ No newline at end of file
+
+class SubmitterIdentity(Identity):
+    def __init__(self, submitter_segments: List[Segment]):
+        super().__init__(submitter_segments)
+        self.tax_id = None
+        self.contact_name = None
+        self.contacts = []
+        self.build_submitter_lines(submitter_segments)
+
+    def build_submitter_lines(self, submitter_loop: List[Segment]):
+        contact_methods = {
+            'EM': 'Email',
+            'TE': 'Telephone',
+            'FX': 'Fax'
+        }
+        for segment in submitter_loop:
+            if segment.element(0) == 'NM1' and segment.element(1) == '41':
+                self.type = 'Organization' if segment.element(2) == '2' else 'Individual'
+                self.name = segment.element(3) if self.type == 'Organization' else ' '.join([segment.element(3), segment.element(4), segment.element(5)])
+                self.tax_id = segment.element(9) # id
+            elif segment.element(0) == 'PER':
+                self.contact_name = segment.element(2)
+                contact = {
+                    'contact_method': contact_methods.get(segment.element(3), 'Unknown method'),
+                    'contact_number': segment.element(4)
+                    }
+                # Add additional contact details if present
+                if segment.element(5) in contact_methods:
+                    contact['second_contact_method'] = contact_methods.get(segment.element(5), 'Unknown method')
+                    contact['second_contact_number'] = segment.element(6)
+                
+                if segment.element(7) in contact_methods:
+                    contact['other_contact_method'] = contact_methods.get(segment.element(7), 'Unknown method')
+                    contact['other_contact_number'] = segment.element(8)
+                
+                self.contacts.append(contact)
+
+
+class ReceiverIdentity(Identity):
+    def __init__(self, receiver_segments: List[Segment]):
+        super().__init__(receiver_segments)
+        self.id_code = None
+        self.build_receiver_lines(receiver_segments)
+
+    def build_receiver_lines(self, receiver_loop: List[Segment]):
+        for segment in receiver_loop:
+            if segment.element(0) == 'NM1' and segment.element(1) == '40':
+                self.type = 'Organization' if segment.element(2) == '2' else 'Individual'
+                self.name = segment.element(3) if self.type == 'Organization' else ' '.join([segment.element(3), segment.element(4), segment.element(5)])
+                self.id_code = segment.element(9) # id
\ No newline at end of file

From 040dcdbdd8dc8048990f87e2bcb4e5bd8064cee4 Mon Sep 17 00:00:00 2001
From: Raven <raven.mukherjee@databricks.com>
Date: Tue, 14 May 2024 22:29:36 -0400
Subject: [PATCH 26/46] service lines professional and institutional

---
 databricksx12/hls/claim.py                    |  6 +-
 .../hls/support_classes/identities.py         | 99 ++++++++++++++-----
 2 files changed, 81 insertions(+), 24 deletions(-)

diff --git a/databricksx12/hls/claim.py b/databricksx12/hls/claim.py
index 03ab1d7..dd248c1 100644
--- a/databricksx12/hls/claim.py
+++ b/databricksx12/hls/claim.py
@@ -1,6 +1,6 @@
 from databricksx12.edi import EDI, AnsiX12Delim
 from databricksx12.hls.loop import Loop
-from databricksx12.hls.support_classes.identities import BillingIdentity, SubscriberIdentity, PatientIdentity, ClaimIdentity, SubmitterIdentity, ReceiverIdentity
+from databricksx12.hls.support_classes.identities import BillingIdentity, SubscriberIdentity, PatientIdentity, ClaimIdentity, SubmitterIdentity, ReceiverIdentity, ServiceIdentity
 from typing import List, Dict
 
 
@@ -51,6 +51,9 @@ def _populate_patient_loop(self) -> Dict[str, str]:
     
     def _populate_claim_loop(self) -> Dict[str, str]:
         return ClaimIdentity(self.claim_loop)
+
+    def _populate_sl_loop(self) -> Dict[str, str]:
+        return ServiceIdentity(self.sl_loop)
     
 
     def toJson(self):
@@ -66,6 +69,7 @@ def build(self) -> None:
             self._populate_subscriber_loop() if self.patient_loop == [] else self._populate_patient_loop()
         )
         self.claim_info = self._populate_claim_loop()
+        self.sl_info = self._populate_sl_loop()
 
 
 
diff --git a/databricksx12/hls/support_classes/identities.py b/databricksx12/hls/support_classes/identities.py
index 98dacd4..6377da6 100644
--- a/databricksx12/hls/support_classes/identities.py
+++ b/databricksx12/hls/support_classes/identities.py
@@ -82,10 +82,14 @@ def __init__(self, claim_segments: List[Segment]):
     def build_claim_lines(self, claim_loop: List[Segment]):
         for segment in claim_loop:
             if segment.element(0) == 'CLM':
+                # TODO Inst/Prof
                 self.id_code = segment.element(1) # submitter's identifier
                 self.claim_amount = segment.element(2)
-                if segment.element(5).split(':')[1] == 'B':
+                if segment.element(5).split(':')[1] == 'B': # professional claims
                     self.facility_code = 'Outpatient Hospital' if segment.element(5).split(':')[0]== 22 else 'Other'
+            # TODO: additional provider lines?
+
+
 
 class SubmitterIdentity(Identity):
     def __init__(self, submitter_segments: List[Segment]):
@@ -96,32 +100,38 @@ def __init__(self, submitter_segments: List[Segment]):
         self.build_submitter_lines(submitter_segments)
 
     def build_submitter_lines(self, submitter_loop: List[Segment]):
+        for segment in submitter_loop:
+            if segment.element(0) == 'NM1'and segment.element(1) == '41':
+                self.process_nm1_segment(segment)
+            elif segment.element(0) == 'PER':
+                self.process_per_segment(segment)
+    
+    def process_nm1_segment(self, segment):
+        self.type = 'Organization' if segment.element(2) == '2' else 'Individual'
+        self.name = segment.element(3) if self.type == 'Organization' else ' '.join([segment.element(3), segment.element(4), segment.element(5)])
+        self.tax_id = segment.element(9) # id
+
+    def process_per_segment(self, segment):
+        self.contact_name = segment.element(2)
         contact_methods = {
             'EM': 'Email',
             'TE': 'Telephone',
             'FX': 'Fax'
         }
-        for segment in submitter_loop:
-            if segment.element(0) == 'NM1' and segment.element(1) == '41':
-                self.type = 'Organization' if segment.element(2) == '2' else 'Individual'
-                self.name = segment.element(3) if self.type == 'Organization' else ' '.join([segment.element(3), segment.element(4), segment.element(5)])
-                self.tax_id = segment.element(9) # id
-            elif segment.element(0) == 'PER':
-                self.contact_name = segment.element(2)
-                contact = {
-                    'contact_method': contact_methods.get(segment.element(3), 'Unknown method'),
-                    'contact_number': segment.element(4)
-                    }
-                # Add additional contact details if present
-                if segment.element(5) in contact_methods:
-                    contact['second_contact_method'] = contact_methods.get(segment.element(5), 'Unknown method')
-                    contact['second_contact_number'] = segment.element(6)
-                
-                if segment.element(7) in contact_methods:
-                    contact['other_contact_method'] = contact_methods.get(segment.element(7), 'Unknown method')
-                    contact['other_contact_number'] = segment.element(8)
-                
-                self.contacts.append(contact)
+        contact = {
+            'contact_method': contact_methods.get(segment.element(3), 'Unknown method'),
+            'contact_number': segment.element(4)
+            }
+        # Add additional contact details if present
+        if segment.element(5) in contact_methods:
+            contact['contact_method_2'] = contact_methods.get(segment.element(5), 'Unknown method')
+            contact['contact_number_2'] = segment.element(6)
+        
+        if segment.element(7) in contact_methods:
+            contact['contact_method_3'] = contact_methods.get(segment.element(7), 'Unknown method')
+            contact['contact_number_3'] = segment.element(8)
+        
+        self.contacts.append(contact)
 
 
 class ReceiverIdentity(Identity):
@@ -135,4 +145,47 @@ def build_receiver_lines(self, receiver_loop: List[Segment]):
             if segment.element(0) == 'NM1' and segment.element(1) == '40':
                 self.type = 'Organization' if segment.element(2) == '2' else 'Individual'
                 self.name = segment.element(3) if self.type == 'Organization' else ' '.join([segment.element(3), segment.element(4), segment.element(5)])
-                self.id_code = segment.element(9) # id
\ No newline at end of file
+                self.id_code = segment.element(9) # id
+
+
+class ServiceIdentity(Identity):
+    def __init__(self, sl_segments: List[Segment]):
+            super().__init__(sl_segments)
+            self.services = {
+                'Professional': [],
+                'Institutional': []
+            }
+            self.build_sl_lines(sl_segments)
+
+    def build_sl_lines(self, sl_loop: List[Segment]):
+        for segment in sl_loop:
+            if segment.element(0) == 'SV1':  # Professional service
+                service = self.parse_professional_service(segment)
+                self.services['Professional'].append(service)
+            elif segment.element(0) == 'SV2':  # Institutional service
+                service = self.parse_institutional_service(segment)
+                self.services['Institutional'].append(service)
+
+    def parse_professional_service(self, segment: Segment):
+        service_type, procedure_code = segment.element(1).split(':')[0:2] #assuming 7 elements but choosing first two
+        return {
+            'Type of service/claim': 'Professional',
+            'Type': service_type,
+            'Procedure Code': procedure_code,
+            'Procedure Amount': segment.element(2)
+        }
+
+    def parse_institutional_service(self, segment: Segment):
+        revenue_code = segment.element(1)
+        service_type, procedure_code = segment.element(2).split(':')[0:2] #assuming 7 elements but choosing first two
+        return {
+            'Type of service/claim': 'Institutional',
+            'Revenue Code': revenue_code,
+            'Type': service_type,
+            'Procedure Code': procedure_code,
+            'Procedure Amount': segment.element(3)
+        }
+
+
+                
+

From b654d9383097d81da54960a4e3d1803a1dc7d028 Mon Sep 17 00:00:00 2001
From: Aaron Zavora <aaron.zavora@databricks.com>
Date: Mon, 20 May 2024 11:20:15 -0400
Subject: [PATCH 27/46] Update README.md

---
 README.md | 35 ++++++++++++++++++++++++++++++++---
 1 file changed, 32 insertions(+), 3 deletions(-)

diff --git a/README.md b/README.md
index 8a27233..4be7046 100644
--- a/README.md
+++ b/README.md
@@ -20,8 +20,8 @@ pip install git+https://github.com/databricks-industry-solutions/x12-edi-parser
 Default format used is AnsiX12 (* as a delim and ~ as segment separator)
 
 ```python
-from databricksx12.format import *
-from databricksx12.edi import *
+from databricksx12 import *
+
 ediFormat = AnsiX12Delim #specifying formats of data, ansi is also the default if nothing is specified 
 df = spark.read.text("sampledata/837/*", wholetext = True)
 
@@ -92,7 +92,8 @@ from pyspark.sql.functions import input_file_name
 #### Parsing Healthcare Transactions
 
 ```python
-from databricksx12.hls.healthcare import *
+from databricksx12 import *
+from databricksx12.hls import *
 
 hm = HealthcareManager()
 x =  EDI(open("sampledata/837/CHPW_Claimdata.txt", "rb").read().decode("utf-8"))
@@ -101,6 +102,34 @@ hm.from_edi(x)
 #[<databricksx12.hls.claim.Claim837p object at 0x1027003d0>, <databricksx12.hls.claim.Claim837p object at 0x1027006a0>, <databricksx12.hls.claim.Claim837p object at 0x102700700>, <databricksx12.hls.claim.Claim837p object at 0x102700550>, <databricksx12.hls.claim.Claim837p object at 0x1027002b0>]
 
 one_claim = hm.from_edi(x)[0]
+
+#print a json representation of a claim
+import json
+print(json.dumps(one_claim.toJson(), indent=4))
+"""
+{
+    "submitter": {
+        "name": "CLEARINGHOUSE LLC",
+        "type": "Organization",
+        "tax_id": "987654321",
+        "contact_name": "CLEARINGHOUSE CLIENT SERVICES",
+        "contacts": [
+            {
+                "contact_method": "Telephone",
+                "contact_number": "8005551212",
+                "contact_method_2": "Fax",
+                "contact_number_2": "8005551212"
+            }
+        ]
+    },
+    "reciever": {
+        "name": "123456789",
+        "type": "Organization",
+        "id_code": "CHPWA"
+    },
+    "subscriber": {...
+"""
+#print raw EDI Segments
 print("\n".join([y.data for y in one_claim.data])) #Print one claim to look at the segments of it
 """
 BHT*0019*00*7349063984*20180508*0833*CH

From 1ee215d363785cecc514011ce51c58f840eba966 Mon Sep 17 00:00:00 2001
From: Aaron Z <aaron.zavora@databricks.com>
Date: Mon, 20 May 2024 11:23:36 -0400
Subject: [PATCH 28/46] add package exports; move loop helpers into claim.py

---
 databricksx12/__init__.py     |  5 +++
 databricksx12/hls/__init__.py |  3 ++
 databricksx12/hls/claim.py    | 74 +++++++++++++++++++++++++++++++++--
 databricksx12/hls/loop.py     | 38 ------------------
 4 files changed, 78 insertions(+), 42 deletions(-)
 create mode 100644 databricksx12/hls/__init__.py

diff --git a/databricksx12/__init__.py b/databricksx12/__init__.py
index 8b13789..bbfa1e5 100644
--- a/databricksx12/__init__.py
+++ b/databricksx12/__init__.py
@@ -1 +1,6 @@
+from .edi import *
+from .format import *
+from .functional import *
+from .transaction import *
+
 
diff --git a/databricksx12/hls/__init__.py b/databricksx12/hls/__init__.py
new file mode 100644
index 0000000..4785b04
--- /dev/null
+++ b/databricksx12/hls/__init__.py
@@ -0,0 +1,3 @@
+from .healthcare import *
+from .claim import *
+from .loop import *
diff --git a/databricksx12/hls/claim.py b/databricksx12/hls/claim.py
index dd248c1..4804576 100644
--- a/databricksx12/hls/claim.py
+++ b/databricksx12/hls/claim.py
@@ -56,8 +56,36 @@ def _populate_sl_loop(self) -> Dict[str, str]:
         return ServiceIdentity(self.sl_loop)
     
 
+    """
+    Overall Asks
+    - Coordination of Benefits flag
+    - Patient / Subscriber same person flag
+
+    Claim needs
+    - principal ICD10 diagnosis code
+    - other ICD10 diagnosis codes as an array
+    - hcfa place of service
+    - claim id?
+    - admission type code
+    - facility type code
+    - claim frequency code
+
+    Claim line needs
+    - This should return an array 
+    
+    Servicing provider needs
+    - TBD
+    """
     def toJson(self):
-        {**self.sender_receiver_loop(), **self.claim_loop(), **self.patient_loop(), **self.subscriber_loop(), **self.billing_loop()}
+        return {
+            **{'submitter': self.submitter_info.to_dict()},
+            **{'reciever': self.receiver_info.to_dict()},
+            **{'subscriber': self.subscriber_info.to_dict()},
+            **{'patient': self.patient_info.to_dict()},
+            **{'billing_provider': self.billing_info.to_dict()},
+            **{'claim_header': self.claim_info.to_dict()},
+            **{'claim_lines': self.sl_info.to_dict()}
+        }
 
     # not sure if this should be here or not, but you get the idea
     def build(self) -> None:
@@ -116,14 +144,52 @@ def __init__(self, trnx_type_cls, trnx_data, delim_cls=AnsiX12Delim):
     #
     def build_claim(self, clm_segment, idx):
         return self.trnx_cls(
-            sender_receiver_loop=self.loop.get_submitter_receiver_loop(idx),
+            sender_receiver_loop=self.get_submitter_receiver_loop(idx),
             billing_loop=self.loop.get_loop_segments(idx, "2000A"),
             subscriber_loop=self.loop.get_loop_segments(idx, "2000B"),
             patient_loop=self.loop.get_loop_segments(idx, "2000C"),
-            claim_loop=self.loop.get_claim_loop(idx),
-            sl_loop=self.loop.get_service_line_loop(idx),  # service line loop
+            claim_loop=self.get_claim_loop(idx),
+            sl_loop=self.get_service_line_loop(idx),  # service line loop
         )
 
+    #
+    # Determine claim loop: starts at the clm index and ends at LX segment, or CLM segment, or end of data
+    #
+    def get_claim_loop(self, clm_idx):
+        sl_start_indexes = list(map(lambda x: x[0], filter(lambda x: x[0] > clm_idx, self.segments_by_name_index("LX"))))
+        clm_indexes = list(map(lambda x: x[0], filter(lambda x: x[0] > clm_idx, self.segments_by_name_index("CLM"))))
+
+        if sl_start_indexes:
+            clm_end_idx = min(sl_start_indexes)
+        elif clm_indexes:
+            clm_end_idx = min(clm_indexes + [len(self.data)])
+        else:
+            clm_end_idx = len(self.data)
+        
+        return self.data[clm_idx:clm_end_idx]
+
+    #
+    # fetch the indices of LX and CLM segments that are beyond the current clm index
+    #
+    def get_service_line_loop(self, clm_idx):
+        sl_start_indexes = list(map(lambda x: x[0], filter(lambda x: x[0] > clm_idx, self.segments_by_name_index("LX"))))
+        tx_end_indexes = list(map(lambda x: x[0], filter(lambda x: x[0] > clm_idx, self.segments_by_name_index("SE"))))
+        if sl_start_indexes:
+            sl_end_idx = min(tx_end_indexes + [len(self.data)])
+            return self.data[min(sl_start_indexes):sl_end_idx]
+        return []
+
+    def get_submitter_receiver_loop(self, clm_idx):
+        bht_start_indexes = list(map(lambda x: x[0], filter(lambda x: x[0] < clm_idx, self.segments_by_name_index("BHT"))))
+        bht_end_indexes = list(map(lambda x: x[0], filter(lambda x: x[0] < clm_idx and x[1].element(3) == '20', self.segments_by_name_index("HL"))))
+        if bht_start_indexes:
+            sub_rec_start_idx = max(bht_start_indexes)
+            sub_rec_end_idx = max(bht_end_indexes)
+
+            return self.data[sub_rec_start_idx:sub_rec_end_idx]
+        return []
+
+
     #
     # Given transaction type, transaction segments, and delim info, build out claims in the transaction
     #  @return a list of Claim for each "clm" segment
diff --git a/databricksx12/hls/loop.py b/databricksx12/hls/loop.py
index 8654b22..e7b4e33 100644
--- a/databricksx12/hls/loop.py
+++ b/databricksx12/hls/loop.py
@@ -164,44 +164,6 @@ def determine_previous_hl(self, pos):
         except: 
             return None #when there is no preceding hl segment
         
-    #
-    # Determine claim loop: starts at the clm index and ends at LX segment, or CLM segment, or end of data
-    #
-    def get_claim_loop(self, clm_idx):
-        sl_start_indexes = list(map(lambda x: x[0], filter(lambda x: x[0] > clm_idx, self.segments_by_name_index("LX"))))
-        clm_indexes = list(map(lambda x: x[0], filter(lambda x: x[0] > clm_idx, self.segments_by_name_index("CLM"))))
-
-        if sl_start_indexes:
-            clm_end_idx = min(sl_start_indexes)
-        elif clm_indexes:
-            clm_end_idx = min(clm_indexes + [len(self.data)])
-        else:
-            clm_end_idx = len(self.data)
-        
-        return self.data[clm_idx:clm_end_idx]
-    
-    #
-    # fetch the indices of LX and CLM segments that are beyond the current clm index
-    #
-    def get_service_line_loop(self, clm_idx):
-        sl_start_indexes = list(map(lambda x: x[0], filter(lambda x: x[0] > clm_idx, self.segments_by_name_index("LX"))))
-        tx_end_indexes = list(map(lambda x: x[0], filter(lambda x: x[0] > clm_idx, self.segments_by_name_index("SE"))))
-        if sl_start_indexes:
-            sl_end_idx = min(tx_end_indexes + [len(self.data)])
-            return self.data[min(sl_start_indexes):sl_end_idx]
-        return []
-
-    def get_submitter_receiver_loop(self, clm_idx):
-        bht_start_indexes = list(map(lambda x: x[0], filter(lambda x: x[0] < clm_idx, self.segments_by_name_index("BHT"))))
-        bht_end_indexes = list(map(lambda x: x[0], filter(lambda x: x[0] < clm_idx and x[1].element(3) == '20', self.segments_by_name_index("HL"))))
-        if bht_start_indexes:
-            sub_rec_start_idx = max(bht_start_indexes)
-            sub_rec_end_idx = max(bht_end_indexes)
-
-            return self.data[sub_rec_start_idx:sub_rec_end_idx]
-        return []
-
-
                 
 """
 sample_data_837i_edited = open("/sampledata/837/CHPW_Claimdata_edited.txt", "rb").read().decode("utf-8")

From 4238bfcde7c2b169ece3c5b6092c71ccf15915b1 Mon Sep 17 00:00:00 2001
From: Aaron Z <aaron.zavora@databricks.com>
Date: Mon, 20 May 2024 16:10:18 -0400
Subject: [PATCH 29/46] rename toJson to to_json; add HealthcareManager.to_json

---
 README.md                                      |  1 -
 databricksx12/edi.py                           |  5 ++---
 databricksx12/hls/claim.py                     |  3 +--
 databricksx12/hls/healthcare.py                | 18 ++++++++++++++++++
 .../hls/support_classes/identities.py          |  6 +-----
 5 files changed, 22 insertions(+), 11 deletions(-)

diff --git a/README.md b/README.md
index 4be7046..165ad2c 100644
--- a/README.md
+++ b/README.md
@@ -164,7 +164,6 @@ LX*1
 SV1*HC:H0003*20*UN*1***1
 DTP*472*D8*20180428
 REF*6R*142671
-
 """
 ```
 
diff --git a/databricksx12/edi.py b/databricksx12/edi.py
index 905c33d..e2ae36e 100644
--- a/databricksx12/edi.py
+++ b/databricksx12/edi.py
@@ -175,8 +175,6 @@ def filter(self, value, element, sub_element, dne="na/dne"):
         return self if value == self.get_element(element, sub_element, dne) else None
 
 
-
-
 #
 # Manage relationship heirarchy within EDI
 # 
@@ -225,7 +223,8 @@ def flatten(data = None):
             }
         else:
             return EDIManager.class_metadata(data)
-    
+
+
 
 """
 from databricksx12.edi import *
diff --git a/databricksx12/hls/claim.py b/databricksx12/hls/claim.py
index 4804576..8989192 100644
--- a/databricksx12/hls/claim.py
+++ b/databricksx12/hls/claim.py
@@ -76,7 +76,7 @@ def _populate_sl_loop(self) -> Dict[str, str]:
     Servicing provider needs
     - TBD
     """
-    def toJson(self):
+    def to_json(self):
         return {
             **{'submitter': self.submitter_info.to_dict()},
             **{'reciever': self.receiver_info.to_dict()},
@@ -104,7 +104,6 @@ def build(self) -> None:
 class Claim837i(MedicalClaim):
 
     NAME = "837I"
-    # sender / receiver ?
 
 # Format of 837P https://www.dhs.wisconsin.gov/publications/p0/p00265.pdf
 
diff --git a/databricksx12/hls/healthcare.py b/databricksx12/hls/healthcare.py
index 00cc22e..8745efd 100644
--- a/databricksx12/hls/healthcare.py
+++ b/databricksx12/hls/healthcare.py
@@ -32,4 +32,22 @@ def from_functional_group(self, fg):
     def from_transaction(self, trnx):
         return ClaimBuilder(self.mapping.get(trnx.transaction_type),
                             [x for x in trnx.data if x.segment_name() not in ['ST', 'SE']], trnx.format_cls).build()
+
+    #
+    # Convert all data to json data
+    #
+    def to_json(self, edi):
+        return {
+            **EDIManager.class_metadata(edi),
+            'FuncitonalGroup': [
+                {
+                    **EDIManager.class_metadata(fg),
+                    'Transactions': [
+                        {
+                            **EDIManager.class_metadata(trnx),
+                            'Claims': [clm.to_json() for clm in self.from_transaction(trnx)]
+                        } for trnx in fg.transaction_segments()]
+                } for fg in edi.functional_segments()] 
+        }
     
+
diff --git a/databricksx12/hls/support_classes/identities.py b/databricksx12/hls/support_classes/identities.py
index 6377da6..98da506 100644
--- a/databricksx12/hls/support_classes/identities.py
+++ b/databricksx12/hls/support_classes/identities.py
@@ -151,11 +151,7 @@ def build_receiver_lines(self, receiver_loop: List[Segment]):
 class ServiceIdentity(Identity):
     def __init__(self, sl_segments: List[Segment]):
             super().__init__(sl_segments)
-            self.services = {
-                'Professional': [],
-                'Institutional': []
-            }
-            self.build_sl_lines(sl_segments)
+            #self.claim_lines = build_sl_lines(sl_segments)
 
     def build_sl_lines(self, sl_loop: List[Segment]):
         for segment in sl_loop:

From adc737100ec6b6ca401462de15c8d077e7fb1737 Mon Sep 17 00:00:00 2001
From: Aaron Zavora <aaron.zavora@databricks.com>
Date: Mon, 20 May 2024 16:16:07 -0400
Subject: [PATCH 30/46] Update README.md

---
 README.md | 16 ++++++++++++----
 1 file changed, 12 insertions(+), 4 deletions(-)

diff --git a/README.md b/README.md
index 165ad2c..7649f69 100644
--- a/README.md
+++ b/README.md
@@ -94,18 +94,26 @@ from pyspark.sql.functions import input_file_name
 ```python
 from databricksx12 import *
 from databricksx12.hls import *
+import json
 
 hm = HealthcareManager()
-x =  EDI(open("sampledata/837/CHPW_Claimdata.txt", "rb").read().decode("utf-8"))
+edi =  EDI(open("sampledata/837/CHPW_Claimdata.txt", "rb").read().decode("utf-8"))
 
-hm.from_edi(x) 
+hm.from_edi(edi) 
 #[<databricksx12.hls.claim.Claim837p object at 0x1027003d0>, <databricksx12.hls.claim.Claim837p object at 0x1027006a0>, <databricksx12.hls.claim.Claim837p object at 0x102700700>, <databricksx12.hls.claim.Claim837p object at 0x102700550>, <databricksx12.hls.claim.Claim837p object at 0x1027002b0>]
 
+#TODO replace this with Spark tomorrow
+print(json.dumps(hm.to_json(edi), indent=4)) 
+
+
+"""
+TODO update tomorrow below
+"""
+
 one_claim = hm.from_edi(x)[0]
 
 #print a json representation of a claim
-import json
-print(json.dumps(one_claim.toJson(), indent=4))
+print(json.dumps(one_claim.to_json(), indent=4))
 """
 {
     "submitter": {

From 01c080fa7b6795cf3eafd6a93a6cf68385f16e91 Mon Sep 17 00:00:00 2001
From: Raven <raven.mukherjee@databricks.com>
Date: Mon, 20 May 2024 16:38:26 -0400
Subject: [PATCH 31/46] refactor identity classes to use filter/map/reduce

---
 .../hls/support_classes/identities.py         | 250 +++++++++++++-----
 1 file changed, 177 insertions(+), 73 deletions(-)

diff --git a/databricksx12/hls/support_classes/identities.py b/databricksx12/hls/support_classes/identities.py
index 6377da6..513025c 100644
--- a/databricksx12/hls/support_classes/identities.py
+++ b/databricksx12/hls/support_classes/identities.py
@@ -1,8 +1,25 @@
 from databricksx12.edi import Segment
-from typing import List
+from typing import List, Dict
 
+from collections import defaultdict
+from functools import reduce
 
 class Identity:
+    nm1_identifiers = {
+        '85': 'Billing Provider',  # entity that is billing for the services provided
+        '87': 'Pay-to Provider',   # entity to which payments are to be sent
+        'PR': 'Payer',             # insurance company or payer
+        'IL': 'Insured',           # insured individual
+        'QC': 'Patient',           # patient
+        '82': 'Rendering Provider',# individual or group that performed the service
+        'DN': 'Referring Provider',# doctor who referred the patient to another doctor
+        '77': 'Service Facility',  # location where the service was performed
+        'DQ': 'Supervising Provider', # provider who oversees the patient's care
+        '71': 'Attending Provider',# provider with primary responsibility for the patient at the time of service
+        'DK': 'Ordering Provider', # provider who ordered the service or item
+        'PE': 'Payee',             # entity receiving the payment
+    }
+        
     def __init__(self, segments: List[Segment]):
         self.name: str = None
         self.street: str = None
@@ -10,53 +27,106 @@ def __init__(self, segments: List[Segment]):
         self.city: str = None
         self.state: str = None
         self.zip: str = None
+        self.id: str = None
+        self.npi: str = None
         self.build(segments)
 
     def build(self, loop: List[Segment]):
-        for segment in loop:
-            if segment.element(0) == 'N3':
-                self.street = segment.element(1)
-            elif segment.element(0) == 'N4':
-                self.city = segment.element(1)
-                self.state = segment.element(2)
-                self.zip = segment.element(3)
+        nm1_segments = filter(lambda segment: segment.element(0) == 'NM1' and segment.segment_len() >= 10, loop)
+        n3_segments = filter(lambda segment: segment.element(0) == 'N3', loop)
+        n4_segments = filter(lambda segment: segment.element(0) == 'N4', loop)
+
+        list(map(self.process_nm1_segment, nm1_segments))
+        list(map(self.process_n3_segment, n3_segments))
+        list(map(self.process_n4_segment, n4_segments))
+        return self.to_dict()
+    
+    def process_nm1_segment(self, segment: Segment):
+        self.type = 'Organization' if segment.element(2) == '2' else 'Individual'
+        self.name = segment.element(3) if self.type == 'Organization' else ' '.join([segment.element(3), segment.element(4), segment.element(5)])
+        self.npi = segment.element(9) if len(segment.element(9)) == 10 else None
+        self.id = segment.element(9) if len(segment.element(9)) != 10 else None
+
+    def process_n3_segment(self, segment: Segment):
+        self.street = segment.element(1)
+
+    def process_n4_segment(self, segment: Segment):
+        self.city = segment.element(1)
+        self.state = segment.element(2)
+        self.zip = segment.element(3)
+
 
     def to_dict(self):
         return {k: v for k, v in self.__dict__.items() if v is not None}
+    
+    
+    @staticmethod
+    def group_segments_by_provider(loop: List[Segment], nm1_identifiers: dict) -> Dict[str, List[List[Segment]]]:
+        def reducer(acc, segment):
+            provider_type, grouped = acc
+            if segment.element(0) == 'NM1':
+                provider_type = nm1_identifiers.get(segment.element(1))
+                if provider_type:
+                    grouped[provider_type].append([segment])
+            elif provider_type:
+                grouped[provider_type][-1].append(segment)
+            return provider_type, grouped
+        
+        _, grouped = reduce(reducer, loop, (None, defaultdict(list)))
+        return grouped
 
 
 class BillingIdentity(Identity):
     def __init__(self, billing_segments: List[Segment]):
+        self.providers = defaultdict(list)
         super().__init__(billing_segments)
-        self.npi = None
         self.build_billing(billing_segments)
 
     def build_billing(self, billing_loop: List[Segment]):
-        for segment in billing_loop:
-            if segment.element(0) == 'NM1':
-                if segment.element(1) == '85':      # Hardcoded to 85 for Billing Providers
-                    self.type = 'Organization' if segment.element(2) == '2' else 'Individual'
-                    self.name = segment.element(3) if self.type == 'Organization' else ' '.join([segment.element(3), segment.element(4), segment.element(5)])
-                    self.npi = segment.element(9)
+        grouped_segments = self.group_segments_by_provider(billing_loop, self.nm1_identifiers)
+        self.providers = defaultdict(list, {
+            provider_type: [Identity(segments).to_dict() for segments in group]
+            for provider_type, group in grouped_segments.items()
+        })
+        return self.to_dict()
 
+    def to_dict(self):
+        base_dict = super().to_dict()
+        base_dict.update({
+            'providers': dict(self.providers)
+        })
+        return base_dict
+    
 
 class SubscriberIdentity(Identity):
     def __init__(self, subscriber_segments: List[Segment]):
-        super().__init__(subscriber_segments)
-        self.id_code = None
+        self.subscribers = defaultdict(list)
         self.relationship_to_insured = None
+        super().__init__(subscriber_segments)
         self.build_subscriber(subscriber_segments)
 
     def build_subscriber(self, subscriber_loop: List[Segment]):
-        for segment in subscriber_loop:
-            if segment.element(0) == 'NM1':
-                if segment.element(1) == 'IL':      # Hardcoded to IL for Insured
-                    self.type = 'Organization' if segment.element(2) == '2' else 'Individual'
-                    self.name = segment.element(3) if self.type == 'Organization' else ' '.join([segment.element(3), segment.element(4), segment.element(5)])
-                    self.id_code = segment.element(9)
-            elif segment.element(0) == 'SBR':
-                self.relationship_to_insured = 'Self' if segment.element(2) == '18' else 'Dependent'     # information about subscriber/dependent 01 = Spouse; 18 = Self; 19 = Child; G8 = Other
+        grouped_segments = self.group_segments_by_provider(subscriber_loop, self.nm1_identifiers)
+        self.subscribers = defaultdict(list, {
+            subscriber_type: [self.process_segments_with_relationship(segments).to_dict() for segments in group]
+            for subscriber_type, group in grouped_segments.items()
+        })
+        return self.to_dict()
+    
+    def process_segments_with_relationship(self, segments: List[Segment]) -> Identity:
+        identity = Identity(segments)
+        sbr_segment = next(filter(lambda s: s.element(0) == 'SBR', segments), None)
+        if sbr_segment:
+            identity.relationship_to_insured = 'Self' if sbr_segment.element(2) == '18' else 'Dependent'
+        return identity
 
+    def to_dict(self):
+        base_dict = super().to_dict()
+        base_dict.update({
+            'subscribers': dict(self.subscribers),
+            'relationship_to_insured': self.relationship_to_insured
+        })
+        return base_dict
 
 class PatientIdentity(Identity):
     def __init__(self, patient_segments: List[Segment]):
@@ -64,52 +134,72 @@ def __init__(self, patient_segments: List[Segment]):
         self.build_patient(patient_segments)
 
     def build_patient(self, patient_loop: List[Segment]):
-        for segment in patient_loop:
-            if segment.element(0) == 'NM1':
-                if segment.element(1) == 'QC':      # Hardcoded to QC for Patient
-                    self.type = 'Patient'
-                    self.name = ' '.join([segment.element(3), segment.element(4), segment.element(5)])
+        def process_patient_segment(segment: Segment):
+            self.type = 'Patient'
+            self.name = ' '.join([segment.element(3), segment.element(4), segment.element(5)])
+        return list(map(process_patient_segment, filter(lambda s: s.element(0) == 'NM1' and s.element(1) == 'QC', patient_loop)))
+
 
 
 class ClaimIdentity(Identity):
     def __init__(self, claim_segments: List[Segment]):
-        super().__init__(claim_segments)
-        self.id_code = None
-        self.facility_code = None
+        self.patient_id = None
         self.claim_amount = None
+        self.facility_type_code = None
+        self.claim_code_freq = None
+        self.date = None
+        self.providers = defaultdict(list)
+        super().__init__(claim_segments)
         self.build_claim_lines(claim_segments)
 
     def build_claim_lines(self, claim_loop: List[Segment]):
-        for segment in claim_loop:
+        def process_segment(segment: Segment):
             if segment.element(0) == 'CLM':
-                # TODO Inst/Prof
-                self.id_code = segment.element(1) # submitter's identifier
+                self.patient_id = segment.element(1)  # submitter's identifier
                 self.claim_amount = segment.element(2)
-                if segment.element(5).split(':')[1] == 'B': # professional claims
-                    self.facility_code = 'Outpatient Hospital' if segment.element(5).split(':')[0]== 22 else 'Other'
-            # TODO: additional provider lines?
+                codes = segment.element(5).split(':') # codes[1] == A for institutional and B for professional
+                self.facility_type_code = codes[0]
+                self.claim_code_freq = codes[2]
+
+            if segment.element(0) == 'DTP':
+                self.date = segment.element(3)  # format D8:CCYYMMDD
+
+            if segment.element(0) == 'NM1':
+                provider_type = self.nm1_identifiers.get(segment.element(1))
+                if provider_type:
+                    identity = Identity([segment])
+                    self.providers[provider_type].append(identity.to_dict())
+
+        list(map(process_segment, claim_loop))
+
+    def to_dict(self):
+        base_dict = super().to_dict()
+        base_dict.update({
+            'patient_id': self.patient_id,
+            'claim_amount': self.claim_amount,
+            'facility_type_code': self.facility_type_code,
+            'claim_code_freq': self.claim_code_freq,
+            'date': self.date,
+            'providers': dict(self.providers)
+        })
+        return base_dict
 
 
 
 class SubmitterIdentity(Identity):
     def __init__(self, submitter_segments: List[Segment]):
-        super().__init__(submitter_segments)
-        self.tax_id = None
         self.contact_name = None
         self.contacts = []
+        super().__init__(submitter_segments)
         self.build_submitter_lines(submitter_segments)
-
-    def build_submitter_lines(self, submitter_loop: List[Segment]):
-        for segment in submitter_loop:
-            if segment.element(0) == 'NM1'and segment.element(1) == '41':
-                self.process_nm1_segment(segment)
-            elif segment.element(0) == 'PER':
-                self.process_per_segment(segment)
     
-    def process_nm1_segment(self, segment):
-        self.type = 'Organization' if segment.element(2) == '2' else 'Individual'
-        self.name = segment.element(3) if self.type == 'Organization' else ' '.join([segment.element(3), segment.element(4), segment.element(5)])
-        self.tax_id = segment.element(9) # id
+    def build_submitter_lines(self, submitter_loop: List[Segment]):
+        nm1_segments = filter(lambda segment: segment.element(0) == 'NM1' and segment.element(1) == '41', submitter_loop)
+        per_segments = filter(lambda segment: segment.element(0) == 'PER', submitter_loop)
+
+        list(map(self.process_nm1_segment, nm1_segments))
+        list(map(self.process_per_segment, per_segments))
+        return self.to_dict()
 
     def process_per_segment(self, segment):
         self.contact_name = segment.element(2)
@@ -132,42 +222,50 @@ def process_per_segment(self, segment):
             contact['contact_number_3'] = segment.element(8)
         
         self.contacts.append(contact)
+    
+    def to_dict(self):
+        base_dict = super().to_dict()
+        base_dict.update({
+            'contact_name': self.contact_name,
+            'contacts': self.contacts
+        })
+        return base_dict
 
 
 class ReceiverIdentity(Identity):
     def __init__(self, receiver_segments: List[Segment]):
         super().__init__(receiver_segments)
-        self.id_code = None
         self.build_receiver_lines(receiver_segments)
 
     def build_receiver_lines(self, receiver_loop: List[Segment]):
-        for segment in receiver_loop:
-            if segment.element(0) == 'NM1' and segment.element(1) == '40':
-                self.type = 'Organization' if segment.element(2) == '2' else 'Individual'
-                self.name = segment.element(3) if self.type == 'Organization' else ' '.join([segment.element(3), segment.element(4), segment.element(5)])
-                self.id_code = segment.element(9) # id
+        nm1_segments = filter(lambda segment: segment.element(0) == 'NM1' and segment.element(1) == '40', receiver_loop)
+        list(map(self.process_nm1_segment, nm1_segments))
+        return self.to_dict()
 
 
 class ServiceIdentity(Identity):
     def __init__(self, sl_segments: List[Segment]):
-            super().__init__(sl_segments)
-            self.services = {
-                'Professional': [],
-                'Institutional': []
-            }
-            self.build_sl_lines(sl_segments)
+        self.services = {
+            'Professional': [],
+            'Institutional': []
+        }
+        super().__init__(sl_segments)
+        self.build_sl_lines(sl_segments)
 
     def build_sl_lines(self, sl_loop: List[Segment]):
-        for segment in sl_loop:
-            if segment.element(0) == 'SV1':  # Professional service
-                service = self.parse_professional_service(segment)
-                self.services['Professional'].append(service)
-            elif segment.element(0) == 'SV2':  # Institutional service
-                service = self.parse_institutional_service(segment)
-                self.services['Institutional'].append(service)
+        sv1_segments = filter(lambda segment: segment.element(0) == 'SV1', sl_loop)
+        sv2_segments = filter(lambda segment: segment.element(0) == 'SV2', sl_loop)
+
+        professional_services = map(self.parse_professional_service, sv1_segments)
+        institutional_services = map(self.parse_institutional_service, sv2_segments)
+
+        self.services['Professional'] = list(professional_services)
+        self.services['Institutional'] = list(institutional_services)
+
+        return self.to_dict()
 
     def parse_professional_service(self, segment: Segment):
-        service_type, procedure_code = segment.element(1).split(':')[0:2] #assuming 7 elements but choosing first two
+        service_type, procedure_code = segment.element(1).split(':')[0:2]  # assuming 7 elements but choosing first two
         return {
             'Type of service/claim': 'Professional',
             'Type': service_type,
@@ -177,7 +275,7 @@ def parse_professional_service(self, segment: Segment):
 
     def parse_institutional_service(self, segment: Segment):
         revenue_code = segment.element(1)
-        service_type, procedure_code = segment.element(2).split(':')[0:2] #assuming 7 elements but choosing first two
+        service_type, procedure_code = segment.element(2).split(':')[0:2]  # assuming 7 elements but choosing first two
         return {
             'Type of service/claim': 'Institutional',
             'Revenue Code': revenue_code,
@@ -186,6 +284,12 @@ def parse_institutional_service(self, segment: Segment):
             'Procedure Amount': segment.element(3)
         }
 
+    def to_dict(self):
+        base_dict = super().to_dict()
+        base_dict.update({
+            'services': self.services
+        })
+        return base_dict
 
                 
 

From 5639ff1433ed1df388c3df414f5ddd06204eb20e Mon Sep 17 00:00:00 2001
From: Raven <raven.mukherjee@databricks.com>
Date: Mon, 20 May 2024 18:29:57 -0400
Subject: [PATCH 32/46] adjust dictionary use in build

---
 .../hls/support_classes/identities.py         | 66 +++----------------
 1 file changed, 9 insertions(+), 57 deletions(-)

diff --git a/databricksx12/hls/support_classes/identities.py b/databricksx12/hls/support_classes/identities.py
index 513025c..9db1b8c 100644
--- a/databricksx12/hls/support_classes/identities.py
+++ b/databricksx12/hls/support_classes/identities.py
@@ -5,6 +5,8 @@
 from functools import reduce
 
 class Identity:
+
+    # provider name identities associated with every NM1 line
     nm1_identifiers = {
         '85': 'Billing Provider',  # entity that is billing for the services provided
         '87': 'Pay-to Provider',   # entity to which payments are to be sent
@@ -31,6 +33,7 @@ def __init__(self, segments: List[Segment]):
         self.npi: str = None
         self.build(segments)
 
+    # build name and address for any identity
     def build(self, loop: List[Segment]):
         nm1_segments = filter(lambda segment: segment.element(0) == 'NM1' and segment.segment_len() >= 10, loop)
         n3_segments = filter(lambda segment: segment.element(0) == 'N3', loop)
@@ -39,7 +42,6 @@ def build(self, loop: List[Segment]):
         list(map(self.process_nm1_segment, nm1_segments))
         list(map(self.process_n3_segment, n3_segments))
         list(map(self.process_n4_segment, n4_segments))
-        return self.to_dict()
     
     def process_nm1_segment(self, segment: Segment):
         self.type = 'Organization' if segment.element(2) == '2' else 'Individual'
@@ -88,16 +90,10 @@ def build_billing(self, billing_loop: List[Segment]):
             provider_type: [Identity(segments).to_dict() for segments in group]
             for provider_type, group in grouped_segments.items()
         })
-        return self.to_dict()
-
-    def to_dict(self):
-        base_dict = super().to_dict()
-        base_dict.update({
-            'providers': dict(self.providers)
-        })
-        return base_dict
     
 
+    #TODO class pay_to()
+
 class SubscriberIdentity(Identity):
     def __init__(self, subscriber_segments: List[Segment]):
         self.subscribers = defaultdict(list)
@@ -108,25 +104,11 @@ def __init__(self, subscriber_segments: List[Segment]):
     def build_subscriber(self, subscriber_loop: List[Segment]):
         grouped_segments = self.group_segments_by_provider(subscriber_loop, self.nm1_identifiers)
         self.subscribers = defaultdict(list, {
-            subscriber_type: [self.process_segments_with_relationship(segments).to_dict() for segments in group]
+            subscriber_type: [Identity(segments).to_dict() for segments in group]
             for subscriber_type, group in grouped_segments.items()
         })
-        return self.to_dict()
     
-    def process_segments_with_relationship(self, segments: List[Segment]) -> Identity:
-        identity = Identity(segments)
-        sbr_segment = next(filter(lambda s: s.element(0) == 'SBR', segments), None)
-        if sbr_segment:
-            identity.relationship_to_insured = 'Self' if sbr_segment.element(2) == '18' else 'Dependent'
-        return identity
 
-    def to_dict(self):
-        base_dict = super().to_dict()
-        base_dict.update({
-            'subscribers': dict(self.subscribers),
-            'relationship_to_insured': self.relationship_to_insured
-        })
-        return base_dict
 
 class PatientIdentity(Identity):
     def __init__(self, patient_segments: List[Segment]):
@@ -170,19 +152,7 @@ def process_segment(segment: Segment):
                     identity = Identity([segment])
                     self.providers[provider_type].append(identity.to_dict())
 
-        list(map(process_segment, claim_loop))
-
-    def to_dict(self):
-        base_dict = super().to_dict()
-        base_dict.update({
-            'patient_id': self.patient_id,
-            'claim_amount': self.claim_amount,
-            'facility_type_code': self.facility_type_code,
-            'claim_code_freq': self.claim_code_freq,
-            'date': self.date,
-            'providers': dict(self.providers)
-        })
-        return base_dict
+        return list(map(process_segment, claim_loop))
 
 
 
@@ -199,7 +169,6 @@ def build_submitter_lines(self, submitter_loop: List[Segment]):
 
         list(map(self.process_nm1_segment, nm1_segments))
         list(map(self.process_per_segment, per_segments))
-        return self.to_dict()
 
     def process_per_segment(self, segment):
         self.contact_name = segment.element(2)
@@ -222,14 +191,7 @@ def process_per_segment(self, segment):
             contact['contact_number_3'] = segment.element(8)
         
         self.contacts.append(contact)
-    
-    def to_dict(self):
-        base_dict = super().to_dict()
-        base_dict.update({
-            'contact_name': self.contact_name,
-            'contacts': self.contacts
-        })
-        return base_dict
+
 
 
 class ReceiverIdentity(Identity):
@@ -239,8 +201,7 @@ def __init__(self, receiver_segments: List[Segment]):
 
     def build_receiver_lines(self, receiver_loop: List[Segment]):
         nm1_segments = filter(lambda segment: segment.element(0) == 'NM1' and segment.element(1) == '40', receiver_loop)
-        list(map(self.process_nm1_segment, nm1_segments))
-        return self.to_dict()
+        return list(map(self.process_nm1_segment, nm1_segments))
 
 
 class ServiceIdentity(Identity):
@@ -262,8 +223,6 @@ def build_sl_lines(self, sl_loop: List[Segment]):
         self.services['Professional'] = list(professional_services)
         self.services['Institutional'] = list(institutional_services)
 
-        return self.to_dict()
-
     def parse_professional_service(self, segment: Segment):
         service_type, procedure_code = segment.element(1).split(':')[0:2]  # assuming 7 elements but choosing first two
         return {
@@ -284,12 +243,5 @@ def parse_institutional_service(self, segment: Segment):
             'Procedure Amount': segment.element(3)
         }
 
-    def to_dict(self):
-        base_dict = super().to_dict()
-        base_dict.update({
-            'services': self.services
-        })
-        return base_dict
-
                 
 

From c6daff7a337f1f872894976c8b2c123dfd90d43a Mon Sep 17 00:00:00 2001
From: Raven <raven.mukherjee@databricks.com>
Date: Mon, 20 May 2024 19:15:09 -0400
Subject: [PATCH 33/46] removed unnecessary funcs from identities

---
 databricksx12/hls/claim.py                    |  2 +-
 .../hls/support_classes/identities.py         | 44 +++++++------------
 2 files changed, 18 insertions(+), 28 deletions(-)

diff --git a/databricksx12/hls/claim.py b/databricksx12/hls/claim.py
index 8989192..d11c3cb 100644
--- a/databricksx12/hls/claim.py
+++ b/databricksx12/hls/claim.py
@@ -53,7 +53,7 @@ def _populate_claim_loop(self) -> Dict[str, str]:
         return ClaimIdentity(self.claim_loop)
 
     def _populate_sl_loop(self) -> Dict[str, str]:
-        return ServiceIdentity(self.sl_loop)
+        return ServiceIdentity(self.sl_loop) 
     
 
     """
diff --git a/databricksx12/hls/support_classes/identities.py b/databricksx12/hls/support_classes/identities.py
index 9db1b8c..f2a6a55 100644
--- a/databricksx12/hls/support_classes/identities.py
+++ b/databricksx12/hls/support_classes/identities.py
@@ -6,7 +6,7 @@
 
 class Identity:
 
-    # provider name identities associated with every NM1 line
+    # provider name identities associated with every NM1 line. a combination may occur within loops
     nm1_identifiers = {
         '85': 'Billing Provider',  # entity that is billing for the services provided
         '87': 'Pay-to Provider',   # entity to which payments are to be sent
@@ -135,7 +135,7 @@ def __init__(self, claim_segments: List[Segment]):
         self.build_claim_lines(claim_segments)
 
     def build_claim_lines(self, claim_loop: List[Segment]):
-        def process_segment(segment: Segment):
+        def process_claim_segment(segment: Segment):
             if segment.element(0) == 'CLM':
                 self.patient_id = segment.element(1)  # submitter's identifier
                 self.claim_amount = segment.element(2)
@@ -146,20 +146,21 @@ def process_segment(segment: Segment):
             if segment.element(0) == 'DTP':
                 self.date = segment.element(3)  # format D8:CCYYMMDD
 
-            if segment.element(0) == 'NM1':
-                provider_type = self.nm1_identifiers.get(segment.element(1))
-                if provider_type:
-                    identity = Identity([segment])
-                    self.providers[provider_type].append(identity.to_dict())
+        # process claim-specific segments
+        list(map(process_claim_segment, claim_loop))
+
+        # process NM1 segments for providers
+        nm1_segments = filter(lambda segment: segment.element(0) == 'NM1', claim_loop)
 
-        return list(map(process_segment, claim_loop))
+        # append instead of extend for single items
+        list(map(lambda segment: self.providers[self.nm1_identifiers.get(segment.element(1))].append(Identity([segment]).to_dict()), nm1_segments))
 
 
 
 class SubmitterIdentity(Identity):
     def __init__(self, submitter_segments: List[Segment]):
         self.contact_name = None
-        self.contacts = []
+        self.contacts = defaultdict(list)
         super().__init__(submitter_segments)
         self.build_submitter_lines(submitter_segments)
     
@@ -190,7 +191,7 @@ def process_per_segment(self, segment):
             contact['contact_method_3'] = contact_methods.get(segment.element(7), 'Unknown method')
             contact['contact_number_3'] = segment.element(8)
         
-        self.contacts.append(contact)
+        self.contacts['primary'].append(contact)
 
 
 
@@ -204,44 +205,33 @@ def build_receiver_lines(self, receiver_loop: List[Segment]):
         return list(map(self.process_nm1_segment, nm1_segments))
 
 
+                
 class ServiceIdentity(Identity):
     def __init__(self, sl_segments: List[Segment]):
-        self.services = {
-            'Professional': [],
-            'Institutional': []
-        }
+        self.services = defaultdict(list)
         super().__init__(sl_segments)
         self.build_sl_lines(sl_segments)
 
     def build_sl_lines(self, sl_loop: List[Segment]):
         sv1_segments = filter(lambda segment: segment.element(0) == 'SV1', sl_loop)
         sv2_segments = filter(lambda segment: segment.element(0) == 'SV2', sl_loop)
+        self.services['Professional'] = [self.parse_professional_service(segment) for segment in sv1_segments]
+        self.services['Institutional'] = [self.parse_institutional_service(segment) for segment in sv2_segments]
 
-        professional_services = map(self.parse_professional_service, sv1_segments)
-        institutional_services = map(self.parse_institutional_service, sv2_segments)
-
-        self.services['Professional'] = list(professional_services)
-        self.services['Institutional'] = list(institutional_services)
 
     def parse_professional_service(self, segment: Segment):
         service_type, procedure_code = segment.element(1).split(':')[0:2]  # assuming 7 elements but choosing first two
         return {
-            'Type of service/claim': 'Professional',
             'Type': service_type,
             'Procedure Code': procedure_code,
             'Procedure Amount': segment.element(2)
         }
 
     def parse_institutional_service(self, segment: Segment):
-        revenue_code = segment.element(1)
         service_type, procedure_code = segment.element(2).split(':')[0:2]  # assuming 7 elements but choosing first two
         return {
-            'Type of service/claim': 'Institutional',
-            'Revenue Code': revenue_code,
             'Type': service_type,
+            'Revenue Code': segment.element(1),
             'Procedure Code': procedure_code,
             'Procedure Amount': segment.element(3)
-        }
-
-                
-
+        }
\ No newline at end of file

From 2ca454328a83ed25ea415a116c259086f59a2d7f Mon Sep 17 00:00:00 2001
From: Raven <raven.mukherjee@databricks.com>
Date: Mon, 20 May 2024 19:30:39 -0400
Subject: [PATCH 34/46] add subscriber/patient relationship to SubscriberIdentity

---
 databricksx12/hls/claim.py                      | 2 +-
 databricksx12/hls/support_classes/identities.py | 6 ++++--
 2 files changed, 5 insertions(+), 3 deletions(-)

diff --git a/databricksx12/hls/claim.py b/databricksx12/hls/claim.py
index d11c3cb..1b79607 100644
--- a/databricksx12/hls/claim.py
+++ b/databricksx12/hls/claim.py
@@ -59,7 +59,7 @@ def _populate_sl_loop(self) -> Dict[str, str]:
     """
     Overall Asks
     - Coordination of Benefits flag
-    - Patient / Subscriber same person flag
+    - Patient / Subscriber same person flag --> self.relationship_to_insured in Suscriber
 
     Claim needs
     - principal ICD10 diagnosis code
diff --git a/databricksx12/hls/support_classes/identities.py b/databricksx12/hls/support_classes/identities.py
index f2a6a55..6a8d615 100644
--- a/databricksx12/hls/support_classes/identities.py
+++ b/databricksx12/hls/support_classes/identities.py
@@ -92,8 +92,6 @@ def build_billing(self, billing_loop: List[Segment]):
         })
     
 
-    #TODO class pay_to()
-
 class SubscriberIdentity(Identity):
     def __init__(self, subscriber_segments: List[Segment]):
         self.subscribers = defaultdict(list)
@@ -102,6 +100,10 @@ def __init__(self, subscriber_segments: List[Segment]):
         self.build_subscriber(subscriber_segments)
 
     def build_subscriber(self, subscriber_loop: List[Segment]):
+        sbr_segment = next(filter(lambda s: s.element(0) == 'SBR', subscriber_loop), None)
+        if sbr_segment:
+            self.relationship_to_insured = 'Self' if sbr_segment.element(2) == '18' else 'Dependent'
+
         grouped_segments = self.group_segments_by_provider(subscriber_loop, self.nm1_identifiers)
         self.subscribers = defaultdict(list, {
             subscriber_type: [Identity(segments).to_dict() for segments in group]

From 327f60ba96a4a2f92b06db0eab9919e28bd03f43 Mon Sep 17 00:00:00 2001
From: Aaron Zavora <aaron.zavora@databricks.com>
Date: Tue, 21 May 2024 09:42:35 -0400
Subject: [PATCH 35/46] Update README.md

---
 README.md | 212 ++++++++++++++++++++----------------------------------
 1 file changed, 79 insertions(+), 133 deletions(-)

diff --git a/README.md b/README.md
index 7649f69..8770375 100644
--- a/README.md
+++ b/README.md
@@ -3,27 +3,31 @@
 [![CLOUD](https://img.shields.io/badge/CLOUD-ALL-blue?logo=googlecloud&style=for-the-badge)](https://cloud.google.com/databricks)
 [![POC](https://img.shields.io/badge/POC-10_days-green?style=for-the-badge)](https://databricks.com/try-databricks)
 
-## Business Problem (Under Construction / Not Stable)
+# Business Problem 
 
-Addressing the issue of working with various parts of an x12 EDI transaction in Spark on Databricks.
+Working with various x12 EDI transactions in Spark on Databricks.
 
-## Install
+# Install
 
 ```python
 pip install git+https://github.com/databricks-industry-solutions/x12-edi-parser
 ```
 
-## Run 
+# Run 
 
-### Reading in EDI Data
+## Reading in EDI Data
 
 Default format used is AnsiX12 (* as a delim and ~ as segment separator)
 
 ```python
 from databricksx12 import *
 
-ediFormat = AnsiX12Delim #specifying formats of data, ansi is also the default if nothing is specified 
-df = spark.read.text("sampledata/837/*", wholetext = True)
+#EDI format type
+ediFormat = AnsiX12Delim #specifying formats of data, ansi is also the default if nothing is specified
+#can also specify custom formats (below is the same as AnsiX12Delim)
+ediFormat = type("", (), dict({'SEGMENT_DELIM': '~', 'ELEMENT_DELIM': '*', 'SUB_DELIM': ':'}))
+
+df = spark.read.text("sampledata/837/*txt", wholetext = True)
 
 (df.rdd
   .map(lambda x: x.asDict().get("value"))
@@ -37,59 +41,28 @@ df = spark.read.text("sampledata/837/*", wholetext = True)
 |                1|
 |                1|
 |                1|
-+-----------------+
-
-
-
-#Building a dynamic/custom format
-customFormat = type("", (), dict({'SEGMENT_DELIM': '~', 'ELEMENT_DELIM': '*', 'SUB_DELIM': ':'}))
-(df.rdd
-  .map(lambda x: x.asDict().get("value"))
-  .map(lambda x: EDI(x, delim_cls = customFormat))
-  .map(lambda x: {"transaction_count": x.num_transactions()})
-).toDF().show()
-+-----------------+
-|transaction_count|
-+-----------------+
-|                5|
-|                1|
-|                1|
 |                1|
 +-----------------+
 
-
 ```
 
-#### EDI as a Table for SQL
+## Parsing Healthcare Transactions
+
+Currently supports 837s. Records in each format type should be saved separately, e.g. do not mix 835s & 837s in the df.save() command.
+
+### 837i and 837p sample data in Spark
 
 ```python
-""""
-Look at all data refernce -> https://justransform.com/edi-essentials/edi-structure/
-  (1) Including control header / ISA & IEA segments
-"""
-from pyspark.sql.functions import input_file_name
+from databricksx12 import *
+from databricksx12.hls import *
+import json
 
-( df.withColumn("filename", input_file_name()).rdd
-  .map(lambda x: (x.asDict().get("filename"),x.asDict().get("value")))
-  .map(lambda x: (x[0], EDI(x[1])))
-  .map(lambda x: [{**{"filename": x[0]}, **y} for y in x[1].toRows()])
-  .flatMap(lambda x: x)
-  .toDF()).show()
+df = spark.read.text("sampledata/837/*", wholetext = True)
 
-"""
-+--------------------+----------+--------------------------+--------------+------------+-----------------------------+--------+
-|            row_data|row_number|segment_element_delim_char|segment_length|segment_name|segment_subelement_delim_char|filename|
-+--------------------+----------+--------------------------+--------------+------------+-----------------------------+--------+
-|ISA*00*          ...|         0|                         *|            17|         ISA|                            :|file:///|
-|GS*HC*CLEARINGHOU...|         1|                         *|             9|          GS|                            :|file:///|
-|ST*837*000000001*...|         2|                         *|             4|          ST|                            :|file:///|
-|BHT*0019*00*73490...|         3|                         *|             7|         BHT|                            :|file:///|
-|NM1*41*2*CLEARING...|         4|                         *|            10|         NM1|                            :|file:///|
-|PER*IC*CLEARINGHO...|         5|                         *|             7|         PER|                            :|file:///|
-|NM1*40*2*12345678...|         6|                         *|            10|         NM1|                            :|file:///|
 ```
 
-#### Parsing Healthcare Transactions
+### Sample data outside of Spark
+
 
 ```python
 from databricksx12 import *
@@ -99,45 +72,39 @@ import json
 hm = HealthcareManager()
 edi =  EDI(open("sampledata/837/CHPW_Claimdata.txt", "rb").read().decode("utf-8"))
 
+#Returns parsed claim data
 hm.from_edi(edi) 
-#[<databricksx12.hls.claim.Claim837p object at 0x1027003d0>, <databricksx12.hls.claim.Claim837p object at 0x1027006a0>, <databricksx12.hls.claim.Claim837p object at 0x102700700>, <databricksx12.hls.claim.Claim837p object at 0x102700550>, <databricksx12.hls.claim.Claim837p object at 0x1027002b0>]
+#[<databricksx12.hls.claim.Claim837i object at 0x1056309a0>, <databricksx12.hls.claim.Claim837i object at 0x105630580>, <databricksx12.hls.claim.Claim837i object at 0x1056973d0>, <databricksx12.hls.claim.Claim837i object at 0x105697100>, <databricksx12.hls.claim.Claim837i object at 0x1056972b0>]
 
-#TODO replace this with Spark tomorrow
+#Print in json format
 print(json.dumps(hm.to_json(edi), indent=4)) 
 
-
-"""
-TODO update tomorrow below
-"""
-
-one_claim = hm.from_edi(x)[0]
-
-#print a json representation of a claim
-print(json.dumps(one_claim.to_json(), indent=4))
 """
 {
-    "submitter": {
-        "name": "CLEARINGHOUSE LLC",
-        "type": "Organization",
-        "tax_id": "987654321",
-        "contact_name": "CLEARINGHOUSE CLIENT SERVICES",
-        "contacts": [
-            {
-                "contact_method": "Telephone",
-                "contact_number": "8005551212",
-                "contact_method_2": "Fax",
-                "contact_number_2": "8005551212"
-            }
-        ]
-    },
-    "reciever": {
-        "name": "123456789",
-        "type": "Organization",
-        "id_code": "CHPWA"
-    },
-    "subscriber": {...
+    "EDI.sender_tax_id": "ZZ",
+    "FuncitonalGroup": [
+        {
+            "FunctionalGroup.receiver": "123456789",
+            "FunctionalGroup.sender": "CLEARINGHOUSE",
+            "FunctionalGroup.transaction_datetime": "20180508:0833",
+            "FunctionalGroup.transaction_type": "222",
+            "Transactions": [
+                {
+                    "Transaction.transaction_type": "222",
+                    "Claims": [
+                        {
+                            "submitter": {
+                                "contact_name": "CLEARINGHOUSE CLIENT SERVICES",
+                                "contacts": {
+                                    "primary": [
+                                        {
+                                            "contact_method": "Telephone",
+                                            "contact_number": "8005551212",
+...
 """
-#print raw EDI Segments
+
+#print the raw EDI Segments of one claim
+one_claim = hm.from_edi(edi)[0]
 print("\n".join([y.data for y in one_claim.data])) #Print one claim to look at the segments of it
 """
 BHT*0019*00*7349063984*20180508*0833*CH
@@ -157,66 +124,45 @@ HL*2*1*22*0
 SBR*P*18**COMMUNITY HLTH PLAN OF WASH*****CI
 NM1*IL*1*SUBSCRIBER*JOHN*J***MI*987321
 N3*987 65TH PL
-N4*VANCOUVER*WA*986640001
-DMG*D8*19881225*M
-NM1*PR*2*COMMUNITY HEALTH PLAN OF WASHINGTON*****PI*CHPWA
-CLM*1805080AV3648339*20***57:B:1*Y*A*Y*Y
-REF*D9*7349065509
-HI*ABK:F1120
-NM1*82*1*PROVIDER*JAMES****XX*1112223338
-PRV*PE*PXC*261QR0405X
-NM1*77*2*BH CLINIC OF VANCOUVER*****XX*1122334455
-N3*12345 MAIN ST SUITE A1
-N4*VANCOUVER*WA*98662
-LX*1
-SV1*HC:H0003*20*UN*1***1
-DTP*472*D8*20180428
-REF*6R*142671
+...
 """
 ```
 
-#### Further EDI Parsing in Pyspark
-
-
->  **Warning** 
-> Sections below this are under construction
+## EDI as a Table for SQL
 
 ```python
-from databricksx12.edi import *
-x =  EDIManager(EDI(open("sampledata/837/CHPW_Claimdata.txt", "rb").read().decode("utf-8")))
+""""
+Look at all data refernce -> https://justransform.com/edi-essentials/edi-structure/
+  (1) Including control header / ISA & IEA segments
+"""
+from pyspark.sql.functions import input_file_name
 
-import json
-print(json.dumps(x.flatten(x.data), indent=4))
-{
-    "EDI.sender_tax_id": "ZZ",
-    "list": [
-        {
-            "FunctionalGroup.receiver": "123456789",
-            "FunctionalGroup.sender": "CLEARINGHOUSE",
-            "FunctionalGroup.transaction_datetime": "20180508:0833",
-            "FunctionalGroup.transaction_type": "222",
-            "list": [
-                {
-                    "Transaction.transaction_type": "222"
-                },
-                {
-                    "Transaction.transaction_type": "222"
-                },
-                {
-                    "Transaction.transaction_type": "222"
-                },
-                {
-                    "Transaction.transaction_type": "222"
-                },
-                {
-                    "Transaction.transaction_type": "222"
-                }
-            ]
-        }
-    ]
-}
+( df.withColumn("filename", input_file_name()).rdd
+  .map(lambda x: (x.asDict().get("filename"),x.asDict().get("value")))
+  .map(lambda x: (x[0], EDI(x[1])))
+  .map(lambda x: [{**{"filename": x[0]}, **y} for y in x[1].toRows()])
+  .flatMap(lambda x: x)
+  .toDF()).show()
+
+"""
++--------------------+----------+--------------------------+--------------+------------+-----------------------------+--------+
+|            row_data|row_number|segment_element_delim_char|segment_length|segment_name|segment_subelement_delim_char|filename|
++--------------------+----------+--------------------------+--------------+------------+-----------------------------+--------+
+|ISA*00*          ...|         0|                         *|            17|         ISA|                            :|file:///|
+|GS*HC*CLEARINGHOU...|         1|                         *|             9|          GS|                            :|file:///|
+|ST*837*000000001*...|         2|                         *|             4|          ST|                            :|file:///|
+|BHT*0019*00*73490...|         3|                         *|             7|         BHT|                            :|file:///|
+|NM1*41*2*CLEARING...|         4|                         *|            10|         NM1|                            :|file:///|
+|PER*IC*CLEARINGHO...|         5|                         *|             7|         PER|                            :|file:///|
+|NM1*40*2*12345678...|         6|                         *|            10|         NM1|                            :|file:///|
 ```
 
+#### Other types of EDI Parsing in Pyspark
+
+
+>  **Warning** 
+> Sections below this are under construction
+
 ```python
 
 """

From b1668688f67dc7df55cb0aee1e5347afaa1fc3b7 Mon Sep 17 00:00:00 2001
From: Aaron Z <aaron.zavora@databricks.com>
Date: Tue, 21 May 2024 10:14:04 -0400
Subject: [PATCH 36/46] updated GS08

---
 databricksx12/hls/claim.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/databricksx12/hls/claim.py b/databricksx12/hls/claim.py
index 1b79607..10ddff7 100644
--- a/databricksx12/hls/claim.py
+++ b/databricksx12/hls/claim.py
@@ -84,7 +84,7 @@ def to_json(self):
             **{'patient': self.patient_info.to_dict()},
             **{'billing_provider': self.billing_info.to_dict()},
             **{'claim_header': self.claim_info.to_dict()},
-            **{'claim_lines': self.sl_info.to_dict()}
+            **{'claim_lines': 'TODO'}
         }
 
     # not sure if this should be here or not, but you get the idea
@@ -97,7 +97,7 @@ def build(self) -> None:
             self._populate_subscriber_loop() if self.patient_loop == [] else self._populate_patient_loop()
         )
         self.claim_info = self._populate_claim_loop()
-        self.sl_info = self._populate_sl_loop()
+        self.sl_info =  self._populate_sl_loop()
 
 
 

From bbcea3d55a2dfa223680a1592b31c0cecb6d3608 Mon Sep 17 00:00:00 2001
From: Aaron Zavora <aaron.zavora@databricks.com>
Date: Tue, 21 May 2024 13:14:09 -0400
Subject: [PATCH 37/46] Update README.md

---
 README.md | 25 +++++++++++++++----------
 1 file changed, 15 insertions(+), 10 deletions(-)

diff --git a/README.md b/README.md
index 8770375..6d6f48a 100644
--- a/README.md
+++ b/README.md
@@ -34,6 +34,7 @@ df = spark.read.text("sampledata/837/*txt", wholetext = True)
   .map(lambda x: EDI(x, delim_cls = ediFormat))
   .map(lambda x: {"transaction_count": x.num_transactions()})
 ).toDF().show()
+"""
 +-----------------+
 |transaction_count|
 +-----------------+
@@ -43,7 +44,7 @@ df = spark.read.text("sampledata/837/*txt", wholetext = True)
 |                1|
 |                1|
 +-----------------+
-
+"""
 ```
 
 ## Parsing Healthcare Transactions
@@ -56,8 +57,18 @@ Currently supports 837s. Records in each format type should be saved separately,
 from databricksx12 import *
 from databricksx12.hls import *
 import json
+from pyspark.sql.functions import input_file_name
+
+hm = HealthcareManager()
+df = spark.read.text("sampledata/837/*txt", wholetext = True)
 
-df = spark.read.text("sampledata/837/*", wholetext = True)
+
+rdd = (
+ df.withColumn("filename", input_file_name()).rdd
+  .map(lambda x: (x.asDict().get("filename"),x.asDict().get("value")))
+  .map(lambda x: (x[0], EDI(x[1])))
+  .map(lambda x: { **{'filename': x[0]}, **hm.to_json(x[1])} )
+)
 
 ```
 
@@ -74,7 +85,7 @@ edi =  EDI(open("sampledata/837/CHPW_Claimdata.txt", "rb").read().decode("utf-8"
 
 #Returns parsed claim data
 hm.from_edi(edi) 
-#[<databricksx12.hls.claim.Claim837i object at 0x1056309a0>, <databricksx12.hls.claim.Claim837i object at 0x105630580>, <databricksx12.hls.claim.Claim837i object at 0x1056973d0>, <databricksx12.hls.claim.Claim837i object at 0x105697100>, <databricksx12.hls.claim.Claim837i object at 0x1056972b0>]
+#[<databricksx12.hls.claim.Claim837p object at 0x106e57b50>, <databricksx12.hls.claim.Claim837p object at 0x106e57c40>, <databricksx12.hls.claim.Claim837p object at 0x106e57eb0>, <databricksx12.hls.claim.Claim837p object at 0x106e57b20>, <databricksx12.hls.claim.Claim837p object at 0x106e721f0>]
 
 #Print in json format
 print(json.dumps(hm.to_json(edi), indent=4)) 
@@ -157,11 +168,7 @@ from pyspark.sql.functions import input_file_name
 |NM1*40*2*12345678...|         6|                         *|            10|         NM1|                            :|file:///|
 ```
 
-#### Other types of EDI Parsing in Pyspark
-
-
->  **Warning** 
-> Sections below this are under construction
+#### Other EDI Parsing in Pyspark
 
 ```python
 
@@ -231,8 +238,6 @@ ediDF.show()
 """
 
 
-
-
 #show first line of each transaction
 trxDF.filter(x.row_number == 0).show()
 """

From 0ebcd3b62a58c5ab44937b160f7916e979f9a162 Mon Sep 17 00:00:00 2001
From: Aaron Zavora <aaron.zavora@databricks.com>
Date: Tue, 21 May 2024 13:29:06 -0400
Subject: [PATCH 38/46] Update README.md

---
 README.md | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/README.md b/README.md
index 6d6f48a..f13985d 100644
--- a/README.md
+++ b/README.md
@@ -68,7 +68,13 @@ rdd = (
   .map(lambda x: (x.asDict().get("filename"),x.asDict().get("value")))
   .map(lambda x: (x[0], EDI(x[1])))
   .map(lambda x: { **{'filename': x[0]}, **hm.to_json(x[1])} )
+  .map(lambda x: json.dumps(x))
 )
+claims = spark.read.json(rdd)
+
+#Claim header table TODO 
+
+#Claim line table TODO 
 
 ```
 

From f999d9930ff04014f2e7db0015c9a4ed93eb88ed Mon Sep 17 00:00:00 2001
From: Raven <raven.mukherjee@databricks.com>
Date: Tue, 21 May 2024 19:55:00 -0400
Subject: [PATCH 39/46] billing providers function in claim

---
 databricksx12/hls/claim.py                    |  64 +++++--
 .../hls/support_classes/identities.py         | 156 +++++++++---------
 2 files changed, 132 insertions(+), 88 deletions(-)

diff --git a/databricksx12/hls/claim.py b/databricksx12/hls/claim.py
index 10ddff7..f03c8e8 100644
--- a/databricksx12/hls/claim.py
+++ b/databricksx12/hls/claim.py
@@ -1,7 +1,18 @@
-from databricksx12.edi import EDI, AnsiX12Delim
+from databricksx12.edi import EDI, AnsiX12Delim, Segment
 from databricksx12.hls.loop import Loop
-from databricksx12.hls.support_classes.identities import BillingIdentity, SubscriberIdentity, PatientIdentity, ClaimIdentity, SubmitterIdentity, ReceiverIdentity, ServiceIdentity
+from databricksx12.hls.support_classes.identities import (
+    Identity,
+    BillingIdentity, 
+    SubscriberIdentity, 
+    PatientIdentity, 
+    ClaimIdentity, 
+    SubmitterIdentity, 
+    ReceiverIdentity, 
+    ServiceIdentity,
+)
 from typing import List, Dict
+from collections import defaultdict
+from functools import reduce
 
 
 #
@@ -35,12 +46,11 @@ def _populate_receiver_loop(self) -> Dict[str, str]:
         return ReceiverIdentity(self.sender_receiver_loop)
     
     def _populate_billing_loop(self) -> Dict[str, str]:
-        return BillingIdentity(self.billing_loop)
+        return BillingIdentity(self.sender_receiver_loop)
 
     def _populate_subscriber_loop(self) -> Dict[str, str]:
         return SubscriberIdentity(self.subscriber_loop)
-
-    #
+    
     #
     #
     def _populate_patient_loop(self) -> Dict[str, str]:
@@ -55,20 +65,41 @@ def _populate_claim_loop(self) -> Dict[str, str]:
     def _populate_sl_loop(self) -> Dict[str, str]:
         return ServiceIdentity(self.sl_loop) 
     
+    def _populate_grouped_entities(self, loop: List[Segment]) -> Dict[str, List[Dict[str, str]]]:
+        # if we want a list of NM1 entities belonging within a loop 
+        def group_segments_by_provider(loop, nm1_identifiers: dict = Identity.nm1_identifiers) -> Dict[str, List[List[Segment]]]:
+            def reducer(acc, segment):
+                provider_type, grouped = acc
+                if segment.element(0) == 'NM1':
+                    provider_type = nm1_identifiers.get(segment.element(1))
+                    if provider_type:
+                        grouped[provider_type].append([segment])
+                elif provider_type:
+                    grouped[provider_type][-1].append(segment)
+                return provider_type, grouped
+            
+            _, grouped = reduce(reducer, loop, (None, defaultdict(list)))
+            return grouped
+        
+        return defaultdict(list, {
+            provider_type: [Identity(segments).to_dict() for segments in group]
+            for provider_type, group in group_segments_by_provider(loop).items()
+        })
+    
 
     """
     Overall Asks
-    - Coordination of Benefits flag
-    - Patient / Subscriber same person flag --> self.relationship_to_insured in Suscriber
+    - Coordination of Benefits flag --> self.benefits_assign_flag in Claim Identity
+    - Patient / Subscriber same person flag --> self.relationship_to_insured in Subscriber in Claim Identity
 
     Claim needs
     - principal ICD10 diagnosis code
-    - other ICD10 diagnosis codes as an array
-    - hcfa place of service
-    - claim id?
-    - admission type code
-    - facility type code
-    - claim frequency code
+    - other ICD10 diagnosis codes as an array 
+    - hcfa place of service -- segment.element(5).split(':')?
+    - claim id? - done
+    - admission type code - only in 837i?
+    - facility type code - done
+    - claim frequency code - done
 
     Claim line needs
     - This should return an array 
@@ -84,7 +115,8 @@ def to_json(self):
             **{'patient': self.patient_info.to_dict()},
             **{'billing_provider': self.billing_info.to_dict()},
             **{'claim_header': self.claim_info.to_dict()},
-            **{'claim_lines': 'TODO'}
+            **{'claim_lines': 'TODO'},
+            **{'grouped_subscriber_entities': self.subscriber_entities_info.to_dict()}, # call for all entities in a loop[]
         }
 
     # not sure if this should be here or not, but you get the idea
@@ -99,6 +131,9 @@ def build(self) -> None:
         self.claim_info = self._populate_claim_loop()
         self.sl_info =  self._populate_sl_loop()
 
+        self.claim_entities_info = self._populate_grouped_entities(self.claim_loop)
+        self.subscriber_entities_info = self._populate_grouped_entities(self.subscriber_loop)
+
 
 
 class Claim837i(MedicalClaim):
@@ -112,6 +147,7 @@ class Claim837p(MedicalClaim):
 
     NAME = "837P"
 
+    
 
 class Claim835(MedicalClaim):
 
diff --git a/databricksx12/hls/support_classes/identities.py b/databricksx12/hls/support_classes/identities.py
index 6a8d615..6708e10 100644
--- a/databricksx12/hls/support_classes/identities.py
+++ b/databricksx12/hls/support_classes/identities.py
@@ -6,26 +6,27 @@
 
 class Identity:
 
-    # provider name identities associated with every NM1 line. a combination may occur within loops
+    # entity name identities associated with every NM1 line. a combination may occur within loops
     nm1_identifiers = {
-        '85': 'Billing Provider',  # entity that is billing for the services provided
-        '87': 'Pay-to Provider',   # entity to which payments are to be sent
-        'PR': 'Payer',             # insurance company or payer
+        '85': 'Billing Provider',  # entity that is billing for the services provided (87, pay-to provider, is disregarded)
         'IL': 'Insured',           # insured individual
-        'QC': 'Patient',           # patient
+        'QC': 'Patient',           # patient for 837P and PAT segments in 837i
         '82': 'Rendering Provider',# individual or group that performed the service
         'DN': 'Referring Provider',# doctor who referred the patient to another doctor
         '77': 'Service Facility',  # location where the service was performed
         'DQ': 'Supervising Provider', # provider who oversees the patient's care
         '71': 'Attending Provider',# provider with primary responsibility for the patient at the time of service
         'DK': 'Ordering Provider', # provider who ordered the service or item
+        'PR': 'Payer',             # insurance company or payer
         'PE': 'Payee',             # entity receiving the payment
+
     }
         
     def __init__(self, segments: List[Segment]):
         self.name: str = None
         self.street: str = None
         self.type: str = None
+        self.provider_type: str = None
         self.city: str = None
         self.state: str = None
         self.zip: str = None
@@ -33,19 +34,20 @@ def __init__(self, segments: List[Segment]):
         self.npi: str = None
         self.build(segments)
 
-    # build name and address for any identity
+    # build entity and address for any identity
     def build(self, loop: List[Segment]):
         nm1_segments = filter(lambda segment: segment.element(0) == 'NM1' and segment.segment_len() >= 10, loop)
-        n3_segments = filter(lambda segment: segment.element(0) == 'N3', loop)
-        n4_segments = filter(lambda segment: segment.element(0) == 'N4', loop)
+        n3_segment = next(filter(lambda segment: segment.element(0) == 'N3', loop), None) # taking only the first address lines
+        n4_segment = next(filter(lambda segment: segment.element(0) == 'N4', loop), None)
 
         list(map(self.process_nm1_segment, nm1_segments))
-        list(map(self.process_n3_segment, n3_segments))
-        list(map(self.process_n4_segment, n4_segments))
+        list(map(self.process_n3_segment, [n3_segment] if n3_segment else []))
+        list(map(self.process_n4_segment, [n4_segment] if n4_segment else []))
     
     def process_nm1_segment(self, segment: Segment):
         self.type = 'Organization' if segment.element(2) == '2' else 'Individual'
         self.name = segment.element(3) if self.type == 'Organization' else ' '.join([segment.element(3), segment.element(4), segment.element(5)])
+        self.entity_type = self.nm1_identifiers.get(segment.element(1), 'Unknown')
         self.npi = segment.element(9) if len(segment.element(9)) == 10 else None
         self.id = segment.element(9) if len(segment.element(9)) != 10 else None
 
@@ -57,105 +59,111 @@ def process_n4_segment(self, segment: Segment):
         self.state = segment.element(2)
         self.zip = segment.element(3)
 
-
     def to_dict(self):
         return {k: v for k, v in self.__dict__.items() if v is not None}
     
-    
-    @staticmethod
-    def group_segments_by_provider(loop: List[Segment], nm1_identifiers: dict) -> Dict[str, List[List[Segment]]]:
-        def reducer(acc, segment):
-            provider_type, grouped = acc
-            if segment.element(0) == 'NM1':
-                provider_type = nm1_identifiers.get(segment.element(1))
-                if provider_type:
-                    grouped[provider_type].append([segment])
-            elif provider_type:
-                grouped[provider_type][-1].append(segment)
-            return provider_type, grouped
-        
-        _, grouped = reduce(reducer, loop, (None, defaultdict(list)))
-        return grouped
 
 
 class BillingIdentity(Identity):
     def __init__(self, billing_segments: List[Segment]):
-        self.providers = defaultdict(list)
         super().__init__(billing_segments)
-        self.build_billing(billing_segments)
-
-    def build_billing(self, billing_loop: List[Segment]):
-        grouped_segments = self.group_segments_by_provider(billing_loop, self.nm1_identifiers)
-        self.providers = defaultdict(list, {
-            provider_type: [Identity(segments).to_dict() for segments in group]
-            for provider_type, group in grouped_segments.items()
-        })
-    
+        list(map(lambda segment: Identity([segment]).to_dict(), billing_segments))
+        
 
 class SubscriberIdentity(Identity):
     def __init__(self, subscriber_segments: List[Segment]):
-        self.subscribers = defaultdict(list)
         self.relationship_to_insured = None
         super().__init__(subscriber_segments)
         self.build_subscriber(subscriber_segments)
 
     def build_subscriber(self, subscriber_loop: List[Segment]):
-        sbr_segment = next(filter(lambda s: s.element(0) == 'SBR', subscriber_loop), None)
+        sbr_segment = next(filter(lambda segment: segment.element(0) == 'SBR', subscriber_loop), None)
         if sbr_segment:
             self.relationship_to_insured = 'Self' if sbr_segment.element(2) == '18' else 'Dependent'
 
-        grouped_segments = self.group_segments_by_provider(subscriber_loop, self.nm1_identifiers)
-        self.subscribers = defaultdict(list, {
-            subscriber_type: [Identity(segments).to_dict() for segments in group]
-            for subscriber_type, group in grouped_segments.items()
-        })
-    
-
 
 class PatientIdentity(Identity):
-    def __init__(self, patient_segments: List[Segment]):
-        super().__init__(patient_segments)
-        self.build_patient(patient_segments)
-
-    def build_patient(self, patient_loop: List[Segment]):
-        def process_patient_segment(segment: Segment):
-            self.type = 'Patient'
-            self.name = ' '.join([segment.element(3), segment.element(4), segment.element(5)])
-        return list(map(process_patient_segment, filter(lambda s: s.element(0) == 'NM1' and s.element(1) == 'QC', patient_loop)))
-
-
-
+        def __init__(self, patient_segments: List[Segment]):
+            super().__init__(patient_segments)
+            self.build_patient(patient_segments)
+
+        def build_patient(self, patient_loop: List[Segment]):
+            def process_patient_segment(segment: Segment):
+                self.type = 'Patient'
+                self.name = ' '.join([segment.element(3), segment.element(4), segment.element(5)])
+            return list(map(process_patient_segment, filter(lambda s: s.element(0) == 'NM1' and s.element(1) == 'QC', patient_loop)))
+        
+        
 class ClaimIdentity(Identity):
     def __init__(self, claim_segments: List[Segment]):
         self.patient_id = None
         self.claim_amount = None
         self.facility_type_code = None
         self.claim_code_freq = None
-        self.date = None
-        self.providers = defaultdict(list)
+        self.admission_date = None
+        self.benefits_assign_flag = None
+        self.claim_id = None
+        self.admission_type = None # only 837I?
+
+        self.pricipal_diagnosis_code = None
+
+        self.providers = defaultdict(list) # still need?
         super().__init__(claim_segments)
         self.build_claim_lines(claim_segments)
 
     def build_claim_lines(self, claim_loop: List[Segment]):
-        def process_claim_segment(segment: Segment):
-            if segment.element(0) == 'CLM':
-                self.patient_id = segment.element(1)  # submitter's identifier
-                self.claim_amount = segment.element(2)
-                codes = segment.element(5).split(':') # codes[1] == A for institutional and B for professional
-                self.facility_type_code = codes[0]
-                self.claim_code_freq = codes[2]
+        # Process claim-specific segments
+        clm_segments = filter(lambda segment: segment.element(0) == 'CLM', claim_loop)
+        dtp_segments = filter(lambda segment: segment.element(0) == 'DTP', claim_loop)
+        cli_segments = filter(lambda segment: segment.element(0) == 'CLI', claim_loop)
+        ref_segments = filter(lambda segment: segment.element(0) == 'REF' and segment.element(1) == 'D9', claim_loop)
+        
+        # get only the first HI segment for the principal diagnosis code
+        principle_diagnosis_segment = filter(lambda segment: segment.element(0) == 'HI' and segment.element(1).split(':')[0] in ['ABK', 'BK'], claim_loop)
+        # get all other HI segments for other diagnosis codes
+        other_diagnosis_segments = filter(lambda segment: segment.element(0) == 'HI' and segment.element(1).split(':')[0] in ['ABF', 'BF'], claim_loop)
+
+
+        list(map(self.process_clm_segment, clm_segments))
+        list(map(self.process_dtp_segment, dtp_segments))
+        list(map(self.process_cli_segment, cli_segments))
+        list(map(self.process_ref_segment, ref_segments))
+        # if principle_diagnosis_segment:
+        #     self.process_principal_diagnosis_segment(principle_diagnosis_segment)
+    
+        # Process other diagnosis codes
+        # self.other_diagnosis_codes = [
+        #     code for segment in other_diagnosis_segments
+        #     for i, code in enumerate(segment.element(1).split(':'))
+        #     if i % 2 != 0
+        # ]
+        
+
+        # Process NM1 segments for providers
+        nm1_segments = filter(lambda segment: segment.element(0) == 'NM1', claim_loop)
+        list(map(lambda segment: self.providers[self.nm1_identifiers.get(segment.element(1))].append(Identity([segment]).to_dict()), nm1_segments))
 
-            if segment.element(0) == 'DTP':
-                self.date = segment.element(3)  # format D8:CCYYMMDD
+    def process_clm_segment(self, segment: Segment):
+        self.patient_id = segment.element(1)  # submitter's identifier
+        self.claim_amount = segment.element(2)
+        self.benefits_assign_flag = 'Yes' if segment.element(8) == 'Y' else 'No'  # Benefits flag
 
-        # process claim-specific segments
-        list(map(process_claim_segment, claim_loop))
+        place_of_service = segment.element(5).split(':')  # codes[1] == A for institutional and B for professional
+        self.facility_type_code = place_of_service[0]
+        self.claim_code_freq = place_of_service[2]
 
-        # process NM1 segments for providers
-        nm1_segments = filter(lambda segment: segment.element(0) == 'NM1', claim_loop)
+    def process_dtp_segment(self, segment: Segment):
+        self.date = segment.element(3)  # format D8:CCYYMMDD
+
+    def process_cli_segment(self, segment: Segment):
+        self.admission_date = segment.element(1)  # Only in 837I
+
+    def process_ref_segment(self, segment: Segment):
+        self.claim_id = segment.element(2)
+
+    # def process_principal_diagnosis_segment(self, segment: Segment):
+    #     self.principal_diagnosis_code = segment.element(2)  # assuming HI segment's first element is the principal diagnosis code
 
-        # append instead of extend for single items
-        list(map(lambda segment: self.providers[self.nm1_identifiers.get(segment.element(1))].append(Identity([segment]).to_dict()), nm1_segments))
 
 
 

From 41485788b23490b62f69ad2c2e4363cea6e93443 Mon Sep 17 00:00:00 2001
From: Raven <raven.mukherjee@databricks.com>
Date: Tue, 21 May 2024 20:15:31 -0400
Subject: [PATCH 40/46] cleaned grouping

---
 databricksx12/hls/claim.py                      | 11 +++++++----
 databricksx12/hls/support_classes/identities.py |  6 +++---
 2 files changed, 10 insertions(+), 7 deletions(-)

diff --git a/databricksx12/hls/claim.py b/databricksx12/hls/claim.py
index f03c8e8..a7307db 100644
--- a/databricksx12/hls/claim.py
+++ b/databricksx12/hls/claim.py
@@ -46,7 +46,7 @@ def _populate_receiver_loop(self) -> Dict[str, str]:
         return ReceiverIdentity(self.sender_receiver_loop)
     
     def _populate_billing_loop(self) -> Dict[str, str]:
-        return BillingIdentity(self.sender_receiver_loop)
+        return BillingIdentity(self.billing_loop)
 
     def _populate_subscriber_loop(self) -> Dict[str, str]:
         return SubscriberIdentity(self.subscriber_loop)
@@ -65,6 +65,9 @@ def _populate_claim_loop(self) -> Dict[str, str]:
     def _populate_sl_loop(self) -> Dict[str, str]:
         return ServiceIdentity(self.sl_loop) 
     
+    #
+    #
+    #
     def _populate_grouped_entities(self, loop: List[Segment]) -> Dict[str, List[Dict[str, str]]]:
         # if we want a list of NM1 entities belonging within a loop 
         def group_segments_by_provider(loop, nm1_identifiers: dict = Identity.nm1_identifiers) -> Dict[str, List[List[Segment]]]:
@@ -73,11 +76,11 @@ def reducer(acc, segment):
                 if segment.element(0) == 'NM1':
                     provider_type = nm1_identifiers.get(segment.element(1))
                     if provider_type:
-                        grouped[provider_type].append([segment])
+                        grouped[provider_type] = grouped.get(provider_type, []) + [[segment]]
                 elif provider_type:
-                    grouped[provider_type][-1].append(segment)
+                    grouped[provider_type][-1] += [segment]
                 return provider_type, grouped
-            
+        
             _, grouped = reduce(reducer, loop, (None, defaultdict(list)))
             return grouped
         
diff --git a/databricksx12/hls/support_classes/identities.py b/databricksx12/hls/support_classes/identities.py
index 6708e10..5fa2dc8 100644
--- a/databricksx12/hls/support_classes/identities.py
+++ b/databricksx12/hls/support_classes/identities.py
@@ -36,11 +36,11 @@ def __init__(self, segments: List[Segment]):
 
     # build entity and address for any identity
     def build(self, loop: List[Segment]):
-        nm1_segments = filter(lambda segment: segment.element(0) == 'NM1' and segment.segment_len() >= 10, loop)
+        nm1_segment = next(filter(lambda segment: segment.element(0) == 'NM1' and segment.segment_len() >= 10, loop), None)
         n3_segment = next(filter(lambda segment: segment.element(0) == 'N3', loop), None) # taking only the first address lines
         n4_segment = next(filter(lambda segment: segment.element(0) == 'N4', loop), None)
 
-        list(map(self.process_nm1_segment, nm1_segments))
+        list(map(self.process_nm1_segment, [nm1_segment] if nm1_segment else []))
         list(map(self.process_n3_segment, [n3_segment] if n3_segment else []))
         list(map(self.process_n4_segment, [n4_segment] if n4_segment else []))
     
@@ -93,7 +93,7 @@ def process_patient_segment(segment: Segment):
                 self.name = ' '.join([segment.element(3), segment.element(4), segment.element(5)])
             return list(map(process_patient_segment, filter(lambda s: s.element(0) == 'NM1' and s.element(1) == 'QC', patient_loop)))
         
-        
+
 class ClaimIdentity(Identity):
     def __init__(self, claim_segments: List[Segment]):
         self.patient_id = None

From 207a2ac649eb6407a6bccd0aedb4dc39cb74d8c4 Mon Sep 17 00:00:00 2001
From: Raven <raven.mukherjee@databricks.com>
Date: Wed, 22 May 2024 09:15:32 -0400
Subject: [PATCH 41/46] added service units

---
 databricksx12/hls/claim.py                      | 2 ++
 databricksx12/hls/support_classes/identities.py | 8 ++++++--
 2 files changed, 8 insertions(+), 2 deletions(-)

diff --git a/databricksx12/hls/claim.py b/databricksx12/hls/claim.py
index a7307db..77207b0 100644
--- a/databricksx12/hls/claim.py
+++ b/databricksx12/hls/claim.py
@@ -90,6 +90,8 @@ def reducer(acc, segment):
         })
     
 
+   
+
     """
     Overall Asks
     - Coordination of Benefits flag -- > self.benefits_assign_flag in Claim Identity
diff --git a/databricksx12/hls/support_classes/identities.py b/databricksx12/hls/support_classes/identities.py
index 5fa2dc8..b2301b0 100644
--- a/databricksx12/hls/support_classes/identities.py
+++ b/databricksx12/hls/support_classes/identities.py
@@ -234,7 +234,9 @@ def parse_professional_service(self, segment: Segment):
         return {
             'Type': service_type,
             'Procedure Code': procedure_code,
-            'Procedure Amount': segment.element(2)
+            'Procedure Amount': segment.element(2),
+            'Measurement Code': segment.element(3), #UN or if anesthesia, MJ
+            'Service unit': segment.element(4),
         }
 
     def parse_institutional_service(self, segment: Segment):
@@ -243,5 +245,7 @@ def parse_institutional_service(self, segment: Segment):
             'Type': service_type,
             'Revenue Code': segment.element(1),
             'Procedure Code': procedure_code,
-            'Procedure Amount': segment.element(3)
+            'Procedure Amount': segment.element(3),
+            'Measurement Code': segment.element(4), #UN or if anesthesia, MJ
+            'Service unit': segment.element(5),
         }
\ No newline at end of file

From f1cea0934f28bcce22d153772d8de9a11c189052 Mon Sep 17 00:00:00 2001
From: Raven <raven.mukherjee@databricks.com>
Date: Wed, 22 May 2024 10:52:03 -0400
Subject: [PATCH 42/46] fixed grouped entities

---
 databricksx12/hls/claim.py                      | 8 ++++----
 databricksx12/hls/support_classes/identities.py | 2 ++
 2 files changed, 6 insertions(+), 4 deletions(-)

diff --git a/databricksx12/hls/claim.py b/databricksx12/hls/claim.py
index 77207b0..10c7038 100644
--- a/databricksx12/hls/claim.py
+++ b/databricksx12/hls/claim.py
@@ -81,13 +81,13 @@ def reducer(acc, segment):
                     grouped[provider_type][-1] += [segment]
                 return provider_type, grouped
         
-            _, grouped = reduce(reducer, loop, (None, defaultdict(list)))
+            _, grouped = reduce(reducer, loop, (None, {}))
             return grouped
         
-        return defaultdict(list, {
+        return {
             provider_type: [Identity(segments).to_dict() for segments in group]
             for provider_type, group in group_segments_by_provider(loop).items()
-        })
+        }
     
 
    
@@ -121,7 +121,7 @@ def to_json(self):
             **{'billing_provider': self.billing_info.to_dict()},
             **{'claim_header': self.claim_info.to_dict()},
             **{'claim_lines': 'TODO'},
-            **{'grouped_subscriber_entities': self.subscriber_entities_info.to_dict()}, # call for all entities in a loop[]
+            **{'grouped_subscriber_entities': self.subscriber_entities_info}, # call for all entities in a loop[]
         }
 
     # not sure if this should be here or not, but you get the idea
diff --git a/databricksx12/hls/support_classes/identities.py b/databricksx12/hls/support_classes/identities.py
index b2301b0..afe571b 100644
--- a/databricksx12/hls/support_classes/identities.py
+++ b/databricksx12/hls/support_classes/identities.py
@@ -82,6 +82,8 @@ def build_subscriber(self, subscriber_loop: List[Segment]):
             self.relationship_to_insured = 'Self' if sbr_segment.element(2) == '18' else 'Dependent'
 
 
+
+
 class PatientIdentity(Identity):
         def __init__(self, patient_segments: List[Segment]):
             super().__init__(patient_segments)

From 1fd11fd60aa139473c1dc2449f04222b41bb9933 Mon Sep 17 00:00:00 2001
From: Aaron Z <aaron.zavora@databricks.com>
Date: Wed, 22 May 2024 14:06:05 -0400
Subject: [PATCH 43/46] helper function

---
 databricksx12/edi.py | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/databricksx12/edi.py b/databricksx12/edi.py
index e2ae36e..50a293d 100644
--- a/databricksx12/edi.py
+++ b/databricksx12/edi.py
@@ -59,7 +59,12 @@ def num_transactions(self):
     #
     def num_functional_groups(self):
         return len(self.segments_by_name("GE"))
-    
+
+    #
+    # Maps a list of indexes [0,4,7] to a series of ranges -> [(0,4), (4,7)]
+    #
+    def _index_to_tuples(self, indexes):
+        return list((zip(indexes, indexes[1:])))
     
     #
     # Return all segments associated with each funtional group

From 54ecf0586fcc375ee99081e5dd000d98f8e4986f Mon Sep 17 00:00:00 2001
From: Aaron Z <aaron.zavora@databricks.com>
Date: Wed, 22 May 2024 15:34:40 -0400
Subject: [PATCH 44/46] service_line loops

---
 databricksx12/hls/claim.py                    | 51 +++++++-------
 databricksx12/hls/healthcare.py               |  8 ++-
 .../hls/support_classes/identities.py         | 67 ++++++++++---------
 3 files changed, 68 insertions(+), 58 deletions(-)

diff --git a/databricksx12/hls/claim.py b/databricksx12/hls/claim.py
index 10c7038..a8cef81 100644
--- a/databricksx12/hls/claim.py
+++ b/databricksx12/hls/claim.py
@@ -1,15 +1,6 @@
 from databricksx12.edi import EDI, AnsiX12Delim, Segment
 from databricksx12.hls.loop import Loop
-from databricksx12.hls.support_classes.identities import (
-    Identity,
-    BillingIdentity, 
-    SubscriberIdentity, 
-    PatientIdentity, 
-    ClaimIdentity, 
-    SubmitterIdentity, 
-    ReceiverIdentity, 
-    ServiceIdentity,
-)
+from databricksx12.hls.support_classes.identities import *
 from typing import List, Dict
 from collections import defaultdict
 from functools import reduce
@@ -36,8 +27,8 @@ def __init__(
         self.patient_loop = patient_loop
         self.claim_loop = claim_loop
         self.sl_loop = sl_loop
-
         self.build()
+        
 
     def _populate_submitter_loop(self) -> Dict[str, str]:
         return SubmitterIdentity(self.sender_receiver_loop)
@@ -62,9 +53,6 @@ def _populate_patient_loop(self) -> Dict[str, str]:
     def _populate_claim_loop(self) -> Dict[str, str]:
         return ClaimIdentity(self.claim_loop)
 
-    def _populate_sl_loop(self) -> Dict[str, str]:
-        return ServiceIdentity(self.sl_loop) 
-    
     #
     #
     #
@@ -118,12 +106,19 @@ def to_json(self):
             **{'reciever': self.receiver_info.to_dict()},
             **{'subscriber': self.subscriber_info.to_dict()},
             **{'patient': self.patient_info.to_dict()},
-            **{'billing_provider': self.billing_info.to_dict()},
+            **{'providers': [{"TODO":"TODO"}]},
             **{'claim_header': self.claim_info.to_dict()},
-            **{'claim_lines': 'TODO'},
+            **{'claim_lines': [x.to_dict() for x in self.sl_info]}, #List 
             **{'grouped_subscriber_entities': self.subscriber_entities_info}, # call for all entities in a loop[]
         }
 
+    #
+    # Returns each claim line as an array of segments that make up the claim line
+    #
+    def claim_lines(self):
+        return list(map(lambda i: self.sl_loop[i[0]:i[1]],
+                self._index_to_tuples([(i) for i,y in enumerate(self.sl_loop) if y.segment_name()=="LX"]+[len(self.sl_loop)])))
+
     # not sure if this should be here or not, but you get the idea
     def build(self) -> None:
         self.submitter_info = self._populate_submitter_loop()
@@ -140,23 +135,33 @@ def build(self) -> None:
         self.subscriber_entities_info = self._populate_grouped_entities(self.subscriber_loop)
 
 
-
 class Claim837i(MedicalClaim):
 
     NAME = "837I"
 
-# Format of 837P https://www.dhs.wisconsin.gov/publications/p0/p00265.pdf
+    # Format of 837P https://www.dhs.wisconsin.gov/publications/p0/p00265.pdf
 
+    def _populate_sl_loop(self, missing=""):
+        return list(
+            map(lambda s:
+                ServiceLine(
+                    sv2=[x for x in s if x.segment_name()=="SV2"][0],
+                    lx=[x for x in s if x.segment_name()=="LX"][0],
+                    dtp=[x for x in s if x.segment_name()=="DTP"][0]
+                ),self.claim_lines()))
 
 class Claim837p(MedicalClaim):
 
     NAME = "837P"
-
     
-
-class Claim835(MedicalClaim):
-
-    NAME = "835"
+    def _populate_sl_loop(self, missing=""):
+        return list(
+            map(lambda s:
+                ServiceLine(
+                    sv1=[x for x in s if x.segment_name()=="SV1"][0],
+                    lx=[x for x in s if x.segment_name()=="LX"][0],
+                    dtp=[x for x in s if x.segment_name()=="DTP"][0]
+                ), self.claim_lines()))
 
 
 #
diff --git a/databricksx12/hls/healthcare.py b/databricksx12/hls/healthcare.py
index 8745efd..dc69885 100644
--- a/databricksx12/hls/healthcare.py
+++ b/databricksx12/hls/healthcare.py
@@ -6,9 +6,11 @@
 class HealthcareManager(EDI):
 
     def __init__(self, mapping = {
-            "222": Claim837i,
-            "223": Claim837p,
-            "221": None # "835"
+            "221": None, # Remittance "835"
+            "222": Claim837p,
+            "223": Claim837i,
+            "224": None #Dental 
+
     }):
         self.mapping = mapping
 
diff --git a/databricksx12/hls/support_classes/identities.py b/databricksx12/hls/support_classes/identities.py
index afe571b..f57e64b 100644
--- a/databricksx12/hls/support_classes/identities.py
+++ b/databricksx12/hls/support_classes/identities.py
@@ -218,36 +218,39 @@ def build_receiver_lines(self, receiver_loop: List[Segment]):
 
 
                 
-class ServiceIdentity(Identity):
-    def __init__(self, sl_segments: List[Segment]):
-        self.services = defaultdict(list)
-        super().__init__(sl_segments)
-        self.build_sl_lines(sl_segments)
-
-    def build_sl_lines(self, sl_loop: List[Segment]):
-        sv1_segments = filter(lambda segment: segment.element(0) == 'SV1', sl_loop)
-        sv2_segments = filter(lambda segment: segment.element(0) == 'SV2', sl_loop)
-        self.services['Professional'] = [self.parse_professional_service(segment) for segment in sv1_segments]
-        self.services['Institutional'] = [self.parse_institutional_service(segment) for segment in sv2_segments]
-
-
-    def parse_professional_service(self, segment: Segment):
-        service_type, procedure_code = segment.element(1).split(':')[0:2]  # assuming 7 elements but choosing first two
-        return {
-            'Type': service_type,
-            'Procedure Code': procedure_code,
-            'Procedure Amount': segment.element(2),
-            'Measurement Code': segment.element(3), #UN or if anesthesia, MJ
-            'Service unit': segment.element(4),
-        }
+class ServiceLine(Identity):
+
+    def common(self, sv, lx, dtp):
+        self.claim_line_number = lx.element(1)
+        self.service_date = dtp.element(3)
+        self.service_time = dtp.element(1)
+        self.service_date_format = dtp.element(2)
+
+    #
+    # Institutional Claims
+    # 
+    def __init__(self, sv2, lx, dtp):
+        self.common(sv2, lx, dtp)
+        self.units = sv2.element(6)
+        self.units_measurement = sv1.element(5)
+        self.line_chrg_amt = sv2.element(4)
+        self.prcdr_cd = sv2.element(2, 1)
+        self.prcdr_cd_type = sv2.element(2, 0)
+        self.modifier_cds = ','.join(filter(lambda x: x!="", [sv1.element(2, 2, ""), sv1.element(2, 3, ""), sv1.element(2, 4, ""), sv1.element(2, 5, "")]))
+        self.revenue_cd = sv2.element(1)
+
+    #
+    # Professional Claims
+    #
+    def __init__(self, sv1, lx, dtp):
+        self.common(sv1, lx, dtp)
+        self.units = sv1.element(4)
+        self.units_measurement = sv1.element(3)
+        self.line_chrg_amt = sv1.element(2)
+        self.prcdr_cd = sv1.element(1, 1)
+        self.prcdr_cd_type = sv1.element(1, 0)
+        self.modifier_cds = ','.join(filter(lambda x: x!="", [sv1.element(1, 2, ""), sv1.element(1, 3, ""), sv1.element(1, 4, ""), sv1.element(1, 5, "")]))
+        self.place_of_service = sv1.element(5)
+        self.dg_cd_pntr = sv1.element(7)
 
-    def parse_institutional_service(self, segment: Segment):
-        service_type, procedure_code = segment.element(2).split(':')[0:2]  # assuming 7 elements but choosing first two
-        return {
-            'Type': service_type,
-            'Revenue Code': segment.element(1),
-            'Procedure Code': procedure_code,
-            'Procedure Amount': segment.element(3),
-            'Measurement Code': segment.element(4), #UN or if anesthesia, MJ
-            'Service unit': segment.element(5),
-        }
\ No newline at end of file
+        

From b4e04572d8e38a0d8d0e674106097b4b4556de2a Mon Sep 17 00:00:00 2001
From: Aaron Z <aaron.zavora@databricks.com>
Date: Wed, 22 May 2024 17:24:33 -0400
Subject: [PATCH 45/46] claim lines

---
 databricksx12/hls/claim.py                    |  4 +-
 .../hls/support_classes/identities.py         | 66 ++++++++++++-------
 2 files changed, 43 insertions(+), 27 deletions(-)

diff --git a/databricksx12/hls/claim.py b/databricksx12/hls/claim.py
index a8cef81..6ad5d91 100644
--- a/databricksx12/hls/claim.py
+++ b/databricksx12/hls/claim.py
@@ -144,7 +144,7 @@ class Claim837i(MedicalClaim):
     def _populate_sl_loop(self, missing=""):
         return list(
             map(lambda s:
-                ServiceLine(
+                ServiceLine.from_sv2(
                     sv2=[x for x in s if x.segment_name()=="SV2"][0],
                     lx=[x for x in s if x.segment_name()=="LX"][0],
                     dtp=[x for x in s if x.segment_name()=="DTP"][0]
@@ -157,7 +157,7 @@ class Claim837p(MedicalClaim):
     def _populate_sl_loop(self, missing=""):
         return list(
             map(lambda s:
-                ServiceLine(
+                ServiceLine.from_sv1(
                     sv1=[x for x in s if x.segment_name()=="SV1"][0],
                     lx=[x for x in s if x.segment_name()=="LX"][0],
                     dtp=[x for x in s if x.segment_name()=="DTP"][0]
diff --git a/databricksx12/hls/support_classes/identities.py b/databricksx12/hls/support_classes/identities.py
index f57e64b..a2bed66 100644
--- a/databricksx12/hls/support_classes/identities.py
+++ b/databricksx12/hls/support_classes/identities.py
@@ -220,37 +220,53 @@ def build_receiver_lines(self, receiver_loop: List[Segment]):
                 
 class ServiceLine(Identity):
 
-    def common(self, sv, lx, dtp):
-        self.claim_line_number = lx.element(1)
-        self.service_date = dtp.element(3)
-        self.service_time = dtp.element(1)
-        self.service_date_format = dtp.element(2)
+    def __init__(self, d):  # d: mapping of attribute name -> value describing one service line
+        for k,v in d.items():
+            setattr(self,k,v)  # every key of d becomes an instance attribute of the same name
+
+    @staticmethod
+    def common(sv, lx, dtp):  # builds the fields shared by from_sv1 and from_sv2; `sv` is accepted but not read here
+        return {
+            "claim_line_number": lx.element(1),
+            "service_date": dtp.element(3),
+            "service_time": dtp.element(1),  # NOTE(review): DTP element 1 is normally the date/time qualifier code, not a time -- confirm the key name
+            "service_date_format": dtp.element(2)
+        }
 
     #
     # Institutional Claims
-    # 
-    def __init__(self, sv2, lx, dtp):
-        self.common(sv2, lx, dtp)
-        self.units = sv2.element(6)
-        self.units_measurement = sv1.element(5)
-        self.line_chrg_amt = sv2.element(4)
-        self.prcdr_cd = sv2.element(2, 1)
-        self.prcdr_cd_type = sv2.element(2, 0)
-        self.modifier_cds = ','.join(filter(lambda x: x!="", [sv1.element(2, 2, ""), sv1.element(2, 3, ""), sv1.element(2, 4, ""), sv1.element(2, 5, "")]))
-        self.revenue_cd = sv2.element(1)
+    #
+    @classmethod
+    def from_sv2(cls, sv2, lx, dtp):  # alternate constructor: one institutional service line from its SV2, LX and DTP segments
+        return cls({**cls.common(sv2, lx, dtp),  # shared LX/DTP fields first, SV2-specific fields merged on top
+                    **{
+                        "units": sv2.element(5),
+                        "units_measurement": sv2.element(4),
+                        "line_chrg_amt": sv2.element(3),
+                        "prcdr_cd": sv2.element(2, 1, ""),  # NOTE(review): the "" third argument is presumably the value used when the component is absent -- confirm against Segment.element
+                        "prcdr_cd_type": sv2.element(2, 0, ""),
+                        "modifier_cds": ','.join(filter(lambda x: x!="", [sv2.element(2, 2, ""), sv2.element(2, 3, ""), sv2.element(2, 4,""), sv2.element(2, 5, "")])),  # components 2-5 of element 2, empty ones dropped, comma-joined
+                        "revenue_cd": sv2.element(1)
+                    }
+                })
 
     #
     # Professional Claims
     #
-    def __init__(self, sv1, lx, dtp):
-        self.common(sv1, lx, dtp)
-        self.units = sv1.element(4)
-        self.units_measurement = sv1.element(3)
-        self.line_chrg_amt = sv1.element(2)
-        self.prcdr_cd = sv1.element(1, 1)
-        self.prcdr_cd_type = sv1.element(1, 0)
-        self.modifier_cds = ','.join(filter(lambda x: x!="", [sv1.element(1, 2, ""), sv1.element(1, 3, ""), sv1.element(1, 4, ""), sv1.element(1, 5, "")]))
-        self.place_of_service = sv1.element(5)
-        self.dg_cd_pntr = sv1.element(7)
+    @classmethod
+    def from_sv1(cls, sv1, lx, dtp):  # alternate constructor: one professional service line from its SV1, LX and DTP segments
+        return cls({**cls.common(sv1, lx, dtp),  # shared LX/DTP fields first, SV1-specific fields merged on top
+                    **{
+                        "units": sv1.element(4),
+                        "units_measurement": sv1.element(3),
+                        "line_chrg_amt": sv1.element(2),
+                        "prcdr_cd": sv1.element(1, 1),  # NOTE(review): no "" third argument here, unlike from_sv2 -- confirm what element() does when the component is absent
+                        "prcdr_cd_type": sv1.element(1, 0),
+                        "modifier_cds": ','.join(filter(lambda x: x!="", [sv1.element(1, 2, ""), sv1.element(1, 3, ""), sv1.element(1, 4,""), sv1.element(1, 5, "")])),  # components 2-5 of element 1, empty ones dropped, comma-joined
+                        "place_of_service": sv1.element(5),
+                        "dg_cd_pntr": sv1.element(7)
+                    }
+                })
+        
 
         

From 59ddfbedaf1b8b37407c9fbf4db0c205b2f38fee Mon Sep 17 00:00:00 2001
From: Aaron Z <aaron.zavora@databricks.com>
Date: Wed, 22 May 2024 17:29:51 -0400
Subject: [PATCH 46/46] adding claim lines

---
 tests/test_claims.py | 25 +++++++++++++++++++++++++
 1 file changed, 25 insertions(+)
 create mode 100644 tests/test_claims.py

diff --git a/tests/test_claims.py b/tests/test_claims.py
new file mode 100644
index 0000000..2dff588
--- /dev/null
+++ b/tests/test_claims.py
@@ -0,0 +1,25 @@
+from test_spark_base import *
+from databricksx12.hls import *
+from databricksx12 import *
+import unittest, re
+
+class TestClaims(PySparkBaseTest):
+
+    def test_professional_service_lines(self):
+        """The sample 837P file yields two service lines with the expected line numbers, place of service and charges."""
+        with open("sampledata/837/CC_837P_EDI.txt", "rb") as f:  # context manager so the handle is closed deterministically
+            edi = EDI(f.read().decode("utf-8"))
+        data = HealthcareManager().from_edi(edi)[0]
+        assert len(data.sl_info) == 2
+        assert [y.to_dict().get("claim_line_number") for y in data.sl_info] == ['1', '2']
+        assert [y.to_dict().get("place_of_service") for y in data.sl_info] == ['11', '11']
+        assert [y.to_dict().get("line_chrg_amt") for y in data.sl_info] == ['300', '300']
+
+    def test_institutional_service_lines(self):
+        """The sample 837I file yields nine service lines with the expected line numbers, revenue codes and charge total."""
+        with open("sampledata/837/CC_837I_EDI.txt", "rb") as f:  # context manager so the handle is closed deterministically
+            edi = EDI(f.read().decode("utf-8"))
+        data = HealthcareManager().from_edi(edi)[0]
+        assert [y.to_dict().get("claim_line_number") for y in data.sl_info] == ['1', '2', '3', '4', '5', '6', '7', '8', '9']
+        assert [y.to_dict().get("revenue_cd") for y in data.sl_info] == ['0124', '0250', '0260', '0300', '0301', '0305', '0306', '0307', '0351']
+        assert abs(sum(float(y.to_dict().get("line_chrg_amt")) for y in data.sl_info) - 17166.7) < 0.005  # half-cent tolerance: a sum of binary floats is not guaranteed to equal the decimal literal exactly