From 54bef34c1fedf701000808e61f871e5450e9090d Mon Sep 17 00:00:00 2001 From: Raven Date: Tue, 2 Apr 2024 15:56:31 -0400 Subject: [PATCH 01/46] raven's test notebook with a class to parse billed amounts and sbrs --- .../hls/test-notebooks/claim-test.ipynb | 273 ++++++++++++++++++ 1 file changed, 273 insertions(+) create mode 100644 databricksx12/hls/test-notebooks/claim-test.ipynb diff --git a/databricksx12/hls/test-notebooks/claim-test.ipynb b/databricksx12/hls/test-notebooks/claim-test.ipynb new file mode 100644 index 0000000..481f558 --- /dev/null +++ b/databricksx12/hls/test-notebooks/claim-test.ipynb @@ -0,0 +1,273 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "['/Users/raven.mukherjee/edi-sol-accelerator/x12-edi-parser/databricksx12/hls/test-notebooks', '/opt/homebrew/Cellar/python@3.12/3.12.2_1/Frameworks/Python.framework/Versions/3.12/lib/python312.zip', '/opt/homebrew/Cellar/python@3.12/3.12.2_1/Frameworks/Python.framework/Versions/3.12/lib/python3.12', '/opt/homebrew/Cellar/python@3.12/3.12.2_1/Frameworks/Python.framework/Versions/3.12/lib/python3.12/lib-dynload', '', '/Users/raven.mukherjee/edi-sol-accelerator/edi-parse-env/lib/python3.12/site-packages']\n" + ] + } + ], + "source": [ + "import sys\n", + "print(sys.path)" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "{\n", + " \"EDI.sender_tax_id\": \"ZZ\",\n", + " \"list\": [\n", + " {\n", + " \"FunctionalGroup.receiver\": \"123456789\",\n", + " \"FunctionalGroup.sender\": \"CLEARINGHOUSE\",\n", + " \"FunctionalGroup.transaction_datetime\": \"20180508:0833\",\n", + " \"FunctionalGroup.transaction_type\": \"222\",\n", + " \"list\": [\n", + " {\n", + " \"Transaction.transaction_type\": \"222\"\n", + " },\n", + " {\n", + " \"Transaction.transaction_type\": \"222\"\n", + " },\n", + " {\n", + " \"Transaction.transaction_type\": \"222\"\n", + " },\n", + " {\n", + " \"Transaction.transaction_type\": \"222\"\n", + " },\n", + " {\n", + " \"Transaction.transaction_type\": \"222\"\n", + " }\n", + " ]\n", + " }\n", + " ]\n", + "}\n" + ] + } + ], + "source": [ + "from databricksx12.edi import *\n", + "x = EDIManager(EDI(open(\"/Users/raven.mukherjee/solution_accelerators/x12-edi-parser/sampledata/837/CHPW_Claimdata.txt\", \"rb\").read().decode(\"utf-8\")))\n", + "\n", + "import json\n", + "print(json.dumps(x.flatten(x.data), indent=4))" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [], + "source": [ + "sample_data_837i = open(\"/Users/raven.mukherjee/solution_accelerators/x12-edi-parser/sampledata/837/CC_837I_EDI.txt\", \"rb\").read().decode(\"utf-8\")" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [], + "source": [ + "# from databricksx12.edi import *\n", + "\n", + "# class extend_transaction(EDI):\n", + "# def __init__(self, data, delim_cls=AnsiX12Delim):\n", + "# super().__init__(data, delim_cls)\n", + "\n", + "# @property\n", + "# def full_transaction(self):\n", + "# transaction_start_indexes = [i for i, segment in enumerate(self.data) if segment.segment_name() == \"ST\"]\n", + "# transaction_end_indexes = [i for i, segment in enumerate(self.data) if segment.segment_name() == \"SE\"]\n", + "\n", + "# transactions = []\n", + "# for start, end in zip(transaction_start_indexes, transaction_end_indexes):\n", + "# transaction_segments = self.data[start:end+1]\n", + "# transactions.append(transaction_segments)\n", + "# return transactions\n", + "\n", + "# @property\n", + "# def claim_identifier(self):\n", + "# transactions = self.full_transaction\n", + "# claim_identifiers = []\n", + "\n", + "# for transaction_segments in transactions:\n", + "# claim_id = None\n", + "# for segment in transaction_segments:\n", + "# if segment.segment_name() == \"BHT\":\n", + "# claim_id = segment.element(3) #confirm\n", + "# break\n", + "# claim_identifiers.append(claim_id)\n", + "\n", + "# return claim_identifiers\n", + "\n", + "# @property\n", + "# def header_billing_amount(self):\n", + "# transactions = self.full_transaction\n", + "# billing_headers = []\n", + "\n", + "# for transaction_segments in transactions:\n", + "# for segment in transaction_segments:\n", + "# if segment.segment_name() == \"CLM\":\n", + "# bill_header = segment.element(1)\n", + "# billing_headers.append(bill_header)\n", + "# break # one CLM segment per transaction?\n", + "\n", + "# return billing_headers\n", + "\n", + "# @property\n", + "# def billed_amount(self):\n", + "# transactions = self.full_transaction\n", + "# billed_amounts = []\n", + "\n", + "# for transaction_segments in transactions:\n", + "# for segment in transaction_segments:\n", + "# if segment.segment_name() == \"CLM\":\n", + "# billed_amount = segment.element(2) # Billed amount is the second element\n", + "# billed_amounts.append(billed_amount)\n", + "# break\n", + "\n", + "# return billed_amounts\n", + " \n", + "# # @property\n", + "# # def subscriber(self):\n", + "# # transactions = self.full_transaction\n", + "\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [], + "source": [ + "# # use raw EDI data\n", + "# edi_object = extend_transaction(sample_data_837i)\n", + "\n", + "# # call different vars\n", + "# transactions = edi_object.full_transaction\n", + "# claim_ids = edi_object.claim_identifier\n", + "# header = edi_object.header_billing_amount\n", + "# billed_amount = edi_object.billed_amount" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": {}, + "outputs": [], + "source": [ + "# identify elements functionally!\n", + "\n", + "from databricksx12.edi import *\n", + "\n", + "class extend_transaction(EDI):\n", + " def __init__(self, data, delim_cls=AnsiX12Delim):\n", + " super().__init__(data, delim_cls)\n", + "\n", + " # Use map and lambda to populate billed amounts and subscribers\n", + " self.billed_amounts = list(map(lambda x: x.element(2), self.segments_by_name(\"CLM\")))\n", + " self.subscribers = list(map(lambda x: x.element(4), self.segments_by_name(\"SBR\")))\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": {}, + "outputs": [], + "source": [ + "sample_data_chpw_claimdata = open(\"/Users/raven.mukherjee/edi-sol-accelerator/x12-edi-parser/sampledata/837/CHPW_Claimdata.txt\", \"rb\").read().decode(\"utf-8\")\n", + "# use raw EDI data\n", + "edi_object = extend_transaction(sample_data_chpw_claimdata)\n", + "billed_amounts = edi_object.billed_amounts\n", + "subscribers = edi_object.subscribers\n" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "['20', '50.1', '11.64', '234', '20']" + ] + }, + "execution_count": 14, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "billed_amounts" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "['COMMUNITY HLTH PLAN OF WASH',\n", + " 'COMMUNITY HLTH PLAN OF WASH',\n", + " 'COMMUNITY HLTH PLAN OF WASH',\n", + " 'COMMUNITY HLTH PLAN OF WASH',\n", + " 'COMMUNITY HLTH PLAN OF WASH']" + ] + }, + "execution_count": 15, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "subscribers" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "edi-parse-kernel", + "language": "python", + "name": "edi-parse-env" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.12.2" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} From ab0a584d9f8971af673e3e11bea1485dd1c0af5c Mon Sep 17 00:00:00 2001 From: Raven Date: Thu, 4 Apr 2024 18:04:42 -0400 Subject: [PATCH 02/46] building a class to extract Billing provider Subscriber names based on the Loop called --- databricksx12/hls/hierarchicalloop.py | 79 +++++++++++++++++++++++++++ 1 file changed, 79 insertions(+) create mode 100644 databricksx12/hls/hierarchicalloop.py diff --git a/databricksx12/hls/hierarchicalloop.py b/databricksx12/hls/hierarchicalloop.py new file mode 100644 index 0000000..f633cd2 --- /dev/null +++ b/databricksx12/hls/hierarchicalloop.py @@ -0,0 +1,79 @@ +from databricksx12.edi import * + +class LoopMapping: + def __init__(self): + self.mappings = { + '2000A': ('20', 'NM1', '3'), + '2000B': ('22', 'SBR', '4'), + } + + def get_identifiers(self, loop_type): + return self.mappings.get(loop_type, (None, None)) + + +class HierarchicalLoop(EDI): + def __init__(self, data, delim_cls=AnsiX12Delim, Loop='2000B'): + super().__init__(data, delim_cls) + self.loop_mapping = LoopMapping() + self.target_element, self.target_segment_name, self.target_element_index = self.loop_mapping.get_identifiers( + Loop) + + # find all HL segments along with the 3rd element that denotes 2000A or 2000B + self.hl_segments = self._hl_identifiers() + + # find all HL segments along with the 3rd element that denotes 2000A or 2000B + self.clm_segments = self._clm_identifiers() + + # Calculate ranges and then extract 2000A lines based on those ranges + self.ranges = self.select_range_of_interest( + self.hl_segments, self.clm_segments, self.target_element) + self.extracted_lines = self.extract_lines_based_on_ranges( + self.ranges, self.target_segment_name, self.target_element_index) + + def _hl_identifiers(self): + # Find the segments where HL loop begins + indexed_HL_segments = self.segments_by_name_index("HL") + return [(i, x.element(3)) for i, x in indexed_HL_segments] + + def _clm_identifiers(self): + # Find the segments where CLM loop begins + indexed_CLM_segments = self.segments_by_name_index("CLM") + return [(i, x.element(2)) for i, x in indexed_CLM_segments] + + def select_range_of_interest(self, hl_indexes, clm_indexes, target_value): + ranges = [] + start_index = None + last_index = None + + for index, value in hl_indexes: + if value == target_value: + if start_index is not None: + ranges.append((start_index+1, index)) + start_index = index + elif start_index is not None: + ranges.append((start_index+1, index)) + start_index = None + if clm_indexes: + last_index = clm_indexes[-1][0] + if last_index and start_index is not None: + ranges.append((start_index+1, last_index)) + return ranges + + def extract_lines_based_on_ranges(self, ranges, target_value, target_index): + extracted_elements = [] + # Iterate through each range in the list + for start, end in ranges: + # Retrieve the segments within this range + segments_in_range = self.segments_by_position(start, end) + + desired_elements = map( + lambda segment: segment.element(int(target_index)), + filter( + lambda segment: segment.segment_name() == target_value and len( + segment.data.split(segment.format_cls.ELEMENT_DELIM)) > int(target_index), + segments_in_range + ) + ) + extracted_elements.extend(desired_elements) + + return list(extracted_elements) From 05518c0d789241c3de01f0332edb50a3e4e57695 Mon Sep 17 00:00:00 2001 From: Raven Date: Thu, 4 Apr 2024 18:09:50 -0400 Subject: [PATCH 03/46] changed a few comments --- databricksx12/hls/hierarchicalloop.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/databricksx12/hls/hierarchicalloop.py b/databricksx12/hls/hierarchicalloop.py index f633cd2..29e9223 100644 --- a/databricksx12/hls/hierarchicalloop.py +++ b/databricksx12/hls/hierarchicalloop.py @@ -18,13 +18,13 @@ def __init__(self, data, delim_cls=AnsiX12Delim, Loop='2000B'): self.target_element, self.target_segment_name, self.target_element_index = self.loop_mapping.get_identifiers( Loop) - # find all HL segments along with the 3rd element that denotes 2000A or 2000B + # find all HL segments along with the 3rd element that denotes 2000A (20) or 2000B (22) self.hl_segments = self._hl_identifiers() - # find all HL segments along with the 3rd element that denotes 2000A or 2000B + # find all CLM segments (important for indexing the last HL or SBR within a tx) self.clm_segments = self._clm_identifiers() - # Calculate ranges and then extract 2000A lines based on those ranges + # Calculate ranges and then extract 2000A/B lines based on those ranges self.ranges = self.select_range_of_interest( self.hl_segments, self.clm_segments, self.target_element) self.extracted_lines = self.extract_lines_based_on_ranges( From 9c96544aeb74d059589002986b781f3dbac511a1 Mon Sep 17 00:00:00 2001 From: Aaron Z Date: Tue, 9 Apr 2024 10:33:16 -0400 Subject: [PATCH 04/46] today's discussion --- databricksx12/hls/hierarchicalloop.py | 67 +++++++++++++++++++++++++-- 1 file changed, 62 insertions(+), 5 deletions(-) diff --git a/databricksx12/hls/hierarchicalloop.py b/databricksx12/hls/hierarchicalloop.py index 29e9223..e443011 100644 --- a/databricksx12/hls/hierarchicalloop.py +++ b/databricksx12/hls/hierarchicalloop.py @@ -1,20 +1,38 @@ from databricksx12.edi import * class LoopMapping: + + def __init__(self, mappings=None): + self.mappings = (mappings if mappings is not None else { + '20': { + 'description': 'Information Source', + 'loop': '2000A' + }, + '22': { + 'description': 'Subscriber', + 'loop': '2000B' + } + }) + + + + + """ def __init__(self): self.mappings = { '2000A': ('20', 'NM1', '3'), '2000B': ('22', 'SBR', '4'), } - def get_identifiers(self, loop_type): - return self.mappings.get(loop_type, (None, None)) + + ADZ want our key = (lookup value found in data), value = additional info needed + """ class HierarchicalLoop(EDI): - def __init__(self, data, delim_cls=AnsiX12Delim, Loop='2000B'): + def __init__(self, data, delim_cls=AnsiX12Delim, loop_mapping= LoopMapping.mappings): super().__init__(data, delim_cls) - self.loop_mapping = LoopMapping() + self.loop_mapping = loop_mapping self.target_element, self.target_segment_name, self.target_element_index = self.loop_mapping.get_identifiers( Loop) @@ -32,7 +50,7 @@ def __init__(self, data, delim_cls=AnsiX12Delim, Loop='2000B'): def _hl_identifiers(self): # Find the segments where HL loop begins - indexed_HL_segments = self.segments_by_name_index("HL") + indexed_HL_segments = self.segments_by_nloop_object.extracted_linesame_index("HL") return [(i, x.element(3)) for i, x in indexed_HL_segments] def _clm_identifiers(self): @@ -77,3 +95,42 @@ def extract_lines_based_on_ranges(self, ranges, target_value, target_index): extracted_elements.extend(desired_elements) return list(extracted_elements) + + + def parent_loops(self): + pass + + def child_loops(self, parent_loop_num): + pass + + """ + @return + -index of each HL segment + -index of parent segments + -be able to answer "where is loop XYZ?" and "at this location, what looop am i in?" + """ + def _hl_segment_indexes(self): + pass + + + self.hl_parents = { + parent: + { index_start : value + index_end : value + children : [ + hl_child : { + index_start: value + index_end: value + } + ] + } + + + self.hl = HL() + self.claim_start_index = segment(clm) + + Who is my billing provider? hl.get_loop(20) + Who is my subscriber? + Who is my patient? + + From 235f0f1abab0b687f133cc479bd7d155a8539568 Mon Sep 17 00:00:00 2001 From: Raven Date: Thu, 11 Apr 2024 11:24:16 -0400 Subject: [PATCH 05/46] two classes in two files; one for finding the hierarchical loops and the other for asking complex questions about claims. Also a new edited sample data example with dependent sub-child in Line 161 --- databricksx12/hls/build-func.py | 138 ++++++++++++++ databricksx12/hls/claim.py | 184 +++++++++++++++---- databricksx12/hls/hierarchicalloop.py | 222 ++++++++++------------- sampledata/837/CHPW_Claimdata_edited.txt | 182 +++++++++++++++++++ 4 files changed, 558 insertions(+), 168 deletions(-) create mode 100644 databricksx12/hls/build-func.py create mode 100644 sampledata/837/CHPW_Claimdata_edited.txt diff --git a/databricksx12/hls/build-func.py b/databricksx12/hls/build-func.py new file mode 100644 index 0000000..2de4f64 --- /dev/null +++ b/databricksx12/hls/build-func.py @@ -0,0 +1,138 @@ +""" +Apr 9 notes +""" +from databricksx12.edi import * + +class LoopMapping: + + def __init__(self, mappings=None): + self.mappings = (mappings if mappings is not None else { + '20': { + 'description': 'Information Source', + 'loop': '2000A' + }, + '22': { + 'description': 'Subscriber', + 'loop': '2000B' + } + }) + + + + + """ + def __init__(self): + self.mappings = { + '2000A': ('20', 'NM1', '3'), + '2000B': ('22', 'SBR', '4'), + } + + + ADZ want our key = (lookup value found in data), value = additional info needed + """ + + +class HierarchicalLoop(EDI): + def __init__(self, data, delim_cls=AnsiX12Delim, loop_mapping= LoopMapping.mappings): + super().__init__(data, delim_cls) + self.loop_mapping = loop_mapping + self.target_element, self.target_segment_name, self.target_element_index = self.loop_mapping.get_identifiers( + Loop) + + # find all HL segments along with the 3rd element that denotes 2000A (20) or 2000B (22) + self.hl_segments = self._hl_identifiers() + + # find all CLM segments (important for indexing the last HL or SBR within a tx) + self.clm_segments = self._clm_identifiers() + + # Calculate ranges and then extract 2000A/B lines based on those ranges + self.ranges = self.select_range_of_interest( + self.hl_segments, self.clm_segments, self.target_element) + self.extracted_lines = self.extract_lines_based_on_ranges( + self.ranges, self.target_segment_name, self.target_element_index) + + def _hl_identifiers(self): + # Find the segments where HL loop begins + indexed_HL_segments = self.segments_by_nloop_object.extracted_linesame_index("HL") + return [(i, x.element(3)) for i, x in indexed_HL_segments] + + def _clm_identifiers(self): + # Find the segments where CLM loop begins + indexed_CLM_segments = self.segments_by_name_index("CLM") + return [(i, x.element(2)) for i, x in indexed_CLM_segments] + + def select_range_of_interest(self, hl_indexes, clm_indexes, target_value): + ranges = [] + start_index = None + last_index = None + + for index, value in hl_indexes: + if value == target_value: + if start_index is not None: + ranges.append((start_index+1, index)) + start_index = index + elif start_index is not None: + ranges.append((start_index+1, index)) + start_index = None + if clm_indexes: + last_index = clm_indexes[-1][0] + if last_index and start_index is not None: + ranges.append((start_index+1, last_index)) + return ranges + + def extract_lines_based_on_ranges(self, ranges, target_value, target_index): + extracted_elements = [] + # Iterate through each range in the list + for start, end in ranges: + # Retrieve the segments within this range + segments_in_range = self.segments_by_position(start, end) + + desired_elements = map( + lambda segment: segment.element(int(target_index)), + filter( + lambda segment: segment.segment_name() == target_value and len( + segment.data.split(segment.format_cls.ELEMENT_DELIM)) > int(target_index), + segments_in_range + ) + ) + extracted_elements.extend(desired_elements) + + return list(extracted_elements) + + + def parent_loops(self): + pass + + def child_loops(self, parent_loop_num): + pass + + """ + @return + -index of each HL segment + -index of parent segments + -be able to answer "where is loop XYZ?" and "at this location, what looop am i in?" + """ + def _hl_segment_indexes(self): + pass + + + self.hl_parents = { + parent: + { index_start : value + index_end : value + children : [ + hl_child : { + index_start: value + index_end: value + } + ] + } + + + self.hl = HL() + self.claim_start_index = segment(clm) + + Who is my billing provider? hl.get_loop(20) + Who is my subscriber? + Who is my patient? + diff --git a/databricksx12/hls/claim.py b/databricksx12/hls/claim.py index 94c21c3..88aa76f 100644 --- a/databricksx12/hls/claim.py +++ b/databricksx12/hls/claim.py @@ -1,59 +1,169 @@ from databricksx12.edi import * - +from databricksx12.hls import hierarchicalloop # # Base claim class # +from databricksx12.edi import * + + +class LoopMapping: + def __init__(self, mappings=None): + self.mappings = (mappings if mappings is not None else { + '20': { + 'description': 'Information Source', + 'loop': '2000A' + }, + '22': { + 'description': 'Subscriber', + 'loop': '2000B' + } + }) + + def get_mapping(self, element): + return self.mappings.get(element, None) + + class Claim(EDI): + def __init__(self, data, delim_cls=AnsiX12Delim, loop_mapping=LoopMapping(), element=None): + super().__init__(data, delim_cls) + self.loop_mapping = loop_mapping + self.target_element = element + self.loop_value = self.loop_mapping.get_mapping( + self.target_element).get('loop') + self.target_segment_name, self.target_element_index = self.get_reference_names_of_loop( + self.loop_value) - def __init__(self, segments, delim_cls = AnsiX12Delim): - self.data = segments - self.format_cls = delim_cls - #For Raven TODO marked - self.claim_identifier = None #TODO include both CH and RP values here - self.claim_lines = None #TODO Maintain a list of claim lines using ClaimLine class - self.subscriber = None #TODO selecting the subscriber - self.patient = None #TODO selecting the patient info, maybe patient should be its own class? + # find all CLM segments (important for indexing the last HL or SBR within a tx) + self.clm_segments = self._clm_identifiers() + # this feels misplaced; how to fix? + def get_reference_names_of_loop(self, loop): + identifiers = { + '2000A': ('NM1', '3'), + '2000B': ('SBR', '4'), + } + return identifiers.get(loop, (None, None)) - # - # TODO total amount billed at the header of the claim - # - def header_total_billed_amount(self): - pass + def _clm_identifiers(self): + # Find the segments where CLM loop begins + indexed_CLM_segments = self.segments_by_name_index("CLM") + return [(i, x.element(2)) for i, x in indexed_CLM_segments] + + def extract_lines_based_on_ranges(self, ranges, target_value, target_index): + extracted_elements = [] + # Iterate through each range in the list + for start, end in ranges: + # Retrieve the segments within this range + segments_in_range = self.segments_by_position(start, end) + + desired_elements = map( + lambda segment: segment.element(int(target_index)), + filter( + lambda segment: segment.segment_name() == target_value and len( + segment.data.split(segment.format_cls.ELEMENT_DELIM)) > int(target_index), + segments_in_range + ) + ) + extracted_elements.extend(desired_elements) + + return list(extracted_elements) - # - # TODO total amount billed across lines - # - def lines_total_billed_amount(self): - pass +class ClaimManager: + def __init__(self, data, delim_cls=AnsiX12Delim): + self.hlmanager = hierarchicalloop.HierarchicalLoopManager(data) + self.hl_summary = self.hlmanager.summary + # need to sort mapping dependency + self.claim = Claim(data, delim_cls, LoopMapping(), element='20') + self.claim_summaries = [{ + 'clm_ind': clm_index, + 'parent_counter': self._find_claim_in_tx(self.hl_summary, clm_index)[0], + 'child_start_index': self._find_claim_in_tx(self.hl_summary, clm_index)[1], + 'parent_range': self.get_ranges(self.hl_summary, self._find_claim_in_tx(self.hl_summary, clm_index)[0])[0], + 'children_ranges': self.get_ranges(self.hl_summary, self._find_claim_in_tx(self.hl_summary, clm_index)[0])[1] + } for clm_index in [i for (i, j) in self.claim.clm_segments]] - -class ClaimLine(Segment): + def _find_claim_in_tx(self, tx_summary, clm_index): + for parent_counter, parent_info in tx_summary.items(): + if parent_info['parent_index_start'] <= int(clm_index) <= parent_info['parent_index_end']: + for child_info in parent_info['children']: + if child_info['child_index_start'] <= int(clm_index) <= child_info['child_index_stop']: + return parent_counter, child_info['child_index_start'] + return None, None - # - # TODO build out claim line uses (case class) - # - def __init__(self): + def get_ranges(self, hl_summary_dict, loop_counter): + info = hl_summary_dict.get(loop_counter, None) + if info is None: + return None + + parent_range = (info['parent_index_start'], info['parent_index_end']) + children_ranges = [(child['child_index_start'], child['child_index_stop']) + for child in info.get('children', [])] + + return parent_range, children_ranges + + def _find_billing_providers(self): + first_lines = [] + for summary in self.claim_summaries: + lines = self.claim.extract_lines_based_on_ranges( + [summary['parent_range'] + ], self.claim.target_segment_name, self.claim.target_element_index + ) + if lines: # Check if any lines were extracted + # Append the first line of this iteration + first_lines.append(lines[0]) + return first_lines + + def _find_subscribers(self): pass - """ - select fields: - procedure code - procedure code type (HCPCS, CPT4, ICD10) - revenuce code - procedure modifier codes - billed amount - - """ + + +""" +claims = ClaimManager(sample_data_chpw_claimdata) +claims.claim_summaries + +[{'clm_ind': 23, + 'parent_counter': '1', + 'child_start_index': 16, + 'parent_range': (7, 35), + 'children_ranges': [(16, 35)]}, + {'clm_ind': 57, + 'parent_counter': '63', + 'child_start_index': 50, + 'parent_range': (41, 69), + 'children_ranges': [(50, 69)]}, + {'clm_ind': 91, + 'parent_counter': '49', + 'child_start_index': 84, + 'parent_range': (75, 103), + 'children_ranges': [(84, 103)]}, + {'clm_ind': 125, + 'parent_counter': '75', + 'child_start_index': 118, + 'parent_range': (109, 138), + 'children_ranges': [(118, 138)]}, + {'clm_ind': 160, + 'parent_counter': '79', + 'child_start_index': 153, + 'parent_range': (144, 172), + 'children_ranges': [(153, 172)]}] + + claims._find_billing_providers() + ['BH CLINIC OF VANCOUVER', + 'BH CLINIC OF VANCOUVER', + 'BH CLINIC OF VANCOUVER', + 'BH CLINIC OF VANCOUVER', + 'BH CLINIC OF VANCOUVER'] +""" class Claim837i(Claim): NAME = "837I" -#Format of 837P https://www.dhs.wisconsin.gov/publications/p0/p00265.pdf +# Format of 837P https://www.dhs.wisconsin.gov/publications/p0/p00265.pdf + + class Claim837p(Claim): NAME = "837P" - - diff --git a/databricksx12/hls/hierarchicalloop.py b/databricksx12/hls/hierarchicalloop.py index e443011..6afcbda 100644 --- a/databricksx12/hls/hierarchicalloop.py +++ b/databricksx12/hls/hierarchicalloop.py @@ -1,136 +1,96 @@ from databricksx12.edi import * -class LoopMapping: - - def __init__(self, mappings=None): - self.mappings = (mappings if mappings is not None else { - '20': { - 'description': 'Information Source', - 'loop': '2000A' - }, - '22': { - 'description': 'Subscriber', - 'loop': '2000B' - } - }) - - - - - """ - def __init__(self): - self.mappings = { - '2000A': ('20', 'NM1', '3'), - '2000B': ('22', 'SBR', '4'), - } - - - ADZ want our key = (lookup value found in data), value = additional info needed - """ - class HierarchicalLoop(EDI): - def __init__(self, data, delim_cls=AnsiX12Delim, loop_mapping= LoopMapping.mappings): + def __init__(self, data, delim_cls=AnsiX12Delim): super().__init__(data, delim_cls) - self.loop_mapping = loop_mapping - self.target_element, self.target_segment_name, self.target_element_index = self.loop_mapping.get_identifiers( - Loop) - - # find all HL segments along with the 3rd element that denotes 2000A (20) or 2000B (22) - self.hl_segments = self._hl_identifiers() - - # find all CLM segments (important for indexing the last HL or SBR within a tx) - self.clm_segments = self._clm_identifiers() - - # Calculate ranges and then extract 2000A/B lines based on those ranges - self.ranges = self.select_range_of_interest( - self.hl_segments, self.clm_segments, self.target_element) - self.extracted_lines = self.extract_lines_based_on_ranges( - self.ranges, self.target_segment_name, self.target_element_index) - - def _hl_identifiers(self): - # Find the segments where HL loop begins - indexed_HL_segments = self.segments_by_nloop_object.extracted_linesame_index("HL") - return [(i, x.element(3)) for i, x in indexed_HL_segments] - - def _clm_identifiers(self): - # Find the segments where CLM loop begins - indexed_CLM_segments = self.segments_by_name_index("CLM") - return [(i, x.element(2)) for i, x in indexed_CLM_segments] - - def select_range_of_interest(self, hl_indexes, clm_indexes, target_value): - ranges = [] - start_index = None - last_index = None - - for index, value in hl_indexes: - if value == target_value: - if start_index is not None: - ranges.append((start_index+1, index)) - start_index = index - elif start_index is not None: - ranges.append((start_index+1, index)) - start_index = None - if clm_indexes: - last_index = clm_indexes[-1][0] - if last_index and start_index is not None: - ranges.append((start_index+1, last_index)) - return ranges - - def extract_lines_based_on_ranges(self, ranges, target_value, target_index): - extracted_elements = [] - # Iterate through each range in the list - for start, end in ranges: - # Retrieve the segments within this range - segments_in_range = self.segments_by_position(start, end) - - desired_elements = map( - lambda segment: segment.element(int(target_index)), - filter( - lambda segment: segment.segment_name() == target_value and len( - segment.data.split(segment.format_cls.ELEMENT_DELIM)) > int(target_index), - segments_in_range - ) - ) - extracted_elements.extend(desired_elements) - - return list(extracted_elements) - - - def parent_loops(self): - pass - - def child_loops(self, parent_loop_num): - pass - - """ - @return - -index of each HL segment - -index of parent segments - -be able to answer "where is loop XYZ?" and "at this location, what looop am i in?" - """ - def _hl_segment_indexes(self): - pass - - - self.hl_parents = { - parent: - { index_start : value - index_end : value - children : [ - hl_child : { - index_start: value - index_end: value - } - ] - } - - - self.hl = HL() - self.claim_start_index = segment(clm) - - Who is my billing provider? hl.get_loop(20) - Who is my subscriber? - Who is my patient? - - + + # find all HL and SE segments to find start and end of loops + CLM segments + self.indexed_HL_segments = self.segments_by_name_index("HL") + self.indexed_SE_segments = self.segments_by_name_index("SE") + + # parent and children loops + self.parent_loops = self._parent_loops() + self.child_loops = self._child_loops(self.parent_loops) + self.subchild_loops = self._child_loops(self.child_loops) + + def _parent_loops(self): + parent_start_loops = [] + for i, segment in self.indexed_HL_segments: + # Check if the second element is empty (and the third element is '20' and the last element is '1') + if segment.element(2) == '': + # index of parent, counter, and if child + parent_start_loops.append( + (i, segment.element(1), segment.element(-1))) + + parent_end_loops = [i for i, x in self.indexed_SE_segments] + return [(tup + (j,)) for tup, j in zip(parent_start_loops, parent_end_loops)] + + def _child_loops(self, parent_loops): + child_loops = [] + for parent_start_index, counter, child_id, parent_stop_index in parent_loops: + if child_id == '1': + for i, segment in self.indexed_HL_segments: + if segment.element(2) == counter: + # index of child, parent/tx counter, and if sub-child + child_loops.append( + (i, counter, segment.element(-1), parent_stop_index)) + + # If child_id is greater than 1, recursively call the fn + if int(child_id) > 1: + child_loops.extend(self._child_loops( + [(parent_start_index, counter, str(int(child_id) - 1), parent_stop_index)])) + return child_loops + + +class HierarchicalLoopManager: + def __init__(self, data, delim_cls=AnsiX12Delim): + self.hl = HierarchicalLoop(data, delim_cls) + self.summary = {} + self.generate_summary() + + def generate_summary(self): + for pl in self.hl.parent_loops: + parent_summary = { + 'parent_index_start': pl[0], + 'parent_index_end': pl[-1], + 'children': [] + } + # Find children loops for this parent tx + children = [] + for cl in self.hl.child_loops: + if pl[0] < cl[0] < pl[-1]: + children.append({ + 'child_index_start': cl[0], + 'child_index_stop': cl[-1] + }) + + # Add children to parent summary + parent_summary['children'] = children + + # Add HL 1 counter to the summary as the key + self.summary[pl[1]] = parent_summary + + +""" +loop_manager = HierarchicalLoopManager(sample_data_837i_edited) +summary = loop_manager.summary + +output: +{'1': {'parent_index_start': 7, + 'parent_index_end': 35, + 'children': [{'child_index_start': 16, 'child_index_stop': 35}]}, + '63': {'parent_index_start': 41, + 'parent_index_end': 69, + 'children': [{'child_index_start': 50, 'child_index_stop': 69}]}, + '49': {'parent_index_start': 75, + 'parent_index_end': 103, + 'children': [{'child_index_start': 84, 'child_index_stop': 103}]}, + '75': {'parent_index_start': 109, + 'parent_index_end': 138, + 'children': [{'child_index_start': 118, 'child_index_stop': 138}]}, + '79': {'parent_index_start': 144, + 'parent_index_end': 179, + 'children': [{'child_index_start': 153, 'child_index_stop': 179}, + {'child_index_start': 160, 'child_index_stop': 179}]}} +""" diff --git a/sampledata/837/CHPW_Claimdata_edited.txt b/sampledata/837/CHPW_Claimdata_edited.txt new file mode 100644 index 0000000..72ecb44 --- /dev/null +++ b/sampledata/837/CHPW_Claimdata_edited.txt @@ -0,0 +1,182 @@ +ISA*00* *00* *01*987654321 *ZZ*123456789 *180508*0833*^*00501*697773230*1*P*:~ +GS*HC*CLEARINGHOUSE*123456789*20180508*0833*212950697*X*005010X222A1~ +ST*837*000000001*005010X222A1~ +BHT*0019*00*7349063984*20180508*0833*CH~ +NM1*41*2*CLEARINGHOUSE LLC*****46*987654321~ +PER*IC*CLEARINGHOUSE CLIENT SERVICES*TE*8005551212*FX*8005551212~ +NM1*40*2*123456789*****46*CHPWA~ +HL*1**20*1~ +NM1*85*2*BH CLINIC OF VANCOUVER*****XX*1122334455~ +N3*12345 MAIN ST~ +N4*VANCOUVER*WA*98662~ +REF*EI*720000000~ +PER*IC*CONTACT*TE*9185551212~ +NM1*87*2~ +N3*PO BOX 1234~ +N4*VANCOUVER*WA*986681234~ +HL*2*1*22*0~ +SBR*P*18**COMMUNITY HLTH PLAN OF WASH*****CI~ +NM1*IL*1*SUBSCRIBER*JOHN*J***MI*987321~ +N3*987 65TH PL~ +N4*VANCOUVER*WA*986640001~ +DMG*D8*19881225*M~ +NM1*PR*2*COMMUNITY HEALTH PLAN OF WASHINGTON*****PI*CHPWA~ +CLM*1805080AV3648339*20***57:B:1*Y*A*Y*Y~ +REF*D9*7349065509~ +HI*ABK:F1120~ +NM1*82*1*PROVIDER*JAMES****XX*1112223338~ +PRV*PE*PXC*261QR0405X~ +NM1*77*2*BH CLINIC OF VANCOUVER*****XX*1122334455~ +N3*12345 MAIN ST SUITE A1~ +N4*VANCOUVER*WA*98662~ +LX*1~ +SV1*HC:H0003*20*UN*1***1~ +DTP*472*D8*20180428~ +REF*6R*142671~ +SE*34*000000001~ +ST*837*000000002*005010X222A1~ +BHT*0019*00*7349063984*20180508*0833*CH~ +NM1*41*2*CLEARINGHOUSE LLC*****46*987654321~ +PER*IC*CLEARINGHOUSE CLIENT SERVICES*TE*8005551212*FX*8005551212~ +NM1*40*2*123456789*****46*CHPWA~ +HL*63**20*1~ +NM1*85*2*BH CLINIC OF VANCOUVER*****XX*1122334455~ +N3*12345 MAIN ST~ +N4*VANCOUVER*WA*98662~ +REF*EI*720000000~ +PER*IC*CONTACT*TE*9185551212~ +NM1*87*2~ +N3*PO BOX 1234~ +N4*VANCOUVER*WA*986681234~ +HL*64*63*22*0~ +SBR*P*18**COMMUNITY HLTH PLAN OF WASH*****CI~ +NM1*IL*1*PATIENT*SUSAN*E***MI*765123~ +N3*765 43RD ST~ +N4*VANCOUVER*WA*986640002~ +DMG*D8*19881031*F~ +NM1*PR*2*COMMUNITY HEALTH PLAN OF WASHINGTON*****PI*CHPWA~ +CLM*1805080AV3648347*50.1***57:B:1*Y*A*Y*Y~ +REF*D9*7349065730~ +HI*ABK:F1520*ABF:F1220~ +NM1*82*1*PROVIDER*SUSAN****XX*1112223346~ +PRV*PE*PXC*261QR0405X~ +NM1*77*2*BH CLINIC OF VANCOUVER*****XX*1122334455~ +N3*12345 MAIN ST SUITE A1~ +N4*VANCOUVER*WA*98662~ +LX*1~ +SV1*HC:96153:HF*50.1*UN*6***1:2~ +DTP*472*D8*20180426~ +REF*6R*143792~ +SE*34*000000002~ +ST*837*000000003*005010X222A1~ +BHT*0019*00*7349063984*20180508*0833*CH~ +NM1*41*2*CLEARINGHOUSE LLC*****46*987654321~ +PER*IC*CLEARINGHOUSE CLIENT SERVICES*TE*8005551212*FX*8005551212~ +NM1*40*2*123456789*****46*CHPWA~ +HL*49**20*1~ +NM1*85*2*BH CLINIC OF VANCOUVER*****XX*1122334455~ +N3*12345 MAIN ST~ +N4*VANCOUVER*WA*98662~ +REF*EI*720000000~ +PER*IC*CONTACT*TE*9185551212~ +NM1*87*2~ +N3*PO BOX 1234~ +N4*VANCOUVER*WA*986681234~ +HL*50*49*22*0~ +SBR*P*18**COMMUNITY HLTH PLAN OF WASH*****CI~ +NM1*IL*1*SUBSCRIBER*JOHN*J***MI*987321~ +N3*987 65TH PL~ +N4*VANCOUVER*WA*986640001~ +DMG*D8*19881225*M~ +NM1*PR*2*COMMUNITY HEALTH PLAN OF WASHINGTON*****PI*CHPWA~ +CLM*1805080AV3648340*11.64***57:B:1*Y*A*Y*Y~ +REF*D9*7349065492~ +HI*ABK:F1020*ABF:F1220~ +NM1*82*1*PROVIDER*SUSAN****XX*1112223346~ +PRV*PE*PXC*261QR0405X~ +NM1*77*2*BH CLINIC OF VANCOUVER*****XX*1122334455~ +N3*12345 MAIN ST SUITE A1~ +N4*VANCOUVER*WA*98662~ +LX*1~ +SV1*HC:T1017:HF*11.64*UN*1***1:2~ +DTP*472*D8*20180427~ +REF*6R*140976~ +SE*34*000000003~ +ST*837*000000004*005010X222A1~ +BHT*0019*00*7349063984*20180508*0833*CH~ +NM1*41*2*CLEARINGHOUSE LLC*****46*987654321~ +PER*IC*CLEARINGHOUSE CLIENT SERVICES*TE*8005551212*FX*8005551212~ +NM1*40*2*123456789*****46*CHPWA~ +HL*75**20*1~ +NM1*85*2*BH CLINIC OF VANCOUVER*****XX*1122334455~ +N3*12345 MAIN ST~ +N4*VANCOUVER*WA*98662~ +REF*EI*720000000~ +PER*IC*CONTACT*TE*9185551212~ +NM1*87*2~ +N3*PO BOX 1234~ +N4*VANCOUVER*WA*986681234~ +HL*76*75*22*0~ +SBR*P*18**COMMUNITY HLTH PLAN OF WASH*****CI~ +NM1*IL*1*PATIENT*SUSAN*E***MI*765123~ +N3*765 43RD ST~ +N4*VANCOUVER*WA*986640002~ +DMG*D8*19881031*F~ +NM1*PR*2*COMMUNITY HEALTH PLAN OF WASHINGTON*****PI*CHPWA~ +CLM*1805080AV3648353*234***53:B:1*Y*A*Y*Y~ +REF*D9*7349064290~ +HI*ABK:F251~ +NM1*82*1*PROVIDER*SUSAN****XX*1112223346~ +PRV*PE*PXC*251S00000X~ +NM1*77*2*BH CLINIC OF VANCOUVER*****XX*1122334455~ +N3*12345 MAIN ST SUITE A1~ +N4*VANCOUVER*WA*98662~ +LX*1~ +SV1*HC:90853*234*UN*120***1~ +DTP*472*D8*20180427~ +REF*6R*140787~ +NTE*ADD*05~ +SE*35*000000004~ +ST*837*000000005*005010X222A1~ +BHT*0019*00*7349063984*20180508*0833*CH~ +NM1*41*2*CLEARINGHOUSE LLC*****46*987654321~ +PER*IC*CLEARINGHOUSE CLIENT SERVICES*TE*8005551212*FX*8005551212~ +NM1*40*2*123456789*****46*CHPWA~ +HL*79**20*2~ +NM1*85*2*BH CLINIC OF VANCOUVER*****XX*1122334455~ +N3*12345 MAIN ST~ +N4*VANCOUVER*WA*98662~ +REF*EI*720000000~ +PER*IC*CONTACT*TE*9185551212~ +NM1*87*2~ +N3*PO BOX 1234~ +N4*VANCOUVER*WA*986681234~ +HL*80*79*22*0~ +SBR*P*18**COMMUNITY HLTH PLAN OF WASH*****CI~ +NM1*IL*1*SUBSCRIBER*JOHN*J***MI*987321~ +N3*987 65TH PL~ +N4*VANCOUVER*WA*986640001~ +DMG*D8*19881225*M~ +NM1*PR*2*COMMUNITY HEALTH PLAN OF WASHINGTON*****PI*CHPWA~ +HL*81*79*23*1~ +SBR*P*18**COMMUNITY HLTH PLAN OF WASH*****CI~ +NM1*IL*1*SUBSCRIBER*JOHN*J***MI*987321~ +N3*987 65TH PL~ +N4*VANCOUVER*WA*986640001~ +DMG*D8*19881225*M~ +NM1*PR*2*COMMUNITY HEALTH PLAN OF WASHINGTON*****PI*CHPWA~ +CLM*1805080AV3648355*20***57:B:1*Y*A*Y*Y~ +REF*D9*7349064036~ +HI*ABK:F1020*ABF:F1120~ +NM1*82*1*PROVIDER*JAMES****XX*1112223338~ +PRV*PE*PXC*261QR0405X~ +NM1*77*2*BH CLINIC OF VANCOUVER*****XX*1122334455~ +N3*12345 MAIN ST SUITE A1~ +N4*VANCOUVER*WA*98662~ +LX*1~ +SV1*HC:H0003*20*UN*1***1:2~ +DTP*472*D8*20180427~ +REF*6R*143907~ +SE*34*000000005~ +GE*5*212950697~ +IEA*1*697773230~ From 44cc70343ccc4fe489ca16e267ae129a45944751 Mon Sep 17 00:00:00 2001 From: Raven Date: Wed, 17 Apr 2024 19:20:05 -0400 Subject: [PATCH 06/46] converted the functions to follow filter/map protocol for processing hierarchical loops --- databricksx12/hls/hierarchicalloop.py | 88 +++++++++++++-------------- 1 file changed, 41 insertions(+), 47 deletions(-) diff --git a/databricksx12/hls/hierarchicalloop.py b/databricksx12/hls/hierarchicalloop.py index 6afcbda..c078f3b 100644 --- a/databricksx12/hls/hierarchicalloop.py +++ b/databricksx12/hls/hierarchicalloop.py @@ -1,46 +1,40 @@ from databricksx12.edi import * +import functools class HierarchicalLoop(EDI): def __init__(self, data, delim_cls=AnsiX12Delim): super().__init__(data, delim_cls) - - # find all HL and SE segments to find start and end of loops + CLM segments - self.indexed_HL_segments = self.segments_by_name_index("HL") - self.indexed_SE_segments = self.segments_by_name_index("SE") - - # parent and children loops + self.parent_start_loops = self._parent_start_tup_loops() # returns tuple; to check + self.parent_end_loops = self._parent_end_loops() self.parent_loops = self._parent_loops() self.child_loops = self._child_loops(self.parent_loops) - self.subchild_loops = self._child_loops(self.child_loops) + self.subchild_loops = self._child_loops(self.child_loops) # recursive cases - def _parent_loops(self): - parent_start_loops = [] - for i, segment in self.indexed_HL_segments: - # Check if the second element is empty (and the third element is '20' and the last element is '1') - if segment.element(2) == '': - # index of parent, counter, and if child - parent_start_loops.append( - (i, segment.element(1), segment.element(-1))) + def _parent_start_tup_loops(self): + # index of parent, counter, and if child + # TODO unit test to return tuple + return [(i, x.element(1), x.element(-1)) for i, x in self.segments_by_name_index("HL") if x.element(2) == ""] - parent_end_loops = [i for i, x in self.indexed_SE_segments] - return [(tup + (j,)) for tup, j in zip(parent_start_loops, parent_end_loops)] + def _parent_end_loops(self): + return [i for i, x in self.segments_by_name_index("SE")] + + def _parent_loops(self): + return [(tup + (j,)) for tup, j in zip(self.parent_start_loops, self.parent_end_loops)] def _child_loops(self, parent_loops): - child_loops = [] - for parent_start_index, counter, child_id, parent_stop_index in parent_loops: - if child_id == '1': - for i, segment in self.indexed_HL_segments: - if segment.element(2) == counter: - # index of child, parent/tx counter, and if sub-child - child_loops.append( - (i, counter, segment.element(-1), parent_stop_index)) + child_loops = [(i, counter, segment.element(-1), parent_stop_index) + for _, counter, child_id, parent_stop_index in parent_loops if int(child_id) == 1 + for i, segment in self.segments_by_name_index("HL") if segment.element(2) == counter] - # If child_id is greater than 1, recursively call the fn - if int(child_id) > 1: - child_loops.extend(self._child_loops( - [(parent_start_index, counter, str(int(child_id) - 1), parent_stop_index)])) - return child_loops + # recursive cases where child_id is greater than 1 == sub_child + subchild_cases = filter(lambda x: int(x[2]) > 1, parent_loops) + subchild_loops = map( + lambda x: self._child_loops( + [(x[0], x[1], str(int(x[2]) - 1), x[3])]), + subchild_cases + ) + return functools.reduce(lambda acc, lst: acc + lst, subchild_loops, child_loops) class HierarchicalLoopManager: @@ -50,26 +44,26 @@ def __init__(self, data, delim_cls=AnsiX12Delim): self.generate_summary() def generate_summary(self): - for pl in self.hl.parent_loops: + def process_parent_loop(parent_loop): + # filter/map child loops within a parent loop + children = list(map( + lambda child_loop: { + 'child_index_start': child_loop[0], + 'child_index_stop': child_loop[-1] + }, + filter( + lambda child_loop: parent_loop[0] < child_loop[0] < parent_loop[-1], self.hl.child_loops) + )) + # summary dict for each parent parent_summary = { - 'parent_index_start': pl[0], - 'parent_index_end': pl[-1], - 'children': [] + 'parent_index_start': parent_loop[0], + 'parent_index_end': parent_loop[-1], + 'children': children } - # Find children loops for this parent tx - children = [] - for cl in self.hl.child_loops: - if pl[0] < cl[0] < pl[-1]: - children.append({ - 'child_index_start': cl[0], - 'child_index_stop': cl[-1] - }) - - # Add children to parent summary - parent_summary['children'] = children + return (parent_loop[1], parent_summary) - # Add HL 1 counter to the summary as the key - self.summary[pl[1]] = parent_summary + # summarize all parent loops + self.summary = dict(map(process_parent_loop, self.hl.parent_loops)) """ From 45a09fa35da47b0446a2d065a1c6d424dc98abe8 Mon Sep 17 00:00:00 2001 From: Raven Date: Thu, 18 Apr 2024 19:12:23 -0400 Subject: [PATCH 07/46] removed claim classes and reorganized code to create a new loop class --- databricksx12/hls/claim.py | 154 +------------------------------------ databricksx12/hls/loop.py | 122 +++++++++++++++++++++++++++++ 2 files changed, 123 insertions(+), 153 deletions(-) create mode 100644 databricksx12/hls/loop.py diff --git a/databricksx12/hls/claim.py b/databricksx12/hls/claim.py index 88aa76f..2b8148c 100644 --- a/databricksx12/hls/claim.py +++ b/databricksx12/hls/claim.py @@ -1,160 +1,8 @@ from databricksx12.edi import * -from databricksx12.hls import hierarchicalloop + # # Base claim class # -from databricksx12.edi import * - - -class LoopMapping: - def __init__(self, mappings=None): - self.mappings = (mappings if mappings is not None else { - '20': { - 'description': 'Information Source', - 'loop': '2000A' - }, - '22': { - 'description': 'Subscriber', - 'loop': '2000B' - } - }) - - def get_mapping(self, element): - return self.mappings.get(element, None) - - -class Claim(EDI): - def __init__(self, data, delim_cls=AnsiX12Delim, loop_mapping=LoopMapping(), element=None): - super().__init__(data, delim_cls) - self.loop_mapping = loop_mapping - self.target_element = element - self.loop_value = self.loop_mapping.get_mapping( - self.target_element).get('loop') - self.target_segment_name, self.target_element_index = self.get_reference_names_of_loop( - self.loop_value) - - # find all CLM segments (important for indexing the last HL or SBR within a tx) - self.clm_segments = self._clm_identifiers() - - # this feels misplaced; how to fix? - def get_reference_names_of_loop(self, loop): - identifiers = { - '2000A': ('NM1', '3'), - '2000B': ('SBR', '4'), - } - return identifiers.get(loop, (None, None)) - - def _clm_identifiers(self): - # Find the segments where CLM loop begins - indexed_CLM_segments = self.segments_by_name_index("CLM") - return [(i, x.element(2)) for i, x in indexed_CLM_segments] - - def extract_lines_based_on_ranges(self, ranges, target_value, target_index): - extracted_elements = [] - # Iterate through each range in the list - for start, end in ranges: - # Retrieve the segments within this range - segments_in_range = self.segments_by_position(start, end) - - desired_elements = map( - lambda segment: segment.element(int(target_index)), - filter( - lambda segment: segment.segment_name() == target_value and len( - segment.data.split(segment.format_cls.ELEMENT_DELIM)) > int(target_index), - segments_in_range - ) - ) - extracted_elements.extend(desired_elements) - - return list(extracted_elements) - - -class ClaimManager: - def __init__(self, data, delim_cls=AnsiX12Delim): - self.hlmanager = hierarchicalloop.HierarchicalLoopManager(data) - self.hl_summary = self.hlmanager.summary - # need to sort mapping dependency - self.claim = Claim(data, delim_cls, LoopMapping(), element='20') - self.claim_summaries = [{ - 'clm_ind': clm_index, - 'parent_counter': self._find_claim_in_tx(self.hl_summary, clm_index)[0], - 'child_start_index': self._find_claim_in_tx(self.hl_summary, clm_index)[1], - 'parent_range': self.get_ranges(self.hl_summary, self._find_claim_in_tx(self.hl_summary, clm_index)[0])[0], - 'children_ranges': self.get_ranges(self.hl_summary, self._find_claim_in_tx(self.hl_summary, clm_index)[0])[1] - } for clm_index in [i for (i, j) in self.claim.clm_segments]] - - def _find_claim_in_tx(self, tx_summary, clm_index): - for parent_counter, parent_info in tx_summary.items(): - if parent_info['parent_index_start'] <= int(clm_index) <= parent_info['parent_index_end']: - for child_info in parent_info['children']: - if child_info['child_index_start'] <= int(clm_index) <= child_info['child_index_stop']: - return parent_counter, child_info['child_index_start'] - return None, None - - def get_ranges(self, hl_summary_dict, loop_counter): - info = hl_summary_dict.get(loop_counter, None) - if info is None: - return None - - parent_range = (info['parent_index_start'], info['parent_index_end']) - children_ranges = [(child['child_index_start'], child['child_index_stop']) - for child in info.get('children', [])] - - return parent_range, children_ranges - - def _find_billing_providers(self): - first_lines = [] - for summary in self.claim_summaries: - lines = self.claim.extract_lines_based_on_ranges( - [summary['parent_range'] - ], self.claim.target_segment_name, self.claim.target_element_index - ) - if lines: # Check if any lines were extracted - # Append the first line of this iteration - first_lines.append(lines[0]) - return first_lines - - def _find_subscribers(self): - pass - - -""" -claims = ClaimManager(sample_data_chpw_claimdata) -claims.claim_summaries - -[{'clm_ind': 23, - 'parent_counter': '1', - 'child_start_index': 16, - 'parent_range': (7, 35), - 'children_ranges': [(16, 35)]}, - {'clm_ind': 57, - 'parent_counter': '63', - 'child_start_index': 50, - 'parent_range': (41, 69), - 'children_ranges': [(50, 69)]}, - {'clm_ind': 91, - 'parent_counter': '49', - 'child_start_index': 84, - 'parent_range': (75, 103), - 'children_ranges': [(84, 103)]}, - {'clm_ind': 125, - 'parent_counter': '75', - 'child_start_index': 118, - 'parent_range': (109, 138), - 'children_ranges': [(118, 138)]}, - {'clm_ind': 160, - 'parent_counter': '79', - 'child_start_index': 153, - 'parent_range': (144, 172), - 'children_ranges': [(153, 172)]}] - - claims._find_billing_providers() - ['BH CLINIC OF VANCOUVER', - 'BH CLINIC OF VANCOUVER', - 'BH CLINIC OF VANCOUVER', - 'BH CLINIC OF VANCOUVER', - 'BH CLINIC OF VANCOUVER'] -""" class Claim837i(Claim): diff --git a/databricksx12/hls/loop.py b/databricksx12/hls/loop.py new file mode 100644 index 0000000..6d654eb --- /dev/null +++ b/databricksx12/hls/loop.py @@ -0,0 +1,122 @@ +from databricksx12.edi import * +from databricksx12.hls import hierarchicalloop + +class LoopMapping: + def __init__(self, mappings=None): + self.mappings = (mappings if mappings is not None else { + '20': { + 'description': 'Information Source', + 'loop': '2000A', + 'reference_ids': ('NM1', '3'), ## might delete and use elsewhere + }, + '22': { + 'description': 'Subscriber', + 'loop': '2000B', + 'reference_ids': ('SBR', '4'), + } + }) + + def get_mapping(self, element): + return self.mappings.get(element, None) + + +class Loop(EDI): + def __init__(self, data, delim_cls=AnsiX12Delim, loop_mapping=LoopMapping()): + super().__init__(data, delim_cls) + self.loop_mapping = loop_mapping + self.hlmanager = hierarchicalloop.HierarchicalLoopManager(data) + self.hl_summary = self.hlmanager.summary + self.clm_segments = self._clm_identifiers() + + def _clm_identifiers(self): + return [(i, x.element(2)) for i, x in self.segments_by_name_index("CLM")] + + # + # returns element of interest from a range based on first element and index from the line + # + def _find_elements_based_on_ranges(self, ranges, target_value, target_index): + def process_range(range_tuple): + start, end = range_tuple + segments_in_range = self.segments_by_position(start, end) # find segments within range + return list(map( + lambda segment: segment.element(int(target_index)), + filter( + lambda segment: segment.segment_name() == target_value and + segment.segment_len() > int(target_index), + segments_in_range + ) + )) + return functools.reduce( + lambda acc, lst: acc + lst, map(process_range, ranges),[]) # map to apply processing to each range and flatten + + # + # if a claim index (str), returns the parent counter and child_start_index of Tx from the Hierarchical loop summary + # + def _find_tx_from_clm(self, tx_summary, clm_index): + try: + return next( + (parent_counter, child['child_index_start']) + for parent_counter, parent_info in tx_summary.items() + if parent_info['parent_index_start'] <= int(clm_index) <= parent_info['parent_index_end'] + for child in parent_info['children'] + if child['child_index_start'] <= int(clm_index) <= child['child_index_stop'] + ) + except StopIteration: + return None, None + + # + # if a loop counter (str), returns the parent and children ranges (tuple) from the Tx of interest from the Hierarchical loop summary + # + def _get_ranges(self, tx_summary, loop_counter): + info = tx_summary.get(loop_counter, None) + if info is None: + return None + parent_range = (info['parent_index_start'], info['parent_index_end']) + children_ranges = [(child['child_index_start'], child['child_index_stop']) for child in info.get('children', [])] + return parent_range, children_ranges + + # + # filters a claim's tx segment to extract its reference elements + # + def _get_elements_from_claim(self, clm_segment, target_segment_name, target_element_index, use_children=False): + clm_index = clm_segment[0] + parent_counter, child_start_index = self._find_tx_from_clm(self.hl_summary, clm_index) + if not parent_counter: + return None + parent_range, children_range = self._get_ranges(self.hl_summary, parent_counter) + return self._find_elements_based_on_ranges([parent_range], target_segment_name, target_element_index) + + # + # map to apply the find_element function over all claim segments based on choice of loop + # + def find_reference_elements(self, loop_key): + loop_info = self.loop_mapping.get_mapping(loop_key) + if not loop_info: + return [] + target_segment_name, target_element_index = loop_info['reference_ids'] + use_children = loop_key == '22' # Use children ranges for '22' + process_clm_segment = lambda clm_segment: self._get_elements_from_claim(clm_segment, target_segment_name, target_element_index, use_children) + reference_list = list(filter(None, map(process_clm_segment, self.clm_segments))) + return [summary[0] for summary in reference_list] if reference_list else [] # only first element or it generalizes to all segments in the range + + +""" +sample_data_837i_edited = open("/sampledata/837/CHPW_Claimdata_edited.txt", "rb").read().decode("utf-8") +claims = Loop(sample_data_837i_edited) +claims.find_reference_elements('20') +Outputs: +['BH CLINIC OF VANCOUVER', + 'BH CLINIC OF VANCOUVER', + 'BH CLINIC OF VANCOUVER', + 'BH CLINIC OF VANCOUVER', + 'BH CLINIC OF VANCOUVER'] +""" +""" +claims.find_reference_elements('22') +Outputs: +['COMMUNITY HLTH PLAN OF WASH', + 'COMMUNITY HLTH PLAN OF WASH', + 'COMMUNITY HLTH PLAN OF WASH', + 'COMMUNITY HLTH PLAN OF WASH', + 'COMMUNITY HLTH PLAN OF WASH'] +""" \ No newline at end of file From 0ec239ab8c8515621c66f213bf7ce717121fa8b7 Mon Sep 17 00:00:00 2001 From: Raven Date: Fri, 26 Apr 2024 20:54:23 -0400 Subject: [PATCH 08/46] updates to Hierarchical Loop Manager to create list in lists for sub-children --- databricksx12/hls/hierarchicalloop.py | 75 +++++++++++++-------------- 1 file changed, 37 insertions(+), 38 deletions(-) diff --git a/databricksx12/hls/hierarchicalloop.py b/databricksx12/hls/hierarchicalloop.py index c078f3b..6833df8 100644 --- a/databricksx12/hls/hierarchicalloop.py +++ b/databricksx12/hls/hierarchicalloop.py @@ -40,30 +40,26 @@ def _child_loops(self, parent_loops): class HierarchicalLoopManager: def __init__(self, data, delim_cls=AnsiX12Delim): self.hl = HierarchicalLoop(data, delim_cls) - self.summary = {} - self.generate_summary() + self.summary = self.generate_summary() - def generate_summary(self): - def process_parent_loop(parent_loop): - # filter/map child loops within a parent loop - children = list(map( - lambda child_loop: { - 'child_index_start': child_loop[0], - 'child_index_stop': child_loop[-1] - }, - filter( - lambda child_loop: parent_loop[0] < child_loop[0] < parent_loop[-1], self.hl.child_loops) - )) - # summary dict for each parent - parent_summary = { - 'parent_index_start': parent_loop[0], - 'parent_index_end': parent_loop[-1], - 'children': children - } - return (parent_loop[1], parent_summary) + def get_child_loops(self, parent_loop, loops): + return [loop for loop in loops if parent_loop[0] < loop[0] < parent_loop[-1]] + + def process_loop(self, loop, level=0): + child_loops = self.get_child_loops(loop, self.hl.child_loops) + children = [self.process_loop(child, level + 1) for child in child_loops] + + loop_summary = { + 'index_start': loop[0], + 'index_end': loop[-1], + 'children': children or None + } + return loop_summary - # summarize all parent loops - self.summary = dict(map(process_parent_loop, self.hl.parent_loops)) + def generate_summary(self): + """Generate a hierarchical summary for each top-level parent loop.""" + loop_processing = lambda loop: (str(loop[1]), self.process_loop(loop)) + return dict(map(loop_processing, self.hl.parent_loops)) """ @@ -71,20 +67,23 @@ def process_parent_loop(parent_loop): summary = loop_manager.summary output: -{'1': {'parent_index_start': 7, - 'parent_index_end': 35, - 'children': [{'child_index_start': 16, 'child_index_stop': 35}]}, - '63': {'parent_index_start': 41, - 'parent_index_end': 69, - 'children': [{'child_index_start': 50, 'child_index_stop': 69}]}, - '49': {'parent_index_start': 75, - 'parent_index_end': 103, - 'children': [{'child_index_start': 84, 'child_index_stop': 103}]}, - '75': {'parent_index_start': 109, - 'parent_index_end': 138, - 'children': [{'child_index_start': 118, 'child_index_stop': 138}]}, - '79': {'parent_index_start': 144, - 'parent_index_end': 179, - 'children': [{'child_index_start': 153, 'child_index_stop': 179}, - {'child_index_start': 160, 'child_index_stop': 179}]}} +{'1': {'index_start': 7, + 'index_end': 35, + 'children': [{'index_start': 16, 'index_end': 35, 'children': None}]}, + '63': {'index_start': 41, + 'index_end': 69, + 'children': [{'index_start': 50, 'index_end': 69, 'children': None}]}, + '49': {'index_start': 75, + 'index_end': 103, + 'children': [{'index_start': 84, 'index_end': 103, 'children': None}]}, + '75': {'index_start': 109, + 'index_end': 138, + 'children': [{'index_start': 118, 'index_end': 138, 'children': None}]}, + '79': {'index_start': 144, + 'index_end': 179, + 'children': [{'index_start': 153, + 'index_end': 179, + 'children': [{'index_start': 160, 'index_end': 179, 'children': None}]}, + {'index_start': 160, 'index_end': 179, 'children': None}]}} + # the last 'children' list, there is a repeat that is tricky to remove """ From 0caa0b3e47b557422066c2c4c000d3867a840aad Mon Sep 17 00:00:00 2001 From: Aaron Z Date: Tue, 30 Apr 2024 15:02:16 -0400 Subject: [PATCH 09/46] adding looping at the subscriber level --- sampledata/837/837p.txt | 45 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 45 insertions(+) create mode 100644 sampledata/837/837p.txt diff --git a/sampledata/837/837p.txt b/sampledata/837/837p.txt new file mode 100644 index 0000000..a69643f --- /dev/null +++ b/sampledata/837/837p.txt @@ -0,0 +1,45 @@ +ISA*00* *00* *ZZ*1234567 *ZZ*11111 *170508*1141*^*00501*000000101*1*P*:~ +GS*HC*XXXXXXX*XXXXX*20170617*1741*101*X*005010X222A1~ +ST*837*1239*005010X222A1~ +BHT*0019*00*010*20170617*1741*CH~ +NM1*41*2*SUBMITTER*****46*ABC123~ +PER*IC*BOB SMITH*TE*4805551212~ +NM1*40*2*RECEIVER*****46*44556~ +HL*1**20*1~ +NM1*85*2*BILLING PROVIDER*****XX*1122334455~ +N3*1234 SOME ROAD~ +N4*CHICAGO*IL*606739999~ +REF*EI*999999999~ +HL*2*1*22*0~ +SBR*P*18*******12~ +NM1*IL*1*BLOGGS*JOE****MI*1234567890~ +N3*1 SOME BLVD~ +N4*CHICAGO*IL*606129998~ +DMG*D8*19570111*M~ +NM1*PR*2*PAYER*****PI*12345~ +N3*1 PAYER WAY~ +N4*ST LOUIS*MO*212441850~ +REF*2U*W1014~ +CLM*1000A*140***19:B:1*Y*A*Y*Y~ +HI*ABK:I10~ +LX*1~ +SV1*HC:99213*140*UN*1***1~ +DTP*472*D8*20151124~ +HL*3*1*22*0~ +SBR*P*18*******12~ +NM1*IL*1*BLOGGS*FRED****MI*9876543201~ +N3*1 ANOTHER STR~ +N4*CHICAGO*IL*606129998~ +DMG*D8*19700601*M~ +NM1*PR*2*PAYER*****PI*12345~ +N3*1 PAYER WAY~ +N4*ST LOUIS*MO*212441850~ +REF*2U*W1014~ +CLM*1001A*140***19:B:1*Y*A*Y*Y~ +HI*ABK:I10~ +LX*1~ +SV1*HC:99213*140*UN*1***1~ +DTP*472*D8*20151124~ +SE*41*1239~ +GE*1*101~ +IEA*1*000000101~ From 97a57c84276269ede3e1c11bd78af8e87c34c848 Mon Sep 17 00:00:00 2001 From: Aaron Z Date: Tue, 30 Apr 2024 15:13:54 -0400 Subject: [PATCH 10/46] claim sample code --- databricksx12/hls/claim.py | 66 +++++++++++++++++++++++++++++++++ databricksx12/hls/healthcare.py | 10 +---- 2 files changed, 68 insertions(+), 8 deletions(-) diff --git a/databricksx12/hls/claim.py b/databricksx12/hls/claim.py index 2b8148c..1436a75 100644 --- a/databricksx12/hls/claim.py +++ b/databricksx12/hls/claim.py @@ -3,6 +3,14 @@ # # Base claim class # +class Claim(): + + def __init__(self): + pass + + @staticmethod + def from_dictionary(d): + pass class Claim837i(Claim): @@ -15,3 +23,61 @@ class Claim837i(Claim): class Claim837p(Claim): NAME = "837P" + + +# +# Base claim builder (transaction -> 1 or more claims) +# +class ClaimBuilder(EDI): + + # + # Given claim type (837i, 837p, etc), segments, and delim class, build claim level classes + # + def __init__(self, trnx_type, trnx_data, delim_cls): + self.trnx_type = trnx_type + self.data = trnx_data + self.delim_cls = delim_cls + + pass #self.hl = ??? TODO + + def claim_count(): + return len(self.segments_by_name("CLM")) + + # + # Returns a dictionary of "loop name" : "loop data" + # + def build_claim(self, clm_index, clm_segment): + return { + "1000A": { + "desc": "Submitter Name", + "segments": "TODO" + }, + "1000B": { + "desc": "Reciever Name", + "segments": "TODO" + }, + "2000A": { + "desc": "Billing Provider HL Level" + "segments": "TODO" + }, + "2000B": { + "desc": "Subscriber HL Level", + "segments": "TODO" + }, + "2000C" : { + "desc": "Patient HL Level", + "segments": "TODO" + }, + "2300": { + "desc": "Claim Information", + "segments": "TODO" + } + } + + # + # Given transaction type, transaction segments, and delim info, build out claims in the transaction + # @return a list of Claim for each "clm" segment + # + def build(self): + return [self.build_claim(i, x) for i,x in segments_by_name_index("CLM")] + diff --git a/databricksx12/hls/healthcare.py b/databricksx12/hls/healthcare.py index 91858de..efd60f5 100644 --- a/databricksx12/hls/healthcare.py +++ b/databricksx12/hls/healthcare.py @@ -28,12 +28,6 @@ def from_functional_group(self, fg): # @mapping = mapping the GS08 segment to the type of healthcare transaction # def from_transaction(self, trnx): - type = self.mapping.get(trnx.transaction_type) - data = [x for x in trnx.data if x.segment_name() not in ['ST', 'SE']] - if type == "837P": - return Claim837p(data, trnx.format_cls) - elif type == "837I": - return Claim837i(data, trnx.format_cls) - else: - return None #no mapping available + return ClaimBuilder(self.mapping.get(trnx.transaction_type), + [x for x in trnx.data if x.segment_name() not in ['ST', 'SE']], trnx.delim_cls).build() From 4c384ee20ebb70e044ee5a90503b480de679de2b Mon Sep 17 00:00:00 2001 From: Raven Date: Tue, 30 Apr 2024 21:56:01 -0400 Subject: [PATCH 11/46] modified loop class to contain additional mappings such as IL and Payer --- databricksx12/hls/loop.py | 205 +++++++++++++++++++++----------------- 1 file changed, 116 insertions(+), 89 deletions(-) diff --git a/databricksx12/hls/loop.py b/databricksx12/hls/loop.py index 6d654eb..c70ecb5 100644 --- a/databricksx12/hls/loop.py +++ b/databricksx12/hls/loop.py @@ -1,24 +1,47 @@ from databricksx12.edi import * +from functools import reduce from databricksx12.hls import hierarchicalloop class LoopMapping: def __init__(self, mappings=None): - self.mappings = (mappings if mappings is not None else { + self.mappings = mappings if mappings is not None else { '20': { - 'description': 'Information Source', - 'loop': '2000A', - 'reference_ids': ('NM1', '3'), ## might delete and use elsewhere - }, + 'Information Source': { + 'loop': '2000A', + 'reference_ids': ('NM1', '3'), + 'secondary_reference': ('85', '1') + } + }, '22': { - 'description': 'Subscriber', - 'loop': '2000B', - 'reference_ids': ('SBR', '4'), + 'Subscriber': { + 'loop': '2000B', + 'reference_ids': ('SBR', '4') + }, + 'Individual First Name': { + 'loop': '2010BA', + 'reference_ids': ('NM1', '4'), + 'secondary_reference': ('IL', '1') + }, + 'Individual Last Name': { + 'loop': '2010BA', + 'reference_ids': ('NM1', '3'), + 'secondary_reference': ('IL', '1') + }, + 'Payer Name': { + 'loop': '2010BB', + 'reference_ids': ('NM1', '3'), + 'secondary_reference': ('PR', '1') } - }) + } + } + + def get_mapping(self, element, description=None): + """ Returns a specific mapping based on element key and description. """ + mappings = self.mappings.get(element, {}) + if description: + return mappings.get(description, None) + return None - def get_mapping(self, element): - return self.mappings.get(element, None) - class Loop(EDI): def __init__(self, data, delim_cls=AnsiX12Delim, loop_mapping=LoopMapping()): @@ -26,97 +49,101 @@ def __init__(self, data, delim_cls=AnsiX12Delim, loop_mapping=LoopMapping()): self.loop_mapping = loop_mapping self.hlmanager = hierarchicalloop.HierarchicalLoopManager(data) self.hl_summary = self.hlmanager.summary - self.clm_segments = self._clm_identifiers() - - def _clm_identifiers(self): - return [(i, x.element(2)) for i, x in self.segments_by_name_index("CLM")] - - # - # returns element of interest from a range based on first element and index from the line - # - def _find_elements_based_on_ranges(self, ranges, target_value, target_index): - def process_range(range_tuple): - start, end = range_tuple - segments_in_range = self.segments_by_position(start, end) # find segments within range - return list(map( - lambda segment: segment.element(int(target_index)), - filter( - lambda segment: segment.segment_name() == target_value and - segment.segment_len() > int(target_index), - segments_in_range - ) - )) - return functools.reduce( - lambda acc, lst: acc + lst, map(process_range, ranges),[]) # map to apply processing to each range and flatten + self.clm_segments = [(i, x.element(2)) for i, x in self.segments_by_name_index("CLM")] - # - # if a claim index (str), returns the parent counter and child_start_index of Tx from the Hierarchical loop summary - # - def _find_tx_from_clm(self, tx_summary, clm_index): - try: - return next( - (parent_counter, child['child_index_start']) - for parent_counter, parent_info in tx_summary.items() - if parent_info['parent_index_start'] <= int(clm_index) <= parent_info['parent_index_end'] - for child in parent_info['children'] - if child['child_index_start'] <= int(clm_index) <= child['child_index_stop'] - ) - except StopIteration: - return None, None - - # - # if a loop counter (str), returns the parent and children ranges (tuple) from the Tx of interest from the Hierarchical loop summary - # - def _get_ranges(self, tx_summary, loop_counter): - info = tx_summary.get(loop_counter, None) - if info is None: - return None - parent_range = (info['parent_index_start'], info['parent_index_end']) - children_ranges = [(child['child_index_start'], child['child_index_stop']) for child in info.get('children', [])] - return parent_range, children_ranges + def get_transaction_info(self, tx_summary, clm_index): + """ + retrieves transaction information for a claim from hierarchical summary + Eg., Getting the transaction range for claim index "x" + """ + return next((info for _, info in tx_summary.items() + if info['index_start'] <= int(clm_index) <= info['index_end']), None) + + def get_ranges(self, tx_info, use_children=False): + """ + extracts numeric ranges for parent and optionally children based on transaction + Eg., find ranges for parent and children loops for processing + """ + parent_range = (tx_info['index_start'], tx_info['index_end']) + if use_children: + return [(child['index_start'], child['index_end']) for child in tx_info.get('children', [])] + return [parent_range] - # - # filters a claim's tx segment to extract its reference elements - # - def _get_elements_from_claim(self, clm_segment, target_segment_name, target_element_index, use_children=False): + + def find_elements_based_on_ranges(self, ranges, target_segment_name, target_element_index, secondary_reference=None): + """ + filters and maps EDI segments to extract required elements based on their position and type + """ + process_range = lambda range_tuple: [ + segment.element(int(target_element_index)) + for segment in self.segments_by_position(range_tuple[0], range_tuple[1]) + if segment.segment_name() == target_segment_name and + segment.segment_len() > int(target_element_index) and + (secondary_reference is None or segment.element(int(secondary_reference[1])) == secondary_reference[0]) + ] + return reduce(lambda acc, lst: acc + lst, map(process_range, ranges), []) + + def extract_elements_from_claim(self, clm_segment, target_segment_name, target_element_index, use_children=False, secondary_reference=None): + """ + a higher-level function that ties together the previous functions to get tx info, the ranges of interest, and elements from every range + """ clm_index = clm_segment[0] - parent_counter, child_start_index = self._find_tx_from_clm(self.hl_summary, clm_index) - if not parent_counter: + tx_info = self.get_transaction_info(self.hl_summary, clm_index) + if not tx_info: return None - parent_range, children_range = self._get_ranges(self.hl_summary, parent_counter) - return self._find_elements_based_on_ranges([parent_range], target_segment_name, target_element_index) - - # - # map to apply the find_element function over all claim segments based on choice of loop - # - def find_reference_elements(self, loop_key): - loop_info = self.loop_mapping.get_mapping(loop_key) + + ranges = self.get_ranges(tx_info, use_children) + return self.find_elements_based_on_ranges(ranges, target_segment_name, target_element_index, secondary_reference) + + + def find_reference_elements(self, loop_key, description=None): + """ + extract reference elements from claims based on loop mapping and hierarchy and handles children separately if specified by loop key. + Eg., find billing provider names under loop '20' from an EDI transaction. + """ + + loop_info = self.loop_mapping.get_mapping(loop_key, description) if not loop_info: return [] + target_segment_name, target_element_index = loop_info['reference_ids'] - use_children = loop_key == '22' # Use children ranges for '22' - process_clm_segment = lambda clm_segment: self._get_elements_from_claim(clm_segment, target_segment_name, target_element_index, use_children) + secondary_reference = loop_info.get('secondary_reference', None) + + use_children = loop_key == '22' + process_clm_segment = lambda clm_segment: self.extract_elements_from_claim(clm_segment, + target_segment_name, + target_element_index, + use_children, + secondary_reference) reference_list = list(filter(None, map(process_clm_segment, self.clm_segments))) - return [summary[0] for summary in reference_list] if reference_list else [] # only first element or it generalizes to all segments in the range + + return [summary for summary in reference_list] """ sample_data_837i_edited = open("/sampledata/837/CHPW_Claimdata_edited.txt", "rb").read().decode("utf-8") claims = Loop(sample_data_837i_edited) -claims.find_reference_elements('20') +claims.find_reference_elements('20', 'Information Source') +Outputs: +[['BH CLINIC OF VANCOUVER'], + ['BH CLINIC OF VANCOUVER'], + ['BH CLINIC OF VANCOUVER'], + ['BH CLINIC OF VANCOUVER'], + ['BH CLINIC OF VANCOUVER']] +""" +""" +claims.find_reference_elements('22', 'Payer Name') Outputs: -['BH CLINIC OF VANCOUVER', - 'BH CLINIC OF VANCOUVER', - 'BH CLINIC OF VANCOUVER', - 'BH CLINIC OF VANCOUVER', - 'BH CLINIC OF VANCOUVER'] +[['COMMUNITY HEALTH PLAN OF WASHINGTON'], + ['COMMUNITY HEALTH PLAN OF WASHINGTON'], + ['COMMUNITY HEALTH PLAN OF WASHINGTON'], + ['COMMUNITY HEALTH PLAN OF WASHINGTON'], + ['COMMUNITY HEALTH PLAN OF WASHINGTON', + 'COMMUNITY HEALTH PLAN OF WASHINGTON', + 'COMMUNITY HEALTH PLAN OF WASHINGTON']] """ """ -claims.find_reference_elements('22') +claims.find_reference_elements('22', 'Individual First Name') Outputs: -['COMMUNITY HLTH PLAN OF WASH', - 'COMMUNITY HLTH PLAN OF WASH', - 'COMMUNITY HLTH PLAN OF WASH', - 'COMMUNITY HLTH PLAN OF WASH', - 'COMMUNITY HLTH PLAN OF WASH'] -""" \ No newline at end of file +[['JOHN'], ['SUSAN'], ['JOHN'], ['SUSAN'], ['JOHN', 'JOHN', 'JOHN']] + """ \ No newline at end of file From 49ba286c9783589288ff1d3f379f1ec04bc238c9 Mon Sep 17 00:00:00 2001 From: Raven Date: Tue, 30 Apr 2024 22:03:57 -0400 Subject: [PATCH 12/46] new loop mappings and clearer func names --- databricksx12/hls/loop.py | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/databricksx12/hls/loop.py b/databricksx12/hls/loop.py index c70ecb5..365b1ea 100644 --- a/databricksx12/hls/loop.py +++ b/databricksx12/hls/loop.py @@ -36,7 +36,6 @@ def __init__(self, mappings=None): } def get_mapping(self, element, description=None): - """ Returns a specific mapping based on element key and description. """ mappings = self.mappings.get(element, {}) if description: return mappings.get(description, None) @@ -54,7 +53,7 @@ def __init__(self, data, delim_cls=AnsiX12Delim, loop_mapping=LoopMapping()): def get_transaction_info(self, tx_summary, clm_index): """ retrieves transaction information for a claim from hierarchical summary - Eg., Getting the transaction range for claim index "x" + get the transaction range for claim index "x" """ return next((info for _, info in tx_summary.items() if info['index_start'] <= int(clm_index) <= info['index_end']), None) @@ -62,7 +61,7 @@ def get_transaction_info(self, tx_summary, clm_index): def get_ranges(self, tx_info, use_children=False): """ extracts numeric ranges for parent and optionally children based on transaction - Eg., find ranges for parent and children loops for processing + find ranges for parent and children loops for processing """ parent_range = (tx_info['index_start'], tx_info['index_end']) if use_children: @@ -99,7 +98,7 @@ def extract_elements_from_claim(self, clm_segment, target_segment_name, target_e def find_reference_elements(self, loop_key, description=None): """ extract reference elements from claims based on loop mapping and hierarchy and handles children separately if specified by loop key. - Eg., find billing provider names under loop '20' from an EDI transaction. + find billing provider names under loop '20' from an EDI transaction. """ loop_info = self.loop_mapping.get_mapping(loop_key, description) From b8d23d47ad2f335cd0a2d7dc7e3aa4a8e26d07fd Mon Sep 17 00:00:00 2001 From: Raven Date: Thu, 2 May 2024 01:49:14 -0400 Subject: [PATCH 13/46] adjusted Loop and LoopManager to process nested loops with modified claim data with children --- databricksx12/hls/hierarchicalloop.py | 105 +++++++++++++++-------- sampledata/837/CHPW_Claimdata_edited.txt | 11 ++- 2 files changed, 80 insertions(+), 36 deletions(-) diff --git a/databricksx12/hls/hierarchicalloop.py b/databricksx12/hls/hierarchicalloop.py index 6833df8..af2e333 100644 --- a/databricksx12/hls/hierarchicalloop.py +++ b/databricksx12/hls/hierarchicalloop.py @@ -1,40 +1,41 @@ from databricksx12.edi import * -import functools +import itertools class HierarchicalLoop(EDI): def __init__(self, data, delim_cls=AnsiX12Delim): super().__init__(data, delim_cls) - self.parent_start_loops = self._parent_start_tup_loops() # returns tuple; to check + + # parent and children loops + self.parent_start_loops = self._parent_start_tup_loops() self.parent_end_loops = self._parent_end_loops() self.parent_loops = self._parent_loops() self.child_loops = self._child_loops(self.parent_loops) - self.subchild_loops = self._child_loops(self.child_loops) # recursive cases - + self.subchild_loops = self._subchild_loops(self.child_loops) + def _parent_start_tup_loops(self): - # index of parent, counter, and if child - # TODO unit test to return tuple - return [(i, x.element(1), x.element(-1)) for i, x in self.segments_by_name_index("HL") if x.element(2) == ""] + # index of parent, counter, and if child + return [(i, x.element(1), x.element(-1)) for i, x in self.segments_by_name_index("HL") if x.element(2) == ""] # TODO unit test to return tuple def _parent_end_loops(self): return [i for i, x in self.segments_by_name_index("SE")] - + def _parent_loops(self): return [(tup + (j,)) for tup, j in zip(self.parent_start_loops, self.parent_end_loops)] + def _child_loops(self, parent_loops): child_loops = [(i, counter, segment.element(-1), parent_stop_index) - for _, counter, child_id, parent_stop_index in parent_loops if int(child_id) == 1 - for i, segment in self.segments_by_name_index("HL") if segment.element(2) == counter] + for _, counter, child_id, parent_stop_index in parent_loops + for i, segment in self.segments_by_name_index("HL") if segment.element(2) == counter] + return child_loops + + def _subchild_loops(self, child_loops): + it1, it2 = itertools.tee(child_loops) + next(it2, None) + return [pair[1] for pair in zip(it1, it2) if int(pair[0][2]) == 1] + - # recursive cases where child_id is greater than 1 == sub_child - subchild_cases = filter(lambda x: int(x[2]) > 1, parent_loops) - subchild_loops = map( - lambda x: self._child_loops( - [(x[0], x[1], str(int(x[2]) - 1), x[3])]), - subchild_cases - ) - return functools.reduce(lambda acc, lst: acc + lst, subchild_loops, child_loops) class HierarchicalLoopManager: @@ -43,23 +44,52 @@ def __init__(self, data, delim_cls=AnsiX12Delim): self.summary = self.generate_summary() def get_child_loops(self, parent_loop, loops): - return [loop for loop in loops if parent_loop[0] < loop[0] < parent_loop[-1]] + """Filter child loops that fall within the given parent loop's range.""" + return list(filter(lambda x: parent_loop[0] < x[0] < parent_loop[3], loops)) - def process_loop(self, loop, level=0): - child_loops = self.get_child_loops(loop, self.hl.child_loops) - children = [self.process_loop(child, level + 1) for child in child_loops] + def calculate_child_end_index(self, current_child, next_child, parent_end): + """Calculate the end index of a child, adjusting to avoid overlap with the next child.""" + return min(current_child[3], next_child[0] - 1 if next_child else parent_end) + + def process_child(self, child, subchildren, parent_end): + """Map a single child loop to its dictionary representation, including subchildren if applicable.""" + return { + 'index_start': child[0], + 'index_end': self.calculate_child_end_index(child, None, parent_end), # No next_child directly handled here + 'children': subchildren if subchildren else None + } + + def add_subchildren_to_children(self, children, subchild_loops, parent_end): + """Map function to add subchildren to corresponding children, exclude children that are subchildren.""" + #subchild_ids = {sc[1] for sc in subchild_loops} # Set of parent_ids for subchildren + children = [child for child in children if child not in subchild_loops] # Filter out subchildren + # subchild_lookup = {sc[1]: sc for sc in subchild_loops} # Lookup for subchildren by parent counter - loop_summary = { + # Process each child, include subchildren when applicable + return list(map(lambda child: self.process_child( + child, + [{ 'index_start': subchild[0], + 'index_end': self.calculate_child_end_index(subchild, None, child[-1]), + 'children': None } + for subchild in subchild_loops if subchild[1] == child[1] and int(child[2]) == 1], + parent_end), + children)) + + def process_loop(self, loop): + child_loops = sorted(self.get_child_loops(loop, self.hl.child_loops), key=lambda x: x[0]) + children = self.add_subchildren_to_children(child_loops, self.hl.subchild_loops, loop[3]) + return { 'index_start': loop[0], - 'index_end': loop[-1], + 'index_end': loop[3], 'children': children or None } - return loop_summary def generate_summary(self): - """Generate a hierarchical summary for each top-level parent loop.""" - loop_processing = lambda loop: (str(loop[1]), self.process_loop(loop)) - return dict(map(loop_processing, self.hl.parent_loops)) + return {str(loop[1]): self.process_loop(loop) for loop in self.hl.parent_loops} + + + + """ @@ -80,10 +110,17 @@ def generate_summary(self): 'index_end': 138, 'children': [{'index_start': 118, 'index_end': 138, 'children': None}]}, '79': {'index_start': 144, - 'index_end': 179, - 'children': [{'index_start': 153, - 'index_end': 179, - 'children': [{'index_start': 160, 'index_end': 179, 'children': None}]}, - {'index_start': 160, 'index_end': 179, 'children': None}]}} - # the last 'children' list, there is a repeat that is tricky to remove + 'index_end': 186, + 'children': [{'index_start': 153, 'index_end': 186, 'children': None}, + {'index_start': 160, + 'index_end': 186, + 'children': [{'index_start': 167, 'index_end': 186, 'children': None}]}]}} +""" +""" +sample_data_837p = open("./sampledata/837/837p.txt", "rb").read().decode("utf-8").replace("\\n", "") +HierarchicalLoopManager(sample_data_837p).summary +{'1': {'index_start': 7, + 'index_end': 42, + 'children': [{'index_start': 12, 'index_end': 42, 'children': None}, + {'index_start': 27, 'index_end': 42, 'children': None}]}} """ diff --git a/sampledata/837/CHPW_Claimdata_edited.txt b/sampledata/837/CHPW_Claimdata_edited.txt index 72ecb44..176e84a 100644 --- a/sampledata/837/CHPW_Claimdata_edited.txt +++ b/sampledata/837/CHPW_Claimdata_edited.txt @@ -142,7 +142,7 @@ BHT*0019*00*7349063984*20180508*0833*CH~ NM1*41*2*CLEARINGHOUSE LLC*****46*987654321~ PER*IC*CLEARINGHOUSE CLIENT SERVICES*TE*8005551212*FX*8005551212~ NM1*40*2*123456789*****46*CHPWA~ -HL*79**20*2~ +HL*79**20*1~ NM1*85*2*BH CLINIC OF VANCOUVER*****XX*1122334455~ N3*12345 MAIN ST~ N4*VANCOUVER*WA*98662~ @@ -158,7 +158,14 @@ N3*987 65TH PL~ N4*VANCOUVER*WA*986640001~ DMG*D8*19881225*M~ NM1*PR*2*COMMUNITY HEALTH PLAN OF WASHINGTON*****PI*CHPWA~ -HL*81*79*23*1~ +HL*81*79*22*1~ +SBR*P*18**COMMUNITY HLTH PLAN OF WASH*****CI~ +NM1*IL*1*SUBSCRIBER*JOHN*J***MI*987321~ +N3*987 65TH PL~ +N4*VANCOUVER*WA*986640001~ +DMG*D8*19881225*M~ +NM1*PR*2*COMMUNITY HEALTH PLAN OF MASS*****PI*CHPWA~ +HL*82*79*23*0~ SBR*P*18**COMMUNITY HLTH PLAN OF WASH*****CI~ NM1*IL*1*SUBSCRIBER*JOHN*J***MI*987321~ N3*987 65TH PL~ From e677ef24783909204a0b57e3091ce7f67c9dfbd1 Mon Sep 17 00:00:00 2001 From: Raven Date: Thu, 2 May 2024 20:00:15 -0400 Subject: [PATCH 14/46] claim builder v0 using hlmanager and loop --- databricksx12/hls/claim.py | 109 ++++++++++++++++++----- databricksx12/hls/hierarchicalloop.py | 79 +++++++++-------- databricksx12/hls/loop.py | 121 ++++++++++++-------------- 3 files changed, 184 insertions(+), 125 deletions(-) diff --git a/databricksx12/hls/claim.py b/databricksx12/hls/claim.py index 1436a75..6a9e66e 100644 --- a/databricksx12/hls/claim.py +++ b/databricksx12/hls/claim.py @@ -1,8 +1,10 @@ from databricksx12.edi import * - +from databricksx12.hls import loop # # Base claim class # + + class Claim(): def __init__(self): @@ -28,8 +30,8 @@ class Claim837p(Claim): # # Base claim builder (transaction -> 1 or more claims) # -class ClaimBuilder(EDI): +class ClaimBuilder(EDI): # # Given claim type (837i, 837p, etc), segments, and delim class, build claim level classes # @@ -38,46 +40,105 @@ def __init__(self, trnx_type, trnx_data, delim_cls): self.data = trnx_data self.delim_cls = delim_cls - pass #self.hl = ??? TODO - - def claim_count(): - return len(self.segments_by_name("CLM")) + self.loop_summary = loop.Loop(trnx_data) # - # Returns a dictionary of "loop name" : "loop data" + # Returns a dictionary of "loop name" : "loop data" # - def build_claim(self, clm_index, clm_segment): + + def build_claim(self, clm_segment): return { "1000A": { "desc": "Submitter Name", - "segments": "TODO" + "segments": self.loop_summary.sender }, "1000B": { - "desc": "Reciever Name", - "segments": "TODO" + "desc": "Receiver Name", + "segments": self.loop_summary.receiver }, "2000A": { - "desc": "Billing Provider HL Level" - "segments": "TODO" + "desc": "Billing Provider", + "segments": self.loop_summary.find_reference_element(clm_segment, '20', 'Information Source') }, "2000B": { - "desc": "Subscriber HL Level", - "segments": "TODO" + "desc": "Subscriber", + "segments": self.loop_summary.find_reference_element(clm_segment, '22', 'Subscriber') }, - "2000C" : { - "desc": "Patient HL Level", - "segments": "TODO" + "2010BA": { + "desc": "Patient", + "segments": (self.loop_summary.find_reference_element(clm_segment, '22', 'Individual First Name'), + self.loop_summary.find_reference_element(clm_segment, '22', 'Individual Last Name')) + + }, + "2010BB": { + "desc": "Payer", + "segments": self.loop_summary.find_reference_element(clm_segment, '22', 'Payer Name'), }, "2300": { - "desc": "Claim Information", - "segments": "TODO" + "desc": "Claim", + "segments": (self.loop_summary.find_reference_element(clm_segment, '22', 'Claim ID'), + self.loop_summary.find_reference_element(clm_segment, '22', 'Claim Amount')) } } - + # # Given transaction type, transaction segments, and delim info, build out claims in the transaction - # @return a list of Claim for each "clm" segment + # @return a list of Claim for each "clm" segment # def build(self): - return [self.build_claim(i, x) for i,x in segments_by_name_index("CLM")] - + return [self.build_claim(seg) for seg in self.loop_summary.claim_segments()] + + +""" +sample_data_837i_edited = open("/sampledata/837/CHPW_Claimdata_edited.txt", "rb").read().decode("utf-8") +claim_class = ClaimBuilder(trnx_type='837I', trnx_data=sample_data_837i_edited, delim_cls=AnsiX12Delim) +claim_class.build() + +[{'1000A': {'desc': 'Submitter Name', 'segments': 'CLEARINGHOUSE'}, + '1000B': {'desc': 'Receiver Name', 'segments': '123456789'}, + '2000A': {'desc': 'Billing Provider', + 'segments': ['BH CLINIC OF VANCOUVER']}, + '2000B': {'desc': 'Subscriber', 'segments': ['COMMUNITY HLTH PLAN OF WASH']}, + '2010BA': {'desc': 'Patient', 'segments': (['JOHN'], ['SUBSCRIBER'])}, + '2010BB': {'desc': 'Payer', + 'segments': ['COMMUNITY HEALTH PLAN OF WASHINGTON']}, + '2300': {'desc': 'Claim', 'segments': (['1805080AV3648339'], ['20'])}}, + {'1000A': {'desc': 'Submitter Name', 'segments': 'CLEARINGHOUSE'}, + '1000B': {'desc': 'Receiver Name', 'segments': '123456789'}, + '2000A': {'desc': 'Billing Provider', + 'segments': ['BH CLINIC OF VANCOUVER']}, + '2000B': {'desc': 'Subscriber', 'segments': ['COMMUNITY HLTH PLAN OF WASH']}, + '2010BA': {'desc': 'Patient', 'segments': (['SUSAN'], ['PATIENT'])}, + '2010BB': {'desc': 'Payer', + 'segments': ['COMMUNITY HEALTH PLAN OF WASHINGTON']}, + '2300': {'desc': 'Claim', 'segments': (['1805080AV3648347'], ['50.1'])}}, + {'1000A': {'desc': 'Submitter Name', 'segments': 'CLEARINGHOUSE'}, + '1000B': {'desc': 'Receiver Name', 'segments': '123456789'}, + '2000A': {'desc': 'Billing Provider', + 'segments': ['BH CLINIC OF VANCOUVER']}, + '2000B': {'desc': 'Subscriber', 'segments': ['COMMUNITY HLTH PLAN OF WASH']}, + '2010BA': {'desc': 'Patient', 'segments': (['JOHN'], ['SUBSCRIBER'])}, + '2010BB': {'desc': 'Payer', + 'segments': ['COMMUNITY HEALTH PLAN OF WASHINGTON']}, + '2300': {'desc': 'Claim', 'segments': (['1805080AV3648340'], ['11.64'])}}, + {'1000A': {'desc': 'Submitter Name', 'segments': 'CLEARINGHOUSE'}, + '1000B': {'desc': 'Receiver Name', 'segments': '123456789'}, + '2000A': {'desc': 'Billing Provider', + 'segments': ['BH CLINIC OF VANCOUVER']}, + '2000B': {'desc': 'Subscriber', 'segments': ['COMMUNITY HLTH PLAN OF WASH']}, + '2010BA': {'desc': 'Patient', 'segments': (['SUSAN'], ['PATIENT'])}, + '2010BB': {'desc': 'Payer', + 'segments': ['COMMUNITY HEALTH PLAN OF WASHINGTON']}, + '2300': {'desc': 'Claim', 'segments': (['1805080AV3648353'], ['234'])}}, + {'1000A': {'desc': 'Submitter Name', 'segments': 'CLEARINGHOUSE'}, + '1000B': {'desc': 'Receiver Name', 'segments': '123456789'}, + '2000A': {'desc': 'Billing Provider', + 'segments': ['BH CLINIC OF VANCOUVER']}, + '2000B': {'desc': 'Subscriber', 'segments': ['COMMUNITY HLTH PLAN OF WASH']}, + '2010BA': {'desc': 'Patient', + 'segments': (['JOHN', 'JOHN'], ['SUBSCRIBER', 'SUBSCRIBER'])}, + '2010BB': {'desc': 'Payer', + 'segments': ['COMMUNITY HEALTH PLAN OF MASS', + 'COMMUNITY HEALTH PLAN OF WASHINGTON']}, + '2300': {'desc': 'Claim', 'segments': (['1805080AV3648355'], ['20'])}}] +""" diff --git a/databricksx12/hls/hierarchicalloop.py b/databricksx12/hls/hierarchicalloop.py index af2e333..c047f89 100644 --- a/databricksx12/hls/hierarchicalloop.py +++ b/databricksx12/hls/hierarchicalloop.py @@ -2,6 +2,7 @@ import itertools + class HierarchicalLoop(EDI): def __init__(self, data, delim_cls=AnsiX12Delim): super().__init__(data, delim_cls) @@ -12,32 +13,30 @@ def __init__(self, data, delim_cls=AnsiX12Delim): self.parent_loops = self._parent_loops() self.child_loops = self._child_loops(self.parent_loops) self.subchild_loops = self._subchild_loops(self.child_loops) - + def _parent_start_tup_loops(self): - # index of parent, counter, and if child - return [(i, x.element(1), x.element(-1)) for i, x in self.segments_by_name_index("HL") if x.element(2) == ""] # TODO unit test to return tuple + # index of parent, counter, and if child + # TODO unit test to return tuple + return [(i, x.element(1), x.element(-1)) for i, x in self.segments_by_name_index("HL") if x.element(2) == ""] def _parent_end_loops(self): return [i for i, x in self.segments_by_name_index("SE")] - + def _parent_loops(self): return [(tup + (j,)) for tup, j in zip(self.parent_start_loops, self.parent_end_loops)] - def _child_loops(self, parent_loops): child_loops = [(i, counter, segment.element(-1), parent_stop_index) - for _, counter, child_id, parent_stop_index in parent_loops - for i, segment in self.segments_by_name_index("HL") if segment.element(2) == counter] + for _, counter, child_id, parent_stop_index in parent_loops + for i, segment in self.segments_by_name_index("HL") if segment.element(2) == counter] return child_loops def _subchild_loops(self, child_loops): it1, it2 = itertools.tee(child_loops) - next(it2, None) + next(it2, None) return [pair[1] for pair in zip(it1, it2) if int(pair[0][2]) == 1] - - class HierarchicalLoopManager: def __init__(self, data, delim_cls=AnsiX12Delim): self.hl = HierarchicalLoop(data, delim_cls) @@ -50,34 +49,42 @@ def get_child_loops(self, parent_loop, loops): def calculate_child_end_index(self, current_child, next_child, parent_end): """Calculate the end index of a child, adjusting to avoid overlap with the next child.""" return min(current_child[3], next_child[0] - 1 if next_child else parent_end) - - def process_child(self, child, subchildren, parent_end): - """Map a single child loop to its dictionary representation, including subchildren if applicable.""" + + def process_subchildren(self, child, subchild_loops, parent_end): + """Process subchildren for a given child.""" + return [ + {'index_start': subchild[0], 'index_end': self.calculate_child_end_index( + subchild, None, parent_end), 'children': None} + for subchild in subchild_loops if subchild[1] == child[1] and int(child[2]) == 1 + ] + + def process_child_entry(self, child, index, children, subchild_loops, parent_end): + """Helper function to process each child entry.""" + next_child = children[index + + 1] if (index + 1) < len(children) else None + subchildren = self.process_subchildren( + child, subchild_loops, parent_end) return { 'index_start': child[0], - 'index_end': self.calculate_child_end_index(child, None, parent_end), # No next_child directly handled here - 'children': subchildren if subchildren else None + 'index_end': self.calculate_child_end_index(child, next_child, parent_end), + 'children': subchildren or None } - - def add_subchildren_to_children(self, children, subchild_loops, parent_end): - """Map function to add subchildren to corresponding children, exclude children that are subchildren.""" - #subchild_ids = {sc[1] for sc in subchild_loops} # Set of parent_ids for subchildren - children = [child for child in children if child not in subchild_loops] # Filter out subchildren - # subchild_lookup = {sc[1]: sc for sc in subchild_loops} # Lookup for subchildren by parent counter - - # Process each child, include subchildren when applicable - return list(map(lambda child: self.process_child( - child, - [{ 'index_start': subchild[0], - 'index_end': self.calculate_child_end_index(subchild, None, child[-1]), - 'children': None } - for subchild in subchild_loops if subchild[1] == child[1] and int(child[2]) == 1], - parent_end), - children)) + + def process_children(self, children, subchild_loops, parent_end): + """Process all children, adjusting their end indices correctly, and add subchildren using functional programming.""" + # Filter out subchildren from main children list + filtered_children = [ + child for child in children if child not in subchild_loops] + # Apply processing to each child and collect the results + processed_children = list(map(lambda child: self.process_child_entry(child, filtered_children.index( + child), filtered_children, subchild_loops, parent_end), filtered_children)) + return processed_children def process_loop(self, loop): - child_loops = sorted(self.get_child_loops(loop, self.hl.child_loops), key=lambda x: x[0]) - children = self.add_subchildren_to_children(child_loops, self.hl.subchild_loops, loop[3]) + child_loops = sorted(self.get_child_loops( + loop, self.hl.child_loops), key=lambda x: x[0]) + children = self.process_children( + child_loops, self.hl.subchild_loops, loop[3]) return { 'index_start': loop[0], 'index_end': loop[3], @@ -88,10 +95,6 @@ def generate_summary(self): return {str(loop[1]): self.process_loop(loop) for loop in self.hl.parent_loops} - - - - """ loop_manager = HierarchicalLoopManager(sample_data_837i_edited) summary = loop_manager.summary @@ -111,7 +114,7 @@ def generate_summary(self): 'children': [{'index_start': 118, 'index_end': 138, 'children': None}]}, '79': {'index_start': 144, 'index_end': 186, - 'children': [{'index_start': 153, 'index_end': 186, 'children': None}, + 'children': [{'index_start': 153, 'index_end': 159, 'children': None}, {'index_start': 160, 'index_end': 186, 'children': [{'index_start': 167, 'index_end': 186, 'children': None}]}]}} diff --git a/databricksx12/hls/loop.py b/databricksx12/hls/loop.py index 365b1ea..1b71442 100644 --- a/databricksx12/hls/loop.py +++ b/databricksx12/hls/loop.py @@ -2,6 +2,7 @@ from functools import reduce from databricksx12.hls import hierarchicalloop + class LoopMapping: def __init__(self, mappings=None): self.mappings = mappings if mappings is not None else { @@ -9,8 +10,13 @@ def __init__(self, mappings=None): 'Information Source': { 'loop': '2000A', 'reference_ids': ('NM1', '3'), - 'secondary_reference': ('85', '1') + 'secondary_reference': ('85', '1') + }, + 'Provider Address Line 1': { + 'loop': '2000AA', + 'reference_ids': ('N3', '1') } + }, '22': { 'Subscriber': { @@ -19,28 +25,37 @@ def __init__(self, mappings=None): }, 'Individual First Name': { 'loop': '2010BA', - 'reference_ids': ('NM1', '4'), - 'secondary_reference': ('IL', '1') + 'reference_ids': ('NM1', '4'), + 'secondary_reference': ('IL', '1') }, 'Individual Last Name': { 'loop': '2010BA', - 'reference_ids': ('NM1', '3'), - 'secondary_reference': ('IL', '1') + 'reference_ids': ('NM1', '3'), + 'secondary_reference': ('IL', '1') }, 'Payer Name': { 'loop': '2010BB', - 'reference_ids': ('NM1', '3'), - 'secondary_reference': ('PR', '1') + 'reference_ids': ('NM1', '3'), + 'secondary_reference': ('PR', '1') + }, + 'Claim ID': { + 'loop': '2300', + 'reference_ids': ('CLM', '1') + }, + 'Claim Amount': { + 'loop': '2300', + 'reference_ids': ('CLM', '2') } } } - + def get_mapping(self, element, description=None): + """ Returns a specific mapping based on element key and description. """ mappings = self.mappings.get(element, {}) if description: return mappings.get(description, None) return None - + class Loop(EDI): def __init__(self, data, delim_cls=AnsiX12Delim, loop_mapping=LoopMapping()): @@ -48,37 +63,46 @@ def __init__(self, data, delim_cls=AnsiX12Delim, loop_mapping=LoopMapping()): self.loop_mapping = loop_mapping self.hlmanager = hierarchicalloop.HierarchicalLoopManager(data) self.hl_summary = self.hlmanager.summary - self.clm_segments = [(i, x.element(2)) for i, x in self.segments_by_name_index("CLM")] - + + self.sender = self.segments_by_name("GS")[0].element(2) + self.receiver = self.segments_by_name("GS")[0].element(3) + + def claim_segments(self): + return [(i, x.element(2)) for i, x in self.segments_by_name_index("CLM")] + + def claim_count(self): + return len(self.segments_by_name_index("CLM")) + def get_transaction_info(self, tx_summary, clm_index): """ retrieves transaction information for a claim from hierarchical summary - get the transaction range for claim index "x" + Eg., get the transaction range for claim index "x" """ return next((info for _, info in tx_summary.items() if info['index_start'] <= int(clm_index) <= info['index_end']), None) - def get_ranges(self, tx_info, use_children=False): + def get_ranges(self, tx_info, clm_index, use_children=False): """ - extracts numeric ranges for parent and optionally children based on transaction - find ranges for parent and children loops for processing + extracts numeric ranges for parent and optionally children based on transaction but if children add an index to filter to the right one + Eg., find ranges for parent and children loops for processing """ - parent_range = (tx_info['index_start'], tx_info['index_end']) - if use_children: - return [(child['index_start'], child['index_end']) for child in tx_info.get('children', [])] - return [parent_range] - + if use_children and 'children' in tx_info: + return [(child['index_start'], child['index_end']) for child in tx_info['children'] + if child['index_start'] <= int(clm_index) <= child['index_end']] + else: + return [(tx_info['index_start'], tx_info['index_end'])] def find_elements_based_on_ranges(self, ranges, target_segment_name, target_element_index, secondary_reference=None): """ filters and maps EDI segments to extract required elements based on their position and type """ - process_range = lambda range_tuple: [ + def process_range(range_tuple): return [ segment.element(int(target_element_index)) for segment in self.segments_by_position(range_tuple[0], range_tuple[1]) - if segment.segment_name() == target_segment_name and + if segment.segment_name() == target_segment_name and segment.segment_len() > int(target_element_index) and - (secondary_reference is None or segment.element(int(secondary_reference[1])) == secondary_reference[0]) + (secondary_reference is None or segment.element( + int(secondary_reference[1])) == secondary_reference[0]) ] return reduce(lambda acc, lst: acc + lst, map(process_range, ranges), []) @@ -91,58 +115,29 @@ def extract_elements_from_claim(self, clm_segment, target_segment_name, target_e if not tx_info: return None - ranges = self.get_ranges(tx_info, use_children) + ranges = self.get_ranges(tx_info, clm_index, use_children) return self.find_elements_based_on_ranges(ranges, target_segment_name, target_element_index, secondary_reference) - - def find_reference_elements(self, loop_key, description=None): - """ - extract reference elements from claims based on loop mapping and hierarchy and handles children separately if specified by loop key. - find billing provider names under loop '20' from an EDI transaction. - """ - + def find_reference_element(self, clm_segment, loop_key, description=None): loop_info = self.loop_mapping.get_mapping(loop_key, description) if not loop_info: return [] - + target_segment_name, target_element_index = loop_info['reference_ids'] secondary_reference = loop_info.get('secondary_reference', None) - use_children = loop_key == '22' - process_clm_segment = lambda clm_segment: self.extract_elements_from_claim(clm_segment, - target_segment_name, - target_element_index, - use_children, - secondary_reference) - reference_list = list(filter(None, map(process_clm_segment, self.clm_segments))) - - return [summary for summary in reference_list] + + return self.extract_elements_from_claim(clm_segment, + target_segment_name, + target_element_index, + use_children, + secondary_reference) """ sample_data_837i_edited = open("/sampledata/837/CHPW_Claimdata_edited.txt", "rb").read().decode("utf-8") claims = Loop(sample_data_837i_edited) -claims.find_reference_elements('20', 'Information Source') -Outputs: -[['BH CLINIC OF VANCOUVER'], - ['BH CLINIC OF VANCOUVER'], - ['BH CLINIC OF VANCOUVER'], - ['BH CLINIC OF VANCOUVER'], - ['BH CLINIC OF VANCOUVER']] -""" -""" -claims.find_reference_elements('22', 'Payer Name') +claims.find_reference_element(claims.claim_segments()[0], '22', 'Claim ID') Outputs: -[['COMMUNITY HEALTH PLAN OF WASHINGTON'], - ['COMMUNITY HEALTH PLAN OF WASHINGTON'], - ['COMMUNITY HEALTH PLAN OF WASHINGTON'], - ['COMMUNITY HEALTH PLAN OF WASHINGTON'], - ['COMMUNITY HEALTH PLAN OF WASHINGTON', - 'COMMUNITY HEALTH PLAN OF WASHINGTON', - 'COMMUNITY HEALTH PLAN OF WASHINGTON']] -""" +['1805080AV3648339'] """ -claims.find_reference_elements('22', 'Individual First Name') -Outputs: -[['JOHN'], ['SUSAN'], ['JOHN'], ['SUSAN'], ['JOHN', 'JOHN', 'JOHN']] - """ \ No newline at end of file From df88ec9d8569a7d665b3b40e869994861c1a7a39 Mon Sep 17 00:00:00 2001 From: Raven Date: Thu, 2 May 2024 20:08:04 -0400 Subject: [PATCH 15/46] sample data with patient dependents in the 5th tx --- sampledata/837/CHPW_Claimdata_edited.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sampledata/837/CHPW_Claimdata_edited.txt b/sampledata/837/CHPW_Claimdata_edited.txt index 176e84a..5ec175a 100644 --- a/sampledata/837/CHPW_Claimdata_edited.txt +++ b/sampledata/837/CHPW_Claimdata_edited.txt @@ -166,7 +166,7 @@ N4*VANCOUVER*WA*986640001~ DMG*D8*19881225*M~ NM1*PR*2*COMMUNITY HEALTH PLAN OF MASS*****PI*CHPWA~ HL*82*79*23*0~ -SBR*P*18**COMMUNITY HLTH PLAN OF WASH*****CI~ +PAT*P*18**DEPENDENT PATIENT*****CI~ NM1*IL*1*SUBSCRIBER*JOHN*J***MI*987321~ N3*987 65TH PL~ N4*VANCOUVER*WA*986640001~ From d15ddef7fa7071e1529d702b56001b6a3eb3dd3e Mon Sep 17 00:00:00 2001 From: Aaron Z Date: Fri, 3 May 2024 00:02:26 -0400 Subject: [PATCH 16/46] traverse HLs wip --- databricksx12/hls/loop.py | 180 +++++++++++++++++--------------------- 1 file changed, 81 insertions(+), 99 deletions(-) diff --git a/databricksx12/hls/loop.py b/databricksx12/hls/loop.py index 1b71442..db6f588 100644 --- a/databricksx12/hls/loop.py +++ b/databricksx12/hls/loop.py @@ -1,51 +1,16 @@ from databricksx12.edi import * from functools import reduce -from databricksx12.hls import hierarchicalloop - class LoopMapping: def __init__(self, mappings=None): self.mappings = mappings if mappings is not None else { '20': { - 'Information Source': { - 'loop': '2000A', - 'reference_ids': ('NM1', '3'), - 'secondary_reference': ('85', '1') - }, - 'Provider Address Line 1': { - 'loop': '2000AA', - 'reference_ids': ('N3', '1') - } - + 'loop name': 'Information Source', + 'loop': '2000A' }, '22': { - 'Subscriber': { - 'loop': '2000B', - 'reference_ids': ('SBR', '4') - }, - 'Individual First Name': { - 'loop': '2010BA', - 'reference_ids': ('NM1', '4'), - 'secondary_reference': ('IL', '1') - }, - 'Individual Last Name': { - 'loop': '2010BA', - 'reference_ids': ('NM1', '3'), - 'secondary_reference': ('IL', '1') - }, - 'Payer Name': { - 'loop': '2010BB', - 'reference_ids': ('NM1', '3'), - 'secondary_reference': ('PR', '1') - }, - 'Claim ID': { - 'loop': '2300', - 'reference_ids': ('CLM', '1') - }, - 'Claim Amount': { - 'loop': '2300', - 'reference_ids': ('CLM', '2') - } + 'loop name': 'Subscriber', + 'loop': '2000B' } } @@ -58,82 +23,99 @@ def get_mapping(self, element, description=None): class Loop(EDI): + + def __init__(self, data, delim_cls=AnsiX12Delim, loop_mapping=LoopMapping()): super().__init__(data, delim_cls) self.loop_mapping = loop_mapping - self.hlmanager = hierarchicalloop.HierarchicalLoopManager(data) - self.hl_summary = self.hlmanager.summary - - self.sender = self.segments_by_name("GS")[0].element(2) - self.receiver = self.segments_by_name("GS")[0].element(3) - - def claim_segments(self): - return [(i, x.element(2)) for i, x in self.segments_by_name_index("CLM")] - - def claim_count(self): - return len(self.segments_by_name_index("CLM")) - - def get_transaction_info(self, tx_summary, clm_index): + self._start_indexes = self._build_hierarchy_start_indexes() + self.loop_hierarchy = self.build_hierarchy() """ - retrieves transaction information for a claim from hierarchical summary - Eg., get the transaction range for claim index "x" + loop_hierarchy = { unique_id : { + start_idx : "" + end_idx : "" + parent_id : "" + hl_code : "" + child_code: "" + } + } """ - return next((info for _, info in tx_summary.items() - if info['index_start'] <= int(clm_index) <= info['index_end']), None) - def get_ranges(self, tx_info, clm_index, use_children=False): + # + # Build a complete hierarchical view of all HL segments start and end positions + # + def build_hierarchy(self): """ - extracts numeric ranges for parent and optionally children based on transaction but if children add an index to filter to the right one - Eg., find ranges for parent and children loops for processing + Return all start indexes """ - if use_children and 'children' in tx_info: - return [(child['index_start'], child['index_end']) for child in tx_info['children'] - if child['index_start'] <= int(clm_index) <= child['index_end']] - else: - return [(tx_info['index_start'], tx_info['index_end'])] + return { + x[0]: { + "start_idx": x[1], + "end_idx": self._determine_end_index(x[1]), + "parent_id": x[2], + "hl_code": x[3], + "child_code": x[4] + } + for x in self._start_indexes + } - def find_elements_based_on_ranges(self, ranges, target_segment_name, target_element_index, secondary_reference=None): - """ - filters and maps EDI segments to extract required elements based on their position and type - """ - def process_range(range_tuple): return [ - segment.element(int(target_element_index)) - for segment in self.segments_by_position(range_tuple[0], range_tuple[1]) - if segment.segment_name() == target_segment_name and - segment.segment_len() > int(target_element_index) and - (secondary_reference is None or segment.element( - int(secondary_reference[1])) == secondary_reference[0]) - ] - return reduce(lambda acc, lst: acc + lst, map(process_range, ranges), []) - - def extract_elements_from_claim(self, clm_segment, target_segment_name, target_element_index, use_children=False, secondary_reference=None): - """ - a higher-level function that ties together the previous functions to get tx info, the ranges of interest, and elements from every range - """ - clm_index = clm_segment[0] - tx_info = self.get_transaction_info(self.hl_summary, clm_index) - if not tx_info: + # + # Return a tuple of all HL segments, start index, id, parent id, child code, and hl_code + # + def _build_hierarchy_start_indexes(self): + return [ ( x.element(1), #id + i, # "start_idx" + x.element(2), # "parent_id" + x.element(3), # "hl_code" + x.element(4)) # "child_code" + for i,x in self.segments_by_name_index("HL")] + + # + # Determine the end index of an HL segment + # @param start_idx - the start index of the existing HL segment + # x[1] = start index from tuple in _build_hierarchy_start_indexes + # + def _determine_end_index(self, start_idx): + return min([x[1] for x in self._start_indexes if x[1] > start_idx] + [len(self.data)]) + + # + # Primary search function within HL + # @param pos_idx - the reference point + # @param hl_code - the hl code being searched for + # + # @return - a tuple of the start and end position of the hl segment containing hl_code, otherwise None if not found + # + def find_hl_codes(self, pos_idx, hl_code): + return (self._filter_on_position(pos_idx, hl_code)[0] if self._filter_on_position(pos_idx, hl_code) else self.traverse_loops(pos_idx, hl_code)) + + def traverse_loops(self, pos_idx, hl_code, parent_idx = None): + if parent_idx == "": return None + elif parent_idx == None: + return traverse_loops(pos_idx, hl_code, parent_idx = self._filter_hl_on_position(pos_idx)) + else: + return (temp[0] if (temp := self._filter_hl_on_parent(hl_code, parent_idx)) else traverse_loops(pos_idx, hl_code, ...??? + - ranges = self.get_ranges(tx_info, clm_index, use_children) - return self.find_elements_based_on_ranges(ranges, target_segment_name, target_element_index, secondary_reference) - def find_reference_element(self, clm_segment, loop_key, description=None): - loop_info = self.loop_mapping.get_mapping(loop_key, description) - if not loop_info: - return [] + def _filter_hl_on_position(self, pos_idx): + return (temp[0] if (temp := filter(lambda k,v: v if v['start_idx'] <= pos_idx <= v['end_idx'] ,self.loop_hierarchy)) else "") - target_segment_name, target_element_index = loop_info['reference_ids'] - secondary_reference = loop_info.get('secondary_reference', None) - use_children = loop_key == '22' + + # + # Will only ever return one element or None + # + def _fitler_hl_on_position_and_code(self, pos_idx, hl_code): + return filter(lambda k,v: v if v['hl_code'] == hl_code and v['start_idx'] <= pos_idx <= v['end_idx'] ,self.loop_hierarchy) - return self.extract_elements_from_claim(clm_segment, - target_segment_name, - target_element_index, - use_children, - secondary_reference) + # + # Will only ever return one element or None + # + def _filter_hl_on_parent(self, hl_code, parent_id): + return filter(lambda k,v: v if v['hl_code'] == hl_code and v['id'] == parent_id, self.loop_hierarchy) + """ sample_data_837i_edited = open("/sampledata/837/CHPW_Claimdata_edited.txt", "rb").read().decode("utf-8") claims = Loop(sample_data_837i_edited) From 143fc222ef8b94343a90624cfcbbe4b09b9c7eb3 Mon Sep 17 00:00:00 2001 From: Aaron Z Date: Sun, 5 May 2024 17:44:38 -0400 Subject: [PATCH 17/46] passing loop tests --- databricksx12/hls/loop.py | 36 +++++++-------- ...ited.txt => CHPW_Claimdata_edited.txt.tmp} | 0 tests/test_loop.py | 44 +++++++++++++++++++ tests/test_pyspark.py | 7 ++- 4 files changed, 66 insertions(+), 21 deletions(-) rename sampledata/837/{CHPW_Claimdata_edited.txt => CHPW_Claimdata_edited.txt.tmp} (100%) create mode 100644 tests/test_loop.py diff --git a/databricksx12/hls/loop.py b/databricksx12/hls/loop.py index db6f588..c0e0334 100644 --- a/databricksx12/hls/loop.py +++ b/databricksx12/hls/loop.py @@ -83,37 +83,35 @@ def _determine_end_index(self, start_idx): # @param pos_idx - the reference point # @param hl_code - the hl code being searched for # - # @return - a tuple of the start and end position of the hl segment containing hl_code, otherwise None if not found - # + # @returns None if not found, otherwise the value from loop_hierarchy def find_hl_codes(self, pos_idx, hl_code): - return (self._filter_on_position(pos_idx, hl_code)[0] if self._filter_on_position(pos_idx, hl_code) else self.traverse_loops(pos_idx, hl_code)) + init_hl = self._filter_hl_on_position(pos_idx) + return (None if init_hl is None else self.traverse_loops(hl_code, init_hl)) + - def traverse_loops(self, pos_idx, hl_code, parent_idx = None): - if parent_idx == "": + # + # Go from child to parent searching for the specified hl_code + # + def traverse_loops(self, hl_code, loop): + if loop['hl_code'] == hl_code: + return loop + elif loop['parent_id'] == "": return None - elif parent_idx == None: - return traverse_loops(pos_idx, hl_code, parent_idx = self._filter_hl_on_position(pos_idx)) else: - return (temp[0] if (temp := self._filter_hl_on_parent(hl_code, parent_idx)) else traverse_loops(pos_idx, hl_code, ...??? - - + return self.traverse_loops(hl_code, self.loop_hierarchy.get(loop['parent_id'])) + # + # + # def _filter_hl_on_position(self, pos_idx): - return (temp[0] if (temp := filter(lambda k,v: v if v['start_idx'] <= pos_idx <= v['end_idx'] ,self.loop_hierarchy)) else "") + return (list(temp)[0] if (temp := filter(lambda v: v['start_idx'] <= pos_idx <= v['end_idx'], self.loop_hierarchy.values())) else None) # # Will only ever return one element or None # def _fitler_hl_on_position_and_code(self, pos_idx, hl_code): - return filter(lambda k,v: v if v['hl_code'] == hl_code and v['start_idx'] <= pos_idx <= v['end_idx'] ,self.loop_hierarchy) - - # - # Will only ever return one element or None - # - def _filter_hl_on_parent(self, hl_code, parent_id): - return filter(lambda k,v: v if v['hl_code'] == hl_code and v['id'] == parent_id, self.loop_hierarchy) - + return (list(temp)[0] if (temp := filter(lambda v: v['hl_code'] == hl_code and v['start_idx'] <= pos_idx <= v['end_idx'] ,self.loop_hierarchy.values())) else None) """ diff --git a/sampledata/837/CHPW_Claimdata_edited.txt b/sampledata/837/CHPW_Claimdata_edited.txt.tmp similarity index 100% rename from sampledata/837/CHPW_Claimdata_edited.txt rename to sampledata/837/CHPW_Claimdata_edited.txt.tmp diff --git a/tests/test_loop.py b/tests/test_loop.py new file mode 100644 index 0000000..a54737b --- /dev/null +++ b/tests/test_loop.py @@ -0,0 +1,44 @@ +from test_spark_base import * +from databricksx12.hls.loop import * +import unittest, re + + +class TestLoop(PysparkBaseTest): + + data = open("sampledata/837/837p.txt", "rb").read().decode("utf-8") + loop = Loop(data) + + # + # Test Loop base info + # + def test_loop_hierarchy_build(self): + assert (set(TestLoop.loop.loop_hierarchy.keys()) == set({'1','2','3'})) + assert (TestLoop.loop.loop_hierarchy.get('1')['start_idx'] == 7) + assert (TestLoop.loop.loop_hierarchy.get('2')['start_idx'] == 12) + assert (TestLoop.loop.loop_hierarchy.get('3')['start_idx'] == 27) + assert (TestLoop.loop.loop_hierarchy.get('1')['end_idx'] == 12) + assert (TestLoop.loop.loop_hierarchy.get('2')['end_idx'] == 27) + assert (TestLoop.loop.loop_hierarchy.get('3')['end_idx'] == 45) + assert ([x.get('hl_code') for x in list(TestLoop.loop.loop_hierarchy.values())] == ['20','22','22']) + assert ([x.get('child_code') for x in list(TestLoop.loop.loop_hierarchy.values())] == ['1','0','0']) + + + # + # Test traversing hierarchy + # + def test_loop_hierarchy(self): + clms = TestLoop.loop.segments_by_name_index("CLM") + assert (clms[0][0] == 22) + assert (clms[1][0] == 37) + + assert (TestLoop.loop.find_hl_codes(22, '20') == TestLoop.loop.find_hl_codes(37, '20')) + assert (TestLoop.loop.find_hl_codes(22, '22') != TestLoop.loop.find_hl_codes(37, '22')) + + assert (TestLoop.loop.find_hl_codes(22, '20')['start_idx'] == 7) + assert (TestLoop.loop.find_hl_codes(22, '22')['start_idx'] == 12) + assert (TestLoop.loop.find_hl_codes(37, '22')['start_idx'] == 27) + +if __name__ == '__main__': + unittest.main() + + diff --git a/tests/test_pyspark.py b/tests/test_pyspark.py index 36327d5..b7fcd3e 100644 --- a/tests/test_pyspark.py +++ b/tests/test_pyspark.py @@ -10,6 +10,9 @@ def test_transaction_count(self): .map(lambda x: EDI(x)) .map(lambda x: {"transaction_count": x.num_transactions()}) ).toDF() - assert ( data.count() == 4) #4 rows - assert ( data.select(data.transaction_count).groupBy().sum().collect()[0]["sum(transaction_count)"] == 8) #8 ST/SE transactions + assert ( data.count() == 5) #5 rows + assert ( data.select(data.transaction_count).groupBy().sum().collect()[0]["sum(transaction_count)"] == 9) #8 ST/SE transactions + +if __name__ == '__main__': + unittest.main() From 9a7b84bda27d0725d85764a011d64239cd8769d7 Mon Sep 17 00:00:00 2001 From: Aaron Z Date: Tue, 7 May 2024 15:11:33 -0400 Subject: [PATCH 18/46] bug and testing --- databricksx12/hls/loop.py | 54 +++++++++++++++++++++++++++++++++------ 1 file changed, 46 insertions(+), 8 deletions(-) diff --git a/databricksx12/hls/loop.py b/databricksx12/hls/loop.py index c0e0334..9c3a767 100644 --- a/databricksx12/hls/loop.py +++ b/databricksx12/hls/loop.py @@ -2,6 +2,10 @@ from functools import reduce class LoopMapping: + + # + # class to hold k,v of hl_code, loop + # def __init__(self, mappings=None): self.mappings = mappings if mappings is not None else { '20': { @@ -14,6 +18,12 @@ def __init__(self, mappings=None): } } + # + # Get hl_code associated with the loop + # + def get_hl_code(self, loop): + return None if (temp := [hl_code for hl_code, v in self.mappings.items() if v['loop'] == loop]) == [] else temp[0] + def get_mapping(self, element, description=None): """ Returns a specific mapping based on element key and description. """ mappings = self.mappings.get(element, {}) @@ -27,7 +37,7 @@ class Loop(EDI): def __init__(self, data, delim_cls=AnsiX12Delim, loop_mapping=LoopMapping()): super().__init__(data, delim_cls) - self.loop_mapping = loop_mapping + self.mapping = loop_mapping self._start_indexes = self._build_hierarchy_start_indexes() self.loop_hierarchy = self.build_hierarchy() """ @@ -41,6 +51,16 @@ def __init__(self, data, delim_cls=AnsiX12Delim, loop_mapping=LoopMapping()): } """ + # + # Get the specified loop based upon a position, else return None if does not exist + # @param pos - the position of the data point + # @param loop - the loop from the mapping that is being searched for + # + # @return None if not found, otherwise value from loop_hierarchy + # + def get_loop(self, pos, loop): + return None if (temp := self.mapping.get_hl_code(loop)) is None else self.find_hl_codes(pos, temp) + # # Build a complete hierarchical view of all HL segments start and end positions # @@ -54,7 +74,8 @@ def build_hierarchy(self): "end_idx": self._determine_end_index(x[1]), "parent_id": x[2], "hl_code": x[3], - "child_code": x[4] + "child_code": x[4], + "subordinate_ind": self.subordinate_child_indicator(x[1]) #true if previous HL04=1 } for x in self._start_indexes } @@ -98,21 +119,38 @@ def traverse_loops(self, hl_code, loop): elif loop['parent_id'] == "": return None else: - return self.traverse_loops(hl_code, self.loop_hierarchy.get(loop['parent_id'])) + return self.traverse_loops(hl_code, self.determine_parent(loop)) # - # + # parent is either the parent_id or the previous HL segment if there was a child indicator section + # + def determine_parent(self, loop): + return loop['parent_id'] if loop['subordinate_ind'] == 0 else self.loop_hierarchy.get(self.determine_previous_hl(loop['start_idx'])[0]) + + # + # returns the HL segment # def _filter_hl_on_position(self, pos_idx): return (list(temp)[0] if (temp := filter(lambda v: v['start_idx'] <= pos_idx <= v['end_idx'], self.loop_hierarchy.values())) else None) - + + # + # determine if the HL segment at pos is a subordinate child of a parent + # i.e. (parent has child code =1) and parent is previous HL segment # - # Will only ever return one element or None # - def _fitler_hl_on_position_and_code(self, pos_idx, hl_code): - return (list(temp)[0] if (temp := filter(lambda v: v['hl_code'] == hl_code and v['start_idx'] <= pos_idx <= v['end_idx'] ,self.loop_hierarchy.values())) else None) + def subordinate_child_indicator(self, pos): + return 0 if self.determine_previous_hl(pos) is None else self.determine_previous_hl(pos)[4] + # + # Determine the previous HL segment based upon a position + # + def determine_previous_hl(self, pos): + try: + return reduce(lambda a,b: a if a[1] > b[1] else b, + filter(lambda x: x[1] < pos, self._start_indexes)) + except: + return None #when there is no preceding hl segment """ sample_data_837i_edited = open("/sampledata/837/CHPW_Claimdata_edited.txt", "rb").read().decode("utf-8") From af27bdf2db6f106e3d2fe0a3c1d1ed069570c7c7 Mon Sep 17 00:00:00 2001 From: Aaron Z Date: Tue, 7 May 2024 22:53:42 -0400 Subject: [PATCH 19/46] subordinate child looping tests --- databricksx12/hls/loop.py | 4 ++++ tests/test_loop.py | 11 +++++++++++ 2 files changed, 15 insertions(+) diff --git a/databricksx12/hls/loop.py b/databricksx12/hls/loop.py index 9c3a767..b6d6638 100644 --- a/databricksx12/hls/loop.py +++ b/databricksx12/hls/loop.py @@ -15,6 +15,10 @@ def __init__(self, mappings=None): '22': { 'loop name': 'Subscriber', 'loop': '2000B' + }, + '23': { + 'loop name': 'Patient', + 'loop': '2000C' } } diff --git a/tests/test_loop.py b/tests/test_loop.py index a54737b..a831c47 100644 --- a/tests/test_loop.py +++ b/tests/test_loop.py @@ -37,6 +37,17 @@ def test_loop_hierarchy(self): assert (TestLoop.loop.find_hl_codes(22, '20')['start_idx'] == 7) assert (TestLoop.loop.find_hl_codes(22, '22')['start_idx'] == 12) assert (TestLoop.loop.find_hl_codes(37, '22')['start_idx'] == 27) + + def test_loop_hierarchy_child_codes(self): + data = open("./sampledata/837/CHPW_Claimdata_edited.txt.tmp", "rb").read().decode("utf-8") + loop = Loop(data) + assert(loop.find_hl_codes(174, '22')['start_idx'] == 160) + + def test_loop_search_by_name(self): + assert(TestLoop.loop.get_loop(22, "2000A")['start_idx'] == 7) + assert(TestLoop.loop.get_loop(22, "2000B")['start_idx'] == 12) + assert(TestLoop.loop.get_loop(37, "2000A")['start_idx'] == 7) + assert(TestLoop.loop.get_loop(37, "2000B")['start_idx'] == 27) if __name__ == '__main__': unittest.main() From a6f0321f169d7e265e81bd6691ede1adf9d00fe8 Mon Sep 17 00:00:00 2001 From: Aaron Z Date: Thu, 9 May 2024 15:23:21 -0400 Subject: [PATCH 20/46] build data --- databricksx12/hls/build-func.py | 138 --------- databricksx12/hls/claim.py | 142 +++++---- databricksx12/hls/healthcare.py | 12 +- databricksx12/hls/hierarchicalloop.py | 129 --------- databricksx12/{ => hls}/hl7.py | 0 databricksx12/hls/loop.py | 9 +- .../hls/test-notebooks/claim-test.ipynb | 273 ------------------ tests/test_loop.py | 19 ++ 8 files changed, 123 insertions(+), 599 deletions(-) delete mode 100644 databricksx12/hls/build-func.py delete mode 100644 databricksx12/hls/hierarchicalloop.py rename databricksx12/{ => hls}/hl7.py (100%) delete mode 100644 databricksx12/hls/test-notebooks/claim-test.ipynb diff --git a/databricksx12/hls/build-func.py b/databricksx12/hls/build-func.py deleted file mode 100644 index 2de4f64..0000000 --- a/databricksx12/hls/build-func.py +++ /dev/null @@ -1,138 +0,0 @@ -""" -Apr 9 notes -""" -from databricksx12.edi import * - -class LoopMapping: - - def __init__(self, mappings=None): - self.mappings = (mappings if mappings is not None else { - '20': { - 'description': 'Information Source', - 'loop': '2000A' - }, - '22': { - 'description': 'Subscriber', - 'loop': '2000B' - } - }) - - - - - """ - def __init__(self): - self.mappings = { - '2000A': ('20', 'NM1', '3'), - '2000B': ('22', 'SBR', '4'), - } - - - ADZ want our key = (lookup value found in data), value = additional info needed - """ - - -class HierarchicalLoop(EDI): - def __init__(self, data, delim_cls=AnsiX12Delim, loop_mapping= LoopMapping.mappings): - super().__init__(data, delim_cls) - self.loop_mapping = loop_mapping - self.target_element, self.target_segment_name, self.target_element_index = self.loop_mapping.get_identifiers( - Loop) - - # find all HL segments along with the 3rd element that denotes 2000A (20) or 2000B (22) - self.hl_segments = self._hl_identifiers() - - # find all CLM segments (important for indexing the last HL or SBR within a tx) - self.clm_segments = self._clm_identifiers() - - # Calculate ranges and then extract 2000A/B lines based on those ranges - self.ranges = self.select_range_of_interest( - self.hl_segments, self.clm_segments, self.target_element) - self.extracted_lines = self.extract_lines_based_on_ranges( - self.ranges, self.target_segment_name, self.target_element_index) - - def _hl_identifiers(self): - # Find the segments where HL loop begins - indexed_HL_segments = self.segments_by_nloop_object.extracted_linesame_index("HL") - return [(i, x.element(3)) for i, x in indexed_HL_segments] - - def _clm_identifiers(self): - # Find the segments where CLM loop begins - indexed_CLM_segments = self.segments_by_name_index("CLM") - return [(i, x.element(2)) for i, x in indexed_CLM_segments] - - def select_range_of_interest(self, hl_indexes, clm_indexes, target_value): - ranges = [] - start_index = None - last_index = None - - for index, value in hl_indexes: - if value == target_value: - if start_index is not None: - ranges.append((start_index+1, index)) - start_index = index - elif start_index is not None: - ranges.append((start_index+1, index)) - start_index = None - if clm_indexes: - last_index = clm_indexes[-1][0] - if last_index and start_index is not None: - ranges.append((start_index+1, last_index)) - return ranges - - def extract_lines_based_on_ranges(self, ranges, target_value, target_index): - extracted_elements = [] - # Iterate through each range in the list - for start, end in ranges: - # Retrieve the segments within this range - segments_in_range = self.segments_by_position(start, end) - - desired_elements = map( - lambda segment: segment.element(int(target_index)), - filter( - lambda segment: segment.segment_name() == target_value and len( - segment.data.split(segment.format_cls.ELEMENT_DELIM)) > int(target_index), - segments_in_range - ) - ) - extracted_elements.extend(desired_elements) - - return list(extracted_elements) - - - def parent_loops(self): - pass - - def child_loops(self, parent_loop_num): - pass - - """ - @return - -index of each HL segment - -index of parent segments - -be able to answer "where is loop XYZ?" and "at this location, what looop am i in?" - """ - def _hl_segment_indexes(self): - pass - - - self.hl_parents = { - parent: - { index_start : value - index_end : value - children : [ - hl_child : { - index_start: value - index_end: value - } - ] - } - - - self.hl = HL() - self.claim_start_index = segment(clm) - - Who is my billing provider? hl.get_loop(20) - Who is my subscriber? - Who is my patient? - diff --git a/databricksx12/hls/claim.py b/databricksx12/hls/claim.py index 6a9e66e..8cada93 100644 --- a/databricksx12/hls/claim.py +++ b/databricksx12/hls/claim.py @@ -1,31 +1,88 @@ from databricksx12.edi import * -from databricksx12.hls import loop +from databricksx12.hls.loop import * +import itertools + + # # Base claim class # -class Claim(): +class MedicalClaim(EDI): + + def __init__(self, + sender_loop = [], + receiver_loop = [], + billing_loop = [], + subscriber_loop = [], + patient_loop = [], + claim_loop = [], + sl_loop = [] #service line loop + ): + self.sender_loop = sender_loop + self.receiver_loop = receiver_loop + self.billing_loop = billing_loop + self.subscriber_loop = subscriber_loop + self.patient_loop = patient_loop + self.claim_loop = claim_loop + self.sl_loop = sl_loop + + self.build() + + def billing_loop(self): + return { + "billing_prvdr_name": "TODO", + "billing_npi": "TODO", + "billing_street_address": "TODO", + "billing_zip_cd": "TODO", + "billing_state_cd": "TODO" + } + + def subscriber_loop(self): + return { + "TODO": "TODO" + } + + # + # + # + def patient_loop(self): + #Note - if this doesn't exist then its the same as subscriber loop + return { + "TODO": "TODO" + } + + def toJson(self): + { + **self.patient_loop(), + **self.subscriber_loop(), + **self.billing_loop() + } - def __init__(self): - pass - @staticmethod - def from_dictionary(d): - pass + #not sure if this should be here or not, but you get the idea + def build(): + self.billing_info = self.billing_loop() + self.subscriber_info = self.subscriber_loop() + self.patient_info = self.subscriber_loop() if self.patient_loop = [] else self.patient_loop() + -class Claim837i(Claim): +class Claim837i(MedicalClaim): NAME = "837I" -# Format of 837P https://www.dhs.wisconsin.gov/publications/p0/p00265.pdf +# Format of 837P https://www.dhs.wisconsin.gov/publications/p0/p00265.pdf -class Claim837p(Claim): +class Claim837p(MedicalClaim): NAME = "837P" +class Claim835(MedicalClaim): + + NAME = "835" + # # Base claim builder (transaction -> 1 or more claims) @@ -35,59 +92,38 @@ class ClaimBuilder(EDI): # # Given claim type (837i, 837p, etc), segments, and delim class, build claim level classes # - def __init__(self, trnx_type, trnx_data, delim_cls): - self.trnx_type = trnx_type + def __init__(self, trnx_type_cls, trnx_data, delim_cls=AnsiX12Delim): self.data = trnx_data - self.delim_cls = delim_cls - - self.loop_summary = loop.Loop(trnx_data) + self.format_cls = delim_cls + self.trnx_cls = trnx_type_cls + self.loop = Loop(trnx_data) + # - # Returns a dictionary of "loop name" : "loop data" + # Builds a claim object from # - - def build_claim(self, clm_segment): - return { - "1000A": { - "desc": "Submitter Name", - "segments": self.loop_summary.sender - }, - "1000B": { - "desc": "Receiver Name", - "segments": self.loop_summary.receiver - }, - "2000A": { - "desc": "Billing Provider", - "segments": self.loop_summary.find_reference_element(clm_segment, '20', 'Information Source') - }, - "2000B": { - "desc": "Subscriber", - "segments": self.loop_summary.find_reference_element(clm_segment, '22', 'Subscriber') - }, - "2010BA": { - "desc": "Patient", - "segments": (self.loop_summary.find_reference_element(clm_segment, '22', 'Individual First Name'), - self.loop_summary.find_reference_element(clm_segment, '22', 'Individual Last Name')) - - }, - "2010BB": { - "desc": "Payer", - "segments": self.loop_summary.find_reference_element(clm_segment, '22', 'Payer Name'), - }, - "2300": { - "desc": "Claim", - "segments": (self.loop_summary.find_reference_element(clm_segment, '22', 'Claim ID'), - self.loop_summary.find_reference_element(clm_segment, '22', 'Claim Amount')) - } - } + # @param clm_segment - the claim segment of claim to build + # @param idx - the index of the claim segment in the data + # + # @return the clas containing the relevent claim information + # + def build_claim(self, clm_segment, idx): + return self.trnx_cls( + sender_loop = [], + receiver_loop = [], + billing_loop = self.loop.get_loop_segments(idx, "2000A"), + subscriber_loop = self.loop.get_loop_segments(idx, "2000B"), + patient_loop = self.loop.get_loop_segments(idx, "2000C"), + claim_loop = [], + sl_loop = [] #service line loop + ) # # Given transaction type, transaction segments, and delim info, build out claims in the transaction # @return a list of Claim for each "clm" segment # def build(self): - return [self.build_claim(seg) for seg in self.loop_summary.claim_segments()] - + return [self.build_claim(seg, i) for i, seg in self.segments_by_name_index("CLM")] """ sample_data_837i_edited = open("/sampledata/837/CHPW_Claimdata_edited.txt", "rb").read().decode("utf-8") diff --git a/databricksx12/hls/healthcare.py b/databricksx12/hls/healthcare.py index efd60f5..00cc22e 100644 --- a/databricksx12/hls/healthcare.py +++ b/databricksx12/hls/healthcare.py @@ -6,9 +6,9 @@ class HealthcareManager(EDI): def __init__(self, mapping = { - "222": "837P", - "223": "837I", - "221": "835" + "222": Claim837i, + "223": Claim837p, + "221": None # "835" }): self.mapping = mapping @@ -17,8 +17,10 @@ def __init__(self, mapping = { # Given an EDI message, return a list of healthcare claims # def from_edi(self, edi): - return list(itertools.chain.from_iterable([self.from_functional_group(y) for y in edi.functional_segments()])) + return self.flatmap(self.flatmap([self.from_functional_group(y) for y in edi.functional_segments()])) + def flatmap(self,x): + return list(itertools.chain.from_iterable(x)) def from_functional_group(self, fg): return [self.from_transaction(x) for x in fg.transaction_segments()] @@ -29,5 +31,5 @@ def from_functional_group(self, fg): # def from_transaction(self, trnx): return ClaimBuilder(self.mapping.get(trnx.transaction_type), - [x for x in trnx.data if x.segment_name() not in ['ST', 'SE']], trnx.delim_cls).build() + [x for x in trnx.data if x.segment_name() not in ['ST', 'SE']], trnx.format_cls).build() diff --git a/databricksx12/hls/hierarchicalloop.py b/databricksx12/hls/hierarchicalloop.py deleted file mode 100644 index c047f89..0000000 --- a/databricksx12/hls/hierarchicalloop.py +++ /dev/null @@ -1,129 +0,0 @@ -from databricksx12.edi import * - -import itertools - - -class HierarchicalLoop(EDI): - def __init__(self, data, delim_cls=AnsiX12Delim): - super().__init__(data, delim_cls) - - # parent and children loops - self.parent_start_loops = self._parent_start_tup_loops() - self.parent_end_loops = self._parent_end_loops() - self.parent_loops = self._parent_loops() - self.child_loops = self._child_loops(self.parent_loops) - self.subchild_loops = self._subchild_loops(self.child_loops) - - def _parent_start_tup_loops(self): - # index of parent, counter, and if child - # TODO unit test to return tuple - return [(i, x.element(1), x.element(-1)) for i, x in self.segments_by_name_index("HL") if x.element(2) == ""] - - def _parent_end_loops(self): - return [i for i, x in self.segments_by_name_index("SE")] - - def _parent_loops(self): - return [(tup + (j,)) for tup, j in zip(self.parent_start_loops, self.parent_end_loops)] - - def _child_loops(self, parent_loops): - child_loops = [(i, counter, segment.element(-1), parent_stop_index) - for _, counter, child_id, parent_stop_index in parent_loops - for i, segment in self.segments_by_name_index("HL") if segment.element(2) == counter] - return child_loops - - def _subchild_loops(self, child_loops): - it1, it2 = itertools.tee(child_loops) - next(it2, None) - return [pair[1] for pair in zip(it1, it2) if int(pair[0][2]) == 1] - - -class HierarchicalLoopManager: - def __init__(self, data, delim_cls=AnsiX12Delim): - self.hl = HierarchicalLoop(data, delim_cls) - self.summary = self.generate_summary() - - def get_child_loops(self, parent_loop, loops): - """Filter child loops that fall within the given parent loop's range.""" - return list(filter(lambda x: parent_loop[0] < x[0] < parent_loop[3], loops)) - - def calculate_child_end_index(self, current_child, next_child, parent_end): - """Calculate the end index of a child, adjusting to avoid overlap with the next child.""" - return min(current_child[3], next_child[0] - 1 if next_child else parent_end) - - def process_subchildren(self, child, subchild_loops, parent_end): - """Process subchildren for a given child.""" - return [ - {'index_start': subchild[0], 'index_end': self.calculate_child_end_index( - subchild, None, parent_end), 'children': None} - for subchild in subchild_loops if subchild[1] == child[1] and int(child[2]) == 1 - ] - - def process_child_entry(self, child, index, children, subchild_loops, parent_end): - """Helper function to process each child entry.""" - next_child = children[index + - 1] if (index + 1) < len(children) else None - subchildren = self.process_subchildren( - child, subchild_loops, parent_end) - return { - 'index_start': child[0], - 'index_end': self.calculate_child_end_index(child, next_child, parent_end), - 'children': subchildren or None - } - - def process_children(self, children, subchild_loops, parent_end): - """Process all children, adjusting their end indices correctly, and add subchildren using functional programming.""" - # Filter out subchildren from main children list - filtered_children = [ - child for child in children if child not in subchild_loops] - # Apply processing to each child and collect the results - processed_children = list(map(lambda child: self.process_child_entry(child, filtered_children.index( - child), filtered_children, subchild_loops, parent_end), filtered_children)) - return processed_children - - def process_loop(self, loop): - child_loops = sorted(self.get_child_loops( - loop, self.hl.child_loops), key=lambda x: x[0]) - children = self.process_children( - child_loops, self.hl.subchild_loops, loop[3]) - return { - 'index_start': loop[0], - 'index_end': loop[3], - 'children': children or None - } - - def generate_summary(self): - return {str(loop[1]): self.process_loop(loop) for loop in self.hl.parent_loops} - - -""" -loop_manager = HierarchicalLoopManager(sample_data_837i_edited) -summary = loop_manager.summary - -output: -{'1': {'index_start': 7, - 'index_end': 35, - 'children': [{'index_start': 16, 'index_end': 35, 'children': None}]}, - '63': {'index_start': 41, - 'index_end': 69, - 'children': [{'index_start': 50, 'index_end': 69, 'children': None}]}, - '49': {'index_start': 75, - 'index_end': 103, - 'children': [{'index_start': 84, 'index_end': 103, 'children': None}]}, - '75': {'index_start': 109, - 'index_end': 138, - 'children': [{'index_start': 118, 'index_end': 138, 'children': None}]}, - '79': {'index_start': 144, - 'index_end': 186, - 'children': [{'index_start': 153, 'index_end': 159, 'children': None}, - {'index_start': 160, - 'index_end': 186, - 'children': [{'index_start': 167, 'index_end': 186, 'children': None}]}]}} -""" -""" -sample_data_837p = open("./sampledata/837/837p.txt", "rb").read().decode("utf-8").replace("\\n", "") -HierarchicalLoopManager(sample_data_837p).summary -{'1': {'index_start': 7, - 'index_end': 42, - 'children': [{'index_start': 12, 'index_end': 42, 'children': None}, - {'index_start': 27, 'index_end': 42, 'children': None}]}} -""" diff --git a/databricksx12/hl7.py b/databricksx12/hls/hl7.py similarity index 100% rename from databricksx12/hl7.py rename to databricksx12/hls/hl7.py diff --git a/databricksx12/hls/loop.py b/databricksx12/hls/loop.py index b6d6638..005b3dd 100644 --- a/databricksx12/hls/loop.py +++ b/databricksx12/hls/loop.py @@ -40,7 +40,8 @@ class Loop(EDI): def __init__(self, data, delim_cls=AnsiX12Delim, loop_mapping=LoopMapping()): - super().__init__(data, delim_cls) + self.data = data + self.format_cls = delim_cls self.mapping = loop_mapping self._start_indexes = self._build_hierarchy_start_indexes() self.loop_hierarchy = self.build_hierarchy() @@ -65,6 +66,12 @@ def __init__(self, data, delim_cls=AnsiX12Delim, loop_mapping=LoopMapping()): def get_loop(self, pos, loop): return None if (temp := self.mapping.get_hl_code(loop)) is None else self.find_hl_codes(pos, temp) + # + # same as above, but only returns segment list + # + def get_loop_segments(self, pos, loop): + return [] if (temp := self.get_loop(pos, loop)) is None else self.data[temp['start_idx']:temp['end_idx']] + # # Build a complete hierarchical view of all HL segments start and end positions # diff --git a/databricksx12/hls/test-notebooks/claim-test.ipynb b/databricksx12/hls/test-notebooks/claim-test.ipynb deleted file mode 100644 index 481f558..0000000 --- a/databricksx12/hls/test-notebooks/claim-test.ipynb +++ /dev/null @@ -1,273 +0,0 @@ -{ - "cells": [ - { - "cell_type": "code", - "execution_count": 1, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "['/Users/raven.mukherjee/edi-sol-accelerator/x12-edi-parser/databricksx12/hls/test-notebooks', '/opt/homebrew/Cellar/python@3.12/3.12.2_1/Frameworks/Python.framework/Versions/3.12/lib/python312.zip', '/opt/homebrew/Cellar/python@3.12/3.12.2_1/Frameworks/Python.framework/Versions/3.12/lib/python3.12', '/opt/homebrew/Cellar/python@3.12/3.12.2_1/Frameworks/Python.framework/Versions/3.12/lib/python3.12/lib-dynload', '', '/Users/raven.mukherjee/edi-sol-accelerator/edi-parse-env/lib/python3.12/site-packages']\n" - ] - } - ], - "source": [ - "import sys\n", - "print(sys.path)" - ] - }, - { - "cell_type": "code", - "execution_count": 2, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "{\n", - " \"EDI.sender_tax_id\": \"ZZ\",\n", - " \"list\": [\n", - " {\n", - " \"FunctionalGroup.receiver\": \"123456789\",\n", - " \"FunctionalGroup.sender\": \"CLEARINGHOUSE\",\n", - " \"FunctionalGroup.transaction_datetime\": \"20180508:0833\",\n", - " \"FunctionalGroup.transaction_type\": \"222\",\n", - " \"list\": [\n", - " {\n", - " \"Transaction.transaction_type\": \"222\"\n", - " },\n", - " {\n", - " \"Transaction.transaction_type\": \"222\"\n", - " },\n", - " {\n", - " \"Transaction.transaction_type\": \"222\"\n", - " },\n", - " {\n", - " \"Transaction.transaction_type\": \"222\"\n", - " },\n", - " {\n", - " \"Transaction.transaction_type\": \"222\"\n", - " }\n", - " ]\n", - " }\n", - " ]\n", - "}\n" - ] - } - ], - "source": [ - "from databricksx12.edi import *\n", - "x = EDIManager(EDI(open(\"/Users/raven.mukherjee/solution_accelerators/x12-edi-parser/sampledata/837/CHPW_Claimdata.txt\", \"rb\").read().decode(\"utf-8\")))\n", - "\n", - "import json\n", - "print(json.dumps(x.flatten(x.data), indent=4))" - ] - }, - { - "cell_type": "code", - "execution_count": 3, - "metadata": {}, - "outputs": [], - "source": [ - "sample_data_837i = open(\"/Users/raven.mukherjee/solution_accelerators/x12-edi-parser/sampledata/837/CC_837I_EDI.txt\", \"rb\").read().decode(\"utf-8\")" - ] - }, - { - "cell_type": "code", - "execution_count": 4, - "metadata": {}, - "outputs": [], - "source": [ - "# from databricksx12.edi import *\n", - "\n", - "# class extend_transaction(EDI):\n", - "# def __init__(self, data, delim_cls=AnsiX12Delim):\n", - "# super().__init__(data, delim_cls)\n", - "\n", - "# @property\n", - "# def full_transaction(self):\n", - "# transaction_start_indexes = [i for i, segment in enumerate(self.data) if segment.segment_name() == \"ST\"]\n", - "# transaction_end_indexes = [i for i, segment in enumerate(self.data) if segment.segment_name() == \"SE\"]\n", - "\n", - "# transactions = []\n", - "# for start, end in zip(transaction_start_indexes, transaction_end_indexes):\n", - "# transaction_segments = self.data[start:end+1]\n", - "# transactions.append(transaction_segments)\n", - "# return transactions\n", - "\n", - "# @property\n", - "# def claim_identifier(self):\n", - "# transactions = self.full_transaction\n", - "# claim_identifiers = []\n", - "\n", - "# for transaction_segments in transactions:\n", - "# claim_id = None\n", - "# for segment in transaction_segments:\n", - "# if segment.segment_name() == \"BHT\":\n", - "# claim_id = segment.element(3) #confirm\n", - "# break\n", - "# claim_identifiers.append(claim_id)\n", - "\n", - "# return claim_identifiers\n", - "\n", - "# @property\n", - "# def header_billing_amount(self):\n", - "# transactions = self.full_transaction\n", - "# billing_headers = []\n", - "\n", - "# for transaction_segments in transactions:\n", - "# for segment in transaction_segments:\n", - "# if segment.segment_name() == \"CLM\":\n", - "# bill_header = segment.element(1)\n", - "# billing_headers.append(bill_header)\n", - "# break # one CLM segment per transaction?\n", - "\n", - "# return billing_headers\n", - "\n", - "# @property\n", - "# def billed_amount(self):\n", - "# transactions = self.full_transaction\n", - "# billed_amounts = []\n", - "\n", - "# for transaction_segments in transactions:\n", - "# for segment in transaction_segments:\n", - "# if segment.segment_name() == \"CLM\":\n", - "# billed_amount = segment.element(2) # Billed amount is the second element\n", - "# billed_amounts.append(billed_amount)\n", - "# break\n", - "\n", - "# return billed_amounts\n", - " \n", - "# # @property\n", - "# # def subscriber(self):\n", - "# # transactions = self.full_transaction\n", - "\n", - "\n" - ] - }, - { - "cell_type": "code", - "execution_count": 5, - "metadata": {}, - "outputs": [], - "source": [ - "# # use raw EDI data\n", - "# edi_object = extend_transaction(sample_data_837i)\n", - "\n", - "# # call different vars\n", - "# transactions = edi_object.full_transaction\n", - "# claim_ids = edi_object.claim_identifier\n", - "# header = edi_object.header_billing_amount\n", - "# billed_amount = edi_object.billed_amount" - ] - }, - { - "cell_type": "code", - "execution_count": 12, - "metadata": {}, - "outputs": [], - "source": [ - "# identify elements functionally!\n", - "\n", - "from databricksx12.edi import *\n", - "\n", - "class extend_transaction(EDI):\n", - " def __init__(self, data, delim_cls=AnsiX12Delim):\n", - " super().__init__(data, delim_cls)\n", - "\n", - " # Use map and lambda to populate billed amounts and subscribers\n", - " self.billed_amounts = list(map(lambda x: x.element(2), self.segments_by_name(\"CLM\")))\n", - " self.subscribers = list(map(lambda x: x.element(4), self.segments_by_name(\"SBR\")))\n", - "\n" - ] - }, - { - "cell_type": "code", - "execution_count": 13, - "metadata": {}, - "outputs": [], - "source": [ - "sample_data_chpw_claimdata = open(\"/Users/raven.mukherjee/edi-sol-accelerator/x12-edi-parser/sampledata/837/CHPW_Claimdata.txt\", \"rb\").read().decode(\"utf-8\")\n", - "# use raw EDI data\n", - "edi_object = extend_transaction(sample_data_chpw_claimdata)\n", - "billed_amounts = edi_object.billed_amounts\n", - "subscribers = edi_object.subscribers\n" - ] - }, - { - "cell_type": "code", - "execution_count": 14, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "['20', '50.1', '11.64', '234', '20']" - ] - }, - "execution_count": 14, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "billed_amounts" - ] - }, - { - "cell_type": "code", - "execution_count": 15, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "['COMMUNITY HLTH PLAN OF WASH',\n", - " 'COMMUNITY HLTH PLAN OF WASH',\n", - " 'COMMUNITY HLTH PLAN OF WASH',\n", - " 'COMMUNITY HLTH PLAN OF WASH',\n", - " 'COMMUNITY HLTH PLAN OF WASH']" - ] - }, - "execution_count": 15, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "subscribers" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] - } - ], - "metadata": { - "kernelspec": { - "display_name": "edi-parse-kernel", - "language": "python", - "name": "edi-parse-env" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.12.2" - } - }, - "nbformat": 4, - "nbformat_minor": 2 -} diff --git a/tests/test_loop.py b/tests/test_loop.py index a831c47..e0dcd13 100644 --- a/tests/test_loop.py +++ b/tests/test_loop.py @@ -38,16 +38,35 @@ def test_loop_hierarchy(self): assert (TestLoop.loop.find_hl_codes(22, '22')['start_idx'] == 12) assert (TestLoop.loop.find_hl_codes(37, '22')['start_idx'] == 27) + # + # Test traversing heirarchy to find correct loops + # def test_loop_hierarchy_child_codes(self): data = open("./sampledata/837/CHPW_Claimdata_edited.txt.tmp", "rb").read().decode("utf-8") loop = Loop(data) assert(loop.find_hl_codes(174, '22')['start_idx'] == 160) + # + # Test getting all segments within a loop + # + def test_get_segments(self): + data = open("./sampledata/837/CHPW_Claimdata_edited.txt.tmp", "rb").read().decode("utf-8") + loop = Loop(data) + assert(loop.get_loop(174, '2000A')['start_idx'] == 144 and loop.get_loop(174, '2000A')['end_idx'] == 153) + assert( len(loop.get_loop_segments(174, '2000A')) == 153 - 144) + assert(loop.get_loop_segments(174, '2000A')[0].element(0) == "HL") + assert( len([x.element(0) for x in loop.get_loop_segments(174, '2000A') if x.element(0) == "HL"]) == 1) + + # + # Test loop start places by position using loop name search + # def test_loop_search_by_name(self): assert(TestLoop.loop.get_loop(22, "2000A")['start_idx'] == 7) assert(TestLoop.loop.get_loop(22, "2000B")['start_idx'] == 12) assert(TestLoop.loop.get_loop(37, "2000A")['start_idx'] == 7) assert(TestLoop.loop.get_loop(37, "2000B")['start_idx'] == 27) + + if __name__ == '__main__': unittest.main() From 6105e3b012a081313ae34c865bd78fba10c5113a Mon Sep 17 00:00:00 2001 From: Raven Date: Fri, 10 May 2024 22:49:32 -0400 Subject: [PATCH 21/46] medical claim builder --- databricksx12/edi.py | 2 + databricksx12/hls/claim.py | 109 +++++++++--------- databricksx12/hls/support_classes/__init__.py | 0 .../hls/support_classes/identities.py | 73 ++++++++++++ setup.py | 3 +- 5 files changed, 129 insertions(+), 58 deletions(-) create mode 100644 databricksx12/hls/support_classes/__init__.py create mode 100644 databricksx12/hls/support_classes/identities.py diff --git a/databricksx12/edi.py b/databricksx12/edi.py index 550fe83..905c33d 100644 --- a/databricksx12/edi.py +++ b/databricksx12/edi.py @@ -124,6 +124,8 @@ def toRows(self): def header(self): return self.data[0] + + class Segment(): # diff --git a/databricksx12/hls/claim.py b/databricksx12/hls/claim.py index 8cada93..2e15487 100644 --- a/databricksx12/hls/claim.py +++ b/databricksx12/hls/claim.py @@ -1,6 +1,7 @@ -from databricksx12.edi import * -from databricksx12.hls.loop import * -import itertools +from databricksx12.edi import EDI, AnsiX12Delim +from databricksx12.hls.loop import Loop +from databricksx12.hls.support_classes.identities import BillingIdentity, SubscriberIdentity, PatientIdentity +from typing import List, Dict # @@ -10,77 +11,68 @@ class MedicalClaim(EDI): - def __init__(self, - sender_loop = [], - receiver_loop = [], - billing_loop = [], - subscriber_loop = [], - patient_loop = [], - claim_loop = [], - sl_loop = [] #service line loop - ): - self.sender_loop = sender_loop + def __init__( + self, + sender_loop: List = [], + receiver_loop: List = [], + billing_loop: List = [], + subscriber_loop: List = [], + patient_loop: List = [], + claim_loop: List = [], + sl_loop: List = [], # service line loop + ): + self.sender_loop = sender_loop # is this a loop or does it only occur once in a document? self.receiver_loop = receiver_loop self.billing_loop = billing_loop self.subscriber_loop = subscriber_loop self.patient_loop = patient_loop self.claim_loop = claim_loop self.sl_loop = sl_loop - + self.build() - def billing_loop(self): - return { - "billing_prvdr_name": "TODO", - "billing_npi": "TODO", - "billing_street_address": "TODO", - "billing_zip_cd": "TODO", - "billing_state_cd": "TODO" - } - - def subscriber_loop(self): - return { - "TODO": "TODO" - } + def _populate_billing_loop(self) -> Dict[str, str]: + return BillingIdentity(self.billing_loop) + + def _populate_subscriber_loop(self) -> Dict[str, str]: + return SubscriberIdentity(self.subscriber_loop) # # # - def patient_loop(self): - #Note - if this doesn't exist then its the same as subscriber loop - return { - "TODO": "TODO" - } - - def toJson(self): - { - **self.patient_loop(), - **self.subscriber_loop(), - **self.billing_loop() - } + def _populate_patient_loop(self) -> Dict[str, str]: + # Note - if this doesn't exist then its the same as subscriber loop + # Note to include in loop: information about subscriber/dependent relationship is marked by Element 2 + # 01 = Spouse; 18 = Self; 19 = Child; G8 = Other + return PatientIdentity(self.patient_loop) - - #not sure if this should be here or not, but you get the idea - def build(): - self.billing_info = self.billing_loop() - self.subscriber_info = self.subscriber_loop() - self.patient_info = self.subscriber_loop() if self.patient_loop = [] else self.patient_loop() - + def toJson(self): + {**self.patient_loop(), **self.subscriber_loop(), **self.billing_loop()} + + # not sure if this should be here or not, but you get the idea + def build(self) -> None: + self.billing_info = self._populate_billing_loop() + self.subscriber_info = self._populate_subscriber_loop() + self.patient_info = ( + self._populate_subscriber_loop() if self.patient_loop == [] else self._populate_patient_loop() + ) class Claim837i(MedicalClaim): NAME = "837I" - + # sender / receiver ? # Format of 837P https://www.dhs.wisconsin.gov/publications/p0/p00265.pdf + class Claim837p(MedicalClaim): NAME = "837P" + class Claim835(MedicalClaim): - + NAME = "835" @@ -88,6 +80,7 @@ class Claim835(MedicalClaim): # Base claim builder (transaction -> 1 or more claims) # + class ClaimBuilder(EDI): # # Given claim type (837i, 837p, etc), segments, and delim class, build claim level classes @@ -97,7 +90,6 @@ def __init__(self, trnx_type_cls, trnx_data, delim_cls=AnsiX12Delim): self.format_cls = delim_cls self.trnx_cls = trnx_type_cls self.loop = Loop(trnx_data) - # # Builds a claim object from @@ -109,13 +101,13 @@ def __init__(self, trnx_type_cls, trnx_data, delim_cls=AnsiX12Delim): # def build_claim(self, clm_segment, idx): return self.trnx_cls( - sender_loop = [], - receiver_loop = [], - billing_loop = self.loop.get_loop_segments(idx, "2000A"), - subscriber_loop = self.loop.get_loop_segments(idx, "2000B"), - patient_loop = self.loop.get_loop_segments(idx, "2000C"), - claim_loop = [], - sl_loop = [] #service line loop + sender_loop=[], + receiver_loop=[], # assuming this is true of all claim types check! + billing_loop=self.loop.get_loop_segments(idx, "2000A"), + subscriber_loop=self.loop.get_loop_segments(idx, "2000B"), + patient_loop=self.loop.get_loop_segments(idx, "2000C"), + claim_loop=[], + sl_loop=[], # service line loop ) # @@ -123,7 +115,10 @@ def build_claim(self, clm_segment, idx): # @return a list of Claim for each "clm" segment # def build(self): - return [self.build_claim(seg, i) for i, seg in self.segments_by_name_index("CLM")] + return [ + self.build_claim(seg, i) for i, seg in self.segments_by_name_index("CLM") + ] + """ sample_data_837i_edited = open("/sampledata/837/CHPW_Claimdata_edited.txt", "rb").read().decode("utf-8") diff --git a/databricksx12/hls/support_classes/__init__.py b/databricksx12/hls/support_classes/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/databricksx12/hls/support_classes/identities.py b/databricksx12/hls/support_classes/identities.py new file mode 100644 index 0000000..ffc41ca --- /dev/null +++ b/databricksx12/hls/support_classes/identities.py @@ -0,0 +1,73 @@ +from databricksx12.edi import Segment +from typing import List + + +class Identity: + def __init__(self, segments: List[Segment]): + self.name: str = None + self.street: str = None + self.type: str = None + self.city: str = None + self.state: str = None + self.zip: str = None + self.build(segments) + + def build(self, billing_loop: List[Segment]): + for segment in billing_loop: + if segment.element(0) == 'N3': + self.street = segment.element(1) + elif segment.element(0) == 'N4': + self.city = segment.element(1) + self.state = segment.element(2) + self.zip = segment.element(3) + + def to_dict(self): + return {k: v for k, v in self.__dict__.items() if v is not None} + + +class BillingIdentity(Identity): + def __init__(self, billing_segments: List[Segment]): + super().__init__(billing_segments) + self.npi = None + self.build_billing(billing_segments) + + def build_billing(self, billing_loop: List[Segment]): + for segment in billing_loop: + if segment.element(0) == 'NM1': + if segment.element(1) == '85': # Hardcoded to 85 for Billing Providers + self.type = 'Organization' if segment.element(2) == '2' else 'Individual' + self.name = segment.element(3) if self.type == 'Organization' else ' '.join([segment.element(3), segment.element(4), segment.element(5)]) + self.npi = segment.element(9) + + +class SubscriberIdentity(Identity): + def __init__(self, subscriber_segments: List[Segment]): + super().__init__(subscriber_segments) + self.id_code = None + self.relationship_to_insured = None + self.build_subscriber(subscriber_segments) + + def build_subscriber(self, subscriber_loop: List[Segment]): + for segment in subscriber_loop: + if segment.element(0) == 'NM1': + if segment.element(1) == 'IL': # Hardcoded to IL for Insured + self.type = 'Entity' if segment.element(2) == '2' else 'Individual' + self.name = segment.element(3) if self.type == 'Organization' else ' '.join([segment.element(3), segment.element(4), segment.element(5)]) + self.id_code = segment.element(9) + elif segment.element(0) == 'SBR': + self.relationship_to_insured = 'Self' if segment.element(2) == '18' else 'Dependent' # information about subscriber/dependent 01 = Spouse; 18 = Self; 19 = Child; G8 = Other + + +class PatientIdentity(Identity): + def __init__(self, patient_segments: List[Segment]): + super().__init__(patient_segments) + self.id_code = None + self.build_patient(patient_segments) + + def build_patient(self, patient_loop: List[Segment]): + for segment in patient_loop: + if segment.element(0) == 'NM1': + if segment.element(1) == 'QC': # Hardcoded to QC for Patient + self.type = 'Patient' + self.name = ' '.join([segment.element(3), segment.element(4), segment.element(5)]) + diff --git a/setup.py b/setup.py index f087d59..b95d690 100644 --- a/setup.py +++ b/setup.py @@ -10,7 +10,8 @@ setup( name="databricksx12", version="0.0.1", - python_requires='>=3.9.*', + # python_requires='>=3.9.*', + python_requires='>=3.9', author="", author_email="aaron.zavora@databricks.com", description= "Parser for handling x12 EDI transactions in Spark", From 4cca5f61c5b237b13f399f9887593eacc8d06ac1 Mon Sep 17 00:00:00 2001 From: Raven Date: Sun, 12 May 2024 23:48:10 -0400 Subject: [PATCH 22/46] added claim and service lines to claim builder --- databricksx12/hls/claim.py | 8 ++++---- databricksx12/hls/loop.py | 39 +++++++++++++++++++++++++++++++++++++- 2 files changed, 42 insertions(+), 5 deletions(-) diff --git a/databricksx12/hls/claim.py b/databricksx12/hls/claim.py index 2e15487..1cb713c 100644 --- a/databricksx12/hls/claim.py +++ b/databricksx12/hls/claim.py @@ -101,13 +101,13 @@ def __init__(self, trnx_type_cls, trnx_data, delim_cls=AnsiX12Delim): # def build_claim(self, clm_segment, idx): return self.trnx_cls( - sender_loop=[], - receiver_loop=[], # assuming this is true of all claim types check! + sender_loop=self.loop.get_sender(), + receiver_loop=self.loop.get_receiver(), # assuming this is true of all claim types check! billing_loop=self.loop.get_loop_segments(idx, "2000A"), subscriber_loop=self.loop.get_loop_segments(idx, "2000B"), patient_loop=self.loop.get_loop_segments(idx, "2000C"), - claim_loop=[], - sl_loop=[], # service line loop + claim_loop=self.loop.get_claim_loop(idx), + sl_loop=self.loop.get_service_line_loop(idx), # service line loop ) # diff --git a/databricksx12/hls/loop.py b/databricksx12/hls/loop.py index 005b3dd..91c76e6 100644 --- a/databricksx12/hls/loop.py +++ b/databricksx12/hls/loop.py @@ -19,7 +19,7 @@ def __init__(self, mappings=None): '23': { 'loop name': 'Patient', 'loop': '2000C' - } + }, } # @@ -162,6 +162,43 @@ def determine_previous_hl(self, pos): filter(lambda x: x[1] < pos, self._start_indexes)) except: return None #when there is no preceding hl segment + + # + # Determine claim loop: starts at the clm index and ends at LX segment, or CLM segment, or end of data + # + def get_claim_loop(self, clm_idx): + sl_start_indexes = list(map(lambda x: x[0], filter(lambda x: x[0] > clm_idx, self.segments_by_name_index("LX")))) + clm_indexes = list(map(lambda x: x[0], filter(lambda x: x[0] > clm_idx, self.segments_by_name_index("CLM")))) + + if sl_start_indexes: + clm_end_idx = min(sl_start_indexes) + elif clm_indexes: + clm_end_idx = min(clm_indexes + [len(self.data)]) + else: + clm_end_idx = len(self.data) + + return self.data[clm_idx:clm_end_idx] + + # + # fetch the indices of LX and CLM segments that are beyond the current clm index + # + def get_service_line_loop(self, clm_idx): + sl_start_indexes = list(map(lambda x: x[0], filter(lambda x: x[0] > clm_idx, self.segments_by_name_index("LX")))) + tx_end_indexes = list(map(lambda x: x[0], filter(lambda x: x[0] > clm_idx, self.segments_by_name_index("SE")))) + # Determine the end of the service line loop + if sl_start_indexes: + sl_end_idx = min(tx_end_indexes + [len(self.data)]) + return self.data[min(sl_start_indexes):sl_end_idx] + return [] + + def get_sender(self): + return [x.element(2) for i, x in self.segments_by_name_index("GS")] # same as ISA06 + + def get_receiver(self): + return [x.element(3) for i, x in self.segments_by_name_index("GS")] + + + """ sample_data_837i_edited = open("/sampledata/837/CHPW_Claimdata_edited.txt", "rb").read().decode("utf-8") From 6faf2cd51276d8bc01f4c41c34b060b2312ce113 Mon Sep 17 00:00:00 2001 From: Raven Date: Mon, 13 May 2024 15:14:41 -0400 Subject: [PATCH 23/46] filled in claim lines in claim build --- databricksx12/hls/claim.py | 16 +++++++++----- databricksx12/hls/loop.py | 9 ++------ .../hls/support_classes/identities.py | 22 ++++++++++++++++--- 3 files changed, 31 insertions(+), 16 deletions(-) diff --git a/databricksx12/hls/claim.py b/databricksx12/hls/claim.py index 1cb713c..d39d4b2 100644 --- a/databricksx12/hls/claim.py +++ b/databricksx12/hls/claim.py @@ -1,6 +1,6 @@ from databricksx12.edi import EDI, AnsiX12Delim from databricksx12.hls.loop import Loop -from databricksx12.hls.support_classes.identities import BillingIdentity, SubscriberIdentity, PatientIdentity +from databricksx12.hls.support_classes.identities import BillingIdentity, SubscriberIdentity, PatientIdentity, ClaimIdentity from typing import List, Dict @@ -8,7 +8,6 @@ # Base claim class # - class MedicalClaim(EDI): def __init__( @@ -21,7 +20,7 @@ def __init__( claim_loop: List = [], sl_loop: List = [], # service line loop ): - self.sender_loop = sender_loop # is this a loop or does it only occur once in a document? + self.sender_loop = sender_loop self.receiver_loop = receiver_loop self.billing_loop = billing_loop self.subscriber_loop = subscriber_loop @@ -31,6 +30,7 @@ def __init__( self.build() + def _populate_billing_loop(self) -> Dict[str, str]: return BillingIdentity(self.billing_loop) @@ -45,9 +45,12 @@ def _populate_patient_loop(self) -> Dict[str, str]: # Note to include in loop: information about subscriber/dependent relationship is marked by Element 2 # 01 = Spouse; 18 = Self; 19 = Child; G8 = Other return PatientIdentity(self.patient_loop) + + def _populate_claim_loop(self) -> Dict[str, str]: + return ClaimIdentity(self.claim_loop) def toJson(self): - {**self.patient_loop(), **self.subscriber_loop(), **self.billing_loop()} + {**self.claim_loop(), **self.patient_loop(), **self.subscriber_loop(), **self.billing_loop()} # not sure if this should be here or not, but you get the idea def build(self) -> None: @@ -56,6 +59,7 @@ def build(self) -> None: self.patient_info = ( self._populate_subscriber_loop() if self.patient_loop == [] else self._populate_patient_loop() ) + self.claim_info = self._populate_claim_loop() class Claim837i(MedicalClaim): @@ -101,8 +105,8 @@ def __init__(self, trnx_type_cls, trnx_data, delim_cls=AnsiX12Delim): # def build_claim(self, clm_segment, idx): return self.trnx_cls( - sender_loop=self.loop.get_sender(), - receiver_loop=self.loop.get_receiver(), # assuming this is true of all claim types check! + sender_loop=[], + receiver_loop=[], # assuming this is true of all claim types check! billing_loop=self.loop.get_loop_segments(idx, "2000A"), subscriber_loop=self.loop.get_loop_segments(idx, "2000B"), patient_loop=self.loop.get_loop_segments(idx, "2000C"), diff --git a/databricksx12/hls/loop.py b/databricksx12/hls/loop.py index 91c76e6..3e063e1 100644 --- a/databricksx12/hls/loop.py +++ b/databricksx12/hls/loop.py @@ -45,6 +45,7 @@ def __init__(self, data, delim_cls=AnsiX12Delim, loop_mapping=LoopMapping()): self.mapping = loop_mapping self._start_indexes = self._build_hierarchy_start_indexes() self.loop_hierarchy = self.build_hierarchy() + """ loop_hierarchy = { unique_id : { start_idx : "" @@ -190,14 +191,8 @@ def get_service_line_loop(self, clm_idx): sl_end_idx = min(tx_end_indexes + [len(self.data)]) return self.data[min(sl_start_indexes):sl_end_idx] return [] - - def get_sender(self): - return [x.element(2) for i, x in self.segments_by_name_index("GS")] # same as ISA06 - - def get_receiver(self): - return [x.element(3) for i, x in self.segments_by_name_index("GS")] - + """ diff --git a/databricksx12/hls/support_classes/identities.py b/databricksx12/hls/support_classes/identities.py index ffc41ca..56f9dcd 100644 --- a/databricksx12/hls/support_classes/identities.py +++ b/databricksx12/hls/support_classes/identities.py @@ -12,8 +12,8 @@ def __init__(self, segments: List[Segment]): self.zip: str = None self.build(segments) - def build(self, billing_loop: List[Segment]): - for segment in billing_loop: + def build(self, loop: List[Segment]): + for segment in loop: if segment.element(0) == 'N3': self.street = segment.element(1) elif segment.element(0) == 'N4': @@ -61,7 +61,6 @@ def build_subscriber(self, subscriber_loop: List[Segment]): class PatientIdentity(Identity): def __init__(self, patient_segments: List[Segment]): super().__init__(patient_segments) - self.id_code = None self.build_patient(patient_segments) def build_patient(self, patient_loop: List[Segment]): @@ -71,3 +70,20 @@ def build_patient(self, patient_loop: List[Segment]): self.type = 'Patient' self.name = ' '.join([segment.element(3), segment.element(4), segment.element(5)]) + +class ClaimIdentity(Identity): + def __init__(self, claim_segments: List[Segment]): + super().__init__(claim_segments) + self.id_code = None + self.facility_code = None + self.claim_amount = None + self.build_claim_lines(claim_segments) + + def build_claim_lines(self, claim_loop: List[Segment]): + for segment in claim_loop: + if segment.element(0) == 'CLM': + self.id_code = segment.element(1) # submitter's identifier + self.claim_amount = segment.element(2) + if segment.element(5).split(':')[1] == 'B': + self.facility_code = 'Outpatient Hospital' if segment.element(3).split(':')[0]== 22 else 'Other' + \ No newline at end of file From 18a73d4bc90cb941212ee73219ce26032188991c Mon Sep 17 00:00:00 2001 From: Raven Date: Mon, 13 May 2024 15:48:00 -0400 Subject: [PATCH 24/46] fixed a claim line element --- databricksx12/hls/support_classes/identities.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/databricksx12/hls/support_classes/identities.py b/databricksx12/hls/support_classes/identities.py index 56f9dcd..8c03336 100644 --- a/databricksx12/hls/support_classes/identities.py +++ b/databricksx12/hls/support_classes/identities.py @@ -85,5 +85,5 @@ def build_claim_lines(self, claim_loop: List[Segment]): self.id_code = segment.element(1) # submitter's identifier self.claim_amount = segment.element(2) if segment.element(5).split(':')[1] == 'B': - self.facility_code = 'Outpatient Hospital' if segment.element(3).split(':')[0]== 22 else 'Other' + self.facility_code = 'Outpatient Hospital' if segment.element(5).split(':')[0]== 22 else 'Other' \ No newline at end of file From 797d6e44c96fbce41be0094792ab07ca880ba05a Mon Sep 17 00:00:00 2001 From: Raven Date: Tue, 14 May 2024 17:28:46 -0400 Subject: [PATCH 25/46] populated submitter and receiver info within claim --- databricksx12/hls/claim.py | 26 +++++---- databricksx12/hls/loop.py | 12 ++++- .../hls/support_classes/identities.py | 53 ++++++++++++++++++- 3 files changed, 77 insertions(+), 14 deletions(-) diff --git a/databricksx12/hls/claim.py b/databricksx12/hls/claim.py index d39d4b2..03ab1d7 100644 --- a/databricksx12/hls/claim.py +++ b/databricksx12/hls/claim.py @@ -1,6 +1,6 @@ from databricksx12.edi import EDI, AnsiX12Delim from databricksx12.hls.loop import Loop -from databricksx12.hls.support_classes.identities import BillingIdentity, SubscriberIdentity, PatientIdentity, ClaimIdentity +from databricksx12.hls.support_classes.identities import BillingIdentity, SubscriberIdentity, PatientIdentity, ClaimIdentity, SubmitterIdentity, ReceiverIdentity from typing import List, Dict @@ -12,16 +12,14 @@ class MedicalClaim(EDI): def __init__( self, - sender_loop: List = [], - receiver_loop: List = [], + sender_receiver_loop: List = [], billing_loop: List = [], subscriber_loop: List = [], patient_loop: List = [], claim_loop: List = [], - sl_loop: List = [], # service line loop + sl_loop: List = [], ): - self.sender_loop = sender_loop - self.receiver_loop = receiver_loop + self.sender_receiver_loop = sender_receiver_loop # extracted together self.billing_loop = billing_loop self.subscriber_loop = subscriber_loop self.patient_loop = patient_loop @@ -30,7 +28,12 @@ def __init__( self.build() - + def _populate_submitter_loop(self) -> Dict[str, str]: + return SubmitterIdentity(self.sender_receiver_loop) + + def _populate_receiver_loop(self) -> Dict[str, str]: + return ReceiverIdentity(self.sender_receiver_loop) + def _populate_billing_loop(self) -> Dict[str, str]: return BillingIdentity(self.billing_loop) @@ -48,12 +51,15 @@ def _populate_patient_loop(self) -> Dict[str, str]: def _populate_claim_loop(self) -> Dict[str, str]: return ClaimIdentity(self.claim_loop) + def toJson(self): - {**self.claim_loop(), **self.patient_loop(), **self.subscriber_loop(), **self.billing_loop()} + {**self.sender_receiver_loop(), **self.claim_loop(), **self.patient_loop(), **self.subscriber_loop(), **self.billing_loop()} # not sure if this should be here or not, but you get the idea def build(self) -> None: + self.submitter_info = self._populate_submitter_loop() + self.receiver_info = self._populate_receiver_loop() self.billing_info = self._populate_billing_loop() self.subscriber_info = self._populate_subscriber_loop() self.patient_info = ( @@ -62,6 +68,7 @@ def build(self) -> None: self.claim_info = self._populate_claim_loop() + class Claim837i(MedicalClaim): NAME = "837I" @@ -105,8 +112,7 @@ def __init__(self, trnx_type_cls, trnx_data, delim_cls=AnsiX12Delim): # def build_claim(self, clm_segment, idx): return self.trnx_cls( - sender_loop=[], - receiver_loop=[], # assuming this is true of all claim types check! + sender_receiver_loop=self.loop.get_submitter_receiver_loop(idx), billing_loop=self.loop.get_loop_segments(idx, "2000A"), subscriber_loop=self.loop.get_loop_segments(idx, "2000B"), patient_loop=self.loop.get_loop_segments(idx, "2000C"), diff --git a/databricksx12/hls/loop.py b/databricksx12/hls/loop.py index 3e063e1..8654b22 100644 --- a/databricksx12/hls/loop.py +++ b/databricksx12/hls/loop.py @@ -186,13 +186,21 @@ def get_claim_loop(self, clm_idx): def get_service_line_loop(self, clm_idx): sl_start_indexes = list(map(lambda x: x[0], filter(lambda x: x[0] > clm_idx, self.segments_by_name_index("LX")))) tx_end_indexes = list(map(lambda x: x[0], filter(lambda x: x[0] > clm_idx, self.segments_by_name_index("SE")))) - # Determine the end of the service line loop if sl_start_indexes: sl_end_idx = min(tx_end_indexes + [len(self.data)]) return self.data[min(sl_start_indexes):sl_end_idx] return [] - + def get_submitter_receiver_loop(self, clm_idx): + bht_start_indexes = list(map(lambda x: x[0], filter(lambda x: x[0] < clm_idx, self.segments_by_name_index("BHT")))) + bht_end_indexes = list(map(lambda x: x[0], filter(lambda x: x[0] < clm_idx and x[1].element(3) == '20', self.segments_by_name_index("HL")))) + if bht_start_indexes: + sub_rec_start_idx = max(bht_start_indexes) + sub_rec_end_idx = max(bht_end_indexes) + + return self.data[sub_rec_start_idx:sub_rec_end_idx] + return [] + """ diff --git a/databricksx12/hls/support_classes/identities.py b/databricksx12/hls/support_classes/identities.py index 8c03336..98dacd4 100644 --- a/databricksx12/hls/support_classes/identities.py +++ b/databricksx12/hls/support_classes/identities.py @@ -51,7 +51,7 @@ def build_subscriber(self, subscriber_loop: List[Segment]): for segment in subscriber_loop: if segment.element(0) == 'NM1': if segment.element(1) == 'IL': # Hardcoded to IL for Insured - self.type = 'Entity' if segment.element(2) == '2' else 'Individual' + self.type = 'Organization' if segment.element(2) == '2' else 'Individual' self.name = segment.element(3) if self.type == 'Organization' else ' '.join([segment.element(3), segment.element(4), segment.element(5)]) self.id_code = segment.element(9) elif segment.element(0) == 'SBR': @@ -86,4 +86,53 @@ def build_claim_lines(self, claim_loop: List[Segment]): self.claim_amount = segment.element(2) if segment.element(5).split(':')[1] == 'B': self.facility_code = 'Outpatient Hospital' if segment.element(5).split(':')[0]== 22 else 'Other' - \ No newline at end of file + +class SubmitterIdentity(Identity): + def __init__(self, submitter_segments: List[Segment]): + super().__init__(submitter_segments) + self.tax_id = None + self.contact_name = None + self.contacts = [] + self.build_submitter_lines(submitter_segments) + + def build_submitter_lines(self, submitter_loop: List[Segment]): + contact_methods = { + 'EM': 'Email', + 'TE': 'Telephone', + 'FX': 'Fax' + } + for segment in submitter_loop: + if segment.element(0) == 'NM1' and segment.element(1) == '41': + self.type = 'Organization' if segment.element(2) == '2' else 'Individual' + self.name = segment.element(3) if self.type == 'Organization' else ' '.join([segment.element(3), segment.element(4), segment.element(5)]) + self.tax_id = segment.element(9) # id + elif segment.element(0) == 'PER': + self.contact_name = segment.element(2) + contact = { + 'contact_method': contact_methods.get(segment.element(3), 'Unknown method'), + 'contact_number': segment.element(4) + } + # Add additional contact details if present + if segment.element(5) in contact_methods: + contact['second_contact_method'] = contact_methods.get(segment.element(5), 'Unknown method') + contact['second_contact_number'] = segment.element(6) + + if segment.element(7) in contact_methods: + contact['other_contact_method'] = contact_methods.get(segment.element(7), 'Unknown method') + contact['other_contact_number'] = segment.element(8) + + self.contacts.append(contact) + + +class ReceiverIdentity(Identity): + def __init__(self, receiver_segments: List[Segment]): + super().__init__(receiver_segments) + self.id_code = None + self.build_receiver_lines(receiver_segments) + + def build_receiver_lines(self, receiver_loop: List[Segment]): + for segment in receiver_loop: + if segment.element(0) == 'NM1' and segment.element(1) == '40': + self.type = 'Organization' if segment.element(2) == '2' else 'Individual' + self.name = segment.element(3) if self.type == 'Organization' else ' '.join([segment.element(3), segment.element(4), segment.element(5)]) + self.id_code = segment.element(9) # id \ No newline at end of file From 040dcdbdd8dc8048990f87e2bcb4e5bd8064cee4 Mon Sep 17 00:00:00 2001 From: Raven Date: Tue, 14 May 2024 22:29:36 -0400 Subject: [PATCH 26/46] service lines professional and institutional --- databricksx12/hls/claim.py | 6 +- .../hls/support_classes/identities.py | 99 ++++++++++++++----- 2 files changed, 81 insertions(+), 24 deletions(-) diff --git a/databricksx12/hls/claim.py b/databricksx12/hls/claim.py index 03ab1d7..dd248c1 100644 --- a/databricksx12/hls/claim.py +++ b/databricksx12/hls/claim.py @@ -1,6 +1,6 @@ from databricksx12.edi import EDI, AnsiX12Delim from databricksx12.hls.loop import Loop -from databricksx12.hls.support_classes.identities import BillingIdentity, SubscriberIdentity, PatientIdentity, ClaimIdentity, SubmitterIdentity, ReceiverIdentity +from databricksx12.hls.support_classes.identities import BillingIdentity, SubscriberIdentity, PatientIdentity, ClaimIdentity, SubmitterIdentity, ReceiverIdentity, ServiceIdentity from typing import List, Dict @@ -51,6 +51,9 @@ def _populate_patient_loop(self) -> Dict[str, str]: def _populate_claim_loop(self) -> Dict[str, str]: return ClaimIdentity(self.claim_loop) + + def _populate_sl_loop(self) -> Dict[str, str]: + return ServiceIdentity(self.sl_loop) def toJson(self): @@ -66,6 +69,7 @@ def build(self) -> None: self._populate_subscriber_loop() if self.patient_loop == [] else self._populate_patient_loop() ) self.claim_info = self._populate_claim_loop() + self.sl_info = self._populate_sl_loop() diff --git a/databricksx12/hls/support_classes/identities.py b/databricksx12/hls/support_classes/identities.py index 98dacd4..6377da6 100644 --- a/databricksx12/hls/support_classes/identities.py +++ b/databricksx12/hls/support_classes/identities.py @@ -82,10 +82,14 @@ def __init__(self, claim_segments: List[Segment]): def build_claim_lines(self, claim_loop: List[Segment]): for segment in claim_loop: if segment.element(0) == 'CLM': + # TODO Inst/Prof self.id_code = segment.element(1) # submitter's identifier self.claim_amount = segment.element(2) - if segment.element(5).split(':')[1] == 'B': + if segment.element(5).split(':')[1] == 'B': # professional claims self.facility_code = 'Outpatient Hospital' if segment.element(5).split(':')[0]== 22 else 'Other' + # TODO: additional provider lines? + + class SubmitterIdentity(Identity): def __init__(self, submitter_segments: List[Segment]): @@ -96,32 +100,38 @@ def __init__(self, submitter_segments: List[Segment]): self.build_submitter_lines(submitter_segments) def build_submitter_lines(self, submitter_loop: List[Segment]): + for segment in submitter_loop: + if segment.element(0) == 'NM1'and segment.element(1) == '41': + self.process_nm1_segment(segment) + elif segment.element(0) == 'PER': + self.process_per_segment(segment) + + def process_nm1_segment(self, segment): + self.type = 'Organization' if segment.element(2) == '2' else 'Individual' + self.name = segment.element(3) if self.type == 'Organization' else ' '.join([segment.element(3), segment.element(4), segment.element(5)]) + self.tax_id = segment.element(9) # id + + def process_per_segment(self, segment): + self.contact_name = segment.element(2) contact_methods = { 'EM': 'Email', 'TE': 'Telephone', 'FX': 'Fax' } - for segment in submitter_loop: - if segment.element(0) == 'NM1' and segment.element(1) == '41': - self.type = 'Organization' if segment.element(2) == '2' else 'Individual' - self.name = segment.element(3) if self.type == 'Organization' else ' '.join([segment.element(3), segment.element(4), segment.element(5)]) - self.tax_id = segment.element(9) # id - elif segment.element(0) == 'PER': - self.contact_name = segment.element(2) - contact = { - 'contact_method': contact_methods.get(segment.element(3), 'Unknown method'), - 'contact_number': segment.element(4) - } - # Add additional contact details if present - if segment.element(5) in contact_methods: - contact['second_contact_method'] = contact_methods.get(segment.element(5), 'Unknown method') - contact['second_contact_number'] = segment.element(6) - - if segment.element(7) in contact_methods: - contact['other_contact_method'] = contact_methods.get(segment.element(7), 'Unknown method') - contact['other_contact_number'] = segment.element(8) - - self.contacts.append(contact) + contact = { + 'contact_method': contact_methods.get(segment.element(3), 'Unknown method'), + 'contact_number': segment.element(4) + } + # Add additional contact details if present + if segment.element(5) in contact_methods: + contact['contact_method_2'] = contact_methods.get(segment.element(5), 'Unknown method') + contact['contact_number_2'] = segment.element(6) + + if segment.element(7) in contact_methods: + contact['contact_method_3'] = contact_methods.get(segment.element(7), 'Unknown method') + contact['contact_number_3'] = segment.element(8) + + self.contacts.append(contact) class ReceiverIdentity(Identity): @@ -135,4 +145,47 @@ def build_receiver_lines(self, receiver_loop: List[Segment]): if segment.element(0) == 'NM1' and segment.element(1) == '40': self.type = 'Organization' if segment.element(2) == '2' else 'Individual' self.name = segment.element(3) if self.type == 'Organization' else ' '.join([segment.element(3), segment.element(4), segment.element(5)]) - self.id_code = segment.element(9) # id \ No newline at end of file + self.id_code = segment.element(9) # id + + +class ServiceIdentity(Identity): + def __init__(self, sl_segments: List[Segment]): + super().__init__(sl_segments) + self.services = { + 'Professional': [], + 'Institutional': [] + } + self.build_sl_lines(sl_segments) + + def build_sl_lines(self, sl_loop: List[Segment]): + for segment in sl_loop: + if segment.element(0) == 'SV1': # Professional service + service = self.parse_professional_service(segment) + self.services['Professional'].append(service) + elif segment.element(0) == 'SV2': # Institutional service + service = self.parse_institutional_service(segment) + self.services['Institutional'].append(service) + + def parse_professional_service(self, segment: Segment): + service_type, procedure_code = segment.element(1).split(':')[0:2] #assuming 7 elements but choosing first two + return { + 'Type of service/claim': 'Professional', + 'Type': service_type, + 'Procedure Code': procedure_code, + 'Procedure Amount': segment.element(2) + } + + def parse_institutional_service(self, segment: Segment): + revenue_code = segment.element(1) + service_type, procedure_code = segment.element(2).split(':')[0:2] #assuming 7 elements but choosing first two + return { + 'Type of service/claim': 'Institutional', + 'Revenue Code': revenue_code, + 'Type': service_type, + 'Procedure Code': procedure_code, + 'Procedure Amount': segment.element(3) + } + + + + From b654d9383097d81da54960a4e3d1803a1dc7d028 Mon Sep 17 00:00:00 2001 From: Aaron Zavora Date: Mon, 20 May 2024 11:20:15 -0400 Subject: [PATCH 27/46] Update README.md --- README.md | 35 ++++++++++++++++++++++++++++++++--- 1 file changed, 32 insertions(+), 3 deletions(-) diff --git a/README.md b/README.md index 8a27233..4be7046 100644 --- a/README.md +++ b/README.md @@ -20,8 +20,8 @@ pip install git+https://github.com/databricks-industry-solutions/x12-edi-parser Default format used is AnsiX12 (* as a delim and ~ as segment separator) ```python -from databricksx12.format import * -from databricksx12.edi import * +from databricksx12 import * + ediFormat = AnsiX12Delim #specifying formats of data, ansi is also the default if nothing is specified df = spark.read.text("sampledata/837/*", wholetext = True) @@ -92,7 +92,8 @@ from pyspark.sql.functions import input_file_name #### Parsing Healthcare Transactions ```python -from databricksx12.hls.healthcare import * +from databricksx12 import * +from databricksx12.hls import * hm = HealthcareManager() x = EDI(open("sampledata/837/CHPW_Claimdata.txt", "rb").read().decode("utf-8")) @@ -101,6 +102,34 @@ hm.from_edi(x) #[, , , , ] one_claim = hm.from_edi(x)[0] + +#print a json representation of a claim +import json +print(json.dumps(one_claim.toJson(), indent=4)) +""" +{ + "submitter": { + "name": "CLEARINGHOUSE LLC", + "type": "Organization", + "tax_id": "987654321", + "contact_name": "CLEARINGHOUSE CLIENT SERVICES", + "contacts": [ + { + "contact_method": "Telephone", + "contact_number": "8005551212", + "contact_method_2": "Fax", + "contact_number_2": "8005551212" + } + ] + }, + "reciever": { + "name": "123456789", + "type": "Organization", + "id_code": "CHPWA" + }, + "subscriber": {... +""" +#print raw EDI Segments print("\n".join([y.data for y in one_claim.data])) #Print one claim to look at the segments of it """ BHT*0019*00*7349063984*20180508*0833*CH From 1ee215d363785cecc514011ce51c58f840eba966 Mon Sep 17 00:00:00 2001 From: Aaron Z Date: Mon, 20 May 2024 11:23:36 -0400 Subject: [PATCH 28/46] init --- databricksx12/__init__.py | 5 +++ databricksx12/hls/__init__.py | 3 ++ databricksx12/hls/claim.py | 74 +++++++++++++++++++++++++++++++++-- databricksx12/hls/loop.py | 38 ------------------ 4 files changed, 78 insertions(+), 42 deletions(-) create mode 100644 databricksx12/hls/__init__.py diff --git a/databricksx12/__init__.py b/databricksx12/__init__.py index 8b13789..bbfa1e5 100644 --- a/databricksx12/__init__.py +++ b/databricksx12/__init__.py @@ -1 +1,6 @@ +from .edi import * +from .format import * +from .functional import * +from .transaction import * + diff --git a/databricksx12/hls/__init__.py b/databricksx12/hls/__init__.py new file mode 100644 index 0000000..4785b04 --- /dev/null +++ b/databricksx12/hls/__init__.py @@ -0,0 +1,3 @@ +from .healthcare import * +from .claim import * +from .loop import * diff --git a/databricksx12/hls/claim.py b/databricksx12/hls/claim.py index dd248c1..4804576 100644 --- a/databricksx12/hls/claim.py +++ b/databricksx12/hls/claim.py @@ -56,8 +56,36 @@ def _populate_sl_loop(self) -> Dict[str, str]: return ServiceIdentity(self.sl_loop) + """ + Overall Asks + - Coordination of Benefits flag + - Patient / Subscriber same person flag + + Claim needs + - principal ICD10 diagnosis code + - other ICD10 diagnosis codes as an array + - hcfa place of service + - claim id? + - admission type code + - facility type code + - claim frequency code + + Claim line needs + - This should return an array + + Servicing provider needs + - TBD + """ def toJson(self): - {**self.sender_receiver_loop(), **self.claim_loop(), **self.patient_loop(), **self.subscriber_loop(), **self.billing_loop()} + return { + **{'submitter': self.submitter_info.to_dict()}, + **{'reciever': self.receiver_info.to_dict()}, + **{'subscriber': self.subscriber_info.to_dict()}, + **{'patient': self.patient_info.to_dict()}, + **{'billing_provider': self.billing_info.to_dict()}, + **{'claim_header': self.claim_info.to_dict()}, + **{'claim_lines': self.sl_info.to_dict()} + } # not sure if this should be here or not, but you get the idea def build(self) -> None: @@ -116,14 +144,52 @@ def __init__(self, trnx_type_cls, trnx_data, delim_cls=AnsiX12Delim): # def build_claim(self, clm_segment, idx): return self.trnx_cls( - sender_receiver_loop=self.loop.get_submitter_receiver_loop(idx), + sender_receiver_loop=self.get_submitter_receiver_loop(idx), billing_loop=self.loop.get_loop_segments(idx, "2000A"), subscriber_loop=self.loop.get_loop_segments(idx, "2000B"), patient_loop=self.loop.get_loop_segments(idx, "2000C"), - claim_loop=self.loop.get_claim_loop(idx), - sl_loop=self.loop.get_service_line_loop(idx), # service line loop + claim_loop=self.get_claim_loop(idx), + sl_loop=self.get_service_line_loop(idx), # service line loop ) + # + # Determine claim loop: starts at the clm index and ends at LX segment, or CLM segment, or end of data + # + def get_claim_loop(self, clm_idx): + sl_start_indexes = list(map(lambda x: x[0], filter(lambda x: x[0] > clm_idx, self.segments_by_name_index("LX")))) + clm_indexes = list(map(lambda x: x[0], filter(lambda x: x[0] > clm_idx, self.segments_by_name_index("CLM")))) + + if sl_start_indexes: + clm_end_idx = min(sl_start_indexes) + elif clm_indexes: + clm_end_idx = min(clm_indexes + [len(self.data)]) + else: + clm_end_idx = len(self.data) + + return self.data[clm_idx:clm_end_idx] + + # + # fetch the indices of LX and CLM segments that are beyond the current clm index + # + def get_service_line_loop(self, clm_idx): + sl_start_indexes = list(map(lambda x: x[0], filter(lambda x: x[0] > clm_idx, self.segments_by_name_index("LX")))) + tx_end_indexes = list(map(lambda x: x[0], filter(lambda x: x[0] > clm_idx, self.segments_by_name_index("SE")))) + if sl_start_indexes: + sl_end_idx = min(tx_end_indexes + [len(self.data)]) + return self.data[min(sl_start_indexes):sl_end_idx] + return [] + + def get_submitter_receiver_loop(self, clm_idx): + bht_start_indexes = list(map(lambda x: x[0], filter(lambda x: x[0] < clm_idx, self.segments_by_name_index("BHT")))) + bht_end_indexes = list(map(lambda x: x[0], filter(lambda x: x[0] < clm_idx and x[1].element(3) == '20', self.segments_by_name_index("HL")))) + if bht_start_indexes: + sub_rec_start_idx = max(bht_start_indexes) + sub_rec_end_idx = max(bht_end_indexes) + + return self.data[sub_rec_start_idx:sub_rec_end_idx] + return [] + + # # Given transaction type, transaction segments, and delim info, build out claims in the transaction # @return a list of Claim for each "clm" segment diff --git a/databricksx12/hls/loop.py b/databricksx12/hls/loop.py index 8654b22..e7b4e33 100644 --- a/databricksx12/hls/loop.py +++ b/databricksx12/hls/loop.py @@ -164,44 +164,6 @@ def determine_previous_hl(self, pos): except: return None #when there is no preceding hl segment - # - # Determine claim loop: starts at the clm index and ends at LX segment, or CLM segment, or end of data - # - def get_claim_loop(self, clm_idx): - sl_start_indexes = list(map(lambda x: x[0], filter(lambda x: x[0] > clm_idx, self.segments_by_name_index("LX")))) - clm_indexes = list(map(lambda x: x[0], filter(lambda x: x[0] > clm_idx, self.segments_by_name_index("CLM")))) - - if sl_start_indexes: - clm_end_idx = min(sl_start_indexes) - elif clm_indexes: - clm_end_idx = min(clm_indexes + [len(self.data)]) - else: - clm_end_idx = len(self.data) - - return self.data[clm_idx:clm_end_idx] - - # - # fetch the indices of LX and CLM segments that are beyond the current clm index - # - def get_service_line_loop(self, clm_idx): - sl_start_indexes = list(map(lambda x: x[0], filter(lambda x: x[0] > clm_idx, self.segments_by_name_index("LX")))) - tx_end_indexes = list(map(lambda x: x[0], filter(lambda x: x[0] > clm_idx, self.segments_by_name_index("SE")))) - if sl_start_indexes: - sl_end_idx = min(tx_end_indexes + [len(self.data)]) - return self.data[min(sl_start_indexes):sl_end_idx] - return [] - - def get_submitter_receiver_loop(self, clm_idx): - bht_start_indexes = list(map(lambda x: x[0], filter(lambda x: x[0] < clm_idx, self.segments_by_name_index("BHT")))) - bht_end_indexes = list(map(lambda x: x[0], filter(lambda x: x[0] < clm_idx and x[1].element(3) == '20', self.segments_by_name_index("HL")))) - if bht_start_indexes: - sub_rec_start_idx = max(bht_start_indexes) - sub_rec_end_idx = max(bht_end_indexes) - - return self.data[sub_rec_start_idx:sub_rec_end_idx] - return [] - - """ sample_data_837i_edited = open("/sampledata/837/CHPW_Claimdata_edited.txt", "rb").read().decode("utf-8") From 4238bfcde7c2b169ece3c5b6092c71ccf15915b1 Mon Sep 17 00:00:00 2001 From: Aaron Z Date: Mon, 20 May 2024 16:10:18 -0400 Subject: [PATCH 29/46] init --- README.md | 1 - databricksx12/edi.py | 5 ++--- databricksx12/hls/claim.py | 3 +-- databricksx12/hls/healthcare.py | 18 ++++++++++++++++++ .../hls/support_classes/identities.py | 6 +----- 5 files changed, 22 insertions(+), 11 deletions(-) diff --git a/README.md b/README.md index 4be7046..165ad2c 100644 --- a/README.md +++ b/README.md @@ -164,7 +164,6 @@ LX*1 SV1*HC:H0003*20*UN*1***1 DTP*472*D8*20180428 REF*6R*142671 - """ ``` diff --git a/databricksx12/edi.py b/databricksx12/edi.py index 905c33d..e2ae36e 100644 --- a/databricksx12/edi.py +++ b/databricksx12/edi.py @@ -175,8 +175,6 @@ def filter(self, value, element, sub_element, dne="na/dne"): return self if value == self.get_element(element, sub_element, dne) else None - - # # Manage relationship heirarchy within EDI # @@ -225,7 +223,8 @@ def flatten(data = None): } else: return EDIManager.class_metadata(data) - + + """ from databricksx12.edi import * diff --git a/databricksx12/hls/claim.py b/databricksx12/hls/claim.py index 4804576..8989192 100644 --- a/databricksx12/hls/claim.py +++ b/databricksx12/hls/claim.py @@ -76,7 +76,7 @@ def _populate_sl_loop(self) -> Dict[str, str]: Servicing provider needs - TBD """ - def toJson(self): + def to_json(self): return { **{'submitter': self.submitter_info.to_dict()}, **{'reciever': self.receiver_info.to_dict()}, @@ -104,7 +104,6 @@ def build(self) -> None: class Claim837i(MedicalClaim): NAME = "837I" - # sender / receiver ? # Format of 837P https://www.dhs.wisconsin.gov/publications/p0/p00265.pdf diff --git a/databricksx12/hls/healthcare.py b/databricksx12/hls/healthcare.py index 00cc22e..8745efd 100644 --- a/databricksx12/hls/healthcare.py +++ b/databricksx12/hls/healthcare.py @@ -32,4 +32,22 @@ def from_functional_group(self, fg): def from_transaction(self, trnx): return ClaimBuilder(self.mapping.get(trnx.transaction_type), [x for x in trnx.data if x.segment_name() not in ['ST', 'SE']], trnx.format_cls).build() + + # + # Convert all data to json data + # + def to_json(self, edi): + return { + **EDIManager.class_metadata(edi), + 'FuncitonalGroup': [ + { + **EDIManager.class_metadata(fg), + 'Transactions': [ + { + **EDIManager.class_metadata(trnx), + 'Claims': [clm.to_json() for clm in self.from_transaction(trnx)] + } for trnx in fg.transaction_segments()] + } for fg in edi.functional_segments()] + } + diff --git a/databricksx12/hls/support_classes/identities.py b/databricksx12/hls/support_classes/identities.py index 6377da6..98da506 100644 --- a/databricksx12/hls/support_classes/identities.py +++ b/databricksx12/hls/support_classes/identities.py @@ -151,11 +151,7 @@ def build_receiver_lines(self, receiver_loop: List[Segment]): class ServiceIdentity(Identity): def __init__(self, sl_segments: List[Segment]): super().__init__(sl_segments) - self.services = { - 'Professional': [], - 'Institutional': [] - } - self.build_sl_lines(sl_segments) + #self.claim_lines = build_sl_lines(sl_segments) def build_sl_lines(self, sl_loop: List[Segment]): for segment in sl_loop: From adc737100ec6b6ca401462de15c8d077e7fb1737 Mon Sep 17 00:00:00 2001 From: Aaron Zavora Date: Mon, 20 May 2024 16:16:07 -0400 Subject: [PATCH 30/46] Update README.md --- README.md | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/README.md b/README.md index 165ad2c..7649f69 100644 --- a/README.md +++ b/README.md @@ -94,18 +94,26 @@ from pyspark.sql.functions import input_file_name ```python from databricksx12 import * from databricksx12.hls import * +import json hm = HealthcareManager() -x = EDI(open("sampledata/837/CHPW_Claimdata.txt", "rb").read().decode("utf-8")) +edi = EDI(open("sampledata/837/CHPW_Claimdata.txt", "rb").read().decode("utf-8")) -hm.from_edi(x) +hm.from_edi(edi) #[, , , , ] +#TODO replace this with Spark tomorrow +print(json.dumps(hm.to_json(edi), indent=4)) + + +""" +TODO update tomorrow below +""" + one_claim = hm.from_edi(x)[0] #print a json representation of a claim -import json -print(json.dumps(one_claim.toJson(), indent=4)) +print(json.dumps(one_claim.to_json(), indent=4)) """ { "submitter": { From 01c080fa7b6795cf3eafd6a93a6cf68385f16e91 Mon Sep 17 00:00:00 2001 From: Raven Date: Mon, 20 May 2024 16:38:26 -0400 Subject: [PATCH 31/46] functionized identities and claim --- .../hls/support_classes/identities.py | 250 +++++++++++++----- 1 file changed, 177 insertions(+), 73 deletions(-) diff --git a/databricksx12/hls/support_classes/identities.py b/databricksx12/hls/support_classes/identities.py index 6377da6..513025c 100644 --- a/databricksx12/hls/support_classes/identities.py +++ b/databricksx12/hls/support_classes/identities.py @@ -1,8 +1,25 @@ from databricksx12.edi import Segment -from typing import List +from typing import List, Dict +from collections import defaultdict +from functools import reduce class Identity: + nm1_identifiers = { + '85': 'Billing Provider', # entity that is billing for the services provided + '87': 'Pay-to Provider', # entity to which payments are to be sent + 'PR': 'Payer', # insurance company or payer + 'IL': 'Insured', # insured individual + 'QC': 'Patient', # patient + '82': 'Rendering Provider',# individual or group that performed the service + 'DN': 'Referring Provider',# doctor who referred the patient to another doctor + '77': 'Service Facility', # location where the service was performed + 'DQ': 'Supervising Provider', # provider who oversees the patient's care + '71': 'Attending Provider',# provider with primary responsibility for the patient at the time of service + 'DK': 'Ordering Provider', # provider who ordered the service or item + 'PE': 'Payee', # entity receiving the payment + } + def __init__(self, segments: List[Segment]): self.name: str = None self.street: str = None @@ -10,53 +27,106 @@ def __init__(self, segments: List[Segment]): self.city: str = None self.state: str = None self.zip: str = None + self.id: str = None + self.npi: str = None self.build(segments) def build(self, loop: List[Segment]): - for segment in loop: - if segment.element(0) == 'N3': - self.street = segment.element(1) - elif segment.element(0) == 'N4': - self.city = segment.element(1) - self.state = segment.element(2) - self.zip = segment.element(3) + nm1_segments = filter(lambda segment: segment.element(0) == 'NM1' and segment.segment_len() >= 10, loop) + n3_segments = filter(lambda segment: segment.element(0) == 'N3', loop) + n4_segments = filter(lambda segment: segment.element(0) == 'N4', loop) + + list(map(self.process_nm1_segment, nm1_segments)) + list(map(self.process_n3_segment, n3_segments)) + list(map(self.process_n4_segment, n4_segments)) + return self.to_dict() + + def process_nm1_segment(self, segment: Segment): + self.type = 'Organization' if segment.element(2) == '2' else 'Individual' + self.name = segment.element(3) if self.type == 'Organization' else ' '.join([segment.element(3), segment.element(4), segment.element(5)]) + self.npi = segment.element(9) if len(segment.element(9)) == 10 else None + self.id = segment.element(9) if len(segment.element(9)) != 10 else None + + def process_n3_segment(self, segment: Segment): + self.street = segment.element(1) + + def process_n4_segment(self, segment: Segment): + self.city = segment.element(1) + self.state = segment.element(2) + self.zip = segment.element(3) + def to_dict(self): return {k: v for k, v in self.__dict__.items() if v is not None} + + + @staticmethod + def group_segments_by_provider(loop: List[Segment], nm1_identifiers: dict) -> Dict[str, List[List[Segment]]]: + def reducer(acc, segment): + provider_type, grouped = acc + if segment.element(0) == 'NM1': + provider_type = nm1_identifiers.get(segment.element(1)) + if provider_type: + grouped[provider_type].append([segment]) + elif provider_type: + grouped[provider_type][-1].append(segment) + return provider_type, grouped + + _, grouped = reduce(reducer, loop, (None, defaultdict(list))) + return grouped class BillingIdentity(Identity): def __init__(self, billing_segments: List[Segment]): + self.providers = defaultdict(list) super().__init__(billing_segments) - self.npi = None self.build_billing(billing_segments) def build_billing(self, billing_loop: List[Segment]): - for segment in billing_loop: - if segment.element(0) == 'NM1': - if segment.element(1) == '85': # Hardcoded to 85 for Billing Providers - self.type = 'Organization' if segment.element(2) == '2' else 'Individual' - self.name = segment.element(3) if self.type == 'Organization' else ' '.join([segment.element(3), segment.element(4), segment.element(5)]) - self.npi = segment.element(9) + grouped_segments = self.group_segments_by_provider(billing_loop, self.nm1_identifiers) + self.providers = defaultdict(list, { + provider_type: [Identity(segments).to_dict() for segments in group] + for provider_type, group in grouped_segments.items() + }) + return self.to_dict() + def to_dict(self): + base_dict = super().to_dict() + base_dict.update({ + 'providers': dict(self.providers) + }) + return base_dict + class SubscriberIdentity(Identity): def __init__(self, subscriber_segments: List[Segment]): - super().__init__(subscriber_segments) - self.id_code = None + self.subscribers = defaultdict(list) self.relationship_to_insured = None + super().__init__(subscriber_segments) self.build_subscriber(subscriber_segments) def build_subscriber(self, subscriber_loop: List[Segment]): - for segment in subscriber_loop: - if segment.element(0) == 'NM1': - if segment.element(1) == 'IL': # Hardcoded to IL for Insured - self.type = 'Organization' if segment.element(2) == '2' else 'Individual' - self.name = segment.element(3) if self.type == 'Organization' else ' '.join([segment.element(3), segment.element(4), segment.element(5)]) - self.id_code = segment.element(9) - elif segment.element(0) == 'SBR': - self.relationship_to_insured = 'Self' if segment.element(2) == '18' else 'Dependent' # information about subscriber/dependent 01 = Spouse; 18 = Self; 19 = Child; G8 = Other + grouped_segments = self.group_segments_by_provider(subscriber_loop, self.nm1_identifiers) + self.subscribers = defaultdict(list, { + subscriber_type: [self.process_segments_with_relationship(segments).to_dict() for segments in group] + for subscriber_type, group in grouped_segments.items() + }) + return self.to_dict() + + def process_segments_with_relationship(self, segments: List[Segment]) -> Identity: + identity = Identity(segments) + sbr_segment = next(filter(lambda s: s.element(0) == 'SBR', segments), None) + if sbr_segment: + identity.relationship_to_insured = 'Self' if sbr_segment.element(2) == '18' else 'Dependent' + return identity + def to_dict(self): + base_dict = super().to_dict() + base_dict.update({ + 'subscribers': dict(self.subscribers), + 'relationship_to_insured': self.relationship_to_insured + }) + return base_dict class PatientIdentity(Identity): def __init__(self, patient_segments: List[Segment]): @@ -64,52 +134,72 @@ def __init__(self, patient_segments: List[Segment]): self.build_patient(patient_segments) def build_patient(self, patient_loop: List[Segment]): - for segment in patient_loop: - if segment.element(0) == 'NM1': - if segment.element(1) == 'QC': # Hardcoded to QC for Patient - self.type = 'Patient' - self.name = ' '.join([segment.element(3), segment.element(4), segment.element(5)]) + def process_patient_segment(segment: Segment): + self.type = 'Patient' + self.name = ' '.join([segment.element(3), segment.element(4), segment.element(5)]) + return list(map(process_patient_segment, filter(lambda s: s.element(0) == 'NM1' and s.element(1) == 'QC', patient_loop))) + class ClaimIdentity(Identity): def __init__(self, claim_segments: List[Segment]): - super().__init__(claim_segments) - self.id_code = None - self.facility_code = None + self.patient_id = None self.claim_amount = None + self.facility_type_code = None + self.claim_code_freq = None + self.date = None + self.providers = defaultdict(list) + super().__init__(claim_segments) self.build_claim_lines(claim_segments) def build_claim_lines(self, claim_loop: List[Segment]): - for segment in claim_loop: + def process_segment(segment: Segment): if segment.element(0) == 'CLM': - # TODO Inst/Prof - self.id_code = segment.element(1) # submitter's identifier + self.patient_id = segment.element(1) # submitter's identifier self.claim_amount = segment.element(2) - if segment.element(5).split(':')[1] == 'B': # professional claims - self.facility_code = 'Outpatient Hospital' if segment.element(5).split(':')[0]== 22 else 'Other' - # TODO: additional provider lines? + codes = segment.element(5).split(':') # codes[1] == A for institutional and B for professional + self.facility_type_code = codes[0] + self.claim_code_freq = codes[2] + + if segment.element(0) == 'DTP': + self.date = segment.element(3) # format D8:CCYYMMDD + + if segment.element(0) == 'NM1': + provider_type = self.nm1_identifiers.get(segment.element(1)) + if provider_type: + identity = Identity([segment]) + self.providers[provider_type].append(identity.to_dict()) + + list(map(process_segment, claim_loop)) + + def to_dict(self): + base_dict = super().to_dict() + base_dict.update({ + 'patient_id': self.patient_id, + 'claim_amount': self.claim_amount, + 'facility_type_code': self.facility_type_code, + 'claim_code_freq': self.claim_code_freq, + 'date': self.date, + 'providers': dict(self.providers) + }) + return base_dict class SubmitterIdentity(Identity): def __init__(self, submitter_segments: List[Segment]): - super().__init__(submitter_segments) - self.tax_id = None self.contact_name = None self.contacts = [] + super().__init__(submitter_segments) self.build_submitter_lines(submitter_segments) - - def build_submitter_lines(self, submitter_loop: List[Segment]): - for segment in submitter_loop: - if segment.element(0) == 'NM1'and segment.element(1) == '41': - self.process_nm1_segment(segment) - elif segment.element(0) == 'PER': - self.process_per_segment(segment) - def process_nm1_segment(self, segment): - self.type = 'Organization' if segment.element(2) == '2' else 'Individual' - self.name = segment.element(3) if self.type == 'Organization' else ' '.join([segment.element(3), segment.element(4), segment.element(5)]) - self.tax_id = segment.element(9) # id + def build_submitter_lines(self, submitter_loop: List[Segment]): + nm1_segments = filter(lambda segment: segment.element(0) == 'NM1' and segment.element(1) == '41', submitter_loop) + per_segments = filter(lambda segment: segment.element(0) == 'PER', submitter_loop) + + list(map(self.process_nm1_segment, nm1_segments)) + list(map(self.process_per_segment, per_segments)) + return self.to_dict() def process_per_segment(self, segment): self.contact_name = segment.element(2) @@ -132,42 +222,50 @@ def process_per_segment(self, segment): contact['contact_number_3'] = segment.element(8) self.contacts.append(contact) + + def to_dict(self): + base_dict = super().to_dict() + base_dict.update({ + 'contact_name': self.contact_name, + 'contacts': self.contacts + }) + return base_dict class ReceiverIdentity(Identity): def __init__(self, receiver_segments: List[Segment]): super().__init__(receiver_segments) - self.id_code = None self.build_receiver_lines(receiver_segments) def build_receiver_lines(self, receiver_loop: List[Segment]): - for segment in receiver_loop: - if segment.element(0) == 'NM1' and segment.element(1) == '40': - self.type = 'Organization' if segment.element(2) == '2' else 'Individual' - self.name = segment.element(3) if self.type == 'Organization' else ' '.join([segment.element(3), segment.element(4), segment.element(5)]) - self.id_code = segment.element(9) # id + nm1_segments = filter(lambda segment: segment.element(0) == 'NM1' and segment.element(1) == '40', receiver_loop) + list(map(self.process_nm1_segment, nm1_segments)) + return self.to_dict() class ServiceIdentity(Identity): def __init__(self, sl_segments: List[Segment]): - super().__init__(sl_segments) - self.services = { - 'Professional': [], - 'Institutional': [] - } - self.build_sl_lines(sl_segments) + self.services = { + 'Professional': [], + 'Institutional': [] + } + super().__init__(sl_segments) + self.build_sl_lines(sl_segments) def build_sl_lines(self, sl_loop: List[Segment]): - for segment in sl_loop: - if segment.element(0) == 'SV1': # Professional service - service = self.parse_professional_service(segment) - self.services['Professional'].append(service) - elif segment.element(0) == 'SV2': # Institutional service - service = self.parse_institutional_service(segment) - self.services['Institutional'].append(service) + sv1_segments = filter(lambda segment: segment.element(0) == 'SV1', sl_loop) + sv2_segments = filter(lambda segment: segment.element(0) == 'SV2', sl_loop) + + professional_services = map(self.parse_professional_service, sv1_segments) + institutional_services = map(self.parse_institutional_service, sv2_segments) + + self.services['Professional'] = list(professional_services) + self.services['Institutional'] = list(institutional_services) + + return self.to_dict() def parse_professional_service(self, segment: Segment): - service_type, procedure_code = segment.element(1).split(':')[0:2] #assuming 7 elements but choosing first two + service_type, procedure_code = segment.element(1).split(':')[0:2] # assuming 7 elements but choosing first two return { 'Type of service/claim': 'Professional', 'Type': service_type, @@ -177,7 +275,7 @@ def parse_professional_service(self, segment: Segment): def parse_institutional_service(self, segment: Segment): revenue_code = segment.element(1) - service_type, procedure_code = segment.element(2).split(':')[0:2] #assuming 7 elements but choosing first two + service_type, procedure_code = segment.element(2).split(':')[0:2] # assuming 7 elements but choosing first two return { 'Type of service/claim': 'Institutional', 'Revenue Code': revenue_code, @@ -186,6 +284,12 @@ def parse_institutional_service(self, segment: Segment): 'Procedure Amount': segment.element(3) } + def to_dict(self): + base_dict = super().to_dict() + base_dict.update({ + 'services': self.services + }) + return base_dict From 5639ff1433ed1df388c3df414f5ddd06204eb20e Mon Sep 17 00:00:00 2001 From: Raven Date: Mon, 20 May 2024 18:29:57 -0400 Subject: [PATCH 32/46] adjust dictionary use in build --- .../hls/support_classes/identities.py | 66 +++---------------- 1 file changed, 9 insertions(+), 57 deletions(-) diff --git a/databricksx12/hls/support_classes/identities.py b/databricksx12/hls/support_classes/identities.py index 513025c..9db1b8c 100644 --- a/databricksx12/hls/support_classes/identities.py +++ b/databricksx12/hls/support_classes/identities.py @@ -5,6 +5,8 @@ from functools import reduce class Identity: + + # provider name identities associated with every NM1 line nm1_identifiers = { '85': 'Billing Provider', # entity that is billing for the services provided '87': 'Pay-to Provider', # entity to which payments are to be sent @@ -31,6 +33,7 @@ def __init__(self, segments: List[Segment]): self.npi: str = None self.build(segments) + # build name and address for any identity def build(self, loop: List[Segment]): nm1_segments = filter(lambda segment: segment.element(0) == 'NM1' and segment.segment_len() >= 10, loop) n3_segments = filter(lambda segment: segment.element(0) == 'N3', loop) @@ -39,7 +42,6 @@ def build(self, loop: List[Segment]): list(map(self.process_nm1_segment, nm1_segments)) list(map(self.process_n3_segment, n3_segments)) list(map(self.process_n4_segment, n4_segments)) - return self.to_dict() def process_nm1_segment(self, segment: Segment): self.type = 'Organization' if segment.element(2) == '2' else 'Individual' @@ -88,16 +90,10 @@ def build_billing(self, billing_loop: List[Segment]): provider_type: [Identity(segments).to_dict() for segments in group] for provider_type, group in grouped_segments.items() }) - return self.to_dict() - - def to_dict(self): - base_dict = super().to_dict() - base_dict.update({ - 'providers': dict(self.providers) - }) - return base_dict + #TODO class pay_to() + class SubscriberIdentity(Identity): def __init__(self, subscriber_segments: List[Segment]): self.subscribers = defaultdict(list) @@ -108,25 +104,11 @@ def __init__(self, subscriber_segments: List[Segment]): def build_subscriber(self, subscriber_loop: List[Segment]): grouped_segments = self.group_segments_by_provider(subscriber_loop, self.nm1_identifiers) self.subscribers = defaultdict(list, { - subscriber_type: [self.process_segments_with_relationship(segments).to_dict() for segments in group] + subscriber_type: [Identity(segments).to_dict() for segments in group] for subscriber_type, group in grouped_segments.items() }) - return self.to_dict() - def process_segments_with_relationship(self, segments: List[Segment]) -> Identity: - identity = Identity(segments) - sbr_segment = next(filter(lambda s: s.element(0) == 'SBR', segments), None) - if sbr_segment: - identity.relationship_to_insured = 'Self' if sbr_segment.element(2) == '18' else 'Dependent' - return identity - def to_dict(self): - base_dict = super().to_dict() - base_dict.update({ - 'subscribers': dict(self.subscribers), - 'relationship_to_insured': self.relationship_to_insured - }) - return base_dict class PatientIdentity(Identity): def __init__(self, patient_segments: List[Segment]): @@ -170,19 +152,7 @@ def process_segment(segment: Segment): identity = Identity([segment]) self.providers[provider_type].append(identity.to_dict()) - list(map(process_segment, claim_loop)) - - def to_dict(self): - base_dict = super().to_dict() - base_dict.update({ - 'patient_id': self.patient_id, - 'claim_amount': self.claim_amount, - 'facility_type_code': self.facility_type_code, - 'claim_code_freq': self.claim_code_freq, - 'date': self.date, - 'providers': dict(self.providers) - }) - return base_dict + return list(map(process_segment, claim_loop)) @@ -199,7 +169,6 @@ def build_submitter_lines(self, submitter_loop: List[Segment]): list(map(self.process_nm1_segment, nm1_segments)) list(map(self.process_per_segment, per_segments)) - return self.to_dict() def process_per_segment(self, segment): self.contact_name = segment.element(2) @@ -222,14 +191,7 @@ def process_per_segment(self, segment): contact['contact_number_3'] = segment.element(8) self.contacts.append(contact) - - def to_dict(self): - base_dict = super().to_dict() - base_dict.update({ - 'contact_name': self.contact_name, - 'contacts': self.contacts - }) - return base_dict + class ReceiverIdentity(Identity): @@ -239,8 +201,7 @@ def __init__(self, receiver_segments: List[Segment]): def build_receiver_lines(self, receiver_loop: List[Segment]): nm1_segments = filter(lambda segment: segment.element(0) == 'NM1' and segment.element(1) == '40', receiver_loop) - list(map(self.process_nm1_segment, nm1_segments)) - return self.to_dict() + return list(map(self.process_nm1_segment, nm1_segments)) class ServiceIdentity(Identity): @@ -262,8 +223,6 @@ def build_sl_lines(self, sl_loop: List[Segment]): self.services['Professional'] = list(professional_services) self.services['Institutional'] = list(institutional_services) - return self.to_dict() - def parse_professional_service(self, segment: Segment): service_type, procedure_code = segment.element(1).split(':')[0:2] # assuming 7 elements but choosing first two return { @@ -284,12 +243,5 @@ def parse_institutional_service(self, segment: Segment): 'Procedure Amount': segment.element(3) } - def to_dict(self): - base_dict = super().to_dict() - base_dict.update({ - 'services': self.services - }) - return base_dict - From c6daff7a337f1f872894976c8b2c123dfd90d43a Mon Sep 17 00:00:00 2001 From: Raven Date: Mon, 20 May 2024 19:15:09 -0400 Subject: [PATCH 33/46] removed unnecessary funcs from identities --- databricksx12/hls/claim.py | 2 +- .../hls/support_classes/identities.py | 44 +++++++------------ 2 files changed, 18 insertions(+), 28 deletions(-) diff --git a/databricksx12/hls/claim.py b/databricksx12/hls/claim.py index 8989192..d11c3cb 100644 --- a/databricksx12/hls/claim.py +++ b/databricksx12/hls/claim.py @@ -53,7 +53,7 @@ def _populate_claim_loop(self) -> Dict[str, str]: return ClaimIdentity(self.claim_loop) def _populate_sl_loop(self) -> Dict[str, str]: - return ServiceIdentity(self.sl_loop) + return ServiceIdentity(self.sl_loop) """ diff --git a/databricksx12/hls/support_classes/identities.py b/databricksx12/hls/support_classes/identities.py index 9db1b8c..f2a6a55 100644 --- a/databricksx12/hls/support_classes/identities.py +++ b/databricksx12/hls/support_classes/identities.py @@ -6,7 +6,7 @@ class Identity: - # provider name identities associated with every NM1 line + # provider name identities associated with every NM1 line. a combination may occur within loops nm1_identifiers = { '85': 'Billing Provider', # entity that is billing for the services provided '87': 'Pay-to Provider', # entity to which payments are to be sent @@ -135,7 +135,7 @@ def __init__(self, claim_segments: List[Segment]): self.build_claim_lines(claim_segments) def build_claim_lines(self, claim_loop: List[Segment]): - def process_segment(segment: Segment): + def process_claim_segment(segment: Segment): if segment.element(0) == 'CLM': self.patient_id = segment.element(1) # submitter's identifier self.claim_amount = segment.element(2) @@ -146,20 +146,21 @@ def process_segment(segment: Segment): if segment.element(0) == 'DTP': self.date = segment.element(3) # format D8:CCYYMMDD - if segment.element(0) == 'NM1': - provider_type = self.nm1_identifiers.get(segment.element(1)) - if provider_type: - identity = Identity([segment]) - self.providers[provider_type].append(identity.to_dict()) + # process claim-specific segments + list(map(process_claim_segment, claim_loop)) + + # process NM1 segments for providers + nm1_segments = filter(lambda segment: segment.element(0) == 'NM1', claim_loop) - return list(map(process_segment, claim_loop)) + # append instead of extend for single items + list(map(lambda segment: self.providers[self.nm1_identifiers.get(segment.element(1))].append(Identity([segment]).to_dict()), nm1_segments)) class SubmitterIdentity(Identity): def __init__(self, submitter_segments: List[Segment]): self.contact_name = None - self.contacts = [] + self.contacts = defaultdict(list) super().__init__(submitter_segments) self.build_submitter_lines(submitter_segments) @@ -190,7 +191,7 @@ def process_per_segment(self, segment): contact['contact_method_3'] = contact_methods.get(segment.element(7), 'Unknown method') contact['contact_number_3'] = segment.element(8) - self.contacts.append(contact) + self.contacts['primary'].append(contact) @@ -204,44 +205,33 @@ def build_receiver_lines(self, receiver_loop: List[Segment]): return list(map(self.process_nm1_segment, nm1_segments)) + class ServiceIdentity(Identity): def __init__(self, sl_segments: List[Segment]): - self.services = { - 'Professional': [], - 'Institutional': [] - } + self.services = defaultdict(list) super().__init__(sl_segments) self.build_sl_lines(sl_segments) def build_sl_lines(self, sl_loop: List[Segment]): sv1_segments = filter(lambda segment: segment.element(0) == 'SV1', sl_loop) sv2_segments = filter(lambda segment: segment.element(0) == 'SV2', sl_loop) + self.services['Professional'] = [self.parse_professional_service(segment) for segment in sv1_segments] + self.services['Institutional'] = [self.parse_institutional_service(segment) for segment in sv2_segments] - professional_services = map(self.parse_professional_service, sv1_segments) - institutional_services = map(self.parse_institutional_service, sv2_segments) - - self.services['Professional'] = list(professional_services) - self.services['Institutional'] = list(institutional_services) def parse_professional_service(self, segment: Segment): service_type, procedure_code = segment.element(1).split(':')[0:2] # assuming 7 elements but choosing first two return { - 'Type of service/claim': 'Professional', 'Type': service_type, 'Procedure Code': procedure_code, 'Procedure Amount': segment.element(2) } def parse_institutional_service(self, segment: Segment): - revenue_code = segment.element(1) service_type, procedure_code = segment.element(2).split(':')[0:2] # assuming 7 elements but choosing first two return { - 'Type of service/claim': 'Institutional', - 'Revenue Code': revenue_code, 'Type': service_type, + 'Revenue Code': segment.element(1), 'Procedure Code': procedure_code, 'Procedure Amount': segment.element(3) - } - - - + } \ No newline at end of file From 2ca454328a83ed25ea415a116c259086f59a2d7f Mon Sep 17 00:00:00 2001 From: Raven Date: Mon, 20 May 2024 19:30:39 -0400 Subject: [PATCH 34/46] relationship of subscriber and patient added in Sub identity --- databricksx12/hls/claim.py | 2 +- databricksx12/hls/support_classes/identities.py | 6 ++++-- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/databricksx12/hls/claim.py b/databricksx12/hls/claim.py index d11c3cb..1b79607 100644 --- a/databricksx12/hls/claim.py +++ b/databricksx12/hls/claim.py @@ -59,7 +59,7 @@ def _populate_sl_loop(self) -> Dict[str, str]: """ Overall Asks - Coordination of Benefits flag - - Patient / Subscriber same person flag + - Patient / Subscriber same person flag --> self.relationship_to_insured in Suscriber Claim needs - principal ICD10 diagnosis code diff --git a/databricksx12/hls/support_classes/identities.py b/databricksx12/hls/support_classes/identities.py index f2a6a55..6a8d615 100644 --- a/databricksx12/hls/support_classes/identities.py +++ b/databricksx12/hls/support_classes/identities.py @@ -92,8 +92,6 @@ def build_billing(self, billing_loop: List[Segment]): }) - #TODO class pay_to() - class SubscriberIdentity(Identity): def __init__(self, subscriber_segments: List[Segment]): self.subscribers = defaultdict(list) @@ -102,6 +100,10 @@ def __init__(self, subscriber_segments: List[Segment]): self.build_subscriber(subscriber_segments) def build_subscriber(self, subscriber_loop: List[Segment]): + sbr_segment = next(filter(lambda s: s.element(0) == 'SBR', subscriber_loop), None) + if sbr_segment: + self.relationship_to_insured = 'Self' if sbr_segment.element(2) == '18' else 'Dependent' + grouped_segments = self.group_segments_by_provider(subscriber_loop, self.nm1_identifiers) self.subscribers = defaultdict(list, { subscriber_type: [Identity(segments).to_dict() for segments in group] From 327f60ba96a4a2f92b06db0eab9919e28bd03f43 Mon Sep 17 00:00:00 2001 From: Aaron Zavora Date: Tue, 21 May 2024 09:42:35 -0400 Subject: [PATCH 35/46] Update README.md --- README.md | 212 ++++++++++++++++++++---------------------------------- 1 file changed, 79 insertions(+), 133 deletions(-) diff --git a/README.md b/README.md index 7649f69..8770375 100644 --- a/README.md +++ b/README.md @@ -3,27 +3,31 @@ [![CLOUD](https://img.shields.io/badge/CLOUD-ALL-blue?logo=googlecloud&style=for-the-badge)](https://cloud.google.com/databricks) [![POC](https://img.shields.io/badge/POC-10_days-green?style=for-the-badge)](https://databricks.com/try-databricks) -## Business Problem (Under Construction / Not Stable) +# Business Problem -Addressing the issue of working with various parts of an x12 EDI transaction in Spark on Databricks. +Working with various x12 EDI transactions in Spark on Databricks. -## Install +# Install ```python pip install git+https://github.com/databricks-industry-solutions/x12-edi-parser ``` -## Run +# Run -### Reading in EDI Data +## Reading in EDI Data Default format used is AnsiX12 (* as a delim and ~ as segment separator) ```python from databricksx12 import * -ediFormat = AnsiX12Delim #specifying formats of data, ansi is also the default if nothing is specified -df = spark.read.text("sampledata/837/*", wholetext = True) +#EDI format type +ediFormat = AnsiX12Delim #specifying formats of data, ansi is also the default if nothing is specified +#can also specify customer formats (below is the same as AnsiX12Delim) +ediFormat = type("", (), dict({'SEGMENT_DELIM': '~', 'ELEMENT_DELIM': '*', 'SUB_DELIM': ':'})) + +df = spark.read.text("sampledata/837/*txt", wholetext = True) (df.rdd .map(lambda x: x.asDict().get("value")) @@ -37,59 +41,28 @@ df = spark.read.text("sampledata/837/*", wholetext = True) | 1| | 1| | 1| -+-----------------+ - - - -#Building a dynamic/custom format -customFormat = type("", (), dict({'SEGMENT_DELIM': '~', 'ELEMENT_DELIM': '*', 'SUB_DELIM': ':'})) -(df.rdd - .map(lambda x: x.asDict().get("value")) - .map(lambda x: EDI(x, delim_cls = customFormat)) - .map(lambda x: {"transaction_count": x.num_transactions()}) -).toDF().show() -+-----------------+ -|transaction_count| -+-----------------+ -| 5| -| 1| -| 1| | 1| +-----------------+ - ``` -#### EDI as a Table for SQL +## Parsing Healthcare Transactions + +Currently supports 837s. Records in each format type should be saved separately, e.g. do not mix 835s & 837s in the df.save() command. + +### 837i and 837p sample data in Spark ```python -"""" -Look at all data refernce -> https://justransform.com/edi-essentials/edi-structure/ - (1) Including control header / ISA & IEA segments -""" -from pyspark.sql.functions import input_file_name +from databricksx12 import * +from databricksx12.hls import * +import json -( df.withColumn("filename", input_file_name()).rdd - .map(lambda x: (x.asDict().get("filename"),x.asDict().get("value"))) - .map(lambda x: (x[0], EDI(x[1]))) - .map(lambda x: [{**{"filename": x[0]}, **y} for y in x[1].toRows()]) - .flatMap(lambda x: x) - .toDF()).show() +df = spark.read.text("sampledata/837/*", wholetext = True) -""" -+--------------------+----------+--------------------------+--------------+------------+-----------------------------+--------+ -| row_data|row_number|segment_element_delim_char|segment_length|segment_name|segment_subelement_delim_char|filename| -+--------------------+----------+--------------------------+--------------+------------+-----------------------------+--------+ -|ISA*00* ...| 0| *| 17| ISA| :|file:///| -|GS*HC*CLEARINGHOU...| 1| *| 9| GS| :|file:///| -|ST*837*000000001*...| 2| *| 4| ST| :|file:///| -|BHT*0019*00*73490...| 3| *| 7| BHT| :|file:///| -|NM1*41*2*CLEARING...| 4| *| 10| NM1| :|file:///| -|PER*IC*CLEARINGHO...| 5| *| 7| PER| :|file:///| -|NM1*40*2*12345678...| 6| *| 10| NM1| :|file:///| ``` -#### Parsing Healthcare Transactions +### Sample data outside of Spark + ```python from databricksx12 import * @@ -99,45 +72,39 @@ import json hm = HealthcareManager() edi = EDI(open("sampledata/837/CHPW_Claimdata.txt", "rb").read().decode("utf-8")) +#Returns parsed claim data hm.from_edi(edi) -#[, , , , ] +#[, , , , ] -#TODO replace this with Spark tomorrow +#Print in json format print(json.dumps(hm.to_json(edi), indent=4)) - -""" -TODO update tomorrow below -""" - -one_claim = hm.from_edi(x)[0] - -#print a json representation of a claim -print(json.dumps(one_claim.to_json(), indent=4)) """ { - "submitter": { - "name": "CLEARINGHOUSE LLC", - "type": "Organization", - "tax_id": "987654321", - "contact_name": "CLEARINGHOUSE CLIENT SERVICES", - "contacts": [ - { - "contact_method": "Telephone", - "contact_number": "8005551212", - "contact_method_2": "Fax", - "contact_number_2": "8005551212" - } - ] - }, - "reciever": { - "name": "123456789", - "type": "Organization", - "id_code": "CHPWA" - }, - "subscriber": {... + "EDI.sender_tax_id": "ZZ", + "FuncitonalGroup": [ + { + "FunctionalGroup.receiver": "123456789", + "FunctionalGroup.sender": "CLEARINGHOUSE", + "FunctionalGroup.transaction_datetime": "20180508:0833", + "FunctionalGroup.transaction_type": "222", + "Transactions": [ + { + "Transaction.transaction_type": "222", + "Claims": [ + { + "submitter": { + "contact_name": "CLEARINGHOUSE CLIENT SERVICES", + "contacts": { + "primary": [ + { + "contact_method": "Telephone", + "contact_number": "8005551212", +... """ -#print raw EDI Segments + +#print the raw EDI Segments of one claim +one_claim = hm.from_edi(edi)[0] print("\n".join([y.data for y in one_claim.data])) #Print one claim to look at the segments of it """ BHT*0019*00*7349063984*20180508*0833*CH @@ -157,66 +124,45 @@ HL*2*1*22*0 SBR*P*18**COMMUNITY HLTH PLAN OF WASH*****CI NM1*IL*1*SUBSCRIBER*JOHN*J***MI*987321 N3*987 65TH PL -N4*VANCOUVER*WA*986640001 -DMG*D8*19881225*M -NM1*PR*2*COMMUNITY HEALTH PLAN OF WASHINGTON*****PI*CHPWA -CLM*1805080AV3648339*20***57:B:1*Y*A*Y*Y -REF*D9*7349065509 -HI*ABK:F1120 -NM1*82*1*PROVIDER*JAMES****XX*1112223338 -PRV*PE*PXC*261QR0405X -NM1*77*2*BH CLINIC OF VANCOUVER*****XX*1122334455 -N3*12345 MAIN ST SUITE A1 -N4*VANCOUVER*WA*98662 -LX*1 -SV1*HC:H0003*20*UN*1***1 -DTP*472*D8*20180428 -REF*6R*142671 +... """ ``` -#### Further EDI Parsing in Pyspark - - -> **Warning** -> Sections below this are under construction +## EDI as a Table for SQL ```python -from databricksx12.edi import * -x = EDIManager(EDI(open("sampledata/837/CHPW_Claimdata.txt", "rb").read().decode("utf-8"))) +"""" +Look at all data refernce -> https://justransform.com/edi-essentials/edi-structure/ + (1) Including control header / ISA & IEA segments +""" +from pyspark.sql.functions import input_file_name -import json -print(json.dumps(x.flatten(x.data), indent=4)) -{ - "EDI.sender_tax_id": "ZZ", - "list": [ - { - "FunctionalGroup.receiver": "123456789", - "FunctionalGroup.sender": "CLEARINGHOUSE", - "FunctionalGroup.transaction_datetime": "20180508:0833", - "FunctionalGroup.transaction_type": "222", - "list": [ - { - "Transaction.transaction_type": "222" - }, - { - "Transaction.transaction_type": "222" - }, - { - "Transaction.transaction_type": "222" - }, - { - "Transaction.transaction_type": "222" - }, - { - "Transaction.transaction_type": "222" - } - ] - } - ] -} +( df.withColumn("filename", input_file_name()).rdd + .map(lambda x: (x.asDict().get("filename"),x.asDict().get("value"))) + .map(lambda x: (x[0], EDI(x[1]))) + .map(lambda x: [{**{"filename": x[0]}, **y} for y in x[1].toRows()]) + .flatMap(lambda x: x) + .toDF()).show() + +""" ++--------------------+----------+--------------------------+--------------+------------+-----------------------------+--------+ +| row_data|row_number|segment_element_delim_char|segment_length|segment_name|segment_subelement_delim_char|filename| ++--------------------+----------+--------------------------+--------------+------------+-----------------------------+--------+ +|ISA*00* ...| 0| *| 17| ISA| :|file:///| +|GS*HC*CLEARINGHOU...| 1| *| 9| GS| :|file:///| +|ST*837*000000001*...| 2| *| 4| ST| :|file:///| +|BHT*0019*00*73490...| 3| *| 7| BHT| :|file:///| +|NM1*41*2*CLEARING...| 4| *| 10| NM1| :|file:///| +|PER*IC*CLEARINGHO...| 5| *| 7| PER| :|file:///| +|NM1*40*2*12345678...| 6| *| 10| NM1| :|file:///| ``` +#### Other types of EDI Parsing in Pyspark + + +> **Warning** +> Sections below this are under construction + ```python """ From b1668688f67dc7df55cb0aee1e5347afaa1fc3b7 Mon Sep 17 00:00:00 2001 From: Aaron Z Date: Tue, 21 May 2024 10:14:04 -0400 Subject: [PATCH 36/46] updated GS08 --- databricksx12/hls/claim.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/databricksx12/hls/claim.py b/databricksx12/hls/claim.py index 1b79607..10ddff7 100644 --- a/databricksx12/hls/claim.py +++ b/databricksx12/hls/claim.py @@ -84,7 +84,7 @@ def to_json(self): **{'patient': self.patient_info.to_dict()}, **{'billing_provider': self.billing_info.to_dict()}, **{'claim_header': self.claim_info.to_dict()}, - **{'claim_lines': self.sl_info.to_dict()} + **{'claim_lines': 'TODO'} } # not sure if this should be here or not, but you get the idea @@ -97,7 +97,7 @@ def build(self) -> None: self._populate_subscriber_loop() if self.patient_loop == [] else self._populate_patient_loop() ) self.claim_info = self._populate_claim_loop() - self.sl_info = self._populate_sl_loop() + self.sl_info = self._populate_sl_loop() From bbcea3d55a2dfa223680a1592b31c0cecb6d3608 Mon Sep 17 00:00:00 2001 From: Aaron Zavora Date: Tue, 21 May 2024 13:14:09 -0400 Subject: [PATCH 37/46] Update README.md --- README.md | 25 +++++++++++++++---------- 1 file changed, 15 insertions(+), 10 deletions(-) diff --git a/README.md b/README.md index 8770375..6d6f48a 100644 --- a/README.md +++ b/README.md @@ -34,6 +34,7 @@ df = spark.read.text("sampledata/837/*txt", wholetext = True) .map(lambda x: EDI(x, delim_cls = ediFormat)) .map(lambda x: {"transaction_count": x.num_transactions()}) ).toDF().show() +""" +-----------------+ |transaction_count| +-----------------+ @@ -43,7 +44,7 @@ df = spark.read.text("sampledata/837/*txt", wholetext = True) | 1| | 1| +-----------------+ - +""" ``` ## Parsing Healthcare Transactions @@ -56,8 +57,18 @@ Currently supports 837s. Records in each format type should be saved separately, from databricksx12 import * from databricksx12.hls import * import json +from pyspark.sql.functions import input_file_name + +hm = HealthcareManager() +df = spark.read.text("sampledata/837/*txt", wholetext = True) -df = spark.read.text("sampledata/837/*", wholetext = True) + +rdd = ( + df.withColumn("filename", input_file_name()).rdd + .map(lambda x: (x.asDict().get("filename"),x.asDict().get("value"))) + .map(lambda x: (x[0], EDI(x[1]))) + .map(lambda x: { **{'filename': x[0]}, **hm.to_json(x[1])} ) +) ``` @@ -74,7 +85,7 @@ edi = EDI(open("sampledata/837/CHPW_Claimdata.txt", "rb").read().decode("utf-8" #Returns parsed claim data hm.from_edi(edi) -#[, , , , ] +#[, , , , ] #Print in json format print(json.dumps(hm.to_json(edi), indent=4)) @@ -157,11 +168,7 @@ from pyspark.sql.functions import input_file_name |NM1*40*2*12345678...| 6| *| 10| NM1| :|file:///| ``` -#### Other types of EDI Parsing in Pyspark - - -> **Warning** -> Sections below this are under construction +#### Other EDI Parsing in Pyspark ```python @@ -231,8 +238,6 @@ ediDF.show() """ - - #show first line of each transaction trxDF.filter(x.row_number == 0).show() """ From 0ebcd3b62a58c5ab44937b160f7916e979f9a162 Mon Sep 17 00:00:00 2001 From: Aaron Zavora Date: Tue, 21 May 2024 13:29:06 -0400 Subject: [PATCH 38/46] Update README.md --- README.md | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/README.md b/README.md index 6d6f48a..f13985d 100644 --- a/README.md +++ b/README.md @@ -68,7 +68,13 @@ rdd = ( .map(lambda x: (x.asDict().get("filename"),x.asDict().get("value"))) .map(lambda x: (x[0], EDI(x[1]))) .map(lambda x: { **{'filename': x[0]}, **hm.to_json(x[1])} ) + .map(lambda x: json.dumps(x)) ) +claims = spark.read.json(rdd) + +#Claim header table TODO + +#Claim line table TODO ``` From f999d9930ff04014f2e7db0015c9a4ed93eb88ed Mon Sep 17 00:00:00 2001 From: Raven Date: Tue, 21 May 2024 19:55:00 -0400 Subject: [PATCH 39/46] billing providers function in claim --- databricksx12/hls/claim.py | 64 +++++-- .../hls/support_classes/identities.py | 156 +++++++++--------- 2 files changed, 132 insertions(+), 88 deletions(-) diff --git a/databricksx12/hls/claim.py b/databricksx12/hls/claim.py index 10ddff7..f03c8e8 100644 --- a/databricksx12/hls/claim.py +++ b/databricksx12/hls/claim.py @@ -1,7 +1,18 @@ -from databricksx12.edi import EDI, AnsiX12Delim +from databricksx12.edi import EDI, AnsiX12Delim, Segment from databricksx12.hls.loop import Loop -from databricksx12.hls.support_classes.identities import BillingIdentity, SubscriberIdentity, PatientIdentity, ClaimIdentity, SubmitterIdentity, ReceiverIdentity, ServiceIdentity +from databricksx12.hls.support_classes.identities import ( + Identity, + BillingIdentity, + SubscriberIdentity, + PatientIdentity, + ClaimIdentity, + SubmitterIdentity, + ReceiverIdentity, + ServiceIdentity, +) from typing import List, Dict +from collections import defaultdict +from functools import reduce # @@ -35,12 +46,11 @@ def _populate_receiver_loop(self) -> Dict[str, str]: return ReceiverIdentity(self.sender_receiver_loop) def _populate_billing_loop(self) -> Dict[str, str]: - return BillingIdentity(self.billing_loop) + return BillingIdentity(self.sender_receiver_loop) def _populate_subscriber_loop(self) -> Dict[str, str]: return SubscriberIdentity(self.subscriber_loop) - - # + # # def _populate_patient_loop(self) -> Dict[str, str]: @@ -55,20 +65,41 @@ def _populate_claim_loop(self) -> Dict[str, str]: def _populate_sl_loop(self) -> Dict[str, str]: return ServiceIdentity(self.sl_loop) + def _populate_grouped_entities(self, loop: List[Segment]) -> Dict[str, List[Dict[str, str]]]: + # if we want a list of NM1 entities belonging within a loop + def group_segments_by_provider(loop, nm1_identifiers: dict = Identity.nm1_identifiers) -> Dict[str, List[List[Segment]]]: + def reducer(acc, segment): + provider_type, grouped = acc + if segment.element(0) == 'NM1': + provider_type = nm1_identifiers.get(segment.element(1)) + if provider_type: + grouped[provider_type].append([segment]) + elif provider_type: + grouped[provider_type][-1].append(segment) + return provider_type, grouped + + _, grouped = reduce(reducer, loop, (None, defaultdict(list))) + return grouped + + return defaultdict(list, { + provider_type: [Identity(segments).to_dict() for segments in group] + for provider_type, group in group_segments_by_provider(loop).items() + }) + """ Overall Asks - - Coordination of Benefits flag - - Patient / Subscriber same person flag --> self.relationship_to_insured in Suscriber + - Coordination of Benefits flag -- > self.benefits_assign_flag in Claim Identity + - Patient / Subscriber same person flag --> self.relationship_to_insured in Suscriber in Claim Identity Claim needs - principal ICD10 diagnosis code - - other ICD10 diagnosis codes as an array - - hcfa place of service - - claim id? - - admission type code - - facility type code - - claim frequency code + - other ICD10 diagnosis codes as an array + - hcfa place of service -- segment.element(5).split(':')? + - claim id? - done + - admission type code - only in 837i? + - facility type code - done + - claim frequency code - done Claim line needs - This should return an array @@ -84,7 +115,8 @@ def to_json(self): **{'patient': self.patient_info.to_dict()}, **{'billing_provider': self.billing_info.to_dict()}, **{'claim_header': self.claim_info.to_dict()}, - **{'claim_lines': 'TODO'} + **{'claim_lines': 'TODO'}, + **{'grouped_subscriber_entities': self.subscriber_entities_info.to_dict()}, # call for all entities in a loop[] } # not sure if this should be here or not, but you get the idea @@ -99,6 +131,9 @@ def build(self) -> None: self.claim_info = self._populate_claim_loop() self.sl_info = self._populate_sl_loop() + self.claim_entities_info = self._populate_grouped_entities(self.claim_loop) + self.subscriber_entities_info = self._populate_grouped_entities(self.subscriber_loop) + class Claim837i(MedicalClaim): @@ -112,6 +147,7 @@ class Claim837p(MedicalClaim): NAME = "837P" + class Claim835(MedicalClaim): diff --git a/databricksx12/hls/support_classes/identities.py b/databricksx12/hls/support_classes/identities.py index 6a8d615..6708e10 100644 --- a/databricksx12/hls/support_classes/identities.py +++ b/databricksx12/hls/support_classes/identities.py @@ -6,26 +6,27 @@ class Identity: - # provider name identities associated with every NM1 line. a combination may occur within loops + # entity name identities associated with every NM1 line. a combination may occur within loops nm1_identifiers = { - '85': 'Billing Provider', # entity that is billing for the services provided - '87': 'Pay-to Provider', # entity to which payments are to be sent - 'PR': 'Payer', # insurance company or payer + '85': 'Billing Provider', # entity that is billing for the services provided and 87 disregarded 'IL': 'Insured', # insured individual - 'QC': 'Patient', # patient + 'QC': 'Patient', # patient for 837P and PAT segments in 837i '82': 'Rendering Provider',# individual or group that performed the service 'DN': 'Referring Provider',# doctor who referred the patient to another doctor '77': 'Service Facility', # location where the service was performed 'DQ': 'Supervising Provider', # provider who oversees the patient's care '71': 'Attending Provider',# provider with primary responsibility for the patient at the time of service 'DK': 'Ordering Provider', # provider who ordered the service or item + 'PR': 'Payer', # insurance company or payer 'PE': 'Payee', # entity receiving the payment + } def __init__(self, segments: List[Segment]): self.name: str = None self.street: str = None self.type: str = None + self.provider_type: str = None self.city: str = None self.state: str = None self.zip: str = None @@ -33,19 +34,20 @@ def __init__(self, segments: List[Segment]): self.npi: str = None self.build(segments) - # build name and address for any identity + # build entity and address for any identity def build(self, loop: List[Segment]): nm1_segments = filter(lambda segment: segment.element(0) == 'NM1' and segment.segment_len() >= 10, loop) - n3_segments = filter(lambda segment: segment.element(0) == 'N3', loop) - n4_segments = filter(lambda segment: segment.element(0) == 'N4', loop) + n3_segment = next(filter(lambda segment: segment.element(0) == 'N3', loop), None) # taking only the first address lines + n4_segment = next(filter(lambda segment: segment.element(0) == 'N4', loop), None) list(map(self.process_nm1_segment, nm1_segments)) - list(map(self.process_n3_segment, n3_segments)) - list(map(self.process_n4_segment, n4_segments)) + list(map(self.process_n3_segment, [n3_segment] if n3_segment else [])) + list(map(self.process_n4_segment, [n4_segment] if n4_segment else [])) def process_nm1_segment(self, segment: Segment): self.type = 'Organization' if segment.element(2) == '2' else 'Individual' self.name = segment.element(3) if self.type == 'Organization' else ' '.join([segment.element(3), segment.element(4), segment.element(5)]) + self.entity_type = self.nm1_identifiers.get(segment.element(1), 'Unknown') self.npi = segment.element(9) if len(segment.element(9)) == 10 else None self.id = segment.element(9) if len(segment.element(9)) != 10 else None @@ -57,105 +59,111 @@ def process_n4_segment(self, segment: Segment): self.state = segment.element(2) self.zip = segment.element(3) - def to_dict(self): return {k: v for k, v in self.__dict__.items() if v is not None} - - @staticmethod - def group_segments_by_provider(loop: List[Segment], nm1_identifiers: dict) -> Dict[str, List[List[Segment]]]: - def reducer(acc, segment): - provider_type, grouped = acc - if segment.element(0) == 'NM1': - provider_type = nm1_identifiers.get(segment.element(1)) - if provider_type: - grouped[provider_type].append([segment]) - elif provider_type: - grouped[provider_type][-1].append(segment) - return provider_type, grouped - - _, grouped = reduce(reducer, loop, (None, defaultdict(list))) - return grouped class BillingIdentity(Identity): def __init__(self, billing_segments: List[Segment]): - self.providers = defaultdict(list) super().__init__(billing_segments) - self.build_billing(billing_segments) - - def build_billing(self, billing_loop: List[Segment]): - grouped_segments = self.group_segments_by_provider(billing_loop, self.nm1_identifiers) - self.providers = defaultdict(list, { - provider_type: [Identity(segments).to_dict() for segments in group] - for provider_type, group in grouped_segments.items() - }) - + list(map(lambda segment: Identity([segment]).to_dict(), billing_segments)) + class SubscriberIdentity(Identity): def __init__(self, subscriber_segments: List[Segment]): - self.subscribers = defaultdict(list) self.relationship_to_insured = None super().__init__(subscriber_segments) self.build_subscriber(subscriber_segments) def build_subscriber(self, subscriber_loop: List[Segment]): - sbr_segment = next(filter(lambda s: s.element(0) == 'SBR', subscriber_loop), None) + sbr_segment = next(filter(lambda segment: segment.element(0) == 'SBR', subscriber_loop), None) if sbr_segment: self.relationship_to_insured = 'Self' if sbr_segment.element(2) == '18' else 'Dependent' - grouped_segments = self.group_segments_by_provider(subscriber_loop, self.nm1_identifiers) - self.subscribers = defaultdict(list, { - subscriber_type: [Identity(segments).to_dict() for segments in group] - for subscriber_type, group in grouped_segments.items() - }) - - class PatientIdentity(Identity): - def __init__(self, patient_segments: List[Segment]): - super().__init__(patient_segments) - self.build_patient(patient_segments) - - def build_patient(self, patient_loop: List[Segment]): - def process_patient_segment(segment: Segment): - self.type = 'Patient' - self.name = ' '.join([segment.element(3), segment.element(4), segment.element(5)]) - return list(map(process_patient_segment, filter(lambda s: s.element(0) == 'NM1' and s.element(1) == 'QC', patient_loop))) - - - + def __init__(self, patient_segments: List[Segment]): + super().__init__(patient_segments) + self.build_patient(patient_segments) + + def build_patient(self, patient_loop: List[Segment]): + def process_patient_segment(segment: Segment): + self.type = 'Patient' + self.name = ' '.join([segment.element(3), segment.element(4), segment.element(5)]) + return list(map(process_patient_segment, filter(lambda s: s.element(0) == 'NM1' and s.element(1) == 'QC', patient_loop))) + + class ClaimIdentity(Identity): def __init__(self, claim_segments: List[Segment]): self.patient_id = None self.claim_amount = None self.facility_type_code = None self.claim_code_freq = None - self.date = None - self.providers = defaultdict(list) + self.admission_date = None + self.benefits_assign_flag = None + self.claim_id = None + self.admission_type = None # only 837I? + + self.pricipal_diagnosis_code = None + + self.providers = defaultdict(list) # still need? super().__init__(claim_segments) self.build_claim_lines(claim_segments) def build_claim_lines(self, claim_loop: List[Segment]): - def process_claim_segment(segment: Segment): - if segment.element(0) == 'CLM': - self.patient_id = segment.element(1) # submitter's identifier - self.claim_amount = segment.element(2) - codes = segment.element(5).split(':') # codes[1] == A for institutional and B for professional - self.facility_type_code = codes[0] - self.claim_code_freq = codes[2] + # Process claim-specific segments + clm_segments = filter(lambda segment: segment.element(0) == 'CLM', claim_loop) + dtp_segments = filter(lambda segment: segment.element(0) == 'DTP', claim_loop) + cli_segments = filter(lambda segment: segment.element(0) == 'CLI', claim_loop) + ref_segments = filter(lambda segment: segment.element(0) == 'REF' and segment.element(1) == 'D9', claim_loop) + + # get only the first HI segment for the pricipal diagnosis code + principle_diagnosis_segment = filter(lambda segment: segment.element(0) == 'HI' and segment.element(1).split(':')[0] in ['ABK', 'BK'], claim_loop) + # get all other HI segments for other diagnosis codes + other_diagnosis_segments = filter(lambda segment: segment.element(0) == 'HI' and segment.element(1).split(':')[0] in ['ABF', 'BF'], claim_loop) + + + list(map(self.process_clm_segment, clm_segments)) + list(map(self.process_dtp_segment, dtp_segments)) + list(map(self.process_cli_segment, cli_segments)) + list(map(self.process_ref_segment, ref_segments)) + # if principle_diagnosis_segment: + # self.process_principal_diagnosis_segment(principle_diagnosis_segment) + + # Process other diagnosis codes + # self.other_diagnosis_codes = [ + # code for segment in other_diagnosis_segments + # for i, code in enumerate(segment.element(1).split(':')) + # if i % 2 != 0 + # ] + + + # Process NM1 segments for providers + nm1_segments = filter(lambda segment: segment.element(0) == 'NM1', claim_loop) + list(map(lambda segment: self.providers[self.nm1_identifiers.get(segment.element(1))].append(Identity([segment]).to_dict()), nm1_segments)) - if segment.element(0) == 'DTP': - self.date = segment.element(3) # format D8:CCYYMMDD + def process_clm_segment(self, segment: Segment): + self.patient_id = segment.element(1) # submitter's identifier + self.claim_amount = segment.element(2) + self.benefits_assign_flag = 'Yes' if segment.element(8) == 'Y' else 'No' # Benefits flag - # process claim-specific segments - list(map(process_claim_segment, claim_loop)) + place_of_service = segment.element(5).split(':') # codes[1] == A for institutional and B for professional + self.facility_type_code = place_of_service[0] + self.claim_code_freq = place_of_service[2] - # process NM1 segments for providers - nm1_segments = filter(lambda segment: segment.element(0) == 'NM1', claim_loop) + def process_dtp_segment(self, segment: Segment): + self.date = segment.element(3) # format D8:CCYYMMDD + + def process_cli_segment(self, segment: Segment): + self.admission_date = segment.element(1) # Only in 837I + + def process_ref_segment(self, segment: Segment): + self.claim_id = segment.element(2) + + # def process_principal_diagnosis_segment(self, segment: Segment): + # self.principal_diagnosis_code = segment.element(2) # assuming HI segment's first element is the principal diagnosis code - # append instead of extend for single items - list(map(lambda segment: self.providers[self.nm1_identifiers.get(segment.element(1))].append(Identity([segment]).to_dict()), nm1_segments)) From 41485788b23490b62f69ad2c2e4363cea6e93443 Mon Sep 17 00:00:00 2001 From: Raven Date: Tue, 21 May 2024 20:15:31 -0400 Subject: [PATCH 40/46] cleaned grouping --- databricksx12/hls/claim.py | 11 +++++++---- databricksx12/hls/support_classes/identities.py | 6 +++--- 2 files changed, 10 insertions(+), 7 deletions(-) diff --git a/databricksx12/hls/claim.py b/databricksx12/hls/claim.py index f03c8e8..a7307db 100644 --- a/databricksx12/hls/claim.py +++ b/databricksx12/hls/claim.py @@ -46,7 +46,7 @@ def _populate_receiver_loop(self) -> Dict[str, str]: return ReceiverIdentity(self.sender_receiver_loop) def _populate_billing_loop(self) -> Dict[str, str]: - return BillingIdentity(self.sender_receiver_loop) + return BillingIdentity(self.billing_loop) def _populate_subscriber_loop(self) -> Dict[str, str]: return SubscriberIdentity(self.subscriber_loop) @@ -65,6 +65,9 @@ def _populate_claim_loop(self) -> Dict[str, str]: def _populate_sl_loop(self) -> Dict[str, str]: return ServiceIdentity(self.sl_loop) + # + # + # def _populate_grouped_entities(self, loop: List[Segment]) -> Dict[str, List[Dict[str, str]]]: # if we want a list of NM1 entities belonging within a loop def group_segments_by_provider(loop, nm1_identifiers: dict = Identity.nm1_identifiers) -> Dict[str, List[List[Segment]]]: @@ -73,11 +76,11 @@ def reducer(acc, segment): if segment.element(0) == 'NM1': provider_type = nm1_identifiers.get(segment.element(1)) if provider_type: - grouped[provider_type].append([segment]) + grouped[provider_type] = grouped.get(provider_type, []) + [[segment]] elif provider_type: - grouped[provider_type][-1].append(segment) + grouped[provider_type][-1] += [segment] return provider_type, grouped - + _, grouped = reduce(reducer, loop, (None, defaultdict(list))) return grouped diff --git a/databricksx12/hls/support_classes/identities.py b/databricksx12/hls/support_classes/identities.py index 6708e10..5fa2dc8 100644 --- a/databricksx12/hls/support_classes/identities.py +++ b/databricksx12/hls/support_classes/identities.py @@ -36,11 +36,11 @@ def __init__(self, segments: List[Segment]): # build entity and address for any identity def build(self, loop: List[Segment]): - nm1_segments = filter(lambda segment: segment.element(0) == 'NM1' and segment.segment_len() >= 10, loop) + nm1_segment = next(filter(lambda segment: segment.element(0) == 'NM1' and segment.segment_len() >= 10, loop), None) n3_segment = next(filter(lambda segment: segment.element(0) == 'N3', loop), None) # taking only the first address lines n4_segment = next(filter(lambda segment: segment.element(0) == 'N4', loop), None) - list(map(self.process_nm1_segment, nm1_segments)) + list(map(self.process_nm1_segment, [nm1_segment] if nm1_segment else [])) list(map(self.process_n3_segment, [n3_segment] if n3_segment else [])) list(map(self.process_n4_segment, [n4_segment] if n4_segment else [])) @@ -93,7 +93,7 @@ def process_patient_segment(segment: Segment): self.name = ' '.join([segment.element(3), segment.element(4), segment.element(5)]) return list(map(process_patient_segment, filter(lambda s: s.element(0) == 'NM1' and s.element(1) == 'QC', patient_loop))) - + class ClaimIdentity(Identity): def __init__(self, claim_segments: List[Segment]): self.patient_id = None From 207a2ac649eb6407a6bccd0aedb4dc39cb74d8c4 Mon Sep 17 00:00:00 2001 From: Raven Date: Wed, 22 May 2024 09:15:32 -0400 Subject: [PATCH 41/46] added service units --- databricksx12/hls/claim.py | 2 ++ databricksx12/hls/support_classes/identities.py | 8 ++++++-- 2 files changed, 8 insertions(+), 2 deletions(-) diff --git a/databricksx12/hls/claim.py b/databricksx12/hls/claim.py index a7307db..77207b0 100644 --- a/databricksx12/hls/claim.py +++ b/databricksx12/hls/claim.py @@ -90,6 +90,8 @@ def reducer(acc, segment): }) + + """ Overall Asks - Coordination of Benefits flag -- > self.benefits_assign_flag in Claim Identity diff --git a/databricksx12/hls/support_classes/identities.py b/databricksx12/hls/support_classes/identities.py index 5fa2dc8..b2301b0 100644 --- a/databricksx12/hls/support_classes/identities.py +++ b/databricksx12/hls/support_classes/identities.py @@ -234,7 +234,9 @@ def parse_professional_service(self, segment: Segment): return { 'Type': service_type, 'Procedure Code': procedure_code, - 'Procedure Amount': segment.element(2) + 'Procedure Amount': segment.element(2), + 'Measurement Code': segment.element(3), #UN or if anesthesia, MJ + 'Service unit': segment.element(4), } def parse_institutional_service(self, segment: Segment): @@ -243,5 +245,7 @@ def parse_institutional_service(self, segment: Segment): 'Type': service_type, 'Revenue Code': segment.element(1), 'Procedure Code': procedure_code, - 'Procedure Amount': segment.element(3) + 'Procedure Amount': segment.element(3), + 'Measurement Code': segment.element(4), #UN or if anesthesia, MJ + 'Service unit': segment.element(5), } \ No newline at end of file From f1cea0934f28bcce22d153772d8de9a11c189052 Mon Sep 17 00:00:00 2001 From: Raven Date: Wed, 22 May 2024 10:52:03 -0400 Subject: [PATCH 42/46] fixed grouped entities --- databricksx12/hls/claim.py | 8 ++++---- databricksx12/hls/support_classes/identities.py | 2 ++ 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/databricksx12/hls/claim.py b/databricksx12/hls/claim.py index 77207b0..10c7038 100644 --- a/databricksx12/hls/claim.py +++ b/databricksx12/hls/claim.py @@ -81,13 +81,13 @@ def reducer(acc, segment): grouped[provider_type][-1] += [segment] return provider_type, grouped - _, grouped = reduce(reducer, loop, (None, defaultdict(list))) + _, grouped = reduce(reducer, loop, (None, {})) return grouped - return defaultdict(list, { + return { provider_type: [Identity(segments).to_dict() for segments in group] for provider_type, group in group_segments_by_provider(loop).items() - }) + } @@ -121,7 +121,7 @@ def to_json(self): **{'billing_provider': self.billing_info.to_dict()}, **{'claim_header': self.claim_info.to_dict()}, **{'claim_lines': 'TODO'}, - **{'grouped_subscriber_entities': self.subscriber_entities_info.to_dict()}, # call for all entities in a loop[] + **{'grouped_subscriber_entities': self.subscriber_entities_info}, # call for all entities in a loop[] } # not sure if this should be here or not, but you get the idea diff --git a/databricksx12/hls/support_classes/identities.py b/databricksx12/hls/support_classes/identities.py index b2301b0..afe571b 100644 --- a/databricksx12/hls/support_classes/identities.py +++ b/databricksx12/hls/support_classes/identities.py @@ -82,6 +82,8 @@ def build_subscriber(self, subscriber_loop: List[Segment]): self.relationship_to_insured = 'Self' if sbr_segment.element(2) == '18' else 'Dependent' + + class PatientIdentity(Identity): def __init__(self, patient_segments: List[Segment]): super().__init__(patient_segments) From 1fd11fd60aa139473c1dc2449f04222b41bb9933 Mon Sep 17 00:00:00 2001 From: Aaron Z Date: Wed, 22 May 2024 14:06:05 -0400 Subject: [PATCH 43/46] helper function --- databricksx12/edi.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/databricksx12/edi.py b/databricksx12/edi.py index e2ae36e..50a293d 100644 --- a/databricksx12/edi.py +++ b/databricksx12/edi.py @@ -59,7 +59,12 @@ def num_transactions(self): # def num_functional_groups(self): return len(self.segments_by_name("GE")) - + + # + # Maps a list of indexes [0,4,7] to a series of ranges -> [(0,4), (4,7)] + # + def _index_to_tuples(self, indexes): + return list((zip(indexes, indexes[1:]))) # # Return all segments associated with each funtional group From 54ecf0586fcc375ee99081e5dd000d98f8e4986f Mon Sep 17 00:00:00 2001 From: Aaron Z Date: Wed, 22 May 2024 15:34:40 -0400 Subject: [PATCH 44/46] service_line loops --- databricksx12/hls/claim.py | 51 +++++++------- databricksx12/hls/healthcare.py | 8 ++- .../hls/support_classes/identities.py | 67 ++++++++++--------- 3 files changed, 68 insertions(+), 58 deletions(-) diff --git a/databricksx12/hls/claim.py b/databricksx12/hls/claim.py index 10c7038..a8cef81 100644 --- a/databricksx12/hls/claim.py +++ b/databricksx12/hls/claim.py @@ -1,15 +1,6 @@ from databricksx12.edi import EDI, AnsiX12Delim, Segment from databricksx12.hls.loop import Loop -from databricksx12.hls.support_classes.identities import ( - Identity, - BillingIdentity, - SubscriberIdentity, - PatientIdentity, - ClaimIdentity, - SubmitterIdentity, - ReceiverIdentity, - ServiceIdentity, -) +from databricksx12.hls.support_classes.identities import * from typing import List, Dict from collections import defaultdict from functools import reduce @@ -36,8 +27,8 @@ def __init__( self.patient_loop = patient_loop self.claim_loop = claim_loop self.sl_loop = sl_loop - self.build() + def _populate_submitter_loop(self) -> Dict[str, str]: return SubmitterIdentity(self.sender_receiver_loop) @@ -62,9 +53,6 @@ def _populate_patient_loop(self) -> Dict[str, str]: def _populate_claim_loop(self) -> Dict[str, str]: return ClaimIdentity(self.claim_loop) - def _populate_sl_loop(self) -> Dict[str, str]: - return ServiceIdentity(self.sl_loop) - # # # @@ -118,12 +106,19 @@ def to_json(self): **{'reciever': self.receiver_info.to_dict()}, **{'subscriber': self.subscriber_info.to_dict()}, **{'patient': self.patient_info.to_dict()}, - **{'billing_provider': self.billing_info.to_dict()}, + **{'providers': [{"TODO":"TODO"}]}, **{'claim_header': self.claim_info.to_dict()}, - **{'claim_lines': 'TODO'}, + **{'claim_lines': [x.to_dict() for x in self.sl_info]}, #List **{'grouped_subscriber_entities': self.subscriber_entities_info}, # call for all entities in a loop[] } + # + # Returns each claim line as an array of segments that make up the claim line + # + def claim_lines(self): + return list(map(lambda i: self.sl_loop[i[0]:i[1]], + self._index_to_tuples([(i) for i,y in enumerate(self.sl_loop) if y.segment_name()=="LX"]+[len(self.sl_loop)]))) + # not sure if this should be here or not, but you get the idea def build(self) -> None: self.submitter_info = self._populate_submitter_loop() @@ -140,23 +135,33 @@ def build(self) -> None: self.subscriber_entities_info = self._populate_grouped_entities(self.subscriber_loop) - class Claim837i(MedicalClaim): NAME = "837I" -# Format of 837P https://www.dhs.wisconsin.gov/publications/p0/p00265.pdf + # Format of 837P https://www.dhs.wisconsin.gov/publications/p0/p00265.pdf + def _populate_sl_loop(self, missing=""): + return list( + map(lambda s: + ServiceLine( + sv2=[x for x in s if x.segment_name()=="SV2"][0], + lx=[x for x in s if x.segment_name()=="LX"][0], + dtp=[x for x in s if x.segment_name()=="DTP"][0] + ),self.claim_lines())) class Claim837p(MedicalClaim): NAME = "837P" - - -class Claim835(MedicalClaim): - - NAME = "835" + def _populate_sl_loop(self, missing=""): + return list( + map(lambda s: + ServiceLine( + sv1=[x for x in s if x.segment_name()=="SV1"][0], + lx=[x for x in s if x.segment_name()=="LX"][0], + dtp=[x for x in s if x.segment_name()=="DTP"][0] + ), self.claim_lines())) # diff --git a/databricksx12/hls/healthcare.py b/databricksx12/hls/healthcare.py index 8745efd..dc69885 100644 --- a/databricksx12/hls/healthcare.py +++ b/databricksx12/hls/healthcare.py @@ -6,9 +6,11 @@ class HealthcareManager(EDI): def __init__(self, mapping = { - "222": Claim837i, - "223": Claim837p, - "221": None # "835" + "221": None, # Remittance "835" + "222": Claim837p, + "223": Claim837i, + "224": None #Dental + }): self.mapping = mapping diff --git a/databricksx12/hls/support_classes/identities.py b/databricksx12/hls/support_classes/identities.py index afe571b..f57e64b 100644 --- a/databricksx12/hls/support_classes/identities.py +++ b/databricksx12/hls/support_classes/identities.py @@ -218,36 +218,39 @@ def build_receiver_lines(self, receiver_loop: List[Segment]): -class ServiceIdentity(Identity): - def __init__(self, sl_segments: List[Segment]): - self.services = defaultdict(list) - super().__init__(sl_segments) - self.build_sl_lines(sl_segments) - - def build_sl_lines(self, sl_loop: List[Segment]): - sv1_segments = filter(lambda segment: segment.element(0) == 'SV1', sl_loop) - sv2_segments = filter(lambda segment: segment.element(0) == 'SV2', sl_loop) - self.services['Professional'] = [self.parse_professional_service(segment) for segment in sv1_segments] - self.services['Institutional'] = [self.parse_institutional_service(segment) for segment in sv2_segments] - - - def parse_professional_service(self, segment: Segment): - service_type, procedure_code = segment.element(1).split(':')[0:2] # assuming 7 elements but choosing first two - return { - 'Type': service_type, - 'Procedure Code': procedure_code, - 'Procedure Amount': segment.element(2), - 'Measurement Code': segment.element(3), #UN or if anesthesia, MJ - 'Service unit': segment.element(4), - } +class ServiceLine(Identity): + + def common(self, sv, lx, dtp): + self.claim_line_number = lx.element(1) + self.service_date = dtp.element(3) + self.service_time = dtp.element(1) + self.service_date_format = dtp.element(2) + + # + # Institutional Claims + # + def __init__(self, sv2, lx, dtp): + self.common(sv2, lx, dtp) + self.units = sv2.element(6) + self.units_measurement = sv1.element(5) + self.line_chrg_amt = sv2.element(4) + self.prcdr_cd = sv2.element(2, 1) + self.prcdr_cd_type = sv2.element(2, 0) + self.modifier_cds = ','.join(filter(lambda x: x!="", [sv1.element(2, 2, ""), sv1.element(2, 3, ""), sv1.element(2, 4, ""), sv1.element(2, 5, "")])) + self.revenue_cd = sv2.element(1) + + # + # Professional Claims + # + def __init__(self, sv1, lx, dtp): + self.common(sv1, lx, dtp) + self.units = sv1.element(4) + self.units_measurement = sv1.element(3) + self.line_chrg_amt = sv1.element(2) + self.prcdr_cd = sv1.element(1, 1) + self.prcdr_cd_type = sv1.element(1, 0) + self.modifier_cds = ','.join(filter(lambda x: x!="", [sv1.element(1, 2, ""), sv1.element(1, 3, ""), sv1.element(1, 4, ""), sv1.element(1, 5, "")])) + self.place_of_service = sv1.element(5) + self.dg_cd_pntr = sv1.element(7) - def parse_institutional_service(self, segment: Segment): - service_type, procedure_code = segment.element(2).split(':')[0:2] # assuming 7 elements but choosing first two - return { - 'Type': service_type, - 'Revenue Code': segment.element(1), - 'Procedure Code': procedure_code, - 'Procedure Amount': segment.element(3), - 'Measurement Code': segment.element(4), #UN or if anesthesia, MJ - 'Service unit': segment.element(5), - } \ No newline at end of file + From b4e04572d8e38a0d8d0e674106097b4b4556de2a Mon Sep 17 00:00:00 2001 From: Aaron Z Date: Wed, 22 May 2024 17:24:33 -0400 Subject: [PATCH 45/46] claim lines --- databricksx12/hls/claim.py | 4 +- .../hls/support_classes/identities.py | 66 ++++++++++++------- 2 files changed, 43 insertions(+), 27 deletions(-) diff --git a/databricksx12/hls/claim.py b/databricksx12/hls/claim.py index a8cef81..6ad5d91 100644 --- a/databricksx12/hls/claim.py +++ b/databricksx12/hls/claim.py @@ -144,7 +144,7 @@ class Claim837i(MedicalClaim): def _populate_sl_loop(self, missing=""): return list( map(lambda s: - ServiceLine( + ServiceLine.from_sv2( sv2=[x for x in s if x.segment_name()=="SV2"][0], lx=[x for x in s if x.segment_name()=="LX"][0], dtp=[x for x in s if x.segment_name()=="DTP"][0] @@ -157,7 +157,7 @@ class Claim837p(MedicalClaim): def _populate_sl_loop(self, missing=""): return list( map(lambda s: - ServiceLine( + ServiceLine.from_sv1( sv1=[x for x in s if x.segment_name()=="SV1"][0], lx=[x for x in s if x.segment_name()=="LX"][0], dtp=[x for x in s if x.segment_name()=="DTP"][0] diff --git a/databricksx12/hls/support_classes/identities.py b/databricksx12/hls/support_classes/identities.py index f57e64b..a2bed66 100644 --- a/databricksx12/hls/support_classes/identities.py +++ b/databricksx12/hls/support_classes/identities.py @@ -220,37 +220,53 @@ def build_receiver_lines(self, receiver_loop: List[Segment]): class ServiceLine(Identity): - def common(self, sv, lx, dtp): - self.claim_line_number = lx.element(1) - self.service_date = dtp.element(3) - self.service_time = dtp.element(1) - self.service_date_format = dtp.element(2) + def __init__(self, d): + for k,v in d.items(): + setattr(self,k,v) + + @staticmethod + def common(sv, lx, dtp): + return { + "claim_line_number": lx.element(1), + "service_date": dtp.element(3), + "service_time": dtp.element(1), + "service_date_format": dtp.element(2) + } # # Institutional Claims - # - def __init__(self, sv2, lx, dtp): - self.common(sv2, lx, dtp) - self.units = sv2.element(6) - self.units_measurement = sv1.element(5) - self.line_chrg_amt = sv2.element(4) - self.prcdr_cd = sv2.element(2, 1) - self.prcdr_cd_type = sv2.element(2, 0) - self.modifier_cds = ','.join(filter(lambda x: x!="", [sv1.element(2, 2, ""), sv1.element(2, 3, ""), sv1.element(2, 4, ""), sv1.element(2, 5, "")])) - self.revenue_cd = sv2.element(1) + # + @classmethod + def from_sv2(cls, sv2, lx, dtp): + return cls({**cls.common(sv2, lx, dtp), + **{ + "units": sv2.element(5), + "units_measurement": sv2.element(4), + "line_chrg_amt": sv2.element(3), + "prcdr_cd": sv2.element(2, 1, ""), + "prcdr_cd_type": sv2.element(2, 0, ""), + "modifier_cds": ','.join(filter(lambda x: x!="", [sv2.element(2, 2, ""), sv2.element(2, 3, ""), sv2.element(2, 4,""), sv2.element(2, 5, "")])), + "revenue_cd": sv2.element(1) + } + }) # # Professional Claims # - def __init__(self, sv1, lx, dtp): - self.common(sv1, lx, dtp) - self.units = sv1.element(4) - self.units_measurement = sv1.element(3) - self.line_chrg_amt = sv1.element(2) - self.prcdr_cd = sv1.element(1, 1) - self.prcdr_cd_type = sv1.element(1, 0) - self.modifier_cds = ','.join(filter(lambda x: x!="", [sv1.element(1, 2, ""), sv1.element(1, 3, ""), sv1.element(1, 4, ""), sv1.element(1, 5, "")])) - self.place_of_service = sv1.element(5) - self.dg_cd_pntr = sv1.element(7) + @classmethod + def from_sv1(cls, sv1, lx, dtp): + return cls({**cls.common(sv1, lx, dtp), + **{ + "units": sv1.element(4), + "units_measurement": sv1.element(3), + "line_chrg_amt": sv1.element(2), + "prcdr_cd": sv1.element(1, 1), + "prcdr_cd_type": sv1.element(1, 0), + "modifier_cds": ','.join(filter(lambda x: x!="", [sv1.element(1, 2, ""), sv1.element(1, 3, ""), sv1.element(1, 4,""), sv1.element(1, 5, "")])), + "place_of_service": sv1.element(5), + "dg_cd_pntr": sv1.element(7) + } + }) + From 59ddfbedaf1b8b37407c9fbf4db0c205b2f38fee Mon Sep 17 00:00:00 2001 From: Aaron Z Date: Wed, 22 May 2024 17:29:51 -0400 Subject: [PATCH 46/46] adding claim lines --- tests/test_claims.py | 25 +++++++++++++++++++++++++ 1 file changed, 25 insertions(+) create mode 100644 tests/test_claims.py diff --git a/tests/test_claims.py b/tests/test_claims.py new file mode 100644 index 0000000..2dff588 --- /dev/null +++ b/tests/test_claims.py @@ -0,0 +1,25 @@ +from test_spark_base import * +from databricksx12.hls import * +from databricksx12 import * +import unittest, re + +class TestClaims(PySparkBaseTest): + + def test_professional_service_lines(self): + edi = EDI(open("sampledata/837/CC_837P_EDI.txt", "rb").read().decode("utf-8")) + hm = HealthcareManager() + data = hm.from_edi(edi)[0] + assert(len(data.sl_info) == 2) + assert([y.to_dict().get("claim_line_number") for y in data.sl_info] == ['1', '2']) + assert([y.to_dict().get("place_of_service") for y in data.sl_info] == ['11', '11']) + assert([y.to_dict().get("line_chrg_amt") for y in data.sl_info] == ['300', '300']) + + def test_institutional_service_lines(self): + edi = EDI(open("sampledata/837/CC_837I_EDI.txt", "rb").read().decode("utf-8")) + hm = HealthcareManager() + data = hm.from_edi(edi)[0] + assert([y.to_dict().get("claim_line_number") for y in data.sl_info] == ['1', '2', '3', '4', '5', '6', '7', '8', '9']) + assert([y.to_dict().get("revenue_cd") for y in data.sl_info] ==['0124', '0250', '0260', '0300', '0301', '0305', '0306', '0307', '0351']) + assert( sum([float(y.to_dict().get("line_chrg_amt")) for y in data.sl_info]) == 17166.7) + +