diff --git a/README.md b/README.md index 8a27233..f13985d 100644 --- a/README.md +++ b/README.md @@ -3,51 +3,38 @@ [![CLOUD](https://img.shields.io/badge/CLOUD-ALL-blue?logo=googlecloud&style=for-the-badge)](https://cloud.google.com/databricks) [![POC](https://img.shields.io/badge/POC-10_days-green?style=for-the-badge)](https://databricks.com/try-databricks) -## Business Problem (Under Construction / Not Stable) +# Business Problem -Addressing the issue of working with various parts of an x12 EDI transaction in Spark on Databricks. +Working with various x12 EDI transactions in Spark on Databricks. -## Install +# Install ```python pip install git+https://github.com/databricks-industry-solutions/x12-edi-parser ``` -## Run +# Run -### Reading in EDI Data +## Reading in EDI Data Default format used is AnsiX12 (* as a delim and ~ as segment separator) ```python -from databricksx12.format import * -from databricksx12.edi import * -ediFormat = AnsiX12Delim #specifying formats of data, ansi is also the default if nothing is specified -df = spark.read.text("sampledata/837/*", wholetext = True) - -(df.rdd - .map(lambda x: x.asDict().get("value")) - .map(lambda x: EDI(x, delim_cls = ediFormat)) - .map(lambda x: {"transaction_count": x.num_transactions()}) -).toDF().show() -+-----------------+ -|transaction_count| -+-----------------+ -| 5| -| 1| -| 1| -| 1| -+-----------------+ +from databricksx12 import * +#EDI format type +ediFormat = AnsiX12Delim #specifying formats of data, ansi is also the default if nothing is specified +#can also specify custom formats (below is the same as AnsiX12Delim) +ediFormat = type("", (), dict({'SEGMENT_DELIM': '~', 'ELEMENT_DELIM': '*', 'SUB_DELIM': ':'})) +df = spark.read.text("sampledata/837/*txt", wholetext = True) -#Building a dynamic/custom format -customFormat = type("", (), dict({'SEGMENT_DELIM': '~', 'ELEMENT_DELIM': '*', 'SUB_DELIM': ':'})) (df.rdd .map(lambda x: x.asDict().get("value")) - .map(lambda x: EDI(x, delim_cls = 
customFormat)) + .map(lambda x: EDI(x, delim_cls = ediFormat)) .map(lambda x: {"transaction_count": x.num_transactions()}) ).toDF().show() +""" +-----------------+ |transaction_count| +-----------------+ @@ -55,52 +42,86 @@ customFormat = type("", (), dict({'SEGMENT_DELIM': '~', 'ELEMENT_DELIM': '*', 'S | 1| | 1| | 1| +| 1| +-----------------+ +""" +``` +## Parsing Healthcare Transactions -``` +Currently supports 837s. Records in each format type should be saved separately, e.g. do not mix 835s & 837s in the df.save() command. -#### EDI as a Table for SQL +### 837i and 837p sample data in Spark ```python -"""" -Look at all data refernce -> https://justransform.com/edi-essentials/edi-structure/ - (1) Including control header / ISA & IEA segments -""" +from databricksx12 import * +from databricksx12.hls import * +import json from pyspark.sql.functions import input_file_name -( df.withColumn("filename", input_file_name()).rdd +hm = HealthcareManager() +df = spark.read.text("sampledata/837/*txt", wholetext = True) + + +rdd = ( + df.withColumn("filename", input_file_name()).rdd .map(lambda x: (x.asDict().get("filename"),x.asDict().get("value"))) .map(lambda x: (x[0], EDI(x[1]))) - .map(lambda x: [{**{"filename": x[0]}, **y} for y in x[1].toRows()]) - .flatMap(lambda x: x) - .toDF()).show() + .map(lambda x: { **{'filename': x[0]}, **hm.to_json(x[1])} ) + .map(lambda x: json.dumps(x)) +) +claims = spark.read.json(rdd) + +#Claim header table TODO + +#Claim line table TODO -""" -+--------------------+----------+--------------------------+--------------+------------+-----------------------------+--------+ -| row_data|row_number|segment_element_delim_char|segment_length|segment_name|segment_subelement_delim_char|filename| -+--------------------+----------+--------------------------+--------------+------------+-----------------------------+--------+ -|ISA*00* ...| 0| *| 17| ISA| :|file:///| -|GS*HC*CLEARINGHOU...| 1| *| 9| GS| :|file:///| -|ST*837*000000001*...| 2| *| 4| ST| 
:|file:///| -|BHT*0019*00*73490...| 3| *| 7| BHT| :|file:///| -|NM1*41*2*CLEARING...| 4| *| 10| NM1| :|file:///| -|PER*IC*CLEARINGHO...| 5| *| 7| PER| :|file:///| -|NM1*40*2*12345678...| 6| *| 10| NM1| :|file:///| ``` -#### Parsing Healthcare Transactions +### Sample data outside of Spark + ```python -from databricksx12.hls.healthcare import * +from databricksx12 import * +from databricksx12.hls import * +import json hm = HealthcareManager() -x = EDI(open("sampledata/837/CHPW_Claimdata.txt", "rb").read().decode("utf-8")) +edi = EDI(open("sampledata/837/CHPW_Claimdata.txt", "rb").read().decode("utf-8")) + +#Returns parsed claim data +hm.from_edi(edi) +#[, , , , ] -hm.from_edi(x) -#[, , , , ] +#Print in json format +print(json.dumps(hm.to_json(edi), indent=4)) -one_claim = hm.from_edi(x)[0] +""" +{ + "EDI.sender_tax_id": "ZZ", + "FuncitonalGroup": [ + { + "FunctionalGroup.receiver": "123456789", + "FunctionalGroup.sender": "CLEARINGHOUSE", + "FunctionalGroup.transaction_datetime": "20180508:0833", + "FunctionalGroup.transaction_type": "222", + "Transactions": [ + { + "Transaction.transaction_type": "222", + "Claims": [ + { + "submitter": { + "contact_name": "CLEARINGHOUSE CLIENT SERVICES", + "contacts": { + "primary": [ + { + "contact_method": "Telephone", + "contact_number": "8005551212", +... 
+""" + +#print the raw EDI Segments of one claim +one_claim = hm.from_edi(edi)[0] print("\n".join([y.data for y in one_claim.data])) #Print one claim to look at the segments of it """ BHT*0019*00*7349063984*20180508*0833*CH @@ -120,67 +141,41 @@ HL*2*1*22*0 SBR*P*18**COMMUNITY HLTH PLAN OF WASH*****CI NM1*IL*1*SUBSCRIBER*JOHN*J***MI*987321 N3*987 65TH PL -N4*VANCOUVER*WA*986640001 -DMG*D8*19881225*M -NM1*PR*2*COMMUNITY HEALTH PLAN OF WASHINGTON*****PI*CHPWA -CLM*1805080AV3648339*20***57:B:1*Y*A*Y*Y -REF*D9*7349065509 -HI*ABK:F1120 -NM1*82*1*PROVIDER*JAMES****XX*1112223338 -PRV*PE*PXC*261QR0405X -NM1*77*2*BH CLINIC OF VANCOUVER*****XX*1122334455 -N3*12345 MAIN ST SUITE A1 -N4*VANCOUVER*WA*98662 -LX*1 -SV1*HC:H0003*20*UN*1***1 -DTP*472*D8*20180428 -REF*6R*142671 - +... """ ``` -#### Further EDI Parsing in Pyspark - - -> **Warning** -> Sections below this are under construction +## EDI as a Table for SQL ```python -from databricksx12.edi import * -x = EDIManager(EDI(open("sampledata/837/CHPW_Claimdata.txt", "rb").read().decode("utf-8"))) +"""" +Look at all data refernce -> https://justransform.com/edi-essentials/edi-structure/ + (1) Including control header / ISA & IEA segments +""" +from pyspark.sql.functions import input_file_name -import json -print(json.dumps(x.flatten(x.data), indent=4)) -{ - "EDI.sender_tax_id": "ZZ", - "list": [ - { - "FunctionalGroup.receiver": "123456789", - "FunctionalGroup.sender": "CLEARINGHOUSE", - "FunctionalGroup.transaction_datetime": "20180508:0833", - "FunctionalGroup.transaction_type": "222", - "list": [ - { - "Transaction.transaction_type": "222" - }, - { - "Transaction.transaction_type": "222" - }, - { - "Transaction.transaction_type": "222" - }, - { - "Transaction.transaction_type": "222" - }, - { - "Transaction.transaction_type": "222" - } - ] - } - ] -} +( df.withColumn("filename", input_file_name()).rdd + .map(lambda x: (x.asDict().get("filename"),x.asDict().get("value"))) + .map(lambda x: (x[0], EDI(x[1]))) + .map(lambda x: 
[{**{"filename": x[0]}, **y} for y in x[1].toRows()]) + .flatMap(lambda x: x) + .toDF()).show() + +""" ++--------------------+----------+--------------------------+--------------+------------+-----------------------------+--------+ +| row_data|row_number|segment_element_delim_char|segment_length|segment_name|segment_subelement_delim_char|filename| ++--------------------+----------+--------------------------+--------------+------------+-----------------------------+--------+ +|ISA*00* ...| 0| *| 17| ISA| :|file:///| +|GS*HC*CLEARINGHOU...| 1| *| 9| GS| :|file:///| +|ST*837*000000001*...| 2| *| 4| ST| :|file:///| +|BHT*0019*00*73490...| 3| *| 7| BHT| :|file:///| +|NM1*41*2*CLEARING...| 4| *| 10| NM1| :|file:///| +|PER*IC*CLEARINGHO...| 5| *| 7| PER| :|file:///| +|NM1*40*2*12345678...| 6| *| 10| NM1| :|file:///| ``` +#### Other EDI Parsing in Pyspark + ```python """ @@ -249,8 +244,6 @@ ediDF.show() """ - - #show first line of each transaction trxDF.filter(x.row_number == 0).show() """ diff --git a/databricksx12/__init__.py b/databricksx12/__init__.py index 8b13789..bbfa1e5 100644 --- a/databricksx12/__init__.py +++ b/databricksx12/__init__.py @@ -1 +1,6 @@ +from .edi import * +from .format import * +from .functional import * +from .transaction import * + diff --git a/databricksx12/edi.py b/databricksx12/edi.py index 550fe83..50a293d 100644 --- a/databricksx12/edi.py +++ b/databricksx12/edi.py @@ -59,7 +59,12 @@ def num_transactions(self): # def num_functional_groups(self): return len(self.segments_by_name("GE")) - + + # + # Maps a list of indexes [0,4,7] to a series of ranges -> [(0,4), (4,7)] + # + def _index_to_tuples(self, indexes): + return list((zip(indexes, indexes[1:]))) # # Return all segments associated with each funtional group @@ -124,6 +129,8 @@ def toRows(self): def header(self): return self.data[0] + + class Segment(): # @@ -173,8 +180,6 @@ def filter(self, value, element, sub_element, dne="na/dne"): return self if value == self.get_element(element, 
sub_element, dne) else None - - # # Manage relationship heirarchy within EDI # @@ -223,7 +228,8 @@ def flatten(data = None): } else: return EDIManager.class_metadata(data) - + + """ from databricksx12.edi import * diff --git a/databricksx12/hls/__init__.py b/databricksx12/hls/__init__.py new file mode 100644 index 0000000..4785b04 --- /dev/null +++ b/databricksx12/hls/__init__.py @@ -0,0 +1,3 @@ +from .healthcare import * +from .claim import * +from .loop import * diff --git a/databricksx12/hls/claim.py b/databricksx12/hls/claim.py index 94c21c3..6ad5d91 100644 --- a/databricksx12/hls/claim.py +++ b/databricksx12/hls/claim.py @@ -1,59 +1,300 @@ -from databricksx12.edi import * +from databricksx12.edi import EDI, AnsiX12Delim, Segment +from databricksx12.hls.loop import Loop +from databricksx12.hls.support_classes.identities import * +from typing import List, Dict +from collections import defaultdict +from functools import reduce + # # Base claim class # -class Claim(EDI): - def __init__(self, segments, delim_cls = AnsiX12Delim): - self.data = segments - self.format_cls = delim_cls - #For Raven TODO marked - self.claim_identifier = None #TODO include both CH and RP values here - self.claim_lines = None #TODO Maintain a list of claim lines using ClaimLine class - self.subscriber = None #TODO selecting the subscriber - self.patient = None #TODO selecting the patient info, maybe patient should be its own class? 
+class MedicalClaim(EDI): + + def __init__( + self, + sender_receiver_loop: List = [], + billing_loop: List = [], + subscriber_loop: List = [], + patient_loop: List = [], + claim_loop: List = [], + sl_loop: List = [], + ): + self.sender_receiver_loop = sender_receiver_loop # extracted together + self.billing_loop = billing_loop + self.subscriber_loop = subscriber_loop + self.patient_loop = patient_loop + self.claim_loop = claim_loop + self.sl_loop = sl_loop + self.build() + + def _populate_submitter_loop(self) -> Dict[str, str]: + return SubmitterIdentity(self.sender_receiver_loop) + + def _populate_receiver_loop(self) -> Dict[str, str]: + return ReceiverIdentity(self.sender_receiver_loop) + + def _populate_billing_loop(self) -> Dict[str, str]: + return BillingIdentity(self.billing_loop) + def _populate_subscriber_loop(self) -> Dict[str, str]: + return SubscriberIdentity(self.subscriber_loop) + # - # TODO total amount billed at the header of the claim # - def header_total_billed_amount(self): - pass + def _populate_patient_loop(self) -> Dict[str, str]: + # Note - if this doesn't exist then its the same as subscriber loop + # Note to include in loop: information about subscriber/dependent relationship is marked by Element 2 + # 01 = Spouse; 18 = Self; 19 = Child; G8 = Other + return PatientIdentity(self.patient_loop) + + def _populate_claim_loop(self) -> Dict[str, str]: + return ClaimIdentity(self.claim_loop) # - # TODO total amount billed across lines # - def lines_total_billed_amount(self): - pass + # + def _populate_grouped_entities(self, loop: List[Segment]) -> Dict[str, List[Dict[str, str]]]: + # if we want a list of NM1 entities belonging within a loop + def group_segments_by_provider(loop, nm1_identifiers: dict = Identity.nm1_identifiers) -> Dict[str, List[List[Segment]]]: + def reducer(acc, segment): + provider_type, grouped = acc + if segment.element(0) == 'NM1': + provider_type = nm1_identifiers.get(segment.element(1)) + if provider_type: + 
grouped[provider_type] = grouped.get(provider_type, []) + [[segment]] + elif provider_type: + grouped[provider_type][-1] += [segment] + return provider_type, grouped + + _, grouped = reduce(reducer, loop, (None, {})) + return grouped + + return { + provider_type: [Identity(segments).to_dict() for segments in group] + for provider_type, group in group_segments_by_provider(loop).items() + } + + + """ + Overall Asks + - Coordination of Benefits flag -- > self.benefits_assign_flag in Claim Identity + - Patient / Subscriber same person flag --> self.relationship_to_insured in Suscriber in Claim Identity + + Claim needs + - principal ICD10 diagnosis code + - other ICD10 diagnosis codes as an array + - hcfa place of service -- segment.element(5).split(':')? + - claim id? - done + - admission type code - only in 837i? + - facility type code - done + - claim frequency code - done + + Claim line needs + - This should return an array -class ClaimLine(Segment): + Servicing provider needs + - TBD + """ + def to_json(self): + return { + **{'submitter': self.submitter_info.to_dict()}, + **{'reciever': self.receiver_info.to_dict()}, + **{'subscriber': self.subscriber_info.to_dict()}, + **{'patient': self.patient_info.to_dict()}, + **{'providers': [{"TODO":"TODO"}]}, + **{'claim_header': self.claim_info.to_dict()}, + **{'claim_lines': [x.to_dict() for x in self.sl_info]}, #List + **{'grouped_subscriber_entities': self.subscriber_entities_info}, # call for all entities in a loop[] + } # - # TODO build out claim line uses (case class) + # Returns each claim line as an array of segments that make up the claim line # - def __init__(self): - pass - """ - select fields: - procedure code - procedure code type (HCPCS, CPT4, ICD10) - revenuce code - procedure modifier codes - billed amount - - """ + def claim_lines(self): + return list(map(lambda i: self.sl_loop[i[0]:i[1]], + self._index_to_tuples([(i) for i,y in enumerate(self.sl_loop) if y.segment_name()=="LX"]+[len(self.sl_loop)]))) + + 
# not sure if this should be here or not, but you get the idea + def build(self) -> None: + self.submitter_info = self._populate_submitter_loop() + self.receiver_info = self._populate_receiver_loop() + self.billing_info = self._populate_billing_loop() + self.subscriber_info = self._populate_subscriber_loop() + self.patient_info = ( + self._populate_subscriber_loop() if self.patient_loop == [] else self._populate_patient_loop() + ) + self.claim_info = self._populate_claim_loop() + self.sl_info = self._populate_sl_loop() + self.claim_entities_info = self._populate_grouped_entities(self.claim_loop) + self.subscriber_entities_info = self._populate_grouped_entities(self.subscriber_loop) -class Claim837i(Claim): + +class Claim837i(MedicalClaim): NAME = "837I" -#Format of 837P https://www.dhs.wisconsin.gov/publications/p0/p00265.pdf -class Claim837p(Claim): + # Format of 837P https://www.dhs.wisconsin.gov/publications/p0/p00265.pdf - NAME = "837P" + def _populate_sl_loop(self, missing=""): + return list( + map(lambda s: + ServiceLine.from_sv2( + sv2=[x for x in s if x.segment_name()=="SV2"][0], + lx=[x for x in s if x.segment_name()=="LX"][0], + dtp=[x for x in s if x.segment_name()=="DTP"][0] + ),self.claim_lines())) + +class Claim837p(MedicalClaim): + NAME = "837P" + def _populate_sl_loop(self, missing=""): + return list( + map(lambda s: + ServiceLine.from_sv1( + sv1=[x for x in s if x.segment_name()=="SV1"][0], + lx=[x for x in s if x.segment_name()=="LX"][0], + dtp=[x for x in s if x.segment_name()=="DTP"][0] + ), self.claim_lines())) + + +# +# Base claim builder (transaction -> 1 or more claims) +# + + +class ClaimBuilder(EDI): + # + # Given claim type (837i, 837p, etc), segments, and delim class, build claim level classes + # + def __init__(self, trnx_type_cls, trnx_data, delim_cls=AnsiX12Delim): + self.data = trnx_data + self.format_cls = delim_cls + self.trnx_cls = trnx_type_cls + self.loop = Loop(trnx_data) + + # + # Builds a claim object from + # + # @param 
clm_segment - the claim segment of claim to build + # @param idx - the index of the claim segment in the data + # + # @return the class containing the relevant claim information + # + def build_claim(self, clm_segment, idx): + return self.trnx_cls( + sender_receiver_loop=self.get_submitter_receiver_loop(idx), + billing_loop=self.loop.get_loop_segments(idx, "2000A"), + subscriber_loop=self.loop.get_loop_segments(idx, "2000B"), + patient_loop=self.loop.get_loop_segments(idx, "2000C"), + claim_loop=self.get_claim_loop(idx), + sl_loop=self.get_service_line_loop(idx), # service line loop + ) + + # + # Determine claim loop: starts at the clm index and ends at LX segment, or CLM segment, or end of data + # + def get_claim_loop(self, clm_idx): + sl_start_indexes = list(map(lambda x: x[0], filter(lambda x: x[0] > clm_idx, self.segments_by_name_index("LX")))) + clm_indexes = list(map(lambda x: x[0], filter(lambda x: x[0] > clm_idx, self.segments_by_name_index("CLM")))) + + if sl_start_indexes: + clm_end_idx = min(sl_start_indexes) + elif clm_indexes: + clm_end_idx = min(clm_indexes + [len(self.data)]) + else: + clm_end_idx = len(self.data) + + return self.data[clm_idx:clm_end_idx] + + # + # Determine service line loop: starts at the first LX segment after the clm index and ends at the next SE segment, or end of data + # + def get_service_line_loop(self, clm_idx): + sl_start_indexes = list(map(lambda x: x[0], filter(lambda x: x[0] > clm_idx, self.segments_by_name_index("LX")))) + tx_end_indexes = list(map(lambda x: x[0], filter(lambda x: x[0] > clm_idx, self.segments_by_name_index("SE")))) + if sl_start_indexes: + sl_end_idx = min(tx_end_indexes + [len(self.data)]) + return self.data[min(sl_start_indexes):sl_end_idx] + return [] + + def get_submitter_receiver_loop(self, clm_idx): + bht_start_indexes = list(map(lambda x: x[0], filter(lambda x: x[0] < clm_idx, self.segments_by_name_index("BHT")))) + bht_end_indexes = list(map(lambda x: x[0], filter(lambda x: x[0] < clm_idx and x[1].element(3) == '20', 
self.segments_by_name_index("HL")))) + if bht_start_indexes: + sub_rec_start_idx = max(bht_start_indexes) + sub_rec_end_idx = max(bht_end_indexes) + + return self.data[sub_rec_start_idx:sub_rec_end_idx] + return [] + + + # + # Given transaction type, transaction segments, and delim info, build out claims in the transaction + # @return a list of Claim for each "clm" segment + # + def build(self): + return [ + self.build_claim(seg, i) for i, seg in self.segments_by_name_index("CLM") + ] + + +""" +sample_data_837i_edited = open("/sampledata/837/CHPW_Claimdata_edited.txt", "rb").read().decode("utf-8") +claim_class = ClaimBuilder(trnx_type='837I', trnx_data=sample_data_837i_edited, delim_cls=AnsiX12Delim) +claim_class.build() + +[{'1000A': {'desc': 'Submitter Name', 'segments': 'CLEARINGHOUSE'}, + '1000B': {'desc': 'Receiver Name', 'segments': '123456789'}, + '2000A': {'desc': 'Billing Provider', + 'segments': ['BH CLINIC OF VANCOUVER']}, + '2000B': {'desc': 'Subscriber', 'segments': ['COMMUNITY HLTH PLAN OF WASH']}, + '2010BA': {'desc': 'Patient', 'segments': (['JOHN'], ['SUBSCRIBER'])}, + '2010BB': {'desc': 'Payer', + 'segments': ['COMMUNITY HEALTH PLAN OF WASHINGTON']}, + '2300': {'desc': 'Claim', 'segments': (['1805080AV3648339'], ['20'])}}, + {'1000A': {'desc': 'Submitter Name', 'segments': 'CLEARINGHOUSE'}, + '1000B': {'desc': 'Receiver Name', 'segments': '123456789'}, + '2000A': {'desc': 'Billing Provider', + 'segments': ['BH CLINIC OF VANCOUVER']}, + '2000B': {'desc': 'Subscriber', 'segments': ['COMMUNITY HLTH PLAN OF WASH']}, + '2010BA': {'desc': 'Patient', 'segments': (['SUSAN'], ['PATIENT'])}, + '2010BB': {'desc': 'Payer', + 'segments': ['COMMUNITY HEALTH PLAN OF WASHINGTON']}, + '2300': {'desc': 'Claim', 'segments': (['1805080AV3648347'], ['50.1'])}}, + {'1000A': {'desc': 'Submitter Name', 'segments': 'CLEARINGHOUSE'}, + '1000B': {'desc': 'Receiver Name', 'segments': '123456789'}, + '2000A': {'desc': 'Billing Provider', + 'segments': ['BH CLINIC OF 
VANCOUVER']}, + '2000B': {'desc': 'Subscriber', 'segments': ['COMMUNITY HLTH PLAN OF WASH']}, + '2010BA': {'desc': 'Patient', 'segments': (['JOHN'], ['SUBSCRIBER'])}, + '2010BB': {'desc': 'Payer', + 'segments': ['COMMUNITY HEALTH PLAN OF WASHINGTON']}, + '2300': {'desc': 'Claim', 'segments': (['1805080AV3648340'], ['11.64'])}}, + {'1000A': {'desc': 'Submitter Name', 'segments': 'CLEARINGHOUSE'}, + '1000B': {'desc': 'Receiver Name', 'segments': '123456789'}, + '2000A': {'desc': 'Billing Provider', + 'segments': ['BH CLINIC OF VANCOUVER']}, + '2000B': {'desc': 'Subscriber', 'segments': ['COMMUNITY HLTH PLAN OF WASH']}, + '2010BA': {'desc': 'Patient', 'segments': (['SUSAN'], ['PATIENT'])}, + '2010BB': {'desc': 'Payer', + 'segments': ['COMMUNITY HEALTH PLAN OF WASHINGTON']}, + '2300': {'desc': 'Claim', 'segments': (['1805080AV3648353'], ['234'])}}, + {'1000A': {'desc': 'Submitter Name', 'segments': 'CLEARINGHOUSE'}, + '1000B': {'desc': 'Receiver Name', 'segments': '123456789'}, + '2000A': {'desc': 'Billing Provider', + 'segments': ['BH CLINIC OF VANCOUVER']}, + '2000B': {'desc': 'Subscriber', 'segments': ['COMMUNITY HLTH PLAN OF WASH']}, + '2010BA': {'desc': 'Patient', + 'segments': (['JOHN', 'JOHN'], ['SUBSCRIBER', 'SUBSCRIBER'])}, + '2010BB': {'desc': 'Payer', + 'segments': ['COMMUNITY HEALTH PLAN OF MASS', + 'COMMUNITY HEALTH PLAN OF WASHINGTON']}, + '2300': {'desc': 'Claim', 'segments': (['1805080AV3648355'], ['20'])}}] +""" diff --git a/databricksx12/hls/healthcare.py b/databricksx12/hls/healthcare.py index 91858de..dc69885 100644 --- a/databricksx12/hls/healthcare.py +++ b/databricksx12/hls/healthcare.py @@ -6,9 +6,11 @@ class HealthcareManager(EDI): def __init__(self, mapping = { - "222": "837P", - "223": "837I", - "221": "835" + "221": None, # Remittance "835" + "222": Claim837p, + "223": Claim837i, + "224": None #Dental + }): self.mapping = mapping @@ -17,8 +19,10 @@ def __init__(self, mapping = { # Given an EDI message, return a list of healthcare claims # 
def from_edi(self, edi): - return list(itertools.chain.from_iterable([self.from_functional_group(y) for y in edi.functional_segments()])) + return self.flatmap(self.flatmap([self.from_functional_group(y) for y in edi.functional_segments()])) + def flatmap(self,x): + return list(itertools.chain.from_iterable(x)) def from_functional_group(self, fg): return [self.from_transaction(x) for x in fg.transaction_segments()] @@ -28,12 +32,24 @@ def from_functional_group(self, fg): # @mapping = mapping the GS08 segment to the type of healthcare transaction # def from_transaction(self, trnx): - type = self.mapping.get(trnx.transaction_type) - data = [x for x in trnx.data if x.segment_name() not in ['ST', 'SE']] - if type == "837P": - return Claim837p(data, trnx.format_cls) - elif type == "837I": - return Claim837i(data, trnx.format_cls) - else: - return None #no mapping available + return ClaimBuilder(self.mapping.get(trnx.transaction_type), + [x for x in trnx.data if x.segment_name() not in ['ST', 'SE']], trnx.format_cls).build() + + # + # Convert all data to json data + # + def to_json(self, edi): + return { + **EDIManager.class_metadata(edi), + 'FuncitonalGroup': [ + { + **EDIManager.class_metadata(fg), + 'Transactions': [ + { + **EDIManager.class_metadata(trnx), + 'Claims': [clm.to_json() for clm in self.from_transaction(trnx)] + } for trnx in fg.transaction_segments()] + } for fg in edi.functional_segments()] + } + diff --git a/databricksx12/hl7.py b/databricksx12/hls/hl7.py similarity index 100% rename from databricksx12/hl7.py rename to databricksx12/hls/hl7.py diff --git a/databricksx12/hls/loop.py b/databricksx12/hls/loop.py new file mode 100644 index 0000000..e7b4e33 --- /dev/null +++ b/databricksx12/hls/loop.py @@ -0,0 +1,174 @@ +from databricksx12.edi import * +from functools import reduce + +class LoopMapping: + + # + # class to hold k,v of hl_code, loop + # + def __init__(self, mappings=None): + self.mappings = mappings if mappings is not None else { + '20': { + 
'loop name': 'Information Source', + 'loop': '2000A' + }, + '22': { + 'loop name': 'Subscriber', + 'loop': '2000B' + }, + '23': { + 'loop name': 'Patient', + 'loop': '2000C' + }, + } + + # + # Get hl_code associated with the loop + # + def get_hl_code(self, loop): + return None if (temp := [hl_code for hl_code, v in self.mappings.items() if v['loop'] == loop]) == [] else temp[0] + + def get_mapping(self, element, description=None): + """ Returns a specific mapping based on element key and description. """ + mappings = self.mappings.get(element, {}) + if description: + return mappings.get(description, None) + return None + + +class Loop(EDI): + + + def __init__(self, data, delim_cls=AnsiX12Delim, loop_mapping=LoopMapping()): + self.data = data + self.format_cls = delim_cls + self.mapping = loop_mapping + self._start_indexes = self._build_hierarchy_start_indexes() + self.loop_hierarchy = self.build_hierarchy() + + """ + loop_hierarchy = { unique_id : { + start_idx : "" + end_idx : "" + parent_id : "" + hl_code : "" + child_code: "" + } + } + """ + + # + # Get the specified loop based upon a position, else return None if does not exist + # @param pos - the position of the data point + # @param loop - the loop from the mapping that is being searched for + # + # @return None if not found, otherwise value from loop_hierarchy + # + def get_loop(self, pos, loop): + return None if (temp := self.mapping.get_hl_code(loop)) is None else self.find_hl_codes(pos, temp) + + # + # same as above, but only returns segment list + # + def get_loop_segments(self, pos, loop): + return [] if (temp := self.get_loop(pos, loop)) is None else self.data[temp['start_idx']:temp['end_idx']] + + # + # Build a complete hierarchical view of all HL segments start and end positions + # + def build_hierarchy(self): + """ + Return all start indexes + """ + return { + x[0]: { + "start_idx": x[1], + "end_idx": self._determine_end_index(x[1]), + "parent_id": x[2], + "hl_code": x[3], + "child_code": x[4], + 
"subordinate_ind": self.subordinate_child_indicator(x[1]) #true if previous HL04=1 + } + for x in self._start_indexes + } + + # + # Return a tuple of all HL segments, start index, id, parent id, child code, and hl_code + # + def _build_hierarchy_start_indexes(self): + return [ ( x.element(1), #id + i, # "start_idx" + x.element(2), # "parent_id" + x.element(3), # "hl_code" + x.element(4)) # "child_code" + for i,x in self.segments_by_name_index("HL")] + + # + # Determine the end index of an HL segment + # @param start_idx - the start index of the existing HL segment + # x[1] = start index from tuple in _build_hierarchy_start_indexes + # + def _determine_end_index(self, start_idx): + return min([x[1] for x in self._start_indexes if x[1] > start_idx] + [len(self.data)]) + + # + # Primary search function within HL + # @param pos_idx - the reference point + # @param hl_code - the hl code being searched for + # + # @returns None if not found, otherwise the value from loop_hierarchy + def find_hl_codes(self, pos_idx, hl_code): + init_hl = self._filter_hl_on_position(pos_idx) + return (None if init_hl is None else self.traverse_loops(hl_code, init_hl)) + + + # + # Go from child to parent searching for the specified hl_code + # + def traverse_loops(self, hl_code, loop): + if loop['hl_code'] == hl_code: + return loop + elif loop['parent_id'] == "": + return None + else: + return self.traverse_loops(hl_code, self.determine_parent(loop)) + + # + # parent is either the parent_id or the previous HL segment if there was a child indicator section + # + def determine_parent(self, loop): + return loop['parent_id'] if loop['subordinate_ind'] == 0 else self.loop_hierarchy.get(self.determine_previous_hl(loop['start_idx'])[0]) + + # + # returns the HL segment + # + def _filter_hl_on_position(self, pos_idx): + return (list(temp)[0] if (temp := filter(lambda v: v['start_idx'] <= pos_idx <= v['end_idx'], self.loop_hierarchy.values())) else None) + + + # + # determine if the HL segment at 
pos is a subordinate child of a parent + # i.e. (parent has child code =1) and parent is previous HL segment + # + # + def subordinate_child_indicator(self, pos): + return 0 if self.determine_previous_hl(pos) is None else self.determine_previous_hl(pos)[4] + + # + # Determine the previous HL segment based upon a position + # + def determine_previous_hl(self, pos): + try: + return reduce(lambda a,b: a if a[1] > b[1] else b, + filter(lambda x: x[1] < pos, self._start_indexes)) + except: + return None #when there is no preceding hl segment + + +""" +sample_data_837i_edited = open("/sampledata/837/CHPW_Claimdata_edited.txt", "rb").read().decode("utf-8") +claims = Loop(sample_data_837i_edited) +claims.find_reference_element(claims.claim_segments()[0], '22', 'Claim ID') +Outputs: +['1805080AV3648339'] +""" diff --git a/databricksx12/hls/support_classes/__init__.py b/databricksx12/hls/support_classes/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/databricksx12/hls/support_classes/identities.py b/databricksx12/hls/support_classes/identities.py new file mode 100644 index 0000000..a2bed66 --- /dev/null +++ b/databricksx12/hls/support_classes/identities.py @@ -0,0 +1,272 @@ +from databricksx12.edi import Segment +from typing import List, Dict + +from collections import defaultdict +from functools import reduce + +class Identity: + + # entity name identities associated with every NM1 line. 
a combination may occur within loops + nm1_identifiers = { + '85': 'Billing Provider', # entity that is billing for the services provided and 87 disregarded + 'IL': 'Insured', # insured individual + 'QC': 'Patient', # patient for 837P and PAT segments in 837i + '82': 'Rendering Provider',# individual or group that performed the service + 'DN': 'Referring Provider',# doctor who referred the patient to another doctor + '77': 'Service Facility', # location where the service was performed + 'DQ': 'Supervising Provider', # provider who oversees the patient's care + '71': 'Attending Provider',# provider with primary responsibility for the patient at the time of service + 'DK': 'Ordering Provider', # provider who ordered the service or item + 'PR': 'Payer', # insurance company or payer + 'PE': 'Payee', # entity receiving the payment + + } + + def __init__(self, segments: List[Segment]): + self.name: str = None + self.street: str = None + self.type: str = None + self.provider_type: str = None + self.city: str = None + self.state: str = None + self.zip: str = None + self.id: str = None + self.npi: str = None + self.build(segments) + + # build entity and address for any identity + def build(self, loop: List[Segment]): + nm1_segment = next(filter(lambda segment: segment.element(0) == 'NM1' and segment.segment_len() >= 10, loop), None) + n3_segment = next(filter(lambda segment: segment.element(0) == 'N3', loop), None) # taking only the first address lines + n4_segment = next(filter(lambda segment: segment.element(0) == 'N4', loop), None) + + list(map(self.process_nm1_segment, [nm1_segment] if nm1_segment else [])) + list(map(self.process_n3_segment, [n3_segment] if n3_segment else [])) + list(map(self.process_n4_segment, [n4_segment] if n4_segment else [])) + + def process_nm1_segment(self, segment: Segment): + self.type = 'Organization' if segment.element(2) == '2' else 'Individual' + self.name = segment.element(3) if self.type == 'Organization' else ' 
'.join([segment.element(3), segment.element(4), segment.element(5)]) + self.entity_type = self.nm1_identifiers.get(segment.element(1), 'Unknown') + self.npi = segment.element(9) if len(segment.element(9)) == 10 else None + self.id = segment.element(9) if len(segment.element(9)) != 10 else None + + def process_n3_segment(self, segment: Segment): + self.street = segment.element(1) + + def process_n4_segment(self, segment: Segment): + self.city = segment.element(1) + self.state = segment.element(2) + self.zip = segment.element(3) + + def to_dict(self): + return {k: v for k, v in self.__dict__.items() if v is not None} + + + +class BillingIdentity(Identity): + def __init__(self, billing_segments: List[Segment]): + super().__init__(billing_segments) + list(map(lambda segment: Identity([segment]).to_dict(), billing_segments)) + + +class SubscriberIdentity(Identity): + def __init__(self, subscriber_segments: List[Segment]): + self.relationship_to_insured = None + super().__init__(subscriber_segments) + self.build_subscriber(subscriber_segments) + + def build_subscriber(self, subscriber_loop: List[Segment]): + sbr_segment = next(filter(lambda segment: segment.element(0) == 'SBR', subscriber_loop), None) + if sbr_segment: + self.relationship_to_insured = 'Self' if sbr_segment.element(2) == '18' else 'Dependent' + + + + +class PatientIdentity(Identity): + def __init__(self, patient_segments: List[Segment]): + super().__init__(patient_segments) + self.build_patient(patient_segments) + + def build_patient(self, patient_loop: List[Segment]): + def process_patient_segment(segment: Segment): + self.type = 'Patient' + self.name = ' '.join([segment.element(3), segment.element(4), segment.element(5)]) + return list(map(process_patient_segment, filter(lambda s: s.element(0) == 'NM1' and s.element(1) == 'QC', patient_loop))) + + +class ClaimIdentity(Identity): + def __init__(self, claim_segments: List[Segment]): + self.patient_id = None + self.claim_amount = None + 
self.facility_type_code = None + self.claim_code_freq = None + self.admission_date = None + self.benefits_assign_flag = None + self.claim_id = None + self.admission_type = None # only 837I? + + self.pricipal_diagnosis_code = None + + self.providers = defaultdict(list) # still need? + super().__init__(claim_segments) + self.build_claim_lines(claim_segments) + + def build_claim_lines(self, claim_loop: List[Segment]): + # Process claim-specific segments + clm_segments = filter(lambda segment: segment.element(0) == 'CLM', claim_loop) + dtp_segments = filter(lambda segment: segment.element(0) == 'DTP', claim_loop) + cli_segments = filter(lambda segment: segment.element(0) == 'CLI', claim_loop) + ref_segments = filter(lambda segment: segment.element(0) == 'REF' and segment.element(1) == 'D9', claim_loop) + + # get only the first HI segment for the principal diagnosis code + principle_diagnosis_segment = filter(lambda segment: segment.element(0) == 'HI' and segment.element(1).split(':')[0] in ['ABK', 'BK'], claim_loop) + # get all other HI segments for other diagnosis codes + other_diagnosis_segments = filter(lambda segment: segment.element(0) == 'HI' and segment.element(1).split(':')[0] in ['ABF', 'BF'], claim_loop) + + + list(map(self.process_clm_segment, clm_segments)) + list(map(self.process_dtp_segment, dtp_segments)) + list(map(self.process_cli_segment, cli_segments)) + list(map(self.process_ref_segment, ref_segments)) + # if principle_diagnosis_segment: + # self.process_principal_diagnosis_segment(principle_diagnosis_segment) + + # Process other diagnosis codes + # self.other_diagnosis_codes = [ + # code for segment in other_diagnosis_segments + # for i, code in enumerate(segment.element(1).split(':')) + # if i % 2 != 0 + # ] + + + # Process NM1 segments for providers + nm1_segments = filter(lambda segment: segment.element(0) == 'NM1', claim_loop) + list(map(lambda segment:
self.providers[self.nm1_identifiers.get(segment.element(1))].append(Identity([segment]).to_dict()), nm1_segments)) + + def process_clm_segment(self, segment: Segment): + self.patient_id = segment.element(1) # submitter's identifier + self.claim_amount = segment.element(2) + self.benefits_assign_flag = 'Yes' if segment.element(8) == 'Y' else 'No' # Benefits flag + + place_of_service = segment.element(5).split(':') # codes[1] == A for institutional and B for professional + self.facility_type_code = place_of_service[0] + self.claim_code_freq = place_of_service[2] + + def process_dtp_segment(self, segment: Segment): + self.date = segment.element(3) # format D8:CCYYMMDD + + def process_cli_segment(self, segment: Segment): + self.admission_date = segment.element(1) # Only in 837I + + def process_ref_segment(self, segment: Segment): + self.claim_id = segment.element(2) + + # def process_principal_diagnosis_segment(self, segment: Segment): + # self.principal_diagnosis_code = segment.element(2) # assuming HI segment's first element is the principal diagnosis code + + + + +class SubmitterIdentity(Identity): + def __init__(self, submitter_segments: List[Segment]): + self.contact_name = None + self.contacts = defaultdict(list) + super().__init__(submitter_segments) + self.build_submitter_lines(submitter_segments) + + def build_submitter_lines(self, submitter_loop: List[Segment]): + nm1_segments = filter(lambda segment: segment.element(0) == 'NM1' and segment.element(1) == '41', submitter_loop) + per_segments = filter(lambda segment: segment.element(0) == 'PER', submitter_loop) + + list(map(self.process_nm1_segment, nm1_segments)) + list(map(self.process_per_segment, per_segments)) + + def process_per_segment(self, segment): + self.contact_name = segment.element(2) + contact_methods = { + 'EM': 'Email', + 'TE': 'Telephone', + 'FX': 'Fax' + } + contact = { + 'contact_method': contact_methods.get(segment.element(3), 'Unknown method'), + 'contact_number': segment.element(4) + } + 
# Add additional contact details if present + if segment.element(5) in contact_methods: + contact['contact_method_2'] = contact_methods.get(segment.element(5), 'Unknown method') + contact['contact_number_2'] = segment.element(6) + + if segment.element(7) in contact_methods: + contact['contact_method_3'] = contact_methods.get(segment.element(7), 'Unknown method') + contact['contact_number_3'] = segment.element(8) + + self.contacts['primary'].append(contact) + + + +class ReceiverIdentity(Identity): + def __init__(self, receiver_segments: List[Segment]): + super().__init__(receiver_segments) + self.build_receiver_lines(receiver_segments) + + def build_receiver_lines(self, receiver_loop: List[Segment]): + nm1_segments = filter(lambda segment: segment.element(0) == 'NM1' and segment.element(1) == '40', receiver_loop) + return list(map(self.process_nm1_segment, nm1_segments)) + + + +class ServiceLine(Identity): + + def __init__(self, d): + for k,v in d.items(): + setattr(self,k,v) + + @staticmethod + def common(sv, lx, dtp): + return { + "claim_line_number": lx.element(1), + "service_date": dtp.element(3), + "service_time": dtp.element(1), + "service_date_format": dtp.element(2) + } + + # + # Institutional Claims + # + @classmethod + def from_sv2(cls, sv2, lx, dtp): + return cls({**cls.common(sv2, lx, dtp), + **{ + "units": sv2.element(5), + "units_measurement": sv2.element(4), + "line_chrg_amt": sv2.element(3), + "prcdr_cd": sv2.element(2, 1, ""), + "prcdr_cd_type": sv2.element(2, 0, ""), + "modifier_cds": ','.join(filter(lambda x: x!="", [sv2.element(2, 2, ""), sv2.element(2, 3, ""), sv2.element(2, 4,""), sv2.element(2, 5, "")])), + "revenue_cd": sv2.element(1) + } + }) + + # + # Professional Claims + # + @classmethod + def from_sv1(cls, sv1, lx, dtp): + return cls({**cls.common(sv1, lx, dtp), + **{ + "units": sv1.element(4), + "units_measurement": sv1.element(3), + "line_chrg_amt": sv1.element(2), + "prcdr_cd": sv1.element(1, 1), + "prcdr_cd_type": sv1.element(1, 0), 
+ "modifier_cds": ','.join(filter(lambda x: x!="", [sv1.element(1, 2, ""), sv1.element(1, 3, ""), sv1.element(1, 4,""), sv1.element(1, 5, "")])), + "place_of_service": sv1.element(5), + "dg_cd_pntr": sv1.element(7) + } + }) + + + diff --git a/sampledata/837/837p.txt b/sampledata/837/837p.txt new file mode 100644 index 0000000..a69643f --- /dev/null +++ b/sampledata/837/837p.txt @@ -0,0 +1,45 @@ +ISA*00* *00* *ZZ*1234567 *ZZ*11111 *170508*1141*^*00501*000000101*1*P*:~ +GS*HC*XXXXXXX*XXXXX*20170617*1741*101*X*005010X222A1~ +ST*837*1239*005010X222A1~ +BHT*0019*00*010*20170617*1741*CH~ +NM1*41*2*SUBMITTER*****46*ABC123~ +PER*IC*BOB SMITH*TE*4805551212~ +NM1*40*2*RECEIVER*****46*44556~ +HL*1**20*1~ +NM1*85*2*BILLING PROVIDER*****XX*1122334455~ +N3*1234 SOME ROAD~ +N4*CHICAGO*IL*606739999~ +REF*EI*999999999~ +HL*2*1*22*0~ +SBR*P*18*******12~ +NM1*IL*1*BLOGGS*JOE****MI*1234567890~ +N3*1 SOME BLVD~ +N4*CHICAGO*IL*606129998~ +DMG*D8*19570111*M~ +NM1*PR*2*PAYER*****PI*12345~ +N3*1 PAYER WAY~ +N4*ST LOUIS*MO*212441850~ +REF*2U*W1014~ +CLM*1000A*140***19:B:1*Y*A*Y*Y~ +HI*ABK:I10~ +LX*1~ +SV1*HC:99213*140*UN*1***1~ +DTP*472*D8*20151124~ +HL*3*1*22*0~ +SBR*P*18*******12~ +NM1*IL*1*BLOGGS*FRED****MI*9876543201~ +N3*1 ANOTHER STR~ +N4*CHICAGO*IL*606129998~ +DMG*D8*19700601*M~ +NM1*PR*2*PAYER*****PI*12345~ +N3*1 PAYER WAY~ +N4*ST LOUIS*MO*212441850~ +REF*2U*W1014~ +CLM*1001A*140***19:B:1*Y*A*Y*Y~ +HI*ABK:I10~ +LX*1~ +SV1*HC:99213*140*UN*1***1~ +DTP*472*D8*20151124~ +SE*41*1239~ +GE*1*101~ +IEA*1*000000101~ diff --git a/sampledata/837/CHPW_Claimdata_edited.txt.tmp b/sampledata/837/CHPW_Claimdata_edited.txt.tmp new file mode 100644 index 0000000..5ec175a --- /dev/null +++ b/sampledata/837/CHPW_Claimdata_edited.txt.tmp @@ -0,0 +1,189 @@ +ISA*00* *00* *01*987654321 *ZZ*123456789 *180508*0833*^*00501*697773230*1*P*:~ +GS*HC*CLEARINGHOUSE*123456789*20180508*0833*212950697*X*005010X222A1~ +ST*837*000000001*005010X222A1~ +BHT*0019*00*7349063984*20180508*0833*CH~ +NM1*41*2*CLEARINGHOUSE 
LLC*****46*987654321~ +PER*IC*CLEARINGHOUSE CLIENT SERVICES*TE*8005551212*FX*8005551212~ +NM1*40*2*123456789*****46*CHPWA~ +HL*1**20*1~ +NM1*85*2*BH CLINIC OF VANCOUVER*****XX*1122334455~ +N3*12345 MAIN ST~ +N4*VANCOUVER*WA*98662~ +REF*EI*720000000~ +PER*IC*CONTACT*TE*9185551212~ +NM1*87*2~ +N3*PO BOX 1234~ +N4*VANCOUVER*WA*986681234~ +HL*2*1*22*0~ +SBR*P*18**COMMUNITY HLTH PLAN OF WASH*****CI~ +NM1*IL*1*SUBSCRIBER*JOHN*J***MI*987321~ +N3*987 65TH PL~ +N4*VANCOUVER*WA*986640001~ +DMG*D8*19881225*M~ +NM1*PR*2*COMMUNITY HEALTH PLAN OF WASHINGTON*****PI*CHPWA~ +CLM*1805080AV3648339*20***57:B:1*Y*A*Y*Y~ +REF*D9*7349065509~ +HI*ABK:F1120~ +NM1*82*1*PROVIDER*JAMES****XX*1112223338~ +PRV*PE*PXC*261QR0405X~ +NM1*77*2*BH CLINIC OF VANCOUVER*****XX*1122334455~ +N3*12345 MAIN ST SUITE A1~ +N4*VANCOUVER*WA*98662~ +LX*1~ +SV1*HC:H0003*20*UN*1***1~ +DTP*472*D8*20180428~ +REF*6R*142671~ +SE*34*000000001~ +ST*837*000000002*005010X222A1~ +BHT*0019*00*7349063984*20180508*0833*CH~ +NM1*41*2*CLEARINGHOUSE LLC*****46*987654321~ +PER*IC*CLEARINGHOUSE CLIENT SERVICES*TE*8005551212*FX*8005551212~ +NM1*40*2*123456789*****46*CHPWA~ +HL*63**20*1~ +NM1*85*2*BH CLINIC OF VANCOUVER*****XX*1122334455~ +N3*12345 MAIN ST~ +N4*VANCOUVER*WA*98662~ +REF*EI*720000000~ +PER*IC*CONTACT*TE*9185551212~ +NM1*87*2~ +N3*PO BOX 1234~ +N4*VANCOUVER*WA*986681234~ +HL*64*63*22*0~ +SBR*P*18**COMMUNITY HLTH PLAN OF WASH*****CI~ +NM1*IL*1*PATIENT*SUSAN*E***MI*765123~ +N3*765 43RD ST~ +N4*VANCOUVER*WA*986640002~ +DMG*D8*19881031*F~ +NM1*PR*2*COMMUNITY HEALTH PLAN OF WASHINGTON*****PI*CHPWA~ +CLM*1805080AV3648347*50.1***57:B:1*Y*A*Y*Y~ +REF*D9*7349065730~ +HI*ABK:F1520*ABF:F1220~ +NM1*82*1*PROVIDER*SUSAN****XX*1112223346~ +PRV*PE*PXC*261QR0405X~ +NM1*77*2*BH CLINIC OF VANCOUVER*****XX*1122334455~ +N3*12345 MAIN ST SUITE A1~ +N4*VANCOUVER*WA*98662~ +LX*1~ +SV1*HC:96153:HF*50.1*UN*6***1:2~ +DTP*472*D8*20180426~ +REF*6R*143792~ +SE*34*000000002~ +ST*837*000000003*005010X222A1~ +BHT*0019*00*7349063984*20180508*0833*CH~ 
+NM1*41*2*CLEARINGHOUSE LLC*****46*987654321~ +PER*IC*CLEARINGHOUSE CLIENT SERVICES*TE*8005551212*FX*8005551212~ +NM1*40*2*123456789*****46*CHPWA~ +HL*49**20*1~ +NM1*85*2*BH CLINIC OF VANCOUVER*****XX*1122334455~ +N3*12345 MAIN ST~ +N4*VANCOUVER*WA*98662~ +REF*EI*720000000~ +PER*IC*CONTACT*TE*9185551212~ +NM1*87*2~ +N3*PO BOX 1234~ +N4*VANCOUVER*WA*986681234~ +HL*50*49*22*0~ +SBR*P*18**COMMUNITY HLTH PLAN OF WASH*****CI~ +NM1*IL*1*SUBSCRIBER*JOHN*J***MI*987321~ +N3*987 65TH PL~ +N4*VANCOUVER*WA*986640001~ +DMG*D8*19881225*M~ +NM1*PR*2*COMMUNITY HEALTH PLAN OF WASHINGTON*****PI*CHPWA~ +CLM*1805080AV3648340*11.64***57:B:1*Y*A*Y*Y~ +REF*D9*7349065492~ +HI*ABK:F1020*ABF:F1220~ +NM1*82*1*PROVIDER*SUSAN****XX*1112223346~ +PRV*PE*PXC*261QR0405X~ +NM1*77*2*BH CLINIC OF VANCOUVER*****XX*1122334455~ +N3*12345 MAIN ST SUITE A1~ +N4*VANCOUVER*WA*98662~ +LX*1~ +SV1*HC:T1017:HF*11.64*UN*1***1:2~ +DTP*472*D8*20180427~ +REF*6R*140976~ +SE*34*000000003~ +ST*837*000000004*005010X222A1~ +BHT*0019*00*7349063984*20180508*0833*CH~ +NM1*41*2*CLEARINGHOUSE LLC*****46*987654321~ +PER*IC*CLEARINGHOUSE CLIENT SERVICES*TE*8005551212*FX*8005551212~ +NM1*40*2*123456789*****46*CHPWA~ +HL*75**20*1~ +NM1*85*2*BH CLINIC OF VANCOUVER*****XX*1122334455~ +N3*12345 MAIN ST~ +N4*VANCOUVER*WA*98662~ +REF*EI*720000000~ +PER*IC*CONTACT*TE*9185551212~ +NM1*87*2~ +N3*PO BOX 1234~ +N4*VANCOUVER*WA*986681234~ +HL*76*75*22*0~ +SBR*P*18**COMMUNITY HLTH PLAN OF WASH*****CI~ +NM1*IL*1*PATIENT*SUSAN*E***MI*765123~ +N3*765 43RD ST~ +N4*VANCOUVER*WA*986640002~ +DMG*D8*19881031*F~ +NM1*PR*2*COMMUNITY HEALTH PLAN OF WASHINGTON*****PI*CHPWA~ +CLM*1805080AV3648353*234***53:B:1*Y*A*Y*Y~ +REF*D9*7349064290~ +HI*ABK:F251~ +NM1*82*1*PROVIDER*SUSAN****XX*1112223346~ +PRV*PE*PXC*251S00000X~ +NM1*77*2*BH CLINIC OF VANCOUVER*****XX*1122334455~ +N3*12345 MAIN ST SUITE A1~ +N4*VANCOUVER*WA*98662~ +LX*1~ +SV1*HC:90853*234*UN*120***1~ +DTP*472*D8*20180427~ +REF*6R*140787~ +NTE*ADD*05~ +SE*35*000000004~ 
+ST*837*000000005*005010X222A1~ +BHT*0019*00*7349063984*20180508*0833*CH~ +NM1*41*2*CLEARINGHOUSE LLC*****46*987654321~ +PER*IC*CLEARINGHOUSE CLIENT SERVICES*TE*8005551212*FX*8005551212~ +NM1*40*2*123456789*****46*CHPWA~ +HL*79**20*1~ +NM1*85*2*BH CLINIC OF VANCOUVER*****XX*1122334455~ +N3*12345 MAIN ST~ +N4*VANCOUVER*WA*98662~ +REF*EI*720000000~ +PER*IC*CONTACT*TE*9185551212~ +NM1*87*2~ +N3*PO BOX 1234~ +N4*VANCOUVER*WA*986681234~ +HL*80*79*22*0~ +SBR*P*18**COMMUNITY HLTH PLAN OF WASH*****CI~ +NM1*IL*1*SUBSCRIBER*JOHN*J***MI*987321~ +N3*987 65TH PL~ +N4*VANCOUVER*WA*986640001~ +DMG*D8*19881225*M~ +NM1*PR*2*COMMUNITY HEALTH PLAN OF WASHINGTON*****PI*CHPWA~ +HL*81*79*22*1~ +SBR*P*18**COMMUNITY HLTH PLAN OF WASH*****CI~ +NM1*IL*1*SUBSCRIBER*JOHN*J***MI*987321~ +N3*987 65TH PL~ +N4*VANCOUVER*WA*986640001~ +DMG*D8*19881225*M~ +NM1*PR*2*COMMUNITY HEALTH PLAN OF MASS*****PI*CHPWA~ +HL*82*79*23*0~ +PAT*P*18**DEPENDENT PATIENT*****CI~ +NM1*IL*1*SUBSCRIBER*JOHN*J***MI*987321~ +N3*987 65TH PL~ +N4*VANCOUVER*WA*986640001~ +DMG*D8*19881225*M~ +NM1*PR*2*COMMUNITY HEALTH PLAN OF WASHINGTON*****PI*CHPWA~ +CLM*1805080AV3648355*20***57:B:1*Y*A*Y*Y~ +REF*D9*7349064036~ +HI*ABK:F1020*ABF:F1120~ +NM1*82*1*PROVIDER*JAMES****XX*1112223338~ +PRV*PE*PXC*261QR0405X~ +NM1*77*2*BH CLINIC OF VANCOUVER*****XX*1122334455~ +N3*12345 MAIN ST SUITE A1~ +N4*VANCOUVER*WA*98662~ +LX*1~ +SV1*HC:H0003*20*UN*1***1:2~ +DTP*472*D8*20180427~ +REF*6R*143907~ +SE*34*000000005~ +GE*5*212950697~ +IEA*1*697773230~ diff --git a/setup.py b/setup.py index f087d59..b95d690 100644 --- a/setup.py +++ b/setup.py @@ -10,7 +10,8 @@ setup( name="databricksx12", version="0.0.1", - python_requires='>=3.9.*', + # python_requires='>=3.9.*', + python_requires='>=3.9', author="", author_email="aaron.zavora@databricks.com", description= "Parser for handling x12 EDI transactions in Spark", diff --git a/tests/test_claims.py b/tests/test_claims.py new file mode 100644 index 0000000..2dff588 --- /dev/null +++ b/tests/test_claims.py 
@@ -0,0 +1,25 @@ +from test_spark_base import * +from databricksx12.hls import * +from databricksx12 import * +import unittest, re + +class TestClaims(PySparkBaseTest): + + def test_professional_service_lines(self): + edi = EDI(open("sampledata/837/CC_837P_EDI.txt", "rb").read().decode("utf-8")) + hm = HealthcareManager() + data = hm.from_edi(edi)[0] + assert(len(data.sl_info) == 2) + assert([y.to_dict().get("claim_line_number") for y in data.sl_info] == ['1', '2']) + assert([y.to_dict().get("place_of_service") for y in data.sl_info] == ['11', '11']) + assert([y.to_dict().get("line_chrg_amt") for y in data.sl_info] == ['300', '300']) + + def test_institutional_service_lines(self): + edi = EDI(open("sampledata/837/CC_837I_EDI.txt", "rb").read().decode("utf-8")) + hm = HealthcareManager() + data = hm.from_edi(edi)[0] + assert([y.to_dict().get("claim_line_number") for y in data.sl_info] == ['1', '2', '3', '4', '5', '6', '7', '8', '9']) + assert([y.to_dict().get("revenue_cd") for y in data.sl_info] ==['0124', '0250', '0260', '0300', '0301', '0305', '0306', '0307', '0351']) + assert( sum([float(y.to_dict().get("line_chrg_amt")) for y in data.sl_info]) == 17166.7) + + diff --git a/tests/test_loop.py b/tests/test_loop.py new file mode 100644 index 0000000..e0dcd13 --- /dev/null +++ b/tests/test_loop.py @@ -0,0 +1,74 @@ +from test_spark_base import * +from databricksx12.hls.loop import * +import unittest, re + + +class TestLoop(PysparkBaseTest): + + data = open("sampledata/837/837p.txt", "rb").read().decode("utf-8") + loop = Loop(data) + + # + # Test Loop base info + # + def test_loop_hierarchy_build(self): + assert (set(TestLoop.loop.loop_hierarchy.keys()) == set({'1','2','3'})) + assert (TestLoop.loop.loop_hierarchy.get('1')['start_idx'] == 7) + assert (TestLoop.loop.loop_hierarchy.get('2')['start_idx'] == 12) + assert (TestLoop.loop.loop_hierarchy.get('3')['start_idx'] == 27) + assert (TestLoop.loop.loop_hierarchy.get('1')['end_idx'] == 12) + assert 
(TestLoop.loop.loop_hierarchy.get('2')['end_idx'] == 27) + assert (TestLoop.loop.loop_hierarchy.get('3')['end_idx'] == 45) + assert ([x.get('hl_code') for x in list(TestLoop.loop.loop_hierarchy.values())] == ['20','22','22']) + assert ([x.get('child_code') for x in list(TestLoop.loop.loop_hierarchy.values())] == ['1','0','0']) + + + # + # Test traversing hierarchy + # + def test_loop_hierarchy(self): + clms = TestLoop.loop.segments_by_name_index("CLM") + assert (clms[0][0] == 22) + assert (clms[1][0] == 37) + + assert (TestLoop.loop.find_hl_codes(22, '20') == TestLoop.loop.find_hl_codes(37, '20')) + assert (TestLoop.loop.find_hl_codes(22, '22') != TestLoop.loop.find_hl_codes(37, '22')) + + assert (TestLoop.loop.find_hl_codes(22, '20')['start_idx'] == 7) + assert (TestLoop.loop.find_hl_codes(22, '22')['start_idx'] == 12) + assert (TestLoop.loop.find_hl_codes(37, '22')['start_idx'] == 27) + + # + # Test traversing hierarchy to find correct loops + # + def test_loop_hierarchy_child_codes(self): + data = open("./sampledata/837/CHPW_Claimdata_edited.txt.tmp", "rb").read().decode("utf-8") + loop = Loop(data) + assert(loop.find_hl_codes(174, '22')['start_idx'] == 160) + + # + # Test getting all segments within a loop + # + def test_get_segments(self): + data = open("./sampledata/837/CHPW_Claimdata_edited.txt.tmp", "rb").read().decode("utf-8") + loop = Loop(data) + assert(loop.get_loop(174, '2000A')['start_idx'] == 144 and loop.get_loop(174, '2000A')['end_idx'] == 153) + assert( len(loop.get_loop_segments(174, '2000A')) == 153 - 144) + assert(loop.get_loop_segments(174, '2000A')[0].element(0) == "HL") + assert( len([x.element(0) for x in loop.get_loop_segments(174, '2000A') if x.element(0) == "HL"]) == 1) + + # + # Test loop start places by position using loop name search + # + def test_loop_search_by_name(self): + assert(TestLoop.loop.get_loop(22, "2000A")['start_idx'] == 7) + assert(TestLoop.loop.get_loop(22, "2000B")['start_idx'] == 12) +
assert(TestLoop.loop.get_loop(37, "2000A")['start_idx'] == 7) + assert(TestLoop.loop.get_loop(37, "2000B")['start_idx'] == 27) + + + +if __name__ == '__main__': + unittest.main() + + diff --git a/tests/test_pyspark.py b/tests/test_pyspark.py index 36327d5..b7fcd3e 100644 --- a/tests/test_pyspark.py +++ b/tests/test_pyspark.py @@ -10,6 +10,9 @@ def test_transaction_count(self): .map(lambda x: EDI(x)) .map(lambda x: {"transaction_count": x.num_transactions()}) ).toDF() - assert ( data.count() == 4) #4 rows - assert ( data.select(data.transaction_count).groupBy().sum().collect()[0]["sum(transaction_count)"] == 8) #8 ST/SE transactions + assert ( data.count() == 5) #5 rows + assert ( data.select(data.transaction_count).groupBy().sum().collect()[0]["sum(transaction_count)"] == 9) #9 ST/SE transactions + +if __name__ == '__main__': + unittest.main()