diff --git a/Excel_surveys/99DD-U77W.xlsx b/Excel_surveys/nametoChange1.xlsx similarity index 100% rename from Excel_surveys/99DD-U77W.xlsx rename to Excel_surveys/nametoChange1.xlsx diff --git a/Excel_surveys/G0BF-1XZE.xlsx b/Excel_surveys/nametoChange2.xlsx similarity index 100% rename from Excel_surveys/G0BF-1XZE.xlsx rename to Excel_surveys/nametoChange2.xlsx diff --git a/Excel_surveys/KVIO-U0K9.xlsx b/Excel_surveys/nametoChange3.xlsx similarity index 100% rename from Excel_surveys/KVIO-U0K9.xlsx rename to Excel_surveys/nametoChange3.xlsx diff --git a/src/main.py b/src/main.py index a167edb..810b344 100644 --- a/src/main.py +++ b/src/main.py @@ -8,14 +8,22 @@ def main(): logging.info("Program started") start_time = time.time() + #initialisation mainFolderPath,configFileMain,hashL, xlsxList = initialisation.main() - levelListM,inputsSubListM, inputsDimListM, inputsInterDimensionalListM, kripSimplifiedListM=data_import.main(mainFolderPath,configFileMain, xlsxList) + + #get data + levelListM,inputsSubListM, inputsDimListM, inputsInterDimensionalListM, kripSimpleListOutMain, kripInputListOutMain=data_import.main(mainFolderPath,configFileMain, xlsxList) + + #process data outputDataframeMain, DimConsistencyListMain, DimWeightListMain=data_processing.main(inputsSubListM, inputsDimListM) + #report generation from src.methods import report_generator # to make global variable work report_generator.main(mainFolderPath,configFileMain,len(xlsxList),levelListM,inputsSubListM, inputsDimListM, - inputsInterDimensionalListM, kripSimplifiedListM,outputDataframeMain, - DimConsistencyListMain,DimWeightListMain) + inputsInterDimensionalListM,outputDataframeMain, + DimConsistencyListMain,DimWeightListMain, + kripSimpleListOutMain, kripInputListOutMain) + logging.info("Program ended") logging.info("Program runtime: " + f"{(time.time() - start_time):.2f}" + " seconds") diff --git a/src/methods/data_import.py b/src/methods/data_import.py index 87a0ae0..214310e 100644 --- a/src/methods/data_import.py +++ b/src/methods/data_import.py @@ -28,13 +28,13 @@ def main(inputFolder, configFile,xlList): worksheetList = get_first_X_worksheet_names(xlList[0], len(subCompNumList)) - levelList,weightList, dimensionList, interDimensionalList, kripSimplifiedList=start_import(xlList, worksheetList, + levelList,weightList, dimensionDF, interDimensionalList=start_import(xlList, worksheetList, cC, sR,subCompNumList,cLG,lC,special_sheets_list) - #print(x) + kripSimpleListOut, kripInputListOut = iterate_through_simple_krip(weightList, 17, 3,dimensionDF) logging.info("M - Data import finished successfully") - return levelList,weightList, dimensionList, interDimensionalList, kripSimplifiedList + return levelList,weightList, dimensionDF, interDimensionalList, kripSimpleListOut, kripInputListOut def number_of_comparisions(n): @@ -53,14 +53,12 @@ def start_import(pathList, worksheetList, comparisonColumn, startingRow,subCompy lList,wList, dimList, intDimList = get_data(pathList, worksheetList, levelColumn, comparisonColumn, startingRow,subCompyList,comparisonLevelGap,SSList) - #print(get_krippendorff_DF(pd.concat(wList,axis=0))) - kripSimpleList = [] - + #verticalAdd= pd.concat(wList,axis=0) #join all dataframes vertically #print(get_krippendorff_DF(verticalAdd)) #print(get_krippendorff_DF(pd.concat(lList,axis=0))) - return lList , wList, dimList, intDimList, kripSimpleList + return lList , wList, dimList, intDimList def concat_dataframes(list_of_lists): concatenated_dfs = [] @@ -154,4 +152,26 @@ def replace_with_lists(num,odd_groups,my_wantedList): #replace with lists for sublist in odd_groups: if num in sublist: return my_wantedList[odd_groups.index(sublist)] - raise ValueError(f"Number {num} not found in any sublist") \ No newline at end of file + raise ValueError(f"Number {num} not found in any sublist") + +def iterate_through_simple_krip(weightList, nMax, nMin,dimDF): + kripSimpleList = [] + kripInputList = [] + + for x in range(nMin,nMax+1,2): + workInputList=[] + workSimpleList=[] + mWL,oG=simplify_krip(x) + onlyDimensionSImplified=dimDF.applymap(lambda x: replace_with_lists(x, oG, mWL)) + workInputList.append(onlyDimensionSImplified) + workSimpleList.append(get_krippendorff_DF(onlyDimensionSImplified)) + + for i,df in enumerate(weightList): + dimensionSimplifiedListWork=df.applymap(lambda x: replace_with_lists(x, oG, mWL)) + workInputList.append(dimensionSimplifiedListWork) + workSimpleList.append(get_krippendorff_DF(dimensionSimplifiedListWork)) + + kripSimpleList.append(workSimpleList) + kripInputList.append(workInputList) + + return kripSimpleList, kripInputList diff --git a/src/methods/report_generator.py b/src/methods/report_generator.py index bd04fc6..a62a8b1 100644 --- a/src/methods/report_generator.py +++ b/src/methods/report_generator.py @@ -10,8 +10,9 @@ ##############Main function############## def main(mFP,cFM,nList,levelListSM,weightListSM, dimensionListSM, - interDimensionalListSM, kripSimplifiedListSM, outputDataframeSM, - subDimConsistencyListSM,DimWeightListSM): + interDimensionalListSM, outputDataframeSM, + subDimConsistencyListSM,DimWeightListSM, + kripSimpleListOut, kripInputListOut): logging.info("M - Report generation started") outFolderPath = check_output_folder(cFM.get("Settings", "outputFolder"),mFP) #check if output folder exists listDimension,combinedList=get_subsection(cFM) #get subsections and dimensions from config file @@ -24,13 +25,11 @@ def main(mFP,cFM,nList,levelListSM,weightListSM, dimensionListSM, csv_interdim_comparisions_input(interDimensionalListSM,sOF) #generates csv file with interdimensional comparisions csv_calculated_weights(outputDataframeSM,sOF,combinedList,DimWeightListSM,listDimension) #generates csv file with calculated weights csv_consistency_indexes(subDimConsistencyListSM,sOF,listDimension) #generates csv file with consistency indexes - + csv_krip_inputs(kripInputListOut,sOF) #generates csv file with krippendorff inputs + csv_krip_outputs(kripSimpleListOut,sOF,listDimension) #generates csv file with krippendorff outputs logging.info("M - Report generation finished successfully") return None - - - ##############Sub functions############## #File that generates the report @@ -62,7 +61,7 @@ def make_folder(oFP,num_participants): if not os.path.exists(os.path.join(oFP, folder_name)): os.makedirs(os.path.join(oFP, folder_name)) - logging.info("Directory created successfully!") + logging.info("Report folder created successfully!") else: logging.info("Directory already exists!") logging.log(logging.CRITICAL, "Program aborted, Program run twice in single second") @@ -94,10 +93,15 @@ def csv_levels(df,sOF,combL): verticalAdd.to_csv(outPath, index=True) return None -def csv_kripke_simplified(kSL,sOF): #doesn't work - verticalAdd= pd.concat(kSL,axis=0) - outPath=os.path.join(sOF, "kripke_simplified.csv") - verticalAdd.to_csv(outPath, index=True) +def csv_krip_inputs(kripInputListOut,sOF): #from most convoluting to least (aka higher alpha to lower) + finalDF = pd.DataFrame() + for i in range(len(kripInputListOut)): + workingDF = pd.DataFrame() + for x in range(len(kripInputListOut[i])): + workingDF = pd.concat([workingDF,kripInputListOut[i][x]],axis=0) + finalDF = pd.concat([finalDF,workingDF],axis=1) + outPath=os.path.join(sOF, "inputs_krip.csv") + finalDF.to_csv(outPath, index=True) return None def csv_calculated_weights(df,sOF,combL,DimWeightListSMFunc,listDimensionFunc): @@ -120,11 +124,23 @@ def csv_consistency_indexes(sDCL,sOF,listDimensionFunc): sDCL.to_csv(outPath, index=True) return None - def csv_interdim_comparisions_input(iDL,sOF): outPath=os.path.join(sOF, "inputs_interdimensions.csv") iDL.to_csv(outPath, index=True) return None -def csv_base_math_model(): - return None \ No newline at end of file +def csv_krip_outputs(kripSimpleListOut, sOF, listDimensionFunc): + indexToUse = ["Dimensions"] + listDimensionFunc + cols = list(range(3, len(kripSimpleListOut)*2+2, 2)) #list of columns to drop + print(cols) + + finalDF = pd.DataFrame() + for i in range(len(kripSimpleListOut)): + col_name=cols[i] + df=pd.DataFrame({col_name: kripSimpleListOut[i]}) + finalDF=pd.concat([finalDF,df],axis=1) + + finalDF.index=indexToUse + outPath=os.path.join(sOF, "outputs_krip.csv") + finalDF.to_csv(outPath, index=True) + return None diff --git a/tests/generic_module_practice.py b/tests/generic_module_practice.py index c4bc0db..c6679d0 100644 --- a/tests/generic_module_practice.py +++ b/tests/generic_module_practice.py @@ -1,14 +1,31 @@ -import ast -import configparser +import krippendorff +import pandas as pd +import numpy as np -config = configparser.ConfigParser() -config.read('config.ini') +def test_krippendorff_alpha(): + df = pd.DataFrame({'rater1': ["N/A", "N/A", "N/A", "N/A", "N/A",3,4,1,2,1,1,3,3,"N/A",3], + 'rater2': [1, "N/A", 2, 1, 3,3,4,3,"N/A","N/A","N/A","N/A","N/A","N/A","N/A"], + 'rater3': ["N/A", "N/A", 2, 1, 3,4,4,"N/A",2,1,1,3,3,"N/A",4]}) -# Create an empty list to store the dictionaries -special_sheets_list = [] + data = df.T.values.tolist() -# Loop over all items in the special sheets dictionary and convert the string values to their appropriate data types using ast.literal_eval() -for key in config['Special_sheets']: - special_sheets_list.append(eval(config['Special_sheets'][key])) + data_tuple = tuple(' '.join(map(str, row)) for row in data) + + reliability_data_str = ( + "3 0.5 0.5 1 0.5 0.25 0.25 0.25 0.25 0.3333 2 0.5 3 2 0.3333", # coder A + "1 0.5 0.3333 0.5 1 0.5 3 1 1 2 3 4 1 1 1", # coder B + "2 0.3333 0.1667 1 2 2 0.2 0.3333 0.3333 0.3333 1 0.3333 3 3 1", # coder C + ) + + print(reliability_data_str) + print(data_tuple) + newlistconvert =[[np.nan if (v == "*" or v=="N/A") else v for v in coder.split()] for coder in data_tuple] + reliability_data = [[np.nan if (v == "*" or v=="N/A") else v for v in coder.split()] for coder in reliability_data_str] + + + #assert newlistconvert ==reliability_data + + print("Krippendorff's alpha for nominal metric: ", krippendorff.alpha(reliability_data=reliability_data, + level_of_measurement="nominal")) + #assert (krippendorff.alpha(reliability_data=newlistconvert,level_of_measurement="nominal"))== (krippendorff.alpha(reliability_data=reliability_data, level_of_measurement="nominal")) -print(special_sheets_list) \ No newline at end of file