Data parser (#61)
* expose custom-view to service-info api

* expose custom-view to service-info api

* Most horrible commit in history: pdf downloader + necessary fixes in shared modules and dep updates

* include a subset of the newest version of shared modules from mother ship repo

* fix system tests and translations work

* real data in custom view

* check for undefined user_answers

* try to fix undefined answers error

* Custom view (#59)

* expose custom-view to service-info api

* expose custom-view to service-info api

* Most horrible commit in history: pdf downloader + necessary fixes in shared modules and dep updates

* include a subset of the newest version of shared modules from mother ship repo

* fix system tests and translations work

* real data in custom view

* check for undefined user_answers

* try to fix undefined answers error

* small fix

* Update README.md

* Update README.md

* Update README.md

* Update README.md

* Update README.md

* Update README.md

* Fix user label placement

* Custom view (#60)

* expose custom-view to service-info api

* expose custom-view to service-info api

* Most horrible commit in history: pdf downloader + necessary fixes in shared modules and dep updates

* include a subset of the newest version of shared modules from mother ship repo

* fix system tests and translations work

* real data in custom view

* check for undefined user_answers

* try to fix undefined answers error

* small fix

* add user consents to data parsing script

* Fix user label placement

* make the text field in survey answers more flexible

* fix tests for fixed answer fields

* update data-parser to include module info and the answer time of each exercise
anadis504 authored May 28, 2024
1 parent 4513424 commit be86179
Showing 11 changed files with 16,233 additions and 35,237 deletions.
20 changes: 20 additions & 0 deletions data-parser/README.md
@@ -2,6 +2,26 @@

The output of the data-parser is a .csv file containing only answers to the `DOGS FACTORIAL ANALYSIS SURVEY` exercise types. The file will contain answers submitted **after** 22.05.2023 due to the latest format. The separator used in the .csv file is the semicolon `;`.
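For anyone loading the output elsewhere, here is a minimal sketch of reading it back in, assuming a recent polars release where the CSV separator keyword is `separator` (older releases used `sep`); the file name below is hypothetical, since the parser generates its own timestamped name.

```python
# Minimal sketch: read the parser's semicolon-separated output back into polars.
# The path is hypothetical; the real file name is generated by the parser.
import polars as pl

df = pl.read_csv(
    "./parsed-outputs/Survey_data-example.csv",
    separator=";",  # the output uses ';' as the column separator
)
print(df.columns)  # user_id, name, email, then one column per questionLabel
```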

```diff
@@ Update March 2024: @@

+ Support for parsing 'User Consents' files:
columns named by the 'question' field, containing true or false indicating whether a user checked the acceptance box,
are added as new columns to the final output .csv file.

+ Output .csv file will contain the course name in question
(if your 'data' file is contaminated with files from different courses, this will show in the file name)
The parser chooses the latest version of each of the files containing 'Submissions', 'User Details',
'Exercise tasks' and 'User Consents' in the file name, no matter the course name.

! TODO: parser does not work correctly now that there are several modules containing identical questionLabels
! Need to extract information about which exercises belong to which module and parse accordingly:
row1: user_id_x, module_a, answers
row2: user_id_x, module_b, answers
! At this point exercises in different modules might overwrite each other --> no way to link the answers to the correct pet!
```
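A minimal sketch of the file-selection rule described above (pick the newest export of each file type, regardless of course name), assuming, as `main.py` does, that exports live in `./data/` and are named `<course name> - <file type> ... <timestamp>` with the timestamp as the last space-separated token:

```python
# Sketch of the "choose the latest version of each file type" rule.
from os import listdir
from os.path import join

data_dir = "./data/"
datafiles = listdir(data_dir)

def latest(kind: str) -> str:
    """Newest file whose type part contains `kind`, no matter the course name."""
    matches = [f for f in datafiles if kind in f.split(" - ", 1)[1]]
    # Sorting on the last token in reverse order puts the newest export first.
    return sorted(matches, key=lambda x: x.split(" ")[-1], reverse=True)[0]

for kind in ("Submissions", "User Details", "Exercise tasks", "User Consents"):
    print(kind, "->", join(data_dir, latest(kind)))
```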


## Dataset layout

The file contains columns `user_id, name, email`, followed by a column per `questionLabel` existing in the course. Empty submissions (not answered questions) have empty entry-points.
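For illustration only, a toy frame with the layout described above; `dog_age` and `dog_breed` stand in for real `questionLabel` columns, which come from the course's exercise specs:

```python
# Illustrative layout: user columns first, then one column per questionLabel.
import polars as pl

sample = pl.DataFrame(
    {
        "user_id": ["u-001", "u-002"],
        "name": ["Alice Example", "Bob Example"],
        "email": ["alice@example.com", "bob@example.com"],
        # unanswered questions stay empty (null)
        "dog_age": ["3", None],
        "dog_breed": ["collie", "beagle"],
    }
)
print(sample)
```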
67 changes: 58 additions & 9 deletions data-parser/main.py
@@ -34,7 +34,7 @@ def flatten(xs):

for f in datafiles:
[course_name, file_name] = f.split(' - ', 1)
course_names.add(course_name)
#course_names.add(course_name)
if 'Submissions' in file_name:
submission_files.append(f)
elif 'Exercise tasks' in file_name:
@@ -50,6 +50,16 @@ def flatten(xs):
userdetail_files = sorted(userdetail_files, key=lambda x: (x.split(' ')[-1]), reverse=True)
user_consents_files = sorted(user_consents_files, key=lambda x: (x.split(' ')[-1]), reverse=True)


[course_name, file_name] = exercisetasks_files[0].split(' - ', 1)
course_names.add(course_name)
[course_name, file_name] = submission_files[0].split(' - ', 1)
course_names.add(course_name)
[course_name, file_name] = userdetail_files[0].split(' - ', 1)
course_names.add(course_name)
[course_name, file_name] = user_consents_files[0].split(' - ', 1)
course_names.add(course_name)

try:
exercise_tasks = pl.read_csv(join('./data/', exercisetasks_files[0]))
except OSError as error:
@@ -78,7 +88,7 @@ def flatten(xs):

cleaned_subs = (submissions
.join(user_details.select(pl.exclude('created_at')), on='user_id', how='left')
.join(exercise_tasks.select(['id', 'exercise_type']), left_on='exercise_task_id', right_on='id', how='left')
.join(exercise_tasks.select(['id', 'exercise_type', 'exercise_name', 'course_module_name']), left_on='exercise_task_id', right_on='id', how='left')
.filter(pl.col('exercise_type') == 'dogs-factorial-analysis-survey')
.drop(['course_instance_id', 'score_given','exercise_type'])
.sort('created_at', descending=True)
@@ -92,8 +102,19 @@ def flatten(xs):

user_details = user_details.join(user_consents, how='left', on='user_id')

course_modules = exercise_tasks.get_column('course_module_name').unique()
module_user_details_indexes = dict()
module_user_details = []
for index, mod in enumerate(course_modules):
df = user_details.clone()
df = df.with_columns(pl.lit(mod).alias('course_module_name'))
module_user_details.append(df)
module_user_details_indexes[mod] = index
#user_details = user_details.join(course_modules, how="cross")

# The map of private-specs: { exercise_task_id : { private_spec } }
exercise_tasks_map = dict([(x[0], json.loads(x[4])) for x in exercise_tasks.rows() if 'factorial' in x[3]])

exercise_tasks_map = dict([(x[0], json.loads(x[1])) for x in exercise_tasks.select(['id','private_spec','exercise_type']).rows() if 'factorial' in x[2]])

# Formatting the private_specs to needed fields for exstracting submission info
keys_to_delete = []
@@ -111,6 +132,9 @@ def flatten(xs):
dict([(key, val) for key,val in dict(quest).items() if key not in ['question', 'mandatory']])
for quest in exercise_tasks_map[k]['questions'] if quest['questionLabel'] not in 'info'
])
exercise_tasks_map[k]['exercise_name'] = exercise_tasks.select(['exercise_name','id']).row(by_predicate=(pl.col('id') == k))[0]
exercise_tasks_map[k]['course_module_name'] = exercise_tasks.select(['course_module_name','id']).row(by_predicate=(pl.col('id') == k))[0]

# non-factorial survey type: {
# id,
# content: [{surveyItemId, options, questionLabel, answer-type}],
@@ -132,6 +156,9 @@ def flatten(xs):
content[idx] = newItem
if content:
exercise_tasks_map[k]['content'] = content
exercise_tasks_map[k]['exercise_name'] = exercise_tasks.select(['exercise_name','id']).row(by_predicate=(pl.col('id') == k))[0]
exercise_tasks_map[k]['course_module_name'] = exercise_tasks.select(['course_module_name','id']).row(by_predicate=(pl.col('id') == k))[0]

else:
keys_to_delete.append(k)

@@ -140,26 +167,37 @@

# Building additional columns to final dataframe exercise tasks at a time
for k,v in exercise_tasks_map.items():
submissions_data = cleaned_subs.filter(pl.col('exercise_task_id') == k).select('user_id','data_json').rows()

submissions_data = cleaned_subs.filter(pl.col('exercise_task_id') == k).select('user_id','data_json', 'created_at').rows()
current_module = v['course_module_name']
ind = module_user_details_indexes[current_module]
user_submissions, col_labels, typed_col_labels = [],[],{}

if v['type'] == 'factorial':
col_labels = [lab['questionLabel'] for lab in v['questions']]
col_labels.append('user_id')
exercise_name_label_for_timestamp = v['exercise_name']
col_labels.append('course_module_name')

typed_col_labels = dict(ChainMap(*[{lab['questionLabel']: pl.Int8} for lab in v['questions']]))
if exercise_name_label_for_timestamp not in module_user_details[ind].columns:
col_labels.append(exercise_name_label_for_timestamp)
typed_col_labels[exercise_name_label_for_timestamp] = pl.Utf8
typed_col_labels['user_id'] = pl.Utf8
typed_col_labels['course_module_name'] = pl.Utf8

options = dict([(option['id'], option['value']) for option in v['options']])

for row in submissions_data:
row = [row[0], json.loads(row[1])]
row = [row[0], json.loads(row[1]), row[2]]
user_answers = dict(row[1])

submission = dict(ChainMap(*[{item['questionLabel']: options.get(item.get('chosenOptionId'))} for item in user_answers.get('answeredQuestions')]))

submission['user_id'] = row[0]
submission['course_module_name'] = v['course_module_name']
exercise_name_label_for_timestamp = v['exercise_name']
if exercise_name_label_for_timestamp not in module_user_details[ind].columns:
submission[exercise_name_label_for_timestamp] = row[2][0:19] # create_at field
user_submissions.append(submission)

else:
@@ -169,11 +207,15 @@ def flatten(xs):

col_labels = flatten(col_labels)
col_labels.append('user_id')

exercise_name_label_for_timestamp = v['exercise_name']
if exercise_name_label_for_timestamp not in module_user_details[ind].columns:
col_labels.append(exercise_name_label_for_timestamp)
col_labels.append('course_module_name')

typed_col_labels = dict(ChainMap(*[{col: pl.Utf8} for col in col_labels]))

for row in submissions_data:
row = [row[0], json.loads(row[1])]
row = [row[0], json.loads(row[1]), row[2]]

user_answer = dict(row[1])
user_answer = dict([(answeredQ.get('questionLabel'), answeredQ.get('answer')) for answeredQ in user_answer.get('answeredQuestions')])
@@ -193,14 +235,19 @@ def flatten(xs):
submission = dict(ChainMap(*submission))

submission['user_id']= row[0]
submission['course_module_name'] = v['course_module_name']
if exercise_name_label_for_timestamp not in module_user_details[ind].columns:
submission[exercise_name_label_for_timestamp] = row[2][0:19] # create_at field

user_submissions.append(submission)

data = user_submissions if user_submissions else [[None for _ in col_labels]]


additional_cols = pl.DataFrame(data, schema=typed_col_labels).select(col_labels)

user_details = user_details.join(additional_cols, how='left', on='user_id')

module_user_details[ind] = module_user_details[ind].join(additional_cols, how='left', on=['user_id','course_module_name'])

try:
os.mkdir("./parsed-outputs")
@@ -209,6 +256,8 @@ def flatten(xs):
pass
else: print(error)

user_details = pl.concat(module_user_details, how="diagonal")

dt = datetime.now().strftime('%d-%m-%Y %H:%M:%S')
course_name = '-'.join(course_names)
filename = f'./parsed-outputs/Survey_data-{course_name}-{dt}.csv'
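A central change in `main.py` is the per-module handling of answers: one copy of the user table per course module, that module's answer columns joined onto the matching copy, and a diagonal concatenation at the end. A condensed, self-contained sketch of that pattern follows; it uses toy data, and every column name except `user_id` and `course_module_name` is made up.

```python
# Condensed sketch of the per-module parsing pattern added in main.py.
import polars as pl

user_details = pl.DataFrame({"user_id": ["u1", "u2"], "name": ["Alice", "Bob"]})
course_modules = ["module_a", "module_b"]

module_user_details = []
module_user_details_indexes = {}
for index, mod in enumerate(course_modules):
    # One clone of the user table per module, tagged with the module name.
    df = user_details.clone().with_columns(pl.lit(mod).alias("course_module_name"))
    module_user_details.append(df)
    module_user_details_indexes[mod] = index

# Answers for one exercise task in module_a; identical questionLabels in
# module_b would land in that module's copy instead of overwriting these.
answers_a = pl.DataFrame(
    {
        "user_id": ["u1"],
        "course_module_name": ["module_a"],
        "dog_energy": ["high"],
    }
)
ind = module_user_details_indexes["module_a"]
module_user_details[ind] = module_user_details[ind].join(
    answers_a, how="left", on=["user_id", "course_module_name"]
)

# Diagonal concat keeps every column, filling missing ones with null,
# and yields one row per (user, module).
final = pl.concat(module_user_details, how="diagonal")
print(final)
```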
8 changes: 4 additions & 4 deletions src/components/PdfDownload/PdfGenerator.tsx
@@ -227,7 +227,7 @@ const PDFSumFactorReport: React.FC<React.PropsWithChildren<SubmissionProps>> = (
const userLabel: string = userName ?? userVar?.label ?? "Your Score"
const userPlacement =
(100 * (-(start as number) + userScore)) / ((finnish as number) - (start as number))
const userLabelWidth = (100 * getTextWidth(userLabel, "15px Raleway")) / 100
const userLabelWidth = getTextWidth(userLabel, "9px Raleway")
const labelPlacement =
userPlacement >= 100 - userLabelWidth ? userPlacement - userLabelWidth - 4 : userPlacement + 4
return (
@@ -292,9 +292,9 @@ const MyDoc: React.FC<React.PropsWithChildren<CustomViewIframeState>> = (props)
.flatMap((exercise) => {
return exercise.exercise_tasks.flatMap((task) => {
const grading = task.grading as CustomViewExerciseTaskGrading
const answer = task.user_answer
? ((task.user_answer as CustomViewExerciseTaskSubmission[])[0].data_json as UserAnswer)
: null
const answer =
((task.user_answer as CustomViewExerciseTaskSubmission)?.data_json as UserAnswer) ?? null

const pubSpec = task.public_spec as PublicSpec
const gradingFeedback = grading.feedback_json
? (grading.feedback_json as ExerciseFeedback)
1 change: 1 addition & 0 deletions src/components/SharedMisc/AdvancedDropdown.tsx
@@ -45,6 +45,7 @@ const AdvancedDropdown: React.FC<React.PropsWithChildren<Props>> = ({
maxMenuHeight={400}
className={css`
aria-label: breed-selection;
width: 99%;
`}
/>
//</div>
15 changes: 10 additions & 5 deletions src/components/Survey/SurveyExerciseItem.tsx
@@ -1,6 +1,8 @@
import { css } from "@emotion/css"
import styled from "@emotion/styled"

import TextAreaField from "../../shared-module/components/InputFields/TextAreaField"
import TextField from "../../shared-module/components/InputFields/TextField"
import { AnswerType, SurveyItem } from "../../util/spec-types/privateSpec"
import MarkdownText from "../MarkdownText"
import AdvancedDropdown from "../SharedMisc/AdvancedDropdown"
@@ -50,7 +52,7 @@ const SurveyExerciseitem: React.FC<React.PropsWithChildren<Props>> = ({
case AnswerType.Number: {
return (
<div>
<input
<TextField
aria-label={`number-input-for-${item.question.questionLabel}`}
value={answer ?? ""}
type="number"
@@ -62,6 +64,7 @@ const SurveyExerciseitem: React.FC<React.PropsWithChildren<Props>> = ({
className={css`
border: 1px solid #e0e0e0;
border-radius: 2px;
max-width: 10em;
`}
/>
</div>
@@ -70,18 +73,20 @@ const SurveyExerciseitem: React.FC<React.PropsWithChildren<Props>> = ({
case AnswerType.Text: {
return (
<div>
<input
<TextAreaField
aria-label={`text-input-for-${item.question.questionLabel}`}
value={answer ?? ""}
type="text"
autoResize
onChange={(e) => {
updateAnswer(item.id, e.target.value)
}}
required
disabled={disabled}
className={css`
border: 1px solid #e0e0e0;
border-radius: 2px;
textarea {
width: 99%;
max-height: 200px;
}
`}
/>
</div>
2 changes: 0 additions & 2 deletions src/pages/iframe.tsx
@@ -57,9 +57,7 @@ const Iframe: React.FC<React.PropsWithChildren<unknown>> = () => {
const [state, setState] = useState<State | null>(null)

const callback = useCallback((messageData: unknown, port: MessagePort) => {
//const messageData = customViewState as SetStateMessage
if (isSetStateMessage(messageData)) {
console.log("Messagedata:", messageData)
ReactDOM.flushSync(() => {
if (messageData.view_type === "answer-exercise") {
setState({
(The remaining changed files in this commit could not be rendered in the diff view.)
