Data parser (#61)
* expose custom-view to service-info api

* expose custom-view to service-info api

* Most horrible commit in history: pdf downloader + necessary fixes in shared modules and dep updates

* include a subset of the newest version of shared modules from mother ship repo

* fix system tests and translations work

* real data in custom view

* check for undefined user_answers

* try to fix undefined answers error

* Custom view (#59)

* expose custom-view to service-info api

* expose custom-view to service-info api

* Most horrible commit in history: pdf downloader + necessary fixes in shared modules and dep updates

* include a subset of the newest version of shared modules from mother ship repo

* fix system tests and translations work

* real data in custom view

* check for undefined user_answers

* try to fix undefined answers error

* small fix

* Update README.md

* Update README.md

* Update README.md

* Update README.md

* Update README.md

* Update README.md

* Fix user label placement

* Custom view (#60)

* expose custom-view to service-info api

* expose custom-view to service-info api

* Most horrible commit in history: pdf downloader + necessary fixes in shared modules and dep updates

* include a subset of the newest version of shared modules from mother ship repo

* fix system tests and translations work

* real data in custom view

* check for undefined user_answers

* try to fix undefined answers error

* small fix

* add user consents to data parsing script

* Fix user label placement

* make the text field in survey answers more flexible

* fix tests for fixed answer fields

* update data-parser to include module info and the answer time of each exercise
anadis504 authored May 28, 2024
1 parent 4513424 commit be86179
Showing 11 changed files with 16,233 additions and 35,237 deletions.
20 changes: 20 additions & 0 deletions data-parser/README.md
@@ -2,6 +2,26 @@

The output of the data-parser is a .csv file containing only answers to the `DOGS FACTORIAL ANALYSIS SURVEY` exercise types. The file will contain answers submitted **after** 22.05.2023 due to the latest format. The separator used in the .csv file is the semicolon `;`.
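For anyone loading the output elsewhere, here is a minimal sketch of reading it back in, assuming a recent polars release where the CSV separator keyword is `separator` (older releases used `sep`); the file name below is hypothetical, since the parser generates its own timestamped name.

```python
# Minimal sketch: read the parser's semicolon-separated output back into polars.
# The path is hypothetical; the real file name is generated by the parser.
import polars as pl

df = pl.read_csv(
    "./parsed-outputs/Survey_data-example.csv",
    separator=";",  # the output uses ';' as the column separator
)
print(df.columns)  # user_id, name, email, then one column per questionLabel
```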

```diff
@@ Update March 2024: @@

+ Support for parsing 'User Consents' files:
columns named by the 'question' field, containing true or false indicating whether a user checked the acceptance box,
are added as new columns to the final output .csv file.

+ Output .csv file will contain the course name in question
(if your 'data' file is contaminated with files from different courses, this will show in the file name)
The parser chooses the latest version of each of the files containing 'Submissions', 'User Details',
'Exercise tasks' and 'User Consents' in the file name, no matter the course name.

! TODO: parser does not work correctly now that there are several modules containing identical questionLabels
! Need to extract information about which exercises belong to which module and parse accordingly:
row1: user_id_x, module_a, answers
row2: user_id_x, module_b, answers
! At this point exercises in different modules might overwrite each other --> no way to link the answers to the correct pet!
```
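A minimal sketch of the file-selection rule described above (pick the newest export of each file type, regardless of course name), assuming, as `main.py` does, that exports live in `./data/` and are named `<course name> - <file type> ... <timestamp>` with the timestamp as the last space-separated token:

```python
# Sketch of the "choose the latest version of each file type" rule.
from os import listdir
from os.path import join

data_dir = "./data/"
datafiles = listdir(data_dir)

def latest(kind: str) -> str:
    """Newest file whose type part contains `kind`, no matter the course name."""
    matches = [f for f in datafiles if kind in f.split(" - ", 1)[1]]
    # Sorting on the last token in reverse order puts the newest export first.
    return sorted(matches, key=lambda x: x.split(" ")[-1], reverse=True)[0]

for kind in ("Submissions", "User Details", "Exercise tasks", "User Consents"):
    print(kind, "->", join(data_dir, latest(kind)))
```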


## Dataset layout

The file contains columns `user_id, name, email`, followed by a column per `questionLabel` existing in the course. Empty submissions (not answered questions) have empty entry-points.
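For illustration only, a toy frame with the layout described above; `dog_age` and `dog_breed` stand in for real `questionLabel` columns, which come from the course's exercise specs:

```python
# Illustrative layout: user columns first, then one column per questionLabel.
import polars as pl

sample = pl.DataFrame(
    {
        "user_id": ["u-001", "u-002"],
        "name": ["Alice Example", "Bob Example"],
        "email": ["alice@example.com", "bob@example.com"],
        # unanswered questions stay empty (null)
        "dog_age": ["3", None],
        "dog_breed": ["collie", "beagle"],
    }
)
print(sample)
```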
67 changes: 58 additions & 9 deletions data-parser/main.py
@@ -34,7 +34,7 @@ def flatten(xs):

for f in datafiles:
[course_name, file_name] = f.split(' - ', 1)
course_names.add(course_name)
#course_names.add(course_name)
if 'Submissions' in file_name:
submission_files.append(f)
elif 'Exercise tasks' in file_name:
@@ -50,6 +50,16 @@ def flatten(xs):
userdetail_files = sorted(userdetail_files, key=lambda x: (x.split(' ')[-1]), reverse=True)
user_consents_files = sorted(user_consents_files, key=lambda x: (x.split(' ')[-1]), reverse=True)


[course_name, file_name] = exercisetasks_files[0].split(' - ', 1)
course_names.add(course_name)
[course_name, file_name] = submission_files[0].split(' - ', 1)
course_names.add(course_name)
[course_name, file_name] = userdetail_files[0].split(' - ', 1)
course_names.add(course_name)
[course_name, file_name] = user_consents_files[0].split(' - ', 1)
course_names.add(course_name)

try:
exercise_tasks = pl.read_csv(join('./data/', exercisetasks_files[0]))
except OSError as error:
@@ -78,7 +88,7 @@ def flatten(xs):

cleaned_subs = (submissions
.join(user_details.select(pl.exclude('created_at')), on='user_id', how='left')
.join(exercise_tasks.select(['id', 'exercise_type']), left_on='exercise_task_id', right_on='id', how='left')
.join(exercise_tasks.select(['id', 'exercise_type', 'exercise_name', 'course_module_name']), left_on='exercise_task_id', right_on='id', how='left')
.filter(pl.col('exercise_type') == 'dogs-factorial-analysis-survey')
.drop(['course_instance_id', 'score_given','exercise_type'])
.sort('created_at', descending=True)
@@ -92,8 +102,19 @@ def flatten(xs):

user_details = user_details.join(user_consents, how='left', on='user_id')

course_modules = exercise_tasks.get_column('course_module_name').unique()
module_user_details_indexes = dict()
module_user_details = []
for index, mod in enumerate(course_modules):
df = user_details.clone()
df = df.with_columns(pl.lit(mod).alias('course_module_name'))
module_user_details.append(df)
module_user_details_indexes[mod] = index
#user_details = user_details.join(course_modules, how="cross")

# The map of private-specs: { exercise_task_id : { private_spec } }
exercise_tasks_map = dict([(x[0], json.loads(x[4])) for x in exercise_tasks.rows() if 'factorial' in x[3]])

exercise_tasks_map = dict([(x[0], json.loads(x[1])) for x in exercise_tasks.select(['id','private_spec','exercise_type']).rows() if 'factorial' in x[2]])

# Formatting the private_specs to needed fields for exstracting submission info
keys_to_delete = []
@@ -111,6 +132,9 @@ def flatten(xs):
dict([(key, val) for key,val in dict(quest).items() if key not in ['question', 'mandatory']])
for quest in exercise_tasks_map[k]['questions'] if quest['questionLabel'] not in 'info'
])
exercise_tasks_map[k]['exercise_name'] = exercise_tasks.select(['exercise_name','id']).row(by_predicate=(pl.col('id') == k))[0]
exercise_tasks_map[k]['course_module_name'] = exercise_tasks.select(['course_module_name','id']).row(by_predicate=(pl.col('id') == k))[0]

# non-factorial survey type: {
# id,
# content: [{surveyItemId, options, questionLabel, answer-type}],
@@ -132,6 +156,9 @@ def flatten(xs):
content[idx] = newItem
if content:
exercise_tasks_map[k]['content'] = content
exercise_tasks_map[k]['exercise_name'] = exercise_tasks.select(['exercise_name','id']).row(by_predicate=(pl.col('id') == k))[0]
exercise_tasks_map[k]['course_module_name'] = exercise_tasks.select(['course_module_name','id']).row(by_predicate=(pl.col('id') == k))[0]

else:
keys_to_delete.append(k)

@@ -140,26 +167,37 @@

# Building additional columns to final dataframe exercise tasks at a time
for k,v in exercise_tasks_map.items():
submissions_data = cleaned_subs.filter(pl.col('exercise_task_id') == k).select('user_id','data_json').rows()

submissions_data = cleaned_subs.filter(pl.col('exercise_task_id') == k).select('user_id','data_json', 'created_at').rows()
current_module = v['course_module_name']
ind = module_user_details_indexes[current_module]
user_submissions, col_labels, typed_col_labels = [],[],{}

if v['type'] == 'factorial':
col_labels = [lab['questionLabel'] for lab in v['questions']]
col_labels.append('user_id')
exercise_name_label_for_timestamp = v['exercise_name']
col_labels.append('course_module_name')

typed_col_labels = dict(ChainMap(*[{lab['questionLabel']: pl.Int8} for lab in v['questions']]))
if exercise_name_label_for_timestamp not in module_user_details[ind].columns:
col_labels.append(exercise_name_label_for_timestamp)
typed_col_labels[exercise_name_label_for_timestamp] = pl.Utf8
typed_col_labels['user_id'] = pl.Utf8
typed_col_labels['course_module_name'] = pl.Utf8

options = dict([(option['id'], option['value']) for option in v['options']])

for row in submissions_data:
row = [row[0], json.loads(row[1])]
row = [row[0], json.loads(row[1]), row[2]]
user_answers = dict(row[1])

submission = dict(ChainMap(*[{item['questionLabel']: options.get(item.get('chosenOptionId'))} for item in user_answers.get('answeredQuestions')]))

submission['user_id'] = row[0]
submission['course_module_name'] = v['course_module_name']
exercise_name_label_for_timestamp = v['exercise_name']
if exercise_name_label_for_timestamp not in module_user_details[ind].columns:
submission[exercise_name_label_for_timestamp] = row[2][0:19] # create_at field
user_submissions.append(submission)

else:
@@ -169,11 +207,15 @@ def flatten(xs):

col_labels = flatten(col_labels)
col_labels.append('user_id')

exercise_name_label_for_timestamp = v['exercise_name']
if exercise_name_label_for_timestamp not in module_user_details[ind].columns:
col_labels.append(exercise_name_label_for_timestamp)
col_labels.append('course_module_name')

typed_col_labels = dict(ChainMap(*[{col: pl.Utf8} for col in col_labels]))

for row in submissions_data:
row = [row[0], json.loads(row[1])]
row = [row[0], json.loads(row[1]), row[2]]

user_answer = dict(row[1])
user_answer = dict([(answeredQ.get('questionLabel'), answeredQ.get('answer')) for answeredQ in user_answer.get('answeredQuestions')])
@@ -193,14 +235,19 @@ def flatten(xs):
submission = dict(ChainMap(*submission))

submission['user_id']= row[0]
submission['course_module_name'] = v['course_module_name']
if exercise_name_label_for_timestamp not in module_user_details[ind].columns:
submission[exercise_name_label_for_timestamp] = row[2][0:19] # create_at field

user_submissions.append(submission)

data = user_submissions if user_submissions else [[None for _ in col_labels]]


additional_cols = pl.DataFrame(data, schema=typed_col_labels).select(col_labels)

user_details = user_details.join(additional_cols, how='left', on='user_id')

module_user_details[ind] = module_user_details[ind].join(additional_cols, how='left', on=['user_id','course_module_name'])

try:
os.mkdir("./parsed-outputs")
@@ -209,6 +256,8 @@ def flatten(xs):
pass
else: print(error)

user_details = pl.concat(module_user_details, how="diagonal")

dt = datetime.now().strftime('%d-%m-%Y %H:%M:%S')
course_name = '-'.join(course_names)
filename = f'./parsed-outputs/Survey_data-{course_name}-{dt}.csv'
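A central change in `main.py` is the per-module handling of answers: one copy of the user table per course module, that module's answer columns joined onto the matching copy, and a diagonal concatenation at the end. A condensed, self-contained sketch of that pattern follows; it uses toy data, and every column name except `user_id` and `course_module_name` is made up.

```python
# Condensed sketch of the per-module parsing pattern added in main.py.
import polars as pl

user_details = pl.DataFrame({"user_id": ["u1", "u2"], "name": ["Alice", "Bob"]})
course_modules = ["module_a", "module_b"]

module_user_details = []
module_user_details_indexes = {}
for index, mod in enumerate(course_modules):
    # One clone of the user table per module, tagged with the module name.
    df = user_details.clone().with_columns(pl.lit(mod).alias("course_module_name"))
    module_user_details.append(df)
    module_user_details_indexes[mod] = index

# Answers for one exercise task in module_a; identical questionLabels in
# module_b would land in that module's copy instead of overwriting these.
answers_a = pl.DataFrame(
    {
        "user_id": ["u1"],
        "course_module_name": ["module_a"],
        "dog_energy": ["high"],
    }
)
ind = module_user_details_indexes["module_a"]
module_user_details[ind] = module_user_details[ind].join(
    answers_a, how="left", on=["user_id", "course_module_name"]
)

# Diagonal concat keeps every column, filling missing ones with null,
# and yields one row per (user, module).
final = pl.concat(module_user_details, how="diagonal")
print(final)
```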
8 changes: 4 additions & 4 deletions src/components/PdfDownload/PdfGenerator.tsx
@@ -227,7 +227,7 @@ const PDFSumFactorReport: React.FC<React.PropsWithChildren<SubmissionProps>> = (
const userLabel: string = userName ?? userVar?.label ?? "Your Score"
const userPlacement =
(100 * (-(start as number) + userScore)) / ((finnish as number) - (start as number))
const userLabelWidth = (100 * getTextWidth(userLabel, "15px Raleway")) / 100
const userLabelWidth = getTextWidth(userLabel, "9px Raleway")
const labelPlacement =
userPlacement >= 100 - userLabelWidth ? userPlacement - userLabelWidth - 4 : userPlacement + 4
return (
@@ -292,9 +292,9 @@ const MyDoc: React.FC<React.PropsWithChildren<CustomViewIframeState>> = (props)
.flatMap((exercise) => {
return exercise.exercise_tasks.flatMap((task) => {
const grading = task.grading as CustomViewExerciseTaskGrading
const answer = task.user_answer
? ((task.user_answer as CustomViewExerciseTaskSubmission[])[0].data_json as UserAnswer)
: null
const answer =
((task.user_answer as CustomViewExerciseTaskSubmission)?.data_json as UserAnswer) ?? null

const pubSpec = task.public_spec as PublicSpec
const gradingFeedback = grading.feedback_json
? (grading.feedback_json as ExerciseFeedback)
1 change: 1 addition & 0 deletions src/components/SharedMisc/AdvancedDropdown.tsx
@@ -45,6 +45,7 @@ const AdvancedDropdown: React.FC<React.PropsWithChildren<Props>> = ({
maxMenuHeight={400}
className={css`
aria-label: breed-selection;
width: 99%;
`}
/>
//</div>
15 changes: 10 additions & 5 deletions src/components/Survey/SurveyExerciseItem.tsx
@@ -1,6 +1,8 @@
import { css } from "@emotion/css"
import styled from "@emotion/styled"

import TextAreaField from "../../shared-module/components/InputFields/TextAreaField"
import TextField from "../../shared-module/components/InputFields/TextField"
import { AnswerType, SurveyItem } from "../../util/spec-types/privateSpec"
import MarkdownText from "../MarkdownText"
import AdvancedDropdown from "../SharedMisc/AdvancedDropdown"
@@ -50,7 +52,7 @@ const SurveyExerciseitem: React.FC<React.PropsWithChildren<Props>> = ({
case AnswerType.Number: {
return (
<div>
<input
<TextField
aria-label={`number-input-for-${item.question.questionLabel}`}
value={answer ?? ""}
type="number"
@@ -62,6 +64,7 @@ const SurveyExerciseitem: React.FC<React.PropsWithChildren<Props>> = ({
className={css`
border: 1px solid #e0e0e0;
border-radius: 2px;
max-width: 10em;
`}
/>
</div>
@@ -70,18 +73,20 @@ const SurveyExerciseitem: React.FC<React.PropsWithChildren<Props>> = ({
case AnswerType.Text: {
return (
<div>
<input
<TextAreaField
aria-label={`text-input-for-${item.question.questionLabel}`}
value={answer ?? ""}
type="text"
autoResize
onChange={(e) => {
updateAnswer(item.id, e.target.value)
}}
required
disabled={disabled}
className={css`
border: 1px solid #e0e0e0;
border-radius: 2px;
textarea {
width: 99%;
max-height: 200px;
}
`}
/>
</div>
2 changes: 0 additions & 2 deletions src/pages/iframe.tsx
@@ -57,9 +57,7 @@ const Iframe: React.FC<React.PropsWithChildren<unknown>> = () => {
const [state, setState] = useState<State | null>(null)

const callback = useCallback((messageData: unknown, port: MessagePort) => {
//const messageData = customViewState as SetStateMessage
if (isSetStateMessage(messageData)) {
console.log("Messagedata:", messageData)
ReactDOM.flushSync(() => {
if (messageData.view_type === "answer-exercise") {
setState({
(The remaining changed files in this commit could not be rendered in the diff view.)
