-
Notifications
You must be signed in to change notification settings - Fork 0
/
SAS_code.sas
100 lines (75 loc) · 3.26 KB
/
SAS_code.sas
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
* Read in the SDTM lab data (LB) dataset;
data sdtm_lb;
set SDTM.LB;
run;
* Check the structure of the SDTM LB dataset;
proc contents data=sdtm_lb;
run;
* Create the ADaM LB (ADLB) dataset;
data work.adlb;
set sdtm_lb;
* Retain only the variables we need for analysis;
keep USUBJID LBCAT LBTEST LBSTRESN LBSTRESU VISITNUM;
* Create derived variables for baseline and change from baseline;
by USUBJID LBCAT LBTEST VISITNUM;
* The baseline is defined as the first non-missing result for each subject, test, and category;
if first.USUBJID or first.LBTEST or first.LBCAT then do;
LBBLFL = 'Y';
LBSTRESN_BL = LBSTRESN;
end;
else LBBLFL = 'N';
* The change from baseline is the result minus the baseline value;
LBCHG = LBSTRESN - LBSTRESN_BL;
* Create a flag for significant change from baseline;
if LBCHG > 5 then LBCHGFL = 'Y';
else LBCHGFL = 'N';
run;
* Apply ADaM metadata attributes;
* This is a placeholder - actual metadata would be study-specific and would be applied based on the ADaM Implementation Guide;
run;
* Check log for any errors or warnings, and validate outputs;
* This step is crucial for quality control, ensuring the analysis was conducted correctly;
* Documentation is essential for every step to ensure reproducibility and inspection readiness;
* Remember to follow ADaM Implementation Guide and relevant regulatory guidelines;
#############################################################################################################
* Set the input data location;
filename indata '/path/to/clinical_trial_data.csv';
* Import the data into a SAS dataset;
proc import datafile=indata
out=work.clin_data
dbms=csv
replace;
run;
* Check the structure of the data, ensuring it's been read correctly;
proc contents data=work.clin_data;
run;
* Conduct data cleaning and manipulation;
data work.clean_data;
set work.clin_data;
* Recode categorical variables and handle missing values;
if var1 = 'old category' then var1 = 'new category';
if var2 = . then var2 = 0;
* Create a derived variable: the interaction between var1 and var2;
var1_var2 = var1 * var2;
run;
* Create a macro to automate repetitive tasks: generating summary statistics for a list of variables;
%macro summarise_vars(dataset, vars);
%let var_count = %sysfunc(countw(&vars));
%do i = 1 %to &var_count;
%let var = %scan(&vars, &i);
proc means data=&dataset mean median min max;
var &var;
run;
%end;
%mend summarise_vars;
* Use the macro to generate summary statistics for var1, var2, and the interaction term;
%summarise_vars(work.clean_data, var1 var2 var1_var2);
* Perform logistic regression;
proc logistic data=work.clean_data descending;
class var1(param=ref ref='new category');
model response_var(event='1') = var1 var2 var1_var2;
run;
* Check log for any errors or warnings, and validate outputs;
* This step is crucial for quality control, ensuring the analysis was conducted correctly;
* Documentation is essential for every step to ensure reproducibility and inspection readiness;
* Remember to follow SOPs, WIs, and relevant regulatory guidelines;