## Analysis_MARGO2.R
## Created: 18 / 8 / 2016
## Last edited: 21 / 8 / 2016
## Isabel Fenton
## Reanalysis for LDG paper response to reviewers
##
## Previous file: MARGO/Code/Environmental_variables.R
## Next file: 1311 LDGPaper/Reanalysis/Prediction_environment.R
## Inputs ------------------------------------------------------------------
## Environmental_variables.Rdata - containing ldg.margo.data, ldg.margo.env and other files
## Outputs ----------------------------------------------------------------
## 150601 ldg_margo_dup.RData - the dataset with duplicates (on a 1-degree scale) included
## ldg_margo_mod.RData - The modelling dataframe
## Images saved into Figures with prefix Ana
## Atlantic_simplification.RData - models enroute to simplification
## Indian_simplification.RData - models enroute to simplification
## Pacific_simplification.RData - models enroute to simplification
## Atlantic_simplified.RData - Simplified model
## Indian_simplified.RData - Simplified model
## Pacific_simplified.RData - simplified model
## mod_hres.RData - models for comparing data resolution
## Evenness_coding.RData - testing different coding styles for evenness
## Lineage_coding.RData - testing different coding styles for lineage age
## Metabolic_hypothesis.RData - testing the metabolic hypothesis
## Richness_model.RData - the final (complete) model for richness
## Richness_model_simplified.RData - the final simplified model for richness
## Evenness_model.RData - the final (complete) model for evenness
## Lineage_model.RData - the final (complete) model for lineage age
## op0_summary.csv - table of full model coefficients for richness
## eve0_summary.csv - table of full model coefficients for evenness
## lna0_summary.csv - table of full model coefficients for lineage age
## lr_out.csv - likelihood ratios for the richness model
## Libraries ---------------------------------------------------------------
setwd("C:/Users/isabf/Dropbox/Documents/PhD/Work/1311 LDGPaper/Reanalysis2")
library(spdep) # for SAR models
library(HH) # for vif
library(ncf) # for Moran's I
library(lmtest) # for likelihood ratio tests
library(mgcv) # for gams
library(colorRamps) # for matlab.like palette
library(scales) # for alpha
source("../Code/140420SARerrOptimising_NC.R") # for the optimising function
source("../../../Code/plot_spline_correlog_n.R") # for plotting spline correlog (with axis titles etc.)
source("../../../Code/sar_plot.R") # for producing sar plots
source("../../../Code/sar_predict.R") # for predicting with sar
source("../../../Code/maps.R") # for maps
source("../../../Code/palettes.R") # ditto
source("../../../Code/lr_calculations.R") # code for calculating likelihood ratios
source("../../../Code/sar_predict.R") # for predicting with poly
source("../../../Code/compare.R") # for checking species names
load("../../../Project/MARGO/Outputs/Environmental_variables.Rdata") # the datasets for the modelling
## 0i. Setting up the datasets -----------------------------------------------
rm(db.traits, db.traits.cons, WOA.depths)
# create a dataset for modelling with
tmp <- c("Core", "Latitude", "Longitude", "Water.Depth", "Total_Planktics", "Ocean2", "sp.rich", "rarefy.sr", "simpson", "simpsonEve", "FRic", "symbionts_obl", "symbionts_obl_abun", "symbionts_all", "symbionts_all_abun", "surface", "surface_subsurface", "subsurface", "subsurface_deep", "deep", "surfaceAbun", "surface_subsurfaceAbun", "subsurfaceAbun", "subsurface_deepAbun", "deepAbun", "MorphoAge", "LinAge", "MorphoAgeAbun", "LinAgeAbun", "cons_tax")
ldg.margo.mod <- merge(ldg.margo.env, ldg.margo.data[, tmp], by.x = c("Core", "Latitude", "Longitude", "Water.Depth", "Ocean2"), by.y = c("Core", "Latitude", "Longitude", "Water.Depth", "Ocean2"))
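## a minimal sanity check on the merge (a sketch: merge() silently drops
## non-matching rows, so compare row counts before the rm() below):
# c(env = nrow(ldg.margo.env), merged = nrow(ldg.margo.mod))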
rm(ldg.margo.data, ldg.margo.env, tmp)
# set NAs in 10 deg depth to 0, so it can be modelled
ldg.margo.mod$depth10deg[is.na(ldg.margo.mod$depth10deg)] <- 0
# remove Water.Depth again (as it has NAs)
ldg.margo.mod <- ldg.margo.mod[, -which(names(ldg.margo.mod) == "Water.Depth")]
# remove the extra factor level of the Mediterranean
table(ldg.margo.mod$Ocean2)
ldg.margo.mod <- ldg.margo.mod[ldg.margo.mod$Ocean2 != "Mediterranean", ]
ldg.margo.mod$Ocean2 <- droplevels(ldg.margo.mod$Ocean2)
table(ldg.margo.mod$Ocean2)
## which points are to be excluded because of delta_carb_ion?
with(ldg.margo.mod, plot(delta_carb_ion, rarefy.sr, pch = 16, col = Ocean2))
legend("topleft", levels(ldg.margo.mod$Ocean2), pch = 16, col = 1:3)
ldg.margo.mod <- ldg.margo.mod[which(ldg.margo.mod$delta_carb_ion >= -10.908), ]
with(ldg.margo.mod, distrib.map(Longitude, Latitude, Ocean2))
table(ldg.margo.mod$Ocean2)
# based on using a 3500m / 4500m cut-off
dim(ldg.margo.mod)
# for dissolution, we only want delta_carb_ion to account for dissolution; as there is no dissolution at sites with delta_carb_ion > 0, set all such values to zero, so the variable becomes a measure of delta_carb_ion below zero.
ldg.margo.mod$delta_carb_ion[ldg.margo.mod$delta_carb_ion > 0] <- 0
with(ldg.margo.mod, plot(delta_carb_ion, rarefy.sr, pch = 16, col = Ocean2))
# expect salinity to impact both at the top and the bottom of the range.
with(ldg.margo.mod, plot(meanSal.0m, rarefy.sr, pch = 16, col = Ocean2))
# n.b. much of the pattern in the Atlantic is driven by SST, so although we see a relationship it isn't what it seems.
# work out ocean average
load("../../../Project/BFD/Environmental/Salinity/150414_salinity.RData")
summary(sal.mean.depth$Depth0m)
# the ocean average is 34.28, but looking at PF optima (see Be & Hutson, 1977) we find an average optimal salinity of 35.1. Given we care about how salinity differs from the species' optimum, this is the more logical value to use.
rm(sal.margo, sal.mean.depth, sal.sd.depth)
ldg.margo.mod$absMnSal.0m <- abs(ldg.margo.mod$meanSal.0m - 35.1)
with(ldg.margo.mod, plot(absMnSal.0m, rarefy.sr, pch = 16, col = Ocean2))
## remove sites which have the wrong taxonomy
# check the ranges of the environmental variables with and without these sites
summary(ldg.margo.mod)
summary(ldg.margo.mod[ldg.margo.mod$cons_tax == "Y", ])
# the ranges are the same, so excluding these sites should make little difference to the modelling results
ldg.margo.mod <- ldg.margo.mod[ldg.margo.mod$cons_tax == "Y", ]
## save this dataset
save(ldg.margo.mod, file = "Outputs/ldg_margo_mod2.RData")
## create three different datasets for Rarefied, Evenness & LineageAge, & FRic
## for richness
cols <- c("simpson", "simpsonEve", "FRic", "symbionts_obl", "symbionts_obl_abun", "symbionts_all", "symbionts_all_abun", "surface", "surface_subsurface", "subsurface", "subsurface_deep", "deep", "surfaceAbun", "surface_subsurfaceAbun", "subsurfaceAbun", "subsurface_deepAbun", "deepAbun", "MorphoAge", "LinAge", "MorphoAgeAbun", "LinAgeAbun", "cons_tax")
rsr.margo.mod <- ldg.margo.mod[, !(names(ldg.margo.mod) %in% cols)]
rm(cols)
# remove other NAs
summary(rsr.margo.mod)
rsr.margo.mod <- na.omit(rsr.margo.mod)
## for evenness / lineage age
cols <- c("sp.rich", "rarefy.sr", "FRic", "symbionts_obl", "symbionts_obl_abun", "symbionts_all", "symbionts_all_abun", "surface", "surface_subsurface", "subsurface", "subsurface_deep", "deep", "surfaceAbun", "surface_subsurfaceAbun", "subsurfaceAbun", "subsurface_deepAbun", "deepAbun", "cons_tax")
eve.margo.mod <- ldg.margo.mod[, !(names(ldg.margo.mod) %in% cols)]
rm(cols)
# remove other NAs
summary(eve.margo.mod)
eve.margo.mod <- na.omit(eve.margo.mod)
## for FRic
cols <- c("sp.rich", "rarefy.sr", "simpson", "simpsonEve", "MorphoAge", "LinAge", "MorphoAgeAbun", "LinAgeAbun", "cons_tax")
fric.margo.mod <- ldg.margo.mod[, !(names(ldg.margo.mod) %in% cols)]
rm(cols)
# remove other NAs
summary(fric.margo.mod)
fric.margo.mod <- na.omit(fric.margo.mod)
# Consider dimensions
dim(rsr.margo.mod)
dim(eve.margo.mod)
dim(fric.margo.mod)
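## n.b. the three datasets above follow one recipe (drop columns, then
## na.omit); a hedged sketch of a helper (make.mod.data is hypothetical):
# make.mod.data <- function(dat, drop.cols) na.omit(dat[, !(names(dat) %in% drop.cols)])
# rsr.margo.mod <- make.mod.data(ldg.margo.mod, cols) # likewise for eve / fric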
## 0ii. How to handle multiple points in a grid cell --------------------------
# for richness
head(rsr.margo.mod)
# n.b. don't include the finer-resolution SST among the matching columns, and productivity is at 1/6 degree, so exclude that as well
cols <- c("meanSST.1deg", "sdSST.1deg", "SST.1deg.exact", "mean.mld.t", "sd.mld.t", "mean.mld.d", "sd.mld.d", "mean.mld.v", "sd.mld.v", "mld.exact", "depth10deg", "meanSal.0m", "sdSal.0m", "sal.exact", "meanOxy", "sdOxy", "prop2.oxy", "oxy.exact", "delta_carb_ion", "delta_carb_ion.OK", "mean.bvf", "max.bvf", "depth.bvf")
dim(unique(rsr.margo.mod[, cols]))
tmp.1 <- which(duplicated(rsr.margo.mod[, cols]))
rsr.margo.mod$uni <- NA
# add a column for each unique set
rsr.margo.mod$uni[!duplicated(rsr.margo.mod[, cols])] <- 1:length(rsr.margo.mod$uni[!duplicated(rsr.margo.mod[, cols])])
## then sort through the duplicated rows to match them with the unique sets
# for each row in the data
for (i in tmp.1) {
# identify the matching rows (based on the relevant columns)
match.rows <- merge(rsr.margo.mod[i, ], rsr.margo.mod, by.x = cols, by.y = cols)
# extract the value of uni for these rows and assign it to the duplicated row
rsr.margo.mod$uni[i] <- unique(match.rows$uni.y)[!is.na(unique(match.rows$uni.y))]
}
rm(i, match.rows)
# make uni a factor, as each value represents a unique grid cell
rsr.margo.mod$uni <- factor(rsr.margo.mod$uni)
rsr.margo.dup <- rsr.margo.mod
# having got a column that identifies each unique grid cell, create a dataframe containing the mean value for each cell
# create a dataframe of the right length
rsr.margo.mod <- rsr.margo.dup[!duplicated(rsr.margo.dup$uni), ]
# for all the relevant columns calculate means, and replace these in the dataset
for (i in 1:ncol(rsr.margo.mod)) {
if (!is.factor(rsr.margo.mod[,i]) & !is.character(rsr.margo.mod[, i])) {
rsr.margo.mod[, i] <- as.numeric(tapply(rsr.margo.dup[, i], rsr.margo.dup$uni, mean, na.rm = TRUE))
}
}
rm(i)
save(rsr.margo.dup, file = "Outputs/160821 rsr_margo_dup.RData")
rm(rsr.margo.dup, cols, tmp.1)
# for evenness / lineage age
head(eve.margo.mod)
# n.b. don't include the finer-resolution SST among the matching columns, and productivity is at 1/6 degree, so exclude that as well
cols <- c("meanSST.1deg", "sdSST.1deg", "SST.1deg.exact", "mean.mld.t", "sd.mld.t", "mean.mld.d", "sd.mld.d", "mean.mld.v", "sd.mld.v", "mld.exact", "depth10deg", "meanSal.0m", "sdSal.0m", "sal.exact", "meanOxy", "sdOxy", "prop2.oxy", "oxy.exact", "delta_carb_ion", "delta_carb_ion.OK", "mean.bvf", "max.bvf", "depth.bvf")
dim(unique(eve.margo.mod[, cols]))
tmp.1 <- which(duplicated(eve.margo.mod[, cols]))
eve.margo.mod$uni <- NA
# add a column for each unique set
eve.margo.mod$uni[!duplicated(eve.margo.mod[, cols])] <- 1:length(eve.margo.mod$uni[!duplicated(eve.margo.mod[, cols])])
## then sort through the duplicated rows to match them with the unique sets
# for each row in the data
for (i in tmp.1) {
# identify the matching rows (based on the relevant columns)
match.rows <- merge(eve.margo.mod[i, ], eve.margo.mod, by.x = cols, by.y = cols)
# extract the value of uni for these rows and assign it to the duplicated row
eve.margo.mod$uni[i] <- unique(match.rows$uni.y)[!is.na(unique(match.rows$uni.y))]
}
rm(i, match.rows)
# make uni a factor, as each value represents a unique grid cell
eve.margo.mod$uni <- factor(eve.margo.mod$uni)
eve.margo.dup <- eve.margo.mod
# having got a column that identifies each unique grid cell, create a dataframe containing the mean value for each cell
# create a dataframe of the right length
eve.margo.mod <- eve.margo.dup[!duplicated(eve.margo.dup$uni), ]
# for all the relevant columns calculate means, and replace these in the dataset
for (i in 1:ncol(eve.margo.mod)) {
if (!is.factor(eve.margo.mod[,i]) & !is.character(eve.margo.mod[, i])) {
eve.margo.mod[, i] <- as.numeric(tapply(eve.margo.dup[, i], eve.margo.dup$uni, mean, na.rm = TRUE))
}
}
rm(i)
save(eve.margo.dup, file = "Outputs/160821 eve_margo_dup.RData")
rm(eve.margo.dup, cols, tmp.1)
# for FRic
head(fric.margo.mod)
# n.b. don't include the finer-resolution SST among the matching columns, and productivity is at 1/6 degree, so exclude that as well
cols <- c("meanSST.1deg", "sdSST.1deg", "SST.1deg.exact", "mean.mld.t", "sd.mld.t", "mean.mld.d", "sd.mld.d", "mean.mld.v", "sd.mld.v", "mld.exact", "depth10deg", "meanSal.0m", "sdSal.0m", "sal.exact", "meanOxy", "sdOxy", "prop2.oxy", "oxy.exact", "delta_carb_ion", "delta_carb_ion.OK", "mean.bvf", "max.bvf", "depth.bvf")
dim(unique(fric.margo.mod[, cols]))
tmp.1 <- which(duplicated(fric.margo.mod[, cols]))
fric.margo.mod$uni <- NA
# add a column for each unique set
fric.margo.mod$uni[!duplicated(fric.margo.mod[, cols])] <- 1:length(fric.margo.mod$uni[!duplicated(fric.margo.mod[, cols])])
## then sort through the duplicated rows to match them with the unique sets
# for each row in the data
for (i in tmp.1) {
# identify the matching rows (based on the relevant columns)
match.rows <- merge(fric.margo.mod[i, ], fric.margo.mod, by.x = cols, by.y = cols)
# extract the value of uni for these rows and assign it to the duplicated row
fric.margo.mod$uni[i] <- unique(match.rows$uni.y)[!is.na(unique(match.rows$uni.y))]
}
rm(i, match.rows)
# make uni a factor, as each value represents a unique grid cell
fric.margo.mod$uni <- factor(fric.margo.mod$uni)
fric.margo.dup <- fric.margo.mod
# having got a column that identifies each unique grid cell, create a dataframe containing the mean value for each cell
# create a dataframe of the right length
fric.margo.mod <- fric.margo.dup[!duplicated(fric.margo.dup$uni), ]
# for all the relevant columns calculate means, and replace these in the dataset
for (i in 1:ncol(fric.margo.mod)) {
if (!is.factor(fric.margo.mod[,i]) & !is.character(fric.margo.mod[, i])) {
fric.margo.mod[, i] <- as.numeric(tapply(fric.margo.dup[, i], fric.margo.dup$uni, mean, na.rm = TRUE))
}
}
rm(i)
save(fric.margo.dup, file = "Outputs/160821 fric_margo_dup.RData")
rm(fric.margo.dup, cols, tmp.1)
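## n.b. the three blocks above repeat the same grid-cell de-duplication; a
## hedged sketch of a helper doing the equivalent in one pass (collapse.grid
## is hypothetical, and match() on pasted keys replaces the merge() loop):
# collapse.grid <- function(dat, cols) {
#   # one id per unique combination of the 1-degree environmental columns
#   key <- do.call(paste, dat[, cols])
#   dat$uni <- factor(match(key, unique(key)))
#   # keep the first row of each cell, then overwrite numerics with cell means
#   out <- dat[!duplicated(dat$uni), ]
#   num <- !sapply(dat, is.factor) & !sapply(dat, is.character)
#   out[, num] <- lapply(dat[, num, drop = FALSE],
#                        function(x) as.numeric(tapply(x, dat$uni, mean, na.rm = TRUE)))
#   out
# }
# rsr.margo.mod <- collapse.grid(rsr.margo.dup, cols) # likewise for eve / fric (objects rm()'d above; sketch only)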
## 0iii. Consider relationships ---------------------------------------------
# use the most complete dataset, i.e. ldg.margo.mod
par(ask = TRUE)
for(i in c(5:34, ncol(ldg.margo.mod))) {
if (!is.character(ldg.margo.mod[, i]))
with(ldg.margo.mod, plot(ldg.margo.mod[, i], rarefy.sr, pch = 16, col = Ocean2, main = names(ldg.margo.mod)[i]))
}
par(ask = FALSE)
rm(i)
# check whether anything else should be logged (i.e. the histogram of each environmental variable)
summary(ldg.margo.mod)
par(ask = TRUE)
for(i in c(5:34, ncol(ldg.margo.mod))) {
if (!is.character(ldg.margo.mod[, i]))
with(ldg.margo.mod, hist(ldg.margo.mod[, i], main = names(ldg.margo.mod)[i]))
}
par(ask = FALSE)
rm(i)
# seem reasonable
## 0iv. Create plots ------------------------------------------------------
png("Figures/Ana_0iii_map_rsr_2.png", 700, 500)
with(rsr.margo.mod, distrib.map(Longitude, Latitude, rarefy.sr, palette = "matlab.like", main = "Rarefied species richness", col.water = "white", col.land = "black"))
dev.off()
png("Figures/Ana_0iii_map_eve_2.png", 700, 500)
with(eve.margo.mod, distrib.map(Longitude, Latitude, simpsonEve, palette = "matlab.like", main = "Simpson's Evenness", col.water = "white", col.land = "black"))
dev.off()
png("Figures/Ana_0iii_map_lna_2.png", 700, 500)
with(eve.margo.mod, distrib.map(Longitude, Latitude, LinAgeAbun, palette = "matlab.like", main = "Average Community Age", col.water = "white", col.land = "black"))
dev.off()
png("Figures/Ana_0iii_map_fric_2.png", 700, 500)
with(fric.margo.mod, distrib.map(Longitude, Latitude, FRic, palette = "matlab.like", main = "Functional richness", col.water = "white", col.land = "black"))
dev.off()
# tidy up
rm(ldg.margo.mod)
## 1. Check for correlation between explanatory variables ----------------------
names(rsr.margo.mod)
# variables are: mean/sd SST, mean/sd MLD, 10deg contour, mean/sd logProd, mean/sd Sal, Ocean2, carbonate ion, mean oxygen, prop2.oxy, and mean/max BVF
env.var <- c("meanSST.1deg", "sdSST.1deg", "mean.mld.t", "sd.mld.t", "depth10deg", "logProd.mn.ann", "logProd.sd.ann", "absMnSal.0m", "sdSal.0m", "Ocean2", "delta_carb_ion", "meanOxy", "prop2.oxy", "mean.bvf", "max.bvf")
# pairs plot
png("Figures/Ana_1_pairs_2.png", 1200, 1200)
pairs(rsr.margo.mod[, names(rsr.margo.mod) %in% env.var])
dev.off()
# variance inflation factor
vif(rsr.margo.mod[, names(rsr.margo.mod) %in% env.var])
# currently the only ones that are potentially correlated (VIF > 5 and visibly correlated on the pairs plot) are mean and sd of Prod, mean/sd MLD, and mean/max BVF; meanOxy and meanSST.1deg also seem to be correlated (as suggested). Look into these in a bit more detail
png("Figures/Ana_1_mnprodsdprod1deg_2.png")
with(rsr.margo.mod, plot(logProd.mn.ann, logProd.sd.ann, pch = 16)) # highly correlated.
dev.off()
# does the same hold for the mld?
png("Figures/Ana_1_mnMLDsdMLD_2.png")
with(rsr.margo.mod, plot(mean.mld.t, sd.mld.t, pch = 16)) # yes
dev.off()
png("Figures/Ana_1_mnBVFmxBVF_2.png")
with(rsr.margo.mod, plot(mean.bvf, max.bvf, pch = 16)) # yes
dev.off()
png("Figures/Ana_1_mnSST1degmnOxy_2.png")
with(rsr.margo.mod, plot(meanSST.1deg, meanOxy, pch = 16)) # highly correlated.
dev.off()
# therefore suggest exclusion of logProd.sd.ann, sd.mld.t, max.bvf and meanOxy from models, so
env.var <- c("meanSST.1deg", "sdSST.1deg", "mean.mld.t", "depth10deg", "logProd.mn.ann", "absMnSal.0m", "sdSal.0m", "Ocean2", "delta_carb_ion", "prop2.oxy")
# check this has fixed the problem
pairs(rsr.margo.mod[, names(rsr.margo.mod) %in% env.var] )
vif(rsr.margo.mod[, names(rsr.margo.mod) %in% env.var] )
# it has, so we now have the new set of EVs
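## a numeric cross-check on the pairs plot / VIF (a sketch; the 0.7 cut-off is
## arbitrary, and Ocean2 drops out of cor() as a factor):
# ev.cor <- cor(rsr.margo.mod[, names(rsr.margo.mod) %in% env.var & sapply(rsr.margo.mod, is.numeric)], use = "pairwise.complete.obs")
# which(abs(ev.cor) > 0.7 & upper.tri(ev.cor), arr.ind = TRUE)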
## 2. Model simplification -------------------------------------------------
## 2i. Create an OLS model and check for SAC --------------------------------
# an OLS model
mod.l0 <- lm(rarefy.sr ~ (poly(meanSST.1deg, 3) + sdSST.1deg + mean.mld.t + depth10deg + logProd.mn.ann + absMnSal.0m + sdSal.0m + prop2.oxy + Ocean2)^2 + delta_carb_ion, data = rsr.margo.mod)
# check model plots
png("Figures/Ana_2i_modl0_2.png", 600, 600)
par(mfrow = c(2, 2))
plot(mod.l0)
par(mfrow = c(1, 1))
dev.off()
# look for spatial autocorrelation in the residuals
# using spline.correlog
# n.b. this hasn't been run properly - for the final analysis it should be resamp = 1000
mod.l0.sac <- with(rsr.margo.mod, spline.correlog(Longitude, Latitude, mod.l0$residuals, latlon = TRUE, resamp = 1))
summary(mod.l0.sac)
png("Figures/Ana_2i_modl0SAC_2.png")
plot.spline.correlog.n(mod.l0.sac, xlab = "Distance / km")
dev.off()
# using correlog
mod.l0.SACcor <- with(rsr.margo.mod, correlog(Longitude, Latitude, z = residuals(mod.l0), na.rm = T, increment = 100, resamp = 1, latlon = T))
png("Figures/Ana_2i_modl0SACcor_2.png")
plot(mod.l0.SACcor$correlation, type = "b", pch = 1, cex = 1.2, lwd = 1.5, ylim = c(-0.5, 1), xlab = "distance", ylab = "Moran's I", cex.lab = 1.5, cex.axis = 1.2)
abline(h = 0)
dev.off()
rm(mod.l0.SACcor)
## look at the residuals plots
png("Figures/Ana_2i_modl0resid_2.png", 700, 500)
with(rsr.margo.mod, distrib.map(Longitude, Latitude, mod.l0$residuals, palette = "rwb"))
dev.off()
rm(mod.l0)
## 2ii. Create a GAM to check complexity / SAC -----------------------------
# n.b. no interaction terms here: as additive models, GAMs would need tensor-product smooths (te/ti) for interactions (a sketch follows the diagnostics below)
mod.g0 <- with(rsr.margo.mod, gam(rarefy.sr ~ s(Longitude, Latitude, k = 80, by = Ocean2) + s(meanSST.1deg, by = Ocean2) + sdSST.1deg + mean.mld.t + depth10deg + logProd.mn.ann + absMnSal.0m + sdSal.0m + prop2.oxy + Ocean2 + delta_carb_ion, gamma = 1.4))
summary(mod.g0)
png("Figures/Ana_2ii_modg0_2.png")
gam.check(mod.g0) # n.b. this gives an error for factors
dev.off()
par(mfrow = c(1,1))
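## if a smooth interaction were wanted, mgcv's tensor products provide one;
## a minimal sketch (not fitted as part of this analysis):
# mod.g.int <- gam(rarefy.sr ~ te(meanSST.1deg, logProd.mn.ann), data = rsr.margo.mod, gamma = 1.4)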
## calculate SAC
# using spline.correlog
mod.g0.SAC <- with(rsr.margo.mod, spline.correlog(Longitude, Latitude, mod.g0$residuals, latlon = TRUE, resamp = 1))
summary(mod.g0.SAC)
png("Figures/Ana_2ii_modg0SAC_2.png")
plot.spline.correlog.n(mod.g0.SAC, xlab = "Distance / km")
dev.off()
# using correlog
mod.g0.SACcor <- with(rsr.margo.mod, correlog(Longitude, Latitude, z = residuals(mod.g0), na.rm = T, increment = 100, resamp = 1, latlon = T))
png("Figures/Ana_2ii_modg0SACcor_2.png")
plot(mod.g0.SACcor$correlation, type = "b", pch = 1, cex = 1.2, lwd = 1.5, ylim = c(-0.5, 1), xlab = "distance", ylab = "Moran's I", cex.lab = 1.5, cex.axis = 1.2)
abline(h = 0)
dev.off()
rm(mod.g0, mod.g0.SAC, mod.g0.SACcor)
## 2iii. Create an optimised SARerror model --------------------------------
# Make a matrix of coordinates (X and Y coordinates)
ldg.coords <- cbind(rsr.margo.mod$Longitude, rsr.margo.mod$Latitude)
ldg.coords <- as.matrix(ldg.coords)
# run model optimisation
# getting problems with Error in solve.default(asyvar, tol = tol.solve) :
# system is computationally singular: reciprocal condition number = 8.20242e-19
# The suggested answer is to rescale if the variables are on very different scales, so go from
# mod.sar.opW <- with(rsr.margo.mod, sar.optimised(mod.l0.sac$real$x.intercept, rarefy.sr ~ (poly(meanSST.1deg, 3) + sdSST.1deg + mean.mld.t + depth10deg + logProd.mn.ann + absMnSal.0m + sdSal.0m + prop2.oxy + Ocean2)^2 + delta_carb_ion, ldg.coords, style = "W", tol = 4, longlat = TRUE, zero.policy = TRUE))
# to
summary(rsr.margo.mod) # check that ranges are roughly equivalent after scaling
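## a quicker scale check than reading summary(): compare sds directly (a
## sketch; variables with sds orders of magnitude larger are the rescaling candidates)
sapply(rsr.margo.mod[, c("meanSST.1deg", "mean.mld.t", "depth10deg")], sd, na.rm = TRUE)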
mod.sar.opW <- with(rsr.margo.mod, sar.optimised(mod.l0.sac$real$x.intercept, rarefy.sr ~ (poly(meanSST.1deg, 3) + sdSST.1deg + I(mean.mld.t/10) + I(depth10deg/100) + logProd.mn.ann + absMnSal.0m + sdSal.0m + prop2.oxy + Ocean2)^2 + delta_carb_ion, ldg.coords, style = "W", tol = 4, longlat = TRUE, zero.policy = TRUE))
# delta_carb_ion is deliberately given no interactions, as its relationship with species richness (at least the part we care about) shouldn't depend on anything else
summary(mod.sar.opW$obj, Nagelkerke = TRUE)
## check SAC has been removed
# using spline.correlog
mod.sar.opW.SAC <- with(rsr.margo.mod, spline.correlog(Longitude, Latitude, mod.sar.opW$obj$residuals, latlon = TRUE, resamp = 1))
summary(mod.sar.opW.SAC)
png("Figures/Ana_2iii_modSarOp0SAC_2.png")
plot.spline.correlog.n(mod.sar.opW.SAC, xlab = "Distance / km")
dev.off()
rm(mod.sar.opW.SAC)
# using correlog
mod.sar.opW.SACcor <- with(rsr.margo.mod, correlog(Longitude, Latitude, z = residuals(mod.sar.opW$obj), na.rm = T, increment = 100, resamp = 1, latlon = T))
png("Figures/Ana_2iii_modSarOp0SACcor_2.png")
plot(mod.sar.opW.SACcor$correlation, type = "b", pch = 1, cex = 1.2, lwd = 1.5, ylim = c(-0.5, 1), xlab = "distance", ylab = "Moran's I", cex.lab = 1.5, cex.axis = 1.2)
abline(h = 0)
dev.off()
rm(mod.sar.opW.SACcor)
# check whether different coding methods improve the AIC
mod.sar.opB <- with(rsr.margo.mod, sar.optimised(mod.l0.sac$real$x.intercept, rarefy.sr ~ (poly(meanSST.1deg, 3) + sdSST.1deg + I(mean.mld.t/10) + I(depth10deg/100) + logProd.mn.ann + absMnSal.0m + sdSal.0m + prop2.oxy + Ocean2)^2 + delta_carb_ion, ldg.coords, style = "B", tol = 4, longlat = TRUE, zero.policy = TRUE))
AIC(mod.sar.opW$obj) # 4253.397
AIC(mod.sar.opB$obj) # 4286.374
mod.sar.opS <- with(rsr.margo.mod, sar.optimised(mod.l0.sac$real$x.intercept, rarefy.sr ~ (poly(meanSST.1deg, 3) + sdSST.1deg + I(mean.mld.t/10) + I(depth10deg/100) + logProd.mn.ann + absMnSal.0m + sdSal.0m + prop2.oxy + Ocean2)^2 + delta_carb_ion, ldg.coords, style = "S", tol = 4, longlat = TRUE, zero.policy = TRUE))
AIC(mod.sar.opW$obj) # 4253.397
AIC(mod.sar.opS$obj) # 4259.442
mod.sar.opW
# So "W" is the best coding style and the best neighbourhood distance is 507.1066
rm(mod.sar.opS, mod.sar.opB)
## 2iv. Run model simplification -------------------------------------------
summary(mod.sar.opW$obj)
# re-run this optimised model through errorsarlm, so things like anova and lr.calc work
op.nb <- dnearneigh(ldg.coords, 0, mod.sar.opW$dist, longlat = TRUE)
op.w <- nb2listw(op.nb, glist = NULL, style = "W", zero.policy = TRUE)
mod.sar.op0 <- errorsarlm(mod.sar.opW$mod, listw = op.w, zero.policy = TRUE, tol.solve = 1e-18)
rm(op.nb)
# start model simplification
summary(mod.sar.op0, Nagelkerke = TRUE) # 0.89216
summary(mod.sar.op0)$Coef[order(summary(mod.sar.op0)$Coef[, 4]),]
mod.sar.op1 <- update(mod.sar.op0, ~. -I(depth10deg/100):logProd.mn.ann)
anova(mod.sar.op0, mod.sar.op1)
summary(mod.sar.op1, Nagelkerke = TRUE) # 0.89216
AIC(mod.sar.op1) # 4251.402
summary(mod.sar.op1)$Coef[order(summary(mod.sar.op1)$Coef[, 4]),]
mod.sar.op2 <- update(mod.sar.op1, ~. -poly(meanSST.1deg, 3):sdSST.1deg + poly(meanSST.1deg, 2):sdSST.1deg)
anova(mod.sar.op2, mod.sar.op1)
summary(mod.sar.op2, Nagelkerke = TRUE) # 0.89216
AIC(mod.sar.op2) # 4249.422
rm(mod.sar.op1)
summary(mod.sar.op2)$Coef[order(summary(mod.sar.op2)$Coef[, 4]),]
mod.sar.op3 <- update(mod.sar.op2, ~. -logProd.mn.ann:prop2.oxy )
anova(mod.sar.op3, mod.sar.op2)
summary(mod.sar.op3, Nagelkerke = TRUE) # 0.89215
AIC(mod.sar.op3) # 4247.492
rm(mod.sar.op2)
summary(mod.sar.op3)$Coef[order(summary(mod.sar.op3)$Coef[, 4]),]
mod.sar.op4 <- update(mod.sar.op3, ~. -sdSST.1deg:poly(meanSST.1deg, 2) + sdSST.1deg:poly(meanSST.1deg, 1))
anova(mod.sar.op4, mod.sar.op3)
summary(mod.sar.op4, Nagelkerke = TRUE) # 0.89214
AIC(mod.sar.op4) # 4245.561
rm(mod.sar.op3)
summary(mod.sar.op4)$Coef[order(summary(mod.sar.op4)$Coef[, 4]),]
mod.sar.op5 <- update(mod.sar.op4, ~. -poly(meanSST.1deg, 3):I(mean.mld.t/10) + poly(meanSST.1deg, 2):I(mean.mld.t/10))
anova(mod.sar.op5, mod.sar.op4)
summary(mod.sar.op5, Nagelkerke = TRUE) # 0.89213
AIC(mod.sar.op5) # 4243.659
rm(mod.sar.op4)
summary(mod.sar.op5)$Coef[order(summary(mod.sar.op5)$Coef[, 4]),]
mod.sar.op6 <- update(mod.sar.op5, ~. -sdSST.1deg:absMnSal.0m)
anova(mod.sar.op6, mod.sar.op5)
summary(mod.sar.op6, Nagelkerke = TRUE) # 0.89212
AIC(mod.sar.op6) # 4241.756
rm(mod.sar.op5)
summary(mod.sar.op6)$Coef[order(summary(mod.sar.op6)$Coef[, 4]),]
mod.sar.op7 <- update(mod.sar.op6, ~. -poly(meanSST.1deg, 3):I(depth10deg/100) + poly(meanSST.1deg, 2):I(depth10deg/100))
anova(mod.sar.op7, mod.sar.op6)
summary(mod.sar.op7, Nagelkerke = TRUE) # 0.89211
AIC(mod.sar.op7) # 4239.864
rm(mod.sar.op6)
summary(mod.sar.op7)$Coef[order(summary(mod.sar.op7)$Coef[, 4]),]
mod.sar.op8 <- update(mod.sar.op7, ~. -I(mean.mld.t/10):prop2.oxy )
anova(mod.sar.op8, mod.sar.op7)
summary(mod.sar.op8, Nagelkerke = TRUE) # 0.89209
AIC(mod.sar.op8) # 4238.092
rm(mod.sar.op7)
summary(mod.sar.op8)$Coef[order(summary(mod.sar.op8)$Coef[, 4]),]
mod.sar.op9 <- update(mod.sar.op8, ~. -sdSST.1deg:I(depth10deg/100) )
anova(mod.sar.op9, mod.sar.op8)
summary(mod.sar.op9, Nagelkerke = TRUE) # 0.89206
AIC(mod.sar.op9) # 4236.386
rm(mod.sar.op8)
summary(mod.sar.op9)$Coef[order(summary(mod.sar.op9)$Coef[, 4]),]
mod.sar.op10 <- update(mod.sar.op9, ~. -sdSST.1deg:logProd.mn.ann )
anova(mod.sar.op10, mod.sar.op9)
summary(mod.sar.op10, Nagelkerke = TRUE) # 0.892
AIC(mod.sar.op10) # 4234.991
rm(mod.sar.op9)
summary(mod.sar.op10)$Coef[order(summary(mod.sar.op10)$Coef[, 4]),]
mod.sar.op11 <- update(mod.sar.op10, ~. -sdSal.0m:Ocean2)
anova(mod.sar.op11, mod.sar.op10)
summary(mod.sar.op11, Nagelkerke = TRUE) # 0.89194
AIC(mod.sar.op11) # 4231.56
rm(mod.sar.op10)
summary(mod.sar.op11)$Coef[order(summary(mod.sar.op11)$Coef[, 4]),]
mod.sar.op12 <- update(mod.sar.op11, ~. -absMnSal.0m:sdSal.0m )
anova(mod.sar.op12, mod.sar.op11)
summary(mod.sar.op12, Nagelkerke = TRUE) # 0.89189
AIC(mod.sar.op12) # 4230.036
rm(mod.sar.op11)
summary(mod.sar.op12)$Coef[order(summary(mod.sar.op12)$Coef[, 4]),]
mod.sar.op13 <- update(mod.sar.op12, ~. -I(mean.mld.t/10):sdSal.0m )
anova(mod.sar.op13, mod.sar.op12)
summary(mod.sar.op13, Nagelkerke = TRUE) # 0.89184
AIC(mod.sar.op13) # 4228.506
rm(mod.sar.op12)
summary(mod.sar.op13)$Coef[order(summary(mod.sar.op13)$Coef[, 4]),]
mod.sar.op14 <- update(mod.sar.op13, ~. -I(depth10deg/100):Ocean2)
anova(mod.sar.op14, mod.sar.op13)
summary(mod.sar.op14, Nagelkerke = TRUE) # 0.89176
AIC(mod.sar.op14) # 4225.307
rm(mod.sar.op13)
summary(mod.sar.op14)$Coef[order(summary(mod.sar.op14)$Coef[, 4]),]
mod.sar.op15 <- update(mod.sar.op14, ~. -I(mean.mld.t/10):I(depth10deg/100) )
anova(mod.sar.op15, mod.sar.op14)
summary(mod.sar.op15, Nagelkerke = TRUE) # 0.89168
AIC(mod.sar.op15) # 4224.159
rm(mod.sar.op14)
summary(mod.sar.op15)$Coef[order(summary(mod.sar.op15)$Coef[, 4]),]
mod.sar.op16 <- update(mod.sar.op15, ~. -poly(meanSST.1deg, 3):absMnSal.0m + poly(meanSST.1deg, 2):absMnSal.0m)
anova(mod.sar.op16, mod.sar.op15)
summary(mod.sar.op16, Nagelkerke = TRUE) # 0.89157
AIC(mod.sar.op16) # 4223.157
rm(mod.sar.op15)
summary(mod.sar.op16)$Coef[order(summary(mod.sar.op16)$Coef[, 4]),]
mod.sar.op17 <- update(mod.sar.op16, ~. -sdSST.1deg:prop2.oxy )
anova(mod.sar.op17, mod.sar.op16)
summary(mod.sar.op17, Nagelkerke = TRUE) # 0.89145
AIC(mod.sar.op17) # 4222.382
rm(mod.sar.op16)
summary(mod.sar.op17)$Coef[order(summary(mod.sar.op17)$Coef[, 4]),]
mod.sar.op18 <- update(mod.sar.op17, ~. -I(depth10deg/100):poly(meanSST.1deg, 2) + I(depth10deg/100):poly(meanSST.1deg, 1))
anova(mod.sar.op18, mod.sar.op17)
summary(mod.sar.op18, Nagelkerke = TRUE) # 0.89128
AIC(mod.sar.op18) # 4222.049
rm(mod.sar.op17)
summary(mod.sar.op18)$Coef[order(summary(mod.sar.op18)$Coef[, 4]),]
mod.sar.op19 <- update(mod.sar.op18, ~. -I(depth10deg/100):poly(meanSST.1deg, 1))
anova(mod.sar.op19, mod.sar.op18)
summary(mod.sar.op19, Nagelkerke = TRUE) # 0.89124
AIC(mod.sar.op19) # 4220.432
rm(mod.sar.op18)
summary(mod.sar.op19)$Coef[order(summary(mod.sar.op19)$Coef[, 4]),]
mod.sar.op20 <- update(mod.sar.op19, ~. -I(depth10deg/100):prop2.oxy)
anova(mod.sar.op20, mod.sar.op19)
summary(mod.sar.op20, Nagelkerke = TRUE) # 0.89111
AIC(mod.sar.op20) # 4219.697
rm(mod.sar.op19)
summary(mod.sar.op20)$Coef[order(summary(mod.sar.op20)$Coef[, 4]),]
mod.sar.op21 <- update(mod.sar.op20, ~. -poly(meanSST.1deg, 3):prop2.oxy + poly(meanSST.1deg, 2):prop2.oxy)
anova(mod.sar.op21, mod.sar.op20)
summary(mod.sar.op21, Nagelkerke = TRUE) # 0.89095
AIC(mod.sar.op21) # 4219.224
rm(mod.sar.op20)
summary(mod.sar.op21)$Coef[order(summary(mod.sar.op21)$Coef[, 4]),]
mod.sar.op22 <- update(mod.sar.op21, ~. -poly(meanSST.1deg, 3):logProd.mn.ann + poly(meanSST.1deg, 2):logProd.mn.ann)
anova(mod.sar.op22, mod.sar.op21)
summary(mod.sar.op22, Nagelkerke = TRUE) # 0.89075
AIC(mod.sar.op22) # 4219.205
rm(mod.sar.op21)
summary(mod.sar.op22)$Coef[order(summary(mod.sar.op22)$Coef[, 4]),]
mod.sar.op23 <- update(mod.sar.op22, ~. -logProd.mn.ann:poly(meanSST.1deg, 2) + logProd.mn.ann:poly(meanSST.1deg, 1))
anova(mod.sar.op23, mod.sar.op22)
summary(mod.sar.op23, Nagelkerke = TRUE) # 0.89068
AIC(mod.sar.op23) # 4217.853
rm(mod.sar.op22)
summary(mod.sar.op23)$Coef[order(summary(mod.sar.op23)$Coef[, 4]),]
mod.sar.op24 <- update(mod.sar.op23, ~. -I(mean.mld.t/10):poly(meanSST.1deg, 2) + I(mean.mld.t/10):poly(meanSST.1deg, 1))
anova(mod.sar.op24, mod.sar.op23)
summary(mod.sar.op24, Nagelkerke = TRUE) # 0.89047
AIC(mod.sar.op24) # 4217.919
rm(mod.sar.op23)
summary(mod.sar.op24)$Coef[order(summary(mod.sar.op24)$Coef[, 4]),]
mod.sar.op25 <- update(mod.sar.op24, ~. -I(mean.mld.t/10):poly(meanSST.1deg, 1))
anova(mod.sar.op25, mod.sar.op24)
summary(mod.sar.op25, Nagelkerke = TRUE) # 0.89047
AIC(mod.sar.op25) # 4215.922
rm(mod.sar.op24)
summary(mod.sar.op25)$Coef[order(summary(mod.sar.op25)$Coef[, 4]),]
mod.sar.op26 <- update(mod.sar.op25, ~. -sdSST.1deg:Ocean2)
anova(mod.sar.op26, mod.sar.op25)
summary(mod.sar.op26, Nagelkerke = TRUE) # 0.89024
AIC(mod.sar.op26) # 4214.076
rm(mod.sar.op25)
summary(mod.sar.op26)$Coef[order(summary(mod.sar.op26)$Coef[, 4]),]
# at this point all remaining terms are significant (p < 0.05) or cannot be removed; check whether any others should still be dropped (particularly those involving Ocean2)
mod.sar.op27 <- update(mod.sar.op26, ~. -I(mean.mld.t/10):absMnSal.0m )
anova(mod.sar.op27, mod.sar.op26)
summary(mod.sar.op27, Nagelkerke = TRUE) # 0.88987
AIC(mod.sar.op27) # 4215.684
rm(mod.sar.op26)
summary(mod.sar.op27)$Coef[order(summary(mod.sar.op27)$Coef[, 4]),]
mod.sar.op28 <- update(mod.sar.op27, ~. -logProd.mn.ann:absMnSal.0m )
anova(mod.sar.op28, mod.sar.op27)
summary(mod.sar.op28, Nagelkerke = TRUE) # 0.88968
AIC(mod.sar.op28) # 4215.515
rm(mod.sar.op27)
summary(mod.sar.op28)$Coef[order(summary(mod.sar.op28)$Coef[, 4]),]
# at this point all remaining terms are significant (p < 0.05) or cannot be removed; check whether any others should still be dropped (particularly those involving Ocean2)
mod.sar.op29 <- update(mod.sar.op28, ~. -sdSST.1deg:sdSal.0m)
anova(mod.sar.op29, mod.sar.op28)
# nothing else can be removed
mod.sar.opf <- mod.sar.op28
rm(mod.sar.op28, mod.sar.op29)
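## n.b. each simplification step above follows the same recipe (drop the least
## significant removable term, confirm with anova and AIC); a hedged sketch of
## the bookkeeping, assuming summary.sarlm stores the Nagelkerke r2 in $NK:
# sar.path <- function(mods) data.frame(
#   AIC = sapply(mods, AIC),
#   r2 = sapply(mods, function(m) summary(m, Nagelkerke = TRUE)$NK))
# sar.path(list(full = mod.sar.op0, final = mod.sar.opf))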
## 2v. Create a plot of model parameters --------------------------------------
summary(mod.sar.opf, Nagelkerke = T) # r2 = 0.88968
# generate a dataframe of coefficients
(ms.coef <- data.frame(names = names(mod.sar.opf$coefficients), coef.sar = mod.sar.opf$coefficients, row.names = 1:length(mod.sar.opf$coefficients), stars = NA))
# reorder the rows to something more sensible
order.coef.ms <- c(1:23, 40:45, 24:39)
(ms.coef <- ms.coef[order.coef.ms,])
rm(order.coef.ms)
# add a column of significance stars
stars <- c(0.001, 0.01, 0.05, 0.1)
names(stars) <- c("***", "**", "*", ".")
for (i in 1:length(stars)) {
ms.coef$stars[which(summary(mod.sar.opf)$Coef[, 4] <= stars[i] & is.na(ms.coef$stars))] <- names(stars)[i]
}
rm(i)
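## n.b. base R's symnum() produces the same star coding in one call (a sketch;
## the same applies to the LR star loops in 2vi / 2vii below):
# symnum(summary(mod.sar.opf)$Coef[, 4], corr = FALSE,
#        cutpoints = c(0, 0.001, 0.01, 0.05, 0.1, 1),
#        symbols = c("***", "**", "*", ".", " "))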
# plot the absolute coefficients
png("Figures/Ana_2v_coef_modsaropf_2.png", width = 1000, height = 750)
plt.def <- par("plt")
par(plt = c(plt.def[1:2], 0.5, plt.def[4]))
tmp.x <- barplot(abs(ms.coef$coef.sar), names = ms.coef$names, las = 2, ylim = c(0, max(abs(ms.coef$coef.sar)) + 50))
text(tmp.x, abs(ms.coef$coef.sar) + 20, ms.coef$stars)
par(plt = plt.def)
dev.off()
rm(plt.def, tmp.x, ms.coef)
## 2vi. Calculate likelihood ratios for the SAR model ----------------------
# best model is
summary(mod.sar.opf, Nagelkerke = T) # r2 = 0.88968
AIC(mod.sar.opf) # 4215.515
# removing mean temp^3
mod.sar.lr.mnt3 <- update(mod.sar.opf, ~. -poly(meanSST.1deg, 3) + poly(meanSST.1deg, 2) - poly(meanSST.1deg, 3):sdSal.0m + poly(meanSST.1deg, 2):sdSal.0m - poly(meanSST.1deg, 3):Ocean2 + poly(meanSST.1deg, 2):Ocean2)
summary(mod.sar.lr.mnt3, Nagelkerke = T) # r2 = 0.87958
AIC(mod.sar.lr.mnt3) # 4300.394
lrtest(mod.sar.opf, mod.sar.lr.mnt3) # n.b. this is basically an anova
# LR = < 2.338e-16 ***
# removing mean temp^2
mod.sar.lr.mnt2 <- update(mod.sar.opf, ~. -poly(meanSST.1deg, 3) + poly(meanSST.1deg, 1) - poly(meanSST.1deg, 3):sdSal.0m + poly(meanSST.1deg, 1):sdSal.0m - Ocean2:poly(meanSST.1deg, 3) + Ocean2:poly(meanSST.1deg, 1) - prop2.oxy:poly(meanSST.1deg, 2) + prop2.oxy:poly(meanSST.1deg, 1) - absMnSal.0m:poly(meanSST.1deg, 2) + absMnSal.0m:poly(meanSST.1deg, 1) )
summary(mod.sar.lr.mnt2, Nagelkerke = T) # r2 = 0.86771
AIC(mod.sar.lr.mnt2) # 4388.045
lrtest(mod.sar.opf, mod.sar.lr.mnt2)
# LR = < 2.2e-16 ***
# removing mean temp
mod.sar.lr.mnt <- update(mod.sar.opf, ~. -poly(meanSST.1deg, 3) - poly(meanSST.1deg, 3):sdSal.0m - Ocean2:poly(meanSST.1deg, 3) - prop2.oxy:poly(meanSST.1deg, 2) - absMnSal.0m:poly(meanSST.1deg, 2) - sdSST.1deg:poly(meanSST.1deg, 1) - logProd.mn.ann:poly(meanSST.1deg, 1))
summary(mod.sar.lr.mnt, Nagelkerke = T) # r2 = 0.83188
AIC(mod.sar.lr.mnt) # 4626.084
lrtest(mod.sar.opf, mod.sar.lr.mnt)
# LR = < 2.2e-16 ***
# removing sd temp
mod.sar.lr.sdt <- update(mod.sar.opf, ~. -sdSST.1deg - sdSST.1deg:I(mean.mld.t/10) - sdSST.1deg:sdSal.0m - sdSST.1deg:poly(meanSST.1deg, 1))
summary(mod.sar.lr.sdt, Nagelkerke = T) # r2 = 0.88817
AIC(mod.sar.lr.sdt) # 4221.922
lrtest(mod.sar.opf, mod.sar.lr.sdt)
# LR = 0.006105 **
# removing mld temp
mod.sar.lr.mld <- update(mod.sar.opf, ~. -I(mean.mld.t/10) - sdSST.1deg:I(mean.mld.t/10) - I(mean.mld.t/10):logProd.mn.ann - I(mean.mld.t/10):Ocean2)
summary(mod.sar.lr.mld, Nagelkerke = T) # r2 = 0.88612
AIC(mod.sar.lr.mld) # 4239.213
lrtest(mod.sar.opf, mod.sar.lr.mld)
# LR = 2.735e-06 ***
# removing depth of 10 degree contour
mod.sar.lr.d10 <- update(mod.sar.opf, ~. -I(depth10deg/100) - I(depth10deg/100):absMnSal.0m - I(depth10deg/100):sdSal.0m)
summary(mod.sar.lr.d10, Nagelkerke = T) # r2 = 0.88675
AIC(mod.sar.lr.d10) # 4237.339
lrtest(mod.sar.opf, mod.sar.lr.d10)
# LR = 3.955e-06 ***
# removing mean log Prod
mod.sar.lr.prod <- update(mod.sar.opf, ~. -logProd.mn.ann - I(mean.mld.t/10):logProd.mn.ann - logProd.mn.ann:sdSal.0m - logProd.mn.ann:Ocean2 - logProd.mn.ann:poly(meanSST.1deg, 1))
summary(mod.sar.lr.prod, Nagelkerke = T) # r2 = 0.88361
AIC(mod.sar.lr.prod) # 4260.249
lrtest(mod.sar.opf, mod.sar.lr.prod)
# LR = 2.068e-10 ***
# removing mean salinity
mod.sar.lr.msal <- update(mod.sar.opf, ~. -absMnSal.0m - I(depth10deg/100):absMnSal.0m - absMnSal.0m:prop2.oxy - absMnSal.0m:Ocean2 - absMnSal.0m:poly(meanSST.1deg, 2))
summary(mod.sar.lr.msal, Nagelkerke = T) # r2 = 0.88497
AIC(mod.sar.lr.msal) # 4245.839
lrtest(mod.sar.opf, mod.sar.lr.msal)
# LR = 1.85e-07 ***
# removing sd salinity
mod.sar.lr.sdsal <- update(mod.sar.opf, ~. -sdSal.0m - poly(meanSST.1deg, 3):sdSal.0m - sdSST.1deg:sdSal.0m - I(depth10deg/100):sdSal.0m - logProd.mn.ann:sdSal.0m - sdSal.0m:prop2.oxy)
summary(mod.sar.lr.sdsal, Nagelkerke = T) # r2 = 0.88583
AIC(mod.sar.lr.sdsal) # 4235.882
lrtest(mod.sar.opf, mod.sar.lr.sdsal)
# LR = 1.504e-05 ***
# removing prop2.oxy
mod.sar.lr.oxy <- update(mod.sar.opf, ~. -prop2.oxy - absMnSal.0m:prop2.oxy - sdSal.0m:prop2.oxy - prop2.oxy:Ocean2 - prop2.oxy:poly(meanSST.1deg, 2))
summary(mod.sar.lr.oxy, Nagelkerke = T) # r2 = 0.8874
AIC(mod.sar.lr.oxy) # 4223.214
lrtest(mod.sar.opf, mod.sar.lr.oxy)
# LR = 0.002863 **
# removing Ocean2
mod.sar.lr.oce <- update(mod.sar.opf, ~. -Ocean2 - I(mean.mld.t/10):Ocean2 - logProd.mn.ann:Ocean2 - absMnSal.0m:Ocean2 - prop2.oxy:Ocean2 - Ocean2:poly(meanSST.1deg, 3))
summary(mod.sar.lr.oce, Nagelkerke = T) # r2 = 0.87856
AIC(mod.sar.lr.oce) # 4285.282
lrtest(mod.sar.opf, mod.sar.lr.oce)
# LR = 1.615e-14 ***
# removing delta_carb_ion
mod.sar.lr.dis <- update(mod.sar.opf, ~. -delta_carb_ion)
summary(mod.sar.lr.dis, Nagelkerke = T) # r2 = 0.88888
AIC(mod.sar.lr.dis) # 4221.173
lrtest(mod.sar.opf, mod.sar.lr.dis)
# LR = 0.005652 **
# create a data frame of these likelihood ratios
ms.lr <- data.frame(names = c("mnt3", "mnt2", "mnt", "sdt", "mld", "d10", "prod", "msal", "sdsal", "oxy", "oce", "dis"), lr = NA, p = NA, stars = NA)
ms.lr$lr <- sapply(ms.lr$names, function (x) lrtest(mod.sar.opf, eval(parse(text = paste("mod.sar.lr.", x, sep = ""))))$Chisq[2])
ms.lr$p <- sapply(ms.lr$names, function (x) lrtest(mod.sar.opf, eval(parse(text = paste("mod.sar.lr.", x, sep = ""))))$Pr[2])
ms.lr
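## n.b. eval(parse()) works, but get() does the same name lookup more directly (sketch):
# ms.lr$lr <- sapply(ms.lr$names, function(x) lrtest(mod.sar.opf, get(paste0("mod.sar.lr.", x)))$Chisq[2])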
# add a column of significance stars
stars # a vector of significance stars
for (i in 1:length(stars)) {
ms.lr$stars[which(ms.lr$p <= stars[i] & is.na(ms.lr$stars))] <- names(stars)[i]
}
rm(i)
# plot these as a barplot
png("Figures/Ana_2vi_LRatio_sar_opf_2.png", width = 550)
tmp.x <- barplot(ms.lr$lr, names = ms.lr$names, las = 2, ylim = c(0, max(ms.lr$lr) + 30))
text(tmp.x, ms.lr$lr + 20, ms.lr$stars)
dev.off()
rm(tmp.x)
## 2vii. Calculate likelihood ratios for groups of EVs ---------------------
# best model is
summary(mod.sar.opf, Nagelkerke = T) # r2 = 0.88968
AIC(mod.sar.opf) # 4215.515
# removing temp
mod.sar.lr.temp <- mod.sar.lr.mnt
summary(mod.sar.lr.temp, Nagelkerke = T) # r2 = 0.83188
AIC(mod.sar.lr.temp) # 4626.084
lrtest(mod.sar.opf, mod.sar.lr.temp)
# LR = < 2.2e-16 ***
# removing structure
mod.sar.lr.str <- update(mod.sar.opf, ~. -I(mean.mld.t/10) - I(depth10deg/100) - sdSST.1deg:I(mean.mld.t/10) - I(mean.mld.t/10):logProd.mn.ann - I(mean.mld.t/10):Ocean2 - I(depth10deg/100):absMnSal.0m - I(depth10deg/100):sdSal.0m)
summary(mod.sar.lr.str, Nagelkerke = T) # r2 = 0.88397
AIC(mod.sar.lr.str) # 4252.977
lrtest(mod.sar.opf, mod.sar.lr.str)
# LR = 8.777e-09 ***
# removing stability
mod.sar.lr.stable <- update(mod.sar.opf, ~. -sdSST.1deg - sdSal.0m - sdSST.1deg:I(mean.mld.t/10) - sdSST.1deg:sdSal.0m - sdSST.1deg:poly(meanSST.1deg, 1) - poly(meanSST.1deg, 3):sdSal.0m - I(depth10deg/100):sdSal.0m - logProd.mn.ann:sdSal.0m - sdSal.0m:prop2.oxy)
summary(mod.sar.lr.stable, Nagelkerke = T) # r2 = 0.88512
AIC(mod.sar.lr.stable) # 4236.42
lrtest(mod.sar.opf, mod.sar.lr.stable)
# LR = 1.128e-05 ***
# removing productivity
summary(mod.sar.lr.prod, Nagelkerke = T) # r2 = 0.88361
AIC(mod.sar.lr.prod) # 4260.249
lrtest(mod.sar.opf, mod.sar.lr.prod)
# LR = 2.068e-10 ***
# removing stress
mod.sar.lr.sts <- update(mod.sar.opf, ~. -absMnSal.0m - prop2.oxy - I(depth10deg/100):absMnSal.0m - absMnSal.0m:prop2.oxy - absMnSal.0m:Ocean2 - sdSal.0m:prop2.oxy - prop2.oxy:Ocean2 - absMnSal.0m:poly(meanSST.1deg, 2) - prop2.oxy:poly(meanSST.1deg, 2))
summary(mod.sar.lr.sts, Nagelkerke = T) # r2 = 0.88383
AIC(mod.sar.lr.sts) # 4244.241
lrtest(mod.sar.opf, mod.sar.lr.sts)
# LR = 4.514e-07 ***
# removing Ocean2
summary(mod.sar.lr.oce, Nagelkerke = T) # r2 = 0.87856
AIC(mod.sar.lr.oce) # 4285.282
lrtest(mod.sar.opf, mod.sar.lr.oce)
# LR = 1.615e-14 ***
# removing delta_carb_ion
summary(mod.sar.lr.dis, Nagelkerke = T) # r2 = 0.88888
AIC(mod.sar.lr.dis) # 4221.173
lrtest(mod.sar.opf, mod.sar.lr.dis)
# LR = 0.005652 **
# create a data frame of these likelihood ratios
ms.lr.group <- data.frame(names = c("temp", "str", "stable", "prod", "sts", "oce", "dis"), lr = NA, p = NA, stars = NA)
ms.lr.group$lr <- sapply(ms.lr.group$names, function (x) lrtest(mod.sar.opf, eval(parse(text = paste("mod.sar.lr.", x, sep = ""))))$Chisq[2])
ms.lr.group$p <- sapply(ms.lr.group$names, function (x) lrtest(mod.sar.opf, eval(parse(text = paste("mod.sar.lr.", x, sep = ""))))$Pr[2])
ms.lr.group
# add a column of significance stars
stars # a vector of significance stars
for (i in 1:length(stars)) {
ms.lr.group$stars[which(ms.lr.group$p <= stars[i] & is.na(ms.lr.group$stars))] <- names(stars)[i]
}
rm(i)
# plot these as a barplot
png("Figures/Ana_2vii_LRatio_sar_opf_group_2.png", width = 550)
tmp.x <- barplot(ms.lr.group$lr, names = ms.lr.group$names, las = 2, ylim = c(0, max(ms.lr.group$lr) + 30))
text(tmp.x, ms.lr.group$lr + 20, ms.lr.group$stars)
dev.off()
rm(tmp.x)
rm(mod.sar.lr.d10, mod.sar.lr.dis, mod.sar.lr.mld, mod.sar.lr.mnt, mod.sar.lr.mnt2, mod.sar.lr.mnt3, mod.sar.lr.msal, mod.sar.lr.oce, mod.sar.lr.prod, mod.sar.lr.oxy, mod.sar.lr.sts, mod.sar.lr.sdsal, mod.sar.lr.sdt, mod.sar.lr.stable, mod.sar.lr.str, mod.sar.lr.temp)
## 2viii. Could we just use full models? -----------------------------------
# compare full model and simplified model for all coefficients
lr.sar.op0 <- lr.calc(mod.sar.op0)
par(mfrow = c(1,2))
sar.plot(mod.sar.op0)
lr.sar.opf <- lr.calc(mod.sar.opf) # n.b. this produces the same result as ms.lr
sar.plot(mod.sar.opf)
par(mfrow = c(1,1))
png("Figures/Ana_2viii_LRatio_opfop0_2.png", width = 800)
# get the order by first running lr.plot without the order argument
lr.plot(lr.sar.op0, lr.sar.opf, order = c(9:7, 4:3, 12:11, 5, 1, 10, 6, 2), leg.txt = c("Full", "Simplified"), ylab = "Log Likelihood ratio", star.pos = 20)
dev.off()
# also for groups of variables
(tmp <- data.frame(names = model.evs(mod.sar.op0), group = c("Stability", "Productivity", "Stress", "Stability", "Stress", "Ocean", "Dissolution", "Temperature", "Temperature", "Temperature", "Vertical niche structure", "Vertical niche structure")))
lr.sar.op0g <- lr.calc(mod.sar.op0, tmp)
rm(tmp)
(tmp <- data.frame(names = model.evs(mod.sar.opf), group = c("Stability", "Productivity", "Stress", "Stability", "Stress", "Ocean", "Dissolution", "Temperature", "Temperature", "Temperature", "Vertical niche structure", "Vertical niche structure")))
lr.sar.opfg <- lr.calc(mod.sar.opf, tmp)
rm(tmp)
png("Figures/Ana_2viii_LRatio_g_opfop0_2.png", width = 800)
lr.plot(lr.sar.op0g, lr.sar.opfg, order = c(6:7, 4:3, 5, 2:1), leg.x = 17, leg.y = 400, leg.txt = c("Full", "Simplified"), ylab = "Log Likelihood ratio", star.pos = 20)
dev.off()
## 2ix. Does optimisation differ for simplified? ---------------------------
mod.fw.rop <- with(rsr.margo.mod, sar.optimised(mod.l0.sac$real$x.intercept, mod.sar.opf$call$formula, ldg.coords, style = "W", tol = 4, longlat = TRUE, zero.policy = TRUE))
mod.fw.rop # 506.9646
AIC(mod.fw.rop$obj)
# 4215.42
mod.fb.rop <- with(rsr.margo.mod, sar.optimised(mod.l0.sac$real$x.intercept, mod.sar.opf$call$formula, ldg.coords, style = "B", tol = 4, longlat = TRUE, zero.policy = TRUE))
mod.fb.rop # 506.3027
AIC(mod.fb.rop$obj)
# 4256.436
mod.fs.rop <- with(rsr.margo.mod, sar.optimised(mod.l0.sac$real$x.intercept, mod.sar.opf$call$formula, ldg.coords, style = "S", tol = 4, longlat = TRUE, zero.policy = TRUE))
mod.fs.rop # 506.9893
AIC(mod.fs.rop$obj)
# 4222.384
# therefore justified in using W
rm(mod.fw.rop, mod.fb.rop, mod.fs.rop, ms.lr, ms.lr.group)
## 3. How do the different Oceans compare? ---------------------------------
# best model is
summary(mod.sar.opf, Nagelkerke = T) # r2 = 0.88968
AIC(mod.sar.opf) # 4215.515
# do I have sufficient points for each ocean?
table(rsr.margo.mod$Ocean2) # should do
# c.f. Atlantic: 372 Indian: 157 Pacific: 146, which were the values for bfd
## 3i. set up model for Atlantic -----------------------------
# run model based on best model for complete data with only Atlantic
atl.nb <- dnearneigh(ldg.coords[rsr.margo.mod$Ocean2 == "Atlantic", ], 0, mod.sar.opW$dist, longlat = TRUE)
atl.w <- nb2listw(atl.nb, glist = NULL, style = "W", zero.policy = TRUE)
rm(atl.nb)
## 3ii. set up model for Indian -----------------------------------
# run model based on best model for complete data with only Indian
ind.nb <- dnearneigh(ldg.coords[rsr.margo.mod$Ocean2 == "Indian", ], 0, mod.sar.opW$dist, longlat = TRUE)
ind.w <- nb2listw(ind.nb, glist = NULL, style = "W", zero.policy = TRUE)
rm(ind.nb)
## 3iii. set up model for Pacific ---------------------------
pac.nb <- dnearneigh(ldg.coords[rsr.margo.mod$Ocean2 == "Pacific", ], 0, mod.sar.opW$dist, longlat = TRUE)
pac.w <- nb2listw(pac.nb, glist = NULL, style = "W", zero.policy = TRUE)
rm(pac.nb)
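## n.b. 3i-3iii repeat one recipe; a hedged sketch of an equivalent helper
## (ocean.listw is hypothetical; same distance and "W" coding as the global model):
# ocean.listw <- function(oc) {
#   nb <- dnearneigh(ldg.coords[rsr.margo.mod$Ocean2 == oc, ], 0, mod.sar.opW$dist, longlat = TRUE)
#   nb2listw(nb, glist = NULL, style = "W", zero.policy = TRUE)
# }
# atl.w <- ocean.listw("Atlantic") # likewise "Indian" / "Pacific"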
# n.b. the simplification in 3i-3iii was removed, as simplification now starts only from the model with significant interactions (see 3vii); sections 3iv-3vi were therefore also unnecessary
## 3vii. only significant interactions for Atlantic -----------------------------
summary(mod.sar.op0)
summary(mod.sar.opf)
# get formula without the Ocean2
op.formula <- update(mod.sar.opf$call$formula, ~.-Ocean2 - poly(meanSST.1deg, 3):Ocean2 - I(mean.mld.t/10):Ocean2 - logProd.mn.ann:Ocean2 - absMnSal.0m:Ocean2 - prop2.oxy:Ocean2)
# create a model with only significant values
mod.sar.atlI <- errorsarlm(op.formula, listw = atl.w, zero.policy = TRUE, tol.solve = 1e-18, data = rsr.margo.mod[rsr.margo.mod$Ocean2 == "Atlantic", ])
par(mfrow = c(1, 2))
sar.plot(mod.sar.atlI)
par(mfrow = c(1, 1))
# try adding in the other interactions
mod.sar.atlI2 <- update(mod.sar.atlI, ~. - poly(meanSST.1deg, 1):sdSST.1deg + poly(meanSST.1deg, 3):sdSST.1deg)
summary(mod.sar.atlI2)
anova(mod.sar.atlI, mod.sar.atlI2) # 0.20469
rm(mod.sar.atlI2)
mod.sar.atlI3 <- update(mod.sar.atlI, ~. + poly(meanSST.1deg, 3):I(mean.mld.t/10))
summary(mod.sar.atlI3)
anova(mod.sar.atlI, mod.sar.atlI3) # 0.19816
rm(mod.sar.atlI3)
mod.sar.atlI4 <- update(mod.sar.atlI, ~. + poly(meanSST.1deg, 3):I(depth10deg/100))
summary(mod.sar.atlI4)
anova(mod.sar.atlI, mod.sar.atlI4) # 0.66431
rm(mod.sar.atlI4)
mod.sar.atlI5 <- update(mod.sar.atlI, ~. -logProd.mn.ann:poly(meanSST.1deg, 1) + poly(meanSST.1deg, 3):logProd.mn.ann)
summary(mod.sar.atlI5)
anova(mod.sar.atlI, mod.sar.atlI5) # 0.78754
rm(mod.sar.atlI5)
# significant at the 0.05 level
mod.sar.atlI6 <- update(mod.sar.atlI, ~. - absMnSal.0m:poly(meanSST.1deg, 2) + poly(meanSST.1deg, 3):absMnSal.0m)