library(Matrix) library (MASS) library("lme4") library("rlang") library('Rcpp') library('data.table') library(ggplot2) library(car) library(forcats) library(tidyr) library(dplyr) library (lawstat) library (esquisse) library(bbmle) dframeH<-read.csv('C:/Users/saraalbuixechmarti/Documents/R/Haplos_RawData_R.csv', header=T) summary(dframeH) names(dframeH) head(dframeH) str(dframeH) ###NORMALITY: hist (dframeH$Wet_weight._.g., prob=TRUE) lines(density(dframeH$Wet_weight._.g., adjust=2), col="blue", lwd=2) qqnorm (dframeH$Wet_weight._.g.) qqline(dframeH$Wet_weight._.g., lty=2) shapiro.test(dframeH$Wet_weight._.g.) hist (dframeH$Height_.mm., prob=TRUE) lines(density(dframeH$Height_.mm., adjust=2), col="blue", lwd=2) qqnorm (dframeH$Height_.mm.) qqline (dframeH$Height_.mm., lty=2) shapiro.test(dframeH$Height_.mm.) hist (dframeH$Length_.mm., prob=TRUE) lines(density(dframeH$Length_.mm., adjust=2), col="blue", lwd=2) qqnorm (dframeH$Length_.mm.) qqline (dframeH$Length_.mm., lty=2) shapiro.test(dframeH$Length_.mm.) hist (dframeH$Width_.mm., prob=TRUE) lines(density(dframeH$Width_.mm., adjust=2), col="blue", lwd=2) qqnorm (dframeH$Width_.mm.) qqline (dframeH$Width_.mm., lty=2) shapiro.test(dframeH$Width_.mm.) hist (dframeH$Growth_Rings, prob=TRUE) lines(density(dframeH$Growth_Rings, adjust=2), col="blue", lwd=2) qqnorm (dframeH$Growth_Rings) qqline (dframeH$Growth_Rings, lty=2) shapiro.test(dframeH$Growth_Rings) hist (dframeH$Temperature_..C., prob=TRUE) lines(density(dframeH$Temperature_..C., adjust=2), col="blue", lwd=2) qqnorm (dframeH$Temperature_..C.) qqline (dframeH$Temperature_..C., lty=2) shapiro.test(dframeH$Temperature_..C.) hist (dframeH$Salinity_.PSU., prob=TRUE) lines(density(dframeH$Salinity_.PSU., adjust=2), col="blue", lwd=2) qqnorm (dframeH$Salinity_.PSU.) qqline (dframeH$Salinity_.PSU., lty=2) shapiro.test(dframeH$Salinity_.PSU.) hist (dframeH$O2_.mmol.m3., prob=TRUE) lines(density(dframeH$O2_.mmol.m3., adjust=2), col="blue", lwd=2) qqnorm (dframeH$O2_.mmol.m3.) qqline (dframeH$O2_.mmol.m3., lty=2) shapiro.test(dframeH$O2_.mmol.m3.) ###GENERALISED LINEAR MIXED MODELS (GLMM): with binomial error family ##Full model considering the spatial and seasonal variability: Full_Model<-glmer(Haplos_Presence ~ Season + Sites + (1|Bay/Sites), family=binomial (link=logit), nAGQ=1, data=dframeH) Full_Model summary(Full_Model) plot(Full_Model) qqnorm(resid(Full_Model)) ; qqline (resid(Full_Model)) ##Testing if the spatial variability is significant: Sites_Model<-glmer(Haplos_Presence ~ Season + (1|Bay/Sites), family=binomial (link=logit), nAGQ=1, data=dframeH) Sites_Model summary(Sites_Model) plot(Sites_Model) qqnorm(resid(Sites_Model)) ; qqline (resid(Sites_Model)) anova(Sites_Model, Full_Model) ##Testing if the seasonal variability is significant: Seasons_Model<-glmer(Haplos_Presence ~ Sites + (1|Bay/Sites), family=binomial (link=logit), nAGQ=1, data=dframeH) Seasons_Model summary(Seasons_Model) plot(Seasons_Model) qqnorm(resid(Seasons_Model)) ; qqline (resid(Seasons_Model)) anova(Seasons_Model, Full_Model) ##Testing the best performing drivers/inhibitors: #Kendall's Tau CORRELATIONS between the continuous variables: cor.test(dframeH$Temperature_..C., dframeH$Salinity_.PSU., method = "kendall") cor.test(dframeH$Temperature_..C., dframeH$O2_.mmol.m3., method = "kendall") cor.test(dframeH$Salinity_.PSU., dframeH$O2_.mmol.m3., method = "kendall") cor.test(dframeH$Temperature_..C., dframeH$Wet_weight._.g., method = "kendall") cor.test(dframeH$Temperature_..C., dframeH$Height_.mm., method = "kendall") cor.test(dframeH$Temperature_..C., dframeH$Length_.mm., method = "kendall") cor.test(dframeH$Temperature_..C., dframeH$Width_.mm., method = "kendall") cor.test(dframeH$Temperature_..C., dframeH$Growth_Rings, method = "kendall") cor.test(dframeH$Salinity_.PSU., dframeH$Wet_weight._.g., method = "kendall") cor.test(dframeH$Salinity_.PSU., dframeH$Height_.mm., method = "kendall") cor.test(dframeH$Salinity_.PSU., dframeH$Length_.mm., method = "kendall") cor.test(dframeH$Salinity_.PSU., dframeH$Width_.mm., method = "kendall") cor.test(dframeH$Salinity_.PSU., dframeH$Growth_Rings, method = "kendall") cor.test(dframeH$O2_.mmol.m3., dframeH$Wet_weight._.g., method = "kendall") cor.test(dframeH$O2_.mmol.m3., dframeH$Height_.mm., method = "kendall") cor.test(dframeH$O2_.mmol.m3., dframeH$Length_.mm., method = "kendall") cor.test(dframeH$O2_.mmol.m3., dframeH$Width_.mm., method = "kendall") cor.test(dframeH$O2_.mmol.m3., dframeH$Growth_Rings, method = "kendall") cor.test(dframeH$Wet_weight._.g., dframeH$Height_.mm., method = "kendall") cor.test(dframeH$Wet_weight._.g., dframeH$Length_.mm., method = "kendall") cor.test(dframeH$Wet_weight._.g., dframeH$Width_.mm., method = "kendall") cor.test(dframeH$Wet_weight._.g., dframeH$Growth_Rings, method = "kendall") cor.test(dframeH$Height_.mm., dframeH$Length_.mm., method = "kendall") cor.test(dframeH$Height_.mm., dframeH$Width_.mm., method = "kendall") cor.test(dframeH$Height_.mm., dframeH$Growth_Rings, method = "kendall") cor.test(dframeH$Length_.mm., dframeH$Width_.mm., method = "kendall") cor.test(dframeH$Length_.mm., dframeH$Growth_Rings, method = "kendall") cor.test(dframeH$Width_.mm., dframeH$Growth_Rings, method = "kendall") #Rescale the continous variables: dframeH$Height_.mm._SCALED<- (dframeH$Height_.mm. -mean(dframeH$Height_.mm.))/sd(dframeH$Height_.mm.) dframeH$Growth_Rings_SCALED<- (dframeH$Growth_Rings -mean(dframeH$Growth_Rings))/sd(dframeH$Growth_Rings) dframeH$Temperature_..C._SCALED<- (dframeH$Temperature_..C. -mean(dframeH$Temperature_..C.))/sd(dframeH$Temperature_..C.) dframeH$Salinity_.PSU._SCALED<- (dframeH$Salinity_.PSU. -mean(dframeH$Salinity_.PSU.))/sd(dframeH$Salinity_.PSU.) dframeH$O2_.mmol.m3_SCALED<- (dframeH$O2_.mmol.m3 -mean(dframeH$O2_.mmol.m3))/sd(dframeH$O2_.mmol.m3) #Null Model with Nested Sites and Seasons as random terms: NullM_Season<-glmer(Haplos_Presence ~ 1 + (1|Bay/Sites) + (1|Season), family=binomial (link=logit), nAGQ=1, data=dframeH) NullM_Season summary(NullM_Season) plot(NullM_Season) qqnorm(resid(NullM_Season)) ; qqline (resid(NullM_Season)) #Binomial GLMMs with one explanatory variable: Height_Model2<-glmer(Haplos_Presence ~ Height_.mm._SCALED + (1|Bay/Sites) + (1|Season), family=binomial (link=logit), nAGQ=1, data=dframeH) Height_Model2 summary(Height_Model2) plot(Height_Model2) qqnorm(resid(Height_Model2)) ; qqline (resid(Height_Model2)) anova(Height_Model2, NullM_Season) Rings_Model2<-glmer(Haplos_Presence ~ Growth_Rings_SCALED + (1|Bay/Sites) + (1|Season), family=binomial (link=logit), nAGQ=1, data=dframeH) Rings_Model2 summary(Rings_Model2) plot(Rings_Model2) qqnorm(resid(Rings_Model2)) ; qqline (resid(Rings_Model2)) anova(Rings_Model2, NullM_Season) Temp_Model2<-glmer(Haplos_Presence ~ Temperature_..C._SCALED + (1|Bay/Sites) + (1|Season), family=binomial (link=logit), nAGQ=1, data=dframeH) Temp_Model2 summary(Temp_Model2) plot(Temp_Model2) qqnorm(resid(Temp_Model2)) ; qqline (resid(Temp_Model2)) anova(Temp_Model2, NullM_Season) Sal_Model2<-glmer(Haplos_Presence ~ Salinity_.PSU._SCALED + (1|Bay/Sites) + (1|Season), family=binomial (link=logit), nAGQ=1, data=dframeH) Sal_Model2 summary(Sal_Model2) plot(Sal_Model2) qqnorm(resid(Sal_Model2)) ; qqline (resid(Sal_Model2)) anova(Sal_Model2, NullM_Season) O2_Model2<-glmer(Haplos_Presence ~ O2_.mmol.m3_SCALED + (1|Bay/Sites) + (1|Season), family=binomial (link=logit), nAGQ=1, data=dframeH) O2_Model2 summary(O2_Model2) plot(O2_Model2) qqnorm(resid(O2_Model2)) ; qqline (resid(O2_Model2)) anova(O2_Model2, NullM_Season) #Comparison between the 1-factor models: AICtab (NullM_Season, Height_Model2, Rings_Model2, Temp_Model2, Sal_Model2, O2_Model2, nobs=998, weights=TRUE, base=TRUE, sort=TRUE) #Binomial GLMMs with two explanatory variables (without interactions) -containing the fixed effect of the top performing driver (oxygen) plus each of the other explanatory variables in turn-: NullM_Season2<-glmer(Haplos_Presence ~ O2_.mmol.m3_SCALED + (1|Bay/Sites) + (1|Season) , family=binomial (link=logit), nAGQ=1, data=dframeH) NullM_Season2 summary(NullM_Season2) O2H_Model3<-glmer(Haplos_Presence ~ O2_.mmol.m3_SCALED + Height_.mm._SCALED + (1|Bay/Sites) + (1|Season), family=binomial (link=logit), nAGQ=1, data=dframeH) O2H_Model3 summary(O2H_Model3) plot(O2H_Model3) qqnorm(resid(O2H_Model3)) ; qqline (resid(O2H_Model3)) anova(O2H_Model3, NullM_Season2) O2GR_Model3<-glmer(Haplos_Presence ~ O2_.mmol.m3_SCALED + Growth_Rings_SCALED + (1|Bay/Sites) + (1|Season), family=binomial (link=logit), nAGQ=1, data=dframeH) O2GR_Model3 summary(O2GR_Model3) plot(O2GR_Model3) qqnorm(resid(O2GR_Model3)) ; qqline (resid(O2GR_Model3)) anova(O2GR_Model3, NullM_Season2) O2Tem_Model3<-glmer(Haplos_Presence ~ O2_.mmol.m3_SCALED + Temperature_..C._SCALED + (1|Bay/Sites) + (1|Season), family=binomial (link=logit), nAGQ=1, data=dframeH) O2Tem_Model3 summary(O2Tem_Model3) plot(O2Tem_Model3) qqnorm(resid(O2Tem_Model3)) ; qqline (resid(O2Tem_Model3)) anova(O2Tem_Model3, NullM_Season2) O2Sal_Model3<-glmer(Haplos_Presence ~ O2_.mmol.m3_SCALED + Salinity_.PSU._SCALED + (1|Bay/Sites) + (1|Season), family=binomial (link=logit), nAGQ=1, data=dframeH) O2Sal_Model3 summary(O2Sal_Model3) plot(O2Sal_Model3) qqnorm(resid(O2Sal_Model3)) ; qqline (resid(O2Sal_Model3)) anova(O2Sal_Model3, NullM_Season2) #Comparison between the 2-factor models: AICtab (NullM_Season2, O2H_Model3, O2GR_Model3, O2Tem_Model3, O2Sal_Model3, nobs=998, weights=TRUE, base=TRUE, sort=TRUE) ##Comparison between the best fitting models (1-factor and 2-factor models): AICtab (NullM_Season, O2_Model2, Height_Model2, O2H_Model3, O2Tem_Model3, nobs=998, weights=TRUE, base=TRUE, sort=TRUE) ##Model validation for mixed-effects models: modelresid<-resid (Sites_Model, type= "pearson") plot(modelresid~dframeH$Sites) modelresid<-resid (Seasons_Model, type= "pearson") plot(modelresid~dframeH$Season) modelresid<-resid (Height_Model2, type= "pearson") plot(modelresid~dframeH$Height_.mm._SCALED) modelresid<-resid (Rings_Model2, type= "pearson") plot(modelresid~dframeH$Growth_Rings_SCALED) modelresid<-resid (Temp_Model2, type= "pearson") plot(modelresid~dframeH$Temperature_..C._SCALED) modelresid<-resid (Sal_Model2, type= "pearson") plot(modelresid~dframeH$Salinity_.PSU._SCALED) modelresid<-resid (O2_Model2, type= "pearson") plot(modelresid~dframeH$O2_.mmol.m3_SCALED) modelresid<-resid (O2H_Model3, type= "pearson") plot(modelresid~dframeH$Height_.mm._SCALED) modelresid<-resid (O2GR_Model3, type= "pearson") plot(modelresid~dframeH$Growth_Rings_SCALED) modelresid<-resid (O2Tem_Model3, type= "pearson") plot(modelresid~dframeH$Temperature_..C._SCALED) modelresid<-resid (O2Sal_Model3, type= "pearson") plot(modelresid~dframeH$Salinity_.PSU._SCALED) ###GRAHPS: dframeG<-read.csv('C:/Users/saraalbuixechmarti/Documents/R/TotalsTable_Haplos_R.csv', header=T) dframeG1<-na.omit(dframeG) summary(dframeG1) names(dframeG1) head(dframeG1) str(dframeG1) #Haplosporidia presence by site: dframeG1$Site_f<-factor(dframeG1$Sites, levels=c("Ri","Cu","Fp","Cun","DKA","DKC"), ordered=is.ordered(c("Ri","Cu","Fp","Cun","DKA","DKC"))) summary(dframeG1$Site_f) ggplot(data = dframeG1) + aes(x = Collected.Cockles , y = Haplos_Positives, group=1) + geom_boxplot() + labs(x = "Cockle sample size", y = "Haplosporidia presence(%)") + theme(legend.position = "none", panel.grid.major = element_blank(), panel.grid.minor = element_blank(), axis.line = element_line(colour = "black"), axis.text=element_text(size=14), axis.title=element_text(size=16,face="bold"), strip.text.x = element_text(size = 16,face="bold")) + facet_grid(~Site_f) #Haplosporidia presence by season: dframeG1$Season_f<- factor(dframeG1$Season, levels= c("Spring_2018","Summer_2018", "Autumn_2018","Winter_2019","Spring_2019"), ordered = is.ordered(c("Spring_2018","Summer_2018", "Autumn_2018","Winter_2019","Spring_2019"))) summary(dframeG1$Season_f) ggplot(data = dframeG1) + aes(x = Collected.Cockles , y = Haplos_Positives, group=1) + geom_boxplot() + labs(x = "Cockle sample size", y = "Haplosporidia presence(%)") + theme(legend.position = "none", panel.grid.major = element_blank(), panel.grid.minor = element_blank(), axis.line = element_line(colour = "black"), axis.text=element_text(size=14), axis.title=element_text(size=16,face="bold"), strip.text.x = element_text(size = 16,face="bold")) + facet_grid(~Season_f)