#Ebbe's bachelor project 
#Carolina here, I added some stuff
#10.04.24 

# ---- Setup and data import ----
# NOTE: the workspace wipe (rm(list = ls())) was removed on purpose. It deletes
# objects the user may still need and does not unload packages or reset
# options; restart the R session to start from a clean state instead.

# Set the working directory; change the path between the "" to your own folder.
# The relative file name in read.csv() below is resolved against it.
setwd("~/brain/ebbe")

# Read the data (text columns are converted to factors).
Data <- read.csv("R_Data_COM.csv", header = TRUE, stringsAsFactors = TRUE, dec = ".")
str(Data)  # check the structure of the data
head(Data) # look at the first rows to see that the table was read correctly

# NOTE(review): attach() is kept only so that code referring to bare column
# names (e.g. OPV, PB) keeps working. The attached copy is a snapshot: columns
# added to Data afterwards are not visible through it, so prefer `data = Data`
# or Data$column in new code.
attach(Data)

# ---- Subsets of the data, one per treatment ----
# The "+" signs of the treatment labels are written as "p" (plus) in the
# object names, because "+" cannot be part of a plain R name.

FW <- subset(Data, Treatment == "FW")     # only the FW treatment
FWp <- subset(Data, Treatment == "FW+")   # p = plus, the FW+ treatment
FWpp <- subset(Data, Treatment == "FW++") # pp = plus plus, the FW++ treatment

####calculate body condition
#17.04.24 # with Andi´s help

library(ggplot2) # plots
library(car)     # Anova() for the type III tests
library(emmeans) # emmeans() for the post hoc comparisons

# ---- Inspect the data ----

# Scatter plot of OPV against PB for the whole data set, one colour per
# treatment (manual three-colour palette).
ggplot(data = Data, mapping = aes(x = PB, y = OPV, colour = Treatment)) +
  geom_point() +
  theme_minimal() +
  scale_colour_manual(values = c("#FDBE85", "#CC6B51", "#8B0000"))

# ---- Body condition index: residuals of OPV regressed on PB ----
# The model is fitted once and reused. `data = Data` makes the fit independent
# of attach(), and na.exclude pads the residuals with NA for rows that have a
# missing OPV or PB, so they always line up with the rows of Data.
condition_fit <- lm(OPV ~ PB, data = Data, na.action = na.exclude)
condition_fit          # coefficients
summary(condition_fit) # inspect the model

# Diagnostic plots in a 2 x 2 window; the previous graphics settings are
# restored afterwards so later base plots are not affected.
old_par <- par(mfrow = c(2, 2))
plot(condition_fit) # inspect the assumptions on the residuals
par(old_par)

# Add the residuals to the data in a new column called conditionindex.
Data$conditionindex <- residuals(condition_fit)

# Check that the new column was added; the data viewer needs an interactive
# session, so it is skipped when the script is run non-interactively.
if (interactive()) {
  View(Data)
}

# Now plot the new condition index against the treatments.

# Median condition index per Treatment (drawn on top of the violin plot).
# Base R aggregate() is used because the pipe / group_by() / summarise()
# version needs dplyr, which this script never loads. Rows with a missing
# condition index are left out of the medians.
median_data <- aggregate(conditionindex ~ Treatment, data = Data, FUN = median)
names(median_data)[names(median_data) == "conditionindex"] <- "median_value"

# Violin plot of the condition index per treatment: violins, the jittered
# individual points, and a large square marking the median of each treatment.
# The fill aesthetic is inherited from the top-level aes() by the first two
# layers; the median layer brings its own data and mapping.
bd <- ggplot(Data, aes(x = Treatment, y = conditionindex, fill = Treatment)) +
  geom_violin(alpha = 0.4) +
  geom_point(shape = 21, position = position_jitter(width = 0.2),
             colour = "black", size = 3, stroke = 0.5) +
  scale_fill_manual(values = c("#FFDAB9", "darkorange", "#8B0000")) +
  geom_point(data = median_data,
             aes(x = Treatment, y = median_value, fill = Treatment),
             shape = 22, size = 7, colour = "black", stroke = 1.5) +
  labs(x = NULL, y = "Condition Index", title = "Body Condition Index by Treatment") +
  theme_minimal()

# bd is short for body condition. print() is used explicitly because a bare
# object name is only drawn automatically at the interactive top level; when
# the script is run with source(), nothing would be drawn.
print(bd)

# Save the plot as a TIFF file in the working directory.
tiff("BodyConditionIndex.tiff", units = "px", width = 7000, height = 5000, res = 600)
print(bd) # without print() the file can end up empty when the script is sourced
dev.off() # important: closes the device and finalizes the file

#colors that ChatGPT says are color-blind friendly and greyscale safe 
#("#FFDAB9", "#CD853F", "#B22222") 
#("#FDBE85", "#CC6B51", "#8B0000")

#However, I printed the plot and played around a bit, and these three won: "#FFDAB9", "darkorange", "#8B0000"

# Save the data set including the body condition index (residuals) under a
# new file name, so it can be told apart from the input file.
# write.csv() always writes the column names; passing col.names is ignored
# with a warning, so the argument is left out.
write.csv(Data, file = "BodyCondition.csv")

# ---- Normality checks ----
# Shapiro-Wilk test on each variable. The W and p-values in the comments are
# the results recorded when the script was originally run; they are not
# recomputed here and will change if the data change.
# NOTE(review): these tests are run on the raw variables; the diagnostic plots
# of the fitted models are what check the model residuals.

# Opisthosoma volume (OPV); the result is also stored in shapiro_test
shapiro_test <- shapiro.test(Data$OPV)
shapiro_test
#W = 0.96107, p-value = 0.01472 #not normal 

# Prosoma width (PB)
shapiro.test(Data$PB)
#W = 0.9903, p-value = 0.8079 #This one is normal! 

# Ratio index (OPV/PB, according to the original note; the column is read
# from the data, not computed in this script)
shapiro.test(Data$RatioIndex)
#W = 0.96236, p-value = 0.01771 #not normal 

# Mother size
shapiro.test(Data$Mother)
#W = 0.91264, p-value = 5.361e-05 #not normal 

# Clutch size
shapiro.test(Data$ClutchSize)
#W = 0.82409, p-value = 2.074e-08 #not normal 


#############################################################################################################################
# Gaussian GLMs of the condition index (simpler, without the origin)

# Drop incomplete rows so that all models are fitted to the same rows and the
# fits do not complain about the missing Mother values.
# NOTE(review): na.omit() on the whole data frame removes a row when ANY column
# is NA, not only Mother - confirm that no other column has missing values.
Data <- na.omit(Data)

# First check each term on its own against the intercept-only model.

model0 <- glm(conditionindex ~ 1,
              data = Data,
              family = gaussian())

modelT <- glm(conditionindex ~ Treatment,
              data = Data,
              family = gaussian())

modelC <- glm(conditionindex ~ ClutchSize,
              data = Data,
              family = gaussian())

modelM <- glm(conditionindex ~ Mother,
              data = Data,
              family = gaussian())

# anova() with several models tests each model against the previous one, which
# is only meaningful for nested models. The three single-term models are not
# nested in each other, so each one is compared with the null model separately.
anova(model0, modelT, test = "Chisq")
anova(model0, modelC, test = "Chisq")
anova(model0, modelM, test = "Chisq")

# Type III tests for each single-term model. The models are kept in a list():
# c() would flatten the glm objects into one long list of their components,
# which Anova() cannot handle. The intercept-only model has no term to test
# and is therefore left out.
models <- list(Treatment = modelT, ClutchSize = modelC, Mother = modelM)
lapply(models, Anova, type = "III")



# Full model: Treatment, Mother and ClutchSize together
model_glm <- glm(
  formula = conditionindex ~ Treatment + Mother + ClutchSize,
  family = gaussian(),
  data = Data
)

summary(model_glm)             # coefficient table
Anova(model_glm, type = "III") # type III test for each term


# Does scaling make a difference?

# Standardized copies of Mother and ClutchSize are added as new columns.
# scale() returns a one-column matrix with attributes; as.numeric() stores a
# plain numeric vector instead, so the data frame does not end up with matrix
# columns.
Data$Mother_scaled <- as.numeric(scale(Data$Mother))
Data$Clutch_scaled <- as.numeric(scale(Data$ClutchSize))

model_glm_scaled <- glm(conditionindex ~ Treatment + Mother_scaled + Clutch_scaled,
                        data = Data,
                        family = gaussian())

summary(model_glm_scaled)
Anova(model_glm_scaled, type = "III")

# model_glm and model_glm_scaled are virtually the same. The significance does
# not change, so I will go with the unscaled one to keep it simpler.


# Reduced model: the same as the full model, but without Mother
model_glms <- glm(
  formula = conditionindex ~ Treatment + ClutchSize,
  family = gaussian(),
  data = Data
)

summary(model_glms)             # coefficient table
Anova(model_glms, type = "III") # type III test for each term

# The assumptions look slightly better here, but Gabriele thinks we should use
# the model with everything, i.e. model_glm.

# Comparison of the models
# anova() tests each model against the previous one, so the models are listed
# from the simplest to the most complex, each nested in the next:
# intercept only -> Treatment + ClutchSize -> Treatment + Mother + ClutchSize.
# The scaled model is left out: it uses the same predictors as model_glm, only
# rescaled, so comparing the two gives a 0-df row without a test.
anova(model0, model_glms, model_glm, test = "Chisq")

# model_glm (the model with everything) is the chosen one.

# Check the summary of the chosen model
summary(model_glm)
Anova(model_glm, type = "III")

# Diagnostic plots of the chosen model in a 2 x 2 window; the previous
# graphics settings are restored afterwards.
old_par <- par(mfrow = c(2, 2))
plot(model_glm)
par(old_par)

# Check the assumptions with DHARMa.
# The package is installed only when it is missing, instead of being
# downloaded again on every run of the script.
if (!requireNamespace("DHARMa", quietly = TRUE)) {
  install.packages("DHARMa")
}

library(DHARMa)

# Simulate the residuals once and draw the DHARMa diagnostic plot with
# quantile regression. plot = FALSE avoids drawing the same figure twice.
obj <- simulateResiduals(model_glm, plot = FALSE)
plot(obj, quantreg = TRUE)


# Post hoc pairwise comparisons between treatments for the chosen model.
emm <- emmeans(model_glm, specs = pairwise ~ Treatment)

# The Tukey adjustment is requested explicitly and the adjusted table is the
# one that is stored, so posthoc_results is exactly what gets printed.
posthoc_results <- summary(emm$contrasts, adjust = "tukey")
posthoc_results

