#Ebbe's bachelor project 
#Carolina here, I might add some stuff
#10.04.24 

# Start from an empty workspace (hidden objects included) and point R at the
# project folder; edit the path between the quotes to match your machine.
rm(list = ls(all.names = TRUE))
setwd("~/brain/ebbe")

# Read the raw measurements; text columns are imported as factors.
Data <- read.csv(
  file = "R_Data_COM.csv",
  header = TRUE,
  stringsAsFactors = TRUE,
  dec = "."
)
str(Data)  # structure of the data: column types and dimensions
head(Data) # first rows, as a quick check that the import looks right
# Put the columns of Data on the search path so they can be used by bare name.
attach(Data)

# One subset of the data per treatment.
# The "+" signs are written as "p" (plus) in the object names because "+" is
# not allowed in a plain R name.
FW <- subset(Data, Treatment == "FW")     # rows of the FW treatment only
FWp <- subset(Data, Treatment == "FW+")   # rows of the FW+ treatment only
FWpp <- subset(Data, Treatment == "FW++") # rows of the FW++ treatment only

####calculate body condition
#17.04.24 # with Andi´s help

library(ggplot2)

#Inspect data

# Scatter plot of OPV against PB for the whole data set, one colour per
# treatment.
ggplot(data = Data, mapping = aes(x = PB, y = OPV, colour = Treatment)) +
  geom_point() +
  scale_color_manual(values = c("#FDBE85", "#CC6B51", "#8B0000")) +
  theme_minimal()

# Simple linear model of OPV on PB; its residuals are used as the body
# condition index.
# The model is fitted once, with the data frame passed explicitly, instead of
# being refitted for every call from columns found through attach().
# na.exclude makes residuals() return one value per row of Data (NA where OPV
# or PB is missing), so the new column always has the right length.
condition_lm <- lm(OPV ~ PB, data = Data, na.action = na.exclude)
condition_lm
summary(condition_lm) # coefficients, fit statistics and residual summary

par(mfrow = c(2, 2)) # make a window with 4 fields for the diagnostic plots
plot(condition_lm)   # inspect the residual assumptions (author's note: all
                     # look very good)

# Add the residuals to the data in a new column called conditionindex.
Data$conditionindex <- residuals(condition_lm)
if (interactive()) {
  View(Data) # check that the new column was added (needs an interactive session)
}

#now lets plot our new condition index against the treatments 

# Compute the median condition index for each Treatment (added to the plot
# as a summary marker).
# dplyr provides %>%, group_by() and summarise(); no earlier line of the
# script loads it, so it is loaded here before first use.
library(dplyr)

median_data <- Data %>%
  group_by(Treatment) %>%
  summarise(median_value = median(conditionindex))

# Violin plot of the condition index per treatment, with the individual
# observations jittered on top and a large square marking each median.
# x and fill are set once in ggplot() and inherited by every layer.
bd <- ggplot(Data, aes(Treatment, conditionindex, fill = Treatment)) +
  geom_violin(alpha = 0.4) +
  geom_point(
    shape = 21, colour = "black", size = 3, stroke = 0.5,
    position = position_jitter(width = 0.2)
  ) +
  scale_fill_manual(values = c("#FFDAB9", "darkorange", "#8B0000")) +
  geom_point(
    data = median_data,
    mapping = aes(y = median_value),
    shape = 22, colour = "black", size = 7, stroke = 1.5
  ) +
  labs(x = NULL, y = "Condition Index", title = "Body Condition Index by Treatment") +
  theme_minimal()

bd # show the plot (bd is short for body condition)

# Save the plot as a TIFF file in the working directory.
# print() is used because a ggplot object named on its own line is not drawn
# when the script is run through source(), which would leave the file blank.
tiff("BodyConditionIndex.tiff", units = "px", width = 7000, height = 5000, res = 600)
print(bd)
dev.off() # important: closes the device so the file is finalized

#colors that ChatGPT suggested as color-blind friendly and greyscale safe:
#("#FFDAB9", "#CD853F", "#B22222")
#("#FDBE85", "#CC6B51", "#8B0000")

#However, after printing the plot and playing around, these three won: "#FFDAB9", "darkorange", "#8B0000"

# Save the data set, now including the body condition index (residuals),
# under a new file name so it can be told apart from the raw data file.
# write.csv() always writes the header row; it ignores a col.names argument
# and warns about it, so that argument is left out.
write.csv(Data, file = "BodyCondition.csv")

# Test the raw variables for normality with the Shapiro-Wilk test.
# Each call prints W and a p-value; below, p < 0.05 is read as "not normal".
# The W / p-value lines are the results recorded from the author's run and
# will differ if the data change.

# Opisthosoma volume (OPV); the result is also kept in shapiro_test
shapiro_test <- shapiro.test(Data$OPV)
shapiro_test
# W = 0.96107, p-value = 0.01472 -> not normal

# Prosoma width (PB)
shapiro.test(Data$PB)
# W = 0.9903, p-value = 0.8079 -> this one is normal

# Ratio index (OPV/PB)
shapiro.test(Data$RatioIndex)
# W = 0.96236, p-value = 0.01771 -> not normal

# Mother size
shapiro.test(Data$Mother)
# W = 0.91264, p-value = 5.361e-05 -> not normal

# Clutch size
shapiro.test(Data$ClutchSize)
# W = 0.82409, p-value = 2.074e-08 -> not normal


####### I will try a model a GLMM with the package glmmTMB ###################################################################
#17.04.24

#With Andi 22.04.24 

#exploring the terms of the model... 
library(glmmTMB)

# Drop every row that has an NA in any column (some Mother values are
# missing).
Data <- na.omit(Data)

# NOTE(review): conditionindex holds the residuals of the lm fitted earlier,
# so it contains negative values, while the Tweedie distribution is defined
# for non-negative responses -- confirm that this family is appropriate.

# Intercept-only models, with and without the Origin random intercept.
model0 <- glmmTMB(
  conditionindex ~ 1 + (1 | Origin),
  family = tweedie(),
  data = Data
)
modelError <- glmmTMB(
  conditionindex ~ 1,
  family = tweedie(),
  data = Data
)

# One predictor at a time, each with the Origin random intercept.
modelT <- glmmTMB(
  conditionindex ~ Treatment + (1 | Origin),
  family = tweedie(),
  data = Data
)
modelM <- glmmTMB(
  conditionindex ~ scale(Mother) + (1 | Origin),
  family = tweedie(),
  data = Data
)
modelC <- glmmTMB(
  conditionindex ~ scale(ClutchSize) + (1 | Origin),
  family = tweedie(),
  data = Data
)

# Full model, followed by versions that each leave out one term:
# the random effect, Mother, or ClutchSize.
modelcomplex <- glmmTMB(
  conditionindex ~ Treatment + scale(ClutchSize) + scale(Mother) + (1 | Origin),
  family = tweedie(),
  data = Data
)
modelcmplxError <- glmmTMB(
  conditionindex ~ Treatment + scale(ClutchSize) + scale(Mother),
  family = tweedie(),
  data = Data
)
modelcmplxMother <- glmmTMB(
  conditionindex ~ Treatment + scale(ClutchSize),
  family = tweedie(),
  data = Data
)
modelcmlxClutch <- glmmTMB(
  conditionindex ~ Treatment + scale(Mother),
  family = tweedie(),
  data = Data
)

# Compare the full model with the reduced ones.
# NOTE(review): type = "F" is passed on unchanged; check whether anova() for
# glmmTMB fits actually makes use of it.
anova(modelcomplex, modelcmplxError, modelcmplxMother, modelcmlxClutch, type = "F")
#use this

# simulateResiduals() comes from DHARMa and Anova() from car; no earlier line
# of the script loads either package, so load them here before first use.
library(DHARMa)
library(car)

# Gaussian model of the condition index on treatment and scaled clutch size
# (no random effect).
model_CS <- glmmTMB(conditionindex ~ Treatment + scale(ClutchSize),
                    data = Data,
                    family = gaussian())

# Simulation-based residual diagnostics; the quantile-regression part of the
# DHARMa plot is switched off.
obj <- simulateResiduals(model_CS, plot = FALSE)
plot(obj, quantreg = FALSE)
summary(Data$conditionindex) # distribution of the response itself
summary(model_CS)
Anova(model_CS, type = "III")

# Try to fix the model by normalising the response first.
library(bestNormalize)

# Estimate a normalising transformation for the condition index.
# NOTE(review): the original comments call this a Yeo-Johnson transformation,
# but bestNormalize() chooses a transformation itself; it is only Yeo-Johnson
# if that is the one selected -- inspect yj_trans to confirm.
yj_trans <- bestNormalize(Data$conditionindex)

# Apply the estimated transformation and store the result in a new column.
Data$transformed_conditionindex <- predict(yj_trans, Data$conditionindex)

# Refit with the transformed response, treatment as the fixed effect and
# Origin as a random intercept; a Gaussian family is assumed for the
# transformed data.
model_CS_ <- glmmTMB(
  transformed_conditionindex ~ Treatment + (1 | Origin),
  family = gaussian(),
  data = Data
)

# Residual diagnostics and tests for the refitted model.
obj <- simulateResiduals(model_CS_, plot = FALSE)
plot(obj, quantreg = FALSE)
summary(model_CS_)
Anova(model_CS_, type = "III")

# Compare the intercept-only and single-predictor models.
# Author's conclusion: drop Mother and the error (random) term, as they do not
# contribute to a significant model.
anova(model0, modelT, modelM, modelC, modelError, type = "F")

# Treatment x clutch size interaction, no random effect.
model1 <- glmmTMB(
  conditionindex ~ Treatment * scale(ClutchSize),
  family = tweedie(),
  data = Data
)

obj <- simulateResiduals(model1, plot = FALSE)
plot(obj, quantreg = FALSE)

# Treatment x mother size interaction, with Origin as a random intercept.
model2 <- glmmTMB(
  conditionindex ~ Treatment * scale(Mother) + (1 | Origin),
  family = tweedie(),
  data = Data
)
summary(model2)

summary(model1)
Anova(model1, type = "III")

# Same residual diagnostics for model2 (obj is reused).
obj <- simulateResiduals(model2, plot = FALSE)
plot(obj, quantreg = FALSE)

#This model has all the variables: the calculated condition index vs the treatment, plus the scaled mother size
#(scaled for easier comparison) and the clutch size, because it could be biologically relevant for the size; finally, the origin is added as a random effect.

### Now let's check the assumptions

# Install DHARMa only when it is missing, so that running the script does not
# reinstall the package every time; then load it.
if (!requireNamespace("DHARMa", quietly = TRUE)) {
  install.packages("DHARMa")
}
library(DHARMa)

# Simulate residuals for model1
simulated_residuals1 <- simulateResiduals(model1)

# Draw the DHARMa diagnostic plots
plot(simulated_residuals1)

### The diagnostics above look very bad, so try a different family.

# Gaussian model with treatment, scaled mother size, clutch size and Origin
# as a random intercept. This overwrites the earlier model2.
# NOTE(review): the original note here read "Try it with Gausian...does not
# work, because the reponse variable now has negative values". Negative
# responses are a restriction of the Tweedie family used earlier rather than
# of the Gaussian family -- confirm which model that remark was about.
model2 <- glmmTMB(
  conditionindex ~ Treatment + scale(Mother) + ClutchSize + (1 | Origin),
  data = Data,
  family = gaussian()
)
summary(model2)

## Checking the model

# Simulate residuals for the Gaussian model
simulated_residuals2 <- simulateResiduals(model2)

# Draw the DHARMa diagnostic plots
plot(simulated_residuals2)

#############################################################################################################################
# Fit the models with glm (simpler, without the Origin random effect)

# Add scaled copies of Mother and ClutchSize as new columns, then drop every
# row that has an NA in any column. The models below use the unscaled columns.
Data[["Mother_scaled"]] <- scale(Data[["Mother"]])
Data[["Clutch_scaled"]] <- scale(Data[["ClutchSize"]])
Data <- na.omit(Data)

# Intercept-only reference model (replaces the earlier glmmTMB model0).
model0 <- glm(
  conditionindex ~ 1,
  family = gaussian(),
  data = Data
)

# Treatment plus both covariates.
model_glm <- glm(
  conditionindex ~ Treatment + Mother + ClutchSize,
  family = gaussian(),
  data = Data
)

# Treatment plus clutch size only -- the author's marked choice ("this one").
model_glms <- glm(
  conditionindex ~ Treatment + ClutchSize,
  family = gaussian(),
  data = Data
)


# Model summary for the full GLM and type III tests for the reduced GLM
summary(model_glm)
Anova(model_glms, type = "III")

# Base-R diagnostic plots for the full GLM in a 2 x 2 layout
par(mfrow = c(2, 2))
plot(model_glm)

# Check the assumptions with DHARMa (quantile regression switched off)
obj <- simulateResiduals(model_glm, plot = FALSE)
plot(obj, quantreg = FALSE)

# Analysis-of-deviance table for the three GLMs
anova(model0, model_glm, model_glms)
# I think going with the simpler model_glms is perfect
# For glm fits the test is requested with `test =`; a `type` argument is not
# recognised by anova() here and is dropped with a warning, so no F test was
# actually reported.
anova(model0, model_glms, test = "F")




#plot the model...

library(ggplot2)

# The data frame is Data and the model is model_glms.
# The plot shows the condition index against clutch size, with the model
# predictions drawn as one line per treatment (no confidence intervals).

# Colours for the treatments
treatment_colors <- c("#FFDAB9", "darkorange", "#8B0000")

# Step 1: build a prediction grid -- every treatment crossed with 100 evenly
# spaced clutch sizes spanning the observed range -- and predict on it.
new_data <- expand.grid(
  Treatment = unique(Data$Treatment),
  ClutchSize = seq(
    from = min(Data$ClutchSize),
    to = max(Data$ClutchSize),
    length.out = 100
  )
)
new_data[["Predicted"]] <- predict(model_glms, newdata = new_data, type = "response")

# Step 2: observed points plus the predicted lines
p <- ggplot(data = Data, mapping = aes(x = ClutchSize, y = conditionindex, colour = Treatment)) +
  geom_point(alpha = 0.5) +
  geom_line(data = new_data, mapping = aes(y = Predicted), size = 1) +
  scale_colour_manual(values = treatment_colors) +
  labs(x = "Clutch Size", y = "Condition Index", title = "Body condition GLM") +
  theme_minimal()
print(p)

########### one that shows the clutch size by size of the points and the treatments ####
# The shaded boxes show the mean (middle line) and its 95% confidence interval
# for each treatment.
# geom_ribbon() draws an area between several x positions within a group;
# with a single summary row per treatment there is nothing to span, which is
# why the ribbons did not show. geom_crossbar() draws the same interval as a
# box at each treatment instead.

# dplyr provides %>%, group_by(), summarise(), n() and ungroup().
library(dplyr)

# Calculate the mean and its 95% confidence interval for each treatment
summaries <- Data %>%
  group_by(Treatment) %>%
  summarise(
    mean = mean(conditionindex),
    sd = sd(conditionindex),
    n = n(),
    se = sd / sqrt(n),
    lower_ci = mean - qt(0.975, df = n - 1) * se,
    upper_ci = mean + qt(0.975, df = n - 1) * se
  ) %>%
  ungroup()

# Make sure Treatment is a factor and has the same levels in both data frames
Data$Treatment <- factor(Data$Treatment, levels = unique(Data$Treatment))
summaries$Treatment <- factor(summaries$Treatment, levels = unique(Data$Treatment))

# Step 2: Create the plot (interval boxes first, jittered observations on top;
# point size encodes the clutch size)
c <- ggplot() +
  geom_crossbar(
    data = summaries,
    aes(x = Treatment, y = mean, ymin = lower_ci, ymax = upper_ci, fill = Treatment),
    width = 0.5, alpha = 0.25
  ) +
  geom_point(
    data = Data,
    aes(x = Treatment, y = conditionindex, color = Treatment, size = ClutchSize),
    position = position_jitter(width = 0.1), alpha = 0.6
  ) +
  scale_color_manual(values = c("FW" = "#FFDAB9", "FW+" = "darkorange", "FW++" = "#8B0000")) +
  scale_fill_manual(values = c("FW" = "#FFDAB9", "FW+" = "darkorange", "FW++" = "#8B0000")) +
  scale_size_continuous(range = c(3, 10), name = "Clutch Size") +
  labs(x = "Treatment", y = "Condition Index", title = "Condition Index by Treatment and Clutch Size") +
  theme_minimal() +
  theme(legend.position = "bottom")
print(c)

### 