#Resampling Magic R-code:

#References:
  #   https://rcompanion.org/handbook/K_01.html
  #   https://www.statology.org/bootstrap-standard-error-in-r/
  #   https://stackoverflow.com/questions/58393608/bootstrapped-correlation-in-r
  #   https://www.statology.org/k-fold-cross-validation-in-r/
  #   #https://search.r-project.org/CRAN/refmans/pls/html/jack.test.html

#Get data and packages
library(lmPerm)
library(dplyr)
library(ggplot2)
library(boot)
library(caret)
library(pls)


  #magic cards:
setwd("C:/Users/Mark.Williamson.2/OneDrive - North Dakota University System/Desktop/Williamson Data/Magic")
magic<-read.csv("cards_condensed_10e_clean.csv")
head(magic)

boxplot(magic$convertedManaCost~magic$colors, col=c("grey", "green", "red", "blue", "white"))

boxplot(magic$convertedManaCost[magic$types=="Creature"]~magic$colors[magic$types=="Creature"],
        col=c("grey", "green", "red", "blue", "white"))

boxplot(magic$toughness[magic$types=="Creature"]~magic$colors[magic$types=="Creature"],
        col=c("grey", "green", "red", "blue", "white"))

boxplot(magic$power[magic$types=="Creature"]~magic$colors[magic$types=="Creature"],
        col=c("grey", "green", "red", "blue", "white"))

creatures <-magic[magic$types=="Creature",]

par(mfrow=c(2,3))
hist(creatures$convertedManaCost[creatures$colors=="R"], col="red", main="Mana Cost", xlab="Cost")
hist(creatures$convertedManaCost[creatures$colors=="U"], col="blue", main="", xlab="Cost")
hist(creatures$convertedManaCost[creatures$colors=="B"], col="grey", main="", xlab="Cost")
hist(creatures$convertedManaCost[creatures$colors=="W"], col="white", main="", xlab="Cost")
hist(creatures$convertedManaCost[creatures$colors=="G"], col="green", main="", xlab="Cost")
par(mfrow=c(1,1))

par(mfrow=c(2,3))
hist(creatures$power[creatures$colors=="R"], col="red", main="Power", xlab="Power")
hist(creatures$power[creatures$colors=="U"], col="blue", main="", xlab="Power")
hist(creatures$power[creatures$colors=="B"], col="grey", main="", xlab="Power")
hist(creatures$power[creatures$colors=="W"], col="white", main="", xlab="Power")
hist(creatures$power[creatures$colors=="G"], col="green", main="", xlab="Power")
par(mfrow=c(1,1))

par(mfrow=c(2,3))
hist(creatures$toughness[creatures$colors=="R"], col="red", main="Toughness", xlab="Toughness")
hist(creatures$toughness[creatures$colors=="U"], col="blue", main="", xlab="Toughness")
hist(creatures$toughness[creatures$colors=="B"], col="grey", main="", xlab="Toughness")
hist(creatures$toughness[creatures$colors=="W"], col="white", main="", xlab="Toughness")
hist(creatures$toughness[creatures$colors=="G"], col="green", main="", xlab="Toughness")
par(mfrow=c(1,1))

creatures %>%
  group_by(colors) %>%
  summarize(mean_Mana = mean(convertedManaCost),
            mean_power= mean(power),
            mean_toughness=mean(toughness))
#------------------------------------------------------------------------------;

#Example 1: Permutation test

  #Question: Is average power different between Green and White creatures?
creatures_GW <-subset(creatures, colors =="G" | colors == "W")
head(creatures_GW)

glm1 <- glm(power ~ colors, data=creatures_GW, family="poisson")
summary(glm1)

lm1 <-lm(power ~ colors, data=creatures_GW)
summary(lm1)
anova(lm1)

barplot(c(2.74,1.38)~c("G","W"), xlab="Power", ylab="mean", col=c("Green", "White"), ylim=c(0,3))
box()

lmp1 <-lmp(power ~ colors, data=creatures_GW, perm="Prob", seqs=FALSE)
summary(lmp1)
anova(lmp1)

  #Question: Is average toughness different between Black and Green creatures?
creatures_BG <-subset(creatures, colors =="B" | colors == "G")
head(creatures_BG)

glm2 <- glm(toughness ~ colors, data=creatures_BG, family="poisson")
summary(glm2)

lm2 <-lm(toughness ~ colors, data=creatures_BG)
summary(lm2)
anova(lm2)

barplot(c(2.12,2.90)~c("B","G"), xlab="Toughness", ylab="mean", col=c("Grey", "Green"), ylim=c(0,3))
box()

lmp2 <-lmp(toughness ~ colors, data=creatures_BG, perm="Prob", seqs=FALSE)
summary(lmp2)
anova(lmp2)
#------------------------------------------------------------------------------;

  #visualization:
t_coeff<-unname(lm1$coefficients[2]) #estimate of difference between Green and White creature power

set.seed(10)
coeff_list <-c()
for (i in seq(1:1000)) {
  perm_colors<-sample(creatures_GW$colors,replace=FALSE)
  perm_power <-creatures_GW$power
  perm_df <-data.frame(colors=perm_colors, power=perm_power)
  plm <-lm(power ~ colors, data=perm_df)
  perm_coeff <-unname(plm$coefficients[2])
  coeff_list <<-c(coeff_list, perm_coeff)
}
coeff_df <-data.frame(coeff=coeff_list)

plot1 <-ggplot(data=coeff_df, aes(x=coeff))+
  geom_histogram(, bins=12, fill="blue", col="black")
plot1

t_coeff #actual coefficient we got
1000-sum(coeff_list>t_coeff)

plot1+
  geom_segment(aes(x=t_coeff, y=0, xend=t_coeff, yend=250), colour="red", linewidth=1)+
  geom_text(aes(label=c("4/1000, p=0.004")), x=t_coeff, y=257, size=3)
#------------------------------------------------------------------------------;


#Example 2: Bootstrapping

  #Question: What is the bootstrap confidence in mean cost for Instants?
head(magic[magic$types=="Instant",])

Instant_Cost <-magic$convertedManaCost[magic$types=="Instant"]
mean(Instant_Cost)

    #Bootstrap standard error
set.seed(10)
meanFunc <- function(x,i){mean(x[i])}
reps1 <-boot(Instant_Cost, meanFunc, 50)
reps2 <-boot(Instant_Cost, meanFunc, 10000)

    #Plot Bootstrap distribution
plot(reps1)
plot(reps2)

    #Calculate confidence intervals
boot.ci(reps1, type="bca")
boot.ci(reps2, type="bca")

  #Question: How confident can we be in a correlation between Mana cost and Creature power?
Power_cost <-data.frame(power=creatures$power, cost=creatures$convertedManaCost)
head(Power_cost)

cor1<-cor(Power_cost$power, Power_cost$cost, method="spearman")
cor1

plot(Power_cost$power, Power_cost$cost)

corFunc <- function(data,i){cor(data[i,"power"], data[i,"cost"], method="spearman")}

reps3 <-boot(Power_cost, corFunc, 10000)
reps3

plot(reps3)

boot.ci(reps3, type="bca")

  #density plot
plot(density(reps3$t))
abline(v=cor1, lty="dashed")
#------------------------------------------------------------------------------;


#Example 3: K-fold validation/ Jackknife
    
  #Question: How accurate is using power and toughness to predict Mana cost for creatures, using K-fold?
lm3 <-lm(convertedManaCost ~ power * toughness, data=creatures)
summary(lm3)

ctrl <- trainControl(method="cv", number =5)
modelA <-train(convertedManaCost ~ power, data=creatures, method="lm", trControl=ctrl)
modelB <-train(convertedManaCost ~ toughness, data=creatures, method="lm", trControl=ctrl)
modelC <-train(convertedManaCost ~ power + toughness, data=creatures, method="lm", trControl=ctrl)
modelD <-train(convertedManaCost ~ power * toughness, data=creatures, method="lm", trControl=ctrl)

    #RSME=root mean squared error: lower the better;
    #Rsquared=correlation: the higher the better;
    #MAE=mean absolute error: lower the better;

modelA$results; modelB$results; modelC$results; modelD$results
  #Model 4 had lowest RSME, highest Rsquared, and lowest MAE
modelD$finalModel

  #Question: what about jackknife?
jlm <-pcr(convertedManaCost ~ power * toughness, data=creatures, validation="LOO", jackknife=TRUE)

jack.test(jlm)

  #visualization example:
lm3$coefficients

power_c <-unname(lm3$coefficients[2])
tough_c <-unname(lm3$coefficients[3])

lc<-length(creatures$convertedManaCost)
power_coeff_list <-c()
tough_coeff_list <-c()
for (i in seq(1:lc)){
  new_df <-creatures[-c(i), ]
  lm_loo <-lm(convertedManaCost ~ power * toughness, data=new_df)
  power_coeff <-unname(lm_loo$coefficients[2])
  tough_coeff <-unname(lm_loo$coefficients[3])
  power_coeff_list <<-c(power_coeff_list, power_coeff)
  tough_coeff_list <<-c(tough_coeff_list, tough_coeff)
}
power_jack_df <-data.frame(coeff=power_coeff_list)
tough_jack_df <-data.frame(coeff=tough_coeff_list)


plot2 <-ggplot(data=power_jack_df, aes(x=coeff))+
  geom_histogram(bins=12, fill="red", col="black")+
  geom_segment(aes(x=power_c, y=0, xend=power_c, yend=120), colour="black", linewidth=1.2)+
  labs(x="Power coefficient")
plot2

plot3 <-ggplot(data=tough_jack_df, aes(x=coeff))+
  geom_histogram(bins=12, fill="green", col="black")+
  geom_segment(aes(x=tough_c, y=0, xend=tough_c, yend=95), colour="black", linewidth=1.2)+
labs(x="Toughness coefficient")
plot3