rm(list=ls(all=TRUE))  # clear all variables
graphics.off()  # clear all graphics
# Smiles and Leniency
# Greg Francis
# PSY 626
# 12 October 2020

# use simple ANOVA to explore shrinkage

# load the rethinking library
library(rethinking)

# load data file
SLdata<-read.csv(file="SmilesLeniency.csv",header=TRUE,stringsAsFactors=TRUE)

# Morris Efron Shrinkage
# Get each sample mean
Means <- aggregate(Leniency~SmileType, FUN=mean, data=SLdata)
counts<- aggregate(Leniency~SmileType, FUN=length, data=SLdata)
Vars <- aggregate(Leniency~SmileType, FUN=var, data=SLdata) 
GrandMean = sum(Means$Leniency)/4
newMeans = (1-  ((length(Means$Leniency)-3))*Vars$Leniency/counts$Leniency /sum( (Means$Leniency - GrandMean)^2 )) *(Means$Leniency - GrandMean) + GrandMean

par(bg="lightblue")
range = c(min(c(Means$Leniency, newMeans)), max(c(Means$Leniency, newMeans)))

plot(Means$SmileType, Means$Leniency, main="Morris-Efron", ylim=range)
points(Means$SmileType, newMeans, pch=19)
abline(h= GrandMean, col="red", lwd=3, lty=2) 


# Set up the alternative model
# Dummy variable to indicate condition
SLdata$ConditionIndex <-0*SLdata$Leniency # just makes an array of the right length, filled with 0's
SLdata$ConditionIndex[SLdata$SmileType =="FALSE"] =1
SLdata$ConditionIndex[SLdata$SmileType =="Felt"] =2
SLdata$ConditionIndex[SLdata$SmileType =="Miserable"] =3
SLdata$ConditionIndex[SLdata$SmileType =="Neutral"] =4


# Alternative model 1: different mean for each condition (dumy variable coding)	
SLmodel3 <- map(
			alist( Leniency ~ dnorm(mu, sigma), 
			mu <- a[ConditionIndex],
			a[ConditionIndex] ~ dnorm(5, 5),
			sigma ~ dunif(0, 10)
			), data= SLdata ) 
cat("Finished SLmodel3\n")

print(SLmodel3)

# Show how to extract posterior from this model set up


# compute means of posteriors
post<-extract.samples(SLmodel3, n= 2000)
newMeans <- c(mean(post$a[,1]), mean(post$a[,2]),  mean(post$a[,3]), mean(post$a[,4] ) )
range = c(min(c(Means$Leniency, newMeans)), max(c(Means$Leniency, newMeans)))
dev.new()
plot(Means$SmileType, Means$Leniency, main="SLmodel3", ylim=range)
points(Means$SmileType, newMeans, pch=19)
abline(h= GrandMean, col="red", lwd=3, lty=2)


# Set prior on means to shrink toward GrandMean
SLmodel4 <- map(
			alist( Leniency ~ dnorm(mu, sigma), 
			mu <- a[ConditionIndex],
			a[ConditionIndex] ~ dnorm(4.827206, 5),
			sigma ~ dunif(0, 10)
			), data= SLdata ) 
cat("Finished SLmodel4\n")

print(SLmodel4)

# compute means of posteriors
post<-extract.samples(SLmodel4, n= 2000)
newMeans <- c(mean(post$a[,1]), mean(post$a[,2]),  mean(post$a[,3]), mean(post$a[,4] ) )
range = c(min(c(Means$Leniency, newMeans)), max(c(Means$Leniency, newMeans)))
dev.new()
plot(Means$SmileType, Means$Leniency, main="SLmodel4", ylim=range)
points(Means$SmileType, newMeans, pch=19)
abline(h= GrandMean, col="red", lwd=3, lty=2)


# Set prior on means to shrink toward GrandMean, with smaller standard deviation
SLmodel5 <- map(
			alist( Leniency ~ dnorm(mu, sigma), 
			mu <- a[ConditionIndex],
			a[ConditionIndex] ~ dnorm(4.827206, 1),
			sigma ~ dunif(0, 10)
			), data= SLdata ) 
cat("Finished SLmodel5\n")

print(SLmodel5)

# compute means of posteriors
post<-extract.samples(SLmodel5, n= 2000)
newMeans <- c(mean(post$a[,1]), mean(post$a[,2]),  mean(post$a[,3]), mean(post$a[,4] ) )
range = c(min(c(Means$Leniency, newMeans)), max(c(Means$Leniency, newMeans)))
dev.new()
plot(Means$SmileType, Means$Leniency, main="SLmodel5", ylim=range)
points(Means$SmileType, newMeans, pch=19)
abline(h= GrandMean, col="red", lwd=3, lty=2)


# Set prior on means to shrink toward GrandMean, with very small standard deviation
SLmodel6 <- map(
			alist( Leniency ~ dnorm(mu, sigma), 
			mu <- a[ConditionIndex],
			a[ConditionIndex] ~ dnorm(4.827206, 0.1),
			sigma ~ dunif(0, 10)
			), data= SLdata ) 
cat("Finished SLmodel6\n")

print(SLmodel6)

# compute means of posteriors
post<-extract.samples(SLmodel6, n= 2000)
newMeans <- c(mean(post$a[,1]), mean(post$a[,2]),  mean(post$a[,3]), mean(post$a[,4] ) )
range = c(min(c(Means$Leniency, newMeans)), max(c(Means$Leniency, newMeans)))
dev.new()
plot(Means$SmileType, Means$Leniency, main="SLmodel6", ylim=range)
points(Means$SmileType, newMeans, pch=19)
abline(h= GrandMean, col="red", lwd=3, lty=2)


# Set prior on means to shrink toward GrandMean, with prior standard deviation estimated from data
GrandSE = sqrt(var(Means$Leniency) ) # SDs of means

SLmodel7 <- map(
			alist( Leniency ~ dnorm(mu, sigma), 
			mu <- a[ConditionIndex],
			a[ConditionIndex] ~ dnorm(4.827206, 0.5195674),
			sigma ~ dunif(0, 10)
			), data= SLdata ) 
cat("Finished SLmodel7\n")

print(SLmodel7)

# compute means of posteriors
post<-extract.samples(SLmodel7, n= 2000)
newMeans <- c(mean(post$a[,1]), mean(post$a[,2]),  mean(post$a[,3]), mean(post$a[,4] ) )
range = c(min(c(Means$Leniency, newMeans)), max(c(Means$Leniency, newMeans)))
dev.new()
plot(Means$SmileType, Means$Leniency, main="SLmodel7", ylim=range)
points(Means$SmileType, newMeans, pch=19)
abline(h= GrandMean, col="red", lwd=3, lty=2)

# Hierarchical model
SLmodel8 <- map2stan(
			alist( Leniency ~ dnorm(mu, sigma), 
			mu <- a[ConditionIndex],
			a[ConditionIndex] ~ dnorm(Gmean, Gsd),
			Gmean ~ dnorm(5, 5),
			Gsd ~ dunif(0, 10),
			sigma ~ dunif(0, 10)
			), data= SLdata) 
			
			
# compute means of posteriors
post<-extract.samples(SLmodel8, n= 2000)
newMeans <- c(mean(post$a[,1]), mean(post$a[,2]),  mean(post$a[,3]), mean(post$a[,4] ) )
range = c(min(c(Means$Leniency, newMeans)), max(c(Means$Leniency, newMeans)))
dev.new()
plot(Means$SmileType, Means$Leniency, main="SLmodel8", ylim=range)
points(Means$SmileType, newMeans, pch=19)
abline(h= GrandMean, col="red", lwd=3, lty=2)