rm(list=ls(all=TRUE))  # clear all variables
graphics.off()  # clear all graphics

# Greg Francis
# PSY 626
# 04 November 2025

# Generate data sets that have the provided correlation and sample sizes

# set seed so the code produces the same data set every time 
set.seed(10)

# Older students
n=52
r=0.48

# This code uses the mvrnorm function in the MASS library, written by Venables and Ripley. 
# I no longer recall who suggested that, but someone deserves credit.
# The variables will each have a mean of 0 and a variance of 1. After you have the data, you can 
# apply a linear transformation to get any mean and variance that you want.

 rmat <- matrix(c(1.0, r,r, 1.0), nrow = 2, byrow = T)
 mu <- c(0,0)
 library(MASS)
 mat <- mvrnorm(n, Sigma = rmat, mu = mu, empirical = TRUE) 
 # If empirical = FALSE, the correlations will be approx.


# Scale to look like anxiety and performance scores 
OlderAnxiety=5*mat[,1] + 40
OlderPerformance=10*mat[,2] + 75

# Younger students
n=47
r=0.22

rmat <- matrix(c(1.0, r,r, 1.0), nrow = 2, byrow = T)
mu <- c(0,0)
library(MASS)
mat <- mvrnorm(n, Sigma = rmat, mu = mu, empirical = TRUE) 
# If empirical = FALSE, the correlations will be approx.
 
YoungerAnxiety=5*mat[,1] + 40
YoungerPerformance=10*mat[,2] + 75

Participant<-seq(1,(52+47))
TestAnxiety<-c(OlderAnxiety, YoungerAnxiety)
TestPerformance<-c(OlderPerformance, YoungerPerformance)
Age<-c(rep(0, 52), rep(1, 47))  # 0 indicates older, 1 indicates younger

data <- data.frame(Participant=Participant, Anxiety=TestAnxiety, Performance=TestPerformance, Age=Age)

# Stan/Ulam with rethinking 

# Linear regression models

library(rethinking)
	
# Null model (same slope for older and younger students, intercepts can vary)
NullModel <- ulam(
			alist( Performance ~ dnorm(mu, sigma), 
			mu<- a1*(1-Age) + a2*Age + b*Anxiety,
			a1 ~ dnorm(50, 25),
			a2 ~ dnorm(50, 25),
			b ~ dnorm(0, 30),
			sigma~dnorm(0, 20)
			), data= data, constraints=list(sigma="lower=0"), log_lik=TRUE, chains=4  ) 
			

# Alt model (different slopes for older and younger students, intercepts can also vary)
# Since one-tailed test, we restrict d (difference in slope) to be negative
AltModel <- ulam(
			alist( Performance ~ dnorm(mu, sigma), 
			mu<- a1*(1-Age) + a2*Age + (b+d*Age)*Anxiety,
			a1 ~ dnorm(50, 25),
			a2 ~ dnorm(50, 25),
			b ~ dnorm(0, 30),
			d ~ dnorm(0, 30),
			sigma~dnorm(0, 20)
			), data= data, constraints=list(sigma="lower=0", d="upper=0"), log_lik=TRUE, chains=4  ) 
			
print(compare(NullModel, AltModel) )


# Analyzing posterior of full model
# Remove restriction on d
FullModel <- ulam(
			alist( Performance ~ dnorm(mu, sigma), 
			mu<- a1*(1-Age) + a2*Age + (b+d*Age)*Anxiety,
			a1 ~ dnorm(50, 25),
			a2 ~ dnorm(50, 25),
			b ~ dnorm(0, 30),
			d ~ dnorm(0, 30),
			sigma~dnorm(0, 20)
			), data= data, constraints=list(sigma="lower=0"), log_lik=TRUE, chains=4  ) 
			
	
numSamples=2000
post<-extract.samples(FullModel, n= numSamples)

# Probability that slope is positive ?
cat("Probability that change in slope (for younger compared to older) is negative:")
length(post$d[post$d <0])/length(post$d)

dev.new()
dens(post$d, col=rangi2, lwd=2, xlab="posterior for change in slope")

# Can convert slopes to correlations
# Older
OlderSx = sd(OlderPerformance)
OlderSy= sd(OlderAnxiety)

# posterior for correlation calculated from posterior of slope
Oldercorrelation <- post$b * OlderSy/OlderSx

dev.new()
dens(Oldercorrelation, col=rangi2, lwd=2, xlab="posterior for Older correlation")

cat("Mean of posterior correlation for Older=", mean(Oldercorrelation))

# Younger
YoungerSx = sd(YoungerPerformance)
YoungerSy= sd(YoungerAnxiety)

# posterior for correlation calculated from posterior of slope
Youngercorrelation <- (post$b + post$d) * YoungerSy/YoungerSx

dev.new()
dens(Oldercorrelation, col=rangi2, lwd=2, xlab="posterior for Younger correlation")

cat("Mean of posterior correlation for Younger=", mean(Youngercorrelation))