###############R Workshop Code####################

###R Baiscs 


#Hashtags allow you to write comments to yourself

#Really useful when you're making notes on the go 


A <- 3
a <- 4 

A
a


#Objects 

X <- 2  #X is the object and it is now holding the number 2 -- X is also a scalar 

X <- c(2,2,4,5) #X is the object holding the numbers 2,2,4,5 -- X is now a vector
                #the 'c' function above means concetenate 

X <- matrix(c(1,1,2,2,3,3), nrow=2, ncol=3)
Y <- matrix(c(1,1,1,1,1,1), nrow=3, ncol=2) 

A <- c(1,2,3,4) 
B <- c('T','F','T','F')  #B is now a character vector -- anytime you use character in R you have to use either '' or "", but don't mix them like '"...that's just crazy 

ds <- data.frame(A,B) #ds is now a dataframe, which is exactly what the SPSS data window is

names(ds) <- c('Var1', 'Var2') #Giving names to your variables 

Var1 

#You just received an error saying "Error: object 'Var1' not found" 

ds$Var1 

attach(ds) #attaching a dataset means that you can reference the variables in the dataset using only their name

Var1

#  rm(ds) removes the dataset...lets not do that yet though...


#Arithmetic 

2 + 2
2 - 2 
2*3 
2/3 

#Boolean Operators 

2 > 3 
3 < 6
4 == 4 

#Matrix Algebra 

X%*%Y #Matrix multiplication 

t(X) #Transpose of X 

ginv(X) #Gives you the invere of X 

#You probably just received an error telling you that the function ginv could not be found or something like that 
#This means we have to load a package 

library(MASS) #Loads the package MASS 

ginv(X) 

#####################Data Management Section 

#A few options...you can A) write out the entire location of the file you want to import or B)set your R directory  

#We're going to go with Option B, so click file then Change dir...now find the folder you've saved all of the datasets in

#Importing the Moderated Mediation Dataset 

MMDS <- read.csv('Mod Med Dataset.csv') 

#Importing Logistic Regression Dataset 

LRDS <- read.csv('Logistic Reg Dataset.csv') 

#Importing HLM Dataset 

HLMDS <- read.csv('HLM Dataset.csv') 

#Importing SEM Dataset 

SEMDS <- read.csv('SEM Dataset.csv') 

#Adding variables to a dataframe 

ds$C <- c(4,3,2,1) #This attached a new variable to the dataframe "ds"  

ds <- ds[,-3] 

ds[,2] 
ds[1,]
ds[2,2] 


#######################Descriptive Statistics Section 

set.seed(617418) #Set the seed so we generate the same "random" numbers 
X <- round(runif(100,1,6))  #Quick note...you're making an object X that contains 100 randomly generated values from a uniform distribution bounded between 1 and 6. The round command gets rid of decimals

#Central Tendency
mean(X)
median(X)
table(X) 


#Dispersion

var(X)
sd(X) 
range(X)


#Measures of covariation 

#First lets do a tiny simulation 

set.seed(618418) #Set the seed so we all generate the same random numbers 
X <- rnorm(10000,3,sqrt(3)) #Generated a random variable X from a normal distribution with a mean of 3 and variance of 3 

Y <- X*2 + rnorm(10000,2,sqrt(15 - var(X*2))) #You've no just simulated a random variable Y that should covary with X at 6 and correlate at around .89

cov(X,Y) #Covariance 
cor(X,Y) #Correlation 


##################Inferential Statistics 


#General Linear Model 

modlm1 <- lm(Y ~ X, data=MMDS)
modlm2 <- lm(Y ~ 1 + X, data=MMDS) #1 just represents the intercept
modlm3 <- lm(Y ~ X + Z, data=MMDS)

modtest <- anova(modlm1, modlm3) #Test models against one another 

plot(modlm1$resid)

##Moderated Mediation -- Total Effects Moderation 
#Making centered Variables and product terms 

MMDS$Xc <- MMDS$X - mean(MMDS$X)
MMDS$Zc <- MMDS$Z - mean(MMDS$Z)
MMDS$Mc <- MMDS$M - mean(MMDS$M)

MMDS$XZc <- MMDS$Xc*MMDS$Zc 
MMDS$MZc <- MMDS$Mc*MMDS$Zc

mod1 <- lm(M ~ Xc + Zc + XZc, data= MMDS)  #Putting the results of your linear model (lm) into an R object labeled mod1 

summary(mod1) #Gives you all of your model information

str(mod1) #allows you to examine the structure of the object mod1 and potentially reference single elements in that structure

mod1$coefficients

mod2 <- lm(Y ~ Xc + Zc + Mc + XZc + MZc, data= MMDS)
summary(mod2) 


##You are going to want to get bootstrap estimates of the product term since product terms are not normally distributed 


library(boot) 

#Getting the bootstrapped coefficients 

lmcoef <- function(formula, data, indices) { #creates a function which is similar to a SAS macro...don't worry too much about this 

d <- MMDS[indices,] 
fit <- lm(formula, data=d) 
return(coef(fit))

} 

mod1r <- boot(data=MMDS, statistic=lmcoef, R=1000, formula=M ~ Xc + Zc + XZc)


boot.ci(mod1r, type='bca', index = 4) #BCA is bootstrap adjusted confidence interval and index lets the boot.ci function know what variable to choose 

mod2r <- boot(data = MMDS, statistic=lmcoef, R=1000, formula= Y ~ Xc + Zc + Mc + XZc + MZc)
boot.ci(mod2r, type='bca', type=5) #Bootstrap estimate of the product term XZc
boot.ci(mod2r, type='bca', type=5) #Bootstrap estimate of the product term MZc


########Logistic Regression 


#Lets look at the Y variable

plot(LRDS$y) #definitely does not look normal...

#So let's turn to the generalized linear model

mod1LR <- glm(y ~ x1, family=binomial, data=LRDS)
summary(mod1LR)

mod1LR <- glm(y ~ x1 + x2, family=binomial, data=LRDS) #Family lets R know what link function to use 
summary(mod2LR)

modtest <- anova(mod1LR, mod2LR) 
pchisq(modtest$Deviance[2], modtest$Df, lower.tail=F) #Compares the 2 models using a difference in chisquare test 


######Hierarchical Linear Modeling 

library(lme4) #opens HLM package


#First we can fit a null or empty model 

nmod <-  lmer(y ~ 1 + (1|group), data=HLMDS, REML=F)  #This allows you to test for a random intercept that varies by group (1|group) 

ismod <- lmer(y ~ xgc + (1 + xgc|group), data=HLMDS, REML=F) #REML=F tells R to estimate this modeling using Full Maximum Likelihood not Restricted Max Likelihood 

slopetest <- anova(nmod,ismod)

fullmod <- lmer(y ~ xgc + z + (xgc|group), data=HLMDS, REML=F)

fullmodtest <- anova(ismod,fullmod)


################Latent Variable Modeling 

##Creating the measurement model 


library(sem) #need the sem package 

mod1 <- specifyModel()

LV1 -> X1, lam11 #Giving labels to each factor loading and allowing it to be freely estimated 
LV1 -> X2, lam21
LV1 -> X3, lam31 
LV1 -> X4, lam41  
LV2 -> Y1, lam12
LV2 -> Y2, lam22
LV2 -> Y3, lam32 
LV2 -> Y4, lam42  
LV3 -> Y5, lam13 
LV3 -> Y6, lam23 
LV3 -> Y7, lam33 
LV3 -> Y8, lam43
LV1 <-> LV1, NA, 1  #Controlling latent factor variances 
LV2 <-> LV2, NA, 1
LV3 <-> LV3, NA, 1 
LV1 <-> LV2, Ph12 
LV1 <-> LV3, Ph13 
LV2 <-> LV3, Ph23

mod1.sem <- sem(mod1, data=SEMDS) #saving your model in mod1.sem 


#Now creating your latent structural model or path model aka Structural Equation Modeling 


mod2 <- specifyModel()

LV1 -> X1, lam11 
LV1 -> X2, lam21
LV1 -> X3, lam31 
LV1 -> X4, lam41  
LV2 -> Y1, NA, 1
LV2 -> Y2, lam22
LV2 -> Y3, lam32 
LV2 -> Y4, lam42  
LV3 -> Y5, NA, 1 
LV3 -> Y6, lam23 
LV3 -> Y7, lam33 
LV3 -> Y8, lam43
LV1 <-> LV1, NA, 1
LV1 -> LV2, G21
LV1 -> LV3, G31 
LV2 -> LV3, G32

mod2.sem <- sem(mod2, data=SEMDS) 
coef(mod2.sem, standardized=T)


##Creating an alternative model where the path from LV1 to LV3 is no longer modeled 


mod3 <- specifyModel()

LV1 -> X1, lam11 
LV1 -> X2, lam21
LV1 -> X3, lam31 
LV1 -> X4, lam41  
LV2 -> Y1, NA, 1
LV2 -> Y2, lam22
LV2 -> Y3, lam32 
LV2 -> Y4, lam42  
LV3 -> Y5, NA, 1 
LV3 -> Y6, lam23 
LV3 -> Y7, lam33 
LV3 -> Y8, lam43
LV1 <-> LV1, NA, 1
LV1 -> LV2, G21
LV2 -> LV3, G32

mod3.sem <- sem(mod3, data=SEMDS) 

#testing model 2 against model 3 

anova(mod2.sem,mod3.sem)