###############R Workshop Code#################### ###R Baiscs #Hashtags allow you to write comments to yourself #Really useful when you're making notes on the go A <- 3 a <- 4 A a #Objects X <- 2 #X is the object and it is now holding the number 2 -- X is also a scalar X <- c(2,2,4,5) #X is the object holding the numbers 2,2,4,5 -- X is now a vector #the 'c' function above means concetenate X <- matrix(c(1,1,2,2,3,3), nrow=2, ncol=3) Y <- matrix(c(1,1,1,1,1,1), nrow=3, ncol=2) A <- c(1,2,3,4) B <- c('T','F','T','F') #B is now a character vector -- anytime you use character in R you have to use either '' or "", but don't mix them like '"...that's just crazy ds <- data.frame(A,B) #ds is now a dataframe, which is exactly what the SPSS data window is names(ds) <- c('Var1', 'Var2') #Giving names to your variables Var1 #You just received an error saying "Error: object 'Var1' not found" ds$Var1 attach(ds) #attaching a dataset means that you can reference the variables in the dataset using only their name Var1 # rm(ds) removes the dataset...lets not do that yet though... #Arithmetic 2 + 2 2 - 2 2*3 2/3 #Boolean Operators 2 > 3 3 < 6 4 == 4 #Matrix Algebra X%*%Y #Matrix multiplication t(X) #Transpose of X ginv(X) #Gives you the invere of X #You probably just received an error telling you that the function ginv could not be found or something like that #This means we have to load a package library(MASS) #Loads the package MASS ginv(X) #####################Data Management Section #A few options...you can A) write out the entire location of the file you want to import or B)set your R directory #We're going to go with Option B, so click file then Change dir...now find the folder you've saved all of the datasets in #Importing the Moderated Mediation Dataset MMDS <- read.csv('Mod Med Dataset.csv') #Importing Logistic Regression Dataset LRDS <- read.csv('Logistic Reg Dataset.csv') #Importing HLM Dataset HLMDS <- read.csv('HLM Dataset.csv') #Importing SEM Dataset SEMDS <- read.csv('SEM Dataset.csv') #Adding variables to a dataframe ds$C <- c(4,3,2,1) #This attached a new variable to the dataframe "ds" ds <- ds[,-3] ds[,2] ds[1,] ds[2,2] #######################Descriptive Statistics Section set.seed(617418) #Set the seed so we generate the same "random" numbers X <- round(runif(100,1,6)) #Quick note...you're making an object X that contains 100 randomly generated values from a uniform distribution bounded between 1 and 6. The round command gets rid of decimals #Central Tendency mean(X) median(X) table(X) #Dispersion var(X) sd(X) range(X) #Measures of covariation #First lets do a tiny simulation set.seed(618418) #Set the seed so we all generate the same random numbers X <- rnorm(10000,3,sqrt(3)) #Generated a random variable X from a normal distribution with a mean of 3 and variance of 3 Y <- X*2 + rnorm(10000,2,sqrt(15 - var(X*2))) #You've no just simulated a random variable Y that should covary with X at 6 and correlate at around .89 cov(X,Y) #Covariance cor(X,Y) #Correlation ##################Inferential Statistics #General Linear Model modlm1 <- lm(Y ~ X, data=MMDS) modlm2 <- lm(Y ~ 1 + X, data=MMDS) #1 just represents the intercept modlm3 <- lm(Y ~ X + Z, data=MMDS) modtest <- anova(modlm1, modlm3) #Test models against one another plot(modlm1$resid) ##Moderated Mediation -- Total Effects Moderation #Making centered Variables and product terms MMDS$Xc <- MMDS$X - mean(MMDS$X) MMDS$Zc <- MMDS$Z - mean(MMDS$Z) MMDS$Mc <- MMDS$M - mean(MMDS$M) MMDS$XZc <- MMDS$Xc*MMDS$Zc MMDS$MZc <- MMDS$Mc*MMDS$Zc mod1 <- lm(M ~ Xc + Zc + XZc, data= MMDS) #Putting the results of your linear model (lm) into an R object labeled mod1 summary(mod1) #Gives you all of your model information str(mod1) #allows you to examine the structure of the object mod1 and potentially reference single elements in that structure mod1$coefficients mod2 <- lm(Y ~ Xc + Zc + Mc + XZc + MZc, data= MMDS) summary(mod2) ##You are going to want to get bootstrap estimates of the product term since product terms are not normally distributed library(boot) #Getting the bootstrapped coefficients lmcoef <- function(formula, data, indices) { #creates a function which is similar to a SAS macro...don't worry too much about this d <- MMDS[indices,] fit <- lm(formula, data=d) return(coef(fit)) } mod1r <- boot(data=MMDS, statistic=lmcoef, R=1000, formula=M ~ Xc + Zc + XZc) boot.ci(mod1r, type='bca', index = 4) #BCA is bootstrap adjusted confidence interval and index lets the boot.ci function know what variable to choose mod2r <- boot(data = MMDS, statistic=lmcoef, R=1000, formula= Y ~ Xc + Zc + Mc + XZc + MZc) boot.ci(mod2r, type='bca', type=5) #Bootstrap estimate of the product term XZc boot.ci(mod2r, type='bca', type=5) #Bootstrap estimate of the product term MZc ########Logistic Regression #Lets look at the Y variable plot(LRDS$y) #definitely does not look normal... #So let's turn to the generalized linear model mod1LR <- glm(y ~ x1, family=binomial, data=LRDS) summary(mod1LR) mod1LR <- glm(y ~ x1 + x2, family=binomial, data=LRDS) #Family lets R know what link function to use summary(mod2LR) modtest <- anova(mod1LR, mod2LR) pchisq(modtest$Deviance[2], modtest$Df, lower.tail=F) #Compares the 2 models using a difference in chisquare test ######Hierarchical Linear Modeling library(lme4) #opens HLM package #First we can fit a null or empty model nmod <- lmer(y ~ 1 + (1|group), data=HLMDS, REML=F) #This allows you to test for a random intercept that varies by group (1|group) ismod <- lmer(y ~ xgc + (1 + xgc|group), data=HLMDS, REML=F) #REML=F tells R to estimate this modeling using Full Maximum Likelihood not Restricted Max Likelihood slopetest <- anova(nmod,ismod) fullmod <- lmer(y ~ xgc + z + (xgc|group), data=HLMDS, REML=F) fullmodtest <- anova(ismod,fullmod) ################Latent Variable Modeling ##Creating the measurement model library(sem) #need the sem package mod1 <- specifyModel() LV1 -> X1, lam11 #Giving labels to each factor loading and allowing it to be freely estimated LV1 -> X2, lam21 LV1 -> X3, lam31 LV1 -> X4, lam41 LV2 -> Y1, lam12 LV2 -> Y2, lam22 LV2 -> Y3, lam32 LV2 -> Y4, lam42 LV3 -> Y5, lam13 LV3 -> Y6, lam23 LV3 -> Y7, lam33 LV3 -> Y8, lam43 LV1 <-> LV1, NA, 1 #Controlling latent factor variances LV2 <-> LV2, NA, 1 LV3 <-> LV3, NA, 1 LV1 <-> LV2, Ph12 LV1 <-> LV3, Ph13 LV2 <-> LV3, Ph23 mod1.sem <- sem(mod1, data=SEMDS) #saving your model in mod1.sem #Now creating your latent structural model or path model aka Structural Equation Modeling mod2 <- specifyModel() LV1 -> X1, lam11 LV1 -> X2, lam21 LV1 -> X3, lam31 LV1 -> X4, lam41 LV2 -> Y1, NA, 1 LV2 -> Y2, lam22 LV2 -> Y3, lam32 LV2 -> Y4, lam42 LV3 -> Y5, NA, 1 LV3 -> Y6, lam23 LV3 -> Y7, lam33 LV3 -> Y8, lam43 LV1 <-> LV1, NA, 1 LV1 -> LV2, G21 LV1 -> LV3, G31 LV2 -> LV3, G32 mod2.sem <- sem(mod2, data=SEMDS) coef(mod2.sem, standardized=T) ##Creating an alternative model where the path from LV1 to LV3 is no longer modeled mod3 <- specifyModel() LV1 -> X1, lam11 LV1 -> X2, lam21 LV1 -> X3, lam31 LV1 -> X4, lam41 LV2 -> Y1, NA, 1 LV2 -> Y2, lam22 LV2 -> Y3, lam32 LV2 -> Y4, lam42 LV3 -> Y5, NA, 1 LV3 -> Y6, lam23 LV3 -> Y7, lam33 LV3 -> Y8, lam43 LV1 <-> LV1, NA, 1 LV1 -> LV2, G21 LV2 -> LV3, G32 mod3.sem <- sem(mod3, data=SEMDS) #testing model 2 against model 3 anova(mod2.sem,mod3.sem)