##Propensity Score Matching Example #The data were simulated using the correlation matrix (Sigma) below #This will require that you install the "MASS" Package in R install.packages("MASS") Library(MASS) #Set seed to ensure you get the same result each time. set.seed(10001) #100 cases were simluated. simdata <- as.data.frame(mvrnorm(100, mu = c(.5,18,2.5,5,3.0), Sigma = matrix(c(1,0.2,0.4,0.6,0.5, 0.2, 1,0.7,0.2,0.8, 0.4,0.7,1,0.5,0.4, 0.6,0.2,0.5,1,0.25, 0.5,0.8,0.4,0.25,1), ncol = 5), empirical = TRUE)) #Add column Names to the simulated data colnames(simdata) <- c("Group","ACT", "HSGPA", "Absence", "GPA") #Recode Group variable into binary grouping variable. simdata$Group[simdata$Group< 0.5] <- 0 simdata$Group[simdata$Group>=0.5] <- 1 #You can save this data as a .csv file, if desired. write.table (simdata, file = "SampleData.csv", sep =",", col.names =,) #Null model - no covariates #t-test of GPA by group t.test(simdata$GPA~simdata$Group) describeBy(simdata$GPA, simdata$Group) #load "MatchIt" program for propenisty score matching. #This program may need to be installed. install.packages("MatchIt") library(MatchIt) #Estimate propensity scores and conduct nearest neighbor matching. m.out <- matchit(Group ~ ACT + HSGPA + Absence , data = simdata, distance = "logit", method = "nearest", caliper=.25, replace = TRUE) #The nonrandom package can be used to obtain an distribution of propensity scores #However, histograms can also be obtained using the syntax listed later (plot(m.out, type="hist")) library(nonrandom) ps <- pscore(data = simdata, Group ~ ACT + HSGPA + Absence) plot.pscore(x = ps, main = "PS distribution", xlab = "", par.1=list(col="red"), par.0=list(lwd=2), par.dens=list(kernel="gaussian"), ylim=c(0,5.5),xlim=c(0,1.0)) #Obtain graphical plots and statistical tables. #Use the sink() to Print this output to folder to your working directory. #The sink() line can be removed if you only want to view the output on screen in R. sink(file="PSMoutput.txt") plot(m.out, type="hist") summary(m.out, interactions = FALSE, standardize = TRUE) sink() #Extract datafile (m.data) with match cases. m.data<-match.data(object=m.out, group="all", distance = "distance", weights = "weights") ####Examine the matched dataset t.test(m.data$GPA~m.data$Group) describeBy(m.data$GPA, m.data$Group)