##Propensity Score Matching Example

#The data were simulated using the correlation matrix (Sigma) below
#This will require that you install the "MASS" Package in R
install.packages("MASS")
Library(MASS)

#Set seed to ensure you get the same result each time.
set.seed(10001)

#100 cases were simluated.
simdata <- as.data.frame(mvrnorm(100, mu = c(.5,18,2.5,5,3.0), 
                                 Sigma = matrix(c(1,0.2,0.4,0.6,0.5,
                                                  0.2, 1,0.7,0.2,0.8,
                                                  0.4,0.7,1,0.5,0.4,
                                                  0.6,0.2,0.5,1,0.25,
                                                  0.5,0.8,0.4,0.25,1), ncol = 5),
                                 empirical = TRUE))

#Add column Names to the simulated data
colnames(simdata) <- c("Group","ACT", "HSGPA", "Absence", "GPA")

#Recode Group variable into binary grouping variable.
simdata$Group[simdata$Group< 0.5] <- 0
simdata$Group[simdata$Group>=0.5] <- 1

#You can save this data as a .csv file, if desired.
write.table (simdata, file = "SampleData.csv", sep =",", col.names =,)

#Null model - no covariates
#t-test of GPA by group
t.test(simdata$GPA~simdata$Group)
describeBy(simdata$GPA, simdata$Group)

#load "MatchIt" program for propenisty score matching.
#This program may need to be installed.
install.packages("MatchIt")
library(MatchIt)

#Estimate propensity scores and conduct nearest neighbor matching.
m.out <- matchit(Group ~ ACT + HSGPA + Absence , data = simdata, distance = "logit", 
                 method = "nearest", caliper=.25, replace = TRUE)

#The nonrandom package can be used to obtain an distribution of propensity scores
#However, histograms can also be obtained using the syntax listed later (plot(m.out, type="hist"))
library(nonrandom)
ps <- pscore(data = simdata, Group ~ ACT + HSGPA + Absence)
plot.pscore(x    = ps,
            main = "PS distribution",
            xlab = "",
            par.1=list(col="red"),
            par.0=list(lwd=2),
            par.dens=list(kernel="gaussian"), ylim=c(0,5.5),xlim=c(0,1.0))


#Obtain graphical plots and statistical tables.  
#Use the sink() to Print this output to folder to your working directory.
#The sink() line can be removed if you only want to view the output on screen in R.
sink(file="PSMoutput.txt") 
plot(m.out, type="hist")
summary(m.out, interactions = FALSE, standardize = TRUE)
sink()

#Extract datafile (m.data) with match cases.
m.data<-match.data(object=m.out, group="all", distance = "distance", weights = "weights")

####Examine the matched dataset 
t.test(m.data$GPA~m.data$Group)
describeBy(m.data$GPA, m.data$Group)