From 68184a10e2d53ae84ec20dbb2a8c0bfe81bff437 Mon Sep 17 00:00:00 2001
From: userz <sgoodman@aiddata.org>
Date: Mon, 17 Aug 2015 15:54:17 -0400
Subject: [PATCH 001/212] Add comments while walking through code.

---
 R/PSM_correlogram.R  | 135 +++++++++++++++++++++++--------------------
 R/PSMdistDecay.R     |  40 ++++++-------
 R/SpatialCausalPSM.R | 100 ++++++++++++++++----------------
 R/timeRangeTrend.R   |  67 +++++++++++++--------
 4 files changed, 187 insertions(+), 155 deletions(-)

diff --git a/R/PSM_correlogram.R b/R/PSM_correlogram.R
index 2e44f33..f4f9107 100644
--- a/R/PSM_correlogram.R
+++ b/R/PSM_correlogram.R
@@ -1,66 +1,77 @@
-#forked from SPDEP (sp.correlogram)
+# forked from SPDEP (sp.correlogram)
+PSM_correlogram <- function (dta, var, order = 1, style = "W", randomisation = TRUE, zero.policy = NULL, spChk = NULL, start, end) {
 
-PSM_correlogram <- function (dta, var, order = 1, style = "W", 
-          randomisation = TRUE, zero.policy = NULL, spChk = NULL, start,end) 
-{
-  stopifnot(is.vector(var))
-  if (any(is.na(var))) 
-    stop("no NAs permitted in variable")
-  if (is.null(zero.policy)) 
-    zero.policy <- get("zeroPolicy", envir = .spdepOptions)
-  stopifnot(is.logical(zero.policy))
-  if (is.null(spChk)) 
-    spChk <- get.spChkOption()
-  if (order < 1) 
-    stop("order less than 1")
-  
-  #nblags <- nblag(neighbours, maxlag = order)
-  #r.nb <- dnearneigh(as.matrix(coordinates(dta_prj)),d1=c1,d2=c2)
-  nblags <- vector(mode = "list", length = order)
-  end = end * 1000
-  start = start * 1000
-  rng_increment = (end-start) / order
-  cur_step = start + rng_increment
-  cur_start = start
-  nblags[[1]] <- dnearneigh(dta,d1=cur_start,d2=cur_step)
-  binname <- list()
-  binname <- rbind(binname,(cur_start/1000))
-  for(L in 2:order)
-  {
-    cur_start = cur_step
-    cur_step = cur_step + rng_increment
-    nblags[[L]] <- dnearneigh(dta,d1=cur_start,d2=cur_step)
-    binname <- rbind(binname,(cur_start/1000))
-  }
-  cardnos <- vector(mode = "list", length = order)
-  for (i in 1:order) cardnos[[i]] <- table(card(nblags[[i]]))
-  nobs <- sapply(cardnos, function(x) sum(x[names(x) > "0"]))
-  #if (any(nobs < 3)) 
-  #  stop("sp.correlogram: too few included observations in higher lags:\n\treduce order.")
-    res <- matrix(NA, nrow = 0, ncol = 3)
-    cnt = 0
+    stopifnot(is.vector(var))
+
+    if (any(is.na(var))) 
+        stop("no NAs permitted in variable")
+
+    if (is.null(zero.policy)) 
+        zero.policy <- get("zeroPolicy", envir = .spdepOptions)
+
+    stopifnot(is.logical(zero.policy))
+
+    if (is.null(spChk)) 
+        spChk <- get.spChkOption()
+
+    if (order < 1) 
+        stop("order less than 1")
+    
+    #nblags <- nblag(neighbours, maxlag = order)
+    #r.nb <- dnearneigh(as.matrix(coordinates(dta_prj)),d1=c1,d2=c2)
+
+    end = end * 1000
+    start = start * 1000
+    rng_increment = (end-start) / order
+    nblags <- vector(mode = "list", length = order)
+    binname <- list()
+
+    cur_step = start + rng_increment
+    cur_start = start
+    nblags[[1]] <- dnearneigh(dta, d1=cur_start, d2=cur_step)
+    binname <- rbind(binname, (cur_start/1000))
+
+    for (L in 2:order) {
+        cur_start = cur_step
+        cur_step = cur_step + rng_increment
+        nblags[[L]] <- dnearneigh(dta, d1=cur_start, d2=cur_step)
+        binname <- rbind(binname, (cur_start/1000))
+    }
+
+    cardnos <- vector(mode = "list", length = order)
+    for (i in 1:order) {
+        cardnos[[i]] <- table(card(nblags[[i]]))
+    }
+
+    nobs <- sapply(cardnos, function(x) sum(x[names(x) > "0"]))
+
+    #if (any(nobs < 3)) 
+    #  stop("sp.correlogram: too few included observations in higher lags:\n\treduce order.")
+        res <- matrix(NA, nrow = 0, ncol = 3)
+        cnt = 0
+        for (i in 1:order) {
+            if (nobs[[i]] == 0) {
+                cardnos <- cardnos[-i]
+                cnt = cnt + 1
+                warn_str = paste("Bin h=",i,"was empty, and is excluded from the plot.")
+                warning(warn_str)
+            } else {
+                listw <- nb2listw(nblags[[i]], style = style, zero.policy = zero.policy)
+                res <- rbind(res, moran.test(var, listw, randomisation = randomisation, zero.policy = zero.policy)$estimate)
+                cur_rw = i - cnt
+                rownames(res)[cur_rw] <- binname[[i]]
+            }
+        }
+        order = order - cnt
+        #rownames(res) <- 1:order
+
+    cardnos <- vector(mode = "list", length = order)
     for (i in 1:order) {
-      if(nobs[[i]] == 0)
-      {
-        cardnos <- cardnos[-i]
-        cnt = cnt + 1
-        warn_str = paste("Bin h=",i,"was empty, and is excluded from the plot.")
-        warning(warn_str)
-      } else {
-      listw <- nb2listw(nblags[[i]], style = style, zero.policy = zero.policy)
-      res <- rbind(res,moran.test(var, listw, randomisation = randomisation, 
-                               zero.policy = zero.policy)$estimate)
-      cur_rw = i - cnt
-      rownames(res)[cur_rw] <- binname[[i]]
-      }
+        cardnos[[i]] <- table(card(nblags[[i]])) 
     }
-    order = order - cnt
-    #rownames(res) <- 1:order
-  cardnos <- vector(mode = "list", length = order)
-  for (i in 1:order) cardnos[[i]] <- table(card(nblags[[i]])) 
-  obj <- list(res = res, method = "I", cardnos = cardnos, 
-              var = deparse(substitute(var)))
-  class(obj) <- "spcor"
-  print(obj)
-  obj
+
+    obj <- list(res = res, method = "I", cardnos = cardnos, var = deparse(substitute(var)))
+    class(obj) <- "spcor"
+    print(obj)
+    obj
 }
\ No newline at end of file
diff --git a/R/PSMdistDecay.R b/R/PSMdistDecay.R
index 20ae298..77ce202 100644
--- a/R/PSMdistDecay.R
+++ b/R/PSMdistDecay.R
@@ -1,22 +1,22 @@
-#PSM distance decay examination - should we enforce a threshold for matches or not?
-#This is a wrapper for a heavily modified  sp.correlogram from SPDEP
-#This new function (PSM_correlogram) allows for the specification of distance bands
-#Neighbors within each band are then tested for Moran's I correlation.
-PSMdistDecay = function(dta,psm_col,start,end,h)
-{
-  #Produce a corellogram using Moran's I at varying resolutions
-  #First, convert to an equal-distance projection
-  
-  #Need to update this so it handles projections correctly every time.
-  #Currently hacked together.
-  
-  dta_prj_coords <- project(as.matrix(coordinates(dta)),"+proj=laea") 
-  dta_prj <- as(dta,"data.frame")
-  coordinates(dta_prj) <- dta_prj_coords
-  
-  exec <- paste("PSM_correlogram(as.matrix(coordinates(dta_prj)),dta_prj$",psm_col,",order=",h,",zero.policy=TRUE,start=",start,",end=",end,")",sep="")
-  sp.cor <- eval(parse(text=exec))
-  plot(sp.cor)
-  return(sp.cor)
+# PSM distance decay examination - should we enforce a threshold for matches or not?
+# This is a wrapper for a heavily modified  sp.correlogram from SPDEP
+# This new function (PSM_correlogram) allows for the specification of distance bands
+# Neighbors within each band are then tested for Moran's I correlation.
+PSMdistDecay = function(dta, psm_col, start, end, h) {
+    # Produce a corellogram using Moran's I at varying resolutions
+    
+    # First, convert to an equal-distance projection
+    # Need to update this so it handles projections correctly every time.
+    # Currently hacked together.
+    dta_prj_coords <- project(as.matrix(coordinates(dta)),"+proj=laea") 
+    dta_prj <- as(dta,"data.frame")
+    coordinates(dta_prj) <- dta_prj_coords
+
+    # run PSM_correlogram and return results 
+    exec <- paste("PSM_correlogram(as.matrix(coordinates(dta_prj)),dta_prj$",psm_col,",order=",h,",zero.policy=TRUE,start=",start,",end=",end,")", sep="")
+    sp.cor <- eval(parse(text=exec))
+    plot(sp.cor)
+
+    return(sp.cor)
 }
 
diff --git a/R/SpatialCausalPSM.R b/R/SpatialCausalPSM.R
index 6af506a..5284989 100644
--- a/R/SpatialCausalPSM.R
+++ b/R/SpatialCausalPSM.R
@@ -1,56 +1,56 @@
-SpatialCausalPSM <- function(dta, mtd,mdl,drop, visual)
-{
-  #Initialization
-  pltObjs <- list()
-  
-  #Method
-  if(mtd == "logit")
-  {
-    PSMfit <- glm(mdl, dta@data, family="binomial")
-    retData <- dta
-    retData$PSM_trtProb <- predict(PSMfit, dta@data, type="response")
-  }
-  if(mtd=="lm")
-  {
-    PSMfit <- lm(mdl, dta@data)
+SpatialCausalPSM <- function(dta, mtd, mdl, drop, visual) {}
+    # Initialization
+    pltObjs <- list()
+    
+    # generate model based on method
+    if (mtd == "logit") {
+        # generalized linear model
+        PSMfit <- glm(mdl, dta@data, family="binomial")
+    else if (mtd == "lm") {
+        # linear model
+        PSMfit <- lm(mdl, dta@data)
+    }
+
+    # copy data 
     retData <- dta
-    retData$PSM_trtProb <- predict(PSMfit, dta@data, type="response")
-  }
-  
-  if(visual == "TRUE")
-  {
-    #Show user distributions.
-    pltObjs[[1]] <- GroupCompHist(retData, "PSM_trtProb","Initial PSM Balance",simple_out=FALSE)
-    print(summary(PSMfit))
-  }
 
-  
-  #Second, if a drop parameter - if set to "support", remove observations
-  #that don't overlap in the PSM distribution.
-  if(drop == "support")
-  {
+    # predict values based on model
+    retData$PSM_trtProb <- predict(PSMfit, dta@data, type="response")
     
-    #Drop
-    treated <- retData@data[retData@data$TrtBin == 1,]
-    untreated <- retData@data[retData@data$TrtBin == 0,]
-    min_cut <- max(min(treated$PSM_trtProb), min(untreated$PSM_trtProb))
-    max_cut <- min(max(treated$PSM_trtProb), max(untreated$PSM_trtProb))
     
-    retData <- retData[retData@data$PSM_trtProb >= min_cut,]    
-    retData <- retData[retData@data$PSM_trtProb <= max_cut,] 
+    if (visual == "TRUE") {
+        # Show user distributions.
+        pltObjs[[1]] <- GroupCompHist(retData, "PSM_trtProb", "Initial PSM Balance", simple_out=FALSE)
+        print(summary(PSMfit))
+    }
 
-  }
-  if(visual == "TRUE")
-  {
-    #Post drop histograms
-    pltObjs[[2]] <- GroupCompHist(retData, "PSM_trtProb","Post-Extrapolation Drops (if enabled)",simple_out=FALSE)
-    
-    #Output graphics
-    grid.arrange(pltObjs[[1]], pltObjs[[2]],ncol=2,main="PSM Matching Stage 1 (Dropping Observations Requiring Extrapolation)")
-    
-  }
-  retEle <- 0
-  retEle$data <- retData
-  retEle$model <- PSMfit
-  return (retEle)
+    # Second, if a drop parameter - if set to "support", remove observations
+    # that don't overlap in the PSM distribution.
+    if (drop == "support") {
+        
+        # Drop
+        treated <- retData@data[retData@data$TrtBin == 1,]
+        untreated <- retData@data[retData@data$TrtBin == 0,]
+        min_cut <- max(min(treated$PSM_trtProb), min(untreated$PSM_trtProb))
+        max_cut <- min(max(treated$PSM_trtProb), max(untreated$PSM_trtProb))
+        
+        retData <- retData[retData@data$PSM_trtProb >= min_cut,]    
+        retData <- retData[retData@data$PSM_trtProb <= max_cut,] 
+
+    }
+
+    if (visual == "TRUE") {
+        # Post drop histograms
+        pltObjs[[2]] <- GroupCompHist(retData, "PSM_trtProb", "Post-Extrapolation Drops (if enabled)", simple_out=FALSE)
+        
+        # Output graphics
+        grid.arrange(pltObjs[[1]], pltObjs[[2]], ncol=2, main="PSM Matching Stage 1 (Dropping Observations Requiring Extrapolation)")
+        
+    }
+
+    # return original and predicted data along with model
+    retEle <- 0
+    retEle$data <- retData
+    retEle$model <- PSMfit
+    return (retEle)
 }
diff --git a/R/timeRangeTrend.R b/R/timeRangeTrend.R
index 33c6e87..85f4afa 100644
--- a/R/timeRangeTrend.R
+++ b/R/timeRangeTrend.R
@@ -1,23 +1,44 @@
-timeRangeTrend <- function(dta,prefix,startyr,endyr,IDfield)
-{
-  grep_str = paste(IDfield,prefix,sep="|")
-  tDF <- dta@data[grepl(grep_str,names(dta@data))]
-  analysisDF <- melt(tDF,id=c(IDfield))
-  
-  #cleaned GREP
-  new_pre <- gsub("[0-9]","",prefix,fixed=TRUE)
-  analysisDF["Year"] <- lapply(analysisDF["variable"],FUN=function(x) as.numeric(gsub(new_pre,"",x)))
-  analysisDF <- analysisDF[analysisDF["Year"] >= startyr ,]
-  analysisDF <- analysisDF[analysisDF["Year"] <= endyr ,]
-  dta@data["newfieldID"] <- 0
-  for (i in 1:length(dta))
-  {
-    ID <- as.character(dta@data[IDfield][i,])
-    #Fit trend model
-    ID_dat <- analysisDF[analysisDF[IDfield] == ID,]
-    trend_mod <- lm(value ~ Year,data=ID_dat)
-    dta@data["newfieldID"][i,] <- summary(trend_mod)$coefficients[2]
-  }
-  return(dta$newfieldID)
-  
-}
\ No newline at end of file
+# run linear model on data within year range as specified
+# by field prefix and return coefficients
+timeRangeTrend <- function(dta, prefix, startyr, endyr, IDfield) {
+
+    # create new dataframe from all columns in dta dataframe that 
+    # are either the ID or a year which is indicated by the prefix
+    grep_str = paste(IDfield, prefix, sep="|")
+    tDF <- dta@data[grepl(grep_str, names(dta@data))]
+
+    # melt all years columns in new dataframe
+    analysisDF <- melt(tDF, id=c(IDfield))
+    
+    # cleaned GREP - remove year digit placeholders
+    new_pre <- gsub("[0-9]", "", prefix, fixed=TRUE)
+
+    # generate new year field by removing prefix from variable (original column names)
+    analysisDF["Year"] <- lapply(analysisDF["variable"], FUN=function(x) as.numeric(gsub(new_pre, "", x)))
+
+    # keep years in range specified
+    analysisDF <- analysisDF[analysisDF["Year"] >= startyr ,]
+    analysisDF <- analysisDF[analysisDF["Year"] <= endyr ,]
+
+    # create empty field
+    dta@data["newfieldID"] <- 0
+
+    # iterate over original dataframe
+    for (i in 1:length(dta)) {
+        # get id for row (in original data)
+        ID <- as.character(dta@data[IDfield][i,])
+
+        # get all data corresponding to id from analysis dataframe
+        ID_dat <- analysisDF[analysisDF[IDfield] == ID,]
+
+        # fit trend model
+        trend_mod <- lm(value ~ Year, data=ID_dat)
+
+        # add trend coefficients to new field
+        dta@data["newfieldID"][i,] <- summary(trend_mod)$coefficients[2]
+    }
+    
+    # return new field with trend coefficients
+    return(dta$newfieldID)
+    
+}

From d3b84711139d1c430f67c64cd8bffe2c810b1005 Mon Sep 17 00:00:00 2001
From: userz <sgoodman@aiddata.org>
Date: Fri, 4 Sep 2015 09:44:52 -0400
Subject: [PATCH 002/212] Clean BuildTimeSeries.R

---
 DESCRIPTION         |   4 +-
 R/BuildTimeSeries.R | 246 +++++++++++++++++++++-----------------------
 2 files changed, 121 insertions(+), 129 deletions(-)

diff --git a/DESCRIPTION b/DESCRIPTION
index 9f1486f..8d8fcf7 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -1,4 +1,4 @@
-Package: SCI
+Package: SCI2
 Title: Spatial Cauasal Inference
 Version: 0.0.0.1
 Authors@R: person("Dan", "Runfola", , "drunfola@aiddata.org", role = c("aut", "cre"))
@@ -6,4 +6,4 @@ Description: An alpha release of a package designed to make impact evaluation th
 Depends: R (>= 3.0.2), sp, maptools, reshape, ggplot2, grid, gridExtra, spdep,proj4, FNN, psych
 License: Creative Commons with Attribution
 LazyData: true
-Imports:sp, maptools, reshape, ggplot2, grid, gridExtra, spdep,proj4, FNN, psych
\ No newline at end of file
+Imports:sp, maptools, reshape, ggplot2, grid, gridExtra, spdep,proj4, FNN, psych
diff --git a/R/BuildTimeSeries.R b/R/BuildTimeSeries.R
index a470ad8..59b4190 100644
--- a/R/BuildTimeSeries.R
+++ b/R/BuildTimeSeries.R
@@ -1,147 +1,139 @@
-BuildTimeSeries <- function(dta,idField,varList_pre,startYear,endYear,colYears=NULL,interpYears=NULL)
-{
-  years <- startYear:endYear
-  #If there is a "colYears" variable, convert to binaries.
-  #Eventually could be extended to more than one column.
-  if(!is.null(colYears))
-  {
-    #For each variable, for each year, create a binary representing the treatment status.
-    for(k in 1:length(years))
-    {
-      for(j in 1:length(colYears))
-      {
-      varN <- paste("TrtMnt_",colYears[j],years[k],sep="")
-      exec <- paste("dta$",varN,"=0",sep="")
-      eval(parse(text=exec))
+BuildTimeSeries <- function (dta, idField, varList_pre, startYear, endYear, colYears=NULL, interpYears=NULL) {
+    
+    years <- startYear:endYear
 
-      dta@data[varN][dta@data[colYears[j]] <= as.numeric(years[k])] <- 1
-      }
-    }
-  }
-  for (j in 1:length(colYears))
-  {
-  trt_id = paste("TrtMnt_",colYears[j],sep="")
-  interpYears <- c(interpYears,trt_id)  
-  }
-  print(interpYears)
-  #If there is an "interpVars" variable, linearly interpolate values based on at least 2 known points in time.
-  if(!is.null(interpYears))
-  {
-    for(AncInt in 1:length(interpYears))
-      {
-        cur_ancVi <- interpYears[AncInt]
-        interpFrame <- dta@data[idField]
-        interpFrame[idField] <- dta@data[idField]
-        cnt = 2
-        for(k in 1:length(years))
-        {
-        #First, build a model describing the relationship between years and any data in the interp field.
-        varI <- paste(cur_ancVi,years[[k]],sep="")
-        #Check if data exists for the year - if not, ignore.  If so, include in the new modeling frame.
-          if(varI %in% colnames(dta@data))
-          {
-            add_data <- paste("interpFrame[cnt] <- dta@data$",varI)
-            eval(parse(text=add_data))
-            colnames(interpFrame)[cnt] <- years[[k]]
-            cnt = cnt + 1
-          } else
-          {
-            #Exception for a single-point interpolation
-            varC <- paste(cur_ancVi,sep="")
-            if(varC %in% colnames(dta@data))
-            {
-              add_data <- paste("interpFrame[cnt] <- dta@data$",varC)
-              eval(parse(text=add_data))
-              cnt = 3
+    # If there is a "colYears" variable, convert to binaries.
+    # Eventually could be extended to more than one column.
+    if (!is.null(colYears)) {
+        # For each variable, for each year, create a binary representing the treatment status.
+        for (k in 1:length(years)) {
+            for (j in 1:length(colYears)) {
+                varN <- paste("TrtMnt_",colYears[j],years[k],sep="")
+                exec <- paste("dta$",varN,"=0",sep="")
+                eval(parse(text=exec))
+
+                dta@data[varN][dta@data[colYears[j]] <= as.numeric(years[k])] <- 1
             }
-          }
-        
         }
-        #Only one time point, so no interpolation is done - value is simply copied to all other columns.
-        if(cnt == 3)
-        {
-          for(k in 1:length(years))
-          {
-            varI <- paste("dta@data$",cur_ancVi,years[[k]]," <- interpFrame[2]",sep="")
-            eval(parse(text=varI))
-          }
-        } else {
-        tDframe <- dta@data[idField]
-        #Here, we model out everything. 
-        #Melt the dataframe for modeling
-        melt_Model_dta <- melt(data.frame(interpFrame),id=idField)
-        melt_Model_dta["variable"] <- as.numeric(gsub("X","",melt_Model_dta$variable))
-                                                 
-        #Fit the model for interpolation
-        execstr <- paste("mdl <- lm(value ~ variable + factor(",idField,"),data=melt_Model_dta)",sep="")
-        eval(parse(text=execstr))
-        #Apply the model to interpolate
-        for(u in 1:length(years))
-          {
-          varI <- paste(cur_ancVi,years[[u]],sep="")
-          if(!(varI %in% colnames(dta@data)))
-            {
-            #Variable doesn't exist, so we need to interpolate.
-            tDframe[idField] <- dta@data[idField]
-            tDframe["variable"] <- years[[u]]
-            dta@data[varI] <- predict(mdl,newdata=tDframe)
+    }
 
+    for (j in 1:length(colYears)) {
+        trt_id = paste("TrtMnt_",colYears[j],sep="")
+        interpYears <- c(interpYears,trt_id)  
+    }
+
+    print(interpYears)
+    
+    # If there is an "interpVars" variable, linearly interpolate values based on at least 2 known points in time.
+    if (!is.null(interpYears)) {
+        for (AncInt in 1:length(interpYears)) {
+            cur_ancVi <- interpYears[AncInt]
+            interpFrame <- dta@data[idField]
+            interpFrame[idField] <- dta@data[idField]
+            cnt = 2
+            for (k in 1:length(years)) {
+                # First, build a model describing the relationship between years and any data in the interp field.
+                varI <- paste(cur_ancVi,years[[k]],sep="")
+                # Check if data exists for the year - if not, ignore.  If so, include in the new modeling frame.
+                if (varI %in% colnames(dta@data)) {
+                    add_data <- paste("interpFrame[cnt] <- dta@data$",varI)
+                    eval(parse(text=add_data))
+                    colnames(interpFrame)[cnt] <- years[[k]]
+                    cnt = cnt + 1
+                } else {
+                    # Exception for a single-point interpolation
+                    varC <- paste(cur_ancVi,sep="")
+                    if (varC %in% colnames(dta@data)) {
+                        add_data <- paste("interpFrame[cnt] <- dta@data$",varC)
+                        eval(parse(text=add_data))
+                        cnt = 3
+                    }
+                }
+        
+            }
+
+            # Only one time point, so no interpolation is done - value is simply copied to all other columns.
+            if (cnt == 3) {
+
+                for (k in 1:length(years)) {
+                    varI <- paste("dta@data$",cur_ancVi,years[[k]]," <- interpFrame[2]",sep="")
+                    eval(parse(text=varI))
+                }
+
+            } else {
+                tDframe <- dta@data[idField]
+                
+                # Here, we model out everything. 
+                # Melt the dataframe for modeling
+                melt_Model_dta <- melt(data.frame(interpFrame),id=idField)
+                melt_Model_dta["variable"] <- as.numeric(gsub("X","",melt_Model_dta$variable))
+                                                         
+                # Fit the model for interpolation
+                execstr <- paste("mdl <- lm(value ~ variable + factor(",idField,"),data=melt_Model_dta)",sep="")
+                eval(parse(text=execstr))
+
+                # Apply the model to interpolate
+                for (u in 1:length(years)) {
+                    varI <- paste(cur_ancVi,years[[u]],sep="")
+                    if (!(varI %in% colnames(dta@data))) {
+                        # Variable doesn't exist, so we need to interpolate.
+                        tDframe[idField] <- dta@data[idField]
+                        tDframe["variable"] <- years[[u]]
+                        dta@data[varI] <- predict(mdl,newdata=tDframe)
+
+                    }
+                }     
             }
-          }     
-        }
       
-      }
-    
-    #Append interpolated fields to our melting lists
-    for(v in 1:length(interpYears))
-      {
-      varList_pre[[length(varList_pre)+1]] <- interpYears[v]
-      }
+        }
+        
+        # Append interpolated fields to our melting lists
+        for (v in 1:length(interpYears)) {
+            varList_pre[[length(varList_pre)+1]] <- interpYears[v]
+        }
     
-  }
+    }
   
   
-  #Run the melts
+    # Run the melts
+    meltList <- list()
+    for (i in 1:length(varList_pre)) {
 
-  meltList <- list()
-  for (i in 1:length(varList_pre))
-  {
-    #grep_str = paste(idField,"|",varList_pre[i],"[0-9][0-9][0-9][0-9]",sep="")
-    #Limit to only relevant years
-    grepStrYrs = idField
-    for(j in 1:length(years))
-    {
-      tempGrep <- grepStrYrs
-      grepStrYrs <- paste(tempGrep,"|",varList_pre[[i]],years[[j]],sep="")
-    }
-    tDF <- dta@data[grepl(grepStrYrs,names(dta@data))]
-    meltList[[i]] <- melt(tDF,id=idField)
-    
-    #Keep only years in the year column, rename columns
-    colnames(meltList[[i]])[2] <- "Year"
-    colnames(meltList[[i]])[3] <- varList_pre[[i]]
-    
-    #Clean up year column
-    gsub_command <- paste("^",varList_pre[[i]],sep="")
-    meltList[[i]][2] <- gsub(gsub_command, "", as.matrix(meltList[[i]][2]))
-    
+        # grep_str = paste(idField,"|",varList_pre[i],"[0-9][0-9][0-9][0-9]",sep="")
+        # Limit to only relevant years
+        grepStrYrs = idField
+        for (j in 1:length(years)) {
+            tempGrep <- grepStrYrs
+            grepStrYrs <- paste(tempGrep,"|",varList_pre[[i]],years[[j]],sep="")
+        }
+
+        tDF <- dta@data[grepl(grepStrYrs,names(dta@data))]
+        meltList[[i]] <- melt(tDF,id=idField)
+        
+        # Keep only years in the year column, rename columns
+        colnames(meltList[[i]])[2] <- "Year"
+        colnames(meltList[[i]])[3] <- varList_pre[[i]]
+        
+        # Clean up year column
+        gsub_command <- paste("^",varList_pre[[i]],sep="")
+        meltList[[i]][2] <- gsub(gsub_command, "", as.matrix(meltList[[i]][2]))
+        
     
-    #Remove ID and year if this is at least the second variable to avoid duplications.
-    if(i > 1)
-    {
-      meltList[[i]] <- meltList[[i]][3]
-    }
+        # Remove ID and year if this is at least the second variable to avoid duplications.
+        if (i > 1) {
+            meltList[[i]] <- meltList[[i]][3]
+        }
 
-  }
+    }
 
-  #Finish up with a cherry on top
-  meltListRet <- data.frame(meltList)
+    # Finish up with a cherry on top
+    meltListRet <- data.frame(meltList)
   
-  return(meltListRet)
+    return(meltListRet)
 }
 
 
 # dm1 <- melt(d[,c("Type","I.alt","idx06","idx07","idx08")], id=c("Type","I.alt"))
 # dm2 <- melt(d[,c("Type","I.alt","farve1","farve2")], id=c("Type","I.alt"))
 # colnames(dm2) <- c("Type", "I.alt", "variable2", "value2")
-# dm <- merge(dm1, dm2)
\ No newline at end of file
+# dm <- merge(dm1, dm2)

From c8bb7ad729f0a93fc0d425c599f0c223f7488b83 Mon Sep 17 00:00:00 2001
From: userz <sgoodman@aiddata.org>
Date: Fri, 4 Sep 2015 09:47:39 -0400
Subject: [PATCH 003/212] Fix bug.

---
 R/SpatialCausalPSM.R | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/R/SpatialCausalPSM.R b/R/SpatialCausalPSM.R
index 5284989..a693a36 100644
--- a/R/SpatialCausalPSM.R
+++ b/R/SpatialCausalPSM.R
@@ -6,7 +6,7 @@ SpatialCausalPSM <- function(dta, mtd, mdl, drop, visual) {}
     if (mtd == "logit") {
         # generalized linear model
         PSMfit <- glm(mdl, dta@data, family="binomial")
-    else if (mtd == "lm") {
+    } else if (mtd == "lm") {
         # linear model
         PSMfit <- lm(mdl, dta@data)
     }

From 4f7240fb06ec519877ed88c5eaf9449c1a9238d1 Mon Sep 17 00:00:00 2001
From: userz <sgoodman@aiddata.org>
Date: Fri, 4 Sep 2015 09:48:30 -0400
Subject: [PATCH 004/212] Fix bug.

---
 R/SpatialCausalPSM.R | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/R/SpatialCausalPSM.R b/R/SpatialCausalPSM.R
index a693a36..4a284c4 100644
--- a/R/SpatialCausalPSM.R
+++ b/R/SpatialCausalPSM.R
@@ -1,4 +1,4 @@
-SpatialCausalPSM <- function(dta, mtd, mdl, drop, visual) {}
+SpatialCausalPSM <- function(dta, mtd, mdl, drop, visual) {
     # Initialization
     pltObjs <- list()
     

From cd1193a8a22ad008969b211526c78e2cd54238f7 Mon Sep 17 00:00:00 2001
From: userz <sgoodman@aiddata.org>
Date: Fri, 4 Sep 2015 10:06:01 -0400
Subject: [PATCH 005/212] Remove some eval statments from buildtimeseries func.

---
 R/BuildTimeSeries.R | 78 ++++++++++++++++++++++++++++++++++++++-------
 1 file changed, 67 insertions(+), 11 deletions(-)

diff --git a/R/BuildTimeSeries.R b/R/BuildTimeSeries.R
index 59b4190..8c8dda2 100644
--- a/R/BuildTimeSeries.R
+++ b/R/BuildTimeSeries.R
@@ -2,27 +2,54 @@ BuildTimeSeries <- function (dta, idField, varList_pre, startYear, endYear, colY
     
     years <- startYear:endYear
 
+
+
+    print("bts1")
+    timer <- proc.time()
+
     # If there is a "colYears" variable, convert to binaries.
     # Eventually could be extended to more than one column.
     if (!is.null(colYears)) {
         # For each variable, for each year, create a binary representing the treatment status.
         for (k in 1:length(years)) {
             for (j in 1:length(colYears)) {
-                varN <- paste("TrtMnt_",colYears[j],years[k],sep="")
-                exec <- paste("dta$",varN,"=0",sep="")
-                eval(parse(text=exec))
+
+                varN <- paste("TrtMnt_", colYears[j], years[k], sep="")
+
+
+                # exec <- paste("dta$",varN,"=0",sep="")
+                # eval(parse(text=exec))
+
+                dta[,varN] = 0
+
 
                 dta@data[varN][dta@data[colYears[j]] <= as.numeric(years[k])] <- 1
             }
         }
     }
 
+    timer <- proc.time() - timer
+    print(paste("section completed in", timer[3], "seconds."))
+
+
+
+    print("bts2")
+    timer <- proc.time()
+
     for (j in 1:length(colYears)) {
         trt_id = paste("TrtMnt_",colYears[j],sep="")
         interpYears <- c(interpYears,trt_id)  
     }
 
     print(interpYears)
+
+    timer <- proc.time() - timer
+    print(paste("section completed in", timer[3], "seconds."))
+
+
+
+    print("bts3")
+    timer <- proc.time()
     
     # If there is an "interpVars" variable, linearly interpolate values based on at least 2 known points in time.
     if (!is.null(interpYears)) {
@@ -36,16 +63,26 @@ BuildTimeSeries <- function (dta, idField, varList_pre, startYear, endYear, colY
                 varI <- paste(cur_ancVi,years[[k]],sep="")
                 # Check if data exists for the year - if not, ignore.  If so, include in the new modeling frame.
                 if (varI %in% colnames(dta@data)) {
-                    add_data <- paste("interpFrame[cnt] <- dta@data$",varI)
-                    eval(parse(text=add_data))
+
+
+                    # add_data <- paste("interpFrame[cnt] <- dta@data$",varI)
+                    # eval(parse(text=add_data))
+                    interpFrame[cnt] <- dta@data[,varI]
+
+
                     colnames(interpFrame)[cnt] <- years[[k]]
                     cnt = cnt + 1
                 } else {
                     # Exception for a single-point interpolation
                     varC <- paste(cur_ancVi,sep="")
                     if (varC %in% colnames(dta@data)) {
-                        add_data <- paste("interpFrame[cnt] <- dta@data$",varC)
-                        eval(parse(text=add_data))
+
+
+                        # add_data <- paste("interpFrame[cnt] <- dta@data$",varC)
+                        # eval(parse(text=add_data))
+                        interpFrame[cnt] <- dta@data[,varC]
+
+
                         cnt = 3
                     }
                 }
@@ -56,22 +93,28 @@ BuildTimeSeries <- function (dta, idField, varList_pre, startYear, endYear, colY
             if (cnt == 3) {
 
                 for (k in 1:length(years)) {
-                    varI <- paste("dta@data$",cur_ancVi,years[[k]]," <- interpFrame[2]",sep="")
-                    eval(parse(text=varI))
+
+                    # varI <- paste("dta@data$",cur_ancVi,years[[k]]," <- interpFrame[2]",sep="")
+                    # eval(parse(text=varI))
+                    dta@data[,cur_ancVi,years[[k]]] <- interpFrame[2]
                 }
 
             } else {
                 tDframe <- dta@data[idField]
-                
+
                 # Here, we model out everything. 
                 # Melt the dataframe for modeling
                 melt_Model_dta <- melt(data.frame(interpFrame),id=idField)
                 melt_Model_dta["variable"] <- as.numeric(gsub("X","",melt_Model_dta$variable))
                                                          
                 # Fit the model for interpolation
+                
                 execstr <- paste("mdl <- lm(value ~ variable + factor(",idField,"),data=melt_Model_dta)",sep="")
                 eval(parse(text=execstr))
 
+                # mdl <- lm(value ~ variable + factor(idField), data=melt_Model_dta)
+
+
                 # Apply the model to interpolate
                 for (u in 1:length(years)) {
                     varI <- paste(cur_ancVi,years[[u]],sep="")
@@ -94,7 +137,14 @@ BuildTimeSeries <- function (dta, idField, varList_pre, startYear, endYear, colY
     
     }
   
-  
+    timer <- proc.time() - timer
+    print(paste("section completed in", timer[3], "seconds."))
+
+
+
+    print("bts4")
+    timer <- proc.time()
+
     # Run the melts
     meltList <- list()
     for (i in 1:length(varList_pre)) {
@@ -126,6 +176,12 @@ BuildTimeSeries <- function (dta, idField, varList_pre, startYear, endYear, colY
 
     }
 
+
+    timer <- proc.time() - timer
+    print(paste("section completed in", timer[3], "seconds."))
+
+
+
     # Finish up with a cherry on top
     meltListRet <- data.frame(meltList)
   

From a2a378c874044b77705197bd33bfb537d237a570 Mon Sep 17 00:00:00 2001
From: userz <sgoodman@aiddata.org>
Date: Fri, 4 Sep 2015 10:36:20 -0400
Subject: [PATCH 006/212] Testing.

---
 R/BuildTimeSeries.R | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/R/BuildTimeSeries.R b/R/BuildTimeSeries.R
index 8c8dda2..34286b8 100644
--- a/R/BuildTimeSeries.R
+++ b/R/BuildTimeSeries.R
@@ -16,11 +16,11 @@ BuildTimeSeries <- function (dta, idField, varList_pre, startYear, endYear, colY
 
                 varN <- paste("TrtMnt_", colYears[j], years[k], sep="")
 
+                print(varN)
+                exec <- paste("dta$",varN,"=0",sep="")
+                eval(parse(text=exec))
 
-                # exec <- paste("dta$",varN,"=0",sep="")
-                # eval(parse(text=exec))
-
-                dta[,varN] = 0
+                # dta[,varN] = 0
 
 
                 dta@data[varN][dta@data[colYears[j]] <= as.numeric(years[k])] <- 1

From 6f3ffbee01f95b713ce067768ffa2f84726beb1b Mon Sep 17 00:00:00 2001
From: userz <sgoodman@aiddata.org>
Date: Fri, 4 Sep 2015 10:56:30 -0400
Subject: [PATCH 007/212] Testing.

---
 R/BuildTimeSeries.R |   1 +
 R/SAT.R             | 375 +++++++++++++++++++++-----------------------
 2 files changed, 180 insertions(+), 196 deletions(-)

diff --git a/R/BuildTimeSeries.R b/R/BuildTimeSeries.R
index 34286b8..a91cd2e 100644
--- a/R/BuildTimeSeries.R
+++ b/R/BuildTimeSeries.R
@@ -61,6 +61,7 @@ BuildTimeSeries <- function (dta, idField, varList_pre, startYear, endYear, colY
             for (k in 1:length(years)) {
                 # First, build a model describing the relationship between years and any data in the interp field.
                 varI <- paste(cur_ancVi,years[[k]],sep="")
+                print(varI)
                 # Check if data exists for the year - if not, ignore.  If so, include in the new modeling frame.
                 if (varI %in% colnames(dta@data)) {
 
diff --git a/R/SAT.R b/R/SAT.R
index f630f35..fb96d11 100644
--- a/R/SAT.R
+++ b/R/SAT.R
@@ -1,227 +1,210 @@
-SAT <- function(dta, mtd, constraints, psm_eq, ids, drop_opts, visual, TrtBinColName)
-{
-  #Initialization
-  pltObjs <- list()
-  init_dta <- dta
+SAT <- function (dta, mtd, constraints, psm_eq, ids, drop_opts, visual, TrtBinColName) {
+    #Initialization
+    pltObjs <- list()
+    init_dta <- dta
   
-  drop_unmatched = drop_opts["drop_unmatched"]
-  drop_method = drop_opts["drop_method"]
-  drop_thresh = as.numeric(drop_opts["drop_thresh"])
+    drop_unmatched = drop_opts["drop_unmatched"]
+    drop_method = drop_opts["drop_method"]
+    drop_thresh = as.numeric(drop_opts["drop_thresh"])
   
-  if(!is.null(constraints))
-  {
-    for(cst in 1:length(names(constraints)))
-    {
-    if(names(constraints)[cst] == "groups")
-      {
-      exec_stmnt = paste("dta$ConstraintGroupSet_Opt <- dta$",constraints["groups"],sep="")
-      eval(parse(text=exec_stmnt))
-      } else
-      {
+    if (!is.null(constraints)) {
+        for (cst in 1:length(names(constraints))) {
+            if(names(constraints)[cst] == "groups") {
+                exec_stmnt = paste("dta$ConstraintGroupSet_Opt <- dta$",constraints["groups"],sep="")
+                eval(parse(text=exec_stmnt))
+            } else {
+                dta$ConstraintGroupSet_Opt <- 1
+            }
+            if (names(constraints)[cst] == "distance") {
+            dist_PSM = as.numeric(constraints["distance"][[1]])
+            } else {
+                dist_PSM=NULL
+            }
+        }
+    } else {
         dta$ConstraintGroupSet_Opt <- 1
-      }
-    if(names(constraints)[cst] == "distance")
-      {
-      dist_PSM = as.numeric(constraints["distance"][[1]])
-      } else
-      {
-      dist_PSM=NULL
-      }
+        #max the distance threshold by taking the diagonal of the bounding box.
+        dist_PSM = NULL
     }
-  } else {
-    dta$ConstraintGroupSet_Opt <- 1
-    #max the distance threshold by taking the diagonal of the bounding box.
-    dist_PSM = NULL
-  }
   
-  #Caclulate the number of groups to constrain by, if any.
-  group_constraints <- unique(dta$ConstraintGroupSet_Opt)
+    #Caclulate the number of groups to constrain by, if any.
+    group_constraints <- unique(dta$ConstraintGroupSet_Opt)
   
-  #Make sure there are both treatment and control groups of an adequate size (>= 1 of each)
-  t_dta <- list()
-  u_dta <-list()
-  grp_list <- list()
-  cnt = 0
-  for (grp in 1:length(group_constraints))
-  {
-    cur_grp <- as.matrix(group_constraints)[grp]
-    grp_index = length(grp_list)+1
-    t_index = length(t_dta)+1
-    grp_list[[grp_index]] <- as.matrix(group_constraints)[grp]
-    t_dta[[t_index]] <- dta[dta$TrtBin == 1,]
-    u_dta[[t_index]] <- dta[dta$TrtBin == 0,]
-    treatment_count <- cur_grp %in% t_dta[[t_index]]$ConstraintGroupSet_Opt
-    untreated_count <- cur_grp %in% u_dta[[t_index]]$ConstraintGroupSet_Opt
-    if((untreated_count == FALSE) || (treatment_count == FALSE))
-    {
-      dta <- dta[!dta$ConstraintGroupSet_Opt == cur_grp,]
-      t_dta[[t_index]] <- NULL
-      u_dta[[t_index]] <- NULL
-      grp_list[[t_index]] <- NULL
-      war_statement = paste("Dropped group due to a lack of both treatment and control observation: '",cur_grp,"'",sep="")
-      warning(war_statement)
-    } else {
-      
-      t_dta[[t_index]] <- t_dta[[t_index]][t_dta[[t_index]]$ConstraintGroupSet_Opt == cur_grp,]
-      u_dta[[t_index]] <- u_dta[[t_index]][u_dta[[t_index]]$ConstraintGroupSet_Opt == cur_grp,]
+    #Make sure there are both treatment and control groups of an adequate size (>= 1 of each)
+    t_dta <- list()
+    u_dta <-list()
+    grp_list <- list()
+    cnt = 0
+    for (grp in 1:length(group_constraints)) {
+        cur_grp <- as.matrix(group_constraints)[grp]
+        grp_index = length(grp_list)+1
+        t_index = length(t_dta)+1
+        grp_list[[grp_index]] <- as.matrix(group_constraints)[grp]
+        t_dta[[t_index]] <- dta[dta$TrtBin == 1,]
+        u_dta[[t_index]] <- dta[dta$TrtBin == 0,]
+        treatment_count <- cur_grp %in% t_dta[[t_index]]$ConstraintGroupSet_Opt
+        untreated_count <- cur_grp %in% u_dta[[t_index]]$ConstraintGroupSet_Opt
+        if ((untreated_count == FALSE) || (treatment_count == FALSE)) {
+            dta <- dta[!dta$ConstraintGroupSet_Opt == cur_grp,]
+            t_dta[[t_index]] <- NULL
+            u_dta[[t_index]] <- NULL
+            grp_list[[t_index]] <- NULL
+            war_statement = paste("Dropped group due to a lack of both treatment and control observation: '",cur_grp,"'",sep="")
+            warning(war_statement)
+        } else { 
+            t_dta[[t_index]] <- t_dta[[t_index]][t_dta[[t_index]]$ConstraintGroupSet_Opt == cur_grp,]
+            u_dta[[t_index]] <- u_dta[[t_index]][u_dta[[t_index]]$ConstraintGroupSet_Opt == cur_grp,]
         
-      cnt = cnt + 1
+            cnt = cnt + 1
+        }
     }
-  }
-  temp_dta <- list()
-for(i in 1:cnt)
-  {
-  cur_grp <- grp_list[[i]]
-  it_dta <- maptools::spRbind(t_dta[[i]],u_dta[[i]])
+
+    temp_dta <- list()
+
+    for (i in 1:cnt) {
+        cur_grp <- grp_list[[i]]
+        it_dta <- maptools::spRbind(t_dta[[i]],u_dta[[i]])
 
   
-  if (mtd == "fastNN")
-    {
-      temp_dta[[i]] <- fastNN_binary_func(it_dta,TrtBinColName,ids,cur_grp,dist_PSM) 
-    }
+        if (mtd == "fastNN") {
+            temp_dta[[i]] <- fastNN_binary_func(it_dta,TrtBinColName,ids,cur_grp,dist_PSM) 
+        }
   
-  if (mtd == "NN_WithReplacement")
-    {
-      print("NN with replacement is currently not available, please choose fastNN")
-     # temp_dta[[i]] <- NN_WithReplacement_binary_func(it_dta,TrtBinColName,ids,cur_grp,dist_PSM) 
+        if (mtd == "NN_WithReplacement") {
+            print("NN with replacement is currently not available, please choose fastNN")
+            # temp_dta[[i]] <- NN_WithReplacement_binary_func(it_dta,TrtBinColName,ids,cur_grp,dist_PSM) 
+        }
+    }
+
+    #Build the final datasets from subsets
+    if (cnt > 1) {
+      dta <- temp_dta[[1]]
+      for(k in 2:cnt) {
+        dta  <- maptools::spRbind(dta, temp_dta[[k]])
+      } 
+    } else {
+      dta <- temp_dta[[1]]
     }
-  }
-
-#Build the final datasets from subsets
-if(cnt > 1)
-{
-  dta <- temp_dta[[1]]
-  for(k in 2:cnt)
-  {
-  dta  <- maptools::spRbind(dta, temp_dta[[k]])
-  } 
-} else {
-  dta <- temp_dta[[1]]
-}
 
 
-  if (drop_unmatched == TRUE)
-  {
-    dta <- dta[dta@data$PSM_match_ID != -999,]    
-  }
+    if (drop_unmatched == TRUE) {
+        dta <- dta[dta@data$PSM_match_ID != -999,]    
+    }
   
-  anc_v_int <- strsplit(psm_eq, "~")[[1]][2]
-  anc_vars <- strsplit(gsub(" ","",anc_v_int), "+", fixed=TRUE)
-  anc_vars <- c(anc_vars[[1]], "PSM_trtProb")
+    anc_v_int <- strsplit(psm_eq, "~")[[1]][2]
+    anc_vars <- strsplit(gsub(" ","",anc_v_int), "+", fixed=TRUE)
+    anc_vars <- c(anc_vars[[1]], "PSM_trtProb")
   
-  #Drop observations according to the selected method
-  if(drop_method == "SD")
-  {
-    #Method to drop pairs that are greater than a set threshold apart in terms of PSM Standard Deviations.
-    psm_sd_thresh = sd(dta$PSM_trtProb) * drop_thresh
-    if(visual == "TRUE")
-    {
-      print(psm_sd_thresh)
+    #Drop observations according to the selected method
+    if (drop_method == "SD") {
+        #Method to drop pairs that are greater than a set threshold apart in terms of PSM Standard Deviations.
+        psm_sd_thresh = sd(dta$PSM_trtProb) * drop_thresh
+        if (visual == "TRUE") {
+            print(psm_sd_thresh)
+        }
+        dta <- dta[dta@data$PSM_distance < psm_sd_thresh,]
     }
-    dta <- dta[dta@data$PSM_distance < psm_sd_thresh,]
-  }
   
-  #Plot the pre and post-dropping balance for PSM model...
-  #Balance metrics are based on "Misunderstandings between experimentalists and
-  #observationalists about causal inference", Imal, King, and Stuart.
-  #Simplest suggestion of comparing means and checking if .25 SD apart used.
-  cnt = 0
-  for (i in 1:length(anc_vars))
-  {
-    #gsub to remove any factors()
-    ed_v = sub("factor\\(","",anc_vars[i])
-    ed_v = sub(")","",ed_v)
-    treat_mean_pre = paste("round(describeBy(init_dta@data$",ed_v,", group=init_dta@data$",TrtBinColName,")[[2]][[3]],5)")
-    treat_SD_pre = paste("round(describeBy(init_dta@data$",ed_v,", group=init_dta@data$",TrtBinColName,")[[2]][[4]],5)")
-    
-    control_mean_pre = paste("round(describeBy(init_dta@data$",ed_v,", group=init_dta@data$",TrtBinColName,")[[1]][[3]],5)")
-    control_SD_pre = paste("round(describeBy(init_dta@data$",ed_v,", group=init_dta@data$",TrtBinColName,")[[1]][[4]],5)")
-    
-    treat_mean_post = paste("round(describeBy(dta@data$",ed_v,", group=dta@data$",TrtBinColName,")[[2]][[3]],5)")
-    treat_SD_post = paste("round(describeBy(dta@data$",ed_v,", group=dta@data$",TrtBinColName,")[[2]][[4]],5)")
-    
-    control_mean_post = paste("round(describeBy(dta@data$",ed_v,", group=dta@data$",TrtBinColName,")[[1]][[3]],5)")
-    control_SD_post = paste("round(describeBy(dta@data$",ed_v,", group=dta@data$",TrtBinColName,")[[1]][[4]],5)")
-   
-    c_type = eval(parse(text=paste("class(init_dta@data$",ed_v,")")))
+    #Plot the pre and post-dropping balance for PSM model...
+    #Balance metrics are based on "Misunderstandings between experimentalists and
+    #observationalists about causal inference", Imal, King, and Stuart.
+    #Simplest suggestion of comparing means and checking if .25 SD apart used.
+    cnt = 0
     
-    if(c_type == "matrix")
-    {
-      exec_str = paste("dta@data$",ed_v,"<- as.numeric(dta@data$",ed_v,")",sep="")
-      eval(parse(text=exec_str))
-      
-      exec_str = paste("init_dta@data$",ed_v,"<- as.numeric(init_dta@data$",ed_v,")",sep="")
-      eval(parse(text=exec_str))
-      c_type = "numeric"
-    }
-    if((c_type == "numeric") & (visual == "TRUE"))
-    {
-      cnt = cnt + 1
-      pltObjs[[length(pltObjs) + 1]] <- GroupCompHist(init_dta, anc_vars[i],"Pre-Balancing: ",simple_out = FALSE)
-      pltObjs[[length(pltObjs) + 1]] <- GroupCompHist(dta, anc_vars[i],"Post-Balancing: ",simple_out = FALSE)  
-
-      treat_mean_pre <- eval(parse(text=treat_mean_pre))
-      treat_SD_pre <- eval(parse(text=treat_SD_pre))
-      control_mean_pre <- eval(parse(text=control_mean_pre))
-      control_SD_pre <- eval(parse(text=control_SD_pre))
-      
-      treat_mean_post <- eval(parse(text=treat_mean_post))
-      treat_SD_post <- eval(parse(text=treat_SD_post))
-      control_mean_post <- eval(parse(text=control_mean_post))
-      control_SD_post <- eval(parse(text=control_SD_post))
-      
-      it_diff_Mean_pre <- round(abs( treat_mean_pre-control_mean_pre ),5)
-      it_diff_Mean_post <- round(abs(treat_mean_post-control_mean_post),5)
-      
-      if(!exists("bRes"))
-      {
-        bRes <- data.frame(treat_mean_pre,treat_SD_pre,control_mean_pre,control_SD_pre,
-                           treat_mean_post,treat_SD_post,control_mean_post,control_SD_post,
-                           it_diff_Mean_pre,it_diff_Mean_post)
-        colnames(bRes)[1] <- "Pre-Balance Treated Mean"
-        colnames(bRes)[2] <- "Pre-Balance Treated SD"
-        colnames(bRes)[3] <- "Pre-Balance Control Mean"
-        colnames(bRes)[4] <- "Pre-Balance Control SD"
+    for (i in 1:length(anc_vars)) {
+        #gsub to remove any factors()
+        ed_v = sub("factor\\(","",anc_vars[i])
+        ed_v = sub(")","",ed_v)
+        treat_mean_pre = paste("round(describeBy(init_dta@data$",ed_v,", group=init_dta@data$",TrtBinColName,")[[2]][[3]],5)")
+        treat_SD_pre = paste("round(describeBy(init_dta@data$",ed_v,", group=init_dta@data$",TrtBinColName,")[[2]][[4]],5)")
         
-        colnames(bRes)[5] <- "Post-Balance Treated Mean"
-        colnames(bRes)[6] <- "Post-Balance Treated SD"
-        colnames(bRes)[7] <- "Post-Balance Control Mean"
-        colnames(bRes)[8] <- "Post-Balance Control SD"
+        control_mean_pre = paste("round(describeBy(init_dta@data$",ed_v,", group=init_dta@data$",TrtBinColName,")[[1]][[3]],5)")
+        control_SD_pre = paste("round(describeBy(init_dta@data$",ed_v,", group=init_dta@data$",TrtBinColName,")[[1]][[4]],5)")
         
-        colnames(bRes)[9] <- "Mean Difference Pre-Balance"
-        colnames(bRes)[10] <- "Mean Difference Post-Balance"
-      }else{
-        bRes <- rbind(bRes, c(treat_mean_pre,treat_SD_pre,control_mean_pre,control_SD_pre,
+        treat_mean_post = paste("round(describeBy(dta@data$",ed_v,", group=dta@data$",TrtBinColName,")[[2]][[3]],5)")
+        treat_SD_post = paste("round(describeBy(dta@data$",ed_v,", group=dta@data$",TrtBinColName,")[[2]][[4]],5)")
+        
+        control_mean_post = paste("round(describeBy(dta@data$",ed_v,", group=dta@data$",TrtBinColName,")[[1]][[3]],5)")
+        control_SD_post = paste("round(describeBy(dta@data$",ed_v,", group=dta@data$",TrtBinColName,")[[1]][[4]],5)")
+       
+        # c_type = eval(parse(text=paste("class(init_dta@data$",ed_v,")")))
+        c_type = class(init_dta@data[,ed_v])
+
+        if (c_type == "matrix") {
+
+            # exec_str = paste("dta@data$",ed_v,"<- as.numeric(dta@data$",ed_v,")",sep="")
+            # eval(parse(text=exec_str))
+            dta@data[,ed_v] <- as.numeric(dta@data[,ed_v]
+
+            # exec_str = paste("init_dta@data$",ed_v,"<- as.numeric(init_dta@data$",ed_v,")",sep="")
+            # eval(parse(text=exec_str))
+            init_dta@data[,ed_v] <- as.numeric(init_dta@data[,ed_v]
+
+            c_type = "numeric"
+        }
+
+        if ((c_type == "numeric") & (visual == "TRUE")) {
+            cnt = cnt + 1
+            pltObjs[[length(pltObjs) + 1]] <- GroupCompHist(init_dta, anc_vars[i],"Pre-Balancing: ",simple_out = FALSE)
+            pltObjs[[length(pltObjs) + 1]] <- GroupCompHist(dta, anc_vars[i],"Post-Balancing: ",simple_out = FALSE)  
+
+            treat_mean_pre <- eval(parse(text=treat_mean_pre))
+            treat_SD_pre <- eval(parse(text=treat_SD_pre))
+            control_mean_pre <- eval(parse(text=control_mean_pre))
+            control_SD_pre <- eval(parse(text=control_SD_pre))
+
+            treat_mean_post <- eval(parse(text=treat_mean_post))
+            treat_SD_post <- eval(parse(text=treat_SD_post))
+            control_mean_post <- eval(parse(text=control_mean_post))
+            control_SD_post <- eval(parse(text=control_SD_post))
+
+            it_diff_Mean_pre <- round(abs( treat_mean_pre-control_mean_pre ),5)
+            it_diff_Mean_post <- round(abs(treat_mean_post-control_mean_post),5)
+
+            if(!exists("bRes")) {
+                bRes <- data.frame(treat_mean_pre,treat_SD_pre,control_mean_pre,control_SD_pre,
+                           treat_mean_post,treat_SD_post,control_mean_post,control_SD_post,
+                           it_diff_Mean_pre,it_diff_Mean_post)
+                colnames(bRes)[1] <- "Pre-Balance Treated Mean"
+                colnames(bRes)[2] <- "Pre-Balance Treated SD"
+                colnames(bRes)[3] <- "Pre-Balance Control Mean"
+                colnames(bRes)[4] <- "Pre-Balance Control SD"
+
+                colnames(bRes)[5] <- "Post-Balance Treated Mean"
+                colnames(bRes)[6] <- "Post-Balance Treated SD"
+                colnames(bRes)[7] <- "Post-Balance Control Mean"
+                colnames(bRes)[8] <- "Post-Balance Control SD"
+
+                colnames(bRes)[9] <- "Mean Difference Pre-Balance"
+                colnames(bRes)[10] <- "Mean Difference Post-Balance"
+            } else {
+                bRes <- rbind(bRes, c(treat_mean_pre,treat_SD_pre,control_mean_pre,control_SD_pre,
                               treat_mean_post,treat_SD_post,control_mean_post,control_SD_post,
                               it_diff_Mean_pre,it_diff_Mean_post))
-      }
+            }
       
-  
-      rownames(bRes)[i-(i-cnt)] <- gsub("[^a-zA-Z0-9]","",ed_v)
+            rownames(bRes)[i-(i-cnt)] <- gsub("[^a-zA-Z0-9]","",ed_v)
+        }
     }
-  }
   
-  if(visual=="TRUE")
-  {
-    #Output graphics
-    #Remove the factor rows
-    nrow_c <- length(pltObjs)
-    counter <- 1
-    while(counter <= nrow_c)
-    {
-      d = counter + 3
-      if(d > nrow_c)
-      {
-        d = nrow_c
-      }
-      do.call(grid.arrange,c(pltObjs[counter:d],nrow=2,ncol=2))
-      counter = counter + 4
+    if (visual=="TRUE") {
+        #Output graphics
+        #Remove the factor rows
+        nrow_c <- length(pltObjs)
+        counter <- 1
+        while(counter <= nrow_c) {
+            d = counter + 3
+            if(d > nrow_c) {
+                d = nrow_c
+            }
+            do.call(grid.arrange,c(pltObjs[counter:d],nrow=2,ncol=2))
+            counter = counter + 4
+        }
+        #bTab <- stargazer(bRes,summary=FALSE,type="html")
+        #print.htmlTable(bTab)
     }
-    #bTab <- stargazer(bRes,summary=FALSE,type="html")
-    #print.htmlTable(bTab)
-  }
   
 
-  return (dta)
+    return (dta)
 }
 

From 88581428fb3ae98e3533eba1a1add3b5bdea4725 Mon Sep 17 00:00:00 2001
From: userz <sgoodman@aiddata.org>
Date: Fri, 4 Sep 2015 10:57:55 -0400
Subject: [PATCH 008/212] Testing.

---
 R/SAT.R | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/R/SAT.R b/R/SAT.R
index fb96d11..59b0798 100644
--- a/R/SAT.R
+++ b/R/SAT.R
@@ -138,7 +138,7 @@ SAT <- function (dta, mtd, constraints, psm_eq, ids, drop_opts, visual, TrtBinCo
 
             # exec_str = paste("init_dta@data$",ed_v,"<- as.numeric(init_dta@data$",ed_v,")",sep="")
             # eval(parse(text=exec_str))
-            init_dta@data[,ed_v] <- as.numeric(init_dta@data[,ed_v]
+            init_dta@data[,ed_v] <- as.numeric(init_dta@data[,ed_v])
 
             c_type = "numeric"
         }

From f9e28e25807fe94b0c40f0faf72bc39ae13c8006 Mon Sep 17 00:00:00 2001
From: userz <sgoodman@aiddata.org>
Date: Fri, 4 Sep 2015 10:59:21 -0400
Subject: [PATCH 009/212] Testing.

---
 R/SAT.R | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/R/SAT.R b/R/SAT.R
index 59b0798..7569e1c 100644
--- a/R/SAT.R
+++ b/R/SAT.R
@@ -138,7 +138,7 @@ SAT <- function (dta, mtd, constraints, psm_eq, ids, drop_opts, visual, TrtBinCo
 
             # exec_str = paste("init_dta@data$",ed_v,"<- as.numeric(init_dta@data$",ed_v,")",sep="")
             # eval(parse(text=exec_str))
-            init_dta@data[,ed_v] <- as.numeric(init_dta@data[,ed_v])
+            init_dta@data[[ed_v]] <- as.numeric(init_dta@data[[ed_v])
 
             c_type = "numeric"
         }

From 00f9515bf431d400d7047a3ab9362e145e4f36e6 Mon Sep 17 00:00:00 2001
From: userz <sgoodman@aiddata.org>
Date: Fri, 4 Sep 2015 10:59:50 -0400
Subject: [PATCH 010/212] Testing.

---
 R/SAT.R | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/R/SAT.R b/R/SAT.R
index 7569e1c..fc793e7 100644
--- a/R/SAT.R
+++ b/R/SAT.R
@@ -136,9 +136,9 @@ SAT <- function (dta, mtd, constraints, psm_eq, ids, drop_opts, visual, TrtBinCo
             # eval(parse(text=exec_str))
             dta@data[,ed_v] <- as.numeric(dta@data[,ed_v]
 
-            # exec_str = paste("init_dta@data$",ed_v,"<- as.numeric(init_dta@data$",ed_v,")",sep="")
-            # eval(parse(text=exec_str))
-            init_dta@data[[ed_v]] <- as.numeric(init_dta@data[[ed_v])
+            exec_str = paste("init_dta@data$",ed_v,"<- as.numeric(init_dta@data$",ed_v,")",sep="")
+            eval(parse(text=exec_str))
+            # init_dta@data[[ed_v]] <- as.numeric(init_dta@data[[ed_v])
 
             c_type = "numeric"
         }

From 02b3a6eac70ed238c5c1883a6b97d3bee7ef7c8c Mon Sep 17 00:00:00 2001
From: userz <sgoodman@aiddata.org>
Date: Fri, 4 Sep 2015 11:00:19 -0400
Subject: [PATCH 011/212] Testing.

---
 R/SAT.R | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/R/SAT.R b/R/SAT.R
index fc793e7..5917a9b 100644
--- a/R/SAT.R
+++ b/R/SAT.R
@@ -134,11 +134,11 @@ SAT <- function (dta, mtd, constraints, psm_eq, ids, drop_opts, visual, TrtBinCo
 
             # exec_str = paste("dta@data$",ed_v,"<- as.numeric(dta@data$",ed_v,")",sep="")
             # eval(parse(text=exec_str))
-            dta@data[,ed_v] <- as.numeric(dta@data[,ed_v]
+            dta@data[,ed_v] <- as.numeric(dta@data[,ed_v])
 
-            exec_str = paste("init_dta@data$",ed_v,"<- as.numeric(init_dta@data$",ed_v,")",sep="")
-            eval(parse(text=exec_str))
-            # init_dta@data[[ed_v]] <- as.numeric(init_dta@data[[ed_v])
+            # exec_str = paste("init_dta@data$",ed_v,"<- as.numeric(init_dta@data$",ed_v,")",sep="")
+            # eval(parse(text=exec_str))
+            init_dta@data[[ed_v]] <- as.numeric(init_dta@data[[ed_v])
 
             c_type = "numeric"
         }

From 849e133068a9d6ff1b75a11c9def0bc00b00999c Mon Sep 17 00:00:00 2001
From: userz <sgoodman@aiddata.org>
Date: Fri, 4 Sep 2015 11:00:45 -0400
Subject: [PATCH 012/212] Testing.

---
 R/SAT.R | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/R/SAT.R b/R/SAT.R
index 5917a9b..906ced7 100644
--- a/R/SAT.R
+++ b/R/SAT.R
@@ -138,7 +138,7 @@ SAT <- function (dta, mtd, constraints, psm_eq, ids, drop_opts, visual, TrtBinCo
 
             # exec_str = paste("init_dta@data$",ed_v,"<- as.numeric(init_dta@data$",ed_v,")",sep="")
             # eval(parse(text=exec_str))
-            init_dta@data[[ed_v]] <- as.numeric(init_dta@data[[ed_v])
+            init_dta@data[[ed_v]] <- as.numeric(init_dta@data[[ed_v]])
 
             c_type = "numeric"
         }

From fca8884cb771ccbf16958cca73dedaca969913ab Mon Sep 17 00:00:00 2001
From: userz <sgoodman@aiddata.org>
Date: Fri, 4 Sep 2015 11:02:42 -0400
Subject: [PATCH 013/212] Get rid of eval statements in SAT.R.

---
 R/SAT.R | 10 ++++++----
 1 file changed, 6 insertions(+), 4 deletions(-)

diff --git a/R/SAT.R b/R/SAT.R
index 906ced7..bc1b3b4 100644
--- a/R/SAT.R
+++ b/R/SAT.R
@@ -10,13 +10,15 @@ SAT <- function (dta, mtd, constraints, psm_eq, ids, drop_opts, visual, TrtBinCo
     if (!is.null(constraints)) {
         for (cst in 1:length(names(constraints))) {
             if(names(constraints)[cst] == "groups") {
-                exec_stmnt = paste("dta$ConstraintGroupSet_Opt <- dta$",constraints["groups"],sep="")
-                eval(parse(text=exec_stmnt))
+                # exec_stmnt = paste("dta$ConstraintGroupSet_Opt <- dta$",constraints["groups"],sep="")
+                # eval(parse(text=exec_stmnt))
+                dta[,ConstraintGroupSet_Opt] <- dta[,constraints["groups"]]
+
             } else {
                 dta$ConstraintGroupSet_Opt <- 1
             }
             if (names(constraints)[cst] == "distance") {
-            dist_PSM = as.numeric(constraints["distance"][[1]])
+                dist_PSM = as.numeric(constraints["distance"][[1]])
             } else {
                 dist_PSM=NULL
             }
@@ -138,7 +140,7 @@ SAT <- function (dta, mtd, constraints, psm_eq, ids, drop_opts, visual, TrtBinCo
 
             # exec_str = paste("init_dta@data$",ed_v,"<- as.numeric(init_dta@data$",ed_v,")",sep="")
             # eval(parse(text=exec_str))
-            init_dta@data[[ed_v]] <- as.numeric(init_dta@data[[ed_v]])
+            init_dta@data[,ed_v] <- as.numeric(init_dta@data[,ed_v])
 
             c_type = "numeric"
         }

From 604dc4318d6ab5318b0134b340f1b5edd5d67f8b Mon Sep 17 00:00:00 2001
From: userz <sgoodman@aiddata.org>
Date: Fri, 4 Sep 2015 11:04:26 -0400
Subject: [PATCH 014/212] Fix bug.

---
 R/SAT.R | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/R/SAT.R b/R/SAT.R
index bc1b3b4..e40ae58 100644
--- a/R/SAT.R
+++ b/R/SAT.R
@@ -12,7 +12,7 @@ SAT <- function (dta, mtd, constraints, psm_eq, ids, drop_opts, visual, TrtBinCo
             if(names(constraints)[cst] == "groups") {
                 # exec_stmnt = paste("dta$ConstraintGroupSet_Opt <- dta$",constraints["groups"],sep="")
                 # eval(parse(text=exec_stmnt))
-                dta[,ConstraintGroupSet_Opt] <- dta[,constraints["groups"]]
+                dta[,"ConstraintGroupSet_Opt"] <- dta[,constraints["groups"]]
 
             } else {
                 dta$ConstraintGroupSet_Opt <- 1

From 2c4bb58714ac94da1646966a2b5819c632af6801 Mon Sep 17 00:00:00 2001
From: userz <sgoodman@aiddata.org>
Date: Fri, 4 Sep 2015 11:06:58 -0400
Subject: [PATCH 015/212] Testing.

---
 R/SAT.R | 15 ++++++++++++++-
 1 file changed, 14 insertions(+), 1 deletion(-)

diff --git a/R/SAT.R b/R/SAT.R
index e40ae58..4f6f178 100644
--- a/R/SAT.R
+++ b/R/SAT.R
@@ -7,6 +7,8 @@ SAT <- function (dta, mtd, constraints, psm_eq, ids, drop_opts, visual, TrtBinCo
     drop_method = drop_opts["drop_method"]
     drop_thresh = as.numeric(drop_opts["drop_thresh"])
   
+    print("sat1")
+
     if (!is.null(constraints)) {
         for (cst in 1:length(names(constraints))) {
             if(names(constraints)[cst] == "groups") {
@@ -28,7 +30,9 @@ SAT <- function (dta, mtd, constraints, psm_eq, ids, drop_opts, visual, TrtBinCo
         #max the distance threshold by taking the diagonal of the bounding box.
         dist_PSM = NULL
     }
-  
+
+    print("sat2")
+
     #Caclulate the number of groups to constrain by, if any.
     group_constraints <- unique(dta$ConstraintGroupSet_Opt)
   
@@ -60,6 +64,7 @@ SAT <- function (dta, mtd, constraints, psm_eq, ids, drop_opts, visual, TrtBinCo
             cnt = cnt + 1
         }
     }
+    print("sat3")
 
     temp_dta <- list()
 
@@ -77,6 +82,7 @@ SAT <- function (dta, mtd, constraints, psm_eq, ids, drop_opts, visual, TrtBinCo
             # temp_dta[[i]] <- NN_WithReplacement_binary_func(it_dta,TrtBinColName,ids,cur_grp,dist_PSM) 
         }
     }
+    print("sat4")
 
     #Build the final datasets from subsets
     if (cnt > 1) {
@@ -88,6 +94,7 @@ SAT <- function (dta, mtd, constraints, psm_eq, ids, drop_opts, visual, TrtBinCo
       dta <- temp_dta[[1]]
     }
 
+    print("sat5")
 
     if (drop_unmatched == TRUE) {
         dta <- dta[dta@data$PSM_match_ID != -999,]    
@@ -96,6 +103,8 @@ SAT <- function (dta, mtd, constraints, psm_eq, ids, drop_opts, visual, TrtBinCo
     anc_v_int <- strsplit(psm_eq, "~")[[1]][2]
     anc_vars <- strsplit(gsub(" ","",anc_v_int), "+", fixed=TRUE)
     anc_vars <- c(anc_vars[[1]], "PSM_trtProb")
+    
+    print("sat6")
   
     #Drop observations according to the selected method
     if (drop_method == "SD") {
@@ -112,6 +121,8 @@ SAT <- function (dta, mtd, constraints, psm_eq, ids, drop_opts, visual, TrtBinCo
     #observationalists about causal inference", Imal, King, and Stuart.
     #Simplest suggestion of comparing means and checking if .25 SD apart used.
     cnt = 0
+
+    print("sat7")
     
     for (i in 1:length(anc_vars)) {
         #gsub to remove any factors()
@@ -188,6 +199,8 @@ SAT <- function (dta, mtd, constraints, psm_eq, ids, drop_opts, visual, TrtBinCo
             rownames(bRes)[i-(i-cnt)] <- gsub("[^a-zA-Z0-9]","",ed_v)
         }
     }
+    
+    print("sat8")
   
     if (visual=="TRUE") {
         #Output graphics

From 977e68e5fb0aea82da43b0cd3f86d4311464fc67 Mon Sep 17 00:00:00 2001
From: userz <sgoodman@aiddata.org>
Date: Fri, 4 Sep 2015 11:09:47 -0400
Subject: [PATCH 016/212] Testing.

---
 R/SAT.R | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/R/SAT.R b/R/SAT.R
index 4f6f178..791ebb5 100644
--- a/R/SAT.R
+++ b/R/SAT.R
@@ -11,13 +11,13 @@ SAT <- function (dta, mtd, constraints, psm_eq, ids, drop_opts, visual, TrtBinCo
 
     if (!is.null(constraints)) {
         for (cst in 1:length(names(constraints))) {
-            if(names(constraints)[cst] == "groups") {
+            if (names(constraints)[cst] == "groups") {
                 # exec_stmnt = paste("dta$ConstraintGroupSet_Opt <- dta$",constraints["groups"],sep="")
                 # eval(parse(text=exec_stmnt))
-                dta[,"ConstraintGroupSet_Opt"] <- dta[,constraints["groups"]]
+                dta[["ConstraintGroupSet_Opt"]] <- dta[[constraints["groups"]]]
 
             } else {
-                dta$ConstraintGroupSet_Opt <- 1
+                dta[["ConstraintGroupSet_Opt"]] <- 1
             }
             if (names(constraints)[cst] == "distance") {
                 dist_PSM = as.numeric(constraints["distance"][[1]])
@@ -26,7 +26,7 @@ SAT <- function (dta, mtd, constraints, psm_eq, ids, drop_opts, visual, TrtBinCo
             }
         }
     } else {
-        dta$ConstraintGroupSet_Opt <- 1
+        dta[["ConstraintGroupSet_Opt"]] <- 1
         #max the distance threshold by taking the diagonal of the bounding box.
         dist_PSM = NULL
     }
@@ -34,7 +34,7 @@ SAT <- function (dta, mtd, constraints, psm_eq, ids, drop_opts, visual, TrtBinCo
     print("sat2")
 
     #Caclulate the number of groups to constrain by, if any.
-    group_constraints <- unique(dta$ConstraintGroupSet_Opt)
+    group_constraints <- unique(dta[["ConstraintGroupSet_Opt"]])
   
     #Make sure there are both treatment and control groups of an adequate size (>= 1 of each)
     t_dta <- list()
@@ -199,7 +199,7 @@ SAT <- function (dta, mtd, constraints, psm_eq, ids, drop_opts, visual, TrtBinCo
             rownames(bRes)[i-(i-cnt)] <- gsub("[^a-zA-Z0-9]","",ed_v)
         }
     }
-    
+
     print("sat8")
   
     if (visual=="TRUE") {

From c5fe38f75a6db91f1509ee48a9b5414cb62ae58b Mon Sep 17 00:00:00 2001
From: userz <sgoodman@aiddata.org>
Date: Fri, 4 Sep 2015 11:17:35 -0400
Subject: [PATCH 017/212] Testing.

---
 R/SAT.R | 16 ++++++++++------
 1 file changed, 10 insertions(+), 6 deletions(-)

diff --git a/R/SAT.R b/R/SAT.R
index 791ebb5..13d0093 100644
--- a/R/SAT.R
+++ b/R/SAT.R
@@ -70,9 +70,11 @@ SAT <- function (dta, mtd, constraints, psm_eq, ids, drop_opts, visual, TrtBinCo
 
     for (i in 1:cnt) {
         cur_grp <- grp_list[[i]]
+
+        print("sat3.1")
         it_dta <- maptools::spRbind(t_dta[[i]],u_dta[[i]])
 
-  
+        print("sat3.2")
         if (mtd == "fastNN") {
             temp_dta[[i]] <- fastNN_binary_func(it_dta,TrtBinColName,ids,cur_grp,dist_PSM) 
         }
@@ -143,19 +145,21 @@ SAT <- function (dta, mtd, constraints, psm_eq, ids, drop_opts, visual, TrtBinCo
         # c_type = eval(parse(text=paste("class(init_dta@data$",ed_v,")")))
         c_type = class(init_dta@data[,ed_v])
 
+        print("sat7.1")
         if (c_type == "matrix") {
 
             # exec_str = paste("dta@data$",ed_v,"<- as.numeric(dta@data$",ed_v,")",sep="")
             # eval(parse(text=exec_str))
-            dta@data[,ed_v] <- as.numeric(dta@data[,ed_v])
+            dta@data[[ed_v]] <- as.numeric(dta@data[[ed_v]])
 
             # exec_str = paste("init_dta@data$",ed_v,"<- as.numeric(init_dta@data$",ed_v,")",sep="")
             # eval(parse(text=exec_str))
-            init_dta@data[,ed_v] <- as.numeric(init_dta@data[,ed_v])
+            init_dta@data[[ed_v]] <- as.numeric(init_dta@data[[ed_v]])
 
             c_type = "numeric"
         }
 
+        print("sat7.2")
         if ((c_type == "numeric") & (visual == "TRUE")) {
             cnt = cnt + 1
             pltObjs[[length(pltObjs) + 1]] <- GroupCompHist(init_dta, anc_vars[i],"Pre-Balancing: ",simple_out = FALSE)
@@ -174,7 +178,7 @@ SAT <- function (dta, mtd, constraints, psm_eq, ids, drop_opts, visual, TrtBinCo
             it_diff_Mean_pre <- round(abs( treat_mean_pre-control_mean_pre ),5)
             it_diff_Mean_post <- round(abs(treat_mean_post-control_mean_post),5)
 
-            if(!exists("bRes")) {
+            if (!exists("bRes")) {
                 bRes <- data.frame(treat_mean_pre,treat_SD_pre,control_mean_pre,control_SD_pre,
                            treat_mean_post,treat_SD_post,control_mean_post,control_SD_post,
                            it_diff_Mean_pre,it_diff_Mean_post)
@@ -207,9 +211,9 @@ SAT <- function (dta, mtd, constraints, psm_eq, ids, drop_opts, visual, TrtBinCo
         #Remove the factor rows
         nrow_c <- length(pltObjs)
         counter <- 1
-        while(counter <= nrow_c) {
+        while (counter <= nrow_c) {
             d = counter + 3
-            if(d > nrow_c) {
+            if (d > nrow_c) {
                 d = nrow_c
             }
             do.call(grid.arrange,c(pltObjs[counter:d],nrow=2,ncol=2))

From 8726c83f311dde696dd9b7451284e9faedfeff62 Mon Sep 17 00:00:00 2001
From: userz <sgoodman@aiddata.org>
Date: Fri, 4 Sep 2015 11:32:29 -0400
Subject: [PATCH 018/212] Testing.

---
 R/SAT.R | 78 ++++++++++++++++++++++++++++++++++++---------------------
 1 file changed, 49 insertions(+), 29 deletions(-)

diff --git a/R/SAT.R b/R/SAT.R
index 13d0093..1aa2f11 100644
--- a/R/SAT.R
+++ b/R/SAT.R
@@ -46,20 +46,20 @@ SAT <- function (dta, mtd, constraints, psm_eq, ids, drop_opts, visual, TrtBinCo
         grp_index = length(grp_list)+1
         t_index = length(t_dta)+1
         grp_list[[grp_index]] <- as.matrix(group_constraints)[grp]
-        t_dta[[t_index]] <- dta[dta$TrtBin == 1,]
-        u_dta[[t_index]] <- dta[dta$TrtBin == 0,]
-        treatment_count <- cur_grp %in% t_dta[[t_index]]$ConstraintGroupSet_Opt
-        untreated_count <- cur_grp %in% u_dta[[t_index]]$ConstraintGroupSet_Opt
+        t_dta[[t_index]] <- dta[dta[["TrtBin"]] == 1,]
+        u_dta[[t_index]] <- dta[dta[["TrtBin"]] == 0,]
+        treatment_count <- cur_grp %in% t_dta[[t_index]][["ConstraintGroupSet_Opt"]]
+        untreated_count <- cur_grp %in% u_dta[[t_index]][["ConstraintGroupSet_Opt"]]
         if ((untreated_count == FALSE) || (treatment_count == FALSE)) {
-            dta <- dta[!dta$ConstraintGroupSet_Opt == cur_grp,]
+            dta <- dta[!dta[["ConstraintGroupSet_Opt"]] == cur_grp,]
             t_dta[[t_index]] <- NULL
             u_dta[[t_index]] <- NULL
             grp_list[[t_index]] <- NULL
             war_statement = paste("Dropped group due to a lack of both treatment and control observation: '",cur_grp,"'",sep="")
             warning(war_statement)
         } else { 
-            t_dta[[t_index]] <- t_dta[[t_index]][t_dta[[t_index]]$ConstraintGroupSet_Opt == cur_grp,]
-            u_dta[[t_index]] <- u_dta[[t_index]][u_dta[[t_index]]$ConstraintGroupSet_Opt == cur_grp,]
+            t_dta[[t_index]] <- t_dta[[t_index]][t_dta[[t_index]][["ConstraintGroupSet_Opt"]] == cur_grp,]
+            u_dta[[t_index]] <- u_dta[[t_index]][u_dta[[t_index]][["ConstraintGroupSet_Opt"]] == cur_grp,]
         
             cnt = cnt + 1
         }
@@ -76,9 +76,10 @@ SAT <- function (dta, mtd, constraints, psm_eq, ids, drop_opts, visual, TrtBinCo
 
         print("sat3.2")
         if (mtd == "fastNN") {
+            # this is the slow part of functions
             temp_dta[[i]] <- fastNN_binary_func(it_dta,TrtBinColName,ids,cur_grp,dist_PSM) 
         }
-  
+
         if (mtd == "NN_WithReplacement") {
             print("NN with replacement is currently not available, please choose fastNN")
             # temp_dta[[i]] <- NN_WithReplacement_binary_func(it_dta,TrtBinColName,ids,cur_grp,dist_PSM) 
@@ -99,7 +100,7 @@ SAT <- function (dta, mtd, constraints, psm_eq, ids, drop_opts, visual, TrtBinCo
     print("sat5")
 
     if (drop_unmatched == TRUE) {
-        dta <- dta[dta@data$PSM_match_ID != -999,]    
+        dta <- dta[dta@data[["PSM_match_ID"]] != -999,]    
     }
   
     anc_v_int <- strsplit(psm_eq, "~")[[1]][2]
@@ -111,11 +112,11 @@ SAT <- function (dta, mtd, constraints, psm_eq, ids, drop_opts, visual, TrtBinCo
     #Drop observations according to the selected method
     if (drop_method == "SD") {
         #Method to drop pairs that are greater than a set threshold apart in terms of PSM Standard Deviations.
-        psm_sd_thresh = sd(dta$PSM_trtProb) * drop_thresh
+        psm_sd_thresh = sd(dta[["PSM_trtProb"]]) * drop_thresh
         if (visual == "TRUE") {
             print(psm_sd_thresh)
         }
-        dta <- dta[dta@data$PSM_distance < psm_sd_thresh,]
+        dta <- dta[dta@data[["PSM_distance"]] < psm_sd_thresh,]
     }
   
     #Plot the pre and post-dropping balance for PSM model...
@@ -127,23 +128,27 @@ SAT <- function (dta, mtd, constraints, psm_eq, ids, drop_opts, visual, TrtBinCo
     print("sat7")
     
     for (i in 1:length(anc_vars)) {
+
+        print("sat7.0")
+
         #gsub to remove any factors()
         ed_v = sub("factor\\(","",anc_vars[i])
         ed_v = sub(")","",ed_v)
-        treat_mean_pre = paste("round(describeBy(init_dta@data$",ed_v,", group=init_dta@data$",TrtBinColName,")[[2]][[3]],5)")
-        treat_SD_pre = paste("round(describeBy(init_dta@data$",ed_v,", group=init_dta@data$",TrtBinColName,")[[2]][[4]],5)")
+
+        # treat_mean_pre = paste("round(describeBy(init_dta@data$",ed_v,", group=init_dta@data$",TrtBinColName,")[[2]][[3]],5)")
+        # treat_SD_pre = paste("round(describeBy(init_dta@data$",ed_v,", group=init_dta@data$",TrtBinColName,")[[2]][[4]],5)")
         
-        control_mean_pre = paste("round(describeBy(init_dta@data$",ed_v,", group=init_dta@data$",TrtBinColName,")[[1]][[3]],5)")
-        control_SD_pre = paste("round(describeBy(init_dta@data$",ed_v,", group=init_dta@data$",TrtBinColName,")[[1]][[4]],5)")
+        # control_mean_pre = paste("round(describeBy(init_dta@data$",ed_v,", group=init_dta@data$",TrtBinColName,")[[1]][[3]],5)")
+        # control_SD_pre = paste("round(describeBy(init_dta@data$",ed_v,", group=init_dta@data$",TrtBinColName,")[[1]][[4]],5)")
         
-        treat_mean_post = paste("round(describeBy(dta@data$",ed_v,", group=dta@data$",TrtBinColName,")[[2]][[3]],5)")
-        treat_SD_post = paste("round(describeBy(dta@data$",ed_v,", group=dta@data$",TrtBinColName,")[[2]][[4]],5)")
+        # treat_mean_post = paste("round(describeBy(dta@data$",ed_v,", group=dta@data$",TrtBinColName,")[[2]][[3]],5)")
+        # treat_SD_post = paste("round(describeBy(dta@data$",ed_v,", group=dta@data$",TrtBinColName,")[[2]][[4]],5)")
         
-        control_mean_post = paste("round(describeBy(dta@data$",ed_v,", group=dta@data$",TrtBinColName,")[[1]][[3]],5)")
-        control_SD_post = paste("round(describeBy(dta@data$",ed_v,", group=dta@data$",TrtBinColName,")[[1]][[4]],5)")
+        # control_mean_post = paste("round(describeBy(dta@data$",ed_v,", group=dta@data$",TrtBinColName,")[[1]][[3]],5)")
+        # control_SD_post = paste("round(describeBy(dta@data$",ed_v,", group=dta@data$",TrtBinColName,")[[1]][[4]],5)")
        
         # c_type = eval(parse(text=paste("class(init_dta@data$",ed_v,")")))
-        c_type = class(init_dta@data[,ed_v])
+        c_type = class(init_dta@data[[ed_v]])
 
         print("sat7.1")
         if (c_type == "matrix") {
@@ -165,23 +170,37 @@ SAT <- function (dta, mtd, constraints, psm_eq, ids, drop_opts, visual, TrtBinCo
             pltObjs[[length(pltObjs) + 1]] <- GroupCompHist(init_dta, anc_vars[i],"Pre-Balancing: ",simple_out = FALSE)
             pltObjs[[length(pltObjs) + 1]] <- GroupCompHist(dta, anc_vars[i],"Post-Balancing: ",simple_out = FALSE)  
 
-            treat_mean_pre <- eval(parse(text=treat_mean_pre))
-            treat_SD_pre <- eval(parse(text=treat_SD_pre))
-            control_mean_pre <- eval(parse(text=control_mean_pre))
-            control_SD_pre <- eval(parse(text=control_SD_pre))
+            # treat_mean_pre <- eval(parse(text=treat_mean_pre))
+            # treat_SD_pre <- eval(parse(text=treat_SD_pre))
+            # control_mean_pre <- eval(parse(text=control_mean_pre))
+            # control_SD_pre <- eval(parse(text=control_SD_pre))
+
+            # treat_mean_post <- eval(parse(text=treat_mean_post))
+            # treat_SD_post <- eval(parse(text=treat_SD_post))
+            # control_mean_post <- eval(parse(text=control_mean_post))
+            # control_SD_post <- eval(parse(text=control_SD_post))
+
+
+            treat_mean_pre <- round(describeBy(init_dta@data[[ed_v]], group=init_dta@data[[TrtBinColName]])[[2]][[3]], 5)
+            treat_SD_pre <- round(describeBy(init_dta@data[[ed_v]], group=init_dta@data[[TrtBinColName]])[[2]][[4]], 5)
+            control_mean_pre <- round(describeBy(init_dta@data[[ed_v]], group=init_dta@data[[TrtBinColName]])[[1]][[3]], 5)
+            control_SD_pre <- round(describeBy(init_dta@data[[ed_v]], group=init_dta@data[[TrtBinColName]])[[1]][[4]], 5)
+
+            treat_mean_post <- round(describeBy(dta@data[[ed_v]], group=dta@data[[TrtBinColName]])[[2]][[3]], 5)
+            treat_SD_post <- round(describeBy(dta@data[[ed_v]], group=dta@data[[TrtBinColName]])[[2]][[4]], 5)
+            control_mean_post <- round(describeBy(dta@data[[ed_v]], group=dta@data[[TrtBinColName]])[[1]][[3]], 5)
+            control_SD_post <- round(describeBy(dta@data[[ed_v]], group=dta@data[[TrtBinColName]])[[1]][[4]], 5)
 
-            treat_mean_post <- eval(parse(text=treat_mean_post))
-            treat_SD_post <- eval(parse(text=treat_SD_post))
-            control_mean_post <- eval(parse(text=control_mean_post))
-            control_SD_post <- eval(parse(text=control_SD_post))
 
             it_diff_Mean_pre <- round(abs( treat_mean_pre-control_mean_pre ),5)
             it_diff_Mean_post <- round(abs(treat_mean_post-control_mean_post),5)
 
             if (!exists("bRes")) {
+
                 bRes <- data.frame(treat_mean_pre,treat_SD_pre,control_mean_pre,control_SD_pre,
                            treat_mean_post,treat_SD_post,control_mean_post,control_SD_post,
                            it_diff_Mean_pre,it_diff_Mean_post)
+
                 colnames(bRes)[1] <- "Pre-Balance Treated Mean"
                 colnames(bRes)[2] <- "Pre-Balance Treated SD"
                 colnames(bRes)[3] <- "Pre-Balance Control Mean"
@@ -194,13 +213,14 @@ SAT <- function (dta, mtd, constraints, psm_eq, ids, drop_opts, visual, TrtBinCo
 
                 colnames(bRes)[9] <- "Mean Difference Pre-Balance"
                 colnames(bRes)[10] <- "Mean Difference Post-Balance"
+
             } else {
                 bRes <- rbind(bRes, c(treat_mean_pre,treat_SD_pre,control_mean_pre,control_SD_pre,
                               treat_mean_post,treat_SD_post,control_mean_post,control_SD_post,
                               it_diff_Mean_pre,it_diff_Mean_post))
             }
       
-            rownames(bRes)[i-(i-cnt)] <- gsub("[^a-zA-Z0-9]","",ed_v)
+            rownames(bRes)[i-(i-cnt)] <- gsub("[^a-zA-Z0-9]", "", ed_v)
         }
     }
 

From 0cee107d7dd53356396f7e536575ed6c592b5437 Mon Sep 17 00:00:00 2001
From: userz <sgoodman@aiddata.org>
Date: Fri, 4 Sep 2015 11:56:47 -0400
Subject: [PATCH 019/212] Remove evals from fastNN_binary_func.R.

---
 R/fastNN_binary_func.R | 232 ++++++++++++++++++++++-------------------
 1 file changed, 127 insertions(+), 105 deletions(-)

diff --git a/R/fastNN_binary_func.R b/R/fastNN_binary_func.R
index a9e1419..73ec549 100644
--- a/R/fastNN_binary_func.R
+++ b/R/fastNN_binary_func.R
@@ -5,120 +5,142 @@
 #and then working through this list in order from highest to lowest.
 #Matches are removed each step.
 
-fastNN_binary_func <- function(dta,trtMntVar,ids,curgrp,dist_PSM)
-{
-  #Fast nearest neighbors search - will not arrive at optimum,
-  #but this may not be an issue for many analysis.
-  #Effectively loops through all observations in the treatment group, ordered by PSM score - higher scores go first.
-  sorted_dta <- dta@data[order(dta@data$PSM_trtProb),]
-  #Conduct the matching
-  
-  str_trted <- paste("treated <- sorted_dta[sorted_dta$",trtMntVar, "== 1,]",sep="")
-  str_untrted <- paste("untreated <- sorted_dta[sorted_dta$",trtMntVar,"==0,]",sep="")
-  eval(parse(text=str_trted))
-  eval(parse(text=str_untrted))
-  
-  it_cnt = min(length(treated[[1]]), length(untreated[[1]]))
-  dta@data$match <- -999
-  dta@data$PSM_distance <- -999
-  dta@data$PSM_match_ID <- -999
-  
-  #Calculate a distance decay function
-  #to perturb pairs based on their distances.  
-  for (j in 1:it_cnt)
-  {
-    str_trted <- paste("treated <- sorted_dta[sorted_dta$",trtMntVar, "== 1,]",sep="")
-    str_untrted <- paste("untreated <- sorted_dta[sorted_dta$",trtMntVar,"==0,]",sep="")
-    eval(parse(text=str_trted))
-    eval(parse(text=str_untrted))
-    
-  
+fastNN_binary_func <- function(dta, trtMntVar, ids, curgrp, dist_PSM) {
+
+    #Fast nearest neighbors search - will not arrive at optimum,
+    #but this may not be an issue for many analysis.
+    #Effectively loops through all observations in the treatment group, ordered by PSM score - higher scores go first.
+    sorted_dta <- dta@data[order(dta@data$PSM_trtProb),]
     
-    #Run the KNN for all neighbors. 
-    k <- get.knnx(treated$PSM_trtProb, untreated$PSM_trtProb, 1)
+
+    #Conduct the matching
+
+    # str_trted <- paste("treated <- sorted_dta[sorted_dta$",trtMntVar, "== 1,]",sep="")
+    # str_untrted <- paste("untreated <- sorted_dta[sorted_dta$",trtMntVar,"==0,]",sep="")
+    # eval(parse(text=str_trted))
+    # eval(parse(text=str_untrted))
+
+    treated <- sorted_dta[sorted_dta[[trtMntVar]] == 1,]
+    untreated <- sorted_dta[sorted_dta[[trtMntVar]] == 0,]
+
+
+    it_cnt = min(length(treated[[1]]), length(untreated[[1]]))
+    dta@data[["match"]] <- -999
+    dta@data[["PSM_distance"]] <- -999
+    dta@data[["PSM_match_ID"]] <- -999
+
+    #Calculate a distance decay function
+    #to perturb pairs based on their distances.  
+    for (j in 1:it_cnt) {
+
+        # str_trted <- paste("treated <- sorted_dta[sorted_dta$",trtMntVar, "== 1,]",sep="")
+        # str_untrted <- paste("untreated <- sorted_dta[sorted_dta$",trtMntVar,"==0,]",sep="")
+        # eval(parse(text=str_trted))
+        # eval(parse(text=str_untrted))
     
-    #Perturb the values based on the distance decay function, if selected.
-    if(!is.null(dist_PSM))
-    {
-      for(mC in 1:length(k[[1]]))
-      {
-        #Calculate the Euclidean Distance between pairs
-        cid_txt = paste("untreated$",ids,"[",mC,"]",sep="")
-        Control_ID = toString(eval(parse(text=cid_txt)))
-        
-        mT = k$nn.index[mC]
-        
-        tid_txt = paste("treated$",ids,"[",mT,"]",sep="")
-        Treatment_ID = toString(eval(parse(text=tid_txt)))
-        
-        #Find the control x,y location
-        cCoord_e = paste("coordinates(dta[which(dta@data$",ids," == Control_ID),])", sep="")
-        cCoord = eval(parse(text=cCoord_e))
+        treated <- sorted_dta[sorted_dta[[trtMntVar]] == 1,]
+        untreated <- sorted_dta[sorted_dta[[trtMntVar]] ==0,]
 
-        
-        #Find the treatment x,y location
-        tCoord_e = paste("coordinates(dta[which(dta@data$",ids," == Treatment_ID),])", sep="")
-        tCoord = eval(parse(text=tCoord_e))
 
-        y_dist = abs(cCoord[1] - cCoord[2])
-        x_dist = abs(tCoord[1] - tCoord[2])
-        euc_dist = sqrt(y_dist^2 + x_dist^2)
+        #Run the KNN for all neighbors. 
+        k <- get.knnx(treated[["PSM_trtProb"]], untreated[["PSM_trtProb"]], 1)
         
-        PSM_score = k$nn.dist[mC]
-        geog_Weight = pairDistWeight(dist=euc_dist,threshold=dist_PSM,type="Spherical")
+        #Perturb the values based on the distance decay function, if selected.
+        if (!is.null(dist_PSM)) {
+            for (mC in 1:length(k[[1]])) {
+                #Calculate the Euclidean Distance between pairs
+                # cid_txt = paste("untreated$",ids,"[",mC,"]",sep="")
+                # Control_ID = toString(eval(parse(text=cid_txt)))
+                Control_ID = toString(untreated[[ids]][[mC]])
 
+                mT = k$nn.index[mC]
+                
+                # tid_txt = paste("treated$",ids,"[",mT,"]",sep="")
+                # Treatment_ID = toString(eval(parse(text=tid_txt)))
+                Treatment_ID = toString(treated[[ids]][[mT]])
 
-        
-        k$nn.dist[mC] <- ((1-geog_Weight) * PSM_score)
+                #Find the control x,y location
+                # cCoord_e = paste("coordinates(dta[which(dta@data$",ids," == Control_ID),])", sep="")
+                # cCoord = eval(parse(text=cCoord_e))
+                cCoord = coordinates(dta[which(dta@data[[ids]] == Control_ID),])
+                
+
+                #Find the treatment x,y location
+                # tCoord_e = paste("coordinates(dta[which(dta@data$",ids," == Treatment_ID),])", sep="")
+                # tCoord = eval(parse(text=tCoord_e))
+                tCoord = coordinates(dta[which(dta@data[[ids]] == Treatment_ID),])
+
+                y_dist = abs(cCoord[1] - cCoord[2])
+                x_dist = abs(tCoord[1] - tCoord[2])
+                euc_dist = sqrt(y_dist^2 + x_dist^2)
+                
+                PSM_score = k$nn.dist[mC]
+                geog_Weight = pairDistWeight(dist=euc_dist,threshold=dist_PSM,type="Spherical")
 
-      }
+
+                
+                k$nn.dist[mC] <- ((1-geog_Weight) * PSM_score)
+
+            }
       
-    }
-    
-    #Add the matched treatment and control values to the recording data frame
-    #best_m_control is the row in the "distance" matrix with the lowest value.  This is the same row as in the index.
-    best_m_control = which(k$nn.dist %in% sort(k$nn.dist)[1])
-    
-    #This will give us the matched index in the "treated" dataset.
-    best_m_treated = k$nn.index[best_m_control]
+        }
     
-    #Control PSM ID
-    cid_txt = paste("untreated$",ids,"[",best_m_control,"]",sep="")
-    Control_ID = toString(eval(parse(text=cid_txt)))
+        #Add the matched treatment and control values to the recording data frame
+        #best_m_control is the row in the "distance" matrix with the lowest value.  This is the same row as in the index.
+        best_m_control = which(k$nn.dist %in% sort(k$nn.dist)[1])
         
-    #Treatment PSM ID
-    tid_txt = paste("treated$",ids,"[",best_m_treated,"]",sep="")
-    Treatment_ID = toString(eval(parse(text=tid_txt)))
-    
-    #Create a unique pair ID for each group (will simply append a "1" if only 1 group)
-    pair_id = paste(curgrp,j,sep="")
-    
-    #Add the Treatment ID to the Control Row 
-    tid_a_1 = paste("dta@data$match[which(dta@data$",ids," == Control_ID)] = Treatment_ID", sep="")
-    tid_a_2 = paste("dta@data$PSM_distance[which(dta@data$",ids," == Control_ID)] = k$nn.dist[,1][best_m_control]",sep="")
-    tid_a_3 = paste("dta@data$PSM_match_ID[which(dta@data$",ids," == Control_ID)] = pair_id", sep="")
-    eval(parse(text=tid_a_1))
-    eval(parse(text=tid_a_2))
-    eval(parse(text=tid_a_3))
-    
-    
-    
-    #Add the Control ID to the Treatment Row
-    cid_a_1 = paste("dta@data$match[which(dta@data$",ids," == Treatment_ID)] = Control_ID", sep="")
-    cid_a_2 = paste("dta@data$PSM_distance[which(dta@data$",ids," == Treatment_ID)] = k$nn.dist[,1][best_m_control]", sep="")
-    cid_a_3 = paste("dta@data$PSM_match_ID[which(dta@data$",ids," == Treatment_ID)] = pair_id", sep="")
-    eval(parse(text=cid_a_1))
-    eval(parse(text=cid_a_2))
-    eval(parse(text=cid_a_3))
-    
-    
-    #Drop the paired match out of the iteration matrix 
-    did_a_1 = paste("sorted_dta <- sorted_dta[sorted_dta$",ids,"!= Treatment_ID ,]",sep="")
-    did_a_2 = paste("sorted_dta <- sorted_dta[sorted_dta$",ids,"!= Control_ID ,]",sep="")
-    eval(parse(text=did_a_1))
-    eval(parse(text=did_a_2))
-    
+        #This will give us the matched index in the "treated" dataset.
+        best_m_treated = k$nn.index[best_m_control]
+        
+        #Control PSM ID
+        # cid_txt = paste("untreated$",ids,"[",best_m_control,"]",sep="")
+        # Control_ID = toString(eval(parse(text=cid_txt)))
+        Control_ID = toString(untreated[[ids]][[best_m_control]])    
+
+        #Treatment PSM ID
+        # tid_txt = paste("treated$",ids,"[",best_m_treated,"]",sep="")
+        # Treatment_ID = toString(eval(parse(text=tid_txt)))
+        Treatment_ID = toString(treated[[ids]][[best_m_treated]])
+
+
+        #Create a unique pair ID for each group (will simply append a "1" if only 1 group)
+        pair_id = paste(curgrp,j,sep="")
+        
+
+        #Add the Treatment ID to the Control Row 
+        # tid_a_1 = paste("dta@data$match[which(dta@data$",ids," == Control_ID)] = Treatment_ID", sep="")
+        # tid_a_2 = paste("dta@data$PSM_distance[which(dta@data$",ids," == Control_ID)] = k$nn.dist[,1][best_m_control]",sep="")
+        # tid_a_3 = paste("dta@data$PSM_match_ID[which(dta@data$",ids," == Control_ID)] = pair_id", sep="")
+        # eval(parse(text=tid_a_1))
+        # eval(parse(text=tid_a_2))
+        # eval(parse(text=tid_a_3))
+        dta@data$match[which(dta@data[[ids]] == Control_ID)] = Treatment_ID
+        dta@data$PSM_distance[which(dta@data[[ids]] == Control_ID)] = k$nn.dist[,1][best_m_control]
+        dta@data$PSM_match_ID[which(dta@data[[ids]] == Control_ID)] = pair_id        
+        
+        
+        #Add the Control ID to the Treatment Row
+        # cid_a_1 = paste("dta@data$match[which(dta@data$",ids," == Treatment_ID)] = Control_ID", sep="")
+        # cid_a_2 = paste("dta@data$PSM_distance[which(dta@data$",ids," == Treatment_ID)] = k$nn.dist[,1][best_m_control]", sep="")
+        # cid_a_3 = paste("dta@data$PSM_match_ID[which(dta@data$",ids," == Treatment_ID)] = pair_id", sep="")
+        # eval(parse(text=cid_a_1))
+        # eval(parse(text=cid_a_2))
+        # eval(parse(text=cid_a_3))
+        dta@data$match[which(dta@data[[ids]] == Treatment_ID)] = Control_ID
+        dta@data$PSM_distance[which(dta@data[[ids]] == Treatment_ID)] = k$nn.dist[,1][best_m_control]
+        dta@data$PSM_match_ID[which(dta@data[[ids]] == Treatment_ID)] = pair_id        
+        
+        #Drop the paired match out of the iteration matrix 
+        # did_a_1 = paste("sorted_dta <- sorted_dta[sorted_dta$",ids,"!= Treatment_ID ,]",sep="")
+        # did_a_2 = paste("sorted_dta <- sorted_dta[sorted_dta$",ids,"!= Control_ID ,]",sep="")
+        # eval(parse(text=did_a_1))
+        # eval(parse(text=did_a_2))
+        sorted_dta <- sorted_dta[sorted_dta[[ids]] != Treatment_ID ,]
+        sorted_dta <- sorted_dta[sorted_dta[[ids]] != Control_ID ,]    
     
-  }
-  return(dta) 
-}
\ No newline at end of file
+    }
+
+    return(dta) 
+
+}
+

From 0ca43a03773291c5aa4af3c5da77ee0ca366f4ac Mon Sep 17 00:00:00 2001
From: userz <sgoodman@aiddata.org>
Date: Fri, 4 Sep 2015 12:08:26 -0400
Subject: [PATCH 020/212] Testing.

---
 R/BuildTimeSeries.R | 15 +++++++++++----
 1 file changed, 11 insertions(+), 4 deletions(-)

diff --git a/R/BuildTimeSeries.R b/R/BuildTimeSeries.R
index a91cd2e..ae58a99 100644
--- a/R/BuildTimeSeries.R
+++ b/R/BuildTimeSeries.R
@@ -53,11 +53,16 @@ BuildTimeSeries <- function (dta, idField, varList_pre, startYear, endYear, colY
     
     # If there is an "interpVars" variable, linearly interpolate values based on at least 2 known points in time.
     if (!is.null(interpYears)) {
+        print("bts3.0")
         for (AncInt in 1:length(interpYears)) {
+
+            print("bts3.0.0")
             cur_ancVi <- interpYears[AncInt]
             interpFrame <- dta@data[idField]
             interpFrame[idField] <- dta@data[idField]
             cnt = 2
+
+            print("bts3.0.1")
             for (k in 1:length(years)) {
                 # First, build a model describing the relationship between years and any data in the interp field.
                 varI <- paste(cur_ancVi,years[[k]],sep="")
@@ -68,7 +73,7 @@ BuildTimeSeries <- function (dta, idField, varList_pre, startYear, endYear, colY
 
                     # add_data <- paste("interpFrame[cnt] <- dta@data$",varI)
                     # eval(parse(text=add_data))
-                    interpFrame[cnt] <- dta@data[,varI]
+                    interpFrame[cnt] <- dta@data[[varI]]
 
 
                     colnames(interpFrame)[cnt] <- years[[k]]
@@ -81,7 +86,7 @@ BuildTimeSeries <- function (dta, idField, varList_pre, startYear, endYear, colY
 
                         # add_data <- paste("interpFrame[cnt] <- dta@data$",varC)
                         # eval(parse(text=add_data))
-                        interpFrame[cnt] <- dta@data[,varC]
+                        interpFrame[cnt] <- dta@data[[varC]]
 
 
                         cnt = 3
@@ -90,6 +95,7 @@ BuildTimeSeries <- function (dta, idField, varList_pre, startYear, endYear, colY
         
             }
 
+            print("bts3.0.2")
             # Only one time point, so no interpolation is done - value is simply copied to all other columns.
             if (cnt == 3) {
 
@@ -97,7 +103,7 @@ BuildTimeSeries <- function (dta, idField, varList_pre, startYear, endYear, colY
 
                     # varI <- paste("dta@data$",cur_ancVi,years[[k]]," <- interpFrame[2]",sep="")
                     # eval(parse(text=varI))
-                    dta@data[,cur_ancVi,years[[k]]] <- interpFrame[2]
+                    dta@data[[paste(cur_ancVi,years[[k]]], sep="")]] <- interpFrame[2]
                 }
 
             } else {
@@ -130,7 +136,8 @@ BuildTimeSeries <- function (dta, idField, varList_pre, startYear, endYear, colY
             }
       
         }
-        
+
+        print("bts3.1")        
         # Append interpolated fields to our melting lists
         for (v in 1:length(interpYears)) {
             varList_pre[[length(varList_pre)+1]] <- interpYears[v]

From 045121118a169dae91c6af9bd575b07e396ae1ea Mon Sep 17 00:00:00 2001
From: userz <sgoodman@aiddata.org>
Date: Fri, 4 Sep 2015 12:08:56 -0400
Subject: [PATCH 021/212] Testing.

---
 R/BuildTimeSeries.R | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/R/BuildTimeSeries.R b/R/BuildTimeSeries.R
index ae58a99..d17182a 100644
--- a/R/BuildTimeSeries.R
+++ b/R/BuildTimeSeries.R
@@ -103,7 +103,7 @@ BuildTimeSeries <- function (dta, idField, varList_pre, startYear, endYear, colY
 
                     # varI <- paste("dta@data$",cur_ancVi,years[[k]]," <- interpFrame[2]",sep="")
                     # eval(parse(text=varI))
-                    dta@data[[paste(cur_ancVi,years[[k]]], sep="")]] <- interpFrame[2]
+                    dta@data[[paste(cur_ancVi,years[[k]], sep="")]] <- interpFrame[2]
                 }
 
             } else {

From eeff4db99def66f50064372b8d30902dc899180d Mon Sep 17 00:00:00 2001
From: userz <sgoodman@aiddata.org>
Date: Fri, 4 Sep 2015 12:37:58 -0400
Subject: [PATCH 022/212] Remove unnecessary interpolation from buildtimeseries
 func.

---
 R/BuildTimeSeries.R | 13 +++++++++----
 1 file changed, 9 insertions(+), 4 deletions(-)

diff --git a/R/BuildTimeSeries.R b/R/BuildTimeSeries.R
index d17182a..ea88f12 100644
--- a/R/BuildTimeSeries.R
+++ b/R/BuildTimeSeries.R
@@ -96,6 +96,8 @@ BuildTimeSeries <- function (dta, idField, varList_pre, startYear, endYear, colY
             }
 
             print("bts3.0.2")
+            # this is a slow part
+
             # Only one time point, so no interpolation is done - value is simply copied to all other columns.
             if (cnt == 3) {
 
@@ -106,14 +108,17 @@ BuildTimeSeries <- function (dta, idField, varList_pre, startYear, endYear, colY
                     dta@data[[paste(cur_ancVi,years[[k]], sep="")]] <- interpFrame[2]
                 }
 
-            } else {
+            } else if (cnt < length(years) + 2) {
+
                 tDframe <- dta@data[idField]
 
                 # Here, we model out everything. 
+
                 # Melt the dataframe for modeling
                 melt_Model_dta <- melt(data.frame(interpFrame),id=idField)
                 melt_Model_dta["variable"] <- as.numeric(gsub("X","",melt_Model_dta$variable))
-                                                         
+                
+
                 # Fit the model for interpolation
                 
                 execstr <- paste("mdl <- lm(value ~ variable + factor(",idField,"),data=melt_Model_dta)",sep="")
@@ -132,9 +137,9 @@ BuildTimeSeries <- function (dta, idField, varList_pre, startYear, endYear, colY
                         dta@data[varI] <- predict(mdl,newdata=tDframe)
 
                     }
-                }     
+                }
             }
-      
+
         }
 
         print("bts3.1")        

From f68e73eb6fc568bb3228d0b0335688881d3856c0 Mon Sep 17 00:00:00 2001
From: userz <sgoodman@aiddata.org>
Date: Fri, 4 Sep 2015 13:52:55 -0400
Subject: [PATCH 023/212] Update.

---
 R/BuildTimeSeries.R  | 27 +++++++++++++--------------
 R/SAT.R              |  1 +
 R/SpatialCausalPSM.R | 18 +++++++++---------
 R/loadLibs.R         |  5 ++---
 4 files changed, 25 insertions(+), 26 deletions(-)

diff --git a/R/BuildTimeSeries.R b/R/BuildTimeSeries.R
index ea88f12..a9f5d6c 100644
--- a/R/BuildTimeSeries.R
+++ b/R/BuildTimeSeries.R
@@ -14,10 +14,10 @@ BuildTimeSeries <- function (dta, idField, varList_pre, startYear, endYear, colY
         for (k in 1:length(years)) {
             for (j in 1:length(colYears)) {
 
-                varN <- paste("TrtMnt_", colYears[j], years[k], sep="")
+                varN <- paste("TrtMnt_",colYears[j],years[k], sep="")
 
                 print(varN)
-                exec <- paste("dta$",varN,"=0",sep="")
+                exec <- paste("dta$",varN,"=0", sep="")
                 eval(parse(text=exec))
 
                 # dta[,varN] = 0
@@ -37,8 +37,8 @@ BuildTimeSeries <- function (dta, idField, varList_pre, startYear, endYear, colY
     timer <- proc.time()
 
     for (j in 1:length(colYears)) {
-        trt_id = paste("TrtMnt_",colYears[j],sep="")
-        interpYears <- c(interpYears,trt_id)  
+        trt_id = paste("TrtMnt_",colYears[j], sep="")
+        interpYears <- c(interpYears, trt_id)  
     }
 
     print(interpYears)
@@ -65,7 +65,7 @@ BuildTimeSeries <- function (dta, idField, varList_pre, startYear, endYear, colY
             print("bts3.0.1")
             for (k in 1:length(years)) {
                 # First, build a model describing the relationship between years and any data in the interp field.
-                varI <- paste(cur_ancVi,years[[k]],sep="")
+                varI <- paste(cur_ancVi,years[[k]], sep="")
                 print(varI)
                 # Check if data exists for the year - if not, ignore.  If so, include in the new modeling frame.
                 if (varI %in% colnames(dta@data)) {
@@ -80,7 +80,7 @@ BuildTimeSeries <- function (dta, idField, varList_pre, startYear, endYear, colY
                     cnt = cnt + 1
                 } else {
                     # Exception for a single-point interpolation
-                    varC <- paste(cur_ancVi,sep="")
+                    varC <- paste(cur_ancVi, sep="")
                     if (varC %in% colnames(dta@data)) {
 
 
@@ -115,26 +115,25 @@ BuildTimeSeries <- function (dta, idField, varList_pre, startYear, endYear, colY
                 # Here, we model out everything. 
 
                 # Melt the dataframe for modeling
-                melt_Model_dta <- melt(data.frame(interpFrame),id=idField)
+                melt_Model_dta <- melt(data.frame(interpFrame), id=idField)
                 melt_Model_dta["variable"] <- as.numeric(gsub("X","",melt_Model_dta$variable))
                 
 
                 # Fit the model for interpolation
                 
-                execstr <- paste("mdl <- lm(value ~ variable + factor(",idField,"),data=melt_Model_dta)",sep="")
+                execstr <- paste("mdl <- lm(value ~ variable + factor(",idField,"),data=melt_Model_dta)", sep="")
                 eval(parse(text=execstr))
-
                 # mdl <- lm(value ~ variable + factor(idField), data=melt_Model_dta)
 
 
                 # Apply the model to interpolate
                 for (u in 1:length(years)) {
-                    varI <- paste(cur_ancVi,years[[u]],sep="")
+                    varI <- paste(cur_ancVi,years[[u]], sep="")
                     if (!(varI %in% colnames(dta@data))) {
                         # Variable doesn't exist, so we need to interpolate.
                         tDframe[idField] <- dta@data[idField]
                         tDframe["variable"] <- years[[u]]
-                        dta@data[varI] <- predict(mdl,newdata=tDframe)
+                        dta@data[varI] <- predict(mdl, newdata=tDframe)
 
                     }
                 }
@@ -167,11 +166,11 @@ BuildTimeSeries <- function (dta, idField, varList_pre, startYear, endYear, colY
         grepStrYrs = idField
         for (j in 1:length(years)) {
             tempGrep <- grepStrYrs
-            grepStrYrs <- paste(tempGrep,"|",varList_pre[[i]],years[[j]],sep="")
+            grepStrYrs <- paste(tempGrep,"|",varList_pre[[i]],years[[j]], sep="")
         }
 
-        tDF <- dta@data[grepl(grepStrYrs,names(dta@data))]
-        meltList[[i]] <- melt(tDF,id=idField)
+        tDF <- dta@data[grepl(grepStrYrs, names(dta@data))]
+        meltList[[i]] <- melt(tDF, id=idField)
         
         # Keep only years in the year column, rename columns
         colnames(meltList[[i]])[2] <- "Year"
diff --git a/R/SAT.R b/R/SAT.R
index 1aa2f11..d9d3a71 100644
--- a/R/SAT.R
+++ b/R/SAT.R
@@ -76,6 +76,7 @@ SAT <- function (dta, mtd, constraints, psm_eq, ids, drop_opts, visual, TrtBinCo
 
         print("sat3.2")
         if (mtd == "fastNN") {
+            # ***
             # this is the slow part of functions
             temp_dta[[i]] <- fastNN_binary_func(it_dta,TrtBinColName,ids,cur_grp,dist_PSM) 
         }
diff --git a/R/SpatialCausalPSM.R b/R/SpatialCausalPSM.R
index 4a284c4..4ba9ef2 100644
--- a/R/SpatialCausalPSM.R
+++ b/R/SpatialCausalPSM.R
@@ -15,7 +15,7 @@ SpatialCausalPSM <- function(dta, mtd, mdl, drop, visual) {
     retData <- dta
 
     # predict values based on model
-    retData$PSM_trtProb <- predict(PSMfit, dta@data, type="response")
+    retData[["PSM_trtProb"]] <- predict(PSMfit, dta@data, type="response")
     
     
     if (visual == "TRUE") {
@@ -29,13 +29,13 @@ SpatialCausalPSM <- function(dta, mtd, mdl, drop, visual) {
     if (drop == "support") {
         
         # Drop
-        treated <- retData@data[retData@data$TrtBin == 1,]
-        untreated <- retData@data[retData@data$TrtBin == 0,]
-        min_cut <- max(min(treated$PSM_trtProb), min(untreated$PSM_trtProb))
-        max_cut <- min(max(treated$PSM_trtProb), max(untreated$PSM_trtProb))
+        treated <- retData@data[retData@data[["TrtBin"]] == 1,]
+        untreated <- retData@data[retData@data[["TrtBin"]] == 0,]
+        min_cut <- max(min(treated[["PSM_trtProb"]]), min(untreated[["PSM_trtProb"]]))
+        max_cut <- min(max(treated[["PSM_trtProb"]]), max(untreated[["PSM_trtProb"]]))
         
-        retData <- retData[retData@data$PSM_trtProb >= min_cut,]    
-        retData <- retData[retData@data$PSM_trtProb <= max_cut,] 
+        retData <- retData[retData@data[["PSM_trtProb"]] >= min_cut,]    
+        retData <- retData[retData@data[["PSM_trtProb"]] <= max_cut,] 
 
     }
 
@@ -50,7 +50,7 @@ SpatialCausalPSM <- function(dta, mtd, mdl, drop, visual) {
 
     # return original and predicted data along with model
     retEle <- 0
-    retEle$data <- retData
-    retEle$model <- PSMfit
+    retEle[["data"]] <- retData
+    retEle[["model"]] <- PSMfit
     return (retEle)
 }
diff --git a/R/loadLibs.R b/R/loadLibs.R
index 8cf97ca..2eb2d52 100644
--- a/R/loadLibs.R
+++ b/R/loadLibs.R
@@ -1,6 +1,5 @@
 #Library loading script in case dependencies are not loading correctly.
-loadLibs <- function (x=1)
-  {
+loadLibs <- function (x=1) {
   require(sp)
   #require(GISTools) # sudo apt-get install libgeos-dev
   require(maptools)
@@ -20,4 +19,4 @@ loadLibs <- function (x=1)
   library(stargazer)
   library(lmtest)
   library(multiwayvcov)
-  }
\ No newline at end of file
+}

From 2bc1f94f540d7cda3130923ffee6422b915f6825 Mon Sep 17 00:00:00 2001
From: userz <sgoodman@aiddata.org>
Date: Fri, 4 Sep 2015 14:49:06 -0400
Subject: [PATCH 024/212] Clean up.

---
 R/BuildTimeSeries.R    |   2 +-
 R/Stage2PSM.R          | 140 ++++++++++++++++++++---------------------
 R/fastNN_binary_func.R |  20 +++---
 3 files changed, 78 insertions(+), 84 deletions(-)

diff --git a/R/BuildTimeSeries.R b/R/BuildTimeSeries.R
index a9f5d6c..038f789 100644
--- a/R/BuildTimeSeries.R
+++ b/R/BuildTimeSeries.R
@@ -116,7 +116,7 @@ BuildTimeSeries <- function (dta, idField, varList_pre, startYear, endYear, colY
 
                 # Melt the dataframe for modeling
                 melt_Model_dta <- melt(data.frame(interpFrame), id=idField)
-                melt_Model_dta["variable"] <- as.numeric(gsub("X","",melt_Model_dta$variable))
+                melt_Model_dta["variable"] <- as.numeric(gsub("X","",melt_Model_dta[["variable"]]))
                 
 
                 # Fit the model for interpolation
diff --git a/R/Stage2PSM.R b/R/Stage2PSM.R
index a0054b4..7fcaa92 100644
--- a/R/Stage2PSM.R
+++ b/R/Stage2PSM.R
@@ -3,82 +3,76 @@
 #These functions are to make common modeling strategies easier to specify for users
 #That do not write their own models.
 
-Stage2PSM <- function(model, dta, type, table_out = NULL, opts = NULL)
-{
+Stage2PSM <- function(model, dta, type, table_out = NULL, opts = NULL) {
   
-  ret_var <- list()
+    ret_var <- list()
   
-  if(type == "lm")
-  {
-    m_fit <- lm(model,dta)
-    print("==========================")
-    print("UNSTANDARDIZED MODEL RESULTS")
-    print("==========================")
-    #mTab <- stargazer(m_fit,type="html",title="Unstandardized Model Results")
-    print(summary(m_fit))
-    ret_var$unstandardized <- lm(model,dta)
-    texreg::plotreg(m_fit,omit.coef="(match)|(Intercept)",custom.model.names="Unstandardized Model",custom.note=model)
-    
-    if(!is.null(table_out))
-    {
-      dta_tmp <- dta
-      
-      if(class(dta) == "data.frame")
-      {
-        d_index <- sapply(dta_tmp, is.numeric)
-        dta_tmp[d_index] <- lapply(dta_tmp[d_index],scale)
-      } else {
-      d_index <- sapply(dta_tmp@data, is.numeric)
-      dta_tmp@data[d_index] <- lapply(dta_tmp@data[d_index],scale)
-      }
-      dta_fit_std <- lm(model,dta_tmp)
-      ret_var$standardized <- lm(model,dta_tmp)
-      print("==========================")
-      print("STANDARDIZED MODEL RESULTS")
-      print("==========================")
-      print(summary(dta_fit_std))
-      texreg::plotreg(dta_fit_std,omit.coef="(match)|(Intercept)",custom.model.names="Standardized Model", custom.note=model)
+    if(type == "lm") {
+        m_fit <- lm(model,dta)
+        print("==========================")
+        print("UNSTANDARDIZED MODEL RESULTS")
+        print("==========================")
+        #mTab <- stargazer(m_fit,type="html",title="Unstandardized Model Results")
+        print(summary(m_fit))
+        ret_var[["unstandardized"]] <- lm(model,dta)
+        texreg::plotreg(m_fit,omit.coef="(match)|(Intercept)",custom.model.names="Unstandardized Model",custom.note=model)
+        
+        if(!is.null(table_out)) {
+            dta_tmp <- dta
       
-    }
+            if(class(dta) == "data.frame") {
+                d_index <- sapply(dta_tmp, is.numeric)
+                dta_tmp[d_index] <- lapply(dta_tmp[d_index],scale)
+            } else {
+              d_index <- sapply(dta_tmp@data, is.numeric)
+              dta_tmp@data[d_index] <- lapply(dta_tmp@data[d_index],scale)
+            }
+
+            dta_fit_std <- lm(model,dta_tmp)
+            ret_var[["standardized"]] <- lm(model,dta_tmp)
+            print("==========================")
+            print("STANDARDIZED MODEL RESULTS")
+            print("==========================")
+            print(summary(dta_fit_std))
+            texreg::plotreg(dta_fit_std,omit.coef="(match)|(Intercept)",custom.model.names="Standardized Model", custom.note=model)
+          
+        }
     
-  }
+    }
   
-  if(type == "cmreg")
-  {
-    m_fit <- lm(model,dta)
-    ret_var$unstandardized <- m_fit
-    #mTab <- stargazer(m_fit,type="html",title="Unstandardized Model Results")
-    #print.htmlTable(mTab)
-    print(summary(m_fit))
-    texreg::plotreg(m_fit,omit.coef="(match)|(Intercept)|(factor)",custom.model.names="Unstandardized Model",custom.note=model)
-    
-    if(!is.null(table_out))
-    {
-      dta_tmp <- dta
-      
-      if(class(dta) == "data.frame")
-      {
-        d_index <- sapply(dta_tmp, is.numeric)
-        dta_tmp[d_index] <- lapply(dta_tmp[d_index],scale)
-      } else {
-        d_index <- sapply(dta_tmp@data, is.numeric)
-        dta_tmp@data[d_index] <- lapply(dta_tmp@data[d_index],scale)
-      }
-      dta_fit_std <- lm(model,dta_tmp)
-      ret_var$standardized <- dta_fit_std
-      print(summary(dta_fit_std))
-      texreg::plotreg(dta_fit_std,omit.coef="(match)|(Intercept)|(factor)",custom.model.names="Standardized Model", custom.note=model)
-      
+    if(type == "cmreg") {
+        m_fit <- lm(model,dta)
+        ret_var[["unstandardized"]] <- m_fit
+        #mTab <- stargazer(m_fit,type="html",title="Unstandardized Model Results")
+        #print.htmlTable(mTab)
+        print(summary(m_fit))
+        texreg::plotreg(m_fit,omit.coef="(match)|(Intercept)|(factor)",custom.model.names="Unstandardized Model",custom.note=model)
+        
+        if(!is.null(table_out)) {
+            dta_tmp <- dta
+          
+            if(class(dta) == "data.frame") {
+                d_index <- sapply(dta_tmp, is.numeric)
+                dta_tmp[d_index] <- lapply(dta_tmp[d_index],scale)
+            } else {
+                d_index <- sapply(dta_tmp@data, is.numeric)
+                dta_tmp@data[d_index] <- lapply(dta_tmp@data[d_index],scale)
+            }
+            dta_fit_std <- lm(model,dta_tmp)
+            ret_var[["standardized"]] <- dta_fit_std
+            print(summary(dta_fit_std))
+            texreg::plotreg(dta_fit_std,omit.coef="(match)|(Intercept)|(factor)",custom.model.names="Standardized Model", custom.note=model)
+          
+        }
+        
+        print(opts)
+        exec = paste("cluster.vcov(m_fit,cbind(dta$",opts[1],",dta$",opts[2],"))",sep="")
+        m_fit[["var"]] <- eval(parse(text=exec))
+        CMREG <- coeftest(m_fit,m_fit[["var"]])
+        print("cmReg:")
+        print(CMREG)
+        ret_var[["cmreg"]] <- CMREG
+        
     }
-    
-    print(opts)
-    exec = paste("cluster.vcov(m_fit,cbind(dta$",opts[1],",dta$",opts[2],"))",sep="")
-    m_fit$var <- eval(parse(text=exec))
-    CMREG <- coeftest(m_fit,m_fit$var)
-    print("cmReg:")
-    print(CMREG)
-    ret_var$cmreg <- CMREG
-    
-  }
-  return(ret_var)
-}
\ No newline at end of file
+    return(ret_var)
+}
diff --git a/R/fastNN_binary_func.R b/R/fastNN_binary_func.R
index 73ec549..2ea0519 100644
--- a/R/fastNN_binary_func.R
+++ b/R/fastNN_binary_func.R
@@ -10,7 +10,7 @@ fastNN_binary_func <- function(dta, trtMntVar, ids, curgrp, dist_PSM) {
     #Fast nearest neighbors search - will not arrive at optimum,
     #but this may not be an issue for many analysis.
     #Effectively loops through all observations in the treatment group, ordered by PSM score - higher scores go first.
-    sorted_dta <- dta@data[order(dta@data$PSM_trtProb),]
+    sorted_dta <- dta@data[order(dta@data[["PSM_trtProb"]]),]
     
 
     #Conduct the matching
@@ -53,7 +53,7 @@ fastNN_binary_func <- function(dta, trtMntVar, ids, curgrp, dist_PSM) {
                 # Control_ID = toString(eval(parse(text=cid_txt)))
                 Control_ID = toString(untreated[[ids]][[mC]])
 
-                mT = k$nn.index[mC]
+                mT = k[["nn.index"]][mC]
                 
                 # tid_txt = paste("treated$",ids,"[",mT,"]",sep="")
                 # Treatment_ID = toString(eval(parse(text=tid_txt)))
@@ -74,12 +74,12 @@ fastNN_binary_func <- function(dta, trtMntVar, ids, curgrp, dist_PSM) {
                 x_dist = abs(tCoord[1] - tCoord[2])
                 euc_dist = sqrt(y_dist^2 + x_dist^2)
                 
-                PSM_score = k$nn.dist[mC]
+                PSM_score = k[["nn.dist"]][mC]
                 geog_Weight = pairDistWeight(dist=euc_dist,threshold=dist_PSM,type="Spherical")
 
 
                 
-                k$nn.dist[mC] <- ((1-geog_Weight) * PSM_score)
+                k[["nn.dist"]][mC] <- ((1-geog_Weight) * PSM_score)
 
             }
       
@@ -87,10 +87,10 @@ fastNN_binary_func <- function(dta, trtMntVar, ids, curgrp, dist_PSM) {
     
         #Add the matched treatment and control values to the recording data frame
         #best_m_control is the row in the "distance" matrix with the lowest value.  This is the same row as in the index.
-        best_m_control = which(k$nn.dist %in% sort(k$nn.dist)[1])
+        best_m_control = which(k[["nn.dist"]] %in% sort(k[["nn.dist"]])[1])
         
         #This will give us the matched index in the "treated" dataset.
-        best_m_treated = k$nn.index[best_m_control]
+        best_m_treated = k[["nn.index"]][best_m_control]
         
         #Control PSM ID
         # cid_txt = paste("untreated$",ids,"[",best_m_control,"]",sep="")
@@ -115,7 +115,7 @@ fastNN_binary_func <- function(dta, trtMntVar, ids, curgrp, dist_PSM) {
         # eval(parse(text=tid_a_2))
         # eval(parse(text=tid_a_3))
         dta@data$match[which(dta@data[[ids]] == Control_ID)] = Treatment_ID
-        dta@data$PSM_distance[which(dta@data[[ids]] == Control_ID)] = k$nn.dist[,1][best_m_control]
+        dta@data$PSM_distance[which(dta@data[[ids]] == Control_ID)] = k[["nn.dist"]][,1][best_m_control]
         dta@data$PSM_match_ID[which(dta@data[[ids]] == Control_ID)] = pair_id        
         
         
@@ -126,9 +126,9 @@ fastNN_binary_func <- function(dta, trtMntVar, ids, curgrp, dist_PSM) {
         # eval(parse(text=cid_a_1))
         # eval(parse(text=cid_a_2))
         # eval(parse(text=cid_a_3))
-        dta@data$match[which(dta@data[[ids]] == Treatment_ID)] = Control_ID
-        dta@data$PSM_distance[which(dta@data[[ids]] == Treatment_ID)] = k$nn.dist[,1][best_m_control]
-        dta@data$PSM_match_ID[which(dta@data[[ids]] == Treatment_ID)] = pair_id        
+        dta@data[["match"]][which(dta@data[[ids]] == Treatment_ID)] = Control_ID
+        dta@data[["PSM_distance"]][which(dta@data[[ids]] == Treatment_ID)] = k[["nn.dist"]][,1][best_m_control]
+        dta@data[["PSM_match_ID"]][which(dta@data[[ids]] == Treatment_ID)] = pair_id        
         
         #Drop the paired match out of the iteration matrix 
         # did_a_1 = paste("sorted_dta <- sorted_dta[sorted_dta$",ids,"!= Treatment_ID ,]",sep="")

From 3b3d4b8283d974903ee42ed28c149d72a07a3d71 Mon Sep 17 00:00:00 2001
From: userz <sgoodman@aiddata.org>
Date: Fri, 4 Sep 2015 14:53:18 -0400
Subject: [PATCH 025/212] Clean up.

---
 R/BuildTimeSeries.R    | 21 +--------------------
 R/SAT.R                | 29 ----------------------------
 R/fastNN_binary_func.R | 43 +-----------------------------------------
 3 files changed, 2 insertions(+), 91 deletions(-)

diff --git a/R/BuildTimeSeries.R b/R/BuildTimeSeries.R
index 038f789..4425749 100644
--- a/R/BuildTimeSeries.R
+++ b/R/BuildTimeSeries.R
@@ -2,8 +2,6 @@ BuildTimeSeries <- function (dta, idField, varList_pre, startYear, endYear, colY
     
     years <- startYear:endYear
 
-
-
     print("bts1")
     timer <- proc.time()
 
@@ -15,11 +13,10 @@ BuildTimeSeries <- function (dta, idField, varList_pre, startYear, endYear, colY
             for (j in 1:length(colYears)) {
 
                 varN <- paste("TrtMnt_",colYears[j],years[k], sep="")
-
                 print(varN)
+
                 exec <- paste("dta$",varN,"=0", sep="")
                 eval(parse(text=exec))
-
                 # dta[,varN] = 0
 
 
@@ -32,7 +29,6 @@ BuildTimeSeries <- function (dta, idField, varList_pre, startYear, endYear, colY
     print(paste("section completed in", timer[3], "seconds."))
 
 
-
     print("bts2")
     timer <- proc.time()
 
@@ -47,7 +43,6 @@ BuildTimeSeries <- function (dta, idField, varList_pre, startYear, endYear, colY
     print(paste("section completed in", timer[3], "seconds."))
 
 
-
     print("bts3")
     timer <- proc.time()
     
@@ -70,12 +65,8 @@ BuildTimeSeries <- function (dta, idField, varList_pre, startYear, endYear, colY
                 # Check if data exists for the year - if not, ignore.  If so, include in the new modeling frame.
                 if (varI %in% colnames(dta@data)) {
 
-
-                    # add_data <- paste("interpFrame[cnt] <- dta@data$",varI)
-                    # eval(parse(text=add_data))
                     interpFrame[cnt] <- dta@data[[varI]]
 
-
                     colnames(interpFrame)[cnt] <- years[[k]]
                     cnt = cnt + 1
                 } else {
@@ -83,16 +74,11 @@ BuildTimeSeries <- function (dta, idField, varList_pre, startYear, endYear, colY
                     varC <- paste(cur_ancVi, sep="")
                     if (varC %in% colnames(dta@data)) {
 
-
-                        # add_data <- paste("interpFrame[cnt] <- dta@data$",varC)
-                        # eval(parse(text=add_data))
                         interpFrame[cnt] <- dta@data[[varC]]
 
-
                         cnt = 3
                     }
                 }
-        
             }
 
             print("bts3.0.2")
@@ -102,9 +88,6 @@ BuildTimeSeries <- function (dta, idField, varList_pre, startYear, endYear, colY
             if (cnt == 3) {
 
                 for (k in 1:length(years)) {
-
-                    # varI <- paste("dta@data$",cur_ancVi,years[[k]]," <- interpFrame[2]",sep="")
-                    # eval(parse(text=varI))
                     dta@data[[paste(cur_ancVi,years[[k]], sep="")]] <- interpFrame[2]
                 }
 
@@ -188,12 +171,10 @@ BuildTimeSeries <- function (dta, idField, varList_pre, startYear, endYear, colY
 
     }
 
-
     timer <- proc.time() - timer
     print(paste("section completed in", timer[3], "seconds."))
 
 
-
     # Finish up with a cherry on top
     meltListRet <- data.frame(meltList)
   
diff --git a/R/SAT.R b/R/SAT.R
index d9d3a71..dffaf8e 100644
--- a/R/SAT.R
+++ b/R/SAT.R
@@ -12,8 +12,6 @@ SAT <- function (dta, mtd, constraints, psm_eq, ids, drop_opts, visual, TrtBinCo
     if (!is.null(constraints)) {
         for (cst in 1:length(names(constraints))) {
             if (names(constraints)[cst] == "groups") {
-                # exec_stmnt = paste("dta$ConstraintGroupSet_Opt <- dta$",constraints["groups"],sep="")
-                # eval(parse(text=exec_stmnt))
                 dta[["ConstraintGroupSet_Opt"]] <- dta[[constraints["groups"]]]
 
             } else {
@@ -136,30 +134,13 @@ SAT <- function (dta, mtd, constraints, psm_eq, ids, drop_opts, visual, TrtBinCo
         ed_v = sub("factor\\(","",anc_vars[i])
         ed_v = sub(")","",ed_v)
 
-        # treat_mean_pre = paste("round(describeBy(init_dta@data$",ed_v,", group=init_dta@data$",TrtBinColName,")[[2]][[3]],5)")
-        # treat_SD_pre = paste("round(describeBy(init_dta@data$",ed_v,", group=init_dta@data$",TrtBinColName,")[[2]][[4]],5)")
-        
-        # control_mean_pre = paste("round(describeBy(init_dta@data$",ed_v,", group=init_dta@data$",TrtBinColName,")[[1]][[3]],5)")
-        # control_SD_pre = paste("round(describeBy(init_dta@data$",ed_v,", group=init_dta@data$",TrtBinColName,")[[1]][[4]],5)")
-        
-        # treat_mean_post = paste("round(describeBy(dta@data$",ed_v,", group=dta@data$",TrtBinColName,")[[2]][[3]],5)")
-        # treat_SD_post = paste("round(describeBy(dta@data$",ed_v,", group=dta@data$",TrtBinColName,")[[2]][[4]],5)")
-        
-        # control_mean_post = paste("round(describeBy(dta@data$",ed_v,", group=dta@data$",TrtBinColName,")[[1]][[3]],5)")
-        # control_SD_post = paste("round(describeBy(dta@data$",ed_v,", group=dta@data$",TrtBinColName,")[[1]][[4]],5)")
-       
-        # c_type = eval(parse(text=paste("class(init_dta@data$",ed_v,")")))
         c_type = class(init_dta@data[[ed_v]])
 
         print("sat7.1")
         if (c_type == "matrix") {
 
-            # exec_str = paste("dta@data$",ed_v,"<- as.numeric(dta@data$",ed_v,")",sep="")
-            # eval(parse(text=exec_str))
             dta@data[[ed_v]] <- as.numeric(dta@data[[ed_v]])
 
-            # exec_str = paste("init_dta@data$",ed_v,"<- as.numeric(init_dta@data$",ed_v,")",sep="")
-            # eval(parse(text=exec_str))
             init_dta@data[[ed_v]] <- as.numeric(init_dta@data[[ed_v]])
 
             c_type = "numeric"
@@ -171,16 +152,6 @@ SAT <- function (dta, mtd, constraints, psm_eq, ids, drop_opts, visual, TrtBinCo
             pltObjs[[length(pltObjs) + 1]] <- GroupCompHist(init_dta, anc_vars[i],"Pre-Balancing: ",simple_out = FALSE)
             pltObjs[[length(pltObjs) + 1]] <- GroupCompHist(dta, anc_vars[i],"Post-Balancing: ",simple_out = FALSE)  
 
-            # treat_mean_pre <- eval(parse(text=treat_mean_pre))
-            # treat_SD_pre <- eval(parse(text=treat_SD_pre))
-            # control_mean_pre <- eval(parse(text=control_mean_pre))
-            # control_SD_pre <- eval(parse(text=control_SD_pre))
-
-            # treat_mean_post <- eval(parse(text=treat_mean_post))
-            # treat_SD_post <- eval(parse(text=treat_SD_post))
-            # control_mean_post <- eval(parse(text=control_mean_post))
-            # control_SD_post <- eval(parse(text=control_SD_post))
-
 
             treat_mean_pre <- round(describeBy(init_dta@data[[ed_v]], group=init_dta@data[[TrtBinColName]])[[2]][[3]], 5)
             treat_SD_pre <- round(describeBy(init_dta@data[[ed_v]], group=init_dta@data[[TrtBinColName]])[[2]][[4]], 5)
diff --git a/R/fastNN_binary_func.R b/R/fastNN_binary_func.R
index 2ea0519..7bd8408 100644
--- a/R/fastNN_binary_func.R
+++ b/R/fastNN_binary_func.R
@@ -14,16 +14,9 @@ fastNN_binary_func <- function(dta, trtMntVar, ids, curgrp, dist_PSM) {
     
 
     #Conduct the matching
-
-    # str_trted <- paste("treated <- sorted_dta[sorted_dta$",trtMntVar, "== 1,]",sep="")
-    # str_untrted <- paste("untreated <- sorted_dta[sorted_dta$",trtMntVar,"==0,]",sep="")
-    # eval(parse(text=str_trted))
-    # eval(parse(text=str_untrted))
-
     treated <- sorted_dta[sorted_dta[[trtMntVar]] == 1,]
     untreated <- sorted_dta[sorted_dta[[trtMntVar]] == 0,]
 
-
     it_cnt = min(length(treated[[1]]), length(untreated[[1]]))
     dta@data[["match"]] <- -999
     dta@data[["PSM_distance"]] <- -999
@@ -33,15 +26,9 @@ fastNN_binary_func <- function(dta, trtMntVar, ids, curgrp, dist_PSM) {
     #to perturb pairs based on their distances.  
     for (j in 1:it_cnt) {
 
-        # str_trted <- paste("treated <- sorted_dta[sorted_dta$",trtMntVar, "== 1,]",sep="")
-        # str_untrted <- paste("untreated <- sorted_dta[sorted_dta$",trtMntVar,"==0,]",sep="")
-        # eval(parse(text=str_trted))
-        # eval(parse(text=str_untrted))
-    
         treated <- sorted_dta[sorted_dta[[trtMntVar]] == 1,]
         untreated <- sorted_dta[sorted_dta[[trtMntVar]] ==0,]
 
-
         #Run the KNN for all neighbors. 
         k <- get.knnx(treated[["PSM_trtProb"]], untreated[["PSM_trtProb"]], 1)
         
@@ -49,25 +36,17 @@ fastNN_binary_func <- function(dta, trtMntVar, ids, curgrp, dist_PSM) {
         if (!is.null(dist_PSM)) {
             for (mC in 1:length(k[[1]])) {
                 #Calculate the Euclidean Distance between pairs
-                # cid_txt = paste("untreated$",ids,"[",mC,"]",sep="")
-                # Control_ID = toString(eval(parse(text=cid_txt)))
                 Control_ID = toString(untreated[[ids]][[mC]])
 
                 mT = k[["nn.index"]][mC]
                 
-                # tid_txt = paste("treated$",ids,"[",mT,"]",sep="")
-                # Treatment_ID = toString(eval(parse(text=tid_txt)))
-                Treatment_ID = toString(treated[[ids]][[mT]])
+                 Treatment_ID = toString(treated[[ids]][[mT]])
 
                 #Find the control x,y location
-                # cCoord_e = paste("coordinates(dta[which(dta@data$",ids," == Control_ID),])", sep="")
-                # cCoord = eval(parse(text=cCoord_e))
                 cCoord = coordinates(dta[which(dta@data[[ids]] == Control_ID),])
                 
 
                 #Find the treatment x,y location
-                # tCoord_e = paste("coordinates(dta[which(dta@data$",ids," == Treatment_ID),])", sep="")
-                # tCoord = eval(parse(text=tCoord_e))
                 tCoord = coordinates(dta[which(dta@data[[ids]] == Treatment_ID),])
 
                 y_dist = abs(cCoord[1] - cCoord[2])
@@ -93,13 +72,9 @@ fastNN_binary_func <- function(dta, trtMntVar, ids, curgrp, dist_PSM) {
         best_m_treated = k[["nn.index"]][best_m_control]
         
         #Control PSM ID
-        # cid_txt = paste("untreated$",ids,"[",best_m_control,"]",sep="")
-        # Control_ID = toString(eval(parse(text=cid_txt)))
         Control_ID = toString(untreated[[ids]][[best_m_control]])    
 
         #Treatment PSM ID
-        # tid_txt = paste("treated$",ids,"[",best_m_treated,"]",sep="")
-        # Treatment_ID = toString(eval(parse(text=tid_txt)))
         Treatment_ID = toString(treated[[ids]][[best_m_treated]])
 
 
@@ -108,33 +83,17 @@ fastNN_binary_func <- function(dta, trtMntVar, ids, curgrp, dist_PSM) {
         
 
         #Add the Treatment ID to the Control Row 
-        # tid_a_1 = paste("dta@data$match[which(dta@data$",ids," == Control_ID)] = Treatment_ID", sep="")
-        # tid_a_2 = paste("dta@data$PSM_distance[which(dta@data$",ids," == Control_ID)] = k$nn.dist[,1][best_m_control]",sep="")
-        # tid_a_3 = paste("dta@data$PSM_match_ID[which(dta@data$",ids," == Control_ID)] = pair_id", sep="")
-        # eval(parse(text=tid_a_1))
-        # eval(parse(text=tid_a_2))
-        # eval(parse(text=tid_a_3))
         dta@data$match[which(dta@data[[ids]] == Control_ID)] = Treatment_ID
         dta@data$PSM_distance[which(dta@data[[ids]] == Control_ID)] = k[["nn.dist"]][,1][best_m_control]
         dta@data$PSM_match_ID[which(dta@data[[ids]] == Control_ID)] = pair_id        
         
         
         #Add the Control ID to the Treatment Row
-        # cid_a_1 = paste("dta@data$match[which(dta@data$",ids," == Treatment_ID)] = Control_ID", sep="")
-        # cid_a_2 = paste("dta@data$PSM_distance[which(dta@data$",ids," == Treatment_ID)] = k$nn.dist[,1][best_m_control]", sep="")
-        # cid_a_3 = paste("dta@data$PSM_match_ID[which(dta@data$",ids," == Treatment_ID)] = pair_id", sep="")
-        # eval(parse(text=cid_a_1))
-        # eval(parse(text=cid_a_2))
-        # eval(parse(text=cid_a_3))
         dta@data[["match"]][which(dta@data[[ids]] == Treatment_ID)] = Control_ID
         dta@data[["PSM_distance"]][which(dta@data[[ids]] == Treatment_ID)] = k[["nn.dist"]][,1][best_m_control]
         dta@data[["PSM_match_ID"]][which(dta@data[[ids]] == Treatment_ID)] = pair_id        
         
         #Drop the paired match out of the iteration matrix 
-        # did_a_1 = paste("sorted_dta <- sorted_dta[sorted_dta$",ids,"!= Treatment_ID ,]",sep="")
-        # did_a_2 = paste("sorted_dta <- sorted_dta[sorted_dta$",ids,"!= Control_ID ,]",sep="")
-        # eval(parse(text=did_a_1))
-        # eval(parse(text=did_a_2))
         sorted_dta <- sorted_dta[sorted_dta[[ids]] != Treatment_ID ,]
         sorted_dta <- sorted_dta[sorted_dta[[ids]] != Control_ID ,]    
     

From f53f7f785c721367bb50b974ce8cf2ce7a2ba8ad Mon Sep 17 00:00:00 2001
From: userz <sgoodman@aiddata.org>
Date: Fri, 4 Sep 2015 15:42:35 -0400
Subject: [PATCH 026/212] Clean up.

---
 R/Stage2PSM.R | 18 +++++++++---------
 1 file changed, 9 insertions(+), 9 deletions(-)

diff --git a/R/Stage2PSM.R b/R/Stage2PSM.R
index 7fcaa92..a9567ba 100644
--- a/R/Stage2PSM.R
+++ b/R/Stage2PSM.R
@@ -3,24 +3,24 @@
 #These functions are to make common modeling strategies easier to specify for users
 #That do not write their own models.
 
-Stage2PSM <- function(model, dta, type, table_out = NULL, opts = NULL) {
+Stage2PSM <- function (model, dta, type, table_out = NULL, opts = NULL) {
   
     ret_var <- list()
   
-    if(type == "lm") {
+    if (type == "lm") {
         m_fit <- lm(model,dta)
         print("==========================")
         print("UNSTANDARDIZED MODEL RESULTS")
         print("==========================")
         #mTab <- stargazer(m_fit,type="html",title="Unstandardized Model Results")
         print(summary(m_fit))
-        ret_var[["unstandardized"]] <- lm(model,dta)
-        texreg::plotreg(m_fit,omit.coef="(match)|(Intercept)",custom.model.names="Unstandardized Model",custom.note=model)
+        ret_var[["unstandardized"]] <- lm(model, dta)
+        texreg::plotreg(m_fit, omit.coef="(match)|(Intercept)", custom.model.names="Unstandardized Model", custom.note=model)
         
-        if(!is.null(table_out)) {
+        if (!is.null(table_out)) {
             dta_tmp <- dta
       
-            if(class(dta) == "data.frame") {
+            if (class(dta) == "data.frame") {
                 d_index <- sapply(dta_tmp, is.numeric)
                 dta_tmp[d_index] <- lapply(dta_tmp[d_index],scale)
             } else {
@@ -40,7 +40,7 @@ Stage2PSM <- function(model, dta, type, table_out = NULL, opts = NULL) {
     
     }
   
-    if(type == "cmreg") {
+    if (type == "cmreg") {
         m_fit <- lm(model,dta)
         ret_var[["unstandardized"]] <- m_fit
         #mTab <- stargazer(m_fit,type="html",title="Unstandardized Model Results")
@@ -48,10 +48,10 @@ Stage2PSM <- function(model, dta, type, table_out = NULL, opts = NULL) {
         print(summary(m_fit))
         texreg::plotreg(m_fit,omit.coef="(match)|(Intercept)|(factor)",custom.model.names="Unstandardized Model",custom.note=model)
         
-        if(!is.null(table_out)) {
+        if (!is.null(table_out)) {
             dta_tmp <- dta
           
-            if(class(dta) == "data.frame") {
+            if( class(dta) == "data.frame") {
                 d_index <- sapply(dta_tmp, is.numeric)
                 dta_tmp[d_index] <- lapply(dta_tmp[d_index],scale)
             } else {

From 94a654643f0a902f521f34e25e725bfe440f4ac4 Mon Sep 17 00:00:00 2001
From: userz <sgoodman@aiddata.org>
Date: Fri, 11 Sep 2015 11:35:34 -0400
Subject: [PATCH 027/212] Update how timeRangeTrend finds year.

---
 R/fastNN_binary_func.R | 2 +-
 R/timeRangeTrend.R     | 8 ++++++--
 2 files changed, 7 insertions(+), 3 deletions(-)

diff --git a/R/fastNN_binary_func.R b/R/fastNN_binary_func.R
index 7bd8408..fe2e52c 100644
--- a/R/fastNN_binary_func.R
+++ b/R/fastNN_binary_func.R
@@ -40,7 +40,7 @@ fastNN_binary_func <- function(dta, trtMntVar, ids, curgrp, dist_PSM) {
 
                 mT = k[["nn.index"]][mC]
                 
-                 Treatment_ID = toString(treated[[ids]][[mT]])
+                Treatment_ID = toString(treated[[ids]][[mT]])
 
                 #Find the control x,y location
                 cCoord = coordinates(dta[which(dta@data[[ids]] == Control_ID),])
diff --git a/R/timeRangeTrend.R b/R/timeRangeTrend.R
index 85f4afa..fbe7a5e 100644
--- a/R/timeRangeTrend.R
+++ b/R/timeRangeTrend.R
@@ -11,10 +11,14 @@ timeRangeTrend <- function(dta, prefix, startyr, endyr, IDfield) {
     analysisDF <- melt(tDF, id=c(IDfield))
     
     # cleaned GREP - remove year digit placeholders
-    new_pre <- gsub("[0-9]", "", prefix, fixed=TRUE)
+    # new_pre <- gsub("[0-9]", "", prefix, fixed=TRUE)
+
+    # get location of year in prefix
+    yIndex <- regexpr("[0-9]", prefix, fixed=TRUE)
 
     # generate new year field by removing prefix from variable (original column names)
-    analysisDF["Year"] <- lapply(analysisDF["variable"], FUN=function(x) as.numeric(gsub(new_pre, "", x)))
+    # analysisDF["Year"] <- lapply(analysisDF["variable"], FUN=function(x) as.numeric(gsub(new_pre, "", x)))
+    analysisDF["Year"] <- lapply(analysisDF["variable"], FUN=function(x) as.numeric(substr(x, yIndex[1], yIndex[1]+3)))
 
     # keep years in range specified
     analysisDF <- analysisDF[analysisDF["Year"] >= startyr ,]

From d9227e4cd1cdae58ca9072c0e61a2d32f39d75ff Mon Sep 17 00:00:00 2001
From: userz <sgoodman@aiddata.org>
Date: Fri, 11 Sep 2015 16:31:13 -0400
Subject: [PATCH 028/212] Update how timeRangeTrend handles NA data.

---
 R/BuildTimeSeries.R | 39 ++++++++++++++++++++++++---------------
 R/timeRangeTrend.R  | 25 +++++++++++++++++++------
 2 files changed, 43 insertions(+), 21 deletions(-)

diff --git a/R/BuildTimeSeries.R b/R/BuildTimeSeries.R
index 4425749..7ca5721 100644
--- a/R/BuildTimeSeries.R
+++ b/R/BuildTimeSeries.R
@@ -32,6 +32,8 @@ BuildTimeSeries <- function (dta, idField, varList_pre, startYear, endYear, colY
     print("bts2")
     timer <- proc.time()
 
+
+    # add the "TrtMnt_" + colYears[j] prefix to interpYears
     for (j in 1:length(colYears)) {
         trt_id = paste("TrtMnt_",colYears[j], sep="")
         interpYears <- c(interpYears, trt_id)  
@@ -60,24 +62,21 @@ BuildTimeSeries <- function (dta, idField, varList_pre, startYear, endYear, colY
             print("bts3.0.1")
             for (k in 1:length(years)) {
                 # First, build a model describing the relationship between years and any data in the interp field.
-                varI <- paste(cur_ancVi,years[[k]], sep="")
-                print(varI)
+
                 # Check if data exists for the year - if not, ignore.  If so, include in the new modeling frame.
+                varI <- gsub('####', years[[k]], cur_ancVi)
                 if (varI %in% colnames(dta@data)) {
 
                     interpFrame[cnt] <- dta@data[[varI]]
-
                     colnames(interpFrame)[cnt] <- years[[k]]
                     cnt = cnt + 1
-                } else {
-                    # Exception for a single-point interpolation
-                    varC <- paste(cur_ancVi, sep="")
-                    if (varC %in% colnames(dta@data)) {
 
-                        interpFrame[cnt] <- dta@data[[varC]]
+                } else if (cur_ancVi %in% colnames(dta@data)) {
 
-                        cnt = 3
-                    }
+                    # Exception for a single-point interpolation
+                    interpFrame[cnt] <- dta@data[[cur_ancVi]]
+                    cnt = 3
+                    
                 }
             }
 
@@ -88,7 +87,8 @@ BuildTimeSeries <- function (dta, idField, varList_pre, startYear, endYear, colY
             if (cnt == 3) {
 
                 for (k in 1:length(years)) {
-                    dta@data[[paste(cur_ancVi,years[[k]], sep="")]] <- interpFrame[2]
+                    # add _year to end of non temporal data
+                    dta@data[[paste(cur_ancVi,years[[k]],sep="_")]] <- interpFrame[2]
                 }
 
             } else if (cnt < length(years) + 2) {
@@ -99,7 +99,7 @@ BuildTimeSeries <- function (dta, idField, varList_pre, startYear, endYear, colY
 
                 # Melt the dataframe for modeling
                 melt_Model_dta <- melt(data.frame(interpFrame), id=idField)
-                melt_Model_dta["variable"] <- as.numeric(gsub("X","",melt_Model_dta[["variable"]]))
+                melt_Model_dta["variable"] <- as.numeric(gsub("X", "", melt_Model_dta[["variable"]]))
                 
 
                 # Fit the model for interpolation
@@ -111,7 +111,8 @@ BuildTimeSeries <- function (dta, idField, varList_pre, startYear, endYear, colY
 
                 # Apply the model to interpolate
                 for (u in 1:length(years)) {
-                    varI <- paste(cur_ancVi,years[[u]], sep="")
+
+                    varI <- gsub('####', years[[u]], cur_ancVi)
                     if (!(varI %in% colnames(dta@data))) {
                         # Variable doesn't exist, so we need to interpolate.
                         tDframe[idField] <- dta@data[idField]
@@ -144,13 +145,21 @@ BuildTimeSeries <- function (dta, idField, varList_pre, startYear, endYear, colY
     meltList <- list()
     for (i in 1:length(varList_pre)) {
 
-        # grep_str = paste(idField,"|",varList_pre[i],"[0-9][0-9][0-9][0-9]",sep="")
+
         # Limit to only relevant years
         grepStrYrs = idField
+
         for (j in 1:length(years)) {
             tempGrep <- grepStrYrs
-            grepStrYrs <- paste(tempGrep,"|",varList_pre[[i]],years[[j]], sep="")
+
+            if (regexpr("####", varList_pre[[i]], fixed=TRUE)[1] == -1) {
+                grepStrYrs <- paste(tempGrep,"|",paste(varList_pre[[i]]],years[[j]], sep="_"), sep="")
+            } else {
+                grepStrYrs <- paste(tempGrep,"|",gsub('####', years[[j]], varList_pre[[i]]]), sep="")
+            }
         }
+    
+
 
         tDF <- dta@data[grepl(grepStrYrs, names(dta@data))]
         meltList[[i]] <- melt(tDF, id=idField)
diff --git a/R/timeRangeTrend.R b/R/timeRangeTrend.R
index fbe7a5e..ac41eda 100644
--- a/R/timeRangeTrend.R
+++ b/R/timeRangeTrend.R
@@ -1,6 +1,6 @@
 # run linear model on data within year range as specified
 # by field prefix and return coefficients
-timeRangeTrend <- function(dta, prefix, startyr, endyr, IDfield) {
+timeRangeTrend <- function(dta, prefix, startyr, endyr, IDfield, thresh=0.5) {
 
     # create new dataframe from all columns in dta dataframe that 
     # are either the ID or a year which is indicated by the prefix
@@ -35,14 +35,27 @@ timeRangeTrend <- function(dta, prefix, startyr, endyr, IDfield) {
         # get all data corresponding to id from analysis dataframe
         ID_dat <- analysisDF[analysisDF[IDfield] == ID,]
 
-        # fit trend model
-        trend_mod <- lm(value ~ Year, data=ID_dat)
+        dat_length <-length(ID_dat)
+        count_na <-sum(is.na(ID_dat[['value']])) 
+        count_non_na <- dat_length - count_na
+        percent_na <- count_na / dat_length
+
+        # if number of NAs is over threshold or if less than 2 points of data are not NA, return NA
+        if (percent_na > thresh || count_non_na < 2) {
+            
+            dta@data["newfieldID"][i,] <- NA
+        
+        } else {
+            # fit trend model
+            trend_mod <- lm(value ~ Year, data=ID_dat, na.action = na.omit)
+      
+            # add trend coefficients to new field
+            dta@data["newfieldID"][i,] <- summary(trend_mod)$coefficients[2]
+        }
 
-        # add trend coefficients to new field
-        dta@data["newfieldID"][i,] <- summary(trend_mod)$coefficients[2]
     }
     
     # return new field with trend coefficients
-    return(dta$newfieldID)
+    return(dta[["newfieldID"]])
     
 }

From 716bd5589ce977badfefb0979c3584760914792c Mon Sep 17 00:00:00 2001
From: userz <sgoodman@aiddata.org>
Date: Fri, 11 Sep 2015 16:33:52 -0400
Subject: [PATCH 029/212] Fix syntax.

---
 R/BuildTimeSeries.R | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/R/BuildTimeSeries.R b/R/BuildTimeSeries.R
index 7ca5721..f697687 100644
--- a/R/BuildTimeSeries.R
+++ b/R/BuildTimeSeries.R
@@ -153,9 +153,9 @@ BuildTimeSeries <- function (dta, idField, varList_pre, startYear, endYear, colY
             tempGrep <- grepStrYrs
 
             if (regexpr("####", varList_pre[[i]], fixed=TRUE)[1] == -1) {
-                grepStrYrs <- paste(tempGrep,"|",paste(varList_pre[[i]]],years[[j]], sep="_"), sep="")
+                grepStrYrs <- paste(tempGrep,"|",paste(varList_pre[[i]],years[[j]], sep="_"), sep="")
             } else {
-                grepStrYrs <- paste(tempGrep,"|",gsub('####', years[[j]], varList_pre[[i]]]), sep="")
+                grepStrYrs <- paste(tempGrep,"|",gsub('####', years[[j]], varList_pre[[i]]), sep="")
             }
         }
     

From a8f17ecf951f54d573c224ef2f007cc317dd016e Mon Sep 17 00:00:00 2001
From: userz <sgoodman@aiddata.org>
Date: Mon, 14 Sep 2015 17:15:16 -0400
Subject: [PATCH 030/212] Update how SpatialCausalPSM handles NAs.

---
 R/SpatialCausalPSM.R | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/R/SpatialCausalPSM.R b/R/SpatialCausalPSM.R
index 4ba9ef2..72a35e7 100644
--- a/R/SpatialCausalPSM.R
+++ b/R/SpatialCausalPSM.R
@@ -5,10 +5,10 @@ SpatialCausalPSM <- function(dta, mtd, mdl, drop, visual) {
     # generate model based on method
     if (mtd == "logit") {
         # generalized linear model
-        PSMfit <- glm(mdl, dta@data, family="binomial")
+        PSMfit <- glm(mdl, dta@data, family="binomial", na.action=na.omit)
     } else if (mtd == "lm") {
         # linear model
-        PSMfit <- lm(mdl, dta@data)
+        PSMfit <- lm(mdl, dta@data, na.action=na.omit)
     }
 
     # copy data 

From cb314e6f89682a099edec947b60cc961e1ad5c60 Mon Sep 17 00:00:00 2001
From: userz <sgoodman@aiddata.org>
Date: Tue, 15 Sep 2015 10:05:48 -0400
Subject: [PATCH 031/212] Update SpatialCausalPSM.

---
 R/SpatialCausalPSM.R | 11 ++++++-----
 1 file changed, 6 insertions(+), 5 deletions(-)

diff --git a/R/SpatialCausalPSM.R b/R/SpatialCausalPSM.R
index 72a35e7..e1d49d3 100644
--- a/R/SpatialCausalPSM.R
+++ b/R/SpatialCausalPSM.R
@@ -31,9 +31,10 @@ SpatialCausalPSM <- function(dta, mtd, mdl, drop, visual) {
         # Drop
         treated <- retData@data[retData@data[["TrtBin"]] == 1,]
         untreated <- retData@data[retData@data[["TrtBin"]] == 0,]
-        min_cut <- max(min(treated[["PSM_trtProb"]]), min(untreated[["PSM_trtProb"]]))
-        max_cut <- min(max(treated[["PSM_trtProb"]]), max(untreated[["PSM_trtProb"]]))
+        min_cut <- max(min(treated[["PSM_trtProb"]], na.rm = TRUE), min(untreated[["PSM_trtProb"]], na.rm = TRUE))
+        max_cut <- min(max(treated[["PSM_trtProb"]], na.rm = TRUE), max(untreated[["PSM_trtProb"]], na.rm = TRUE))
         
+        retData <- retData[!is.na(retData@data[["PSM_trtProb"]]),]
         retData <- retData[retData@data[["PSM_trtProb"]] >= min_cut,]    
         retData <- retData[retData@data[["PSM_trtProb"]] <= max_cut,] 
 
@@ -49,8 +50,8 @@ SpatialCausalPSM <- function(dta, mtd, mdl, drop, visual) {
     }
 
     # return original and predicted data along with model
-    retEle <- 0
-    retEle[["data"]] <- retData
-    retEle[["model"]] <- PSMfit
+    retEle <- c()
+    retEle$data <- retData
+    retEle$model <- PSMfit
     return (retEle)
 }

From 3013570920c22deb8ebf6a97de8d2a6285e6f5a2 Mon Sep 17 00:00:00 2001
From: userz <sgoodman@aiddata.org>
Date: Tue, 15 Sep 2015 10:09:54 -0400
Subject: [PATCH 032/212] Update.

---
 R/SpatialCausalPSM.R | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/R/SpatialCausalPSM.R b/R/SpatialCausalPSM.R
index e1d49d3..e862f53 100644
--- a/R/SpatialCausalPSM.R
+++ b/R/SpatialCausalPSM.R
@@ -5,10 +5,10 @@ SpatialCausalPSM <- function(dta, mtd, mdl, drop, visual) {
     # generate model based on method
     if (mtd == "logit") {
         # generalized linear model
-        PSMfit <- glm(mdl, dta@data, family="binomial", na.action=na.omit)
+        PSMfit <- glm(mdl, dta@data, family="binomial")
     } else if (mtd == "lm") {
         # linear model
-        PSMfit <- lm(mdl, dta@data, na.action=na.omit)
+        PSMfit <- lm(mdl, dta@data)
     }
 
     # copy data 

From 4f7a3a1d24d7abe83e51d5b7dd6b341804372483 Mon Sep 17 00:00:00 2001
From: userz <sgoodman@aiddata.org>
Date: Tue, 15 Sep 2015 10:11:10 -0400
Subject: [PATCH 033/212] Update.

---
 R/SpatialCausalPSM.R | 17 ++++++++---------
 1 file changed, 8 insertions(+), 9 deletions(-)

diff --git a/R/SpatialCausalPSM.R b/R/SpatialCausalPSM.R
index e862f53..e72e689 100644
--- a/R/SpatialCausalPSM.R
+++ b/R/SpatialCausalPSM.R
@@ -1,11 +1,12 @@
 SpatialCausalPSM <- function(dta, mtd, mdl, drop, visual) {
+
     # Initialization
     pltObjs <- list()
-    
+
     # generate model based on method
     if (mtd == "logit") {
         # generalized linear model
-        PSMfit <- glm(mdl, dta@data, family="binomial")
+        PSMfit <- glm(mdl, dta@data, family="binomial", na.action=na.omit)
     } else if (mtd == "lm") {
         # linear model
         PSMfit <- lm(mdl, dta@data)
@@ -16,8 +17,8 @@ SpatialCausalPSM <- function(dta, mtd, mdl, drop, visual) {
 
     # predict values based on model
     retData[["PSM_trtProb"]] <- predict(PSMfit, dta@data, type="response")
-    
-    
+
+
     if (visual == "TRUE") {
         # Show user distributions.
         pltObjs[[1]] <- GroupCompHist(retData, "PSM_trtProb", "Initial PSM Balance", simple_out=FALSE)
@@ -26,7 +27,7 @@ SpatialCausalPSM <- function(dta, mtd, mdl, drop, visual) {
 
     # Second, if a drop parameter - if set to "support", remove observations
     # that don't overlap in the PSM distribution.
-    if (drop == "support") {
+    #if (drop == "support") {
         
         # Drop
         treated <- retData@data[retData@data[["TrtBin"]] == 1,]
@@ -37,8 +38,8 @@ SpatialCausalPSM <- function(dta, mtd, mdl, drop, visual) {
         retData <- retData[!is.na(retData@data[["PSM_trtProb"]]),]
         retData <- retData[retData@data[["PSM_trtProb"]] >= min_cut,]    
         retData <- retData[retData@data[["PSM_trtProb"]] <= max_cut,] 
-
-    }
+        
+    #}
 
     if (visual == "TRUE") {
         # Post drop histograms
@@ -53,5 +54,3 @@ SpatialCausalPSM <- function(dta, mtd, mdl, drop, visual) {
     retEle <- c()
     retEle$data <- retData
     retEle$model <- PSMfit
-    return (retEle)
-}

From 7bbb4482b9884c432ac66eefc047934866ba2366 Mon Sep 17 00:00:00 2001
From: userz <sgoodman@aiddata.org>
Date: Tue, 15 Sep 2015 11:06:59 -0400
Subject: [PATCH 034/212] Add debug prints to fastNN.

---
 R/fastNN_binary_func.R | 11 +++++++++--
 1 file changed, 9 insertions(+), 2 deletions(-)

diff --git a/R/fastNN_binary_func.R b/R/fastNN_binary_func.R
index fe2e52c..da3f19a 100644
--- a/R/fastNN_binary_func.R
+++ b/R/fastNN_binary_func.R
@@ -7,6 +7,8 @@
 
 fastNN_binary_func <- function(dta, trtMntVar, ids, curgrp, dist_PSM) {
 
+    print("nn1.0")
+
     #Fast nearest neighbors search - will not arrive at optimum,
     #but this may not be an issue for many analysis.
     #Effectively loops through all observations in the treatment group, ordered by PSM score - higher scores go first.
@@ -22,6 +24,8 @@ fastNN_binary_func <- function(dta, trtMntVar, ids, curgrp, dist_PSM) {
     dta@data[["PSM_distance"]] <- -999
     dta@data[["PSM_match_ID"]] <- -999
 
+    print("nn2.0")
+
     #Calculate a distance decay function
     #to perturb pairs based on their distances.  
     for (j in 1:it_cnt) {
@@ -63,7 +67,9 @@ fastNN_binary_func <- function(dta, trtMntVar, ids, curgrp, dist_PSM) {
             }
       
         }
-    
+
+        print("nn3.0")
+
         #Add the matched treatment and control values to the recording data frame
         #best_m_control is the row in the "distance" matrix with the lowest value.  This is the same row as in the index.
         best_m_control = which(k[["nn.dist"]] %in% sort(k[["nn.dist"]])[1])
@@ -81,7 +87,8 @@ fastNN_binary_func <- function(dta, trtMntVar, ids, curgrp, dist_PSM) {
         #Create a unique pair ID for each group (will simply append a "1" if only 1 group)
         pair_id = paste(curgrp,j,sep="")
         
-
+        print("nn4.0")
+        
         #Add the Treatment ID to the Control Row 
         dta@data$match[which(dta@data[[ids]] == Control_ID)] = Treatment_ID
         dta@data$PSM_distance[which(dta@data[[ids]] == Control_ID)] = k[["nn.dist"]][,1][best_m_control]

From 5809656d8c120ac687da4234ffb6807425f5fdc3 Mon Sep 17 00:00:00 2001
From: userz <sgoodman@aiddata.org>
Date: Tue, 15 Sep 2015 11:08:28 -0400
Subject: [PATCH 035/212] Test.

---
 R/timeRangeTrend.R | 48 +++++++++++++++++++++++-----------------------
 1 file changed, 24 insertions(+), 24 deletions(-)

diff --git a/R/timeRangeTrend.R b/R/timeRangeTrend.R
index ac41eda..f57a6ed 100644
--- a/R/timeRangeTrend.R
+++ b/R/timeRangeTrend.R
@@ -24,38 +24,38 @@ timeRangeTrend <- function(dta, prefix, startyr, endyr, IDfield, thresh=0.5) {
     analysisDF <- analysisDF[analysisDF["Year"] >= startyr ,]
     analysisDF <- analysisDF[analysisDF["Year"] <= endyr ,]
 
-    # create empty field
-    dta@data["newfieldID"] <- 0
+    # # create empty field
+    # dta@data["newfieldID"] <- 0
 
-    # iterate over original dataframe
-    for (i in 1:length(dta)) {
-        # get id for row (in original data)
-        ID <- as.character(dta@data[IDfield][i,])
+    # # iterate over original dataframe
+    # for (i in 1:length(dta)) {
+    #     # get id for row (in original data)
+    #     ID <- as.character(dta@data[IDfield][i,])
 
-        # get all data corresponding to id from analysis dataframe
-        ID_dat <- analysisDF[analysisDF[IDfield] == ID,]
+    #     # get all data corresponding to id from analysis dataframe
+    #     ID_dat <- analysisDF[analysisDF[IDfield] == ID,]
 
-        dat_length <-length(ID_dat)
-        count_na <-sum(is.na(ID_dat[['value']])) 
-        count_non_na <- dat_length - count_na
-        percent_na <- count_na / dat_length
+    #     dat_length <-length(ID_dat)
+    #     count_na <-sum(is.na(ID_dat[['value']])) 
+    #     count_non_na <- dat_length - count_na
+    #     percent_na <- count_na / dat_length
 
-        # if number of NAs is over threshold or if less than 2 points of data are not NA, return NA
-        if (percent_na > thresh || count_non_na < 2) {
+    #     # if number of NAs is over threshold or if less than 2 points of data are not NA, return NA
+    #     if (percent_na > thresh || count_non_na < 2) {
             
-            dta@data["newfieldID"][i,] <- NA
+    #         dta@data["newfieldID"][i,] <- NA
         
-        } else {
-            # fit trend model
-            trend_mod <- lm(value ~ Year, data=ID_dat, na.action = na.omit)
+    #     } else {
+    #         # fit trend model
+    #         trend_mod <- lm(value ~ Year, data=ID_dat, na.action = na.omit)
       
-            # add trend coefficients to new field
-            dta@data["newfieldID"][i,] <- summary(trend_mod)$coefficients[2]
-        }
+    #         # add trend coefficients to new field
+    #         dta@data["newfieldID"][i,] <- summary(trend_mod)$coefficients[2]
+    #     }
 
-    }
+    # }
     
-    # return new field with trend coefficients
-    return(dta[["newfieldID"]])
+    # # return new field with trend coefficients
+    # return(dta[["newfieldID"]])
     
 }

From 76ea9b3c6b8b017200971fe55eede28cca6ada12 Mon Sep 17 00:00:00 2001
From: userz <sgoodman@aiddata.org>
Date: Tue, 15 Sep 2015 11:09:10 -0400
Subject: [PATCH 036/212] Test.

---
 R/timeRangeTrend.R | 32 ++++++++++++++++----------------
 1 file changed, 16 insertions(+), 16 deletions(-)

diff --git a/R/timeRangeTrend.R b/R/timeRangeTrend.R
index f57a6ed..4c0ba9d 100644
--- a/R/timeRangeTrend.R
+++ b/R/timeRangeTrend.R
@@ -2,27 +2,27 @@
 # by field prefix and return coefficients
 timeRangeTrend <- function(dta, prefix, startyr, endyr, IDfield, thresh=0.5) {
 
-    # create new dataframe from all columns in dta dataframe that 
-    # are either the ID or a year which is indicated by the prefix
-    grep_str = paste(IDfield, prefix, sep="|")
-    tDF <- dta@data[grepl(grep_str, names(dta@data))]
+    # # create new dataframe from all columns in dta dataframe that 
+    # # are either the ID or a year which is indicated by the prefix
+    # grep_str = paste(IDfield, prefix, sep="|")
+    # tDF <- dta@data[grepl(grep_str, names(dta@data))]
 
-    # melt all years columns in new dataframe
-    analysisDF <- melt(tDF, id=c(IDfield))
+    # # melt all years columns in new dataframe
+    # analysisDF <- melt(tDF, id=c(IDfield))
     
-    # cleaned GREP - remove year digit placeholders
-    # new_pre <- gsub("[0-9]", "", prefix, fixed=TRUE)
+    # # cleaned GREP - remove year digit placeholders
+    # # new_pre <- gsub("[0-9]", "", prefix, fixed=TRUE)
 
-    # get location of year in prefix
-    yIndex <- regexpr("[0-9]", prefix, fixed=TRUE)
+    # # get location of year in prefix
+    # yIndex <- regexpr("[0-9]", prefix, fixed=TRUE)
 
-    # generate new year field by removing prefix from variable (original column names)
-    # analysisDF["Year"] <- lapply(analysisDF["variable"], FUN=function(x) as.numeric(gsub(new_pre, "", x)))
-    analysisDF["Year"] <- lapply(analysisDF["variable"], FUN=function(x) as.numeric(substr(x, yIndex[1], yIndex[1]+3)))
+    # # generate new year field by removing prefix from variable (original column names)
+    # # analysisDF["Year"] <- lapply(analysisDF["variable"], FUN=function(x) as.numeric(gsub(new_pre, "", x)))
+    # analysisDF["Year"] <- lapply(analysisDF["variable"], FUN=function(x) as.numeric(substr(x, yIndex[1], yIndex[1]+3)))
 
-    # keep years in range specified
-    analysisDF <- analysisDF[analysisDF["Year"] >= startyr ,]
-    analysisDF <- analysisDF[analysisDF["Year"] <= endyr ,]
+    # # keep years in range specified
+    # analysisDF <- analysisDF[analysisDF["Year"] >= startyr ,]
+    # analysisDF <- analysisDF[analysisDF["Year"] <= endyr ,]
 
     # # create empty field
     # dta@data["newfieldID"] <- 0

From 823a2e3282574dfb7e6de54dff05cf976f0b0645 Mon Sep 17 00:00:00 2001
From: userz <sgoodman@aiddata.org>
Date: Tue, 15 Sep 2015 11:10:28 -0400
Subject: [PATCH 037/212] Test.

---
 R/timeRangeTrend.R | 83 ++++++++++++++++++++++++----------------------
 1 file changed, 43 insertions(+), 40 deletions(-)

diff --git a/R/timeRangeTrend.R b/R/timeRangeTrend.R
index 4c0ba9d..387d2a9 100644
--- a/R/timeRangeTrend.R
+++ b/R/timeRangeTrend.R
@@ -2,60 +2,63 @@
 # by field prefix and return coefficients
 timeRangeTrend <- function(dta, prefix, startyr, endyr, IDfield, thresh=0.5) {
 
-    # # create new dataframe from all columns in dta dataframe that 
-    # # are either the ID or a year which is indicated by the prefix
-    # grep_str = paste(IDfield, prefix, sep="|")
-    # tDF <- dta@data[grepl(grep_str, names(dta@data))]
+    # create new dataframe from all columns in dta dataframe that 
+    # are either the ID or a year which is indicated by the prefix
+    grep_str = paste(IDfield, prefix, sep="|")
+    tDF <- dta@data[grepl(grep_str, names(dta@data))]
 
-    # # melt all years columns in new dataframe
-    # analysisDF <- melt(tDF, id=c(IDfield))
+    # melt all years columns in new dataframe
+    analysisDF <- melt(tDF, id=c(IDfield))
     
-    # # cleaned GREP - remove year digit placeholders
-    # # new_pre <- gsub("[0-9]", "", prefix, fixed=TRUE)
+    # cleaned GREP - remove year digit placeholders
+    # new_pre <- gsub("[0-9]", "", prefix, fixed=TRUE)
 
-    # # get location of year in prefix
-    # yIndex <- regexpr("[0-9]", prefix, fixed=TRUE)
+    # get location of year in prefix
+    yIndex <- regexpr("[0-9]", prefix, fixed=TRUE)
 
-    # # generate new year field by removing prefix from variable (original column names)
-    # # analysisDF["Year"] <- lapply(analysisDF["variable"], FUN=function(x) as.numeric(gsub(new_pre, "", x)))
-    # analysisDF["Year"] <- lapply(analysisDF["variable"], FUN=function(x) as.numeric(substr(x, yIndex[1], yIndex[1]+3)))
+    # generate new year field by removing prefix from variable (original column names)
+    # analysisDF["Year"] <- lapply(analysisDF["variable"], FUN=function(x) as.numeric(gsub(new_pre, "", x)))
+    analysisDF["Year"] <- lapply(analysisDF["variable"], FUN=function(x) as.numeric(substr(x, yIndex[1], yIndex[1]+3)))
 
-    # # keep years in range specified
-    # analysisDF <- analysisDF[analysisDF["Year"] >= startyr ,]
-    # analysisDF <- analysisDF[analysisDF["Year"] <= endyr ,]
+    # keep years in range specified
+    analysisDF <- analysisDF[analysisDF["Year"] >= startyr ,]
+    analysisDF <- analysisDF[analysisDF["Year"] <= endyr ,]
 
-    # # create empty field
-    # dta@data["newfieldID"] <- 0
+    # create empty field
+    dta@data["newfieldID"] <- 0
 
-    # # iterate over original dataframe
-    # for (i in 1:length(dta)) {
-    #     # get id for row (in original data)
-    #     ID <- as.character(dta@data[IDfield][i,])
+    # iterate over original dataframe
+    for (i in 1:length(dta)) {
+        # get id for row (in original data)
+        ID <- as.character(dta@data[IDfield][i,])
 
-    #     # get all data corresponding to id from analysis dataframe
-    #     ID_dat <- analysisDF[analysisDF[IDfield] == ID,]
+        # get all data corresponding to id from analysis dataframe
+        ID_dat <- analysisDF[analysisDF[IDfield] == ID,]
 
-    #     dat_length <-length(ID_dat)
-    #     count_na <-sum(is.na(ID_dat[['value']])) 
-    #     count_non_na <- dat_length - count_na
-    #     percent_na <- count_na / dat_length
+        dat_length <-length(ID_dat)
+        count_na <-sum(is.na(ID_dat[['value']])) 
+        count_non_na <- dat_length - count_na
+        percent_na <- count_na / dat_length
 
-    #     # if number of NAs is over threshold or if less than 2 points of data are not NA, return NA
-    #     if (percent_na > thresh || count_non_na < 2) {
+        # if number of NAs is over threshold or if less than 2 points of data are not NA, return NA
+        if (percent_na > thresh || count_non_na < 2) {
             
-    #         dta@data["newfieldID"][i,] <- NA
+            dta@data["newfieldID"][i,] <- NA
         
-    #     } else {
-    #         # fit trend model
-    #         trend_mod <- lm(value ~ Year, data=ID_dat, na.action = na.omit)
+        } else {
+            # fit trend model
+            trend_mod <- lm(value ~ Year, data=ID_dat, na.action = na.omit)
       
-    #         # add trend coefficients to new field
-    #         dta@data["newfieldID"][i,] <- summary(trend_mod)$coefficients[2]
-    #     }
+            # add trend coefficients to new field
+            dta@data["newfieldID"][i,] <- summary(trend_mod)$coefficients[2]
+        }
 
-    # }
+    }
     
-    # # return new field with trend coefficients
-    # return(dta[["newfieldID"]])
+    # return new field with trend coefficients
+    return(dta[["newfieldID"]])
     
 }
+
+
+

From 0998551973255516ce252bfd37b7a49bdaa22a69 Mon Sep 17 00:00:00 2001
From: userz <sgoodman@aiddata.org>
Date: Tue, 15 Sep 2015 11:11:00 -0400
Subject: [PATCH 038/212] Test.

---
 R/timeRangeTrend.R | 84 +++++++++++++++++++++++-----------------------
 1 file changed, 42 insertions(+), 42 deletions(-)

diff --git a/R/timeRangeTrend.R b/R/timeRangeTrend.R
index 387d2a9..0afd412 100644
--- a/R/timeRangeTrend.R
+++ b/R/timeRangeTrend.R
@@ -1,64 +1,64 @@
 # run linear model on data within year range as specified
 # by field prefix and return coefficients
-timeRangeTrend <- function(dta, prefix, startyr, endyr, IDfield, thresh=0.5) {
+# timeRangeTrend <- function(dta, prefix, startyr, endyr, IDfield, thresh=0.5) {
 
-    # create new dataframe from all columns in dta dataframe that 
-    # are either the ID or a year which is indicated by the prefix
-    grep_str = paste(IDfield, prefix, sep="|")
-    tDF <- dta@data[grepl(grep_str, names(dta@data))]
+#     # create new dataframe from all columns in dta dataframe that 
+#     # are either the ID or a year which is indicated by the prefix
+#     grep_str = paste(IDfield, prefix, sep="|")
+#     tDF <- dta@data[grepl(grep_str, names(dta@data))]
 
-    # melt all years columns in new dataframe
-    analysisDF <- melt(tDF, id=c(IDfield))
+#     # melt all years columns in new dataframe
+#     analysisDF <- melt(tDF, id=c(IDfield))
     
-    # cleaned GREP - remove year digit placeholders
-    # new_pre <- gsub("[0-9]", "", prefix, fixed=TRUE)
+#     # cleaned GREP - remove year digit placeholders
+#     # new_pre <- gsub("[0-9]", "", prefix, fixed=TRUE)
 
-    # get location of year in prefix
-    yIndex <- regexpr("[0-9]", prefix, fixed=TRUE)
+#     # get location of year in prefix
+#     yIndex <- regexpr("[0-9]", prefix, fixed=TRUE)
 
-    # generate new year field by removing prefix from variable (original column names)
-    # analysisDF["Year"] <- lapply(analysisDF["variable"], FUN=function(x) as.numeric(gsub(new_pre, "", x)))
-    analysisDF["Year"] <- lapply(analysisDF["variable"], FUN=function(x) as.numeric(substr(x, yIndex[1], yIndex[1]+3)))
+#     # generate new year field by removing prefix from variable (original column names)
+#     # analysisDF["Year"] <- lapply(analysisDF["variable"], FUN=function(x) as.numeric(gsub(new_pre, "", x)))
+#     analysisDF["Year"] <- lapply(analysisDF["variable"], FUN=function(x) as.numeric(substr(x, yIndex[1], yIndex[1]+3)))
 
-    # keep years in range specified
-    analysisDF <- analysisDF[analysisDF["Year"] >= startyr ,]
-    analysisDF <- analysisDF[analysisDF["Year"] <= endyr ,]
+#     # keep years in range specified
+#     analysisDF <- analysisDF[analysisDF["Year"] >= startyr ,]
+#     analysisDF <- analysisDF[analysisDF["Year"] <= endyr ,]
 
-    # create empty field
-    dta@data["newfieldID"] <- 0
+#     # create empty field
+#     dta@data["newfieldID"] <- 0
 
-    # iterate over original dataframe
-    for (i in 1:length(dta)) {
-        # get id for row (in original data)
-        ID <- as.character(dta@data[IDfield][i,])
+#     # iterate over original dataframe
+#     for (i in 1:length(dta)) {
+#         # get id for row (in original data)
+#         ID <- as.character(dta@data[IDfield][i,])
 
-        # get all data corresponding to id from analysis dataframe
-        ID_dat <- analysisDF[analysisDF[IDfield] == ID,]
+#         # get all data corresponding to id from analysis dataframe
+#         ID_dat <- analysisDF[analysisDF[IDfield] == ID,]
 
-        dat_length <-length(ID_dat)
-        count_na <-sum(is.na(ID_dat[['value']])) 
-        count_non_na <- dat_length - count_na
-        percent_na <- count_na / dat_length
+#         dat_length <-length(ID_dat)
+#         count_na <-sum(is.na(ID_dat[['value']])) 
+#         count_non_na <- dat_length - count_na
+#         percent_na <- count_na / dat_length
 
-        # if number of NAs is over threshold or if less than 2 points of data are not NA, return NA
-        if (percent_na > thresh || count_non_na < 2) {
+#         # if number of NAs is over threshold or if less than 2 points of data are not NA, return NA
+#         if (percent_na > thresh || count_non_na < 2) {
             
-            dta@data["newfieldID"][i,] <- NA
+#             dta@data["newfieldID"][i,] <- NA
         
-        } else {
-            # fit trend model
-            trend_mod <- lm(value ~ Year, data=ID_dat, na.action = na.omit)
+#         } else {
+#             # fit trend model
+#             trend_mod <- lm(value ~ Year, data=ID_dat, na.action = na.omit)
       
-            # add trend coefficients to new field
-            dta@data["newfieldID"][i,] <- summary(trend_mod)$coefficients[2]
-        }
+#             # add trend coefficients to new field
+#             dta@data["newfieldID"][i,] <- summary(trend_mod)$coefficients[2]
+#         }
 
-    }
+#     }
     
-    # return new field with trend coefficients
-    return(dta[["newfieldID"]])
+#     # return new field with trend coefficients
+#     return(dta[["newfieldID"]])
     
-}
+# }
 
 
 

From e81c87e5835f813db804104e34d2d8cc413972d6 Mon Sep 17 00:00:00 2001
From: userz <sgoodman@aiddata.org>
Date: Tue, 15 Sep 2015 11:12:10 -0400
Subject: [PATCH 039/212] Fix bug.

---
 R/SpatialCausalPSM.R |  3 ++
 R/timeRangeTrend.R   | 87 +++++++++++++++++++++-----------------------
 2 files changed, 45 insertions(+), 45 deletions(-)

diff --git a/R/SpatialCausalPSM.R b/R/SpatialCausalPSM.R
index e72e689..ea2eec3 100644
--- a/R/SpatialCausalPSM.R
+++ b/R/SpatialCausalPSM.R
@@ -54,3 +54,6 @@ SpatialCausalPSM <- function(dta, mtd, mdl, drop, visual) {
     retEle <- c()
     retEle$data <- retData
     retEle$model <- PSMfit
+
+    return(retEle)
+}
diff --git a/R/timeRangeTrend.R b/R/timeRangeTrend.R
index 0afd412..ac41eda 100644
--- a/R/timeRangeTrend.R
+++ b/R/timeRangeTrend.R
@@ -1,64 +1,61 @@
 # run linear model on data within year range as specified
 # by field prefix and return coefficients
-# timeRangeTrend <- function(dta, prefix, startyr, endyr, IDfield, thresh=0.5) {
+timeRangeTrend <- function(dta, prefix, startyr, endyr, IDfield, thresh=0.5) {
 
-#     # create new dataframe from all columns in dta dataframe that 
-#     # are either the ID or a year which is indicated by the prefix
-#     grep_str = paste(IDfield, prefix, sep="|")
-#     tDF <- dta@data[grepl(grep_str, names(dta@data))]
+    # create new dataframe from all columns in dta dataframe that 
+    # are either the ID or a year which is indicated by the prefix
+    grep_str = paste(IDfield, prefix, sep="|")
+    tDF <- dta@data[grepl(grep_str, names(dta@data))]
 
-#     # melt all years columns in new dataframe
-#     analysisDF <- melt(tDF, id=c(IDfield))
+    # melt all years columns in new dataframe
+    analysisDF <- melt(tDF, id=c(IDfield))
     
-#     # cleaned GREP - remove year digit placeholders
-#     # new_pre <- gsub("[0-9]", "", prefix, fixed=TRUE)
+    # cleaned GREP - remove year digit placeholders
+    # new_pre <- gsub("[0-9]", "", prefix, fixed=TRUE)
 
-#     # get location of year in prefix
-#     yIndex <- regexpr("[0-9]", prefix, fixed=TRUE)
+    # get location of year in prefix
+    yIndex <- regexpr("[0-9]", prefix, fixed=TRUE)
 
-#     # generate new year field by removing prefix from variable (original column names)
-#     # analysisDF["Year"] <- lapply(analysisDF["variable"], FUN=function(x) as.numeric(gsub(new_pre, "", x)))
-#     analysisDF["Year"] <- lapply(analysisDF["variable"], FUN=function(x) as.numeric(substr(x, yIndex[1], yIndex[1]+3)))
+    # generate new year field by removing prefix from variable (original column names)
+    # analysisDF["Year"] <- lapply(analysisDF["variable"], FUN=function(x) as.numeric(gsub(new_pre, "", x)))
+    analysisDF["Year"] <- lapply(analysisDF["variable"], FUN=function(x) as.numeric(substr(x, yIndex[1], yIndex[1]+3)))
 
-#     # keep years in range specified
-#     analysisDF <- analysisDF[analysisDF["Year"] >= startyr ,]
-#     analysisDF <- analysisDF[analysisDF["Year"] <= endyr ,]
+    # keep years in range specified
+    analysisDF <- analysisDF[analysisDF["Year"] >= startyr ,]
+    analysisDF <- analysisDF[analysisDF["Year"] <= endyr ,]
 
-#     # create empty field
-#     dta@data["newfieldID"] <- 0
+    # create empty field
+    dta@data["newfieldID"] <- 0
 
-#     # iterate over original dataframe
-#     for (i in 1:length(dta)) {
-#         # get id for row (in original data)
-#         ID <- as.character(dta@data[IDfield][i,])
+    # iterate over original dataframe
+    for (i in 1:length(dta)) {
+        # get id for row (in original data)
+        ID <- as.character(dta@data[IDfield][i,])
 
-#         # get all data corresponding to id from analysis dataframe
-#         ID_dat <- analysisDF[analysisDF[IDfield] == ID,]
+        # get all data corresponding to id from analysis dataframe
+        ID_dat <- analysisDF[analysisDF[IDfield] == ID,]
 
-#         dat_length <-length(ID_dat)
-#         count_na <-sum(is.na(ID_dat[['value']])) 
-#         count_non_na <- dat_length - count_na
-#         percent_na <- count_na / dat_length
+        dat_length <-length(ID_dat)
+        count_na <-sum(is.na(ID_dat[['value']])) 
+        count_non_na <- dat_length - count_na
+        percent_na <- count_na / dat_length
 
-#         # if number of NAs is over threshold or if less than 2 points of data are not NA, return NA
-#         if (percent_na > thresh || count_non_na < 2) {
+        # if number of NAs is over threshold or if less than 2 points of data are not NA, return NA
+        if (percent_na > thresh || count_non_na < 2) {
             
-#             dta@data["newfieldID"][i,] <- NA
+            dta@data["newfieldID"][i,] <- NA
         
-#         } else {
-#             # fit trend model
-#             trend_mod <- lm(value ~ Year, data=ID_dat, na.action = na.omit)
+        } else {
+            # fit trend model
+            trend_mod <- lm(value ~ Year, data=ID_dat, na.action = na.omit)
       
-#             # add trend coefficients to new field
-#             dta@data["newfieldID"][i,] <- summary(trend_mod)$coefficients[2]
-#         }
+            # add trend coefficients to new field
+            dta@data["newfieldID"][i,] <- summary(trend_mod)$coefficients[2]
+        }
 
-#     }
+    }
     
-#     # return new field with trend coefficients
-#     return(dta[["newfieldID"]])
+    # return new field with trend coefficients
+    return(dta[["newfieldID"]])
     
-# }
-
-
-
+}

From 1f036d12d8fc27335b410b424043e5c9f43144ce Mon Sep 17 00:00:00 2001
From: userz <sgoodman@aiddata.org>
Date: Tue, 15 Sep 2015 11:14:01 -0400
Subject: [PATCH 040/212] Test.

---
 R/fastNN_binary_func.R | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/R/fastNN_binary_func.R b/R/fastNN_binary_func.R
index da3f19a..f7bdf67 100644
--- a/R/fastNN_binary_func.R
+++ b/R/fastNN_binary_func.R
@@ -78,6 +78,7 @@ fastNN_binary_func <- function(dta, trtMntVar, ids, curgrp, dist_PSM) {
         best_m_treated = k[["nn.index"]][best_m_control]
         
         #Control PSM ID
+        print(untreated[[ids]][[best_m_control]])
         Control_ID = toString(untreated[[ids]][[best_m_control]])    
 
         #Treatment PSM ID
@@ -88,7 +89,7 @@ fastNN_binary_func <- function(dta, trtMntVar, ids, curgrp, dist_PSM) {
         pair_id = paste(curgrp,j,sep="")
         
         print("nn4.0")
-        
+
         #Add the Treatment ID to the Control Row 
         dta@data$match[which(dta@data[[ids]] == Control_ID)] = Treatment_ID
         dta@data$PSM_distance[which(dta@data[[ids]] == Control_ID)] = k[["nn.dist"]][,1][best_m_control]

From 6eb42dc8d7d924fd6e74d9d58ccd62049ae78216 Mon Sep 17 00:00:00 2001
From: userz <sgoodman@aiddata.org>
Date: Tue, 15 Sep 2015 11:23:52 -0400
Subject: [PATCH 041/212] Test.

---
 R/fastNN_binary_func.R | 16 ++++++++++++----
 1 file changed, 12 insertions(+), 4 deletions(-)

diff --git a/R/fastNN_binary_func.R b/R/fastNN_binary_func.R
index f7bdf67..67e9b3c 100644
--- a/R/fastNN_binary_func.R
+++ b/R/fastNN_binary_func.R
@@ -77,12 +77,20 @@ fastNN_binary_func <- function(dta, trtMntVar, ids, curgrp, dist_PSM) {
         #This will give us the matched index in the "treated" dataset.
         best_m_treated = k[["nn.index"]][best_m_control]
         
-        #Control PSM ID
-        print(untreated[[ids]][[best_m_control]])
-        Control_ID = toString(untreated[[ids]][[best_m_control]])    
+        # #Control PSM ID
+        # Control_ID = toString(untreated[[ids]][[best_m_control]])    
 
+        # #Treatment PSM ID
+        # Treatment_ID = toString(treated[[ids]][[best_m_treated]])
+   
+        #Control PSM ID
+        cid_txt = paste("untreated$",ids,"[",best_m_control,"]",sep="")
+        Control_ID = toString(eval(parse(text=cid_txt)))
+            
         #Treatment PSM ID
-        Treatment_ID = toString(treated[[ids]][[best_m_treated]])
+        tid_txt = paste("treated$",ids,"[",best_m_treated,"]",sep="")
+        Treatment_ID = toString(eval(parse(text=tid_txt)))
+
 
 
         #Create a unique pair ID for each group (will simply append a "1" if only 1 group)

From 9352e0b25dcf7d24aeb9760e576726ddb181b7cd Mon Sep 17 00:00:00 2001
From: userz <sgoodman@aiddata.org>
Date: Tue, 15 Sep 2015 11:33:22 -0400
Subject: [PATCH 042/212] Test.

---
 R/BuildTimeSeries.R | 1 +
 1 file changed, 1 insertion(+)

diff --git a/R/BuildTimeSeries.R b/R/BuildTimeSeries.R
index f697687..ff3e8a4 100644
--- a/R/BuildTimeSeries.R
+++ b/R/BuildTimeSeries.R
@@ -52,6 +52,7 @@ BuildTimeSeries <- function (dta, idField, varList_pre, startYear, endYear, colY
     if (!is.null(interpYears)) {
         print("bts3.0")
         for (AncInt in 1:length(interpYears)) {
+            print(AncInt)
 
             print("bts3.0.0")
             cur_ancVi <- interpYears[AncInt]

From 622e914c33baf17e27af7640b585a27d039c4dca Mon Sep 17 00:00:00 2001
From: userz <sgoodman@aiddata.org>
Date: Tue, 15 Sep 2015 11:34:44 -0400
Subject: [PATCH 043/212] Test.

---
 R/BuildTimeSeries.R | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/R/BuildTimeSeries.R b/R/BuildTimeSeries.R
index ff3e8a4..3df67f6 100644
--- a/R/BuildTimeSeries.R
+++ b/R/BuildTimeSeries.R
@@ -86,14 +86,14 @@ BuildTimeSeries <- function (dta, idField, varList_pre, startYear, endYear, colY
 
             # Only one time point, so no interpolation is done - value is simply copied to all other columns.
             if (cnt == 3) {
-
+                print("bts3.0.2a")
                 for (k in 1:length(years)) {
                     # add _year to end of non temporal data
                     dta@data[[paste(cur_ancVi,years[[k]],sep="_")]] <- interpFrame[2]
                 }
 
             } else if (cnt < length(years) + 2) {
-
+                print("bts3.0.2b")
                 tDframe <- dta@data[idField]
 
                 # Here, we model out everything. 

From a06df241271b315dbcb276f334ab308f03c1ba64 Mon Sep 17 00:00:00 2001
From: userz <sgoodman@aiddata.org>
Date: Tue, 15 Sep 2015 11:35:29 -0400
Subject: [PATCH 044/212] Test.

---
 R/BuildTimeSeries.R | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/R/BuildTimeSeries.R b/R/BuildTimeSeries.R
index 3df67f6..0ca9c0e 100644
--- a/R/BuildTimeSeries.R
+++ b/R/BuildTimeSeries.R
@@ -52,7 +52,7 @@ BuildTimeSeries <- function (dta, idField, varList_pre, startYear, endYear, colY
     if (!is.null(interpYears)) {
         print("bts3.0")
         for (AncInt in 1:length(interpYears)) {
-            print(AncInt)
+            print(interpYears[AncInt])
 
             print("bts3.0.0")
             cur_ancVi <- interpYears[AncInt]

From bf2f2a2e8ca0a4dce04be8b779ae5c7bb7b72dc6 Mon Sep 17 00:00:00 2001
From: userz <sgoodman@aiddata.org>
Date: Tue, 15 Sep 2015 11:40:01 -0400
Subject: [PATCH 045/212] Test.

---
 R/BuildTimeSeries.R | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/R/BuildTimeSeries.R b/R/BuildTimeSeries.R
index 0ca9c0e..d9df0f0 100644
--- a/R/BuildTimeSeries.R
+++ b/R/BuildTimeSeries.R
@@ -12,7 +12,7 @@ BuildTimeSeries <- function (dta, idField, varList_pre, startYear, endYear, colY
         for (k in 1:length(years)) {
             for (j in 1:length(colYears)) {
 
-                varN <- paste("TrtMnt_",colYears[j],years[k], sep="")
+                varN <- paste("TrtMnt_",colYears[j],"_",years[k], sep="")
                 print(varN)
 
                 exec <- paste("dta$",varN,"=0", sep="")

From 2fa6bfb94d5f2a5074913ad43553eef26aa3ff84 Mon Sep 17 00:00:00 2001
From: userz <sgoodman@aiddata.org>
Date: Tue, 15 Sep 2015 11:41:28 -0400
Subject: [PATCH 046/212] Test.

---
 R/BuildTimeSeries.R | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/R/BuildTimeSeries.R b/R/BuildTimeSeries.R
index d9df0f0..5f3a519 100644
--- a/R/BuildTimeSeries.R
+++ b/R/BuildTimeSeries.R
@@ -93,7 +93,7 @@ BuildTimeSeries <- function (dta, idField, varList_pre, startYear, endYear, colY
                 }
 
             } else if (cnt < length(years) + 2) {
-                print("bts3.0.2b")
+                print("bts3.0.2b0")
                 tDframe <- dta@data[idField]
 
                 # Here, we model out everything. 
@@ -104,11 +104,13 @@ BuildTimeSeries <- function (dta, idField, varList_pre, startYear, endYear, colY
                 
 
                 # Fit the model for interpolation
+                print("bts3.0.2b1")
                 
                 execstr <- paste("mdl <- lm(value ~ variable + factor(",idField,"),data=melt_Model_dta)", sep="")
                 eval(parse(text=execstr))
                 # mdl <- lm(value ~ variable + factor(idField), data=melt_Model_dta)
 
+                print("bts3.0.2b2")
 
                 # Apply the model to interpolate
                 for (u in 1:length(years)) {

From 1595c8f5af93dbee4156b9c4e6385999e7c91f9c Mon Sep 17 00:00:00 2001
From: userz <sgoodman@aiddata.org>
Date: Tue, 15 Sep 2015 11:42:52 -0400
Subject: [PATCH 047/212] Test.

---
 R/BuildTimeSeries.R | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/R/BuildTimeSeries.R b/R/BuildTimeSeries.R
index 5f3a519..6ee4124 100644
--- a/R/BuildTimeSeries.R
+++ b/R/BuildTimeSeries.R
@@ -81,6 +81,8 @@ BuildTimeSeries <- function (dta, idField, varList_pre, startYear, endYear, colY
                 }
             }
 
+            print(cnt)
+
             print("bts3.0.2")
             # this is a slow part
 

From a5ff1bf6864c0763f6ca26b137d9f41f2e9a193f Mon Sep 17 00:00:00 2001
From: userz <sgoodman@aiddata.org>
Date: Tue, 15 Sep 2015 11:45:53 -0400
Subject: [PATCH 048/212] Test.

---
 R/BuildTimeSeries.R | 1 +
 1 file changed, 1 insertion(+)

diff --git a/R/BuildTimeSeries.R b/R/BuildTimeSeries.R
index 6ee4124..4bda2de 100644
--- a/R/BuildTimeSeries.R
+++ b/R/BuildTimeSeries.R
@@ -28,6 +28,7 @@ BuildTimeSeries <- function (dta, idField, varList_pre, startYear, endYear, colY
     timer <- proc.time() - timer
     print(paste("section completed in", timer[3], "seconds."))
 
+    print(colnames(dta))
 
     print("bts2")
     timer <- proc.time()

From dd1bf12b52ed726a7db9fc2530be45f7106408fd Mon Sep 17 00:00:00 2001
From: userz <sgoodman@aiddata.org>
Date: Tue, 15 Sep 2015 11:48:26 -0400
Subject: [PATCH 049/212] Test.

---
 R/BuildTimeSeries.R | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/R/BuildTimeSeries.R b/R/BuildTimeSeries.R
index 4bda2de..9719492 100644
--- a/R/BuildTimeSeries.R
+++ b/R/BuildTimeSeries.R
@@ -15,7 +15,7 @@ BuildTimeSeries <- function (dta, idField, varList_pre, startYear, endYear, colY
                 varN <- paste("TrtMnt_",colYears[j],"_",years[k], sep="")
                 print(varN)
 
-                exec <- paste("dta$",varN,"=0", sep="")
+                exec <- paste("dta$",varN," = 0", sep="")
                 eval(parse(text=exec))
                 # dta[,varN] = 0
 
@@ -28,7 +28,7 @@ BuildTimeSeries <- function (dta, idField, varList_pre, startYear, endYear, colY
     timer <- proc.time() - timer
     print(paste("section completed in", timer[3], "seconds."))
 
-    print(colnames(dta))
+    print(colnames(dta@data))
 
     print("bts2")
     timer <- proc.time()

From 0e8838b68ae2bae0711aa11a77eea9fa082d71b8 Mon Sep 17 00:00:00 2001
From: userz <sgoodman@aiddata.org>
Date: Tue, 15 Sep 2015 11:52:15 -0400
Subject: [PATCH 050/212] Test.

---
 R/BuildTimeSeries.R | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/R/BuildTimeSeries.R b/R/BuildTimeSeries.R
index 9719492..cdf8d1a 100644
--- a/R/BuildTimeSeries.R
+++ b/R/BuildTimeSeries.R
@@ -36,7 +36,7 @@ BuildTimeSeries <- function (dta, idField, varList_pre, startYear, endYear, colY
 
     # add the "TrtMnt_" + colYears[j] prefix to interpYears
     for (j in 1:length(colYears)) {
-        trt_id = paste("TrtMnt_",colYears[j], sep="")
+        trt_id = paste("TrtMnt_",colYears[j],"_####", sep="")
         interpYears <- c(interpYears, trt_id)  
     }
 

From 70a5e48c45ff60a8acbffa40a590240e0e74ec88 Mon Sep 17 00:00:00 2001
From: userz <sgoodman@aiddata.org>
Date: Tue, 15 Sep 2015 11:56:31 -0400
Subject: [PATCH 051/212] Test.

---
 R/BuildTimeSeries.R | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/R/BuildTimeSeries.R b/R/BuildTimeSeries.R
index cdf8d1a..0289853 100644
--- a/R/BuildTimeSeries.R
+++ b/R/BuildTimeSeries.R
@@ -133,9 +133,10 @@ BuildTimeSeries <- function (dta, idField, varList_pre, startYear, endYear, colY
 
         print("bts3.1")        
         # Append interpolated fields to our melting lists
-        for (v in 1:length(interpYears)) {
-            varList_pre[[length(varList_pre)+1]] <- interpYears[v]
-        }
+        varList_pre <- c(varList_pre, interpYears)
+        # for (v in 1:length(interpYears)) {
+        #     varList_pre[[length(varList_pre)+1]] <- interpYears[v]
+        # }
     
     }
   

From 1ec1c1c6251ae8def3a69e02eb9af306db7e0000 Mon Sep 17 00:00:00 2001
From: userz <sgoodman@aiddata.org>
Date: Tue, 15 Sep 2015 11:59:29 -0400
Subject: [PATCH 052/212] Test.

---
 R/BuildTimeSeries.R | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/R/BuildTimeSeries.R b/R/BuildTimeSeries.R
index 0289853..ac5b34e 100644
--- a/R/BuildTimeSeries.R
+++ b/R/BuildTimeSeries.R
@@ -1,5 +1,7 @@
 BuildTimeSeries <- function (dta, idField, varList_pre, startYear, endYear, colYears=NULL, interpYears=NULL) {
     
+    print(varList_pre)
+    
     years <- startYear:endYear
 
     print("bts1")

From 0d5bc8e37e18fa171b8abf5439ab2d6303493614 Mon Sep 17 00:00:00 2001
From: userz <sgoodman@aiddata.org>
Date: Tue, 15 Sep 2015 12:02:47 -0400
Subject: [PATCH 053/212] Test.

---
 R/BuildTimeSeries.R | 18 ++++++++++++++----
 1 file changed, 14 insertions(+), 4 deletions(-)

diff --git a/R/BuildTimeSeries.R b/R/BuildTimeSeries.R
index ac5b34e..4f934a2 100644
--- a/R/BuildTimeSeries.R
+++ b/R/BuildTimeSeries.R
@@ -1,7 +1,7 @@
 BuildTimeSeries <- function (dta, idField, varList_pre, startYear, endYear, colYears=NULL, interpYears=NULL) {
     
     print(varList_pre)
-    
+
     years <- startYear:endYear
 
     print("bts1")
@@ -154,7 +154,7 @@ BuildTimeSeries <- function (dta, idField, varList_pre, startYear, endYear, colY
     meltList <- list()
     for (i in 1:length(varList_pre)) {
 
-
+        print("bts4.0")
         # Limit to only relevant years
         grepStrYrs = idField
 
@@ -168,20 +168,29 @@ BuildTimeSeries <- function (dta, idField, varList_pre, startYear, endYear, colY
             }
         }
     
+        print("bts4.1")
 
 
         tDF <- dta@data[grepl(grepStrYrs, names(dta@data))]
         meltList[[i]] <- melt(tDF, id=idField)
         
+        print("bts4.2")
+
         # Keep only years in the year column, rename columns
         colnames(meltList[[i]])[2] <- "Year"
+
+        print("bts4.3")
+
         colnames(meltList[[i]])[3] <- varList_pre[[i]]
-        
+
+        print("bts4.4")
+
         # Clean up year column
         gsub_command <- paste("^",varList_pre[[i]],sep="")
         meltList[[i]][2] <- gsub(gsub_command, "", as.matrix(meltList[[i]][2]))
         
-    
+        print("bts4.5")
+
         # Remove ID and year if this is at least the second variable to avoid duplications.
         if (i > 1) {
             meltList[[i]] <- meltList[[i]][3]
@@ -192,6 +201,7 @@ BuildTimeSeries <- function (dta, idField, varList_pre, startYear, endYear, colY
     timer <- proc.time() - timer
     print(paste("section completed in", timer[3], "seconds."))
 
+    print("bts5")
 
     # Finish up with a cherry on top
     meltListRet <- data.frame(meltList)

From 8515cdf56daa59712580ff5ebc5c973f3926dbd6 Mon Sep 17 00:00:00 2001
From: userz <sgoodman@aiddata.org>
Date: Tue, 15 Sep 2015 12:09:53 -0400
Subject: [PATCH 054/212] Test.

---
 R/BuildTimeSeries.R | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/R/BuildTimeSeries.R b/R/BuildTimeSeries.R
index 4f934a2..885d899 100644
--- a/R/BuildTimeSeries.R
+++ b/R/BuildTimeSeries.R
@@ -175,7 +175,7 @@ BuildTimeSeries <- function (dta, idField, varList_pre, startYear, endYear, colY
         meltList[[i]] <- melt(tDF, id=idField)
         
         print("bts4.2")
-
+        print(colnames(meltList[[i]]))
         # Keep only years in the year column, rename columns
         colnames(meltList[[i]])[2] <- "Year"
 

From 4a0702d15371d7a8b36460c89e151c5b92f29392 Mon Sep 17 00:00:00 2001
From: userz <sgoodman@aiddata.org>
Date: Tue, 15 Sep 2015 12:11:33 -0400
Subject: [PATCH 055/212] Test.

---
 R/BuildTimeSeries.R | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/R/BuildTimeSeries.R b/R/BuildTimeSeries.R
index 885d899..157ea7b 100644
--- a/R/BuildTimeSeries.R
+++ b/R/BuildTimeSeries.R
@@ -155,6 +155,8 @@ BuildTimeSeries <- function (dta, idField, varList_pre, startYear, endYear, colY
     for (i in 1:length(varList_pre)) {
 
         print("bts4.0")
+        print(varList_pre[[i]])
+
         # Limit to only relevant years
         grepStrYrs = idField
 

From 73c70278a8c0ab6449c74ee1d08a7c94d1141e6e Mon Sep 17 00:00:00 2001
From: userz <sgoodman@aiddata.org>
Date: Tue, 15 Sep 2015 12:19:06 -0400
Subject: [PATCH 056/212] Test.

---
 R/BuildTimeSeries.R | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/R/BuildTimeSeries.R b/R/BuildTimeSeries.R
index 157ea7b..136ce32 100644
--- a/R/BuildTimeSeries.R
+++ b/R/BuildTimeSeries.R
@@ -169,7 +169,8 @@ BuildTimeSeries <- function (dta, idField, varList_pre, startYear, endYear, colY
                 grepStrYrs <- paste(tempGrep,"|",gsub('####', years[[j]], varList_pre[[i]]), sep="")
             }
         }
-    
+        print(grepStrYrs)
+
         print("bts4.1")
 
 

From df96e72f59c1c28bd5b104e65c3a9122b3012a88 Mon Sep 17 00:00:00 2001
From: userz <sgoodman@aiddata.org>
Date: Tue, 15 Sep 2015 12:20:54 -0400
Subject: [PATCH 057/212] Test.

---
 R/BuildTimeSeries.R | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/R/BuildTimeSeries.R b/R/BuildTimeSeries.R
index 136ce32..8fe40d9 100644
--- a/R/BuildTimeSeries.R
+++ b/R/BuildTimeSeries.R
@@ -172,7 +172,7 @@ BuildTimeSeries <- function (dta, idField, varList_pre, startYear, endYear, colY
         print(grepStrYrs)
 
         print("bts4.1")
-
+        print(names(dta@data))
 
         tDF <- dta@data[grepl(grepStrYrs, names(dta@data))]
         meltList[[i]] <- melt(tDF, id=idField)

From b9f605749f4ebb149fd5e519a16304772b4b96cd Mon Sep 17 00:00:00 2001
From: userz <sgoodman@aiddata.org>
Date: Tue, 15 Sep 2015 12:30:35 -0400
Subject: [PATCH 058/212] Test.

---
 R/BuildTimeSeries.R | 28 +++++++++++++++-------------
 1 file changed, 15 insertions(+), 13 deletions(-)

diff --git a/R/BuildTimeSeries.R b/R/BuildTimeSeries.R
index 8fe40d9..7e26f2c 100644
--- a/R/BuildTimeSeries.R
+++ b/R/BuildTimeSeries.R
@@ -64,23 +64,25 @@ BuildTimeSeries <- function (dta, idField, varList_pre, startYear, endYear, colY
             cnt = 2
 
             print("bts3.0.1")
-            for (k in 1:length(years)) {
-                # First, build a model describing the relationship between years and any data in the interp field.
-
-                # Check if data exists for the year - if not, ignore.  If so, include in the new modeling frame.
-                varI <- gsub('####', years[[k]], cur_ancVi)
-                if (varI %in% colnames(dta@data)) {
-
-                    interpFrame[cnt] <- dta@data[[varI]]
-                    colnames(interpFrame)[cnt] <- years[[k]]
-                    cnt = cnt + 1
-
-                } else if (cur_ancVi %in% colnames(dta@data)) {
-
+            if (cur_ancVi %in% colnames(dta@data)) {
+                    print("IN!!")
                     # Exception for a single-point interpolation
                     interpFrame[cnt] <- dta@data[[cur_ancVi]]
                     cnt = 3
                     
+            } else {
+                for (k in 1:length(years)) {
+                    # First, build a model describing the relationship between years and any data in the interp field.
+
+                    # Check if data exists for the year - if not, ignore.  If so, include in the new modeling frame.
+                    varI <- gsub('####', years[[k]], cur_ancVi)
+                    if (varI %in% colnames(dta@data)) {
+
+                        interpFrame[cnt] <- dta@data[[varI]]
+                        colnames(interpFrame)[cnt] <- years[[k]]
+                        cnt = cnt + 1
+
+                    } 
                 }
             }
 

From 6e0788e6081d61ea0f0c3d5c381add87b2a1fde3 Mon Sep 17 00:00:00 2001
From: userz <sgoodman@aiddata.org>
Date: Tue, 15 Sep 2015 12:55:11 -0400
Subject: [PATCH 059/212] Test.

---
 R/BuildTimeSeries.R | 41 ++++++++++++++++++++++-------------------
 1 file changed, 22 insertions(+), 19 deletions(-)

diff --git a/R/BuildTimeSeries.R b/R/BuildTimeSeries.R
index 7e26f2c..21264ae 100644
--- a/R/BuildTimeSeries.R
+++ b/R/BuildTimeSeries.R
@@ -1,9 +1,11 @@
 BuildTimeSeries <- function (dta, idField, varList_pre, startYear, endYear, colYears=NULL, interpYears=NULL) {
     
-    print(varList_pre)
-
+    # generate year range 
     years <- startYear:endYear
 
+
+
+
     print("bts1")
     timer <- proc.time()
 
@@ -11,8 +13,8 @@ BuildTimeSeries <- function (dta, idField, varList_pre, startYear, endYear, colY
     # Eventually could be extended to more than one column.
     if (!is.null(colYears)) {
         # For each variable, for each year, create a binary representing the treatment status.
-        for (k in 1:length(years)) {
-            for (j in 1:length(colYears)) {
+        for (j in 1:length(colYears)) {
+            for (k in 1:length(years)) {
 
                 varN <- paste("TrtMnt_",colYears[j],"_",years[k], sep="")
                 print(varN)
@@ -21,20 +23,13 @@ BuildTimeSeries <- function (dta, idField, varList_pre, startYear, endYear, colY
                 eval(parse(text=exec))
                 # dta[,varN] = 0
 
-
-                dta@data[varN][dta@data[colYears[j]] <= as.numeric(years[k])] <- 1
+                dta@data[varN][as.Date(dta@data[colYears[j]]) <= as.Date(years[k])] <- 1
             }
         }
     }
 
-    timer <- proc.time() - timer
-    print(paste("section completed in", timer[3], "seconds."))
-
     print(colnames(dta@data))
 
-    print("bts2")
-    timer <- proc.time()
-
 
     # add the "TrtMnt_" + colYears[j] prefix to interpYears
     for (j in 1:length(colYears)) {
@@ -44,8 +39,9 @@ BuildTimeSeries <- function (dta, idField, varList_pre, startYear, endYear, colY
 
     print(interpYears)
 
-    timer <- proc.time() - timer
-    print(paste("section completed in", timer[3], "seconds."))
+
+
+
 
 
     print("bts3")
@@ -53,11 +49,13 @@ BuildTimeSeries <- function (dta, idField, varList_pre, startYear, endYear, colY
     
     # If there is an "interpVars" variable, linearly interpolate values based on at least 2 known points in time.
     if (!is.null(interpYears)) {
+
         print("bts3.0")
+
         for (AncInt in 1:length(interpYears)) {
+
             print(interpYears[AncInt])
 
-            print("bts3.0.0")
             cur_ancVi <- interpYears[AncInt]
             interpFrame <- dta@data[idField]
             interpFrame[idField] <- dta@data[idField]
@@ -65,7 +63,6 @@ BuildTimeSeries <- function (dta, idField, varList_pre, startYear, endYear, colY
 
             print("bts3.0.1")
             if (cur_ancVi %in% colnames(dta@data)) {
-                    print("IN!!")
                     # Exception for a single-point interpolation
                     interpFrame[cnt] <- dta@data[[cur_ancVi]]
                     cnt = 3
@@ -88,6 +85,7 @@ BuildTimeSeries <- function (dta, idField, varList_pre, startYear, endYear, colY
 
             print(cnt)
 
+
             print("bts3.0.2")
             # this is a slow part
 
@@ -138,17 +136,18 @@ BuildTimeSeries <- function (dta, idField, varList_pre, startYear, endYear, colY
         print("bts3.1")        
         # Append interpolated fields to our melting lists
         varList_pre <- c(varList_pre, interpYears)
-        # for (v in 1:length(interpYears)) {
-        #     varList_pre[[length(varList_pre)+1]] <- interpYears[v]
-        # }
     
     }
   
+    print(varList_pre)
+
     timer <- proc.time() - timer
     print(paste("section completed in", timer[3], "seconds."))
 
 
 
+
+
     print("bts4")
     timer <- proc.time()
 
@@ -206,6 +205,10 @@ BuildTimeSeries <- function (dta, idField, varList_pre, startYear, endYear, colY
     timer <- proc.time() - timer
     print(paste("section completed in", timer[3], "seconds."))
 
+
+
+
+
     print("bts5")
 
     # Finish up with a cherry on top

From b73c000a010ad862869a72239c687c5c2ceb0b18 Mon Sep 17 00:00:00 2001
From: userz <sgoodman@aiddata.org>
Date: Tue, 15 Sep 2015 12:57:36 -0400
Subject: [PATCH 060/212] Test.

---
 R/BuildTimeSeries.R | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/R/BuildTimeSeries.R b/R/BuildTimeSeries.R
index 21264ae..7d4f10e 100644
--- a/R/BuildTimeSeries.R
+++ b/R/BuildTimeSeries.R
@@ -14,6 +14,9 @@ BuildTimeSeries <- function (dta, idField, varList_pre, startYear, endYear, colY
     if (!is.null(colYears)) {
         # For each variable, for each year, create a binary representing the treatment status.
         for (j in 1:length(colYears)) {
+
+            dta@data[colYears[j]] <- lapply(dta@data[colYears[j]], as.Date)
+
             for (k in 1:length(years)) {
 
                 varN <- paste("TrtMnt_",colYears[j],"_",years[k], sep="")
@@ -23,7 +26,8 @@ BuildTimeSeries <- function (dta, idField, varList_pre, startYear, endYear, colY
                 eval(parse(text=exec))
                 # dta[,varN] = 0
 
-                dta@data[varN][as.Date(dta@data[colYears[j]]) <= as.Date(years[k])] <- 1
+
+                dta@data[varN][dta@data[colYears[j]] <= as.Date(years[k])] <- 1
             }
         }
     }

From 37642800984a281a491097ef94a52c7a47963a00 Mon Sep 17 00:00:00 2001
From: userz <sgoodman@aiddata.org>
Date: Tue, 15 Sep 2015 13:01:50 -0400
Subject: [PATCH 061/212] Test.

---
 R/BuildTimeSeries.R | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/R/BuildTimeSeries.R b/R/BuildTimeSeries.R
index 7d4f10e..9832437 100644
--- a/R/BuildTimeSeries.R
+++ b/R/BuildTimeSeries.R
@@ -27,7 +27,7 @@ BuildTimeSeries <- function (dta, idField, varList_pre, startYear, endYear, colY
                 # dta[,varN] = 0
 
 
-                dta@data[varN][dta@data[colYears[j]] <= as.Date(years[k])] <- 1
+                dta@data[varN][dta@data[colYears[j]] <= years[k]] <- 1
             }
         }
     }

From 9840b3b921c3f97a5c1da3627ce35a4bad2adb46 Mon Sep 17 00:00:00 2001
From: userz <sgoodman@aiddata.org>
Date: Tue, 15 Sep 2015 13:07:19 -0400
Subject: [PATCH 062/212] Test.

---
 R/BuildTimeSeries.R | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/R/BuildTimeSeries.R b/R/BuildTimeSeries.R
index 9832437..e9d4e11 100644
--- a/R/BuildTimeSeries.R
+++ b/R/BuildTimeSeries.R
@@ -27,7 +27,7 @@ BuildTimeSeries <- function (dta, idField, varList_pre, startYear, endYear, colY
                 # dta[,varN] = 0
 
 
-                dta@data[varN][dta@data[colYears[j]] <= years[k]] <- 1
+                dta@data[varN][dta@data[colYears[j]] <= as.Date(paste(years[k],"01","01", sep="-")] <- 1
             }
         }
     }

From f96c62d1b2bff01d98fbf58fa18a0b332318c908 Mon Sep 17 00:00:00 2001
From: userz <sgoodman@aiddata.org>
Date: Tue, 15 Sep 2015 13:07:51 -0400
Subject: [PATCH 063/212] Test.

---
 R/BuildTimeSeries.R | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/R/BuildTimeSeries.R b/R/BuildTimeSeries.R
index e9d4e11..79256f1 100644
--- a/R/BuildTimeSeries.R
+++ b/R/BuildTimeSeries.R
@@ -27,7 +27,7 @@ BuildTimeSeries <- function (dta, idField, varList_pre, startYear, endYear, colY
                 # dta[,varN] = 0
 
 
-                dta@data[varN][dta@data[colYears[j]] <= as.Date(paste(years[k],"01","01", sep="-")] <- 1
+                dta@data[varN][dta@data[colYears[j]] <= as.Date(paste(years[k],"01","01", sep="-"))] <- 1
             }
         }
     }

From a8d737fbdc20c38a7dfb48293f1f6f7fad2566dc Mon Sep 17 00:00:00 2001
From: userz <sgoodman@aiddata.org>
Date: Tue, 15 Sep 2015 13:20:09 -0400
Subject: [PATCH 064/212] Test.

---
 R/BuildTimeSeries.R | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

diff --git a/R/BuildTimeSeries.R b/R/BuildTimeSeries.R
index 79256f1..320d346 100644
--- a/R/BuildTimeSeries.R
+++ b/R/BuildTimeSeries.R
@@ -22,12 +22,13 @@ BuildTimeSeries <- function (dta, idField, varList_pre, startYear, endYear, colY
                 varN <- paste("TrtMnt_",colYears[j],"_",years[k], sep="")
                 print(varN)
 
-                exec <- paste("dta$",varN," = 0", sep="")
-                eval(parse(text=exec))
-                # dta[,varN] = 0
+                # exec <- paste("dta$",varN," = 0", sep="")
+                # eval(parse(text=exec))
 
+                dta@data[[varN]] <- 0
 
-                dta@data[varN][dta@data[colYears[j]] <= as.Date(paste(years[k],"01","01", sep="-"))] <- 1
+
+                dta@data[[varN]][dta@data[colYears[j]] <= as.Date(paste(years[k],"01","01", sep="-"))] <- 1
             }
         }
     }

From 68ca7a5cf7cd8c5d34a5ac96edf3b62487272dcb Mon Sep 17 00:00:00 2001
From: userz <sgoodman@aiddata.org>
Date: Tue, 15 Sep 2015 13:23:35 -0400
Subject: [PATCH 065/212] Test.

---
 R/BuildTimeSeries.R | 12 +++++++++---
 1 file changed, 9 insertions(+), 3 deletions(-)

diff --git a/R/BuildTimeSeries.R b/R/BuildTimeSeries.R
index 320d346..263561f 100644
--- a/R/BuildTimeSeries.R
+++ b/R/BuildTimeSeries.R
@@ -25,10 +25,16 @@ BuildTimeSeries <- function (dta, idField, varList_pre, startYear, endYear, colY
                 # exec <- paste("dta$",varN," = 0", sep="")
                 # eval(parse(text=exec))
 
-                dta@data[[varN]] <- 0
-
+                dta@data[[varN]] <- lapply(dta@data[colYears[j]], function (cYear) {
+                    if (cYear <= as.Date(paste(years[k],"01","01", sep="-"))) {
+                        return(1)
+                    } else {
+                        return(0)
+                    }
+                })
 
-                dta@data[[varN]][dta@data[colYears[j]] <= as.Date(paste(years[k],"01","01", sep="-"))] <- 1
+                # dta@data[[varN]] <- 0
+                # dta@data[[varN]][dta@data[colYears[j]] <= as.Date(paste(years[k],"01","01", sep="-"))] <- 1
             }
         }
     }

From 296caa8493c112370b74bce3ec26a31b4fa62f02 Mon Sep 17 00:00:00 2001
From: userz <sgoodman@aiddata.org>
Date: Tue, 15 Sep 2015 13:25:52 -0400
Subject: [PATCH 066/212] Test.

---
 R/BuildTimeSeries.R | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/R/BuildTimeSeries.R b/R/BuildTimeSeries.R
index 263561f..b599725 100644
--- a/R/BuildTimeSeries.R
+++ b/R/BuildTimeSeries.R
@@ -39,6 +39,8 @@ BuildTimeSeries <- function (dta, idField, varList_pre, startYear, endYear, colY
         }
     }
 
+    return dta
+    
     print(colnames(dta@data))
 
 

From fd97e1b0d0d1d58946049bfe48781357200d11a7 Mon Sep 17 00:00:00 2001
From: userz <sgoodman@aiddata.org>
Date: Tue, 15 Sep 2015 13:26:12 -0400
Subject: [PATCH 067/212] Test.

---
 R/BuildTimeSeries.R | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/R/BuildTimeSeries.R b/R/BuildTimeSeries.R
index b599725..ec6a68f 100644
--- a/R/BuildTimeSeries.R
+++ b/R/BuildTimeSeries.R
@@ -39,8 +39,8 @@ BuildTimeSeries <- function (dta, idField, varList_pre, startYear, endYear, colY
         }
     }
 
-    return dta
-    
+    return(dta)
+
     print(colnames(dta@data))
 
 

From 0f36d656d494c099ccc36185b093db8d5c913207 Mon Sep 17 00:00:00 2001
From: userz <sgoodman@aiddata.org>
Date: Tue, 15 Sep 2015 13:32:35 -0400
Subject: [PATCH 068/212] Test.

---
 R/BuildTimeSeries.R | 21 +++++++++++----------
 1 file changed, 11 insertions(+), 10 deletions(-)

diff --git a/R/BuildTimeSeries.R b/R/BuildTimeSeries.R
index ec6a68f..ae93f7b 100644
--- a/R/BuildTimeSeries.R
+++ b/R/BuildTimeSeries.R
@@ -15,7 +15,7 @@ BuildTimeSeries <- function (dta, idField, varList_pre, startYear, endYear, colY
         # For each variable, for each year, create a binary representing the treatment status.
         for (j in 1:length(colYears)) {
 
-            dta@data[colYears[j]] <- lapply(dta@data[colYears[j]], as.Date)
+            # dta@data[colYears[j]] <- lapply(dta@data[colYears[j]], as.Date)
 
             for (k in 1:length(years)) {
 
@@ -25,16 +25,17 @@ BuildTimeSeries <- function (dta, idField, varList_pre, startYear, endYear, colY
                 # exec <- paste("dta$",varN," = 0", sep="")
                 # eval(parse(text=exec))
 
-                dta@data[[varN]] <- lapply(dta@data[colYears[j]], function (cYear) {
-                    if (cYear <= as.Date(paste(years[k],"01","01", sep="-"))) {
-                        return(1)
-                    } else {
-                        return(0)
-                    }
-                })
+                # dta@data[[varN]] <- lapply(dta@data[colYears[j]], function (colYear_date) {
+                #     if (colYear_date <= as.Date(paste(years[k],"01","01", sep="-"))) {
+                #         return(1)
+                #     } else {
+                #         return(0)
+                #     }
+                # })
+
+                dta@data[[varN]] <- 0
+                dta@data[[varN]][dta@data[[colYears[j]]] <= as.Date(paste(years[k],"01","01", sep="-"))] <- 1
 
-                # dta@data[[varN]] <- 0
-                # dta@data[[varN]][dta@data[colYears[j]] <= as.Date(paste(years[k],"01","01", sep="-"))] <- 1
             }
         }
     }

From 2627231ecb689c3d0b8a6420d61035c845e037f2 Mon Sep 17 00:00:00 2001
From: userz <sgoodman@aiddata.org>
Date: Tue, 15 Sep 2015 13:55:53 -0400
Subject: [PATCH 069/212] Test.

---
 R/BuildTimeSeries.R | 91 +++++++++++++++++++--------------------------
 1 file changed, 38 insertions(+), 53 deletions(-)

diff --git a/R/BuildTimeSeries.R b/R/BuildTimeSeries.R
index ae93f7b..59e5e5b 100644
--- a/R/BuildTimeSeries.R
+++ b/R/BuildTimeSeries.R
@@ -5,9 +5,7 @@ BuildTimeSeries <- function (dta, idField, varList_pre, startYear, endYear, colY
 
 
 
-
     print("bts1")
-    timer <- proc.time()
 
     # If there is a "colYears" variable, convert to binaries.
     # Eventually could be extended to more than one column.
@@ -15,40 +13,20 @@ BuildTimeSeries <- function (dta, idField, varList_pre, startYear, endYear, colY
         # For each variable, for each year, create a binary representing the treatment status.
         for (j in 1:length(colYears)) {
 
-            # dta@data[colYears[j]] <- lapply(dta@data[colYears[j]], as.Date)
-
             for (k in 1:length(years)) {
 
-                varN <- paste("TrtMnt_",colYears[j],"_",years[k], sep="")
+                varN = paste("TrtMnt_",colYears[j],"_",years[k], sep="")
                 print(varN)
 
-                # exec <- paste("dta$",varN," = 0", sep="")
-                # eval(parse(text=exec))
-
-                # dta@data[[varN]] <- lapply(dta@data[colYears[j]], function (colYear_date) {
-                #     if (colYear_date <= as.Date(paste(years[k],"01","01", sep="-"))) {
-                #         return(1)
-                #     } else {
-                #         return(0)
-                #     }
-                # })
-
                 dta@data[[varN]] <- 0
                 dta@data[[varN]][dta@data[[colYears[j]]] <= as.Date(paste(years[k],"01","01", sep="-"))] <- 1
 
             }
-        }
-    }
-
-    return(dta)
 
-    print(colnames(dta@data))
-
-
-    # add the "TrtMnt_" + colYears[j] prefix to interpYears
-    for (j in 1:length(colYears)) {
-        trt_id = paste("TrtMnt_",colYears[j],"_####", sep="")
-        interpYears <- c(interpYears, trt_id)  
+            # add the "TrtMnt_" + colYears[j] prefix to interpYears
+            trt_id = paste("TrtMnt_",colYears[j],"_####", sep="")
+            interpYears <- c(interpYears, trt_id)    
+        }
     }
 
     print(interpYears)
@@ -58,24 +36,29 @@ BuildTimeSeries <- function (dta, idField, varList_pre, startYear, endYear, colY
 
 
 
-    print("bts3")
+    print("bts2")
     timer <- proc.time()
     
     # If there is an "interpVars" variable, linearly interpolate values based on at least 2 known points in time.
     if (!is.null(interpYears)) {
 
-        print("bts3.0")
-
         for (AncInt in 1:length(interpYears)) {
 
             print(interpYears[AncInt])
 
+
             cur_ancVi <- interpYears[AncInt]
+
+            # create interpolation data frame and add id field
             interpFrame <- dta@data[idField]
             interpFrame[idField] <- dta@data[idField]
+            
             cnt = 2
 
-            print("bts3.0.1")
+
+            print("bts2.0.1")
+
+            # add data to interpolation data frame
             if (cur_ancVi %in% colnames(dta@data)) {
                     # Exception for a single-point interpolation
                     interpFrame[cnt] <- dta@data[[cur_ancVi]]
@@ -93,45 +76,47 @@ BuildTimeSeries <- function (dta, idField, varList_pre, startYear, endYear, colY
                         colnames(interpFrame)[cnt] <- years[[k]]
                         cnt = cnt + 1
 
-                    } 
+                    }
                 }
             }
 
             print(cnt)
 
 
-            print("bts3.0.2")
-            # this is a slow part
+            print("bts2.0.2")
 
-            # Only one time point, so no interpolation is done - value is simply copied to all other columns.
             if (cnt == 3) {
-                print("bts3.0.2a")
+                # only one time point, so no interpolation is done - value is simply copied to all other columns.
+
+                print("bts2.0.2a")
                 for (k in 1:length(years)) {
                     # add _year to end of non temporal data
                     dta@data[[paste(cur_ancVi,years[[k]],sep="_")]] <- interpFrame[2]
                 }
 
             } else if (cnt < length(years) + 2) {
-                print("bts3.0.2b0")
-                tDframe <- dta@data[idField]
-
-                # Here, we model out everything. 
+                # run model if data exists for at least two years but not for all years
+                print("bts2.0.2b0")
 
                 # Melt the dataframe for modeling
                 melt_Model_dta <- melt(data.frame(interpFrame), id=idField)
                 melt_Model_dta["variable"] <- as.numeric(gsub("X", "", melt_Model_dta[["variable"]]))
                 
 
+                print("bts2.0.2b1")
+
                 # Fit the model for interpolation
-                print("bts3.0.2b1")
-                
+                # this is a slow part
                 execstr <- paste("mdl <- lm(value ~ variable + factor(",idField,"),data=melt_Model_dta)", sep="")
                 eval(parse(text=execstr))
                 # mdl <- lm(value ~ variable + factor(idField), data=melt_Model_dta)
 
-                print("bts3.0.2b2")
+
+                print("bts2.0.2b2")
 
                 # Apply the model to interpolate
+                tDframe <- dta@data[idField]
+
                 for (u in 1:length(years)) {
 
                     varI <- gsub('####', years[[u]], cur_ancVi)
@@ -147,7 +132,7 @@ BuildTimeSeries <- function (dta, idField, varList_pre, startYear, endYear, colY
 
         }
 
-        print("bts3.1")        
+        print("bts2.1")        
         # Append interpolated fields to our melting lists
         varList_pre <- c(varList_pre, interpYears)
     
@@ -159,17 +144,17 @@ BuildTimeSeries <- function (dta, idField, varList_pre, startYear, endYear, colY
     print(paste("section completed in", timer[3], "seconds."))
 
 
+    return(dta)
 
 
-
-    print("bts4")
+    print("bts3")
     timer <- proc.time()
 
     # Run the melts
     meltList <- list()
     for (i in 1:length(varList_pre)) {
 
-        print("bts4.0")
+        print("bts3.0")
         print(varList_pre[[i]])
 
         # Limit to only relevant years
@@ -186,28 +171,28 @@ BuildTimeSeries <- function (dta, idField, varList_pre, startYear, endYear, colY
         }
         print(grepStrYrs)
 
-        print("bts4.1")
+        print("bts3.1")
         print(names(dta@data))
 
         tDF <- dta@data[grepl(grepStrYrs, names(dta@data))]
         meltList[[i]] <- melt(tDF, id=idField)
         
-        print("bts4.2")
+        print("bts3.2")
         print(colnames(meltList[[i]]))
         # Keep only years in the year column, rename columns
         colnames(meltList[[i]])[2] <- "Year"
 
-        print("bts4.3")
+        print("bts3.3")
 
         colnames(meltList[[i]])[3] <- varList_pre[[i]]
 
-        print("bts4.4")
+        print("bts3.4")
 
         # Clean up year column
         gsub_command <- paste("^",varList_pre[[i]],sep="")
         meltList[[i]][2] <- gsub(gsub_command, "", as.matrix(meltList[[i]][2]))
         
-        print("bts4.5")
+        print("bts3.5")
 
         # Remove ID and year if this is at least the second variable to avoid duplications.
         if (i > 1) {
@@ -223,7 +208,7 @@ BuildTimeSeries <- function (dta, idField, varList_pre, startYear, endYear, colY
 
 
 
-    print("bts5")
+    print("bts4")
 
     # Finish up with a cherry on top
     meltListRet <- data.frame(meltList)

From f0b68900f53b0697c981c39f4705eae8df8b501f Mon Sep 17 00:00:00 2001
From: userz <sgoodman@aiddata.org>
Date: Tue, 15 Sep 2015 14:23:02 -0400
Subject: [PATCH 070/212] Test.

---
 R/BuildTimeSeries.R | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/R/BuildTimeSeries.R b/R/BuildTimeSeries.R
index 59e5e5b..cdf4ff0 100644
--- a/R/BuildTimeSeries.R
+++ b/R/BuildTimeSeries.R
@@ -61,7 +61,7 @@ BuildTimeSeries <- function (dta, idField, varList_pre, startYear, endYear, colY
             # add data to interpolation data frame
             if (cur_ancVi %in% colnames(dta@data)) {
                     # Exception for a single-point interpolation
-                    interpFrame[cnt] <- dta@data[[cur_ancVi]]
+                    interpFrame[[cnt]] <- dta@data[[cur_ancVi]]
                     cnt = 3
                     
             } else {
@@ -72,7 +72,7 @@ BuildTimeSeries <- function (dta, idField, varList_pre, startYear, endYear, colY
                     varI <- gsub('####', years[[k]], cur_ancVi)
                     if (varI %in% colnames(dta@data)) {
 
-                        interpFrame[cnt] <- dta@data[[varI]]
+                        interpFrame[[cnt]] <- dta@data[[varI]]
                         colnames(interpFrame)[cnt] <- years[[k]]
                         cnt = cnt + 1
 

From 700d531a899b599c7acfc9a55274cc5dd560f324 Mon Sep 17 00:00:00 2001
From: userz <sgoodman@aiddata.org>
Date: Tue, 15 Sep 2015 15:16:11 -0400
Subject: [PATCH 071/212] Test.

---
 R/BuildTimeSeries.R | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/R/BuildTimeSeries.R b/R/BuildTimeSeries.R
index cdf4ff0..fae07ab 100644
--- a/R/BuildTimeSeries.R
+++ b/R/BuildTimeSeries.R
@@ -91,7 +91,7 @@ BuildTimeSeries <- function (dta, idField, varList_pre, startYear, endYear, colY
                 print("bts2.0.2a")
                 for (k in 1:length(years)) {
                     # add _year to end of non temporal data
-                    dta@data[[paste(cur_ancVi,years[[k]],sep="_")]] <- interpFrame[2]
+                    dta@data[,paste(cur_ancVi,years[[k]],sep="_")] <- interpFrame[2]
                 }
 
             } else if (cnt < length(years) + 2) {

From 7aba41a38540ec289f0e0e873d8f7582dc0fa631 Mon Sep 17 00:00:00 2001
From: userz <sgoodman@aiddata.org>
Date: Tue, 15 Sep 2015 15:29:01 -0400
Subject: [PATCH 072/212] Test.

---
 R/BuildTimeSeries.R | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/R/BuildTimeSeries.R b/R/BuildTimeSeries.R
index fae07ab..fb0815a 100644
--- a/R/BuildTimeSeries.R
+++ b/R/BuildTimeSeries.R
@@ -91,7 +91,7 @@ BuildTimeSeries <- function (dta, idField, varList_pre, startYear, endYear, colY
                 print("bts2.0.2a")
                 for (k in 1:length(years)) {
                     # add _year to end of non temporal data
-                    dta@data[,paste(cur_ancVi,years[[k]],sep="_")] <- interpFrame[2]
+                    dta@data[paste(cur_ancVi,years[[k]],sep="_")] <- interpFrame[2]
                 }
 
             } else if (cnt < length(years) + 2) {

From 8baad95d5a040dfbf9c6ee229c8153326a0c3093 Mon Sep 17 00:00:00 2001
From: userz <sgoodman@aiddata.org>
Date: Tue, 15 Sep 2015 15:30:28 -0400
Subject: [PATCH 073/212] Test.

---
 R/BuildTimeSeries.R | 2 --
 1 file changed, 2 deletions(-)

diff --git a/R/BuildTimeSeries.R b/R/BuildTimeSeries.R
index fb0815a..06c38d2 100644
--- a/R/BuildTimeSeries.R
+++ b/R/BuildTimeSeries.R
@@ -144,8 +144,6 @@ BuildTimeSeries <- function (dta, idField, varList_pre, startYear, endYear, colY
     print(paste("section completed in", timer[3], "seconds."))
 
 
-    return(dta)
-
 
     print("bts3")
     timer <- proc.time()

From 4a148fbbc0ef729a5cab2ffd58c60a66ab807b12 Mon Sep 17 00:00:00 2001
From: userz <sgoodman@aiddata.org>
Date: Tue, 15 Sep 2015 15:35:04 -0400
Subject: [PATCH 074/212] Test.

---
 R/BuildTimeSeries.R | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/R/BuildTimeSeries.R b/R/BuildTimeSeries.R
index 06c38d2..fb0815a 100644
--- a/R/BuildTimeSeries.R
+++ b/R/BuildTimeSeries.R
@@ -144,6 +144,8 @@ BuildTimeSeries <- function (dta, idField, varList_pre, startYear, endYear, colY
     print(paste("section completed in", timer[3], "seconds."))
 
 
+    return(dta)
+
 
     print("bts3")
     timer <- proc.time()

From ef5a7f3632940406df6f6fbdefcda3109636c6ca Mon Sep 17 00:00:00 2001
From: userz <sgoodman@aiddata.org>
Date: Tue, 15 Sep 2015 15:40:44 -0400
Subject: [PATCH 075/212] Test.

---
 R/BuildTimeSeries.R | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/R/BuildTimeSeries.R b/R/BuildTimeSeries.R
index fb0815a..7a3c21a 100644
--- a/R/BuildTimeSeries.R
+++ b/R/BuildTimeSeries.R
@@ -144,7 +144,6 @@ BuildTimeSeries <- function (dta, idField, varList_pre, startYear, endYear, colY
     print(paste("section completed in", timer[3], "seconds."))
 
 
-    return(dta)
 
 
     print("bts3")
@@ -172,13 +171,13 @@ BuildTimeSeries <- function (dta, idField, varList_pre, startYear, endYear, colY
         print(grepStrYrs)
 
         print("bts3.1")
-        print(names(dta@data))
+        # print(names(dta@data))
 
         tDF <- dta@data[grepl(grepStrYrs, names(dta@data))]
         meltList[[i]] <- melt(tDF, id=idField)
         
         print("bts3.2")
-        print(colnames(meltList[[i]]))
+        # print(colnames(meltList[[i]]))
         # Keep only years in the year column, rename columns
         colnames(meltList[[i]])[2] <- "Year"
 

From 9cc855b8246951cb6edb414ae08fccc0ebd8c5eb Mon Sep 17 00:00:00 2001
From: userz <sgoodman@aiddata.org>
Date: Tue, 15 Sep 2015 15:52:31 -0400
Subject: [PATCH 076/212] Test.

---
 R/BuildTimeSeries.R | 11 +++++++++--
 1 file changed, 9 insertions(+), 2 deletions(-)

diff --git a/R/BuildTimeSeries.R b/R/BuildTimeSeries.R
index 7a3c21a..0c08971 100644
--- a/R/BuildTimeSeries.R
+++ b/R/BuildTimeSeries.R
@@ -188,9 +188,16 @@ BuildTimeSeries <- function (dta, idField, varList_pre, startYear, endYear, colY
         print("bts3.4")
 
         # Clean up year column
-        gsub_command <- paste("^",varList_pre[[i]],sep="")
-        meltList[[i]][2] <- gsub(gsub_command, "", as.matrix(meltList[[i]][2]))
+        # gsub_command <- paste("^",varList_pre[[i]],sep="")
+        # meltList[[i]][2] <- gsub(gsub_command, "", as.matrix(meltList[[i]][2]))
         
+        if (regexpr("####", varList_pre[[i]], fixed=TRUE)[1] == -1) {
+            meltList[[i]][2] <- lapply(meltList[[i]][2], function (z) {return substr(z,regexpr("####", varList_pre[[i]], fixed=TRUE)[1],nchar("####")})
+        } else {
+            meltList[[i]][2] <- lapply(meltList[[i]][2], function (z) {return substr(z,nchar(z)-nchar("####")+1,nchar(z)})
+        }
+
+
         print("bts3.5")
 
         # Remove ID and year if this is at least the second variable to avoid duplications.

From 3957e713892378698a02e8508404c68bb2e23b88 Mon Sep 17 00:00:00 2001
From: userz <sgoodman@aiddata.org>
Date: Tue, 15 Sep 2015 15:53:15 -0400
Subject: [PATCH 077/212] Test.

---
 R/BuildTimeSeries.R | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/R/BuildTimeSeries.R b/R/BuildTimeSeries.R
index 0c08971..ec0367f 100644
--- a/R/BuildTimeSeries.R
+++ b/R/BuildTimeSeries.R
@@ -192,9 +192,9 @@ BuildTimeSeries <- function (dta, idField, varList_pre, startYear, endYear, colY
         # meltList[[i]][2] <- gsub(gsub_command, "", as.matrix(meltList[[i]][2]))
         
         if (regexpr("####", varList_pre[[i]], fixed=TRUE)[1] == -1) {
-            meltList[[i]][2] <- lapply(meltList[[i]][2], function (z) {return substr(z,regexpr("####", varList_pre[[i]], fixed=TRUE)[1],nchar("####")})
+            meltList[[i]][2] <- lapply(meltList[[i]][2], function (z) {return substr(z,regexpr("####", varList_pre[[i]], fixed=TRUE)[1],nchar("####"))})
         } else {
-            meltList[[i]][2] <- lapply(meltList[[i]][2], function (z) {return substr(z,nchar(z)-nchar("####")+1,nchar(z)})
+            meltList[[i]][2] <- lapply(meltList[[i]][2], function (z) {return substr(z,nchar(z)-nchar("####")+1,nchar(z))})
         }
 
 

From ced445ee592be1a777280ff10fe1d4441f409271 Mon Sep 17 00:00:00 2001
From: userz <sgoodman@aiddata.org>
Date: Tue, 15 Sep 2015 15:54:47 -0400
Subject: [PATCH 078/212] Test.

---
 R/BuildTimeSeries.R | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/R/BuildTimeSeries.R b/R/BuildTimeSeries.R
index ec0367f..632fdbe 100644
--- a/R/BuildTimeSeries.R
+++ b/R/BuildTimeSeries.R
@@ -192,9 +192,11 @@ BuildTimeSeries <- function (dta, idField, varList_pre, startYear, endYear, colY
         # meltList[[i]][2] <- gsub(gsub_command, "", as.matrix(meltList[[i]][2]))
         
         if (regexpr("####", varList_pre[[i]], fixed=TRUE)[1] == -1) {
-            meltList[[i]][2] <- lapply(meltList[[i]][2], function (z) {return substr(z,regexpr("####", varList_pre[[i]], fixed=TRUE)[1],nchar("####"))})
-        } else {
             meltList[[i]][2] <- lapply(meltList[[i]][2], function (z) {return substr(z,nchar(z)-nchar("####")+1,nchar(z))})
+
+        } else {
+            meltList[[i]][2] <- lapply(meltList[[i]][2], function (z) {return substr(z,regexpr("####", varList_pre[[i]], fixed=TRUE)[1],nchar("####"))})
+
         }
 
 

From c4f322b4ade90b16e06c097b30afbbb5a79cec62 Mon Sep 17 00:00:00 2001
From: userz <sgoodman@aiddata.org>
Date: Tue, 15 Sep 2015 15:56:18 -0400
Subject: [PATCH 079/212] Test.

---
 R/BuildTimeSeries.R | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/R/BuildTimeSeries.R b/R/BuildTimeSeries.R
index 632fdbe..27670c2 100644
--- a/R/BuildTimeSeries.R
+++ b/R/BuildTimeSeries.R
@@ -192,10 +192,12 @@ BuildTimeSeries <- function (dta, idField, varList_pre, startYear, endYear, colY
         # meltList[[i]][2] <- gsub(gsub_command, "", as.matrix(meltList[[i]][2]))
         
         if (regexpr("####", varList_pre[[i]], fixed=TRUE)[1] == -1) {
-            meltList[[i]][2] <- lapply(meltList[[i]][2], function (z) {return substr(z,nchar(z)-nchar("####")+1,nchar(z))})
+            meltList[[i]][2] <- lapply(meltList[[i]][2], function (z) {
+                return(substr(z,nchar(z)-nchar("####")+1,nchar(z)))
+                })
 
         } else {
-            meltList[[i]][2] <- lapply(meltList[[i]][2], function (z) {return substr(z,regexpr("####", varList_pre[[i]], fixed=TRUE)[1],nchar("####"))})
+            meltList[[i]][2] <- lapply(meltList[[i]][2], function (z) {return(substr(z,regexpr("####", varList_pre[[i]], fixed=TRUE)[1],nchar("####")))})
 
         }
 

From c9e154af5d68d8581595b68cd1b2c003e14e10c0 Mon Sep 17 00:00:00 2001
From: userz <sgoodman@aiddata.org>
Date: Tue, 15 Sep 2015 15:58:05 -0400
Subject: [PATCH 080/212] Test.

---
 R/BuildTimeSeries.R | 10 +++++++---
 1 file changed, 7 insertions(+), 3 deletions(-)

diff --git a/R/BuildTimeSeries.R b/R/BuildTimeSeries.R
index 27670c2..a137837 100644
--- a/R/BuildTimeSeries.R
+++ b/R/BuildTimeSeries.R
@@ -193,11 +193,15 @@ BuildTimeSeries <- function (dta, idField, varList_pre, startYear, endYear, colY
         
         if (regexpr("####", varList_pre[[i]], fixed=TRUE)[1] == -1) {
             meltList[[i]][2] <- lapply(meltList[[i]][2], function (z) {
-                return(substr(z,nchar(z)-nchar("####")+1,nchar(z)))
-                })
+                print(z)
+                return(substr(z, nchar(z)-nchar("####")+1, nchar(z)))
+            })
 
         } else {
-            meltList[[i]][2] <- lapply(meltList[[i]][2], function (z) {return(substr(z,regexpr("####", varList_pre[[i]], fixed=TRUE)[1],nchar("####")))})
+            meltList[[i]][2] <- lapply(meltList[[i]][2], function (z) {
+                print(z)
+                return(substr(z, regexpr("####", varList_pre[[i]], fixed=TRUE)[1], nchar("####")))
+            })
 
         }
 

From ce55dea03b433e736642c0f5fb2fab503cd4b5b4 Mon Sep 17 00:00:00 2001
From: userz <sgoodman@aiddata.org>
Date: Tue, 15 Sep 2015 16:02:41 -0400
Subject: [PATCH 081/212] Test.

---
 R/BuildTimeSeries.R | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/R/BuildTimeSeries.R b/R/BuildTimeSeries.R
index a137837..618c0e4 100644
--- a/R/BuildTimeSeries.R
+++ b/R/BuildTimeSeries.R
@@ -168,6 +168,7 @@ BuildTimeSeries <- function (dta, idField, varList_pre, startYear, endYear, colY
                 grepStrYrs <- paste(tempGrep,"|",gsub('####', years[[j]], varList_pre[[i]]), sep="")
             }
         }
+
         print(grepStrYrs)
 
         print("bts3.1")
@@ -193,13 +194,15 @@ BuildTimeSeries <- function (dta, idField, varList_pre, startYear, endYear, colY
         
         if (regexpr("####", varList_pre[[i]], fixed=TRUE)[1] == -1) {
             meltList[[i]][2] <- lapply(meltList[[i]][2], function (z) {
-                print(z)
+                # print(z)
+                z = toString(z)
                 return(substr(z, nchar(z)-nchar("####")+1, nchar(z)))
             })
 
         } else {
             meltList[[i]][2] <- lapply(meltList[[i]][2], function (z) {
-                print(z)
+                # print(z)
+                z = toString(z)
                 return(substr(z, regexpr("####", varList_pre[[i]], fixed=TRUE)[1], nchar("####")))
             })
 

From cf33f9ab4d28a9c61771e5955e157bf0c4fc7ef8 Mon Sep 17 00:00:00 2001
From: userz <sgoodman@aiddata.org>
Date: Tue, 15 Sep 2015 16:07:40 -0400
Subject: [PATCH 082/212] Test.

---
 R/BuildTimeSeries.R | 30 +++++++++++++++---------------
 1 file changed, 15 insertions(+), 15 deletions(-)

diff --git a/R/BuildTimeSeries.R b/R/BuildTimeSeries.R
index 618c0e4..b3e1870 100644
--- a/R/BuildTimeSeries.R
+++ b/R/BuildTimeSeries.R
@@ -192,21 +192,21 @@ BuildTimeSeries <- function (dta, idField, varList_pre, startYear, endYear, colY
         # gsub_command <- paste("^",varList_pre[[i]],sep="")
         # meltList[[i]][2] <- gsub(gsub_command, "", as.matrix(meltList[[i]][2]))
         
-        if (regexpr("####", varList_pre[[i]], fixed=TRUE)[1] == -1) {
-            meltList[[i]][2] <- lapply(meltList[[i]][2], function (z) {
-                # print(z)
-                z = toString(z)
-                return(substr(z, nchar(z)-nchar("####")+1, nchar(z)))
-            })
-
-        } else {
-            meltList[[i]][2] <- lapply(meltList[[i]][2], function (z) {
-                # print(z)
-                z = toString(z)
-                return(substr(z, regexpr("####", varList_pre[[i]], fixed=TRUE)[1], nchar("####")))
-            })
-
-        }
+        # if (regexpr("####", varList_pre[[i]], fixed=TRUE)[1] == -1) {
+        #     meltList[[i]][2] <- lapply(meltList[[i]][2], function (z) {
+        #         # print(z)
+        #         z = toString(z)
+        #         return(substr(z, nchar(z)-nchar("####")+1, nchar(z)))
+        #     })
+
+        # } else {
+        #     meltList[[i]][2] <- lapply(meltList[[i]][2], function (z) {
+        #         # print(z)
+        #         z = toString(z)
+        #         return(substr(z, regexpr("####", varList_pre[[i]], fixed=TRUE)[1], nchar("####")))
+        #     })
+
+        # }
 
 
         print("bts3.5")

From e49d4fccbac73c27e63ff6195e36324462420c3e Mon Sep 17 00:00:00 2001
From: userz <sgoodman@aiddata.org>
Date: Tue, 15 Sep 2015 16:13:36 -0400
Subject: [PATCH 083/212] Test.

---
 R/BuildTimeSeries.R | 35 ++++++++++++++++++-----------------
 1 file changed, 18 insertions(+), 17 deletions(-)

diff --git a/R/BuildTimeSeries.R b/R/BuildTimeSeries.R
index b3e1870..2733ca7 100644
--- a/R/BuildTimeSeries.R
+++ b/R/BuildTimeSeries.R
@@ -192,28 +192,29 @@ BuildTimeSeries <- function (dta, idField, varList_pre, startYear, endYear, colY
         # gsub_command <- paste("^",varList_pre[[i]],sep="")
         # meltList[[i]][2] <- gsub(gsub_command, "", as.matrix(meltList[[i]][2]))
         
-        # if (regexpr("####", varList_pre[[i]], fixed=TRUE)[1] == -1) {
-        #     meltList[[i]][2] <- lapply(meltList[[i]][2], function (z) {
-        #         # print(z)
-        #         z = toString(z)
-        #         return(substr(z, nchar(z)-nchar("####")+1, nchar(z)))
-        #     })
-
-        # } else {
-        #     meltList[[i]][2] <- lapply(meltList[[i]][2], function (z) {
-        #         # print(z)
-        #         z = toString(z)
-        #         return(substr(z, regexpr("####", varList_pre[[i]], fixed=TRUE)[1], nchar("####")))
-        #     })
-
-        # }
-
 
         print("bts3.5")
 
-        # Remove ID and year if this is at least the second variable to avoid duplications.
         if (i > 1) {
+            # Remove ID and year after first pass to avoid duplications
             meltList[[i]] <- meltList[[i]][3]
+
+        } else {
+            # for first pass format year 
+
+            # format year
+            meltList[[i]][2] <- lapply(meltList[[i]][2], as.character)
+            if (regexpr("####", varList_pre[[i]], fixed=TRUE)[1] == -1) {
+                meltList[[i]][2] <- lapply(meltList[[i]][2], function (z) {
+                    return(substr(z, nchar(z)-nchar("####")+1, nchar(z)))
+                })
+
+            } else {
+                meltList[[i]][2] <- lapply(meltList[[i]][2], function (z) {
+                    return(substr(z, regexpr("####", varList_pre[[i]], fixed=TRUE)[1], nchar("####")))
+                })
+
+            }
         }
 
     }

From 2b03b4c0e6c9f8f566e87c0991259271b1069a7e Mon Sep 17 00:00:00 2001
From: userz <sgoodman@aiddata.org>
Date: Tue, 15 Sep 2015 16:14:51 -0400
Subject: [PATCH 084/212] Test.

---
 R/BuildTimeSeries.R | 18 +++++++++---------
 1 file changed, 9 insertions(+), 9 deletions(-)

diff --git a/R/BuildTimeSeries.R b/R/BuildTimeSeries.R
index 2733ca7..3a88fa1 100644
--- a/R/BuildTimeSeries.R
+++ b/R/BuildTimeSeries.R
@@ -204,17 +204,17 @@ BuildTimeSeries <- function (dta, idField, varList_pre, startYear, endYear, colY
 
             # format year
             meltList[[i]][2] <- lapply(meltList[[i]][2], as.character)
-            if (regexpr("####", varList_pre[[i]], fixed=TRUE)[1] == -1) {
-                meltList[[i]][2] <- lapply(meltList[[i]][2], function (z) {
-                    return(substr(z, nchar(z)-nchar("####")+1, nchar(z)))
-                })
+            # if (regexpr("####", varList_pre[[i]], fixed=TRUE)[1] == -1) {
+            #     meltList[[i]][2] <- lapply(meltList[[i]][2], function (z) {
+            #         return(substr(z, nchar(z)-nchar("####")+1, nchar(z)))
+            #     })
 
-            } else {
-                meltList[[i]][2] <- lapply(meltList[[i]][2], function (z) {
-                    return(substr(z, regexpr("####", varList_pre[[i]], fixed=TRUE)[1], nchar("####")))
-                })
+            # } else {
+            #     meltList[[i]][2] <- lapply(meltList[[i]][2], function (z) {
+            #         return(substr(z, regexpr("####", varList_pre[[i]], fixed=TRUE)[1], nchar("####")))
+            #     })
 
-            }
+            # }
         }
 
     }

From f91c1eb1fd3853135b307c027486dea9d7df1b8a Mon Sep 17 00:00:00 2001
From: userz <sgoodman@aiddata.org>
Date: Tue, 15 Sep 2015 16:17:29 -0400
Subject: [PATCH 085/212] Test.

---
 R/BuildTimeSeries.R | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/R/BuildTimeSeries.R b/R/BuildTimeSeries.R
index 3a88fa1..ee4a61b 100644
--- a/R/BuildTimeSeries.R
+++ b/R/BuildTimeSeries.R
@@ -196,14 +196,14 @@ BuildTimeSeries <- function (dta, idField, varList_pre, startYear, endYear, colY
         print("bts3.5")
 
         if (i > 1) {
-            # Remove ID and year after first pass to avoid duplications
+            # remove id and year after first pass to avoid duplications
             meltList[[i]] <- meltList[[i]][3]
 
-        } else {
+        } #else {
             # for first pass format year 
 
-            # format year
-            meltList[[i]][2] <- lapply(meltList[[i]][2], as.character)
+            # meltList[[i]][2] <- lapply(meltList[[i]][2], as.character)
+
             # if (regexpr("####", varList_pre[[i]], fixed=TRUE)[1] == -1) {
             #     meltList[[i]][2] <- lapply(meltList[[i]][2], function (z) {
             #         return(substr(z, nchar(z)-nchar("####")+1, nchar(z)))
@@ -215,7 +215,7 @@ BuildTimeSeries <- function (dta, idField, varList_pre, startYear, endYear, colY
             #     })
 
             # }
-        }
+        # }
 
     }
 

From 2766df74f7d20b6c0efc4174f59c9a5b1db57b2f Mon Sep 17 00:00:00 2001
From: userz <sgoodman@aiddata.org>
Date: Tue, 15 Sep 2015 16:25:10 -0400
Subject: [PATCH 086/212] Test.

---
 R/BuildTimeSeries.R | 43 +++++++++++++++++++++++--------------------
 1 file changed, 23 insertions(+), 20 deletions(-)

diff --git a/R/BuildTimeSeries.R b/R/BuildTimeSeries.R
index ee4a61b..4e2897a 100644
--- a/R/BuildTimeSeries.R
+++ b/R/BuildTimeSeries.R
@@ -195,27 +195,15 @@ BuildTimeSeries <- function (dta, idField, varList_pre, startYear, endYear, colY
 
         print("bts3.5")
 
-        if (i > 1) {
+        if (i == 1) {
+            # set field to use for regex when formatting year field later
+            year_regex_field <- varList_pre[[i]]
+
+        } else {
             # remove id and year after first pass to avoid duplications
             meltList[[i]] <- meltList[[i]][3]
 
-        } #else {
-            # for first pass format year 
-
-            # meltList[[i]][2] <- lapply(meltList[[i]][2], as.character)
-
-            # if (regexpr("####", varList_pre[[i]], fixed=TRUE)[1] == -1) {
-            #     meltList[[i]][2] <- lapply(meltList[[i]][2], function (z) {
-            #         return(substr(z, nchar(z)-nchar("####")+1, nchar(z)))
-            #     })
-
-            # } else {
-            #     meltList[[i]][2] <- lapply(meltList[[i]][2], function (z) {
-            #         return(substr(z, regexpr("####", varList_pre[[i]], fixed=TRUE)[1], nchar("####")))
-            #     })
-
-            # }
-        # }
+        }
 
     }
 
@@ -228,9 +216,24 @@ BuildTimeSeries <- function (dta, idField, varList_pre, startYear, endYear, colY
 
     print("bts4")
 
-    # Finish up with a cherry on top
+    # convert meltList to data frame
     meltListRet <- data.frame(meltList)
-  
+
+    # format year 
+    meltListRet['Year'] <- lapply(meltListRet['Year'], as.character)
+
+    if (regexpr("####", year_regex_field, fixed=TRUE)[1] == -1) {
+        meltListRet['Year'] <- lapply(meltListRet['Year'], function (z) {
+            return(substr(z, nchar(z)-nchar("####")+1, nchar(z)))
+        })
+
+    } else {
+        meltListRet['Year'] <- lapply(meltListRet['Year'], function (z) {
+            return(substr(z, regexpr("####", year_regex_field, fixed=TRUE)[1], nchar("####")))
+        })
+
+    }
+
     return(meltListRet)
 }
 

From fd96062b919054d55ee98bf1ad8d7b49dea74dd4 Mon Sep 17 00:00:00 2001
From: userz <sgoodman@aiddata.org>
Date: Tue, 15 Sep 2015 16:26:54 -0400
Subject: [PATCH 087/212] Test.

---
 R/BuildTimeSeries.R | 22 +++++++++++-----------
 1 file changed, 11 insertions(+), 11 deletions(-)

diff --git a/R/BuildTimeSeries.R b/R/BuildTimeSeries.R
index 4e2897a..c24deec 100644
--- a/R/BuildTimeSeries.R
+++ b/R/BuildTimeSeries.R
@@ -219,20 +219,20 @@ BuildTimeSeries <- function (dta, idField, varList_pre, startYear, endYear, colY
     # convert meltList to data frame
     meltListRet <- data.frame(meltList)
 
-    # format year 
-    meltListRet['Year'] <- lapply(meltListRet['Year'], as.character)
+    # # format year 
+    # meltListRet['Year'] <- lapply(meltListRet['Year'], as.character)
 
-    if (regexpr("####", year_regex_field, fixed=TRUE)[1] == -1) {
-        meltListRet['Year'] <- lapply(meltListRet['Year'], function (z) {
-            return(substr(z, nchar(z)-nchar("####")+1, nchar(z)))
-        })
+    # if (regexpr("####", year_regex_field, fixed=TRUE)[1] == -1) {
+    #     meltListRet['Year'] <- lapply(meltListRet['Year'], function (z) {
+    #         return(substr(z, nchar(z)-nchar("####")+1, nchar(z)))
+    #     })
 
-    } else {
-        meltListRet['Year'] <- lapply(meltListRet['Year'], function (z) {
-            return(substr(z, regexpr("####", year_regex_field, fixed=TRUE)[1], nchar("####")))
-        })
+    # } else {
+    #     meltListRet['Year'] <- lapply(meltListRet['Year'], function (z) {
+    #         return(substr(z, regexpr("####", year_regex_field, fixed=TRUE)[1], nchar("####")))
+    #     })
 
-    }
+    # }
 
     return(meltListRet)
 }

From 9f4c52d4bf745b7891df776265c2b0bbd3d3ae3d Mon Sep 17 00:00:00 2001
From: userz <sgoodman@aiddata.org>
Date: Tue, 15 Sep 2015 16:37:30 -0400
Subject: [PATCH 088/212] Test.

---
 R/BuildTimeSeries.R | 27 ++++++++++++---------------
 1 file changed, 12 insertions(+), 15 deletions(-)

diff --git a/R/BuildTimeSeries.R b/R/BuildTimeSeries.R
index c24deec..c23c6d8 100644
--- a/R/BuildTimeSeries.R
+++ b/R/BuildTimeSeries.R
@@ -213,26 +213,23 @@ BuildTimeSeries <- function (dta, idField, varList_pre, startYear, endYear, colY
 
 
 
-
     print("bts4")
 
     # convert meltList to data frame
     meltListRet <- data.frame(meltList)
 
-    # # format year 
-    # meltListRet['Year'] <- lapply(meltListRet['Year'], as.character)
-
-    # if (regexpr("####", year_regex_field, fixed=TRUE)[1] == -1) {
-    #     meltListRet['Year'] <- lapply(meltListRet['Year'], function (z) {
-    #         return(substr(z, nchar(z)-nchar("####")+1, nchar(z)))
-    #     })
-
-    # } else {
-    #     meltListRet['Year'] <- lapply(meltListRet['Year'], function (z) {
-    #         return(substr(z, regexpr("####", year_regex_field, fixed=TRUE)[1], nchar("####")))
-    #     })
-
-    # }
+    # format year
+    regex_test <-regexpr("####", year_regex_field, fixed=TRUE)[1]
+    if (regex_test > -1) {
+        meltListRet['Year'] <- lapply(meltListRet['Year'], function (z) {
+            return(as.numeric(substr(z, regex_test, regex_test+nchar("####")-1)))
+        })
+
+    } else {
+        meltListRet['Year'] <- lapply(meltListRet['Year'], function (z) {
+            return(as.numeric(substr(z, nchar(z)-nchar("####")+1, nchar(z))))
+        })
+    }
 
     return(meltListRet)
 }

From b2ecccaf02b49736a7f8d9223c0d052811a240e2 Mon Sep 17 00:00:00 2001
From: userz <sgoodman@aiddata.org>
Date: Tue, 15 Sep 2015 16:44:13 -0400
Subject: [PATCH 089/212] Test.

---
 R/BuildTimeSeries.R | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/R/BuildTimeSeries.R b/R/BuildTimeSeries.R
index c23c6d8..8d90d63 100644
--- a/R/BuildTimeSeries.R
+++ b/R/BuildTimeSeries.R
@@ -219,7 +219,7 @@ BuildTimeSeries <- function (dta, idField, varList_pre, startYear, endYear, colY
     meltListRet <- data.frame(meltList)
 
     # format year
-    regex_test <-regexpr("####", year_regex_field, fixed=TRUE)[1]
+    regex_test <- regexpr("####", year_regex_field, fixed=TRUE)[1]
     if (regex_test > -1) {
         meltListRet['Year'] <- lapply(meltListRet['Year'], function (z) {
             return(as.numeric(substr(z, regex_test, regex_test+nchar("####")-1)))
@@ -231,6 +231,8 @@ BuildTimeSeries <- function (dta, idField, varList_pre, startYear, endYear, colY
         })
     }
 
+    colnames(meltListRet) <- lapply(colnames(meltListRet), function (z) {return gsub("....", "#####", z, fixed=TRUE)})
+
     return(meltListRet)
 }
 

From 705aba76120040fb18c6bba89452e56ffef0353b Mon Sep 17 00:00:00 2001
From: userz <sgoodman@aiddata.org>
Date: Tue, 15 Sep 2015 16:45:07 -0400
Subject: [PATCH 090/212] Test.

---
 R/BuildTimeSeries.R | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/R/BuildTimeSeries.R b/R/BuildTimeSeries.R
index 8d90d63..2c7dfa5 100644
--- a/R/BuildTimeSeries.R
+++ b/R/BuildTimeSeries.R
@@ -231,7 +231,7 @@ BuildTimeSeries <- function (dta, idField, varList_pre, startYear, endYear, colY
         })
     }
 
-    colnames(meltListRet) <- lapply(colnames(meltListRet), function (z) {return gsub("....", "#####", z, fixed=TRUE)})
+    colnames(meltListRet) <- lapply(colnames(meltListRet), function (z) {return(gsub("....", "#####", z, fixed=TRUE))})
 
     return(meltListRet)
 }

From d904678a97a77621dc9d5c58a545cbef2275aee1 Mon Sep 17 00:00:00 2001
From: userz <sgoodman@aiddata.org>
Date: Tue, 15 Sep 2015 16:45:55 -0400
Subject: [PATCH 091/212] Test.

---
 R/BuildTimeSeries.R | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/R/BuildTimeSeries.R b/R/BuildTimeSeries.R
index 2c7dfa5..fe7afcf 100644
--- a/R/BuildTimeSeries.R
+++ b/R/BuildTimeSeries.R
@@ -231,7 +231,7 @@ BuildTimeSeries <- function (dta, idField, varList_pre, startYear, endYear, colY
         })
     }
 
-    colnames(meltListRet) <- lapply(colnames(meltListRet), function (z) {return(gsub("....", "#####", z, fixed=TRUE))})
+    colnames(meltListRet) <- lapply(colnames(meltListRet), function (z) {return(gsub("....", "####", z, fixed=TRUE))})
 
     return(meltListRet)
 }

From d6536bffb33833b7201943ae0408fd6afa8c2560 Mon Sep 17 00:00:00 2001
From: userz <sgoodman@aiddata.org>
Date: Tue, 15 Sep 2015 16:50:58 -0400
Subject: [PATCH 092/212] Test.

---
 R/Stage2PSM.R | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/R/Stage2PSM.R b/R/Stage2PSM.R
index a9567ba..bb64480 100644
--- a/R/Stage2PSM.R
+++ b/R/Stage2PSM.R
@@ -41,13 +41,17 @@ Stage2PSM <- function (model, dta, type, table_out = NULL, opts = NULL) {
     }
   
     if (type == "cmreg") {
+
+        print("c1")
         m_fit <- lm(model,dta)
         ret_var[["unstandardized"]] <- m_fit
         #mTab <- stargazer(m_fit,type="html",title="Unstandardized Model Results")
         #print.htmlTable(mTab)
         print(summary(m_fit))
         texreg::plotreg(m_fit,omit.coef="(match)|(Intercept)|(factor)",custom.model.names="Unstandardized Model",custom.note=model)
-        
+
+        print("c2")
+
         if (!is.null(table_out)) {
             dta_tmp <- dta
           
@@ -65,6 +69,8 @@ Stage2PSM <- function (model, dta, type, table_out = NULL, opts = NULL) {
           
         }
         
+        print("c3")
+    
         print(opts)
         exec = paste("cluster.vcov(m_fit,cbind(dta$",opts[1],",dta$",opts[2],"))",sep="")
         m_fit[["var"]] <- eval(parse(text=exec))

From bbd6e0fad6dc582a79e4f22c476d1d5576a4ba93 Mon Sep 17 00:00:00 2001
From: userz <sgoodman@aiddata.org>
Date: Tue, 15 Sep 2015 16:54:02 -0400
Subject: [PATCH 093/212] Test.

---
 R/Stage2PSM.R | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/R/Stage2PSM.R b/R/Stage2PSM.R
index bb64480..4f02ce8 100644
--- a/R/Stage2PSM.R
+++ b/R/Stage2PSM.R
@@ -42,9 +42,13 @@ Stage2PSM <- function (model, dta, type, table_out = NULL, opts = NULL) {
   
     if (type == "cmreg") {
 
-        print("c1")
+        print("c1a")
         m_fit <- lm(model,dta)
+        print("c1b")
+
         ret_var[["unstandardized"]] <- m_fit
+
+        print("c1c")
         #mTab <- stargazer(m_fit,type="html",title="Unstandardized Model Results")
         #print.htmlTable(mTab)
         print(summary(m_fit))
@@ -68,7 +72,7 @@ Stage2PSM <- function (model, dta, type, table_out = NULL, opts = NULL) {
             texreg::plotreg(dta_fit_std,omit.coef="(match)|(Intercept)|(factor)",custom.model.names="Standardized Model", custom.note=model)
           
         }
-        
+
         print("c3")
     
         print(opts)

From e4e99f8551ef89184e64e679f3b778ed5757e194 Mon Sep 17 00:00:00 2001
From: userz <sgoodman@aiddata.org>
Date: Tue, 15 Sep 2015 17:04:48 -0400
Subject: [PATCH 094/212] Test.

---
 R/BuildTimeSeries.R | 18 ++++++++----------
 R/Stage2PSM.R       |  6 +++---
 2 files changed, 11 insertions(+), 13 deletions(-)

diff --git a/R/BuildTimeSeries.R b/R/BuildTimeSeries.R
index fe7afcf..cd07c88 100644
--- a/R/BuildTimeSeries.R
+++ b/R/BuildTimeSeries.R
@@ -24,7 +24,7 @@ BuildTimeSeries <- function (dta, idField, varList_pre, startYear, endYear, colY
             }
 
             # add the "TrtMnt_" + colYears[j] prefix to interpYears
-            trt_id = paste("TrtMnt_",colYears[j],"_####", sep="")
+            trt_id = paste("TrtMnt_",colYears[j],"_....", sep="")
             interpYears <- c(interpYears, trt_id)    
         }
     }
@@ -69,7 +69,7 @@ BuildTimeSeries <- function (dta, idField, varList_pre, startYear, endYear, colY
                     # First, build a model describing the relationship between years and any data in the interp field.
 
                     # Check if data exists for the year - if not, ignore.  If so, include in the new modeling frame.
-                    varI <- gsub('####', years[[k]], cur_ancVi)
+                    varI <- gsub('....', years[[k]], cur_ancVi)
                     if (varI %in% colnames(dta@data)) {
 
                         interpFrame[[cnt]] <- dta@data[[varI]]
@@ -119,7 +119,7 @@ BuildTimeSeries <- function (dta, idField, varList_pre, startYear, endYear, colY
 
                 for (u in 1:length(years)) {
 
-                    varI <- gsub('####', years[[u]], cur_ancVi)
+                    varI <- gsub('....', years[[u]], cur_ancVi)
                     if (!(varI %in% colnames(dta@data))) {
                         # Variable doesn't exist, so we need to interpolate.
                         tDframe[idField] <- dta@data[idField]
@@ -162,10 +162,10 @@ BuildTimeSeries <- function (dta, idField, varList_pre, startYear, endYear, colY
         for (j in 1:length(years)) {
             tempGrep <- grepStrYrs
 
-            if (regexpr("####", varList_pre[[i]], fixed=TRUE)[1] == -1) {
+            if (regexpr("....", varList_pre[[i]], fixed=TRUE)[1] == -1) {
                 grepStrYrs <- paste(tempGrep,"|",paste(varList_pre[[i]],years[[j]], sep="_"), sep="")
             } else {
-                grepStrYrs <- paste(tempGrep,"|",gsub('####', years[[j]], varList_pre[[i]]), sep="")
+                grepStrYrs <- paste(tempGrep,"|",gsub('....', years[[j]], varList_pre[[i]]), sep="")
             }
         }
 
@@ -219,20 +219,18 @@ BuildTimeSeries <- function (dta, idField, varList_pre, startYear, endYear, colY
     meltListRet <- data.frame(meltList)
 
     # format year
-    regex_test <- regexpr("####", year_regex_field, fixed=TRUE)[1]
+    regex_test <- regexpr("....", year_regex_field, fixed=TRUE)[1]
     if (regex_test > -1) {
         meltListRet['Year'] <- lapply(meltListRet['Year'], function (z) {
-            return(as.numeric(substr(z, regex_test, regex_test+nchar("####")-1)))
+            return(as.numeric(substr(z, regex_test, regex_test+nchar("....")-1)))
         })
 
     } else {
         meltListRet['Year'] <- lapply(meltListRet['Year'], function (z) {
-            return(as.numeric(substr(z, nchar(z)-nchar("####")+1, nchar(z))))
+            return(as.numeric(substr(z, nchar(z)-nchar("....")+1, nchar(z))))
         })
     }
 
-    colnames(meltListRet) <- lapply(colnames(meltListRet), function (z) {return(gsub("....", "####", z, fixed=TRUE))})
-
     return(meltListRet)
 }
 
diff --git a/R/Stage2PSM.R b/R/Stage2PSM.R
index 4f02ce8..2474dad 100644
--- a/R/Stage2PSM.R
+++ b/R/Stage2PSM.R
@@ -8,7 +8,7 @@ Stage2PSM <- function (model, dta, type, table_out = NULL, opts = NULL) {
     ret_var <- list()
   
     if (type == "lm") {
-        m_fit <- lm(model,dta)
+        m_fit <- lm(model, dta)
         print("==========================")
         print("UNSTANDARDIZED MODEL RESULTS")
         print("==========================")
@@ -43,9 +43,9 @@ Stage2PSM <- function (model, dta, type, table_out = NULL, opts = NULL) {
     if (type == "cmreg") {
 
         print("c1a")
-        m_fit <- lm(model,dta)
-        print("c1b")
+        m_fit <- lm(model, dta)
 
+        print("c1b")
         ret_var[["unstandardized"]] <- m_fit
 
         print("c1c")

From 512341ea2933f2e9d6c30c602d7aa5ca50a2d49c Mon Sep 17 00:00:00 2001
From: userz <sgoodman@aiddata.org>
Date: Tue, 15 Sep 2015 17:09:11 -0400
Subject: [PATCH 095/212] Test.

---
 R/BuildTimeSeries.R | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/R/BuildTimeSeries.R b/R/BuildTimeSeries.R
index cd07c88..8a77d7d 100644
--- a/R/BuildTimeSeries.R
+++ b/R/BuildTimeSeries.R
@@ -69,7 +69,7 @@ BuildTimeSeries <- function (dta, idField, varList_pre, startYear, endYear, colY
                     # First, build a model describing the relationship between years and any data in the interp field.
 
                     # Check if data exists for the year - if not, ignore.  If so, include in the new modeling frame.
-                    varI <- gsub('....', years[[k]], cur_ancVi)
+                    varI <- gsub('....', years[[k]], cur_ancVi, fixed=TRUE)
                     if (varI %in% colnames(dta@data)) {
 
                         interpFrame[[cnt]] <- dta@data[[varI]]

From c9c9ad9938a9f17b534d5340834325bbef0d8fb0 Mon Sep 17 00:00:00 2001
From: userz <sgoodman@aiddata.org>
Date: Tue, 15 Sep 2015 17:11:20 -0400
Subject: [PATCH 096/212] Test.

---
 R/BuildTimeSeries.R | 12 +++---------
 1 file changed, 3 insertions(+), 9 deletions(-)

diff --git a/R/BuildTimeSeries.R b/R/BuildTimeSeries.R
index 8a77d7d..5242bf2 100644
--- a/R/BuildTimeSeries.R
+++ b/R/BuildTimeSeries.R
@@ -119,7 +119,7 @@ BuildTimeSeries <- function (dta, idField, varList_pre, startYear, endYear, colY
 
                 for (u in 1:length(years)) {
 
-                    varI <- gsub('....', years[[u]], cur_ancVi)
+                    varI <- gsub('....', years[[u]], cur_ancVi, fixed=TRUE)
                     if (!(varI %in% colnames(dta@data))) {
                         # Variable doesn't exist, so we need to interpolate.
                         tDframe[idField] <- dta@data[idField]
@@ -165,7 +165,7 @@ BuildTimeSeries <- function (dta, idField, varList_pre, startYear, endYear, colY
             if (regexpr("....", varList_pre[[i]], fixed=TRUE)[1] == -1) {
                 grepStrYrs <- paste(tempGrep,"|",paste(varList_pre[[i]],years[[j]], sep="_"), sep="")
             } else {
-                grepStrYrs <- paste(tempGrep,"|",gsub('....', years[[j]], varList_pre[[i]]), sep="")
+                grepStrYrs <- paste(tempGrep,"|",gsub('....', years[[j]], varList_pre[[i]], fixed=TRUE), sep="")
             }
         }
 
@@ -186,14 +186,8 @@ BuildTimeSeries <- function (dta, idField, varList_pre, startYear, endYear, colY
 
         colnames(meltList[[i]])[3] <- varList_pre[[i]]
 
-        print("bts3.4")
-
-        # Clean up year column
-        # gsub_command <- paste("^",varList_pre[[i]],sep="")
-        # meltList[[i]][2] <- gsub(gsub_command, "", as.matrix(meltList[[i]][2]))
-        
 
-        print("bts3.5")
+        print("bts3.4")
 
         if (i == 1) {
             # set field to use for regex when formatting year field later

From c799e4b86cdeb5d75ea51208e98dfd42e20c3ee0 Mon Sep 17 00:00:00 2001
From: userz <sgoodman@aiddata.org>
Date: Tue, 15 Sep 2015 17:14:04 -0400
Subject: [PATCH 097/212] Test.

---
 R/Stage2PSM.R | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/R/Stage2PSM.R b/R/Stage2PSM.R
index 2474dad..fa9ef0e 100644
--- a/R/Stage2PSM.R
+++ b/R/Stage2PSM.R
@@ -76,8 +76,12 @@ Stage2PSM <- function (model, dta, type, table_out = NULL, opts = NULL) {
         print("c3")
     
         print(opts)
-        exec = paste("cluster.vcov(m_fit,cbind(dta$",opts[1],",dta$",opts[2],"))",sep="")
-        m_fit[["var"]] <- eval(parse(text=exec))
+
+        # exec = paste("cluster.vcov(m_fit,cbind(dta$",opts[1],",dta$",opts[2],"))",sep="")
+        # m_fit[["var"]] <- eval(parse(text=exec))
+
+        m_fit[["var"]] <- cluster.vcov(m_fit,cbind(dta[opts[1]], dta[opts[2]]))
+
         CMREG <- coeftest(m_fit,m_fit[["var"]])
         print("cmReg:")
         print(CMREG)

From 15d6156ca4eb087fee1fd6c568324c3950c9f521 Mon Sep 17 00:00:00 2001
From: userz <sgoodman@aiddata.org>
Date: Tue, 15 Sep 2015 17:15:22 -0400
Subject: [PATCH 098/212] Test.

---
 R/Stage2PSM.R | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/R/Stage2PSM.R b/R/Stage2PSM.R
index fa9ef0e..9998754 100644
--- a/R/Stage2PSM.R
+++ b/R/Stage2PSM.R
@@ -80,7 +80,7 @@ Stage2PSM <- function (model, dta, type, table_out = NULL, opts = NULL) {
         # exec = paste("cluster.vcov(m_fit,cbind(dta$",opts[1],",dta$",opts[2],"))",sep="")
         # m_fit[["var"]] <- eval(parse(text=exec))
 
-        m_fit[["var"]] <- cluster.vcov(m_fit,cbind(dta[opts[1]], dta[opts[2]]))
+        m_fit[["var"]] <- cluster.vcov(m_fit,cbind(dta[opts[1]]))
 
         CMREG <- coeftest(m_fit,m_fit[["var"]])
         print("cmReg:")

From f161ffe7b1b1ff69b8b99222657c2cda6cbd6ed9 Mon Sep 17 00:00:00 2001
From: userz <sgoodman@aiddata.org>
Date: Wed, 16 Sep 2015 11:17:02 -0400
Subject: [PATCH 099/212] Test.

---
 R/Stage2PSM.R | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

diff --git a/R/Stage2PSM.R b/R/Stage2PSM.R
index 9998754..18c8006 100644
--- a/R/Stage2PSM.R
+++ b/R/Stage2PSM.R
@@ -14,7 +14,7 @@ Stage2PSM <- function (model, dta, type, table_out = NULL, opts = NULL) {
         print("==========================")
         #mTab <- stargazer(m_fit,type="html",title="Unstandardized Model Results")
         print(summary(m_fit))
-        ret_var[["unstandardized"]] <- lm(model, dta)
+        ret_var$unstandardized <- lm(model, dta)
         texreg::plotreg(m_fit, omit.coef="(match)|(Intercept)", custom.model.names="Unstandardized Model", custom.note=model)
         
         if (!is.null(table_out)) {
@@ -29,7 +29,7 @@ Stage2PSM <- function (model, dta, type, table_out = NULL, opts = NULL) {
             }
 
             dta_fit_std <- lm(model,dta_tmp)
-            ret_var[["standardized"]] <- lm(model,dta_tmp)
+            ret_var$standardized <- lm(model,dta_tmp)
             print("==========================")
             print("STANDARDIZED MODEL RESULTS")
             print("==========================")
@@ -46,7 +46,7 @@ Stage2PSM <- function (model, dta, type, table_out = NULL, opts = NULL) {
         m_fit <- lm(model, dta)
 
         print("c1b")
-        ret_var[["unstandardized"]] <- m_fit
+        ret_var$unstandardized <- m_fit
 
         print("c1c")
         #mTab <- stargazer(m_fit,type="html",title="Unstandardized Model Results")
@@ -67,7 +67,7 @@ Stage2PSM <- function (model, dta, type, table_out = NULL, opts = NULL) {
                 dta_tmp@data[d_index] <- lapply(dta_tmp@data[d_index],scale)
             }
             dta_fit_std <- lm(model,dta_tmp)
-            ret_var[["standardized"]] <- dta_fit_std
+            ret_var$standardized <- dta_fit_std
             print(summary(dta_fit_std))
             texreg::plotreg(dta_fit_std,omit.coef="(match)|(Intercept)|(factor)",custom.model.names="Standardized Model", custom.note=model)
           
@@ -80,12 +80,12 @@ Stage2PSM <- function (model, dta, type, table_out = NULL, opts = NULL) {
         # exec = paste("cluster.vcov(m_fit,cbind(dta$",opts[1],",dta$",opts[2],"))",sep="")
         # m_fit[["var"]] <- eval(parse(text=exec))
 
-        m_fit[["var"]] <- cluster.vcov(m_fit,cbind(dta[opts[1]]))
+        m_fit$var <- cluster.vcov(m_fit,cbind(dta[opts[1]]))
 
-        CMREG <- coeftest(m_fit,m_fit[["var"]])
+        CMREG <- coeftest(m_fit,m_fit$var)
         print("cmReg:")
         print(CMREG)
-        ret_var[["cmreg"]] <- CMREG
+        ret_var$cmreg <- CMREG
         
     }
     return(ret_var)

From 82352ddd8e477a41770878ed375de5213df7ed62 Mon Sep 17 00:00:00 2001
From: userz <sgoodman@aiddata.org>
Date: Wed, 16 Sep 2015 11:21:22 -0400
Subject: [PATCH 100/212] Test.

---
 R/Stage2PSM.R | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/R/Stage2PSM.R b/R/Stage2PSM.R
index 18c8006..c056eab 100644
--- a/R/Stage2PSM.R
+++ b/R/Stage2PSM.R
@@ -24,8 +24,8 @@ Stage2PSM <- function (model, dta, type, table_out = NULL, opts = NULL) {
                 d_index <- sapply(dta_tmp, is.numeric)
                 dta_tmp[d_index] <- lapply(dta_tmp[d_index],scale)
             } else {
-              d_index <- sapply(dta_tmp@data, is.numeric)
-              dta_tmp@data[d_index] <- lapply(dta_tmp@data[d_index],scale)
+                d_index <- sapply(dta_tmp@data, is.numeric)
+                dta_tmp@data[d_index] <- lapply(dta_tmp@data[d_index],scale)
             }
 
             dta_fit_std <- lm(model,dta_tmp)
@@ -80,7 +80,7 @@ Stage2PSM <- function (model, dta, type, table_out = NULL, opts = NULL) {
         # exec = paste("cluster.vcov(m_fit,cbind(dta$",opts[1],",dta$",opts[2],"))",sep="")
         # m_fit[["var"]] <- eval(parse(text=exec))
 
-        m_fit$var <- cluster.vcov(m_fit,cbind(dta[opts[1]]))
+        m_fit$var <- cluster.vcov(m_fit,cbind(dta[opts[1]], dta[opts[2]]))
 
         CMREG <- coeftest(m_fit,m_fit$var)
         print("cmReg:")

From 669d7bde15d0943e96e1630656ce042789b2e2a1 Mon Sep 17 00:00:00 2001
From: userz <sgoodman@aiddata.org>
Date: Wed, 16 Sep 2015 11:32:48 -0400
Subject: [PATCH 101/212] Test.

---
 R/BuildTimeSeries.R | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/R/BuildTimeSeries.R b/R/BuildTimeSeries.R
index 5242bf2..b42c89f 100644
--- a/R/BuildTimeSeries.R
+++ b/R/BuildTimeSeries.R
@@ -216,12 +216,12 @@ BuildTimeSeries <- function (dta, idField, varList_pre, startYear, endYear, colY
     regex_test <- regexpr("....", year_regex_field, fixed=TRUE)[1]
     if (regex_test > -1) {
         meltListRet['Year'] <- lapply(meltListRet['Year'], function (z) {
-            return(as.numeric(substr(z, regex_test, regex_test+nchar("....")-1)))
+            return(as.integer(substr(z, regex_test, regex_test+nchar("....")-1)))
         })
 
     } else {
         meltListRet['Year'] <- lapply(meltListRet['Year'], function (z) {
-            return(as.numeric(substr(z, nchar(z)-nchar("....")+1, nchar(z))))
+            return(as.integer(substr(z, nchar(z)-nchar("....")+1, nchar(z))))
         })
     }
 

From 2e89179ad77ae7729e8ac721f4c2ce2631fe071d Mon Sep 17 00:00:00 2001
From: userz <sgoodman@aiddata.org>
Date: Wed, 16 Sep 2015 12:13:06 -0400
Subject: [PATCH 102/212] Test.

---
 R/BuildTimeSeries.R | 12 ++++++------
 R/Stage2PSM.R       | 10 +++++++++-
 2 files changed, 15 insertions(+), 7 deletions(-)

diff --git a/R/BuildTimeSeries.R b/R/BuildTimeSeries.R
index b42c89f..f9e7176 100644
--- a/R/BuildTimeSeries.R
+++ b/R/BuildTimeSeries.R
@@ -18,8 +18,8 @@ BuildTimeSeries <- function (dta, idField, varList_pre, startYear, endYear, colY
                 varN = paste("TrtMnt_",colYears[j],"_",years[k], sep="")
                 print(varN)
 
-                dta@data[[varN]] <- 0
-                dta@data[[varN]][dta@data[[colYears[j]]] <= as.Date(paste(years[k],"01","01", sep="-"))] <- 1
+                dta@data[,varN] <- 0
+                dta@data[,varN][dta@data[[colYears[j]]] <= as.Date(paste(years[k],"01","01", sep="-"))] <- 1
 
             }
 
@@ -61,7 +61,7 @@ BuildTimeSeries <- function (dta, idField, varList_pre, startYear, endYear, colY
             # add data to interpolation data frame
             if (cur_ancVi %in% colnames(dta@data)) {
                     # Exception for a single-point interpolation
-                    interpFrame[[cnt]] <- dta@data[[cur_ancVi]]
+                    interpFrame[cnt] <- dta@data[,cur_ancVi]
                     cnt = 3
                     
             } else {
@@ -72,7 +72,7 @@ BuildTimeSeries <- function (dta, idField, varList_pre, startYear, endYear, colY
                     varI <- gsub('....', years[[k]], cur_ancVi, fixed=TRUE)
                     if (varI %in% colnames(dta@data)) {
 
-                        interpFrame[[cnt]] <- dta@data[[varI]]
+                        interpFrame[cnt] <- dta@data[,varI]
                         colnames(interpFrame)[cnt] <- years[[k]]
                         cnt = cnt + 1
 
@@ -91,7 +91,7 @@ BuildTimeSeries <- function (dta, idField, varList_pre, startYear, endYear, colY
                 print("bts2.0.2a")
                 for (k in 1:length(years)) {
                     # add _year to end of non temporal data
-                    dta@data[paste(cur_ancVi,years[[k]],sep="_")] <- interpFrame[2]
+                    dta@data[,paste(cur_ancVi,years[[k]],sep="_")] <- interpFrame[2]
                 }
 
             } else if (cnt < length(years) + 2) {
@@ -100,7 +100,7 @@ BuildTimeSeries <- function (dta, idField, varList_pre, startYear, endYear, colY
 
                 # Melt the dataframe for modeling
                 melt_Model_dta <- melt(data.frame(interpFrame), id=idField)
-                melt_Model_dta["variable"] <- as.numeric(gsub("X", "", melt_Model_dta[["variable"]]))
+                melt_Model_dta["variable"] <- as.numeric(gsub("X", "", melt_Model_dta[,"variable"]))
                 
 
                 print("bts2.0.2b1")
diff --git a/R/Stage2PSM.R b/R/Stage2PSM.R
index c056eab..c28d035 100644
--- a/R/Stage2PSM.R
+++ b/R/Stage2PSM.R
@@ -80,7 +80,15 @@ Stage2PSM <- function (model, dta, type, table_out = NULL, opts = NULL) {
         # exec = paste("cluster.vcov(m_fit,cbind(dta$",opts[1],",dta$",opts[2],"))",sep="")
         # m_fit[["var"]] <- eval(parse(text=exec))
 
-        m_fit$var <- cluster.vcov(m_fit,cbind(dta[opts[1]], dta[opts[2]]))
+        if (length(opts) == 1) {
+            m_fit$var <- cluster.vcov(m_fit,dta[opts[1]]))
+
+        } else if (length(opts) == 2) {
+            m_fit$var <- cluster.vcov(m_fit,cbind(dta[opts[1]], dta[opts[2]]))
+
+        } else {
+            return("CANNOT HAVE MORE THAN 2 OPTS")
+        }
 
         CMREG <- coeftest(m_fit,m_fit$var)
         print("cmReg:")

From 099bcd5b6528f2db48b5b44c1394f52188534c58 Mon Sep 17 00:00:00 2001
From: userz <sgoodman@aiddata.org>
Date: Wed, 16 Sep 2015 12:13:58 -0400
Subject: [PATCH 103/212] Test.

---
 R/Stage2PSM.R | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/R/Stage2PSM.R b/R/Stage2PSM.R
index c28d035..4305cfe 100644
--- a/R/Stage2PSM.R
+++ b/R/Stage2PSM.R
@@ -81,7 +81,7 @@ Stage2PSM <- function (model, dta, type, table_out = NULL, opts = NULL) {
         # m_fit[["var"]] <- eval(parse(text=exec))
 
         if (length(opts) == 1) {
-            m_fit$var <- cluster.vcov(m_fit,dta[opts[1]]))
+            m_fit$var <- cluster.vcov(m_fit,dta[opts[1]])
 
         } else if (length(opts) == 2) {
             m_fit$var <- cluster.vcov(m_fit,cbind(dta[opts[1]], dta[opts[2]]))

From 4d9ef9dd050deacaffcd561a0321cd63c3c32d3a Mon Sep 17 00:00:00 2001
From: userz <sgoodman@aiddata.org>
Date: Wed, 16 Sep 2015 12:53:56 -0400
Subject: [PATCH 104/212] Test.

---
 R/BuildTimeSeries.R | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/R/BuildTimeSeries.R b/R/BuildTimeSeries.R
index f9e7176..9fa05ed 100644
--- a/R/BuildTimeSeries.R
+++ b/R/BuildTimeSeries.R
@@ -19,7 +19,7 @@ BuildTimeSeries <- function (dta, idField, varList_pre, startYear, endYear, colY
                 print(varN)
 
                 dta@data[,varN] <- 0
-                dta@data[,varN][dta@data[[colYears[j]]] <= as.Date(paste(years[k],"01","01", sep="-"))] <- 1
+                dta@data[,varN][is.na(dta@data[[colYears[j]]]) || dta@data[[colYears[j]]] <= as.Date(paste(years[k],"01","01", sep="-"))] <- 1
 
             }
 

From b02c0c460d9d06032d1df57d7eab7a3db66483db Mon Sep 17 00:00:00 2001
From: userz <sgoodman@aiddata.org>
Date: Wed, 16 Sep 2015 13:09:55 -0400
Subject: [PATCH 105/212] Test.

---
 R/SpatialCausalPSM.R | 16 ++++++++--------
 1 file changed, 8 insertions(+), 8 deletions(-)

diff --git a/R/SpatialCausalPSM.R b/R/SpatialCausalPSM.R
index ea2eec3..d723fdf 100644
--- a/R/SpatialCausalPSM.R
+++ b/R/SpatialCausalPSM.R
@@ -16,7 +16,7 @@ SpatialCausalPSM <- function(dta, mtd, mdl, drop, visual) {
     retData <- dta
 
     # predict values based on model
-    retData[["PSM_trtProb"]] <- predict(PSMfit, dta@data, type="response")
+    retData[,"PSM_trtProb"] <- predict(PSMfit, dta@data, type="response")
 
 
     if (visual == "TRUE") {
@@ -30,14 +30,14 @@ SpatialCausalPSM <- function(dta, mtd, mdl, drop, visual) {
     #if (drop == "support") {
         
         # Drop
-        treated <- retData@data[retData@data[["TrtBin"]] == 1,]
-        untreated <- retData@data[retData@data[["TrtBin"]] == 0,]
-        min_cut <- max(min(treated[["PSM_trtProb"]], na.rm = TRUE), min(untreated[["PSM_trtProb"]], na.rm = TRUE))
-        max_cut <- min(max(treated[["PSM_trtProb"]], na.rm = TRUE), max(untreated[["PSM_trtProb"]], na.rm = TRUE))
+        treated <- retData@data[retData@data[,"TrtBin"] == 1,]
+        untreated <- retData@data[retData@data[,"TrtBin"] == 0,]
+        min_cut <- max(min(treated[,"PSM_trtProb"], na.rm = TRUE), min(untreated[,"PSM_trtProb"], na.rm = TRUE))
+        max_cut <- min(max(treated[,"PSM_trtProb"], na.rm = TRUE), max(untreated[,"PSM_trtProb"], na.rm = TRUE))
         
-        retData <- retData[!is.na(retData@data[["PSM_trtProb"]]),]
-        retData <- retData[retData@data[["PSM_trtProb"]] >= min_cut,]    
-        retData <- retData[retData@data[["PSM_trtProb"]] <= max_cut,] 
+        retData <- retData[!is.na(retData@data[,"PSM_trtProb"]),]
+        retData <- retData[retData@data[,"PSM_trtProb"] >= min_cut,]    
+        retData <- retData[retData@data[,"PSM_trtProb"] <= max_cut,] 
         
     #}
 

From 48b8d1a61c8081bf91fc12fbcdf46a1f0a2fc4c4 Mon Sep 17 00:00:00 2001
From: userz <sgoodman@aiddata.org>
Date: Wed, 16 Sep 2015 13:17:04 -0400
Subject: [PATCH 106/212] Test.

---
 R/BuildTimeSeries.R | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/R/BuildTimeSeries.R b/R/BuildTimeSeries.R
index 9fa05ed..7bc86fa 100644
--- a/R/BuildTimeSeries.R
+++ b/R/BuildTimeSeries.R
@@ -19,7 +19,7 @@ BuildTimeSeries <- function (dta, idField, varList_pre, startYear, endYear, colY
                 print(varN)
 
                 dta@data[,varN] <- 0
-                dta@data[,varN][is.na(dta@data[[colYears[j]]]) || dta@data[[colYears[j]]] <= as.Date(paste(years[k],"01","01", sep="-"))] <- 1
+                dta@data[,varN][!is.na(dta@data[[colYears[j]]]) || dta@data[[colYears[j]]] <= as.Date(paste(years[k],"01","01", sep="-"))] <- 1
 
             }
 

From a4ca7856913b8dadda692c5b9dbf9c0df44e282f Mon Sep 17 00:00:00 2001
From: userz <sgoodman@aiddata.org>
Date: Wed, 16 Sep 2015 13:19:17 -0400
Subject: [PATCH 107/212] Test.

---
 R/SpatialCausalPSM.R | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/R/SpatialCausalPSM.R b/R/SpatialCausalPSM.R
index d723fdf..992a39a 100644
--- a/R/SpatialCausalPSM.R
+++ b/R/SpatialCausalPSM.R
@@ -16,7 +16,7 @@ SpatialCausalPSM <- function(dta, mtd, mdl, drop, visual) {
     retData <- dta
 
     # predict values based on model
-    retData[,"PSM_trtProb"] <- predict(PSMfit, dta@data, type="response")
+    retData@data[,"PSM_trtProb"] <- predict(PSMfit, dta@data, type="response")
 
 
     if (visual == "TRUE") {

From 3f4b18f76c40065eb200a6352aea54cbd9d6479b Mon Sep 17 00:00:00 2001
From: userz <sgoodman@aiddata.org>
Date: Wed, 16 Sep 2015 13:30:06 -0400
Subject: [PATCH 108/212] Test.

---
 R/SAT.R              | 32 ++++++++++++++++----------------
 R/SpatialCausalPSM.R |  6 +++---
 2 files changed, 19 insertions(+), 19 deletions(-)

diff --git a/R/SAT.R b/R/SAT.R
index dffaf8e..4f3fc83 100644
--- a/R/SAT.R
+++ b/R/SAT.R
@@ -12,10 +12,10 @@ SAT <- function (dta, mtd, constraints, psm_eq, ids, drop_opts, visual, TrtBinCo
     if (!is.null(constraints)) {
         for (cst in 1:length(names(constraints))) {
             if (names(constraints)[cst] == "groups") {
-                dta[["ConstraintGroupSet_Opt"]] <- dta[[constraints["groups"]]]
+                dta@data[,"ConstraintGroupSet_Opt"] <- dta@data[,constraints["groups"]]
 
             } else {
-                dta[["ConstraintGroupSet_Opt"]] <- 1
+                dta@data[,"ConstraintGroupSet_Opt"] <- 1
             }
             if (names(constraints)[cst] == "distance") {
                 dist_PSM = as.numeric(constraints["distance"][[1]])
@@ -24,7 +24,7 @@ SAT <- function (dta, mtd, constraints, psm_eq, ids, drop_opts, visual, TrtBinCo
             }
         }
     } else {
-        dta[["ConstraintGroupSet_Opt"]] <- 1
+        dta@data[,"ConstraintGroupSet_Opt"] <- 1
         #max the distance threshold by taking the diagonal of the bounding box.
         dist_PSM = NULL
     }
@@ -32,7 +32,7 @@ SAT <- function (dta, mtd, constraints, psm_eq, ids, drop_opts, visual, TrtBinCo
     print("sat2")
 
     #Caclulate the number of groups to constrain by, if any.
-    group_constraints <- unique(dta[["ConstraintGroupSet_Opt"]])
+    group_constraints <- unique(dta@data[,"ConstraintGroupSet_Opt"])
   
     #Make sure there are both treatment and control groups of an adequate size (>= 1 of each)
     t_dta <- list()
@@ -44,20 +44,20 @@ SAT <- function (dta, mtd, constraints, psm_eq, ids, drop_opts, visual, TrtBinCo
         grp_index = length(grp_list)+1
         t_index = length(t_dta)+1
         grp_list[[grp_index]] <- as.matrix(group_constraints)[grp]
-        t_dta[[t_index]] <- dta[dta[["TrtBin"]] == 1,]
-        u_dta[[t_index]] <- dta[dta[["TrtBin"]] == 0,]
-        treatment_count <- cur_grp %in% t_dta[[t_index]][["ConstraintGroupSet_Opt"]]
-        untreated_count <- cur_grp %in% u_dta[[t_index]][["ConstraintGroupSet_Opt"]]
+        t_dta[[t_index]] <- dta@data[dta@data[,"TrtBin"] == 1,]
+        u_dta[[t_index]] <- dta@data[dta@data[,"TrtBin"] == 0,]
+        treatment_count <- cur_grp %in% t_dta[[t_index]]@data[,"ConstraintGroupSet_Opt"]
+        untreated_count <- cur_grp %in% u_dta[[t_index]]@data[,"ConstraintGroupSet_Opt"]
         if ((untreated_count == FALSE) || (treatment_count == FALSE)) {
-            dta <- dta[!dta[["ConstraintGroupSet_Opt"]] == cur_grp,]
+            dta <- dta@data[!dta@data[,"ConstraintGroupSet_Opt"] == cur_grp,]
             t_dta[[t_index]] <- NULL
             u_dta[[t_index]] <- NULL
             grp_list[[t_index]] <- NULL
             war_statement = paste("Dropped group due to a lack of both treatment and control observation: '",cur_grp,"'",sep="")
             warning(war_statement)
         } else { 
-            t_dta[[t_index]] <- t_dta[[t_index]][t_dta[[t_index]][["ConstraintGroupSet_Opt"]] == cur_grp,]
-            u_dta[[t_index]] <- u_dta[[t_index]][u_dta[[t_index]][["ConstraintGroupSet_Opt"]] == cur_grp,]
+            t_dta[[t_index]] <- t_dta[[t_index]][t_dta[[t_index]]@data[,"ConstraintGroupSet_Opt"] == cur_grp,]
+            u_dta[[t_index]] <- u_dta[[t_index]][u_dta[[t_index]]@data[,"ConstraintGroupSet_Opt"] == cur_grp,]
         
             cnt = cnt + 1
         }
@@ -99,7 +99,7 @@ SAT <- function (dta, mtd, constraints, psm_eq, ids, drop_opts, visual, TrtBinCo
     print("sat5")
 
     if (drop_unmatched == TRUE) {
-        dta <- dta[dta@data[["PSM_match_ID"]] != -999,]    
+        dta <- dta[dta@data[,"PSM_match_ID"] != -999,]    
     }
   
     anc_v_int <- strsplit(psm_eq, "~")[[1]][2]
@@ -111,11 +111,11 @@ SAT <- function (dta, mtd, constraints, psm_eq, ids, drop_opts, visual, TrtBinCo
     #Drop observations according to the selected method
     if (drop_method == "SD") {
         #Method to drop pairs that are greater than a set threshold apart in terms of PSM Standard Deviations.
-        psm_sd_thresh = sd(dta[["PSM_trtProb"]]) * drop_thresh
+        psm_sd_thresh = sd(dta@data[,"PSM_trtProb"]) * drop_thresh
         if (visual == "TRUE") {
             print(psm_sd_thresh)
         }
-        dta <- dta[dta@data[["PSM_distance"]] < psm_sd_thresh,]
+        dta <- dta[dta@data[,"PSM_distance"] < psm_sd_thresh,]
     }
   
     #Plot the pre and post-dropping balance for PSM model...
@@ -139,9 +139,9 @@ SAT <- function (dta, mtd, constraints, psm_eq, ids, drop_opts, visual, TrtBinCo
         print("sat7.1")
         if (c_type == "matrix") {
 
-            dta@data[[ed_v]] <- as.numeric(dta@data[[ed_v]])
+            dta@data[,ed_v] <- as.numeric(dta@data[,ed_v])
 
-            init_dta@data[[ed_v]] <- as.numeric(init_dta@data[[ed_v]])
+            init_dta@data[,ed_v] <- as.numeric(init_dta@data[,ed_v])
 
             c_type = "numeric"
         }
diff --git a/R/SpatialCausalPSM.R b/R/SpatialCausalPSM.R
index 992a39a..2fbccd5 100644
--- a/R/SpatialCausalPSM.R
+++ b/R/SpatialCausalPSM.R
@@ -35,9 +35,9 @@ SpatialCausalPSM <- function(dta, mtd, mdl, drop, visual) {
         min_cut <- max(min(treated[,"PSM_trtProb"], na.rm = TRUE), min(untreated[,"PSM_trtProb"], na.rm = TRUE))
         max_cut <- min(max(treated[,"PSM_trtProb"], na.rm = TRUE), max(untreated[,"PSM_trtProb"], na.rm = TRUE))
         
-        retData <- retData[!is.na(retData@data[,"PSM_trtProb"]),]
-        retData <- retData[retData@data[,"PSM_trtProb"] >= min_cut,]    
-        retData <- retData[retData@data[,"PSM_trtProb"] <= max_cut,] 
+        retData <- retData@data[!is.na(retData@data[,"PSM_trtProb"]),]
+        retData <- retData@data[retData@data[,"PSM_trtProb"] >= min_cut,]    
+        retData <- retData@data[retData@data[,"PSM_trtProb"] <= max_cut,] 
         
     #}
 

From 691db70f6328fed07defa21453a4bb4d753da0e8 Mon Sep 17 00:00:00 2001
From: userz <sgoodman@aiddata.org>
Date: Wed, 16 Sep 2015 13:36:03 -0400
Subject: [PATCH 109/212] Test.

---
 R/SAT.R | 32 ++++++++++++++++++++++----------
 1 file changed, 22 insertions(+), 10 deletions(-)

diff --git a/R/SAT.R b/R/SAT.R
index 4f3fc83..f0ae8ce 100644
--- a/R/SAT.R
+++ b/R/SAT.R
@@ -7,6 +7,8 @@ SAT <- function (dta, mtd, constraints, psm_eq, ids, drop_opts, visual, TrtBinCo
     drop_method = drop_opts["drop_method"]
     drop_thresh = as.numeric(drop_opts["drop_thresh"])
   
+
+
     print("sat1")
 
     if (!is.null(constraints)) {
@@ -15,7 +17,7 @@ SAT <- function (dta, mtd, constraints, psm_eq, ids, drop_opts, visual, TrtBinCo
                 dta@data[,"ConstraintGroupSet_Opt"] <- dta@data[,constraints["groups"]]
 
             } else {
-                dta@data[,"ConstraintGroupSet_Opt"] <- 1
+                dta$ConstraintGroupSet_Opt <- 1
             }
             if (names(constraints)[cst] == "distance") {
                 dist_PSM = as.numeric(constraints["distance"][[1]])
@@ -24,44 +26,54 @@ SAT <- function (dta, mtd, constraints, psm_eq, ids, drop_opts, visual, TrtBinCo
             }
         }
     } else {
-        dta@data[,"ConstraintGroupSet_Opt"] <- 1
+        dta$ConstraintGroupSet_Opt <- 1
         #max the distance threshold by taking the diagonal of the bounding box.
         dist_PSM = NULL
     }
 
+
+
     print("sat2")
 
     #Caclulate the number of groups to constrain by, if any.
-    group_constraints <- unique(dta@data[,"ConstraintGroupSet_Opt"])
+    group_constraints <- unique(dta$ConstraintGroupSet_Opt)
   
     #Make sure there are both treatment and control groups of an adequate size (>= 1 of each)
     t_dta <- list()
     u_dta <-list()
     grp_list <- list()
     cnt = 0
+
     for (grp in 1:length(group_constraints)) {
         cur_grp <- as.matrix(group_constraints)[grp]
         grp_index = length(grp_list)+1
         t_index = length(t_dta)+1
         grp_list[[grp_index]] <- as.matrix(group_constraints)[grp]
-        t_dta[[t_index]] <- dta@data[dta@data[,"TrtBin"] == 1,]
-        u_dta[[t_index]] <- dta@data[dta@data[,"TrtBin"] == 0,]
-        treatment_count <- cur_grp %in% t_dta[[t_index]]@data[,"ConstraintGroupSet_Opt"]
-        untreated_count <- cur_grp %in% u_dta[[t_index]]@data[,"ConstraintGroupSet_Opt"]
+
+        t_dta[[t_index]] <- dta[dta$TrtBin == 1,]
+        u_dta[[t_index]] <- dta[dta$TrtBin == 0,]
+
+        treatment_count <- cur_grp %in% t_dta[[t_index]]$ConstraintGroupSet_Opt
+        untreated_count <- cur_grp %in% u_dta[[t_index]]$ConstraintGroupSet_Opt
+
         if ((untreated_count == FALSE) || (treatment_count == FALSE)) {
-            dta <- dta@data[!dta@data[,"ConstraintGroupSet_Opt"] == cur_grp,]
+            dta <- dta[!dta$ConstraintGroupSet_Opt == cur_grp,]
             t_dta[[t_index]] <- NULL
             u_dta[[t_index]] <- NULL
             grp_list[[t_index]] <- NULL
             war_statement = paste("Dropped group due to a lack of both treatment and control observation: '",cur_grp,"'",sep="")
             warning(war_statement)
+
         } else { 
-            t_dta[[t_index]] <- t_dta[[t_index]][t_dta[[t_index]]@data[,"ConstraintGroupSet_Opt"] == cur_grp,]
-            u_dta[[t_index]] <- u_dta[[t_index]][u_dta[[t_index]]@data[,"ConstraintGroupSet_Opt"] == cur_grp,]
+            t_dta[[t_index]] <- t_dta[[t_index]][t_dta[[t_index]]$ConstraintGroupSet_Opt == cur_grp,]
+            u_dta[[t_index]] <- u_dta[[t_index]][u_dta[[t_index]]$ConstraintGroupSet_Opt == cur_grp,]
         
             cnt = cnt + 1
         }
     }
+
+
+
     print("sat3")
 
     temp_dta <- list()

From 227718ea872bee747fac6d2a4cafe7bd3926149e Mon Sep 17 00:00:00 2001
From: userz <sgoodman@aiddata.org>
Date: Wed, 16 Sep 2015 13:50:07 -0400
Subject: [PATCH 110/212] Test.

---
 R/BuildTimeSeries.R | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/R/BuildTimeSeries.R b/R/BuildTimeSeries.R
index 7bc86fa..cb082ea 100644
--- a/R/BuildTimeSeries.R
+++ b/R/BuildTimeSeries.R
@@ -19,7 +19,7 @@ BuildTimeSeries <- function (dta, idField, varList_pre, startYear, endYear, colY
                 print(varN)
 
                 dta@data[,varN] <- 0
-                dta@data[,varN][!is.na(dta@data[[colYears[j]]]) || dta@data[[colYears[j]]] <= as.Date(paste(years[k],"01","01", sep="-"))] <- 1
+                dta@data[,varN][!is.na(dta@data[[colYears[j]]]) && dta@data[[colYears[j]]] <= years[k]] <- 1
 
             }
 
@@ -35,7 +35,6 @@ BuildTimeSeries <- function (dta, idField, varList_pre, startYear, endYear, colY
 
 
 
-
     print("bts2")
     timer <- proc.time()
     

From a6b8b763eb7b196ade0e6c10579e216383eaae32 Mon Sep 17 00:00:00 2001
From: userz <sgoodman@aiddata.org>
Date: Wed, 16 Sep 2015 13:54:52 -0400
Subject: [PATCH 111/212] Test.

---
 R/SpatialCausalPSM.R | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/R/SpatialCausalPSM.R b/R/SpatialCausalPSM.R
index 2fbccd5..992a39a 100644
--- a/R/SpatialCausalPSM.R
+++ b/R/SpatialCausalPSM.R
@@ -35,9 +35,9 @@ SpatialCausalPSM <- function(dta, mtd, mdl, drop, visual) {
         min_cut <- max(min(treated[,"PSM_trtProb"], na.rm = TRUE), min(untreated[,"PSM_trtProb"], na.rm = TRUE))
         max_cut <- min(max(treated[,"PSM_trtProb"], na.rm = TRUE), max(untreated[,"PSM_trtProb"], na.rm = TRUE))
         
-        retData <- retData@data[!is.na(retData@data[,"PSM_trtProb"]),]
-        retData <- retData@data[retData@data[,"PSM_trtProb"] >= min_cut,]    
-        retData <- retData@data[retData@data[,"PSM_trtProb"] <= max_cut,] 
+        retData <- retData[!is.na(retData@data[,"PSM_trtProb"]),]
+        retData <- retData[retData@data[,"PSM_trtProb"] >= min_cut,]    
+        retData <- retData[retData@data[,"PSM_trtProb"] <= max_cut,] 
         
     #}
 

From 3d5c4a5ea12c6e382a9fde1644b8c87bc9136da8 Mon Sep 17 00:00:00 2001
From: userz <sgoodman@aiddata.org>
Date: Wed, 16 Sep 2015 16:12:39 -0400
Subject: [PATCH 112/212] Test.

---
 R/BuildTimeSeries.R | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/R/BuildTimeSeries.R b/R/BuildTimeSeries.R
index cb082ea..455bfe0 100644
--- a/R/BuildTimeSeries.R
+++ b/R/BuildTimeSeries.R
@@ -19,7 +19,8 @@ BuildTimeSeries <- function (dta, idField, varList_pre, startYear, endYear, colY
                 print(varN)
 
                 dta@data[,varN] <- 0
-                dta@data[,varN][!is.na(dta@data[[colYears[j]]]) && dta@data[[colYears[j]]] <= years[k]] <- 1
+                # dta@data[,varN][!is.na(dta@data[[colYears[j]]]) && dta@data[[colYears[j]]] <= years[k]] <- 1
+                dta@data[,varN][dta@data[[colYears[j]]] <= years[k]] <- 1
 
             }
 

From de90481afdd81147603347cfb24d3d5f9c43397c Mon Sep 17 00:00:00 2001
From: userz <sgoodman@aiddata.org>
Date: Wed, 16 Sep 2015 16:24:59 -0400
Subject: [PATCH 113/212] Test.

---
 R/BuildTimeSeries.R | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/R/BuildTimeSeries.R b/R/BuildTimeSeries.R
index 455bfe0..a526637 100644
--- a/R/BuildTimeSeries.R
+++ b/R/BuildTimeSeries.R
@@ -18,9 +18,9 @@ BuildTimeSeries <- function (dta, idField, varList_pre, startYear, endYear, colY
                 varN = paste("TrtMnt_",colYears[j],"_",years[k], sep="")
                 print(varN)
 
-                dta@data[,varN] <- 0
-                # dta@data[,varN][!is.na(dta@data[[colYears[j]]]) && dta@data[[colYears[j]]] <= years[k]] <- 1
-                dta@data[,varN][dta@data[[colYears[j]]] <= years[k]] <- 1
+                dta@data[varN] <- 0
+                dta@data[varN][!is.na(dta@data[colYears[j]]) && dta@data[colYears[j]] <= years[k]] <- 1
+                # dta@data[varN][dta@data[colYears[j]] <= years[k]] <- 1
 
             }
 

From 1ad0accffda348de38470ed9b4757c9b9241de23 Mon Sep 17 00:00:00 2001
From: userz <sgoodman@aiddata.org>
Date: Wed, 16 Sep 2015 16:30:58 -0400
Subject: [PATCH 114/212] Test.

---
 R/BuildTimeSeries.R | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/R/BuildTimeSeries.R b/R/BuildTimeSeries.R
index a526637..5e363d7 100644
--- a/R/BuildTimeSeries.R
+++ b/R/BuildTimeSeries.R
@@ -19,8 +19,9 @@ BuildTimeSeries <- function (dta, idField, varList_pre, startYear, endYear, colY
                 print(varN)
 
                 dta@data[varN] <- 0
-                dta@data[varN][!is.na(dta@data[colYears[j]]) && dta@data[colYears[j]] <= years[k]] <- 1
-                # dta@data[varN][dta@data[colYears[j]] <= years[k]] <- 1
+                # dta@data[varN][!is.na(dta@data[colYears[j]]) && dta@data[colYears[j]] <= years[k]] <- 1
+                dta@data[varN][dta@data[colYears[j]] <= years[k]] <- 1
+                dta@data[varN][is.na(dta@data[colYears[j]])] <- 0
 
             }
 

From 21f881e9b9e6710981e06cac09d74f8073d4d1bf Mon Sep 17 00:00:00 2001
From: userz <sgoodman@aiddata.org>
Date: Thu, 17 Sep 2015 13:21:59 -0400
Subject: [PATCH 115/212] Test.

---
 R/BuildTimeSeries.R |  1 -
 R/Stage2PSM.R       | 13 +++++++++++--
 2 files changed, 11 insertions(+), 3 deletions(-)

diff --git a/R/BuildTimeSeries.R b/R/BuildTimeSeries.R
index 5e363d7..0f510fc 100644
--- a/R/BuildTimeSeries.R
+++ b/R/BuildTimeSeries.R
@@ -19,7 +19,6 @@ BuildTimeSeries <- function (dta, idField, varList_pre, startYear, endYear, colY
                 print(varN)
 
                 dta@data[varN] <- 0
-                # dta@data[varN][!is.na(dta@data[colYears[j]]) && dta@data[colYears[j]] <= years[k]] <- 1
                 dta@data[varN][dta@data[colYears[j]] <= years[k]] <- 1
                 dta@data[varN][is.na(dta@data[colYears[j]])] <- 0
 
diff --git a/R/Stage2PSM.R b/R/Stage2PSM.R
index 4305cfe..1418eb0 100644
--- a/R/Stage2PSM.R
+++ b/R/Stage2PSM.R
@@ -42,6 +42,17 @@ Stage2PSM <- function (model, dta, type, table_out = NULL, opts = NULL) {
   
     if (type == "cmreg") {
 
+        # make sure cluster options provided are valid
+        if (length(opts) == 0 || opts == NULL) {
+            print("Must have at least 1 clustering option")
+            return("Invalid opts given.")
+
+        } else if (length(opts) > 2) {
+            print("Cannot have more than 2 clustering options")
+            return("Invalid opts given.")
+        }
+
+
         print("c1a")
         m_fit <- lm(model, dta)
 
@@ -86,8 +97,6 @@ Stage2PSM <- function (model, dta, type, table_out = NULL, opts = NULL) {
         } else if (length(opts) == 2) {
             m_fit$var <- cluster.vcov(m_fit,cbind(dta[opts[1]], dta[opts[2]]))
 
-        } else {
-            return("CANNOT HAVE MORE THAN 2 OPTS")
         }
 
         CMREG <- coeftest(m_fit,m_fit$var)

From 7d1e73811a6803f15199c8b359a5cba878d7c3eb Mon Sep 17 00:00:00 2001
From: userz <sgoodman@aiddata.org>
Date: Thu, 17 Sep 2015 13:24:36 -0400
Subject: [PATCH 116/212] Test.

---
 R/Stage2PSM.R | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/R/Stage2PSM.R b/R/Stage2PSM.R
index 1418eb0..58b4712 100644
--- a/R/Stage2PSM.R
+++ b/R/Stage2PSM.R
@@ -43,7 +43,7 @@ Stage2PSM <- function (model, dta, type, table_out = NULL, opts = NULL) {
     if (type == "cmreg") {
 
         # make sure cluster options provided are valid
-        if (length(opts) == 0 || opts == NULL) {
+        if (opts == NULL || length(opts) == 0) {
             print("Must have at least 1 clustering option")
             return("Invalid opts given.")
 
@@ -68,6 +68,7 @@ Stage2PSM <- function (model, dta, type, table_out = NULL, opts = NULL) {
         print("c2")
 
         if (!is.null(table_out)) {
+            
             dta_tmp <- dta
           
             if( class(dta) == "data.frame") {

From 292d0b14af55a36e6d82e3a40ce00673bc6ce8da Mon Sep 17 00:00:00 2001
From: userz <sgoodman@aiddata.org>
Date: Thu, 17 Sep 2015 13:26:10 -0400
Subject: [PATCH 117/212] Test.

---
 R/Stage2PSM.R | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/R/Stage2PSM.R b/R/Stage2PSM.R
index 58b4712..5296f69 100644
--- a/R/Stage2PSM.R
+++ b/R/Stage2PSM.R
@@ -43,6 +43,9 @@ Stage2PSM <- function (model, dta, type, table_out = NULL, opts = NULL) {
     if (type == "cmreg") {
 
         # make sure cluster options provided are valid
+        print(opts == NULL)
+        print(length(opts) == 0)
+        
         if (opts == NULL || length(opts) == 0) {
             print("Must have at least 1 clustering option")
             return("Invalid opts given.")
@@ -68,7 +71,7 @@ Stage2PSM <- function (model, dta, type, table_out = NULL, opts = NULL) {
         print("c2")
 
         if (!is.null(table_out)) {
-            
+
             dta_tmp <- dta
           
             if( class(dta) == "data.frame") {

From 3831022bcc1aca59ad517c5b68d4c88024d4fc32 Mon Sep 17 00:00:00 2001
From: userz <sgoodman@aiddata.org>
Date: Thu, 17 Sep 2015 13:30:30 -0400
Subject: [PATCH 118/212] Test.

---
 R/Stage2PSM.R | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/R/Stage2PSM.R b/R/Stage2PSM.R
index 5296f69..bb3a261 100644
--- a/R/Stage2PSM.R
+++ b/R/Stage2PSM.R
@@ -45,8 +45,8 @@ Stage2PSM <- function (model, dta, type, table_out = NULL, opts = NULL) {
         # make sure cluster options provided are valid
         print(opts == NULL)
         print(length(opts) == 0)
-        
-        if (opts == NULL || length(opts) == 0) {
+
+        if (is.null(opts) || length(opts) == 0) {
             print("Must have at least 1 clustering option")
             return("Invalid opts given.")
 

From cb1589288d99843b8a7c910fb5e33e36fb4c8d77 Mon Sep 17 00:00:00 2001
From: userz <sgoodman@aiddata.org>
Date: Fri, 9 Oct 2015 14:17:34 -0400
Subject: [PATCH 119/212] Fix fitting same model multiple times in stage2psm.

---
 R/Stage2PSM.R      |  10 +--
 R/timeRangeAvg.R   |  22 +++---
 R/timeRangeTrend.R | 171 ++++++++++++++++++++++++++++++++++++++++++++-
 3 files changed, 186 insertions(+), 17 deletions(-)

diff --git a/R/Stage2PSM.R b/R/Stage2PSM.R
index bb3a261..8936173 100644
--- a/R/Stage2PSM.R
+++ b/R/Stage2PSM.R
@@ -3,7 +3,7 @@
 #These functions are to make common modeling strategies easier to specify for users
 #That do not write their own models.
 
-Stage2PSM <- function (model, dta, type, table_out = NULL, opts = NULL) {
+Stage2PSM <- function (model, dta, type, table_out = NULL, opts = NULL, force_posdef = TRUE) {
   
     ret_var <- list()
   
@@ -14,7 +14,7 @@ Stage2PSM <- function (model, dta, type, table_out = NULL, opts = NULL) {
         print("==========================")
         #mTab <- stargazer(m_fit,type="html",title="Unstandardized Model Results")
         print(summary(m_fit))
-        ret_var$unstandardized <- lm(model, dta)
+        ret_var$unstandardized <- m_fit
         texreg::plotreg(m_fit, omit.coef="(match)|(Intercept)", custom.model.names="Unstandardized Model", custom.note=model)
         
         if (!is.null(table_out)) {
@@ -29,7 +29,7 @@ Stage2PSM <- function (model, dta, type, table_out = NULL, opts = NULL) {
             }
 
             dta_fit_std <- lm(model,dta_tmp)
-            ret_var$standardized <- lm(model,dta_tmp)
+            ret_var$standardized <- dta_fit_std
             print("==========================")
             print("STANDARDIZED MODEL RESULTS")
             print("==========================")
@@ -96,10 +96,10 @@ Stage2PSM <- function (model, dta, type, table_out = NULL, opts = NULL) {
         # m_fit[["var"]] <- eval(parse(text=exec))
 
         if (length(opts) == 1) {
-            m_fit$var <- cluster.vcov(m_fit,dta[opts[1]])
+            m_fit$var <- cluster.vcov(m_fit,dta[opts[1]], force_posdef=force_posdef)
 
         } else if (length(opts) == 2) {
-            m_fit$var <- cluster.vcov(m_fit,cbind(dta[opts[1]], dta[opts[2]]))
+            m_fit$var <- cluster.vcov(m_fit,cbind(dta[opts[1]], dta[opts[2]]), force_posdef=force_posdef)
 
         }
 
diff --git a/R/timeRangeAvg.R b/R/timeRangeAvg.R
index 1af3cfd..c378173 100644
--- a/R/timeRangeAvg.R
+++ b/R/timeRangeAvg.R
@@ -1,9 +1,13 @@
-timeRangeAvg <- function(dta,prefix,startyr,endyr)
-{
-  searchS = paste("^",prefix,startyr,sep="")
-  searchE = paste("^",prefix,endyr,sep="")
-  strt_id <- grep(searchS,colnames(dta))
-  end_id <- grep(searchE,colnames(dta))
-  rmean <- rowMeans(dta[strt_id:end_id])
-  return(rmean)
-}
\ No newline at end of file
+timeRangeAvg <- function(dta, prefix, startyr, endyr) {
+
+    searchS = paste("^",prefix,startyr, sep="")
+    searchE = paste("^",prefix,endyr, sep="")
+
+    start_id <- grep(searchS, colnames(dta))
+    end_id <- grep(searchE, colnames(dta))
+
+    rmean <- rowMeans(dta[start_id:end_id])
+
+    return(rmean)
+
+}
diff --git a/R/timeRangeTrend.R b/R/timeRangeTrend.R
index ac41eda..6fbe27e 100644
--- a/R/timeRangeTrend.R
+++ b/R/timeRangeTrend.R
@@ -1,6 +1,96 @@
+
+
+
+timeRangeType <- function (columns, prefix, startyr, endyr, field) {
+
+    if (!is.na(as.numeric(startyr) && is.na(as.integer(endyr) && !is.na(field) && field %in% column_names) {
+        type = "pre"
+        startyr = as.integer(startyr)
+    
+    } else if (is.na(as.numeric(startyr) && !is.na(as.integer(endyr) && !is.na(field) && field %in% column_names) {
+        type = "post"
+        endyr = as.integer(endyr)
+    
+    } else if (!is.na(as.numeric(startyr) && !is.na(as.integer(endyr) && as.integer(endyr) > as.integer(startyr)) {
+        type = "range"
+        startyr = as.integer(startyr)
+        endyr = as.integer(endyr)
+
+    } else {
+        type = "invalid"
+    }
+
+    return(c(type, startyr, endyr))
+
+}
+
+
+
 # run linear model on data within year range as specified
 # by field prefix and return coefficients
-timeRangeTrend <- function(dta, prefix, startyr, endyr, IDfield, thresh=0.5) {
+timeRangeTrend <- function (dta, prefix, startyr, endyr, field=NA, IDfield, thresh=0.5) {
+
+    check <- timeRangeType(colnames(dta), prefix, startyr, endyr, field)
+
+    type = check[1]
+    startyr = check[2]
+    endyr = check[3]
+    
+    
+    if (type == "range") {
+
+        output <- timeRangeTrend_calc(dta, prefix, startyr, tmp_endyr, IDfield, thresh=0.5)
+
+    } else if (type == "pre") {
+
+        output <- apply(dta, 1, function (row) {
+
+            tmp_endyr <- as.integer(row['start_actual_isodate'])
+
+            if (is.na(tmp_endyr) || start_yr >= tmp_endyr) {
+                return(as.integer("NA"))
+
+            } else {
+                return(timeRangeTrend_calc(row, prefix, startyr, tmp_endyr, IDfield, thresh=0.5, field="start_actual_isodate"))
+
+            }
+
+        })
+
+
+    } else if (type == "post") {
+
+        output <- lapply(dta, function (row) {
+
+            tmp_startyr <- as.integer(row['start_actual_isodate'])
+
+            if (is.na(tmp_startyr) || tmp_startyr >= endyr) {
+                return(as.integer("NA"))
+
+            } else {
+                return(timeRangeTrend_calc(row, prefix, tmp_startyr, endyr, IDfield, thresh=0.5, field="start_actual_isodate"))
+
+            }
+
+        })
+
+
+    } else if (type == "invalid") {
+        output <- 1
+
+    } else {
+        output <- 2
+
+    }
+
+    return(output)
+
+}
+
+
+
+timeRangeTrend_calc <- function (dta, prefix, startyr, endyr, IDfield, thresh=0.5) {
+
 
     # create new dataframe from all columns in dta dataframe that 
     # are either the ID or a year which is indicated by the prefix
@@ -18,7 +108,9 @@ timeRangeTrend <- function(dta, prefix, startyr, endyr, IDfield, thresh=0.5) {
 
     # generate new year field by removing prefix from variable (original column names)
     # analysisDF["Year"] <- lapply(analysisDF["variable"], FUN=function(x) as.numeric(gsub(new_pre, "", x)))
-    analysisDF["Year"] <- lapply(analysisDF["variable"], FUN=function(x) as.numeric(substr(x, yIndex[1], yIndex[1]+3)))
+    analysisDF["Year"] <- lapply(analysisDF["variable"], FUN=function(x) {
+        as.numeric(substr(x, yIndex[1], yIndex[1]+3))
+    })
 
     # keep years in range specified
     analysisDF <- analysisDF[analysisDF["Year"] >= startyr ,]
@@ -36,7 +128,7 @@ timeRangeTrend <- function(dta, prefix, startyr, endyr, IDfield, thresh=0.5) {
         ID_dat <- analysisDF[analysisDF[IDfield] == ID,]
 
         dat_length <-length(ID_dat)
-        count_na <-sum(is.na(ID_dat[['value']])) 
+        count_na <-sum(is.na(ID_dat[['value']]))
         count_non_na <- dat_length - count_na
         percent_na <- count_na / dat_length
 
@@ -59,3 +151,76 @@ timeRangeTrend <- function(dta, prefix, startyr, endyr, IDfield, thresh=0.5) {
     return(dta[["newfieldID"]])
     
 }
+
+
+
+timeRangeAvg <- function (dta, prefix, startyr, endyr, field=NA) {
+
+    check <- timeRangeType(colnames(dta), prefix, startyr, endyr, field)
+
+    type = check[1]
+    startyr = check[2]
+    endyr = check[3]
+    
+    
+    if (type == "range") {
+
+        output <- timeRangeAvg_calc(dta, prefix, startyr, endyr)
+
+    } else if (type == "pre") {
+
+        output <- apply(dta, 1, function (row) {
+
+            tmp_endyr <- as.integer(row['start_actual_isodate'])
+
+            if (is.na(tmp_endyr) || start_yr >= tmp_endyr) {
+                return(as.integer("NA"))
+
+            } else {
+                return(timeRangeAvg_calc(row, prefix, startyr, tmp_endyr))
+
+            }
+
+        })
+
+
+    } else if (type == "post") {
+
+        output <- lapply(dta, function (row) {
+
+            tmp_startyr <- as.integer(row['start_actual_isodate'])
+
+            if (is.na(tmp_startyr) || tmp_startyr >= endyr) {
+                return(as.integer("NA"))
+
+            } else {
+                return(timeRangeAvg_calc(row, prefix, tmp_startyr, endyr))
+
+            }
+
+        })
+
+    } else if (type == "invalid") {
+        output <- 1
+
+    } else {
+        output <- 2
+
+    }
+
+    return(output)
+
+}
+
+
+
+timeRangeAvg_calc <- function (dta, prefix, startyr, endyr) {
+
+    range <- c(startyr:endyr)
+    search <- paste("^",prefix,"(",paste(range, collapse="|"),")", sep="")
+    matches <- grepl(search, colnames(dta))
+    rmean <- rowMeans(dta[matches], na.rm=FALSE)
+
+    return(rmean)
+
+}

From 2c5d8a5f3f266ece557df01573a21b2e63ccedde Mon Sep 17 00:00:00 2001
From: userz <sgoodman@aiddata.org>
Date: Fri, 9 Oct 2015 14:19:11 -0400
Subject: [PATCH 120/212] Temp. remove timerangetrend.

---
 R/timeRangeTrend.R | 266 ++++++++++++++++++++++-----------------------
 1 file changed, 133 insertions(+), 133 deletions(-)

diff --git a/R/timeRangeTrend.R b/R/timeRangeTrend.R
index 6fbe27e..cb196ce 100644
--- a/R/timeRangeTrend.R
+++ b/R/timeRangeTrend.R
@@ -1,226 +1,226 @@
 
 
 
-timeRangeType <- function (columns, prefix, startyr, endyr, field) {
+# timeRangeType <- function (columns, prefix, startyr, endyr, field) {
 
-    if (!is.na(as.numeric(startyr) && is.na(as.integer(endyr) && !is.na(field) && field %in% column_names) {
-        type = "pre"
-        startyr = as.integer(startyr)
+#     if (!is.na(as.numeric(startyr) && is.na(as.integer(endyr) && !is.na(field) && field %in% column_names) {
+#         type = "pre"
+#         startyr = as.integer(startyr)
     
-    } else if (is.na(as.numeric(startyr) && !is.na(as.integer(endyr) && !is.na(field) && field %in% column_names) {
-        type = "post"
-        endyr = as.integer(endyr)
+#     } else if (is.na(as.numeric(startyr) && !is.na(as.integer(endyr) && !is.na(field) && field %in% column_names) {
+#         type = "post"
+#         endyr = as.integer(endyr)
     
-    } else if (!is.na(as.numeric(startyr) && !is.na(as.integer(endyr) && as.integer(endyr) > as.integer(startyr)) {
-        type = "range"
-        startyr = as.integer(startyr)
-        endyr = as.integer(endyr)
+#     } else if (!is.na(as.numeric(startyr) && !is.na(as.integer(endyr) && as.integer(endyr) > as.integer(startyr)) {
+#         type = "range"
+#         startyr = as.integer(startyr)
+#         endyr = as.integer(endyr)
 
-    } else {
-        type = "invalid"
-    }
+#     } else {
+#         type = "invalid"
+#     }
 
-    return(c(type, startyr, endyr))
+#     return(c(type, startyr, endyr))
 
-}
+# }
 
 
 
-# run linear model on data within year range as specified
-# by field prefix and return coefficients
-timeRangeTrend <- function (dta, prefix, startyr, endyr, field=NA, IDfield, thresh=0.5) {
+# # run linear model on data within year range as specified
+# # by field prefix and return coefficients
+# timeRangeTrend <- function (dta, prefix, startyr, endyr, field=NA, IDfield, thresh=0.5) {
 
-    check <- timeRangeType(colnames(dta), prefix, startyr, endyr, field)
+#     check <- timeRangeType(colnames(dta), prefix, startyr, endyr, field)
 
-    type = check[1]
-    startyr = check[2]
-    endyr = check[3]
+#     type = check[1]
+#     startyr = check[2]
+#     endyr = check[3]
     
     
-    if (type == "range") {
+#     if (type == "range") {
 
-        output <- timeRangeTrend_calc(dta, prefix, startyr, tmp_endyr, IDfield, thresh=0.5)
+#         output <- timeRangeTrend_calc(dta, prefix, startyr, tmp_endyr, IDfield, thresh=0.5)
 
-    } else if (type == "pre") {
+#     } else if (type == "pre") {
 
-        output <- apply(dta, 1, function (row) {
+#         output <- apply(dta, 1, function (row) {
 
-            tmp_endyr <- as.integer(row['start_actual_isodate'])
+#             tmp_endyr <- as.integer(row['start_actual_isodate'])
 
-            if (is.na(tmp_endyr) || start_yr >= tmp_endyr) {
-                return(as.integer("NA"))
+#             if (is.na(tmp_endyr) || start_yr >= tmp_endyr) {
+#                 return(as.integer("NA"))
 
-            } else {
-                return(timeRangeTrend_calc(row, prefix, startyr, tmp_endyr, IDfield, thresh=0.5, field="start_actual_isodate"))
+#             } else {
+#                 return(timeRangeTrend_calc(row, prefix, startyr, tmp_endyr, IDfield, thresh=0.5, field="start_actual_isodate"))
 
-            }
+#             }
 
-        })
+#         })
 
 
-    } else if (type == "post") {
+#     } else if (type == "post") {
 
-        output <- lapply(dta, function (row) {
+#         output <- lapply(dta, function (row) {
 
-            tmp_startyr <- as.integer(row['start_actual_isodate'])
+#             tmp_startyr <- as.integer(row['start_actual_isodate'])
 
-            if (is.na(tmp_startyr) || tmp_startyr >= endyr) {
-                return(as.integer("NA"))
+#             if (is.na(tmp_startyr) || tmp_startyr >= endyr) {
+#                 return(as.integer("NA"))
 
-            } else {
-                return(timeRangeTrend_calc(row, prefix, tmp_startyr, endyr, IDfield, thresh=0.5, field="start_actual_isodate"))
+#             } else {
+#                 return(timeRangeTrend_calc(row, prefix, tmp_startyr, endyr, IDfield, thresh=0.5, field="start_actual_isodate"))
 
-            }
+#             }
 
-        })
+#         })
 
 
-    } else if (type == "invalid") {
-        output <- 1
+#     } else if (type == "invalid") {
+#         output <- 1
 
-    } else {
-        output <- 2
+#     } else {
+#         output <- 2
 
-    }
+#     }
 
-    return(output)
+#     return(output)
 
-}
+# }
 
 
 
-timeRangeTrend_calc <- function (dta, prefix, startyr, endyr, IDfield, thresh=0.5) {
+# timeRangeTrend_calc <- function (dta, prefix, startyr, endyr, IDfield, thresh=0.5) {
 
 
-    # create new dataframe from all columns in dta dataframe that 
-    # are either the ID or a year which is indicated by the prefix
-    grep_str = paste(IDfield, prefix, sep="|")
-    tDF <- dta@data[grepl(grep_str, names(dta@data))]
+#     # create new dataframe from all columns in dta dataframe that 
+#     # are either the ID or a year which is indicated by the prefix
+#     grep_str = paste(IDfield, prefix, sep="|")
+#     tDF <- dta@data[grepl(grep_str, names(dta@data))]
 
-    # melt all years columns in new dataframe
-    analysisDF <- melt(tDF, id=c(IDfield))
+#     # melt all years columns in new dataframe
+#     analysisDF <- melt(tDF, id=c(IDfield))
     
-    # cleaned GREP - remove year digit placeholders
-    # new_pre <- gsub("[0-9]", "", prefix, fixed=TRUE)
+#     # cleaned GREP - remove year digit placeholders
+#     # new_pre <- gsub("[0-9]", "", prefix, fixed=TRUE)
 
-    # get location of year in prefix
-    yIndex <- regexpr("[0-9]", prefix, fixed=TRUE)
+#     # get location of year in prefix
+#     yIndex <- regexpr("[0-9]", prefix, fixed=TRUE)
 
-    # generate new year field by removing prefix from variable (original column names)
-    # analysisDF["Year"] <- lapply(analysisDF["variable"], FUN=function(x) as.numeric(gsub(new_pre, "", x)))
-    analysisDF["Year"] <- lapply(analysisDF["variable"], FUN=function(x) {
-        as.numeric(substr(x, yIndex[1], yIndex[1]+3))
-    })
+#     # generate new year field by removing prefix from variable (original column names)
+#     # analysisDF["Year"] <- lapply(analysisDF["variable"], FUN=function(x) as.numeric(gsub(new_pre, "", x)))
+#     analysisDF["Year"] <- lapply(analysisDF["variable"], FUN=function(x) {
+#         as.numeric(substr(x, yIndex[1], yIndex[1]+3))
+#     })
 
-    # keep years in range specified
-    analysisDF <- analysisDF[analysisDF["Year"] >= startyr ,]
-    analysisDF <- analysisDF[analysisDF["Year"] <= endyr ,]
+#     # keep years in range specified
+#     analysisDF <- analysisDF[analysisDF["Year"] >= startyr ,]
+#     analysisDF <- analysisDF[analysisDF["Year"] <= endyr ,]
 
-    # create empty field
-    dta@data["newfieldID"] <- 0
+#     # create empty field
+#     dta@data["newfieldID"] <- 0
 
-    # iterate over original dataframe
-    for (i in 1:length(dta)) {
-        # get id for row (in original data)
-        ID <- as.character(dta@data[IDfield][i,])
+#     # iterate over original dataframe
+#     for (i in 1:length(dta)) {
+#         # get id for row (in original data)
+#         ID <- as.character(dta@data[IDfield][i,])
 
-        # get all data corresponding to id from analysis dataframe
-        ID_dat <- analysisDF[analysisDF[IDfield] == ID,]
+#         # get all data corresponding to id from analysis dataframe
+#         ID_dat <- analysisDF[analysisDF[IDfield] == ID,]
 
-        dat_length <-length(ID_dat)
-        count_na <-sum(is.na(ID_dat[['value']]))
-        count_non_na <- dat_length - count_na
-        percent_na <- count_na / dat_length
+#         dat_length <-length(ID_dat)
+#         count_na <-sum(is.na(ID_dat[['value']]))
+#         count_non_na <- dat_length - count_na
+#         percent_na <- count_na / dat_length
 
-        # if number of NAs is over threshold or if less than 2 points of data are not NA, return NA
-        if (percent_na > thresh || count_non_na < 2) {
+#         # if number of NAs is over threshold or if less than 2 points of data are not NA, return NA
+#         if (percent_na > thresh || count_non_na < 2) {
             
-            dta@data["newfieldID"][i,] <- NA
+#             dta@data["newfieldID"][i,] <- NA
         
-        } else {
-            # fit trend model
-            trend_mod <- lm(value ~ Year, data=ID_dat, na.action = na.omit)
+#         } else {
+#             # fit trend model
+#             trend_mod <- lm(value ~ Year, data=ID_dat, na.action = na.omit)
       
-            # add trend coefficients to new field
-            dta@data["newfieldID"][i,] <- summary(trend_mod)$coefficients[2]
-        }
+#             # add trend coefficients to new field
+#             dta@data["newfieldID"][i,] <- summary(trend_mod)$coefficients[2]
+#         }
 
-    }
+#     }
     
-    # return new field with trend coefficients
-    return(dta[["newfieldID"]])
+#     # return new field with trend coefficients
+#     return(dta[["newfieldID"]])
     
-}
+# }
 
 
 
-timeRangeAvg <- function (dta, prefix, startyr, endyr, field=NA) {
+# timeRangeAvg <- function (dta, prefix, startyr, endyr, field=NA) {
 
-    check <- timeRangeType(colnames(dta), prefix, startyr, endyr, field)
+#     check <- timeRangeType(colnames(dta), prefix, startyr, endyr, field)
 
-    type = check[1]
-    startyr = check[2]
-    endyr = check[3]
+#     type = check[1]
+#     startyr = check[2]
+#     endyr = check[3]
     
     
-    if (type == "range") {
+#     if (type == "range") {
 
-        output <- timeRangeAvg_calc(dta, prefix, startyr, endyr)
+#         output <- timeRangeAvg_calc(dta, prefix, startyr, endyr)
 
-    } else if (type == "pre") {
+#     } else if (type == "pre") {
 
-        output <- apply(dta, 1, function (row) {
+#         output <- apply(dta, 1, function (row) {
 
-            tmp_endyr <- as.integer(row['start_actual_isodate'])
+#             tmp_endyr <- as.integer(row['start_actual_isodate'])
 
-            if (is.na(tmp_endyr) || start_yr >= tmp_endyr) {
-                return(as.integer("NA"))
+#             if (is.na(tmp_endyr) || start_yr >= tmp_endyr) {
+#                 return(as.integer("NA"))
 
-            } else {
-                return(timeRangeAvg_calc(row, prefix, startyr, tmp_endyr))
+#             } else {
+#                 return(timeRangeAvg_calc(row, prefix, startyr, tmp_endyr))
 
-            }
+#             }
 
-        })
+#         })
 
 
-    } else if (type == "post") {
+#     } else if (type == "post") {
 
-        output <- lapply(dta, function (row) {
+#         output <- lapply(dta, function (row) {
 
-            tmp_startyr <- as.integer(row['start_actual_isodate'])
+#             tmp_startyr <- as.integer(row['start_actual_isodate'])
 
-            if (is.na(tmp_startyr) || tmp_startyr >= endyr) {
-                return(as.integer("NA"))
+#             if (is.na(tmp_startyr) || tmp_startyr >= endyr) {
+#                 return(as.integer("NA"))
 
-            } else {
-                return(timeRangeAvg_calc(row, prefix, tmp_startyr, endyr))
+#             } else {
+#                 return(timeRangeAvg_calc(row, prefix, tmp_startyr, endyr))
 
-            }
+#             }
 
-        })
+#         })
 
-    } else if (type == "invalid") {
-        output <- 1
+#     } else if (type == "invalid") {
+#         output <- 1
 
-    } else {
-        output <- 2
+#     } else {
+#         output <- 2
 
-    }
+#     }
 
-    return(output)
+#     return(output)
 
-}
+# }
 
 
 
-timeRangeAvg_calc <- function (dta, prefix, startyr, endyr) {
+# timeRangeAvg_calc <- function (dta, prefix, startyr, endyr) {
 
-    range <- c(startyr:endyr)
-    search <- paste("^",prefix,"(",paste(range, collapse="|"),")", sep="")
-    matches <- grepl(search, colnames(dta))
-    rmean <- rowMeans(dta[matches], na.rm=FALSE)
+#     range <- c(startyr:endyr)
+#     search <- paste("^",prefix,"(",paste(range, collapse="|"),")", sep="")
+#     matches <- grepl(search, colnames(dta))
+#     rmean <- rowMeans(dta[matches], na.rm=FALSE)
 
-    return(rmean)
+#     return(rmean)
 
-}
+# }

From 3a1313d6c19a8cf11859591e63f326b14f65342d Mon Sep 17 00:00:00 2001
From: userz <sgoodman@aiddata.org>
Date: Wed, 14 Oct 2015 10:00:52 -0400
Subject: [PATCH 121/212] Fix empty check on SAT constraints.

---
 R/SAT.R | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/R/SAT.R b/R/SAT.R
index f0ae8ce..3c85cd5 100644
--- a/R/SAT.R
+++ b/R/SAT.R
@@ -11,7 +11,7 @@ SAT <- function (dta, mtd, constraints, psm_eq, ids, drop_opts, visual, TrtBinCo
 
     print("sat1")
 
-    if (!is.null(constraints)) {
+    if (!is.null(constraints) && contraints != c()) {
         for (cst in 1:length(names(constraints))) {
             if (names(constraints)[cst] == "groups") {
                 dta@data[,"ConstraintGroupSet_Opt"] <- dta@data[,constraints["groups"]]

From d6b02a5576622fe44b9986c8bb7a398d47b38428 Mon Sep 17 00:00:00 2001
From: userz <sgoodman@aiddata.org>
Date: Wed, 14 Oct 2015 11:21:07 -0400
Subject: [PATCH 122/212] Add spatialcausalpsm drop option back in. Remove some
 debug prints.

---
 R/SpatialCausalPSM.R   | 4 ++--
 R/fastNN_binary_func.R | 8 ++++----
 2 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/R/SpatialCausalPSM.R b/R/SpatialCausalPSM.R
index 992a39a..44b6a41 100644
--- a/R/SpatialCausalPSM.R
+++ b/R/SpatialCausalPSM.R
@@ -27,7 +27,7 @@ SpatialCausalPSM <- function(dta, mtd, mdl, drop, visual) {
 
     # Second, if a drop parameter - if set to "support", remove observations
     # that don't overlap in the PSM distribution.
-    #if (drop == "support") {
+    if (drop == "support") {
         
         # Drop
         treated <- retData@data[retData@data[,"TrtBin"] == 1,]
@@ -39,7 +39,7 @@ SpatialCausalPSM <- function(dta, mtd, mdl, drop, visual) {
         retData <- retData[retData@data[,"PSM_trtProb"] >= min_cut,]    
         retData <- retData[retData@data[,"PSM_trtProb"] <= max_cut,] 
         
-    #}
+    }
 
     if (visual == "TRUE") {
         # Post drop histograms
diff --git a/R/fastNN_binary_func.R b/R/fastNN_binary_func.R
index 67e9b3c..908c57d 100644
--- a/R/fastNN_binary_func.R
+++ b/R/fastNN_binary_func.R
@@ -7,7 +7,7 @@
 
 fastNN_binary_func <- function(dta, trtMntVar, ids, curgrp, dist_PSM) {
 
-    print("nn1.0")
+    # print("nn1.0")
 
     #Fast nearest neighbors search - will not arrive at optimum,
     #but this may not be an issue for many analysis.
@@ -24,7 +24,7 @@ fastNN_binary_func <- function(dta, trtMntVar, ids, curgrp, dist_PSM) {
     dta@data[["PSM_distance"]] <- -999
     dta@data[["PSM_match_ID"]] <- -999
 
-    print("nn2.0")
+    # print("nn2.0")
 
     #Calculate a distance decay function
     #to perturb pairs based on their distances.  
@@ -68,7 +68,7 @@ fastNN_binary_func <- function(dta, trtMntVar, ids, curgrp, dist_PSM) {
       
         }
 
-        print("nn3.0")
+        # print("nn3.0")
 
         #Add the matched treatment and control values to the recording data frame
         #best_m_control is the row in the "distance" matrix with the lowest value.  This is the same row as in the index.
@@ -96,7 +96,7 @@ fastNN_binary_func <- function(dta, trtMntVar, ids, curgrp, dist_PSM) {
         #Create a unique pair ID for each group (will simply append a "1" if only 1 group)
         pair_id = paste(curgrp,j,sep="")
         
-        print("nn4.0")
+        # print("nn4.0")
 
         #Add the Treatment ID to the Control Row 
         dta@data$match[which(dta@data[[ids]] == Control_ID)] = Treatment_ID

From 7867fc35c365700dae26db56130948ed3f7621c1 Mon Sep 17 00:00:00 2001
From: userz <sgoodman@aiddata.org>
Date: Wed, 14 Oct 2015 11:37:41 -0400
Subject: [PATCH 123/212] Test.

---
 R/SAT.R                | 2 +-
 R/fastNN_binary_func.R | 8 ++++----
 2 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/R/SAT.R b/R/SAT.R
index 3c85cd5..46c31fb 100644
--- a/R/SAT.R
+++ b/R/SAT.R
@@ -67,7 +67,7 @@ SAT <- function (dta, mtd, constraints, psm_eq, ids, drop_opts, visual, TrtBinCo
         } else { 
             t_dta[[t_index]] <- t_dta[[t_index]][t_dta[[t_index]]$ConstraintGroupSet_Opt == cur_grp,]
             u_dta[[t_index]] <- u_dta[[t_index]][u_dta[[t_index]]$ConstraintGroupSet_Opt == cur_grp,]
-        
+
             cnt = cnt + 1
         }
     }
diff --git a/R/fastNN_binary_func.R b/R/fastNN_binary_func.R
index 908c57d..67e9b3c 100644
--- a/R/fastNN_binary_func.R
+++ b/R/fastNN_binary_func.R
@@ -7,7 +7,7 @@
 
 fastNN_binary_func <- function(dta, trtMntVar, ids, curgrp, dist_PSM) {
 
-    # print("nn1.0")
+    print("nn1.0")
 
     #Fast nearest neighbors search - will not arrive at optimum,
     #but this may not be an issue for many analysis.
@@ -24,7 +24,7 @@ fastNN_binary_func <- function(dta, trtMntVar, ids, curgrp, dist_PSM) {
     dta@data[["PSM_distance"]] <- -999
     dta@data[["PSM_match_ID"]] <- -999
 
-    # print("nn2.0")
+    print("nn2.0")
 
     #Calculate a distance decay function
     #to perturb pairs based on their distances.  
@@ -68,7 +68,7 @@ fastNN_binary_func <- function(dta, trtMntVar, ids, curgrp, dist_PSM) {
       
         }
 
-        # print("nn3.0")
+        print("nn3.0")
 
         #Add the matched treatment and control values to the recording data frame
         #best_m_control is the row in the "distance" matrix with the lowest value.  This is the same row as in the index.
@@ -96,7 +96,7 @@ fastNN_binary_func <- function(dta, trtMntVar, ids, curgrp, dist_PSM) {
         #Create a unique pair ID for each group (will simply append a "1" if only 1 group)
         pair_id = paste(curgrp,j,sep="")
         
-        # print("nn4.0")
+        print("nn4.0")
 
         #Add the Treatment ID to the Control Row 
         dta@data$match[which(dta@data[[ids]] == Control_ID)] = Treatment_ID

From 6b6a215141f56505acade0c61a7af8ae6acd1af8 Mon Sep 17 00:00:00 2001
From: userz <sgoodman@aiddata.org>
Date: Wed, 14 Oct 2015 11:39:09 -0400
Subject: [PATCH 124/212] Test.

---
 R/fastNN_binary_func.R | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/R/fastNN_binary_func.R b/R/fastNN_binary_func.R
index 67e9b3c..3c58659 100644
--- a/R/fastNN_binary_func.R
+++ b/R/fastNN_binary_func.R
@@ -32,10 +32,14 @@ fastNN_binary_func <- function(dta, trtMntVar, ids, curgrp, dist_PSM) {
 
         treated <- sorted_dta[sorted_dta[[trtMntVar]] == 1,]
         untreated <- sorted_dta[sorted_dta[[trtMntVar]] ==0,]
+        
+        print("nn2.1")
 
         #Run the KNN for all neighbors. 
         k <- get.knnx(treated[["PSM_trtProb"]], untreated[["PSM_trtProb"]], 1)
         
+        print("nn2.2")
+
         #Perturb the values based on the distance decay function, if selected.
         if (!is.null(dist_PSM)) {
             for (mC in 1:length(k[[1]])) {
@@ -57,9 +61,12 @@ fastNN_binary_func <- function(dta, trtMntVar, ids, curgrp, dist_PSM) {
                 x_dist = abs(tCoord[1] - tCoord[2])
                 euc_dist = sqrt(y_dist^2 + x_dist^2)
                 
+                print("nn2.3")
+
                 PSM_score = k[["nn.dist"]][mC]
                 geog_Weight = pairDistWeight(dist=euc_dist,threshold=dist_PSM,type="Spherical")
 
+                print("nn2.4")
 
                 
                 k[["nn.dist"]][mC] <- ((1-geog_Weight) * PSM_score)

From 85a4e30c6509430519574847e5bd7957dd8316e0 Mon Sep 17 00:00:00 2001
From: DanRunfola <dan@danrunfola.com>
Date: Fri, 16 Oct 2015 14:05:58 -0400
Subject: [PATCH 125/212] Testing edits to the BuildTimeSeries module to fix
 panel null error

---
 R/BuildTimeSeries.R |  2 +-
 SCI_PanelFix.Rproj  | 17 +++++++++++++++++
 2 files changed, 18 insertions(+), 1 deletion(-)
 create mode 100644 SCI_PanelFix.Rproj

diff --git a/R/BuildTimeSeries.R b/R/BuildTimeSeries.R
index 0f510fc..900a823 100644
--- a/R/BuildTimeSeries.R
+++ b/R/BuildTimeSeries.R
@@ -30,7 +30,7 @@ BuildTimeSeries <- function (dta, idField, varList_pre, startYear, endYear, colY
         }
     }
 
-    print(interpYears)
+    View(dta)
 
 
 
diff --git a/SCI_PanelFix.Rproj b/SCI_PanelFix.Rproj
new file mode 100644
index 0000000..a4dce49
--- /dev/null
+++ b/SCI_PanelFix.Rproj
@@ -0,0 +1,17 @@
+Version: 1.0
+
+RestoreWorkspace: Default
+SaveWorkspace: Default
+AlwaysSaveHistory: Default
+
+EnableCodeIndexing: Yes
+UseSpacesForTab: Yes
+NumSpacesForTab: 4
+Encoding: UTF-8
+
+RnwWeave: Sweave
+LaTeX: pdfLaTeX
+
+BuildType: Package
+PackageUseDevtools: Yes
+PackageInstallArgs: --no-multiarch --with-keep.source

From 34749957111b45e85d7326c9b84d3ab6e8f5c3e5 Mon Sep 17 00:00:00 2001
From: userz <sgoodman@aiddata.org>
Date: Mon, 19 Oct 2015 11:06:12 -0400
Subject: [PATCH 126/212] Update NN function use.

---
 R/SAT.R                | 13 ++++++++-----
 R/fastNN_binary_func.R | 28 +++++++++++++++++++---------
 2 files changed, 27 insertions(+), 14 deletions(-)

diff --git a/R/SAT.R b/R/SAT.R
index 46c31fb..e4ecd01 100644
--- a/R/SAT.R
+++ b/R/SAT.R
@@ -96,16 +96,17 @@ SAT <- function (dta, mtd, constraints, psm_eq, ids, drop_opts, visual, TrtBinCo
             # temp_dta[[i]] <- NN_WithReplacement_binary_func(it_dta,TrtBinColName,ids,cur_grp,dist_PSM) 
         }
     }
+
     print("sat4")
 
     #Build the final datasets from subsets
     if (cnt > 1) {
-      dta <- temp_dta[[1]]
-      for(k in 2:cnt) {
-        dta  <- maptools::spRbind(dta, temp_dta[[k]])
-      } 
+        dta <- temp_dta[[1]]
+        for(k in 2:cnt) {
+            dta  <- maptools::spRbind(dta, temp_dta[[k]])
+        } 
     } else {
-      dta <- temp_dta[[1]]
+        dta <- temp_dta[[1]]
     }
 
     print("sat5")
@@ -130,6 +131,8 @@ SAT <- function (dta, mtd, constraints, psm_eq, ids, drop_opts, visual, TrtBinCo
         dta <- dta[dta@data[,"PSM_distance"] < psm_sd_thresh,]
     }
   
+
+
     #Plot the pre and post-dropping balance for PSM model...
     #Balance metrics are based on "Misunderstandings between experimentalists and
     #observationalists about causal inference", Imal, King, and Stuart.
diff --git a/R/fastNN_binary_func.R b/R/fastNN_binary_func.R
index 3c58659..6eb4639 100644
--- a/R/fastNN_binary_func.R
+++ b/R/fastNN_binary_func.R
@@ -24,14 +24,18 @@ fastNN_binary_func <- function(dta, trtMntVar, ids, curgrp, dist_PSM) {
     dta@data[["PSM_distance"]] <- -999
     dta@data[["PSM_match_ID"]] <- -999
 
-    print("nn2.0")
+    dta@data[["nn_matched"]] <- 0
+
+    print("nn2")
 
     #Calculate a distance decay function
     #to perturb pairs based on their distances.  
     for (j in 1:it_cnt) {
 
-        treated <- sorted_dta[sorted_dta[[trtMntVar]] == 1,]
-        untreated <- sorted_dta[sorted_dta[[trtMntVar]] ==0,]
+        print("nn2.0")
+
+        treated <- sorted_dta[which(sorted_dta[[trtMntVar]] == 1 & sorted_dta[['nn_matched']] == 0),]
+        untreated <- sorted_dta[which(sorted_dta[[trtMntVar]] == 0 & sorted_dta[['nn_matched']] == 0),]
         
         print("nn2.1")
 
@@ -43,6 +47,8 @@ fastNN_binary_func <- function(dta, trtMntVar, ids, curgrp, dist_PSM) {
         #Perturb the values based on the distance decay function, if selected.
         if (!is.null(dist_PSM)) {
             for (mC in 1:length(k[[1]])) {
+
+                print("nn2.2.0")
                 #Calculate the Euclidean Distance between pairs
                 Control_ID = toString(untreated[[ids]][[mC]])
 
@@ -61,12 +67,12 @@ fastNN_binary_func <- function(dta, trtMntVar, ids, curgrp, dist_PSM) {
                 x_dist = abs(tCoord[1] - tCoord[2])
                 euc_dist = sqrt(y_dist^2 + x_dist^2)
                 
-                print("nn2.3")
+                print("nn2.2.1")
 
                 PSM_score = k[["nn.dist"]][mC]
                 geog_Weight = pairDistWeight(dist=euc_dist,threshold=dist_PSM,type="Spherical")
 
-                print("nn2.4")
+                print("nn2.2.2")
 
                 
                 k[["nn.dist"]][mC] <- ((1-geog_Weight) * PSM_score)
@@ -75,7 +81,7 @@ fastNN_binary_func <- function(dta, trtMntVar, ids, curgrp, dist_PSM) {
       
         }
 
-        print("nn3.0")
+        print("nn2.3")
 
         #Add the matched treatment and control values to the recording data frame
         #best_m_control is the row in the "distance" matrix with the lowest value.  This is the same row as in the index.
@@ -103,7 +109,7 @@ fastNN_binary_func <- function(dta, trtMntVar, ids, curgrp, dist_PSM) {
         #Create a unique pair ID for each group (will simply append a "1" if only 1 group)
         pair_id = paste(curgrp,j,sep="")
         
-        print("nn4.0")
+        print("nn2.4")
 
         #Add the Treatment ID to the Control Row 
         dta@data$match[which(dta@data[[ids]] == Control_ID)] = Treatment_ID
@@ -116,10 +122,14 @@ fastNN_binary_func <- function(dta, trtMntVar, ids, curgrp, dist_PSM) {
         dta@data[["PSM_distance"]][which(dta@data[[ids]] == Treatment_ID)] = k[["nn.dist"]][,1][best_m_control]
         dta@data[["PSM_match_ID"]][which(dta@data[[ids]] == Treatment_ID)] = pair_id        
         
+        print("nn2.5")
+
         #Drop the paired match out of the iteration matrix 
-        sorted_dta <- sorted_dta[sorted_dta[[ids]] != Treatment_ID ,]
-        sorted_dta <- sorted_dta[sorted_dta[[ids]] != Control_ID ,]    
+        # sorted_dta <- sorted_dta[sorted_dta[[ids]] != Treatment_ID ,]
+        # sorted_dta <- sorted_dta[sorted_dta[[ids]] != Control_ID ,]    
     
+        sorted_dta[which(dta@data[[ids]] == Control_ID | dta@data[[ids]] == Treatment_ID),]['nn_matched'] <- 1
+
     }
 
     return(dta) 

From 2c54d98c479229b2dc221e7513e1213674303042 Mon Sep 17 00:00:00 2001
From: userz <sgoodman@aiddata.org>
Date: Mon, 19 Oct 2015 11:12:32 -0400
Subject: [PATCH 127/212] Test

---
 R/BuildTimeSeries.R | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/R/BuildTimeSeries.R b/R/BuildTimeSeries.R
index 900a823..f0c26bc 100644
--- a/R/BuildTimeSeries.R
+++ b/R/BuildTimeSeries.R
@@ -30,8 +30,8 @@ BuildTimeSeries <- function (dta, idField, varList_pre, startYear, endYear, colY
         }
     }
 
-    View(dta)
-
+    # View(dta)
+    # print(interpYears)
 
 
 

From 3807331cb263abbca7864ef9a2e7ea5fed395a79 Mon Sep 17 00:00:00 2001
From: userz <sgoodman@aiddata.org>
Date: Mon, 19 Oct 2015 11:16:15 -0400
Subject: [PATCH 128/212] Test

---
 R/fastNN_binary_func.R | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/R/fastNN_binary_func.R b/R/fastNN_binary_func.R
index 6eb4639..70a5985 100644
--- a/R/fastNN_binary_func.R
+++ b/R/fastNN_binary_func.R
@@ -128,7 +128,7 @@ fastNN_binary_func <- function(dta, trtMntVar, ids, curgrp, dist_PSM) {
         # sorted_dta <- sorted_dta[sorted_dta[[ids]] != Treatment_ID ,]
         # sorted_dta <- sorted_dta[sorted_dta[[ids]] != Control_ID ,]    
     
-        sorted_dta[which(dta@data[[ids]] == Control_ID | dta@data[[ids]] == Treatment_ID),]['nn_matched'] <- 1
+        sorted_dta[which(dta@data[[ids]] == Control_ID | dta@data[[ids]] == Treatment_ID),][['nn_matched']] <- 1
 
     }
 

From 4d39760bddd95730d43505cd287e136442bfc869 Mon Sep 17 00:00:00 2001
From: userz <sgoodman@aiddata.org>
Date: Mon, 19 Oct 2015 11:17:59 -0400
Subject: [PATCH 129/212] Test

---
 R/fastNN_binary_func.R | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/R/fastNN_binary_func.R b/R/fastNN_binary_func.R
index 70a5985..1f34c03 100644
--- a/R/fastNN_binary_func.R
+++ b/R/fastNN_binary_func.R
@@ -128,7 +128,7 @@ fastNN_binary_func <- function(dta, trtMntVar, ids, curgrp, dist_PSM) {
         # sorted_dta <- sorted_dta[sorted_dta[[ids]] != Treatment_ID ,]
         # sorted_dta <- sorted_dta[sorted_dta[[ids]] != Control_ID ,]    
     
-        sorted_dta[which(dta@data[[ids]] == Control_ID | dta@data[[ids]] == Treatment_ID),][['nn_matched']] <- 1
+        sorted_dta[which(sorted_dta[[ids]] == Control_ID | sorted_dta[[ids]] == Treatment_ID),][['nn_matched']] <- 1
 
     }
 

From c983349577f629c4ec9cbf8744bc7eaa4ca75990 Mon Sep 17 00:00:00 2001
From: userz <sgoodman@aiddata.org>
Date: Mon, 19 Oct 2015 11:20:00 -0400
Subject: [PATCH 130/212] Test

---
 R/fastNN_binary_func.R | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/R/fastNN_binary_func.R b/R/fastNN_binary_func.R
index 1f34c03..b096900 100644
--- a/R/fastNN_binary_func.R
+++ b/R/fastNN_binary_func.R
@@ -14,6 +14,8 @@ fastNN_binary_func <- function(dta, trtMntVar, ids, curgrp, dist_PSM) {
     #Effectively loops through all observations in the treatment group, ordered by PSM score - higher scores go first.
     sorted_dta <- dta@data[order(dta@data[["PSM_trtProb"]]),]
     
+    sorted_dta[["nn_matched"]] <- 0
+
 
     #Conduct the matching
     treated <- sorted_dta[sorted_dta[[trtMntVar]] == 1,]
@@ -24,7 +26,6 @@ fastNN_binary_func <- function(dta, trtMntVar, ids, curgrp, dist_PSM) {
     dta@data[["PSM_distance"]] <- -999
     dta@data[["PSM_match_ID"]] <- -999
 
-    dta@data[["nn_matched"]] <- 0
 
     print("nn2")
 

From 8f69f06cb8ebdd3373f5a84d987949ecffe8f39e Mon Sep 17 00:00:00 2001
From: userz <sgoodman@aiddata.org>
Date: Mon, 19 Oct 2015 11:28:01 -0400
Subject: [PATCH 131/212] Test

---
 R/fastNN_binary_func.R | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/R/fastNN_binary_func.R b/R/fastNN_binary_func.R
index b096900..55ac80d 100644
--- a/R/fastNN_binary_func.R
+++ b/R/fastNN_binary_func.R
@@ -12,7 +12,7 @@ fastNN_binary_func <- function(dta, trtMntVar, ids, curgrp, dist_PSM) {
     #Fast nearest neighbors search - will not arrive at optimum,
     #but this may not be an issue for many analysis.
     #Effectively loops through all observations in the treatment group, ordered by PSM score - higher scores go first.
-    sorted_dta <- dta@data[order(dta@data[["PSM_trtProb"]]),]
+    sorted_dta <- dta@data[order(dta@data[["PSM_trtProb"]]), c(ids, "PSM_trtProb")]
     
     sorted_dta[["nn_matched"]] <- 0
 

From 64d39e4cf5964e5435ca6ce9eb23e329bd627557 Mon Sep 17 00:00:00 2001
From: userz <sgoodman@aiddata.org>
Date: Mon, 19 Oct 2015 11:29:52 -0400
Subject: [PATCH 132/212] Test

---
 R/fastNN_binary_func.R | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/R/fastNN_binary_func.R b/R/fastNN_binary_func.R
index 55ac80d..badef5f 100644
--- a/R/fastNN_binary_func.R
+++ b/R/fastNN_binary_func.R
@@ -12,7 +12,7 @@ fastNN_binary_func <- function(dta, trtMntVar, ids, curgrp, dist_PSM) {
     #Fast nearest neighbors search - will not arrive at optimum,
     #but this may not be an issue for many analysis.
     #Effectively loops through all observations in the treatment group, ordered by PSM score - higher scores go first.
-    sorted_dta <- dta@data[order(dta@data[["PSM_trtProb"]]), c(ids, "PSM_trtProb")]
+    sorted_dta <- dta@data[order(dta@data[["PSM_trtProb"]]), c(ids, trtMntVar, "PSM_trtProb")]
     
     sorted_dta[["nn_matched"]] <- 0
 

From 90e136a09a796df600055e42c554b316256dcd42 Mon Sep 17 00:00:00 2001
From: userz <sgoodman@aiddata.org>
Date: Mon, 19 Oct 2015 11:32:21 -0400
Subject: [PATCH 133/212] Test

---
 R/fastNN_binary_func.R | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/R/fastNN_binary_func.R b/R/fastNN_binary_func.R
index badef5f..1fb5c0a 100644
--- a/R/fastNN_binary_func.R
+++ b/R/fastNN_binary_func.R
@@ -13,7 +13,7 @@ fastNN_binary_func <- function(dta, trtMntVar, ids, curgrp, dist_PSM) {
     #but this may not be an issue for many analysis.
     #Effectively loops through all observations in the treatment group, ordered by PSM score - higher scores go first.
     sorted_dta <- dta@data[order(dta@data[["PSM_trtProb"]]), c(ids, trtMntVar, "PSM_trtProb")]
-    
+    return(sorted_dta)
     sorted_dta[["nn_matched"]] <- 0
 
 

From 0f914bd1f64a2f1a16600783507ef18a64a38e41 Mon Sep 17 00:00:00 2001
From: userz <sgoodman@aiddata.org>
Date: Mon, 19 Oct 2015 11:36:54 -0400
Subject: [PATCH 134/212] Test

---
 R/fastNN_binary_func.R | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/R/fastNN_binary_func.R b/R/fastNN_binary_func.R
index 1fb5c0a..6fa4ade 100644
--- a/R/fastNN_binary_func.R
+++ b/R/fastNN_binary_func.R
@@ -12,9 +12,10 @@ fastNN_binary_func <- function(dta, trtMntVar, ids, curgrp, dist_PSM) {
     #Fast nearest neighbors search - will not arrive at optimum,
     #but this may not be an issue for many analysis.
     #Effectively loops through all observations in the treatment group, ordered by PSM score - higher scores go first.
-    sorted_dta <- dta@data[order(dta@data[["PSM_trtProb"]]), c(ids, trtMntVar, "PSM_trtProb")]
-    return(sorted_dta)
-    sorted_dta[["nn_matched"]] <- 0
+    dta@data[["nn_matched"]] <- 0
+
+    sorted_dta <- dta@data[order(dta@data[["PSM_trtProb"]]), c(ids, trtMntVar, "PSM_trtProb", "nn_matched"])]
+
 
 
     #Conduct the matching

From 417ec95f617e118bf4e3665d96328c70392cbfde Mon Sep 17 00:00:00 2001
From: userz <sgoodman@aiddata.org>
Date: Mon, 19 Oct 2015 11:37:16 -0400
Subject: [PATCH 135/212] Test

---
 R/fastNN_binary_func.R | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/R/fastNN_binary_func.R b/R/fastNN_binary_func.R
index 6fa4ade..52e0304 100644
--- a/R/fastNN_binary_func.R
+++ b/R/fastNN_binary_func.R
@@ -14,7 +14,7 @@ fastNN_binary_func <- function(dta, trtMntVar, ids, curgrp, dist_PSM) {
     #Effectively loops through all observations in the treatment group, ordered by PSM score - higher scores go first.
     dta@data[["nn_matched"]] <- 0
 
-    sorted_dta <- dta@data[order(dta@data[["PSM_trtProb"]]), c(ids, trtMntVar, "PSM_trtProb", "nn_matched"])]
+    sorted_dta <- dta@data[order(dta@data[["PSM_trtProb"]]), c(ids, trtMntVar, "PSM_trtProb", "nn_matched")]
 
 
 

From 98b0f86cb651d2fab27ada0a403029b02bef7f71 Mon Sep 17 00:00:00 2001
From: userz <sgoodman@aiddata.org>
Date: Mon, 19 Oct 2015 11:43:56 -0400
Subject: [PATCH 136/212] Test

---
 R/fastNN_binary_func.R | 18 +++++++++++++++++-
 1 file changed, 17 insertions(+), 1 deletion(-)

diff --git a/R/fastNN_binary_func.R b/R/fastNN_binary_func.R
index 52e0304..1205d5e 100644
--- a/R/fastNN_binary_func.R
+++ b/R/fastNN_binary_func.R
@@ -9,6 +9,7 @@ fastNN_binary_func <- function(dta, trtMntVar, ids, curgrp, dist_PSM) {
 
     print("nn1.0")
 
+
     #Fast nearest neighbors search - will not arrive at optimum,
     #but this may not be an issue for many analysis.
     #Effectively loops through all observations in the treatment group, ordered by PSM score - higher scores go first.
@@ -27,24 +28,29 @@ fastNN_binary_func <- function(dta, trtMntVar, ids, curgrp, dist_PSM) {
     dta@data[["PSM_distance"]] <- -999
     dta@data[["PSM_match_ID"]] <- -999
 
-
     print("nn2")
 
     #Calculate a distance decay function
     #to perturb pairs based on their distances.  
     for (j in 1:it_cnt) {
+        time_list <- c()
 
         print("nn2.0")
+        timer <- proc.time()
 
         treated <- sorted_dta[which(sorted_dta[[trtMntVar]] == 1 & sorted_dta[['nn_matched']] == 0),]
         untreated <- sorted_dta[which(sorted_dta[[trtMntVar]] == 0 & sorted_dta[['nn_matched']] == 0),]
         
+        time_list[1] <- (proc.time() - timer)[3]
         print("nn2.1")
+        timer <- proc.time()
 
         #Run the KNN for all neighbors. 
         k <- get.knnx(treated[["PSM_trtProb"]], untreated[["PSM_trtProb"]], 1)
         
+        time_list[2] <- (proc.time() - timer)[3]
         print("nn2.2")
+        timer <- proc.time()
 
         #Perturb the values based on the distance decay function, if selected.
         if (!is.null(dist_PSM)) {
@@ -83,7 +89,9 @@ fastNN_binary_func <- function(dta, trtMntVar, ids, curgrp, dist_PSM) {
       
         }
 
+        time_list[3] <- (proc.time() - timer)[3]
         print("nn2.3")
+        timer <- proc.time()
 
         #Add the matched treatment and control values to the recording data frame
         #best_m_control is the row in the "distance" matrix with the lowest value.  This is the same row as in the index.
@@ -111,7 +119,9 @@ fastNN_binary_func <- function(dta, trtMntVar, ids, curgrp, dist_PSM) {
         #Create a unique pair ID for each group (will simply append a "1" if only 1 group)
         pair_id = paste(curgrp,j,sep="")
         
+        time_list[4] <- (proc.time() - timer)[3]
         print("nn2.4")
+        timer <- proc.time()
 
         #Add the Treatment ID to the Control Row 
         dta@data$match[which(dta@data[[ids]] == Control_ID)] = Treatment_ID
@@ -124,7 +134,9 @@ fastNN_binary_func <- function(dta, trtMntVar, ids, curgrp, dist_PSM) {
         dta@data[["PSM_distance"]][which(dta@data[[ids]] == Treatment_ID)] = k[["nn.dist"]][,1][best_m_control]
         dta@data[["PSM_match_ID"]][which(dta@data[[ids]] == Treatment_ID)] = pair_id        
         
+        time_list[5] <- (proc.time() - timer)[3]
         print("nn2.5")
+        timer <- proc.time()
 
         #Drop the paired match out of the iteration matrix 
         # sorted_dta <- sorted_dta[sorted_dta[[ids]] != Treatment_ID ,]
@@ -132,6 +144,10 @@ fastNN_binary_func <- function(dta, trtMntVar, ids, curgrp, dist_PSM) {
     
         sorted_dta[which(sorted_dta[[ids]] == Control_ID | sorted_dta[[ids]] == Treatment_ID),][['nn_matched']] <- 1
 
+        time_list[6] <- (proc.time() - timer)[3]
+
+        print(paste(time_list))
+
     }
 
     return(dta) 

From e3e82558dc871588fd0ce899356f8c8a93c69b72 Mon Sep 17 00:00:00 2001
From: userz <sgoodman@aiddata.org>
Date: Mon, 19 Oct 2015 11:44:55 -0400
Subject: [PATCH 137/212] Test

---
 R/fastNN_binary_func.R | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/R/fastNN_binary_func.R b/R/fastNN_binary_func.R
index 1205d5e..3149919 100644
--- a/R/fastNN_binary_func.R
+++ b/R/fastNN_binary_func.R
@@ -35,21 +35,21 @@ fastNN_binary_func <- function(dta, trtMntVar, ids, curgrp, dist_PSM) {
     for (j in 1:it_cnt) {
         time_list <- c()
 
-        print("nn2.0")
+        # print("nn2.0")
         timer <- proc.time()
 
         treated <- sorted_dta[which(sorted_dta[[trtMntVar]] == 1 & sorted_dta[['nn_matched']] == 0),]
         untreated <- sorted_dta[which(sorted_dta[[trtMntVar]] == 0 & sorted_dta[['nn_matched']] == 0),]
         
         time_list[1] <- (proc.time() - timer)[3]
-        print("nn2.1")
+        # print("nn2.1")
         timer <- proc.time()
 
         #Run the KNN for all neighbors. 
         k <- get.knnx(treated[["PSM_trtProb"]], untreated[["PSM_trtProb"]], 1)
         
         time_list[2] <- (proc.time() - timer)[3]
-        print("nn2.2")
+        # print("nn2.2")
         timer <- proc.time()
 
         #Perturb the values based on the distance decay function, if selected.
@@ -90,7 +90,7 @@ fastNN_binary_func <- function(dta, trtMntVar, ids, curgrp, dist_PSM) {
         }
 
         time_list[3] <- (proc.time() - timer)[3]
-        print("nn2.3")
+        # print("nn2.3")
         timer <- proc.time()
 
         #Add the matched treatment and control values to the recording data frame
@@ -120,7 +120,7 @@ fastNN_binary_func <- function(dta, trtMntVar, ids, curgrp, dist_PSM) {
         pair_id = paste(curgrp,j,sep="")
         
         time_list[4] <- (proc.time() - timer)[3]
-        print("nn2.4")
+        # print("nn2.4")
         timer <- proc.time()
 
         #Add the Treatment ID to the Control Row 
@@ -135,7 +135,7 @@ fastNN_binary_func <- function(dta, trtMntVar, ids, curgrp, dist_PSM) {
         dta@data[["PSM_match_ID"]][which(dta@data[[ids]] == Treatment_ID)] = pair_id        
         
         time_list[5] <- (proc.time() - timer)[3]
-        print("nn2.5")
+        # print("nn2.5")
         timer <- proc.time()
 
         #Drop the paired match out of the iteration matrix 

From cb72f9242280386b13a023c86851470cc6ce4e3e Mon Sep 17 00:00:00 2001
From: userz <sgoodman@aiddata.org>
Date: Mon, 19 Oct 2015 11:50:16 -0400
Subject: [PATCH 138/212] Test

---
 R/fastNN_binary_func.R | 28 +++++++++++++---------------
 1 file changed, 13 insertions(+), 15 deletions(-)

diff --git a/R/fastNN_binary_func.R b/R/fastNN_binary_func.R
index 3149919..e797384 100644
--- a/R/fastNN_binary_func.R
+++ b/R/fastNN_binary_func.R
@@ -41,14 +41,14 @@ fastNN_binary_func <- function(dta, trtMntVar, ids, curgrp, dist_PSM) {
         treated <- sorted_dta[which(sorted_dta[[trtMntVar]] == 1 & sorted_dta[['nn_matched']] == 0),]
         untreated <- sorted_dta[which(sorted_dta[[trtMntVar]] == 0 & sorted_dta[['nn_matched']] == 0),]
         
-        time_list[1] <- (proc.time() - timer)[3]
+        time_list[1] <- round((proc.time() - timer)[3],5)
         # print("nn2.1")
         timer <- proc.time()
 
         #Run the KNN for all neighbors. 
         k <- get.knnx(treated[["PSM_trtProb"]], untreated[["PSM_trtProb"]], 1)
         
-        time_list[2] <- (proc.time() - timer)[3]
+        time_list[2] <- round((proc.time() - timer)[3],5)
         # print("nn2.2")
         timer <- proc.time()
 
@@ -89,7 +89,7 @@ fastNN_binary_func <- function(dta, trtMntVar, ids, curgrp, dist_PSM) {
       
         }
 
-        time_list[3] <- (proc.time() - timer)[3]
+        time_list[3] <- round((proc.time() - timer)[3],5)
         # print("nn2.3")
         timer <- proc.time()
 
@@ -119,22 +119,20 @@ fastNN_binary_func <- function(dta, trtMntVar, ids, curgrp, dist_PSM) {
         #Create a unique pair ID for each group (will simply append a "1" if only 1 group)
         pair_id = paste(curgrp,j,sep="")
         
-        time_list[4] <- (proc.time() - timer)[3]
+        time_list[4] <- round((proc.time() - timer)[3],5)
         # print("nn2.4")
         timer <- proc.time()
 
-        #Add the Treatment ID to the Control Row 
+        #Add the Treatment ID to the Control Row and Add the Control ID to the Treatment Row
         dta@data$match[which(dta@data[[ids]] == Control_ID)] = Treatment_ID
-        dta@data$PSM_distance[which(dta@data[[ids]] == Control_ID)] = k[["nn.dist"]][,1][best_m_control]
-        dta@data$PSM_match_ID[which(dta@data[[ids]] == Control_ID)] = pair_id        
-        
-        
-        #Add the Control ID to the Treatment Row
-        dta@data[["match"]][which(dta@data[[ids]] == Treatment_ID)] = Control_ID
-        dta@data[["PSM_distance"]][which(dta@data[[ids]] == Treatment_ID)] = k[["nn.dist"]][,1][best_m_control]
-        dta@data[["PSM_match_ID"]][which(dta@data[[ids]] == Treatment_ID)] = pair_id        
+        dta@data$match[which(dta@data[[ids]] == Treatment_ID)] = Control_ID
+
+        dta@data$PSM_distance[which(dta@data[[ids]] == Control_ID | dta@data[[ids]] == Treatment_ID)] <- k[["nn.dist"]][,1][best_m_control]
+        dta@data$PSM_match_ID[which(dta@data[[ids]] == Control_ID | dta@data[[ids]] == Treatment_ID)] <- pair_id        
+
+
         
-        time_list[5] <- (proc.time() - timer)[3]
+        time_list[5] <- round((proc.time() - timer)[3],5)
         # print("nn2.5")
         timer <- proc.time()
 
@@ -144,7 +142,7 @@ fastNN_binary_func <- function(dta, trtMntVar, ids, curgrp, dist_PSM) {
     
         sorted_dta[which(sorted_dta[[ids]] == Control_ID | sorted_dta[[ids]] == Treatment_ID),][['nn_matched']] <- 1
 
-        time_list[6] <- (proc.time() - timer)[3]
+        time_list[6] <- round((proc.time() - timer)[3],5)
 
         print(paste(time_list))
 

From 868b35799e47efebd8fe5e49a56cc281705e3142 Mon Sep 17 00:00:00 2001
From: userz <sgoodman@aiddata.org>
Date: Mon, 19 Oct 2015 11:53:09 -0400
Subject: [PATCH 139/212] Test

---
 R/fastNN_binary_func.R | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/R/fastNN_binary_func.R b/R/fastNN_binary_func.R
index e797384..d19ac0e 100644
--- a/R/fastNN_binary_func.R
+++ b/R/fastNN_binary_func.R
@@ -8,6 +8,7 @@
 fastNN_binary_func <- function(dta, trtMntVar, ids, curgrp, dist_PSM) {
 
     print("nn1.0")
+    timerx <- proc.time()
 
 
     #Fast nearest neighbors search - will not arrive at optimum,
@@ -124,8 +125,8 @@ fastNN_binary_func <- function(dta, trtMntVar, ids, curgrp, dist_PSM) {
         timer <- proc.time()
 
         #Add the Treatment ID to the Control Row and Add the Control ID to the Treatment Row
-        dta@data$match[which(dta@data[[ids]] == Control_ID)] = Treatment_ID
-        dta@data$match[which(dta@data[[ids]] == Treatment_ID)] = Control_ID
+        dta@data$match[which(dta@data[[ids]] == Control_ID)] <- Treatment_ID
+        dta@data$match[which(dta@data[[ids]] == Treatment_ID)] <- Control_ID
 
         dta@data$PSM_distance[which(dta@data[[ids]] == Control_ID | dta@data[[ids]] == Treatment_ID)] <- k[["nn.dist"]][,1][best_m_control]
         dta@data$PSM_match_ID[which(dta@data[[ids]] == Control_ID | dta@data[[ids]] == Treatment_ID)] <- pair_id        
@@ -148,6 +149,8 @@ fastNN_binary_func <- function(dta, trtMntVar, ids, curgrp, dist_PSM) {
 
     }
 
+    print(proc.time() - timerx)[3])
+
     return(dta) 
 
 }

From e78a2abf88f8a63a532284ae7aa6f61c36dbe13b Mon Sep 17 00:00:00 2001
From: userz <sgoodman@aiddata.org>
Date: Mon, 19 Oct 2015 11:53:33 -0400
Subject: [PATCH 140/212] Test

---
 R/fastNN_binary_func.R | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/R/fastNN_binary_func.R b/R/fastNN_binary_func.R
index d19ac0e..af3276c 100644
--- a/R/fastNN_binary_func.R
+++ b/R/fastNN_binary_func.R
@@ -149,7 +149,7 @@ fastNN_binary_func <- function(dta, trtMntVar, ids, curgrp, dist_PSM) {
 
     }
 
-    print(proc.time() - timerx)[3])
+    print((proc.time() - timerx)[3])
 
     return(dta) 
 

From 0fd30074cbc553f315b59bb05372a963f9d369e2 Mon Sep 17 00:00:00 2001
From: userz <sgoodman@aiddata.org>
Date: Mon, 19 Oct 2015 11:56:14 -0400
Subject: [PATCH 141/212] Test

---
 R/fastNN_binary_func.R | 38 +++++++++++++++++++-------------------
 1 file changed, 19 insertions(+), 19 deletions(-)

diff --git a/R/fastNN_binary_func.R b/R/fastNN_binary_func.R
index af3276c..dcda985 100644
--- a/R/fastNN_binary_func.R
+++ b/R/fastNN_binary_func.R
@@ -29,35 +29,36 @@ fastNN_binary_func <- function(dta, trtMntVar, ids, curgrp, dist_PSM) {
     dta@data[["PSM_distance"]] <- -999
     dta@data[["PSM_match_ID"]] <- -999
 
-    print("nn2")
+    # print("nn2")
 
     #Calculate a distance decay function
     #to perturb pairs based on their distances.  
     for (j in 1:it_cnt) {
-        time_list <- c()
+        # time_list <- c()
 
         # print("nn2.0")
-        timer <- proc.time()
+        # timer <- proc.time()
 
         treated <- sorted_dta[which(sorted_dta[[trtMntVar]] == 1 & sorted_dta[['nn_matched']] == 0),]
         untreated <- sorted_dta[which(sorted_dta[[trtMntVar]] == 0 & sorted_dta[['nn_matched']] == 0),]
         
-        time_list[1] <- round((proc.time() - timer)[3],5)
+        # time_list[1] <- round((proc.time() - timer)[3],5)
         # print("nn2.1")
-        timer <- proc.time()
+        # timer <- proc.time()
 
         #Run the KNN for all neighbors. 
         k <- get.knnx(treated[["PSM_trtProb"]], untreated[["PSM_trtProb"]], 1)
         
-        time_list[2] <- round((proc.time() - timer)[3],5)
+        # time_list[2] <- round((proc.time() - timer)[3],5)
         # print("nn2.2")
-        timer <- proc.time()
+        # timer <- proc.time()
 
         #Perturb the values based on the distance decay function, if selected.
         if (!is.null(dist_PSM)) {
             for (mC in 1:length(k[[1]])) {
 
-                print("nn2.2.0")
+                # print("nn2.2.0")
+                
                 #Calculate the Euclidean Distance between pairs
                 Control_ID = toString(untreated[[ids]][[mC]])
 
@@ -76,12 +77,12 @@ fastNN_binary_func <- function(dta, trtMntVar, ids, curgrp, dist_PSM) {
                 x_dist = abs(tCoord[1] - tCoord[2])
                 euc_dist = sqrt(y_dist^2 + x_dist^2)
                 
-                print("nn2.2.1")
+                # print("nn2.2.1")
 
                 PSM_score = k[["nn.dist"]][mC]
                 geog_Weight = pairDistWeight(dist=euc_dist,threshold=dist_PSM,type="Spherical")
 
-                print("nn2.2.2")
+                # print("nn2.2.2")
 
                 
                 k[["nn.dist"]][mC] <- ((1-geog_Weight) * PSM_score)
@@ -90,9 +91,9 @@ fastNN_binary_func <- function(dta, trtMntVar, ids, curgrp, dist_PSM) {
       
         }
 
-        time_list[3] <- round((proc.time() - timer)[3],5)
+        # time_list[3] <- round((proc.time() - timer)[3],5)
         # print("nn2.3")
-        timer <- proc.time()
+        # timer <- proc.time()
 
         #Add the matched treatment and control values to the recording data frame
         #best_m_control is the row in the "distance" matrix with the lowest value.  This is the same row as in the index.
@@ -120,9 +121,9 @@ fastNN_binary_func <- function(dta, trtMntVar, ids, curgrp, dist_PSM) {
         #Create a unique pair ID for each group (will simply append a "1" if only 1 group)
         pair_id = paste(curgrp,j,sep="")
         
-        time_list[4] <- round((proc.time() - timer)[3],5)
+        # time_list[4] <- round((proc.time() - timer)[3],5)
         # print("nn2.4")
-        timer <- proc.time()
+        # timer <- proc.time()
 
         #Add the Treatment ID to the Control Row and Add the Control ID to the Treatment Row
         dta@data$match[which(dta@data[[ids]] == Control_ID)] <- Treatment_ID
@@ -133,9 +134,9 @@ fastNN_binary_func <- function(dta, trtMntVar, ids, curgrp, dist_PSM) {
 
 
         
-        time_list[5] <- round((proc.time() - timer)[3],5)
+        # time_list[5] <- round((proc.time() - timer)[3],5)
         # print("nn2.5")
-        timer <- proc.time()
+        # timer <- proc.time()
 
         #Drop the paired match out of the iteration matrix 
         # sorted_dta <- sorted_dta[sorted_dta[[ids]] != Treatment_ID ,]
@@ -143,9 +144,8 @@ fastNN_binary_func <- function(dta, trtMntVar, ids, curgrp, dist_PSM) {
     
         sorted_dta[which(sorted_dta[[ids]] == Control_ID | sorted_dta[[ids]] == Treatment_ID),][['nn_matched']] <- 1
 
-        time_list[6] <- round((proc.time() - timer)[3],5)
-
-        print(paste(time_list))
+        # time_list[6] <- round((proc.time() - timer)[3],5)
+        # print(paste(time_list))
 
     }
 

From 607b759e0d6b285e08e4b069d3bc7b9e7ec1bf9e Mon Sep 17 00:00:00 2001
From: userz <sgoodman@aiddata.org>
Date: Mon, 19 Oct 2015 12:00:33 -0400
Subject: [PATCH 142/212] Test

---
 R/fastNN_binary_func.R | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/R/fastNN_binary_func.R b/R/fastNN_binary_func.R
index dcda985..f17d814 100644
--- a/R/fastNN_binary_func.R
+++ b/R/fastNN_binary_func.R
@@ -42,6 +42,8 @@ fastNN_binary_func <- function(dta, trtMntVar, ids, curgrp, dist_PSM) {
         treated <- sorted_dta[which(sorted_dta[[trtMntVar]] == 1 & sorted_dta[['nn_matched']] == 0),]
         untreated <- sorted_dta[which(sorted_dta[[trtMntVar]] == 0 & sorted_dta[['nn_matched']] == 0),]
         
+        print(nrow(sorted_dta))
+        
         # time_list[1] <- round((proc.time() - timer)[3],5)
         # print("nn2.1")
         # timer <- proc.time()
@@ -58,7 +60,7 @@ fastNN_binary_func <- function(dta, trtMntVar, ids, curgrp, dist_PSM) {
             for (mC in 1:length(k[[1]])) {
 
                 # print("nn2.2.0")
-                
+
                 #Calculate the Euclidean Distance between pairs
                 Control_ID = toString(untreated[[ids]][[mC]])
 

From e1b007838048a9946fd9c277b9af3bf19e1c5f0b Mon Sep 17 00:00:00 2001
From: userz <sgoodman@aiddata.org>
Date: Mon, 19 Oct 2015 12:01:21 -0400
Subject: [PATCH 143/212] Test

---
 R/fastNN_binary_func.R | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/R/fastNN_binary_func.R b/R/fastNN_binary_func.R
index f17d814..beffa40 100644
--- a/R/fastNN_binary_func.R
+++ b/R/fastNN_binary_func.R
@@ -42,8 +42,8 @@ fastNN_binary_func <- function(dta, trtMntVar, ids, curgrp, dist_PSM) {
         treated <- sorted_dta[which(sorted_dta[[trtMntVar]] == 1 & sorted_dta[['nn_matched']] == 0),]
         untreated <- sorted_dta[which(sorted_dta[[trtMntVar]] == 0 & sorted_dta[['nn_matched']] == 0),]
         
-        print(nrow(sorted_dta))
-        
+        print(nrow(sorted_dta[which(sorted_dta[['nn_matched']] == 0),]))
+
         # time_list[1] <- round((proc.time() - timer)[3],5)
         # print("nn2.1")
         # timer <- proc.time()

From 07c8576ea5d0ae813a38ac965f653d2357e490a1 Mon Sep 17 00:00:00 2001
From: userz <sgoodman@aiddata.org>
Date: Mon, 19 Oct 2015 12:13:41 -0400
Subject: [PATCH 144/212] Test

---
 R/fastNN_binary_func.R | 13 ++++++++-----
 1 file changed, 8 insertions(+), 5 deletions(-)

diff --git a/R/fastNN_binary_func.R b/R/fastNN_binary_func.R
index beffa40..9d414af 100644
--- a/R/fastNN_binary_func.R
+++ b/R/fastNN_binary_func.R
@@ -39,10 +39,10 @@ fastNN_binary_func <- function(dta, trtMntVar, ids, curgrp, dist_PSM) {
         # print("nn2.0")
         # timer <- proc.time()
 
-        treated <- sorted_dta[which(sorted_dta[[trtMntVar]] == 1 & sorted_dta[['nn_matched']] == 0),]
-        untreated <- sorted_dta[which(sorted_dta[[trtMntVar]] == 0 & sorted_dta[['nn_matched']] == 0),]
+        # treated <- sorted_dta[which(sorted_dta[[trtMntVar]] == 1 & sorted_dta[['nn_matched']] == 0),]
+        # untreated <- sorted_dta[which(sorted_dta[[trtMntVar]] == 0 & sorted_dta[['nn_matched']] == 0),]
         
-        print(nrow(sorted_dta[which(sorted_dta[['nn_matched']] == 0),]))
+        # print(nrow(sorted_dta[which(sorted_dta[['nn_matched']] == 0),]))
 
         # time_list[1] <- round((proc.time() - timer)[3],5)
         # print("nn2.1")
@@ -121,7 +121,7 @@ fastNN_binary_func <- function(dta, trtMntVar, ids, curgrp, dist_PSM) {
 
 
         #Create a unique pair ID for each group (will simply append a "1" if only 1 group)
-        pair_id = paste(curgrp,j,sep="")
+        pair_id = paste(curgrp,j, sep="")
         
         # time_list[4] <- round((proc.time() - timer)[3],5)
         # print("nn2.4")
@@ -144,8 +144,11 @@ fastNN_binary_func <- function(dta, trtMntVar, ids, curgrp, dist_PSM) {
         # sorted_dta <- sorted_dta[sorted_dta[[ids]] != Treatment_ID ,]
         # sorted_dta <- sorted_dta[sorted_dta[[ids]] != Control_ID ,]    
     
-        sorted_dta[which(sorted_dta[[ids]] == Control_ID | sorted_dta[[ids]] == Treatment_ID),][['nn_matched']] <- 1
+        # sorted_dta[which(sorted_dta[[ids]] == Control_ID | sorted_dta[[ids]] == Treatment_ID),][['nn_matched']] <- 1
 
+        treated <- treated[which(treated[[ids]] != Treatment_ID),]
+        untreated <- untreated[which(untreated[[ids]] != Control_ID),]
+        
         # time_list[6] <- round((proc.time() - timer)[3],5)
         # print(paste(time_list))
 

From 344535d2d9c07b315fb707b20564c191b9d4d017 Mon Sep 17 00:00:00 2001
From: userz <sgoodman@aiddata.org>
Date: Mon, 19 Oct 2015 12:17:15 -0400
Subject: [PATCH 145/212] Test

---
 R/fastNN_binary_func.R | 19 ++++++++++---------
 1 file changed, 10 insertions(+), 9 deletions(-)

diff --git a/R/fastNN_binary_func.R b/R/fastNN_binary_func.R
index 9d414af..86688d6 100644
--- a/R/fastNN_binary_func.R
+++ b/R/fastNN_binary_func.R
@@ -14,10 +14,8 @@ fastNN_binary_func <- function(dta, trtMntVar, ids, curgrp, dist_PSM) {
     #Fast nearest neighbors search - will not arrive at optimum,
     #but this may not be an issue for many analysis.
     #Effectively loops through all observations in the treatment group, ordered by PSM score - higher scores go first.
-    dta@data[["nn_matched"]] <- 0
-
-    sorted_dta <- dta@data[order(dta@data[["PSM_trtProb"]]), c(ids, trtMntVar, "PSM_trtProb", "nn_matched")]
 
+    sorted_dta <- dta@data[order(dta@data[["PSM_trtProb"]]), c(ids, trtMntVar, "PSM_trtProb")]
 
 
     #Conduct the matching
@@ -110,14 +108,17 @@ fastNN_binary_func <- function(dta, trtMntVar, ids, curgrp, dist_PSM) {
         # #Treatment PSM ID
         # Treatment_ID = toString(treated[[ids]][[best_m_treated]])
    
-        #Control PSM ID
-        cid_txt = paste("untreated$",ids,"[",best_m_control,"]",sep="")
-        Control_ID = toString(eval(parse(text=cid_txt)))
+        # #Control PSM ID
+        # cid_txt = paste("untreated$",ids,"[",best_m_control,"]",sep="")
+        # Control_ID = toString(eval(parse(text=cid_txt)))
             
-        #Treatment PSM ID
-        tid_txt = paste("treated$",ids,"[",best_m_treated,"]",sep="")
-        Treatment_ID = toString(eval(parse(text=tid_txt)))
+        # #Treatment PSM ID
+        # tid_txt = paste("treated$",ids,"[",best_m_treated,"]",sep="")
+        # Treatment_ID = toString(eval(parse(text=tid_txt)))
 
+        #Control PSM ID and Treatment PSM ID
+        Control_ID = toString(untreated[,ids][best_m_control])
+        Treatment_ID = toString(treated[,ids][best_m_treated])
 
 
         #Create a unique pair ID for each group (will simply append a "1" if only 1 group)

From f21621b6d5195d9bca970a129e897a39109ae774 Mon Sep 17 00:00:00 2001
From: userz <sgoodman@aiddata.org>
Date: Mon, 19 Oct 2015 12:20:01 -0400
Subject: [PATCH 146/212] Test

---
 R/fastNN_binary_func.R | 23 +++++++++--------------
 1 file changed, 9 insertions(+), 14 deletions(-)

diff --git a/R/fastNN_binary_func.R b/R/fastNN_binary_func.R
index 86688d6..20f48c1 100644
--- a/R/fastNN_binary_func.R
+++ b/R/fastNN_binary_func.R
@@ -102,23 +102,18 @@ fastNN_binary_func <- function(dta, trtMntVar, ids, curgrp, dist_PSM) {
         #This will give us the matched index in the "treated" dataset.
         best_m_treated = k[["nn.index"]][best_m_control]
         
-        # #Control PSM ID
-        # Control_ID = toString(untreated[[ids]][[best_m_control]])    
-
-        # #Treatment PSM ID
-        # Treatment_ID = toString(treated[[ids]][[best_m_treated]])
-   
-        # #Control PSM ID
-        # cid_txt = paste("untreated$",ids,"[",best_m_control,"]",sep="")
-        # Control_ID = toString(eval(parse(text=cid_txt)))
+
+        #Control PSM ID
+        cid_txt = paste("untreated$",ids,"[",best_m_control,"]",sep="")
+        Control_ID = toString(eval(parse(text=cid_txt)))
             
-        # #Treatment PSM ID
-        # tid_txt = paste("treated$",ids,"[",best_m_treated,"]",sep="")
-        # Treatment_ID = toString(eval(parse(text=tid_txt)))
+        #Treatment PSM ID
+        tid_txt = paste("treated$",ids,"[",best_m_treated,"]",sep="")
+        Treatment_ID = toString(eval(parse(text=tid_txt)))
 
         #Control PSM ID and Treatment PSM ID
-        Control_ID = toString(untreated[,ids][best_m_control])
-        Treatment_ID = toString(treated[,ids][best_m_treated])
+        # Control_ID = toString(untreated[,ids][best_m_control])
+        # Treatment_ID = toString(treated[,ids][best_m_treated])
 
 
         #Create a unique pair ID for each group (will simply append a "1" if only 1 group)

From 7253a3be12fa18c5ffaaf0f75a6cbe0005f795ab Mon Sep 17 00:00:00 2001
From: userz <sgoodman@aiddata.org>
Date: Mon, 19 Oct 2015 12:22:09 -0400
Subject: [PATCH 147/212] Test

---
 R/fastNN_binary_func.R | 16 ++++++++--------
 1 file changed, 8 insertions(+), 8 deletions(-)

diff --git a/R/fastNN_binary_func.R b/R/fastNN_binary_func.R
index 20f48c1..6a31fdb 100644
--- a/R/fastNN_binary_func.R
+++ b/R/fastNN_binary_func.R
@@ -103,17 +103,17 @@ fastNN_binary_func <- function(dta, trtMntVar, ids, curgrp, dist_PSM) {
         best_m_treated = k[["nn.index"]][best_m_control]
         
 
-        #Control PSM ID
-        cid_txt = paste("untreated$",ids,"[",best_m_control,"]",sep="")
-        Control_ID = toString(eval(parse(text=cid_txt)))
+        # #Control PSM ID
+        # cid_txt = paste("untreated$",ids,"[",best_m_control,"]",sep="")
+        # Control_ID = toString(eval(parse(text=cid_txt)))
             
-        #Treatment PSM ID
-        tid_txt = paste("treated$",ids,"[",best_m_treated,"]",sep="")
-        Treatment_ID = toString(eval(parse(text=tid_txt)))
+        # #Treatment PSM ID
+        # tid_txt = paste("treated$",ids,"[",best_m_treated,"]",sep="")
+        # Treatment_ID = toString(eval(parse(text=tid_txt)))
 
         #Control PSM ID and Treatment PSM ID
-        # Control_ID = toString(untreated[,ids][best_m_control])
-        # Treatment_ID = toString(treated[,ids][best_m_treated])
+        Control_ID = untreated[,ids][best_m_control]
+        Treatment_ID = treated[,ids][best_m_treated]
 
 
         #Create a unique pair ID for each group (will simply append a "1" if only 1 group)

From c8d0833fa07d93dc8ac153ec19b4df2559303de8 Mon Sep 17 00:00:00 2001
From: userz <sgoodman@aiddata.org>
Date: Mon, 19 Oct 2015 12:29:35 -0400
Subject: [PATCH 148/212] Test

---
 R/fastNN_binary_func.R | 16 ++++++++--------
 1 file changed, 8 insertions(+), 8 deletions(-)

diff --git a/R/fastNN_binary_func.R b/R/fastNN_binary_func.R
index 6a31fdb..20f48c1 100644
--- a/R/fastNN_binary_func.R
+++ b/R/fastNN_binary_func.R
@@ -103,17 +103,17 @@ fastNN_binary_func <- function(dta, trtMntVar, ids, curgrp, dist_PSM) {
         best_m_treated = k[["nn.index"]][best_m_control]
         
 
-        # #Control PSM ID
-        # cid_txt = paste("untreated$",ids,"[",best_m_control,"]",sep="")
-        # Control_ID = toString(eval(parse(text=cid_txt)))
+        #Control PSM ID
+        cid_txt = paste("untreated$",ids,"[",best_m_control,"]",sep="")
+        Control_ID = toString(eval(parse(text=cid_txt)))
             
-        # #Treatment PSM ID
-        # tid_txt = paste("treated$",ids,"[",best_m_treated,"]",sep="")
-        # Treatment_ID = toString(eval(parse(text=tid_txt)))
+        #Treatment PSM ID
+        tid_txt = paste("treated$",ids,"[",best_m_treated,"]",sep="")
+        Treatment_ID = toString(eval(parse(text=tid_txt)))
 
         #Control PSM ID and Treatment PSM ID
-        Control_ID = untreated[,ids][best_m_control]
-        Treatment_ID = treated[,ids][best_m_treated]
+        # Control_ID = toString(untreated[,ids][best_m_control])
+        # Treatment_ID = toString(treated[,ids][best_m_treated])
 
 
         #Create a unique pair ID for each group (will simply append a "1" if only 1 group)

From 8beeb51c6a2bdb8aa374f6149b7a66a56f3ddfc7 Mon Sep 17 00:00:00 2001
From: userz <sgoodman@aiddata.org>
Date: Mon, 19 Oct 2015 12:52:39 -0400
Subject: [PATCH 149/212] Test

---
 R/SAT.R | 32 +++++++++++++++++---------------
 1 file changed, 17 insertions(+), 15 deletions(-)

diff --git a/R/SAT.R b/R/SAT.R
index e4ecd01..2b87c3f 100644
--- a/R/SAT.R
+++ b/R/SAT.R
@@ -78,22 +78,24 @@ SAT <- function (dta, mtd, constraints, psm_eq, ids, drop_opts, visual, TrtBinCo
 
     temp_dta <- list()
 
-    for (i in 1:cnt) {
-        cur_grp <- grp_list[[i]]
-
-        print("sat3.1")
-        it_dta <- maptools::spRbind(t_dta[[i]],u_dta[[i]])
-
-        print("sat3.2")
-        if (mtd == "fastNN") {
-            # ***
-            # this is the slow part of functions
-            temp_dta[[i]] <- fastNN_binary_func(it_dta,TrtBinColName,ids,cur_grp,dist_PSM) 
-        }
+    if (cnt > 0) {
+        for (i in 1:cnt) {
+            cur_grp <- grp_list[[i]]
+
+            print("sat3.1")
+            it_dta <- maptools::spRbind(t_dta[[i]],u_dta[[i]])
+
+            print("sat3.2")
+            if (mtd == "fastNN") {
+                # ***
+                # this is the slow part of functions
+                temp_dta[[i]] <- fastNN_binary_func(it_dta,TrtBinColName,ids,cur_grp,dist_PSM) 
+            }
 
-        if (mtd == "NN_WithReplacement") {
-            print("NN with replacement is currently not available, please choose fastNN")
-            # temp_dta[[i]] <- NN_WithReplacement_binary_func(it_dta,TrtBinColName,ids,cur_grp,dist_PSM) 
+            if (mtd == "NN_WithReplacement") {
+                print("NN with replacement is currently not available, please choose fastNN")
+                # temp_dta[[i]] <- NN_WithReplacement_binary_func(it_dta,TrtBinColName,ids,cur_grp,dist_PSM) 
+            }
         }
     }
 

From 7c833b64ef98c692711ef4d7dfba41ca2184209b Mon Sep 17 00:00:00 2001
From: userz <sgoodman@aiddata.org>
Date: Mon, 19 Oct 2015 12:55:00 -0400
Subject: [PATCH 150/212] Test

---
 R/SAT.R | 37 ++++++++++++++++++++-----------------
 1 file changed, 20 insertions(+), 17 deletions(-)

diff --git a/R/SAT.R b/R/SAT.R
index 2b87c3f..a079503 100644
--- a/R/SAT.R
+++ b/R/SAT.R
@@ -79,6 +79,8 @@ SAT <- function (dta, mtd, constraints, psm_eq, ids, drop_opts, visual, TrtBinCo
     temp_dta <- list()
 
     if (cnt > 0) {
+        print("sat3.0")
+
         for (i in 1:cnt) {
             cur_grp <- grp_list[[i]]
 
@@ -97,21 +99,22 @@ SAT <- function (dta, mtd, constraints, psm_eq, ids, drop_opts, visual, TrtBinCo
                 # temp_dta[[i]] <- NN_WithReplacement_binary_func(it_dta,TrtBinColName,ids,cur_grp,dist_PSM) 
             }
         }
-    }
 
-    print("sat4")
+        print("sat3.1")
+
+        #Build the final datasets from subsets
+        if (cnt > 1) {
+            dta <- temp_dta[[1]]
+            for(k in 2:cnt) {
+                dta  <- maptools::spRbind(dta, temp_dta[[k]])
+            } 
+        } else {
+            dta <- temp_dta[[1]]
+        }
 
-    #Build the final datasets from subsets
-    if (cnt > 1) {
-        dta <- temp_dta[[1]]
-        for(k in 2:cnt) {
-            dta  <- maptools::spRbind(dta, temp_dta[[k]])
-        } 
-    } else {
-        dta <- temp_dta[[1]]
     }
 
-    print("sat5")
+    print("sat4")
 
     if (drop_unmatched == TRUE) {
         dta <- dta[dta@data[,"PSM_match_ID"] != -999,]    
@@ -121,7 +124,7 @@ SAT <- function (dta, mtd, constraints, psm_eq, ids, drop_opts, visual, TrtBinCo
     anc_vars <- strsplit(gsub(" ","",anc_v_int), "+", fixed=TRUE)
     anc_vars <- c(anc_vars[[1]], "PSM_trtProb")
     
-    print("sat6")
+    print("sat5")
   
     #Drop observations according to the selected method
     if (drop_method == "SD") {
@@ -141,11 +144,11 @@ SAT <- function (dta, mtd, constraints, psm_eq, ids, drop_opts, visual, TrtBinCo
     #Simplest suggestion of comparing means and checking if .25 SD apart used.
     cnt = 0
 
-    print("sat7")
+    print("sat6")
     
     for (i in 1:length(anc_vars)) {
 
-        print("sat7.0")
+        print("sat6.0")
 
         #gsub to remove any factors()
         ed_v = sub("factor\\(","",anc_vars[i])
@@ -153,7 +156,7 @@ SAT <- function (dta, mtd, constraints, psm_eq, ids, drop_opts, visual, TrtBinCo
 
         c_type = class(init_dta@data[[ed_v]])
 
-        print("sat7.1")
+        print("sat6.1")
         if (c_type == "matrix") {
 
             dta@data[,ed_v] <- as.numeric(dta@data[,ed_v])
@@ -163,7 +166,7 @@ SAT <- function (dta, mtd, constraints, psm_eq, ids, drop_opts, visual, TrtBinCo
             c_type = "numeric"
         }
 
-        print("sat7.2")
+        print("sat6.2")
         if ((c_type == "numeric") & (visual == "TRUE")) {
             cnt = cnt + 1
             pltObjs[[length(pltObjs) + 1]] <- GroupCompHist(init_dta, anc_vars[i],"Pre-Balancing: ",simple_out = FALSE)
@@ -213,7 +216,7 @@ SAT <- function (dta, mtd, constraints, psm_eq, ids, drop_opts, visual, TrtBinCo
         }
     }
 
-    print("sat8")
+    print("sat7")
   
     if (visual=="TRUE") {
         #Output graphics

From 8a45f4fa80f3b5723c858686086817939ff4b025 Mon Sep 17 00:00:00 2001
From: userz <sgoodman@aiddata.org>
Date: Mon, 19 Oct 2015 13:02:21 -0400
Subject: [PATCH 151/212] Test

---
 R/SAT.R | 69 ++++++++++++++++++++++++++++-----------------------------
 1 file changed, 34 insertions(+), 35 deletions(-)

diff --git a/R/SAT.R b/R/SAT.R
index a079503..c096cd7 100644
--- a/R/SAT.R
+++ b/R/SAT.R
@@ -73,49 +73,48 @@ SAT <- function (dta, mtd, constraints, psm_eq, ids, drop_opts, visual, TrtBinCo
     }
 
 
+    if (cnt == 0) {
+        return('drop')
+    }
+
 
     print("sat3")
 
     temp_dta <- list()
 
-    if (cnt > 0) {
-        print("sat3.0")
-
-        for (i in 1:cnt) {
-            cur_grp <- grp_list[[i]]
-
-            print("sat3.1")
-            it_dta <- maptools::spRbind(t_dta[[i]],u_dta[[i]])
-
-            print("sat3.2")
-            if (mtd == "fastNN") {
-                # ***
-                # this is the slow part of functions
-                temp_dta[[i]] <- fastNN_binary_func(it_dta,TrtBinColName,ids,cur_grp,dist_PSM) 
-            }
-
-            if (mtd == "NN_WithReplacement") {
-                print("NN with replacement is currently not available, please choose fastNN")
-                # temp_dta[[i]] <- NN_WithReplacement_binary_func(it_dta,TrtBinColName,ids,cur_grp,dist_PSM) 
-            }
-        }
+    for (i in 1:cnt) {
+        cur_grp <- grp_list[[i]]
 
         print("sat3.1")
+        it_dta <- maptools::spRbind(t_dta[[i]],u_dta[[i]])
 
-        #Build the final datasets from subsets
-        if (cnt > 1) {
-            dta <- temp_dta[[1]]
-            for(k in 2:cnt) {
-                dta  <- maptools::spRbind(dta, temp_dta[[k]])
-            } 
-        } else {
-            dta <- temp_dta[[1]]
+        print("sat3.2")
+        if (mtd == "fastNN") {
+            # ***
+            # this is the slow part of functions
+            temp_dta[[i]] <- fastNN_binary_func(it_dta,TrtBinColName,ids,cur_grp,dist_PSM) 
         }
 
+        if (mtd == "NN_WithReplacement") {
+            print("NN with replacement is currently not available, please choose fastNN")
+            # temp_dta[[i]] <- NN_WithReplacement_binary_func(it_dta,TrtBinColName,ids,cur_grp,dist_PSM) 
+        }
     }
 
     print("sat4")
 
+    #Build the final datasets from subsets
+    if (cnt > 1) {
+        dta <- temp_dta[[1]]
+        for(k in 2:cnt) {
+            dta  <- maptools::spRbind(dta, temp_dta[[k]])
+        } 
+    } else {
+        dta <- temp_dta[[1]]
+    }
+
+    print("sat5")
+
     if (drop_unmatched == TRUE) {
         dta <- dta[dta@data[,"PSM_match_ID"] != -999,]    
     }
@@ -124,7 +123,7 @@ SAT <- function (dta, mtd, constraints, psm_eq, ids, drop_opts, visual, TrtBinCo
     anc_vars <- strsplit(gsub(" ","",anc_v_int), "+", fixed=TRUE)
     anc_vars <- c(anc_vars[[1]], "PSM_trtProb")
     
-    print("sat5")
+    print("sat6")
   
     #Drop observations according to the selected method
     if (drop_method == "SD") {
@@ -144,11 +143,11 @@ SAT <- function (dta, mtd, constraints, psm_eq, ids, drop_opts, visual, TrtBinCo
     #Simplest suggestion of comparing means and checking if .25 SD apart used.
     cnt = 0
 
-    print("sat6")
+    print("sat7")
     
     for (i in 1:length(anc_vars)) {
 
-        print("sat6.0")
+        print("sat7.0")
 
         #gsub to remove any factors()
         ed_v = sub("factor\\(","",anc_vars[i])
@@ -156,7 +155,7 @@ SAT <- function (dta, mtd, constraints, psm_eq, ids, drop_opts, visual, TrtBinCo
 
         c_type = class(init_dta@data[[ed_v]])
 
-        print("sat6.1")
+        print("sat7.1")
         if (c_type == "matrix") {
 
             dta@data[,ed_v] <- as.numeric(dta@data[,ed_v])
@@ -166,7 +165,7 @@ SAT <- function (dta, mtd, constraints, psm_eq, ids, drop_opts, visual, TrtBinCo
             c_type = "numeric"
         }
 
-        print("sat6.2")
+        print("sat7.2")
         if ((c_type == "numeric") & (visual == "TRUE")) {
             cnt = cnt + 1
             pltObjs[[length(pltObjs) + 1]] <- GroupCompHist(init_dta, anc_vars[i],"Pre-Balancing: ",simple_out = FALSE)
@@ -216,7 +215,7 @@ SAT <- function (dta, mtd, constraints, psm_eq, ids, drop_opts, visual, TrtBinCo
         }
     }
 
-    print("sat7")
+    print("sat8")
   
     if (visual=="TRUE") {
         #Output graphics

From 919d1c22261955f7ab087307d6d331835a435c1d Mon Sep 17 00:00:00 2001
From: userz <sgoodman@aiddata.org>
Date: Mon, 19 Oct 2015 13:06:08 -0400
Subject: [PATCH 152/212] Test

---
 R/SAT.R | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/R/SAT.R b/R/SAT.R
index c096cd7..3d389dc 100644
--- a/R/SAT.R
+++ b/R/SAT.R
@@ -19,14 +19,16 @@ SAT <- function (dta, mtd, constraints, psm_eq, ids, drop_opts, visual, TrtBinCo
             } else {
                 dta$ConstraintGroupSet_Opt <- 1
             }
+
             if (names(constraints)[cst] == "distance") {
                 dist_PSM = as.numeric(constraints["distance"][[1]])
             } else {
                 dist_PSM=NULL
             }
         }
+        
     } else {
-        dta$ConstraintGroupSet_Opt <- 1
+        dta@data[,"ConstraintGroupSet_Opt"] <- 1
         #max the distance threshold by taking the diagonal of the bounding box.
         dist_PSM = NULL
     }

From c46cdbcc6261a21f5e3e77d1d39073a5a39e05f9 Mon Sep 17 00:00:00 2001
From: userz <sgoodman@aiddata.org>
Date: Tue, 3 Nov 2015 13:53:02 -0500
Subject: [PATCH 153/212] Test nn issue.

---
 R/SAT.R                | 2 +-
 R/fastNN_binary_func.R | 3 ++-
 2 files changed, 3 insertions(+), 2 deletions(-)

diff --git a/R/SAT.R b/R/SAT.R
index 3d389dc..7396621 100644
--- a/R/SAT.R
+++ b/R/SAT.R
@@ -7,7 +7,7 @@ SAT <- function (dta, mtd, constraints, psm_eq, ids, drop_opts, visual, TrtBinCo
     drop_method = drop_opts["drop_method"]
     drop_thresh = as.numeric(drop_opts["drop_thresh"])
   
-
+    
 
     print("sat1")
 
diff --git a/R/fastNN_binary_func.R b/R/fastNN_binary_func.R
index 20f48c1..67a1b2b 100644
--- a/R/fastNN_binary_func.R
+++ b/R/fastNN_binary_func.R
@@ -21,7 +21,8 @@ fastNN_binary_func <- function(dta, trtMntVar, ids, curgrp, dist_PSM) {
     #Conduct the matching
     treated <- sorted_dta[sorted_dta[[trtMntVar]] == 1,]
     untreated <- sorted_dta[sorted_dta[[trtMntVar]] == 0,]
-
+    print(length(treated[[1]]))
+    print(length(untreated[[1]]))
     it_cnt = min(length(treated[[1]]), length(untreated[[1]]))
     dta@data[["match"]] <- -999
     dta@data[["PSM_distance"]] <- -999

From 6b8d97fdedd74eea9eb322bc99d57fea1dfdb70d Mon Sep 17 00:00:00 2001
From: userz <sgoodman@aiddata.org>
Date: Tue, 3 Nov 2015 13:56:07 -0500
Subject: [PATCH 154/212] Test nn issue.

---
 R/fastNN_binary_func.R | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/R/fastNN_binary_func.R b/R/fastNN_binary_func.R
index 67a1b2b..9a9090d 100644
--- a/R/fastNN_binary_func.R
+++ b/R/fastNN_binary_func.R
@@ -21,8 +21,7 @@ fastNN_binary_func <- function(dta, trtMntVar, ids, curgrp, dist_PSM) {
     #Conduct the matching
     treated <- sorted_dta[sorted_dta[[trtMntVar]] == 1,]
     untreated <- sorted_dta[sorted_dta[[trtMntVar]] == 0,]
-    print(length(treated[[1]]))
-    print(length(untreated[[1]]))
+
     it_cnt = min(length(treated[[1]]), length(untreated[[1]]))
     dta@data[["match"]] <- -999
     dta@data[["PSM_distance"]] <- -999
@@ -48,6 +47,11 @@ fastNN_binary_func <- function(dta, trtMntVar, ids, curgrp, dist_PSM) {
         # timer <- proc.time()
 
         #Run the KNN for all neighbors. 
+        print(length(treated[[1]]))
+        summary(treated[["PSM_trtProb"]])
+        print(length(untreated[[1]]))
+        summary(untreated[["PSM_trtProb"]])
+
         k <- get.knnx(treated[["PSM_trtProb"]], untreated[["PSM_trtProb"]], 1)
         
         # time_list[2] <- round((proc.time() - timer)[3],5)

From 6fcc5d61f1d87762f2ac482d2db248d5c80e1a94 Mon Sep 17 00:00:00 2001
From: userz <sgoodman@aiddata.org>
Date: Tue, 3 Nov 2015 14:08:36 -0500
Subject: [PATCH 155/212] Test nn issue.

---
 R/fastNN_binary_func.R | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/R/fastNN_binary_func.R b/R/fastNN_binary_func.R
index 9a9090d..c771e83 100644
--- a/R/fastNN_binary_func.R
+++ b/R/fastNN_binary_func.R
@@ -47,10 +47,10 @@ fastNN_binary_func <- function(dta, trtMntVar, ids, curgrp, dist_PSM) {
         # timer <- proc.time()
 
         #Run the KNN for all neighbors. 
-        print(length(treated[[1]]))
-        summary(treated[["PSM_trtProb"]])
-        print(length(untreated[[1]]))
-        summary(untreated[["PSM_trtProb"]])
+        # print(length(treated[[1]]))
+        # summary(treated[["PSM_trtProb"]])
+        # print(length(untreated[[1]]))
+        # summary(untreated[["PSM_trtProb"]])
 
         k <- get.knnx(treated[["PSM_trtProb"]], untreated[["PSM_trtProb"]], 1)
         

From 5c33df70aca5077e0d22eaa2b431fb2b62e3dd34 Mon Sep 17 00:00:00 2001
From: userz <sgoodman@aiddata.org>
Date: Fri, 6 Nov 2015 11:57:26 -0500
Subject: [PATCH 156/212] test nn

---
 R/fastNN_binary_func.R | 18 +++++++++---------
 1 file changed, 9 insertions(+), 9 deletions(-)

diff --git a/R/fastNN_binary_func.R b/R/fastNN_binary_func.R
index c771e83..40ff9e9 100644
--- a/R/fastNN_binary_func.R
+++ b/R/fastNN_binary_func.R
@@ -27,14 +27,14 @@ fastNN_binary_func <- function(dta, trtMntVar, ids, curgrp, dist_PSM) {
     dta@data[["PSM_distance"]] <- -999
     dta@data[["PSM_match_ID"]] <- -999
 
-    # print("nn2")
+    print("nn2")
 
     #Calculate a distance decay function
     #to perturb pairs based on their distances.  
     for (j in 1:it_cnt) {
         # time_list <- c()
 
-        # print("nn2.0")
+        print("nn2.0")
         # timer <- proc.time()
 
         # treated <- sorted_dta[which(sorted_dta[[trtMntVar]] == 1 & sorted_dta[['nn_matched']] == 0),]
@@ -55,14 +55,14 @@ fastNN_binary_func <- function(dta, trtMntVar, ids, curgrp, dist_PSM) {
         k <- get.knnx(treated[["PSM_trtProb"]], untreated[["PSM_trtProb"]], 1)
         
         # time_list[2] <- round((proc.time() - timer)[3],5)
-        # print("nn2.2")
+        print("nn2.2")
         # timer <- proc.time()
 
         #Perturb the values based on the distance decay function, if selected.
         if (!is.null(dist_PSM)) {
             for (mC in 1:length(k[[1]])) {
 
-                # print("nn2.2.0")
+                print("nn2.2.0")
 
                 #Calculate the Euclidean Distance between pairs
                 Control_ID = toString(untreated[[ids]][[mC]])
@@ -82,12 +82,12 @@ fastNN_binary_func <- function(dta, trtMntVar, ids, curgrp, dist_PSM) {
                 x_dist = abs(tCoord[1] - tCoord[2])
                 euc_dist = sqrt(y_dist^2 + x_dist^2)
                 
-                # print("nn2.2.1")
+                print("nn2.2.1")
 
                 PSM_score = k[["nn.dist"]][mC]
                 geog_Weight = pairDistWeight(dist=euc_dist,threshold=dist_PSM,type="Spherical")
 
-                # print("nn2.2.2")
+                print("nn2.2.2")
 
                 
                 k[["nn.dist"]][mC] <- ((1-geog_Weight) * PSM_score)
@@ -97,7 +97,7 @@ fastNN_binary_func <- function(dta, trtMntVar, ids, curgrp, dist_PSM) {
         }
 
         # time_list[3] <- round((proc.time() - timer)[3],5)
-        # print("nn2.3")
+        print("nn2.3")
         # timer <- proc.time()
 
         #Add the matched treatment and control values to the recording data frame
@@ -125,7 +125,7 @@ fastNN_binary_func <- function(dta, trtMntVar, ids, curgrp, dist_PSM) {
         pair_id = paste(curgrp,j, sep="")
         
         # time_list[4] <- round((proc.time() - timer)[3],5)
-        # print("nn2.4")
+        print("nn2.4")
         # timer <- proc.time()
 
         #Add the Treatment ID to the Control Row and Add the Control ID to the Treatment Row
@@ -138,7 +138,7 @@ fastNN_binary_func <- function(dta, trtMntVar, ids, curgrp, dist_PSM) {
 
         
         # time_list[5] <- round((proc.time() - timer)[3],5)
-        # print("nn2.5")
+        print("nn2.5")
         # timer <- proc.time()
 
         #Drop the paired match out of the iteration matrix 

From 0875a69017e130c7e902b459c93d0ddf1aa91a8c Mon Sep 17 00:00:00 2001
From: userz <sgoodman@aiddata.org>
Date: Fri, 6 Nov 2015 12:00:13 -0500
Subject: [PATCH 157/212] test nn

---
 R/fastNN_binary_func.R | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/R/fastNN_binary_func.R b/R/fastNN_binary_func.R
index 40ff9e9..9ea8262 100644
--- a/R/fastNN_binary_func.R
+++ b/R/fastNN_binary_func.R
@@ -125,13 +125,15 @@ fastNN_binary_func <- function(dta, trtMntVar, ids, curgrp, dist_PSM) {
         pair_id = paste(curgrp,j, sep="")
         
         # time_list[4] <- round((proc.time() - timer)[3],5)
-        print("nn2.4")
+        print("nn2.4a")
         # timer <- proc.time()
 
         #Add the Treatment ID to the Control Row and Add the Control ID to the Treatment Row
         dta@data$match[which(dta@data[[ids]] == Control_ID)] <- Treatment_ID
         dta@data$match[which(dta@data[[ids]] == Treatment_ID)] <- Control_ID
 
+        print("nn2.4b")
+
         dta@data$PSM_distance[which(dta@data[[ids]] == Control_ID | dta@data[[ids]] == Treatment_ID)] <- k[["nn.dist"]][,1][best_m_control]
         dta@data$PSM_match_ID[which(dta@data[[ids]] == Control_ID | dta@data[[ids]] == Treatment_ID)] <- pair_id        
 

From 17b1bb790f2b73a6c541dd2ed4835ca9a19e5971 Mon Sep 17 00:00:00 2001
From: userz <sgoodman@aiddata.org>
Date: Fri, 6 Nov 2015 12:02:22 -0500
Subject: [PATCH 158/212] test nn

---
 R/fastNN_binary_func.R | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/R/fastNN_binary_func.R b/R/fastNN_binary_func.R
index 9ea8262..7b82e46 100644
--- a/R/fastNN_binary_func.R
+++ b/R/fastNN_binary_func.R
@@ -124,15 +124,16 @@ fastNN_binary_func <- function(dta, trtMntVar, ids, curgrp, dist_PSM) {
         #Create a unique pair ID for each group (will simply append a "1" if only 1 group)
         pair_id = paste(curgrp,j, sep="")
         
+
+
         # time_list[4] <- round((proc.time() - timer)[3],5)
-        print("nn2.4a")
+        print("nn2.4x")
         # timer <- proc.time()
 
         #Add the Treatment ID to the Control Row and Add the Control ID to the Treatment Row
         dta@data$match[which(dta@data[[ids]] == Control_ID)] <- Treatment_ID
         dta@data$match[which(dta@data[[ids]] == Treatment_ID)] <- Control_ID
 
-        print("nn2.4b")
 
         dta@data$PSM_distance[which(dta@data[[ids]] == Control_ID | dta@data[[ids]] == Treatment_ID)] <- k[["nn.dist"]][,1][best_m_control]
         dta@data$PSM_match_ID[which(dta@data[[ids]] == Control_ID | dta@data[[ids]] == Treatment_ID)] <- pair_id        
@@ -140,7 +141,6 @@ fastNN_binary_func <- function(dta, trtMntVar, ids, curgrp, dist_PSM) {
 
         
         # time_list[5] <- round((proc.time() - timer)[3],5)
-        print("nn2.5")
         # timer <- proc.time()
 
         #Drop the paired match out of the iteration matrix 

From 7697483d6ee958cd39db7e0e04d45ff2a95cc3d7 Mon Sep 17 00:00:00 2001
From: userz <sgoodman@aiddata.org>
Date: Fri, 6 Nov 2015 12:06:54 -0500
Subject: [PATCH 159/212] test nn

---
 R/fastNN_binary_func.R | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/R/fastNN_binary_func.R b/R/fastNN_binary_func.R
index 7b82e46..cc8fd83 100644
--- a/R/fastNN_binary_func.R
+++ b/R/fastNN_binary_func.R
@@ -23,8 +23,8 @@ fastNN_binary_func <- function(dta, trtMntVar, ids, curgrp, dist_PSM) {
     untreated <- sorted_dta[sorted_dta[[trtMntVar]] == 0,]
 
     it_cnt = min(length(treated[[1]]), length(untreated[[1]]))
-    dta@data[["match"]] <- -999
-    dta@data[["PSM_distance"]] <- -999
+    # dta@data[["match"]] <- -999
+    # dta@data[["PSM_distance"]] <- -999
     dta@data[["PSM_match_ID"]] <- -999
 
     print("nn2")
@@ -131,11 +131,11 @@ fastNN_binary_func <- function(dta, trtMntVar, ids, curgrp, dist_PSM) {
         # timer <- proc.time()
 
         #Add the Treatment ID to the Control Row and Add the Control ID to the Treatment Row
-        dta@data$match[which(dta@data[[ids]] == Control_ID)] <- Treatment_ID
-        dta@data$match[which(dta@data[[ids]] == Treatment_ID)] <- Control_ID
+        # dta@data$match[which(dta@data[[ids]] == Control_ID)] <- Treatment_ID
+        # dta@data$match[which(dta@data[[ids]] == Treatment_ID)] <- Control_ID
 
 
-        dta@data$PSM_distance[which(dta@data[[ids]] == Control_ID | dta@data[[ids]] == Treatment_ID)] <- k[["nn.dist"]][,1][best_m_control]
+        # dta@data$PSM_distance[which(dta@data[[ids]] == Control_ID | dta@data[[ids]] == Treatment_ID)] <- k[["nn.dist"]][,1][best_m_control]
         dta@data$PSM_match_ID[which(dta@data[[ids]] == Control_ID | dta@data[[ids]] == Treatment_ID)] <- pair_id        
 
 

From e6076575d378d3c24ca99d7a4be53e97335e2de5 Mon Sep 17 00:00:00 2001
From: userz <sgoodman@aiddata.org>
Date: Mon, 9 Nov 2015 16:20:07 -0500
Subject: [PATCH 160/212] data table test for nn

---
 R/SAT.R                | 249 +++++++++++++++++++++++++++++++++++++++++
 R/fastNN_binary_func.R | 208 +++++++++++++++++++++++++++++++---
 R/loadLibs.R           |   2 +
 3 files changed, 441 insertions(+), 18 deletions(-)

diff --git a/R/SAT.R b/R/SAT.R
index 7396621..1f6d06c 100644
--- a/R/SAT.R
+++ b/R/SAT.R
@@ -1,3 +1,252 @@
+# SAT <- function (dta, mtd, constraints, psm_eq, ids, drop_opts, visual, TrtBinColName) {
+
+#     dta <- as.data.table(dta@data)
+
+#     #Initialization
+#     pltObjs <- list()
+#     init_dta <- dta
+  
+#     drop_unmatched = drop_opts["drop_unmatched"]
+#     drop_method = drop_opts["drop_method"]
+#     drop_thresh = as.numeric(drop_opts["drop_thresh"])
+  
+    
+
+#     print("sat1")
+
+#     if (!is.null(constraints) && contraints != c()) {
+#         for (cst in 1:length(names(constraints))) {
+#             if (names(constraints)[cst] == "groups") {
+#                 dta[,"ConstraintGroupSet_Opt"] <- dta[,get(constraints["groups"])]
+
+#             } else {
+#                 dta[,"ConstraintGroupSet_Opt"] <- 1
+#             }
+
+#             if (names(constraints)[cst] == "distance") {
+#                 dist_PSM = as.numeric(constraints["distance"][[1]])
+#             } else {
+#                 dist_PSM=NULL
+#             }
+#         }
+        
+#     } else {
+#         dta[,"ConstraintGroupSet_Opt"] <- 1
+#         #max the distance threshold by taking the diagonal of the bounding box.
+#         dist_PSM = NULL
+#     }
+
+
+
+#     print("sat2")
+
+#     #Caclulate the number of groups to constrain by, if any.
+#     group_constraints <- unique(dta[,'ConstraintGroupSet_Opt']
+  
+#     #Make sure there are both treatment and control groups of an adequate size (>= 1 of each)
+#     t_dta <- list()
+#     u_dta <-list()
+#     grp_list <- list()
+#     cnt = 0
+
+#     for (grp in 1:length(group_constraints)) {
+#         cur_grp <- as.matrix(group_constraints)[grp]
+#         grp_index = length(grp_list)+1
+#         t_index = length(t_dta)+1
+#         grp_list[[grp_index]] <- as.matrix(group_constraints)[grp]
+
+#         t_dta[[t_index]] <- dta[TrtBin == 1]
+#         u_dta[[t_index]] <- dta[TrtBin == 0]
+
+#         has_treated <- cur_grp %in% t_dta[[t_index]][,'ConstraintGroupSet_Opt']
+#         has_untreated <- cur_grp %in% u_dta[[t_index]][,'ConstraintGroupSet_Opt']
+
+#         if ((has_untreated == FALSE) || (has_treated == FALSE)) {
+#             dta <- dta['ConstraintGroupSet_Opt' != cur_grp]
+#             t_dta[[t_index]] <- NULL
+#             u_dta[[t_index]] <- NULL
+#             grp_list[[t_index]] <- NULL
+#             war_statement = paste("Dropped group due to a lack of both treatment and control observation: '",cur_grp,"'",sep="")
+#             warning(war_statement)
+
+#         } else { 
+#             t_dta[[t_index]] <- t_dta[[t_index]][ConstraintGroupSet_Opt == (cur_grp)]
+#             u_dta[[t_index]] <- u_dta[[t_index]][ConstraintGroupSet_Opt == (cur_grp)]
+
+#             cnt = cnt + 1
+#         }
+#     }
+
+
+#     if (cnt == 0) {
+#         return('drop')
+#     }
+
+
+#     print("sat3")
+
+#     temp_dta <- list()
+
+#     for (i in 1:cnt) {
+#         cur_grp <- grp_list[[i]]
+
+#         print("sat3.1")
+#         it_dta <- maptools::spRbind(t_dta[[i]],u_dta[[i]])
+
+#         print("sat3.2")
+#         if (mtd == "fastNN") {
+#             # ***
+#             # this is the slow part of functions
+#             temp_dta[[i]] <- fastNN_binary_func(it_dta, TrtBinColName, ids, cur_grp, dist_PSM) 
+#         }
+
+#         if (mtd == "NN_WithReplacement") {
+#             print("NN with replacement is currently not available, please choose fastNN")
+#             # temp_dta[[i]] <- NN_WithReplacement_binary_func(it_dta,TrtBinColName,ids,cur_grp,dist_PSM) 
+#         }
+#     }
+
+#     print("sat4")
+
+#     #Build the final datasets from subsets
+#     if (cnt > 1) {
+#         dta <- temp_dta[[1]]
+#         for(k in 2:cnt) {
+#             dta  <- maptools::spRbind(dta, temp_dta[[k]])
+#         } 
+#     } else {
+#         dta <- temp_dta[[1]]
+#     }
+
+#     print("sat5")
+
+#     if (drop_unmatched == TRUE) {
+#         dta <- dta["PSM_match_ID" != -999]    
+#     }
+  
+#     anc_v_int <- strsplit(psm_eq, "~")[[1]][2]
+#     anc_vars <- strsplit(gsub(" ","",anc_v_int), "+", fixed=TRUE)
+#     anc_vars <- c(anc_vars[[1]], "PSM_trtProb")
+    
+#     print("sat6")
+  
+#     #Drop observations according to the selected method
+#     if (drop_method == "SD") {
+#         #Method to drop pairs that are greater than a set threshold apart in terms of PSM Standard Deviations.
+#         psm_sd_thresh = sd(dta[,"PSM_trtProb"]) * drop_thresh
+#         if (visual == "TRUE") {
+#             print(psm_sd_thresh)
+#         }
+#         dta <- dta["PSM_distance" < psm_sd_thresh]
+#     }
+  
+
+
+#     #Plot the pre and post-dropping balance for PSM model...
+#     #Balance metrics are based on "Misunderstandings between experimentalists and
+#     #observationalists about causal inference", Imal, King, and Stuart.
+#     #Simplest suggestion of comparing means and checking if .25 SD apart used.
+#     cnt = 0
+
+#     print("sat7")
+    
+#     for (i in 1:length(anc_vars)) {
+
+#         print("sat7.0")
+
+#         #gsub to remove any factors()
+#         ed_v = sub("factor\\(","",anc_vars[i])
+#         ed_v = sub(")","",ed_v)
+
+#         c_type = class(init_dta[[ed_v]])
+
+#         print("sat7.1")
+#         if (c_type == "matrix") {
+
+#             dta[,ed_v] <- as.numeric(dta[,ed_v])
+
+#             init_dta[,ed_v] <- as.numeric(init_dta[,ed_v])
+
+#             c_type = "numeric"
+#         }
+
+#         print("sat7.2")
+#         if ((c_type == "numeric") & (visual == "TRUE")) {
+#             cnt = cnt + 1
+#             pltObjs[[length(pltObjs) + 1]] <- GroupCompHist(init_dta, anc_vars[i],"Pre-Balancing: ",simple_out = FALSE)
+#             pltObjs[[length(pltObjs) + 1]] <- GroupCompHist(dta, anc_vars[i],"Post-Balancing: ",simple_out = FALSE)  
+
+
+#             treat_mean_pre <- round(describeBy(init_dta[[ed_v]], group=init_dta[[TrtBinColName]])[[2]][[3]], 5)
+#             treat_SD_pre <- round(describeBy(init_dta[[ed_v]], group=init_dta[[TrtBinColName]])[[2]][[4]], 5)
+#             control_mean_pre <- round(describeBy(init_dta[[ed_v]], group=init_dta[[TrtBinColName]])[[1]][[3]], 5)
+#             control_SD_pre <- round(describeBy(init_dta[[ed_v]], group=init_dta[[TrtBinColName]])[[1]][[4]], 5)
+
+#             treat_mean_post <- round(describeBy(dta[[ed_v]], group=dta[[TrtBinColName]])[[2]][[3]], 5)
+#             treat_SD_post <- round(describeBy(dta[[ed_v]], group=dta[[TrtBinColName]])[[2]][[4]], 5)
+#             control_mean_post <- round(describeBy(dta[[ed_v]], group=dta[[TrtBinColName]])[[1]][[3]], 5)
+#             control_SD_post <- round(describeBy(dta[[ed_v]], group=dta[[TrtBinColName]])[[1]][[4]], 5)
+
+
+#             it_diff_Mean_pre <- round(abs( treat_mean_pre-control_mean_pre ),5)
+#             it_diff_Mean_post <- round(abs(treat_mean_post-control_mean_post),5)
+
+#             if (!exists("bRes")) {
+
+#                 bRes <- data.frame(treat_mean_pre,treat_SD_pre,control_mean_pre,control_SD_pre,
+#                            treat_mean_post,treat_SD_post,control_mean_post,control_SD_post,
+#                            it_diff_Mean_pre,it_diff_Mean_post)
+
+#                 colnames(bRes)[1] <- "Pre-Balance Treated Mean"
+#                 colnames(bRes)[2] <- "Pre-Balance Treated SD"
+#                 colnames(bRes)[3] <- "Pre-Balance Control Mean"
+#                 colnames(bRes)[4] <- "Pre-Balance Control SD"
+
+#                 colnames(bRes)[5] <- "Post-Balance Treated Mean"
+#                 colnames(bRes)[6] <- "Post-Balance Treated SD"
+#                 colnames(bRes)[7] <- "Post-Balance Control Mean"
+#                 colnames(bRes)[8] <- "Post-Balance Control SD"
+
+#                 colnames(bRes)[9] <- "Mean Difference Pre-Balance"
+#                 colnames(bRes)[10] <- "Mean Difference Post-Balance"
+
+#             } else {
+#                 bRes <- rbind(bRes, c(treat_mean_pre,treat_SD_pre,control_mean_pre,control_SD_pre,
+#                               treat_mean_post,treat_SD_post,control_mean_post,control_SD_post,
+#                               it_diff_Mean_pre,it_diff_Mean_post))
+#             }
+      
+#             rownames(bRes)[i-(i-cnt)] <- gsub("[^a-zA-Z0-9]", "", ed_v)
+#         }
+#     }
+
+#     print("sat8")
+  
+#     if (visual=="TRUE") {
+#         #Output graphics
+#         #Remove the factor rows
+#         nrow_c <- length(pltObjs)
+#         counter <- 1
+#         while (counter <= nrow_c) {
+#             d = counter + 3
+#             if (d > nrow_c) {
+#                 d = nrow_c
+#             }
+#             do.call(grid.arrange,c(pltObjs[counter:d],nrow=2,ncol=2))
+#             counter = counter + 4
+#         }
+#         #bTab <- stargazer(bRes,summary=FALSE,type="html")
+#         #print.htmlTable(bTab)
+#     }
+  
+
+#     return (as.data.frame(dta))
+# }
+
+
+
+
+
 SAT <- function (dta, mtd, constraints, psm_eq, ids, drop_opts, visual, TrtBinColName) {
     #Initialization
     pltObjs <- list()
diff --git a/R/fastNN_binary_func.R b/R/fastNN_binary_func.R
index cc8fd83..e4d649f 100644
--- a/R/fastNN_binary_func.R
+++ b/R/fastNN_binary_func.R
@@ -8,19 +8,21 @@
 fastNN_binary_func <- function(dta, trtMntVar, ids, curgrp, dist_PSM) {
 
     print("nn1.0")
-    timerx <- proc.time()
+    # timerx <- proc.time()
 
+    
 
     #Fast nearest neighbors search - will not arrive at optimum,
     #but this may not be an issue for many analysis.
     #Effectively loops through all observations in the treatment group, ordered by PSM score - higher scores go first.
 
-    sorted_dta <- dta@data[order(dta@data[["PSM_trtProb"]]), c(ids, trtMntVar, "PSM_trtProb")]
+    sorted_dta <- as.data.table(dta@data)
+    sorted_dta <- sorted_dta[order("PSM_trtProb")][, c(ids, trtMntVar, "PSM_trtProb"), with=FALSE]
 
 
     #Conduct the matching
-    treated <- sorted_dta[sorted_dta[[trtMntVar]] == 1,]
-    untreated <- sorted_dta[sorted_dta[[trtMntVar]] == 0,]
+    treated <- sorted_dta[get(trtMntVar) == 1]
+    untreated <- sorted_dta[get(trtMntVar) == 0]
 
     it_cnt = min(length(treated[[1]]), length(untreated[[1]]))
     # dta@data[["match"]] <- -999
@@ -52,7 +54,7 @@ fastNN_binary_func <- function(dta, trtMntVar, ids, curgrp, dist_PSM) {
         # print(length(untreated[[1]]))
         # summary(untreated[["PSM_trtProb"]])
 
-        k <- get.knnx(treated[["PSM_trtProb"]], untreated[["PSM_trtProb"]], 1)
+        k <- get.knnx(treated[, PSM_trtProb], untreated[, PSM_trtProb], 1)
         
         # time_list[2] <- round((proc.time() - timer)[3],5)
         print("nn2.2")
@@ -65,11 +67,11 @@ fastNN_binary_func <- function(dta, trtMntVar, ids, curgrp, dist_PSM) {
                 print("nn2.2.0")
 
                 #Calculate the Euclidean Distance between pairs
-                Control_ID = toString(untreated[[ids]][[mC]])
+                Control_ID = toString(untreated[mC, get(ids)])
 
                 mT = k[["nn.index"]][mC]
                 
-                Treatment_ID = toString(treated[[ids]][[mT]])
+                Treatment_ID = toString(treated[mT, get(ids)])
 
                 #Find the control x,y location
                 cCoord = coordinates(dta[which(dta@data[[ids]] == Control_ID),])
@@ -108,17 +110,17 @@ fastNN_binary_func <- function(dta, trtMntVar, ids, curgrp, dist_PSM) {
         best_m_treated = k[["nn.index"]][best_m_control]
         
 
-        #Control PSM ID
-        cid_txt = paste("untreated$",ids,"[",best_m_control,"]",sep="")
-        Control_ID = toString(eval(parse(text=cid_txt)))
+        #Control and Treatment PSM ID
+        # cid_txt = paste("untreated$",ids,"[",best_m_control,"]",sep="")
+        # Control_ID = toString(eval(parse(text=cid_txt)))
             
-        #Treatment PSM ID
-        tid_txt = paste("treated$",ids,"[",best_m_treated,"]",sep="")
-        Treatment_ID = toString(eval(parse(text=tid_txt)))
+        # tid_txt = paste("treated$",ids,"[",best_m_treated,"]",sep="")
+        # Treatment_ID = toString(eval(parse(text=tid_txt)))
+
 
         #Control PSM ID and Treatment PSM ID
-        # Control_ID = toString(untreated[,ids][best_m_control])
-        # Treatment_ID = toString(treated[,ids][best_m_treated])
+        Control_ID = toString(untreated[best_m_control, get(ids)])
+        Treatment_ID = toString(treated[best_m_control, get(ids)])
 
 
         #Create a unique pair ID for each group (will simply append a "1" if only 1 group)
@@ -149,17 +151,187 @@ fastNN_binary_func <- function(dta, trtMntVar, ids, curgrp, dist_PSM) {
     
         # sorted_dta[which(sorted_dta[[ids]] == Control_ID | sorted_dta[[ids]] == Treatment_ID),][['nn_matched']] <- 1
 
-        treated <- treated[which(treated[[ids]] != Treatment_ID),]
-        untreated <- untreated[which(untreated[[ids]] != Control_ID),]
+        treated <- treated[get(ids) != (Treatment_ID)]
+        untreated <- untreated[get(ids) != (Control_ID)]
         
         # time_list[6] <- round((proc.time() - timer)[3],5)
         # print(paste(time_list))
 
     }
 
-    print((proc.time() - timerx)[3])
+    # print((proc.time() - timerx)[3])
 
     return(dta) 
 
 }
 
+
+
+
+
+# #FastNN 
+# #Algorithm to find a hopefully near-optimal match of pairs
+# #In a treatment and control group
+# #Works by first ordering by the propensity score matching value,
+# #and then working through this list in order from highest to lowest.
+# #Matches are removed each step.
+
+# fastNN_binary_func <- function(dta, trtMntVar, ids, curgrp, dist_PSM) {
+
+#     print("nn1.0")
+#     timerx <- proc.time()
+
+    
+
+#     #Fast nearest neighbors search - will not arrive at optimum,
+#     #but this may not be an issue for many analysis.
+#     #Effectively loops through all observations in the treatment group, ordered by PSM score - higher scores go first.
+
+#     sorted_dta <- dta@data[order(dta@data[["PSM_trtProb"]]), c(ids, trtMntVar, "PSM_trtProb")]
+
+
+#     #Conduct the matching
+#     treated <- sorted_dta[sorted_dta[[trtMntVar]] == 1,]
+#     untreated <- sorted_dta[sorted_dta[[trtMntVar]] == 0,]
+
+#     it_cnt = min(length(treated[[1]]), length(untreated[[1]]))
+#     # dta@data[["match"]] <- -999
+#     # dta@data[["PSM_distance"]] <- -999
+#     dta@data[["PSM_match_ID"]] <- -999
+
+#     print("nn2")
+
+#     #Calculate a distance decay function
+#     #to perturb pairs based on their distances.  
+#     for (j in 1:it_cnt) {
+#         # time_list <- c()
+
+#         print("nn2.0")
+#         # timer <- proc.time()
+
+#         # treated <- sorted_dta[which(sorted_dta[[trtMntVar]] == 1 & sorted_dta[['nn_matched']] == 0),]
+#         # untreated <- sorted_dta[which(sorted_dta[[trtMntVar]] == 0 & sorted_dta[['nn_matched']] == 0),]
+        
+#         # print(nrow(sorted_dta[which(sorted_dta[['nn_matched']] == 0),]))
+
+#         # time_list[1] <- round((proc.time() - timer)[3],5)
+#         # print("nn2.1")
+#         # timer <- proc.time()
+
+#         #Run the KNN for all neighbors. 
+#         # print(length(treated[[1]]))
+#         # summary(treated[["PSM_trtProb"]])
+#         # print(length(untreated[[1]]))
+#         # summary(untreated[["PSM_trtProb"]])
+
+#         k <- get.knnx(treated[["PSM_trtProb"]], untreated[["PSM_trtProb"]], 1)
+        
+#         # time_list[2] <- round((proc.time() - timer)[3],5)
+#         print("nn2.2")
+#         # timer <- proc.time()
+
+#         #Perturb the values based on the distance decay function, if selected.
+#         if (!is.null(dist_PSM)) {
+#             for (mC in 1:length(k[[1]])) {
+
+#                 print("nn2.2.0")
+
+#                 #Calculate the Euclidean Distance between pairs
+#                 Control_ID = toString(untreated[[ids]][[mC]])
+
+#                 mT = k[["nn.index"]][mC]
+                
+#                 Treatment_ID = toString(treated[[ids]][[mT]])
+
+#                 #Find the control x,y location
+#                 cCoord = coordinates(dta[which(dta@data[[ids]] == Control_ID),])
+                
+
+#                 #Find the treatment x,y location
+#                 tCoord = coordinates(dta[which(dta@data[[ids]] == Treatment_ID),])
+
+#                 y_dist = abs(cCoord[1] - cCoord[2])
+#                 x_dist = abs(tCoord[1] - tCoord[2])
+#                 euc_dist = sqrt(y_dist^2 + x_dist^2)
+                
+#                 print("nn2.2.1")
+
+#                 PSM_score = k[["nn.dist"]][mC]
+#                 geog_Weight = pairDistWeight(dist=euc_dist,threshold=dist_PSM,type="Spherical")
+
+#                 print("nn2.2.2")
+
+                
+#                 k[["nn.dist"]][mC] <- ((1-geog_Weight) * PSM_score)
+
+#             }
+      
+#         }
+
+#         # time_list[3] <- round((proc.time() - timer)[3],5)
+#         print("nn2.3")
+#         # timer <- proc.time()
+
+#         #Add the matched treatment and control values to the recording data frame
+#         #best_m_control is the row in the "distance" matrix with the lowest value.  This is the same row as in the index.
+#         best_m_control = which(k[["nn.dist"]] %in% sort(k[["nn.dist"]])[1])
+        
+#         #This will give us the matched index in the "treated" dataset.
+#         best_m_treated = k[["nn.index"]][best_m_control]
+        
+
+#         #Control PSM ID
+#         cid_txt = paste("untreated$",ids,"[",best_m_control,"]",sep="")
+#         Control_ID = toString(eval(parse(text=cid_txt)))
+            
+#         #Treatment PSM ID
+#         tid_txt = paste("treated$",ids,"[",best_m_treated,"]",sep="")
+#         Treatment_ID = toString(eval(parse(text=tid_txt)))
+
+#         #Control PSM ID and Treatment PSM ID
+#         # Control_ID = toString(untreated[,ids][best_m_control])
+#         # Treatment_ID = toString(treated[,ids][best_m_treated])
+
+
+#         #Create a unique pair ID for each group (will simply append a "1" if only 1 group)
+#         pair_id = paste(curgrp,j, sep="")
+        
+
+
+#         # time_list[4] <- round((proc.time() - timer)[3],5)
+#         print("nn2.4x")
+#         # timer <- proc.time()
+
+#         #Add the Treatment ID to the Control Row and Add the Control ID to the Treatment Row
+#         # dta@data$match[which(dta@data[[ids]] == Control_ID)] <- Treatment_ID
+#         # dta@data$match[which(dta@data[[ids]] == Treatment_ID)] <- Control_ID
+
+
+#         # dta@data$PSM_distance[which(dta@data[[ids]] == Control_ID | dta@data[[ids]] == Treatment_ID)] <- k[["nn.dist"]][,1][best_m_control]
+#         dta@data$PSM_match_ID[which(dta@data[[ids]] == Control_ID | dta@data[[ids]] == Treatment_ID)] <- pair_id        
+
+
+        
+#         # time_list[5] <- round((proc.time() - timer)[3],5)
+#         # timer <- proc.time()
+
+#         #Drop the paired match out of the iteration matrix 
+#         # sorted_dta <- sorted_dta[sorted_dta[[ids]] != Treatment_ID ,]
+#         # sorted_dta <- sorted_dta[sorted_dta[[ids]] != Control_ID ,]    
+    
+#         # sorted_dta[which(sorted_dta[[ids]] == Control_ID | sorted_dta[[ids]] == Treatment_ID),][['nn_matched']] <- 1
+
+#         treated <- treated[which(treated[[ids]] != Treatment_ID),]
+#         untreated <- untreated[which(untreated[[ids]] != Control_ID),]
+        
+#         # time_list[6] <- round((proc.time() - timer)[3],5)
+#         # print(paste(time_list))
+
+#     }
+
+#     print((proc.time() - timerx)[3])
+
+#     return(dta) 
+
+# }
+
diff --git a/R/loadLibs.R b/R/loadLibs.R
index 2eb2d52..cff4216 100644
--- a/R/loadLibs.R
+++ b/R/loadLibs.R
@@ -19,4 +19,6 @@ loadLibs <- function (x=1) {
   library(stargazer)
   library(lmtest)
   library(multiwayvcov)
+  
+  library(data.table)
 }

From 77a292ed80618883d22664fb908e1178e762616b Mon Sep 17 00:00:00 2001
From: userz <sgoodman@aiddata.org>
Date: Tue, 10 Nov 2015 09:16:30 -0500
Subject: [PATCH 161/212] data table test for nn

---
 R/fastNN_binary_func.R | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/R/fastNN_binary_func.R b/R/fastNN_binary_func.R
index e4d649f..ecf56ee 100644
--- a/R/fastNN_binary_func.R
+++ b/R/fastNN_binary_func.R
@@ -17,7 +17,7 @@ fastNN_binary_func <- function(dta, trtMntVar, ids, curgrp, dist_PSM) {
     #Effectively loops through all observations in the treatment group, ordered by PSM score - higher scores go first.
 
     sorted_dta <- as.data.table(dta@data)
-    sorted_dta <- sorted_dta[order("PSM_trtProb")][, c(ids, trtMntVar, "PSM_trtProb"), with=FALSE]
+    sorted_dta <- sorted_dta[order(PSM_trtProb)][, c(ids, trtMntVar, "PSM_trtProb"), with=FALSE]
 
 
     #Conduct the matching

From db5cb1183108fbaa7fa776feb551048f83f8b5ab Mon Sep 17 00:00:00 2001
From: userz <sgoodman@aiddata.org>
Date: Tue, 10 Nov 2015 09:20:58 -0500
Subject: [PATCH 162/212] data table test for nn

---
 R/fastNN_binary_func.R | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/R/fastNN_binary_func.R b/R/fastNN_binary_func.R
index ecf56ee..bea8b0f 100644
--- a/R/fastNN_binary_func.R
+++ b/R/fastNN_binary_func.R
@@ -17,6 +17,8 @@ fastNN_binary_func <- function(dta, trtMntVar, ids, curgrp, dist_PSM) {
     #Effectively loops through all observations in the treatment group, ordered by PSM score - higher scores go first.
 
     sorted_dta <- as.data.table(dta@data)
+
+    print(colnames(sorted_dta))
     sorted_dta <- sorted_dta[order(PSM_trtProb)][, c(ids, trtMntVar, "PSM_trtProb"), with=FALSE]
 
 

From f7ea0e64eb37707eda8ac73358f9d7ee87d009d4 Mon Sep 17 00:00:00 2001
From: userz <sgoodman@aiddata.org>
Date: Tue, 10 Nov 2015 09:22:11 -0500
Subject: [PATCH 163/212] data table test for nn

---
 R/fastNN_binary_func.R | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/R/fastNN_binary_func.R b/R/fastNN_binary_func.R
index bea8b0f..8f28851 100644
--- a/R/fastNN_binary_func.R
+++ b/R/fastNN_binary_func.R
@@ -18,8 +18,8 @@ fastNN_binary_func <- function(dta, trtMntVar, ids, curgrp, dist_PSM) {
 
     sorted_dta <- as.data.table(dta@data)
 
-    print(colnames(sorted_dta))
     sorted_dta <- sorted_dta[order(PSM_trtProb)][, c(ids, trtMntVar, "PSM_trtProb"), with=FALSE]
+    print(colnames(sorted_dta))
 
 
     #Conduct the matching

From 8c15100195bbf9f8982e181ad95d6a2468ab8147 Mon Sep 17 00:00:00 2001
From: userz <sgoodman@aiddata.org>
Date: Tue, 10 Nov 2015 09:24:13 -0500
Subject: [PATCH 164/212] data table test for nn

---
 R/fastNN_binary_func.R | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/R/fastNN_binary_func.R b/R/fastNN_binary_func.R
index 8f28851..0d70ab0 100644
--- a/R/fastNN_binary_func.R
+++ b/R/fastNN_binary_func.R
@@ -18,10 +18,13 @@ fastNN_binary_func <- function(dta, trtMntVar, ids, curgrp, dist_PSM) {
 
     sorted_dta <- as.data.table(dta@data)
 
-    sorted_dta <- sorted_dta[order(PSM_trtProb)][, c(ids, trtMntVar, "PSM_trtProb"), with=FALSE]
+    sorted_dta <- sorted_dta[order(PSM_trtProb)]
     print(colnames(sorted_dta))
 
 
+    sorted_dta <- sorted_dta[, c(ids, trtMntVar, "PSM_trtProb"), with=FALSE]
+
+
     #Conduct the matching
     treated <- sorted_dta[get(trtMntVar) == 1]
     untreated <- sorted_dta[get(trtMntVar) == 0]

From 642344d70c2be0da4618dc54a8bfe183f7fff33d Mon Sep 17 00:00:00 2001
From: userz <sgoodman@aiddata.org>
Date: Tue, 10 Nov 2015 09:28:22 -0500
Subject: [PATCH 165/212] data table test for nn

---
 R/SAT.R                |   2 +-
 R/fastNN_binary_func.R | 388 ++++++++++++++++++++---------------------
 2 files changed, 195 insertions(+), 195 deletions(-)

diff --git a/R/SAT.R b/R/SAT.R
index 1f6d06c..96bf259 100644
--- a/R/SAT.R
+++ b/R/SAT.R
@@ -343,7 +343,7 @@ SAT <- function (dta, mtd, constraints, psm_eq, ids, drop_opts, visual, TrtBinCo
         if (mtd == "fastNN") {
             # ***
             # this is the slow part of functions
-            temp_dta[[i]] <- fastNN_binary_func(it_dta,TrtBinColName,ids,cur_grp,dist_PSM) 
+            temp_dta[[i]] <- fastNN_binary_func(it_dta, TrtBinColName, ids, cur_grp, dist_PSM) 
         }
 
         if (mtd == "NN_WithReplacement") {
diff --git a/R/fastNN_binary_func.R b/R/fastNN_binary_func.R
index 0d70ab0..8d9f5e3 100644
--- a/R/fastNN_binary_func.R
+++ b/R/fastNN_binary_func.R
@@ -1,179 +1,3 @@
-#FastNN 
-#Algorithm to find a hopefully near-optimal match of pairs
-#In a treatment and control group
-#Works by first ordering by the propensity score matching value,
-#and then working through this list in order from highest to lowest.
-#Matches are removed each step.
-
-fastNN_binary_func <- function(dta, trtMntVar, ids, curgrp, dist_PSM) {
-
-    print("nn1.0")
-    # timerx <- proc.time()
-
-    
-
-    #Fast nearest neighbors search - will not arrive at optimum,
-    #but this may not be an issue for many analysis.
-    #Effectively loops through all observations in the treatment group, ordered by PSM score - higher scores go first.
-
-    sorted_dta <- as.data.table(dta@data)
-
-    sorted_dta <- sorted_dta[order(PSM_trtProb)]
-    print(colnames(sorted_dta))
-
-
-    sorted_dta <- sorted_dta[, c(ids, trtMntVar, "PSM_trtProb"), with=FALSE]
-
-
-    #Conduct the matching
-    treated <- sorted_dta[get(trtMntVar) == 1]
-    untreated <- sorted_dta[get(trtMntVar) == 0]
-
-    it_cnt = min(length(treated[[1]]), length(untreated[[1]]))
-    # dta@data[["match"]] <- -999
-    # dta@data[["PSM_distance"]] <- -999
-    dta@data[["PSM_match_ID"]] <- -999
-
-    print("nn2")
-
-    #Calculate a distance decay function
-    #to perturb pairs based on their distances.  
-    for (j in 1:it_cnt) {
-        # time_list <- c()
-
-        print("nn2.0")
-        # timer <- proc.time()
-
-        # treated <- sorted_dta[which(sorted_dta[[trtMntVar]] == 1 & sorted_dta[['nn_matched']] == 0),]
-        # untreated <- sorted_dta[which(sorted_dta[[trtMntVar]] == 0 & sorted_dta[['nn_matched']] == 0),]
-        
-        # print(nrow(sorted_dta[which(sorted_dta[['nn_matched']] == 0),]))
-
-        # time_list[1] <- round((proc.time() - timer)[3],5)
-        # print("nn2.1")
-        # timer <- proc.time()
-
-        #Run the KNN for all neighbors. 
-        # print(length(treated[[1]]))
-        # summary(treated[["PSM_trtProb"]])
-        # print(length(untreated[[1]]))
-        # summary(untreated[["PSM_trtProb"]])
-
-        k <- get.knnx(treated[, PSM_trtProb], untreated[, PSM_trtProb], 1)
-        
-        # time_list[2] <- round((proc.time() - timer)[3],5)
-        print("nn2.2")
-        # timer <- proc.time()
-
-        #Perturb the values based on the distance decay function, if selected.
-        if (!is.null(dist_PSM)) {
-            for (mC in 1:length(k[[1]])) {
-
-                print("nn2.2.0")
-
-                #Calculate the Euclidean Distance between pairs
-                Control_ID = toString(untreated[mC, get(ids)])
-
-                mT = k[["nn.index"]][mC]
-                
-                Treatment_ID = toString(treated[mT, get(ids)])
-
-                #Find the control x,y location
-                cCoord = coordinates(dta[which(dta@data[[ids]] == Control_ID),])
-                
-
-                #Find the treatment x,y location
-                tCoord = coordinates(dta[which(dta@data[[ids]] == Treatment_ID),])
-
-                y_dist = abs(cCoord[1] - cCoord[2])
-                x_dist = abs(tCoord[1] - tCoord[2])
-                euc_dist = sqrt(y_dist^2 + x_dist^2)
-                
-                print("nn2.2.1")
-
-                PSM_score = k[["nn.dist"]][mC]
-                geog_Weight = pairDistWeight(dist=euc_dist,threshold=dist_PSM,type="Spherical")
-
-                print("nn2.2.2")
-
-                
-                k[["nn.dist"]][mC] <- ((1-geog_Weight) * PSM_score)
-
-            }
-      
-        }
-
-        # time_list[3] <- round((proc.time() - timer)[3],5)
-        print("nn2.3")
-        # timer <- proc.time()
-
-        #Add the matched treatment and control values to the recording data frame
-        #best_m_control is the row in the "distance" matrix with the lowest value.  This is the same row as in the index.
-        best_m_control = which(k[["nn.dist"]] %in% sort(k[["nn.dist"]])[1])
-        
-        #This will give us the matched index in the "treated" dataset.
-        best_m_treated = k[["nn.index"]][best_m_control]
-        
-
-        #Control and Treatment PSM ID
-        # cid_txt = paste("untreated$",ids,"[",best_m_control,"]",sep="")
-        # Control_ID = toString(eval(parse(text=cid_txt)))
-            
-        # tid_txt = paste("treated$",ids,"[",best_m_treated,"]",sep="")
-        # Treatment_ID = toString(eval(parse(text=tid_txt)))
-
-
-        #Control PSM ID and Treatment PSM ID
-        Control_ID = toString(untreated[best_m_control, get(ids)])
-        Treatment_ID = toString(treated[best_m_control, get(ids)])
-
-
-        #Create a unique pair ID for each group (will simply append a "1" if only 1 group)
-        pair_id = paste(curgrp,j, sep="")
-        
-
-
-        # time_list[4] <- round((proc.time() - timer)[3],5)
-        print("nn2.4x")
-        # timer <- proc.time()
-
-        #Add the Treatment ID to the Control Row and Add the Control ID to the Treatment Row
-        # dta@data$match[which(dta@data[[ids]] == Control_ID)] <- Treatment_ID
-        # dta@data$match[which(dta@data[[ids]] == Treatment_ID)] <- Control_ID
-
-
-        # dta@data$PSM_distance[which(dta@data[[ids]] == Control_ID | dta@data[[ids]] == Treatment_ID)] <- k[["nn.dist"]][,1][best_m_control]
-        dta@data$PSM_match_ID[which(dta@data[[ids]] == Control_ID | dta@data[[ids]] == Treatment_ID)] <- pair_id        
-
-
-        
-        # time_list[5] <- round((proc.time() - timer)[3],5)
-        # timer <- proc.time()
-
-        #Drop the paired match out of the iteration matrix 
-        # sorted_dta <- sorted_dta[sorted_dta[[ids]] != Treatment_ID ,]
-        # sorted_dta <- sorted_dta[sorted_dta[[ids]] != Control_ID ,]    
-    
-        # sorted_dta[which(sorted_dta[[ids]] == Control_ID | sorted_dta[[ids]] == Treatment_ID),][['nn_matched']] <- 1
-
-        treated <- treated[get(ids) != (Treatment_ID)]
-        untreated <- untreated[get(ids) != (Control_ID)]
-        
-        # time_list[6] <- round((proc.time() - timer)[3],5)
-        # print(paste(time_list))
-
-    }
-
-    # print((proc.time() - timerx)[3])
-
-    return(dta) 
-
-}
-
-
-
-
-
 # #FastNN 
 # #Algorithm to find a hopefully near-optimal match of pairs
 # #In a treatment and control group
@@ -184,7 +8,7 @@ fastNN_binary_func <- function(dta, trtMntVar, ids, curgrp, dist_PSM) {
 # fastNN_binary_func <- function(dta, trtMntVar, ids, curgrp, dist_PSM) {
 
 #     print("nn1.0")
-#     timerx <- proc.time()
+#     # timerx <- proc.time()
 
     
 
@@ -192,12 +16,18 @@ fastNN_binary_func <- function(dta, trtMntVar, ids, curgrp, dist_PSM) {
 #     #but this may not be an issue for many analysis.
 #     #Effectively loops through all observations in the treatment group, ordered by PSM score - higher scores go first.
 
-#     sorted_dta <- dta@data[order(dta@data[["PSM_trtProb"]]), c(ids, trtMntVar, "PSM_trtProb")]
+#     sorted_dta <- as.data.table(dta@data)
+
+#     sorted_dta <- sorted_dta[order(PSM_trtProb)]
+#     print(colnames(sorted_dta))
+
+
+#     sorted_dta <- sorted_dta[, c(ids, trtMntVar, "PSM_trtProb"), with=FALSE]
 
 
 #     #Conduct the matching
-#     treated <- sorted_dta[sorted_dta[[trtMntVar]] == 1,]
-#     untreated <- sorted_dta[sorted_dta[[trtMntVar]] == 0,]
+#     treated <- sorted_dta[get(trtMntVar) == 1]
+#     untreated <- sorted_dta[get(trtMntVar) == 0]
 
 #     it_cnt = min(length(treated[[1]]), length(untreated[[1]]))
 #     # dta@data[["match"]] <- -999
@@ -229,7 +59,7 @@ fastNN_binary_func <- function(dta, trtMntVar, ids, curgrp, dist_PSM) {
 #         # print(length(untreated[[1]]))
 #         # summary(untreated[["PSM_trtProb"]])
 
-#         k <- get.knnx(treated[["PSM_trtProb"]], untreated[["PSM_trtProb"]], 1)
+#         k <- get.knnx(treated[, PSM_trtProb], untreated[, PSM_trtProb], 1)
         
 #         # time_list[2] <- round((proc.time() - timer)[3],5)
 #         print("nn2.2")
@@ -242,11 +72,11 @@ fastNN_binary_func <- function(dta, trtMntVar, ids, curgrp, dist_PSM) {
 #                 print("nn2.2.0")
 
 #                 #Calculate the Euclidean Distance between pairs
-#                 Control_ID = toString(untreated[[ids]][[mC]])
+#                 Control_ID = toString(untreated[mC, get(ids)])
 
 #                 mT = k[["nn.index"]][mC]
                 
-#                 Treatment_ID = toString(treated[[ids]][[mT]])
+#                 Treatment_ID = toString(treated[mT, get(ids)])
 
 #                 #Find the control x,y location
 #                 cCoord = coordinates(dta[which(dta@data[[ids]] == Control_ID),])
@@ -285,17 +115,17 @@ fastNN_binary_func <- function(dta, trtMntVar, ids, curgrp, dist_PSM) {
 #         best_m_treated = k[["nn.index"]][best_m_control]
         
 
-#         #Control PSM ID
-#         cid_txt = paste("untreated$",ids,"[",best_m_control,"]",sep="")
-#         Control_ID = toString(eval(parse(text=cid_txt)))
+#         #Control and Treatment PSM ID
+#         # cid_txt = paste("untreated$",ids,"[",best_m_control,"]",sep="")
+#         # Control_ID = toString(eval(parse(text=cid_txt)))
             
-#         #Treatment PSM ID
-#         tid_txt = paste("treated$",ids,"[",best_m_treated,"]",sep="")
-#         Treatment_ID = toString(eval(parse(text=tid_txt)))
+#         # tid_txt = paste("treated$",ids,"[",best_m_treated,"]",sep="")
+#         # Treatment_ID = toString(eval(parse(text=tid_txt)))
+
 
 #         #Control PSM ID and Treatment PSM ID
-#         # Control_ID = toString(untreated[,ids][best_m_control])
-#         # Treatment_ID = toString(treated[,ids][best_m_treated])
+#         Control_ID = toString(untreated[best_m_control, get(ids)])
+#         Treatment_ID = toString(treated[best_m_control, get(ids)])
 
 
 #         #Create a unique pair ID for each group (will simply append a "1" if only 1 group)
@@ -326,17 +156,187 @@ fastNN_binary_func <- function(dta, trtMntVar, ids, curgrp, dist_PSM) {
     
 #         # sorted_dta[which(sorted_dta[[ids]] == Control_ID | sorted_dta[[ids]] == Treatment_ID),][['nn_matched']] <- 1
 
-#         treated <- treated[which(treated[[ids]] != Treatment_ID),]
-#         untreated <- untreated[which(untreated[[ids]] != Control_ID),]
+#         treated <- treated[get(ids) != (Treatment_ID)]
+#         untreated <- untreated[get(ids) != (Control_ID)]
         
 #         # time_list[6] <- round((proc.time() - timer)[3],5)
 #         # print(paste(time_list))
 
 #     }
 
-#     print((proc.time() - timerx)[3])
+#     # print((proc.time() - timerx)[3])
 
 #     return(dta) 
 
 # }
 
+
+
+
+
+#FastNN 
+#Algorithm to find a hopefully near-optimal match of pairs
+#In a treatment and control group
+#Works by first ordering by the propensity score matching value,
+#and then working through this list in order from highest to lowest.
+#Matches are removed each step.
+
+fastNN_binary_func <- function(dta, trtMntVar, ids, curgrp, dist_PSM) {
+
+    print("nn1.0")
+    timerx <- proc.time()
+
+    
+
+    #Fast nearest neighbors search - will not arrive at optimum,
+    #but this may not be an issue for many analysis.
+    #Effectively loops through all observations in the treatment group, ordered by PSM score - higher scores go first.
+
+    sorted_dta <- dta@data[order(dta@data[["PSM_trtProb"]]), c(ids, trtMntVar, "PSM_trtProb")]
+
+
+    #Conduct the matching
+    treated <- sorted_dta[sorted_dta[[trtMntVar]] == 1,]
+    untreated <- sorted_dta[sorted_dta[[trtMntVar]] == 0,]
+
+    it_cnt = min(length(treated[[1]]), length(untreated[[1]]))
+    # dta@data[["match"]] <- -999
+    # dta@data[["PSM_distance"]] <- -999
+    dta@data[["PSM_match_ID"]] <- -999
+
+    print("nn2")
+
+    #Calculate a distance decay function
+    #to perturb pairs based on their distances.  
+    for (j in 1:it_cnt) {
+        # time_list <- c()
+
+        print("nn2.0")
+        # timer <- proc.time()
+
+        # treated <- sorted_dta[which(sorted_dta[[trtMntVar]] == 1 & sorted_dta[['nn_matched']] == 0),]
+        # untreated <- sorted_dta[which(sorted_dta[[trtMntVar]] == 0 & sorted_dta[['nn_matched']] == 0),]
+        
+        # print(nrow(sorted_dta[which(sorted_dta[['nn_matched']] == 0),]))
+
+        # time_list[1] <- round((proc.time() - timer)[3],5)
+        # print("nn2.1")
+        # timer <- proc.time()
+
+        #Run the KNN for all neighbors. 
+        # print(length(treated[[1]]))
+        # summary(treated[["PSM_trtProb"]])
+        # print(length(untreated[[1]]))
+        # summary(untreated[["PSM_trtProb"]])
+
+        k <- get.knnx(treated[["PSM_trtProb"]], untreated[["PSM_trtProb"]], 1)
+        
+        # time_list[2] <- round((proc.time() - timer)[3],5)
+        print("nn2.2")
+        # timer <- proc.time()
+
+        #Perturb the values based on the distance decay function, if selected.
+        if (!is.null(dist_PSM)) {
+            for (mC in 1:length(k[[1]])) {
+
+                print("nn2.2.0")
+
+                #Calculate the Euclidean Distance between pairs
+                Control_ID = toString(untreated[[ids]][[mC]])
+
+                mT = k[["nn.index"]][mC]
+                
+                Treatment_ID = toString(treated[[ids]][[mT]])
+
+                #Find the control x,y location
+                cCoord = coordinates(dta[which(dta@data[[ids]] == Control_ID),])
+                
+
+                #Find the treatment x,y location
+                tCoord = coordinates(dta[which(dta@data[[ids]] == Treatment_ID),])
+
+                y_dist = abs(cCoord[1] - cCoord[2])
+                x_dist = abs(tCoord[1] - tCoord[2])
+                euc_dist = sqrt(y_dist^2 + x_dist^2)
+                
+                print("nn2.2.1")
+
+                PSM_score = k[["nn.dist"]][mC]
+                geog_Weight = pairDistWeight(dist=euc_dist,threshold=dist_PSM,type="Spherical")
+
+                print("nn2.2.2")
+
+                
+                k[["nn.dist"]][mC] <- ((1-geog_Weight) * PSM_score)
+
+            }
+      
+        }
+
+        # time_list[3] <- round((proc.time() - timer)[3],5)
+        print("nn2.3")
+        # timer <- proc.time()
+
+        #Add the matched treatment and control values to the recording data frame
+        #best_m_control is the row in the "distance" matrix with the lowest value.  This is the same row as in the index.
+        best_m_control = which(k[["nn.dist"]] %in% sort(k[["nn.dist"]])[1])
+        
+        #This will give us the matched index in the "treated" dataset.
+        best_m_treated = k[["nn.index"]][best_m_control]
+        
+
+        #Control PSM ID
+        cid_txt = paste("untreated$",ids,"[",best_m_control,"]",sep="")
+        Control_ID = toString(eval(parse(text=cid_txt)))
+            
+        #Treatment PSM ID
+        tid_txt = paste("treated$",ids,"[",best_m_treated,"]",sep="")
+        Treatment_ID = toString(eval(parse(text=tid_txt)))
+
+        #Control PSM ID and Treatment PSM ID
+        # Control_ID = toString(untreated[,ids][best_m_control])
+        # Treatment_ID = toString(treated[,ids][best_m_treated])
+
+
+        #Create a unique pair ID for each group (will simply append a "1" if only 1 group)
+        pair_id = paste(curgrp,j, sep="")
+        
+
+
+        # time_list[4] <- round((proc.time() - timer)[3],5)
+        print("nn2.4x")
+        # timer <- proc.time()
+
+        #Add the Treatment ID to the Control Row and Add the Control ID to the Treatment Row
+        # dta@data$match[which(dta@data[[ids]] == Control_ID)] <- Treatment_ID
+        # dta@data$match[which(dta@data[[ids]] == Treatment_ID)] <- Control_ID
+
+
+        # dta@data$PSM_distance[which(dta@data[[ids]] == Control_ID | dta@data[[ids]] == Treatment_ID)] <- k[["nn.dist"]][,1][best_m_control]
+        dta@data$PSM_match_ID[which(dta@data[[ids]] == Control_ID | dta@data[[ids]] == Treatment_ID)] <- pair_id        
+
+
+        
+        # time_list[5] <- round((proc.time() - timer)[3],5)
+        # timer <- proc.time()
+
+        #Drop the paired match out of the iteration matrix 
+        # sorted_dta <- sorted_dta[sorted_dta[[ids]] != Treatment_ID ,]
+        # sorted_dta <- sorted_dta[sorted_dta[[ids]] != Control_ID ,]    
+    
+        # sorted_dta[which(sorted_dta[[ids]] == Control_ID | sorted_dta[[ids]] == Treatment_ID),][['nn_matched']] <- 1
+
+        treated <- treated[which(treated[[ids]] != Treatment_ID),]
+        untreated <- untreated[which(untreated[[ids]] != Control_ID),]
+        
+        # time_list[6] <- round((proc.time() - timer)[3],5)
+        # print(paste(time_list))
+
+    }
+
+    print((proc.time() - timerx)[3])
+
+    return(dta) 
+
+}
+

From 2ef7491c722bd2408eecc0136b5a8a415823c26f Mon Sep 17 00:00:00 2001
From: userz <sgoodman@aiddata.org>
Date: Tue, 10 Nov 2015 11:44:40 -0500
Subject: [PATCH 166/212] data table test for nn

---
 R/fastNN_binary_func.R | 388 ++++++++++++++++++++---------------------
 1 file changed, 194 insertions(+), 194 deletions(-)

diff --git a/R/fastNN_binary_func.R b/R/fastNN_binary_func.R
index 8d9f5e3..ac94827 100644
--- a/R/fastNN_binary_func.R
+++ b/R/fastNN_binary_func.R
@@ -1,3 +1,179 @@
+#FastNN 
+#Algorithm to find a hopefully near-optimal match of pairs
+#In a treatment and control group
+#Works by first ordering by the propensity score matching value,
+#and then working through this list in order from highest to lowest.
+#Matches are removed each step.
+
+fastNN_binary_func <- function(dta, trtMntVar, ids, curgrp, dist_PSM) {
+
+    print("nn1.0")
+    # timerx <- proc.time()
+
+    vvv <<- sorted_dta
+
+    #Fast nearest neighbors search - will not arrive at optimum,
+    #but this may not be an issue for many analysis.
+    #Effectively loops through all observations in the treatment group, ordered by PSM score - higher scores go first.
+
+    sorted_dta <- as.data.table(dta@data)
+
+    sorted_dta <- sorted_dta[order(PSM_trtProb)]
+    print(colnames(sorted_dta))
+
+
+    sorted_dta <- sorted_dta[, c(ids, trtMntVar, "PSM_trtProb"), with=FALSE]
+
+
+    #Conduct the matching
+    treated <- sorted_dta[get(trtMntVar) == 1]
+    untreated <- sorted_dta[get(trtMntVar) == 0]
+
+    it_cnt = min(length(treated[[1]]), length(untreated[[1]]))
+    # dta@data[["match"]] <- -999
+    # dta@data[["PSM_distance"]] <- -999
+    dta@data[["PSM_match_ID"]] <- -999
+
+    print("nn2")
+
+    #Calculate a distance decay function
+    #to perturb pairs based on their distances.  
+    for (j in 1:it_cnt) {
+        # time_list <- c()
+
+        print("nn2.0")
+        # timer <- proc.time()
+
+        # treated <- sorted_dta[which(sorted_dta[[trtMntVar]] == 1 & sorted_dta[['nn_matched']] == 0),]
+        # untreated <- sorted_dta[which(sorted_dta[[trtMntVar]] == 0 & sorted_dta[['nn_matched']] == 0),]
+        
+        # print(nrow(sorted_dta[which(sorted_dta[['nn_matched']] == 0),]))
+
+        # time_list[1] <- round((proc.time() - timer)[3],5)
+        # print("nn2.1")
+        # timer <- proc.time()
+
+        #Run the KNN for all neighbors. 
+        # print(length(treated[[1]]))
+        # summary(treated[["PSM_trtProb"]])
+        # print(length(untreated[[1]]))
+        # summary(untreated[["PSM_trtProb"]])
+
+        k <- get.knnx(treated[, PSM_trtProb], untreated[, PSM_trtProb], 1)
+        
+        # time_list[2] <- round((proc.time() - timer)[3],5)
+        print("nn2.2")
+        # timer <- proc.time()
+
+        #Perturb the values based on the distance decay function, if selected.
+        if (!is.null(dist_PSM)) {
+            for (mC in 1:length(k[[1]])) {
+
+                print("nn2.2.0")
+
+                #Calculate the Euclidean Distance between pairs
+                Control_ID = toString(untreated[mC, get(ids)])
+
+                mT = k[["nn.index"]][mC]
+                
+                Treatment_ID = toString(treated[mT, get(ids)])
+
+                #Find the control x,y location
+                cCoord = coordinates(dta[which(dta@data[[ids]] == Control_ID),])
+                
+
+                #Find the treatment x,y location
+                tCoord = coordinates(dta[which(dta@data[[ids]] == Treatment_ID),])
+
+                y_dist = abs(cCoord[1] - cCoord[2])
+                x_dist = abs(tCoord[1] - tCoord[2])
+                euc_dist = sqrt(y_dist^2 + x_dist^2)
+                
+                print("nn2.2.1")
+
+                PSM_score = k[["nn.dist"]][mC]
+                geog_Weight = pairDistWeight(dist=euc_dist,threshold=dist_PSM,type="Spherical")
+
+                print("nn2.2.2")
+
+                
+                k[["nn.dist"]][mC] <- ((1-geog_Weight) * PSM_score)
+
+            }
+      
+        }
+
+        # time_list[3] <- round((proc.time() - timer)[3],5)
+        print("nn2.3")
+        # timer <- proc.time()
+
+        #Add the matched treatment and control values to the recording data frame
+        #best_m_control is the row in the "distance" matrix with the lowest value.  This is the same row as in the index.
+        best_m_control = which(k[["nn.dist"]] %in% sort(k[["nn.dist"]])[1])
+        
+        #This will give us the matched index in the "treated" dataset.
+        best_m_treated = k[["nn.index"]][best_m_control]
+        
+
+        #Control and Treatment PSM ID
+        # cid_txt = paste("untreated$",ids,"[",best_m_control,"]",sep="")
+        # Control_ID = toString(eval(parse(text=cid_txt)))
+            
+        # tid_txt = paste("treated$",ids,"[",best_m_treated,"]",sep="")
+        # Treatment_ID = toString(eval(parse(text=tid_txt)))
+
+
+        #Control PSM ID and Treatment PSM ID
+        Control_ID = toString(untreated[best_m_control, get(ids)])
+        Treatment_ID = toString(treated[best_m_control, get(ids)])
+
+
+        #Create a unique pair ID for each group (will simply append a "1" if only 1 group)
+        pair_id = paste(curgrp,j, sep="")
+        
+
+
+        # time_list[4] <- round((proc.time() - timer)[3],5)
+        print("nn2.4x")
+        # timer <- proc.time()
+
+        #Add the Treatment ID to the Control Row and Add the Control ID to the Treatment Row
+        # dta@data$match[which(dta@data[[ids]] == Control_ID)] <- Treatment_ID
+        # dta@data$match[which(dta@data[[ids]] == Treatment_ID)] <- Control_ID
+
+
+        # dta@data$PSM_distance[which(dta@data[[ids]] == Control_ID | dta@data[[ids]] == Treatment_ID)] <- k[["nn.dist"]][,1][best_m_control]
+        dta@data$PSM_match_ID[which(dta@data[[ids]] == Control_ID | dta@data[[ids]] == Treatment_ID)] <- pair_id        
+
+
+        
+        # time_list[5] <- round((proc.time() - timer)[3],5)
+        # timer <- proc.time()
+
+        #Drop the paired match out of the iteration matrix 
+        # sorted_dta <- sorted_dta[sorted_dta[[ids]] != Treatment_ID ,]
+        # sorted_dta <- sorted_dta[sorted_dta[[ids]] != Control_ID ,]    
+    
+        # sorted_dta[which(sorted_dta[[ids]] == Control_ID | sorted_dta[[ids]] == Treatment_ID),][['nn_matched']] <- 1
+
+        treated <- treated[get(ids) != (Treatment_ID)]
+        untreated <- untreated[get(ids) != (Control_ID)]
+        
+        # time_list[6] <- round((proc.time() - timer)[3],5)
+        # print(paste(time_list))
+
+    }
+
+    # print((proc.time() - timerx)[3])
+
+    return(dta) 
+
+}
+
+
+
+
+
 # #FastNN 
 # #Algorithm to find a hopefully near-optimal match of pairs
 # #In a treatment and control group
@@ -8,7 +184,7 @@
 # fastNN_binary_func <- function(dta, trtMntVar, ids, curgrp, dist_PSM) {
 
 #     print("nn1.0")
-#     # timerx <- proc.time()
+#     timerx <- proc.time()
 
     
 
@@ -16,18 +192,12 @@
 #     #but this may not be an issue for many analysis.
 #     #Effectively loops through all observations in the treatment group, ordered by PSM score - higher scores go first.
 
-#     sorted_dta <- as.data.table(dta@data)
-
-#     sorted_dta <- sorted_dta[order(PSM_trtProb)]
-#     print(colnames(sorted_dta))
-
-
-#     sorted_dta <- sorted_dta[, c(ids, trtMntVar, "PSM_trtProb"), with=FALSE]
+#     sorted_dta <- dta@data[order(dta@data[["PSM_trtProb"]]), c(ids, trtMntVar, "PSM_trtProb")]
 
 
 #     #Conduct the matching
-#     treated <- sorted_dta[get(trtMntVar) == 1]
-#     untreated <- sorted_dta[get(trtMntVar) == 0]
+#     treated <- sorted_dta[sorted_dta[[trtMntVar]] == 1,]
+#     untreated <- sorted_dta[sorted_dta[[trtMntVar]] == 0,]
 
 #     it_cnt = min(length(treated[[1]]), length(untreated[[1]]))
 #     # dta@data[["match"]] <- -999
@@ -59,7 +229,7 @@
 #         # print(length(untreated[[1]]))
 #         # summary(untreated[["PSM_trtProb"]])
 
-#         k <- get.knnx(treated[, PSM_trtProb], untreated[, PSM_trtProb], 1)
+#         k <- get.knnx(treated[["PSM_trtProb"]], untreated[["PSM_trtProb"]], 1)
         
 #         # time_list[2] <- round((proc.time() - timer)[3],5)
 #         print("nn2.2")
@@ -72,11 +242,11 @@
 #                 print("nn2.2.0")
 
 #                 #Calculate the Euclidean Distance between pairs
-#                 Control_ID = toString(untreated[mC, get(ids)])
+#                 Control_ID = toString(untreated[[ids]][[mC]])
 
 #                 mT = k[["nn.index"]][mC]
                 
-#                 Treatment_ID = toString(treated[mT, get(ids)])
+#                 Treatment_ID = toString(treated[[ids]][[mT]])
 
 #                 #Find the control x,y location
 #                 cCoord = coordinates(dta[which(dta@data[[ids]] == Control_ID),])
@@ -115,17 +285,17 @@
 #         best_m_treated = k[["nn.index"]][best_m_control]
         
 
-#         #Control and Treatment PSM ID
-#         # cid_txt = paste("untreated$",ids,"[",best_m_control,"]",sep="")
-#         # Control_ID = toString(eval(parse(text=cid_txt)))
+#         #Control PSM ID
+#         cid_txt = paste("untreated$",ids,"[",best_m_control,"]",sep="")
+#         Control_ID = toString(eval(parse(text=cid_txt)))
             
-#         # tid_txt = paste("treated$",ids,"[",best_m_treated,"]",sep="")
-#         # Treatment_ID = toString(eval(parse(text=tid_txt)))
-
+#         #Treatment PSM ID
+#         tid_txt = paste("treated$",ids,"[",best_m_treated,"]",sep="")
+#         Treatment_ID = toString(eval(parse(text=tid_txt)))
 
 #         #Control PSM ID and Treatment PSM ID
-#         Control_ID = toString(untreated[best_m_control, get(ids)])
-#         Treatment_ID = toString(treated[best_m_control, get(ids)])
+#         # Control_ID = toString(untreated[,ids][best_m_control])
+#         # Treatment_ID = toString(treated[,ids][best_m_treated])
 
 
 #         #Create a unique pair ID for each group (will simply append a "1" if only 1 group)
@@ -156,187 +326,17 @@
     
 #         # sorted_dta[which(sorted_dta[[ids]] == Control_ID | sorted_dta[[ids]] == Treatment_ID),][['nn_matched']] <- 1
 
-#         treated <- treated[get(ids) != (Treatment_ID)]
-#         untreated <- untreated[get(ids) != (Control_ID)]
+#         treated <- treated[which(treated[[ids]] != Treatment_ID),]
+#         untreated <- untreated[which(untreated[[ids]] != Control_ID),]
         
 #         # time_list[6] <- round((proc.time() - timer)[3],5)
 #         # print(paste(time_list))
 
 #     }
 
-#     # print((proc.time() - timerx)[3])
+#     print((proc.time() - timerx)[3])
 
 #     return(dta) 
 
 # }
 
-
-
-
-
-#FastNN 
-#Algorithm to find a hopefully near-optimal match of pairs
-#In a treatment and control group
-#Works by first ordering by the propensity score matching value,
-#and then working through this list in order from highest to lowest.
-#Matches are removed each step.
-
-fastNN_binary_func <- function(dta, trtMntVar, ids, curgrp, dist_PSM) {
-
-    print("nn1.0")
-    timerx <- proc.time()
-
-    
-
-    #Fast nearest neighbors search - will not arrive at optimum,
-    #but this may not be an issue for many analysis.
-    #Effectively loops through all observations in the treatment group, ordered by PSM score - higher scores go first.
-
-    sorted_dta <- dta@data[order(dta@data[["PSM_trtProb"]]), c(ids, trtMntVar, "PSM_trtProb")]
-
-
-    #Conduct the matching
-    treated <- sorted_dta[sorted_dta[[trtMntVar]] == 1,]
-    untreated <- sorted_dta[sorted_dta[[trtMntVar]] == 0,]
-
-    it_cnt = min(length(treated[[1]]), length(untreated[[1]]))
-    # dta@data[["match"]] <- -999
-    # dta@data[["PSM_distance"]] <- -999
-    dta@data[["PSM_match_ID"]] <- -999
-
-    print("nn2")
-
-    #Calculate a distance decay function
-    #to perturb pairs based on their distances.  
-    for (j in 1:it_cnt) {
-        # time_list <- c()
-
-        print("nn2.0")
-        # timer <- proc.time()
-
-        # treated <- sorted_dta[which(sorted_dta[[trtMntVar]] == 1 & sorted_dta[['nn_matched']] == 0),]
-        # untreated <- sorted_dta[which(sorted_dta[[trtMntVar]] == 0 & sorted_dta[['nn_matched']] == 0),]
-        
-        # print(nrow(sorted_dta[which(sorted_dta[['nn_matched']] == 0),]))
-
-        # time_list[1] <- round((proc.time() - timer)[3],5)
-        # print("nn2.1")
-        # timer <- proc.time()
-
-        #Run the KNN for all neighbors. 
-        # print(length(treated[[1]]))
-        # summary(treated[["PSM_trtProb"]])
-        # print(length(untreated[[1]]))
-        # summary(untreated[["PSM_trtProb"]])
-
-        k <- get.knnx(treated[["PSM_trtProb"]], untreated[["PSM_trtProb"]], 1)
-        
-        # time_list[2] <- round((proc.time() - timer)[3],5)
-        print("nn2.2")
-        # timer <- proc.time()
-
-        #Perturb the values based on the distance decay function, if selected.
-        if (!is.null(dist_PSM)) {
-            for (mC in 1:length(k[[1]])) {
-
-                print("nn2.2.0")
-
-                #Calculate the Euclidean Distance between pairs
-                Control_ID = toString(untreated[[ids]][[mC]])
-
-                mT = k[["nn.index"]][mC]
-                
-                Treatment_ID = toString(treated[[ids]][[mT]])
-
-                #Find the control x,y location
-                cCoord = coordinates(dta[which(dta@data[[ids]] == Control_ID),])
-                
-
-                #Find the treatment x,y location
-                tCoord = coordinates(dta[which(dta@data[[ids]] == Treatment_ID),])
-
-                y_dist = abs(cCoord[1] - cCoord[2])
-                x_dist = abs(tCoord[1] - tCoord[2])
-                euc_dist = sqrt(y_dist^2 + x_dist^2)
-                
-                print("nn2.2.1")
-
-                PSM_score = k[["nn.dist"]][mC]
-                geog_Weight = pairDistWeight(dist=euc_dist,threshold=dist_PSM,type="Spherical")
-
-                print("nn2.2.2")
-
-                
-                k[["nn.dist"]][mC] <- ((1-geog_Weight) * PSM_score)
-
-            }
-      
-        }
-
-        # time_list[3] <- round((proc.time() - timer)[3],5)
-        print("nn2.3")
-        # timer <- proc.time()
-
-        #Add the matched treatment and control values to the recording data frame
-        #best_m_control is the row in the "distance" matrix with the lowest value.  This is the same row as in the index.
-        best_m_control = which(k[["nn.dist"]] %in% sort(k[["nn.dist"]])[1])
-        
-        #This will give us the matched index in the "treated" dataset.
-        best_m_treated = k[["nn.index"]][best_m_control]
-        
-
-        #Control PSM ID
-        cid_txt = paste("untreated$",ids,"[",best_m_control,"]",sep="")
-        Control_ID = toString(eval(parse(text=cid_txt)))
-            
-        #Treatment PSM ID
-        tid_txt = paste("treated$",ids,"[",best_m_treated,"]",sep="")
-        Treatment_ID = toString(eval(parse(text=tid_txt)))
-
-        #Control PSM ID and Treatment PSM ID
-        # Control_ID = toString(untreated[,ids][best_m_control])
-        # Treatment_ID = toString(treated[,ids][best_m_treated])
-
-
-        #Create a unique pair ID for each group (will simply append a "1" if only 1 group)
-        pair_id = paste(curgrp,j, sep="")
-        
-
-
-        # time_list[4] <- round((proc.time() - timer)[3],5)
-        print("nn2.4x")
-        # timer <- proc.time()
-
-        #Add the Treatment ID to the Control Row and Add the Control ID to the Treatment Row
-        # dta@data$match[which(dta@data[[ids]] == Control_ID)] <- Treatment_ID
-        # dta@data$match[which(dta@data[[ids]] == Treatment_ID)] <- Control_ID
-
-
-        # dta@data$PSM_distance[which(dta@data[[ids]] == Control_ID | dta@data[[ids]] == Treatment_ID)] <- k[["nn.dist"]][,1][best_m_control]
-        dta@data$PSM_match_ID[which(dta@data[[ids]] == Control_ID | dta@data[[ids]] == Treatment_ID)] <- pair_id        
-
-
-        
-        # time_list[5] <- round((proc.time() - timer)[3],5)
-        # timer <- proc.time()
-
-        #Drop the paired match out of the iteration matrix 
-        # sorted_dta <- sorted_dta[sorted_dta[[ids]] != Treatment_ID ,]
-        # sorted_dta <- sorted_dta[sorted_dta[[ids]] != Control_ID ,]    
-    
-        # sorted_dta[which(sorted_dta[[ids]] == Control_ID | sorted_dta[[ids]] == Treatment_ID),][['nn_matched']] <- 1
-
-        treated <- treated[which(treated[[ids]] != Treatment_ID),]
-        untreated <- untreated[which(untreated[[ids]] != Control_ID),]
-        
-        # time_list[6] <- round((proc.time() - timer)[3],5)
-        # print(paste(time_list))
-
-    }
-
-    print((proc.time() - timerx)[3])
-
-    return(dta) 
-
-}
-

From c18bff9a0818bbb42fa354c59c1699796a95133a Mon Sep 17 00:00:00 2001
From: userz <sgoodman@aiddata.org>
Date: Tue, 10 Nov 2015 11:46:36 -0500
Subject: [PATCH 167/212] data table test for nn

---
 R/fastNN_binary_func.R | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/R/fastNN_binary_func.R b/R/fastNN_binary_func.R
index ac94827..59b53b7 100644
--- a/R/fastNN_binary_func.R
+++ b/R/fastNN_binary_func.R
@@ -10,7 +10,6 @@ fastNN_binary_func <- function(dta, trtMntVar, ids, curgrp, dist_PSM) {
     print("nn1.0")
     # timerx <- proc.time()
 
-    vvv <<- sorted_dta
 
     #Fast nearest neighbors search - will not arrive at optimum,
     #but this may not be an issue for many analysis.
@@ -18,6 +17,8 @@ fastNN_binary_func <- function(dta, trtMntVar, ids, curgrp, dist_PSM) {
 
     sorted_dta <- as.data.table(dta@data)
 
+    vvv <<- sorted_dta
+
     sorted_dta <- sorted_dta[order(PSM_trtProb)]
     print(colnames(sorted_dta))
 

From d33a14f15f78f775a133fb52e1e2ddc197c7e531 Mon Sep 17 00:00:00 2001
From: userz <sgoodman@aiddata.org>
Date: Tue, 10 Nov 2015 11:48:42 -0500
Subject: [PATCH 168/212] data table test for nn

---
 R/fastNN_binary_func.R | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/R/fastNN_binary_func.R b/R/fastNN_binary_func.R
index 59b53b7..ed6c0b9 100644
--- a/R/fastNN_binary_func.R
+++ b/R/fastNN_binary_func.R
@@ -17,9 +17,8 @@ fastNN_binary_func <- function(dta, trtMntVar, ids, curgrp, dist_PSM) {
 
     sorted_dta <- as.data.table(dta@data)
 
-    vvv <<- sorted_dta
 
-    sorted_dta <- sorted_dta[order(PSM_trtProb)]
+    sorted_dta <- sorted_dta[order(get("PSM_trtProb"))]
     print(colnames(sorted_dta))
 
 

From 7346d54732fcc780c966b40542a6a15033a16c47 Mon Sep 17 00:00:00 2001
From: userz <sgoodman@aiddata.org>
Date: Tue, 10 Nov 2015 11:50:15 -0500
Subject: [PATCH 169/212] data table test for nn

---
 R/fastNN_binary_func.R | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/R/fastNN_binary_func.R b/R/fastNN_binary_func.R
index ed6c0b9..f1896a6 100644
--- a/R/fastNN_binary_func.R
+++ b/R/fastNN_binary_func.R
@@ -17,12 +17,15 @@ fastNN_binary_func <- function(dta, trtMntVar, ids, curgrp, dist_PSM) {
 
     sorted_dta <- as.data.table(dta@data)
 
+    print("x")
+    print(colnames(sorted_dta))
+    sorted_dta <- sorted_dta[, c(ids, trtMntVar, "PSM_trtProb"), with=FALSE]
+    print(colnames(sorted_dta))
 
     sorted_dta <- sorted_dta[order(get("PSM_trtProb"))]
     print(colnames(sorted_dta))
 
 
-    sorted_dta <- sorted_dta[, c(ids, trtMntVar, "PSM_trtProb"), with=FALSE]
 
 
     #Conduct the matching

From efb3780ca8142b84fd4e7df121f6936d4d405f63 Mon Sep 17 00:00:00 2001
From: userz <sgoodman@aiddata.org>
Date: Tue, 10 Nov 2015 11:54:45 -0500
Subject: [PATCH 170/212] data table test for nn

---
 R/fastNN_binary_func.R | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/R/fastNN_binary_func.R b/R/fastNN_binary_func.R
index f1896a6..4f97839 100644
--- a/R/fastNN_binary_func.R
+++ b/R/fastNN_binary_func.R
@@ -22,7 +22,7 @@ fastNN_binary_func <- function(dta, trtMntVar, ids, curgrp, dist_PSM) {
     sorted_dta <- sorted_dta[, c(ids, trtMntVar, "PSM_trtProb"), with=FALSE]
     print(colnames(sorted_dta))
 
-    sorted_dta <- sorted_dta[order(get("PSM_trtProb"))]
+    sorted_dta <- sorted_dta[order(sorted_dta$PSM_trtProb)]
     print(colnames(sorted_dta))
 
 

From d546f8cbeb56fa502a8c291fc5c7e859ba2eb7fb Mon Sep 17 00:00:00 2001
From: userz <sgoodman@aiddata.org>
Date: Tue, 10 Nov 2015 11:58:56 -0500
Subject: [PATCH 171/212] data table test for nn

---
 R/fastNN_binary_func.R | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/R/fastNN_binary_func.R b/R/fastNN_binary_func.R
index 4f97839..8421a80 100644
--- a/R/fastNN_binary_func.R
+++ b/R/fastNN_binary_func.R
@@ -22,7 +22,7 @@ fastNN_binary_func <- function(dta, trtMntVar, ids, curgrp, dist_PSM) {
     sorted_dta <- sorted_dta[, c(ids, trtMntVar, "PSM_trtProb"), with=FALSE]
     print(colnames(sorted_dta))
 
-    sorted_dta <- sorted_dta[order(sorted_dta$PSM_trtProb)]
+    sorted_dta <- sorted_dta[order(get(ids))]
     print(colnames(sorted_dta))
 
 

From 3ff419110e55ed8b159488a4d0acac7b34df6bba Mon Sep 17 00:00:00 2001
From: userz <sgoodman@aiddata.org>
Date: Tue, 10 Nov 2015 12:37:27 -0500
Subject: [PATCH 172/212] data table test for nn

---
 R/fastNN_binary_func.R | 92 ++++++++++++++----------------------------
 1 file changed, 31 insertions(+), 61 deletions(-)

diff --git a/R/fastNN_binary_func.R b/R/fastNN_binary_func.R
index 8421a80..57ca7b5 100644
--- a/R/fastNN_binary_func.R
+++ b/R/fastNN_binary_func.R
@@ -8,73 +8,45 @@
 fastNN_binary_func <- function(dta, trtMntVar, ids, curgrp, dist_PSM) {
 
     print("nn1.0")
-    # timerx <- proc.time()
 
 
     #Fast nearest neighbors search - will not arrive at optimum,
     #but this may not be an issue for many analysis.
     #Effectively loops through all observations in the treatment group, ordered by PSM score - higher scores go first.
 
-    sorted_dta <- as.data.table(dta@data)
-
-    print("x")
-    print(colnames(sorted_dta))
-    sorted_dta <- sorted_dta[, c(ids, trtMntVar, "PSM_trtProb"), with=FALSE]
-    print(colnames(sorted_dta))
-
-    sorted_dta <- sorted_dta[order(get(ids))]
-    print(colnames(sorted_dta))
 
+    sorted_dta <- dta@data[order(dta@data[["PSM_trtProb"]]), c(ids, trtMntVar, "PSM_trtProb")]
 
+    #Conduct the matching
+    treated <- as.data.table(sorted_dta[sorted_dta[[trtMntVar]] == 1, c(ids, "PSM_trtProb")])
+    untreated <- as.data.tablesorted_dta[sorted_dta[[trtMntVar]] == 0, c(ids, "PSM_trtProb")])
 
 
-    #Conduct the matching
-    treated <- sorted_dta[get(trtMntVar) == 1]
-    untreated <- sorted_dta[get(trtMntVar) == 0]
 
-    it_cnt = min(length(treated[[1]]), length(untreated[[1]]))
+    it_cnt = min(nrow(treated), nrow(untreated))
     # dta@data[["match"]] <- -999
     # dta@data[["PSM_distance"]] <- -999
     dta@data[["PSM_match_ID"]] <- -999
 
     print("nn2")
 
-    #Calculate a distance decay function
-    #to perturb pairs based on their distances.  
+    # Calculate a distance decay function
+    # to perturb pairs based on their distances.  
     for (j in 1:it_cnt) {
-        # time_list <- c()
-
-        print("nn2.0")
-        # timer <- proc.time()
-
-        # treated <- sorted_dta[which(sorted_dta[[trtMntVar]] == 1 & sorted_dta[['nn_matched']] == 0),]
-        # untreated <- sorted_dta[which(sorted_dta[[trtMntVar]] == 0 & sorted_dta[['nn_matched']] == 0),]
-        
-        # print(nrow(sorted_dta[which(sorted_dta[['nn_matched']] == 0),]))
-
-        # time_list[1] <- round((proc.time() - timer)[3],5)
-        # print("nn2.1")
-        # timer <- proc.time()
-
-        #Run the KNN for all neighbors. 
-        # print(length(treated[[1]]))
-        # summary(treated[["PSM_trtProb"]])
-        # print(length(untreated[[1]]))
-        # summary(untreated[["PSM_trtProb"]])
 
+        print("nn2.1")
+      
         k <- get.knnx(treated[, PSM_trtProb], untreated[, PSM_trtProb], 1)
         
-        # time_list[2] <- round((proc.time() - timer)[3],5)
         print("nn2.2")
-        # timer <- proc.time()
 
-        #Perturb the values based on the distance decay function, if selected.
+        # Perturb the values based on the distance decay function, if selected.
         if (!is.null(dist_PSM)) {
             for (mC in 1:length(k[[1]])) {
 
                 print("nn2.2.0")
 
-                #Calculate the Euclidean Distance between pairs
+                # Calculate the Euclidean Distance between pairs
                 Control_ID = toString(untreated[mC, get(ids)])
 
                 mT = k[["nn.index"]][mC]
@@ -106,19 +78,17 @@ fastNN_binary_func <- function(dta, trtMntVar, ids, curgrp, dist_PSM) {
       
         }
 
-        # time_list[3] <- round((proc.time() - timer)[3],5)
         print("nn2.3")
-        # timer <- proc.time()
 
-        #Add the matched treatment and control values to the recording data frame
-        #best_m_control is the row in the "distance" matrix with the lowest value.  This is the same row as in the index.
+        # Add the matched treatment and control values to the recording data frame
+        # best_m_control is the row in the "distance" matrix with the lowest value.  This is the same row as in the index.
         best_m_control = which(k[["nn.dist"]] %in% sort(k[["nn.dist"]])[1])
         
-        #This will give us the matched index in the "treated" dataset.
+        # This will give us the matched index in the "treated" dataset.
         best_m_treated = k[["nn.index"]][best_m_control]
         
 
-        #Control and Treatment PSM ID
+        # Control and Treatment PSM ID
         # cid_txt = paste("untreated$",ids,"[",best_m_control,"]",sep="")
         # Control_ID = toString(eval(parse(text=cid_txt)))
             
@@ -126,19 +96,17 @@ fastNN_binary_func <- function(dta, trtMntVar, ids, curgrp, dist_PSM) {
         # Treatment_ID = toString(eval(parse(text=tid_txt)))
 
 
-        #Control PSM ID and Treatment PSM ID
+        # Control PSM ID and Treatment PSM ID
         Control_ID = toString(untreated[best_m_control, get(ids)])
         Treatment_ID = toString(treated[best_m_control, get(ids)])
 
 
-        #Create a unique pair ID for each group (will simply append a "1" if only 1 group)
+        # Create a unique pair ID for each group (will simply append a "1" if only 1 group)
         pair_id = paste(curgrp,j, sep="")
         
 
 
-        # time_list[4] <- round((proc.time() - timer)[3],5)
         print("nn2.4x")
-        # timer <- proc.time()
 
         #Add the Treatment ID to the Control Row and Add the Control ID to the Treatment Row
         # dta@data$match[which(dta@data[[ids]] == Control_ID)] <- Treatment_ID
@@ -150,10 +118,7 @@ fastNN_binary_func <- function(dta, trtMntVar, ids, curgrp, dist_PSM) {
 
 
         
-        # time_list[5] <- round((proc.time() - timer)[3],5)
-        # timer <- proc.time()
-
-        #Drop the paired match out of the iteration matrix 
+        # Drop the paired match out of the iteration matrix 
         # sorted_dta <- sorted_dta[sorted_dta[[ids]] != Treatment_ID ,]
         # sorted_dta <- sorted_dta[sorted_dta[[ids]] != Control_ID ,]    
     
@@ -162,12 +127,9 @@ fastNN_binary_func <- function(dta, trtMntVar, ids, curgrp, dist_PSM) {
         treated <- treated[get(ids) != (Treatment_ID)]
         untreated <- untreated[get(ids) != (Control_ID)]
         
-        # time_list[6] <- round((proc.time() - timer)[3],5)
-        # print(paste(time_list))
 
-    }
 
-    # print((proc.time() - timerx)[3])
+    }
 
     return(dta) 
 
@@ -177,6 +139,14 @@ fastNN_binary_func <- function(dta, trtMntVar, ids, curgrp, dist_PSM) {
 
 
 
+
+
+
+
+
+
+
+
 # #FastNN 
 # #Algorithm to find a hopefully near-optimal match of pairs
 # #In a treatment and control group
@@ -187,7 +157,7 @@ fastNN_binary_func <- function(dta, trtMntVar, ids, curgrp, dist_PSM) {
 # fastNN_binary_func <- function(dta, trtMntVar, ids, curgrp, dist_PSM) {
 
 #     print("nn1.0")
-#     timerx <- proc.time()
+#     # timerx <- proc.time()
 
     
 
@@ -199,8 +169,8 @@ fastNN_binary_func <- function(dta, trtMntVar, ids, curgrp, dist_PSM) {
 
 
 #     #Conduct the matching
-#     treated <- sorted_dta[sorted_dta[[trtMntVar]] == 1,]
-#     untreated <- sorted_dta[sorted_dta[[trtMntVar]] == 0,]
+#     treated <- sorted_dta[sorted_dta[[trtMntVar]] == 1, c(ids, "PSM_trtProb")]
+#     untreated <- sorted_dta[sorted_dta[[trtMntVar]] == 0, c(ids, "PSM_trtProb")]
 
 #     it_cnt = min(length(treated[[1]]), length(untreated[[1]]))
 #     # dta@data[["match"]] <- -999
@@ -337,7 +307,7 @@ fastNN_binary_func <- function(dta, trtMntVar, ids, curgrp, dist_PSM) {
 
 #     }
 
-#     print((proc.time() - timerx)[3])
+#     # print((proc.time() - timerx)[3])
 
 #     return(dta) 
 

From 6dc5d4bac7b3447167fa37826912dc9e5176d308 Mon Sep 17 00:00:00 2001
From: userz <sgoodman@aiddata.org>
Date: Tue, 10 Nov 2015 12:37:54 -0500
Subject: [PATCH 173/212] data table test for nn

---
 R/fastNN_binary_func.R | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/R/fastNN_binary_func.R b/R/fastNN_binary_func.R
index 57ca7b5..f13d2bf 100644
--- a/R/fastNN_binary_func.R
+++ b/R/fastNN_binary_func.R
@@ -19,7 +19,7 @@ fastNN_binary_func <- function(dta, trtMntVar, ids, curgrp, dist_PSM) {
 
     #Conduct the matching
     treated <- as.data.table(sorted_dta[sorted_dta[[trtMntVar]] == 1, c(ids, "PSM_trtProb")])
-    untreated <- as.data.tablesorted_dta[sorted_dta[[trtMntVar]] == 0, c(ids, "PSM_trtProb")])
+    untreated <- as.data.table(sorted_dta[sorted_dta[[trtMntVar]] == 0, c(ids, "PSM_trtProb")])
 
 
 

From 7c6ece533c506f41742441808cec47d2b75dffb1 Mon Sep 17 00:00:00 2001
From: userz <sgoodman@aiddata.org>
Date: Tue, 10 Nov 2015 12:43:10 -0500
Subject: [PATCH 174/212] data table test for nn

---
 R/fastNN_binary_func.R | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/R/fastNN_binary_func.R b/R/fastNN_binary_func.R
index f13d2bf..4eeea35 100644
--- a/R/fastNN_binary_func.R
+++ b/R/fastNN_binary_func.R
@@ -9,7 +9,7 @@ fastNN_binary_func <- function(dta, trtMntVar, ids, curgrp, dist_PSM) {
 
     print("nn1.0")
 
-
+    library(data.table)
     #Fast nearest neighbors search - will not arrive at optimum,
     #but this may not be an issue for many analysis.
     #Effectively loops through all observations in the treatment group, ordered by PSM score - higher scores go first.

From 921b77e876e5e7afbe951dab5814471f3e49a1e9 Mon Sep 17 00:00:00 2001
From: userz <sgoodman@aiddata.org>
Date: Tue, 10 Nov 2015 12:44:04 -0500
Subject: [PATCH 175/212] data table test for nn

---
 R/fastNN_binary_func.R | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/R/fastNN_binary_func.R b/R/fastNN_binary_func.R
index 4eeea35..599a8ba 100644
--- a/R/fastNN_binary_func.R
+++ b/R/fastNN_binary_func.R
@@ -36,7 +36,7 @@ fastNN_binary_func <- function(dta, trtMntVar, ids, curgrp, dist_PSM) {
 
         print("nn2.1")
       
-        k <- get.knnx(treated[, PSM_trtProb], untreated[, PSM_trtProb], 1)
+        k <- get.knnx(treated[, 'PSM_trtProb'], untreated[, 'PSM_trtProb'], 1)
         
         print("nn2.2")
 

From 2b33b8dee08bd504fd6541d1b29cc386a52e7dfb Mon Sep 17 00:00:00 2001
From: userz <sgoodman@aiddata.org>
Date: Tue, 10 Nov 2015 12:46:23 -0500
Subject: [PATCH 176/212] data table test for nn

---
 R/fastNN_binary_func.R | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/R/fastNN_binary_func.R b/R/fastNN_binary_func.R
index 599a8ba..c077d82 100644
--- a/R/fastNN_binary_func.R
+++ b/R/fastNN_binary_func.R
@@ -9,7 +9,6 @@ fastNN_binary_func <- function(dta, trtMntVar, ids, curgrp, dist_PSM) {
 
     print("nn1.0")
 
-    library(data.table)
     #Fast nearest neighbors search - will not arrive at optimum,
     #but this may not be an issue for many analysis.
     #Effectively loops through all observations in the treatment group, ordered by PSM score - higher scores go first.
@@ -47,11 +46,11 @@ fastNN_binary_func <- function(dta, trtMntVar, ids, curgrp, dist_PSM) {
                 print("nn2.2.0")
 
                 # Calculate the Euclidean Distance between pairs
-                Control_ID = toString(untreated[mC, get(ids)])
+                Control_ID = toString(untreated[mC, get(ids), with=FALSE])
 
                 mT = k[["nn.index"]][mC]
                 
-                Treatment_ID = toString(treated[mT, get(ids)])
+                Treatment_ID = toString(treated[mT, get(ids), with=FALSE])
 
                 #Find the control x,y location
                 cCoord = coordinates(dta[which(dta@data[[ids]] == Control_ID),])

From bc96f18743ef50d1d17ae0158fc8ad03a3725f2b Mon Sep 17 00:00:00 2001
From: userz <sgoodman@aiddata.org>
Date: Tue, 10 Nov 2015 12:47:36 -0500
Subject: [PATCH 177/212] data table test for nn

---
 R/fastNN_binary_func.R | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/R/fastNN_binary_func.R b/R/fastNN_binary_func.R
index c077d82..2c5cf92 100644
--- a/R/fastNN_binary_func.R
+++ b/R/fastNN_binary_func.R
@@ -96,8 +96,8 @@ fastNN_binary_func <- function(dta, trtMntVar, ids, curgrp, dist_PSM) {
 
 
         # Control PSM ID and Treatment PSM ID
-        Control_ID = toString(untreated[best_m_control, get(ids)])
-        Treatment_ID = toString(treated[best_m_control, get(ids)])
+        Control_ID = toString(untreated[best_m_control, get(ids), with=FALSE])
+        Treatment_ID = toString(treated[best_m_control, get(ids), with=FALSE])
 
 
         # Create a unique pair ID for each group (will simply append a "1" if only 1 group)

From dcde29724dd7eea42f9fd4a99e983ff50af5781d Mon Sep 17 00:00:00 2001
From: userz <sgoodman@aiddata.org>
Date: Tue, 10 Nov 2015 12:48:26 -0500
Subject: [PATCH 178/212] data table test for nn

---
 R/fastNN_binary_func.R | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/R/fastNN_binary_func.R b/R/fastNN_binary_func.R
index 2c5cf92..dcf4d9a 100644
--- a/R/fastNN_binary_func.R
+++ b/R/fastNN_binary_func.R
@@ -96,8 +96,8 @@ fastNN_binary_func <- function(dta, trtMntVar, ids, curgrp, dist_PSM) {
 
 
         # Control PSM ID and Treatment PSM ID
-        Control_ID = toString(untreated[best_m_control, get(ids), with=FALSE])
-        Treatment_ID = toString(treated[best_m_control, get(ids), with=FALSE])
+        Control_ID = toString(untreated[best_m_control, (ids), with=FALSE])
+        Treatment_ID = toString(treated[best_m_control, (ids), with=FALSE])
 
 
         # Create a unique pair ID for each group (will simply append a "1" if only 1 group)

From 55d475395c5e8b25b1748634aa91a2785ce5f7fd Mon Sep 17 00:00:00 2001
From: userz <sgoodman@aiddata.org>
Date: Tue, 10 Nov 2015 12:50:10 -0500
Subject: [PATCH 179/212] data table test for nn

---
 R/fastNN_binary_func.R | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/R/fastNN_binary_func.R b/R/fastNN_binary_func.R
index dcf4d9a..3d9c2dc 100644
--- a/R/fastNN_binary_func.R
+++ b/R/fastNN_binary_func.R
@@ -94,7 +94,8 @@ fastNN_binary_func <- function(dta, trtMntVar, ids, curgrp, dist_PSM) {
         # tid_txt = paste("treated$",ids,"[",best_m_treated,"]",sep="")
         # Treatment_ID = toString(eval(parse(text=tid_txt)))
 
-
+        print(class(untreated))
+        print(colnames(untreated))
         # Control PSM ID and Treatment PSM ID
         Control_ID = toString(untreated[best_m_control, (ids), with=FALSE])
         Treatment_ID = toString(treated[best_m_control, (ids), with=FALSE])

From 3f04e6d5bd6faf0689d45811462e7828ef67e81d Mon Sep 17 00:00:00 2001
From: userz <sgoodman@aiddata.org>
Date: Tue, 10 Nov 2015 12:54:23 -0500
Subject: [PATCH 180/212] data table test for nn

---
 R/fastNN_binary_func.R | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/R/fastNN_binary_func.R b/R/fastNN_binary_func.R
index 3d9c2dc..7b51489 100644
--- a/R/fastNN_binary_func.R
+++ b/R/fastNN_binary_func.R
@@ -96,6 +96,7 @@ fastNN_binary_func <- function(dta, trtMntVar, ids, curgrp, dist_PSM) {
 
         print(class(untreated))
         print(colnames(untreated))
+
         # Control PSM ID and Treatment PSM ID
         Control_ID = toString(untreated[best_m_control, (ids), with=FALSE])
         Treatment_ID = toString(treated[best_m_control, (ids), with=FALSE])
@@ -124,8 +125,8 @@ fastNN_binary_func <- function(dta, trtMntVar, ids, curgrp, dist_PSM) {
     
         # sorted_dta[which(sorted_dta[[ids]] == Control_ID | sorted_dta[[ids]] == Treatment_ID),][['nn_matched']] <- 1
 
-        treated <- treated[get(ids) != (Treatment_ID)]
-        untreated <- untreated[get(ids) != (Control_ID)]
+        treated <- treated[get(ids) != (Treatment_ID), with=FALSE]
+        untreated <- untreated[get(ids) != (Control_ID), with=FALSE]
         
 
 

From be8cb34a7baa0d2bedcc6b22723ca73f5fce4dc6 Mon Sep 17 00:00:00 2001
From: userz <sgoodman@aiddata.org>
Date: Tue, 10 Nov 2015 13:05:01 -0500
Subject: [PATCH 181/212] data table test for nn

---
 R/fastNN_binary_func.R | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/R/fastNN_binary_func.R b/R/fastNN_binary_func.R
index 7b51489..080b910 100644
--- a/R/fastNN_binary_func.R
+++ b/R/fastNN_binary_func.R
@@ -125,8 +125,11 @@ fastNN_binary_func <- function(dta, trtMntVar, ids, curgrp, dist_PSM) {
     
         # sorted_dta[which(sorted_dta[[ids]] == Control_ID | sorted_dta[[ids]] == Treatment_ID),][['nn_matched']] <- 1
 
-        treated <- treated[get(ids) != (Treatment_ID), with=FALSE]
-        untreated <- untreated[get(ids) != (Control_ID), with=FALSE]
+        qt = quote(ids != Treatment_ID)
+        qu = quote(ids != Treatment_ID)
+
+        treated <- treated[eval(qt)]
+        untreated <- untreated[eval(qu)]
         
 
 

From c1b56543716d130aa2ca594429a9cb6353c7e25d Mon Sep 17 00:00:00 2001
From: userz <sgoodman@aiddata.org>
Date: Tue, 10 Nov 2015 13:48:49 -0500
Subject: [PATCH 182/212] Cleaning up fastnn and sat.

---
 R/SAT.R                | 128 ++++++++++++++++++++++++-----------------
 R/fastNN_binary_func.R |  53 +++++++++--------
 2 files changed, 102 insertions(+), 79 deletions(-)

diff --git a/R/SAT.R b/R/SAT.R
index 96bf259..9007333 100644
--- a/R/SAT.R
+++ b/R/SAT.R
@@ -261,6 +261,9 @@ SAT <- function (dta, mtd, constraints, psm_eq, ids, drop_opts, visual, TrtBinCo
     print("sat1")
 
     if (!is.null(constraints) && contraints != c()) {
+
+        print("sat1a.1")
+
         for (cst in 1:length(names(constraints))) {
             if (names(constraints)[cst] == "groups") {
                 dta@data[,"ConstraintGroupSet_Opt"] <- dta@data[,constraints["groups"]]
@@ -275,83 +278,99 @@ SAT <- function (dta, mtd, constraints, psm_eq, ids, drop_opts, visual, TrtBinCo
                 dist_PSM=NULL
             }
         }
-        
-    } else {
-        dta@data[,"ConstraintGroupSet_Opt"] <- 1
-        #max the distance threshold by taking the diagonal of the bounding box.
-        dist_PSM = NULL
-    }
-
-
 
-    print("sat2")
 
-    #Caclulate the number of groups to constrain by, if any.
-    group_constraints <- unique(dta$ConstraintGroupSet_Opt)
-  
-    #Make sure there are both treatment and control groups of an adequate size (>= 1 of each)
-    t_dta <- list()
-    u_dta <-list()
-    grp_list <- list()
-    cnt = 0
+        print("sat1a.2")
 
-    for (grp in 1:length(group_constraints)) {
-        cur_grp <- as.matrix(group_constraints)[grp]
-        grp_index = length(grp_list)+1
-        t_index = length(t_dta)+1
-        grp_list[[grp_index]] <- as.matrix(group_constraints)[grp]
+        # Caclulate the number of groups to constrain by, if any.
+        group_constraints <- unique(dta$ConstraintGroupSet_Opt)
+      
+        # Make sure there are both treatment and control groups of an adequate size (>= 1 of each)
+        t_dta <- list()
+        u_dta <-list()
+        grp_list <- list()
+        cnt = 0
+
+        for (grp in 1:length(group_constraints)) {
+            cur_grp <- as.matrix(group_constraints)[grp]
+            grp_index = length(grp_list)+1
+            t_index = length(t_dta)+1
+            grp_list[[grp_index]] <- as.matrix(group_constraints)[grp]
+
+            t_dta[[t_index]] <- dta[dta$TrtBin == 1,]
+            u_dta[[t_index]] <- dta[dta$TrtBin == 0,]
+
+            treatment_count <- cur_grp %in% t_dta[[t_index]]$ConstraintGroupSet_Opt
+            untreated_count <- cur_grp %in% u_dta[[t_index]]$ConstraintGroupSet_Opt
+
+            if ((untreated_count == FALSE) || (treatment_count == FALSE)) {
+                dta <- dta[!dta$ConstraintGroupSet_Opt == cur_grp,]
+                t_dta[[t_index]] <- NULL
+                u_dta[[t_index]] <- NULL
+                grp_list[[t_index]] <- NULL
+                war_statement = paste("Dropped group due to a lack of both treatment and control observation: '",cur_grp,"'",sep="")
+                warning(war_statement)
+
+            } else { 
+                t_dta[[t_index]] <- t_dta[[t_index]][t_dta[[t_index]]$ConstraintGroupSet_Opt == cur_grp,]
+                u_dta[[t_index]] <- u_dta[[t_index]][u_dta[[t_index]]$ConstraintGroupSet_Opt == cur_grp,]
+
+                cnt = cnt + 1
+            }
+        }
 
-        t_dta[[t_index]] <- dta[dta$TrtBin == 1,]
-        u_dta[[t_index]] <- dta[dta$TrtBin == 0,]
 
-        treatment_count <- cur_grp %in% t_dta[[t_index]]$ConstraintGroupSet_Opt
-        untreated_count <- cur_grp %in% u_dta[[t_index]]$ConstraintGroupSet_Opt
+        if (cnt == 0) {
+            return('drop')
+        }
 
-        if ((untreated_count == FALSE) || (treatment_count == FALSE)) {
-            dta <- dta[!dta$ConstraintGroupSet_Opt == cur_grp,]
-            t_dta[[t_index]] <- NULL
-            u_dta[[t_index]] <- NULL
-            grp_list[[t_index]] <- NULL
-            war_statement = paste("Dropped group due to a lack of both treatment and control observation: '",cur_grp,"'",sep="")
-            warning(war_statement)
 
-        } else { 
-            t_dta[[t_index]] <- t_dta[[t_index]][t_dta[[t_index]]$ConstraintGroupSet_Opt == cur_grp,]
-            u_dta[[t_index]] <- u_dta[[t_index]][u_dta[[t_index]]$ConstraintGroupSet_Opt == cur_grp,]
+        print("sat1a.3")
 
-            cnt = cnt + 1
-        }
-    }
 
+        for (i in 1:cnt) {
+            cur_grp <- grp_list[[i]]
 
-    if (cnt == 0) {
-        return('drop')
-    }
+            print("sat3.1")
+            it_dta <- maptools::spRbind(t_dta[[i]],u_dta[[i]])
 
+            print("sat3.2")
+            if (mtd == "fastNN") {
+                # ***
+                # this is the slow part of functions
+                temp_dta[[i]] <- fastNN_binary_func(it_dta, TrtBinColName, ids, cur_grp, dist_PSM) 
+            }
 
-    print("sat3")
+            # if (mtd == "NN_WithReplacement") {
+            #     print("NN with replacement is currently not available, please choose fastNN")
+            #     # temp_dta[[i]] <- NN_WithReplacement_binary_func(it_dta,TrtBinColName,ids,cur_grp,dist_PSM) 
+            # }
+        }
 
-    temp_dta <- list()
+        
+    } else {
 
-    for (i in 1:cnt) {
-        cur_grp <- grp_list[[i]]
+        print("sat1b.1")
 
-        print("sat3.1")
-        it_dta <- maptools::spRbind(t_dta[[i]],u_dta[[i]])
+        temp_dta <- list()
 
-        print("sat3.2")
         if (mtd == "fastNN") {
             # ***
             # this is the slow part of functions
-            temp_dta[[i]] <- fastNN_binary_func(it_dta, TrtBinColName, ids, cur_grp, dist_PSM) 
+            temp_dta[[i]] <- fastNN_binary_func(dta, TrtBinColName, ids, NULL, NULL) 
         }
 
-        if (mtd == "NN_WithReplacement") {
-            print("NN with replacement is currently not available, please choose fastNN")
-            # temp_dta[[i]] <- NN_WithReplacement_binary_func(it_dta,TrtBinColName,ids,cur_grp,dist_PSM) 
-        }
+        # if (mtd == "NN_WithReplacement") {
+        #     print("NN with replacement is currently not available, please choose fastNN")
+        #     # temp_dta[[i]] <- NN_WithReplacement_binary_func(it_dta,TrtBinColName,ids,cur_grp,dist_PSM) 
+        # }
+
+
     }
 
+
+
+
     print("sat4")
 
     #Build the final datasets from subsets
@@ -364,6 +383,7 @@ SAT <- function (dta, mtd, constraints, psm_eq, ids, drop_opts, visual, TrtBinCo
         dta <- temp_dta[[1]]
     }
 
+
     print("sat5")
 
     if (drop_unmatched == TRUE) {
diff --git a/R/fastNN_binary_func.R b/R/fastNN_binary_func.R
index 080b910..5308fd9 100644
--- a/R/fastNN_binary_func.R
+++ b/R/fastNN_binary_func.R
@@ -37,45 +37,45 @@ fastNN_binary_func <- function(dta, trtMntVar, ids, curgrp, dist_PSM) {
       
         k <- get.knnx(treated[, 'PSM_trtProb'], untreated[, 'PSM_trtProb'], 1)
         
-        print("nn2.2")
+        # print("nn2.2")
 
-        # Perturb the values based on the distance decay function, if selected.
-        if (!is.null(dist_PSM)) {
-            for (mC in 1:length(k[[1]])) {
+        # # Perturb the values based on the distance decay function, if selected.
+        # if (!is.null(dist_PSM)) {
+        #     for (mC in 1:length(k[[1]])) {
 
-                print("nn2.2.0")
+        #         print("nn2.2.0")
 
-                # Calculate the Euclidean Distance between pairs
-                Control_ID = toString(untreated[mC, get(ids), with=FALSE])
+        #         # Calculate the Euclidean Distance between pairs
+        #         Control_ID = toString(untreated[mC, get(ids), with=FALSE])
 
-                mT = k[["nn.index"]][mC]
+        #         mT = k[["nn.index"]][mC]
                 
-                Treatment_ID = toString(treated[mT, get(ids), with=FALSE])
+        #         Treatment_ID = toString(treated[mT, get(ids), with=FALSE])
 
-                #Find the control x,y location
-                cCoord = coordinates(dta[which(dta@data[[ids]] == Control_ID),])
+        #         #Find the control x,y location
+        #         cCoord = coordinates(dta[which(dta@data[[ids]] == Control_ID),])
                 
 
-                #Find the treatment x,y location
-                tCoord = coordinates(dta[which(dta@data[[ids]] == Treatment_ID),])
+        #         #Find the treatment x,y location
+        #         tCoord = coordinates(dta[which(dta@data[[ids]] == Treatment_ID),])
 
-                y_dist = abs(cCoord[1] - cCoord[2])
-                x_dist = abs(tCoord[1] - tCoord[2])
-                euc_dist = sqrt(y_dist^2 + x_dist^2)
+        #         y_dist = abs(cCoord[1] - cCoord[2])
+        #         x_dist = abs(tCoord[1] - tCoord[2])
+        #         euc_dist = sqrt(y_dist^2 + x_dist^2)
                 
-                print("nn2.2.1")
+        #         print("nn2.2.1")
 
-                PSM_score = k[["nn.dist"]][mC]
-                geog_Weight = pairDistWeight(dist=euc_dist,threshold=dist_PSM,type="Spherical")
+        #         PSM_score = k[["nn.dist"]][mC]
+        #         geog_Weight = pairDistWeight(dist=euc_dist,threshold=dist_PSM,type="Spherical")
 
-                print("nn2.2.2")
+        #         print("nn2.2.2")
 
                 
-                k[["nn.dist"]][mC] <- ((1-geog_Weight) * PSM_score)
+        #         k[["nn.dist"]][mC] <- ((1-geog_Weight) * PSM_score)
 
-            }
+        #     }
       
-        }
+        # }
 
         print("nn2.3")
 
@@ -103,8 +103,11 @@ fastNN_binary_func <- function(dta, trtMntVar, ids, curgrp, dist_PSM) {
 
 
         # Create a unique pair ID for each group (will simply append a "1" if only 1 group)
-        pair_id = paste(curgrp,j, sep="")
-        
+        if (curgrp != NULL) {
+            pair_id <- paste(curgrp,j, sep="")
+        else {
+            pair_id <- paste('pair',j, sep="")
+        }
 
 
         print("nn2.4x")

From 3aae82fd6cd92125277418629f73b12f724a85e8 Mon Sep 17 00:00:00 2001
From: userz <sgoodman@aiddata.org>
Date: Tue, 10 Nov 2015 13:49:17 -0500
Subject: [PATCH 183/212] Cleaning up fastnn and sat.

---
 R/fastNN_binary_func.R | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/R/fastNN_binary_func.R b/R/fastNN_binary_func.R
index 5308fd9..ad83aaf 100644
--- a/R/fastNN_binary_func.R
+++ b/R/fastNN_binary_func.R
@@ -105,7 +105,7 @@ fastNN_binary_func <- function(dta, trtMntVar, ids, curgrp, dist_PSM) {
         # Create a unique pair ID for each group (will simply append a "1" if only 1 group)
         if (curgrp != NULL) {
             pair_id <- paste(curgrp,j, sep="")
-        else {
+        } else {
             pair_id <- paste('pair',j, sep="")
         }
 

From 975c502c27d9e38fc486bb8d2185179e00a853c6 Mon Sep 17 00:00:00 2001
From: userz <sgoodman@aiddata.org>
Date: Tue, 10 Nov 2015 13:50:47 -0500
Subject: [PATCH 184/212] Cleaning up fastnn and sat.

---
 R/fastNN_binary_func.R | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/R/fastNN_binary_func.R b/R/fastNN_binary_func.R
index ad83aaf..fea89b8 100644
--- a/R/fastNN_binary_func.R
+++ b/R/fastNN_binary_func.R
@@ -103,10 +103,11 @@ fastNN_binary_func <- function(dta, trtMntVar, ids, curgrp, dist_PSM) {
 
 
         # Create a unique pair ID for each group (will simply append a "1" if only 1 group)
-        if (curgrp != NULL) {
-            pair_id <- paste(curgrp,j, sep="")
-        } else {
+        if (curgrp == NULL) {
             pair_id <- paste('pair',j, sep="")
+        } else {
+            pair_id <- paste(curgrp,j, sep="")
+
         }
 
 

From 9fa1103a5539e4e649ba34eb276333c4cc0c25ce Mon Sep 17 00:00:00 2001
From: userz <sgoodman@aiddata.org>
Date: Tue, 10 Nov 2015 13:51:40 -0500
Subject: [PATCH 185/212] Cleaning up fastnn and sat.

---
 R/fastNN_binary_func.R | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/R/fastNN_binary_func.R b/R/fastNN_binary_func.R
index fea89b8..a2fe169 100644
--- a/R/fastNN_binary_func.R
+++ b/R/fastNN_binary_func.R
@@ -103,7 +103,7 @@ fastNN_binary_func <- function(dta, trtMntVar, ids, curgrp, dist_PSM) {
 
 
         # Create a unique pair ID for each group (will simply append a "1" if only 1 group)
-        if (curgrp == NULL) {
+        if (is.null(curgrp)) {
             pair_id <- paste('pair',j, sep="")
         } else {
             pair_id <- paste(curgrp,j, sep="")

From 83bb0a01d909ebddcec1549af4c6540ce2cb1a82 Mon Sep 17 00:00:00 2001
From: userz <sgoodman@aiddata.org>
Date: Tue, 10 Nov 2015 13:55:30 -0500
Subject: [PATCH 186/212] Cleaning up fastnn and sat.

---
 R/SAT.R | 37 +++++++++++++++++++------------------
 1 file changed, 19 insertions(+), 18 deletions(-)

diff --git a/R/SAT.R b/R/SAT.R
index 9007333..5cdad3b 100644
--- a/R/SAT.R
+++ b/R/SAT.R
@@ -289,7 +289,7 @@ SAT <- function (dta, mtd, constraints, psm_eq, ids, drop_opts, visual, TrtBinCo
         t_dta <- list()
         u_dta <-list()
         grp_list <- list()
-        cnt = 0
+        cnt <- 0
 
         for (grp in 1:length(group_constraints)) {
             cur_grp <- as.matrix(group_constraints)[grp]
@@ -315,7 +315,7 @@ SAT <- function (dta, mtd, constraints, psm_eq, ids, drop_opts, visual, TrtBinCo
                 t_dta[[t_index]] <- t_dta[[t_index]][t_dta[[t_index]]$ConstraintGroupSet_Opt == cur_grp,]
                 u_dta[[t_index]] <- u_dta[[t_index]][u_dta[[t_index]]$ConstraintGroupSet_Opt == cur_grp,]
 
-                cnt = cnt + 1
+                cnt <- cnt + 1
             }
         }
 
@@ -331,10 +331,10 @@ SAT <- function (dta, mtd, constraints, psm_eq, ids, drop_opts, visual, TrtBinCo
         for (i in 1:cnt) {
             cur_grp <- grp_list[[i]]
 
-            print("sat3.1")
+            print("sat1a.3.1")
             it_dta <- maptools::spRbind(t_dta[[i]],u_dta[[i]])
 
-            print("sat3.2")
+            print("sat1a.3.2")
             if (mtd == "fastNN") {
                 # ***
                 # this is the slow part of functions
@@ -347,19 +347,33 @@ SAT <- function (dta, mtd, constraints, psm_eq, ids, drop_opts, visual, TrtBinCo
             # }
         }
 
+        print("sat1a.4")
+
+        #Build the final datasets from subsets
+        if (cnt > 1) {
+            dta <- temp_dta[[1]]
+            for(k in 2:cnt) {
+                dta  <- maptools::spRbind(dta, temp_dta[[k]])
+            } 
+        } else {
+            dta <- temp_dta[[1]]
+        }
+
         
     } else {
 
         print("sat1b.1")
 
+        cnt <- 1
         temp_dta <- list()
 
         if (mtd == "fastNN") {
             # ***
             # this is the slow part of functions
-            temp_dta[[i]] <- fastNN_binary_func(dta, TrtBinColName, ids, NULL, NULL) 
+            dta <- fastNN_binary_func(dta, TrtBinColName, ids, NULL, NULL) 
         }
 
+
         # if (mtd == "NN_WithReplacement") {
         #     print("NN with replacement is currently not available, please choose fastNN")
         #     # temp_dta[[i]] <- NN_WithReplacement_binary_func(it_dta,TrtBinColName,ids,cur_grp,dist_PSM) 
@@ -371,19 +385,6 @@ SAT <- function (dta, mtd, constraints, psm_eq, ids, drop_opts, visual, TrtBinCo
 
 
 
-    print("sat4")
-
-    #Build the final datasets from subsets
-    if (cnt > 1) {
-        dta <- temp_dta[[1]]
-        for(k in 2:cnt) {
-            dta  <- maptools::spRbind(dta, temp_dta[[k]])
-        } 
-    } else {
-        dta <- temp_dta[[1]]
-    }
-
-
     print("sat5")
 
     if (drop_unmatched == TRUE) {

From b8f1a1ac8499e3904b33ea2df7b5a8e5ab426894 Mon Sep 17 00:00:00 2001
From: userz <sgoodman@aiddata.org>
Date: Tue, 10 Nov 2015 14:03:59 -0500
Subject: [PATCH 187/212] Cleaning up fastnn and sat.

---
 R/fastNN_binary_func.R | 384 ++++++++++++++++++++---------------------
 1 file changed, 192 insertions(+), 192 deletions(-)

diff --git a/R/fastNN_binary_func.R b/R/fastNN_binary_func.R
index a2fe169..b16694d 100644
--- a/R/fastNN_binary_func.R
+++ b/R/fastNN_binary_func.R
@@ -1,147 +1,147 @@
-#FastNN 
-#Algorithm to find a hopefully near-optimal match of pairs
-#In a treatment and control group
-#Works by first ordering by the propensity score matching value,
-#and then working through this list in order from highest to lowest.
-#Matches are removed each step.
+# #FastNN 
+# #Algorithm to find a hopefully near-optimal match of pairs
+# #In a treatment and control group
+# #Works by first ordering by the propensity score matching value,
+# #and then working through this list in order from highest to lowest.
+# #Matches are removed each step.
 
-fastNN_binary_func <- function(dta, trtMntVar, ids, curgrp, dist_PSM) {
+# fastNN_binary_func <- function(dta, trtMntVar, ids, curgrp, dist_PSM) {
 
-    print("nn1.0")
+#     print("nn1.0")
 
-    #Fast nearest neighbors search - will not arrive at optimum,
-    #but this may not be an issue for many analysis.
-    #Effectively loops through all observations in the treatment group, ordered by PSM score - higher scores go first.
+#     #Fast nearest neighbors search - will not arrive at optimum,
+#     #but this may not be an issue for many analysis.
+#     #Effectively loops through all observations in the treatment group, ordered by PSM score - higher scores go first.
 
 
-    sorted_dta <- dta@data[order(dta@data[["PSM_trtProb"]]), c(ids, trtMntVar, "PSM_trtProb")]
+#     sorted_dta <- dta@data[order(dta@data[["PSM_trtProb"]]), c(ids, trtMntVar, "PSM_trtProb")]
 
-    #Conduct the matching
-    treated <- as.data.table(sorted_dta[sorted_dta[[trtMntVar]] == 1, c(ids, "PSM_trtProb")])
-    untreated <- as.data.table(sorted_dta[sorted_dta[[trtMntVar]] == 0, c(ids, "PSM_trtProb")])
+#     #Conduct the matching
+#     treated <- as.data.table(sorted_dta[sorted_dta[[trtMntVar]] == 1, c(ids, "PSM_trtProb")])
+#     untreated <- as.data.table(sorted_dta[sorted_dta[[trtMntVar]] == 0, c(ids, "PSM_trtProb")])
 
 
 
-    it_cnt = min(nrow(treated), nrow(untreated))
-    # dta@data[["match"]] <- -999
-    # dta@data[["PSM_distance"]] <- -999
-    dta@data[["PSM_match_ID"]] <- -999
+#     it_cnt = min(nrow(treated), nrow(untreated))
+#     # dta@data[["match"]] <- -999
+#     # dta@data[["PSM_distance"]] <- -999
+#     dta@data[["PSM_match_ID"]] <- -999
 
-    print("nn2")
+#     print("nn2")
 
-    # Calculate a distance decay function
-    # to perturb pairs based on their distances.  
-    for (j in 1:it_cnt) {
+#     # Calculate a distance decay function
+#     # to perturb pairs based on their distances.  
+#     for (j in 1:it_cnt) {
 
-        print("nn2.1")
+#         print("nn2.1")
       
-        k <- get.knnx(treated[, 'PSM_trtProb'], untreated[, 'PSM_trtProb'], 1)
+#         k <- get.knnx(treated[, 'PSM_trtProb'], untreated[, 'PSM_trtProb'], 1)
         
-        # print("nn2.2")
+#         # print("nn2.2")
 
-        # # Perturb the values based on the distance decay function, if selected.
-        # if (!is.null(dist_PSM)) {
-        #     for (mC in 1:length(k[[1]])) {
+#         # # Perturb the values based on the distance decay function, if selected.
+#         # if (!is.null(dist_PSM)) {
+#         #     for (mC in 1:length(k[[1]])) {
 
-        #         print("nn2.2.0")
+#         #         print("nn2.2.0")
 
-        #         # Calculate the Euclidean Distance between pairs
-        #         Control_ID = toString(untreated[mC, get(ids), with=FALSE])
+#         #         # Calculate the Euclidean Distance between pairs
+#         #         Control_ID = toString(untreated[mC, get(ids), with=FALSE])
 
-        #         mT = k[["nn.index"]][mC]
+#         #         mT = k[["nn.index"]][mC]
                 
-        #         Treatment_ID = toString(treated[mT, get(ids), with=FALSE])
+#         #         Treatment_ID = toString(treated[mT, get(ids), with=FALSE])
 
-        #         #Find the control x,y location
-        #         cCoord = coordinates(dta[which(dta@data[[ids]] == Control_ID),])
+#         #         #Find the control x,y location
+#         #         cCoord = coordinates(dta[which(dta@data[[ids]] == Control_ID),])
                 
 
-        #         #Find the treatment x,y location
-        #         tCoord = coordinates(dta[which(dta@data[[ids]] == Treatment_ID),])
+#         #         #Find the treatment x,y location
+#         #         tCoord = coordinates(dta[which(dta@data[[ids]] == Treatment_ID),])
 
-        #         y_dist = abs(cCoord[1] - cCoord[2])
-        #         x_dist = abs(tCoord[1] - tCoord[2])
-        #         euc_dist = sqrt(y_dist^2 + x_dist^2)
+#         #         y_dist = abs(cCoord[1] - cCoord[2])
+#         #         x_dist = abs(tCoord[1] - tCoord[2])
+#         #         euc_dist = sqrt(y_dist^2 + x_dist^2)
                 
-        #         print("nn2.2.1")
+#         #         print("nn2.2.1")
 
-        #         PSM_score = k[["nn.dist"]][mC]
-        #         geog_Weight = pairDistWeight(dist=euc_dist,threshold=dist_PSM,type="Spherical")
+#         #         PSM_score = k[["nn.dist"]][mC]
+#         #         geog_Weight = pairDistWeight(dist=euc_dist,threshold=dist_PSM,type="Spherical")
 
-        #         print("nn2.2.2")
+#         #         print("nn2.2.2")
 
                 
-        #         k[["nn.dist"]][mC] <- ((1-geog_Weight) * PSM_score)
+#         #         k[["nn.dist"]][mC] <- ((1-geog_Weight) * PSM_score)
 
-        #     }
+#         #     }
       
-        # }
+#         # }
 
-        print("nn2.3")
+#         print("nn2.3")
 
-        # Add the matched treatment and control values to the recording data frame
-        # best_m_control is the row in the "distance" matrix with the lowest value.  This is the same row as in the index.
-        best_m_control = which(k[["nn.dist"]] %in% sort(k[["nn.dist"]])[1])
+#         # Add the matched treatment and control values to the recording data frame
+#         # best_m_control is the row in the "distance" matrix with the lowest value.  This is the same row as in the index.
+#         best_m_control = which(k[["nn.dist"]] %in% sort(k[["nn.dist"]])[1])
         
-        # This will give us the matched index in the "treated" dataset.
-        best_m_treated = k[["nn.index"]][best_m_control]
+#         # This will give us the matched index in the "treated" dataset.
+#         best_m_treated = k[["nn.index"]][best_m_control]
         
 
-        # Control and Treatment PSM ID
-        # cid_txt = paste("untreated$",ids,"[",best_m_control,"]",sep="")
-        # Control_ID = toString(eval(parse(text=cid_txt)))
+#         # Control and Treatment PSM ID
+#         # cid_txt = paste("untreated$",ids,"[",best_m_control,"]",sep="")
+#         # Control_ID = toString(eval(parse(text=cid_txt)))
             
-        # tid_txt = paste("treated$",ids,"[",best_m_treated,"]",sep="")
-        # Treatment_ID = toString(eval(parse(text=tid_txt)))
+#         # tid_txt = paste("treated$",ids,"[",best_m_treated,"]",sep="")
+#         # Treatment_ID = toString(eval(parse(text=tid_txt)))
 
-        print(class(untreated))
-        print(colnames(untreated))
+#         print(class(untreated))
+#         print(colnames(untreated))
 
-        # Control PSM ID and Treatment PSM ID
-        Control_ID = toString(untreated[best_m_control, (ids), with=FALSE])
-        Treatment_ID = toString(treated[best_m_control, (ids), with=FALSE])
+#         # Control PSM ID and Treatment PSM ID
+#         Control_ID = toString(untreated[best_m_control, (ids), with=FALSE])
+#         Treatment_ID = toString(treated[best_m_control, (ids), with=FALSE])
 
 
-        # Create a unique pair ID for each group (will simply append a "1" if only 1 group)
-        if (is.null(curgrp)) {
-            pair_id <- paste('pair',j, sep="")
-        } else {
-            pair_id <- paste(curgrp,j, sep="")
+#         # Create a unique pair ID for each group (will simply append a "1" if only 1 group)
+#         if (is.null(curgrp)) {
+#             pair_id <- paste('pair',j, sep="")
+#         } else {
+#             pair_id <- paste(curgrp,j, sep="")
 
-        }
+#         }
 
 
-        print("nn2.4x")
+#         print("nn2.4x")
 
-        #Add the Treatment ID to the Control Row and Add the Control ID to the Treatment Row
-        # dta@data$match[which(dta@data[[ids]] == Control_ID)] <- Treatment_ID
-        # dta@data$match[which(dta@data[[ids]] == Treatment_ID)] <- Control_ID
+#         #Add the Treatment ID to the Control Row and Add the Control ID to the Treatment Row
+#         # dta@data$match[which(dta@data[[ids]] == Control_ID)] <- Treatment_ID
+#         # dta@data$match[which(dta@data[[ids]] == Treatment_ID)] <- Control_ID
 
 
-        # dta@data$PSM_distance[which(dta@data[[ids]] == Control_ID | dta@data[[ids]] == Treatment_ID)] <- k[["nn.dist"]][,1][best_m_control]
-        dta@data$PSM_match_ID[which(dta@data[[ids]] == Control_ID | dta@data[[ids]] == Treatment_ID)] <- pair_id        
+#         # dta@data$PSM_distance[which(dta@data[[ids]] == Control_ID | dta@data[[ids]] == Treatment_ID)] <- k[["nn.dist"]][,1][best_m_control]
+#         dta@data$PSM_match_ID[which(dta@data[[ids]] == Control_ID | dta@data[[ids]] == Treatment_ID)] <- pair_id        
 
 
         
-        # Drop the paired match out of the iteration matrix 
-        # sorted_dta <- sorted_dta[sorted_dta[[ids]] != Treatment_ID ,]
-        # sorted_dta <- sorted_dta[sorted_dta[[ids]] != Control_ID ,]    
+#         # Drop the paired match out of the iteration matrix 
+#         # sorted_dta <- sorted_dta[sorted_dta[[ids]] != Treatment_ID ,]
+#         # sorted_dta <- sorted_dta[sorted_dta[[ids]] != Control_ID ,]    
     
-        # sorted_dta[which(sorted_dta[[ids]] == Control_ID | sorted_dta[[ids]] == Treatment_ID),][['nn_matched']] <- 1
+#         # sorted_dta[which(sorted_dta[[ids]] == Control_ID | sorted_dta[[ids]] == Treatment_ID),][['nn_matched']] <- 1
 
-        qt = quote(ids != Treatment_ID)
-        qu = quote(ids != Treatment_ID)
+#         qt = quote(ids != Treatment_ID)
+#         qu = quote(ids != Treatment_ID)
 
-        treated <- treated[eval(qt)]
-        untreated <- untreated[eval(qu)]
+#         treated <- treated[eval(qt)]
+#         untreated <- untreated[eval(qu)]
         
 
 
-    }
+#     }
 
-    return(dta) 
+#     return(dta) 
 
-}
+# }
 
 
 
@@ -155,169 +155,169 @@ fastNN_binary_func <- function(dta, trtMntVar, ids, curgrp, dist_PSM) {
 
 
 
-# #FastNN 
-# #Algorithm to find a hopefully near-optimal match of pairs
-# #In a treatment and control group
-# #Works by first ordering by the propensity score matching value,
-# #and then working through this list in order from highest to lowest.
-# #Matches are removed each step.
+#FastNN 
+#Algorithm to find a hopefully near-optimal match of pairs
+#In a treatment and control group
+#Works by first ordering by the propensity score matching value,
+#and then working through this list in order from highest to lowest.
+#Matches are removed each step.
 
-# fastNN_binary_func <- function(dta, trtMntVar, ids, curgrp, dist_PSM) {
+fastNN_binary_func <- function(dta, trtMntVar, ids, curgrp, dist_PSM) {
 
-#     print("nn1.0")
-#     # timerx <- proc.time()
+    print("nn1.0")
+    # timerx <- proc.time()
 
     
 
-#     #Fast nearest neighbors search - will not arrive at optimum,
-#     #but this may not be an issue for many analysis.
-#     #Effectively loops through all observations in the treatment group, ordered by PSM score - higher scores go first.
+    #Fast nearest neighbors search - will not arrive at optimum,
+    #but this may not be an issue for many analysis.
+    #Effectively loops through all observations in the treatment group, ordered by PSM score - higher scores go first.
 
-#     sorted_dta <- dta@data[order(dta@data[["PSM_trtProb"]]), c(ids, trtMntVar, "PSM_trtProb")]
+    sorted_dta <- dta@data[order(dta@data[["PSM_trtProb"]]), c(ids, trtMntVar, "PSM_trtProb")]
 
 
-#     #Conduct the matching
-#     treated <- sorted_dta[sorted_dta[[trtMntVar]] == 1, c(ids, "PSM_trtProb")]
-#     untreated <- sorted_dta[sorted_dta[[trtMntVar]] == 0, c(ids, "PSM_trtProb")]
+    #Conduct the matching
+    treated <- sorted_dta[sorted_dta[[trtMntVar]] == 1, c(ids, "PSM_trtProb")]
+    untreated <- sorted_dta[sorted_dta[[trtMntVar]] == 0, c(ids, "PSM_trtProb")]
 
-#     it_cnt = min(length(treated[[1]]), length(untreated[[1]]))
-#     # dta@data[["match"]] <- -999
-#     # dta@data[["PSM_distance"]] <- -999
-#     dta@data[["PSM_match_ID"]] <- -999
+    it_cnt = min(length(treated[[1]]), length(untreated[[1]]))
+    # dta@data[["match"]] <- -999
+    # dta@data[["PSM_distance"]] <- -999
+    dta@data[["PSM_match_ID"]] <- -999
 
-#     print("nn2")
+    # print("nn2")
 
-#     #Calculate a distance decay function
-#     #to perturb pairs based on their distances.  
-#     for (j in 1:it_cnt) {
-#         # time_list <- c()
+    #Calculate a distance decay function
+    #to perturb pairs based on their distances.  
+    for (j in 1:it_cnt) {
+        # time_list <- c()
 
-#         print("nn2.0")
-#         # timer <- proc.time()
+        print("nn2.0")
+        # timer <- proc.time()
 
-#         # treated <- sorted_dta[which(sorted_dta[[trtMntVar]] == 1 & sorted_dta[['nn_matched']] == 0),]
-#         # untreated <- sorted_dta[which(sorted_dta[[trtMntVar]] == 0 & sorted_dta[['nn_matched']] == 0),]
+        # treated <- sorted_dta[which(sorted_dta[[trtMntVar]] == 1 & sorted_dta[['nn_matched']] == 0),]
+        # untreated <- sorted_dta[which(sorted_dta[[trtMntVar]] == 0 & sorted_dta[['nn_matched']] == 0),]
         
-#         # print(nrow(sorted_dta[which(sorted_dta[['nn_matched']] == 0),]))
+        # print(nrow(sorted_dta[which(sorted_dta[['nn_matched']] == 0),]))
 
-#         # time_list[1] <- round((proc.time() - timer)[3],5)
-#         # print("nn2.1")
-#         # timer <- proc.time()
+        # time_list[1] <- round((proc.time() - timer)[3],5)
+        # print("nn2.1")
+        # timer <- proc.time()
 
-#         #Run the KNN for all neighbors. 
-#         # print(length(treated[[1]]))
-#         # summary(treated[["PSM_trtProb"]])
-#         # print(length(untreated[[1]]))
-#         # summary(untreated[["PSM_trtProb"]])
+        #Run the KNN for all neighbors. 
+        # print(length(treated[[1]]))
+        # summary(treated[["PSM_trtProb"]])
+        # print(length(untreated[[1]]))
+        # summary(untreated[["PSM_trtProb"]])
 
-#         k <- get.knnx(treated[["PSM_trtProb"]], untreated[["PSM_trtProb"]], 1)
+        k <- get.knnx(treated[["PSM_trtProb"]], untreated[["PSM_trtProb"]], 1)
         
-#         # time_list[2] <- round((proc.time() - timer)[3],5)
-#         print("nn2.2")
-#         # timer <- proc.time()
+        # time_list[2] <- round((proc.time() - timer)[3],5)
+        print("nn2.2")
+        # timer <- proc.time()
 
-#         #Perturb the values based on the distance decay function, if selected.
-#         if (!is.null(dist_PSM)) {
-#             for (mC in 1:length(k[[1]])) {
+        # #Perturb the values based on the distance decay function, if selected.
+        # if (!is.null(dist_PSM)) {
+        #     for (mC in 1:length(k[[1]])) {
 
-#                 print("nn2.2.0")
+        #         print("nn2.2.0")
 
-#                 #Calculate the Euclidean Distance between pairs
-#                 Control_ID = toString(untreated[[ids]][[mC]])
+        #         #Calculate the Euclidean Distance between pairs
+        #         Control_ID = toString(untreated[[ids]][[mC]])
 
-#                 mT = k[["nn.index"]][mC]
+        #         mT = k[["nn.index"]][mC]
                 
-#                 Treatment_ID = toString(treated[[ids]][[mT]])
+        #         Treatment_ID = toString(treated[[ids]][[mT]])
 
-#                 #Find the control x,y location
-#                 cCoord = coordinates(dta[which(dta@data[[ids]] == Control_ID),])
+        #         #Find the control x,y location
+        #         cCoord = coordinates(dta[which(dta@data[[ids]] == Control_ID),])
                 
 
-#                 #Find the treatment x,y location
-#                 tCoord = coordinates(dta[which(dta@data[[ids]] == Treatment_ID),])
+        #         #Find the treatment x,y location
+        #         tCoord = coordinates(dta[which(dta@data[[ids]] == Treatment_ID),])
 
-#                 y_dist = abs(cCoord[1] - cCoord[2])
-#                 x_dist = abs(tCoord[1] - tCoord[2])
-#                 euc_dist = sqrt(y_dist^2 + x_dist^2)
+        #         y_dist = abs(cCoord[1] - cCoord[2])
+        #         x_dist = abs(tCoord[1] - tCoord[2])
+        #         euc_dist = sqrt(y_dist^2 + x_dist^2)
                 
-#                 print("nn2.2.1")
+        #         print("nn2.2.1")
 
-#                 PSM_score = k[["nn.dist"]][mC]
-#                 geog_Weight = pairDistWeight(dist=euc_dist,threshold=dist_PSM,type="Spherical")
+        #         PSM_score = k[["nn.dist"]][mC]
+        #         geog_Weight = pairDistWeight(dist=euc_dist,threshold=dist_PSM,type="Spherical")
 
-#                 print("nn2.2.2")
+        #         print("nn2.2.2")
 
                 
-#                 k[["nn.dist"]][mC] <- ((1-geog_Weight) * PSM_score)
+        #         k[["nn.dist"]][mC] <- ((1-geog_Weight) * PSM_score)
 
-#             }
+        #     }
       
-#         }
+        # }
 
-#         # time_list[3] <- round((proc.time() - timer)[3],5)
-#         print("nn2.3")
-#         # timer <- proc.time()
+        # time_list[3] <- round((proc.time() - timer)[3],5)
+        print("nn2.3")
+        # timer <- proc.time()
 
-#         #Add the matched treatment and control values to the recording data frame
-#         #best_m_control is the row in the "distance" matrix with the lowest value.  This is the same row as in the index.
-#         best_m_control = which(k[["nn.dist"]] %in% sort(k[["nn.dist"]])[1])
+        #Add the matched treatment and control values to the recording data frame
+        #best_m_control is the row in the "distance" matrix with the lowest value.  This is the same row as in the index.
+        best_m_control = which(k[["nn.dist"]] %in% sort(k[["nn.dist"]])[1])
         
-#         #This will give us the matched index in the "treated" dataset.
-#         best_m_treated = k[["nn.index"]][best_m_control]
+        #This will give us the matched index in the "treated" dataset.
+        best_m_treated = k[["nn.index"]][best_m_control]
         
 
-#         #Control PSM ID
-#         cid_txt = paste("untreated$",ids,"[",best_m_control,"]",sep="")
-#         Control_ID = toString(eval(parse(text=cid_txt)))
+        #Control PSM ID
+        cid_txt = paste("untreated$",ids,"[",best_m_control,"]",sep="")
+        Control_ID = toString(eval(parse(text=cid_txt)))
             
-#         #Treatment PSM ID
-#         tid_txt = paste("treated$",ids,"[",best_m_treated,"]",sep="")
-#         Treatment_ID = toString(eval(parse(text=tid_txt)))
+        #Treatment PSM ID
+        tid_txt = paste("treated$",ids,"[",best_m_treated,"]",sep="")
+        Treatment_ID = toString(eval(parse(text=tid_txt)))
 
-#         #Control PSM ID and Treatment PSM ID
-#         # Control_ID = toString(untreated[,ids][best_m_control])
-#         # Treatment_ID = toString(treated[,ids][best_m_treated])
+        #Control PSM ID and Treatment PSM ID
+        # Control_ID = toString(untreated[,ids][best_m_control])
+        # Treatment_ID = toString(treated[,ids][best_m_treated])
 
 
-#         #Create a unique pair ID for each group (will simply append a "1" if only 1 group)
-#         pair_id = paste(curgrp,j, sep="")
+        #Create a unique pair ID for each group (will simply append a "1" if only 1 group)
+        pair_id = paste(curgrp,j, sep="")
         
 
 
-#         # time_list[4] <- round((proc.time() - timer)[3],5)
-#         print("nn2.4x")
-#         # timer <- proc.time()
+        # time_list[4] <- round((proc.time() - timer)[3],5)
+        print("nn2.4x")
+        # timer <- proc.time()
 
-#         #Add the Treatment ID to the Control Row and Add the Control ID to the Treatment Row
-#         # dta@data$match[which(dta@data[[ids]] == Control_ID)] <- Treatment_ID
-#         # dta@data$match[which(dta@data[[ids]] == Treatment_ID)] <- Control_ID
+        #Add the Treatment ID to the Control Row and Add the Control ID to the Treatment Row
+        # dta@data$match[which(dta@data[[ids]] == Control_ID)] <- Treatment_ID
+        # dta@data$match[which(dta@data[[ids]] == Treatment_ID)] <- Control_ID
 
 
-#         # dta@data$PSM_distance[which(dta@data[[ids]] == Control_ID | dta@data[[ids]] == Treatment_ID)] <- k[["nn.dist"]][,1][best_m_control]
-#         dta@data$PSM_match_ID[which(dta@data[[ids]] == Control_ID | dta@data[[ids]] == Treatment_ID)] <- pair_id        
+        # dta@data$PSM_distance[which(dta@data[[ids]] == Control_ID | dta@data[[ids]] == Treatment_ID)] <- k[["nn.dist"]][,1][best_m_control]
+        dta@data$PSM_match_ID[which(dta@data[[ids]] == Control_ID | dta@data[[ids]] == Treatment_ID)] <- pair_id        
 
 
         
-#         # time_list[5] <- round((proc.time() - timer)[3],5)
-#         # timer <- proc.time()
+        # time_list[5] <- round((proc.time() - timer)[3],5)
+        # timer <- proc.time()
 
-#         #Drop the paired match out of the iteration matrix 
-#         # sorted_dta <- sorted_dta[sorted_dta[[ids]] != Treatment_ID ,]
-#         # sorted_dta <- sorted_dta[sorted_dta[[ids]] != Control_ID ,]    
+        #Drop the paired match out of the iteration matrix 
+        # sorted_dta <- sorted_dta[sorted_dta[[ids]] != Treatment_ID ,]
+        # sorted_dta <- sorted_dta[sorted_dta[[ids]] != Control_ID ,]    
     
-#         # sorted_dta[which(sorted_dta[[ids]] == Control_ID | sorted_dta[[ids]] == Treatment_ID),][['nn_matched']] <- 1
+        # sorted_dta[which(sorted_dta[[ids]] == Control_ID | sorted_dta[[ids]] == Treatment_ID),][['nn_matched']] <- 1
 
-#         treated <- treated[which(treated[[ids]] != Treatment_ID),]
-#         untreated <- untreated[which(untreated[[ids]] != Control_ID),]
+        treated <- treated[which(treated[[ids]] != Treatment_ID),]
+        untreated <- untreated[which(untreated[[ids]] != Control_ID),]
         
-#         # time_list[6] <- round((proc.time() - timer)[3],5)
-#         # print(paste(time_list))
+        # time_list[6] <- round((proc.time() - timer)[3],5)
+        # print(paste(time_list))
 
-#     }
+    }
 
-#     # print((proc.time() - timerx)[3])
+    # print((proc.time() - timerx)[3])
 
-#     return(dta) 
+    return(dta) 
 
-# }
+}
 

From fecb05258a6895fe1782ca0f22e797e9f04a367b Mon Sep 17 00:00:00 2001
From: userz <sgoodman@aiddata.org>
Date: Tue, 10 Nov 2015 14:06:43 -0500
Subject: [PATCH 188/212] Cleaning up fastnn and sat.

---
 R/fastNN_binary_func.R | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/R/fastNN_binary_func.R b/R/fastNN_binary_func.R
index b16694d..b12f158 100644
--- a/R/fastNN_binary_func.R
+++ b/R/fastNN_binary_func.R
@@ -192,7 +192,7 @@ fastNN_binary_func <- function(dta, trtMntVar, ids, curgrp, dist_PSM) {
     for (j in 1:it_cnt) {
         # time_list <- c()
 
-        print("nn2.0")
+        print(paste("nn cnt:",i))
         # timer <- proc.time()
 
         # treated <- sorted_dta[which(sorted_dta[[trtMntVar]] == 1 & sorted_dta[['nn_matched']] == 0),]
@@ -213,7 +213,7 @@ fastNN_binary_func <- function(dta, trtMntVar, ids, curgrp, dist_PSM) {
         k <- get.knnx(treated[["PSM_trtProb"]], untreated[["PSM_trtProb"]], 1)
         
         # time_list[2] <- round((proc.time() - timer)[3],5)
-        print("nn2.2")
+        # print("nn2.2")
         # timer <- proc.time()
 
         # #Perturb the values based on the distance decay function, if selected.

From c3f1859ab289f089a52fe865a6194fd14dc39605 Mon Sep 17 00:00:00 2001
From: userz <sgoodman@aiddata.org>
Date: Tue, 10 Nov 2015 14:07:41 -0500
Subject: [PATCH 189/212] Cleaning up fastnn and sat.

---
 R/fastNN_binary_func.R | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/R/fastNN_binary_func.R b/R/fastNN_binary_func.R
index b12f158..b3812d6 100644
--- a/R/fastNN_binary_func.R
+++ b/R/fastNN_binary_func.R
@@ -192,7 +192,7 @@ fastNN_binary_func <- function(dta, trtMntVar, ids, curgrp, dist_PSM) {
     for (j in 1:it_cnt) {
         # time_list <- c()
 
-        print(paste("nn cnt:",i))
+        print(paste("nn cnt:",j))
         # timer <- proc.time()
 
         # treated <- sorted_dta[which(sorted_dta[[trtMntVar]] == 1 & sorted_dta[['nn_matched']] == 0),]

From 50c67c143b2fda5bd4a28443f9115ad7e391ccaf Mon Sep 17 00:00:00 2001
From: userz <sgoodman@aiddata.org>
Date: Tue, 10 Nov 2015 14:30:27 -0500
Subject: [PATCH 190/212] Cleaning up fastnn and sat.

---
 R/SAT.R                | 4 ++++
 R/fastNN_binary_func.R | 5 +++++
 2 files changed, 9 insertions(+)

diff --git a/R/SAT.R b/R/SAT.R
index 5cdad3b..ac3f97c 100644
--- a/R/SAT.R
+++ b/R/SAT.R
@@ -371,6 +371,10 @@ SAT <- function (dta, mtd, constraints, psm_eq, ids, drop_opts, visual, TrtBinCo
             # ***
             # this is the slow part of functions
             dta <- fastNN_binary_func(dta, TrtBinColName, ids, NULL, NULL) 
+
+            if (class(dta) == class('drop')) {
+                return('drop')
+            }
         }
 
 
diff --git a/R/fastNN_binary_func.R b/R/fastNN_binary_func.R
index b3812d6..b65eb3f 100644
--- a/R/fastNN_binary_func.R
+++ b/R/fastNN_binary_func.R
@@ -181,6 +181,11 @@ fastNN_binary_func <- function(dta, trtMntVar, ids, curgrp, dist_PSM) {
     untreated <- sorted_dta[sorted_dta[[trtMntVar]] == 0, c(ids, "PSM_trtProb")]
 
     it_cnt = min(length(treated[[1]]), length(untreated[[1]]))
+
+    if (it_cnt < 30) {
+        return('drop')
+    }
+    
     # dta@data[["match"]] <- -999
     # dta@data[["PSM_distance"]] <- -999
     dta@data[["PSM_match_ID"]] <- -999

From 866f21d9ed736ac0df1628577e399d8b1a9d8710 Mon Sep 17 00:00:00 2001
From: userz <sgoodman@aiddata.org>
Date: Tue, 10 Nov 2015 16:20:32 -0500
Subject: [PATCH 191/212] Cleaning up fastnn and sat.

---
 R/fastNN_binary_func.R | 18 +++++++++---------
 1 file changed, 9 insertions(+), 9 deletions(-)

diff --git a/R/fastNN_binary_func.R b/R/fastNN_binary_func.R
index b65eb3f..01cbb30 100644
--- a/R/fastNN_binary_func.R
+++ b/R/fastNN_binary_func.R
@@ -185,7 +185,7 @@ fastNN_binary_func <- function(dta, trtMntVar, ids, curgrp, dist_PSM) {
     if (it_cnt < 30) {
         return('drop')
     }
-    
+
     # dta@data[["match"]] <- -999
     # dta@data[["PSM_distance"]] <- -999
     dta@data[["PSM_match_ID"]] <- -999
@@ -271,17 +271,17 @@ fastNN_binary_func <- function(dta, trtMntVar, ids, curgrp, dist_PSM) {
         best_m_treated = k[["nn.index"]][best_m_control]
         
 
-        #Control PSM ID
-        cid_txt = paste("untreated$",ids,"[",best_m_control,"]",sep="")
-        Control_ID = toString(eval(parse(text=cid_txt)))
+        # #Control PSM ID
+        # cid_txt = paste("untreated$",ids,"[",best_m_control,"]",sep="")
+        # Control_ID = toString(eval(parse(text=cid_txt)))
             
-        #Treatment PSM ID
-        tid_txt = paste("treated$",ids,"[",best_m_treated,"]",sep="")
-        Treatment_ID = toString(eval(parse(text=tid_txt)))
+        # #Treatment PSM ID
+        # tid_txt = paste("treated$",ids,"[",best_m_treated,"]",sep="")
+        # Treatment_ID = toString(eval(parse(text=tid_txt)))
 
         #Control PSM ID and Treatment PSM ID
-        # Control_ID = toString(untreated[,ids][best_m_control])
-        # Treatment_ID = toString(treated[,ids][best_m_treated])
+        Control_ID = toString(untreated[[ids][best_m_control])
+        Treatment_ID = toString(treated[[ids]][best_m_treated])
 
 
         #Create a unique pair ID for each group (will simply append a "1" if only 1 group)

From c1015ba3559739613d01451eeef9378b7a1af822 Mon Sep 17 00:00:00 2001
From: userz <sgoodman@aiddata.org>
Date: Tue, 10 Nov 2015 16:21:03 -0500
Subject: [PATCH 192/212] Cleaning up fastnn and sat.

---
 R/fastNN_binary_func.R | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/R/fastNN_binary_func.R b/R/fastNN_binary_func.R
index 01cbb30..0f80e36 100644
--- a/R/fastNN_binary_func.R
+++ b/R/fastNN_binary_func.R
@@ -280,7 +280,7 @@ fastNN_binary_func <- function(dta, trtMntVar, ids, curgrp, dist_PSM) {
         # Treatment_ID = toString(eval(parse(text=tid_txt)))
 
         #Control PSM ID and Treatment PSM ID
-        Control_ID = toString(untreated[[ids][best_m_control])
+        Control_ID = toString(untreated[[ids]][best_m_control])
         Treatment_ID = toString(treated[[ids]][best_m_treated])
 
 

From a0f9eca05043c2a4a8d032e4751a4dc9c1a41382 Mon Sep 17 00:00:00 2001
From: userz <sgoodman@aiddata.org>
Date: Thu, 12 Nov 2015 15:12:28 -0500
Subject: [PATCH 193/212] Test matchit lib.

---
 R/fastNN_binary_func.R | 222 +++++++++++++++++++++++------------------
 R/loadLibs.R           |   4 +-
 2 files changed, 126 insertions(+), 100 deletions(-)

diff --git a/R/fastNN_binary_func.R b/R/fastNN_binary_func.R
index 0f80e36..50b83d5 100644
--- a/R/fastNN_binary_func.R
+++ b/R/fastNN_binary_func.R
@@ -164,163 +164,187 @@
 
 fastNN_binary_func <- function(dta, trtMntVar, ids, curgrp, dist_PSM) {
 
-    print("nn1.0")
-    # timerx <- proc.time()
 
-    
+    print('nn')
 
-    #Fast nearest neighbors search - will not arrive at optimum,
-    #but this may not be an issue for many analysis.
-    #Effectively loops through all observations in the treatment group, ordered by PSM score - higher scores go first.
+    # make sure there are at least x rows in both treated and control
+    row_min <- 30
+    trt_rows <- dta@data$TrtBin == 1
+    if ( sum(trt_rows) < row_min | sum(! trt_rows) < row_min ) {
+        return('drop')
+    }
 
-    sorted_dta <- dta@data[order(dta@data[["PSM_trtProb"]]), c(ids, trtMntVar, "PSM_trtProb")]
+    rownames(dta@data) <- dta$sector_split_id
 
+    m <- matchit(TrtBin ~ PSM_trtProb, data=dta@data, method="optimal", ratio=1)
 
-    #Conduct the matching
-    treated <- sorted_dta[sorted_dta[[trtMntVar]] == 1, c(ids, "PSM_trtProb")]
-    untreated <- sorted_dta[sorted_dta[[trtMntVar]] == 0, c(ids, "PSM_trtProb")]
+    for ( i in rownames(m$match.matrix) ) {
+        trt_id <- i
+        cnt_id <- m$match.matrix[i,]
+        pair_id <- paste(trt_id,cnt_id, sep='__')
 
-    it_cnt = min(length(treated[[1]]), length(untreated[[1]]))
+        dta@data$PSM_match_ID[which(dta@data[[ids]] == cnt_id | dta@data[[ids]] == trt_id)] <- pair_id        
 
-    if (it_cnt < 30) {
-        return('drop')
     }
 
-    # dta@data[["match"]] <- -999
-    # dta@data[["PSM_distance"]] <- -999
-    dta@data[["PSM_match_ID"]] <- -999
 
-    # print("nn2")
+    # ===========================================================================
+ 
+
+    # print("nn1.0")
+
+
+    # #Fast nearest neighbors search - will not arrive at optimum,
+    # #but this may not be an issue for many analysis.
+    # #Effectively loops through all observations in the treatment group, ordered by PSM score - higher scores go first.
+
+    # sorted_dta <- dta@data[order(dta@data[["PSM_trtProb"]]), c(ids, trtMntVar, "PSM_trtProb")]
 
-    #Calculate a distance decay function
-    #to perturb pairs based on their distances.  
-    for (j in 1:it_cnt) {
-        # time_list <- c()
 
-        print(paste("nn cnt:",j))
-        # timer <- proc.time()
+    # #Conduct the matching
+    # treated <- sorted_dta[sorted_dta[[trtMntVar]] == 1, c(ids, "PSM_trtProb")]
+    # untreated <- sorted_dta[sorted_dta[[trtMntVar]] == 0, c(ids, "PSM_trtProb")]
 
-        # treated <- sorted_dta[which(sorted_dta[[trtMntVar]] == 1 & sorted_dta[['nn_matched']] == 0),]
-        # untreated <- sorted_dta[which(sorted_dta[[trtMntVar]] == 0 & sorted_dta[['nn_matched']] == 0),]
+    # it_cnt = min(length(treated[[1]]), length(untreated[[1]]))
+
+    # if (it_cnt < 30) {
+    #     return('drop')
+    # }
+
+    # # dta@data[["match"]] <- -999
+    # # dta@data[["PSM_distance"]] <- -999
+    # dta@data[["PSM_match_ID"]] <- -999
+
+    # # print("nn2")
+
+    # #Calculate a distance decay function
+    # #to perturb pairs based on their distances.  
+    # for (j in 1:it_cnt) {
+    #     # time_list <- c()
+
+    #     print(paste("nn cnt:",j))
+    #     # timer <- proc.time()
+
+    #     # treated <- sorted_dta[which(sorted_dta[[trtMntVar]] == 1 & sorted_dta[['nn_matched']] == 0),]
+    #     # untreated <- sorted_dta[which(sorted_dta[[trtMntVar]] == 0 & sorted_dta[['nn_matched']] == 0),]
         
-        # print(nrow(sorted_dta[which(sorted_dta[['nn_matched']] == 0),]))
+    #     # print(nrow(sorted_dta[which(sorted_dta[['nn_matched']] == 0),]))
 
-        # time_list[1] <- round((proc.time() - timer)[3],5)
-        # print("nn2.1")
-        # timer <- proc.time()
+    #     # time_list[1] <- round((proc.time() - timer)[3],5)
+    #     # print("nn2.1")
+    #     # timer <- proc.time()
 
-        #Run the KNN for all neighbors. 
-        # print(length(treated[[1]]))
-        # summary(treated[["PSM_trtProb"]])
-        # print(length(untreated[[1]]))
-        # summary(untreated[["PSM_trtProb"]])
+    #     #Run the KNN for all neighbors. 
+    #     # print(length(treated[[1]]))
+    #     # summary(treated[["PSM_trtProb"]])
+    #     # print(length(untreated[[1]]))
+    #     # summary(untreated[["PSM_trtProb"]])
 
-        k <- get.knnx(treated[["PSM_trtProb"]], untreated[["PSM_trtProb"]], 1)
+    #     k <- get.knnx(treated[["PSM_trtProb"]], untreated[["PSM_trtProb"]], 1)
         
-        # time_list[2] <- round((proc.time() - timer)[3],5)
-        # print("nn2.2")
-        # timer <- proc.time()
+    #     # time_list[2] <- round((proc.time() - timer)[3],5)
+    #     # print("nn2.2")
+    #     # timer <- proc.time()
 
-        # #Perturb the values based on the distance decay function, if selected.
-        # if (!is.null(dist_PSM)) {
-        #     for (mC in 1:length(k[[1]])) {
+    #     # #Perturb the values based on the distance decay function, if selected.
+    #     # if (!is.null(dist_PSM)) {
+    #     #     for (mC in 1:length(k[[1]])) {
 
-        #         print("nn2.2.0")
+    #     #         print("nn2.2.0")
 
-        #         #Calculate the Euclidean Distance between pairs
-        #         Control_ID = toString(untreated[[ids]][[mC]])
+    #     #         #Calculate the Euclidean Distance between pairs
+    #     #         Control_ID = toString(untreated[[ids]][[mC]])
 
-        #         mT = k[["nn.index"]][mC]
+    #     #         mT = k[["nn.index"]][mC]
                 
-        #         Treatment_ID = toString(treated[[ids]][[mT]])
+    #     #         Treatment_ID = toString(treated[[ids]][[mT]])
 
-        #         #Find the control x,y location
-        #         cCoord = coordinates(dta[which(dta@data[[ids]] == Control_ID),])
+    #     #         #Find the control x,y location
+    #     #         cCoord = coordinates(dta[which(dta@data[[ids]] == Control_ID),])
                 
 
-        #         #Find the treatment x,y location
-        #         tCoord = coordinates(dta[which(dta@data[[ids]] == Treatment_ID),])
+    #     #         #Find the treatment x,y location
+    #     #         tCoord = coordinates(dta[which(dta@data[[ids]] == Treatment_ID),])
 
-        #         y_dist = abs(cCoord[1] - cCoord[2])
-        #         x_dist = abs(tCoord[1] - tCoord[2])
-        #         euc_dist = sqrt(y_dist^2 + x_dist^2)
+    #     #         y_dist = abs(cCoord[1] - cCoord[2])
+    #     #         x_dist = abs(tCoord[1] - tCoord[2])
+    #     #         euc_dist = sqrt(y_dist^2 + x_dist^2)
                 
-        #         print("nn2.2.1")
+    #     #         print("nn2.2.1")
 
-        #         PSM_score = k[["nn.dist"]][mC]
-        #         geog_Weight = pairDistWeight(dist=euc_dist,threshold=dist_PSM,type="Spherical")
+    #     #         PSM_score = k[["nn.dist"]][mC]
+    #     #         geog_Weight = pairDistWeight(dist=euc_dist,threshold=dist_PSM,type="Spherical")
 
-        #         print("nn2.2.2")
+    #     #         print("nn2.2.2")
 
                 
-        #         k[["nn.dist"]][mC] <- ((1-geog_Weight) * PSM_score)
+    #     #         k[["nn.dist"]][mC] <- ((1-geog_Weight) * PSM_score)
 
-        #     }
+    #     #     }
       
-        # }
+    #     # }
 
-        # time_list[3] <- round((proc.time() - timer)[3],5)
-        print("nn2.3")
-        # timer <- proc.time()
+    #     # time_list[3] <- round((proc.time() - timer)[3],5)
+    #     print("nn2.3")
+    #     # timer <- proc.time()
 
-        #Add the matched treatment and control values to the recording data frame
-        #best_m_control is the row in the "distance" matrix with the lowest value.  This is the same row as in the index.
-        best_m_control = which(k[["nn.dist"]] %in% sort(k[["nn.dist"]])[1])
+    #     #Add the matched treatment and control values to the recording data frame
+    #     #best_m_control is the row in the "distance" matrix with the lowest value.  This is the same row as in the index.
+    #     best_m_control = which(k[["nn.dist"]] %in% sort(k[["nn.dist"]])[1])
         
-        #This will give us the matched index in the "treated" dataset.
-        best_m_treated = k[["nn.index"]][best_m_control]
+    #     #This will give us the matched index in the "treated" dataset.
+    #     best_m_treated = k[["nn.index"]][best_m_control]
         
 
-        # #Control PSM ID
-        # cid_txt = paste("untreated$",ids,"[",best_m_control,"]",sep="")
-        # Control_ID = toString(eval(parse(text=cid_txt)))
+    #     # #Control PSM ID
+    #     # cid_txt = paste("untreated$",ids,"[",best_m_control,"]",sep="")
+    #     # Control_ID = toString(eval(parse(text=cid_txt)))
             
-        # #Treatment PSM ID
-        # tid_txt = paste("treated$",ids,"[",best_m_treated,"]",sep="")
-        # Treatment_ID = toString(eval(parse(text=tid_txt)))
+    #     # #Treatment PSM ID
+    #     # tid_txt = paste("treated$",ids,"[",best_m_treated,"]",sep="")
+    #     # Treatment_ID = toString(eval(parse(text=tid_txt)))
 
-        #Control PSM ID and Treatment PSM ID
-        Control_ID = toString(untreated[[ids]][best_m_control])
-        Treatment_ID = toString(treated[[ids]][best_m_treated])
+    #     #Control PSM ID and Treatment PSM ID
+    #     Control_ID = toString(untreated[[ids]][best_m_control])
+    #     Treatment_ID = toString(treated[[ids]][best_m_treated])
 
 
-        #Create a unique pair ID for each group (will simply append a "1" if only 1 group)
-        pair_id = paste(curgrp,j, sep="")
+    #     #Create a unique pair ID for each group (will simply append a "1" if only 1 group)
+    #     pair_id = paste(curgrp,j, sep="")
         
 
 
-        # time_list[4] <- round((proc.time() - timer)[3],5)
-        print("nn2.4x")
-        # timer <- proc.time()
+    #     # time_list[4] <- round((proc.time() - timer)[3],5)
+    #     print("nn2.4x")
+    #     # timer <- proc.time()
 
-        #Add the Treatment ID to the Control Row and Add the Control ID to the Treatment Row
-        # dta@data$match[which(dta@data[[ids]] == Control_ID)] <- Treatment_ID
-        # dta@data$match[which(dta@data[[ids]] == Treatment_ID)] <- Control_ID
+    #     #Add the Treatment ID to the Control Row and Add the Control ID to the Treatment Row
+    #     # dta@data$match[which(dta@data[[ids]] == Control_ID)] <- Treatment_ID
+    #     # dta@data$match[which(dta@data[[ids]] == Treatment_ID)] <- Control_ID
 
 
-        # dta@data$PSM_distance[which(dta@data[[ids]] == Control_ID | dta@data[[ids]] == Treatment_ID)] <- k[["nn.dist"]][,1][best_m_control]
-        dta@data$PSM_match_ID[which(dta@data[[ids]] == Control_ID | dta@data[[ids]] == Treatment_ID)] <- pair_id        
+    #     # dta@data$PSM_distance[which(dta@data[[ids]] == Control_ID | dta@data[[ids]] == Treatment_ID)] <- k[["nn.dist"]][,1][best_m_control]
+    #     dta@data$PSM_match_ID[which(dta@data[[ids]] == Control_ID | dta@data[[ids]] == Treatment_ID)] <- pair_id        
 
 
         
-        # time_list[5] <- round((proc.time() - timer)[3],5)
-        # timer <- proc.time()
+    #     # time_list[5] <- round((proc.time() - timer)[3],5)
+    #     # timer <- proc.time()
 
-        #Drop the paired match out of the iteration matrix 
-        # sorted_dta <- sorted_dta[sorted_dta[[ids]] != Treatment_ID ,]
-        # sorted_dta <- sorted_dta[sorted_dta[[ids]] != Control_ID ,]    
+    #     #Drop the paired match out of the iteration matrix 
+    #     # sorted_dta <- sorted_dta[sorted_dta[[ids]] != Treatment_ID ,]
+    #     # sorted_dta <- sorted_dta[sorted_dta[[ids]] != Control_ID ,]    
     
-        # sorted_dta[which(sorted_dta[[ids]] == Control_ID | sorted_dta[[ids]] == Treatment_ID),][['nn_matched']] <- 1
+    #     # sorted_dta[which(sorted_dta[[ids]] == Control_ID | sorted_dta[[ids]] == Treatment_ID),][['nn_matched']] <- 1
 
-        treated <- treated[which(treated[[ids]] != Treatment_ID),]
-        untreated <- untreated[which(untreated[[ids]] != Control_ID),]
+    #     treated <- treated[which(treated[[ids]] != Treatment_ID),]
+    #     untreated <- untreated[which(untreated[[ids]] != Control_ID),]
         
-        # time_list[6] <- round((proc.time() - timer)[3],5)
-        # print(paste(time_list))
+    #     # time_list[6] <- round((proc.time() - timer)[3],5)
+    #     # print(paste(time_list))
 
-    }
+    # }
 
-    # print((proc.time() - timerx)[3])
 
     return(dta) 
 
diff --git a/R/loadLibs.R b/R/loadLibs.R
index cff4216..943400e 100644
--- a/R/loadLibs.R
+++ b/R/loadLibs.R
@@ -20,5 +20,7 @@ loadLibs <- function (x=1) {
   library(lmtest)
   library(multiwayvcov)
   
-  library(data.table)
+  # library(data.table)
+  library(MatchIt)
+  library(optmatch)
 }

From f6540526621ead71cb29db10b2220c9c46f5765c Mon Sep 17 00:00:00 2001
From: userz <sgoodman@aiddata.org>
Date: Thu, 12 Nov 2015 15:22:32 -0500
Subject: [PATCH 194/212] Test matchit lib.

---
 R/fastNN_binary_func.R | 10 ++++++++--
 1 file changed, 8 insertions(+), 2 deletions(-)

diff --git a/R/fastNN_binary_func.R b/R/fastNN_binary_func.R
index 50b83d5..f92f74d 100644
--- a/R/fastNN_binary_func.R
+++ b/R/fastNN_binary_func.R
@@ -174,9 +174,15 @@ fastNN_binary_func <- function(dta, trtMntVar, ids, curgrp, dist_PSM) {
         return('drop')
     }
 
-    rownames(dta@data) <- dta$sector_split_id
 
-    m <- matchit(TrtBin ~ PSM_trtProb, data=dta@data, method="optimal", ratio=1)
+
+    match_data <- dta@data[,c('PSM_trtProb', ids)]
+
+    rownames(match_data) <- match_data[[ids]]
+
+    zzz <<- match_data
+    
+    m <- matchit(TrtBin ~ PSM_trtProb, data=match_data, method="optimal", ratio=1)
 
     for ( i in rownames(m$match.matrix) ) {
         trt_id <- i

From 54c3d72d1159a9c7729208db512859c712ef291e Mon Sep 17 00:00:00 2001
From: userz <sgoodman@aiddata.org>
Date: Thu, 12 Nov 2015 15:23:45 -0500
Subject: [PATCH 195/212] Test matchit lib.

---
 R/fastNN_binary_func.R | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/R/fastNN_binary_func.R b/R/fastNN_binary_func.R
index f92f74d..3a4adcb 100644
--- a/R/fastNN_binary_func.R
+++ b/R/fastNN_binary_func.R
@@ -176,12 +176,12 @@ fastNN_binary_func <- function(dta, trtMntVar, ids, curgrp, dist_PSM) {
 
 
 
-    match_data <- dta@data[,c('PSM_trtProb', ids)]
+    match_data <- dta@data[,c('PSM_trtProb', ids, 'TrtBin')]
 
     rownames(match_data) <- match_data[[ids]]
 
     zzz <<- match_data
-    
+
     m <- matchit(TrtBin ~ PSM_trtProb, data=match_data, method="optimal", ratio=1)
 
     for ( i in rownames(m$match.matrix) ) {

From 80759e750363be6be2025a1127d01f1a4fbefa35 Mon Sep 17 00:00:00 2001
From: userz <sgoodman@aiddata.org>
Date: Thu, 12 Nov 2015 15:38:21 -0500
Subject: [PATCH 196/212] Test matchit lib.

---
 R/fastNN_binary_func.R | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/R/fastNN_binary_func.R b/R/fastNN_binary_func.R
index 3a4adcb..e563d85 100644
--- a/R/fastNN_binary_func.R
+++ b/R/fastNN_binary_func.R
@@ -182,7 +182,7 @@ fastNN_binary_func <- function(dta, trtMntVar, ids, curgrp, dist_PSM) {
 
     zzz <<- match_data
 
-    m <- matchit(TrtBin ~ PSM_trtProb, data=match_data, method="optimal", ratio=1)
+    m <- matchit(TrtBin ~ PSM_trtProb, data=match_data, method="nearest", ratio=1)
 
     for ( i in rownames(m$match.matrix) ) {
         trt_id <- i

From 85b06fe2957f0843f90cfda5ef642d5ce56bd5fb Mon Sep 17 00:00:00 2001
From: userz <sgoodman@aiddata.org>
Date: Thu, 12 Nov 2015 15:40:54 -0500
Subject: [PATCH 197/212] Test matchit lib.

---
 R/fastNN_binary_func.R | 2 ++
 R/loadLibs.R           | 2 +-
 2 files changed, 3 insertions(+), 1 deletion(-)

diff --git a/R/fastNN_binary_func.R b/R/fastNN_binary_func.R
index e563d85..a9b3aaa 100644
--- a/R/fastNN_binary_func.R
+++ b/R/fastNN_binary_func.R
@@ -184,6 +184,8 @@ fastNN_binary_func <- function(dta, trtMntVar, ids, curgrp, dist_PSM) {
 
     m <- matchit(TrtBin ~ PSM_trtProb, data=match_data, method="nearest", ratio=1)
 
+    dta@data[["PSM_match_ID"]] <- -999
+
     for ( i in rownames(m$match.matrix) ) {
         trt_id <- i
         cnt_id <- m$match.matrix[i,]
diff --git a/R/loadLibs.R b/R/loadLibs.R
index 943400e..6facdfc 100644
--- a/R/loadLibs.R
+++ b/R/loadLibs.R
@@ -22,5 +22,5 @@ loadLibs <- function (x=1) {
   
   # library(data.table)
   library(MatchIt)
-  library(optmatch)
+  # library(optmatch)
 }

From f72ebf2481830df699643c3fda0222860f637960 Mon Sep 17 00:00:00 2001
From: userz <sgoodman@aiddata.org>
Date: Thu, 12 Nov 2015 15:48:34 -0500
Subject: [PATCH 198/212] Test matchit lib.

---
 R/fastNN_binary_func.R | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

diff --git a/R/fastNN_binary_func.R b/R/fastNN_binary_func.R
index a9b3aaa..4b7f738 100644
--- a/R/fastNN_binary_func.R
+++ b/R/fastNN_binary_func.R
@@ -189,10 +189,11 @@ fastNN_binary_func <- function(dta, trtMntVar, ids, curgrp, dist_PSM) {
     for ( i in rownames(m$match.matrix) ) {
         trt_id <- i
         cnt_id <- m$match.matrix[i,]
-        pair_id <- paste(trt_id,cnt_id, sep='__')
-
-        dta@data$PSM_match_ID[which(dta@data[[ids]] == cnt_id | dta@data[[ids]] == trt_id)] <- pair_id        
-
+        
+        if ( !is.na(cnt_id)) ) {
+            pair_id <- paste(trt_id,cnt_id, sep='__')
+            dta@data$PSM_match_ID[which(dta@data[[ids]] == cnt_id | dta@data[[ids]] == trt_id)] <- pair_id        
+        }
     }
 
 

From 4b668441a72e3a2191f4fe63c19f09a64a9608aa Mon Sep 17 00:00:00 2001
From: userz <sgoodman@aiddata.org>
Date: Thu, 12 Nov 2015 15:48:58 -0500
Subject: [PATCH 199/212] Test matchit lib.

---
 R/fastNN_binary_func.R | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/R/fastNN_binary_func.R b/R/fastNN_binary_func.R
index 4b7f738..cebf804 100644
--- a/R/fastNN_binary_func.R
+++ b/R/fastNN_binary_func.R
@@ -190,7 +190,7 @@ fastNN_binary_func <- function(dta, trtMntVar, ids, curgrp, dist_PSM) {
         trt_id <- i
         cnt_id <- m$match.matrix[i,]
         
-        if ( !is.na(cnt_id)) ) {
+        if ( !is.na(cnt_id) ) {
             pair_id <- paste(trt_id,cnt_id, sep='__')
             dta@data$PSM_match_ID[which(dta@data[[ids]] == cnt_id | dta@data[[ids]] == trt_id)] <- pair_id        
         }

From 4af586e8cb0c36b3449644e422ea7c49ac03f738 Mon Sep 17 00:00:00 2001
From: userz <sgoodman@aiddata.org>
Date: Thu, 12 Nov 2015 15:55:54 -0500
Subject: [PATCH 200/212] Test matchit lib.

---
 R/fastNN_binary_func.R | 234 ++++++++++++++++++++---------------------
 1 file changed, 117 insertions(+), 117 deletions(-)

diff --git a/R/fastNN_binary_func.R b/R/fastNN_binary_func.R
index cebf804..5592541 100644
--- a/R/fastNN_binary_func.R
+++ b/R/fastNN_binary_func.R
@@ -165,194 +165,194 @@
 fastNN_binary_func <- function(dta, trtMntVar, ids, curgrp, dist_PSM) {
 
 
-    print('nn')
+    # print('nn')
 
-    # make sure there are at least x rows in both treated and control
-    row_min <- 30
-    trt_rows <- dta@data$TrtBin == 1
-    if ( sum(trt_rows) < row_min | sum(! trt_rows) < row_min ) {
-        return('drop')
-    }
+    # # make sure there are at least x rows in both treated and control
+    # row_min <- 30
+    # trt_rows <- dta@data$TrtBin == 1
+    # if ( sum(trt_rows) < row_min | sum(! trt_rows) < row_min ) {
+    #     return('drop')
+    # }
 
 
 
-    match_data <- dta@data[,c('PSM_trtProb', ids, 'TrtBin')]
+    # match_data <- dta@data[,c('PSM_trtProb', ids, 'TrtBin')]
 
-    rownames(match_data) <- match_data[[ids]]
+    # rownames(match_data) <- match_data[[ids]]
 
-    zzz <<- match_data
+    # zzz <<- match_data
 
-    m <- matchit(TrtBin ~ PSM_trtProb, data=match_data, method="nearest", ratio=1)
+    # m <- matchit(TrtBin ~ PSM_trtProb, data=match_data, method="nearest", ratio=1)
 
-    dta@data[["PSM_match_ID"]] <- -999
+    # dta@data[["PSM_match_ID"]] <- -999
 
-    for ( i in rownames(m$match.matrix) ) {
-        trt_id <- i
-        cnt_id <- m$match.matrix[i,]
+    # for ( i in rownames(m$match.matrix) ) {
+    #     trt_id <- i
+    #     cnt_id <- m$match.matrix[i,]
         
-        if ( !is.na(cnt_id) ) {
-            pair_id <- paste(trt_id,cnt_id, sep='__')
-            dta@data$PSM_match_ID[which(dta@data[[ids]] == cnt_id | dta@data[[ids]] == trt_id)] <- pair_id        
-        }
-    }
+    #     if ( !is.na(cnt_id) ) {
+    #         pair_id <- paste(trt_id,cnt_id, sep='__')
+    #         dta@data$PSM_match_ID[which(dta@data[[ids]] == cnt_id | dta@data[[ids]] == trt_id)] <- pair_id        
+    #     }
+    # }
 
 
     # ===========================================================================
  
 
-    # print("nn1.0")
+    print("nn1.0")
 
 
-    # #Fast nearest neighbors search - will not arrive at optimum,
-    # #but this may not be an issue for many analysis.
-    # #Effectively loops through all observations in the treatment group, ordered by PSM score - higher scores go first.
+    #Fast nearest neighbors search - will not arrive at optimum,
+    #but this may not be an issue for many analysis.
+    #Effectively loops through all observations in the treatment group, ordered by PSM score - higher scores go first.
 
-    # sorted_dta <- dta@data[order(dta@data[["PSM_trtProb"]]), c(ids, trtMntVar, "PSM_trtProb")]
+    sorted_dta <- dta@data[order(dta@data[["PSM_trtProb"]]), c(ids, trtMntVar, "PSM_trtProb")]
 
 
-    # #Conduct the matching
-    # treated <- sorted_dta[sorted_dta[[trtMntVar]] == 1, c(ids, "PSM_trtProb")]
-    # untreated <- sorted_dta[sorted_dta[[trtMntVar]] == 0, c(ids, "PSM_trtProb")]
+    #Conduct the matching
+    treated <- sorted_dta[sorted_dta[[trtMntVar]] == 1, c(ids, "PSM_trtProb")]
+    untreated <- sorted_dta[sorted_dta[[trtMntVar]] == 0, c(ids, "PSM_trtProb")]
 
-    # it_cnt = min(length(treated[[1]]), length(untreated[[1]]))
+    it_cnt = min(length(treated[[1]]), length(untreated[[1]]))
 
-    # if (it_cnt < 30) {
-    #     return('drop')
-    # }
+    if (it_cnt < 30) {
+        return('drop')
+    }
 
-    # # dta@data[["match"]] <- -999
-    # # dta@data[["PSM_distance"]] <- -999
-    # dta@data[["PSM_match_ID"]] <- -999
+    # dta@data[["match"]] <- -999
+    # dta@data[["PSM_distance"]] <- -999
+    dta@data[["PSM_match_ID"]] <- -999
 
-    # # print("nn2")
+    # print("nn2")
 
-    # #Calculate a distance decay function
-    # #to perturb pairs based on their distances.  
-    # for (j in 1:it_cnt) {
-    #     # time_list <- c()
+    #Calculate a distance decay function
+    #to perturb pairs based on their distances.  
+    for (j in 1:it_cnt) {
+        # time_list <- c()
 
-    #     print(paste("nn cnt:",j))
-    #     # timer <- proc.time()
+        print(paste("nn cnt:",j))
+        # timer <- proc.time()
 
-    #     # treated <- sorted_dta[which(sorted_dta[[trtMntVar]] == 1 & sorted_dta[['nn_matched']] == 0),]
-    #     # untreated <- sorted_dta[which(sorted_dta[[trtMntVar]] == 0 & sorted_dta[['nn_matched']] == 0),]
+        # treated <- sorted_dta[which(sorted_dta[[trtMntVar]] == 1 & sorted_dta[['nn_matched']] == 0),]
+        # untreated <- sorted_dta[which(sorted_dta[[trtMntVar]] == 0 & sorted_dta[['nn_matched']] == 0),]
         
-    #     # print(nrow(sorted_dta[which(sorted_dta[['nn_matched']] == 0),]))
+        # print(nrow(sorted_dta[which(sorted_dta[['nn_matched']] == 0),]))
 
-    #     # time_list[1] <- round((proc.time() - timer)[3],5)
-    #     # print("nn2.1")
-    #     # timer <- proc.time()
+        # time_list[1] <- round((proc.time() - timer)[3],5)
+        # print("nn2.1")
+        # timer <- proc.time()
 
-    #     #Run the KNN for all neighbors. 
-    #     # print(length(treated[[1]]))
-    #     # summary(treated[["PSM_trtProb"]])
-    #     # print(length(untreated[[1]]))
-    #     # summary(untreated[["PSM_trtProb"]])
+        #Run the KNN for all neighbors. 
+        # print(length(treated[[1]]))
+        # summary(treated[["PSM_trtProb"]])
+        # print(length(untreated[[1]]))
+        # summary(untreated[["PSM_trtProb"]])
 
-    #     k <- get.knnx(treated[["PSM_trtProb"]], untreated[["PSM_trtProb"]], 1)
+        k <- get.knnx(treated[["PSM_trtProb"]], untreated[["PSM_trtProb"]], 1)
         
-    #     # time_list[2] <- round((proc.time() - timer)[3],5)
-    #     # print("nn2.2")
-    #     # timer <- proc.time()
+        # time_list[2] <- round((proc.time() - timer)[3],5)
+        # print("nn2.2")
+        # timer <- proc.time()
 
-    #     # #Perturb the values based on the distance decay function, if selected.
-    #     # if (!is.null(dist_PSM)) {
-    #     #     for (mC in 1:length(k[[1]])) {
+        # #Perturb the values based on the distance decay function, if selected.
+        # if (!is.null(dist_PSM)) {
+        #     for (mC in 1:length(k[[1]])) {
 
-    #     #         print("nn2.2.0")
+        #         print("nn2.2.0")
 
-    #     #         #Calculate the Euclidean Distance between pairs
-    #     #         Control_ID = toString(untreated[[ids]][[mC]])
+        #         #Calculate the Euclidean Distance between pairs
+        #         Control_ID = toString(untreated[[ids]][[mC]])
 
-    #     #         mT = k[["nn.index"]][mC]
+        #         mT = k[["nn.index"]][mC]
                 
-    #     #         Treatment_ID = toString(treated[[ids]][[mT]])
+        #         Treatment_ID = toString(treated[[ids]][[mT]])
 
-    #     #         #Find the control x,y location
-    #     #         cCoord = coordinates(dta[which(dta@data[[ids]] == Control_ID),])
+        #         #Find the control x,y location
+        #         cCoord = coordinates(dta[which(dta@data[[ids]] == Control_ID),])
                 
 
-    #     #         #Find the treatment x,y location
-    #     #         tCoord = coordinates(dta[which(dta@data[[ids]] == Treatment_ID),])
+        #         #Find the treatment x,y location
+        #         tCoord = coordinates(dta[which(dta@data[[ids]] == Treatment_ID),])
 
-    #     #         y_dist = abs(cCoord[1] - cCoord[2])
-    #     #         x_dist = abs(tCoord[1] - tCoord[2])
-    #     #         euc_dist = sqrt(y_dist^2 + x_dist^2)
+        #         y_dist = abs(cCoord[1] - cCoord[2])
+        #         x_dist = abs(tCoord[1] - tCoord[2])
+        #         euc_dist = sqrt(y_dist^2 + x_dist^2)
                 
-    #     #         print("nn2.2.1")
+        #         print("nn2.2.1")
 
-    #     #         PSM_score = k[["nn.dist"]][mC]
-    #     #         geog_Weight = pairDistWeight(dist=euc_dist,threshold=dist_PSM,type="Spherical")
+        #         PSM_score = k[["nn.dist"]][mC]
+        #         geog_Weight = pairDistWeight(dist=euc_dist,threshold=dist_PSM,type="Spherical")
 
-    #     #         print("nn2.2.2")
+        #         print("nn2.2.2")
 
                 
-    #     #         k[["nn.dist"]][mC] <- ((1-geog_Weight) * PSM_score)
+        #         k[["nn.dist"]][mC] <- ((1-geog_Weight) * PSM_score)
 
-    #     #     }
+        #     }
       
-    #     # }
+        # }
 
-    #     # time_list[3] <- round((proc.time() - timer)[3],5)
-    #     print("nn2.3")
-    #     # timer <- proc.time()
+        # time_list[3] <- round((proc.time() - timer)[3],5)
+        print("nn2.3")
+        # timer <- proc.time()
 
-    #     #Add the matched treatment and control values to the recording data frame
-    #     #best_m_control is the row in the "distance" matrix with the lowest value.  This is the same row as in the index.
-    #     best_m_control = which(k[["nn.dist"]] %in% sort(k[["nn.dist"]])[1])
+        #Add the matched treatment and control values to the recording data frame
+        #best_m_control is the row in the "distance" matrix with the lowest value.  This is the same row as in the index.
+        best_m_control = which(k[["nn.dist"]] %in% sort(k[["nn.dist"]])[1])
         
-    #     #This will give us the matched index in the "treated" dataset.
-    #     best_m_treated = k[["nn.index"]][best_m_control]
+        #This will give us the matched index in the "treated" dataset.
+        best_m_treated = k[["nn.index"]][best_m_control]
         
 
-    #     # #Control PSM ID
-    #     # cid_txt = paste("untreated$",ids,"[",best_m_control,"]",sep="")
-    #     # Control_ID = toString(eval(parse(text=cid_txt)))
+        # #Control PSM ID
+        # cid_txt = paste("untreated$",ids,"[",best_m_control,"]",sep="")
+        # Control_ID = toString(eval(parse(text=cid_txt)))
             
-    #     # #Treatment PSM ID
-    #     # tid_txt = paste("treated$",ids,"[",best_m_treated,"]",sep="")
-    #     # Treatment_ID = toString(eval(parse(text=tid_txt)))
+        # #Treatment PSM ID
+        # tid_txt = paste("treated$",ids,"[",best_m_treated,"]",sep="")
+        # Treatment_ID = toString(eval(parse(text=tid_txt)))
 
-    #     #Control PSM ID and Treatment PSM ID
-    #     Control_ID = toString(untreated[[ids]][best_m_control])
-    #     Treatment_ID = toString(treated[[ids]][best_m_treated])
+        #Control PSM ID and Treatment PSM ID
+        Control_ID = toString(untreated[[ids]][best_m_control])
+        Treatment_ID = toString(treated[[ids]][best_m_treated])
 
 
-    #     #Create a unique pair ID for each group (will simply append a "1" if only 1 group)
-    #     pair_id = paste(curgrp,j, sep="")
+        #Create a unique pair ID for each group (will simply append a "1" if only 1 group)
+        pair_id = paste(curgrp,j, sep="")
         
 
 
-    #     # time_list[4] <- round((proc.time() - timer)[3],5)
-    #     print("nn2.4x")
-    #     # timer <- proc.time()
+        # time_list[4] <- round((proc.time() - timer)[3],5)
+        print("nn2.4x")
+        # timer <- proc.time()
 
-    #     #Add the Treatment ID to the Control Row and Add the Control ID to the Treatment Row
-    #     # dta@data$match[which(dta@data[[ids]] == Control_ID)] <- Treatment_ID
-    #     # dta@data$match[which(dta@data[[ids]] == Treatment_ID)] <- Control_ID
+        #Add the Treatment ID to the Control Row and Add the Control ID to the Treatment Row
+        # dta@data$match[which(dta@data[[ids]] == Control_ID)] <- Treatment_ID
+        # dta@data$match[which(dta@data[[ids]] == Treatment_ID)] <- Control_ID
 
 
-    #     # dta@data$PSM_distance[which(dta@data[[ids]] == Control_ID | dta@data[[ids]] == Treatment_ID)] <- k[["nn.dist"]][,1][best_m_control]
-    #     dta@data$PSM_match_ID[which(dta@data[[ids]] == Control_ID | dta@data[[ids]] == Treatment_ID)] <- pair_id        
+        # dta@data$PSM_distance[which(dta@data[[ids]] == Control_ID | dta@data[[ids]] == Treatment_ID)] <- k[["nn.dist"]][,1][best_m_control]
+        dta@data$PSM_match_ID[which(dta@data[[ids]] == Control_ID | dta@data[[ids]] == Treatment_ID)] <- pair_id        
 
 
         
-    #     # time_list[5] <- round((proc.time() - timer)[3],5)
-    #     # timer <- proc.time()
+        # time_list[5] <- round((proc.time() - timer)[3],5)
+        # timer <- proc.time()
 
-    #     #Drop the paired match out of the iteration matrix 
-    #     # sorted_dta <- sorted_dta[sorted_dta[[ids]] != Treatment_ID ,]
-    #     # sorted_dta <- sorted_dta[sorted_dta[[ids]] != Control_ID ,]    
+        #Drop the paired match out of the iteration matrix 
+        # sorted_dta <- sorted_dta[sorted_dta[[ids]] != Treatment_ID ,]
+        # sorted_dta <- sorted_dta[sorted_dta[[ids]] != Control_ID ,]    
     
-    #     # sorted_dta[which(sorted_dta[[ids]] == Control_ID | sorted_dta[[ids]] == Treatment_ID),][['nn_matched']] <- 1
+        # sorted_dta[which(sorted_dta[[ids]] == Control_ID | sorted_dta[[ids]] == Treatment_ID),][['nn_matched']] <- 1
 
-    #     treated <- treated[which(treated[[ids]] != Treatment_ID),]
-    #     untreated <- untreated[which(untreated[[ids]] != Control_ID),]
+        treated <- treated[which(treated[[ids]] != Treatment_ID),]
+        untreated <- untreated[which(untreated[[ids]] != Control_ID),]
         
-    #     # time_list[6] <- round((proc.time() - timer)[3],5)
-    #     # print(paste(time_list))
+        # time_list[6] <- round((proc.time() - timer)[3],5)
+        # print(paste(time_list))
 
-    # }
+    }
 
 
     return(dta) 

From 32cce26632157766f8a2e09af71b72a6a2a7f195 Mon Sep 17 00:00:00 2001
From: userz <sgoodman@aiddata.org>
Date: Thu, 12 Nov 2015 15:57:21 -0500
Subject: [PATCH 201/212] Test matchit lib.

---
 R/fastNN_binary_func.R | 234 ++++++++++++++++++++---------------------
 1 file changed, 117 insertions(+), 117 deletions(-)

diff --git a/R/fastNN_binary_func.R b/R/fastNN_binary_func.R
index 5592541..cebf804 100644
--- a/R/fastNN_binary_func.R
+++ b/R/fastNN_binary_func.R
@@ -165,194 +165,194 @@
 fastNN_binary_func <- function(dta, trtMntVar, ids, curgrp, dist_PSM) {
 
 
-    # print('nn')
+    print('nn')
 
-    # # make sure there are at least x rows in both treated and control
-    # row_min <- 30
-    # trt_rows <- dta@data$TrtBin == 1
-    # if ( sum(trt_rows) < row_min | sum(! trt_rows) < row_min ) {
-    #     return('drop')
-    # }
+    # make sure there are at least x rows in both treated and control
+    row_min <- 30
+    trt_rows <- dta@data$TrtBin == 1
+    if ( sum(trt_rows) < row_min | sum(! trt_rows) < row_min ) {
+        return('drop')
+    }
 
 
 
-    # match_data <- dta@data[,c('PSM_trtProb', ids, 'TrtBin')]
+    match_data <- dta@data[,c('PSM_trtProb', ids, 'TrtBin')]
 
-    # rownames(match_data) <- match_data[[ids]]
+    rownames(match_data) <- match_data[[ids]]
 
-    # zzz <<- match_data
+    zzz <<- match_data
 
-    # m <- matchit(TrtBin ~ PSM_trtProb, data=match_data, method="nearest", ratio=1)
+    m <- matchit(TrtBin ~ PSM_trtProb, data=match_data, method="nearest", ratio=1)
 
-    # dta@data[["PSM_match_ID"]] <- -999
+    dta@data[["PSM_match_ID"]] <- -999
 
-    # for ( i in rownames(m$match.matrix) ) {
-    #     trt_id <- i
-    #     cnt_id <- m$match.matrix[i,]
+    for ( i in rownames(m$match.matrix) ) {
+        trt_id <- i
+        cnt_id <- m$match.matrix[i,]
         
-    #     if ( !is.na(cnt_id) ) {
-    #         pair_id <- paste(trt_id,cnt_id, sep='__')
-    #         dta@data$PSM_match_ID[which(dta@data[[ids]] == cnt_id | dta@data[[ids]] == trt_id)] <- pair_id        
-    #     }
-    # }
+        if ( !is.na(cnt_id) ) {
+            pair_id <- paste(trt_id,cnt_id, sep='__')
+            dta@data$PSM_match_ID[which(dta@data[[ids]] == cnt_id | dta@data[[ids]] == trt_id)] <- pair_id        
+        }
+    }
 
 
     # ===========================================================================
  
 
-    print("nn1.0")
+    # print("nn1.0")
 
 
-    #Fast nearest neighbors search - will not arrive at optimum,
-    #but this may not be an issue for many analysis.
-    #Effectively loops through all observations in the treatment group, ordered by PSM score - higher scores go first.
+    # #Fast nearest neighbors search - will not arrive at optimum,
+    # #but this may not be an issue for many analysis.
+    # #Effectively loops through all observations in the treatment group, ordered by PSM score - higher scores go first.
 
-    sorted_dta <- dta@data[order(dta@data[["PSM_trtProb"]]), c(ids, trtMntVar, "PSM_trtProb")]
+    # sorted_dta <- dta@data[order(dta@data[["PSM_trtProb"]]), c(ids, trtMntVar, "PSM_trtProb")]
 
 
-    #Conduct the matching
-    treated <- sorted_dta[sorted_dta[[trtMntVar]] == 1, c(ids, "PSM_trtProb")]
-    untreated <- sorted_dta[sorted_dta[[trtMntVar]] == 0, c(ids, "PSM_trtProb")]
+    # #Conduct the matching
+    # treated <- sorted_dta[sorted_dta[[trtMntVar]] == 1, c(ids, "PSM_trtProb")]
+    # untreated <- sorted_dta[sorted_dta[[trtMntVar]] == 0, c(ids, "PSM_trtProb")]
 
-    it_cnt = min(length(treated[[1]]), length(untreated[[1]]))
+    # it_cnt = min(length(treated[[1]]), length(untreated[[1]]))
 
-    if (it_cnt < 30) {
-        return('drop')
-    }
+    # if (it_cnt < 30) {
+    #     return('drop')
+    # }
 
-    # dta@data[["match"]] <- -999
-    # dta@data[["PSM_distance"]] <- -999
-    dta@data[["PSM_match_ID"]] <- -999
+    # # dta@data[["match"]] <- -999
+    # # dta@data[["PSM_distance"]] <- -999
+    # dta@data[["PSM_match_ID"]] <- -999
 
-    # print("nn2")
+    # # print("nn2")
 
-    #Calculate a distance decay function
-    #to perturb pairs based on their distances.  
-    for (j in 1:it_cnt) {
-        # time_list <- c()
+    # #Calculate a distance decay function
+    # #to perturb pairs based on their distances.  
+    # for (j in 1:it_cnt) {
+    #     # time_list <- c()
 
-        print(paste("nn cnt:",j))
-        # timer <- proc.time()
+    #     print(paste("nn cnt:",j))
+    #     # timer <- proc.time()
 
-        # treated <- sorted_dta[which(sorted_dta[[trtMntVar]] == 1 & sorted_dta[['nn_matched']] == 0),]
-        # untreated <- sorted_dta[which(sorted_dta[[trtMntVar]] == 0 & sorted_dta[['nn_matched']] == 0),]
+    #     # treated <- sorted_dta[which(sorted_dta[[trtMntVar]] == 1 & sorted_dta[['nn_matched']] == 0),]
+    #     # untreated <- sorted_dta[which(sorted_dta[[trtMntVar]] == 0 & sorted_dta[['nn_matched']] == 0),]
         
-        # print(nrow(sorted_dta[which(sorted_dta[['nn_matched']] == 0),]))
+    #     # print(nrow(sorted_dta[which(sorted_dta[['nn_matched']] == 0),]))
 
-        # time_list[1] <- round((proc.time() - timer)[3],5)
-        # print("nn2.1")
-        # timer <- proc.time()
+    #     # time_list[1] <- round((proc.time() - timer)[3],5)
+    #     # print("nn2.1")
+    #     # timer <- proc.time()
 
-        #Run the KNN for all neighbors. 
-        # print(length(treated[[1]]))
-        # summary(treated[["PSM_trtProb"]])
-        # print(length(untreated[[1]]))
-        # summary(untreated[["PSM_trtProb"]])
+    #     #Run the KNN for all neighbors. 
+    #     # print(length(treated[[1]]))
+    #     # summary(treated[["PSM_trtProb"]])
+    #     # print(length(untreated[[1]]))
+    #     # summary(untreated[["PSM_trtProb"]])
 
-        k <- get.knnx(treated[["PSM_trtProb"]], untreated[["PSM_trtProb"]], 1)
+    #     k <- get.knnx(treated[["PSM_trtProb"]], untreated[["PSM_trtProb"]], 1)
         
-        # time_list[2] <- round((proc.time() - timer)[3],5)
-        # print("nn2.2")
-        # timer <- proc.time()
+    #     # time_list[2] <- round((proc.time() - timer)[3],5)
+    #     # print("nn2.2")
+    #     # timer <- proc.time()
 
-        # #Perturb the values based on the distance decay function, if selected.
-        # if (!is.null(dist_PSM)) {
-        #     for (mC in 1:length(k[[1]])) {
+    #     # #Perturb the values based on the distance decay function, if selected.
+    #     # if (!is.null(dist_PSM)) {
+    #     #     for (mC in 1:length(k[[1]])) {
 
-        #         print("nn2.2.0")
+    #     #         print("nn2.2.0")
 
-        #         #Calculate the Euclidean Distance between pairs
-        #         Control_ID = toString(untreated[[ids]][[mC]])
+    #     #         #Calculate the Euclidean Distance between pairs
+    #     #         Control_ID = toString(untreated[[ids]][[mC]])
 
-        #         mT = k[["nn.index"]][mC]
+    #     #         mT = k[["nn.index"]][mC]
                 
-        #         Treatment_ID = toString(treated[[ids]][[mT]])
+    #     #         Treatment_ID = toString(treated[[ids]][[mT]])
 
-        #         #Find the control x,y location
-        #         cCoord = coordinates(dta[which(dta@data[[ids]] == Control_ID),])
+    #     #         #Find the control x,y location
+    #     #         cCoord = coordinates(dta[which(dta@data[[ids]] == Control_ID),])
                 
 
-        #         #Find the treatment x,y location
-        #         tCoord = coordinates(dta[which(dta@data[[ids]] == Treatment_ID),])
+    #     #         #Find the treatment x,y location
+    #     #         tCoord = coordinates(dta[which(dta@data[[ids]] == Treatment_ID),])
 
-        #         y_dist = abs(cCoord[1] - cCoord[2])
-        #         x_dist = abs(tCoord[1] - tCoord[2])
-        #         euc_dist = sqrt(y_dist^2 + x_dist^2)
+    #     #         y_dist = abs(cCoord[1] - cCoord[2])
+    #     #         x_dist = abs(tCoord[1] - tCoord[2])
+    #     #         euc_dist = sqrt(y_dist^2 + x_dist^2)
                 
-        #         print("nn2.2.1")
+    #     #         print("nn2.2.1")
 
-        #         PSM_score = k[["nn.dist"]][mC]
-        #         geog_Weight = pairDistWeight(dist=euc_dist,threshold=dist_PSM,type="Spherical")
+    #     #         PSM_score = k[["nn.dist"]][mC]
+    #     #         geog_Weight = pairDistWeight(dist=euc_dist,threshold=dist_PSM,type="Spherical")
 
-        #         print("nn2.2.2")
+    #     #         print("nn2.2.2")
 
                 
-        #         k[["nn.dist"]][mC] <- ((1-geog_Weight) * PSM_score)
+    #     #         k[["nn.dist"]][mC] <- ((1-geog_Weight) * PSM_score)
 
-        #     }
+    #     #     }
       
-        # }
+    #     # }
 
-        # time_list[3] <- round((proc.time() - timer)[3],5)
-        print("nn2.3")
-        # timer <- proc.time()
+    #     # time_list[3] <- round((proc.time() - timer)[3],5)
+    #     print("nn2.3")
+    #     # timer <- proc.time()
 
-        #Add the matched treatment and control values to the recording data frame
-        #best_m_control is the row in the "distance" matrix with the lowest value.  This is the same row as in the index.
-        best_m_control = which(k[["nn.dist"]] %in% sort(k[["nn.dist"]])[1])
+    #     #Add the matched treatment and control values to the recording data frame
+    #     #best_m_control is the row in the "distance" matrix with the lowest value.  This is the same row as in the index.
+    #     best_m_control = which(k[["nn.dist"]] %in% sort(k[["nn.dist"]])[1])
         
-        #This will give us the matched index in the "treated" dataset.
-        best_m_treated = k[["nn.index"]][best_m_control]
+    #     #This will give us the matched index in the "treated" dataset.
+    #     best_m_treated = k[["nn.index"]][best_m_control]
         
 
-        # #Control PSM ID
-        # cid_txt = paste("untreated$",ids,"[",best_m_control,"]",sep="")
-        # Control_ID = toString(eval(parse(text=cid_txt)))
+    #     # #Control PSM ID
+    #     # cid_txt = paste("untreated$",ids,"[",best_m_control,"]",sep="")
+    #     # Control_ID = toString(eval(parse(text=cid_txt)))
             
-        # #Treatment PSM ID
-        # tid_txt = paste("treated$",ids,"[",best_m_treated,"]",sep="")
-        # Treatment_ID = toString(eval(parse(text=tid_txt)))
+    #     # #Treatment PSM ID
+    #     # tid_txt = paste("treated$",ids,"[",best_m_treated,"]",sep="")
+    #     # Treatment_ID = toString(eval(parse(text=tid_txt)))
 
-        #Control PSM ID and Treatment PSM ID
-        Control_ID = toString(untreated[[ids]][best_m_control])
-        Treatment_ID = toString(treated[[ids]][best_m_treated])
+    #     #Control PSM ID and Treatment PSM ID
+    #     Control_ID = toString(untreated[[ids]][best_m_control])
+    #     Treatment_ID = toString(treated[[ids]][best_m_treated])
 
 
-        #Create a unique pair ID for each group (will simply append a "1" if only 1 group)
-        pair_id = paste(curgrp,j, sep="")
+    #     #Create a unique pair ID for each group (will simply append a "1" if only 1 group)
+    #     pair_id = paste(curgrp,j, sep="")
         
 
 
-        # time_list[4] <- round((proc.time() - timer)[3],5)
-        print("nn2.4x")
-        # timer <- proc.time()
+    #     # time_list[4] <- round((proc.time() - timer)[3],5)
+    #     print("nn2.4x")
+    #     # timer <- proc.time()
 
-        #Add the Treatment ID to the Control Row and Add the Control ID to the Treatment Row
-        # dta@data$match[which(dta@data[[ids]] == Control_ID)] <- Treatment_ID
-        # dta@data$match[which(dta@data[[ids]] == Treatment_ID)] <- Control_ID
+    #     #Add the Treatment ID to the Control Row and Add the Control ID to the Treatment Row
+    #     # dta@data$match[which(dta@data[[ids]] == Control_ID)] <- Treatment_ID
+    #     # dta@data$match[which(dta@data[[ids]] == Treatment_ID)] <- Control_ID
 
 
-        # dta@data$PSM_distance[which(dta@data[[ids]] == Control_ID | dta@data[[ids]] == Treatment_ID)] <- k[["nn.dist"]][,1][best_m_control]
-        dta@data$PSM_match_ID[which(dta@data[[ids]] == Control_ID | dta@data[[ids]] == Treatment_ID)] <- pair_id        
+    #     # dta@data$PSM_distance[which(dta@data[[ids]] == Control_ID | dta@data[[ids]] == Treatment_ID)] <- k[["nn.dist"]][,1][best_m_control]
+    #     dta@data$PSM_match_ID[which(dta@data[[ids]] == Control_ID | dta@data[[ids]] == Treatment_ID)] <- pair_id        
 
 
         
-        # time_list[5] <- round((proc.time() - timer)[3],5)
-        # timer <- proc.time()
+    #     # time_list[5] <- round((proc.time() - timer)[3],5)
+    #     # timer <- proc.time()
 
-        #Drop the paired match out of the iteration matrix 
-        # sorted_dta <- sorted_dta[sorted_dta[[ids]] != Treatment_ID ,]
-        # sorted_dta <- sorted_dta[sorted_dta[[ids]] != Control_ID ,]    
+    #     #Drop the paired match out of the iteration matrix 
+    #     # sorted_dta <- sorted_dta[sorted_dta[[ids]] != Treatment_ID ,]
+    #     # sorted_dta <- sorted_dta[sorted_dta[[ids]] != Control_ID ,]    
     
-        # sorted_dta[which(sorted_dta[[ids]] == Control_ID | sorted_dta[[ids]] == Treatment_ID),][['nn_matched']] <- 1
+    #     # sorted_dta[which(sorted_dta[[ids]] == Control_ID | sorted_dta[[ids]] == Treatment_ID),][['nn_matched']] <- 1
 
-        treated <- treated[which(treated[[ids]] != Treatment_ID),]
-        untreated <- untreated[which(untreated[[ids]] != Control_ID),]
+    #     treated <- treated[which(treated[[ids]] != Treatment_ID),]
+    #     untreated <- untreated[which(untreated[[ids]] != Control_ID),]
         
-        # time_list[6] <- round((proc.time() - timer)[3],5)
-        # print(paste(time_list))
+    #     # time_list[6] <- round((proc.time() - timer)[3],5)
+    #     # print(paste(time_list))
 
-    }
+    # }
 
 
     return(dta) 

From c37946e0ced0b4d000e3a38ef54893af7862ce55 Mon Sep 17 00:00:00 2001
From: userz <sgoodman@aiddata.org>
Date: Thu, 12 Nov 2015 16:16:33 -0500
Subject: [PATCH 202/212] Test matchit lib.

---
 R/fastNN_binary_func.R | 234 ++++++++++++++++++++---------------------
 1 file changed, 117 insertions(+), 117 deletions(-)

diff --git a/R/fastNN_binary_func.R b/R/fastNN_binary_func.R
index cebf804..5592541 100644
--- a/R/fastNN_binary_func.R
+++ b/R/fastNN_binary_func.R
@@ -165,194 +165,194 @@
 fastNN_binary_func <- function(dta, trtMntVar, ids, curgrp, dist_PSM) {
 
 
-    print('nn')
+    # print('nn')
 
-    # make sure there are at least x rows in both treated and control
-    row_min <- 30
-    trt_rows <- dta@data$TrtBin == 1
-    if ( sum(trt_rows) < row_min | sum(! trt_rows) < row_min ) {
-        return('drop')
-    }
+    # # make sure there are at least x rows in both treated and control
+    # row_min <- 30
+    # trt_rows <- dta@data$TrtBin == 1
+    # if ( sum(trt_rows) < row_min | sum(! trt_rows) < row_min ) {
+    #     return('drop')
+    # }
 
 
 
-    match_data <- dta@data[,c('PSM_trtProb', ids, 'TrtBin')]
+    # match_data <- dta@data[,c('PSM_trtProb', ids, 'TrtBin')]
 
-    rownames(match_data) <- match_data[[ids]]
+    # rownames(match_data) <- match_data[[ids]]
 
-    zzz <<- match_data
+    # zzz <<- match_data
 
-    m <- matchit(TrtBin ~ PSM_trtProb, data=match_data, method="nearest", ratio=1)
+    # m <- matchit(TrtBin ~ PSM_trtProb, data=match_data, method="nearest", ratio=1)
 
-    dta@data[["PSM_match_ID"]] <- -999
+    # dta@data[["PSM_match_ID"]] <- -999
 
-    for ( i in rownames(m$match.matrix) ) {
-        trt_id <- i
-        cnt_id <- m$match.matrix[i,]
+    # for ( i in rownames(m$match.matrix) ) {
+    #     trt_id <- i
+    #     cnt_id <- m$match.matrix[i,]
         
-        if ( !is.na(cnt_id) ) {
-            pair_id <- paste(trt_id,cnt_id, sep='__')
-            dta@data$PSM_match_ID[which(dta@data[[ids]] == cnt_id | dta@data[[ids]] == trt_id)] <- pair_id        
-        }
-    }
+    #     if ( !is.na(cnt_id) ) {
+    #         pair_id <- paste(trt_id,cnt_id, sep='__')
+    #         dta@data$PSM_match_ID[which(dta@data[[ids]] == cnt_id | dta@data[[ids]] == trt_id)] <- pair_id        
+    #     }
+    # }
 
 
     # ===========================================================================
  
 
-    # print("nn1.0")
+    print("nn1.0")
 
 
-    # #Fast nearest neighbors search - will not arrive at optimum,
-    # #but this may not be an issue for many analysis.
-    # #Effectively loops through all observations in the treatment group, ordered by PSM score - higher scores go first.
+    #Fast nearest neighbors search - will not arrive at optimum,
+    #but this may not be an issue for many analysis.
+    #Effectively loops through all observations in the treatment group, ordered by PSM score - higher scores go first.
 
-    # sorted_dta <- dta@data[order(dta@data[["PSM_trtProb"]]), c(ids, trtMntVar, "PSM_trtProb")]
+    sorted_dta <- dta@data[order(dta@data[["PSM_trtProb"]]), c(ids, trtMntVar, "PSM_trtProb")]
 
 
-    # #Conduct the matching
-    # treated <- sorted_dta[sorted_dta[[trtMntVar]] == 1, c(ids, "PSM_trtProb")]
-    # untreated <- sorted_dta[sorted_dta[[trtMntVar]] == 0, c(ids, "PSM_trtProb")]
+    #Conduct the matching
+    treated <- sorted_dta[sorted_dta[[trtMntVar]] == 1, c(ids, "PSM_trtProb")]
+    untreated <- sorted_dta[sorted_dta[[trtMntVar]] == 0, c(ids, "PSM_trtProb")]
 
-    # it_cnt = min(length(treated[[1]]), length(untreated[[1]]))
+    it_cnt = min(length(treated[[1]]), length(untreated[[1]]))
 
-    # if (it_cnt < 30) {
-    #     return('drop')
-    # }
+    if (it_cnt < 30) {
+        return('drop')
+    }
 
-    # # dta@data[["match"]] <- -999
-    # # dta@data[["PSM_distance"]] <- -999
-    # dta@data[["PSM_match_ID"]] <- -999
+    # dta@data[["match"]] <- -999
+    # dta@data[["PSM_distance"]] <- -999
+    dta@data[["PSM_match_ID"]] <- -999
 
-    # # print("nn2")
+    # print("nn2")
 
-    # #Calculate a distance decay function
-    # #to perturb pairs based on their distances.  
-    # for (j in 1:it_cnt) {
-    #     # time_list <- c()
+    #Calculate a distance decay function
+    #to perturb pairs based on their distances.  
+    for (j in 1:it_cnt) {
+        # time_list <- c()
 
-    #     print(paste("nn cnt:",j))
-    #     # timer <- proc.time()
+        print(paste("nn cnt:",j))
+        # timer <- proc.time()
 
-    #     # treated <- sorted_dta[which(sorted_dta[[trtMntVar]] == 1 & sorted_dta[['nn_matched']] == 0),]
-    #     # untreated <- sorted_dta[which(sorted_dta[[trtMntVar]] == 0 & sorted_dta[['nn_matched']] == 0),]
+        # treated <- sorted_dta[which(sorted_dta[[trtMntVar]] == 1 & sorted_dta[['nn_matched']] == 0),]
+        # untreated <- sorted_dta[which(sorted_dta[[trtMntVar]] == 0 & sorted_dta[['nn_matched']] == 0),]
         
-    #     # print(nrow(sorted_dta[which(sorted_dta[['nn_matched']] == 0),]))
+        # print(nrow(sorted_dta[which(sorted_dta[['nn_matched']] == 0),]))
 
-    #     # time_list[1] <- round((proc.time() - timer)[3],5)
-    #     # print("nn2.1")
-    #     # timer <- proc.time()
+        # time_list[1] <- round((proc.time() - timer)[3],5)
+        # print("nn2.1")
+        # timer <- proc.time()
 
-    #     #Run the KNN for all neighbors. 
-    #     # print(length(treated[[1]]))
-    #     # summary(treated[["PSM_trtProb"]])
-    #     # print(length(untreated[[1]]))
-    #     # summary(untreated[["PSM_trtProb"]])
+        #Run the KNN for all neighbors. 
+        # print(length(treated[[1]]))
+        # summary(treated[["PSM_trtProb"]])
+        # print(length(untreated[[1]]))
+        # summary(untreated[["PSM_trtProb"]])
 
-    #     k <- get.knnx(treated[["PSM_trtProb"]], untreated[["PSM_trtProb"]], 1)
+        k <- get.knnx(treated[["PSM_trtProb"]], untreated[["PSM_trtProb"]], 1)
         
-    #     # time_list[2] <- round((proc.time() - timer)[3],5)
-    #     # print("nn2.2")
-    #     # timer <- proc.time()
+        # time_list[2] <- round((proc.time() - timer)[3],5)
+        # print("nn2.2")
+        # timer <- proc.time()
 
-    #     # #Perturb the values based on the distance decay function, if selected.
-    #     # if (!is.null(dist_PSM)) {
-    #     #     for (mC in 1:length(k[[1]])) {
+        # #Perturb the values based on the distance decay function, if selected.
+        # if (!is.null(dist_PSM)) {
+        #     for (mC in 1:length(k[[1]])) {
 
-    #     #         print("nn2.2.0")
+        #         print("nn2.2.0")
 
-    #     #         #Calculate the Euclidean Distance between pairs
-    #     #         Control_ID = toString(untreated[[ids]][[mC]])
+        #         #Calculate the Euclidean Distance between pairs
+        #         Control_ID = toString(untreated[[ids]][[mC]])
 
-    #     #         mT = k[["nn.index"]][mC]
+        #         mT = k[["nn.index"]][mC]
                 
-    #     #         Treatment_ID = toString(treated[[ids]][[mT]])
+        #         Treatment_ID = toString(treated[[ids]][[mT]])
 
-    #     #         #Find the control x,y location
-    #     #         cCoord = coordinates(dta[which(dta@data[[ids]] == Control_ID),])
+        #         #Find the control x,y location
+        #         cCoord = coordinates(dta[which(dta@data[[ids]] == Control_ID),])
                 
 
-    #     #         #Find the treatment x,y location
-    #     #         tCoord = coordinates(dta[which(dta@data[[ids]] == Treatment_ID),])
+        #         #Find the treatment x,y location
+        #         tCoord = coordinates(dta[which(dta@data[[ids]] == Treatment_ID),])
 
-    #     #         y_dist = abs(cCoord[1] - cCoord[2])
-    #     #         x_dist = abs(tCoord[1] - tCoord[2])
-    #     #         euc_dist = sqrt(y_dist^2 + x_dist^2)
+        #         y_dist = abs(cCoord[1] - cCoord[2])
+        #         x_dist = abs(tCoord[1] - tCoord[2])
+        #         euc_dist = sqrt(y_dist^2 + x_dist^2)
                 
-    #     #         print("nn2.2.1")
+        #         print("nn2.2.1")
 
-    #     #         PSM_score = k[["nn.dist"]][mC]
-    #     #         geog_Weight = pairDistWeight(dist=euc_dist,threshold=dist_PSM,type="Spherical")
+        #         PSM_score = k[["nn.dist"]][mC]
+        #         geog_Weight = pairDistWeight(dist=euc_dist,threshold=dist_PSM,type="Spherical")
 
-    #     #         print("nn2.2.2")
+        #         print("nn2.2.2")
 
                 
-    #     #         k[["nn.dist"]][mC] <- ((1-geog_Weight) * PSM_score)
+        #         k[["nn.dist"]][mC] <- ((1-geog_Weight) * PSM_score)
 
-    #     #     }
+        #     }
       
-    #     # }
+        # }
 
-    #     # time_list[3] <- round((proc.time() - timer)[3],5)
-    #     print("nn2.3")
-    #     # timer <- proc.time()
+        # time_list[3] <- round((proc.time() - timer)[3],5)
+        print("nn2.3")
+        # timer <- proc.time()
 
-    #     #Add the matched treatment and control values to the recording data frame
-    #     #best_m_control is the row in the "distance" matrix with the lowest value.  This is the same row as in the index.
-    #     best_m_control = which(k[["nn.dist"]] %in% sort(k[["nn.dist"]])[1])
+        #Add the matched treatment and control values to the recording data frame
+        #best_m_control is the row in the "distance" matrix with the lowest value.  This is the same row as in the index.
+        best_m_control = which(k[["nn.dist"]] %in% sort(k[["nn.dist"]])[1])
         
-    #     #This will give us the matched index in the "treated" dataset.
-    #     best_m_treated = k[["nn.index"]][best_m_control]
+        #This will give us the matched index in the "treated" dataset.
+        best_m_treated = k[["nn.index"]][best_m_control]
         
 
-    #     # #Control PSM ID
-    #     # cid_txt = paste("untreated$",ids,"[",best_m_control,"]",sep="")
-    #     # Control_ID = toString(eval(parse(text=cid_txt)))
+        # #Control PSM ID
+        # cid_txt = paste("untreated$",ids,"[",best_m_control,"]",sep="")
+        # Control_ID = toString(eval(parse(text=cid_txt)))
             
-    #     # #Treatment PSM ID
-    #     # tid_txt = paste("treated$",ids,"[",best_m_treated,"]",sep="")
-    #     # Treatment_ID = toString(eval(parse(text=tid_txt)))
+        # #Treatment PSM ID
+        # tid_txt = paste("treated$",ids,"[",best_m_treated,"]",sep="")
+        # Treatment_ID = toString(eval(parse(text=tid_txt)))
 
-    #     #Control PSM ID and Treatment PSM ID
-    #     Control_ID = toString(untreated[[ids]][best_m_control])
-    #     Treatment_ID = toString(treated[[ids]][best_m_treated])
+        #Control PSM ID and Treatment PSM ID
+        Control_ID = toString(untreated[[ids]][best_m_control])
+        Treatment_ID = toString(treated[[ids]][best_m_treated])
 
 
-    #     #Create a unique pair ID for each group (will simply append a "1" if only 1 group)
-    #     pair_id = paste(curgrp,j, sep="")
+        #Create a unique pair ID for each group (will simply append a "1" if only 1 group)
+        pair_id = paste(curgrp,j, sep="")
         
 
 
-    #     # time_list[4] <- round((proc.time() - timer)[3],5)
-    #     print("nn2.4x")
-    #     # timer <- proc.time()
+        # time_list[4] <- round((proc.time() - timer)[3],5)
+        print("nn2.4x")
+        # timer <- proc.time()
 
-    #     #Add the Treatment ID to the Control Row and Add the Control ID to the Treatment Row
-    #     # dta@data$match[which(dta@data[[ids]] == Control_ID)] <- Treatment_ID
-    #     # dta@data$match[which(dta@data[[ids]] == Treatment_ID)] <- Control_ID
+        #Add the Treatment ID to the Control Row and Add the Control ID to the Treatment Row
+        # dta@data$match[which(dta@data[[ids]] == Control_ID)] <- Treatment_ID
+        # dta@data$match[which(dta@data[[ids]] == Treatment_ID)] <- Control_ID
 
 
-    #     # dta@data$PSM_distance[which(dta@data[[ids]] == Control_ID | dta@data[[ids]] == Treatment_ID)] <- k[["nn.dist"]][,1][best_m_control]
-    #     dta@data$PSM_match_ID[which(dta@data[[ids]] == Control_ID | dta@data[[ids]] == Treatment_ID)] <- pair_id        
+        # dta@data$PSM_distance[which(dta@data[[ids]] == Control_ID | dta@data[[ids]] == Treatment_ID)] <- k[["nn.dist"]][,1][best_m_control]
+        dta@data$PSM_match_ID[which(dta@data[[ids]] == Control_ID | dta@data[[ids]] == Treatment_ID)] <- pair_id        
 
 
         
-    #     # time_list[5] <- round((proc.time() - timer)[3],5)
-    #     # timer <- proc.time()
+        # time_list[5] <- round((proc.time() - timer)[3],5)
+        # timer <- proc.time()
 
-    #     #Drop the paired match out of the iteration matrix 
-    #     # sorted_dta <- sorted_dta[sorted_dta[[ids]] != Treatment_ID ,]
-    #     # sorted_dta <- sorted_dta[sorted_dta[[ids]] != Control_ID ,]    
+        #Drop the paired match out of the iteration matrix 
+        # sorted_dta <- sorted_dta[sorted_dta[[ids]] != Treatment_ID ,]
+        # sorted_dta <- sorted_dta[sorted_dta[[ids]] != Control_ID ,]    
     
-    #     # sorted_dta[which(sorted_dta[[ids]] == Control_ID | sorted_dta[[ids]] == Treatment_ID),][['nn_matched']] <- 1
+        # sorted_dta[which(sorted_dta[[ids]] == Control_ID | sorted_dta[[ids]] == Treatment_ID),][['nn_matched']] <- 1
 
-    #     treated <- treated[which(treated[[ids]] != Treatment_ID),]
-    #     untreated <- untreated[which(untreated[[ids]] != Control_ID),]
+        treated <- treated[which(treated[[ids]] != Treatment_ID),]
+        untreated <- untreated[which(untreated[[ids]] != Control_ID),]
         
-    #     # time_list[6] <- round((proc.time() - timer)[3],5)
-    #     # print(paste(time_list))
+        # time_list[6] <- round((proc.time() - timer)[3],5)
+        # print(paste(time_list))
 
-    # }
+    }
 
 
     return(dta) 

From b0d2bc7063f3b4eaba502962eef77be1b928438e Mon Sep 17 00:00:00 2001
From: userz <sgoodman@aiddata.org>
Date: Thu, 12 Nov 2015 16:28:53 -0500
Subject: [PATCH 203/212] Test matchit lib.

---
 R/fastNN_binary_func.R | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/R/fastNN_binary_func.R b/R/fastNN_binary_func.R
index 5592541..f1ea1c6 100644
--- a/R/fastNN_binary_func.R
+++ b/R/fastNN_binary_func.R
@@ -207,7 +207,7 @@ fastNN_binary_func <- function(dta, trtMntVar, ids, curgrp, dist_PSM) {
     #but this may not be an issue for many analysis.
     #Effectively loops through all observations in the treatment group, ordered by PSM score - higher scores go first.
 
-    sorted_dta <- dta@data[order(dta@data[["PSM_trtProb"]]), c(ids, trtMntVar, "PSM_trtProb")]
+    sorted_dta <- dta@data[order(-dta@data[["PSM_trtProb"]]), c(ids, trtMntVar, "PSM_trtProb")]
 
 
     #Conduct the matching

From d266959be9e7d885b6a6dc96ceb901b6e30c3dbc Mon Sep 17 00:00:00 2001
From: userz <sgoodman@aiddata.org>
Date: Thu, 12 Nov 2015 16:47:51 -0500
Subject: [PATCH 204/212] Test matchit lib.

---
 R/fastNN_binary_func.R | 234 ++++++++++++++++++++---------------------
 1 file changed, 117 insertions(+), 117 deletions(-)

diff --git a/R/fastNN_binary_func.R b/R/fastNN_binary_func.R
index f1ea1c6..efe84f1 100644
--- a/R/fastNN_binary_func.R
+++ b/R/fastNN_binary_func.R
@@ -165,194 +165,194 @@
 fastNN_binary_func <- function(dta, trtMntVar, ids, curgrp, dist_PSM) {
 
 
-    # print('nn')
+    print('nn')
 
-    # # make sure there are at least x rows in both treated and control
-    # row_min <- 30
-    # trt_rows <- dta@data$TrtBin == 1
-    # if ( sum(trt_rows) < row_min | sum(! trt_rows) < row_min ) {
-    #     return('drop')
-    # }
+    # make sure there are at least x rows in both treated and control
+    row_min <- 30
+    trt_rows <- dta@data$TrtBin == 1
+    if ( sum(trt_rows) < row_min | sum(! trt_rows) < row_min ) {
+        return('drop')
+    }
 
 
 
-    # match_data <- dta@data[,c('PSM_trtProb', ids, 'TrtBin')]
+    match_data <- dta@data[,c('PSM_trtProb', ids, 'TrtBin')]
 
-    # rownames(match_data) <- match_data[[ids]]
+    rownames(match_data) <- match_data[[ids]]
 
-    # zzz <<- match_data
+    zzz <<- match_data
 
-    # m <- matchit(TrtBin ~ PSM_trtProb, data=match_data, method="nearest", ratio=1)
+    m <- matchit(TrtBin ~ PSM_trtProb, data=match_data, method="nearest", ratio=1)
 
-    # dta@data[["PSM_match_ID"]] <- -999
+    dta@data[["PSM_match_ID"]] <- -999
 
-    # for ( i in rownames(m$match.matrix) ) {
-    #     trt_id <- i
-    #     cnt_id <- m$match.matrix[i,]
+    for ( i in rownames(m$match.matrix) ) {
+        trt_id <- i
+        cnt_id <- m$match.matrix[i,]
         
-    #     if ( !is.na(cnt_id) ) {
-    #         pair_id <- paste(trt_id,cnt_id, sep='__')
-    #         dta@data$PSM_match_ID[which(dta@data[[ids]] == cnt_id | dta@data[[ids]] == trt_id)] <- pair_id        
-    #     }
-    # }
+        if ( !is.na(cnt_id) ) {
+            pair_id <- paste(trt_id,cnt_id, sep='__')
+            dta@data$PSM_match_ID[which(dta@data[[ids]] == cnt_id | dta@data[[ids]] == trt_id)] <- pair_id        
+        }
+    }
 
 
     # ===========================================================================
  
 
-    print("nn1.0")
+    # print("nn1.0")
 
 
-    #Fast nearest neighbors search - will not arrive at optimum,
-    #but this may not be an issue for many analysis.
-    #Effectively loops through all observations in the treatment group, ordered by PSM score - higher scores go first.
+    # #Fast nearest neighbors search - will not arrive at optimum,
+    # #but this may not be an issue for many analysis.
+    # #Effectively loops through all observations in the treatment group, ordered by PSM score - higher scores go first.
 
-    sorted_dta <- dta@data[order(-dta@data[["PSM_trtProb"]]), c(ids, trtMntVar, "PSM_trtProb")]
+    # sorted_dta <- dta@data[order(-dta@data[["PSM_trtProb"]]), c(ids, trtMntVar, "PSM_trtProb")]
 
 
-    #Conduct the matching
-    treated <- sorted_dta[sorted_dta[[trtMntVar]] == 1, c(ids, "PSM_trtProb")]
-    untreated <- sorted_dta[sorted_dta[[trtMntVar]] == 0, c(ids, "PSM_trtProb")]
+    # #Conduct the matching
+    # treated <- sorted_dta[sorted_dta[[trtMntVar]] == 1, c(ids, "PSM_trtProb")]
+    # untreated <- sorted_dta[sorted_dta[[trtMntVar]] == 0, c(ids, "PSM_trtProb")]
 
-    it_cnt = min(length(treated[[1]]), length(untreated[[1]]))
+    # it_cnt = min(length(treated[[1]]), length(untreated[[1]]))
 
-    if (it_cnt < 30) {
-        return('drop')
-    }
+    # if (it_cnt < 30) {
+    #     return('drop')
+    # }
 
-    # dta@data[["match"]] <- -999
-    # dta@data[["PSM_distance"]] <- -999
-    dta@data[["PSM_match_ID"]] <- -999
+    # # dta@data[["match"]] <- -999
+    # # dta@data[["PSM_distance"]] <- -999
+    # dta@data[["PSM_match_ID"]] <- -999
 
-    # print("nn2")
+    # # print("nn2")
 
-    #Calculate a distance decay function
-    #to perturb pairs based on their distances.  
-    for (j in 1:it_cnt) {
-        # time_list <- c()
+    # #Calculate a distance decay function
+    # #to perturb pairs based on their distances.  
+    # for (j in 1:it_cnt) {
+    #     # time_list <- c()
 
-        print(paste("nn cnt:",j))
-        # timer <- proc.time()
+    #     print(paste("nn cnt:",j))
+    #     # timer <- proc.time()
 
-        # treated <- sorted_dta[which(sorted_dta[[trtMntVar]] == 1 & sorted_dta[['nn_matched']] == 0),]
-        # untreated <- sorted_dta[which(sorted_dta[[trtMntVar]] == 0 & sorted_dta[['nn_matched']] == 0),]
+    #     # treated <- sorted_dta[which(sorted_dta[[trtMntVar]] == 1 & sorted_dta[['nn_matched']] == 0),]
+    #     # untreated <- sorted_dta[which(sorted_dta[[trtMntVar]] == 0 & sorted_dta[['nn_matched']] == 0),]
         
-        # print(nrow(sorted_dta[which(sorted_dta[['nn_matched']] == 0),]))
+    #     # print(nrow(sorted_dta[which(sorted_dta[['nn_matched']] == 0),]))
 
-        # time_list[1] <- round((proc.time() - timer)[3],5)
-        # print("nn2.1")
-        # timer <- proc.time()
+    #     # time_list[1] <- round((proc.time() - timer)[3],5)
+    #     # print("nn2.1")
+    #     # timer <- proc.time()
 
-        #Run the KNN for all neighbors. 
-        # print(length(treated[[1]]))
-        # summary(treated[["PSM_trtProb"]])
-        # print(length(untreated[[1]]))
-        # summary(untreated[["PSM_trtProb"]])
+    #     #Run the KNN for all neighbors. 
+    #     # print(length(treated[[1]]))
+    #     # summary(treated[["PSM_trtProb"]])
+    #     # print(length(untreated[[1]]))
+    #     # summary(untreated[["PSM_trtProb"]])
 
-        k <- get.knnx(treated[["PSM_trtProb"]], untreated[["PSM_trtProb"]], 1)
+    #     k <- get.knnx(treated[["PSM_trtProb"]], untreated[["PSM_trtProb"]], 1)
         
-        # time_list[2] <- round((proc.time() - timer)[3],5)
-        # print("nn2.2")
-        # timer <- proc.time()
+    #     # time_list[2] <- round((proc.time() - timer)[3],5)
+    #     # print("nn2.2")
+    #     # timer <- proc.time()
 
-        # #Perturb the values based on the distance decay function, if selected.
-        # if (!is.null(dist_PSM)) {
-        #     for (mC in 1:length(k[[1]])) {
+    #     # #Perturb the values based on the distance decay function, if selected.
+    #     # if (!is.null(dist_PSM)) {
+    #     #     for (mC in 1:length(k[[1]])) {
 
-        #         print("nn2.2.0")
+    #     #         print("nn2.2.0")
 
-        #         #Calculate the Euclidean Distance between pairs
-        #         Control_ID = toString(untreated[[ids]][[mC]])
+    #     #         #Calculate the Euclidean Distance between pairs
+    #     #         Control_ID = toString(untreated[[ids]][[mC]])
 
-        #         mT = k[["nn.index"]][mC]
+    #     #         mT = k[["nn.index"]][mC]
                 
-        #         Treatment_ID = toString(treated[[ids]][[mT]])
+    #     #         Treatment_ID = toString(treated[[ids]][[mT]])
 
-        #         #Find the control x,y location
-        #         cCoord = coordinates(dta[which(dta@data[[ids]] == Control_ID),])
+    #     #         #Find the control x,y location
+    #     #         cCoord = coordinates(dta[which(dta@data[[ids]] == Control_ID),])
                 
 
-        #         #Find the treatment x,y location
-        #         tCoord = coordinates(dta[which(dta@data[[ids]] == Treatment_ID),])
+    #     #         #Find the treatment x,y location
+    #     #         tCoord = coordinates(dta[which(dta@data[[ids]] == Treatment_ID),])
 
-        #         y_dist = abs(cCoord[1] - cCoord[2])
-        #         x_dist = abs(tCoord[1] - tCoord[2])
-        #         euc_dist = sqrt(y_dist^2 + x_dist^2)
+    #     #         y_dist = abs(cCoord[1] - cCoord[2])
+    #     #         x_dist = abs(tCoord[1] - tCoord[2])
+    #     #         euc_dist = sqrt(y_dist^2 + x_dist^2)
                 
-        #         print("nn2.2.1")
+    #     #         print("nn2.2.1")
 
-        #         PSM_score = k[["nn.dist"]][mC]
-        #         geog_Weight = pairDistWeight(dist=euc_dist,threshold=dist_PSM,type="Spherical")
+    #     #         PSM_score = k[["nn.dist"]][mC]
+    #     #         geog_Weight = pairDistWeight(dist=euc_dist,threshold=dist_PSM,type="Spherical")
 
-        #         print("nn2.2.2")
+    #     #         print("nn2.2.2")
 
                 
-        #         k[["nn.dist"]][mC] <- ((1-geog_Weight) * PSM_score)
+    #     #         k[["nn.dist"]][mC] <- ((1-geog_Weight) * PSM_score)
 
-        #     }
+    #     #     }
       
-        # }
+    #     # }
 
-        # time_list[3] <- round((proc.time() - timer)[3],5)
-        print("nn2.3")
-        # timer <- proc.time()
+    #     # time_list[3] <- round((proc.time() - timer)[3],5)
+    #     print("nn2.3")
+    #     # timer <- proc.time()
 
-        #Add the matched treatment and control values to the recording data frame
-        #best_m_control is the row in the "distance" matrix with the lowest value.  This is the same row as in the index.
-        best_m_control = which(k[["nn.dist"]] %in% sort(k[["nn.dist"]])[1])
+    #     #Add the matched treatment and control values to the recording data frame
+    #     #best_m_control is the row in the "distance" matrix with the lowest value.  This is the same row as in the index.
+    #     best_m_control = which(k[["nn.dist"]] %in% sort(k[["nn.dist"]])[1])
         
-        #This will give us the matched index in the "treated" dataset.
-        best_m_treated = k[["nn.index"]][best_m_control]
+    #     #This will give us the matched index in the "treated" dataset.
+    #     best_m_treated = k[["nn.index"]][best_m_control]
         
 
-        # #Control PSM ID
-        # cid_txt = paste("untreated$",ids,"[",best_m_control,"]",sep="")
-        # Control_ID = toString(eval(parse(text=cid_txt)))
+    #     # #Control PSM ID
+    #     # cid_txt = paste("untreated$",ids,"[",best_m_control,"]",sep="")
+    #     # Control_ID = toString(eval(parse(text=cid_txt)))
             
-        # #Treatment PSM ID
-        # tid_txt = paste("treated$",ids,"[",best_m_treated,"]",sep="")
-        # Treatment_ID = toString(eval(parse(text=tid_txt)))
+    #     # #Treatment PSM ID
+    #     # tid_txt = paste("treated$",ids,"[",best_m_treated,"]",sep="")
+    #     # Treatment_ID = toString(eval(parse(text=tid_txt)))
 
-        #Control PSM ID and Treatment PSM ID
-        Control_ID = toString(untreated[[ids]][best_m_control])
-        Treatment_ID = toString(treated[[ids]][best_m_treated])
+    #     #Control PSM ID and Treatment PSM ID
+    #     Control_ID = toString(untreated[[ids]][best_m_control])
+    #     Treatment_ID = toString(treated[[ids]][best_m_treated])
 
 
-        #Create a unique pair ID for each group (will simply append a "1" if only 1 group)
-        pair_id = paste(curgrp,j, sep="")
+    #     #Create a unique pair ID for each group (will simply append a "1" if only 1 group)
+    #     pair_id = paste(curgrp,j, sep="")
         
 
 
-        # time_list[4] <- round((proc.time() - timer)[3],5)
-        print("nn2.4x")
-        # timer <- proc.time()
+    #     # time_list[4] <- round((proc.time() - timer)[3],5)
+    #     print("nn2.4x")
+    #     # timer <- proc.time()
 
-        #Add the Treatment ID to the Control Row and Add the Control ID to the Treatment Row
-        # dta@data$match[which(dta@data[[ids]] == Control_ID)] <- Treatment_ID
-        # dta@data$match[which(dta@data[[ids]] == Treatment_ID)] <- Control_ID
+    #     #Add the Treatment ID to the Control Row and Add the Control ID to the Treatment Row
+    #     # dta@data$match[which(dta@data[[ids]] == Control_ID)] <- Treatment_ID
+    #     # dta@data$match[which(dta@data[[ids]] == Treatment_ID)] <- Control_ID
 
 
-        # dta@data$PSM_distance[which(dta@data[[ids]] == Control_ID | dta@data[[ids]] == Treatment_ID)] <- k[["nn.dist"]][,1][best_m_control]
-        dta@data$PSM_match_ID[which(dta@data[[ids]] == Control_ID | dta@data[[ids]] == Treatment_ID)] <- pair_id        
+    #     # dta@data$PSM_distance[which(dta@data[[ids]] == Control_ID | dta@data[[ids]] == Treatment_ID)] <- k[["nn.dist"]][,1][best_m_control]
+    #     dta@data$PSM_match_ID[which(dta@data[[ids]] == Control_ID | dta@data[[ids]] == Treatment_ID)] <- pair_id        
 
 
         
-        # time_list[5] <- round((proc.time() - timer)[3],5)
-        # timer <- proc.time()
+    #     # time_list[5] <- round((proc.time() - timer)[3],5)
+    #     # timer <- proc.time()
 
-        #Drop the paired match out of the iteration matrix 
-        # sorted_dta <- sorted_dta[sorted_dta[[ids]] != Treatment_ID ,]
-        # sorted_dta <- sorted_dta[sorted_dta[[ids]] != Control_ID ,]    
+    #     #Drop the paired match out of the iteration matrix 
+    #     # sorted_dta <- sorted_dta[sorted_dta[[ids]] != Treatment_ID ,]
+    #     # sorted_dta <- sorted_dta[sorted_dta[[ids]] != Control_ID ,]    
     
-        # sorted_dta[which(sorted_dta[[ids]] == Control_ID | sorted_dta[[ids]] == Treatment_ID),][['nn_matched']] <- 1
+    #     # sorted_dta[which(sorted_dta[[ids]] == Control_ID | sorted_dta[[ids]] == Treatment_ID),][['nn_matched']] <- 1
 
-        treated <- treated[which(treated[[ids]] != Treatment_ID),]
-        untreated <- untreated[which(untreated[[ids]] != Control_ID),]
+    #     treated <- treated[which(treated[[ids]] != Treatment_ID),]
+    #     untreated <- untreated[which(untreated[[ids]] != Control_ID),]
         
-        # time_list[6] <- round((proc.time() - timer)[3],5)
-        # print(paste(time_list))
+    #     # time_list[6] <- round((proc.time() - timer)[3],5)
+    #     # print(paste(time_list))
 
-    }
+    # }
 
 
     return(dta) 

From ede4adc702310bad6ee9829ce1601b760a5a8655 Mon Sep 17 00:00:00 2001
From: userz <sgoodman@aiddata.org>
Date: Fri, 13 Nov 2015 09:06:05 -0500
Subject: [PATCH 205/212] Test matchit lib.

---
 R/SAT.R | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/R/SAT.R b/R/SAT.R
index ac3f97c..8f9812b 100644
--- a/R/SAT.R
+++ b/R/SAT.R
@@ -386,7 +386,7 @@ SAT <- function (dta, mtd, constraints, psm_eq, ids, drop_opts, visual, TrtBinCo
 
     }
 
-
+    vvv <<- dta
 
 
     print("sat5")

From a38e71f714d536687b18ae3d98827e89428f73ae Mon Sep 17 00:00:00 2001
From: userz <sgoodman@aiddata.org>
Date: Fri, 13 Nov 2015 09:10:19 -0500
Subject: [PATCH 206/212] Test matchit lib.

---
 R/fastNN_binary_func.R | 234 ++++++++++++++++++++---------------------
 1 file changed, 117 insertions(+), 117 deletions(-)

diff --git a/R/fastNN_binary_func.R b/R/fastNN_binary_func.R
index efe84f1..f1ea1c6 100644
--- a/R/fastNN_binary_func.R
+++ b/R/fastNN_binary_func.R
@@ -165,194 +165,194 @@
 fastNN_binary_func <- function(dta, trtMntVar, ids, curgrp, dist_PSM) {
 
 
-    print('nn')
+    # print('nn')
 
-    # make sure there are at least x rows in both treated and control
-    row_min <- 30
-    trt_rows <- dta@data$TrtBin == 1
-    if ( sum(trt_rows) < row_min | sum(! trt_rows) < row_min ) {
-        return('drop')
-    }
+    # # make sure there are at least x rows in both treated and control
+    # row_min <- 30
+    # trt_rows <- dta@data$TrtBin == 1
+    # if ( sum(trt_rows) < row_min | sum(! trt_rows) < row_min ) {
+    #     return('drop')
+    # }
 
 
 
-    match_data <- dta@data[,c('PSM_trtProb', ids, 'TrtBin')]
+    # match_data <- dta@data[,c('PSM_trtProb', ids, 'TrtBin')]
 
-    rownames(match_data) <- match_data[[ids]]
+    # rownames(match_data) <- match_data[[ids]]
 
-    zzz <<- match_data
+    # zzz <<- match_data
 
-    m <- matchit(TrtBin ~ PSM_trtProb, data=match_data, method="nearest", ratio=1)
+    # m <- matchit(TrtBin ~ PSM_trtProb, data=match_data, method="nearest", ratio=1)
 
-    dta@data[["PSM_match_ID"]] <- -999
+    # dta@data[["PSM_match_ID"]] <- -999
 
-    for ( i in rownames(m$match.matrix) ) {
-        trt_id <- i
-        cnt_id <- m$match.matrix[i,]
+    # for ( i in rownames(m$match.matrix) ) {
+    #     trt_id <- i
+    #     cnt_id <- m$match.matrix[i,]
         
-        if ( !is.na(cnt_id) ) {
-            pair_id <- paste(trt_id,cnt_id, sep='__')
-            dta@data$PSM_match_ID[which(dta@data[[ids]] == cnt_id | dta@data[[ids]] == trt_id)] <- pair_id        
-        }
-    }
+    #     if ( !is.na(cnt_id) ) {
+    #         pair_id <- paste(trt_id,cnt_id, sep='__')
+    #         dta@data$PSM_match_ID[which(dta@data[[ids]] == cnt_id | dta@data[[ids]] == trt_id)] <- pair_id        
+    #     }
+    # }
 
 
     # ===========================================================================
  
 
-    # print("nn1.0")
+    print("nn1.0")
 
 
-    # #Fast nearest neighbors search - will not arrive at optimum,
-    # #but this may not be an issue for many analysis.
-    # #Effectively loops through all observations in the treatment group, ordered by PSM score - higher scores go first.
+    #Fast nearest neighbors search - will not arrive at optimum,
+    #but this may not be an issue for many analysis.
+    #Effectively loops through all observations in the treatment group, ordered by PSM score - higher scores go first.
 
-    # sorted_dta <- dta@data[order(-dta@data[["PSM_trtProb"]]), c(ids, trtMntVar, "PSM_trtProb")]
+    sorted_dta <- dta@data[order(-dta@data[["PSM_trtProb"]]), c(ids, trtMntVar, "PSM_trtProb")]
 
 
-    # #Conduct the matching
-    # treated <- sorted_dta[sorted_dta[[trtMntVar]] == 1, c(ids, "PSM_trtProb")]
-    # untreated <- sorted_dta[sorted_dta[[trtMntVar]] == 0, c(ids, "PSM_trtProb")]
+    #Conduct the matching
+    treated <- sorted_dta[sorted_dta[[trtMntVar]] == 1, c(ids, "PSM_trtProb")]
+    untreated <- sorted_dta[sorted_dta[[trtMntVar]] == 0, c(ids, "PSM_trtProb")]
 
-    # it_cnt = min(length(treated[[1]]), length(untreated[[1]]))
+    it_cnt = min(length(treated[[1]]), length(untreated[[1]]))
 
-    # if (it_cnt < 30) {
-    #     return('drop')
-    # }
+    if (it_cnt < 30) {
+        return('drop')
+    }
 
-    # # dta@data[["match"]] <- -999
-    # # dta@data[["PSM_distance"]] <- -999
-    # dta@data[["PSM_match_ID"]] <- -999
+    # dta@data[["match"]] <- -999
+    # dta@data[["PSM_distance"]] <- -999
+    dta@data[["PSM_match_ID"]] <- -999
 
-    # # print("nn2")
+    # print("nn2")
 
-    # #Calculate a distance decay function
-    # #to perturb pairs based on their distances.  
-    # for (j in 1:it_cnt) {
-    #     # time_list <- c()
+    #Calculate a distance decay function
+    #to perturb pairs based on their distances.  
+    for (j in 1:it_cnt) {
+        # time_list <- c()
 
-    #     print(paste("nn cnt:",j))
-    #     # timer <- proc.time()
+        print(paste("nn cnt:",j))
+        # timer <- proc.time()
 
-    #     # treated <- sorted_dta[which(sorted_dta[[trtMntVar]] == 1 & sorted_dta[['nn_matched']] == 0),]
-    #     # untreated <- sorted_dta[which(sorted_dta[[trtMntVar]] == 0 & sorted_dta[['nn_matched']] == 0),]
+        # treated <- sorted_dta[which(sorted_dta[[trtMntVar]] == 1 & sorted_dta[['nn_matched']] == 0),]
+        # untreated <- sorted_dta[which(sorted_dta[[trtMntVar]] == 0 & sorted_dta[['nn_matched']] == 0),]
         
-    #     # print(nrow(sorted_dta[which(sorted_dta[['nn_matched']] == 0),]))
+        # print(nrow(sorted_dta[which(sorted_dta[['nn_matched']] == 0),]))
 
-    #     # time_list[1] <- round((proc.time() - timer)[3],5)
-    #     # print("nn2.1")
-    #     # timer <- proc.time()
+        # time_list[1] <- round((proc.time() - timer)[3],5)
+        # print("nn2.1")
+        # timer <- proc.time()
 
-    #     #Run the KNN for all neighbors. 
-    #     # print(length(treated[[1]]))
-    #     # summary(treated[["PSM_trtProb"]])
-    #     # print(length(untreated[[1]]))
-    #     # summary(untreated[["PSM_trtProb"]])
+        #Run the KNN for all neighbors. 
+        # print(length(treated[[1]]))
+        # summary(treated[["PSM_trtProb"]])
+        # print(length(untreated[[1]]))
+        # summary(untreated[["PSM_trtProb"]])
 
-    #     k <- get.knnx(treated[["PSM_trtProb"]], untreated[["PSM_trtProb"]], 1)
+        k <- get.knnx(treated[["PSM_trtProb"]], untreated[["PSM_trtProb"]], 1)
         
-    #     # time_list[2] <- round((proc.time() - timer)[3],5)
-    #     # print("nn2.2")
-    #     # timer <- proc.time()
+        # time_list[2] <- round((proc.time() - timer)[3],5)
+        # print("nn2.2")
+        # timer <- proc.time()
 
-    #     # #Perturb the values based on the distance decay function, if selected.
-    #     # if (!is.null(dist_PSM)) {
-    #     #     for (mC in 1:length(k[[1]])) {
+        # #Perturb the values based on the distance decay function, if selected.
+        # if (!is.null(dist_PSM)) {
+        #     for (mC in 1:length(k[[1]])) {
 
-    #     #         print("nn2.2.0")
+        #         print("nn2.2.0")
 
-    #     #         #Calculate the Euclidean Distance between pairs
-    #     #         Control_ID = toString(untreated[[ids]][[mC]])
+        #         #Calculate the Euclidean Distance between pairs
+        #         Control_ID = toString(untreated[[ids]][[mC]])
 
-    #     #         mT = k[["nn.index"]][mC]
+        #         mT = k[["nn.index"]][mC]
                 
-    #     #         Treatment_ID = toString(treated[[ids]][[mT]])
+        #         Treatment_ID = toString(treated[[ids]][[mT]])
 
-    #     #         #Find the control x,y location
-    #     #         cCoord = coordinates(dta[which(dta@data[[ids]] == Control_ID),])
+        #         #Find the control x,y location
+        #         cCoord = coordinates(dta[which(dta@data[[ids]] == Control_ID),])
                 
 
-    #     #         #Find the treatment x,y location
-    #     #         tCoord = coordinates(dta[which(dta@data[[ids]] == Treatment_ID),])
+        #         #Find the treatment x,y location
+        #         tCoord = coordinates(dta[which(dta@data[[ids]] == Treatment_ID),])
 
-    #     #         y_dist = abs(cCoord[1] - cCoord[2])
-    #     #         x_dist = abs(tCoord[1] - tCoord[2])
-    #     #         euc_dist = sqrt(y_dist^2 + x_dist^2)
+        #         y_dist = abs(cCoord[1] - cCoord[2])
+        #         x_dist = abs(tCoord[1] - tCoord[2])
+        #         euc_dist = sqrt(y_dist^2 + x_dist^2)
                 
-    #     #         print("nn2.2.1")
+        #         print("nn2.2.1")
 
-    #     #         PSM_score = k[["nn.dist"]][mC]
-    #     #         geog_Weight = pairDistWeight(dist=euc_dist,threshold=dist_PSM,type="Spherical")
+        #         PSM_score = k[["nn.dist"]][mC]
+        #         geog_Weight = pairDistWeight(dist=euc_dist,threshold=dist_PSM,type="Spherical")
 
-    #     #         print("nn2.2.2")
+        #         print("nn2.2.2")
 
                 
-    #     #         k[["nn.dist"]][mC] <- ((1-geog_Weight) * PSM_score)
+        #         k[["nn.dist"]][mC] <- ((1-geog_Weight) * PSM_score)
 
-    #     #     }
+        #     }
       
-    #     # }
+        # }
 
-    #     # time_list[3] <- round((proc.time() - timer)[3],5)
-    #     print("nn2.3")
-    #     # timer <- proc.time()
+        # time_list[3] <- round((proc.time() - timer)[3],5)
+        print("nn2.3")
+        # timer <- proc.time()
 
-    #     #Add the matched treatment and control values to the recording data frame
-    #     #best_m_control is the row in the "distance" matrix with the lowest value.  This is the same row as in the index.
-    #     best_m_control = which(k[["nn.dist"]] %in% sort(k[["nn.dist"]])[1])
+        #Add the matched treatment and control values to the recording data frame
+        #best_m_control is the row in the "distance" matrix with the lowest value.  This is the same row as in the index.
+        best_m_control = which(k[["nn.dist"]] %in% sort(k[["nn.dist"]])[1])
         
-    #     #This will give us the matched index in the "treated" dataset.
-    #     best_m_treated = k[["nn.index"]][best_m_control]
+        #This will give us the matched index in the "treated" dataset.
+        best_m_treated = k[["nn.index"]][best_m_control]
         
 
-    #     # #Control PSM ID
-    #     # cid_txt = paste("untreated$",ids,"[",best_m_control,"]",sep="")
-    #     # Control_ID = toString(eval(parse(text=cid_txt)))
+        # #Control PSM ID
+        # cid_txt = paste("untreated$",ids,"[",best_m_control,"]",sep="")
+        # Control_ID = toString(eval(parse(text=cid_txt)))
             
-    #     # #Treatment PSM ID
-    #     # tid_txt = paste("treated$",ids,"[",best_m_treated,"]",sep="")
-    #     # Treatment_ID = toString(eval(parse(text=tid_txt)))
+        # #Treatment PSM ID
+        # tid_txt = paste("treated$",ids,"[",best_m_treated,"]",sep="")
+        # Treatment_ID = toString(eval(parse(text=tid_txt)))
 
-    #     #Control PSM ID and Treatment PSM ID
-    #     Control_ID = toString(untreated[[ids]][best_m_control])
-    #     Treatment_ID = toString(treated[[ids]][best_m_treated])
+        #Control PSM ID and Treatment PSM ID
+        Control_ID = toString(untreated[[ids]][best_m_control])
+        Treatment_ID = toString(treated[[ids]][best_m_treated])
 
 
-    #     #Create a unique pair ID for each group (will simply append a "1" if only 1 group)
-    #     pair_id = paste(curgrp,j, sep="")
+        #Create a unique pair ID for each group (will simply append a "1" if only 1 group)
+        pair_id = paste(curgrp,j, sep="")
         
 
 
-    #     # time_list[4] <- round((proc.time() - timer)[3],5)
-    #     print("nn2.4x")
-    #     # timer <- proc.time()
+        # time_list[4] <- round((proc.time() - timer)[3],5)
+        print("nn2.4x")
+        # timer <- proc.time()
 
-    #     #Add the Treatment ID to the Control Row and Add the Control ID to the Treatment Row
-    #     # dta@data$match[which(dta@data[[ids]] == Control_ID)] <- Treatment_ID
-    #     # dta@data$match[which(dta@data[[ids]] == Treatment_ID)] <- Control_ID
+        #Add the Treatment ID to the Control Row and Add the Control ID to the Treatment Row
+        # dta@data$match[which(dta@data[[ids]] == Control_ID)] <- Treatment_ID
+        # dta@data$match[which(dta@data[[ids]] == Treatment_ID)] <- Control_ID
 
 
-    #     # dta@data$PSM_distance[which(dta@data[[ids]] == Control_ID | dta@data[[ids]] == Treatment_ID)] <- k[["nn.dist"]][,1][best_m_control]
-    #     dta@data$PSM_match_ID[which(dta@data[[ids]] == Control_ID | dta@data[[ids]] == Treatment_ID)] <- pair_id        
+        # dta@data$PSM_distance[which(dta@data[[ids]] == Control_ID | dta@data[[ids]] == Treatment_ID)] <- k[["nn.dist"]][,1][best_m_control]
+        dta@data$PSM_match_ID[which(dta@data[[ids]] == Control_ID | dta@data[[ids]] == Treatment_ID)] <- pair_id        
 
 
         
-    #     # time_list[5] <- round((proc.time() - timer)[3],5)
-    #     # timer <- proc.time()
+        # time_list[5] <- round((proc.time() - timer)[3],5)
+        # timer <- proc.time()
 
-    #     #Drop the paired match out of the iteration matrix 
-    #     # sorted_dta <- sorted_dta[sorted_dta[[ids]] != Treatment_ID ,]
-    #     # sorted_dta <- sorted_dta[sorted_dta[[ids]] != Control_ID ,]    
+        #Drop the paired match out of the iteration matrix 
+        # sorted_dta <- sorted_dta[sorted_dta[[ids]] != Treatment_ID ,]
+        # sorted_dta <- sorted_dta[sorted_dta[[ids]] != Control_ID ,]    
     
-    #     # sorted_dta[which(sorted_dta[[ids]] == Control_ID | sorted_dta[[ids]] == Treatment_ID),][['nn_matched']] <- 1
+        # sorted_dta[which(sorted_dta[[ids]] == Control_ID | sorted_dta[[ids]] == Treatment_ID),][['nn_matched']] <- 1
 
-    #     treated <- treated[which(treated[[ids]] != Treatment_ID),]
-    #     untreated <- untreated[which(untreated[[ids]] != Control_ID),]
+        treated <- treated[which(treated[[ids]] != Treatment_ID),]
+        untreated <- untreated[which(untreated[[ids]] != Control_ID),]
         
-    #     # time_list[6] <- round((proc.time() - timer)[3],5)
-    #     # print(paste(time_list))
+        # time_list[6] <- round((proc.time() - timer)[3],5)
+        # print(paste(time_list))
 
-    # }
+    }
 
 
     return(dta) 

From 648eb89fed8b3eb8e23eb6a8bc59a8f1c376316a Mon Sep 17 00:00:00 2001
From: userz <sgoodman@aiddata.org>
Date: Fri, 13 Nov 2015 09:34:20 -0500
Subject: [PATCH 207/212] Test matchit lib.

---
 R/fastNN_binary_func.R | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/R/fastNN_binary_func.R b/R/fastNN_binary_func.R
index f1ea1c6..ed708c1 100644
--- a/R/fastNN_binary_func.R
+++ b/R/fastNN_binary_func.R
@@ -318,10 +318,11 @@ fastNN_binary_func <- function(dta, trtMntVar, ids, curgrp, dist_PSM) {
         Treatment_ID = toString(treated[[ids]][best_m_treated])
 
 
-        #Create a unique pair ID for each group (will simply append a "1" if only 1 group)
-        pair_id = paste(curgrp,j, sep="")
-        
 
+        #Create a unique pair ID for each group (will simply append a "1" if only 1 group)
+        # pair_id = paste(curgrp,j, sep="")
+        pair_id <- paste(Treatment_ID,Control_ID, sep='__')
+        print(pair_id)
 
         # time_list[4] <- round((proc.time() - timer)[3],5)
         print("nn2.4x")

From 849786148368b8c727d516216599f286e36825db Mon Sep 17 00:00:00 2001
From: userz <sgoodman@aiddata.org>
Date: Fri, 13 Nov 2015 09:37:38 -0500
Subject: [PATCH 208/212] Test matchit lib.

---
 R/fastNN_binary_func.R | 14 ++++++++++----
 1 file changed, 10 insertions(+), 4 deletions(-)

diff --git a/R/fastNN_binary_func.R b/R/fastNN_binary_func.R
index ed708c1..c6449ce 100644
--- a/R/fastNN_binary_func.R
+++ b/R/fastNN_binary_func.R
@@ -306,17 +306,23 @@ fastNN_binary_func <- function(dta, trtMntVar, ids, curgrp, dist_PSM) {
         
 
         # #Control PSM ID
-        # cid_txt = paste("untreated$",ids,"[",best_m_control,"]",sep="")
-        # Control_ID = toString(eval(parse(text=cid_txt)))
+        cid_txt = paste("untreated$",ids,"[",best_m_control,"]",sep="")
+        Control_ID1 = toString(eval(parse(text=cid_txt)))
             
         # #Treatment PSM ID
-        # tid_txt = paste("treated$",ids,"[",best_m_treated,"]",sep="")
-        # Treatment_ID = toString(eval(parse(text=tid_txt)))
+        tid_txt = paste("treated$",ids,"[",best_m_treated,"]",sep="")
+        Treatment_ID1 = toString(eval(parse(text=tid_txt)))
+
+        print(Control_ID1)
+        print(Treatment_ID1)
+
 
         #Control PSM ID and Treatment PSM ID
         Control_ID = toString(untreated[[ids]][best_m_control])
         Treatment_ID = toString(treated[[ids]][best_m_treated])
 
+        print(Control_ID)
+        print(Treatment_ID)
 
 
         #Create a unique pair ID for each group (will simply append a "1" if only 1 group)

From 08a63fc992df6c56fca89a57702e087b753efcb8 Mon Sep 17 00:00:00 2001
From: userz <sgoodman@aiddata.org>
Date: Fri, 13 Nov 2015 09:39:06 -0500
Subject: [PATCH 209/212] Test matchit lib.

---
 R/fastNN_binary_func.R | 14 ++------------
 1 file changed, 2 insertions(+), 12 deletions(-)

diff --git a/R/fastNN_binary_func.R b/R/fastNN_binary_func.R
index c6449ce..4566410 100644
--- a/R/fastNN_binary_func.R
+++ b/R/fastNN_binary_func.R
@@ -307,22 +307,12 @@ fastNN_binary_func <- function(dta, trtMntVar, ids, curgrp, dist_PSM) {
 
         # #Control PSM ID
         cid_txt = paste("untreated$",ids,"[",best_m_control,"]",sep="")
-        Control_ID1 = toString(eval(parse(text=cid_txt)))
+        Control_ID = toString(eval(parse(text=cid_txt)))
             
         # #Treatment PSM ID
         tid_txt = paste("treated$",ids,"[",best_m_treated,"]",sep="")
-        Treatment_ID1 = toString(eval(parse(text=tid_txt)))
+        Treatment_ID = toString(eval(parse(text=tid_txt)))
 
-        print(Control_ID1)
-        print(Treatment_ID1)
-
-
-        #Control PSM ID and Treatment PSM ID
-        Control_ID = toString(untreated[[ids]][best_m_control])
-        Treatment_ID = toString(treated[[ids]][best_m_treated])
-
-        print(Control_ID)
-        print(Treatment_ID)
 
 
         #Create a unique pair ID for each group (will simply append a "1" if only 1 group)

From 128c3db6bea77e6fb843a3fffa5e2750032c0f97 Mon Sep 17 00:00:00 2001
From: userz <sgoodman@aiddata.org>
Date: Fri, 13 Nov 2015 12:22:02 -0500
Subject: [PATCH 210/212] Test matchit lib.

---
 R/fastNN_binary_func.R | 232 ++++++++++++++++++++---------------------
 1 file changed, 116 insertions(+), 116 deletions(-)

diff --git a/R/fastNN_binary_func.R b/R/fastNN_binary_func.R
index 4566410..b4917d2 100644
--- a/R/fastNN_binary_func.R
+++ b/R/fastNN_binary_func.R
@@ -165,191 +165,191 @@
 fastNN_binary_func <- function(dta, trtMntVar, ids, curgrp, dist_PSM) {
 
 
-    # print('nn')
+    print('nn')
 
-    # # make sure there are at least x rows in both treated and control
-    # row_min <- 30
-    # trt_rows <- dta@data$TrtBin == 1
-    # if ( sum(trt_rows) < row_min | sum(! trt_rows) < row_min ) {
-    #     return('drop')
-    # }
+    # make sure there are at least x rows in both treated and control
+    row_min <- 30
+    trt_rows <- dta@data$TrtBin == 1
+    if ( sum(trt_rows) < row_min | sum(! trt_rows) < row_min ) {
+        return('drop')
+    }
 
 
 
-    # match_data <- dta@data[,c('PSM_trtProb', ids, 'TrtBin')]
+    match_data <- dta@data[,c('PSM_trtProb', ids, 'TrtBin')]
 
-    # rownames(match_data) <- match_data[[ids]]
+    rownames(match_data) <- match_data[[ids]]
 
-    # zzz <<- match_data
+    zzz <<- match_data
 
-    # m <- matchit(TrtBin ~ PSM_trtProb, data=match_data, method="nearest", ratio=1)
+    m <- matchit(TrtBin ~ PSM_trtProb, data=match_data, method="nearest", ratio=1)
 
-    # dta@data[["PSM_match_ID"]] <- -999
+    dta@data[["PSM_match_ID"]] <- -999
 
-    # for ( i in rownames(m$match.matrix) ) {
-    #     trt_id <- i
-    #     cnt_id <- m$match.matrix[i,]
+    for ( i in rownames(m$match.matrix) ) {
+        trt_id <- i
+        cnt_id <- m$match.matrix[i,]
         
-    #     if ( !is.na(cnt_id) ) {
-    #         pair_id <- paste(trt_id,cnt_id, sep='__')
-    #         dta@data$PSM_match_ID[which(dta@data[[ids]] == cnt_id | dta@data[[ids]] == trt_id)] <- pair_id        
-    #     }
-    # }
+        if ( !is.na(cnt_id) ) {
+            pair_id <- paste(trt_id,cnt_id, sep='__')
+            dta@data$PSM_match_ID[which(dta@data[[ids]] == cnt_id | dta@data[[ids]] == trt_id)] <- pair_id        
+        }
+    }
 
 
     # ===========================================================================
  
 
-    print("nn1.0")
+    # print("nn1.0")
 
 
-    #Fast nearest neighbors search - will not arrive at optimum,
-    #but this may not be an issue for many analysis.
-    #Effectively loops through all observations in the treatment group, ordered by PSM score - higher scores go first.
+    # #Fast nearest neighbors search - will not arrive at optimum,
+    # #but this may not be an issue for many analysis.
+    # #Effectively loops through all observations in the treatment group, ordered by PSM score - higher scores go first.
 
-    sorted_dta <- dta@data[order(-dta@data[["PSM_trtProb"]]), c(ids, trtMntVar, "PSM_trtProb")]
+    # sorted_dta <- dta@data[order(-dta@data[["PSM_trtProb"]]), c(ids, trtMntVar, "PSM_trtProb")]
 
 
-    #Conduct the matching
-    treated <- sorted_dta[sorted_dta[[trtMntVar]] == 1, c(ids, "PSM_trtProb")]
-    untreated <- sorted_dta[sorted_dta[[trtMntVar]] == 0, c(ids, "PSM_trtProb")]
+    # #Conduct the matching
+    # treated <- sorted_dta[sorted_dta[[trtMntVar]] == 1, c(ids, "PSM_trtProb")]
+    # untreated <- sorted_dta[sorted_dta[[trtMntVar]] == 0, c(ids, "PSM_trtProb")]
 
-    it_cnt = min(length(treated[[1]]), length(untreated[[1]]))
+    # it_cnt = min(length(treated[[1]]), length(untreated[[1]]))
 
-    if (it_cnt < 30) {
-        return('drop')
-    }
+    # if (it_cnt < 30) {
+    #     return('drop')
+    # }
 
-    # dta@data[["match"]] <- -999
-    # dta@data[["PSM_distance"]] <- -999
-    dta@data[["PSM_match_ID"]] <- -999
+    # # dta@data[["match"]] <- -999
+    # # dta@data[["PSM_distance"]] <- -999
+    # dta@data[["PSM_match_ID"]] <- -999
 
-    # print("nn2")
+    # # print("nn2")
 
-    #Calculate a distance decay function
-    #to perturb pairs based on their distances.  
-    for (j in 1:it_cnt) {
-        # time_list <- c()
+    # #Calculate a distance decay function
+    # #to perturb pairs based on their distances.  
+    # for (j in 1:it_cnt) {
+    #     # time_list <- c()
 
-        print(paste("nn cnt:",j))
-        # timer <- proc.time()
+    #     print(paste("nn cnt:",j))
+    #     # timer <- proc.time()
 
-        # treated <- sorted_dta[which(sorted_dta[[trtMntVar]] == 1 & sorted_dta[['nn_matched']] == 0),]
-        # untreated <- sorted_dta[which(sorted_dta[[trtMntVar]] == 0 & sorted_dta[['nn_matched']] == 0),]
+    #     # treated <- sorted_dta[which(sorted_dta[[trtMntVar]] == 1 & sorted_dta[['nn_matched']] == 0),]
+    #     # untreated <- sorted_dta[which(sorted_dta[[trtMntVar]] == 0 & sorted_dta[['nn_matched']] == 0),]
         
-        # print(nrow(sorted_dta[which(sorted_dta[['nn_matched']] == 0),]))
+    #     # print(nrow(sorted_dta[which(sorted_dta[['nn_matched']] == 0),]))
 
-        # time_list[1] <- round((proc.time() - timer)[3],5)
-        # print("nn2.1")
-        # timer <- proc.time()
+    #     # time_list[1] <- round((proc.time() - timer)[3],5)
+    #     # print("nn2.1")
+    #     # timer <- proc.time()
 
-        #Run the KNN for all neighbors. 
-        # print(length(treated[[1]]))
-        # summary(treated[["PSM_trtProb"]])
-        # print(length(untreated[[1]]))
-        # summary(untreated[["PSM_trtProb"]])
+    #     #Run the KNN for all neighbors. 
+    #     # print(length(treated[[1]]))
+    #     # summary(treated[["PSM_trtProb"]])
+    #     # print(length(untreated[[1]]))
+    #     # summary(untreated[["PSM_trtProb"]])
 
-        k <- get.knnx(treated[["PSM_trtProb"]], untreated[["PSM_trtProb"]], 1)
+    #     k <- get.knnx(treated[["PSM_trtProb"]], untreated[["PSM_trtProb"]], 1)
         
-        # time_list[2] <- round((proc.time() - timer)[3],5)
-        # print("nn2.2")
-        # timer <- proc.time()
+    #     # time_list[2] <- round((proc.time() - timer)[3],5)
+    #     # print("nn2.2")
+    #     # timer <- proc.time()
 
-        # #Perturb the values based on the distance decay function, if selected.
-        # if (!is.null(dist_PSM)) {
-        #     for (mC in 1:length(k[[1]])) {
+    #     # #Perturb the values based on the distance decay function, if selected.
+    #     # if (!is.null(dist_PSM)) {
+    #     #     for (mC in 1:length(k[[1]])) {
 
-        #         print("nn2.2.0")
+    #     #         print("nn2.2.0")
 
-        #         #Calculate the Euclidean Distance between pairs
-        #         Control_ID = toString(untreated[[ids]][[mC]])
+    #     #         #Calculate the Euclidean Distance between pairs
+    #     #         Control_ID = toString(untreated[[ids]][[mC]])
 
-        #         mT = k[["nn.index"]][mC]
+    #     #         mT = k[["nn.index"]][mC]
                 
-        #         Treatment_ID = toString(treated[[ids]][[mT]])
+    #     #         Treatment_ID = toString(treated[[ids]][[mT]])
 
-        #         #Find the control x,y location
-        #         cCoord = coordinates(dta[which(dta@data[[ids]] == Control_ID),])
+    #     #         #Find the control x,y location
+    #     #         cCoord = coordinates(dta[which(dta@data[[ids]] == Control_ID),])
                 
 
-        #         #Find the treatment x,y location
-        #         tCoord = coordinates(dta[which(dta@data[[ids]] == Treatment_ID),])
+    #     #         #Find the treatment x,y location
+    #     #         tCoord = coordinates(dta[which(dta@data[[ids]] == Treatment_ID),])
 
-        #         y_dist = abs(cCoord[1] - cCoord[2])
-        #         x_dist = abs(tCoord[1] - tCoord[2])
-        #         euc_dist = sqrt(y_dist^2 + x_dist^2)
+    #     #         y_dist = abs(cCoord[1] - cCoord[2])
+    #     #         x_dist = abs(tCoord[1] - tCoord[2])
+    #     #         euc_dist = sqrt(y_dist^2 + x_dist^2)
                 
-        #         print("nn2.2.1")
+    #     #         print("nn2.2.1")
 
-        #         PSM_score = k[["nn.dist"]][mC]
-        #         geog_Weight = pairDistWeight(dist=euc_dist,threshold=dist_PSM,type="Spherical")
+    #     #         PSM_score = k[["nn.dist"]][mC]
+    #     #         geog_Weight = pairDistWeight(dist=euc_dist,threshold=dist_PSM,type="Spherical")
 
-        #         print("nn2.2.2")
+    #     #         print("nn2.2.2")
 
                 
-        #         k[["nn.dist"]][mC] <- ((1-geog_Weight) * PSM_score)
+    #     #         k[["nn.dist"]][mC] <- ((1-geog_Weight) * PSM_score)
 
-        #     }
+    #     #     }
       
-        # }
+    #     # }
 
-        # time_list[3] <- round((proc.time() - timer)[3],5)
-        print("nn2.3")
-        # timer <- proc.time()
+    #     # time_list[3] <- round((proc.time() - timer)[3],5)
+    #     print("nn2.3")
+    #     # timer <- proc.time()
 
-        #Add the matched treatment and control values to the recording data frame
-        #best_m_control is the row in the "distance" matrix with the lowest value.  This is the same row as in the index.
-        best_m_control = which(k[["nn.dist"]] %in% sort(k[["nn.dist"]])[1])
+    #     #Add the matched treatment and control values to the recording data frame
+    #     #best_m_control is the row in the "distance" matrix with the lowest value.  This is the same row as in the index.
+    #     best_m_control = which(k[["nn.dist"]] %in% sort(k[["nn.dist"]])[1])
         
-        #This will give us the matched index in the "treated" dataset.
-        best_m_treated = k[["nn.index"]][best_m_control]
+    #     #This will give us the matched index in the "treated" dataset.
+    #     best_m_treated = k[["nn.index"]][best_m_control]
         
 
-        # #Control PSM ID
-        cid_txt = paste("untreated$",ids,"[",best_m_control,"]",sep="")
-        Control_ID = toString(eval(parse(text=cid_txt)))
+    #     # #Control PSM ID
+    #     cid_txt = paste("untreated$",ids,"[",best_m_control,"]",sep="")
+    #     Control_ID = toString(eval(parse(text=cid_txt)))
             
-        # #Treatment PSM ID
-        tid_txt = paste("treated$",ids,"[",best_m_treated,"]",sep="")
-        Treatment_ID = toString(eval(parse(text=tid_txt)))
+    #     # #Treatment PSM ID
+    #     tid_txt = paste("treated$",ids,"[",best_m_treated,"]",sep="")
+    #     Treatment_ID = toString(eval(parse(text=tid_txt)))
 
 
 
-        #Create a unique pair ID for each group (will simply append a "1" if only 1 group)
-        # pair_id = paste(curgrp,j, sep="")
-        pair_id <- paste(Treatment_ID,Control_ID, sep='__')
-        print(pair_id)
+    #     #Create a unique pair ID for each group (will simply append a "1" if only 1 group)
+    #     # pair_id = paste(curgrp,j, sep="")
+    #     pair_id <- paste(Treatment_ID,Control_ID, sep='__')
+    #     print(pair_id)
 
-        # time_list[4] <- round((proc.time() - timer)[3],5)
-        print("nn2.4x")
-        # timer <- proc.time()
+    #     # time_list[4] <- round((proc.time() - timer)[3],5)
+    #     print("nn2.4x")
+    #     # timer <- proc.time()
 
-        #Add the Treatment ID to the Control Row and Add the Control ID to the Treatment Row
-        # dta@data$match[which(dta@data[[ids]] == Control_ID)] <- Treatment_ID
-        # dta@data$match[which(dta@data[[ids]] == Treatment_ID)] <- Control_ID
+    #     #Add the Treatment ID to the Control Row and Add the Control ID to the Treatment Row
+    #     # dta@data$match[which(dta@data[[ids]] == Control_ID)] <- Treatment_ID
+    #     # dta@data$match[which(dta@data[[ids]] == Treatment_ID)] <- Control_ID
 
 
-        # dta@data$PSM_distance[which(dta@data[[ids]] == Control_ID | dta@data[[ids]] == Treatment_ID)] <- k[["nn.dist"]][,1][best_m_control]
-        dta@data$PSM_match_ID[which(dta@data[[ids]] == Control_ID | dta@data[[ids]] == Treatment_ID)] <- pair_id        
+    #     # dta@data$PSM_distance[which(dta@data[[ids]] == Control_ID | dta@data[[ids]] == Treatment_ID)] <- k[["nn.dist"]][,1][best_m_control]
+    #     dta@data$PSM_match_ID[which(dta@data[[ids]] == Control_ID | dta@data[[ids]] == Treatment_ID)] <- pair_id        
 
 
         
-        # time_list[5] <- round((proc.time() - timer)[3],5)
-        # timer <- proc.time()
+    #     # time_list[5] <- round((proc.time() - timer)[3],5)
+    #     # timer <- proc.time()
 
-        #Drop the paired match out of the iteration matrix 
-        # sorted_dta <- sorted_dta[sorted_dta[[ids]] != Treatment_ID ,]
-        # sorted_dta <- sorted_dta[sorted_dta[[ids]] != Control_ID ,]    
+    #     #Drop the paired match out of the iteration matrix 
+    #     # sorted_dta <- sorted_dta[sorted_dta[[ids]] != Treatment_ID ,]
+    #     # sorted_dta <- sorted_dta[sorted_dta[[ids]] != Control_ID ,]    
     
-        # sorted_dta[which(sorted_dta[[ids]] == Control_ID | sorted_dta[[ids]] == Treatment_ID),][['nn_matched']] <- 1
+    #     # sorted_dta[which(sorted_dta[[ids]] == Control_ID | sorted_dta[[ids]] == Treatment_ID),][['nn_matched']] <- 1
 
-        treated <- treated[which(treated[[ids]] != Treatment_ID),]
-        untreated <- untreated[which(untreated[[ids]] != Control_ID),]
+    #     treated <- treated[which(treated[[ids]] != Treatment_ID),]
+    #     untreated <- untreated[which(untreated[[ids]] != Control_ID),]
         
-        # time_list[6] <- round((proc.time() - timer)[3],5)
-        # print(paste(time_list))
+    #     # time_list[6] <- round((proc.time() - timer)[3],5)
+    #     # print(paste(time_list))
 
-    }
+    # }
 
 
     return(dta) 

From b9edba9e6bda334a69a51bc0e54160af82209d0c Mon Sep 17 00:00:00 2001
From: userz <sgoodman@aiddata.org>
Date: Fri, 6 May 2016 13:23:14 -0400
Subject: [PATCH 211/212] Fix type.

---
 R/SAT.R | 84 ++++++++++++++++++++++++++++-----------------------------
 1 file changed, 42 insertions(+), 42 deletions(-)

diff --git a/R/SAT.R b/R/SAT.R
index 8f9812b..7b2e751 100644
--- a/R/SAT.R
+++ b/R/SAT.R
@@ -5,16 +5,16 @@
 #     #Initialization
 #     pltObjs <- list()
 #     init_dta <- dta
-  
+
 #     drop_unmatched = drop_opts["drop_unmatched"]
 #     drop_method = drop_opts["drop_method"]
 #     drop_thresh = as.numeric(drop_opts["drop_thresh"])
-  
-    
+
+
 
 #     print("sat1")
 
-#     if (!is.null(constraints) && contraints != c()) {
+#     if (!is.null(constraints) && constraints != c()) {
 #         for (cst in 1:length(names(constraints))) {
 #             if (names(constraints)[cst] == "groups") {
 #                 dta[,"ConstraintGroupSet_Opt"] <- dta[,get(constraints["groups"])]
@@ -29,7 +29,7 @@
 #                 dist_PSM=NULL
 #             }
 #         }
-        
+
 #     } else {
 #         dta[,"ConstraintGroupSet_Opt"] <- 1
 #         #max the distance threshold by taking the diagonal of the bounding box.
@@ -42,7 +42,7 @@
 
 #     #Caclulate the number of groups to constrain by, if any.
 #     group_constraints <- unique(dta[,'ConstraintGroupSet_Opt']
-  
+
 #     #Make sure there are both treatment and control groups of an adequate size (>= 1 of each)
 #     t_dta <- list()
 #     u_dta <-list()
@@ -69,7 +69,7 @@
 #             war_statement = paste("Dropped group due to a lack of both treatment and control observation: '",cur_grp,"'",sep="")
 #             warning(war_statement)
 
-#         } else { 
+#         } else {
 #             t_dta[[t_index]] <- t_dta[[t_index]][ConstraintGroupSet_Opt == (cur_grp)]
 #             u_dta[[t_index]] <- u_dta[[t_index]][ConstraintGroupSet_Opt == (cur_grp)]
 
@@ -97,12 +97,12 @@
 #         if (mtd == "fastNN") {
 #             # ***
 #             # this is the slow part of functions
-#             temp_dta[[i]] <- fastNN_binary_func(it_dta, TrtBinColName, ids, cur_grp, dist_PSM) 
+#             temp_dta[[i]] <- fastNN_binary_func(it_dta, TrtBinColName, ids, cur_grp, dist_PSM)
 #         }
 
 #         if (mtd == "NN_WithReplacement") {
 #             print("NN with replacement is currently not available, please choose fastNN")
-#             # temp_dta[[i]] <- NN_WithReplacement_binary_func(it_dta,TrtBinColName,ids,cur_grp,dist_PSM) 
+#             # temp_dta[[i]] <- NN_WithReplacement_binary_func(it_dta,TrtBinColName,ids,cur_grp,dist_PSM)
 #         }
 #     }
 
@@ -113,7 +113,7 @@
 #         dta <- temp_dta[[1]]
 #         for(k in 2:cnt) {
 #             dta  <- maptools::spRbind(dta, temp_dta[[k]])
-#         } 
+#         }
 #     } else {
 #         dta <- temp_dta[[1]]
 #     }
@@ -121,15 +121,15 @@
 #     print("sat5")
 
 #     if (drop_unmatched == TRUE) {
-#         dta <- dta["PSM_match_ID" != -999]    
+#         dta <- dta["PSM_match_ID" != -999]
 #     }
-  
+
 #     anc_v_int <- strsplit(psm_eq, "~")[[1]][2]
 #     anc_vars <- strsplit(gsub(" ","",anc_v_int), "+", fixed=TRUE)
 #     anc_vars <- c(anc_vars[[1]], "PSM_trtProb")
-    
+
 #     print("sat6")
-  
+
 #     #Drop observations according to the selected method
 #     if (drop_method == "SD") {
 #         #Method to drop pairs that are greater than a set threshold apart in terms of PSM Standard Deviations.
@@ -139,7 +139,7 @@
 #         }
 #         dta <- dta["PSM_distance" < psm_sd_thresh]
 #     }
-  
+
 
 
 #     #Plot the pre and post-dropping balance for PSM model...
@@ -149,7 +149,7 @@
 #     cnt = 0
 
 #     print("sat7")
-    
+
 #     for (i in 1:length(anc_vars)) {
 
 #         print("sat7.0")
@@ -174,7 +174,7 @@
 #         if ((c_type == "numeric") & (visual == "TRUE")) {
 #             cnt = cnt + 1
 #             pltObjs[[length(pltObjs) + 1]] <- GroupCompHist(init_dta, anc_vars[i],"Pre-Balancing: ",simple_out = FALSE)
-#             pltObjs[[length(pltObjs) + 1]] <- GroupCompHist(dta, anc_vars[i],"Post-Balancing: ",simple_out = FALSE)  
+#             pltObjs[[length(pltObjs) + 1]] <- GroupCompHist(dta, anc_vars[i],"Post-Balancing: ",simple_out = FALSE)
 
 
 #             treat_mean_pre <- round(describeBy(init_dta[[ed_v]], group=init_dta[[TrtBinColName]])[[2]][[3]], 5)
@@ -215,13 +215,13 @@
 #                               treat_mean_post,treat_SD_post,control_mean_post,control_SD_post,
 #                               it_diff_Mean_pre,it_diff_Mean_post))
 #             }
-      
+
 #             rownames(bRes)[i-(i-cnt)] <- gsub("[^a-zA-Z0-9]", "", ed_v)
 #         }
 #     }
 
 #     print("sat8")
-  
+
 #     if (visual=="TRUE") {
 #         #Output graphics
 #         #Remove the factor rows
@@ -238,7 +238,7 @@
 #         #bTab <- stargazer(bRes,summary=FALSE,type="html")
 #         #print.htmlTable(bTab)
 #     }
-  
+
 
 #     return (as.data.frame(dta))
 # }
@@ -251,16 +251,16 @@ SAT <- function (dta, mtd, constraints, psm_eq, ids, drop_opts, visual, TrtBinCo
     #Initialization
     pltObjs <- list()
     init_dta <- dta
-  
+
     drop_unmatched = drop_opts["drop_unmatched"]
     drop_method = drop_opts["drop_method"]
     drop_thresh = as.numeric(drop_opts["drop_thresh"])
-  
-    
+
+
 
     print("sat1")
 
-    if (!is.null(constraints) && contraints != c()) {
+    if (!is.null(constraints) && constraints != c()) {
 
         print("sat1a.1")
 
@@ -284,7 +284,7 @@ SAT <- function (dta, mtd, constraints, psm_eq, ids, drop_opts, visual, TrtBinCo
 
         # Caclulate the number of groups to constrain by, if any.
         group_constraints <- unique(dta$ConstraintGroupSet_Opt)
-      
+
         # Make sure there are both treatment and control groups of an adequate size (>= 1 of each)
         t_dta <- list()
         u_dta <-list()
@@ -311,7 +311,7 @@ SAT <- function (dta, mtd, constraints, psm_eq, ids, drop_opts, visual, TrtBinCo
                 war_statement = paste("Dropped group due to a lack of both treatment and control observation: '",cur_grp,"'",sep="")
                 warning(war_statement)
 
-            } else { 
+            } else {
                 t_dta[[t_index]] <- t_dta[[t_index]][t_dta[[t_index]]$ConstraintGroupSet_Opt == cur_grp,]
                 u_dta[[t_index]] <- u_dta[[t_index]][u_dta[[t_index]]$ConstraintGroupSet_Opt == cur_grp,]
 
@@ -338,12 +338,12 @@ SAT <- function (dta, mtd, constraints, psm_eq, ids, drop_opts, visual, TrtBinCo
             if (mtd == "fastNN") {
                 # ***
                 # this is the slow part of functions
-                temp_dta[[i]] <- fastNN_binary_func(it_dta, TrtBinColName, ids, cur_grp, dist_PSM) 
+                temp_dta[[i]] <- fastNN_binary_func(it_dta, TrtBinColName, ids, cur_grp, dist_PSM)
             }
 
             # if (mtd == "NN_WithReplacement") {
             #     print("NN with replacement is currently not available, please choose fastNN")
-            #     # temp_dta[[i]] <- NN_WithReplacement_binary_func(it_dta,TrtBinColName,ids,cur_grp,dist_PSM) 
+            #     # temp_dta[[i]] <- NN_WithReplacement_binary_func(it_dta,TrtBinColName,ids,cur_grp,dist_PSM)
             # }
         }
 
@@ -354,12 +354,12 @@ SAT <- function (dta, mtd, constraints, psm_eq, ids, drop_opts, visual, TrtBinCo
             dta <- temp_dta[[1]]
             for(k in 2:cnt) {
                 dta  <- maptools::spRbind(dta, temp_dta[[k]])
-            } 
+            }
         } else {
             dta <- temp_dta[[1]]
         }
 
-        
+
     } else {
 
         print("sat1b.1")
@@ -370,7 +370,7 @@ SAT <- function (dta, mtd, constraints, psm_eq, ids, drop_opts, visual, TrtBinCo
         if (mtd == "fastNN") {
             # ***
             # this is the slow part of functions
-            dta <- fastNN_binary_func(dta, TrtBinColName, ids, NULL, NULL) 
+            dta <- fastNN_binary_func(dta, TrtBinColName, ids, NULL, NULL)
 
             if (class(dta) == class('drop')) {
                 return('drop')
@@ -380,7 +380,7 @@ SAT <- function (dta, mtd, constraints, psm_eq, ids, drop_opts, visual, TrtBinCo
 
         # if (mtd == "NN_WithReplacement") {
         #     print("NN with replacement is currently not available, please choose fastNN")
-        #     # temp_dta[[i]] <- NN_WithReplacement_binary_func(it_dta,TrtBinColName,ids,cur_grp,dist_PSM) 
+        #     # temp_dta[[i]] <- NN_WithReplacement_binary_func(it_dta,TrtBinColName,ids,cur_grp,dist_PSM)
         # }
 
 
@@ -392,15 +392,15 @@ SAT <- function (dta, mtd, constraints, psm_eq, ids, drop_opts, visual, TrtBinCo
     print("sat5")
 
     if (drop_unmatched == TRUE) {
-        dta <- dta[dta@data[,"PSM_match_ID"] != -999,]    
+        dta <- dta[dta@data[,"PSM_match_ID"] != -999,]
     }
-  
+
     anc_v_int <- strsplit(psm_eq, "~")[[1]][2]
     anc_vars <- strsplit(gsub(" ","",anc_v_int), "+", fixed=TRUE)
     anc_vars <- c(anc_vars[[1]], "PSM_trtProb")
-    
+
     print("sat6")
-  
+
     #Drop observations according to the selected method
     if (drop_method == "SD") {
         #Method to drop pairs that are greater than a set threshold apart in terms of PSM Standard Deviations.
@@ -410,7 +410,7 @@ SAT <- function (dta, mtd, constraints, psm_eq, ids, drop_opts, visual, TrtBinCo
         }
         dta <- dta[dta@data[,"PSM_distance"] < psm_sd_thresh,]
     }
-  
+
 
 
     #Plot the pre and post-dropping balance for PSM model...
@@ -420,7 +420,7 @@ SAT <- function (dta, mtd, constraints, psm_eq, ids, drop_opts, visual, TrtBinCo
     cnt = 0
 
     print("sat7")
-    
+
     for (i in 1:length(anc_vars)) {
 
         print("sat7.0")
@@ -445,7 +445,7 @@ SAT <- function (dta, mtd, constraints, psm_eq, ids, drop_opts, visual, TrtBinCo
         if ((c_type == "numeric") & (visual == "TRUE")) {
             cnt = cnt + 1
             pltObjs[[length(pltObjs) + 1]] <- GroupCompHist(init_dta, anc_vars[i],"Pre-Balancing: ",simple_out = FALSE)
-            pltObjs[[length(pltObjs) + 1]] <- GroupCompHist(dta, anc_vars[i],"Post-Balancing: ",simple_out = FALSE)  
+            pltObjs[[length(pltObjs) + 1]] <- GroupCompHist(dta, anc_vars[i],"Post-Balancing: ",simple_out = FALSE)
 
 
             treat_mean_pre <- round(describeBy(init_dta@data[[ed_v]], group=init_dta@data[[TrtBinColName]])[[2]][[3]], 5)
@@ -486,13 +486,13 @@ SAT <- function (dta, mtd, constraints, psm_eq, ids, drop_opts, visual, TrtBinCo
                               treat_mean_post,treat_SD_post,control_mean_post,control_SD_post,
                               it_diff_Mean_pre,it_diff_Mean_post))
             }
-      
+
             rownames(bRes)[i-(i-cnt)] <- gsub("[^a-zA-Z0-9]", "", ed_v)
         }
     }
 
     print("sat8")
-  
+
     if (visual=="TRUE") {
         #Output graphics
         #Remove the factor rows
@@ -509,7 +509,7 @@ SAT <- function (dta, mtd, constraints, psm_eq, ids, drop_opts, visual, TrtBinCo
         #bTab <- stargazer(bRes,summary=FALSE,type="html")
         #print.htmlTable(bTab)
     }
-  
+
 
     return (dta)
 }

From 530b6dd5119d0882614d8ace0f2b536a3d6298fb Mon Sep 17 00:00:00 2001
From: userz <sgoodman@aiddata.org>
Date: Fri, 6 May 2016 13:27:29 -0400
Subject: [PATCH 212/212] Fix maybe.

---
 R/SAT.R | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/R/SAT.R b/R/SAT.R
index 7b2e751..a3e25a6 100644
--- a/R/SAT.R
+++ b/R/SAT.R
@@ -260,7 +260,8 @@ SAT <- function (dta, mtd, constraints, psm_eq, ids, drop_opts, visual, TrtBinCo
 
     print("sat1")
 
-    if (!is.null(constraints) && constraints != c()) {
+    # if (!is.null(constraints) && constraints != c()) {
+    if (!is.null(constraints) ) {
 
         print("sat1a.1")