initial r code

a748d7ed · Streams, James (CPT) · a748d7ed · a748d7ed · a748d7ed · a748d7ed
Commit a748d7ed authored 6 years ago by Streams, James (CPT)
--- a/ADOE.R
+++ b/ADOE.R
+##########################################################################################
+##################                ADOE Code                     ##########################
+##########################################################################################
+
+# Purpose: Aid analyst in analysis by allowing the program to determine the best range
+# the factor inputs should be to obtain an objective.
+
+##########################################################################################
+# Packages used:
+
+library(XML)        # Tools for parsing and generating XML within R
+library(stringr)    # Common string operations
+library(dplyr)      # Data manipulation
+library(DiceDesign) # Generates a DOEs
+library(tidyr)      # Easily tidy data with the spread() or gather() function
+library(data.table) # Extension of data frame
+library(bestglm)    # Best subset GLM and regression utilities
+library(gtools)     # Order string experssions; mixedsort()
+library(parallel)   # Allows for parallel computing
+
+# Set initial working directory
+setwd("C:\\Users\\James\\Desktop\\LBCv5\\ADOE\\")
+
+### Load DoE Generation Functions
+# Functions here include loading base case XML, creating experimental design, and generating XML scenarios for exection
+source('DOEGen.R')  # Loads DoE Generator Function (newDOE) based on 'DiceDesign' NOLH function.  
+source('scenCreator.R') # Loads function to take design and base XML (makeXMLs) to create required number of XMLs to run 
+source('gatherxmls.R') # Creates a list of XMLs to process through the simulation
+source('runSim.R') # Loads function that runs DoE in parallel.
+source('remove.R') # Remove scenarios files to clear room on hard drive
+source('response.R') # Gets the response variable from the data
+source('modeling.R') # generates the model
+source('adaptDOEGen.R') # DOE generator for the adaptive portion of the program
+source('newadptFolder.R') # Creates new folder for storage of each iterations data
+
+# Location of LBC or Simulation to use
+lbcFolder <- "C:\\Users\\James\\Desktop\\LBCv5\\"
+
+# xml files location
+setwd("C:\\Users\\James\\Desktop\\LBCv5\\data\\excel\\")
+
+# The xml file to parse for analysis
+xmlfile = "SCEN7-Phase-3-5NOV2018-90.v2.xml"
+
+# identify the directory where the files are to be located
+Dir = "E:\\files\\"
+Dir2 = "E:\\files\\"
+
+# the number of replications per design point
+numReps = 5     
+
+# Sets the initial range of value for the first DOE
+p = 0.15 
+
+# Sets the range of min amd max values for follow on DOEs
+p2 = 0.10
+
+# Starting value for iterations, should remain at 1 unless starting from a different iteration
+k = 1                           
+
+# Stopping condition for adaptive process
+z = 30               
+
+##########################################################################################
+##################       Search Parameters in XML files         ##########################
+##########################################################################################
+pstart = Sys.time() # starts the clock to time how long the program takes to complete
+
+# Breaks the xmlFile into sections and saves a temp version for data extraction
+xmldata <- xmlTreeParse(xmlfile, useInternal = TRUE)
+
+# Locates the a number of replications value
+numReplications = getNodeSet(xmldata, "//LBCAssembly[@numReplications]") 
+
+# find all ThresholdReorderLogic Nodes
+ReorderNodes=getNodeSet(xmldata, "//ThresholdReorderLogic")
+
+# Locates all the reorder point data (this section can be adjusted to look for other 
+#                         areas of interest such as consumption rate or truck capacities)
+ReorderValueNode = getNodeSet(xmldata, "//ThresholdReorderLogic//*[@value]")
+numNodes = length(ReorderValueNode)
+
+# Create lists to collect information from xmlFile
+numNodesName=character()
+nodeNum = integer()
+reps = character()
+name = character()
+Company = character()
+UnitId = character()
+Consumable =character()
+value = character()
+
+# this loop will return all required information from the parent nodes in reference 
+# to the attribute value name
+for (i in 1:numNodes){
+  parent = xmlParent(ReorderValueNode[[i]])
+  child = xmlChildren(ReorderNodes[[1]])[i]
+  grandParent = xmlParent(xmlParent(ReorderValueNode[[i]]))
+  nodeNum[i] = i
+  name[i] = xmlAttrs(parent) #gets the methodName of the parent node in reference to the attribute value name
+  reps[i] = xmlAttrs(numReplications[[1]])['numReplications']
+  
+  # use the name to find the Company Name
+  providerNode = getNodeSet(grandParent, ".//*[@refId]")
+  
+  # extract the consumable
+  Consumable[i]= xmlAttrs(providerNode[[1]])['refId']
+  
+  # extract the Company Name
+  UnitId[i]= xmlAttrs(providerNode[[2]])['refId']
+  
+  # extract the value
+  valueNode = getNodeSet(parent, ".//*[@value]")
+  value[i]= xmlAttrs(valueNode[[1]])['value']
+  
+}
+# Data frame of collected nodes from entire xmlFile
+ReorderDf = data.frame(nodeNum, reps, name, UnitId, Consumable, value)
+pnd = Sys.time()
+pnd - pstart
+##########################################################################################
+##################      Locating Factors of Interest            ##########################
+##########################################################################################
+# Choose the units or factors of interest through the use of dplyr filters
+
+# This can be changed to look for any unit or factor based on the information available 
+# from the xml data collection
+
+Div4 <- ReorderDf %>% select(nodeNum, reps, name, UnitId, Consumable, value) %>% 
+  filter(str_detect(UnitId, "DIV-4")) %>% filter(!str_detect(UnitId, paste(c("BSB", "FSC", "DISTRO", "ALLIED", "SUST", "EN-BDE", 
+                                                                             "HHC", "HHB", "HQ", "BEB","MEB","MI-BN"), collapse = "|"))) %>% filter(!str_detect(Consumable, paste(c("CARGO", "WATER"),collapse = "|")))
+
+# Enusre that MASS is not running at the same time as dplyr or else it will mask the select function in dplyr
+
+mainFactors <- Div4 %>% filter(str_detect(Consumable, "AMMO"))          # replace with your choosen data set
+mainFactors$value <- as.numeric(levels(mainFactors$value))[mainFactors$value] # values to vary to create a range
+# Code found at
+# https://stackoverflow.com/questions/3418128/how-to-convert-a-factor-to-integer-numeric-without-loss-of-information
+
+# Now set the range of min and max values
+
+# add those values to the data frame
+mainFactors['min'] <- (mainFactors$value - mainFactors$value*p)
+mainFactors['max'] <- (mainFactors$value*(1+p))
+
+# From DOEGen.R 
+ammoDOE <- newDOE(mainFactors)
+
+# Creates folder for initial storage of data, it is based on the Dir set at the beginning
+dir.create(Dir)
+
+writeScenarios(ammoDOE)
+files <- getXMLs(Dir)
+
+##########################################################################################
+# Runs each scenario through LBC, this takes awhile since it has to shut down and restart LBC each time
+setwd(lbcFolder)
+
+runEx(files, Dir)
+removeFiles(files)
+
+##########################################################################################
+################   Gather the information for the response varialbe   ####################
+##########################################################################################
+
+# Finds all the csv files and then puts the names in a list
+csvL <- list.files(path = Dir, pattern = '*.csv')
+
+# Creates an empty data frame for information collection, 
+# this can be modified based on what desired information the user is looking for
+results <- data.frame(matrix(ncol = 6, nrow = length(csvL)))
+colnames(results) <- c('All','Total', 'All%', 'Div 4', 'Total','Div 4 %')
+
+test <- getResponse(csvs = csvL, DOE = ammoDOE)
+
+##########################################################################################
+
+# need to include code that saves each linear model and test data to the folder holding the data
+par(mfrow = c(2,2))
+test.mod <- getModel(test)
+
+##########################################################################################
+################   Adaptive Portion, as long as it meets conditions   ####################
+##########################################################################################
+while (test.mod[1] > z){
+  if(k == 1){
+    print(paste('Now executing iteration',k))
+    
+    DOE <- aDOE(ammoDOE, test.mod[2], mainFactors)
+    
+    drive <- newFolder('E:\\', k)
+    
+    Dir = paste0(drive[1],'\\')
+    Dir2 = paste0(drive[2],'\\')
+    
+    setwd(lbcFolder) 
+    
+    writeScenarios(DOE)
+    
+    files <- getXMLs(Dir)
+    
+    runEx(files, Dir)
+    
+    removeFiles(files)
+    
+    # Finds all the csv files and then puts the names in a list
+    csvL <- list.files(path = Dir, pattern = '*.csv')
+    
+    # Creates an empty data frame for information collection, 
+    # this can be modified based on what desired information the user is looking for
+    results <- data.frame(matrix(ncol = 6, nrow = length(csvL)))
+    colnames(results) <- c('All','Total', 'All%', 'Div 4', 'Total','Div 4 %')
+    
+    newTest <- getResponse(csvs = csvL, DOE = DOE)
+    test <- rbind(test,newTest)
+    
+    curDOE <- test[,1:nrow(mainFactors)]
+    curDOE <- as.data.frame(t(curDOE))
+    
+    write.table(test, file = paste0(Dir,'test.csv'), sep = ',')
+    
+    par(mfrow = c(2,2))
+    test.mod <- getModel(test)
+    
+    k = k + 1
+    
+  } else {
+    
+    print(paste('Now executing iteration', k))
+    DOE <- aDOE(curDOE, test.mod[2], mainFactors)
+    drive <- newFolder('E:\\', k)
+    
+    Dir = paste0(drive[1],'\\')
+    Dir2 = paste0(drive[2],'\\')
+    
+    setwd(lbcFolder) 
+    
+    writeScenarios(DOE)
+    
+    files <- getXMLs(Dir)
+    
+    runEx(files, Dir)
+    
+    removeFiles(files)
+    
+    csvL <- list.files(path = Dir, pattern = '*.csv')
+    
+    results <- data.frame(matrix(ncol = 6, nrow = length(csvL)))
+    colnames(results) <- c('All','Total', 'All%', 'Div 4', 'Total','Div 4 %')
+    
+    newTest <- getResponse(csvs = csvL, DOE = DOE)
+    test <- rbind(test,newTest)
+    
+    curDOE <- test[,1:nrow(mainFactors)]
+    curDOE <- as.data.frame(t(curDOE))
+    
+    write.table(test, file = paste0(Dir,'test',k,'.csv'), sep = ',')
+    
+    par(mfrow = c(2,2))
+    test.mod <- getModel(test)
+    
+    
+    k = k + 1
+  }
+}  
+
+if (k == 1){ 
+  # Find the final model
+  final.mod <- glm(y~(.)^2, data = test) 
+  # summary(final.mod)
+
+  favg <- mean(final.mod$residuals)
+  fsigma <- sd(final.mod$residuals)
+  
+  fub <- favg + 1.96*sqrt(fsigma/length(final.mod$residuals))
+  flb <- favg - 1.96*sqrt(fsigma/length(final.mod$residuals))
+  
+  resid <- sum(final.mod$residuals>fub) + sum(sum(final.mod$residuals<flb))
+  print(paste0('The number of residuals outside of the accepted range is ', resid, '.'))
+  print(paste('The program took', k, 'iterations for find the model.'))
+  
+  # plot the Residuals and QQ PLot
+  par(mfrow = c(2,1))
+  plot(final.mod, which = c(2,1)) # prints the residual vs fitted graph
+  abline(plot(final.mod, which = c(1,1)),h = fub, col = 'red', lty = 'dashed')
+  abline(h = flb, col = 'red', lty = 'dashed')
+  par(new = T)
+  # https://stackoverflow.com/questions/2564258/plot-two-graphs-in-same-plot-in-r
+  plot(final.mod, which = c(2,1))
+  
+  # Diplay the top 10 significant factors of the model
+  focus <- data.frame(names(final.mod$coefficients))
+  # https://stackoverflow.com/questions/16724148/getting-predictor-names-in-r-regression
+  focus$pvalue <- coef(summary(final.mod))[,4]
+  # https://stackoverflow.com/questions/23838937/extract-pvalue-from-glm
+  focus$coeff <- coef(summary(final.mod))[,1]
+  colnames(focus)[1] <- 'Factors'
+  impFac <- focus[focus$pvalue < 0.05,]
+  top10 <- impFac[order(abs(impFac$coeff), decreasing = TRUE),]
+  print(top10[1:10,])
+  
+  # End program
+  print('Program complete')
+  pend = Sys.time()
+  print(pend - pstart)
+  
+} else {
+  # Find the final model
+  final.mod <- glm(y~(.)^2, data = test) 
+  # summary(final.mod)
+  
+  favg <- mean(final.mod$residuals)
+  fsigma <- sd(final.mod$residuals)
+  
+  fub <- favg + 1.96*sqrt(fsigma/length(final.mod$residuals))
+  flb <- favg - 1.96*sqrt(fsigma/length(final.mod$residuals))
+  
+  resid <- sum(final.mod$residuals>fub) + sum(sum(final.mod$residuals<flb))
+  resid
+  print(paste0('The number of residuals outside of the accepted range is ', resid, '.'))
+  print(paste('The program took', k, 'iterations for find the model.'))
+  
+  # plot the Residuals and QQ PLot
+  par(mfrow = c(2,1))
+  plot(final.mod, which = c(2,1)) 
+  abline(plot(final.mod, which = c(1,1)),h = fub, col = 'red', lty = 'dashed')
+  abline(h = flb, col = 'red', lty = 'dashed')
+  par(new = T)
+  plot(final.mod, which = c(2,1))
+
+  # Diplay the top 10 significant factors of the model
+  focus <- data.frame(names(final.mod$coefficients))
+  focus$pvalue <- coef(summary(final.mod))[,4]
+  focus$coeff <- coef(summary(final.mod))[,1]
+  colnames(focus)[1] <- 'Factors'
+  impFac <- focus[focus$pvalue < 0.05,]
+  top10 <- impFac[order(abs(impFac$coeff), decreasing = TRUE),]
+  print(top10[1:10,])
+  
+  # End Program
+  print('Program Complete')
+  pend = sys.time()
+  print(pend - pstart)
+}
+
+
--- a/ADOEwithHPC.R
+++ b/ADOEwithHPC.R
+##########################################################################################
+##################                ADOE Code                     ##########################
+##########################################################################################
+
+# Purpose: Aid analyst in analysis by allowing the program to determine the best range
+# the factor inputs should be to obtain an objective.
+
+##########################################################################################
+# Packages used:
+
+library(XML)        # Tools for parsing and generating XML within R
+library(stringr)    # Common string operations
+library(dplyr)      # Data manipulation
+library(DiceDesign) # Generates a DOEs
+library(tidyr)      # Easily tidy data with the spread() or gather() function
+library(data.table) # Extension of data frame
+library(bestglm)    # Best subset GLM and regression utilities
+library(gtools)     # Order string experssions; mixedsort()
+library(parallel)   # Allows for parallel computing
+
+# Set initial working directory
+setwd("./LBCv5/ADOE/")
+
+### Load DoE Generation Functions
+# Functions here include loading base case XML, creating experimental design, and generating XML scenarios for exection
+source('DOEGen.R')  # Loads DoE Generator Function (newDOE) based on 'DiceDesign' NOLH function.  
+source('scenCreator.R') # Loads function to take design and base XML (makeXMLs) to create required number of XMLs to run 
+source('gatherxmls.R') # Creates a list of XMLs to process through the simulation
+source('runSim.R') # Loads function that runs DoE in parallel.
+source('remove.R') # Remove scenarios files to clear room on hard drive
+source('response.R') # Gets the response variable from the data
+source('modeling.R') # generates the model
+source('adaptDOEGen.R') # DOE generator for the adaptive portion of the program
+source('newadptFolder.R') # Creates new folder for storage of each iterations data
+
+# Location of LBC or Simulation to use
+lbcFolder <- "./LBCv5/"
+
+# xml files location
+setwd("./LBCv5/data/excel/")
+
+# The xml file to parse for analysis
+xmlfile = "SCEN7-Phase-3-5NOV2018-90.v2.xml"
+
+# identify the directory where the files are to be located
+Dir = "./data/files/"
+Dir2 = "./data/files/"
+
+# the number of replications per design point
+numReps = 5     
+
+# Sets the initial range of value for the first DOE
+p = 0.15 
+
+# Sets the range of min amd max values for follow on DOEs
+p2 = 0.10
+
+# Starting value for iterations, should remain at 1 unless starting from a different iteration
+k = 1                           
+
+# Stopping condition for adaptive process
+z = 20               
+
+##########################################################################################
+##################       Search Parameters in XML files         ##########################
+##########################################################################################
+pstart = Sys.time() # starts the clock to time how long the program takes to complete
+
+# Breaks the xmlFile into sections and saves a temp version for data extraction
+xmldata <- xmlTreeParse(xmlfile, useInternal = TRUE)
+
+# Locates the a number of replications value
+numReplications = getNodeSet(xmldata, "//LBCAssembly[@numReplications]") 
+
+# find all ThresholdReorderLogic Nodes
+ReorderNodes=getNodeSet(xmldata, "//ThresholdReorderLogic")
+
+# Locates all the reorder point data (this section can be adjusted to look for other 
+#                         areas of interest such as consumption rate or truck capacities)
+ReorderValueNode = getNodeSet(xmldata, "//ThresholdReorderLogic//*[@value]")
+numNodes = length(ReorderValueNode)
+
+# Create lists to collect information from xmlFile
+numNodesName=character()
+nodeNum = integer()
+reps = character()
+name = character()
+Company = character()
+UnitId = character()
+Consumable =character()
+value = character()
+
+# this loop will return all required information from the parent nodes in reference 
+# to the attribute value name
+for (i in 1:numNodes){
+  parent = xmlParent(ReorderValueNode[[i]])
+  child = xmlChildren(ReorderNodes[[1]])[i]
+  grandParent = xmlParent(xmlParent(ReorderValueNode[[i]]))
+  nodeNum[i] = i
+  name[i] = xmlAttrs(parent) #gets the methodName of the parent node in reference to the attribute value name
+  reps[i] = xmlAttrs(numReplications[[1]])['numReplications']
+  
+  # use the name to find the Company Name
+  providerNode = getNodeSet(grandParent, ".//*[@refId]")
+  
+  # extract the consumable
+  Consumable[i]= xmlAttrs(providerNode[[1]])['refId']
+  
+  # extract the Company Name
+  UnitId[i]= xmlAttrs(providerNode[[2]])['refId']
+  
+  # extract the value
+  valueNode = getNodeSet(parent, ".//*[@value]")
+  value[i]= xmlAttrs(valueNode[[1]])['value']
+  
+}
+# Data frame of collected nodes from entire xmlFile
+ReorderDf = data.frame(nodeNum, reps, name, UnitId, Consumable, value)
+
+##########################################################################################
+##################      Locating Factors of Interest            ##########################
+##########################################################################################
+# Choose the units or factors of interest through the use of dplyr filters
+
+# This can be changed to look for any unit or factor based on the information available 
+# from the xml data collection
+
+Div4 <- ReorderDf %>% select(nodeNum, reps, name, UnitId, Consumable, value) %>% 
+  filter(str_detect(UnitId, "DIV-4")) %>% filter(!str_detect(UnitId, paste(c("BSB", "FSC", "DISTRO", "ALLIED", "SUST", "EN-BDE", 
+                                                                             "HHC", "HHB", "HQ", "BEB","MEB","MI-BN"), collapse = "|"))) %>% filter(!str_detect(Consumable, paste(c("CARGO", "WATER"),collapse = "|")))
+
+# Enusre that MASS is not running at the same time as dplyr or else it will mask the select function in dplyr
+
+mainFactors <- Div4 %>% filter(str_detect(Consumable, "AMMO"))          # replace with your choosen data set
+mainFactors$value <- as.numeric(levels(mainFactors$value))[mainFactors$value] # values to vary to create a range
+# Code found at
+# https://stackoverflow.com/questions/3418128/how-to-convert-a-factor-to-integer-numeric-without-loss-of-information
+
+# Now set the range of min and max values
+
+# add those values to the data frame
+mainFactors['min'] <- (mainFactors$value - mainFactors$value*p)
+mainFactors['max'] <- (mainFactors$value*(1+p))
+
+# From DOEGen.R 
+ammoDOE <- newDOE(mainFactors)
+
+# Creates folder for initial storage of data, it is based on the Dir set at the beginning
+dir.create(Dir)
+
+writeScenarios(ammoDOE)
+files <- getXMLs(Dir)
+
+##########################################################################################
+# Runs each scenario through LBC, this takes awhile since it has to shut down and restart LBC each time
+setwd(lbcFolder)
+
+runEx(files, Dir)
+removeFiles(files)
+
+##########################################################################################
+################   Gather the information for the response varialbe   ####################
+##########################################################################################
+
+# Finds all the csv files and then puts the names in a list
+csvL <- list.files(path = Dir, pattern = '*.csv')
+
+# Creates an empty data frame for information collection, 
+# this can be modified based on what desired information the user is looking for
+results <- data.frame(matrix(ncol = 6, nrow = length(csvL)))
+colnames(results) <- c('All','Total', 'All%', 'Div 4', 'Total','Div 4 %')
+
+test <- getResponse(csvs = csvL, DOE = ammoDOE)
+
+##########################################################################################
+
+# need to include code that saves each linear model and test data to the folder holding the data
+par(mfrow = c(2,2))
+test.mod <- getModel(test)
+
+##########################################################################################
+################   Adaptive Portion, as long as it meets conditions   ####################
+##########################################################################################
+while (test.mod[1] > z){
+  if(k == 1){
+    print(paste('Now executing iteration',k))
+    
+    DOE <- aDOE(ammoDOE, test.mod[2], mainFactors)
+    
+    drive <- newFolder('E:\\', k)
+    
+    Dir = paste0(drive[1],'\\')
+    Dir2 = paste0(drive[2],'\\')
+    
+    setwd(lbcFolder) 
+    
+    writeScenarios(DOE)
+    
+    files <- getXMLs(Dir)
+    
+    runEx(files, Dir)
+    
+    removeFiles(files)
+    
+    # Finds all the csv files and then puts the names in a list
+    csvL <- list.files(path = Dir, pattern = '*.csv')
+    
+    # Creates an empty data frame for information collection, 
+    # this can be modified based on what desired information the user is looking for
+    results <- data.frame(matrix(ncol = 6, nrow = length(csvL)))
+    colnames(results) <- c('All','Total', 'All%', 'Div 4', 'Total','Div 4 %')
+    
+    newTest <- getResponse(csvs = csvL, DOE = DOE)
+    test <- rbind(test,newTest)
+    
+    curDOE <- test[,1:nrow(mainFactors)]
+    curDOE <- as.data.frame(t(curDOE))
+    
+    write.table(test, file = paste0(Dir,'test.csv'), sep = ',')
+    
+    par(mfrow = c(2,2))
+    test.mod <- getModel(test)
+    
+    k = k + 1
+    
+  } else {
+    
+    print(paste('Now executing iteration', k))
+    DOE <- aDOE(curDOE, test.mod[2], mainFactors)
+    drive <- newFolder('E:\\', k)
+    
+    Dir = paste0(drive[1],'\\')
+    Dir2 = paste0(drive[2],'\\')
+    
+    setwd(lbcFolder) 
+    
+    writeScenarios(DOE)
+    
+    files <- getXMLs(Dir)
+    
+    runEx(files, Dir)
+    
+    removeFiles(files)
+    
+    csvL <- list.files(path = Dir, pattern = '*.csv')
+    
+    results <- data.frame(matrix(ncol = 6, nrow = length(csvL)))
+    colnames(results) <- c('All','Total', 'All%', 'Div 4', 'Total','Div 4 %')
+    
+    newTest <- getResponse(csvs = csvL, DOE = DOE)
+    test <- rbind(test,newTest)
+    
+    curDOE <- test[,1:nrow(mainFactors)]
+    curDOE <- as.data.frame(t(curDOE))
+    
+    write.table(test, file = paste0(Dir,'test',k,'.csv'), sep = ',')
+    
+    par(mfrow = c(2,2))
+    test.mod <- getModel(test)
+    
+    
+    k = k + 1
+  }
+}  
+
+if (k == 1){ 
+  final.mod <- glm(y~(.)^2, data = test) 
+  summary(final.mod)
+
+  favg <- mean(final.mod$residuals)
+  fsigma <- sd(final.mod$residuals)
+  
+  fub <- favg + 1.96*sqrt(fsigma/length(final.mod$residuals))
+  flb <- favg - 1.96*sqrt(fsigma/length(final.mod$residuals))
+  
+  resid <- sum(final.mod$residuals>fub) + sum(sum(final.mod$residuals<flb))
+  print(paste0('The number of residuals outside of the accepted range is ', resid, '.'))
+  print(paste('The program took', k, 'iterations for find the model.'))
+  
+  par(mfrow = c(2,1))
+  plot(final.mod, which = c(2,1)) # prints the residual vs fitted graph
+  
+  abline(plot(final.mod, which = c(1,1)),h = fub, col = 'red', lty = 'dashed')
+  abline(h = flb, col = 'red', lty = 'dashed')
+  par(new = T)
+  # https://stackoverflow.com/questions/2564258/plot-two-graphs-in-same-plot-in-r
+  plot(final.mod, which = c(2,1))
+  print('Program complete')
+  pend = Sys.time()
+  print(pend - pstart)
+  
+} else {
+  final.mod <- glm(y~(.)^2, data = test) 
+  summary(final.mod)
+  
+  favg <- mean(final.mod$residuals)
+  fsigma <- sd(final.mod$residuals)
+  
+  fub <- favg + 1.96*sqrt(fsigma/length(final.mod$residuals))
+  flb <- favg - 1.96*sqrt(fsigma/length(final.mod$residuals))
+  
+  resid <- sum(final.mod$residuals>fub) + sum(sum(final.mod$residuals<flb))
+  print(paste0('The number of residuals outside of the accepted range is ', resid, '.'))
+  print(paste('The program took', k, 'iterations for find the model.'))
+  par(mfrow = c(2,1))
+  plot(final.mod, which = c(2,1)) # prints the residual vs fitted graph
+  
+  abline(plot(final.mod, which = c(1,1)),h = fub, col = 'red', lty = 'dashed')
+  abline(h = flb, col = 'red', lty = 'dashed')
+  par(new = T)
+  plot(final.mod, which = c(2,1))
+  print('Program Complete')
+  pend = sys.time()
+  print(pend - pstart)
+}
+
+pend = Sys.time()
+(total = pend - pstart)
--- a/DOEGen.R
+++ b/DOEGen.R
+##########################################################################################
+##################       Function to create new set of DOE     ##########################
+##########################################################################################
+
+newDOE <- function(df){
+  # Create a list of list of percentages to use and output a set of design points for the model
+  
+  set.seed(1234)    # sent seed for the NOLH creation
+  blank <- nolhDesign(nrow(df))
+  # Code found at
+  # https://rdrr.io/cran/DiceDesign/man/nolhDesign.html
+  
+  # Create a data frame of ranges for each factor
+  ammo <- data.frame(matrix(ncol = blank$n, nrow = nrow(df)))
+  for (i in 1:nrow(df)){
+    ammo[i,] <- df$max[i] - df$min[i]
+  }
+  
+  # transpose NOLH percentages for matrix math operations
+  blank2 <- as.data.frame(t(blank$design))
+  
+  # create a data frame of ranges * NOLH percentages values to get value to add to min of each range of each factor 
+  add2min <- as.data.frame(ammo * blank2)
+  
+  # create a data frame of all the min value ranges for each factor
+  ammoMin <- data.frame(matrix(ncol = blank$n, nrow = nrow(df)))
+  for (i in 1:nrow(df)){
+    ammoMin[i,] <- df$min[i]
+  }
+  
+  # create the DOE
+  ammoDOE <- as.data.frame(as.matrix(ammoMin) + as.matrix(add2min))
+  
+  # Change the Row names of the DOEs
+  designPoints <- NULL
+  for (i in  1:blank$n){
+    designPoints[i] <- paste("DP",i, sep = " ")  
+  }
+  colnames(ammoDOE) <- designPoints
+  row.names(ammoDOE) <- df$UnitId
+  
+  return(ammoDOE)
+}
--- a/adaptDOEGen.R
+++ b/adaptDOEGen.R
+aDOE <- function(DOE, center, factorDOE){
+  newVal <- data.frame(DOE[,test.mod[2]])
+  colnames(newVal) <- 'value'
+  newVal
+  
+  p2 = 0.10
+  newVal['min'] <- (newVal$value - newVal$value*p2)
+  newVal['max'] <- (newVal$value*(1+p2))
+  
+  blank <- nolhDesign(nrow(newVal))
+  
+  ammo2 <- data.frame(matrix(ncol = blank$n, nrow = nrow(newVal)))
+  for (i in 1:nrow(newVal)){
+    ammo2[i,] <- newVal$max[i] - newVal$min[i]
+  }
+  
+  blank2 <- as.data.frame(t(blank$design))
+  add2min <- as.data.frame(ammo2 * blank2)
+  ammoMin2 <- data.frame(matrix(ncol = blank$n, nrow = nrow(newVal)))
+  for (i in 1:nrow(newVal)){
+    ammoMin2[i,] <- newVal$min[i]
+  }
+  ammoDOE2 <- as.data.frame(as.matrix(ammoMin2) + as.matrix(add2min))
+  designPoints3 <- NULL
+  for (i in  1:blank$n){
+    designPoints3[i] <- paste("DP",i+(257*k), sep = " ")  
+  }
+  colnames(ammoDOE2) <- designPoints3
+  row.names(ammoDOE2) <- factorDOE$UnitId
+  
+  return(ammoDOE2)
+}
\ No newline at end of file
--- a/gatherxmls.R
+++ b/gatherxmls.R
+getXMLs <- function(path){
+  print('Locating and sorting .xml files...')
+  xmlL <- list.files(path = path, pattern = '*.xml')
+  xmlL <- mixedsort(xmlL) # function from gtools package
+  print(paste('List created, found', length(xmlL), 'files in directory.'))
+  return(xmlL)
+}
\ No newline at end of file
--- a/modeling.R
+++ b/modeling.R
+##########################################################################################
+################   Gather the information for the response varialbe   ####################
+##########################################################################################
+
+getModel <- function(data){
+  print('Conducting regression analysis of the data...')
+  # This will find the resdiuals that fall within a certain range and return the number of 
+  # residuals that are outside of the range and DP with the highest residual
+  glm.mod <- glm(y~(.)^2, data = data) 
+  print('Model Complete..')
+  setwd(Dir)
+  print('Saving model to folder...')
+  save(glm.mod, file = 'glmmod.RData')
+  # https://stackoverflow.com/questions/14761496/saving-and-loading-a-model-in-r
+  print('Saving data frame to folder..')
+  write.table(data, file = 'data.csv', sep = ',')
+  # https://datascienceplus.com/exporting-data-from-r/
+  
+  avg <- mean(glm.mod$residuals)
+  sigma <- sd(glm.mod$residuals)
+  
+  ub <- avg + 1.96*sqrt(sigma/length(glm.mod$residuals))
+  lb <- avg - 1.96*sqrt(sigma/length(glm.mod$residuals))
+  
+  resid <- sum(glm.mod$residuals>ub) + sum(sum(glm.mod$residuals<lb))
+  
+  newCen <- which.max(abs(glm.mod$residuals))
+  print('Function Complete...')
+  return(c(resid,newCen, plot(glm.mod)))
+}
\ No newline at end of file
--- a/newadptFolder.R
+++ b/newadptFolder.R
+newFolder <- function(path, k){
+  dir.create(paste0(path,'files',k ))   # Create folder for storage
+  
+  Dir = paste0(path,'files',k)
+  Dir2 = paste0(path,'files',k)
+  
+  return(c(Dir, Dir2))
+}
\ No newline at end of file
--- a/remove.R
+++ b/remove.R
+# This removes all the .xml files to free up space on the computer
+removeFiles<- function(xmls){
+  for (i in 1:length(xmls)){
+    fileLocation= paste0(Dir,"design_point_", i ,".xml")  
+    file.remove(fileLocation)
+  }
+  print('All .xml removed from directory...')
+}
\ No newline at end of file
--- a/response.R
+++ b/response.R
+##########################################################################################
+getResponse <- function(csvs,DOE){
+  print('Generating Reponse for each experiment...')
+  start_time4 <- Sys.time()
+  
+  for (i in 1:length(csvs)){
+    
+    outputData1 <- fread(paste0(Dir,csvs[i]))
+    
+    ## Get Data for Supply Minimum Supply Table, must be reactive value to avoid conflicts
+    supplyTable <- outputData1 %>% dplyr::filter(propertyName=="balanceOnHand") %>%  collect()
+    
+    # Find minimum supply level reached by unit/commodity, excluding convoys
+    minsupplyTable <- supplyTable %>% filter(!str_detect(`data`,"Convoy")) %>% 
+      group_by(entityName, data) %>% 
+      summarise(minOnHand=min(as.numeric(newValue), na.rm=TRUE)) %>% 
+      ungroup()
+    
+    # Find starting supply level by unit/commodity, excluding convoys
+    minPercentTable<-supplyTable%>% 
+      filter(!str_detect(`data`,"Convoy")) %>% 
+      filter(!str_detect(`entityName`,"UNLIMITED")) %>% 
+      dplyr::select(-replication,-eventName, -propertyName, -oldValue) %>% 
+      group_by(entityName, data) %>% 
+      mutate(minOnHand=min(as.numeric(newValue), na.rm=TRUE), minTime=min(time)) %>% 
+      ungroup() %>% 
+      filter(time==minTime) %>% 
+      mutate(minPercOnHand=minOnHand/as.numeric(newValue)) %>% 
+      dplyr::select(data, entityName, minPercOnHand)
+    
+    minPercentTable[which.min(minPercentTable$minPercOnHand),]
+    sub <- nrow(minPercentTable[minPercentTable$minPercOnHand <.6,])
+    results[i,1] <- sub
+    total <-nrow(minPercentTable)
+    results[i,2] <- total
+    per <- sub/total
+    results[i,3] <- per
+    
+    div4 <- supplyTable %>% filter(str_detect(`entityName`, "DIV-4"))
+    
+    # Find minimum supply level reached by unit/commodity, excluding convoys
+    minsupplyTable2 <- div4 %>% filter(!str_detect(`data`,"Convoy")) %>% 
+      group_by(entityName, data) %>% 
+      summarise(minOnHand=min(as.numeric(newValue), na.rm=TRUE)) %>% 
+      ungroup()
+    
+    # Find starting supply level by unit/commodity, excluding convoys
+    minPercentTable2 <- div4 %>% 
+      filter(!str_detect(`data`,"Convoy")) %>% 
+      filter(!str_detect(`entityName`,"UNLIMITED")) %>% 
+      dplyr::select(-replication,-eventName, -propertyName, -oldValue) %>% 
+      group_by(entityName, data) %>% 
+      mutate(minOnHand=min(as.numeric(newValue), na.rm=TRUE), minTime=min(time)) %>% 
+      ungroup() %>% 
+      filter(time==minTime) %>% 
+      mutate(minPercOnHand=minOnHand/as.numeric(newValue)) %>% 
+      dplyr::select(data, entityName, minPercOnHand)
+    
+    minPercentTable2[which.min(minPercentTable2$minPercOnHand),]
+    sub2 <- nrow(minPercentTable2[minPercentTable2$minPercOnHand <.6,])
+    results[i,4] <- sub2
+    total2 <- nrow(minPercentTable2)
+    results[i,5] <- total2
+    per2 <- sub2/total2
+    results[i,6] <- per2
+    
+  }
+  newAmmo <- data.frame(t(DOE))
+  test <- newAmmo[1:length(csvs),]
+  test$y <- results[,3]
+  print('Data complete for regression analysis')
+  end_time4 <- Sys.time()
+  print(total4 <- end_time4 - start_time4)
+  return(test)
+}
\ No newline at end of file
--- a/runSim.R
+++ b/runSim.R
+runEx <- function(xmls, Dir){
+  no_cores<-detectCores()
+  
+  print(paste("detected", no_cores, "cores."))
+  
+  # sets a limit for the number of cores to be used
+  no_cores=no_cores-2
+  if (no_cores<1){no_cores=1}
+  cl <- makeCluster(no_cores)
+  
+  print(paste("Cluster initiated, running on", no_cores, "cores."))
+  Dir = Dir
+  start_time = Sys.time()
+  
+  # Command Line code to execute LBC
+  runSim <- function(scenarios){
+    # set.seed for ech rep
+    system2("java", args = c("-jar", "dist\\LBCRunner.jar", paste0(Dir, scenarios)))
+  }
+  
+  parLapply(cl, xmls, runSim)
+  stopCluster(cl)
+  
+  end_time = Sys.time()
+  total = end_time - start_time
+  # print(total)
+  print(paste0("Experiment complete, cluster stopped, it took ",total," hours to run."))
+  
+}
--- a/scenCreator.R
+++ b/scenCreator.R
+##########################################################################################
+##################    Function to create xml scenarios for LBC  ##########################
+##########################################################################################
+writeScenarios <- function(DOE){
+  # this function take the DOE and creates new scenarios
+  # majority of this code came from MAJ Jim Jablonski and TaLena Fletcher
+  start_time = Sys.time()
+  print('Writing Scenarios...')
+  for(DesignRow in seq(ncol(DOE))){
+    
+    for (i in seq(nrow(DOE))){
+      
+      # identify the node containing the value and replace it with the value contained in the Design
+      index = mainFactors$nodeNum[i]
+      xmlAttrs(ReorderValueNode[[index]])[2] <- paste0(DOE[i,DesignRow])
+      xmlAttrs(numReplications[[1]])['numReplications'] <- numReps             
+      
+    }
+    
+    # access only the //SimEntityDataLogger that contains the file attribute name  
+    SaveFileNodes=getNodeSet(xmldata,"//SimEntityDataLogger[@file]")
+    for (j in seq(length(SaveFileNodes))){
+      
+      loggerName=xmlAttrs(SaveFileNodes[[j]])['propertyName'] #cycles through to capture the name of the csv file
+      
+      xmlAttrs(SaveFileNodes[[j]])=c(file=paste0(Dir2, 'design_output_',DesignRow,'_', loggerName, '.csv')) #appends the correct filename to the csv file in the XML document
+      
+    }
+    
+    cat(saveXML(xmldata), file=paste0(Dir,"design_point_", DesignRow  ,".xml")) #saves the filename based on the design point number
+  }
+  end_time = Sys.time()
+  total = end_time-start_time
+  return(total)
+}
\ No newline at end of file