From bfedfeb03e074da5cc744fecc65b8781880b1f05 Mon Sep 17 00:00:00 2001 From: "Streams, James (CPT)" <jestream@nps.edu> Date: Thu, 13 Jun 2019 08:11:17 -0700 Subject: [PATCH] updates --- ADOE4HPCBigBox.R | 322 ++++++++++++++++++++++++++++ ADOE4HPCLong.R | 322 ++++++++++++++++++++++++++++ ADOE4HPCR2Val.R | 325 +++++++++++++++++++++++++++++ ADOE4HPCSmallBox.R | 322 ++++++++++++++++++++++++++++ adaptDOEGen2.R | 34 +++ modeling4 - Return Adj R Squared.R | 34 +++ 6 files changed, 1359 insertions(+) create mode 100644 ADOE4HPCBigBox.R create mode 100644 ADOE4HPCLong.R create mode 100644 ADOE4HPCR2Val.R create mode 100644 ADOE4HPCSmallBox.R create mode 100644 adaptDOEGen2.R create mode 100644 modeling4 - Return Adj R Squared.R diff --git a/ADOE4HPCBigBox.R b/ADOE4HPCBigBox.R new file mode 100644 index 0000000..e5d2508 --- /dev/null +++ b/ADOE4HPCBigBox.R @@ -0,0 +1,322 @@ +########################################################################################## +################## ADOE Code ########################## +########################################################################################## + +# Purpose: Aid analyst in analysis by allowing the program to determine the best range +# the factor inputs should be to obtain an objective. 
+ +########################################################################################## +# Packages used: + +library(XML) # Tools for parsing and generating XML within R +library(stringr) # Common string operations +library(dplyr) # Data manipulation +library(DiceDesign) # Generates a DOEs +library(tidyr) # Easily tidy data with the spread() or gather() function +library(data.table) # Extension of data frame +library(bestglm) # Best subset GLM and regression utilities +library(gtools) # Order string experssions; mixedsort() +library(parallel) # Allows for parallel computing +# library(DT) # Function to print datatable +print('Libraries loaded...') + +# Set initial working directory +setwd("/home/jestream/LBCv5/ADOE/") + +### Load DoE Generation Functions +# Functions here include loading base case XML, creating experimental design, and generating XML scenarios for exection +source('DOEGen.R') # Loads DoE Generator Function (newDOE) based on 'DiceDesign' NOLH function. +source('scenCreator.R') # Loads function to take design and base XML (makeXMLs) to create required number of XMLs to run +source('gatherxmls.R') # Creates a list of XMLs to process through the simulation +source('runSim.R') # Loads function that runs DoE in parallel. 
+source('remove.R') # Remove scenario files to clear room on hard drive
+source('response4.R') # Gets the response variable from the data
+source('modeling3.R') # Generates the model
+source('adaptDOEGen2.R') # DOE generator for the adaptive portion of the program
+source('newadptFolder3.R') # Creates new folder for storage of each iteration's data
+source('finalModel.R') # Displays the results and the final plot of the found meta-model
+source('sigFactors.R') # Displays the found significant factors from the meta-model
+print('Functions loaded...')
+
+# Location of LBC or Simulation to use
+lbcFolder <- "/home/jestream/LBCv5/"
+
+# xml files location
+setwd("/home/jestream/LBCv5/data/excel/")
+
+# The xml file to parse for analysis
+xmlfile <- "SCEN7-Phase-3-5NOV2018-90.v2.xml"
+
+# Where to store the data
+StorageLoc <- "/home/jestream/LBCv5/data/"
+
+# Name of the files for storage
+namefile <- 'bigbox'
+
+# Identify the directory where the files are to be located.
+# NOTE(review): Dir2 is not referenced again in this script; presumably it is read
+# as a global by one of the sourced helpers - confirm before removing it.
+Dir <- paste0(StorageLoc, namefile, "/")
+Dir2 <- paste0(StorageLoc, namefile, "/")
+
+# The number of replications per design point
+numReps <- 30
+
+# Sets the initial range of value for the first DOE (min/max = value -/+ p * value)
+p <- 0.25
+
+# Sets the range of min and max values for follow on DOEs
+p2 <- 0.15
+
+# Starting value for iterations, should remain at 1 unless starting from a different iteration
+k <- 1
+
+# Stopping conditions for adaptive process
+z <- 0.1 # percent of residuals outside of acceptable range
+stopVal <- 0.004 # minimum change needed to continue iteration
+iterThres <- 2 # number of iterations in a row the model is allowed to iterate without improvement
+maxIter <- 12 # the adaptive loop stops once k reaches this value
+
+# Values for displaying significant factors
+alpha <- 0.05 # The minimum p-value a factor must have to be considered significant
+n <- 10 # The max number of significant factors to return
+
+##########################################################################################
+################## Search Parameters in XML files ##########################
+##########################################################################################
+pstart <- Sys.time() # starts the clock to time how long the program takes to complete
+print('Parsing XML file...')
+# Parses the xml file into an internal (C-level) document so XPath queries can be used.
+# The argument is spelled out in full instead of relying on partial matching.
+xmldata <- xmlTreeParse(xmlfile, useInternalNodes = TRUE)
+
+# Locates the number of replications value
+numReplications <- getNodeSet(xmldata, "//LBCAssembly[@numReplications]")
+
+# Find all ThresholdReorderLogic nodes
+ReorderNodes <- getNodeSet(xmldata, "//ThresholdReorderLogic")
+
+# Locates all the reorder point data (this section can be adjusted to look for other
+# areas of interest such as consumption rate or truck capacities)
+ReorderValueNode <- getNodeSet(xmldata, "//ThresholdReorderLogic//*[@value]")
+numNodes <- length(ReorderValueNode)
+
+# Create vectors to collect information from the xml file
+numNodesName <- character()
+nodeNum <- integer()
+reps <- character()
+name <- character()
+Company <- character()
+UnitId <- character()
+Consumable <- character()
+value <- character()
+
+# This loop collects, for every node carrying a 'value' attribute, the information
+# held by its parent and grandparent nodes.
+# seq_len() is used so that the loop body is skipped when no nodes were found
+# (1:numNodes would iterate over c(1, 0) and fail on the first lookup).
+for (i in seq_len(numNodes)) {
+  parent <- xmlParent(ReorderValueNode[[i]])
+  grandParent <- xmlParent(parent)
+  nodeNum[i] <- i
+  name[i] <- xmlAttrs(parent) # attribute(s) of the parent node (the methodName per the original author)
+  reps[i] <- xmlAttrs(numReplications[[1]])['numReplications']
+
+  # All descendants of the grandparent that carry a refId attribute
+  providerNode <- getNodeSet(grandParent, ".//*[@refId]")
+
+  # The first refId is stored as the consumable
+  Consumable[i] <- xmlAttrs(providerNode[[1]])['refId']
+
+  # The second refId is stored as the unit (company) name
+  UnitId[i] <- xmlAttrs(providerNode[[2]])['refId']
+
+  # Extract the value
+  valueNode <- getNodeSet(parent, ".//*[@value]")
+  value[i] <- xmlAttrs(valueNode[[1]])['value']
+}
+# Data frame of collected nodes from entire xml file
+ReorderDf <- data.frame(nodeNum, reps, name, UnitId, Consumable, value)
+print('Parsing complete...')
+##########################################################################################
+################## Locating Factors of Interest ##########################
+##########################################################################################
+# Choose the units or factors of interest through the use of dplyr filters
+
+# This can be changed to look for any unit or factor based on the information available
+# from the xml data collection
+
+Div4 <- ReorderDf %>%
+  select(nodeNum, reps, name, UnitId, Consumable, value) %>%
+  filter(str_detect(UnitId, "DIV-4")) %>%
+  filter(!str_detect(UnitId, paste(c("BSB", "FSC", "DISTRO", "ALLIED", "SUST", "EN-BDE",
+                                     "HHC", "HHB", "HQ", "BEB", "MEB", "MI-BN"), collapse = "|"))) %>%
+  filter(!str_detect(Consumable, paste(c("CARGO", "WATER"), collapse = "|")))
+
+# Ensure that MASS is not attached at the same time as dplyr or else it will mask the select function in dplyr
+
+mainFactors <- Div4 %>% filter(str_detect(Consumable, "AMMO")) # replace with your chosen data set
+
+# Fail early with a clear message instead of an obscure error further downstream
+if (nrow(mainFactors) == 0) {
+  stop("No factors matched the unit/consumable filters; nothing to build a DOE from.")
+}
+
+# Values to vary to create a range.
+# as.character() first makes the conversion correct whether data.frame() produced a
+# factor (default before R 4.0) or a character column (default since R 4.0); the
+# previous as.numeric(levels(x))[x] form returns NA for every row of a character column.
+mainFactors$value <- as.numeric(as.character(mainFactors$value))
+
+# Now set the range of min and max values and add them to the data frame
+mainFactors['min'] <- (mainFactors$value - mainFactors$value*p)
+mainFactors['max'] <- (mainFactors$value*(1+p))
+
+# From DOEGen.R
+ammoDOE <- newDOE(mainFactors)
+
+# Creates folder for initial storage of data, it is based on the Dir set at the beginning
+dir.create(Dir)
+
+writeScenarios(ammoDOE)
+files <- getXMLs(Dir)
+
+##########################################################################################
+# Runs each scenario through LBC, this takes a while since it has to shut down and restart LBC each time
+setwd(lbcFolder)
+
+runEx(files, Dir)
+removeFiles(files)
+
+##########################################################################################
+################ Gather the information for the response variable ####################
+##########################################################################################
+
+# Finds all the csv files and then puts the names in a list.
+# 'pattern' is a regular expression, not a glob: '\\.csv$' matches only names that
+# end in ".csv" ('*.csv' also matched names such as "x.csv.bak" or "abcsv").
+csvL <- list.files(path = Dir, pattern = '\\.csv$')
+csvL <- mixedsort(csvL)
+
+# Creates an empty data frame for information collection,
+# this can be modified based on what desired information the user is looking for.
+# NOTE(review): 'results' is not referenced again in this script; presumably it is
+# read as a global by getResponse() - confirm in response4.R.
+results <- data.frame(matrix(ncol = 5))
+colnames(results) <- c('DP', 'REP', 'Bad', 'Total', '%')
+
+
+test <- getResponse(csvs = csvL, DOE = ammoDOE)
+write.table(test, file = paste0(Dir,'test.csv'), sep = ',')
+##########################################################################################
+# need to include code that saves each linear model and test data to the folder holding the data
+par(mfrow = c(2,2))
+test.mod <- getModel(test)
+
+# Save the current DOE (the first nrow(mainFactors) columns of 'test', transposed)
+curDOE <- test[,1:nrow(mainFactors)]
+curDOE <- as.data.frame(t(curDOE))
+write.table(curDOE, file = paste0(Dir,'DOE for initial .csv'), sep = ',')
+
+##########################################################################################
+# Storing percent values for additional stopping criteria
+percents <- list()
+percents <- append(percents, test.mod[1])
+c <- 0 # consecutive iterations whose change was <= stopVal (variable only; calls to c() still resolve to base::c)
+
+##########################################################################################
+################ Adaptive Portion, as long as it meets conditions ####################
+##########################################################################################
+# Keep iterating while the first element returned by getModel() is above z, fewer than
+# iterThres consecutive iterations changed it by <= stopVal, and k is below maxIter.
+# The former 'k == 1' and 'else' branches were identical except for the shape of
+# 'results', so they are merged and only that part stays conditional.
+while (test.mod[1] > z && c < iterThres && k < maxIter) {
+  print(paste('Now executing iteration', k))
+
+  DOE <- aDOE(test, test.mod[2], mainFactors)
+
+  drive <- newFolder(StorageLoc, k)
+
+  Dir <- paste0(drive[1],'/')
+  Dir2 <- paste0(drive[2],'/')
+
+  setwd(lbcFolder)
+
+  writeScenarios(DOE)
+
+  files <- getXMLs(Dir)
+
+  runEx(files, Dir)
+
+  removeFiles(files)
+
+  # Finds all the csv files and then puts the names in a list.
+  # NOTE(review): mixedsort() added so the ordering matches the initial stage above
+  # (plain list.files() order puts "10" before "2"); confirm that getResponse()
+  # expects natural order.
+  csvL <- list.files(path = Dir, pattern = '\\.csv$')
+  csvL <- mixedsort(csvL)
+
+  # Empty data frame for information collection; the first iteration uses a
+  # different layout than later ones, exactly as in the original branches.
+  if (k == 1) {
+    results <- data.frame(matrix(ncol = 5))
+    colnames(results) <- c('DP', 'REP', 'Bad', 'Total', '%')
+  } else {
+    results <- data.frame(matrix(ncol = 3, nrow = length(csvL)))
+    colnames(results) <- c('All','Total', 'All%')
+  }
+
+  newTest <- getResponse(csvs = csvL, DOE = DOE)
+  test <- rbind(test,newTest)
+
+  curDOE <- test[,1:nrow(mainFactors)]
+  curDOE <- as.data.frame(t(curDOE))
+  write.table(curDOE, file = paste0(Dir,'DOE_for_turn ',k,'.csv'), sep = ',')
+
+  par(mfrow = c(2,2))
+  test.mod <- getModel(test)
+
+  # Compare the two most recent values. Indexing by length(percents) instead of k
+  # keeps this valid when k is started at a value other than 1.
+  percents <- append(percents, test.mod[1])
+  nPercents <- length(percents)
+  diff <- abs(percents[[nPercents]] - percents[[nPercents - 1]])
+  if (diff > stopVal) {
+    c <- 0
+  } else {
+    c <- c + 1
+  }
+
+  k <- k + 1
+}
+
+final.mod <- finalModel(test)
+
+sigFactors(final.mod)
+
+# End program
+print('Program complete')
+pend <- Sys.time()
+print(pend - pstart)
+
+
+
diff --git a/ADOE4HPCLong.R b/ADOE4HPCLong.R
new file mode 100644
index 0000000..af2bfb7
--- /dev/null
+++ b/ADOE4HPCLong.R
@@ -0,0 +1,322 @@
+##########################################################################################
+################## ADOE Code ##########################
+##########################################################################################
+
+# Purpose: Aid analyst in analysis by allowing the program to determine the best range
+# the factor inputs should be to obtain an objective.
+
+##########################################################################################
+# Packages used:
+
+library(XML) # Tools for parsing and generating XML within R
+library(stringr) # Common string operations
+library(dplyr) # Data manipulation
+library(DiceDesign) # Generates a DOEs
+library(tidyr) # Easily tidy data with the spread() or gather() function
+library(data.table) # Extension of data frame
+library(bestglm) # Best subset GLM and regression utilities
+library(gtools) # Order string expressions; mixedsort()
+library(parallel) # Allows for parallel computing
+# library(DT) # Function to print datatable
+print('Libraries loaded...')
+
+# Set initial working directory
+setwd("/home/jestream/LBCv5/ADOE/")
+
+### Load DoE Generation Functions
+# Functions here include loading base case XML, creating experimental design, and generating XML scenarios for execution
+source('DOEGen.R') # Loads DoE Generator Function (newDOE) based on 'DiceDesign' NOLH function.
+source('scenCreator.R') # Loads function to take design and base XML (makeXMLs) to create required number of XMLs to run
+source('gatherxmls.R') # Creates a list of XMLs to process through the simulation
+source('runSim.R') # Loads function that runs DoE in parallel.
+source('remove.R') # Remove scenario files to clear room on hard drive
+source('response4.R') # Gets the response variable from the data
+source('modeling3.R') # Generates the model
+source('adaptDOEGen2.R') # DOE generator for the adaptive portion of the program
+source('newadptFolder3.R') # Creates new folder for storage of each iteration's data
+source('finalModel.R') # Displays the results and the final plot of the found meta-model
+source('sigFactors.R') # Displays the found significant factors from the meta-model
+print('Functions loaded...')
+
+# Location of LBC or Simulation to use
+lbcFolder <- "/home/jestream/LBCv5/"
+
+# xml files location
+setwd("/home/jestream/LBCv5/data/excel/")
+
+# The xml file to parse for analysis
+xmlfile <- "SCEN7-Phase-3-5NOV2018-90.v2.xml"
+
+# Where to store the data
+StorageLoc <- "/home/jestream/LBCv5/data/"
+
+# Name of the files for storage
+namefile <- 'longRun'
+
+# Identify the directory where the files are to be located.
+# NOTE(review): Dir2 is not referenced again in this script; presumably it is read
+# as a global by one of the sourced helpers - confirm before removing it.
+Dir <- paste0(StorageLoc, namefile, "/")
+Dir2 <- paste0(StorageLoc, namefile, "/")
+
+# The number of replications per design point
+numReps <- 30
+
+# Sets the initial range of value for the first DOE (min/max = value -/+ p * value)
+p <- 0.15
+
+# Sets the range of min and max values for follow on DOEs
+p2 <- 0.10
+
+# Starting value for iterations, should remain at 1 unless starting from a different iteration
+k <- 1
+
+# Stopping conditions for adaptive process.
+# This "long run" variant deliberately has no early stop on lack of improvement:
+# stopVal and iterThres stay disabled and the loop below no longer reads them.
+z <- 0.1 # percent of residuals outside of acceptable range
+# stopVal <- 0.004 # minimum change needed to continue iteration
+# iterThres = 2 # number of iterations in a row the model is allowed to iterate without improvement
+maxIter <- 12 # the adaptive loop stops once k reaches this value
+
+# Values for displaying significant factors
+alpha <- 0.05 # The minimum p-value a factor must have to be considered significant
+n <- 10 # The max number of significant factors to return
+
+##########################################################################################
+################## Search Parameters in XML files ##########################
+##########################################################################################
+pstart <- Sys.time() # starts the clock to time how long the program takes to complete
+print('Parsing XML file...')
+# Parses the xml file into an internal (C-level) document so XPath queries can be used.
+# The argument is spelled out in full instead of relying on partial matching.
+xmldata <- xmlTreeParse(xmlfile, useInternalNodes = TRUE)
+
+# Locates the number of replications value
+numReplications <- getNodeSet(xmldata, "//LBCAssembly[@numReplications]")
+
+# Find all ThresholdReorderLogic nodes
+ReorderNodes <- getNodeSet(xmldata, "//ThresholdReorderLogic")
+
+# Locates all the reorder point data (this section can be adjusted to look for other
+# areas of interest such as consumption rate or truck capacities)
+ReorderValueNode <- getNodeSet(xmldata, "//ThresholdReorderLogic//*[@value]")
+numNodes <- length(ReorderValueNode)
+
+# Create vectors to collect information from the xml file
+numNodesName <- character()
+nodeNum <- integer()
+reps <- character()
+name <- character()
+Company <- character()
+UnitId <- character()
+Consumable <- character()
+value <- character()
+
+# This loop collects, for every node carrying a 'value' attribute, the information
+# held by its parent and grandparent nodes.
+# seq_len() is used so that the loop body is skipped when no nodes were found
+# (1:numNodes would iterate over c(1, 0) and fail on the first lookup).
+for (i in seq_len(numNodes)) {
+  parent <- xmlParent(ReorderValueNode[[i]])
+  grandParent <- xmlParent(parent)
+  nodeNum[i] <- i
+  name[i] <- xmlAttrs(parent) # attribute(s) of the parent node (the methodName per the original author)
+  reps[i] <- xmlAttrs(numReplications[[1]])['numReplications']
+
+  # All descendants of the grandparent that carry a refId attribute
+  providerNode <- getNodeSet(grandParent, ".//*[@refId]")
+
+  # The first refId is stored as the consumable
+  Consumable[i] <- xmlAttrs(providerNode[[1]])['refId']
+
+  # The second refId is stored as the unit (company) name
+  UnitId[i] <- xmlAttrs(providerNode[[2]])['refId']
+
+  # Extract the value
+  valueNode <- getNodeSet(parent, ".//*[@value]")
+  value[i] <- xmlAttrs(valueNode[[1]])['value']
+}
+# Data frame of collected nodes from entire xml file
+ReorderDf <- data.frame(nodeNum, reps, name, UnitId, Consumable, value)
+print('Parsing complete...')
+##########################################################################################
+################## Locating Factors of Interest ##########################
+##########################################################################################
+# Choose the units or factors of interest through the use of dplyr filters
+
+# This can be changed to look for any unit or factor based on the information available
+# from the xml data collection
+
+Div4 <- ReorderDf %>%
+  select(nodeNum, reps, name, UnitId, Consumable, value) %>%
+  filter(str_detect(UnitId, "DIV-4")) %>%
+  filter(!str_detect(UnitId, paste(c("BSB", "FSC", "DISTRO", "ALLIED", "SUST", "EN-BDE",
+                                     "HHC", "HHB", "HQ", "BEB", "MEB", "MI-BN"), collapse = "|"))) %>%
+  filter(!str_detect(Consumable, paste(c("CARGO", "WATER"), collapse = "|")))
+
+# Ensure that MASS is not attached at the same time as dplyr or else it will mask the select function in dplyr
+
+mainFactors <- Div4 %>% filter(str_detect(Consumable, "AMMO")) # replace with your chosen data set
+
+# Fail early with a clear message instead of an obscure error further downstream
+if (nrow(mainFactors) == 0) {
+  stop("No factors matched the unit/consumable filters; nothing to build a DOE from.")
+}
+
+# Values to vary to create a range.
+# as.character() first makes the conversion correct whether data.frame() produced a
+# factor (default before R 4.0) or a character column (default since R 4.0); the
+# previous as.numeric(levels(x))[x] form returns NA for every row of a character column.
+mainFactors$value <- as.numeric(as.character(mainFactors$value))
+
+# Now set the range of min and max values and add them to the data frame
+mainFactors['min'] <- (mainFactors$value - mainFactors$value*p)
+mainFactors['max'] <- (mainFactors$value*(1+p))
+
+# From DOEGen.R
+ammoDOE <- newDOE(mainFactors)
+
+# Creates folder for initial storage of data, it is based on the Dir set at the beginning
+dir.create(Dir)
+
+writeScenarios(ammoDOE)
+files <- getXMLs(Dir)
+
+##########################################################################################
+# Runs each scenario through LBC, this takes a while since it has to shut down and restart LBC each time
+setwd(lbcFolder)
+
+runEx(files, Dir)
+removeFiles(files)
+
+##########################################################################################
+################ Gather the information for the response variable ####################
+##########################################################################################
+
+# Finds all the csv files and then puts the names in a list.
+# 'pattern' is a regular expression, not a glob: '\\.csv$' matches only names that
+# end in ".csv" ('*.csv' also matched names such as "x.csv.bak" or "abcsv").
+csvL <- list.files(path = Dir, pattern = '\\.csv$')
+csvL <- mixedsort(csvL)
+
+# Creates an empty data frame for information collection,
+# this can be modified based on what desired information the user is looking for.
+# NOTE(review): 'results' is not referenced again in this script; presumably it is
+# read as a global by getResponse() - confirm in response4.R.
+results <- data.frame(matrix(ncol = 5))
+colnames(results) <- c('DP', 'REP', 'Bad', 'Total', '%')
+
+
+test <- getResponse(csvs = csvL, DOE = ammoDOE)
+write.table(test, file = paste0(Dir,'test.csv'), sep = ',')
+##########################################################################################
+# need to include code that saves each linear model and test data to the folder holding the data
+par(mfrow = c(2,2))
+test.mod <- getModel(test)
+
+# Save the current DOE (the first nrow(mainFactors) columns of 'test', transposed)
+curDOE <- test[,1:nrow(mainFactors)]
+curDOE <- as.data.frame(t(curDOE))
+write.table(curDOE, file = paste0(Dir,'DOE for initial .csv'), sep = ',')
+
+##########################################################################################
+# History of the first element returned by getModel(), one entry per fitted model
+percents <- list()
+percents <- append(percents, test.mod[1])
+
+##########################################################################################
+################ Adaptive Portion, as long as it meets conditions ####################
+##########################################################################################
+# Keep iterating while the first element returned by getModel() is above z and k is
+# below maxIter. The previous loop body still evaluated 'diff > stopVal' although
+# stopVal is commented out in the configuration, which stopped the script with
+# "object 'stopVal' not found" at the end of the first iteration; since this loop
+# condition never used the resulting counter, that bookkeeping is removed.
+# The former 'k == 1' and 'else' branches were otherwise identical except for the
+# shape of 'results', so they are merged and only that part stays conditional.
+while (test.mod[1] > z && k < maxIter) {
+  print(paste('Now executing iteration', k))
+
+  DOE <- aDOE(test, test.mod[2], mainFactors)
+
+  drive <- newFolder(StorageLoc, k)
+
+  Dir <- paste0(drive[1],'/')
+  Dir2 <- paste0(drive[2],'/')
+
+  setwd(lbcFolder)
+
+  writeScenarios(DOE)
+
+  files <- getXMLs(Dir)
+
+  runEx(files, Dir)
+
+  removeFiles(files)
+
+  # Finds all the csv files and then puts the names in a list.
+  # NOTE(review): mixedsort() added so the ordering matches the initial stage above
+  # (plain list.files() order puts "10" before "2"); confirm that getResponse()
+  # expects natural order.
+  csvL <- list.files(path = Dir, pattern = '\\.csv$')
+  csvL <- mixedsort(csvL)
+
+  # Empty data frame for information collection; the first iteration uses a
+  # different layout than later ones, exactly as in the original branches.
+  if (k == 1) {
+    results <- data.frame(matrix(ncol = 5))
+    colnames(results) <- c('DP', 'REP', 'Bad', 'Total', '%')
+  } else {
+    results <- data.frame(matrix(ncol = 3, nrow = length(csvL)))
+    colnames(results) <- c('All','Total', 'All%')
+  }
+
+  newTest <- getResponse(csvs = csvL, DOE = DOE)
+  test <- rbind(test,newTest)
+
+  curDOE <- test[,1:nrow(mainFactors)]
+  curDOE <- as.data.frame(t(curDOE))
+  write.table(curDOE, file = paste0(Dir,'DOE_for_turn ',k,'.csv'), sep = ',')
+
+  par(mfrow = c(2,2))
+  test.mod <- getModel(test)
+
+  percents <- append(percents, test.mod[1])
+
+  k <- k + 1
+}
+
+final.mod <- finalModel(test)
+
+sigFactors(final.mod)
+
+# End program
+print('Program complete')
+pend <- Sys.time()
+print(pend - pstart)
+
+
+
diff --git a/ADOE4HPCR2Val.R b/ADOE4HPCR2Val.R
new file mode 100644
index 0000000..ca62d12
--- /dev/null
+++ b/ADOE4HPCR2Val.R
@@ -0,0 +1,325 @@
+##########################################################################################
+################## ADOE Code ##########################
+##########################################################################################
+
+# Purpose: Aid analyst in analysis by allowing the program to determine the best range
+# the factor inputs should be to obtain an objective.
+
+##########################################################################################
+# Packages used:
+
+library(XML) # Tools for parsing and generating XML within R
+library(stringr) # Common string operations
+library(dplyr) # Data manipulation
+library(DiceDesign) # Generates a DOEs
+library(tidyr) # Easily tidy data with the spread() or gather() function
+library(data.table) # Extension of data frame
+library(bestglm) # Best subset GLM and regression utilities
+library(gtools) # Order string expressions; mixedsort()
+library(parallel) # Allows for parallel computing
+# library(DT) # Function to print datatable
+print('Libraries loaded...')
+
+# Set initial working directory
+setwd("/home/jestream/LBCv5/ADOE/")
+
+### Load DoE Generation Functions
+# Functions here include loading base case XML, creating experimental design, and generating XML scenarios for execution
+source('DOEGen.R') # Loads DoE Generator Function (newDOE) based on 'DiceDesign' NOLH function.
+source('scenCreator.R') # Loads function to take design and base XML (makeXMLs) to create required number of XMLs to run
+source('gatherxmls.R') # Creates a list of XMLs to process through the simulation
+source('runSim.R') # Loads function that runs DoE in parallel.
+source('remove.R') # Remove scenarios files to clear room on hard drive +source('response4.R') # Gets the response variable from the data +source('modeling4 - Return Adj R Squared.R') # generates the model +source('adaptDOEGen2.R') # DOE generator for the adaptive portion of the program +source('newadptFolder3.R') # Creates new folder for storage of each iterations data +source('finalModel.R') # Displays the results and the final plot of the found meta-model +source('sigFactors.R') # Displays the found significant factors from the meta-model +print('Functions loaded...') + +# Location of LBC or Simulation to use +lbcFolder <- "/home/jestream/LBCv5/" + +# xml files location +setwd("/home/jestream/LBCv5/data/excel/") + +# The xml file to parse for analysis +xmlfile = "SCEN7-Phase-3-5NOV2018-90.v2.xml" + +# Where to store the data +StorageLoc <- "/home/jestream/LBCv5/data/" + +# Name of the files for storage +namefile <- 'testRsquared' + +# identify the directory where the files are to be located +Dir = paste0(StorageLoc,namefile,"/") +Dir2 = paste0(StorageLoc,namefile,"/") + +# the number of replications per design point +numReps = 30 + +# Sets the initial range of value for the first DOE +p = 0.15 + +# Sets the range of min amd max values for follow on DOEs +p2 = 0.10 + +# Starting value for iterations, should remain at 1 unless starting from a different iteration +k = 1 + +# Stopping condition for adaptive process +# z = 0.05 # percent of residuals outside of acceptable range + +stopVal <- 0.004 # minimum change needed to continue iteration +iterThres = 2 # number of iterations in a row the model is allowed to iterate wwithout improvement +maxIter = 12 + +# Values for displaying significant factors +alpha = 0.05 # The minimum p-value a factor must have to be considered significant +n = 10 # The max number of significant factors to return + +########################################################################################## +################## Search 
Parameters in XML files ########################## +########################################################################################## +pstart = Sys.time() # starts the clock to time how long the program takes to complete +print('Parsing XML file...') +# Breaks the xmlFile into sections and saves a temp version for data extraction + +xmldata <- xmlTreeParse(xmlfile, useInternal = TRUE) + +# Locates the a number of replications value +numReplications = getNodeSet(xmldata, "//LBCAssembly[@numReplications]") + +# find all ThresholdReorderLogic Nodes +ReorderNodes=getNodeSet(xmldata, "//ThresholdReorderLogic") + +# Locates all the reorder point data (this section can be adjusted to look for other +# areas of interest such as consumption rate or truck capacities) +ReorderValueNode = getNodeSet(xmldata, "//ThresholdReorderLogic//*[@value]") +numNodes = length(ReorderValueNode) + +# Create lists to collect information from xmlFile +numNodesName=character() +nodeNum = integer() +reps = character() +name = character() +Company = character() +UnitId = character() +Consumable =character() +value = character() + +# this loop will return all required information from the parent nodes in reference +# to the attribute value name +for (i in 1:numNodes){ + parent = xmlParent(ReorderValueNode[[i]]) + child = xmlChildren(ReorderNodes[[1]])[i] + grandParent = xmlParent(xmlParent(ReorderValueNode[[i]])) + nodeNum[i] = i + name[i] = xmlAttrs(parent) #gets the methodName of the parent node in reference to the attribute value name + reps[i] = xmlAttrs(numReplications[[1]])['numReplications'] + + # use the name to find the Company Name + providerNode = getNodeSet(grandParent, ".//*[@refId]") + + # extract the consumable + Consumable[i]= xmlAttrs(providerNode[[1]])['refId'] + + # extract the Company Name + UnitId[i]= xmlAttrs(providerNode[[2]])['refId'] + + # extract the value + valueNode = getNodeSet(parent, ".//*[@value]") + value[i]= xmlAttrs(valueNode[[1]])['value'] + +} +# Data 
frame of collected nodes from entire xmlFile +ReorderDf = data.frame(nodeNum, reps, name, UnitId, Consumable, value) +print('Parsing complete...') + +########################################################################################## +################## Locating Factors of Interest ########################## +########################################################################################## +# Choose the units or factors of interest through the use of dplyr filters + +# This can be changed to look for any unit or factor based on the information available +# from the xml data collection + +Div4 <- ReorderDf %>% select(nodeNum, reps, name, UnitId, Consumable, value) %>% + filter(str_detect(UnitId, "DIV-4")) %>% filter(!str_detect(UnitId, paste(c("BSB", "FSC", "DISTRO", "ALLIED", "SUST", "EN-BDE", + "HHC", "HHB", "HQ", "BEB","MEB","MI-BN"), collapse = "|"))) %>% filter(!str_detect(Consumable, paste(c("CARGO", "WATER"),collapse = "|"))) + +# Enusre that MASS is not running at the same time as dplyr or else it will mask the select function in dplyr + +mainFactors <- Div4 %>% filter(str_detect(Consumable, "AMMO")) # replace with your choosen data set +mainFactors$value <- as.numeric(levels(mainFactors$value))[mainFactors$value] # values to vary to create a range +# Code found at +# https://stackoverflow.com/questions/3418128/how-to-convert-a-factor-to-integer-numeric-without-loss-of-information + +# Now set the range of min and max values + +# add those values to the data frame +mainFactors['min'] <- (mainFactors$value - mainFactors$value*p) +mainFactors['max'] <- (mainFactors$value*(1+p)) + +# From DOEGen.R +ammoDOE <- newDOE(mainFactors) + +# Creates folder for initial storage of data, it is based on the Dir set at the beginning +dir.create(Dir) + +writeScenarios(ammoDOE) +files <- getXMLs(Dir) + +########################################################################################## +# Runs each scenario through LBC, this takes awhile since it has to 
shut down and restart LBC each time +setwd(lbcFolder) + +runEx(files, Dir) +removeFiles(files) + +########################################################################################## +################ Gather the information for the response varialbe #################### +########################################################################################## + +# Finds all the csv files and then puts the names in a list +csvL <- list.files(path = Dir, pattern = '*.csv') +csvL <- mixedsort(csvL) + +# Creates an empty data frame for information collection, +# this can be modified based on what desired information the user is looking for +results <- data.frame(matrix(ncol = 5)) +colnames(results) <-c('DP', 'REP', 'Bad', 'Total', '%') + + +test <- getResponse(csvs = csvL, DOE = ammoDOE) +# write.table(test, file = paste0(Dir,'test.csv'), sep = ',') +########################################################################################## +# need to include code that saves each linear model and test data to the folder holding the data +par(mfrow = c(2,2)) +test.mod <- getModel(test) + +# Save the current DOE +curDOE <- test[,1:nrow(mainFactors)] +curDOE <- as.data.frame(t(curDOE)) +write.table(curDOE, file = paste0(Dir,'DOE for initial .csv'), sep = ',') + +########################################################################################## +# Storing percent values for additional stopping criteria +percents <- list() +percents <- append(percents, test.mod[4]) +c = 0 + +########################################################################################## +################ Adaptive Portion, as long as it meets conditions #################### +########################################################################################## +while (c < iterThres && k < maxIter){ + if(k == 1){ + print(paste('Now executing iteration',k)) + + DOE <- aDOE(test, test.mod[2], mainFactors) + + drive <- newFolder(StorageLoc, k) + + Dir = paste0(drive[1],'/') + Dir2 = 
paste0(drive[2],'/') + + setwd(lbcFolder) + + writeScenarios(DOE) + + files <- getXMLs(Dir) + + runEx(files, Dir) + + removeFiles(files) + + # Finds all the csv files and then puts the names in a list + csvL <- list.files(path = Dir, pattern = '*.csv') + + # Creates an empty data frame for information collection, + # this can be modified based on what desired information the user is looking for + results <- data.frame(matrix(ncol = 5)) + colnames(results) <-c('DP', 'REP', 'Bad', 'Total', '%') + + newTest <- getResponse(csvs = csvL, DOE = DOE) + test <- rbind(test,newTest) + + curDOE <- test[,1:nrow(mainFactors)] + curDOE <- as.data.frame(t(curDOE)) + write.table(curDOE, file = paste0(Dir,'DOE_for_turn ',k,'.csv'), sep = ',') + + par(mfrow = c(2,2)) + test.mod <- getModel(test) + + percents <- append(percents, test.mod[4]) + diff <- abs(percents[[k+1]] - percents[[k]]) + if(diff > stopVal){ + c = 0 + }else{ + c = c + 1 + } + + + k = k + 1 + + } else { + + print(paste('Now executing iteration', k)) + DOE <- aDOE(test, test.mod[2], mainFactors) + drive <- newFolder(StorageLoc, k) + + Dir = paste0(drive[1],'/') + Dir2 = paste0(drive[2],'/') + + setwd(lbcFolder) + + writeScenarios(DOE) + + files <- getXMLs(Dir) + + runEx(files, Dir) + + removeFiles(files) + + csvL <- list.files(path = Dir, pattern = '*.csv') + + results <- data.frame(matrix(ncol = 3, nrow = length(csvL))) + colnames(results) <- c('All','Total', 'All%') + + newTest <- getResponse(csvs = csvL, DOE = DOE) + test <- rbind(test,newTest) + + curDOE <- test[,1:nrow(mainFactors)] + curDOE <- as.data.frame(t(curDOE)) + + write.table(curDOE, file = paste0(Dir,'DOE_for_turn ',k,'.csv'), sep = ',') + + + par(mfrow = c(2,2)) + test.mod <- getModel(test) + + percents <- append(percents, test.mod[4]) + diff <- abs(percents[[k+1]] - percents[[k]]) + if(diff > stopVal){ + c = 0 + }else{ + c = c + 1 + } + + k = k + 1 + } +} + +final.mod <- finalModel(test) + +sigFactors(final.mod) + +# End program +print('Program 
complete') +pend = Sys.time() +print(pend - pstart) + + + diff --git a/ADOE4HPCSmallBox.R b/ADOE4HPCSmallBox.R new file mode 100644 index 0000000..bf2f13b --- /dev/null +++ b/ADOE4HPCSmallBox.R @@ -0,0 +1,322 @@ +########################################################################################## +################## ADOE Code ########################## +########################################################################################## + +# Purpose: Aid analyst in analysis by allowing the program to determine the best range +# the factor inputs should be to obtain an objective. + +########################################################################################## +# Packages used: + +library(XML) # Tools for parsing and generating XML within R +library(stringr) # Common string operations +library(dplyr) # Data manipulation +library(DiceDesign) # Generates a DOEs +library(tidyr) # Easily tidy data with the spread() or gather() function +library(data.table) # Extension of data frame +library(bestglm) # Best subset GLM and regression utilities +library(gtools) # Order string experssions; mixedsort() +library(parallel) # Allows for parallel computing +# library(DT) # Function to print datatable +print('Libraries loaded...') + +# Set initial working directory +setwd("/home/jestream/LBCv5/ADOE/") + +### Load DoE Generation Functions +# Functions here include loading base case XML, creating experimental design, and generating XML scenarios for exection +source('DOEGen.R') # Loads DoE Generator Function (newDOE) based on 'DiceDesign' NOLH function. +source('scenCreator.R') # Loads function to take design and base XML (makeXMLs) to create required number of XMLs to run +source('gatherxmls.R') # Creates a list of XMLs to process through the simulation +source('runSim.R') # Loads function that runs DoE in parallel. 
source('remove.R')         # Removes scenario files to clear room on the hard drive
source('response4.R')      # Gets the response variable from the data
source('modeling3.R')      # Generates the model
source('adaptDOEGen2.R')   # DOE generator for the adaptive portion of the program
source('newadptFolder3.R') # Creates a new folder for storage of each iteration's data
source('finalModel.R')     # Displays the results and the final plot of the found meta-model
source('sigFactors.R')     # Displays the found significant factors from the meta-model
print('Functions loaded...')

# NOTE(review): every top-level name below is kept exactly as it was. The
# sourced helpers are called with very few arguments and at least some of them
# read globals directly (getModel() reads `Dir`, aDOE() reads `k`), so renaming
# or removing a global here could silently break them.

# Location of LBC or Simulation to use
lbcFolder <- "/home/jestream/LBCv5/"

# xml files location
setwd("/home/jestream/LBCv5/data/excel/")

# The xml file to parse for analysis
xmlfile <- "SCEN7-Phase-3-5NOV2018-90.v2.xml"

# Where to store the data
StorageLoc <- "/home/jestream/LBCv5/data/"

# Name of the files for storage
namefile <- 'smallbox'

# Identify the directory where the files are to be located
Dir <- paste0(StorageLoc, namefile, "/")
Dir2 <- paste0(StorageLoc, namefile, "/")

# The number of replications per design point
numReps <- 30

# Sets the initial range of values for the first DOE (fraction of the base value)
p <- 0.1

# Sets the range of min and max values for follow-on DOEs
p2 <- 0.05

# Starting value for iterations, should remain at 1 unless starting from a different iteration
k <- 1

# Stopping conditions for the adaptive process
z <- 0.1          # fraction of residuals outside of the acceptable range
stopVal <- 0.004  # minimum change needed to continue iterating
iterThres <- 2    # number of iterations in a row the model may run without improvement
maxIter <- 12     # hard cap on the iteration counter k

# Values for displaying significant factors
alpha <- 0.05 # The p-value threshold a factor must meet to be considered significant
n <- 10       # The max number of significant factors to return

##########################################################################################
##################       Search Parameters in XML files         ##########################
##########################################################################################
pstart <- Sys.time() # starts the clock to time how long the program takes to complete
print('Parsing XML file...')
# Parses the XML file into an internal (C-level) document so XPath queries can be used.
# The argument name is spelled out in full instead of relying on partial matching.
xmldata <- xmlTreeParse(xmlfile, useInternalNodes = TRUE)

# Locates the node carrying the number-of-replications attribute
numReplications <- getNodeSet(xmldata, "//LBCAssembly[@numReplications]")

# Find all ThresholdReorderLogic nodes
ReorderNodes <- getNodeSet(xmldata, "//ThresholdReorderLogic")

# Locates all the reorder point data (this section can be adjusted to look for other
# areas of interest such as consumption rate or truck capacities)
ReorderValueNode <- getNodeSet(xmldata, "//ThresholdReorderLogic//*[@value]")
numNodes <- length(ReorderValueNode)

# Fail with a clear message instead of the "subscript out of bounds" error that
# iterating over 1:0 used to produce when nothing matched.
if (numNodes == 0) {
  stop("No ThresholdReorderLogic value nodes found in ", xmlfile, call. = FALSE)
}

# Collectors for the information pulled from the XML file. They are allocated
# at full length up front rather than grown one element at a time in the loop.
numNodesName <- character()
Company <- character()
nodeNum <- seq_len(numNodes)
name <- character(numNodes)
UnitId <- character(numNodes)
Consumable <- character(numNodes)
value <- character(numNodes)

# The replication count is the same for every node, so it is read once.
# unname() keeps the attribute name from leaking into the data frame row names.
reps <- rep(unname(xmlAttrs(numReplications[[1]])['numReplications']), numNodes)

# For each value node, walk up to its parent / grandparent to collect the
# attributes that identify it.
for (i in seq_len(numNodes)) {
  parent <- xmlParent(ReorderValueNode[[i]])
  grandParent <- xmlParent(parent)

  # Attribute(s) of the parent node (the original comment calls this the methodName)
  name[i] <- xmlAttrs(parent)

  # All descendants of the grandparent that carry a refId attribute
  providerNode <- getNodeSet(grandParent, ".//*[@refId]")

  # First refId is used as the consumable, second as the unit/company id
  Consumable[i] <- xmlAttrs(providerNode[[1]])['refId']
  UnitId[i] <- xmlAttrs(providerNode[[2]])['refId']

  # First value attribute found under the parent
  valueNode <- getNodeSet(parent, ".//*[@value]")
  value[i] <- xmlAttrs(valueNode[[1]])['value']
}

# Data frame of collected nodes from entire xmlFile
ReorderDf <- data.frame(nodeNum, reps, name, UnitId, Consumable, value)
print('Parsing complete...')
##########################################################################################
##################        Locating Factors of Interest          ##########################
##########################################################################################
# Choose the units or factors of interest through the use of dplyr filters

# This can be changed to look for any unit or factor based on the information available
# from the xml data collection

excludedUnits <- paste(c("BSB", "FSC", "DISTRO", "ALLIED", "SUST", "EN-BDE",
                         "HHC", "HHB", "HQ", "BEB","MEB","MI-BN"), collapse = "|")
excludedConsumables <- paste(c("CARGO", "WATER"), collapse = "|")

Div4 <- ReorderDf %>%
  select(nodeNum, reps, name, UnitId, Consumable, value) %>%
  filter(str_detect(UnitId, "DIV-4")) %>%
  filter(!str_detect(UnitId, excludedUnits)) %>%
  filter(!str_detect(Consumable, excludedConsumables))

# Ensure that MASS is not attached at the same time as dplyr or else it will mask dplyr's select()

mainFactors <- Div4 %>% filter(str_detect(Consumable, "AMMO")) # replace with your chosen data set

# Values to vary to create a range. as.character() first makes the conversion
# correct whether `value` is a factor (data.frame() default before R 4.0) or a
# character column (default since R 4.0). The previous
# as.numeric(levels(x))[x] idiom returns all NA for a character column.
mainFactors$value <- as.numeric(as.character(mainFactors$value))

# Now set the range of min and max values: base value -/+ p * base value
mainFactors['min'] <- (mainFactors$value - mainFactors$value*p)
mainFactors['max'] <- (mainFactors$value*(1+p))

# From DOEGen.R
ammoDOE <- newDOE(mainFactors)

# Creates folder for initial storage of data, it is based on the Dir set at the beginning.
# Guarded so that a re-run does not emit an "already exists" warning.
if (!dir.exists(Dir)) {
  dir.create(Dir, recursive = TRUE)
}

writeScenarios(ammoDOE)
files <- getXMLs(Dir)

##########################################################################################
# Runs each scenario through LBC, this takes a while since it has to shut down and restart LBC each time
setwd(lbcFolder)

runEx(files, Dir)
removeFiles(files)

##########################################################################################
################ Gather the information for the response variable    ####################
##########################################################################################

# Finds all the csv files and then puts the names in a list.
# list.files() takes a regular expression, not a glob, so the extension is
# anchored explicitly.
csvL <- list.files(path = Dir, pattern = "\\.csv$")
csvL <- mixedsort(csvL)

# Creates an empty data frame for information collection,
# this can be modified based on what desired information the user is looking for
results <- data.frame(matrix(ncol = 5))
colnames(results) <- c('DP', 'REP', 'Bad', 'Total', '%')


test <- getResponse(csvs = csvL, DOE = ammoDOE)
write.table(test, file = paste0(Dir,'test.csv'), sep = ',')
##########################################################################################
# need to include code that saves each linear model and test data to the folder holding the data
par(mfrow = c(2,2))
test.mod <- getModel(test)

# Save the current DOE (the leading nrow(mainFactors) columns of the response table)
curDOE <- test[, seq_len(nrow(mainFactors)), drop = FALSE]
curDOE <- as.data.frame(t(curDOE))
write.table(curDOE, file = paste0(Dir,'DOE for initial .csv'), sep = ',')

##########################################################################################
# Storing percent values for additional stopping criteria
percents <- list()
percents <- append(percents, test.mod[1])
c <- 0 # consecutive iterations without sufficient change (variable only; c() still resolves to the function)

##########################################################################################
################ Adaptive Portion, as long as it meets conditions    ####################
##########################################################################################
# The two arms of the former `if (k == 1) ... else ...` were identical except
# for the shape of the `results` scaffold, so only that part stays conditional.
while (test.mod[1] > z && c < iterThres && k < maxIter) {
  print(paste('Now executing iteration', k))

  DOE <- aDOE(test, test.mod[2], mainFactors)

  drive <- newFolder(StorageLoc, k)

  Dir <- paste0(drive[1], '/')
  Dir2 <- paste0(drive[2], '/')

  setwd(lbcFolder)

  writeScenarios(DOE)

  files <- getXMLs(Dir)

  runEx(files, Dir)

  removeFiles(files)

  # Finds all the csv files and then puts the names in a list.
  # NOTE(review): the initial pass sorts this list with mixedsort(); the same
  # ordering is applied here so getResponse() receives the files in a
  # consistent order on every pass. Confirm getResponse() pairs files with
  # design points by position.
  csvL <- mixedsort(list.files(path = Dir, pattern = "\\.csv$"))

  # Empty data frame for information collection. The first adaptive pass and
  # the later ones historically used different shapes; both are preserved.
  if (k == 1) {
    results <- data.frame(matrix(ncol = 5))
    colnames(results) <- c('DP', 'REP', 'Bad', 'Total', '%')
  } else {
    results <- data.frame(matrix(ncol = 3, nrow = length(csvL)))
    colnames(results) <- c('All','Total', 'All%')
  }

  newTest <- getResponse(csvs = csvL, DOE = DOE)
  test <- rbind(test, newTest)

  curDOE <- test[, seq_len(nrow(mainFactors)), drop = FALSE]
  curDOE <- as.data.frame(t(curDOE))
  write.table(curDOE, file = paste0(Dir,'DOE_for_turn ',k,'.csv'), sep = ',')

  par(mfrow = c(2,2))
  test.mod <- getModel(test)

  # Compare the two most recent entries. Indexing from the end of the list
  # (rather than by k) stays valid when k is started at a value other than 1.
  percents <- append(percents, test.mod[1])
  diff <- abs(percents[[length(percents)]] - percents[[length(percents) - 1]])
  if (diff > stopVal) {
    c <- 0
  } else {
    c <- c + 1
  }

  k <- k + 1
}

final.mod <- finalModel(test)

sigFactors(final.mod)

# End program
print('Program
complete')
pend = Sys.time()
print(pend - pstart)



diff --git a/adaptDOEGen2.R b/adaptDOEGen2.R new file mode 100644 index 0000000..bf9570a --- /dev/null +++ b/adaptDOEGen2.R @@ -0,0 +1,34 @@
#' Build a follow-on NOLH design centred on one existing design point
#'
#' Takes the factor settings of row `center` of `DOE`, spans each factor
#' from `value - p2 * value` to `value * (1 + p2)`, and fills that range
#' using the design returned by `nolhDesign()`.
#'
#' @param DOE Data frame of design points, one per row. Its leading
#'   `nrow(factorDOE)` columns are the factor settings; later columns (the
#'   response) are ignored.
#' @param center Row index into `DOE` of the design point to centre on (the
#'   caller passes the design point with the highest residual).
#' @param factorDOE Data frame with one row per factor; its `UnitId` column
#'   supplies the row names of the result.
#' @param p2 Half-width of the new range as a fraction of the centre value.
#'   Defaults to 0.10, the value that was previously hard-coded in the body.
#' @param iter Iteration number used to offset the design-point labels.
#'   Defaults to the caller's global `k`, which the body previously read
#'   directly.
#' @param dpOffset Label offset applied per iteration. Defaults to 257, the
#'   previously hard-coded constant (presumably the number of design points in
#'   one NOLH design of this size; TODO confirm).
#' @return Data frame with one row per factor and one column per new design
#'   point, columns named "DP <number>".
aDOE <- function(DOE, center, factorDOE, p2 = 0.10, iter = k, dpOffset = 257) {
  nFactors <- nrow(factorDOE)

  # Factor settings of the chosen design point, as a single `value` column.
  # drop = FALSE keeps a data frame even when there is only one factor.
  centre <- data.frame(DOE[center, ])[, seq_len(nFactors), drop = FALSE]
  newVal <- data.frame(t(centre))
  colnames(newVal) <- 'value'

  newVal['min'] <- (newVal$value - newVal$value * p2)
  newVal['max'] <- (newVal$value * (1 + p2))

  # blank$design has one row per design point and one column per factor
  # (presumably scaled to [0, 1]; see the DiceDesign documentation).
  blank <- nolhDesign(nrow(newVal))

  # Transposed, each row is a factor. The per-factor vectors recycle down the
  # columns, so row i is scaled by factor i's range and shifted by its minimum.
  unitDesign <- t(blank$design)
  ammoDOE2 <- as.data.frame(newVal$min + (newVal$max - newVal$min) * unitDesign)

  colnames(ammoDOE2) <- paste("DP", seq_len(blank$n) + (dpOffset * iter), sep = " ")
  row.names(ammoDOE2) <- factorDOE$UnitId

  ammoDOE2
}
\ No newline at end of file diff --git a/modeling4 - Return Adj R Squared.R b/modeling4 - Return Adj R Squared.R new file mode 100644 index 0000000..5c3a9cd --- /dev/null +++ b/modeling4 - Return Adj R Squared.R @@ -0,0 +1,34 @@
##########################################################################################
################ Fit the meta-model and report residual diagnostics  ####################
##########################################################################################

#' Fit the two-way-interaction linear model and summarise its residuals
#'
#' Fits `lm(y ~ (.)^2)` on `data`, saves the model and the data into
#' `out_dir`, prints a short residual report and draws the standard `lm`
#' diagnostic plots on the current graphics device.
#'
#' Side effect kept for compatibility: the working directory is changed to
#' `out_dir` and is NOT restored.
#' NOTE(review): later steps of the driver script may rely on this. Confirm
#' before wrapping it in on.exit().
#'
#' @param data Data frame containing the response column `y` and the factor
#'   columns.
#' @param out_dir Directory that receives 'lnmod.RData' and 'model data.csv'.
#'   Defaults to the caller's global `Dir`, which the body previously read
#'   directly.
#' @return Numeric vector of length 4:
#'   [1] fraction of residuals outside mean +/- 2 sd,
#'   [2] index of the observation with the largest absolute residual,
#'   [3] adjusted R squared,
#'   [4] adjusted R squared.
#'   The original `c(percent, newCen, plot(lm.mod), adj.r.squared)` was meant
#'   to have 4 elements, but plot() contributes NULL, which c() drops. The
#'   value therefore landed in slot 3 while callers read slot 4 and got NA.
#'   Both slots now carry it, so either index works.
getModel <- function(data, out_dir = Dir) {
  print('Conducting regression analysis of the data...')
  # The local must stay named `lm.mod`: save() stores the object under that
  # name, and anything that load()s 'lnmod.RData' expects it.
  lm.mod <- lm(y~(.)^2, data = data)
  print('Model Complete..')
  setwd(out_dir)
  print('Saving model to folder...')
  save(lm.mod, file = 'lnmod.RData')
  # https://stackoverflow.com/questions/14761496/saving-and-loading-a-model-in-r
  print('Saving data frame to folder..')
  write.table(data, file = 'model data.csv', sep = ',')
  # https://datascienceplus.com/exporting-data-from-r/

  # Residuals further than two standard deviations from their mean count as
  # outside the accepted range.
  avg <- mean(lm.mod$residuals)
  sigma <- sd(lm.mod$residuals)
  ub <- avg + 2*sigma
  lb <- avg - 2*sigma

  nOutside <- sum(lm.mod$residuals > ub) + sum(lm.mod$residuals < lb)
  percent <- nOutside/nrow(data)
  print(paste0('The number of residuals outside of the accepted range is ',nOutside,'.'))
  print(paste0('The current length of the DOE is ', nrow(data),'.'))
  print(paste0('The percentage of residuals outside of the accepted range is ', round(percent,5), '.'))

  # Design point with the largest absolute residual becomes the next centre.
  newCen <- which.max(abs(lm.mod$residuals))
  print(paste0('Building new DOE using design point ', names(lm.mod$residuals[newCen]),'.'))
  print('Modeling complete...')

  # Diagnostic plots are drawn for their side effect only.
  plot(lm.mod)
  adjR2 <- summary(lm.mod)$adj.r.squared

  c(percent, newCen, adjR2, adjR2)
}
\ No newline at end of file -- GitLab