Skip to content
Snippets Groups Projects
Commit bfedfeb0 authored by Streams, James (CPT)'s avatar Streams, James (CPT)
Browse files

updates

parent d8696ea5
No related branches found
No related tags found
No related merge requests found
##########################################################################################
################## ADOE Code ##########################
##########################################################################################
# Purpose: Aid analyst in analysis by allowing the program to determine the best range
# the factor inputs should be to obtain an objective.
##########################################################################################
# Packages used:
library(XML) # Tools for parsing and generating XML within R
library(stringr) # Common string operations
library(dplyr) # Data manipulation
library(DiceDesign) # Generates a DOEs
library(tidyr) # Easily tidy data with the spread() or gather() function
library(data.table) # Extension of data frame
library(bestglm) # Best subset GLM and regression utilities
library(gtools) # Order string experssions; mixedsort()
library(parallel) # Allows for parallel computing
# library(DT) # Function to print datatable
print('Libraries loaded...')
# Set initial working directory
setwd("/home/jestream/LBCv5/ADOE/")
### Load DoE Generation Functions
# Functions here include loading base case XML, creating experimental design, and generating XML scenarios for exection
source('DOEGen.R') # Loads DoE Generator Function (newDOE) based on 'DiceDesign' NOLH function.
source('scenCreator.R') # Loads function to take design and base XML (makeXMLs) to create required number of XMLs to run
source('gatherxmls.R') # Creates a list of XMLs to process through the simulation
source('runSim.R') # Loads function that runs DoE in parallel.
source('remove.R') # Remove scenarios files to clear room on hard drive
source('response4.R') # Gets the response variable from the data
source('modeling3.R') # generates the model
source('adaptDOEGen2.R') # DOE generator for the adaptive portion of the program
source('newadptFolder3.R') # Creates new folder for storage of each iterations data
source('finalModel.R') # Displays the results and the final plot of the found meta-model
source('sigFactors.R') # Displays the found significant factors from the meta-model
print('Functions loaded...')
# Location of LBC or Simulation to use
lbcFolder <- "/home/jestream/LBCv5/"
# xml files location
setwd("/home/jestream/LBCv5/data/excel/")
# The xml file to parse for analysis
xmlfile = "SCEN7-Phase-3-5NOV2018-90.v2.xml"
# Where to store the data
StorageLoc <- "/home/jestream/LBCv5/data/"
# Name of the files for storage
namefile <- 'bigbox'
# identify the directory where the files are to be located
Dir = paste0(StorageLoc,namefile,"/")
Dir2 = paste0(StorageLoc,namefile,"/")
# the number of replications per design point
numReps = 30
# Sets the initial range of value for the first DOE
p = 0.25
# Sets the range of min amd max values for follow on DOEs
p2 = 0.15
# Starting value for iterations, should remain at 1 unless starting from a different iteration
k = 1
# Stopping condition for adaptive process
z = 0.1 # percent of residuals outside of acceptable range
stopVal <- 0.004 # minimum change needed to continue iteration
iterThres = 2 # number of iterations in a row the model is allowed to iterate wwithout improvement
maxIter = 12
# Values for displaying significant factors
alpha = 0.05 # The minimum p-value a factor must have to be considered significant
n = 10 # The max number of significant factors to return
##########################################################################################
################## Search Parameters in XML files ##########################
##########################################################################################
pstart = Sys.time() # starts the clock to time how long the program takes to complete
print('Parsing XML file...')
# Breaks the xmlFile into sections and saves a temp version for data extraction
xmldata <- xmlTreeParse(xmlfile, useInternal = TRUE)
# Locates the a number of replications value
numReplications = getNodeSet(xmldata, "//LBCAssembly[@numReplications]")
# find all ThresholdReorderLogic Nodes
ReorderNodes=getNodeSet(xmldata, "//ThresholdReorderLogic")
# Locates all the reorder point data (this section can be adjusted to look for other
# areas of interest such as consumption rate or truck capacities)
ReorderValueNode = getNodeSet(xmldata, "//ThresholdReorderLogic//*[@value]")
numNodes = length(ReorderValueNode)
# Create lists to collect information from xmlFile
numNodesName=character()
nodeNum = integer()
reps = character()
name = character()
Company = character()
UnitId = character()
Consumable =character()
value = character()
# this loop will return all required information from the parent nodes in reference
# to the attribute value name
for (i in 1:numNodes){
parent = xmlParent(ReorderValueNode[[i]])
child = xmlChildren(ReorderNodes[[1]])[i]
grandParent = xmlParent(xmlParent(ReorderValueNode[[i]]))
nodeNum[i] = i
name[i] = xmlAttrs(parent) #gets the methodName of the parent node in reference to the attribute value name
reps[i] = xmlAttrs(numReplications[[1]])['numReplications']
# use the name to find the Company Name
providerNode = getNodeSet(grandParent, ".//*[@refId]")
# extract the consumable
Consumable[i]= xmlAttrs(providerNode[[1]])['refId']
# extract the Company Name
UnitId[i]= xmlAttrs(providerNode[[2]])['refId']
# extract the value
valueNode = getNodeSet(parent, ".//*[@value]")
value[i]= xmlAttrs(valueNode[[1]])['value']
}
# Data frame of collected nodes from entire xmlFile
ReorderDf = data.frame(nodeNum, reps, name, UnitId, Consumable, value)
print('Parsing complete...')
##########################################################################################
################## Locating Factors of Interest ##########################
##########################################################################################
# Choose the units or factors of interest through the use of dplyr filters
# This can be changed to look for any unit or factor based on the information available
# from the xml data collection
Div4 <- ReorderDf %>% select(nodeNum, reps, name, UnitId, Consumable, value) %>%
filter(str_detect(UnitId, "DIV-4")) %>% filter(!str_detect(UnitId, paste(c("BSB", "FSC", "DISTRO", "ALLIED", "SUST", "EN-BDE",
"HHC", "HHB", "HQ", "BEB","MEB","MI-BN"), collapse = "|"))) %>% filter(!str_detect(Consumable, paste(c("CARGO", "WATER"),collapse = "|")))
# Enusre that MASS is not running at the same time as dplyr or else it will mask the select function in dplyr
mainFactors <- Div4 %>% filter(str_detect(Consumable, "AMMO")) # replace with your choosen data set
mainFactors$value <- as.numeric(levels(mainFactors$value))[mainFactors$value] # values to vary to create a range
# Code found at
# https://stackoverflow.com/questions/3418128/how-to-convert-a-factor-to-integer-numeric-without-loss-of-information
# Now set the range of min and max values
# add those values to the data frame
mainFactors['min'] <- (mainFactors$value - mainFactors$value*p)
mainFactors['max'] <- (mainFactors$value*(1+p))
# From DOEGen.R
ammoDOE <- newDOE(mainFactors)
# Creates folder for initial storage of data, it is based on the Dir set at the beginning
dir.create(Dir)
writeScenarios(ammoDOE)
files <- getXMLs(Dir)
##########################################################################################
# Runs each scenario through LBC, this takes awhile since it has to shut down and restart LBC each time
setwd(lbcFolder)
runEx(files, Dir)
removeFiles(files)
##########################################################################################
################ Gather the information for the response varialbe ####################
##########################################################################################
# Finds all the csv files and then puts the names in a list
csvL <- list.files(path = Dir, pattern = '*.csv')
csvL <- mixedsort(csvL)
# Creates an empty data frame for information collection,
# this can be modified based on what desired information the user is looking for
results <- data.frame(matrix(ncol = 5))
colnames(results) <-c('DP', 'REP', 'Bad', 'Total', '%')
test <- getResponse(csvs = csvL, DOE = ammoDOE)
write.table(test, file = paste0(Dir,'test.csv'), sep = ',')
##########################################################################################
# need to include code that saves each linear model and test data to the folder holding the data
par(mfrow = c(2,2))
test.mod <- getModel(test)
# Save the current DOE
curDOE <- test[,1:nrow(mainFactors)]
curDOE <- as.data.frame(t(curDOE))
write.table(curDOE, file = paste0(Dir,'DOE for initial .csv'), sep = ',')
##########################################################################################
# Storing percent values for additional stopping criteria
percents <- list()
percents <- append(percents, test.mod[1])
c = 0
##########################################################################################
################ Adaptive Portion, as long as it meets conditions ####################
##########################################################################################
while (test.mod[1] > z && c < iterThres && k < maxIter){
if(k == 1){
print(paste('Now executing iteration',k))
DOE <- aDOE(test, test.mod[2], mainFactors)
drive <- newFolder(StorageLoc, k)
Dir = paste0(drive[1],'/')
Dir2 = paste0(drive[2],'/')
setwd(lbcFolder)
writeScenarios(DOE)
files <- getXMLs(Dir)
runEx(files, Dir)
removeFiles(files)
# Finds all the csv files and then puts the names in a list
csvL <- list.files(path = Dir, pattern = '*.csv')
# Creates an empty data frame for information collection,
# this can be modified based on what desired information the user is looking for
results <- data.frame(matrix(ncol = 5))
colnames(results) <-c('DP', 'REP', 'Bad', 'Total', '%')
newTest <- getResponse(csvs = csvL, DOE = DOE)
test <- rbind(test,newTest)
curDOE <- test[,1:nrow(mainFactors)]
curDOE <- as.data.frame(t(curDOE))
write.table(curDOE, file = paste0(Dir,'DOE_for_turn ',k,'.csv'), sep = ',')
par(mfrow = c(2,2))
test.mod <- getModel(test)
percents <- append(percents, test.mod[1])
diff <- abs(percents[[k+1]] - percents[[k]])
if(diff > stopVal){
c = 0
}else{
c = c + 1
}
k = k + 1
} else {
print(paste('Now executing iteration', k))
DOE <- aDOE(test, test.mod[2], mainFactors)
drive <- newFolder(StorageLoc, k)
Dir = paste0(drive[1],'/')
Dir2 = paste0(drive[2],'/')
setwd(lbcFolder)
writeScenarios(DOE)
files <- getXMLs(Dir)
runEx(files, Dir)
removeFiles(files)
csvL <- list.files(path = Dir, pattern = '*.csv')
results <- data.frame(matrix(ncol = 3, nrow = length(csvL)))
colnames(results) <- c('All','Total', 'All%')
newTest <- getResponse(csvs = csvL, DOE = DOE)
test <- rbind(test,newTest)
curDOE <- test[,1:nrow(mainFactors)]
curDOE <- as.data.frame(t(curDOE))
write.table(curDOE, file = paste0(Dir,'DOE_for_turn ',k,'.csv'), sep = ',')
par(mfrow = c(2,2))
test.mod <- getModel(test)
percents <- append(percents, test.mod[1])
diff <- abs(percents[[k+1]] - percents[[k]])
if(diff > stopVal){
c = 0
}else{
c = c + 1
}
k = k + 1
}
}
final.mod <- finalModel(test)
sigFactors(final.mod)
# End program
print('Program complete')
pend = Sys.time()
print(pend - pstart)
##########################################################################################
################## ADOE Code ##########################
##########################################################################################
# Purpose: Aid analyst in analysis by allowing the program to determine the best range
# the factor inputs should be to obtain an objective.
##########################################################################################
# Packages used:
library(XML) # Tools for parsing and generating XML within R
library(stringr) # Common string operations
library(dplyr) # Data manipulation
library(DiceDesign) # Generates a DOEs
library(tidyr) # Easily tidy data with the spread() or gather() function
library(data.table) # Extension of data frame
library(bestglm) # Best subset GLM and regression utilities
library(gtools) # Order string experssions; mixedsort()
library(parallel) # Allows for parallel computing
# library(DT) # Function to print datatable
print('Libraries loaded...')
# Set initial working directory
setwd("/home/jestream/LBCv5/ADOE/")
### Load DoE Generation Functions
# Functions here include loading base case XML, creating experimental design, and generating XML scenarios for exection
source('DOEGen.R') # Loads DoE Generator Function (newDOE) based on 'DiceDesign' NOLH function.
source('scenCreator.R') # Loads function to take design and base XML (makeXMLs) to create required number of XMLs to run
source('gatherxmls.R') # Creates a list of XMLs to process through the simulation
source('runSim.R') # Loads function that runs DoE in parallel.
source('remove.R') # Remove scenarios files to clear room on hard drive
source('response4.R') # Gets the response variable from the data
source('modeling3.R') # generates the model
source('adaptDOEGen2.R') # DOE generator for the adaptive portion of the program
source('newadptFolder3.R') # Creates new folder for storage of each iterations data
source('finalModel.R') # Displays the results and the final plot of the found meta-model
source('sigFactors.R') # Displays the found significant factors from the meta-model
print('Functions loaded...')
# Location of LBC or Simulation to use
lbcFolder <- "/home/jestream/LBCv5/"
# xml files location
setwd("/home/jestream/LBCv5/data/excel/")
# The xml file to parse for analysis
xmlfile = "SCEN7-Phase-3-5NOV2018-90.v2.xml"
# Where to store the data
StorageLoc <- "/home/jestream/LBCv5/data/"
# Name of the files for storage
namefile <- 'longRun'
# identify the directory where the files are to be located
Dir = paste0(StorageLoc,namefile,"/")
Dir2 = paste0(StorageLoc,namefile,"/")
# the number of replications per design point
numReps = 30
# Sets the initial range of value for the first DOE
p = 0.15
# Sets the range of min amd max values for follow on DOEs
p2 = 0.10
# Starting value for iterations, should remain at 1 unless starting from a different iteration
k = 1
# Stopping condition for adaptive process
z = 0.1 # percent of residuals outside of acceptable range
# stopVal <- 0.004 # minimum change needed to continue iteration
# iterThres = 2 # number of iterations in a row the model is allowed to iterate wwithout improvement
maxIter = 12
# Values for displaying significant factors
alpha = 0.05 # The minimum p-value a factor must have to be considered significant
n = 10 # The max number of significant factors to return
##########################################################################################
################## Search Parameters in XML files ##########################
##########################################################################################
pstart = Sys.time() # starts the clock to time how long the program takes to complete
print('Parsing XML file...')
# Breaks the xmlFile into sections and saves a temp version for data extraction
xmldata <- xmlTreeParse(xmlfile, useInternal = TRUE)
# Locates the a number of replications value
numReplications = getNodeSet(xmldata, "//LBCAssembly[@numReplications]")
# find all ThresholdReorderLogic Nodes
ReorderNodes=getNodeSet(xmldata, "//ThresholdReorderLogic")
# Locates all the reorder point data (this section can be adjusted to look for other
# areas of interest such as consumption rate or truck capacities)
ReorderValueNode = getNodeSet(xmldata, "//ThresholdReorderLogic//*[@value]")
numNodes = length(ReorderValueNode)
# Create lists to collect information from xmlFile
numNodesName=character()
nodeNum = integer()
reps = character()
name = character()
Company = character()
UnitId = character()
Consumable =character()
value = character()
# this loop will return all required information from the parent nodes in reference
# to the attribute value name
for (i in 1:numNodes){
parent = xmlParent(ReorderValueNode[[i]])
child = xmlChildren(ReorderNodes[[1]])[i]
grandParent = xmlParent(xmlParent(ReorderValueNode[[i]]))
nodeNum[i] = i
name[i] = xmlAttrs(parent) #gets the methodName of the parent node in reference to the attribute value name
reps[i] = xmlAttrs(numReplications[[1]])['numReplications']
# use the name to find the Company Name
providerNode = getNodeSet(grandParent, ".//*[@refId]")
# extract the consumable
Consumable[i]= xmlAttrs(providerNode[[1]])['refId']
# extract the Company Name
UnitId[i]= xmlAttrs(providerNode[[2]])['refId']
# extract the value
valueNode = getNodeSet(parent, ".//*[@value]")
value[i]= xmlAttrs(valueNode[[1]])['value']
}
# Data frame of collected nodes from entire xmlFile
ReorderDf = data.frame(nodeNum, reps, name, UnitId, Consumable, value)
print('Parsing complete...')
##########################################################################################
################## Locating Factors of Interest ##########################
##########################################################################################
# Choose the units or factors of interest through the use of dplyr filters
# This can be changed to look for any unit or factor based on the information available
# from the xml data collection
Div4 <- ReorderDf %>% select(nodeNum, reps, name, UnitId, Consumable, value) %>%
filter(str_detect(UnitId, "DIV-4")) %>% filter(!str_detect(UnitId, paste(c("BSB", "FSC", "DISTRO", "ALLIED", "SUST", "EN-BDE",
"HHC", "HHB", "HQ", "BEB","MEB","MI-BN"), collapse = "|"))) %>% filter(!str_detect(Consumable, paste(c("CARGO", "WATER"),collapse = "|")))
# Enusre that MASS is not running at the same time as dplyr or else it will mask the select function in dplyr
mainFactors <- Div4 %>% filter(str_detect(Consumable, "AMMO")) # replace with your choosen data set
mainFactors$value <- as.numeric(levels(mainFactors$value))[mainFactors$value] # values to vary to create a range
# Code found at
# https://stackoverflow.com/questions/3418128/how-to-convert-a-factor-to-integer-numeric-without-loss-of-information
# Now set the range of min and max values
# add those values to the data frame
mainFactors['min'] <- (mainFactors$value - mainFactors$value*p)
mainFactors['max'] <- (mainFactors$value*(1+p))
# From DOEGen.R
ammoDOE <- newDOE(mainFactors)
# Creates folder for initial storage of data, it is based on the Dir set at the beginning
dir.create(Dir)
writeScenarios(ammoDOE)
files <- getXMLs(Dir)
##########################################################################################
# Runs each scenario through LBC, this takes awhile since it has to shut down and restart LBC each time
setwd(lbcFolder)
runEx(files, Dir)
removeFiles(files)
##########################################################################################
################ Gather the information for the response varialbe ####################
##########################################################################################
# Finds all the csv files and then puts the names in a list
csvL <- list.files(path = Dir, pattern = '*.csv')
csvL <- mixedsort(csvL)
# Creates an empty data frame for information collection,
# this can be modified based on what desired information the user is looking for
results <- data.frame(matrix(ncol = 5))
colnames(results) <-c('DP', 'REP', 'Bad', 'Total', '%')
test <- getResponse(csvs = csvL, DOE = ammoDOE)
write.table(test, file = paste0(Dir,'test.csv'), sep = ',')
##########################################################################################
# need to include code that saves each linear model and test data to the folder holding the data
par(mfrow = c(2,2))
test.mod <- getModel(test)
# Save the current DOE
curDOE <- test[,1:nrow(mainFactors)]
curDOE <- as.data.frame(t(curDOE))
write.table(curDOE, file = paste0(Dir,'DOE for initial .csv'), sep = ',')
##########################################################################################
# Storing percent values for additional stopping criteria
percents <- list()
percents <- append(percents, test.mod[1])
c = 0
##########################################################################################
################ Adaptive Portion, as long as it meets conditions ####################
##########################################################################################
while (test.mod[1] > z && k < maxIter){
if(k == 1){
print(paste('Now executing iteration',k))
DOE <- aDOE(test, test.mod[2], mainFactors)
drive <- newFolder(StorageLoc, k)
Dir = paste0(drive[1],'/')
Dir2 = paste0(drive[2],'/')
setwd(lbcFolder)
writeScenarios(DOE)
files <- getXMLs(Dir)
runEx(files, Dir)
removeFiles(files)
# Finds all the csv files and then puts the names in a list
csvL <- list.files(path = Dir, pattern = '*.csv')
# Creates an empty data frame for information collection,
# this can be modified based on what desired information the user is looking for
results <- data.frame(matrix(ncol = 5))
colnames(results) <-c('DP', 'REP', 'Bad', 'Total', '%')
newTest <- getResponse(csvs = csvL, DOE = DOE)
test <- rbind(test,newTest)
curDOE <- test[,1:nrow(mainFactors)]
curDOE <- as.data.frame(t(curDOE))
write.table(curDOE, file = paste0(Dir,'DOE_for_turn ',k,'.csv'), sep = ',')
par(mfrow = c(2,2))
test.mod <- getModel(test)
percents <- append(percents, test.mod[1])
diff <- abs(percents[[k+1]] - percents[[k]])
if(diff > stopVal){
c = 0
}else{
c = c + 1
}
k = k + 1
} else {
print(paste('Now executing iteration', k))
DOE <- aDOE(test, test.mod[2], mainFactors)
drive <- newFolder(StorageLoc, k)
Dir = paste0(drive[1],'/')
Dir2 = paste0(drive[2],'/')
setwd(lbcFolder)
writeScenarios(DOE)
files <- getXMLs(Dir)
runEx(files, Dir)
removeFiles(files)
csvL <- list.files(path = Dir, pattern = '*.csv')
results <- data.frame(matrix(ncol = 3, nrow = length(csvL)))
colnames(results) <- c('All','Total', 'All%')
newTest <- getResponse(csvs = csvL, DOE = DOE)
test <- rbind(test,newTest)
curDOE <- test[,1:nrow(mainFactors)]
curDOE <- as.data.frame(t(curDOE))
write.table(curDOE, file = paste0(Dir,'DOE_for_turn ',k,'.csv'), sep = ',')
par(mfrow = c(2,2))
test.mod <- getModel(test)
percents <- append(percents, test.mod[1])
diff <- abs(percents[[k+1]] - percents[[k]])
if(diff > stopVal){
c = 0
}else{
c = c + 1
}
k = k + 1
}
}
final.mod <- finalModel(test)
sigFactors(final.mod)
# End program
print('Program complete')
pend = Sys.time()
print(pend - pstart)
##########################################################################################
################## ADOE Code ##########################
##########################################################################################
# Purpose: Aid analyst in analysis by allowing the program to determine the best range
# the factor inputs should be to obtain an objective.
##########################################################################################
# Packages used:
library(XML) # Tools for parsing and generating XML within R
library(stringr) # Common string operations
library(dplyr) # Data manipulation
library(DiceDesign) # Generates a DOEs
library(tidyr) # Easily tidy data with the spread() or gather() function
library(data.table) # Extension of data frame
library(bestglm) # Best subset GLM and regression utilities
library(gtools) # Order string experssions; mixedsort()
library(parallel) # Allows for parallel computing
# library(DT) # Function to print datatable
print('Libraries loaded...')
# Set initial working directory
setwd("/home/jestream/LBCv5/ADOE/")
### Load DoE Generation Functions
# Functions here include loading base case XML, creating experimental design, and generating XML scenarios for exection
source('DOEGen.R') # Loads DoE Generator Function (newDOE) based on 'DiceDesign' NOLH function.
source('scenCreator.R') # Loads function to take design and base XML (makeXMLs) to create required number of XMLs to run
source('gatherxmls.R') # Creates a list of XMLs to process through the simulation
source('runSim.R') # Loads function that runs DoE in parallel.
source('remove.R') # Remove scenarios files to clear room on hard drive
source('response4.R') # Gets the response variable from the data
source('modeling4 - Return Adj R Squared.R') # generates the model
source('adaptDOEGen2.R') # DOE generator for the adaptive portion of the program
source('newadptFolder3.R') # Creates new folder for storage of each iterations data
source('finalModel.R') # Displays the results and the final plot of the found meta-model
source('sigFactors.R') # Displays the found significant factors from the meta-model
print('Functions loaded...')
# Location of LBC or Simulation to use
lbcFolder <- "/home/jestream/LBCv5/"
# xml files location
setwd("/home/jestream/LBCv5/data/excel/")
# The xml file to parse for analysis
xmlfile = "SCEN7-Phase-3-5NOV2018-90.v2.xml"
# Where to store the data
StorageLoc <- "/home/jestream/LBCv5/data/"
# Name of the files for storage
namefile <- 'testRsquared'
# identify the directory where the files are to be located
Dir = paste0(StorageLoc,namefile,"/")
Dir2 = paste0(StorageLoc,namefile,"/")
# the number of replications per design point
numReps = 30
# Sets the initial range of value for the first DOE
p = 0.15
# Sets the range of min amd max values for follow on DOEs
p2 = 0.10
# Starting value for iterations, should remain at 1 unless starting from a different iteration
k = 1
# Stopping condition for adaptive process
# z = 0.05 # percent of residuals outside of acceptable range
stopVal <- 0.004 # minimum change needed to continue iteration
iterThres = 2 # number of iterations in a row the model is allowed to iterate wwithout improvement
maxIter = 12
# Values for displaying significant factors
alpha = 0.05 # The minimum p-value a factor must have to be considered significant
n = 10 # The max number of significant factors to return
##########################################################################################
################## Search Parameters in XML files ##########################
##########################################################################################
pstart = Sys.time() # starts the clock to time how long the program takes to complete
print('Parsing XML file...')
# Breaks the xmlFile into sections and saves a temp version for data extraction
xmldata <- xmlTreeParse(xmlfile, useInternal = TRUE)
# Locates the a number of replications value
numReplications = getNodeSet(xmldata, "//LBCAssembly[@numReplications]")
# find all ThresholdReorderLogic Nodes
ReorderNodes=getNodeSet(xmldata, "//ThresholdReorderLogic")
# Locates all the reorder point data (this section can be adjusted to look for other
# areas of interest such as consumption rate or truck capacities)
ReorderValueNode = getNodeSet(xmldata, "//ThresholdReorderLogic//*[@value]")
numNodes = length(ReorderValueNode)
# Create lists to collect information from xmlFile
numNodesName=character()
nodeNum = integer()
reps = character()
name = character()
Company = character()
UnitId = character()
Consumable =character()
value = character()
# this loop will return all required information from the parent nodes in reference
# to the attribute value name
for (i in 1:numNodes){
parent = xmlParent(ReorderValueNode[[i]])
child = xmlChildren(ReorderNodes[[1]])[i]
grandParent = xmlParent(xmlParent(ReorderValueNode[[i]]))
nodeNum[i] = i
name[i] = xmlAttrs(parent) #gets the methodName of the parent node in reference to the attribute value name
reps[i] = xmlAttrs(numReplications[[1]])['numReplications']
# use the name to find the Company Name
providerNode = getNodeSet(grandParent, ".//*[@refId]")
# extract the consumable
Consumable[i]= xmlAttrs(providerNode[[1]])['refId']
# extract the Company Name
UnitId[i]= xmlAttrs(providerNode[[2]])['refId']
# extract the value
valueNode = getNodeSet(parent, ".//*[@value]")
value[i]= xmlAttrs(valueNode[[1]])['value']
}
# Data frame of collected nodes from entire xmlFile
ReorderDf = data.frame(nodeNum, reps, name, UnitId, Consumable, value)
print('Parsing complete...')
##########################################################################################
################## Locating Factors of Interest ##########################
##########################################################################################
# Choose the units or factors of interest through the use of dplyr filters
# This can be changed to look for any unit or factor based on the information available
# from the xml data collection
Div4 <- ReorderDf %>% select(nodeNum, reps, name, UnitId, Consumable, value) %>%
filter(str_detect(UnitId, "DIV-4")) %>% filter(!str_detect(UnitId, paste(c("BSB", "FSC", "DISTRO", "ALLIED", "SUST", "EN-BDE",
"HHC", "HHB", "HQ", "BEB","MEB","MI-BN"), collapse = "|"))) %>% filter(!str_detect(Consumable, paste(c("CARGO", "WATER"),collapse = "|")))
# Enusre that MASS is not running at the same time as dplyr or else it will mask the select function in dplyr
mainFactors <- Div4 %>% filter(str_detect(Consumable, "AMMO")) # replace with your choosen data set
mainFactors$value <- as.numeric(levels(mainFactors$value))[mainFactors$value] # values to vary to create a range
# Code found at
# https://stackoverflow.com/questions/3418128/how-to-convert-a-factor-to-integer-numeric-without-loss-of-information
# Now set the range of min and max values
# add those values to the data frame
mainFactors['min'] <- (mainFactors$value - mainFactors$value*p)
mainFactors['max'] <- (mainFactors$value*(1+p))
# From DOEGen.R
ammoDOE <- newDOE(mainFactors)
# Creates folder for initial storage of data, it is based on the Dir set at the beginning
dir.create(Dir)
writeScenarios(ammoDOE)
files <- getXMLs(Dir)
##########################################################################################
# Runs each scenario through LBC, this takes awhile since it has to shut down and restart LBC each time
setwd(lbcFolder)
runEx(files, Dir)
removeFiles(files)
##########################################################################################
################ Gather the information for the response varialbe ####################
##########################################################################################
# Finds all the csv files and then puts the names in a list
csvL <- list.files(path = Dir, pattern = '*.csv')
csvL <- mixedsort(csvL)
# Creates an empty data frame for information collection,
# this can be modified based on what desired information the user is looking for
results <- data.frame(matrix(ncol = 5))
colnames(results) <-c('DP', 'REP', 'Bad', 'Total', '%')
test <- getResponse(csvs = csvL, DOE = ammoDOE)
# write.table(test, file = paste0(Dir,'test.csv'), sep = ',')
##########################################################################################
# need to include code that saves each linear model and test data to the folder holding the data
par(mfrow = c(2,2))
test.mod <- getModel(test)
# Save the current DOE
curDOE <- test[,1:nrow(mainFactors)]
curDOE <- as.data.frame(t(curDOE))
write.table(curDOE, file = paste0(Dir,'DOE for initial .csv'), sep = ',')
##########################################################################################
# Storing percent values for additional stopping criteria
percents <- list()
percents <- append(percents, test.mod[4])
c = 0
##########################################################################################
################ Adaptive Portion, as long as it meets conditions ####################
##########################################################################################
while (c < iterThres && k < maxIter){
if(k == 1){
print(paste('Now executing iteration',k))
DOE <- aDOE(test, test.mod[2], mainFactors)
drive <- newFolder(StorageLoc, k)
Dir = paste0(drive[1],'/')
Dir2 = paste0(drive[2],'/')
setwd(lbcFolder)
writeScenarios(DOE)
files <- getXMLs(Dir)
runEx(files, Dir)
removeFiles(files)
# Finds all the csv files and then puts the names in a list
csvL <- list.files(path = Dir, pattern = '*.csv')
# Creates an empty data frame for information collection,
# this can be modified based on what desired information the user is looking for
results <- data.frame(matrix(ncol = 5))
colnames(results) <-c('DP', 'REP', 'Bad', 'Total', '%')
newTest <- getResponse(csvs = csvL, DOE = DOE)
test <- rbind(test,newTest)
curDOE <- test[,1:nrow(mainFactors)]
curDOE <- as.data.frame(t(curDOE))
write.table(curDOE, file = paste0(Dir,'DOE_for_turn ',k,'.csv'), sep = ',')
par(mfrow = c(2,2))
test.mod <- getModel(test)
percents <- append(percents, test.mod[4])
diff <- abs(percents[[k+1]] - percents[[k]])
if(diff > stopVal){
c = 0
}else{
c = c + 1
}
k = k + 1
} else {
print(paste('Now executing iteration', k))
DOE <- aDOE(test, test.mod[2], mainFactors)
drive <- newFolder(StorageLoc, k)
Dir = paste0(drive[1],'/')
Dir2 = paste0(drive[2],'/')
setwd(lbcFolder)
writeScenarios(DOE)
files <- getXMLs(Dir)
runEx(files, Dir)
removeFiles(files)
csvL <- list.files(path = Dir, pattern = '*.csv')
results <- data.frame(matrix(ncol = 3, nrow = length(csvL)))
colnames(results) <- c('All','Total', 'All%')
newTest <- getResponse(csvs = csvL, DOE = DOE)
test <- rbind(test,newTest)
curDOE <- test[,1:nrow(mainFactors)]
curDOE <- as.data.frame(t(curDOE))
write.table(curDOE, file = paste0(Dir,'DOE_for_turn ',k,'.csv'), sep = ',')
par(mfrow = c(2,2))
test.mod <- getModel(test)
percents <- append(percents, test.mod[4])
diff <- abs(percents[[k+1]] - percents[[k]])
if(diff > stopVal){
c = 0
}else{
c = c + 1
}
k = k + 1
}
}
final.mod <- finalModel(test)
sigFactors(final.mod)
# End program
print('Program complete')
pend = Sys.time()
print(pend - pstart)
##########################################################################################
################## ADOE Code ##########################
##########################################################################################
# Purpose: Aid analyst in analysis by allowing the program to determine the best range
# the factor inputs should be to obtain an objective.
##########################################################################################
# Packages used:
library(XML) # Tools for parsing and generating XML within R
library(stringr) # Common string operations
library(dplyr) # Data manipulation
library(DiceDesign) # Generates a DOEs
library(tidyr) # Easily tidy data with the spread() or gather() function
library(data.table) # Extension of data frame
library(bestglm) # Best subset GLM and regression utilities
library(gtools) # Order string experssions; mixedsort()
library(parallel) # Allows for parallel computing
# library(DT) # Function to print datatable
print('Libraries loaded...')
# Set initial working directory
setwd("/home/jestream/LBCv5/ADOE/")
### Load DoE Generation Functions
# Functions here include loading base case XML, creating experimental design, and generating XML scenarios for exection
source('DOEGen.R') # Loads DoE Generator Function (newDOE) based on 'DiceDesign' NOLH function.
source('scenCreator.R') # Loads function to take design and base XML (makeXMLs) to create required number of XMLs to run
source('gatherxmls.R') # Creates a list of XMLs to process through the simulation
source('runSim.R') # Loads function that runs DoE in parallel.
source('remove.R') # Remove scenarios files to clear room on hard drive
source('response4.R') # Gets the response variable from the data
source('modeling3.R') # generates the model
source('adaptDOEGen2.R') # DOE generator for the adaptive portion of the program
source('newadptFolder3.R') # Creates new folder for storage of each iterations data
source('finalModel.R') # Displays the results and the final plot of the found meta-model
source('sigFactors.R') # Displays the found significant factors from the meta-model
print('Functions loaded...')
# Location of LBC or Simulation to use
lbcFolder <- "/home/jestream/LBCv5/"
# xml files location
setwd("/home/jestream/LBCv5/data/excel/")
# The xml file to parse for analysis
xmlfile = "SCEN7-Phase-3-5NOV2018-90.v2.xml"
# Where to store the data
StorageLoc <- "/home/jestream/LBCv5/data/"
# Name of the files for storage
namefile <- 'smallbox'
# identify the directory where the files are to be located
Dir = paste0(StorageLoc,namefile,"/")
Dir2 = paste0(StorageLoc,namefile,"/")
# the number of replications per design point
numReps = 30
# Sets the initial range of value for the first DOE
p = 0.1
# Sets the range of min amd max values for follow on DOEs
p2 = 0.05
# Starting value for iterations, should remain at 1 unless starting from a different iteration
k = 1
# Stopping condition for adaptive process
z = 0.1 # percent of residuals outside of acceptable range
stopVal <- 0.004 # minimum change needed to continue iteration
iterThres = 2 # number of iterations in a row the model is allowed to iterate wwithout improvement
maxIter = 12
# Values for displaying significant factors
alpha = 0.05 # The minimum p-value a factor must have to be considered significant
n = 10 # The max number of significant factors to return
##########################################################################################
################## Search Parameters in XML files ##########################
##########################################################################################
pstart = Sys.time() # starts the clock to time how long the program takes to complete
print('Parsing XML file...')
# Breaks the xmlFile into sections and saves a temp version for data extraction
xmldata <- xmlTreeParse(xmlfile, useInternal = TRUE)
# Locates the a number of replications value
numReplications = getNodeSet(xmldata, "//LBCAssembly[@numReplications]")
# find all ThresholdReorderLogic Nodes
ReorderNodes=getNodeSet(xmldata, "//ThresholdReorderLogic")
# Locates all the reorder point data (this section can be adjusted to look for other
# areas of interest such as consumption rate or truck capacities)
ReorderValueNode = getNodeSet(xmldata, "//ThresholdReorderLogic//*[@value]")
numNodes = length(ReorderValueNode)
# Create lists to collect information from xmlFile
numNodesName=character()
nodeNum = integer()
reps = character()
name = character()
Company = character()
UnitId = character()
Consumable =character()
value = character()
# this loop will return all required information from the parent nodes in reference
# to the attribute value name
for (i in 1:numNodes){
parent = xmlParent(ReorderValueNode[[i]])
child = xmlChildren(ReorderNodes[[1]])[i]
grandParent = xmlParent(xmlParent(ReorderValueNode[[i]]))
nodeNum[i] = i
name[i] = xmlAttrs(parent) #gets the methodName of the parent node in reference to the attribute value name
reps[i] = xmlAttrs(numReplications[[1]])['numReplications']
# use the name to find the Company Name
providerNode = getNodeSet(grandParent, ".//*[@refId]")
# extract the consumable
Consumable[i]= xmlAttrs(providerNode[[1]])['refId']
# extract the Company Name
UnitId[i]= xmlAttrs(providerNode[[2]])['refId']
# extract the value
valueNode = getNodeSet(parent, ".//*[@value]")
value[i]= xmlAttrs(valueNode[[1]])['value']
}
# Data frame of collected nodes from entire xmlFile
ReorderDf = data.frame(nodeNum, reps, name, UnitId, Consumable, value)
print('Parsing complete...')
##########################################################################################
################## Locating Factors of Interest ##########################
##########################################################################################
# Choose the units or factors of interest through the use of dplyr filters
# This can be changed to look for any unit or factor based on the information available
# from the xml data collection
Div4 <- ReorderDf %>% select(nodeNum, reps, name, UnitId, Consumable, value) %>%
filter(str_detect(UnitId, "DIV-4")) %>% filter(!str_detect(UnitId, paste(c("BSB", "FSC", "DISTRO", "ALLIED", "SUST", "EN-BDE",
"HHC", "HHB", "HQ", "BEB","MEB","MI-BN"), collapse = "|"))) %>% filter(!str_detect(Consumable, paste(c("CARGO", "WATER"),collapse = "|")))
# Enusre that MASS is not running at the same time as dplyr or else it will mask the select function in dplyr
mainFactors <- Div4 %>% filter(str_detect(Consumable, "AMMO")) # replace with your choosen data set
mainFactors$value <- as.numeric(levels(mainFactors$value))[mainFactors$value] # values to vary to create a range
# Code found at
# https://stackoverflow.com/questions/3418128/how-to-convert-a-factor-to-integer-numeric-without-loss-of-information
# Now set the range of min and max values
# add those values to the data frame
mainFactors['min'] <- (mainFactors$value - mainFactors$value*p)
mainFactors['max'] <- (mainFactors$value*(1+p))
# From DOEGen.R
ammoDOE <- newDOE(mainFactors)
# Creates folder for initial storage of data, it is based on the Dir set at the beginning
dir.create(Dir)
writeScenarios(ammoDOE)
files <- getXMLs(Dir)
##########################################################################################
# Runs each scenario through LBC, this takes awhile since it has to shut down and restart LBC each time
setwd(lbcFolder)
runEx(files, Dir)
removeFiles(files)
##########################################################################################
################ Gather the information for the response varialbe ####################
##########################################################################################
# Finds all the csv files and then puts the names in a list
csvL <- list.files(path = Dir, pattern = '*.csv')
csvL <- mixedsort(csvL)
# Creates an empty data frame for information collection,
# this can be modified based on what desired information the user is looking for
results <- data.frame(matrix(ncol = 5))
colnames(results) <-c('DP', 'REP', 'Bad', 'Total', '%')
test <- getResponse(csvs = csvL, DOE = ammoDOE)
write.table(test, file = paste0(Dir,'test.csv'), sep = ',')
##########################################################################################
# need to include code that saves each linear model and test data to the folder holding the data
par(mfrow = c(2,2))
test.mod <- getModel(test)
# Save the current DOE
curDOE <- test[,1:nrow(mainFactors)]
curDOE <- as.data.frame(t(curDOE))
write.table(curDOE, file = paste0(Dir,'DOE for initial .csv'), sep = ',')
##########################################################################################
# Storing percent values for additional stopping criteria
percents <- list()
percents <- append(percents, test.mod[1])
c = 0
##########################################################################################
################ Adaptive Portion, as long as it meets conditions ####################
##########################################################################################
while (test.mod[1] > z && c < iterThres && k < maxIter){
if(k == 1){
print(paste('Now executing iteration',k))
DOE <- aDOE(test, test.mod[2], mainFactors)
drive <- newFolder(StorageLoc, k)
Dir = paste0(drive[1],'/')
Dir2 = paste0(drive[2],'/')
setwd(lbcFolder)
writeScenarios(DOE)
files <- getXMLs(Dir)
runEx(files, Dir)
removeFiles(files)
# Finds all the csv files and then puts the names in a list
csvL <- list.files(path = Dir, pattern = '*.csv')
# Creates an empty data frame for information collection,
# this can be modified based on what desired information the user is looking for
results <- data.frame(matrix(ncol = 5))
colnames(results) <-c('DP', 'REP', 'Bad', 'Total', '%')
newTest <- getResponse(csvs = csvL, DOE = DOE)
test <- rbind(test,newTest)
curDOE <- test[,1:nrow(mainFactors)]
curDOE <- as.data.frame(t(curDOE))
write.table(curDOE, file = paste0(Dir,'DOE_for_turn ',k,'.csv'), sep = ',')
par(mfrow = c(2,2))
test.mod <- getModel(test)
percents <- append(percents, test.mod[1])
diff <- abs(percents[[k+1]] - percents[[k]])
if(diff > stopVal){
c = 0
}else{
c = c + 1
}
k = k + 1
} else {
print(paste('Now executing iteration', k))
DOE <- aDOE(test, test.mod[2], mainFactors)
drive <- newFolder(StorageLoc, k)
Dir = paste0(drive[1],'/')
Dir2 = paste0(drive[2],'/')
setwd(lbcFolder)
writeScenarios(DOE)
files <- getXMLs(Dir)
runEx(files, Dir)
removeFiles(files)
csvL <- list.files(path = Dir, pattern = '*.csv')
results <- data.frame(matrix(ncol = 3, nrow = length(csvL)))
colnames(results) <- c('All','Total', 'All%')
newTest <- getResponse(csvs = csvL, DOE = DOE)
test <- rbind(test,newTest)
curDOE <- test[,1:nrow(mainFactors)]
curDOE <- as.data.frame(t(curDOE))
write.table(curDOE, file = paste0(Dir,'DOE_for_turn ',k,'.csv'), sep = ',')
par(mfrow = c(2,2))
test.mod <- getModel(test)
percents <- append(percents, test.mod[1])
diff <- abs(percents[[k+1]] - percents[[k]])
if(diff > stopVal){
c = 0
}else{
c = c + 1
}
k = k + 1
}
}
final.mod <- finalModel(test)
sigFactors(final.mod)
# End program
print('Program complete')
pend = Sys.time()
print(pend - pstart)
aDOE <- function(DOE, center, factorDOE){
newVal <- data.frame(DOE[center,]) # Pulls in the design point with the highest residual
newVal <- newVal[,1:nrow(factorDOE)] # Knocks off the response variable
newVal <- data.frame(t(newVal)) # Transposes from a single row to a single column
colnames(newVal) <- 'value' # Renames the column
newVal
p2 = 0.10 # will comment out in final version,
newVal['min'] <- (newVal$value - newVal$value*p2)
newVal['max'] <- (newVal$value*(1+p2))
blank <- nolhDesign(nrow(newVal))
ammo2 <- data.frame(matrix(ncol = blank$n, nrow = nrow(newVal)))
for (i in 1:nrow(newVal)){
ammo2[i,] <- newVal$max[i] - newVal$min[i]
}
blank2 <- as.data.frame(t(blank$design))
add2min <- as.data.frame(ammo2 * blank2)
ammoMin2 <- data.frame(matrix(ncol = blank$n, nrow = nrow(newVal)))
for (i in 1:nrow(newVal)){
ammoMin2[i,] <- newVal$min[i]
}
ammoDOE2 <- as.data.frame(as.matrix(ammoMin2) + as.matrix(add2min))
designPoints3 <- NULL
for (i in 1:blank$n){
designPoints3[i] <- paste("DP",i+(257*k), sep = " ")
}
colnames(ammoDOE2) <- designPoints3
row.names(ammoDOE2) <- factorDOE$UnitId
return(ammoDOE2)
}
\ No newline at end of file
##########################################################################################
################ Gather the information for the response varialbe ####################
##########################################################################################
getModel <- function(data){
print('Conducting regression analysis of the data...')
# This will find the resdiuals that fall within a certain range and return the number of
# residuals that are outside of the range and DP with the highest residual
lm.mod <- lm(y~(.)^2, data = data)
print('Model Complete..')
setwd(Dir)
print('Saving model to folder...')
save(lm.mod, file = 'lnmod.RData')
# https://stackoverflow.com/questions/14761496/saving-and-loading-a-model-in-r
print('Saving data frame to folder..')
write.table(data, file = 'model data.csv', sep = ',')
# https://datascienceplus.com/exporting-data-from-r/
avg <- mean(lm.mod$residuals)
sigma <- sd(lm.mod$residuals)
ub <- avg + 2*sigma
lb <- avg - 2*sigma
resid <- sum(lm.mod$residuals>ub) + sum(lm.mod$residuals<lb)
percent <- resid/nrow(data)
print(paste0('The number of residuals outside of the accepted range is ',resid,'.'))
print(paste0('The current length of the DOE is ', nrow(data),'.'))
print(paste0('The percentage of residuals outside of the accepted range is ', round(percent,5), '.'))
newCen <- which.max(abs(lm.mod$residuals))
print(paste0('Building new DOE using design point ', names(lm.mod$residuals[newCen]),'.'))
print('Modeling complete...')
return(c(percent,newCen, plot(lm.mod), summary(lm.mod)$adj.r.squared))
}
\ No newline at end of file
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment