-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
26 changed files
with
2,619 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,10 @@ | ||
[Problem] | ||
n_facility = 10 | ||
n_client = 10 | ||
n_scenario = 50 | ||
extensive_optimality_gap = 0.02 | ||
# Time limit in seconds to solve the extensive form | ||
extensive_time_limit = 600 | ||
surrogate_optimality_gap = 0.001 | ||
# Time limit in seconds to solve the surrogate form | ||
surrogate_time_limit = 300 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,29 @@ | ||
# Configurations for data_manager package | ||
|
||
# Unless stated otherwise below, every section and key in this file is mandatory: | ||
# you may change a key's value, but must not rename the key or its section. | ||
|
||
[Run] | ||
# The `problem` value must match the name of the folder that contains the | ||
# data management scripts for the given problem type. | ||
# For example, the data management scripts for the | ||
# Stochastic Capacitated Facility Location Problem (S-CFLP) live in `cflp`. | ||
# Hence, for data management of S-CFLP, we set the `problem` key to | ||
# `cflp`. | ||
problem = cflp | ||
# Number of processes to run in parallel | ||
n_worker = 4 | ||
from_pid = 0 | ||
to_pid = 100 | ||
|
||
# Values in Directory and File section are optional. If nothing is passed, | ||
# we will automatically set the default values. | ||
[Directory] | ||
data = data | ||
result_extensive = result_ext | ||
result_xi = result_xi | ||
[File] | ||
instance = instances.pkl | ||
result_extensive = result_ext.pkl | ||
result_xi = result_xi.pkl | ||
|
Empty file.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,86 @@ | ||
"""Dataset management | ||
The data manager comprises the following modules: | ||
1. generate_instance.py | ||
2. generate_optimal_sol.py | ||
3. generate_xi_hat.py (with different heuristics) | ||
4. improve_xi_hat.py | ||
5. generate_dataset (responsible for creating the dataset for ML models) | ||
Run modules 1 to 5 in order to create the dataset for the ML model. | ||
""" | ||
from argparse import ArgumentParser | ||
from configparser import ConfigParser | ||
from importlib import import_module | ||
from pathlib import Path | ||
|
||
|
||
def _load_callable(module_path, name):
    """Import ``module_path`` and return its attribute ``name``."""
    return getattr(import_module(module_path), name)


def main():
    """Run one (or all) of the data_manager stages for the configured problem.

    The stage is chosen with ``--run`` (``inst``, ``opt``, ``repr``, ``imp``,
    ``dataset`` or ``all``; default ``inst``). Settings are read from
    ``<ROOT>/config/meta.ini`` and ``<ROOT>/config/<problem>.ini``, where
    ``ROOT`` is two directories above this file. Stage implementations are
    imported lazily from ``nectar.data_manager.<problem>``.

    Raises:
        FileNotFoundError: if ``meta.ini`` cannot be read.
    """
    # Parse the command line first so that `--help` and invalid `--run`
    # values are reported even when the configuration files are missing.
    parser = ArgumentParser()
    parser.add_argument('--run', type=str,
                        choices=['inst', 'opt', 'repr', 'imp', 'dataset', 'all'],
                        help='specify the data_manager module to execute. '
                             'inst: to generate instances '
                             'opt: to generate optimal solution '
                             'repr: to find a representative scenario '
                             'imp: to improve a representative scenario '
                             'dataset: to create dataset for ML '
                             'all: to run all modules one after the other',
                        default='inst')
    args = parser.parse_args()

    # Load and set configuration
    meta_config, problem_config = ConfigParser(), ConfigParser()
    ROOT = Path(__file__).parent.parent

    # Meta config. ConfigParser.read() silently skips missing files, so check
    # its return value to fail with a clear message.
    meta_config_path = ROOT / "config" / "meta.ini"
    if not meta_config.read(meta_config_path):
        raise FileNotFoundError(
            "Cannot read meta configuration: " + str(meta_config_path))
    problem = meta_config.get('Run', 'problem')
    # The [Directory] and [File] keys are documented as optional; fall back
    # to the values shipped in the sample meta.ini when they are absent.
    data_dir = meta_config.get('Directory', 'data', fallback='data')
    instance_file = meta_config.get('File', 'instance',
                                    fallback='instances.pkl')
    result_ext_file = meta_config.get('File', 'result_extensive',
                                      fallback='result_ext.pkl')
    result_xi_file = meta_config.get('File', 'result_xi',
                                     fallback='result_xi.pkl')

    # Problem config
    problem_config.read(ROOT / "config" / (problem + ".ini"))
    problem_path = "nectar.data_manager." + problem
    get_problem_identifier = _load_callable(
        "nectar.utils.combinatorics." + problem, "get_problem_identifier")
    identifier = get_problem_identifier(problem_config)

    # Set path
    data_dir_path = ROOT / data_dir / "_".join([problem, identifier])
    path = {
        "data": data_dir_path,
        "result_xi": data_dir_path / result_xi_file,
        "result_ext": data_dir_path / result_ext_file,
        "instance": data_dir_path / instance_file
    }

    # Run the requested stage(s); with `all` they run in pipeline order.
    run_all = args.run == "all"
    if run_all or args.run == "inst":
        generate_instance = _load_callable(
            problem_path + ".generate_instance", "generate_instance")
        generate_instance(meta_config, problem_config, path)
    if run_all or args.run == "opt":
        generate_optimal_sol = _load_callable(
            problem_path + ".generate_optimal_sol", "generate_optimal_sol")
        generate_optimal_sol(meta_config, problem_config, path)
    if run_all or args.run == "repr":
        generate_xi_hat = _load_callable(
            problem_path + ".generate_xi_hat", "generate_xi_hat")
        # Each section of <problem>/runs.ini describes one run.
        runs = ConfigParser()
        runs.read(Path(__file__).parent / problem / "runs.ini")
        for idx in runs.sections():
            generate_xi_hat(meta_config, runs[idx], path)
    if run_all or args.run == "imp":
        improve_xi_hat = _load_callable(
            problem_path + ".improve_xi_hat", "improve_xi_hat")
        improve_xi_hat(meta_config, path)
    if run_all or args.run == "dataset":
        generate_dataset = _load_callable(
            problem_path + ".generate_dataset", "generate_dataset")
        generate_dataset(path)
|
||
|
||
if __name__ == "__main__": | ||
main() |
Empty file.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,138 @@ | ||
import random | ||
from collections import defaultdict | ||
import time | ||
import numpy as np | ||
|
||
from ...utils import load_pickle | ||
|
||
# Seed both random number generators at import time so runs are reproducible.
np.random.seed(7)
random.seed(11)

# Bounds used to rescale the fixed (c_f) and variable (c_v) costs.
MIN_C_F = 15
MAX_C_F = 19
MIN_C_V = 5
MAX_C_V = 9

# NOTE(review): despite the MEAN_ prefix these evaluate to half the range
# (2.0 for both), not the midpoint of the bounds - confirm this is intended.
MEAN_C_F = (MAX_C_F - MIN_C_F) / 2
MEAN_C_V = (MAX_C_V - MIN_C_V) / 2
|
||
|
||
def fetch_scenario(idxs, data):
    """Collect the ``'scenario'`` entry of ``data[idx]`` for every index.

    Args:
        idxs: iterable of keys into ``data``.
        data: mapping whose values are mappings with a ``'scenario'`` entry.

    Returns:
        A NumPy array stacking the scenarios in the order given by ``idxs``.
    """
    return np.asarray([data[idx]['scenario'] for idx in idxs])
|
||
|
||
def normalize_scenario(scenario, MIN_SCE, MAX_SCE):
    """Rescale ``scenario`` linearly so [MIN_SCE, MAX_SCE] maps onto [-1, 1].

    Args:
        scenario: array-like of values to rescale.
        MIN_SCE: lower bound(s), broadcastable against ``scenario``.
        MAX_SCE: upper bound(s), broadcastable against ``scenario``.

    Returns:
        ``2 * (scenario - MIN_SCE) / (MAX_SCE - MIN_SCE) - 1``. Where a bound
        pair has zero range the divisor is replaced by 1, so a value equal to
        the bound maps to -1 instead of producing NaN/inf.
    """
    span = np.subtract(MAX_SCE, MIN_SCE)
    # A constant component (max == min) would otherwise divide by zero.
    safe_span = np.where(span == 0, 1, span)
    scenario_scaled = np.divide(np.subtract(scenario, MIN_SCE), safe_span)

    return (scenario_scaled * 2) - 1
|
||
|
||
def extract_scenario_features(scenario):
    """Compute a flat feature vector from a 2-D scenario array.

    The vector is laid out as follows (``n_cols = scenario.shape[1]``):

    1. Seven column-wise statistics, ``n_cols`` values each, in this order:
       max, min, median, 0.75-quantile, 0.25-quantile, mean, std.
    2. For each ``k`` in (0.9, 1, 1.1, 1.2, 1.5), ``n_cols`` fractions of rows
       where ``(1 + k) * col_i >= col_j`` for every other column ``j``,
       followed by ``n_cols`` fractions of rows where
       ``col_i <= (1 + k) * col_j`` for every other column ``j``.

    Note that the multiplier applied is ``1 + k`` (1.9 ... 2.5), not ``k``.
    presumably rows are scenarios and columns are clients - TODO confirm.

    Returns:
        Tuple ``(features, elapsed)``: the features as a NumPy array and the
        wall-clock seconds spent computing them.
    """
    tic = time.time()

    # Column-wise summary statistics, in the fixed order described above.
    summaries = (
        np.max(scenario, axis=0),
        np.min(scenario, axis=0),
        np.median(scenario, axis=0),
        np.quantile(scenario, 0.75, axis=0),
        np.quantile(scenario, 0.25, axis=0),
        np.mean(scenario, axis=0),
        np.std(scenario, axis=0),
    )
    features = [value for stat in summaries for value in stat]

    n_rows, n_cols = scenario.shape[0], scenario.shape[1]
    for k in (0.9, 1, 1.1, 1.2, 1.5):
        factor = 1 + k
        ge_fractions, le_fractions = [], []
        for col in range(n_cols):
            # Start from "all rows qualify" and narrow down per other column.
            ge_mask = np.ones(n_rows, dtype=bool)
            le_mask = np.ones(n_rows, dtype=bool)
            for other in range(n_cols):
                if other != col:
                    ge_mask = ge_mask & (factor * scenario[:, col] >= scenario[:, other])
                    le_mask = le_mask & (scenario[:, col] <= factor * scenario[:, other])

            ge_fractions.append(np.count_nonzero(ge_mask) / n_rows)
            le_fractions.append(np.count_nonzero(le_mask) / n_rows)

        features += ge_fractions
        features += le_fractions

    elapsed = time.time() - tic

    return np.asarray(features), elapsed
|
||
|
||
def create_model_input(idxs, instance, cost_normalized, scenarios_normalized):
    """Assemble one input record per problem id.

    Each record is a shallow copy of ``instance[pid]`` extended with the keys
    ``pid``, ``c_f_normalized``, ``c_v_normalized``, ``scenario_normalized``
    and ``scenario_features``.

    Args:
        idxs: problem ids; ``scenarios_normalized[i]`` belongs to ``idxs[i]``.
        instance: mapping from problem id to its instance mapping.
        cost_normalized: mapping from problem id to a mapping with ``'c_f'``
            and ``'c_v'`` entries.
        scenarios_normalized: array whose first axis matches ``idxs``.

    Returns:
        Dict with ``"input"`` (NumPy array of the records) and
        ``"total_time"`` (summed feature-extraction time).
    """
    assert len(idxs) == scenarios_normalized.shape[0]

    samples = []
    elapsed = 0
    for position, pid in enumerate(idxs):
        sample = dict(instance[pid])
        sample.update(
            pid=pid,
            c_f_normalized=cost_normalized[pid]['c_f'],
            c_v_normalized=cost_normalized[pid]['c_v'],
            scenario_normalized=scenarios_normalized[position],
        )
        sample["scenario_features"], feature_time = extract_scenario_features(
            scenarios_normalized[position])
        elapsed += feature_time
        samples.append(sample)

    return {"input": np.asarray(samples), "total_time": elapsed}
|
||
|
||
def generate_dataset(path, train_test_split=0.7):
    """Build the raw train/test dataset and save it under ``path["data"]``.

    Loads the instances and the representative-scenario results, keeps the
    problems whose ``solved_xi`` flag is truthy, rescales costs and scenarios,
    extracts scenario features and writes ``x_train_raw.npy``,
    ``y_train_raw.npy``, ``x_test_raw.npy``, ``y_test_raw.npy`` and
    ``preprocessing_time.npy`` (average preprocessing seconds per problem).

    Args:
        path: mapping with ``"instance"`` and ``"result_xi"`` (pickle files)
            and ``"data"`` (output directory, used with the ``/`` operator).
        train_test_split: fraction of solved problems used for training.

    Raises:
        ValueError: if no problem is solved or the training split is empty.
    """
    instance = load_pickle(path["instance"])
    result_xi = load_pickle(path["result_xi"])
    total_time = 0

    # Find problems for which we have a representative scenario
    solved = [pid for pid, res in result_xi.items() if res["solved_xi"]]
    if not solved:
        raise ValueError(
            "No problem with a representative scenario (solved_xi) found; "
            "cannot build a dataset.")

    # Normalize cost: [MIN, MAX] is mapped linearly onto [-1, 1]
    cost_normalized = defaultdict(dict)
    start_time = time.time()
    for idx in solved:
        cost_normalized[idx]['c_f'] = (((instance[idx]['c_f'] - MIN_C_F) / (MAX_C_F - MIN_C_F)) * 2) - 1
        cost_normalized[idx]['c_v'] = (((instance[idx]['c_v'] - MIN_C_V) / (MAX_C_V - MIN_C_V)) * 2) - 1
    total_time += (time.time() - start_time)

    # Shuffle and split into train and test
    random.shuffle(solved)
    n_train = int(train_test_split * len(solved))
    if n_train == 0:
        # The scenario bounds below are computed from the training split.
        raise ValueError(
            "Training split is empty: train_test_split={} with {} solved "
            "problem(s).".format(train_test_split, len(solved)))
    train_idxs, test_idxs = solved[:n_train], solved[n_train:]

    # Normalize scenarios using bounds from the training split only, so the
    # test split does not influence the scaling.
    train_scenarios = fetch_scenario(train_idxs, instance)
    test_scenarios = fetch_scenario(test_idxs, instance)
    start_time = time.time()
    MAX_SCE = np.max(train_scenarios, axis=0)
    MIN_SCE = np.min(train_scenarios, axis=0)
    train_scenarios_normalized = normalize_scenario(train_scenarios, MIN_SCE, MAX_SCE)
    test_scenarios_normalized = normalize_scenario(test_scenarios, MIN_SCE, MAX_SCE)
    total_time += (time.time() - start_time)

    # Prepare training and test samples
    result = create_model_input(train_idxs, instance, cost_normalized, train_scenarios_normalized)
    x_train, total_time_train = result["input"], result["total_time"]

    result = create_model_input(test_idxs, instance, cost_normalized, test_scenarios_normalized)
    x_test, total_time_test = result["input"], result["total_time"]

    total_time += (total_time_train + total_time_test)

    y_train = np.asarray([{"pid": pid, "xi_hat": result_xi[pid]["xi_hat"]}
                          for pid in train_idxs])
    y_test = np.asarray([{"pid": pid, "xi_hat": result_xi[pid]["xi_hat"]}
                         for pid in test_idxs])

    # x_* and y_* are arrays of dicts (object dtype), so np.save pickles them.
    np.save(path["data"] / "x_train_raw.npy", x_train)
    np.save(path["data"] / "y_train_raw.npy", y_train)
    np.save(path["data"] / "x_test_raw.npy", x_test)
    np.save(path["data"] / "y_test_raw.npy", y_test)
    np.save(path["data"] / "preprocessing_time.npy", [total_time / len(solved)])
Oops, something went wrong.