Skip to content

Commit

Permalink
test: add datagen driver
Browse files — browse the repository at this point in the history
  • Loading branch information
michiboo committed Sep 3, 2023
1 parent 3b33740 commit c9f4028
Show file tree
Hide file tree
Showing 21 changed files with 182 additions and 184 deletions.
10 changes: 8 additions & 2 deletions python/tests/e2e_v2/assert_job.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
from numpy.testing import assert_allclose, assert_almost_equal
from vw_executor.vw import ExecutionStatus
import vowpalwabbit as vw
from test_helper import get_function_object
from test_helper import get_function_object, datagen_driver


def remove_non_digits(string):
Expand Down Expand Up @@ -82,6 +82,7 @@ def assert_loss_below(job, **kwargs):


def assert_prediction_with_generated_data(job, **kwargs):

assert job.status == ExecutionStatus.Success, "job should be successful"
expected_class = []
trained_model = vw.Workspace(f"-i {job[0].model9('-f').path} --quiet")
Expand All @@ -102,7 +103,12 @@ def assert_prediction_with_generated_data(job, **kwargs):
break
except:
pass
dataFile = data_func_obj(**kwargs["data_func"]["params"])
script_directory = os.path.dirname(os.path.realpath(__file__))
dataFile = datagen_driver(
os.path.join(script_directory, subdir_name),
data_func_obj,
**kwargs["data_func"]["params"],
)
with open(dataFile, "r") as f:
for line in f.readlines():
expected_class.append(line.split("|")[0].strip())
Expand Down
70 changes: 34 additions & 36 deletions python/tests/e2e_v2/cb/data_generation.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ def random_number_items(items):


def generate_cb_data(
f,
num_examples,
num_features,
num_actions,
Expand All @@ -20,7 +21,6 @@ def generate_cb_data(
seed=random.randint(0, 100),
):
random.seed(seed)
dataFile = f"cb_test_{num_examples}_{num_actions}_{num_features}.txt"

reward_function_obj = get_function_object(
"cb.reward_functions", reward_function["name"]
Expand All @@ -29,43 +29,41 @@ def generate_cb_data(
"cb.logging_policies", logging_policy["name"]
)
features = [f"feature{index}" for index in range(1, num_features + 1)]
with open(os.path.join(script_directory, dataFile), "w") as f:
for _ in range(num_examples):
no_context = len(context_name)
if no_context > 1:
context = random.randint(1, no_context)
else:
context = 1
for _ in range(num_examples):
no_context = len(context_name)
if no_context > 1:
context = random.randint(1, no_context)
else:
context = 1

def return_cost_probability(chosen_action, context=1):
cost = -reward_function_obj(
chosen_action, context, **reward_function["params"]
)
if "params" not in logging_policy:
logging_policy["params"] = {}
logging_policy["params"]["chosen_action"] = chosen_action
logging_policy["params"]["num_actions"] = num_actions
probability = logging_policy_obj(**logging_policy["params"])
return cost, probability
def return_cost_probability(chosen_action, context=1):
cost = -reward_function_obj(
chosen_action, context, **reward_function["params"]
)
if "params" not in logging_policy:
logging_policy["params"] = {}
logging_policy["params"]["chosen_action"] = chosen_action
logging_policy["params"]["num_actions"] = num_actions
probability = logging_policy_obj(**logging_policy["params"])
return cost, probability

chosen_action = random.randint(1, num_actions)
if no_context > 1:
f.write(f"shared | User s_{context_name[context-1]}\n")
for action in range(1, num_actions + 1):
chosen_action = random.randint(1, num_actions)
if no_context > 1:
f.write(f"shared | User s_{context_name[context-1]}\n")
for action in range(1, num_actions + 1):

cost, probability = return_cost_probability(action, context)
if action == chosen_action:
f.write(
f'{action}:{cost}:{probability} | {" ".join(random_number_items(features))}\n'
)
else:
f.write(f'| {" ".join(random_number_items(features))}\n')
cost, probability = return_cost_probability(action, context)
if action == chosen_action:
f.write(
f'{action}:{cost}:{probability} | {" ".join(random_number_items(features))}\n'
)
else:
f.write(f'| {" ".join(random_number_items(features))}\n')

else:
else:

cost, probability = return_cost_probability(chosen_action)
f.write(
f'{chosen_action}:{cost}:{probability} | {" ".join(random_number_items(features))}\n'
)
f.write("\n")
return os.path.join(script_directory, dataFile)
cost, probability = return_cost_probability(chosen_action)
f.write(
f'{chosen_action}:{cost}:{probability} | {" ".join(random_number_items(features))}\n'
)
f.write("\n")
5 changes: 2 additions & 3 deletions python/tests/e2e_v2/cb/logging_policies.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
def constant_probability(chosen_action, **kwargs):
def constant_probability(chosen_action):
    """Degenerate logging policy: the chosen action is always logged with probability 1."""
    probability = 1
    return probability


def even_probability(chosen_action, **kwargs):
num_actions = kwargs["num_actions"]
def even_probability(chosen_action, num_actions):
    """Uniform logging policy: each of *num_actions* actions is equally likely.

    The probability is rounded to two decimal places.
    """
    uniform = 1 / num_actions
    return round(uniform, 2)
7 changes: 3 additions & 4 deletions python/tests/e2e_v2/cb/reward_functions.py
Original file line number Diff line number Diff line change
@@ -1,13 +1,12 @@
def fixed_reward(chosen_action, context, **kwargs):
def fixed_reward(chosen_action, context):
    """Constant reward function: every (action, context) pair earns exactly 1."""
    reward = 1
    return reward


def constant_reward(chosen_action, context, **kwargs):
reward = kwargs["reward"]
def constant_reward(chosen_action, context, reward):
    """Return the reward configured for *chosen_action*.

    *reward* is a per-action sequence indexed by the 1-based action id;
    *context* is accepted for signature compatibility but not used here.
    """
    action_index = chosen_action - 1
    return reward[action_index]


def fixed_reward_two_action(chosen_action, context, **kwargs):
def fixed_reward_two_action(chosen_action, context):
if context == 1 and chosen_action == 2:
return 1
elif context == 2 and chosen_action == 2:
Expand Down
63 changes: 31 additions & 32 deletions python/tests/e2e_v2/cb_cont/data_generation.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ def random_number_items(items):


def generate_cb_data(
f,
num_examples,
num_features,
action_range,
Expand All @@ -21,7 +22,6 @@ def generate_cb_data(
):
random.seed(seed)
num_actions = int(abs(action_range[1] - action_range[0]))
dataFile = f"cb_cont_test_{num_examples}_{num_actions}_{num_features}.txt"

reward_function_obj = get_function_object(
"cb_cont.reward_functions", reward_function["name"]
Expand All @@ -30,34 +30,33 @@ def generate_cb_data(
"cb_cont.logging_policies", logging_policy["name"]
)
features = [f"feature{index}" for index in range(1, num_features + 1)]
with open(os.path.join(script_directory, dataFile), "w") as f:
for _ in range(num_examples):
no_context = len(context_name)
if no_context > 1:
context = random.randint(1, no_context)
else:
context = 1

def return_cost_probability(chosen_action, context):
cost = -reward_function_obj(
chosen_action, context, **reward_function["params"]
)
if "params" not in logging_policy:
logging_policy["params"] = {}
logging_policy["params"]["chosen_action"] = chosen_action
logging_policy["params"]["num_actions"] = num_actions
probability = logging_policy_obj(**logging_policy["params"])
return cost, probability

chosen_action = round(random.uniform(0, num_actions), 2)
cost, probability = return_cost_probability(chosen_action, context)
if no_context == 1:
f.write(
f'ca {chosen_action}:{cost}:{probability} | {" ".join(random_number_items(features))}\n'
)
else:
f.write(
f'ca {chosen_action}:{cost}:{probability} | {"s_" + context_name[context-1]} {" ".join(random_number_items(features))}\n'
)
f.write("\n")
return os.path.join(script_directory, dataFile)

for _ in range(num_examples):
no_context = len(context_name)
if no_context > 1:
context = random.randint(1, no_context)
else:
context = 1

def return_cost_probability(chosen_action, context):
cost = -reward_function_obj(
chosen_action, context, **reward_function["params"]
)
if "params" not in logging_policy:
logging_policy["params"] = {}
logging_policy["params"]["chosen_action"] = chosen_action
logging_policy["params"]["num_actions"] = num_actions
probability = logging_policy_obj(**logging_policy["params"])
return cost, probability

chosen_action = round(random.uniform(0, num_actions), 2)
cost, probability = return_cost_probability(chosen_action, context)
if no_context == 1:
f.write(
f'ca {chosen_action}:{cost}:{probability} | {" ".join(random_number_items(features))}\n'
)
else:
f.write(
f'ca {chosen_action}:{cost}:{probability} | {"s_" + context_name[context-1]} {" ".join(random_number_items(features))}\n'
)
f.write("\n")
5 changes: 2 additions & 3 deletions python/tests/e2e_v2/cb_cont/logging_policies.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
def constant_probability(chosen_action, **kwargs):
def constant_probability(chosen_action):
    """Logging policy that reports probability 1 regardless of the action."""
    always = 1
    return always


def even_probability(chosen_action, **kwargs):
num_actions = kwargs["num_actions"]
def even_probability(chosen_action, num_actions):
    """Uniform logging policy over *num_actions* actions, rounded to 2 decimals."""
    p = 1 / num_actions
    return round(p, 2)
7 changes: 3 additions & 4 deletions python/tests/e2e_v2/cb_cont/reward_functions.py
Original file line number Diff line number Diff line change
@@ -1,13 +1,12 @@
def fixed_reward(chosen_action, context, **kwargs):
def fixed_reward(chosen_action, context):
    """Reward function that pays 1 for any action in any context."""
    payout = 1
    return payout


def piecewise_constant(chosen_action, context, **kwargs):
reward = kwargs["reward"]
def piecewise_constant(chosen_action, context, reward):
    """Bucket a continuous action into a reward entry.

    The action is truncated to an integer and treated as a 1-based index
    into the *reward* sequence; *context* is unused here.
    """
    bucket = int(chosen_action) - 1
    return reward[bucket]


def fixed_reward_two_action(chosen_action, context, **kwargs):
def fixed_reward_two_action(chosen_action, context):
if context == 1 and chosen_action >= 2:
return 1
elif context == 2 and chosen_action < 2 and chosen_action >= 1:
Expand Down
21 changes: 8 additions & 13 deletions python/tests/e2e_v2/classification/data_generation.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,28 +7,23 @@


def generate_classification_data(
    f,
    num_example,
    num_classes,
    num_features,
    classify_func,
    seed=None,
    bounds=None,
):
    """Write *num_example* VW-format classification examples to the open file *f*.

    Each example draws *num_features* features uniformly within per-feature
    *bounds* (defaulting to [0, 1] for every feature) and labels it with the
    classification function described by *classify_func* (a dict with "name"
    and "params", resolved via get_function_object).

    NOTE(review): *num_classes* is not referenced in this body — presumably
    the class count is implied by the classify function; confirm with callers.
    """
    # Fix: a default of `seed=random.randint(0, 100)` is evaluated once at
    # import time, so every unseeded call would reuse the same seed. Draw it
    # per call instead to keep the intended "random unless pinned" behavior.
    if seed is None:
        seed = random.randint(0, 100)
    random.seed(seed)
    classify_func_obj = get_function_object(
        "classification.classification_functions", classify_func["name"]
    )
    if not bounds:
        bounds = [[0, 1] for _ in range(num_features)]
    for _ in range(num_example):
        x = [
            random.uniform(bounds[index][0], bounds[index][1])
            for index in range(num_features)
        ]
        y = classify_func_obj(x, **classify_func["params"])
        f.write(f"{y} |f {' '.join([f'x{i}:{x[i]}' for i in range(num_features)])}\n")
2 changes: 0 additions & 2 deletions python/tests/e2e_v2/conftest.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,3 @@
import pytest

# conftest.py
def pytest_addoption(parser):
parser.addoption(
Expand Down
13 changes: 4 additions & 9 deletions python/tests/e2e_v2/regression/data_generation.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,14 +5,9 @@


def constant_function(
    f, no_sample, constant, x_lower_bound, x_upper_bound, seed=None
):
    """Write *no_sample* constant-target regression examples to the open file *f*.

    Each line has the VW form ``<constant> |f x:<x>`` where x is drawn
    uniformly from [x_lower_bound, x_upper_bound].
    """
    # Fix: a default of `seed=random.randint(0, 100)` is evaluated once at
    # import time, so every unseeded call would reuse the same seed. Draw it
    # per call instead to keep the intended "random unless pinned" behavior.
    if seed is None:
        seed = random.randint(0, 100)
    random.seed(seed)
    for _ in range(no_sample):
        x = random.uniform(x_lower_bound, x_upper_bound)
        f.write(f"{constant} |f x:{x}\n")
8 changes: 3 additions & 5 deletions python/tests/e2e_v2/slate/action_space.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,5 @@
def new_action_after_threshold(iteration, threshold, before, after):
    """Return *before* until *iteration* exceeds *threshold*, then *after*.

    Models a change in the action space over time — e.g. it is sunny before
    the threshold iteration and raining after it.
    """
    # Strictly greater-than: the threshold iteration itself still uses `before`.
    if iteration > threshold:
        return after
    return before
1 change: 0 additions & 1 deletion python/tests/e2e_v2/slate/assert_job.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@
from numpy.testing import assert_allclose, assert_almost_equal
from vw_executor.vw import ExecutionStatus
import numpy as np

Expand Down
Loading

0 comments on commit c9f4028

Please sign in to comment.