test: [RLOS2023] add test for cb with continuous action (#4630)

* test: add test for slate
* test: test cleanup and slate test update
* test: minor cleanup and change assert_loss function to equal instead of lower
* test: add test for cb with continuous action
* modify blocker testcase
VowpalWabbit · Aug 18, 2023 · 4bb9741 · 4bb9741
1 parent 702604f
commit 4bb9741
Show file tree

Hide file tree

Showing 6 changed files with 279 additions and 1 deletion.
diff --git a/python/tests/test_framework/assert_job.py b/python/tests/test_framework/assert_job.py
@@ -51,6 +51,8 @@ def assert_prediction(job, **kwargs):
         prediction = [i for i in prediction if i != ""]
         if ":" in prediction[0]:
             prediction = [[j.split(":")[1] for j in i.split(",")] for i in prediction]
+        elif "," in prediction[0]:
+            prediction = [[j for j in i.split(",")] for i in prediction]
         if type(prediction[0]) == list:
             prediction = [[float(remove_non_digits(j)) for j in i] for i in prediction]
         else:

diff --git a/python/tests/test_framework/cb_cont/data_generation.py b/python/tests/test_framework/cb_cont/data_generation.py
@@ -0,0 +1,62 @@
+import random
+import os
+from test_helper import get_function_object
+
# Absolute directory that contains this module (symlinks resolved).
script_directory = os.path.split(os.path.realpath(__file__))[0]
# Seed the module-level RNG with a fixed value so runs are repeatable.
random.seed(10)
+
+
def random_number_items(items):
    """Return a randomly sized random sample of ``items``.

    The sample size is drawn uniformly from 1 to ``len(items)`` inclusive,
    then that many elements are sampled without replacement. An empty
    ``items`` makes ``random.randint(1, 0)`` raise ``ValueError``.
    """
    sample_size = random.randint(1, len(items))
    picked = random.sample(items, sample_size)
    return picked
+
+
def generate_cb_data(
    num_examples,
    num_features,
    action_range,
    reward_function,
    logging_policy,
    context_name=None,
):
    """Write a continuous-action contextual-bandit data file and return its path.

    Every example is written as one ``ca <action>:<cost>:<probability> | ...``
    line followed by an empty line. The cost is the negated value returned by
    the reward function; the probability comes from the logging policy.

    Args:
        num_examples: Number of examples to write.
        num_features: Number of feature names (``feature1`` .. ``featureN``)
            from which a random subset is drawn for each example.
        action_range: Two-element sequence. Only its width,
            ``int(abs(action_range[1] - action_range[0]))``, is used: actions
            are sampled uniformly from ``[0, width]`` and rounded to 2 places.
        reward_function: Dict with a ``"name"`` looked up in
            ``cb_cont.reward_functions`` and optional ``"params"`` kwargs.
        logging_policy: Dict with a ``"name"`` looked up in
            ``cb_cont.logging_policies`` and optional ``"params"`` kwargs.
            The dict is read only; it is never modified.
        context_name: List of context labels, defaulting to ``["1"]``. When it
            holds more than one label, a context is drawn per example and
            written as an ``s_<label>`` feature.

    Returns:
        The path of the written file, located in ``script_directory``.
    """
    # Avoid a shared mutable default; None stands for the single context "1".
    if context_name is None:
        context_name = ["1"]

    num_actions = int(abs(action_range[1] - action_range[0]))
    dataFile = f"cb_cont_test_{num_examples}_{num_actions}_{num_features}.txt"
    data_path = os.path.join(script_directory, dataFile)

    reward_function_obj = get_function_object(
        "cb_cont.reward_functions", reward_function["name"]
    )
    logging_policy_obj = get_function_object(
        "cb_cont.logging_policies", logging_policy["name"]
    )

    # Work on private copies so the caller's config dicts are left untouched.
    reward_params = dict(reward_function.get("params", {}))
    policy_params = dict(logging_policy.get("params", {}))
    policy_params["num_actions"] = num_actions

    features = [f"feature{index}" for index in range(1, num_features + 1)]
    no_context = len(context_name)

    with open(data_path, "w") as f:
        for _ in range(num_examples):
            # The order of the random draws (context, action, features) is
            # kept stable so a fixed seed always produces the same file.
            context = random.randint(1, no_context) if no_context > 1 else 1
            chosen_action = round(random.uniform(0, num_actions), 2)

            cost = -reward_function_obj(chosen_action, context, **reward_params)
            policy_params["chosen_action"] = chosen_action
            probability = logging_policy_obj(**policy_params)

            label = f"ca {chosen_action}:{cost}:{probability}"
            chosen_features = " ".join(random_number_items(features))
            if no_context == 1:
                f.write(f"{label} | {chosen_features}\n")
            else:
                f.write(
                    f"{label} | s_{context_name[context - 1]} {chosen_features}\n"
                )
            # Each example is terminated by an empty line.
            f.write("\n")
    return data_path
diff --git a/python/tests/test_framework/cb_cont/logging_policies.py b/python/tests/test_framework/cb_cont/logging_policies.py
@@ -0,0 +1,7 @@
def constant_probability(chosen_action, **kwargs):
    """Logging policy that reports probability 1 for every action.

    ``chosen_action`` and any extra keyword arguments are accepted and ignored.
    """
    probability = 1
    return probability
+
+
def even_probability(chosen_action, **kwargs):
    """Logging policy that spreads probability evenly over ``num_actions``.

    Returns ``1 / kwargs["num_actions"]`` rounded to two decimal places.
    ``chosen_action`` is ignored. Raises ``KeyError`` when ``num_actions`` is
    not supplied.
    """
    return round(1 / kwargs["num_actions"], 2)
diff --git a/python/tests/test_framework/cb_cont/reward_functions.py b/python/tests/test_framework/cb_cont/reward_functions.py
@@ -0,0 +1,19 @@
def fixed_reward(chosen_action, context, **kwargs):
    """Reward function that always returns 1.

    ``chosen_action``, ``context`` and any keyword arguments are ignored.
    """
    reward = 1
    return reward
+
+
def piecewise_constant(chosen_action, context, **kwargs):
    """Return the entry of ``kwargs["reward"]`` selected by ``chosen_action``.

    The action is truncated with ``int()`` and used as a 1-based index into
    the reward list. An action that truncates to 0 yields index -1 and so
    reads the last entry. ``context`` is ignored.
    """
    rewards = kwargs["reward"]
    piece_index = int(chosen_action) - 1
    return rewards[piece_index]
+
+
def fixed_reward_two_action(chosen_action, context, **kwargs):
    """Return the reward for a two-context, two-action-region setup.

    The reward is 0 only when ``context`` is 2 and ``chosen_action`` lies in
    ``[1, 2)``; every other combination yields 1. Keyword arguments are
    ignored.

    NOTE: context 1 always earns reward 1. The earlier branch meant to give
    context 1 a zero reward required ``chosen_action < 1`` and
    ``chosen_action >= 1`` at the same time, so it could never fire. It has
    been dropped here without changing any result.
    """
    if context == 2 and 1 <= chosen_action < 2:
        return 0
    return 1
diff --git a/python/tests/test_framework/test_configs/cb_cont.json b/python/tests/test_framework/test_configs/cb_cont.json
@@ -0,0 +1,189 @@
+[
+    {
+        "test_name": "cb_two_action",
+        "data_func": {
+            "name": "generate_cb_data",
+            "params": {
+                "num_examples": 100,
+                "num_features": 1,
+                "action_range": [
+                    0,
+                    2
+                ],
+                "reward_function": {
+                    "name": "piecewise_constant",
+                    "params": {
+                        "reward": [
+                            1,
+                            0
+                        ]
+                    }
+                },
+                "logging_policy": {
+                    "name": "even_probability",
+                    "params": {}
+                }
+            }
+        },
+        "assert_functions": [
+            {
+                "name": "assert_loss",
+                "params": {
+                    "expected_loss": -1,
+                    "decimal": 1
+                }
+            },
+            {
+                "name": "assert_prediction",
+                "params": {
+                    "expected_value": [
+                        1,
+                        0
+                    ],
+                    "threshold": 0.8
+                }
+            }
+        ],
+        "grids": {
+            "cb": {
+                "#base": [
+                    "--cats 2 --min_value 0 --max_value 2 --bandwidth 1"
+                ]
+            },
+            "epsilon": {
+                "--epsilon": [
+                    0.1,
+                    0.2,
+                    0.3
+                ]
+            }
+        },
+        "grids_expression": "cb * (epsilon)",
+        "output": [
+            "--readable_model",
+            "-p"
+        ]
+    },
+    {
+        "test_name": "cb_two_action_diff_context",
+        "data_func": {
+            "name": "generate_cb_data",
+            "params": {
+                "num_examples": 100,
+                "num_features": 2,
+                "action_range": [
+                    0,
+                    2
+                ],
+                "reward_function": {
+                    "name": "fixed_reward_two_action",
+                    "params": {}
+                },
+                "logging_policy": {
+                    "name": "even_probability",
+                    "params": {}
+                },
+                "context_name": [
+                    "1",
+                    "2"
+                ]
+            }
+        },
+        "assert_functions": [
+            {
+                "name": "assert_loss",
+                "params": {
+                    "expected_loss": -0.8,
+                    "decimal": 1
+                }
+            },
+            {
+                "name": "assert_prediction",
+                "params": {
+                    "expected_value": [
+                        0.975,
+                        0.025
+                    ],
+                    "threshold": 0.1,
+                    "atol": 0.1,
+                    "rtol": 0.1
+                }
+            }
+        ],
+        "grids": {
+            "cb": {
+                "#base": [
+                    "--cats 2 --min_value 0 --max_value 2 --bandwidth 1"
+                ]
+            },
+            "epsilon": {
+                "--epsilon": [
+                    0.1,
+                    0.2,
+                    0.3
+                ]
+            }
+        },
+        "grids_expression": "cb * (epsilon)",
+        "output": [
+            "--readable_model",
+            "-p"
+        ]
+    },
+    {
+        "test_name": "cb_one_action",
+        "data_func": {
+            "name": "generate_cb_data",
+            "params": {
+                "num_examples": 10,
+                "num_features": 1,
+                "action_range": [
+                    0,
+                    1
+                ],
+                "reward_function": {
+                    "name": "fixed_reward",
+                    "params": {}
+                },
+                "logging_policy": {
+                    "name": "even_probability"
+                }
+            }
+        },
+        "assert_functions": [
+            {
+                "name": "assert_loss",
+                "params": {
+                    "expected_loss": -1
+                }
+            },
+            {
+                "name": "assert_prediction",
+                "params": {
+                    "expected_value": [0,1],
+                    "threshold": 0.1
+                }
+            }
+        ],
+        "grids": {
+            "g0": {
+                "#base": [
+                    "--cats 2 --min_value 0 --max_value 1 --bandwidth 1"
+                ]
+            },
+            "g1": {
+                "--cb_type": [
+                    "ips",
+                    "mtr",
+                    "dr",
+                    "dm"
+                ]
+            }
+        },
+        "grids_expression": "g0 * g1",
+        "output": [
+            "--readable_model",
+            "-p"
+        ]
+    }
+]
diff --git a/python/tests/test_framework/test_helper.py b/python/tests/test_framework/test_helper.py
@@ -75,7 +75,6 @@ def copy_file(source_file, destination_file):
 
 
 def call_function_with_dirs(dirs, module_name, function_name, **kargs):
-
     for dir in dirs:
         try:
             data = dynamic_function_call(