diff --git a/.bumpversion.cfg b/.bumpversion.cfg index d059d740e..6d0944b7e 100644 --- a/.bumpversion.cfg +++ b/.bumpversion.cfg @@ -1,5 +1,5 @@ [bumpversion] -current_version = 0.8.0 +current_version = 0.8.1 commit = False tag = False allow_dirty = False diff --git a/.test_durations b/.test_durations index bf283f1a9..7a7768311 100644 --- a/.test_durations +++ b/.test_durations @@ -1,4 +1,74 @@ { + "tests/influence/test_influence_calculator.py::test_dask_ekfac_influence[conv1d_nn_pert]": 2.59026943400022, + "tests/influence/test_influence_calculator.py::test_dask_ekfac_influence[conv1d_nn_up]": 2.7703545530002884, + "tests/influence/test_influence_calculator.py::test_dask_ekfac_influence[conv1d_no_grad_up]": 0.8260756999989098, + "tests/influence/test_influence_calculator.py::test_dask_ekfac_influence[conv2d_nn_pert]": 1.101015895999808, + "tests/influence/test_influence_calculator.py::test_dask_ekfac_influence[conv2d_nn_up]": 1.206421760000012, + "tests/influence/test_influence_calculator.py::test_dask_ekfac_influence[conv3d_nn_pert]": 1.4294998579989624, + "tests/influence/test_influence_calculator.py::test_dask_ekfac_influence[conv3d_nn_up]": 1.3345100419992377, + "tests/influence/test_influence_calculator.py::test_dask_ekfac_influence[simple_nn_class_up]": 3.361096810000163, + "tests/influence/test_influence_calculator.py::test_dask_ekfac_influence[simple_nn_pert]": 0.6431655560008949, + "tests/influence/test_influence_calculator.py::test_dask_ekfac_influence[simple_nn_up]": 0.7108467549987836, + "tests/influence/test_influence_calculator.py::test_dask_influence_factors[conv1d_nn_pert-arnoldi]": 1.4143697240015172, + "tests/influence/test_influence_calculator.py::test_dask_influence_factors[conv1d_nn_pert-cg]": 2.522983850998571, + "tests/influence/test_influence_calculator.py::test_dask_influence_factors[conv1d_nn_pert-direct]": 1.3974800130017684, + "tests/influence/test_influence_calculator.py::test_dask_influence_factors[conv1d_nn_up-arnoldi]": 
1.4222584220005956, + "tests/influence/test_influence_calculator.py::test_dask_influence_factors[conv1d_nn_up-cg]": 2.5742563249987143, + "tests/influence/test_influence_calculator.py::test_dask_influence_factors[conv1d_nn_up-direct]": 1.3653277730008995, + "tests/influence/test_influence_calculator.py::test_dask_influence_factors[conv1d_no_grad_up-arnoldi]": 0.48600830500072334, + "tests/influence/test_influence_calculator.py::test_dask_influence_factors[conv1d_no_grad_up-cg]": 0.7124692380002671, + "tests/influence/test_influence_calculator.py::test_dask_influence_factors[conv1d_no_grad_up-direct]": 0.47575023000172223, + "tests/influence/test_influence_calculator.py::test_dask_influence_factors[conv2d_nn_pert-arnoldi]": 0.8454596849987865, + "tests/influence/test_influence_calculator.py::test_dask_influence_factors[conv2d_nn_pert-cg]": 1.7426123529985489, + "tests/influence/test_influence_calculator.py::test_dask_influence_factors[conv2d_nn_pert-direct]": 0.808057442000063, + "tests/influence/test_influence_calculator.py::test_dask_influence_factors[conv2d_nn_up-arnoldi]": 0.8408936979994905, + "tests/influence/test_influence_calculator.py::test_dask_influence_factors[conv2d_nn_up-cg]": 1.8711466349977854, + "tests/influence/test_influence_calculator.py::test_dask_influence_factors[conv2d_nn_up-direct]": 0.7968461060008849, + "tests/influence/test_influence_calculator.py::test_dask_influence_factors[conv3d_nn_pert-arnoldi]": 1.041476223997961, + "tests/influence/test_influence_calculator.py::test_dask_influence_factors[conv3d_nn_pert-cg]": 2.6348945509980695, + "tests/influence/test_influence_calculator.py::test_dask_influence_factors[conv3d_nn_pert-direct]": 1.0208977649999724, + "tests/influence/test_influence_calculator.py::test_dask_influence_factors[conv3d_nn_up-arnoldi]": 1.3290127370019036, + "tests/influence/test_influence_calculator.py::test_dask_influence_factors[conv3d_nn_up-cg]": 5.805227180999282, + 
"tests/influence/test_influence_calculator.py::test_dask_influence_factors[conv3d_nn_up-direct]": 1.8304335940010787, + "tests/influence/test_influence_calculator.py::test_dask_influence_factors[simple_nn_class_up-arnoldi]": 1.9109577300005185, + "tests/influence/test_influence_calculator.py::test_dask_influence_factors[simple_nn_class_up-cg]": 4.174298836998787, + "tests/influence/test_influence_calculator.py::test_dask_influence_factors[simple_nn_class_up-direct]": 1.5329143839990138, + "tests/influence/test_influence_calculator.py::test_dask_influence_factors[simple_nn_pert-arnoldi]": 0.4525704900006531, + "tests/influence/test_influence_calculator.py::test_dask_influence_factors[simple_nn_pert-cg]": 0.8970914879992051, + "tests/influence/test_influence_calculator.py::test_dask_influence_factors[simple_nn_pert-direct]": 0.46585072099878744, + "tests/influence/test_influence_calculator.py::test_dask_influence_factors[simple_nn_up-arnoldi]": 0.4456351110020478, + "tests/influence/test_influence_calculator.py::test_dask_influence_factors[simple_nn_up-cg]": 1.0693235140006436, + "tests/influence/test_influence_calculator.py::test_dask_influence_factors[simple_nn_up-direct]": 0.473094435999883, + "tests/influence/test_influence_calculator.py::test_dask_influence_nn[conv1d_nn_pert]": 2.9761773999980505, + "tests/influence/test_influence_calculator.py::test_dask_influence_nn[conv1d_nn_up]": 4.120701600999382, + "tests/influence/test_influence_calculator.py::test_dask_influence_nn[conv1d_no_grad_up]": 1.3337201610011107, + "tests/influence/test_influence_calculator.py::test_dask_influence_nn[conv2d_nn_pert]": 2.1662617799993313, + "tests/influence/test_influence_calculator.py::test_dask_influence_nn[conv2d_nn_up]": 3.132741712999632, + "tests/influence/test_influence_calculator.py::test_dask_influence_nn[conv3d_nn_pert]": 2.958187670999905, + "tests/influence/test_influence_calculator.py::test_dask_influence_nn[conv3d_nn_up]": 29.53393912699903, + 
"tests/influence/test_influence_calculator.py::test_dask_influence_nn[simple_nn_class_up]": 3.257567571998152, + "tests/influence/test_influence_calculator.py::test_dask_influence_nn[simple_nn_pert]": 1.361139677999745, + "tests/influence/test_influence_calculator.py::test_dask_influence_nn[simple_nn_up]": 1.261350679998941, + "tests/influence/test_influence_calculator.py::test_sequential_calculator[conv1d_nn_pert]": 2.6579838110010314, + "tests/influence/test_influence_calculator.py::test_sequential_calculator[conv1d_nn_up]": 2.6499502710012166, + "tests/influence/test_influence_calculator.py::test_sequential_calculator[conv1d_no_grad_up]": 0.8881425300005503, + "tests/influence/test_influence_calculator.py::test_sequential_calculator[conv2d_nn_pert]": 1.463408392999554, + "tests/influence/test_influence_calculator.py::test_sequential_calculator[conv2d_nn_up]": 1.4602782740003022, + "tests/influence/test_influence_calculator.py::test_sequential_calculator[conv3d_nn_pert]": 1.7320480180023878, + "tests/influence/test_influence_calculator.py::test_sequential_calculator[conv3d_nn_up]": 1.5744405670029664, + "tests/influence/test_influence_calculator.py::test_sequential_calculator[simple_nn_class_up]": 4.504372877998321, + "tests/influence/test_influence_calculator.py::test_sequential_calculator[simple_nn_pert]": 0.8736393959989073, + "tests/influence/test_influence_calculator.py::test_sequential_calculator[simple_nn_up]": 0.8922971840001992, + "tests/influence/test_influence_calculator.py::test_thread_safety_violation_error[conv1d_nn_pert]": 2.381483594999736, + "tests/influence/test_influence_calculator.py::test_thread_safety_violation_error[conv1d_nn_up]": 2.314768557000207, + "tests/influence/test_influence_calculator.py::test_thread_safety_violation_error[conv1d_no_grad_up]": 0.7438636890019552, + "tests/influence/test_influence_calculator.py::test_thread_safety_violation_error[conv2d_nn_pert]": 0.9980942529964523, + 
"tests/influence/test_influence_calculator.py::test_thread_safety_violation_error[conv2d_nn_up]": 1.1705565329993988, + "tests/influence/test_influence_calculator.py::test_thread_safety_violation_error[conv3d_nn_pert]": 1.2230443010012095, + "tests/influence/test_influence_calculator.py::test_thread_safety_violation_error[conv3d_nn_up]": 4.6594328910014156, + "tests/influence/test_influence_calculator.py::test_thread_safety_violation_error[simple_nn_class_up]": 3.0931850600009057, + "tests/influence/test_influence_calculator.py::test_thread_safety_violation_error[simple_nn_pert]": 0.7189972920023138, + "tests/influence/test_influence_calculator.py::test_thread_safety_violation_error[simple_nn_up]": 0.7615732119993481, "tests/influence/test_influences.py::test_influence_linear_model[cg-train_set_size_200-perturbation]": 0.8664472580130678, "tests/influence/test_influences.py::test_influence_linear_model[cg-train_set_size_200-up]": 0.18988716599415056, "tests/influence/test_influences.py::test_influence_linear_model[direct-train_set_size_200-perturbation]": 0.66577532098745, @@ -78,61 +148,230 @@ "tests/influence/test_util.py::test_lanzcos_low_rank_hessian_approx[model_data3-8-160-1e-05]": 4.422049004002474, "tests/influence/test_util.py::test_lanzcos_low_rank_hessian_approx[model_data4-4-250-1e-05]": 9.08382142597111, "tests/influence/test_util.py::test_lanzcos_low_rank_hessian_approx_exception": 0.0035210640053264797, - "tests/test_plugin.py::test_failure": 0.001304317032918334, - "tests/test_plugin.py::test_fixture_call_no_arguments": 0.0014436830242630094, - "tests/test_plugin.py::test_fixture_only[1]": 0.0011941569682676345, - "tests/test_plugin.py::test_fixture_only[2]": 0.0013037140015512705, - "tests/test_plugin.py::test_marker_and_fixture[1]": 0.0011783259978983551, - "tests/test_plugin.py::test_marker_and_fixture[2]": 0.001276884024264291, - "tests/test_plugin.py::test_marker_ignore_exception[0]": 0.0011224850022699684, - 
"tests/test_plugin.py::test_marker_ignore_exception[1]": 0.0009688139834906906, - "tests/test_plugin.py::test_marker_ignore_exception[2]": 0.0011277040175627917, - "tests/test_plugin.py::test_marker_ignore_exception[3]": 0.001226628024596721, - "tests/test_plugin.py::test_marker_ignore_exception[4]": 0.0010670160118024796, - "tests/test_plugin.py::test_marker_only[0]": 0.0027732179732993245, - "tests/test_plugin.py::test_marker_only_with_data_fixture[0]": 0.0012184199877083302, - "tests/test_plugin.py::test_marker_only_with_data_fixture[1]": 0.0014672029938083142, + "tests/influence/torch/test_functional.py::test_get_hessian[model_data0-4-avg]": 0.20045989400023245, + "tests/influence/torch/test_functional.py::test_get_hessian[model_data0-4-full]": 0.06902083099885203, + "tests/influence/torch/test_functional.py::test_get_hessian[model_data1-5-avg]": 0.5016348780009139, + "tests/influence/torch/test_functional.py::test_get_hessian[model_data1-5-full]": 0.1801713530021516, + "tests/influence/torch/test_functional.py::test_get_hessian[model_data2-10-avg]": 0.1689359069987404, + "tests/influence/torch/test_functional.py::test_get_hessian[model_data2-10-full]": 0.06361526499858883, + "tests/influence/torch/test_functional.py::test_get_hessian[model_data3-8-avg]": 0.23464886900001147, + "tests/influence/torch/test_functional.py::test_get_hessian[model_data3-8-full]": 0.08587454999906186, + "tests/influence/torch/test_functional.py::test_get_hessian[model_data4-4-avg]": 3.4049244679990807, + "tests/influence/torch/test_functional.py::test_get_hessian[model_data4-4-full]": 0.8161465619996306, + "tests/influence/torch/test_functional.py::test_get_hvp_function[model_data0-4-avg-no_precomputed_grad]": 0.07086462199913512, + "tests/influence/torch/test_functional.py::test_get_hvp_function[model_data0-4-avg-precomputed_grad]": 0.05853749299967603, + "tests/influence/torch/test_functional.py::test_get_hvp_function[model_data0-4-full-no_precomputed_grad]": 0.042555562999041285, 
+ "tests/influence/torch/test_functional.py::test_get_hvp_function[model_data0-4-full-precomputed_grad]": 0.04254312700140872, + "tests/influence/torch/test_functional.py::test_get_hvp_function[model_data1-5-avg-no_precomputed_grad]": 0.12398883400055638, + "tests/influence/torch/test_functional.py::test_get_hvp_function[model_data1-5-avg-precomputed_grad]": 0.09578048600087641, + "tests/influence/torch/test_functional.py::test_get_hvp_function[model_data1-5-full-no_precomputed_grad]": 0.08458008900015557, + "tests/influence/torch/test_functional.py::test_get_hvp_function[model_data1-5-full-precomputed_grad]": 0.08726069599833863, + "tests/influence/torch/test_functional.py::test_get_hvp_function[model_data2-10-avg-no_precomputed_grad]": 0.04802275299880421, + "tests/influence/torch/test_functional.py::test_get_hvp_function[model_data2-10-avg-precomputed_grad]": 0.03521194200220634, + "tests/influence/torch/test_functional.py::test_get_hvp_function[model_data2-10-full-no_precomputed_grad]": 0.036551941999277915, + "tests/influence/torch/test_functional.py::test_get_hvp_function[model_data2-10-full-precomputed_grad]": 0.037776481000037165, + "tests/influence/torch/test_functional.py::test_get_hvp_function[model_data3-8-avg-no_precomputed_grad]": 0.07563198299976648, + "tests/influence/torch/test_functional.py::test_get_hvp_function[model_data3-8-avg-precomputed_grad]": 0.06490736200066749, + "tests/influence/torch/test_functional.py::test_get_hvp_function[model_data3-8-full-no_precomputed_grad]": 0.04830970400143997, + "tests/influence/torch/test_functional.py::test_get_hvp_function[model_data3-8-full-precomputed_grad]": 0.046601254001870984, + "tests/influence/torch/test_functional.py::test_get_hvp_function[model_data4-4-avg-no_precomputed_grad]": 0.6670472200003132, + "tests/influence/torch/test_functional.py::test_get_hvp_function[model_data4-4-avg-precomputed_grad]": 0.5045342149987846, + 
"tests/influence/torch/test_functional.py::test_get_hvp_function[model_data4-4-full-no_precomputed_grad]": 0.2970590019995143, + "tests/influence/torch/test_functional.py::test_get_hvp_function[model_data4-4-full-precomputed_grad]": 0.3075855399983993, + "tests/influence/torch/test_functional.py::test_hvp[model_data0-1e-05]": 0.021658439001839724, + "tests/influence/torch/test_functional.py::test_hvp[model_data1-1e-05]": 0.059265748001053, + "tests/influence/torch/test_functional.py::test_hvp[model_data2-1e-05]": 0.01743878900015261, + "tests/influence/torch/test_functional.py::test_hvp[model_data3-1e-05]": 0.02271863699934329, + "tests/influence/torch/test_functional.py::test_hvp[model_data4-1e-05]": 0.03280089899999439, + "tests/influence/torch/test_functional.py::test_matrix_jacobian_product[100-5-110]": 0.022210784001799766, + "tests/influence/torch/test_functional.py::test_matrix_jacobian_product[25-10-500]": 0.020637996000004932, + "tests/influence/torch/test_functional.py::test_matrix_jacobian_product[46-1-632]": 0.01833421299852489, + "tests/influence/torch/test_functional.py::test_matrix_jacobian_product[50-3-120]": 0.01751421799781383, + "tests/influence/torch/test_functional.py::test_mixed_derivatives[100-5-512]": 1.7785364399987884, + "tests/influence/torch/test_functional.py::test_mixed_derivatives[25-10-734]": 0.27127136799936125, + "tests/influence/torch/test_functional.py::test_mixed_derivatives[46-1-1000]": 0.18510219099880487, + "tests/influence/torch/test_functional.py::test_mixed_derivatives[50-3-100]": 0.055743695000273874, + "tests/influence/torch/test_functional.py::test_per_sample_gradient[100-5-120]": 0.022160912998515414, + "tests/influence/torch/test_functional.py::test_per_sample_gradient[25-10-550]": 0.0378074430009292, + "tests/influence/torch/test_functional.py::test_per_sample_gradient[46-6-632]": 0.033401361002688645, + "tests/influence/torch/test_functional.py::test_per_sample_gradient[50-3-120]": 0.02004740899974422, + 
"tests/influence/torch/test_influence_model.py::test_influence_linear_model[cg-train_set_size_200-perturbation]": 4.1002855009992345, + "tests/influence/torch/test_influence_model.py::test_influence_linear_model[cg-train_set_size_200-up]": 4.092100218998894, + "tests/influence/torch/test_influence_model.py::test_influence_linear_model[direct-train_set_size_200-perturbation]": 0.5856196849999833, + "tests/influence/torch/test_influence_model.py::test_influence_linear_model[direct-train_set_size_200-up]": 0.1793102950014145, + "tests/influence/torch/test_influence_model.py::test_influence_linear_model[lissa-train_set_size_200-perturbation]": 74.52067036900007, + "tests/influence/torch/test_influence_model.py::test_influence_linear_model[lissa-train_set_size_200-up]": 72.80234433299847, + "tests/influence/torch/test_influence_model.py::test_influences_arnoldi[conv1d_nn_pert]": 2.6213616719978745, + "tests/influence/torch/test_influence_model.py::test_influences_arnoldi[conv1d_nn_up]": 2.9271264809995046, + "tests/influence/torch/test_influence_model.py::test_influences_arnoldi[conv1d_no_grad_up]": 1.1280039110006328, + "tests/influence/torch/test_influence_model.py::test_influences_arnoldi[conv2d_nn_pert]": 16.078887900001064, + "tests/influence/torch/test_influence_model.py::test_influences_arnoldi[conv2d_nn_up]": 16.092805495001812, + "tests/influence/torch/test_influence_model.py::test_influences_arnoldi[conv3d_nn_pert]": 5.826150597002197, + "tests/influence/torch/test_influence_model.py::test_influences_arnoldi[conv3d_nn_up]": 5.808433192996745, + "tests/influence/torch/test_influence_model.py::test_influences_arnoldi[simple_nn_class_up]": 3.4398634410008526, + "tests/influence/torch/test_influence_model.py::test_influences_arnoldi[simple_nn_pert]": 1.783800326000346, + "tests/influence/torch/test_influence_model.py::test_influences_arnoldi[simple_nn_up]": 1.5235134640006436, + 
"tests/influence/torch/test_influence_model.py::test_influences_ekfac[conv1d_nn_pert]": 2.470179049998478, + "tests/influence/torch/test_influence_model.py::test_influences_ekfac[conv1d_nn_up]": 2.3925959699990926, + "tests/influence/torch/test_influence_model.py::test_influences_ekfac[conv1d_no_grad_up]": 0.7791441699991992, + "tests/influence/torch/test_influence_model.py::test_influences_ekfac[conv2d_nn_pert]": 1.128924710001229, + "tests/influence/torch/test_influence_model.py::test_influences_ekfac[conv2d_nn_up]": 1.256267286998991, + "tests/influence/torch/test_influence_model.py::test_influences_ekfac[conv3d_nn_pert]": 1.2739636030000838, + "tests/influence/torch/test_influence_model.py::test_influences_ekfac[conv3d_nn_up]": 1.2143029310009297, + "tests/influence/torch/test_influence_model.py::test_influences_ekfac[simple_nn_class_up]": 3.279752685999483, + "tests/influence/torch/test_influence_model.py::test_influences_ekfac[simple_nn_pert]": 0.7043358350001654, + "tests/influence/torch/test_influence_model.py::test_influences_ekfac[simple_nn_up]": 0.7221000240006106, + "tests/influence/torch/test_influence_model.py::test_influences_nn[conv1d_nn_pert-cg]": 2.666355408999152, + "tests/influence/torch/test_influence_model.py::test_influences_nn[conv1d_nn_pert-lissa]": 3.536100011000599, + "tests/influence/torch/test_influence_model.py::test_influences_nn[conv1d_nn_up-cg]": 2.8996486520009057, + "tests/influence/torch/test_influence_model.py::test_influences_nn[conv1d_nn_up-lissa]": 3.648799233000318, + "tests/influence/torch/test_influence_model.py::test_influences_nn[conv1d_no_grad_up-cg]": 0.846027261000927, + "tests/influence/torch/test_influence_model.py::test_influences_nn[conv1d_no_grad_up-lissa]": 1.46926116800023, + "tests/influence/torch/test_influence_model.py::test_influences_nn[conv2d_nn_pert-cg]": 1.322623816999112, + "tests/influence/torch/test_influence_model.py::test_influences_nn[conv2d_nn_pert-lissa]": 2.4566458920016885, + 
"tests/influence/torch/test_influence_model.py::test_influences_nn[conv2d_nn_up-cg]": 1.3719535260006523, + "tests/influence/torch/test_influence_model.py::test_influences_nn[conv2d_nn_up-lissa]": 2.3520564940008626, + "tests/influence/torch/test_influence_model.py::test_influences_nn[conv3d_nn_pert-cg]": 1.4215319300001283, + "tests/influence/torch/test_influence_model.py::test_influences_nn[conv3d_nn_pert-lissa]": 2.5365598410025996, + "tests/influence/torch/test_influence_model.py::test_influences_nn[conv3d_nn_up-cg]": 1.4816708039998048, + "tests/influence/torch/test_influence_model.py::test_influences_nn[conv3d_nn_up-lissa]": 2.420441305001077, + "tests/influence/torch/test_influence_model.py::test_influences_nn[simple_nn_class_up-cg]": 3.5962213779985177, + "tests/influence/torch/test_influence_model.py::test_influences_nn[simple_nn_class_up-lissa]": 4.116930427000625, + "tests/influence/torch/test_influence_model.py::test_influences_nn[simple_nn_pert-cg]": 0.8423471179994522, + "tests/influence/torch/test_influence_model.py::test_influences_nn[simple_nn_pert-lissa]": 1.8240221239993843, + "tests/influence/torch/test_influence_model.py::test_influences_nn[simple_nn_up-cg]": 0.8927097550003964, + "tests/influence/torch/test_influence_model.py::test_influences_nn[simple_nn_up-lissa]": 1.7715864019992296, + "tests/influence/torch/test_util.py::test_align_structure_error[source0-target0]": 0.008872623999195639, + "tests/influence/torch/test_util.py::test_align_structure_error[source1-target1]": 0.008990501999505796, + "tests/influence/torch/test_util.py::test_align_structure_error[source2-unsupported]": 0.008625348000350641, + "tests/influence/torch/test_util.py::test_align_structure_success[source0-target0]": 0.009682354999313247, + "tests/influence/torch/test_util.py::test_align_structure_success[source1-target1]": 0.011076738001065678, + "tests/influence/torch/test_util.py::test_align_structure_success[source2-target2]": 0.008422474998951657, + 
"tests/influence/torch/test_util.py::test_batch_hvp[model_data0-1e-05]": 0.018433343999276985, + "tests/influence/torch/test_util.py::test_batch_hvp[model_data1-1e-05]": 0.04329261199745815, + "tests/influence/torch/test_util.py::test_batch_hvp[model_data2-1e-05]": 0.021779085998787195, + "tests/influence/torch/test_util.py::test_batch_hvp[model_data3-1e-05]": 0.02447877800113929, + "tests/influence/torch/test_util.py::test_batch_hvp[model_data4-1e-05]": 0.027325978000590112, + "tests/influence/torch/test_util.py::test_get_hvp_function[model_data0-4-avg]": 0.05654373300058069, + "tests/influence/torch/test_util.py::test_get_hvp_function[model_data0-4-full]": 0.048235695001494605, + "tests/influence/torch/test_util.py::test_get_hvp_function[model_data1-5-avg]": 0.10194805900027859, + "tests/influence/torch/test_util.py::test_get_hvp_function[model_data1-5-full]": 0.07934144700084289, + "tests/influence/torch/test_util.py::test_get_hvp_function[model_data2-10-avg]": 0.04320316699704563, + "tests/influence/torch/test_util.py::test_get_hvp_function[model_data2-10-full]": 0.03360124099890527, + "tests/influence/torch/test_util.py::test_get_hvp_function[model_data3-8-avg]": 0.062039004000325804, + "tests/influence/torch/test_util.py::test_get_hvp_function[model_data3-8-full]": 0.039968260998648475, + "tests/influence/torch/test_util.py::test_get_hvp_function[model_data4-4-avg]": 0.5075304600013624, + "tests/influence/torch/test_util.py::test_get_hvp_function[model_data4-4-full]": 0.29033965000235185, + "tests/influence/torch/test_util.py::test_lanzcos_low_rank_hessian_approx[model_data0-4-200-0.0001]": 6.130291282001053, + "tests/influence/torch/test_util.py::test_lanzcos_low_rank_hessian_approx[model_data1-5-70-0.001]": 7.575732932000392, + "tests/influence/torch/test_util.py::test_lanzcos_low_rank_hessian_approx[model_data2-10-50-0.0001]": 5.145587835002516, + "tests/influence/torch/test_util.py::test_lanzcos_low_rank_hessian_approx[model_data3-8-160-1e-05]": 
9.05795658399984, + "tests/influence/torch/test_util.py::test_lanzcos_low_rank_hessian_approx[model_data4-4-250-1e-05]": 15.930120687000453, + "tests/influence/torch/test_util.py::test_lanzcos_low_rank_hessian_approx_exception": 0.010992516999976942, + "tests/influence/torch/test_util.py::test_torch_dataset_to_dask_array[tailing_dimensions0-30-5]": 0.3716939040004945, + "tests/influence/torch/test_util.py::test_torch_dataset_to_dask_array[tailing_dimensions0-30-6]": 0.3245709369984979, + "tests/influence/torch/test_util.py::test_torch_dataset_to_dask_array[tailing_dimensions0-45-5]": 0.4916222280025977, + "tests/influence/torch/test_util.py::test_torch_dataset_to_dask_array[tailing_dimensions0-45-6]": 0.44272739400003047, + "tests/influence/torch/test_util.py::test_torch_dataset_to_dask_array[tailing_dimensions0-50-5]": 11.764691698001116, + "tests/influence/torch/test_util.py::test_torch_dataset_to_dask_array[tailing_dimensions0-50-6]": 6.499053524999908, + "tests/influence/torch/test_util.py::test_torch_dataset_to_dask_array[tailing_dimensions1-30-5]": 0.3747018210015085, + "tests/influence/torch/test_util.py::test_torch_dataset_to_dask_array[tailing_dimensions1-30-6]": 0.33021277699845086, + "tests/influence/torch/test_util.py::test_torch_dataset_to_dask_array[tailing_dimensions1-45-5]": 0.5002059710004687, + "tests/influence/torch/test_util.py::test_torch_dataset_to_dask_array[tailing_dimensions1-45-6]": 0.46272212300027604, + "tests/influence/torch/test_util.py::test_torch_dataset_to_dask_array[tailing_dimensions1-50-5]": 0.5805674699986412, + "tests/influence/torch/test_util.py::test_torch_dataset_to_dask_array[tailing_dimensions1-50-6]": 0.5113370569997642, + "tests/influence/torch/test_util.py::test_torch_dataset_to_dask_array[tailing_dimensions2-30-5]": 0.44749919500100077, + "tests/influence/torch/test_util.py::test_torch_dataset_to_dask_array[tailing_dimensions2-30-6]": 0.37910716500118724, + 
"tests/influence/torch/test_util.py::test_torch_dataset_to_dask_array[tailing_dimensions2-45-5]": 0.5593350939998345, + "tests/influence/torch/test_util.py::test_torch_dataset_to_dask_array[tailing_dimensions2-45-6]": 0.47198495200063917, + "tests/influence/torch/test_util.py::test_torch_dataset_to_dask_array[tailing_dimensions2-50-5]": 0.5512238980008988, + "tests/influence/torch/test_util.py::test_torch_dataset_to_dask_array[tailing_dimensions2-50-6]": 0.5188143759987724, + "tests/test_plugin.py::test_failure": 0.003285975997641799, + "tests/test_plugin.py::test_fixture_call_no_arguments": 0.0033647860000201035, + "tests/test_plugin.py::test_fixture_only[1]": 0.0034970750002685236, + "tests/test_plugin.py::test_fixture_only[2]": 0.0034195480002381373, + "tests/test_plugin.py::test_marker_and_fixture[1]": 0.0034149520015489543, + "tests/test_plugin.py::test_marker_and_fixture[2]": 0.003513548002956668, + "tests/test_plugin.py::test_marker_ignore_exception[0]": 0.003384523999557132, + "tests/test_plugin.py::test_marker_ignore_exception[1]": 0.00331666099918948, + "tests/test_plugin.py::test_marker_ignore_exception[2]": 0.00323147399831214, + "tests/test_plugin.py::test_marker_ignore_exception[3]": 0.0032334910029021557, + "tests/test_plugin.py::test_marker_ignore_exception[4]": 0.003230911001082859, + "tests/test_plugin.py::test_marker_only[0]": 0.006288947000939515, + "tests/test_plugin.py::test_marker_only_with_data_fixture[0]": 0.003481858999293763, + "tests/test_plugin.py::test_marker_only_with_data_fixture[1]": 0.003525184998579789, "tests/test_plugin.py::test_marker_only_with_data_fixture[2]": 0.0012167239910922945, - "tests/test_results.py::test_adding_different_indices[indices_10-names_10-values_10-indices_20-names_20-values_20-expected_indices0-expected_names0-expected_values0]": 0.0020641259907279164, - 
"tests/test_results.py::test_adding_different_indices[indices_11-names_11-values_11-indices_21-names_21-values_21-expected_indices1-expected_names1-expected_values1]": 0.002675808995263651, - "tests/test_results.py::test_adding_different_indices[indices_12-names_12-values_12-indices_22-names_22-values_22-expected_indices2-expected_names2-expected_values2]": 0.002674269024282694, - "tests/test_results.py::test_adding_different_indices[indices_13-names_13-values_13-indices_23-names_23-values_23-expected_indices3-expected_names3-expected_values3]": 0.0020707659714389592, - "tests/test_results.py::test_adding_random": 0.0034820580040104687, - "tests/test_results.py::test_dataframe_sorting[values0-names0-ranks_asc0]": 0.0029723149491474032, - "tests/test_results.py::test_dataframe_sorting[values1-names1-ranks_asc1]": 0.002218269946752116, - "tests/test_results.py::test_empty[0]": 0.0012037760170642287, - "tests/test_results.py::test_empty[5]": 0.001365817035548389, - "tests/test_results.py::test_empty_deprecation": 0.0013571020099334419, - "tests/test_results.py::test_equality[values0-names0]": 0.0021291770099196583, - "tests/test_results.py::test_equality[values1-names1]": 0.0016342299932148308, - "tests/test_results.py::test_extra_values[extra_values0]": 0.001437259983504191, - "tests/test_results.py::test_extra_values[extra_values1]": 0.0015066640044096857, - "tests/test_results.py::test_from_random_creation[-1.0-10]": 0.0015409209881909192, - "tests/test_results.py::test_from_random_creation[-1.0-1]": 0.0014630080258939415, - "tests/test_results.py::test_from_random_creation[1.0-10]": 0.0012284110125619918, - "tests/test_results.py::test_from_random_creation[1.0-1]": 0.0013108189741615206, - "tests/test_results.py::test_from_random_creation[None-10]": 0.0012196720344945788, - "tests/test_results.py::test_from_random_creation[None-1]": 0.0015253000019583851, - "tests/test_results.py::test_from_random_creation_errors": 0.0009378239628858864, - 
"tests/test_results.py::test_get_idx": 0.0010275309905409813, - "tests/test_results.py::test_indexing[values0-names0-ranks_asc0]": 0.0014630479854531586, - "tests/test_results.py::test_indexing[values1-names1-ranks_asc1]": 0.001598447997821495, - "tests/test_results.py::test_iter[values0-names0-ranks_asc0]": 0.0013525879476219416, - "tests/test_results.py::test_iter[values1-names1-ranks_asc1]": 0.0014122460270300508, - "tests/test_results.py::test_names[data_names0]": 0.0015603950014337897, - "tests/test_results.py::test_serialization[values0-None-dumps-loads0]": 0.001649087033001706, - "tests/test_results.py::test_serialization[values0-None-dumps-loads1]": 0.0016458219906780869, - "tests/test_results.py::test_serialization[values1-None-dumps-loads0]": 0.0015400749980472028, - "tests/test_results.py::test_serialization[values1-None-dumps-loads1]": 0.0019450989784672856, - "tests/test_results.py::test_sorting[values0-names0-ranks_asc0]": 0.0016402129840571433, - "tests/test_results.py::test_sorting[values1-names1-ranks_asc1]": 0.0016363860049750656, - "tests/test_results.py::test_todataframe[values0-names0-ranks_asc0]": 0.0023001570079941303, - "tests/test_results.py::test_todataframe[values1-names1-ranks_asc1]": 0.002222412033006549, - "tests/test_results.py::test_types[indices0-int32-data_names0---True]": 0.013553835999118746, + "tests/utils/test_caching.py::test_cached_func_hash_function[foo--False]": 0.009608976999516017, + "tests/utils/test_caching.py::test_cached_func_hash_function[foo-foo-True]": 0.007844682999348151, + "tests/utils/test_caching.py::test_cached_func_hash_function[foo-foo_duplicate-True]": 0.00921746999847528, + "tests/utils/test_caching.py::test_cached_func_hash_function[foo-foo_with_random-False]": 0.006780997000532807, + "tests/utils/test_caching.py::test_cached_func_hash_function[foo_with_random-foo_with_random_and_sleep-False]": 0.008397087000048487, "tests/utils/test_caching.py::test_failed_connection": 0.0039788429858163, + 
"tests/utils/test_caching.py::test_faster_with_repeated_training[disk]": 5.499508081999011, + "tests/utils/test_caching.py::test_faster_with_repeated_training[in-memory]": 5.596929604998877, + "tests/utils/test_caching.py::test_faster_with_repeated_training[memcached]": 6.545152930997574, + "tests/utils/test_caching.py::test_memcached_failed_connection": 0.009630470000047353, "tests/utils/test_caching.py::test_memcached_faster_with_repeated_training": 5.003239913989091, "tests/utils/test_caching.py::test_memcached_parallel_jobs[joblib]": 3.1677759810409043, "tests/utils/test_caching.py::test_memcached_parallel_jobs[ray-external]": 38.430890925985295, @@ -151,303 +390,439 @@ "tests/utils/test_caching.py::test_memcached_parallel_repeated_training[ray-local-20-2-20-10]": 0.007027510990155861, "tests/utils/test_caching.py::test_memcached_repeated_training": 2.3077823049970903, "tests/utils/test_caching.py::test_memcached_single_job": 0.007132280006771907, - "tests/utils/test_dataset.py::test_creating_dataset_from_sklearn[0.1]": 0.009810923977056518, - "tests/utils/test_dataset.py::test_creating_dataset_from_sklearn[0.5]": 0.0023630280047655106, - "tests/utils/test_dataset.py::test_creating_dataset_from_sklearn[0.8]": 0.002483188029145822, - "tests/utils/test_dataset.py::test_creating_dataset_from_x_y_arrays[0.1-kwargs0]": 0.0022864479979034513, - "tests/utils/test_dataset.py::test_creating_dataset_from_x_y_arrays[0.1-kwargs1]": 0.001960736990440637, - "tests/utils/test_dataset.py::test_creating_dataset_from_x_y_arrays[0.5-kwargs0]": 0.0018571619875729084, - "tests/utils/test_dataset.py::test_creating_dataset_from_x_y_arrays[0.5-kwargs1]": 0.0019256969972047955, - "tests/utils/test_dataset.py::test_creating_dataset_from_x_y_arrays[0.8-kwargs0]": 0.0020103229908272624, - "tests/utils/test_dataset.py::test_creating_dataset_from_x_y_arrays[0.8-kwargs1]": 0.001870437990874052, - "tests/utils/test_dataset.py::test_creating_dataset_subsclassfrom_sklearn[0.1]": 
0.004145220998907462, - "tests/utils/test_dataset.py::test_creating_dataset_subsclassfrom_sklearn[0.5]": 0.002273507008794695, - "tests/utils/test_dataset.py::test_creating_dataset_subsclassfrom_sklearn[0.8]": 0.0025340290158055723, - "tests/utils/test_dataset.py::test_creating_grouped_dataset_from_sklearn[0.1]": 0.002445343037834391, - "tests/utils/test_dataset.py::test_creating_grouped_dataset_from_sklearn[0.5]": 0.002387374988757074, - "tests/utils/test_dataset.py::test_creating_grouped_dataset_from_sklearn[0.8]": 0.0025074610312003642, - "tests/utils/test_dataset.py::test_creating_grouped_dataset_from_sklearn_failure[0.1]": 0.0031885300122667104, - "tests/utils/test_dataset.py::test_creating_grouped_dataset_from_sklearn_failure[0.5]": 0.0018069100042339414, - "tests/utils/test_dataset.py::test_creating_grouped_dataset_from_sklearn_failure[0.8]": 0.0019649149908218533, - "tests/utils/test_dataset.py::test_creating_grouped_dataset_from_x_y_arrays[0.1-kwargs0]": 0.002473844971973449, - "tests/utils/test_dataset.py::test_creating_grouped_dataset_from_x_y_arrays[0.1-kwargs1]": 0.0024133779807016253, - "tests/utils/test_dataset.py::test_creating_grouped_dataset_from_x_y_arrays[0.5-kwargs0]": 0.0023138070246204734, - "tests/utils/test_dataset.py::test_creating_grouped_dataset_from_x_y_arrays[0.5-kwargs1]": 0.002177672984544188, - "tests/utils/test_dataset.py::test_creating_grouped_dataset_from_x_y_arrays[0.8-kwargs0]": 0.0030658979958388954, - "tests/utils/test_dataset.py::test_creating_grouped_dataset_from_x_y_arrays[0.8-kwargs1]": 0.002469450992066413, - "tests/utils/test_dataset.py::test_creating_grouped_dataset_from_x_y_arrays_failure[0.1]": 0.0016314840177074075, - "tests/utils/test_dataset.py::test_creating_grouped_dataset_from_x_y_arrays_failure[0.5]": 0.0017394520109519362, - "tests/utils/test_dataset.py::test_creating_grouped_dataset_from_x_y_arrays_failure[0.8]": 0.0017109749896917492, - 
"tests/utils/test_dataset.py::test_creating_grouped_dataset_subsclassfrom_sklearn[0.1]": 0.003284825972514227, - "tests/utils/test_dataset.py::test_creating_grouped_dataset_subsclassfrom_sklearn[0.5]": 0.0038210980128496885, - "tests/utils/test_dataset.py::test_creating_grouped_dataset_subsclassfrom_sklearn[0.8]": 0.0023955479555297643, - "tests/utils/test_dataset.py::test_grouped_dataset_results": 0.00312941602896899, - "tests/utils/test_numeric.py::test_powerset": 0.002356015960685909, - "tests/utils/test_numeric.py::test_random_matrix_with_condition_number[0-2-ValueError]": 0.0011365640093572438, - "tests/utils/test_numeric.py::test_random_matrix_with_condition_number[1-2-ValueError]": 0.0010459299955982715, - "tests/utils/test_numeric.py::test_random_matrix_with_condition_number[10-1-ValueError]": 0.0011281229672022164, - "tests/utils/test_numeric.py::test_random_matrix_with_condition_number[10-2-None]": 0.001765107037499547, - "tests/utils/test_numeric.py::test_random_matrix_with_condition_number[2-10-None]": 0.001528021995909512, - "tests/utils/test_numeric.py::test_random_matrix_with_condition_number[4--2-ValueError]": 0.0011659429874271154, - "tests/utils/test_numeric.py::test_random_matrix_with_condition_number[7-23-None]": 0.001419320033164695, - "tests/utils/test_numeric.py::test_random_matrix_with_condition_number_reproducible[10-2]": 0.001462101994547993, - "tests/utils/test_numeric.py::test_random_matrix_with_condition_number_reproducible[2-10]": 0.001395261992001906, - "tests/utils/test_numeric.py::test_random_matrix_with_condition_number_reproducible[7-23]": 0.001416039012838155, - "tests/utils/test_numeric.py::test_random_matrix_with_condition_number_stochastic[10-2]": 0.0015627649845555425, - "tests/utils/test_numeric.py::test_random_matrix_with_condition_number_stochastic[2-10]": 0.0014263579796534032, - "tests/utils/test_numeric.py::test_random_matrix_with_condition_number_stochastic[7-23]": 0.0016800050216261297, - 
"tests/utils/test_numeric.py::test_random_powerset[0-1]": 0.0012409990013111383, - "tests/utils/test_numeric.py::test_random_powerset[1-10]": 0.0014637470012530684, - "tests/utils/test_numeric.py::test_random_powerset[10-1024]": 0.0079122620227281, - "tests/utils/test_numeric.py::test_random_powerset[5-128]": 0.0020825770043302327, - "tests/utils/test_numeric.py::test_random_powerset_label_min[0-10-3-1000]": 0.11152737599331886, - "tests/utils/test_numeric.py::test_random_powerset_label_min[1-10-3-1000]": 0.11375491399667226, - "tests/utils/test_numeric.py::test_random_powerset_label_min[2-10-3-1000]": 0.11396494103246368, - "tests/utils/test_numeric.py::test_random_powerset_reproducible[10-1024]": 0.013066521001746878, - "tests/utils/test_numeric.py::test_random_powerset_stochastic[10-1024]": 0.012338358006672934, - "tests/utils/test_numeric.py::test_random_subset_of_size[0-0-None]": 0.0015464180323760957, - "tests/utils/test_numeric.py::test_random_subset_of_size[0-1-ValueError]": 0.001127758005168289, - "tests/utils/test_numeric.py::test_random_subset_of_size[10-0-None]": 0.0013323969906195998, - "tests/utils/test_numeric.py::test_random_subset_of_size[10-3-None]": 0.0015970039821695536, - "tests/utils/test_numeric.py::test_random_subset_of_size[1000-40-None]": 0.001427212991984561, - "tests/utils/test_numeric.py::test_random_subset_of_size_stochastic[10-3]": 0.001142591005191207, - "tests/utils/test_numeric.py::test_random_subset_of_size_stochastic[1000-40]": 0.0012538870214484632, - "tests/utils/test_numeric.py::test_running_moments": 0.35335890398710035, - "tests/utils/test_parallel.py::test_chunkification[joblib-data0-3-expected_chunks0]": 0.0042906299931928515, - "tests/utils/test_parallel.py::test_chunkification[joblib-data1-2-expected_chunks1]": 0.004308464995119721, - "tests/utils/test_parallel.py::test_chunkification[joblib-data2-2-expected_chunks2]": 0.004244079987984151, - 
"tests/utils/test_parallel.py::test_chunkification[joblib-data3-3-expected_chunks3]": 0.004028873983770609, - "tests/utils/test_parallel.py::test_chunkification[joblib-data4-5-expected_chunks4]": 0.004101024009287357, - "tests/utils/test_parallel.py::test_chunkification[joblib-data5-42-expected_chunks5]": 0.004789252998307347, - "tests/utils/test_parallel.py::test_chunkification[joblib-data6-42-expected_chunks6]": 0.004256373038515449, - "tests/utils/test_parallel.py::test_chunkification[joblib-data7-4-expected_chunks7]": 0.004143773025134578, - "tests/utils/test_parallel.py::test_chunkification[joblib-data8-4-expected_chunks8]": 0.0040604640380479395, - "tests/utils/test_parallel.py::test_chunkification[ray-external-data0-3-expected_chunks0]": 0.0060307729872874916, - "tests/utils/test_parallel.py::test_chunkification[ray-external-data1-2-expected_chunks1]": 0.005929058010224253, - "tests/utils/test_parallel.py::test_chunkification[ray-external-data2-2-expected_chunks2]": 0.009121662005782127, - "tests/utils/test_parallel.py::test_chunkification[ray-external-data3-3-expected_chunks3]": 0.009956339985365048, - "tests/utils/test_parallel.py::test_chunkification[ray-external-data4-5-expected_chunks4]": 0.010149178997380659, - "tests/utils/test_parallel.py::test_chunkification[ray-external-data5-42-expected_chunks5]": 0.010347278992412612, - "tests/utils/test_parallel.py::test_chunkification[ray-external-data6-42-expected_chunks6]": 0.010047424992080778, - "tests/utils/test_parallel.py::test_chunkification[ray-external-data7-4-expected_chunks7]": 0.008645244990475476, - "tests/utils/test_parallel.py::test_chunkification[ray-external-data8-4-expected_chunks8]": 0.009245932975318283, - "tests/utils/test_parallel.py::test_chunkification[ray-local-data0-3-expected_chunks0]": 0.0045589170476887375, - "tests/utils/test_parallel.py::test_chunkification[ray-local-data1-2-expected_chunks1]": 0.004910157964332029, - 
"tests/utils/test_parallel.py::test_chunkification[ray-local-data2-2-expected_chunks2]": 0.004910080024274066, - "tests/utils/test_parallel.py::test_chunkification[ray-local-data3-3-expected_chunks3]": 0.0059317940031178296, - "tests/utils/test_parallel.py::test_chunkification[ray-local-data4-5-expected_chunks4]": 0.008992511982796714, - "tests/utils/test_parallel.py::test_chunkification[ray-local-data5-42-expected_chunks5]": 0.008223566022934392, - "tests/utils/test_parallel.py::test_chunkification[ray-local-data6-42-expected_chunks6]": 0.007052068045595661, - "tests/utils/test_parallel.py::test_chunkification[ray-local-data7-4-expected_chunks7]": 0.004718763986602426, - "tests/utils/test_parallel.py::test_chunkification[ray-local-data8-4-expected_chunks8]": 0.005322564014932141, - "tests/utils/test_parallel.py::test_effective_n_jobs[joblib]": 0.0014253620174713433, - "tests/utils/test_parallel.py::test_effective_n_jobs[ray-external]": 3.978927739954088, - "tests/utils/test_parallel.py::test_effective_n_jobs[ray-local]": 4.104055134986993, - "tests/utils/test_parallel.py::test_future_cancellation[joblib]": 0.005014022986870259, - "tests/utils/test_parallel.py::test_future_cancellation[ray-external]": 1.9293224809807725, - "tests/utils/test_parallel.py::test_future_cancellation[ray-local]": 0.07703918303013779, - "tests/utils/test_parallel.py::test_futures_executor_map[joblib]": 1.5601177359640133, - "tests/utils/test_parallel.py::test_futures_executor_map[ray-external]": 0.09417001300607808, - "tests/utils/test_parallel.py::test_futures_executor_map[ray-local]": 0.09271710200118832, - "tests/utils/test_parallel.py::test_futures_executor_map_with_max_workers[joblib]": 0.007176648010499775, - "tests/utils/test_parallel.py::test_futures_executor_map_with_max_workers[ray-external]": 1.090440120024141, - "tests/utils/test_parallel.py::test_futures_executor_map_with_max_workers[ray-local]": 1.095393077004701, - 
"tests/utils/test_parallel.py::test_futures_executor_submit[joblib]": 1.8566069509834051, - "tests/utils/test_parallel.py::test_futures_executor_submit[ray-external]": 0.04992300402955152, - "tests/utils/test_parallel.py::test_futures_executor_submit[ray-local]": 0.048481280013220385, - "tests/utils/test_parallel.py::test_map_reduce_job[joblib-1-list-indices0-expected0]": 0.0015987549850251526, - "tests/utils/test_parallel.py::test_map_reduce_job[joblib-1-list-indices1-expected1]": 0.001547530002426356, - "tests/utils/test_parallel.py::test_map_reduce_job[joblib-1-list-indices2-expected2]": 0.001560483971843496, - "tests/utils/test_parallel.py::test_map_reduce_job[joblib-1-numpy-indices4-45]": 0.00178057502489537, - "tests/utils/test_parallel.py::test_map_reduce_job[joblib-1-range-indices3-expected3]": 0.0015469170466531068, - "tests/utils/test_parallel.py::test_map_reduce_job[joblib-2-list-indices0-expected0]": 0.0018091480305884033, - "tests/utils/test_parallel.py::test_map_reduce_job[joblib-2-list-indices1-expected1]": 0.01276223495369777, - "tests/utils/test_parallel.py::test_map_reduce_job[joblib-2-list-indices2-expected2]": 0.012882986018666998, - "tests/utils/test_parallel.py::test_map_reduce_job[joblib-2-numpy-indices4-45]": 0.01399321696953848, - "tests/utils/test_parallel.py::test_map_reduce_job[joblib-2-range-indices3-expected3]": 0.012885421980172396, - "tests/utils/test_parallel.py::test_map_reduce_job[joblib-4-list-indices0-expected0]": 0.15361307095736265, - "tests/utils/test_parallel.py::test_map_reduce_job[joblib-4-list-indices1-expected1]": 0.8156346119940281, - "tests/utils/test_parallel.py::test_map_reduce_job[joblib-4-list-indices2-expected2]": 1.3068530370073859, - "tests/utils/test_parallel.py::test_map_reduce_job[joblib-4-numpy-indices4-45]": 0.01750938399345614, - "tests/utils/test_parallel.py::test_map_reduce_job[joblib-4-range-indices3-expected3]": 0.017205809010192752, - 
"tests/utils/test_parallel.py::test_map_reduce_job[ray-external-1-list-indices0-expected0]": 0.0029827099933754653, - "tests/utils/test_parallel.py::test_map_reduce_job[ray-external-1-list-indices1-expected1]": 0.0027304230316076428, - "tests/utils/test_parallel.py::test_map_reduce_job[ray-external-1-list-indices2-expected2]": 0.0026203590095974505, - "tests/utils/test_parallel.py::test_map_reduce_job[ray-external-1-numpy-indices4-45]": 0.003456770005868748, - "tests/utils/test_parallel.py::test_map_reduce_job[ray-external-1-range-indices3-expected3]": 0.0027074709651060402, - "tests/utils/test_parallel.py::test_map_reduce_job[ray-external-2-list-indices0-expected0]": 0.8282912400027271, - "tests/utils/test_parallel.py::test_map_reduce_job[ray-external-2-list-indices1-expected1]": 2.2837093910493422, - "tests/utils/test_parallel.py::test_map_reduce_job[ray-external-2-list-indices2-expected2]": 2.4645657170040067, - "tests/utils/test_parallel.py::test_map_reduce_job[ray-external-2-numpy-indices4-45]": 2.281004316988401, - "tests/utils/test_parallel.py::test_map_reduce_job[ray-external-2-range-indices3-expected3]": 2.393285626982106, - "tests/utils/test_parallel.py::test_map_reduce_job[ray-external-4-list-indices0-expected0]": 1.903353853005683, - "tests/utils/test_parallel.py::test_map_reduce_job[ray-external-4-list-indices1-expected1]": 2.947957994969329, - "tests/utils/test_parallel.py::test_map_reduce_job[ray-external-4-list-indices2-expected2]": 3.211508878011955, - "tests/utils/test_parallel.py::test_map_reduce_job[ray-external-4-numpy-indices4-45]": 3.3349247129808646, - "tests/utils/test_parallel.py::test_map_reduce_job[ray-external-4-range-indices3-expected3]": 3.599037625041092, - "tests/utils/test_parallel.py::test_map_reduce_job[ray-local-1-list-indices0-expected0]": 0.016201907012145966, - "tests/utils/test_parallel.py::test_map_reduce_job[ray-local-1-list-indices1-expected1]": 0.013995222019730136, - 
"tests/utils/test_parallel.py::test_map_reduce_job[ray-local-1-list-indices2-expected2]": 0.013650566979777068, - "tests/utils/test_parallel.py::test_map_reduce_job[ray-local-1-numpy-indices4-45]": 0.013722714997129515, - "tests/utils/test_parallel.py::test_map_reduce_job[ray-local-1-range-indices3-expected3]": 0.013983122975332662, - "tests/utils/test_parallel.py::test_map_reduce_job[ray-local-2-list-indices0-expected0]": 1.5035187809844501, - "tests/utils/test_parallel.py::test_map_reduce_job[ray-local-2-list-indices1-expected1]": 2.235937710967846, - "tests/utils/test_parallel.py::test_map_reduce_job[ray-local-2-list-indices2-expected2]": 2.1283504489983898, - "tests/utils/test_parallel.py::test_map_reduce_job[ray-local-2-numpy-indices4-45]": 2.0944344620220363, - "tests/utils/test_parallel.py::test_map_reduce_job[ray-local-2-range-indices3-expected3]": 2.104675643990049, - "tests/utils/test_parallel.py::test_map_reduce_job[ray-local-4-list-indices0-expected0]": 1.7145587989652995, - "tests/utils/test_parallel.py::test_map_reduce_job[ray-local-4-list-indices1-expected1]": 2.772829012013972, - "tests/utils/test_parallel.py::test_map_reduce_job[ray-local-4-list-indices2-expected2]": 3.1254515810287558, - "tests/utils/test_parallel.py::test_map_reduce_job[ray-local-4-numpy-indices4-45]": 3.4023931239789817, - "tests/utils/test_parallel.py::test_map_reduce_job[ray-local-4-range-indices3-expected3]": 3.7103631219943054, - "tests/utils/test_parallel.py::test_map_reduce_job_partial_map_and_reduce_func[joblib]": 0.01629631401738152, - "tests/utils/test_parallel.py::test_map_reduce_job_partial_map_and_reduce_func[ray-external]": 3.550109267991502, - "tests/utils/test_parallel.py::test_map_reduce_job_partial_map_and_reduce_func[ray-local]": 3.186494815017795, - "tests/utils/test_parallel.py::test_map_reduce_seeding[joblib-42-12]": 0.05403909899177961, - "tests/utils/test_parallel.py::test_map_reduce_seeding[ray-external-42-12]": 9.918427228025394, - 
"tests/utils/test_parallel.py::test_map_reduce_seeding[ray-local-42-12]": 9.834357938991161, - "tests/utils/test_parallel.py::test_wrap_function[joblib]": 0.0031614619656465948, - "tests/utils/test_parallel.py::test_wrap_function[ray-external]": 3.1981390729779378, - "tests/utils/test_parallel.py::test_wrap_function[ray-local]": 3.2998613989911973, - "tests/utils/test_score.py::test_compose_score": 0.0027295449981465936, - "tests/utils/test_score.py::test_scorer": 0.0051104900194332, - "tests/utils/test_score.py::test_squashed_r2": 0.001943372975802049, - "tests/utils/test_score.py::test_squashed_variance": 0.001487176021328196, - "tests/utils/test_status.py::test_and_status": 0.001112824014853686, - "tests/utils/test_status.py::test_not_status": 0.0010235870140604675, - "tests/utils/test_status.py::test_or_status": 0.0009352969937026501, + "tests/utils/test_caching.py::test_parallel_jobs[joblib-disk]": 0.008082594998995773, + "tests/utils/test_caching.py::test_parallel_jobs[joblib-in-memory]": 0.007858986000428558, + "tests/utils/test_caching.py::test_parallel_jobs[joblib-memcached]": 5.864486223999847, + "tests/utils/test_caching.py::test_parallel_jobs[ray-external-disk]": 0.019337756999448175, + "tests/utils/test_caching.py::test_parallel_jobs[ray-external-in-memory]": 3.8737009590004163, + "tests/utils/test_caching.py::test_parallel_jobs[ray-external-memcached]": 0.010424148002130096, + "tests/utils/test_caching.py::test_parallel_jobs[ray-local-disk]": 0.006320855998637853, + "tests/utils/test_caching.py::test_parallel_jobs[ray-local-in-memory]": 0.007159704999139649, + "tests/utils/test_caching.py::test_parallel_jobs[ray-local-memcached]": 0.010268650999933016, + "tests/utils/test_caching.py::test_parallel_repeated_training[joblib-disk-20-1-10-5]": 0.040544517996750074, + "tests/utils/test_caching.py::test_parallel_repeated_training[joblib-disk-20-1-20-10]": 0.041609834999690065, + 
"tests/utils/test_caching.py::test_parallel_repeated_training[joblib-disk-20-2-10-5]": 0.450297680001313, + "tests/utils/test_caching.py::test_parallel_repeated_training[joblib-disk-20-2-20-10]": 0.41885778900177684, + "tests/utils/test_caching.py::test_parallel_repeated_training[joblib-in-memory-20-1-10-5]": 0.04637932000150613, + "tests/utils/test_caching.py::test_parallel_repeated_training[joblib-in-memory-20-1-20-10]": 0.038561840998227126, + "tests/utils/test_caching.py::test_parallel_repeated_training[joblib-in-memory-20-2-10-5]": 4.16153838199898, + "tests/utils/test_caching.py::test_parallel_repeated_training[joblib-in-memory-20-2-20-10]": 0.47474137900280766, + "tests/utils/test_caching.py::test_parallel_repeated_training[joblib-memcached-20-1-10-5]": 0.03560425399882661, + "tests/utils/test_caching.py::test_parallel_repeated_training[joblib-memcached-20-1-20-10]": 0.04425754300064, + "tests/utils/test_caching.py::test_parallel_repeated_training[joblib-memcached-20-2-10-5]": 0.46746473100029107, + "tests/utils/test_caching.py::test_parallel_repeated_training[joblib-memcached-20-2-20-10]": 0.47426626100059366, + "tests/utils/test_caching.py::test_parallel_repeated_training[ray-external-disk-20-1-10-5]": 0.019769640000959043, + "tests/utils/test_caching.py::test_parallel_repeated_training[ray-external-disk-20-1-20-10]": 0.02465987799951108, + "tests/utils/test_caching.py::test_parallel_repeated_training[ray-external-disk-20-2-10-5]": 0.012952293998750974, + "tests/utils/test_caching.py::test_parallel_repeated_training[ray-external-disk-20-2-20-10]": 0.010107056999913766, + "tests/utils/test_caching.py::test_parallel_repeated_training[ray-external-in-memory-20-1-10-5]": 0.013676337999640964, + "tests/utils/test_caching.py::test_parallel_repeated_training[ray-external-in-memory-20-1-20-10]": 0.009283014000175172, + "tests/utils/test_caching.py::test_parallel_repeated_training[ray-external-in-memory-20-2-10-5]": 0.014747097000508802, + 
"tests/utils/test_caching.py::test_parallel_repeated_training[ray-external-in-memory-20-2-20-10]": 0.012189770999611937, + "tests/utils/test_caching.py::test_parallel_repeated_training[ray-external-memcached-20-1-10-5]": 0.014756809001482907, + "tests/utils/test_caching.py::test_parallel_repeated_training[ray-external-memcached-20-1-20-10]": 0.014543373998094467, + "tests/utils/test_caching.py::test_parallel_repeated_training[ray-external-memcached-20-2-10-5]": 0.018690378999963286, + "tests/utils/test_caching.py::test_parallel_repeated_training[ray-external-memcached-20-2-20-10]": 0.017414769001334207, + "tests/utils/test_caching.py::test_parallel_repeated_training[ray-local-disk-20-1-10-5]": 0.00978782600031991, + "tests/utils/test_caching.py::test_parallel_repeated_training[ray-local-disk-20-1-20-10]": 0.008025870998608298, + "tests/utils/test_caching.py::test_parallel_repeated_training[ray-local-disk-20-2-10-5]": 0.00932121699952404, + "tests/utils/test_caching.py::test_parallel_repeated_training[ray-local-disk-20-2-20-10]": 0.012999636999666109, + "tests/utils/test_caching.py::test_parallel_repeated_training[ray-local-in-memory-20-1-10-5]": 0.010384335999333416, + "tests/utils/test_caching.py::test_parallel_repeated_training[ray-local-in-memory-20-1-20-10]": 0.007256282997332164, + "tests/utils/test_caching.py::test_parallel_repeated_training[ray-local-in-memory-20-2-10-5]": 0.007955910998134641, + "tests/utils/test_caching.py::test_parallel_repeated_training[ray-local-in-memory-20-2-20-10]": 0.006997692000368261, + "tests/utils/test_caching.py::test_parallel_repeated_training[ray-local-memcached-20-1-10-5]": 0.008193191000827937, + "tests/utils/test_caching.py::test_parallel_repeated_training[ray-local-memcached-20-1-20-10]": 0.010128158999577863, + "tests/utils/test_caching.py::test_parallel_repeated_training[ray-local-memcached-20-2-10-5]": 0.013161438002498471, + 
"tests/utils/test_caching.py::test_parallel_repeated_training[ray-local-memcached-20-2-20-10]": 0.009240641998985666, + "tests/utils/test_caching.py::test_repeated_training[disk]": 1.2679626049975923, + "tests/utils/test_caching.py::test_repeated_training[in-memory]": 0.8953080740011501, + "tests/utils/test_caching.py::test_repeated_training[memcached]": 1.0694843190012762, + "tests/utils/test_caching.py::test_single_job[disk]": 0.013467190003211726, + "tests/utils/test_caching.py::test_single_job[in-memory]": 0.007606943001519539, + "tests/utils/test_caching.py::test_single_job[memcached]": 0.012050191000525956, + "tests/utils/test_caching.py::test_without_pymemcache": 0.0068226680014049634, + "tests/utils/test_dataset.py::test_creating_dataset_from_sklearn[0.1]": 0.020587041000908357, + "tests/utils/test_dataset.py::test_creating_dataset_from_sklearn[0.5]": 0.00390724699900602, + "tests/utils/test_dataset.py::test_creating_dataset_from_sklearn[0.8]": 0.00450960899979691, + "tests/utils/test_dataset.py::test_creating_dataset_from_x_y_arrays[0.1-kwargs0]": 0.0038753029984945897, + "tests/utils/test_dataset.py::test_creating_dataset_from_x_y_arrays[0.1-kwargs1]": 0.0036959019998903386, + "tests/utils/test_dataset.py::test_creating_dataset_from_x_y_arrays[0.5-kwargs0]": 0.0038168650007719407, + "tests/utils/test_dataset.py::test_creating_dataset_from_x_y_arrays[0.5-kwargs1]": 0.0037348340010794345, + "tests/utils/test_dataset.py::test_creating_dataset_from_x_y_arrays[0.8-kwargs0]": 0.003279165001004003, + "tests/utils/test_dataset.py::test_creating_dataset_from_x_y_arrays[0.8-kwargs1]": 0.003160262998790131, + "tests/utils/test_dataset.py::test_creating_dataset_subsclassfrom_sklearn[0.1]": 0.00434540000060224, + "tests/utils/test_dataset.py::test_creating_dataset_subsclassfrom_sklearn[0.5]": 0.004031194001072436, + "tests/utils/test_dataset.py::test_creating_dataset_subsclassfrom_sklearn[0.8]": 0.0037131489989405964, + 
"tests/utils/test_dataset.py::test_creating_grouped_dataset_from_sklearn[0.1]": 0.0038164179986779345, + "tests/utils/test_dataset.py::test_creating_grouped_dataset_from_sklearn[0.5]": 0.005253569997876184, + "tests/utils/test_dataset.py::test_creating_grouped_dataset_from_sklearn[0.8]": 0.005844020997756161, + "tests/utils/test_dataset.py::test_creating_grouped_dataset_from_sklearn_failure[0.1]": 0.003921200999684515, + "tests/utils/test_dataset.py::test_creating_grouped_dataset_from_sklearn_failure[0.5]": 0.0038101809986983426, + "tests/utils/test_dataset.py::test_creating_grouped_dataset_from_sklearn_failure[0.8]": 0.00419950299874472, + "tests/utils/test_dataset.py::test_creating_grouped_dataset_from_x_y_arrays[0.1-kwargs0]": 0.0037465159985003993, + "tests/utils/test_dataset.py::test_creating_grouped_dataset_from_x_y_arrays[0.1-kwargs1]": 0.0037122550002095522, + "tests/utils/test_dataset.py::test_creating_grouped_dataset_from_x_y_arrays[0.5-kwargs0]": 0.003727491999597987, + "tests/utils/test_dataset.py::test_creating_grouped_dataset_from_x_y_arrays[0.5-kwargs1]": 0.00471100999857299, + "tests/utils/test_dataset.py::test_creating_grouped_dataset_from_x_y_arrays[0.8-kwargs0]": 0.004182996997769806, + "tests/utils/test_dataset.py::test_creating_grouped_dataset_from_x_y_arrays[0.8-kwargs1]": 0.004742823000924545, + "tests/utils/test_dataset.py::test_creating_grouped_dataset_from_x_y_arrays_failure[0.1]": 0.0034745570010272786, + "tests/utils/test_dataset.py::test_creating_grouped_dataset_from_x_y_arrays_failure[0.5]": 0.0029412200001388555, + "tests/utils/test_dataset.py::test_creating_grouped_dataset_from_x_y_arrays_failure[0.8]": 0.003702544998304802, + "tests/utils/test_dataset.py::test_creating_grouped_dataset_subsclassfrom_sklearn[0.1]": 0.004099161000340246, + "tests/utils/test_dataset.py::test_creating_grouped_dataset_subsclassfrom_sklearn[0.5]": 0.004149230999246356, + 
"tests/utils/test_dataset.py::test_creating_grouped_dataset_subsclassfrom_sklearn[0.8]": 0.004327725000621285, + "tests/utils/test_dataset.py::test_grouped_dataset_results": 0.005107523998958641, + "tests/utils/test_numeric.py::test_powerset": 0.003924966000340646, + "tests/utils/test_numeric.py::test_random_matrix_with_condition_number[0-2-ValueError]": 0.003189409999322379, + "tests/utils/test_numeric.py::test_random_matrix_with_condition_number[1-2-ValueError]": 0.0027116169985674787, + "tests/utils/test_numeric.py::test_random_matrix_with_condition_number[10-1-ValueError]": 0.0027666500009218, + "tests/utils/test_numeric.py::test_random_matrix_with_condition_number[10-2-None]": 0.0031559840026602615, + "tests/utils/test_numeric.py::test_random_matrix_with_condition_number[2-10-None]": 0.004578909001793363, + "tests/utils/test_numeric.py::test_random_matrix_with_condition_number[4--2-ValueError]": 0.0026738769993244205, + "tests/utils/test_numeric.py::test_random_matrix_with_condition_number[7-23-None]": 0.0031755019990669098, + "tests/utils/test_numeric.py::test_random_matrix_with_condition_number_reproducible[10-2]": 0.0032696249982109293, + "tests/utils/test_numeric.py::test_random_matrix_with_condition_number_reproducible[2-10]": 0.0026570699992589653, + "tests/utils/test_numeric.py::test_random_matrix_with_condition_number_reproducible[7-23]": 0.004022232000352233, + "tests/utils/test_numeric.py::test_random_matrix_with_condition_number_stochastic[10-2]": 0.004431671000929782, + "tests/utils/test_numeric.py::test_random_matrix_with_condition_number_stochastic[2-10]": 0.003217298000890878, + "tests/utils/test_numeric.py::test_random_matrix_with_condition_number_stochastic[7-23]": 0.002955772999484907, + "tests/utils/test_numeric.py::test_random_powerset[0-1]": 0.003080959002545569, + "tests/utils/test_numeric.py::test_random_powerset[1-10]": 0.0025249310019717086, + "tests/utils/test_numeric.py::test_random_powerset[10-1024]": 0.012735986998450244, + 
"tests/utils/test_numeric.py::test_random_powerset[5-128]": 0.0035692419987753965, + "tests/utils/test_numeric.py::test_random_powerset_label_min[0-10-3-1000]": 0.171625541001049, + "tests/utils/test_numeric.py::test_random_powerset_label_min[1-10-3-1000]": 0.17619158500019694, + "tests/utils/test_numeric.py::test_random_powerset_label_min[2-10-3-1000]": 0.1760632390014507, + "tests/utils/test_numeric.py::test_random_powerset_reproducible[10-1024]": 0.018217550998087972, + "tests/utils/test_numeric.py::test_random_powerset_stochastic[10-1024]": 0.018896675997893908, + "tests/utils/test_numeric.py::test_random_subset_of_size[0-0-None]": 0.002780025000902242, + "tests/utils/test_numeric.py::test_random_subset_of_size[0-1-ValueError]": 0.0032452249997731997, + "tests/utils/test_numeric.py::test_random_subset_of_size[10-0-None]": 0.0033025680004357127, + "tests/utils/test_numeric.py::test_random_subset_of_size[10-3-None]": 0.002845983000952401, + "tests/utils/test_numeric.py::test_random_subset_of_size[1000-40-None]": 0.0032918939996307017, + "tests/utils/test_numeric.py::test_random_subset_of_size_stochastic[10-3]": 0.002797532002659864, + "tests/utils/test_numeric.py::test_random_subset_of_size_stochastic[1000-40]": 0.0036268280000513187, + "tests/utils/test_numeric.py::test_running_moments": 0.6145333489985205, + "tests/utils/test_parallel.py::test_chunkification[joblib-data0-3-expected_chunks0]": 0.015510658000494004, + "tests/utils/test_parallel.py::test_chunkification[joblib-data1-2-expected_chunks1]": 0.012093620000086958, + "tests/utils/test_parallel.py::test_chunkification[joblib-data2-2-expected_chunks2]": 0.011375399999451474, + "tests/utils/test_parallel.py::test_chunkification[joblib-data3-3-expected_chunks3]": 0.016111063001517323, + "tests/utils/test_parallel.py::test_chunkification[joblib-data4-5-expected_chunks4]": 0.02149817300050927, + "tests/utils/test_parallel.py::test_chunkification[joblib-data5-42-expected_chunks5]": 0.013197087000662577, + 
"tests/utils/test_parallel.py::test_chunkification[joblib-data6-42-expected_chunks6]": 0.017662769996604766, + "tests/utils/test_parallel.py::test_chunkification[joblib-data7-4-expected_chunks7]": 0.013664767000591382, + "tests/utils/test_parallel.py::test_chunkification[joblib-data8-4-expected_chunks8]": 0.0129568249994918, + "tests/utils/test_parallel.py::test_chunkification[ray-external-data0-3-expected_chunks0]": 0.02873299299972132, + "tests/utils/test_parallel.py::test_chunkification[ray-external-data1-2-expected_chunks1]": 0.037400651001007645, + "tests/utils/test_parallel.py::test_chunkification[ray-external-data2-2-expected_chunks2]": 0.04821507099950395, + "tests/utils/test_parallel.py::test_chunkification[ray-external-data3-3-expected_chunks3]": 0.03959165199921699, + "tests/utils/test_parallel.py::test_chunkification[ray-external-data4-5-expected_chunks4]": 0.030608711000240874, + "tests/utils/test_parallel.py::test_chunkification[ray-external-data5-42-expected_chunks5]": 0.026263547000780818, + "tests/utils/test_parallel.py::test_chunkification[ray-external-data6-42-expected_chunks6]": 0.01923054399958346, + "tests/utils/test_parallel.py::test_chunkification[ray-external-data7-4-expected_chunks7]": 0.020033368999065715, + "tests/utils/test_parallel.py::test_chunkification[ray-external-data8-4-expected_chunks8]": 0.019113988000754034, + "tests/utils/test_parallel.py::test_chunkification[ray-local-data0-3-expected_chunks0]": 0.022260648998781107, + "tests/utils/test_parallel.py::test_chunkification[ray-local-data1-2-expected_chunks1]": 0.02477619599994796, + "tests/utils/test_parallel.py::test_chunkification[ray-local-data2-2-expected_chunks2]": 0.037821603000338655, + "tests/utils/test_parallel.py::test_chunkification[ray-local-data3-3-expected_chunks3]": 0.0276968880007189, + "tests/utils/test_parallel.py::test_chunkification[ray-local-data4-5-expected_chunks4]": 0.03822717000184639, + 
"tests/utils/test_parallel.py::test_chunkification[ray-local-data5-42-expected_chunks5]": 0.03200487200047064, + "tests/utils/test_parallel.py::test_chunkification[ray-local-data6-42-expected_chunks6]": 0.02251517100012279, + "tests/utils/test_parallel.py::test_chunkification[ray-local-data7-4-expected_chunks7]": 0.02549016900047718, + "tests/utils/test_parallel.py::test_chunkification[ray-local-data8-4-expected_chunks8]": 0.016007507998438086, + "tests/utils/test_parallel.py::test_effective_n_jobs[joblib]": 0.005121522000990808, + "tests/utils/test_parallel.py::test_effective_n_jobs[ray-external]": 4.8416320709984575, + "tests/utils/test_parallel.py::test_effective_n_jobs[ray-local]": 6.68878685799973, + "tests/utils/test_parallel.py::test_future_cancellation[joblib]": 0.013322050999704516, + "tests/utils/test_parallel.py::test_future_cancellation[ray-external]": 6.1742852379975375, + "tests/utils/test_parallel.py::test_future_cancellation[ray-local]": 5.196579726998607, + "tests/utils/test_parallel.py::test_futures_executor_map[joblib]": 2.7167825960004848, + "tests/utils/test_parallel.py::test_futures_executor_map[ray-external]": 0.10519307000140543, + "tests/utils/test_parallel.py::test_futures_executor_map[ray-local]": 0.10775902599925757, + "tests/utils/test_parallel.py::test_futures_executor_map_with_max_workers[joblib]": 0.012954608999280026, + "tests/utils/test_parallel.py::test_futures_executor_map_with_max_workers[ray-external]": 1.1045504180019634, + "tests/utils/test_parallel.py::test_futures_executor_map_with_max_workers[ray-local]": 1.100314563000211, + "tests/utils/test_parallel.py::test_futures_executor_submit[joblib]": 3.2937196319981012, + "tests/utils/test_parallel.py::test_futures_executor_submit[ray-external]": 0.06437306899897521, + "tests/utils/test_parallel.py::test_futures_executor_submit[ray-local]": 0.05545763400186843, + "tests/utils/test_parallel.py::test_map_reduce_job[joblib-1-list-indices0-expected0]": 0.0033702880009514047, + 
"tests/utils/test_parallel.py::test_map_reduce_job[joblib-1-list-indices1-expected1]": 0.003624205000960501, + "tests/utils/test_parallel.py::test_map_reduce_job[joblib-1-list-indices2-expected2]": 0.0034593179989315104, + "tests/utils/test_parallel.py::test_map_reduce_job[joblib-1-numpy-indices4-45]": 0.003431146000366425, + "tests/utils/test_parallel.py::test_map_reduce_job[joblib-1-range-indices3-expected3]": 0.003291076000095927, + "tests/utils/test_parallel.py::test_map_reduce_job[joblib-2-list-indices0-expected0]": 0.0043230089995631715, + "tests/utils/test_parallel.py::test_map_reduce_job[joblib-2-list-indices1-expected1]": 0.014759305000552558, + "tests/utils/test_parallel.py::test_map_reduce_job[joblib-2-list-indices2-expected2]": 0.014669898000647663, + "tests/utils/test_parallel.py::test_map_reduce_job[joblib-2-numpy-indices4-45]": 0.014518962998408824, + "tests/utils/test_parallel.py::test_map_reduce_job[joblib-2-range-indices3-expected3]": 0.014446292998400168, + "tests/utils/test_parallel.py::test_map_reduce_job[joblib-4-list-indices0-expected0]": 0.16248785400057386, + "tests/utils/test_parallel.py::test_map_reduce_job[joblib-4-list-indices1-expected1]": 2.277719737998268, + "tests/utils/test_parallel.py::test_map_reduce_job[joblib-4-list-indices2-expected2]": 3.347688416000892, + "tests/utils/test_parallel.py::test_map_reduce_job[joblib-4-numpy-indices4-45]": 0.04604001000188873, + "tests/utils/test_parallel.py::test_map_reduce_job[joblib-4-range-indices3-expected3]": 0.057255595000242465, + "tests/utils/test_parallel.py::test_map_reduce_job[ray-external-1-list-indices0-expected0]": 0.026082702997882734, + "tests/utils/test_parallel.py::test_map_reduce_job[ray-external-1-list-indices1-expected1]": 0.023299047999898903, + "tests/utils/test_parallel.py::test_map_reduce_job[ray-external-1-list-indices2-expected2]": 0.02191418300026271, + "tests/utils/test_parallel.py::test_map_reduce_job[ray-external-1-numpy-indices4-45]": 0.02673473200047738, + 
"tests/utils/test_parallel.py::test_map_reduce_job[ray-external-1-range-indices3-expected3]": 0.027526039999429486, + "tests/utils/test_parallel.py::test_map_reduce_job[ray-external-2-list-indices0-expected0]": 3.4228467769989948, + "tests/utils/test_parallel.py::test_map_reduce_job[ray-external-2-list-indices1-expected1]": 4.798353305001001, + "tests/utils/test_parallel.py::test_map_reduce_job[ray-external-2-list-indices2-expected2]": 4.636959622999711, + "tests/utils/test_parallel.py::test_map_reduce_job[ray-external-2-numpy-indices4-45]": 4.028821964997405, + "tests/utils/test_parallel.py::test_map_reduce_job[ray-external-2-range-indices3-expected3]": 4.398552747999929, + "tests/utils/test_parallel.py::test_map_reduce_job[ray-external-4-list-indices0-expected0]": 3.734075545000451, + "tests/utils/test_parallel.py::test_map_reduce_job[ray-external-4-list-indices1-expected1]": 5.287959784998748, + "tests/utils/test_parallel.py::test_map_reduce_job[ray-external-4-list-indices2-expected2]": 6.245923890002814, + "tests/utils/test_parallel.py::test_map_reduce_job[ray-external-4-numpy-indices4-45]": 6.61028953999994, + "tests/utils/test_parallel.py::test_map_reduce_job[ray-external-4-range-indices3-expected3]": 6.340780258999075, + "tests/utils/test_parallel.py::test_map_reduce_job[ray-local-1-list-indices0-expected0]": 0.026392571999167558, + "tests/utils/test_parallel.py::test_map_reduce_job[ray-local-1-list-indices1-expected1]": 0.0228169030015124, + "tests/utils/test_parallel.py::test_map_reduce_job[ray-local-1-list-indices2-expected2]": 0.026224847002595197, + "tests/utils/test_parallel.py::test_map_reduce_job[ray-local-1-numpy-indices4-45]": 0.02119264299835777, + "tests/utils/test_parallel.py::test_map_reduce_job[ray-local-1-range-indices3-expected3]": 0.02678771700084326, + "tests/utils/test_parallel.py::test_map_reduce_job[ray-local-2-list-indices0-expected0]": 2.813331847997688, + 
"tests/utils/test_parallel.py::test_map_reduce_job[ray-local-2-list-indices1-expected1]": 4.129950463000569, + "tests/utils/test_parallel.py::test_map_reduce_job[ray-local-2-list-indices2-expected2]": 4.1853057150001405, + "tests/utils/test_parallel.py::test_map_reduce_job[ray-local-2-numpy-indices4-45]": 3.9139689650000946, + "tests/utils/test_parallel.py::test_map_reduce_job[ray-local-2-range-indices3-expected3]": 4.066097430000809, + "tests/utils/test_parallel.py::test_map_reduce_job[ray-local-4-list-indices0-expected0]": 3.626414754000507, + "tests/utils/test_parallel.py::test_map_reduce_job[ray-local-4-list-indices1-expected1]": 5.354816800998378, + "tests/utils/test_parallel.py::test_map_reduce_job[ray-local-4-list-indices2-expected2]": 6.589774920001219, + "tests/utils/test_parallel.py::test_map_reduce_job[ray-local-4-numpy-indices4-45]": 6.373054822000995, + "tests/utils/test_parallel.py::test_map_reduce_job[ray-local-4-range-indices3-expected3]": 6.71076984499814, + "tests/utils/test_parallel.py::test_map_reduce_job_partial_map_and_reduce_func[joblib]": 0.03710782099915377, + "tests/utils/test_parallel.py::test_map_reduce_job_partial_map_and_reduce_func[ray-external]": 6.3739082100000815, + "tests/utils/test_parallel.py::test_map_reduce_job_partial_map_and_reduce_func[ray-local]": 6.171818285998597, + "tests/utils/test_parallel.py::test_map_reduce_seeding[joblib-42-12]": 0.16202725999937684, + "tests/utils/test_parallel.py::test_map_reduce_seeding[ray-external-42-12]": 19.644846438999593, + "tests/utils/test_parallel.py::test_map_reduce_seeding[ray-local-42-12]": 19.494929903998127, + "tests/utils/test_parallel.py::test_wrap_function[joblib]": 0.010273419000441208, + "tests/utils/test_parallel.py::test_wrap_function[ray-external]": 4.3178896000026725, + "tests/utils/test_parallel.py::test_wrap_function[ray-local]": 4.386876819999088, + "tests/utils/test_score.py::test_compose_score": 0.003188072001648834, + "tests/utils/test_score.py::test_scorer": 
0.006043704999683541, + "tests/utils/test_score.py::test_squashed_r2": 0.00286291600241384, + "tests/utils/test_score.py::test_squashed_variance": 0.002624727998409071, + "tests/utils/test_status.py::test_and_status": 0.0023914820012578275, + "tests/utils/test_status.py::test_not_status": 0.0024304439994011773, + "tests/utils/test_status.py::test_or_status": 0.0031306429991673212, "tests/utils/test_utility.py::test_cache[2-0-8]": 0.00677607100806199, - "tests/utils/test_utility.py::test_data_utility_learning_wrapper[10-2-0-8]": 0.004311377968406305, - "tests/utils/test_utility.py::test_data_utility_learning_wrapper[2-2-0-8]": 0.0040499519964214414, + "tests/utils/test_utility.py::test_data_utility_learning_wrapper[10-2-0-8]": 0.007143015998735791, + "tests/utils/test_utility.py::test_data_utility_learning_wrapper[2-2-0-8]": 0.008842511999318958, "tests/utils/test_utility.py::test_different_cache_signature[model_kwargs0-2-0-8]": 0.0038117940130177885, "tests/utils/test_utility.py::test_different_cache_signature[model_kwargs1-2-0-8]": 0.0034867670328821987, - "tests/utils/test_utility.py::test_utility_show_warnings[4-4-False]": 0.00734079402172938, - "tests/utils/test_utility.py::test_utility_show_warnings[4-4-True]": 0.007422954018693417, + "tests/utils/test_utility.py::test_different_utility_with_same_cache[2-0-8]": 0.00974041799963743, + "tests/utils/test_utility.py::test_utility_serialization[False-2-0-8]": 0.004299543001252459, + "tests/utils/test_utility.py::test_utility_serialization[True-2-0-8]": 0.004922002000967041, + "tests/utils/test_utility.py::test_utility_show_warnings[4-4-False]": 0.018307410000488744, + "tests/utils/test_utility.py::test_utility_show_warnings[4-4-True]": 0.009923514002366574, + "tests/utils/test_utility.py::test_utility_with_cache[2-0-8]": 0.010860190002858872, + "tests/value/least_core/test_common.py::test_lc_solve_problems[test_game0]": 6.664896995000163, 
"tests/value/least_core/test_common.py::test_lc_solve_problems[test_utility0]": 3.0655845460132696, + "tests/value/least_core/test_montecarlo.py::test_montecarlo_least_core[False--1-test_game0-0.1-128]": 0.10254659299971536, + "tests/value/least_core/test_montecarlo.py::test_montecarlo_least_core[False--1-test_game1-0.2-10000]": 0.95324419499957, "tests/value/least_core/test_montecarlo.py::test_montecarlo_least_core[False--1-test_utility0-0.1-128]": 0.05090764199849218, "tests/value/least_core/test_montecarlo.py::test_montecarlo_least_core[False--1-test_utility1-0.2-10000]": 0.39550038598827086, + "tests/value/least_core/test_montecarlo.py::test_montecarlo_least_core[False-1-test_game0-0.1-128]": 0.11610117799864383, + "tests/value/least_core/test_montecarlo.py::test_montecarlo_least_core[False-1-test_game1-0.2-10000]": 1.9240173660018627, "tests/value/least_core/test_montecarlo.py::test_montecarlo_least_core[False-1-test_utility0-0.1-128]": 0.054777625045971945, "tests/value/least_core/test_montecarlo.py::test_montecarlo_least_core[False-1-test_utility1-0.2-10000]": 0.7125970929628238, + "tests/value/least_core/test_montecarlo.py::test_montecarlo_least_core[True--1-test_game0-0.1-128]": 12.35835815199971, + "tests/value/least_core/test_montecarlo.py::test_montecarlo_least_core[True--1-test_game1-0.2-10000]": 1.27118392400007, "tests/value/least_core/test_montecarlo.py::test_montecarlo_least_core[True--1-test_utility0-0.1-128]": 6.515727574034827, "tests/value/least_core/test_montecarlo.py::test_montecarlo_least_core[True--1-test_utility1-0.2-10000]": 0.6112625639943872, + "tests/value/least_core/test_montecarlo.py::test_montecarlo_least_core[True-1-test_game0-0.1-128]": 0.13497778700002527, + "tests/value/least_core/test_montecarlo.py::test_montecarlo_least_core[True-1-test_game1-0.2-10000]": 2.054010283000025, "tests/value/least_core/test_montecarlo.py::test_montecarlo_least_core[True-1-test_utility0-0.1-128]": 0.07473104700329714, 
"tests/value/least_core/test_montecarlo.py::test_montecarlo_least_core[True-1-test_utility1-0.2-10000]": 0.7888634809933137, + "tests/value/least_core/test_naive.py::test_naive_least_core[False-test_game0]": 0.056533884000600665, + "tests/value/least_core/test_naive.py::test_naive_least_core[False-test_game1]": 0.05103961900022114, + "tests/value/least_core/test_naive.py::test_naive_least_core[False-test_game2]": 0.04528383999968355, + "tests/value/least_core/test_naive.py::test_naive_least_core[False-test_game3]": 0.04622581199873821, + "tests/value/least_core/test_naive.py::test_naive_least_core[False-test_game4]": 0.04450138000174775, "tests/value/least_core/test_naive.py::test_naive_least_core[False-test_utility0]": 0.024124946998199448, "tests/value/least_core/test_naive.py::test_naive_least_core[False-test_utility1]": 0.02425819096970372, "tests/value/least_core/test_naive.py::test_naive_least_core[False-test_utility2]": 0.023533977015176788, "tests/value/least_core/test_naive.py::test_naive_least_core[False-test_utility3]": 0.023558928980492055, "tests/value/least_core/test_naive.py::test_naive_least_core[False-test_utility4]": 0.024587185034761205, + "tests/value/least_core/test_naive.py::test_naive_least_core[True-test_game0]": 0.055623405996811925, + "tests/value/least_core/test_naive.py::test_naive_least_core[True-test_game1]": 0.05625994600086415, + "tests/value/least_core/test_naive.py::test_naive_least_core[True-test_game2]": 0.05063546000201313, + "tests/value/least_core/test_naive.py::test_naive_least_core[True-test_game3]": 0.05257723800241365, + "tests/value/least_core/test_naive.py::test_naive_least_core[True-test_game4]": 0.055973189997530426, "tests/value/least_core/test_naive.py::test_naive_least_core[True-test_utility0]": 0.025446541025303304, "tests/value/least_core/test_naive.py::test_naive_least_core[True-test_utility1]": 0.026494102989090607, "tests/value/least_core/test_naive.py::test_naive_least_core[True-test_utility2]": 
0.02477889700094238, "tests/value/least_core/test_naive.py::test_naive_least_core[True-test_utility3]": 0.026450325007317588, "tests/value/least_core/test_naive.py::test_naive_least_core[True-test_utility4]": 0.026973432017257437, - "tests/value/loo/test_loo.py::test_loo[100]": 3.7793434759951197, - "tests/value/loo/test_loo.py::test_loo[10]": 3.8455980509752408, - "tests/value/shapley/test_classwise.py::test_classwise_scorer_accuracies_left_right_margins[101-0.3-0.4]": 0.004718418029369786, - "tests/value/shapley/test_classwise.py::test_classwise_scorer_accuracies_manual_derivation": 0.022209248010767624, - "tests/value/shapley/test_classwise.py::test_classwise_scorer_is_symmetric[101-0.3-0.4]": 0.0053302829910535365, - "tests/value/shapley/test_classwise.py::test_classwise_scorer_representation": 0.002573036035755649, - "tests/value/shapley/test_classwise.py::test_classwise_scorer_utility[101-0.3-0.4]": 0.00688477698713541, - "tests/value/shapley/test_classwise.py::test_classwise_shapley[classwise_shapley_exact_solution-n_resample_complement_sets=1-n_samples=500]": 6.088012945023365, - "tests/value/shapley/test_classwise.py::test_classwise_shapley[classwise_shapley_exact_solution_no_default-n_resample_complement_sets=1-n_samples=500]": 6.90557194603025, - "tests/value/shapley/test_classwise.py::test_classwise_shapley[classwise_shapley_exact_solution_no_default_allow_empty_set-n_resample_complement_sets=1-n_samples=500]": 6.456796451995615, - "tests/value/shapley/test_classwise.py::test_classwise_shapley[classwise_shapley_exact_solution_normalized-n_resample_complement_sets=1-n_samples=500]": 5.917300594970584, - "tests/value/shapley/test_classwise.py::test_closed_form_linear_classifier": 0.004191815009107813, - "tests/value/shapley/test_knn.py::test_knn_montecarlo_match": 6.380129672033945, + "tests/value/loo/test_loo.py::test_loo[100]": 6.34605625200129, + "tests/value/loo/test_loo.py::test_loo[10]": 6.683512068999335, + 
"tests/value/shapley/test_classwise.py::test_classwise_scorer_accuracies_left_right_margins[101-0.3-0.4]": 0.014495325998723274, + "tests/value/shapley/test_classwise.py::test_classwise_scorer_accuracies_manual_derivation": 0.059531668999625253, + "tests/value/shapley/test_classwise.py::test_classwise_scorer_is_symmetric[101-0.3-0.4]": 0.017718389000947354, + "tests/value/shapley/test_classwise.py::test_classwise_scorer_representation": 0.00893844900019758, + "tests/value/shapley/test_classwise.py::test_classwise_scorer_utility[101-0.3-0.4]": 0.02120917100182851, + "tests/value/shapley/test_classwise.py::test_classwise_shapley[classwise_shapley_exact_solution-n_resample_complement_sets=1-n_samples=500]": 11.03723036699921, + "tests/value/shapley/test_classwise.py::test_classwise_shapley[classwise_shapley_exact_solution_no_default-n_resample_complement_sets=1-n_samples=500]": 12.916025546999663, + "tests/value/shapley/test_classwise.py::test_classwise_shapley[classwise_shapley_exact_solution_no_default_allow_empty_set-n_resample_complement_sets=1-n_samples=500]": 12.068119810999633, + "tests/value/shapley/test_classwise.py::test_classwise_shapley[classwise_shapley_exact_solution_normalized-n_resample_complement_sets=1-n_samples=500]": 10.891289137000058, + "tests/value/shapley/test_classwise.py::test_closed_form_linear_classifier": 0.01344082000105118, + "tests/value/shapley/test_knn.py::test_knn_montecarlo_match": 11.906123751998166, "tests/value/shapley/test_montecarlo.py::test_analytic_montecarlo_shapley[12-owen-0.1-0.0001-kwargs2]": 0.6999966300209053, "tests/value/shapley/test_montecarlo.py::test_analytic_montecarlo_shapley[12-owen_antithetic-0.1-0.0001-kwargs3]": 1.3923712590476498, "tests/value/shapley/test_montecarlo.py::test_analytic_montecarlo_shapley[12-permutation_montecarlo-0.1-1e-05-kwargs0]": 4.533932764985366, "tests/value/shapley/test_montecarlo.py::test_analytic_montecarlo_shapley[3-group_testing-0.1-0.01-kwargs4]": 2.874565462989267, 
"tests/value/shapley/test_montecarlo.py::test_analytic_montecarlo_shapley[8-combinatorial_montecarlo-0.2-0.0001-kwargs1]": 4.175152084033471, - "tests/value/shapley/test_montecarlo.py::test_grouped_linear_montecarlo_shapley[permutation_montecarlo-kwargs0-scorer0-0.1-2-0-21-2]": 5.129105891013751, - "tests/value/shapley/test_montecarlo.py::test_hoeffding_bound_montecarlo[combinatorial_montecarlo-6-0.1-0.1]": 4.910673014004715, - "tests/value/shapley/test_montecarlo.py::test_hoeffding_bound_montecarlo[permutation_montecarlo-6-0.1-0.1]": 52.25644952899893, + "tests/value/shapley/test_montecarlo.py::test_games[combinatorial_montecarlo-0.2-0.0001-kwargs1-test_game0]": 8.304236846999629, + "tests/value/shapley/test_montecarlo.py::test_games[combinatorial_montecarlo-0.2-0.0001-kwargs1-test_game1]": 8.651754697999422, + "tests/value/shapley/test_montecarlo.py::test_games[group_testing-0.1-0.01-kwargs4-test_game0]": 4.506434214001274, + "tests/value/shapley/test_montecarlo.py::test_games[group_testing-0.1-0.01-kwargs4-test_game1]": 5.184473866002008, + "tests/value/shapley/test_montecarlo.py::test_games[owen-0.2-0.0001-kwargs2-test_game0]": 0.695304662000126, + "tests/value/shapley/test_montecarlo.py::test_games[owen-0.2-0.0001-kwargs2-test_game1]": 0.754036617001475, + "tests/value/shapley/test_montecarlo.py::test_games[owen_antithetic-0.1-0.0001-kwargs3-test_game0]": 1.3446016939979017, + "tests/value/shapley/test_montecarlo.py::test_games[owen_antithetic-0.1-0.0001-kwargs3-test_game1]": 1.7906026460022986, + "tests/value/shapley/test_montecarlo.py::test_games[permutation_montecarlo-0.2-0.0001-kwargs0-test_game0]": 9.640759977000926, + "tests/value/shapley/test_montecarlo.py::test_games[permutation_montecarlo-0.2-0.0001-kwargs0-test_game1]": 9.149135870000464, + "tests/value/shapley/test_montecarlo.py::test_grouped_linear_montecarlo_shapley[permutation_montecarlo-kwargs0-scorer0-0.1-2-0-21-2]": 11.869230333000814, + 
"tests/value/shapley/test_montecarlo.py::test_hoeffding_bound_montecarlo[combinatorial_montecarlo-6-0.1-0.1]": 12.159375920000457, + "tests/value/shapley/test_montecarlo.py::test_hoeffding_bound_montecarlo[permutation_montecarlo-6-0.1-0.1]": 121.21386299999904, "tests/value/shapley/test_montecarlo.py::test_linear_montecarlo_shapley[combinatorial_montecarlo-kwargs1-scorer0-0.25-2-0-21]": 17.78464582102606, "tests/value/shapley/test_montecarlo.py::test_linear_montecarlo_shapley[group_testing-kwargs4-scorer0-0.25-2-0-21]": 29.239474696019897, "tests/value/shapley/test_montecarlo.py::test_linear_montecarlo_shapley[owen-kwargs2-scorer0-0.25-2-0-21]": 4.124498174991459, "tests/value/shapley/test_montecarlo.py::test_linear_montecarlo_shapley[owen_antithetic-kwargs3-scorer0-0.25-2-0-21]": 7.887545032019261, "tests/value/shapley/test_montecarlo.py::test_linear_montecarlo_shapley[permutation_montecarlo-kwargs0-scorer0-0.25-2-0-21]": 5.8485472809989005, - "tests/value/shapley/test_montecarlo.py::test_linear_montecarlo_with_outlier[group_testing-kwargs3-scorer0-0.2-2-0-21]": 30.232708652998554, - "tests/value/shapley/test_montecarlo.py::test_linear_montecarlo_with_outlier[owen-kwargs1-scorer0-0.2-2-0-21]": 13.355578221991891, - "tests/value/shapley/test_montecarlo.py::test_linear_montecarlo_with_outlier[owen_antithetic-kwargs2-scorer0-0.2-2-0-21]": 20.621750775026157, - "tests/value/shapley/test_montecarlo.py::test_linear_montecarlo_with_outlier[permutation_montecarlo-kwargs0-scorer0-0.2-2-0-21]": 5.888187222008128, + "tests/value/shapley/test_montecarlo.py::test_linear_montecarlo_with_outlier[group_testing-kwargs3-scorer0-0.2-2-0-21]": 105.57146695700249, + "tests/value/shapley/test_montecarlo.py::test_linear_montecarlo_with_outlier[owen-kwargs1-scorer0-0.2-2-0-21]": 46.293949323999186, + "tests/value/shapley/test_montecarlo.py::test_linear_montecarlo_with_outlier[owen_antithetic-kwargs2-scorer0-0.2-2-0-21]": 75.77437868900051, + 
"tests/value/shapley/test_montecarlo.py::test_linear_montecarlo_with_outlier[permutation_montecarlo-kwargs0-scorer0-0.2-2-0-21]": 14.84272324000085, "tests/value/shapley/test_montecarlo.py::test_montecarlo_shapley_housing_dataset[12-3-12-combinatorial_montecarlo-kwargs0]": 0.16786966001382098, "tests/value/shapley/test_montecarlo.py::test_montecarlo_shapley_housing_dataset[12-3-12-owen-kwargs1]": 17.011920137971174, "tests/value/shapley/test_montecarlo.py::test_montecarlo_shapley_housing_dataset[12-3-12-owen_antithetic-kwargs2]": 35.88025256394758, "tests/value/shapley/test_montecarlo.py::test_montecarlo_shapley_housing_dataset[12-3-4-group_testing-kwargs3]": 0.25901710899779573, + "tests/value/shapley/test_montecarlo.py::test_seed[combinatorial_montecarlo-kwargs0-test_game0]": 0.10237690700159874, + "tests/value/shapley/test_montecarlo.py::test_seed[group_testing-kwargs3-test_game0]": 1.3946212869996089, + "tests/value/shapley/test_montecarlo.py::test_seed[owen-kwargs1-test_game0]": 2.984055114999137, + "tests/value/shapley/test_montecarlo.py::test_seed[owen_antithetic-kwargs2-test_game0]": 5.702334433002761, "tests/value/shapley/test_naive.py::test_analytic_exact_shapley[12-combinatorial_exact_shapley-0.01-1e-05]": 2.798590613005217, "tests/value/shapley/test_naive.py::test_analytic_exact_shapley[6-permutation_exact_shapley-0.01-1e-05]": 0.34537768000154756, - "tests/value/shapley/test_naive.py::test_grouped_linear[2-0-50-3-r2]": 0.057835308980429545, - "tests/value/shapley/test_naive.py::test_grouped_linear[2-1-100-5-explained_variance]": 1.2154581100330688, - "tests/value/shapley/test_naive.py::test_grouped_linear[2-1-100-5-r2]": 1.1950475970224943, + "tests/value/shapley/test_naive.py::test_games[combinatorial_exact_shapley-test_game0-0.1-1e-05]": 0.035801175001324737, + "tests/value/shapley/test_naive.py::test_games[combinatorial_exact_shapley-test_game1-0.1-1e-05]": 0.020296718997997232, + 
"tests/value/shapley/test_naive.py::test_games[combinatorial_exact_shapley-test_game2-0.1-1e-05]": 0.026713223998740432, + "tests/value/shapley/test_naive.py::test_games[combinatorial_exact_shapley-test_game3-0.1-1e-05]": 0.024250888998722075, + "tests/value/shapley/test_naive.py::test_games[combinatorial_exact_shapley-test_game4-0.1-1e-05]": 0.08578255800057377, + "tests/value/shapley/test_naive.py::test_games[permutation_exact_shapley-test_game0-0.1-1e-05]": 0.0334680340019986, + "tests/value/shapley/test_naive.py::test_games[permutation_exact_shapley-test_game1-0.1-1e-05]": 0.02248540199798299, + "tests/value/shapley/test_naive.py::test_games[permutation_exact_shapley-test_game2-0.1-1e-05]": 0.021266358000502805, + "tests/value/shapley/test_naive.py::test_games[permutation_exact_shapley-test_game3-0.1-1e-05]": 0.02347195299989835, + "tests/value/shapley/test_naive.py::test_games[permutation_exact_shapley-test_game4-0.1-1e-05]": 0.9876527700016595, + "tests/value/shapley/test_naive.py::test_grouped_linear[2-0-50-3-r2]": 0.19884431500031496, + "tests/value/shapley/test_naive.py::test_grouped_linear[2-1-100-5-explained_variance]": 3.9915946569999505, + "tests/value/shapley/test_naive.py::test_grouped_linear[2-1-100-5-r2]": 3.9885682109998015, "tests/value/shapley/test_naive.py::test_linear[2-0-10-r2]": 0.05533879197901115, "tests/value/shapley/test_naive.py::test_linear[2-1-10-explained_variance]": 0.058987755968701094, "tests/value/shapley/test_naive.py::test_linear[2-1-10-neg_median_absolute_error]": 0.05515471697435714, "tests/value/shapley/test_naive.py::test_linear[2-1-10-r2]": 0.05683578198659234, - "tests/value/shapley/test_naive.py::test_linear_with_outlier[2-0-20-r2]": 7.4271527160017285, - "tests/value/shapley/test_naive.py::test_linear_with_outlier[2-1-20-explained_variance]": 7.752014733996475, - "tests/value/shapley/test_naive.py::test_linear_with_outlier[2-1-20-neg_median_absolute_error]": 7.2494586749817245, - 
"tests/value/shapley/test_naive.py::test_linear_with_outlier[2-1-20-r2]": 7.528596303978702, - "tests/value/shapley/test_naive.py::test_polynomial[coefficients0-r2]": 0.10091358600766398, - "tests/value/shapley/test_naive.py::test_polynomial[coefficients1-neg_median_absolute_error]": 0.09756919997744262, - "tests/value/shapley/test_naive.py::test_polynomial[coefficients2-explained_variance]": 0.10092617000918835, - "tests/value/shapley/test_naive.py::test_polynomial_with_outlier[coefficients0-r2]": 0.05707916300161742, - "tests/value/shapley/test_naive.py::test_polynomial_with_outlier[coefficients1-neg_median_absolute_error]": 0.058802402985747904, - "tests/value/shapley/test_naive.py::test_polynomial_with_outlier[coefficients2-explained_variance]": 0.06408755297889002, + "tests/value/shapley/test_naive.py::test_linear_with_outlier[2-0-20-r2]": 25.743576199000017, + "tests/value/shapley/test_naive.py::test_linear_with_outlier[2-1-20-explained_variance]": 26.06965675200081, + "tests/value/shapley/test_naive.py::test_linear_with_outlier[2-1-20-neg_median_absolute_error]": 25.645237798999005, + "tests/value/shapley/test_naive.py::test_linear_with_outlier[2-1-20-r2]": 25.97635805399841, + "tests/value/shapley/test_naive.py::test_polynomial[coefficients0-r2]": 0.20116403300016827, + "tests/value/shapley/test_naive.py::test_polynomial[coefficients1-neg_median_absolute_error]": 0.20279847600068024, + "tests/value/shapley/test_naive.py::test_polynomial[coefficients2-explained_variance]": 0.20646126699830347, + "tests/value/shapley/test_naive.py::test_polynomial_with_outlier[coefficients0-r2]": 0.15503699600049003, + "tests/value/shapley/test_naive.py::test_polynomial_with_outlier[coefficients1-neg_median_absolute_error]": 0.15186486699894886, + "tests/value/shapley/test_naive.py::test_polynomial_with_outlier[coefficients2-explained_variance]": 0.1560443580001447, + "tests/value/shapley/test_truncated.py::test_games[done0-NoTruncation-truncation_kwargs0-test_game0]": 
8.864981821001493, + "tests/value/shapley/test_truncated.py::test_games[done0-NoTruncation-truncation_kwargs0-test_game1]": 8.904717276998781, + "tests/value/shapley/test_truncated.py::test_games[done1-FixedTruncation-truncation_kwargs1-test_game0]": 8.893666212001335, + "tests/value/shapley/test_truncated.py::test_games[done1-FixedTruncation-truncation_kwargs1-test_game1]": 8.871429693997925, "tests/value/shapley/test_truncated.py::test_tmcs_analytic_montecarlo_shapley[12-truncated_montecarlo-0.1-1e-05-kwargs0]": 5.025441929989029, "tests/value/shapley/test_truncated.py::test_tmcs_linear_montecarlo_shapley[truncated_montecarlo-kwargs0-scorer0-0.25-2-0-21]": 5.633914494974306, - "tests/value/shapley/test_truncated.py::test_tmcs_linear_montecarlo_with_outlier[truncated_montecarlo-kwargs0-scorer0-0.2-2-0-21]": 3.523623990971828, - "tests/value/test_sampler.py::test_chunkify[AntitheticSampler]": 0.0012030639918521047, - "tests/value/test_sampler.py::test_chunkify[DeterministicUniformSampler]": 0.0011419990041758865, - "tests/value/test_sampler.py::test_chunkify[RandomHierarchicalSampler]": 0.0011900250101462007, - "tests/value/test_sampler.py::test_chunkify[UniformSampler]": 0.0013321389851626009, - "tests/value/test_sampler.py::test_chunkify_permutation[DeterministicPermutationSampler]": 0.0010862670314963907, - "tests/value/test_sampler.py::test_chunkify_permutation[PermutationSampler]": 0.001125522016081959, - "tests/value/test_sampler.py::test_proper[indices0-AntitheticSampler]": 0.0011964229634031653, - "tests/value/test_sampler.py::test_proper[indices0-DeterministicPermutationSampler]": 0.0013584279513452202, - "tests/value/test_sampler.py::test_proper[indices0-DeterministicUniformSampler]": 0.0013845030043739825, - "tests/value/test_sampler.py::test_proper[indices0-PermutationSampler]": 0.0012692750024143606, - "tests/value/test_sampler.py::test_proper[indices0-RandomHierarchicalSampler]": 0.0011780599888879806, - 
"tests/value/test_sampler.py::test_proper[indices0-UniformSampler]": 0.0012423349835444242, - "tests/value/test_sampler.py::test_proper[indices1-AntitheticSampler]": 0.001568679028423503, - "tests/value/test_sampler.py::test_proper[indices1-DeterministicPermutationSampler]": 0.0013892220158595592, - "tests/value/test_sampler.py::test_proper[indices1-DeterministicUniformSampler]": 0.0014415960176847875, - "tests/value/test_sampler.py::test_proper[indices1-PermutationSampler]": 0.0012552720145322382, - "tests/value/test_sampler.py::test_proper[indices1-RandomHierarchicalSampler]": 0.0017029709706548601, - "tests/value/test_sampler.py::test_proper[indices1-UniformSampler]": 0.0015911830123513937, - "tests/value/test_sampler.py::test_proper_reproducible[indices0-AntitheticSampler]": 0.0014955719816498458, - "tests/value/test_sampler.py::test_proper_reproducible[indices0-PermutationSampler]": 0.0017780059715732932, - "tests/value/test_sampler.py::test_proper_reproducible[indices0-RandomHierarchicalSampler]": 0.0015286150155588984, - "tests/value/test_sampler.py::test_proper_reproducible[indices0-UniformSampler]": 0.0013392769906204194, - "tests/value/test_sampler.py::test_proper_reproducible[indices1-AntitheticSampler]": 0.005814862961415201, - "tests/value/test_sampler.py::test_proper_reproducible[indices1-PermutationSampler]": 0.0022604400001000613, - "tests/value/test_sampler.py::test_proper_reproducible[indices1-RandomHierarchicalSampler]": 0.01281771101639606, - "tests/value/test_sampler.py::test_proper_reproducible[indices1-UniformSampler]": 0.006939170008990914, - "tests/value/test_sampler.py::test_proper_stochastic[indices0-AntitheticSampler]": 0.001301849988522008, - "tests/value/test_sampler.py::test_proper_stochastic[indices0-PermutationSampler]": 0.0013378779985941947, - "tests/value/test_sampler.py::test_proper_stochastic[indices0-RandomHierarchicalSampler]": 0.0014513320056721568, - 
"tests/value/test_sampler.py::test_proper_stochastic[indices0-UniformSampler]": 0.0014353079604916275, - "tests/value/test_sampler.py::test_proper_stochastic[indices1-AntitheticSampler]": 0.006029498006682843, - "tests/value/test_sampler.py::test_proper_stochastic[indices1-PermutationSampler]": 0.0019644349522423, - "tests/value/test_sampler.py::test_proper_stochastic[indices1-RandomHierarchicalSampler]": 0.012361108005279675, - "tests/value/test_sampler.py::test_proper_stochastic[indices1-UniformSampler]": 0.006347205984639004, - "tests/value/test_semivalues.py::test_banzhaf[AntitheticPermutationSampler-5]": 10.714197647990659, - "tests/value/test_semivalues.py::test_banzhaf[AntitheticSampler-5]": 4.695468286023242, - "tests/value/test_semivalues.py::test_banzhaf[DeterministicPermutationSampler-5]": 6.074063064996153, - "tests/value/test_semivalues.py::test_banzhaf[DeterministicUniformSampler-5]": 4.212341544014635, - "tests/value/test_semivalues.py::test_banzhaf[PermutationSampler-5]": 8.149094285006868, - "tests/value/test_semivalues.py::test_banzhaf[UniformSampler-5]": 4.764893947984092, - "tests/value/test_semivalues.py::test_coefficients[banzhaf_coefficient-100]": 0.003842581994831562, - "tests/value/test_semivalues.py::test_coefficients[banzhaf_coefficient-10]": 0.0032151709601748735, - "tests/value/test_semivalues.py::test_coefficients[beta_coefficient_w0-100]": 0.004444399964995682, - "tests/value/test_semivalues.py::test_coefficients[beta_coefficient_w0-10]": 0.003756532969418913, - "tests/value/test_semivalues.py::test_coefficients[beta_coefficient_w1-100]": 0.004344976012362167, - "tests/value/test_semivalues.py::test_coefficients[beta_coefficient_w1-10]": 0.003551592002622783, - "tests/value/test_semivalues.py::test_coefficients[beta_coefficient_w2-100]": 0.004556107014650479, - "tests/value/test_semivalues.py::test_coefficients[beta_coefficient_w2-10]": 0.0035066070267930627, - 
"tests/value/test_semivalues.py::test_coefficients[shapley_coefficient-100]": 0.0047601540281903, - "tests/value/test_semivalues.py::test_coefficients[shapley_coefficient-10]": 0.0030498180130962282, + "tests/value/shapley/test_truncated.py::test_tmcs_linear_montecarlo_with_outlier[truncated_montecarlo-kwargs0-scorer0-0.2-2-0-21]": 7.1438663650005765, + "tests/value/test_sampler.py::test_chunkify[AntitheticSampler]": 0.002635386001202278, + "tests/value/test_sampler.py::test_chunkify[DeterministicUniformSampler]": 0.002136322002115776, + "tests/value/test_sampler.py::test_chunkify[RandomHierarchicalSampler]": 0.0024412720013060607, + "tests/value/test_sampler.py::test_chunkify[UniformSampler]": 0.0022573409987671766, + "tests/value/test_sampler.py::test_chunkify_permutation[DeterministicPermutationSampler]": 0.0024367070000153035, + "tests/value/test_sampler.py::test_chunkify_permutation[PermutationSampler]": 0.002322892001757282, + "tests/value/test_sampler.py::test_proper[indices0-AntitheticSampler]": 0.003334062997964793, + "tests/value/test_sampler.py::test_proper[indices0-DeterministicPermutationSampler]": 0.002626270001201192, + "tests/value/test_sampler.py::test_proper[indices0-DeterministicUniformSampler]": 0.002812078997521894, + "tests/value/test_sampler.py::test_proper[indices0-PermutationSampler]": 0.002539194001656142, + "tests/value/test_sampler.py::test_proper[indices0-RandomHierarchicalSampler]": 0.0026362519984104438, + "tests/value/test_sampler.py::test_proper[indices0-UniformSampler]": 0.0024412409984506667, + "tests/value/test_sampler.py::test_proper[indices1-AntitheticSampler]": 0.0027277339995634975, + "tests/value/test_sampler.py::test_proper[indices1-DeterministicPermutationSampler]": 0.002861182998458389, + "tests/value/test_sampler.py::test_proper[indices1-DeterministicUniformSampler]": 0.004058188998897094, + "tests/value/test_sampler.py::test_proper[indices1-PermutationSampler]": 0.0026329000011173775, + 
"tests/value/test_sampler.py::test_proper[indices1-RandomHierarchicalSampler]": 0.003700332001244533, + "tests/value/test_sampler.py::test_proper[indices1-UniformSampler]": 0.003620775998570025, + "tests/value/test_sampler.py::test_proper_reproducible[indices0-AntitheticSampler]": 0.0028454019975470146, + "tests/value/test_sampler.py::test_proper_reproducible[indices0-PermutationSampler]": 0.0024918920007621637, + "tests/value/test_sampler.py::test_proper_reproducible[indices0-RandomHierarchicalSampler]": 0.0020272490019124234, + "tests/value/test_sampler.py::test_proper_reproducible[indices0-UniformSampler]": 0.0027337100000295322, + "tests/value/test_sampler.py::test_proper_reproducible[indices1-AntitheticSampler]": 0.009103345002586138, + "tests/value/test_sampler.py::test_proper_reproducible[indices1-PermutationSampler]": 0.003312619001007988, + "tests/value/test_sampler.py::test_proper_reproducible[indices1-RandomHierarchicalSampler]": 0.017666732001089258, + "tests/value/test_sampler.py::test_proper_reproducible[indices1-UniformSampler]": 0.010906160998274572, + "tests/value/test_sampler.py::test_proper_stochastic[indices0-AntitheticSampler]": 0.0025616729999455856, + "tests/value/test_sampler.py::test_proper_stochastic[indices0-PermutationSampler]": 0.0034559460000309628, + "tests/value/test_sampler.py::test_proper_stochastic[indices0-RandomHierarchicalSampler]": 0.0029194710004958324, + "tests/value/test_sampler.py::test_proper_stochastic[indices0-UniformSampler]": 0.0028906579991598846, + "tests/value/test_sampler.py::test_proper_stochastic[indices1-AntitheticSampler]": 0.011208809999516234, + "tests/value/test_sampler.py::test_proper_stochastic[indices1-PermutationSampler]": 0.003227124001568882, + "tests/value/test_sampler.py::test_proper_stochastic[indices1-RandomHierarchicalSampler]": 0.020847252999374177, + "tests/value/test_sampler.py::test_proper_stochastic[indices1-UniformSampler]": 0.01049548499941011, + 
"tests/value/test_semivalues.py::test_banzhaf[AntitheticPermutationSampler-5]": 19.099751196999932, + "tests/value/test_semivalues.py::test_banzhaf[AntitheticSampler-5]": 8.640272729999197, + "tests/value/test_semivalues.py::test_banzhaf[DeterministicPermutationSampler-5]": 11.046467014999507, + "tests/value/test_semivalues.py::test_banzhaf[DeterministicUniformSampler-5]": 7.140763282997796, + "tests/value/test_semivalues.py::test_banzhaf[PermutationSampler-5]": 16.536335553000754, + "tests/value/test_semivalues.py::test_banzhaf[UniformSampler-5]": 8.56469571100206, + "tests/value/test_semivalues.py::test_coefficients[banzhaf_coefficient-100]": 0.01005963700117718, + "tests/value/test_semivalues.py::test_coefficients[banzhaf_coefficient-10]": 0.008440342002359102, + "tests/value/test_semivalues.py::test_coefficients[beta_coefficient_w0-100]": 0.01072616300007212, + "tests/value/test_semivalues.py::test_coefficients[beta_coefficient_w0-10]": 0.010928496998531045, + "tests/value/test_semivalues.py::test_coefficients[beta_coefficient_w1-100]": 0.012177771001006477, + "tests/value/test_semivalues.py::test_coefficients[beta_coefficient_w1-10]": 0.00821317400004773, + "tests/value/test_semivalues.py::test_coefficients[beta_coefficient_w2-100]": 0.010945971000182908, + "tests/value/test_semivalues.py::test_coefficients[beta_coefficient_w2-10]": 0.008208530998672359, + "tests/value/test_semivalues.py::test_coefficients[shapley_coefficient-100]": 0.019708362000528723, + "tests/value/test_semivalues.py::test_coefficients[shapley_coefficient-10]": 0.007813238997187, + "tests/value/test_semivalues.py::test_games_shapley[beta_coefficient_w-AntitheticPermutationSampler-test_game0]": 22.607437191998542, + "tests/value/test_semivalues.py::test_games_shapley[beta_coefficient_w-AntitheticPermutationSampler-test_game1]": 19.905466008996882, + "tests/value/test_semivalues.py::test_games_shapley[beta_coefficient_w-AntitheticSampler-test_game0]": 22.902231953998125, + 
"tests/value/test_semivalues.py::test_games_shapley[beta_coefficient_w-AntitheticSampler-test_game1]": 20.254530511001576, + "tests/value/test_semivalues.py::test_games_shapley[beta_coefficient_w-PermutationSampler-test_game0]": 22.228997524001898, + "tests/value/test_semivalues.py::test_games_shapley[beta_coefficient_w-PermutationSampler-test_game1]": 19.948070817999906, + "tests/value/test_semivalues.py::test_games_shapley[beta_coefficient_w-UniformSampler-test_game0]": 23.313307015001556, + "tests/value/test_semivalues.py::test_games_shapley[beta_coefficient_w-UniformSampler-test_game1]": 20.214418551000563, + "tests/value/test_semivalues.py::test_games_shapley[shapley_coefficient-AntitheticPermutationSampler-test_game0]": 16.339908187999754, + "tests/value/test_semivalues.py::test_games_shapley[shapley_coefficient-AntitheticPermutationSampler-test_game1]": 14.935287896998489, + "tests/value/test_semivalues.py::test_games_shapley[shapley_coefficient-AntitheticSampler-test_game0]": 16.71660759900078, + "tests/value/test_semivalues.py::test_games_shapley[shapley_coefficient-AntitheticSampler-test_game1]": 15.118247157999576, + "tests/value/test_semivalues.py::test_games_shapley[shapley_coefficient-PermutationSampler-test_game0]": 16.669900056000188, + "tests/value/test_semivalues.py::test_games_shapley[shapley_coefficient-PermutationSampler-test_game1]": 14.85890512199876, + "tests/value/test_semivalues.py::test_games_shapley[shapley_coefficient-UniformSampler-test_game0]": 16.9996823649999, + "tests/value/test_semivalues.py::test_games_shapley[shapley_coefficient-UniformSampler-test_game1]": 15.419395829998393, + "tests/value/test_semivalues.py::test_games_shapley_deterministic[beta_coefficient_w-DeterministicPermutationSampler-test_game0]": 7.571815403000073, + "tests/value/test_semivalues.py::test_games_shapley_deterministic[beta_coefficient_w-DeterministicPermutationSampler-test_game1]": 6.795873736999056, + 
"tests/value/test_semivalues.py::test_games_shapley_deterministic[beta_coefficient_w-DeterministicPermutationSampler-test_game2]": 6.49785933900057, + "tests/value/test_semivalues.py::test_games_shapley_deterministic[beta_coefficient_w-DeterministicPermutationSampler-test_game3]": 7.046587265998824, + "tests/value/test_semivalues.py::test_games_shapley_deterministic[beta_coefficient_w-DeterministicUniformSampler-test_game0]": 6.9995765299991035, + "tests/value/test_semivalues.py::test_games_shapley_deterministic[beta_coefficient_w-DeterministicUniformSampler-test_game1]": 7.470778629000051, + "tests/value/test_semivalues.py::test_games_shapley_deterministic[beta_coefficient_w-DeterministicUniformSampler-test_game2]": 6.813381661997482, + "tests/value/test_semivalues.py::test_games_shapley_deterministic[beta_coefficient_w-DeterministicUniformSampler-test_game3]": 7.335269874001824, + "tests/value/test_semivalues.py::test_games_shapley_deterministic[shapley_coefficient-DeterministicPermutationSampler-test_game0]": 8.675189851999676, + "tests/value/test_semivalues.py::test_games_shapley_deterministic[shapley_coefficient-DeterministicPermutationSampler-test_game1]": 6.932035337997149, + "tests/value/test_semivalues.py::test_games_shapley_deterministic[shapley_coefficient-DeterministicPermutationSampler-test_game2]": 6.9341853499990975, + "tests/value/test_semivalues.py::test_games_shapley_deterministic[shapley_coefficient-DeterministicPermutationSampler-test_game3]": 6.737996050998845, + "tests/value/test_semivalues.py::test_games_shapley_deterministic[shapley_coefficient-DeterministicUniformSampler-test_game0]": 4.491834778002158, + "tests/value/test_semivalues.py::test_games_shapley_deterministic[shapley_coefficient-DeterministicUniformSampler-test_game1]": 6.446436399000959, + "tests/value/test_semivalues.py::test_games_shapley_deterministic[shapley_coefficient-DeterministicUniformSampler-test_game2]": 6.968900550000399, + 
"tests/value/test_semivalues.py::test_games_shapley_deterministic[shapley_coefficient-DeterministicUniformSampler-test_game3]": 6.659720210998785, + "tests/value/test_semivalues.py::test_marginal_batch_size[PermutationSampler-beta_coefficient_w-5-test_game0]": 0.004239154999595485, "tests/value/test_semivalues.py::test_shapley[beta_coefficient_w-AntitheticPermutationSampler-5]": 5.1298250389809255, "tests/value/test_semivalues.py::test_shapley[beta_coefficient_w-AntitheticSampler-5]": 21.97495059997891, "tests/value/test_semivalues.py::test_shapley[beta_coefficient_w-DeterministicPermutationSampler-5]": 5.294114143965999, @@ -460,18 +835,20 @@ "tests/value/test_semivalues.py::test_shapley[shapley_coefficient-DeterministicUniformSampler-5]": 3.263753114035353, "tests/value/test_semivalues.py::test_shapley[shapley_coefficient-PermutationSampler-5]": 4.766259174008155, "tests/value/test_semivalues.py::test_shapley[shapley_coefficient-UniformSampler-5]": 8.919797526003094, + "tests/value/test_semivalues.py::test_shapley_batch_size[1-PermutationSampler-beta_coefficient_w-5-test_game0]": 9.699354351001602, + "tests/value/test_semivalues.py::test_shapley_batch_size[2-PermutationSampler-beta_coefficient_w-5-test_game0]": 11.229309665000983, "tests/value/test_semivalues.py::test_shapley_batch_size[5-PermutationSampler-beta_coefficient_w-5]": 9.19877936199191, - "tests/value/test_stopping.py::test_history_deviation[0.01-100]": 0.7586702810076531, - "tests/value/test_stopping.py::test_history_deviation[0.01-1]": 0.01646678801625967, - "tests/value/test_stopping.py::test_history_deviation[0.01-42]": 0.35505866500898264, - "tests/value/test_stopping.py::test_history_deviation[0.05-100]": 0.15892104100203142, - "tests/value/test_stopping.py::test_history_deviation[0.05-1]": 0.003904131968738511, - "tests/value/test_stopping.py::test_history_deviation[0.05-42]": 0.06365110300248489, - "tests/value/test_stopping.py::test_make_criterion": 0.0067943750182166696, - 
"tests/value/test_stopping.py::test_max_checks": 0.0022287879837676883, - "tests/value/test_stopping.py::test_max_time": 0.30431480798870325, - "tests/value/test_stopping.py::test_minmax_updates": 0.003805230953730643, - "tests/value/test_stopping.py::test_standard_error": 0.003371614031493664, - "tests/value/test_stopping.py::test_stopping_criterion": 0.004461375967366621, - "tests/value/test_stopping.py::test_stopping_criterion_composition": 0.007468684023479 + "tests/value/test_stopping.py::test_history_deviation[0.01-100]": 1.7738857549993554, + "tests/value/test_stopping.py::test_history_deviation[0.01-1]": 0.029810868998538353, + "tests/value/test_stopping.py::test_history_deviation[0.01-42]": 0.7947784120024153, + "tests/value/test_stopping.py::test_history_deviation[0.05-100]": 0.3636526160007634, + "tests/value/test_stopping.py::test_history_deviation[0.05-1]": 0.010319109000192839, + "tests/value/test_stopping.py::test_history_deviation[0.05-42]": 0.16107529900000372, + "tests/value/test_stopping.py::test_make_criterion": 0.016543962998184725, + "tests/value/test_stopping.py::test_max_checks": 0.006280684001467307, + "tests/value/test_stopping.py::test_max_time": 0.30847623600129737, + "tests/value/test_stopping.py::test_minmax_updates": 0.012927236997711589, + "tests/value/test_stopping.py::test_standard_error": 0.007960140001159743, + "tests/value/test_stopping.py::test_stopping_criterion": 0.011265246001130436, + "tests/value/test_stopping.py::test_stopping_criterion_composition": 0.019021763000637293 } \ No newline at end of file diff --git a/CHANGELOG.md b/CHANGELOG.md index b3e09e30d..7b895d234 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,31 @@ # Changelog +## 0.8.1 - 🆕 🏗 New method and notebook, Games with exact shapley values, bug fixes and cleanup + +### Added + +- Implement new method: `EkfacInfluence` + [PR #451](https://github.com/aai-institute/pyDVL/issues/451) +- New notebook to showcase ekfac for LLMs + [PR
#483](https://github.com/aai-institute/pyDVL/pull/483) +- Implemented exact games in Castro et al. 2009 and 2017 + [PR #341](https://github.com/appliedAI-Initiative/pyDVL/pull/341) + +### Fixed + +- Bug in using `DaskInfluenceCalculator` with `TorchnumpyConverter` + for single dimensional arrays [PR #485](https://github.com/aai-institute/pyDVL/pull/485) +- Fix implementations of `to` methods of `TorchInfluenceFunctionModel` implementations + [PR #487](https://github.com/aai-institute/pyDVL/pull/487) +- Fixed bug with checking for converged values in semivalues + [PR #341](https://github.com/appliedAI-Initiative/pyDVL/pull/341) + +### Docs + +- Add applications of data valuation section, display examples more prominently, + make all sections visible in table of contents, use mkdocs material cards + in the home page [PR #492](https://github.com/aai-institute/pyDVL/pull/492) + ## 0.8.0 - 🆕 New interfaces, scaling computation, bug fixes and improvements 🎁 ### Added diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 198c7ded3..ad8038705 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -23,7 +23,7 @@ to make your life easier. Run the following to set up the pre-commit git hook to run before pushes: -```shell script +```shell pre-commit install --hook-type pre-push ``` @@ -32,7 +32,7 @@ pre-commit install --hook-type pre-push We strongly suggest using some form of virtual environment for working with the library. E.g. with venv: -```shell script +```shell python -m venv ./venv .
venv/bin/activate # `venv\Scripts\activate` in windows pip install -r requirements-dev.txt -r requirements-docs.txt @@ -40,7 +40,7 @@ pip install -r requirements-dev.txt -r requirements-docs.txt With conda: -```shell script +```shell conda create -n pydvl python=3.8 conda activate pydvl pip install -r requirements-dev.txt -r requirements-docs.txt @@ -49,7 +49,7 @@ pip install -r requirements-dev.txt -r requirements-docs.txt A very convenient way of working with your library during development is to install it in editable mode into your environment by running -```shell script +```shell pip install -e . ``` @@ -58,7 +58,7 @@ suite) [pandoc](https://pandoc.org/) is required. Except for OSX, it should be i automatically as a dependency with `requirements-docs.txt`. Under OSX you can install pandoc (you'll need at least version 2.11) with: -```shell script +```shell brew install pandoc ``` @@ -152,11 +152,11 @@ Two important markers are: To test the notebooks separately, run (see [below](#notebooks) for details): ```shell -tox -e tests -- notebooks/ +tox -e notebook-tests ``` To create a package locally, run: -```shell script +```shell python setup.py sdist bdist_wheel ``` @@ -343,8 +343,12 @@ runs](#skipping-ci-runs)). 3. We split the tests based on their duration into groups and run them in parallel. For that we use [pytest-split](https://jerry-git.github.io/pytest-split) - to first store the duration of all tests with `pytest --store-durations pytest --slow-tests` + to first store the duration of all tests with + `tox -e tests -- --store-durations --slow-tests` in a `.test_durations` file. + + Alternatively, we can use pytest directly + `pytest --store-durations --slow-tests`. > **Note** This does not have to be done each time a new test or test case > is added. For new tests and test cases pytes-split assumes @@ -359,11 +363,14 @@ runs](#skipping-ci-runs)).
Then we can have as many splits as we want: ```shell - pytest --splits 3 --group 1 - pytest --splits 3 --group 2 - pytest --splits 3 --group 3 + tox -e tests -- --splits 3 --group 1 + tox -e tests -- --splits 3 --group 2 + tox -e tests -- --splits 3 --group 3 ``` + Alternatively, we can use pytest directly + `pytest --splits 3 --group 1`. + Each one of these commands should be run in a separate shell/job to run the test groups in parallel and decrease the total runtime. @@ -510,13 +517,13 @@ Then, a new release can be created using the script `bumpversion` automatically derive the next release version by bumping the patch part): -```shell script +```shell build_scripts/release-version.sh 0.1.6 ``` To find out how to use the script, pass the `-h` or `--help` flags: -```shell script +```shell build_scripts/release-version.sh --help ``` @@ -542,7 +549,7 @@ create a new release manually by following these steps: 2. When ready to release: From the develop branch create the release branch and perform release activities (update changelog, news, ...). For your own convenience, define an env variable for the release version - ```shell script + ```shell export RELEASE_VERSION="vX.Y.Z" git checkout develop git branch release/${RELEASE_VERSION} && git checkout release/${RELEASE_VERSION} @@ -553,7 +560,7 @@ create a new release manually by following these steps: (the `release` part is ignored but required by bumpversion :rolling_eyes:). 4. Merge the release branch into `master`, tag the merge commit, and push back to the repo. The CI pipeline publishes the package based on the tagged commit. - ```shell script + ```shell git checkout master git merge --no-ff release/${RELEASE_VERSION} git tag -a ${RELEASE_VERSION} -m"Release ${RELEASE_VERSION}" @@ -564,7 +571,7 @@ create a new release manually by following these steps: always strictly more recent than the last published release version from `master`. 6.
Merge the release branch into `develop`: - ```shell script + ```shell git checkout develop git merge --no-ff release/${RELEASE_VERSION} git push origin develop diff --git a/README.md b/README.md index 417709714..948ffc842 100644 --- a/README.md +++ b/README.md @@ -7,27 +7,13 @@

- - PyPI - - - Version - - - documentation - - - License - - - Build status - - - - - - DOI - + PyPI + Version + documentation + License + Build status + + DOI

**pyDVL** collects algorithms for **Data Valuation** and **Influence Function** computation. @@ -332,7 +318,8 @@ We currently implement the following papers: - Schioppa, Andrea, Polina Zablotskaia, David Vilar, and Artem Sokolov. [Scaling Up Influence Functions](http://arxiv.org/abs/2112.03052). In Proceedings of the AAAI-22. arXiv, 2021. - +- James Martens, Roger Grosse, [Optimizing Neural Networks with Kronecker-factored Approximate Curvature](https://arxiv.org/abs/1503.05671), International Conference on Machine Learning (ICML), 2015. +- George, Thomas, César Laurent, Xavier Bouthillier, Nicolas Ballas, Pascal Vincent, [Fast Approximate Natural Gradient Descent in a Kronecker-factored Eigenbasis](https://arxiv.org/abs/1806.03884), Advances in Neural Information Processing Systems 31, 2018. # License diff --git a/build_scripts/copy_contributing_guide.py b/build_scripts/copy_contributing_guide.py new file mode 100644 index 000000000..0c4b2fbf2 --- /dev/null +++ b/build_scripts/copy_contributing_guide.py @@ -0,0 +1,38 @@ +import logging +import os +from pathlib import Path + +import mkdocs.plugins + +logger = logging.getLogger(__name__) + +root_dir = Path(__file__).parent.parent +docs_dir = root_dir / "docs" +contributing_file = root_dir / "CONTRIBUTING.md" +target_filepath = docs_dir / contributing_file.name + + +@mkdocs.plugins.event_priority(100) +def on_pre_build(config): + logger.info("Temporarily copying contributing guide to docs directory") + try: + if os.path.getmtime(contributing_file) <= os.path.getmtime(target_filepath): + logger.info( + f"Contributing guide '{os.fspath(contributing_file)}' hasn't been updated, skipping."
+ ) + return + except FileNotFoundError: + pass + logger.info( + f"Creating symbolic link for '{os.fspath(contributing_file)}' " + f"at '{os.fspath(target_filepath)}'" + ) + target_filepath.symlink_to(contributing_file) + + logger.info("Finished copying contributing guide to docs directory") + + +@mkdocs.plugins.event_priority(-100) +def on_shutdown(): + logger.info("Removing temporary contributing guide in docs directory") + target_filepath.unlink() diff --git a/docs/assets/pydvl.bib b/docs/assets/pydvl.bib index e87ad3484..a79fb6627 100644 --- a/docs/assets/pydvl.bib +++ b/docs/assets/pydvl.bib @@ -342,4 +342,21 @@ @InProceedings{kwon_data_2023 pdf = {https://proceedings.mlr.press/v202/kwon23e/kwon23e.pdf}, url = {https://proceedings.mlr.press/v202/kwon23e.html}, abstract = {Data valuation is a powerful framework for providing statistical insights into which data are beneficial or detrimental to model training. Many Shapley-based data valuation methods have shown promising results in various downstream tasks, however, they are well known to be computationally challenging as it requires training a large number of models. As a result, it has been recognized as infeasible to apply to large datasets. To address this issue, we propose Data-OOB, a new data valuation method for a bagging model that utilizes the out-of-bag estimate. The proposed method is computationally efficient and can scale to millions of data by reusing trained weak learners. Specifically, Data-OOB takes less than $2.25$ hours on a single CPU processor when there are $10^6$ samples to evaluate and the input dimension is $100$. Furthermore, Data-OOB has solid theoretical interpretations in that it identifies the same important data point as the infinitesimal jackknife influence function when two different points are compared. We conduct comprehensive experiments using 12 classification datasets, each with thousands of sample sizes. 
We demonstrate that the proposed method significantly outperforms existing state-of-the-art data valuation methods in identifying mislabeled data and finding a set of helpful (or harmful) data points, highlighting the potential for applying data values in real-world applications.} +} + +@article{george2018fast, + title={Fast approximate natural gradient descent in a kronecker factored eigenbasis}, + author={George, Thomas and Laurent, C{\'e}sar and Bouthillier, Xavier and Ballas, Nicolas and Vincent, Pascal}, + journal={Advances in Neural Information Processing Systems}, + volume={31}, + year={2018} +} + +@inproceedings{martens2015optimizing, + title={Optimizing neural networks with kronecker-factored approximate curvature}, + author={Martens, James and Grosse, Roger}, + booktitle={International conference on machine learning}, + pages={2408--2417}, + year={2015}, + organization={PMLR} } \ No newline at end of file diff --git a/docs/css/extra.css b/docs/css/extra.css index 0a74470ce..4354e03e9 100644 --- a/docs/css/extra.css +++ b/docs/css/extra.css @@ -69,6 +69,7 @@ a.autorefs-external:hover::after { .nt-card-image:focus { filter: invert(32%) sepia(93%) saturate(1535%) hue-rotate(220deg) brightness(102%) contrast(99%); } + .md-header__button.md-logo { padding: 0; } diff --git a/docs/css/grid-cards.css b/docs/css/grid-cards.css new file mode 100644 index 000000000..8be3dcbb8 --- /dev/null +++ b/docs/css/grid-cards.css @@ -0,0 +1,22 @@ +/* Shadow and Hover */ +.grid.cards > ul > li { + box-shadow: 0 2px 2px 0 rgb(0 0 0 / 14%), 0 3px 1px -2px rgb(0 0 0 / 20%), 0 1px 5px 0 rgb(0 0 0 / 12%); + + &:hover { + transform: scale(1.05); + z-index: 999; + background-color: rgba(0, 0, 0, 0.05); + } +} + +[data-md-color-scheme="slate"] { + .grid.cards > ul > li { + box-shadow: 0 2px 2px 0 rgb(4 40 33 / 14%), 0 3px 1px -2px rgb(40 86 94 / 47%), 0 1px 5px 0 rgb(139 252 255 / 64%); + + &:hover { + transform: scale(1.05); + z-index: 999; + background-color: rgba(139, 252, 255, 
0.05); + } + } +} diff --git a/docs/css/neoteroi.css b/docs/css/neoteroi.css deleted file mode 100644 index 363c9229a..000000000 --- a/docs/css/neoteroi.css +++ /dev/null @@ -1 +0,0 @@ -:root{--nt-color-0: #CD853F;--nt-color-1: #B22222;--nt-color-2: #000080;--nt-color-3: #4B0082;--nt-color-4: #3CB371;--nt-color-5: #D2B48C;--nt-color-6: #FF00FF;--nt-color-7: #98FB98;--nt-color-8: #FFEBCD;--nt-color-9: #2E8B57;--nt-color-10: #6A5ACD;--nt-color-11: #48D1CC;--nt-color-12: #FFA500;--nt-color-13: #F4A460;--nt-color-14: #A52A2A;--nt-color-15: #FFE4C4;--nt-color-16: #FF4500;--nt-color-17: #AFEEEE;--nt-color-18: #FA8072;--nt-color-19: #2F4F4F;--nt-color-20: #FFDAB9;--nt-color-21: #BC8F8F;--nt-color-22: #FFC0CB;--nt-color-23: #00FA9A;--nt-color-24: #F0FFF0;--nt-color-25: #FFFACD;--nt-color-26: #F5F5F5;--nt-color-27: #FF6347;--nt-color-28: #FFFFF0;--nt-color-29: #7FFFD4;--nt-color-30: #E9967A;--nt-color-31: #7B68EE;--nt-color-32: #FFF8DC;--nt-color-33: #0000CD;--nt-color-34: #D2691E;--nt-color-35: #708090;--nt-color-36: #5F9EA0;--nt-color-37: #008080;--nt-color-38: #008000;--nt-color-39: #FFE4E1;--nt-color-40: #FFFF00;--nt-color-41: #FFFAF0;--nt-color-42: #DCDCDC;--nt-color-43: #ADFF2F;--nt-color-44: #ADD8E6;--nt-color-45: #8B008B;--nt-color-46: #7FFF00;--nt-color-47: #800000;--nt-color-48: #20B2AA;--nt-color-49: #556B2F;--nt-color-50: #778899;--nt-color-51: #E6E6FA;--nt-color-52: #FFFAFA;--nt-color-53: #FF7F50;--nt-color-54: #FF0000;--nt-color-55: #F5DEB3;--nt-color-56: #008B8B;--nt-color-57: #66CDAA;--nt-color-58: #808000;--nt-color-59: #FAF0E6;--nt-color-60: #00BFFF;--nt-color-61: #C71585;--nt-color-62: #00FFFF;--nt-color-63: #8B4513;--nt-color-64: #F0F8FF;--nt-color-65: #FAEBD7;--nt-color-66: #8B0000;--nt-color-67: #4682B4;--nt-color-68: #F0E68C;--nt-color-69: #BDB76B;--nt-color-70: #A0522D;--nt-color-71: #FAFAD2;--nt-color-72: #FFD700;--nt-color-73: #DEB887;--nt-color-74: #E0FFFF;--nt-color-75: #8A2BE2;--nt-color-76: #32CD32;--nt-color-77: #87CEFA;--nt-color-78: 
#00CED1;--nt-color-79: #696969;--nt-color-80: #DDA0DD;--nt-color-81: #EE82EE;--nt-color-82: #FFB6C1;--nt-color-83: #8FBC8F;--nt-color-84: #D8BFD8;--nt-color-85: #9400D3;--nt-color-86: #A9A9A9;--nt-color-87: #FFFFE0;--nt-color-88: #FFF5EE;--nt-color-89: #FFF0F5;--nt-color-90: #FFDEAD;--nt-color-91: #800080;--nt-color-92: #B0E0E6;--nt-color-93: #9932CC;--nt-color-94: #DAA520;--nt-color-95: #F0FFFF;--nt-color-96: #40E0D0;--nt-color-97: #00FF7F;--nt-color-98: #006400;--nt-color-99: #808080;--nt-color-100: #87CEEB;--nt-color-101: #0000FF;--nt-color-102: #6495ED;--nt-color-103: #FDF5E6;--nt-color-104: #B8860B;--nt-color-105: #BA55D3;--nt-color-106: #C0C0C0;--nt-color-107: #000000;--nt-color-108: #F08080;--nt-color-109: #B0C4DE;--nt-color-110: #00008B;--nt-color-111: #6B8E23;--nt-color-112: #FFE4B5;--nt-color-113: #FFA07A;--nt-color-114: #9ACD32;--nt-color-115: #FFFFFF;--nt-color-116: #F5F5DC;--nt-color-117: #90EE90;--nt-color-118: #1E90FF;--nt-color-119: #7CFC00;--nt-color-120: #FF69B4;--nt-color-121: #F8F8FF;--nt-color-122: #F5FFFA;--nt-color-123: #00FF00;--nt-color-124: #D3D3D3;--nt-color-125: #DB7093;--nt-color-126: #DA70D6;--nt-color-127: #FF1493;--nt-color-128: #228B22;--nt-color-129: #FFEFD5;--nt-color-130: #4169E1;--nt-color-131: #191970;--nt-color-132: #9370DB;--nt-color-133: #483D8B;--nt-color-134: #FF8C00;--nt-color-135: #EEE8AA;--nt-color-136: #CD5C5C;--nt-color-137: #DC143C}:root{--nt-group-0-main: #000000;--nt-group-0-dark: #FFFFFF;--nt-group-0-light: #000000;--nt-group-0-main-bg: #F44336;--nt-group-0-dark-bg: #BA000D;--nt-group-0-light-bg: #FF7961;--nt-group-1-main: #000000;--nt-group-1-dark: #FFFFFF;--nt-group-1-light: #000000;--nt-group-1-main-bg: #E91E63;--nt-group-1-dark-bg: #B0003A;--nt-group-1-light-bg: #FF6090;--nt-group-2-main: #FFFFFF;--nt-group-2-dark: #FFFFFF;--nt-group-2-light: #000000;--nt-group-2-main-bg: #9C27B0;--nt-group-2-dark-bg: #6A0080;--nt-group-2-light-bg: #D05CE3;--nt-group-3-main: #FFFFFF;--nt-group-3-dark: 
#FFFFFF;--nt-group-3-light: #000000;--nt-group-3-main-bg: #673AB7;--nt-group-3-dark-bg: #320B86;--nt-group-3-light-bg: #9A67EA;--nt-group-4-main: #FFFFFF;--nt-group-4-dark: #FFFFFF;--nt-group-4-light: #000000;--nt-group-4-main-bg: #3F51B5;--nt-group-4-dark-bg: #002984;--nt-group-4-light-bg: #757DE8;--nt-group-5-main: #000000;--nt-group-5-dark: #FFFFFF;--nt-group-5-light: #000000;--nt-group-5-main-bg: #2196F3;--nt-group-5-dark-bg: #0069C0;--nt-group-5-light-bg: #6EC6FF;--nt-group-6-main: #000000;--nt-group-6-dark: #FFFFFF;--nt-group-6-light: #000000;--nt-group-6-main-bg: #03A9F4;--nt-group-6-dark-bg: #007AC1;--nt-group-6-light-bg: #67DAFF;--nt-group-7-main: #000000;--nt-group-7-dark: #000000;--nt-group-7-light: #000000;--nt-group-7-main-bg: #00BCD4;--nt-group-7-dark-bg: #008BA3;--nt-group-7-light-bg: #62EFFF;--nt-group-8-main: #000000;--nt-group-8-dark: #FFFFFF;--nt-group-8-light: #000000;--nt-group-8-main-bg: #009688;--nt-group-8-dark-bg: #00675B;--nt-group-8-light-bg: #52C7B8;--nt-group-9-main: #000000;--nt-group-9-dark: #FFFFFF;--nt-group-9-light: #000000;--nt-group-9-main-bg: #4CAF50;--nt-group-9-dark-bg: #087F23;--nt-group-9-light-bg: #80E27E;--nt-group-10-main: #000000;--nt-group-10-dark: #000000;--nt-group-10-light: #000000;--nt-group-10-main-bg: #8BC34A;--nt-group-10-dark-bg: #5A9216;--nt-group-10-light-bg: #BEF67A;--nt-group-11-main: #000000;--nt-group-11-dark: #000000;--nt-group-11-light: #000000;--nt-group-11-main-bg: #CDDC39;--nt-group-11-dark-bg: #99AA00;--nt-group-11-light-bg: #FFFF6E;--nt-group-12-main: #000000;--nt-group-12-dark: #000000;--nt-group-12-light: #000000;--nt-group-12-main-bg: #FFEB3B;--nt-group-12-dark-bg: #C8B900;--nt-group-12-light-bg: #FFFF72;--nt-group-13-main: #000000;--nt-group-13-dark: #000000;--nt-group-13-light: #000000;--nt-group-13-main-bg: #FFC107;--nt-group-13-dark-bg: #C79100;--nt-group-13-light-bg: #FFF350;--nt-group-14-main: #000000;--nt-group-14-dark: #000000;--nt-group-14-light: #000000;--nt-group-14-main-bg: 
#FF9800;--nt-group-14-dark-bg: #C66900;--nt-group-14-light-bg: #FFC947;--nt-group-15-main: #000000;--nt-group-15-dark: #FFFFFF;--nt-group-15-light: #000000;--nt-group-15-main-bg: #FF5722;--nt-group-15-dark-bg: #C41C00;--nt-group-15-light-bg: #FF8A50;--nt-group-16-main: #FFFFFF;--nt-group-16-dark: #FFFFFF;--nt-group-16-light: #000000;--nt-group-16-main-bg: #795548;--nt-group-16-dark-bg: #4B2C20;--nt-group-16-light-bg: #A98274;--nt-group-17-main: #000000;--nt-group-17-dark: #FFFFFF;--nt-group-17-light: #000000;--nt-group-17-main-bg: #9E9E9E;--nt-group-17-dark-bg: #707070;--nt-group-17-light-bg: #CFCFCF;--nt-group-18-main: #000000;--nt-group-18-dark: #FFFFFF;--nt-group-18-light: #000000;--nt-group-18-main-bg: #607D8B;--nt-group-18-dark-bg: #34515E;--nt-group-18-light-bg: #8EACBB}.nt-pastello{--nt-group-0-main: #000000;--nt-group-0-dark: #000000;--nt-group-0-light: #000000;--nt-group-0-main-bg: #EF9A9A;--nt-group-0-dark-bg: #BA6B6C;--nt-group-0-light-bg: #FFCCCB;--nt-group-1-main: #000000;--nt-group-1-dark: #000000;--nt-group-1-light: #000000;--nt-group-1-main-bg: #F48FB1;--nt-group-1-dark-bg: #BF5F82;--nt-group-1-light-bg: #FFC1E3;--nt-group-2-main: #000000;--nt-group-2-dark: #000000;--nt-group-2-light: #000000;--nt-group-2-main-bg: #CE93D8;--nt-group-2-dark-bg: #9C64A6;--nt-group-2-light-bg: #FFC4FF;--nt-group-3-main: #000000;--nt-group-3-dark: #000000;--nt-group-3-light: #000000;--nt-group-3-main-bg: #B39DDB;--nt-group-3-dark-bg: #836FA9;--nt-group-3-light-bg: #E6CEFF;--nt-group-4-main: #000000;--nt-group-4-dark: #000000;--nt-group-4-light: #000000;--nt-group-4-main-bg: #9FA8DA;--nt-group-4-dark-bg: #6F79A8;--nt-group-4-light-bg: #D1D9FF;--nt-group-5-main: #000000;--nt-group-5-dark: #000000;--nt-group-5-light: #000000;--nt-group-5-main-bg: #90CAF9;--nt-group-5-dark-bg: #5D99C6;--nt-group-5-light-bg: #C3FDFF;--nt-group-6-main: #000000;--nt-group-6-dark: #000000;--nt-group-6-light: #000000;--nt-group-6-main-bg: #81D4FA;--nt-group-6-dark-bg: 
#4BA3C7;--nt-group-6-light-bg: #B6FFFF;--nt-group-7-main: #000000;--nt-group-7-dark: #000000;--nt-group-7-light: #000000;--nt-group-7-main-bg: #80DEEA;--nt-group-7-dark-bg: #4BACB8;--nt-group-7-light-bg: #B4FFFF;--nt-group-8-main: #000000;--nt-group-8-dark: #000000;--nt-group-8-light: #000000;--nt-group-8-main-bg: #80CBC4;--nt-group-8-dark-bg: #4F9A94;--nt-group-8-light-bg: #B2FEF7;--nt-group-9-main: #000000;--nt-group-9-dark: #000000;--nt-group-9-light: #000000;--nt-group-9-main-bg: #A5D6A7;--nt-group-9-dark-bg: #75A478;--nt-group-9-light-bg: #D7FFD9;--nt-group-10-main: #000000;--nt-group-10-dark: #000000;--nt-group-10-light: #000000;--nt-group-10-main-bg: #C5E1A5;--nt-group-10-dark-bg: #94AF76;--nt-group-10-light-bg: #F8FFD7;--nt-group-11-main: #000000;--nt-group-11-dark: #000000;--nt-group-11-light: #000000;--nt-group-11-main-bg: #E6EE9C;--nt-group-11-dark-bg: #B3BC6D;--nt-group-11-light-bg: #FFFFCE;--nt-group-12-main: #000000;--nt-group-12-dark: #000000;--nt-group-12-light: #000000;--nt-group-12-main-bg: #FFF59D;--nt-group-12-dark-bg: #CBC26D;--nt-group-12-light-bg: #FFFFCF;--nt-group-13-main: #000000;--nt-group-13-dark: #000000;--nt-group-13-light: #000000;--nt-group-13-main-bg: #FFE082;--nt-group-13-dark-bg: #CAAE53;--nt-group-13-light-bg: #FFFFB3;--nt-group-14-main: #000000;--nt-group-14-dark: #000000;--nt-group-14-light: #000000;--nt-group-14-main-bg: #FFCC80;--nt-group-14-dark-bg: #CA9B52;--nt-group-14-light-bg: #FFFFB0;--nt-group-15-main: #000000;--nt-group-15-dark: #000000;--nt-group-15-light: #000000;--nt-group-15-main-bg: #FFAB91;--nt-group-15-dark-bg: #C97B63;--nt-group-15-light-bg: #FFDDC1;--nt-group-16-main: #000000;--nt-group-16-dark: #000000;--nt-group-16-light: #000000;--nt-group-16-main-bg: #BCAAA4;--nt-group-16-dark-bg: #8C7B75;--nt-group-16-light-bg: #EFDCD5;--nt-group-17-main: #000000;--nt-group-17-dark: #000000;--nt-group-17-light: #000000;--nt-group-17-main-bg: #EEEEEE;--nt-group-17-dark-bg: #BCBCBC;--nt-group-17-light-bg: 
#FFFFFF;--nt-group-18-main: #000000;--nt-group-18-dark: #000000;--nt-group-18-light: #000000;--nt-group-18-main-bg: #B0BEC5;--nt-group-18-dark-bg: #808E95;--nt-group-18-light-bg: #E2F1F8}.nt-group-0 .nt-plan-group-summary,.nt-group-0 .nt-timeline-dot{color:var(--nt-group-0-dark);background-color:var(--nt-group-0-dark-bg)}.nt-group-0 .period{color:var(--nt-group-0-main);background-color:var(--nt-group-0-main-bg)}.nt-group-1 .nt-plan-group-summary,.nt-group-1 .nt-timeline-dot{color:var(--nt-group-1-dark);background-color:var(--nt-group-1-dark-bg)}.nt-group-1 .period{color:var(--nt-group-1-main);background-color:var(--nt-group-1-main-bg)}.nt-group-2 .nt-plan-group-summary,.nt-group-2 .nt-timeline-dot{color:var(--nt-group-2-dark);background-color:var(--nt-group-2-dark-bg)}.nt-group-2 .period{color:var(--nt-group-2-main);background-color:var(--nt-group-2-main-bg)}.nt-group-3 .nt-plan-group-summary,.nt-group-3 .nt-timeline-dot{color:var(--nt-group-3-dark);background-color:var(--nt-group-3-dark-bg)}.nt-group-3 .period{color:var(--nt-group-3-main);background-color:var(--nt-group-3-main-bg)}.nt-group-4 .nt-plan-group-summary,.nt-group-4 .nt-timeline-dot{color:var(--nt-group-4-dark);background-color:var(--nt-group-4-dark-bg)}.nt-group-4 .period{color:var(--nt-group-4-main);background-color:var(--nt-group-4-main-bg)}.nt-group-5 .nt-plan-group-summary,.nt-group-5 .nt-timeline-dot{color:var(--nt-group-5-dark);background-color:var(--nt-group-5-dark-bg)}.nt-group-5 .period{color:var(--nt-group-5-main);background-color:var(--nt-group-5-main-bg)}.nt-group-6 .nt-plan-group-summary,.nt-group-6 .nt-timeline-dot{color:var(--nt-group-6-dark);background-color:var(--nt-group-6-dark-bg)}.nt-group-6 .period{color:var(--nt-group-6-main);background-color:var(--nt-group-6-main-bg)}.nt-group-7 .nt-plan-group-summary,.nt-group-7 .nt-timeline-dot{color:var(--nt-group-7-dark);background-color:var(--nt-group-7-dark-bg)}.nt-group-7 
.period{color:var(--nt-group-7-main);background-color:var(--nt-group-7-main-bg)}.nt-group-8 .nt-plan-group-summary,.nt-group-8 .nt-timeline-dot{color:var(--nt-group-8-dark);background-color:var(--nt-group-8-dark-bg)}.nt-group-8 .period{color:var(--nt-group-8-main);background-color:var(--nt-group-8-main-bg)}.nt-group-9 .nt-plan-group-summary,.nt-group-9 .nt-timeline-dot{color:var(--nt-group-9-dark);background-color:var(--nt-group-9-dark-bg)}.nt-group-9 .period{color:var(--nt-group-9-main);background-color:var(--nt-group-9-main-bg)}.nt-group-10 .nt-plan-group-summary,.nt-group-10 .nt-timeline-dot{color:var(--nt-group-10-dark);background-color:var(--nt-group-10-dark-bg)}.nt-group-10 .period{color:var(--nt-group-10-main);background-color:var(--nt-group-10-main-bg)}.nt-group-11 .nt-plan-group-summary,.nt-group-11 .nt-timeline-dot{color:var(--nt-group-11-dark);background-color:var(--nt-group-11-dark-bg)}.nt-group-11 .period{color:var(--nt-group-11-main);background-color:var(--nt-group-11-main-bg)}.nt-group-12 .nt-plan-group-summary,.nt-group-12 .nt-timeline-dot{color:var(--nt-group-12-dark);background-color:var(--nt-group-12-dark-bg)}.nt-group-12 .period{color:var(--nt-group-12-main);background-color:var(--nt-group-12-main-bg)}.nt-group-13 .nt-plan-group-summary,.nt-group-13 .nt-timeline-dot{color:var(--nt-group-13-dark);background-color:var(--nt-group-13-dark-bg)}.nt-group-13 .period{color:var(--nt-group-13-main);background-color:var(--nt-group-13-main-bg)}.nt-group-14 .nt-plan-group-summary,.nt-group-14 .nt-timeline-dot{color:var(--nt-group-14-dark);background-color:var(--nt-group-14-dark-bg)}.nt-group-14 .period{color:var(--nt-group-14-main);background-color:var(--nt-group-14-main-bg)}.nt-group-15 .nt-plan-group-summary,.nt-group-15 .nt-timeline-dot{color:var(--nt-group-15-dark);background-color:var(--nt-group-15-dark-bg)}.nt-group-15 .period{color:var(--nt-group-15-main);background-color:var(--nt-group-15-main-bg)}.nt-group-16 .nt-plan-group-summary,.nt-group-16 
.nt-timeline-dot{color:var(--nt-group-16-dark);background-color:var(--nt-group-16-dark-bg)}.nt-group-16 .period{color:var(--nt-group-16-main);background-color:var(--nt-group-16-main-bg)}.nt-group-17 .nt-plan-group-summary,.nt-group-17 .nt-timeline-dot{color:var(--nt-group-17-dark);background-color:var(--nt-group-17-dark-bg)}.nt-group-17 .period{color:var(--nt-group-17-main);background-color:var(--nt-group-17-main-bg)}.nt-group-18 .nt-plan-group-summary,.nt-group-18 .nt-timeline-dot{color:var(--nt-group-18-dark);background-color:var(--nt-group-18-dark-bg)}.nt-group-18 .period{color:var(--nt-group-18-main);background-color:var(--nt-group-18-main-bg)}.nt-error{border:2px dashed darkred;padding:0 1rem;background:#faf9ba;color:darkred}.nt-timeline{margin-top:30px}.nt-timeline .nt-timeline-title{font-size:1.1rem;margin-top:0}.nt-timeline .nt-timeline-sub-title{margin-top:0}.nt-timeline .nt-timeline-content{font-size:.8rem;border-bottom:2px dashed #ccc;padding-bottom:1.2rem}.nt-timeline.horizontal .nt-timeline-items{flex-direction:row;overflow-x:scroll}.nt-timeline.horizontal .nt-timeline-items>div{min-width:400px;margin-right:50px}.nt-timeline.horizontal.reverse .nt-timeline-items{flex-direction:row-reverse}.nt-timeline.horizontal.center .nt-timeline-before{background-image:linear-gradient(rgba(252, 70, 107, 0) 0%, rgb(252, 70, 107) 100%);background-repeat:no-repeat;background-size:100% 2px;background-position:0 center}.nt-timeline.horizontal.center .nt-timeline-after{background-image:linear-gradient(180deg, rgb(252, 70, 107) 0%, rgba(252, 70, 107, 0) 100%);background-repeat:no-repeat;background-size:100% 2px;background-position:0 center}.nt-timeline.horizontal.center .nt-timeline-items{background-image:radial-gradient(circle, rgb(63, 94, 251) 0%, rgb(252, 70, 107) 100%);background-repeat:no-repeat;background-size:100% 2px;background-position:0 center}.nt-timeline.horizontal .nt-timeline-dot{left:50%}.nt-timeline.horizontal .nt-timeline-dot:not(.bigger){top:calc(50% - 
4px)}.nt-timeline.horizontal .nt-timeline-dot.bigger{top:calc(50% - 15px)}.nt-timeline.vertical .nt-timeline-items{flex-direction:column}.nt-timeline.vertical.reverse .nt-timeline-items{flex-direction:column-reverse}.nt-timeline.vertical.center .nt-timeline-before{background:linear-gradient(rgba(252, 70, 107, 0) 0%, rgb(252, 70, 107) 100%) no-repeat center/2px 100%}.nt-timeline.vertical.center .nt-timeline-after{background:linear-gradient(rgb(252, 70, 107) 0%, rgba(252, 70, 107, 0) 100%) no-repeat center/2px 100%}.nt-timeline.vertical.center .nt-timeline-items{background:radial-gradient(circle, rgb(63, 94, 251) 0%, rgb(252, 70, 107) 100%) no-repeat center/2px 100%}.nt-timeline.vertical.center .nt-timeline-dot{left:calc(50% - 10px)}.nt-timeline.vertical.center .nt-timeline-dot:not(.bigger){top:10px}.nt-timeline.vertical.center .nt-timeline-dot.bigger{left:calc(50% - 20px)}.nt-timeline.vertical.left{padding-left:100px}.nt-timeline.vertical.left .nt-timeline-item{padding-left:70px}.nt-timeline.vertical.left .nt-timeline-sub-title{left:-100px;width:100px}.nt-timeline.vertical.left .nt-timeline-before{background:linear-gradient(rgba(252, 70, 107, 0) 0%, rgb(252, 70, 107) 100%) no-repeat 30px/2px 100%}.nt-timeline.vertical.left .nt-timeline-after{background:linear-gradient(rgb(252, 70, 107) 0%, rgba(252, 70, 107, 0) 100%) no-repeat 30px/2px 100%}.nt-timeline.vertical.left .nt-timeline-items{background:radial-gradient(circle, rgb(63, 94, 251) 0%, rgb(252, 70, 107) 100%) no-repeat 30px/2px 100%}.nt-timeline.vertical.left .nt-timeline-dot{left:21px;top:8px}.nt-timeline.vertical.left .nt-timeline-dot.bigger{top:0px;left:10px}.nt-timeline.vertical.right{padding-right:100px}.nt-timeline.vertical.right .nt-timeline-sub-title{right:-100px;text-align:left;width:100px}.nt-timeline.vertical.right .nt-timeline-item{padding-right:70px}.nt-timeline.vertical.right .nt-timeline-before{background:linear-gradient(rgba(252, 70, 107, 0) 0%, rgb(252, 70, 107) 100%) no-repeat calc(100% - 
30px)/2px 100%}.nt-timeline.vertical.right .nt-timeline-after{background:linear-gradient(rgb(252, 70, 107) 0%, rgba(252, 70, 107, 0) 100%) no-repeat calc(100% - 30px)/2px 100%}.nt-timeline.vertical.right .nt-timeline-items{background:radial-gradient(circle, rgb(63, 94, 251) 0%, rgb(252, 70, 107) 100%) no-repeat calc(100% - 30px)/2px 100%}.nt-timeline.vertical.right .nt-timeline-dot{right:21px;top:8px}.nt-timeline.vertical.right .nt-timeline-dot.bigger{top:10px;right:10px}.nt-timeline-items{display:flex;position:relative}.nt-timeline-items>div{min-height:100px;padding-top:2px;padding-bottom:20px}.nt-timeline-before{content:"";height:15px}.nt-timeline-after{content:"";height:60px;margin-bottom:20px}.nt-timeline-sub-title{position:absolute;width:50%;top:4px;font-size:18px;color:var(--nt-color-50)}[data-md-color-scheme=slate] .nt-timeline-sub-title{color:var(--nt-color-51)}.nt-timeline-item{position:relative}.nt-timeline.vertical.center:not(.alternate) .nt-timeline-item{padding-left:calc(50% + 40px)}.nt-timeline.vertical.center:not(.alternate) .nt-timeline-item .nt-timeline-sub-title{left:0;padding-right:40px;text-align:right}.nt-timeline.vertical.center.alternate .nt-timeline-item:nth-child(odd){padding-left:calc(50% + 40px)}.nt-timeline.vertical.center.alternate .nt-timeline-item:nth-child(odd) .nt-timeline-sub-title{left:0;padding-right:40px;text-align:right}.nt-timeline.vertical.center.alternate .nt-timeline-item:nth-child(even){text-align:right;padding-right:calc(50% + 40px)}.nt-timeline.vertical.center.alternate .nt-timeline-item:nth-child(even) .nt-timeline-sub-title{right:0;padding-left:40px;text-align:left}.nt-timeline-dot{position:relative;width:20px;height:20px;border-radius:100%;background-color:#fc5b5b;position:absolute;top:0px;z-index:2;display:flex;justify-content:center;align-items:center;box-shadow:0 2px 1px -1px rgba(0,0,0,.2),0 1px 1px 0 rgba(0,0,0,.14),0 1px 3px 0 rgba(0,0,0,.12);border:3px solid #fff}.nt-timeline-dot:not(.bigger) 
.icon{font-size:10px}.nt-timeline-dot.bigger{width:40px;height:40px;padding:3px}.nt-timeline-dot .icon{color:#fff}@supports not (-moz-appearance: none){details .nt-timeline.vertical.center.alternate .nt-timeline-item:nth-child(odd) .nt-timeline-sub-title,details .nt-timeline.vertical.center:not(.alternate) .nt-timeline-item .nt-timeline-sub-title{left:-40px}details .nt-timeline.vertical.center.alternate .nt-timeline-item:nth-child(even) .nt-timeline-sub-title{right:-40px}details .nt-timeline.vertical.center .nt-timeline-dot{left:calc(50% - 12px)}details .nt-timeline-dot.bigger{font-size:1rem !important}}.nt-timeline-item:nth-child(0) .nt-timeline-dot{background-color:var(--nt-color-0)}.nt-timeline-item:nth-child(1) .nt-timeline-dot{background-color:var(--nt-color-1)}.nt-timeline-item:nth-child(2) .nt-timeline-dot{background-color:var(--nt-color-2)}.nt-timeline-item:nth-child(3) .nt-timeline-dot{background-color:var(--nt-color-3)}.nt-timeline-item:nth-child(4) .nt-timeline-dot{background-color:var(--nt-color-4)}.nt-timeline-item:nth-child(5) .nt-timeline-dot{background-color:var(--nt-color-5)}.nt-timeline-item:nth-child(6) .nt-timeline-dot{background-color:var(--nt-color-6)}.nt-timeline-item:nth-child(7) .nt-timeline-dot{background-color:var(--nt-color-7)}.nt-timeline-item:nth-child(8) .nt-timeline-dot{background-color:var(--nt-color-8)}.nt-timeline-item:nth-child(9) .nt-timeline-dot{background-color:var(--nt-color-9)}.nt-timeline-item:nth-child(10) .nt-timeline-dot{background-color:var(--nt-color-10)}.nt-timeline-item:nth-child(11) .nt-timeline-dot{background-color:var(--nt-color-11)}.nt-timeline-item:nth-child(12) .nt-timeline-dot{background-color:var(--nt-color-12)}.nt-timeline-item:nth-child(13) .nt-timeline-dot{background-color:var(--nt-color-13)}.nt-timeline-item:nth-child(14) .nt-timeline-dot{background-color:var(--nt-color-14)}.nt-timeline-item:nth-child(15) .nt-timeline-dot{background-color:var(--nt-color-15)}.nt-timeline-item:nth-child(16) 
.nt-timeline-dot{background-color:var(--nt-color-16)}.nt-timeline-item:nth-child(17) .nt-timeline-dot{background-color:var(--nt-color-17)}.nt-timeline-item:nth-child(18) .nt-timeline-dot{background-color:var(--nt-color-18)}.nt-timeline-item:nth-child(19) .nt-timeline-dot{background-color:var(--nt-color-19)}.nt-timeline-item:nth-child(20) .nt-timeline-dot{background-color:var(--nt-color-20)}:root{--nt-scrollbar-color: #2751b0;--nt-plan-actions-height: 24px;--nt-units-background: #ff9800;--nt-months-background: #2751b0;--nt-plan-vertical-line-color: #a3a3a3ad}.nt-pastello{--nt-scrollbar-color: #9fb8f4;--nt-units-background: #f5dc82;--nt-months-background: #5b7fd1}[data-md-color-scheme=slate]{--nt-units-background: #003773}[data-md-color-scheme=slate] .nt-pastello{--nt-units-background: #3f4997}.nt-plan-root{min-height:200px;scrollbar-width:20px;scrollbar-color:var(--nt-scrollbar-color);display:flex}.nt-plan-root ::-webkit-scrollbar{width:20px}.nt-plan-root ::-webkit-scrollbar-track{box-shadow:inset 0 0 5px gray;border-radius:10px}.nt-plan-root ::-webkit-scrollbar-thumb{background:var(--nt-scrollbar-color);border-radius:10px}.nt-plan-root .nt-plan{flex:80%}.nt-plan-root.no-groups .nt-plan-periods{padding-left:0}.nt-plan-root.no-groups .nt-plan-group-summary{display:none}.nt-plan-root .nt-timeline-dot.bigger{top:-10px}.nt-plan-root .nt-timeline-dot.bigger[title]{cursor:help}.nt-plan{white-space:nowrap;overflow-x:auto;display:flex}.nt-plan .ug-timeline-dot{left:368px;top:-8px;cursor:help}.months{display:flex}.month{flex:auto;display:inline-block;box-shadow:rgba(0,0,0,.2) 0px 3px 1px -2px,rgba(0,0,0,.14) 0px 2px 2px 0px,rgba(0,0,0,.12) 0px 1px 5px 0px inset;background-color:var(--nt-months-background);color:#fff;text-transform:uppercase;font-family:Roboto,Helvetica,Arial,sans-serif;padding:2px 5px;font-size:12px;border:1px solid #000;width:150px;border-radius:8px}.nt-plan-group-activities{flex:auto;position:relative}.nt-vline{border-left:1px dashed 
var(--nt-plan-vertical-line-color);height:100%;left:0;position:absolute;margin-left:-0.5px;top:0;-webkit-transition:all .5s linear !important;-moz-transition:all .5s linear !important;-ms-transition:all .5s linear !important;-o-transition:all .5s linear !important;transition:all .5s linear !important;z-index:-2}.nt-plan-activity{display:flex;margin:2px 0;background-color:rgba(187,187,187,.2509803922)}.actions{height:var(--nt-plan-actions-height)}.actions{position:relative}.period{display:inline-block;height:var(--nt-plan-actions-height);width:120px;position:absolute;left:0px;background:#1da1f2;border-radius:5px;transition:all .5s;cursor:help;-webkit-transition:width 1s ease-in-out;-moz-transition:width 1s ease-in-out;-o-transition:width 1s ease-in-out;transition:width 1s ease-in-out}.period .nt-tooltip{display:none;top:30px;position:relative;padding:1rem;text-align:center;font-size:12px}.period:hover .nt-tooltip{display:inline-block}.period-0{left:340px;visibility:visible;background-color:#456165}.period-1{left:40px;visibility:visible;background-color:green}.period-2{left:120px;visibility:visible;background-color:pink;width:80px}.period-3{left:190px;visibility:visible;background-color:darkred;width:150px}.weeks>span,.days>span{height:25px}.weeks>span{display:inline-block;margin:0;padding:0;font-weight:bold}.weeks>span .week-text{font-size:10px;position:absolute;display:inline-block;padding:3px 4px}.days{z-index:-2;position:relative}.day-text{font-size:10px;position:absolute;display:inline-block;padding:3px 4px}.period span{font-size:12px;vertical-align:top;margin-left:4px;color:#000;background:rgba(255,255,255,.6588235294);border-radius:6px;padding:0 4px}.weeks,.days{height:20px;display:flex;box-sizing:content-box}.months{display:flex}.week,.day{height:20px;position:relative;border:1;flex:auto;border:2px solid #fff;border-radius:4px;background-color:var(--nt-units-background);cursor:help}.years{display:flex}.year{text-align:center;border-right:1px solid 
var(--nt-plan-vertical-line-color);font-weight:bold}.year:first-child{border-left:1px solid var(--nt-plan-vertical-line-color)}.year:first-child:last-child{width:100%}.quarters{display:flex}.quarter{width:12.5%;text-align:center;border-right:1px solid var(--nt-plan-vertical-line-color);font-weight:bold}.quarter:first-child{border-left:1px solid var(--nt-plan-vertical-line-color)}.nt-plan-group{margin:20px 0;position:relative}.nt-plan-group{display:flex}.nt-plan-group-summary{background:#2751b0;width:150px;white-space:normal;padding:.1rem .5rem;border-radius:5px;color:#fff;z-index:3}.nt-plan-group-summary p{margin:0;padding:0;font-size:.6rem;color:#fff}.nt-plan-group-summary,.month,.period,.week,.day,.nt-tooltip{border:3px solid #fff;box-shadow:0 2px 3px -1px rgba(0,0,0,.2),0 3px 3px 0 rgba(0,0,0,.14),0 1px 5px 0 rgba(0,0,0,.12)}.nt-plan-periods{padding-left:150px}.months{z-index:2;position:relative}.weeks{position:relative;top:-2px;z-index:0}.month,.quarter,.year,.week,.day,.nt-tooltip{font-family:Roboto,Helvetica,Arial,sans-serif;box-sizing:border-box}.nt-cards.nt-grid{display:grid;grid-auto-columns:1fr;gap:.5rem;max-width:100vw;overflow-x:auto;padding:1px}.nt-cards.nt-grid.cols-1{grid-template-columns:repeat(1, 1fr)}.nt-cards.nt-grid.cols-2{grid-template-columns:repeat(2, 1fr)}.nt-cards.nt-grid.cols-3{grid-template-columns:repeat(3, 1fr)}.nt-cards.nt-grid.cols-4{grid-template-columns:repeat(4, 1fr)}.nt-cards.nt-grid.cols-5{grid-template-columns:repeat(5, 1fr)}.nt-cards.nt-grid.cols-6{grid-template-columns:repeat(6, 1fr)}@media only screen and (max-width: 400px){.nt-cards.nt-grid{grid-template-columns:repeat(1, 1fr) !important}}.nt-card{box-shadow:0 2px 2px 0 rgba(0,0,0,.14),0 3px 1px -2px rgba(0,0,0,.2),0 1px 5px 0 rgba(0,0,0,.12)}.nt-card:hover{box-shadow:0 2px 2px 0 rgba(0,0,0,.24),0 3px 1px -2px rgba(0,0,0,.3),0 1px 5px 0 rgba(0,0,0,.22)}[data-md-color-scheme=slate] .nt-card{box-shadow:0 2px 2px 0 rgba(4,40,33,.14),0 3px 1px -2px rgba(40,86,94,.47),0 1px 5px 0 
rgba(139,252,255,.64)}[data-md-color-scheme=slate] .nt-card:hover{box-shadow:0 2px 2px 0 rgba(0,255,206,.14),0 3px 1px -2px rgba(33,156,177,.47),0 1px 5px 0 rgba(96,251,255,.64)}.nt-card>a{color:var(--md-default-fg-color)}.nt-card>a>div{cursor:pointer}.nt-card{padding:5px;margin-bottom:.5rem}.nt-card-title{font-size:1rem;font-weight:bold;margin:4px 0 8px 0;line-height:22px}.nt-card-content{padding:.4rem .8rem .8rem .8rem}.nt-card-text{font-size:14px;padding:0;margin:0}.nt-card .nt-card-image{text-align:center;border-radius:2px;background-position:center center;background-size:cover;background-repeat:no-repeat;min-height:120px}.nt-card .nt-card-image.tags img{margin-top:12px}.nt-card .nt-card-image img{height:105px;margin-top:5px}.nt-card a:hover,.nt-card a:focus{color:var(--md-accent-fg-color)}.nt-card h2{margin:0}.span-table-wrapper table{border-collapse:collapse;margin-bottom:2rem;border-radius:.1rem}.span-table td,.span-table th{padding:.2rem;background-color:var(--md-default-bg-color);font-size:.64rem;max-width:100%;overflow:auto;touch-action:auto;border-top:.05rem solid var(--md-typeset-table-color);padding:.9375em 1.25em;vertical-align:top}.span-table tr:first-child td{font-weight:700;min-width:5rem;padding:.9375em 1.25em;vertical-align:top}.span-table td:first-child{border-left:.05rem solid var(--md-typeset-table-color)}.span-table td:last-child{border-right:.05rem solid var(--md-typeset-table-color)}.span-table tr:last-child{border-bottom:.05rem solid var(--md-typeset-table-color)}.span-table [colspan],.span-table [rowspan]{font-weight:bold;border:.05rem solid var(--md-typeset-table-color)}.span-table tr:not(:first-child):hover td:not([colspan]):not([rowspan]),.span-table td[colspan]:hover,.span-table td[rowspan]:hover{background-color:rgba(0,0,0,.035);box-shadow:0 .05rem 0 var(--md-default-bg-color) inset;transition:background-color 125ms}.nt-contribs{margin-top:2rem;font-size:small;border-top:1px dotted #d3d3d3;padding-top:.5rem}.nt-contribs 
.nt-contributors{padding-top:.5rem;display:flex;flex-wrap:wrap}.nt-contribs .nt-contributor{background:#d3d3d3;background-size:cover;width:40px;height:40px;border-radius:100%;margin:0 6px 6px 0;cursor:help;opacity:.7}.nt-contribs .nt-contributor:hover{opacity:1}.nt-contribs .nt-initials{text-transform:uppercase;font-size:24px;text-align:center;width:40px;height:40px;display:inline-block;vertical-align:middle;position:relative;top:2px;color:inherit;font-weight:bold}.nt-contribs .nt-group-0{background-color:var(--nt-color-0)}.nt-contribs .nt-group-1{background-color:var(--nt-color-1)}.nt-contribs .nt-group-2{background-color:var(--nt-color-2)}.nt-contribs .nt-group-3{background-color:var(--nt-color-3)}.nt-contribs .nt-group-4{background-color:var(--nt-color-4)}.nt-contribs .nt-group-5{background-color:var(--nt-color-5)}.nt-contribs .nt-group-6{background-color:var(--nt-color-6)}.nt-contribs .nt-group-7{color:#000;background-color:var(--nt-color-7)}.nt-contribs .nt-group-8{color:#000;background-color:var(--nt-color-8)}.nt-contribs .nt-group-9{background-color:var(--nt-color-9)}.nt-contribs .nt-group-10{background-color:var(--nt-color-10)}.nt-contribs .nt-group-11{background-color:var(--nt-color-11)}.nt-contribs .nt-group-12{background-color:var(--nt-color-12)}.nt-contribs .nt-group-13{background-color:var(--nt-color-13)}.nt-contribs .nt-group-14{background-color:var(--nt-color-14)}.nt-contribs .nt-group-15{color:#000;background-color:var(--nt-color-15)}.nt-contribs .nt-group-16{background-color:var(--nt-color-16)}.nt-contribs .nt-group-17{color:#000;background-color:var(--nt-color-17)}.nt-contribs .nt-group-18{background-color:var(--nt-color-18)}.nt-contribs .nt-group-19{background-color:var(--nt-color-19)}.nt-contribs .nt-group-20{color:#000;background-color:var(--nt-color-20)}.nt-contribs .nt-group-21{color:#000;background-color:var(--nt-color-21)}.nt-contribs .nt-group-22{color:#000;background-color:var(--nt-color-22)}.nt-contribs 
.nt-group-23{color:#000;background-color:var(--nt-color-23)}.nt-contribs .nt-group-24{color:#000;background-color:var(--nt-color-24)}.nt-contribs .nt-group-25{color:#000;background-color:var(--nt-color-25)}.nt-contribs .nt-group-26{color:#000;background-color:var(--nt-color-26)}.nt-contribs .nt-group-27{background-color:var(--nt-color-27)}.nt-contribs .nt-group-28{color:#000;background-color:var(--nt-color-28)}.nt-contribs .nt-group-29{color:#000;background-color:var(--nt-color-29)}.nt-contribs .nt-group-30{background-color:var(--nt-color-30)}.nt-contribs .nt-group-31{background-color:var(--nt-color-31)}.nt-contribs .nt-group-32{color:#000;background-color:var(--nt-color-32)}.nt-contribs .nt-group-33{background-color:var(--nt-color-33)}.nt-contribs .nt-group-34{background-color:var(--nt-color-34)}.nt-contribs .nt-group-35{background-color:var(--nt-color-35)}.nt-contribs .nt-group-36{background-color:var(--nt-color-36)}.nt-contribs .nt-group-37{background-color:var(--nt-color-37)}.nt-contribs .nt-group-38{background-color:var(--nt-color-38)}.nt-contribs .nt-group-39{color:#000;background-color:var(--nt-color-39)}.nt-contribs .nt-group-40{color:#000;background-color:var(--nt-color-40)}.nt-contribs .nt-group-41{color:#000;background-color:var(--nt-color-41)}.nt-contribs .nt-group-42{color:#000;background-color:var(--nt-color-42)}.nt-contribs .nt-group-43{color:#000;background-color:var(--nt-color-43)}.nt-contribs .nt-group-44{color:#000;background-color:var(--nt-color-44)}.nt-contribs .nt-group-45{background-color:var(--nt-color-45)}.nt-contribs .nt-group-46{color:#000;background-color:var(--nt-color-46)}.nt-contribs .nt-group-47{background-color:var(--nt-color-47)}.nt-contribs .nt-group-48{background-color:var(--nt-color-48)}.nt-contribs .nt-group-49{background-color:var(--nt-color-49)} \ No newline at end of file diff --git a/docs/getting-started/first-steps.md b/docs/getting-started/first-steps.md index 403724362..9793cbf4a 100644 --- 
a/docs/getting-started/first-steps.md +++ b/docs/getting-started/first-steps.md @@ -1,11 +1,11 @@ --- -title: Getting Started +title: First Steps alias: - name: getting-started - text: Getting Started + name: first-steps + text: First Steps --- -# Getting started +# First Steps !!! Warning Make sure you have read [[installation]] before using the library. diff --git a/docs/index.md b/docs/index.md index fb6408b9e..c77b2c980 100644 --- a/docs/index.md +++ b/docs/index.md @@ -9,26 +9,39 @@ It runs most of them in parallel either locally or in a cluster and supports distributed caching of results. If you're a first time user of pyDVL, we recommend you to go through the -[[getting-started]] and [[installation]] guides. +[[installation]] and [[first-steps]] guides in the Getting Started section. -::cards:: cols=2 +
-- title: Installation - content: Steps to install and requirements - url: getting-started/installation.md +- :fontawesome-solid-toolbox:{ .lg .middle } __Installation__ + + --- + Steps to install and requirements + + [[installation|:octicons-arrow-right-24: Installation]] + +- :fontawesome-solid-scale-unbalanced:{ .lg .middle } __Data valuation__ + + --- -- title: Data valuation - content: > Basics of data valuation and description of the main algorithms - url: value/ -- title: Influence Function - content: > + [[data-valuation|:octicons-arrow-right-24: Data Valuation]] + +- :fontawesome-solid-scale-unbalanced-flip:{ .lg .middle } __Influence Function__ + + --- + An introduction to the influence function and its computation with pyDVL - url: influence/ -- title: Browse the API - content: Full documentation of the API - url: api/pydvl/ + [[influence-values|:octicons-arrow-right-24: Influence Values]] + +- :fontawesome-regular-file-code:{ .lg .middle } __API Reference__ + + --- + + Full documentation of the API + + [:octicons-arrow-right-24: API Reference](api/pydvl/) -::/cards:: +
diff --git a/docs/influence/influence_function_model.md b/docs/influence/influence_function_model.md index d12d963c9..a126efdc5 100644 --- a/docs/influence/influence_function_model.md +++ b/docs/influence/influence_function_model.md @@ -87,7 +87,7 @@ the Hessian and \(V\) contains the corresponding eigenvectors. See also ```python from pydvl.influence.torch import ArnoldiInfluence -if_model = ArnoldiInfluence +if_model = ArnoldiInfluence( model, loss, hessian_regularization=0.0, @@ -97,4 +97,32 @@ if_model = ArnoldiInfluence ``` These implementations represent the calculation logic on in memory tensors. To scale up to large collection of data, we map these influence function models over these collections. For a detailed discussion see the -documentation page [Scaling Computation](scaling_computation.md). \ No newline at end of file +documentation page [Scaling Computation](scaling_computation.md). + +### Eigenvalue Corrected K-FAC + +K-FAC, short for Kronecker-Factored Approximate Curvature, is a method that approximates the Fisher information matrix ([FIM](https://en.wikipedia.org/wiki/Fisher_information)) of a model. It is possible to show that for classification models with appropriate loss functions the FIM is equal to the Hessian of the model’s loss over the dataset. In this restricted but nonetheless important context, K-FAC offers an efficient way to approximate the Hessian and hence the influence scores. +For more details, refer to the original paper [@martens2015optimizing]. + +The K-FAC method is implemented in the class [EkfacInfluence](pydvl/influence/torch/influence_function_model.py). The following code snippet shows how to use the K-FAC method to calculate the influence function of a model. Note that, in contrast to the other methods for influence function calculation, K-FAC does not require the loss function as an input. This is because the current implementation is only applicable to classification models with a cross-entropy loss function. 
+ +```python +from pydvl.influence.torch import EkfacInfluence +if_model = EkfacInfluence( + model, + hessian_regularization=0.0, +) +``` +Upon initialization, the K-FAC method will parse the model and determine which layers require gradients and which do not. Then it will only calculate the influence scores for the layers that require gradients. The current implementation of the K-FAC method is only available for linear layers, and therefore, if the model contains non-linear layers that require gradients, the K-FAC method will raise a NotImplementedLayerRepresentationException. + +A further improvement of the K-FAC method is the Eigenvalue Corrected K-FAC (EKFAC) method [@george2018fast], which additionally re-fits the eigenvalues of the Hessian, thus providing a more accurate approximation. On top of the K-FAC method, the EKFAC method is implemented by setting `update_diagonal=True` when initialising [EkfacInfluence](pydvl/influence/torch/influence_function_model.py). The following code snippet shows how to use the EKFAC method to calculate the influence function of a model. + +```python +from pydvl.influence.torch import EkfacInfluence +if_model = EkfacInfluence( + model, + update_diagonal=True, + hessian_regularization=0.0, +) +if_model.fit(train_loader) +``` \ No newline at end of file diff --git a/docs/value/applications.md b/docs/value/applications.md new file mode 100644 index 000000000..cf07e8cc5 --- /dev/null +++ b/docs/value/applications.md @@ -0,0 +1,91 @@ +--- +title: Applications of data valuation +--- + +# Applications of data valuation + +Data valuation methods hold promise for improving various aspects +of data engineering and machine learning workflows. When applied judiciously, +these methods can enhance data quality, model performance, and cost-effectiveness. + +However, the results can be inconsistent. Values have a strong dependency +on the training procedure and the performance metric used. 
For instance, +accuracy is a poor metric for imbalanced sets and this has a stark effect +on data values. Some models exhibit great variance in some regimes +and this again has a detrimental effect on values. + +While still an evolving field with methods requiring careful use, data valuation can +be applied across a wide range of data engineering tasks. For a comprehensive +overview, along with concrete examples, please refer to the [Transferlab blog +post]({{ transferlab.website }}blog/data-valuation-applications/) on this topic. + +## Data Engineering + +While still an emerging field, judicious use of data valuation techniques +has the potential to enhance data quality, model performance, +and the cost-effectiveness of data workflows in many applications. +Some of the promising applications in data engineering include: + +- Removing low-value data points can reduce noise and increase model performance. + However, care is needed to avoid overfitting when iteratively retraining on pruned datasets. +- Pruning redundant samples enables more efficient training of large models. + Value-based metrics can determine which data to discard for optimal efficiency gains. +- Computing value scores for unlabeled data points supports efficient active learning. + High-value points can be prioritized for labeling to maximize gains in model performance. +- Analyzing high- and low-value data provides insights to guide targeted data collection + and improve upstream data processes. Low-value points may reveal data issues to address. +- Data value metrics can also help identify irrelevant or duplicated data + when evaluating offerings from data providers. + +## Model development + +Data valuation techniques can provide insights for model debugging and interpretation. 
+Some of the useful applications include: + +- Interpretation and debugging: Analyzing the most or least valuable samples + for a class can reveal cases where the model relies on confounding features + instead of true signal. Investigating influential points for misclassified examples + highlights limitations to address. +- Sensitivity/robustness analysis: Prior work shows removing a small fraction + of highly influential data can completely flip model conclusions. + This reveals potential issues with the modeling approach, data collection process, + or intrinsic difficulty of the problem that require further inspection. + Robust models require many points removed before conclusions meaningfully shift. + High sensitivity means conclusions heavily depend on small subsets of data, + indicating deeper problems to resolve. +- Monitoring changes in data value during training provides insights into + model convergence and overfitting. +- Continual learning: in order to avoid forgetting when training on new data, + a subset of previously seen data is presented again. Data valuation helps + in the selection of highly influential samples. + +## Attacks + +Data valuation techniques have applications in detecting data manipulation and contamination: + +- Watermark removal: Points with low value on a correct validation set may be + part of a watermarking mechanism. Removing them can strip a model of its fingerprints. +- Poisoning attacks: Influential points can be shifted to induce large changes + in model estimators. However, the feasibility of such attacks is limited, + and their value for adversarial training is unclear. + +Overall, while data valuation techniques show promise for identifying anomalous +or manipulated data, more research is needed to develop robust methods suited +for security applications. 
+ +## Data markets + +Additionally, one of the motivating applications for the whole field is that of +data markets: a marketplace where data owners can sell their data to interested +parties. In this setting, data valuation can be a key component to determine the +price of data. Market pricing depends on the value addition for buyers +(e.g. improved model performance) and costs/privacy concerns for sellers. + +Game-theoretic valuation methods like Shapley values can help assign fair prices, +but have limitations around handling duplicates or adversarial data. +Model-free methods like LAVA [@just_lava_2023] and CRAIG are +particularly well suited for this, as they use the Wasserstein distance between +a vendor's data and the buyer's to determine the value of the former. + +However, this is a complex problem which can face mundane practical obstacles, such as +the fact that data owners may not wish to disclose their data for valuation. diff --git a/docs/value/index.md b/docs/value/index.md index 2440428d3..ac87a977f 100644 --- a/docs/value/index.md +++ b/docs/value/index.md @@ -83,33 +83,6 @@ among all samples, failing to identify repeated ones as unnecessary, with e.g. a zero value. -## Applications of data valuation - -Many applications are touted for data valuation, but the results can be -inconsistent. Values have a strong dependency on the training procedure and the -performance metric used. For instance, accuracy is a poor metric for imbalanced -sets and this has a stark effect on data values. Some models exhibit great -variance in some regimes and this again has a detrimental effect on values. - -Nevertheless, some of the most promising applications are: - -* Cleaning of corrupted data. -* Pruning unnecessary or irrelevant data. -* Repairing mislabeled data. -* Guiding data acquisition and annotation (active learning). -* Anomaly detection and model debugging and interpretation. 
- -Additionally, one of the motivating applications for the whole field is that of -data markets: a marketplace where data owners can sell their data to interested -parties. In this setting, data valuation can be key component to determine the -price of data. Algorithm-agnostic methods like LAVA [@just_lava_2023] are -particularly well suited for this, as they use the Wasserstein distance between -a vendor's data and the buyer's to determine the value of the former. - -However, this is a complex problem which can face practical banal problems like -the fact that data owners may not wish to disclose their data for valuation. - - ## Computing data values Using pyDVL to compute data values is a simple process that can be broken down diff --git a/mkdocs.yml b/mkdocs.yml index 408b26b75..7ae01a1cb 100644 --- a/mkdocs.yml +++ b/mkdocs.yml @@ -6,6 +6,40 @@ repo_url: "https://github.com/aai-institute/pyDVL" copyright: "Copyright © AppliedAI Institute gGmbH" remote_branch: gh-pages +nav: + - Home: index.md + - Getting Started: + - Installation: getting-started/installation.md + - First steps: getting-started/first-steps.md + - Data Valuation: + - Introduction: value/index.md + - Applications: value/applications.md + - Notation: value/notation.md + - Shapley values: value/shapley.md + - Semi-values: value/semi-values.md + - The Core: value/the-core.md + - Classwise Shapley: value/classwise-shapley.md + - The Influence Function: + - Introduction: influence/index.md + - Influence Function Model: influence/influence_function_model.md + - Scaling Computation: influence/scaling_computation.md + - Examples: + - Data Valuation: + - Shapley values: examples/shapley_basic_spotify.ipynb + - KNN Shapley: examples/shapley_knn_flowers.ipynb + - Data utility learning: examples/shapley_utility_learning.ipynb + - Least Core: examples/least_core_basic.ipynb + - Data OOB: examples/data_oob.ipynb + - Influence Function: + - For CNNs: examples/influence_imagenet.ipynb + - For mislabeled data: 
examples/influence_synthetic.ipynb + - For outlier detection: examples/influence_wine.ipynb + - For sentiment analysis: examples/influence_sentiment_analysis.ipynb + - Code: + - API Reference: api/pydvl/ + - Changelog: CHANGELOG.md + - Development Guidelines: CONTRIBUTING.md + watch: - src/pydvl - notebooks @@ -13,6 +47,7 @@ watch: hooks: - build_scripts/copy_notebooks.py - build_scripts/copy_changelog.py + - build_scripts/copy_contributing_guide.py - build_scripts/modify_binder_link.py plugins: @@ -106,18 +141,26 @@ theme: - content.code.annotate - content.code.copy - navigation.footer -# - content.tooltips # insiders only + - content.tooltips # - navigation.indexes - navigation.instant - navigation.path -# - navigation.sections + - navigation.sections # - navigation.tabs - navigation.top - navigation.tracking - search.suggest - search.highlight - toc.follow - palette: # Palette toggle for light mode + palette: + # Palette toggle for automatic mode + - media: "(prefers-color-scheme)" + scheme: default + primary: teal + toggle: + icon: material/brightness-auto + name: Switch to light mode + # Palette toggle for light mode - media: "(prefers-color-scheme: light)" scheme: default primary: teal @@ -130,11 +173,11 @@ theme: primary: teal toggle: icon: material/brightness-4 - name: Switch to light mode + name: Switch to system preference extra_css: - css/extra.css - - css/neoteroi.css + - css/grid-cards.css extra_javascript: - javascripts/mathjax.js @@ -143,8 +186,7 @@ extra_javascript: extra: transferlab: - website: https://transferlab.appliedai.de - data_valuation_review: https://transferlab.appliedai.de/reviews/data-valuation + website: https://transferlab.ai/ copyright_link: https://appliedai-institute.de version: provider: mike @@ -166,7 +208,6 @@ markdown_extensions: - footnotes - markdown_captions - md_in_html - - neoteroi.cards - codehilite - toc: permalink: True @@ -174,8 +215,8 @@ markdown_extensions: - pymdownx.tabbed: alternate_style: true - pymdownx.emoji: 
- emoji_index: !!python/name:materialx.emoji.twemoji - emoji_generator: !!python/name:materialx.emoji.to_svg + emoji_index: !!python/name:material.extensions.emoji.twemoji + emoji_generator: !!python/name:material.extensions.emoji.to_svg - pymdownx.highlight: anchor_linenums: true pygments_lang_class: true @@ -188,33 +229,3 @@ markdown_extensions: - docs_includes/abbreviations.md - pymdownx.superfences - pymdownx.details - -nav: - - Home: index.md - - Getting Started: - - Installation: getting-started/installation.md - - First steps: getting-started/first-steps.md - - Data Valuation: - - Introduction: value/index.md - - Notation: value/notation.md - - Shapley values: value/shapley.md - - Semi-values: value/semi-values.md - - The core: value/the-core.md - - Classwise Shapley: value/classwise-shapley.md - - Examples: - - Shapley values: examples/shapley_basic_spotify.ipynb - - KNN Shapley: examples/shapley_knn_flowers.ipynb - - Data utility learning: examples/shapley_utility_learning.ipynb - - Least Core: examples/least_core_basic.ipynb - - Data OOB: examples/data_oob.ipynb - - The Influence Function: - - Introduction: influence/index.md - - Influence Function Model: influence/influence_function_model.md - - Scaling Computation: influence/scaling_computation.md - - Examples: - - For CNNs: examples/influence_imagenet.ipynb - - For mislabeled data: examples/influence_synthetic.ipynb - - For outlier detection: examples/influence_wine.ipynb - - Code: - - Changelog: CHANGELOG.md - - API: api/pydvl/ diff --git a/notebooks/influence_sentiment_analysis.ipynb b/notebooks/influence_sentiment_analysis.ipynb new file mode 100644 index 000000000..e11ff92b2 --- /dev/null +++ b/notebooks/influence_sentiment_analysis.ipynb @@ -0,0 +1,1284 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Influence functions for Large Language Models" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "This notebook showcases the use of 
influence functions for large language models. In particular, it focuses on sentiment analysis using the [IMDB dataset](https://ai.stanford.edu/~amaas/data/sentiment/) and a fine-tuned [BERT](https://arxiv.org/abs/1810.04805) model.\n", + "\n", + "Not all the methods for influence function calculation can scale to large models and datasets. In this notebook we will use the [Kronecker-Factored Approximate Curvature](https://arxiv.org/abs/1503.05671) method, which is the only one that can scale to current state-of-the-art language models.\n", + "\n", + "The notebook is structured as follows:\n", + "\n", + "- [Setup](#Setup) imports the required libraries and downloads the dataset and the model.\n", + "- [Sentiment analysis](#Sentiment-analysis) loads the model and the dataset and goes through a few examples of sentiment analysis.\n", + "- [Model and data preparation](#Model-and-data-preparation) prepares the model and the dataset for influence function calculation. In particular, it assigns all the linear layers to require gradients and wraps the model so that only logits are returned (and not the loss or attention masks).\n", + "- [Influence function computation](#Influence-function-computation): shows how to calculate the influence function for a few test and train examples.\n", + "- [Analysis of influence values](#Analysis-of-influence-values): analyses the influence values, trying to extract general information about the model and how it is affected by corruption in the training data.\n", + "- [Influence functions by layer](#Influence-functions-by-layer): since ekfac is based on a block diagonal approximation of the Fisher information matrix, we can compute the influence function separately for each layer of the neural network. This section shows how to do that and how to analyse the results.\n", + "\n", + "Finally, the [Appendix](#Appendix) shows how to select the Hessian regularization parameter to obtain the best influence function approximation." 
+ ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "
\n", + "\n", + "If you are reading this in the documentation, some boilerplate has been omitted for convenience.\n", + "\n", + "
" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Setup" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": { + "tags": [ + "hide-input" + ] + }, + "outputs": [], + "source": [ + "%load_ext autoreload" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Let's start by importing the required libraries. If not already installed, you can install them with `pip install -r requirements-notebooks.txt`." + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": { + "tags": [ + "hide-output" + ] + }, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/Users/fabio/miniconda3/envs/pydvl_env/lib/python3.9/site-packages/tqdm/auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n", + " from .autonotebook import tqdm as notebook_tqdm\n" + ] + } + ], + "source": [ + "from copy import deepcopy\n", + "from typing import Sequence\n", + "\n", + "import matplotlib.pyplot as plt\n", + "import torch\n", + "import torch.nn.functional as F\n", + "from datasets import load_dataset\n", + "from IPython.display import HTML, display\n", + "from sklearn.metrics import f1_score\n", + "from transformers import AutoModelForSequenceClassification, AutoTokenizer\n", + "\n", + "from pydvl.influence.torch import EkfacInfluence\n", + "from support.torch import ImdbDataset, ModelLogitsWrapper" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": { + "tags": [ + "hide-input" + ] + }, + "outputs": [], + "source": [ + "plt.rcParams[\"figure.figsize\"] = (16, 8)\n", + "plt.rcParams[\"font.size\"] = 12\n", + "plt.rcParams[\"xtick.labelsize\"] = 12\n", + "plt.rcParams[\"ytick.labelsize\"] = 10\n", + "plt.rcParams[\"axes.facecolor\"] = (1, 1, 1, 0)\n", + "plt.rcParams[\"figure.facecolor\"] = (1, 1, 1, 0)\n", + "DEVICE = torch.device(\"cuda:0\" if torch.cuda.is_available() 
else \"cpu\")" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": { + "tags": [ + "hide" + ] + }, + "outputs": [], + "source": [ + "seed = 42\n", + "torch.manual_seed(seed)\n", + "torch.cuda.manual_seed(seed)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Sentiment Analysis" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Sentiment analysis is the task of classifying a sentence as having a positive or negative sentiment. For example, the sentence \"I love this movie\" has a positive sentiment, while \"I hate this movie\" has a negative sentiment. In this notebook we will use the IMDB dataset, which contains 50,000 movie reviews with corresponding labels. The dataset is split into 25,000 reviews for training and 25,000 reviews for testing. The dataset is balanced, meaning that there are the same number of positive and negative reviews in the training and test set." + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": { + "tags": [ + "hide-output" + ] + }, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Using the latest cached version of the module from /Users/fabio/.cache/huggingface/modules/datasets_modules/datasets/imdb/d613c88cf8fa3bab83b4ded3713f1f74830d1100e171db75bbddb80b3345c9c0 (last modified on Thu Dec 14 21:47:25 2023) since it couldn't be found locally at imdb., or remotely on the Hugging Face Hub.\n", + "Found cached dataset imdb (/Users/fabio/.cache/huggingface/datasets/imdb/plain_text/1.0.0/d613c88cf8fa3bab83b4ded3713f1f74830d1100e171db75bbddb80b3345c9c0)\n", + "100%|██████████| 3/3 [00:00<00:00, 111.43it/s]\n" + ] + } + ], + "source": [ + "imdb = load_dataset(\"imdb\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Let's print an example of review and its label" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + 
"output_type": "stream", + "text": [ + "Here is a sample review with label 0: \n", + "\n" + ] + }, + { + "data": { + "text/html": [ + "Without wishing to be a killjoy, Brad Sykes is responsible for at least two of the most dull and clichéd films i've ever seen - this being one of them, and Camp Blood being another. " + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "I bought this for £1, but remember, you can't put a price on 71 minutes of your life. You'd do well to avoid this turkey, even at a bargain basement price." + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "sample_review = imdb[\"train\"].select([24])\n", + "\n", + "print(f\"Here is a sample review with label {sample_review['label'][0]}: \\n\")\n", + "\n", + "display(HTML(sample_review[\"text\"][0].split(\"
\")[0]))\n", + "display(HTML(sample_review[\"text\"][0].split(\"
\")[-1]))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "The review is negative, and so label 0 is associated with negative sentiment." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "The model is a BERT model fine-tuned on the IMDB dataset. BERT is a large language model that has been pre-trained on a large corpus of text. The model was fine-tuned on the IMDB dataset by AssemblyAI and is available on the HuggingFace model hub. We also load its tokenizer, which is used to convert sentences into numeric tokens." + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [], + "source": [ + "tokenizer = AutoTokenizer.from_pretrained(\"assemblyai/distilbert-base-uncased-sst2\")\n", + "model = AutoModelForSequenceClassification.from_pretrained(\n", + " \"assemblyai/distilbert-base-uncased-sst2\"\n", + ")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Even if the model is trained on movie reviews, it can be used to classify any sentence as positive or negative. Let's try it on a simple sentence created by us." + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [], + "source": [ + "example_phrase = (\n", + " \"Pydvl is the best data valuation library, and it is fully open-source!\"\n", + ")\n", + "\n", + "tokenized_example = tokenizer(\n", + " [example_phrase],\n", + " return_tensors=\"pt\",\n", + " truncation=True,\n", + ")\n", + "\n", + "model_output = model(\n", + " input_ids=tokenized_example.input_ids,\n", + ")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "The model output is a `SequenceClassifierOutput` object, which contains the logits and other information." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": { + "tags": [ + "hide-input" + ] + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Model Output:\n", + " SequenceClassifierOutput(loss=None, logits=tensor([[-2.6237, 2.8350]], grad_fn=), hidden_states=None, attentions=None)\n" + ] + } + ], + "source": [ + "print(\"Model Output:\\n\", model_output)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "For calculating probabilities and for the influence functions we only need the logits. Then the softmax function converts the logits into probabilities." + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [], + "source": [ + "model_predictions = F.softmax(model_output.logits, dim=1)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "The model is quite confident that the sentence has a positive sentiment, which is correct." + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": { + "tags": [ + "hide-input" + ] + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Positive probability: 99.6%\n", + "Negative probability: 0.4%\n" + ] + } + ], + "source": [ + "print(\n", + " \"Positive probability: \" + str(round(model_predictions[0][1].item(), 3) * 100) + \"%\"\n", + ")\n", + "print(\n", + " \"Negative probability: \" + str(round(model_predictions[0][0].item(), 3) * 100) + \"%\"\n", + ")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Let's examine the model's f1 score on a small subset of the test set." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": { + "tags": [ + "hide-output" + ] + }, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Loading cached shuffled indices for dataset at /Users/fabio/.cache/huggingface/datasets/imdb/plain_text/1.0.0/d613c88cf8fa3bab83b4ded3713f1f74830d1100e171db75bbddb80b3345c9c0/cache-c1eaa46e94dfbfd3.arrow\n", + "Loading cached processed dataset at /Users/fabio/.cache/huggingface/datasets/imdb/plain_text/1.0.0/d613c88cf8fa3bab83b4ded3713f1f74830d1100e171db75bbddb80b3345c9c0/cache-5dd4cdcbaa0bcc93.arrow\n" + ] + } + ], + "source": [ + "sample_test_set = imdb[\"test\"].shuffle(seed=seed).select(range(50))\n", + "sample_test_set = sample_test_set.map(\n", + " lambda example: tokenizer(example[\"text\"], truncation=True, padding=\"max_length\"),\n", + " batched=True,\n", + ")\n", + "sample_test_set.set_format(\"torch\", columns=[\"input_ids\", \"attention_mask\", \"label\"])\n", + "model.eval()\n", + "with torch.no_grad():\n", + " logits = model(\n", + " input_ids=sample_test_set[\"input_ids\"],\n", + " attention_mask=sample_test_set[\"attention_mask\"],\n", + " ).logits\n", + " predictions = torch.argmax(logits, dim=1)" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "F1 Score: 0.955\n" + ] + } + ], + "source": [ + "f1_score_value = f1_score(sample_test_set[\"label\"], predictions)\n", + "print(f\"F1 Score: {round(f1_score_value, 3)}\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Model and Data Preparation" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "In this section we will define two helper functions that will be used in the rest of the notebook. 
" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": {}, + "outputs": [], + "source": [ + "def print_sentiment_preds(\n", + " model: ModelLogitsWrapper, model_input: torch.Tensor, true_label: int\n", + "):\n", + " \"\"\"\n", + " Prints the sentiment predictions in a human-readable format given a model and an\n", + " input. It also prints the true label.\n", + " \"\"\"\n", + " model_predictions = F.softmax(model(model_input.unsqueeze(0)), dim=1)\n", + " print(\n", + " \"Positive probability: \"\n", + " + str(round(model_predictions[0][1].item(), 3) * 100)\n", + " + \"%\"\n", + " )\n", + " print(\n", + " \"Negative probability: \"\n", + " + str(round(model_predictions[0][0].item(), 3) * 100)\n", + " + \"%\"\n", + " )\n", + "\n", + " true_label = \"Positive\" if true_label == 1 else \"Negative\"\n", + " print(f\"True label: {true_label} \\n\")\n", + "\n", + "\n", + "def strip_layer_names(param_names: Sequence[str]):\n", + " \"\"\"\n", + " Helper function that strips the parameter names of the model and the transformer,\n", + " so that they can be printed and compared more easily.\n", + " \"\"\"\n", + " stripped_param_names = []\n", + " for name in param_names:\n", + " name = name.replace(\"model.\", \"\")\n", + " if name.startswith(\"distilbert.transformer.\"):\n", + " name = name.replace(\"distilbert.transformer.\", \"\")\n", + " stripped_param_names.append(name)\n", + " return stripped_param_names" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Importantly, we will need to assign all the linear layers to require gradients, so that we can compute the influence function with respect to them. Keep in mind that the current implementation of Ekfac only supports linear layers, so if any other type of layer in the model requires gradients the initialisation of the influence function class will fail." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 15, + "metadata": {}, + "outputs": [], + "source": [ + "for param in model.named_parameters():\n", + " param[1].requires_grad = False\n", + "\n", + "for m_name, module in model.named_modules():\n", + " if len(list(module.children())) == 0 and len(list(module.parameters())) > 0:\n", + " if isinstance(module, torch.nn.Linear):\n", + " for p_name, param in module.named_parameters():\n", + " if (\n", + " \"ffn\" in m_name\n", + " or \"pre_classifier\" in m_name\n", + " or \"classifier\" in m_name\n", + " ):\n", + " param.requires_grad = True" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Albeit restrictive, linear layers constitute a large fraction of the parameters of most large language models, and so our analysis still holds a lot of information about the full neural network." + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "metadata": { + "tags": [ + "hide-input" + ] + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Total parameters: 66.96 millions\n", + "Parameters requiring gradients: 28.93 millions\n", + "Ratio of Linear over other layer types: 43.20%\n" + ] + } + ], + "source": [ + "total_params = sum(p.numel() for p in model.parameters()) / 1e6\n", + "params_requiring_grad = (\n", + " sum(p.numel() for p in model.parameters() if p.requires_grad) / 1e6\n", + ")\n", + "\n", + "print(\"Total parameters: {:.2f} millions\".format(total_params))\n", + "print(\"Parameters requiring gradients: {:.2f} millions\".format(params_requiring_grad))\n", + "print(\n", + " \"Ratio of Linear over other layer types: {:.2f}%\".format(\n", + " (params_requiring_grad / total_params) * 100\n", + " )\n", + ")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Influence function computation" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "We are now ready to compute the influence function for a 
few testing and training examples. Let's start by selecting a subset of the full training and testing dataset and wrapping them in a `DataLoader` object, so that we can easily do batching." + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "metadata": { + "tags": [ + "hide-output" + ] + }, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Loading cached shuffled indices for dataset at /Users/fabio/.cache/huggingface/datasets/imdb/plain_text/1.0.0/d613c88cf8fa3bab83b4ded3713f1f74830d1100e171db75bbddb80b3345c9c0/cache-9c48ce5d173413c7.arrow\n", + "Loading cached shuffled indices for dataset at /Users/fabio/.cache/huggingface/datasets/imdb/plain_text/1.0.0/d613c88cf8fa3bab83b4ded3713f1f74830d1100e171db75bbddb80b3345c9c0/cache-c1eaa46e94dfbfd3.arrow\n", + " 0%| | 0/1 [00:00" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "plt.imshow(ekfac_train_influences.numpy().astype(int), vmin=-1000, vmax=1000)\n", + "plt.colorbar(label=\"Influence value \")\n", + "plt.title(\"Influence of training examples on test examples\")\n", + "plt.xlabel(\"Training examples idx\")\n", + "plt.ylabel(\"Test examples idx\")\n", + "plt.show()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Most of the test and training examples have similar influence, close to zero. However, there is one test sample and one training sample that stand out. In particular, their cross influence is very large and negative. Let's examine them more closely." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 23, + "metadata": { + "tags": [ + "hide-input" + ] + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Training example with idx 3: \n", + "\n", + "Positive probability: 18.099999999999998%\n", + "Negative probability: 81.89999999999999%\n", + "True label: Positive \n", + "\n", + "Sentence:\n" + ] + }, + { + "data": { + "text/html": [ + "In the process of trying to establish the audiences' empathy with Jake Roedel (Tobey Maguire) the filmmakers slander the North and the Jayhawkers. Missouri never withdrew from the Union and the Union Army was not an invading force. The Southerners fought for State's Rights: the right to own slaves, elect crooked legislatures and judges, and employ a political spoils system. There's nothing noble in that. The Missourians could have easily traveled east and joined the Confederate Army.

It seems to me that the story has nothing to do with ambiguity. When Jake leaves the Bushwhackers, it's not because he saw error in his way, he certainly doesn't give himself over to the virtue of the cause of abolition." + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "train_sentence_idx = 3\n", + "\n", + "print(f\"Training example with idx {train_sentence_idx}: \\n\")\n", + "\n", + "print_sentiment_preds(\n", + " wrapped_model,\n", + " train_input[train_sentence_idx],\n", + " train_labels[train_sentence_idx].item(),\n", + ")\n", + "\n", + "print(\"Sentence:\")\n", + "display(HTML(train_text[train_sentence_idx]))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "We can see that, despite being positive, this review is quite hard to classify. Its language is overall negative, mostly associated with the facts narrated rather than the movie itself. Notice how several terms are related to war and invasion." + ] + }, + { + "cell_type": "code", + "execution_count": 24, + "metadata": { + "tags": [ + "hide-input" + ] + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Test example with idx 4: \n", + "\n", + "Positive probability: 39.6%\n", + "Negative probability: 60.4%\n", + "True label: Negative \n", + "\n", + "Sentence:\n" + ] + }, + { + "data": { + "text/html": [ + "\"An astronaut (Michael Emmet) dies while returning from a mission and his body is recovered by the military. The base where the dead astronaut is taken to becomes the scene of a bizarre invasion plan from outer space. Alien embryos inside the dead astronaut resurrect the corpse and begin a terrifying assault on the military staff in the hopes of conquering the world,\" according to the DVD sleeve's synopsis.

A Roger Corman \"American International\" production. The man who fell to Earth impregnated, Mr. Emmet (as John Corcoran), does all right. Angela Greene is his pretty conflicted fiancée. And, Ed Nelson (as Dave Randall) is featured as prominently. With a bigger budget, better opening, and a re-write for crisper characterizations, this could have been something approaching classic 1950s science fiction.

*** Night of the Blood Beast (1958) Bernard L. Kowalski, Roger Corman ~ Michael Emmet, Angela Greene, Ed Nelson" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "test_sentence_idx = 4\n", + "\n", + "print(f\"Test example with idx {test_sentence_idx}: \\n\")\n", + "\n", + "print_sentiment_preds(\n", + " wrapped_model, test_input[test_sentence_idx], test_labels[test_sentence_idx].item()\n", + ")\n", + "\n", + "print(\"Sentence:\")\n", + "display(HTML(test_text[test_sentence_idx]))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "This review is also quite hard to classify. This time it has a negative sentiment towards the movie, but it also contains several words with positive connotation. The parallel with the previous review is quite interesting since both talk about an invasion. " + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "As is often the case when analysing influence functions, it is hard to understand why these examples have such a large influence. We have seen some interesting patterns, mostly related to similarities in the language and words used, but it is hard to say with certainty if these are the reasons for such a large influence.\n", + "\n", + "A [recent paper](https://arxiv.org/abs/2308.03296) has explored this topic in great detail, even for much larger language models than BERT (up to ~50 billion parameters!). 
Among the most interesting findings is that smaller models tend to rely a lot on word-to-word correspondences, while larger models are more capable of extracting higher level concepts, drawing connections between words across multiple phrases.\n", + "\n", + "For more info, you can visit our [blog on influence functions for large language models](https://transferlab.ai/pills/2023/llm-influences-with-ekfac/)." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Influence of corrupted training examples" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "In this section we want to get an idea of how influence functions change when training examples are corrupted. In the next cell we will flip the label of all the training examples and compute the influences on the same test batch as before." + ] + }, + { + "cell_type": "code", + "execution_count": 25, + "metadata": {}, + "outputs": [], + "source": [ + "modified_train_labels = deepcopy(train_labels)\n", + "modified_train_labels = 1 - train_labels\n", + "\n", + "corrupted_ekfac_train_influences = ekfac_influence_model.influences(\n", + " test_input,\n", + " test_labels,\n", + " train_input,\n", + " modified_train_labels,\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": 26, + "metadata": { + "tags": [ + "hide-input" + ] + }, + "outputs": [ + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "plt.imshow(corrupted_ekfac_train_influences.numpy().astype(int), vmin=-1000, vmax=1000)\n", + "plt.colorbar(label=\"Influence value \")\n", + "plt.title(\"Influence of corrupted training examples\")\n", + "plt.xlabel(\"Training examples idx\")\n", + "plt.ylabel(\"Test examples idx\")\n", + "plt.show()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Overall, when the training labels are corrupted the influences tend to become negative, as expected. Nevertheless, there are cases where values go from slightly negative to positive, mostly isolated to the second and last test samples. Single values can be quite noisy, so it is difficult to generalise this result, but it would be interesting to see how common these cases are in the full test dataset." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Influence functions by layer" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Since ekfac is based on a block diagonal approximation of the Fisher information matrix, we can compute the influence functions separately for each layer of the neural network. In this section we show how to do that and we briefly analyse the results." + ] + }, + { + "cell_type": "code", + "execution_count": 27, + "metadata": {}, + "outputs": [], + "source": [ + "influences_by_layer = ekfac_influence_model.influences_by_layer(\n", + " test_input,\n", + " test_labels,\n", + " train_input,\n", + " train_labels,\n", + ")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "The method `influences_by_layer` returns a dictionary containing the influence function values for each layer of the neural network as a tensor. To recover the full influence values as returned by the `influences` method (as done in the previous section), we need to sum the values over all layers." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 28, + "metadata": {}, + "outputs": [], + "source": [ + "influences = torch.zeros_like(ekfac_train_influences)\n", + "for layer_id, value in influences_by_layer.items():\n", + " influences += value.detach()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "And if we plot the result as a heatmap we can see that the results are the same as in [Negative influence training examples](#Negative-influence-training-examples)" + ] + }, + { + "cell_type": "code", + "execution_count": 29, + "metadata": { + "tags": [ + "hide-input" + ] + }, + "outputs": [ + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "plt.imshow(influences.numpy().astype(int), vmin=-1000, vmax=1000)\n", + "plt.colorbar(label=\"Influence value \")\n", + "plt.title(\"Influence from layers\")\n", + "plt.xlabel(\"Training examples idx\")\n", + "plt.ylabel(\"Test examples idx\")\n", + "plt.show()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Let's analyse how the influence values change across different layers for given test and train examples. " + ] + }, + { + "cell_type": "code", + "execution_count": 30, + "metadata": { + "tags": [ + "hide-input" + ] + }, + "outputs": [ + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "test_idx = 0\n", + "\n", + "train_idx_to_plot = list(range(len(ekfac_train_influences[0])))\n", + "train_idx_to_plot.pop(3)\n", + "for train_idx in train_idx_to_plot:\n", + " infl_across_layers = []\n", + " idx = (test_idx, train_idx)\n", + " for layer_id, value in influences_by_layer.items():\n", + " infl_across_layers.append(value[idx].item())\n", + " plt.plot(infl_across_layers, label=f\"Train example {train_idx}\")\n", + "plt.legend()\n", + "plt.xticks(\n", + " range(len(influences_by_layer.keys())),\n", + " strip_layer_names(influences_by_layer.keys()),\n", + " rotation=70,\n", + ")\n", + "plt.xlabel(\"Layer id\")\n", + "plt.ylabel(\"Influence value\")\n", + "plt.title(f\"Influence of test example {test_idx} on test examples\")\n", + "plt.show()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "The plot above shows the influences for test idx 0 and all train idx apart idx=3 (excluded for clarity since it has a very large absolute value). We can see that the scores tend to keep their sign across layers, but in almost all cases tend to decrease when approaching the output layer. This is not always the case, and in fact other test examples show different patterns. Understanding why this happens is an interesting research direction." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Conclusion" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Ekfac is a powerful approximate method for computing the influence function of models that use a cross-entropy loss. In this notebook we applied it to sentiment analysis with BERT on the IMDB dataset. However, this method can be applied to much larger models and problems, e.g. to analyse the influence of entire sentences generated by GPT, Llama or Claude. 
For more info, you can visit our [paper pill on influence functions for large language models](https://transferlab.ai/pills/2023/llm-influences-with-ekfac/)." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Appendix: Choosing the Hessian regularization value" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "The Hessian regularization value has a large impact on the quality of the influence function approximation. In general, the value should be chosen as small as possible so that the results are finite. In practice, even when finite, the influence values can be too large and lead to numerical instabilities. In this section we show how to efficiently analyse the impact of the Hessian regularization value with the ekfac method." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Let's start with a few additional imports." + ] + }, + { + "cell_type": "code", + "execution_count": 31, + "metadata": {}, + "outputs": [], + "source": [ + "import pandas as pd\n", + "from scipy.stats import pearsonr, spearmanr" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "The method `explore_hessian_regularization` will calculate the influence values of the training examples with each other for a range of Hessian regularization values. The method optimises gradient calculation and Hessian inversion to minimise the computation time." + ] + }, + { + "cell_type": "code", + "execution_count": 32, + "metadata": {}, + "outputs": [], + "source": [ + "influences_by_reg_value = ekfac_influence_model.explore_hessian_regularization(\n", + " train_input,\n", + " train_labels,\n", + " regularization_values=[1e-15, 1e-9, 1e-5, 1],\n", + ")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "The resulting object, `influences_by_reg_value`, is a dictionary that associates to each regularization value the influences for each layer of the neural network. 
This is a lot of data, so we will first organise it in a pandas dataframe and take the average across training examples." + ] + }, + { + "cell_type": "code", + "execution_count": 33, + "metadata": {}, + "outputs": [], + "source": [ + "cols = [\"reg_value\", \"layer_id\", \"mean_infl\"]\n", + "infl_df = pd.DataFrame(influences_by_reg_value, columns=cols)\n", + "for reg_value in influences_by_reg_value:\n", + " for layer_id, layer_influences in influences_by_reg_value[reg_value].items():\n", + " mean_infl = torch.mean(layer_influences, dim=0).detach().numpy()\n", + " infl_df = pd.concat(\n", + " [infl_df, pd.DataFrame([[reg_value, layer_id, mean_infl]], columns=cols)]\n", + " )" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "With this dataframe, we can take contiguous values of regularization and, for each layer, calculate the Pearson and Spearman correlation coefficients. This will give us an idea of how the influence values change with the regularization value." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 34, + "metadata": {}, + "outputs": [], + "source": [ + "result_corr = {}\n", + "for layer_id, group_df in infl_df.groupby(\"layer_id\"):\n", + " result_corr[layer_id + \"_pearson\"] = {}\n", + " result_corr[layer_id + \"_spearman\"] = {}\n", + " for idx, mean_infl in enumerate(group_df[\"mean_infl\"]):\n", + " if idx == 0:\n", + " continue\n", + " reg_value_diff = f\"Reg: {group_df['reg_value'].iloc[idx-1]} -> {group_df['reg_value'].iloc[idx]}\"\n", + " pearson = pearsonr(mean_infl, group_df[\"mean_infl\"].iloc[idx - 1]).statistic\n", + " spearman = spearmanr(mean_infl, group_df[\"mean_infl\"].iloc[idx - 1]).statistic\n", + " result_corr[layer_id + \"_pearson\"].update({f\"{reg_value_diff}\": pearson})\n", + " result_corr[layer_id + \"_spearman\"].update({f\"{reg_value_diff}\": spearman})\n", + "result_df = pd.DataFrame(result_corr).T" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Let's plot the correlations heatmap. The y-axis reports Spearman and Pearson correlations for each layer, while the x-axis reports pairs of regularization values. High correlations mean that influences are stable across regularization values. " + ] + }, + { + "cell_type": "code", + "execution_count": 35, + "metadata": { + "tags": [ + "hide-input" + ] + }, + "outputs": [ + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "plt.figure(figsize=(10, 6))\n", + "plt.imshow(result_df, cmap=\"coolwarm_r\", aspect=\"auto\")\n", + "plt.xticks(range(result_df.shape[1]), result_df.columns, rotation=45)\n", + "plt.yticks(range(result_df.shape[0]), strip_layer_names(result_df.index))\n", + "plt.colorbar(label=\"Correlation Value\")\n", + "plt.title(\"Correlation Heatmap\")\n", + "plt.xlabel(\"Regularization Values\")\n", + "plt.ylabel(\"Layer ID\")\n", + "plt.show()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "In our case, we can see that for regularization = 1 the spearman correlation becomes very bad. However, for a large range of regularization values smaller than 1 the sample rankings are stable. This is a good indicator that the model is not too sensitive to the regularization value. We therefore chose the value 1e-5 for our analysis." + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "pydvl_env", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.16" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/notebooks/influence_wine.ipynb b/notebooks/influence_wine.ipynb index bc4c0e221..a0f46fcae 100644 --- a/notebooks/influence_wine.ipynb +++ b/notebooks/influence_wine.ipynb @@ -1,7 +1,6 @@ { "cells": [ { - "attachments": {}, "cell_type": "markdown", "id": "a75acfec", "metadata": {}, @@ -26,7 +25,6 @@ ] }, { - "attachments": {}, "cell_type": "markdown", "id": "68ec440b", "metadata": {}, @@ -35,7 +33,6 @@ ] }, { - "attachments": {}, "cell_type": "markdown", "id": "9eb29a26", "metadata": {}, @@ -48,7 +45,6 @@ "execution_count": 1, "id": "cef17bfc", "metadata": { - "editable": true, 
"slideshow": { "slide_type": "" }, @@ -66,13 +62,23 @@ "execution_count": 2, "id": "be813151", "metadata": { - "editable": true, "slideshow": { "slide_type": "" }, - "tags": [] + "tags": [ + "hide-output" + ] }, - "outputs": [], + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/Users/fabio/miniconda3/envs/pydvl_env/lib/python3.9/site-packages/tqdm/auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n", + " from .autonotebook import tqdm as notebook_tqdm\n" + ] + } + ], "source": [ "%autoreload\n", "%matplotlib inline\n", @@ -86,11 +92,17 @@ "import torch.nn.functional as F\n", "from support.common import plot_losses\n", "from support.torch import TorchMLP, fit_torch_model\n", - "from pydvl.influence.torch import DirectInfluence, CgInfluence\n", + "from pydvl.influence.torch import (\n", + " DirectInfluence,\n", + " CgInfluence,\n", + " ArnoldiInfluence,\n", + " EkfacInfluence,\n", + ")\n", "from support.shapley import load_wine_dataset\n", "from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay, f1_score\n", "from torch.optim import Adam, lr_scheduler\n", - "from torch.utils.data import DataLoader, TensorDataset" + "from torch.utils.data import DataLoader, TensorDataset\n", + "from scipy.stats import pearsonr, spearmanr" ] }, { @@ -98,7 +110,6 @@ "execution_count": 3, "id": "02254f9c", "metadata": { - "editable": true, "slideshow": { "slide_type": "" }, @@ -121,7 +132,6 @@ "execution_count": 4, "id": "a656363e", "metadata": { - "editable": true, "slideshow": { "slide_type": "" }, @@ -141,7 +151,6 @@ "execution_count": 5, "id": "df5159e6", "metadata": { - "editable": true, "slideshow": { "slide_type": "" }, @@ -156,7 +165,6 @@ ] }, { - "attachments": {}, "cell_type": "markdown", "id": "be7ddf7c", "metadata": {}, @@ -169,7 +177,6 @@ "execution_count": 6, "id": "0d3e96ca", "metadata": { - "editable": true, "slideshow": { 
"slide_type": "" }, @@ -178,7 +185,7 @@ "outputs": [], "source": [ "training_data, val_data, test_data, feature_names = load_wine_dataset(\n", - " train_size=0.3, test_size=0.6\n", + " train_size=0.6, test_size=0.3\n", ")" ] }, @@ -187,7 +194,6 @@ "execution_count": 7, "id": "cac906e3-aed6-4d11-b563-1b9a91132d29", "metadata": { - "editable": true, "slideshow": { "slide_type": "" }, @@ -203,7 +209,6 @@ ] }, { - "attachments": {}, "cell_type": "markdown", "id": "b96a15cc", "metadata": {}, @@ -225,7 +230,6 @@ ] }, { - "attachments": {}, "cell_type": "markdown", "id": "5de58672", "metadata": {}, @@ -248,7 +252,6 @@ ] }, { - "attachments": {}, "cell_type": "markdown", "id": "a018e72c", "metadata": {}, @@ -263,7 +266,6 @@ "execution_count": 10, "id": "00dc59af", "metadata": { - "editable": true, "slideshow": { "slide_type": "" }, @@ -273,18 +275,11 @@ }, "outputs": [ { - "data": { - "application/vnd.jupyter.widget-view+json": { - "model_id": "7ef3e6e6be3249b28035d8a19f2ea9cf", - "version_major": 2, - "version_minor": 0 - }, - "text/plain": [ - "Model fitting: 0%| | 0/300 [00:00" ] @@ -355,7 +348,6 @@ ] }, { - "attachments": {}, "cell_type": "markdown", "id": "b3345522", "metadata": {}, @@ -368,7 +360,6 @@ "execution_count": 12, "id": "08f1cba4", "metadata": { - "editable": true, "slideshow": { "slide_type": "" }, @@ -380,7 +371,7 @@ "outputs": [ { "data": { - "image/png": "", + "image/png": "", "text/plain": [ "
" ] @@ -401,7 +392,6 @@ ] }, { - "attachments": {}, "cell_type": "markdown", "id": "cca76db8", "metadata": {}, @@ -418,7 +408,7 @@ { "data": { "text/plain": [ - "0.9906846833902615" + "0.9633110554163186" ] }, "execution_count": 13, @@ -431,7 +421,6 @@ ] }, { - "attachments": {}, "cell_type": "markdown", "id": "5332e2b4", "metadata": {}, @@ -440,7 +429,6 @@ ] }, { - "attachments": {}, "cell_type": "markdown", "id": "45dbdd1e", "metadata": {}, @@ -456,7 +444,6 @@ "execution_count": 14, "id": "218d0983", "metadata": { - "editable": true, "slideshow": { "slide_type": "" }, @@ -474,7 +461,6 @@ ] }, { - "attachments": {}, "cell_type": "markdown", "id": "ce21c2dc", "metadata": {}, @@ -483,7 +469,6 @@ ] }, { - "attachments": {}, "cell_type": "markdown", "id": "4153c7db", "metadata": {}, @@ -502,7 +487,6 @@ ] }, { - "attachments": {}, "cell_type": "markdown", "id": "b5e254ad", "metadata": {}, @@ -515,7 +499,6 @@ "execution_count": 16, "id": "233a57da", "metadata": { - "editable": true, "slideshow": { "slide_type": "" }, @@ -527,7 +510,7 @@ "outputs": [ { "data": { - "image/png": "", + "image/png": "", "text/plain": [ "
" ] @@ -550,7 +533,6 @@ ] }, { - "attachments": {}, "cell_type": "markdown", "id": "8dd63529", "metadata": {}, @@ -563,7 +545,6 @@ "execution_count": 17, "id": "8bc72789", "metadata": { - "editable": true, "slideshow": { "slide_type": "" }, @@ -576,8 +557,8 @@ "name": "stdout", "output_type": "stream", "text": [ - "Average influence of corrupted points: -0.051440768\n", - "Average influence of other points: 0.033988394\n" + "Average influence of corrupted points: -1.0782924\n", + "Average influence of other points: 0.10896263\n" ] } ], @@ -593,7 +574,6 @@ ] }, { - "attachments": {}, "cell_type": "markdown", "id": "f1e747b1", "metadata": {}, @@ -602,7 +582,6 @@ ] }, { - "attachments": {}, "cell_type": "markdown", "id": "b00a6164", "metadata": {}, @@ -615,7 +594,6 @@ "execution_count": 18, "id": "462d545e", "metadata": { - "editable": true, "slideshow": { "slide_type": "" }, @@ -634,7 +612,6 @@ "execution_count": 19, "id": "1e096222", "metadata": { - "editable": true, "slideshow": { "slide_type": "" }, @@ -646,7 +623,7 @@ "outputs": [ { "data": { - "image/png": "", + "image/png": "", "text/plain": [ "
" ] @@ -670,7 +647,6 @@ ] }, { - "attachments": {}, "cell_type": "markdown", "id": "656e14dd", "metadata": {}, @@ -679,7 +655,6 @@ ] }, { - "attachments": {}, "cell_type": "markdown", "id": "3bf8c4dd", "metadata": {}, @@ -692,7 +667,6 @@ "execution_count": 20, "id": "efdb4050", "metadata": { - "editable": true, "slideshow": { "slide_type": "" }, @@ -702,18 +676,18 @@ }, "outputs": [ { - "data": { - "application/vnd.jupyter.widget-view+json": { - "model_id": "908d881822e94d7e82078457ab81e35d", - "version_major": 2, - "version_minor": 0 - }, - "text/plain": [ - "Conjugate gradient: 0%| | 0/53 [00:00" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "plt.scatter(mean_train_influences, mean_ekfac_train_influences)\n", + "plt.scatter(\n", + " mean_train_influences[:num_corrupted_idxs],\n", + " mean_ekfac_train_influences[:num_corrupted_idxs],\n", + " facecolors=\"none\",\n", + " edgecolors=\"r\",\n", + " s=60,\n", + ")\n", + "plt.xlabel(\"Direct Influence Score\")\n", + "plt.ylabel(\"EK-FAC Influence Score\")\n", + "plt.title(\"Influence of training points - EK-FAC vs direct method\")\n", + "plt.show()" + ] + }, + { + "cell_type": "markdown", + "id": "de31676a", + "metadata": {}, + "source": [ + "The above plot shows a good correlation between the EK-FAC and the direct method. Corrupted points have been circled in red, and in both the direct and approximate case they are correcly identified as having negative influence on the model's accuracy. This is confirmed by explicit calculation of the Pearson and Spearman correlation coefficients." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 27, + "id": "8537c4b1", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Pearson Correlation EK-FAC vs direct 0.9608164875442669\n", + "Spearman Correlation EK-FAC vs direct 0.8946217598307178\n" + ] + } + ], + "source": [ + "print(\n", + " f\"Pearson Correlation EK-FAC vs direct\",\n", + " pearsonr(mean_ekfac_train_influences, mean_train_influences).statistic,\n", + ")\n", + "print(\n", + " f\"Spearman Correlation EK-FAC vs direct\",\n", + " spearmanr(mean_ekfac_train_influences, mean_train_influences).statistic,\n", + ")" + ] + }, + { + "cell_type": "markdown", + "id": "3a88b8c5", + "metadata": {}, + "source": [ + "The correlation between the EK-FAC and the direct method is quite good, and it improves significantly if we keep only the top-20 highest absolute influences." + ] + }, + { + "cell_type": "code", + "execution_count": 28, + "id": "a3256f00", + "metadata": { + "tags": [ + "hide-input" + ] + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Pearson Correlation EK-FAC vs direct - top-20 influences 0.9901775015427601\n", + "Spearman Correlation EK-FAC vs direct - top-20 influences 0.9428571428571428\n" + ] + } + ], + "source": [ + "highest_inlfuence_idxs = np.argsort(np.abs(mean_train_influences))[-20:]\n", + "print(\n", + " f\"Pearson Correlation EK-FAC vs direct - top-20 influences\",\n", + " pearsonr(\n", + " mean_ekfac_train_influences[highest_inlfuence_idxs],\n", + " mean_train_influences[highest_inlfuence_idxs],\n", + " ).statistic,\n", + ")\n", + "print(\n", + " f\"Spearman Correlation EK-FAC vs direct - top-20 influences\",\n", + " spearmanr(\n", + " mean_ekfac_train_influences[highest_inlfuence_idxs],\n", + " mean_train_influences[highest_inlfuence_idxs],\n", + " ).statistic,\n", + ")" + ] + }, + { + "cell_type": "markdown", + "id": "9a0629d3", + "metadata": {}, + "source": [ + "When we calculate influence scores, 
typically we are more interested in assessing which training points have the highest or lowest impact on the model rather than having a precise estimate of the influence value. EK-FAC then provides a fast and memory-efficient way to calculate a coarse influence ranking of the training points which scales very well even to the largest neural networks." + ] + }, + { + "cell_type": "markdown", + "id": "03728578", + "metadata": {}, + "source": [ + "## Conclusions" + ] + }, + { "cell_type": "markdown", "id": "9245791c", "metadata": { - "editable": true, "slideshow": { "slide_type": "" }, @@ -799,7 +1029,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.8.18" + "version": "3.9.16" }, "vscode": { "interpreter": { diff --git a/notebooks/support/torch.py b/notebooks/support/torch.py index d90ea4895..7286dea51 100644 --- a/notebooks/support/torch.py +++ b/notebooks/support/torch.py @@ -74,8 +74,6 @@ def __init__( layers.append(nn.Tanh()) layers.pop() - layers.append(nn.Softmax(dim=-1)) - self.layers = nn.Sequential(*layers) def forward(self, x: torch.Tensor) -> torch.Tensor: @@ -257,6 +255,48 @@ def load(self) -> Losses: return pkl.load(file) +class ImdbDataset(torch.utils.data.Dataset): + """ + A PyTorch Dataset that takes in an HuggingFace Dataset object and tokenizes it. + The objects returned by __getitem__ are PyTorch tensors, with x being a tuple of + (input_ids, attention_mask), ready to be fed into a model, and y being the label. + It also returns the original text, for printing and debugging purposes. 
+ """ + + def __init__(self, dataset, tokenizer): + self.tokenizer = tokenizer + self.tokenized_ds = dataset.map(self.preprocess_function, batched=True) + self.encodings = self.tokenized_ds["input_ids"] + self.attn_mask = self.tokenized_ds["attention_mask"] + self.labels = self.tokenized_ds["label"] + + def preprocess_function(self, examples): + return self.tokenizer(examples["text"], truncation=True, padding=True) + + def __getitem__(self, idx): + x = torch.tensor([self.encodings[idx], self.attn_mask[idx]]) + y = torch.tensor(self.labels[idx]) + text = self.tokenized_ds[idx]["text"] + return x, y, text + + def __len__(self): + return len(self.labels) + + +class ModelLogitsWrapper(torch.nn.Module): + """ + A wrapper around a PyTorch model that returns only the logits and not the loss or + the attention mask. + """ + + def __init__(self, model): + super().__init__() + self.model = model + + def forward(self, x): + return self.model(x[:, 0], x[:, 1]).logits + + def process_imgnet_io( df: pd.DataFrame, labels: dict ) -> Tuple[torch.Tensor, torch.Tensor]: diff --git a/requirements-docs.txt b/requirements-docs.txt index a4e0b016e..ca554638c 100644 --- a/requirements-docs.txt +++ b/requirements-docs.txt @@ -1,6 +1,6 @@ mike markdown-captions -mkdocs==1.5.2 +mkdocs==1.5.3 mkdocstrings[python]>=0.18 mkdocs-alias-plugin>=0.6.0 mkdocs-autorefs @@ -11,10 +11,9 @@ mkdocs-glightbox mknotebooks>=0.8.0 pygments mkdocs-literate-nav -mkdocs-material +mkdocs-material>=9.5.0 mkdocs-section-index mkdocs-macros-plugin -neoteroi-mkdocs # Needed for card grid on home page pypandoc; sys_platform == 'darwin' pypandoc_binary; sys_platform != 'darwin' GitPython diff --git a/requirements-notebooks.txt b/requirements-notebooks.txt index c45e0a104..11002c375 100644 --- a/requirements-notebooks.txt +++ b/requirements-notebooks.txt @@ -1,4 +1,5 @@ torch==2.0.1 torchvision==0.15.2 datasets==2.14.6 -pillow==9.3.0 +pillow==10.0.1 +transformers==4.35.0 diff --git a/setup.py b/setup.py index 
c0d6766f6..fd5d22229 100644 --- a/setup.py +++ b/setup.py @@ -12,7 +12,7 @@ package_data={"pydvl": ["py.typed"]}, packages=find_packages(where="src"), include_package_data=True, - version="0.8.0", + version="0.8.1", description="The Python Data Valuation Library", install_requires=[ line diff --git a/src/pydvl/__init__.py b/src/pydvl/__init__.py index 84ff4a221..adcd959a3 100644 --- a/src/pydvl/__init__.py +++ b/src/pydvl/__init__.py @@ -7,4 +7,4 @@ The two main modules you will want to look at are [value][pydvl.value] and [influence][pydvl.influence]. """ -__version__ = "0.8.0" +__version__ = "0.8.1" diff --git a/src/pydvl/influence/base_influence_function_model.py b/src/pydvl/influence/base_influence_function_model.py index 18b2ce8ed..0a9a9f33b 100644 --- a/src/pydvl/influence/base_influence_function_model.py +++ b/src/pydvl/influence/base_influence_function_model.py @@ -36,6 +36,12 @@ def __init__(self): ) +class NotImplementedLayerRepresentationException(ValueError): + def __init__(self, module_id: str): + message = f"Only Linear layers are supported, but found module {module_id} requiring grad." + super().__init__(message) + + """Type variable for tensors, i.e. 
sequences of numbers""" TensorType = TypeVar("TensorType", bound=Collection) DataLoaderType = TypeVar("DataLoaderType", bound=Iterable) diff --git a/src/pydvl/influence/influence_calculator.py b/src/pydvl/influence/influence_calculator.py index 7164edf1d..dd2b4383f 100644 --- a/src/pydvl/influence/influence_calculator.py +++ b/src/pydvl/influence/influence_calculator.py @@ -288,8 +288,8 @@ def func(x_numpy: NDArray, y_numpy: NDArray, model: InfluenceFunctionModel): chunk_shape = (chunk_size, self.n_parameters) chunk_array = da.from_delayed( delayed(func)( - x_chunk.squeeze().tolist(), - y_chunk.squeeze().tolist(), + x_chunk.squeeze()[()], + y_chunk.squeeze()[()], self.influence_function_model, ), dtype=x.dtype, @@ -400,10 +400,10 @@ def func( block_array = da.from_delayed( delayed(func)( - x_test_chunk.squeeze().tolist(), - y_test_chunk.squeeze().tolist(), - x_chunk.squeeze().tolist(), - y_chunk.squeeze().tolist(), + x_test_chunk.squeeze()[()], + y_test_chunk.squeeze()[()], + x_chunk.squeeze()[()], + y_chunk.squeeze()[()], self.influence_function_model, ), shape=block_shape, @@ -506,9 +506,9 @@ def func( block_array = da.from_delayed( delayed(func)( - z_test_chunk.squeeze().tolist(), - x_chunk.squeeze().tolist(), - y_chunk.squeeze().tolist(), + z_test_chunk.squeeze()[()], + x_chunk.squeeze()[()], + y_chunk.squeeze()[()], self.influence_function_model, ), shape=block_shape, diff --git a/src/pydvl/influence/torch/__init__.py b/src/pydvl/influence/torch/__init__.py index 9e90fd9df..6caf74d92 100644 --- a/src/pydvl/influence/torch/__init__.py +++ b/src/pydvl/influence/torch/__init__.py @@ -2,5 +2,6 @@ ArnoldiInfluence, CgInfluence, DirectInfluence, + EkfacInfluence, LissaInfluence, ) diff --git a/src/pydvl/influence/torch/influence_function_model.py b/src/pydvl/influence/torch/influence_function_model.py index 3b7c0a688..287291032 100644 --- a/src/pydvl/influence/torch/influence_function_model.py +++ b/src/pydvl/influence/torch/influence_function_model.py @@ -8,7 +8,7 
@@ import logging from abc import ABC, abstractmethod -from typing import Callable, Optional +from typing import Callable, Dict, List, Optional, Tuple import torch from torch import nn as nn @@ -20,6 +20,7 @@ from ..base_influence_function_model import ( InfluenceFunctionModel, InfluenceMode, + NotImplementedLayerRepresentationException, UnsupportedInfluenceModeException, ) from .functional import ( @@ -32,7 +33,11 @@ hessian, model_hessian_low_rank, ) -from .util import flatten_dimensions +from .util import ( + EkfacRepresentation, + empirical_cross_entropy_loss_fn, + flatten_dimensions, +) logger = logging.getLogger(__name__) @@ -87,7 +92,7 @@ def _loss_grad(self, x: torch.Tensor, y: torch.Tensor) -> torch.Tensor: return flatten_dimensions(grads.values(), shape=shape) @log_duration - def _flat_loss_mixed_grad(self, x: torch.Tensor, y: torch.Tensor): + def _flat_loss_mixed_grad(self, x: torch.Tensor, y: torch.Tensor) -> torch.Tensor: mixed_grads = create_per_sample_mixed_derivative_function( self.model, self.loss )(self.model_params, x, y) @@ -187,7 +192,7 @@ def _non_symmetric_values( x: torch.Tensor, y: torch.Tensor, mode: InfluenceMode = InfluenceMode.Up, - ): + ) -> torch.Tensor: if mode == InfluenceMode.Up: if x_test.shape[0] <= x.shape[0]: factor = self.influence_factors(x_test, y_test) @@ -304,6 +309,16 @@ def influences_from_factors( def _solve_hvp(self, rhs: torch.Tensor) -> torch.Tensor: pass + def to(self, device: torch.device): + self.model = self.model.to(device) + self._model_params = { + k: p.detach().to(device) + for k, p in self.model.named_parameters() + if p.requires_grad + } + self._model_device = device + return self + class DirectInfluence(TorchInfluenceFunctionModel): r""" @@ -402,15 +417,9 @@ def _solve_hvp(self, rhs: torch.Tensor) -> torch.Tensor: ).T def to(self, device: torch.device): - self.hessian = self.hessian.to(device) - self.model = self.model.to(device) - self._model_device = device - self._model_params = { - k: 
p.detach().to(device) - for k, p in self.model.named_parameters() - if p.requires_grad - } - return self + if self.is_fitted: + self.hessian = self.hessian.to(device) + return super().to(device) class CgInfluence(TorchInfluenceFunctionModel): @@ -537,16 +546,6 @@ def reg_hvp(v: torch.Tensor): batch_cg[idx] = batch_result return batch_cg - def to(self, device: torch.device): - self.model = self.model.to(device) - self._model_params = { - k: p.detach().to(device) - for k, p in self.model.named_parameters() - if p.requires_grad - } - self._model_device = device - return self - @staticmethod def _solve_cg( hvp: Callable[[torch.Tensor], torch.Tensor], @@ -873,6 +872,600 @@ def _solve_hvp(self, rhs: torch.Tensor) -> torch.Tensor: return result.t() def to(self, device: torch.device): - return ArnoldiInfluence( - self.model.to(device), self.loss, self.low_rank_representation.to(device) + if self.is_fitted: + self.low_rank_representation = self.low_rank_representation.to(device) + return super().to(device) + + +class EkfacInfluence(TorchInfluenceFunctionModel): + r""" + Approximately solves the linear system Hx = b, where H is the Hessian of a model with the empirical + categorical cross entropy as loss function and b is the given right-hand side vector. + It employs the EK-FAC method [@george2018fast], which is based on the kronecker + factorization of the Hessian first introduced in [@martens2015optimizing]. + Contrary to the other influence function methods, this implementation can only + be used for classification tasks with a cross entropy loss function. However, it + is much faster than the other methods and can be used efficiently for very large + datasets and models. For more information, see [Eigenvalue Corrected K-FAC][ekfac]. + + Args: + model: Instance of [torch.nn.Module][torch.nn.Module]. + update_diagonal: If True, the diagonal values in the ekfac representation are + refitted from the training data after calculating the KFAC blocks. 
+ This provides a more accurate approximation of the Hessian, but it is + computationally more expensive. + hessian_regularization: Regularization of the hessian. + progress: If True, display progress bars. + """ + + ekfac_representation: EkfacRepresentation + + def __init__( + self, + model: nn.Module, + update_diagonal: bool = False, + hessian_regularization: float = 0.0, + progress: bool = False, + ): + + super().__init__(model, torch.nn.functional.cross_entropy) + self.hessian_regularization = hessian_regularization + self.update_diagonal = update_diagonal + self.active_layers = self._parse_active_layers() + self.progress = progress + + @property + def is_fitted(self): + try: + return self.ekfac_representation is not None + except AttributeError: + return False + + def _parse_active_layers(self) -> Dict[str, torch.nn.Module]: + """ + Find all layers of the model that have parameters that require grad + and return them in a dictionary. If a layer has some parameters that require + grad and some that do not, raise an error. + """ + active_layers: Dict[str, torch.nn.Module] = {} + for m_name, module in self.model.named_modules(): + if len(list(module.children())) == 0 and len(list(module.parameters())) > 0: + layer_requires_grad = [ + param.requires_grad for param in module.parameters() + ] + if all(layer_requires_grad): + active_layers[m_name] = module + elif any(layer_requires_grad): + raise ValueError( + f"Layer {m_name} has some parameters that require grad and some that do not." + f"This is not supported. Please set all parameters of the layer to require grad." + ) + return active_layers + + @staticmethod + def _init_layer_kfac_blocks( + module: torch.nn.Module, + ) -> Tuple[torch.Tensor, torch.Tensor]: + """ + Initialize the tensors that will store the cumulative forward and + backward KFAC blocks for the layer. 
+ """ + if isinstance(module, nn.Linear): + with_bias = module.bias is not None + sG = module.out_features + sA = module.in_features + int(with_bias) + forward_x_layer = torch.zeros((sA, sA), device=module.weight.device) + grad_y_layer = torch.zeros((sG, sG), device=module.weight.device) + else: + raise NotImplementedLayerRepresentationException(module_id=str(module)) + return forward_x_layer, grad_y_layer + + @staticmethod + def _get_layer_kfac_hooks( + m_name: str, + module: torch.nn.Module, + forward_x: Dict[str, torch.Tensor], + grad_y: Dict[str, torch.Tensor], + ) -> Tuple[Callable, Callable]: + """ + Create the hooks that will be used to compute the forward and backward KFAC + blocks for the layer. The hooks are registered to the layer and will be called + during the forward and backward passes. At each pass, the hooks will update the + tensors that store the cumulative forward and backward KFAC blocks for the layer. + These tensors are stored in the forward_x and grad_y dictionaries. + """ + if isinstance(module, nn.Linear): + with_bias = module.bias is not None + + def input_hook(m, x, y): + x = x[0].reshape(-1, module.in_features) + if with_bias: + x = torch.cat( + (x, torch.ones((x.shape[0], 1), device=module.weight.device)), + dim=1, + ) + forward_x[m_name] += torch.mm(x.t(), x) + + def grad_hook(m, m_grad, m_out): + m_out = m_out[0].reshape(-1, module.out_features) + grad_y[m_name] += torch.mm(m_out.t(), m_out) + + else: + raise NotImplementedLayerRepresentationException(module_id=str(module)) + return input_hook, grad_hook + + def _get_kfac_blocks( + self, + data: DataLoader, + ) -> Tuple[Dict[str, torch.Tensor], Dict[str, torch.Tensor]]: + """ + Compute the KFAC blocks for each layer of the model, using the provided data. + Returns the average forward and backward KFAC blocks for each layer in + dictionaries. 
+ """ + forward_x = {} + grad_y = {} + hooks = [] + data_len = 0 + + for m_name, module in self.active_layers.items(): + forward_x[m_name], grad_y[m_name] = self._init_layer_kfac_blocks(module) + layer_input_hook, layer_grad_hook = self._get_layer_kfac_hooks( + m_name, module, forward_x, grad_y + ) + hooks.append(module.register_forward_hook(layer_input_hook)) + hooks.append(module.register_full_backward_hook(layer_grad_hook)) + + for x, *_ in tqdm( + data, disable=not self.progress, desc="K-FAC blocks - batch progress" + ): + data_len += x.shape[0] + pred_y = self.model(x) + loss = empirical_cross_entropy_loss_fn(pred_y) + loss.backward() + + for key in forward_x.keys(): + forward_x[key] /= data_len + grad_y[key] /= data_len + + for hook in hooks: + hook.remove() + + return forward_x, grad_y + + def fit(self, data: DataLoader) -> EkfacInfluence: + """ + Compute the KFAC blocks for each layer of the model, using the provided data. + It then creates an EkfacRepresentation object that stores the KFAC blocks for + each layer, their eigenvalue decomposition and diagonal values. + """ + forward_x, grad_y = self._get_kfac_blocks(data) + layers_evecs_a = {} + layers_evect_g = {} + layers_diags = {} + for key in self.active_layers.keys(): + evals_a, evecs_a = torch.linalg.eigh(forward_x[key]) + evals_g, evecs_g = torch.linalg.eigh(grad_y[key]) + layers_evecs_a[key] = evecs_a + layers_evect_g[key] = evecs_g + layers_diags[key] = torch.kron(evals_g.view(-1, 1), evals_a.view(-1, 1)) + + self.ekfac_representation = EkfacRepresentation( + self.active_layers.keys(), + self.active_layers.values(), + layers_evecs_a.values(), + layers_evect_g.values(), + layers_diags.values(), + ) + if self.update_diagonal: + self._update_diag(data) + return self + + @staticmethod + def _init_layer_diag(module: torch.nn.Module) -> torch.Tensor: + """ + Initialize the tensor that will store the updated diagonal values of the layer. 
+ """ + if isinstance(module, nn.Linear): + with_bias = module.bias is not None + sG = module.out_features + sA = module.in_features + int(with_bias) + layer_diag = torch.zeros((sA * sG), device=module.weight.device) + else: + raise NotImplementedLayerRepresentationException(module_id=str(module)) + return layer_diag + + def _get_layer_diag_hooks( + self, + m_name: str, + module: torch.nn.Module, + last_x_kfe: Dict[str, torch.Tensor], + diags: Dict[str, torch.Tensor], + ) -> Tuple[Callable, Callable]: + """ + Create the hooks that will be used to update the diagonal values of the layer. + The hooks are registered to the layer and will be called during the forward and + backward passes. At each pass, the hooks will update the tensor that stores the + updated diagonal values of the layer. This tensor is stored in the diags + dictionary. + """ + evecs_a, evecs_g = self.ekfac_representation.get_layer_evecs() + if isinstance(module, nn.Linear): + with_bias = module.bias is not None + + def input_hook(m, x, y): + x = x[0].reshape(-1, module.in_features) + if with_bias: + x = torch.cat( + (x, torch.ones((x.shape[0], 1), device=module.weight.device)), + dim=1, + ) + last_x_kfe[m_name] = torch.mm(x, evecs_a[m_name]) + + def grad_hook(m, m_grad, m_out): + m_out = m_out[0].reshape(-1, module.out_features) + gy_kfe = torch.mm(m_out, evecs_g[m_name]) + diags[m_name] += torch.mm( + gy_kfe.t() ** 2, last_x_kfe[m_name] ** 2 + ).view(-1) + + else: + raise NotImplementedLayerRepresentationException(module_id=str(module)) + return input_hook, grad_hook + + def _update_diag( + self, + data: DataLoader, + ) -> EkfacInfluence: + """ + Compute the updated diagonal values for each layer of the model, using the + provided data. It then updates the EkfacRepresentation object that stores the + KFAC blocks for each layer, their eigenvalue decomposition and diagonal values. + """ + if not self.is_fitted: + raise ValueError( + "EkfacInfluence must be fitted before updating the diagonal." 
+ ) + diags = {} + last_x_kfe: Dict[str, torch.Tensor] = {} + hooks = [] + data_len = 0 + + for m_name, module in self.active_layers.items(): + diags[m_name] = self._init_layer_diag(module) + input_hook, grad_hook = self._get_layer_diag_hooks( + m_name, module, last_x_kfe, diags + ) + hooks.append(module.register_forward_hook(input_hook)) + hooks.append(module.register_full_backward_hook(grad_hook)) + + for x, *_ in tqdm( + data, disable=not self.progress, desc="Update Diagonal - batch progress" + ): + data_len += x.shape[0] + pred_y = self.model(x) + loss = empirical_cross_entropy_loss_fn(pred_y) + loss.backward() + + for key in diags.keys(): + diags[key] /= data_len + + for hook in hooks: + hook.remove() + + self.ekfac_representation = EkfacRepresentation( + self.ekfac_representation.layer_names, + self.ekfac_representation.layers_module, + self.ekfac_representation.evecs_a, + self.ekfac_representation.evecs_g, + diags.values(), + ) + + return self + + @staticmethod + def _solve_hvp_by_layer( + rhs: torch.Tensor, + ekfac_representation: EkfacRepresentation, + hessian_regularization: float, + ) -> Dict[str, torch.Tensor]: + """ + Compute the Hessian Vector Product for each layer of the model, using the + provided ekfac representation and hessian regularization. It returns a + dictionary containing the Hessian Vector Product for each layer. 
+ """ + hvp_layers = {} + start_idx = 0 + for layer_id, (_, evecs_a, evecs_g, diag) in ekfac_representation: + end_idx = start_idx + diag.shape[0] + rhs_layer = rhs[:, start_idx : end_idx - evecs_g.shape[0]].reshape( + rhs.shape[0], evecs_g.shape[0], -1 + ) + bias_layer_b = rhs[:, end_idx - evecs_g.shape[0] : end_idx] + rhs_layer = torch.cat([rhs_layer, bias_layer_b.unsqueeze(2)], dim=2) + v_kfe = torch.einsum( + "bij,jk->bik", + torch.einsum("ij,bjk->bik", evecs_g.t(), rhs_layer), + evecs_a, + ) + inv_diag = 1 / (diag.reshape(*v_kfe.shape[1:]) + hessian_regularization) + inv_kfe = torch.einsum("bij,ij->bij", v_kfe, inv_diag) + inv = torch.einsum( + "bij,jk->bik", + torch.einsum("ij,bjk->bik", evecs_g, inv_kfe), + evecs_a.t(), + ) + hvp_layers[layer_id] = torch.cat( + [inv[:, :, :-1].reshape(rhs.shape[0], -1), inv[:, :, -1]], dim=1 + ) + start_idx = end_idx + return hvp_layers + + @log_duration + def _solve_hvp(self, rhs: torch.Tensor) -> torch.Tensor: + x = rhs.clone() + start_idx = 0 + layer_hvp = self._solve_hvp_by_layer( + rhs, self.ekfac_representation, self.hessian_regularization + ) + for hvp in layer_hvp.values(): + end_idx = start_idx + hvp.shape[1] + x[:, start_idx:end_idx] = hvp + start_idx = end_idx + x.detach_() + return x + + def influences_by_layer( + self, + x_test: torch.Tensor, + y_test: torch.Tensor, + x: Optional[torch.Tensor] = None, + y: Optional[torch.Tensor] = None, + mode: InfluenceMode = InfluenceMode.Up, + ) -> Dict[str, torch.Tensor]: + """ + Compute the influence of the data on the test data for each layer of the model. + + Args: + x_test: model input to use in the gradient computations of + $H^{-1}\nabla_{\theta} \ell(y_{\text{test}}, + f_{\theta}(x_{\text{test}}))$ + y_test: label tensor to compute gradients + x: optional model input to use in the gradient computations + $\nabla_{\theta}\ell(y, f_{\theta}(x))$, + resp. 
$\nabla_{x}\nabla_{\theta}\ell(y, f_{\theta}(x))$, + if None, use $x=x_{\text{test}}$ + y: optional label tensor to compute gradients + mode: enum value of [InfluenceType] + [pydvl.influence.base_influence_model.InfluenceType] + + Returns: + A dictionary containing the influence of the data on the test data for each + layer of the model, with the layer name as key. + """ + if not self.is_fitted: + raise ValueError( + "Instance must be fitted before calling influence methods on it" + ) + + if x is None: + + if y is not None: + raise ValueError( + "Providing labels y, without providing model input x " + "is not supported" + ) + + return self._symmetric_values_by_layer( + x_test.to(self.model_device), + y_test.to(self.model_device), + mode, + ) + + if y is None: + raise ValueError( + "Providing model input x without providing labels y is not supported" + ) + + return self._non_symmetric_values_by_layer( + x_test.to(self.model_device), + y_test.to(self.model_device), + x.to(self.model_device), + y.to(self.model_device), + mode, + ) + + def influence_factors_by_layer( + self, + x: torch.Tensor, + y: torch.Tensor, + ) -> Dict[str, torch.Tensor]: + """ + Computes the approximation of + + \[H^{-1}\nabla_{\theta} \ell(y, f_{\theta}(x))\] + + for each layer of the model separately. + + Args: + x: model input to use in the gradient computations + y: label tensor to compute gradients + + Returns: + A dictionary containing the influence factors for each layer of the model, + with the layer name as key. 
+ """ + if not self.is_fitted: + raise ValueError( + "Instance must be fitted before calling influence methods on it" + ) + + return self._solve_hvp_by_layer( + self._loss_grad(x.to(self.model_device), y.to(self.model_device)), + self.ekfac_representation, + self.hessian_regularization, + ) + + def influences_from_factors_by_layer( + self, + z_test_factors: Dict[str, torch.Tensor], + x: torch.Tensor, + y: torch.Tensor, + mode: InfluenceMode = InfluenceMode.Up, + ) -> Dict[str, torch.Tensor]: + """ + Computation of + + \[ \langle z_{\text{test_factors}}, + \nabla_{\theta} \ell(y, f_{\theta}(x)) \rangle \] + + for the case of up-weighting influence, resp. + + \[ \langle z_{\text{test_factors}}, + \nabla_{x} \nabla_{\theta} \ell(y, f_{\theta}(x)) \rangle \] + + for the perturbation type influence case for each layer of the model separately. + The gradients are meant to be per sample of the batch $(x, y)$. + + Args: + z_test_factors: pre-computed tensor, approximating + $H^{-1}\nabla_{\theta} \ell(y_{\text{test}}, + f_{\theta}(x_{\text{test}}))$ + x: model input to use in the gradient computations + $\nabla_{\theta}\ell(y, f_{\theta}(x))$, + resp. $\nabla_{x}\nabla_{\theta}\ell(y, f_{\theta}(x))$ + y: label tensor to compute gradients + mode: enum value of [InfluenceType] + [pydvl.influence.twice_differentiable.InfluenceType] + + Returns: + A dictionary containing the influence of the data on the test data for each + layer of the model, with the layer name as key. 
+ """ + if mode == InfluenceMode.Up: + total_grad = self._loss_grad( + x.to(self.model_device), y.to(self.model_device) + ) + start_idx = 0 + influences = {} + for layer_id, layer_z_test in z_test_factors.items(): + end_idx = start_idx + layer_z_test.shape[1] + influences[layer_id] = layer_z_test @ total_grad[:, start_idx:end_idx].T + start_idx = end_idx + return influences + elif mode == InfluenceMode.Perturbation: + total_mixed_grad = self._flat_loss_mixed_grad( + x.to(self.model_device), y.to(self.model_device) + ) + start_idx = 0 + influences = {} + for layer_id, layer_z_test in z_test_factors.items(): + end_idx = start_idx + layer_z_test.shape[1] + influences[layer_id] = torch.einsum( + "ia,j...a->ij...", + layer_z_test, + total_mixed_grad[:, start_idx:end_idx], + ) + start_idx = end_idx + return influences + else: + raise UnsupportedInfluenceModeException(mode) + + def _non_symmetric_values_by_layer( + self, + x_test: torch.Tensor, + y_test: torch.Tensor, + x: torch.Tensor, + y: torch.Tensor, + mode: InfluenceMode = InfluenceMode.Up, + ) -> Dict[str, torch.Tensor]: + """ + Similar to _non_symmetric_values, but computes the influence for each layer + separately. Returns a dictionary containing the influence for each layer, + with the layer name as key. 
+ """ + if mode == InfluenceMode.Up: + if x_test.shape[0] <= x.shape[0]: + fac = self.influence_factors_by_layer(x_test, y_test) + values = self.influences_from_factors_by_layer(fac, x, y, mode=mode) + else: + fac = self.influence_factors_by_layer(x, y) + values = self.influences_from_factors_by_layer( + fac, x_test, y_test, mode=mode + ) + elif mode == InfluenceMode.Perturbation: + fac = self.influence_factors_by_layer(x_test, y_test) + values = self.influences_from_factors_by_layer(fac, x, y, mode=mode) + else: + raise UnsupportedInfluenceModeException(mode) + return values + + def _symmetric_values_by_layer( + self, x: torch.Tensor, y: torch.Tensor, mode: InfluenceMode + ) -> Dict[str, torch.Tensor]: + """ + Similar to _symmetric_values, but computes the influence for each layer + separately. Returns a dictionary containing the influence for each layer, + with the layer name as key. + """ + grad = self._loss_grad(x, y) + fac = self._solve_hvp_by_layer( + grad, self.ekfac_representation, self.hessian_regularization ) + + if mode == InfluenceMode.Up: + values = {} + start_idx = 0 + for layer_id, layer_fac in fac.items(): + end_idx = start_idx + layer_fac.shape[1] + values[layer_id] = layer_fac @ grad[:, start_idx:end_idx].T + start_idx = end_idx + elif mode == InfluenceMode.Perturbation: + values = self.influences_from_factors_by_layer(fac, x, y, mode=mode) + else: + raise UnsupportedInfluenceModeException(mode) + return values + + def explore_hessian_regularization( + self, + x: torch.Tensor, + y: torch.Tensor, + regularization_values: List[float], + ) -> Dict[float, Dict[str, torch.Tensor]]: + """ + Efficiently computes the influence for input x and label y for each layer of the + model, for different values of the hessian regularization parameter. This is done + by computing the gradient of the loss function for the input x and label y only once + and then solving the Hessian Vector Product for each regularization value. 
This is + useful for finding the optimal regularization value and for exploring + how robust the influence values are to changes in the regularization value. + + Args: + x: model input to use in the gradient computations + y: label tensor to compute gradients + regularization_values: list of regularization values to use + + Returns: + A dictionary containing with keys being the regularization values and values + being dictionaries containing the influences for each layer of the model, + with the layer name as key. + """ + grad = self._loss_grad(x, y) + influences_by_reg_value = {} + for reg_value in regularization_values: + reg_factors = self._solve_hvp_by_layer( + grad, self.ekfac_representation, reg_value + ) + values = {} + start_idx = 0 + for layer_id, layer_fac in reg_factors.items(): + end_idx = start_idx + layer_fac.shape[1] + values[layer_id] = layer_fac @ grad[:, start_idx:end_idx].T + start_idx = end_idx + influences_by_reg_value[reg_value] = values + return influences_by_reg_value + + def to(self, device: torch.device): + if self.is_fitted: + self.ekfac_representation.to(device) + return super().to(device) diff --git a/src/pydvl/influence/torch/util.py b/src/pydvl/influence/torch/util.py index 757cb7fbb..394cf535a 100644 --- a/src/pydvl/influence/torch/util.py +++ b/src/pydvl/influence/torch/util.py @@ -1,5 +1,6 @@ import logging import math +from dataclasses import dataclass from functools import partial from typing import ( Collection, @@ -453,3 +454,75 @@ def __call__( ) ) ) + + +@dataclass(frozen=True) +class EkfacRepresentation: + r""" + Container class for the EKFAC representation of the Hessian. + It can be iterated over to get the layers names and their corresponding module, + eigenvectors and diagonal elements of the factorized Hessian matrix. + + Args: + layer_names: Names of the layers. + layers_module: The layers. + evecs_a: The a eigenvectors of the ekfac representation. + evecs_g: The g eigenvectors of the ekfac representation. 
+ diags: The diagonal elements of the factorized Hessian matrix. + """ + layer_names: Iterable[str] + layers_module: Iterable[torch.nn.Module] + evecs_a: Iterable[torch.Tensor] + evecs_g: Iterable[torch.Tensor] + diags: Iterable[torch.Tensor] + + def __iter__(self): + return iter( + zip( + self.layer_names, + zip(self.layers_module, self.evecs_a, self.evecs_g, self.diags), + ) + ) + + def get_layer_evecs( + self, + ) -> Tuple[Dict[str, torch.Tensor], Dict[str, torch.Tensor]]: + """ + It returns two dictionaries, one for the a eigenvectors and one for the g + eigenvectors, with the layer names as keys. The eigenvectors are in the same + order as the layers in the model. + """ + evecs_a_dict = {layer_name: evec_a for layer_name, (_, evec_a, _, _) in self} + evecs_g_dict = {layer_name: evec_g for layer_name, (_, _, evec_g, _) in self} + return evecs_a_dict, evecs_g_dict + + def to(self, device: torch.device) -> "EkfacRepresentation": + return EkfacRepresentation( + self.layer_names, + [layer.to(device) for layer in self.layers_module], + [evec_a.to(device) for evec_a in self.evecs_a], + [evec_g.to(device) for evec_g in self.evecs_g], + [diag.to(device) for diag in self.diags], + ) + + +def empirical_cross_entropy_loss_fn( + model_output: torch.Tensor, *args, **kwargs +) -> torch.Tensor: + """ + Computes the empirical cross entropy loss of the model output. This is the + cross entropy loss of the model output without the labels. The function takes + all the usual arguments and keyword arguments of the cross entropy loss + function, so that it is compatible with the PyTorch cross entropy loss + function. However, it ignores everything except the first argument, which is + the model output. + + Args: + model_output: The output of the model. 
+ """ + probs_ = torch.softmax(model_output, dim=1) + log_probs_ = torch.log(probs_) + log_probs_ = torch.where( + torch.isfinite(log_probs_), log_probs_, torch.zeros_like(log_probs_) + ) + return torch.sum(log_probs_ * probs_.detach() ** 0.5) diff --git a/src/pydvl/parallel/futures/ray.py b/src/pydvl/parallel/futures/ray.py index 1a9658744..b15eef9e0 100644 --- a/src/pydvl/parallel/futures/ray.py +++ b/src/pydvl/parallel/futures/ray.py @@ -9,7 +9,6 @@ from weakref import WeakSet, ref import ray -from deprecate import deprecated from pydvl.parallel.config import ParallelConfig @@ -44,12 +43,6 @@ class RayExecutor(Executor): any. See [CancellationPolicy][pydvl.parallel.backend.CancellationPolicy] """ - @deprecated( - target=True, - deprecated_in="0.7.0", - remove_in="0.8.0", - args_mapping={"cancel_futures_on_exit": "cancel_futures"}, - ) def __init__( self, max_workers: Optional[int] = None, diff --git a/src/pydvl/utils/types.py b/src/pydvl/utils/types.py index 1a915c33c..18a22bd26 100644 --- a/src/pydvl/utils/types.py +++ b/src/pydvl/utils/types.py @@ -23,7 +23,7 @@ ] IndexT = TypeVar("IndexT", bound=np.int_) -NameT = TypeVar("NameT", bound=np.object_) +NameT = TypeVar("NameT", np.object_, np.int_) R = TypeVar("R", covariant=True) Seed = Union[int, Generator] diff --git a/src/pydvl/utils/utility.py b/src/pydvl/utils/utility.py index b975c0ff2..1afbfdeb3 100644 --- a/src/pydvl/utils/utility.py +++ b/src/pydvl/utils/utility.py @@ -38,7 +38,7 @@ from pydvl.utils.score import Scorer from pydvl.utils.types import SupervisedModel -__all__ = ["Utility", "DataUtilityLearning", "MinerGameUtility", "GlovesGameUtility"] +__all__ = ["Utility", "DataUtilityLearning"] logger = logging.getLogger(__name__) @@ -356,120 +356,3 @@ def __call__(self, indices: Iterable[int]) -> float: def data(self) -> Dataset: """Returns the wrapped utility's [Dataset][pydvl.utils.dataset.Dataset].""" return self.utility.data - - -class MinerGameUtility(Utility): - r"""Toy game utility that is used 
for testing and demonstration purposes. - - Consider a group of n miners, who have discovered large bars of gold. - - If two miners can carry one piece of gold, then the payoff of a - coalition $S$ is: - - $${ - v(S) = \left\{\begin{array}{lll} - \mid S \mid / 2 & \text{, if} & \mid S \mid \text{ is even} \\ - ( \mid S \mid - 1)/2 & \text{, if} & \mid S \mid \text{ is odd} - \end{array}\right. - }$$ - - If there are more than two miners and there is an even number of miners, - then the core consists of the single payoff where each miner gets 1/2. - - If there is an odd number of miners, then the core is empty. - - Taken from [Wikipedia](https://en.wikipedia.org/wiki/Core_(game_theory)) - - Args: - n_miners: Number of miners that participate in the game. - """ - - def __init__(self, n_miners: int, **kwargs): - if n_miners <= 2: - raise ValueError(f"n_miners, {n_miners} should be > 2") - self.n_miners = n_miners - - x = np.arange(n_miners)[..., np.newaxis] - # The y values don't matter here - y = np.zeros_like(x) - - self.data = Dataset(x_train=x, y_train=y, x_test=x, y_test=y) - - def __call__(self, indices: Iterable[int]) -> float: - n = len(tuple(indices)) - if n % 2 == 0: - return n / 2 - else: - return (n - 1) / 2 - - def _initialize_utility_wrapper(self): - pass - - def exact_least_core_values(self) -> Tuple[NDArray[np.float_], float]: - if self.n_miners % 2 == 0: - values = np.array([0.5] * self.n_miners) - subsidy = 0.0 - else: - values = np.array( - [(self.n_miners - 1) / (2 * self.n_miners)] * self.n_miners - ) - subsidy = (self.n_miners - 1) / (2 * self.n_miners) - return values, subsidy - - def __repr__(self) -> str: - return f"{self.__class__.__name__}(n={self.n_miners})" - - -class GlovesGameUtility(Utility): - r"""Toy game utility that is used for testing and demonstration purposes. - - In this game, some players have a left glove and others a right glove. - Single gloves have a worth of zero while pairs have a worth of 1. 
- - The payoff of a coalition $S$ is: - - $${ - v(S) = \min( \mid S \cap L \mid, \mid S \cap R \mid ) - }$$ - - Where $L$, respectively $R$, is the set of players with left gloves, - respectively right gloves. - - Args: - left: Number of players with a left glove. - right: Number of player with a right glove. - - """ - - def __init__(self, left: int, right: int, **kwargs): - self.left = left - self.right = right - - x = np.empty(left + right)[..., np.newaxis] - # The y values don't matter here - y = np.zeros_like(x) - - self.data = Dataset(x_train=x, y_train=y, x_test=x, y_test=y) - - def __call__(self, indices: Iterable[int]) -> float: - left_sum = float(np.sum(np.asarray(indices) < self.left)) - right_sum = float(np.sum(np.asarray(indices) >= self.left)) - return min(left_sum, right_sum) - - def _initialize_utility_wrapper(self): - pass - - def exact_least_core_values(self) -> Tuple[NDArray[np.float_], float]: - if self.left == self.right: - subsidy = -0.5 - values = np.array([0.5] * (self.left + self.right)) - elif self.left < self.right: - subsidy = 0.0 - values = np.array([1.0] * self.left + [0.0] * self.right) - else: - subsidy = 0.0 - values = np.array([0.0] * self.left + [1.0] * self.right) - return values, subsidy - - def __repr__(self) -> str: - return f"{self.__class__.__name__}(L={self.left}, R={self.right})" diff --git a/src/pydvl/value/games.py b/src/pydvl/value/games.py new file mode 100644 index 000000000..ef942ebcf --- /dev/null +++ b/src/pydvl/value/games.py @@ -0,0 +1,637 @@ +""" +This module provides several predefined games and, depending on the game, +the corresponding Shapley values, Least Core values or both of them, for +benchmarking purposes. + +## References + +[^1]: Castro, J., Gómez, D. and Tejada, J., 2009. + [Polynomial calculation of the Shapley value based on sampling](http://www.sciencedirect.com/science/article/pii/S0305054808000804). + Computers & Operations Research, 36(5), pp.1726-1730. 
+ +""" +from __future__ import annotations + +from abc import ABC, abstractmethod +from functools import lru_cache +from typing import Iterable, Optional, Tuple + +import numpy as np +import scipy as sp +from numpy.typing import NDArray + +from pydvl.utils import Scorer, Status +from pydvl.utils.dataset import Dataset +from pydvl.utils.types import SupervisedModel +from pydvl.utils.utility import Utility +from pydvl.value import ValuationResult + +__all__ = [ + "Game", + "SymmetricVotingGame", + "AsymmetricVotingGame", + "ShoesGame", + "AirportGame", + "MinimumSpanningTreeGame", + "MinerGame", +] + + +class DummyGameDataset(Dataset): + """Dummy game dataset. + + Initializes a dummy game dataset with n_players and an optional + description. + + This class is used internally inside the [Game][pydvl.value.games.Game] + class. + + Args: + n_players: Number of players that participate in the game. + description: Optional description of the dataset. + """ + + def __init__(self, n_players: int, description: Optional[str] = None) -> None: + x = np.arange(0, n_players, 1).reshape(-1, 1) + nil = np.zeros_like(x) + super().__init__( + x, + nil.copy(), + nil.copy(), + nil.copy(), + feature_names=["x"], + target_names=["y"], + description=description, + ) + + def get_test_data( + self, indices: Optional[Iterable[int]] = None + ) -> Tuple[NDArray, NDArray]: + """Returns the subsets of the train set instead of the test set. + + Args: + indices: Indices into the training data. + + Returns: + Subset of the train data. + """ + if indices is None: + return self.x_train, self.y_train + x = self.x_train[indices] + y = self.y_train[indices] + return x, y + + +class DummyModel(SupervisedModel): + """Dummy model class. + + A dummy supervised model used for testing purposes only. 
+ """ + + def __init__(self) -> None: + pass + + def fit(self, x: NDArray, y: NDArray) -> None: + pass + + def predict(self, x: NDArray) -> NDArray: # type: ignore + pass + + def score(self, x: NDArray, y: NDArray) -> float: + # Dummy, will be overriden + return 0 + + +class Game(ABC): + """Base class for games + + Any Game subclass has to implement the abstract `_score` method + to assign a score to each coalition/subset and at least + one of `shapley_values`, `least_core_values`. + + Args: + n_players: Number of players that participate in the game. + score_range: Minimum and maximum values of the `_score` method. + description: Optional string description of the dummy dataset that will be created. + + Attributes: + n_players: Number of players that participate in the game. + data: Dummy dataset object. + u: Utility object with a dummy model and dataset. + """ + + def __init__( + self, + n_players: int, + score_range: Tuple[float, float] = (-np.inf, np.inf), + description: Optional[str] = None, + ): + self.n_players = n_players + self.data = DummyGameDataset(self.n_players, description) + self.u = Utility( + DummyModel(), + self.data, + scorer=Scorer(self._score, range=score_range), + catch_errors=False, + show_warnings=True, + ) + + def shapley_values(self) -> ValuationResult: + raise NotImplementedError( + f"shapley_values method was not implemented for class {self.__class__.__name__}" + ) + + def least_core_values(self) -> ValuationResult: + raise NotImplementedError( + f"least_core_values method was not implemented for class {self.__class__.__name__}" + ) + + @abstractmethod + def _score(self, model: SupervisedModel, X: NDArray, y: NDArray) -> float: + ... + + def __repr__(self) -> str: + return f"{self.__class__.__name__}(n_players={self.n_players})" + + +class SymmetricVotingGame(Game): + r"""Toy game that is used for testing and demonstration purposes. 
+ + A symmetric voting game defined in + (Castro et al., 2009)1 + Section 4.1 + + For this game the utility of a coalition is 1 if its cardinality is + greater than num_samples/2, or 0 otherwise. + + $${ + v(S) = \left\{\begin{array}{ll} + 1, & \text{ if} \quad \mid S \mid > \frac{N}{2} \\ + 0, & \text{ otherwise} + \end{array}\right. + }$$ + + Args: + n_players: Number of players that participate in the game. + """ + + def __init__(self, n_players: int) -> None: + if n_players % 2 != 0: + raise ValueError("n_players must be an even number.") + description = "Dummy data for the symmetric voting game in Castro et al. 2009" + super().__init__( + n_players, + score_range=(0, 1), + description=description, + ) + + def _score(self, model: SupervisedModel, X: NDArray, y: NDArray) -> float: + return 1 if len(X) > len(self.data) // 2 else 0 + + @lru_cache + def shapley_values(self) -> ValuationResult: + exact_values = np.ones(self.n_players) / self.n_players + result: ValuationResult[np.int_, np.int_] = ValuationResult( + algorithm="exact_shapley", + status=Status.Converged, + indices=self.data.indices, + values=exact_values, + variances=np.zeros_like(self.data.x_train), + counts=np.zeros_like(self.data.x_train), + ) + return result + + +class AsymmetricVotingGame(Game): + r"""Toy game that is used for testing and demonstration purposes. + + An asymmetric voting game defined in + (Castro et al., 2009)1 + Section 4.2. + + For this game the player set is $N = \{1,\dots,51\}$ and + the utility of a coalition is given by: + + $${ + v(S) = \left\{\begin{array}{ll} + 1, & \text{ if} \quad \sum\limits_{i \in S} w_i > \sum\limits_{j \in N}\frac{w_j}{2} \\ + 0, & \text{ otherwise} + \end{array}\right. + }$$ + + where $w = [w_1,\dots, w_{51}]$ is a list of weights associated with each player. + + Args: + n_players: Number of players that participate in the game. 
+ """ + + def __init__(self, n_players: int = 51) -> None: + if n_players != 51: + raise ValueError( + f"{self.__class__.__name__} only supports n_players=51 but got {n_players=}." + ) + description = "Dummy data for the asymmetric voting game in Castro et al. 2009" + super().__init__( + n_players, + score_range=(0, 1), + description=description, + ) + + ranges = [ + range(0, 1), + range(1, 2), + range(2, 3), + range(3, 5), + range(5, 6), + range(6, 7), + range(7, 9), + range(9, 10), + range(10, 12), + range(12, 15), + range(15, 16), + range(16, 20), + range(20, 24), + range(24, 26), + range(26, 30), + range(30, 34), + range(34, 35), + range(35, 44), + range(44, 51), + ] + + ranges_weights = [ + 45, + 41, + 27, + 26, + 25, + 21, + 17, + 14, + 13, + 12, + 11, + 10, + 9, + 8, + 7, + 6, + 5, + 4, + 3, + ] + ranges_values = [ + "0.08831", + "0.07973", + "0.05096", + "0.04898", + "0.047", + "0.03917", + "0.03147", + "0.02577", + "0.02388", + "0.022", + "0.02013", + "0.01827", + "0.01641", + "0.01456", + "0.01272", + "0.01088", + "0.009053", + "0.00723", + "0.005412", + ] + + self.weight_table = np.zeros(self.n_players) + exact_values = np.zeros(self.n_players) + for r, w, v in zip(ranges, ranges_weights, ranges_values): + self.weight_table[r] = w + exact_values[r] = v + + self.exact_values = exact_values + self.threshold = np.sum(self.weight_table) / 2 + + def _score(self, model: SupervisedModel, X: NDArray, y: NDArray) -> float: + return 1 if np.sum(self.weight_table[X]) > self.threshold else 0 + + @lru_cache + def shapley_values(self) -> ValuationResult: + result: ValuationResult[np.int_, np.int_] = ValuationResult( + algorithm="exact_shapley", + status=Status.Converged, + indices=self.data.indices, + values=self.exact_values, + variances=np.zeros_like(self.data.x_train), + counts=np.zeros_like(self.data.x_train), + ) + return result + + +class ShoesGame(Game): + """Toy game that is used for testing and demonstration purposes. 
+ + An shoes game defined in + (Castro et al., 2009)1. + + In this game, some players have a left shoe and others a right shoe. + Single shoes have a worth of zero while pairs have a worth of 1. + + The payoff of a coalition $S$ is: + + $${ + v(S) = \min( \mid S \cap L \mid, \mid S \cap R \mid ) + }$$ + + Where $L$, respectively $R$, is the set of players with left shoes, + respectively right shoes. + + Args: + left: Number of players with a left shoe. + right: Number of players with a right shoe. + """ + + def __init__(self, left: int, right: int) -> None: + self.left = left + self.right = right + n_players = self.left + self.right + description = "Dummy data for the shoe game in Castro et al. 2009" + max_score = n_players // 2 + super().__init__(n_players, score_range=(0, max_score), description=description) + + def _score(self, model: SupervisedModel, X: NDArray, y: NDArray) -> float: + left_sum = float(np.sum(np.asarray(X) < self.left)) + right_sum = float(np.sum(np.asarray(X) >= self.left)) + return min(left_sum, right_sum) + + @lru_cache + def shapley_values(self) -> ValuationResult: + if self.left != self.right and (self.left > 4 or self.right > 4): + raise ValueError( + "This class only supports getting exact shapley values " + "for left <= 4 and right <= 4 or left == right" + ) + precomputed_values = np.array( + [ + [0.0, 0.0, 0.0, 0.0, 0.0], + [0.0, 0.5, 0.667, 0.75, 0.8], + [0.0, 0.167, 0.5, 0.65, 0.733], + [0.0, 0.083, 0.233, 0.5, 0.638], + [0.0, 0.050, 0.133, 0.271, 0.5], + ] + ) + if self.left == self.right: + value_left = value_right = min(self.left, self.right) / ( + self.left + self.right + ) + else: + value_left = precomputed_values[self.left, self.right] + value_right = precomputed_values[self.right, self.left] + exact_values = np.array([value_left] * self.left + [value_right] * self.right) + result: ValuationResult[np.int_, np.int_] = ValuationResult( + algorithm="exact_shapley", + status=Status.Converged, + indices=self.data.indices, + 
values=exact_values, + variances=np.zeros_like(self.data.x_train), + counts=np.zeros_like(self.data.x_train), + ) + return result + + @lru_cache + def least_core_values(self) -> ValuationResult: + if self.left == self.right: + subsidy = -0.5 + exact_values = np.array([0.5] * (self.left + self.right)) + elif self.left < self.right: + subsidy = 0.0 + exact_values = np.array([1.0] * self.left + [0.0] * self.right) + else: + subsidy = 0.0 + exact_values = np.array([0.0] * self.left + [1.0] * self.right) + + result: ValuationResult[np.int_, np.int_] = ValuationResult( + algorithm="exact_least_core", + status=Status.Converged, + indices=self.data.indices, + values=exact_values, + subsidy=subsidy, + variances=np.zeros_like(self.data.x_train), + counts=np.zeros_like(self.data.x_train), + ) + return result + + def __repr__(self) -> str: + return f"{self.__class__.__name__}(L={self.left}, R={self.right})" + + +class AirportGame(Game): + """Toy game that is used for testing and demonstration purposes. + + An airport game defined in + (Castro et al., 2009)1 + Section 4.3 + + Args: + n_players: Number of players that participate in the game. + """ + + def __init__(self, n_players: int = 100) -> None: + if n_players != 100: + raise ValueError( + f"{self.__class__.__name__} only supports n_players=100 but got {n_players=}." + ) + description = "A dummy dataset for the airport game in Castro et al. 
2009" + super().__init__(n_players, score_range=(0, 100), description=description) + ranges = [ + range(0, 8), + range(8, 20), + range(20, 26), + range(26, 40), + range(40, 48), + range(48, 57), + range(57, 70), + range(70, 80), + range(80, 90), + range(90, 100), + ] + exact = [ + 0.01, + 0.020869565, + 0.033369565, + 0.046883079, + 0.063549745, + 0.082780515, + 0.106036329, + 0.139369662, + 0.189369662, + 0.289369662, + ] + c = list(range(1, 10)) + score_table = np.zeros(100) + exact_values = np.zeros(100) + + for r, v in zip(ranges, exact): + score_table[r] = c + exact_values[r] = v + + self.exact_values = exact_values + self.score_table = score_table + + def _score(self, model: SupervisedModel, X: NDArray, y: NDArray) -> float: + return max(self.score_table[X]) or 0.0 + + @lru_cache + def shapley_values(self) -> ValuationResult: + result: ValuationResult[np.int_, np.int_] = ValuationResult( + algorithm="exact_shapley", + status=Status.Converged, + indices=self.data.indices, + values=self.exact_values, + variances=np.zeros_like(self.data.x_train), + counts=np.zeros_like(self.data.x_train), + ) + return result + + +class MinimumSpanningTreeGame(Game): + r"""Toy game that is used for testing and demonstration purposes. + + A minimum spanning tree game defined in + (Castro et al., 2009)1. + + Let $G = (N \cup \{0\},E)$ be a valued graph where $N = \{1,\dots,100\}$, + and the cost associated to an edge $(i, j)$ is: + + $${ + c_{ij} = \left\{\begin{array}{lll} + 1, & \text{ if} & i = j + 1 \text{ or } i = j - 1 \\ + & & \text{ or } (i = 1 \text{ and } j = 100) \text{ or } (i = 100 \text{ and } j = 1) \\ + 101, & \text{ if} & i = 0 \text{ or } j = 0 \\ + \infty, & \text{ otherwise} + \end{array}\right. + }$$ + + A minimum spanning tree game $(N, c)$ is a cost game, where for a given coalition + $S \subset N$, $v(S)$ is the sum of the edge cost of the minimum spanning tree, + i.e. 
$v(S)$ = Minimum Spanning Tree of the graph $G|_{S\cup\{0\}}$, + which is the partial graph restricted to the players $S$ and the source node $0$. + + Args: + n_players: Number of players that participate in the game. + """ + + def __init__(self, n_players: int = 100) -> None: + if n_players != 100: + raise ValueError( + f"{self.__class__.__name__} only supports n_players=100 but got {n_players=}." + ) + description = ( + "A dummy dataset for the minimum spanning tree game in Castro et al. 2009" + ) + super().__init__(n_players, score_range=(0, np.inf), description=description) + + graph = np.zeros(shape=(self.n_players, self.n_players)) + + for i in range(self.n_players): + for j in range(self.n_players): + if ( + i == j + 1 + or i == j - 1 + or (i == 1 and j == self.n_players - 1) + or (i == self.n_players - 1 and j == 1) + ): + graph[i, j] = 1 + elif i == 0 or j == 0: + graph[i, j] = 0 + else: + graph[i, j] = np.inf + assert np.all(graph == graph.T) + + self.graph = graph + + def _score(self, model: SupervisedModel, X: NDArray, y: NDArray) -> float: + partial_graph = sp.sparse.csr_array(self.graph[np.ix_(X, X)]) + span_tree = sp.sparse.csgraph.minimum_spanning_tree(partial_graph) + return span_tree.sum() or 0 + + @lru_cache + def shapley_values(self) -> ValuationResult: + exact_values = 2 * np.ones_like(self.data.x_train) + result: ValuationResult[np.int_, np.int_] = ValuationResult( + algorithm="exact_shapley", + status=Status.Converged, + indices=self.data.indices, + values=exact_values, + variances=np.zeros_like(self.data.x_train), + counts=np.zeros_like(self.data.x_train), + ) + return result + + +class MinerGame(Game): + r"""Toy game that is used for testing and demonstration purposes. + + Consider a group of n miners, who have discovered large bars of gold. 
+ + If two miners can carry one piece of gold, then the payoff of a + coalition $S$ is: + + $${ + v(S) = \left\{\begin{array}{lll} + \mid S \mid / 2, & \text{ if} & \mid S \mid \text{ is even} \\ + ( \mid S \mid - 1)/2, & \text{ otherwise} + \end{array}\right. + }$$ + + If there are more than two miners and there is an even number of miners, + then the core consists of the single payoff where each miner gets 1/2. + + If there is an odd number of miners, then the core is empty. + + Taken from [Wikipedia](https://en.wikipedia.org/wiki/Core_(game_theory)) + + Args: + n_players: Number of miners that participate in the game. + """ + + def __init__(self, n_players: int) -> None: + if n_players <= 2: + raise ValueError(f"n_players, {n_players}, should be > 2") + description = "Dummy data for Miner Game taken from https://en.wikipedia.org/wiki/Core_(game_theory)" + super().__init__( + n_players, + score_range=(0, n_players // 2), + description=description, + ) + + def _score(self, model: SupervisedModel, X: NDArray, y: NDArray) -> float: + n = len(X) + if n % 2 == 0: + return n / 2 + else: + return (n - 1) / 2 + + @lru_cache() + def least_core_values(self) -> ValuationResult: + if self.n_players % 2 == 0: + values = np.array([0.5] * self.n_players) + subsidy = 0.0 + else: + values = np.array( + [(self.n_players - 1) / (2 * self.n_players)] * self.n_players + ) + subsidy = (self.n_players - 1) / (2 * self.n_players) + + result: ValuationResult[np.int_, np.int_] = ValuationResult( + algorithm="exact_least_core", + status=Status.Converged, + indices=self.data.indices, + values=values, + subsidy=subsidy, + variances=np.zeros_like(self.data.x_train), + counts=np.zeros_like(self.data.x_train), + ) + return result + + def __repr__(self) -> str: + return f"{self.__class__.__name__}(n={self.n_players})" diff --git a/src/pydvl/value/least_core/__init__.py b/src/pydvl/value/least_core/__init__.py index abf34c623..39c764fef 100644 --- a/src/pydvl/value/least_core/__init__.py +++ 
b/src/pydvl/value/least_core/__init__.py @@ -47,6 +47,7 @@ def compute_least_core_values( mode: LeastCoreMode = LeastCoreMode.MonteCarlo, non_negative_subsidy: bool = False, solver_options: Optional[dict] = None, + progress: bool = False, **kwargs, ) -> ValuationResult: """Umbrella method to compute Least Core values with any of the available @@ -80,20 +81,6 @@ def compute_least_core_values( !!! tip "New in version 0.5.0" """ - progress: bool = kwargs.pop("progress", False) - - # TODO: remove this before releasing version 0.7.0 - if kwargs: - warnings.warn( - DeprecationWarning( - "Passing solver options as kwargs was deprecated in 0.6.0, will " - "be removed in 0.7.0. `Use solver_options` instead." - ) - ) - if solver_options is None: - solver_options = kwargs - else: - solver_options.update(kwargs) if mode == LeastCoreMode.MonteCarlo: # TODO fix progress showing in remote case diff --git a/src/pydvl/value/least_core/common.py b/src/pydvl/value/least_core/common.py index 2de8e7e3a..984930217 100644 --- a/src/pydvl/value/least_core/common.py +++ b/src/pydvl/value/least_core/common.py @@ -34,7 +34,6 @@ def lc_solve_problem( algorithm: str, non_negative_subsidy: bool = False, solver_options: Optional[dict] = None, - **options, ) -> ValuationResult: """Solves a linear problem as prepared by [mclc_prepare_problem()][pydvl.value.least_core.montecarlo.mclc_prepare_problem]. @@ -55,20 +54,6 @@ def lc_solve_problem( RuntimeWarning, ) - # TODO: remove this before releasing version 0.7.0 - if options: - warnings.warn( - DeprecationWarning( - "Passing solver options as kwargs was deprecated in " - "0.6.0, will be removed in 0.7.0. `Use solver_options` " - "instead." 
- ) - ) - if solver_options is None: - solver_options = options - else: - solver_options.update(options) - if solver_options is None: solver_options = {} diff --git a/src/pydvl/value/least_core/montecarlo.py b/src/pydvl/value/least_core/montecarlo.py index 88dc11ded..5a7a3c883 100644 --- a/src/pydvl/value/least_core/montecarlo.py +++ b/src/pydvl/value/least_core/montecarlo.py @@ -27,7 +27,6 @@ def montecarlo_least_core( config: ParallelConfig = ParallelConfig(), non_negative_subsidy: bool = False, solver_options: Optional[dict] = None, - options: Optional[dict] = None, progress: bool = False, seed: Optional[Seed] = None, ) -> ValuationResult: @@ -60,28 +59,12 @@ def montecarlo_least_core( and to configure it. Refer to [cvxpy's documentation](https://www.cvxpy.org/tutorial/advanced/index.html#setting-solver-options) for all possible options. - options: (Deprecated) Dictionary of solver options. Use solver_options - instead. progress: If True, shows a tqdm progress bar seed: Either an instance of a numpy random number generator or a seed for it. Returns: Object with the data values and the least core value. """ - # TODO: remove this before releasing version 0.7.0 - if options: - warnings.warn( - DeprecationWarning( - "Passing solver options as kwargs was deprecated in " - "0.6.0, will be removed in 0.7.0. `Use solver_options` " - "instead." 
- ) - ) - if solver_options is None: - solver_options = options - else: - solver_options.update(options) - problem = mclc_prepare_problem( u, n_iterations, n_jobs=n_jobs, config=config, progress=progress, seed=seed ) diff --git a/src/pydvl/value/least_core/naive.py b/src/pydvl/value/least_core/naive.py index f97021678..713298234 100644 --- a/src/pydvl/value/least_core/naive.py +++ b/src/pydvl/value/least_core/naive.py @@ -20,7 +20,6 @@ def exact_least_core( *, non_negative_subsidy: bool = False, solver_options: Optional[dict] = None, - options: Optional[dict] = None, progress: bool = True, ) -> ValuationResult: r"""Computes the exact Least Core values. @@ -46,14 +45,12 @@ def exact_least_core( Args: u: Utility object with model, data, and scoring function - non_negative_subsidy: If True, the least core subsidy $e$ is constrained + non_negative_subsidy: If True, the least core subsidy $e$ is constrained to be non-negative. solver_options: Dictionary of options that will be used to select a solver and to configure it. Refer to the [cvxpy's documentation](https://www.cvxpy.org/tutorial/advanced/index.html#setting-solver-options) for all possible options. - options: (Deprecated) Dictionary of solver options. Use `solver_options` - instead. progress: If True, shows a tqdm progress bar Returns: @@ -63,20 +60,6 @@ def exact_least_core( if n > 20: # Arbitrary choice, will depend on time required, caching, etc. warnings.warn(f"Large dataset! Computation requires 2^{n} calls to model.fit()") - # TODO: remove this before releasing version 0.7.0 - if options: - warnings.warn( - DeprecationWarning( - "Passing solver options as kwargs was deprecated in " - "0.6.0, will " - "be removed in 0.7.0. `Use solver_options` instead." 
- ) - ) - if solver_options is None: - solver_options = options - else: - solver_options.update(options) - problem = lc_prepare_problem(u, progress=progress) return lc_solve_problem( problem=problem, diff --git a/src/pydvl/value/loo/__init__.py b/src/pydvl/value/loo/__init__.py index 6b9e972fc..93b927272 100644 --- a/src/pydvl/value/loo/__init__.py +++ b/src/pydvl/value/loo/__init__.py @@ -1,2 +1 @@ from .loo import * -from .naive import * diff --git a/src/pydvl/value/loo/naive.py b/src/pydvl/value/loo/naive.py deleted file mode 100644 index 82c12c730..000000000 --- a/src/pydvl/value/loo/naive.py +++ /dev/null @@ -1,19 +0,0 @@ -from deprecate import deprecated - -from pydvl.utils import Utility -from pydvl.value.result import ValuationResult - -from .loo import compute_loo - -__all__ = ["naive_loo"] - - -@deprecated( - target=compute_loo, - deprecated_in="0.7.0", - remove_in="0.8.0", - args_extra=dict(n_jobs=1), -) -def naive_loo(u: Utility, *, progress: bool = True, **kwargs) -> ValuationResult: # type: ignore - """Deprecated. Use [compute_loo][pydvl.value.loo.compute_loo] instead.""" - pass diff --git a/src/pydvl/value/result.py b/src/pydvl/value/result.py index 20def1390..2773f9452 100644 --- a/src/pydvl/value/result.py +++ b/src/pydvl/value/result.py @@ -63,7 +63,6 @@ ) import numpy as np -from deprecate import deprecated from numpy.typing import NDArray from pydvl.utils.dataset import Dataset @@ -735,14 +734,6 @@ def from_random( return cls(**options) # type: ignore @classmethod - @deprecated( - target=True, - deprecated_in="0.6.0", - remove_in="0.8.0", - args_mapping=dict(indices=None, data_names=None, n_samples=None), - template_mgs="`%(source_name)s` is deprecated for generating zero-filled " - "results, use `ValuationResult.zeros()` instead.", - ) def empty( cls, algorithm: str = "", @@ -757,6 +748,10 @@ def empty( Args: algorithm: Name of the algorithm used to compute the values + indices: Optional sequence or array of indices. 
+ data_names: Optional sequences or array of names for the data points. + Defaults to index numbers if not set. + n_samples: Number of valuation result entries. Returns: Object with the results. diff --git a/src/pydvl/value/sampler.py b/src/pydvl/value/sampler.py index ff360d154..08cabe372 100644 --- a/src/pydvl/value/sampler.py +++ b/src/pydvl/value/sampler.py @@ -65,7 +65,6 @@ ) import numpy as np -from deprecate import deprecated, void from numpy.typing import NDArray from pydvl.utils.numeric import powerset, random_subset, random_subset_of_size @@ -313,14 +312,6 @@ def weight(cls, n: int, subset_len: int) -> float: return float(2 ** (n - 1)) if n > 0 else 1.0 -class DeterministicCombinatorialSampler(DeterministicUniformSampler[IndexT]): - @deprecated( - target=DeterministicUniformSampler, deprecated_in="0.6.0", remove_in="0.8.0" - ) - def __init__(self, indices: NDArray[IndexT], *args, **kwargs): - void(indices, args, kwargs) - - class AntitheticSampler(StochasticSamplerMixin, PowersetSampler[IndexT]): """An iterator to perform uniform random sampling of subsets, and their complements. 
diff --git a/src/pydvl/value/semivalues.py b/src/pydvl/value/semivalues.py index 9eee1c83d..2119e38a9 100644 --- a/src/pydvl/value/semivalues.py +++ b/src/pydvl/value/semivalues.py @@ -94,6 +94,7 @@ from itertools import islice from typing import Iterable, List, Optional, Protocol, Tuple, Type, cast +import numpy as np import scipy as sp from deprecate import deprecated from tqdm import tqdm @@ -271,15 +272,10 @@ def compute_generic_semivalues( # Filter out samples for indices that have already converged filtered_samples = samples - if skip_converged and len(done.converged) > 0: - # TODO: cloudpickle can't pickle this on python 3.8: - # filtered_samples = filter( - # lambda t: not done.converged[t[0]], samples - # ) + if skip_converged and np.count_nonzero(done.converged) > 0: + # TODO: cloudpickle can't pickle result of `filter` on python 3.8 filtered_samples = tuple( - (idx, sample) - for idx, sample in samples - if not done.converged[idx] + filter(lambda t: not done.converged[t[0]], samples) ) if filtered_samples: diff --git a/src/pydvl/value/shapley/montecarlo.py b/src/pydvl/value/shapley/montecarlo.py index aabc2d813..0811e648b 100644 --- a/src/pydvl/value/shapley/montecarlo.py +++ b/src/pydvl/value/shapley/montecarlo.py @@ -16,9 +16,9 @@ Alternatively, employing another reformulation of the expression above as a sum over permutations, one has the implementation in -[permutation_montecarlo_shapley()][pydvl.value.shapley.montecarlo.permutation_montecarlo_shapley], -or using an early stopping strategy to reduce computation -[truncated_montecarlo_shapley()][pydvl.value.shapley.truncated.truncated_montecarlo_shapley]. +[permutation_montecarlo_shapley()][pydvl.value.shapley.montecarlo.permutation_montecarlo_shapley] +with the option to pass an early stopping strategy to reduce computation +as done in Truncated MonteCarlo Shapley (TMCS). !!! 
info "Also see" It is also possible to use [group_testing_shapley()][pydvl.value.shapley.gt.group_testing_shapley] @@ -50,7 +50,6 @@ from typing import Optional, Sequence, Union import numpy as np -from deprecate import deprecated from numpy.random import SeedSequence from numpy.typing import NDArray from tqdm.auto import tqdm @@ -125,14 +124,6 @@ def _permutation_montecarlo_one_step( return result -@deprecated( - target=True, - deprecated_in="0.7.0", - remove_in="0.8.0", - args_mapping=dict( - coordinator_update_period=None, worker_update_period=None, progress=None - ), -) def permutation_montecarlo_shapley( u: Utility, done: StoppingCriterion, diff --git a/src/pydvl/value/shapley/truncated.py b/src/pydvl/value/shapley/truncated.py index 43327db0b..d62f625fa 100644 --- a/src/pydvl/value/shapley/truncated.py +++ b/src/pydvl/value/shapley/truncated.py @@ -11,7 +11,6 @@ from typing import Optional, cast import numpy as np -from deprecate import deprecated from pydvl.parallel.config import ParallelConfig from pydvl.utils import Utility, running_moments @@ -24,7 +23,6 @@ "FixedTruncation", "BootstrapTruncation", "RelativeTruncation", - "truncated_montecarlo_shapley", ] @@ -186,50 +184,3 @@ def _check(self, idx: int, score: float) -> bool: def reset(self, u: Optional[Utility] = None): self.count = 0 self.variance = self.mean = 0 - - -@deprecated( - target=True, - deprecated_in="0.7.0", - remove_in="0.8.0", - args_mapping=dict(coordinator_update_period=None, worker_update_period=None), -) -def truncated_montecarlo_shapley( - u: Utility, - *, - done: StoppingCriterion, - truncation: TruncationPolicy, - config: ParallelConfig = ParallelConfig(), - n_jobs: int = 1, - coordinator_update_period: int = 10, - worker_update_period: int = 5, -) -> ValuationResult: - """ - !!! Warning - This method is deprecated and only a wrapper for - [permutation_montecarlo_shapley][pydvl.value.shapley.montecarlo.permutation_montecarlo_shapley]. - - !!! 
Todo - Think of how to add Robin-Gelman or some other more principled stopping - criterion. - - Args: - u: Utility object with model, data, and scoring function - done: Check on the results which decides when to stop sampling - permutations. - truncation: callable that decides whether to stop computing marginals - for a given permutation. - config: Object configuring parallel computation, with cluster address, - number of cpus, etc. - n_jobs: Number of permutation monte carlo jobs to run concurrently. - Returns: - Object with the data values. - """ - from pydvl.value.shapley.montecarlo import permutation_montecarlo_shapley - - return cast( - ValuationResult, - permutation_montecarlo_shapley( - u, done=done, truncation=truncation, config=config, n_jobs=n_jobs - ), - ) diff --git a/src/pydvl/value/stopping.py b/src/pydvl/value/stopping.py index 4ce4b27e8..206cf630c 100644 --- a/src/pydvl/value/stopping.py +++ b/src/pydvl/value/stopping.py @@ -125,7 +125,6 @@ from typing import Callable, Optional, Protocol, Type import numpy as np -from deprecate import deprecated, void from numpy.typing import NDArray from pydvl.utils import Status @@ -135,7 +134,6 @@ "make_criterion", "AbsoluteStandardError", "StoppingCriterion", - "StandardError", "MaxChecks", "MaxUpdates", "MinUpdates", @@ -243,16 +241,6 @@ def converged(self) -> NDArray[np.bool_]: """ return self._converged - @property - def name(self): - log = logging.getLogger(__name__) - # This string for the benefit of deprecation searches: - # remove_in="0.8.0" - log.warning( - "The `name` attribute of `StoppingCriterion` is deprecated and will be removed in 0.8.0. 
" - ) - return getattr(self, "_name", type(self).__name__) - def __str__(self): return type(self).__name__ @@ -389,12 +377,6 @@ def __str__(self): return f"AbsoluteStandardError(threshold={self.threshold}, fraction={self.fraction}, burn_in={self.burn_in})" -class StandardError(AbsoluteStandardError): - @deprecated(target=AbsoluteStandardError, deprecated_in="0.6.0", remove_in="0.8.0") - def __init__(self, *args, **kwargs): - void(*args, **kwargs) - - class MaxChecks(StoppingCriterion): """Terminate as soon as the number of checks exceeds the threshold. diff --git a/tests/influence/test_influence_calculator.py b/tests/influence/test_influence_calculator.py index 23797e9ad..a4d117478 100644 --- a/tests/influence/test_influence_calculator.py +++ b/tests/influence/test_influence_calculator.py @@ -7,10 +7,12 @@ import pytest import torch from distributed import Client +from torch import nn from torch.utils.data import DataLoader, TensorDataset from pydvl.influence import DaskInfluenceCalculator, InfluenceMode from pydvl.influence.base_influence_function_model import ( + NotImplementedLayerRepresentationException, UnsupportedInfluenceModeException, ) from pydvl.influence.influence_calculator import ( @@ -20,13 +22,19 @@ ThreadSafetyViolationError, UnalignedChunksError, ) -from pydvl.influence.torch import ArnoldiInfluence, CgInfluence, DirectInfluence +from pydvl.influence.torch import ( + ArnoldiInfluence, + CgInfluence, + DirectInfluence, + EkfacInfluence, +) from pydvl.influence.torch.util import ( NestedTorchCatAggregator, TorchCatAggregator, TorchNumpyConverter, ) from tests.influence.torch.test_influence_model import model_and_data, test_case +from tests.influence.torch.test_util import are_active_layers_linear @pytest.fixture @@ -34,7 +42,7 @@ "influence_factory", [ lambda model, loss, train_dataLoader, hessian_reg: CgInfluence( - model, loss, train_dataLoader + model, loss, train_dataLoader, hessian_reg ).fit(train_dataLoader), lambda model, loss, 
train_dataLoader, hessian_reg: DirectInfluence( model, loss, hessian_reg @@ -338,3 +346,50 @@ def test_sequential_calculator(model_and_data, test_case): assert torch.allclose(seq_values, torch_values, atol=1e-6) assert np.allclose(seq_values_from_zarr, torch_values.numpy(), atol=1e-6) shutil.rmtree(zarr_values_path) + + +@pytest.mark.torch +def test_dask_ekfac_influence(model_and_data, test_case): + model, loss, x_train, y_train, x_test, y_test = model_and_data + chunk_size = int(test_case.train_data_len / 4) + da_x_train = da.from_array( + x_train.numpy(), chunks=(chunk_size, *[-1 for _ in x_train.shape[1:]]) + ) + da_y_train = da.from_array( + y_train.numpy(), chunks=(chunk_size, *[-1 for _ in y_train.shape[1:]]) + ) + da_x_test = da.from_array( + x_test.numpy(), chunks=(chunk_size, *[-1 for _ in x_test.shape[1:]]) + ) + da_y_test = da.from_array( + y_test.numpy(), chunks=(chunk_size, *[-1 for _ in y_test.shape[1:]]) + ) + train_dataloader = DataLoader( + TensorDataset(x_train, y_train), batch_size=test_case.batch_size + ) + + if not are_active_layers_linear(model): + with pytest.raises(NotImplementedLayerRepresentationException): + EkfacInfluence(model).fit(train_dataloader) + elif isinstance(loss, nn.CrossEntropyLoss): + ekfac_influence = EkfacInfluence( + model, hessian_regularization=test_case.hessian_reg + ).fit(train_dataloader) + + numpy_converter = TorchNumpyConverter() + dask_inf = DaskInfluenceCalculator( + ekfac_influence, numpy_converter, DisableClientSingleThreadCheck + ) + + dask_val = dask_inf.influences( + da_x_test, + da_y_test, + da_x_train, + da_y_train, + mode=test_case.mode, + ) + dask_val = dask_val.compute(scheduler="synchronous") + torch_val = ekfac_influence.influences( + x_test, y_test, x_train, y_train, mode=test_case.mode + ).numpy() + assert np.allclose(dask_val, torch_val, atol=1e-5, rtol=1e-3) diff --git a/tests/influence/torch/test_influence_model.py b/tests/influence/torch/test_influence_model.py index 2ea33b75d..9472ad398 100644 
--- a/tests/influence/torch/test_influence_model.py +++ b/tests/influence/torch/test_influence_model.py @@ -5,11 +5,15 @@ import pytest from numpy.typing import NDArray -from pydvl.influence.base_influence_function_model import NotFittedException +from pydvl.influence.base_influence_function_model import ( + NotFittedException, + NotImplementedLayerRepresentationException, +) from pydvl.influence.torch.influence_function_model import ( ArnoldiInfluence, CgInfluence, DirectInfluence, + EkfacInfluence, LissaInfluence, ) from tests.influence.torch.conftest import minimal_training @@ -28,6 +32,10 @@ analytical_linear_influences, linear_model, ) +from tests.influence.torch.test_util import ( + are_active_layers_linear, + check_influence_correlations, +) # Mark the entire module pytestmark = pytest.mark.torch @@ -53,7 +61,7 @@ def create_conv1d_nn(): return nn.Sequential( nn.Conv1d(in_channels=5, out_channels=3, kernel_size=2), nn.Flatten(), - nn.Linear(6, 3), + nn.Linear(6, 2), ) @@ -61,6 +69,21 @@ def create_simple_nn_regr(): return nn.Sequential(nn.Linear(10, 10), nn.Linear(10, 3), nn.Linear(3, 1)) +def create_conv1d_no_grad(): + return nn.Sequential( + nn.Conv1d(in_channels=5, out_channels=3, kernel_size=2).requires_grad_(False), + nn.Flatten(), + nn.Linear(6, 2), + ) + + +def create_simple_nn_no_grad(): + return nn.Sequential( + nn.Linear(10, 10).requires_grad_(False), + nn.Linear(10, 5), + ) + + class TestCase(NamedTuple): module_factory: Callable[[], nn.Module] input_dim: Tuple[int, ...] 
@@ -114,7 +137,7 @@ def case_conv1d_nn_up(self) -> TestCase: return TestCase( module_factory=create_conv1d_nn, input_dim=(5, 3), - output_dim=3, + output_dim=2, loss=nn.MSELoss(), mode=InfluenceMode.Up, ) @@ -123,7 +146,7 @@ def case_conv1d_nn_pert(self) -> TestCase: return TestCase( module_factory=create_conv1d_nn, input_dim=(5, 3), - output_dim=3, + output_dim=2, loss=nn.SmoothL1Loss(), mode=InfluenceMode.Perturbation, ) @@ -146,6 +169,26 @@ def case_simple_nn_pert(self) -> TestCase: mode=InfluenceMode.Perturbation, ) + def case_conv1d_no_grad_up(self) -> TestCase: + return TestCase( + module_factory=create_conv1d_no_grad, + input_dim=(5, 3), + output_dim=2, + loss=nn.CrossEntropyLoss(), + mode=InfluenceMode.Up, + ) + + def case_simple_nn_class_up(self) -> TestCase: + return TestCase( + module_factory=create_simple_nn_no_grad, + input_dim=(10,), + output_dim=5, + loss=nn.CrossEntropyLoss(), + mode=InfluenceMode.Up, + train_data_len=100, + test_data_len=30, + ) + @fixture @parametrize_with_cases( @@ -169,9 +212,17 @@ def model_and_data( torch.Tensor, ]: x_train = torch.rand((test_case.train_data_len, *test_case.input_dim)) - y_train = torch.rand((test_case.train_data_len, test_case.output_dim)) x_test = torch.rand((test_case.test_data_len, *test_case.input_dim)) - y_test = torch.rand((test_case.test_data_len, test_case.output_dim)) + if isinstance(test_case.loss, nn.CrossEntropyLoss): + y_train = torch.randint( + 0, test_case.output_dim, (test_case.train_data_len,), dtype=torch.long + ) + y_test = torch.randint( + 0, test_case.output_dim, (test_case.test_data_len,), dtype=torch.long + ) + else: + y_train = torch.rand((test_case.train_data_len, test_case.output_dim)) + y_test = torch.rand((test_case.test_data_len, test_case.output_dim)) train_dataloader = DataLoader( TensorDataset(x_train, y_train), batch_size=test_case.batch_size @@ -474,3 +525,69 @@ def test_influences_arnoldi( arnoldi_influence.influences(x_test, y_test, x=x_train, mode=test_case.mode) with 
pytest.raises(ValueError): arnoldi_influence.influences(x_test, y_test, y=y_train, mode=test_case.mode) + + +def test_influences_ekfac( + test_case: TestCase, + model_and_data: Tuple[ + torch.nn.Module, + Callable[[torch.Tensor, torch.Tensor], torch.Tensor], + torch.Tensor, + torch.Tensor, + torch.Tensor, + torch.Tensor, + ], + direct_influences, + direct_sym_influences, +): + model, loss, x_train, y_train, x_test, y_test = model_and_data + + train_dataloader = DataLoader( + TensorDataset(x_train, y_train), batch_size=test_case.batch_size + ) + + ekfac_influence = EkfacInfluence( + model, + update_diagonal=True, + hessian_regularization=test_case.hessian_reg, + ) + + with pytest.raises(NotFittedException): + ekfac_influence.influences( + x_test, y_test, x_train, y_train, mode=test_case.mode + ) + + with pytest.raises(NotFittedException): + ekfac_influence.influence_factors(x_test, y_test) + + if not are_active_layers_linear: + with pytest.raises(NotImplementedLayerRepresentationException): + ekfac_influence.fit(train_dataloader) + elif isinstance(loss, nn.CrossEntropyLoss): + ekfac_influence = ekfac_influence.fit(train_dataloader) + ekfac_influence_values = ekfac_influence.influences( + x_test, y_test, x_train, y_train, mode=test_case.mode + ).numpy() + + ekfac_influences_by_layer = ekfac_influence.influences_by_layer( + x_test, y_test, x_train, y_train, mode=test_case.mode + ) + + accumulated_inf_by_layer = np.zeros_like(ekfac_influence_values) + for layer, infl in ekfac_influences_by_layer.items(): + accumulated_inf_by_layer += infl.detach().numpy() + + ekfac_self_influence = ekfac_influence.influences( + x_train, y_train, mode=test_case.mode + ).numpy() + + ekfac_factors = ekfac_influence.influence_factors(x_test, y_test) + + influence_from_factors = ekfac_influence.influences_from_factors( + ekfac_factors, x_train, y_train, mode=test_case.mode + ).numpy() + + assert np.allclose(ekfac_influence_values, influence_from_factors) + assert 
np.allclose(ekfac_influence_values, accumulated_inf_by_layer) + check_influence_correlations(direct_influences, ekfac_influence_values) + check_influence_correlations(direct_sym_influences, ekfac_self_influence) diff --git a/tests/influence/torch/test_util.py b/tests/influence/torch/test_util.py index 33b8c9316..6e675b18f 100644 --- a/tests/influence/torch/test_util.py +++ b/tests/influence/torch/test_util.py @@ -7,6 +7,7 @@ torch = pytest.importorskip("torch") import torch.nn from numpy.typing import NDArray +from scipy.stats import pearsonr, spearmanr from torch.nn.functional import mse_loss from torch.utils.data import DataLoader, TensorDataset @@ -278,3 +279,21 @@ def __getitem__(self, index): torch_dataset_to_dask_array( tensor_data_set, chunk_size=chunk_size, total_size=total_size + 1 ) + + +def check_influence_correlations(true_infl, approx_infl, threshold=0.95): + for axis in range(0, true_infl.ndim): + mean_true_infl = np.mean(true_infl, axis=axis) + mean_approx_infl = np.mean(approx_infl, axis=axis) + assert np.all(pearsonr(mean_true_infl, mean_approx_infl).statistic > threshold) + assert np.all(spearmanr(mean_true_infl, mean_approx_infl).statistic > threshold) + + +def are_active_layers_linear(model): + for module in model.modules(): + if len(list(module.children())) == 0 and len(list(module.parameters())) > 0: + if not isinstance(module, torch.nn.Linear): + param_requires_grad = [p.requires_grad for p in module.parameters()] + if any(param_requires_grad): + return False + return True diff --git a/tests/test_results.py b/tests/test_results.py index 4ea80cf72..0b42fb48d 100644 --- a/tests/test_results.py +++ b/tests/test_results.py @@ -4,6 +4,7 @@ import operator import pickle from copy import deepcopy +from itertools import permutations import cloudpickle import numpy as np @@ -159,6 +160,20 @@ def test_updating(): assert v.counts[1] == 2 +def test_updating_order_invariance(): + updates = [0.8, 0.9, 1.0, 1.1, 1.2] + values = [] + for permutation in 
permutations(updates): + v = ValuationResult.zeros(indices=np.array([0])) + for update in permutation: + v.update(0, update) + values.append(v) + + v1 = values[0] + for v2 in values[1:]: + np.testing.assert_almost_equal(v1.values, v2.values) + + @pytest.mark.parametrize( "serialize, deserialize", [(pickle.dumps, pickle.loads), (cloudpickle.dumps, cloudpickle.loads)], @@ -415,8 +430,3 @@ def test_empty(n): v2 = ValuationResult(values=np.arange(n)) v += v2 assert len(v2) == n - - -def test_empty_deprecation(): - with pytest.warns(DeprecationWarning): - v3 = ValuationResult.empty(indices=[1, 2, 3]) diff --git a/tests/utils/test_score.py b/tests/utils/test_score.py index 078775240..5423c48be 100644 --- a/tests/utils/test_score.py +++ b/tests/utils/test_score.py @@ -1,5 +1,7 @@ import numpy as np +import sklearn from numpy.typing import NDArray +from packaging import version from pydvl.utils.score import Scorer, compose_score, squashed_r2, squashed_variance @@ -24,7 +26,13 @@ def test_scorer(): """Tests the Scorer class.""" scorer = Scorer("r2") assert str(scorer) == "r2" - assert repr(scorer) == "R2 (scorer=make_scorer(r2_score))" + if version.parse(sklearn.__version__) >= version.parse("1.4.0"): + assert ( + repr(scorer) + == "R2 (scorer=make_scorer(r2_score, response_method='predict'))" + ) + else: + assert repr(scorer) == "R2 (scorer=make_scorer(r2_score))" coef = np.array([1, 2]) X = np.array([[1, 2], [3, 4]]) diff --git a/tests/value/__init__.py b/tests/value/__init__.py index 4b27711c4..19a703d2d 100644 --- a/tests/value/__init__.py +++ b/tests/value/__init__.py @@ -19,7 +19,9 @@ def check_total_value( Shapley value is supposed to fulfill the total value axiom.""" total_utility = u(u.data.indices) # We can use relative tolerances if we don't have the range of the scorer. 
- assert np.isclose(np.sum(values.values), total_utility, rtol=rtol, atol=atol) + np.testing.assert_allclose( + np.sum(values.values), total_utility, rtol=rtol, atol=atol + ) def check_exact( @@ -33,10 +35,14 @@ def check_exact( values.sort() exact_values.sort() - assert np.all(values.indices == exact_values.indices), "Ranks do not match" - assert np.allclose( - values.values, exact_values.values, rtol=rtol, atol=atol - ), "Values do not match" + np.testing.assert_equal(values.indices, exact_values.indices, "Ranks do not match") + np.testing.assert_allclose( + values.values, + exact_values.values, + rtol=rtol, + atol=atol, + err_msg="Values do not match", + ) def check_values( @@ -66,9 +72,9 @@ def check_values( values.sort() exact_values.sort() - assert np.allclose(values.values, exact_values.values, rtol=rtol, atol=atol) + np.testing.assert_allclose(values.values, exact_values.values, rtol=rtol, atol=atol) for name in extra_values_names: - assert np.isclose( + np.testing.assert_allclose( getattr(values, name), getattr(exact_values, name), rtol=rtol, atol=atol ) diff --git a/tests/value/conftest.py b/tests/value/conftest.py index 0e3c48d29..139f0f5b6 100644 --- a/tests/value/conftest.py +++ b/tests/value/conftest.py @@ -11,12 +11,35 @@ from pydvl.utils.caching import InMemoryCacheBackend from pydvl.utils.status import Status from pydvl.value import ValuationResult +from pydvl.value.games import ( + AsymmetricVotingGame, + Game, + MinerGame, + ShoesGame, + SymmetricVotingGame, +) from pydvl.value.shapley.naive import combinatorial_exact_shapley from ..conftest import num_workers from . 
import polynomial +@pytest.fixture(scope="module") +def test_game(request) -> Game: + name, kwargs = request.param + if name == "miner": + game = MinerGame(n_players=kwargs["n_players"]) + elif name == "shoes": + game = ShoesGame(left=kwargs["left"], right=kwargs["right"]) + elif name == "symmetric-voting": + game = SymmetricVotingGame(n_players=kwargs["n_players"]) + elif name == "asymmetric-voting": + game = AsymmetricVotingGame() + else: + raise ValueError(f"Unknown game '{name}'") + return game + + @pytest.fixture(scope="function") def polynomial_dataset(coefficients: np.ndarray): """Coefficients must be for monomials of increasing degree""" diff --git a/tests/value/least_core/conftest.py b/tests/value/least_core/conftest.py deleted file mode 100644 index 2355c443a..000000000 --- a/tests/value/least_core/conftest.py +++ /dev/null @@ -1,30 +0,0 @@ -from typing import Tuple - -import numpy as np -import pytest - -from pydvl.utils import Utility -from pydvl.utils.status import Status -from pydvl.utils.utility import GlovesGameUtility, MinerGameUtility -from pydvl.value.result import ValuationResult - - -@pytest.fixture(scope="module") -def test_utility(request) -> Tuple[Utility, ValuationResult]: - name, kwargs = request.param - if name == "miner": - u = MinerGameUtility(**kwargs) - elif name == "gloves": - u = GlovesGameUtility(**kwargs) - else: - raise ValueError(f"Unknown '{name}'") - exact_values, subsidy = u.exact_least_core_values() - result = ValuationResult( - algorithm="exact", - values=exact_values, - subsidy=subsidy, - variances=np.zeros_like(exact_values), - data_names=np.arange(len(exact_values)), - status=Status.Converged, - ) - return u, result diff --git a/tests/value/least_core/test_common.py b/tests/value/least_core/test_common.py index feadeb954..6add2d12a 100644 --- a/tests/value/least_core/test_common.py +++ b/tests/value/least_core/test_common.py @@ -8,29 +8,30 @@ @pytest.mark.parametrize( - "test_utility", - [("miner", {"n_miners": 5})], + 
"test_game", + [("miner", {"n_players": 5})], indirect=True, ) -def test_lc_solve_problems(test_utility, n_jobs, parallel_config): +def test_lc_solve_problems(test_game, n_jobs, parallel_config): """Test solving LeastCoreProblems in parallel.""" - u, exact_values = test_utility n_problems = n_jobs - problem = lc_prepare_problem(u) + problem = lc_prepare_problem(test_game.u) solutions = lc_solve_problems( [problem] * n_problems, - u, + test_game.u, algorithm="test_lc", n_jobs=n_jobs, config=parallel_config, ) assert len(solutions) == n_problems + exact_values = test_game.least_core_values() + for solution in solutions: assert solution.status == Status.Converged check_values(solution, exact_values, rtol=0.01) - check = lc_solve_problem(problem, u=u, algorithm="test_lc") + check = lc_solve_problem(problem, u=test_game.u, algorithm="test_lc") assert check.status == Status.Converged check_values(solution, check, rtol=0.01) diff --git a/tests/value/least_core/test_montecarlo.py b/tests/value/least_core/test_montecarlo.py index 38d675e0d..8b926a3bf 100644 --- a/tests/value/least_core/test_montecarlo.py +++ b/tests/value/least_core/test_montecarlo.py @@ -10,28 +10,27 @@ @pytest.mark.parametrize( - "test_utility, rtol, n_iterations", + "test_game, rtol, n_iterations", [ - (("miner", {"n_miners": 8}), 0.1, 128), - (("gloves", {"left": 10, "right": 5}), 0.2, 10000), + (("miner", {"n_players": 8}), 0.1, 128), + (("shoes", {"left": 10, "right": 5}), 0.2, 10000), ], - indirect=["test_utility"], + indirect=["test_game"], ) @pytest.mark.parametrize("n_jobs", [1, -1]) @pytest.mark.parametrize("non_negative_subsidy", (True, False)) def test_montecarlo_least_core( - test_utility, rtol, n_iterations, n_jobs, non_negative_subsidy, seed + test_game, rtol, n_iterations, n_jobs, non_negative_subsidy, seed ): - u, exact_values = test_utility - values = montecarlo_least_core( - u, + test_game.u, n_iterations=n_iterations, non_negative_subsidy=non_negative_subsidy, progress=False, 
n_jobs=n_jobs, seed=seed, ) + exact_values = test_game.least_core_values() if non_negative_subsidy: check_values(values, exact_values) # Sometimes the subsidy is negative but really close to zero diff --git a/tests/value/least_core/test_naive.py b/tests/value/least_core/test_naive.py index 28a79e381..a972e72c0 100644 --- a/tests/value/least_core/test_naive.py +++ b/tests/value/least_core/test_naive.py @@ -6,23 +6,23 @@ @pytest.mark.parametrize( - "test_utility", + "test_game", [ - ("miner", {"n_miners": 3}), - ("miner", {"n_miners": 4}), - ("gloves", {"left": 1, "right": 1}), - ("gloves", {"left": 2, "right": 1}), - ("gloves", {"left": 1, "right": 2}), + ("miner", {"n_players": 3}), + ("miner", {"n_players": 4}), + ("shoes", {"left": 1, "right": 1}), + ("shoes", {"left": 2, "right": 1}), + ("shoes", {"left": 1, "right": 2}), ], indirect=True, ) @pytest.mark.parametrize("non_negative_subsidy", (True, False)) -def test_naive_least_core(test_utility, non_negative_subsidy): - u, exact_values = test_utility +def test_naive_least_core(test_game, non_negative_subsidy): values = exact_least_core( - u, non_negative_subsidy=non_negative_subsidy, progress=False + test_game.u, non_negative_subsidy=non_negative_subsidy, progress=False ) - check_total_value(u, values) + check_total_value(test_game.u, values) + exact_values = test_game.least_core_values() if non_negative_subsidy: check_values(values, exact_values) # Sometimes the subsidy is negative but really close to zero diff --git a/tests/value/shapley/test_classwise.py b/tests/value/shapley/test_classwise.py index bd4f55a5d..d73e86a0b 100644 --- a/tests/value/shapley/test_classwise.py +++ b/tests/value/shapley/test_classwise.py @@ -3,7 +3,9 @@ import numpy as np import pandas as pd import pytest +import sklearn from numpy.typing import NDArray +from packaging import version from pydvl.utils import Dataset, Utility, powerset from pydvl.value import MaxChecks, ValuationResult @@ -165,7 +167,13 @@ def 
test_classwise_scorer_representation(): scorer = ClasswiseScorer("accuracy", initial_label=0) assert str(scorer) == "classwise accuracy" - assert repr(scorer) == "ClasswiseAccuracy (scorer=make_scorer(accuracy_score))" + if version.parse(sklearn.__version__) >= version.parse("1.4.0"): + assert ( + repr(scorer) + == "ClasswiseAccuracy (scorer=make_scorer(accuracy_score, response_method='predict'))" + ) + else: + assert repr(scorer) == "ClasswiseAccuracy (scorer=make_scorer(accuracy_score))" @pytest.mark.parametrize("n_element, left_margin, right_margin", [(101, 0.3, 0.4)]) diff --git a/tests/value/shapley/test_montecarlo.py b/tests/value/shapley/test_montecarlo.py index ef9deed1f..58f9df2a9 100644 --- a/tests/value/shapley/test_montecarlo.py +++ b/tests/value/shapley/test_montecarlo.py @@ -6,7 +6,7 @@ from sklearn.linear_model import LinearRegression from pydvl.parallel.config import ParallelConfig -from pydvl.utils import Dataset, GroupedDataset, Status, Utility +from pydvl.utils import GroupedDataset, Status, Utility from pydvl.utils.numeric import num_samples_permutation_hoeffding from pydvl.utils.score import Scorer, squashed_r2 from pydvl.utils.types import Seed @@ -21,35 +21,38 @@ log = logging.getLogger(__name__) -# noinspection PyTestParametrized @pytest.mark.parametrize( - "num_samples, fun, rtol, atol, kwargs", + "test_game", [ - (12, ShapleyMode.PermutationMontecarlo, 0.1, 1e-5, {"done": MaxUpdates(10)}), - # FIXME! 
it should be enough with 2**(len(data)-1) samples + ("symmetric-voting", {"n_players": 6}), + ("shoes", {"left": 3, "right": 4}), + ], + indirect=["test_game"], +) +@pytest.mark.parametrize( + "fun, rtol, atol, kwargs", + [ + (ShapleyMode.PermutationMontecarlo, 0.2, 1e-4, dict(done=MaxUpdates(500))), ( - 8, ShapleyMode.CombinatorialMontecarlo, 0.2, 1e-4, - {"done": MaxUpdates(2**10)}, + dict(done=MaxUpdates(2**10)), ), - (12, ShapleyMode.Owen, 0.1, 1e-4, dict(n_samples=4, max_q=200)), - (12, ShapleyMode.OwenAntithetic, 0.1, 1e-4, dict(n_samples=4, max_q=200)), + (ShapleyMode.Owen, 0.2, 1e-4, dict(n_samples=5, max_q=200)), + (ShapleyMode.OwenAntithetic, 0.1, 1e-4, dict(n_samples=5, max_q=200)), + # Because of the inaccuracy of GroupTesting, a high atol is required for the + # value 0, for which the rtol has no effect. ( - 3, ShapleyMode.GroupTesting, 0.1, - # Because of the inaccuracy of GTS, a high atol is required for the - # value 0, for which the rtol has no effect. 1e-2, dict(n_samples=int(4e4), epsilon=0.2, delta=0.01), ), ], ) -def test_analytic_montecarlo_shapley( - num_samples, - analytic_shapley, +def test_games( + test_game, parallel_config, n_jobs, fun: ShapleyMode, @@ -58,10 +61,22 @@ def test_analytic_montecarlo_shapley( kwargs: dict, seed, ): - u, exact_values = analytic_shapley + """Tests values for all methods using a toy games. + + For permutation, the rtol for each scorer is chosen + so that the number of samples selected is just above the (ε,δ) bound for ε = + rtol, δ=0.001 and the range corresponding to each score. This means that + roughly once every 1000/num_methods runs the test will fail. + + FIXME: + - We don't have a bound for Owen. 
+ NOTE: + - The variance in the combinatorial method is huge, so we need lots of + samples + """ values = compute_shapley_values( - u, + test_game.u, mode=fun, n_jobs=n_jobs, config=parallel_config, @@ -70,29 +85,31 @@ def test_analytic_montecarlo_shapley( **kwargs ) + exact_values = test_game.shapley_values() check_values(values, exact_values, rtol=rtol, atol=atol) @pytest.mark.slow @pytest.mark.parametrize( - "num_samples, fun, kwargs", + "test_game", + [ + ("symmetric-voting", {"n_players": 12}), + ], + indirect=["test_game"], +) +@pytest.mark.parametrize( + "fun, kwargs", [ # TODO Add once issue #416 is closed. - # (12, ShapleyMode.PermutationMontecarlo, {"done": MaxChecks(1)}), - ( - 12, - ShapleyMode.CombinatorialMontecarlo, - {"done": MaxChecks(4)}, - ), - (12, ShapleyMode.Owen, dict(n_samples=4, max_q=200)), - (12, ShapleyMode.OwenAntithetic, dict(n_samples=4, max_q=200)), - (4, ShapleyMode.GroupTesting, dict(n_samples=21, epsilon=0.2, delta=0.01)), + # (ShapleyMode.PermutationMontecarlo, dict(done=MaxChecks(1))), + (ShapleyMode.CombinatorialMontecarlo, dict(done=MaxChecks(4))), + (ShapleyMode.Owen, dict(n_samples=4, max_q=200)), + (ShapleyMode.OwenAntithetic, dict(n_samples=4, max_q=200)), + (ShapleyMode.GroupTesting, dict(n_samples=21, epsilon=0.2, delta=0.01)), ], ) -@pytest.mark.parametrize("num_points, num_features", [(12, 3)]) -def test_montecarlo_shapley_housing_dataset( - num_samples: int, - housing_dataset: Dataset, +def test_seed( + test_game, parallel_config: ParallelConfig, n_jobs: int, fun: ShapleyMode, @@ -102,11 +119,10 @@ def test_montecarlo_shapley_housing_dataset( ): values_1, values_2, values_3 = call_with_seeds( compute_shapley_values, - Utility(LinearRegression(), data=housing_dataset, scorer="r2"), + test_game.u, mode=fun, n_jobs=n_jobs, config=parallel_config, - progress=False, seeds=(seed, seed, seed_alt), **deepcopy(kwargs) ) @@ -143,62 +159,6 @@ def test_hoeffding_bound_montecarlo( check_rank_correlation(values, exact_values, 
threshold=0.8) -@pytest.mark.parametrize( - "a, b, num_points", [(2, 0, 21)] # training set will have 0.3 * 21 = 6 samples -) -@pytest.mark.parametrize("scorer, rtol", [(squashed_r2, 0.25)]) -@pytest.mark.parametrize( - "fun, kwargs", - [ - # FIXME: Hoeffding says 400 should be enough - (ShapleyMode.PermutationMontecarlo, dict(done=MaxUpdates(500))), - (ShapleyMode.CombinatorialMontecarlo, dict(done=MaxUpdates(2**11))), - (ShapleyMode.Owen, dict(n_samples=2, max_q=300)), - (ShapleyMode.OwenAntithetic, dict(n_samples=2, max_q=300)), - pytest.param( - ShapleyMode.GroupTesting, - dict(n_samples=int(5e4), epsilon=0.25, delta=0.1), - marks=pytest.mark.slow, - ), - ], -) -def test_linear_montecarlo_shapley( - linear_shapley, - n_jobs, - memcache_client_config, - scorer: Scorer, - rtol: float, - fun: ShapleyMode, - kwargs: dict, - seed: int, -): - """Tests values for all methods using a linear dataset. - - For permutation and truncated montecarlo, the rtol for each scorer is chosen - so that the number of samples selected is just above the (ε,δ) bound for ε = - rtol, δ=0.001 and the range corresponding to each score. This means that - roughly once every 1000/num_methods runs the test will fail. - - FIXME: - - For permutation, we must increase the number of samples above that what - is done for truncated, this is probably due to the averaging done by the - latter to reduce variance - - We don't have a bound for Owen. 
- NOTE: - - The variance in the combinatorial method is huge, so we need lots of - samples - - """ - u, exact_values = linear_shapley - - values = compute_shapley_values( - u, mode=fun, progress=False, n_jobs=n_jobs, seed=seed, **kwargs - ) - - check_values(values, exact_values, rtol=rtol) - check_total_value(u, values, rtol=rtol) # FIXME, could be more than rtol - - @pytest.mark.slow @pytest.mark.parametrize( "a, b, num_points", [(2, 0, 21)] # training set will have 0.3 * 21 ~= 6 samples diff --git a/tests/value/shapley/test_naive.py b/tests/value/shapley/test_naive.py index 45c32b1a9..98a18a626 100644 --- a/tests/value/shapley/test_naive.py +++ b/tests/value/shapley/test_naive.py @@ -15,55 +15,26 @@ log = logging.getLogger(__name__) -# noinspection PyTestParametrized @pytest.mark.parametrize( - "num_samples, fun, rtol, total_atol", + "test_game, rtol, total_atol", [ - (12, combinatorial_exact_shapley, 0.01, 1e-5), - (6, permutation_exact_shapley, 0.01, 1e-5), + (("symmetric-voting", {"n_players": 4}), 0.1, 1e-5), + (("shoes", {"left": 1, "right": 1}), 0.1, 1e-5), + (("shoes", {"left": 2, "right": 1}), 0.1, 1e-5), + (("shoes", {"left": 1, "right": 2}), 0.1, 1e-5), + (("shoes", {"left": 2, "right": 4}), 0.1, 1e-5), ], + indirect=["test_game"], ) -def test_analytic_exact_shapley(num_samples, analytic_shapley, fun, rtol, total_atol): - """Compares the combinatorial exact shapley and permutation exact shapley with - the analytic_shapley calculation for a dummy model. 
- """ - u, exact_values = analytic_shapley - values_p = fun(u, progress=False) - check_total_value(u, values_p, atol=total_atol) - check_values(values_p, exact_values, rtol=rtol) - - @pytest.mark.parametrize( - "a, b, num_points, scorer", - [ - (2, 0, 10, "r2"), - (2, 1, 10, "r2"), - (2, 1, 10, "neg_median_absolute_error"), - (2, 1, 10, "explained_variance"), - ], + "fun", + [combinatorial_exact_shapley, permutation_exact_shapley], ) -def test_linear( - linear_dataset, - memcache_client_config, - scorer, - cache_backend, - rtol=0.01, - total_atol=1e-5, -): - linear_utility = Utility( - LinearRegression(), - data=linear_dataset, - scorer=scorer, - cache_backend=cache_backend, - ) - - values_combinatorial = combinatorial_exact_shapley(linear_utility, progress=False) - check_total_value(linear_utility, values_combinatorial, atol=total_atol) - - values_permutation = permutation_exact_shapley(linear_utility, progress=False) - check_total_value(linear_utility, values_permutation, atol=total_atol) - - check_values(values_combinatorial, values_permutation, rtol=rtol) +def test_games(fun, test_game, rtol, total_atol): + values_p = fun(test_game.u) + exact_values = test_game.shapley_values() + check_total_value(test_game.u, values_p, atol=total_atol) + check_values(values_p, exact_values, rtol=rtol) @pytest.mark.parametrize( @@ -73,7 +44,6 @@ def test_linear( def test_grouped_linear( linear_dataset, num_groups, - memcache_client_config, scorer, cache_backend, rtol=0.01, @@ -112,9 +82,7 @@ def test_grouped_linear( (2, 1, 20, "r2"), ], ) -def test_linear_with_outlier( - linear_dataset, memcache_client_config, scorer, cache_backend, total_atol=1e-5 -): +def test_linear_with_outlier(linear_dataset, scorer, cache_backend, total_atol=1e-5): outlier_idx = np.random.randint(len(linear_dataset.y_train)) linear_dataset.y_train[outlier_idx] -= 100 linear_utility = Utility( @@ -173,7 +141,6 @@ def test_polynomial( def test_polynomial_with_outlier( polynomial_dataset, 
polynomial_pipeline, - memcache_client_config, scorer, cache_backend, total_atol=1e-5, diff --git a/tests/value/shapley/test_truncated.py b/tests/value/shapley/test_truncated.py index ac980ab96..7d5977216 100644 --- a/tests/value/shapley/test_truncated.py +++ b/tests/value/shapley/test_truncated.py @@ -8,7 +8,7 @@ from pydvl.utils.score import Scorer, squashed_r2 from pydvl.value import compute_shapley_values from pydvl.value.shapley import ShapleyMode -from pydvl.value.shapley.truncated import NoTruncation +from pydvl.value.shapley.truncated import FixedTruncation, NoTruncation from pydvl.value.stopping import HistoryDeviation, MaxUpdates from .. import check_total_value, check_values @@ -16,92 +16,49 @@ log = logging.getLogger(__name__) -# noinspection PyTestParametrized @pytest.mark.parametrize( - "num_samples, fun, rtol, atol, kwargs", + "test_game", [ - ( - 12, - ShapleyMode.TruncatedMontecarlo, - 0.1, - 1e-5, - dict( - done=MaxUpdates(500), - truncation=NoTruncation(), - ), - ), + ("symmetric-voting", {"n_players": 6}), + ("shoes", {"left": 3, "right": 4}), ], + indirect=["test_game"], ) -def test_tmcs_analytic_montecarlo_shapley( - num_samples, - analytic_shapley, - parallel_config, - n_jobs, - fun: ShapleyMode, - rtol: float, - atol: float, - kwargs: dict, -): - u, exact_values = analytic_shapley - - values = compute_shapley_values( - u, mode=fun, n_jobs=n_jobs, config=parallel_config, progress=False, **kwargs - ) - - check_values(values, exact_values, rtol=rtol, atol=atol) - - @pytest.mark.parametrize( - "a, b, num_points", [(2, 0, 21)] # training set will have 0.3 * 21 = 6 samples -) -@pytest.mark.parametrize("scorer, rtol", [(squashed_r2, 0.25)]) -@pytest.mark.parametrize( - "fun, kwargs", + "done, truncation_cls, truncation_kwargs", [ - ( - ShapleyMode.TruncatedMontecarlo, - dict( - done=MaxUpdates(500), - truncation=NoTruncation(), - ), - ), + (MaxUpdates(600), NoTruncation, dict()), + (MaxUpdates(600), FixedTruncation, dict(fraction=0.9)), ], ) -def 
test_tmcs_linear_montecarlo_shapley( - linear_shapley, +def test_games( + test_game, + parallel_config, n_jobs, - memcache_client_config, - scorer: Scorer, - rtol: float, - fun: ShapleyMode, - kwargs: dict, + done, + truncation_cls, + truncation_kwargs, + seed, ): - """Tests values for all methods using a linear dataset. - - For permutation and truncated montecarlo, the rtol for each scorer is chosen - so that the number of samples selected is just above the (ε,δ) bound for ε = - rtol, δ=0.001 and the range corresponding to each score. This means that - roughly once every 1000/num_methods runs the test will fail. - - FIXME: - - For permutation, we must increase the number of samples above that what - is done for truncated, this is probably due to the averaging done by the - latter to reduce variance - - We don't have a bound for Owen. - NOTE: - - The variance in the combinatorial method is huge, so we need lots of - samples - - """ - u, exact_values = linear_shapley - check_total_value(u, exact_values, rtol=rtol) + try: + truncation = truncation_cls(test_game.u, **truncation_kwargs) + except TypeError: + # The NoTruncation class's constructor doesn't take any arguments + truncation = truncation_cls(**truncation_kwargs) values = compute_shapley_values( - u, mode=fun, progress=False, n_jobs=n_jobs, **kwargs + test_game.u, + mode=ShapleyMode.TruncatedMontecarlo, + done=done, + truncation=truncation, + n_jobs=n_jobs, + config=parallel_config, + seed=seed, + progress=True, ) - check_values(values, exact_values, rtol=rtol) - check_total_value(u, values, rtol=rtol) # FIXME, could be more than rtol + exact_values = test_game.shapley_values() + check_values(values, exact_values, rtol=0.2, atol=1e-4) @pytest.mark.parametrize( diff --git a/tests/value/test_semivalues.py b/tests/value/test_semivalues.py index 50a0201b7..e33f92543 100644 --- a/tests/value/test_semivalues.py +++ b/tests/value/test_semivalues.py @@ -1,4 +1,5 @@ import math +from itertools import islice from 
typing import Type import numpy as np @@ -17,6 +18,7 @@ ) from pydvl.value.semivalues import ( SVCoefficient, + _marginal, banzhaf_coefficient, beta_coefficient, compute_generic_semivalues, @@ -28,12 +30,112 @@ from .utils import timed -@pytest.mark.parametrize("num_samples", [5]) +@pytest.mark.parametrize( + "test_game", + [ + ("shoes", {"left": 3, "right": 2}), + ], + indirect=["test_game"], +) +@pytest.mark.parametrize( + "sampler, coefficient, batch_size", + [(PermutationSampler, beta_coefficient(1, 1), 5)], +) +def test_marginal_batch_size(test_game, sampler, coefficient, batch_size, seed): + # TODO: This test is probably not needed. + # Because I added it and then realized that it doesn't do much. + # The only difference between the two calls is that for the first one + # the loop is outside and the second one the loop is inside. + sampler_it = iter(sampler(test_game.u.data.indices, seed=seed)) + samples = tuple(islice(sampler_it, batch_size)) + + marginals_single = [] + for sample in samples: + marginals_single.extend( + _marginal(test_game.u, coefficient=coefficient, samples=[sample]) + ) + + marginals_batch = _marginal(test_game.u, coefficient=coefficient, samples=samples) + + assert len(marginals_single) == len(marginals_batch) + assert set(marginals_single) == set(marginals_batch) + + +@pytest.mark.parametrize("n", [10, 100]) +@pytest.mark.parametrize( + "coefficient", + [ + beta_coefficient(1, 1), + beta_coefficient(1, 16), + beta_coefficient(4, 1), + banzhaf_coefficient, + shapley_coefficient, + ], +) +def test_coefficients(n: int, coefficient: SVCoefficient): + r"""Coefficients for semi-values must fulfill: + + $$ \sum_{i=1}^{n}\choose{n-1}{j-1}w^{(n)}(j) = 1 $$ + + Note that we depart from the usual definitions by including the factor $1/n$ + in the shapley and beta coefficients. 
+ """ + s = [math.comb(n - 1, j - 1) * coefficient(n, j - 1) for j in range(1, n + 1)] + assert np.isclose(1, np.sum(s)) + + +@pytest.mark.parametrize( + "test_game", + [ + ("symmetric-voting", {"n_players": 4}), + ("shoes", {"left": 1, "right": 1}), + ("shoes", {"left": 2, "right": 1}), + ("shoes", {"left": 1, "right": 2}), + ], + indirect=["test_game"], +) @pytest.mark.parametrize( "sampler", [ DeterministicUniformSampler, DeterministicPermutationSampler, + ], +) +@pytest.mark.parametrize("coefficient", [shapley_coefficient, beta_coefficient(1, 1)]) +def test_games_shapley_deterministic( + test_game, + parallel_config, + n_jobs, + sampler: Type[PowersetSampler], + coefficient: SVCoefficient, + seed: Seed, +): + criterion = MaxUpdates(50) + values = compute_generic_semivalues( + sampler(test_game.u.data.indices, seed=seed), + test_game.u, + coefficient, + criterion, + skip_converged=True, + n_jobs=n_jobs, + config=parallel_config, + progress=True, + ) + exact_values = test_game.shapley_values() + check_values(values, exact_values, rtol=0.1) + + +@pytest.mark.parametrize( + "test_game", + [ + ("symmetric-voting", {"n_players": 6}), + ("shoes", {"left": 3, "right": 2}), + ], + indirect=["test_game"], +) +@pytest.mark.parametrize( + "sampler", + [ UniformSampler, PermutationSampler, pytest.param(AntitheticSampler, marks=pytest.mark.slow), @@ -41,36 +143,55 @@ ], ) @pytest.mark.parametrize("coefficient", [shapley_coefficient, beta_coefficient(1, 1)]) -def test_shapley( - num_samples: int, - analytic_shapley, +def test_games_shapley( + test_game, + parallel_config, + n_jobs, sampler: Type[PowersetSampler], coefficient: SVCoefficient, - n_jobs: int, - parallel_config: ParallelConfig, seed: Seed, ): - u, exact_values = analytic_shapley - criterion = HistoryDeviation(50, 1e-3) | MaxUpdates(1000) + criterion = HistoryDeviation(50, 1e-4) | MaxUpdates(500) values = compute_generic_semivalues( - sampler(u.data.indices, seed=seed), - u, + sampler(test_game.u.data.indices, 
seed=seed), + test_game.u, coefficient, criterion, skip_converged=True, n_jobs=n_jobs, config=parallel_config, + progress=True, ) + + exact_values = test_game.shapley_values() check_values(values, exact_values, rtol=0.2) @pytest.mark.parametrize( - "num_samples,sampler,coefficient,batch_size", - [(5, PermutationSampler, beta_coefficient(1, 1), 5)], + "test_game", + [ + ("shoes", {"left": 3, "right": 2}), + ], + indirect=["test_game"], +) +@pytest.mark.parametrize( + "sampler, coefficient, batch_size", + [(PermutationSampler, beta_coefficient(1, 1), 5)], +) +@pytest.mark.parametrize( + "n_jobs", + [ + 1, + pytest.param( + 2, + marks=pytest.mark.xfail( + reason="Bad interaction between parallelization and batching" + ), + ), + ], ) def test_shapley_batch_size( - num_samples: int, - analytic_shapley, + test_game, sampler: Type[PermutationSampler], coefficient: SVCoefficient, batch_size: int, @@ -78,13 +199,12 @@ def test_shapley_batch_size( parallel_config: ParallelConfig, seed: Seed, ): - u, exact_values = analytic_shapley timed_fn = timed(compute_generic_semivalues) result_single_batch = timed_fn( - sampler(u.data.indices, seed=seed), - u, + sampler(test_game.u.data.indices, seed=seed), + test_game.u, coefficient, - done=HistoryDeviation(50, 1e-3) | MaxUpdates(1000), + done=MaxUpdates(100), skip_converged=True, n_jobs=n_jobs, batch_size=1, @@ -93,10 +213,10 @@ def test_shapley_batch_size( total_seconds_single_batch = timed_fn.execution_time result_multi_batch = timed_fn( - sampler(u.data.indices, seed=seed), - u, + sampler(test_game.u.data.indices, seed=seed), + test_game.u, coefficient, - done=HistoryDeviation(50, 1e-3) | MaxUpdates(1000), + done=MaxUpdates(100), skip_converged=True, n_jobs=n_jobs, batch_size=batch_size, @@ -141,26 +261,3 @@ def test_banzhaf( config=parallel_config, ) check_values(values, exact_values, rtol=0.2) - - -@pytest.mark.parametrize("n", [10, 100]) -@pytest.mark.parametrize( - "coefficient", - [ - beta_coefficient(1, 1), - 
beta_coefficient(1, 16), - beta_coefficient(4, 1), - banzhaf_coefficient, - shapley_coefficient, - ], -) -def test_coefficients(n: int, coefficient: SVCoefficient): - r"""Coefficients for semi-values must fulfill: - - $$ \sum_{i=1}^{n}\choose{n-1}{j-1}w^{(n)}(j) = 1 $$ - - Note that we depart from the usual definitions by including the factor $1/n$ - in the shapley and beta coefficients. - """ - s = [math.comb(n - 1, j - 1) * coefficient(n, j - 1) for j in range(1, n + 1)] - assert np.isclose(1, np.sum(s)) diff --git a/tests/value/test_stopping.py b/tests/value/test_stopping.py index 7399dc9c3..efebac9c4 100644 --- a/tests/value/test_stopping.py +++ b/tests/value/test_stopping.py @@ -62,10 +62,10 @@ def _check(self, result: ValuationResult) -> Status: assert (C() & C() & C())(v) == c assert (P() | P() | P())(v) == p - assert (C() & P()).name == "Composite StoppingCriterion: C AND P" - assert (C() | P()).name == "Composite StoppingCriterion: C OR P" - assert (~C()).name == "Composite StoppingCriterion: NOT C" - assert (~P()).name == "Composite StoppingCriterion: NOT P" + assert str(C() & P()) == "Composite StoppingCriterion: C AND P" + assert str(C() | P()) == "Composite StoppingCriterion: C OR P" + assert str(~C()) == "Composite StoppingCriterion: NOT C" + assert str(~P()) == "Composite StoppingCriterion: NOT P" def test_make_criterion(): @@ -88,9 +88,9 @@ def always_failed(result: ValuationResult) -> Status: assert P()(v) == Status.Pending assert F()(v) == Status.Failed - assert C().name == "always_converged" - assert P().name == "always_pending" - assert F().name == "always_failed" + assert str(C()) == "always_converged" + assert str(P()) == "always_pending" + assert str(F()) == "always_failed" assert (~C())(v) == Status.Failed assert (~P())(v) == Status.Converged @@ -104,7 +104,7 @@ def always_failed(result: ValuationResult) -> Status: def test_minmax_updates(): maxstop = MaxUpdates(10) - assert maxstop.name == "MaxUpdates" + assert str(maxstop) == 
"MaxUpdates(n_updates=10)" v = ValuationResult.from_random(5) v._counts = np.zeros(5) assert maxstop(v) == Status.Pending @@ -114,7 +114,7 @@ def test_minmax_updates(): assert maxstop(v) == Status.Converged minstop = MinUpdates(10) - assert minstop.name == "MinUpdates" + assert str(minstop) == "MinUpdates(n_updates=10)" v._counts = np.zeros(5) assert minstop(v) == Status.Pending v._counts += np.ones(5) * 9 diff --git a/tox.ini b/tox.ini index 666a5760c..d62cfe481 100644 --- a/tox.ini +++ b/tox.ini @@ -9,16 +9,13 @@ deps = extras = ray influence + memcached setenv = COVERAGE_FILE = {env:COVERAGE_FILE:{toxinidir}/.coverage.{envname}} passenv = CI [testenv:tests] -extras = - ray - influence - memcached commands = pytest -n auto --dist worksteal --cov "{envsitepackagesdir}/pydvl" {posargs} @@ -27,7 +24,7 @@ deps = {[testenv]deps} -r requirements-notebooks.txt commands = - pytest --nbmake -n 0 --cov "{envsitepackagesdir}/pydvl" notebooks/ {posargs} + pytest --nbmake --nbmake-timeout=30 -n 0 --cov "{envsitepackagesdir}/pydvl" notebooks/ {posargs} [testenv:linting] skip_install = true