reagent.ope.test package

Submodules

reagent.ope.test.envs module

class reagent.ope.test.envs.Environment(max_horizon: int = -1)

Bases: reagent.ope.estimators.sequential_estimators.Model

Environment for RL

abstract close()
property current_state
abstract property observation_space
abstract reset(state: Optional[reagent.ope.estimators.sequential_estimators.State] = None)
abstract property states
step(policy: reagent.ope.estimators.sequential_estimators.RLPolicy)
class reagent.ope.test.envs.PolicyLogGenerator(env: reagent.ope.test.envs.Environment, policy: reagent.ope.estimators.sequential_estimators.RLPolicy)

Bases: object

generate_log(init_state: reagent.ope.estimators.sequential_estimators.State) → Sequence[reagent.ope.estimators.sequential_estimators.Transition]
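
A minimal usage sketch. It pairs PolicyLogGenerator with the GridWorld environment documented below; RandomRLPolicy and ActionSpace are assumed to be importable from reagent.ope.estimators.sequential_estimators and reagent.ope.estimators.types, so treat those names as assumptions rather than part of this module's API:

    from reagent.ope.estimators.sequential_estimators import RandomRLPolicy
    from reagent.ope.estimators.types import ActionSpace
    from reagent.ope.test.envs import PolicyLogGenerator
    from reagent.ope.test.gridworld import GridWorld

    # Small grid environment with one wall (constructor documented below).
    env = GridWorld(size=(5, 5), start=(0, 0), goal=(4, 4), walls=[(2, 2)], max_horizon=100)

    # Uniform random behavior policy over the four grid actions.
    behavior_policy = RandomRLPolicy(ActionSpace(4))

    # Roll out a few logged trajectories (sequences of Transitions) from every state.
    generator = PolicyLogGenerator(env, behavior_policy)
    logs = {
        state: [generator.generate_log(state) for _ in range(10)]
        for state in env.states
    }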

reagent.ope.test.gridworld module

class reagent.ope.test.gridworld.GridWorld(size: Tuple[int, int], start: Tuple[int, int], goal: Tuple[int, int], max_horizon: int = -1, walls: Iterable[Tuple[int, int]] = ())

Bases: reagent.ope.test.envs.Environment

close()
dump_policy(policy) → str
dump_state_values(state_values) → str
dump_value_func(valfunc: reagent.ope.estimators.sequential_estimators.ValueFunction) → str
classmethod from_grid(grid: Sequence[Sequence[str]], max_horizon: int = -1)
next_state_reward_dist(state: reagent.ope.estimators.sequential_estimators.State, action: reagent.ope.estimators.types.TypeWrapper[Union[int, Tuple[int], float, Tuple[float], numpy.ndarray, torch.Tensor]]) → Mapping[reagent.ope.estimators.sequential_estimators.State, reagent.ope.estimators.sequential_estimators.RewardProbability]
property observation_space
reset(state: Optional[reagent.ope.estimators.sequential_estimators.State] = None)
property states
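
A construction sketch for GridWorld. The explicit constructor follows the documented signature; the grid tokens accepted by from_grid ('s' start, 'g' goal, 'w' wall, '0' open cell) are an assumption about the test script's notation:

    from reagent.ope.test.gridworld import GridWorld

    # Explicit construction, using the documented signature.
    env = GridWorld(size=(4, 4), start=(0, 0), goal=(3, 3), walls=[(1, 1)], max_horizon=200)

    # Equivalent construction from a character grid; the token convention is assumed.
    env_from_grid = GridWorld.from_grid(
        [
            ["s", "0", "0", "0"],
            ["0", "w", "0", "0"],
            ["0", "0", "0", "0"],
            ["0", "0", "0", "g"],
        ],
        max_horizon=200,
    )

    print(env.observation_space)   # observation space of the grid
    print(len(list(env.states)))   # enumerate all states
    env.reset()                    # reset (to the start state when no state is given)
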
class reagent.ope.test.gridworld.NoiseGridWorldModel(gridworld: reagent.ope.test.gridworld.GridWorld, action_space: reagent.ope.estimators.types.ActionSpace, epsilon: float = 0.1, max_horizon: int = -1)

Bases: reagent.ope.test.envs.Environment

close()
property current_state
next_state_reward_dist(state: reagent.ope.estimators.sequential_estimators.State, action: reagent.ope.estimators.types.TypeWrapper[Union[int, Tuple[int], float, Tuple[float], numpy.ndarray, torch.Tensor]]) → Mapping[reagent.ope.estimators.sequential_estimators.State, reagent.ope.estimators.sequential_estimators.RewardProbability]
property observation_space
reset(state: Optional[reagent.ope.estimators.sequential_estimators.State] = None)
property states
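
A sketch of wrapping a GridWorld in a NoiseGridWorldModel. The constructor call follows the documented signature; building the action space as ActionSpace(4) for the four grid moves is an assumption:

    from reagent.ope.estimators.types import ActionSpace
    from reagent.ope.test.gridworld import GridWorld, NoiseGridWorldModel

    ground_truth = GridWorld(size=(5, 5), start=(0, 0), goal=(4, 4))

    # Noisy model of the true environment; epsilon sets how much randomness the
    # model injects into its transition dynamics relative to the wrapped GridWorld
    # (see the class implementation for the exact semantics).
    noisy_model = NoiseGridWorldModel(
        gridworld=ground_truth,
        action_space=ActionSpace(4),
        epsilon=0.1,
        max_horizon=200,
    )
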
class reagent.ope.test.gridworld.ThomasGridWorld

Bases: reagent.ope.test.gridworld.GridWorld

GridWorld set up in https://people.cs.umass.edu/~pthomas/papers/Thomas2015c.pdf

reagent.ope.test.mslr_slate module

class reagent.ope.test.mslr_slate.MSLRDatasets(params, num_columns: int, anchor_url_features: List[int], body_features: List[int], dataset_name: str = '', device=None)

Bases: object

property all_features
property anchor_url_features
property body_features
property cache_file
property features
property folder
load()
property name
property queries
property relevances
property sample_weights
save()
property source_file
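
A loading sketch. The structure of params is not documented on this page, so the dictionary keys below are hypothetical placeholders that only mirror the folder, source_file, and cache_file properties above; the feature column indices are likewise illustrative:

    from reagent.ope.test.mslr_slate import MSLRDatasets

    # Hypothetical configuration; the real key names come from the MSLR test configs.
    params = {
        "folder": "data/mslr_web30k/Fold1",
        "source_file": ["train.txt"],
        "cache_file": "train.pt",
    }

    dataset = MSLRDatasets(
        params,
        num_columns=138,                # MSLR-WEB30K: 136 features plus query id and label
        anchor_url_features=[3, 4, 5],  # illustrative column indices
        body_features=[10, 11, 12],     # illustrative column indices
        dataset_name="train",
    )
    dataset.load()   # parse the source file (or read the cached tensors)
    dataset.save()   # write the parsed tensors to the cache file
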
class reagent.ope.test.mslr_slate.MSLRModel(relevances: torch.Tensor, device=None)

Bases: reagent.ope.estimators.slate_estimators.SlateModel

item_relevances(context: reagent.ope.estimators.slate_estimators.SlateContext) → torch.Tensor
item_rewards(context: reagent.ope.estimators.slate_estimators.SlateContext) → reagent.ope.estimators.slate_estimators.SlateItemValues

Returns each item’s relevance under the given context.

Parameters

context (SlateContext)

Returns

Item relevances

reagent.ope.test.mslr_slate.evaluate(experiments: Iterable[Tuple[Iterable[reagent.ope.estimators.slate_estimators.SlateEstimator], int]], dataset: reagent.ope.test.mslr_slate.MSLRDatasets, slate_size: int, item_size: int, metric_func: str, log_trainer: reagent.ope.estimators.types.Trainer, log_distribution: reagent.ope.estimators.slate_estimators.RewardDistribution, log_features: str, tgt_trainer: reagent.ope.estimators.types.Trainer, tgt_distribution: reagent.ope.estimators.slate_estimators.RewardDistribution, tgt_features: str, dm_features: str, max_num_workers: int, device=None)
reagent.ope.test.mslr_slate.load_dataset(params, num_columns, anchor_url_features, body_features, dataset_name='') → reagent.ope.test.mslr_slate.MSLRDatasets
reagent.ope.test.mslr_slate.train(trainer: reagent.ope.estimators.types.Trainer, train_dataset: reagent.ope.test.mslr_slate.MSLRDatasets, vali_dataset: reagent.ope.test.mslr_slate.MSLRDatasets, prefix: str = '')
reagent.ope.test.mslr_slate.train_all(train_dataset, vali_dataset, prefix: str = '')
reagent.ope.test.mslr_slate.train_models(params)

reagent.ope.test.multiclass_bandits module

class reagent.ope.test.multiclass_bandits.MultiClassContext(query_id: int)

Bases: object

class reagent.ope.test.multiclass_bandits.MultiClassDataRow(feature: torch.Tensor, label: torch.Tensor, one_hot: torch.Tensor)

Bases: object

class reagent.ope.test.multiclass_bandits.MultiClassModel(features: torch.Tensor, rewards: torch.Tensor)

Bases: reagent.ope.estimators.contextual_bandits_estimators.BanditsModel

class reagent.ope.test.multiclass_bandits.MultiClassPolicy(action_space: reagent.ope.estimators.types.ActionSpace, action_distributions: torch.Tensor, epsilon: float, device=None)

Bases: reagent.ope.estimators.types.Policy
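
A toy construction sketch for the bandit model and policy. The constructor signatures are as documented above; the layout of action_distributions (one row of action probabilities per query) is an assumption:

    import torch

    from reagent.ope.estimators.types import ActionSpace
    from reagent.ope.test.multiclass_bandits import MultiClassModel, MultiClassPolicy

    num_samples, num_features, num_classes = 100, 8, 4

    # Treat a classification dataset as a bandit problem: features are contexts,
    # one-hot labels are per-action rewards.
    features = torch.rand(num_samples, num_features)
    labels = torch.randint(num_classes, (num_samples,))
    rewards = torch.nn.functional.one_hot(labels, num_classes).float()

    model = MultiClassModel(features, rewards)

    # Epsilon-greedy policy built from per-query action distributions (assumed layout).
    action_distributions = torch.full((num_samples, num_classes), 1.0 / num_classes)
    policy = MultiClassPolicy(ActionSpace(num_classes), action_distributions, epsilon=0.1)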

class reagent.ope.test.multiclass_bandits.UCIMultiClassDataset(params, device=None)

Bases: object

Loads and holds UCI classification datasets (https://archive.ics.uci.edu/ml/datasets.php?task=cla&sort=nameUp&view=table) and converts them into contextual bandit problems.

References: https://arxiv.org/abs/1103.4601

property config_file
property features
property labels
property num_actions
property num_features
property one_hots
train_val_test_split(ratios: Tuple[float, float] = (0.8, 0.8), device=None)
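
A dataset sketch. The keys of params are not documented here and are purely hypothetical (the real values come from the JSON configs shipped with the tests); the three-way return of train_val_test_split is inferred from its name rather than confirmed:

    from reagent.ope.test.multiclass_bandits import UCIMultiClassDataset

    # Hypothetical configuration for one of the UCI classification datasets.
    params = {
        "file": "data/uci/ecoli.data",
        "sep": ",",
        "index_col": 0,
        "label_col": 8,
    }

    dataset = UCIMultiClassDataset(params)
    print(dataset.num_features, dataset.num_actions)

    # Presumably yields (train, validation, test) splits; ratios default to (0.8, 0.8).
    splits = dataset.train_val_test_split(ratios=(0.8, 0.8))
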
reagent.ope.test.multiclass_bandits.evaluate_all(experiments: Iterable[Tuple[Iterable[reagent.ope.estimators.estimator.Estimator], int]], dataset: reagent.ope.test.multiclass_bandits.UCIMultiClassDataset, log_trainer: reagent.ope.estimators.types.Trainer, log_epsilon: float, tgt_trainer: reagent.ope.estimators.types.Trainer, tgt_epsilon: float, max_num_workers: int, device=None)

Module contents