reagent.ope.test package

Submodules

reagent.ope.test.cartpole module

class reagent.ope.test.cartpole.ComboPolicy(action_space: reagent.ope.estimators.types.ActionSpace, weights, policies)

Bases: reagent.ope.estimators.sequential_estimators.RLPolicy

action_dist(state: reagent.ope.estimators.sequential_estimators.State) → reagent.ope.estimators.types.ActionDistribution
class reagent.ope.test.cartpole.EnvironmentModel(state_dim, action_dim, hidden_dim, hidden_layers, activation)

Bases: torch.nn.modules.module.Module

forward(state: torch.Tensor, action: torch.Tensor)

Defines the computation performed at every call.

Should be overridden by all subclasses.

Note

Although the recipe for the forward pass needs to be defined within this function, one should call the Module instance itself rather than calling forward() directly, since the former takes care of running the registered hooks while the latter silently ignores them.

training: bool
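
Example: a minimal sketch of constructing EnvironmentModel and invoking it as a Module (per the note above, call the instance rather than forward() so registered hooks run). The CartPole dimensions, the input shapes, and the form of the activation argument are assumptions, not documented on this page:

    import torch
    import torch.nn as nn

    from reagent.ope.test.cartpole import EnvironmentModel

    # Assumed dimensions for CartPole: 4-dimensional state, 2 actions.
    model = EnvironmentModel(
        state_dim=4,
        action_dim=2,
        hidden_dim=64,
        hidden_layers=2,
        activation=nn.ReLU(),  # assumed to be an nn.Module instance
    )

    state = torch.zeros(1, 4)
    action = torch.zeros(1, 2)
    # Call the Module instance, not model.forward(state, action).
    output = model(state, action)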
class reagent.ope.test.cartpole.ModelWrapper(model: reagent.ope.test.cartpole.EnvironmentModel, device=None)

Bases: reagent.ope.estimators.sequential_estimators.Model

next_state_reward_dist(state: reagent.ope.estimators.sequential_estimators.State, action: reagent.ope.estimators.types.TypeWrapper[Union[int, Tuple[int], float, Tuple[float], numpy.ndarray, torch.Tensor]]) → Mapping[reagent.ope.estimators.sequential_estimators.State, reagent.ope.estimators.sequential_estimators.RewardProbability]
to(device)
class reagent.ope.test.cartpole.PyTorchPolicy(action_space: reagent.ope.estimators.types.ActionSpace, model)

Bases: reagent.ope.estimators.sequential_estimators.RLPolicy

action_dist(state: reagent.ope.estimators.sequential_estimators.State) → reagent.ope.estimators.types.ActionDistribution
reagent.ope.test.cartpole.estimate_value(episodes: int, max_horizon: int, policy: reagent.ope.estimators.sequential_estimators.RLPolicy, gamma: float)
reagent.ope.test.cartpole.generate_logs(episodes: int, max_horizon: int, policy: reagent.ope.estimators.sequential_estimators.RLPolicy)
Parameters
  • episodes – number of episodes to generate

  • max_horizon – max horizon of each episode

  • policy – RLPolicy which uses real-valued states

reagent.ope.test.cartpole.run_dualdice_test(model_path: str, alpha: float)
reagent.ope.test.cartpole.zeta_nu_loss_callback(losses, estimated_values, input: reagent.ope.estimators.sequential_estimators.RLEstimatorInput)
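
Example: a minimal sketch of generating logs and estimating a policy's value with the helpers above. It assumes ActionSpace can be built from the number of actions and that PyTorchPolicy expects a model mapping a state tensor to per-action scores; both are assumptions, not documented here:

    import torch.nn as nn

    from reagent.ope.estimators.types import ActionSpace
    from reagent.ope.test.cartpole import PyTorchPolicy, estimate_value, generate_logs

    # Hypothetical scoring model: 4-dimensional CartPole state -> 2 action scores.
    scorer = nn.Sequential(nn.Linear(4, 64), nn.ReLU(), nn.Linear(64, 2))

    action_space = ActionSpace(2)                # assumed constructor
    policy = PyTorchPolicy(action_space, scorer)

    logs = generate_logs(episodes=100, max_horizon=250, policy=policy)
    value = estimate_value(episodes=100, max_horizon=250, policy=policy, gamma=0.99)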

reagent.ope.test.envs module

class reagent.ope.test.envs.Environment(max_horizon: int = -1)

Bases: reagent.ope.estimators.sequential_estimators.Model

Environment for RL

abstract close()
property current_state
abstract property observation_space
abstract reset(state: Optional[reagent.ope.estimators.sequential_estimators.State] = None)
abstract property states
step(policy: reagent.ope.estimators.sequential_estimators.RLPolicy)
class reagent.ope.test.envs.PolicyLogGenerator(env: reagent.ope.test.envs.Environment, policy: reagent.ope.estimators.sequential_estimators.RLPolicy)

Bases: object

generate_log(init_state: reagent.ope.estimators.sequential_estimators.State, max_horizon: int = -1) → Sequence[reagent.ope.estimators.sequential_estimators.Transition]

reagent.ope.test.gridworld module

class reagent.ope.test.gridworld.GridWorld(size: Tuple[int, int], start: Tuple[int, int], goal: Tuple[int, int], max_horizon: int = -1, walls: Iterable[Tuple[int, int]] = (), use_taxicab_reward: bool = False)

Bases: reagent.ope.test.envs.Environment

close()
dump_policy(policy) → str
dump_state_values(state_values) → str
dump_value_func(valfunc: reagent.ope.estimators.sequential_estimators.ValueFunction) → str
classmethod from_grid(grid: Sequence[Sequence[str]], max_horizon: int = -1, use_taxicab_reward: bool = False)
next_state_reward_dist(state: reagent.ope.estimators.sequential_estimators.State, action: reagent.ope.estimators.types.TypeWrapper[Union[int, Tuple[int], float, Tuple[float], numpy.ndarray, torch.Tensor]]) → Mapping[reagent.ope.estimators.sequential_estimators.State, reagent.ope.estimators.sequential_estimators.RewardProbability]
property observation_space
classmethod random_grid(length: int, max_horizon: int = -1, wall_prob: float = 0.1, use_taxicab_reward: bool = False)

Generates a random grid of size length x length with start = (0, 0) and goal = (length-1, length-1)

reset(state: Optional[reagent.ope.estimators.sequential_estimators.State] = None)
property states
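
Example: a minimal sketch of building a GridWorld and inspecting it. random_grid only needs a side length; the wall probability and horizon shown here are arbitrary choices:

    from reagent.ope.test.gridworld import GridWorld

    # 5 x 5 grid with start (0, 0), goal (4, 4), and ~10% random walls.
    gw = GridWorld.random_grid(length=5, max_horizon=100, wall_prob=0.1)

    gw.reset()                 # move back to the start state
    print(gw.current_state)    # the current State
    print(gw.observation_space)
    gw.close()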
class reagent.ope.test.gridworld.NoiseGridWorldModel(gridworld: reagent.ope.test.gridworld.GridWorld, action_space: reagent.ope.estimators.types.ActionSpace, epsilon: float = 0.1, max_horizon: int = -1)

Bases: reagent.ope.test.envs.Environment

close()
property current_state
next_state_reward_dist(state: reagent.ope.estimators.sequential_estimators.State, action: reagent.ope.estimators.types.TypeWrapper[Union[int, Tuple[int], float, Tuple[float], numpy.ndarray, torch.Tensor]]) → Mapping[reagent.ope.estimators.sequential_estimators.State, reagent.ope.estimators.sequential_estimators.RewardProbability]
property observation_space
reset(state: Optional[reagent.ope.estimators.sequential_estimators.State] = None)
property states
class reagent.ope.test.gridworld.ThomasGridWorld

Bases: reagent.ope.test.gridworld.GridWorld

GridWorld set up in https://people.cs.umass.edu/~pthomas/papers/Thomas2015c.pdf
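
Example: a minimal sketch of rolling out trajectories in GridWorld with PolicyLogGenerator from reagent.ope.test.envs. It assumes a uniform RandomRLPolicy(action_space) is available in reagent.ope.estimators.sequential_estimators and that ActionSpace takes the number of actions; any other RLPolicy implementation works the same way:

    from reagent.ope.estimators.sequential_estimators import RandomRLPolicy
    from reagent.ope.estimators.types import ActionSpace
    from reagent.ope.test.envs import PolicyLogGenerator
    from reagent.ope.test.gridworld import GridWorld

    gw = GridWorld.random_grid(length=5, max_horizon=100)
    policy = RandomRLPolicy(ActionSpace(4))   # 4 moves; assumed constructors

    generator = PolicyLogGenerator(gw, policy)
    log = {}
    for state in gw.states:
        # One trajectory (a sequence of Transitions) per start state.
        log[state] = generator.generate_log(state)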

reagent.ope.test.mslr_slate module

class reagent.ope.test.mslr_slate.MSLRDatasets(params, num_columns: int, anchor_url_features: List[int], body_features: List[int], dataset_name: str = '', device=None)

Bases: object

property all_features: torch.Tensor
property anchor_url_features: torch.Tensor
property body_features: torch.Tensor
property cache_file: str
property features: torch.Tensor
property folder: str
load()
property name: str
property queries: torch.Tensor
property relevances: torch.Tensor
property sample_weights: torch.Tensor
save()
property source_file: List[str]
class reagent.ope.test.mslr_slate.MSLRModel(relevances: torch.Tensor, device=None)

Bases: reagent.ope.estimators.slate_estimators.SlateModel

item_relevances(context: reagent.ope.estimators.slate_estimators.SlateContext) → torch.Tensor
item_rewards(context: reagent.ope.estimators.slate_estimators.SlateContext) → reagent.ope.estimators.slate_estimators.SlateItemValues

Returns each item’s relevance under the given context.

Returns

Item relevances

reagent.ope.test.mslr_slate.evaluate(experiments: Iterable[Tuple[Iterable[reagent.ope.estimators.slate_estimators.SlateEstimator], int]], dataset: reagent.ope.test.mslr_slate.MSLRDatasets, slate_size: int, item_size: int, metric_func: str, log_trainer: reagent.ope.estimators.types.Trainer, log_distribution: reagent.ope.estimators.slate_estimators.RewardDistribution, log_features: str, tgt_trainer: reagent.ope.estimators.types.Trainer, tgt_distribution: reagent.ope.estimators.slate_estimators.RewardDistribution, tgt_features: str, dm_features: str, max_num_workers: int, device=None)
reagent.ope.test.mslr_slate.load_dataset(params, num_columns, anchor_url_features, body_features, dataset_name='') → reagent.ope.test.mslr_slate.MSLRDatasets
reagent.ope.test.mslr_slate.train(trainer: reagent.ope.estimators.types.Trainer, train_dataset: reagent.ope.test.mslr_slate.MSLRDatasets, vali_dataset: reagent.ope.test.mslr_slate.MSLRDatasets, prefix: str = '')
reagent.ope.test.mslr_slate.train_all(train_dataset, vali_dataset, prefix: str = '')
reagent.ope.test.mslr_slate.train_models(params)

reagent.ope.test.multiclass_bandits module

class reagent.ope.test.multiclass_bandits.MultiClassContext(query_id: int)

Bases: object

query_id: int
class reagent.ope.test.multiclass_bandits.MultiClassDataRow(feature: torch.Tensor, label: torch.Tensor, one_hot: torch.Tensor)

Bases: object

feature: torch.Tensor
label: torch.Tensor
one_hot: torch.Tensor
class reagent.ope.test.multiclass_bandits.MultiClassModel(features: torch.Tensor, rewards: torch.Tensor)

Bases: reagent.ope.estimators.contextual_bandits_estimators.BanditsModel

class reagent.ope.test.multiclass_bandits.MultiClassPolicy(action_space: reagent.ope.estimators.types.ActionSpace, action_distributions: torch.Tensor, epsilon: float, device=None)

Bases: reagent.ope.estimators.types.Policy
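
Example: a minimal sketch of constructing the bandit model and policy above from raw tensors. The tensor layout (one row per sample, one column per action) and the ActionSpace constructor are assumptions, not documented here:

    import torch

    from reagent.ope.estimators.types import ActionSpace
    from reagent.ope.test.multiclass_bandits import MultiClassModel, MultiClassPolicy

    num_samples, num_features, num_actions = 100, 16, 4

    features = torch.rand(num_samples, num_features)
    # Assumed layout: reward 1 for the correct class, 0 elsewhere.
    rewards = torch.nn.functional.one_hot(
        torch.randint(num_actions, (num_samples,)), num_actions
    ).float()
    model = MultiClassModel(features, rewards)

    policy = MultiClassPolicy(
        action_space=ActionSpace(num_actions),   # assumed constructor
        action_distributions=torch.softmax(torch.rand(num_samples, num_actions), dim=1),
        epsilon=0.1,
    )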

class reagent.ope.test.multiclass_bandits.UCIMultiClassDataset(params, device=None)

Bases: object

Loads and holds UCI classification datasets (https://archive.ics.uci.edu/ml/datasets.php?task=cla&sort=nameUp&view=table) and converts them into contextual bandit problems. Reference: https://arxiv.org/abs/1103.4601

property config_file: str
property features: torch.Tensor
property labels: torch.Tensor
property num_actions: int
property num_features: int
property one_hots: torch.Tensor
train_val_test_split(ratios: Tuple[float, float] = (0.8, 0.8), device=None)
reagent.ope.test.multiclass_bandits.evaluate_all(experiments: Iterable[Tuple[Iterable[reagent.ope.estimators.estimator.Estimator], int]], dataset: reagent.ope.test.multiclass_bandits.UCIMultiClassDataset, log_trainer: reagent.ope.estimators.types.Trainer, log_epsilon: float, tgt_trainer: reagent.ope.estimators.types.Trainer, tgt_epsilon: float, max_num_workers: int, random_reward_prob: float = 0.0, device=None)

Module contents