reagent.ope.trainers package

Submodules

reagent.ope.trainers.linear_trainers module

class reagent.ope.trainers.linear_trainers.DecisionTreeClassifierTrainer

Bases: reagent.ope.trainers.linear_trainers.LinearTrainer

property name
train(data: reagent.ope.estimators.types.TrainingData, iterations: int = 1, num_samples: int = 0)
class reagent.ope.trainers.linear_trainers.DecisionTreeTrainer(is_classifier: bool = False)

Bases: reagent.ope.trainers.linear_trainers.LinearTrainer

property name
train(data: reagent.ope.estimators.types.TrainingData, iterations: int = 1, num_samples: int = 0)
class reagent.ope.trainers.linear_trainers.LassoTrainer(is_classifier: bool = False)

Bases: reagent.ope.trainers.linear_trainers.LinearTrainer

property name
train(data: reagent.ope.estimators.types.TrainingData, iterations: int = 1, num_samples: int = 0)
class reagent.ope.trainers.linear_trainers.LinearNet(D_in, H, D_out)

Bases: torch.nn.Module

forward(x: torch.Tensor)
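
LinearNet is a small feed-forward torch.nn.Module, presumably the network used by NNTrainer below. A minimal sketch of calling it directly, relying only on the constructor and forward signatures shown here (the layer sizes are illustrative):

    import torch
    from reagent.ope.trainers.linear_trainers import LinearNet

    # D_in: input feature width, H: hidden width, D_out: output width
    net = LinearNet(D_in=16, H=64, D_out=2)
    x = torch.randn(8, 16)   # batch of 8 feature vectors
    out = net(x)             # dispatches to forward(x)
    print(out.shape)         # expected shape: (8, 2)
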
class reagent.ope.trainers.linear_trainers.LinearTrainer(is_classifier: bool = False)

Bases: reagent.ope.estimators.types.Trainer

predict(x: torch.Tensor, device=None) → reagent.ope.estimators.types.PredictResults
score(x: torch.Tensor, y: torch.Tensor, weight: Optional[torch.Tensor] = None) → float
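
The concrete trainers in this module (DecisionTreeTrainer, LassoTrainer, and the classifier variants) derive from LinearTrainer and share this predict/score interface. A hedged sketch of the typical train → predict → score loop; the TrainingData field names (train_x, train_y, train_weight, validation_x, validation_y, validation_weight) are an assumption, since that class is defined in reagent.ope.estimators.types rather than in this listing:

    import torch
    from reagent.ope.estimators.types import TrainingData
    from reagent.ope.trainers.linear_trainers import LassoTrainer

    # Toy regression problem: y is a noisy linear function of x.
    x = torch.randn(100, 8)
    y = x @ torch.randn(8) + 0.1 * torch.randn(100)
    data = TrainingData(
        train_x=x[:80], train_y=y[:80], train_weight=None,
        validation_x=x[80:], validation_y=y[80:], validation_weight=None,
    )

    trainer = LassoTrainer()                   # is_classifier defaults to False
    trainer.train(data)
    results = trainer.predict(x[80:])          # PredictResults for the held-out rows
    val_score = trainer.score(x[80:], y[80:])  # scalar fit quality on the validation rows
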
class reagent.ope.trainers.linear_trainers.LogisticRegressionTrainer(solver: str = 'lbfgs')

Bases: reagent.ope.trainers.linear_trainers.LinearTrainer

property name
train(data: reagent.ope.estimators.types.TrainingData, iterations: int = 1, num_samples: int = 0)
class reagent.ope.trainers.linear_trainers.NNTrainer(device=None)

Bases: reagent.ope.estimators.types.Trainer

property name
predict(x: torch.Tensor, device=None) → reagent.ope.estimators.types.PredictResults
score(x: torch.Tensor, y: torch.Tensor, weight: Optional[torch.Tensor] = None) → float
train(data: reagent.ope.estimators.types.TrainingData, iterations: int = 1, num_samples: int = 0)
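
NNTrainer exposes the same Trainer interface but presumably fits a small neural network (such as LinearNet above) on the device passed to the constructor, rather than a scikit-learn style model. A sketch under the same TrainingData assumption as before; the iteration count is illustrative:

    import torch
    from reagent.ope.estimators.types import TrainingData
    from reagent.ope.trainers.linear_trainers import NNTrainer

    x = torch.randn(200, 8)
    y = (x.sum(dim=1) > 0).long()              # toy binary labels
    data = TrainingData(
        train_x=x[:160], train_y=y[:160], train_weight=None,
        validation_x=x[160:], validation_y=y[160:], validation_weight=None,
    )

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    trainer = NNTrainer(device=device)
    trainer.train(data, iterations=100)
    results = trainer.predict(data.validation_x, device=device)
    val_score = trainer.score(data.validation_x, data.validation_y)
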
class reagent.ope.trainers.linear_trainers.SGDClassifierTrainer(loss: str = 'log', max_iter: int = 1000)

Bases: reagent.ope.trainers.linear_trainers.LinearTrainer

property name
train(data: reagent.ope.estimators.types.TrainingData, iterations: int = 1, num_samples: int = 0)
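
The classifier trainers (DecisionTreeClassifierTrainer, LogisticRegressionTrainer, SGDClassifierTrainer) follow the same pattern and differ only in constructor options, which appear to mirror the corresponding scikit-learn estimators. For example, reusing a TrainingData object `data` built as in the sketches above:

    from reagent.ope.trainers.linear_trainers import (
        LogisticRegressionTrainer,
        SGDClassifierTrainer,
    )

    # `data` is a TrainingData instance with integer class labels, as above.
    lr_trainer = LogisticRegressionTrainer(solver="lbfgs")
    lr_trainer.train(data, iterations=1)

    sgd_trainer = SGDClassifierTrainer(loss="log", max_iter=1000)
    sgd_trainer.train(data, iterations=1)

    # score() returns a single float (the exact metric is an implementation
    # detail not shown in this listing).
    print(lr_trainer.score(data.validation_x, data.validation_y))
    print(sgd_trainer.score(data.validation_x, data.validation_y))
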

reagent.ope.trainers.rl_tabular_trainers module

class reagent.ope.trainers.rl_tabular_trainers.DPTrainer(env: reagent.ope.test.envs.Environment, policy: reagent.ope.trainers.rl_tabular_trainers.TabularPolicy)

Bases: object

train(gamma: float = 0.9, threshold: float = 0.0001)
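
DPTrainer improves a TabularPolicy by dynamic programming against a known Environment, sweeping until the value change falls below threshold. A sketch of the call sequence; a concrete Environment subclass (for example one of the test environments under reagent.ope.test) has to be supplied, and the ActionSpace(n) constructor is an assumption:

    from reagent.ope.estimators.types import ActionSpace
    from reagent.ope.trainers.rl_tabular_trainers import DPTrainer, TabularPolicy

    env = ...  # any concrete reagent.ope.test.envs.Environment (e.g. a small grid world)
    policy = TabularPolicy(ActionSpace(4), epsilon=0.0)  # tabular policy over 4 actions
    trainer = DPTrainer(env, policy)
    trainer.train(gamma=0.9, threshold=1e-4)             # sweep until values converge
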
class reagent.ope.trainers.rl_tabular_trainers.DPValueFunction(policy: reagent.ope.estimators.sequential_estimators.RLPolicy, env: reagent.ope.test.envs.Environment, gamma: float = 0.99, threshold: float = 0.0001)

Bases: reagent.ope.trainers.rl_tabular_trainers.TabularValueFunction

reset(clear_state_values: bool = False)
state_value(state: reagent.ope.estimators.sequential_estimators.State, horizon: int = -1) → float
class reagent.ope.trainers.rl_tabular_trainers.MonteCarloTrainer(env: reagent.ope.test.envs.Environment, policy: reagent.ope.trainers.rl_tabular_trainers.TabularPolicy)

Bases: object

train(iterations: int, gamma: float = 0.9, first_visit: bool = True, update_interval: int = 20)
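
MonteCarloTrainer improves the policy from sampled episodes instead: judging by its parameters, it rolls out the environment, accumulates first-visit (or every-visit) returns, and refreshes the policy every update_interval episodes. A sketch, with the same caveats about Environment and ActionSpace as above:

    from reagent.ope.estimators.types import ActionSpace
    from reagent.ope.trainers.rl_tabular_trainers import MonteCarloTrainer, TabularPolicy

    env = ...  # concrete Environment, as in the DPTrainer sketch
    policy = TabularPolicy(ActionSpace(4), epsilon=0.2)  # epsilon-greedy exploration
    trainer = MonteCarloTrainer(env, policy)
    trainer.train(iterations=1000, gamma=0.9, first_visit=True, update_interval=20)
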
class reagent.ope.trainers.rl_tabular_trainers.MonteCarloValueFunction(policy: reagent.ope.estimators.sequential_estimators.RLPolicy, env: reagent.ope.test.envs.Environment, gamma: float = 0.99, first_visit: bool = True, count_threshold: int = 100, max_iteration: int = 200)

Bases: reagent.ope.trainers.rl_tabular_trainers.TabularValueFunction

reset(clear_state_values: bool = False)
state_value(state: reagent.ope.estimators.sequential_estimators.State) → float
class reagent.ope.trainers.rl_tabular_trainers.TabularPolicy(action_space: reagent.ope.estimators.types.ActionSpace, epsilon: float = 0.0, device=None)

Bases: reagent.ope.estimators.sequential_estimators.RLPolicy

action_dist(state: reagent.ope.estimators.sequential_estimators.State) → reagent.ope.estimators.types.ActionDistribution
load(path) → bool
save(path) → bool
update(state: reagent.ope.estimators.sequential_estimators.State, actions: Sequence[float]) → float
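
TabularPolicy presumably stores one action distribution per discrete state, with epsilon controlling the amount of uniform exploration mixed in. A sketch of querying and updating it; the ActionSpace(n) and State(value) constructors live in other modules and are assumptions here:

    from reagent.ope.estimators.sequential_estimators import State
    from reagent.ope.estimators.types import ActionSpace
    from reagent.ope.trainers.rl_tabular_trainers import TabularPolicy

    policy = TabularPolicy(ActionSpace(3), epsilon=0.05)
    state = State(0)                          # wrap a discrete state id
    dist = policy.action_dist(state)          # ActionDistribution over the 3 actions
    policy.update(state, [0.1, 0.7, 0.2])     # set per-action probabilities for this state
    policy.save("/tmp/tabular_policy.pt")     # save/load return True on success
    policy.load("/tmp/tabular_policy.pt")
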
class reagent.ope.trainers.rl_tabular_trainers.TabularValueFunction(policy: reagent.ope.estimators.sequential_estimators.RLPolicy, model: reagent.ope.estimators.sequential_estimators.Model, gamma=0.99)

Bases: reagent.ope.estimators.sequential_estimators.ValueFunction

reset(clear_state_values: bool = False)
state_action_value(state: reagent.ope.estimators.sequential_estimators.State, action: reagent.ope.estimators.types.TypeWrapper[Union[int, Tuple[int], float, Tuple[float], numpy.ndarray, torch.Tensor]]) → float
state_value(state: reagent.ope.estimators.sequential_estimators.State) → float
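
DPValueFunction and MonteCarloValueFunction both specialise TabularValueFunction and evaluate a fixed RLPolicy; judging by their names and parameters, the former does so by iterative sweeps over a known environment and the latter by sampled rollouts. A sketch of evaluating a state, reusing the policy, env and state from the sketches above:

    from reagent.ope.trainers.rl_tabular_trainers import (
        DPValueFunction,
        MonteCarloValueFunction,
    )

    # `policy`, `env` and `state` as constructed in the earlier sketches.
    dp_vf = DPValueFunction(policy, env, gamma=0.99, threshold=1e-4)
    v_dp = dp_vf.state_value(state)            # V(state) under `policy`
    dp_vf.reset(clear_state_values=True)       # drop cached values before re-evaluating

    mc_vf = MonteCarloValueFunction(policy, env, gamma=0.99, first_visit=True)
    v_mc = mc_vf.state_value(state)
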

Module contents