reagent.optimizer package

Submodules

reagent.optimizer.optimizer module

For each Torch optimizer, we create a pydantic dataclass wrapper around it and register that class in our Optimizer registry.

Usage:

Whenever you want to use this Optimizer__Union, specify it as the field's type. E.g.:

    class Parameters:
        rl: RLParameters = field(default_factory=RLParameters)
        minibatch_size: int = 64
        optimizer: Optimizer__Union = field(default_factory=Optimizer__Union.default)

To instantiate it, specify the desired optimizer in a YAML file. E.g.:

    rl:
      minibatch: 64
      optimizer:
        Adam:
          lr: 0.001
          eps: 1e-08
          lr_schedulers:
            - OneCycleLR:
                ...

Since we don't yet know which network parameters we want to optimize, Optimizer__Union acts as a factory for the optimizer it contains.

Following the above example, we create an optimizer as follows:

    class Trainer:
        def __init__(self, network, params):
            self.optimizer = params.optimizer.make_optimizer_scheduler(
                network.parameters()
            )["optimizer"]

        def train(self, data):
            ...
            loss.backward()
            # steps both the optimizer and the chained lr_schedulers
            self.optimizer.step()
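
The same wiring can be done programmatically, without YAML. The snippet below is a minimal sketch rather than an example from the library's docs; the network and the keyword arguments passed to the Adam config are illustrative.

    import torch

    from reagent.optimizer.uninferrable_optimizers import Adam
    from reagent.optimizer.union import Optimizer__Union

    net = torch.nn.Linear(10, 2)  # stand-in network
    opt_union = Optimizer__Union(Adam=Adam(lr=1e-3, eps=1e-8))
    optimizer = opt_union.make_optimizer_scheduler(net.parameters())["optimizer"]
    # `optimizer` is a torch.optim.Adam bound to net's parameters.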

class reagent.optimizer.optimizer.OptimizerConfig(lr_schedulers: List[reagent.optimizer.scheduler.LearningRateSchedulerConfig] = <factory>)

Bases: object

REGISTRY = {'ASGD': <class 'abc.ASGD'>, 'Adadelta': <class 'abc.Adadelta'>, 'Adagrad': <class 'abc.Adagrad'>, 'Adam': <class 'reagent.optimizer.uninferrable_optimizers.Adam'>, 'AdamW': <class 'reagent.optimizer.uninferrable_optimizers.AdamW'>, 'Adamax': <class 'reagent.optimizer.uninferrable_optimizers.Adamax'>, 'LBFGS': <class 'reagent.optimizer.uninferrable_optimizers.LBFGS'>, 'NAdam': <class 'reagent.optimizer.uninferrable_optimizers.NAdam'>, 'RAdam': <class 'reagent.optimizer.uninferrable_optimizers.RAdam'>, 'RMSprop': <class 'abc.RMSprop'>, 'Rprop': <class 'reagent.optimizer.uninferrable_optimizers.Rprop'>, 'SGD': <class 'reagent.optimizer.uninferrable_optimizers.SGD'>, 'SparseAdam': <class 'reagent.optimizer.uninferrable_optimizers.SparseAdam'>}
REGISTRY_FROZEN = True
REGISTRY_NAME = 'OptimizerConfig'
lr_schedulers: List[reagent.optimizer.scheduler.LearningRateSchedulerConfig]
make_optimizer_scheduler(params) → Dict[str, Union[torch.optim.optimizer.Optimizer, torch.optim.lr_scheduler._LRScheduler]]
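
As a hedged illustration of the returned dictionary: with an empty lr_schedulers list only the optimizer is built, and when a scheduler config is supplied the result presumably also carries the built scheduler (the "lr_scheduler" key below is an assumption, not confirmed by this page).

    import torch

    from reagent.optimizer.uninferrable_optimizers import Adam
    from reagent.optimizer.uninferrable_schedulers import StepLR

    net = torch.nn.Linear(4, 1)  # stand-in network
    config = Adam(lr=1e-3, lr_schedulers=[StepLR(step_size=10)])
    out = config.make_optimizer_scheduler(net.parameters())
    opt = out["optimizer"]
    sched = out.get("lr_scheduler")  # assumed key for the built scheduler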

reagent.optimizer.scheduler module

class reagent.optimizer.scheduler.LearningRateSchedulerConfig

Bases: object

REGISTRY = {}
REGISTRY_FROZEN = False
REGISTRY_NAME = 'LearningRateSchedulerConfig'
decode_lambdas(args: Dict[str, Any]) → None
make_from_optimizer(optimizer: torch.optim.optimizer.Optimizer) → torch.optim.lr_scheduler._LRScheduler
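
A minimal sketch of turning a concrete scheduler config into a torch scheduler for an existing optimizer; the SGD optimizer and the linear layer are illustrative stand-ins.

    import torch

    from reagent.optimizer.uninferrable_schedulers import StepLR

    torch_opt = torch.optim.SGD(torch.nn.Linear(3, 1).parameters(), lr=0.1)
    scheduler = StepLR(step_size=10).make_from_optimizer(torch_opt)
    # `scheduler` is a torch.optim.lr_scheduler.StepLR stepping every 10 epochs.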

reagent.optimizer.scheduler_union module

class reagent.optimizer.scheduler_union.LearningRateScheduler__Union(LambdaLR: Optional[reagent.optimizer.uninferrable_schedulers.LambdaLR] = None, MultiplicativeLR: Optional[reagent.optimizer.uninferrable_schedulers.MultiplicativeLR] = None, CyclicLR: Optional[reagent.optimizer.uninferrable_schedulers.CyclicLR] = None, StepLR: Optional[reagent.optimizer.uninferrable_schedulers.StepLR] = None, MultiStepLR: Optional[reagent.optimizer.uninferrable_schedulers.MultiStepLR] = None, ExponentialLR: Optional[reagent.optimizer.uninferrable_schedulers.ExponentialLR] = None, CosineAnnealingLR: Optional[reagent.optimizer.uninferrable_schedulers.CosineAnnealingLR] = None, OneCycleLR: Optional[reagent.optimizer.uninferrable_schedulers.OneCycleLR] = None, CosineAnnealingWarmRestarts: Optional[reagent.optimizer.uninferrable_schedulers.CosineAnnealingWarmRestarts] = None, ChainedScheduler: Optional[reagent.optimizer.scheduler_union.ChainedScheduler] = None, ConstantLR: Optional[reagent.optimizer.scheduler_union.ConstantLR] = None, LinearLR: Optional[reagent.optimizer.scheduler_union.LinearLR] = None, SequentialLR: Optional[reagent.optimizer.scheduler_union.SequentialLR] = None)

Bases: reagent.core.tagged_union.TaggedUnion

ChainedScheduler: Optional[reagent.optimizer.scheduler_union.ChainedScheduler] = None
ConstantLR: Optional[reagent.optimizer.scheduler_union.ConstantLR] = None
CosineAnnealingLR: Optional[reagent.optimizer.uninferrable_schedulers.CosineAnnealingLR] = None
CosineAnnealingWarmRestarts: Optional[reagent.optimizer.uninferrable_schedulers.CosineAnnealingWarmRestarts] = None
CyclicLR: Optional[reagent.optimizer.uninferrable_schedulers.CyclicLR] = None
ExponentialLR: Optional[reagent.optimizer.uninferrable_schedulers.ExponentialLR] = None
LambdaLR: Optional[reagent.optimizer.uninferrable_schedulers.LambdaLR] = None
LinearLR: Optional[reagent.optimizer.scheduler_union.LinearLR] = None
MultiStepLR: Optional[reagent.optimizer.uninferrable_schedulers.MultiStepLR] = None
MultiplicativeLR: Optional[reagent.optimizer.uninferrable_schedulers.MultiplicativeLR] = None
OneCycleLR: Optional[reagent.optimizer.uninferrable_schedulers.OneCycleLR] = None
SequentialLR: Optional[reagent.optimizer.scheduler_union.SequentialLR] = None
StepLR: Optional[reagent.optimizer.uninferrable_schedulers.StepLR] = None
make_union_instance(instance_class=None)
reagent.optimizer.scheduler_union.get_torch_lr_schedulers() → List[str]
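
For illustration, exactly one member of the union is set at a time and the rest remain None. A sketch (the OneCycleLR arguments are illustrative):

    from reagent.optimizer.scheduler_union import (
        LearningRateScheduler__Union,
        get_torch_lr_schedulers,
    )
    from reagent.optimizer.uninferrable_schedulers import OneCycleLR

    print(get_torch_lr_schedulers())  # names of the torch lr_scheduler classes

    # Select OneCycleLR; every other member of the union stays None.
    union = LearningRateScheduler__Union(
        OneCycleLR=OneCycleLR(max_lr=0.01, total_steps=1000)
    )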

reagent.optimizer.soft_update module

class reagent.optimizer.soft_update.SoftUpdate(target_params, source_params, tau=0.1)

Bases: torch.optim.optimizer.Optimizer

classmethod make_optimizer_scheduler(target_params, source_params, tau)
step(closure=None)

Performs a single optimization step.

Parameters

closure (callable, optional) – A closure that reevaluates the model and returns the loss.
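
A hedged sketch of the typical target-network use: after each gradient step on the online network, step() nudges the target parameters a fraction tau toward the online ones. The networks below are illustrative.

    import copy

    import torch

    from reagent.optimizer.soft_update import SoftUpdate

    online = torch.nn.Linear(4, 2)
    target = copy.deepcopy(online)

    soft_updater = SoftUpdate(target.parameters(), online.parameters(), tau=0.05)

    # ... after each optimizer step on the online network:
    soft_updater.step()  # target <- (1 - tau) * target + tau * online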

reagent.optimizer.uninferrable_optimizers module

This file contains configs that could not be inferred from the default values provided by PyTorch. If PyTorch optimizers and lr_schedulers had type annotations, we could infer everything.

Default values that cannot be inferred:

  • tuple
  • None

TODO: remove this file once we can infer everything.

class reagent.optimizer.uninferrable_optimizers.Adam(lr_schedulers: List[reagent.optimizer.scheduler.LearningRateSchedulerConfig] = <factory>, lr: float = 0.001, betas: Tuple[float, float] = (0.9, 0.999), eps: float = 1e-08, weight_decay: float = 0, amsgrad: bool = False, maximize: bool = False)

Bases: reagent.optimizer.optimizer.OptimizerConfig

amsgrad: bool = False
betas: Tuple[float, float] = (0.9, 0.999)
eps: float = 1e-08
lr: float = 0.001
maximize: bool = False
weight_decay: float = 0
class reagent.optimizer.uninferrable_optimizers.AdamW(lr_schedulers: List[reagent.optimizer.scheduler.LearningRateSchedulerConfig] = <factory>, lr: float = 0.001, betas: Tuple[float, float] = (0.9, 0.999), eps: float = 1e-08, weight_decay: float = 0.01, amsgrad: bool = False, maximize: bool = False)

Bases: reagent.optimizer.optimizer.OptimizerConfig

amsgrad: bool = False
betas: Tuple[float, float] = (0.9, 0.999)
eps: float = 1e-08
lr: float = 0.001
maximize: bool = False
weight_decay: float = 0.01
class reagent.optimizer.uninferrable_optimizers.Adamax(lr_schedulers: List[reagent.optimizer.scheduler.LearningRateSchedulerConfig] = <factory>, lr: float = 0.001, betas: Tuple[float, float] = (0.9, 0.999), eps: float = 1e-08, weight_decay: float = 0, maximize: bool = False)

Bases: reagent.optimizer.optimizer.OptimizerConfig

betas: Tuple[float, float] = (0.9, 0.999)
eps: float = 1e-08
lr: float = 0.001
maximize: bool = False
weight_decay: float = 0
class reagent.optimizer.uninferrable_optimizers.LBFGS(lr_schedulers: List[reagent.optimizer.scheduler.LearningRateSchedulerConfig] = <factory>, lr: float = 1, max_iter: int = 20, max_eval: Optional[int] = None, tolerance_grad: float = 1e-07, tolerance_change: float = 1e-09, history_size: int = 100, line_search_fn: Optional[str] = None, maximize: bool = False)

Bases: reagent.optimizer.optimizer.OptimizerConfig

history_size: int = 100
line_search_fn: Optional[str] = None
lr: float = 1
max_eval: Optional[int] = None
max_iter: int = 20
maximize: bool = False
tolerance_change: float = 1e-09
tolerance_grad: float = 1e-07
class reagent.optimizer.uninferrable_optimizers.NAdam(lr_schedulers: List[reagent.optimizer.scheduler.LearningRateSchedulerConfig] = <factory>, lr: float = 0.001, betas: Tuple[float, float] = (0.9, 0.999), eps: float = 1e-08, weight_decay: float = 0, momentum_decay: float = 0.004, maximize: bool = False)

Bases: reagent.optimizer.optimizer.OptimizerConfig

betas: Tuple[float, float] = (0.9, 0.999)
eps: float = 1e-08
lr: float = 0.001
maximize: bool = False
momentum_decay: float = 0.004
weight_decay: float = 0
class reagent.optimizer.uninferrable_optimizers.RAdam(lr_schedulers: List[reagent.optimizer.scheduler.LearningRateSchedulerConfig] = <factory>, lr: float = 0.001, betas: Tuple[float, float] = (0.9, 0.999), eps: float = 1e-08, weight_decay: float = 0, maximize: bool = False)

Bases: reagent.optimizer.optimizer.OptimizerConfig

betas: Tuple[float, float] = (0.9, 0.999)
eps: float = 1e-08
lr: float = 0.001
maximize: bool = False
weight_decay: float = 0
class reagent.optimizer.uninferrable_optimizers.Rprop(lr_schedulers: List[reagent.optimizer.scheduler.LearningRateSchedulerConfig] = <factory>, lr: float = 0.01, etas: Tuple[float, float] = (0.5, 1.2), step_sizes: Tuple[float, float] = (1e-06, 50), maximize: bool = False)

Bases: reagent.optimizer.optimizer.OptimizerConfig

etas: Tuple[float, float] = (0.5, 1.2)
lr: float = 0.01
maximize: bool = False
step_sizes: Tuple[float, float] = (1e-06, 50)
class reagent.optimizer.uninferrable_optimizers.SGD(lr_schedulers: List[reagent.optimizer.scheduler.LearningRateSchedulerConfig] = <factory>, lr: float = 0.001, momentum: float = 0.0, weight_decay: float = 0.0, dampening: float = 0.0, nesterov: bool = False, maximize: bool = False)

Bases: reagent.optimizer.optimizer.OptimizerConfig

dampening: float = 0.0
lr: float = 0.001
maximize: bool = False
momentum: float = 0.0
nesterov: bool = False
weight_decay: float = 0.0
class reagent.optimizer.uninferrable_optimizers.SparseAdam(lr_schedulers: List[reagent.optimizer.scheduler.LearningRateSchedulerConfig] = <factory>, lr: float = 0.001, betas: Tuple[float, float] = (0.9, 0.999), eps: float = 1e-08, maximize: bool = False)

Bases: reagent.optimizer.optimizer.OptimizerConfig

betas: Tuple[float, float] = (0.9, 0.999)
eps: float = 1e-08
lr: float = 0.001
maximize: bool = False

reagent.optimizer.uninferrable_schedulers module

This file contains configs that could not be inferred from the default values provided by PyTorch. If PyTorch optimizers and lr_schedulers had type annotations, we could infer everything.

Default values that cannot be inferred:

  • tuple
  • None
  • required parameters (no default value)

Sometimes there are no defaults to infer from, so we have to include those configs here. TODO: remove this file once we can infer everything.
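
To illustrate the point about required parameters: configs such as CosineAnnealingLR take their required arguments explicitly, while the annotated defaults cover the rest. A minimal sketch:

    from reagent.optimizer.uninferrable_schedulers import CosineAnnealingLR

    cfg = CosineAnnealingLR(T_max=100)  # T_max has no default and must be given
    # eta_min, last_epoch and verbose fall back to 0, -1 and False respectively.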

class reagent.optimizer.uninferrable_schedulers.CosineAnnealingLR(T_max: int, eta_min: float = 0, last_epoch: int = -1, verbose: bool = False)

Bases: reagent.optimizer.scheduler.LearningRateSchedulerConfig

T_max: int
eta_min: float = 0
last_epoch: int = -1
verbose: bool = False
class reagent.optimizer.uninferrable_schedulers.CosineAnnealingWarmRestarts(T_0: int, T_mult: int = 1, eta_min: float = 0, last_epoch: int = -1, verbose: bool = False)

Bases: reagent.optimizer.scheduler.LearningRateSchedulerConfig

T_0: int
T_mult: int = 1
eta_min: float = 0
last_epoch: int = -1
verbose: bool = False
class reagent.optimizer.uninferrable_schedulers.CyclicLR(base_lr: Union[float, List[float]], max_lr: Union[float, List[float]], step_size_up: int = 2000, step_size_down: Optional[int] = None, mode: str = 'triangular', gamma: float = 1.0, scale_fn: Union[str, Callable[[int], float], NoneType] = None, scale_mode: str = 'cycle', cycle_momentum: bool = True, base_momentum: float = 0.8, max_momentum: float = 0.9, last_epoch: int = -1, verbose: bool = False)

Bases: reagent.optimizer.uninferrable_schedulers._ScaleFnLambdaMixin, reagent.optimizer.scheduler.LearningRateSchedulerConfig

base_lr: Union[float, List[float]]
base_momentum: float = 0.8
cycle_momentum: bool = True
gamma: float = 1.0
last_epoch: int = -1
max_lr: Union[float, List[float]]
max_momentum: float = 0.9
mode: str = 'triangular'
scale_fn: Optional[Union[str, Callable[[int], float]]] = None
scale_mode: str = 'cycle'
step_size_down: Optional[int] = None
step_size_up: int = 2000
verbose: bool = False
class reagent.optimizer.uninferrable_schedulers.ExponentialLR(gamma: float, last_epoch: int = -1, verbose: bool = False)

Bases: reagent.optimizer.scheduler.LearningRateSchedulerConfig

gamma: float
last_epoch: int = -1
verbose: bool = False
class reagent.optimizer.uninferrable_schedulers.LambdaLR(lr_lambda: Union[str, Callable[[int], float], List[Callable[[int], float]]], last_epoch: int = -1, verbose: bool = False)

Bases: reagent.optimizer.uninferrable_schedulers._LRLambdaMixin, reagent.optimizer.scheduler.LearningRateSchedulerConfig

last_epoch: int = -1
lr_lambda: Union[str, Callable[[int], float], List[Callable[[int], float]]]
verbose: bool = False
class reagent.optimizer.uninferrable_schedulers.MultiStepLR(milestones: List[int], gamma: float = 0.1, last_epoch: int = -1, verbose: bool = False)

Bases: reagent.optimizer.scheduler.LearningRateSchedulerConfig

gamma: float = 0.1
last_epoch: int = -1
milestones: List[int]
verbose: bool = False
class reagent.optimizer.uninferrable_schedulers.MultiplicativeLR(lr_lambda: Union[str, Callable[[int], float], List[Callable[[int], float]]], last_epoch: int = -1, verbose: bool = False)

Bases: reagent.optimizer.uninferrable_schedulers._LRLambdaMixin, reagent.optimizer.scheduler.LearningRateSchedulerConfig

last_epoch: int = -1
lr_lambda: Union[str, Callable[[int], float], List[Callable[[int], float]]]
verbose: bool = False
class reagent.optimizer.uninferrable_schedulers.OneCycleLR(max_lr: Union[float, List[float]], total_steps: Optional[int] = None, epochs: Optional[int] = None, steps_per_epoch: Optional[int] = None, pct_start: float = 0.3, anneal_strategy: str = 'cos', cycle_momentum: bool = True, base_momentum: float = 0.85, max_momentum: float = 0.95, div_factor: float = 25.0, final_div_factor: float = 10000.0, last_epoch: int = -1, three_phase: bool = False, verbose: bool = False)

Bases: reagent.optimizer.scheduler.LearningRateSchedulerConfig

anneal_strategy: str = 'cos'
base_momentum: float = 0.85
cycle_momentum: bool = True
div_factor: float = 25.0
epochs: Optional[int] = None
final_div_factor: float = 10000.0
last_epoch: int = -1
max_lr: Union[float, List[float]]
max_momentum: float = 0.95
pct_start: float = 0.3
steps_per_epoch: Optional[int] = None
three_phase: bool = False
total_steps: Optional[int] = None
verbose: bool = False
class reagent.optimizer.uninferrable_schedulers.StepLR(step_size: int, gamma: float = 0.1, last_epoch: int = -1, verbose: bool = False)

Bases: reagent.optimizer.scheduler.LearningRateSchedulerConfig

gamma: float = 0.1
last_epoch: int = -1
step_size: int
verbose: bool = False

reagent.optimizer.union module

class reagent.optimizer.union.Optimizer__Union(Adam: Optional[reagent.optimizer.uninferrable_optimizers.Adam] = None, NAdam: Optional[reagent.optimizer.uninferrable_optimizers.NAdam] = None, RAdam: Optional[reagent.optimizer.uninferrable_optimizers.RAdam] = None, SGD: Optional[reagent.optimizer.uninferrable_optimizers.SGD] = None, AdamW: Optional[reagent.optimizer.uninferrable_optimizers.AdamW] = None, SparseAdam: Optional[reagent.optimizer.uninferrable_optimizers.SparseAdam] = None, Adamax: Optional[reagent.optimizer.uninferrable_optimizers.Adamax] = None, LBFGS: Optional[reagent.optimizer.uninferrable_optimizers.LBFGS] = None, Rprop: Optional[reagent.optimizer.uninferrable_optimizers.Rprop] = None, ASGD: Optional[abc.ASGD] = None, Adadelta: Optional[abc.Adadelta] = None, Adagrad: Optional[abc.Adagrad] = None, RMSprop: Optional[abc.RMSprop] = None)

Bases: reagent.core.tagged_union.TaggedUnion

ASGD: Optional[abc.ASGD] = None
Adadelta: Optional[abc.Adadelta] = None
Adagrad: Optional[abc.Adagrad] = None
Adam: Optional[reagent.optimizer.uninferrable_optimizers.Adam] = None
AdamW: Optional[reagent.optimizer.uninferrable_optimizers.AdamW] = None
Adamax: Optional[reagent.optimizer.uninferrable_optimizers.Adamax] = None
LBFGS: Optional[reagent.optimizer.uninferrable_optimizers.LBFGS] = None
NAdam: Optional[reagent.optimizer.uninferrable_optimizers.NAdam] = None
RAdam: Optional[reagent.optimizer.uninferrable_optimizers.RAdam] = None
RMSprop: Optional[abc.RMSprop] = None
Rprop: Optional[reagent.optimizer.uninferrable_optimizers.Rprop] = None
SGD: Optional[reagent.optimizer.uninferrable_optimizers.SGD] = None
SparseAdam: Optional[reagent.optimizer.uninferrable_optimizers.SparseAdam] = None
classmethod default(**kwargs)

Return default factory for Optimizer (defaulting to Adam).

make_optimizer_scheduler(params)
make_union_instance(instance_class=None)
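
A hedged sketch of selecting an optimizer through the union: default() yields the Adam-backed union described above, while passing a specific config selects that optimizer instead (whether default() forwards keyword arguments to Adam is an assumption).

    from reagent.optimizer.uninferrable_optimizers import SGD
    from reagent.optimizer.union import Optimizer__Union

    default_union = Optimizer__Union.default()  # Adam with its default hyperparameters
    sgd_union = Optimizer__Union(SGD=SGD(lr=0.01, momentum=0.9))
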
reagent.optimizer.union.get_torch_optimizers() → List[str]

reagent.optimizer.utils module

reagent.optimizer.utils.is_strict_subclass(a, b)
reagent.optimizer.utils.is_torch_lr_scheduler(cls)
reagent.optimizer.utils.is_torch_optimizer(cls)
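
These helpers presumably check classes against the torch.optim.Optimizer and torch lr_scheduler base classes; the sketch below reflects that assumption rather than documented output.

    import torch

    from reagent.optimizer.utils import (
        is_strict_subclass,
        is_torch_lr_scheduler,
        is_torch_optimizer,
    )

    assert is_torch_optimizer(torch.optim.Adam)
    assert is_torch_lr_scheduler(torch.optim.lr_scheduler.StepLR)
    assert is_strict_subclass(torch.optim.Adam, torch.optim.Optimizer)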

Module contents

class reagent.optimizer.Optimizer__Union(Adam: Optional[reagent.optimizer.uninferrable_optimizers.Adam] = None, NAdam: Optional[reagent.optimizer.uninferrable_optimizers.NAdam] = None, RAdam: Optional[reagent.optimizer.uninferrable_optimizers.RAdam] = None, SGD: Optional[reagent.optimizer.uninferrable_optimizers.SGD] = None, AdamW: Optional[reagent.optimizer.uninferrable_optimizers.AdamW] = None, SparseAdam: Optional[reagent.optimizer.uninferrable_optimizers.SparseAdam] = None, Adamax: Optional[reagent.optimizer.uninferrable_optimizers.Adamax] = None, LBFGS: Optional[reagent.optimizer.uninferrable_optimizers.LBFGS] = None, Rprop: Optional[reagent.optimizer.uninferrable_optimizers.Rprop] = None, ASGD: Optional[abc.ASGD] = None, Adadelta: Optional[abc.Adadelta] = None, Adagrad: Optional[abc.Adagrad] = None, RMSprop: Optional[abc.RMSprop] = None)

Bases: reagent.core.tagged_union.TaggedUnion

ASGD: Optional[abc.ASGD] = None
Adadelta: Optional[abc.Adadelta] = None
Adagrad: Optional[abc.Adagrad] = None
Adam: Optional[reagent.optimizer.uninferrable_optimizers.Adam] = None
AdamW: Optional[reagent.optimizer.uninferrable_optimizers.AdamW] = None
Adamax: Optional[reagent.optimizer.uninferrable_optimizers.Adamax] = None
LBFGS: Optional[reagent.optimizer.uninferrable_optimizers.LBFGS] = None
NAdam: Optional[reagent.optimizer.uninferrable_optimizers.NAdam] = None
RAdam: Optional[reagent.optimizer.uninferrable_optimizers.RAdam] = None
RMSprop: Optional[abc.RMSprop] = None
Rprop: Optional[reagent.optimizer.uninferrable_optimizers.Rprop] = None
SGD: Optional[reagent.optimizer.uninferrable_optimizers.SGD] = None
SparseAdam: Optional[reagent.optimizer.uninferrable_optimizers.SparseAdam] = None
classmethod default(**kwargs)

Return default factory for Optimizer (defaulting to Adam).

make_optimizer_scheduler(params)
make_union_instance(instance_class=None)
class reagent.optimizer.SoftUpdate(target_params, source_params, tau=0.1)

Bases: torch.optim.optimizer.Optimizer

classmethod make_optimizer_scheduler(target_params, source_params, tau)
step(closure=None)

Performs a single optimization step.

Parameters

closure (callable, optional) – A closure that reevaluates the model and returns the loss.