reagent.model_managers.policy_gradient package

Submodules

reagent.model_managers.policy_gradient.ppo module

class reagent.model_managers.policy_gradient.ppo.PPO(trainer_param: reagent.training.parameters.PPOTrainerParameters = <factory>, policy_net_builder: reagent.net_builder.unions.DiscreteDQNNetBuilder__Union = <factory>, value_net_builder: Optional[reagent.net_builder.unions.ValueNetBuilder__Union] = None, state_feature_config_provider: reagent.workflow.types.ModelFeatureConfigProvider__Union = <factory>, sampler_temperature: float = 1.0)

Bases: reagent.model_managers.model_manager.ModelManager

property action_names
build_serving_module(trainer_module: reagent.training.reagent_lightning_module.ReAgentLightningModule, normalization_data_map: Dict[str, reagent.core.parameters.NormalizationData]) -> torch.nn.modules.module.Module

Optionally, implement this method if you only have one model for serving.

build_trainer(normalization_data_map: Dict[str, reagent.core.parameters.NormalizationData], use_gpu: bool, reward_options: Optional[reagent.workflow.types.RewardOptions] = None) -> reagent.training.ppo_trainer.PPOTrainer

Implement this to build the trainer, given the config

TODO: This function should return ReAgentLightningModule & the dictionary of modules created

create_policy(trainer_module: reagent.training.reagent_lightning_module.ReAgentLightningModule, serving: bool = False, normalization_data_map: Optional[Dict[str, reagent.core.parameters.NormalizationData]] = None)
policy_net_builder: reagent.net_builder.unions.DiscreteDQNNetBuilder__Union
sampler_temperature: float = 1.0
property state_feature_config: reagent.core.types.ModelFeatureConfig
state_feature_config_provider: reagent.workflow.types.ModelFeatureConfigProvider__Union
trainer_param: reagent.training.parameters.PPOTrainerParameters
value_net_builder: Optional[reagent.net_builder.unions.ValueNetBuilder__Union] = None

reagent.model_managers.policy_gradient.reinforce module

class reagent.model_managers.policy_gradient.reinforce.Reinforce(trainer_param: reagent.training.parameters.ReinforceTrainerParameters = <factory>, policy_net_builder: reagent.net_builder.unions.DiscreteDQNNetBuilder__Union = <factory>, value_net_builder: Optional[reagent.net_builder.unions.ValueNetBuilder__Union] = None, state_feature_config_provider: reagent.workflow.types.ModelFeatureConfigProvider__Union = <factory>, sampler_temperature: float = 1.0)

Bases: reagent.model_managers.model_manager.ModelManager

property action_names
build_serving_module(trainer_module: reagent.training.reagent_lightning_module.ReAgentLightningModule, normalization_data_map: Dict[str, reagent.core.parameters.NormalizationData]) -> torch.nn.modules.module.Module

Optionally, implement this method if you only have one model for serving.

build_trainer(normalization_data_map: Dict[str, reagent.core.parameters.NormalizationData], use_gpu: bool, reward_options: Optional[reagent.workflow.types.RewardOptions] = None) -> reagent.training.reinforce_trainer.ReinforceTrainer

Implement this to build the trainer, given the config

TODO: This function should return ReAgentLightningModule & the dictionary of modules created

create_policy(trainer_module: reagent.training.reagent_lightning_module.ReAgentLightningModule, serving: bool = False, normalization_data_map: Optional[Dict[str, reagent.core.parameters.NormalizationData]] = None)
policy_net_builder: reagent.net_builder.unions.DiscreteDQNNetBuilder__Union
sampler_temperature: float = 1.0
property state_feature_config: reagent.core.types.ModelFeatureConfig
state_feature_config_provider: reagent.workflow.types.ModelFeatureConfigProvider__Union
trainer_param: reagent.training.parameters.ReinforceTrainerParameters
value_net_builder: Optional[reagent.net_builder.unions.ValueNetBuilder__Union] = None

Module contents

class reagent.model_managers.policy_gradient.PPO(trainer_param: reagent.training.parameters.PPOTrainerParameters = <factory>, policy_net_builder: reagent.net_builder.unions.DiscreteDQNNetBuilder__Union = <factory>, value_net_builder: Optional[reagent.net_builder.unions.ValueNetBuilder__Union] = None, state_feature_config_provider: reagent.workflow.types.ModelFeatureConfigProvider__Union = <factory>, sampler_temperature: float = 1.0)

Bases: reagent.model_managers.model_manager.ModelManager

property action_names
build_serving_module(trainer_module: reagent.training.reagent_lightning_module.ReAgentLightningModule, normalization_data_map: Dict[str, reagent.core.parameters.NormalizationData]) -> torch.nn.modules.module.Module

Optionally, implement this method if you only have one model for serving.

build_trainer(normalization_data_map: Dict[str, reagent.core.parameters.NormalizationData], use_gpu: bool, reward_options: Optional[reagent.workflow.types.RewardOptions] = None) -> reagent.training.ppo_trainer.PPOTrainer

Implement this to build the trainer, given the config

TODO: This function should return ReAgentLightningModule & the dictionary of modules created

create_policy(trainer_module: reagent.training.reagent_lightning_module.ReAgentLightningModule, serving: bool = False, normalization_data_map: Optional[Dict[str, reagent.core.parameters.NormalizationData]] = None)
policy_net_builder: reagent.net_builder.unions.DiscreteDQNNetBuilder__Union
sampler_temperature: float = 1.0
property state_feature_config: reagent.core.types.ModelFeatureConfig
state_feature_config_provider: reagent.workflow.types.ModelFeatureConfigProvider__Union
trainer_param: reagent.training.parameters.PPOTrainerParameters
value_net_builder: Optional[reagent.net_builder.unions.ValueNetBuilder__Union] = None
class reagent.model_managers.policy_gradient.Reinforce(trainer_param: reagent.training.parameters.ReinforceTrainerParameters = <factory>, policy_net_builder: reagent.net_builder.unions.DiscreteDQNNetBuilder__Union = <factory>, value_net_builder: Optional[reagent.net_builder.unions.ValueNetBuilder__Union] = None, state_feature_config_provider: reagent.workflow.types.ModelFeatureConfigProvider__Union = <factory>, sampler_temperature: float = 1.0)

Bases: reagent.model_managers.model_manager.ModelManager

property action_names
build_serving_module(trainer_module: reagent.training.reagent_lightning_module.ReAgentLightningModule, normalization_data_map: Dict[str, reagent.core.parameters.NormalizationData]) -> torch.nn.modules.module.Module

Optionally, implement this method if you only have one model for serving.

build_trainer(normalization_data_map: Dict[str, reagent.core.parameters.NormalizationData], use_gpu: bool, reward_options: Optional[reagent.workflow.types.RewardOptions] = None) -> reagent.training.reinforce_trainer.ReinforceTrainer

Implement this to build the trainer, given the config

TODO: This function should return ReAgentLightningModule & the dictionary of modules created

create_policy(trainer_module: reagent.training.reagent_lightning_module.ReAgentLightningModule, serving: bool = False, normalization_data_map: Optional[Dict[str, reagent.core.parameters.NormalizationData]] = None)
policy_net_builder: reagent.net_builder.unions.DiscreteDQNNetBuilder__Union
sampler_temperature: float = 1.0
property state_feature_config: reagent.core.types.ModelFeatureConfig
state_feature_config_provider: reagent.workflow.types.ModelFeatureConfigProvider__Union
trainer_param: reagent.training.parameters.ReinforceTrainerParameters
value_net_builder: Optional[reagent.net_builder.unions.ValueNetBuilder__Union] = None