reagent.net_builder.synthetic_reward package

Submodules

reagent.net_builder.synthetic_reward.ngram_synthetic_reward module

class reagent.net_builder.synthetic_reward.ngram_synthetic_reward.NGramConvNetSyntheticReward(sizes: List[int] = <factory>, activations: List[str] = <factory>, last_layer_activation: str = 'sigmoid', context_size: int = 3, conv_net_params: reagent.core.parameters.ConvNetParameters = <factory>, use_layer_norm: bool = False)

Bases: reagent.net_builder.synthetic_reward_net_builder.SyntheticRewardNetBuilder

activations: List[str]
build_synthetic_reward_network(state_normalization_data: reagent.core.parameters.NormalizationData, action_normalization_data: Optional[reagent.core.parameters.NormalizationData] = None, discrete_action_names: Optional[List[str]] = None) → reagent.models.base.ModelBase
context_size: int = 3
conv_net_params: reagent.core.parameters.ConvNetParameters
last_layer_activation: str = 'sigmoid'
sizes: List[int]
use_layer_norm: bool = False
class reagent.net_builder.synthetic_reward.ngram_synthetic_reward.NGramSyntheticReward(sizes: List[int] = <factory>, activations: List[str] = <factory>, last_layer_activation: str = 'sigmoid', context_size: int = 3, use_layer_norm: bool = False)

Bases: reagent.net_builder.synthetic_reward_net_builder.SyntheticRewardNetBuilder

activations: List[str]
build_synthetic_reward_network(state_normalization_data: reagent.core.parameters.NormalizationData, action_normalization_data: Optional[reagent.core.parameters.NormalizationData] = None, discrete_action_names: Optional[List[str]] = None) → reagent.models.base.ModelBase
context_size: int = 3
last_layer_activation: str = 'sigmoid'
sizes: List[int]
use_layer_norm: bool = False

reagent.net_builder.synthetic_reward.sequence_synthetic_reward module

class reagent.net_builder.synthetic_reward.sequence_synthetic_reward.SequenceSyntheticReward(lstm_hidden_size: int = 128, lstm_num_layers: int = 2, lstm_bidirectional: bool = False, last_layer_activation: str = 'sigmoid')

Bases: reagent.net_builder.synthetic_reward_net_builder.SyntheticRewardNetBuilder

build_synthetic_reward_network(state_normalization_data: reagent.core.parameters.NormalizationData, action_normalization_data: Optional[reagent.core.parameters.NormalizationData] = None, discrete_action_names: Optional[List[str]] = None) → reagent.models.base.ModelBase
last_layer_activation: str = 'sigmoid'
lstm_bidirectional: bool = False
lstm_hidden_size: int = 128
lstm_num_layers: int = 2

reagent.net_builder.synthetic_reward.single_step_synthetic_reward module

class reagent.net_builder.synthetic_reward.single_step_synthetic_reward.SingleStepSyntheticReward(sizes: List[int] = <factory>, activations: List[str] = <factory>, last_layer_activation: str = 'sigmoid', use_batch_norm: bool = False, use_layer_norm: bool = False)

Bases: reagent.net_builder.synthetic_reward_net_builder.SyntheticRewardNetBuilder

activations: List[str]
build_synthetic_reward_network(state_normalization_data: reagent.core.parameters.NormalizationData, action_normalization_data: Optional[reagent.core.parameters.NormalizationData] = None, discrete_action_names: Optional[List[str]] = None) → reagent.models.base.ModelBase
last_layer_activation: str = 'sigmoid'
sizes: List[int]
use_batch_norm: bool = False
use_layer_norm: bool = False

reagent.net_builder.synthetic_reward.transformer_synthetic_reward module

class reagent.net_builder.synthetic_reward.transformer_synthetic_reward.TransformerSyntheticReward(nhead: int = 1, d_model: int = 128, num_encoder_layers: int = 2, dim_feedforward: int = 128, dropout: float = 0.0, activation: str = 'relu', last_layer_activation: str = 'leaky_relu', layer_norm_eps: float = 1e-05, max_len: int = 10)

Bases: reagent.net_builder.synthetic_reward_net_builder.SyntheticRewardNetBuilder

activation: str = 'relu'
build_synthetic_reward_network(state_normalization_data: reagent.core.parameters.NormalizationData, action_normalization_data: Optional[reagent.core.parameters.NormalizationData] = None, discrete_action_names: Optional[List[str]] = None) → reagent.models.base.ModelBase
d_model: int = 128
dim_feedforward: int = 128
dropout: float = 0.0
last_layer_activation: str = 'leaky_relu'
layer_norm_eps: float = 1e-05
max_len: int = 10
nhead: int = 1
num_encoder_layers: int = 2

Module contents