ml.rl package

Subpackages

Submodules

ml.rl.caffe_utils module

class ml.rl.caffe_utils.C2

Bases: object

static NextBlob(prefix: str) → str
static init_net()
static model()
static net()
static set_model(model)
static set_net(net)
static set_net_and_init_net(net, init_net)
class ml.rl.caffe_utils.C2Meta

Bases: type

class ml.rl.caffe_utils.PytorchCaffe2Converter

Bases: object

static buffer_to_caffe2_netdef(buffer)

Creates a caffe2 NetDef from a buffer object and returns the input and output blobs along with the NetDef.

static pytorch_net_to_buffer(pytorch_net, input_dim, model_on_gpu, float_input=True)

Traces a PyTorch net and outputs a Python buffer object holding the net.

static pytorch_net_to_caffe2_netdef(*args, **kwargs)
static remap_blobs(input_blob, output_blob, netdef, prefix)
class ml.rl.caffe_utils.StackedArray(lengths, values)

Bases: object

classmethod from_list_list(d: List[List[float]], blob_prefix: str)
class ml.rl.caffe_utils.StackedAssociativeArray(lengths, keys, values)

Bases: object

classmethod from_dict_list(d: List[Dict[int, float]], blob_prefix: str)
to_python() → List[Dict[Any, Any]]
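
The lengths/keys/values triple is a flattened encoding of a list of dicts: lengths[i] says how many (key, value) pairs belong to the i-th dict, and keys/values hold those pairs back to back. A pure-Python sketch of that layout (illustration only; the class itself works in terms of caffe2 blobs, as the blob_prefix argument suggests):

    # Illustration of the (lengths, keys, values) layout that
    # from_dict_list / to_python convert between.
    d = [{1: 0.5, 2: 1.0}, {}, {3: -2.0}]

    lengths = [len(row) for row in d]            # [2, 0, 1]
    keys = [k for row in d for k in row]         # [1, 2, 3]
    values = [row[k] for row in d for k in row]  # [0.5, 1.0, -2.0]

    # Reconstruct the original list of dicts, as to_python() would.
    out, offset = [], 0
    for n in lengths:
        out.append(dict(zip(keys[offset:offset + n], values[offset:offset + n])))
        offset += n
    assert out == d
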
class ml.rl.caffe_utils.StackedTwoLevelAssociativeArray(outer_lengths: str, outer_keys: str, inner_lengths: str, inner_keys: str, inner_values: str)

Bases: object

to_python() → List[Dict[Any, Dict[Any, Any]]]
ml.rl.caffe_utils.masked_softmax(x, mask, temperature)

Compute softmax values for each set of scores in x.

ml.rl.caffe_utils.softmax(x, temperature)

Compute softmax values for each set of scores in x.
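
Both helpers apply a temperature-scaled softmax row-wise; masked_softmax additionally restricts the distribution to the positions where mask is nonzero. A minimal usage sketch, assuming 2-D torch.Tensor inputs (the example values are made up):

    import torch
    from ml.rl.caffe_utils import masked_softmax, softmax

    x = torch.tensor([[1.0, 2.0, 3.0], [0.5, 0.5, 0.5]])    # two rows of scores
    mask = torch.tensor([[1.0, 1.0, 0.0], [1.0, 1.0, 1.0]])

    probs = softmax(x, temperature=1.0)
    masked_probs = masked_softmax(x, mask, temperature=1.0)
    # Each output row should sum to 1; in the masked case, probability mass
    # should only fall on positions where mask is nonzero.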

ml.rl.debug_on_error module

ml.rl.debug_on_error.start()

ml.rl.json_serialize module

ml.rl.json_serialize.from_json(j_obj: Any, to_type: Type) → Any
ml.rl.json_serialize.isinstance_namedtuple(x)
ml.rl.json_serialize.json_to_object(j: str, to_type: Type) → Any
ml.rl.json_serialize.object_to_json(o: Any) → str
ml.rl.json_serialize.prepare_for_json(o: Any) → Any
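
These helpers round-trip parameter-style objects through JSON strings. A minimal round-trip sketch, assuming a plain dataclass is accepted (the Hypothetical class below is made up; the dataclasses in ml.rl.parameters are the intended use):

    from dataclasses import dataclass

    from ml.rl.json_serialize import json_to_object, object_to_json

    @dataclass
    class Hypothetical:  # stand-in for a parameters dataclass
        gamma: float = 0.9
        name: str = "demo"

    j = object_to_json(Hypothetical(gamma=0.99))
    restored = json_to_object(j, Hypothetical)
    assert restored.gamma == 0.99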

ml.rl.parameters module

class ml.rl.parameters.BaselineParameters(dim_feedforward:int, num_stacked_layers:int)

Bases: ml.rl.types.BaseDataClass

class ml.rl.parameters.CEMParameters(plan_horizon_length:int=0, num_world_models:int=0, cem_population_size:int=0, cem_num_iterations:int=0, ensemble_population_size:int=0, num_elites:int=0, mdnrnn:ml.rl.parameters.MDNRNNParameters=MDNRNNParameters(hidden_size=64, num_hidden_layers=2, minibatch_size=16, learning_rate=0.001, num_gaussians=5, train_data_percentage=60.0, validation_data_percentage=20.0, test_data_percentage=20.0, reward_loss_weight=1.0, next_state_loss_weight=1.0, not_terminal_loss_weight=1.0, fit_only_one_next_step=False), rl:ml.rl.parameters.RLParameters=RLParameters(gamma=0.9, epsilon=0.1, target_update_rate=0.001, maxq_learning=True, reward_boost=None, temperature=0.01, softmax_policy=1, use_seq_num_diff_as_time_diff=False, q_network_loss='mse', set_missing_value_to_zero=False, tensorboard_logging_freq=0, predictor_atol_check=0.0, predictor_rtol_check=5e-05, time_diff_unit_length=1.0, multi_steps=None, ratio_different_predictions_tolerance=0), evaluation:ml.rl.parameters.EvaluationParameters=EvaluationParameters(calc_cpe_in_training=True), alpha:float=0.25, epsilon:float=0.001)

Bases: ml.rl.types.BaseDataClass

alpha = 0.25
cem_num_iterations = 0
cem_population_size = 0
ensemble_population_size = 0
epsilon = 0.001
evaluation = EvaluationParameters(calc_cpe_in_training=True)
mdnrnn = MDNRNNParameters(hidden_size=64, num_hidden_layers=2, minibatch_size=16, learning_rate=0.001, num_gaussians=5, train_data_percentage=60.0, validation_data_percentage=20.0, test_data_percentage=20.0, reward_loss_weight=1.0, next_state_loss_weight=1.0, not_terminal_loss_weight=1.0, fit_only_one_next_step=False)
num_elites = 0
num_world_models = 0
plan_horizon_length = 0
rl = RLParameters(gamma=0.9, epsilon=0.1, target_update_rate=0.001, maxq_learning=True, reward_boost=None, temperature=0.01, softmax_policy=1, use_seq_num_diff_as_time_diff=False, q_network_loss='mse', set_missing_value_to_zero=False, tensorboard_logging_freq=0, predictor_atol_check=0.0, predictor_rtol_check=5e-05, time_diff_unit_length=1.0, multi_steps=None, ratio_different_predictions_tolerance=0)
class ml.rl.parameters.CNNParameters(conv_dims:List[int], conv_height_kernels:List[int], conv_width_kernels:List[int], pool_kernels_strides:List[int], pool_types:List[str], num_input_channels:int, input_height:int, input_width:int)

Bases: ml.rl.types.BaseDataClass

class ml.rl.parameters.ContinuousActionModelParameters(rl:ml.rl.parameters.RLParameters, training:ml.rl.parameters.TrainingParameters, rainbow:ml.rl.parameters.RainbowDQNParameters, state_feature_params:Union[ml.rl.parameters.StateFeatureParameters, NoneType]=None, evaluation:ml.rl.parameters.EvaluationParameters=EvaluationParameters(calc_cpe_in_training=True))

Bases: ml.rl.types.BaseDataClass

evaluation = EvaluationParameters(calc_cpe_in_training=True)
state_feature_params = None
class ml.rl.parameters.DDPGModelParameters(rl:ml.rl.parameters.RLParameters, shared_training:ml.rl.parameters.DDPGTrainingParameters, actor_training:ml.rl.parameters.DDPGNetworkParameters, critic_training:ml.rl.parameters.DDPGNetworkParameters, action_rescale_map:Dict[int, List[float]], state_feature_params:Union[ml.rl.parameters.StateFeatureParameters, NoneType]=None, evaluation:ml.rl.parameters.EvaluationParameters=EvaluationParameters(calc_cpe_in_training=True))

Bases: ml.rl.types.BaseDataClass

evaluation = EvaluationParameters(calc_cpe_in_training=True)
state_feature_params = None
class ml.rl.parameters.DDPGNetworkParameters(layers:List[int]=<factory>, activations:List[str]=<factory>, l2_decay:float=0.01, learning_rate:float=0.001)

Bases: ml.rl.types.BaseDataClass

l2_decay = 0.01
learning_rate = 0.001
class ml.rl.parameters.DDPGTrainingParameters(minibatch_size:int=2048, final_layer_init:float=0.003, optimizer:str='ADAM', warm_start_model_path:Union[str, NoneType]=None, minibatches_per_step:int=1)

Bases: ml.rl.types.BaseDataClass

final_layer_init = 0.003
minibatch_size = 2048
minibatches_per_step = 1
optimizer = 'ADAM'
warm_start_model_path = None
class ml.rl.parameters.DiscreteActionModelParameters(actions:List[str]=<factory>, rl:ml.rl.parameters.RLParameters=RLParameters(gamma=0.9, epsilon=0.1, target_update_rate=0.001, maxq_learning=True, reward_boost=None, temperature=0.01, softmax_policy=1, use_seq_num_diff_as_time_diff=False, q_network_loss='mse', set_missing_value_to_zero=False, tensorboard_logging_freq=0, predictor_atol_check=0.0, predictor_rtol_check=5e-05, time_diff_unit_length=1.0, multi_steps=None, ratio_different_predictions_tolerance=0), training:ml.rl.parameters.TrainingParameters=TrainingParameters(minibatch_size=4096, learning_rate=0.001, optimizer='ADAM', layers=[-1, 256, 128, 1], activations=['relu', 'relu', 'linear'], lr_policy='fixed', lr_decay=0.999, dropout_ratio=0.0, warm_start_model_path=None, cnn_parameters=None, factorization_parameters=None, l2_decay=0.01, weight_init_min_std=0.0, use_batch_norm=False, clip_grad_norm=None, minibatches_per_step=1), rainbow:ml.rl.parameters.RainbowDQNParameters=RainbowDQNParameters(double_q_learning=True, dueling_architecture=True, bcq=False, bcq_drop_threshold=0.1, categorical=False, num_atoms=51, qmin=-100, qmax=200, c51_l2_decay=0, quantile=False), state_feature_params:Union[ml.rl.parameters.StateFeatureParameters, NoneType]=None, target_action_distribution:Union[List[float], NoneType]=None, evaluation:ml.rl.parameters.EvaluationParameters=EvaluationParameters(calc_cpe_in_training=True))

Bases: ml.rl.types.BaseDataClass

evaluation = EvaluationParameters(calc_cpe_in_training=True)
rainbow = RainbowDQNParameters(double_q_learning=True, dueling_architecture=True, bcq=False, bcq_drop_threshold=0.1, categorical=False, num_atoms=51, qmin=-100, qmax=200, c51_l2_decay=0, quantile=False)
rl = RLParameters(gamma=0.9, epsilon=0.1, target_update_rate=0.001, maxq_learning=True, reward_boost=None, temperature=0.01, softmax_policy=1, use_seq_num_diff_as_time_diff=False, q_network_loss='mse', set_missing_value_to_zero=False, tensorboard_logging_freq=0, predictor_atol_check=0.0, predictor_rtol_check=5e-05, time_diff_unit_length=1.0, multi_steps=None, ratio_different_predictions_tolerance=0)
state_feature_params = None
target_action_distribution = None
training = TrainingParameters(minibatch_size=4096, learning_rate=0.001, optimizer='ADAM', layers=[-1, 256, 128, 1], activations=['relu', 'relu', 'linear'], lr_policy='fixed', lr_decay=0.999, dropout_ratio=0.0, warm_start_model_path=None, cnn_parameters=None, factorization_parameters=None, l2_decay=0.01, weight_init_min_std=0.0, use_batch_norm=False, clip_grad_norm=None, minibatches_per_step=1)
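
All of the classes in this module are dataclasses, so a configuration is built by overriding only the fields of interest; anything left out keeps the defaults listed above. A minimal sketch (the action names and values are made up):

    from ml.rl.parameters import (
        DiscreteActionModelParameters,
        RLParameters,
        TrainingParameters,
    )

    # Hypothetical two-action problem; every field not set here keeps the
    # documented default (rainbow, evaluation, etc.).
    params = DiscreteActionModelParameters(
        actions=["left", "right"],
        rl=RLParameters(gamma=0.99, temperature=0.35),
        training=TrainingParameters(minibatch_size=1024, learning_rate=3e-4),
    )
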
class ml.rl.parameters.EvaluationParameters(calc_cpe_in_training:bool=True)

Bases: ml.rl.types.BaseDataClass

calc_cpe_in_training = True
class ml.rl.parameters.EvolutionParameters(population_size:int=1000, mutation_power:float=0.1, learning_rate:float=0.01)

Bases: ml.rl.types.BaseDataClass

learning_rate = 0.01
mutation_power = 0.1
population_size = 1000
class ml.rl.parameters.FactorizationParameters(state:ml.rl.parameters.FeedForwardParameters, action:ml.rl.parameters.FeedForwardParameters)

Bases: ml.rl.types.BaseDataClass

class ml.rl.parameters.FeedForwardParameters(layers:List[int]=<factory>, activations:List[str]=<factory>, use_layer_norm:Union[bool, NoneType]=None)

Bases: ml.rl.types.BaseDataClass

use_layer_norm = None
class ml.rl.parameters.KNNDQNModelParameters(rl:ml.rl.parameters.RLParameters, shared_training:ml.rl.parameters.DDPGTrainingParameters, actor_training:ml.rl.parameters.DDPGNetworkParameters, critic_training:ml.rl.parameters.DDPGNetworkParameters, num_actions:int, action_dim:int, k:int, evaluation:ml.rl.parameters.EvaluationParameters=EvaluationParameters(calc_cpe_in_training=True))

Bases: ml.rl.types.BaseDataClass

evaluation = EvaluationParameters(calc_cpe_in_training=True)
class ml.rl.parameters.MDNRNNParameters(hidden_size:int=64, num_hidden_layers:int=2, minibatch_size:int=16, learning_rate:float=0.001, num_gaussians:int=5, train_data_percentage:float=60.0, validation_data_percentage:float=20.0, test_data_percentage:float=20.0, reward_loss_weight:float=1.0, next_state_loss_weight:float=1.0, not_terminal_loss_weight:float=1.0, fit_only_one_next_step:bool=False)

Bases: ml.rl.types.BaseDataClass

fit_only_one_next_step = False
hidden_size = 64
learning_rate = 0.001
minibatch_size = 16
next_state_loss_weight = 1.0
not_terminal_loss_weight = 1.0
num_gaussians = 5
num_hidden_layers = 2
reward_loss_weight = 1.0
test_data_percentage = 20.0
train_data_percentage = 60.0
validation_data_percentage = 20.0
class ml.rl.parameters.NormalizationParameters(feature_type:str, boxcox_lambda:Union[float, NoneType]=None, boxcox_shift:Union[float, NoneType]=None, mean:Union[float, NoneType]=None, stddev:Union[float, NoneType]=None, possible_values:Union[List[int], NoneType]=None, quantiles:Union[List[float], NoneType]=None, min_value:Union[float, NoneType]=None, max_value:Union[float, NoneType]=None)

Bases: ml.rl.types.BaseDataClass

boxcox_lambda = None
boxcox_shift = None
max_value = None
mean = None
min_value = None
possible_values = None
quantiles = None
stddev = None
class ml.rl.parameters.OpenAiGymParameters(env:str, run_details:ml.rl.parameters.OpenAiRunDetails, model_type:str='', use_gpu:bool=False, max_replay_memory_size:int=0, rl:Union[ml.rl.parameters.RLParameters, NoneType]=None, rainbow:Union[ml.rl.parameters.RainbowDQNParameters, NoneType]=None, training:Union[ml.rl.parameters.TrainingParameters, NoneType]=None, td3_training:Union[ml.rl.parameters.TD3TrainingParameters, NoneType]=None, sac_training:Union[ml.rl.parameters.SACTrainingParameters, NoneType]=None, sac_value_training:Union[ml.rl.parameters.FeedForwardParameters, NoneType]=None, critic_training:Union[ml.rl.parameters.FeedForwardParameters, NoneType]=None, actor_training:Union[ml.rl.parameters.FeedForwardParameters, NoneType]=None, cem:Union[ml.rl.parameters.CEMParameters, NoneType]=None, mdnrnn:Union[ml.rl.parameters.MDNRNNParameters, NoneType]=None, evaluation:ml.rl.parameters.EvaluationParameters=EvaluationParameters(calc_cpe_in_training=True))

Bases: ml.rl.types.BaseDataClass

actor_training = None
cem = None
critic_training = None
evaluation = EvaluationParameters(calc_cpe_in_training=True)
max_replay_memory_size = 0
mdnrnn = None
model_type = ''
rainbow = None
rl = None
sac_training = None
sac_value_training = None
td3_training = None
training = None
use_gpu = False
class ml.rl.parameters.OpenAiRunDetails(solved_reward_threshold:Union[int, NoneType]=None, max_episodes_to_run_after_solved:Union[int, NoneType]=None, stop_training_after_solved:bool=False, num_episodes:int=301, max_steps:Union[int, NoneType]=None, train_every_ts:int=100, train_after_ts:int=10, test_every_ts:int=100, test_after_ts:int=10, num_train_batches:int=1, avg_over_num_episodes:int=100, render:bool=False, epsilon_decay:Union[float, NoneType]=None, minimum_epsilon:Union[float, NoneType]=0.0, offline_train_epochs:Union[int, NoneType]=None, offline_score_bar:Union[float, NoneType]=None, offline_num_batches_per_epoch:Union[int, NoneType]=None, seq_len:int=5, num_train_episodes:int=4000, num_test_episodes:int=100, num_state_embed_episodes:int=1800, train_epochs:int=6, early_stopping_patience:int=3)

Bases: ml.rl.types.BaseDataClass

avg_over_num_episodes = 100
early_stopping_patience = 3
epsilon_decay = None
max_episodes_to_run_after_solved = None
max_steps = None
minimum_epsilon = 0.0
num_episodes = 301
num_state_embed_episodes = 1800
num_test_episodes = 100
num_train_batches = 1
num_train_episodes = 4000
offline_num_batches_per_epoch = None
offline_score_bar = None
offline_train_epochs = None
render = False
seq_len = 5
solved_reward_threshold = None
stop_training_after_solved = False
test_after_ts = 10
test_every_ts = 100
train_after_ts = 10
train_epochs = 6
train_every_ts = 100
class ml.rl.parameters.OptimizerParameters(optimizer:str='ADAM', learning_rate:float=0.001, l2_decay:float=0.01)

Bases: ml.rl.types.BaseDataClass

l2_decay = 0.01
learning_rate = 0.001
optimizer = 'ADAM'
class ml.rl.parameters.RLParameters(gamma:float=0.9, epsilon:float=0.1, target_update_rate:float=0.001, maxq_learning:bool=True, reward_boost:Union[Dict[str, float], NoneType]=None, temperature:float=0.01, softmax_policy:int=1, use_seq_num_diff_as_time_diff:bool=False, q_network_loss:str='mse', set_missing_value_to_zero:bool=False, tensorboard_logging_freq:int=0, predictor_atol_check:float=0.0, predictor_rtol_check:float=5e-05, time_diff_unit_length:float=1.0, multi_steps:Union[int, NoneType]=None, ratio_different_predictions_tolerance:float=0)

Bases: ml.rl.types.BaseDataClass

epsilon = 0.1
gamma = 0.9
maxq_learning = True
multi_steps = None
predictor_atol_check = 0.0
predictor_rtol_check = 5e-05
q_network_loss = 'mse'
ratio_different_predictions_tolerance = 0
reward_boost = None
set_missing_value_to_zero = False
softmax_policy = 1
target_update_rate = 0.001
temperature = 0.01
tensorboard_logging_freq = 0
time_diff_unit_length = 1.0
use_seq_num_diff_as_time_diff = False
class ml.rl.parameters.RainbowDQNParameters(double_q_learning:bool=True, dueling_architecture:bool=True, bcq:bool=False, bcq_drop_threshold:float=0.1, categorical:bool=False, num_atoms:int=51, qmin:float=-100, qmax:float=200, c51_l2_decay:float=0, quantile:bool=False)

Bases: ml.rl.types.BaseDataClass

bcq = False
bcq_drop_threshold = 0.1
c51_l2_decay = 0
categorical = False
double_q_learning = True
dueling_architecture = True
num_atoms = 51
qmax = 200
qmin = -100
quantile = False
class ml.rl.parameters.RankingParameters(minibatch_size:int, max_src_seq_len:int, max_tgt_seq_len:int, greedy_serving:bool, evaluation:ml.rl.parameters.EvaluationParameters=EvaluationParameters(calc_cpe_in_training=True))

Bases: ml.rl.types.BaseDataClass

evaluation = EvaluationParameters(calc_cpe_in_training=True)
class ml.rl.parameters.SACModelParameters(rl:ml.rl.parameters.RLParameters=RLParameters(gamma=0.9, epsilon=0.1, target_update_rate=0.001, maxq_learning=True, reward_boost=None, temperature=0.01, softmax_policy=1, use_seq_num_diff_as_time_diff=False, q_network_loss='mse', set_missing_value_to_zero=False, tensorboard_logging_freq=0, predictor_atol_check=0.0, predictor_rtol_check=5e-05, time_diff_unit_length=1.0, multi_steps=None, ratio_different_predictions_tolerance=0), training:ml.rl.parameters.SACTrainingParameters=SACTrainingParameters(minibatch_size=1024, q_network_optimizer=OptimizerParameters(optimizer='ADAM', learning_rate=0.001, l2_decay=0.01), value_network_optimizer=OptimizerParameters(optimizer='ADAM', learning_rate=0.001, l2_decay=0.01), actor_network_optimizer=OptimizerParameters(optimizer='ADAM', learning_rate=0.001, l2_decay=0.01), use_2_q_functions=True, entropy_temperature=None, warm_start_model_path=None, logged_action_uniform_prior=True, minibatches_per_step=1, use_value_network=True, target_entropy=-1.0, alpha_optimizer=OptimizerParameters(optimizer='ADAM', learning_rate=0.001, l2_decay=0.01), action_embedding_kld_weight=None, action_embedding_mean=None, action_embedding_variance=None), q_network:ml.rl.parameters.FeedForwardParameters=FeedForwardParameters(layers=[256, 128], activations=['relu', 'relu'], use_layer_norm=None), value_network:Union[ml.rl.parameters.FeedForwardParameters, NoneType]=None, actor_network:ml.rl.parameters.FeedForwardParameters=FeedForwardParameters(layers=[256, 128], activations=['relu', 'relu'], use_layer_norm=None), state_feature_params:Union[ml.rl.parameters.StateFeatureParameters, NoneType]=None, evaluation:ml.rl.parameters.EvaluationParameters=EvaluationParameters(calc_cpe_in_training=True), constrain_action_sum:bool=False, do_not_preprocess_action:Union[bool, NoneType]=None)

Bases: ml.rl.types.BaseDataClass

actor_network = FeedForwardParameters(layers=[256, 128], activations=['relu', 'relu'], use_layer_norm=None)
constrain_action_sum = False
do_not_preprocess_action = None
evaluation = EvaluationParameters(calc_cpe_in_training=True)
q_network = FeedForwardParameters(layers=[256, 128], activations=['relu', 'relu'], use_layer_norm=None)
rl = RLParameters(gamma=0.9, epsilon=0.1, target_update_rate=0.001, maxq_learning=True, reward_boost=None, temperature=0.01, softmax_policy=1, use_seq_num_diff_as_time_diff=False, q_network_loss='mse', set_missing_value_to_zero=False, tensorboard_logging_freq=0, predictor_atol_check=0.0, predictor_rtol_check=5e-05, time_diff_unit_length=1.0, multi_steps=None, ratio_different_predictions_tolerance=0)
state_feature_params = None
training = SACTrainingParameters(minibatch_size=1024, q_network_optimizer=OptimizerParameters(optimizer='ADAM', learning_rate=0.001, l2_decay=0.01), value_network_optimizer=OptimizerParameters(optimizer='ADAM', learning_rate=0.001, l2_decay=0.01), actor_network_optimizer=OptimizerParameters(optimizer='ADAM', learning_rate=0.001, l2_decay=0.01), use_2_q_functions=True, entropy_temperature=None, warm_start_model_path=None, logged_action_uniform_prior=True, minibatches_per_step=1, use_value_network=True, target_entropy=-1.0, alpha_optimizer=OptimizerParameters(optimizer='ADAM', learning_rate=0.001, l2_decay=0.01), action_embedding_kld_weight=None, action_embedding_mean=None, action_embedding_variance=None)
value_network = None
class ml.rl.parameters.SACTrainingParameters(minibatch_size:int=1024, q_network_optimizer:ml.rl.parameters.OptimizerParameters=OptimizerParameters(optimizer='ADAM', learning_rate=0.001, l2_decay=0.01), value_network_optimizer:ml.rl.parameters.OptimizerParameters=OptimizerParameters(optimizer='ADAM', learning_rate=0.001, l2_decay=0.01), actor_network_optimizer:ml.rl.parameters.OptimizerParameters=OptimizerParameters(optimizer='ADAM', learning_rate=0.001, l2_decay=0.01), use_2_q_functions:bool=True, entropy_temperature:Union[float, NoneType]=None, warm_start_model_path:Union[str, NoneType]=None, logged_action_uniform_prior:bool=True, minibatches_per_step:int=1, use_value_network:bool=True, target_entropy:float=-1.0, alpha_optimizer:ml.rl.parameters.OptimizerParameters=OptimizerParameters(optimizer='ADAM', learning_rate=0.001, l2_decay=0.01), action_embedding_kld_weight:Union[float, NoneType]=None, action_embedding_mean:Union[List[float], NoneType]=None, action_embedding_variance:Union[List[float], NoneType]=None)

Bases: ml.rl.types.BaseDataClass

action_embedding_kld_weight = None
action_embedding_mean = None
action_embedding_variance = None
actor_network_optimizer = OptimizerParameters(optimizer='ADAM', learning_rate=0.001, l2_decay=0.01)
alpha_optimizer = OptimizerParameters(optimizer='ADAM', learning_rate=0.001, l2_decay=0.01)
entropy_temperature = None
logged_action_uniform_prior = True
minibatch_size = 1024
minibatches_per_step = 1
q_network_optimizer = OptimizerParameters(optimizer='ADAM', learning_rate=0.001, l2_decay=0.01)
target_entropy = -1.0
use_2_q_functions = True
use_value_network = True
value_network_optimizer = OptimizerParameters(optimizer='ADAM', learning_rate=0.001, l2_decay=0.01)
warm_start_model_path = None
class ml.rl.parameters.Seq2SlateTransformerParameters(transformer:ml.rl.parameters.TransformerParameters, baseline:ml.rl.parameters.BaselineParameters, on_policy:bool)

Bases: ml.rl.types.BaseDataClass

class ml.rl.parameters.StateFeatureParameters(state_feature_names_override:List[str]=<factory>, state_feature_hashes_override:List[int]=<factory>)

Bases: ml.rl.types.BaseDataClass

class ml.rl.parameters.TD3ModelParameters(rl:ml.rl.parameters.RLParameters, training:ml.rl.parameters.TD3TrainingParameters, q_network:ml.rl.parameters.FeedForwardParameters, actor_network:ml.rl.parameters.FeedForwardParameters, state_feature_params:Union[ml.rl.parameters.StateFeatureParameters, NoneType]=None, evaluation:ml.rl.parameters.EvaluationParameters=EvaluationParameters(calc_cpe_in_training=True))

Bases: ml.rl.types.BaseDataClass

evaluation = EvaluationParameters(calc_cpe_in_training=True)
state_feature_params = None
class ml.rl.parameters.TD3TrainingParameters(minibatch_size:int=64, q_network_optimizer:ml.rl.parameters.OptimizerParameters=OptimizerParameters(optimizer='ADAM', learning_rate=0.001, l2_decay=0.01), actor_network_optimizer:ml.rl.parameters.OptimizerParameters=OptimizerParameters(optimizer='ADAM', learning_rate=0.001, l2_decay=0.01), use_2_q_functions:bool=True, exploration_noise:float=0.2, initial_exploration_ts:int=1000, target_policy_smoothing:float=0.2, noise_clip:float=0.5, delayed_policy_update:int=2, warm_start_model_path:Union[str, NoneType]=None, minibatches_per_step:int=1)

Bases: ml.rl.types.BaseDataClass

actor_network_optimizer = OptimizerParameters(optimizer='ADAM', learning_rate=0.001, l2_decay=0.01)
delayed_policy_update = 2
exploration_noise = 0.2
initial_exploration_ts = 1000
minibatch_size = 64
minibatches_per_step = 1
noise_clip = 0.5
q_network_optimizer = OptimizerParameters(optimizer='ADAM', learning_rate=0.001, l2_decay=0.01)
target_policy_smoothing = 0.2
use_2_q_functions = True
warm_start_model_path = None
class ml.rl.parameters.TrainingParameters(minibatch_size:int=4096, learning_rate:float=0.001, optimizer:str='ADAM', layers:List[int]=<factory>, activations:List[str]=<factory>, lr_policy:str='fixed', lr_decay:float=0.999, dropout_ratio:float=0.0, warm_start_model_path:Union[str, NoneType]=None, cnn_parameters:Union[ml.rl.parameters.CNNParameters, NoneType]=None, factorization_parameters:Union[ml.rl.parameters.FactorizationParameters, NoneType]=None, l2_decay:float=0.01, weight_init_min_std:float=0.0, use_batch_norm:bool=False, clip_grad_norm:Union[float, NoneType]=None, minibatches_per_step:int=1)

Bases: ml.rl.types.BaseDataClass

clip_grad_norm = None
cnn_parameters = None
dropout_ratio = 0.0
factorization_parameters = None
l2_decay = 0.01
learning_rate = 0.001
lr_decay = 0.999
lr_policy = 'fixed'
minibatch_size = 4096
minibatches_per_step = 1
optimizer = 'ADAM'
use_batch_norm = False
warm_start_model_path = None
weight_init_min_std = 0.0
class ml.rl.parameters.TransformerParameters(num_heads:int, dim_model:int, dim_feedforward:int, num_stacked_layers:int)

Bases: ml.rl.types.BaseDataClass

ml.rl.tensorboardX module

Context library to allow dropping tensorboardX anywhere in the codebase. If there is no SummaryWriter in the context, function calls will be no-ops.

Usage:

    writer = SummaryWriter()

    with summary_writer_context(writer):
        some_func()

    def some_func():
        SummaryWriterContext.add_scalar("foo", tensor)

class ml.rl.tensorboardX.SummaryWriterContext

Bases: object

classmethod add_custom_scalars(writer)

Call this once you have finished setting up custom scalars.

classmethod add_custom_scalars_multilinechart(tags, category=None, title=None)
classmethod add_histogram(key, val, *args, **kwargs)
classmethod increase_global_step()
classmethod pop()
classmethod push(writer)
class ml.rl.tensorboardX.SummaryWriterContextMeta

Bases: type

ml.rl.tensorboardX.summary_writer_context(writer)

ml.rl.torch_utils module

ml.rl.torch_utils.export_module_to_buffer(module) → _io.BytesIO
ml.rl.torch_utils.rescale_torch_tensor(tensor: torch.Tensor, new_min: torch.Tensor, new_max: torch.Tensor, prev_min: torch.Tensor, prev_max: torch.Tensor)

Rescale column values in an N x M torch tensor to a new range. Each column m in the input tensor is rescaled from the range [prev_min[m], prev_max[m]] to [new_min[m], new_max[m]].
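
For example, a single column logged in [0, 10] and rescaled to [-1, 1] maps 0 to -1, 5 to 0, and 10 to 1. A small sketch of that case, assuming the min/max arguments are 1 x M tensors with one entry per column:

    import torch
    from ml.rl.torch_utils import rescale_torch_tensor

    t = torch.tensor([[0.0], [5.0], [10.0]])  # N x M with M = 1
    out = rescale_torch_tensor(
        t,
        new_min=torch.tensor([[-1.0]]),
        new_max=torch.tensor([[1.0]]),
        prev_min=torch.tensor([[0.0]]),
        prev_max=torch.tensor([[10.0]]),
    )
    # Expected: [[-1.0], [0.0], [1.0]]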

ml.rl.torch_utils.stack(mems)

Stack a list of tensors. We could use torch.stack here, but torch.stack is much slower than torch.cat + view. Submitted an issue for investigation: https://github.com/pytorch/pytorch/issues/22462

FIXME: Remove this function after the issue above is resolved
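
The workaround described above is to concatenate along dim 0 and then view the result with an extra leading dimension. A standalone sketch of that trick (not necessarily the function's exact implementation), assuming all tensors in the list share the same shape:

    import torch

    mems = [torch.randn(3, 4) for _ in range(5)]

    # torch.cat along dim 0, then add back the leading "list" dimension.
    stacked = torch.cat(mems, dim=0).view(len(mems), *mems[0].shape)

    assert torch.equal(stacked, torch.stack(mems))  # same result as torch.stack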

ml.rl.types module

class ml.rl.types.ActorOutput(action:torch.Tensor, log_prob:Union[torch.Tensor, NoneType]=None)

Bases: ml.rl.types.BaseDataClass

log_prob = None
class ml.rl.types.AllActionQValues(q_values:torch.Tensor)

Bases: ml.rl.types.BaseDataClass

class ml.rl.types.BaseDataClass

Bases: object

cuda()
pin_memory()
class ml.rl.types.CommonInput(reward: torch.Tensor, time_diff: torch.Tensor, step: Optional[torch.Tensor], not_terminal: torch.Tensor)

Bases: ml.rl.types.BaseDataClass

Base class for all inputs, both raw and preprocessed

class ml.rl.types.DqnPolicyActionSet(greedy:int, softmax:Union[int, NoneType]=None, greedy_act_name:Union[str, NoneType]=None, softmax_act_name:Union[str, NoneType]=None)

Bases: ml.rl.types.BaseDataClass

greedy_act_name = None
softmax = None
softmax_act_name = None
class ml.rl.types.ExtraData(mdp_id:Union[numpy.ndarray, NoneType]=None, sequence_number:Union[torch.Tensor, NoneType]=None, action_probability:Union[torch.Tensor, NoneType]=None, max_num_actions:Union[int, NoneType]=None, metrics:Union[torch.Tensor, NoneType]=None)

Bases: ml.rl.types.BaseDataClass

action_probability = None
max_num_actions = None
mdp_id = None
metrics = None
sequence_number = None
class ml.rl.types.FeatureVector(float_features:ml.rl.types.ValuePresence, sequence_features:Union[ml.rl.types.SequenceFeatureBase, NoneType]=None, time_since_first:Union[torch.Tensor, NoneType]=None)

Bases: ml.rl.types.BaseDataClass

sequence_features = None
time_since_first = None
class ml.rl.types.FloatFeatureInfo(name:str, feature_id:int)

Bases: ml.rl.types.BaseDataClass

class ml.rl.types.IdFeatureBase

Bases: ml.rl.types.BaseDataClass

Users should subclass this class and define each ID feature as a field with torch.Tensor as its type.

classmethod get_feature_config() → Dict[str, ml.rl.types.IdFeatureConfig]

Returns a mapping from feature name, which must be a field in this dataclass, to feature config.

class ml.rl.types.IdFeatureConfig(feature_id: int, id_mapping_name: str)

Bases: ml.rl.types.BaseDataClass

This describes how to map raw features to model features

class ml.rl.types.IdMapping(ids:List[int])

Bases: ml.rl.types.BaseDataClass

class ml.rl.types.MemoryNetworkOutput(mus:torch.Tensor, sigmas:torch.Tensor, logpi:torch.Tensor, reward:torch.Tensor, not_terminal:torch.Tensor, last_step_lstm_hidden:torch.Tensor, last_step_lstm_cell:torch.Tensor, all_steps_lstm_hidden:torch.Tensor)

Bases: ml.rl.types.BaseDataClass

class ml.rl.types.ModelFeatureConfig(float_feature_infos:List[ml.rl.types.FloatFeatureInfo], id_mapping_config:Dict[str, ml.rl.types.IdMapping], sequence_features_type:Union[Type[ml.rl.types.SequenceFeatures], NoneType])

Bases: ml.rl.types.BaseDataClass

class ml.rl.types.PlanningPolicyOutput(next_best_continuous_action:Union[torch.Tensor, NoneType]=None, next_best_discrete_action_one_hot:Union[torch.Tensor, NoneType]=None, next_best_discrete_action_idx:Union[int, NoneType]=None)

Bases: ml.rl.types.BaseDataClass

next_best_continuous_action = None
next_best_discrete_action_idx = None
next_best_discrete_action_one_hot = None
class ml.rl.types.PreprocessedBaseInput(reward:torch.Tensor, time_diff:torch.Tensor, step:Union[torch.Tensor, NoneType], not_terminal:torch.Tensor, state:ml.rl.types.PreprocessedFeatureVector, next_state:ml.rl.types.PreprocessedFeatureVector)

Bases: ml.rl.types.CommonInput

class ml.rl.types.PreprocessedDiscreteDqnInput(reward:torch.Tensor, time_diff:torch.Tensor, step:Union[torch.Tensor, NoneType], not_terminal:torch.Tensor, state:ml.rl.types.PreprocessedFeatureVector, next_state:ml.rl.types.PreprocessedFeatureVector, action:torch.Tensor, next_action:torch.Tensor, possible_actions_mask:torch.Tensor, possible_next_actions_mask:torch.Tensor)

Bases: ml.rl.types.PreprocessedBaseInput

class ml.rl.types.PreprocessedFeatureVector(float_features:torch.Tensor, time_since_first:Union[torch.Tensor, NoneType]=None)

Bases: ml.rl.types.BaseDataClass

time_since_first = None
class ml.rl.types.PreprocessedMemoryNetworkInput(reward:torch.Tensor, time_diff:torch.Tensor, step:Union[torch.Tensor, NoneType], not_terminal:torch.Tensor, state:ml.rl.types.PreprocessedFeatureVector, next_state:ml.rl.types.PreprocessedFeatureVector, action:Union[torch.Tensor, torch.Tensor])

Bases: ml.rl.types.PreprocessedBaseInput

class ml.rl.types.PreprocessedParametricDqnInput(reward:torch.Tensor, time_diff:torch.Tensor, step:Union[torch.Tensor, NoneType], not_terminal:torch.Tensor, state:ml.rl.types.PreprocessedFeatureVector, next_state:ml.rl.types.PreprocessedFeatureVector, action:ml.rl.types.PreprocessedFeatureVector, next_action:ml.rl.types.PreprocessedFeatureVector, possible_actions:ml.rl.types.PreprocessedFeatureVector, possible_actions_mask:torch.Tensor, possible_next_actions:ml.rl.types.PreprocessedFeatureVector, possible_next_actions_mask:torch.Tensor, tiled_next_state:ml.rl.types.PreprocessedFeatureVector)

Bases: ml.rl.types.PreprocessedBaseInput

class ml.rl.types.PreprocessedPolicyNetworkInput(reward:torch.Tensor, time_diff:torch.Tensor, step:Union[torch.Tensor, NoneType], not_terminal:torch.Tensor, state:ml.rl.types.PreprocessedFeatureVector, next_state:ml.rl.types.PreprocessedFeatureVector, action:ml.rl.types.PreprocessedFeatureVector, next_action:ml.rl.types.PreprocessedFeatureVector)

Bases: ml.rl.types.PreprocessedBaseInput

class ml.rl.types.PreprocessedRankingInput(state:ml.rl.types.PreprocessedFeatureVector, src_seq:ml.rl.types.PreprocessedFeatureVector, src_src_mask:torch.Tensor, tgt_seq:Union[ml.rl.types.PreprocessedFeatureVector, NoneType]=None, tgt_tgt_mask:Union[torch.Tensor, NoneType]=None, slate_reward:Union[torch.Tensor, NoneType]=None, src_in_idx:Union[torch.Tensor, NoneType]=None, tgt_in_idx:Union[torch.Tensor, NoneType]=None, tgt_out_idx:Union[torch.Tensor, NoneType]=None, tgt_out_probs:Union[torch.Tensor, NoneType]=None, optim_tgt_out_idx:Union[torch.Tensor, NoneType]=None)

Bases: ml.rl.types.BaseDataClass

classmethod from_tensors(state: torch.Tensor, src_seq: torch.Tensor, src_src_mask: torch.Tensor, tgt_seq: Optional[torch.Tensor] = None, tgt_tgt_mask: Optional[torch.Tensor] = None, slate_reward: Optional[torch.Tensor] = None, src_in_idx: Optional[torch.Tensor] = None, tgt_in_idx: Optional[torch.Tensor] = None, tgt_out_idx: Optional[torch.Tensor] = None, tgt_out_probs: Optional[torch.Tensor] = None, optim_tgt_out_idx: Optional[torch.Tensor] = None)
optim_tgt_out_idx = None
slate_reward = None
src_in_idx = None
tgt_in_idx = None
tgt_out_idx = None
tgt_out_probs = None
tgt_seq = None
tgt_tgt_mask = None
class ml.rl.types.PreprocessedState(state)

Bases: ml.rl.types.BaseDataClass

This class makes it easier to plug modules into the predictor.

classmethod from_tensor(state: torch.Tensor)
class ml.rl.types.PreprocessedStateAction(state, action)

Bases: ml.rl.types.BaseDataClass

classmethod from_tensors(state: torch.Tensor, action: torch.Tensor)
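
A minimal sketch of wrapping raw tensors with these helpers before feeding them to a model; the dimensions are made up:

    import torch
    from ml.rl.types import PreprocessedState, PreprocessedStateAction

    state = torch.randn(1, 10)   # hypothetical 10-dim state, batch size 1
    action = torch.randn(1, 4)   # hypothetical 4-dim action

    s = PreprocessedState.from_tensor(state)
    sa = PreprocessedStateAction.from_tensors(state, action)
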
class ml.rl.types.PreprocessedTrainingBatch(training_input:Union[ml.rl.types.PreprocessedBaseInput, ml.rl.types.PreprocessedRankingInput], extras:Any)

Bases: ml.rl.types.BaseDataClass

batch_size()
class ml.rl.types.RankingOutput(ranked_tgt_out_idx:Union[torch.Tensor, NoneType]=None, ranked_tgt_out_probs:Union[torch.Tensor, NoneType]=None, log_probs:Union[torch.Tensor, NoneType]=None)

Bases: ml.rl.types.BaseDataClass

log_probs = None
ranked_tgt_out_idx = None
ranked_tgt_out_probs = None
class ml.rl.types.RawBaseInput(reward:torch.Tensor, time_diff:torch.Tensor, step:Union[torch.Tensor, NoneType], not_terminal:torch.Tensor, state:ml.rl.types.FeatureVector, next_state:ml.rl.types.FeatureVector)

Bases: ml.rl.types.CommonInput

class ml.rl.types.RawDiscreteDqnInput(reward:torch.Tensor, time_diff:torch.Tensor, step:Union[torch.Tensor, NoneType], not_terminal:torch.Tensor, state:ml.rl.types.FeatureVector, next_state:ml.rl.types.FeatureVector, action:torch.Tensor, next_action:torch.Tensor, possible_actions_mask:torch.Tensor, possible_next_actions_mask:torch.Tensor)

Bases: ml.rl.types.RawBaseInput

preprocess(state: ml.rl.types.PreprocessedFeatureVector, next_state: ml.rl.types.PreprocessedFeatureVector)
preprocess_tensors(state: torch.Tensor, next_state: torch.Tensor)
class ml.rl.types.RawMemoryNetworkInput(reward:torch.Tensor, time_diff:torch.Tensor, step:Union[torch.Tensor, NoneType], not_terminal:torch.Tensor, state:ml.rl.types.FeatureVector, next_state:ml.rl.types.FeatureVector, action:Union[ml.rl.types.FeatureVector, torch.Tensor])

Bases: ml.rl.types.RawBaseInput

preprocess(state: ml.rl.types.PreprocessedFeatureVector, next_state: ml.rl.types.PreprocessedFeatureVector, action: Optional[torch.Tensor] = None)
preprocess_tensors(state: torch.Tensor, next_state: torch.Tensor, action: Optional[torch.Tensor] = None)
class ml.rl.types.RawParametricDqnInput(reward:torch.Tensor, time_diff:torch.Tensor, step:Union[torch.Tensor, NoneType], not_terminal:torch.Tensor, state:ml.rl.types.FeatureVector, next_state:ml.rl.types.FeatureVector, action:ml.rl.types.FeatureVector, next_action:ml.rl.types.FeatureVector, possible_actions:ml.rl.types.FeatureVector, possible_actions_mask:torch.Tensor, possible_next_actions:ml.rl.types.FeatureVector, possible_next_actions_mask:torch.Tensor, tiled_next_state:ml.rl.types.FeatureVector)

Bases: ml.rl.types.RawBaseInput

preprocess(state: ml.rl.types.PreprocessedFeatureVector, next_state: ml.rl.types.PreprocessedFeatureVector, action: ml.rl.types.PreprocessedFeatureVector, next_action: ml.rl.types.PreprocessedFeatureVector, possible_actions: ml.rl.types.PreprocessedFeatureVector, possible_next_actions: ml.rl.types.PreprocessedFeatureVector, tiled_next_state: ml.rl.types.PreprocessedFeatureVector)
preprocess_tensors(state: torch.Tensor, next_state: torch.Tensor, action: torch.Tensor, next_action: torch.Tensor, possible_actions: torch.Tensor, possible_next_actions: torch.Tensor, tiled_next_state: torch.Tensor)
class ml.rl.types.RawPolicyNetworkInput(reward:torch.Tensor, time_diff:torch.Tensor, step:Union[torch.Tensor, NoneType], not_terminal:torch.Tensor, state:ml.rl.types.FeatureVector, next_state:ml.rl.types.FeatureVector, action:ml.rl.types.FeatureVector, next_action:ml.rl.types.FeatureVector)

Bases: ml.rl.types.RawBaseInput

preprocess(state: ml.rl.types.PreprocessedFeatureVector, next_state: ml.rl.types.PreprocessedFeatureVector, action: ml.rl.types.PreprocessedFeatureVector, next_action: ml.rl.types.PreprocessedFeatureVector)
preprocess_tensors(state: torch.Tensor, next_state: torch.Tensor, action: torch.Tensor, next_action: torch.Tensor)
class ml.rl.types.RawStateAction(state:ml.rl.types.FeatureVector, action:ml.rl.types.FeatureVector)

Bases: ml.rl.types.BaseDataClass

class ml.rl.types.RawTrainingBatch(training_input:ml.rl.types.RawBaseInput, extras:Any)

Bases: ml.rl.types.BaseDataClass

batch_size()
preprocess(training_input: ml.rl.types.PreprocessedBaseInput) → ml.rl.types.PreprocessedTrainingBatch
class ml.rl.types.SacPolicyActionSet(greedy:torch.Tensor, greedy_propensity:float)

Bases: ml.rl.types.BaseDataClass

class ml.rl.types.SequenceFeatureBase(id_features:Union[ml.rl.types.IdFeatureBase, NoneType], float_features:Union[ml.rl.types.ValuePresence, NoneType])

Bases: ml.rl.types.BaseDataClass

classmethod get_float_feature_infos() → List[ml.rl.types.FloatFeatureInfo]

Override this if the sequence has float features associated with it. Float features should be stored as an ID-score-list, where the ID part corresponds to the primary entity ID of the sequence. E.g., if this is a sequence of previously watched videos, then the key should be the video ID.

classmethod get_max_length() → int

Subclasses should return the maximum length of this sequence. If the raw data is longer, the feature extractor will truncate the front; if the raw data is shorter, the feature extractor will pad the front with zeros.

classmethod prototype() → T
class ml.rl.types.SequenceFeatures

Bases: ml.rl.types.BaseDataClass

A stub class for sequence features in the model. All fields should be subclasses of SequenceFeatureBase above.

classmethod prototype() → U
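
A hedged sketch of how the previously-watched-videos example from the docstrings above might be declared, assuming subclasses are written as dataclasses like the base classes; the class names, feature ID, and id_mapping_name are all made up:

    from dataclasses import dataclass
    from typing import Dict, List, Optional

    import torch
    from ml.rl import types as rlt

    @dataclass
    class WatchedVideoIds(rlt.IdFeatureBase):
        # Hypothetical ID feature: IDs of previously watched videos.
        video_id: torch.Tensor

        @classmethod
        def get_feature_config(cls) -> Dict[str, rlt.IdFeatureConfig]:
            return {
                "video_id": rlt.IdFeatureConfig(
                    feature_id=1001,  # made-up raw feature ID
                    id_mapping_name="video_id_map",  # made-up mapping name
                )
            }

    @dataclass
    class WatchedVideos(rlt.SequenceFeatureBase):
        id_features: Optional[WatchedVideoIds]

        @classmethod
        def get_max_length(cls) -> int:
            return 10  # keep the 10 most recent videos

        @classmethod
        def get_float_feature_infos(cls) -> List[rlt.FloatFeatureInfo]:
            return []  # no float features in this sketch

    @dataclass
    class MySequenceFeatures(rlt.SequenceFeatures):
        watched_videos: WatchedVideos
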
class ml.rl.types.SingleQValue(q_value:torch.Tensor)

Bases: ml.rl.types.BaseDataClass

class ml.rl.types.ValuePresence(value:torch.Tensor, presence:Union[torch.Tensor, NoneType])

Bases: ml.rl.types.BaseDataClass

Module contents