ml.rl.workflow package

Submodules

ml.rl.workflow.base_workflow module

class ml.rl.workflow.base_workflow.BaseWorkflow(batch_preprocessor, trainer, evaluator, minibatch_size)

Bases: object

static init_multiprocessing(num_processes_per_node, num_nodes, node_index, gpu_index, init_method)
static read_norm_file(path) → Dict[int, ml.rl.parameters.NormalizationParameters]
report(evaluation_details)
save_models(path: str)
save_torchscript_model(module: torch.nn.Module, path: str)
train_network(train_dataset, eval_dataset, epochs: int)
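
The listing above is the full BaseWorkflow surface. A minimal sketch of driving a concrete workflow subclass, assuming my_workflow is an instance of such a subclass and train_dataset/eval_dataset are dataset objects accepted by train_network (all three are placeholders, not part of this module):

    from ml.rl.workflow.base_workflow import BaseWorkflow

    # Sketch only: `my_workflow`, `train_dataset`, and `eval_dataset` are
    # placeholders for objects built elsewhere in the pipeline.
    state_normalization = BaseWorkflow.read_norm_file("state_norm.json")

    my_workflow.train_network(train_dataset, eval_dataset, epochs=10)

    # Persist the trained networks; subclasses implement save_models(path).
    my_workflow.save_models("/tmp/model_checkpoints")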

ml.rl.workflow.create_normalization_metadata module

ml.rl.workflow.create_normalization_metadata.check_samples_per_feature(samples_per_feature, num_samples)
ml.rl.workflow.create_normalization_metadata.create_norm_table(params)
ml.rl.workflow.create_normalization_metadata.get_norm_metadata(dataset, norm_params, norm_col)
ml.rl.workflow.create_normalization_metadata.get_norm_params(norm_params)

ml.rl.workflow.dqn_workflow module

class ml.rl.workflow.dqn_workflow.DqnWorkflow(model_params: ml.rl.parameters.DiscreteActionModelParameters, state_normalization: Dict[int, ml.rl.parameters.NormalizationParameters], use_gpu: bool, use_all_avail_gpus: bool)

Bases: ml.rl.workflow.base_workflow.BaseWorkflow

save_models(path: str)
ml.rl.workflow.dqn_workflow.main(params)
ml.rl.workflow.dqn_workflow.single_process_main(gpu_index, *args)
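
A hedged sketch of constructing and training a DqnWorkflow directly rather than through main(params); model_params is assumed to be an already-built DiscreteActionModelParameters instance, and the dataset objects are placeholders:

    from ml.rl.workflow.base_workflow import BaseWorkflow
    from ml.rl.workflow.dqn_workflow import DqnWorkflow

    # Sketch only: `model_params`, `train_dataset`, and `eval_dataset` come
    # from the surrounding workflow configuration and data loading.
    state_normalization = BaseWorkflow.read_norm_file("state_norm.json")

    workflow = DqnWorkflow(
        model_params=model_params,
        state_normalization=state_normalization,
        use_gpu=True,
        use_all_avail_gpus=False,
    )
    workflow.train_network(train_dataset, eval_dataset, epochs=5)
    workflow.save_models("/tmp/dqn_model")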

ml.rl.workflow.helpers module

ml.rl.workflow.helpers.minibatch_size_multiplier(use_gpu, use_all_avail_gpus)

Return a multiplier used to increase the minibatch size when training with PyTorch DataParallel on all available GPUs.
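
For example, the base minibatch size from the training config might be scaled by this multiplier before training begins (a sketch; the actual multiplier value depends on the GPUs available at runtime):

    from ml.rl.workflow.helpers import minibatch_size_multiplier

    base_minibatch_size = 1024  # value taken from the training config
    minibatch_size = base_minibatch_size * minibatch_size_multiplier(
        use_gpu=True, use_all_avail_gpus=True
    )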

ml.rl.workflow.helpers.parse_args(args)
ml.rl.workflow.helpers.save_model_to_file(model, path)

Save network parameters and optimizer parameters to file.

Parameters

model – a DQNTrainer or ParametricDQNTrainer object.

ml.rl.workflow.helpers.update_model_for_warm_start(model, path=None)

Load network parameters and optimizer parameters into the trainer object to warm start it.

Parameters

model – a DQNTrainer or ParametricDQNTrainer object.
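
A hedged sketch of checkpointing a trainer and later warm starting a freshly built one; trainer and new_trainer stand in for DQNTrainer (or ParametricDQNTrainer) instances constructed elsewhere, and the checkpoint path is hypothetical:

    from ml.rl.workflow.helpers import save_model_to_file, update_model_for_warm_start

    CHECKPOINT_PATH = "/tmp/trainer_checkpoint.pt"  # hypothetical path

    # Persist network and optimizer state from an existing trainer.
    save_model_to_file(trainer, CHECKPOINT_PATH)

    # Later: load that state into a newly constructed trainer to warm start it.
    update_model_for_warm_start(new_trainer, path=CHECKPOINT_PATH)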

ml.rl.workflow.page_handler module

class ml.rl.workflow.page_handler.EvaluationPageHandler(trainer, evaluator, reporter)

Bases: ml.rl.workflow.page_handler.PageHandler

finish() → None
get_last_cpe_results()
handle(tdp: ml.rl.types.PreprocessedTrainingBatch) → None
class ml.rl.workflow.page_handler.ImitatorPageHandler(trainer, train=True)

Bases: ml.rl.workflow.page_handler.PageHandler

handle(tdp: ml.rl.types.PreprocessedTrainingBatch) → None
class ml.rl.workflow.page_handler.PageHandler(trainer_or_evaluator)

Bases: object

finish() → None
get_loss(loss_name='loss')

See usage in get_mean_loss().

get_mean_loss(loss_name='loss', axis=None)

Get the average of a certain type of loss.

Parameters

loss_name – possible loss names:

For the world model: ‘loss’ (total loss), ‘bce’ (loss for predicting not_terminal), ‘gmm’ (loss for next-state prediction), ‘mse’ (loss for predicting reward).

For the ranking model: ‘pg’ (policy gradient loss), ‘baseline’ (the baseline model’s loss, usually for fitting V(s)), ‘kendall_tau’ (Kendall’s tau coefficient between advantage and log_probs, used in evaluation page handlers), ‘kendall_tau_p_value’ (the p-value of the Kendall’s tau test, used in evaluation page handlers).

axis – axis along which the mean is taken.

A minimal usage sketch appears after this class listing.

handle(tdp: ml.rl.types.PreprocessedTrainingBatch) → None
refresh_results() → None
set_epoch(epoch) → None
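
As referenced in get_mean_loss above, a hedged sketch of querying losses from a page handler; trainer is a placeholder for a world-model trainer built elsewhere, and the handler is assumed to have already processed an epoch of pages (for example via feed_pages, documented below):

    from ml.rl.workflow.page_handler import WorldModelTrainingPageHandler

    # Sketch only: `trainer` is a placeholder for an MDNRNNTrainer.
    page_handler = WorldModelTrainingPageHandler(trainer)

    # ... after the handler has processed an epoch of training pages ...
    total_loss = page_handler.get_mean_loss(loss_name="loss")
    reward_loss = page_handler.get_mean_loss(loss_name="mse", axis=0)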
class ml.rl.workflow.page_handler.RankingEvaluationPageHandler(trainer_or_evaluator)

Bases: ml.rl.workflow.page_handler.PageHandler

finish()
handle(tdp: ml.rl.types.PreprocessedTrainingBatch) → None
class ml.rl.workflow.page_handler.RankingTrainingPageHandler(trainer_or_evaluator)

Bases: ml.rl.workflow.page_handler.PageHandler

handle(tdp: ml.rl.types.PreprocessedTrainingBatch) → None
class ml.rl.workflow.page_handler.TrainingPageHandler(trainer_or_evaluator)

Bases: ml.rl.workflow.page_handler.PageHandler

finish() → None
handle(tdp: ml.rl.types.PreprocessedTrainingBatch) → None
class ml.rl.workflow.page_handler.WorldModelEvaluationPageHandler(trainer_or_evaluator)

Bases: ml.rl.workflow.page_handler.PageHandler

handle(tdp: ml.rl.types.PreprocessedTrainingBatch) → None
class ml.rl.workflow.page_handler.WorldModelRandomTrainingPageHandler(trainer_or_evaluator)

Bases: ml.rl.workflow.page_handler.PageHandler

Train a baseline model based on randomly shuffled data.

handle(tdp: ml.rl.types.PreprocessedTrainingBatch) → None
class ml.rl.workflow.page_handler.WorldModelTrainingPageHandler(trainer_or_evaluator)

Bases: ml.rl.workflow.page_handler.PageHandler

handle(tdp: ml.rl.types.PreprocessedTrainingBatch) → None
ml.rl.workflow.page_handler.feed_pages(data_streamer, dataset_num_rows, epoch, minibatch_size, use_gpu, page_handler, batch_preprocessor=None)
ml.rl.workflow.page_handler.get_actual_minibatch_size(batch, minibatch_size_preset)
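
A hedged sketch of driving a page handler with feed_pages; trainer, data_streamer, dataset_num_rows, and batch_preprocessor are placeholders for objects the enclosing workflow already builds:

    from ml.rl.workflow.page_handler import TrainingPageHandler, feed_pages

    # Sketch only: `trainer`, `data_streamer`, `dataset_num_rows`, and
    # `batch_preprocessor` come from the enclosing workflow.
    page_handler = TrainingPageHandler(trainer)
    for epoch in range(5):
        feed_pages(
            data_streamer,
            dataset_num_rows,
            epoch,
            minibatch_size=1024,
            use_gpu=True,
            page_handler=page_handler,
            batch_preprocessor=batch_preprocessor,
        )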

ml.rl.workflow.parametric_dqn_workflow module

class ml.rl.workflow.parametric_dqn_workflow.ParametricDqnWorkflow(model_params: ml.rl.parameters.ContinuousActionModelParameters, state_normalization: Dict[int, ml.rl.parameters.NormalizationParameters], action_normalization: Dict[int, ml.rl.parameters.NormalizationParameters], use_gpu: bool, use_all_avail_gpus: bool)

Bases: ml.rl.workflow.base_workflow.BaseWorkflow

save_models(path: str)
ml.rl.workflow.parametric_dqn_workflow.main(params)
ml.rl.workflow.parametric_dqn_workflow.single_process_main(gpu_index, *args)
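
The parametric workflow follows the same pattern as DqnWorkflow above, with a separate normalization table for actions. A hedged sketch, with model_params and the dataset objects as placeholders:

    from ml.rl.workflow.base_workflow import BaseWorkflow
    from ml.rl.workflow.parametric_dqn_workflow import ParametricDqnWorkflow

    # Sketch only: `model_params`, `train_dataset`, and `eval_dataset` are placeholders.
    workflow = ParametricDqnWorkflow(
        model_params=model_params,
        state_normalization=BaseWorkflow.read_norm_file("state_norm.json"),
        action_normalization=BaseWorkflow.read_norm_file("action_norm.json"),
        use_gpu=False,
        use_all_avail_gpus=False,
    )
    workflow.train_network(train_dataset, eval_dataset, epochs=5)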

ml.rl.workflow.preprocess_handler module

class ml.rl.workflow.preprocess_handler.ContinuousPreprocessHandler(state_sparse_to_dense: ml.rl.preprocessing.sparse_to_dense.SparseToDenseProcessor, action_sparse_to_dense: ml.rl.preprocessing.sparse_to_dense.SparseToDenseProcessor)

Bases: ml.rl.workflow.preprocess_handler.PreprocessHandler

preprocess(batch) → ml.rl.types.RawTrainingBatch
class ml.rl.workflow.preprocess_handler.DiscreteDqnPreprocessHandler(num_actions: int, sparse_to_dense_processor: ml.rl.preprocessing.sparse_to_dense.SparseToDenseProcessor)

Bases: ml.rl.workflow.preprocess_handler.PreprocessHandler

preprocess(batch) → ml.rl.types.RawTrainingBatch
read_actions(actions)
class ml.rl.workflow.preprocess_handler.ParametricDqnPreprocessHandler(state_sparse_to_dense: ml.rl.preprocessing.sparse_to_dense.SparseToDenseProcessor, action_sparse_to_dense: ml.rl.preprocessing.sparse_to_dense.SparseToDenseProcessor)

Bases: ml.rl.workflow.preprocess_handler.PreprocessHandler

preprocess(batch) → ml.rl.types.RawTrainingBatch
class ml.rl.workflow.preprocess_handler.PreprocessHandler(sparse_to_dense_processor: ml.rl.preprocessing.sparse_to_dense.SparseToDenseProcessor)

Bases: object

preprocess(batch) → ml.rl.types.RawTrainingBatch

ml.rl.workflow.transitional module

ml.rl.workflow.transitional.create_dqn_trainer_from_params(model: ml.rl.parameters.DiscreteActionModelParameters, normalization_parameters: Dict[int, ml.rl.parameters.NormalizationParameters], use_gpu: bool = False, use_all_avail_gpus: bool = False, metrics_to_score=None)
ml.rl.workflow.transitional.create_parametric_dqn_trainer_from_params(model: ml.rl.parameters.ContinuousActionModelParameters, state_normalization_parameters: Dict[int, ml.rl.parameters.NormalizationParameters], action_normalization_parameters: Dict[int, ml.rl.parameters.NormalizationParameters], use_gpu: bool = False, use_all_avail_gpus: bool = False)
ml.rl.workflow.transitional.create_world_model_trainer(env: ml.rl.test.gym.open_ai_gym_environment.OpenAIGymEnvironment, mdnrnn_params: ml.rl.parameters.MDNRNNParameters, use_gpu: bool) → ml.rl.training.world_model.mdnrnn_trainer.MDNRNNTrainer
ml.rl.workflow.transitional.get_cem_trainer(env: ml.rl.test.gym.open_ai_gym_environment.OpenAIGymEnvironment, params: ml.rl.parameters.CEMParameters, use_gpu: bool) → ml.rl.training.cem_trainer.CEMTrainer
ml.rl.workflow.transitional.get_sac_trainer(env: ml.rl.test.gym.open_ai_gym_environment.OpenAIGymEnvironment, parameters: ml.rl.parameters.SACModelParameters, use_gpu: bool)
ml.rl.workflow.transitional.get_td3_trainer(env, parameters, use_gpu)
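
A hedged sketch of building a trainer with create_dqn_trainer_from_params and warm starting it from a saved checkpoint; model_params and state_normalization are placeholders for objects produced earlier in the workflow, and the checkpoint path is hypothetical:

    from ml.rl.workflow.helpers import update_model_for_warm_start
    from ml.rl.workflow.transitional import create_dqn_trainer_from_params

    # Sketch only: `model_params` is a DiscreteActionModelParameters instance
    # and `state_normalization` a Dict[int, NormalizationParameters].
    trainer = create_dqn_trainer_from_params(
        model=model_params,
        normalization_parameters=state_normalization,
        use_gpu=False,
    )
    update_model_for_warm_start(trainer, path="/tmp/trainer_checkpoint.pt")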

Module contents