Module fontai.entrypoints.fontairun
This script runs a single MLOps pipeline stage according to provided parameters. Use --help for more details.
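For example, assuming the package is installed, a stage can be launched programmatically by passing an argv-style list to StageRunner.run. The stage name and configuration path below are placeholders, not values taken from this module:

# Hypothetical invocation sketch; "preprocessing" and the YAML path are placeholders.
# The equivalent shell call would pass the same flags to this script's console entry point.
from fontai.entrypoints.fontairun import StageRunner

StageRunner.run(["--stage", "preprocessing", "--config-file", "config/preprocessing.yaml"])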
Expand source code
#!python
"""This script runs a single MLOps pipeline stage according to provided parameters. Use --help for more details.
"""
import sys
import os
import argparse
import logging
import datetime
import traceback
from pathlib import Path
logger = logging.getLogger("fontai")
from fontai.runners.base import FittableTransform
import fontai.runners.stages as stages
from tensorflow.config import get_visible_devices
from tensorflow.python.framework.errors_impl import UnknownError as unknown_tf_error
Path("logs").mkdir(parents=True, exist_ok=True)
class RunNotAllowedError(Exception):
    pass
class StageRunner(object):
    """Parses CLI arguments and executes an ML lifecycle stage.

    Attributes:
        parser (argparse.ArgumentParser): Argument parser
        schema_docstring (str): A compilation of docstrings from available runner classes and their parsers. It is meant to be displayed in the command line using the `--help` parameter.
        stage_runners (dict): Maps strings to runner classes for the `--stage` argument
    """

    stage_runners = {getattr(stages, name).get_stage_name(): getattr(stages, name) for name in stages.__all__}

    schema_docstring = "\n\n\n".join([f"{key}: \n\t{val.get_config_parser().get_config_schema.__doc__}" for key, val in stage_runners.items()])

    parser = argparse.ArgumentParser(
        description="Runs a single ML processing stage with its execution specified by a YAML configuration file. See below for details of configuration schema.",
        formatter_class=argparse.RawTextHelpFormatter)

    parser.add_argument(
        '--stage',
        dest='stage',
        required=True,
        help=f"""
        One of: {", ".join(list(stage_runners.keys()))}
        """)

    parser.add_argument(
        '--config-file',
        dest='config_file',
        required=True,
        help=f"""path to YAML file that defines the execution. Configuration schemas are as follows:
        \n
        {schema_docstring}
        """)

    parser.add_argument(
        '--fit',
        dest='fit',
        action='store_true',
        help="""
        If present (and if --stage is 'scoring'), fits the provided model; otherwise scores input data and saves the scored examples to the output_path parameter in the config file.
        """)

    parser.add_argument(
        '--load-model',
        dest='load_model',
        action='store_true',
        help="""
        If set, the model is loaded from model_path instead of building it from scratch. This is useful for retraining an existing model.
        """)

    parser.add_argument(
        '--run-name',
        dest='run_name',
        default=None,
        help="""
        Name of previously logged MLFlow run that is to be continued.
        """)

    parser.add_argument(
        '--logging-level',
        dest='logging_level',
        default="INFO",
        help="""
        Logging level. Defaults to 'INFO'
        """)

    @classmethod
    def run(cls, args=None):
        """Run specified ML processing operation with the given configuration.

        Args:
            args (t.List[str]): CLI arguments

        Raises:
            RunNotAllowedError: If --fit is provided when the stage is not fittable.
        """
        if args is None:
            args = sys.argv
        args, _ = cls.parser.parse_known_args(args)

        # set up logging
        logging_level = getattr(logging, args.logging_level)
        if os.environ.get("CONTAINER_ENV", "false") == "true":
            # if executing in a container, log to stdout
            logging.basicConfig(level=logging_level)

        logger.info(f"sys.argv input: {sys.argv}")
        logger.info(f"Visible CUDA devices: {get_visible_devices()}")

        if os.environ.get("CONTAINER_ENV", "false") != "true":
            logfile = f"{args.stage}-{datetime.datetime.now()}.log"
            logging.basicConfig(filename=Path("logs") / logfile, level=logging_level, filemode="w")
            print(f"Redirecting logs to logs/{logfile}")

        # sanitize parameters
        try:
            stage_class = cls.stage_runners[args.stage]
        except KeyError as e:
            raise ValueError(f"stage must be one of {', '.join(list(cls.stage_runners.keys()))}")

        if args.fit and not issubclass(stage_class, FittableTransform):
            raise RunNotAllowedError(f"stage {args.stage} is not fittable.")

        if args.load_model and args.stage != "scoring":
            raise RunNotAllowedError("--load-model is only supported for input scoring or model fitting.")

        if args.stage == "scoring" and not args.fit and not args.load_model:
            raise RunNotAllowedError("Scoring input data can only be done with an existing (i.e. saved) model. Use the --load-model flag to load one from model_path.")

        # run
        if args.fit:
            try:
                stage_class.fit_from_config_file(args.config_file, run_id=args.run_name, load_from_model_path=args.load_model)
            except unknown_tf_error as e:
                raise Exception(f"{traceback.format_exc()}.\n\n *** If you get a cuDNN error, try doing: export TF_FORCE_GPU_ALLOW_GROWTH=true *** \n\n")
        else:
            stage_class.run_from_config_file(args.config_file, load_from_model_path=args.load_model)
Classes
class RunNotAllowedError(*args, **kwargs)
    Common base class for all non-exit exceptions.
Expand source code
class RunNotAllowedError(Exception):
    pass
Ancestors
- builtins.Exception
- builtins.BaseException
class StageRunner
    Parses CLI arguments and executes an ML lifecycle stage.
Attributes
parser : argparse.ArgumentParser
    Argument parser.
schema_docstring : str
    A compilation of docstrings from available runner classes and their parsers. It is meant to be displayed in the command line using the --help parameter.
stage_runners : dict
    Maps strings to runner classes for the --stage argument.
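As a quick sketch of how these attributes can be used, the available stage names and the compiled schema docstrings can be inspected directly; the actual names depend on what fontai.runners.stages exports:

from fontai.entrypoints.fontairun import StageRunner

# stage names accepted by the --stage argument
print(list(StageRunner.stage_runners.keys()))

# compiled configuration schema docstrings shown by --help
print(StageRunner.schema_docstring)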
Class variables
var parser
var schema_docstring
var stage_runners
Static methods
def run(args=None)
    Run specified ML processing operation with the given configuration.
Args
args : t.List[str]
    CLI arguments
Raises
RunNotAllowedError
    If --fit is provided when the stage is not fittable.
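A minimal programmatic sketch of calling run and handling a disallowed flag combination; the stage name and config path are hypothetical, and --fit is only accepted for stages that subclass FittableTransform:

from fontai.entrypoints.fontairun import StageRunner, RunNotAllowedError

try:
    # --fit is rejected with RunNotAllowedError unless the selected stage is fittable
    StageRunner.run(["--stage", "preprocessing", "--config-file", "config/preprocessing.yaml", "--fit"])
except RunNotAllowedError as err:
    print(f"Run not allowed: {err}")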