Module fontai.config.prediction
Expand source code
from pathlib import Path
import logging
import typing as t
import inspect
import traceback
import string
from argparse import Namespace
import copy
from functools import reduce
from pydantic import BaseModel, PositiveInt, PositiveFloat, validator
import strictyaml as yml
from fontai.config.core import BaseConfigHandler, SimpleClassInstantiator, BasePipelineTransformConfig
import fontai.prediction.input_processing as input_processing
import fontai.prediction.custom_filters as custom_filters
import fontai.prediction.custom_mappers as custom_mappers
import fontai.prediction.models as custom_models
from tensorflow import keras
from tensorflow.keras import layers
import tensorflow.keras.callbacks as tf_callbacks
from tensorflow.random import set_seed
import fontai.prediction.callbacks as custom_callbacks
import fontai.io.records as records
logger = logging.getLogger(__name__)
class TrainingConfig(BaseModel):
    """
    Training configuration wrapper for a Scoring ML stage

    Args:
        custom_filters (t.List[t.Callable]): filter functions built from the `fontai.prediction.custom_filters` module
        custom_mappers (t.List[t.Callable]): mapping functions built from the `fontai.prediction.custom_mappers` module
        batch_size (int, optional): batch size
        epochs (int): epochs
        steps_per_epoch (int): batches per epoch
        optimizer (keras.optimizers.Optimizer): optimizer
        loss (keras.losses.Loss): loss function
        seed (int): Tensorflow global random seed
        metrics (t.List[str], optional): list of metrics to display
        callbacks (t.List[tf_callbacks.Callback], optional): list of callbacks to use at training time.
    """
    custom_filters: t.List[t.Callable] = []
    custom_mappers: t.List[t.Callable] = []
    batch_size: t.Optional[PositiveInt]
    epochs: PositiveInt
    steps_per_epoch: PositiveInt
    optimizer: keras.optimizers.Optimizer
    loss: keras.losses.Loss
    seed: PositiveInt = 1
    metrics: t.Optional[t.List[str]] = None
    callbacks: t.Optional[t.List[tf_callbacks.Callback]] = None

    @classmethod
    def from_yaml(cls, yaml):
        """Instantiate from a yml.YAML object

        Args:
            yaml (yml.YAML): Input YAML object following the schema given by ConfigHandler.TRAINING_CONFIG_SCHEMA

        Returns:
            TrainingConfig: Initialised object
        """
        schema_handler = SimpleClassInstantiator()
        args = yaml.data

        # the following objects are not primitive types and need to be instantiated from YAML definitions
        args["optimizer"] = schema_handler.get_instance(yaml=yaml.get("optimizer"), scope=keras.optimizers)
        args["loss"] = schema_handler.get_instance(yaml=yaml.get("loss"), scope=keras.losses)

        filters_yaml = yaml.get("custom_filters")
        if filters_yaml.data != []:
            args["custom_filters"] = [
                getattr(custom_filters, function_yaml.get("name").text)(**function_yaml.get("kwargs").data)
                for function_yaml in filters_yaml
            ]
        else:
            args["custom_filters"] = []

        mappers_yaml = yaml.get("custom_mappers")
        if mappers_yaml.data != []:
            args["custom_mappers"] = [
                getattr(custom_mappers, function_yaml.get("name").text)(**function_yaml.get("kwargs").data)
                for function_yaml in mappers_yaml
            ]
        else:
            args["custom_mappers"] = []

        # `yaml.get` hands back a YAML wrapper even when the entry holds null,
        # so the wrapped value has to be inspected as well before iterating.
        callbacks_yaml = yaml.get("callbacks")
        if callbacks_yaml is not None and callbacks_yaml.data is not None:
            args["callbacks"] = [CallbackFactory.create(callback_yaml) for callback_yaml in callbacks_yaml]
        else:
            args["callbacks"] = None

        return cls(**args)

    class Config:
        # optimizers, losses and callbacks are not pydantic-native types
        arbitrary_types_allowed = True
class Config(BasePipelineTransformConfig):
    """
    Configuration container for the ModelTrainingStage class

    Args:
        training_config (TrainingConfig): Runtime configuration for the training routine
        input_record_class (type): Input schema class; it must inherit from `fontai.io.records.TfrWritable`
        model_path (str): Model path
        model (keras.Model): Model instance that's going to be trained
        charset (str): One of "all", "uppercase", "lowercase" or "digits"
    """
    training_config: TrainingConfig
    input_record_class: type
    model_path: str
    model: keras.Model
    charset: str

    class Config:
        # pydantic settings: keras.Model is not a pydantic-native type
        arbitrary_types_allowed = True

    @validator("charset")
    def allowed_charsets(charset: str):
        """Check that the charset is one of the supported names

        Args:
            charset (str): charset attribute

        Returns:
            str: the charset, unchanged

        Raises:
            ValueError: If charset is not a supported name
        """
        allowed_vals = ["all","uppercase","lowercase","digits"]
        if charset not in allowed_vals:
            raise ValueError(f"charset must be one of {allowed_vals}")
        return charset

    @validator("input_record_class")
    def validate_input_record_class(input_record_class: type):
        """Check that the record class derives from TfrWritable

        Args:
            input_record_class (type): candidate record class

        Returns:
            type: the record class, unchanged

        Raises:
            TypeError: If the class does not inherit from records.TfrWritable
        """
        if not issubclass(input_record_class, records.TfrWritable):
            raise TypeError("input_record_class must inherit from TfrWritable")
        return input_record_class
class ModelFactory(object):
    """
    Factory class for ML models that takes YAML configuration objects
    """

    def __init__(self):
        """Build the candidate model schemas and map each one to its constructor."""
        self.yaml_to_obj = SimpleClassInstantiator()

        # a class name plus a list of layer definitions
        self.SEQUENTIAL_MODEL_SCHEMA = yml.Map({
            "class": yml.Str(),
            "kwargs": yml.Map({"layers": yml.Seq(self.yaml_to_obj.PY_CLASS_INSTANCE_FROM_YAML_SCHEMA)})
        })

        # a class name whose kwargs are primitives or nested Sequential definitions
        self.MULTI_SEQUENTIAL_MODEL_SCHEMA = yml.Map({
            "class": yml.Str(),
            "kwargs": yml.MapPattern(
                yml.Str(),
                self.yaml_to_obj.ANY_PRIMITIVES | self.SEQUENTIAL_MODEL_SCHEMA,
            )
        })

        self.PATH_TO_SAVED_MODEL_SCHEMA = yml.Map({"path": yml.Str(), yml.Optional("custom_class"): yml.Str()})

        # from_yaml tries the schemas in this insertion order
        self.schema_constructors = {
            self.PATH_TO_SAVED_MODEL_SCHEMA: ("SAVED MODEL PATH", self.from_path),
            self.SEQUENTIAL_MODEL_SCHEMA: ("KERAS SEQUENTIAL", self.from_keras_sequential),
            self.MULTI_SEQUENTIAL_MODEL_SCHEMA: ("MULTI SEQUENTIAL", self.from_multi_sequential)
        }

    def from_yaml(self, yaml: yml.YAML):
        """
        Instantiate a ML model from a YAML object that contains the model's specification

        Each known schema is tried in turn; the first one that both validates
        the YAML and builds a model wins.

        Args:
            yaml (yml.YAML): YAML object

        Returns:
            keras.Model: instantiated model

        Raises:
            Exception: If no schema produced a model. The last failure is attached as the cause.
        """
        last_error = None
        for schema, (name, constructor) in self.schema_constructors.items():
            try:
                model_yaml = yml.load(yaml.as_yaml(), schema)
                logger.info(f"Model schema matches {name}")
                return constructor(model_yaml)
            except Exception as error:
                # keep the failure so it can be chained to the final exception
                last_error = error
                logger.debug(f"Error when trying to instantiate model with schema {name}; \n Full trace: {traceback.format_exc()}")
        raise Exception("No valid schema matched provided model YAML; look at DEBUG log level for more info.") from last_error

    def from_path(self,model_yaml: yml.YAML):
        """
        Loads a saved model

        Args:
            model_yaml (yml.YAML): YAML object with a `path` entry and an optional `custom_class` entry

        Returns:
            keras.Model: instantiated model
        """
        if "custom_class" in model_yaml:
            # custom classes from fontai.prediction.models provide their own `load`
            model_class = model_yaml.get("custom_class").text
            return getattr(custom_models, model_class).load(model_yaml.get("path").text)
        else:
            return keras.models.load_model(model_yaml.get("path").text)

    def from_keras_sequential(self, model_yaml: yml.YAML):
        """
        Instantiate a ML model of Keras' Sequential class

        Args:
            model_yaml (yml.YAML): YAML object

        Returns:
            keras.Model: an instance of class Model
        """
        model_layers = model_yaml.get("kwargs").get("layers")
        layer_instances = [self.yaml_to_obj.get_instance(layer_yaml, layers) for layer_yaml in model_layers]
        return keras.Sequential(layer_instances)

    def from_multi_sequential(self, model_yaml: yml.YAML):
        """
        Instantiate a ML model that uses multiple Keras Sequential models internally

        Args:
            model_yaml (yml.YAML): YAML object

        Returns:
            keras.Model: an instance of class Model
        """
        args = model_yaml.get("kwargs")
        materialised_kwargs = copy.deepcopy(args.data)
        for arg in args:
            try:
                # kwargs that validate as a Sequential definition become model instances;
                # anything else keeps its primitive value
                yml.load(args.get(arg).as_yaml(), self.SEQUENTIAL_MODEL_SCHEMA)
                materialised_kwargs[arg] = self.from_keras_sequential(args.get(arg))
            except Exception:
                logger.debug(f"Parameter {arg} does not match Sequential model schema. Full trace: {traceback.format_exc()}")
        return getattr(custom_models, model_yaml.get("class").text)(**materialised_kwargs)
class CallbackFactory(object):
    """Factory class for instantiating Tensorflow callback objects
    """

    @classmethod
    def create(cls, yaml: yml.YAML) -> tf_callbacks.Callback:
        """Create callback from YAML object

        The class is looked up first in `tensorflow.keras.callbacks` and then
        in `fontai.prediction.callbacks`.

        Args:
            yaml (yml.YAML): Input YAML object

        Returns:
            tf_callbacks.Callback: instantiated callback

        Raises:
            ValueError: When YAML does not match any known callback class
        """
        yaml_to_obj = SimpleClassInstantiator()
        for module in [tf_callbacks, custom_callbacks]:
            try:
                return yaml_to_obj.get_instance(yaml, scope=module)
            except AttributeError as e:
                message = f"error loading callback from YAML {yaml.data} from module {module}: {e}\n Full trace: {traceback.format_exc()}"
                # use the module logger, like the rest of this file, rather than the root logger
                logger.debug(message)
        raise ValueError("Provided YAML did not match any known callback.")
class ConfigHandler(BaseConfigHandler):
    """
    Wrapper for training configuration processing logic.
    """

    def other_setup(self):
        """Create the ModelFactory used by `instantiate_config` to build the model."""
        self.model_factory = ModelFactory()

    @classmethod
    def get_config_schema(cls):
        """
        YAML configuration schema:

        input_record_class (optional, defaults to LabeledChar): name of record class that will be parsed from input files; it has to inherit from `fontai.io.records.TfrWritable`. At the moment only `LabeledChar` and `LabeledFont` are supported. If `LabeledChar`, loaded elements correspond to single images from unordered fonts. If `LabeledFont`, loaded elements correspond to tensors holding all characters corresponding to a single font.
        input_path: Input files' folder with TF record files
        output_path: output files' folder when scoring new data
        model_path: Output model path when training a model
        charset (optional, defaults to 'lowercase'): One of 'uppercase', 'lowercase', 'digits' or 'all', and determines the set to use for training or scoring.
        training: subYAML. See below
        model: subYAML. See below

        'training' subYAML schema:

        batch_size (optional, defaults to None): can be null
        epochs: number of epochs
        steps_per_epoch (optional, defaults to 10k): minibatches per epoch; this is needed because the total number of valid records is not known before runtime
        seed (optional, defaults to 1): random seed for TF functions
        metrics (optional, defaults to None): List of TF metrics as strings
        loss: subYAML with keys `class` and `kwargs` to instantiate a Keras loss function
        optimizer (optional, defaults to Adam with default parameters): subYAML with keys `class` and `kwargs` to instantiate a Keras optimizer
        callbacks (optional, defaults to None): list of subYAMLs with keys `class` and `kwargs` to instantiate a Keras callback or a custom one defined in `fontai.prediction.callbacks`
        custom_filters (optional, defaults to []): list of subYAMLs with keys `name` and `kwargs` to instantiate a filtering function defined in `fontai.prediction.custom_filters` to apply just after records are deserialised for training
        custom_mappers (optional, defaults to []): list of subYAMLs with keys `name` and `kwargs` to instantiate a mapping function defined in `fontai.prediction.custom_mappers` to apply just after records are deserialised for training

        model subYAML schema:

        A subYAML with entries `class` and `kwargs` to instantiate a Keras model architecture; currently only `Sequential` types are tested. Each Keras layer is specified and instantiated analogously in the kwargs. The class name can also correspond to a custom class in `fontai.prediction.models`. The kwargs of the specified class can subsequently be Sequential keras models if needed.

        Returns:
            The strictyaml schema for the whole configuration file
        """
        # the intermediate schemas are stored on the class, as before
        cls.CUSTOM_FUNCTIONS = yml.Seq(yml.Map(
            {"name": yml.Str(),
            yml.Optional("kwargs", default = {}): yml.MapPattern(
                yml.Str(),
                cls.yaml_to_obj.ANY_PRIMITIVES) | yml.EmptyDict()})) | yml.EmptyList()

        cls.TRAINING_CONFIG_SCHEMA = yml.Map({
            yml.Optional("batch_size", default=None): yml.Int() | yml.EmptyNone(),
            "epochs": yml.Int(),
            yml.Optional("seed", default = 1): yml.Int(),
            yml.Optional(
                "metrics",
                default = None): yml.Seq(yml.Str()) | yml.EmptyNone(),
            "loss": cls.yaml_to_obj.PY_CLASS_INSTANCE_FROM_YAML_SCHEMA,
            yml.Optional(
                "steps_per_epoch",
                default = 10000): yml.Int(),
            yml.Optional(
                "optimizer",
                default = {"class": "Adam", "kwargs": {}}): cls.yaml_to_obj.PY_CLASS_INSTANCE_FROM_YAML_SCHEMA,
            yml.Optional(
                "callbacks",
                default = None): yml.Seq(cls.yaml_to_obj.PY_CLASS_INSTANCE_FROM_YAML_SCHEMA)| yml.EmptyNone(),
            yml.Optional(
                "custom_filters",
                default = []): cls.CUSTOM_FUNCTIONS,
            yml.Optional(
                "custom_mappers",
                default = []): cls.CUSTOM_FUNCTIONS
        })

        schema = yml.Map({
            yml.Optional("input_path", default = None): cls.IO_CONFIG_SCHEMA,
            yml.Optional("output_path", default = None): cls.IO_CONFIG_SCHEMA,
            yml.Optional("input_record_class",default = "LabeledChar"): yml.Str(),
            "model_path": cls.IO_CONFIG_SCHEMA,
            "training": cls.TRAINING_CONFIG_SCHEMA,
            # the model entry is validated later by ModelFactory.from_yaml
            "model": yml.Any(),
            yml.Optional("charset", default = "lowercase"): yml.Str()
        })

        return schema

    def instantiate_config(self, config: yml.YAML) -> Config:
        """
        Processes a YAML instance to produce a Config instance.

        Args:
            config (yml.YAML): YAML object

        Returns:
            Config: Instantiated configuration for a Scoring ML stage
        """
        input_path, output_path = config.get("input_path").text, config.get("output_path").text
        charset = config.get("charset").text
        input_record_class = getattr(records, config.get("input_record_class").text)
        model_path = config.get("model_path").text
        training_config = TrainingConfig.from_yaml(config.get("training"))

        # the Tensorflow global seed is set before the model is built
        set_seed(training_config.seed)
        model = self.model_factory.from_yaml(config.get("model"))

        return Config(
            input_record_class = input_record_class,
            input_path = input_path,
            output_path = output_path,
            model_path = model_path,
            model = model,
            training_config = training_config,
            charset = charset,
            yaml = config)
Classes
class CallbackFactory
-
Factory class for instantiating Tensorflow callback objects
Expand source code
class CallbackFactory(object):
    """Factory class for instantiating Tensorflow callback objects
    """

    @classmethod
    def create(cls, yaml: yml.YAML) -> tf_callbacks.Callback:
        """Create callback from YAML object

        The class is looked up first in `tensorflow.keras.callbacks` and then
        in `fontai.prediction.callbacks`.

        Args:
            yaml (yml.YAML): Input YAML object

        Returns:
            tf_callbacks.Callback: instantiated callback

        Raises:
            ValueError: When YAML does not match any known callback class
        """
        yaml_to_obj = SimpleClassInstantiator()
        for module in [tf_callbacks, custom_callbacks]:
            try:
                return yaml_to_obj.get_instance(yaml, scope=module)
            except AttributeError as e:
                message = f"error loading callback from YAML {yaml.data} from module {module}: {e}\n Full trace: {traceback.format_exc()}"
                # use the module logger, like the rest of this file, rather than the root logger
                logger.debug(message)
        raise ValueError("Provided YAML did not match any known callback.")
Static methods
def create(yaml: strictyaml.representation.YAML) ‑> tensorflow.python.keras.callbacks.Callback
-
Create callback from YAML object
Args
yaml
:yml.YAML
- Input YAML object
Raises
ValueError
- When YAML does not match any known callback class
Expand source code
@classmethod
def create(cls, yaml: yml.YAML) -> tf_callbacks.Callback:
    """Create callback from YAML object

    The class is looked up first in `tensorflow.keras.callbacks` and then
    in `fontai.prediction.callbacks`.

    Args:
        yaml (yml.YAML): Input YAML object

    Returns:
        tf_callbacks.Callback: instantiated callback

    Raises:
        ValueError: When YAML does not match any known callback class
    """
    yaml_to_obj = SimpleClassInstantiator()
    for module in [tf_callbacks, custom_callbacks]:
        try:
            return yaml_to_obj.get_instance(yaml, scope=module)
        except AttributeError as e:
            message = f"error loading callback from YAML {yaml.data} from module {module}: {e}\n Full trace: {traceback.format_exc()}"
            # use the module logger, like the rest of this file, rather than the root logger
            logger.debug(message)
    raise ValueError("Provided YAML did not match any known callback.")
class Config (**data: Any)
-
Wrapper class for the configuration of the ModelTrainingStage class
Args
input_record_class
:records.TfrWritable
- Input schema class from the fontai.io.records submodule
training_config
:TrainingConfig
- Runtime configuration for training routine
model
:keras.Model
- Model instance that's going to be trained
Create a new model by parsing and validating input data from keyword arguments.
Raises ValidationError if the input data cannot be parsed to form a valid model.
Expand source code
class Config(BasePipelineTransformConfig):
    """
    Configuration container for the ModelTrainingStage class

    Args:
        training_config (TrainingConfig): Runtime configuration for the training routine
        input_record_class (type): Input schema class; it must inherit from `fontai.io.records.TfrWritable`
        model_path (str): Model path
        model (keras.Model): Model instance that's going to be trained
        charset (str): One of "all", "uppercase", "lowercase" or "digits"
    """
    training_config: TrainingConfig
    input_record_class: type
    model_path: str
    model: keras.Model
    charset: str

    class Config:
        # pydantic settings: keras.Model is not a pydantic-native type
        arbitrary_types_allowed = True

    @validator("charset")
    def allowed_charsets(charset: str):
        """Check that the charset is one of the supported names

        Args:
            charset (str): charset attribute

        Returns:
            str: the charset, unchanged

        Raises:
            ValueError: If charset is not a supported name
        """
        allowed_vals = ["all","uppercase","lowercase","digits"]
        if charset not in allowed_vals:
            raise ValueError(f"charset must be one of {allowed_vals}")
        return charset

    @validator("input_record_class")
    def validate_input_record_class(input_record_class: type):
        """Check that the record class derives from TfrWritable

        Args:
            input_record_class (type): candidate record class

        Returns:
            type: the record class, unchanged

        Raises:
            TypeError: If the class does not inherit from records.TfrWritable
        """
        if not issubclass(input_record_class, records.TfrWritable):
            raise TypeError("input_record_class must inherit from TfrWritable")
        return input_record_class
Ancestors
- BasePipelineTransformConfig
- pydantic.main.BaseModel
- pydantic.utils.Representation
Class variables
var Config
var charset : str
var input_record_class : type
var model : tensorflow.python.keras.engine.training.Model
var model_path : str
var training_config : TrainingConfig
Static methods
def allowed_charsets()
-
Validator for charset attribute
Args
charset
:str
- charset attribute
Returns
str
- validated charset
Raises
ValueError
- If charset is invalid
Expand source code
@validator("charset")
def allowed_charsets(charset: str):
    """Check that the charset is one of the supported names

    Args:
        charset (str): charset attribute

    Returns:
        str: the charset, unchanged

    Raises:
        ValueError: If charset is not a supported name
    """
    allowed_vals = ["all","uppercase","lowercase","digits"]
    if charset not in allowed_vals:
        raise ValueError(f"charset must be one of {allowed_vals}")
    return charset
def validate_input_record_class()
-
Validate input record class
Args
input_record_class (type)
Returns
type
- Validated record class
Raises
TypeError
- If record class not in allowed set
Expand source code
@validator("input_record_class")
def validate_input_record_class(input_record_class: type):
    """Check that the record class derives from TfrWritable

    Args:
        input_record_class (type): candidate record class

    Returns:
        type: the record class, unchanged

    Raises:
        TypeError: If the class does not inherit from records.TfrWritable
    """
    if not issubclass(input_record_class, records.TfrWritable):
        raise TypeError("input_record_class must inherit from TfrWritable")
    return input_record_class
class ConfigHandler
-
Wrapper for training configuration processing logic.
Expand source code
class ConfigHandler(BaseConfigHandler):
    """
    Wrapper for training configuration processing logic.
    """

    def other_setup(self):
        """Create the ModelFactory used by `instantiate_config` to build the model."""
        self.model_factory = ModelFactory()

    @classmethod
    def get_config_schema(cls):
        """
        YAML configuration schema:

        input_record_class (optional, defaults to LabeledChar): name of record class that will be parsed from input files; it has to inherit from `fontai.io.records.TfrWritable`. At the moment only `LabeledChar` and `LabeledFont` are supported. If `LabeledChar`, loaded elements correspond to single images from unordered fonts. If `LabeledFont`, loaded elements correspond to tensors holding all characters corresponding to a single font.
        input_path: Input files' folder with TF record files
        output_path: output files' folder when scoring new data
        model_path: Output model path when training a model
        charset (optional, defaults to 'lowercase'): One of 'uppercase', 'lowercase', 'digits' or 'all', and determines the set to use for training or scoring.
        training: subYAML. See below
        model: subYAML. See below

        'training' subYAML schema:

        batch_size (optional, defaults to None): can be null
        epochs: number of epochs
        steps_per_epoch (optional, defaults to 10k): minibatches per epoch; this is needed because the total number of valid records is not known before runtime
        seed (optional, defaults to 1): random seed for TF functions
        metrics (optional, defaults to None): List of TF metrics as strings
        loss: subYAML with keys `class` and `kwargs` to instantiate a Keras loss function
        optimizer (optional, defaults to Adam with default parameters): subYAML with keys `class` and `kwargs` to instantiate a Keras optimizer
        callbacks (optional, defaults to None): list of subYAMLs with keys `class` and `kwargs` to instantiate a Keras callback or a custom one defined in `fontai.prediction.callbacks`
        custom_filters (optional, defaults to []): list of subYAMLs with keys `name` and `kwargs` to instantiate a filtering function defined in `fontai.prediction.custom_filters` to apply just after records are deserialised for training
        custom_mappers (optional, defaults to []): list of subYAMLs with keys `name` and `kwargs` to instantiate a mapping function defined in `fontai.prediction.custom_mappers` to apply just after records are deserialised for training

        model subYAML schema:

        A subYAML with entries `class` and `kwargs` to instantiate a Keras model architecture; currently only `Sequential` types are tested. Each Keras layer is specified and instantiated analogously in the kwargs. The class name can also correspond to a custom class in `fontai.prediction.models`. The kwargs of the specified class can subsequently be Sequential keras models if needed.

        Returns:
            The strictyaml schema for the whole configuration file
        """
        # the intermediate schemas are stored on the class, as before
        cls.CUSTOM_FUNCTIONS = yml.Seq(yml.Map(
            {"name": yml.Str(),
            yml.Optional("kwargs", default = {}): yml.MapPattern(
                yml.Str(),
                cls.yaml_to_obj.ANY_PRIMITIVES) | yml.EmptyDict()})) | yml.EmptyList()

        cls.TRAINING_CONFIG_SCHEMA = yml.Map({
            yml.Optional("batch_size", default=None): yml.Int() | yml.EmptyNone(),
            "epochs": yml.Int(),
            yml.Optional("seed", default = 1): yml.Int(),
            yml.Optional(
                "metrics",
                default = None): yml.Seq(yml.Str()) | yml.EmptyNone(),
            "loss": cls.yaml_to_obj.PY_CLASS_INSTANCE_FROM_YAML_SCHEMA,
            yml.Optional(
                "steps_per_epoch",
                default = 10000): yml.Int(),
            yml.Optional(
                "optimizer",
                default = {"class": "Adam", "kwargs": {}}): cls.yaml_to_obj.PY_CLASS_INSTANCE_FROM_YAML_SCHEMA,
            yml.Optional(
                "callbacks",
                default = None): yml.Seq(cls.yaml_to_obj.PY_CLASS_INSTANCE_FROM_YAML_SCHEMA)| yml.EmptyNone(),
            yml.Optional(
                "custom_filters",
                default = []): cls.CUSTOM_FUNCTIONS,
            yml.Optional(
                "custom_mappers",
                default = []): cls.CUSTOM_FUNCTIONS
        })

        schema = yml.Map({
            yml.Optional("input_path", default = None): cls.IO_CONFIG_SCHEMA,
            yml.Optional("output_path", default = None): cls.IO_CONFIG_SCHEMA,
            yml.Optional("input_record_class",default = "LabeledChar"): yml.Str(),
            "model_path": cls.IO_CONFIG_SCHEMA,
            "training": cls.TRAINING_CONFIG_SCHEMA,
            # the model entry is validated later by ModelFactory.from_yaml
            "model": yml.Any(),
            yml.Optional("charset", default = "lowercase"): yml.Str()
        })

        return schema

    def instantiate_config(self, config: yml.YAML) -> Config:
        """
        Processes a YAML instance to produce a Config instance.

        Args:
            config (yml.YAML): YAML object

        Returns:
            Config: Instantiated configuration for a Scoring ML stage
        """
        input_path, output_path = config.get("input_path").text, config.get("output_path").text
        charset = config.get("charset").text
        input_record_class = getattr(records, config.get("input_record_class").text)
        model_path = config.get("model_path").text
        training_config = TrainingConfig.from_yaml(config.get("training"))

        # the Tensorflow global seed is set before the model is built
        set_seed(training_config.seed)
        model = self.model_factory.from_yaml(config.get("model"))

        return Config(
            input_record_class = input_record_class,
            input_path = input_path,
            output_path = output_path,
            model_path = model_path,
            model = model,
            training_config = training_config,
            charset = charset,
            yaml = config)
Ancestors
- BaseConfigHandler
- abc.ABC
Static methods
def get_config_schema()
-
YAML configuration schema:
input_record_class (optional, defaults to LabeledChar): name of record class that will be parsed from input files; it has to inherit from TfrWritable. At the moment only LabeledChar and LabeledFont are supported. If LabeledChar, loaded elements correspond to single images from unordered fonts. If LabeledFont, loaded elements correspond to tensors holding all characters corresponding to a single font.
input_path: Input files' folder with TF record files
output_path: output files' folder when scoring new data
model_path: Output model path when training a model
charset (optional, defaults to 'lowercase'): One of 'uppercase', 'lowercase', 'digits' or 'all', and determines the set to use for training or scoring.
training: subYAML. See below
model: subYAML. See below
'training' subYAML schema:
batch_size (optional, defaults to None): can be null
epochs: number of epochs
steps_per_epoch (optional, defaults to 10k): minibatches per epoch; this is needed because the total number of valid records is not known before runtime
seed (optional, defaults to 1): random seed for TF functions
metrics: List of TF metrics as strings
loss: subYAML with keys class and kwargs to instantiate a Keras loss function
optimizer (optional, defaults to Adam with default parameters): subYAML with keys class and kwargs to instantiate a Keras optimizer
callbacks (optional, defaults to []): subYAML with keys class and kwargs to instantiate a Keras callback or a custom one defined in fontai.prediction.callbacks
custom_filters (optional, defaults to []): subYAML with keys name and kwargs to instantiate a filtering function defined in fontai.prediction.custom_filters to apply just after records are deserialised for training
custom_mappers (optional, defaults to []): subYAML with keys name and kwargs to instantiate a mapping function defined in fontai.prediction.custom_mappings to apply just after records are deserialised for training
model subYAML schema:
A subYAML with entries class and kwargs to instantiate a Keras model architecture; currently only Sequential types are tested. Each Keras layer is specified and instantiated analogously in the kwargs. The class name can also correspond to a custom class in fontai.prediction.models. The kwargs of the specified class can subsequently be Sequential keras models if needed.
Expand source code
@classmethod
def get_config_schema(cls):
    """
    YAML configuration schema:

    input_record_class (optional, defaults to LabeledChar): name of record class that will be parsed from input files; it has to inherit from `fontai.io.records.TfrWritable`. At the moment only `LabeledChar` and `LabeledFont` are supported. If `LabeledChar`, loaded elements correspond to single images from unordered fonts. If `LabeledFont`, loaded elements correspond to tensors holding all characters corresponding to a single font.
    input_path: Input files' folder with TF record files
    output_path: output files' folder when scoring new data
    model_path: Output model path when training a model
    charset (optional, defaults to 'lowercase'): One of 'uppercase', 'lowercase', 'digits' or 'all', and determines the set to use for training or scoring.
    training: subYAML. See below
    model: subYAML. See below

    'training' subYAML schema:

    batch_size (optional, defaults to None): can be null
    epochs: number of epochs
    steps_per_epoch (optional, defaults to 10k): minibatches per epoch; this is needed because the total number of valid records is not known before runtime
    seed (optional, defaults to 1): random seed for TF functions
    metrics (optional, defaults to None): List of TF metrics as strings
    loss: subYAML with keys `class` and `kwargs` to instantiate a Keras loss function
    optimizer (optional, defaults to Adam with default parameters): subYAML with keys `class` and `kwargs` to instantiate a Keras optimizer
    callbacks (optional, defaults to None): list of subYAMLs with keys `class` and `kwargs` to instantiate a Keras callback or a custom one defined in `fontai.prediction.callbacks`
    custom_filters (optional, defaults to []): list of subYAMLs with keys `name` and `kwargs` to instantiate a filtering function defined in `fontai.prediction.custom_filters` to apply just after records are deserialised for training
    custom_mappers (optional, defaults to []): list of subYAMLs with keys `name` and `kwargs` to instantiate a mapping function defined in `fontai.prediction.custom_mappers` to apply just after records are deserialised for training

    model subYAML schema:

    A subYAML with entries `class` and `kwargs` to instantiate a Keras model architecture; currently only `Sequential` types are tested. Each Keras layer is specified and instantiated analogously in the kwargs. The class name can also correspond to a custom class in `fontai.prediction.models`. The kwargs of the specified class can subsequently be Sequential keras models if needed.

    Returns:
        The strictyaml schema for the whole configuration file
    """
    # the intermediate schemas are stored on the class, as before
    cls.CUSTOM_FUNCTIONS = yml.Seq(yml.Map(
        {"name": yml.Str(),
        yml.Optional("kwargs", default = {}): yml.MapPattern(
            yml.Str(),
            cls.yaml_to_obj.ANY_PRIMITIVES) | yml.EmptyDict()})) | yml.EmptyList()

    cls.TRAINING_CONFIG_SCHEMA = yml.Map({
        yml.Optional("batch_size", default=None): yml.Int() | yml.EmptyNone(),
        "epochs": yml.Int(),
        yml.Optional("seed", default = 1): yml.Int(),
        yml.Optional(
            "metrics",
            default = None): yml.Seq(yml.Str()) | yml.EmptyNone(),
        "loss": cls.yaml_to_obj.PY_CLASS_INSTANCE_FROM_YAML_SCHEMA,
        yml.Optional(
            "steps_per_epoch",
            default = 10000): yml.Int(),
        yml.Optional(
            "optimizer",
            default = {"class": "Adam", "kwargs": {}}): cls.yaml_to_obj.PY_CLASS_INSTANCE_FROM_YAML_SCHEMA,
        yml.Optional(
            "callbacks",
            default = None): yml.Seq(cls.yaml_to_obj.PY_CLASS_INSTANCE_FROM_YAML_SCHEMA)| yml.EmptyNone(),
        yml.Optional(
            "custom_filters",
            default = []): cls.CUSTOM_FUNCTIONS,
        yml.Optional(
            "custom_mappers",
            default = []): cls.CUSTOM_FUNCTIONS
    })

    schema = yml.Map({
        yml.Optional("input_path", default = None): cls.IO_CONFIG_SCHEMA,
        yml.Optional("output_path", default = None): cls.IO_CONFIG_SCHEMA,
        yml.Optional("input_record_class",default = "LabeledChar"): yml.Str(),
        "model_path": cls.IO_CONFIG_SCHEMA,
        "training": cls.TRAINING_CONFIG_SCHEMA,
        # the model entry is validated later by ModelFactory.from_yaml
        "model": yml.Any(),
        yml.Optional("charset", default = "lowercase"): yml.Str()
    })

    return schema
Methods
def instantiate_config(self, config: strictyaml.representation.YAML) ‑> Config
-
Processes a YAML instance to produce an Config instance.
Args
config
:yml.YAML
- YAML object
Returns
Config
- Instantiated configuration for a Scoring ML stage
Expand source code
def instantiate_config(self, config: yml.YAML) -> Config:
    """
    Build a Config instance out of a parsed YAML configuration.

    Args:
        config (yml.YAML): YAML object

    Returns:
        Config: Instantiated configuration for a Scoring ML stage
    """
    input_path = config.get("input_path").text
    output_path = config.get("output_path").text
    charset = config.get("charset").text

    record_class_name = config.get("input_record_class").text
    input_record_class = getattr(records, record_class_name)

    model_path = config.get("model_path").text
    training_config = TrainingConfig.from_yaml(config.get("training"))

    # the Tensorflow global seed is set before the model is built
    set_seed(training_config.seed)
    model = self.model_factory.from_yaml(config.get("model"))

    config_kwargs = {
        "input_record_class": input_record_class,
        "input_path": input_path,
        "output_path": output_path,
        "model_path": model_path,
        "model": model,
        "training_config": training_config,
        "charset": charset,
        "yaml": config,
    }
    return Config(**config_kwargs)
def other_setup(self)
-
Expand source code
def other_setup(self):
    """Attach a fresh ModelFactory to the handler as `model_factory`."""
    factory = ModelFactory()
    self.model_factory = factory
Inherited members
class ModelFactory
-
Factory class for ML models that takes YAML configuration objects
Expand source code
class ModelFactory(object):
    """
    Factory class for ML models that takes YAML configuration objects.

    A model specification is matched against three schemas, each paired with
    the constructor method that builds the model once the schema validates:
    a path to a saved model, a Keras Sequential definition, and a custom
    class whose keyword arguments may themselves be Sequential definitions.
    """

    def __init__(self):
        self.yaml_to_obj = SimpleClassInstantiator()

        # {"class": ..., "kwargs": {"layers": [<layer spec>, ...]}}
        self.SEQUENTIAL_MODEL_SCHEMA = yml.Map({
            "class": yml.Str(),
            "kwargs": yml.Map({
                "layers": yml.Seq(self.yaml_to_obj.PY_CLASS_INSTANCE_FROM_YAML_SCHEMA)
            }),
        })

        # {"class": ..., "kwargs": {<name>: <primitive or Sequential spec>}}
        self.MULTI_SEQUENTIAL_MODEL_SCHEMA = yml.Map({
            "class": yml.Str(),
            "kwargs": yml.MapPattern(
                yml.Str(),
                self.yaml_to_obj.ANY_PRIMITIVES | self.SEQUENTIAL_MODEL_SCHEMA,
            ),
        })

        # {"path": ..., "custom_class": <optional class name>}
        self.PATH_TO_SAVED_MODEL_SCHEMA = yml.Map({
            "path": yml.Str(),
            yml.Optional("custom_class"): yml.Str(),
        })

        # schema -> (human readable name used in log messages, constructor)
        self.schema_constructors = {
            self.PATH_TO_SAVED_MODEL_SCHEMA: ("SAVED MODEL PATH", self.from_path),
            self.SEQUENTIAL_MODEL_SCHEMA: ("KERAS SEQUENTIAL", self.from_keras_sequential),
            self.MULTI_SEQUENTIAL_MODEL_SCHEMA: ("MULTI SEQUENTIAL", self.from_multi_sequential),
        }

    def from_yaml(self, yaml: yml.YAML):
        """
        Instantiate a ML model from a YAML object that contains the model's specification

        Each entry of `self.schema_constructors` is tried in turn; the first
        one whose schema validates and whose constructor succeeds wins.

        Args:
            yaml (yml.YAML): YAML object

        Returns:
            keras.Model: instantiated model

        Raises:
            Exception: If no matching schema is found. The error raised by the
                last attempted schema is attached as the exception's cause.
        """
        last_error = None
        for schema, (name, constructor) in self.schema_constructors.items():
            try:
                model_yaml = yml.load(yaml.as_yaml(), schema)
                logger.info(f"Model schema matches {name}")
                return constructor(model_yaml)
            except Exception as e:
                # Both a schema mismatch and a constructor failure land here,
                # and the next schema is tried. Keep the error so the final
                # exception does not lose all context.
                last_error = e
                logger.debug(f"Error when trying to instantiate model with schema {name}; \n Full trace: {traceback.format_exc()}")
        raise Exception("No valid schema matched provided model YAML; look at DEBUG log level for more info.") from last_error

    def from_path(self, model_yaml: yml.YAML):
        """
        Loads a saved model

        When a `custom_class` entry is present, the class of that name in
        `fontai.prediction.models` is asked to load the model through its
        `load` attribute; otherwise `keras.models.load_model` is used.

        Args:
            model_yaml (yml.YAML): YAML object

        Returns:
            keras.Model: instantiated model
        """
        if "custom_class" in model_yaml:
            model_class = model_yaml.get("custom_class").text
            return getattr(custom_models, model_class).load(model_yaml.get("path").text)
        return keras.models.load_model(model_yaml.get("path").text)

    def from_keras_sequential(self, model_yaml: yml.YAML):
        """
        Instantiate a ML model of Keras' Sequential class

        Args:
            model_yaml (yml.YAML): YAML object

        Returns:
            keras.Model: an instance of class Model
        """
        model_layers = model_yaml.get("kwargs").get("layers")
        # Every layer spec is instantiated with `keras.layers` as lookup scope.
        layer_instances = [
            self.yaml_to_obj.get_instance(layer_yaml, layers)
            for layer_yaml in model_layers
        ]
        return keras.Sequential(layer_instances)

    def from_multi_sequential(self, model_yaml: yml.YAML):
        """
        Instantiate a ML model that uses multiple Keras Sequential models internally

        Keyword arguments that validate against the Sequential model schema
        are replaced by the built Sequential model; every other argument is
        passed through as its plain data.

        Args:
            model_yaml (yml.YAML): YAML object

        Returns:
            keras.Model: an instance of class Model
        """
        args = model_yaml.get("kwargs")
        materialised_kwargs = copy.deepcopy(args.data)
        for arg in args:
            try:
                yml.load(args.get(arg).as_yaml(), self.SEQUENTIAL_MODEL_SCHEMA)
                materialised_kwargs[arg] = self.from_keras_sequential(args.get(arg))
            except Exception:
                # Best effort: any failure leaves the argument's plain value
                # in place and is only reported at DEBUG level.
                logger.debug(f"Parameter {arg} does not match Sequential model schema. Full trace: {traceback.format_exc()}")
        return getattr(custom_models, model_yaml.get("class").text)(**materialised_kwargs)
Methods
def from_keras_sequential(self, model_yaml: strictyaml.representation.YAML)
-
Instantiate a ML model of Keras' Sequential class
Args
model_yaml
:yml.YAML
- YAML object
Returns
keras.Model
- an instance of class Model
Expand source code
def from_keras_sequential(self, model_yaml: yml.YAML):
    """
    Build a Keras Sequential model from its YAML definition.

    Args:
        model_yaml (yml.YAML): YAML object

    Returns:
        keras.Model: an instance of class Model
    """
    layer_specs = model_yaml.get("kwargs").get("layers")
    built_layers = []
    for spec in layer_specs:
        # Each layer spec is instantiated with `keras.layers` as lookup scope.
        built_layers.append(self.yaml_to_obj.get_instance(spec, layers))
    return keras.Sequential(built_layers)
def from_multi_sequential(self, model_yaml: strictyaml.representation.YAML)
-
Instantiate a ML model that uses multiple Keras Sequential models internally
Args
model_yaml
:yml.YAML
- YAML object
Returns
keras.Model
- an instance of class Model
Expand source code
def from_multi_sequential(self, model_yaml: yml.YAML):
    """
    Build a custom model whose keyword arguments may be Keras Sequential models.

    Keyword arguments that validate against the Sequential model schema are
    replaced by the built Sequential model; every other argument keeps its
    plain data.

    Args:
        model_yaml (yml.YAML): YAML object

    Returns:
        keras.Model: an instance of class Model
    """
    kwargs_yaml = model_yaml.get("kwargs")
    built_kwargs = copy.deepcopy(kwargs_yaml.data)
    for arg in kwargs_yaml:
        try:
            candidate = kwargs_yaml.get(arg)
            yml.load(candidate.as_yaml(), self.SEQUENTIAL_MODEL_SCHEMA)
            built_kwargs[arg] = self.from_keras_sequential(candidate)
        except Exception:
            # Best effort: any failure leaves the plain value in place and
            # is only reported at DEBUG level.
            logger.debug(f"Parameter {arg} does not match Sequential model schema. Full trace: {traceback.format_exc()}")
    model_class = getattr(custom_models, model_yaml.get("class").text)
    return model_class(**built_kwargs)
def from_path(self, model_yaml: strictyaml.representation.YAML)
-
Loads a saved model
Args
model_yaml
:yml.YAML
- YAML object
Returns
keras.Model
- instantiated model
Expand source code
def from_path(self,model_yaml: yml.YAML):
    """
    Load a previously saved model.

    Without a `custom_class` entry the model is read with
    `keras.models.load_model`; otherwise the class of that name in
    `fontai.prediction.models` loads it through its `load` attribute.

    Args:
        model_yaml (yml.YAML): YAML object

    Returns:
        keras.Model: instantiated model
    """
    if "custom_class" not in model_yaml:
        return keras.models.load_model(model_yaml.get("path").text)

    class_name = model_yaml.get("custom_class").text
    loader = getattr(custom_models, class_name)
    return loader.load(model_yaml.get("path").text)
def from_yaml(self, yaml: strictyaml.representation.YAML)
-
Instantiate a ML model from a YAML object that contains the model's specification
Args
yaml
:yml.YAML
- YAML object
Returns
keras.Model
- instantiated model
Raises
Exception
- If no matching schema is found.
Expand source code
def from_yaml(self, yaml: yml.YAML):
    """
    Instantiate a ML model from a YAML object that contains the model's specification

    Each entry of `self.schema_constructors` is tried in turn; the first one
    whose schema validates and whose constructor succeeds wins.

    Args:
        yaml (yml.YAML): YAML object

    Returns:
        keras.Model: instantiated model

    Raises:
        Exception: If no matching schema is found. The error raised by the
            last attempted schema is attached as the exception's cause.
    """
    last_error = None
    for schema, (name, constructor) in self.schema_constructors.items():
        try:
            model_yaml = yml.load(yaml.as_yaml(), schema)
            logger.info(f"Model schema matches {name}")
            return constructor(model_yaml)
        except Exception as e:
            # Both a schema mismatch and a constructor failure land here, and
            # the next schema is tried. Keep the error so the final exception
            # does not lose all context.
            last_error = e
            logger.debug(f"Error when trying to instantiate model with schema {name}; \n Full trace: {traceback.format_exc()}")
    raise Exception("No valid schema matched provided model YAML; look at DEBUG log level for more info.") from last_error
class TrainingConfig (**data: Any)
-
Training configuration wrapper for a Scoring ML stage
Args
batch_size
:int
- batch size
epochs
:int
- epochs
steps_per_epoch
:int
- batches per epoch
optimizer
:keras.optimizers.Optimizer
- optimizer
loss
:keras.losses.Loss
- loss function
filters
:t.List[t.Callable]
- list of model input filter functions from the input_processing module
seed
:int
- Tensorflow global random seed
metrics
:t.List[str]
, optional- list of metrics to display
callbacks
:t.List[tf_callbacks.Callback]
, optional- list of callbacks to use at training time.
Create a new model by parsing and validating input data from keyword arguments.
Raises ValidationError if the input data cannot be parsed to form a valid model.
Expand source code
class TrainingConfig(BaseModel):
    """
    Training configuration wrapper for a Scoring ML stage

    Args:
        custom_filters (t.List[t.Callable], optional): objects obtained by calling the named factories from `fontai.prediction.custom_filters`
        custom_mappers (t.List[t.Callable], optional): objects obtained by calling the named factories from `fontai.prediction.custom_mappers`
        batch_size (int, optional): batch size
        epochs (int): epochs
        steps_per_epoch (int): batches per epoch
        optimizer (keras.optimizers.Optimizer): optimizer
        loss (keras.losses.Loss): loss function
        seed (int): Tensorflow global random seed
        metrics (t.List[str], optional): list of metrics to display
        callbacks (t.List[tf_callbacks.Callback], optional): list of callbacks to use at training time.
    """
    custom_filters: t.List[t.Callable] = []
    custom_mappers: t.List[t.Callable] = []
    batch_size: t.Optional[PositiveInt]
    epochs: PositiveInt
    steps_per_epoch: PositiveInt
    optimizer: keras.optimizers.Optimizer
    loss: keras.losses.Loss
    seed: PositiveInt = 1
    metrics: t.Optional[t.List[str]] = None
    callbacks: t.Optional[t.List[tf_callbacks.Callback]] = None

    @classmethod
    def from_yaml(cls, yaml):
        """Instantiate from a yml.YAML object

        Args:
            yaml (yml.YAML): Input YAML object following the schema given by ConfigHandler.training_config_schema

        Returns:
            TrainingConfig: Initialised object
        """
        schema_handler = SimpleClassInstantiator()
        args = yaml.data

        # the following objects are not primitive types and need to be instantiated from YAML definitions
        args["optimizer"] = schema_handler.get_instance(yaml=yaml.get("optimizer"), scope=keras.optimizers)
        args["loss"] = schema_handler.get_instance(yaml=yaml.get("loss"), scope=keras.losses)

        def build_callables(key, scope):
            # Call the factory named by each entry, looked up in `scope`,
            # with that entry's kwargs. An empty list yields an empty list.
            entries = yaml.get(key)
            if entries.data == []:
                return []
            return [
                getattr(scope, entry.get("name").text)(**entry.get("kwargs").data)
                for entry in entries
            ]

        args["custom_filters"] = build_callables("custom_filters", custom_filters)
        args["custom_mappers"] = build_callables("custom_mappers", custom_mappers)

        # A missing key gives Python None, while an empty/defaulted entry may
        # come back as a YAML wrapper whose data is None; treat both as
        # "no callbacks" instead of trying to iterate the wrapper.
        callbacks_yaml = yaml.get("callbacks")
        if callbacks_yaml is not None and callbacks_yaml.data is not None:
            args["callbacks"] = [CallbackFactory.create(callback_yaml) for callback_yaml in callbacks_yaml]
        else:
            args["callbacks"] = None

        return cls(**args)

    class Config:
        # Required so pydantic accepts the Keras optimizer/loss/callback types.
        arbitrary_types_allowed = True
Ancestors
- pydantic.main.BaseModel
- pydantic.utils.Representation
Class variables
var Config
var batch_size : Optional[pydantic.types.PositiveInt]
var callbacks : Optional[List[tensorflow.python.keras.callbacks.Callback]]
var custom_filters : List[Callable]
var custom_mappers : List[Callable]
var epochs : pydantic.types.PositiveInt
var loss : tensorflow.python.keras.losses.Loss
var metrics : Optional[List[str]]
var optimizer : tensorflow.python.keras.optimizer_v2.optimizer_v2.OptimizerV2
var seed : pydantic.types.PositiveInt
var steps_per_epoch : pydantic.types.PositiveInt
Static methods
def from_yaml(yaml)
-
Instantiate from a yml.YAML object
Args
yaml
:yml.YAML
- Input YAML object following the schema given by ConfigHandler.training_config_schema
Returns
TrainingConfig
- Initialised object
Expand source code
@classmethod
def from_yaml(cls, yaml):
    """Instantiate from a yml.YAML object

    Args:
        yaml (yml.YAML): Input YAML object following the schema given by ConfigHandler.training_config_schema

    Returns:
        TrainingConfig: Initialised object
    """
    schema_handler = SimpleClassInstantiator()
    args = yaml.data

    # the following objects are not primitive types and need to be instantiated from YAML definitions
    args["optimizer"] = schema_handler.get_instance(yaml=yaml.get("optimizer"), scope=keras.optimizers)
    args["loss"] = schema_handler.get_instance(yaml=yaml.get("loss"), scope=keras.losses)

    def build_callables(key, scope):
        # Call the factory named by each entry, looked up in `scope`, with
        # that entry's kwargs. An empty list yields an empty list.
        entries = yaml.get(key)
        if entries.data == []:
            return []
        return [
            getattr(scope, entry.get("name").text)(**entry.get("kwargs").data)
            for entry in entries
        ]

    args["custom_filters"] = build_callables("custom_filters", custom_filters)
    args["custom_mappers"] = build_callables("custom_mappers", custom_mappers)

    # A missing key gives Python None, while an empty/defaulted entry may come
    # back as a YAML wrapper whose data is None; treat both as "no callbacks"
    # instead of trying to iterate the wrapper.
    callbacks_yaml = yaml.get("callbacks")
    if callbacks_yaml is not None and callbacks_yaml.data is not None:
        args["callbacks"] = [CallbackFactory.create(callback_yaml) for callback_yaml in callbacks_yaml]
    else:
        args["callbacks"] = None

    return cls(**args)