Module fontai.config.ingestion
Expand source code
from pathlib import Path
import logging
import typing as t
import inspect
from pydantic import PositiveFloat
import strictyaml as yml
import fontai.io.scrappers as scrapper_module
from fontai.config.core import BaseConfigHandler, BasePipelineTransformConfig
logger = logging.getLogger(__name__)
class Config(BasePipelineTransformConfig):
scrappers: t.List[scrapper_module.Scrapper]
max_output_file_size: float
class ConfigHandler(BaseConfigHandler):
"""
Wrapper for ingestion's configuration processing logic.
"""
@classmethod
def get_config_schema(self):
"""
YAML configuration schema:
scrappers: list of subyamls with keys (class, kwargs) specifying the scrapper instances from `fontai.io.scrappers `to use
output_path: target output folder
max_output_file_size: Maximum individual output file size in MB
"""
schema = yml.Map({
"scrappers" : yml.Seq(self.yaml_to_obj.PY_CLASS_INSTANCE_FROM_YAML_SCHEMA),
yml.Optional("output_path", default = None): self.IO_CONFIG_SCHEMA,
yml.Optional("max_output_file_size", default = 64.0): yml.Float()
})
return schema
def instantiate_config(self, config: yml.YAML) -> Config:
"""
Processes a YAML instance to produce an Config instance.
Args:
config (yml.YAML): YAML object from the strictyaml library
Returns:
Config: Configuration object
"""
output_path = config.get("output_path").text
scrappers = [self.yaml_to_obj.get_instance(yaml=scrapper, scope=scrapper_module) for scrapper in config.get("scrappers")]
max_output_file_size = config.get("max_output_file_size").data
return Config(
max_output_file_size = max_output_file_size,
output_path = output_path,
scrappers = scrappers,
yaml = config)
Classes
class Config (**data: Any)
-
Base class for ML pipelane stage configuration objects
Args
input_path
:str
, optional- object to list and retrieve input files to be processed
output_path
:str
, optional- object to persist output bojects
yaml
:yml.YAML
- parsed YAML from supplied configuration file
Create a new model by parsing and validating input data from keyword arguments.
Raises ValidationError if the input data cannot be parsed to form a valid model.
Expand source code
class Config(BasePipelineTransformConfig): scrappers: t.List[scrapper_module.Scrapper] max_output_file_size: float
Ancestors
- BasePipelineTransformConfig
- pydantic.main.BaseModel
- pydantic.utils.Representation
Class variables
var max_output_file_size : float
var scrappers : List[fontai.io.scrappers.Scrapper]
class ConfigHandler
-
Wrapper for ingestion's configuration processing logic.
Expand source code
class ConfigHandler(BaseConfigHandler): """ Wrapper for ingestion's configuration processing logic. """ @classmethod def get_config_schema(self): """ YAML configuration schema: scrappers: list of subyamls with keys (class, kwargs) specifying the scrapper instances from `fontai.io.scrappers `to use output_path: target output folder max_output_file_size: Maximum individual output file size in MB """ schema = yml.Map({ "scrappers" : yml.Seq(self.yaml_to_obj.PY_CLASS_INSTANCE_FROM_YAML_SCHEMA), yml.Optional("output_path", default = None): self.IO_CONFIG_SCHEMA, yml.Optional("max_output_file_size", default = 64.0): yml.Float() }) return schema def instantiate_config(self, config: yml.YAML) -> Config: """ Processes a YAML instance to produce an Config instance. Args: config (yml.YAML): YAML object from the strictyaml library Returns: Config: Configuration object """ output_path = config.get("output_path").text scrappers = [self.yaml_to_obj.get_instance(yaml=scrapper, scope=scrapper_module) for scrapper in config.get("scrappers")] max_output_file_size = config.get("max_output_file_size").data return Config( max_output_file_size = max_output_file_size, output_path = output_path, scrappers = scrappers, yaml = config)
Ancestors
- BaseConfigHandler
- abc.ABC
Static methods
def get_config_schema()
-
YAML configuration schema:
scrappers: list of subyamls with keys (class, kwargs) specifying the scrapper instances from
fontai.io.scrappers
to use output_path: target output folder max_output_file_size: Maximum individual output file size in MBExpand source code
@classmethod def get_config_schema(self): """ YAML configuration schema: scrappers: list of subyamls with keys (class, kwargs) specifying the scrapper instances from `fontai.io.scrappers `to use output_path: target output folder max_output_file_size: Maximum individual output file size in MB """ schema = yml.Map({ "scrappers" : yml.Seq(self.yaml_to_obj.PY_CLASS_INSTANCE_FROM_YAML_SCHEMA), yml.Optional("output_path", default = None): self.IO_CONFIG_SCHEMA, yml.Optional("max_output_file_size", default = 64.0): yml.Float() }) return schema
Methods
def instantiate_config(self, config: strictyaml.representation.YAML) ‑> Config
-
Processes a YAML instance to produce an Config instance.
Args
config
:yml.YAML
- YAML object from the strictyaml library
Returns
Config
- Configuration object
Expand source code
def instantiate_config(self, config: yml.YAML) -> Config: """ Processes a YAML instance to produce an Config instance. Args: config (yml.YAML): YAML object from the strictyaml library Returns: Config: Configuration object """ output_path = config.get("output_path").text scrappers = [self.yaml_to_obj.get_instance(yaml=scrapper, scope=scrapper_module) for scrapper in config.get("scrappers")] max_output_file_size = config.get("max_output_file_size").data return Config( max_output_file_size = max_output_file_size, output_path = output_path, scrappers = scrappers, yaml = config)
Inherited members