managers

ModelsManager

class msdss_models_api.managers.ModelsManager(models=[], folder='./models', handler=None, suffix='_base.pickle')[source]

Class to manage msdss_models_api.models.Model objects.

Parameters
  • models (list(msdss_models_api.models.Model) or dict(msdss_models_api.models.Model)) – List or dict of available Model objects to use for creating and managing model instances. If list, ensure that the class names are unique, otherwise the last object in the list takes priority.

  • folder (str) – The folder path to store models in.

  • handler (msdss_models_api.handlers.ModelsHandler or bool or None) –

    Handler object to manage model operations.

    • If None, then a default handler will be created

    • If False, then inputs for model operations will not be handled

  • suffix (str) – Suffix for pickled model object files. These files contain the model objects without inputs or loading.

models

Dictionary of available models from parameter models, where:

  • Each key is the class name

  • Each value is the class itself

Type

dict(msdss_models_api.models.Model)

instances

Dictionary of loaded model instances created from method create.

Type

dict(msdss_models_api.models.Model)

folder

Same as parameter folder.

Type

str

handler

Same as parameter handler.

Type

msdss_models_api.handlers.ModelsHandler

suffix

Same as parameter suffix.

Type

str

Author

Richard Wen <rrwen.dev@gmail.com>

Example

import tempfile
from msdss_models_api.models import Model
from msdss_models_api.managers import ModelsManager

with tempfile.TemporaryDirectory() as folder_path:

    # Setup available models
    models = [Model]

    # Create manager
    models_manager = ModelsManager(models, folder=folder_path)

    # Create model instance
    models_manager.create('temp_model', 'Model')

    # Initialize a model instance with inputs
    train_data = [
        {'col_a': 1, 'col_b': 'a'},
        {'col_a': 2, 'col_b': 'b'}
    ]
    models_manager.input('temp_model', train_data)

    # Update model instance with new data
    new_data = [
        {'col_a': 2, 'col_b': 'c'},
        {'col_a': 3, 'col_b': 'd'}
    ]
    models_manager.update('temp_model', new_data)

    # Produce output from a model instance
    test_data = [
        {'col_a': 2, 'col_b': 'c'},
        {'col_a': 3, 'col_b': 'd'}
    ]
    results = models_manager.output('temp_model', test_data)

    # Delete model instance
    models_manager.delete('temp_model')

_get_base_file

ModelsManager._get_base_file(name)[source]

Get the path of the base file for the model instance.

Parameters

name (str) – Unique name of the model instance. The instance is stored in .instances[name].

Returns

File path for the base file of the model instance.

Return type

str

Author

Richard Wen <rrwen.dev@gmail.com>

Example

import tempfile
from msdss_models_api.models import Model
from msdss_models_api.managers import ModelsManager

with tempfile.TemporaryDirectory() as folder_path:

    # Setup available models
    models = [Model]

    # Create manager
    models_manager = ModelsManager(models, folder=folder_path)

    # Create model instance
    models_manager.create('temp_model', 'Model')

    # Get the base file path of the model instance
    path = models_manager._get_base_file('temp_model')

_get_folder

ModelsManager._get_folder(name)[source]

Get the path of the subfolder for the model instance.

Parameters

name (str) – Unique name of the model instance. The instance is stored in .instances[name].

Returns

File path for the subfolder of the model instance.

Return type

str

Author

Richard Wen <rrwen.dev@gmail.com>

Example

import tempfile
from msdss_models_api.models import Model
from msdss_models_api.managers import ModelsManager

with tempfile.TemporaryDirectory() as folder_path:

    # Setup available models
    models = [Model]

    # Create manager
    models_manager = ModelsManager(models, folder=folder_path)

    # Create model instance
    models_manager.create('temp_model', 'Model')

    # Get the subfolder path of the model instance
    folder_path = models_manager._get_folder('temp_model')

_get_instance

ModelsManager._get_instance(name)[source]

Get a model instance by name.

Parameters

name (str) – Unique name of the model instance. The instance is stored in .instances[name].

Author

Richard Wen <rrwen.dev@gmail.com>

Example

import tempfile
from msdss_models_api.models import Model
from msdss_models_api.managers import ModelsManager

with tempfile.TemporaryDirectory() as folder_path:

    # Setup available models
    models = [Model]

    # Create manager
    models_manager = ModelsManager(models, folder=folder_path)

    # Create model instance
    models_manager.create('temp_model', 'Model')

    # Get the model instance by name
    models_manager._get_instance('temp_model')

_get_model_name

ModelsManager._get_model_name(name)[source]

Get the model name of a model instance.

Parameters

name (str) – Unique name of the model instance. The instance is stored in .instances[name].

Author

Richard Wen <rrwen.dev@gmail.com>

Example

import tempfile
from msdss_models_api.models import Model
from msdss_models_api.managers import ModelsManager

with tempfile.TemporaryDirectory() as folder_path:

    # Setup available models
    models = [Model]

    # Create manager
    models_manager = ModelsManager(models, folder=folder_path)

    # Create model instance
    models_manager.create('temp_model', 'Model')

    # Get the model name of the model instance
    models_manager._get_model_name('temp_model')

_get_save_file

ModelsManager._get_save_file(name)[source]

Get the path of the save file for the model instance without the extension.

Parameters

name (str) – Unique name of the model instance. The instance is stored in .instances[name].

Returns

File path for the save file of the model instance. Does not include extension.

Return type

str

Author

Richard Wen <rrwen.dev@gmail.com>

Example

import tempfile
from msdss_models_api.models import Model
from msdss_models_api.managers import ModelsManager

with tempfile.TemporaryDirectory() as folder_path:

    # Setup available models
    models = [Model]

    # Create manager
    models_manager = ModelsManager(models, folder=folder_path)

    # Create model instance
    models_manager.create('temp_model', 'Model')

    # Get the save file path of the model instance (without extension)
    save_path = models_manager._get_save_file('temp_model')

_load_base_files

ModelsManager._load_base_files(force=False)[source]

Load all base models.

Sets attribute .instances to be initialized msdss_models_api.models.Model instances based on the subfolders in attribute folder.

Parameters

force (bool) – Whether to force loading regardless of whether the instance is already in the .instances attribute.

Author

Richard Wen <rrwen.dev@gmail.com>

Example

import tempfile
from msdss_models_api.models import Model
from msdss_models_api.managers import ModelsManager

with tempfile.TemporaryDirectory() as folder_path:

    # Setup available models
    models = [Model]

    # Create manager
    models_manager = ModelsManager(models, folder=folder_path)

    # Create model instance
    models_manager.create('temp_model', 'Model')

    # Load all base models
    models_manager._load_base_files()

create

ModelsManager.create(name, model, parameters={})[source]

Creates a model instance.

  • Stores the instance in attribute instances

  • Creates a subfolder and base model file for the instance

Parameters
  • name (str) – Unique name of the model instance. The instance is stored in .instances[name].

  • model (str) – Name of the model to use for the instance. This is the same as the class name from attribute models.

  • parameters (dict) – parameters to be passed to msdss_models_api.models.Model to instantiate the instance.

Author

Richard Wen <rrwen.dev@gmail.com>

Example

import tempfile
from msdss_models_api.models import Model
from msdss_models_api.managers import ModelsManager

with tempfile.TemporaryDirectory() as folder_path:

    # Setup available models
    models = [Model]

    # Create manager
    models_manager = ModelsManager(models, folder=folder_path)

    # Create model instance
    models_manager.create('temp_model', 'Model')

delete

ModelsManager.delete(name, parameters={})[source]

Deletes a model instance.

  • Removes the instance in attribute instances

  • Removes the subfolder and base model file for the instance

Parameters

Author

Richard Wen <rrwen.dev@gmail.com>

Example

import tempfile
from msdss_models_api.models import Model
from msdss_models_api.managers import ModelsManager

with tempfile.TemporaryDirectory() as folder_path:

    # Setup available models
    models = [Model]

    # Create manager
    models_manager = ModelsManager(models, folder=folder_path)

    # Create model instance
    models_manager.create('temp_model', 'Model')

    # Initialize a model instance with inputs
    train_data = [
        {'col_a': 1, 'col_b': 'a'},
        {'col_a': 2, 'col_b': 'b'}
    ]
    models_manager.input('temp_model', train_data)

    # Delete model instance
    models_manager.delete('temp_model')

input

ModelsManager.input(name, data, parameters={})[source]

Train an initial model instance and save it by adding input data.

  • Modifies .instances[name] by calling the input and save methods

  • Modifies .states[name] with action and results for inputting data into the model instance

Parameters
  • name (str) – Unique name of the model instance. The instance is stored in .instances[name].

  • data (dict or list or pandas:pandas.DataFrame) – Data to use for initializing the model instance. Should accept a list or dict to be input in a pandas:pandas.DataFrame or the dataframe itself.

  • parameters (dict) – Keyword arguments passed to the msdss_models_api.models.Model.input().

Author

Richard Wen <rrwen.dev@gmail.com>

Example

import tempfile
from msdss_models_api.models import Model
from msdss_models_api.managers import ModelsManager

with tempfile.TemporaryDirectory() as folder_path:

    # Setup available models
    models = [Model]

    # Create manager
    models_manager = ModelsManager(models, folder=folder_path)

    # Create model instance
    models_manager.create('temp_model', 'Model')

    # Initialize a model instance with inputs
    train_data = [
        {'col_a': 1, 'col_b': 'a'},
        {'col_a': 2, 'col_b': 'b'}
    ]
    models_manager.input('temp_model', train_data)

output

ModelsManager.output(name, data, parameters={})[source]

Get the output of a model instance.

Parameters
  • name (str) – Unique name of the model instance. The instance is stored in .instances[name].

  • data (dict or list or pandas:pandas.DataFrame) – Data to use for obtaining the model instance output. Should accept a list or dict to be input in a pandas:pandas.DataFrame or the dataframe itself.

  • parameters (dict) – Keyword arguments passed to the msdss_models_api.models.Model.output().

Returns

Output data from the model instance name, produced using the input data from parameter data.

Return type

pandas:pandas.DataFrame

Author

Richard Wen <rrwen.dev@gmail.com>

Example

import tempfile
from msdss_models_api.models import Model
from msdss_models_api.managers import ModelsManager

with tempfile.TemporaryDirectory() as folder_path:

    # Setup available models
    models = [Model]

    # Create manager
    models_manager = ModelsManager(models, folder=folder_path)

    # Create model instance
    models_manager.create('temp_model', 'Model')

    # Initialize a model instance with inputs
    train_data = [
        {'col_a': 1, 'col_b': 'a'},
        {'col_a': 2, 'col_b': 'b'}
    ]
    models_manager.input('temp_model', train_data)

    # Produce output from a model instance
    test_data = [
        {'col_a': 2, 'col_b': 'c'},
        {'col_a': 3, 'col_b': 'd'}
    ]
    results = models_manager.output('temp_model', test_data)

update

ModelsManager.update(name, data, parameters={})[source]

Update a model instance with new data.

  • Modifies .instances[name] by calling the update and save methods

  • Modifies .states[name] with action and results for updating the model instance

Parameters
  • name (str) – Unique name of the model instance. The instance is stored in .instances[name].

  • data (dict or list or pandas:pandas.DataFrame) – Data to use for updating the model instance. Should accept a list or dict to be input in a pandas:pandas.DataFrame or the dataframe itself.

  • parameters (dict) – Keyword arguments passed to the msdss_models_api.models.Model.update().

Author

Richard Wen <rrwen.dev@gmail.com>

Example

import tempfile
from msdss_models_api.models import Model
from msdss_models_api.managers import ModelsManager

with tempfile.TemporaryDirectory() as folder_path:

    # Setup available models
    models = [Model]

    # Create manager
    models_manager = ModelsManager(models, folder=folder_path)

    # Create model instance
    models_manager.create('temp_model', 'Model')

    # Initialize a model instance with inputs
    train_data = [
        {'col_a': 1, 'col_b': 'a'},
        {'col_a': 2, 'col_b': 'b'}
    ]
    models_manager.input('temp_model', train_data)

    # Update model instance with new data
    new_data = [
        {'col_a': 2, 'col_b': 'c'},
        {'col_a': 3, 'col_b': 'd'}
    ]
    models_manager.update('temp_model', new_data)

ModelsBackgroundManager

class msdss_models_api.managers.ModelsBackgroundManager(worker, models_manager, handler=None, metadata_manager=None)[source]

Class to manage msdss_models_api.models.Model background processes using a msdss_models_api.managers.ModelsManager.

  • Note: it is expected that model outputs and creation are not long-running processes

Parameters
worker

Same as parameter worker.

Type

celery:celery.Celery or None

models_manager

Same as parameter models_manager.

Type

dict(msdss_models_api.managers.ModelsManager)

tasks

Dictionary of background tasks from the worker object with the following keys:

  • input (func): background task to initialize models with input data

  • update (func): background task to update models

Type

dict

states

Dictionary of processing states for each instance, consisting of the following keys:

  • task (str): the action that the process is performing - one of: INPUT, UPDATE

  • result (celery:celery.result.AsyncResult): celery async object for getting states, ids, etc (see celery.result)

  • started_at (datetime.datetime): datetime object for when the task was started

Type

dict

models_handler

Handler extracted from parameter models_manager.

Type

msdss_models_api.handlers.ModelsHandler

handler

Same as parameter handler.

Type

msdss_models_api.handlers.ModelsBackgroundHandler

metadata_manager

Same as parameter metadata_manager.

Type

msdss_models_api.managers.ModelsMetadataManager

Author

Richard Wen <rrwen.dev@gmail.com>

Example

import tempfile
from celery import Celery
from msdss_models_api.models import Model
from msdss_models_api.managers import *
from pprint import pprint

with tempfile.TemporaryDirectory() as folder_path:

    # Setup available models
    models = [Model]

    # Create manager
    models_manager = ModelsManager(models, folder=folder_path)

    # Create model instance
    models_manager.create('temp_model', 'Model')

    # Create background manager
    worker = Celery(broker='redis://localhost:6379/0', backend='redis://localhost:6379/0') # rabbitmq
    bg_manager = ModelsBackgroundManager(worker, models_manager)

    # Initialize a model instance with inputs as a background process
    train_data = [
        {'col_a': 1, 'col_b': 'a'},
        {'col_a': 2, 'col_b': 'b'}
    ]
    bg_manager.input('temp_model', train_data)

    # Check status
    status = bg_manager.get_status('temp_model')
    pprint(status)

    # Update model instance with new data as background process
    new_data = [
        {'col_a': 2, 'col_b': 'c'},
        {'col_a': 3, 'col_b': 'd'}
    ]
    bg_manager.update('temp_model', new_data)

    # Produce output from a model instance
    test_data = [
        {'col_a': 2, 'col_b': 'c'},
        {'col_a': 3, 'col_b': 'd'}
    ]
    results = bg_manager.output('temp_model', test_data)

    # Delete model instance
    bg_manager.delete('temp_model')

_add_task

ModelsBackgroundManager._add_task(task, name, *args, **kwargs)[source]

Add a background task for a model instance.

  • Sets attribute .states with a key referring to the model instance name

Parameters
  • task (str) – The name of the task in attribute .tasks.

  • name (str) – The name of the model instance to add tasks for.

  • *args – Additional arguments passed to the associated task function in attribute .tasks.

  • **kwargs – Additional arguments passed to the associated task function in attribute .tasks.

Author

Richard Wen <rrwen.dev@gmail.com>

Example

import tempfile
from celery import Celery
from msdss_models_api.models import Model
from msdss_models_api.managers import *

with tempfile.TemporaryDirectory() as folder_path:

    # Setup available models
    models = [Model]

    # Create manager
    models_manager = ModelsManager(models, folder=folder_path)

    # Create model instance
    models_manager.create('temp_model', 'Model')

    # Create background manager
    worker = Celery(broker='redis://localhost:6379/0', backend='redis://localhost:6379/0') # rabbitmq
    bg_manager = ModelsBackgroundManager(worker, models_manager)

    # Initialize a model instance with inputs as a background process
    train_data = [
        {'col_a': 1, 'col_b': 'a'},
        {'col_a': 2, 'col_b': 'b'}
    ]
    bg_manager._add_task('input', 'temp_model', train_data)

cancel

ModelsBackgroundManager.cancel(name)[source]

Cancel background task for model instance.

Parameters

name (str) – The name of the model instance to cancel tasks for.

Author

Richard Wen <rrwen.dev@gmail.com>

Example

import tempfile
from celery import Celery
from msdss_models_api.models import Model
from msdss_models_api.managers import *

with tempfile.TemporaryDirectory() as folder_path:

    # Setup available models
    models = [Model]

    # Create manager
    models_manager = ModelsManager(models, folder=folder_path)

    # Create model instance
    models_manager.create('temp_model', 'Model')

    # Create background manager
    worker = Celery(broker='redis://localhost:6379/0', backend='redis://localhost:6379/0') # rabbitmq
    bg_manager = ModelsBackgroundManager(worker, models_manager)

    # Initialize a model instance with inputs as a background process
    train_data = [
        {'col_a': 1, 'col_b': 'a'},
        {'col_a': 2, 'col_b': 'b'}
    ]
    bg_manager.input('temp_model', train_data)

    # Cancel any bg tasks for model instance
    bg_manager.cancel('temp_model')

create

ModelsBackgroundManager.create(name, model, metadata={}, *args, **kwargs)[source]

Create a model instance.

Parameters

Author

Richard Wen <rrwen.dev@gmail.com>

Example

import tempfile
from celery import Celery
from msdss_models_api.models import Model
from msdss_models_api.managers import *

with tempfile.TemporaryDirectory() as folder_path:

    # Setup available models
    models = [Model]

    # Create manager
    models_manager = ModelsManager(models, folder=folder_path)

    # Create background manager
    worker = Celery(broker='redis://localhost:6379/0', backend='redis://localhost:6379/0') # rabbitmq
    bg_manager = ModelsBackgroundManager(worker, models_manager)

    # Create model instance
    bg_manager.create('temp_model', 'Model')

delete

ModelsBackgroundManager.delete(name, *args, **kwargs)[source]

Delete a model instance and stop any associated background tasks.

Parameters

Author

Richard Wen <rrwen.dev@gmail.com>

Example

import tempfile
from celery import Celery
from msdss_models_api.models import Model
from msdss_models_api.managers import *

with tempfile.TemporaryDirectory() as folder_path:

    # Setup available models
    models = [Model]

    # Create manager
    models_manager = ModelsManager(models, folder=folder_path)

    # Create model instance
    models_manager.create('temp_model', 'Model')

    # Create background manager
    worker = Celery(broker='redis://localhost:6379/0', backend='redis://localhost:6379/0') # rabbitmq
    bg_manager = ModelsBackgroundManager(worker, models_manager)

    # Initialize a model instance with inputs as a background process
    train_data = [
        {'col_a': 1, 'col_b': 'a'},
        {'col_a': 2, 'col_b': 'b'}
    ]
    bg_manager.input('temp_model', train_data)

    # Delete the model instance and any associated bg tasks
    bg_manager.delete('temp_model')

get_status

ModelsBackgroundManager.get_status(name)[source]

Get the status of a model instance.

Parameters

name (str) – See parameter name in msdss_models_api.managers.ModelsManager.load().

Returns

A dictionary representing the status of the model instance with name, containing the following keys:

  • task (str): the processing task of the model instance

  • status (str): the processing status of the model instance

  • started_at (datetime.datetime): when the process was started

If the model instance is not processing, it will return a dict of the status only {'status': 'IDLE'}.

Return type

dict

Author

Richard Wen <rrwen.dev@gmail.com>

Example

import tempfile
from celery import Celery
from msdss_models_api.models import Model
from msdss_models_api.managers import *
from pprint import pprint

with tempfile.TemporaryDirectory() as folder_path:

    # Setup available models
    models = [Model]

    # Create manager
    models_manager = ModelsManager(models, folder=folder_path)

    # Create model instance
    models_manager.create('temp_model', 'Model')

    # Create background manager
    worker = Celery(broker='redis://localhost:6379/0', backend='redis://localhost:6379/0') # rabbitmq
    bg_manager = ModelsBackgroundManager(worker, models_manager)

    # Initialize a model instance with inputs as a background process
    train_data = [
        {'col_a': 1, 'col_b': 'a'},
        {'col_a': 2, 'col_b': 'b'}
    ]
    bg_manager.input('temp_model', train_data)

    # Get the status of the model instance
    status = bg_manager.get_status('temp_model')
    pprint(status)

input

ModelsBackgroundManager.input(name, *args, **kwargs)[source]

Initialize a model instance with data as a background task.

Runs msdss_models_api.managers.ModelsManager.input().

Parameters

Author

Richard Wen <rrwen.dev@gmail.com>

Example

import tempfile
from celery import Celery
from msdss_models_api.models import Model
from msdss_models_api.managers import *

with tempfile.TemporaryDirectory() as folder_path:

    # Setup available models
    models = [Model]

    # Create manager
    models_manager = ModelsManager(models, folder=folder_path)

    # Create model instance
    models_manager.create('temp_model', 'Model')

    # Create background manager
    worker = Celery(broker='redis://localhost:6379/0', backend='redis://localhost:6379/0') # rabbitmq
    bg_manager = ModelsBackgroundManager(worker, models_manager)

    # Initialize a model instance with inputs as a background process
    train_data = [
        {'col_a': 1, 'col_b': 'a'},
        {'col_a': 2, 'col_b': 'b'}
    ]
    bg_manager.input('temp_model', train_data)

output

ModelsBackgroundManager.output(name, *args, **kwargs)[source]

Get output from a model, only if a model is not processing.

Runs msdss_models_api.managers.ModelsManager.output().

Parameters

Author

Richard Wen <rrwen.dev@gmail.com>

Example

import tempfile
from celery import Celery
from msdss_models_api.models import Model
from msdss_models_api.managers import *

with tempfile.TemporaryDirectory() as folder_path:

    # Setup available models
    models = [Model]

    # Create manager
    models_manager = ModelsManager(models, folder=folder_path)

    # Create model instance
    models_manager.create('temp_model', 'Model')

    # Create background manager
    worker = Celery(broker='redis://localhost:6379/0', backend='redis://localhost:6379/0') # rabbitmq
    bg_manager = ModelsBackgroundManager(worker, models_manager)

    # Initialize a model instance with inputs as a background process
    train_data = [
        {'col_a': 1, 'col_b': 'a'},
        {'col_a': 2, 'col_b': 'b'}
    ]
    bg_manager.input('temp_model', train_data)

    # Get output from model instance
    test_data = [
        {'col_a': 2, 'col_b': 'c'},
        {'col_a': 3, 'col_b': 'd'}
    ]
    bg_manager.output('temp_model', test_data)

start

ModelsBackgroundManager.start(*args, worker_kwargs={}, **kwargs)[source]

Start the background worker to process background tasks.

Parameters
  • worker_kwargs (dict) – Keyword arguments for celery:celery.apps.worker.

  • *args – Additional arguments passed to celery:celery.apps.worker.start().

  • **kwargs – Additional arguments passed to celery:celery.apps.worker.start().

Author

Richard Wen <rrwen.dev@gmail.com>

Example

import tempfile
from celery import Celery
from msdss_models_api.models import Model
from msdss_models_api.managers import *

with tempfile.TemporaryDirectory() as folder_path:

    # Setup available models
    models = [Model]

    # Create manager
    models_manager = ModelsManager(models, folder=folder_path)

    # Create model instance
    models_manager.create('temp_model', 'Model')

    # Create background manager
    worker = Celery(broker='redis://localhost:6379/0', backend='redis://localhost:6379/0') # rabbitmq
    bg_manager = ModelsBackgroundManager(worker, models_manager)

    # Initialize a model instance with inputs as a background process
    train_data = [
        {'col_a': 1, 'col_b': 'a'},
        {'col_a': 2, 'col_b': 'b'}
    ]
    bg_manager._add_task('input', 'temp_model', train_data)

    # Start worker
    bg_manager.start()

update

ModelsBackgroundManager.update(name, *args, **kwargs)[source]

Update a model instance with new data as a background task.

Runs msdss_models_api.managers.ModelsManager.update().

Parameters

Author

Richard Wen <rrwen.dev@gmail.com>

Example

import tempfile
from celery import Celery
from msdss_models_api.models import Model
from msdss_models_api.managers import *

with tempfile.TemporaryDirectory() as folder_path:

    # Setup available models
    models = [Model]

    # Create manager
    models_manager = ModelsManager(models, folder=folder_path)

    # Create model instance
    models_manager.create('temp_model', 'Model')

    # Create background manager
    worker = Celery(broker='redis://localhost:6379/0', backend='redis://localhost:6379/0') # rabbitmq
    bg_manager = ModelsBackgroundManager(worker, models_manager)

    # Initialize a model instance with inputs as a background process
    train_data = [
        {'col_a': 1, 'col_b': 'a'},
        {'col_a': 2, 'col_b': 'b'}
    ]
    bg_manager.input('temp_model', train_data)

    # Update model instance with new data as background process
    new_data = [
        {'col_a': 2, 'col_b': 'c'},
        {'col_a': 3, 'col_b': 'd'}
    ]
    bg_manager.update('temp_model', new_data)

ModelsDBManager

class msdss_models_api.managers.ModelsDBManager(models=[], data_manager=None, *args, **kwargs)[source]

Class to manage msdss_models_api.models.Model objects with added methods for processing models with data from a database.

  • Inherits from msdss_models_api.managers.ModelsManager

Parameters
  • models (list(msdss_models_api.models.Model)) – List of available Model objects to use for creating and managing model instances. Ensure that the class names are unique, otherwise the last object takes priority.

  • data_manager (msdss_data_api.managers.DataManager or None) – A data manager object for managing data in and out of a database. If None, a default data manager will be used.

  • *args – Additional arguments for msdss_models_api.managers.ModelsManager

  • **kwargs – Additional keyword arguments for msdss_models_api.managers.ModelsManager

Author

Richard Wen <rrwen.dev@gmail.com>

Example

import tempfile
from msdss_base_database import Database
from msdss_data_api.managers import DataManager
from msdss_models_api.models import Model
from msdss_models_api.managers import ModelsDBManager

with tempfile.TemporaryDirectory() as folder_path:

    # Setup available models and database
    database = Database()
    models = [Model]

    # Create manager
    data_manager = DataManager(database=database)
    models_manager = ModelsDBManager(models, data_manager, folder=folder_path)

    # Create model instance
    models_manager.create('temp_model', 'Model')

    # Add training data to database
    train_data = [
        {'col_a': 1, 'col_b': 'a'},
        {'col_a': 2, 'col_b': 'b'}
    ]
    database.insert('models_test', train_data)

    # Initialize a model instance with inputs from database
    models_manager.input_db('temp_model', 'models_test')

    # Update model instance with new data
    new_data = [
        {'col_a': 3, 'col_b': 'c'},
        {'col_a': 4, 'col_b': 'd'}
    ]
    database.insert('models_test', new_data)
    models_manager.update_db('temp_model', 'models_test', where=['col_a > 2'])

    # Delete test table
    database.drop_table('models_test')

input_db

ModelsDBManager.input_db(name, dataset, parameters={}, *args, **kwargs)[source]

Initialize a model instance with data from the database.

Parameters

Author

Richard Wen <rrwen.dev@gmail.com>

Example

import tempfile
from msdss_base_database import Database
from msdss_data_api.managers import DataManager
from msdss_models_api.models import Model
from msdss_models_api.managers import ModelsDBManager

with tempfile.TemporaryDirectory() as folder_path:

    # Setup available models and database
    database = Database()
    models = [Model]

    # Create manager
    data_manager = DataManager(database=database)
    models_manager = ModelsDBManager(models, data_manager, folder=folder_path)

    # Create model instance
    models_manager.create('temp_model', 'Model')

    # Add training data to database
    train_data = [
        {'col_a': 1, 'col_b': 'a'},
        {'col_a': 2, 'col_b': 'b'}
    ]
    database.insert('models_test', train_data)

    # Initialize a model instance with inputs from database
    models_manager.input_db('temp_model', 'models_test')

    # Delete test table
    database.drop_table('models_test')

update_db

ModelsDBManager.update_db(name, dataset, parameters={}, *args, **kwargs)[source]

Update a model instance with data from the database.

Parameters

Author

Richard Wen <rrwen.dev@gmail.com>

Example

import tempfile
from msdss_base_database import Database
from msdss_data_api.managers import DataManager
from msdss_models_api.models import Model
from msdss_models_api.managers import ModelsDBManager

with tempfile.TemporaryDirectory() as folder_path:

    # Setup available models and database
    database = Database()
    models = [Model]

    # Create manager
    data_manager = DataManager(database=database)
    models_manager = ModelsDBManager(models, data_manager, folder=folder_path)

    # Create model instance
    models_manager.create('temp_model', 'Model')

    # Add training data to database
    train_data = [
        {'col_a': 1, 'col_b': 'a'},
        {'col_a': 2, 'col_b': 'b'}
    ]
    database.insert('models_test', train_data)

    # Initialize a model instance with inputs from database
    models_manager.input_db('temp_model', 'models_test')

    # Update model instance with new data
    new_data = [
        {'col_a': 3, 'col_b': 'c'},
        {'col_a': 4, 'col_b': 'd'}
    ]
    database.insert('models_test', new_data)
    models_manager.update_db('temp_model', 'models_test', where=['col_a > 2'])

    # Delete test table
    database.drop_table('models_test')

ModelsDBBackgroundManager

class msdss_models_api.managers.ModelsDBBackgroundManager(models_manager, *args, **kwargs)[source]

Class to manage msdss_models_api.models.Model background processes using a msdss_models_api.managers.ModelsDBManager.

Parameters
models_manager

Same as parameter models_manager.

Type

msdss_models_api.managers.ModelsDBManager

tasks

Dictionary of background tasks from the worker object with the following keys (in addition to the ones in attribute tasks of msdss_models_api.managers.ModelsBackgroundManager):

  • input_db (func): background task to initialize models with input data from a database

  • update_db (func): background task to update models using data from a database

Type

dict

Author

Richard Wen <rrwen.dev@gmail.com>

Example

import tempfile
from celery import Celery
from msdss_base_database import Database
from msdss_data_api.managers import DataManager
from msdss_models_api.models import Model
from msdss_models_api.managers import *
from pprint import pprint

with tempfile.TemporaryDirectory() as folder_path:

    # Setup available models
    models = [Model]

    # Create manager
    database = Database()
    data_manager = DataManager(database=database)
    models_manager = ModelsDBManager(models, data_manager, folder=folder_path)

    # Create model instance
    models_manager.create('temp_model', 'Model')

    # Create background manager
    worker = Celery(broker='redis://localhost:6379/0', backend='redis://localhost:6379/0') # redis broker and backend
    bg_manager = ModelsDBBackgroundManager(worker, models_manager)

    # Add training data to database
    train_data = [
        {'col_a': 1, 'col_b': 'a'},
        {'col_a': 2, 'col_b': 'b'}
    ]
    database.insert('models_test', train_data)

    # Initialize a model instance with inputs from database
    bg_manager.input_db('temp_model', 'models_test')

    # Update model instance with new data
    new_data = [
        {'col_a': 3, 'col_b': 'c'},
        {'col_a': 4, 'col_b': 'd'}
    ]
    database.insert('models_test', new_data)
    bg_manager.update_db('temp_model', 'models_test', where=['col_a > 2'])

input_db

ModelsDBBackgroundManager.input_db(name, dataset, parameters={}, where=None, *args, **kwargs)[source]

Initialize a model instance with data from the database as a background task.

Parameters

Author

Richard Wen <rrwen.dev@gmail.com>

Example

import tempfile
from celery import Celery
from msdss_base_database import Database
from msdss_data_api.managers import DataManager
from msdss_models_api.models import Model
from msdss_models_api.managers import *
from pprint import pprint

with tempfile.TemporaryDirectory() as folder_path:

    # Setup available models
    models = [Model]

    # Create manager
    database = Database()
    data_manager = DataManager(database=database)
    models_manager = ModelsDBManager(models, data_manager, folder=folder_path)

    # Create model instance
    models_manager.create('temp_model', 'Model')

    # Create background manager
    worker = Celery(broker='redis://localhost:6379/0', backend='redis://localhost:6379/0') # redis broker and backend
    bg_manager = ModelsDBBackgroundManager(worker, models_manager)

    # Add training data to database
    train_data = [
        {'col_a': 1, 'col_b': 'a'},
        {'col_a': 2, 'col_b': 'b'}
    ]
    database.insert('models_test', train_data)

    # Initialize a model instance with inputs from database
    bg_manager.input_db('temp_model', 'models_test')

update_db

ModelsDBBackgroundManager.update_db(name, dataset, parameters={}, where=None, *args, **kwargs)[source]

Update a model instance with new data from the database as a background task.

Runs msdss_models_api.managers.ModelsDBManager.update_db().

Parameters

Author

Richard Wen <rrwen.dev@gmail.com>

Example

import tempfile
from celery import Celery
from msdss_base_database import Database
from msdss_data_api.managers import DataManager
from msdss_models_api.models import Model
from msdss_models_api.managers import *
from pprint import pprint

with tempfile.TemporaryDirectory() as folder_path:

    # Setup available models
    models = [Model]

    # Create manager
    database = Database()
    data_manager = DataManager(database=database)
    models_manager = ModelsDBManager(models, data_manager, folder=folder_path)

    # Create model instance
    models_manager.create('temp_model', 'Model')

    # Create background manager
    worker = Celery(broker='redis://localhost:6379/0', backend='redis://localhost:6379/0') # redis broker and backend
    bg_manager = ModelsDBBackgroundManager(worker, models_manager)

    # Add training data to database
    train_data = [
        {'col_a': 1, 'col_b': 'a'},
        {'col_a': 2, 'col_b': 'b'}
    ]
    database.insert('models_test', train_data)

    # Initialize a model instance with inputs from database
    bg_manager.input_db('temp_model', 'models_test')

    # Update model instance with new data
    new_data = [
        {'col_a': 3, 'col_b': 'c'},
        {'col_a': 4, 'col_b': 'd'}
    ]
    database.insert('models_test', new_data)
    bg_manager.update_db('temp_model', 'models_test', where=['col_a > 2'])

ModelsMetadataManager

class msdss_models_api.managers.ModelsMetadataManager(data_manager=None, models_manager=None, table='model', columns=[{'name': 'id', 'type_': 'Integer', 'primary_key': True}, {'name': 'name', 'type_': 'String', 'unique': True}, ('title', 'String'), ('description', 'String'), ('tags', 'String'), ('source', 'String'), ('model', 'String'), ('can_input', 'Boolean'), ('can_output', 'Boolean'), ('can_update', 'Boolean'), ('created_by', 'String'), ('created_at', 'DateTime'), ('updated_at', 'DateTime')], name_column='name', updated_column='updated_at', base_table='base_model', base_columns=[('model', 'String'), ('description', 'String'), ('input_description', 'String'), ('output_description', 'String'), ('update_description', 'String')], *args, **kwargs)[source]

Class to manage models metadata in a database.

Parameters
  • data_manager (msdss_data_api.managers.DataManager or None) – Data manager object for managing data in a database. If None, a default manager will be used. The restricted tables for the handler will be set to [] while the only permitted table will be the table name of the parameter table.

  • table (str) – The name of the table to store the metadata.

  • columns (list(dict) or list(list)) –

    List of dict (kwargs) or lists (positional args) that are passed to sqlalchemy.schema.Column. See parameter columns in msdss_base_database:msdss_base_database.core.create_table(). This defines the table to store the metadata, where the default is:

    [{'name': 'id', 'primary_key': True, 'type_': 'Integer'},
     {'name': 'name', 'type_': 'String', 'unique': True},
     ('title', 'String'),
     ('description', 'String'),
     ('tags', 'String'),
     ('source', 'String'),
     ('model', 'String'),
     ('can_input', 'Boolean'),
     ('can_output', 'Boolean'),
     ('can_update', 'Boolean'),
     ('created_by', 'String'),
     ('created_at', 'DateTime'),
     ('updated_at', 'DateTime')]
    

  • name_column (str) – Name of the column identifying each entry.

  • updated_column (str) – Name of the column for storing the last updated date/time.

  • base_table (str) – Name of the table to store the metadata for base models.

  • *args – Additional arguments passed to msdss_data_api.managers.MetadataManager

  • **kwargs – Additional keyword arguments passed to msdss_data_api.managers.MetadataManager

base_table

Same as parameter base_table.

See other attributes in msdss_data_api.managers.MetadataManager.

Type

str

Author

Richard Wen <rrwen.dev@gmail.com>

Example

import tempfile
from datetime import datetime
from msdss_base_database import Database
from msdss_data_api.managers import DataManager
from msdss_models_api.managers import *
from msdss_models_api.defaults import *
from msdss_models_api.models import *

with tempfile.TemporaryDirectory() as folder_path:

    # Setup available models
    models = [Model]

    # Create manager
    models_manager = ModelsManager(models, folder=folder_path)

    # Setup database
    db = Database()

    # Check if the metadata table exists and drop if it does
    if db.has_table(DEFAULT_METADATA_TABLE):
        db.drop_table(DEFAULT_METADATA_TABLE)

    # Setup metadata manager
    data_manager = DataManager(database=db)
    mdm = ModelsMetadataManager(data_manager, models_manager)

    # Add metadata
    metadata = [{
        'title': 'Test Model',
        'description': 'model used for testing',
        'tags': 'test exp auto',
        'source': 'Automatically generated from Python',
        'model': 'Model',
        'created_by': 'msdss',
        'created_at': datetime.now(),
        'updated_at': datetime.now()
    }]
    mdm.create('test_model', metadata)

    # Get metadata
    metadata_get = mdm.get('test_model')

    # Search metadata
    search_results = mdm.search(where=['title = "Test Model"'])

    # Update metadata
    mdm.update('test_model', {'description': 'NEW DESCRIPTION'})

    # Delete metadata
    mdm.delete('test_model')

load_base_models

ModelsMetadataManager.load_base_models()[source]

Load base model metadata into a table in the database.

If a table with the same name exists, it will be deleted and rebuilt.

Author

Richard Wen <rrwen.dev@gmail.com>

Example

import tempfile
from msdss_base_database import Database
from msdss_data_api.managers import DataManager
from msdss_models_api.managers import *
from msdss_models_api.defaults import *
from msdss_models_api.models import *

with tempfile.TemporaryDirectory() as folder_path:

    # Setup available models
    models = [Model]

    # Create manager
    models_manager = ModelsManager(models, folder=folder_path)

    # Setup database
    db = Database()

    # Check if the metadata table exists and drop if it does
    if db.has_table(DEFAULT_METADATA_TABLE):
        db.drop_table(DEFAULT_METADATA_TABLE)

    # Setup metadata manager
    data_manager = DataManager(database=db)
    mdm = ModelsMetadataManager(data_manager, models_manager)

    # Load base models
    mdm.load_base_models()

search_base_models

ModelsMetadataManager.search_base_models(*args, **kwargs)[source]

Search base model metadata.

See msdss_data_api.managers.DataManager.get().

Parameters
Returns

A list of dicts where each dict represents a row of the base models table, with each key being a column name and each value being the entry for that column.

Return type

list(dict)

Author

Richard Wen <rrwen.dev@gmail.com>

Example

import tempfile
from msdss_base_database import Database
from msdss_data_api.managers import DataManager
from msdss_models_api.managers import *
from msdss_models_api.defaults import *
from msdss_models_api.models import *

with tempfile.TemporaryDirectory() as folder_path:

    # Setup available models
    models = [Model]

    # Create manager
    models_manager = ModelsManager(models, folder=folder_path)

    # Setup database
    db = Database()

    # Check if the metadata table exists and drop if it does
    if db.has_table(DEFAULT_METADATA_TABLE):
        db.drop_table(DEFAULT_METADATA_TABLE)

    # Setup metadata manager
    data_manager = DataManager(database=db)
    mdm = ModelsMetadataManager(data_manager, models_manager)

    # Load base models
    mdm.load_base_models()

    # Search base models
    out = mdm.search_base_models()
    print(out)
[{'model': 'Model', 'description': '\nTemplate class to standardize modelling.\n\n\n* Methods delete, load, and save are handled by default using [:class:`pickle`](#id1) and do not need to be defined if there is no need for custom model saving and loading.* Methods input, output and update need to be defined as they are placeholders for standardized functions of the model\n\n\n\n  \nParameters\n----------\n\n\n* file\\_path: Path to save, load, and delete the model for persistence without the extension. Can be used in methods as self.file.\n* file\\_ext: File extension to save the model in.\n* can\\_input: Whether the method .input is defined and available. This is useful for controlling route requests in an API.\n* can\\_output: Whether the method .output is defined and available. This is useful for controlling route requests in an API.\n* can\\_update: Whether the method .update is defined and available. This is useful for controlling route requests in an API.\n* settings: Dict of initial custom settings to be used by model methods. These are expected not to change from the time of initialization.\n\n\n\n', 'input_description': '\nTemplate method for input data to initialize model.\n\n\nRequirements:* The first argument should be the input data seen in the parameters* Other arguments can be defined as any for the model after the first argument* Should set self.instance to the initialized modelNotes:* Does nothing but act as a template reference for class extension* This method should be re-defined using a class extension\n\n\n\n  \nParameters\n----------\n\n\n* data: Data to use for initializing the model. 
Should accept a list or dict to be input in a [:class:`pandas:pandas.DataFrame`](#id1) or the dataframe itself.\n\n\n\n', 'output_description': '\nTemplate method for a model to output data such as predictions or clusters.\n\n\nRequirements:* The first argument should be the input data seen in the parameters* Other arguments can be defined as any for the model output after the first argument* Ideally, should use self.instance to produce the outputNotes:* Does nothing but act as a template reference for class extension* This method should be re-defined using a class extension\n\n\n\n  \nParameters\n----------\n\n\n* data: Data to use as input for the model. Should accept a list or dict to be input in a [:class:`pandas:pandas.DataFrame`](#id1) or the dataframe itself.\n\n\n\n', 'update_description': '\nTemplate method for updating a model with new data.\n\n\nRequirements:* The first argument should be the input data seen in the parameters* Other arguments can be defined as any for the model output after the first argument* Ideally, should update [``](#id1)self.instance``Notes:* Does nothing but act as a template reference for class extension* This method should be re-defined using a class extension\n\n\n\n  \nParameters\n----------\n\n\n* data: Data to use for updating the model. Should accept a list or dict to be input in a [:class:`pandas:pandas.DataFrame`](#id1) or the dataframe itself.\n\n\n\n'}]