managers¶
DataManager¶
- class msdss_data_api.managers.DataManager(database=None, handler=None)[source]¶
Class to manage datasets in a database.
- Parameters
database (
msdss_base_database.core.Database
or None) – Database object to use for creating data. IfNone
, a default database will be used.handler (
msdss_data_api.data.DataHandler
or bool or None) –Handler for handling dataset events.
If
None
, a default handler will be created with parameterdatabase
If
False
, events will not be handledSets the handler database to be the parameter
database
Author
Richard Wen <rrwen.dev@gmail.com>
Example
from msdss_base_database import Database from msdss_data_api.managers import * # Setup database db = Database() dm = DataManager(database=db) # Check if the table exists and drop if it does if db.has_table("test_table"): db.drop_table("test_table") # Create sample data data = { 'id': [1, 2, 3], 'column_one': ['a', 'b', 'c'], 'column_two': [2, 4, 6] } dm.create('test_table', data) # Query sample data result = dm.get('test_table') # Update sample data new_data = {'column_one': 'updated_at'} dm.update('test_table', new_data, where=['id > 1']) # Delete sample data dm.delete('test_table', where=['id = 1']) # Delete the entire dataset dm.delete('test_table', delete_all=True)
create¶
- DataManager.create(name, data)[source]¶
Create a dataset.
See
msdss_base_database.core.Database.insert()
.- Parameters
Author
Richard Wen <rrwen.dev@gmail.com>
Example
from msdss_base_database import Database from msdss_data_api.managers import * # Setup database db = Database() dm = DataManager(database=db) # Check if the table exists and drop if it does if db.has_table("test_table"): db.drop_table("test_table") # Create sample data data = [ {'id': 1, 'column_one': 'a', 'column_two': 2}, {'id': 2, 'column_one': 'b', 'column_two': 4}, {'id': 3, 'column_one': 'c', 'column_two': 6}, ] dm.create('test_table', data)
delete¶
- DataManager.delete(name, where=None, where_boolean='AND', delete_all=False)[source]¶
Delete a dataset.
See
msdss_base_database.core.Database.delete()
.- Parameters
name (str) – Name of the dataset or table in the database to delete.
list of where statements the form of
column operator value
to further filter individual values or rows for deleting.Operators are one of
= != > >= > < <= LIKE NOTLIKE ILIKE NOTILIKE CONTAINS STARTSWITH ENDSWITH
Example:
'column_two < 3'
where_boolean (str) – One of AND or OR to combine where statements with. Defaults to AND if not one of AND or OR.
delete_all (bool) – Whether to remove the entire dataset or not.
Author
Richard Wen <rrwen.dev@gmail.com>
Example
from msdss_base_database import Database from msdss_data_api.managers import * # Setup database db = Database() dm = DataManager(database=db) # Check if the table exists and drop if it does if db.has_table("test_table"): db.drop_table("test_table") # Create sample data data = [ {'id': 1, 'column_one': 'a', 'column_two': 2}, {'id': 2, 'column_one': 'b', 'column_two': 4}, {'id': 3, 'column_one': 'c', 'column_two': 6}, ] dm.create('test_table', data) # Delete sample data dm.delete('test_table', where=['id = 1']) res = dm.get('test_table') print(res) # Delete the entire dataset dm.delete('test_table', delete_all=True)
[{'id': 2, 'column_one': 'b', 'column_two': 4}, {'id': 3, 'column_one': 'c', 'column_two': 6}]
get¶
- DataManager.get(name, select='*', where=None, group_by=None, aggregate=None, aggregate_func=None, order_by=None, order_by_sort=None, limit=None, offset=None, where_boolean='AND', *args, **kwargs)[source]¶
Query data from the database.
See
msdss_base_database.core.Database.select()
.- Parameters
name (str) – Name of the database table to query from.
List of column names or a single column name to filter or select from the table.
If
None
, columns will not be added to the select statement.If
'*'
, then all columns will be selected
list of where statements the form of
column operator value
to further filter individual values or rows.Operators are one of
= != > >= > < <= LIKE NOTLIKE ILIKE NOTILIKE CONTAINS STARTSWITH ENDSWITH
Example:
'column_two < 3'
group_by (list(str) or None) – Single or list of column names to group by. This should be used with
aggregate
andaggregate_func
.aggregate (list(str) or None) – Single or list of column names to aggregate using the
aggregate_func
. This should be used withgroup_by
andaggregate_func
.aggregate_func (list(str)) – Function name (such as ‘count’ or ‘sum’) from
sqlalchemy:sqlalchemy.sql.functions.Function
for aggregating records from eachaggregate
column. If a list of str, then it must have the same number of elements asaggregate
or else only the shortest length list will be used.order_by (list(str) or None) – Single or list of column names to order or sort by.
order_by_sort (list(str)) – Sort the records in increasing or decreasing order by each column in
order_by
, where the value can be one of ‘asc’ for ascending or ‘desc’ for descending’. If a list of str, then it must have the same number of elements asorder_by
or else only the shortest length list will be used.limit (int or None) – Integer number to limit the number of rows returned.
where_boolean (str) – One of
AND
orOR
to combinewhere
statements with. Defaults toAND
if not one ofAND
orOR
.*args – Additional arguments passed to
msdss_base_database.core.Database.select()
.**kwargs – Additional arguments passed to
msdss_base_database.core.Database.select()
.
- Returns
A list of dicts where each key is the column name.
- Return type
Author
Richard Wen <rrwen.dev@gmail.com>
Example
from msdss_base_database import Database from msdss_data_api.managers import * # Setup objects db = Database() dm = DataManager(database=db) # Check if the table exists and drop if it does if db.has_table("test_table"): db.drop_table("test_table") # Create sample data data = [ {'id': 1, 'column_one': 'a', 'column_two': 2}, {'id': 2, 'column_one': 'b', 'column_two': 4}, {'id': 3, 'column_one': 'c', 'column_two': 6}, ] dm.create('test_table', data) # Query the data from the database result = dm.get('test_table') print(result)
[{'id': 1, 'column_one': 'a', 'column_two': 2}, {'id': 2, 'column_one': 'b', 'column_two': 4}, {'id': 3, 'column_one': 'c', 'column_two': 6}]
get_columns¶
- DataManager.get_columns(name)[source]¶
Get number of columns for a dataset.
See
msdss_base_database.core.Database.columns()
.- Parameters
name (str) – Name of the dataset or table in the database.
- Returns
Number of columns for the dataset.
- Return type
Author
Richard Wen <rrwen.dev@gmail.com>
Example
from msdss_base_database import Database from msdss_data_api.managers import * # Setup database db = Database() dm = DataManager(database=db) # Check if the table exists and drop if it does if db.has_table("test_table"): db.drop_table("test_table") # Create sample data data = [ {'id': 1, 'column_one': 'a', 'column_two': 2}, {'id': 2, 'column_one': 'b', 'column_two': 4}, {'id': 3, 'column_one': 'c', 'column_two': 6}, ] dm.create('test_table', data) # Get num of cols columns = dm.get_columns('test_table') print(f'columns: {columns}')
columns: 3
get_rows¶
- DataManager.get_rows(name)[source]¶
Get number of rows for a dataset.
See
msdss_base_database.core.Database.rows()
.- Parameters
name (str) – Name of the dataset or table in the database.
- Returns
Number of rows for the dataset.
- Return type
Author
Richard Wen <rrwen.dev@gmail.com>
Example
from msdss_base_database import Database from msdss_data_api.managers import * # Setup database db = Database() dm = DataManager(database=db) # Check if the table exists and drop if it does if db.has_table("test_table"): db.drop_table("test_table") # Create sample data data = [ {'id': 1, 'column_one': 'a', 'column_two': 2}, {'id': 2, 'column_one': 'b', 'column_two': 4}, {'id': 3, 'column_one': 'c', 'column_two': 6}, ] dm.create('test_table', data) # Get num of rows rows = dm.get_rows('test_table') print(f'rows: {rows}')
rows: 3
insert¶
- DataManager.insert(name, data)[source]¶
Create a dataset.
See
msdss_base_database.core.Database.insert()
.- Parameters
Author
Richard Wen <rrwen.dev@gmail.com>
Example
from msdss_base_database import Database from msdss_data_api.managers import * # Setup database db = Database() dm = DataManager(database=db) # Check if the table exists and drop if it does if db.has_table("test_table"): db.drop_table("test_table") # Create sample data data = [ {'id': 1, 'column_one': 'a', 'column_two': 2}, {'id': 2, 'column_one': 'b', 'column_two': 4}, {'id': 3, 'column_one': 'c', 'column_two': 6}, ] dm.create('test_table', data) # Insert more data more_data = [ {'id': 4, 'column_one': 'e', 'column_two': 8}, {'id': 5, 'column_one': 'f', 'column_two': 10}, {'id': 6, 'column_one': 'g', 'column_two': 12}, ] dm.insert('test_table', more_data)
update¶
- DataManager.update(name, data, where)[source]¶
Update data from the database.
See
msdss_base_database.core.Database.update()
.- Parameters
name (str) – Name of the table to update.
data (dict) – Dictionary representing values to update if they match the
where
parameter requirements. Each key is a column and the value is the updated new value.list of where statements the form of
column operator value
to further filter individual values or rows.Operators are one of
= != > >= > < <= LIKE NOTLIKE ILIKE NOTILIKE CONTAINS STARTSWITH ENDSWITH
Example:
'column_two < 3'
Author
Richard Wen <rrwen.dev@gmail.com>
Example
from msdss_base_database import Database from msdss_data_api.managers import * # Setup database db = Database() dm = DataManager(database=db) # Check if the table exists and drop if it does if db.has_table("test_table"): db.drop_table("test_table") # Create sample data data = [ {'id': 1, 'column_one': 'a', 'column_two': 2}, {'id': 2, 'column_one': 'b', 'column_two': 4}, {'id': 3, 'column_one': 'c', 'column_two': 6}, ] dm.create('test_table', data) # Update the data from the database new_data = {'column_one': 'updated_at'} dm.update('test_table', new_data, where=['id > 1']) # See updated data result = dm.get('test_table') print(result)
[{'id': 1, 'column_one': 'a', 'column_two': 2}, {'id': 2, 'column_one': 'updated_at', 'column_two': 4}, {'id': 3, 'column_one': 'updated_at', 'column_two': 6}]
MetadataManager¶
- class msdss_data_api.managers.MetadataManager(data_manager=None, table='data', columns=[{'name': 'id', 'type_': 'Integer', 'primary_key': True}, {'name': 'dataset', 'type_': 'String', 'unique': True}, ('title', 'String'), ('description', 'String'), ('tags', 'String'), ('source', 'String'), ('created_by', 'String'), ('created_at', 'DateTime'), ('updated_at', 'DateTime')], name_column='dataset', updated_column='updated_at')[source]¶
Class to manage metadata in a database.
- Parameters
data_manager (
msdss_data_api.managers.DataManager
or None) – Data manager object for managing datasets in a database. IfNone
, a default manager will be created. The restricted tables for the handler will be set to[]
while the only permitted table will be the table name of the parametertable
.table (str) – The name of the table to store the metadata.
columns (list(dict) or list(list)) –
List of dict (kwargs) or lists (positional args) that are passed to sqlalchemy.schema.Column. See parameter
columns
inmsdss_base_database:msdss_base_database.core.create_table()
. This defines the table to store the metadata, where the default is:[{'name': 'id', 'primary_key': True, 'type_': 'Integer'}, {'name': 'dataset', 'type_': 'String', 'unique': True}, ('title', 'String'), ('description', 'String'), ('tags', 'String'), ('source', 'String'), ('created_by', 'String'), ('created_at', 'DateTime'), ('updated_at', 'DateTime')]
name_column (str) – Name of the column identifying each entry.
updated_column (str) – Name of the column for storing the last updated date/time.
- data_manager¶
Same as parameter
data_manager
.
Author
Richard Wen <rrwen.dev@gmail.com>
Example
from datetime import datetime from msdss_base_database import Database from msdss_data_api.managers import * from msdss_data_api.defaults import * # Setup database db = Database() # Check if the metadata table exists and drop if it does if db.has_table(DEFAULT_METADATA_TABLE): db.drop_table(DEFAULT_METADATA_TABLE) # Setup metadata manager data_manager = DataManager(database=db) mdm = MetadataManager(data_manager) # Add metadata metadata = [{ 'title': 'Testing Data', 'description': 'Data used for testing', 'tags': 'test exp auto', 'source': 'Automatically generated from Python', 'created_by': 'msdss', 'created_at': datetime.now(), 'updated_at': datetime.now() }] mdm.create('test_data', metadata) # Get metadata metadata_get = mdm.get('test_data') # Search metadata search_results = mdm.search(where=['title = "Testing Data"']) # Update metadata mdm.update('test_data', {'description': 'NEW DESCRIPTION'}) # Delete metadata mdm.delete('test_data')
create¶
- MetadataManager.create(name, data)[source]¶
Create a metadata entry.
See
msdss_data_api.managers.DataManager.delete()
.- Parameters
name (str) – Name of the entry (e.g. dataset) to add metadata for.
Metadata to insert into the table, where each key represents a metadata descriptor. The default key names are:
[{'name': 'id', 'primary_key': True, 'type_': <class 'sqlalchemy.sql.sqltypes.Integer'>}, {'name': 'dataset', 'type_': <class 'sqlalchemy.sql.sqltypes.String'>, 'unique': True}, ('title', 'String'), ('description', 'String'), ('tags', 'String'), ('source', 'String'), ('created_by', 'String'), ('created_at', 'DateTime'), ('updated_at', 'DateTime')]
Author
Richard Wen <rrwen.dev@gmail.com>
Example
from datetime import datetime from pprint import pprint from msdss_base_database import Database from msdss_data_api.managers import * from msdss_data_api.defaults import * # Setup database db = Database() # Check if the metadata table exists and drop if it does if db.has_table(DEFAULT_METADATA_TABLE): db.drop_table(DEFAULT_METADATA_TABLE) # Setup metadata manager data_manager = DataManager(database=db) mdm = MetadataManager(data_manager) # Add metadata metadata = [{ 'title': 'Testing Data', 'description': 'Data used for testing', 'tags': 'test exp auto', 'source': 'Automatically generated from Python', 'created_by': 'msdss', 'created_at': datetime.now(), 'updated_at': datetime.now() }] mdm.create('test_data', metadata) # Print results tb = mdm.search() pprint(tb)
[{'created_at': Timestamp('2021-12-23 22:34:46.653503'), 'created_by': 'msdss', 'dataset': 'test_data', 'description': 'Data used for testing', 'id': 1, 'source': 'Automatically generated from Python', 'tags': 'test exp auto', 'title': 'Testing Data', 'updated_at': Timestamp('2021-12-23 22:34:46.653508')}]
delete¶
- MetadataManager.delete(name)[source]¶
Delete a metadata entry.
See
msdss_data_api.managers.DataManager.delete()
.- Parameters
name (str) – Name of the entry (e.g. dataset) to add metadata for.
Author
Richard Wen <rrwen.dev@gmail.com>
Example
from datetime import datetime from pprint import pprint from msdss_base_database import Database from msdss_data_api.managers import * from msdss_data_api.defaults import * # Setup database db = Database() # Check if the metadata table exists and drop if it does if db.has_table(DEFAULT_METADATA_TABLE): db.drop_table(DEFAULT_METADATA_TABLE) # Setup metadata manager data_manager = DataManager(database=db) mdm = MetadataManager(data_manager) # Add metadata metadata = [{ 'dataset': 'test_data', 'title': 'Testing Data', 'description': 'Data used for testing', 'tags': 'test exp auto', 'source': 'Automatically generated from Python', 'created_by': 'msdss', 'created_at': datetime.now(), 'updated_at': datetime.now() }] mdm.create('test_data', metadata) before_delete = mdm.search() # Delete metadata mdm.delete('test_data') after_delete = mdm.search() # Print results print('before_delete:\n') pprint(before_delete) print('\nafter_delete:\n') pprint(after_delete)
before_delete: [{'created_at': Timestamp('2021-12-23 22:34:46.717663'), 'created_by': 'msdss', 'dataset': 'test_data', 'description': 'Data used for testing', 'id': 1, 'source': 'Automatically generated from Python', 'tags': 'test exp auto', 'title': 'Testing Data', 'updated_at': Timestamp('2021-12-23 22:34:46.717670')}] after_delete: []
get¶
- MetadataManager.get(name)[source]¶
Search metadata entries.
See
msdss_data_api.managers.DataManager.get()
.- Parameters
name (str) – Name of the entry (e.g. dataset) to add metadata for.
- Returns
A list of dicts where each key is the column name.
- Return type
Author
Richard Wen <rrwen.dev@gmail.com>
Example
from datetime import datetime from pprint import pprint from msdss_base_database import Database from msdss_data_api.managers import * from msdss_data_api.defaults import * # Setup database db = Database() # Check if the metadata table exists and drop if it does if db.has_table(DEFAULT_METADATA_TABLE): db.drop_table(DEFAULT_METADATA_TABLE) # Setup metadata manager data_manager = DataManager(database=db) mdm = MetadataManager(data_manager) # Add metadata metadata = [{ 'title': 'Testing Data', 'description': 'Data used for testing', 'tags': 'test exp auto', 'source': 'Automatically generated from Python', 'created_by': 'msdss', 'created_at': datetime.now(), 'updated_at': datetime.now() }] mdm.create('test_data', metadata) # Get metadata metadata_get = mdm.get('test_data') pprint(metadata_get)
[{'created_at': Timestamp('2021-12-23 22:34:46.790656'), 'created_by': 'msdss', 'dataset': 'test_data', 'description': 'Data used for testing', 'id': 1, 'source': 'Automatically generated from Python', 'tags': 'test exp auto', 'title': 'Testing Data', 'updated_at': Timestamp('2021-12-23 22:34:46.790662')}]
search¶
- MetadataManager.search(*args, **kwargs)[source]¶
Search metadata entries.
See
msdss_data_api.managers.DataManager.get()
.- Parameters
*args – Additional arguments passed to
msdss_data_api.managers.DataManager.get()
except for parametertable
.**kwargs – Additional arguments passed to
msdss_data_api.managers.DataManager.get()
except for parametertable
.
- Returns
A dict of lists where each key is the column name and each list contains the values for columns in the order of the rows of the table.
- Return type
Author
Richard Wen <rrwen.dev@gmail.com>
Example
from datetime import datetime from pprint import pprint from msdss_base_database import Database from msdss_data_api.managers import * from msdss_data_api.defaults import * # Setup database db = Database() # Check if the metadata table exists and drop if it does if db.has_table(DEFAULT_METADATA_TABLE): db.drop_table(DEFAULT_METADATA_TABLE) # Setup metadata manager data_manager = DataManager(database=db) mdm = MetadataManager(data_manager) # Add metadata metadata = [{ 'title': 'Testing Data', 'description': 'Data used for testing', 'tags': 'test exp auto', 'source': 'Automatically generated from Python', 'created_by': 'msdss', 'created_at': datetime.now(), 'updated_at': datetime.now() }] mdm.create('test_data', metadata) # Search metadata results = mdm.search(where=['title = "Testing Data"']) pprint(results)
[{'created_at': Timestamp('2021-12-23 22:34:46.855564'), 'created_by': 'msdss', 'dataset': 'test_data', 'description': 'Data used for testing', 'id': 1, 'source': 'Automatically generated from Python', 'tags': 'test exp auto', 'title': 'Testing Data', 'updated_at': Timestamp('2021-12-23 22:34:46.855572')}]
update¶
- MetadataManager.update(name, data)[source]¶
Update metadata entry.
See
msdss_data_api.managers.DataManager.update()
.- Parameters
Author
Richard Wen <rrwen.dev@gmail.com>
Example
from datetime import datetime from pprint import pprint from msdss_base_database import Database from msdss_data_api.managers import * from msdss_data_api.defaults import * # Setup database db = Database() # Check if the metadata table exists and drop if it does if db.has_table(DEFAULT_METADATA_TABLE): db.drop_table(DEFAULT_METADATA_TABLE) # Setup metadata manager data_manager = DataManager(database=db) mdm = MetadataManager(data_manager) # Add metadata metadata = [{ 'title': 'Testing Data', 'description': 'Data used for testing', 'tags': 'test exp auto', 'source': 'Automatically generated from Python', 'created_by': 'msdss', 'created_at': datetime.now(), 'updated_at': datetime.now() }] mdm.create('test_data', metadata) before_update = mdm.get('test_data') # Update metadata mdm.update('test_data', {'description': 'NEW DESCRIPTION'}) after_update = mdm.get('test_data') # Print results print('before_update:\n') pprint(before_update) print('\nafter_update:\n') pprint(after_update)
before_update: [{'created_at': Timestamp('2021-12-23 22:34:46.917063'), 'created_by': 'msdss', 'dataset': 'test_data', 'description': 'Data used for testing', 'id': 1, 'source': 'Automatically generated from Python', 'tags': 'test exp auto', 'title': 'Testing Data', 'updated_at': Timestamp('2021-12-23 22:34:46.917070')}] after_update: [{'created_at': Timestamp('2021-12-23 22:34:46.917063'), 'created_by': 'msdss', 'dataset': 'test_data', 'description': 'NEW DESCRIPTION', 'id': 1, 'source': 'Automatically generated from Python', 'tags': 'test exp auto', 'title': 'Testing Data', 'updated_at': Timestamp('2021-12-23 22:34:46.917070')}]
updated_at¶
- MetadataManager.updated_at(name, dt=None)[source]¶
Set last updated entry.
See
msdss_data_api.managers.MetadataManager.update()
.- Parameters
name (str) – Name of the entry (e.g. dataset) to add metadata for.
dt (
datetime.datetime
or None) – Datetime object representing the last updated time. IfNone
, will be set to now.
Author
Richard Wen <rrwen.dev@gmail.com>
Example
from datetime import datetime from pprint import pprint from msdss_base_database import Database from msdss_data_api.managers import * from msdss_data_api.defaults import * # Setup database db = Database() # Check if the metadata table exists and drop if it does if db.has_table(DEFAULT_METADATA_TABLE): db.drop_table(DEFAULT_METADATA_TABLE) # Setup metadata manager data_manager = DataManager(database=db) mdm = MetadataManager(data_manager) # Add metadata metadata = [{ 'title': 'Testing Data', 'description': 'Data used for testing', 'tags': 'test exp auto', 'source': 'Automatically generated from Python', 'created_by': 'msdss', 'created_at': datetime.now(), }] mdm.create('test_data', metadata) before_update = mdm.get('test_data') # Update metadata mdm.updated_at('test_data', datetime.now()) after_update = mdm.get('test_data') # Print results print('before_update:\n') pprint(before_update) print('\nafter_update:\n') pprint(after_update)
before_update: [{'created_at': Timestamp('2021-12-23 22:34:47.004171'), 'created_by': 'msdss', 'dataset': 'test_data', 'description': 'Data used for testing', 'id': 1, 'source': 'Automatically generated from Python', 'tags': 'test exp auto', 'title': 'Testing Data', 'updated_at': None}] after_update: [{'created_at': Timestamp('2021-12-23 22:34:47.004171'), 'created_by': 'msdss', 'dataset': 'test_data', 'description': 'Data used for testing', 'id': 1, 'source': 'Automatically generated from Python', 'tags': 'test exp auto', 'title': 'Testing Data', 'updated_at': Timestamp('2021-12-23 22:34:47.017886')}]