import argparse
import ast
import os
import pandas
from datetime import datetime
from fastapi import HTTPException
from msdss_base_database import Database
from msdss_base_database.env import DatabaseDotEnv
from msdss_base_database.defaults import DEFAULT_SUPPORTED_OPERATORS
from .core import DataAPI
from .defaults import *
from .managers import *
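# (imports_users) UsersAPI is an optional dependency - it is only needed when the "start" command is run with --enable_users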
try:
from msdss_users_api import UsersAPI
except ImportError:
pass
def _get_parser():
"""
Builds an ``argparse`` parser for the ``msdss-data`` command line tool.
Returns
-------
:class:`argparse.ArgumentParser`
An ``argparse`` parser for ``msdss-data``.
Author
------
Richard Wen <rrwen.dev@gmail.com>
Example
-------
.. jupyter-execute::
:hide-output:
from msdss_data_api.cli import _get_parser
parser = _get_parser()
parser.print_help()
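As a rough sketch (the argument values below are made up), the returned parser can also parse a command line directly:
.. code::
from msdss_data_api.cli import _get_parser
parser = _get_parser()
args = parser.parse_args(['get', 'mydataset', '--limit', '5'])
print(vars(args))  # {'command': 'get', 'dataset': 'mydataset', 'limit': 5, ...}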
"""
# (_get_parser_parsers) Create main parser and sub parsers
parser = argparse.ArgumentParser(description='Manages data with a database')
subparsers = parser.add_subparsers(title='commands', dest='command')
# (_get_parser_create) Add create command
create_parser = subparsers.add_parser('create', help='create a dataset')
create_parser.add_argument('dataset', type=str, help='dataset name')
create_parser.add_argument('data', type=str, help='file path for data to create - supports .csv, .xlsx, .json, .xml')
create_parser.add_argument('--metadata', metavar=('COL', 'VALUE'), nargs=2, action='append', help='metadata columns to set - e.g. title "Some Title", description "Some descr.."')
# (_get_parser_delete) Add delete command
delete_parser = subparsers.add_parser('delete', help='delete a dataset')
delete_parser.add_argument('dataset', type=str, help='dataset name')
delete_parser.add_argument('--where', action='append', help='where statement for filtering rows to delete in the form of "col operator value" (e.g. "col_a = 1") - operators supported are ' + ', '.join(DEFAULT_SUPPORTED_OPERATORS))
delete_parser.add_argument('--where_boolean', type=str, default='AND', help='boolean operator (AND or OR) used to join multiple where statements')
delete_parser.add_argument('--delete_all', dest='delete_all', action='store_true', help='flag to delete the entire dataset')
delete_parser.set_defaults(delete_all=False)
# (_get_parser_get) Add get command
get_parser = subparsers.add_parser('get', help='query a dataset')
get_parser.add_argument('dataset', type=str, help='dataset name')
get_parser.add_argument('--select', action='append', help='column names to select')
get_parser.add_argument('--where', action='append', help='where statement for filtering rows to get in the form of "col operator value" (e.g. "col_a = 1") - operators supported are ' + ', '.join(DEFAULT_SUPPORTED_OPERATORS))
get_parser.add_argument('--group_by', action='append', help='column names to group by')
get_parser.add_argument('--aggregate', action='append', help='column names to aggregate')
get_parser.add_argument('--aggregate_func', action='append', help='aggregate func name (e.g. sum, count) in the order of --aggregate')
get_parser.add_argument('--order_by', action='append', help='column names to sort by')
get_parser.add_argument('--order_by_sort', action='append', help='asc or desc for each col in --order_by')
get_parser.add_argument('--limit', type=int, default=None, help='maximum number of rows to return')
get_parser.add_argument('--where_boolean', type=str, default='AND', help='boolean operator (AND or OR) used to join multiple where statements')
# (_get_parser_insert) Add insert command
insert_parser = subparsers.add_parser('insert', help='insert data into a dataset')
insert_parser.add_argument('dataset', type=str, help='dataset name')
insert_parser.add_argument('data', type=str, help='data to insert - e.g. [ {"col_a": 1, "col_b": "a"}, {"col_a": 2, "col_b": "b"} ]')
# (_get_parser_update) Add update command
update_parser = subparsers.add_parser('update', help='update data in a dataset')
update_parser.add_argument('dataset', type=str, help='dataset name')
update_parser.add_argument('--data', required=True, metavar=('COL', 'VALUE'), nargs=2, action='append', help='column names and values to update with')
update_parser.add_argument('--where', action='append', help='where statement for filtering rows to update in the form of "col operator value" (e.g. "col_a = 1") - operators supported are ' + ', '.join(DEFAULT_SUPPORTED_OPERATORS))
# (_get_parser_search) Add search command
search_parser = subparsers.add_parser('search', help='search datasets')
search_parser.add_argument('--select', action='append', help='column names to select')
search_parser.add_argument('--where', action='append', help='where statement for filtering rows to get in the form of "col operator value" (e.g. "col_a = 1") - operators supported are ' + ', '.join(DEFAULT_SUPPORTED_OPERATORS))
search_parser.add_argument('--group_by', action='append', help='column names to group by')
search_parser.add_argument('--aggregate', action='append', help='column names to aggregate')
search_parser.add_argument('--aggregate_func', action='append', help='aggregate func name (e.g. sum, count) in the order of --aggregate')
search_parser.add_argument('--order_by', action='append', help='column names to sort by')
search_parser.add_argument('--order_by_sort', action='append', help='asc or desc for each col in --order_by')
search_parser.add_argument('--limit', type=int, default=None, help='maximum number of rows to return')
search_parser.add_argument('--where_boolean', type=str, default='AND', help='boolean operator (AND or OR) used to join multiple where statements')
# (_get_parser_start) Add start command
start_parser = subparsers.add_parser('start', help='start a data api server')
start_parser.add_argument('--host', type=str, default='127.0.0.1', help='address to host server')
start_parser.add_argument('--port', type=int, default=8000, help='port to host server')
start_parser.add_argument('--log_level', type=str, default='info', help='level of verbose messages to display')
start_parser.add_argument('--set', metavar=('ROUTE', 'KEY', 'VALUE'), nargs=3, action='append', help='set route settings, where ROUTE is the route name (create, delete, metadata, etc), KEY is the setting name (e.g. path, _enable, etc), and VALUE is value for the setting')
start_parser.add_argument('--enable_users', dest='enable_users', action='store_true', help='enable user auth for routes')
start_parser.set_defaults(enable_users=False)
# (_get_parser_file_key) Add file and key arguments to all commands
for p in [parser, create_parser, delete_parser, get_parser, insert_parser, update_parser, search_parser, start_parser]:
p.add_argument('--env_file', type=str, default='./.env', help='path of .env file')
p.add_argument('--key_path', type=str, default=None, help='path of key file')
# (_get_parser_out) Return the parser
out = parser
return out
def _parse_route_settings(cli_route_settings):
"""
Parses ``--set`` route settings for the ``msdss-data`` command line tool.
Parameters
----------
cli_route_settings : list(tuple)
List of tuples of length 3, representing the route, key setting and value setting in order.
Returns
-------
dict
A dictionary that can be passed to parameter ``route_settings`` in :func:`msdss_data_api.routers.get_data_router`
Author
------
Richard Wen <rrwen.dev@gmail.com>
Example
-------
.. jupyter-execute::
:hide-output:
from msdss_data_api.cli import _parse_route_settings
from pprint import pprint
cli_route_settings = [
('create', '_restricted_tables', '["user", "data"]'),
('create', '_enable', 'True'),
('create', '_get_user', '{"superuser": True}'),
('delete', 'path', '/{dataset}'),
('delete', 'tags', '["data"]')
]
route_settings = _parse_route_settings(cli_route_settings)
pprint(route_settings)
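With the inputs above, the returned dictionary is expected to look roughly like this - keys handled by the function (``tags``, ``_restricted_tables``, ``_get_user``, ``_enable``) are parsed with ``ast.literal_eval`` while all other values stay as strings:
.. code::
{
'create': {'_restricted_tables': ['user', 'data'], '_enable': True, '_get_user': {'superuser': True}},
'delete': {'path': '/{dataset}', 'tags': ['data']}
}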
"""
out = {route: {} for route, k, v in cli_route_settings}
for route, key, value in cli_route_settings:
if key in ('tags', '_restricted_tables', '_get_user', '_enable'):
out[route][key] = ast.literal_eval(value)
else:
out[route][key] = value
return out
def _read_data_file(data_path):
"""
Reads a data file into a :class:`pandas:pandas.DataFrame` object.
Parameters
----------
data_path : str
Path of the data file with extension. Supports ``.csv``, ``.xlsx``/``.xls``, ``.json``, and ``.xml``.
Returns
-------
:class:`pandas:pandas.DataFrame`
A dataframe object of the file data.
Author
------
Richard Wen <rrwen.dev@gmail.com>
Example
-------
.. code::
from msdss_data_api.cli import _read_data_file
data = _read_data_file('path/to/data.json')
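Since the reader is chosen from the file extension, the same call applies to the other supported formats (paths below are placeholders):
.. code::
data = _read_data_file('path/to/data.csv')
data = _read_data_file('path/to/data.xlsx')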
"""
data_ext = os.path.splitext(data_path)[1].lower()
if data_ext == '.csv':
out = pandas.read_csv(data_path)
elif data_ext in ('.xlsx', '.xls'):
out = pandas.read_excel(data_path)
elif data_ext == '.json':
out = pandas.read_json(data_path)
elif data_ext == '.xml':
out = pandas.read_xml(data_path)
else:
raise ValueError('Unsupported data file extension: ' + data_ext)
return out
def run():
"""
Runs the ``msdss-data`` command.
Author
------
Richard Wen <rrwen.dev@gmail.com>
Example
-------
>>> msdss-data --help
.. jupyter-execute::
:hide-code:
from msdss_data_api.cli import _get_parser
parser = _get_parser()
parser.print_help()
Create a dataset:
>>> msdss-data create <data_name> <path/to/data.json> --metadata title "Test Data"
Query dataset:
>>> msdss-data get <data_name>
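Query a dataset with filters and aggregates (dataset and column names are placeholders):
>>> msdss-data get <data_name> --where "<col_a> > 0" --group_by <col_b> --aggregate <col_a> --aggregate_func sum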
Update dataset values:
>>> msdss-data update <data_name> --data <col_name> <update_value> --where "<col_name> = <value>"
Insert data into dataset:
>>> msdss-data insert <data_name> "[{'col_a': 1, 'col_b': 'a'}]"
Delete a dataset:
>>> msdss-data delete <data_name> --delete_all
Search datasets:
>>> msdss-data search --where "dataset = <data_name>"
Start an API server:
>>> msdss-data start
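Start an API server on a custom port with route settings (the settings shown are only illustrative):
>>> msdss-data start --port 8080 --set delete _enable False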
"""
# (run_kwargs) Get arguments and command
parser = _get_parser()
kwargs = vars(parser.parse_args())
command = kwargs.pop('command')
# (run_env) Get env vars
env_kwargs = dict(
env_file=kwargs.pop('env_file'),
key_path=kwargs.pop('key_path')
)
# (run_database) Create database obj
database = Database(env=DatabaseDotEnv(**env_kwargs))
# (run_managers) Create data and metadata managers
data_manager = DataManager(database=database)
metadata_manager = MetadataManager(data_manager=data_manager)
# (run_command) Run commands
if command == 'create':
# (run_command_create_metadata) Get metadata attrs
metadata = kwargs.pop('metadata', None)
metadata = dict(metadata) if metadata else {}
metadata['created_by'] = 'admin'
metadata['created_at'] = datetime.now()
metadata['updated_at'] = datetime.now()
# (run_command_create_read) Read data from file
kwargs['data'] = _read_data_file(kwargs.pop('data'))
# (run_command_create_run) Create the dataset
try:
dataset = kwargs.get('dataset')
data_manager.create(**kwargs)
metadata_manager.create(dataset, metadata)
print('Dataset created ' + dataset)
except HTTPException as e:
print(e.detail)
elif command == 'delete':
# (run_command_delete) Execute user delete
try:
dataset = kwargs.get('dataset')
data_manager.delete(**kwargs)
metadata_manager.delete(dataset)
print('Dataset deleted ' + dataset)
except HTTPException as e:
print(e.detail)
elif command == 'get':
# (run_command_get) Query a dataset
try:
dataset = kwargs.get('dataset')
data = data_manager.get(**kwargs)
print(pandas.DataFrame(data))
except HTTPException as e:
print(e.detail)
elif command == 'insert':
# (run_command_insert) Insert data into a dataset
try:
kwargs['data'] = ast.literal_eval(kwargs['data'])
dataset = kwargs.get('dataset')
data_manager.insert(**kwargs)
metadata_manager.updated_at(dataset)
print('Data inserted into dataset ' + dataset)
except HTTPException as e:
print(e.detail)
elif command == 'update':
# (run_command_update_data) Get data attrs
kwargs['data'] = dict(kwargs['data'])
# (run_command_update_run) Update dataset data
try:
dataset = kwargs.get('dataset')
data_manager.update(**kwargs)
metadata_manager.updated_at(dataset)
print('Dataset updated ' + dataset)
except HTTPException as e:
print(e.detail)
elif command == 'search':
# (run_command_search_run) Search datasets
try:
data = metadata_manager.search(**kwargs)
print(pandas.DataFrame(data))
except HTTPException as e:
print(e.detail)
elif command == 'start':
# (run_command_start_server) Extract server kwargs
start_kwargs = dict(
host=kwargs.pop('host'),
port=kwargs.pop('port'),
log_level=kwargs.pop('log_level')
)
# (run_command_start_env) Configure env database and users api
kwargs['database'] = database
enable_users = kwargs.pop('enable_users')
if enable_users:
from msdss_users_api import UsersAPI
from msdss_users_api.env import UsersDotEnv
kwargs['users_api'] = UsersAPI(env=UsersDotEnv(**env_kwargs))
else:
kwargs['users_api'] = None
# (run_command_start_settings) Convert route settings
cli_route_settings = kwargs.pop('set', None)
kwargs['data_router_settings'] = dict(
route_settings=_parse_route_settings(cli_route_settings) if cli_route_settings else {}
)
# (run_command_start) Start the data api server
app = DataAPI(**kwargs)
app.start(**start_kwargs)
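# (cli_main) Allow running this module directly for development (e.g. "python cli.py get <dataset>") - the installed "msdss-data" console script normally calls run()
if __name__ == '__main__':
    run()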