From c8a41a949e1cbb0f1f46a22856f9c80b2d2e6ce4 Mon Sep 17 00:00:00 2001 From: Tim O'Guin Date: Wed, 16 Jun 2021 13:35:57 -0500 Subject: [PATCH] feat(cli): Commands to read/write AWS Accounts in DynamoDB (#11) ### Added - Adds an `organizations write-accounts-to-dynamodb` CLI command - Adds an `organizations read-accounts-from-dynamodb` CLI command - Adds DynamoDB (de)serialization functions and requests helpers to utils ### Changed - Updates the Makefile to allow setting a custom PYTHONBREAKPOINT when debugging - Updates `OrganizationDataBuilder` to allow setting the client during init - Updates `OrganizationDataBuilder` to allow excluding account parent data lookups - Renames `ModelBase` serialization function prefixes from `as_` to `to_` - Updates `APIClient.api()` to only pascalize keys in kwargs, not the values. This fixes a bug that was causing items being inserted into DynamoDB to be pascalized. - Updates `APIClient()` and `APIClient.Connect()` to skip creating the client if it already exists --- CHANGELOG.md | 11 ++++ Makefile | 3 +- aws_data_tools/builders/organizations.py | 35 +++++++----- aws_data_tools/cli/__init__.py | 73 +++++++++++++++++++++--- aws_data_tools/client.py | 5 +- aws_data_tools/models/base.py | 19 ++++-- aws_data_tools/models/organizations.py | 26 +++++---- aws_data_tools/utils.py | 39 ++++++++++++- 8 files changed, 170 insertions(+), 41 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 2e87ed0..571a6ad 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -20,10 +20,21 @@ No unreleased changes. 
- Adds a `field` argument to `ModelBase.as_dict()` to dump a single field in a model - Adds configurations for tox and other testing tools - Adds a quickstart to the top of the README +- Adds an `organizations write-accounts-to-dynamodb` CLI command +- Adds an `organizations read-accounts-from-dynamodb` CLI command +- Adds DynamoDB (de)serialization functions and requests helpers to utils ### Changed - Refactors `OrganizationDataBuilder` to allow more control over pulling data +- Updates the Makefile to allow setting a custom PYTHONBREAKPOINT when debugging +- Updates `OrganizationDataBuilder` to allow setting the client during init +- Updates `OrganizationDataBuilder` to allow excluding account parent data lookups +- Renames `ModelBase` serialization function prefixes from `as_` to `to_` +- Updates `APIClient.api()` to only pascalize keys in kwargs, not the values. This + fixes a bug that was causing items being inserted into DynamoDB to be pascalized. +- Updates `APIClient()` and `APIClient.Connect()` to skip creating the client if it + already exists ## [0.1.0-beta1] - 2020-06-09 diff --git a/Makefile b/Makefile index 3e4b5a3..f8df5e2 100644 --- a/Makefile +++ b/Makefile @@ -4,9 +4,10 @@ PY_INSTALL_ARGS ?=--extras="cli devtools docs" VENV_DIR ?=.venv CMD ?=/bin/bash DEBUG ?=false +PYTHONBREAKPOINT ?=ipdb.set_trace ifeq (${DEBUG},true) - export PYTHONBREAKPOINT=ipdb.set_trace + export PYTHONBREAKPOINT else export PYTHONBREAKPOINT=0 endif diff --git a/aws_data_tools/builders/organizations.py b/aws_data_tools/builders/organizations.py index 32b9e1c..b6aabf5 100644 --- a/aws_data_tools/builders/organizations.py +++ b/aws_data_tools/builders/organizations.py @@ -48,7 +48,7 @@ class OrganizationDataBuilder(ModelBase): Provides serialization to dicts and JSON. 
""" - client: APIClient = field(default=None, init=False, repr=False) + client: APIClient = field(default=None, repr=False) dm: Organization = field(default_factory=Organization) # Used by __post_init__() to determine what data to initialize (default is none) @@ -65,9 +65,12 @@ class OrganizationDataBuilder(ModelBase): init_policy_targets: InitVar[bool] = field(default=False) init_effective_policies: InitVar[bool] = field(default=False) + include_account_parents: bool = field(default=False) + def Connect(self): """Initialize an authenticated session""" - self.client = APIClient(_SERVICE_NAME) + if self.client is None: + self.client = APIClient(_SERVICE_NAME) def api(self, func: str, **kwargs) -> Union[List[Dict[str, Any]], Dict[str, Any]]: """Make arbitrary API calls with the session client""" @@ -257,7 +260,7 @@ def __e_ous_recurse( if parents is None: if self.dm.root is None: self.fetch_root() - parents = [self.dm.root.as_parchild()] + parents = [self.dm.root.to_parchild()] if self.dm._parent_child_tree is None: self.dm._parent_child_tree = {} if self.dm._child_parent_tree is None: @@ -277,15 +280,15 @@ def __e_ous_recurse( ) for ou_result in ou_results: ou = OrganizationalUnit(parent=parent, **ou_result) - ou_as_parchild = ou.as_parchild() - self.dm._parent_child_tree[parent.id].append(ou_as_parchild) + ou_to_parchild = ou.to_parchild() + self.dm._parent_child_tree[parent.id].append(ou_to_parchild) self.dm._child_parent_tree[ou.id] = parent ous.append(ou) - next_parents.append(ou_as_parchild) + next_parents.append(ou_to_parchild) acct_results = self.api("list_accounts_for_parent", parent_id=parent.id) for acct_result in acct_results: account = Account(parent=parent, **acct_result) - self.dm._parent_child_tree[parent.id].append(account.as_parchild()) + self.dm._parent_child_tree[parent.id].append(account.to_parchild()) self.dm._child_parent_tree[account.id] = parent return self.__e_ous_recurse(parents=next_parents, ous=ous, depth=depth + 1) @@ -330,7 +333,7 @@ 
def __l_accounts(self, include_parents: bool = False) -> None: accounts = [] for result in data: account = result - if include_parents: + if include_parents or self.include_account_parents: if self.dm._child_parent_tree is None: self.fetch_ous() account.parent = self.dm._child_parent_tree[account.id] @@ -461,17 +464,21 @@ def fetch_all_tags(self) -> None: self.fetch_ou_tags() self.fetch_account_tags() - def as_dict(self, **kwargs) -> Dict[str, Any]: + def to_dict(self, **kwargs) -> Dict[str, Any]: """Return the data model for the organization as a dictionary""" - return self.dm.as_dict(**kwargs) + return self.dm.to_dict(**kwargs) + + def to_dynamodb(self, **kwargs) -> Dict[str, Any]: + """Return the data model for the organization as a DynamoDB Item""" + return self.dm.to_dynamodb(**kwargs) - def as_json(self, **kwargs) -> str: + def to_json(self, **kwargs) -> str: """Return the data model for the organization as a JSON string""" - return self.dm.as_json(**kwargs) + return self.dm.to_json(**kwargs) - def as_yaml(self, **kwargs) -> str: + def to_yaml(self, **kwargs) -> str: """Return the data model for the organization as a YAML string""" - return self.dm.as_yaml(**kwargs) + return self.dm.to_yaml(**kwargs) def fetch_all(self) -> None: """Initialize all data for nodes and edges in the organization""" diff --git a/aws_data_tools/cli/__init__.py b/aws_data_tools/cli/__init__.py index 10f7b4f..5652bce 100644 --- a/aws_data_tools/cli/__init__.py +++ b/aws_data_tools/cli/__init__.py @@ -2,10 +2,12 @@ CLI interface for working with data from AWS APIs """ +from itertools import zip_longest from json import dumps as json_dumps +from json import load as json_load from re import fullmatch from traceback import format_exc -from typing import Any, Dict, List, Union +from typing import Any, Dict, List from botocore.exceptions import ClientError, NoCredentialsError @@ -21,7 +23,14 @@ ) from .. 
import get_version +from ..client import APIClient from ..builders.organizations import OrganizationDataBuilder +from ..models.organizations import Account + +from ..utils import ( + deserialize_dynamodb_items, + prepare_dynamodb_batch_put_request, +) CONTEXT_SETTINGS = {"help_option_names": ["-h", "--help"]} @@ -54,7 +63,7 @@ def handle_error(ctx, err_msg, tb=None): ctx.exit(1) -@organization.command() +@organization.command(short_help="Dump org data as JSON") @option( "--no-accounts", default=False, @@ -83,7 +92,7 @@ def dump_json( no_policies: bool, format_: str, out_file: str, -) -> Union[str, None]: +) -> None: """Dump a JSON representation of the organization""" err_msg = None tb = None @@ -100,11 +109,11 @@ def dump_json( kwargs["init_policies"] = False kwargs["init_policy_tags"] = False kwargs["init_policy_targets"] = False - odb = OrganizationDataBuilder(**kwargs) + odb = OrganizationDataBuilder(include_account_parents=True, **kwargs) if format_ == "JSON": - s_func = odb.as_json + s_func = odb.to_json elif format_ == "YAML": - s_func = odb.as_yaml + s_func = odb.to_yaml if out_file is None: out_file = "-" with open_file(out_file, mode="wb") as f: @@ -153,7 +162,7 @@ def lookup_accounts( include_effective_policies: bool, include_policies: bool, include_tags: bool, -) -> str: +) -> None: """Query for account details using a list of account IDs""" accounts_unvalidated = [] if " " in accounts: @@ -195,8 +204,56 @@ def lookup_accounts( odb.fetch_account_tags(account_ids=account_ids) data = [ - {k: v for k, v in acct.as_dict().items() if k not in exclude_keys} + {k: v for k, v in acct.to_dict().items() if k not in exclude_keys} for acct in odb.dm.accounts if acct.id in account_ids ] echo(json_dumps(data, default=str)) + + +@organization.command() +@option("--table", "-t", required=True, help="Name of the DynamoDB table") +@option( + "--in-file", "-i", required=True, help="File containing a list of Account objects" +) +@pass_context +def 
write_accounts_to_dynamodb( + ctx: Dict[str, Any], + table: str, + in_file: str, +) -> None: + """Write a list of accounts to a DynamoDB table""" + data = None + with open_file(in_file, mode="r") as f: + data = json_load(f) + odb = OrganizationDataBuilder() + if not isinstance(data, list): + handle_error(err_msg="Data is not a list") + odb.dm.accounts = [Account(**account) for account in data] + accounts = odb.to_dynamodb(field_name="accounts") + client = APIClient("dynamodb") + ret = {"responses": []} + # Group into batches of 25 since that's the max for BatchWriteItem + for group_ in zip_longest(*[iter(accounts)] * 25): + items = prepare_dynamodb_batch_put_request(table=table, items=group_) + res = client.api("batch_write_item", request_items=items) + # TODO: Add handling of any "UnprocessedItems" in the response. Add retry with + # exponential backoff. + ret["responses"].append(res) + echo(json_dumps(ret)) + + +@organization.command() +@option("--table", "-t", required=True, help="Name of the DynamoDB table") +@pass_context +def read_accounts_from_dynamodb( + ctx: Dict[str, Any], + table: str, +) -> None: + """Fetch a list of accounts from a DynamoDB table""" + client = APIClient("dynamodb") + res = client.api("scan", table_name=table) + accounts = [Account(**account) for account in deserialize_dynamodb_items(res)] + odb = OrganizationDataBuilder() + odb.dm.accounts = accounts + echo(odb.to_json(field_name="accounts")) diff --git a/aws_data_tools/client.py b/aws_data_tools/client.py index fb1481b..12e5098 100644 --- a/aws_data_tools/client.py +++ b/aws_data_tools/client.py @@ -33,7 +33,7 @@ def api(self, func: str, **kwargs) -> Union[Dict[str, Any], List[Dict[str, Any]] If the API action is one that supports pagination, it is handled automaticaly. All paginated responses are fully aggregated and then returned. 
""" - kwargs = pascalize(kwargs) + kwargs = {pascalize(key): value for key, value in kwargs.items()} paginate = self.client.can_paginate(func) if paginate: paginator = self.client.get_paginator(func) @@ -52,4 +52,5 @@ def api(self, func: str, **kwargs) -> Union[Dict[str, Any], List[Dict[str, Any]] return depascalize(response) def __post_init__(self): - self.client = self.session.client(self.service) + if self.client is None: + self.client = self.session.client(self.service) diff --git a/aws_data_tools/models/base.py b/aws_data_tools/models/base.py index 45060fb..bce8408 100644 --- a/aws_data_tools/models/base.py +++ b/aws_data_tools/models/base.py @@ -8,12 +8,14 @@ from yaml import dump as yaml_dump +from ..utils import serialize_dynamodb_item, serialize_dynamodb_items + @dataclass class ModelBase: """Base class for all models with helpers for serialization""" - def as_dict( + def to_dict( self, field_name: str = None ) -> Union[Dict[str, Any], List[Dict[str, Any]]]: """ @@ -30,10 +32,17 @@ def as_dict( raise Exception(f"Field {field_name} does not exist") return data - def as_json(self, **kwargs) -> str: + def to_dynamodb(self, **kwargs) -> Union[Dict[str, Any], List[Dict[str, Any]]]: + """Serialize the dataclass or field to a DynamoDB Item or list of Items""" + data = self.to_dict(**kwargs) + if isinstance(data, list): + return serialize_dynamodb_items(items=data) + return serialize_dynamodb_item(item=data) + + def to_json(self, **kwargs) -> str: """Serialize the dataclass instance to JSON""" - return json_dumps(self.as_dict(**kwargs), default=str) + return json_dumps(self.to_dict(**kwargs), default=str) - def as_yaml(self, **kwargs) -> str: + def to_yaml(self, **kwargs) -> str: """Serialize the dataclass instance to YAML""" - return yaml_dump(self.as_dict(**kwargs)) + return yaml_dump(self.to_dict(**kwargs)) diff --git a/aws_data_tools/models/organizations.py b/aws_data_tools/models/organizations.py index ffefc9b..4517e6c 100644 --- 
a/aws_data_tools/models/organizations.py +++ b/aws_data_tools/models/organizations.py @@ -78,7 +78,7 @@ class Policy(ModelBase): tags: Dict[str, str] = field(default=None) targets: List[PolicyTargetSummary] = field(default=None) - def as_target(self): + def to_target(self): """Return the Policy as a PolicySummaryForTarget object""" return PolicySummaryForTarget( id=self.policy_summary.id, type=self.policy_summary.type @@ -98,13 +98,13 @@ class Root(ModelBase): children: List[ParChild] = field(default=None) policies: List[PolicySummaryForTarget] = field(default=None) - def as_parchild_dict(self) -> Dict[str, str]: + def to_parchild_dict(self) -> Dict[str, str]: """Return the root as a ParChild (parent) dict""" return {"id": self.id, "type": "ROOT"} - def as_parchild(self) -> ParChild: + def to_parchild(self) -> ParChild: """Return the root as a ParChild (parent) object""" - return ParChild(**self.as_parchild_dict()) + return ParChild(**self.to_parchild_dict()) @dataclass @@ -121,13 +121,13 @@ class OrganizationalUnit(ModelBase): policies: List[PolicySummaryForTarget] = field(default=None) tags: Dict[str, str] = field(default=None) - def as_parchild_dict(self) -> Dict[str, str]: + def to_parchild_dict(self) -> Dict[str, str]: """Return the OU as a ParChild (parent) dict""" return {"id": self.id, "type": "ORGANIZATIONAL_UNIT"} - def as_parchild(self) -> ParChild: + def to_parchild(self) -> ParChild: """Return the OU as a ParChild (parent) object""" - return ParChild(**self.as_parchild_dict()) + return ParChild(**self.to_parchild_dict()) @dataclass @@ -148,13 +148,13 @@ class Account(ModelBase): policies: List[PolicySummaryForTarget] = field(default=None) tags: Dict[str, str] = field(default=None) - def as_parchild_dict(self) -> Dict[str, str]: + def to_parchild_dict(self) -> Dict[str, str]: """Return the account as a ParChild (parent) dict""" return {"id": self.id, "type": "ACCOUNT"} - def as_parchild(self) -> ParChild: + def to_parchild(self) -> ParChild: """Return 
the account as a ParChild (parent) object""" - return ParChild(**self.as_parchild_dict()) + return ParChild(**self.to_parchild_dict()) @dataclass @@ -172,6 +172,12 @@ class Organization(ModelBase): master_account_id: str = field(default=None) # Optional properties generally populated after initialization + + # TODO: These collections should be converted to container data classes to be + # better able to handle operations against specific fields. Currently, + # serializing/deserializing these collections independently requires passing the + # "field_name" kwarg to the `to_dict()` function from ModelBase. It's already + # getting hacky. accounts: List[Account] = field(default=None) organizational_units: List[OrganizationalUnit] = field(default=None) policies: List[Policy] = field(default=None) diff --git a/aws_data_tools/utils.py b/aws_data_tools/utils.py index 8e7373d..a7a3772 100644 --- a/aws_data_tools/utils.py +++ b/aws_data_tools/utils.py @@ -1,4 +1,9 @@ -from typing import Dict, List +""" +Utilities for common operations that happen across different services +""" +from typing import Any, Dict, List + +from boto3.dynamodb.types import TypeDeserializer, TypeSerializer from .client import APIClient @@ -14,3 +19,35 @@ def query_tags(client: APIClient, resource_id: str) -> Dict[str, str]: if len(tags) == 0: return {} return tag_list_to_dict(tags) + + +def serialize_dynamodb_item(item: Dict[str, Any]) -> Dict[str, Any]: + """Convert a dict to a DynamoDB Item""" + serializer = TypeSerializer() + return {key: serializer.serialize(value) for key, value in item.items()} + + +def serialize_dynamodb_items(items: List[Dict[str, Any]]) -> List[Dict[str, Any]]: + """Convert a list of dicts to a list of DynamoDB Items""" + return [serialize_dynamodb_item(item) for item in items] + + +def deserialize_dynamodb_item(item: Dict[str, Any]) -> Dict[str, Any]: + """Convert a DynamoDB Item to a dict""" + deserializer = TypeDeserializer() + return {key:
deserializer.deserialize(value) for key, value in item.items()} + + +def deserialize_dynamodb_items(items: List[Dict[str, Any]]) -> List[Dict[str, Any]]: + """Convert a list of DynamoDB Items to a list of dicts""" + return [deserialize_dynamodb_item(item) for item in items] + + +def prepare_dynamodb_batch_put_request( + table: str, + items: List[Dict[str, Any]], +) -> Dict[str, List[Dict[str, Any]]]: + """Prepare PutRequest input for a DynamoDB BatchWriteItem request""" + return { + table: [{"PutRequest": {"Item": item}} for item in items if item is not None] + }