Skip to content

Commit

Permalink
feat(cli): Commands to read/write AWS Accounts in DynamoDB (#11)
Browse files Browse the repository at this point in the history
### Added

- Adds an `organizations write-accounts-to-dynamodb` CLI command
- Adds an `organizations read-accounts-from-dynamodb` CLI command
- Adds DynamoDB (de)serialization functions and requests helpers to utils

### Changed

- Updates the Makefile to allow setting a custom PYTHONBREAKPOINT when debugging
- Updates `OrganizationDataBuilder` to allow setting the client during init
- Updates `OrganizationDataBuilder` to allow excluding account parent data lookups
- Renames `ModelBase` serialization function prefixes from `as_` to `to_`
- Updates `APIClient.api()` to only pascalize keys in kwargs, not the values. This
  fixes a bug that was causing items being inserted into DynamoDB to be pascalized.
- Updates `APIClient()` and `APIClient.Connect()` to skip creating the client if it
  already exists
  • Loading branch information
timoguin authored Jun 16, 2021
1 parent c43072a commit c8a41a9
Show file tree
Hide file tree
Showing 8 changed files with 170 additions and 41 deletions.
11 changes: 11 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -20,10 +20,21 @@ No unreleased changes.
- Adds a `field` argument to `ModelBase.as_dict()` to dump a single field in a model
- Adds configurations for tox and other testing tools
- Adds a quickstart to the top of the README
- Adds an `organizations write-accounts-to-dynamodb` CLI command
- Adds an `organizations read-accounts-from-dynamodb` CLI command
- Adds DynamoDB (de)serialization functions and requests helpers to utils

### Changed

- Refactors `OrganizationDataBuilder` to allow more control over pulling data
- Updates the Makefile to allow setting a custom PYTHONBREAKPOINT when debugging
- Updates `OrganizationDataBuilder` to allow setting the client during init
- Updates `OrganizationDataBuilder` to allow excluding account parent data lookups
- Renames `ModelBase` serialization function prefixes from `as_` to `to_`
- Updates `APIClient.api()` to only pascalize keys in kwargs, not the values. This
fixes a bug that was causing items being inserted into DynamoDB to be pascalized.
- Updates `APIClient()` and `APIClient.Connect()` to skip creating the client if it
already exists

## [0.1.0-beta1] - 2020-06-09

Expand Down
3 changes: 2 additions & 1 deletion Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -4,9 +4,10 @@ PY_INSTALL_ARGS ?=--extras="cli devtools docs"
VENV_DIR ?=.venv
CMD ?=/bin/bash
DEBUG ?=false
PYTHONBREAKPOINT ?=ipdb.set_trace

ifeq (${DEBUG},true)
export PYTHONBREAKPOINT=ipdb.set_trace
export PYTHONBREAKPOINT
else
export PYTHONBREAKPOINT=0
endif
Expand Down
35 changes: 21 additions & 14 deletions aws_data_tools/builders/organizations.py
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,7 @@ class OrganizationDataBuilder(ModelBase):
Provides serialization to dicts and JSON.
"""

client: APIClient = field(default=None, init=False, repr=False)
client: APIClient = field(default=None, repr=False)
dm: Organization = field(default_factory=Organization)

# Used by __post_init__() to determine what data to initialize (default is none)
Expand All @@ -65,9 +65,12 @@ class OrganizationDataBuilder(ModelBase):
init_policy_targets: InitVar[bool] = field(default=False)
init_effective_policies: InitVar[bool] = field(default=False)

include_account_parents: bool = field(default=False)

def Connect(self):
"""Initialize an authenticated session"""
self.client = APIClient(_SERVICE_NAME)
if self.client is None:
self.client = APIClient(_SERVICE_NAME)

def api(self, func: str, **kwargs) -> Union[List[Dict[str, Any]], Dict[str, Any]]:
"""Make arbitrary API calls with the session client"""
Expand Down Expand Up @@ -257,7 +260,7 @@ def __e_ous_recurse(
if parents is None:
if self.dm.root is None:
self.fetch_root()
parents = [self.dm.root.as_parchild()]
parents = [self.dm.root.to_parchild()]
if self.dm._parent_child_tree is None:
self.dm._parent_child_tree = {}
if self.dm._child_parent_tree is None:
Expand All @@ -277,15 +280,15 @@ def __e_ous_recurse(
)
for ou_result in ou_results:
ou = OrganizationalUnit(parent=parent, **ou_result)
ou_as_parchild = ou.as_parchild()
self.dm._parent_child_tree[parent.id].append(ou_as_parchild)
ou_to_parchild = ou.to_parchild()
self.dm._parent_child_tree[parent.id].append(ou_to_parchild)
self.dm._child_parent_tree[ou.id] = parent
ous.append(ou)
next_parents.append(ou_as_parchild)
next_parents.append(ou_to_parchild)
acct_results = self.api("list_accounts_for_parent", parent_id=parent.id)
for acct_result in acct_results:
account = Account(parent=parent, **acct_result)
self.dm._parent_child_tree[parent.id].append(account.as_parchild())
self.dm._parent_child_tree[parent.id].append(account.to_parchild())
self.dm._child_parent_tree[account.id] = parent
return self.__e_ous_recurse(parents=next_parents, ous=ous, depth=depth + 1)

Expand Down Expand Up @@ -330,7 +333,7 @@ def __l_accounts(self, include_parents: bool = False) -> None:
accounts = []
for result in data:
account = result
if include_parents:
if include_parents or self.include_account_parents:
if self.dm._child_parent_tree is None:
self.fetch_ous()
account.parent = self.dm._child_parent_tree[account.id]
Expand Down Expand Up @@ -461,17 +464,21 @@ def fetch_all_tags(self) -> None:
self.fetch_ou_tags()
self.fetch_account_tags()

def as_dict(self, **kwargs) -> Dict[str, Any]:
def to_dict(self, **kwargs) -> Dict[str, Any]:
"""Return the data model for the organization as a dictionary"""
return self.dm.as_dict(**kwargs)
return self.dm.to_dict(**kwargs)

def to_dynamodb(self, **kwargs) -> Dict[str, Any]:
    """Serialize the organization data model into DynamoDB Item form

    All keyword arguments are passed through unchanged to the data model's own
    `to_dynamodb()` method.
    """
    data_model = self.dm
    return data_model.to_dynamodb(**kwargs)

def as_json(self, **kwargs) -> str:
def to_json(self, **kwargs) -> str:
"""Return the data model for the organization as a JSON string"""
return self.dm.as_json(**kwargs)
return self.dm.to_json(**kwargs)

def as_yaml(self, **kwargs) -> str:
def to_yaml(self, **kwargs) -> str:
"""Return the data model for the organization as a YAML string"""
return self.dm.as_yaml(**kwargs)
return self.dm.to_yaml(**kwargs)

def fetch_all(self) -> None:
"""Initialize all data for nodes and edges in the organization"""
Expand Down
73 changes: 65 additions & 8 deletions aws_data_tools/cli/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,10 +2,12 @@
CLI interface for working with data from AWS APIs
"""

from itertools import zip_longest
from json import dumps as json_dumps
from json import load as json_load
from re import fullmatch
from traceback import format_exc
from typing import Any, Dict, List, Union
from typing import Any, Dict, List

from botocore.exceptions import ClientError, NoCredentialsError

Expand All @@ -21,7 +23,14 @@
)

from .. import get_version
from ..client import APIClient
from ..builders.organizations import OrganizationDataBuilder
from ..models.organizations import Account

from ..utils import (
deserialize_dynamodb_items,
prepare_dynamodb_batch_put_request,
)


CONTEXT_SETTINGS = {"help_option_names": ["-h", "--help"]}
Expand Down Expand Up @@ -54,7 +63,7 @@ def handle_error(ctx, err_msg, tb=None):
ctx.exit(1)


@organization.command()
@organization.command(short_help="Dump org data as JSON")
@option(
"--no-accounts",
default=False,
Expand Down Expand Up @@ -83,7 +92,7 @@ def dump_json(
no_policies: bool,
format_: str,
out_file: str,
) -> Union[str, None]:
) -> None:
"""Dump a JSON representation of the organization"""
err_msg = None
tb = None
Expand All @@ -100,11 +109,11 @@ def dump_json(
kwargs["init_policies"] = False
kwargs["init_policy_tags"] = False
kwargs["init_policy_targets"] = False
odb = OrganizationDataBuilder(**kwargs)
odb = OrganizationDataBuilder(include_account_parents=True, **kwargs)
if format_ == "JSON":
s_func = odb.as_json
s_func = odb.to_json
elif format_ == "YAML":
s_func = odb.as_yaml
s_func = odb.to_yaml
if out_file is None:
out_file = "-"
with open_file(out_file, mode="wb") as f:
Expand Down Expand Up @@ -153,7 +162,7 @@ def lookup_accounts(
include_effective_policies: bool,
include_policies: bool,
include_tags: bool,
) -> str:
) -> None:
"""Query for account details using a list of account IDs"""
accounts_unvalidated = []
if " " in accounts:
Expand Down Expand Up @@ -195,8 +204,56 @@ def lookup_accounts(
odb.fetch_account_tags(account_ids=account_ids)

data = [
{k: v for k, v in acct.as_dict().items() if k not in exclude_keys}
{k: v for k, v in acct.to_dict().items() if k not in exclude_keys}
for acct in odb.dm.accounts
if acct.id in account_ids
]
echo(json_dumps(data, default=str))


@organization.command()
@option("--table", "-t", required=True, help="Name of the DynamoDB table")
@option(
    "--in-file", "-i", required=True, help="File containing a list of Account objects"
)
@pass_context
def write_accounts_to_dynamodb(
    ctx: Dict[str, Any],
    table: str,
    in_file: str,
) -> None:
    """Write a list of accounts to a DynamoDB table

    Loads JSON from `in_file`, builds `Account` models from each entry, serializes
    them with `OrganizationDataBuilder.to_dynamodb()`, and sends them to `table`
    with `batch_write_item` calls of at most 25 items each. The collected
    responses are echoed as a JSON document of the form `{"responses": [...]}`.

    If the loaded JSON is not a list, the error is reported through
    `handle_error()`.
    """
    data = None
    with open_file(in_file, mode="r") as f:
        data = json_load(f)
    odb = OrganizationDataBuilder()
    if not isinstance(data, list):
        # handle_error() takes the click context as its first argument and ends
        # with ctx.exit(1), so nothing below runs for invalid input.
        handle_error(ctx, err_msg="Data is not a list")
    odb.dm.accounts = [Account(**account) for account in data]
    accounts = odb.to_dynamodb(field_name="accounts")
    client = APIClient("dynamodb")
    ret = {"responses": []}
    # Group into batches of 25 since that's the max for BatchWriteItem
    for group_ in zip_longest(*[iter(accounts)] * 25):
        # zip_longest() pads the final group with None when the number of
        # accounts is not a multiple of 25; drop the padding so that only real
        # items end up in the request.
        batch = tuple(item for item in group_ if item is not None)
        items = prepare_dynamodb_batch_put_request(table=table, items=batch)
        res = client.api("batch_write_item", request_items=items)
        # TODO: Add handling of any "UnprocessedItems" in the response. Add retry with
        # exponential backoff.
        ret["responses"].append(res)
    echo(json_dumps(ret))


@organization.command()
@option("--table", "-t", required=True, help="Name of the DynamoDB table")
@pass_context
def read_accounts_from_dynamodb(
    ctx: Dict[str, Any],
    table: str,
) -> None:
    """Scan a DynamoDB table and print the accounts it holds as JSON

    The scan result is deserialized into `Account` models, attached to a fresh
    `OrganizationDataBuilder`, and its "accounts" field is echoed as JSON.
    """
    dynamodb = APIClient("dynamodb")
    scan_result = dynamodb.api("scan", table_name=table)
    # Keep the original order: models are built before the builder is created.
    account_models = []
    for raw_account in deserialize_dynamodb_items(scan_result):
        account_models.append(Account(**raw_account))
    builder = OrganizationDataBuilder()
    builder.dm.accounts = account_models
    output = builder.to_json(field_name="accounts")
    echo(output)
5 changes: 3 additions & 2 deletions aws_data_tools/client.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@ def api(self, func: str, **kwargs) -> Union[Dict[str, Any], List[Dict[str, Any]]
If the API action is one that supports pagination, it is handled automatically.
All paginated responses are fully aggregated and then returned.
"""
kwargs = pascalize(kwargs)
kwargs = {pascalize(key): value for key, value in kwargs.items()}
paginate = self.client.can_paginate(func)
if paginate:
paginator = self.client.get_paginator(func)
Expand All @@ -52,4 +52,5 @@ def api(self, func: str, **kwargs) -> Union[Dict[str, Any], List[Dict[str, Any]]
return depascalize(response)

def __post_init__(self):
self.client = self.session.client(self.service)
if self.client is None:
self.client = self.session.client(self.service)
19 changes: 14 additions & 5 deletions aws_data_tools/models/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,12 +8,14 @@

from yaml import dump as yaml_dump

from ..utils import serialize_dynamodb_item, serialize_dynamodb_items


@dataclass
class ModelBase:
"""Base class for all models with helpers for serialization"""

def as_dict(
def to_dict(
self, field_name: str = None
) -> Union[Dict[str, Any], List[Dict[str, Any]]]:
"""
Expand All @@ -30,10 +32,17 @@ def as_dict(
raise Exception(f"Field {field_name} does not exist")
return data

def as_json(self, **kwargs) -> str:
def to_dynamodb(self, **kwargs) -> Union[Dict[str, Any], List[Dict[str, Any]]]:
    """Serialize the dataclass, or one of its fields, for DynamoDB

    Keyword arguments are forwarded to `to_dict()`. A list result is serialized
    as a list of Items; anything else is serialized as a single Item.
    """
    payload = self.to_dict(**kwargs)
    if not isinstance(payload, list):
        return serialize_dynamodb_item(item=payload)
    return serialize_dynamodb_items(items=payload)

def to_json(self, **kwargs) -> str:
"""Serialize the dataclass instance to JSON"""
return json_dumps(self.as_dict(**kwargs), default=str)
return json_dumps(self.to_dict(**kwargs), default=str)

def as_yaml(self, **kwargs) -> str:
def to_yaml(self, **kwargs) -> str:
"""Serialize the dataclass instance to YAML"""
return yaml_dump(self.as_dict(**kwargs))
return yaml_dump(self.to_dict(**kwargs))
26 changes: 16 additions & 10 deletions aws_data_tools/models/organizations.py
Original file line number Diff line number Diff line change
Expand Up @@ -78,7 +78,7 @@ class Policy(ModelBase):
tags: Dict[str, str] = field(default=None)
targets: List[PolicyTargetSummary] = field(default=None)

def as_target(self):
def to_target(self):
"""Return the Policy as a PolicySummaryForTarget object"""
return PolicySummaryForTarget(
id=self.policy_summary.id, type=self.policy_summary.type
Expand All @@ -98,13 +98,13 @@ class Root(ModelBase):
children: List[ParChild] = field(default=None)
policies: List[PolicySummaryForTarget] = field(default=None)

def as_parchild_dict(self) -> Dict[str, str]:
def to_parchild_dict(self) -> Dict[str, str]:
"""Return the root as a ParChild (parent) dict"""
return {"id": self.id, "type": "ROOT"}

def as_parchild(self) -> ParChild:
def to_parchild(self) -> ParChild:
"""Return the root as a ParChild (parent) object"""
return ParChild(**self.as_parchild_dict())
return ParChild(**self.to_parchild_dict())


@dataclass
Expand All @@ -121,13 +121,13 @@ class OrganizationalUnit(ModelBase):
policies: List[PolicySummaryForTarget] = field(default=None)
tags: Dict[str, str] = field(default=None)

def as_parchild_dict(self) -> Dict[str, str]:
def to_parchild_dict(self) -> Dict[str, str]:
"""Return the OU as a ParChild (parent) dict"""
return {"id": self.id, "type": "ORGANIZATIONAL_UNIT"}

def as_parchild(self) -> ParChild:
def to_parchild(self) -> ParChild:
"""Return the OU as a ParChild (parent) object"""
return ParChild(**self.as_parchild_dict())
return ParChild(**self.to_parchild_dict())


@dataclass
Expand All @@ -148,13 +148,13 @@ class Account(ModelBase):
policies: List[PolicySummaryForTarget] = field(default=None)
tags: Dict[str, str] = field(default=None)

def as_parchild_dict(self) -> Dict[str, str]:
def to_parchild_dict(self) -> Dict[str, str]:
"""Return the account as a ParChild (parent) dict"""
return {"id": self.id, "type": "ACCOUNT"}

def as_parchild(self) -> ParChild:
def to_parchild(self) -> ParChild:
"""Return the account as a ParChild (parent) object"""
return ParChild(**self.as_parchild_dict())
return ParChild(**self.to_parchild_dict())


@dataclass
Expand All @@ -172,6 +172,12 @@ class Organization(ModelBase):
master_account_id: str = field(default=None)

# Optional properties generally populated after initialization

# TODO: These collections should be converted to container data classes to be
# better able to handle operations against specific fields. Currently,
# serializing/deserializing these collections independently requires passing the
# "field_name" kwarg to the `to_dict()` function from ModelBase. It's already
# getting hacky.
accounts: List[Account] = field(default=None)
organizational_units: List[OrganizationalUnit] = field(default=None)
policies: List[Policy] = field(default=None)
Expand Down
Loading

0 comments on commit c8a41a9

Please sign in to comment.