Skip to content

Commit

Permalink
Allow specification of Snowflake schema and database (#63)
Browse files Browse the repository at this point in the history
* Allow specification of schema and database for Snowflake.

* Snowflake - Move to the last hour of query history by default.

* Version bump.

* Add handler to allow use of internal Pydantic field names.

* Update documentation.
  • Loading branch information
hcpadkins authored Oct 8, 2024
1 parent a5f8f10 commit 471a3c6
Show file tree
Hide file tree
Showing 8 changed files with 64 additions and 8 deletions.
3 changes: 3 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,9 @@ isn't listed here, support can be added by creating a custom connector!
* SalesForce Marketing Cloud audit event logs
* SalesForce Marketing Cloud security event logs
* Slack audit logs
* Snowflake login history
* Snowflake query history
* Snowflake session history
* Stripe events
* Tines audit logs
* Terraform Cloud audit trails
Expand Down
4 changes: 4 additions & 0 deletions docs/index.rst
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,7 @@ Currently the following log sources are supported by Grove out of the box. If a
isn't listed here, support can be added by creating a custom connector!

* Atlassian audit events (e.g. Confluence, Jira)
* FleetDM host logs
* GitHub audit logs
* GSuite alerts
* GSuite activity logs
Expand All @@ -62,6 +63,9 @@ isn't listed here, support can be added by creating a custom connector!
* SalesForce Marketing Cloud audit event logs
* SalesForce Marketing Cloud security event logs
* Slack audit logs
* Snowflake login history
* Snowflake query history
* Snowflake session history
* Stripe events
* Tines audit logs
* Terraform Cloud audit trails
Expand Down
2 changes: 1 addition & 1 deletion grove/__about__.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
"""Grove metadata."""

__version__ = "1.5.0"
__version__ = "1.6.0"
__title__ = "grove"
__license__ = "Mozilla Public License 2.0"
__copyright__ = "Copyright 2023 HashiCorp, Inc."
33 changes: 31 additions & 2 deletions grove/connectors/snowflake/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -51,14 +51,14 @@ def batch_size(self) -> int:
This is used to control the maximum number of records which will be retrieved
before they are flushed to the output handler.
The default is 500.
The default is 1000.
:return: The "batch_size" portion of the connector's configuration.
"""
try:
candidate = self.configuration.batch_size
except AttributeError:
return 500
return 1000

try:
candidate = int(candidate)
Expand Down Expand Up @@ -114,3 +114,32 @@ def passphrase(self) -> Optional[str]:
return self.configuration.passphrase
except AttributeError:
return None

@property
def schema(self) -> str:
    """Fetches the optional schema name from the configuration.

    The default is "SNOWFLAKE". Note that although the schema is optional in
    the configuration, this accessor always returns a string — the declared
    return type is therefore `str`, not `Optional[str]`.

    :return: The "schema" portion of the connector's configuration, or the
        default ("SNOWFLAKE") if not configured.
    """
    try:
        # The trailing underscore is due to a limitation in Pydantic < 2.0 where
        # 'schema' is an internal field. We automatically remap these internal
        # fields with a trailing underscore while we migrate to Pydantic >= 2.0.
        return self.configuration.schema_
    except AttributeError:
        return "SNOWFLAKE"

@property
def database(self) -> str:
    """Fetches the optional database name from the configuration.

    The default is "ADMIN". Note that although the database is optional in
    the configuration, this accessor always returns a string — the declared
    return type is therefore `str`, not `Optional[str]`.

    :return: The "database" portion of the connector's configuration, or the
        default ("ADMIN") if not configured.
    """
    try:
        return self.configuration.database
    except AttributeError:
        return "ADMIN"
4 changes: 3 additions & 1 deletion grove/connectors/snowflake/login_history.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@
# Define the parameterised Snowflake query to use to fetch login history records.
SNOWFLAKE_QUERY_LOGIN_HISTORY = """
SELECT *
FROM SNOWFLAKE.ACCOUNT_USAGE.LOGIN_HISTORY
FROM LOGIN_HISTORY
WHERE EVENT_TIMESTAMP > %(pointer)s
ORDER BY EVENT_TIMESTAMP ASC;
"""
Expand Down Expand Up @@ -43,7 +43,9 @@ def collect(self):
client = snowflake.connector.connect(
role=self.role,
user=self.identity,
schema=self.schema,
account=self.account,
database=self.database,
warehouse=self.warehouse,
private_key=private_key,
timezone="UTC",
Expand Down
9 changes: 6 additions & 3 deletions grove/connectors/snowflake/query_history.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@
ROLE_NAME,
START_TIME,
END_TIME
FROM SNOWFLAKE.ACCOUNT_USAGE.QUERY_HISTORY
FROM QUERY_HISTORY
WHERE START_TIME > %(pointer)s
ORDER BY START_TIME ASC;
"""
Expand All @@ -32,11 +32,12 @@ class Connector(SnowflakeConnector):
def collect(self):
"""Collects query history records from Snowflake."""

# If no pointer is stored then start from 24-hours ago - due to volume.
# If no pointer is stored then start from 1 hour ago. We use a small value here
# due to volume.
try:
_ = self.pointer
except NotFoundException:
self.pointer = (datetime.now(timezone.utc) - timedelta(days=1)).isoformat()
self.pointer = (datetime.now(timezone.utc) - timedelta(hours=1)).isoformat()

# Decode the private key from the loaded PEM format PKCS#8 data.
private_key = self._load_private_key()
Expand All @@ -47,7 +48,9 @@ def collect(self):
client = snowflake.connector.connect(
role=self.role,
user=self.identity,
schema=self.schema,
account=self.account,
database=self.database,
warehouse=self.warehouse,
private_key=private_key,
timezone="UTC",
Expand Down
4 changes: 3 additions & 1 deletion grove/connectors/snowflake/session_history.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@
# Define the parameterised Snowflake query to use to fetch session history records.
SNOWFLAKE_QUERY_SESSION_HISTORY = """
SELECT *
FROM SNOWFLAKE.ACCOUNT_USAGE.SESSIONS
FROM SESSIONS
WHERE CREATED_ON > %(pointer)s
ORDER BY CREATED_ON ASC;
"""
Expand Down Expand Up @@ -43,7 +43,9 @@ def collect(self):
client = snowflake.connector.connect(
role=self.role,
user=self.identity,
schema=self.schema,
account=self.account,
database=self.database,
warehouse=self.warehouse,
private_key=private_key,
timezone="UTC",
Expand Down
13 changes: 13 additions & 0 deletions grove/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -143,6 +143,19 @@ def _decode_fields(cls, values): # noqa: B902
Other encoding schemes may be supported in future, but for now only base64 is
supported.
"""
# This is a horrible hack to allow fields with names that mask Pydantic
# internals. This can be removed once Grove is updated to use Pydantic >= 2.
INTERNAL_FIELDS = ["schema"]

for field in INTERNAL_FIELDS:
value = values.get(field, None)
if value is None:
continue

# Remap the field name to contain a trailing underscore.
values[f"{field}_"] = value
del values[field]

for field, encoding in values.get("encoding", {}).items():
# If the secret is externally stored decoding will be performed after the
# secret has been retrieved. Right now, this field should not exist as it
Expand Down

0 comments on commit 471a3c6

Please sign in to comment.