Skip to content

Commit

Permalink
Merge remote-tracking branch 'reuben/filter-collections'
Browse files Browse the repository at this point in the history
  • Loading branch information
melgazar9 committed Dec 10, 2024
2 parents f755679 + 78c6f98 commit aecbfcc
Show file tree
Hide file tree
Showing 3 changed files with 39 additions and 2 deletions.
3 changes: 3 additions & 0 deletions meltano.yml
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,9 @@ plugins:
description: |
An optional prefix which will be added to each stream name.
value: ''
- name: filter_collections
description: |
Collections to discover (default: all). Useful for improving catalog discovery performance.
- name: start_date
kind: date_iso8601
description: |
Expand Down
25 changes: 23 additions & 2 deletions tap_mongodb/connector.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

import sys
from logging import Logger, getLogger
from typing import Any, Dict, List, Optional, Tuple
from typing import Any, Dict, List, Optional, Tuple, Union

from pymongo import MongoClient
from pymongo.database import Database
Expand Down Expand Up @@ -36,12 +36,14 @@ def __init__( # pylint: disable=too-many-arguments
db_name: str,
datetime_conversion: str,
prefix: Optional[str] = None,
collections: Optional[Union[str, List[str]]] = None,
) -> None:
self._connection_string = connection_string
self._options = options
self._db_name = db_name
self._datetime_conversion: str = datetime_conversion.upper()
self._prefix: Optional[str] = prefix
self._collections = [collections] if isinstance(collections, str) else collections
self._logger: Logger = getLogger(__name__)
self._version: Optional[MongoVersion] = None

Expand Down Expand Up @@ -116,7 +118,26 @@ def discover_catalog_entries(self) -> List[Dict[str, Any]]:
The discovered catalog entries as a list.
"""
result: List[Dict] = []
for collection in self.database.list_collection_names(authorizedCollections=True, nameOnly=True):

collections = self.database.list_collection_names(
authorizedCollections=True,
nameOnly=True,
filter={
"$or": [
{
"name": {
"$regex": f"^{c}$",
"$options": "i",
}
}
for c in self._collections
]
}
if self._collections
else None,
)

for collection in collections:
try:
self.database[collection].find_one()
except PyMongoError:
Expand Down
13 changes: 13 additions & 0 deletions tap_mongodb/tap.py
Original file line number Diff line number Diff line change
Expand Up @@ -96,6 +96,18 @@ class TapMongoDB(Tap):
default="",
description="An optional prefix which will be added to each stream name.",
),
th.Property(
"filter_collections",
th.OneOf(
th.StringType,
th.ArrayType(th.StringType),
),
required=True,
default=[],
description=(
"Collections to discover (default: all). Useful for improving catalog discovery performance."
),
),
th.Property(
"start_date",
th.DateTimeType,
Expand Down Expand Up @@ -210,6 +222,7 @@ def connector(self) -> MongoDBConnector:
self.config.get("database"),
self.config.get("datetime_conversion"),
prefix=self.config.get("prefix", None),
collections=self.config["filter_collections"],
)

@property
Expand Down

0 comments on commit aecbfcc

Please sign in to comment.