diff --git a/.github/workflows/validate.yml b/.github/workflows/validate.yml index 6d6dabf..58bbab5 100644 --- a/.github/workflows/validate.yml +++ b/.github/workflows/validate.yml @@ -26,11 +26,11 @@ jobs: - '3.13' steps: - name: Checkout - uses: actions/checkout@v3 + uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 - name: Install poetry run: pipx install poetry - name: Set up Python ${{ matrix.python-version }} - uses: actions/setup-python@v4 + uses: actions/setup-python@0b93645e9fea7318ecaed2b359559ac225c90a2b # v5.3.0 with: python-version: ${{ matrix.python-version }} cache: poetry @@ -39,7 +39,7 @@ jobs: poetry env use ${{ matrix.python-version }} poetry install - name: Run pre-commit checks - uses: pre-commit/action@v3.0.0 + uses: pre-commit/action@2c7b3805fd2a0fd8c1884dcaebf91fc102a13ecd # v3.0.1 smoke-test: runs-on: ubuntu-latest strategy: @@ -54,9 +54,9 @@ jobs: - '3.13' steps: - name: Checkout - uses: actions/checkout@v3 + uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 - name: Set up Python ${{ matrix.python-version }} - uses: actions/setup-python@v4 + uses: actions/setup-python@0b93645e9fea7318ecaed2b359559ac225c90a2b # v5.3.0 with: python-version: ${{ matrix.python-version }} - name: Install meltano @@ -71,8 +71,7 @@ jobs: fail-fast: false matrix: mongo-version: - - 3.6 - - 4.4 + - '8.0' python-version: - '3.8' - '3.9' @@ -80,12 +79,17 @@ jobs: - '3.11' - '3.12' - '3.13' + include: + - mongo-version: '6.0' + python-version: '3.13' + - mongo-version: '7.0' + python-version: '3.13' steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 - name: Install poetry run: pipx install poetry - name: Set up Python ${{ matrix.python-version }} - uses: actions/setup-python@v4 + uses: actions/setup-python@0b93645e9fea7318ecaed2b359559ac225c90a2b # v5.3.0 with: python-version: ${{ matrix.python-version }} cache: poetry @@ -94,7 +98,7 @@ jobs: poetry env use ${{ matrix.python-version }} poetry install - name: Start MongoDB - uses: supercharge/mongodb-github-action@1.9.0 + uses: supercharge/mongodb-github-action@5a87bd81f88e2a8b195f8b7b656f5cda1350815a # 1.11.0 with: mongodb-version: ${{ matrix.mongo-version }} mongodb-username: admin @@ -102,3 +106,45 @@ jobs: mongodb-db: test - name: Test with pytest run: poetry run pytest + integration: + runs-on: ubuntu-latest + name: Meltano integration test 🐉 + env: + DATABASE: test + USERNAME: admin + PASSWORD: password + strategy: + matrix: + mongodb-version: + - '6.0' + - '7.0' + - '8.0' + steps: + - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 + - name: Start MongoDB + uses: supercharge/mongodb-github-action@5a87bd81f88e2a8b195f8b7b656f5cda1350815a # 1.11.0 + with: + mongodb-version: ${{ matrix.mongodb-version }} + mongodb-username: ${{ env.USERNAME }} + mongodb-password: ${{ env.PASSWORD }} + mongodb-db: ${{ env.DATABASE }} + - uses: actions/setup-python@0b93645e9fea7318ecaed2b359559ac225c90a2b # v5.3.0 + with: + python-version: 3.x + - uses: astral-sh/setup-uv@38f3f104447c67c051c4a08e39b64a148898af3a # v4.2.0 + with: + version: '>=0.5' + - name: Install Meltano + run: uv tool install meltano + - run: meltano config meltano set venv.backend uv + - run: > + uv run scripts/seed.py + --host localhost + --port 27017 + --database ${{ env.DATABASE }} + --username ${{ env.USERNAME }} + --password ${{ env.PASSWORD }} + - run: meltano run tap-mongodb target-jsonl + env: + TAP_MONGODB_DATABASE: ${{ env.DATABASE }} + TAP_MONGODB_MONGODB_CONNECTION_STRING: mongodb://${{ env.USERNAME }}:${{ env.PASSWORD }}@localhost:27017/ diff --git a/.python-version b/.python-version index 371cfe3..24ee5b1 100644 --- a/.python-version +++ b/.python-version @@ -1 +1 @@ -3.11.1 +3.13 diff --git a/poetry.lock b/poetry.lock index aec8c1a..9800403 100644 --- a/poetry.lock +++ b/poetry.lock @@ -27,19 +27,19 @@ typing-extensions = {version = ">=4.0.0", markers = "python_version < \"3.11\""} [[package]] name = "attrs" -version = "24.2.0" +version = "24.3.0" description = "Classes Without Boilerplate" optional = false -python-versions = ">=3.7" +python-versions = ">=3.8" files = [ - {file = "attrs-24.2.0-py3-none-any.whl", hash = "sha256:81921eb96de3191c8258c199618104dd27ac608d9366f5e35d011eae1867ede2"}, - {file = "attrs-24.2.0.tar.gz", hash = "sha256:5cfb1b9148b5b086569baec03f20d7b6bf3bcacc9a42bebf87ffaaca362f6346"}, + {file = "attrs-24.3.0-py3-none-any.whl", hash = "sha256:ac96cd038792094f438ad1f6ff80837353805ac950cd2aa0e0625ef19850c308"}, + {file = "attrs-24.3.0.tar.gz", hash = "sha256:8f5c07333d543103541ba7be0e2ce16eeee8130cb0b3f9238ab904ce1e85baff"}, ] [package.extras] benchmark = ["cloudpickle", "hypothesis", "mypy (>=1.11.1)", "pympler", "pytest (>=4.3.0)", "pytest-codspeed", "pytest-mypy-plugins", "pytest-xdist[psutil]"] cov = ["cloudpickle", "coverage[toml] (>=5.3)", "hypothesis", "mypy (>=1.11.1)", "pympler", "pytest (>=4.3.0)", "pytest-mypy-plugins", "pytest-xdist[psutil]"] -dev = ["cloudpickle", "hypothesis", "mypy (>=1.11.1)", "pre-commit", "pympler", "pytest (>=4.3.0)", "pytest-mypy-plugins", "pytest-xdist[psutil]"] +dev = ["cloudpickle", "hypothesis", "mypy (>=1.11.1)", "pre-commit-uv", "pympler", "pytest (>=4.3.0)", "pytest-mypy-plugins", "pytest-xdist[psutil]"] docs = ["cogapp", "furo", "myst-parser", "sphinx", "sphinx-notfound-page", "sphinxcontrib-towncrier", "towncrier (<24.7)"] tests = ["cloudpickle", "hypothesis", "mypy (>=1.11.1)", "pympler", "pytest (>=4.3.0)", "pytest-mypy-plugins", "pytest-xdist[psutil]"] tests-mypy = ["mypy (>=1.11.1)", "pytest-mypy-plugins"] @@ -159,17 +159,17 @@ uvloop = ["uvloop (>=0.15.2)"] [[package]] name = "boto3" -version = "1.35.79" +version = "1.35.82" description = "The AWS SDK for Python" optional = true python-versions = ">=3.8" files = [ - {file = "boto3-1.35.79-py3-none-any.whl", hash = "sha256:a673b0b6378c9ccbf045a31a43195b175e12aa5c37fb7635fcbfc8f48fb857b3"}, - {file = "boto3-1.35.79.tar.gz", hash = "sha256:1fa26217cd33ded82e55aed4460cd55f7223fa647916aa0d3c5d6828e6ec7135"}, + {file = "boto3-1.35.82-py3-none-any.whl", hash = "sha256:c422b68ae76959b9e23b77eb79e41c3483332f7e1de918d2b083c456d8cf234c"}, + {file = "boto3-1.35.82.tar.gz", hash = "sha256:2bbaf1551b1ed55770cb437d7040f1abe6742601103695057b30ce6328eef286"}, ] [package.dependencies] -botocore = ">=1.35.79,<1.36.0" +botocore = ">=1.35.82,<1.36.0" jmespath = ">=0.7.1,<2.0.0" s3transfer = ">=0.10.0,<0.11.0" @@ -178,13 +178,13 @@ crt = ["botocore[crt] (>=1.21.0,<2.0a0)"] [[package]] name = "botocore" -version = "1.35.79" +version = "1.35.82" description = "Low-level, data-driven core of boto 3." optional = true python-versions = ">=3.8" files = [ - {file = "botocore-1.35.79-py3-none-any.whl", hash = "sha256:e6b10bb9a357e3f5ca2e60f6dd15a85d311b9a476eb21b3c0c2a3b364a2897c8"}, - {file = "botocore-1.35.79.tar.gz", hash = "sha256:245bfdda1b1508539ddd1819c67a8a2cc81780adf0715d3de418d64c4247f346"}, + {file = "botocore-1.35.82-py3-none-any.whl", hash = "sha256:e43b97d8cbf19d35ce3a177f144bd97cc370f0a67d0984c7d7cf105ac198748f"}, + {file = "botocore-1.35.82.tar.gz", hash = "sha256:78dd7bf8f49616d00073698d7bbaf5a115208fe730b7b7afae4456adddb3552e"}, ] [package.dependencies] @@ -200,13 +200,13 @@ crt = ["awscrt (==0.22.0)"] [[package]] name = "certifi" -version = "2024.8.30" +version = "2024.12.14" description = "Python package for providing Mozilla's CA Bundle." optional = false python-versions = ">=3.6" files = [ - {file = "certifi-2024.8.30-py3-none-any.whl", hash = "sha256:922820b53db7a7257ffbda3f597266d435245903d80737e34f8a45ff3e3230d8"}, - {file = "certifi-2024.8.30.tar.gz", hash = "sha256:bec941d2aa8195e248a60b31ff9f0558284cf01a52591ceda73ea9afffd69fd9"}, + {file = "certifi-2024.12.14-py3-none-any.whl", hash = "sha256:1275f7a45be9464efc1173084eaa30f866fe2e47d389406136d332ed4967ec56"}, + {file = "certifi-2024.12.14.tar.gz", hash = "sha256:b650d30f370c2b724812bee08008be0c4163b163ddaec3f2546c1caf65f191db"}, ] [[package]] @@ -408,6 +408,21 @@ files = [ [package.extras] test = ["pytest (>=6)"] +[[package]] +name = "faker" +version = "33.1.0" +description = "Faker is a Python package that generates fake data for you." +optional = false +python-versions = ">=3.8" +files = [ + {file = "Faker-33.1.0-py3-none-any.whl", hash = "sha256:d30c5f0e2796b8970de68978365247657486eb0311c5abe88d0b895b68dff05d"}, + {file = "faker-33.1.0.tar.gz", hash = "sha256:1c925fc0e86a51fc46648b504078c88d0cd48da1da2595c4e712841cab43a1e4"}, +] + +[package.dependencies] +python-dateutil = ">=2.4" +typing-extensions = "*" + [[package]] name = "flake8" version = "5.0.4" @@ -709,6 +724,8 @@ optional = false python-versions = "*" files = [ {file = "jsonpath-ng-1.7.0.tar.gz", hash = "sha256:f6f5f7fd4e5ff79c785f1573b394043b39849fb2bb47bcead935d12b00beab3c"}, + {file = "jsonpath_ng-1.7.0-py2-none-any.whl", hash = "sha256:898c93fc173f0c336784a3fa63d7434297544b7198124a68f9a3ef9597b0ae6e"}, + {file = "jsonpath_ng-1.7.0-py3-none-any.whl", hash = "sha256:f3d7f9e848cba1b6da28c55b1c26ff915dc9e0b1ba7e752a53d6da8d5cbd00b6"}, ] [package.dependencies] @@ -1061,7 +1078,7 @@ testing = ["argcomplete", "attrs (>=19.2.0)", "hypothesis (>=3.56)", "mock", "no name = "python-dateutil" version = "2.9.0.post0" description = "Extensions to the standard Python datetime module" -optional = true +optional = false python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,>=2.7" files = [ {file = "python-dateutil-2.9.0.post0.tar.gz", hash = "sha256:37dd54208da7e1cd875388217d5e00ebd4179249f90fb72437e91a35459a0ad3"}, @@ -1767,4 +1784,4 @@ s3 = ["fs-s3fs"] [metadata] lock-version = "2.0" python-versions = "^3.8" -content-hash = "5598a5d9dd2f985bdfc7e723afceee594c203ca89a56a81e2dcd39895b37b4f8" +content-hash = "1d272e1297d48f9466300c7dc1e648917be1554094ec6021e9ae668c5354529f" diff --git a/pyproject.toml b/pyproject.toml index b9e6a3a..575d955 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -29,6 +29,7 @@ mypy = "^1.0.0" isort = "^5.11.5" singer-sdk = { version = "^0.42.1", extras = ["testing"] } pylint = "^3.0.0a6" +faker = "^33.1.0" [tool.poetry.extras] s3 = ["fs-s3fs"] diff --git a/scripts/seed.py b/scripts/seed.py new file mode 100644 index 0000000..c7b6bbf --- /dev/null +++ b/scripts/seed.py @@ -0,0 +1,77 @@ +# /// script +# dependencies = [ +# "faker", +# "pymongo", +# ] +# /// + +"""Seed the MongoDB database with fake data.""" + +import argparse + +import pymongo +from faker import Faker + + +class Params(argparse.Namespace): # pylint: disable=too-few-public-methods + """Namespace for command line arguments.""" + + host: str + database: str + username: str + password: str + port: int + + +def seed(): + """Seed the database with fake data. + + - users: 100 + - posts: 1000 + """ + parser = argparse.ArgumentParser() + parser.add_argument("--host", default="localhost", help="MongoDB host") + parser.add_argument("--database", required=True, help="MongoDB database") + parser.add_argument("--username", default=None, help="MongoDB username") + parser.add_argument("--password", default=None, help="MongoDB password") + parser.add_argument("--port", default=27017, help="MongoDB port") + args = parser.parse_args(namespace=Params()) + + fake = Faker() + uri = f"mongodb://{args.username}:{args.password}@{args.host}:{args.port}" + client = pymongo.MongoClient(uri) + db = client[args.database] + + users = db["users"] + users.delete_many({}) + users_oids = set() + + for _ in range(100): + result = users.insert_one( + { + "name": fake.name(), + "address": fake.address(), + "email": fake.email(), + "joined_at": fake.date_time_this_decade(), + } + ) + users_oids.add(result.inserted_id) + + posts = db["posts"] + posts.delete_many({}) + for _ in range(1000): + created_at = fake.date_time_this_decade() + updated_at = fake.date_time_between_dates(created_at) + posts.insert_one( + { + "title": fake.sentence(), + "content": fake.text(), + "user_id": fake.random_element(users_oids), + "created_at": created_at, + "updated_at": updated_at, + } + ) + + +if __name__ == "__main__": + seed() diff --git a/tap_mongodb/streams.py b/tap_mongodb/streams.py index eb17c36..a12546e 100644 --- a/tap_mongodb/streams.py +++ b/tap_mongodb/streams.py @@ -178,7 +178,7 @@ def _generate_record_messages(self, record: dict) -> Generator[singer.RecordMess Record message objects. """ extracted_at: datetime = record.pop("_sdc_extracted_at", utc_now()) - pop_deselected_record_properties(record, self.schema, self.mask, self.logger) + pop_deselected_record_properties(record, self.schema, self.mask) record = conform_record_data_types( stream_name=self.name, record=record,