From 1b3db729da0e5543a42ae5d96d8bc058b0279504 Mon Sep 17 00:00:00 2001 From: Aldo Gonzalez Date: Sat, 22 Mar 2025 06:06:43 -0600 Subject: [PATCH 01/17] sourece-microsoft-sharepoint: add feature to iterate through all sites --- .../stream_reader.py | 111 +++++++++++++++++- .../source_microsoft_sharepoint/utils.py | 23 ++++ .../unit_tests/test_stream_reader.py | 80 ++++++++++++- .../sources/microsoft-sharepoint.md | 1 + 4 files changed, 209 insertions(+), 6 deletions(-) diff --git a/airbyte-integrations/connectors/source-microsoft-sharepoint/source_microsoft_sharepoint/stream_reader.py b/airbyte-integrations/connectors/source-microsoft-sharepoint/source_microsoft_sharepoint/stream_reader.py index e834cb5fb8ce7..478d43825e33e 100644 --- a/airbyte-integrations/connectors/source-microsoft-sharepoint/source_microsoft_sharepoint/stream_reader.py +++ b/airbyte-integrations/connectors/source-microsoft-sharepoint/source_microsoft_sharepoint/stream_reader.py @@ -10,12 +10,17 @@ from io import IOBase from os import makedirs, path from os.path import getsize -from typing import Dict, Iterable, List, Optional, Tuple +from typing import Any, Callable, Dict, Iterable, List, Optional, Tuple, MutableMapping import requests import smart_open from msal import ConfidentialClientApplication +from office365.onedrive.drives.drive import Drive from office365.graph_client import GraphClient +from office365.runtime.auth.token_response import TokenResponse +from office365.sharepoint.client_context import ClientContext +from office365.sharepoint.search.service import SearchService +from office365.entity_collection import EntityCollection from airbyte_cdk import AirbyteTracedException, FailureType from airbyte_cdk.sources.file_based.exceptions import FileSizeLimitError @@ -24,7 +29,17 @@ from source_microsoft_sharepoint.spec import SourceMicrosoftSharePointSpec from .exceptions import ErrorDownloadingFile, ErrorFetchingMetadata -from .utils import FolderNotFoundException, 
MicrosoftSharePointRemoteFile, execute_query_with_retry, filter_http_urls +from .utils import ( + FolderNotFoundException, + MicrosoftSharePointRemoteFile, + execute_query_with_retry, + filter_http_urls, + get_site, + get_site_prefix, +) + +SITE_TITLE = "Title" +SITE_PATH = "Path" class SourceMicrosoftSharePointClient: @@ -50,9 +65,20 @@ def client(self): self._client = GraphClient(self._get_access_token) return self._client - def _get_access_token(self): + @staticmethod + def _get_scope(tenant_prefix: str = None): + """ + Returns the scope for the access token. + We use admin site to retrieve objects like Site groups and users. + """ + if tenant_prefix: + admin_site_url = f"https://{tenant_prefix}-admin.sharepoint.com" + return [f"{admin_site_url}/.default"] + return ["https://graph.microsoft.com/.default"] + + def _get_access_token(self, tenant_prefix: str = None): """Retrieves an access token for SharePoint access.""" - scope = ["https://graph.microsoft.com/.default"] + scope = self._get_scope(tenant_prefix) refresh_token = self.config.credentials.refresh_token if hasattr(self.config.credentials, "refresh_token") else None if refresh_token: @@ -67,6 +93,13 @@ def _get_access_token(self): return result + def get_token_response_object_wrapper(self, tenant_prefix: str): + def get_token_response_object(): + token = self._get_access_token(tenant_prefix=tenant_prefix) + return TokenResponse.from_json(token) + + return get_token_response_object + class SourceMicrosoftSharePointStreamReader(AbstractFileBasedStreamReader): """ @@ -103,6 +136,20 @@ def get_access_token(self): # Directly fetch a new access token from the auth_client each time it's called return self.auth_client._get_access_token()["access_token"] + def get_token_response_object(self, tenant_prefix: str = None) -> Callable: + """ " + When building a ClientContext using with_access_token method, + the token_func param is expected to be a method/callable that returns a TokenResponse object. 
+ tenant_prefix is used to determine the scope of the access token. + return: A callable that returns a TokenResponse object. + """ + return self.auth_client.get_token_response_object_wrapper(tenant_prefix=tenant_prefix) + + def get_client_context(self): + site_url, root_site_prefix = get_site_prefix(get_site(self.one_drive_client)) + client_context = ClientContext(site_url).with_access_token(self.get_token_response_object(tenant_prefix=root_site_prefix)) + return client_context + @config.setter def config(self, value: SourceMicrosoftSharePointSpec): """ @@ -202,11 +249,67 @@ def _get_files_by_drive_name(self, drives, folder_path): yield from self._list_directories_and_files(folder, folder_path_url) + def get_all_sites(self) -> List[MutableMapping[str, Any]]: + """ + Retrieves all SharePoint sites from the current tenant. + + Returns: + List[MutableMapping[str, Any]]: A list of site information. + """ + _, root_site_prefix = get_site_prefix(get_site(self.one_drive_client)) + ctx = self.get_client_context() + search_service = SearchService(ctx) + # ignore default OneDrive site with NOT Path:https://prefix-my.sharepoint.com + search_job = search_service.post_query(f"contentclass:STS_Site NOT Path:https://{root_site_prefix}-my.sharepoint.com") + search_job_result = execute_query_with_retry(search_job) + + found_sites = [] + if search_job.value and search_job_result.value.PrimaryQueryResult: + table = search_job_result.value.PrimaryQueryResult.RelevantResults.Table + for row in table.Rows: + found_site = {} + data = row.Cells + found_site[SITE_TITLE] = data.get(SITE_TITLE) + found_site[SITE_PATH] = data.get(SITE_PATH) + found_sites.append(found_site) + else: + raise Exception("No site collections found") + + return found_sites + + def get_drives_from_sites(self, sites: List[MutableMapping[str, Any]]) -> EntityCollection: + """ + Retrieves SharePoint drives from the provided sites. + Args: + sites (List[MutableMapping[str, Any]]): A list of site information. 
+ + Returns: + EntityCollection: A collection of SharePoint drives. + """ + all_sites_drives = EntityCollection(context=self.one_drive_client, item_type=Drive) + for site in sites: + drives = execute_query_with_retry(self.one_drive_client.sites.get_by_url(site[SITE_PATH]).drives.get()) + for site_drive in drives: + all_sites_drives.add_child(site_drive) + return all_sites_drives + def get_site_drive(self): + """ + Retrieves SharePoint drives based on the provided site URL. + It iterates over the sites if something like sharepoint.com/sites/ is in the site_url. + Returns: + EntityCollection: A collection of SharePoint drives. + + Raises: + AirbyteTracedException: If an error occurs while retrieving drives. + """ try: if not self.config.site_url: # get main site drives drives = execute_query_with_retry(self.one_drive_client.drives.get()) + elif re.search(r"sharepoint\.com/sites/?$", self.config.site_url): + # get all sites and then get drives from each site + return self.get_drives_from_sites(self.get_all_sites()) else: # get drives for site drives provided in the config drives = execute_query_with_retry(self.one_drive_client.sites.get_by_url(self.config.site_url).drives.get()) diff --git a/airbyte-integrations/connectors/source-microsoft-sharepoint/source_microsoft_sharepoint/utils.py b/airbyte-integrations/connectors/source-microsoft-sharepoint/source_microsoft_sharepoint/utils.py index 04e0300926e33..fcc6f9f845144 100644 --- a/airbyte-integrations/connectors/source-microsoft-sharepoint/source_microsoft_sharepoint/utils.py +++ b/airbyte-integrations/connectors/source-microsoft-sharepoint/source_microsoft_sharepoint/utils.py @@ -3,11 +3,15 @@ import time from datetime import datetime from enum import Enum +from functools import lru_cache from http import HTTPStatus +from typing import List from airbyte_cdk import AirbyteTracedException, FailureType from airbyte_cdk.sources.file_based.remote_file import RemoteFile +from office365.graph_client import GraphClient +from 
office365.onedrive.sites.site import Site LOGGER = logging.getLogger("airbyte") @@ -150,3 +154,22 @@ def build(self) -> str: query_string = "&".join(self._segments) query_string = "?" + query_string if query_string else "" return f"{self._scheme}://{self._host}{self._path}{query_string}" + + +@lru_cache(maxsize=None) +def get_site(graph_client: GraphClient, site_url: str = None): + if site_url: + site = execute_query_with_retry(graph_client.sites.get_by_url(site_url)) + else: + site = execute_query_with_retry(graph_client.sites.root.get()) + return site + + +def get_site_prefix(site: Site): + site_url = site.web_url + host_name = site.site_collection.hostname + host_name_parts: List = host_name.split(".") # e.g. "contoso.sharepoint.com" => ["contoso", "sharepoint", "com"] + if len(host_name_parts) < 2: + raise ValueError(f"Invalid host name: {host_name}") + + return site_url, host_name_parts[0] diff --git a/airbyte-integrations/connectors/source-microsoft-sharepoint/unit_tests/test_stream_reader.py b/airbyte-integrations/connectors/source-microsoft-sharepoint/unit_tests/test_stream_reader.py index db9d93722a30b..48c8cdfd44d5a 100644 --- a/airbyte-integrations/connectors/source-microsoft-sharepoint/unit_tests/test_stream_reader.py +++ b/airbyte-integrations/connectors/source-microsoft-sharepoint/unit_tests/test_stream_reader.py @@ -16,6 +16,7 @@ SourceMicrosoftSharePointStreamReader, ) from wcmatch.glob import GLOBSTAR, globmatch +from office365.entity_collection import EntityCollection from airbyte_cdk import AirbyteTracedException @@ -604,7 +605,7 @@ def test_drives_property(auth_type, user_principal_name, has_refresh_token): refresh_token = "dummy_refresh_token" if has_refresh_token else None # Setup for different authentication types config_mock = MagicMock( - credentials=MagicMock(auth_type=auth_type, user_principal_name=user_principal_name, refresh_token=refresh_token) + credentials=MagicMock(auth_type=auth_type, user_principal_name=user_principal_name, 
refresh_token=refresh_token), site_url="" ) # Mock responses for the drives list and a single drive (my_drive) @@ -636,7 +637,82 @@ def test_drives_property(auth_type, user_principal_name, has_refresh_token): assert mock_execute_query.call_count == 2 drives_response.add_child.assert_called_once_with(my_drive) - # Retrieve files from accessible drives when search_scope is 'ACCESSIBLE_DRIVES' or 'ALL' + +@pytest.mark.parametrize( + "site_url, expected_call, expected_result, exception", + [ + # Scenario 1: No site URL (default site) + ("", "drives", MagicMock(spec=EntityCollection), None), + # Scenario 2: Site URL ending with 'sharepoint.com/sites/' (all sites) + ("https://test-tenant.sharepoint.com/sites/", "all_sites", MagicMock(spec=EntityCollection), None), + # Scenario 3: Specific site URL (single site) + ("https://test-tenant.sharepoint.com/sites/specific", "specific_site", MagicMock(spec=EntityCollection), None), + # Scenario 4: Error scenario + ("https://test-tenant.sharepoint.com/sites/error", "error", None, Exception("Test exception")), + ], +) +def test_get_site_drive(site_url, expected_call, expected_result, exception): + """ + Parameterized test for the get_site_drive method. 
+ """ + # Create a mock reader instance + reader = SourceMicrosoftSharePointStreamReader() + + # Mock the config with the specified site URL + reader._config = MagicMock(site_url=site_url) + + # Mock one_drive_client + mock_one_drive_client = MagicMock() + # Set the underlying attribute instead of using the property + reader._one_drive_client = mock_one_drive_client + + # Create mock response for execute_query_with_retry + mock_drives = expected_result + + # Mock methods based on the scenario + with patch("source_microsoft_sharepoint.stream_reader.execute_query_with_retry") as mock_execute_query: + # Set up the mock to return the expected result or raise an exception + if exception and expected_call == "error": + mock_execute_query.side_effect = exception + else: + mock_execute_query.return_value = mock_drives + + # Mock get_all_sites and get_drives_from_sites for the 'all_sites' scenario + mock_sites = [ + {"Title": "Site1", "Path": "https://test-tenant.sharepoint.com/sites/site1"}, + {"Title": "Site2", "Path": "https://test-tenant.sharepoint.com/sites/site2"}, + ] + + with ( + patch.object(reader, "get_all_sites", return_value=mock_sites) as mock_get_all_sites, + patch.object(reader, "get_drives_from_sites", return_value=mock_drives) as mock_get_drives_from_sites, + ): + # Determine the expected behavior based on the scenario + if exception: + with pytest.raises(AirbyteTracedException) as exc_info: + reader.get_site_drive() + assert "Failed to retrieve drives from sharepoint" in str(exc_info.value) + else: + result = reader.get_site_drive() + + # Check that the right methods were called based on the scenario + if expected_call == "drives": + # Default site URL + mock_one_drive_client.drives.get.assert_called_once() + mock_get_all_sites.assert_not_called() + mock_get_drives_from_sites.assert_not_called() + elif expected_call == "all_sites": + # Site URL ending with 'sharepoint.com/sites/' + mock_one_drive_client.drives.get.assert_not_called() + 
mock_get_all_sites.assert_called_once() + mock_get_drives_from_sites.assert_called_once_with(mock_sites) + elif expected_call == "specific_site": + # Specific site URL + mock_one_drive_client.sites.get_by_url.assert_called_once_with(site_url) + mock_get_all_sites.assert_not_called() + mock_get_drives_from_sites.assert_not_called() + + assert result == mock_drives @pytest.mark.parametrize( diff --git a/docs/integrations/sources/microsoft-sharepoint.md b/docs/integrations/sources/microsoft-sharepoint.md index 4c260b9eb2361..80218ebbb1e38 100644 --- a/docs/integrations/sources/microsoft-sharepoint.md +++ b/docs/integrations/sources/microsoft-sharepoint.md @@ -271,6 +271,7 @@ If enabled, sends subdirectory folder structure along with source file names to By providing a url to the site URL field, the connector will be able to access the files in the specific sharepoint site. The site url should be in the format `https://.sharepoint.com/sites/`. If no field is provided, the connector will access the files in the main site. +To have the connector iterate all sub-sites provide the site url as `https://.sharepoint.com/sites/`. 
### Supported sync modes From 896643510a529da57867e8dbaaaf1a48158edb0e Mon Sep 17 00:00:00 2001 From: Aldo Gonzalez Date: Sat, 22 Mar 2025 06:09:32 -0600 Subject: [PATCH 02/17] sourece-microsoft-sharepoint: ruff format --- .../source_microsoft_sharepoint/stream_reader.py | 7 ++++--- .../source_microsoft_sharepoint/utils.py | 5 +++-- .../unit_tests/test_stream_reader.py | 2 +- 3 files changed, 8 insertions(+), 6 deletions(-) diff --git a/airbyte-integrations/connectors/source-microsoft-sharepoint/source_microsoft_sharepoint/stream_reader.py b/airbyte-integrations/connectors/source-microsoft-sharepoint/source_microsoft_sharepoint/stream_reader.py index 478d43825e33e..2bba7f0be0bbb 100644 --- a/airbyte-integrations/connectors/source-microsoft-sharepoint/source_microsoft_sharepoint/stream_reader.py +++ b/airbyte-integrations/connectors/source-microsoft-sharepoint/source_microsoft_sharepoint/stream_reader.py @@ -10,17 +10,17 @@ from io import IOBase from os import makedirs, path from os.path import getsize -from typing import Any, Callable, Dict, Iterable, List, Optional, Tuple, MutableMapping +from typing import Any, Callable, Dict, Iterable, List, MutableMapping, Optional, Tuple import requests import smart_open from msal import ConfidentialClientApplication -from office365.onedrive.drives.drive import Drive +from office365.entity_collection import EntityCollection from office365.graph_client import GraphClient +from office365.onedrive.drives.drive import Drive from office365.runtime.auth.token_response import TokenResponse from office365.sharepoint.client_context import ClientContext from office365.sharepoint.search.service import SearchService -from office365.entity_collection import EntityCollection from airbyte_cdk import AirbyteTracedException, FailureType from airbyte_cdk.sources.file_based.exceptions import FileSizeLimitError @@ -38,6 +38,7 @@ get_site_prefix, ) + SITE_TITLE = "Title" SITE_PATH = "Path" diff --git 
a/airbyte-integrations/connectors/source-microsoft-sharepoint/source_microsoft_sharepoint/utils.py b/airbyte-integrations/connectors/source-microsoft-sharepoint/source_microsoft_sharepoint/utils.py index fcc6f9f845144..c02aea857c7ef 100644 --- a/airbyte-integrations/connectors/source-microsoft-sharepoint/source_microsoft_sharepoint/utils.py +++ b/airbyte-integrations/connectors/source-microsoft-sharepoint/source_microsoft_sharepoint/utils.py @@ -7,11 +7,12 @@ from http import HTTPStatus from typing import List +from office365.graph_client import GraphClient +from office365.onedrive.sites.site import Site + from airbyte_cdk import AirbyteTracedException, FailureType from airbyte_cdk.sources.file_based.remote_file import RemoteFile -from office365.graph_client import GraphClient -from office365.onedrive.sites.site import Site LOGGER = logging.getLogger("airbyte") diff --git a/airbyte-integrations/connectors/source-microsoft-sharepoint/unit_tests/test_stream_reader.py b/airbyte-integrations/connectors/source-microsoft-sharepoint/unit_tests/test_stream_reader.py index 48c8cdfd44d5a..39e4b73d0913f 100644 --- a/airbyte-integrations/connectors/source-microsoft-sharepoint/unit_tests/test_stream_reader.py +++ b/airbyte-integrations/connectors/source-microsoft-sharepoint/unit_tests/test_stream_reader.py @@ -6,6 +6,7 @@ from unittest.mock import ANY, MagicMock, Mock, PropertyMock, call, patch import pytest +from office365.entity_collection import EntityCollection from requests.exceptions import HTTPError from source_microsoft_sharepoint.exceptions import ErrorFetchingMetadata from source_microsoft_sharepoint.spec import SourceMicrosoftSharePointSpec @@ -16,7 +17,6 @@ SourceMicrosoftSharePointStreamReader, ) from wcmatch.glob import GLOBSTAR, globmatch -from office365.entity_collection import EntityCollection from airbyte_cdk import AirbyteTracedException From 78846ad202059ee4b64df2011a3cea14755c0c28 Mon Sep 17 00:00:00 2001 From: Aldo Gonzalez Date: Sat, 22 Mar 2025 06:24:27 
-0600 Subject: [PATCH 03/17] sourece-microsoft-sharepoint: update release information --- .../connectors/source-microsoft-sharepoint/metadata.yaml | 2 +- .../connectors/source-microsoft-sharepoint/pyproject.toml | 2 +- docs/integrations/sources/microsoft-sharepoint.md | 1 + 3 files changed, 3 insertions(+), 2 deletions(-) diff --git a/airbyte-integrations/connectors/source-microsoft-sharepoint/metadata.yaml b/airbyte-integrations/connectors/source-microsoft-sharepoint/metadata.yaml index c33bb08d831c2..b29a0256470f2 100644 --- a/airbyte-integrations/connectors/source-microsoft-sharepoint/metadata.yaml +++ b/airbyte-integrations/connectors/source-microsoft-sharepoint/metadata.yaml @@ -20,7 +20,7 @@ data: connectorSubtype: file connectorType: source definitionId: 59353119-f0f2-4e5a-a8ba-15d887bc34f6 - dockerImageTag: 0.8.0 + dockerImageTag: 0.9.0 dockerRepository: airbyte/source-microsoft-sharepoint githubIssueLabel: source-microsoft-sharepoint icon: microsoft-sharepoint.svg diff --git a/airbyte-integrations/connectors/source-microsoft-sharepoint/pyproject.toml b/airbyte-integrations/connectors/source-microsoft-sharepoint/pyproject.toml index f693389c08383..9418e7291c5b4 100644 --- a/airbyte-integrations/connectors/source-microsoft-sharepoint/pyproject.toml +++ b/airbyte-integrations/connectors/source-microsoft-sharepoint/pyproject.toml @@ -3,7 +3,7 @@ requires = [ "poetry-core>=1.0.0",] build-backend = "poetry.core.masonry.api" [tool.poetry] -version = "0.8.0" +version = "0.9.0" name = "source-microsoft-sharepoint" description = "Source implementation for Microsoft SharePoint." 
authors = [ "Airbyte ",] diff --git a/docs/integrations/sources/microsoft-sharepoint.md b/docs/integrations/sources/microsoft-sharepoint.md index 80218ebbb1e38..6e5e5e9e7030c 100644 --- a/docs/integrations/sources/microsoft-sharepoint.md +++ b/docs/integrations/sources/microsoft-sharepoint.md @@ -306,6 +306,7 @@ The connector is restricted by normal Microsoft Graph [requests limitation](http | Version | Date | Pull Request | Subject | |:--------|:-----------|:---------------------------------------------------------|:--------------------------------------------------------------------------| +| 0.9.0 | 2025-03-24 | [55912](https://github.com/airbytehq/airbyte/pull/55912) | Provide ability to ietrate all sharepoint sites | | 0.8.0 | 2025-03-12 | [54658](https://github.com/airbytehq/airbyte/pull/54658) | Provide ability to sync other sites than Main sharepoint site | | 0.7.2 | 2025-03-08 | [55427](https://github.com/airbytehq/airbyte/pull/55427) | Update dependencies | | 0.7.1 | 2025-03-01 | [54749](https://github.com/airbytehq/airbyte/pull/54749) | Update dependencies | From db1f2669543c4bd4dd96df4b3c1ec9f723774c80 Mon Sep 17 00:00:00 2001 From: Aldo Gonzalez Date: Sat, 22 Mar 2025 06:29:30 -0600 Subject: [PATCH 04/17] sourece-microsoft-sharepoint: minor changes in docstrings --- .../source_microsoft_sharepoint/stream_reader.py | 6 +++--- .../unit_tests/test_stream_reader.py | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/airbyte-integrations/connectors/source-microsoft-sharepoint/source_microsoft_sharepoint/stream_reader.py b/airbyte-integrations/connectors/source-microsoft-sharepoint/source_microsoft_sharepoint/stream_reader.py index 2bba7f0be0bbb..e4dd63e7780da 100644 --- a/airbyte-integrations/connectors/source-microsoft-sharepoint/source_microsoft_sharepoint/stream_reader.py +++ b/airbyte-integrations/connectors/source-microsoft-sharepoint/source_microsoft_sharepoint/stream_reader.py @@ -70,7 +70,7 @@ def client(self): def 
_get_scope(tenant_prefix: str = None): """ Returns the scope for the access token. - We use admin site to retrieve objects like Site groups and users. + We use admin site to retrieve objects like Sites. """ if tenant_prefix: admin_site_url = f"https://{tenant_prefix}-admin.sharepoint.com" @@ -138,8 +138,8 @@ def get_access_token(self): return self.auth_client._get_access_token()["access_token"] def get_token_response_object(self, tenant_prefix: str = None) -> Callable: - """ " - When building a ClientContext using with_access_token method, + """ + When building a ClientContext using with_access_token() method, the token_func param is expected to be a method/callable that returns a TokenResponse object. tenant_prefix is used to determine the scope of the access token. return: A callable that returns a TokenResponse object. diff --git a/airbyte-integrations/connectors/source-microsoft-sharepoint/unit_tests/test_stream_reader.py b/airbyte-integrations/connectors/source-microsoft-sharepoint/unit_tests/test_stream_reader.py index 39e4b73d0913f..a5a002ef1f2ce 100644 --- a/airbyte-integrations/connectors/source-microsoft-sharepoint/unit_tests/test_stream_reader.py +++ b/airbyte-integrations/connectors/source-microsoft-sharepoint/unit_tests/test_stream_reader.py @@ -653,7 +653,7 @@ def test_drives_property(auth_type, user_principal_name, has_refresh_token): ) def test_get_site_drive(site_url, expected_call, expected_result, exception): """ - Parameterized test for the get_site_drive method. + Test for the get_site_drive method. 
""" # Create a mock reader instance reader = SourceMicrosoftSharePointStreamReader() From 7fa6cd7cd6fe44ecab432b54516523d9f8329a9c Mon Sep 17 00:00:00 2001 From: Aldo Gonzalez Date: Sat, 22 Mar 2025 14:33:18 -0600 Subject: [PATCH 05/17] sourece-microsoft-sharepoint: increase testing --- .../stream_reader.py | 14 -- .../unit_tests/test_stream_reader.py | 125 ++++++++++++++++++ .../unit_tests/test_utils.py | 85 +++++++++++- 3 files changed, 209 insertions(+), 15 deletions(-) diff --git a/airbyte-integrations/connectors/source-microsoft-sharepoint/source_microsoft_sharepoint/stream_reader.py b/airbyte-integrations/connectors/source-microsoft-sharepoint/source_microsoft_sharepoint/stream_reader.py index e4dd63e7780da..3a9f2a55a2c8f 100644 --- a/airbyte-integrations/connectors/source-microsoft-sharepoint/source_microsoft_sharepoint/stream_reader.py +++ b/airbyte-integrations/connectors/source-microsoft-sharepoint/source_microsoft_sharepoint/stream_reader.py @@ -502,17 +502,3 @@ def get_file(self, file: MicrosoftSharePointRemoteFile, local_directory: str, lo raise AirbyteTracedException( f"There was an error while trying to download the file {file.uri}: {str(e)}", failure_type=FailureType.config_error ) - - def get_file_acl_permissions(self): - return None - - def load_identity_groups(self): - return None - - @property - def identities_schema(self): - return None - - @property - def file_permissions_schema(self): - return None diff --git a/airbyte-integrations/connectors/source-microsoft-sharepoint/unit_tests/test_stream_reader.py b/airbyte-integrations/connectors/source-microsoft-sharepoint/unit_tests/test_stream_reader.py index a5a002ef1f2ce..23131ba1f7cf1 100644 --- a/airbyte-integrations/connectors/source-microsoft-sharepoint/unit_tests/test_stream_reader.py +++ b/airbyte-integrations/connectors/source-microsoft-sharepoint/unit_tests/test_stream_reader.py @@ -746,3 +746,128 @@ def test_retrieve_files_from_accessible_drives(mocker, refresh_token, auth_type, # Assert 
that only the desired methods were called assert reader._get_files_by_drive_name.called == ("_get_files_by_drive_name" in expected_methods_called) assert reader._get_shared_files_from_all_drives.called == ("_get_shared_files_from_all_drives" in expected_methods_called) + + +@pytest.mark.parametrize( + "search_result, expected_sites, raises_exception", + [ + # Case 1: Search returns results with sites + ( + { + "PrimaryQueryResult": { + "RelevantResults": { + "Table": { + "Rows": [ + {"Cells": {"Title": "Site1", "Path": "https://test-tenant.sharepoint.com/sites/site1"}}, + {"Cells": {"Title": "Site2", "Path": "https://test-tenant.sharepoint.com/sites/site2"}}, + ] + } + } + } + }, + [ + {"Title": "Site1", "Path": "https://test-tenant.sharepoint.com/sites/site1"}, + {"Title": "Site2", "Path": "https://test-tenant.sharepoint.com/sites/site2"}, + ], + False, + ), + # Case 2: Search returns empty results + (None, [], True), + # Case 3: Search returns no relevant results + ({"PrimaryQueryResult": None}, [], True), + ], +) +def test_get_all_sites(search_result, expected_sites, raises_exception): + """ + Test the get_all_sites method to verify it correctly retrieves and processes SharePoint site information. 
+ """ + reader = SourceMicrosoftSharePointStreamReader() + + reader._config = MagicMock() + reader._one_drive_client = MagicMock() + + # Mock methods out of scope of this test + with ( + patch("source_microsoft_sharepoint.stream_reader.get_site") as mock_get_site, + patch("source_microsoft_sharepoint.stream_reader.get_site_prefix") as mock_get_site_prefix, + patch.object(reader, "get_client_context") as mock_get_client_context, + patch("source_microsoft_sharepoint.stream_reader.SearchService") as mock_search_service, + patch("source_microsoft_sharepoint.stream_reader.execute_query_with_retry") as mock_execute_query, + ): + # Setup mocks + mock_get_site.return_value = "test-site" + mock_get_site_prefix.return_value = ("https://test-tenant.sharepoint.com", "test-tenant") + + mock_client_context = MagicMock() + mock_get_client_context.return_value = mock_client_context + + mock_search_service_instance = MagicMock() + mock_search_service.return_value = mock_search_service_instance + + mock_search_job = MagicMock() + mock_search_service_instance.post_query.return_value = mock_search_job + + search_job_result = MagicMock() + mock_execute_query.return_value = search_job_result + + mock_search_job.value = True + search_job_result.value = MagicMock() + + if search_result is None: + # Case 2: Empty results + search_job_result.value.PrimaryQueryResult = None + elif search_result.get("PrimaryQueryResult") is None: + # Case 3: No relevant results + search_job_result.value.PrimaryQueryResult = None + else: + # Case 1: Success case with sites + # Create the full mock object structure with attributes instead of dict items + pq_data = search_result["PrimaryQueryResult"] + + primary_query_result = MagicMock() + search_job_result.value.PrimaryQueryResult = primary_query_result + + relevant_results = MagicMock() + primary_query_result.RelevantResults = relevant_results + + table = MagicMock() + relevant_results.Table = table + + if "Rows" in pq_data["RelevantResults"]["Table"]: + 
rows_data = pq_data["RelevantResults"]["Table"]["Rows"] + mock_rows = [] + + def create_cell_getter(cell_data): + def cell_getter(key, default=None): + return cell_data.get(key, default) + + return cell_getter + + for row_data in rows_data: + mock_row = MagicMock() + + cell_getter = create_cell_getter(row_data["Cells"]) + + mock_row.Cells = MagicMock() + mock_row.Cells.get = cell_getter + + mock_rows.append(mock_row) + + table.Rows = mock_rows + + if raises_exception: + with pytest.raises(Exception, match="No site collections found"): + reader.get_all_sites() + else: + result = reader.get_all_sites() + + assert result == expected_sites + + mock_get_site.assert_called_once_with(reader.one_drive_client) + mock_get_site_prefix.assert_called_once_with("test-site") + mock_get_client_context.assert_called_once() + mock_search_service.assert_called_once_with(mock_client_context) + mock_search_service_instance.post_query.assert_called_once_with( + "contentclass:STS_Site NOT Path:https://test-tenant-my.sharepoint.com" + ) + mock_execute_query.assert_called_once_with(mock_search_job) diff --git a/airbyte-integrations/connectors/source-microsoft-sharepoint/unit_tests/test_utils.py b/airbyte-integrations/connectors/source-microsoft-sharepoint/unit_tests/test_utils.py index 15a3914b2cf6f..6fd8b15e8a536 100644 --- a/airbyte-integrations/connectors/source-microsoft-sharepoint/unit_tests/test_utils.py +++ b/airbyte-integrations/connectors/source-microsoft-sharepoint/unit_tests/test_utils.py @@ -7,7 +7,8 @@ from urllib.parse import parse_qs, urlparse import pytest -from source_microsoft_sharepoint.utils import PlaceholderUrlBuilder, execute_query_with_retry, filter_http_urls +from office365.onedrive.sites.site import Site +from source_microsoft_sharepoint.utils import PlaceholderUrlBuilder, execute_query_with_retry, filter_http_urls, get_site, get_site_prefix from airbyte_cdk import AirbyteTracedException @@ -184,3 +185,85 @@ def test_url_builder_for_key_pair_value_pair(steps, 
expected_url): # Finally, build the URL and compare to expected url = builder.build() assert url == expected_url, f"Expected {expected_url}, but got {url}" + + +@pytest.mark.parametrize( + "site_url, expected_method_call", + [ + ("https://example.sharepoint.com/sites/test", "get_by_url"), + (None, "root.get"), + ], +) +@patch("source_microsoft_sharepoint.utils.execute_query_with_retry") +def test_get_site(mock_execute_query_with_retry, site_url, expected_method_call): + mock_graph_client = Mock() + + mock_site = Mock(spec=Site) + mock_site.web_url = "https://example.sharepoint.com/sites/test" if site_url else "https://example.sharepoint.com" + + mock_site.site_collection = Mock() + mock_site.site_collection.hostname = "example.sharepoint.com" + + mock_site.name = "Test Site" + mock_site.id = "test-site-id" + mock_site.root = Mock() + + mock_execute_query_with_retry.return_value = mock_site + + result = get_site(mock_graph_client, site_url) + + if expected_method_call == "get_by_url": + mock_graph_client.sites.get_by_url.assert_called_once_with(site_url) + else: + mock_graph_client.sites.root.get.assert_called_once() + + mock_execute_query_with_retry.assert_called_once() + assert result + + # Additional assertions to verify the site object's structure is maintained + assert result.web_url == "https://example.sharepoint.com/sites/test" if site_url else "https://example.sharepoint.com" + assert result.site_collection.hostname == "example.sharepoint.com" + assert result.name == "Test Site" + + +@pytest.mark.parametrize( + "web_url, hostname, expected_site_url, expected_prefix", + [ + ( + "https://contoso.sharepoint.com/sites/marketing", + "contoso.sharepoint.com", + "https://contoso.sharepoint.com/sites/marketing", + "contoso", + ), + ("https://fabrikam.sharepoint.com", "fabrikam.sharepoint.com", "https://fabrikam.sharepoint.com", "fabrikam"), + ( + "https://tailwind.sharepoint.com/sites/engineering/dev", + "tailwind.sharepoint.com", + 
"https://tailwind.sharepoint.com/sites/engineering/dev", + "tailwind", + ), + ], +) +def test_get_site_prefix(web_url, hostname, expected_site_url, expected_prefix): + # Create a mock Site object with the correct spec + mock_site = Mock(spec=Site) + mock_site.web_url = web_url + mock_site.site_collection = Mock() + mock_site.site_collection.hostname = hostname + + site_url, prefix = get_site_prefix(mock_site) + + assert site_url == expected_site_url + assert prefix == expected_prefix + + +def test_get_site_prefix_invalid_hostname(): + # Create a mock Site object with the correct spec + mock_site = Mock(spec=Site) + mock_site.web_url = "https://invalid" + mock_site.site_collection = Mock() + mock_site.site_collection.hostname = "invalid" + + # Call the function and expect a ValueError + with pytest.raises(ValueError, match="Invalid host name: invalid"): + get_site_prefix(mock_site) From c17a7b378835c406b99d07a0f76efe507ebe2da9 Mon Sep 17 00:00:00 2001 From: Aldo Gonzalez Date: Sat, 22 Mar 2025 14:40:14 -0600 Subject: [PATCH 06/17] sourece-microsoft-sharepoint: remove extra comments --- .../unit_tests/test_stream_reader.py | 16 ---------------- 1 file changed, 16 deletions(-) diff --git a/airbyte-integrations/connectors/source-microsoft-sharepoint/unit_tests/test_stream_reader.py b/airbyte-integrations/connectors/source-microsoft-sharepoint/unit_tests/test_stream_reader.py index 23131ba1f7cf1..de1b8257d0310 100644 --- a/airbyte-integrations/connectors/source-microsoft-sharepoint/unit_tests/test_stream_reader.py +++ b/airbyte-integrations/connectors/source-microsoft-sharepoint/unit_tests/test_stream_reader.py @@ -655,21 +655,13 @@ def test_get_site_drive(site_url, expected_call, expected_result, exception): """ Test for the get_site_drive method. 
""" - # Create a mock reader instance reader = SourceMicrosoftSharePointStreamReader() - - # Mock the config with the specified site URL reader._config = MagicMock(site_url=site_url) - - # Mock one_drive_client mock_one_drive_client = MagicMock() - # Set the underlying attribute instead of using the property reader._one_drive_client = mock_one_drive_client - # Create mock response for execute_query_with_retry mock_drives = expected_result - # Mock methods based on the scenario with patch("source_microsoft_sharepoint.stream_reader.execute_query_with_retry") as mock_execute_query: # Set up the mock to return the expected result or raise an exception if exception and expected_call == "error": @@ -677,7 +669,6 @@ def test_get_site_drive(site_url, expected_call, expected_result, exception): else: mock_execute_query.return_value = mock_drives - # Mock get_all_sites and get_drives_from_sites for the 'all_sites' scenario mock_sites = [ {"Title": "Site1", "Path": "https://test-tenant.sharepoint.com/sites/site1"}, {"Title": "Site2", "Path": "https://test-tenant.sharepoint.com/sites/site2"}, @@ -687,7 +678,6 @@ def test_get_site_drive(site_url, expected_call, expected_result, exception): patch.object(reader, "get_all_sites", return_value=mock_sites) as mock_get_all_sites, patch.object(reader, "get_drives_from_sites", return_value=mock_drives) as mock_get_drives_from_sites, ): - # Determine the expected behavior based on the scenario if exception: with pytest.raises(AirbyteTracedException) as exc_info: reader.get_site_drive() @@ -695,7 +685,6 @@ def test_get_site_drive(site_url, expected_call, expected_result, exception): else: result = reader.get_site_drive() - # Check that the right methods were called based on the scenario if expected_call == "drives": # Default site URL mock_one_drive_client.drives.get.assert_called_once() @@ -729,21 +718,17 @@ def test_get_site_drive(site_url, expected_call, expected_result, exception): ], ) def 
test_retrieve_files_from_accessible_drives(mocker, refresh_token, auth_type, search_scope, expected_methods_called): - # Set up the reader class reader = SourceMicrosoftSharePointStreamReader() config = MagicMock(credentials=MagicMock(auth_type=auth_type, refresh_token=refresh_token), search_scope=search_scope) reader._config = config - # Mock the necessary methods with patch.object(SourceMicrosoftSharePointStreamReader, "drives", return_value=[]) as mock_drives: mocker.patch.object(reader, "_get_files_by_drive_name") mocker.patch.object(reader, "_get_shared_files_from_all_drives") - # Call the method under test files = list(reader.get_all_files()) - # Assert that only the desired methods were called assert reader._get_files_by_drive_name.called == ("_get_files_by_drive_name" in expected_methods_called) assert reader._get_shared_files_from_all_drives.called == ("_get_shared_files_from_all_drives" in expected_methods_called) @@ -794,7 +779,6 @@ def test_get_all_sites(search_result, expected_sites, raises_exception): patch("source_microsoft_sharepoint.stream_reader.SearchService") as mock_search_service, patch("source_microsoft_sharepoint.stream_reader.execute_query_with_retry") as mock_execute_query, ): - # Setup mocks mock_get_site.return_value = "test-site" mock_get_site_prefix.return_value = ("https://test-tenant.sharepoint.com", "test-tenant") From 03ef4948b44389a0ead353a293e4bbe968e1ef3d Mon Sep 17 00:00:00 2001 From: Aldo Gonzalez Date: Sat, 22 Mar 2025 15:03:05 -0600 Subject: [PATCH 07/17] sourece-microsoft-sharepoint: bump cdk --- .../source-microsoft-sharepoint/poetry.lock | 196 ++++++++++-------- .../pyproject.toml | 4 +- 2 files changed, 108 insertions(+), 92 deletions(-) diff --git a/airbyte-integrations/connectors/source-microsoft-sharepoint/poetry.lock b/airbyte-integrations/connectors/source-microsoft-sharepoint/poetry.lock index a3c8732e8adbb..9292a0c1fbb96 100644 --- a/airbyte-integrations/connectors/source-microsoft-sharepoint/poetry.lock +++ 
b/airbyte-integrations/connectors/source-microsoft-sharepoint/poetry.lock @@ -1,14 +1,14 @@ -# This file is automatically @generated by Poetry 1.8.4 and should not be changed by hand. +# This file is automatically @generated by Poetry 1.8.3 and should not be changed by hand. [[package]] name = "airbyte-cdk" -version = "6.37.1" +version = "6.41.8" description = "A framework for writing Airbyte Connectors." optional = false python-versions = "<3.13,>=3.10" files = [ - {file = "airbyte_cdk-6.37.1-py3-none-any.whl", hash = "sha256:0806bb74a71c442edc2731af7d32c2f39b5b235a5a697367a991995a007e738d"}, - {file = "airbyte_cdk-6.37.1.tar.gz", hash = "sha256:c794a2ef16b864a0fc1200607f02653805a1bdf423ee0bb2149772318a29dde9"}, + {file = "airbyte_cdk-6.41.8-py3-none-any.whl", hash = "sha256:0a06428464ea9adc21a7c4ad092ac3fd962afef72d0b30a2f6993f00c9e4fcfb"}, + {file = "airbyte_cdk-6.41.8.tar.gz", hash = "sha256:e88ef8659b2de726b58f7f44fa1e9e261d091d07acdf22be6ef84a2a5e4292aa"}, ] [package.dependencies] @@ -17,7 +17,7 @@ anyascii = ">=0.3.2,<0.4.0" avro = {version = ">=1.11.2,<1.13.0", optional = true, markers = "extra == \"file-based\""} backoff = "*" cachetools = "*" -cryptography = ">=42.0.5,<44.0.0" +cryptography = ">=44.0.0,<45.0.0" dpath = ">=2.1.6,<3.0.0" dunamai = ">=1.22.0,<2.0.0" fastavro = {version = ">=1.8.0,<1.9.0", optional = true, markers = "extra == \"file-based\""} @@ -31,11 +31,12 @@ markdown = {version = "*", optional = true, markers = "extra == \"file-based\""} nltk = "3.9.1" numpy = "<2" orjson = ">=3.10.7,<4.0.0" +packaging = "*" pandas = "2.2.2" pdf2image = {version = "1.16.3", optional = true, markers = "extra == \"file-based\""} "pdfminer.six" = {version = "20221105", optional = true, markers = "extra == \"file-based\""} psutil = "6.1.0" -pyarrow = {version = ">=15.0.0,<15.1.0", optional = true, markers = "extra == \"file-based\""} +pyarrow = {version = ">=19.0.0,<20.0.0", optional = true, markers = "extra == \"file-based\""} pydantic = ">=2.7,<3.0" pyjwt = 
">=2.8.0,<3.0.0" pyrate-limiter = ">=3.1.0,<3.2.0" @@ -50,6 +51,7 @@ rapidfuzz = ">=3.10.1,<4.0.0" requests = "*" requests_cache = "*" serpyco-rs = ">=1.10.2,<2.0.0" +typing-extensions = "*" unstructured = {version = "0.10.27", extras = ["docx", "pptx"], optional = true, markers = "extra == \"file-based\""} "unstructured.pytesseract" = {version = ">=0.3.12", optional = true, markers = "extra == \"file-based\""} wcmatch = "10.0" @@ -57,7 +59,7 @@ whenever = ">=0.6.16,<0.7.0" xmltodict = ">=0.13,<0.15" [package.extras] -file-based = ["avro (>=1.11.2,<1.13.0)", "fastavro (>=1.8.0,<1.9.0)", "markdown", "pdf2image (==1.16.3)", "pdfminer.six (==20221105)", "pyarrow (>=15.0.0,<15.1.0)", "pytesseract (==0.3.10)", "python-calamine (==0.2.3)", "python-snappy (==0.7.3)", "unstructured.pytesseract (>=0.3.12)", "unstructured[docx,pptx] (==0.10.27)"] +file-based = ["avro (>=1.11.2,<1.13.0)", "fastavro (>=1.8.0,<1.9.0)", "markdown", "pdf2image (==1.16.3)", "pdfminer.six (==20221105)", "pyarrow (>=19.0.0,<20.0.0)", "pytesseract (==0.3.10)", "python-calamine (==0.2.3)", "python-snappy (==0.7.3)", "unstructured.pytesseract (>=0.3.12)", "unstructured[docx,pptx] (==0.10.27)"] sql = ["sqlalchemy (>=2.0,!=2.0.36,<3.0)"] vector-db-based = ["cohere (==4.21)", "langchain (==0.1.16)", "openai[embeddings] (==0.27.9)", "tiktoken (==0.8.0)"] @@ -570,51 +572,59 @@ dev = ["black (==22.3.0)", "hypothesis", "numpy", "pytest (>=5.30)", "pytest-ben [[package]] name = "cryptography" -version = "43.0.3" +version = "44.0.2" description = "cryptography is a package which provides cryptographic recipes and primitives to Python developers." 
optional = false -python-versions = ">=3.7" -files = [ - {file = "cryptography-43.0.3-cp37-abi3-macosx_10_9_universal2.whl", hash = "sha256:bf7a1932ac4176486eab36a19ed4c0492da5d97123f1406cf15e41b05e787d2e"}, - {file = "cryptography-43.0.3-cp37-abi3-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:63efa177ff54aec6e1c0aefaa1a241232dcd37413835a9b674b6e3f0ae2bfd3e"}, - {file = "cryptography-43.0.3-cp37-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7e1ce50266f4f70bf41a2c6dc4358afadae90e2a1e5342d3c08883df1675374f"}, - {file = "cryptography-43.0.3-cp37-abi3-manylinux_2_28_aarch64.whl", hash = "sha256:443c4a81bb10daed9a8f334365fe52542771f25aedaf889fd323a853ce7377d6"}, - {file = "cryptography-43.0.3-cp37-abi3-manylinux_2_28_x86_64.whl", hash = "sha256:74f57f24754fe349223792466a709f8e0c093205ff0dca557af51072ff47ab18"}, - {file = "cryptography-43.0.3-cp37-abi3-musllinux_1_2_aarch64.whl", hash = "sha256:9762ea51a8fc2a88b70cf2995e5675b38d93bf36bd67d91721c309df184f49bd"}, - {file = "cryptography-43.0.3-cp37-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:81ef806b1fef6b06dcebad789f988d3b37ccaee225695cf3e07648eee0fc6b73"}, - {file = "cryptography-43.0.3-cp37-abi3-win32.whl", hash = "sha256:cbeb489927bd7af4aa98d4b261af9a5bc025bd87f0e3547e11584be9e9427be2"}, - {file = "cryptography-43.0.3-cp37-abi3-win_amd64.whl", hash = "sha256:f46304d6f0c6ab8e52770addfa2fc41e6629495548862279641972b6215451cd"}, - {file = "cryptography-43.0.3-cp39-abi3-macosx_10_9_universal2.whl", hash = "sha256:8ac43ae87929a5982f5948ceda07001ee5e83227fd69cf55b109144938d96984"}, - {file = "cryptography-43.0.3-cp39-abi3-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:846da004a5804145a5f441b8530b4bf35afbf7da70f82409f151695b127213d5"}, - {file = "cryptography-43.0.3-cp39-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:0f996e7268af62598f2fc1204afa98a3b5712313a55c4c9d434aef49cadc91d4"}, - {file = 
"cryptography-43.0.3-cp39-abi3-manylinux_2_28_aarch64.whl", hash = "sha256:f7b178f11ed3664fd0e995a47ed2b5ff0a12d893e41dd0494f406d1cf555cab7"}, - {file = "cryptography-43.0.3-cp39-abi3-manylinux_2_28_x86_64.whl", hash = "sha256:c2e6fc39c4ab499049df3bdf567f768a723a5e8464816e8f009f121a5a9f4405"}, - {file = "cryptography-43.0.3-cp39-abi3-musllinux_1_2_aarch64.whl", hash = "sha256:e1be4655c7ef6e1bbe6b5d0403526601323420bcf414598955968c9ef3eb7d16"}, - {file = "cryptography-43.0.3-cp39-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:df6b6c6d742395dd77a23ea3728ab62f98379eff8fb61be2744d4679ab678f73"}, - {file = "cryptography-43.0.3-cp39-abi3-win32.whl", hash = "sha256:d56e96520b1020449bbace2b78b603442e7e378a9b3bd68de65c782db1507995"}, - {file = "cryptography-43.0.3-cp39-abi3-win_amd64.whl", hash = "sha256:0c580952eef9bf68c4747774cde7ec1d85a6e61de97281f2dba83c7d2c806362"}, - {file = "cryptography-43.0.3-pp310-pypy310_pp73-macosx_10_9_x86_64.whl", hash = "sha256:d03b5621a135bffecad2c73e9f4deb1a0f977b9a8ffe6f8e002bf6c9d07b918c"}, - {file = "cryptography-43.0.3-pp310-pypy310_pp73-manylinux_2_28_aarch64.whl", hash = "sha256:a2a431ee15799d6db9fe80c82b055bae5a752bef645bba795e8e52687c69efe3"}, - {file = "cryptography-43.0.3-pp310-pypy310_pp73-manylinux_2_28_x86_64.whl", hash = "sha256:281c945d0e28c92ca5e5930664c1cefd85efe80e5c0d2bc58dd63383fda29f83"}, - {file = "cryptography-43.0.3-pp310-pypy310_pp73-win_amd64.whl", hash = "sha256:f18c716be16bc1fea8e95def49edf46b82fccaa88587a45f8dc0ff6ab5d8e0a7"}, - {file = "cryptography-43.0.3-pp39-pypy39_pp73-macosx_10_9_x86_64.whl", hash = "sha256:4a02ded6cd4f0a5562a8887df8b3bd14e822a90f97ac5e544c162899bc467664"}, - {file = "cryptography-43.0.3-pp39-pypy39_pp73-manylinux_2_28_aarch64.whl", hash = "sha256:53a583b6637ab4c4e3591a15bc9db855b8d9dee9a669b550f311480acab6eb08"}, - {file = "cryptography-43.0.3-pp39-pypy39_pp73-manylinux_2_28_x86_64.whl", hash = "sha256:1ec0bcf7e17c0c5669d881b1cd38c4972fade441b27bda1051665faaa89bdcaa"}, - {file = 
"cryptography-43.0.3-pp39-pypy39_pp73-win_amd64.whl", hash = "sha256:2ce6fae5bdad59577b44e4dfed356944fbf1d925269114c28be377692643b4ff"}, - {file = "cryptography-43.0.3.tar.gz", hash = "sha256:315b9001266a492a6ff443b61238f956b214dbec9910a081ba5b6646a055a805"}, +python-versions = "!=3.9.0,!=3.9.1,>=3.7" +files = [ + {file = "cryptography-44.0.2-cp37-abi3-macosx_10_9_universal2.whl", hash = "sha256:efcfe97d1b3c79e486554efddeb8f6f53a4cdd4cf6086642784fa31fc384e1d7"}, + {file = "cryptography-44.0.2-cp37-abi3-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:29ecec49f3ba3f3849362854b7253a9f59799e3763b0c9d0826259a88efa02f1"}, + {file = "cryptography-44.0.2-cp37-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:bc821e161ae88bfe8088d11bb39caf2916562e0a2dc7b6d56714a48b784ef0bb"}, + {file = "cryptography-44.0.2-cp37-abi3-manylinux_2_28_aarch64.whl", hash = "sha256:3c00b6b757b32ce0f62c574b78b939afab9eecaf597c4d624caca4f9e71e7843"}, + {file = "cryptography-44.0.2-cp37-abi3-manylinux_2_28_armv7l.manylinux_2_31_armv7l.whl", hash = "sha256:7bdcd82189759aba3816d1f729ce42ffded1ac304c151d0a8e89b9996ab863d5"}, + {file = "cryptography-44.0.2-cp37-abi3-manylinux_2_28_x86_64.whl", hash = "sha256:4973da6ca3db4405c54cd0b26d328be54c7747e89e284fcff166132eb7bccc9c"}, + {file = "cryptography-44.0.2-cp37-abi3-manylinux_2_34_aarch64.whl", hash = "sha256:4e389622b6927d8133f314949a9812972711a111d577a5d1f4bee5e58736b80a"}, + {file = "cryptography-44.0.2-cp37-abi3-manylinux_2_34_x86_64.whl", hash = "sha256:f514ef4cd14bb6fb484b4a60203e912cfcb64f2ab139e88c2274511514bf7308"}, + {file = "cryptography-44.0.2-cp37-abi3-musllinux_1_2_aarch64.whl", hash = "sha256:1bc312dfb7a6e5d66082c87c34c8a62176e684b6fe3d90fcfe1568de675e6688"}, + {file = "cryptography-44.0.2-cp37-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:3b721b8b4d948b218c88cb8c45a01793483821e709afe5f622861fc6182b20a7"}, + {file = "cryptography-44.0.2-cp37-abi3-win32.whl", hash = 
"sha256:51e4de3af4ec3899d6d178a8c005226491c27c4ba84101bfb59c901e10ca9f79"}, + {file = "cryptography-44.0.2-cp37-abi3-win_amd64.whl", hash = "sha256:c505d61b6176aaf982c5717ce04e87da5abc9a36a5b39ac03905c4aafe8de7aa"}, + {file = "cryptography-44.0.2-cp39-abi3-macosx_10_9_universal2.whl", hash = "sha256:8e0ddd63e6bf1161800592c71ac794d3fb8001f2caebe0966e77c5234fa9efc3"}, + {file = "cryptography-44.0.2-cp39-abi3-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:81276f0ea79a208d961c433a947029e1a15948966658cf6710bbabb60fcc2639"}, + {file = "cryptography-44.0.2-cp39-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9a1e657c0f4ea2a23304ee3f964db058c9e9e635cc7019c4aa21c330755ef6fd"}, + {file = "cryptography-44.0.2-cp39-abi3-manylinux_2_28_aarch64.whl", hash = "sha256:6210c05941994290f3f7f175a4a57dbbb2afd9273657614c506d5976db061181"}, + {file = "cryptography-44.0.2-cp39-abi3-manylinux_2_28_armv7l.manylinux_2_31_armv7l.whl", hash = "sha256:d1c3572526997b36f245a96a2b1713bf79ce99b271bbcf084beb6b9b075f29ea"}, + {file = "cryptography-44.0.2-cp39-abi3-manylinux_2_28_x86_64.whl", hash = "sha256:b042d2a275c8cee83a4b7ae30c45a15e6a4baa65a179a0ec2d78ebb90e4f6699"}, + {file = "cryptography-44.0.2-cp39-abi3-manylinux_2_34_aarch64.whl", hash = "sha256:d03806036b4f89e3b13b6218fefea8d5312e450935b1a2d55f0524e2ed7c59d9"}, + {file = "cryptography-44.0.2-cp39-abi3-manylinux_2_34_x86_64.whl", hash = "sha256:c7362add18b416b69d58c910caa217f980c5ef39b23a38a0880dfd87bdf8cd23"}, + {file = "cryptography-44.0.2-cp39-abi3-musllinux_1_2_aarch64.whl", hash = "sha256:8cadc6e3b5a1f144a039ea08a0bdb03a2a92e19c46be3285123d32029f40a922"}, + {file = "cryptography-44.0.2-cp39-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:6f101b1f780f7fc613d040ca4bdf835c6ef3b00e9bd7125a4255ec574c7916e4"}, + {file = "cryptography-44.0.2-cp39-abi3-win32.whl", hash = "sha256:3dc62975e31617badc19a906481deacdeb80b4bb454394b4098e3f2525a488c5"}, + {file = "cryptography-44.0.2-cp39-abi3-win_amd64.whl", 
hash = "sha256:5f6f90b72d8ccadb9c6e311c775c8305381db88374c65fa1a68250aa8a9cb3a6"}, + {file = "cryptography-44.0.2-pp310-pypy310_pp73-macosx_10_9_x86_64.whl", hash = "sha256:af4ff3e388f2fa7bff9f7f2b31b87d5651c45731d3e8cfa0944be43dff5cfbdb"}, + {file = "cryptography-44.0.2-pp310-pypy310_pp73-manylinux_2_28_aarch64.whl", hash = "sha256:0529b1d5a0105dd3731fa65680b45ce49da4d8115ea76e9da77a875396727b41"}, + {file = "cryptography-44.0.2-pp310-pypy310_pp73-manylinux_2_28_x86_64.whl", hash = "sha256:7ca25849404be2f8e4b3c59483d9d3c51298a22c1c61a0e84415104dacaf5562"}, + {file = "cryptography-44.0.2-pp310-pypy310_pp73-manylinux_2_34_aarch64.whl", hash = "sha256:268e4e9b177c76d569e8a145a6939eca9a5fec658c932348598818acf31ae9a5"}, + {file = "cryptography-44.0.2-pp310-pypy310_pp73-manylinux_2_34_x86_64.whl", hash = "sha256:9eb9d22b0a5d8fd9925a7764a054dca914000607dff201a24c791ff5c799e1fa"}, + {file = "cryptography-44.0.2-pp310-pypy310_pp73-win_amd64.whl", hash = "sha256:2bf7bf75f7df9715f810d1b038870309342bff3069c5bd8c6b96128cb158668d"}, + {file = "cryptography-44.0.2-pp311-pypy311_pp73-manylinux_2_28_aarch64.whl", hash = "sha256:909c97ab43a9c0c0b0ada7a1281430e4e5ec0458e6d9244c0e821bbf152f061d"}, + {file = "cryptography-44.0.2-pp311-pypy311_pp73-manylinux_2_28_x86_64.whl", hash = "sha256:96e7a5e9d6e71f9f4fca8eebfd603f8e86c5225bb18eb621b2c1e50b290a9471"}, + {file = "cryptography-44.0.2-pp311-pypy311_pp73-manylinux_2_34_aarch64.whl", hash = "sha256:d1b3031093a366ac767b3feb8bcddb596671b3aaff82d4050f984da0c248b615"}, + {file = "cryptography-44.0.2-pp311-pypy311_pp73-manylinux_2_34_x86_64.whl", hash = "sha256:04abd71114848aa25edb28e225ab5f268096f44cf0127f3d36975bdf1bdf3390"}, + {file = "cryptography-44.0.2.tar.gz", hash = "sha256:c63454aa261a0cf0c5b4718349629793e9e634993538db841165b3df74f37ec0"}, ] [package.dependencies] cffi = {version = ">=1.12", markers = "platform_python_implementation != \"PyPy\""} [package.extras] -docs = ["sphinx (>=5.3.0)", "sphinx-rtd-theme (>=1.1.1)"] -docstest 
= ["pyenchant (>=1.6.11)", "readme-renderer", "sphinxcontrib-spelling (>=4.0.1)"] -nox = ["nox"] -pep8test = ["check-sdist", "click", "mypy", "ruff"] -sdist = ["build"] +docs = ["sphinx (>=5.3.0)", "sphinx-rtd-theme (>=3.0.0)"] +docstest = ["pyenchant (>=3)", "readme-renderer (>=30.0)", "sphinxcontrib-spelling (>=7.3.1)"] +nox = ["nox (>=2024.4.15)", "nox[uv] (>=2024.3.2)"] +pep8test = ["check-sdist", "click (>=8.0.1)", "mypy (>=1.4)", "ruff (>=0.3.6)"] +sdist = ["build (>=1.0.0)"] ssh = ["bcrypt (>=3.1.5)"] -test = ["certifi", "cryptography-vectors (==43.0.3)", "pretend", "pytest (>=6.2.0)", "pytest-benchmark", "pytest-cov", "pytest-xdist"] +test = ["certifi (>=2024)", "cryptography-vectors (==44.0.2)", "pretend (>=0.7)", "pytest (>=7.4.0)", "pytest-benchmark (>=4.0)", "pytest-cov (>=2.10.1)", "pytest-xdist (>=3.5.0)"] test-randomorder = ["pytest-randomly"] [[package]] @@ -1234,22 +1244,22 @@ tests = ["pytest", "simplejson"] [[package]] name = "msal" -version = "1.25.0" -description = "The Microsoft Authentication Library (MSAL) for Python library" +version = "1.27.0" +description = "The Microsoft Authentication Library (MSAL) for Python library enables your app to access the Microsoft Cloud by supporting authentication of users with Microsoft Azure Active Directory accounts (AAD) and Microsoft Accounts (MSA) using industry standard OAuth2 and OpenID Connect." 
optional = false python-versions = ">=2.7" files = [ - {file = "msal-1.25.0-py2.py3-none-any.whl", hash = "sha256:386df621becb506bc315a713ec3d4d5b5d6163116955c7dde23622f156b81af6"}, - {file = "msal-1.25.0.tar.gz", hash = "sha256:f44329fdb59f4f044c779164a34474b8a44ad9e4940afbc4c3a3a2bbe90324d9"}, + {file = "msal-1.27.0-py2.py3-none-any.whl", hash = "sha256:572d07149b83e7343a85a3bcef8e581167b4ac76befcbbb6eef0c0e19643cdc0"}, + {file = "msal-1.27.0.tar.gz", hash = "sha256:3109503c038ba6b307152b0e8d34f98113f2e7a78986e28d0baf5b5303afda52"}, ] [package.dependencies] -cryptography = ">=0.6,<44" +cryptography = ">=0.6,<45" PyJWT = {version = ">=1.0.0,<3", extras = ["crypto"]} requests = ">=2.0.0,<3" [package.extras] -broker = ["pymsalruntime (>=0.13.2,<0.14)"] +broker = ["pymsalruntime (>=0.13.2,<0.15)"] [[package]] name = "mypy-extensions" @@ -1734,51 +1744,57 @@ test = ["pytest", "pytest-xdist", "setuptools"] [[package]] name = "pyarrow" -version = "15.0.2" +version = "19.0.1" description = "Python library for Apache Arrow" optional = false -python-versions = ">=3.8" +python-versions = ">=3.9" files = [ - {file = "pyarrow-15.0.2-cp310-cp310-macosx_10_15_x86_64.whl", hash = "sha256:88b340f0a1d05b5ccc3d2d986279045655b1fe8e41aba6ca44ea28da0d1455d8"}, - {file = "pyarrow-15.0.2-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:eaa8f96cecf32da508e6c7f69bb8401f03745c050c1dd42ec2596f2e98deecac"}, - {file = "pyarrow-15.0.2-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:23c6753ed4f6adb8461e7c383e418391b8d8453c5d67e17f416c3a5d5709afbd"}, - {file = "pyarrow-15.0.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f639c059035011db8c0497e541a8a45d98a58dbe34dc8fadd0ef128f2cee46e5"}, - {file = "pyarrow-15.0.2-cp310-cp310-manylinux_2_28_aarch64.whl", hash = "sha256:290e36a59a0993e9a5224ed2fb3e53375770f07379a0ea03ee2fce2e6d30b423"}, - {file = "pyarrow-15.0.2-cp310-cp310-manylinux_2_28_x86_64.whl", hash = 
"sha256:06c2bb2a98bc792f040bef31ad3e9be6a63d0cb39189227c08a7d955db96816e"}, - {file = "pyarrow-15.0.2-cp310-cp310-win_amd64.whl", hash = "sha256:f7a197f3670606a960ddc12adbe8075cea5f707ad7bf0dffa09637fdbb89f76c"}, - {file = "pyarrow-15.0.2-cp311-cp311-macosx_10_15_x86_64.whl", hash = "sha256:5f8bc839ea36b1f99984c78e06e7a06054693dc2af8920f6fb416b5bca9944e4"}, - {file = "pyarrow-15.0.2-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:f5e81dfb4e519baa6b4c80410421528c214427e77ca0ea9461eb4097c328fa33"}, - {file = "pyarrow-15.0.2-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:3a4f240852b302a7af4646c8bfe9950c4691a419847001178662a98915fd7ee7"}, - {file = "pyarrow-15.0.2-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:4e7d9cfb5a1e648e172428c7a42b744610956f3b70f524aa3a6c02a448ba853e"}, - {file = "pyarrow-15.0.2-cp311-cp311-manylinux_2_28_aarch64.whl", hash = "sha256:2d4f905209de70c0eb5b2de6763104d5a9a37430f137678edfb9a675bac9cd98"}, - {file = "pyarrow-15.0.2-cp311-cp311-manylinux_2_28_x86_64.whl", hash = "sha256:90adb99e8ce5f36fbecbbc422e7dcbcbed07d985eed6062e459e23f9e71fd197"}, - {file = "pyarrow-15.0.2-cp311-cp311-win_amd64.whl", hash = "sha256:b116e7fd7889294cbd24eb90cd9bdd3850be3738d61297855a71ac3b8124ee38"}, - {file = "pyarrow-15.0.2-cp312-cp312-macosx_10_15_x86_64.whl", hash = "sha256:25335e6f1f07fdaa026a61c758ee7d19ce824a866b27bba744348fa73bb5a440"}, - {file = "pyarrow-15.0.2-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:90f19e976d9c3d8e73c80be84ddbe2f830b6304e4c576349d9360e335cd627fc"}, - {file = "pyarrow-15.0.2-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a22366249bf5fd40ddacc4f03cd3160f2d7c247692945afb1899bab8a140ddfb"}, - {file = "pyarrow-15.0.2-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c2a335198f886b07e4b5ea16d08ee06557e07db54a8400cc0d03c7f6a22f785f"}, - {file = "pyarrow-15.0.2-cp312-cp312-manylinux_2_28_aarch64.whl", hash = 
"sha256:3e6d459c0c22f0b9c810a3917a1de3ee704b021a5fb8b3bacf968eece6df098f"}, - {file = "pyarrow-15.0.2-cp312-cp312-manylinux_2_28_x86_64.whl", hash = "sha256:033b7cad32198754d93465dcfb71d0ba7cb7cd5c9afd7052cab7214676eec38b"}, - {file = "pyarrow-15.0.2-cp312-cp312-win_amd64.whl", hash = "sha256:29850d050379d6e8b5a693098f4de7fd6a2bea4365bfd073d7c57c57b95041ee"}, - {file = "pyarrow-15.0.2-cp38-cp38-macosx_10_15_x86_64.whl", hash = "sha256:7167107d7fb6dcadb375b4b691b7e316f4368f39f6f45405a05535d7ad5e5058"}, - {file = "pyarrow-15.0.2-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:e85241b44cc3d365ef950432a1b3bd44ac54626f37b2e3a0cc89c20e45dfd8bf"}, - {file = "pyarrow-15.0.2-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:248723e4ed3255fcd73edcecc209744d58a9ca852e4cf3d2577811b6d4b59818"}, - {file = "pyarrow-15.0.2-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3ff3bdfe6f1b81ca5b73b70a8d482d37a766433823e0c21e22d1d7dde76ca33f"}, - {file = "pyarrow-15.0.2-cp38-cp38-manylinux_2_28_aarch64.whl", hash = "sha256:f3d77463dee7e9f284ef42d341689b459a63ff2e75cee2b9302058d0d98fe142"}, - {file = "pyarrow-15.0.2-cp38-cp38-manylinux_2_28_x86_64.whl", hash = "sha256:8c1faf2482fb89766e79745670cbca04e7018497d85be9242d5350cba21357e1"}, - {file = "pyarrow-15.0.2-cp38-cp38-win_amd64.whl", hash = "sha256:28f3016958a8e45a1069303a4a4f6a7d4910643fc08adb1e2e4a7ff056272ad3"}, - {file = "pyarrow-15.0.2-cp39-cp39-macosx_10_15_x86_64.whl", hash = "sha256:89722cb64286ab3d4daf168386f6968c126057b8c7ec3ef96302e81d8cdb8ae4"}, - {file = "pyarrow-15.0.2-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:cd0ba387705044b3ac77b1b317165c0498299b08261d8122c96051024f953cd5"}, - {file = "pyarrow-15.0.2-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:ad2459bf1f22b6a5cdcc27ebfd99307d5526b62d217b984b9f5c974651398832"}, - {file = "pyarrow-15.0.2-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = 
"sha256:58922e4bfece8b02abf7159f1f53a8f4d9f8e08f2d988109126c17c3bb261f22"}, - {file = "pyarrow-15.0.2-cp39-cp39-manylinux_2_28_aarch64.whl", hash = "sha256:adccc81d3dc0478ea0b498807b39a8d41628fa9210729b2f718b78cb997c7c91"}, - {file = "pyarrow-15.0.2-cp39-cp39-manylinux_2_28_x86_64.whl", hash = "sha256:8bd2baa5fe531571847983f36a30ddbf65261ef23e496862ece83bdceb70420d"}, - {file = "pyarrow-15.0.2-cp39-cp39-win_amd64.whl", hash = "sha256:6669799a1d4ca9da9c7e06ef48368320f5856f36f9a4dd31a11839dda3f6cc8c"}, - {file = "pyarrow-15.0.2.tar.gz", hash = "sha256:9c9bc803cb3b7bfacc1e96ffbfd923601065d9d3f911179d81e72d99fd74a3d9"}, + {file = "pyarrow-19.0.1-cp310-cp310-macosx_12_0_arm64.whl", hash = "sha256:fc28912a2dc924dddc2087679cc8b7263accc71b9ff025a1362b004711661a69"}, + {file = "pyarrow-19.0.1-cp310-cp310-macosx_12_0_x86_64.whl", hash = "sha256:fca15aabbe9b8355800d923cc2e82c8ef514af321e18b437c3d782aa884eaeec"}, + {file = "pyarrow-19.0.1-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:ad76aef7f5f7e4a757fddcdcf010a8290958f09e3470ea458c80d26f4316ae89"}, + {file = "pyarrow-19.0.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d03c9d6f2a3dffbd62671ca070f13fc527bb1867b4ec2b98c7eeed381d4f389a"}, + {file = "pyarrow-19.0.1-cp310-cp310-manylinux_2_28_aarch64.whl", hash = "sha256:65cf9feebab489b19cdfcfe4aa82f62147218558d8d3f0fc1e9dea0ab8e7905a"}, + {file = "pyarrow-19.0.1-cp310-cp310-manylinux_2_28_x86_64.whl", hash = "sha256:41f9706fbe505e0abc10e84bf3a906a1338905cbbcf1177b71486b03e6ea6608"}, + {file = "pyarrow-19.0.1-cp310-cp310-win_amd64.whl", hash = "sha256:c6cb2335a411b713fdf1e82a752162f72d4a7b5dbc588e32aa18383318b05866"}, + {file = "pyarrow-19.0.1-cp311-cp311-macosx_12_0_arm64.whl", hash = "sha256:cc55d71898ea30dc95900297d191377caba257612f384207fe9f8293b5850f90"}, + {file = "pyarrow-19.0.1-cp311-cp311-macosx_12_0_x86_64.whl", hash = "sha256:7a544ec12de66769612b2d6988c36adc96fb9767ecc8ee0a4d270b10b1c51e00"}, + {file = 
"pyarrow-19.0.1-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:0148bb4fc158bfbc3d6dfe5001d93ebeed253793fff4435167f6ce1dc4bddeae"}, + {file = "pyarrow-19.0.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f24faab6ed18f216a37870d8c5623f9c044566d75ec586ef884e13a02a9d62c5"}, + {file = "pyarrow-19.0.1-cp311-cp311-manylinux_2_28_aarch64.whl", hash = "sha256:4982f8e2b7afd6dae8608d70ba5bd91699077323f812a0448d8b7abdff6cb5d3"}, + {file = "pyarrow-19.0.1-cp311-cp311-manylinux_2_28_x86_64.whl", hash = "sha256:49a3aecb62c1be1d822f8bf629226d4a96418228a42f5b40835c1f10d42e4db6"}, + {file = "pyarrow-19.0.1-cp311-cp311-win_amd64.whl", hash = "sha256:008a4009efdb4ea3d2e18f05cd31f9d43c388aad29c636112c2966605ba33466"}, + {file = "pyarrow-19.0.1-cp312-cp312-macosx_12_0_arm64.whl", hash = "sha256:80b2ad2b193e7d19e81008a96e313fbd53157945c7be9ac65f44f8937a55427b"}, + {file = "pyarrow-19.0.1-cp312-cp312-macosx_12_0_x86_64.whl", hash = "sha256:ee8dec072569f43835932a3b10c55973593abc00936c202707a4ad06af7cb294"}, + {file = "pyarrow-19.0.1-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:4d5d1ec7ec5324b98887bdc006f4d2ce534e10e60f7ad995e7875ffa0ff9cb14"}, + {file = "pyarrow-19.0.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f3ad4c0eb4e2a9aeb990af6c09e6fa0b195c8c0e7b272ecc8d4d2b6574809d34"}, + {file = "pyarrow-19.0.1-cp312-cp312-manylinux_2_28_aarch64.whl", hash = "sha256:d383591f3dcbe545f6cc62daaef9c7cdfe0dff0fb9e1c8121101cabe9098cfa6"}, + {file = "pyarrow-19.0.1-cp312-cp312-manylinux_2_28_x86_64.whl", hash = "sha256:b4c4156a625f1e35d6c0b2132635a237708944eb41df5fbe7d50f20d20c17832"}, + {file = "pyarrow-19.0.1-cp312-cp312-win_amd64.whl", hash = "sha256:5bd1618ae5e5476b7654c7b55a6364ae87686d4724538c24185bbb2952679960"}, + {file = "pyarrow-19.0.1-cp313-cp313-macosx_12_0_arm64.whl", hash = "sha256:e45274b20e524ae5c39d7fc1ca2aa923aab494776d2d4b316b49ec7572ca324c"}, + {file = 
"pyarrow-19.0.1-cp313-cp313-macosx_12_0_x86_64.whl", hash = "sha256:d9dedeaf19097a143ed6da37f04f4051aba353c95ef507764d344229b2b740ae"}, + {file = "pyarrow-19.0.1-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:6ebfb5171bb5f4a52319344ebbbecc731af3f021e49318c74f33d520d31ae0c4"}, + {file = "pyarrow-19.0.1-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f2a21d39fbdb948857f67eacb5bbaaf36802de044ec36fbef7a1c8f0dd3a4ab2"}, + {file = "pyarrow-19.0.1-cp313-cp313-manylinux_2_28_aarch64.whl", hash = "sha256:99bc1bec6d234359743b01e70d4310d0ab240c3d6b0da7e2a93663b0158616f6"}, + {file = "pyarrow-19.0.1-cp313-cp313-manylinux_2_28_x86_64.whl", hash = "sha256:1b93ef2c93e77c442c979b0d596af45e4665d8b96da598db145b0fec014b9136"}, + {file = "pyarrow-19.0.1-cp313-cp313-win_amd64.whl", hash = "sha256:d9d46e06846a41ba906ab25302cf0fd522f81aa2a85a71021826f34639ad31ef"}, + {file = "pyarrow-19.0.1-cp313-cp313t-macosx_12_0_arm64.whl", hash = "sha256:c0fe3dbbf054a00d1f162fda94ce236a899ca01123a798c561ba307ca38af5f0"}, + {file = "pyarrow-19.0.1-cp313-cp313t-macosx_12_0_x86_64.whl", hash = "sha256:96606c3ba57944d128e8a8399da4812f56c7f61de8c647e3470b417f795d0ef9"}, + {file = "pyarrow-19.0.1-cp313-cp313t-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:8f04d49a6b64cf24719c080b3c2029a3a5b16417fd5fd7c4041f94233af732f3"}, + {file = "pyarrow-19.0.1-cp313-cp313t-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:5a9137cf7e1640dce4c190551ee69d478f7121b5c6f323553b319cac936395f6"}, + {file = "pyarrow-19.0.1-cp313-cp313t-manylinux_2_28_aarch64.whl", hash = "sha256:7c1bca1897c28013db5e4c83944a2ab53231f541b9e0c3f4791206d0c0de389a"}, + {file = "pyarrow-19.0.1-cp313-cp313t-manylinux_2_28_x86_64.whl", hash = "sha256:58d9397b2e273ef76264b45531e9d552d8ec8a6688b7390b5be44c02a37aade8"}, + {file = "pyarrow-19.0.1-cp39-cp39-macosx_12_0_arm64.whl", hash = "sha256:b9766a47a9cb56fefe95cb27f535038b5a195707a08bf61b180e642324963b46"}, + 
{file = "pyarrow-19.0.1-cp39-cp39-macosx_12_0_x86_64.whl", hash = "sha256:6c5941c1aac89a6c2f2b16cd64fe76bcdb94b2b1e99ca6459de4e6f07638d755"}, + {file = "pyarrow-19.0.1-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:fd44d66093a239358d07c42a91eebf5015aa54fccba959db899f932218ac9cc8"}, + {file = "pyarrow-19.0.1-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:335d170e050bcc7da867a1ed8ffb8b44c57aaa6e0843b156a501298657b1e972"}, + {file = "pyarrow-19.0.1-cp39-cp39-manylinux_2_28_aarch64.whl", hash = "sha256:1c7556165bd38cf0cd992df2636f8bcdd2d4b26916c6b7e646101aff3c16f76f"}, + {file = "pyarrow-19.0.1-cp39-cp39-manylinux_2_28_x86_64.whl", hash = "sha256:699799f9c80bebcf1da0983ba86d7f289c5a2a5c04b945e2f2bcf7e874a91911"}, + {file = "pyarrow-19.0.1-cp39-cp39-win_amd64.whl", hash = "sha256:8464c9fbe6d94a7fe1599e7e8965f350fd233532868232ab2596a71586c5a429"}, + {file = "pyarrow-19.0.1.tar.gz", hash = "sha256:3bf266b485df66a400f282ac0b6d1b500b9d2ae73314a153dbe97d6d5cc8a99e"}, ] -[package.dependencies] -numpy = ">=1.16.6,<2" +[package.extras] +test = ["cffi", "hypothesis", "pandas", "pytest", "pytz"] [[package]] name = "pycparser" @@ -3064,4 +3080,4 @@ files = [ [metadata] lock-version = "2.0" python-versions = "^3.11,<3.12" -content-hash = "9f22c0b43e9dc49f4cda41c2aee5e95b21127454e06cd769025b3ec456ff6e23" +content-hash = "93b7616fd4990bd64dc228af569c877cd0d8281b528e2074ee4fb23f0424607e" diff --git a/airbyte-integrations/connectors/source-microsoft-sharepoint/pyproject.toml b/airbyte-integrations/connectors/source-microsoft-sharepoint/pyproject.toml index 9418e7291c5b4..b5807318459c6 100644 --- a/airbyte-integrations/connectors/source-microsoft-sharepoint/pyproject.toml +++ b/airbyte-integrations/connectors/source-microsoft-sharepoint/pyproject.toml @@ -17,10 +17,10 @@ include = "source_microsoft_sharepoint" [tool.poetry.dependencies] python = "^3.11,<3.12" -msal = "==1.25.0" +msal = "==1.27.0" Office365-REST-Python-Client = 
"==2.5.5" smart-open = "==6.4.0" -airbyte-cdk = {extras = ["file-based"], version = "^6"} +airbyte-cdk = {extras = ["file-based"], version = "^6.38.5"} [tool.poetry.scripts] source-microsoft-sharepoint = "source_microsoft_sharepoint.run:run" From d271e1e70ecc8d2d081767cafed21f9cd6ad67e0 Mon Sep 17 00:00:00 2001 From: Aldo Gonzalez Date: Sat, 22 Mar 2025 17:01:07 -0600 Subject: [PATCH 08/17] sourece-microsoft-sharepoint: remove unnecesary comments --- .../source-microsoft-sharepoint/unit_tests/test_utils.py | 4 ---- 1 file changed, 4 deletions(-) diff --git a/airbyte-integrations/connectors/source-microsoft-sharepoint/unit_tests/test_utils.py b/airbyte-integrations/connectors/source-microsoft-sharepoint/unit_tests/test_utils.py index 6fd8b15e8a536..01cc998ec084e 100644 --- a/airbyte-integrations/connectors/source-microsoft-sharepoint/unit_tests/test_utils.py +++ b/airbyte-integrations/connectors/source-microsoft-sharepoint/unit_tests/test_utils.py @@ -220,7 +220,6 @@ def test_get_site(mock_execute_query_with_retry, site_url, expected_method_call) mock_execute_query_with_retry.assert_called_once() assert result - # Additional assertions to verify the site object's structure is maintained assert result.web_url == "https://example.sharepoint.com/sites/test" if site_url else "https://example.sharepoint.com" assert result.site_collection.hostname == "example.sharepoint.com" assert result.name == "Test Site" @@ -245,7 +244,6 @@ def test_get_site(mock_execute_query_with_retry, site_url, expected_method_call) ], ) def test_get_site_prefix(web_url, hostname, expected_site_url, expected_prefix): - # Create a mock Site object with the correct spec mock_site = Mock(spec=Site) mock_site.web_url = web_url mock_site.site_collection = Mock() @@ -258,12 +256,10 @@ def test_get_site_prefix(web_url, hostname, expected_site_url, expected_prefix): def test_get_site_prefix_invalid_hostname(): - # Create a mock Site object with the correct spec mock_site = Mock(spec=Site) 
mock_site.web_url = "https://invalid" mock_site.site_collection = Mock() mock_site.site_collection.hostname = "invalid" - # Call the function and expect a ValueError with pytest.raises(ValueError, match="Invalid host name: invalid"): get_site_prefix(mock_site) From 200dabc5d22c69a28c375e8cb4c1b7246780a887 Mon Sep 17 00:00:00 2001 From: Aldo Gonzalez Date: Sun, 30 Mar 2025 20:13:37 -0600 Subject: [PATCH 09/17] source-microsoft-sharepoint: add typing to util methods --- .../source-microsoft-sharepoint/poetry.lock | 152 +++++++++++++++--- .../pyproject.toml | 2 +- .../source_microsoft_sharepoint/utils.py | 34 +++- 3 files changed, 159 insertions(+), 29 deletions(-) diff --git a/airbyte-integrations/connectors/source-microsoft-sharepoint/poetry.lock b/airbyte-integrations/connectors/source-microsoft-sharepoint/poetry.lock index b3b298e5e9666..df3e3c80cc18a 100644 --- a/airbyte-integrations/connectors/source-microsoft-sharepoint/poetry.lock +++ b/airbyte-integrations/connectors/source-microsoft-sharepoint/poetry.lock @@ -1,4 +1,4 @@ -# This file is automatically @generated by Poetry 1.8.3 and should not be changed by hand. +# This file is automatically @generated by Poetry 2.1.1 and should not be changed by hand. [[package]] name = "airbyte-cdk" @@ -6,6 +6,7 @@ version = "6.41.8" description = "A framework for writing Airbyte Connectors." optional = false python-versions = "<3.13,>=3.10" +groups = ["main"] files = [ {file = "airbyte_cdk-6.41.8-py3-none-any.whl", hash = "sha256:0a06428464ea9adc21a7c4ad092ac3fd962afef72d0b30a2f6993f00c9e4fcfb"}, {file = "airbyte_cdk-6.41.8.tar.gz", hash = "sha256:e88ef8659b2de726b58f7f44fa1e9e261d091d07acdf22be6ef84a2a5e4292aa"}, @@ -69,6 +70,7 @@ version = "0.14.2" description = "Declares the Airbyte Protocol using Python Dataclasses. 
Dataclasses in Python have less performance overhead compared to Pydantic models, making them a more efficient choice for scenarios where speed and memory usage are critical" optional = false python-versions = ">=3.8" +groups = ["main"] files = [ {file = "airbyte_protocol_models_dataclasses-0.14.2-py3-none-any.whl", hash = "sha256:ae06a406df031afa42f1156bacc587958197e5c7d9bbaf11893480903d4ded8b"}, {file = "airbyte_protocol_models_dataclasses-0.14.2.tar.gz", hash = "sha256:9279237156b722cdd54e7b9ec8f97d264bd96e3f3008bc5fc47c215288a2212a"}, @@ -80,6 +82,7 @@ version = "0.7.0" description = "Reusable constraint types to use with typing.Annotated" optional = false python-versions = ">=3.8" +groups = ["main"] files = [ {file = "annotated_types-0.7.0-py3-none-any.whl", hash = "sha256:1f02e8b43a8fbbc3f3e0d4f0f4bfc8131bcb4eebe8849b8e5c773f3a1c582a53"}, {file = "annotated_types-0.7.0.tar.gz", hash = "sha256:aff07c09a53a08bc8cfccb9c85b05f1aa9a2a6f23728d790723543408344ce89"}, @@ -91,6 +94,7 @@ version = "0.3.2" description = "Unicode to ASCII transliteration" optional = false python-versions = ">=3.3" +groups = ["main"] files = [ {file = "anyascii-0.3.2-py3-none-any.whl", hash = "sha256:3b3beef6fc43d9036d3b0529050b0c48bfad8bc960e9e562d7223cfb94fe45d4"}, {file = "anyascii-0.3.2.tar.gz", hash = "sha256:9d5d32ef844fe225b8bc7cba7f950534fae4da27a9bf3a6bea2cb0ea46ce4730"}, @@ -102,6 +106,7 @@ version = "4.9.0" description = "High level compatibility layer for multiple asynchronous event loop implementations" optional = false python-versions = ">=3.9" +groups = ["main"] files = [ {file = "anyio-4.9.0-py3-none-any.whl", hash = "sha256:9f76d541cad6e36af7beb62e978876f3b41e3e04f2c1fbf0884604c0a9c4d93c"}, {file = "anyio-4.9.0.tar.gz", hash = "sha256:673c0c244e15788651a4ff38710fea9675823028a6f08a5eda409e0c9840a028"}, @@ -114,7 +119,7 @@ typing_extensions = {version = ">=4.5", markers = "python_version < \"3.13\""} [package.extras] doc = ["Sphinx (>=8.2,<9.0)", "packaging", 
"sphinx-autodoc-typehints (>=1.2.0)", "sphinx_rtd_theme"] -test = ["anyio[trio]", "blockbuster (>=1.5.23)", "coverage[toml] (>=7)", "exceptiongroup (>=1.2.0)", "hypothesis (>=4.0)", "psutil (>=5.9)", "pytest (>=7.0)", "trustme", "truststore (>=0.9.1)", "uvloop (>=0.21)"] +test = ["anyio[trio]", "blockbuster (>=1.5.23)", "coverage[toml] (>=7)", "exceptiongroup (>=1.2.0)", "hypothesis (>=4.0)", "psutil (>=5.9)", "pytest (>=7.0)", "trustme", "truststore (>=0.9.1) ; python_version >= \"3.10\"", "uvloop (>=0.21) ; platform_python_implementation == \"CPython\" and platform_system != \"Windows\" and python_version < \"3.14\""] trio = ["trio (>=0.26.1)"] [[package]] @@ -123,6 +128,7 @@ version = "0.4.0" description = "PEP 224 implementation" optional = false python-versions = ">=3.8" +groups = ["main"] files = [ {file = "attributes-doc-0.4.0.tar.gz", hash = "sha256:b1576c94a714e9fc2c65c47cf10d0c8e1a5f7c4f5ae7f69006be108d95cbfbfb"}, {file = "attributes_doc-0.4.0-py2.py3-none-any.whl", hash = "sha256:4c3007d9e58f3a6cb4b9c614c4d4ce2d92161581f28e594ddd8241cc3a113bdd"}, @@ -134,18 +140,19 @@ version = "25.3.0" description = "Classes Without Boilerplate" optional = false python-versions = ">=3.8" +groups = ["main"] files = [ {file = "attrs-25.3.0-py3-none-any.whl", hash = "sha256:427318ce031701fea540783410126f03899a97ffc6f61596ad581ac2e40e3bc3"}, {file = "attrs-25.3.0.tar.gz", hash = "sha256:75d7cefc7fb576747b2c81b4442d4d4a1ce0900973527c011d1030fd3bf4af1b"}, ] [package.extras] -benchmark = ["cloudpickle", "hypothesis", "mypy (>=1.11.1)", "pympler", "pytest (>=4.3.0)", "pytest-codspeed", "pytest-mypy-plugins", "pytest-xdist[psutil]"] -cov = ["cloudpickle", "coverage[toml] (>=5.3)", "hypothesis", "mypy (>=1.11.1)", "pympler", "pytest (>=4.3.0)", "pytest-mypy-plugins", "pytest-xdist[psutil]"] -dev = ["cloudpickle", "hypothesis", "mypy (>=1.11.1)", "pre-commit-uv", "pympler", "pytest (>=4.3.0)", "pytest-mypy-plugins", "pytest-xdist[psutil]"] +benchmark = ["cloudpickle ; 
platform_python_implementation == \"CPython\"", "hypothesis", "mypy (>=1.11.1) ; platform_python_implementation == \"CPython\" and python_version >= \"3.10\"", "pympler", "pytest (>=4.3.0)", "pytest-codspeed", "pytest-mypy-plugins ; platform_python_implementation == \"CPython\" and python_version >= \"3.10\"", "pytest-xdist[psutil]"] +cov = ["cloudpickle ; platform_python_implementation == \"CPython\"", "coverage[toml] (>=5.3)", "hypothesis", "mypy (>=1.11.1) ; platform_python_implementation == \"CPython\" and python_version >= \"3.10\"", "pympler", "pytest (>=4.3.0)", "pytest-mypy-plugins ; platform_python_implementation == \"CPython\" and python_version >= \"3.10\"", "pytest-xdist[psutil]"] +dev = ["cloudpickle ; platform_python_implementation == \"CPython\"", "hypothesis", "mypy (>=1.11.1) ; platform_python_implementation == \"CPython\" and python_version >= \"3.10\"", "pre-commit-uv", "pympler", "pytest (>=4.3.0)", "pytest-mypy-plugins ; platform_python_implementation == \"CPython\" and python_version >= \"3.10\"", "pytest-xdist[psutil]"] docs = ["cogapp", "furo", "myst-parser", "sphinx", "sphinx-notfound-page", "sphinxcontrib-towncrier", "towncrier"] -tests = ["cloudpickle", "hypothesis", "mypy (>=1.11.1)", "pympler", "pytest (>=4.3.0)", "pytest-mypy-plugins", "pytest-xdist[psutil]"] -tests-mypy = ["mypy (>=1.11.1)", "pytest-mypy-plugins"] +tests = ["cloudpickle ; platform_python_implementation == \"CPython\"", "hypothesis", "mypy (>=1.11.1) ; platform_python_implementation == \"CPython\" and python_version >= \"3.10\"", "pympler", "pytest (>=4.3.0)", "pytest-mypy-plugins ; platform_python_implementation == \"CPython\" and python_version >= \"3.10\"", "pytest-xdist[psutil]"] +tests-mypy = ["mypy (>=1.11.1) ; platform_python_implementation == \"CPython\" and python_version >= \"3.10\"", "pytest-mypy-plugins ; platform_python_implementation == \"CPython\" and python_version >= \"3.10\""] [[package]] name = "avro" @@ -153,6 +160,7 @@ version = "1.12.0" 
description = "Avro is a serialization and RPC framework." optional = false python-versions = ">=3.7" +groups = ["main"] files = [ {file = "avro-1.12.0-py2.py3-none-any.whl", hash = "sha256:9a255c72e1837341dd4f6ff57b2b6f68c0f0cecdef62dd04962e10fd33bec05b"}, {file = "avro-1.12.0.tar.gz", hash = "sha256:cad9c53b23ceed699c7af6bddced42e2c572fd6b408c257a7d4fc4e8cf2e2d6b"}, @@ -168,6 +176,7 @@ version = "2.2.1" description = "Function decoration for backoff and retry" optional = false python-versions = ">=3.7,<4.0" +groups = ["main"] files = [ {file = "backoff-2.2.1-py3-none-any.whl", hash = "sha256:63579f9a0628e06278f7e47b7d7d5b6ce20dc65c5e96a6f3ca99a6adca0396e8"}, {file = "backoff-2.2.1.tar.gz", hash = "sha256:03f829f5bb1923180821643f8753b0502c3b682293992485b0eef2807afa5cba"}, @@ -179,6 +188,7 @@ version = "4.13.3" description = "Screen-scraping library" optional = false python-versions = ">=3.7.0" +groups = ["main"] files = [ {file = "beautifulsoup4-4.13.3-py3-none-any.whl", hash = "sha256:99045d7d3f08f91f0d656bc9b7efbae189426cd913d830294a15eefa0ea4df16"}, {file = "beautifulsoup4-4.13.3.tar.gz", hash = "sha256:1bd32405dacc920b42b83ba01644747ed77456a65760e285fbc47633ceddaf8b"}, @@ -201,6 +211,7 @@ version = "2.5.post1" description = "Bash style brace expander." 
optional = false python-versions = ">=3.8" +groups = ["main"] files = [ {file = "bracex-2.5.post1-py3-none-any.whl", hash = "sha256:13e5732fec27828d6af308628285ad358047cec36801598368cb28bc631dbaf6"}, {file = "bracex-2.5.post1.tar.gz", hash = "sha256:12c50952415bfa773d2d9ccb8e79651b8cdb1f31a42f6091b804f6ba2b4a66b6"}, @@ -212,6 +223,7 @@ version = "5.5.2" description = "Extensible memoizing collections and decorators" optional = false python-versions = ">=3.7" +groups = ["main"] files = [ {file = "cachetools-5.5.2-py3-none-any.whl", hash = "sha256:d26a22bcc62eb95c3beabd9f1ee5e820d3d2704fe2967cbe350e20c8ffcd3f0a"}, {file = "cachetools-5.5.2.tar.gz", hash = "sha256:1a661caa9175d26759571b2e19580f9d6393969e5dfca11fdb1f947a23e640d4"}, @@ -223,6 +235,7 @@ version = "24.1.3" description = "Composable complex class support for attrs and dataclasses." optional = false python-versions = ">=3.8" +groups = ["main"] files = [ {file = "cattrs-24.1.3-py3-none-any.whl", hash = "sha256:adf957dddd26840f27ffbd060a6c4dd3b2192c5b7c2c0525ef1bd8131d8a83f5"}, {file = "cattrs-24.1.3.tar.gz", hash = "sha256:981a6ef05875b5bb0c7fb68885546186d306f10f0f6718fe9b96c226e68821ff"}, @@ -235,8 +248,8 @@ attrs = ">=23.1.0" bson = ["pymongo (>=4.4.0)"] cbor2 = ["cbor2 (>=5.4.6)"] msgpack = ["msgpack (>=1.0.5)"] -msgspec = ["msgspec (>=0.18.5)"] -orjson = ["orjson (>=3.9.2)"] +msgspec = ["msgspec (>=0.18.5) ; implementation_name == \"cpython\""] +orjson = ["orjson (>=3.9.2) ; implementation_name == \"cpython\""] pyyaml = ["pyyaml (>=6.0)"] tomlkit = ["tomlkit (>=0.11.8)"] ujson = ["ujson (>=5.7.0)"] @@ -247,6 +260,7 @@ version = "2025.1.31" description = "Python package for providing Mozilla's CA Bundle." 
optional = false python-versions = ">=3.6" +groups = ["main", "dev"] files = [ {file = "certifi-2025.1.31-py3-none-any.whl", hash = "sha256:ca78db4565a652026a4db2bcdf68f2fb589ea80d0be70e03929ed730746b84fe"}, {file = "certifi-2025.1.31.tar.gz", hash = "sha256:3d5da6925056f6f18f119200434a4780a94263f10d1c21d032a6f6b2baa20651"}, @@ -258,6 +272,8 @@ version = "1.17.1" description = "Foreign Function Interface for Python calling C code." optional = false python-versions = ">=3.8" +groups = ["main"] +markers = "platform_python_implementation != \"PyPy\"" files = [ {file = "cffi-1.17.1-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:df8b1c11f177bc2313ec4b2d46baec87a5f3e71fc8b45dab2ee7cae86d9aba14"}, {file = "cffi-1.17.1-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:8f2cdc858323644ab277e9bb925ad72ae0e67f69e804f4898c070998d50b1a67"}, @@ -337,6 +353,7 @@ version = "5.2.0" description = "Universal encoding detector for Python 3" optional = false python-versions = ">=3.7" +groups = ["main"] files = [ {file = "chardet-5.2.0-py3-none-any.whl", hash = "sha256:e1cf59446890a00105fe7b7912492ea04b6e6f06d4b742b2c788469e34c82970"}, {file = "chardet-5.2.0.tar.gz", hash = "sha256:1b3b6ff479a8c414bc3fa2c0852995695c4a026dcd6d0633b2dd092ca39c1cf7"}, @@ -348,6 +365,7 @@ version = "3.4.1" description = "The Real First Universal Charset Detector. Open, modern and actively maintained alternative to Chardet." 
optional = false python-versions = ">=3.7" +groups = ["main", "dev"] files = [ {file = "charset_normalizer-3.4.1-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:91b36a978b5ae0ee86c394f5a54d6ef44db1de0815eb43de826d41d21e4af3de"}, {file = "charset_normalizer-3.4.1-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:7461baadb4dc00fd9e0acbe254e3d7d2112e7f92ced2adc96e54ef6501c5f176"}, @@ -449,6 +467,7 @@ version = "8.1.8" description = "Composable command line interface toolkit" optional = false python-versions = ">=3.7" +groups = ["main"] files = [ {file = "click-8.1.8-py3-none-any.whl", hash = "sha256:63c132bbbed01578a06712a2d1f497bb62d9c1c0d329b7903a866228027263b2"}, {file = "click-8.1.8.tar.gz", hash = "sha256:ed53c9d8990d83c2a27deae68e4ee337473f6330c040a31d4225c9574d16096a"}, @@ -463,10 +482,12 @@ version = "0.4.6" description = "Cross-platform colored terminal text." optional = false python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,!=3.4.*,!=3.5.*,!=3.6.*,>=2.7" +groups = ["main", "dev"] files = [ {file = "colorama-0.4.6-py2.py3-none-any.whl", hash = "sha256:4f1d9991f5acc0ca119f9d443620b77f9d6b33703e51011c16baf57afb285fc6"}, {file = "colorama-0.4.6.tar.gz", hash = "sha256:08695f5cb7ed6e0531a20572697297273c47b8cae5a63ffc6d6ed5c201be6e44"}, ] +markers = {main = "platform_system == \"Windows\"", dev = "sys_platform == \"win32\""} [[package]] name = "cramjam" @@ -474,6 +495,7 @@ version = "2.9.1" description = "Thin Python bindings to de/compression algorithms in Rust" optional = false python-versions = ">=3.8" +groups = ["main"] files = [ {file = "cramjam-2.9.1-cp310-cp310-macosx_10_12_x86_64.whl", hash = "sha256:8e82464d1e00fbbb12958999b8471ba5e9f3d9711954505a0a7b378762332e6f"}, {file = "cramjam-2.9.1-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:6d2df8a6511cc08ef1fccd2e0c65e2ebc9f57574ec8376052a76851af5398810"}, @@ -576,6 +598,7 @@ version = "44.0.2" description = "cryptography is a package which provides cryptographic 
recipes and primitives to Python developers." optional = false python-versions = "!=3.9.0,!=3.9.1,>=3.7" +groups = ["main"] files = [ {file = "cryptography-44.0.2-cp37-abi3-macosx_10_9_universal2.whl", hash = "sha256:efcfe97d1b3c79e486554efddeb8f6f53a4cdd4cf6086642784fa31fc384e1d7"}, {file = "cryptography-44.0.2-cp37-abi3-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:29ecec49f3ba3f3849362854b7253a9f59799e3763b0c9d0826259a88efa02f1"}, @@ -618,10 +641,10 @@ files = [ cffi = {version = ">=1.12", markers = "platform_python_implementation != \"PyPy\""} [package.extras] -docs = ["sphinx (>=5.3.0)", "sphinx-rtd-theme (>=3.0.0)"] +docs = ["sphinx (>=5.3.0)", "sphinx-rtd-theme (>=3.0.0) ; python_version >= \"3.8\""] docstest = ["pyenchant (>=3)", "readme-renderer (>=30.0)", "sphinxcontrib-spelling (>=7.3.1)"] -nox = ["nox (>=2024.4.15)", "nox[uv] (>=2024.3.2)"] -pep8test = ["check-sdist", "click (>=8.0.1)", "mypy (>=1.4)", "ruff (>=0.3.6)"] +nox = ["nox (>=2024.4.15)", "nox[uv] (>=2024.3.2) ; python_version >= \"3.8\""] +pep8test = ["check-sdist ; python_version >= \"3.8\"", "click (>=8.0.1)", "mypy (>=1.4)", "ruff (>=0.3.6)"] sdist = ["build (>=1.0.0)"] ssh = ["bcrypt (>=3.1.5)"] test = ["certifi (>=2024)", "cryptography-vectors (==44.0.2)", "pretend (>=0.7)", "pytest (>=7.4.0)", "pytest-benchmark (>=4.0)", "pytest-cov (>=2.10.1)", "pytest-xdist (>=3.5.0)"] @@ -633,6 +656,7 @@ version = "0.6.7" description = "Easily serialize dataclasses to and from JSON." 
optional = false python-versions = "<4.0,>=3.7" +groups = ["main"] files = [ {file = "dataclasses_json-0.6.7-py3-none-any.whl", hash = "sha256:0dbf33f26c8d5305befd61b39d2b3414e8a407bedc2834dea9b8d642666fb40a"}, {file = "dataclasses_json-0.6.7.tar.gz", hash = "sha256:b6b3e528266ea45b9535223bc53ca645f5208833c29229e847b3f26a1cc55fc0"}, @@ -648,6 +672,7 @@ version = "2.2.0" description = "Filesystem-like pathing and searching for dictionaries" optional = false python-versions = ">=3.7" +groups = ["main"] files = [ {file = "dpath-2.2.0-py3-none-any.whl", hash = "sha256:b330a375ded0a0d2ed404440f6c6a715deae5313af40bbb01c8a41d891900576"}, {file = "dpath-2.2.0.tar.gz", hash = "sha256:34f7e630dc55ea3f219e555726f5da4b4b25f2200319c8e6902c394258dd6a3e"}, @@ -659,6 +684,7 @@ version = "1.23.1" description = "Dynamic version generation" optional = false python-versions = ">=3.5" +groups = ["main"] files = [ {file = "dunamai-1.23.1-py3-none-any.whl", hash = "sha256:2611b0b9105a5797149ef82f4968a01dd912bdac857d49fc06856a4cfa58cf78"}, {file = "dunamai-1.23.1.tar.gz", hash = "sha256:0b5712fc63bfb235263d912bfc5eb84590ba2201bb737268d25a5dbad7085489"}, @@ -673,6 +699,7 @@ version = "2.14.1" description = "Emoji for Python" optional = false python-versions = ">=3.7" +groups = ["main"] files = [ {file = "emoji-2.14.1-py3-none-any.whl", hash = "sha256:35a8a486c1460addb1499e3bf7929d3889b2e2841a57401903699fef595e942b"}, {file = "emoji-2.14.1.tar.gz", hash = "sha256:f8c50043d79a2c1410ebfae833ae1868d5941a67a6cd4d18377e2eb0bd79346b"}, @@ -687,6 +714,7 @@ version = "1.8.4" description = "Fast read/write of AVRO files" optional = false python-versions = ">=3.8" +groups = ["main"] files = [ {file = "fastavro-1.8.4-cp310-cp310-macosx_11_0_x86_64.whl", hash = "sha256:7afe1475e8a967c04e2b0ef4d33bc10bffa66b4fa6e08bd2ee9d91b6768cba2a"}, {file = "fastavro-1.8.4-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f5fd73609f3c1ac0d90ae3179d2fb9d788f842245db2656ff9225fce871fc5b7"}, 
@@ -733,6 +761,7 @@ version = "1.2.0" description = "Infer file type and MIME type of any file/buffer. No external dependencies." optional = false python-versions = "*" +groups = ["main"] files = [ {file = "filetype-1.2.0-py2.py3-none-any.whl", hash = "sha256:7ce71b6880181241cf7ac8697a2f1eb6a8bd9b429f7ad6d27b8db9ba5f1c2d25"}, {file = "filetype-1.2.0.tar.gz", hash = "sha256:66b56cd6474bf41d8c54660347d37afcc3f7d1970648de365c102ef77548aadb"}, @@ -744,6 +773,7 @@ version = "1.3.0" description = "GenSON is a powerful, user-friendly JSON Schema generator." optional = false python-versions = "*" +groups = ["main"] files = [ {file = "genson-1.3.0-py3-none-any.whl", hash = "sha256:468feccd00274cc7e4c09e84b08704270ba8d95232aa280f65b986139cec67f7"}, {file = "genson-1.3.0.tar.gz", hash = "sha256:e02db9ac2e3fd29e65b5286f7135762e2cd8a986537c075b06fc5f1517308e37"}, @@ -755,6 +785,7 @@ version = "0.14.0" description = "A pure-Python, bring-your-own-I/O implementation of HTTP/1.1" optional = false python-versions = ">=3.7" +groups = ["main"] files = [ {file = "h11-0.14.0-py3-none-any.whl", hash = "sha256:e3fe4ac4b851c468cc8363d500db52c2ead036020723024a109d37346efaa761"}, {file = "h11-0.14.0.tar.gz", hash = "sha256:8f19fbbe99e72420ff35c00b27a34cb9937e902a8b810e2c88300c6f0a3b699d"}, @@ -766,6 +797,7 @@ version = "1.0.7" description = "A minimal low-level HTTP client." optional = false python-versions = ">=3.8" +groups = ["main"] files = [ {file = "httpcore-1.0.7-py3-none-any.whl", hash = "sha256:a3fff8f43dc260d5bd363d9f9cf1830fa3a458b332856f34282de498ed420edd"}, {file = "httpcore-1.0.7.tar.gz", hash = "sha256:8551cb62a169ec7162ac7be8d4817d561f60e08eaa485234898414bb5a8a0b4c"}, @@ -787,6 +819,7 @@ version = "0.28.1" description = "The next generation HTTP client." 
optional = false python-versions = ">=3.8" +groups = ["main"] files = [ {file = "httpx-0.28.1-py3-none-any.whl", hash = "sha256:d909fcccc110f8c7faf814ca82a9a4d816bc5a6dbfea25d6591d6985b8ba59ad"}, {file = "httpx-0.28.1.tar.gz", hash = "sha256:75e98c5f16b0f35b567856f597f06ff2270a374470a5c2392242528e3e3e42fc"}, @@ -799,7 +832,7 @@ httpcore = "==1.*" idna = "*" [package.extras] -brotli = ["brotli", "brotlicffi"] +brotli = ["brotli ; platform_python_implementation == \"CPython\"", "brotlicffi ; platform_python_implementation != \"CPython\""] cli = ["click (==8.*)", "pygments (==2.*)", "rich (>=10,<14)"] http2 = ["h2 (>=3,<5)"] socks = ["socksio (==1.*)"] @@ -811,6 +844,7 @@ version = "3.10" description = "Internationalized Domain Names in Applications (IDNA)" optional = false python-versions = ">=3.6" +groups = ["main", "dev"] files = [ {file = "idna-3.10-py3-none-any.whl", hash = "sha256:946d195a0d259cbba61165e88e65941f16e9b36ea6ddb97f00452bae8b1287d3"}, {file = "idna-3.10.tar.gz", hash = "sha256:12f65c9b470abda6dc35cf8e63cc574b1c52b11df2c86030af0ac09b01b13ea9"}, @@ -825,6 +859,7 @@ version = "2.1.0" description = "brain-dead simple config-ini parsing" optional = false python-versions = ">=3.8" +groups = ["dev"] files = [ {file = "iniconfig-2.1.0-py3-none-any.whl", hash = "sha256:9deba5723312380e77435581c6bf4935c94cbfab9b1ed33ef8d238ea168eb760"}, {file = "iniconfig-2.1.0.tar.gz", hash = "sha256:3abbd2e30b36733fee78f9c7f7308f2d0050e88f0087fd25c2645f63c773e1c7"}, @@ -836,6 +871,7 @@ version = "0.6.1" description = "An ISO 8601 date/time/duration parser and formatter" optional = false python-versions = "*" +groups = ["main"] files = [ {file = "isodate-0.6.1-py2.py3-none-any.whl", hash = "sha256:0751eece944162659049d35f4f549ed815792b38793f07cf73381c1c87cbed96"}, {file = "isodate-0.6.1.tar.gz", hash = "sha256:48c5881de7e8b0a0d648cb024c8062dc84e7b840ed81e864c7614fd3c127bde9"}, @@ -850,6 +886,7 @@ version = "3.1.6" description = "A very fast and expressive template engine." 
optional = false python-versions = ">=3.7" +groups = ["main"] files = [ {file = "jinja2-3.1.6-py3-none-any.whl", hash = "sha256:85ece4451f492d0c13c5dd7c13a64681a86afae63a5f347908daf103ce6d2f67"}, {file = "jinja2-3.1.6.tar.gz", hash = "sha256:0137fb05990d35f1275a587e9aee6d56da821fc83491a0fb838183be43f66d6d"}, @@ -867,6 +904,7 @@ version = "1.4.2" description = "Lightweight pipelining with Python functions" optional = false python-versions = ">=3.8" +groups = ["main"] files = [ {file = "joblib-1.4.2-py3-none-any.whl", hash = "sha256:06d478d5674cbc267e7496a410ee875abd68e4340feff4490bcb7afb88060ae6"}, {file = "joblib-1.4.2.tar.gz", hash = "sha256:2382c5816b2636fbd20a09e0f4e9dad4736765fdfb7dca582943b9c1366b3f0e"}, @@ -878,6 +916,7 @@ version = "1.33" description = "Apply JSON-Patches (RFC 6902)" optional = false python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*, !=3.5.*, !=3.6.*" +groups = ["main"] files = [ {file = "jsonpatch-1.33-py2.py3-none-any.whl", hash = "sha256:0ae28c0cd062bbd8b8ecc26d7d164fbbea9652a1a3693f3b956c1eae5145dade"}, {file = "jsonpatch-1.33.tar.gz", hash = "sha256:9fcd4009c41e6d12348b4a0ff2563ba56a2923a7dfee731d004e212e1ee5030c"}, @@ -892,6 +931,7 @@ version = "3.0.0" description = "Identify specific nodes in a JSON document (RFC 6901)" optional = false python-versions = ">=3.7" +groups = ["main"] files = [ {file = "jsonpointer-3.0.0-py2.py3-none-any.whl", hash = "sha256:13e088adc14fca8b6aa8177c044e12701e6ad4b28ff10e65f2267a90109c9942"}, {file = "jsonpointer-3.0.0.tar.gz", hash = "sha256:2b2d729f2091522d61c3b31f82e11870f60b68f43fbc705cb76bf4b832af59ef"}, @@ -903,6 +943,7 @@ version = "0.2" description = "An implementation of JSON Reference for Python" optional = false python-versions = "*" +groups = ["main"] files = [ {file = "jsonref-0.2-py3-none-any.whl", hash = "sha256:b1e82fa0b62e2c2796a13e5401fe51790b248f6d9bf9d7212a3e31a3501b291f"}, {file = "jsonref-0.2.tar.gz", hash = 
"sha256:f3c45b121cf6257eafabdc3a8008763aed1cd7da06dbabc59a9e4d2a5e4e6697"}, @@ -914,6 +955,7 @@ version = "4.17.3" description = "An implementation of JSON Schema validation for Python" optional = false python-versions = ">=3.7" +groups = ["main"] files = [ {file = "jsonschema-4.17.3-py3-none-any.whl", hash = "sha256:a870ad254da1a8ca84b6a2905cac29d265f805acc57af304784962a2aa6508f6"}, {file = "jsonschema-4.17.3.tar.gz", hash = "sha256:0f864437ab8b6076ba6707453ef8f98a6a0d512a80e93f8abdb676f737ecb60d"}, @@ -933,6 +975,7 @@ version = "0.1.42" description = "Building applications with LLMs through composability" optional = false python-versions = "<4.0,>=3.8.1" +groups = ["main"] files = [ {file = "langchain_core-0.1.42-py3-none-any.whl", hash = "sha256:c5653ffa08a44f740295c157a24c0def4a753333f6a2c41f76bf431cd00be8b5"}, {file = "langchain_core-0.1.42.tar.gz", hash = "sha256:40751bf60ea5d8e2b2efe65290db434717ee3834870c002e40e2811f09d814e6"}, @@ -955,6 +998,7 @@ version = "1.0.9" description = "Language detection library ported from Google's language-detection." optional = false python-versions = "*" +groups = ["main"] files = [ {file = "langdetect-1.0.9-py2-none-any.whl", hash = "sha256:7cbc0746252f19e76f77c0b1690aadf01963be835ef0cd4b56dddf2a8f1dfc2a"}, {file = "langdetect-1.0.9.tar.gz", hash = "sha256:cbc1fef89f8d062739774bd51eda3da3274006b3661d199c2655f6b3f6d605a0"}, @@ -969,6 +1013,7 @@ version = "0.1.147" description = "Client library to connect to the LangSmith LLM Tracing and Evaluation Platform." 
optional = false python-versions = "<4.0,>=3.8.1" +groups = ["main"] files = [ {file = "langsmith-0.1.147-py3-none-any.whl", hash = "sha256:7166fc23b965ccf839d64945a78e9f1157757add228b086141eb03a60d699a15"}, {file = "langsmith-0.1.147.tar.gz", hash = "sha256:2e933220318a4e73034657103b3b1a3a6109cc5db3566a7e8e03be8d6d7def7a"}, @@ -990,6 +1035,7 @@ version = "5.3.1" description = "Powerful and Pythonic XML processing library combining libxml2/libxslt with the ElementTree API." optional = false python-versions = ">=3.6" +groups = ["main"] files = [ {file = "lxml-5.3.1-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:a4058f16cee694577f7e4dd410263cd0ef75644b43802a689c2b3c2a7e69453b"}, {file = "lxml-5.3.1-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:364de8f57d6eda0c16dcfb999af902da31396949efa0e583e12675d09709881b"}, @@ -1144,6 +1190,7 @@ version = "3.7" description = "Python implementation of John Gruber's Markdown." optional = false python-versions = ">=3.8" +groups = ["main"] files = [ {file = "Markdown-3.7-py3-none-any.whl", hash = "sha256:7eb6df5690b81a1d7942992c97fad2938e956e79df20cbc6186e9c3a77b1c803"}, {file = "markdown-3.7.tar.gz", hash = "sha256:2ae2471477cfd02dbbf038d5d9bc226d40def84b4fe2986e49b59b6b472bbed2"}, @@ -1159,6 +1206,7 @@ version = "3.0.2" description = "Safely add untrusted strings to HTML/XML markup." optional = false python-versions = ">=3.9" +groups = ["main"] files = [ {file = "MarkupSafe-3.0.2-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:7e94c425039cde14257288fd61dcfb01963e658efbc0ff54f5306b06054700f8"}, {file = "MarkupSafe-3.0.2-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:9e2d922824181480953426608b81967de705c3cef4d1af983af849d7bd619158"}, @@ -1229,6 +1277,7 @@ version = "3.26.1" description = "A lightweight library for converting complex datatypes to and from native Python datatypes." 
optional = false python-versions = ">=3.9" +groups = ["main"] files = [ {file = "marshmallow-3.26.1-py3-none-any.whl", hash = "sha256:3350409f20a70a7e4e11a27661187b77cdcaeb20abca41c1454fe33636bea09c"}, {file = "marshmallow-3.26.1.tar.gz", hash = "sha256:e6d8affb6cb61d39d26402096dc0aee12d5a26d490a121f118d2e81dc0719dc6"}, @@ -1248,6 +1297,7 @@ version = "1.27.0" description = "The Microsoft Authentication Library (MSAL) for Python library enables your app to access the Microsoft Cloud by supporting authentication of users with Microsoft Azure Active Directory accounts (AAD) and Microsoft Accounts (MSA) using industry standard OAuth2 and OpenID Connect." optional = false python-versions = ">=2.7" +groups = ["main"] files = [ {file = "msal-1.27.0-py2.py3-none-any.whl", hash = "sha256:572d07149b83e7343a85a3bcef8e581167b4ac76befcbbb6eef0c0e19643cdc0"}, {file = "msal-1.27.0.tar.gz", hash = "sha256:3109503c038ba6b307152b0e8d34f98113f2e7a78986e28d0baf5b5303afda52"}, @@ -1259,7 +1309,7 @@ PyJWT = {version = ">=1.0.0,<3", extras = ["crypto"]} requests = ">=2.0.0,<3" [package.extras] -broker = ["pymsalruntime (>=0.13.2,<0.15)"] +broker = ["pymsalruntime (>=0.13.2,<0.15) ; python_version >= \"3.6\" and platform_system == \"Windows\""] [[package]] name = "mypy-extensions" @@ -1267,6 +1317,7 @@ version = "1.0.0" description = "Type system extensions for programs checked with the mypy type checker." 
optional = false python-versions = ">=3.5" +groups = ["main"] files = [ {file = "mypy_extensions-1.0.0-py3-none-any.whl", hash = "sha256:4392f6c0eb8a5668a69e23d168ffa70f0be9ccfd32b5cc2d26a34ae5b844552d"}, {file = "mypy_extensions-1.0.0.tar.gz", hash = "sha256:75dbf8955dc00442a438fc4d0666508a9a97b6bd41aa2f0ffe9d2f2725af0782"}, @@ -1278,6 +1329,7 @@ version = "3.9.1" description = "Natural Language Toolkit" optional = false python-versions = ">=3.8" +groups = ["main"] files = [ {file = "nltk-3.9.1-py3-none-any.whl", hash = "sha256:4fa26829c5b00715afe3061398a8989dc643b92ce7dd93fb4585a70930d168a1"}, {file = "nltk-3.9.1.tar.gz", hash = "sha256:87d127bd3de4bd89a4f81265e5fa59cb1b199b27440175370f7417d2bc7ae868"}, @@ -1303,6 +1355,7 @@ version = "1.26.4" description = "Fundamental package for array computing in Python" optional = false python-versions = ">=3.9" +groups = ["main"] files = [ {file = "numpy-1.26.4-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:9ff0f4f29c51e2803569d7a51c2304de5554655a60c5d776e35b4a41413830d0"}, {file = "numpy-1.26.4-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:2e4ee3380d6de9c9ec04745830fd9e2eccb3e6cf790d39d7b98ffd19b0dd754a"}, @@ -1348,6 +1401,7 @@ version = "2.5.5" description = "Microsoft 365 & Microsoft Graph Library for Python" optional = false python-versions = "*" +groups = ["main"] files = [ {file = "Office365-REST-Python-Client-2.5.5.tar.gz", hash = "sha256:2396f3ac1bc544646abff3db9e45f0e43a28d20668ed9a4736554c5262e70a86"}, {file = "Office365_REST_Python_Client-2.5.5-py3-none-any.whl", hash = "sha256:d64dcb9b3fe76859f8d570136c0e448a36ae26a8d71b52b4c5127eb9ae2290ca"}, @@ -1368,6 +1422,7 @@ version = "3.10.16" description = "Fast, correct Python JSON library supporting dataclasses, datetimes, and numpy" optional = false python-versions = ">=3.9" +groups = ["main"] files = [ {file = "orjson-3.10.16-cp310-cp310-macosx_10_15_x86_64.macosx_11_0_arm64.macosx_10_15_universal2.whl", hash = 
"sha256:4cb473b8e79154fa778fb56d2d73763d977be3dcc140587e07dbc545bbfc38f8"}, {file = "orjson-3.10.16-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:622a8e85eeec1948690409a19ca1c7d9fd8ff116f4861d261e6ae2094fe59a00"}, @@ -1445,6 +1500,7 @@ version = "23.2" description = "Core utilities for Python packages" optional = false python-versions = ">=3.7" +groups = ["main", "dev"] files = [ {file = "packaging-23.2-py3-none-any.whl", hash = "sha256:8c491190033a9af7e1d931d0b5dacc2ef47509b34dd0de67ed209b5203fc88c7"}, {file = "packaging-23.2.tar.gz", hash = "sha256:048fb0e9405036518eaaf48a55953c750c11e1a1b68e0dd1a9d62ed0c092cfc5"}, @@ -1456,6 +1512,7 @@ version = "2.2.2" description = "Powerful data structures for data analysis, time series, and statistics" optional = false python-versions = ">=3.9" +groups = ["main"] files = [ {file = "pandas-2.2.2-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:90c6fca2acf139569e74e8781709dccb6fe25940488755716d1d354d6bc58bce"}, {file = "pandas-2.2.2-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:c7adfc142dac335d8c1e0dcbd37eb8617eac386596eb9e1a1b77791cf2498238"}, @@ -1525,6 +1582,7 @@ version = "0.2.1" description = "Bring colors to your terminal." optional = false python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*" +groups = ["dev"] files = [ {file = "pastel-0.2.1-py2.py3-none-any.whl", hash = "sha256:4349225fcdf6c2bb34d483e523475de5bb04a5c10ef711263452cb37d7dd4364"}, {file = "pastel-0.2.1.tar.gz", hash = "sha256:e6581ac04e973cac858828c6202c1e1e81fee1dc7de7683f3e1ffe0bfd8a573d"}, @@ -1536,6 +1594,7 @@ version = "1.16.3" description = "A wrapper around the pdftoppm and pdftocairo command line tools to convert PDF to a PIL Image list." 
optional = false python-versions = "*" +groups = ["main"] files = [ {file = "pdf2image-1.16.3-py3-none-any.whl", hash = "sha256:b6154164af3677211c22cbb38b2bd778b43aca02758e962fe1e231f6d3b0e380"}, {file = "pdf2image-1.16.3.tar.gz", hash = "sha256:74208810c2cef4d9e347769b8e62a52303982ddb4f2dfd744c7ab4b940ae287e"}, @@ -1550,6 +1609,7 @@ version = "20221105" description = "PDF parser and analyzer" optional = false python-versions = ">=3.6" +groups = ["main"] files = [ {file = "pdfminer.six-20221105-py3-none-any.whl", hash = "sha256:1eaddd712d5b2732f8ac8486824533514f8ba12a0787b3d5fe1e686cd826532d"}, {file = "pdfminer.six-20221105.tar.gz", hash = "sha256:8448ab7b939d18b64820478ecac5394f482d7a79f5f7eaa7703c6c959c175e1d"}, @@ -1570,6 +1630,7 @@ version = "11.1.0" description = "Python Imaging Library (Fork)" optional = false python-versions = ">=3.9" +groups = ["main"] files = [ {file = "pillow-11.1.0-cp310-cp310-macosx_10_10_x86_64.whl", hash = "sha256:e1abe69aca89514737465752b4bcaf8016de61b3be1397a8fc260ba33321b3a8"}, {file = "pillow-11.1.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:c640e5a06869c75994624551f45e5506e4256562ead981cce820d5ab39ae2192"}, @@ -1649,7 +1710,7 @@ docs = ["furo", "olefile", "sphinx (>=8.1)", "sphinx-copybutton", "sphinx-inline fpx = ["olefile"] mic = ["olefile"] tests = ["check-manifest", "coverage (>=7.4.2)", "defusedxml", "markdown2", "olefile", "packaging", "pyroma", "pytest", "pytest-cov", "pytest-timeout", "trove-classifiers (>=2024.10.12)"] -typing = ["typing-extensions"] +typing = ["typing-extensions ; python_version < \"3.10\""] xmp = ["defusedxml"] [[package]] @@ -1658,6 +1719,7 @@ version = "4.3.7" description = "A small Python package for determining appropriate platform-specific dirs, e.g. a `user data dir`." 
optional = false python-versions = ">=3.9" +groups = ["main"] files = [ {file = "platformdirs-4.3.7-py3-none-any.whl", hash = "sha256:a03875334331946f13c549dbd8f4bac7a13a50a895a0eb1e8c6a8ace80d40a94"}, {file = "platformdirs-4.3.7.tar.gz", hash = "sha256:eb437d586b6a0986388f0d6f74aa0cde27b48d0e3d66843640bfb6bdcdb6e351"}, @@ -1674,6 +1736,7 @@ version = "1.5.0" description = "plugin and hook calling mechanisms for python" optional = false python-versions = ">=3.8" +groups = ["dev"] files = [ {file = "pluggy-1.5.0-py3-none-any.whl", hash = "sha256:44e1ad92c8ca002de6377e165f3e0f1be63266ab4d554740532335b9d75ea669"}, {file = "pluggy-1.5.0.tar.gz", hash = "sha256:2cffa88e94fdc978c4c574f15f9e59b7f4201d439195c3715ca9e2486f1d0cf1"}, @@ -1689,6 +1752,7 @@ version = "0.32.2" description = "A task runner that works well with poetry." optional = false python-versions = ">=3.9" +groups = ["dev"] files = [ {file = "poethepoet-0.32.2-py3-none-any.whl", hash = "sha256:97e165de8e00b07d33fd8d72896fad8b20ccafcd327b1118bb6a3da26af38d33"}, {file = "poethepoet-0.32.2.tar.gz", hash = "sha256:1d68871dac1b191e27bd68fea57d0e01e9afbba3fcd01dbe6f6bc3fcb071fe4c"}, @@ -1699,7 +1763,7 @@ pastel = ">=0.2.1,<0.3.0" pyyaml = ">=6.0.2,<7.0" [package.extras] -poetry-plugin = ["poetry (>=1.2.0,<3.0.0)"] +poetry-plugin = ["poetry (>=1.2.0,<3.0.0) ; python_version < \"4.0\""] [[package]] name = "psutil" @@ -1707,6 +1771,7 @@ version = "6.1.0" description = "Cross-platform lib for process and system monitoring in Python." 
optional = false python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,!=3.4.*,!=3.5.*,>=2.7" +groups = ["main"] files = [ {file = "psutil-6.1.0-cp27-cp27m-macosx_10_9_x86_64.whl", hash = "sha256:ff34df86226c0227c52f38b919213157588a678d049688eded74c76c8ba4a5d0"}, {file = "psutil-6.1.0-cp27-cp27m-manylinux2010_i686.whl", hash = "sha256:c0e0c00aa18ca2d3b2b991643b799a15fc8f0563d2ebb6040f64ce8dc027b942"}, @@ -1737,6 +1802,7 @@ version = "19.0.1" description = "Python library for Apache Arrow" optional = false python-versions = ">=3.9" +groups = ["main"] files = [ {file = "pyarrow-19.0.1-cp310-cp310-macosx_12_0_arm64.whl", hash = "sha256:fc28912a2dc924dddc2087679cc8b7263accc71b9ff025a1362b004711661a69"}, {file = "pyarrow-19.0.1-cp310-cp310-macosx_12_0_x86_64.whl", hash = "sha256:fca15aabbe9b8355800d923cc2e82c8ef514af321e18b437c3d782aa884eaeec"}, @@ -1791,6 +1857,8 @@ version = "2.22" description = "C parser in Python" optional = false python-versions = ">=3.8" +groups = ["main"] +markers = "platform_python_implementation != \"PyPy\"" files = [ {file = "pycparser-2.22-py3-none-any.whl", hash = "sha256:c3702b6d3dd8c7abc1afa565d7e63d53a1d0bd86cdc24edd75470f4de499cfcc"}, {file = "pycparser-2.22.tar.gz", hash = "sha256:491c8be9c040f5390f5bf44a5b07752bd07f56edf992381b05c701439eec10f6"}, @@ -1802,6 +1870,7 @@ version = "2.11.1" description = "Data validation using Python type hints" optional = false python-versions = ">=3.9" +groups = ["main"] files = [ {file = "pydantic-2.11.1-py3-none-any.whl", hash = "sha256:5b6c415eee9f8123a14d859be0c84363fec6b1feb6b688d6435801230b56e0b8"}, {file = "pydantic-2.11.1.tar.gz", hash = "sha256:442557d2910e75c991c39f4b4ab18963d57b9b55122c8b2a9cd176d8c29ce968"}, @@ -1815,7 +1884,7 @@ typing-inspection = ">=0.4.0" [package.extras] email = ["email-validator (>=2.0.0)"] -timezone = ["tzdata"] +timezone = ["tzdata ; python_version >= \"3.9\" and platform_system == \"Windows\""] [[package]] name = "pydantic-core" @@ -1823,6 +1892,7 @@ version = "2.33.0" 
description = "Core functionality for Pydantic validation and serialization" optional = false python-versions = ">=3.9" +groups = ["main"] files = [ {file = "pydantic_core-2.33.0-cp310-cp310-macosx_10_12_x86_64.whl", hash = "sha256:71dffba8fe9ddff628c68f3abd845e91b028361d43c5f8e7b3f8b91d7d85413e"}, {file = "pydantic_core-2.33.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:abaeec1be6ed535a5d7ffc2e6c390083c425832b20efd621562fbb5bff6dc518"}, @@ -1934,6 +2004,7 @@ version = "2.10.1" description = "JSON Web Token implementation in Python" optional = false python-versions = ">=3.9" +groups = ["main"] files = [ {file = "PyJWT-2.10.1-py3-none-any.whl", hash = "sha256:dcdd193e30abefd5debf142f9adfcdd2b58004e644f25406ffaebd50bd98dacb"}, {file = "pyjwt-2.10.1.tar.gz", hash = "sha256:3cc5772eb20009233caf06e9d8a0577824723b44e6648ee0a2aedb6cf9381953"}, @@ -1954,6 +2025,7 @@ version = "3.1.1" description = "Python Rate-Limiter using Leaky-Bucket Algorithm" optional = false python-versions = ">=3.8,<4.0" +groups = ["main"] files = [ {file = "pyrate_limiter-3.1.1-py3-none-any.whl", hash = "sha256:c51906f1d51d56dc992ff6c26e8300e32151bc6cfa3e6559792e31971dfd4e2b"}, {file = "pyrate_limiter-3.1.1.tar.gz", hash = "sha256:2f57eda712687e6eccddf6afe8f8a15b409b97ed675fe64a626058f12863b7b7"}, @@ -1969,6 +2041,7 @@ version = "0.20.0" description = "Persistent/Functional/Immutable data structures" optional = false python-versions = ">=3.8" +groups = ["main"] files = [ {file = "pyrsistent-0.20.0-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:8c3aba3e01235221e5b229a6c05f585f344734bd1ad42a8ac51493d74722bbce"}, {file = "pyrsistent-0.20.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:c1beb78af5423b879edaf23c5591ff292cf7c33979734c99aa66d5914ead880f"}, @@ -2010,6 +2083,7 @@ version = "0.3.10" description = "Python-tesseract is a python wrapper for Google's Tesseract-OCR" optional = false python-versions = ">=3.7" +groups = ["main"] files = [ {file = 
"pytesseract-0.3.10-py3-none-any.whl", hash = "sha256:8f22cc98f765bf13517ead0c70effedb46c153540d25783e04014f28b55a5fc6"}, {file = "pytesseract-0.3.10.tar.gz", hash = "sha256:f1c3a8b0f07fd01a1085d451f5b8315be6eec1d5577a6796d46dc7a62bd4120f"}, @@ -2025,6 +2099,7 @@ version = "8.3.5" description = "pytest: simple powerful testing with Python" optional = false python-versions = ">=3.8" +groups = ["dev"] files = [ {file = "pytest-8.3.5-py3-none-any.whl", hash = "sha256:c69214aa47deac29fad6c2a4f590b9c4a9fdb16a403176fe154b79c0b4d4d820"}, {file = "pytest-8.3.5.tar.gz", hash = "sha256:f4efe70cc14e511565ac476b57c279e12a855b11f48f212af1080ef2263d3845"}, @@ -2045,6 +2120,7 @@ version = "3.14.0" description = "Thin-wrapper around the mock package for easier use with pytest" optional = false python-versions = ">=3.8" +groups = ["dev"] files = [ {file = "pytest-mock-3.14.0.tar.gz", hash = "sha256:2719255a1efeceadbc056d6bf3df3d1c5015530fb40cf347c0f9afac88410bd0"}, {file = "pytest_mock-3.14.0-py3-none-any.whl", hash = "sha256:0b72c38033392a5f4621342fe11e9219ac11ec9d375f8e2a0c164539e0d70f6f"}, @@ -2062,6 +2138,7 @@ version = "0.2.3" description = "Python binding for Rust's library for reading excel and odf file - calamine" optional = false python-versions = ">=3.8" +groups = ["main"] files = [ {file = "python_calamine-0.2.3-cp310-cp310-macosx_10_12_x86_64.whl", hash = "sha256:f292a03591b1cab1537424851b74baa33b0a55affc315248a7592ba3de1c3e83"}, {file = "python_calamine-0.2.3-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:6cfbd23d1147f53fd70fddfb38af2a98896ecad069c9a4120e77358a6fc43b39"}, @@ -2171,6 +2248,7 @@ version = "2.9.0.post0" description = "Extensions to the standard Python datetime module" optional = false python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,>=2.7" +groups = ["main"] files = [ {file = "python-dateutil-2.9.0.post0.tar.gz", hash = "sha256:37dd54208da7e1cd875388217d5e00ebd4179249f90fb72437e91a35459a0ad3"}, {file = "python_dateutil-2.9.0.post0-py2.py3-none-any.whl", 
hash = "sha256:a8b2bc7bffae282281c8140a97d3aa9c14da0b136dfe83f850eea9a5f7470427"}, @@ -2185,6 +2263,7 @@ version = "1.1.2" description = "Create, read, and update Microsoft Word .docx files." optional = false python-versions = ">=3.7" +groups = ["main"] files = [ {file = "python_docx-1.1.2-py3-none-any.whl", hash = "sha256:08c20d6058916fb19853fcf080f7f42b6270d89eac9fa5f8c15f691c0017fabe"}, {file = "python_docx-1.1.2.tar.gz", hash = "sha256:0cf1f22e95b9002addca7948e16f2cd7acdfd498047f1941ca5d293db7762efd"}, @@ -2200,6 +2279,7 @@ version = "2025.2.18" description = "ISO 639 language codes, names, and other associated information" optional = false python-versions = ">=3.9" +groups = ["main"] files = [ {file = "python_iso639-2025.2.18-py3-none-any.whl", hash = "sha256:b2d471c37483a26f19248458b20e7bd96492e15368b01053b540126bcc23152f"}, {file = "python_iso639-2025.2.18.tar.gz", hash = "sha256:34e31e8e76eb3fc839629e257b12bcfd957c6edcbd486bbf66ba5185d1f566e8"}, @@ -2214,6 +2294,7 @@ version = "0.4.27" description = "File type identification using libmagic" optional = false python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*" +groups = ["main"] files = [ {file = "python-magic-0.4.27.tar.gz", hash = "sha256:c1ba14b08e4a5f5c31a302b7721239695b2f0f058d125bd5ce1ee36b9d9d3c3b"}, {file = "python_magic-0.4.27-py2.py3-none-any.whl", hash = "sha256:c212960ad306f700aa0d01e5d7a325d20548ff97eb9920dcd29513174f0294d3"}, @@ -2225,6 +2306,7 @@ version = "0.6.21" description = "Generate and manipulate Open XML PowerPoint (.pptx) files" optional = false python-versions = "*" +groups = ["main"] files = [ {file = "python-pptx-0.6.21.tar.gz", hash = "sha256:7798a2aaf89563565b3c7120c0acfe9aff775db0db3580544e3bf4840c2e378f"}, ] @@ -2240,6 +2322,7 @@ version = "0.7.3" description = "Python library for the snappy compression library from Google" optional = false python-versions = "*" +groups = ["main"] files = [ {file = "python_snappy-0.7.3-py3-none-any.whl", hash = 
"sha256:074c0636cfcd97e7251330f428064050ac81a52c62ed884fc2ddebbb60ed7f50"}, {file = "python_snappy-0.7.3.tar.gz", hash = "sha256:40216c1badfb2d38ac781ecb162a1d0ec40f8ee9747e610bcfefdfa79486cee3"}, @@ -2254,6 +2337,7 @@ version = "3.0.0" description = "Universally unique lexicographically sortable identifier" optional = false python-versions = ">=3.9" +groups = ["main"] files = [ {file = "python_ulid-3.0.0-py3-none-any.whl", hash = "sha256:e4c4942ff50dbd79167ad01ac725ec58f924b4018025ce22c858bfcff99a5e31"}, {file = "python_ulid-3.0.0.tar.gz", hash = "sha256:e50296a47dc8209d28629a22fc81ca26c00982c78934bd7766377ba37ea49a9f"}, @@ -2268,6 +2352,7 @@ version = "2024.2" description = "World timezone definitions, modern and historical" optional = false python-versions = "*" +groups = ["main"] files = [ {file = "pytz-2024.2-py2.py3-none-any.whl", hash = "sha256:31c7c1817eb7fae7ca4b8c7ee50c72f93aa2dd863de768e1ef4245d426aa0725"}, {file = "pytz-2024.2.tar.gz", hash = "sha256:2aa355083c50a0f93fa581709deac0c9ad65cca8a9e9beac660adcbd493c798a"}, @@ -2279,6 +2364,7 @@ version = "6.0.2" description = "YAML parser and emitter for Python" optional = false python-versions = ">=3.8" +groups = ["main", "dev"] files = [ {file = "PyYAML-6.0.2-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:0a9a2848a5b7feac301353437eb7d5957887edbf81d56e903999a75a3d743086"}, {file = "PyYAML-6.0.2-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:29717114e51c84ddfba879543fb232a6ed60086602313ca38cce623c1d62cfbf"}, @@ -2341,6 +2427,7 @@ version = "3.12.2" description = "rapid fuzzy string matching" optional = false python-versions = ">=3.9" +groups = ["main"] files = [ {file = "rapidfuzz-3.12.2-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:0b9a75e0385a861178adf59e86d6616cbd0d5adca7228dc9eeabf6f62cf5b0b1"}, {file = "rapidfuzz-3.12.2-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:6906a7eb458731e3dd2495af1d0410e23a21a2a2b7ced535e6d5cd15cb69afc5"}, @@ -2447,6 +2534,7 @@ version = "2024.11.6" 
description = "Alternative regular expression module, to replace re." optional = false python-versions = ">=3.8" +groups = ["main"] files = [ {file = "regex-2024.11.6-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:ff590880083d60acc0433f9c3f713c51f7ac6ebb9adf889c79a261ecf541aa91"}, {file = "regex-2024.11.6-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:658f90550f38270639e83ce492f27d2c8d2cd63805c65a13a14d36ca126753f0"}, @@ -2550,6 +2638,7 @@ version = "2.32.3" description = "Python HTTP for Humans." optional = false python-versions = ">=3.8" +groups = ["main", "dev"] files = [ {file = "requests-2.32.3-py3-none-any.whl", hash = "sha256:70761cfe03c773ceb22aa2f671b4757976145175cdfca038c02654d061d6dcc6"}, {file = "requests-2.32.3.tar.gz", hash = "sha256:55365417734eb18255590a9ff9eb97e9e1da868d4ccd6402399eaf68af20a760"}, @@ -2571,6 +2660,7 @@ version = "1.2.1" description = "A persistent cache for python requests" optional = false python-versions = ">=3.8" +groups = ["main"] files = [ {file = "requests_cache-1.2.1-py3-none-any.whl", hash = "sha256:1285151cddf5331067baa82598afe2d47c7495a1334bfe7a7d329b43e9fd3603"}, {file = "requests_cache-1.2.1.tar.gz", hash = "sha256:68abc986fdc5b8d0911318fbb5f7c80eebcd4d01bfacc6685ecf8876052511d1"}, @@ -2601,6 +2691,7 @@ version = "1.12.1" description = "Mock out responses from the requests package" optional = false python-versions = ">=3.5" +groups = ["dev"] files = [ {file = "requests-mock-1.12.1.tar.gz", hash = "sha256:e9e12e333b525156e82a3c852f22016b9158220d2f47454de9cae8a77d371401"}, {file = "requests_mock-1.12.1-py2.py3-none-any.whl", hash = "sha256:b1e37054004cdd5e56c84454cc7df12b25f90f382159087f4b6915aaeef39563"}, @@ -2618,6 +2709,7 @@ version = "1.0.0" description = "A utility belt for advanced users of python-requests" optional = false python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*" +groups = ["main"] files = [ {file = "requests-toolbelt-1.0.0.tar.gz", hash = 
"sha256:7681a0a3d047012b5bdc0ee37d7f8f07ebe76ab08caeccfc3921ce23c88d5bc6"}, {file = "requests_toolbelt-1.0.0-py2.py3-none-any.whl", hash = "sha256:cccfdd665f0a24fcf4726e690f65639d272bb0637b9b92dfd91a5568ccf6bd06"}, @@ -2632,6 +2724,7 @@ version = "1.13.0" description = "" optional = false python-versions = ">=3.9" +groups = ["main"] files = [ {file = "serpyco_rs-1.13.0-cp310-cp310-macosx_10_12_x86_64.macosx_11_0_arm64.macosx_10_12_universal2.whl", hash = "sha256:e722b3053e627d8a304e462bce20cae1670a2c4b0ef875b84d0de0081bec4029"}, {file = "serpyco_rs-1.13.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:7f10e89c752ff78d720a42e026b0a9ada70717ad6306a9356f794280167d62bf"}, @@ -2686,6 +2779,7 @@ version = "1.17.0" description = "Python 2 and 3 compatibility utilities" optional = false python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,>=2.7" +groups = ["main"] files = [ {file = "six-1.17.0-py2.py3-none-any.whl", hash = "sha256:4721f391ed90541fddacab5acf947aa0d3dc7d27b2e1e8eda2be8970586c3274"}, {file = "six-1.17.0.tar.gz", hash = "sha256:ff70335d468e7eb6ec65b95b99d3a2836546063f63acc5171de367e834932a81"}, @@ -2697,6 +2791,7 @@ version = "6.4.0" description = "Utils for streaming large files (S3, HDFS, GCS, Azure Blob Storage, gzip, bz2...)" optional = false python-versions = ">=3.6,<4.0" +groups = ["main"] files = [ {file = "smart_open-6.4.0-py3-none-any.whl", hash = "sha256:8d3ef7e6997e8e42dd55c74166ed21e6ac70664caa32dd940b26d54a8f6b4142"}, {file = "smart_open-6.4.0.tar.gz", hash = "sha256:be3c92c246fbe80ebce8fbacb180494a481a77fcdcb7c1aadb2ea5b9c2bee8b9"}, @@ -2718,6 +2813,7 @@ version = "1.3.1" description = "Sniff out which async library your code is running under" optional = false python-versions = ">=3.7" +groups = ["main"] files = [ {file = "sniffio-1.3.1-py3-none-any.whl", hash = "sha256:2f6da418d1f1e0fddd844478f41680e794e6051915791a034ff65e5f100525a2"}, {file = "sniffio-1.3.1.tar.gz", hash = 
"sha256:f4324edc670a0f49750a81b895f35c3adb843cca46f0530f79fc1babb23789dc"}, @@ -2729,6 +2825,7 @@ version = "2.6" description = "A modern CSS selector implementation for Beautiful Soup." optional = false python-versions = ">=3.8" +groups = ["main"] files = [ {file = "soupsieve-2.6-py3-none-any.whl", hash = "sha256:e72c4ff06e4fb6e4b5a9f0f55fe6e81514581fca1515028625d0f299c602ccc9"}, {file = "soupsieve-2.6.tar.gz", hash = "sha256:e2e68417777af359ec65daac1057404a3c8a5455bb8abc36f1a9866ab1a51abb"}, @@ -2740,6 +2837,7 @@ version = "0.9.0" description = "Pretty-print tabular data" optional = false python-versions = ">=3.7" +groups = ["main"] files = [ {file = "tabulate-0.9.0-py3-none-any.whl", hash = "sha256:024ca478df22e9340661486f85298cff5f6dcdba14f3813e8830015b9ed1948f"}, {file = "tabulate-0.9.0.tar.gz", hash = "sha256:0095b12bf5966de529c0feb1fa08671671b3368eec77d7ef7ab114be2c068b3c"}, @@ -2754,6 +2852,7 @@ version = "8.5.0" description = "Retry code until it succeeds" optional = false python-versions = ">=3.8" +groups = ["main"] files = [ {file = "tenacity-8.5.0-py3-none-any.whl", hash = "sha256:b594c2a5945830c267ce6b79a166228323ed52718f30302c1359836112346687"}, {file = "tenacity-8.5.0.tar.gz", hash = "sha256:8bc6c0c8a09b31e6cad13c47afbed1a567518250a9a171418582ed8d9c20ca78"}, @@ -2769,6 +2868,7 @@ version = "4.67.1" description = "Fast, Extensible Progress Meter" optional = false python-versions = ">=3.7" +groups = ["main"] files = [ {file = "tqdm-4.67.1-py3-none-any.whl", hash = "sha256:26445eca388f82e72884e0d580d5464cd801a3ea01e63e5601bdff9ba6a48de2"}, {file = "tqdm-4.67.1.tar.gz", hash = "sha256:f8aef9c52c08c13a65f30ea34f4e5aac3fd1a34959879d7e59e63027286627f2"}, @@ -2790,6 +2890,7 @@ version = "4.13.0" description = "Backported and Experimental Type Hints for Python 3.8+" optional = false python-versions = ">=3.8" +groups = ["main"] files = [ {file = "typing_extensions-4.13.0-py3-none-any.whl", hash = 
"sha256:c8dd92cc0d6425a97c18fbb9d1954e5ff92c1ca881a309c45f06ebc0b79058e5"}, {file = "typing_extensions-4.13.0.tar.gz", hash = "sha256:0a4ac55a5820789d87e297727d229866c9650f6521b64206413c4fbada24d95b"}, @@ -2801,6 +2902,7 @@ version = "0.9.0" description = "Runtime inspection utilities for typing module." optional = false python-versions = "*" +groups = ["main"] files = [ {file = "typing_inspect-0.9.0-py3-none-any.whl", hash = "sha256:9ee6fc59062311ef8547596ab6b955e1b8aa46242d854bfc78f4f6b0eff35f9f"}, {file = "typing_inspect-0.9.0.tar.gz", hash = "sha256:b23fc42ff6f6ef6954e4852c1fb512cdd18dbea03134f91f856a95ccc9461f78"}, @@ -2816,6 +2918,7 @@ version = "0.4.0" description = "Runtime typing introspection tools" optional = false python-versions = ">=3.9" +groups = ["main"] files = [ {file = "typing_inspection-0.4.0-py3-none-any.whl", hash = "sha256:50e72559fcd2a6367a19f7a7e610e6afcb9fac940c650290eed893d61386832f"}, {file = "typing_inspection-0.4.0.tar.gz", hash = "sha256:9765c87de36671694a67904bf2c96e395be9c6439bb6c87b5142569dcdd65122"}, @@ -2830,6 +2933,7 @@ version = "2025.2" description = "Provider of IANA time zone data" optional = false python-versions = ">=2" +groups = ["main"] files = [ {file = "tzdata-2025.2-py2.py3-none-any.whl", hash = "sha256:1a403fada01ff9221ca8044d701868fa132215d84beb92242d9acd2147f667a8"}, {file = "tzdata-2025.2.tar.gz", hash = "sha256:b60a638fcc0daffadf82fe0f57e53d06bdec2f36c4df66280ae79bce6bd6f2b9"}, @@ -2841,6 +2945,7 @@ version = "0.10.27" description = "A library that prepares raw documents for downstream ML tasks." 
optional = false python-versions = ">=3.7.0" +groups = ["main"] files = [ {file = "unstructured-0.10.27-py3-none-any.whl", hash = "sha256:3a8a8e44302388ddc39c184059e8b4458f1cdc58032540b9af7d85f6c3eca3be"}, {file = "unstructured-0.10.27.tar.gz", hash = "sha256:f567b5c4385993a9ab48db5563dd7b413aac4f2002bb22e6250496ea8f440f5e"}, @@ -2921,6 +3026,7 @@ version = "0.3.15" description = "Python-tesseract is a python wrapper for Google's Tesseract-OCR" optional = false python-versions = ">=3.8" +groups = ["main"] files = [ {file = "unstructured.pytesseract-0.3.15-py3-none-any.whl", hash = "sha256:a3f505c5efb7ff9f10379051a7dd6aa624b3be6b0f023ed6767cc80d0b1613d1"}, {file = "unstructured.pytesseract-0.3.15.tar.gz", hash = "sha256:4b81bc76cfff4e2ef37b04863f0e48bd66184c0b39c3b2b4e017483bca1a7394"}, @@ -2936,6 +3042,7 @@ version = "1.4.3" description = "URL normalization for Python" optional = false python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*, !=3.5.*" +groups = ["main"] files = [ {file = "url-normalize-1.4.3.tar.gz", hash = "sha256:d23d3a070ac52a67b83a1c59a0e68f8608d1cd538783b401bc9de2c0fac999b2"}, {file = "url_normalize-1.4.3-py2.py3-none-any.whl", hash = "sha256:ec3c301f04e5bb676d333a7fa162fa977ad2ca04b7e652bfc9fac4e405728eed"}, @@ -2950,13 +3057,14 @@ version = "2.3.0" description = "HTTP library with thread-safe connection pooling, file post, and more." 
optional = false python-versions = ">=3.9" +groups = ["main", "dev"] files = [ {file = "urllib3-2.3.0-py3-none-any.whl", hash = "sha256:1cee9ad369867bfdbbb48b7dd50374c0967a0bb7710050facf0dd6911440e3df"}, {file = "urllib3-2.3.0.tar.gz", hash = "sha256:f8c5449b3cf0861679ce7e0503c7b44b5ec981bec0d1d3795a07f1ba96f0204d"}, ] [package.extras] -brotli = ["brotli (>=1.0.9)", "brotlicffi (>=0.8.0)"] +brotli = ["brotli (>=1.0.9) ; platform_python_implementation == \"CPython\"", "brotlicffi (>=0.8.0) ; platform_python_implementation != \"CPython\""] h2 = ["h2 (>=4,<5)"] socks = ["pysocks (>=1.5.6,!=1.5.7,<2.0)"] zstd = ["zstandard (>=0.18.0)"] @@ -2967,6 +3075,7 @@ version = "10.0" description = "Wildcard/glob file name matcher." optional = false python-versions = ">=3.8" +groups = ["main"] files = [ {file = "wcmatch-10.0-py3-none-any.whl", hash = "sha256:0dd927072d03c0a6527a20d2e6ad5ba8d0380e60870c383bc533b71744df7b7a"}, {file = "wcmatch-10.0.tar.gz", hash = "sha256:e72f0de09bba6a04e0de70937b0cf06e55f36f37b3deb422dfaf854b867b840a"}, @@ -2981,6 +3090,7 @@ version = "0.6.17" description = "Modern datetime library for Python" optional = false python-versions = ">=3.9" +groups = ["main"] files = [ {file = "whenever-0.6.17-cp310-cp310-macosx_10_12_x86_64.whl", hash = "sha256:8e9e905fd19b0679e5ab1a0d0110a1974b89bf4cbd1ff22c9e352db381e4ae4f"}, {file = "whenever-0.6.17-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:cd615e60f992fb9ae9d73fc3581ac63de981e51013b0fffbf8e2bd748c71e3df"}, @@ -3064,6 +3174,7 @@ version = "3.2.2" description = "A Python module for creating Excel XLSX files." 
optional = false python-versions = ">=3.6" +groups = ["main"] files = [ {file = "XlsxWriter-3.2.2-py3-none-any.whl", hash = "sha256:272ce861e7fa5e82a4a6ebc24511f2cb952fde3461f6c6e1a1e81d3272db1471"}, {file = "xlsxwriter-3.2.2.tar.gz", hash = "sha256:befc7f92578a85fed261639fb6cde1fd51b79c5e854040847dde59d4317077dc"}, @@ -3075,12 +3186,13 @@ version = "0.14.2" description = "Makes working with XML feel like you are working with JSON" optional = false python-versions = ">=3.6" +groups = ["main"] files = [ {file = "xmltodict-0.14.2-py2.py3-none-any.whl", hash = "sha256:20cc7d723ed729276e808f26fb6b3599f786cbc37e06c65e192ba77c40f20aac"}, {file = "xmltodict-0.14.2.tar.gz", hash = "sha256:201e7c28bb210e374999d1dde6382923ab0ed1a8a5faeece48ab525b7810a553"}, ] [metadata] -lock-version = "2.0" +lock-version = "2.1" python-versions = "^3.11,<3.12" -content-hash = "93b7616fd4990bd64dc228af569c877cd0d8281b528e2074ee4fb23f0424607e" +content-hash = "489c1fca0424c5b9bc67c659667751dba48bed8d4b0832e16343cc292168ccd1" diff --git a/airbyte-integrations/connectors/source-microsoft-sharepoint/pyproject.toml b/airbyte-integrations/connectors/source-microsoft-sharepoint/pyproject.toml index b5807318459c6..22436db5b959d 100644 --- a/airbyte-integrations/connectors/source-microsoft-sharepoint/pyproject.toml +++ b/airbyte-integrations/connectors/source-microsoft-sharepoint/pyproject.toml @@ -20,7 +20,7 @@ python = "^3.11,<3.12" msal = "==1.27.0" Office365-REST-Python-Client = "==2.5.5" smart-open = "==6.4.0" -airbyte-cdk = {extras = ["file-based"], version = "^6.38.5"} +airbyte-cdk = {extras = ["file-based"], version = "^6"} [tool.poetry.scripts] source-microsoft-sharepoint = "source_microsoft_sharepoint.run:run" diff --git a/airbyte-integrations/connectors/source-microsoft-sharepoint/source_microsoft_sharepoint/utils.py b/airbyte-integrations/connectors/source-microsoft-sharepoint/source_microsoft_sharepoint/utils.py index c02aea857c7ef..6f8700837ccaa 100644 --- 
a/airbyte-integrations/connectors/source-microsoft-sharepoint/source_microsoft_sharepoint/utils.py +++ b/airbyte-integrations/connectors/source-microsoft-sharepoint/source_microsoft_sharepoint/utils.py @@ -5,7 +5,7 @@ from enum import Enum from functools import lru_cache from http import HTTPStatus -from typing import List +from typing import List, Tuple from office365.graph_client import GraphClient from office365.onedrive.sites.site import Site @@ -158,18 +158,36 @@ def build(self) -> str: @lru_cache(maxsize=None) -def get_site(graph_client: GraphClient, site_url: str = None): +def get_site(graph_client: GraphClient, site_url: str = None) -> Site: + """ + Retrieve a SharePoint site using the Microsoft Graph API. + + Args: + graph_client (GraphClient): An instance of the Microsoft Graph client + used to interact with the Microsoft Graph API. + site_url (str, optional): The URL of the SharePoint site to retrieve. + If not provided, the root site will be retrieved. + + Returns: + Site: An object representing the retrieved SharePoint site. + """ if site_url: - site = execute_query_with_retry(graph_client.sites.get_by_url(site_url)) - else: - site = execute_query_with_retry(graph_client.sites.root.get()) - return site + return execute_query_with_retry(graph_client.sites.get_by_url(site_url)) + return execute_query_with_retry(graph_client.sites.root.get()) + +def get_site_prefix(site: Site) -> Tuple[str, str]: + """ + Extracts and returns the site URL and the prefix of the host name from a given SharePoint site. -def get_site_prefix(site: Site): + Example: + For a site with `web_url` = "https://contoso.sharepoint.com/sites/example" and + `site_collection.hostname` = "contoso.sharepoint.com", this function will return: + ("https://contoso.sharepoint.com/sites/example", "contoso") + """ site_url = site.web_url host_name = site.site_collection.hostname - host_name_parts: List = host_name.split(".") # e.g. 
"contoso.sharepoint.com" => ["contoso", "sharepoint", "com"] + host_name_parts: List = host_name.split(".") if len(host_name_parts) < 2: raise ValueError(f"Invalid host name: {host_name}") From b9b99e2b61bda99c1e750fcf3c69eacb7cc0b444 Mon Sep 17 00:00:00 2001 From: Aldo Gonzalez Date: Mon, 31 Mar 2025 07:23:35 -0600 Subject: [PATCH 10/17] source-microsoft-sharepoint: make tenant_prefix required for get_token_response_object --- .../source_microsoft_sharepoint/stream_reader.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/airbyte-integrations/connectors/source-microsoft-sharepoint/source_microsoft_sharepoint/stream_reader.py b/airbyte-integrations/connectors/source-microsoft-sharepoint/source_microsoft_sharepoint/stream_reader.py index 3a9f2a55a2c8f..f376cdc79c27d 100644 --- a/airbyte-integrations/connectors/source-microsoft-sharepoint/source_microsoft_sharepoint/stream_reader.py +++ b/airbyte-integrations/connectors/source-microsoft-sharepoint/source_microsoft_sharepoint/stream_reader.py @@ -137,7 +137,7 @@ def get_access_token(self): # Directly fetch a new access token from the auth_client each time it's called return self.auth_client._get_access_token()["access_token"] - def get_token_response_object(self, tenant_prefix: str = None) -> Callable: + def get_token_response_object(self, tenant_prefix: str) -> Callable: """ When building a ClientContext using with_access_token() method, the token_func param is expected to be a method/callable that returns a TokenResponse object. 
From c82cab529000c9cadfe16057a8cfab878d54d952 Mon Sep 17 00:00:00 2001 From: Aldo Gonzalez Date: Mon, 31 Mar 2025 14:12:19 -0600 Subject: [PATCH 11/17] source-microsoft-sharepoint: add typing to get_site_drive --- .../source_microsoft_sharepoint/stream_reader.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/airbyte-integrations/connectors/source-microsoft-sharepoint/source_microsoft_sharepoint/stream_reader.py b/airbyte-integrations/connectors/source-microsoft-sharepoint/source_microsoft_sharepoint/stream_reader.py index f376cdc79c27d..270de972b67fd 100644 --- a/airbyte-integrations/connectors/source-microsoft-sharepoint/source_microsoft_sharepoint/stream_reader.py +++ b/airbyte-integrations/connectors/source-microsoft-sharepoint/source_microsoft_sharepoint/stream_reader.py @@ -294,7 +294,7 @@ def get_drives_from_sites(self, sites: List[MutableMapping[str, Any]]) -> Entity all_sites_drives.add_child(site_drive) return all_sites_drives - def get_site_drive(self): + def get_site_drive(self) -> EntityCollection: """ Retrieves SharePoint drives based on the provided site URL. It iterates over the sites if something like sharepoint.com/sites/ is in the site_url. @@ -324,7 +324,7 @@ def get_site_drive(self): @property @lru_cache(maxsize=None) - def drives(self): + def drives(self) -> EntityCollection: """ Retrieves and caches SharePoint drives, including the user's drive based on authentication type. 
""" From 77db5c2f285f9b85dbc7ae4d7ee8de63e1cc0c54 Mon Sep 17 00:00:00 2001 From: Aldo Gonzalez Date: Mon, 31 Mar 2025 15:16:10 -0600 Subject: [PATCH 12/17] source-microsoft-sharepoint: remove unnecessary get_site util and update tests --- .../stream_reader.py | 5 +- .../source_microsoft_sharepoint/utils.py | 31 +++------ .../unit_tests/test_stream_reader.py | 5 +- .../unit_tests/test_utils.py | 64 ++++++------------- 4 files changed, 33 insertions(+), 72 deletions(-) diff --git a/airbyte-integrations/connectors/source-microsoft-sharepoint/source_microsoft_sharepoint/stream_reader.py b/airbyte-integrations/connectors/source-microsoft-sharepoint/source_microsoft_sharepoint/stream_reader.py index 270de972b67fd..f31dbfb9b7d36 100644 --- a/airbyte-integrations/connectors/source-microsoft-sharepoint/source_microsoft_sharepoint/stream_reader.py +++ b/airbyte-integrations/connectors/source-microsoft-sharepoint/source_microsoft_sharepoint/stream_reader.py @@ -34,7 +34,6 @@ MicrosoftSharePointRemoteFile, execute_query_with_retry, filter_http_urls, - get_site, get_site_prefix, ) @@ -147,7 +146,7 @@ def get_token_response_object(self, tenant_prefix: str) -> Callable: return self.auth_client.get_token_response_object_wrapper(tenant_prefix=tenant_prefix) def get_client_context(self): - site_url, root_site_prefix = get_site_prefix(get_site(self.one_drive_client)) + site_url, root_site_prefix = get_site_prefix(self.one_drive_client) client_context = ClientContext(site_url).with_access_token(self.get_token_response_object(tenant_prefix=root_site_prefix)) return client_context @@ -257,7 +256,7 @@ def get_all_sites(self) -> List[MutableMapping[str, Any]]: Returns: List[MutableMapping[str, Any]]: A list of site information.
""" - _, root_site_prefix = get_site_prefix(get_site(self.one_drive_client)) + _, root_site_prefix = get_site_prefix(self.one_drive_client) ctx = self.get_client_context() search_service = SearchService(ctx) # ignore default OneDrive site with NOT Path:https://prefix-my.sharepoint.com diff --git a/airbyte-integrations/connectors/source-microsoft-sharepoint/source_microsoft_sharepoint/utils.py b/airbyte-integrations/connectors/source-microsoft-sharepoint/source_microsoft_sharepoint/utils.py index 6f8700837ccaa..7f1450b7b772e 100644 --- a/airbyte-integrations/connectors/source-microsoft-sharepoint/source_microsoft_sharepoint/utils.py +++ b/airbyte-integrations/connectors/source-microsoft-sharepoint/source_microsoft_sharepoint/utils.py @@ -157,34 +157,23 @@ def build(self) -> str: return f"{self._scheme}://{self._host}{self._path}{query_string}" -@lru_cache(maxsize=None) -def get_site(graph_client: GraphClient, site_url: str = None) -> Site: +def get_site_prefix(graph_client: GraphClient) -> Tuple[str, str]: """ - Retrieve a SharePoint site using the Microsoft Graph API. - - Args: - graph_client (GraphClient): An instance of the Microsoft Graph client - used to interact with the Microsoft Graph API. - site_url (str, optional): The URL of the SharePoint site to retrieve. - If not provided, the root site will be retrieved. - - Returns: - Site: An object representing the retrieved SharePoint site. - """ - if site_url: - return execute_query_with_retry(graph_client.sites.get_by_url(site_url)) - return execute_query_with_retry(graph_client.sites.root.get()) - - -def get_site_prefix(site: Site) -> Tuple[str, str]: - """ - Extracts and returns the site URL and the prefix of the host name from a given SharePoint site. + Retrieves the SharePoint site and extracts its URL and host prefix. 
Example: For a site with `web_url` = "https://contoso.sharepoint.com/sites/example" and `site_collection.hostname` = "contoso.sharepoint.com", this function will return: ("https://contoso.sharepoint.com/sites/example", "contoso") + + Args: + graph_client (GraphClient): An instance of the Microsoft Graph client. + + Returns: + Tuple[str, str]: A tuple containing (site_url, hostname_prefix). """ + site = execute_query_with_retry(graph_client.sites.root.get()) + site_url = site.web_url host_name = site.site_collection.hostname host_name_parts: List = host_name.split(".") diff --git a/airbyte-integrations/connectors/source-microsoft-sharepoint/unit_tests/test_stream_reader.py b/airbyte-integrations/connectors/source-microsoft-sharepoint/unit_tests/test_stream_reader.py index de1b8257d0310..94ab727d39b8c 100644 --- a/airbyte-integrations/connectors/source-microsoft-sharepoint/unit_tests/test_stream_reader.py +++ b/airbyte-integrations/connectors/source-microsoft-sharepoint/unit_tests/test_stream_reader.py @@ -773,13 +773,11 @@ def test_get_all_sites(search_result, expected_sites, raises_exception): # Mock methods out of scope of this test with ( - patch("source_microsoft_sharepoint.stream_reader.get_site") as mock_get_site, patch("source_microsoft_sharepoint.stream_reader.get_site_prefix") as mock_get_site_prefix, patch.object(reader, "get_client_context") as mock_get_client_context, patch("source_microsoft_sharepoint.stream_reader.SearchService") as mock_search_service, patch("source_microsoft_sharepoint.stream_reader.execute_query_with_retry") as mock_execute_query, ): - mock_get_site.return_value = "test-site" mock_get_site_prefix.return_value = ("https://test-tenant.sharepoint.com", "test-tenant") mock_client_context = MagicMock() @@ -847,8 +845,7 @@ def cell_getter(key, default=None): assert result == expected_sites - mock_get_site.assert_called_once_with(reader.one_drive_client) - mock_get_site_prefix.assert_called_once_with("test-site") + 
mock_get_site_prefix.assert_called_once_with(reader.one_drive_client) mock_get_client_context.assert_called_once() mock_search_service.assert_called_once_with(mock_client_context) mock_search_service_instance.post_query.assert_called_once_with( diff --git a/airbyte-integrations/connectors/source-microsoft-sharepoint/unit_tests/test_utils.py b/airbyte-integrations/connectors/source-microsoft-sharepoint/unit_tests/test_utils.py index 01cc998ec084e..ca7b0829379e3 100644 --- a/airbyte-integrations/connectors/source-microsoft-sharepoint/unit_tests/test_utils.py +++ b/airbyte-integrations/connectors/source-microsoft-sharepoint/unit_tests/test_utils.py @@ -4,11 +4,10 @@ from datetime import datetime, timedelta from http import HTTPStatus from unittest.mock import Mock, patch -from urllib.parse import parse_qs, urlparse import pytest from office365.onedrive.sites.site import Site -from source_microsoft_sharepoint.utils import PlaceholderUrlBuilder, execute_query_with_retry, filter_http_urls, get_site, get_site_prefix +from source_microsoft_sharepoint.utils import PlaceholderUrlBuilder, execute_query_with_retry, filter_http_urls, get_site_prefix from airbyte_cdk import AirbyteTracedException @@ -187,44 +186,6 @@ def test_url_builder_for_key_pair_value_pair(steps, expected_url): assert url == expected_url, f"Expected {expected_url}, but got {url}" -@pytest.mark.parametrize( - "site_url, expected_method_call", - [ - ("https://example.sharepoint.com/sites/test", "get_by_url"), - (None, "root.get"), - ], -) -@patch("source_microsoft_sharepoint.utils.execute_query_with_retry") -def test_get_site(mock_execute_query_with_retry, site_url, expected_method_call): - mock_graph_client = Mock() - - mock_site = Mock(spec=Site) - mock_site.web_url = "https://example.sharepoint.com/sites/test" if site_url else "https://example.sharepoint.com" - - mock_site.site_collection = Mock() - mock_site.site_collection.hostname = "example.sharepoint.com" - - mock_site.name = "Test Site" - 
mock_site.id = "test-site-id" - mock_site.root = Mock() - - mock_execute_query_with_retry.return_value = mock_site - - result = get_site(mock_graph_client, site_url) - - if expected_method_call == "get_by_url": - mock_graph_client.sites.get_by_url.assert_called_once_with(site_url) - else: - mock_graph_client.sites.root.get.assert_called_once() - - mock_execute_query_with_retry.assert_called_once() - assert result - - assert result.web_url == "https://example.sharepoint.com/sites/test" if site_url else "https://example.sharepoint.com" - assert result.site_collection.hostname == "example.sharepoint.com" - assert result.name == "Test Site" - - @pytest.mark.parametrize( "web_url, hostname, expected_site_url, expected_prefix", [ @@ -243,23 +204,38 @@ def test_get_site(mock_execute_query_with_retry, site_url, expected_method_call) ), ], ) -def test_get_site_prefix(web_url, hostname, expected_site_url, expected_prefix): +@patch("source_microsoft_sharepoint.utils.execute_query_with_retry") +def test_get_site_prefix(mock_execute_query, web_url, hostname, expected_site_url, expected_prefix): mock_site = Mock(spec=Site) mock_site.web_url = web_url mock_site.site_collection = Mock() mock_site.site_collection.hostname = hostname - site_url, prefix = get_site_prefix(mock_site) + mock_execute_query.return_value = mock_site + mock_graph_client = Mock() + + site_url, prefix = get_site_prefix(mock_graph_client) + + mock_graph_client.sites.root.get.assert_called_once() + mock_execute_query.assert_called_once() assert site_url == expected_site_url assert prefix == expected_prefix -def test_get_site_prefix_invalid_hostname(): +@patch("source_microsoft_sharepoint.utils.execute_query_with_retry") +def test_get_site_prefix_invalid_hostname(mock_execute_query): mock_site = Mock(spec=Site) mock_site.web_url = "https://invalid" mock_site.site_collection = Mock() mock_site.site_collection.hostname = "invalid" + mock_execute_query.return_value = mock_site + + mock_graph_client = Mock() + with 
pytest.raises(ValueError, match="Invalid host name: invalid"): - get_site_prefix(mock_site) + get_site_prefix(mock_graph_client) + + mock_graph_client.sites.root.get.assert_called_once() + mock_execute_query.assert_called_once() From 110bda1d33f54ff79d0ba355e8e7df4158c981b4 Mon Sep 17 00:00:00 2001 From: Aldo Gonzalez Date: Mon, 31 Mar 2025 15:47:23 -0600 Subject: [PATCH 13/17] source-microsoft-sharepoint: make a few methods private as they are not being exposed --- .../source_microsoft_sharepoint/stream_reader.py | 8 ++++---- .../unit_tests/test_stream_reader.py | 6 +++--- 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/airbyte-integrations/connectors/source-microsoft-sharepoint/source_microsoft_sharepoint/stream_reader.py b/airbyte-integrations/connectors/source-microsoft-sharepoint/source_microsoft_sharepoint/stream_reader.py index f31dbfb9b7d36..3792f4ffc97f2 100644 --- a/airbyte-integrations/connectors/source-microsoft-sharepoint/source_microsoft_sharepoint/stream_reader.py +++ b/airbyte-integrations/connectors/source-microsoft-sharepoint/source_microsoft_sharepoint/stream_reader.py @@ -277,7 +277,7 @@ def get_all_sites(self) -> List[MutableMapping[str, Any]]: return found_sites - def get_drives_from_sites(self, sites: List[MutableMapping[str, Any]]) -> EntityCollection: + def _get_drives_from_sites(self, sites: List[MutableMapping[str, Any]]) -> EntityCollection: """ Retrieves SharePoint drives from the provided sites. Args: @@ -293,7 +293,7 @@ def get_drives_from_sites(self, sites: List[MutableMapping[str, Any]]) -> Entity all_sites_drives.add_child(site_drive) return all_sites_drives - def get_site_drive(self) -> EntityCollection: + def _get_site_drive(self) -> EntityCollection: """ Retrieves SharePoint drives based on the provided site URL. It iterates over the sites if something like sharepoint.com/sites/ is in the site_url. 
@@ -309,7 +309,7 @@ def get_site_drive(self) -> EntityCollection: drives = execute_query_with_retry(self.one_drive_client.drives.get()) elif re.search(r"sharepoint\.com/sites/?$", self.config.site_url): # get all sites and then get drives from each site - return self.get_drives_from_sites(self.get_all_sites()) + return self._get_drives_from_sites(self.get_all_sites()) else: # get drives for site drives provided in the config drives = execute_query_with_retry(self.one_drive_client.sites.get_by_url(self.config.site_url).drives.get()) @@ -327,7 +327,7 @@ def drives(self) -> EntityCollection: """ Retrieves and caches SharePoint drives, including the user's drive based on authentication type. """ - drives = self.get_site_drive() + drives = self._get_site_drive() # skip this step for application authentication flow if self.config.credentials.auth_type != "Client" or ( diff --git a/airbyte-integrations/connectors/source-microsoft-sharepoint/unit_tests/test_stream_reader.py b/airbyte-integrations/connectors/source-microsoft-sharepoint/unit_tests/test_stream_reader.py index 94ab727d39b8c..3bdba6ca6d05b 100644 --- a/airbyte-integrations/connectors/source-microsoft-sharepoint/unit_tests/test_stream_reader.py +++ b/airbyte-integrations/connectors/source-microsoft-sharepoint/unit_tests/test_stream_reader.py @@ -676,14 +676,14 @@ def test_get_site_drive(site_url, expected_call, expected_result, exception): with ( patch.object(reader, "get_all_sites", return_value=mock_sites) as mock_get_all_sites, - patch.object(reader, "get_drives_from_sites", return_value=mock_drives) as mock_get_drives_from_sites, + patch.object(reader, "_get_drives_from_sites", return_value=mock_drives) as mock_get_drives_from_sites, ): if exception: with pytest.raises(AirbyteTracedException) as exc_info: - reader.get_site_drive() + reader._get_site_drive() assert "Failed to retrieve drives from sharepoint" in str(exc_info.value) else: - result = reader.get_site_drive() + result = reader._get_site_drive() if 
expected_call == "drives": # Default site URL From 46054dcd4a52318c1295994590caff4d0c7228e9 Mon Sep 17 00:00:00 2001 From: Aldo Gonzalez Date: Mon, 31 Mar 2025 16:14:22 -0600 Subject: [PATCH 14/17] source-microsoft-sharepoint: minor refactor so private _get_client_context doesn't call again get_site_prefix --- .../source_microsoft_sharepoint/stream_reader.py | 14 +++++++++----- .../unit_tests/test_stream_reader.py | 2 +- 2 files changed, 10 insertions(+), 6 deletions(-) diff --git a/airbyte-integrations/connectors/source-microsoft-sharepoint/source_microsoft_sharepoint/stream_reader.py b/airbyte-integrations/connectors/source-microsoft-sharepoint/source_microsoft_sharepoint/stream_reader.py index 3792f4ffc97f2..a8291997e35d8 100644 --- a/airbyte-integrations/connectors/source-microsoft-sharepoint/source_microsoft_sharepoint/stream_reader.py +++ b/airbyte-integrations/connectors/source-microsoft-sharepoint/source_microsoft_sharepoint/stream_reader.py @@ -145,9 +145,13 @@ def get_token_response_object(self, tenant_prefix: str) -> Callable: """ return self.auth_client.get_token_response_object_wrapper(tenant_prefix=tenant_prefix) - def get_client_context(self): - site_url, root_site_prefix = get_site_prefix(self.one_drive_client) - client_context = ClientContext(site_url).with_access_token(self.get_token_response_object(tenant_prefix=root_site_prefix)) + def _get_client_context(self, site_url: str, root_site_prefix: str) -> ClientContext: + """ + Creates a ClientContext for the specified SharePoint site URL. + """ + client_context = ClientContext(site_url).with_access_token( + token_func=self.get_token_response_object(tenant_prefix=root_site_prefix) + ) return client_context @config.setter @@ -256,8 +260,8 @@ def get_all_sites(self) -> List[MutableMapping[str, Any]]: Returns: List[MutableMapping[str, Any]]: A list of site information.
""" - _, root_site_prefix = get_site_prefix(self.one_drive_client) - ctx = self.get_client_context() + site_url, root_site_prefix = get_site_prefix(self.one_drive_client) + ctx = self._get_client_context(site_url, root_site_prefix) search_service = SearchService(ctx) # ignore default OneDrive site with NOT Path:https://prefix-my.sharepoint.com search_job = search_service.post_query(f"contentclass:STS_Site NOT Path:https://{root_site_prefix}-my.sharepoint.com") diff --git a/airbyte-integrations/connectors/source-microsoft-sharepoint/unit_tests/test_stream_reader.py b/airbyte-integrations/connectors/source-microsoft-sharepoint/unit_tests/test_stream_reader.py index 3bdba6ca6d05b..7705cf51cd333 100644 --- a/airbyte-integrations/connectors/source-microsoft-sharepoint/unit_tests/test_stream_reader.py +++ b/airbyte-integrations/connectors/source-microsoft-sharepoint/unit_tests/test_stream_reader.py @@ -774,7 +774,7 @@ def test_get_all_sites(search_result, expected_sites, raises_exception): # Mock methods out of scope of this test with ( patch("source_microsoft_sharepoint.stream_reader.get_site_prefix") as mock_get_site_prefix, - patch.object(reader, "get_client_context") as mock_get_client_context, + patch.object(reader, "_get_client_context") as mock_get_client_context, patch("source_microsoft_sharepoint.stream_reader.SearchService") as mock_search_service, patch("source_microsoft_sharepoint.stream_reader.execute_query_with_retry") as mock_execute_query, ): From 7d9a633ab9287a36461d4a33f9a4d1051666bd31 Mon Sep 17 00:00:00 2001 From: Aldo Gonzalez Date: Mon, 31 Mar 2025 17:19:56 -0600 Subject: [PATCH 15/17] source-microsoft-sharepoint: break get_drive testing for readability as scenarios with different dependencies and results were highly coupled --- .../unit_tests/test_stream_reader.py | 160 ++++++++++++------ 1 file changed, 107 insertions(+), 53 deletions(-) diff --git a/airbyte-integrations/connectors/source-microsoft-sharepoint/unit_tests/test_stream_reader.py
b/airbyte-integrations/connectors/source-microsoft-sharepoint/unit_tests/test_stream_reader.py index 7705cf51cd333..cf3aa6abf744d 100644 --- a/airbyte-integrations/connectors/source-microsoft-sharepoint/unit_tests/test_stream_reader.py +++ b/airbyte-integrations/connectors/source-microsoft-sharepoint/unit_tests/test_stream_reader.py @@ -6,6 +6,7 @@ from unittest.mock import ANY, MagicMock, Mock, PropertyMock, call, patch import pytest +from office365.onedrive.drives.drive import Drive from office365.entity_collection import EntityCollection from requests.exceptions import HTTPError from source_microsoft_sharepoint.exceptions import ErrorFetchingMetadata @@ -638,70 +639,123 @@ def test_drives_property(auth_type, user_principal_name, has_refresh_token): drives_response.add_child.assert_called_once_with(my_drive) -@pytest.mark.parametrize( - "site_url, expected_call, expected_result, exception", - [ - # Scenario 1: No site URL (default site) - ("", "drives", MagicMock(spec=EntityCollection), None), - # Scenario 2: Site URL ending with 'sharepoint.com/sites/' (all sites) - ("https://test-tenant.sharepoint.com/sites/", "all_sites", MagicMock(spec=EntityCollection), None), - # Scenario 3: Specific site URL (single site) - ("https://test-tenant.sharepoint.com/sites/specific", "specific_site", MagicMock(spec=EntityCollection), None), - # Scenario 4: Error scenario - ("https://test-tenant.sharepoint.com/sites/error", "error", None, Exception("Test exception")), - ], -) -def test_get_site_drive(site_url, expected_call, expected_result, exception): +def test_get_site_drive_default_site(): """ - Test for the get_site_drive method. 
+ Test retrieving drives from the default site (no site URL in config) """ + reader = SourceMicrosoftSharePointStreamReader() + reader._config = MagicMock(site_url="") + mock_one_drive_client = MagicMock() + reader._one_drive_client = mock_one_drive_client + + mock_drive = MagicMock() + mock_drives = MagicMock(spec=EntityCollection) + mock_drives.__iter__.return_value = [mock_drive] + + with patch("source_microsoft_sharepoint.stream_reader.execute_query_with_retry") as mock_execute_query: + mock_execute_query.return_value = mock_drives + + result = reader._get_site_drive() + + mock_one_drive_client.drives.get.assert_called_once() + assert result == mock_drives + assert len(list(result)) == 1 + + +def test_get_site_drive_all_sites(): + """ + Test retrieving drives from all sites (site URL in config ending with 'sharepoint.com/sites/') + """ + reader = SourceMicrosoftSharePointStreamReader() + reader._config = MagicMock(site_url="https://test-tenant.sharepoint.com/sites/") + reader._one_drive_client = MagicMock() + + first_drive_name = "Drive1" + first_drive_url = "https://test-tenant.sharepoint.com/sites/site1/drive1" + second_drive_name = "Drive2" + second_drive_url = "https://test-tenant.sharepoint.com/sites/site2/drive2" + first_site_url = "https://test-tenant.sharepoint.com/sites/site1" + second_site_url = "https://test-tenant.sharepoint.com/sites/site2" + + mock_drive1 = MagicMock(spec=Drive) + mock_drive1.name = first_drive_name + mock_drive1.web_url = first_drive_url + + mock_drive2 = MagicMock(spec=Drive) + mock_drive2.name = second_drive_name + mock_drive2.web_url = second_drive_url + + mock_drives = MagicMock(spec=EntityCollection) + mock_drives.__iter__.return_value = [mock_drive1, mock_drive2] + + mock_sites = [ + {"Title": "Site1", "Path": first_site_url}, + {"Title": "Site2", "Path": second_site_url}, + ] + + with ( + patch.object(reader, "get_all_sites", return_value=mock_sites) as mock_get_all_sites, + patch.object(reader, "_get_drives_from_sites", 
return_value=mock_drives) as mock_get_drives_from_sites, + ): + result = reader._get_site_drive() + + mock_get_all_sites.assert_called_once() + mock_get_drives_from_sites.assert_called_once_with(mock_sites) + assert result == mock_drives + + drives = list(result) + assert len(drives) == 2 + assert drives[0].name == first_drive_name + assert drives[0].web_url == first_drive_url + assert drives[1].name == second_drive_name + assert drives[1].web_url == second_drive_url + + +def test_get_site_drive_specific_site(): + """ + Test retrieving drives from a specific site in config (site URL in config ending with 'sharepoint.com/sites/specific') + """ + site_url = "https://test-tenant.sharepoint.com/sites/specific" + drive_name = "Test Drive" + drive_url = f"{site_url}/TestDrive" + reader = SourceMicrosoftSharePointStreamReader() reader._config = MagicMock(site_url=site_url) mock_one_drive_client = MagicMock() reader._one_drive_client = mock_one_drive_client - mock_drives = expected_result + mock_drive = MagicMock(spec=Drive) + mock_drive.name = drive_name + mock_drive.web_url = drive_url + mock_drives = [mock_drive] with patch("source_microsoft_sharepoint.stream_reader.execute_query_with_retry") as mock_execute_query: - # Set up the mock to return the expected result or raise an exception - if exception and expected_call == "error": - mock_execute_query.side_effect = exception - else: - mock_execute_query.return_value = mock_drives + mock_execute_query.return_value = mock_drives - mock_sites = [ - {"Title": "Site1", "Path": "https://test-tenant.sharepoint.com/sites/site1"}, - {"Title": "Site2", "Path": "https://test-tenant.sharepoint.com/sites/site2"}, - ] + result = reader._get_site_drive() + + mock_one_drive_client.sites.get_by_url.assert_called_once_with(site_url) + assert result == mock_drives + assert len(result) == 1 + assert result[0].name == drive_name + assert result[0].web_url == drive_url + + +def test_get_site_drive_error_handling(): + """Test error handling 
when retrieving drives fails""" + reader = SourceMicrosoftSharePointStreamReader() + reader._config = MagicMock(site_url="https://test-tenant.sharepoint.com/sites/specific") + mock_one_drive_client = MagicMock() + reader._one_drive_client = mock_one_drive_client + + with patch("source_microsoft_sharepoint.stream_reader.execute_query_with_retry") as mock_execute_query: + # Set up the mock to raise an exception + mock_execute_query.side_effect = Exception("Test exception") + + with pytest.raises(AirbyteTracedException) as exc_info: + reader._get_site_drive() - with ( - patch.object(reader, "get_all_sites", return_value=mock_sites) as mock_get_all_sites, - patch.object(reader, "_get_drives_from_sites", return_value=mock_drives) as mock_get_drives_from_sites, - ): - if exception: - with pytest.raises(AirbyteTracedException) as exc_info: - reader._get_site_drive() - assert "Failed to retrieve drives from sharepoint" in str(exc_info.value) - else: - result = reader._get_site_drive() - - if expected_call == "drives": - # Default site URL - mock_one_drive_client.drives.get.assert_called_once() - mock_get_all_sites.assert_not_called() - mock_get_drives_from_sites.assert_not_called() - elif expected_call == "all_sites": - # Site URL ending with 'sharepoint.com/sites/' - mock_one_drive_client.drives.get.assert_not_called() - mock_get_all_sites.assert_called_once() - mock_get_drives_from_sites.assert_called_once_with(mock_sites) - elif expected_call == "specific_site": - # Specific site URL - mock_one_drive_client.sites.get_by_url.assert_called_once_with(site_url) - mock_get_all_sites.assert_not_called() - mock_get_drives_from_sites.assert_not_called() - - assert result == mock_drives + assert "Failed to retrieve drives from sharepoint" in str(exc_info.value) @pytest.mark.parametrize( From e729191ed00d80fe534eacfc7458874de6d8699b Mon Sep 17 00:00:00 2001 From: Aldo Gonzalez Date: Mon, 31 Mar 2025 17:20:58 -0600 Subject: [PATCH 16/17] source-microsoft-sharepoint: ruff format 
--- .../unit_tests/test_stream_reader.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/airbyte-integrations/connectors/source-microsoft-sharepoint/unit_tests/test_stream_reader.py b/airbyte-integrations/connectors/source-microsoft-sharepoint/unit_tests/test_stream_reader.py index cf3aa6abf744d..3ed68131c246a 100644 --- a/airbyte-integrations/connectors/source-microsoft-sharepoint/unit_tests/test_stream_reader.py +++ b/airbyte-integrations/connectors/source-microsoft-sharepoint/unit_tests/test_stream_reader.py @@ -6,8 +6,8 @@ from unittest.mock import ANY, MagicMock, Mock, PropertyMock, call, patch import pytest -from office365.onedrive.drives.drive import Drive from office365.entity_collection import EntityCollection +from office365.onedrive.drives.drive import Drive from requests.exceptions import HTTPError from source_microsoft_sharepoint.exceptions import ErrorFetchingMetadata from source_microsoft_sharepoint.spec import SourceMicrosoftSharePointSpec From 8955422409ed8f473161176af1472c4ad4eadcb7 Mon Sep 17 00:00:00 2001 From: Aldo Gonzalez Date: Mon, 31 Mar 2025 17:56:53 -0600 Subject: [PATCH 17/17] source-microsoft-sharepoint: break get_all_sites testing for readability as scenarios with different dependencies and expected results were highly coupled --- .../unit_tests/test_stream_reader.py | 171 +++++++++--------- 1 file changed, 86 insertions(+), 85 deletions(-) diff --git a/airbyte-integrations/connectors/source-microsoft-sharepoint/unit_tests/test_stream_reader.py b/airbyte-integrations/connectors/source-microsoft-sharepoint/unit_tests/test_stream_reader.py index 3ed68131c246a..8590254bc8728 100644 --- a/airbyte-integrations/connectors/source-microsoft-sharepoint/unit_tests/test_stream_reader.py +++ b/airbyte-integrations/connectors/source-microsoft-sharepoint/unit_tests/test_stream_reader.py @@ -7,7 +7,10 @@ import pytest from office365.entity_collection import EntityCollection +from office365.graph_client import GraphClient from
office365.onedrive.drives.drive import Drive +from office365.sharepoint.client_context import ClientContext +from office365.sharepoint.search.service import SearchService from requests.exceptions import HTTPError from source_microsoft_sharepoint.exceptions import ErrorFetchingMetadata from source_microsoft_sharepoint.spec import SourceMicrosoftSharePointSpec @@ -749,7 +752,6 @@ def test_get_site_drive_error_handling(): reader._one_drive_client = mock_one_drive_client with patch("source_microsoft_sharepoint.stream_reader.execute_query_with_retry") as mock_execute_query: - # Set up the mock to raise an exception mock_execute_query.side_effect = Exception("Test exception") with pytest.raises(AirbyteTracedException) as exc_info: @@ -787,57 +789,33 @@ def test_retrieve_files_from_accessible_drives(mocker, refresh_token, auth_type, assert reader._get_shared_files_from_all_drives.called == ("_get_shared_files_from_all_drives" in expected_methods_called) -@pytest.mark.parametrize( - "search_result, expected_sites, raises_exception", - [ - # Case 1: Search returns results with sites - ( - { - "PrimaryQueryResult": { - "RelevantResults": { - "Table": { - "Rows": [ - {"Cells": {"Title": "Site1", "Path": "https://test-tenant.sharepoint.com/sites/site1"}}, - {"Cells": {"Title": "Site2", "Path": "https://test-tenant.sharepoint.com/sites/site2"}}, - ] - } - } - } - }, - [ - {"Title": "Site1", "Path": "https://test-tenant.sharepoint.com/sites/site1"}, - {"Title": "Site2", "Path": "https://test-tenant.sharepoint.com/sites/site2"}, - ], - False, - ), - # Case 2: Search returns empty results - (None, [], True), - # Case 3: Search returns no relevant results - ({"PrimaryQueryResult": None}, [], True), - ], -) -def test_get_all_sites(search_result, expected_sites, raises_exception): +def test_get_all_sites_returns_sites_successfully(): """ - Test the get_all_sites method to verify it correctly retrieves and processes SharePoint site information. 
+ Test that get_all_sites correctly returns site information when sites are found """ + reader = SourceMicrosoftSharePointStreamReader() + reader._config = MagicMock(spec=SourceMicrosoftSharePointSpec) + reader._one_drive_client = MagicMock(spec=GraphClient) - reader._config = MagicMock() - reader._one_drive_client = MagicMock() + tenant_url = "https://test-tenant.sharepoint.com" + site_first_title = "Site1" + site_first_path = f"{tenant_url}/sites/site1" + site_second_title = "Site2" + site_second_path = f"{tenant_url}/sites/site2" + query_filter = "contentclass:STS_Site NOT Path:https://test-tenant-my.sharepoint.com" - # Mock methods out of scope of this test with ( patch("source_microsoft_sharepoint.stream_reader.get_site_prefix") as mock_get_site_prefix, patch.object(reader, "_get_client_context") as mock_get_client_context, patch("source_microsoft_sharepoint.stream_reader.SearchService") as mock_search_service, patch("source_microsoft_sharepoint.stream_reader.execute_query_with_retry") as mock_execute_query, ): - mock_get_site_prefix.return_value = ("https://test-tenant.sharepoint.com", "test-tenant") - - mock_client_context = MagicMock() + mock_get_site_prefix.return_value = (tenant_url, "test-tenant") + mock_client_context = MagicMock(spec=ClientContext) mock_get_client_context.return_value = mock_client_context - mock_search_service_instance = MagicMock() + mock_search_service_instance = MagicMock(spec=SearchService) mock_search_service.return_value = mock_search_service_instance mock_search_job = MagicMock() @@ -845,64 +823,87 @@ def test_get_all_sites(search_result, expected_sites, raises_exception): search_job_result = MagicMock() mock_execute_query.return_value = search_job_result - mock_search_job.value = True - search_job_result.value = MagicMock() - - if search_result is None: - # Case 2: Empty results - search_job_result.value.PrimaryQueryResult = None - elif search_result.get("PrimaryQueryResult") is None: - # Case 3: No relevant results - 
search_job_result.value.PrimaryQueryResult = None - else: - # Case 1: Success case with sites - # Create the full mock object structure with attributes instead of dict items - pq_data = search_result["PrimaryQueryResult"] - primary_query_result = MagicMock() - search_job_result.value.PrimaryQueryResult = primary_query_result + primary_query_result = MagicMock() + search_job_result.value.PrimaryQueryResult = primary_query_result + relevant_results = MagicMock() + primary_query_result.RelevantResults = relevant_results + table = MagicMock() + relevant_results.Table = table - relevant_results = MagicMock() - primary_query_result.RelevantResults = relevant_results + mock_row_first = MagicMock() + mock_row_first.Cells.get.side_effect = lambda key, default=None: { + "Title": site_first_title, + "Path": site_first_path, + }.get(key, default) - table = MagicMock() - relevant_results.Table = table + mock_row_second = MagicMock() + mock_row_second.Cells.get.side_effect = lambda key, default=None: { + "Title": site_second_title, + "Path": site_second_path, + }.get(key, default) - if "Rows" in pq_data["RelevantResults"]["Table"]: - rows_data = pq_data["RelevantResults"]["Table"]["Rows"] - mock_rows = [] + table.Rows = [mock_row_first, mock_row_second] - def create_cell_getter(cell_data): - def cell_getter(key, default=None): - return cell_data.get(key, default) + result = reader.get_all_sites() - return cell_getter + expected_sites = [ + {"Title": site_first_title, "Path": site_first_path}, + {"Title": site_second_title, "Path": site_second_path}, + ] + assert result == expected_sites - for row_data in rows_data: - mock_row = MagicMock() + mock_get_site_prefix.assert_called_once_with(reader.one_drive_client) + mock_get_client_context.assert_called_once() + mock_search_service.assert_called_once_with(mock_client_context) + mock_search_service_instance.post_query.assert_called_once_with(query_filter) + mock_execute_query.assert_called_once_with(mock_search_job) - cell_getter = 
create_cell_getter(row_data["Cells"]) - mock_row.Cells = MagicMock() - mock_row.Cells.get = cell_getter +@pytest.mark.parametrize( + "test_case, search_job_value, primary_query_result", + [ + ("empty_search_results", None, None), # Case: search_job.value is None + ("no_relevant_results", True, None), # Case: search_job.value exists but PrimaryQueryResult is None + ], +) +def test_get_all_sites_with_no_results(test_case, search_job_value, primary_query_result): + """ + Test that get_all_sites raises an exception when search returns no results + """ + reader = SourceMicrosoftSharePointStreamReader() + reader._config = MagicMock() + reader._one_drive_client = MagicMock() - mock_rows.append(mock_row) + with ( + patch("source_microsoft_sharepoint.stream_reader.get_site_prefix") as mock_get_site_prefix, + patch.object(reader, "_get_client_context") as mock_get_client_context, + patch("source_microsoft_sharepoint.stream_reader.SearchService") as mock_search_service, + patch("source_microsoft_sharepoint.stream_reader.execute_query_with_retry") as mock_execute_query, + ): + mock_get_site_prefix.return_value = ("https://test-tenant.sharepoint.com", "test-tenant") + mock_client_context = MagicMock() + mock_get_client_context.return_value = mock_client_context + mock_search_service_instance = MagicMock() + mock_search_service.return_value = mock_search_service_instance + mock_search_job = MagicMock() + mock_search_service_instance.post_query.return_value = mock_search_job - table.Rows = mock_rows + search_job_result = MagicMock() + mock_execute_query.return_value = search_job_result + mock_search_job.value = search_job_value - if raises_exception: - with pytest.raises(Exception, match="No site collections found"): - reader.get_all_sites() - else: - result = reader.get_all_sites() + if search_job_value: + search_job_result.value.PrimaryQueryResult = primary_query_result - assert result == expected_sites + with pytest.raises(Exception, match="No site collections found"): + 
reader.get_all_sites() - mock_get_site_prefix.assert_called_once_with(reader.one_drive_client) - mock_get_client_context.assert_called_once() - mock_search_service.assert_called_once_with(mock_client_context) - mock_search_service_instance.post_query.assert_called_once_with( - "contentclass:STS_Site NOT Path:https://test-tenant-my.sharepoint.com" - ) - mock_execute_query.assert_called_once_with(mock_search_job) + mock_get_site_prefix.assert_called_once_with(reader.one_drive_client) + mock_get_client_context.assert_called_once() + mock_search_service.assert_called_once_with(mock_client_context) + mock_search_service_instance.post_query.assert_called_once_with( + "contentclass:STS_Site NOT Path:https://test-tenant-my.sharepoint.com" + ) + mock_execute_query.assert_called_once_with(mock_search_job)