diff --git a/.gitignore b/.gitignore index 186388e..7a86c60 100644 --- a/.gitignore +++ b/.gitignore @@ -5,6 +5,9 @@ .meltano/ +# Ignore meltano plugins lock files +plugins/ + # Byte-compiled / optimized / DLL files __pycache__/ *.py[cod] @@ -136,4 +139,7 @@ dmypy.json .pyre/ # VSCode files -.vscode \ No newline at end of file +.vscode + +# intellij +.idea \ No newline at end of file diff --git a/README.md b/README.md index e2c6ffc..4cdd0a6 100644 --- a/README.md +++ b/README.md @@ -56,11 +56,21 @@ Your `sheet_id` are the characters after `spreadsheets/d/`, so in this case woul ### Credentials +Setting | Required | Type | Description | +------- | -------- |------------------| ----------- | +`oauth_credentials.client_id` | Required | String | Your google client id +`oauth_credentials.client_secret` | Required | String | Your google client secret +`oauth_credentials.refresh_token` | Required | String | Your google refresh token +`sheet_id` | Required | String | Your target google sheet id +`output_name` | Optional | String | Optionally rename the stream and output file or table from the tap +`child_sheet_name` | Optional | String | Optionally choose a different sheet from your Google Sheet file +`key_properties` | Optional | Array of Strings | Optionally choose primary key column(s) from your Google Sheet file. Example: `["column_one", "column_two"]` +`sheets` | Optional | Array of Objects | Optionally provide a list of configs for each sheet/stream. See "Per Sheet Config" below. Overrides the `sheet_id` provided at the root level. 
+ +### Per Sheet Config + Setting | Required | Type | Description | ------- | -------- | ---- | ----------- | -`oauth_credentials.client_id` | Required | String | Your google client id -`oauth_credentials.client_secret` | Required | String | Your google client secret -`oauth_credentials.refresh_token` | Required | String | Your google refresh token `sheet_id` | Required | String | Your target google sheet id `output_name` | Optional | String | Optionailly rename the stream and output file or table from the tap `child_sheet_name` | Optional | String | Optionally choose a different sheet from your Google Sheet file @@ -76,6 +86,7 @@ These settings expand into environment variables of: - `TAP_GOOGLE_SHEETS_OUTPUT_NAME` - `TAP_GOOGLE_SHEETS_CHILD_SHEET_NAME` - `TAP_GOOGLE_SHEETS_KEY_PROPERTIES` +- `TAP_GOOGLE_SHEETS_SHEETS` --- @@ -97,6 +108,8 @@ These settings expand into environment variables of: * When using the `key_properties` setting, you must choose columns with no null values. +* You can extract multiple sheets using the `sheets` config, which is just an array containing configurable properties for each item. Doing so will ignore any sheet config defined by the root level `sheet_id`, `output_name`, `child_sheet_name`, `key_properties` properties. 
+ ### Loaders Tested - [target-jsonl](https://hub.meltano.com/targets/jsonl) diff --git a/meltano.yml b/meltano.yml index 7d9106b..c58c518 100644 --- a/meltano.yml +++ b/meltano.yml @@ -1,6 +1,9 @@ version: 1 send_anonymous_usage_stats: true project_id: 04da77a3-af12-49a4-b9bf-3c22845918ba +default_environment: test +environments: +- name: test plugins: extractors: - name: tap-google-sheets @@ -22,6 +25,10 @@ plugins: - name: sheet_id - name: output_name - name: child_sheet_name + - name: key_properties + kind: array + - name: sheets + kind: array loaders: - name: target-jsonl variant: andyh1203 diff --git a/tap_google_sheets/streams.py b/tap_google_sheets/streams.py index ccdf216..dfef997 100644 --- a/tap_google_sheets/streams.py +++ b/tap_google_sheets/streams.py @@ -19,11 +19,12 @@ class GoogleSheetsStream(GoogleSheetsBaseStream): child_sheet_name = None primary_key = None url_base = "https://sheets.googleapis.com/v4/spreadsheets" + stream_config = None @property def path(self): """Set the path for the stream.""" - return f"/{self.config['sheet_id']}/values/{self.child_sheet_name}" + return f"/{self.stream_config['sheet_id']}/values/{self.child_sheet_name}" def parse_response(self, response: requests.Response) -> Iterable[dict]: """Parse response, build response back up into json, update stream schema.""" diff --git a/tap_google_sheets/tap.py b/tap_google_sheets/tap.py index 175423f..356ae9d 100644 --- a/tap_google_sheets/tap.py +++ b/tap_google_sheets/tap.py @@ -16,22 +16,7 @@ class TapGoogleSheets(Tap): name = "tap-google-sheets" - config_jsonschema = th.PropertiesList( - th.Property( - "oauth_credentials.client_id", - th.StringType, - description="Your google client_id", - ), - th.Property( - "oauth_credentials.client_secret", - th.StringType, - description="Your google client_secret", - ), - th.Property( - "oauth_credentials.refresh_token", - th.StringType, - description="Your google refresh token", - ), + per_sheet_config = th.ObjectType( 
th.Property("sheet_id", th.StringType, description="Your google sheet id"), th.Property( "output_name", @@ -42,8 +27,9 @@ class TapGoogleSheets(Tap): th.Property( "child_sheet_name", th.StringType, - description="Optionally sync data from a different sheet in" - + " your Google Sheet", + description=( + "Optionally sync data from a different sheet in your Google Sheet" + ), required=False, ), th.Property( @@ -52,42 +38,78 @@ class TapGoogleSheets(Tap): description="Optionally choose one or more primary key columns", required=False, ), - ).to_dict() + ) + + base_config = th.PropertiesList( + th.Property( + "oauth_credentials.client_id", + th.StringType, + description="Your google client_id", + ), + th.Property( + "oauth_credentials.client_secret", + th.StringType, + description="Your google client_secret", + ), + th.Property( + "oauth_credentials.refresh_token", + th.StringType, + description="Your google refresh token", + ), + th.Property( + "sheets", + required=False, + description="The list of configs for each sheet/stream.", + wrapped=th.ArrayType(per_sheet_config), + ), + ) + + for prop in per_sheet_config.wrapped.values(): + # Expose each per-sheet setting at the root level too (single-sheet config). + base_config.append(prop) + + config_jsonschema = base_config.to_dict() def discover_streams(self) -> List[Stream]: """Return a list of discovered streams.""" streams: List[Stream] = [] - stream_name = self.config.get("output_name") or self.get_sheet_name() - stream_name = stream_name.replace(" ", "_") - key_properties = self.config.get("key_properties", []) + sheets = self.config.get("sheets") or [self.config] + for stream_config in sheets: + stream_name = stream_config.get("output_name") or self.get_sheet_name( + stream_config + ) + stream_name = stream_name.replace(" ", "_") + key_properties = stream_config.get("key_properties", []) - google_sheet_data = self.get_sheet_data() + google_sheet_data = self.get_sheet_data(stream_config) - stream_schema = self.get_schema(google_sheet_data) + stream_schema = 
self.get_schema(google_sheet_data) - child_sheet_name = self.config.get( - "child_sheet_name" - ) or self.get_first_visible_child_sheet_name(google_sheet_data) + child_sheet_name = stream_config.get( + "child_sheet_name" + ) or self.get_first_visible_child_sheet_name(google_sheet_data) - if stream_name: - stream = GoogleSheetsStream( - tap=self, name=stream_name, schema=stream_schema - ) - stream.child_sheet_name = child_sheet_name - stream.selected - stream.primary_keys = key_properties - streams.append(stream) + if stream_name: + stream = GoogleSheetsStream( + tap=self, name=stream_name, schema=stream_schema + ) + stream.child_sheet_name = child_sheet_name + stream.selected + stream.primary_keys = key_properties + stream.stream_config = stream_config + streams.append(stream) return streams - def get_sheet_name(self): + def get_sheet_name(self, stream_config): """Get the name of the spreadsheet.""" config_stream = GoogleSheetsBaseStream( tap=self, name="config", schema={"one": "one"}, - path="https://www.googleapis.com/drive/v2/files/" + self.config["sheet_id"], + path="https://www.googleapis.com/drive/v2/files/" + + stream_config["sheet_id"], ) prepared_request = config_stream.prepare_request(None, None) @@ -115,16 +137,16 @@ def get_first_visible_child_sheet_name(self, google_sheet_data: requests.Respons return sheet_in_sheet_name - def get_sheet_data(self): + def get_sheet_data(self, stream_config): """Get the data from the selected or first visible sheet in the google sheet.""" config_stream = GoogleSheetsBaseStream( tap=self, name="config", schema={"not": "null"}, path="https://sheets.googleapis.com/v4/spreadsheets/" - + self.config["sheet_id"] + + stream_config["sheet_id"] + "/values/" - + self.config.get("child_sheet_name", "") + + stream_config.get("child_sheet_name", "") + "!1:1", )