diff --git a/README.md b/README.md index b41337a..2cca1d4 100644 --- a/README.md +++ b/README.md @@ -235,6 +235,34 @@ For example, if you want to extract user stats per day in a users_per_day stream You can check [tap-google-analytics/defaults/default_report_definition.json](tap-google-analytics/defaults/default_report_definition.json) for a more lengthy, detailed example. +##### Segments + +If you want to use the `ga:segment` dimension, you must specify the segment IDs in your reports.json stream / report config: + +``` +[ + { + "name": "acquisition", + "dimensions": [ + "ga:date", + "ga:segment", + "ga:channelGrouping" + ], + "metrics": [ + "ga:users", + "ga:newUsers", + "ga:sessions" + ], + "segments": [ + "gaid::-1", + "gaid::U7LSsrWRTq6JIIS8G8brrQ" + ] + } +] +``` + +Segment IDs can be found with the [GA Query explorer](https://ga-dev-tools.appspot.com/query-explorer). The account configured for authentication must either own the segment or have "Collaborate" access to the GA view; in the latter case, the segment's Segment Availability must also be set to "Collaborators and I can apply/edit Segment in this View". + ## Run ```bash diff --git a/tap_google_analytics/ga_client.py b/tap_google_analytics/ga_client.py index 704893f..a58a445 100644 --- a/tap_google_analytics/ga_client.py +++ b/tap_google_analytics/ga_client.py @@ -144,7 +144,9 @@ def lookup_data_type(self, type, attribute): """ try: if type == 'dimension': - if attribute.startswith(('ga:dimension', 'ga:customVarName', 'ga:customVarValue')): + if attribute == 'ga:segment': + return 'string' + elif attribute.startswith(('ga:dimension', 'ga:customVarName', 'ga:customVarValue')): # Custom Google Analytics Dimensions that are not part of # self.dimensions_ref. 
They are always strings return 'string' @@ -225,6 +227,12 @@ def generate_report_definition(self, stream): for metric in stream['metrics']: report_definition['metrics'].append({"expression": metric.replace("ga_","ga:")}) + # Add segmentIds to the request if the stream contains them + if 'segments' in stream: + report_definition['segments'] = [] + for segmentId in stream['segments']: + report_definition['segments'].append({'segmentId': segmentId}) + return report_definition @backoff.on_exception(backoff.expo, @@ -237,18 +245,23 @@ def query_api(self, report_definition, pageToken=None): Returns: The Analytics Reporting API V4 response. """ + body = { + 'reportRequests': [ + { + 'viewId': self.view_id, + 'dateRanges': [{'startDate': self.start_date, 'endDate': self.end_date}], + 'pageSize': '1000', + 'pageToken': pageToken, + 'metrics': report_definition['metrics'], + 'dimensions': report_definition['dimensions'] + }] + } + + if 'segments' in report_definition: + body['reportRequests'][0]['segments'] = report_definition['segments'] + return self.analytics.reports().batchGet( - body={ - 'reportRequests': [ - { - 'viewId': self.view_id, - 'dateRanges': [{'startDate': self.start_date, 'endDate': self.end_date}], - 'pageSize': '1000', - 'pageToken': pageToken, - 'metrics': report_definition['metrics'], - 'dimensions': report_definition['dimensions'], - }] - }, + body=body, quotaUser=self.quota_user ).execute() diff --git a/tap_google_analytics/reports_helper.py b/tap_google_analytics/reports_helper.py index 48c976c..eb4ab0e 100644 --- a/tap_google_analytics/reports_helper.py +++ b/tap_google_analytics/reports_helper.py @@ -88,14 +88,20 @@ def generate_catalog(self): table_key_properties.append(dimension) - metadata.append({ + dimension_metadata = { "metadata": { "inclusion": "automatic", "selected-by-default": True, "ga_type": 'dimension' }, "breadcrumb": ["properties", dimension] - }) + } + + # Add report segments as metadata for the ga:segment dimension + if dimension 
== 'ga_segment': + dimension_metadata['metadata']['segments'] = report['segments'] + + metadata.append(dimension_metadata) # Add the metrics to the schema for metric in report['metrics']: @@ -170,7 +176,9 @@ def validate(self): LOGGER.critical("'{}' has too many dimensions defined. GA reports can have maximum 7 dimensions.".format(name)) sys.exit(1) - self.validate_dimensions(dimensions) + segments = report['segments'] if 'segments' in report else None + + self.validate_dimensions(dimensions, segments) self.validate_metrics(metrics) # ToDo: We should also check that the given metrics can be used @@ -178,10 +186,16 @@ def validate(self): # Not all dimensions and metrics can be queried together. Only certain # dimensions and metrics can be used together to create valid combinations. - def validate_dimensions(self, dimensions): + def validate_dimensions(self, dimensions, segments): # check that all the dimensions are proper Google Analytics Dimensions for dimension in dimensions: - if not dimension.startswith(('ga:dimension', 'ga:customVarName', 'ga:customVarValue')) \ + # check segments have been provided if 'ga:segment' dimension exists + if dimension == 'ga:segment' and len(segments) > 0: + continue + elif dimension == 'ga:segment' and segments is None: + LOGGER.critical("'{}' requires segments to be specified for this report".format(dimension)) + sys.exit(1) + elif not dimension.startswith(('ga:dimension', 'ga:customVarName', 'ga:customVarValue', 'ga:segment')) \ and dimension not in self.client.dimensions_ref: LOGGER.critical("'{}' is not a valid Google Analytics dimension".format(dimension)) LOGGER.info("For details see https://developers.google.com/analytics/devguides/reporting/core/dimsmets") @@ -217,6 +231,8 @@ def get_report_definition(stream): if ga_type == 'dimension': report['dimensions'].append(attribute) + if attribute == 'ga_segment': + report['segments'] = singer.metadata.get(stream_metadata, ('properties', attribute), "segments") elif ga_type == 
'metric': report['metrics'].append(attribute)