Skip to content
This repository has been archived by the owner on Sep 23, 2024. It is now read-only.

Add support for the 'ga:segment' dimension with segment IDs #13

Open
wants to merge 1 commit into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
28 changes: 28 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -235,6 +235,34 @@ For example, if you want to extract user stats per day in a users_per_day stream
You can check [tap-google-analytics/defaults/default_report_definition.json](tap-google-analytics/defaults/default_report_definition.json) for a more lengthy, detailed example.


##### Segments

To use the `ga:segment` dimension in a report, you must also list the segment IDs to apply under a `segments` key in that report's definition in your `reports.json` file:

```json
[
  {
    "name": "acquisition",
    "dimensions": [
      "ga:date",
      "ga:segment",
      "ga:channelGrouping"
    ],
    "metrics": [
      "ga:users",
      "ga:newUsers",
      "ga:sessions"
    ],
    "segments": [
      "gaid::-1",
      "gaid::U7LSsrWRTq6JIIS8G8brrQ"
    ]
  }
]
```

Segment IDs can be looked up with the [GA Query Explorer](https://ga-dev-tools.appspot.com/query-explorer). The account configured for authentication must either own the segment or satisfy both of the following: it has "Collaborate" access to the GA view, and the segment's Segment Availability is set to "Collaborators and I can apply/edit Segment in this View".

## Run

```bash
Expand Down
37 changes: 25 additions & 12 deletions tap_google_analytics/ga_client.py
Original file line number Diff line number Diff line change
Expand Up @@ -144,7 +144,9 @@ def lookup_data_type(self, type, attribute):
"""
try:
if type == 'dimension':
if attribute.startswith(('ga:dimension', 'ga:customVarName', 'ga:customVarValue')):
if attribute == 'ga:segment':
return 'string'
elif attribute.startswith(('ga:dimension', 'ga:customVarName', 'ga:customVarValue')):
# Custom Google Analytics Dimensions that are not part of
# self.dimensions_ref. They are always strings
return 'string'
Expand Down Expand Up @@ -225,6 +227,12 @@ def generate_report_definition(self, stream):
for metric in stream['metrics']:
report_definition['metrics'].append({"expression": metric.replace("ga_","ga:")})

# Add segmentIds to the request if the stream contains them
if 'segments' in stream:
report_definition['segments'] = []
for segmentId in stream['segments']:
report_definition['segments'].append({'segmentId': segmentId})

return report_definition

@backoff.on_exception(backoff.expo,
Expand All @@ -237,18 +245,23 @@ def query_api(self, report_definition, pageToken=None):
Returns:
The Analytics Reporting API V4 response.
"""
body = {
'reportRequests': [
{
'viewId': self.view_id,
'dateRanges': [{'startDate': self.start_date, 'endDate': self.end_date}],
'pageSize': '1000',
'pageToken': pageToken,
'metrics': report_definition['metrics'],
'dimensions': report_definition['dimensions']
}]
}

if 'segments' in report_definition:
body['reportRequests'][0]['segments'] = report_definition['segments']

return self.analytics.reports().batchGet(
body={
'reportRequests': [
{
'viewId': self.view_id,
'dateRanges': [{'startDate': self.start_date, 'endDate': self.end_date}],
'pageSize': '1000',
'pageToken': pageToken,
'metrics': report_definition['metrics'],
'dimensions': report_definition['dimensions'],
}]
},
body=body,
quotaUser=self.quota_user
).execute()

Expand Down
26 changes: 21 additions & 5 deletions tap_google_analytics/reports_helper.py
Original file line number Diff line number Diff line change
Expand Up @@ -88,14 +88,20 @@ def generate_catalog(self):

table_key_properties.append(dimension)

metadata.append({
dimension_metadata = {
"metadata": {
"inclusion": "automatic",
"selected-by-default": True,
"ga_type": 'dimension'
},
"breadcrumb": ["properties", dimension]
})
}

# Add report segments as metadata for the ga:segment dimension
if dimension == 'ga_segment':
dimension_metadata['metadata']['segments'] = report['segments']

metadata.append(dimension_metadata)

# Add the metrics to the schema
for metric in report['metrics']:
Expand Down Expand Up @@ -170,18 +176,26 @@ def validate(self):
LOGGER.critical("'{}' has too many dimensions defined. GA reports can have maximum 7 dimensions.".format(name))
sys.exit(1)

self.validate_dimensions(dimensions)
segments = report['segments'] if 'segments' in report else None

self.validate_dimensions(dimensions, segments)
self.validate_metrics(metrics)

# ToDo: We should also check that the given metrics can be used
# with the given dimensions
# Not all dimensions and metrics can be queried together. Only certain
# dimensions and metrics can be used together to create valid combinations.

def validate_dimensions(self, dimensions):
def validate_dimensions(self, dimensions, segments):
# check that all the dimensions are proper Google Analytics Dimensions
for dimension in dimensions:
if not dimension.startswith(('ga:dimension', 'ga:customVarName', 'ga:customVarValue')) \
# check segments have been provided if 'ga:segment' dimension exists
if dimension == 'ga:segment' and len(segments) > 0:
continue
elif dimension == 'ga:segment' and segments is None:
LOGGER.critical("'{}' requires segments to be specified for this report".format(dimension))
sys.exit(1)
elif not dimension.startswith(('ga:dimension', 'ga:customVarName', 'ga:customVarValue', 'ga:segment')) \
and dimension not in self.client.dimensions_ref:
LOGGER.critical("'{}' is not a valid Google Analytics dimension".format(dimension))
LOGGER.info("For details see https://developers.google.com/analytics/devguides/reporting/core/dimsmets")
Expand Down Expand Up @@ -217,6 +231,8 @@ def get_report_definition(stream):

if ga_type == 'dimension':
report['dimensions'].append(attribute)
if attribute == 'ga_segment':
report['segments'] = singer.metadata.get(stream_metadata, ('properties', attribute), "segments")
elif ga_type == 'metric':
report['metrics'].append(attribute)

Expand Down