Skip to content

Commit

Permalink
WIP: trying to get gcp to use provided credentials
Browse files Browse the repository at this point in the history
  • Loading branch information
Ed Landamore committed Nov 23, 2020
1 parent 1505cd6 commit c1fd741
Show file tree
Hide file tree
Showing 6 changed files with 50 additions and 0 deletions.
Empty file added .env.example
Empty file.
2 changes: 2 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -11,3 +11,5 @@ groupama/data/base_pno.*
.idea/
data/*
!data/.keep
.env
star-project-key.json
5 changes: 5 additions & 0 deletions fivestar/__init__.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,13 @@
from os.path import isfile
from os.path import dirname
from os.path import join
from dotenv import load_dotenv

# Path of the optional version file that sits next to this module.
version_file = '{}/version.txt'.format(dirname(__file__))

# `__version__` is only defined when the version file exists.
if isfile(version_file):
    # Bind the handle to its own name so `version_file` keeps holding the
    # path instead of being rebound to a (soon closed) file object.
    with open(version_file) as _version_handle:
        __version__ = _version_handle.read().strip()

# Load environment variables from the `.env` file located one directory
# above this package, i.e. dirname(dirname(__file__)).
env_path = join(dirname(dirname(__file__)), '.env')
load_dotenv(dotenv_path=env_path)
31 changes: 31 additions & 0 deletions fivestar/data.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
"""
Helpers for loading the listings training data from a local file or a Google Cloud Storage bucket.
"""

import numpy as np
import pandas as pd
from fivestar.params import BUCKET_NAME, BUCKET_TRAIN_DATA_PATH, PROJECT_NAME
from google.cloud import storage

def get_data(nrows=10000, local=False, optimize=False, **kwargs):
    """Return the first ``nrows`` rows of ``listings.csv`` as a DataFrame.

    Args:
        nrows: Maximum number of rows to read (forwarded to ``pd.read_csv``).
        local: When true, read ``data/listings.csv`` relative to the current
            working directory. Otherwise read the file from the
            ``BUCKET_NAME`` bucket under ``BUCKET_TRAIN_DATA_PATH``.
        optimize: Accepted for interface compatibility; currently unused.
        **kwargs: Accepted and ignored (for example the ``upload`` flag the
            script entry point passes).

    Returns:
        pandas.DataFrame holding the rows that were read.

    For bucket reads, the service-account key path is taken from the
    ``GOOGLE_APPLICATION_CREDENTIALS`` environment variable when it is set,
    otherwise from ``star-project-key.json`` in the directory above this
    package. If that file does not exist no token is passed and credential
    discovery is left to the storage backend.
    """
    # Function-scope import: this block must not rely on a module-level one.
    import os

    filename = 'listings.csv'

    if local:
        # Local reads need no GCP credentials, so return before touching them.
        return pd.read_csv(f"data/{filename}", nrows=nrows)

    # Resolve the key relative to the repository instead of hard-coding one
    # developer's absolute path.
    repo_root = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
    key_path = (os.getenv('GOOGLE_APPLICATION_CREDENTIALS')
                or os.path.join(repo_root, 'star-project-key.json'))

    # pandas forwards `storage_options` to the gs:// filesystem, which is how
    # the provided credentials actually reach the read.
    storage_options = {'project': PROJECT_NAME}
    if os.path.isfile(key_path):
        storage_options['token'] = key_path

    path = f"gs://{BUCKET_NAME}/{BUCKET_TRAIN_DATA_PATH}/{filename}"
    return pd.read_csv(path, nrows=nrows, storage_options=storage_options)


if __name__ == "__main__":
    # Smoke run: fetch a handful of rows and report the resulting shape.
    params = {
        "nrows": 10,
        "upload": False,
        "local": False,  # False reads from GCP; True reads the local copy
        "optimize": False,
    }
    df = get_data(**params)
    print(df.shape)
8 changes: 8 additions & 0 deletions fivestar/params.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@


# --- GCP Storage settings ---------------------------------------------------

# Project identifier handed to the storage client.
PROJECT_NAME = "star-project-296512"

# Bucket that holds the data, and the folder inside it used as the prefix
# for the training-data file.
BUCKET_NAME = "data-475"
BUCKET_TRAIN_DATA_PATH = "data"
4 changes: 4 additions & 0 deletions requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -9,3 +9,7 @@ black
yapf
python-gitlab
twine
python-dotenv
google-cloud-storage
fsspec
gcsfs

0 comments on commit c1fd741

Please sign in to comment.