Skip to content

Commit

Permalink
Set data getter to pull from GCP if local datafiles not found
Browse files (browse the repository at this point in the history)
  • Loading branch information
Ed Landamore committed Dec 11, 2020
1 parent 2599663 commit 7927f75
Show file tree
Hide file tree
Showing 2 changed files with 14 additions and 10 deletions.
1 change: 0 additions & 1 deletion .github/workflows/pythonpackage.yml

This file was deleted.

23 changes: 14 additions & 9 deletions fivestar/data.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -8,7 +8,7 @@
from fivestar.params import LISTINGS_COLUMNS
from google.cloud import storage
import gcsfs
from os.path import dirname
from os.path import dirname, isfile
from pathlib import Path
import fivestar

Expand All @@ -33,16 +33,21 @@ def get_data(file='listings', nrows=None, local=True, optimize=False, path=None,
else:
return None

if local:
if path:
path = path + filename
else:
path = f"{str(Path.home())}/code/OrthoLoess/fivestar/data/jan/{filename}"

if path:
path = path + filename
else:
path = f"{str(Path.home())}/code/OrthoLoess/fivestar/data/jan/{filename}"

if local and isfile(path):
df = pd.read_csv(path, **csv_params )
else:
fs = gcsfs.GCSFileSystem(project='PROJECT_NAME', token='/Users/ed/code/fivestar/star-project-key.json')
with fs.open(f'{BUCKET_NAME}/{BUCKET_TRAIN_DATA_PATH}/{filename}') as f:
df = pd.read_csv(f, **csv_params)
# fs = gcsfs.GCSFileSystem(project='PROJECT_NAME', token='/Users/ed/code/fivestar/star-project-key.json')
# with fs.open(f'{BUCKET_NAME}/{BUCKET_TRAIN_DATA_PATH}/{filename}') as f:
# df = pd.read_csv(f, **csv_params)
# file_url = f'https://data.elandamore.net/fivestar/{BUCKET_TRAIN_DATA_PATH}/{filename}'
path = f'gs://{BUCKET_NAME}/{BUCKET_TRAIN_DATA_PATH}/{filename}'
df = pd.read_csv(path, **csv_params )
if file == 'listings':
df = df[(df['review_scores_rating'].notna()) & (df['number_of_reviews']>2)]
return df
Expand Down

0 comments on commit 7927f75

Please sign in to comment.