Skip to content

Commit

Permalink
Sliders working to predict new score
Browse files Browse the repository at this point in the history
  • Loading branch information
Ed Landamore committed Dec 3, 2020
1 parent abbdef6 commit a99787b
Show file tree
Hide file tree
Showing 9 changed files with 861 additions and 555 deletions.
62 changes: 37 additions & 25 deletions fivestar/first_app.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,10 @@
import streamlit as st
import numpy as np
import pandas as pd
from wordcloud import WordCloud

from fivestar.clusters import get_cluster_coords
from fivestar.lib import get_listing
# from fivestar.lib import get_listing
from fivestar.lib import FiveStar

# lists for select boxes (to be replaced by imported lists/params)
Expand Down Expand Up @@ -181,9 +182,9 @@
with slide_col_mid:
st.subheader('Change your offering')

guests_accom = st.slider('Guests to accommodate', 0, 16, avg_guests_accom)
st.write(guests_accom, 'guests')
st.write('')
# guests_accom = st.slider('Guests to accommodate', 0, 16, avg_guests_accom)
# st.write(guests_accom, 'guests')
# st.write('')

can_strict = st.select_slider(
'Strict cancellation policy (ie xx)',options=['No', 'Yes'])
Expand All @@ -195,17 +196,17 @@
st.write('')
#st.write('Instantly bookable:', inst_book)

host_listings_count = st.slider('No of other listings', 0, 16, 1)
st.write(host_listings_count + 1, 'listings in total')
st.write('')
# host_listings_count = st.slider('No of other listings', 0, 16, 1)
# st.write(host_listings_count + 1, 'listings in total')
# st.write('')

type_entire = st.select_slider(
'Entire flat (vs private room)',options=['No', 'Yes'])
st.write('')
# type_entire = st.select_slider(
# 'Entire flat (vs private room)',options=['No', 'Yes'])
# st.write('')

parking = st.select_slider(
'Free parking on premises',options=['No', 'Yes'])
st.write('')
# parking = st.select_slider(
# 'Free parking on premises',options=['No', 'Yes'])
# st.write('')

wifi = st.select_slider(
'Wifi available',options=['No', 'Yes'])
Expand All @@ -215,13 +216,13 @@
'Breakfast included',options=['No', 'Yes'])
st.write('')

host_resp_rate = st.select_slider(
'Response to questions',options=['Never', 'When I can', 'As much as possible'])
st.write('')
# host_resp_rate = st.select_slider(
# 'Response to questions',options=['Never', 'When I can', 'As much as possible'])
# st.write('')

host_identity = st.select_slider(
'Host identity verified',options=['No', 'Yes'])
st.write('')
# host_identity = st.select_slider(
# 'Host identity verified',options=['No', 'Yes'])
# st.write('')

price_ratio = st.slider('Price adjustor, £', 0, 250, price)
st.write('£', price_ratio, )
Expand All @@ -231,16 +232,27 @@
st.write('Expected standard of cleanliness:', cleanliness_delta )
st.write('')

amenity_options = st.multiselect('Amenities offered',
amenities_example)
st.write(len(amenity_options), 'amenities offered')
st.write('')

# amenity_options = st.multiselect('Amenities offered',
# amenities_example)
# st.write(len(amenity_options), 'amenities offered')
# st.write('')
values = {
'price': price_ratio,
'cancellation_policy': can_strict,
'Wifi': wifi,
'Breakfast': breakfast,
'review_scores_cleanliness': cleanliness_delta,
'instant_bookable': inst_book,
}
print('These values are coming directly from streamlit:', values)
new_score = fs.predict_on_new_values(listing_id, values)

with slide_col_right:
st.subheader('Review score impact')
st.write('')
st.write('review score:', '+0.4')
st.write('review score:', new_score)



# checkbox functionality

Expand Down
102 changes: 89 additions & 13 deletions fivestar/lib.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,8 +5,11 @@

from os.path import split
import pandas as pd
import numpy as np
import datetime
from fivestar.data import get_data
from fivestar.params import COLUMNS
from fivestar.model import Model

pd.set_option('display.width', 200)

Expand All @@ -15,22 +18,95 @@ class FiveStar():
def __init__(self):
    """Load the datasets and the model used by the other methods."""
    # Listings table: whatever get_data() returns with its default arguments.
    self.listings = get_data()
    # Second dataset, selected by passing 'clusters' to get_data.
    self.clusters = get_data('clusters')
    # The result of load_model() is kept as self.model; other methods call
    # self.model.predict(...) and read self.model.pipeline.
    self.model = Model().load_model()



def get_listing(self, listing_id):
    """Look up one listing by id and return its attributes as a dict.

    Args:
        listing_id: Listing identifier; anything ``int()`` accepts
            (for example ``123`` or ``'123'``).

    Returns:
        A dict mapping column name to value for the first row of
        ``self.listings`` whose ``id`` equals ``int(listing_id)``. An empty
        dict is returned when ``listing_id`` is falsy or no row matches.

    Side effects:
        Stores ``listing_id`` on ``self.current_listing`` when it is truthy.
    """
    if not listing_id:
        return {}

    self.current_listing = listing_id
    listings = self.listings
    records = listings[listings['id'] == int(listing_id)].to_dict('records')
    # to_dict('records') yields one dict per matching row, so an unknown id
    # produces an empty list; guard before indexing instead of raising.
    return records[0] if records else {}


def get_coef_dict(self):
    """Map each name in COLUMNS to the coefficient at the same position.

    Coefficients are read from the ``coef_`` attribute of the pipeline step
    registered under the name ``'rgs'``. Pairing is positional and stops at
    the shorter of the two sequences.
    """
    regressor = self.model.pipeline.named_steps['rgs']
    return dict(zip(COLUMNS, regressor.coef_))

def predict_on_new_values(self, listing_id, values):
    """Predict for a listing after overriding some of its attributes.

    ``listing_id`` and ``values`` are handed unchanged to ``self.build_X``;
    its result is passed to ``self.model.predict`` and that prediction is
    returned as-is.
    """
    features = self.build_X(listing_id, values)
    prediction = self.model.predict(features)
    return prediction

# def build_X(self, listing_id, values):
# listing_int = int(listing_id)
# listing = self.listings[self.listings['id'] == listing_int].copy()
# print(listing)
# if values:
# for key, value in values.items():
# if key == 'cancellation_policy':
# listing['cancellation_policy'] = 'strict' if values['cancellation_policy'] == 'Yes' else 'Other'
# if key == 'Wifi' or key == 'Breakfast':
# if value == 'No':
# listing['amenities'] = listing['amenities'].replace(f'key,','')
# elif key not in listing['amenities']:
# listing['amenities'] = '{' + key + listing.iloc[0,'amenities'][1:]
# if key == 'instant_bookable':
# listing[key] = 't' if value == 'Yes' else 'f'
# else:
# listing[key] = value


# return listing

def build_X(self, listing_id, values):
    """Build a one-row DataFrame for a listing with some values overridden.

    Args:
        listing_id: Id passed to ``self.get_listing`` to fetch the baseline
            attributes.
        values: Mapping of overrides. Special keys:

            * ``'cancellation_policy'``: ``'Yes'`` is stored as ``'strict'``,
              anything else as ``'Other'``.
            * ``'instant_bookable'``: ``'Yes'`` is stored as ``'t'``,
              anything else as ``'f'``.
            * ``'Wifi'`` / ``'Breakfast'``: ``'Yes'`` adds the entry to the
              ``'amenities'`` string, ``'No'`` removes it, any other value
              leaves the amenities untouched.

            Every other key replaces the attribute of the same name.

    Returns:
        pandas.DataFrame with a single row and one column per attribute.
    """
    def with_amenity(amenities, name, value):
        """Return the '{a,b,...}' string with `name` added or removed."""
        if not isinstance(amenities, str):
            # Missing / NaN amenities are treated as an empty set.
            amenities = '{}'
        if value not in ('Yes', 'No'):
            return amenities

        # Items are split on commas, the same way the string is laid out;
        # entries may be wrapped in double quotes, which are ignored when
        # comparing names but preserved when re-joining.
        items = [item for item in amenities.strip().strip('{}').split(',')
                 if item.strip()]

        def is_name(item):
            return item.strip().strip('"') == name

        present = any(is_name(item) for item in items)
        if value == 'Yes':
            if present:
                return amenities
            items.append(name)
        else:
            if not present:
                return amenities
            items = [item for item in items if not is_name(item)]
        return '{' + ','.join(items) + '}'

    # Work on a copy so the looked-up record is never mutated in place, and
    # tolerate a lookup that found nothing.
    listing_attributes = dict(self.get_listing(listing_id) or {})

    for key, value in values.items():
        if key == 'cancellation_policy':
            listing_attributes[key] = 'strict' if value == 'Yes' else 'Other'
        elif key == 'instant_bookable':
            listing_attributes[key] = 't' if value == 'Yes' else 'f'
        elif key in ('Wifi', 'Breakfast'):
            listing_attributes['amenities'] = with_amenity(
                listing_attributes.get('amenities'), key, value)
        else:
            listing_attributes[key] = value

    # Wrap every value in a list so each attribute becomes a one-row column.
    listing_for_df = {k: [v] for k, v in listing_attributes.items()}
    return pd.DataFrame.from_dict(listing_for_df)


def clean_data(data):
Expand All @@ -40,15 +116,15 @@ def clean_data(data):
return data


def get_listing(listing_id):
"""Look up full info for an id and return it as a dict???"""
listings = get_data()
data = listings.loc[listing_id].to_dict('records')[0]
return data
# def get_listing(listing_id):
# """Look up full info for an id and return it as a dict???"""
# listings = get_data()
# data = listings.loc[listing_id].to_dict('records')[0]
# return data


if __name__ == '__main__':
# For introspections purpose to quickly get this functions on ipython
import fivestar

print(' dataframe cleaned')
# print(' dataframe cleaned')
14 changes: 14 additions & 0 deletions fivestar/model.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
import joblib


class Model():
    """Thin wrapper around a fitted pipeline persisted with joblib."""

    def __init__(self):
        # Set by load_model(); None means nothing has been loaded yet.
        self.pipeline = None

    def predict(self, X_new):
        """Return ``self.pipeline.predict(X_new)``.

        Raises:
            RuntimeError: if no pipeline has been loaded or assigned yet.
        """
        pipeline = getattr(self, 'pipeline', None)
        if pipeline is None:
            raise RuntimeError('No pipeline loaded; call load_model() first.')
        y_pred = pipeline.predict(X_new)
        return y_pred

    def load_model(self, path='model.joblib'):
        """Load the pipeline stored at ``path`` and return ``self``.

        Args:
            path: File to read. Defaults to ``'model.joblib'``, which is
                resolved relative to the current working directory.

        Returns:
            This instance, so the call can be chained after construction.
        """
        # NOTE(security): joblib.load unpickles the file, which can execute
        # arbitrary code; only load model files from a trusted source.
        self.pipeline = joblib.load(path)
        return self
13 changes: 9 additions & 4 deletions fivestar/params.py
Original file line number Diff line number Diff line change
Expand Up @@ -74,10 +74,11 @@

KEY_AMENITIES = [['Free parking on premises', 'free street parking',
'paid parking on premises' ,'paid parking off premises'],
'Dryer','Wifi','Breakfast',
'Indoor fireplace',
['TV', 'cable tv'],
['Smoke alarm','Smoke detector']]
'Wifi', 'Breakfast'
# 'Indoor fireplace',
# ['TV', 'cable tv'],
# ['Smoke alarm','Smoke detector']
]

PRICES = [950760, 301518,667593,357779,578705,502623,1099876,399645,
578110,463806,462820,614955,972231,683987,527206,387535,
Expand Down Expand Up @@ -125,3 +126,7 @@
'Sutton': [24.9, 28.0, 31.0, 35.0, 44.5, 50.0, 56.19999999999999, 65.4, 85.0]
}

# Feature names used to label model coefficients: FiveStar.get_coef_dict zips
# this list positionally with the regressor's coef_ array.
# NOTE(review): the order presumably has to match the column order the
# pipeline feeds to the regressor — confirm against the training code.
COLUMNS = ['parking', 'wifi', 'breakfast', 'amenity_count', 'instant_bookable',
'host_identity_verified', 'price_ratio', 'listing_count', 'cancellation',
'response_rate', 'room_ratio', 'cleanliness_delta',
'room_type']
26 changes: 16 additions & 10 deletions fivestar/trainer.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,10 +29,10 @@

class Trainer(object):
# Mlflow parameters identifying the experiment, you can add all the parameters you wish
ESTIMATOR = "Linear"
ESTIMATOR = "Ridge"
EXPERIMENT_NAME = "FiveStar"

def __init__(self, X, y, **kwargs):
def __init__(self, X=None, y=None, **kwargs):
"""
FYI:
__init__ is called every time you instatiate Trainer
Expand All @@ -56,7 +56,7 @@ def __init__(self, X, y, **kwargs):
self.X_train = X
self.y_train = y
del X, y
self.split = self.kwargs.get("split", True) # cf doc above
self.split = self.kwargs.get("split", False) # cf doc above
if self.split:
self.X_train, self.X_val, self.y_train, self.y_val = train_test_split(self.X_train, self.y_train,
test_size=0.25,
Expand All @@ -70,7 +70,7 @@ def get_estimator(self):
if estimator == "Linear":
model = LinearRegression()
else:
model = Ridge()
model = Ridge(alpha=50)
estimator_params = self.kwargs.get("estimator_params", {})
self.mlflow_log_param("estimator", estimator)
model.set_params(**estimator_params)
Expand Down Expand Up @@ -139,6 +139,12 @@ def set_pipeline(self):
('rgs', self.get_estimator())], memory=memory)


def predict(self, X):
    """Run ``self.pipeline.predict`` on ``X`` and return its result unchanged."""
    predictions = self.pipeline.predict(X)
    return predictions

def load_model(self, path='model.joblib'):
    """Load a persisted pipeline from ``path`` into ``self.pipeline``.

    Args:
        path: File to read. Defaults to ``'model.joblib'``, resolved
            relative to the current working directory.
    """
    # NOTE(security): joblib.load unpickles the file, which can execute
    # arbitrary code; only load model files from a trusted source.
    self.pipeline = joblib.load(path)

def add_grid_search(self):
""""
Apply Gridsearch on self.params defined in get_estimator
Expand Down Expand Up @@ -185,14 +191,14 @@ def save_model(self, upload=True, auto_remove=True):
"""Save the model into a .joblib and upload it on Google Storage /models folder
HINTS : use sklearn.joblib (or jbolib) libraries and google-cloud-storage"""
joblib.dump(self.pipeline, 'model.joblib')
print(colored("model.joblib saved locally", "green"))
# print(colored("model.joblib saved locally", "green"))

# Add upload of model.joblib to storage here
version = self.kwargs.get('version', None)
if version:
storage_upload(model_version=version)
else:
storage_upload()
# version = self.kwargs.get('version', None)
# if version:
# storage_upload(model_version=version)
# else:
# storage_upload()

### MLFlow methods
@memoized_property
Expand Down
2 changes: 1 addition & 1 deletion fivestar/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
from fivestar.params import BOROUGHS, PRICES

def decode_amenities(df):
data = df
data = df.copy()
def str_to_list(strn):
row_items = strn[1:-1].split(',')
for key,item in enumerate(row_items):
Expand Down
Binary file added model.joblib
Binary file not shown.
Loading

0 comments on commit a99787b

Please sign in to comment.