Initial Commit

synflyn28 · Jul 29, 2018 · c6ab4eb · c6ab4eb
commit c6ab4eb
Show file tree

Hide file tree

Showing 5 changed files with 900,174 additions and 0 deletions.
diff --git a/.DS_Store b/.DS_Store
diff --git a/bikeshare_2.py b/bikeshare_2.py
@@ -0,0 +1,171 @@
+import time
+import pandas as pd
+import numpy as np
+
+
+def get_filters():
+    """
+    Asks user to specify a city, month, and day to analyze.
+
+    Every answer is stripped of surrounding whitespace, lower-cased and
+    requested again until it is one of the accepted values, so the returned
+    filters never contain a typo.
+
+    Returns:
+        (str) city - name of the city to analyze
+        (str) month - name of the month to filter by, or "all" to apply no month filter
+        (str) day - name of the day of week to filter by, or "all" to apply no day filter
+    """
+    cities = ('chicago', 'new york city', 'washington')
+    months = ('all', 'january', 'february', 'march', 'april', 'may', 'june')
+    days = ('all', 'monday', 'tuesday', 'wednesday', 'thursday', 'friday',
+            'saturday', 'sunday')
+
+    def ask(prompt, retry_prompt, allowed):
+        """Prompts until the normalized answer is one of the allowed values."""
+        answer = input(prompt).strip().lower()
+        while answer not in allowed:
+            answer = input(retry_prompt).strip().lower()
+        return answer
+
+    print('Hello! Let\'s explore some US bikeshare data!')
+
+    # get user input for city (chicago, new york city, washington)
+    city = ask("Please input city name: ",
+               "City name is invalid! Please input another name: ",
+               cities)
+
+    # get user input for month (all, january, february, ... , june)
+    month = ask("Please input month name: ",
+                "Month is invalid! Please input all, or january to june: ",
+                months)
+
+    # get user input for day of week (all, monday, tuesday, ... sunday)
+    day = ask("Please input day of week: ",
+              "Day is invalid! Please input all, or monday to sunday: ",
+              days)
+
+    print('-'*40)
+    return city, month, day
+
+
+def load_data(city, month, day):
+    """
+    Loads data for the specified city and filters by month and day if applicable.
+
+    The CSV is read from the current working directory; its file name is the
+    city name with spaces replaced by underscores (e.g. "new_york_city.csv").
+    Two helper columns are added: 'month' (int taken from 'Start Time') and
+    'day_of_week' (lower-case English day name taken from 'Start Time').
+
+    Args:
+        (str) city - name of the city to analyze
+        (str) month - name of the month to filter by, or "all" to apply no month filter
+        (str) day - name of the day of week to filter by, or "all" to apply no day filter
+    Returns:
+        df - Pandas DataFrame containing city data filtered by month and day
+    Raises:
+        ValueError - if month is neither "all" nor one of january..june
+    """
+    df = pd.read_csv("{}.csv".format(city.replace(" ", "_")))
+
+    # Convert the Start and End Time columns to datetime
+    df['Start Time'] = pd.to_datetime(df['Start Time'])
+    df['End Time'] = pd.to_datetime(df['End Time'])
+
+    # extract month and day of week from Start Time to create new columns;
+    # the .dt accessors are vectorized and day_name() is always English,
+    # unlike strftime('%A'), which follows the process locale
+    df['month'] = df['Start Time'].dt.month
+    df['day_of_week'] = df['Start Time'].dt.day_name().str.lower()
+
+    # filter by month if applicable
+    if month != 'all':
+        # use the index of the months list to get the corresponding int
+        months = ['january', 'february', 'march', 'april', 'may', 'june']
+        if month not in months:
+            raise ValueError(
+                "Unknown month {!r}; expected 'all' or one of: {}".format(
+                    month, ', '.join(months)))
+        df = df.loc[df['month'] == months.index(month) + 1]
+
+    # filter by day of week if applicable
+    if day != 'all':
+        df = df.loc[df['day_of_week'] == day]
+
+    # return an independent frame so callers can add columns without
+    # triggering pandas' SettingWithCopyWarning
+    return df.copy()
+
+
+def time_stats(df):
+    """
+    Displays statistics on the most frequent times of travel.
+
+    Prints the most common value of the 'month' and 'day_of_week' columns and
+    the most common hour of 'Start Time'. The DataFrame is not modified.
+
+    Args:
+        df - Pandas DataFrame with 'month', 'day_of_week' and a datetime
+             'Start Time' column
+    """
+
+    print('\nCalculating The Most Frequent Times of Travel...\n')
+    start_time = time.time()
+
+    if df.empty:
+        # mode() of an empty column has no first element to report
+        print('No trips to analyze.')
+    else:
+        # display the most common month
+        print('Most common month: {}'.format(df['month'].mode().values[0]))
+
+        # display the most common day of week
+        print('Most common day of week: {}'.format(
+            df['day_of_week'].mode().values[0]))
+
+        # display the most common start hour; computed on a temporary Series
+        # so the caller's DataFrame does not gain an extra column
+        start_hours = df['Start Time'].dt.hour
+        print('Most common start hour: {}'.format(
+            start_hours.mode().values[0]))
+
+    print("\nThis took %s seconds." % (time.time() - start_time))
+    print('-'*40)
+
+
+def station_stats(df):
+    """
+    Displays statistics on the most popular stations and trip.
+
+    Prints the most common 'Start Station', the most common 'End Station' and
+    the most frequent (start, end) pair. The DataFrame is not modified.
+
+    Args:
+        df - Pandas DataFrame with 'Start Station' and 'End Station' columns
+    """
+
+    print('\nCalculating The Most Popular Stations and Trip...\n')
+    start_time = time.time()
+
+    # count trips per (start, end) pair; grouping on both columns keeps pairs
+    # distinct, whereas joining the names with a space would merge e.g.
+    # ("A", "B C") with ("A B", "C")
+    trip_counts = df.groupby(['Start Station', 'End Station']).size()
+
+    if trip_counts.empty:
+        print('No trips to analyze.')
+    else:
+        # display most commonly used start station
+        print('Most common start station: {}'.format(
+            df['Start Station'].mode().values[0]))
+
+        # display most commonly used end station
+        print('Most common end station: {}'.format(
+            df['End Station'].mode().values[0]))
+
+        # display most frequent combination of start station and end station trip
+        origin, destination = trip_counts.idxmax()
+        print('Most frequent trip: {} -> {}'.format(origin, destination))
+
+    print("\nThis took %s seconds." % (time.time() - start_time))
+    print('-'*40)
+
+
+def trip_duration_stats(df):
+    """
+    Displays statistics on the total and average trip duration.
+
+    Durations are computed as 'End Time' minus 'Start Time', so both columns
+    must already be datetimes. The DataFrame is not modified.
+
+    Args:
+        df - Pandas DataFrame with datetime 'Start Time' and 'End Time' columns
+    """
+
+    print('\nCalculating Trip Duration...\n')
+    start_time = time.time()
+
+    # kept as a temporary Series so the caller's DataFrame does not gain an
+    # extra column
+    durations = df['End Time'] - df['Start Time']
+
+    # display total travel time
+    print('Total travel time: {}'.format(durations.sum()))
+
+    # display mean travel time
+    print('Mean travel time: {}'.format(durations.mean()))
+
+    print("\nThis took %s seconds." % (time.time() - start_time))
+    print('-'*40)
+
+
+def user_stats(df, city):
+    """
+    Displays statistics on bikeshare users.
+
+    Always prints the counts per 'User Type'. Gender counts and birth-year
+    statistics are printed only when the corresponding columns exist in the
+    DataFrame, so a data set lacking them is handled without a KeyError.
+
+    Args:
+        df - Pandas DataFrame with a 'User Type' column and, optionally,
+             'Gender' and 'Birth Year' columns
+        (str) city - name of the analyzed city; kept for backward
+                     compatibility, the available columns now decide what
+                     is shown
+    """
+
+    print('\nCalculating User Stats...\n')
+    start_time = time.time()
+
+    # Display counts of user types
+    print('Counts of user types:')
+    print(df['User Type'].value_counts())
+
+    if 'Gender' in df.columns:
+        # Display counts of gender
+        print('\nCounts of gender:')
+        print(df['Gender'].value_counts())
+
+    if 'Birth Year' in df.columns:
+        # Display earliest, most recent, and most common year of birth;
+        # missing values are dropped first so that min/max/mode are defined
+        birth_years = df['Birth Year'].dropna()
+        if birth_years.empty:
+            print('\nNo birth year data available.')
+        else:
+            # the column is read as float because of missing values; cast so
+            # the year prints as 1980 rather than 1980.0
+            print('\nEarliest year of birth: {}'.format(
+                int(birth_years.min())))
+            print('Most recent year of birth: {}'.format(
+                int(birth_years.max())))
+            print('Most common year of birth: {}'.format(
+                int(birth_years.mode().values[0])))
+
+    print("\nThis took %s seconds." % (time.time() - start_time))
+    print('-'*40)
+
+
+def main():
+    """
+    Runs the interactive analysis loop.
+
+    Each round asks for the filters, loads the matching data and prints the
+    four statistics reports, then asks whether to start over. Anything other
+    than "yes" ends the loop.
+    """
+
+    while True:
+        city, month, day = get_filters()
+
+        try:
+            df = load_data(city, month, day)
+        except ValueError as err:
+            # e.g. an unknown month name; ask for the filters again instead
+            # of ending the session with a traceback
+            print('Could not load data: {}'.format(err))
+            continue
+
+        if df.empty:
+            # the statistics below index into mode() results, which would
+            # fail when no rows are left after filtering
+            print('No trips match the selected filters.')
+        else:
+            time_stats(df)
+            station_stats(df)
+            trip_duration_stats(df)
+            user_stats(df, city)
+
+        restart = input('\nWould you like to restart? Enter yes or no.\n')
+        if restart.strip().lower() != 'yes':
+            break
+
+
+# Start the interactive session only when run as a script, not on import.
+if __name__ == "__main__":
+    main()