Citi Bike Ridership & Public Safety During COVID-19

Tania Arya and Mausam Patel


Introduction

In the face of our nation's ongoing battle with the COVID-19 public health crisis, the Citi Bike rental bike stations in New York City, NY are still in business. These bikes are points of human contact and unfortunately can serve as vectors for the spread of germs and viruses such as COVID-19. Citi Bike is privately owned by Lyft, but operates as a public bike sharing system that leverages mobile technology to serve regions in the Bronx, Manhattan, Queens, and Jersey City, NJ. The organization's technical capabilities extend beyond its app and into the realm of data collection, as the company has stored detailed information on every bike ride since 2013. This faithful data collection offers a rich dataset for further analysis, covering everything from ride coordinates and station locations to rider demographics and trip durations.

However, for the purposes of our analysis, we would like to answer the question: How has Citi Bike ridership behavior changed since the start of the COVID-19 pandemic? More generally, we would like to use insights gained from our data to see if we can increase the safety of Citi Bike locations in terms of minimizing the spread of the virus. We hypothesize that there will be differences in the frequency of rides taken, in the most popular stations, and in ridership patterns with respect to the trips taken and their timing.


Libraries Used

We will be using a variety of different Python libraries throughout this tutorial. We have outlined the core libraries below:

  1. Pandas - This is a very popular data analysis tool that allows you to easily work with tabular data in Python. The heart of pandas lies in a DataFrame object, which is essentially the table holding all of your data. Pandas comes with a wide variety of built-in functions that allow you to perform many different operations on the data. We will make use of several of these functions throughout the tutorial.

  2. Plotly.py - This is an open source graphing library that allows you to create interactive visualizations such as line charts, bar charts, and histograms. All Plotly graphs can be hovered over and clicked on to show additional information. They can also be zoomed in and out based on the user's preference. Since the visualizations we will be making are relatively simple, we will be using Plotly Express, which is a shorter and simpler high-level version of Plotly.

  3. Folium - This is a powerful data visualization library that lets us plot coordinates on an interactive Leaflet map. These maps contain all the streets and landmarks you would be able to find on Google Maps, but with an added level of interactivity. Most of these maps require you to use latitude and longitude to annotate and add onto the base map.

  4. Scikit-Learn - This is an extremely popular machine learning and analysis library that provides simple and efficient tools used for predictive data analysis. It is open source, and is built upon several other popular Python libraries like NumPy, SciPy, and matplotlib.

All of the code and datasets used for this tutorial can be found at this repository.


Data Collection

In this data collection phase, we will focus on collecting and compiling our data in one place so that it is usable for our analysis.

We want to obtain information on all of the rides that have occurred within our designated time period using Citi Bike’s publicly available trip data. This data includes the trip duration, start and stop times, start and end station names and IDs, station latitudes and longitudes, bike ID, user type (customer or subscriber), gender, and birth year.

We have downloaded the csv files for every month from Jan 2019 - Oct 2020 (found here). To be able to proceed with our analysis, we need to combine these individual months into a single structure. Since each of these files can contain up to 50,000 entries, combining all of this data directly would not be possible due to memory constraints. To mitigate this issue, we randomly sample 50% of the data from each month when appending to our DataFrame.

We have included the code used to create our final DataFrame below. Since its construction is based on random sampling, we executed this code once to create our final dataset (citibike_compiled_data.csv). For the purpose of the tutorial, we will simply load the pre-created csv file into our DataFrame.

import os
import random

import pandas as pd

# extract all data files in the "datasets" folder (contains the downloaded .csvs for all months)
files = os.listdir("datasets")

# we will sample 50% of entries from each month
p = 0.50

# read each monthly file, randomly keeping each data row with probability p
# (the skiprows callable skips a row whenever the random draw exceeds p)
monthly = [
    pd.read_csv(f"datasets/{file}", skiprows=lambda i: i > 0 and random.random() > p)
    for file in files
]

# combine the sampled months into one master dataframe
# (DataFrame.append is deprecated, so we use pd.concat)
df = pd.concat(monthly, ignore_index=True)

# save the compiled dataframe as a csv
df.to_csv("citibike_compiled_data.csv", index=False)

The DataFrame "rides" now holds all of our ride data that we will continue to use throughout the rest of the tutorial. We can see that this dataset contains 354,305 rides ranging from 1/1/2019 - 10/31/2020.


Data Cleaning

Before we go on to analyze the data, we need to “fix” the organization and structure of the dataset. This process is known as “data tidying” or “data wrangling.”

Adjusting Data Types

Since we are dealing with data over time, we want to make sure that the time and date columns are easy to work with. We will make use of Python datetime objects to accomplish this.
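
A minimal sketch of this conversion using pandas' to_datetime, assuming the raw columns keep Citi Bike's starttime and stoptime names:

# parse the string timestamps into proper datetime objects
rides["starttime"] = pd.to_datetime(rides["starttime"])
rides["stoptime"] = pd.to_datetime(rides["stoptime"])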

We can now see that the starttime and stoptime columns are datetime objects.

Converting Trip Duration to Minutes

To obtain more useful and clear analysis and visualizations, we will convert the trip duration, which is currently given in seconds, into minutes. Since many trips are longer than 2 minutes, durations expressed in seconds quickly become hard to read and compare.
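
This conversion is a single division, assuming the duration column keeps its raw name tripduration:

# tripduration is reported in seconds; convert it to minutes
rides["tripduration"] = rides["tripduration"] / 60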

Removing Extraneous Trips

Another factor we want to consider is trip duration. Since we are trying to find trends associated with the trips, we want to remove, under reasonable assumptions, any trips that are likely outliers. For example, we will remove any trips under 2 minutes, as we assume these are mistakenly rented bikes, user error in the checkout process, or a user simply testing the bike. We will also remove any trips over 5 hours (300 minutes) in length, as we assume the bike was improperly checked in, stolen, lost, etc.

As we've seen in the previous section, the data given already contains a column for trip duration, so we do not need to calculate this value ourselves.
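
The filtering itself reduces to one boolean mask over the minute-based duration column (a sketch under the column-name assumptions above):

# keep only trips between 2 minutes and 5 hours (300 minutes) long
rides = rides[(rides["tripduration"] >= 2) & (rides["tripduration"] <= 300)]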

By filtering out these extraneous durations, we have eliminated 3,951 trips for a new dataset size of 350,354 rides.

Adding Additional Columns

Since we will be analyzing many of these rides grouped by their date and time, it is beneficial to add these columns as part of the data cleaning phase.

First, we will create a column for the date of the trip. We can reasonably assume that both the start time and stop time have the same base date attached to them as it is unlikely that there will be a trip spanning two days. Therefore, in order to make this date column we can use the start time column and extract the date from the datetime object we created earlier.

We will now use a very similar procedure to extract the month, year, and time of day, once again using properties of the datetime object.
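
A sketch of both steps is below; the exact time-of-day cutoffs are our own assumption, since any reasonable binning works:

# date of the trip (we assume start and stop fall on the same date)
rides["date"] = rides["starttime"].dt.date

# month and year, extracted from the same datetime object
rides["month"] = rides["starttime"].dt.month_name()
rides["year"] = rides["starttime"].dt.year

# bin the starting hour into a coarse "time of day" label (our own cutoffs)
def time_of_day(hour):
    if hour < 6:
        return "night"
    elif hour < 12:
        return "morning"
    elif hour < 18:
        return "afternoon"
    return "evening"

rides["time_of_day"] = rides["starttime"].dt.hour.map(time_of_day)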

As we can see in the resulting dataframe, we have successfully updated our data to include these columns.

Removing Extraneous Columns

We will now drop the user type (customer vs. subscriber) and bike id columns as they are not relevant to our analysis. In general, it is a good idea to make sure the working data is as concise as possible to minimize confusion and add clarity.
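
Assuming the raw column names usertype and bikeid, the drop looks like this:

# remove columns that are not relevant to our analysis
rides = rides.drop(columns=["usertype", "bikeid"])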

Creating Helper DataFrame of Station Mappings

Since our dataset contains both station ID and station name, it can be helpful to create a dataframe that will map a given station ID to its respective name. This will help us in the future when we need to use numeric data for our analysis.
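
One way to build this mapping (a sketch using the raw station columns):

# one row per station: maps a station id to its human-readable name
station_names = (
    rides[["start station id", "start station name"]]
    .drop_duplicates("start station id")
    .reset_index(drop=True)
)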

Dividing Dataframe by Year

In order to simplify further analysis, we will create two additional dataframes - one for all rides in 2019 and one for all rides in 2020.
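
Since we added a year column during cleaning, this is just two filters:

# split the cleaned data by year for side-by-side analysis
rides_2019 = rides[rides["year"] == 2019]
rides_2020 = rides[rides["year"] == 2020]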


Data Exploration

We can now progress to the "data exploration" phase of our analysis. This stage will offer insights into underlying trends in our data, as well as a starting point for further exploration and analysis. We seek to better understand our data and reassess any assumptions we may have implicitly made about the dataset, and to clarify any misunderstandings before our data analysis section.

Descriptive Statistics

Descriptive statistics will make the data visualization process simpler and more effective. Here we seek to understand the data in a meaningful way that will allow for a simplified interpretation of the Citi Bike dataset overall. We also want to make sure that we do not distort our original data or overshadow any important details. Luckily, pandas has a describe function built into its library that we can make use of to get these statistics.
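
For example, calling describe on each year's rides summarizes every numeric column at once:

# count, mean, std, and quartiles for trip duration, birth year, etc.
print(rides_2019.describe())
print(rides_2020.describe())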

From the 2019 data output, we can see that the mean trip duration is 9.45 minutes. This is much lower than in the 2020 data, which has a mean of 19.84 minutes. However, it is important to note that the standard deviations for these groups are 13.55 and 26.05 minutes respectively. This means that the data is very spread out rather than clustered around the mean, so using the mean as a statistic to represent the entire dataset may be misleading. Visit this resource to find out more about the role standard deviation plays when analyzing data.

When taking into consideration the year the customers were born, we can see that the range is 1888 - 2004. If a customer was truly born in 1888, they would currently be 132 years old. This is interesting to note, as it shows that users may enter incorrect birth dates. From a data science perspective, this also shows that one cannot rely on user-inputted data to be accurate, and must be careful about making assumptions on data that may not be correct to begin with. Thus, for our purposes, we will not be further exploring birth year or using it in our analysis.

People Analysis

Here we would like to gain some insights into the demographics that make up our population. As mentioned above, we will not be looking into the ages of our users due to inaccurate birth dates. However, we can take a look at the gender makeup of the user population.

We can use a grouped bar chart from Plotly to see the breakdown by gender for both 2019 and 2020. Coloring by year and grouping the bars lets us see all of the data on a single graph, as opposed to spreading the counts across multiple graphs.
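
A sketch of this chart with Plotly Express, assuming the gender column keeps Citi Bike's 0/1/2 coding:

import plotly.express as px

# count rides by gender within each year
gender_counts = rides.groupby(["year", "gender"]).size().reset_index(name="count")

# cast year to string so Plotly treats it as a discrete color group
gender_counts["year"] = gender_counts["year"].astype(str)

fig = px.bar(gender_counts, x="gender", y="count", color="year", barmode="group")
fig.show()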

With 1 representing males, 2 representing females, and 0 representing unknown genders, it appears that in both years there is a higher proportion of male riders than female or unknown-gender riders.

Mapping Stations

Since we will be conducting some analysis into the popularity of different stations, and since this data revolves around stations and Citi Bike trips, let’s map out where these stations are.

We'll start by creating a dataframe of all of the unique start stations, and all of the unique end stations.

From here, we can extract the respective latitudes and longitudes to position our markers, and then use Folium to display it on a map. Each marker is clickable to reveal the name of the station it represents.
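
A sketch of the marker map for start stations (end stations work identically); the map center is our own choice:

import folium

# unique start stations with their coordinates
stations = rides[["start station name", "start station latitude",
                  "start station longitude"]].drop_duplicates()

# center the map between Jersey City and lower Manhattan
m = folium.Map(location=[40.72, -74.04], zoom_start=13)

# one clickable marker per station, with its name as the popup
for _, row in stations.iterrows():
    folium.Marker(
        location=[row["start station latitude"], row["start station longitude"]],
        popup=row["start station name"],
    ).add_to(m)

m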

From this map, we can see that there is a large cluster of stations just under Hoboken, and several stations spread out over New York City. Based on this, we can probably expect to see a lot of activity in those areas, since stations are presumably placed where ridership is high.

Trip Durations

From our descriptive statistics, we know that there is a high standard deviation for the trip durations in both years. In order to see why this is the case, let's look at the distribution of trip durations in a histogram. Histograms are useful for summarizing numerical data by showing how many points fall within a specific range of values (learn more here).

Once again, we will use Plotly to show the histograms for each year overlaid on each other so we can compare both years with a mutual axis.
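
A sketch of the overlaid histograms; the bin count and opacity are arbitrary choices:

# overlay the trip duration distributions of the two years
fig = px.histogram(rides, x="tripduration", color=rides["year"].astype(str),
                   barmode="overlay", nbins=75, opacity=0.65)
fig.show()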

We can see that trip durations spread from 0 - 300 minutes, with most trips falling under 1 hour in length. The most common trip duration range was 5 - 9 minutes, which makes sense since getting from one station to another by bike should not take too long. The reason our standard deviation was so high is that although most trips are concentrated under an hour, there are still many trips falling between 1 - 5 hours.

The output above also yields some interesting insight on the difference in duration between the two years. Because there were fewer rides overall in 2020, we would expect the counts for the red histogram (2020) to always lie under the purple histogram. However, for trip durations ranging from roughly 15 to 120 minutes, the count for 2020 is actually higher than the count for 2019. This shows us that in 2020, people tended to take longer rides than in 2019.

Trip Analysis

The last part of our exploratory analysis will consist of figuring out what the top 10 trips for each year are. Since each ride has a start and stop station, we define a trip as the ride from Station A to Station B. Some of these trips can be loops, indicating that the user started and returned to the same station in their trip.

We can find the top trips using pandas' groupby, which allows you to group entries in a DataFrame by certain values and perform operations on the groups.
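
For example, the 2019 counts can be computed as follows (2020 is identical with rides_2020):

# count rides for each (start, end) station pair and keep the 10 largest
top_trips_2019 = (
    rides_2019.groupby(["start station name", "end station name"])
    .size()
    .sort_values(ascending=False)
    .head(10)
)
print(top_trips_2019)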

It looks like the station "Grove St PATH" was extremely popular in 2019, but not so much in 2020. It's possible that this station is in a very popular area that was much less populated during the pandemic. It is also interesting to note that almost all of the top trips in 2020 are loops. This could be because riders in 2019 used Citi Bike to get from one place to another, but riders in 2020 were more along the lines of "joyriders" who went for a quick ride and returned to where they started. This could be due to the fact that office buildings were closed with the lockdown, so more users were visitors to NYC who fell under a "tourist" label.


Data Analysis

Now that we have a much better understanding of what our data contains, we can go ahead and begin the "data analysis" stage. This is the core stage in the data science pipeline; it involves analyzing and modeling the data so that we can evaluate our original hypothesis.

Timeseries

Timeseries visualizations are a good way to identify trends in your data as time goes on. They allow us to see the direction or movement of certain variables in our data, and offer insights into areas to focus our analysis on. It is important to note that a time series with many dates can have a lot of fluctuation, but it is still useful for spotting trends.

Let's make a timeseries for our trip data. We will once again use groupby to get our counts per date, and we'll use Plotly to visualize the graph.
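
A sketch of the plot, reusing the date column added during cleaning:

# number of rides per calendar date
daily_counts = rides.groupby("date").size().reset_index(name="rides")

fig = px.line(daily_counts, x="date", y="rides")
fig.show()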

From our timeseries plot, we can make a few key observations. First, in 2019, we can see that there is a spike in ridership in the summer months, possibly due to good weather. Then, there is a dip on September 2nd, or Labor Day. Here we might hypothesize that this is due to people being at home with family. In terms of seasonality, we see that there is a downward trend in ridership as the weather gets colder in December.

In 2020, we can see some interesting deviations from our 2019 trends, most of which can be mapped to different phases of NYC's plan to halt the spread of the COVID-19 virus (learn more here). March 23rd was NYC’s first official day of lockdown, and we can see in our plot that this was the lowest point in the time series with only 24 rides. The fact that there are any rides at all may be due to essential workers. The months of March and April were uncharacteristically low, followed by a spike in June. This spike can be tied to phase one of NYC's reopening plan, which began on June 8th.

One of the most interesting parts of this visualization is the very large spike in the beginning of October 2020. These few days had the highest count of trips even when compared to 2019. However, when looking at NYC's reopening plan, we can see that phase four of the plan commenced on October 8th, marking the opening of museums, botanical gardens, and gyms. With thousands of people stuck at home for months beforehand, this reopening could have led to a very large spike in visits to NYC, thus increasing the ridership significantly.

Now that we have established the raw count differences over time, let's see if we can break them down by location and analyze the popularity of different trips and stations more in depth. In the exploration phase, we did some elementary counts on the most popular stations and trips, but in order to see where ridership is physically concentrated and how it changes over time, let’s create some maps using Folium.

Even though we listed the 10 most popular trips in the exploratory phase, it can be extremely helpful to be able to visualize their locations. We'll be using Folium's PolyLine feature to connect two station markers.
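
A sketch of the 2019 version, reusing the unique-stations dataframe from the exploration phase; the coordinate lookup dictionary is our own helper:

# station name -> (lat, lon)
station_coords = {
    row["start station name"]: (row["start station latitude"],
                                row["start station longitude"])
    for _, row in stations.iterrows()
}

m = folium.Map(location=[40.72, -74.04], zoom_start=13)

# connect the endpoints of each top trip (loops collapse to a single point)
for start, end in top_trips_2019.index:
    if start in station_coords and end in station_coords:
        folium.PolyLine([station_coords[start], station_coords[end]],
                        weight=4).add_to(m)

m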

As we had seen in the chart of trips from our exploratory analysis, the top 5 paths all pass through Grove St PATH. From the map we can see that this station is in a very popular and central location. It is likely that this station is located near a lot of popular spots in the city, which leads to a higher ridership.

Unlike 2019, 2020 does not have any trips going through Grove St PATH. All of the popular trips are loops, and we can further justify our claim that the riders in 2020 are more likely to be "joyriders." If we look at where some of the stations are located, we can see that the stations are right next to Lincoln Park, Liberty State Park, Alexander F Santora Park, St Peters Field, and Elephant Park. These proximities lead us to believe that the users using these bikes are most likely taking a ride around the park to get some fresh air during lockdown.

Overall, this visualization helps us see the difference in the types of users based on their potential motives for using Citi Bike's services.

Now that we've seen the popular trips, let's break it down even further by station. First, we will create a heatmap for each year showing the concentration of ridership in the NYC area. Heatmaps are a great way to see clustering and magnitude of data using color. You can learn more about heatmaps here.
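
Folium's HeatMap plugin takes a list of [lat, lon] points; a sketch for 2020 (2019 is analogous):

from folium.plugins import HeatMap

m = folium.Map(location=[40.72, -74.04], zoom_start=13)

# every ride start contributes one point, so busier stations glow hotter
points = rides_2020[["start station latitude",
                     "start station longitude"]].values.tolist()
HeatMap(points).add_to(m)

m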

To culminate our final analysis, we will now try to see if we can actually predict what the most popular station will be based on the time of day and month. This is the "Machine Learning" stage in our data science process, which refers to using artificial intelligence (AI) to give systems the ability to learn and improve from training data without being explicitly programmed. The machine is essentially “learning” from the training data provided. This will enable us to see if we can apply certain tools to get the computer to accurately predict some variable.

For our prediction analysis, we are attempting to answer: Can we predict the most popular start station for a given time of day and month of year? Thus, our predictors are the time of day and the month of year, and our response variable is the most popular start station.

For this tutorial, we will be making use of the following tools from scikit-learn: LabelEncoder and train_test_split to prepare the data, and MLPClassifier and accuracy_score to model and score the predictions. The purpose of each of these tools will be explained throughout this section.

In order to input our data into a predictive model, we first need to format and prepare it. This is also known as “data preprocessing.” First, we create a column for the numerical form of the month, since most models do not accept categorical data in string format. From there, we want to create a DataFrame where each row has the following columns: month, time of day, and the most popular station for that month and time of day. After dropping all null values (created when no station fits a given month and time combination), we use LabelEncoder to standardize the data, which brings the four-digit station IDs down to small consecutive integers starting from 0. After following these steps, we have a DataFrame ready to be fed into our predictive model.
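
A sketch of this preprocessing as a reusable function; the helper name, the reliance on our time_of_day binning from the cleaning phase, and the decision to also encode the time-of-day labels are our own choices:

from sklearn.preprocessing import LabelEncoder

def preprocess(rides_df):
    # numerical month, since the model cannot take month names directly
    rides_df = rides_df.assign(month_num=rides_df["starttime"].dt.month)

    # most popular start station for every (month, time of day) combination
    data = (
        rides_df.groupby(["month_num", "time_of_day"])["start station id"]
        .agg(lambda s: s.value_counts().idxmax())
        .reset_index(name="popular_station")
        .dropna()
    )

    # encode labels as small consecutive integers starting from 0
    data["time_of_day"] = LabelEncoder().fit_transform(data["time_of_day"])
    station_le = LabelEncoder()
    data["popular_station"] = station_le.fit_transform(data["popular_station"])
    return data, station_le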

We are now ready to start inputting our data into a model. A standard practice in machine learning is to split the data into a “training” and a “testing” set. This allows us to set aside a small portion (in our case 20%) of the data for evaluation purposes and use the remaining 80% to actually train the model, which prevents us from testing the model on the very data it was trained on. The train_test_split function in scikit-learn helps us easily create these partitions. Note that we pass in test_size = 0.2 to indicate an 80/20 split.
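
A sketch of the split, wrapped in a small helper of our own (the fixed random_state simply makes the split repeatable):

from sklearn.model_selection import train_test_split

def split_data(data):
    # predictors: month and time of day; response: encoded popular station
    X = data[["month_num", "time_of_day"]]
    y = data["popular_station"]
    return train_test_split(X, y, test_size=0.2, random_state=42)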

For our model itself, we will be using an MLPClassifier, which stands for Multi-layer Perceptron classifier, a type of feedforward neural network that performs classification. Training is done via backpropagation; if you are interested in learning more, click here. We will pass in our training X and y data in order to train (or “fit”) the model, and then predict using the testing X. After the predictions are made, we compare these values to our testing y values using scikit-learn’s accuracy_score.
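
A sketch of the fit-and-score step; the max_iter value is our own choice to help the optimizer converge on this small dataset:

from sklearn.neural_network import MLPClassifier
from sklearn.metrics import accuracy_score

def fit_and_score(X_train, X_test, y_train, y_test):
    # train the multi-layer perceptron on the training partition
    model = MLPClassifier(max_iter=1000, random_state=42)
    model.fit(X_train, y_train)

    # score the predictions against the held-out labels
    return model, accuracy_score(y_test, model.predict(X_test))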

Since the output of the actual prediction is an encoded label representing the station ID, it can be helpful to transform this value into the actual station name to better understand our results. We have created a function to output this transformed dataframe so that we can see, for a particular month and time of day, which station our model predicted to be the most popular.
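
A sketch of such a helper, combining the fitted LabelEncoder with the id-to-name mapping dataframe built during cleaning:

def decode_predictions(model, X, station_le, station_names):
    # undo the label encoding to recover the original station ids
    station_ids = station_le.inverse_transform(model.predict(X))

    # look up each id's human-readable name
    names = station_names.set_index("start station id")["start station name"]

    out = X.copy()
    out["predicted_station"] = names.reindex(station_ids).values
    return out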

Now that our functions for this prediction process have been defined, let's actually run them on both the 2019 and 2020 datasets.

As explained above, we begin by preprocessing the data.
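
Using the sketch helpers defined above:

# build the (month, time of day) -> popular station tables for each year
data_2019, le_2019 = preprocess(rides_2019)
data_2020, le_2020 = preprocess(rides_2020)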

With our prepared datasets, we can now create our model and check its accuracy.
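
Again using our sketch helpers:

# split, train, and score one model per year
model_2019, acc_2019 = fit_and_score(*split_data(data_2019))
model_2020, acc_2020 = fit_and_score(*split_data(data_2020))
print(acc_2019, acc_2020)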

The accuracy for the MLPClassifier on the 2019 dataset is 79%, whereas it is only 45% for the 2020 dataset. This may be due to the fewer rides taken and fewer months covered (no November or December data) in the 2020 dataset, which results in less training data for the model. Another possible reason could be that time of day and month no longer explain the variance in the most popular station in 2020: people are no longer on a regular schedule for commuting to work or school, nor are their rides restricted to hours outside of work or school, as schedules have become more blurred. In 2020, as suggested by our analysis in the maps above, it may be the case that bike rides are "joyrides," and so the time of day does not impact the most popular station as consistently as it once did. Regardless, we can still see which stations consistently appear in our predictions for the most popular station for a given month and time.

Our predicted stations for 2019 seem to show that in general, Grove St PATH is the most popular station. This matches our analysis from the previous sections.

Although the model for the 2020 data is not as accurate as the one for 2019, we can see that Liberty Light Rail appears very frequently in this output. This matches our previous analysis of the generally popular stations.

This predictive model helps us expand upon our previous analysis and actually see what times of day and which months the stations are more popular. It helps us gain a much better understanding of the data for each year, and together with our prior analysis helps us to understand the difference between ridership in 2019 and 2020.


Conclusion

From our analysis, we have uncovered a few changes in Citi Bike ridership between 2019 and 2020. First, we noticed that ridership dipped after the statewide shutdown in New York, and rose again after the reopening phases. We were also able to visualize the changing concentrations of station traffic from 2019 to 2020 with our timed heatmap. From other map plots, we were able to see how in 2020 the most popular stations were closer to parks and the most popular trips consisted of many more loops, which may allude to more "joyrides," or rides taken for leisure or fun.

Most importantly, we saw some differences in our predictions of the most popular start station at a given time of day and month of year. This information can be leveraged, especially when it comes to public safety. More hand sanitizing stations can be placed at the most popular stations, and bike sanitization times for each station can be scheduled around the times when that station is most popular. This can decrease the spread of germs and reduce the number of people exposed to previous riders' germs. Other uses of these predictions include redistributing bikes so that the most popular stations have sufficient bikes at the times of day they are needed most, or removing bikes from the least popular stations at a given month and time. The applications of this analysis truly lie in understanding the changing dynamics of bike ridership, resource allocation, bike sanitization, and public safety. In the face of COVID-19, public points of contact are possible sources of spread of the virus, and this data can help minimize the risk associated with those sources.