In this data project we will focus on exploratory data analysis of stock prices for the following banks:
We'll focus on bank stocks and see how they progressed throughout the financial crisis all the way to early 2016.
# import libraries
from pandas_datareader import data, wb
import pandas as pd
import numpy as np
import datetime
%matplotlib inline
# create a start and end date
start = datetime.datetime(2006,1,1)
end = datetime.datetime(2016,1,1)
# read in the data using the pandas datareader library
# Bank of America
BAC = data.DataReader('BAC', 'yahoo', start, end)
# CityGroup
C = data.DataReader('C','yahoo',start,end)
# Goldman Sachs
GS = data.DataReader('GS', 'yahoo', start, end)
# JPMorgan Chase
JPM = data.DataReader('JPM', 'yahoo', start, end)
# Morgan Stanley
MS = data.DataReader('MS', 'yahoo', start, end)
# Wells Fargo
WFC = data.DataReader('WFC', 'yahoo', start, end)
MS
# create a list of the ticker symbols in alphabetical order
tickers = ['BAC','C','GS','JPM','MS','WFC']
# concatenate the bank dataframes together to a single data frame and set the keys argument equal to the tickers list
bank_stocks = pd.concat([BAC,C,GS,JPM,MS,WFC], axis = 1, keys = tickers)
# set the column name levels
bank_stocks.columns.names = ['Bank Ticker','Stock Info']
bank_stocks.head()
# finding the max Close price for each bank's stock throughout the time period
for tick in tickers:
print(tick,bank_stocks[tick]['Close'].max())
returns = pd.DataFrame()
# use pandas pct_change() method on the Close column to create a column representing this return value.
# create a for loop that goes and for each Bank Stock Ticker creates this returns column and sets it as a
# column in the returns DataFrame.
for tick in tickers:
returns[tick+' Return'] = bank_stocks[tick]['Close'].pct_change()
returns.head()
# import visualization libraries
import seaborn as sns
import matplotlib.pyplot as plt
# create a pairplot using seaborn of the returns dataframe
sns.pairplot(returns[1:])
# use idxmin and max to find out on what dates each bank stock had the best and worst single day returns
returns.idxmin()
Note: January 20, 2009 was actually Obama's inauguration date.
returns.idxmax()
# take the standard deviation of the returns to find the riskiest stock over this time period
returns.std()
It looks like the riskiest would have to be CityGroup as their standard deviation was the highest.
# check to see the riskiest stock for the year 2015
returns.loc['2015-01-01':'2015-12-31'].std()
# Morgan Stanley was the riskiest bank stock for 2015 as they had the highest standard deviation
# create a distplot using seaborn of the 2015 returns for Morgan Stanley
sns.distplot(returns.loc['2015-01-01':'2015-12-31']['MS Return'], color = 'green', bins = 50)
# create a distplot using seaborn of the 2008 returns for CitiGroup
sns.distplot(returns.loc['2008-01-01':'2008-12-31']['C Return'], color = 'red', bins = 50)
# more imports
import matplotlib.pyplot as plt
import seaborn as sns
sns.set_style('whitegrid')
%matplotlib inline
# Optional Plotly Method Imports
import plotly
import cufflinks as cf
cf.go_offline()
# create a line plot showing Close price for each bank for the entire index of time
bank_stocks.xs(key='Close',axis=1,level='Stock Info').iplot()
# analyze the moving averages for these stocks in the year 2008.
# plot the rolling 30 day average against the Close Price for Bank Of America's stock for the year 2008
plt.figure(figsize=(12,4))
BAC['Close'].loc['2008-01-01':'2009-01-01'].rolling(window=30).mean().plot(label='30 Day Moving Avg')
BAC['Close'].loc['2008-01-01':'2009-01-01'].plot(label='BAC Close')
plt.legend()
plt.title('30 Day AVG & Close Price for BAC Stock in 2008')
# create a heatmap of the correlation between the stocks Close Price
sns.heatmap(bank_stocks.xs(key='Close', axis=1, level='Stock Info').corr(), annot=True)
# create a clustermap to cluster the correlations together
sns.clustermap(bank_stocks.xs(key='Close', axis=1, level='Stock Info').corr(), annot=True)