Programming/Python(파이썬)
[ Python - Pandas&Numpy ] ex
yul_S2
2022. 11. 13. 13:08
반응형
import numpy as np
import pandas as pd
ex1
# Six random draws held in a Series with the default 0..5 index.
s = pd.Series(data=np.random.randn(6))
# Blank out every second entry (positions 0, 2 and 4) so there is
# something to fill.
s.iloc[::2] = np.nan
s
# Show the series with each missing entry replaced by the mean of the
# remaining values; the filled copy is displayed, `s` itself is unchanged.
s.fillna(value=s.mean())
# Eight states in two regions: the first four rows are 'East', the last
# four are 'West'.  group_key lines up positionally with the rows of `data`.
states = ['Ohio', 'New York', 'Vermont', 'Florida',
          'Oregon', 'Nevada', 'California', 'Idaho']
group_key = ['East'] * 4 + ['West'] * 4
data = pd.Series(np.random.randn(8), index=states)
data

# Remove a few values so that each region has something to fill.
data[['Vermont', 'Nevada', 'Idaho']] = np.nan
data

# Mean per region.
data.groupby(group_key).mean()


def fill_mean(g):
    """Return group *g* with its missing values replaced by the group mean."""
    return g.fillna(g.mean())


# group_keys=False keeps the plain state labels as the result index.  With
# the default (True) pandas >= 2.0 prepends the region as an extra index
# level, which would no longer match the sample output shown below.
filled_mean = data.groupby(group_key, group_keys=False).apply(fill_mean)
filled_mean

# Fixed replacement value for each region.
fill_values = {'East': 0.5, 'West': -1}


def fill_func(g):
    """Return group *g* with missing values replaced by its region's preset value."""
    # Inside GroupBy.apply each group carries its key in the `name` attribute.
    return g.fillna(fill_values[g.name])


filled_const = data.groupby(group_key, group_keys=False).apply(fill_func)
filled_const
# <Output>  (sample run; the values that were not filled are random)
# Ohio -1.759884
# New York 0.143524
# Vermont 0.500000
# Florida 0.775893
# Oregon 0.570685
# Nevada -1.000000
# California 0.333238
# Idaho -1.000000
# dtype: float64
ex2
# Build a 52-card deck as a Series: the index is the card name (rank followed
# by the suit letter) and the value is the card's point value (ace = 1,
# number cards = face value, J/K/Q = 10).
suits = ['H', 'S', 'C', 'D']
base_names = ['A'] + list(range(2, 11)) + ['J', 'K', 'Q']
# Point values for one suit (A..10, then three face cards), repeated per suit.
card_val = (list(range(1, 11)) + [10] * 3) * len(suits)
# Suit-major order so the names line up with card_val: all of 'H', then 'S', ...
cards = [str(rank) + suit for suit in suits for rank in base_names]
deck = pd.Series(card_val, index=cards)
# First suit only (13 cards).
deck.iloc[:13]
def draw(deck, n=5, random_state=None):
    """Draw *n* items at random from *deck*.

    Parameters
    ----------
    deck : pandas.Series
        The cards to draw from (any object with a pandas-style ``sample``
        method works).
    n : int, default 5
        Number of cards to draw.
    random_state : optional
        Forwarded to ``deck.sample``.  Leave as ``None`` for a different
        draw on every call; pass a seed to make the draw reproducible.

    Returns
    -------
    The sampled subset of *deck*, as returned by ``deck.sample``.
    """
    return deck.sample(n, random_state=random_state)
# Draw a hand of the default size from the whole deck.
draw(deck)


def get_suit(card):
    """Return the suit of *card*, i.e. the last character of its name."""
    return card[-1]


# Two random cards from each suit: the deck is grouped by the suit derived
# from each card name, and draw() is applied to every group.
deck.groupby(get_suit).apply(draw, n=2)
# The same draw with group_keys=False, so the result keeps the plain
# card-name index (as in the sample output below).
deck.groupby(get_suit, group_keys=False).apply(draw, n=2)
# <Output>
# 5C 5
# 6C 6
# 2D 2
# 9D 9
# 7H 7
# QH 10
# 6S 6
# 8S 8
# dtype: int64
ex3
# Two categories ('a' and 'b') with four rows each, a random value column
# and a random weight column.
df = pd.DataFrame({
    'category': ['a', 'a', 'a', 'a', 'b', 'b', 'b', 'b'],
    'data': np.random.randn(8),
    'weights': np.random.rand(8),
})
df
grouped = df.groupby('category')


def get_wavg(g):
    """Return the average of ``g['data']`` weighted by ``g['weights']``."""
    return np.average(g['data'], weights=g['weights'])


# Hand apply() only the columns the function reads.  Applying over the whole
# frame (grouping column included) is deprecated in pandas 2.2.
wavg = grouped[['data', 'weights']].apply(get_wavg)
wavg
# Load the price table; the first CSV column becomes the index and is parsed
# as dates.  Presumably one closing-price column per ticker (the code below
# uses 'SPX', 'AAPL' and 'MSFT') -- confirm against the file.
close_px = pd.read_csv('C:/stock_px.csv', index_col=0, parse_dates=True)
close_px.info()
# Last four rows.
close_px.iloc[-4:]

# Row-over-row percentage change, with rows containing NaN dropped.
rets = close_px.pct_change().dropna()


def get_year(x):
    """Return the ``year`` attribute of *x* (used as the grouping key)."""
    return x.year


def spx_corr(x):
    """Return the correlation of every column of *x* with its 'SPX' column."""
    return x.corrwith(x['SPX'])


# Group the returns by the year of each index label.
by_year = rets.groupby(get_year)
# Per year: correlation of each column with SPX.
by_year.apply(spx_corr)
# Per year: correlation between the AAPL and MSFT columns.
by_year.apply(lambda g: g['AAPL'].corr(g['MSFT']))
# <Output>
# 1990 0.408271
# 1991 0.266807
# 1992 0.450592
# 1993 0.236917
# 1994 0.361638
# ...
# 2007 0.417738
# 2008 0.611901
# 2009 0.432738
# 2010 0.571946
# 2011 0.581987
# Length: 22, dtype: float64
ex4
import statsmodels.api as sm
def regress(data, yvar, xvars):
    """Fit an OLS regression of ``data[yvar]`` on ``data[xvars]`` plus an intercept.

    Parameters
    ----------
    data : pandas.DataFrame
        Table holding the dependent and explanatory columns.
    yvar : str
        Name of the dependent-variable column.
    xvars : list of str, or str
        Name(s) of the explanatory column(s).  A single name given as a
        plain string is treated as a one-element list.

    Returns
    -------
    The ``params`` of the fitted ``sm.OLS`` result: one coefficient per
    column in *xvars* followed by ``'intercept'``.
    """
    if isinstance(xvars, str):
        # A bare string would select a Series rather than a DataFrame, and
        # the constant column could not be added to it.
        xvars = [xvars]
    Y = data[yvar]
    # assign() returns a new frame, so the constant column is never written
    # into a selection of `data` (doing so raises SettingWithCopyWarning).
    X = data[xvars].assign(intercept=1.)
    result = sm.OLS(Y, X).fit()
    return result.params
# Run the regression separately on each yearly group: AAPL explained by SPX
# (plus the intercept added inside regress).  One row of coefficients per year.
by_year.apply(regress, yvar='AAPL', xvars=['SPX'])
# <Output>
# SPX intercept
# 1990 1.512772 0.001395
# 1991 1.187351 0.000396
# 1992 1.832427 0.000164
# 1993 1.390470 -0.002657
# 1994 1.190277 0.001617
# ... ...
# 2007 1.198761 0.003438
# 2008 0.968016 -0.001110
# 2009 0.879103 0.002954
# 2010 1.052608 0.001261
반응형