목록: Programming/Python(파이썬) (44)
59doit
import numpy as np import pandas as pd ex1 s = pd.Series(np.random.randn(6)) s[::2] = np.nan s s.fillna(s.mean()) states = ['Ohio', 'New York', 'Vermont', 'Florida', 'Oregon', 'Nevada', 'California', 'Idaho'] group_key=['East']*4+['West']*4 data=pd.Series(np.random.randn(8),index=states) data data[['Vermont','Nevada','Idaho']] = np.nan data data.groupby(group_key).mean() fill_mean = lambda g: g...
apply ▷ def top(df, n=5, column='tip_pct'): return df.sort_values(by=column)[-n:] top(tips, n=6) # # total_bill tip smoker day time size tip_pct # 109 14.31 4.00 Yes Sat Dinner 2 0.279525 # 183 23.17 6.50 Yes Sun Dinner 4 0.280535 # 232 11.61 3.39 No Sat Dinner 2 0.291990 # 67 3.07 1.00 Yes Sat Dinner 1 0.325733 # 178 9.60 4.00 Yes Sun Dinner 2 0.416667 # 172 7.25 5.15 Yes Sun Dinner 2 0.710345 ..
import numpy as np import pandas as pd PREVIOUS_MAX_ROWS = pd.options.display.max_rows pd.options.display.max_rows = 20 np.random.seed(12345) import matplotlib.pyplot as plt plt.rc('figure', figsize=(10, 6)) np.set_printoptions(precision=4, suppress=True) 데이터 집계 df # # key1 key2 data1 data2 # 0 a one 0.981007 -1.006219 # 1 a two -0.873717 -0.902148 # 2 b one -1.015634 0.752769 # 3 b two -0.41124..
import numpy as np import pandas as pd PREVIOUS_MAX_ROWS = pd.options.display.max_rows pd.options.display.max_rows = 20 np.random.seed(12345) import matplotlib.pyplot as plt plt.rc('figure', figsize=(10, 6)) np.set_printoptions(precision=4, suppress=True) group by df = pd.DataFrame({'key1' : ['a', 'a', 'b', 'b', 'a'], 'key2' : ['one', 'two', 'one', 'two', 'one'], 'data1' : np.random.randn(5), 'd..