59doit

[Python-Pandas] #2 index 본문

Programming/Python(파이썬)

[Python-Pandas] #2 index

yul_S2 2022. 11. 7. 10:19
반응형

index

import pandas as pd
import numpy as np

 

 

 

 

# Label a three-element Series with 'a', 'b', 'c' and keep a handle on its Index.
obj = pd.Series(data=range(3), index=['a', 'b', 'c'])
index = obj.index

# Evaluating the Index on its own shows the labels and their dtype.
index
# Output: Index(['a', 'b', 'c'], dtype='object')

# An Index can be sliced like a sequence.
index[1:]
# Output: Index(['b', 'c'], dtype='object')

 

 

Index 객체는 변경 불가(immutable) : 개별 원소에 값을 대입하면 TypeError 발생

# Intentional failure: item assignment on an Index raises TypeError,
# as the recorded output below shows.
index[1] = 'd' 
# Output:
# TypeError: Index does not support mutable operations

 

 

범위값(np.arange)으로 Index를 만들어 지정하고, 같은 객체인지(is) · 라벨이 들어 있는지(in) 확인

  • is
# Create the Index up front from a NumPy integer range.
labels = pd.Index(np.arange(0, 3))
labels
# Output: Int64Index([0, 1, 2], dtype='int64')
# (pandas 2.x prints this as Index([0, 1, 2], dtype='int64'))

# Pass the prepared Index to the Series constructor as its row labels.
obj2 = pd.Series(data=[1.5, -2.5, 0], index=labels)
obj2
# Output:
# 0    1.5
# 1   -2.5
# 2    0.0
# dtype: float64

# `is` tests object identity between the Series' index and `labels`.
obj2.index is labels
# Output: True
  • in
# NOTE(review): frame3 is not defined anywhere in this post — presumably it
# comes from an earlier post in the series; define it before running this.
frame3
# Output:
# state  Nevada  Ohio
# year
# 2001      2.4   1.7
# 2002      2.9   3.6
# 2000      NaN   1.5

# The column labels are themselves an Index (here named 'state').
frame3.columns
# Output:
# Index(['Nevada', 'Ohio'], dtype='object', name='state')

# `in` tests whether a label is present in the column Index.
'Ohio' in frame3.columns
# Output: True

# The same membership test against the row Index; 2003 is not a row label.
2003 in frame3.index
# Output: False

 

판다스 index 중복값 허용

# A pandas Index may hold duplicate labels.
# The constructor is pd.Index (capital I); pd.index does not exist and
# raises AttributeError.
dup_labels = pd.Index(['foo', 'foo', 'bar', 'bar'])
dup_labels
# Output: Index(['foo', 'foo', 'bar', 'bar'], dtype='object')

 

reindex : 새로운 index에 맞춰 데이터를 재배열 (새 라벨 추가 가능)

# A float Series whose labels are deliberately not in alphabetical order.
obj = pd.Series(
    data=[4.5, 7.0, -5.3, 3.6],
    index=['d', 'b', 'a', 'c'],
)
obj
# Output:
# d    4.5
# b    7.0
# a   -5.3
# c    3.6
# dtype: float64

 

색인값을 새로 추가했을 때 대응하는 값이 없으면 NaN으로 채워진다

# Rearrange obj to follow the new label order; 'e' has no existing value,
# so it shows up as NaN in the result.
obj2 = obj.reindex(index=['a', 'b', 'c', 'd', 'e'])
obj2
# Output:
# a   -5.3
# b    7.0
# c    3.6
# d    4.5
# e    NaN
# dtype: float64

 

 

 

 

# A string Series labelled with the even integers 0, 2, 4 (gaps at 1, 3, 5).
obj3 = pd.Series(
    data=['blue', 'purple', 'yellow'],
    index=[0, 2, 4],
)
obj3
# Output:
# 0      blue
# 2    purple
# 4    yellow
# dtype: object

 

 

 

  • ffill
# Reindex onto 0..5 with forward fill: each new label without a value
# takes the value of the closest preceding label.
obj3.reindex(index=range(6), method='ffill')
# Output:
# 0      blue
# 1      blue
# 2    purple
# 3    purple
# 4    yellow
# 5    yellow
# dtype: object

 

 

reindex 컬럼도 변경 가능

# 3x3 integer frame with row labels a/c/d ('b' is intentionally missing).
frame = pd.DataFrame(
    data=np.arange(9).reshape(3, 3),
    index=['a', 'c', 'd'],
    columns=['Ohio', 'Texas', 'California'],
)
frame
# Output:
#    Ohio  Texas  California
# a     0      1           2
# c     3      4           5
# d     6      7           8

# Reindexing the rows adds 'b' as an all-NaN row; the result is a new frame.
frame2 = frame.reindex(index=['a', 'b', 'c', 'd'])
frame2
# Output:
#    Ohio  Texas  California
# a   0.0    1.0         2.0
# b   NaN    NaN         NaN
# c   3.0    4.0         5.0
# d   6.0    7.0         8.0

# Columns can be reindexed too via the `columns` keyword. The result is not
# assigned here, so `frame` itself is left unchanged.
states = ['Texas', 'Utah', 'California']
frame.reindex(columns=states)
# Output:
#    Texas  Utah  California
# a      1   NaN           2
# c      4   NaN           5
# d      7   NaN           8

 

 

 

 

Error : reindex는 새 객체를 반환할 뿐 원본 frame을 바꾸지 않으므로, 원본에 없는 'Utah' 컬럼은 loc로 선택할 수 없다.

# Intentional failure: `states` contains 'Utah', which is not a column of
# `frame` (the earlier frame.reindex(columns=states) result was never
# assigned), so label-based selection raises KeyError.
frame.loc[['a','c','d'],states]  
# Output: KeyError: "['Utah'] not in index"

▼ 옳은 예 : 실제로 존재하는 라벨만 선택한다 (없는 라벨이 필요하면 먼저 reindex한 결과를 사용해야 한다).

# Every requested row and column label exists in frame2, so .loc succeeds
# and returns them in the requested order.
states2 = ['Texas', 'California', 'Ohio']
frame2.loc[
    ['a', 'd', 'c'],
    states2,
]
# Output:
#    Texas  California  Ohio
# a    1.0         2.0   0.0
# d    7.0         8.0   6.0
# c    4.0         5.0   3.0

\

반응형

'Programming > Python(파이썬)' 카테고리의 다른 글

[Pandas] #4 ser  (0) 2022.11.08
[Python-Pandas] #3 drop  (0) 2022.11.07
[Python-Pandas] #1  (2) 2022.11.07
[Python-Numpy] #4  (0) 2022.11.06
[Python-Numpy] #3  (0) 2022.11.06
Comments