羊羊猪 人气:0 DataFrame的行和列:df['行', '列']
- 行和列选取:df[],一次只能选取行或列
- 区域选取:df.loc[], df.iloc[], df.ix[],可以同时为行或列设置筛选条件
- 单元格选取:df.at[], df.iat[],准确选取某个单元格
import numpy as np import pandas as pd df = pd.DataFrame(np.random.randn(10,5), index=list('abcdefghij'), columns=list('ABCDE')) # Output A B C D E a 0.299206 -0.383297 -0.931467 -0.591609 -1.131105 b 0.074351 0.791849 1.637467 -1.408712 -1.376527 c -0.359802 -2.049489 -0.615742 -1.953994 0.685243 d 0.232557 1.768284 -0.447015 2.373358 1.220536 e -0.997380 -0.447236 0.632368 -0.352590 -0.064736 f -1.220178 -0.314304 1.202184 0.018326 1.072153 g -1.508916 0.380466 0.359506 -0.742657 -0.373764 h 1.031420 -3.236676 0.444769 1.396802 -0.405590 i 0.166133 -0.051614 -0.146943 0.609431 -0.351814 j 1.857521 -0.159101 0.899745 1.108722 -0.615379
1. 行和列的获取
1.1 根据索引获取行
df[:3] # Output A B C D E a 0.299206 -0.383297 -0.931467 -0.591609 -1.131105 b 0.074351 0.791849 1.637467 -1.408712 -1.376527 c -0.359802 -2.049489 -0.615742 -1.953994 0.685243
df[1:3] # 前闭后开 df['b':'c'] # 前闭后闭 # Output A B C D E b 0.074351 0.791849 1.637467 -1.408712 -1.376527 c -0.359802 -2.049489 -0.615742 -1.953994 0.685243
# 布尔数组 (数组长度需等于行数) df[[True,False,True,False,False,False, True, True, False, True]] # Output A B C D E a 0.299206 -0.383297 -0.931467 -0.591609 -1.131105 c -0.359802 -2.049489 -0.615742 -1.953994 0.685243 g -1.508916 0.380466 0.359506 -0.742657 -0.373764 h 1.031420 -3.236676 0.444769 1.396802 -0.405590 j 1.857521 -0.159101 0.899745 1.108722 -0.615379
1.2 根据条件获取行
df[df.A > 0] # Output A B C D E a 0.299206 -0.383297 -0.931467 -0.591609 -1.131105 b 0.074351 0.791849 1.637467 -1.408712 -1.376527 d 0.232557 1.768284 -0.447015 2.373358 1.220536 h 1.031420 -3.236676 0.444769 1.396802 -0.405590 i 0.166133 -0.051614 -0.146943 0.609431 -0.351814 j 1.857521 -0.159101 0.899745 1.108722 -0.615379
df[(df.A > 0) & (df.B > 0)] # Output A B C D E b 0.074351 0.791849 1.637467 -1.408712 -1.376527 d 0.232557 1.768284 -0.447015 2.373358 1.220536
df[(df.A > 0) | (df.B > 0)] # Output A B C D E a 0.299206 -0.383297 -0.931467 -0.591609 -1.131105 b 0.074351 0.791849 1.637467 -1.408712 -1.376527 d 0.232557 1.768284 -0.447015 2.373358 1.220536 g -1.508916 0.380466 0.359506 -0.742657 -0.373764 h 1.031420 -3.236676 0.444769 1.396802 -0.405590 i 0.166133 -0.051614 -0.146943 0.609431 -0.351814 j 1.857521 -0.159101 0.899745 1.108722 -0.615379
1.3 获取列
# 获取A列 df['A'] # 输出为Series类型 df[['A']] # 输出为DataFrame类型 # 获取A列和B列 df[['A', 'B']] df[df.columns[0:2]]
2. 区域选取
- df.loc[] 只能使用标签索引,不能使用整数索引,通过标签索引切片进行筛选时,前闭后闭。
- df.iloc[] 只能使用整数索引,不能使用标签索引,通过整数索引切片进行筛选时,前闭后开。
- df.ix[]既可以使用标签索引,也可以使用整数索引。注意:df.ix[] 自 pandas 0.20.0 起已弃用,并在 pandas 1.0.0 中被移除,新代码请改用 df.loc[] 或 df.iloc[]。
2.1 df.loc[]
2.1.1 行选取
# 输出为Series类型 df.loc['a'] df.loc['a', :] # Output A 0.299206 B -0.383297 C -0.931467 D -0.591609 E -1.131105 Name: a, dtype: float64 # 输出为DataFrame类型 df.loc[['a']] df.loc[['a'], :] # Output A B C D E a 0.299206 -0.383297 -0.931467 -0.591609 -1.131105
获取a, b, d行
# 使用标签索引 df.loc[['a', 'b', 'd']] df.loc[['a', 'b', 'd'], :] # 使用布尔数组 df[[True, True, False, True, False, False, False, False, False, False]] # Output A B C D E a 0.299206 -0.383297 -0.931467 -0.591609 -1.131105 b 0.074351 0.791849 1.637467 -1.408712 -1.376527 d 0.232557 1.768284 -0.447015 2.373358 1.220536
df.loc['a':'d', :] # Output A B C D E a 0.299206 -0.383297 -0.931467 -0.591609 -1.131105 b 0.074351 0.791849 1.637467 -1.408712 -1.376527 c -0.359802 -2.049489 -0.615742 -1.953994 0.685243 d 0.232557 1.768284 -0.447015 2.373358 1.220536
df.loc[df.A > 0] df.loc[df.A > 0, :] # Output A B C D E a 0.299206 -0.383297 -0.931467 -0.591609 -1.131105 b 0.074351 0.791849 1.637467 -1.408712 -1.376527 d 0.232557 1.768284 -0.447015 2.373358 1.220536 h 1.031420 -3.236676 0.444769 1.396802 -0.405590 i 0.166133 -0.051614 -0.146943 0.609431 -0.351814 j 1.857521 -0.159101 0.899745 1.108722 -0.615379
2.1.2 列选取
# 选取A列 df.loc[:, 'A'] # 选取A列和C列 df.loc[:, ['A', 'C']] # 选取A列到C列 df.loc[:, 'A':'C']
2.1.3 同时选取行和列
# 选取c行B列的值 df.loc['c', 'B'] # 选取A列和B列同时大于0的C列和D列 df.loc[((df.A > 0) & (df.B > 0)), ['C', 'D']]
2.1.4 行和列的赋值
# 令a行为10 df.loc['a', :] = 10 # 令B列为50 df.loc[:, 'B'] = 50 # 令b, c行的C到E列为30 df.loc[['b', 'c'], 'C':'E'] = 30 # 令C列小于0的行赋值为0 df.loc[df.C < 0] = 0
2.1.5 多重索引
tuples = [ ('cobra', 'mark i'), ('cobra', 'mark ii'), ('sidewinder', 'mark i'), ('sidewinder', 'mark ii'), ('viper', 'mark ii'), ('viper', 'mark iii') ] index = pd.MultiIndex.from_tuples(tuples) values = [[12, 2], [0, 4], [10, 20], [1, 4], [7, 1], [16, 36]] df = pd.DataFrame(values, columns=['max_speed', 'shield'], index=index) # Output df max_speed shield cobra mark i 12 2 mark ii 0 4 sidewinder mark i 10 20 mark ii 1 4 viper mark ii 7 1 mark iii 16 36
df.loc['cobra'] max_speed shield mark i 12 2 mark ii 0 4
# return a Series df.loc[('cobra', 'mark ii')] max_speed 0 shield 4 Name: (cobra, mark ii), dtype: int64 # return a dataframe df.loc[[('cobra', 'mark ii')]] max_speed shield cobra mark ii 0 4
# return a Series df.loc['cobra', 'mark i'] max_speed 12 shield 2 Name: (cobra, mark i), dtype: int64
df.loc[('cobra', 'mark i'), 'shield']
df.loc[('cobra', 'mark i'):'viper'] max_speed shield cobra mark i 12 2 mark ii 0 4 sidewinder mark i 10 20 mark ii 1 4 viper mark ii 7 1 mark iii 16 36 df.loc[('cobra', 'mark i'):('viper', 'mark ii')] max_speed shield cobra mark i 12 2 mark ii 0 4 sidewinder mark i 10 20 mark ii 1 4 viper mark ii 7 1
2.2 df.iloc[ ]
2.2.1 行选取
# return a Series df.iloc[1] df.iloc[1, :] # return a dataframe df.iloc[[1]] df.iloc[[1], :]
df.iloc[:3, :] df.iloc[:3]
df.iloc[[1, 3, 5]] df.iloc[[1, 3, 5], :]
2.2.2 列选取
df.iloc[:, 1]
df.iloc[:, 0:3] df.iloc[:,:3]
df.iloc[:, [0, 2, 3]]
2.2.3 同时选取行和列
df.iloc[0, 1]
df.iloc[[1,2], 1:4]
2.3 df.ix[ ]
However, when an axis is integer based, ONLY label based access and not positional access is supported. Thus, in such cases, it’s usually better to be explicit and use .iloc or .loc.
3. 单元格选取
- df.at[ ] 只能使用标签索引
- df.iat[ ] 只能使用整数索引
3.1 df.at[]
获取c行C列的值:df.at['c', 'C']
把c行C列赋值为10:df.at['c', 'C'] = 10
3.2 df.iat[]
df.iat[2, 2]
df.iat[2, 2] = 10