pandas中concatenate和combine_first的用法详解
我是小蚂蚁 人气:0
concatenate(在 pandas 中对应 concat)的主要作用是拼接 Series 和 DataFrame 的数据。
combine_first可以用来填充数据:用另一个对象中的值去填补调用对象中的缺失值(NaN)。
其中numpy提供concatenate()方法,pandas中对应的是concat()方法,如:np.concatenate([arr1, arr2])、pd.concat([s1, s2])
Series类型可以使用 s2 中的数值来填充 s1,如:s1.combine_first(s2)
DataFrame类型同样可以使用 df2 中的数值来填充 df1 中的缺失值, 如:df1.combine_first(df2)
# Walkthrough of np.concatenate, pd.concat and combine_first on small examples.
# The commented tables after each print are the outputs recorded in the
# original write-up; exact number formatting may vary across library versions.
import numpy as np
import pandas as pd
from pandas import Series, DataFrame

# Fix the random seed so the random frames below are repeatable between runs.
np.random.seed(666)

# ---------------------------------------------------------------------------
# NumPy: concatenate
# ---------------------------------------------------------------------------
arr1 = np.arange(9).reshape(3, 3)
arr2 = np.arange(9).reshape(3, 3)

# Default axis=0 stacks the rows of arr2 underneath arr1.
stacked_arrays = np.concatenate([arr1, arr2])
print(stacked_arrays)
# [[0 1 2]
#  [3 4 5]
#  [6 7 8]
#  [0 1 2]
#  [3 4 5]
#  [6 7 8]]

# axis=1 places the columns of arr2 to the right of arr1.
side_by_side_arrays = np.concatenate([arr1, arr2], axis=1)
print(side_by_side_arrays)
# [[0 1 2 0 1 2]
#  [3 4 5 3 4 5]
#  [6 7 8 6 7 8]]

# ---------------------------------------------------------------------------
# pandas: concat on Series
# ---------------------------------------------------------------------------
s1 = Series([1, 2, 3], index=['A', 'B', 'C'])
s2 = Series([4, 5], index=['E', 'F'])

# Same effect as the NumPy call above: the values are joined end to end.
concat_series = pd.concat([s1, s2])
print(concat_series)
# A    1
# B    2
# C    3
# E    4
# F    5
# dtype: int64

# axis=1 turns each Series into its own column; labels missing from one
# Series show up as NaN. The result is a DataFrame, not a Series.
series_as_columns = pd.concat([s1, s2], axis=1)
print(series_as_columns)
print(type(series_as_columns))
#      0    1
# A  1.0  NaN
# B  2.0  NaN
# C  3.0  NaN
# E  NaN  4.0
# F  NaN  5.0
# <class 'pandas.core.frame.DataFrame'>

# ---------------------------------------------------------------------------
# pandas: concat on DataFrames
# ---------------------------------------------------------------------------
df1 = DataFrame(np.random.randn(4, 3), columns=['X', 'Y', 'Z'])
print(df1)
#           X         Y         Z
# 0  0.824188  0.479966  1.173468
# 1  0.909048 -0.571721 -0.109497
# 2  0.019028 -0.943761  0.640573
# 3 -0.786443  0.608870 -0.931012

df2 = DataFrame(np.random.randn(3, 3), columns=['X', 'Y', 'A'])
print(df2)
#           X         Y         A
# 0  0.978222 -0.736918 -0.298733
# 1 -0.460587 -1.088793 -0.575771
# 2 -1.682901  0.229185 -1.756625

# sort=True is passed explicitly so the non-matching columns come out in the
# sorted order shown below; without it, recent pandas versions keep the
# order of appearance (X, Y, Z, A) instead.
concat_frames = pd.concat([df1, df2], sort=True)
print(concat_frames)
#           A         X         Y         Z
# 0       NaN  0.824188  0.479966  1.173468
# 1       NaN  0.909048 -0.571721 -0.109497
# 2       NaN  0.019028 -0.943761  0.640573
# 3       NaN -0.786443  0.608870 -0.931012
# 0 -0.298733  0.978222 -0.736918       NaN
# 1 -0.575771 -0.460587 -1.088793       NaN
# 2 -1.756625 -1.682901  0.229185       NaN

# ---------------------------------------------------------------------------
# pandas: combine_first
# ---------------------------------------------------------------------------
s1 = Series([2, np.nan, 4, np.nan], index=['A', 'B', 'C', 'D'])
s2 = Series([1, 2, 3, 4], index=['A', 'B', 'C', 'D'])

# Fill the NaN entries of s1 with the values of s2 at the same labels.
filled_series = s1.combine_first(s2)
print(filled_series)
# A    2.0
# B    2.0
# C    4.0
# D    4.0
# dtype: float64

df1 = DataFrame({
    'X': [1, np.nan, 3, np.nan],
    'Y': [5, np.nan, 7, np.nan],
    'Z': [9, np.nan, 11, np.nan],
})
df2 = DataFrame({
    'Z': [np.nan, 10, np.nan, 12],
    'A': [1, 2, 3, 4],
})

# Same idea for frames: holes in df1 are filled from df2 where df2 has the
# column; X and Y stay NaN because df2 has nothing to offer for them.
filled_frame = df1.combine_first(df2)
print(filled_frame)
#      A    X    Y     Z
# 0  1.0  1.0  5.0   9.0
# 1  2.0  NaN  NaN  10.0
# 2  3.0  3.0  7.0  11.0
# 3  4.0  NaN  NaN  12.0
加载全部内容