import pandas as pd  # imported here so the first snippet runs on its own

# One-hot encode a Series: every distinct value becomes its own indicator
# column, and each row is flagged in the column matching its value.
s = pd.Series(list('abca'))
s0 = pd.get_dummies(s)
print(s)
print(s0)
# Recorded output of the two prints above. The 0/1 values come from a pandas
# version whose dummies are integers; pandas 2.0+ defaults to bool dummies,
# which print as True/False instead.
'''
0 a
1 b
2 c
3 a
dtype: object
a b c
0 1 0 0
1 0 1 0
2 0 0 1
3 1 0 0
'''
import numpy as np  # np.nan is used below; numpy is not imported elsewhere in this file

# Missing values: by default a NaN entry gets no indicator column, so its row
# is all zeros. Passing dummy_na=True adds a dedicated NaN column instead.
s1 = ['a', 'b', np.nan]
print(s1)
print(pd.get_dummies(s1))
print(pd.get_dummies(s1, dummy_na=True))
# Recorded output of the three prints above (0/1 integers; pandas 2.0+ prints
# True/False because its default dummy dtype is bool).
'''
['a', 'b', nan]
a b
0 1 0
1 0 1
2 0 0
a b NaN
0 1 0 0
1 0 1 0
2 0 0 1
'''
# Encoding a whole DataFrame: the string columns A and B are expanded into
# indicator columns while the numeric column C is passed through unchanged.
# `prefix` replaces the default "<column name>_" prefix, one entry per
# encoded column.
df = pd.DataFrame(
    {
        'A': ['a', 'b', 'a'],
        'B': ['b', 'a', 'c'],
        'C': [1, 2, 3],
    }
)
print(df)
print(pd.get_dummies(df))
print(pd.get_dummies(df, prefix=['col1', 'col2']))
# Recorded output of the three prints above (0/1 integers; pandas 2.0+ prints
# True/False for the indicator columns because its default dummy dtype is bool).
'''
A B C
0 a b 1
1 b a 2
2 a c 3
C A_a A_b B_a B_b B_c
0 1 1 0 0 1 0
1 2 0 1 1 0 0
2 3 1 0 0 0 1
C col1_a col1_b col2_a col2_b col2_c
0 1 1 0 0 1 0
1 2 0 1 1 0 0
2 3 1 0 0 0 1
'''
# drop_first=True omits the indicator for the first level ('a' here), so k
# distinct values are encoded with k-1 columns; rows holding the dropped
# level are all zeros.
print(pd.get_dummies(pd.Series(['a', 'b', 'c', 'a', 'a'])))
print(
    pd.get_dummies(
        pd.Series(['a', 'b', 'c', 'a', 'a']),
        drop_first=True,
    )
)
# Recorded output of the two prints above (0/1 integers; pandas 2.0+ prints
# True/False because its default dummy dtype is bool).
'''
a b c
0 1 0 0
1 0 1 0
2 0 0 1
3 1 0 0
4 1 0 0
b c
0 0 0
1 1 0
2 0 1
3 0 0
4 0 0
'''
# The dtype argument sets the type of the indicator columns; with float the
# flags are shown as 1.0 / 0.0.
print(
    pd.get_dummies(
        pd.Series(['a', 'b', 'x']),
        dtype=float,
    )
)
# Recorded output of the print above.
'''
a b x
0 1.0 0.0 0.0
1 0.0 1.0 0.0
2 0.0 0.0 1.0
'''
例子
import pandas as pd
# Worked example: a small frame with two categorical string columns. The
# column labels are supplied directly to the constructor.
df = pd.DataFrame(
    [
        ['green', 'A'],
        ['red', 'B'],
        ['blue', 'A'],
    ],
    columns=['color', 'class'],
)
print(df)
# Both columns are encoded; each indicator is named "<column>_<value>".
print(pd.get_dummies(df))
# Recorded output of the two prints above (0/1 integers; pandas 2.0+ prints
# True/False because its default dummy dtype is bool).
'''
color class
0 green A
1 red B
2 blue A
color_blue color_green color_red class_A class_B
0 0 1 0 1 0
1 0 0 1 0 1
2 1 0 0 1 0
'''
可以对指定列进行get_dummies
# Encode only the 'color' column; when a single Series is passed the
# indicator columns carry the bare values with no prefix.
print(pd.get_dummies(df['color']))
# Recorded output of the print above (0/1 integers; pandas 2.0+ prints
# True/False because its default dummy dtype is bool).
'''
blue green red
0 0 1 0
1 0 0 1
2 1 0 0
'''
将指定列进行get_dummies后合并到原数据中
# Append the indicators for 'color' to the original frame: join aligns the
# dummy columns with df on the row index, keeping 'color' and 'class'.
print(
    df.join(
        pd.get_dummies(df['color'])
    )
)
# Recorded output of the print above (0/1 integers; pandas 2.0+ prints
# True/False because its default dummy dtype is bool).
'''
color class blue green red
0 green A 0 1 0
1 red B 0 0 1
2 blue A 1 0 0
'''
pandas.factorize
pandas.factorize(values, sort=False, order=None, na_sentinel=-1, size_hint=None)