17_merge.py 2.8 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970
  1. # View more python tutorials on my Youtube and Youku channel!!!
  2. # Youtube video tutorial: https://www.youtube.com/channel/UCdyjiB5H8Pu7aDTNVXTTpcg
  3. # Youku video tutorial: http://i.youku.com/pythontutorial
  4. """
  5. Please note, this code is only for python 3+. If you are using python 2+, please modify the code accordingly.
  6. """
  7. from __future__ import print_function
  8. import pandas as pd
  9. # merging two df by key/keys. (may be used in database)
  10. # simple example
  11. left = pd.DataFrame({'key': ['K0', 'K1', 'K2', 'K3'],
  12. 'A': ['A0', 'A1', 'A2', 'A3'],
  13. 'B': ['B0', 'B1', 'B2', 'B3']})
  14. right = pd.DataFrame({'key': ['K0', 'K1', 'K2', 'K3'],
  15. 'C': ['C0', 'C1', 'C2', 'C3'],
  16. 'D': ['D0', 'D1', 'D2', 'D3']})
  17. print(left)
  18. print(right)
  19. res = pd.merge(left, right, on='key')
  20. print(res)
  21. # consider two keys
  22. left = pd.DataFrame({'key1': ['K0', 'K0', 'K1', 'K2'],
  23. 'key2': ['K0', 'K1', 'K0', 'K1'],
  24. 'A': ['A0', 'A1', 'A2', 'A3'],
  25. 'B': ['B0', 'B1', 'B2', 'B3']})
  26. right = pd.DataFrame({'key1': ['K0', 'K1', 'K1', 'K2'],
  27. 'key2': ['K0', 'K0', 'K0', 'K0'],
  28. 'C': ['C0', 'C1', 'C2', 'C3'],
  29. 'D': ['D0', 'D1', 'D2', 'D3']})
  30. print(left)
  31. print(right)
  32. res = pd.merge(left, right, on=['key1', 'key2'], how='inner') # default for how='inner'
  33. # how = ['left', 'right', 'outer', 'inner']
  34. res = pd.merge(left, right, on=['key1', 'key2'], how='left')
  35. print(res)
  36. # indicator
  37. df1 = pd.DataFrame({'col1':[0,1], 'col_left':['a','b']})
  38. df2 = pd.DataFrame({'col1':[1,2,2],'col_right':[2,2,2]})
  39. print(df1)
  40. print(df2)
  41. res = pd.merge(df1, df2, on='col1', how='outer', indicator=True)
  42. # give the indicator a custom name
  43. res = pd.merge(df1, df2, on='col1', how='outer', indicator='indicator_column')
  44. # merged by index
  45. left = pd.DataFrame({'A': ['A0', 'A1', 'A2'],
  46. 'B': ['B0', 'B1', 'B2']},
  47. index=['K0', 'K1', 'K2'])
  48. right = pd.DataFrame({'C': ['C0', 'C2', 'C3'],
  49. 'D': ['D0', 'D2', 'D3']},
  50. index=['K0', 'K2', 'K3'])
  51. print(left)
  52. print(right)
  53. # left_index and right_index
  54. res = pd.merge(left, right, left_index=True, right_index=True, how='outer')
  55. res = pd.merge(left, right, left_index=True, right_index=True, how='inner')
  56. # handle overlapping
  57. boys = pd.DataFrame({'k': ['K0', 'K1', 'K2'], 'age': [1, 2, 3]})
  58. girls = pd.DataFrame({'k': ['K0', 'K0', 'K3'], 'age': [4, 5, 6]})
  59. res = pd.merge(boys, girls, on='k', suffixes=['_boy', '_girl'], how='inner')
  60. print(res)
  61. # join function in pandas is similar with merge. If know merge, you will understand join