博客
关于我
强烈建议你试试无所不能的chatGPT,快点击我
Python 数据分析-05:DataFrame 深入
阅读量:5107 次
发布时间:2019-06-13

本文共 6014 字,大约阅读时间需要 20 分钟。

# pandas tutorial notes: adding columns, replacing values, arithmetic with
# index alignment, summary statistics, and sorting for Series / DataFrame.
#
# Only the final "Sorting a DataFrame" section is live code; the earlier
# sections are kept as commented worked examples with sample output.
# NOTE: every sample output built from np.random.randn was captured in a
# separate, unseeded run, so the numbers differ between snippets and will
# differ again each time the script is executed.

import numpy as np
import pandas as pd
from pandas import DataFrame, Series

# ---------------------------------------------------------------------------
# Adding a column to a DataFrame
# ---------------------------------------------------------------------------
# df1 = DataFrame({'city': ["Beijing", "Shanghai", "Guangzhou"],
#                  'population': [1000, 2000, 1500]})
# print(df1)
#         city  population
# 0    Beijing        1000
# 1   Shanghai        2000
# 2  Guangzhou        1500
#
# Method 1: assign a Series (it is aligned on the default 0..n-1 index).
# df1["GDP"] = Series([1000, 2000, 1500])
# print(df1)
#         city  population   GDP
# 0    Beijing        1000  1000
# 1   Shanghai        2000  2000
# 2  Guangzhou        1500  1500
#
# Method 2: map an existing column through a dict.
# df2 = DataFrame({'city': ["Beijing", "Shanghai", "Guangzhou"],
#                  'population': [1000, 2000, 1500]},
#                 index=["A", "B", "C"])
# gdp_map = {"Beijing": 1000, "Shanghai": 2000, "Guangzhou": 1500}
# df2["GDP"] = df2["city"].map(gdp_map)
# print(df2)
#         city  population   GDP
# A    Beijing        1000  1000
# B   Shanghai        2000  2000
# C  Guangzhou        1500  1500
#
# When the frame does not use the default index, the Series being assigned
# must carry the same index labels, otherwise nothing lines up.
# df3 = DataFrame({'city': ["Beijing", "Shanghai", "Guangzhou"],
#                  'population': [1000, 2000, 1500]},
#                 index=["A", "B", "C"])
# df3["GDP"] = Series([1000, 2000, 1500])
# print(df3)  # the values cannot be filled in: labels 0,1,2 != A,B,C
#         city  population  GDP
# A    Beijing        1000  NaN
# B   Shanghai        2000  NaN
# C  Guangzhou        1500  NaN
# df3["GDP"] = Series([1000, 2000, 1500], index=["A", "B", "C"])
# print(df3)
#         city  population   GDP
# A    Beijing        1000  1000
# B   Shanghai        2000  2000
# C  Guangzhou        1500  1500

# ---------------------------------------------------------------------------
# replace in Series
# ---------------------------------------------------------------------------
# s1 = Series(np.arange(10))
# print(s1)
# 0    0
# 1    1
# 2    2
# 3    3
# 4    4
# 5    5
# 6    6
# 7    7
# 8    8
# 9    9
# dtype: int32    (as captured in the original run)
#
# print(s1.replace(1, np.nan))
# 0    0.0
# 1    NaN
# 2    2.0
# 3    3.0
# 4    4.0
# 5    5.0
# 6    6.0
# 7    7.0
# 8    8.0
# 9    9.0
# dtype: float64
#
# print(s1.replace([1, 2, 3], [10, 20, 30]))
# 0     0
# 1    10
# 2    20
# 3    30
# 4     4
# 5     5
# 6     6
# 7     7
# 8     8
# 9     9
# dtype: int64

# ---------------------------------------------------------------------------
# Simple arithmetic on Series and DataFrame
# ---------------------------------------------------------------------------
# s1 = Series([1, 2, 3], index=["A", "B", "C"])
# s2 = Series([4, 5, 6, 7], index=["B", "C", "D", "E"])
# print(s1)
# A    1
# B    2
# C    3
# dtype: int64
# print(s2)
# B    4
# C    5
# D    6
# E    7
# dtype: int64
# print(s1 + s2)  # labels present on only one side yield NaN
# A    NaN
# B    6.0
# C    8.0
# D    NaN
# E    NaN
# dtype: float64
#
# DataFrame arithmetic
# df1 = DataFrame(np.arange(4).reshape(2, 2),
#                 index=["A", "B"], columns=["BJ", "SH"])
# print(df1)
#    BJ  SH
# A   0   1
# B   2   3
# df2 = DataFrame(np.arange(9).reshape(3, 3),
#                 index=["A", "B", "C"], columns=["BJ", "SH", "GZ"])
# print(df2)
#    BJ  SH  GZ
# A   0   1   2
# B   3   4   5
# C   6   7   8
# print(df1 + df2)
#     BJ  GZ   SH
# A  0.0 NaN  2.0
# B  5.0 NaN  7.0
# C  NaN NaN  NaN
#
# df3 = DataFrame([[1, 2, 3], [4, 5, np.nan], [7, 8, 9]],
#                 index=["A", "B", "C"], columns=["c1", "c2", "c3"])
# print(df3)
#    c1  c2   c3
# A   1   2  3.0
# B   4   5  NaN
# C   7   8  9.0
# print(df3.sum())
# c1    12.0
# c2    15.0
# c3    12.0
# dtype: float64
# print(df3.sum(axis=1))
# A     6.0
# B     9.0
# C    24.0
# dtype: float64
# print(df3.min())
# c1    1.0
# c2    2.0
# c3    3.0
# dtype: float64
# print(df3.max())
# c1    7.0
# c2    8.0
# c3    9.0
# dtype: float64
# print(df3.describe())
#         c1   c2        c3
# count  3.0  3.0  2.000000
# mean   4.0  5.0  6.000000
# std    3.0  3.0  4.242641
# min    1.0  2.0  3.000000
# 25%    2.5  3.5  4.500000
# 50%    4.0  5.0  6.000000
# 75%    5.5  6.5  7.500000
# max    7.0  8.0  9.000000

# ---------------------------------------------------------------------------
# Sorting a Series
# ---------------------------------------------------------------------------
# s1 = Series(np.random.randn(10))
# print(s1)
# 0   -1.745069
# 1   -3.339463
# 2    2.245615
# 3    0.201136
# 4   -0.115314
# 5   -0.425709
# 6   -1.037263
# 7    0.015670
# 8   -0.514211
# 9   -0.122862
# dtype: float64
# print(s1.values)
# [-0.46066427 -0.01673619 -0.79758999 -0.99447067 -1.2554336   0.95775716
#  -0.98716949  0.81775325 -0.95819146 -0.38062781]
# print(s1.index)  # RangeIndex(start=0, stop=10, step=1)
#
# s2 = s1.sort_values()
# print(s2)
# 3   -1.533961
# 1   -0.777431
# 5   -0.587565
# 2   -0.463069
# 7   -0.257701
# 0   -0.037266
# 6    0.062657
# 9    0.149767
# 8    0.245388
# 4    2.024740
# dtype: float64
#
# s2 = s1.sort_values(ascending=False)
# print(s2)
# 1    1.905997
# 6    0.369854
# 0    0.346478
# 2    0.283084
# 3    0.152866
# 4    0.145149
# 5   -0.362064
# 8   -0.627749
# 7   -0.738645
# 9   -0.905832
# dtype: float64
#
# print(s2.sort_index())
# 0    0.250688
# 1   -0.005753
# 2    0.818747
# 3    1.074309
# 4    0.057101
# 5   -1.576862
# 6   -1.358057
# 7   -0.774541
# 8    1.260600
# 9    0.028084
# dtype: float64

# ---------------------------------------------------------------------------
# Sorting a DataFrame (live code)
# ---------------------------------------------------------------------------
# 8 rows x 5 columns of standard-normal samples; unseeded, so the printed
# numbers change on every run.
df1 = DataFrame(
    np.random.randn(40).reshape(8, 5),
    columns=["A", "B", "C", "D", "E"],
)
print(df1)
#           A         B         C         D         E
# 0  1.301407  0.079596 -0.324598 -0.489004 -0.319954
# 1  1.627349 -1.848241 -1.535149  0.616749 -0.581343
# 2 -1.599599  0.177486  0.413103 -0.121707 -0.771692
# 3 -0.346563  2.376872 -0.299881 -0.038205 -1.101628
# 4  2.000585 -0.087473  1.679934 -1.520698 -0.037990
# 5 -0.622608  0.178647  0.511137  0.001924  1.104219
# 6  0.680216  0.616194  0.492893 -1.495716 -2.129312
# 7  0.769310 -0.425242  0.270568 -1.340633 -0.507089

# Sort a single column: the result is a Series that keeps its row labels.
print(df1["A"].sort_values())
# 0   -0.781176
# 5   -0.699767
# 4   -0.257146
# 6   -0.168928
# 2   -0.160794
# 1    0.348743
# 3    1.015523
# 7    1.750817
# Name: A, dtype: float64

# Sort whole rows by column "A". The sorted frame is computed once and
# reused as df2 below instead of sorting a second time.
df2 = df1.sort_values("A")
print(df2)
#           A         B         C         D         E
# 7 -1.667484  1.052349 -0.786262  1.515977 -1.663600
# 1 -0.755957 -0.748133 -0.078783  1.221847  1.087867
# 0 -0.624164 -0.225844  0.146987  0.209596 -1.327463
# 5 -0.362764  0.958340  0.580041 -1.062712  0.233652
# 4 -0.184361  0.924434  0.304635  1.863528  0.775122
# 6  0.406105  0.030612 -1.115804  2.543703 -0.234756
# 3  0.657304  1.464882  0.091570 -1.226326 -1.272059
# 2  2.092520 -0.210072 -0.693642  0.152570  0.659520

# Sorting by index restores the original row order, i.e. the same as df1.
print(df2.sort_index())
#           A         B         C         D         E
# 0 -0.612644 -0.795620  1.621510 -1.316650  1.504513
# 1 -1.770057 -1.740721  2.078625 -1.738596  0.345799
# 2  0.697535  1.126456  0.591017  0.272984  1.004823
# 3  1.323213  0.630537  1.063169 -0.682980  0.630861
# 4  0.292257 -0.683437 -2.204945 -0.997271  0.535046
# 5  1.441142  0.637664  0.801728 -0.249832  2.079914
# 6 -0.647377  0.078151 -0.649099 -0.360512  0.692393
# 7  0.333072  1.713874  0.672938  0.130204 -1.050239

转载于:https://www.cnblogs.com/nikecode/p/11130907.html

你可能感兴趣的文章
JQuery
查看>>
tensorflow中一种融合多个模型的方法
查看>>
基于json数据的jQuery无限级下拉菜单插件
查看>>
简单两步走 中兴V880获取权限方法
查看>>
DBSight 4.3.18 发布,J2EE搜索平台
查看>>
工厂模式
查看>>
ubuntu下开启root登陆
查看>>
HDOJ 2048 神、上帝以及老天爷(组合数学,全错位排列)
查看>>
'setw' : undeclared identifier
查看>>
输入任意个数字求和的小程序
查看>>
How Many Tables
查看>>
Go语言实践_实现一(客户端)对一(服务器端)聊天室
查看>>
开放API端口SIGN算法详细设计
查看>>
android pcm
查看>>
Linux下静态链接库的制作与使用
查看>>
Swift4 函数,例子代码
查看>>
012 内置锁和synchronized
查看>>
大型网站系统架构系列:负载均衡详解(二)
查看>>
如何从本地导入.nupkg文件
查看>>
Spring.NET 中配置 ashx 的映射,以及其它各种映射的处理
查看>>