# encoding=utf-8
# Build the sample data used by the rest of the walkthrough.
import numpy as np
import pandas as pd

# Series holding the first ten even numbers: 2, 4, ..., 20.
s = pd.Series(list(range(2, 21, 2)))

# Eight consecutive daily timestamps starting on 2017-03-01; used as row labels.
dates = pd.date_range("20170301", periods=8)

# 8x5 frame of random normal samples, indexed by `dates`, with columns A..E.
df = pd.DataFrame(np.random.randn(8, 5), index=dates, columns=list("ABCDE"))
print(df)
随机生成一个 8 行 5 列的 DataFrame,以 dates 作为行索引(index),以 A、B、C、D、E 作为列名。
# Print the first three rows.
print(df.head(n=3))
# Print the last three rows.
print(df.tail(n=3))
# Print the row index.
row_index = df.index
print(row_index)
# Print the frame's values.
frame_values = df.values
print(frame_values)
# Transpose: swap the row index and the columns.
print(df.transpose())
# Sort the rows by the values in column "C".
# `DataFrame.sort` was deprecated in pandas 0.17 and removed in 0.20;
# `sort_values(by=...)` is its replacement.
print(df.sort_values(by="C"))
我测试 sort 的时候竟然报错。这并不是 Python 3 的问题,而是 pandas 自 0.17 起弃用、并在 0.20 中移除了 DataFrame.sort,应改用 df.sort_values(by="C")。
# Sort along the column axis by column label, in descending order.
print(df.sort_index(axis="columns", ascending=False))
# Print per-column summary statistics.
summary = df.describe()
print(summary)
# Slicing: select a single column and show what type it comes back as.
column_a = df["A"]
print(column_a)
print(type(column_a))
# Positional row slice: the first three rows.
print(df[0:3])
print("=============================================================")
# Row slice by date label on the datetime index (both endpoints included).
print(df["20170301":"20170304"])
print("=============================================================")
# Select a single row by its index label.
first_label = dates[0]
print(df.loc[first_label])
# Label-based selection: a date range of rows, restricted to columns B and D.
wanted_columns = ["B", "D"]
print(df.loc["20170301":"20170304", wanted_columns])
print("=============================================================")
# Scalar lookup by row label and column label.
print(df.at[dates[0], "C"])
# Position-based selection: rows 1-2 and columns 2-3 (slice ends are exclusive).
row_span = slice(1, 3)
col_span = slice(2, 4)
print(df.iloc[row_span, col_span])
# Filter rows with boolean conditions.
# Both conditions are combined into a single mask. The chained form
# `df[df.B > 0][df.A < 0]` applies a full-length mask to an already-filtered
# frame, which makes pandas reindex the mask and emit a UserWarning.
print(df[(df.B > 0) & (df.A < 0)])
print("=============================================================")
# Keep only the positive cells; every other cell becomes NaN.
print(df[df > 0])
print("=============================================================")
# Keep the rows whose value in column E is 1 or 2.
print(df[df["E"].isin([1, 2])])
# Set a single cell by integer position (row 1, column 1).
df.iat[1, 1] = 1
print(df)

# Overwrite every row of column D with 4.
fours = np.array([4] * len(df))
df.loc[:, "D"] = fours
print(df)

# Work on a copy: replace each positive cell with its negation.
df2 = df.copy()
positive_mask = df2 > 0
df2[positive_mask] = -df2
print(df2)
# Keep the first four dates and columns A-D, and add a new column G.
df1 = df.reindex(index=dates[:4], columns=["A", "B", "C", "D", "G"])
# Fill G for the first two dates only.
df1.loc[dates[0]:dates[1], "G"] = 1
# Drop the rows that contain NaN.
print(df1.dropna())
# Fill NaN with 2.
print(df1.fillna(2))
http://www.imooc.com/video/14994
# encoding=utf-8
# Parse a web-server access log into a pandas DataFrame.
import sys
import os
import re

import numpy as np
import pandas as pd
# NOTE: `Panel` was removed in pandas 1.0, so importing it raises ImportError
# on current releases; this script never used it.
from pandas import Series, DataFrame

# Access-log file to analyse.
logfile = 'www.xxxxxx.com-access_log-20170521'

# Column labels for the five captured fields, in capture order.
LOG_COLUMNS = ['IP', 'Time', 'Result', 'Status', 'No.']

# Compiled once rather than rebuilt for every line. Written as a raw string so
# the backslash escapes reach the regex engine unchanged. Captures, in order:
# the leading address, the bracketed timestamp, the quoted request string,
# the numeric status, and a trailing number or "-".
LOG_PATTERN = re.compile(r'([(\d\.)]+) - - \[(.*?)\] "(.*?)" (\d+) (\d+|-)')


def parse_log_lines(lines):
    """Extract the captured fields from each access-log line.

    Args:
        lines: Iterable of log lines (an open text file works).

    Returns:
        A list with one 5-tuple of strings per line that matches
        ``LOG_PATTERN``. Lines that do not match are skipped instead of
        raising ``AttributeError`` on a ``None`` match.
    """
    candidates = (LOG_PATTERN.match(line) for line in lines)
    return [found.groups() for found in candidates if found is not None]


def load_access_log(path):
    """Read the log file at *path* and return its parsed rows as a DataFrame.

    The file is decoded as UTF-8 with undecodable bytes replaced, so a stray
    non-UTF-8 byte in a request string does not abort the whole run.

    Raises:
        OSError: If the file cannot be opened.
    """
    with open(path, 'r', encoding='utf-8', errors='replace') as fo:
        return DataFrame(parse_log_lines(fo), columns=LOG_COLUMNS)


if __name__ == '__main__':
    df = load_access_log(logfile)
    print(df)
    # Example: keep only the requests from one address.
    # print(df[df['IP'] == '42.120.160.97'])
如果想像使用 SQL 一样使用 pandas,可以参考下面这篇文章:
http://www.cnblogs.com/en-heng/p/5630849.html