# -*- coding: utf-8 -*-
import numpy as np
import pandas as pd
# Walkthrough of basic pandas operations: building a Series/DataFrame,
# inspecting, sorting, selecting, assigning, and handling missing values.
s = pd.Series([i * 2 for i in range(1, 11)])
dates = pd.date_range("20170301", periods=8)
# Randomly generate 8 rows x 5 columns, using `dates` as the index and
# the letters A-E as the column labels.
df = pd.DataFrame(np.random.randn(8, 5), index=dates, columns=list("ABCDE"))
print(df)
# Print the first 3 rows
print(df.head(3))
# Print the last 3 rows
print(df.tail(3))
# Print the index
print(df.index)
# Print the underlying values
print(df.values)
# Transpose: swap index and columns
print(df.T)
# DataFrame.sort(columns=...) raised an error when tested: it has been
# removed from pandas. sort_values(by=...) is the replacement.
print(df.sort_values(by="C"))
# Sort the column labels in descending order
print(df.sort_index(axis=1, ascending=False))
print(df.describe())
# Slicing
print(df["A"])
print(type(df["A"]))
print(df[:3])
print("=============================================================")
print(df["20170301":"20170304"])
print("=============================================================")
print(df.loc[dates[0]])
print(df.loc["20170301":"20170304", ["B", "D"]])
print("=============================================================")
print(df.at[dates[0], "C"])
# Select by integer position
print(df.iloc[1:3, 2:4])
# Filter rows by condition. Both conditions are combined into a single mask;
# chaining df[cond1][cond2] applies a full-length mask to an already filtered
# frame, which makes pandas emit a reindexing warning.
print(df[(df.B > 0) & (df.A < 0)])
print("=============================================================")
print(df[df > 0])
print("=============================================================")
print(df[df["E"].isin([1, 2])])
# Assign a single cell by integer position
df.iat[1, 1] = 1
print(df)
# Overwrite the whole "D" column
df.loc[:, "D"] = np.array([4] * len(df))
print(df)
# Work on a copy and negate every positive entry
df2 = df.copy()
df2[df2 > 0] = -df2
print(df2)
# Reindex to the first 4 dates and add a new, initially all-NaN column "G"
df1 = df.reindex(index=dates[:4], columns=list("ABCD") + ["G"])
df1.loc[dates[0]:dates[1], "G"] = 1
# print(df1)
# Drop rows containing NaN
print(df1.dropna())
# Fill NaN with a constant
print(df1.fillna(value=2))
# Reference: http://www.imooc.com/video/14994
# coding=utf-8  (NOTE: a coding declaration only takes effect on line 1 or 2 of a file)
import sys
import os
import re
import numpy as np
import pandas as pd
from pandas import Series, DataFrame, Panel
# Parse a web access log into a DataFrame, one row per log line.
logfile = 'www.xxxxxx.com-access_log-20170521'

# Captures, in order: client address, bracketed timestamp, quoted request,
# numeric status, and a numeric field or "-".
# Raw string so the backslash escapes reach the regex engine untouched;
# compiled once because it is reused for every line.
regex = re.compile(r'([(\d\.)]+) - - \[(.*?)\] "(.*?)" (\d+) (\d+|-)')

log_list = []
with open(logfile, 'r') as fo:
    for line in fo:
        matched = regex.match(line)
        if matched is None:
            # Skip lines that do not fit the expected layout (e.g. blank or
            # truncated lines) instead of failing on None.groups().
            continue
        log_list.append(matched.groups())

indexs = ['IP', 'Time', 'Result', 'Status', 'No.']
df = pd.DataFrame(log_list, columns=indexs)
print(df)
# Example: select all requests from one address
# print(df[df['IP'] == '42.120.160.97'])
# For using pandas in a SQL-like way, see the following article:
# http://www.cnblogs.com/en-heng/p/5630849.html