# -*- coding: utf-8 -*-
import numpy as np
import pandas as pd
# Series holding the first ten even numbers: 2, 4, ..., 20.
s = pd.Series(list(range(2, 22, 2)))
# Eight consecutive days starting at 2017-03-01; used as the row index below.
dates = pd.date_range("20170301", periods=8)
# 8x5 frame of standard-normal random samples, rows labelled by date and
# columns labelled A..E.
df = pd.DataFrame(
    np.random.randn(8, 5),
    index=dates,
    columns=["A", "B", "C", "D", "E"],
)
print(df)
随机生成8行5列 以dates作为index,ABCDE作为列
![1495516869458188.png image.png](/ueditor/php/upload/image/20170523/1495516869458188.png)
# Print the first 3 rows.
print(df.head(3))
# Print the last 3 rows.
print(df.tail(3))
![1495517020262570.png image.png](/ueditor/php/upload/image/20170523/1495517020262570.png)
# Print the row index labels.
print(df.index)
# Print the underlying data as a NumPy array (no index or column labels).
print(df.values)
![1495517082306429.png image.png](/ueditor/php/upload/image/20170523/1495517082306429.png)
# Transpose: swap the row index and the columns.
print(df.T)
![1495517226541154.png image.png](/ueditor/php/upload/image/20170523/1495517226541154.png)
# Sort the rows by the values in column C (ascending).
# DataFrame.sort(columns=...) no longer exists in pandas (removed in 0.20) and
# raises AttributeError; sort_values(by=...) is the replacement.
print(df.sort_values(by="C"))
我测试 sort 的时候竟然报错:这与 python3 无关,而是 pandas 从 0.20 版本起移除了 DataFrame.sort,按列的值排序应改用 df.sort_values(by="C")。
![1495517334210572.png image.png](/ueditor/php/upload/image/20170523/1495517334210572.png)
# Sort by column labels (axis=1) in descending order; the rows are left in place.
print(df.sort_index(axis=1,ascending=False))
![1495517440766992.png image.png](/ueditor/php/upload/image/20170523/1495517440766992.png)
# Print per-column summary statistics (count, mean, std, min, quartiles, max).
print(df.describe())
![1495517474411444.png image.png](/ueditor/php/upload/image/20170523/1495517474411444.png)
# Slicing / selection.
# Select a single column by label.
print(df["A"])
# Show the type of a single selected column (a pandas Series).
print(type(df["A"]))
![1495517577799921.png image.png](/ueditor/php/upload/image/20170523/1495517577799921.png)
# Positional row slice: the first 3 rows.
print(df[:3])
print("=============================================================")
# Label-based row slice on the date index; both endpoints are included.
print(df["20170301":"20170304"])
print("=============================================================")
# Select one row by its index label (the first date).
print(df.loc[dates[0]])
![1495517752423894.png image.png](/ueditor/php/upload/image/20170523/1495517752423894.png)
# Label-based selection: rows from 2017-03-01 through 2017-03-04 (inclusive),
# restricted to columns B and D.
print(df.loc["20170301":"20170304",["B","D"]])
print("=============================================================")
# Scalar lookup by row label and column label.
print(df.at[dates[0],"C"])
![1495518219982246.png image.png](/ueditor/php/upload/image/20170523/1495518219982246.png)
# Select by integer position: rows 1-2 and columns 2-3 (end positions are exclusive).
print(df.iloc[1:3,2:4])
![1495518503593589.png image.png](/ueditor/php/upload/image/20170523/1495518503593589.png)
# Filter rows with boolean conditions.
# Both conditions are combined into a single mask. Chaining df[mask1][mask2]
# applies a mask computed on the full frame to an already-filtered frame,
# which makes pandas warn that the boolean key is being reindexed.
print(df[(df.B > 0) & (df.A < 0)])
print("=============================================================")
# Element-wise mask: entries that are not greater than 0 are shown as NaN.
print(df[df > 0])
print("=============================================================")
# Keep only the rows whose E value is exactly 1 or 2.
print(df[df["E"].isin([1, 2])])
![1495519183455353.png image.png](/ueditor/php/upload/image/20170523/1495519183455353.png)
# Overwrite a single cell addressed by integer position (row 1, column 1).
df.iat[1, 1] = 1
print(df)
# Replace every value in column D with 4.
df.loc[:, "D"] = np.full(len(df), 4)
print(df)
# Work on a copy so `df` itself stays untouched, then negate the positive entries.
df2 = df.copy()
df2[df2 > 0] = -df2
print(df2)
# Keep the first four dates and columns A-D, and add a new column G.
df1 = df.reindex(index=dates[:4], columns=["A", "B", "C", "D", "G"])
# Set G for the first two dates only.
df1.loc[dates[0]:dates[1], "G"] = 1
# print(df1)
# Drop the rows that contain NaN.
print(df1.dropna())
# Replace NaN with 2.
print(df1.fillna(2))
![1495522441650202.png image.png](/ueditor/php/upload/image/20170523/1495522441650202.png)
![1495522729145574.png image.png](/ueditor/php/upload/image/20170523/1495522729145574.png)
![1495522768890282.png image.png](/ueditor/php/upload/image/20170523/1495522768890282.png)
![1495522792185569.png image.png](/ueditor/php/upload/image/20170523/1495522792185569.png)
![1495522839855808.png image.png](/ueditor/php/upload/image/20170523/1495522839855808.png)
http://www.imooc.com/video/14994
# -*- coding: utf-8 -*-
import sys
import os
import re
import numpy as np
import pandas as pd
from pandas import Series, DataFrame, Panel
# Parse a web-server access log into a DataFrame, one row per matching line.
#
# NOTE(review): `Panel` in the `from pandas import ...` line above no longer
# exists in pandas 1.0+; that import must be fixed separately before this
# script can run on a current pandas.
logfile = 'www.xxxxxx.com-access_log-20170521'
# Expected line shape: <ip> - - [<time>] "<request>" <status> <size or ->
# Raw string so the backslashes reach `re` unchanged (no invalid-escape warnings).
regex = r'([(\d\.)]+) - - \[(.*?)\] "(.*?)" (\d+) (\d+|-)'
# Compile once instead of rebuilding the pattern for every line.
log_pattern = re.compile(regex)
log_list = []
with open(logfile, 'r') as fo:
    for line in fo:
        matched = log_pattern.match(line)
        if matched is None:
            # Skip lines that are not in the expected format; calling
            # .groups() on None would raise AttributeError and abort the run.
            continue
        log_list.append(matched.groups())
# Column names for the five captured groups, in capture order.
indexs = ['IP', 'Time', 'Result', 'Status', 'No.']
df = DataFrame(log_list, columns=indexs)
print(df)
# Example filter: show only the requests from a single client IP.
# print(df[df['IP'] == '42.120.160.97'])
![1495527466930933.png image.png](/ueditor/php/upload/image/20170523/1495527466930933.png)
像sql一样使用pandas 可以参考下面这篇文章
http://www.cnblogs.com/en-heng/p/5630849.html