# Purpose: for each month's ~30 rotated access-log files, count the hits per
# IP address in every log, then merge the per-file tallies into a single
# monthly ranking of the most active IPs.
#!/usr/bin/env python
import os
import sys
import commands
import re
# Running tally of hits per IP address, shared across all processed logs.
ipsort = {}


def IpSort(logfile):
    """Count occurrences of the leading IP address on each line of *logfile*.

    Counts accumulate into the module-level ``ipsort`` dict
    (ip -> hit count), so calling this for several files merges
    their tallies automatically.
    """
    # Four dot-separated groups of 1-3 digits.  Loose (matches e.g. 999.*),
    # but adequate for access logs where the line starts with the client IP.
    ipadd = r'\.'.join([r'\d{1,3}'] * 4)
    re_ip = re.compile(ipadd)
    # 'with' guarantees the handle is closed; the original
    # 'for line in open(...)' leaked the file object until GC.
    with open(logfile) as fp:
        for line in fp:
            match = re_ip.match(line)  # anchored at start of line
            if match:
                ip = match.group()
                ipsort[ip] = ipsort.get(ip, 0) + 1
def ReadFile():
    """Prompt for a directory and tally IPs from every '*.txt' log in it.

    Counts accumulate into the module-level ``ipsort`` dict via IpSort().
    """
    filedir = raw_input("Enter the path>").strip()  # Python 2 prompt
    for fname in os.listdir(filedir):
        if fname.endswith('.txt'):
            # BUG FIX: join with the chosen directory.  The original passed
            # the bare filename, which only worked when the script happened
            # to run from inside that directory.
            IpSort(os.path.join(filedir, fname))
def mergeipnum(*ipns):
    """Merge any number of {ip: count} dicts by summing per-IP counts.

    Returns the merged dict.  (The original only printed the result and
    discarded it — the commented-out ``return`` lines show the intent —
    and its ``sum(keys, [])`` key-union was quadratic.)
    """
    merged = {}
    for ipn in ipns:
        for ip, num in ipn.items():
            merged[ip] = merged.get(ip, 0) + num
    print(merged)  # keep the original's console report
    return merged
if __name__ == "__main__":
    # Tally every *.txt log in the user-supplied directory, then report the
    # combined monthly counts.  Since IpSort() accumulates into the global
    # ``ipsort``, the totals are already merged; mergeipnum is called with
    # that single aggregate dict.  The guard lets the module be imported
    # (e.g. for testing) without prompting for input.
    ReadFile()
    mergeipnum(ipsort)