visit_nums()
# 2. Count how many requests each IP address made
def every_ip_nums(lines=None):
    """Count how many log lines each IPv4-looking address appears on.

    The first dotted quad (four dot-separated digit groups) found on a
    line is taken as that line's address.  Lines that contain no such
    pattern are skipped rather than raising ``IndexError``.

    Args:
        lines: Iterable of log-line strings.  When omitted, the
            module-level ``datas`` is used, which is what the original
            zero-argument call relied on.

    Returns:
        A dict mapping each address string to the number of lines on
        which it was the first address found.
    """
    if lines is None:
        lines = datas
    # Compile once instead of re-resolving the pattern for every line.
    ip_pattern = re.compile(r'(\d+\.\d+\.\d+\.\d+)')
    ip_dicts = {}
    for data in lines:
        found = ip_pattern.search(data)
        if found is None:
            # Blank or malformed line: nothing to count.
            continue
        ip = found.group(1)
        ip_dicts[ip] = ip_dicts.get(ip, 0) + 1
    return ip_dicts
# Run the per-IP count immediately; the call's return value is not used here.
every_ip_nums()
# 3. Find the minute that received the most requests
def max_time(lines=None):
    """Find the minute with the most log entries, print it, and return it.

    Only timestamps of the exact form ``[20/Jun/2018:HH:MM:SS +0800]``
    are recognised, because the date and offset are fixed in the pattern.
    Entries are grouped by the captured ``2018:HH:MM`` text.  Lines
    without such a timestamp are skipped rather than raising
    ``IndexError``.

    The busiest minute and its hit count are printed.  Earlier versions
    printed the whole sorted key list and then looked up one hard-coded
    minute, which raised ``KeyError`` whenever that minute was absent.

    Args:
        lines: Iterable of log-line strings.  When omitted, the
            module-level ``datas`` is used, which is what the original
            zero-argument call relied on.

    Returns:
        ``(minute, count)`` for the busiest minute, or ``None`` when no
        line carried a matching timestamp.  On a tie the minute that was
        seen first wins.
    """
    if lines is None:
        lines = datas
    stamp_pattern = re.compile(r'\[20/Jun/(2018:\d+:\d+):\d+ \+0800\]')
    time_dicts = {}
    for data in lines:
        found = stamp_pattern.search(data)
        if found is None:
            continue
        # Named ``minute`` so the local does not shadow the ``time`` module.
        minute = found.group(1)
        time_dicts[minute] = time_dicts.get(minute, 0) + 1
    if not time_dicts:
        # Nothing matched, so there is no maximum to report.
        return None
    busiest = max(time_dicts, key=time_dicts.get)
    print('访问次数最的时间是', busiest)
    print(time_dicts[busiest])
    return busiest, time_dicts[busiest]
# Run the busiest-minute report immediately; the call's return value is not used here.
max_time()
# 4. Count the requests made by crawlers
def Spider_visit(lines=None):
    """Count the log lines that mention a spider.

    A line is counted when it contains the substring ``spider`` in any
    letter case, so text such as ``Baiduspider`` and ``360Spider`` both
    match.  The original comparison was case-sensitive and discarded
    the total instead of returning it.

    Args:
        lines: Iterable of log-line strings.  When omitted, the
            module-level ``datas`` is used, which is what the original
            zero-argument call relied on.

    Returns:
        The number of matching lines as an int.
    """
    if lines is None:
        lines = datas
    return sum(1 for data in lines if 'spider' in data.lower())
# Run the crawler count immediately; the call's return value is not used here.
Spider_visit()