本帖最后由 我是色色 于 2018-1-17 14:08 编辑
爬虫小程序:获取主网页的内容,并抓取该主网页中各个链接所指向的页面内容
# coding: utf-8
import re
import requests
# Crawl the front page at `url`, save its raw bytes to disk, then fetch every
# absolute link found in that page and save each response to its own numbered
# file.
url = 'http://ai.51cto.com/'

# Shared prefix of every output file: the front page is written to
# <prefix>.txt and the N-th linked page to <prefix>N.txt.
output_prefix = r'D:\Python27\sevenot_test\curbug3\test'

# Seconds to wait for a server. Without an explicit timeout a single stalled
# connection can block the script indefinitely.
request_timeout = 10

con = requests.get(url, timeout=request_timeout)
# `with` guarantees the handle is closed even if the write raises.
with open(output_prefix + '.txt', 'wb') as page_file:
    page_file.write(con.content)

# Search the decoded text rather than the raw bytes so the str pattern works
# on Python 3 as well as Python 2. Only href values that start with "http"
# and immediately follow `<a ` are captured.
href = re.findall(r'<a href="(http.*?)"', con.text, re.S)

for index, link in enumerate(href):
    print(str(index) + ' ' + link)
    try:
        cc = requests.get(link, timeout=request_timeout)
    except requests.RequestException as err:
        # A dead or slow link should not abort the rest of the crawl; its
        # number is simply left out of the output files.
        print('skipped ' + link + ': ' + str(err))
        continue
    with open(output_prefix + str(index) + '.txt', 'wb') as link_file:
        link_file.write(cc.content)
|