"""Use Selenium to simulate a logged-in browser session and download listing images.

The script opens a listing page in Chrome, installs a user-info cookie,
then walks through the paginated results.  On every page it reads each
image's ``alt`` text and ``src`` URL, downloads the image with ``requests``
and stores it under ``./pic/`` using the alt text as the file name.
"""

import os
import re
import sys
import time
from urllib.parse import urlparse

import numpy as np
import requests
from lxml import etree
from selenium import webdriver

# Output directory for downloaded images (created on import, as before).
os.makedirs("pic", exist_ok=True)

# Simulated browser request headers
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:66.0) Gecko/20100101 Firefox/66.0'
}

# TODO: set this to the listing page to scrape.  The original snippet used a
# global ``url`` that was never defined, so the real value is unknown here.
START_URL = ""

# Characters that are not allowed in file names on common file systems.
_UNSAFE_FILENAME_CHARS = re.compile(r'[\\/:*?"<>|\r\n\t]')


def get_driver():
    """Create and return a new Chrome WebDriver instance."""
    driver = webdriver.Chrome()
    return driver


def get_page_source(driver, url):
    """Open *url*, replace the session cookies, and reload the page.

    The page is loaded once so that cookies can be set for its domain, all
    existing cookies are dropped, the ``AHeadUserInfo`` cookie is installed,
    and the page is loaded again so the cookie takes effect.

    Returns whatever ``driver.get`` returns (``None`` for Selenium).
    """
    driver.get(url)
    driver.delete_all_cookies()
    driver.add_cookie({
        'name': 'AHeadUserInfo',
        'value': 'VipGrade=0&VipGradeName=%C6%D5%CD%A8%BB%E1%D4%B1&UserName=&NoReadMessageCount=0',
    })
    return driver.get(url)


def _image_filename(alt, pic_url):
    """Build a file-system-safe file name from an image's alt text and URL."""
    # Take the extension from the URL path so query strings and extensions
    # that are not exactly three letters (e.g. ".jpeg") are handled; fall
    # back to the last four characters of the URL as the original code did.
    ext = os.path.splitext(urlparse(pic_url).path)[1] or pic_url[-4:]
    return _UNSAFE_FILENAME_CHARS.sub("_", alt + ext)


# Fetch image information
def parse_datas(driver):
    """Download every listing image on the current page, then go to the next page.

    Waits a random 10-14 seconds, parses the driver's current page source,
    saves each image found under ``#hotel_list .hotel_pic`` into ``./pic/``,
    and finally clicks the next-page link (``#downHerf``).

    Raises whatever Selenium raises when the next-page link is not present.
    """
    # Sleep *before* reading the page source: besides throttling requests,
    # this gives the page triggered by the previous next-page click time to
    # load, so the new page is parsed instead of a stale one.
    time.sleep(np.random.randint(10, 15))
    xhtml = etree.HTML(driver.page_source)

    alt_texts = xhtml.xpath("//div[@id='hotel_list']//div[@class='hotel_pic']/a//img/@alt")
    pictures = xhtml.xpath("//div[@id='hotel_list']//div[@class='hotel_pic']/a//img/@src")
    # Only protocol-relative URLs ("//host/path") need a scheme prepended.
    url_list = ["http:" + src if src.startswith("//") else src for src in pictures]

    for alt, pic in zip(alt_texts, url_list):
        file = _image_filename(alt, pic)
        try:
            response = requests.get(pic, headers=headers, timeout=30)
        except requests.RequestException as exc:
            # One unreachable image should not abort the whole crawl.
            print('download failed: {} ({})'.format(pic, exc))
            continue
        try:
            with open("./pic/" + file, 'wb') as f:
                f.write(response.content)
        except OSError:
            print('==========文件名有误==========')

    # find_element("xpath", ...) works on both Selenium 3 and 4, whereas
    # find_element_by_xpath was removed in Selenium 4.3.
    driver.find_element("xpath", '//div[@id="page_info"]//a[@id="downHerf"]').click()


def run(url=None, pages=50):
    """Crawl *pages* consecutive result pages starting at *url*.

    Args:
        url: Listing page to start from; defaults to ``START_URL``.
        pages: Number of pages to process (the original hard-coded 50).

    Raises:
        ValueError: if no start URL is available.
    """
    url = url or START_URL
    if not url:
        raise ValueError("no start URL given: pass one to run() or set START_URL")

    driver = get_driver()
    try:
        get_page_source(driver, url)
        for _ in range(pages):
            parse_datas(driver)
    finally:
        # Always close the browser, even when a page fails part-way through.
        driver.quit()


if __name__ == "__main__":
    # Optional first command-line argument overrides START_URL.
    run(sys.argv[1] if len(sys.argv) > 1 else None)
|