A股上市公司传智教育(股票代码 003032)旗下技术交流社区北京昌平校区

 找回密码
 加入黑马

QQ登录

只需一步,快速开始

使用 Selenium 模拟登录网页并获取图片信息
代码如下:
from selenium import webdriver
from lxml import etree
import numpy as np
import requests
import time,os
# Make sure the download directory exists. exist_ok=True avoids the
# check-then-create race of testing os.path.exists() before os.mkdir().
os.makedirs("pic", exist_ok=True)
# Request headers that imitate a desktop Firefox browser; sent with every
# image download.
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:66.0) Gecko/20100101 Firefox/66.0'
}
def get_driver():
    """Create and return a new Chrome WebDriver instance."""
    return webdriver.Chrome()
def get_page_source(driver,url):
    """Open *url*, replace the session cookies, then load *url* again.

    The first load puts the browser on the target page so a cookie can be
    attached; all cookies are then cleared, the ``AHeadUserInfo`` cookie is
    installed, and the page is requested a second time with it.

    Args:
        driver: WebDriver-like object providing ``get``,
            ``delete_all_cookies`` and ``add_cookie``.
        url: Address of the page to open.

    Returns:
        Whatever the second ``driver.get(url)`` call returns.
    """
    user_info_cookie = {
        'name': 'AHeadUserInfo',
        'value': 'VipGrade=0&VipGradeName=%C6%D5%CD%A8%BB%E1%D4%B1&UserName=&NoReadMessageCount=0',
    }
    driver.get(url)
    driver.delete_all_cookies()
    driver.add_cookie(user_info_cookie)
    reloaded = driver.get(url)
    return reloaded
# Fetch picture information
def parse_datas(driver):
    """Save every hotel picture on the current page, then click "next page".

    Image ``alt`` texts and ``src`` URLs are read from the ``hotel_pic``
    entries under ``div#hotel_list``. Each picture is written to ``./pic/``
    under its alt text plus the last four characters of its URL, which are
    used as the file extension.

    A picture that cannot be downloaded or written is reported and skipped
    so that one bad entry does not abort the whole page.

    Args:
        driver: Selenium WebDriver currently showing a listing page.
    """
    # Random 10-14 second pause per page. It runs before the page source is
    # read so the page opened by the previous "next page" click has time to
    # load, and so the site is not requested too quickly.
    time.sleep(np.random.randint(10, 15))
    xhtml = etree.HTML(driver.page_source)
    img_xpath = "//div[@id='hotel_list']//div[@class='hotel_pic']/a//img"
    alt_texts = xhtml.xpath(img_xpath + "/@alt")
    # The src values are treated as protocol-relative, so a scheme is prepended.
    pic_urls = ["http:" + src for src in xhtml.xpath(img_xpath + "/@src")]
    for alt, pic in zip(alt_texts, pic_urls):
        filename = alt + pic[-4:]
        try:
            # A timeout keeps one stalled download from hanging the scraper;
            # raise_for_status avoids saving an error page as an image.
            response = requests.get(pic, headers=headers, timeout=30)
            response.raise_for_status()
        except requests.RequestException as err:
            print('download failed:', pic, err)
            continue
        try:
            with open("./pic/" + filename, 'wb') as f:
                f.write(response.content)
        except OSError:
            # Typically an alt text containing characters that are not
            # valid in a file name.
            print('==========文件名有误==========')
    # find_element(by, value) works on Selenium 3 and 4; the
    # find_element_by_xpath shortcut was removed in Selenium 4.3.
    driver.find_element('xpath', '//div[@id="page_info"]//a[@id="downHerf"]').click()
def run(start_url=None, pages=50):
    """Open the listing page and scrape *pages* consecutive pages.

    Args:
        start_url: Address of the first listing page. When omitted, the
            module-level name ``url`` is used.
            NOTE(review): no ``url`` definition is visible in this part of
            the file - define it at module level or pass ``start_url``.
        pages: Number of pages to walk through (default 50).
    """
    driver = get_driver()
    try:
        get_page_source(driver, url if start_url is None else start_url)
        for _ in range(pages):
            parse_datas(driver)
    finally:
        # Always close the browser, even if a page fails part-way through.
        driver.quit()
# Start the scraper only when this file is executed as a script, not when
# it is imported as a module.
if __name__ == "__main__":
    run()

0 个回复

您需要登录后才可以回帖 登录 | 加入黑马