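# Prefix every attraction URL with http://you.ctrip.com and schedule it for crawling.
# NOTE(review): the original comment also mentioned appending to a combined list `url_total`; the code below yields requests instead -- confirm.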
su = 0
for url in url_list:
fi = url.find('http') # index of the first 'http' in url: 0 when the url starts with it, -1 when it is absent
if fi:
su += 1
yield scrapy.Request('http://you.ctrip.com' + url, callback=self.parse_data_list,meta={'text_name':deepcopy(os_name)})
else:
pass
if not id:
id = 2
if su != 0:
url_s = 'http://you.ctrip.com/sightlist/guilin28/s0-p{}.html'.format(id)
yield scrapy.Request(url_s, callback=self.parses, meta={'id': id,'text_name':deepcopy(os_name)})
def parse_data_list(self,response):
os_name = response.meta['text_name']
name = re.findall('<h1><a href=".*?">(.*?)</a></h1>', response.text)[0]
id = re.findall('var poiid = "(.*?)"', response.text)
id = str(id[0])