# Crawl fragment: collect "rank 1" profile links from the current page, then
# for each link in links_1 open its follow list and collect "rank 2" links.
#
# NOTE(review): this fragment has lost its indentation, so statement nesting
# cannot be read from the text. The comments below describe each statement and
# hedge wherever the nesting matters. bs, links_1, links_2, driver, collection,
# work_cnt, MAX_CNT, STORE_IN_DB, time and BeautifulSoup come from outside it.

# --- Rank 1: scan every <div class="item"> of the already-parsed page `bs`.
for i in bs.findAll('div', {"class": "item"}):
# Keep only the last '/'-separated segment of the first anchor's href.
# NOTE(review): find('a') returns None when there is no anchor, and the
# subscript would then raise TypeError -- confirm every item has a link.
boss_link = i.find('a')['href'].split('/')[-1]
# Skip placeholder anchors whose href is the literal 'javascript:void(0)'.
if boss_link == 'javascript:void(0)':
continue
# Only links not already present in links_1 are handled.
# NOTE(review): no statement in this fragment appends boss_link to links_1.
if boss_link not in links_1:
# Without DB storage the bare link is printed.
if not STORE_IN_DB:
print(boss_link)
# With DB storage a rank-'1' record is built and printed.
# NOTE(review): no insert of this record is visible here, unlike the rank-'2'
# record further down -- confirm whether that is intended.
if (STORE_IN_DB):
info = {"rank": '1', "homeLink": boss_link}
print(info)
# --- Rank 2: visit each collected rank-1 link.
for link in links_1: # [-6:]
# Turn the stored path segment into a full URL.
link = 'http://t.qq.com/' + link
print('open rank 1 => ' + link)
# NOTE(review): no driver.get(link) is visible before this sleep, so the page
# parsed below is whatever the driver currently shows -- TODO confirm.
time.sleep(5)
# Re-parse the driver's current page with the html5lib parser.
bs = BeautifulSoup(driver.page_source, 'html5lib')
# At least three <li class="bor6"> are needed because index 2 is read below.
if len(bs.findAll('li', {"class": "bor6"})) < 3:
continue
# Take the href of the first anchor inside the third such <li> and open it.
# (The same findAll query is evaluated a second time here.)
follow_url = bs.findAll('li', {"class": "bor6"})[2].find('a')['href']
driver.get(follow_url)
# Baseline compared with work_cnt before moving to a next page; it is not
# updated anywhere in this fragment.
noEmpty_1 = 0
# Page loop; left through the `break` statements below.
while True:
# Stop once work_cnt exceeds MAX_CNT.
if (work_cnt > MAX_CNT):
break
# Parse the page currently shown by the driver.
bs = BeautifulSoup(driver.page_source, 'html5lib')
# Start one below work_cnt so the first `work_cnt <= noEmpty_2` test is false.
noEmpty_2 = work_cnt - 1
# Scan every <li class="userList"> on this page.
for i in bs.findAll('li', {"class": "userList"}):
# Stop when work_cnt has not moved past the value remembered on the previous
# pass; otherwise remember the current value.
# NOTE(review): work_cnt is never modified in this fragment -- TODO confirm
# where it is incremented.
if work_cnt <= noEmpty_2:
break
else:
noEmpty_2 = work_cnt
# Skip list items that have no children (len() of a Tag counts its children).
if len(i) == 0:
continue
# Last '/'-separated segment of the first anchor's href (same None caveat as
# for boss_link above).
user_link = i.find('a')['href'].split('/')[-1]
# NOTE(review): str.split() always yields strings, so user_link is never None
# and this check cannot trigger; `is None` would be the idiomatic comparison.
if (user_link == None):
continue
# Handle only links not already in links_2.
if user_link not in links_2:
# Without DB storage the bare link is printed.
if not STORE_IN_DB:
print(user_link)
# Remember the link so it is not handled again.
links_2.append(user_link)
# With DB storage a rank-'2' record is built, printed and inserted; the
# returned id is stored in recordid, which is not used in this fragment.
if (STORE_IN_DB):
info = {"rank": '2', "homeLink": user_link}
print(info)
recordid = collection.insert_one(info).inserted_id
# Collect the pager buttons; with none present the page loop ends.
next_link = bs.findAll('a', {"class": "pageBtn"})
if len(next_link) == 0:
break
# The last pager button is checked for the label '下一页 >>' ("next page >>").
if (next_link[len(next_link) - 1].text == '下一页 >>'):
# Stop when work_cnt has not risen above noEmpty_1; otherwise log progress.
# NOTE(review): the fragment ends here; no click on the next-page button is
# visible -- presumably it follows outside this view.
if work_cnt <= noEmpty_1:
break
else:
print('next page ====> %d\n' % work_cnt)