1+ # coding=utf-8
12from selenium import webdriver
23from selenium .common .exceptions import TimeoutException
34from selenium .webdriver .common .by import By
67from bs4 import BeautifulSoup
78import xlwt
89
# Module-level WebDriver setup: a visible Chrome session shared by every
# function in this script (search/next_page/get_source all use it).
# PhantomJS was dropped here because it is deprecated in Selenium and the
# target pages no longer render correctly in it; dead commented-out code
# removed rather than kept.
browser = webdriver.Chrome()
# Explicit-wait helper: every DOM lookup below polls up to 10 s before
# raising TimeoutException (which callers catch and retry on).
WAIT = WebDriverWait(browser, 10)
# Fixed window size so the responsive layout always exposes the desktop
# selectors used elsewhere in this file.
browser.set_window_size(1400, 900)
1214
@@ -29,11 +31,12 @@ def search():
2931 browser .get ("https://www.bilibili.com/" )
3032
3133 # 被那个破登录遮住了
32- index = WAIT .until (EC .element_to_be_clickable ((By .CSS_SELECTOR , "#primary_menu > ul > li.home > a" )))
33- index .click ()
34+ # index = WAIT.until(EC.element_to_be_clickable((By.CSS_SELECTOR, "#primary_menu > ul > li.home > a")))
35+ # index.click()
3436
35- input = WAIT .until (EC .presence_of_element_located ((By .CSS_SELECTOR , "#banner_link > div > div > form > input" )))
36- submit = WAIT .until (EC .element_to_be_clickable ((By .XPATH , '//*[@id="banner_link"]/div/div/form/button' )))
37+ input = WAIT .until (EC .presence_of_element_located ((By .CSS_SELECTOR , "#nav_searchform > input" )))
38+ submit = WAIT .until (EC .element_to_be_clickable (
39+ (By .XPATH , '/html/body/div[2]/div/div[1]/div[1]/div/div[2]/div/form/div/button' )))
3740
3841 input .send_keys ('蔡徐坤 篮球' )
3942 submit .click ()
@@ -42,10 +45,10 @@ def search():
4245 print ('跳转到新窗口' )
4346 all_h = browser .window_handles
4447 browser .switch_to .window (all_h [1 ])
45-
4648 get_source ()
49+
4750 total = WAIT .until (EC .presence_of_element_located ((By .CSS_SELECTOR ,
48- "#server-search-app > div.contain > div.body-contain > div > div.page-wrap > div > ul > li.page-item.last > button" )))
51+ "#all-list > div.flow-loader > div.page-wrap > div > ul > li.page-item.last > button" )))
4952 return int (total .text )
5053 except TimeoutException :
5154 return search ()
@@ -55,10 +58,10 @@ def next_page(page_num):
5558 try :
5659 print ('获取下一页数据' )
5760 next_btn = WAIT .until (EC .element_to_be_clickable ((By .CSS_SELECTOR ,
58- '#server-search-app > div.contain > div.body-contain > div > div.page-wrap > div > ul > li.page-item.next > button' )))
61+ '#all-list > div.flow-loader > div.page-wrap > div > ul > li.page-item.next > button' )))
5962 next_btn .click ()
6063 WAIT .until (EC .text_to_be_present_in_element ((By .CSS_SELECTOR ,
61- '#server-search-app > div.contain > div.body-contain > div > div.page-wrap > div > ul > li.page-item.active > button' ),
64+ '#all-list > div.flow-loader > div.page-wrap > div > ul > li.page-item.active > button' ),
6265 str (page_num )))
6366 get_source ()
6467 except TimeoutException :
@@ -67,7 +70,7 @@ def next_page(page_num):
6770
6871
6972def save_to_excel (soup ):
70- list = soup .find (class_ = 'all-contain ' ).find_all (class_ = 'info ' )
73+ list = soup .find (class_ = 'video-list clearfix ' ).find_all (class_ = 'video-item matrix ' )
7174
7275 for item in list :
7376 item_title = item .find ('a' ).get ('title' )
@@ -93,9 +96,12 @@ def save_to_excel(soup):
9396
def get_source():
    """Parse the currently loaded search-result page and persist it.

    Blocks until the result area's filter bar is present (the results list
    is rendered by then), grabs ``browser.page_source``, parses it with
    BeautifulSoup/lxml and hands the soup to ``save_to_excel``.

    Raises:
        selenium.common.exceptions.TimeoutException: if the result area
            does not appear within the WAIT timeout (callers catch this
            and retry).
    """
    # The filter bar only exists once the flow-loader has populated the
    # result list, so waiting on it guarantees the results are in the DOM.
    WAIT.until(EC.presence_of_element_located(
        (By.CSS_SELECTOR, '#all-list > div.flow-loader > div.filter-wrap')))
    html = browser.page_source
    soup = BeautifulSoup(html, 'lxml')
    # NOTE: removed leftover debug print('到这') — it only cluttered stdout.
    save_to_excel(soup)
100106
101107
@@ -113,4 +119,4 @@ def main():
113119
if __name__ == '__main__':
    main()
    # xlwt can only write the legacy BIFF format, so the file must be saved
    # with an .xls extension — an .xlsx name produces a workbook Excel
    # refuses to open as Office Open XML.
    book.save('蔡徐坤篮球.xls')
0 commit comments