Skip to content

Commit 948d878

Browse files
authored
Merge pull request wistbean#8 from lovevantt/master
Update ikun_basketball.py
2 parents c560ddf + a511e17 commit 948d878

File tree

1 file changed

+18
-12
lines changed

1 file changed

+18
-12
lines changed

ikun_basketball.py

Lines changed: 18 additions & 12 deletions
Original file line number · Diff line number · Diff line change
@@ -1,3 +1,4 @@
+# coding=utf-8
 from selenium import webdriver
 from selenium.common.exceptions import TimeoutException
 from selenium.webdriver.common.by import By
@@ -6,7 +7,8 @@
 from bs4 import BeautifulSoup
 import xlwt
 
-browser = webdriver.PhantomJS()
+# browser = webdriver.PhantomJS()
+browser = webdriver.Chrome()
 WAIT = WebDriverWait(browser, 10)
 browser.set_window_size(1400, 900)
 
@@ -29,11 +31,12 @@ def search():
         browser.get("https://www.bilibili.com/")
 
         # 被那个破登录遮住了
-        index = WAIT.until(EC.element_to_be_clickable((By.CSS_SELECTOR, "#primary_menu > ul > li.home > a")))
-        index.click()
+        # index = WAIT.until(EC.element_to_be_clickable((By.CSS_SELECTOR, "#primary_menu > ul > li.home > a")))
+        # index.click()
 
-        input = WAIT.until(EC.presence_of_element_located((By.CSS_SELECTOR, "#banner_link > div > div > form > input")))
-        submit = WAIT.until(EC.element_to_be_clickable((By.XPATH, '//*[@id="banner_link"]/div/div/form/button')))
+        input = WAIT.until(EC.presence_of_element_located((By.CSS_SELECTOR, "#nav_searchform > input")))
+        submit = WAIT.until(EC.element_to_be_clickable(
+            (By.XPATH, '/html/body/div[2]/div/div[1]/div[1]/div/div[2]/div/form/div/button')))
 
         input.send_keys('蔡徐坤 篮球')
         submit.click()
@@ -42,10 +45,10 @@ def search():
         print('跳转到新窗口')
         all_h = browser.window_handles
         browser.switch_to.window(all_h[1])
-
         get_source()
+
         total = WAIT.until(EC.presence_of_element_located((By.CSS_SELECTOR,
-            "#server-search-app > div.contain > div.body-contain > div > div.page-wrap > div > ul > li.page-item.last > button")))
+            "#all-list > div.flow-loader > div.page-wrap > div > ul > li.page-item.last > button")))
         return int(total.text)
     except TimeoutException:
         return search()
@@ -55,10 +58,10 @@ def next_page(page_num):
     try:
         print('获取下一页数据')
         next_btn = WAIT.until(EC.element_to_be_clickable((By.CSS_SELECTOR,
-            '#server-search-app > div.contain > div.body-contain > div > div.page-wrap > div > ul > li.page-item.next > button')))
+            '#all-list > div.flow-loader > div.page-wrap > div > ul > li.page-item.next > button')))
         next_btn.click()
         WAIT.until(EC.text_to_be_present_in_element((By.CSS_SELECTOR,
-            '#server-search-app > div.contain > div.body-contain > div > div.page-wrap > div > ul > li.page-item.active > button'),
+            '#all-list > div.flow-loader > div.page-wrap > div > ul > li.page-item.active > button'),
             str(page_num)))
         get_source()
     except TimeoutException:
@@ -67,7 +70,7 @@ def next_page(page_num):
 
 
 def save_to_excel(soup):
-    list = soup.find(class_='all-contain').find_all(class_='info')
+    list = soup.find(class_='video-list clearfix').find_all(class_='video-item matrix')
 
     for item in list:
         item_title = item.find('a').get('title')
@@ -93,9 +96,12 @@ def save_to_excel(soup):
 
 def get_source():
     WAIT.until(EC.presence_of_element_located(
-        (By.CSS_SELECTOR, '#server-search-app > div.contain > div.body-contain > div > div.result-wrap.clearfix')))
+        (By.CSS_SELECTOR, '#all-list > div.flow-loader > div.filter-wrap')))
+
     html = browser.page_source
     soup = BeautifulSoup(html, 'lxml')
+    print('到这')
+
     save_to_excel(soup)
 
 
@@ -113,4 +119,4 @@ def main():
 
 if __name__ == '__main__':
     main()
-    book.save(u'蔡徐坤篮球.xlsx')
+    book.save('蔡徐坤篮球.xlsx')

0 commit comments

Comments (0)