# collect_singer.py - collect sex/group/fan data for a batch of singers from melon.com search pages.
import re, pickle
from selenium import webdriver
from singer import *
# Seconds passed to Selenium's implicit wait when locating page elements.
WAIT_TIME = 5

# Bounds of the batch handled by this run: singer_name[A:B].
A = 700
B = 900

# Load the sequence of singer names that the scraper slices into batches.
with open('singer_name.pickle', mode='rb') as f:
    singer_name = pickle.load(f)
def GetMelonData():
    """Scrape Melon search results for the singers in ``singer_name[A:B]``.

    For every name in the batch, the Melon total-search page is opened in
    Chrome and a ``Singer`` object is filled in from the artist section:

    * ``name``  - the original, unescaped name taken from ``singer_name``.
    * ``sex`` / ``group`` - the two comma-separated fields of the artist
      info line, stripped of surrounding whitespace. Both are ``'.'`` when
      the info line is three characters or shorter.
    * ``fan``   - the displayed fan count, parsed as an ``int`` after
      removing thousands separators.

    Returns:
        list: one ``Singer`` per name, in the same order as the batch.

    Raises:
        IndexError: if either CSS selector matches nothing on the page.
        ValueError: if the info line does not contain exactly one comma, or
            the fan count text is not an integer.
    """
    # Local import so the block does not depend on a file-level import.
    from urllib.parse import quote

    info_selector = ('#conts > div.section_atist > div > div.atist_dtl_info'
                     ' > dl > dd:nth-child(4)')
    fan_selector = ('#conts > div.section_atist > div > div.atist_dtl_info'
                    ' > div > span > span')

    singers = []
    # NOTE(review): the positional driver path and find_elements_by_css_selector
    # are Selenium 3 style APIs (deprecated/removed in Selenium 4); kept as-is
    # to match the API the file already uses.
    driver = webdriver.Chrome('chromedriver.exe')
    try:
        # The implicit wait is a session-wide setting, so it is set only once.
        driver.implicitly_wait(WAIT_TIME)

        for name in singer_name[A:B]:
            singer = Singer()
            singer.name = name

            # Percent-encode the whole name so that '#', '&', '+', '%', spaces
            # etc. cannot break out of the ``q`` query parameter.
            url = ('https://www.melon.com/search/total/index.htm?q='
                   + quote(name, safe='')
                   + '&section=&searchGnbYn=Y&kkoSpl=Y&kkoDpType='
                     '&linkOrText=T&ipath=srch_form')
            driver.get(url)

            info = driver.find_elements_by_css_selector(info_selector)[0].text
            if len(info) > 3:
                sex, group = info.split(',')
                # str.strip() returns a new string; the result must be stored.
                singer.sex, singer.group = sex.strip(), group.strip()
            else:
                # Placeholder used when the info line is (nearly) empty.
                singer.sex, singer.group = '.', '.'

            fan_text = driver.find_elements_by_css_selector(fan_selector)[0].text
            singer.fan = int(fan_text.replace(',', ''))

            singers.append(singer)
    finally:
        # Always shut the browser down, even if a lookup raised.
        driver.quit()

    return singers
# Load the singers stored by earlier runs and report how many there are.
with open('singer.pickle', mode='rb') as f:
    before = pickle.load(f)
print(len(before))

# Scrape the current batch.
data = GetMelonData()

# Write the earlier results followed by the new batch back to the same file.
with open('singer.pickle', mode='wb') as f:
    pickle.dump(before + data, f)
print("Done")