xlrd
是 Python 环境下用于读取 Excel 文件数据的一个模块,可以进行的操作有:
- 读取有效单元格的行数、列数
- 读取指定行(列)的所有单元格的值
- 读取指定单元格的值
- 读取指定单元格的数据类型
1
| selenium.common.exceptions.SessionNotCreatedException: Message: session not created: This version of ChromeDriver only supports Chrome version 78
|
原因:ChromeDriver 的版本与本机安装的 Chrome 浏览器版本不一致。解决方法:下载与本机 Chrome 版本相匹配的 chromedriver.exe,放到 Python 安装目录下的 Scripts 文件夹中(或其他已加入 PATH 的目录)。
Code from JZ :
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109
from time import sleep

import xlrd
from selenium import webdriver
from selenium.common.exceptions import WebDriverException
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import Select
subject = '基因'
lang = '中文'

SEARCH_URL = 'http://www.hiresearch.cn/pcomp'
WORKBOOK_NAME = 'limit-pcy.xlsx'
EXPORT_BATCH_SIZE = 100

KEYWORD2_XPATH = '//*[@id="TxtKeyword2"]'
KEYWORD3_XPATH = '//*[@id="TxtKeyword3"]'


def parse_year_range(year):
    """Return ``(begin, end)`` years for one cell of the year column.

    A float cell (how xlrd reports numeric cells) is a single year and is
    used for both ends.  Any other value is converted to text and split on
    ``'-'``: ``'2010-2015'`` gives ``(2010, 2015)``.  Text without a dash is
    treated as a single year instead of raising ``IndexError``.

    Raises:
        ValueError: if a part of the text is not an integer.
    """
    if isinstance(year, float):
        single = int(year)
        return single, single
    parts = str(year).split('-')
    begin = int(parts[0])
    end = int(parts[1]) if len(parts) > 1 else begin
    return begin, end


def export_batches(count, batch_size=EXPORT_BATCH_SIZE):
    """Yield 1-based inclusive ``(first, last)`` ranges covering ``count`` items.

    Each range spans at most ``batch_size`` items; the final range is
    truncated at ``count``.  Nothing is yielded when ``count`` is below 1.
    """
    for first in range(1, count + 1, batch_size):
        yield first, min(first + batch_size - 1, count)


def by_xpath(browser, xpath):
    """Locate a single element by XPath."""
    # find_element(By.XPATH, ...) replaces find_element_by_xpath, which was
    # removed in Selenium 4.3; this form is also available on Selenium 3.
    return browser.find_element(By.XPATH, xpath)


def fill_field(browser, xpath, text):
    """Clear the input at ``xpath``, click it, then type ``text`` into it."""
    field = by_xpath(browser, xpath)
    field.clear()
    field.click()
    field.send_keys(text)


def choose_option(browser, xpath, label):
    """Pick the option whose visible text is ``label`` in the <select> at ``xpath``."""
    Select(by_xpath(browser, xpath)).select_by_visible_text(label)


def close_extra_windows(browser):
    """Close every window except the current one, then return to it."""
    main_handle = browser.current_window_handle
    for handle in browser.window_handles:
        if handle != main_handle:
            # switch_to.window replaces switch_to_window (removed in Selenium 4).
            browser.switch_to.window(handle)
            browser.close()
            browser.switch_to.window(main_handle)


def load_rows(filename):
    """Read columns 2-5 of the first sheet and return the rows after the first.

    Each returned tuple is ``(type1, type2, year, count)``.  The first row is
    skipped, matching the original loop that started at index 1.
    """
    # NOTE: xlrd 2.0 and later only read .xls files; opening an .xlsx
    # workbook such as WORKBOOK_NAME requires xlrd < 2.0.
    workbook = xlrd.open_workbook(filename=filename)
    sheet = workbook.sheet_by_index(0)
    columns = [sheet.col_values(index) for index in (2, 3, 4, 5)]
    return list(zip(*columns))[1:]


def prepare_search_form(browser):
    """Open the search page and set up the parts of the form shared by all rows.

    Returns the "add condition" link so callers can click it again if the
    third keyword field is missing later.
    """
    browser.get(SEARCH_URL)
    browser.maximize_window()

    # Two clicks on the add link, so that keyword fields 2 and 3 are present.
    add_element = by_xpath(browser, '//*[@id="logic_1"]/div[1]/a')
    add_element.click()
    add_element.click()

    subject_text = by_xpath(browser, '//*[@id="TxtKeyword1"]')
    subject_text.click()
    subject_text.send_keys(subject)

    by_xpath(
        browser,
        '//*[@id="searchForm"]/div[3]/div/div[4]/div[2]/div/div[1]/div/div/label',
    ).click()
    by_xpath(browser, '//*[@id="lblcn"]').click()

    choose_option(browser, '//*[@id="SltType2"]', '基金名称')
    choose_option(browser, '//*[@id="SltType3"]', '基金类别')

    by_xpath(browser, '//*[@id="limit_bar"]').click()
    by_xpath(browser, '//*[@id="rangeYear"]').click()
    return add_element


def export_row(browser, add_element, type1, type2, year, count):
    """Run one search described by a workbook row and export its results in batches."""
    fill_field(browser, KEYWORD2_XPATH, type1)

    # NOTE(review): when type2 is empty the third keyword field is left
    # untouched, so it may still hold the previous row's text - confirm
    # whether the page resets it after a search.
    if type2:
        try:
            fill_field(browser, KEYWORD3_XPATH, type2)
        except WebDriverException:
            # The third keyword field was not usable; add it and retry once.
            add_element.click()
            fill_field(browser, KEYWORD3_XPATH, type2)

    beq, end = parse_year_range(year)
    choose_option(browser, '//*[@id="SltBeginYear"]', str(beq))
    choose_option(browser, '//*[@id="SltEndYear"]', str(end))
    by_xpath(browser, '//*[@id="btnsearch1"]').click()

    count = int(count)
    sleep(2)
    for first, last in export_batches(count):
        by_xpath(browser, '//*[@id="tabExport"]').click()
        choose_option(browser, '//*[@id="modalExport"]', '项目 + 所有成果')
        # send_keys is given text explicitly rather than a bare integer.
        fill_field(browser, '//*[@id="modalExportFrom"]', str(first))
        fill_field(browser, '//*[@id="modalExportTo"]', str(last))
        by_xpath(browser, '//*[@id="modalBtnExport"]').click()
        sleep(3)

    # NOTE(review): the original listing lost its indentation; the progress
    # print and popup cleanup are assumed to run once per row, after all
    # batches have been requested - confirm against the original script.
    print(type1, type2, beq, end, count)
    close_extra_windows(browser)
    sleep(10)


def main():
    """Export search results for every data row of the workbook."""
    # Read the workbook first so a missing or unreadable file fails before
    # a browser is launched.
    rows = load_rows(WORKBOOK_NAME)
    browser = webdriver.Chrome()
    try:
        add_element = prepare_search_form(browser)
        for type1, type2, year, count in rows:
            export_row(browser, add_element, type1, type2, year, count)
    finally:
        # Always release the browser and its driver process, even on error.
        browser.quit()
    print('success!')


if __name__ == '__main__':
    main()
|