I have a dataframe `df` with two columns (PMID, DOI). I want to pass each PMID to a website as the input value, read the DOI link it returns, and store that in the second column (DOI). An example is shown below:
PMID | DOI
20022636 10.1016/j.molimm.2009.11.027
20023032 10.1128/JB.01375-09
2002360
2002352
# Look up the DOI for every PMID in `df` through the pmid2cite converter page
# and write the result into the 'DOI' column of the same row.
driver = webdriver.Chrome(ChromeDriverManager().install())
try:
    driver.get("https://www.pmid2cite.com/pmid-to-doi-converter")
    # Dismiss the consent dialog so the form underneath becomes clickable.
    WebDriverWait(driver, 10).until(
        EC.element_to_be_clickable((By.XPATH, "//p[text()='Consent']"))
    ).click()

    for index, row in df.iterrows():
        # send_keys() needs text; a raw DataFrame cell may be a NumPy integer,
        # which Selenium cannot type, so convert it once up front.
        pmid = str(row['PMID'])
        print(pmid)

        form_input = WebDriverWait(driver, 10).until(
            EC.element_to_be_clickable((By.CSS_SELECTOR, "#formInput"))
        )
        # Clear any text left from the previous lookup; otherwise the new PMID
        # would be appended to the old one.
        form_input.clear()
        form_input.send_keys(pmid)
        # find_element(By.XPATH, ...) works on Selenium 3 and 4; the old
        # find_element_by_xpath helper was removed in Selenium 4.
        driver.find_element(By.XPATH, "/html/body/div[5]/div[2]/form/button").click()

        # NOTE(review): the absolute XPath is tied to the current page layout
        # and will break if the site's markup changes -- confirm it still
        # matches the DOI link.
        res = [
            my_elem.get_attribute("innerHTML")
            for my_elem in WebDriverWait(driver, 20).until(
                EC.visibility_of_all_elements_located(
                    (By.XPATH, "/html/body/div[5]/div[3]/p[1]/span[2]/a")
                )
            )
        ]
        # iterrows() yields index *labels*, so write by label rather than by
        # position (iloc) to hit the right row for any index.
        df.at[index, 'DOI'] = res[0]

    print('Done')
finally:
    # Always close the browser, even if a lookup times out or raises.
    driver.quit()
