I want to split the task into 5 parts and give each of 5 chromedriver instances its own share of the work — that would make the automation much faster, and that is my biggest question. But maybe it would be better for each chromedriver to write to its own CSV file, or would I need to add something that pools all the scraped results into one file? I'm really at a loss here and already pushing my skill level. I would be eternally grateful for any concrete help, at least on getting multithreading working. Thank you!
Each thread should take the next cell's data without repeating one already processed — that is, each thread should receive a unique piece of data at a time.
import openpyxl
import time
from seleniumwire import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.chrome.options import Options
from selenium.common.exceptions import WebDriverException
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from threading import Thread
from selenium.webdriver.support.expected_conditions import presence_of_element_located
def pass_data(url, driver, num):
    """Navigate *driver* to *url* with a spoofed mobile user agent and
    type *num* into the page's Wikipedia search input.

    Any WebDriverException raised while waiting for the search box
    (a wait timeout is a subclass) is caught and reported so that one
    failed page does not abort the whole run.
    """
    # selenium-wire feature: rewrite the outgoing request headers for
    # every request this driver makes.
    driver.header_overrides = {
        'USER-AGENT': 'Mozilla/5.0 (Linux; Android 10; SM-G981B) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/80.0.3987.162 Mobile Safari/537.36',
    }
    driver.get(url)
    print("URL Called")
    search_box = (By.CLASS_NAME, 'wikipedia-search-input')
    try:
        wait = WebDriverWait(driver, 60)
        wait.until(EC.element_to_be_clickable(search_box)).send_keys(num)
        time.sleep(3)  # brief pause after typing before the next navigation
    except WebDriverException:
        print("Service chromedriver unexpectedly exited")
def header_val():
    """Read every value in column A of test.xlsx and feed each one to
    the demo page via pass_data().

    Opens a single Chrome session, iterates the first column of the
    active sheet, and always quits the browser when done (or on error).
    """
    url = "https://testautomationpractice.blogspot.com/"
    driver = webdriver.Chrome('chromedriver')
    wb = openpyxl.load_workbook("test.xlsx")
    sheet = wb.active
    try:
        # Iterate column A only, from row 1 to the last populated row.
        for row in sheet.iter_rows(min_row=1, min_col=1,
                                   max_row=sheet.max_row, max_col=1):
            for cell in row:
                print(cell.value)
                # BUG FIX: the original called pass_data(urls, cell.value, driver),
                # passing the cell value where the driver belongs and vice versa.
                # The signature is pass_data(url, driver, num).
                pass_data(url, driver, cell.value)
    finally:
        driver.quit()  # release the browser even if a scrape step raises
header_val()