I have a C# program that scrapes news using Selenium from a website. Most news has an image on the top when opened, but some have a video instead of an image. In those cases, I want to save a default image.
I search for the image element like this: var imageElement = driver.FindElement(By.CssSelector("tbody > tr > td > img"));. If there is no image, it throws a NoSuchElementException, which I handle by logging it on the console and setting up a default image.
However, for some reason, all of a sudden (at least I feel so since I definitely have not changed this part of the code), it stopped doing that and it throws an exception in the try part of the try-catch block, which goes unhandled.
At first, my code only catches the general Exception, but I thought that adding another catch in the try-catch block would help, but it did not.
Here is that part of my code:
try {
var imageElement = driver.FindElement(By.CssSelector("tbody > tr > td > img"));
imageSrc = imageElement.GetAttribute("src");
} catch (NoSuchElementException e) {
Console.WriteLine($"Exception when looking for imageElement! {e.Message}");
imageSrc = "https://digitalfinger.id/wp-content/uploads/2019/12/no-image-available-icon-6.png";
} catch (Exception e) {
Console.WriteLine($"Exception when looking for imageElement! {e.Message}");
imageSrc = "https://digitalfinger.id/wp-content/uploads/2019/12/no-image-available-icon-6.png";
}
Here is the entire method:
private News GetTheNewsData(ChromeDriver driver, WebDriverWait wait) {
Console.OutputEncoding = Encoding.UTF8;
string imageSrc = "";
string date = "";
DateTime dateParsed = DateTime.Now;
try {
var imageElement = driver.FindElement(By.CssSelector("tbody > tr > td > img"));
imageSrc = imageElement.GetAttribute("src");
} catch (NoSuchElementException e) {
Console.WriteLine($"Exception when looking for imageElement! {e.Message}");
imageSrc = "https://digitalfinger.id/wp-content/uploads/2019/12/no-image-available-icon-6.png";
} catch (Exception e) {
Console.WriteLine($"Exception when looking for imageElement! {e.Message}");
imageSrc = "https://digitalfinger.id/wp-content/uploads/2019/12/no-image-available-icon-6.png";
}
try {
//Wait for the title element to load
wait.Until(ExpectedConditions.ElementExists(By.CssSelector("h1.title")));
//Get the title element
var titleElement = driver.FindElement(By.CssSelector("h1.title"));
var title = titleElement.Text;
//Wait for the date element to load
wait.Until(ExpectedConditions.ElementExists(By.CssSelector("div.art_author")));
//Get the date element
var dateElement = driver.FindElement(By.CssSelector("div.art_author"));
if (dateElement.Text.StartsWith("Обновена")) {
var pipeIndex = dateElement.Text.IndexOf('|');
date = dateElement.Text.Substring("Обновена: ".Length, pipeIndex - "Обновена: ".Length).Trim();
Console.WriteLine($"Trimmed date: {date}");
} else {
date = dateElement.Text.Split(',')[0];
}
try {
dateParsed = DateTime.ParseExact(date, "d MMM yyyy HH:mm", new CultureInfo("bg-BG"), DateTimeStyles.None);
} catch (Exception e) {
Console.WriteLine($"Exception parsing the date! {e.Message}");
dateParsed = new DateTime(1000, 1, 1, 12, 30, 0);
}
Console.WriteLine(dateParsed.ToString("dd/MM/yyyy HH:mm"));
//Wait for the div with content paragraphs to load
wait.Until(ExpectedConditions.ElementExists(By.CssSelector("div#art_start")));
//Get the div with content paragraphs
var divWithContentParagraphs = driver.FindElement(By.CssSelector("div#art_start"));
//Get the content paragraphs
var contentParagraphs = divWithContentParagraphs.FindElements(By.CssSelector("p"));
var sterilizedParagraphs = SterilizeTheNews(contentParagraphs);
var currentNews = new News {
Title = title,
Content = sterilizedParagraphs,
Date = dateParsed,
ImageUrl = imageSrc
};
return currentNews;
} catch (Exception e) {
Console.WriteLine($"Exception in getting the news data! {e.Message}");
return null;
}
}