library(rvest)
# Get the 'li' rows of the table so they can be queried column by column.
# The XPath selects the 'ul' inside the first 'div' of #content_seccionb.
# html_elements() is used instead of html_nodes(), which rvest 1.0.0
# superseded; both return the same node set.
rows <- read_html(
  "https://gainblers.com/mx/quinielas/progol-revancha/",
  encoding = "UTF-8"
) |>
  html_element(xpath = '//*[@id="content_seccionb"]/div[1]/ul') |>
  html_elements("li")
# Helper: look up the child of `x` addressed by the relative XPath `path`
# and return its text with surrounding whitespace trimmed.
from_xpath <- function(x, path) {
  child <- html_element(x, xpath = path)
  html_text(child, trim = TRUE)
}
# @margusl correctly pointed out that "from_xpath()" is already vectorized
# and can directly be applied to "rows", so no purrr::map_df() loop over the
# individual 'li' elements is needed.
# Each column is the text found at one relative XPath inside every row.
foo <- data.frame(
  nr = from_xpath(rows, "div[1]/span"),
  partidos1 = from_xpath(rows, "div[1]/p/span[1]"),
  partidos2 = from_xpath(rows, "div[1]/p/span[3]"),
  L1 = from_xpath(rows, "div[2]/span"),
  L2 = from_xpath(rows, "div[2]/strong"),
  E1 = from_xpath(rows, "div[3]/span"),
  E2 = from_xpath(rows, "div[3]/strong"),
  V1 = from_xpath(rows, "div[4]/span"),
  V2 = from_xpath(rows, "div[4]/strong"),
  pron1 = from_xpath(rows, "div[5]/div[1]"),
  pron2 = from_xpath(rows, "div[5]/div[2]")
) |>
  subset(!is.na(partidos1)) # filter out header row
<li class="tr quiniela-tr">
<div class="td flex7 td-event-with-calendar">
<span class="m-none">1 -- from_xpath(row, "div[1]/span")
</span>
<p class="event">
<a class="linkForzoso" href="/es/apuestas/futbol/internacional/amistosos/mexico-japon/">
<span>
México -- from_xpath(row, "div[1]/p/span[1]")
</span>
<span class="vs">
vs -- I skipped this one because it's just "vs"
</span>
<span>
Japón -- from_xpath(row, "div[1]/p/span[3]")
</span>
</a>
</p>
</div>
<div class="td flex2 f-row">
<span class="cuotita in-event no-link">2,40</span> -- from_xpath(row, "div[2]/span")
<strong class="counter">39%</strong></div> -- from_xpath(row, "div[2]/strong")
<div class="td flex2 f-row">
<span class="cuotita in-event no-link">3,50</span> -- from_xpath(row, "div[3]/span")
<strong class="counter">27%</strong> -- from_xpath(row, "div[3]/strong")
</div>
<div class="td flex2 f-row">
<span class="cuotita in-event no-link">2,80</span> -- from_xpath(row, "div[4]/span")
<strong class="counter">34%</strong> -- from_xpath(row, "div[4]/strong")
</div>
<div class="td flex2 f-row">
<div class="grupo-casilla">L</div> -- from_xpath(row, "div[5]/div[1]")
<div class="grupo-casilla">V</div> -- from_xpath(row, "div[5]/div[2]")
</div>
</li>
And then I do this for all elements in rows and map_df to a data.frame.