I am trying to scrape a website with multiple pages with the same url using scrapy-playwright. the following script returned only the data of the second page and did not continue to the rest of the pages.
can anyone suggest how I can fix it?
import scrapy
from scrapy_playwright.page import PageMethod
from scrapy.crawler import CrawlerProcess
class AwesomeSpideree(scrapy.Spider):
name = "awesome"
def start_requests(self):
# GET request
yield scrapy.Request(
url=f"https://www.cia.gov/the-world-factbook/countries/" ,
callback = self.parse,
meta=dict(
playwright = True,
playwright_include_page = True,
playwright_page_methods = {
"click" : PageMethod('click',selector = 'xpath=//div[@class="pagination-controls col-lg-6"]//span[@class="pagination__arrow-right"]'),
"screenshot": PageMethod("screenshot", path=f"step1.png", full_page=True)
},
)
)
async def parse(self, response):
page = response.meta["playwright_page"]
await page.close()
print("-"*80)
CountryLst = response.xpath("//div[@class='col-lg-9']")
for Country in CountryLst:
yield {
"country_link": Country.xpath(".//a/@href").get()
}