I have followed many tutorials about scraping JavaScript-rendered pages, but I cannot manage to extract the numbers from this table:
http://www.wsj.com/mdc/public/npage/2_3023_creditdervs.html
My most recent attempt was based on a Sentdex tutorial, using this code:
import bs4 as bs
import sys
import urllib.request
from PyQt5.QtWebEngineWidgets import QWebEnginePage
from PyQt5.QtWidgets import QApplication
from PyQt5.QtCore import QUrl
class Page(QWebEnginePage):
    """Load a URL in a headless web-engine page and capture the rendered HTML.

    Constructing an instance blocks: it starts the Qt event loop, waits for
    the page to finish loading, asks the engine for the current HTML and
    stops the event loop once that HTML has been delivered.  Afterwards the
    markup is available as ``self.html``.

    Attributes:
        app: The QApplication driving the event loop.
        html: The page's HTML as a string; ``''`` until the engine has
            delivered it.
    """

    def __init__(self, url):
        """Load *url* and block until its HTML has been captured.

        Args:
            url: Address of the page to load, as a string.
        """
        # Qt permits only one application object per process, so reuse an
        # existing one instead of unconditionally creating another.  This
        # makes it possible to build more than one Page in the same run.
        self.app = QApplication.instance() or QApplication(sys.argv)
        super().__init__()
        self.html = ''
        self.loadFinished.connect(self._on_load_finished)
        self.load(QUrl(url))
        # Blocks here until Callable() calls quit().
        self.app.exec_()

    def _on_load_finished(self):
        """Request the page's HTML once loading has finished."""
        # toHtml() is asynchronous: the markup is handed to the callback
        # rather than returned, so its return value must not be stored in
        # self.html (doing so would overwrite the '' default with a
        # non-string until the callback fires).
        self.toHtml(self.Callable)
        print('Load finished')

    def Callable(self, html_str):
        """Store the delivered HTML and stop the event loop.

        Args:
            html_str: The page's HTML, supplied by ``toHtml``.
        """
        self.html = html_str
        self.app.quit()
def main():
    """Render the WSJ credit-derivatives page and print its highlighted cells.

    The page is loaded through ``Page`` so that its HTML is captured after
    the browser engine has processed it, then parsed with BeautifulSoup.
    Every ``<td>`` whose class is ``col2 yellowBack`` is collected and the
    resulting list of tags is printed.
    """
    rendered = Page('http://www.wsj.com/mdc/public/npage/2_3023_creditdervs.html')
    document = bs.BeautifulSoup(rendered.html, 'html.parser')
    # find_all returns a list of matching tags; an empty list is printed
    # when nothing matches.
    print(document.find_all("td", {"class": "col2 yellowBack"}))


if __name__ == '__main__':
    main()
It looks like I am off target... everyone always speaks of a script associated with text that appears in the web-page source but then disappears from the BeautifulSoup tag text... but I can't find the scripts associated with the values in the main table of the page above.
Any suggestions on where I should direct my research?