1

I want to import web-scraped data directly into PostgreSQL, without exporting it to .csv in the first place.

Here's the code I'm using to export the data to a .csv file, which I then import manually. Any help would be appreciated.

from urllib.request import urlopen as uReq
from bs4 import BeautifulSoup as soup
import csv

my_url = 'http://tis.nhai.gov.in/TollInformation?TollPlazaID=236'

# Fetch the page and release the connection promptly.
uClient = uReq(my_url)
page1_html = uClient.read()
uClient.close()

# html parsing
page1_soup = soup(page1_html, "html.parser")

filename = "TollDetail12.csv"

# The URL's query string (e.g. "TollPlazaID=236") doubles as the record ID;
# strip() removes the leading '?'.
plaza_id = my_url[my_url.find("?"):].strip("?")

# csv.writer quotes/escapes fields, so embedded commas in the scraped text
# can no longer corrupt rows (the old code only patched highway_number with
# replace(",", "|")).  newline="" avoids blank lines on Windows, and the
# context manager guarantees the file is closed even if scraping raises.
with open(filename, "w", newline="", encoding="utf-8") as f:
    writer = csv.writer(f)
    writer.writerow(["ID", "tollname", "location", "highwayNumber"])

    # grabbing data
    containers = page1_soup.findAll("div", {"class": "PA15"})
    for container in containers:
        toll_name = container.p.b.text

        search1 = container.findAll("b")
        highway_number = search1[1].text

        # 11th descendant of the first <p> holds the location text.
        # NOTE(review): index-based extraction is brittle — re-verify
        # against the page markup if the site layout ever changes.
        location = list(container.p.descendants)[10]

        print("ID: " + plaza_id)
        print("toll_name: " + toll_name)
        print("location: " + location)
        print("highway_number: " + highway_number)

        writer.writerow([plaza_id, toll_name, location, highway_number])
1

1 Answer 1

0

You need to install the psycopg2 pip package. Apart from that, edit the file with your project-specific information (database name, user, password, table name). I haven't tested it, but it should work.

from urllib.request import urlopen as uReq
from bs4 import BeautifulSoup as soup
import psycopg2

my_url = 'http://tis.nhai.gov.in/TollInformation?TollPlazaID=236'

# Fetch the page and release the connection promptly.
uClient = uReq(my_url)
page1_html = uClient.read()
uClient.close()

# html parsing
page1_soup = soup(page1_html, 'html.parser')

# grabbing data
containers = page1_soup.findAll('div', {'class': 'PA15'})

# Make the connection to PostgreSQL.
conn = psycopg2.connect(database='database_name',
                        user='user_name', password='user_password', port=5432)
try:
    # `with conn:` commits the transaction on success and rolls back if any
    # insert raises, so a partial scrape never leaves half-written rows;
    # the cursor context manager closes the cursor for us.
    with conn:
        with conn.cursor() as cursor:
            for container in containers:
                toll_name = container.p.b.text

                search1 = container.findAll('b')
                highway_number = search1[1].text

                # 11th descendant of the first <p> holds the location text.
                location = list(container.p.descendants)[10]

                # Strip the leading '?' from the URL's query string to get
                # the record ID (e.g. "TollPlazaID=236").
                ID = my_url[my_url.find('?'):]
                mystr = ID.strip('?')

                # Parameterized query — psycopg2 escapes the values, so the
                # scraped text cannot inject SQL.
                query = ("INSERT INTO table_name "
                         "(ID, toll_name, location, highway_number) "
                         "VALUES (%s, %s, %s, %s);")
                # BUG FIX: insert the cleaned value (mystr), not the raw
                # query string (ID), which still carries the leading '?'.
                data = (mystr, toll_name, location, highway_number)

                cursor.execute(query, data)
finally:
    # Always release the connection, even when the scrape or insert fails.
    conn.close()
Sign up to request clarification or add additional context in comments.

4 Comments

I am getting this error while running the code: File "C:\Users\prash\AppData\Local\Programs\Python\Python36-32\lib\site-packages\psycopg2\__init__.py", line 130, in connect conn = _connect(dsn, connection_factory=connection_factory, **kwasync) psycopg2.OperationalError: FATAL: role "prashant" is not permitted to log in
You need to alter role with login privileges. Can be done by following command: ALTER ROLE "prashant" WITH LOGIN;
Hi, can u check this please. stackoverflow.com/questions/46025873/…
Hi, Can u help me for finding this solution stackoverflow.com/questions/46052939/…

Your Answer

By clicking “Post Your Answer”, you agree to our terms of service and acknowledge you have read our privacy policy.

Start asking to get answers

Find the answer to your question by asking.

Ask question

Explore related questions

See similar questions with these tags.