I have a python script that accepts a CSV file containing a list of URLs. The CSV file looks like this.
name, url
google, https://httpstat.us/200
yahoo, https://httpstat.us/401
bcs, https://httpstat.us/521
The python script starts from here.
import time
import urllib.request
import urllib.error
url_list = []
url_data = []
total_duration = 0.5
wait_time = 5
http200_count = 0
http300_count = 0
http400_count = 0
http500_count = 0
counter = 1
with open("url_list.csv", "r") as url_file:
headers = next(url_file).strip().replace(" ","").split(",")
for row in url_file:
url_list = row.strip().replace(" ","").split(",")
url_dict = dict(zip(headers, url_list), http200 = http200_count, http300 = http300_count, http400 = http400_count, http500 = http500_count)
url_data.append(url_dict)
url_file.close()
interval = time.time() + 60 * total_duration
print(interval)
while 1:
while 1:
print(f"Counter: {counter}")
for url_entry in url_data:
print(f"Website:", url_entry["name"])
print(f"URL:", url_entry["url"])
print(urllib.request.urlopen(url_entry["url"]).status)
try:
with urllib.request.urlopen(url_entry["url"]) as response:
print(response.status)
request_status = response.status
# return response.read(), response
except urllib.error.HTTPError as error:
print(error.status, error.reason)
continue
except urllib.error.URLError as error:
print(error.reason)
continue
else:
print("All is good!")
if request_status >= 200 and request_status <=299:
url_entry["http200"] = url_entry["http200"] + 1
elif request_status >= 300 and request_status <=399:
url_entry["http300"] = url_entry["http300"] + 1
elif request_status >= 400 and request_status <=499:
url_entry["http400"] = url_entry["http400"] + 1
elif request_status >= 500 and request_status <=599:
url_entry["http500"] = url_entry["http500"] + 1
print(f"", url_entry["name"], "-", url_entry["http200"], "-", url_entry["http300"], "-", url_entry["http400"], "-", url_entry["http500"])
print("")
time.sleep(wait_time)
counter = counter + 1
print(time.time(),"-",interval)
if time.time() >= interval:
print(f"There are {counter-1} probes in the past", 60 * total_duration, "secs.")
print(f"HTTP Status code:", url_entry["name"], "-", url_entry["http200"], "-", url_entry["http300"], "-", url_entry["http400"], "-", url_entry["http500"])
interval = time.time() + 60 * total_duration
counter = 1
The problem is whenever it hits a URL with HTTP error codes it will exit and the while loop stops processing. Is there any way to continue the script even after hitting HTTP errors?
➜ DevOps_Practice python urlProbe.py
1687386788.8879528
Counter: 1
Website: google
URL: https://httpstat.us/200
200
200
All is good!
google - 1 - 0 - 0 - 0
Website: yahoo
URL: https://httpstat.us/401
Traceback (most recent call last):
File "/Users/desmondlim/Documents/DevOps/Projects/DevOps_Challenge_SPH/urlProbe.py", line 38, in <module>
print(urllib.request.urlopen(url_entry["url"]).status)
File "/Users/desmondlim/.pyenv/versions/3.10.4/lib/python3.10/urllib/request.py", line 216, in urlopen
return opener.open(url, data, timeout)
File "/Users/desmondlim/.pyenv/versions/3.10.4/lib/python3.10/urllib/request.py", line 525, in open
response = meth(req, response)
File "/Users/desmondlim/.pyenv/versions/3.10.4/lib/python3.10/urllib/request.py", line 634, in http_response
response = self.parent.error(
File "/Users/desmondlim/.pyenv/versions/3.10.4/lib/python3.10/urllib/request.py", line 563, in error
return self._call_chain(*args)
File "/Users/desmondlim/.pyenv/versions/3.10.4/lib/python3.10/urllib/request.py", line 496, in _call_chain
result = func(*args)
File "/Users/desmondlim/.pyenv/versions/3.10.4/lib/python3.10/urllib/request.py", line 643, in http_error_default
raise HTTPError(req.full_url, code, msg, hdrs, fp)
urllib.error.HTTPError: HTTP Error 401: Unauthorized
Thanks.
print(urllib.request.urlopen(url_entry["url"]).status)is outside thetry. Why do you even need that, since you're opening the same URL in thewithstatement?urllib? Therequestsmodule is much easier to use.