I am making a web crawler that exhausts every webpage. I am given the first link of http://www.someURL.com/42342. On this page is X number of lines of expressions. The parse and evaluate functions I have evaluate these expressions to numbers. With these numbers, I concatenate them to a default link (http://www.someURL.com/) to go to another link. I am trying to keep count how many webpages there are but I am currently running into this error:
Traceback (most recent call last):
File "test2.py", line 72, in <module>
print url_queue(convert_to_link(URL))
File "test2.py", line 23, in url_queue
new_urls = convert_to_link(url)
File "test2.py", line 13, in convert_to_link
num_list.append(evaluate(parse(expressions)))
File "test2.py", line 62, in evaluate
return stack[0]
IndexError: list index out of range
I'm not quite sure why. Each function seems to give the correct output. Could someone help point out where my logic is wrong in my code?
My code:
import urllib2
URL = 'http://www.someURL.com/42342'
def convert_to_link(url):
req = urllib2.Request(url)
response = urllib2.urlopen(req)
output_expressions = response.read().splitlines() #return each expression in a list
num_list = []
url_list = []
for expressions in output_expressions:
num_list.append(evaluate(parse(expressions)))
for number in num_list:
url_list.append(newpage_gen(number))
return url_list
def url_queue(url_list):
count = 0
for url in url_list:
new_urls = convert_to_link(url)
url_list.extend(new_urls)
count += 1
return count
def parse (s): # parse expression
s = s.replace('(', ' ').replace(')', ' ').replace(',', ' ')
return s.split()[::-1]
def evaluate (ops): # evaluate expression
stack = []
while ops:
op = ops[0]
ops = ops[1:]
try:
stack.append(int(op))
continue
except: pass
if op == 'add':
arg1, arg2 = stack.pop(), stack.pop()
stack.append(arg1 + arg2)
continue
if op == 'multiply':
arg1, arg2 = stack.pop(), stack.pop()
stack.append(arg1 * arg2)
continue
if op == 'abs':
arg1 = stack.pop()
stack.append(abs(arg1))
continue
if op == 'subtract':
arg1, arg2 = stack.pop(), stack.pop()
stack.append(arg1 - arg2)
continue
return stack[0]
def newpage_gen(page_num): # create new link
url_template = 'http://www.someURL.com/'
new_url = url_template + str(page_num)
return new_url
print "TESTING"
print url_queue(convert_to_link(URL))
print opswhen you enter the evaluate function?