import requests
from bs4 import BeautifulSoup
import csv

# Output CSV that every scraped row is appended to.
outfile = open("./battingall.csv", "wb")
writer = csv.writer(outfile)

# URL pieces used to build the player-index and game-log pages.
base_url = 'http://www.baseball-reference.com'
player_url = 'http://www.baseball-reference.com/players/'
alphabet = list('abcdefghijklmnopqrstuvwxyz')
players = 'shtml'
gamel = '&t=b&year='
game_logs = 'http://www.baseball-reference.com/players/gl.cgi?id='
years = ['2015','2014','2013','2012','2011','2010','2009','2008']

# One player-index page per letter of the alphabet.
drounders = []
for dround in alphabet:
    drounders.append(player_url + dround)

# Collect every link found on each index page.
urlz = []
for ab in drounders:
    data = requests.get(ab)
    soup = BeautifulSoup(data.content)
    for link in soup.find_all('a'):
        if link.has_attr('href'):
            urlz.append(base_url + link['href'])

# Build a game-log URL for each player page and each year of interest.
yent = []
for ant in urlz:
    for d in drounders:
        for y in years:
            if players in ant:
                if len(ant) < 60:
                    if d in ant:
                        # ant[44:-6] is the player ID embedded in the page URL
                        yent.append(game_logs + ant[44:-6] + gamel + y)

for j in yent:
    try:
        data = requests.get(j)
        soup = BeautifulSoup(data.content)
        table = soup.find('table', attrs={'id': 'batting_gamelogs'})
        tablea = j[52:59]  # part of the player ID, sliced from the game-log URL
        tableb = soup.find("b", text='Throws:').next_sibling.strip()
        tablec = soup.find("b", text='Height:').next_sibling.strip()
        tabled = soup.find("b", text='Weight:').next_sibling.strip()
        list_of_rows = []
        for row in table.findAll('tr'):
            list_of_cells = []
            list_of_cells.append(tablea)
            list_of_cells.append(j[len(j)-4:])  # the four-digit year from the end of the URL
            list_of_cells.append(tableb)
            list_of_cells.append(tablec)
            list_of_cells.append(tabled)
            for cell in row.findAll('td'):
                text = cell.text.replace('&nbsp;', '').encode("utf-8")
                list_of_cells.append(text)
            list_of_rows.append(list_of_cells)
        print list_of_rows
        writer.writerows(list_of_rows)
    except (AttributeError, NameError):
        pass

When I run this code to get game-log batting data, I keep getting an error:

Traceback (most recent call last):
  File "battinggamelogs.py", line 44, in <module>
    data = requests.get(j)
  File "/Library/Frameworks/Python.framework/Versions/2.7/lib/python2.7/site-      packages/requests/api.py", line 65, in get
    return request('get', url, **kwargs)
  File "/Library/Frameworks/Python.framework/Versions/2.7/lib/python2.7/site-    packages/requests/api.py", line 49, in request
    response = session.request(method=method, url=url, **kwargs)
  File "/Library/Frameworks/Python.framework/Versions/2.7/lib/python2.7/site-packages/requests/sessions.py", line 461, in request
    resp = self.send(prep, **send_kwargs)
  File "/Library/Frameworks/Python.framework/Versions/2.7/lib/python2.7/site-packages/requests/sessions.py", line 573, in send
    r = adapter.send(request, **kwargs)
  File "/Library/Frameworks/Python.framework/Versions/2.7/lib/python2.7/site-packages/requests/adapters.py", line 415, in send
    raise ConnectionError(err, request=request)
requests.exceptions.ConnectionError: ('Connection aborted.', BadStatusLine("''",))

I need a way to bypass this error and keep going. I think the error comes up because there is no table to get data from.

    It looks like the request just times out. Try navigating to that exact URL in a browser and see what happens. Commented Jun 30, 2015 at 18:58

1 Answer

You can wrap your requests.get() block in a try/except. You need to catch the requests.exceptions.ConnectionError that is being raised.

for ab in drounders:
    try:
        data = requests.get(ab)
        soup = BeautifulSoup(data.content)
        for link in soup.find_all('a'):
            if link.has_attr('href'):
                urlz.append(base_url + link['href'])
    except requests.exceptions.ConnectionError:
        pass

This is occurring because the connection itself has a problem, not because there is no data in the table. You aren't even getting that far.
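
The two failure modes also look different in code: a dead connection raises before you have any response at all, while a missing table only surfaces later as an AttributeError, since soup.find() returns None. A rough sketch (not your original loop body) that handles the two cases separately:

for j in yent:
    try:
        data = requests.get(j)  # a broken connection raises here
    except requests.exceptions.ConnectionError:
        continue  # no response came back at all; skip this URL
    soup = BeautifulSoup(data.content)
    table = soup.find('table', attrs={'id': 'batting_gamelogs'})
    if table is None:  # the page loaded, but it has no game-log table
        continue
    # ... parse the rows as before ...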

Note: This is completely eating the exception by simply using pass (as you are also doing later in the code block). It may be better to do something like this:

except requests.exceptions.ConnectionError:
    print("Failed to open {}".format(ab))

This will provide you with a message on the console of what URL is failing.
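
If the failures are transient (the server dropping connections under load), you could also retry a few times before giving up. Here is a rough sketch along those lines, not something your code strictly needs: the Retry settings are made-up numbers to tune, and the vendored urllib3 import path matches the requests versions of this era.

import requests
from bs4 import BeautifulSoup
from requests.adapters import HTTPAdapter
from requests.packages.urllib3.util.retry import Retry

# Assumed policy: retry each GET up to 3 times with a short backoff
# before letting the ConnectionError escape to the except clause.
session = requests.Session()
session.mount('http://', HTTPAdapter(max_retries=Retry(total=3, backoff_factor=0.5)))

for ab in drounders:
    try:
        data = session.get(ab)
    except requests.exceptions.ConnectionError:
        print("Failed to open {} after retries".format(ab))
        continue
    soup = BeautifulSoup(data.content)
    for link in soup.find_all('a'):
        if link.has_attr('href'):
            urlz.append(base_url + link['href'])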
