I am writing a program that extracts data from http://www.gujarat.ngosindia.com/
I wrote the following code:
def split_line(text, start='Contact', stop='Purpose'):
    """Print and return the words of *text* between two label words.

    The text is split on whitespace.  Output begins at the first word equal
    to ``start`` or ``start + ':'`` (the label word itself is included) and
    ends just before the next word equal to ``stop`` or ``stop + ':'``.  If
    the stop label never appears, everything up to the end of the text is
    kept.

    Args:
        text: Raw text to scan.
        start: Label that opens the extracted section.
        stop: Label that closes the extracted section; pass ``None`` to keep
            everything up to the end of the text.

    Returns:
        The selected words, each followed by a single space (the same string
        that is printed).  Empty when the start label is not present.
    """
    words = text.split()
    start_labels = (start, start + ':')
    stop_labels = () if stop is None else (stop, stop + ':')

    # Locate the start label; fall back to len(words) so a missing label
    # yields an empty selection instead of an IndexError.
    begin = next(
        (index for index, word in enumerate(words) if word in start_labels),
        len(words),
    )

    selected = []
    for word in words[begin:]:
        if word in stop_labels:
            break
        selected.append(word)

    # Each word keeps its trailing space so the printed output is unchanged
    # for inputs that contain both labels.
    details = ''.join(word + ' ' for word in selected)
    print(details)
    return details
def get_ngo_detail(ngo_url):
    """Fetch one NGO page and print the details found in its content cell.

    Downloads ``ngo_url``, looks up the first ``<td class="border">`` element
    and passes its text to ``split_line``, which prints the extracted part.
    Pages without such a cell are skipped.

    Args:
        ngo_url: URL of the NGO detail page to download.
    """
    response = urlopen(ngo_url)
    try:
        html = response.read()
    finally:
        # Release the connection even if reading fails.
        response.close()

    soup = BeautifulSoup(html)
    td = soup.find('td', {'class': 'border'})
    if td is None:
        # No details cell on this page; skip it rather than failing with
        # AttributeError on ``None.text``.
        return
    split_line(td.text)
def get_ngo_names(gujrat_url):
    """Print each NGO linked from the listing page and fetch its details.

    Downloads ``gujrat_url``, walks every ``<a>`` inside each
    ``<div id="mainbox">``, prints the link text and calls
    ``get_ngo_detail`` on the linked page.

    Args:
        gujrat_url: URL of the listing page to download.

    Returns:
        list: The link texts that were printed, in page order.
    """
    response = urlopen(gujrat_url)
    try:
        html = response.read()
    finally:
        # Release the connection even if reading fails.
        response.close()

    soup = BeautifulSoup(html)
    names = []
    for box in soup.find_all('div', {'id': 'mainbox'}):
        for anchor in box.find_all('a'):
            href = anchor.get('href')
            if href is None:
                # An anchor without a target cannot be followed, and
                # concatenating None below would raise TypeError.
                continue
            name = anchor.get_text()
            print(name)
            names.append(name)
            # NOTE(review): the href is appended to the site root, so this
            # presumably expects site-relative links -- confirm.
            get_ngo_detail('http://www.gujarat.ngosindia.com/' + href)
    return names
# Run the crawl starting from BASE_URL, then print the function's return value.
a = get_ngo_names(BASE_URL)
# The parenthesised form prints the same under Python 2 and is valid Python 3.
print(a)
But when I run this script, I only get the NGO names and contact persons. I want the email, telephone number, website, purpose, and contact person for each NGO.