Skip to content
This repository was archived by the owner on Dec 22, 2023. It is now read-only.

Commit d7653c9

Browse files
LinkedIn Scraper (Updated)
1 parent 426ad71 commit d7653c9

File tree

1 file changed

+68
-62
lines changed
  • Scripts/Web_Scrappers/linkedin_posts_scrapping

1 file changed

+68
-62
lines changed

Scripts/Web_Scrappers/linkedin_posts_scrapping/main.py

Lines changed: 68 additions & 62 deletions
Original file line numberDiff line numberDiff line change
@@ -6,78 +6,84 @@
import time
import getpass

# Can replace this path with your chromedriver path
CHROMEDRIVER_PATH = './chromedriver'

# Page titles the script treats as "still on the sign-in page", i.e. the
# submitted credentials were not accepted.
LOGIN_PAGE_TITLES = (
    "LinkedIn Login, Sign in | LinkedIn",
    "LinkedIn: Log In or Sign Up",
)

# Number of CTRL+END key presses sent to the feed page before it is captured.
SCROLL_COUNT = 50

# Class patterns used to locate feed cards and their parts in the page HTML.
FEED_CARD_CLASS = (
    'feed-shared-update-v2 feed-shared-update-v2--minimal-padding '
    'full-height relative feed-shared-update-v2--e2e artdeco-card ember-view'
)
AUTHOR_BLOCK_CLASS = 'feed-shared-actor__meta relative'
CONTENT_BLOCK_CLASS = 'feed-shared-update-v2__description-wrapper ember-view'
AUTHOR_DESC_CLASS = 'feed-shared-actor__description t-12 t-normal t-black--light'


def prompt_credentials():
    """Ask for the LinkedIn login on the terminal.

    Returns:
        A ``(username, password)`` tuple. The password is read with
        ``getpass`` so it is not echoed.
    """
    print('Please sign in to your LinkedIn Account:')
    username = input("Email or phone number: ")
    password = getpass.getpass('Password: ')
    return username, password


def is_login_page(title):
    """Return True when *title* is one of the known sign-in page titles."""
    return title in LOGIN_PAGE_TITLES


def fetch_feed_html(username, password):
    """Sign in with headless Chrome and return the feed page's HTML.

    Args:
        username: Email or phone number typed into the ``session_key`` field.
        password: Password typed into the ``session_password`` field.

    Returns:
        The page source as a string, or ``None`` when the browser is still on
        a sign-in page after submitting the form.

    The driver is always closed, even if a Selenium call raises.
    """
    chrome_options = Options()
    chrome_options.add_argument("--window-size=1360,768")
    chrome_options.add_argument("headless")
    # NOTE(review): the positional driver path and the find_element_by_*
    # helpers are the Selenium 3 API; confirm the pinned Selenium version
    # before upgrading, as newer releases dropped them.
    driver = webdriver.Chrome(CHROMEDRIVER_PATH, options=chrome_options)
    try:
        driver.get("https://www.linkedin.com")
        user_field = driver.find_element_by_id('session_key')
        pass_field = driver.find_element_by_id('session_password')
        user_field.send_keys(username)
        pass_field.send_keys(password)
        pass_field.send_keys(Keys.ENTER)
        time.sleep(2)  # give the post-login redirect time to settle
        if is_login_page(driver.title):
            print('Invalid Username or Password')
            print('The program will now exit')
            return None

        time.sleep(2)
        print('Fetching Info (This might take a while)...')
        page_body = driver.find_element_by_tag_name('body')
        # Jump to the end of the page repeatedly, presumably so the feed
        # loads more posts before the HTML snapshot is taken.
        for _ in range(SCROLL_COUNT):
            page_body.send_keys(Keys.CONTROL, Keys.END)
        # NOTE(review): indentation was lost in the reviewed copy; this pause
        # is assumed to follow the loop rather than run on every iteration.
        time.sleep(5)
        return driver.page_source
    finally:
        driver.quit()


def extract_posts(soup):
    """Collect the posts found in a parsed feed page.

    Args:
        soup: ``BeautifulSoup`` document of the feed page.

    Returns:
        A list of dicts with the keys ``author``, ``description``, ``added``
        and ``text``. Cards missing any of those pieces are skipped.
    """
    posts = []
    for card in soup.find_all('div', attrs={'class': re.compile(FEED_CARD_CLASS)}):
        author = card.find('div', attrs={'class': re.compile(AUTHOR_BLOCK_CLASS)})
        content = card.find('div', attrs={'class': re.compile(CONTENT_BLOCK_CLASS)})
        try:
            # Any missing element yields None here, and the attribute access
            # on None raises AttributeError, which drops the whole card.
            name = author.find('span', attrs={'dir': 'ltr'}).text
            added = author.find('span', attrs={'class': 'visually-hidden'}).text
            description = author.find('span', attrs={'class': AUTHOR_DESC_CLASS}).text
            text = content.find('span', attrs={'dir': 'ltr'}).text
        except AttributeError:
            continue
        posts.append({
            'author': name,
            'description': description,
            'added': added,
            'text': text,
        })
    return posts


def parse_choice(raw, count):
    """Convert a typed menu answer into a 1-based post number.

    Args:
        raw: Text the user typed.
        count: Number of posts on offer.

    Returns:
        The chosen number when it is an integer in ``1..count``; otherwise
        ``None`` (non-numeric text no longer crashes the program).
    """
    try:
        choice = int(raw)
    except ValueError:
        return None
    if 1 <= choice <= count:
        return choice
    return None


def browse_posts(posts):
    """List the posts and show the ones the user picks until they decline."""
    print('Choose the post you want to see :')
    for number, post in enumerate(posts, start=1):
        print("\t" + str(number) + ". " + post['author'] + ". Added: " + post['added'])

    answer = "y"
    while answer == "y":
        choice = parse_choice(input("Enter your choice: "), len(posts))
        if choice is None:
            # Ask again rather than leaving the loop on a bad entry.
            print("Invalid Choice.")
            continue
        post = posts[choice - 1]
        print(post['author'])
        print("Posted: " + post['added'])
        print("Author Description: " + post['description'])
        print(post['text'])
        answer = input('Want to see other posts? (y/n) ')
        print('')


def main():
    """Run the scraper: sign in, capture the feed, then browse its posts."""
    # Sign in and validation part
    username, password = prompt_credentials()
    print("Validating...")
    html = fetch_feed_html(username, password)
    if html is None:
        return
    print('Done')

    # Fetching Posts
    print('Fetching the latest posts for you...')
    posts = extract_posts(BeautifulSoup(html, 'html.parser'))
    if not posts:
        # Bots can be caught by linkedin website if used very frequently
        print("Oops! Seems the the bot has crashed due to over-usage :(")
        print("Please try after 10 mins.")
        return
    print('Done')

    browse_posts(posts)
    print("Thank You")


if __name__ == "__main__":
    main()

0 commit comments

Comments
 (0)