1

I am trying to collect the list items from each drop-down menu on this page. Using Selenium with Python 3.6, I can reach the 'li' elements and read the 'href' attribute of each link, but the problem is that I can't get the text of each list item.

My Code is below:

from selenium import webdriver
from selenium.webdriver.support.ui import WebDriverWait
from selenium.common.exceptions import StaleElementReferenceException
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import Select
from selenium.common.exceptions import NoSuchElementException

from bs4 import BeautifulSoup
from time import sleep


# Page whose drop-down menus are being scraped.
link = 'http://www.bobaedream.co.kr/cyber/CyberCar.php?gubun=I'

# Start a PhantomJS session with a 1920x1080 window and load the page.
driver = webdriver.PhantomJS()
driver.set_window_size(1920, 1080)
driver.get(link)
sleep(0.75)  # fixed pause after navigation before reading the page

# Parse the current page source (not used further in this snippet).
soup = BeautifulSoup(driver.page_source, "html.parser", from_encoding='utf-8')

# Collect a (text, href) pair for every anchor in the manufacturer list,
# skipping any anchor whose text is '전체'.
manufacturers = []
for anchor in driver.find_elements_by_css_selector("#layer_maker ul.list li a"):
    if anchor.text == '전체':
        continue
    manufacturers.append((str(anchor.text), str(anchor.get_attribute('href'))))

for manufacturer in manufacturers:
    print(manufacturer)

My result is below:

('', "javascript:selChange('maker', '0', '%EC%A0%84%EC%B2%B4');")
('', "javascript:selChange('maker', '1', 'BMW');")
('', "javascript:selChange('maker', '21', '%EB%B2%A4%EC%B8%A0');")
('', "javascript:selChange('maker', '32', '%EC%95%84%EC%9A%B0%EB%94%94');")
('', "javascript:selChange('maker', '44', '%ED%8F%AD%EC%8A%A4%EB%B0%94%EA%B2%90');")
('', "javascript:selChange('maker', '13', '%EB%A0%89%EC%84%9C%EC%8A%A4');")
('', "javascript:selChange('maker', '97', '%EB%AF%B8%EB%8B%88');")
('', "javascript:selChange('maker', '2', 'GM');")
('', "javascript:selChange('maker', '77', 'GMC');")
('', "javascript:selChange('maker', '5', '%EB%8B%9B%EC%82%B0');")
('', "javascript:selChange('maker', '6', '%EB%8B%A4%EC%9D%B4%ED%95%98%EC%93%B0');")
('', "javascript:selChange('maker', '7', '%EB%8B%B7%EC%A7%80');")
('', "javascript:selChange('maker', '9', '%EB%8F%84%EC%9A%94%ED%83%80');")
('', "javascript:selChange('maker', '10', '%EB%9E%80%EC%B9%98%EC%95%84');")
('', "javascript:selChange('maker', '11', '%EB%9E%8C%EB%B3%B4%EB%A5%B4%EA%B8%B0%EB%8B%88');")
('', "javascript:selChange('maker', '12', '%EB%9E%9C%EB%93%9C%EB%A1%9C%EB%B2%84');")
('', "javascript:selChange('maker', '14', '%EB%A1%9C%EB%B2%84');")
('', "javascript:selChange('maker', '15', '%EB%A1%9C%ED%84%B0%EC%8A%A4');")
('', "javascript:selChange('maker', '16', '%EB%A1%A4%EC%8A%A4%EB%A1%9C%EC%9D%B4%EC%8A%A4');")
('', "javascript:selChange('maker', '61', '%EB%A5%B4%EB%85%B8');")
('', "javascript:selChange('maker', '17', '%EB%A7%81%EC%BB%A8');")
('', "javascript:selChange('maker', '18', '%EB%A7%88%EC%84%B8%EB%9D%BC%ED%8B%B0');")
('', "javascript:selChange('maker', '19', '%EB%A7%88%EC%AF%94%EB%8B%A4');")
('', "javascript:selChange('maker', '1003', '%EB%A7%A5%EB%9D%BC%EB%A0%8C');")
('', "javascript:selChange('maker', '60', '%EB%A8%B8%ED%81%90%EB%A6%AC');")
('', "javascript:selChange('maker', '20', '%EB%AF%B8%EC%93%B0%EB%B9%84%EC%8B%9C');")
('', "javascript:selChange('maker', '82', '%EB%AF%B8%EC%AF%94%EC%98%A4%EC%B9%B4');")
('', "javascript:selChange('maker', '22', '%EB%B2%A4%ED%8B%80%EB%A6%AC');")
('', "javascript:selChange('maker', '23', '%EB%B3%BC%EB%B3%B4');")
('', "javascript:selChange('maker', '1009', '%EB%B6%81%EA%B8%B0%EC%9D%80%EC%83%81');")
('', "javascript:selChange('maker', '88', '%EB%B6%80%EA%B0%80%ED%8B%B0');")
('', "javascript:selChange('maker', '24', '%EB%B7%B0%EC%9D%B5');")
('', "javascript:selChange('maker', '99', '%EB%B9%84%EC%9D%B4%EC%8A%A4%EB%A7%8C');")
('', "javascript:selChange('maker', '25', '%EC%82%AC%EB%B8%8C');")
('', "javascript:selChange('maker', '94', '%EC%83%88%ED%84%B4');")
('', "javascript:selChange('maker', '29', '%EC%89%90%EB%B3%B4%EB%A0%88');")
('', "javascript:selChange('maker', '27', '%EC%8A%A4%EB%B0%94%EB%A3%A8');")
('', "javascript:selChange('maker', '28', '%EC%8A%A4%EC%A6%88%ED%82%A4');")
('', "javascript:selChange('maker', '103', '%EC%8A%A4%EC%B9%B4%EB%8B%88%EC%95%84');")
('', "javascript:selChange('maker', '93', '%EC%8A%A4%ED%8C%8C%EC%9D%B4%EC%BB%A4');")
('', "javascript:selChange('maker', '30', '%EC%8B%9C%ED%8A%B8%EB%A1%9C%EC%97%A5');")
('', "javascript:selChange('maker', '33', '%EC%95%8C%ED%8C%8C%EB%A1%9C%EB%A9%94%EC%98%A4');")
('', "javascript:selChange('maker', '62', '%EC%95%A0%EC%8A%A4%ED%84%B4%EB%A7%88%ED%8B%B4');")
('', "javascript:selChange('maker', '95', '%EC%96%B4%ED%81%90%EB%9D%BC');")
('', "javascript:selChange('maker', '34', '%EC%98%A4%ED%8E%A0');")
('', "javascript:selChange('maker', '1011', '%EC%98%A4%EC%8A%A4%ED%8B%B4');")
('', "javascript:selChange('maker', '35', '%EC%98%AC%EC%A6%88%EB%AA%A8%EB%B9%8C');")
('', "javascript:selChange('maker', '83', '%EC%9B%A8%EC%8A%A4%ED%8A%B8%ED%95%84%EB%93%9C');")
('', "javascript:selChange('maker', '36', '%EC%9D%B4%EC%8A%A4%EC%A6%88');")
('', "javascript:selChange('maker', '81', '%EC%9D%B8%ED%94%BC%EB%8B%88%ED%8B%B0');")
('', "javascript:selChange('maker', '37', '%EC%9E%AC%EA%B7%9C%EC%96%B4');")
('', "javascript:selChange('maker', '96', '%EC%A7%80%ED%94%84');")
('', "javascript:selChange('maker', '1006', '%ED%85%8C%EC%8A%AC%EB%9D%BC');")
('', "javascript:selChange('maker', '38', '%EC%BA%90%EB%94%9C%EB%9D%BD');")
('', "javascript:selChange('maker', '89', '%EC%BD%94%EB%8B%89%EC%84%B8%ED%81%AC');")
('', "javascript:selChange('maker', '39', '%ED%81%AC%EB%9D%BC%EC%9D%B4%EC%8A%AC%EB%9F%AC');")
('', "javascript:selChange('maker', '84', '%ED%8C%8C%EA%B0%80%EB%8B%88');")
('', "javascript:selChange('maker', '41', '%ED%8E%98%EB%9D%BC%EB%A6%AC');")
('', "javascript:selChange('maker', '42', '%ED%8F%AC%EB%93%9C');")
('', "javascript:selChange('maker', '43', '%ED%8F%AC%EB%A5%B4%EC%89%90');")
('', "javascript:selChange('maker', '1008', '%ED%8F%AC%ED%86%A4');")
('', "javascript:selChange('maker', '45', '%ED%8F%B0%ED%8B%B0%EC%95%85');")
('', "javascript:selChange('maker', '46', '%ED%91%B8%EC%A1%B0');")
('', "javascript:selChange('maker', '91', '%ED%94%BC%EC%8A%A4%EC%BB%A4');")
('', "javascript:selChange('maker', '47', '%ED%94%BC%EC%95%84%ED%8A%B8');")
('', "javascript:selChange('maker', '48', '%ED%97%88%EB%A8%B8');")
('', "javascript:selChange('maker', '50', '%ED%98%BC%EB%8B%A4');")
('', "javascript:selChange('maker', '76', '%ED%99%80%EB%8D%B4');")
('', "javascript:selChange('maker', '4', '%EA%B8%B0%ED%83%80 %EC%88%98%EC%9E%85%EC%B0%A8');") 

This is the captured image of the HTML source: enter image description here

I don't understand why the text part is empty and why all of the Korean characters are garbled (the Korean text is the third argument of javascript:selChange). What I would like to do is fill in the text part and get the Korean characters displayed correctly.

Please help.

1 Answer 1

1

Try using the following code:

from urllib import parse

...
# Build a (label, decoded href) pair for every manufacturer link, skipping
# the '전체' entry.
manufacturers = []
for anchor in driver.find_elements_by_css_selector("#layer_maker ul.list li a"):
    # Read the label once per element: every get_attribute() call is a
    # separate command sent to the browser driver.
    label = anchor.get_attribute('text')
    if label == '전체':
        continue
    # unquote() turns the percent-encoded sequences in the href (e.g.
    # '%EB%B2%A4%EC%B8%A0') back into readable Korean text.
    manufacturers.append((label, parse.unquote(anchor.get_attribute('href'))))

for manufacturer in manufacturers:
    print(manufacturer)

Output:

('BMW', "javascript:selChange('maker', '1', 'BMW');")
('벤츠', "javascript:selChange('maker', '21', '벤츠');")
('아우디', "javascript:selChange('maker', '32', '아우디');")
('폭스바겐', "javascript:selChange('maker', '44', '폭스바겐');")
...
Sign up to request clarification or add additional context in comments.

1 Comment

Thanks, Andersson. I finally figured it out!! I deeply appreciate your help. Thanks!!

Your Answer

By clicking “Post Your Answer”, you agree to our terms of service and acknowledge you have read our privacy policy.

Start asking to get answers

Find the answer to your question by asking.

Ask question

Explore related questions

See similar questions with these tags.