Install
일반적인 설치과정과 동일하게 "pip install selenium"을 사용하여 설치할 수 있다.
Import
import requests
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.common.by import By
import time
Code
"๋ด์ด ์๋๋ผ ์ฌ๋์ด์ผ"๋ฅผ ์๋ ค์ฃผ๊ธฐ ์ํด user_agent๋ฅผ ์ถ๊ฐํ ํ webdriver ๊ฐ์ฒด๋ฅผ ์์ฑํ๋ค.
(webdriver๋ฅผ ๋์ผ ๊ฒฝ๋ก ๋ด์ ์์น์์ผ ๋ณ๋์ ๊ฒฝ๋ก ์ง์ ์ฝ๋๋ ์ถ๊ฐํ์ง ์์๋ค.)
# Build the Chrome options object first; every call below configures it.
options = Options()

# Send a regular desktop-browser User-Agent so the site sees the client as a
# person using Chrome rather than as a bot.
user_agent = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/105.0.0.0 Safari/537.36'
options.add_argument('user-agent=' + user_agent)

# Exclude Chrome's "enable-logging" switch (commonly done to keep the console
# output quiet).
options.add_experimental_option("excludeSwitches", ["enable-logging"])

# Download preferences. The directory value is a placeholder (it appears to be
# mis-encoded text) and must be replaced with a real download directory.
options.add_experimental_option('prefs', {
    "download.default_directory": r"๋ค์ด๋ก๋ ๋์ ๊ฒฝ๋ก ์ง์ ",
    "download.prompt_for_download": False,
    "download.directory_upgrade": True,
    "safebrowsing.enabled": True,
})

# No driver path is passed: the tutorial keeps the webdriver binary in the
# same directory as the script.
driver = webdriver.Chrome(options=options)
์์ฑํ driver ๊ฐ์ฒด๋ฅผ ์ฌ์ฉํ์ฌ URL์ ์ ์ํ๋ค.
# Address of the page to crawl. The literal is a placeholder (it appears to be
# mis-encoded text) and must be replaced with the real URL.
url1 = '๋์ URL'
# Navigate the browser session to that address.
driver.get(url1)
# Pause for one second after the navigation call before continuing.
time.sleep(1)
ํน์ ๋ฒํผ ํด๋ฆญ, ํ๊ทธ ๋ด ์กด์ฌํ๋ ๊ฐ ํ์ฑ๊ณผ ๊ฐ์ ๋์์ Selenium ๋ด ์กด์ฌํ๋ Xpath, CSS_Selector๋ฅผ ์ฌ์ฉํ์ฌ ๊ตฌํํ์๋ค.
Xpath ์ CSS_Selector์ ๊ฐ๋ฐ์ ๋๊ตฌ์์ ์ฐ์ธก ๋ฒํผ์ ๋๋ฅด๊ฒ ๋๋ฉด ์ฝ๊ฒ ํ์ธํ ์ ์๋ค.
Selenium์ ์ฌ์ฉํ์ฌ ํฌ๋กค๋ง์ ์ ์ํ ๋ Xpath, Selector ๊ฐ์ ๋น๊ตํ์ฌ ์ผ์นํ๋ ํจํด์ ํ์ธํ๋ค. ํด๋น ํฌ๋กค๋ง์์๋ ์ผ์ ํ ํจํด์ด ํ์ธ๋์ด Format ์ ์ฌ์ฉํ์ฌ ์ฝ๊ฒ ์ ์ํ ์ ์์๋ค.
대략적인 소스코드는 다음과 같으며 소스코드 전체는 깃허브에서 확인할 수 있다.
https://github.com/byeongyeolahn/Bazaar_Mobile_malware_crawling
import requests
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.common.by import By
import time
# Crawler script: open the target site in Chrome, search for a term, then walk
# the result pages and collect the link of every listed sample.

# --- Option settings -------------------------------------------------------
options = Options()

# Send a regular desktop-browser User-Agent so the site sees the client as a
# person using Chrome rather than as a bot.
user_agent = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/105.0.0.0 Safari/537.36'
options.add_argument('user-agent=' + user_agent)

# Exclude Chrome's "enable-logging" switch (commonly done to keep the console
# output quiet).
options.add_experimental_option("excludeSwitches", ["enable-logging"])

# Download preferences. The directory value is a placeholder (it appears to be
# mis-encoded text) and must be replaced with a real download directory.
options.add_experimental_option('prefs', {
    "download.default_directory": r"๋ค์ด๋ก๋ ๋์ ๊ฒฝ๋ก ์ง์ ",
    "download.prompt_for_download": False,
    "download.directory_upgrade": True,
    "safebrowsing.enabled": True,
})
driver = webdriver.Chrome(options=options)

# --- Initial search term (placeholder; replace with the real query) --------
query = '๊ฒ์์ด'

# --- Open the target URL (placeholder; replace with the real address) ------
url1 = '๋์ URL'
driver.get(url1)
time.sleep(1)

# --- Navigate to the apk search-result screen ------------------------------
driver.find_element(By.XPATH, '/html/body/main/div[1]/div/p[2]/a').click()
search_tab = driver.find_element(By.CSS_SELECTOR, '#search')
search_tab.send_keys(query)
search_tab.send_keys(Keys.ENTER)
time.sleep(3)

# --- Collect the sample links of the apk files -----------------------------
# Each entry is the href of the link in the second column of a result row.
# The original comment calls these "sha256 values" (presumably the hash is
# part of the link -- verify against the site).
ROWS_PER_PAGE = 250  # upper bound of rows read from one result page
file_hash = []
for k in range(2, 6):
    # Click the k-th entry of the pagination bar; the XPath follows a fixed
    # pattern, so it is filled in with str.format.
    driver.find_element(
        By.XPATH, '//*[@id="samples_paginate"]/ul/li[{}]/a'.format(k)
    ).click()
    # NOTE(review): there is no wait after the click; if the table reloads
    # asynchronously a short wait may be needed here -- confirm on the site.

    # Fetch every row link in one query instead of addressing rows 1..250 by
    # index, so a page with fewer than 250 rows does not raise
    # NoSuchElementException.
    row_links = driver.find_elements(
        By.CSS_SELECTOR, '#samples > tbody > tr > td:nth-child(2) > a'
    )
    file_hash.extend(
        link.get_attribute('href') for link in row_links[:ROWS_PER_PAGE]
    )

# The rest of the script is published on GitHub:
# https://github.com/byeongyeolahn/Bazaar_Mobile_malware_crawling
오류, 잘못된 점 또는 궁금한 점이 있으시다면 댓글 남겨주세요!
'개발💻 > Python' 카테고리의 다른 글
[Python] Zipํ์ผ ์์ถ ํด์ (2)(๋น๋ฐ๋ฒํธ ํฌํจ) (0) | 2022.10.04 |
---|---|
[Python] Selenium์ ์ฌ์ฉํ ํฌ๋กค๋ง ์ ์(2) (0) | 2022.09.30 |
[Python] pymysql ์ ํตํด ๋ฐ์ดํฐ ๋ฒ ์ด์ค ๊ฐ(Tuple)์ List๋ก ๊ฐ์ ธ์ค๋ ๋ฒ (0) | 2022.09.27 |
[Python] Zipํ์ผ ์์ถ ํด์ (1)(๋น๋ฐ๋ฒํธ ํฌํจ) (0) | 2022.09.17 |
[Python] Selenium ์ฌ์ฉํ๊ธฐ (0) | 2022.09.09 |