# Script: drive a headless Chrome to Baidu, submit a search query, and print
# the type of the rendered page source.
from time import sleep

from selenium import webdriver
from selenium.webdriver.chrome.options import Options

# Options object that tells Chrome to start without a visible window.
chrome_options = Options()
chrome_options.add_argument('--headless')
chrome_options.add_argument('--disable-gpu')

# Instantiate the driver. A raw string keeps the Windows path free of
# accidental escape sequences (the previous literal relied on the invalid
# escape "\c" being passed through unchanged).
# NOTE(review): `executable_path=`, `chrome_options=` and
# `find_element_by_id` are the Selenium 3 spellings -- confirm the installed
# Selenium version before upgrading; newer releases spell these differently.
bro = webdriver.Chrome(
    executable_path=r'F:\anaconda\chromedriver.exe',
    chrome_options=chrome_options,
)

try:
    # Load the search page.
    bro.get(url='http://www.baidu.com')

    # A screenshot of the rendered page could be taken here with
    # bro.save_screenshot('first.jpg').

    # Locate the element with id "kw" and type the query into it.
    my_input = bro.find_element_by_id('kw')
    my_input.send_keys('美女')

    # Locate the element with id "su" (the "Baidu search" button) and click it.
    my_button = bro.find_element_by_id('su')
    my_button.click()

    # Source of the page as currently displayed by the browser.
    page_text = bro.page_source
    print(type(page_text))
finally:
    # Always end the session so the browser and driver processes do not leak
    # when any step above raises.
    bro.quit()
# Script: open a 36kr page in Chrome, scroll to the bottom several times so
# that dynamically loaded content is appended, then print the rendered page
# source and save it to ./36k.html.
from time import sleep

from selenium import webdriver
from selenium.webdriver.chrome.options import Options

# Raw string: same path value as before, without the invalid "\c" escape.
bro = webdriver.Chrome(r'F:\anaconda\chromedriver.exe')

url = 'https://36kr.com/information/contact'
# JavaScript that scrolls the window to the bottom of the document.
js = 'window.scrollTo(0, document.body.scrollHeight)'

try:
    bro.get(url=url)

    # Scroll three times, pausing after every scroll -- including the last
    # one -- so content triggered by a scroll has time to load before the
    # page source is read.
    for _ in range(3):
        bro.execute_script(js)
        sleep(2)

    page_text = bro.page_source
finally:
    # quit() ends the whole WebDriver session; running it in `finally`
    # guarantees cleanup even when loading or scrolling fails.
    bro.quit()

print(page_text)
with open('./36k.html', 'w', encoding='utf-8') as fp:
    fp.write(page_text)
# Script: collect the video URLs listed on a pearvideo category page, download
# the videos concurrently with a thread pool, and write them to video/<n>.mp4.
import os
import random
import re
from multiprocessing.dummy import Pool

import requests
from lxml import etree

url = 'https://www.pearvideo.com/category_8'
headers = {
    # The key must be spelled "User-Agent"; the previous "Use-Agent" key meant
    # this value was never sent as the User-Agent header.
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.110 Safari/537.36',
}
# Seconds to wait on a stalled connection before giving up instead of
# blocking forever.
REQUEST_TIMEOUT = 30
# Directory that receives the downloaded files.
VIDEO_DIR = 'video'

# NOTE(review): verify=False disables TLS certificate verification for this
# request only -- confirm whether it is still needed.
response = requests.get(
    url=url, headers=headers, verify=False, timeout=REQUEST_TIMEOUT
).content.decode()
xpath_data = etree.HTML(response)
li_list = xpath_data.xpath('//*[@id="listvideoListUl"]/li')

# URLs of the video files, gathered first so they can be fetched concurrently.
video_url_list = []
for li in li_list:
    hrefs = li.xpath('.//div[@class="vervideo-bd"]/a/@href')
    if not hrefs:
        # List item without a detail link: nothing to download.
        continue
    v_href = 'https://www.pearvideo.com/' + hrefs[0]
    d_response = requests.get(
        url=v_href, headers=headers, timeout=REQUEST_TIMEOUT
    ).content.decode()
    matches = re.findall(r'srcUrl="(.*?)",', d_response, re.S)
    if not matches:
        # Detail page does not expose a srcUrl; skip it rather than crash.
        continue
    video_url_list.append(matches[0])


def dowmloadVideo(link):
    """Return the raw bytes served at *link*.

    The name (including its spelling) is kept unchanged for any existing
    references.
    """
    return requests.get(
        url=link, headers=headers, timeout=REQUEST_TIMEOUT
    ).content


def save_video(data, index=None):
    """Write *data* to ``video/<index>.mp4``.

    Args:
        data: Binary content of one video.
        index: Number used as the file name. When omitted, a random number
            in 1..1000 is used (the original behaviour), which can collide
            with an earlier file and overwrite it.
    """
    if index is None:
        index = random.randint(1, 1000)
    # Make sure the target directory exists before opening the file.
    os.makedirs(VIDEO_DIR, exist_ok=True)
    video_name = VIDEO_DIR + '/' + str(index) + '.mp4'
    with open(video_name, 'wb') as fp:
        fp.write(data)


# Pool of 5 worker threads (multiprocessing.dummy is the thread-backed Pool).
pool = Pool(5)
try:
    # map() returns the downloaded binary data, in the same order as the URLs.
    video_data_list = pool.map(dowmloadVideo, video_url_list)
    # Number the files sequentially so no two videos share a file name.
    pool.starmap(
        save_video,
        [(data, index) for index, data in enumerate(video_data_list, start=1)],
    )
finally:
    pool.close()
    pool.join()
# Script: open the Qzone landing page in Chrome, switch into the login
# iframe, fill in the account fields and submit the form.
from time import sleep

from selenium import webdriver

# Raw string: same path value as before, without the invalid "\c" escape.
bro = webdriver.Chrome(r'F:\anaconda\chromedriver.exe')

try:
    bro.get('https://qzone.qq.com/')
    sleep(1)

    # The login widgets live inside an iframe; switch the driver's context
    # to it before looking up any of them.
    bro.switch_to.frame('login_frame')

    # Click the element with id "switcher_plogin".
    # NOTE(review): presumably this switches the form to account/password
    # login -- confirm against the page.
    user = bro.find_element_by_id('switcher_plogin')
    user.click()
    sleep(3)

    # Fill in the account name (placeholder value).
    username = bro.find_element_by_id('u')
    username.send_keys('*****')
    sleep(3)

    # Fill in the password (placeholder value).
    password = bro.find_element_by_id('p')
    password.send_keys('*****')
    sleep(2)

    # Submit the form and leave the browser open for a while afterwards.
    login = bro.find_element_by_id('login_button')
    login.click()
    sleep(10)
finally:
    # Always end the session so the browser and driver processes do not leak
    # when an element lookup or click fails.
    bro.quit()