博客
关于我
强烈建议你试试无所不能的chatGPT,快点击我
005 动态加载实例
阅读量:4960 次
发布时间:2019-06-12

本文共 2978 字,大约阅读时间需要 9 分钟。

from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from time import sleep

# Build an options object that makes Chrome start without a visible window.
chrome_options = Options()
chrome_options.add_argument('--headless')
chrome_options.add_argument('--disable-gpu')

# Instantiate the driver.  The path is a raw string so that no backslash can
# be read as an escape sequence (the original contained the invalid escape
# "\c"); the resulting path value is unchanged.
# NOTE(review): executable_path=, chrome_options= and find_element_by_id()
# are the Selenium 3 spellings this script was written against -- confirm the
# installed Selenium version before upgrading.
bro = webdriver.Chrome(executable_path=r'F:\anaconda\chromedriver.exe', chrome_options=chrome_options)
try:
    # Send the request.
    bro.get(url='http://www.baidu.com')

    # Optional screenshot (left disabled, as in the original):
    # bro.save_screenshot('first.jpg')

    # Locate the search input box.
    my_input = bro.find_element_by_id('kw')
    # Type the search keyword into it.
    my_input.send_keys('美女')

    # Locate the search button and click it.
    my_button = bro.find_element_by_id('su')
    my_button.click()

    # Grab the source of the page currently shown by the browser.
    page_text = bro.page_source
    print(type(page_text))
finally:
    # Always shut the browser down, even when one of the steps above raises,
    # so no headless Chrome / chromedriver process is left behind.
    bro.quit()
无头浏览器实现
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from time import sleep

# Raw string: keeps the same path value while avoiding the invalid escape
# sequence "\c" that the original literal relied on.
bro = webdriver.Chrome(r'F:\anaconda\chromedriver.exe')
url = 'https://36kr.com/information/contact'
# Scrolls the window to the bottom of the document, which makes the page
# load its next batch of dynamically loaded data.
js = 'window.scrollTo(0, document.body.scrollHeight)'

try:
    bro.get(url=url)

    # Scroll three times, pausing after every scroll.  The original did not
    # wait after the last scroll, so page_source was read before the content
    # triggered by that scroll had any time to arrive.
    for _ in range(3):
        bro.execute_script(js)
        sleep(2)

    page_text = bro.page_source
    print(page_text)

    with open('./36k.html', 'w', encoding='utf-8') as fp:
        fp.write(page_text)
finally:
    # quit() instead of close(): it ends the whole driver session rather than
    # only closing the current window, and it runs on the error path too.
    bro.quit()
加载滚动条
import os
import random
import re
from multiprocessing.dummy import Pool

import requests
from lxml import etree

url = 'https://www.pearvideo.com/category_8'
headers = {
    # Bug fix: the original key was misspelled 'Use-Agent', so the browser
    # identification string was never sent under the User-Agent header name.
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.110 Safari/537.36',
}

# NOTE(review): verify=False disables TLS certificate verification for this
# request only (the detail-page requests below do verify) -- confirm whether
# it is still needed.
response = requests.get(url=url, headers=headers, verify=False).content.decode()
xpath_data = etree.HTML(response)
li_list = xpath_data.xpath('//*[@id="listvideoListUl"]/li')

# Collect the direct video URLs that the thread pool will download.
video_url_list = []
for li in li_list:
    hrefs = li.xpath('.//div[@class="vervideo-bd"]/a/@href')
    if not hrefs:
        # List item without a detail link: skip it instead of raising IndexError.
        continue
    v_href = 'https://www.pearvideo.com/' + hrefs[0]
    d_response = requests.get(url=v_href, headers=headers).content.decode()
    matches = re.findall('srcUrl="(.*?)",', d_response, re.S)
    if not matches:
        # Detail page without a srcUrl entry: nothing to download.
        continue
    video_url_list.append(matches[0])


def download_video(link):
    """Download one video and return its raw bytes."""
    return requests.get(url=link, headers=headers).content


def save_video(data, index=None):
    """Write the bytes in ``data`` to ``video/<index>.mp4``.

    When ``index`` is omitted a random number in 1..1000 is used, which is
    what the original did for every file; passing an explicit index prevents
    two videos from drawing the same number and overwriting each other.
    """
    if index is None:
        index = random.randint(1, 1000)
    video_name = 'video/' + str(index) + '.mp4'
    with open(video_name, 'wb') as fp:
        fp.write(data)


# The output directory must exist before any worker opens a file inside it.
os.makedirs('video', exist_ok=True)

# Create a pool of 5 worker threads.
pool = Pool(5)
try:
    # map returns a list holding the binary content of every downloaded video.
    video_data_list = pool.map(download_video, video_url_list)
    # Give every video its own sequential number as the file name.
    pool.starmap(
        save_video,
        [(data, index) for index, data in enumerate(video_data_list, start=1)],
    )
finally:
    # Release the worker threads even if a download or a write failed.
    pool.close()
    pool.join()
多线程的实现
from selenium import webdriver
from time import sleep

# Raw string: keeps the same path value while avoiding the invalid escape
# sequence "\c" that the original literal relied on.
bro = webdriver.Chrome(r'F:\anaconda\chromedriver.exe')
try:
    bro.get('https://qzone.qq.com/')
    sleep(1)

    # Switch into the 'login_frame' iframe before looking up the login
    # elements below.
    bro.switch_to.frame('login_frame')

    # Click the 'switcher_plogin' element.
    # NOTE(review): presumably this switches to account/password login -- confirm.
    user = bro.find_element_by_id('switcher_plogin')
    user.click()
    sleep(3)

    # Fill in the account name (placeholder value).
    username = bro.find_element_by_id('u')
    username.send_keys('*****')
    sleep(3)

    # Fill in the password (placeholder value).
    password = bro.find_element_by_id('p')
    password.send_keys('*****')
    sleep(2)

    # Submit the login form and leave the result on screen for a while.
    login = bro.find_element_by_id('login_button')
    login.click()
    sleep(10)
finally:
    # Always end the driver session, even when an element lookup fails.
    bro.quit()
iframe的实现

 

转载于:https://www.cnblogs.com/abc23/p/10751549.html

你可能感兴趣的文章
GG同步sqlserver报错一个案例 Invalid date format
查看>>
Opencv246+vs2012生成不依赖编译环境的exe文件
查看>>
jquery对checkbox的操作汇总
查看>>
ps 第一篇
查看>>
nginx配置url中带问号的rewrite跳转
查看>>
SICP习题1.45解答
查看>>
[转]Delphi 控件属性和事件
查看>>
iOS:事件处理机制
查看>>
ASPose导出excel简单操作
查看>>
基础数据类型
查看>>
SQL Server开启远程连接
查看>>
codeforces 978C Letters
查看>>
联系作者
查看>>
PHP通用函数 - 日期生成时间轴
查看>>
Eclipse连接HBase 报错:org.apache.hadoop.hbase.PleaseHoldException: Master is initializing
查看>>
【HDU】1199 Color the Ball
查看>>
【HDU】3949 XOR
查看>>
Four-operations: 使用node.js实现四则运算程序
查看>>
Spring Cloud 注册中心Eureka
查看>>
py-day2-sys模块、os模块、运算符、列表、字典
查看>>