Scrapy模拟登陆
方法
正常模拟登陆(scrapy模拟登陆方式)
- 直接携带cookie
模拟登陆人人网
import scrapy


class RenSpider(scrapy.Spider):
    """Log in to renren.com by carrying the cookies of an already
    logged-in browser session on the first request."""

    name = 'ren'
    # fixed: was 'renren,com' — a comma typo that would never match the domain
    allowed_domains = ['renren.com']
    start_urls = ['http://www.renren.com/975472415/profile']

    def start_requests(self):
        # Cookie header string copied from a logged-in browser session.
        cookies = 'xxxx=xxxx; xxxx=xxxx; xxxxx=xxxxx; '
        # scrapy.Request only accepts cookies as a dict, so parse the
        # "k=v; k=v" string.  The trailing '; ' leaves an empty element
        # after split(); skip it (`if i`) or split('=')[1] raises IndexError.
        cookie = {
            i.split('=')[0]: i.split('=')[1]
            for i in cookies.split('; ') if i
        }
        yield scrapy.Request(
            url=self.start_urls[0],
            callback=self.parse,
            cookies=cookie,
        )

    def parse(self, response):
        # Dump the page so we can open it and verify the login worked.
        with open('open.html', 'w', encoding='utf-8') as f:
            f.write(response.body.decode())

找到发送post请求的URL,带上信息,发送请求
- 模拟登陆github
import scrapy


class GithubSpider(scrapy.Spider):
    """Log in to GitHub by scraping the hidden fields off the login page
    and POSTing them together with the credentials to /session."""

    name = 'github'
    allowed_domains = ['github.com']
    start_urls = ['https://github.com/login']  # the login page

    def parse(self, response):
        # Hidden form fields GitHub requires alongside the credentials.
        commit = 'Sign in'
        authenticity_token = response.xpath(
            "//input[@name='authenticity_token']/@value").extract_first()
        login = 'sansanbudejiuya'  # replace with a real username
        password = 'sansanbudejiuya'  # replace with a real password
        timestamp = response.xpath(
            "//input[@name='timestamp']/@value").extract_first()
        timestamp_secret = response.xpath(
            "//input[@name='timestamp_secret']/@value").extract_first()
        # Assemble the form payload to submit.
        data = {
            'commit': commit,
            'authenticity_token': authenticity_token,
            # 'ga_id': ga_id,
            'login': login,
            'password': password,
            'webauthn-support': 'supported',
            'webauthn-iuvpaa-support': 'unsupported',
            'timestamp': timestamp,
            'timestamp_secret': timestamp_secret,
        }
        yield scrapy.FormRequest(
            # URL that actually receives the login POST
            url='https://github.com/session',
            # the form payload
            formdata=data,
            # inspect the response to confirm the login worked
            callback=self.after_login,
        )

    def after_login(self, response):
        with open('github.html', 'w', encoding='utf-8') as f:
            f.write(response.body.decode())

selenium模拟登陆
- 找到对应的input标签 输入文字登陆
快速登陆条件
- 数据都在页面上
- scrapy.FormRequest.from_response()方法来实现
- formdata输入login以及password
import scrapy


class GithubSpider(scrapy.Spider):
    """Log in to GitHub with FormRequest.from_response, which reads the
    login form (action URL and hidden fields) straight from the page, so
    only the credentials need to be supplied."""

    name = 'github2'
    allowed_domains = ['github.com']
    start_urls = ['https://github.com/login']

    def parse(self, response):
        yield scrapy.FormRequest.from_response(
            # the response that contains the login form
            response=response,
            # only the credentials; hidden fields are filled in automatically
            formdata={
                'login': 'sansan',        # replace with a real username
                'password': 'budejiuya',  # replace with a real password
            },
            callback=self.after_login,
        )

    def after_login(self, response):
        with open('github2.html', 'w', encoding='utf-8') as f:
            f.write(response.body.decode())