python爬虫：登录百度账户，并上传文件到百度云盘-白红宇

python爬虫：登录百度账户，并上传文件到百度云盘

阅读量：4356 次

发布时间：2019-06-07

本文共 36527 字，大约阅读时间需要 121 分钟。

/**
 * Created by resolvewang on 2017/4/15.
 *
 * Helpers used by the Baidu login flow. Keep this file ES5-only and ASCII-only:
 * it is compiled and executed from Python through PyExecJS.
 */

/**
 * Build a random, upper-cased, GUID-like identifier.
 *
 * Every "x" in the template becomes a random hex digit; the single "y"
 * becomes one of 8, 9, A or B.
 *
 * @returns {string} the generated identifier
 */
function getGid() {
    var template = "xxxxxxx-xxxx-4xxx-yxxx-xxxxxxxxxxxx";

    function fillPlaceholder(placeholder) {
        // Random integer in [0, 15].
        var nibble = 16 * Math.random() | 0;
        var digit;
        if ("x" == placeholder) {
            digit = nibble;
        } else {
            // Force the two high bits to "10" so the digit is 8..b.
            digit = 3 & nibble | 8;
        }
        return digit.toString(16);
    }

    var filled = template.replace(/[xy]/g, fillPlaceholder);
    return filled.toUpperCase();
}

/**
 * Build a random JSONP callback name of the form "bd__cbs__<base36>".
 *
 * @returns {string} the callback name
 */
function getCallback() {
    var randomPart = Math.floor(2147483648 * Math.random());
    return "bd__cbs__" + randomPart.toString(36);
}

Python实现代码：

#-*- coding:utf-8 -*-
"""Log in to a Baidu account and upload a file to Baidu Netdisk.

The login flow is: fetch a token, fetch the RSA public key, encrypt the
password with that key, then post the login form. After a successful login
the session holds the ``BDUSS`` cookie that the upload calls need.

``login.js`` (providing ``getGid`` and ``getCallback``) must be present in the
current working directory.
"""
__author__ = 'Administrator'

import base64
import json
import re
import time
from hashlib import md5
from urllib.parse import urlencode
from zlib import crc32

import execjs
import requests
from Crypto.Cipher import PKCS1_v1_5
from Crypto.PublicKey import RSA
from requests_toolbelt import MultipartEncoder

try:
    # Best effort: silence the InsecureRequestWarning caused by verify=False.
    requests.packages.urllib3.disable_warnings()
except Exception:
    pass

headers = {
    'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_4) AppleWebKit/537.36 '
                  '(KHTML, like Gecko) Chrome/57.0.2987.133 Safari/537.36',
}

# Global session shared by every request so that cookies are kept.
session = requests.session()
session.get('https://pan.baidu.com', headers=headers)

# Endpoint and headers shared by the PCS upload calls.
_PCS_FILE_URL = 'https://{0}/rest/2.0/pcs/file'.format('pcs.baidu.com')
_PCS_HEADERS = {
    "Referer": "http://pan.baidu.com/disk/home",
    "User-Agent": "netdisk;4.6.2.0;PC;PC-Windows;10.0.10240;WindowsBaiduYunGuanJia",
}

# Extracts the argument of a JSONP response: callback(<payload>).
_JSONP_PAYLOAD = re.compile(r'.*?\((.*)\)')


class BufferReader(MultipartEncoder):
    """Proxy that streams multipart form data and reports read progress.

    :param fields: form fields, passed on to ``MultipartEncoder``
    :param boundary: multipart boundary, passed on to ``MultipartEncoder``
    :param callback: optional callable invoked after every ``read`` as
        ``callback(*cb_args, **cb_kwargs)``; ``cb_kwargs`` additionally
        carries ``size`` (total body length) and ``progress`` (bytes read)
    :param cb_args: extra positional arguments for ``callback``
    :param cb_kwargs: extra keyword arguments for ``callback``
    """

    def __init__(self, fields, boundary=None, callback=None, cb_args=(), cb_kwargs=None):
        self._callback = callback
        self._progress = 0
        self._cb_args = cb_args
        # Copy so that the caller's dict is not modified by read().
        self._cb_kwargs = dict(cb_kwargs or {})
        super(BufferReader, self).__init__(fields, boundary)

    def read(self, size=None):
        """Read the next chunk and notify the progress callback."""
        chunk = super(BufferReader, self).read(size)
        self._progress += len(chunk)
        self._cb_kwargs.update({
            # The public ``len`` property computes the length on demand,
            # whereas the private ``_len`` cache may not be filled yet.
            'size': self.len,
            'progress': self._progress
        })
        if self._callback:
            try:
                self._callback(*self._cb_args, **self._cb_kwargs)
            except Exception:
                # A failing progress callback must not abort the upload.
                pass
        return chunk


def _get_runntime():
    """Compile ``login.js`` and return the resulting PyExecJS context.

    Original author's notes: avoid non-ASCII text inside the JS file, and
    make sure the JS runtime (PhantomJS was used) can be found through the
    environment.
    """
    js_runtime = execjs.get()
    with open('login.js', 'r') as f:
        source = f.read()
    return js_runtime.compile(source)


def get_gid():
    """Return a random GID produced by ``getGid`` in ``login.js``."""
    return _get_runntime().call('getGid')


def get_callback():
    """Return a random JSONP callback name from ``getCallback`` in ``login.js``."""
    return _get_runntime().call('getCallback')


def _get_curtime():
    """Return the current Unix time in milliseconds."""
    return int(time.time()*1000)


def _parse_jsonp(text):
    """Return the decoded payload of a JSONP response, or None if unparsable."""
    match = _JSONP_PAYLOAD.search(text)
    if match is None:
        return None
    try:
        # The payload may use single quotes, which json rejects, so they are
        # normalised to double quotes first.
        return json.loads(match.group(1).replace("'", '"'))
    except ValueError:
        return None


def get_token(gid, callback):
    """Fetch the login token.

    :param gid: value returned by ``get_gid``
    :param callback: JSONP callback name returned by ``get_callback``
    :return: the token, or None when the request or parsing failed
    """
    get_data = {
        'tpl': 'netdisk',
        'subpro': 'netdisk_web',
        'apiver': 'v3',
        'tt': _get_curtime(),
        'class': 'login',
        'gid': gid,
        'logintype': 'basicLogin',
        'callback': callback
    }
    headers.update(dict(Referer='http://pan.baidu.com/', Accept='*/*', Connection='keep-alive', Host='passport.baidu.com'))
    # Original author's note: "?getapi" has to follow ".../v2/api/" directly
    # in the URL (rather than being sent through params) to hit the right route.
    resp = session.get(url='https://passport.baidu.com/v2/api/?getapi', params=get_data, headers=headers)
    if resp.status_code == 200 and callback in resp.text:
        payload = _parse_jsonp(resp.text)
        if isinstance(payload, dict) and isinstance(payload.get('data'), dict):
            return payload['data'].get('token')
    print('获取token失败')
    return None


def get_rsa_key(token, gid, callback):
    """Fetch the RSA public key used to encrypt the password.

    :param token: token returned by ``get_token``
    :param gid: value returned by ``get_gid``
    :param callback: JSONP callback name returned by ``get_callback``
    :return: ``(pubkey, key)`` on success, None on failure
    """
    get_data = {
        'token': token,
        'tpl': 'netdisk',
        'subpro': 'netdisk_web',
        'apiver': 'v3',
        'tt': _get_curtime(),
        'gid': gid,
        'callback': callback,
    }
    resp = session.get(url='https://passport.baidu.com/v2/getpublickey', headers=headers, params=get_data)
    if resp.status_code == 200 and callback in resp.text:
        payload = _parse_jsonp(resp.text)
        if isinstance(payload, dict):
            return payload.get('pubkey'), payload.get('key')
    print('获取rsa key失败')
    return None


def encript_password(password, pubkey):
    """Encrypt ``password`` with the PEM public key and return it base64-encoded.

    :param password: plain-text password
    :param pubkey: PEM public key text returned by ``get_rsa_key``
    :return: base64 text of the PKCS#1 v1.5 ciphertext

    The same result can be obtained with the ``rsa`` package
    (``rsa.PublicKey.load_pkcs1_openssl_pem`` + ``rsa.encrypt``).
    """
    # Both the key and the message have to be bytes.
    public_key = RSA.importKey(pubkey.encode('utf-8'))
    encryptor = PKCS1_v1_5.new(public_key)
    encrypted = encryptor.encrypt(password.encode('utf-8'))
    return base64.b64encode(encrypted).decode('utf-8')


def login(token, gid, callback, rsakey, username, password):
    """Post the login form and report the outcome.

    :param token: token returned by ``get_token``
    :param gid: value returned by ``get_gid``
    :param callback: JSONP callback name returned by ``get_callback``
    :param rsakey: the ``key`` element returned by ``get_rsa_key``
    :param username: account name
    :param password: password already encrypted with ``encript_password``
    :return: True when the response contains ``err_no=0``, False otherwise
    """
    post_data = {
        'staticpage': 'http://pan.baidu.com/res/static/thirdparty/pass_v3_jump.html',
        'charset': 'utf-8',
        'token': token,
        'tpl': 'netdisk',
        'subpro': 'netdisk_web',
        'apiver': 'v3',
        'tt': _get_curtime(),
        'codestring': '',
        'safeflg': 0,
        'u': 'http://pan.baidu.com/disk/home',
        'isPhone': '',
        'detect': 1,
        'gid': gid,
        'quick_user': 0,
        'logintype': 'basicLogin',
        'logLoginType': 'pc_loginBasic',
        'idc': '',
        'loginmerge': 'true',
        'foreignusername': '',
        'username': username,
        'password': password,
        'mem_pass': 'on',
        # The key returned together with the public key.
        'rsakey': rsakey,
        'crypttype': 12,
        'ppui_logintime': 33554,
        'countrycode': '',
        'callback': 'parent.'+callback
    }
    resp = session.post(url='https://passport.baidu.com/v2/api/?login', data=post_data, headers=headers)
    succeeded = 'err_no=0' in resp.text
    print('登录成功' if succeeded else '登录失败')
    return succeeded


def _pcs_api(method, dest_path, token):
    """Return the PCS file URL with the query string for ``method``.

    Raises KeyError when the session has no ``BDUSS`` cookie (not logged in).
    """
    params = {
        'method': method,
        'app_id': "250528",
        'BDUSS': session.cookies['BDUSS'],
        't': str(int(time.time())),
        'bdstoken': token,
        'path': dest_path,
        'ondup': "newcopy"
    }
    return '%s?%s' % (_PCS_FILE_URL, urlencode(params))


def upload(dest_path,file_handle,token):
    """Upload a file as a streamed multipart body.

    :param dest_path: destination path on the server, including the file name
    :param file_handle: file object opened in binary mode
    :param token: token returned by ``get_token``
    :return: requests.Response
    :raises KeyError: if the session has no ``BDUSS`` cookie
    """
    api = _pcs_api('upload', dest_path, token)
    files = {'file': (str(int(time.time())), file_handle)}
    body = BufferReader(files)
    header = dict(_PCS_HEADERS)
    header.update({"Content-Type": body.content_type})
    # NOTE(review): verify=False disables TLS certificate verification.
    response = session.post(api, data=body, verify=False, headers=header)
    return response


def rapidupload(dest_path,file_handler,token):
    """Rapid-upload a file by sending only its length and checksums.

    :param dest_path: destination path on the server, including the file name
    :type dest_path: str
    :param file_handler: file handler, e.g. open('file','rb')
    :type file_handler: file
    :param token: token returned by ``get_token``
    :return: requests.Response
    :raises KeyError: if the session has no ``BDUSS`` cookie

        .. note::
            * The file already exists on the server and is not uploaded;
              example response
            {
                "path" : "/apps/album/1.jpg",
                "size" : 372121,
                "ctime" : 1234567890,
                "mtime" : 1234567890,
                "md5" : "cb123afcc12453543ef",
                "fs_id" : 12345,
                "isdir" : 0,
                "request_id" : 12314124
            }
            * The file does not exist and has to be uploaded
            {"errno":404,"info":[],"request_id":XXX}
            * The file is smaller than 256kb (slice-md5 == content-md5)
            {"errno":2,"info":[],"request_id":XXX}
            * The remote file already exists
            {"errno":-8,"info":[],"request_id":XXX}
    """
    _BLOCK_SIZE = 2 ** 20

    # Seek to the end to learn the total length, then rewind.
    file_handler.seek(0, 2)
    content_length = file_handler.tell()
    file_handler.seek(0)

    # The verification slice is the first 256KB.
    first_slice = file_handler.read(256 * 1024)
    slice_md5 = md5(first_slice).hexdigest()

    # The whole-file checksums start from the slice and absorb the rest.
    content_crc32 = crc32(first_slice)
    content_md5 = md5(first_slice)
    while True:
        block = file_handler.read(_BLOCK_SIZE)
        if not block:
            break
        content_crc32 = crc32(block, content_crc32)
        content_md5.update(block)

    data = {
        'content-length': content_length,
        'content-md5': content_md5.hexdigest(),
        'slice-md5': slice_md5,
        'content-crc32': '%d' % (content_crc32 & 0xFFFFFFFF)
    }
    api = _pcs_api('rapidupload', dest_path, token)
    # NOTE(review): verify=False disables TLS certificate verification.
    response = session.post(api, data=data, verify=False, headers=dict(_PCS_HEADERS))
    return response


if __name__ == '__main__':
    user = 'xxx'  # account name
    password = 'xxx'  # account password

    cur_gid = get_gid()
    cur_callback = get_callback()

    cur_token = get_token(cur_gid, cur_callback)
    if cur_token is None:
        raise SystemExit(1)

    rsa_result = get_rsa_key(cur_token, cur_gid, cur_callback)
    if rsa_result is None:
        raise SystemExit(1)
    cur_pubkey, cur_key = rsa_result

    # Use a separate name so the encript_password function is not shadowed.
    encrypted_password = encript_password(password, cur_pubkey)
    if not login(cur_token, cur_gid, cur_callback, cur_key, user, encrypted_password):
        raise SystemExit(1)

    # A regular upload works the same way:
    #   upload("/hello/temp.txt", file_handle, cur_token)
    with open("words.txt", 'rb') as words_file:
        res = rapidupload("/hello/words.txt", words_file, cur_token)
    print(res.content.decode('utf-8'))

1 #-*- coding:utf-8 -*-  2 __author__ = 'Administrator'  3   4 import time  5 import json  6 import re  7 import requests  8 import execjs  9 import base64 10 from urllib.parse import urlencode 11 from requests_toolbelt import MultipartEncoder 12 from Crypto.Cipher import PKCS1_v1_5 13 from Crypto.PublicKey import RSA 14 from hashlib import md5 15 from zlib import crc32 16 # import progressbar 17 import sys 18 from contextlib import closing 19 import time 20 import os 21  22 try: 23     requests.packages.urllib3.disable_warnings() 24 except: 25     pass 26  27 headers = {
   'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_4) AppleWebKit/537.36 ' 28                          '(KHTML, like Gecko) Chrome/57.0.2987.133 Safari/537.36', 29            } 30  31 # 全局的session 32 session = requests.session() 33 session.get('https://pan.baidu.com', headers=headers) 34  35  36 class BufferReader(MultipartEncoder): 37     """将multipart-formdata转化为stream形式的Proxy类 38     """ 39  40     def __init__(self, fields, boundary=None, callback=None, cb_args=(), cb_kwargs=None): 41         self._callback = callback 42         self._progress = 0 43         self._cb_args = cb_args 44         self._cb_kwargs = cb_kwargs or {} 45         super(BufferReader, self).__init__(fields, boundary) 46  47     def read(self, size=None): 48         chunk = super(BufferReader, self).read(size) 49         self._progress += int(len(chunk)) 50         self._cb_kwargs.update({ 51             'size': self._len, 52             'progress': self._progress 53         }) 54         if self._callback: 55             try: 56                 self._callback(*self._cb_args, **self._cb_kwargs) 57             except:  # catches exception from the callback 58                 # raise CancelledError('The upload was cancelled.') 59                 pass 60         return chunk 61  62 class ProgressBar(): 63     """ 64     import progressbar 65     使用第三方库显示上传进度 66  67     """ 68     def __init__(self): 69         self.first_call = True 70     def __call__(self, *args, **kwargs): 71         if self.first_call: 72             self.widgets = [progressbar.Percentage(), ' ', progressbar.Bar(marker=progressbar.RotatingMarker('>')), 73                             ' ', progressbar.FileTransferSpeed()] 74             self.pbar = progressbar.ProgressBar(widgets=self.widgets, maxval=kwargs['size']).start() 75             self.first_call = False 76  77         if kwargs['size'] <= kwargs['progress']: 78             self.pbar.finish() 79         else: 80             
self.pbar.update(kwargs['progress']) 81  82  83 def _get_runntime(): 84     """ 85     :param path: 加密js的路径,注意js中不要使用中文！估计是pyexecjs处理中文还有一些问题 86     :return: 编译后的js环境，不清楚pyexecjs这个库的用法的请在github上查看相关文档 87     """ 88     phantom = execjs.get()  # 这里必须为phantomjs设置环境变量，否则可以写phantomjs的具体路径 89     with open('login.js', 'r') as f: 90         source = f.read() 91     return phantom.compile(source) 92  93 def get_gid(): 94     return _get_runntime().call('getGid') 95  96 def get_callback(): 97     return _get_runntime().call('getCallback') 98  99 def _get_curtime():100     return int(time.time()*1000)101 102 # 抓包也不是百分百可靠啊,这里?getapi一定要挨着https://passport.baidu.com/v2/api/写，才会到正确的路由103 def get_token(gid, callback):104     cur_time = _get_curtime()105     get_data = {106         'tpl': 'netdisk',107         'subpro': 'netdisk_web',108         'apiver': 'v3',109         'tt': cur_time,110         'class': 'login',111         'gid': gid,112         'logintype': 'basicLogin',113         'callback': callback114     }115     headers.update(dict(Referer='http://pan.baidu.com/', Accept='*/*', Connection='keep-alive', Host='passport.baidu.com'))116     resp = session.get(url='https://passport.baidu.com/v2/api/?getapi', params=get_data, headers=headers)117     if resp.status_code == 200 and callback in resp.text:118         # 如果json字符串中带有单引号，会解析出错，只有统一成双引号才可以正确的解析119         #data = eval(re.search(r'.*?\((.*)\)', resp.text).group(1))120         data = json.loads(re.search(r'.*?\((.*)\)', resp.text).group(1).replace("'", '"'))121         return data.get('data').get('token')122     else:123         print('获取token失败')124         return None125 126 def get_rsa_key(token, gid, callback):127     cur_time = _get_curtime()128     get_data = {129         'token': token,130         'tpl': 'netdisk',131         'subpro': 'netdisk_web',132         'apiver': 'v3',133         'tt': cur_time,134         'gid': gid,135         'callback': callback,136     }137     resp = 
session.get(url='https://passport.baidu.com/v2/getpublickey', headers=headers, params=get_data)138     if resp.status_code == 200 and callback in resp.text:139         data = json.loads(re.search(r'.*?\((.*)\)', resp.text).group(1).replace("'", '"'))140         return data.get('pubkey'), data.get('key')141     else:142         print('获取rsa key失败')143         return None144 145 def encript_password(password, pubkey):146     """147     import rsa148     使用rsa库加密（法一）149     pub = rsa.PublicKey.load_pkcs1_openssl_pem(pubkey.encode('utf-8'))150     encript_passwd = rsa.encrypt(password.encode('utf-8'), pub)151     return base64.b64encode(encript_passwd).decode('utf-8')152 153     """154     # pubkey必须为bytes类型155     pub=RSA.importKey(pubkey.encode('utf-8'))156     #构造“加密器”157     encryptor=PKCS1_v1_5.new(pub)158     #加密的内容必须为bytes类型159     encript_passwd =encryptor.encrypt(password.encode('utf-8'))160     return base64.b64encode(encript_passwd).decode('utf-8')161 162 def login(token, gid, callback, rsakey, username, password):163     post_data = {164         'staticpage': 'http://pan.baidu.com/res/static/thirdparty/pass_v3_jump.html',165         'charset': 'utf-8',166         'token': token,167         'tpl': 'netdisk',168         'subpro': 'netdisk_web',169         'apiver': 'v3',170         'tt': _get_curtime(),171         'codestring': '',172         'safeflg': 0,173         'u': 'http://pan.baidu.com/disk/home',174         'isPhone': '',175         'detect': 1,176         'gid': gid,177         'quick_user': 0,178         'logintype': 'basicLogin',179         'logLoginType': 'pc_loginBasic',180         'idc': '',181         'loginmerge': 'true',182         'foreignusername': '',183         'username': username,184         'password': password,185         'mem_pass': 'on',186         # 返回的key187         'rsakey': rsakey,188         'crypttype': 12,189         'ppui_logintime': 33554,190         'countrycode': '',191         'callback': 'parent.'+callback192     }193  
   resp = session.post(url='https://passport.baidu.com/v2/api/?login', data=post_data, headers=headers)194     if 'err_no=0' in resp.text:195         print('登录成功')196     else:197         print('登录失败')198 def progressbar(size=None, progress=None,progress_title="已完成",finish_title="全部完成"):199     #size：文件总字节数 progress：当前传输完成字节数200     # print("{0} / {1}".format(size, progress))201     if progress
     
      >> headers = {'Range': 'bytes=0-99'}336       >>> pcs = PCS('username','password')337       >>> pcs.download('/test_sdk/test.txt', headers=headers)338     :param remote_path: 网盘中文件的路径（包含文件名）。339                         必须以 / 开头。340                         .. warning::341                             * 路径长度限制为1000；342                             * 径中不能包含以下字符：``\\\\ ? | " > < : *``；343                             * 文件名或路径名开头结尾不能是 ``.``344                               或空白字符，空白字符包括：345                               ``\\r, \\n, \\t, 空格, \\0, \\x0B`` 。346     :return: requests.Response 对象347     """348     params = {349             'method': 'download',350             'app_id': "250528",351             'BDUSS': session.cookies['BDUSS'],352             't': str(int(time.time())),353             'bdstoken': token,354             'path':remote_path355         }356     # 兼容原有域名pcs.baidu.com；使用新域名d.pcs.baidu.com，则提供更快、更稳定的下载服务357     url = 'https://{0}/rest/2.0/pcs/file'.format('d.pcs.baidu.com')358     baibupan_header = {
   "Referer": "http://pan.baidu.com/disk/home",359                 "User-Agent": "netdisk;4.6.2.0;PC;PC-Windows;10.0.10240;WindowsBaiduYunGuanJia"}360     header = dict(baibupan_header.items())361     # print(headers)362     # header.update({'Range': 'bytes=0-1024'}) #返回1KB内容363     # response = session.get(url, params=params, verify=False, headers=header)364     # print(response.headers)365     # print(response.headers['content-length'])366     with closing(session.get(url, params=params, verify=False, headers=header,stream=True)) as response:367         chunk_size=1024 #单次请求最大值368         count=1369         total_size=int(response.headers['content-length']) #内容体总大小370         with open(file_path,'wb') as file:371             for data in response.iter_content(chunk_size=chunk_size):372                 file.write(data)373                 progressbar(size=total_size,progress=count*chunk_size,progress_title="正在下载",finish_title="下载完成")374                 count=count+1375 376     """377     通过断点续传一点一点下载378     start=0379     stop=1023380     while True:381         chunk_size='bytes={0}-{1}'.format(start,stop)382         header.update({'Range': chunk_size}) #返回1KB内容383         response = session.get(url, params=params, verify=False, headers=header)384         # print(response.apparent_encoding)385         if response.content:386             with open(file_path,'ab') as file:387                 file.write(response.content)388             start=start+1024389             stop=stop+1024390         else:391             break392 393     """394 395 def get_filesize(rote_path,token):396     """获得文件(s)的meta397     :param rote_path: 文件路径,如 '/aaa.txt'398     """399     params = {400             'method': 'meta',401             'app_id': "250528",402             'BDUSS': session.cookies['BDUSS'],403             't': str(int(time.time())),404             'bdstoken': token,405             'path':rote_path406         }407     # url="https://pcs.baidu.com/rest/2.0/pcs/file"408     url 
= 'https://{0}/rest/2.0/pcs/file'.format('pcs.baidu.com')409     # api = '%s?%s' % (url, urlencode(params))410     baibupan_header = {
   "Referer": "http://pan.baidu.com/disk/home",411                 "User-Agent": "netdisk;4.6.2.0;PC;PC-Windows;10.0.10240;WindowsBaiduYunGuanJia"}412     header = dict(baibupan_header.items())413     # print(headers)414     response = session.get(url,params=params,verify=False, headers=header)415     return response416 417 def meta(file_list,token):418     """获得文件(s)的metainfo419 420     :param file_list: 文件路径列表,如 ['/aaa.txt']421     :type file_list: list422 423     :return: requests.Response424         .. note ::425         示例426 427         * 文件不存在428 429         {"errno":12,"info":[{"errno":-9}],"request_id":3294861771}430 431         * 文件存在432         {433             "errno": 0,434 435             "info": [436 437                 {438 439                     "fs_id": 文件id,440 441                     "path": "\/\u5c0f\u7c73\/mi2s\u5237recovery.rar",442 443                     "server_filename": "mi2s\u5237recovery.rar",444 445                     "size": 8292134,446 447                     "server_mtime": 1391274570,448 449                     "server_ctime": 1391274570,450 451                     "local_mtime": 1391274570,452 453                     "local_ctime": 1391274570,454 455                     "isdir": 0,456 457                     "category": 6,458 459                     "path_md5": 279827390796736883,460 461                     "delete_fs_id": 0,462 463                     "object_key": "84221121-2193956150-1391274570512754",464 465                     "block_list": [466                         "76b469302a02b42fd0a548f1a50dd8ac"467                     ],468 469                     "md5": "76b469302a02b42fd0a548f1a50dd8ac",470 471                     "errno": 0472 473                 }474 475             ],476 477             "request_id": 2964868977478 479         }480 481     """482     if not isinstance(file_list, list):483         file_list = [file_list]484     data = {
   'target': json.dumps(file_list)}485     params = {486             'method': 'filemetas',487             'app_id': "250528",488             'BDUSS': session.cookies['BDUSS'],489             't': str(int(time.time())),490             'bdstoken': token491         }492     print(token)493     baibupan_header = {
   "Referer": "http://pan.baidu.com/disk/home",494                 "User-Agent": "netdisk;4.6.2.0;PC;PC-Windows;10.0.10240;WindowsBaiduYunGuanJia"}495     header = dict(baibupan_header.items())496     uri='filemetas?blocks=0&dlink=1'497     url='http://pan.baidu.com/api/{0}'.format(uri)498     print(url)499     if '?' in url:500         api = "%s&%s" % (url, urlencode(params))501     else:502         api = '%s?%s' % (url, urlencode(params))503     print(api)504     print(data)505     response=session.post(api,data=data,verify=False,headers=header)506     return response507     # return self._request('filemetas?blocks=0&dlink=1', 'filemetas', data=data, **kwargs)508 509 if __name__ == '__main__':510     user='xxx'511     password='xxx'512 513     cur_gid = get_gid()514     cur_callback = get_callback()515     cur_token = get_token(cur_gid, cur_callback)516     # print("token:%s" %(cur_token))517     cur_pubkey, cur_key = get_rsa_key(cur_token, cur_gid, cur_callback)518     encript_password = encript_password(password, cur_pubkey)519     login(cur_token, cur_gid, cur_callback, cur_key, user, encript_password)520     # print("cookies:%s" %(session.cookies['BDUSS']))521 522     res=upload("/hello/word.py",open("test_BaiduPan.py",'rb'),cur_token,callback=progressbar)523     print(res.content.decode('utf-8'))524 525 526     # res=rapidupload("/hello/traindata.js",open("login.js",'rb'),cur_token,callback=progressbar)527     # print(json.loads(res.content.decode('utf-8')))528 529 530     # download("/hello/words.txt","word.txt",cur_token)531     # print(res.content.decode('utf-8'))532 533     # res=get_filesize("/hello/words",cur_token)534     # print(res.content.decode('utf-8'))535 536     # res=meta("/hello/words.txt",cur_token)537     # print(res.content)

1 #-*- coding:utf-8 -*-  2 __author__ = 'Administrator'  3   4 import time  5 import json  6 import re  7 import requests  8 import execjs  9 import base64 10 from urllib.parse import urlencode 11 from requests_toolbelt import MultipartEncoder 12 from Crypto.Cipher import PKCS1_v1_5 13 from Crypto.PublicKey import RSA 14 from hashlib import md5 15 from zlib import crc32 16 # import progressbar 17 import sys 18 from contextlib import closing 19 import time 20 import os 21 from io import BytesIO 22  23 try: 24     requests.packages.urllib3.disable_warnings() 25 except: 26     pass 27  28 # class BufferReader(MultipartEncoder): 29 #     """将multipart-formdata转化为stream形式的Proxy类 30 #     """ 31 #     def __init__(self, fields, boundary=None, callback=None, cb_args=(), cb_kwargs=None): 32 #         self._callback = callback 33 #         self._progress = 0 34 #         self._cb_args = cb_args 35 #         self._cb_kwargs = cb_kwargs or {} 36 #         super(BufferReader, self).__init__(fields, boundary) 37 # 38 #     def read(self, size=None): 39 #         chunk = super(BufferReader, self).read(size) 40 #         self._progress += int(len(chunk)) 41 #         self._cb_kwargs.update({
     42 #             'size': self._len, 43 #             'progress': self._progress 44 #         }) 45 #         if self._callback: 46 #             try: 47 #                 self._callback(*self._cb_args, **self._cb_kwargs) 48 #             except:  # catches exception from the callback 49 #                 # raise CancelledError('The upload was cancelled.') 50 #                 pass 51 #         return chunk 52  53 class BufferReader(BytesIO): 54     """ 55     """ 56     def __init__(self, filebytes, callback=None): 57         self._callback = callback 58         self._progress = 0 59         self._size =len(filebytes) 60         super(BufferReader, self).__init__(filebytes) 61  62     def read(self, size=-1): 63         chunk_size=8192 64         chunk = BytesIO.read(self，chunk_size) 65         self._progress += int(len(chunk)) 66         if self._callback: 67             self._callback(self._size,self._progress) 68         return chunk 69  70 class PCSBase(): 71     def __init__(self,username,password): 72         self.session=requests.session() 73         self.headers = {
   'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_4) AppleWebKit/537.36 ' 74                          '(KHTML, like Gecko) Chrome/57.0.2987.133 Safari/537.36', 75            } 76         self.session.get('https://pan.baidu.com', headers=self.headers) 77         self.username=username 78         self.password=password 79         self.user={} 80         self.cur_gid=self.get_gid() 81         self.cur_callback=self.get_callback() 82         self.cur_time=self._get_curtime() 83         self._initiate()#登录成功，并获取session.cookies 84  85     def _initiate(self): 86         self.user['token']= self.get_token() 87         # print("token:%s" %(self.get_token())) 88         self.login() 89         # print("cookies:%s" %(session.cookies['BDUSS'])) 90     def _get_runntime(self): 91         """ 92         :param path: 加密js的路径,注意js中不要使用中文！估计是pyexecjs处理中文还有一些问题 93         :return: 编译后的js环境，不清楚pyexecjs这个库的用法的请在github上查看相关文档 94         """ 95         phantom = execjs.get()  # 这里必须为phantomjs设置环境变量，否则可以写phantomjs的具体路径 96         with open('login.js', 'r') as f: 97             source = f.read() 98         return phantom.compile(source) 99 100     def get_gid(self):101         return self._get_runntime().call('getGid')102 103     def get_callback(self):104         return self._get_runntime().call('getCallback')105 106     def _get_curtime(self):107         return int(time.time()*1000)108         # 抓包也不是百分百可靠啊,这里?getapi一定要挨着https://passport.baidu.com/v2/api/写，才会到正确的路由109     def get_token(self):110         get_data = {111             'tpl': 'netdisk',112             'subpro': 'netdisk_web',113             'apiver': 'v3',114             'tt':self.cur_time,115             'class': 'login',116             'gid': self.cur_gid,117             'logintype': 'basicLogin',118             'callback': self.cur_callback119         }120         self.headers.update(dict(Referer='http://pan.baidu.com/', Accept='*/*', Connection='keep-alive', Host='passport.baidu.com'))121         resp = 
self.session.get(url='https://passport.baidu.com/v2/api/?getapi', params=get_data, headers=self.headers)122         if resp.status_code == 200 and self.cur_callback in resp.text:123             # 如果json字符串中带有单引号，会解析出错，只有统一成双引号才可以正确的解析124             #data = eval(re.search(r'.*?\((.*)\)', resp.text).group(1))125             data = json.loads(re.search(r'.*?\((.*)\)', resp.text).group(1).replace("'", '"'))126             return data.get('data').get('token')127         else:128             print('获取token失败')129             return None130 131     def get_rsa_key(self):132         get_data = {133             'token': self.user['token'],134             'tpl': 'netdisk',135             'subpro': 'netdisk_web',136             'apiver': 'v3',137             'tt': self.cur_time,138             'gid': self.cur_gid,139             'callback': self.cur_callback140         }141         resp = self.session.get(url='https://passport.baidu.com/v2/getpublickey', headers=self.headers, params=get_data)142         if resp.status_code == 200 and self.cur_callback in resp.text:143             data = json.loads(re.search(r'.*?\((.*)\)', resp.text).group(1).replace("'", '"'))144             return data.get('pubkey'), data.get('key')145         else:146             print('获取rsa key失败')147             return None148 149     def encript_password(self,pubkey):150         """151         import rsa152         使用rsa库加密（法一）153         pub = rsa.PublicKey.load_pkcs1_openssl_pem(pubkey.encode('utf-8'))154         encript_passwd = rsa.encrypt(password.encode('utf-8'), pub)155         return base64.b64encode(encript_passwd).decode('utf-8')156 157         """158         # pubkey必须为bytes类型159         pub=RSA.importKey(pubkey.encode('utf-8'))160         #构造“加密器”161         encryptor=PKCS1_v1_5.new(pub)162         #加密的内容必须为bytes类型163         encript_passwd =encryptor.encrypt(self.password.encode('utf-8'))164         return base64.b64encode(encript_passwd).decode('utf-8')165 166     def login(self):167     
    cur_pubkey, cur_key = self.get_rsa_key()168         encript_password =self.encript_password(cur_pubkey)169         post_data = {170             'staticpage': 'http://pan.baidu.com/res/static/thirdparty/pass_v3_jump.html',171             'charset': 'utf-8',172             'token': self.user['token'],173             'tpl': 'netdisk',174             'subpro': 'netdisk_web',175             'apiver': 'v3',176             'tt': self.cur_time,177             'codestring': '',178             'safeflg': 0,179             'u': 'http://pan.baidu.com/disk/home',180             'isPhone': '',181             'detect': 1,182             'gid': self.cur_gid,183             'quick_user': 0,184             'logintype': 'basicLogin',185             'logLoginType': 'pc_loginBasic',186             'idc': '',187             'loginmerge': 'true',188             'foreignusername': '',189             'username': self.username,190             'password': encript_password,191             'mem_pass': 'on',192             # 返回的key193             'rsakey': cur_key,194             'crypttype': 12,195             'ppui_logintime': 33554,196             'countrycode': '',197             'callback': 'parent.'+self.cur_callback198         }199         resp = self.session.post(url='https://passport.baidu.com/v2/api/?login', data=post_data, headers=self.headers)200         if 'err_no=0' in resp.text:201             print('登录成功')202             self.user['BDUSS'] = self.session.cookies['BDUSS']203         else:204             print('登录失败')205             self.user['BDUSS']=None206 207     def _request(self,url,data=None,files=None,extra_params=None,callback=None):208         params={209             'app_id': "250528",210             'BDUSS': self.user['BDUSS'],211             't': str(int(time.time())),212             'bdstoken': self.user['token']213         }214         if extra_params:215             params.update(extra_params)216         # print("params:%s" %params)217         baibupan_header = 
{
   "Referer": "http://pan.baidu.com/disk/home",218                     "User-Agent": "netdisk;4.6.2.0;PC;PC-Windows;10.0.10240;WindowsBaiduYunGuanJia"}219         header= dict(baibupan_header.items())220         if data or files:221             api = '%s?%s' % (url, urlencode(params))222             # print("api:%s" %api)223             if data:224                 res=self.session.post(api,data=data,verify=False, headers=header)225                 return res226             else:227                 # print(callback==None)228                 (filedata,contenttype)=requests.packages.urllib3.filepost.encode_multipart_formdata(files)                     body=BufferReader(filedata,callback=callback) 229                 # print("body:%s" %type(body))230                 header.update({231                     "Content-Type": contenttype232                 })233                 # print("header:%s" %header)234                 res=self.session.post(api,data=body,verify=False, headers=header)235                 return res236         else:237             res=self.session.get(url,params=params,verify=False, headers=header,stream=True)238             return  res239 240 class PCS(PCSBase):241     def __init__(self,username,password):242         self.username=username243         self.password=password244         super(PCS,self).__init__(self.username,self.password)245 246     def upload(self,remote_path,file_handler,callback=None):247         params={248             'method': 'upload',249             'path': remote_path,250             'ondup': "newcopy"251         }252         files = {
   'file': (str(int(time.time())), file_handler)}253         url = 'https://{0}/rest/2.0/pcs/file'.format('pcs.baidu.com')254         response=self._request(url,files=files,extra_params=params,callback=callback)255         return response256 257     def rapid_upload(self,remote_path,file_handler,callback=None):258         params={259             'method':"rapidupload",260             'path':remote_path,261             'ondup':"newcopy"262         }263         url = 'https://{0}/rest/2.0/pcs/file'.format('pcs.baidu.com')264         file_handler.seek(0, 2)265         _BLOCK_SIZE = 2 ** 20  #1MB大小266         # print(_BLOCK_SIZE)267         content_length = file_handler.tell()268         # print(content_length)269         file_handler.seek(0)270 271         # 校验段为前 256KB272         first_256bytes = file_handler.read(256 * 1024)273         slice_md5 = md5(first_256bytes).hexdigest()274 275         content_crc32 = crc32(first_256bytes).conjugate()276         content_md5 = md5(first_256bytes)277 278         count=1279         while True:280             block = file_handler.read(_BLOCK_SIZE)281             if callback:282                 callback(size=content_length,progress=count*_BLOCK_SIZE)283             count=count+1284             if not block:285                 break286             # 更新crc32和md5校验值287             content_crc32 = crc32(block, content_crc32).conjugate()288             content_md5.update(block)289         data = {290                 'content-length': content_length,291                 'content-md5': content_md5.hexdigest(),292                 'slice-md5': slice_md5,293                 'content-crc32': '%d' % (content_crc32.conjugate() & 0xFFFFFFFF)294                 }295         response=self._request(url,data=data,extra_params=params,callback=callback)296         return response297 298     def download(self,remote_path,local_path,callback=None):299         params={300             'method':"download",301             'path':remote_path302         }303 
        # 兼容原有域名pcs.baidu.com；使用新域名d.pcs.baidu.com，则提供更快、更稳定的下载服务304         url = 'https://{0}/rest/2.0/pcs/file'.format('d.pcs.baidu.com')305         with closing(self._request(url, extra_params=params)) as response:306             chunk_size=1024 #单次请求最大值307             count=1308             total_size=int(response.headers['content-length']) #内容体总大小309             with open(local_path,'wb') as file:310                 for data in response.iter_content(chunk_size=chunk_size):311                     file.write(data)312                     self.progressbar(size=total_size,progress=count*chunk_size,progress_title="正在下载",finish_title="下载完成")313                     count=count+1314 315     def progressbar(self,size=None, progress=None,progress_title="正在上传",finish_title="上传完成"):316         #size：文件总字节数 progress：当前传输完成字节数317         # print("{0} / {1}".format(size, progress))318         if progress

转载于:https://www.cnblogs.com/yizhenfeng168/p/7067966.html

你可能感兴趣的文章

了解你的Linux系统：必须掌握的20个命令

查看>>

js setInterval 启用&停止

查看>>

knockoutJS学习笔记04：监控属性

查看>>