博客
关于我
强烈建议你试试无所不能的chatGPT,快点击我
python爬虫:登录百度账户,并上传文件到百度云盘
阅读量:4356 次
发布时间:2019-06-07

本文共 36527 字,大约阅读时间需要 121 分钟。

login.js文件:

/**
 * Created by resolvewang on 2017/4/15.
 */

/**
 * Build a random upper-case identifier following the template
 * "xxxxxxx-xxxx-4xxx-yxxx-xxxxxxxxxxxx": every "x" becomes a random hex
 * digit and the "y" becomes one of 8, 9, A or B.
 *
 * Kept to ES5 syntax and ASCII-only text because the file is evaluated
 * through an external JS runtime from Python.
 */
function getGid() {
    var template = "xxxxxxx-xxxx-4xxx-yxxx-xxxxxxxxxxxx";
    var filled = template.replace(/[xy]/g, function (placeholder) {
        var nibble = 16 * Math.random() | 0;
        var digit;
        if ("x" == placeholder) {
            digit = nibble;
        } else {
            // Force the two high bits to "10" so the digit is 8..b.
            digit = 3 & nibble | 8;
        }
        return digit.toString(16);
    });
    return filled.toUpperCase();
}

/**
 * Build a random JSONP callback name: "bd__cbs__" followed by a random
 * integer below 2^31 rendered in base 36.
 */
function getCallback() {
    var suffix = Math.floor(2147483648 * Math.random()).toString(36);
    return "bd__cbs__" + suffix;
}

Python实现代码:

# -*- coding:utf-8 -*-
"""Log in to a Baidu account and upload a file to Baidu Pan (netdisk).

The login flow follows the web client: obtain a token, obtain an RSA public
key, encrypt the password with that key, then post the login form.  The
``gid`` and JSONP callback name are produced by ``login.js``, which must sit
in the current working directory.
"""
__author__ = 'Administrator'

import time
import json
import re
import requests
import execjs
import base64
from urllib.parse import urlencode
from requests_toolbelt import MultipartEncoder
from Crypto.Cipher import PKCS1_v1_5
from Crypto.PublicKey import RSA
from hashlib import md5
from zlib import crc32

try:
    # The PCS requests below are sent with verify=False; silence the
    # InsecureRequestWarning that urllib3 would print for each of them.
    requests.packages.urllib3.disable_warnings()
except Exception:
    pass

headers = {
    'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_4) AppleWebKit/537.36 '
                  '(KHTML, like Gecko) Chrome/57.0.2987.133 Safari/537.36',
}

# Module-wide session so that cookies set during login are reused by uploads.
session = requests.session()
session.get('https://pan.baidu.com', headers=headers)

# Endpoint and client headers shared by upload() and rapidupload().
_PCS_FILE_URL = 'https://pcs.baidu.com/rest/2.0/pcs/file'
_PCS_HEADERS = {
    "Referer": "http://pan.baidu.com/disk/home",
    "User-Agent": "netdisk;4.6.2.0;PC;PC-Windows;10.0.10240;WindowsBaiduYunGuanJia",
}

# Captures everything between the first "(" and the last ")" of a JSONP body.
_JSONP_BODY = re.compile(r'.*?\((.*)\)')

# Read size used while hashing the remainder of a file in rapidupload().
_BLOCK_SIZE = 2 ** 20


class BufferReader(MultipartEncoder):
    """Proxy that streams multipart/form-data and reports read progress.

    After every ``read`` the optional ``callback`` is invoked with
    ``*cb_args`` and ``**cb_kwargs``; ``cb_kwargs`` always carries ``size``
    (the encoder's ``_len``) and ``progress`` (bytes read so far).
    """

    def __init__(self, fields, boundary=None, callback=None, cb_args=(), cb_kwargs=None):
        self._callback = callback
        self._progress = 0
        self._cb_args = cb_args
        self._cb_kwargs = cb_kwargs or {}
        super(BufferReader, self).__init__(fields, boundary)

    def read(self, size=None):
        """Read up to ``size`` bytes, update the progress and notify the callback."""
        chunk = super(BufferReader, self).read(size)
        self._progress += int(len(chunk))
        self._cb_kwargs.update({
            'size': self._len,
            'progress': self._progress
        })
        if self._callback:
            try:
                self._callback(*self._cb_args, **self._cb_kwargs)
            except Exception:
                # Best effort: a failing progress callback must not abort
                # the upload itself.
                pass
        return chunk


def _get_runntime():
    """Compile ``login.js`` and return the resulting PyExecJS context.

    Keep ``login.js`` free of non-ASCII text; the original author suspected
    PyExecJS of mishandling Chinese characters.  See the PyExecJS
    documentation on GitHub for how the compiled context is used.
    """
    # The JS runtime (PhantomJS in the original setup) must be discoverable
    # through the environment; otherwise give its concrete path.
    runtime = execjs.get()
    with open('login.js', 'r') as f:
        source = f.read()
    return runtime.compile(source)


def get_gid():
    """Return a fresh ``gid`` generated by ``getGid`` in ``login.js``."""
    return _get_runntime().call('getGid')


def get_callback():
    """Return a fresh JSONP callback name from ``getCallback`` in ``login.js``."""
    return _get_runntime().call('getCallback')


def _get_curtime():
    """Return the current Unix time in milliseconds as an int."""
    return int(time.time() * 1000)


def _parse_jsonp(text):
    """Return the decoded payload of a ``callback(...)`` body, or None.

    None is returned when the body has no parenthesised payload or when the
    payload is not valid JSON after quote normalisation.
    """
    match = _JSONP_BODY.search(text)
    if match is None:
        return None
    try:
        # The payload may use single quotes, which json rejects; normalise
        # every quote to a double quote before decoding.
        return json.loads(match.group(1).replace("'", '"'))
    except ValueError:
        return None


# Packet captures are not fully reliable here: "?getapi" has to follow
# https://passport.baidu.com/v2/api/ directly in the URL itself, otherwise
# the request does not reach the right route.
def get_token(gid, callback):
    """Fetch the login token for ``gid``.

    Also adds Referer/Accept/Connection/Host entries to the module-level
    ``headers`` dict, which the later passport requests reuse.

    :param gid: identifier from :func:`get_gid`
    :param callback: JSONP callback name from :func:`get_callback`
    :return: the token, or None (after printing a message) on failure
    """
    get_data = {
        'tpl': 'netdisk',
        'subpro': 'netdisk_web',
        'apiver': 'v3',
        'tt': _get_curtime(),
        'class': 'login',
        'gid': gid,
        'logintype': 'basicLogin',
        'callback': callback
    }
    headers.update(dict(Referer='http://pan.baidu.com/', Accept='*/*',
                        Connection='keep-alive', Host='passport.baidu.com'))
    resp = session.get(url='https://passport.baidu.com/v2/api/?getapi',
                       params=get_data, headers=headers)
    data = None
    if resp.status_code == 200 and callback in resp.text:
        data = _parse_jsonp(resp.text)
    if data is None:
        print('获取token失败')
        return None
    return (data.get('data') or {}).get('token')


def get_rsa_key(token, gid, callback):
    """Fetch the RSA public key used to encrypt the password.

    :param token: token from :func:`get_token`
    :param gid: identifier from :func:`get_gid`
    :param callback: JSONP callback name from :func:`get_callback`
    :return: ``(pubkey, key)`` on success, or None (after printing a
        message) on failure
    """
    get_data = {
        'token': token,
        'tpl': 'netdisk',
        'subpro': 'netdisk_web',
        'apiver': 'v3',
        'tt': _get_curtime(),
        'gid': gid,
        'callback': callback,
    }
    resp = session.get(url='https://passport.baidu.com/v2/getpublickey',
                       headers=headers, params=get_data)
    data = None
    if resp.status_code == 200 and callback in resp.text:
        data = _parse_jsonp(resp.text)
    if data is None:
        print('获取rsa key失败')
        return None
    return data.get('pubkey'), data.get('key')


def encript_password(password, pubkey):
    """Encrypt ``password`` with the PEM public key and return it base64-encoded.

    An equivalent implementation with the ``rsa`` package would be::

        pub = rsa.PublicKey.load_pkcs1_openssl_pem(pubkey.encode('utf-8'))
        encript_passwd = rsa.encrypt(password.encode('utf-8'), pub)
        return base64.b64encode(encript_passwd).decode('utf-8')

    :param password: plain-text password (str)
    :param pubkey: PEM public key text (str) from :func:`get_rsa_key`
    :return: base64 text of the PKCS#1 v1.5 ciphertext
    """
    # importKey and encrypt both require bytes, hence the explicit encoding.
    pub = RSA.importKey(pubkey.encode('utf-8'))
    encryptor = PKCS1_v1_5.new(pub)
    encript_passwd = encryptor.encrypt(password.encode('utf-8'))
    return base64.b64encode(encript_passwd).decode('utf-8')


def login(token, gid, callback, rsakey, username, password):
    """Post the login form and report the outcome.

    :param token: token from :func:`get_token`
    :param gid: identifier from :func:`get_gid`
    :param callback: JSONP callback name from :func:`get_callback`
    :param rsakey: the ``key`` value returned by :func:`get_rsa_key`
    :param username: account name
    :param password: password already encrypted by :func:`encript_password`
    :return: True when the response contains ``err_no=0``, otherwise False
    """
    post_data = {
        'staticpage': 'http://pan.baidu.com/res/static/thirdparty/pass_v3_jump.html',
        'charset': 'utf-8',
        'token': token,
        'tpl': 'netdisk',
        'subpro': 'netdisk_web',
        'apiver': 'v3',
        'tt': _get_curtime(),
        'codestring': '',
        'safeflg': 0,
        'u': 'http://pan.baidu.com/disk/home',
        'isPhone': '',
        'detect': 1,
        'gid': gid,
        'quick_user': 0,
        'logintype': 'basicLogin',
        'logLoginType': 'pc_loginBasic',
        'idc': '',
        'loginmerge': 'true',
        'foreignusername': '',
        'username': username,
        'password': password,
        'mem_pass': 'on',
        # The key returned together with the public key
        'rsakey': rsakey,
        'crypttype': 12,
        'ppui_logintime': 33554,
        'countrycode': '',
        'callback': 'parent.' + callback
    }
    resp = session.post(url='https://passport.baidu.com/v2/api/?login',
                        data=post_data, headers=headers)
    succeeded = 'err_no=0' in resp.text
    if succeeded:
        print('登录成功')
    else:
        print('登录失败')
    return succeeded


def _pcs_params(method, dest_path, token):
    """Build the query parameters shared by the PCS file operations."""
    return {
        'method': method,
        'app_id': "250528",
        # Raises KeyError when no login has stored the BDUSS cookie yet.
        'BDUSS': session.cookies['BDUSS'],
        't': str(int(time.time())),
        'bdstoken': token,
        'path': dest_path,
        'ondup': "newcopy"
    }


def upload(dest_path, file_handle, token):
    """Upload the content of ``file_handle`` to ``dest_path``.

    :param dest_path: destination path on the server, including file name
    :param file_handle: binary file object, e.g. ``open('file', 'rb')``
    :param token: token from :func:`get_token`
    :return: requests.Response
    """
    params = _pcs_params('upload', dest_path, token)
    files = {'file': (str(int(time.time())), file_handle)}
    api = '%s?%s' % (_PCS_FILE_URL, urlencode(params))
    body = BufferReader(files)
    request_headers = dict(_PCS_HEADERS)
    request_headers["Content-Type"] = body.content_type
    # NOTE(review): verify=False disables TLS certificate verification;
    # kept because the original relies on it — confirm it is still needed.
    return session.post(api, data=body, verify=False, headers=request_headers)


def rapidupload(dest_path, file_handler, token):
    """Rapid-upload a file by sending only its length and checksums.

    :param dest_path: destination path on the server, including file name
    :type dest_path: str
    :param file_handler: binary file object, e.g. ``open('file', 'rb')``
    :type file_handler: file
    :return: requests.Response

    .. note::
        Response examples recorded by the original author:

        * content already known to the server, nothing uploaded::

            {"path": "/apps/album/1.jpg", "size": 372121,
             "ctime": 1234567890, "mtime": 1234567890,
             "md5": "cb123afcc12453543ef", "fs_id": 12345,
             "isdir": 0, "request_id": 12314124}

        * content unknown, a normal upload is required::

            {"errno":404,"info":[],"request_id":XXX}

        * file smaller than 256 KB (slice-md5 == content-md5)::

            {"errno":2,"info":[],"request_id":XXX}

        * remote file already exists::

            {"errno":-8,"info":[],"request_id":XXX}
    """
    # Seek to the end to learn the total size, then rewind for hashing.
    file_handler.seek(0, 2)
    content_length = file_handler.tell()
    file_handler.seek(0)

    # The verification slice is the first 256 KB of the file.
    leading_slice = file_handler.read(256 * 1024)
    slice_md5 = md5(leading_slice).hexdigest()

    # The whole-file digests start from that slice and absorb the rest.
    content_md5 = md5(leading_slice)
    content_crc32 = crc32(leading_slice)
    while True:
        block = file_handler.read(_BLOCK_SIZE)
        if not block:
            break
        content_crc32 = crc32(block, content_crc32)
        content_md5.update(block)

    params = _pcs_params('rapidupload', dest_path, token)
    data = {
        'content-length': content_length,
        'content-md5': content_md5.hexdigest(),
        'slice-md5': slice_md5,
        'content-crc32': '%d' % (content_crc32 & 0xFFFFFFFF)
    }
    api = '%s?%s' % (_PCS_FILE_URL, urlencode(params))
    # NOTE(review): verify=False disables TLS certificate verification;
    # kept because the original relies on it — confirm it is still needed.
    return session.post(api, data=data, verify=False, headers=dict(_PCS_HEADERS))


if __name__ == '__main__':
    user = 'xxx'  # user name
    password = 'xxx'  # password

    cur_gid = get_gid()
    cur_callback = get_callback()

    cur_token = get_token(cur_gid, cur_callback)
    if cur_token is None:
        raise SystemExit(1)

    rsa_info = get_rsa_key(cur_token, cur_gid, cur_callback)
    if rsa_info is None:
        raise SystemExit(1)
    cur_pubkey, cur_key = rsa_info

    # Named differently from the function so the function is not shadowed.
    encrypted_password = encript_password(password, cur_pubkey)
    if not login(cur_token, cur_gid, cur_callback, cur_key, user, encrypted_password):
        raise SystemExit(1)

    # For a full upload, call upload("/hello/temp.txt", fh, cur_token)
    # with a file opened in 'rb' mode instead.
    with open("words.txt", 'rb') as words_file:
        res = rapidupload("/hello/words.txt", words_file, cur_token)
    print(res.content.decode('utf-8'))

 

1 #-*- coding:utf-8 -*-  2 __author__ = 'Administrator'  3   4 import time  5 import json  6 import re  7 import requests  8 import execjs  9 import base64 10 from urllib.parse import urlencode 11 from requests_toolbelt import MultipartEncoder 12 from Crypto.Cipher import PKCS1_v1_5 13 from Crypto.PublicKey import RSA 14 from hashlib import md5 15 from zlib import crc32 16 # import progressbar 17 import sys 18 from contextlib import closing 19 import time 20 import os 21  22 try: 23     requests.packages.urllib3.disable_warnings() 24 except: 25     pass 26  27 headers = {
'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_4) AppleWebKit/537.36 ' 28 '(KHTML, like Gecko) Chrome/57.0.2987.133 Safari/537.36', 29 } 30 31 # 全局的session 32 session = requests.session() 33 session.get('https://pan.baidu.com', headers=headers) 34 35 36 class BufferReader(MultipartEncoder): 37 """将multipart-formdata转化为stream形式的Proxy类 38 """ 39 40 def __init__(self, fields, boundary=None, callback=None, cb_args=(), cb_kwargs=None): 41 self._callback = callback 42 self._progress = 0 43 self._cb_args = cb_args 44 self._cb_kwargs = cb_kwargs or {} 45 super(BufferReader, self).__init__(fields, boundary) 46 47 def read(self, size=None): 48 chunk = super(BufferReader, self).read(size) 49 self._progress += int(len(chunk)) 50 self._cb_kwargs.update({ 51 'size': self._len, 52 'progress': self._progress 53 }) 54 if self._callback: 55 try: 56 self._callback(*self._cb_args, **self._cb_kwargs) 57 except: # catches exception from the callback 58 # raise CancelledError('The upload was cancelled.') 59 pass 60 return chunk 61 62 class ProgressBar(): 63 """ 64 import progressbar 65 使用第三方库显示上传进度 66 67 """ 68 def __init__(self): 69 self.first_call = True 70 def __call__(self, *args, **kwargs): 71 if self.first_call: 72 self.widgets = [progressbar.Percentage(), ' ', progressbar.Bar(marker=progressbar.RotatingMarker('>')), 73 ' ', progressbar.FileTransferSpeed()] 74 self.pbar = progressbar.ProgressBar(widgets=self.widgets, maxval=kwargs['size']).start() 75 self.first_call = False 76 77 if kwargs['size'] <= kwargs['progress']: 78 self.pbar.finish() 79 else: 80 self.pbar.update(kwargs['progress']) 81 82 83 def _get_runntime(): 84 """ 85 :param path: 加密js的路径,注意js中不要使用中文!估计是pyexecjs处理中文还有一些问题 86 :return: 编译后的js环境,不清楚pyexecjs这个库的用法的请在github上查看相关文档 87 """ 88 phantom = execjs.get() # 这里必须为phantomjs设置环境变量,否则可以写phantomjs的具体路径 89 with open('login.js', 'r') as f: 90 source = f.read() 91 return phantom.compile(source) 92 93 def get_gid(): 94 return _get_runntime().call('getGid') 95 96 
def get_callback(): 97 return _get_runntime().call('getCallback') 98 99 def _get_curtime():100 return int(time.time()*1000)101 102 # 抓包也不是百分百可靠啊,这里?getapi一定要挨着https://passport.baidu.com/v2/api/写,才会到正确的路由103 def get_token(gid, callback):104 cur_time = _get_curtime()105 get_data = {106 'tpl': 'netdisk',107 'subpro': 'netdisk_web',108 'apiver': 'v3',109 'tt': cur_time,110 'class': 'login',111 'gid': gid,112 'logintype': 'basicLogin',113 'callback': callback114 }115 headers.update(dict(Referer='http://pan.baidu.com/', Accept='*/*', Connection='keep-alive', Host='passport.baidu.com'))116 resp = session.get(url='https://passport.baidu.com/v2/api/?getapi', params=get_data, headers=headers)117 if resp.status_code == 200 and callback in resp.text:118 # 如果json字符串中带有单引号,会解析出错,只有统一成双引号才可以正确的解析119 #data = eval(re.search(r'.*?\((.*)\)', resp.text).group(1))120 data = json.loads(re.search(r'.*?\((.*)\)', resp.text).group(1).replace("'", '"'))121 return data.get('data').get('token')122 else:123 print('获取token失败')124 return None125 126 def get_rsa_key(token, gid, callback):127 cur_time = _get_curtime()128 get_data = {129 'token': token,130 'tpl': 'netdisk',131 'subpro': 'netdisk_web',132 'apiver': 'v3',133 'tt': cur_time,134 'gid': gid,135 'callback': callback,136 }137 resp = session.get(url='https://passport.baidu.com/v2/getpublickey', headers=headers, params=get_data)138 if resp.status_code == 200 and callback in resp.text:139 data = json.loads(re.search(r'.*?\((.*)\)', resp.text).group(1).replace("'", '"'))140 return data.get('pubkey'), data.get('key')141 else:142 print('获取rsa key失败')143 return None144 145 def encript_password(password, pubkey):146 """147 import rsa148 使用rsa库加密(法一)149 pub = rsa.PublicKey.load_pkcs1_openssl_pem(pubkey.encode('utf-8'))150 encript_passwd = rsa.encrypt(password.encode('utf-8'), pub)151 return base64.b64encode(encript_passwd).decode('utf-8')152 153 """154 # pubkey必须为bytes类型155 pub=RSA.importKey(pubkey.encode('utf-8'))156 #构造“加密器”157 
encryptor=PKCS1_v1_5.new(pub)158 #加密的内容必须为bytes类型159 encript_passwd =encryptor.encrypt(password.encode('utf-8'))160 return base64.b64encode(encript_passwd).decode('utf-8')161 162 def login(token, gid, callback, rsakey, username, password):163 post_data = {164 'staticpage': 'http://pan.baidu.com/res/static/thirdparty/pass_v3_jump.html',165 'charset': 'utf-8',166 'token': token,167 'tpl': 'netdisk',168 'subpro': 'netdisk_web',169 'apiver': 'v3',170 'tt': _get_curtime(),171 'codestring': '',172 'safeflg': 0,173 'u': 'http://pan.baidu.com/disk/home',174 'isPhone': '',175 'detect': 1,176 'gid': gid,177 'quick_user': 0,178 'logintype': 'basicLogin',179 'logLoginType': 'pc_loginBasic',180 'idc': '',181 'loginmerge': 'true',182 'foreignusername': '',183 'username': username,184 'password': password,185 'mem_pass': 'on',186 # 返回的key187 'rsakey': rsakey,188 'crypttype': 12,189 'ppui_logintime': 33554,190 'countrycode': '',191 'callback': 'parent.'+callback192 }193 resp = session.post(url='https://passport.baidu.com/v2/api/?login', data=post_data, headers=headers)194 if 'err_no=0' in resp.text:195 print('登录成功')196 else:197 print('登录失败')198 def progressbar(size=None, progress=None,progress_title="已完成",finish_title="全部完成"):199 #size:文件总字节数 progress:当前传输完成字节数200 # print("{0} / {1}".format(size, progress))201 if progress
>> headers = {'Range': 'bytes=0-99'}336 >>> pcs = PCS('username','password')337 >>> pcs.download('/test_sdk/test.txt', headers=headers)338 :param remote_path: 网盘中文件的路径(包含文件名)。339 必须以 / 开头。340 .. warning::341 * 路径长度限制为1000;342 * 径中不能包含以下字符:``\\\\ ? | " > < : *``;343 * 文件名或路径名开头结尾不能是 ``.``344 或空白字符,空白字符包括:345 ``\\r, \\n, \\t, 空格, \\0, \\x0B`` 。346 :return: requests.Response 对象347 """348 params = {349 'method': 'download',350 'app_id': "250528",351 'BDUSS': session.cookies['BDUSS'],352 't': str(int(time.time())),353 'bdstoken': token,354 'path':remote_path355 }356 # 兼容原有域名pcs.baidu.com;使用新域名d.pcs.baidu.com,则提供更快、更稳定的下载服务357 url = 'https://{0}/rest/2.0/pcs/file'.format('d.pcs.baidu.com')358 baibupan_header = {
"Referer": "http://pan.baidu.com/disk/home",359 "User-Agent": "netdisk;4.6.2.0;PC;PC-Windows;10.0.10240;WindowsBaiduYunGuanJia"}360 header = dict(baibupan_header.items())361 # print(headers)362 # header.update({'Range': 'bytes=0-1024'}) #返回1KB内容363 # response = session.get(url, params=params, verify=False, headers=header)364 # print(response.headers)365 # print(response.headers['content-length'])366 with closing(session.get(url, params=params, verify=False, headers=header,stream=True)) as response:367 chunk_size=1024 #单次请求最大值368 count=1369 total_size=int(response.headers['content-length']) #内容体总大小370 with open(file_path,'wb') as file:371 for data in response.iter_content(chunk_size=chunk_size):372 file.write(data)373 progressbar(size=total_size,progress=count*chunk_size,progress_title="正在下载",finish_title="下载完成")374 count=count+1375 376 """377 通过断点续传一点一点下载378 start=0379 stop=1023380 while True:381 chunk_size='bytes={0}-{1}'.format(start,stop)382 header.update({'Range': chunk_size}) #返回1KB内容383 response = session.get(url, params=params, verify=False, headers=header)384 # print(response.apparent_encoding)385 if response.content:386 with open(file_path,'ab') as file:387 file.write(response.content)388 start=start+1024389 stop=stop+1024390 else:391 break392 393 """394 395 def get_filesize(rote_path,token):396 """获得文件(s)的meta397 :param rote_path: 文件路径,如 '/aaa.txt'398 """399 params = {400 'method': 'meta',401 'app_id': "250528",402 'BDUSS': session.cookies['BDUSS'],403 't': str(int(time.time())),404 'bdstoken': token,405 'path':rote_path406 }407 # url="https://pcs.baidu.com/rest/2.0/pcs/file"408 url = 'https://{0}/rest/2.0/pcs/file'.format('pcs.baidu.com')409 # api = '%s?%s' % (url, urlencode(params))410 baibupan_header = {
"Referer": "http://pan.baidu.com/disk/home",411 "User-Agent": "netdisk;4.6.2.0;PC;PC-Windows;10.0.10240;WindowsBaiduYunGuanJia"}412 header = dict(baibupan_header.items())413 # print(headers)414 response = session.get(url,params=params,verify=False, headers=header)415 return response416 417 def meta(file_list,token):418 """获得文件(s)的metainfo419 420 :param file_list: 文件路径列表,如 ['/aaa.txt']421 :type file_list: list422 423 :return: requests.Response424 .. note ::425 示例426 427 * 文件不存在428 429 {"errno":12,"info":[{"errno":-9}],"request_id":3294861771}430 431 * 文件存在432 {433 "errno": 0,434 435 "info": [436 437 {438 439 "fs_id": 文件id,440 441 "path": "\/\u5c0f\u7c73\/mi2s\u5237recovery.rar",442 443 "server_filename": "mi2s\u5237recovery.rar",444 445 "size": 8292134,446 447 "server_mtime": 1391274570,448 449 "server_ctime": 1391274570,450 451 "local_mtime": 1391274570,452 453 "local_ctime": 1391274570,454 455 "isdir": 0,456 457 "category": 6,458 459 "path_md5": 279827390796736883,460 461 "delete_fs_id": 0,462 463 "object_key": "84221121-2193956150-1391274570512754",464 465 "block_list": [466 "76b469302a02b42fd0a548f1a50dd8ac"467 ],468 469 "md5": "76b469302a02b42fd0a548f1a50dd8ac",470 471 "errno": 0472 473 }474 475 ],476 477 "request_id": 2964868977478 479 }480 481 """482 if not isinstance(file_list, list):483 file_list = [file_list]484 data = {
'target': json.dumps(file_list)}485 params = {486 'method': 'filemetas',487 'app_id': "250528",488 'BDUSS': session.cookies['BDUSS'],489 't': str(int(time.time())),490 'bdstoken': token491 }492 print(token)493 baibupan_header = {
"Referer": "http://pan.baidu.com/disk/home",494 "User-Agent": "netdisk;4.6.2.0;PC;PC-Windows;10.0.10240;WindowsBaiduYunGuanJia"}495 header = dict(baibupan_header.items())496 uri='filemetas?blocks=0&dlink=1'497 url='http://pan.baidu.com/api/{0}'.format(uri)498 print(url)499 if '?' in url:500 api = "%s&%s" % (url, urlencode(params))501 else:502 api = '%s?%s' % (url, urlencode(params))503 print(api)504 print(data)505 response=session.post(api,data=data,verify=False,headers=header)506 return response507 # return self._request('filemetas?blocks=0&dlink=1', 'filemetas', data=data, **kwargs)508 509 if __name__ == '__main__':510 user='xxx'511 password='xxx'512 513 cur_gid = get_gid()514 cur_callback = get_callback()515 cur_token = get_token(cur_gid, cur_callback)516 # print("token:%s" %(cur_token))517 cur_pubkey, cur_key = get_rsa_key(cur_token, cur_gid, cur_callback)518 encript_password = encript_password(password, cur_pubkey)519 login(cur_token, cur_gid, cur_callback, cur_key, user, encript_password)520 # print("cookies:%s" %(session.cookies['BDUSS']))521 522 res=upload("/hello/word.py",open("test_BaiduPan.py",'rb'),cur_token,callback=progressbar)523 print(res.content.decode('utf-8'))524 525 526 # res=rapidupload("/hello/traindata.js",open("login.js",'rb'),cur_token,callback=progressbar)527 # print(json.loads(res.content.decode('utf-8')))528 529 530 # download("/hello/words.txt","word.txt",cur_token)531 # print(res.content.decode('utf-8'))532 533 # res=get_filesize("/hello/words",cur_token)534 # print(res.content.decode('utf-8'))535 536 # res=meta("/hello/words.txt",cur_token)537 # print(res.content)

 

1 #-*- coding:utf-8 -*-  2 __author__ = 'Administrator'  3   4 import time  5 import json  6 import re  7 import requests  8 import execjs  9 import base64 10 from urllib.parse import urlencode 11 from requests_toolbelt import MultipartEncoder 12 from Crypto.Cipher import PKCS1_v1_5 13 from Crypto.PublicKey import RSA 14 from hashlib import md5 15 from zlib import crc32 16 # import progressbar 17 import sys 18 from contextlib import closing 19 import time 20 import os 21 from io import BytesIO 22  23 try: 24     requests.packages.urllib3.disable_warnings() 25 except: 26     pass 27  28 # class BufferReader(MultipartEncoder): 29 #     """将multipart-formdata转化为stream形式的Proxy类 30 #     """ 31 #     def __init__(self, fields, boundary=None, callback=None, cb_args=(), cb_kwargs=None): 32 #         self._callback = callback 33 #         self._progress = 0 34 #         self._cb_args = cb_args 35 #         self._cb_kwargs = cb_kwargs or {} 36 #         super(BufferReader, self).__init__(fields, boundary) 37 # 38 #     def read(self, size=None): 39 #         chunk = super(BufferReader, self).read(size) 40 #         self._progress += int(len(chunk)) 41 #         self._cb_kwargs.update({
42 # 'size': self._len, 43 # 'progress': self._progress 44 # }) 45 # if self._callback: 46 # try: 47 # self._callback(*self._cb_args, **self._cb_kwargs) 48 # except: # catches exception from the callback 49 # # raise CancelledError('The upload was cancelled.') 50 # pass 51 # return chunk 52 53 class BufferReader(BytesIO): 54 """ 55 """ 56 def __init__(self, filebytes, callback=None): 57 self._callback = callback 58 self._progress = 0 59 self._size =len(filebytes) 60 super(BufferReader, self).__init__(filebytes) 61 62 def read(self, size=-1): 63 chunk_size=8192 64 chunk = BytesIO.read(self,chunk_size) 65 self._progress += int(len(chunk)) 66 if self._callback: 67 self._callback(self._size,self._progress) 68 return chunk 69 70 class PCSBase(): 71 def __init__(self,username,password): 72 self.session=requests.session() 73 self.headers = {
'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_4) AppleWebKit/537.36 ' 74 '(KHTML, like Gecko) Chrome/57.0.2987.133 Safari/537.36', 75 } 76 self.session.get('https://pan.baidu.com', headers=self.headers) 77 self.username=username 78 self.password=password 79 self.user={} 80 self.cur_gid=self.get_gid() 81 self.cur_callback=self.get_callback() 82 self.cur_time=self._get_curtime() 83 self._initiate()#登录成功,并获取session.cookies 84 85 def _initiate(self): 86 self.user['token']= self.get_token() 87 # print("token:%s" %(self.get_token())) 88 self.login() 89 # print("cookies:%s" %(session.cookies['BDUSS'])) 90 def _get_runntime(self): 91 """ 92 :param path: 加密js的路径,注意js中不要使用中文!估计是pyexecjs处理中文还有一些问题 93 :return: 编译后的js环境,不清楚pyexecjs这个库的用法的请在github上查看相关文档 94 """ 95 phantom = execjs.get() # 这里必须为phantomjs设置环境变量,否则可以写phantomjs的具体路径 96 with open('login.js', 'r') as f: 97 source = f.read() 98 return phantom.compile(source) 99 100 def get_gid(self):101 return self._get_runntime().call('getGid')102 103 def get_callback(self):104 return self._get_runntime().call('getCallback')105 106 def _get_curtime(self):107 return int(time.time()*1000)108 # 抓包也不是百分百可靠啊,这里?getapi一定要挨着https://passport.baidu.com/v2/api/写,才会到正确的路由109 def get_token(self):110 get_data = {111 'tpl': 'netdisk',112 'subpro': 'netdisk_web',113 'apiver': 'v3',114 'tt':self.cur_time,115 'class': 'login',116 'gid': self.cur_gid,117 'logintype': 'basicLogin',118 'callback': self.cur_callback119 }120 self.headers.update(dict(Referer='http://pan.baidu.com/', Accept='*/*', Connection='keep-alive', Host='passport.baidu.com'))121 resp = self.session.get(url='https://passport.baidu.com/v2/api/?getapi', params=get_data, headers=self.headers)122 if resp.status_code == 200 and self.cur_callback in resp.text:123 # 如果json字符串中带有单引号,会解析出错,只有统一成双引号才可以正确的解析124 #data = eval(re.search(r'.*?\((.*)\)', resp.text).group(1))125 data = json.loads(re.search(r'.*?\((.*)\)', resp.text).group(1).replace("'", '"'))126 return 
data.get('data').get('token')127 else:128 print('获取token失败')129 return None130 131 def get_rsa_key(self):132 get_data = {133 'token': self.user['token'],134 'tpl': 'netdisk',135 'subpro': 'netdisk_web',136 'apiver': 'v3',137 'tt': self.cur_time,138 'gid': self.cur_gid,139 'callback': self.cur_callback140 }141 resp = self.session.get(url='https://passport.baidu.com/v2/getpublickey', headers=self.headers, params=get_data)142 if resp.status_code == 200 and self.cur_callback in resp.text:143 data = json.loads(re.search(r'.*?\((.*)\)', resp.text).group(1).replace("'", '"'))144 return data.get('pubkey'), data.get('key')145 else:146 print('获取rsa key失败')147 return None148 149 def encript_password(self,pubkey):150 """151 import rsa152 使用rsa库加密(法一)153 pub = rsa.PublicKey.load_pkcs1_openssl_pem(pubkey.encode('utf-8'))154 encript_passwd = rsa.encrypt(password.encode('utf-8'), pub)155 return base64.b64encode(encript_passwd).decode('utf-8')156 157 """158 # pubkey必须为bytes类型159 pub=RSA.importKey(pubkey.encode('utf-8'))160 #构造“加密器”161 encryptor=PKCS1_v1_5.new(pub)162 #加密的内容必须为bytes类型163 encript_passwd =encryptor.encrypt(self.password.encode('utf-8'))164 return base64.b64encode(encript_passwd).decode('utf-8')165 166 def login(self):167 cur_pubkey, cur_key = self.get_rsa_key()168 encript_password =self.encript_password(cur_pubkey)169 post_data = {170 'staticpage': 'http://pan.baidu.com/res/static/thirdparty/pass_v3_jump.html',171 'charset': 'utf-8',172 'token': self.user['token'],173 'tpl': 'netdisk',174 'subpro': 'netdisk_web',175 'apiver': 'v3',176 'tt': self.cur_time,177 'codestring': '',178 'safeflg': 0,179 'u': 'http://pan.baidu.com/disk/home',180 'isPhone': '',181 'detect': 1,182 'gid': self.cur_gid,183 'quick_user': 0,184 'logintype': 'basicLogin',185 'logLoginType': 'pc_loginBasic',186 'idc': '',187 'loginmerge': 'true',188 'foreignusername': '',189 'username': self.username,190 'password': encript_password,191 'mem_pass': 'on',192 # 返回的key193 'rsakey': cur_key,194 
'crypttype': 12,195 'ppui_logintime': 33554,196 'countrycode': '',197 'callback': 'parent.'+self.cur_callback198 }199 resp = self.session.post(url='https://passport.baidu.com/v2/api/?login', data=post_data, headers=self.headers)200 if 'err_no=0' in resp.text:201 print('登录成功')202 self.user['BDUSS'] = self.session.cookies['BDUSS']203 else:204 print('登录失败')205 self.user['BDUSS']=None206 207 def _request(self,url,data=None,files=None,extra_params=None,callback=None):208 params={209 'app_id': "250528",210 'BDUSS': self.user['BDUSS'],211 't': str(int(time.time())),212 'bdstoken': self.user['token']213 }214 if extra_params:215 params.update(extra_params)216 # print("params:%s" %params)217 baibupan_header = {
"Referer": "http://pan.baidu.com/disk/home",218 "User-Agent": "netdisk;4.6.2.0;PC;PC-Windows;10.0.10240;WindowsBaiduYunGuanJia"}219 header= dict(baibupan_header.items())220 if data or files:221 api = '%s?%s' % (url, urlencode(params))222 # print("api:%s" %api)223 if data:224 res=self.session.post(api,data=data,verify=False, headers=header)225 return res226 else:227 # print(callback==None)228 (filedata,contenttype)=requests.packages.urllib3.filepost.encode_multipart_formdata(files) body=BufferReader(filedata,callback=callback) 229 # print("body:%s" %type(body))230 header.update({231 "Content-Type": contenttype232 })233 # print("header:%s" %header)234 res=self.session.post(api,data=body,verify=False, headers=header)235 return res236 else:237 res=self.session.get(url,params=params,verify=False, headers=header,stream=True)238 return res239 240 class PCS(PCSBase):241 def __init__(self,username,password):242 self.username=username243 self.password=password244 super(PCS,self).__init__(self.username,self.password)245 246 def upload(self,remote_path,file_handler,callback=None):247 params={248 'method': 'upload',249 'path': remote_path,250 'ondup': "newcopy"251 }252 files = {
'file': (str(int(time.time())), file_handler)}253 url = 'https://{0}/rest/2.0/pcs/file'.format('pcs.baidu.com')254 response=self._request(url,files=files,extra_params=params,callback=callback)255 return response256 257 def rapid_upload(self,remote_path,file_handler,callback=None):258 params={259 'method':"rapidupload",260 'path':remote_path,261 'ondup':"newcopy"262 }263 url = 'https://{0}/rest/2.0/pcs/file'.format('pcs.baidu.com')264 file_handler.seek(0, 2)265 _BLOCK_SIZE = 2 ** 20 #1MB大小266 # print(_BLOCK_SIZE)267 content_length = file_handler.tell()268 # print(content_length)269 file_handler.seek(0)270 271 # 校验段为前 256KB272 first_256bytes = file_handler.read(256 * 1024)273 slice_md5 = md5(first_256bytes).hexdigest()274 275 content_crc32 = crc32(first_256bytes).conjugate()276 content_md5 = md5(first_256bytes)277 278 count=1279 while True:280 block = file_handler.read(_BLOCK_SIZE)281 if callback:282 callback(size=content_length,progress=count*_BLOCK_SIZE)283 count=count+1284 if not block:285 break286 # 更新crc32和md5校验值287 content_crc32 = crc32(block, content_crc32).conjugate()288 content_md5.update(block)289 data = {290 'content-length': content_length,291 'content-md5': content_md5.hexdigest(),292 'slice-md5': slice_md5,293 'content-crc32': '%d' % (content_crc32.conjugate() & 0xFFFFFFFF)294 }295 response=self._request(url,data=data,extra_params=params,callback=callback)296 return response297 298 def download(self,remote_path,local_path,callback=None):299 params={300 'method':"download",301 'path':remote_path302 }303 # 兼容原有域名pcs.baidu.com;使用新域名d.pcs.baidu.com,则提供更快、更稳定的下载服务304 url = 'https://{0}/rest/2.0/pcs/file'.format('d.pcs.baidu.com')305 with closing(self._request(url, extra_params=params)) as response:306 chunk_size=1024 #单次请求最大值307 count=1308 total_size=int(response.headers['content-length']) #内容体总大小309 with open(local_path,'wb') as file:310 for data in response.iter_content(chunk_size=chunk_size):311 file.write(data)312 
self.progressbar(size=total_size,progress=count*chunk_size,progress_title="正在下载",finish_title="下载完成")313 count=count+1314 315 def progressbar(self,size=None, progress=None,progress_title="正在上传",finish_title="上传完成"):316 #size:文件总字节数 progress:当前传输完成字节数317 # print("{0} / {1}".format(size, progress))318 if progress

 

转载于:https://www.cnblogs.com/yizhenfeng168/p/7067966.html

你可能感兴趣的文章
袋鼠云日志,日志分析没那么容易
查看>>
缓存穿透 缓存雪崩 缓存并发
查看>>
了解你的Linux系统:必须掌握的20个命令
查看>>
js setInterval 启用&停止
查看>>
knockoutJS学习笔记04:监控属性
查看>>
Linux下启动/关闭Oracle
查看>>
session和cookie的区别
查看>>
oracle 数据库、实例、服务名、SID
查看>>
web.xml文件的作用
查看>>
linux下oracle调试小知识
查看>>
alert弹出窗口,点击确认后关闭页面
查看>>
oracle问题之数据库恢复(三)
查看>>
单点登陆(SSO)
查看>>
HR,也确实“尽职尽责”
查看>>
MaxComputer 使用客户端配置
查看>>
20190823 顺其自然
查看>>
阅读《余生有你,人间值得》有感
查看>>
每日英语
查看>>
SpringCloud+feign 基于Springboot2.0 负载均衡
查看>>
【BZOJ5094】硬盘检测 概率
查看>>