diff --git a/README.md b/README.md index 1acd2e8..2bd9a61 100644 --- a/README.md +++ b/README.md @@ -24,5 +24,9 @@ 23、itchat 获取微信群或者微信好友分享文章
24、爬取微信公众号历史文章
25、itchat监听指定微信公众号分享的文章
+26、itchat微信群微信好友防撤回
+27、在微信群之间转发消息
+28、下载bilibili视频 也可以下载哔哩哔哩集合视频
+29、爬取m3u8视频
详细请移步简书[Python文集](http://www.jianshu.com/nb/18442681) diff --git a/gongzhonghao.jpeg b/gongzhonghao.jpeg new file mode 100644 index 0000000..0906f8a Binary files /dev/null and b/gongzhonghao.jpeg differ diff --git "a/\347\210\254\350\231\253\345\260\217demo/29 PythonCeHui.py" "b/\347\210\254\350\231\253\345\260\217demo/29 PythonCeHui.py" new file mode 100644 index 0000000..6c5de66 --- /dev/null +++ "b/\347\210\254\350\231\253\345\260\217demo/29 PythonCeHui.py" @@ -0,0 +1,131 @@ +import sys +import os, re, shutil, time, collections, json + +from html.parser import HTMLParser +from xml.etree import ElementTree as ETree + +import itchat +from itchat.content import * + +msg_store = collections.OrderedDict() +timeout = 600 +sending_type = {'Picture': 'img', 'Video': 'vid'} +data_path = 'data' +nickname = '' +bot = None + +if __name__ == '__main__': + if not os.path.exists(data_path): + os.mkdir(data_path) + # if the QR code doesn't show correctly, you can try to change the value + # of enableCmdQR to 1 or -1 or -2. If nothing works, you can change it to + # enableCmdQR=True and a picture will show up. 
+ bot = itchat.new_instance() + bot.auto_login(hotReload=True, enableCmdQR=2) + nickname = bot.loginInfo['User']['NickName'] + +def clear_timeouted_message(): + now = time.time() + count = 0 + for k, v in list(msg_store.items()): + if now - v['ReceivedTime'] > timeout: + count += 1 + else: + break + for i in range(count): + item = msg_store.popitem(last=False) + +def get_sender_receiver(msg): + sender = nickname + receiver = nickname + if msg['FromUserName'][0:2] == '@@': # group chat + sender = msg['ActualNickName'] + m = bot.search_chatrooms(userName=msg['FromUserName']) + if m is not None: + receiver = m['NickName'] + elif msg['ToUserName'][0:2] == '@@': # group chat by myself + if 'ActualNickName' in msg: + sender = msg['ActualNickName'] + else: + m = bot.search_friends(userName=msg['FromUserName']) + if m is not None: + sender = m['NickName'] + m = bot.search_chatrooms(userName=msg['ToUserName']) + if m is not None: + receiver = m['NickName'] + else: # personal chat + m = bot.search_friends(userName=msg['FromUserName']) + if m is not None: + sender = m['NickName'] + m = bot.search_friends(userName=msg['ToUserName']) + if m is not None: + receiver = m['NickName'] + return HTMLParser().unescape(sender), HTMLParser().unescape(receiver) + +def print_msg(msg): + msg_str = ' '.join(msg) + print(msg_str) + return msg_str + +def get_whole_msg(msg, download=False): + sender, receiver = get_sender_receiver(msg) + if len(msg['FileName']) > 0 and len(msg['Url']) == 0: + if download: # download the file into data_path directory + fn = os.path.join(data_path, msg['FileName']) + msg['Text'](fn) + if os.path.getsize(fn) == 0: + return [] + c = '@%s@%s' % (sending_type.get(msg['Type'], 'fil'), fn) + else: + c = '@%s@%s' % (sending_type.get(msg['Type'], 'fil'), msg['FileName']) + return ['[%s]->[%s]:' % (sender, receiver), c] + c = msg['Text'] + if len(msg['Url']) > 0: + try: # handle map label + content_tree = ETree.fromstring(msg['OriContent']) + if content_tree is not None: 
+ map_label = content_tree.find('location') + if map_label is not None: + c += ' ' + map_label.attrib['poiname'] + c += ' ' + map_label.attrib['label'] + except: + pass + url = HTMLParser().unescape(msg['Url']) + c += ' ' + url + return ['[%s]->[%s]: %s' % (sender, receiver, c)] + +@bot.msg_register([TEXT, PICTURE, MAP, CARD, SHARING, RECORDING, + ATTACHMENT, VIDEO, FRIENDS], isFriendChat=True, isGroupChat=True) +def normal_msg(msg): + print_msg(get_whole_msg(msg)) + now = time.time() + msg['ReceivedTime'] = now + msg_id = msg['MsgId'] + msg_store[msg_id] = msg + clear_timeouted_message() + +@bot.msg_register([NOTE], isFriendChat=True, isGroupChat=True) +def note_msg(msg): + print_msg(get_whole_msg(msg)) + content = HTMLParser().unescape(msg['Content']) + try: + content_tree = ETree.fromstring(content) + except Exception: + # invent/remove to chatroom + return + if content_tree is None: + return + revoked = content_tree.find('revokemsg') + if revoked is None: + return + old_msg_id = revoked.find('msgid').text + old_msg = msg_store.get(old_msg_id) + if old_msg is None: + return + msg_send = get_whole_msg(old_msg, download=True) + for m in msg_send: + bot.send(m, toUserName='filehelper') + clear_timeouted_message() + +if __name__ == '__main__': + bot.run() \ No newline at end of file diff --git "a/\347\210\254\350\231\253\345\260\217demo/30 PythonZhuanFa.py" "b/\347\210\254\350\231\253\345\260\217demo/30 PythonZhuanFa.py" new file mode 100644 index 0000000..dd2d199 --- /dev/null +++ "b/\347\210\254\350\231\253\345\260\217demo/30 PythonZhuanFa.py" @@ -0,0 +1,174 @@ + +import sys +reload(sys) +sys.setdefaultencoding('UTF8') + +import os, re, shutil, time, collections, json +import requests +from HTMLParser import HTMLParser +from xml.etree import ElementTree as ETree +import hashlib + +import itchat +from itchat.content import * + +sending_type = {'Picture': 'img', 'Video': 'vid'} +data_path = 'data' +group_uin = {u'技术群1': '42235582@chatroom', + u'技术群2': 
'2424504406@chatroom', + u'技术群3': '6203978346@chatroom'} +publishers = {u'技术群1': u'[阴险]', + u'技术群2': u'[菜刀]', + u'技术群3': u'[月亮]'} +subscribers = [u'技术群1', u'技术群2', u'技术群3'] +nickname = '' +bot = None +as_chat_bot = True + +if __name__ == '__main__': + if not os.path.exists(data_path): + os.mkdir(data_path) + # if the QR code doesn't show correctly, you can try to change the value + # of enableCdmQR to 1 or -1 or -2. It nothing works, you can change it to + # enableCmdQR=True and a picture will show up. + bot = itchat.new_instance() + bot.auto_login(hotReload=True, enableCmdQR=2) + nickname = bot.loginInfo['User']['NickName'] + +# tuling chat bot +def talks_robot(info): + api_url = 'http://www.tuling123.com/openapi/api' + apikey = '' + data = {'key': apikey, 'info': info.lower()} + req = requests.post(api_url, data=data, timeout=10).text + replys = json.loads(req)['text'] + return replys + +def get_sender_receiver(msg): + sender = nickname + receiver = nickname + if msg['FromUserName'][0:2] == '@@': # group chat + sender = msg['ActualNickName'] + m = bot.search_chatrooms(userName=msg['FromUserName']) + if m is not None: + receiver = m['NickName'] + elif msg['ToUserName'][0:2] == '@@': # group chat by myself + if 'ActualNickName' in msg: + sender = msg['ActualNickName'] + else: + m = bot.search_friends(userName=msg['FromUserName']) + if m is not None: + sender = m['NickName'] + m = bot.search_chatrooms(userName=msg['ToUserName']) + if m is not None: + receiver = m['NickName'] + else: # personal chat + m = bot.search_friends(userName=msg['FromUserName']) + if m is not None: + sender = m['NickName'] + m = bot.search_friends(userName=msg['ToUserName']) + if m is not None: + receiver = m['NickName'] + return HTMLParser().unescape(sender), HTMLParser().unescape(receiver) + +def print_msg(msg): + msg_str = ' '.join(msg) + print msg_str + return msg_str + +def get_whole_msg(msg, prefix, download=False): + if len(msg['FileName']) > 0 and len(msg['Url']) == 0: + if download: 
# download the file into data_path directory + fn = os.path.join(data_path, msg['FileName']) + msg['Text'](fn) + if os.path.getsize(fn) == 0: + return [] + c = '@%s@%s' % (sending_type.get(msg['Type'], 'fil'), fn) + else: + c = '@%s@%s' % (sending_type.get(msg['Type'], 'fil'), msg['FileName']) + return ['%s:' % (prefix), c] + c = msg['Text'] + if len(msg['Url']) > 0: + if len(msg['OriContent']) > 0: + try: # handle map label + content_tree = ETree.fromstring(msg['OriContent']) + if content_tree is not None: + map_label = content_tree.find('location') + if map_label is not None: + c += ' ' + map_label.attrib['poiname'] + c += ' ' + map_label.attrib['label'] + except: + pass + url = HTMLParser().unescape(msg['Url']) + c += ' ' + url + return ['%s: %s' % (prefix, c)] + +@bot.msg_register([TEXT], isFriendChat=True, isGroupChat=False) +def personal_msg(msg): + global as_chat_bot + text = msg['Text'].strip() + if text == u'闭嘴': + as_chat_bot = False + if text == u'张嘴吃药': + as_chat_bot = True + return talks_robot(text) + +@bot.msg_register([FRIENDS]) +def accept_friend(msg): + bot.add_friend(msg['RecommendInfo']['UserName'], 3) + +@bot.msg_register([TEXT, PICTURE, MAP, SHARING, RECORDING, ATTACHMENT, VIDEO], + isFriendChat=False, isGroupChat=True) +def group_msg(msg): + # chat bot functionality + global as_chat_bot + if 'IsAt' in msg and msg['IsAt'] == True and \ + msg['Type'] == 'Text' and \ + msg['ToUserName'][0:2] != '@@' and \ + msg['Text'].find(u'@' + nickname) >= 0: + text = msg['Text'].replace(u'@' + nickname, '').strip() + if text == u'shit': + as_chat_bot = False + return + if as_chat_bot: + info = talks_robot(text) + if info.find('No Know') >= 0: + return + if info.find('No Can') >= 0: + return + if info.find('Sorry') >= 0: + return + return info + return + # forwarding functionality + group = msg['FromUserName'] + if msg['ToUserName'][0:2] == '@@': # message sent by myself + group = msg['ToUserName'] +sender, receiver = get_sender_receiver(msg) +if sender == 
'': + sender = nickname + # check if the message is from the publisher groups + if receiver not in publishers: # if not in the publishers, do nothing + return +# turn on the chat bot if this magic happens +if msg['Type'] == 'Text' and \ + hashlib.sha256(msg['Text']).hexdigest()[-2:] == '23': + as_chat_bot = True +# process message and send it to all the subscribed groups +prefix = '%s[%s]' % (publishers[receiver], sender) +msg_send = get_whole_msg(msg, prefix=prefix, download=True) +if len(msg_send) == 0: + return + print_msg(msg_send) + for tosend in subscribers: + room = bot.search_chatrooms(name=tosend) + for r in room: + if r['UserName'] == group: # don't send back to the source + continue + if r['NickName'] != tosend: # check group name exact match + continue + for m in msg_send: # iterate messages (for images, videos, and files) + bot.send(m, toUserName=r['UserName']) + +if __name__ == '__main__': + bot.run() diff --git "a/\347\210\254\350\231\253\345\260\217demo/31 \344\270\213\350\275\275bilibili\350\247\206\351\242\221.py" "b/\347\210\254\350\231\253\345\260\217demo/31 \344\270\213\350\275\275bilibili\350\247\206\351\242\221.py" new file mode 100644 index 0000000..85e2e27 --- /dev/null +++ "b/\347\210\254\350\231\253\345\260\217demo/31 \344\270\213\350\275\275bilibili\350\247\206\351\242\221.py" @@ -0,0 +1,60 @@ +import requests +from lxml import html +import re +import urllib3 +urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning) + +def star(url): + url2 = "https://api.bilibili.com/x/player/playurl?avid={avid}&cid={cid}&qn=32&type=&otype=json" + headers2 = { + "host": "", + "Referer": "https://www.bilibili.com", + "User-Agent": "Mozilla/5.0(Windows NT 10.0;WOW64) AppleWebKit/537.36(KHTML,likeGecko)Chrome/63.0.3239.132Safari/537.36" + } + + avid = re.findall("video/av(.+)\?", url) + print(avid) + cid ,name = get_cid(avid[0]) + print(cid,name) + flv_url , size = get_flvurl(url2.format(avid=avid[0],cid=cid)) + shuju = size / 1024 / 1024 + 
print("本视频大小为:%.2fM" % shuju) + + h = re.findall("https://(.+)com",flv_url) + host = h[0]+"com" + + headers2["host"] = host + res = requests.get(flv_url,headers=headers2,stream=True, verify=False) + print(res.status_code) + save_movie(res,name) + +def get_cid(aid):#获得cid + header = { + 'host': 'api.bilibili.com', + 'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64; rv:6.0) Gecko/20100101 Firefox/6.0' + } + url = "https://api.bilibili.com/x/player/pagelist?aid={aid}&jsonp=jsonp".format(aid=aid) + response = requests.get(url,headers=header).json() + # print(response["data"]) + # 这个地方设置index是因为下载集合里面的视频,顺序,0代表下载第一个视频,1代表下载集合里面第二个视频,2,3,4...依次类推 + index = 0 + return response["data"][index]["cid"] ,response["data"][index]["part"] +def get_flvurl(url):#获得视频真实flv地址 + header = {'host': 'api.bilibili.com', + 'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64; rv:6.0) Gecko/20100101 Firefox/6.0'} + + response = requests.get(url,headers=header).json() + return response["data"]["durl"][0]["url"],response["data"]["durl"][0]["size"] +def save_movie(res,name):#保存视频 + chunk_size = 1024 + with open("{name}.flv".format(name = name),"wb") as f: + for data in res.iter_content(1024): + f.write(data) + + +if __name__ == "__main__": + # 把下面的av后面的'583959574'在要下载的视频集合里面找到就可以下载视频了 + url = "https://www.bilibili.com/video/av583959574?spm_id_from=333.334.b_62696c695f646f756761.5" + star(url) + + diff --git "a/\347\210\254\350\231\253\345\260\217demo/32 m3u8.py" "b/\347\210\254\350\231\253\345\260\217demo/32 m3u8.py" new file mode 100644 index 0000000..53f8f46 --- /dev/null +++ "b/\347\210\254\350\231\253\345\260\217demo/32 m3u8.py" @@ -0,0 +1,69 @@ +import requests +import re +from Crypto.Cipher import AES + +def m3u8(url): + header = { + 'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_2) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/71.0.3578.98 Safari/537.36' + } + # requests得到m3u8文件内容 + content = requests.get(url, headers=header).text + if "#EXTM3U" not in content: + 
print("这不是一个m3u8的视频链接!") + return False + if "EXT-X-KEY" not in content: + print("没有加密") + return False + + # 使用re正则得到key和视频地址 + jiami = re.findall('#EXT-X-KEY:(.*)',content) + key = re.findall('URI="(.*)"', jiami[0]) + vi = re.findall('IV=(.*)', jiami[0])[0] + + # 得到每一个ts视频链接 + + # tslist = re.findall('EXTINF:(.*), (. *)',content.replace(' ', '').replace(r'\n', '')) + tslist = re.findall('v.f240.ts(.*)',content) + + newlist = [] + for i in tslist: + newlist.append("v.f240.ts" + i) + # print(newlist) + # 得到key的链接并请求得到加密的key值 + keyurl = key[0] + keycontent = requests.get(keyurl, headers=header).content + + # 得到每一个完整视频的链接地址 + base_url = url.replace(url.split('/')[-1], '') + # print(base_url) + tslisturl = [] + for i in newlist: + tsurl = base_url + i + tslisturl.append(tsurl) + + # 得到解密方法,这里要导入第三方库 pycrypto + # 这里有一个问题,安装pycrypto成功后,导入from Crypto.Cipher import AES报错 + # 找到使用python环境的文件夹,在Lib文件夹下有一个 site-packages 文件夹,里面是我们环境安装的包。 + # 找到一个crypto文件夹,打开可以看到 Cipher文件夹,此时我们将 crypto文件夹改为 Crypto 即可使用了 + # 必须添加b'0000000000000000',防止报错ValueError: IV must be 16 bytes long + cryptor = AES.new(keycontent, AES.MODE_CBC, b'0000000000000000') + + # for循环获取视频文件 + for i in tslisturl: + print(i) + res = requests.get(i, header) + # 使用解密方法解密得到的视频文件 + cont = cryptor.decrypt(res.content) + # 以追加的形式保存为mp4文件,mp4可以随意命名,这里命名为小鹅通视频下载测试 + with open('14-搜索组件界面实现.mp4', 'ab+') as f: + f.write(cont) + return True + +if __name__ == '__main__': + # 这个是网页上查到的小鹅通的卖u8地址 + # url = "https://1252524126.vod2.myqcloud.com/9764a7a5vodtransgzp1252524126/91c29aad5285890807164109582/drm/v.f146750.m3u8" + # url = "https://1258102968.vod2.myqcloud.com/ed7d8254vodtranscq1258102968/a61912e43701925923160746329/drm/v.f240.m3u8?t=62dfad73&us=DYws6oOg3A&sign=1d4381d06b276e87eae478a23f3d6375" + url = "https://1258102968.vod2.myqcloud.com/ed7d8254vodtranscq1258102968/a3ae8ff93701925923160630524/drm/v.f240.m3u8?t=62dfaf5a&us=RquNSsL6XT&sign=8bec9ca974f9413c9bad7a9e8d620ae2" + pd = m3u8(url) + if pd: + print('视频下载完成!') \ No 
newline at end of file