⭐ hls v6版本更新

fulln · Aug 22, 2020 · c0ecebd · c0ecebd
1 parent 3d44415
commit c0ecebd
Show file tree

Hide file tree

Showing 3 changed files with 240 additions and 13 deletions.
diff --git a/.gitignore b/.gitignore
@@ -19,6 +19,7 @@ lib64/
 parts/
 sdist/
 var/
+video/
 wheels/
 pip-wheel-metadata/
 share/python-wheels/

diff --git a/hls/ts_download_v5.py b/hls/ts_download_v5.py
@@ -2,6 +2,8 @@
 # -*- coding: utf-8 -*-
 
 from concurrent.futures import ThreadPoolExecutor
+from tqdm import tqdm
+
 import requests
 import threading
 import os
@@ -18,6 +20,16 @@
         'sec-fetch-site': 'cross-site',
         'Accept-Encoding':'gzip,deflate,br'}
 
+# Request headers sent by getkey() when it downloads the decryption key.
+# NOTE(review): 'br' is advertised in accept-Encoding; requests only decodes
+# Brotli bodies when an optional brotli package is installed - confirm the
+# key server never answers with Content-Encoding: br.
+key_head = {
+    'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/63.0.3239.132 Safari/537.36 QIHU 360SE',
+    'accept-Encoding':'gzip,deflate,br',
+    'Accept':'*/*',
+    'content-type': 'video/mp2t',
+    'sec-fetch-dest': 'empty',
+    'sec-fetch-mode': 'cors',
+    'sec-fetch-site': 'cross-site',    
+}
+
 def Handler(url, i, path):
     '''
     请求链接下载ts各个文件
@@ -53,7 +65,7 @@ def Handler(url, i, path):
 
 def getkey(url,key_path,parentPath):
     try:
-        r = requests.get(url=url+key_path,headers=headers,timeout = 3,verify=False,stream=True)
+        r = requests.get(url=url+key_path,headers=key_head)
         if r.status_code == 200:
             with open(parentPath+key_path,'wb') as code:
                 code.write(r.content)
@@ -84,15 +96,15 @@ def getM3u8File(m3u8s, base_url, resName):
     getkey(base_url,key_path,path)
     if not os.path.exists(path):
         os.mkdir(path)
-    # 启动多线程写文件
-    print('start of ts:\t', tss[0])
-    print('end of ts:\t', tss[file_size - 1])
+    # 启动多线程写文件    
     print('ts files lenth:\t', file_size)
     print('start downloading,please wait mins~')
     with ThreadPoolExecutor(40) as executor:
         for each in tss:
             executor.submit(Handler, base_url, each, path)
 
+    #get_pmgressbar(file_size,path)
+
     # 等待所有线程下载完成
     main_thread = threading.current_thread()
     for t in threading.enumerate():
@@ -101,8 +113,18 @@ def getM3u8File(m3u8s, base_url, resName):
         t.join()
     time.sleep(1)
 
+
     print(resName, " download complete")
 
+def get_pmgressbar(total,parentPath):
+    '''
+    Show a one-shot tqdm snapshot of how many files exist in ``parentPath``.
+
+    total: expected number of files (the tqdm ``total``).
+    parentPath: directory the segments are being written to.
+
+    The regular files are counted in Python; the previous shell pipeline only
+    printed the count, because os.system returns the exit status, not the output.
+    '''
+    with tqdm(total=total) as bar:  # total is the expected number of iterations
+        time.sleep(2)  # optional pause before the directory is inspected
+        # Same selection as `ls -l | grep "^-"`: regular files only, no symlinks.
+        current = sum(1 for entry in os.scandir(parentPath)
+                      if entry.is_file(follow_symlinks=False))
+        bar.set_postfix(num_found=current)  # number of files found so far
+        bar.set_description("下载中>>>")  # label shown in front of the bar
 
 def download_file(text,context):
     '''
@@ -129,9 +151,7 @@ def prepare_parse(i):
     baseUrl = baseUrls[0].replace('https', 'http')
     names = re.findall(r'.*/(.*)\.m3u8', i, re.S)
     name = names[0]
-    print("m3u8Url\t", url)
-    print("baseUrl\t", baseUrl)
-    print("name\t", name)
+    print("m3u8Url\t", url)    
     return url,baseUrl,name
 
 def get_m3u8_file(url,name,parentPath):
@@ -164,6 +184,10 @@ def assemble_mp4(path,parentPath,resName):
         os.system(command)
 
 if __name__ == '__main__': 
+    seeds = [""]
+
+    if len(sys.argv) >0:
+        seeds = sys.argv
     # 准备工作：创建存放video文件夹
     parentPath = os.path.dirname(os.path.dirname(__file__))
 
@@ -173,11 +197,8 @@ def assemble_mp4(path,parentPath,resName):
     print('完整视频存放路径为:\t', videoPath)
     print('==============================================================')
 
-    # 开始解析url，下载视频文件
-    # with open('seed3.txt') as seed:
-    #     seeds = seed.readlines()·
-    seeds = ["https://q65ms8.cdnlab.live/hls/El6BESyJqpS_UYg1H51D9g/1588309328/7000/7230/7230.m3u8"]
-        # 遍历所有的url地址
+
+    # 遍历所有的url地址
 
     for i in seeds:
         url,baseUrl,name=prepare_parse(i)
@@ -189,5 +210,5 @@ def assemble_mp4(path,parentPath,resName):
         getM3u8File(m3u8s, baseUrl, name)
         # 开始拼装成mp4
         assemble_mp4(parentPath + "/video/%s/" % name,parentPath,name)
-
+        print("parse video ok!!!") 
 
diff --git a/hls/ts_download_v6.py b/hls/ts_download_v6.py
@@ -0,0 +1,205 @@
+#! /usr/bin/env python
+# -*- coding: utf-8 -*-
+
+from concurrent.futures import ThreadPoolExecutor
+import requests
+import threading
+import os
+import re
+import time
+import platform
+
+
+# Request headers sent with the playlist and the .ts segment downloads.
+# 'br' is deliberately not advertised: requests only decodes Brotli bodies
+# when an optional brotli package is installed, and an undecoded body would
+# fail the #EXTM3U check or be written to disk as a corrupt file.
+headers = {
+        'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/63.0.3239.132 Safari/537.36 QIHU 360SE',
+        'Accept':'*/*',
+        'sec-fetch-dest': 'empty',
+        'sec-fetch-mode': 'cors',
+        'sec-fetch-site': 'cross-site',
+        'Accept-Encoding':'gzip,deflate'}
+
+# Request headers sent by getkey() when it downloads the decryption key.
+key_head = {
+    'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/63.0.3239.132 Safari/537.36 QIHU 360SE',
+    'accept-Encoding':'gzip,deflate',
+    'Accept':'*/*',
+    'content-type': 'video/mp2t',
+    'sec-fetch-dest': 'empty',
+    'sec-fetch-mode': 'cors',
+    'sec-fetch-site': 'cross-site',
+}
+
+def Handler(url, i, path, max_retries=None):
+    '''
+    Download one .ts segment and save it inside ``path``.
+
+    url: base URL the segment entry is appended to.
+    i: segment entry taken from the playlist; '/' is stripped to build the local file name.
+    path: destination directory, concatenated as-is with the file name.
+    max_retries: number of retries allowed after the first attempt;
+        None (the default) keeps retrying until the segment is downloaded.
+
+    Returns True once the segment is written, False when the retries are
+    exhausted or the file cannot be written.
+    '''
+    count = 0
+    # Silence the warning requests emits for verify=False.
+    requests.packages.urllib3.disable_warnings()
+    while True:
+        if count > 0:
+            print("retry %d\t%s" % (count, url + i))
+        try:
+            r = requests.get(url + i, headers=headers, timeout=3, verify=False, stream=True)
+            if r.status_code == 200:
+                with open(path + i.replace('/', ''), "wb") as code:
+                    code.write(r.content)
+                return True
+        except requests.RequestException:
+            # Transient network problem: handled by the retry logic below.
+            pass
+        except OSError as e:
+            # A disk/path error will not fix itself, so retrying would loop forever.
+            print('write error\t', url + i, e)
+            return False
+        count += 1
+        if max_retries is not None and count > max_retries:
+            print('give up url\t', url + i)
+            return False
+        # Pause after every failed attempt (including non-200 answers) so the
+        # server is not hammered in a tight loop.
+        time.sleep(1)
+
+def getkey(url,key_path,parentPath):
+    '''
+    Download the decryption key and store it in ``parentPath``.
+
+    url: base URL that key_path is appended to.
+    key_path: key URI from the playlist; also used as the local file name.
+    parentPath: destination directory, concatenated as-is with key_path.
+
+    Returns True when the key was written, False otherwise. Failures are
+    reported on stdout and never raised.
+    '''
+    try:
+        # The timeout keeps an unresponsive key server from hanging the run.
+        r = requests.get(url=url+key_path,headers=key_head,timeout=10)
+        if r.status_code != 200:
+            print("---------------error-----------------")
+            print("key request failed, status\t", r.status_code)
+            return False
+        with open(parentPath+key_path,'wb') as code:
+            code.write(r.content)
+        return True
+    except (requests.RequestException, OSError) as e:
+        print("---------------error-----------------")
+        print(e)
+        return False
+
+def getM3u8File(m3u8s, base_url, resName):
+    '''
+    Extract the .ts segment entries from a playlist and download them with a thread pool.
+
+    m3u8s: text of the .m3u8 playlist.
+    base_url: URL prefix that every segment entry (and the key URI) is appended to.
+    resName: resource name; files are stored in <parent>/video/<resName>/.
+
+    If the playlist has an #EXT-X-KEY tag with a URI, that key is downloaded
+    first (only the first such tag is used). URLs hidden inside other '#'
+    lines are not handled.
+    '''
+    # Entries that end in .ts; tag lines that happen to match are dropped.
+    tss = [line for line in re.findall(r'.*\n(.*\.ts)', m3u8s)
+           if not line.startswith('#')]
+    # Look for the key on the whole playlist: the pattern above only captures
+    # lines ending in .ts, so the key tag is normally not among them.
+    key_match = re.search(r'#EXT-X-KEY:[^\n]*URI="([^"]*)"', m3u8s)
+    file_size = len(tss)
+    # Preparation for the threaded download: make sure the folder exists
+    # before anything (including the key) is written into it.
+    parentPath = os.path.dirname(os.path.dirname(__file__))
+    path = parentPath + "/video/%s/" % resName
+    os.makedirs(path, exist_ok=True)
+    # Unencrypted playlists have no key, so there is nothing to fetch then.
+    if key_match:
+        getkey(base_url, key_match.group(1), path)
+    # Start the threaded download.
+    print('ts files lenth:\t', file_size)
+    print('start downloading,please wait mins~')
+    # Leaving the with-block waits for every submitted task, so no manual
+    # join over threading.enumerate() is needed (it could block on threads
+    # that do not belong to the pool).
+    with ThreadPoolExecutor(40) as executor:
+        for each in tss:
+            executor.submit(Handler, base_url, each, path)
+
+    print(resName, " download complete")
+
+
+def download_file(text,context):
+    '''
+    Append the content ``context`` to the UTF-8 text file ``text``.
+
+    Used for the .m3u8 playlist and for a possible encryption file.
+    ``context`` is iterated and every item is passed to writelines().
+    '''
+    with open(text, mode='a', encoding='utf-8') as sink:
+        write_piece = sink.writelines
+        for piece in context:
+            write_piece(piece)
+
+def prepare_create(videoPath):
+    '''
+    Make sure the folder ``videoPath`` exists before files are written into it.
+
+    Missing parent folders are created as well, and an already existing
+    folder is left untouched.
+    '''
+    # makedirs with exist_ok avoids the race between the exists() check and
+    # mkdir(), and no longer fails when the parent folder is missing.
+    os.makedirs(videoPath, exist_ok=True)
+
+def prepare_parse(i):
+    '''
+    Split a playlist address into the pieces the downloader needs.
+
+    i: text containing the address of a .m3u8 playlist.
+
+    Returns (url, baseUrl, name): the playlist URL and the URL up to the last
+    '/', both with 'https' replaced by 'http', plus the playlist file name
+    without the .m3u8 extension.
+    '''
+    flags = re.S
+    url = re.findall(r'.*\.m3u8', i, flags)[0].replace('https', 'http')
+    baseUrl = re.findall(r'.*/', i, flags)[0].replace('https', 'http')
+    name = re.findall(r'.*/(.*)\.m3u8', i, flags)[0]
+    for label, value in (("m3u8Url\t", url), ("baseUrl\t", baseUrl), ("name\t", name)):
+        print(label, value)
+    return url,baseUrl,name
+
+def get_m3u8_file(url,name,parentPath):
+    '''
+    Download the .m3u8 playlist and store it as <parentPath>/video/<name>/<name>.m3u8.
+
+    Returns the playlist text.
+    Raises ValueError when the response does not contain the #EXTM3U marker.
+    '''
+    # The timeout keeps an unresponsive server from hanging the run.
+    response = requests.get(url,headers=headers,timeout=10)
+    if "#EXTM3U" not in response.text:
+        # ValueError instead of a bare BaseException, so `except Exception`
+        # handlers can deal with a wrong link.
+        raise ValueError("非M3U8的链接")
+    target = parentPath+'/video/%s/%s.m3u8'%(name,name)
+    # download_file appends, so remove the copy left by an earlier run;
+    # otherwise the stored playlist would list every segment twice.
+    if os.path.exists(target):
+        os.remove(target)
+    download_file(target, response.text)
+    return response.text
+
+def assemble_mp4(path,parentPath,resName):
+    '''
+    Merge the downloaded segments into <parentPath>/video/<resName>.mp4.
+
+    path: folder holding the .ts segments and the <resName>.m3u8 playlist.
+    parentPath: folder that contains the video/ output folder.
+    resName: resource name used for the playlist and the output file.
+
+    On Windows the segments are concatenated with ``copy /b`` and the segment
+    folder is removed afterwards; on other systems ffmpeg converts the local
+    playlist. Problems are reported on stdout.
+    '''
+    import shutil
+    import subprocess
+
+    videoPath = parentPath + '/video/%s.mp4' % resName
+    if platform.system() == "Windows":
+        # copy is a cmd builtin, so it still needs the shell; the paths are
+        # quoted so that spaces in them do not split the arguments.
+        cmdR = r'copy /b  "%s\*.ts"  "%s"' % (os.path.abspath(path), os.path.abspath(videoPath))
+        os.system(cmdR)
+        time.sleep(1)
+        # Same effect as `rmdir /s/q`, without building another shell command.
+        shutil.rmtree(os.path.abspath(path), ignore_errors=True)
+    else:
+        # Use ffmpeg to turn the local playlist into an mp4. An argument list
+        # keeps names taken from the URL away from the shell.
+        playlist = os.path.join(path, '%s.m3u8' % resName)
+        try:
+            finished = subprocess.run(['ffmpeg', '-i', playlist, videoPath])
+        except FileNotFoundError:
+            print('ffmpeg not found, cannot build\t', videoPath)
+            return
+        if finished.returncode == 0:
+            print(r"执行完成，视屏文件%s已经生成" % (videoPath))
+        else:
+            print('ffmpeg failed with exit code\t', finished.returncode)
+
+if __name__ == '__main__':
+    # sys is not among the imports at the top of this file.
+    import sys
+
+    # Preparation: locate and create the folder that stores the videos.
+    parentPath = os.path.dirname(os.path.dirname(__file__))
+
+    videoPath = parentPath + '/video/'
+    prepare_create(videoPath)
+
+    print('=============================注意=============================')
+    print('完整视频存放路径为:\t', videoPath)
+    print('==============================================================')
+
+    # sys.argv[0] is the script itself; only the remaining arguments are
+    # .m3u8 addresses.
+    seeds = sys.argv[1:]
+    if not seeds:
+        print('usage: python ts_download_v6.py <m3u8 url> [<m3u8 url> ...]')
+
+    # Walk through all given addresses.
+    for i in seeds:
+        url,baseUrl,name=prepare_parse(i)
+        # Create the folder for this video.
+        prepare_create(videoPath + name)
+        # Download the .m3u8 playlist.
+        m3u8s = get_m3u8_file(url, name,parentPath)
+        # Download the .ts segments.
+        getM3u8File(m3u8s, baseUrl, name)
+        # Assemble the segments into an mp4.
+        assemble_mp4(parentPath + "/video/%s/" % name,parentPath,name)
+
+
+
-Original file line number
+Diff line change
@@ Expand Up / @@ -19,6 +19,7 @@ lib64/ @@
     parts/
     sdist/
     var/
+    video/
     wheels/
     pip-wheel-metadata/
     share/python-wheels/
@@ Expand Down @@