Skip to content

Commit

Permalink
⭐ hls v6版本更新
Browse files Browse the repository at this point in the history
  • Loading branch information
fulln committed Aug 22, 2020
1 parent 3d44415 commit c0ecebd
Show file tree
Hide file tree
Showing 3 changed files with 240 additions and 13 deletions.
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ lib64/
parts/
sdist/
var/
video/
wheels/
pip-wheel-metadata/
share/python-wheels/
Expand Down
47 changes: 34 additions & 13 deletions hls/ts_download_v5.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,8 @@
# -*- coding: utf-8 -*-

from concurrent.futures import ThreadPoolExecutor
from tqdm import tqdm

import requests
import threading
import os
Expand All @@ -18,6 +20,16 @@
'sec-fetch-site': 'cross-site',
'Accept-Encoding':'gzip,deflate,br'}

# Request headers sent when fetching the decryption key file.
key_head = {
    'User-Agent': (
        'Mozilla/5.0 (Windows NT 6.1; WOW64) '
        'AppleWebKit/537.36 (KHTML, like Gecko) '
        'Chrome/63.0.3239.132 Safari/537.36 QIHU 360SE'
    ),
    'accept-Encoding': 'gzip,deflate,br',
    'Accept': '*/*',
    'content-type': 'video/mp2t',
    'sec-fetch-dest': 'empty',
    'sec-fetch-mode': 'cors',
    'sec-fetch-site': 'cross-site',
}

def Handler(url, i, path):
'''
请求链接下载ts各个文件
Expand Down Expand Up @@ -53,7 +65,7 @@ def Handler(url, i, path):

def getkey(url,key_path,parentPath):
try:
r = requests.get(url=url+key_path,headers=headers,timeout = 3,verify=False,stream=True)
r = requests.get(url=url+key_path,headers=key_head)
if r.status_code == 200:
with open(parentPath+key_path,'wb') as code:
code.write(r.content)
Expand Down Expand Up @@ -84,15 +96,15 @@ def getM3u8File(m3u8s, base_url, resName):
getkey(base_url,key_path,path)
if not os.path.exists(path):
os.mkdir(path)
# 启动多线程写文件
print('start of ts:\t', tss[0])
print('end of ts:\t', tss[file_size - 1])
# 启动多线程写文件
print('ts files lenth:\t', file_size)
print('start downloading,please wait mins~')
with ThreadPoolExecutor(40) as executor:
for each in tss:
executor.submit(Handler, base_url, each, path)

#get_pmgressbar(file_size,path)

# 等待所有线程下载完成
main_thread = threading.current_thread()
for t in threading.enumerate():
Expand All @@ -101,8 +113,18 @@ def getM3u8File(m3u8s, base_url, resName):
t.join()
time.sleep(1)


print(resName, " download complete")

def get_pmgressbar(total, parentPath):
    '''
    Show a one-shot tqdm progress snapshot of the download folder.

    total: expected number of segment files.
    parentPath: folder whose regular files are counted as "downloaded".
    '''
    with tqdm(total=total) as bar:  # total is the expected number of iterations
        time.sleep(2)  # give the download threads a moment to produce files
        # Count regular files directly: os.system() only returns the shell's
        # exit status, never the number printed by "ls | grep | wc -l".
        current = 0
        if os.path.isdir(parentPath):
            current = sum(
                1 for entry in os.listdir(parentPath)
                if os.path.isfile(os.path.join(parentPath, entry))
            )
        bar.update(current)  # move the bar to the number of files found
        bar.set_postfix(num_found=current)  # show how many files were found
        bar.set_description("下载中>>>")  # bar caption

def download_file(text,context):
'''
Expand All @@ -129,9 +151,7 @@ def prepare_parse(i):
baseUrl = baseUrls[0].replace('https', 'http')
names = re.findall(r'.*/(.*)\.m3u8', i, re.S)
name = names[0]
print("m3u8Url\t", url)
print("baseUrl\t", baseUrl)
print("name\t", name)
print("m3u8Url\t", url)
return url,baseUrl,name

def get_m3u8_file(url,name,parentPath):
Expand Down Expand Up @@ -164,6 +184,10 @@ def assemble_mp4(path,parentPath,resName):
os.system(command)

if __name__ == '__main__':
seeds = [""]

if len(sys.argv) >0:
seeds = sys.argv
# 准备工作:创建存放video文件夹
parentPath = os.path.dirname(os.path.dirname(__file__))

Expand All @@ -173,11 +197,8 @@ def assemble_mp4(path,parentPath,resName):
print('完整视频存放路径为:\t', videoPath)
print('==============================================================')

# 开始解析url,下载视频文件
# with open('seed3.txt') as seed:
# seeds = seed.readlines()·
seeds = ["https://q65ms8.cdnlab.live/hls/El6BESyJqpS_UYg1H51D9g/1588309328/7000/7230/7230.m3u8"]
# 遍历所有的url地址

# 遍历所有的url地址

for i in seeds:
url,baseUrl,name=prepare_parse(i)
Expand All @@ -189,5 +210,5 @@ def assemble_mp4(path,parentPath,resName):
getM3u8File(m3u8s, baseUrl, name)
# 开始拼装成mp4
assemble_mp4(parentPath + "/video/%s/" % name,parentPath,name)

print("parse video ok!!!")

205 changes: 205 additions & 0 deletions hls/ts_download_v6.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,205 @@
#! /usr/bin/env python
# -*- coding: utf-8 -*-

from concurrent.futures import ThreadPoolExecutor
import os
import platform
import re
import subprocess
import sys
import threading
import time

import requests


# Request headers sent when fetching the playlist and the .ts segments.
headers = {
    'User-Agent': (
        'Mozilla/5.0 (Windows NT 6.1; WOW64) '
        'AppleWebKit/537.36 (KHTML, like Gecko) '
        'Chrome/63.0.3239.132 Safari/537.36 QIHU 360SE'
    ),
    'Accept': '*/*',
    'sec-fetch-dest': 'empty',
    'sec-fetch-mode': 'cors',
    'sec-fetch-site': 'cross-site',
    'Accept-Encoding': 'gzip,deflate,br',
}

# Request headers sent when fetching the decryption key file.
key_head = {
    'User-Agent': (
        'Mozilla/5.0 (Windows NT 6.1; WOW64) '
        'AppleWebKit/537.36 (KHTML, like Gecko) '
        'Chrome/63.0.3239.132 Safari/537.36 QIHU 360SE'
    ),
    'accept-Encoding': 'gzip,deflate,br',
    'Accept': '*/*',
    'content-type': 'video/mp2t',
    'sec-fetch-dest': 'empty',
    'sec-fetch-mode': 'cors',
    'sec-fetch-site': 'cross-site',
}

def Handler(url, i, path, max_retries=20):
    '''
    Download one .ts segment and save it into the segment folder.

    url: base URL the segment path is appended to.
    i: segment path as listed in the playlist.
    path: destination folder prefix (expected to end with a separator).
    max_retries: how many extra attempts are made after the first failure.

    Returns True once the segment is written, False when every attempt
    failed (request error or non-200 status).
    '''
    # Silence the warning that verify=False triggers on every https request.
    requests.packages.urllib3.disable_warnings()
    # Slashes are dropped so that every segment lands flat in one folder.
    target = path + i.replace('/', '')
    for count in range(max_retries + 1):
        if count > 0:
            print("----------------------------retry----------------------------")
            print("retry url\t", url + i)
            print("retry count\t", count)
            # Back off before retrying so a failing server is not hammered.
            time.sleep(1)
        try:
            r = requests.get(url + i, headers=headers, timeout=3, verify=False)
        except requests.RequestException as e:
            print('error url\t', url + i)
            print(e)
            continue
        if r.status_code == 200:
            with open(target, "wb") as code:
                code.write(r.content)
            return True
        print('error status\t', r.status_code, url + i)
    return False

def getkey(url, key_path, parentPath):
    '''
    Download the decryption key and store it beside the segments.

    url: base URL the key URI is appended to.
    key_path: key URI as written in the playlist.
    parentPath: folder prefix (ending with a separator) the key is written under.

    Best effort: failures are printed and swallowed, nothing is raised.
    '''
    try:
        # A timeout keeps a stalled server from blocking the whole run.
        r = requests.get(url=url + key_path, headers=key_head, timeout=10)
        if r.status_code == 200:
            target = parentPath + key_path
            # The key URI may contain sub-folders and the segment folder may
            # not exist yet, so make sure the destination directory is there.
            target_dir = os.path.dirname(target)
            if target_dir:
                os.makedirs(target_dir, exist_ok=True)
            with open(target, 'wb') as code:
                code.write(r.content)
        else:
            print("---------------error-----------------")
            print('key download failed, status\t', r.status_code)
    except (requests.RequestException, OSError) as e:
        print("---------------error-----------------")
        print(e)

def getM3u8File(m3u8s, base_url, resName, max_workers=40):
    '''
    Extract the .ts segments from a playlist and download them with a thread pool.

    m3u8s: text of the m3u8 playlist.
    base_url: URL prefix each segment path (and the key URI) is appended to.
    resName: resource name; files are stored in <parent>/video/<resName>/.
    max_workers: number of download threads.
    '''
    # Segment lines end in ".ts"; tag lines ("#...") are never segments.
    # URLs hidden inside "#" tags are not handled.
    tss = [line for line in re.findall(r'.*\n(.*\.ts)', m3u8s)
           if not line.startswith('#')]
    file_size = len(tss)

    # The key URI lives on the #EXT-X-KEY tag line, which the segment regex
    # never returns, so look it up in the playlist text directly.
    key_match = re.search(r'#EXT-X-KEY:[^\n]*?URI="([^"]+)"', m3u8s)
    key_path = key_match.group(1) if key_match else None

    # Create the target folder before anything is written into it.
    parentPath = os.path.dirname(os.path.dirname(__file__))
    path = parentPath + "/video/%s/" % resName
    os.makedirs(path, exist_ok=True)

    # Only encrypted playlists reference a key.
    if key_path is not None:
        getkey(base_url, key_path, path)

    # Download all segments concurrently.
    print('ts files lenth:\t', file_size)
    print('start downloading,please wait mins~')
    with ThreadPoolExecutor(max_workers) as executor:
        futures = [(each, executor.submit(Handler, base_url, each, path))
                   for each in tss]
    # Leaving the "with" block waits for every submitted task, so no manual
    # thread joining is needed; every future is finished at this point.
    failed = [each for each, future in futures
              if future.exception() is not None or future.result() is False]
    if failed:
        print('failed ts files:\t', len(failed))
        for each in failed:
            print('failed ts:\t', each)

    print(resName, " download complete")


def download_file(text, context):
    '''
    Append downloaded content to a local UTF-8 text file.

    text: path of the file to append to (created when missing).
    context: the content to store; each item it yields is itself iterated
        and written piece by piece, so a plain string or a list of strings
        both end up written in order.
    '''
    with open(text, mode='a', encoding='utf-8') as sink:
        sink.writelines(fragment for piece in context for fragment in piece)

def prepare_create(videoPath):
    '''
    Make sure the given folder exists before files are written into it.

    videoPath: folder to create. Missing parent folders are created too,
        and an already existing folder is left untouched.
    '''
    # makedirs also builds the missing parents (e.g. the "video" root on a
    # fresh checkout), which a plain mkdir would fail on.
    os.makedirs(videoPath, exist_ok=True)

def prepare_parse(i):
    '''
    Split a playlist address into the pieces the downloader needs.

    i: the m3u8 address given by the user.

    Returns (url, baseUrl, name): the playlist URL up to ".m3u8", the URL
    prefix up to the last "/", and the playlist file name without extension.
    Both URLs are downgraded from https to http.
    Raises IndexError when i contains no ".m3u8" address.
    '''
    def _to_http(address):
        # Only the scheme is downgraded; a blanket replace would also
        # rewrite any "https" that happens to appear inside the path.
        return re.sub(r'^(\s*)https://', r'\1http://', address, count=1)

    url = _to_http(re.findall(r'.*\.m3u8', i, re.S)[0])
    baseUrl = _to_http(re.findall(r'.*/', i, re.S)[0])
    name = re.findall(r'.*/(.*)\.m3u8', i, re.S)[0]
    print("m3u8Url\t", url)
    print("baseUrl\t", baseUrl)
    print("name\t", name)
    return url, baseUrl, name

def get_m3u8_file(url, name, parentPath):
    '''
    Download the .m3u8 playlist and keep a local copy of it.

    url: address of the playlist.
    name: resource name; the copy is stored as video/<name>/<name>.m3u8.
    parentPath: folder that contains the "video" directory.

    Returns the playlist text.
    Raises ValueError when the response is not an m3u8 playlist.
    '''
    # A timeout keeps a stalled server from blocking the whole run.
    response = requests.get(url, headers=headers, timeout=10)
    if "#EXTM3U" not in response.text:
        # ValueError instead of a bare BaseException, so that ordinary
        # "except Exception" handlers can deal with a bad link.
        raise ValueError("非M3U8的链接")
    download_file(parentPath + '/video/%s/%s.m3u8' % (name, name), response.text)
    return response.text

def assemble_mp4(path, parentPath, resName):
    '''
    Merge the downloaded segments into a single mp4 file.

    path: folder that holds the segments and the local playlist.
    parentPath: folder that contains the "video" directory.
    resName: resource name; the result is written to video/<resName>.mp4.

    On Windows the .ts files are concatenated with "copy /b" and the segment
    folder is removed afterwards; elsewhere ffmpeg converts the local playlist.
    '''
    videoPath = parentPath + '/video/%s.mp4' % resName
    if platform.system() == "Windows":
        # Paths are quoted so folders containing spaces still work.
        cmdR = r'copy /b "%s\*.ts" "%s"' % (os.path.abspath(path), os.path.abspath(videoPath))
        status = os.system(cmdR)
        if status == 0:
            # Only drop the segments once they were merged successfully.
            time.sleep(1)
            delR = r'rmdir /s/q "%s"' % os.path.abspath(path)
            os.system(delR)
    else:
        # Merge the segments with ffmpeg, driven by the local playlist.
        # An argument list avoids shell quoting problems, and os.path.join
        # supplies the right path separator.
        playlist = os.path.join(path, '%s.m3u8' % resName)
        try:
            status = subprocess.call(['ffmpeg', '-i', playlist, videoPath])
        except OSError as e:
            # Typically: ffmpeg is not installed or not on PATH.
            print(e)
            status = -1
    if status == 0:
        print(r"执行完成,视屏文件%s已经生成" % (videoPath))
    else:
        print('assemble failed, exit status\t', status)

if __name__ == '__main__':
    # Preparation: locate and create the folder that stores the videos.
    # NOTE(review): this takes the grandparent of __file__ as given; if
    # __file__ is a bare relative name the result is empty - confirm how
    # the script is launched.
    parentPath = os.path.dirname(os.path.dirname(__file__))

    videoPath = parentPath + '/video/'
    prepare_create(videoPath)

    print('=============================注意=============================')
    print('完整视频存放路径为:\t', videoPath)
    print('==============================================================')

    # The m3u8 addresses come from the command line. sys.argv[0] is the
    # script path itself, so it must not be treated as an address.
    seeds = [arg.strip() for arg in sys.argv[1:] if arg.strip()]
    if not seeds:
        print('usage: python ts_download_v6.py <m3u8-url> [<m3u8-url> ...]')

    # Walk through every address.
    for i in seeds:
        url, baseUrl, name = prepare_parse(i)
        # Create the folder for this resource.
        prepare_create(videoPath + '/%s' % name)
        # Download the .m3u8 playlist.
        m3u8s = get_m3u8_file(url, name, parentPath)
        # Download the .ts segments.
        getM3u8File(m3u8s, baseUrl, name)
        # Assemble the segments into an mp4.
        assemble_mp4(parentPath + "/video/%s/" % name, parentPath, name)



0 comments on commit c0ecebd

Please sign in to comment.