Skip to content

Commit

Permalink
fix #37, 修复抖音解析
Browse files: browse the repository at this point in the history
  • Loading branch information
wujunwei928 committed Jun 30, 2024
1 parent 3273bab commit 3005935
Show file tree
Hide file tree
Showing 2 changed files with 20 additions and 28 deletions.
28 changes: 12 additions & 16 deletions parser/douyin.go
Original file line number Diff line number Diff line change
Expand Up @@ -5,11 +5,9 @@ import (
"errors"
"fmt"
"math/rand"
"net/url"
"regexp"
"strings"

"github.com/PuerkitoBio/goquery"

"github.com/tidwall/gjson"

"github.com/go-resty/resty/v2"
Expand All @@ -27,22 +25,20 @@ func (d douYin) parseVideoID(videoId string) (*VideoParseInfo, error) {
if err != nil {
return nil, err
}
doc, err := goquery.NewDocumentFromReader(bytes.NewReader(res.Body()))
if err != nil {
return nil, err
}
returnData := doc.Find("#RENDER_DATA").Text()
decodeData, err := url.QueryUnescape(returnData)
if err != nil {
return nil, err

re := regexp.MustCompile(`window._ROUTER_DATA\s*=\s*(.*?)</script>`)
findRes := re.FindSubmatch(res.Body())
if len(findRes) < 2 {
return nil, errors.New("parse video json info from html fail")
}

data := gjson.Get(decodeData, "app.videoInfoRes.item_list.0")
jsonBytes := bytes.TrimSpace(findRes[1])
data := gjson.GetBytes(jsonBytes, "loaderData.video_(id)/page.videoInfoRes.item_list.0")

if !data.Exists() {
filterObj := gjson.Get(
decodeData,
fmt.Sprintf(`app.videoInfoRes.filter_list.#(aweme_id=="%s")`, videoId),
filterObj := gjson.GetBytes(
jsonBytes,
fmt.Sprintf(`loaderData.video_(id)/page.videoInfoRes.filter_list.#(aweme_id=="%s")`, videoId),
)

return nil, fmt.Errorf(
Expand All @@ -55,7 +51,7 @@ func (d douYin) parseVideoID(videoId string) (*VideoParseInfo, error) {
// 获取图集图片地址
imagesObjArr := data.Get("images").Array()
images := make([]string, 0, len(imagesObjArr))
for _, imageItem := range data.Get("images").Array() {
for _, imageItem := range imagesObjArr {
imageUrl := imageItem.Get("url_list.0").String()
if len(imageUrl) > 0 {
images = append(images, imageUrl)
Expand Down
20 changes: 8 additions & 12 deletions parser/xigua.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,13 +3,11 @@ package parser
import (
"bytes"
"errors"
"net/url"
"regexp"
"strings"

"github.com/tidwall/gjson"

"github.com/PuerkitoBio/goquery"

"github.com/go-resty/resty/v2"
)

Expand Down Expand Up @@ -56,17 +54,15 @@ func (x xiGua) parseVideoID(videoId string) (*VideoParseInfo, error) {
return nil, err
}

doc, err := goquery.NewDocumentFromReader(bytes.NewReader(res.Body()))
if err != nil {
return nil, err
}
ssrData := doc.Find("#RENDER_DATA").Text()
ssrJson, err := url.QueryUnescape(ssrData)
if err != nil {
return nil, err
re := regexp.MustCompile(`window._ROUTER_DATA\s*=\s*(.*?)</script>`)
findRes := re.FindSubmatch(res.Body())
if len(findRes) < 2 {
return nil, errors.New("parse video json info from html fail")
}

videoData := gjson.Get(ssrJson, "app.videoInfoRes.item_list.0")
jsonBytes := bytes.TrimSpace(findRes[1])
videoData := gjson.GetBytes(jsonBytes, "loaderData.video_(id)/page.videoInfoRes.item_list.0")

userId := videoData.Get("author.user_id").String()
userName := videoData.Get("author.nickname").String()
userAvatar := videoData.Get("author.avatar_thumb.url_list.0").String()
Expand Down

0 comments on commit 3005935

Please sign in to comment.