Skip to content

Commit

Permalink
Merge branch 'xbapps:master' into util
Browse files Browse the repository at this point in the history
  • Loading branch information
theRealKLH authored Jun 4, 2024
2 parents 3a3d518 + 9f253fe commit a72837b
Show file tree
Hide file tree
Showing 7 changed files with 184 additions and 120 deletions.
19 changes: 19 additions & 0 deletions pkg/api/files.go
Original file line number Diff line number Diff line change
Expand Up @@ -55,12 +55,31 @@ func (i FilesResource) WebService() *restful.WebService {
ws.Route(ws.POST("/unmatch").To(i.unmatchFile).
Metadata(restfulspec.KeyOpenAPITags, tags))

ws.Route(ws.GET("/file/{file-id}").To(i.getFile).
Param(ws.PathParameter("file-id", "File ID").DataType("int")).
Metadata(restfulspec.KeyOpenAPITags, tags).
Writes(models.File{}))

ws.Route(ws.DELETE("/file/{file-id}").To(i.removeFile).
Metadata(restfulspec.KeyOpenAPITags, tags))

return ws
}

// getFile returns a single models.File looked up by its primary key,
// taken from the "file-id" path parameter.
func (i FilesResource) getFile(req *restful.Request, resp *restful.Response) {
	var file models.File

	// Parse the numeric file ID from the route path parameter.
	id, err := strconv.Atoi(req.PathParameter("file-id"))
	if err != nil {
		log.Error(err)
		// Report the malformed ID to the client instead of silently
		// returning with no response written at all.
		resp.WriteHeader(http.StatusBadRequest)
		return
	}

	// NOTE(review): assumes GetIfExistByPK returns a non-nil error when no
	// row matches the given PK — confirm against models.File. Previously the
	// error was discarded, so an unknown ID produced 200 with a zero-value
	// entity.
	if err := file.GetIfExistByPK(uint(id)); err != nil {
		resp.WriteHeader(http.StatusNotFound)
		return
	}

	resp.WriteHeaderAndEntity(http.StatusOK, file)
}

func (i FilesResource) listFiles(req *restful.Request, resp *restful.Response) {
db, _ := models.GetDB()
defer db.Close()
Expand Down
179 changes: 69 additions & 110 deletions pkg/scrape/fuckpassvr.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,16 +3,14 @@ package scrape
import (
"encoding/json"
"net/url"
"path"
"regexp"
"strconv"
"strings"
"sync"

"github.com/go-resty/resty/v2"
"github.com/gocolly/colly/v2"
"github.com/nleeper/goment"
"github.com/thoas/go-funk"
"github.com/tidwall/gjson"
"github.com/xbapps/xbvr/pkg/models"
)

Expand All @@ -23,61 +21,77 @@ func FuckPassVR(wg *sync.WaitGroup, updateSite bool, knownScenes []string, out c
logScrapeStart(scraperID, siteID)

sceneCollector := createCollector("www.fuckpassvr.com")
siteCollector := createCollector("www.fuckpassvr.com")

client := resty.New()
client.SetHeader("User-Agent", UserAgent)

sceneCollector.OnResponse(func(r *colly.Response) {
if r.StatusCode != 200 {
return
}
res := gjson.ParseBytes(r.Body)
scenedata := res.Get("data.scene")
previewVideoURL := r.Ctx.Get("preview_video_url")

sceneCollector.OnHTML(`html`, func(e *colly.HTMLElement) {
sc := models.ScrapedScene{}
sc.ScraperID = scraperID
sc.SceneType = "VR"
sc.Studio = "FuckPassVR"
sc.Site = siteID
sc.HomepageURL = strings.Split(e.Request.URL.String(), "?")[0]

e.ForEach(`meta[property="og:image"]`, func(id int, e *colly.HTMLElement) {
if id == 0 {
url := strings.Split(e.Request.AbsoluteURL(e.Attr("content")), "?")[0]
re := regexp.MustCompile(`FPVR(\d+)`)
matches := re.FindStringSubmatch(url)
if len(matches) > 1 {
sc.SiteID = matches[1]
sc.SceneID = "fpvr-" + matches[1]
}
}
})

slug := scenedata.Get("slug").String()
sc.HomepageURL = "https://www.fuckpassvr.com/video/" + slug

sc.SiteID = scenedata.Get("cms_id").String()
if sc.SiteID == "" || !strings.HasPrefix(sc.SiteID, "FPVR") {
return
}
sc.SceneID = "fpvr-" + strings.Replace(sc.SiteID, "FPVR", "", 1)
e.ForEach(`h2.video__title`, func(id int, e *colly.HTMLElement) {
if id == 0 {
sc.Title = strings.TrimSpace(e.Text)
}
})

sc.Released = scenedata.Get("active_schedule").String()[:10]
sc.Title = scenedata.Get("name").String()
sc.Duration = int(scenedata.Get("duration").Int())
sc.Covers = append(sc.Covers, scenedata.Get("thumbnail_url").String())
e.ForEach(`web-vr-video-player`, func(id int, e *colly.HTMLElement) {
sc.Covers = append(sc.Covers, strings.Trim(e.Attr("coverimage"), " '"))
})

desc := scenedata.Get("description").String()
desc = strings.ReplaceAll(desc, "<p>", "")
desc = strings.ReplaceAll(desc, "</p>", "\n\n")
re := regexp.MustCompile(`<(.|\n)*?>`) // strip_tags
sc.Synopsis = re.ReplaceAllString(desc, "")
e.ForEach(`div.profile__gallery a.profile__galleryElement`, func(id int, e *colly.HTMLElement) {
sc.Gallery = append(sc.Gallery, strings.TrimSpace(e.Attr("href")))
})

sc.ActorDetails = make(map[string]models.ActorDetails)
scenedata.Get("porn_star_lead").ForEach(func(_, star gjson.Result) bool {
name := star.Get("name").String()
sc.Cast = append(sc.Cast, name)
sc.ActorDetails[name] = models.ActorDetails{Source: sc.ScraperID + " scrape", ProfileUrl: "https://www.fuckpassvr.com/api/api/seo?porn_star_slug=" + star.Get("slug").String()}
return true
e.ForEach(`div.models a`, func(id int, e *colly.HTMLElement) {
sc.Cast = append(sc.Cast, strings.TrimSpace(e.Attr("title")))
sc.ActorDetails[strings.TrimSpace(e.Attr("title"))] = models.ActorDetails{Source: sc.ScraperID + " scrape", ProfileUrl: e.Request.AbsoluteURL(e.Attr("href"))}
})
scenedata.Get("porn_star").ForEach(func(_, star gjson.Result) bool {
name := star.Get("name").String()
sc.Cast = append(sc.Cast, name)
sc.ActorDetails[name] = models.ActorDetails{Source: sc.ScraperID + " scrape", ProfileUrl: "https://www.fuckpassvr.com/api/api/seo?porn_star_slug=" + star.Get("slug").String()}
return true

e.ForEach(`a.tag`, func(id int, e *colly.HTMLElement) {
sc.Tags = append(sc.Tags, strings.TrimSpace(e.Attr("title")))
})

scenedata.Get("tag_input").ForEach(func(_, tag gjson.Result) bool {
sc.Tags = append(sc.Tags, tag.String())
return true
e.ForEach(`div.readMoreWrapper2`, func(id int, e *colly.HTMLElement) {
sc.Synopsis = strings.TrimSpace(e.Text)
})

e.ForEach(`div.video__addons p.wrapper__text`, func(id int, e *colly.HTMLElement) {
s := strings.TrimSpace(e.Text)
if strings.HasPrefix(s, "Released:") {
tmpDate, _ := goment.New(strings.TrimSpace(strings.TrimPrefix(s, "Released:")), "MMM DD YYYY")
sc.Released = tmpDate.Format("YYYY-MM-DD")
}
})

e.ForEach(`div.wrapper__download a.wrapper__downloadLink`, func(id int, e *colly.HTMLElement) {
url, err := url.Parse(e.Attr("href"))
if err == nil {
parts := strings.Split(url.Path, "/")
if len(parts) > 0 {
fn := parts[len(parts)-1]
fn = strings.Replace(fn, "2min", "FULL", -1)
sc.Filenames = append(sc.Filenames, fn)
}
}
})

// trailer details
Expand All @@ -86,82 +100,27 @@ func FuckPassVR(wg *sync.WaitGroup, updateSite bool, knownScenes []string, out c
strParams, _ := json.Marshal(params)
sc.TrailerSrc = string(strParams)

resolutions := []string{"8kUHD", "8kHD", "4k", "2k", "1k"}
parsedFileNameURL, err := url.Parse(previewVideoURL)
if err == nil {
fileNameBase := path.Base(parsedFileNameURL.Path)
if strings.HasSuffix(strings.ToLower(fileNameBase), "_rollover.mp4") {
for i := range resolutions {
fn := fileNameBase[:len(fileNameBase)-len("_rollover.mp4")] + "-FULL_" + resolutions[i] + ".mp4"
sc.Filenames = append(sc.Filenames, fn)
}
}
} else {
log.Error(err)
}
out <- sc
})

resp, err := client.R().
SetQueryParams(map[string]string{
"scene_id": scenedata.Get("id").String(),
}).
Get("https://www.fuckpassvr.com/api/api/storyboard/show")

if err == nil {
res := gjson.ParseBytes(resp.Body())
res.Get("data.storyboards.#.image_origin_url").ForEach(func(_, url gjson.Result) bool {
sc.Gallery = append(sc.Gallery, url.String())
return true
})
} else {
log.Error(err)
siteCollector.OnHTML(`section.pagination a`, func(e *colly.HTMLElement) {
if !limitScraping {
siteCollector.Visit(e.Attr("href"))
}

out <- sc
})

var page int64 = 1
var lastPage int64 = 1
if limitScraping {
lastPage = 1
}
siteCollector.OnHTML(`div.videos__element a.videos__videoTitle`, func(e *colly.HTMLElement) {
sceneURL := e.Request.AbsoluteURL(e.Attr("href"))

if !funk.ContainsString(knownScenes, sceneURL) {
sceneCollector.Visit(sceneURL)
}
})

if singleSceneURL != "" {
ctx := colly.NewContext()
ctx.Put("preview_video_url", "")
slug := strings.Replace(singleSceneURL, "https://www.fuckpassvr.com/video/", "", 1)
sceneDetail := "https://www.fuckpassvr.com/api/api/scene/show?slug=" + slug
sceneCollector.Request("GET", sceneDetail, nil, ctx, nil)
sceneCollector.Visit(singleSceneURL)
} else {
for page <= lastPage {
resp, err := client.R().
SetQueryParams(map[string]string{
"size": "24",
"sortBy": "newest",
"page": strconv.FormatInt(page, 10),
}).
Get("https://www.fuckpassvr.com/api/api/scene")

if err == nil {
res := gjson.ParseBytes(resp.Body())
res.Get("data.scenes.data").ForEach(func(_, scenedata gjson.Result) bool {
ctx := colly.NewContext()
ctx.Put("preview_video_url", scenedata.Get("preview_video_url").String())

sceneURL := "https://www.fuckpassvr.com/video/" + scenedata.Get("slug").String()
sceneDetail := "https://www.fuckpassvr.com/api/api/scene/show?slug=" + scenedata.Get("slug").String()

if !funk.ContainsString(knownScenes, sceneURL) {
sceneCollector.Request("GET", sceneDetail, nil, ctx, nil)
}

return true
})
lastPage = res.Get("data.scenes.last_page").Int()
page = page + 1
} else {
log.Error(err)
}
}
siteCollector.Visit("https://www.fuckpassvr.com/destination")
}

if updateSite {
Expand Down
60 changes: 60 additions & 0 deletions pkg/scrape/javdatabase.go
Original file line number Diff line number Diff line change
Expand Up @@ -94,6 +94,66 @@ func ScrapeJavDB(out *[]models.ScrapedScene, queryString string) {

})

html.ForEach(`p.mb-1`, func(id int, p *colly.HTMLElement) {
tr := strings.Split(p.Text, ": ")
label := tr[0]

if label == `Studio` {
// Studio
sc.Studio = tr[1]

} else if label == `DVD ID` {
// Title, SceneID and SiteID all like 'VRKM-821' format
dvdId := strings.ToUpper(tr[1])
sc.Title = dvdId
sc.SceneID = dvdId
sc.SiteID = dvdId

// Set 'Site' to first part of the ID (e.g. `VRKM` for `vrkm-821`)
siteParts := strings.Split(dvdId, `-`)
if len(siteParts) > 0 {
sc.Site = siteParts[0]
}

} else if label == `Release Date` {
// Release date
dateStr := tr[1]
tmpDate, _ := goment.New(strings.TrimSpace(dateStr), "YYYY-MM-DD")
sc.Released = tmpDate.Format("YYYY-MM-DD")

} else if label == `Genre(s)` {
// Tags
/* NOTE:
"Tags are technically incomplete vs. what you'd get translating dmm.co.jp
tags/correlating them back to their old equivalents on r18 using something
like Javinizer's tag CSV"
*/
p.ForEach("a", func(id int, anchor *colly.HTMLElement) {
href := anchor.Attr("href")
if strings.Contains(href, "javdatabase.com/genres/") {
// Tags
tag := ProcessJavrTag(anchor.Text)

if tag != "" {
sc.Tags = append(sc.Tags, tag)
}
}
})

} else if label == `Translated Title` {
// Synopsis / description
sc.Synopsis = tr[1]

} else if label == `Content ID` {
contentId = tr[1]

} else if label == "Runtime" {
// Duration
sc.Duration, _ = strconv.Atoi(strings.Split(tr[1], " ")[0])
}

})

// Screenshots
html.ForEach("a[href]", func(_ int, anchor *colly.HTMLElement) {
linkHref := anchor.Attr(`href`)
Expand Down
26 changes: 21 additions & 5 deletions pkg/scrape/slrstudios.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ package scrape
import (
"encoding/json"
"html"
"net/http"
"regexp"
"strconv"
"strings"
Expand Down Expand Up @@ -148,14 +149,25 @@ func SexLikeReal(wg *sync.WaitGroup, updateSite bool, knownScenes []string, out

// Cover
if !isTransScene {
coverURL := strings.Replace(gjson.Get(JsonMetadataA, "thumbnailUrl").String(), "app", "desktop", -1)
if len(coverURL) > 0 {
appCover := gjson.Get(JsonMetadataA, "thumbnailUrl").String()
desktopCover := strings.Replace(gjson.Get(JsonMetadataA, "thumbnailUrl").String(), "app", "desktop", -1)
desktopCresp, _ := http.Head(desktopCover)
if desktopCresp.StatusCode == 200 {
coverURL := desktopCover
sc.Covers = append(sc.Covers, coverURL)
} else {
e.ForEach(`link[as="image"]`, func(id int, e *colly.HTMLElement) {
sc.Covers = append(sc.Covers, e.Request.AbsoluteURL(e.Attr("href")))
})
appCresp, _ := http.Head(appCover)
if appCresp.StatusCode == 200 {
coverURL := appCover
sc.Covers = append(sc.Covers, coverURL)
defer appCresp.Body.Close()
} else {
e.ForEach(`link[as="image"]`, func(id int, e *colly.HTMLElement) {
sc.Covers = append(sc.Covers, e.Request.AbsoluteURL(e.Attr("href")))
})
}
}
defer desktopCresp.Body.Close()
} else {
posterURLFound := false
e.ForEach(`script[type="text/javascript"]`, func(id int, e *colly.HTMLElement) {
Expand Down Expand Up @@ -415,6 +427,10 @@ func appendFilenames(sc *models.ScrapedScene, siteID string, filenameRegEx *rege
// Only shown for logged in users so need to generate them
// Format: SLR_siteID_Title_<Resolutions>_SceneID_<LR/TB>_<180/360>.mp4
if !isTransScene {
// Force siteID when scraping individual scenes without a custom site
if siteID == "" {
siteID = gjson.Get(JsonMetadataA, "paysite.name").String()
}
viewAngle := gjson.Get(JsonMetadataA, "viewAngle").String()
projSuffix := "_LR_180.mp4"
if viewAngle == "190" || viewAngle == "200" || viewAngle == "220" {
Expand Down
Loading

0 comments on commit a72837b

Please sign in to comment.