diff --git a/pkg/api/options.go b/pkg/api/options.go index 6262806b5..c00fa6497 100644 --- a/pkg/api/options.go +++ b/pkg/api/options.go @@ -208,6 +208,9 @@ func (i ConfigResource) WebService() *restful.WebService { ws.Route(ws.PUT("/sites/subscribed/{site}").To(i.toggleSubscribed). Metadata(restfulspec.KeyOpenAPITags, tags)) + ws.Route(ws.PUT("/sites/limit_scraping/{site}").To(i.toggleLimitScraping). + Metadata(restfulspec.KeyOpenAPITags, tags)) + ws.Route(ws.POST("/scraper/force-site-update").To(i.forceSiteUpdate). Metadata(restfulspec.KeyOpenAPITags, tags)) @@ -313,6 +316,10 @@ func (i ConfigResource) toggleSubscribed(req *restful.Request, resp *restful.Res i.toggleSiteField(req, resp, "Subscribed") } +func (i ConfigResource) toggleLimitScraping(req *restful.Request, resp *restful.Response) { + i.toggleSiteField(req, resp, "LimitScraping") +} + func (i ConfigResource) toggleSiteField(req *restful.Request, resp *restful.Response, field string) { db, _ := models.GetDB() defer db.Close() @@ -335,6 +342,8 @@ func (i ConfigResource) toggleSiteField(req *restful.Request, resp *restful.Resp site.Subscribed = !site.Subscribed log.Infof("Toggling %s %v", id, site.Subscribed) db.Model(&models.Scene{}).Where("scraper_id = ?", site.ID).Update("is_subscribed", site.Subscribed) + case "LimitScraping": + site.LimitScraping = !site.LimitScraping } site.Save() diff --git a/pkg/migrations/migrations.go b/pkg/migrations/migrations.go index fa029c252..8bfdff604 100644 --- a/pkg/migrations/migrations.go +++ b/pkg/migrations/migrations.go @@ -767,6 +767,15 @@ func Migrate() { return tx.AutoMigrate(&Scene{}).Error }, }, + { + ID: "0074-Limit-Scraper", + Migrate: func(tx *gorm.DB) error { + type Site struct { + LimitScraping bool `json:"limit_scraping" xbvrbackup:"limit_scraping"` + } + return tx.AutoMigrate(Site{}).Error + }, + }, // =============================================================================================== // Put DB Schema migrations above this line and migrations that rely on the updated schema below diff --git a/pkg/models/model_scraper.go b/pkg/models/model_scraper.go index d4435461a..0626e348f 100644 --- a/pkg/models/model_scraper.go +++ b/pkg/models/model_scraper.go @@ -7,7 +7,7 @@ import ( var scrapers []Scraper -type ScraperFunc func(*sync.WaitGroup, bool, []string, chan<- ScrapedScene, string, string) error +type ScraperFunc func(*sync.WaitGroup, bool, []string, chan<- ScrapedScene, string, string, bool) error type Scraper struct { ID string `json:"id"` diff --git a/pkg/models/model_site.go b/pkg/models/model_site.go index 78b2f0b07..9ed817c05 100644 --- a/pkg/models/model_site.go +++ b/pkg/models/model_site.go @@ -8,14 +8,15 @@ import ( ) type Site struct { - ID string `gorm:"primary_key" json:"id" xbvrbackup:"-"` - Name string `json:"name" xbvrbackup:"name"` - AvatarURL string `json:"avatar_url" xbvrbackup:"-"` - IsBuiltin bool `json:"is_builtin" xbvrbackup:"-"` - IsEnabled bool `json:"is_enabled" xbvrbackup:"is_enabled"` - LastUpdate time.Time `json:"last_update" xbvrbackup:"-"` - Subscribed bool `json:"subscribed" xbvrbackup:"subscribed"` - HasScraper bool `gorm:"-" json:"has_scraper" xbvrbackup:"-"` + ID string `gorm:"primary_key" json:"id" xbvrbackup:"-"` + Name string `json:"name" xbvrbackup:"name"` + AvatarURL string `json:"avatar_url" xbvrbackup:"-"` + IsBuiltin bool `json:"is_builtin" xbvrbackup:"-"` + IsEnabled bool `json:"is_enabled" xbvrbackup:"is_enabled"` +
LastUpdate time.Time `json:"last_update" xbvrbackup:"-"` + Subscribed bool `json:"subscribed" xbvrbackup:"subscribed"` + HasScraper bool `gorm:"-" json:"has_scraper" xbvrbackup:"-"` + LimitScraping bool `json:"limit_scraping" xbvrbackup:"limit_scraping"` } func (i *Site) Save() error { diff --git a/pkg/scrape/baberoticavr.go b/pkg/scrape/baberoticavr.go index eef0a17fa..08ce27227 100644 --- a/pkg/scrape/baberoticavr.go +++ b/pkg/scrape/baberoticavr.go @@ -16,7 +16,7 @@ import ( "github.com/xbapps/xbvr/pkg/models" ) -func BaberoticaVR(wg *sync.WaitGroup, updateSite bool, knownScenes []string, out chan<- models.ScrapedScene, singleSceneURL string, singeScrapeAdditionalInfo string) error { +func BaberoticaVR(wg *sync.WaitGroup, updateSite bool, knownScenes []string, out chan<- models.ScrapedScene, singleSceneURL string, singeScrapeAdditionalInfo string, limitScraping bool) error { defer wg.Done() scraperID := "baberoticavr" siteID := "BaberoticaVR" diff --git a/pkg/scrape/badoink.go b/pkg/scrape/badoink.go index 981137772..f28021581 100644 --- a/pkg/scrape/badoink.go +++ b/pkg/scrape/badoink.go @@ -23,7 +23,7 @@ import ( "github.com/xbapps/xbvr/pkg/models" ) -func BadoinkSite(wg *sync.WaitGroup, updateSite bool, knownScenes []string, out chan<- models.ScrapedScene, singleSceneURL string, scraperID string, siteID string, URL string, singeScrapeAdditionalInfo string) error { +func BadoinkSite(wg *sync.WaitGroup, updateSite bool, knownScenes []string, out chan<- models.ScrapedScene, singleSceneURL string, scraperID string, siteID string, URL string, singeScrapeAdditionalInfo string, limitScraping bool) error { defer wg.Done() logScrapeStart(scraperID, siteID) @@ -187,8 +187,10 @@ func BadoinkSite(wg *sync.WaitGroup, updateSite bool, knownScenes []string, out }) siteCollector.OnHTML(`div.pagination a`, func(e *colly.HTMLElement) { - pageURL := e.Request.AbsoluteURL(e.Attr("href")) - siteCollector.Visit(pageURL) + if !limitScraping { + pageURL := e.Request.AbsoluteURL(e.Attr("href")) + siteCollector.Visit(pageURL) + } }) siteCollector.OnHTML(`main[data-page=VideoList] a.video-card-image-container`, func(e *colly.HTMLElement) { @@ -238,24 +240,24 @@ func BadoinkSite(wg *sync.WaitGroup, updateSite bool, knownScenes []string, out return nil } -func BadoinkVR(wg *sync.WaitGroup, updateSite bool, knownScenes []string, out chan<- models.ScrapedScene, singleSceneURL string, singeScrapeAdditionalInfo string) error { - return BadoinkSite(wg, updateSite, knownScenes, out, singleSceneURL, "badoinkvr", "BadoinkVR", "https://badoinkvr.com/vrpornvideos?order=newest", singeScrapeAdditionalInfo) +func BadoinkVR(wg *sync.WaitGroup, updateSite bool, knownScenes []string, out chan<- models.ScrapedScene, singleSceneURL string, singeScrapeAdditionalInfo string, limitScraping bool) error { + return BadoinkSite(wg, updateSite, knownScenes, out, singleSceneURL, "badoinkvr", "BadoinkVR", "https://badoinkvr.com/vrpornvideos?order=newest", singeScrapeAdditionalInfo, limitScraping) } -func B18VR(wg *sync.WaitGroup, updateSite bool, knownScenes []string, out chan<- models.ScrapedScene, singleSceneURL string, singeScrapeAdditionalInfo string) error { - return BadoinkSite(wg, updateSite, knownScenes, out, singleSceneURL, "18vr", "18VR", "https://18vr.com/vrpornvideos", singeScrapeAdditionalInfo) +func B18VR(wg *sync.WaitGroup, updateSite bool, knownScenes []string, out chan<- models.ScrapedScene, singleSceneURL string, singeScrapeAdditionalInfo string, limitScraping bool) error { + return BadoinkSite(wg, updateSite, 
knownScenes, out, singleSceneURL, "18vr", "18VR", "https://18vr.com/vrpornvideos?order=newest", singeScrapeAdditionalInfo, limitScraping) } -func VRCosplayX(wg *sync.WaitGroup, updateSite bool, knownScenes []string, out chan<- models.ScrapedScene, singleSceneURL string, singeScrapeAdditionalInfo string) error { - return BadoinkSite(wg, updateSite, knownScenes, out, singleSceneURL, "vrcosplayx", "VRCosplayX", "https://vrcosplayx.com/cosplaypornvideos", singeScrapeAdditionalInfo) +func VRCosplayX(wg *sync.WaitGroup, updateSite bool, knownScenes []string, out chan<- models.ScrapedScene, singleSceneURL string, singeScrapeAdditionalInfo string, limitScraping bool) error { + return BadoinkSite(wg, updateSite, knownScenes, out, singleSceneURL, "vrcosplayx", "VRCosplayX", "https://vrcosplayx.com/cosplaypornvideos?order=newest", singeScrapeAdditionalInfo, limitScraping) } -func BabeVR(wg *sync.WaitGroup, updateSite bool, knownScenes []string, out chan<- models.ScrapedScene, singleSceneURL string, singeScrapeAdditionalInfo string) error { - return BadoinkSite(wg, updateSite, knownScenes, out, singleSceneURL, "babevr", "BabeVR", "https://babevr.com/vrpornvideos", singeScrapeAdditionalInfo) +func BabeVR(wg *sync.WaitGroup, updateSite bool, knownScenes []string, out chan<- models.ScrapedScene, singleSceneURL string, singeScrapeAdditionalInfo string, limitScraping bool) error { + return BadoinkSite(wg, updateSite, knownScenes, out, singleSceneURL, "babevr", "BabeVR", "https://babevr.com/vrpornvideos?order=newest", singeScrapeAdditionalInfo, limitScraping) } -func KinkVR(wg *sync.WaitGroup, updateSite bool, knownScenes []string, out chan<- models.ScrapedScene, singleSceneURL string, singeScrapeAdditionalInfo string) error { - return BadoinkSite(wg, updateSite, knownScenes, out, singleSceneURL, "kinkvr", "KinkVR", "https://kinkvr.com/bdsm-vr-videos", singeScrapeAdditionalInfo) +func KinkVR(wg *sync.WaitGroup, updateSite bool, knownScenes []string, out chan<- models.ScrapedScene, singleSceneURL string, singeScrapeAdditionalInfo string, limitScraping bool) error { + return BadoinkSite(wg, updateSite, knownScenes, out, singleSceneURL, "kinkvr", "KinkVR", "https://kinkvr.com/bdsm-vr-videos?order=newest", singeScrapeAdditionalInfo, limitScraping) } func init() { diff --git a/pkg/scrape/caribbeancom.go b/pkg/scrape/caribbeancom.go index 4857c59a3..82152d4c9 100644 --- a/pkg/scrape/caribbeancom.go +++ b/pkg/scrape/caribbeancom.go @@ -15,7 +15,7 @@ import ( "golang.org/x/text/language" ) -func CariVR(wg *sync.WaitGroup, updateSite bool, knownScenes []string, out chan<- models.ScrapedScene, singleSceneURL string, singeScrapeAdditionalInfo string) error { +func CariVR(wg *sync.WaitGroup, updateSite bool, knownScenes []string, out chan<- models.ScrapedScene, singleSceneURL string, singeScrapeAdditionalInfo string, limitScraping bool) error { defer wg.Done() scraperID := "caribbeancomvr" siteID := "CaribbeanCom VR" @@ -125,9 +125,11 @@ func CariVR(wg *sync.WaitGroup, updateSite bool, knownScenes []string, out chan< }) siteCollector.OnHTML(`.pagination-large .pagination__item[rel="next"]`, func(e *colly.HTMLElement) { - // replace "all" with "vr" to allow for correct page navigation - pageURL := strings.Replace(e.Request.AbsoluteURL(e.Attr("href")), "all", "vr", 1) - siteCollector.Visit(pageURL) + if !limitScraping { + // replace "all" with "vr" to allow for correct page navigation + pageURL := strings.Replace(e.Request.AbsoluteURL(e.Attr("href")), "all", "vr", 1) + siteCollector.Visit(pageURL) + } }) if 
singleSceneURL != "" { diff --git a/pkg/scrape/czechvr.go b/pkg/scrape/czechvr.go index 5729b825d..1baa6a055 100644 --- a/pkg/scrape/czechvr.go +++ b/pkg/scrape/czechvr.go @@ -14,7 +14,7 @@ import ( "github.com/xbapps/xbvr/pkg/models" ) -func CzechVR(wg *sync.WaitGroup, updateSite bool, knownScenes []string, out chan<- models.ScrapedScene, singleSceneURL string, scraperID string, siteID string, nwID string, singeScrapeAdditionalInfo string) error { +func CzechVR(wg *sync.WaitGroup, updateSite bool, knownScenes []string, out chan<- models.ScrapedScene, singleSceneURL string, scraperID string, siteID string, nwID string, singeScrapeAdditionalInfo string, limitScraping bool) error { defer wg.Done() logScrapeStart(scraperID, siteID) @@ -149,8 +149,10 @@ func CzechVR(wg *sync.WaitGroup, updateSite bool, knownScenes []string, out chan }) siteCollector.OnHTML(`div#StrankovaniDesktop span.stred a,div#StrankovaniDesktopHome span.stred a`, func(e *colly.HTMLElement) { - pageURL := e.Request.AbsoluteURL(e.Attr("href") + "&sites=" + nwID) - siteCollector.Visit(pageURL) + if !limitScraping { + pageURL := e.Request.AbsoluteURL(e.Attr("href") + "&sites=" + nwID) + siteCollector.Visit(pageURL) + } }) siteCollector.OnHTML(`div.postTag`, func(e *colly.HTMLElement) { @@ -182,7 +184,7 @@ func CzechVR(wg *sync.WaitGroup, updateSite bool, knownScenes []string, out chan if singleSceneURL != "" { sceneCollector.Visit(singleSceneURL) } else { - siteCollector.Visit("https://www.czechvrnetwork.com/vr-porn-videos&sites=" + nwID) + siteCollector.Visit("https://www.czechvrnetwork.com/vr-porn-videos&sort=date&sites=" + nwID) } if updateSite { @@ -194,15 +196,15 @@ func CzechVR(wg *sync.WaitGroup, updateSite bool, knownScenes []string, out chan } func addCZVRScraper(id string, name string, nwid string, avatarURL string) { - registerScraper(id, name, avatarURL, "czechvrnetwork.com", func(wg *sync.WaitGroup, updateSite bool, knownScenes []string, out chan<- models.ScrapedScene, singleSceneURL string, singeScrapeAdditionalInfo string) error { - return CzechVR(wg, updateSite, knownScenes, out, singleSceneURL, id, name, nwid, singeScrapeAdditionalInfo) + registerScraper(id, name, avatarURL, "czechvrnetwork.com", func(wg *sync.WaitGroup, updateSite bool, knownScenes []string, out chan<- models.ScrapedScene, singleSceneURL string, singeScrapeAdditionalInfo string, limitScraping bool) error { + return CzechVR(wg, updateSite, knownScenes, out, singleSceneURL, id, name, nwid, singeScrapeAdditionalInfo, limitScraping) }) } func init() { // scraper for scraping single scenes where only the url is provided - registerScraper("czechvr-single_scene", "Czech VR - Other Studios", "", "czechvrnetwork.com", func(wg *sync.WaitGroup, updateSite bool, knownScenes []string, out chan<- models.ScrapedScene, singleSceneURL string, singeScrapeAdditionalInfo string) error { - return CzechVR(wg, updateSite, knownScenes, out, singleSceneURL, "", "", "", "") + registerScraper("czechvr-single_scene", "Czech VR - Other Studios", "", "czechvrnetwork.com", func(wg *sync.WaitGroup, updateSite bool, knownScenes []string, out chan<- models.ScrapedScene, singleSceneURL string, singeScrapeAdditionalInfo string, limitScraping bool) error { + return CzechVR(wg, updateSite, knownScenes, out, singleSceneURL, "", "", "", "", limitScraping) }) addCZVRScraper("czechvr", "Czech VR", "15", "https://www.czechvr.com/images/favicon/android-chrome-256x256.png") addCZVRScraper("czechvrfetish", "Czech VR Fetish", "16", 
"https://www.czechvrfetish.com/images/favicon/android-chrome-256x256.png") diff --git a/pkg/scrape/darkroomvr.go b/pkg/scrape/darkroomvr.go index ff2a7e5f7..27050697d 100644 --- a/pkg/scrape/darkroomvr.go +++ b/pkg/scrape/darkroomvr.go @@ -13,7 +13,7 @@ import ( "github.com/xbapps/xbvr/pkg/models" ) -func DarkRoomVR(wg *sync.WaitGroup, updateSite bool, knownScenes []string, out chan<- models.ScrapedScene, singleSceneURL string, singeScrapeAdditionalInfo string) error { +func DarkRoomVR(wg *sync.WaitGroup, updateSite bool, knownScenes []string, out chan<- models.ScrapedScene, singleSceneURL string, singeScrapeAdditionalInfo string, limitScraping bool) error { defer wg.Done() scraperID := "darkroomvr" siteID := "DarkRoomVR" @@ -114,8 +114,10 @@ func DarkRoomVR(wg *sync.WaitGroup, updateSite bool, knownScenes []string, out c }) siteCollector.OnHTML(`div.pagination a`, func(e *colly.HTMLElement) { - pageURL := e.Request.AbsoluteURL(e.Attr("href")) - siteCollector.Visit(pageURL) + if !limitScraping { + pageURL := e.Request.AbsoluteURL(e.Attr("href")) + siteCollector.Visit(pageURL) + } }) siteCollector.OnHTML(`div.video-card__item a[class=image-container]`, func(e *colly.HTMLElement) { diff --git a/pkg/scrape/fuckpassvr.go b/pkg/scrape/fuckpassvr.go index aa7afca2c..73be9dc36 100644 --- a/pkg/scrape/fuckpassvr.go +++ b/pkg/scrape/fuckpassvr.go @@ -16,7 +16,7 @@ import ( "github.com/xbapps/xbvr/pkg/models" ) -func FuckPassVR(wg *sync.WaitGroup, updateSite bool, knownScenes []string, out chan<- models.ScrapedScene, singleSceneURL string, singeScrapeAdditionalInfo string) error { +func FuckPassVR(wg *sync.WaitGroup, updateSite bool, knownScenes []string, out chan<- models.ScrapedScene, singleSceneURL string, singeScrapeAdditionalInfo string, limitScraping bool) error { defer wg.Done() scraperID := "fuckpassvr-native" siteID := "FuckPassVR" @@ -121,6 +121,9 @@ func FuckPassVR(wg *sync.WaitGroup, updateSite bool, knownScenes []string, out c var page int64 = 1 var lastPage int64 = 1 + if limitScraping { + lastPage = 1 + } if singleSceneURL != "" { ctx := colly.NewContext() diff --git a/pkg/scrape/groobyvr.go b/pkg/scrape/groobyvr.go index ad1006b35..62d7f467c 100644 --- a/pkg/scrape/groobyvr.go +++ b/pkg/scrape/groobyvr.go @@ -14,7 +14,7 @@ import ( "github.com/xbapps/xbvr/pkg/models" ) -func GroobyVR(wg *sync.WaitGroup, updateSite bool, knownScenes []string, out chan<- models.ScrapedScene, singleSceneURL string, singeScrapeAdditionalInfo string) error { +func GroobyVR(wg *sync.WaitGroup, updateSite bool, knownScenes []string, out chan<- models.ScrapedScene, singleSceneURL string, singeScrapeAdditionalInfo string, limitScraping bool) error { defer wg.Done() scraperID := "groobyvr" siteID := "GroobyVR" @@ -115,8 +115,10 @@ func GroobyVR(wg *sync.WaitGroup, updateSite bool, knownScenes []string, out cha }) siteCollector.OnHTML(`div.pagination li a:not(.active)`, func(e *colly.HTMLElement) { - pageURL := e.Request.AbsoluteURL(e.Attr("href")) - siteCollector.Visit(pageURL) + if !limitScraping { + pageURL := e.Request.AbsoluteURL(e.Attr("href")) + siteCollector.Visit(pageURL) + } }) if singleSceneURL != "" { diff --git a/pkg/scrape/hologirlsvr.go b/pkg/scrape/hologirlsvr.go index 66009b276..fba258474 100644 --- a/pkg/scrape/hologirlsvr.go +++ b/pkg/scrape/hologirlsvr.go @@ -11,7 +11,7 @@ import ( "github.com/xbapps/xbvr/pkg/models" ) -func HoloGirlsVR(wg *sync.WaitGroup, updateSite bool, knownScenes []string, out chan<- models.ScrapedScene, singleSceneURL string, singeScrapeAdditionalInfo string) error { 
+func HoloGirlsVR(wg *sync.WaitGroup, updateSite bool, knownScenes []string, out chan<- models.ScrapedScene, singleSceneURL string, singeScrapeAdditionalInfo string, limitScraping bool) error { defer wg.Done() scraperID := "hologirlsvr" siteID := "HoloGirlsVR" diff --git a/pkg/scrape/lethalhardcorevr.go b/pkg/scrape/lethalhardcorevr.go index 5c366b1a7..95cdce180 100644 --- a/pkg/scrape/lethalhardcorevr.go +++ b/pkg/scrape/lethalhardcorevr.go @@ -26,7 +26,7 @@ func isGoodTag(lookup string) bool { return true } -func LethalHardcoreSite(wg *sync.WaitGroup, updateSite bool, knownScenes []string, out chan<- models.ScrapedScene, singleSceneURL string, scraperID string, siteID string, URL string, singeScrapeAdditionalInfo string) error { +func LethalHardcoreSite(wg *sync.WaitGroup, updateSite bool, knownScenes []string, out chan<- models.ScrapedScene, singleSceneURL string, scraperID string, siteID string, URL string, singeScrapeAdditionalInfo string, limitScraping bool) error { defer wg.Done() logScrapeStart(scraperID, siteID) @@ -138,8 +138,10 @@ func LethalHardcoreSite(wg *sync.WaitGroup, updateSite bool, knownScenes []strin }) siteCollector.OnHTML(`div.pagination a`, func(e *colly.HTMLElement) { - pageURL := e.Request.AbsoluteURL(e.Attr("href")) - siteCollector.Visit(pageURL) + if !limitScraping { + pageURL := e.Request.AbsoluteURL(e.Attr("href")) + siteCollector.Visit(pageURL) + } }) siteCollector.OnHTML(`div.scene-list-item`, func(e *colly.HTMLElement) { @@ -174,12 +176,12 @@ func LethalHardcoreSite(wg *sync.WaitGroup, updateSite bool, knownScenes []strin return nil } -func LethalHardcoreVR(wg *sync.WaitGroup, updateSite bool, knownScenes []string, out chan<- models.ScrapedScene, singleSceneURL string, singeScrapeAdditionalInfo string) error { - return LethalHardcoreSite(wg, updateSite, knownScenes, out, singleSceneURL, "lethalhardcorevr", "LethalHardcoreVR", "https://lethalhardcorevr.com/lethal-hardcore-vr-scenes.html?studio=95595", singeScrapeAdditionalInfo) +func LethalHardcoreVR(wg *sync.WaitGroup, updateSite bool, knownScenes []string, out chan<- models.ScrapedScene, singleSceneURL string, singeScrapeAdditionalInfo string, limitScraping bool) error { + return LethalHardcoreSite(wg, updateSite, knownScenes, out, singleSceneURL, "lethalhardcorevr", "LethalHardcoreVR", "https://lethalhardcorevr.com/lethal-hardcore-vr-scenes.html?studio=95595&sort=released", singeScrapeAdditionalInfo, limitScraping) } -func WhorecraftVR(wg *sync.WaitGroup, updateSite bool, knownScenes []string, out chan<- models.ScrapedScene, singleSceneURL string, singeScrapeAdditionalInfo string) error { - return LethalHardcoreSite(wg, updateSite, knownScenes, out, singleSceneURL, "whorecraftvr", "WhorecraftVR", "https://lethalhardcorevr.com/lethal-hardcore-vr-scenes.html?studio=95347", singeScrapeAdditionalInfo) +func WhorecraftVR(wg *sync.WaitGroup, updateSite bool, knownScenes []string, out chan<- models.ScrapedScene, singleSceneURL string, singeScrapeAdditionalInfo string, limitScraping bool) error { + return LethalHardcoreSite(wg, updateSite, knownScenes, out, singleSceneURL, "whorecraftvr", "WhorecraftVR", "https://lethalhardcorevr.com/lethal-hardcore-vr-scenes.html?studio=95347&sort=released", singeScrapeAdditionalInfo, limitScraping) } func init() { diff --git a/pkg/scrape/littlecaprice.go b/pkg/scrape/littlecaprice.go index 569f0ef9c..8ec4b01c4 100644 --- a/pkg/scrape/littlecaprice.go +++ b/pkg/scrape/littlecaprice.go @@ -13,7 +13,7 @@ import ( "github.com/xbapps/xbvr/pkg/models" ) -func LittleCaprice(wg 
*sync.WaitGroup, updateSite bool, knownScenes []string, out chan<- models.ScrapedScene, singleSceneURL string, singeScrapeAdditionalInfo string) error { +func LittleCaprice(wg *sync.WaitGroup, updateSite bool, knownScenes []string, out chan<- models.ScrapedScene, singleSceneURL string, singeScrapeAdditionalInfo string, limitScraping bool) error { defer wg.Done() scraperID := "littlecaprice" siteID := "Little Caprice Dreams" diff --git a/pkg/scrape/navr.go b/pkg/scrape/navr.go index ce6e27a6d..8582c437b 100644 --- a/pkg/scrape/navr.go +++ b/pkg/scrape/navr.go @@ -14,7 +14,7 @@ import ( "github.com/xbapps/xbvr/pkg/models" ) -func NaughtyAmericaVR(wg *sync.WaitGroup, updateSite bool, knownScenes []string, out chan<- models.ScrapedScene, singleSceneURL string, singeScrapeAdditionalInfo string) error { +func NaughtyAmericaVR(wg *sync.WaitGroup, updateSite bool, knownScenes []string, out chan<- models.ScrapedScene, singleSceneURL string, singeScrapeAdditionalInfo string, limitScraping bool) error { defer wg.Done() scraperID := "naughtyamericavr" siteID := "NaughtyAmerica VR" @@ -163,8 +163,10 @@ func NaughtyAmericaVR(wg *sync.WaitGroup, updateSite bool, knownScenes []string, }) siteCollector.OnHTML(`ul[class=pagination] li a`, func(e *colly.HTMLElement) { - pageURL := e.Request.AbsoluteURL(e.Attr("href")) - siteCollector.Visit(pageURL) + if !limitScraping { + pageURL := e.Request.AbsoluteURL(e.Attr("href")) + siteCollector.Visit(pageURL) + } }) siteCollector.OnHTML(`div[class=site-list] div[class=scene-item] a.contain-img`, func(e *colly.HTMLElement) { diff --git a/pkg/scrape/povr.go b/pkg/scrape/povr.go index ed4c143a5..dc6e60b6c 100644 --- a/pkg/scrape/povr.go +++ b/pkg/scrape/povr.go @@ -15,7 +15,7 @@ import ( "github.com/xbapps/xbvr/pkg/models" ) -func POVR(wg *sync.WaitGroup, updateSite bool, knownScenes []string, out chan<- models.ScrapedScene, singleSceneURL string, scraperID string, siteID string, company string, siteURL string, singeScrapeAdditionalInfo string) error { +func POVR(wg *sync.WaitGroup, updateSite bool, knownScenes []string, out chan<- models.ScrapedScene, singleSceneURL string, scraperID string, siteID string, company string, siteURL string, singeScrapeAdditionalInfo string, limitScraping bool) error { defer wg.Done() logScrapeStart(scraperID, siteID) @@ -130,8 +130,10 @@ func POVR(wg *sync.WaitGroup, updateSite bool, knownScenes []string, out chan<- }) siteCollector.OnHTML(`div.pagination a[class="pagination__page next"]`, func(e *colly.HTMLElement) { - pageURL := e.Request.AbsoluteURL(e.Attr("href")) - siteCollector.Visit(pageURL) + if !limitScraping { + pageURL := e.Request.AbsoluteURL(e.Attr("href")) + siteCollector.Visit(pageURL) + } }) if singleSceneURL != "" { @@ -156,14 +158,14 @@ func addPOVRScraper(id string, name string, company string, avatarURL string, cu } else { suffixedName += " (POVR)" } - registerScraper(id, suffixedName, avatarURL, "povr.com", func(wg *sync.WaitGroup, updateSite bool, knownScenes []string, out chan<- models.ScrapedScene, singleSceneURL string, singeScrapeAdditionalInfo string) error { - return POVR(wg, updateSite, knownScenes, out, singleSceneURL, id, siteNameSuffix, company, siteURL, singeScrapeAdditionalInfo) + registerScraper(id, suffixedName, avatarURL, "povr.com", func(wg *sync.WaitGroup, updateSite bool, knownScenes []string, out chan<- models.ScrapedScene, singleSceneURL string, singeScrapeAdditionalInfo string, limitScraping bool) error { + return POVR(wg, updateSite, knownScenes, out, singleSceneURL, id, siteNameSuffix, company, 
siteURL, singeScrapeAdditionalInfo, limitScraping) }) } func init() { - registerScraper("povr-single_scene", "POVR - Other Studios", "", "povr.com", func(wg *sync.WaitGroup, updateSite bool, knownScenes []string, out chan<- models.ScrapedScene, singleSceneURL string, singeScrapeAdditionalInfo string) error { - return POVR(wg, updateSite, knownScenes, out, singleSceneURL, "", "", "", "", singeScrapeAdditionalInfo) + registerScraper("povr-single_scene", "POVR - Other Studios", "", "povr.com", func(wg *sync.WaitGroup, updateSite bool, knownScenes []string, out chan<- models.ScrapedScene, singleSceneURL string, singeScrapeAdditionalInfo string, limitScraping bool) error { + return POVR(wg, updateSite, knownScenes, out, singleSceneURL, "", "", "", "", singeScrapeAdditionalInfo, limitScraping) }) var scrapers config.ScraperList scrapers.Load() diff --git a/pkg/scrape/realitylovers.go b/pkg/scrape/realitylovers.go index 7d897f509..23a0d5bfd 100644 --- a/pkg/scrape/realitylovers.go +++ b/pkg/scrape/realitylovers.go @@ -15,7 +15,7 @@ import ( "github.com/xbapps/xbvr/pkg/models" ) -func RealityLoversSite(wg *sync.WaitGroup, updateSite bool, knownScenes []string, out chan<- models.ScrapedScene, singleSceneURL string, scraperID string, siteID string, domain string, singeScrapeAdditionalInfo string) error { +func RealityLoversSite(wg *sync.WaitGroup, updateSite bool, knownScenes []string, out chan<- models.ScrapedScene, singleSceneURL string, scraperID string, siteID string, domain string, singeScrapeAdditionalInfo string, limitScraping bool) error { defer wg.Done() logScrapeStart(scraperID, siteID) @@ -127,6 +127,9 @@ func RealityLoversSite(wg *sync.WaitGroup, updateSite bool, knownScenes []string break } page++ + if limitScraping { + break + } // have seen instances of status 404, so make sure we don't spam with calls time.Sleep(time.Second) } @@ -148,12 +151,12 @@ func RealityLoversSite(wg *sync.WaitGroup, updateSite bool, knownScenes []string return nil } -func RealityLovers(wg *sync.WaitGroup, updateSite bool, knownScenes []string, out chan<- models.ScrapedScene, singleSceneURL string, singeScrapeAdditionalInfo string) error { - return RealityLoversSite(wg, updateSite, knownScenes, out, singleSceneURL, "realitylovers", "RealityLovers", "realitylovers.com", singeScrapeAdditionalInfo) +func RealityLovers(wg *sync.WaitGroup, updateSite bool, knownScenes []string, out chan<- models.ScrapedScene, singleSceneURL string, singeScrapeAdditionalInfo string, limitScraping bool) error { + return RealityLoversSite(wg, updateSite, knownScenes, out, singleSceneURL, "realitylovers", "RealityLovers", "realitylovers.com", singeScrapeAdditionalInfo, limitScraping) } -func TSVirtualLovers(wg *sync.WaitGroup, updateSite bool, knownScenes []string, out chan<- models.ScrapedScene, singleSceneURL string, singeScrapeAdditionalInfo string) error { - return RealityLoversSite(wg, updateSite, knownScenes, out, singleSceneURL, "tsvirtuallovers", "TSVirtualLovers", "tsvirtuallovers.com", singeScrapeAdditionalInfo) +func TSVirtualLovers(wg *sync.WaitGroup, updateSite bool, knownScenes []string, out chan<- models.ScrapedScene, singleSceneURL string, singeScrapeAdditionalInfo string, limitScraping bool) error { + return RealityLoversSite(wg, updateSite, knownScenes, out, singleSceneURL, "tsvirtuallovers", "TSVirtualLovers", "tsvirtuallovers.com", singeScrapeAdditionalInfo, limitScraping) } func init() { diff --git a/pkg/scrape/realjamvr.go b/pkg/scrape/realjamvr.go index aa162daad..b2cc6ac1f 100644 --- a/pkg/scrape/realjamvr.go
+++ b/pkg/scrape/realjamvr.go @@ -18,7 +18,7 @@ import ( "github.com/xbapps/xbvr/pkg/models" ) -func RealJamSite(wg *sync.WaitGroup, updateSite bool, knownScenes []string, out chan<- models.ScrapedScene, singleSceneURL string, scraperID string, siteID string, domain string, singeScrapeAdditionalInfo string) error { +func RealJamSite(wg *sync.WaitGroup, updateSite bool, knownScenes []string, out chan<- models.ScrapedScene, singleSceneURL string, scraperID string, siteID string, domain string, singeScrapeAdditionalInfo string, limitScraping bool) error { defer wg.Done() logScrapeStart(scraperID, siteID) @@ -180,8 +180,10 @@ func RealJamSite(wg *sync.WaitGroup, updateSite bool, knownScenes []string, out }) siteCollector.OnHTML(`a.page-link`, func(e *colly.HTMLElement) { - pageURL := e.Request.AbsoluteURL(e.Attr("href")) - siteCollector.Visit(pageURL) + if !limitScraping { + pageURL := e.Request.AbsoluteURL(e.Attr("href")) + siteCollector.Visit(pageURL) + } }) siteCollector.OnHTML(`div.panel a`, func(e *colly.HTMLElement) { @@ -207,11 +209,11 @@ func RealJamSite(wg *sync.WaitGroup, updateSite bool, knownScenes []string, out return nil } -func RealJamVR(wg *sync.WaitGroup, updateSite bool, knownScenes []string, out chan<- models.ScrapedScene, singleSceneURL string, singeScrapeAdditionalInfo string) error { - return RealJamSite(wg, updateSite, knownScenes, out, singleSceneURL, "realjamvr", "RealJam VR", "realjamvr.com", singeScrapeAdditionalInfo) +func RealJamVR(wg *sync.WaitGroup, updateSite bool, knownScenes []string, out chan<- models.ScrapedScene, singleSceneURL string, singeScrapeAdditionalInfo string, limitScraping bool) error { + return RealJamSite(wg, updateSite, knownScenes, out, singleSceneURL, "realjamvr", "RealJam VR", "realjamvr.com", singeScrapeAdditionalInfo, limitScraping) } -func PornCornVR(wg *sync.WaitGroup, updateSite bool, knownScenes []string, out chan<- models.ScrapedScene, singleSceneURL string, singeScrapeAdditionalInfo string) error { - return RealJamSite(wg, updateSite, knownScenes, out, singleSceneURL, "porncornvr", "PornCorn VR", "porncornvr.com", singeScrapeAdditionalInfo) +func PornCornVR(wg *sync.WaitGroup, updateSite bool, knownScenes []string, out chan<- models.ScrapedScene, singleSceneURL string, singeScrapeAdditionalInfo string, limitScraping bool) error { + return RealJamSite(wg, updateSite, knownScenes, out, singleSceneURL, "porncornvr", "PornCorn VR", "porncornvr.com", singeScrapeAdditionalInfo, limitScraping) } func init() { diff --git a/pkg/scrape/sexbabesvr.go b/pkg/scrape/sexbabesvr.go index 27d14baa7..edf92dbe7 100644 --- a/pkg/scrape/sexbabesvr.go +++ b/pkg/scrape/sexbabesvr.go @@ -14,7 +14,7 @@ import ( "github.com/xbapps/xbvr/pkg/models" ) -func SexBabesVR(wg *sync.WaitGroup, updateSite bool, knownScenes []string, out chan<- models.ScrapedScene, singleSceneURL string, singeScrapeAdditionalInfo string) error { +func SexBabesVR(wg *sync.WaitGroup, updateSite bool, knownScenes []string, out chan<- models.ScrapedScene, singleSceneURL string, singeScrapeAdditionalInfo string, limitScraping bool) error { defer wg.Done() scraperID := "sexbabesvr" siteID := "SexBabesVR" @@ -98,8 +98,10 @@ func SexBabesVR(wg *sync.WaitGroup, updateSite bool, knownScenes []string, out c }) siteCollector.OnHTML(`a.pagination__button`, func(e *colly.HTMLElement) { - pageURL := e.Request.AbsoluteURL(e.Attr("href")) - siteCollector.Visit(pageURL) + if !limitScraping { + pageURL := e.Request.AbsoluteURL(e.Attr("href")) + siteCollector.Visit(pageURL) + } }) 
siteCollector.OnHTML(`div.videos__content`, func(e *colly.HTMLElement) { diff --git a/pkg/scrape/sinsvr.go b/pkg/scrape/sinsvr.go index 7a97b4861..731399e12 100644 --- a/pkg/scrape/sinsvr.go +++ b/pkg/scrape/sinsvr.go @@ -15,7 +15,7 @@ import ( "github.com/xbapps/xbvr/pkg/models" ) -func SinsVR(wg *sync.WaitGroup, updateSite bool, knownScenes []string, out chan<- models.ScrapedScene, singleSceneURL string, singeScrapeAdditionalInfo string) error { +func SinsVR(wg *sync.WaitGroup, updateSite bool, knownScenes []string, out chan<- models.ScrapedScene, singleSceneURL string, singeScrapeAdditionalInfo string, limitScraping bool) error { defer wg.Done() scraperID := "sinsvr" siteID := "SinsVR" @@ -120,9 +120,11 @@ func SinsVR(wg *sync.WaitGroup, updateSite bool, knownScenes []string, out chan< }) siteCollector.OnHTML(`nav.pagination a`, func(e *colly.HTMLElement) { - pageURL := e.Request.AbsoluteURL(e.Attr("href")) - if !strings.Contains(pageURL, "/join") { - siteCollector.Visit(pageURL) + if !limitScraping { + pageURL := e.Request.AbsoluteURL(e.Attr("href")) + if !strings.Contains(pageURL, "/join") { + siteCollector.Visit(pageURL) + } } }) diff --git a/pkg/scrape/slrstudios.go b/pkg/scrape/slrstudios.go index cded1b813..659cf952a 100644 --- a/pkg/scrape/slrstudios.go +++ b/pkg/scrape/slrstudios.go @@ -15,7 +15,7 @@ import ( "github.com/xbapps/xbvr/pkg/models" ) -func SexLikeReal(wg *sync.WaitGroup, updateSite bool, knownScenes []string, out chan<- models.ScrapedScene, singleSceneURL string, scraperID string, siteID string, company string, siteURL string, singeScrapeAdditionalInfo string) error { +func SexLikeReal(wg *sync.WaitGroup, updateSite bool, knownScenes []string, out chan<- models.ScrapedScene, singleSceneURL string, scraperID string, siteID string, company string, siteURL string, singeScrapeAdditionalInfo string, limitScraping bool) error { defer wg.Done() logScrapeStart(scraperID, siteID) @@ -264,8 +264,10 @@ func SexLikeReal(wg *sync.WaitGroup, updateSite bool, knownScenes []string, out }) siteCollector.OnHTML(`div.c-pagination ul li a`, func(e *colly.HTMLElement) { - pageURL := e.Request.AbsoluteURL(e.Attr("href")) - siteCollector.Visit(pageURL) + if !limitScraping { + pageURL := e.Request.AbsoluteURL(e.Attr("href")) + siteCollector.Visit(pageURL) + } }) siteCollector.OnHTML(`div.c-grid--scenes article`, func(e *colly.HTMLElement) { @@ -461,16 +463,16 @@ func addSLRScraper(id string, name string, company string, avatarURL string, cus avatarURL = "https://www.sexlikereal.com/s/refactor/images/favicons/android-icon-192x192.png" } - registerScraper(id, suffixedName, avatarURL, "sexlikereal.com", func(wg *sync.WaitGroup, updateSite bool, knownScenes []string, out chan<- models.ScrapedScene, singleSceneURL string, singeScrapeAdditionalInfo string) error { - return SexLikeReal(wg, updateSite, knownScenes, out, singleSceneURL, id, siteNameSuffix, company, siteURL, singeScrapeAdditionalInfo) + registerScraper(id, suffixedName, avatarURL, "sexlikereal.com", func(wg *sync.WaitGroup, updateSite bool, knownScenes []string, out chan<- models.ScrapedScene, singleSceneURL string, singeScrapeAdditionalInfo string, limitScraping bool) error { + return SexLikeReal(wg, updateSite, knownScenes, out, singleSceneURL, id, siteNameSuffix, company, siteURL, singeScrapeAdditionalInfo, limitScraping) }) } func init() { var scrapers config.ScraperList // scraper for single scenes with no existing scraper for the studio - registerScraper("slr-single_scene", "SLR - Other Studios", "", "sexlikereal.com", func(wg 
*sync.WaitGroup, updateSite bool, knownScenes []string, out chan<- models.ScrapedScene, singleSceneURL string, singeScrapeAdditionalInfo string) error { - return SexLikeReal(wg, updateSite, knownScenes, out, singleSceneURL, "", "", "", "", singeScrapeAdditionalInfo) + registerScraper("slr-single_scene", "SLR - Other Studios", "", "sexlikereal.com", func(wg *sync.WaitGroup, updateSite bool, knownScenes []string, out chan<- models.ScrapedScene, singleSceneURL string, singeScrapeAdditionalInfo string, limitScraping bool) error { + return SexLikeReal(wg, updateSite, knownScenes, out, singleSceneURL, "", "", "", "", singeScrapeAdditionalInfo, limitScraping) }) scrapers.Load() diff --git a/pkg/scrape/stasyqvr.go b/pkg/scrape/stasyqvr.go index 2e32b6f09..a634d00ea 100644 --- a/pkg/scrape/stasyqvr.go +++ b/pkg/scrape/stasyqvr.go @@ -13,7 +13,7 @@ import ( "github.com/xbapps/xbvr/pkg/models" ) -func StasyQVR(wg *sync.WaitGroup, updateSite bool, knownScenes []string, out chan<- models.ScrapedScene, singleSceneURL string, singeScrapeAdditionalInfo string) error { +func StasyQVR(wg *sync.WaitGroup, updateSite bool, knownScenes []string, out chan<- models.ScrapedScene, singleSceneURL string, singeScrapeAdditionalInfo string, limitScraping bool) error { defer wg.Done() scraperID := "stasyqvr" siteID := "StasyQVR" @@ -109,8 +109,10 @@ func StasyQVR(wg *sync.WaitGroup, updateSite bool, knownScenes []string, out cha }) siteCollector.OnHTML(`div.pagination div.select-links a`, func(e *colly.HTMLElement) { - pageURL := e.Request.AbsoluteURL(e.Attr("href")) - siteCollector.Visit(pageURL) + if !limitScraping { + pageURL := e.Request.AbsoluteURL(e.Attr("href")) + siteCollector.Visit(pageURL) + } }) siteCollector.OnHTML(`section.grid div.grid-info-inner`, func(e *colly.HTMLElement) { diff --git a/pkg/scrape/tmwvrnet.go b/pkg/scrape/tmwvrnet.go index 210c4a1dc..c5a083400 100644 --- a/pkg/scrape/tmwvrnet.go +++ b/pkg/scrape/tmwvrnet.go @@ -13,7 +13,7 @@ import ( "github.com/xbapps/xbvr/pkg/models" ) -func TmwVRnet(wg *sync.WaitGroup, updateSite bool, knownScenes []string, out chan<- models.ScrapedScene, singleSceneURL string, singeScrapeAdditionalInfo string) error { +func TmwVRnet(wg *sync.WaitGroup, updateSite bool, knownScenes []string, out chan<- models.ScrapedScene, singleSceneURL string, singeScrapeAdditionalInfo string, limitScraping bool) error { defer wg.Done() scraperID := "tmwvrnet" siteID := "TmwVRnet" @@ -100,9 +100,11 @@ func TmwVRnet(wg *sync.WaitGroup, updateSite bool, knownScenes []string, out cha }) siteCollector.OnHTML(`a.pagination-element__link`, func(e *colly.HTMLElement) { - if strings.Contains(e.Text, "Next") { - pageURL := e.Request.AbsoluteURL(e.Attr("href")) - siteCollector.Visit(pageURL) + if !limitScraping { + if strings.Contains(e.Text, "Next") { + pageURL := e.Request.AbsoluteURL(e.Attr("href")) + siteCollector.Visit(pageURL) + } } }) diff --git a/pkg/scrape/tngf.go b/pkg/scrape/tngf.go index 0f0d1aaa4..47273b6f4 100644 --- a/pkg/scrape/tngf.go +++ b/pkg/scrape/tngf.go @@ -13,7 +13,7 @@ import ( "github.com/xbapps/xbvr/pkg/models" ) -func TNGFVR(wg *sync.WaitGroup, updateSite bool, knownScenes []string, out chan<- models.ScrapedScene, singleSceneURL string, singeScrapeAdditionalInfo string) error { +func TNGFVR(wg *sync.WaitGroup, updateSite bool, knownScenes []string, out chan<- models.ScrapedScene, singleSceneURL string, singeScrapeAdditionalInfo string, limitScraping bool) error { defer wg.Done() scraperID := "tonightsgirlfriend" siteID := "Tonight's Girlfriend VR" @@ -142,8 
+142,10 @@ func TNGFVR(wg *sync.WaitGroup, updateSite bool, knownScenes []string, out chan< }) siteCollector.OnHTML(`ul[class=pagination] a.page-link[rel="next"]`, func(e *colly.HTMLElement) { - pageURL := e.Request.AbsoluteURL(e.Attr("href")) - siteCollector.Visit(pageURL) + if !limitScraping { + pageURL := e.Request.AbsoluteURL(e.Attr("href")) + siteCollector.Visit(pageURL) + } }) siteCollector.OnHTML(`div.panel-body`, func(e *colly.HTMLElement) { diff --git a/pkg/scrape/virtualporn.go b/pkg/scrape/virtualporn.go index 37a044b2d..4edc86162 100644 --- a/pkg/scrape/virtualporn.go +++ b/pkg/scrape/virtualporn.go @@ -12,7 +12,7 @@ import ( "github.com/xbapps/xbvr/pkg/models" ) -func VirtualPorn(wg *sync.WaitGroup, updateSite bool, knownScenes []string, out chan<- models.ScrapedScene, singleSceneURL string, singeScrapeAdditionalInfo string) error { +func VirtualPorn(wg *sync.WaitGroup, updateSite bool, knownScenes []string, out chan<- models.ScrapedScene, singleSceneURL string, singeScrapeAdditionalInfo string, limitScraping bool) error { defer wg.Done() scraperID := "bvr" siteID := "VirtualPorn" @@ -109,7 +109,9 @@ func VirtualPorn(wg *sync.WaitGroup, updateSite bool, knownScenes []string, out if sceneCnt > 0 { pageCnt += 1 - siteCollector.Visit("https://virtualporn.com/videos/" + strconv.Itoa(pageCnt)) + if !limitScraping { + siteCollector.Visit("https://virtualporn.com/videos/" + strconv.Itoa(pageCnt)) + } } }) diff --git a/pkg/scrape/virtualrealporn.go b/pkg/scrape/virtualrealporn.go index cef44c682..ecab19f92 100644 --- a/pkg/scrape/virtualrealporn.go +++ b/pkg/scrape/virtualrealporn.go @@ -17,7 +17,7 @@ import ( "github.com/xbapps/xbvr/pkg/models" ) -func VirtualRealPornSite(wg *sync.WaitGroup, updateSite bool, knownScenes []string, out chan<- models.ScrapedScene, singleSceneURL string, scraperID string, siteID string, URL string, singeScrapeAdditionalInfo string) error { +func VirtualRealPornSite(wg *sync.WaitGroup, updateSite bool, knownScenes []string, out chan<- models.ScrapedScene, singleSceneURL string, scraperID string, siteID string, URL string, singeScrapeAdditionalInfo string, limitScraping bool) error { defer wg.Done() logScrapeStart(scraperID, siteID) page := 1 @@ -245,13 +245,15 @@ func VirtualRealPornSite(wg *sync.WaitGroup, updateSite bool, knownScenes []stri }) siteCollector.OnHTML(`.searchBox option`, func(e *colly.HTMLElement) { - pageURL := e.Request.AbsoluteURL(e.Attr("data-url")) - siteCollector.Visit(pageURL) + if !limitScraping { + pageURL := e.Request.AbsoluteURL(e.Attr("data-url")) + siteCollector.Visit(pageURL) + } }) siteCollector.OnHTML(`div.videoListContainer.paginated`, func(e *colly.HTMLElement) { e.ForEach(`a.w-portfolio-item-anchor`, func(id int, e *colly.HTMLElement) { - if e.Request.URL.RawQuery == "videoPage="+strconv.Itoa(page) { + if e.Request.URL.RawQuery == "videoPage="+strconv.Itoa(page) && !limitScraping { // found scenes on this page, get the next page of results page++ siteCollector.Visit(fmt.Sprintf("%s?videoPage=%v", URL, page)) @@ -278,20 +280,20 @@ func VirtualRealPornSite(wg *sync.WaitGroup, updateSite bool, knownScenes []stri return nil } -func VirtualRealPorn(wg *sync.WaitGroup, updateSite bool, knownScenes []string, out chan<- models.ScrapedScene, singleSceneURL string, singeScrapeAdditionalInfo string) error { - return VirtualRealPornSite(wg, updateSite, knownScenes, out, singleSceneURL, "virtualrealporn", "VirtualRealPorn", "https://virtualrealporn.com/", singeScrapeAdditionalInfo) +func VirtualRealPorn(wg *sync.WaitGroup, updateSite 
bool, knownScenes []string, out chan<- models.ScrapedScene, singleSceneURL string, singeScrapeAdditionalInfo string, limitScraping bool) error { + return VirtualRealPornSite(wg, updateSite, knownScenes, out, singleSceneURL, "virtualrealporn", "VirtualRealPorn", "https://virtualrealporn.com/", singeScrapeAdditionalInfo, limitScraping) } -func VirtualRealTrans(wg *sync.WaitGroup, updateSite bool, knownScenes []string, out chan<- models.ScrapedScene, singleSceneURL string, singeScrapeAdditionalInfo string) error { - return VirtualRealPornSite(wg, updateSite, knownScenes, out, singleSceneURL, "virtualrealtrans", "VirtualRealTrans", "https://virtualrealtrans.com/", singeScrapeAdditionalInfo) +func VirtualRealTrans(wg *sync.WaitGroup, updateSite bool, knownScenes []string, out chan<- models.ScrapedScene, singleSceneURL string, singeScrapeAdditionalInfo string, limitScraping bool) error { + return VirtualRealPornSite(wg, updateSite, knownScenes, out, singleSceneURL, "virtualrealtrans", "VirtualRealTrans", "https://virtualrealtrans.com/", singeScrapeAdditionalInfo, limitScraping) } -func VirtualRealAmateur(wg *sync.WaitGroup, updateSite bool, knownScenes []string, out chan<- models.ScrapedScene, singleSceneURL string, singeScrapeAdditionalInfo string) error { - return VirtualRealPornSite(wg, updateSite, knownScenes, out, singleSceneURL, "virtualrealamateur", "VirtualRealAmateurPorn", "https://virtualrealamateurporn.com/", singeScrapeAdditionalInfo) +func VirtualRealAmateur(wg *sync.WaitGroup, updateSite bool, knownScenes []string, out chan<- models.ScrapedScene, singleSceneURL string, singeScrapeAdditionalInfo string, limitScraping bool) error { + return VirtualRealPornSite(wg, updateSite, knownScenes, out, singleSceneURL, "virtualrealamateur", "VirtualRealAmateurPorn", "https://virtualrealamateurporn.com/", singeScrapeAdditionalInfo, limitScraping) } -func VirtualRealGay(wg *sync.WaitGroup, updateSite bool, knownScenes []string, out chan<- models.ScrapedScene, singleSceneURL string, singeScrapeAdditionalInfo string) error { - return VirtualRealPornSite(wg, updateSite, knownScenes, out, singleSceneURL, "virtualrealgay", "VirtualRealGay", "https://virtualrealgay.com/", singeScrapeAdditionalInfo) +func VirtualRealGay(wg *sync.WaitGroup, updateSite bool, knownScenes []string, out chan<- models.ScrapedScene, singleSceneURL string, singeScrapeAdditionalInfo string, limitScraping bool) error { + return VirtualRealPornSite(wg, updateSite, knownScenes, out, singleSceneURL, "virtualrealgay", "VirtualRealGay", "https://virtualrealgay.com/", singeScrapeAdditionalInfo, limitScraping) } -func VirtualRealPassion(wg *sync.WaitGroup, updateSite bool, knownScenes []string, out chan<- models.ScrapedScene, singleSceneURL string, singeScrapeAdditionalInfo string) error { - return VirtualRealPornSite(wg, updateSite, knownScenes, out, singleSceneURL, "virtualrealpassion", "VirtualRealPassion", "https://virtualrealpassion.com/", singeScrapeAdditionalInfo) +func VirtualRealPassion(wg *sync.WaitGroup, updateSite bool, knownScenes []string, out chan<- models.ScrapedScene, singleSceneURL string, singeScrapeAdditionalInfo string, limitScraping bool) error { + return VirtualRealPornSite(wg, updateSite, knownScenes, out, singleSceneURL, "virtualrealpassion", "VirtualRealPassion", "https://virtualrealpassion.com/", singeScrapeAdditionalInfo, limitScraping) } func init() { diff --git a/pkg/scrape/virtualtaboo.go b/pkg/scrape/virtualtaboo.go index 6a5cd885f..a284eb3e6 100644 --- a/pkg/scrape/virtualtaboo.go +++ 
b/pkg/scrape/virtualtaboo.go @@ -14,7 +14,7 @@ import ( "github.com/xbapps/xbvr/pkg/models" ) -func VirtualTaboo(wg *sync.WaitGroup, updateSite bool, knownScenes []string, out chan<- models.ScrapedScene, singleSceneURL string, singeScrapeAdditionalInfo string) error { +func VirtualTaboo(wg *sync.WaitGroup, updateSite bool, knownScenes []string, out chan<- models.ScrapedScene, singleSceneURL string, singeScrapeAdditionalInfo string, limitScraping bool) error { defer wg.Done() scraperID := "virtualtaboo" siteID := "VirtualTaboo" @@ -111,8 +111,10 @@ func VirtualTaboo(wg *sync.WaitGroup, updateSite bool, knownScenes []string, out }) siteCollector.OnHTML(`ul.pagination a`, func(e *colly.HTMLElement) { - pageURL := e.Request.AbsoluteURL(e.Attr("href")) - siteCollector.Visit(pageURL) + if !limitScraping { + pageURL := e.Request.AbsoluteURL(e.Attr("href")) + siteCollector.Visit(pageURL) + } }) siteCollector.OnHTML(`div.video-card__item a[class=image-container]`, func(e *colly.HTMLElement) { diff --git a/pkg/scrape/vr3000.go b/pkg/scrape/vr3000.go index b2bb57801..52a32fb44 100644 --- a/pkg/scrape/vr3000.go +++ b/pkg/scrape/vr3000.go @@ -14,7 +14,7 @@ import ( "github.com/xbapps/xbvr/pkg/models" ) -func VR3000(wg *sync.WaitGroup, updateSite bool, knownScenes []string, out chan<- models.ScrapedScene, singleSceneURL string, singeScrapeAdditionalInfo string) error { +func VR3000(wg *sync.WaitGroup, updateSite bool, knownScenes []string, out chan<- models.ScrapedScene, singleSceneURL string, singeScrapeAdditionalInfo string, limitScraping bool) error { defer wg.Done() scraperID := "vr3000" siteID := "VR3000" diff --git a/pkg/scrape/vrallure.go b/pkg/scrape/vrallure.go index 759030761..cca50ba5d 100644 --- a/pkg/scrape/vrallure.go +++ b/pkg/scrape/vrallure.go @@ -18,7 +18,7 @@ import ( "github.com/xbapps/xbvr/pkg/models" ) -func VRAllure(wg *sync.WaitGroup, updateSite bool, knownScenes []string, out chan<- models.ScrapedScene, singleSceneURL string, singeScrapeAdditionalInfo string) error { +func VRAllure(wg *sync.WaitGroup, updateSite bool, knownScenes []string, out chan<- models.ScrapedScene, singleSceneURL string, singeScrapeAdditionalInfo string, limitScraping bool) error { defer wg.Done() scraperID := "vrallure" siteID := "VRAllure" @@ -138,8 +138,10 @@ func VRAllure(wg *sync.WaitGroup, updateSite bool, knownScenes []string, out cha }) siteCollector.OnHTML(`ul.pagination li a`, func(e *colly.HTMLElement) { - pageURL := e.Request.AbsoluteURL(e.Attr("href")) - siteCollector.Visit(pageURL) + if !limitScraping { + pageURL := e.Request.AbsoluteURL(e.Attr("href")) + siteCollector.Visit(pageURL) + } }) siteCollector.OnHTML(`div.row h4.latest-scene-title a`, func(e *colly.HTMLElement) { diff --git a/pkg/scrape/vrbangers.go b/pkg/scrape/vrbangers.go index 7bb556b82..72d273001 100755 --- a/pkg/scrape/vrbangers.go +++ b/pkg/scrape/vrbangers.go @@ -15,7 +15,7 @@ import ( "github.com/xbapps/xbvr/pkg/models" ) -func VRBangersSite(wg *sync.WaitGroup, updateSite bool, knownScenes []string, out chan<- models.ScrapedScene, singleSceneURL string, scraperID string, siteID string, URL string) error { +func VRBangersSite(wg *sync.WaitGroup, updateSite bool, knownScenes []string, out chan<- models.ScrapedScene, singleSceneURL string, scraperID string, siteID string, URL string, limitScraping bool) error { defer wg.Done() logScrapeStart(scraperID, siteID) @@ -156,9 +156,10 @@ func VRBangersSite(wg *sync.WaitGroup, updateSite bool, knownScenes []string, ou }) siteCollector.OnHTML(`a.page-pagination__next`, func(e 
*colly.HTMLElement) { - pageURL := e.Request.AbsoluteURL(e.Attr("href")) - - siteCollector.Visit(pageURL) + if !limitScraping { + pageURL := e.Request.AbsoluteURL(e.Attr("href")) + siteCollector.Visit(pageURL) + } }) if singleSceneURL != "" { @@ -174,20 +175,20 @@ func VRBangersSite(wg *sync.WaitGroup, updateSite bool, knownScenes []string, ou return nil } -func VRBangers(wg *sync.WaitGroup, updateSite bool, knownScenes []string, out chan<- models.ScrapedScene, singleSceneURL string, singeScrapeAdditionalInfo string) error { - return VRBangersSite(wg, updateSite, knownScenes, out, singleSceneURL, "vrbangers", "VRBangers", "https://vrbangers.com/") +func VRBangers(wg *sync.WaitGroup, updateSite bool, knownScenes []string, out chan<- models.ScrapedScene, singleSceneURL string, singeScrapeAdditionalInfo string, limitScraping bool) error { + return VRBangersSite(wg, updateSite, knownScenes, out, singleSceneURL, "vrbangers", "VRBangers", "https://vrbangers.com/", limitScraping) } -func VRBTrans(wg *sync.WaitGroup, updateSite bool, knownScenes []string, out chan<- models.ScrapedScene, singleSceneURL string, singeScrapeAdditionalInfo string) error { - return VRBangersSite(wg, updateSite, knownScenes, out, singleSceneURL, "vrbtrans", "VRBTrans", "https://vrbtrans.com/") +func VRBTrans(wg *sync.WaitGroup, updateSite bool, knownScenes []string, out chan<- models.ScrapedScene, singleSceneURL string, singeScrapeAdditionalInfo string, limitScraping bool) error { + return VRBangersSite(wg, updateSite, knownScenes, out, singleSceneURL, "vrbtrans", "VRBTrans", "https://vrbtrans.com/", limitScraping) } -func VRBGay(wg *sync.WaitGroup, updateSite bool, knownScenes []string, out chan<- models.ScrapedScene, singleSceneURL string, singeScrapeAdditionalInfo string) error { - return VRBangersSite(wg, updateSite, knownScenes, out, singleSceneURL, "vrbgay", "VRBGay", "https://vrbgay.com/") +func VRBGay(wg *sync.WaitGroup, updateSite bool, knownScenes []string, out chan<- models.ScrapedScene, singleSceneURL string, singeScrapeAdditionalInfo string, limitScraping bool) error { + return VRBangersSite(wg, updateSite, knownScenes, out, singleSceneURL, "vrbgay", "VRBGay", "https://vrbgay.com/", limitScraping) } -func VRConk(wg *sync.WaitGroup, updateSite bool, knownScenes []string, out chan<- models.ScrapedScene, singleSceneURL string, singeScrapeAdditionalInfo string) error { - return VRBangersSite(wg, updateSite, knownScenes, out, singleSceneURL, "vrconk", "VRCONK", "https://vrconk.com/") +func VRConk(wg *sync.WaitGroup, updateSite bool, knownScenes []string, out chan<- models.ScrapedScene, singleSceneURL string, singeScrapeAdditionalInfo string, limitScraping bool) error { + return VRBangersSite(wg, updateSite, knownScenes, out, singleSceneURL, "vrconk", "VRCONK", "https://vrconk.com/", limitScraping) } -func BlowVR(wg *sync.WaitGroup, updateSite bool, knownScenes []string, out chan<- models.ScrapedScene, singleSceneURL string, singeScrapeAdditionalInfo string) error { - return VRBangersSite(wg, updateSite, knownScenes, out, singleSceneURL, "blowvr", "BlowVR", "https://blowvr.com/") +func BlowVR(wg *sync.WaitGroup, updateSite bool, knownScenes []string, out chan<- models.ScrapedScene, singleSceneURL string, singeScrapeAdditionalInfo string, limitScraping bool) error { + return VRBangersSite(wg, updateSite, knownScenes, out, singleSceneURL, "blowvr", "BlowVR", "https://blowvr.com/", limitScraping) } func init() { diff --git a/pkg/scrape/vrhush.go b/pkg/scrape/vrhush.go index 9f3c5434c..564801e82 100644 --- 
a/pkg/scrape/vrhush.go +++ b/pkg/scrape/vrhush.go @@ -16,7 +16,7 @@ import ( "github.com/xbapps/xbvr/pkg/models" ) -func VRHush(wg *sync.WaitGroup, updateSite bool, knownScenes []string, out chan<- models.ScrapedScene, singleSceneURL string, singeScrapeAdditionalInfo string) error { +func VRHush(wg *sync.WaitGroup, updateSite bool, knownScenes []string, out chan<- models.ScrapedScene, singleSceneURL string, singeScrapeAdditionalInfo string, limitScraping bool) error { defer wg.Done() scraperID := "vrhush" siteID := "VRHush" @@ -138,8 +138,10 @@ func VRHush(wg *sync.WaitGroup, updateSite bool, knownScenes []string, out chan< siteCollector.OnHTML(`ul.pagination li`, func(e *colly.HTMLElement) { if strings.Contains(e.Attr("class"), "next") && !strings.Contains(e.Attr("class"), "disabled") { pageCnt += 1 - pageURL := e.Request.AbsoluteURL(`https://vrhush.com/scenes?page=` + fmt.Sprint(pageCnt) + `&order_by=publish_date&sort_by=desc`) - siteCollector.Visit(pageURL) + if !limitScraping { + pageURL := e.Request.AbsoluteURL(`https://vrhush.com/scenes?page=` + fmt.Sprint(pageCnt) + `&order_by=publish_date&sort_by=desc`) + siteCollector.Visit(pageURL) + } } }) diff --git a/pkg/scrape/vrlatina.go b/pkg/scrape/vrlatina.go index 78b814bda..8dcf3c112 100644 --- a/pkg/scrape/vrlatina.go +++ b/pkg/scrape/vrlatina.go @@ -14,7 +14,7 @@ import ( "github.com/xbapps/xbvr/pkg/models" ) -func VRLatina(wg *sync.WaitGroup, updateSite bool, knownScenes []string, out chan<- models.ScrapedScene, singleSceneURL string, singeScrapeAdditionalInfo string) error { +func VRLatina(wg *sync.WaitGroup, updateSite bool, knownScenes []string, out chan<- models.ScrapedScene, singleSceneURL string, singeScrapeAdditionalInfo string, limitScraping bool) error { defer wg.Done() scraperID := "vrlatina" siteID := "VRLatina" @@ -109,8 +109,10 @@ func VRLatina(wg *sync.WaitGroup, updateSite bool, knownScenes []string, out cha }) siteCollector.OnHTML(`div.pagination a`, func(e *colly.HTMLElement) { - pageURL := e.Request.AbsoluteURL(e.Attr("href")) - siteCollector.Visit(pageURL) + if !limitScraping { + pageURL := e.Request.AbsoluteURL(e.Attr("href")) + siteCollector.Visit(pageURL) + } }) siteCollector.OnHTML(`div.item-col.-video a`, func(e *colly.HTMLElement) { diff --git a/pkg/scrape/vrphub.go b/pkg/scrape/vrphub.go index 4b12ff1f8..21fb85cdb 100644 --- a/pkg/scrape/vrphub.go +++ b/pkg/scrape/vrphub.go @@ -29,7 +29,7 @@ func getVideoName(fileUrl string) (string, error) { return filename, nil } -func VRPHub(wg *sync.WaitGroup, updateSite bool, knownScenes []string, out chan<- models.ScrapedScene, singleSceneURL string, scraperID string, siteID string, company string, siteURL string, singeScrapeAdditionalInfo string, callback func(e *colly.HTMLElement, sc *models.ScrapedScene)) error { +func VRPHub(wg *sync.WaitGroup, updateSite bool, knownScenes []string, out chan<- models.ScrapedScene, singleSceneURL string, scraperID string, siteID string, company string, siteURL string, singeScrapeAdditionalInfo string, limitScraping bool, callback func(e *colly.HTMLElement, sc *models.ScrapedScene)) error { defer wg.Done() logScrapeStart(scraperID, siteID) @@ -164,8 +164,10 @@ func VRPHub(wg *sync.WaitGroup, updateSite bool, knownScenes []string, out chan< }) siteCollector.OnHTML(`div.page-nav a.page`, func(e *colly.HTMLElement) { - pageURL := e.Request.AbsoluteURL(e.Attr("href")) - siteCollector.Visit(pageURL) + if !limitScraping { + pageURL := e.Request.AbsoluteURL(e.Attr("href")) + siteCollector.Visit(pageURL) + } }) 
siteCollector.OnHTML(`div.td-main-content div.td-module-image-main a`, func(e *colly.HTMLElement) { @@ -257,14 +259,14 @@ func addVRPHubScraper(id string, name string, company string, avatarURL string, avatarURL = "https://cdn.vrphub.com/wp-content/uploads/2016/08/vrphubnew.png" } - registerScraper(id, suffixedName, avatarURL, "vrphub.com", func(wg *sync.WaitGroup, updateSite bool, knownScenes []string, out chan<- models.ScrapedScene, singleSceneURL string, singeScrapeAdditionalInfo string) error { - return VRPHub(wg, updateSite, knownScenes, out, singleSceneURL, id, siteNameSuffix, company, siteURL, singeScrapeAdditionalInfo, callback) + registerScraper(id, suffixedName, avatarURL, "vrphub.com", func(wg *sync.WaitGroup, updateSite bool, knownScenes []string, out chan<- models.ScrapedScene, singleSceneURL string, singeScrapeAdditionalInfo string, limitScraping bool) error { + return VRPHub(wg, updateSite, knownScenes, out, singleSceneURL, id, siteNameSuffix, company, siteURL, singeScrapeAdditionalInfo, limitScraping, callback) }) } func init() { - registerScraper("vrphub-single_scene", "VRPHub - Other Studios", "", "vrphub.com", func(wg *sync.WaitGroup, updateSite bool, knownScenes []string, out chan<- models.ScrapedScene, singleSceneURL string, singeScrapeAdditionalInfo string) error { - return VRPHub(wg, updateSite, knownScenes, out, singleSceneURL, "", "", "", "", singeScrapeAdditionalInfo, noop) + registerScraper("vrphub-single_scene", "VRPHub - Other Studios", "", "vrphub.com", func(wg *sync.WaitGroup, updateSite bool, knownScenes []string, out chan<- models.ScrapedScene, singleSceneURL string, singeScrapeAdditionalInfo string, limitScraping bool) error { + return VRPHub(wg, updateSite, knownScenes, out, singleSceneURL, "", "", "", "", singeScrapeAdditionalInfo, limitScraping, noop) }) var scrapers config.ScraperList scrapers.Load() diff --git a/pkg/scrape/vrporn.go b/pkg/scrape/vrporn.go index a62ca1630..97255fe74 100644 --- a/pkg/scrape/vrporn.go +++ b/pkg/scrape/vrporn.go @@ -14,7 +14,7 @@ import ( "github.com/xbapps/xbvr/pkg/models" ) -func VRPorn(wg *sync.WaitGroup, updateSite bool, knownScenes []string, out chan<- models.ScrapedScene, singleSceneURL string, scraperID string, siteID string, company string, siteURL string, singeScrapeAdditionalInfo string) error { +func VRPorn(wg *sync.WaitGroup, updateSite bool, knownScenes []string, out chan<- models.ScrapedScene, singleSceneURL string, scraperID string, siteID string, company string, siteURL string, singeScrapeAdditionalInfo string, limitScraping bool) error { defer wg.Done() logScrapeStart(scraperID, siteID) @@ -142,8 +142,10 @@ func VRPorn(wg *sync.WaitGroup, updateSite bool, knownScenes []string, out chan< }) siteCollector.OnHTML(`div.pagination a.next`, func(e *colly.HTMLElement) { - pageURL := e.Request.AbsoluteURL(e.Attr("href")) - siteCollector.Visit(pageURL) + if !limitScraping { + pageURL := e.Request.AbsoluteURL(e.Attr("href")) + siteCollector.Visit(pageURL) + } }) siteCollector.OnHTML(`body.tax-studio article.post div.tube-post a`, func(e *colly.HTMLElement) { @@ -176,14 +178,14 @@ func addVRPornScraper(id string, name string, company string, avatarURL string, } else { suffixedName += " (VRPorn)" } - registerScraper(id, suffixedName, avatarURL, "vrporn.com", func(wg *sync.WaitGroup, updateSite bool, knownScenes []string, out chan<- models.ScrapedScene, singleSceneURL string, singeScrapeAdditionalInfo string) error { - return VRPorn(wg, updateSite, knownScenes, out, singleSceneURL, id, siteNameSuffix, company, 
diff --git a/pkg/scrape/vrporn.go b/pkg/scrape/vrporn.go
index a62ca1630..97255fe74 100644
--- a/pkg/scrape/vrporn.go
+++ b/pkg/scrape/vrporn.go
@@ -14,7 +14,7 @@ import (
 	"github.com/xbapps/xbvr/pkg/models"
 )
 
-func VRPorn(wg *sync.WaitGroup, updateSite bool, knownScenes []string, out chan<- models.ScrapedScene, singleSceneURL string, scraperID string, siteID string, company string, siteURL string, singeScrapeAdditionalInfo string) error {
+func VRPorn(wg *sync.WaitGroup, updateSite bool, knownScenes []string, out chan<- models.ScrapedScene, singleSceneURL string, scraperID string, siteID string, company string, siteURL string, singeScrapeAdditionalInfo string, limitScraping bool) error {
 	defer wg.Done()
 
 	logScrapeStart(scraperID, siteID)
@@ -142,8 +142,10 @@ func VRPorn(wg *sync.WaitGroup, updateSite bool, knownScenes []string, out chan<
 	})
 
 	siteCollector.OnHTML(`div.pagination a.next`, func(e *colly.HTMLElement) {
-		pageURL := e.Request.AbsoluteURL(e.Attr("href"))
-		siteCollector.Visit(pageURL)
+		if !limitScraping {
+			pageURL := e.Request.AbsoluteURL(e.Attr("href"))
+			siteCollector.Visit(pageURL)
+		}
 	})
 
 	siteCollector.OnHTML(`body.tax-studio article.post div.tube-post a`, func(e *colly.HTMLElement) {
@@ -176,14 +178,14 @@ func addVRPornScraper(id string, name string, company string, avatarURL string,
 	} else {
 		suffixedName += " (VRPorn)"
 	}
-	registerScraper(id, suffixedName, avatarURL, "vrporn.com", func(wg *sync.WaitGroup, updateSite bool, knownScenes []string, out chan<- models.ScrapedScene, singleSceneURL string, singeScrapeAdditionalInfo string) error {
-		return VRPorn(wg, updateSite, knownScenes, out, singleSceneURL, id, siteNameSuffix, company, siteURL, singeScrapeAdditionalInfo)
+	registerScraper(id, suffixedName, avatarURL, "vrporn.com", func(wg *sync.WaitGroup, updateSite bool, knownScenes []string, out chan<- models.ScrapedScene, singleSceneURL string, singeScrapeAdditionalInfo string, limitScraping bool) error {
+		return VRPorn(wg, updateSite, knownScenes, out, singleSceneURL, id, siteNameSuffix, company, siteURL, singeScrapeAdditionalInfo, limitScraping)
 	})
 }
 
 func init() {
-	registerScraper("vrporn-single_scene", "VRPorn - Other Studios", "", "vrporn.com", func(wg *sync.WaitGroup, updateSite bool, knownScenes []string, out chan<- models.ScrapedScene, singleSceneURL string, singeScrapeAdditionalInfo string) error {
-		return VRPorn(wg, updateSite, knownScenes, out, singleSceneURL, "", "", "", "", singeScrapeAdditionalInfo)
+	registerScraper("vrporn-single_scene", "VRPorn - Other Studios", "", "vrporn.com", func(wg *sync.WaitGroup, updateSite bool, knownScenes []string, out chan<- models.ScrapedScene, singleSceneURL string, singeScrapeAdditionalInfo string, limitScraping bool) error {
+		return VRPorn(wg, updateSite, knownScenes, out, singleSceneURL, "", "", "", "", singeScrapeAdditionalInfo, limitScraping)
 	})
 
 	var scrapers config.ScraperList
diff --git a/pkg/scrape/vrsexygirlz.go b/pkg/scrape/vrsexygirlz.go
index b819da7d2..03019986d 100644
--- a/pkg/scrape/vrsexygirlz.go
+++ b/pkg/scrape/vrsexygirlz.go
@@ -12,7 +12,7 @@ import (
 	"github.com/xbapps/xbvr/pkg/models"
 )
 
-func VRSexygirlz(wg *sync.WaitGroup, updateSite bool, knownScenes []string, out chan<- models.ScrapedScene, singleSceneURL string, singeScrapeAdditionalInfo string) error {
+func VRSexygirlz(wg *sync.WaitGroup, updateSite bool, knownScenes []string, out chan<- models.ScrapedScene, singleSceneURL string, singeScrapeAdditionalInfo string, limitScraping bool) error {
 	defer wg.Done()
 
 	scraperID := "vrsexygirlz"
diff --git a/pkg/scrape/vrspy.go b/pkg/scrape/vrspy.go
index cc54eb0d6..8814560de 100755
--- a/pkg/scrape/vrspy.go
+++ b/pkg/scrape/vrspy.go
@@ -23,7 +23,7 @@ const (
 	baseURL = "https://" + domain
 )
 
-func VRSpy(wg *sync.WaitGroup, updateSite bool, knownScenes []string, out chan<- models.ScrapedScene, singleSceneURL string, singleScrapeAdditionalInfo string) error {
+func VRSpy(wg *sync.WaitGroup, updateSite bool, knownScenes []string, out chan<- models.ScrapedScene, singleSceneURL string, singleScrapeAdditionalInfo string, limitScraping bool) error {
 	defer wg.Done()
 
 	logScrapeStart(scraperID, siteID)
@@ -129,7 +129,9 @@ func VRSpy(wg *sync.WaitGroup, updateSite bool, knownScenes []string, out chan<-
 	siteCollector.OnHTML(`body`, func(e *colly.HTMLElement) {
 		e.ForEachWithBreak(`.video-section a.photo-preview`, func(id int, e *colly.HTMLElement) bool {
 			currentPage, _ := strconv.Atoi(e.Request.URL.Query().Get("page"))
-			siteCollector.Visit(fmt.Sprintf("%s/videos?sort=new&page=%d", baseURL, currentPage+1))
+			if !limitScraping {
+				siteCollector.Visit(fmt.Sprintf("%s/videos?sort=new&page=%d", baseURL, currentPage+1))
+			}
 			return false
 		})
 	})
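VRSpy is the one outlier: it paginates by query parameter rather than by following a "next" link, reading the current page number off the request URL and queueing page+1. The guard therefore wraps that computed Visit. A standalone sketch of the same derivation (hypothetical helper, URL scheme assumed from the hunk above):

package scrape

import (
	"fmt"
	"net/url"
	"strconv"
)

// nextPageURL derives the following listing page from the page just visited,
// mirroring the VRSpy hunk: a missing "page" parameter parses as 0, so the
// first listing page yields page=1.
func nextPageURL(current *url.URL, base string) string {
	page, _ := strconv.Atoi(current.Query().Get("page"))
	return fmt.Sprintf("%s/videos?sort=new&page=%d", base, page+1)
}

// For "https://vrspy.com/videos?sort=new&page=3" and base "https://vrspy.com",
// this returns "https://vrspy.com/videos?sort=new&page=4".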
diff --git a/pkg/scrape/wetvr.go b/pkg/scrape/wetvr.go
index e6d864520..4de99ce98 100644
--- a/pkg/scrape/wetvr.go
+++ b/pkg/scrape/wetvr.go
@@ -13,7 +13,7 @@ import (
 	"github.com/xbapps/xbvr/pkg/models"
 )
 
-func WetVR(wg *sync.WaitGroup, updateSite bool, knownScenes []string, out chan<- models.ScrapedScene, singleSceneURL string, singeScrapeAdditionalInfo string) error {
+func WetVR(wg *sync.WaitGroup, updateSite bool, knownScenes []string, out chan<- models.ScrapedScene, singleSceneURL string, singeScrapeAdditionalInfo string, limitScraping bool) error {
 	defer wg.Done()
 	scraperID := "wetvr"
 	siteID := "WetVR"
@@ -90,8 +90,10 @@ func WetVR(wg *sync.WaitGroup, updateSite bool, knownScenes []string, out chan<-
 	})
 
 	siteCollector.OnHTML(`ul a.page-link`, func(e *colly.HTMLElement) {
-		pageURL := e.Request.AbsoluteURL(e.Attr("href"))
-		siteCollector.Visit(pageURL)
+		if !limitScraping {
+			pageURL := e.Request.AbsoluteURL(e.Attr("href"))
+			siteCollector.Visit(pageURL)
+		}
 	})
 
 	siteCollector.OnHTML(`div:has(p:contains("Latest")) div[id^="r-"]`, func(e *colly.HTMLElement) {
diff --git a/pkg/scrape/zexywankitnow.go b/pkg/scrape/zexywankitnow.go
index 393e89445..e442f0c95 100644
--- a/pkg/scrape/zexywankitnow.go
+++ b/pkg/scrape/zexywankitnow.go
@@ -13,7 +13,7 @@ import (
 	"github.com/xbapps/xbvr/pkg/models"
 )
 
-func TwoWebMediaSite(wg *sync.WaitGroup, updateSite bool, knownScenes []string, out chan<- models.ScrapedScene, singleSceneURL string, scraperID string, siteID string, URL string) error {
+func TwoWebMediaSite(wg *sync.WaitGroup, updateSite bool, knownScenes []string, out chan<- models.ScrapedScene, singleSceneURL string, scraperID string, siteID string, URL string, limitScraping bool) error {
 	defer wg.Done()
 
 	logScrapeStart(scraperID, siteID)
@@ -161,8 +161,10 @@ func TwoWebMediaSite(wg *sync.WaitGroup, updateSite bool, knownScenes []string,
 	})
 
 	siteCollector.OnHTML(`ul.pagination a.page-link`, func(e *colly.HTMLElement) {
-		pageURL := e.Request.AbsoluteURL(e.Attr("href"))
-		siteCollector.Visit(pageURL)
+		if !limitScraping {
+			pageURL := e.Request.AbsoluteURL(e.Attr("href"))
+			siteCollector.Visit(pageURL)
+		}
 	})
 
 	siteCollector.OnHTML(`div.container div.card > a`, func(e *colly.HTMLElement) {
@@ -177,7 +179,7 @@ func TwoWebMediaSite(wg *sync.WaitGroup, updateSite bool, knownScenes []string,
 	if singleSceneURL != "" {
 		sceneCollector.Visit(singleSceneURL)
 	} else {
-		siteCollector.Visit(URL)
+		siteCollector.Visit(URL + "?order=newest")
 	}
 
 	if updateSite {
@@ -187,12 +189,12 @@ func TwoWebMediaSite(wg *sync.WaitGroup, updateSite bool, knownScenes []string,
 	return nil
 }
 
-func WankitNowVR(wg *sync.WaitGroup, updateSite bool, knownScenes []string, out chan<- models.ScrapedScene, singleSceneURL string, singeScrapeAdditionalInfo string) error {
-	return TwoWebMediaSite(wg, updateSite, knownScenes, out, singleSceneURL, "wankitnowvr", "WankitNowVR", "https://wankitnowvr.com/videos/")
+func WankitNowVR(wg *sync.WaitGroup, updateSite bool, knownScenes []string, out chan<- models.ScrapedScene, singleSceneURL string, singeScrapeAdditionalInfo string, limitScraping bool) error {
+	return TwoWebMediaSite(wg, updateSite, knownScenes, out, singleSceneURL, "wankitnowvr", "WankitNowVR", "https://wankitnowvr.com/videos/", limitScraping)
 }
 
-func ZexyVR(wg *sync.WaitGroup, updateSite bool, knownScenes []string, out chan<- models.ScrapedScene, singleSceneURL string, singeScrapeAdditionalInfo string) error {
-	return TwoWebMediaSite(wg, updateSite, knownScenes, out, singleSceneURL, "zexyvr", "ZexyVR", "https://zexyvr.com/videos/")
+func ZexyVR(wg *sync.WaitGroup, updateSite bool, knownScenes []string, out chan<- models.ScrapedScene, singleSceneURL string, singeScrapeAdditionalInfo string, limitScraping bool) error {
+	return TwoWebMediaSite(wg, updateSite, knownScenes, out, singleSceneURL, "zexyvr", "ZexyVR", "https://zexyvr.com/videos/", limitScraping)
 }
 
 func init() {
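Note the companion change in TwoWebMediaSite: the entry URL now has "?order=newest" appended. This matters once pagination can be cut off, because the single page that does get scraped must be the newest-first listing, or limited updates would miss recent scenes. The plain concatenation assumes the base URL carries no query string; both URLs passed in this patch end in a bare path, so it is safe here. A more defensive variant (not what the patch does) would go through net/url:

package scrape

import "net/url"

// withNewestOrder sets order=newest without assuming the base URL is free of
// existing query parameters. Illustrative alternative only.
func withNewestOrder(raw string) (string, error) {
	u, err := url.Parse(raw)
	if err != nil {
		return "", err
	}
	q := u.Query()
	q.Set("order", "newest")
	u.RawQuery = q.Encode()
	return u.String(), nil
}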
runScrapers(knownScenes []string, toScrape string, updateSite bool, collect
 		for _, scraper := range scrapers {
 			if site.ID == scraper.ID {
 				wg.Add(1)
-				go scraper.Scrape(&wg, updateSite, knownScenes, collectedScenes, singleSceneURL, singeScrapeAdditionalInfo)
+				go scraper.Scrape(&wg, updateSite, knownScenes, collectedScenes, singleSceneURL, singeScrapeAdditionalInfo, site.LimitScraping)
 			}
 		}
 	}
@@ -129,7 +129,7 @@ func runScrapers(knownScenes []string, toScrape string, updateSite bool, collect
 		for _, scraper := range scrapers {
 			if toScrape == scraper.ID {
 				wg.Add(1)
-				go scraper.Scrape(&wg, updateSite, knownScenes, collectedScenes, singleSceneURL, singeScrapeAdditionalInfo)
+				go scraper.Scrape(&wg, updateSite, knownScenes, collectedScenes, singleSceneURL, singeScrapeAdditionalInfo, false)
 			}
 		}
 	} else {
diff --git a/ui/src/locales/en-GB.json b/ui/src/locales/en-GB.json
index 67ab9a110..c548dfac4 100644
--- a/ui/src/locales/en-GB.json
+++ b/ui/src/locales/en-GB.json
@@ -205,5 +205,7 @@
   "Only required when troubleshooting search issues, this will enable a Tab in the Scene Details to display what search fields exist and their values for a scene":"Only required when troubleshooting search issues, this will enable a Tab in the Scene Details to display what search fields exist and their values for a scene",
   "Scene Id Required": "Scene Id Required",
   "Occasionaly test uploading your export bundles. Browser memory constraints may cause problems restoring large exports. Use this function to test if your browser can load an export.": "Occasionaly test uploading your export bundles. Browser memory constraints may cause problems restoring large exports. Use this function to test if your browser can load an export.",
+  "Only the first page of scenes will be scraped":"Only the first page of scenes will be scraped",
+  "Limit Scraping":"Limit Scraping",
   "Go": "Go"
 }
diff --git a/ui/src/store/optionsSites.js b/ui/src/store/optionsSites.js
index 85d5fb2cb..74e13ea7a 100644
--- a/ui/src/store/optionsSites.js
+++ b/ui/src/store/optionsSites.js
@@ -16,8 +16,10 @@ const actions = {
   },
   async toggleSubscribed ({ state }, params) {
     state.items = await ky.put(`/api/options/sites/subscribed/${params.id}`, { json: {} }).json()
-    console.log('calling',params.id)
-  }
+  },
+  async toggleLimitScraping ({ state }, params) {
+    state.items = await ky.put(`/api/options/sites/limit_scraping/${params.id}`, { json: {} }).json()
+  },
 }
 
 export default {
diff --git a/ui/src/views/options/sections/OptionsSceneDataScrapers.vue b/ui/src/views/options/sections/OptionsSceneDataScrapers.vue
index a112167d0..a8c4e4a3f 100644
--- a/ui/src/views/options/sections/OptionsSceneDataScrapers.vue
+++ b/ui/src/views/options/sections/OptionsSceneDataScrapers.vue
@@ -40,6 +40,9 @@
           {{$t('Scraping now...')}}
+
+
+
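The runScrapers hunks above are where the per-site flag enters the pipeline: scheduled site scrapes forward site.LimitScraping, while the scrape-by-ID branch passes a hard-coded false, so ad-hoc runs are never page-limited. A reduced model of that fan-out, with simplified types and hypothetical names (the real code also threads scene channels and known-scene lists):

package tasks

import "sync"

type site struct {
	ID            string
	LimitScraping bool
}

type scraper struct {
	ID     string
	Scrape func(wg *sync.WaitGroup, limitScraping bool)
}

// runAll launches one goroutine per enabled site; each goroutine receives the
// site's own LimitScraping value, evaluated before the goroutine starts.
func runAll(sites []site, scrapers []scraper) {
	var wg sync.WaitGroup
	for _, s := range sites {
		for _, sc := range scrapers {
			if s.ID == sc.ID {
				wg.Add(1)
				go sc.Scrape(&wg, s.LimitScraping)
			}
		}
	}
	wg.Wait() // wait for every scraper before returning
}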