diff --git a/SCRAPERS-LIST.md b/SCRAPERS-LIST.md index 7f5a64fe5..8369e5937 100644 --- a/SCRAPERS-LIST.md +++ b/SCRAPERS-LIST.md @@ -190,6 +190,7 @@ black-tgirls.com|GroobyNetwork-Partial.yml|:heavy_check_mark:|:heavy_check_mark: black4k.com|Vip4K.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- blackambush.com|ExploitedX.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|-|- blackandbig.com|SmutPuppet.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +blackboyaddictionz.com|AdultSiteRunner.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Gay blacked.com|vixenNetwork.yml|:heavy_check_mark:|:x:|:x:|:x:|Python|- blackedraw.com|vixenNetwork.yml|:heavy_check_mark:|:x:|:x:|:x:|Python|- Blackfoxbound UK|BlackFoxBound.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- @@ -198,6 +199,7 @@ blackmassivecocks.com|AdultEmpireCash.yml|:heavy_check_mark:|:x:|:heavy_check_ma blackmeatwhitefeet.com|Algolia_DogfartNetwork.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|Python|- blackph.com|FFCSH.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- blacksonblondes.com|Algolia_DogfartNetwork.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|Python|- +blacksondaddies.com|AdultSiteRunner.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Gay blacksoncougars.com|Algolia_DogfartNetwork.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|Python|- blacktgirlshardcore.com|GroobyNetwork-Partial.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:x:|-|Trans blackvalleygirls.com|PaperStreetMedia.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- @@ -217,6 +219,7 @@ boppingbabes.com|BoppingBabes.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- bossymilfs.com|Hustler.yml|:heavy_check_mark:|:x:|:x:|:x:|CDP|- bouncychicks.com|ChickPass.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- boundhoneys.com|Boundhoneys.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Lesbian +boundjocks.com|AdultSiteRunner.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Gay boundlife.com|Boundlife.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- bountyhunterporn.com|Nubiles.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:x:|-|- 
boxtrucksex.com|BoxTruckSex.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- @@ -224,6 +227,7 @@ boyfriendsharing.com|Mypervmom.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- boyfun.com|BoyFun.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Gay boysdestroyed.com|AMAMultimedia.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Gay boysfuckmilfs.com|PornCZ.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +boyshalfwayhouse.com|AdultSiteRunner.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Gay bradmontana.com|BradMontana.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- brandibelle.com|brandibelle.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- brandnewamateurs.com|BrandNewAmateurs.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- @@ -286,6 +290,7 @@ colette.com|Colette.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:x:|-|- college-amateurs.com|ChickPass.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- college-uniform.com|CollegeUniform.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:x:|-|- collegefuckparties.com|Wtfpass.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +coltstudiogroup.com|AdultSiteRunner.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Gay combatzone.us|CombatZone.yml|:x:|:x:|:heavy_check_mark:|:x:|-|- combatzonexxx.com|CombatZone.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- concoxxxion.com|AdultEmpireCash.yml|:heavy_check_mark:|:x:|:heavy_check_mark:|:x:|-|- @@ -362,6 +367,7 @@ czechwifeswap.com|Czechav.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- d52q.com|JavLibrary.yml|:heavy_check_mark:|:x:|:heavy_check_mark:|:x:|-|JAV dadcrush.com|PaperStreetMedia.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- daddy4k.com|Vip4K.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +daddycarl.com|AdultSiteRunner.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Gay daddygetslucky.com|Hustler.yml|:heavy_check_mark:|:x:|:x:|:x:|CDP|- daddyslilangel.com|Nubiles.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:x:|-|- damnthatsbig.com|AMAMultimedia.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Gay @@ -651,6 +657,7 @@ hothouse.com|Algolia_FalconStudios.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- 
hotlegsandfeet.com|DDFNetwork.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- hotmilfsfuck.com|ExploitedX.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|-|- hotmovies.com|Hotmovies.yml|:heavy_check_mark:|:x:|:heavy_check_mark:|:heavy_check_mark:|-|- +hotoldermale.com|AdultSiteRunner.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Gay hottiemoms.com|Hustler.yml|:heavy_check_mark:|:x:|:x:|:x:|CDP|- hotwifexxx.com|HotWifeXXX.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- houseofgord.com|HouseOfGord.yml|:heavy_check_mark:|:x:|:x:|:x:|CDP|- @@ -895,6 +902,7 @@ momxxx.org|Mypervmom.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- mongerinasia.com|MongerInAsia.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- monicamendez.com|PinupDollars.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- monroelee.com|bellapass.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +monstercub.com|AdultSiteRunner.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Gay mormongirlz.com|Mormongirlz.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- motherfuckerxxx.com|GammaEntertainment.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- motherless.com|Motherless.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Tube Site @@ -914,6 +922,7 @@ mycherrycrush.com|MyCherryCrush.yml|:heavy_check_mark:|:x:|:x:|:x:|CDP|- mydaughterswap.com|Mypervmom.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- mydirtyhobby.com|MyDirtyHobby.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- myfamilypies.com|Nubiles.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:x:|-|- +myfirstdaddy.com|AdultSiteRunner.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Gay mylf.com|Mylf.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- mylfdom.com|PaperStreetMedia.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- mylifeinmiami.com|ModelCentroAPI.yml|:heavy_check_mark:|:x:|:x:|:x:|Python|- @@ -932,6 +941,7 @@ nastypublicsex.com|DollsHub.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- nastystepfamily.com|Vivid.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- nataliastarr.com|VNAGirls.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- natashanice.com|ModelCentroAPI.yml|:heavy_check_mark:|:x:|:x:|:x:|Python|- 
+naturalbornbreeders.com|AdultSiteRunner.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Gay naughtyamerica.com|NaughtyAmerica.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- naughtyamericavr.com|NaughtyAmericaVR.yml|:heavy_check_mark:|:x:|:x:|:x:|-|VR naughtynatural.com|NaughtyNatural.yml|:heavy_check_mark:|:heavy_check_mark:|-|-|-|- @@ -980,6 +990,7 @@ officepov.com|ItsPOV.yml|:heavy_check_mark:|:x:|:x:|:x:|CDP|- officesexjp.com|karatmedia.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- old-n-young.com|TeenMegaWorld.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- old4k.com|Vip4K.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +older4me.com|AdultSiteRunner.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Gay oldgoesyoung.com|OldGoesYoung.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- oldje-3some.com|Oldje.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- oldje.com|Oldje.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- @@ -1047,6 +1058,7 @@ pissynetwork.com|PissyNetwork.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- pjgirls.com|pjgirls.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- playboy.tv|PlayboyTV.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- playboyplus.com|PlayboyPlus.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:x:|-|- +playdaddy.com|AdultSiteRunner.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Gay playwithrae.com|PlayWithRae.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- plumperpass.com|PlumperPass.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- plushies.tv|Plushies.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- @@ -1114,6 +1126,7 @@ rachelaldana.com|PinupDollars.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- rachelstormsxxx.com|VNAGirls.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- ragingstallion.com|Algolia_RagingStallion.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Gay randyblue.com|RandyBlue.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Gay +raunchybastards.com|AdultSiteRunner.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Gay ravenswallowzxxx.com|Andomark.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- rawattack.com|Spizoo.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- rawcouples.com|TeenMegaWorld.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- @@ -1250,6 
+1263,7 @@ steppov.com|ItsPOV.yml|:heavy_check_mark:|:x:|:x:|:x:|CDP|-
 stepsiblings.com|PaperStreetMedia.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
 stepsiblingscaught.com|Nubiles.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:x:|-|-
 stockingvideos.com|FFstockings.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
+stockydudes.com|AdultSiteRunner.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Gay
 store.freeones.com|FreeonesStore.yml|:heavy_check_mark:|:x:|:heavy_check_mark:|:x:|-|-
 str8hell.com|Williamhiggins.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Gay
 strapattackers.com|GammaEntertainment.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Femdom
@@ -1380,6 +1394,7 @@ tommydxxx.com|Algolia_NextDoorStudios.yml|:heavy_check_mark:|:heavy_check_mark:|
 tonightsgirlfriend.com|Tonightsgirlfriend.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
 toomanytrannies.com|Hustler.yml|:heavy_check_mark:|:x:|:x:|:x:|CDP|Trans
 topgrl.com|insex.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
+toplatindaddies.com|AdultSiteRunner.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Gay
 topwebmodels.com|TopWebModels.yml|:heavy_check_mark:|:x:|:x:|:x:|Python|-
 toticos.com|VegasDreamworks.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
 touchmywife.com|Algolia_touchmywife.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:x:|Python|-
diff --git a/scrapers/AdultSiteRunner.yml b/scrapers/AdultSiteRunner.yml
new file mode 100644
index 000000000..5a59b83bf
--- /dev/null
+++ b/scrapers/AdultSiteRunner.yml
@@ -0,0 +1,139 @@
+name: AdultSiteRunner  # one scraper shared by every site listed under sceneByURL
+sceneByURL:
+  - action: scrapeXPath
+    url:
+      - boundjocks.com/scene/
+      - boyshalfwayhouse.com/scene/
+      - coltstudiogroup.com/scene/
+      - daddycarl.com/scene/
+      - hotoldermale.com/scene/
+      - monstercub.com/scene/
+      - naturalbornbreeders.com/scene/
+      - older4me.com/scene/
+      - raunchybastards.com/scene/
+      - stockydudes.com/scene/
+      - toplatindaddies.com/scene/
+    scraper: oldStyleSite
+  - action: scrapeXPath
+    url:
+      - blackboyaddictionz.com/scene/
+      - blacksondaddies.com/scene/
+      - myfirstdaddy.com/scene/
+      - playdaddy.com/scene/
+    scraper: newStyleSite
+xPathScrapers:
+  oldStyleSite:
+    common:
+      $scene: //div[contains(@class, "sceneContainer")]
+    scene:
+      Title: $scene/div[@class="sceneTitle"]
+      Code:
+        selector: $scene//div[contains(@class, "sceneImgBig")]/@id
+        postProcess:
+          - replace:
+              - regex: \D*
+                with: ""  # delete every non-digit run; the pattern has no capture group to reference
+      Date:
+        selector: $scene//span[contains(@class, "sceneDetails")]
+        postProcess:
+          - replace:
+              # https://regex101.com/r/rsjbb6/1
+              - regex: ^(\w{3}\s*\d{1,2},\s*\d{4}).*?$
+                with: $1
+          - parseDate: Jan 2,2006
+      # All of this can be replaced once scrapers get access to the URL they are scraping
+      Studio:
+        Name:
+          selector: &image >
+            $scene//video/@poster
+            | $scene//div[contains(@class, "sceneImgBig")]/img/@src
+            | //div[contains(@style, "background-image")]/@style
+            | //*[contains(@class, "videoTrailer") or contains(@class, "bgScene")]//@srcset
+          postProcess: &studioNameFromURL
+            - replace:
+                - regex: ^(?:https:\/\/[\w\.]*?)([^.]+)\.com.*$
+                  with: $1
+            - map:  # keys are the bare domain label captured by the regex above
+                blackboyaddictionz: Black Boy Addictionz
+                blacksondaddies: Blacks on Daddies
+                boundjocks: Bound Jocks
+                boyshalfwayhouse: Boys Halfway House
+                coltstudiogroup: Colt Studio Group
+                daddycarl: Daddy Carl
+                hotoldermale: Hot Older Male
+                monstercub: Monster Cub
+                myfirstdaddy: My First Daddy
+                naturalbornbreeders: Natural Born Breeders
+                older4me: Older4Me
+                playdaddy: Play Daddy
+                raunchybastards: Raunchy Bastards
+                stockydudes: Stocky Dudes
+                toplatindaddies: Top Latin Daddies
+        URL:
+          selector: *image
+          postProcess:
+            - replace:
+                - regex: ^(?:https:\/\/[\w\.]*?)([^.]+)\.com.*$
+                  with: https://$1.com
+      Performers:
+        Name: >
+          $scene//div[contains(@class, "scenePerformers")]/a
+          | $scene//div[@class="scenePerf"]/span[@class="perfName"]
+        URL: >
+          $scene//div[contains(@class, "scenePerformers")]/a/@href
+          | $scene//div[@class="scenePerf"]/@data-href
+      Tags:
+        Name: $scene//a[@class="sceneTagsLnk"]/text()
+      Details:
+        selector: $scene//div[contains(@class, "sceneDescription")]/text()
+        concat: "\n\n"
+      Image:
+        selector: *image
+        postProcess:
+          - replace:
+              - regex: .*url\("(.*)"\).*  # unwrap url("...") when the value came from a background-image style
+                with: $1
+              - regex: \s*2x$  # drop a trailing "2x" (presumably the srcset density suffix)
+                with: ""
+
+  newStyleSite:
+    common:
+      $details: //div[contains(@class, "container_styled_1")]
+    scene:
+      Title: //h2[@class="main_title"]
+      Code:
+        selector: //link[@rel="canonical"]/@href
+        postProcess:
+          - replace:
+              - regex: \D*
+                with: ""  # delete every non-digit run; the pattern has no capture group to reference
+      # All of this can be replaced once scrapers get access to the URL they are scraping
+      Studio:
+        Name:
+          selector: //link[@rel="canonical"]/@href
+          postProcess: *studioNameFromURL
+        URL:
+          selector: //link[@rel="canonical"]/@href
+          postProcess:
+            - replace:
+                - regex: ^(?:https:\/\/[\w\.]*?)([^.]+)\.com.*$
+                  with: https://$1.com
+      Performers:
+        Name: $details//span[contains(@class, "perfImage")]/a
+        URL: $details//span[contains(@class, "perfImage")]/a/@href
+      Details:
+        selector: $details//p/text()
+        concat: "\n\n"
+      Date:
+        selector: ($details//h5[contains(text(), "Details")]/text())[1]
+        postProcess:
+          - replace:
+              # https://regex101.com/r/rsjbb6/1
+              - regex: ^Details:\s+(\w{3}\s*\d{1,2},\s*\d{4}).*?$
+                with: $1
+          - parseDate: Jan 2,2006
+      Image:
+        selector: //meta[@property="og:image"]/@content
+      Tags:
+        Name: $details//h5[contains(., "Categories")]/a/text()
+# Last Updated September 08, 2023