Skip to content

Commit

Permalink
Async: Started Async Porting
Browse files Browse the repository at this point in the history
  • Loading branch information
MeNsaaH committed Apr 3, 2020
1 parent 35ac579 commit a2b2fb9
Show file tree
Hide file tree
Showing 5 changed files with 79 additions and 42 deletions.
23 changes: 10 additions & 13 deletions engine/besthdmovies.go
Original file line number Diff line number Diff line change
Expand Up @@ -57,9 +57,8 @@ func (engine *BestHDEngine) getParseAttrs() (string, string, error) {
return "body", "article.latestPost", nil
}

func (engine *BestHDEngine) parseSingleMovie(el *colly.HTMLElement, index int) (Movie, error) {
func (engine *BestHDEngine) parseSingleMovie(el *colly.HTMLElement) (Movie, error) {
movie := Movie{
Index: index,
IsSeries: false,
Source: engine.Name,
}
Expand Down Expand Up @@ -89,11 +88,11 @@ func (engine *BestHDEngine) parseSingleMovie(el *colly.HTMLElement, index int) (
return movie, nil
}

func (engine *BestHDEngine) updateDownloadProps(downloadCollector *colly.Collector, movies *[]Movie) {
func (engine *BestHDEngine) updateDownloadProps(downloadCollector *colly.Collector, movies map[string]*Movie) {
submissionDetails := make(map[string]string)
// Update movie download link if div.post-single-content on page
downloadCollector.OnHTML("div.post-single-content", func(e *colly.HTMLElement) {
movie := &(*movies)[getMovieIndexFromCtx(e.Request)]
movie := getMovieFromMovies(e.Request.URL.String(), movies)
ptags := e.ChildTexts("p")
if ptags[len(ptags)-3] >= ptags[len(ptags)-2] {
movie.Description = strings.TrimSpace(ptags[len(ptags)-3])
Expand All @@ -120,7 +119,7 @@ func (engine *BestHDEngine) updateDownloadProps(downloadCollector *colly.Collect
})

downloadCollector.OnHTML("div.content-area", func(e *colly.HTMLElement) {
movie := &(*movies)[getMovieIndexFromCtx(e.Request)]
movie := getMovieFromMovies(e.Request.URL.String(), movies)
links := e.ChildAttrs("a", "href")
for _, link := range links {
if strings.HasPrefix(link, "https://zeefiles") || strings.HasPrefix(link, "http://zeefiles") {
Expand All @@ -140,8 +139,7 @@ func (engine *BestHDEngine) updateDownloadProps(downloadCollector *colly.Collect
})

downloadCollector.OnHTML("div.freeDownload", func(e *colly.HTMLElement) {
movieIndex := getMovieIndexFromCtx(e.Request)
movie := &(*movies)[movieIndex]
movie := getMovieFromMovies(e.Request.URL.String(), movies)
zeesubmission := make(map[string]string)
if e.ChildAttr("a.link_button", "href") != "" {
downloadlink, err := url.Parse(e.ChildAttr("a.link_button", "href"))
Expand All @@ -157,17 +155,16 @@ func (engine *BestHDEngine) updateDownloadProps(downloadCollector *colly.Collect
zeesubmission[inputNames[index]] = inputValues[index]
}

err := downloadCollector.Post(movie.DownloadLink.String(), zeesubmission)
err := downloadCollector.Post((*movie).DownloadLink.String(), zeesubmission)
if err != nil {
log.Fatal(err)
}
}
})

downloadCollector.OnHTML("form[method=post]", func(e *colly.HTMLElement) {
movieIndex := getMovieIndexFromCtx(e.Request)
var err error
movie := &(*movies)[movieIndex]
movie := getMovieFromMovies(e.Request.URL.String(), movies)
downloadlink := movie.DownloadLink
inputNames := e.ChildAttrs("input", "name")
inputValues := e.ChildAttrs("input", "value")
Expand All @@ -177,7 +174,8 @@ func (engine *BestHDEngine) updateDownloadProps(downloadCollector *colly.Collect
}
requestlink := e.Request.URL.String()
if !(strings.HasPrefix(requestlink, "https://zeefiles") || strings.HasPrefix(requestlink, "http://zeefiles")) {
downloadlink, err = url.Parse("https://udown.me/watchonline/?movieIndex=" + strconv.Itoa(movieIndex))
// TODO Dynamically assign movieIndex
downloadlink, err = url.Parse("https://udown.me/watchonline/?movieIndex=1")
if err == nil {
movie.DownloadLink = downloadlink
}
Expand All @@ -190,8 +188,7 @@ func (engine *BestHDEngine) updateDownloadProps(downloadCollector *colly.Collect

downloadCollector.OnHTML("video", func(e *colly.HTMLElement) {
downloadlink := e.ChildAttr("source", "src")
movieIndex := getMovieIndexFromCtx(e.Request)
movie := &(*movies)[movieIndex]
movie := getMovieFromMovies(e.Request.URL.String(), movies)
movie.DownloadLink, _ = url.Parse(downloadlink)
})
}
Expand Down
64 changes: 48 additions & 16 deletions engine/engines.go
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ import (
"strings"

"github.com/gocolly/colly/v2"
// "github.com/gocolly/colly/v2/debug"
log "github.com/sirupsen/logrus"
)

Expand All @@ -33,7 +34,7 @@ type Engine interface {
List(page int) SearchResult
String() string
// parseSingleMovie: parses the result of a colly HTMLElement and returns a movie
parseSingleMovie(el *colly.HTMLElement, index int) (Movie, error)
parseSingleMovie(el *colly.HTMLElement) (Movie, error)

// getParseAttrs : get the attributes to use to parse a returned soup
// the first return string is the part of the html to be parsed e.g `body`, `main`
Expand All @@ -42,7 +43,7 @@ type Engine interface {
getParseAttrs() (string, string, error)

// updateDownloadProps: registers callbacks on downloadCollector that fill in the download details of the scraped movies
updateDownloadProps(downloadCollector *colly.Collector, movies *[]Movie)
updateDownloadProps(downloadCollector *colly.Collector, movies map[string]*Movie)
}

// Scrape : Parse queries a url and return results
Expand All @@ -51,29 +52,30 @@ func Scrape(engine Engine) ([]Movie, error) {
// Cache responses to prevent multiple download of pages
// even if the collector is restarted
colly.CacheDir("./gophie_cache"),
colly.Async(true),
// colly.Debugger(&debug.LogDebugger{}),
)
// Another collector for download Links
downloadLinkCollector := c.Clone()

movieIndex := 0
var movies []Movie
var movies = make(map[string]*Movie)

// Any extra setup for the download collector can be specified in this function
engine.updateDownloadProps(downloadLinkCollector, &movies)
engine.updateDownloadProps(downloadLinkCollector, movies)

main, article, err := engine.getParseAttrs()
if err != nil {
log.Fatal(err)
}
c.OnHTML(main, func(e *colly.HTMLElement) {
e.ForEach(article, func(_ int, el *colly.HTMLElement) {
movie, err := engine.parseSingleMovie(el, movieIndex)
movie, err := engine.parseSingleMovie(el)
if err != nil {
log.Errorf("%v could not be parsed", movie)
} else {
movies = append(movies, movie)
// Using DownloadLink as key to movie makes it unique
movies[movie.DownloadLink.String()] = &movie
downloadLinkCollector.Visit(movie.DownloadLink.String())
movieIndex++
}
})
})
Expand All @@ -92,11 +94,8 @@ func Scrape(engine Engine) ([]Movie, error) {
// movie details when we need it
downloadLinkCollector.OnRequest(func(r *colly.Request) {
r.Headers.Set("Accept", "text/html,application/xhtml+xml,application/xml")
for i, movie := range movies {
if movie.DownloadLink.String() == r.URL.String() {
log.Debugf("Retrieving Download Link %v\n", movie.DownloadLink)
r.Ctx.Put("movieIndex", strconv.Itoa(i))
}
if movie, ok := movies[r.URL.String()]; ok {
log.Debugf("Retrieving Download Link %v\n", movie.DownloadLink)
}
})

Expand All @@ -110,11 +109,24 @@ func Scrape(engine Engine) ([]Movie, error) {
})

downloadLinkCollector.OnResponse(func(r *colly.Response) {
movie := &movies[getMovieIndexFromCtx(r.Request)]
log.Debugf("Retrieved Download Link %v\n", movie.DownloadLink)
// movie := movies[r.Request.URL.String()]
// log.Infof("%s %v %s", r.Request.URL.String(), movie.DownloadLink, movie.Title)
// log.Debugf("Retrieved Download Link %v\n", movie.DownloadLink)
})

c.Visit(engine.getParseURL().String())
return movies, nil
c.Wait()
downloadLinkCollector.Wait()

// Create a List of Movies
v := make([]Movie, 0, len(movies))

for _, value := range movies {
v = append(v, *value)
}
prettyPrint(v)

return v, nil
}

// Movie : the structure of all downloadable movies
Expand Down Expand Up @@ -221,3 +233,23 @@ func getMovieIndexFromCtx(r *colly.Request) int {
}
return movieIndex
}

// getMovieFromMovies returns the movie in movies that corresponds to url.
//
// The map key is tried first. If that misses, the map is scanned for an entry
// whose current DownloadLink renders to url, which covers movies whose
// DownloadLink was reassigned after they were stored under another key.
// Entries that are nil, or that have a nil DownloadLink, are skipped: calling
// String on a nil link would panic.
//
// When nothing matches, a pointer to a fresh zero-value Movie is returned, so
// the result is never nil; writes made through that placeholder are not
// reflected in movies.
func getMovieFromMovies(url string, movies map[string]*Movie) *Movie {
	// Single comma-ok lookup; a nil value stored under the key counts as a miss.
	if movie, ok := movies[url]; ok && movie != nil {
		return movie
	}
	for _, movie := range movies {
		if movie == nil || movie.DownloadLink == nil {
			continue
		}
		if movie.DownloadLink.String() == url {
			return movie
		}
	}
	return &Movie{}
}

// prettyPrint writes s to standard output as JSON indented by one space per
// nesting level. If s cannot be marshalled, nothing is printed.
func prettyPrint(s []Movie) {
	encoded, err := json.MarshalIndent(s, "", " ")
	if err != nil {
		return
	}
	fmt.Println(string(encoded))
}
15 changes: 10 additions & 5 deletions engine/fzmovies.go
Original file line number Diff line number Diff line change
Expand Up @@ -58,9 +58,8 @@ func (engine *FzEngine) getParseAttrs() (string, string, error) {
return "body", "div.mainbox", nil
}

func (engine *FzEngine) parseSingleMovie(el *colly.HTMLElement, index int) (Movie, error) {
func (engine *FzEngine) parseSingleMovie(el *colly.HTMLElement) (Movie, error) {
movie := Movie{
Index: index,
IsSeries: false,
Source: engine.Name,
}
Expand All @@ -71,6 +70,11 @@ func (engine *FzEngine) parseSingleMovie(el *colly.HTMLElement, index int) (Movi
movie.CoverPhotoLink = cover.String()
// Remove all Video: or Movie: Prefixes
movie.UploadDate = strings.TrimSpace(el.ChildTexts("small")[1])
// Update Year
year, err := strconv.Atoi(strings.TrimSpace(el.ChildTexts("small")[1]))
if err == nil {
movie.Year = year
}
movie.Title = strings.TrimSuffix(strings.TrimSpace(el.ChildText("b")), "<more>")
movie.Description = strings.TrimSpace(el.ChildTexts("small")[3])
downloadLink, err := url.Parse(el.Request.AbsoluteURL(el.ChildAttr("a", "href")))
Expand All @@ -85,10 +89,10 @@ func (engine *FzEngine) parseSingleMovie(el *colly.HTMLElement, index int) (Movi
return movie, nil
}

func (engine *FzEngine) updateDownloadProps(downloadCollector *colly.Collector, movies *[]Movie) {
func (engine *FzEngine) updateDownloadProps(downloadCollector *colly.Collector, movies map[string]*Movie) {
// Update movie download link if ul.downloadlinks on page
downloadCollector.OnHTML("ul.moviesfiles", func(e *colly.HTMLElement) {
movie := &(*movies)[getMovieIndexFromCtx(e.Request)]
movie := getMovieFromMovies(e.Request.URL.String(), movies)
link := strings.Replace(e.ChildAttr("a", "href"), "download1.php", "download.php", 1)
downloadLink, err := url.Parse(e.Request.AbsoluteURL(link + "&pt=jRGarGzOo2"))
// downloadLink, err := url.Parse(e.ChildAttr("a", "href") + "&pt=jRGarGzOo2")
Expand All @@ -109,7 +113,8 @@ func (engine *FzEngine) updateDownloadProps(downloadCollector *colly.Collector,
if err != nil {
log.Fatal(err)
}
(*movies)[getMovieIndexFromCtx(e.Request)].DownloadLink = downloadLink
movie := getMovieFromMovies(e.Request.URL.String(), movies)
movie.DownloadLink = downloadLink
}
})
}
Expand Down
18 changes: 10 additions & 8 deletions engine/netnaija.go
Original file line number Diff line number Diff line change
Expand Up @@ -69,7 +69,7 @@ func (engine *NetNaijaEngine) getParseAttrs() (string, string, error) {
return "main", article, nil
}

func (engine *NetNaijaEngine) parseSingleMovie(el *colly.HTMLElement, index int) (Movie, error) {
func (engine *NetNaijaEngine) parseSingleMovie(el *colly.HTMLElement) (Movie, error) {
// movie title identifier
var title string
if title = "h3.file-name"; engine.mode == SearchMode {
Expand All @@ -78,7 +78,6 @@ func (engine *NetNaijaEngine) parseSingleMovie(el *colly.HTMLElement, index int)

re := regexp.MustCompile(`\((.*)\)`)
movie := Movie{
Index: index,
IsSeries: false,
Source: engine.Name,
}
Expand Down Expand Up @@ -115,24 +114,26 @@ func (engine *NetNaijaEngine) parseSingleMovie(el *colly.HTMLElement, index int)
return movie, nil
}

func (engine *NetNaijaEngine) updateDownloadProps(downloadCollector *colly.Collector, movies *[]Movie) {
func (engine *NetNaijaEngine) updateDownloadProps(downloadCollector *colly.Collector, movies map[string]*Movie) {
// Update movie size
downloadCollector.OnHTML("button[id=download-button]", func(e *colly.HTMLElement) {
(*movies)[getMovieIndexFromCtx(e.Request)].Size = strings.TrimSpace(e.ChildText("span.size"))
movie := getMovieFromMovies(e.Request.URL.String(), movies)
movie.Size = strings.TrimSpace(e.ChildText("span.size"))
})

downloadCollector.OnHTML("h3.file-name", func(e *colly.HTMLElement) {
downloadLink, err := url.Parse(path.Join(strings.TrimSpace(e.ChildAttr("a", "href")), "download"))
if err != nil {
log.Fatal(err)
}
(*movies)[getMovieIndexFromCtx(e.Request)].DownloadLink = downloadLink
movie := getMovieFromMovies(e.Request.URL.String(), movies)
movie.DownloadLink = downloadLink
downloadCollector.Visit(downloadLink.String())
})

// Update movie download link if a[id=download] on page
downloadCollector.OnHTML("a[id=download]", func(e *colly.HTMLElement) {
movie := &((*movies)[getMovieIndexFromCtx(e.Request)])
movie := getMovieFromMovies(e.Request.URL.String(), movies)
movie.Size = strings.TrimSpace(e.ChildText("span[id=download-size]"))
downloadLink, err := url.Parse(e.Attr("href"))
if err != nil {
Expand All @@ -148,13 +149,14 @@ func (engine *NetNaijaEngine) updateDownloadProps(downloadCollector *colly.Colle
if err != nil {
log.Fatal(err)
}
(*movies)[getMovieIndexFromCtx(e.Request)].DownloadLink = downloadLink
movie := getMovieFromMovies(e.Request.URL.String(), movies)
movie.DownloadLink = downloadLink
}
})

//for series or parts
downloadCollector.OnHTML("div.video-series-latest-episodes", func(inn *colly.HTMLElement) {
movie := &((*movies)[getMovieIndexFromCtx(inn.Request)])
movie := getMovieFromMovies(inn.Request.URL.String(), movies)
movie.IsSeries = true
inn.ForEach("a", func(_ int, e *colly.HTMLElement) {
downloadLink, err := url.Parse(e.Attr("href"))
Expand Down
1 change: 1 addition & 0 deletions main.go
Original file line number Diff line number Diff line change
Expand Up @@ -5,5 +5,6 @@ import (
)

// main is the program entry point; it delegates all work to cmd.Execute.
func main() {
	cmd.Execute()
}

0 comments on commit a2b2fb9

Please sign in to comment.