Skip to content

Commit

Permalink
Async: Use Mutex to prevent data race amongst various goroutines
Browse files Browse the repository at this point in the history
  • Loading branch information
MeNsaaH committed Apr 3, 2020
1 parent a2b2fb9 commit d166053
Show file tree
Hide file tree
Showing 4 changed files with 57 additions and 24 deletions.
20 changes: 14 additions & 6 deletions engine/besthdmovies.go
Original file line number Diff line number Diff line change
Expand Up @@ -88,11 +88,12 @@ func (engine *BestHDEngine) parseSingleMovie(el *colly.HTMLElement) (Movie, erro
return movie, nil
}

func (engine *BestHDEngine) updateDownloadProps(downloadCollector *colly.Collector, movies map[string]*Movie) {
func (engine *BestHDEngine) updateDownloadProps(downloadCollector *colly.Collector, scrapedMovies *scraped) {
submissionDetails := make(map[string]string)
// Update movie download link if div.post-single-content on page
downloadCollector.OnHTML("div.post-single-content", func(e *colly.HTMLElement) {
movie := getMovieFromMovies(e.Request.URL.String(), movies)
movie := getMovieFromMovies(e.Request.URL.String(), scrapedMovies.movies)
scrapedMovies.Lock()
ptags := e.ChildTexts("p")
if ptags[len(ptags)-3] >= ptags[len(ptags)-2] {
movie.Description = strings.TrimSpace(ptags[len(ptags)-3])
Expand All @@ -116,10 +117,11 @@ func (engine *BestHDEngine) updateDownloadProps(downloadCollector *colly.Collect
}
}
}
scrapedMovies.Unlock()
})

downloadCollector.OnHTML("div.content-area", func(e *colly.HTMLElement) {
movie := getMovieFromMovies(e.Request.URL.String(), movies)
movie := getMovieFromMovies(e.Request.URL.String(), scrapedMovies.movies)
links := e.ChildAttrs("a", "href")
for _, link := range links {
if strings.HasPrefix(link, "https://zeefiles") || strings.HasPrefix(link, "http://zeefiles") {
Expand All @@ -129,7 +131,9 @@ func (engine *BestHDEngine) updateDownloadProps(downloadCollector *colly.Collect
}
downloadlink, err := url.Parse(link)
if err == nil {
scrapedMovies.Lock()
movie.DownloadLink = downloadlink
scrapedMovies.Unlock()
downloadCollector.Visit(downloadlink.String())
} else {
log.Fatal(err)
Expand All @@ -139,12 +143,14 @@ func (engine *BestHDEngine) updateDownloadProps(downloadCollector *colly.Collect
})

downloadCollector.OnHTML("div.freeDownload", func(e *colly.HTMLElement) {
movie := getMovieFromMovies(e.Request.URL.String(), movies)
movie := getMovieFromMovies(e.Request.URL.String(), scrapedMovies.movies)
zeesubmission := make(map[string]string)
if e.ChildAttr("a.link_button", "href") != "" {
downloadlink, err := url.Parse(e.ChildAttr("a.link_button", "href"))
if err == nil {
scrapedMovies.Lock()
movie.DownloadLink = downloadlink
scrapedMovies.Unlock()
}
} else {

Expand All @@ -164,7 +170,7 @@ func (engine *BestHDEngine) updateDownloadProps(downloadCollector *colly.Collect

downloadCollector.OnHTML("form[method=post]", func(e *colly.HTMLElement) {
var err error
movie := getMovieFromMovies(e.Request.URL.String(), movies)
movie := getMovieFromMovies(e.Request.URL.String(), scrapedMovies.movies)
downloadlink := movie.DownloadLink
inputNames := e.ChildAttrs("input", "name")
inputValues := e.ChildAttrs("input", "value")
Expand All @@ -188,8 +194,10 @@ func (engine *BestHDEngine) updateDownloadProps(downloadCollector *colly.Collect

downloadCollector.OnHTML("video", func(e *colly.HTMLElement) {
downloadlink := e.ChildAttr("source", "src")
movie := getMovieFromMovies(e.Request.URL.String(), movies)
movie := getMovieFromMovies(e.Request.URL.String(), scrapedMovies.movies)
scrapedMovies.Lock()
movie.DownloadLink, _ = url.Parse(downloadlink)
scrapedMovies.Unlock()
})
}

Expand Down
28 changes: 19 additions & 9 deletions engine/engines.go
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ import (
"net/url"
"strconv"
"strings"
"sync"

"github.com/gocolly/colly/v2"
// "github.com/gocolly/colly/v2/debug"
Expand Down Expand Up @@ -43,7 +44,14 @@ type Engine interface {
getParseAttrs() (string, string, error)

// updateDownloadProps: registers handlers on downloadCollector that update the download details of the scraped movies
updateDownloadProps(downloadCollector *colly.Collector, movies map[string]*Movie)
updateDownloadProps(downloadCollector *colly.Collector, scrapedMovies *scraped)
}

// scraped holds every movie collected during a scrape, keyed by a URL string.
// The map and the movies it points to are touched from collector callbacks
// running on different goroutines, so the embedded Mutex is locked around
// updates to prevent data races. Pass it by pointer: a struct containing a
// Mutex must not be copied.
type scraped struct {
// movies maps a URL string to the movie record scraped for it.
movies map[string]*Movie
// Embedded so callers can call Lock() / Unlock() directly on the scraped value.
sync.Mutex
}

// Scrape : Parse queries a url and return results
Expand All @@ -58,10 +66,10 @@ func Scrape(engine Engine) ([]Movie, error) {
// Another collector for download Links
downloadLinkCollector := c.Clone()

var movies = make(map[string]*Movie)
scrapedMovies := scraped{movies: make(map[string]*Movie)}

// Any extra setup needed for downloads can be specified in this function
engine.updateDownloadProps(downloadLinkCollector, movies)
engine.updateDownloadProps(downloadLinkCollector, &scrapedMovies)

main, article, err := engine.getParseAttrs()
if err != nil {
Expand All @@ -74,7 +82,9 @@ func Scrape(engine Engine) ([]Movie, error) {
log.Errorf("%v could not be parsed", movie)
} else {
// Using DownloadLink as key to movie makes it unique
movies[movie.DownloadLink.String()] = &movie
scrapedMovies.Lock()
scrapedMovies.movies[movie.DownloadLink.String()] = &movie
scrapedMovies.Unlock()
downloadLinkCollector.Visit(movie.DownloadLink.String())
}
})
Expand All @@ -94,7 +104,7 @@ func Scrape(engine Engine) ([]Movie, error) {
// movie details when we need it
downloadLinkCollector.OnRequest(func(r *colly.Request) {
r.Headers.Set("Accept", "text/html,application/xhtml+xml,application/xml")
if movie, ok := movies[r.URL.String()]; ok {
if movie, ok := scrapedMovies.movies[r.URL.String()]; ok {
log.Debugf("Retrieving Download Link %v\n", movie.DownloadLink)
}
})
Expand All @@ -109,7 +119,7 @@ func Scrape(engine Engine) ([]Movie, error) {
})

downloadLinkCollector.OnResponse(func(r *colly.Response) {
// movie := movies[r.Request.URL.String()]
// movie := scrapedMovies[r.Request.URL.String()]
// log.Infof("%s %v %s", r.Request.URL.String(), movie.DownloadLink, movie.Title)
// log.Debugf("Retrieved Download Link %v\n", movie.DownloadLink)
})
Expand All @@ -119,12 +129,12 @@ func Scrape(engine Engine) ([]Movie, error) {
downloadLinkCollector.Wait()

// Create a List of Movies
v := make([]Movie, 0, len(movies))
v := make([]Movie, 0, len(scrapedMovies.movies))

for _, value := range movies {
for _, value := range scrapedMovies.movies {
v = append(v, *value)
}
prettyPrint(v)
// prettyPrint(v)

return v, nil
}
Expand Down
11 changes: 8 additions & 3 deletions engine/fzmovies.go
Original file line number Diff line number Diff line change
Expand Up @@ -89,21 +89,24 @@ func (engine *FzEngine) parseSingleMovie(el *colly.HTMLElement) (Movie, error) {
return movie, nil
}

func (engine *FzEngine) updateDownloadProps(downloadCollector *colly.Collector, movies map[string]*Movie) {
func (engine *FzEngine) updateDownloadProps(downloadCollector *colly.Collector, scrapedMovies *scraped) {
// Update movie download link and size if ul.moviesfiles on page
downloadCollector.OnHTML("ul.moviesfiles", func(e *colly.HTMLElement) {
movie := getMovieFromMovies(e.Request.URL.String(), movies)
movie := getMovieFromMovies(e.Request.URL.String(), scrapedMovies.movies)
link := strings.Replace(e.ChildAttr("a", "href"), "download1.php", "download.php", 1)
downloadLink, err := url.Parse(e.Request.AbsoluteURL(link + "&pt=jRGarGzOo2"))
// downloadLink, err := url.Parse(e.ChildAttr("a", "href") + "&pt=jRGarGzOo2")
if err != nil {
log.Fatal(err)
}

scrapedMovies.Lock()
movie.DownloadLink = downloadLink
re := regexp.MustCompile(`(.* MB)`)
dl := strings.TrimPrefix(re.FindStringSubmatch(e.ChildText("dcounter"))[0], "(")
movie.Size = dl
downloadCollector.Visit(downloadLink.String())
scrapedMovies.Unlock()
})

// Update Download Link if "Download" HTML on page
Expand All @@ -113,8 +116,10 @@ func (engine *FzEngine) updateDownloadProps(downloadCollector *colly.Collector,
if err != nil {
log.Fatal(err)
}
movie := getMovieFromMovies(e.Request.URL.String(), movies)
movie := getMovieFromMovies(e.Request.URL.String(), scrapedMovies.movies)
scrapedMovies.Lock()
movie.DownloadLink = downloadLink
scrapedMovies.Unlock()
}
})
}
Expand Down
22 changes: 16 additions & 6 deletions engine/netnaija.go
Original file line number Diff line number Diff line change
Expand Up @@ -114,32 +114,38 @@ func (engine *NetNaijaEngine) parseSingleMovie(el *colly.HTMLElement) (Movie, er
return movie, nil
}

func (engine *NetNaijaEngine) updateDownloadProps(downloadCollector *colly.Collector, movies map[string]*Movie) {
func (engine *NetNaijaEngine) updateDownloadProps(downloadCollector *colly.Collector, scrapedMovies *scraped) {
// Update movie size
downloadCollector.OnHTML("button[id=download-button]", func(e *colly.HTMLElement) {
movie := getMovieFromMovies(e.Request.URL.String(), movies)
movie := getMovieFromMovies(e.Request.URL.String(), scrapedMovies.movies)
scrapedMovies.Lock()
movie.Size = strings.TrimSpace(e.ChildText("span.size"))
scrapedMovies.Unlock()
})

downloadCollector.OnHTML("h3.file-name", func(e *colly.HTMLElement) {
downloadLink, err := url.Parse(path.Join(strings.TrimSpace(e.ChildAttr("a", "href")), "download"))
if err != nil {
log.Fatal(err)
}
movie := getMovieFromMovies(e.Request.URL.String(), movies)
movie := getMovieFromMovies(e.Request.URL.String(), scrapedMovies.movies)
scrapedMovies.Lock()
movie.DownloadLink = downloadLink
scrapedMovies.Unlock()
downloadCollector.Visit(downloadLink.String())
})

// Update movie download link if a[id=download] on page
downloadCollector.OnHTML("a[id=download]", func(e *colly.HTMLElement) {
movie := getMovieFromMovies(e.Request.URL.String(), movies)
movie := getMovieFromMovies(e.Request.URL.String(), scrapedMovies.movies)
scrapedMovies.Lock()
movie.Size = strings.TrimSpace(e.ChildText("span[id=download-size]"))
downloadLink, err := url.Parse(e.Attr("href"))
if err != nil {
log.Fatal(err)
}
movie.DownloadLink = downloadLink
scrapedMovies.Unlock()
})

// Update Download Link if "Direct Download" HTML on page
Expand All @@ -149,14 +155,17 @@ func (engine *NetNaijaEngine) updateDownloadProps(downloadCollector *colly.Colle
if err != nil {
log.Fatal(err)
}
movie := getMovieFromMovies(e.Request.URL.String(), movies)
movie := getMovieFromMovies(e.Request.URL.String(), scrapedMovies.movies)
scrapedMovies.Lock()
movie.DownloadLink = downloadLink
scrapedMovies.Unlock()
}
})

//for series or parts
downloadCollector.OnHTML("div.video-series-latest-episodes", func(inn *colly.HTMLElement) {
movie := getMovieFromMovies(inn.Request.URL.String(), movies)
movie := getMovieFromMovies(inn.Request.URL.String(), scrapedMovies.movies)
scrapedMovies.Lock()
movie.IsSeries = true
inn.ForEach("a", func(_ int, e *colly.HTMLElement) {
downloadLink, err := url.Parse(e.Attr("href"))
Expand All @@ -166,6 +175,7 @@ func (engine *NetNaijaEngine) updateDownloadProps(downloadCollector *colly.Colle
downloadLink.Path = path.Join(downloadLink.Path, "download")
movie.SDownloadLink = append(movie.SDownloadLink, downloadLink)
})
scrapedMovies.Unlock()
})
}

Expand Down

0 comments on commit d166053

Please sign in to comment.