diff --git a/pkg/scrapers/twitter/auth.go b/pkg/scrapers/twitter/auth.go index ffa8735a..e2064e53 100644 --- a/pkg/scrapers/twitter/auth.go +++ b/pkg/scrapers/twitter/auth.go @@ -2,98 +2,35 @@ package twitter import ( "fmt" - "sync" - "time" - "github.com/masa-finance/masa-oracle/pkg/config" twitterscraper "github.com/masa-finance/masa-twitter-scraper" - "github.com/sirupsen/logrus" ) -type TwitterAccount struct { - Username string - Password string - TwoFACode string - RateLimitedUntil time.Time -} - -type TwitterAccountManager struct { - accounts []*TwitterAccount - index int - mutex sync.Mutex -} - -func NewTwitterAccountManager(accounts []*TwitterAccount) *TwitterAccountManager { - return &TwitterAccountManager{ - accounts: accounts, - index: 0, - } -} - -func (manager *TwitterAccountManager) GetNextAccount() *TwitterAccount { - manager.mutex.Lock() - defer manager.mutex.Unlock() - for i := 0; i < len(manager.accounts); i++ { - account := manager.accounts[manager.index] - manager.index = (manager.index + 1) % len(manager.accounts) - if time.Now().After(account.RateLimitedUntil) { - return account - } - } - return nil -} - -func (manager *TwitterAccountManager) MarkAccountRateLimited(account *TwitterAccount) { - manager.mutex.Lock() - defer manager.mutex.Unlock() - account.RateLimitedUntil = time.Now().Add(time.Hour) -} - -func Auth(account *TwitterAccount) *twitterscraper.Scraper { - scraper := twitterscraper.New() - baseDir := config.GetInstance().MasaDir - - if err := LoadCookies(scraper, account, baseDir); err == nil { - logrus.Debugf("Cookies loaded for user %s.", account.Username) - if IsLoggedIn(scraper) { - logrus.Debugf("Already logged in as %s.", account.Username) - return scraper - } - } - - time.Sleep(100 * time.Millisecond) - - var err error - if account.TwoFACode != "" { - err = Login(scraper, account.Username, account.Password, account.TwoFACode) - } else { - err = Login(scraper, account.Username, account.Password) - } - - if err != nil { - logrus.WithError(err).Warnf("Login failed for %s", account.Username) - return nil - } - - time.Sleep(100 * time.Millisecond) - - if err = SaveCookies(scraper, account, baseDir); err != nil { - logrus.WithError(err).Errorf("Failed to save cookies for %s", account.Username) - } - - logrus.Debugf("Login successful for %s", account.Username) - return scraper -} - +// Login attempts to log in to the Twitter scraper service. +// It supports three modes of operation: +// 1. Basic login using just a username and password. +// 2. Login requiring an email confirmation, using a username, password, and email address. +// 3. Login with two-factor authentication, using a username, password, and 2FA code. +// Parameters: +// - scraper: A pointer to an instance of the twitterscraper.Scraper. +// - credentials: A variadic list of strings representing login credentials. +// The function expects either two strings (username, password) for basic login, +// or three strings (username, password, email/2FA code) for email confirmation or 2FA. +// +// Returns an error if login fails or if an invalid number of credentials is provided. func Login(scraper *twitterscraper.Scraper, credentials ...string) error { var err error switch len(credentials) { case 2: + // Basic login with username and password. err = scraper.Login(credentials[0], credentials[1]) case 3: + // The third parameter is used for either email confirmation or a 2FA code. + // This design assumes the Twitter scraper's Login method can contextually handle both cases. err = scraper.Login(credentials[0], credentials[1], credentials[2]) default: - return fmt.Errorf("invalid number of credentials") + // Return an error if the number of provided credentials is neither 2 nor 3. + return fmt.Errorf("invalid number of login credentials provided") } if err != nil { return fmt.Errorf("%v", err) @@ -106,8 +43,9 @@ func IsLoggedIn(scraper *twitterscraper.Scraper) bool { } func Logout(scraper *twitterscraper.Scraper) error { - if err := scraper.Logout(); err != nil { - return fmt.Errorf("logout failed: %v", err) + err := scraper.Logout() + if err != nil { + return fmt.Errorf("[-] Logout failed: %v", err) } return nil } diff --git a/pkg/scrapers/twitter/cookies.go b/pkg/scrapers/twitter/cookies.go index 467e8686..aef042e0 100644 --- a/pkg/scrapers/twitter/cookies.go +++ b/pkg/scrapers/twitter/cookies.go @@ -5,32 +5,37 @@ import ( "fmt" "net/http" "os" - "path/filepath" twitterscraper "github.com/masa-finance/masa-twitter-scraper" ) -func SaveCookies(scraper *twitterscraper.Scraper, account *TwitterAccount, baseDir string) error { - cookieFile := filepath.Join(baseDir, fmt.Sprintf("%s_twitter_cookies.json", account.Username)) +func SaveCookies(scraper *twitterscraper.Scraper, filePath string) error { cookies := scraper.GetCookies() - data, err := json.Marshal(cookies) + js, err := json.Marshal(cookies) if err != nil { return fmt.Errorf("error marshaling cookies: %v", err) } - if err = os.WriteFile(cookieFile, data, 0644); err != nil { - return fmt.Errorf("error saving cookies: %v", err) + err = os.WriteFile(filePath, js, 0644) + if err != nil { + return fmt.Errorf("error saving cookies to file: %v", err) + } + + // Load the saved cookies back into the scraper + if err := LoadCookies(scraper, filePath); err != nil { + return fmt.Errorf("error loading saved cookies: %v", err) } + return nil } -func LoadCookies(scraper *twitterscraper.Scraper, account *TwitterAccount, baseDir string) error { - cookieFile := filepath.Join(baseDir, fmt.Sprintf("%s_twitter_cookies.json", account.Username)) - data, err := os.ReadFile(cookieFile) +func LoadCookies(scraper *twitterscraper.Scraper, filePath string) error { + js, err := os.ReadFile(filePath) if err != nil { - return fmt.Errorf("error reading cookies: %v", err) + return fmt.Errorf("error reading cookies from file: %v", err) } var cookies []*http.Cookie - if err = json.Unmarshal(data, &cookies); err != nil { + err = json.Unmarshal(js, &cookies) + if err != nil { return fmt.Errorf("error unmarshaling cookies: %v", err) } scraper.SetCookies(cookies) diff --git a/pkg/scrapers/twitter/followers.go b/pkg/scrapers/twitter/followers.go index 3ef98874..543adc56 100644 --- a/pkg/scrapers/twitter/followers.go +++ b/pkg/scrapers/twitter/followers.go @@ -3,49 +3,36 @@ package twitter import ( "encoding/json" "fmt" - "strings" _ "github.com/lib/pq" twitterscraper "github.com/masa-finance/masa-twitter-scraper" "github.com/sirupsen/logrus" ) -// ScrapeFollowersForProfile scrapes the followers of a specific Twitter user. -// It takes the username and count as parameters and returns the scraped followers information and an error if any. +// ScrapeFollowersForProfile scrapes the profile and tweets of a specific Twitter user. +// It takes the username as a parameter and returns the scraped profile information and an error if any. func ScrapeFollowersForProfile(username string, count int) ([]twitterscraper.Legacy, error) { - once.Do(initializeAccountManager) + scraper := Auth() - for { - account := accountManager.GetNextAccount() - if account == nil { - return nil, fmt.Errorf("all accounts are rate-limited") - } - - scraper := Auth(account) - if scraper == nil { - logrus.Errorf("Authentication failed for %s", account.Username) - continue - } - - followingResponse, errString, _ := scraper.FetchFollowers(username, count, "") - if errString != "" { - if strings.Contains(errString, "Rate limit exceeded") { - accountManager.MarkAccountRateLimited(account) - logrus.Warnf("Rate limited: %s", account.Username) - continue - } - logrus.Errorf("Error fetching followers: %v", errString) - return nil, fmt.Errorf("%v", errString) - } + if scraper == nil { + return nil, fmt.Errorf("there was an error authenticating with your Twitter credentials") + } - // Marshal the followingResponse into a JSON string for logging - responseJSON, err := json.Marshal(followingResponse) - if err != nil { - logrus.Errorf("Error marshaling followingResponse: %v", err) - } else { - logrus.Debugf("Following response: %s", responseJSON) - } + followingResponse, errString, _ := scraper.FetchFollowers(username, count, "") + if errString != "" { + logrus.Printf("Error fetching profile: %v", errString) + return nil, fmt.Errorf("%v", errString) + } - return followingResponse, nil + // Marshal the followingResponse into a JSON string for logging + responseJSON, err := json.Marshal(followingResponse) + if err != nil { + // Log the error if the marshaling fails + logrus.Errorf("[-] Error marshaling followingResponse: %v", err) + } else { + // Log the JSON string of followingResponse + logrus.Debugf("Following response: %s", responseJSON) } + + return followingResponse, nil } diff --git a/pkg/scrapers/twitter/tweets.go b/pkg/scrapers/twitter/tweets.go index e8ba7fdf..30fed430 100644 --- a/pkg/scrapers/twitter/tweets.go +++ b/pkg/scrapers/twitter/tweets.go @@ -3,20 +3,16 @@ package twitter import ( "context" "fmt" - "os" + "path/filepath" "strings" - "sync" "time" - "github.com/joho/godotenv" + _ "github.com/lib/pq" + twitterscraper "github.com/masa-finance/masa-twitter-scraper" "github.com/sirupsen/logrus" -) -var ( - accountManager *TwitterAccountManager - once sync.Once - maxRetries = 3 + "github.com/masa-finance/masa-oracle/pkg/config" ) type TweetResult struct { @@ -24,163 +20,111 @@ type TweetResult struct { Error error } -func initializeAccountManager() { - accounts := loadAccountsFromConfig() - accountManager = NewTwitterAccountManager(accounts) -} +// auth initializes and returns a new Twitter scraper instance. It attempts to load cookies from a file to reuse an existing session. +// If no valid session is found, it performs a login with credentials specified in the application's configuration. +// On successful login, it saves the session cookies for future use. If the login fails, it returns nil. +func Auth() *twitterscraper.Scraper { + scraper := twitterscraper.New() + appConfig := config.GetInstance() + cookieFilePath := filepath.Join(appConfig.MasaDir, "twitter_cookies.json") + + if err := LoadCookies(scraper, cookieFilePath); err == nil { + logrus.Debug("Cookies loaded successfully.") + if IsLoggedIn(scraper) { + logrus.Debug("Already logged in via cookies.") + return scraper + } + } -// loadAccountsFromConfig reads Twitter accounts from the .env file -func loadAccountsFromConfig() []*TwitterAccount { - err := godotenv.Load() - if err != nil { - logrus.Fatalf("error loading .env file: %v", err) + username := appConfig.TwitterUsername + password := appConfig.TwitterPassword + twoFACode := appConfig.Twitter2FaCode + + time.Sleep(100 * time.Millisecond) + + var err error + if twoFACode != "" { + err = Login(scraper, username, password, twoFACode) + } else { + err = Login(scraper, username, password) } - accountsEnv := os.Getenv("TWITTER_ACCOUNTS") - if accountsEnv == "" { - logrus.Fatal("TWITTER_ACCOUNTS not set in .env file") + if err != nil { + logrus.WithError(err).Warning("[-] Login failed") + return nil } - accountPairs := strings.Split(accountsEnv, ",") - var accounts []*TwitterAccount + time.Sleep(100 * time.Millisecond) - for _, pair := range accountPairs { - credentials := strings.Split(pair, ":") - if len(credentials) != 2 { - logrus.Warnf("invalid account credentials: %s", pair) - continue - } - account := &TwitterAccount{ - Username: strings.TrimSpace(credentials[0]), - Password: strings.TrimSpace(credentials[1]), - } - accounts = append(accounts, account) + if err = SaveCookies(scraper, cookieFilePath); err != nil { + logrus.WithError(err).Error("[-] Failed to save cookies") } - return accounts + logrus.WithFields(logrus.Fields{ + "auth": true, + "username": username, + }).Debug("Login successful") + + return scraper } +// ScrapeTweetsByQuery performs a search on Twitter for tweets matching the specified query. +// It fetches up to the specified count of tweets and returns a slice of Tweet pointers. +// Parameters: +// - query: The search query string to find matching tweets. +// - count: The maximum number of tweets to retrieve. +// +// Returns: +// - A slice of pointers to twitterscraper.Tweet objects that match the search query. +// - An error if the scraping process encounters any issues. func ScrapeTweetsByQuery(query string, count int) ([]*TweetResult, error) { - once.Do(initializeAccountManager) + scraper := Auth() + var tweets []*TweetResult + var lastError error - getAuthenticatedScraper := func() (*twitterscraper.Scraper, *TwitterAccount, error) { - account := accountManager.GetNextAccount() - if account == nil { - return nil, nil, fmt.Errorf("all accounts are rate-limited") - } - scraper := Auth(account) - if scraper == nil { - logrus.Errorf("authentication failed for %s", account.Username) - return nil, account, fmt.Errorf("authentication failed for %s", account.Username) - } - return scraper, account, nil + if scraper == nil { + return nil, fmt.Errorf("there was an error authenticating with your Twitter credentials") } - scrapeTweets := func(scraper *twitterscraper.Scraper) ([]*TweetResult, error) { - var tweets []*TweetResult - ctx := context.Background() - scraper.SetSearchMode(twitterscraper.SearchLatest) - for tweet := range scraper.SearchTweets(ctx, query, count) { - if tweet.Error != nil { - return nil, tweet.Error + // Set search mode + scraper.SetSearchMode(twitterscraper.SearchLatest) + + // Perform the search with the specified query and count + for tweetResult := range scraper.SearchTweets(context.Background(), query, count) { + if tweetResult.Error != nil { + lastError = tweetResult.Error + logrus.Warnf("[+] Error encountered while scraping tweet: %v", tweetResult.Error) + if strings.Contains(tweetResult.Error.Error(), "Rate limit exceeded") { + return nil, fmt.Errorf("Twitter API rate limit exceeded (429 error)") } - tweets = append(tweets, &TweetResult{Tweet: &tweet.Tweet}) + continue } - return tweets, nil + tweets = append(tweets, &TweetResult{Tweet: &tweetResult.Tweet, Error: nil}) } - handleRateLimit := func(err error, account *TwitterAccount) bool { - if strings.Contains(err.Error(), "Rate limit exceeded") { - accountManager.MarkAccountRateLimited(account) - logrus.Warnf("rate limited: %s", account.Username) - return true - } - return false + if len(tweets) == 0 && lastError != nil { + return nil, lastError } - return retryTweets(func() ([]*TweetResult, error) { - scraper, account, err := getAuthenticatedScraper() - if err != nil { - return nil, err - } - - tweets, err := scrapeTweets(scraper) - if err != nil { - if handleRateLimit(err, account) { - return nil, err - } - return nil, err - } - return tweets, nil - }, maxRetries) + return tweets, nil } +// ScrapeTweetsProfile scrapes the profile and tweets of a specific Twitter user. +// It takes the username as a parameter and returns the scraped profile information and an error if any. func ScrapeTweetsProfile(username string) (twitterscraper.Profile, error) { - once.Do(initializeAccountManager) - - getAuthenticatedScraper := func() (*twitterscraper.Scraper, *TwitterAccount, error) { - account := accountManager.GetNextAccount() - if account == nil { - return nil, nil, fmt.Errorf("all accounts are rate-limited") - } - scraper := Auth(account) - if scraper == nil { - logrus.Errorf("authentication failed for %s", account.Username) - return nil, account, fmt.Errorf("authentication failed for %s", account.Username) - } - return scraper, account, nil - } + scraper := Auth() - getProfile := func(scraper *twitterscraper.Scraper) (twitterscraper.Profile, error) { - return scraper.GetProfile(username) + if scraper == nil { + return twitterscraper.Profile{}, fmt.Errorf("there was an error authenticating with your Twitter credentials") } - handleRateLimit := func(err error, account *TwitterAccount) bool { - if strings.Contains(err.Error(), "Rate limit exceeded") { - accountManager.MarkAccountRateLimited(account) - logrus.Warnf("rate limited: %s", account.Username) - return true - } - return false - } - - return retryProfile(func() (twitterscraper.Profile, error) { - scraper, account, err := getAuthenticatedScraper() - if err != nil { - return twitterscraper.Profile{}, err - } + // Set search mode + scraper.SetSearchMode(twitterscraper.SearchLatest) - profile, err := getProfile(scraper) - if err != nil { - if handleRateLimit(err, account) { - return twitterscraper.Profile{}, err - } - return twitterscraper.Profile{}, err - } - return profile, nil - }, maxRetries) -} - -func retryTweets(operation func() ([]*TweetResult, error), maxAttempts int) ([]*TweetResult, error) { - for attempt := 1; attempt <= maxAttempts; attempt++ { - result, err := operation() - if err == nil { - return result, nil - } - logrus.Errorf("retry attempt %d failed: %v", attempt, err) - time.Sleep(time.Duration(attempt) * time.Second) + profile, err := scraper.GetProfile(username) + if err != nil { + return twitterscraper.Profile{}, err } - return nil, fmt.Errorf("operation failed after %d attempts", maxAttempts) -} -func retryProfile(operation func() (twitterscraper.Profile, error), maxAttempts int) (twitterscraper.Profile, error) { - for attempt := 1; attempt <= maxAttempts; attempt++ { - result, err := operation() - if err == nil { - return result, nil - } - logrus.Errorf("retry attempt %d failed: %v", attempt, err) - time.Sleep(time.Duration(attempt) * time.Second) - } - return twitterscraper.Profile{}, fmt.Errorf("operation failed after %d attempts", maxAttempts) + return profile, nil }