-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathscrapper.go
116 lines (98 loc) · 2.46 KB
/
scrapper.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
package main
import (
"context"
"database/sql"
"encoding/xml"
"fmt"
"io"
"log"
"net/http"
"time"
"github.com/google/uuid"
"github.com/miguelvalente/smooth_aggregator/internal/database"
)
// Item is a single <item> entry of an RSS channel. Every field is kept as
// the raw text of the corresponding child element.
type Item struct {
	// Title is the text of the <title> element.
	Title string `xml:"title"`
	// Link is the text of the <link> element.
	Link string `xml:"link"`
	// Description is the text of the <description> element.
	Description string `xml:"description"`
	// PubDate is the unparsed text of the <pubDate> element.
	PubDate string `xml:"pubDate"`
}
// Channel is the <channel> element of an RSS document: the feed's own
// metadata plus the list of items it contains.
type Channel struct {
	// Title is the text of the channel's <title> element.
	Title string `xml:"title"`
	// Link is the text of the channel's <link> element.
	Link string `xml:"link"`
	// Description is the text of the channel's <description> element.
	Description string `xml:"description"`
	// Items collects every <item> child of the channel, in document order.
	Items []Item `xml:"item"`
}
// RSS is the root <rss> element of a feed document and the target type for
// decoding a whole feed with encoding/xml.
type RSS struct {
	// XMLName pins the expected root element name to "rss".
	XMLName xml.Name `xml:"rss"`
	// Version is the value of the root element's version attribute.
	Version string `xml:"version,attr"`
	// Channel is the document's <channel> element.
	Channel Channel `xml:"channel"`
}
// fetchRSS downloads the document at url and decodes it as an RSS feed.
//
// The request is bounded by a client-side timeout so that an unresponsive
// server cannot block the caller indefinitely. It returns a nil *RSS and a
// non-nil error when the request fails, when the server answers with any
// status other than 200 OK, when the body cannot be read, or when the body is
// not valid XML for the RSS type.
func fetchRSS(url string) (*RSS, error) {
	// http.Get goes through http.DefaultClient, which has no timeout at all.
	// A zero-Transport client still shares http.DefaultTransport, so
	// connections are reused across calls.
	client := &http.Client{Timeout: 30 * time.Second}

	resp, err := client.Get(url)
	if err != nil {
		// The *url.Error returned by the client already names the URL.
		return nil, fmt.Errorf("fetching feed: %w", err)
	}
	defer resp.Body.Close()

	if resp.StatusCode != http.StatusOK {
		return nil, fmt.Errorf("fetching feed %q: HTTP error: %s", url, resp.Status)
	}

	body, err := io.ReadAll(resp.Body)
	if err != nil {
		return nil, fmt.Errorf("reading feed %q: %w", url, err)
	}

	var rss RSS
	if err := xml.Unmarshal(body, &rss); err != nil {
		return nil, fmt.Errorf("decoding feed %q: %w", url, err)
	}
	return &rss, nil
}
// startScrapping runs the feed scraping loop. It never returns.
//
// On each pass it asks the database for the next feeds to fetch (at most
// concurrency of them), downloads every feed with fetchRSS, marks the feed as
// fetched, and stores each feed item as a post. It then waits interval before
// starting the next pass.
//
// Feeds are processed one after another; concurrency only bounds how many
// feeds are requested from the database per pass.
func startScrapping(interval time.Duration, concurrency int, apiCfg apiConfig) {
	ctx := context.Background()

	// The post statement also runs after `continue`, so a failing database
	// query is retried after interval instead of spinning in a tight loop.
	// time.Sleep returns immediately for a non-positive interval.
	for ; ; time.Sleep(interval) {
		feeds, err := apiCfg.DB.GetNextNFeedsToFetch(ctx, int32(concurrency))
		if err != nil {
			log.Println("Couldn't get next feeds to fetch", err)
			continue
		}

		for _, feed := range feeds {
			rss, fetchErr := fetchRSS(feed.Url)

			// The feed is marked as fetched whether or not the download
			// succeeded, exactly as before; only the error is now reported.
			if err := apiCfg.DB.MarkFeedFetched(ctx, feed.ID); err != nil {
				log.Println("Couldn't mark feed as fetched", feed.Url, err)
			}

			// rss is nil when fetchRSS fails; skip the feed rather than
			// dereferencing it.
			if fetchErr != nil {
				log.Println("Couldn't fetch feed", feed.Url, fetchErr)
				continue
			}

			for _, post := range rss.Channel.Items {
				fmt.Println(post.Title)

				now := time.Now().UTC()
				// parseDate returns the zero time for dates it cannot parse;
				// record those as NULL instead of a bogus valid timestamp.
				publishedAt := parseDate(post.PubDate)

				postParams := database.CreatePostParams{
					ID:        uuid.New(),
					CreatedAt: now,
					UpdatedAt: now,
					Title: sql.NullString{
						String: post.Title,
						Valid:  true,
					},
					Description: sql.NullString{
						String: post.Description,
						Valid:  true,
					},
					Url: post.Link,
					PublishedAt: sql.NullTime{
						Time:  publishedAt,
						Valid: !publishedAt.IsZero(),
					},
					FeedID: feed.ID,
				}
				// NOTE(review): CreatePost's results are still discarded, as in
				// the original. Its signature is not visible from this file;
				// once confirmed, its error should be checked and logged.
				apiCfg.DB.CreatePost(ctx, postParams)
			}
		}
	}
}
// parseDate converts a feed date string into a time.Time.
//
// RSS <pubDate> values are normally written in the RFC 822 / RFC 1123 style
// ("Mon, 02 Jan 2006 15:04:05 -0700"), so those layouts are tried first,
// including variants with a single-digit day. RFC 3339 and the bare
// "2006-01-02T15:04:05Z" form are accepted as well.
//
// If no layout matches, the error from the last attempt is printed and the
// zero time.Time is returned; callers can detect that with IsZero.
func parseDate(dateStr string) time.Time {
	layouts := []string{
		time.RFC1123Z,
		time.RFC1123,
		"Mon, 2 Jan 2006 15:04:05 -0700", // RFC 1123 with an unpadded day
		"Mon, 2 Jan 2006 15:04:05 MST",
		time.RFC822Z,
		time.RFC822,
		time.RFC3339,
		// Kept last so that the error reported on total failure is the one
		// for this layout, as it was when it was the only one tried.
		"2006-01-02T15:04:05Z",
	}

	var err error
	for _, layout := range layouts {
		var t time.Time
		if t, err = time.Parse(layout, dateStr); err == nil {
			return t
		}
	}

	fmt.Println("Error parsing date:", err)
	return time.Time{} // Return zero time on error
}