diff --git a/api/api.gen.go b/api/api.gen.go index 11cc0dc..9b4e2c4 100644 --- a/api/api.gen.go +++ b/api/api.gen.go @@ -42,6 +42,15 @@ type FeedItem struct { Title string `json:"title"` } +// SearchContextDetails defines model for SearchContextDetails. +type SearchContextDetails struct { + Language string `json:"language"` + Link string `json:"link"` + Summary string `json:"summary"` + Thumbnail string `json:"thumbnail"` + Title string `json:"title"` +} + // ServerBuildVersion defines model for ServerBuildVersion. type ServerBuildVersion struct { Commit string `json:"commit"` @@ -69,6 +78,9 @@ type GetV1VersionJSONRequestBody GetV1VersionJSONBody // ServerInterface represents all server handlers. type ServerInterface interface { + // (GET /v1/search/context/{query}) + GetV1SearchContextQuery(ctx echo.Context, query string) error + // (GET /v1/search/feed/{query}) GetV1SearchFeedQuery(ctx echo.Context, query string) error // Your GET endpoint @@ -81,6 +93,22 @@ type ServerInterfaceWrapper struct { Handler ServerInterface } +// GetV1SearchContextQuery converts echo context to params. +func (w *ServerInterfaceWrapper) GetV1SearchContextQuery(ctx echo.Context) error { + var err error + // ------------- Path parameter "query" ------------- + var query string + + err = runtime.BindStyledParameterWithLocation("simple", false, "query", runtime.ParamLocationPath, ctx.Param("query"), &query) + if err != nil { + return echo.NewHTTPError(http.StatusBadRequest, fmt.Sprintf("Invalid format for parameter query: %s", err)) + } + + // Invoke the callback with all the unmarshalled arguments + err = w.Handler.GetV1SearchContextQuery(ctx, query) + return err +} + // GetV1SearchFeedQuery converts echo context to params. 
func (w *ServerInterfaceWrapper) GetV1SearchFeedQuery(ctx echo.Context) error { var err error @@ -134,6 +162,7 @@ func RegisterHandlersWithBaseURL(router EchoRouter, si ServerInterface, baseURL Handler: si, } + router.GET(baseURL+"/v1/search/context/:query", wrapper.GetV1SearchContextQuery) router.GET(baseURL+"/v1/search/feed/:query", wrapper.GetV1SearchFeedQuery) router.GET(baseURL+"/v1/version", wrapper.GetV1Version) @@ -142,21 +171,22 @@ func RegisterHandlersWithBaseURL(router EchoRouter, si ServerInterface, baseURL // Base64 encoded, gzipped, json marshaled Swagger object var swaggerSpec = []string{ - "H4sIAAAAAAAC/6RWXW/iOBT9K+juPqYkoVBo3mak2RHah/2Y2ZFWoz6Y5EIMjm3smwyhyn9fOUlJWkJL", - "d56K4ut7js85vu4jxCrTSqIkC9Ej2DjFjNU/PxmjjPuhjdJoiGP9OVYJur9UaoQIuCTcoIHKgwytZZv+", - "oiXD5QaqygOD+5wbTCD6fir0mmYP3lO9Wm0xJvDgcGNJacE3Kbl2PIEINsl2MTV4OGSHNdU9f0NMllkL", - "+ZwlcRJDTDzIjXiboSvy2ibX0pvpeRoLridlsuvoEWbn7BJGNbm1MhkjiOoPN8Qzp8kZY/50xF8NriGC", - "X/zOM781zO+0qDwQTG7yYSs8EFzuBhesyk08vMfmWcZMObh2SeoXkjZlXauWyAm2x9prBLpSeBOm+XEz", - "E/eq3Oka9guaAs3HnIvkGxrLlRyKcZZxOqc9jLFbxfvJ4jZf5EEa1BhF1/iaBpaz253e7tkhDH9A9VKc", - "lk3X9uGk69BprhNmbmYrKtj0MAvL254wFzVZOYy3gjZA51l2Xt1bV/2NApnF0+4XWpzy0NA5U+J9Ihzv", - "7KIM87u9KII2HUMkzrQQzNI/2uUw+XB1TA7biRRyWoRzfUz+T0xwtVciZbPj+nAvz2NSnI7+nF5fo6HT", - "XSfV/XZj9SqmnSikceBu9si16s1TWFolHOjoa8rlbnQzavBGH/5c9tIbQTgOXCyURsk0hwhux8F4Ah5o", - "RmktsF+EvkVm4tRfIyb+4z5HU1ZuaYM1KWcHI67k0rH7jPQt/FJvcLPuL1dd9zMsQ0JjIfr+CNyBOwzw", - "QLLMUd63lZ2QZHL02qduaHA9uGKrlbRNFiZB0IwMSShrakxrweOanL+1jb9dP06Y2asGtnsdqpM7zBhW", - "Nq4naGPDNTVq/vG7q5q9k8Zr6M3jPgC1lIRGMjFqLtvoqXA4MprF2fHeFHMW7sKmnfO1l/rLXnbhdM6g", - "pY8qKd91wOehrtrr8hPGvT33uqE1aFL/qYR/VW5Gnz99HaFMtOLy0r1bC7OapvhjcVeEtj2GrdGaTNf/", - "sUBKpG3k+814HPP2IpK7h2NCS34RQuX1yyPfFypmIlWWokWwCHrr4WQ+DsbBOGwXHi6MTxXOpuEqvpuq", - "g4Kq+i8AAP//ZNYHKjAKAAA=", + "H4sIAAAAAAAC/9SW246jRhPHX2XU33fJGnwYH7jLJpvVKBdRDrtStJqLhi6gPfTB3QVjPOLdowbG4DGe", + 
"9aw2UnJliy6q/vWrA/1EYiW0kiDRkvCJ2DgDQZu/H4xRxv3RRmkwyKF5HCsG7hcrDSQkXCKkYEjtEQHW", + "0nR4aNFwmZK69oiBXcENMBJ+ORp6rbN779leRVuIkXhk/86i0jlPM3TuOCMhKdKt2WblAxMgZ43PnwHY", + "nehCnqpEjvmYEo8UJv+6QmfkdU6ulBcLUUW4o6La8aqXhyDO1TGKjbhEGUGRhM2Dd8iFY3KmmD+n+H8D", + "CQnJ//y+Zn5XML9nUXskpzItxkvhkZzLh9EDqwoTj79jCyGoqUbPLqF+gbQ16111Qo5hB6q9FtCV4Pd0", + "sRfRMtJpQZMm7B9ATZz9qCTCHn8CpDy350X4RkivkcgKEUnK8+/B6ZnOEVfvfYDq/uh3POvrEM4ioCs2", + "Xa6XyWreITQlmPcFz9lnMJYrObYJhOB4ntF4DFD8YbZZRgFj2MYoe8fXONgWenp72NLyIZ1TUr/k1qnp", + "3Z6gOcvmOjB6FVUHinZp5wccgLnIJHIxvjarI3JOxu/Vdxur3yEHauH49gsWx5Fq5ZyReBuE2WKnponY", + "zDdRettCGBMxMmAWP2k3yuyHq9tktswSvk5xu4kY/5Y2weVer6CcS23Y9rxNymPqp/KGjMayu/Ibldg4", + "f1yUxXynHSoXnctEDT5J5JOMlWQcuZI0H/RrSKaTwDWC0iCp5iQk80kwmRGPaIpZg9Qvp75t5tyP20H3", + "n3YFmKp2pyk0SlwNqHN/5yR9BPw8PdkNv7kXGq+GCkAwloRfngh3Elwk4hFJhZO66yx7gGgK8Lpbwtgu", + "u3fGVitp2x6YBUG7KiSCbNRRrXMeN/r8rW3r2vt7fWxGFlyDmIGNDdfYYvz1F0fx9jtGbq9CI6HuJIKR", + "NL9p5+rm2XC8OzhTsN6ZdQDRnrXuBgVNANhbqum++P/mUnIEYa+6trg7Un0cMGoMrf5TdX1UNp8lbLHP", + "MjOo62BxXa5lv19cZcDie8WqNyV4upfqbuP9YzM4/H5cKNLwmkT+UoW5+fjhzxuQTCsuL63O3SIvD/FB", + "p3LzmHVp2CZa29PNvZ1kiNqGvk81nxTDRTphtPLLKam9oWno+7mKaZ4pi+E6WAeD8+lsNQkmwWTaHdyP", + "61osHjdFsAhuo9s5kLr+OwAA//+q0t+IMg0AAA==", } // GetSwagger returns the content of the embedded swagger specification file diff --git a/api/swagger.yaml b/api/swagger.yaml index 0bc53e8..d4de152 100644 --- a/api/swagger.yaml +++ b/api/swagger.yaml @@ -1,13 +1,13 @@ openapi: 3.0.2 x-stoplight: - id: zo1541bc64oxo + id: 44w9u0405b53e info: title: Unconditional - version: '1.0' + version: "1.0" servers: - - url: 'https://api.unconditional.day/v1' - - url: 'http://localhost:8080' - - url: '127.0.0.1:8080' + - url: "https://api.unconditional.day/v1" + - url: "http://localhost:8080" + - url: "127.0.0.1:8080" paths: "/v1/search/feed/{query}": get: @@ -34,8 +34,33 @@ paths: schema: type: string x-stoplight: - id: pacmz9rv7a1k1 - /v1/version: + id: wosl2fd4xhhrd + "/v1/search/context/{query}": 
+ get: + responses: + "200": + description: OK + content: + application/json: + schema: + type: object + $ref: "#/components/schemas/SearchContextDetails" + "500": + description: Internal Server Error + content: + application/json: + schema: + type: object + $ref: "#/components/schemas/Error" + parameters: + - name: query + in: path + required: true + schema: + type: string + x-stoplight: + id: idoe8qr80ebxd + "/v1/version": get: summary: Your GET endpoint tags: [] @@ -45,10 +70,11 @@ paths: content: application/json: schema: + type: object $ref: "#/components/schemas/ServerVersion" operationId: get-v1-version x-stoplight: - id: flrb4hew86v1s + id: q4lvzczpgn9wh requestBody: content: application/json: @@ -84,7 +110,7 @@ components: - language - date x-stoplight: - id: r1huzg5l9oykp + id: xa4xmb6bpguaf FeedImage: type: object properties: @@ -96,7 +122,29 @@ components: - url - title x-stoplight: - id: g5p7hclip2ydk + id: cmmybtqamyqiy + SearchContextDetails: + type: object + x-stoplight: + id: 2bea7d1686f73 + properties: + title: + type: string + link: + type: string + summary: + type: string + thumbnail: + type: string + language: + type: string + required: + - title + - link + - summary + - thumbnail + - language + title: SearchContextDetails Error: type: object properties: @@ -108,11 +156,11 @@ components: - message - code x-stoplight: - id: gdj84rexxmxft + id: ugjrjhvkdmen2 ServerVersion: title: ServerVersion x-stoplight: - id: z6s8y1u6qlv0p + id: 24qo1fm939bg5 type: object properties: source: @@ -125,34 +173,34 @@ components: SourceReleaseVersion: title: SourceReleaseVersion x-stoplight: - id: 9jgspbctklvnr + id: ufsclw4vu3qp5 type: object properties: version: type: string x-stoplight: - id: ebqolha5zfx9n + id: t6xp7ev3nprdj lastUpdatedAt: type: string x-stoplight: - id: xj2nln4v17pzd + id: 26hfi8gtj9bdi required: - version - lastUpdatedAt ServerBuildVersion: title: ServerBuildVersion x-stoplight: - id: 7r5btva4x51y3 + id: p7byzats6s3zt type: object properties: 
commit: type: string x-stoplight: - id: kbcq283u8u0h0 + id: eoik296b0ddt3 version: type: string x-stoplight: - id: sia3kpjqax11w + id: jup15zjavkg3a required: - commit - version diff --git a/go.mod b/go.mod index 4d37691..64dcfbc 100644 --- a/go.mod +++ b/go.mod @@ -3,6 +3,7 @@ module github.com/unconditionalday/server go 1.21 require ( + github.com/anaskhan96/soup v1.2.5 github.com/deepmap/oapi-codegen v1.15.0 github.com/getkin/kin-openapi v0.120.0 github.com/labstack/echo/v4 v4.11.2 @@ -10,6 +11,7 @@ require ( github.com/sirupsen/logrus v1.9.3 github.com/spf13/pflag v1.0.5 go.uber.org/zap v1.26.0 + golang.org/x/exp v0.0.0-20230905200255-921286631fa9 ) require ( diff --git a/go.sum b/go.sum index d53293a..8d74a7d 100644 --- a/go.sum +++ b/go.sum @@ -57,6 +57,8 @@ github.com/SlyMarbo/rss v1.0.5 h1:DPcZ4aOXXHJ5yNLXY1q/57frIixMmAvTtLxDE3fsMEI= github.com/SlyMarbo/rss v1.0.5/go.mod h1:w6Bhn1BZs91q4OlEnJVZEUNRJmlbFmV7BkAlgCN8ofM= github.com/ajg/form v1.5.1 h1:t9c7v8JUKu/XxOGBU0yjNpaMloxGEJhUkqFRq0ibGeU= github.com/ajg/form v1.5.1/go.mod h1:uL1WgH+h2mgNtvBq0339dVnzXdBETtL2LeUXaIv25UY= +github.com/anaskhan96/soup v1.2.5 h1:V/FHiusdTrPrdF4iA1YkVxsOpdNcgvqT1hG+YtcZ5hM= +github.com/anaskhan96/soup v1.2.5/go.mod h1:6YnEp9A2yywlYdM4EgDz9NEHclocMepEtku7wg6Cq3s= github.com/andybalholm/brotli v1.0.5 h1:8uQZIdzKmjc/iuPu7O2ioW48L81FgatrcpfFmiq/cCs= github.com/andybalholm/brotli v1.0.5/go.mod h1:fO7iG3H7G2nSZ7m0zPUDn85XEX2GTukHGRSepvi9Eig= github.com/apapsch/go-jsonmerge/v2 v2.0.0 h1:axGnT1gRIfimI7gJifB699GoE/oq+F2MU7Dml6nw9rQ= diff --git a/internal/client/wikipedia/cache.go b/internal/client/wikipedia/cache.go new file mode 100644 index 0000000..be7930f --- /dev/null +++ b/internal/client/wikipedia/cache.go @@ -0,0 +1,112 @@ +package wikipedia + +import ( + "crypto/sha256" + "errors" + "time" +) + +// Find and delete string s in string slice +func FindAndDel(arr []string, s string) []string { + index := 0 + for i, v := range arr { + if v == s { + index = i + break + } + } + return 
append(arr[:index], arr[index+1:]...) +} + +func MakeWikiCache(expiration time.Duration, maxMemory int) *Cache { + if expiration != 0 { + expiration = (12 * time.Hour) + } + + if maxMemory != 0 { + maxMemory = 500 + } + + c := &Cache{ + Memory: map[string]RequestResult{}, + MaxMemory: maxMemory, + Expiration: expiration, + HashedKeyQueue: make([]string, 0, maxMemory), + CreatedTime: map[string]time.Time{}, + } + + return c +} + +// Cache to store request result +type Cache struct { + Memory map[string]RequestResult // Map store request result + HashedKeyQueue []string // Key queue. Delete the first item if reach max cache + CreatedTime map[string]time.Time // Map store created time + Expiration time.Duration // Cache expiration + MaxMemory int // Max cache memory +} + +// Hash a string into SHA256 +func HashCacheKey(s string) string { + hasher := sha256.New() + hasher.Write([]byte(s)) + + return string(hasher.Sum(nil)) +} + +// Get Cache current number of cache +func (cache Cache) GetLen() int { + return len(cache.HashedKeyQueue) +} + +// Add result into the Cache +func (cache *Cache) Add(s string, res RequestResult) { + if len(cache.Memory) >= cache.MaxMemory { + cache.Pop() + } + + key := HashCacheKey(s) + if cache.Memory == nil { + cache.Memory = map[string]RequestResult{} + cache.CreatedTime = map[string]time.Time{} + cache.HashedKeyQueue = make([]string, 0, cache.MaxMemory) + } + if _, ok := cache.Memory[key]; !ok { + cache.Memory[key] = res + cache.CreatedTime[key] = time.Now() + cache.HashedKeyQueue = append(cache.HashedKeyQueue, key) + } +} + +func (cache *Cache) Get(s string) (RequestResult, error) { + key := HashCacheKey(s) + if value, ok := cache.Memory[key]; ok { + if time.Since(cache.CreatedTime[key]) <= cache.Expiration { + cache.HashedKeyQueue = FindAndDel(cache.HashedKeyQueue, key) + cache.HashedKeyQueue = append(cache.HashedKeyQueue, key) + return value, nil + } else { + cache.HashedKeyQueue = FindAndDel(cache.HashedKeyQueue, key) + 
delete(cache.Memory, key) + return RequestResult{}, errors.New("the data is outdated") + } + } + return RequestResult{}, errors.New("cache key not exist") +} + +// Delete the first key in the Cache +func (cache *Cache) Pop() { + if len(cache.HashedKeyQueue) == 0 { + return + } + delete(cache.Memory, cache.HashedKeyQueue[0]) + cache.HashedKeyQueue = cache.HashedKeyQueue[1:] +} + +// Clear the whole Cache +func (cache *Cache) Clear() { + *cache = Cache{} + // This line to avoid declare but not used error + _ = cache +} diff --git a/internal/client/wikipedia/client.go b/internal/client/wikipedia/client.go new file mode 100644 index 0000000..ba9a7c7 --- /dev/null +++ b/internal/client/wikipedia/client.go @@ -0,0 +1,163 @@ +package wikipedia + +import ( + "encoding/json" + "errors" + "fmt" + "io" + "net/http" + "time" + + "github.com/unconditionalday/server/internal/search" +) + +type Client struct { + userAgent string + URL string + lastCall time.Time + cache *Cache +} + +const ( + cacheExpiration = 12 * time.Hour + maxCacheMemory = 500 +) + +var ( + ErrEmptyQuery = errors.New("query string must not be empty") + ErrEmptyLanguage = errors.New("language string must not be empty") + ErrDisambiguationResult = errors.New("disambiguation result") +) + +func NewClient() *Client { + return &Client{ + userAgent: "unconditional.day", + URL: "https://%v.wikipedia.org/w/api.php", + lastCall: time.Now(), + cache: MakeWikiCache(cacheExpiration, maxCacheMemory), + } +} + +func (c *Client) doRequest(args map[string]string, wikiLang string) (RequestResult, error) { + const ReqPerSec = 199 + const ApiGap = time.Second / ReqPerSec + + url := fmt.Sprintf(c.URL, wikiLang) + request, err := http.NewRequest("GET", url, nil) + if err != nil { + return RequestResult{}, err + } + request.Header.Set("User-Agent", c.userAgent) + q := request.URL.Query() + // Add parameters + if args["format"] == "" { + args["format"] = "json" + } + if args["action"] == "" { + args["action"] = "query" + } + for k, 
v := range args { + q.Add(k, v) + } + request.URL.RawQuery = q.Encode() + now := time.Now() + if now.Sub(c.lastCall) < ApiGap { + wait := c.lastCall.Add(ApiGap).Sub(now) + time.Sleep(wait) + now = time.Now() + } + // Check in cache + full_url := request.URL.String() + r, err := c.cache.Get(full_url) + if err == nil { + return r, nil + } + + client := http.Client{Timeout: 10 * time.Second} + res, err := client.Do(request) + defer c.updateLastCall(now) + if err != nil { + return RequestResult{}, err + } + defer res.Body.Close() + if res.StatusCode != 200 { + return RequestResult{}, errors.New("unable to fetch the results") + } + body, err := io.ReadAll(res.Body) + if err != nil { + return RequestResult{}, err + } + var result RequestResult + err = json.Unmarshal([]byte(body), &result) + if err != nil { + return RequestResult{}, err + } + c.cache.Add(full_url, result) + return result, nil +} + +func (c *Client) updateLastCall(now time.Time) { + c.lastCall = now +} + +func (w *Client) FetchContextDetails(query string, lang string) (search.ContextDetails, error) { + if query == "" { + return search.ContextDetails{}, ErrEmptyQuery + } + + if lang == "" { + return search.ContextDetails{}, ErrEmptyLanguage + } + + args := map[string]string{ + "action": "query", + "list": "search", + "srprop": "", + "srlimit": "1", + "srsearch": query, + } + + res, err := w.doRequest(args, lang) + if err != nil { + return search.ContextDetails{}, err + } + + if len(res.Query.Search) == 0 { + return search.ContextDetails{}, nil + } + + title := res.Query.Search[0].Title + + wikiPage, err := MakeWikipediaPage(-1, title, "", false, w, lang) + if len(wikiPage.Disambiguation) != 0 { + return search.ContextDetails{}, nil + } + + if err != nil { + return search.ContextDetails{}, err + } + + summary, err := wikiPage.GetSummary(w, lang) + if err != nil { + return search.ContextDetails{}, err + } + + thumbnail, err := wikiPage.GetThumbURL(w, lang) + if err != nil { + return search.ContextDetails{}, 
err + } + + s := search.ContextDetails{ + Title: wikiPage.Title, + Language: wikiPage.Language, + Link: wikiPage.URL, + Summary: summary, + Thumbnail: thumbnail, + } + + if !s.IsValid() { + return search.ContextDetails{}, nil + } + + return s, nil +} diff --git a/internal/client/wikipedia/client_test.go b/internal/client/wikipedia/client_test.go new file mode 100644 index 0000000..2119371 --- /dev/null +++ b/internal/client/wikipedia/client_test.go @@ -0,0 +1,70 @@ +package wikipedia_test + +import ( + "errors" + "testing" + + "github.com/stretchr/testify/assert" + "github.com/unconditionalday/server/internal/client/wikipedia" +) + +type TestInput struct { + query string + lang string +} + +type TestExpect struct { + validRes bool + err error +} + +func TestFetchContextDetails(t *testing.T) { + t.Parallel() + + testCases := []struct { + name string + input TestInput + output TestExpect + }{ + { + name: "empty query string", + input: TestInput{query: "", lang: "en"}, + output: TestExpect{ + validRes: false, + err: errors.New("query string must not be empty"), + }, + }, + { + name: "empty language", + input: TestInput{query: "Lorem ipsum", lang: ""}, + output: TestExpect{ + validRes: false, + err: errors.New("language string must not be empty"), + }, + }, + { + name: "valid query", + input: TestInput{query: "Salvini", lang: "en"}, + output: TestExpect{ + validRes: true, + err: nil, + }, + }, + } + for _, tc := range testCases { + tc := tc + + t.Run(tc.name, func(t *testing.T) { + t.Parallel() + + w := wikipedia.NewClient() + actual, err := w.FetchContextDetails(tc.input.query, tc.input.lang) + + if tc.output.err != nil { + assert.Equal(t, tc.output.err.Error(), err.Error()) + } else { + assert.Equal(t, tc.output.validRes, actual.IsValid()) + } + }) + } +} diff --git a/internal/client/wikipedia/page.go b/internal/client/wikipedia/page.go new file mode 100644 index 0000000..9b9ffca --- /dev/null +++ b/internal/client/wikipedia/page.go @@ -0,0 +1,551 @@ +package 
wikipedia + +import ( + "errors" + + "github.com/anaskhan96/soup" + mapsx "github.com/unconditionalday/server/internal/x/maps" + netx "github.com/unconditionalday/server/internal/x/net" + "golang.org/x/exp/maps" + "golang.org/x/exp/slices" + + "reflect" + "strconv" + "strings" +) + +// Result after we parse the response of Wikipedia API. +// Some attributes must be get manually using the WikipediaPage methods +type WikipediaPage struct { + PageID int `json:"pageid"` + Title string `json:"title"` + OriginalTitle string `json:"originaltitle"` + Content string `json:"content"` + HTML string `json:"html"` + URL string `json:"fullurl"` + RevisionID float64 `json:"revid"` + ParentID float64 `json:"parentid"` + Summary string `json:"summary"` + CheckedImage bool `json:"checkedimage"` + CheckedThumb bool `json:"checkedthumb"` + Thumbnail string `json:"thumbnail"` + Images []string `json:"images"` + Coordinate []float64 `json:"coordinates"` + Language string `json:"lang"` + Reference []string `json:"references"` + Link []string `json:"links"` + Category []string `json:"categories"` + Section []string `json:"sections"` + SectionOffset map[string][]int `json:"sectionoffset"` + Disambiguation []string `json:"disambiguation"` +} + +/* +Return true if the 2 pages are the same +*/ +func (page WikipediaPage) Equal(other WikipediaPage) bool { + return page.PageID == other.PageID +} + +/* +Get the string content of the page. 
Save it into the page.Content for later use +*/ +func (page *WikipediaPage) GetContent(client *Client, lang string) (string, error) { + if page.Content != "" { + return page.Content, nil + } + pageid := strconv.Itoa(page.PageID) + args := map[string]string{ + "action": "query", + "prop": "extracts|revisions", + "explaintext": "", + "rvprop": "ids", + "titles": page.Title, + } + res, err := client.doRequest(args, lang) + if err != nil { + return "", err + } + + page.Content = res.Query.Page[pageid].Extract + page.RevisionID = res.Query.Page[pageid].Revision[0]["revid"].(float64) + page.ParentID = res.Query.Page[pageid].Revision[0]["parentid"].(float64) + + return page.Content, nil +} + +/* +Get the html of the page. Save it into the page.HTML for later use\ + +**Warning:: This can get pretty slow on long pages. +*/ +func (page *WikipediaPage) GetHTML(client *Client, lang string) (string, error) { + if page.HTML != "" { + return page.HTML, nil + } + args := map[string]string{ + "action": "query", + "prop": "revisions", + "rvprop": "content", + "rvlimit": strconv.Itoa(1), + "rvparse": "", + "titles": page.Title, + } + res, err := client.doRequest(args, lang) + if err != nil { + return "", err + } + if res.Error.Code != "" { + return "", errors.New(res.Error.Info) + } + page.HTML = res.Query.Page[strconv.Itoa(page.PageID)].Revision[0]["*"].(string) + return page.HTML, nil +} + +/* +Get the revid of the page. Save it into the page.HTML for later use + +The revision ID is a number that uniquely identifies the current version of the page. +It can be used to create the permalink or for other direct API calls. See Help:Page history +for more information. 
+*/ +func (page *WikipediaPage) GetRevisionID(client *Client, lang string) (float64, error) { + if page.RevisionID != 0 { + return page.RevisionID, nil + } + _, err := page.GetContent(client, lang) + if err != nil { + return -1, err + } + return page.RevisionID, nil +} + +/* +Revision ID of the parent version of the current revision of this page. + +See “revision_id“ for more information. +*/ +func (page *WikipediaPage) GetParentID(client *Client, lang string) (float64, error) { + if page.RevisionID != 0 { + return page.ParentID, nil + } + _, err := page.GetContent(client, lang) + if err != nil { + return -1, err + } + return page.ParentID, nil +} + +/* +String summary of a page +*/ +func (page *WikipediaPage) GetSummary(client *Client, lang string) (string, error) { + if page.Summary != "" { + return page.Summary, nil + } + + pageid := strconv.Itoa(page.PageID) + args := map[string]string{ + "action": "query", + "prop": "extracts", + "explaintext": "", + "exintro": "", + "exsentences": "3", + "exlimit": "1", + "titles": page.Title, + } + res, err := client.doRequest(args, lang) + if err != nil { + return "", err + } + + page.Summary = res.Query.Page[pageid].Extract + return page.Summary, nil +} + +/* +Based on +*/ +func (page *WikipediaPage) ContinuedQuery(args map[string]string, client *Client, lang string) ([]interface{}, error) { + // args["pageids"] = strconv.Itoa(page.PageID) + args["titles"] = page.Title + last := map[string]interface{}{} + prop := args["prop"] + result := make([]interface{}, 0, 7) + for { + new_args := maps.Clone(args) + mapsx.Update(new_args, last) + + res, err := client.doRequest(args, lang) + if err != nil { + return result, err + } + if res.Error.Code != "" { + return result, errors.New(res.Error.Info) + } + + if reflect.DeepEqual(RequestQuery{}, res.Query) { + break + } + + if _, ok := args["generator"]; ok { + for _, v := range res.Query.Page { + result = append(result, v) + } + } else { + if prop == "extlinks" { + temp := 
res.Query.Page[strconv.Itoa(page.PageID)].Extlink + for _, v := range temp { + result = append(result, v["*"]) + } + } else { + temp := []map[string]interface{}{} + switch prop { + case "links": + temp = res.Query.Page[strconv.Itoa(page.PageID)].Link + case "categories": + temp = res.Query.Page[strconv.Itoa(page.PageID)].Category + } + for _, v := range temp { + result = append(result, v["title"].(string)) + } + } + + } + + if len(res.Continue) == 0 { + break + } + + last = res.Continue + } + return result, nil +} + +/* +List of URLs of images on the page. +*/ +func (page *WikipediaPage) GetImagesURL(client *Client, lang string) ([]string, error) { + if page.CheckedImage { + return page.Images, nil + } + args := map[string]string{ + "action": "query", + "generator": "images", + "gimlimit": "max", + "prop": "imageinfo", + "iiprop": "url", + } + + res, err := page.ContinuedQuery(args, client, lang) + if err != nil && len(res) == 0 { + return []string{}, err + } + result := make([]string, 0, 7) + for _, v := range res { + temp := v.(InnerPage).ImageInfo + if len(temp) > 0 { + result = append(result, temp[0]["url"]) + } + } + page.CheckedImage = true + page.Images = result + return page.Images, nil +} + +/* +Get Thumbnail URL of the page +*/ +func (page *WikipediaPage) GetThumbURL(client *Client, lang string) (string, error) { + if page.CheckedThumb { + return page.Thumbnail, nil + } + + args := map[string]string{ + "action": "query", + "prop": "pageimages", + "titles": page.Title, + "piprop": "thumbnail", + "pithumbsize": "500", + } + + res, err := client.doRequest(args, lang) + if err != nil { + return "", err + } + + page.CheckedThumb = true + page.Thumbnail = res.Query.Page[strconv.Itoa(page.PageID)].Thumbnail.Source + + return page.Thumbnail, nil +} + +/* +Slice of float64 in the form of (lat, lon) +*/ +func (page *WikipediaPage) GetCoordinate(client *Client, lang string) ([]float64, error) { + if len(page.Coordinate) == 2 { + return page.Coordinate, nil + } + 
args := map[string]string{ + "action": "query", + "prop": "coordinates", + "colimit": "max", + "titles": page.Title, + } + + res, err := client.doRequest(args, lang) + if err != nil { + return []float64{}, err + } + if res.Error.Code != "" { + return []float64{}, errors.New(res.Error.Info) + } + + if reflect.DeepEqual(RequestQuery{}, res.Query) { + page.Coordinate = []float64{-1, -1} + return page.Coordinate, nil + } else { + temp := res.Query.Page[strconv.Itoa(page.PageID)].Coordinate[0] + page.Coordinate = []float64{temp["lat"].(float64), temp["lon"].(float64)} + } + return page.Coordinate, nil +} + +/* + List of URLs of external links on a page. + May include external links within page that aren't technically cited anywhere. +*/ +func (page *WikipediaPage) GetReference(client *Client, lang string) ([]string, error) { + if len(page.Reference) > 0 { + return page.Reference, nil + } + args := map[string]string{ + "action": "query", + "prop": "extlinks", + "ellimit": "max", + } + res, err := page.ContinuedQuery(args, client, lang) + if err != nil && len(res) == 0 { + return []string{}, err + } + for _, v := range res { + page.Reference = append(page.Reference, netx.HelpAddURL(v.(string))) + } + + return page.Reference, nil +} + +/* + List of titles of Wikipedia page links on a page. + **Note:: Only includes articles from namespace 0, meaning no Category, User talk, or other meta-Wikipedia pages. +*/ +func (page *WikipediaPage) GetLink(client *Client, lang string) ([]string, error) { + if len(page.Link) > 0 { + return page.Link, nil + } + args := map[string]string{ + "action": "query", + "prop": "links", + "plnamespace": "0", + "pllimit": "max", + } + res, err := page.ContinuedQuery(args, client, lang) + if err != nil && len(res) == 0 { + return []string{}, err + } + for _, v := range res { + page.Link = append(page.Link, v.(string)) + } + return page.Link, nil +} + +/* +List of categories of a page. 
+*/ +func (page *WikipediaPage) GetCategory(client *Client, lang string) ([]string, error) { + if len(page.Category) > 0 { + return page.Category, nil + } + args := map[string]string{ + "action": "query", + "prop": "categories", + "cllimit": "max", + } + res, err := page.ContinuedQuery(args, client, lang) + if err != nil && len(res) == 0 { + return []string{}, err + } + for _, v := range res { + page.Category = append(page.Category, strings.Replace(v.(string), "Category:", "", 1)) + } + return page.Category, nil +} + +/* +List of section titles from the table of contents on the page. +*/ +func (page *WikipediaPage) GetSectionList(client *Client, lang string) ([]string, error) { + if len(page.Section) > 0 { + return page.Section, nil + } + args := map[string]string{ + "action": "parse", + "prop": "sections", + } + if page.Title != "" { + args["page"] = page.Title + } + res, err := client.doRequest(args, lang) + if err != nil { + return []string{}, err + } + + for _, v := range res.Parse["sections"].([]interface{}) { + page.Section = append(page.Section, v.(map[string]interface{})["line"].(string)) + } + return page.Section, nil +} + +func (page *WikipediaPage) GetSection(section string, client *Client, lang string) (string, error) { + sections, err := page.GetSectionList(client, lang) + if err != nil { + return "", err + } + if !slices.Contains(sections, section) { + return "", errors.New("section not exist") + } + content, err := page.GetContent(client, lang) + if err != nil { + return "", err + } + if page.SectionOffset == nil { + page.SectionOffset = map[string][]int{} + } + if value, ok := page.SectionOffset[section]; ok { + return content[value[0]:value[1]], nil + } + sectiontitle := "== " + section + " ==" + start := strings.Index(content, sectiontitle) + len(sectiontitle) + // If you cannot find the section in the content (but it's there in the API for some reason) + if start < len(sectiontitle) { + page.SectionOffset[section] = []int{0, 0} + return "", nil + 
} + end := start + strings.Index(content[start:], "==") + if end == -1 { + page.SectionOffset[section] = []int{start, len(content)} + return content[start:], nil + } + page.SectionOffset[section] = []int{start, end} + return strings.TrimSpace(strings.TrimLeft(content[start:end], "=")), nil +} + +/* + Load basic information from Wikipedia. + + Confirm that page exists. If it's a disambiguation page, get a list of suggesting +*/ +func MakeWikipediaPage(pageid int, title string, originaltitle string, redirect bool, client *Client, lang string) (WikipediaPage, error) { + page := WikipediaPage{} + args := map[string]string{ + "action": "query", + "prop": "info|pageprops", + "inprop": "url", + "ppprop": "disambiguation", + "redirects": "", + } + page.Title = title + page.OriginalTitle = title + if pageid != -1 { + args["pageids"] = strconv.Itoa(pageid) + page.PageID = pageid + } else { + args["titles"] = title + } + if originaltitle != "" { + page.OriginalTitle = originaltitle + } + res, err := client.doRequest(args, lang) + if err != nil { + return page, err + } + + target := InnerPage{} + target.Missing = "false" + var index string + for i, v := range res.Query.Page { + index = i + target = v + break + } + + if pageid == -1 { + page.PageID = target.PageID + } + if title == "" { + page.Title = target.Title + page.OriginalTitle = target.Title + } + + page.Language = target.PageLanguage + + if target.Missing == "" && index == "-1" { + return page, errors.New("missing") + } + // if field redirects exist + if len(res.Query.Redirect) > 0 { + if !redirect { + return page, errors.New("set the redirect argument to true to allow automatic redirects") + } + tempstr := page.Title + if len(res.Query.Normalize) > 0 { + if res.Query.Normalize[0].From != page.Title { + return page, errors.New("an unexpected weird error, report me if it happened") + } + tempstr = res.Query.Normalize[0].To + } + if tempstr != res.Query.Redirect[0].From { + return page, errors.New("an unexpected weird 
// InnerBasic holds the string stored under the JSON key "*".
type InnerBasic struct {
	Aster string `json:"*"`
}

// InnerSearchInfo is the "searchinfo" object of a query result: the total
// number of hits plus an optional suggestion and its snippet.
type InnerSearchInfo struct {
	TotalHits         int    `json:"totalhits"`
	Suggestion        string `json:"suggestion"`
	SuggestionSnippet string `json:"suggestionsnippet"`
}

// InnerSearch is one entry of the "search" and "random" lists of a query
// result (see RequestQuery.Search and RequestQuery.Random).
type InnerSearch struct {
	Ns        int    `json:"ns"`
	Title     string `json:"title"`
	PageID    int    `json:"pageid"`
	Size      int    `json:"size"`
	Wordcount int    `json:"wordcount"`
	Snippet   string `json:"snippet"`
	Timestamp string `json:"timestamp"`
}

// InnerPage is one entry of the "pages" map of a query result
// (see RequestQuery.Page).
//
// PageLanguageTmlCode keeps its historical Go name so existing callers still
// compile, but it is decoded from "pagelanguagehtmlcode": the previous tag
// "pagelanguagetmlcode" matched no key in the response, so the field was
// always empty.
//
// Revision, Link, Category and Coordinate are left as loosely typed maps;
// callers must type-assert the values they read from them.
type InnerPage struct {
	Ns                  int                      `json:"ns"`
	Title               string                   `json:"title"`
	PageID              int                      `json:"pageid"`
	ContentModel        string                   `json:"contentmodel"`
	PageLanguage        string                   `json:"pagelanguage"`
	PageLanguageTmlCode string                   `json:"pagelanguagehtmlcode"`
	PageLanguageDir     string                   `json:"pagelanguagedir"`
	Touched             string                   `json:"touched"`
	LastRevid           int                      `json:"lastrevid"`
	Length              int                      `json:"length"`
	FullURL             string                   `json:"fullurl"`
	EditURL             string                   `json:"editurl"`
	CanonicalURL        string                   `json:"canonicalurl"`
	PageProps           map[string]string        `json:"pageprops"`
	Missing             string                   `json:"missing"`
	Extract             string                   `json:"extract"`
	Revision            []map[string]interface{} `json:"revisions"`
	Extlink             []map[string]string      `json:"extlinks"`
	Link                []map[string]interface{} `json:"links"`
	Category            []map[string]interface{} `json:"categories"`
	Thumbnail           Thumbnail                `json:"thumbnail"`
	PageImage           string                   `json:"pageimage"`
	ImageInfo           []map[string]string      `json:"imageinfo"`
	Coordinate          []map[string]interface{} `json:"coordinates"`
}

// Thumbnail is the "thumbnail" object of a page: the image source and its
// width and height.
type Thumbnail struct {
	Source string `json:"source"`
	Width  int    `json:"width"`
	Height int    `json:"height"`
}

// InnerGeoSearch is one entry of the "geosearch" list of a query result.
//
// Longitude is decoded from "lon". The previous tag "lom" matched no key in
// the response, so Longitude was always zero.
type InnerGeoSearch struct {
	PageID    int     `json:"pageid"`
	Ns        int     `json:"ns"`
	Title     string  `json:"title"`
	Latitude  float32 `json:"lat"`
	Longitude float32 `json:"lon"`
	Distance  float32 `json:"dist"`
	Primary   string  `json:"primary"`
}

// InnerNormalize is a from/to title pair. The same shape is used for both
// the "normalized" and the "redirects" lists of a query result.
type InnerNormalize struct {
	From string `json:"from"`
	To   string `json:"to"`
}

// RequestResult is the top-level JSON object of a response. Error and Query
// are pointers, so they stay nil when the corresponding key is absent.
type RequestResult struct {
	Error         *RequestError          `json:"error"`
	Warning       map[string]InnerBasic  `json:"warnings"`
	Batchcomplete string                 `json:"batchcomplete"`
	Query         *RequestQuery          `json:"query"`
	Servedby      string                 `json:"servedby"`
	Continue      map[string]interface{} `json:"continue"`
	Parse         map[string]interface{} `json:"parse"`
}

// RequestError is the "error" object of a response.
type RequestError struct {
	Code  string `json:"code"`
	Info  string `json:"info"`
	Aster string `json:"*"`
}

// RequestQuery is the "query" object of a response. Page is keyed by the
// string found in the "pages" object of the response.
type RequestQuery struct {
	SearchInfo InnerSearchInfo      `json:"searchinfo"`
	Normalize  []InnerNormalize     `json:"normalized"`
	Redirect   []InnerNormalize     `json:"redirects"`
	Search     []InnerSearch        `json:"search"`
	GeoSearch  []InnerGeoSearch     `json:"geosearch"`
	Page       map[string]InnerPage `json:"pages"`
	Random     []InnerSearch        `json:"random"`
	Language   []map[string]string  `json:"languages"`
}
// ContextDetails is the contextual information returned for a search query:
// a title, a link, a summary, a thumbnail and a language, all as strings.
type ContextDetails struct {
	Title     string
	Link      string
	Summary   string
	Thumbnail string
	Language  string
}

// IsValid reports whether every field of the ContextDetails is non-empty.
func (d ContextDetails) IsValid() bool {
	for _, field := range [...]string{d.Title, d.Link, d.Summary, d.Thumbnail, d.Language} {
		if field == "" {
			return false
		}
	}
	return true
}

// SearchClient fetches the ContextDetails for a query in a given locale.
type SearchClient interface {
	FetchContextDetails(query, locale string) (ContextDetails, error)
}
// Update copies the entries of b into a, overwriting keys that already exist.
// Values of type int are converted with strconv.Itoa and values of type
// string are stored as they are; entries of any other type are skipped.
// a must be a non-nil map.
func Update(a map[string]string, b map[string]interface{}) {
	for k, v := range b {
		switch t := v.(type) {
		case int:
			a[k] = strconv.Itoa(t)
		case string:
			a[k] = t
		}
	}
}

// HelpAddURL returns s unchanged when it already starts with "http" (which
// covers both "http:" and "https:"); otherwise it prepends "http:", so a
// scheme-relative reference such as "//host/path" becomes "http://host/path".
//
// An empty s is returned unchanged, so a missing URL stays recognisably
// empty instead of turning into the bare string "http:".
func HelpAddURL(s string) string {
	if s == "" {
		return s
	}
	// The length guard matters: slicing s[:4] on a string shorter than four
	// bytes panics with "slice bounds out of range".
	if len(s) >= 4 && s[:4] == "http" {
		return s
	}
	return "http:" + s
}