Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: add link-preview handler #929

Open
wants to merge 4 commits into
base: devel
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ require (
github.com/tinode/snowflake v1.0.0
go.mongodb.org/mongo-driver v1.12.1
golang.org/x/crypto v0.21.0
golang.org/x/net v0.23.0
golang.org/x/oauth2 v0.16.0
golang.org/x/text v0.14.0
google.golang.org/api v0.148.0
Expand Down Expand Up @@ -68,7 +69,6 @@ require (
github.com/xdg-go/stringprep v1.0.4 // indirect
github.com/youmark/pkcs8 v0.0.0-20201027041543-1326539a0a0a // indirect
go.opencensus.io v0.24.0 // indirect
golang.org/x/net v0.23.0 // indirect
golang.org/x/sync v0.4.0 // indirect
golang.org/x/sys v0.18.0 // indirect
golang.org/x/time v0.3.0 // indirect
Expand Down
187 changes: 187 additions & 0 deletions server/linkpreview.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,187 @@
package main

import (
"encoding/json"
"errors"
"golang.org/x/net/html"
"golang.org/x/net/html/atom"
"io"
"net"
"net/http"
"net/url"
"strings"
"time"
"unicode/utf8"
)

// linkPreview is the JSON document returned to the client by the link-preview
// handler. Every field is tagged omitempty, so a field that could not be
// extracted from the page is left out of the response entirely.
type linkPreview struct {
// Title is taken from the og:title meta tag or from the text of the <title> element.
Title string `json:"title,omitempty"`
// Description is taken from the og:description meta tag or from <meta name="description">.
Description string `json:"description,omitempty"`
// ImageURL is the content of the og:image meta tag, passed through as found in the page.
ImageURL string `json:"image_url,omitempty"`
}

// client is the shared HTTP client used to fetch remote pages for link previews.
//
// Timeout caps the whole exchange (connect, redirects, reading the body) at two
// seconds. Every redirect hop is re-validated with validateURL so a public URL
// cannot bounce the server to a disallowed address.
var client = &http.Client{
	Timeout: 2 * time.Second,
	CheckRedirect: func(req *http.Request, via []*http.Request) error {
		// Installing a custom CheckRedirect replaces net/http's default policy
		// (give up after 10 consecutive redirects), so the cap is restored here;
		// otherwise a redirect loop is bounded only by Timeout.
		const maxRedirects = 10
		if len(via) >= maxRedirects {
			return errors.New("stopped after 10 redirects")
		}
		return validateURL(req.URL)
	},
}

// previewLink handles the HTTP request, fetches the URL, and returns the link preview.
func previewLink(w http.ResponseWriter, r *http.Request) {
or-else marked this conversation as resolved.
Show resolved Hide resolved
yinebebt marked this conversation as resolved.
Show resolved Hide resolved
if r.Method != http.MethodGet && r.Method != http.MethodHead {
http.Error(w, "Method not allowed", http.StatusMethodNotAllowed)
return
}

// check authorization
uid, challenge, err := authHttpRequest(r)
if err != nil {
http.Error(w, "invalid auth secret", http.StatusBadRequest)
return
}
if challenge != nil || uid.IsZero() {
http.Error(w, "user not authenticated", http.StatusUnauthorized)
return
}

u := r.URL.Query().Get("url")
if u == "" {
http.Error(w, "Missing 'url' query parameter", http.StatusBadRequest)
return
}

or-else marked this conversation as resolved.
Show resolved Hide resolved
pu, err := url.Parse(u)
if err != nil {
http.Error(w, err.Error(), http.StatusBadRequest)
return
}
if err := validateURL(pu); err != nil {
http.Error(w, err.Error(), http.StatusBadRequest)
return
}

req, err := http.NewRequest(http.MethodGet, u, nil)
if err != nil {
http.Error(w, err.Error(), http.StatusBadRequest)
return
}

resp, err := client.Do(req)
if err != nil {
http.Error(w, err.Error(), http.StatusBadGateway)
return
}
defer resp.Body.Close()

if resp.StatusCode < http.StatusOK || resp.StatusCode >= http.StatusMultipleChoices { // StatusCode != 20X
http.Error(w, "Non-OK HTTP status", http.StatusBadGateway)
return
}

body := http.MaxBytesReader(nil, resp.Body, 2*1024) // 2KB limit
if cc := resp.Header.Get("Cache-Control"); cc != "" {
w.Header().Set("Cache-Control", cc)
}
if r.Method == http.MethodHead {
w.WriteHeader(http.StatusOK)
return
}
w.Header().Set("Content-Type", "application/json")
or-else marked this conversation as resolved.
Show resolved Hide resolved
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Please move this up, before checking for MethodHead. Response to HEAD should have "Content-Type" header.

if err := json.NewEncoder(w).Encode(extractMetadata(body)); err != nil {
or-else marked this conversation as resolved.
Show resolved Hide resolved
http.Error(w, "Failed to encode response", http.StatusInternalServerError)
}
}

func extractMetadata(body io.Reader) *linkPreview {
var preview linkPreview
var inTitleTag bool

tokenizer := html.NewTokenizer(body)
for {
switch tokenizer.Next() {
case html.ErrorToken:
return sanitizePreview(preview)

case html.StartTagToken, html.SelfClosingTagToken:
token := tokenizer.Token()
if token.DataAtom == atom.Meta {
var name, property, content string
for _, attr := range token.Attr {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Please use tokenizer.TagAttr() instead of iterating attributes directly.

Copy link
Contributor

@or-else or-else Nov 30, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Please resolve my previous comment on this.

switch attr.Key {
case "name":
name = attr.Val
case "property":
property = attr.Val
case "content":
content = attr.Val
}
}

if strings.HasPrefix(property, "og:") && content != "" {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

if content != "" {
    if strings.HasPrefix(property, "og:") {
        ...
    } else if name == "description" && preview.Description == "" {
        ...
    }
}

or

if content == "" {
    continue
}

switch property {
case "og:title":
preview.Title = content
case "og:description":
preview.Description = content
case "og:image":
preview.ImageURL = content
}
} else if name == "description" && preview.Description == "" {
or-else marked this conversation as resolved.
Show resolved Hide resolved
preview.Description = content
}
} else if token.DataAtom == atom.Title {
inTitleTag = true
or-else marked this conversation as resolved.
Show resolved Hide resolved
}

case html.TextToken:
if preview.Title == "" && inTitleTag {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

if inTitleTag {
    if preview.Title == "" {
         preview.Title = strings.TrimSpace(tokenizer.Token().Data)
    }
    inTitleTag = false
}

preview.Title = strings.TrimSpace(tokenizer.Token().Data)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Consider moving strings.TrimSpace to sanitizePreview and applying it to all fields.

}
case html.EndTagToken:
if tokenizer.Token().DataAtom == atom.Title {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't think you need to test it. Just assign false without testing. Consider this example:

<body>
<head>
<title><!-- no closing title -->
</head>
<body>This is not a title</body>
</html>

inTitleTag = false
}
}
if preview.Title != "" && preview.Description != "" && preview.ImageURL != "" {
break
}
}

return sanitizePreview(preview)
}

// validateURL checks that u is acceptable as a link-preview target.
//
// Only the http and https schemes are allowed. The host is resolved and the URL
// is rejected if resolution fails or if ANY resolved address is not a public
// unicast address: loopback, private, link-local (e.g. 169.254.169.254),
// unspecified (0.0.0.0, ::), multicast and IPv4 broadcast addresses are refused.
//
// On failure it returns a *url.Error with Op "validate"; on success it returns nil.
//
// NOTE: the HTTP client resolves the host again when it dials, so this check
// does not by itself pin the connection to the addresses validated here.
func validateURL(u *url.URL) error {
	if u.Scheme != "http" && u.Scheme != "https" {
		return &url.Error{Op: "validate", Err: errors.New("invalid scheme")}
	}

	ips, err := net.LookupIP(u.Hostname())
	if err != nil {
		return &url.Error{Op: "validate", Err: errors.New("invalid host")}
	}
	for _, ip := range ips {
		// IsGlobalUnicast is false for unspecified, loopback, multicast,
		// link-local unicast and IPv4 broadcast addresses, but it is true for
		// private ranges, hence the additional IsPrivate test.
		if !ip.IsGlobalUnicast() || ip.IsPrivate() {
			return &url.Error{Op: "validate", Err: errors.New("non routable IP address")}
		}
	}

	return nil
}

// sanitizePreview normalizes a preview before it is sent to the client.
//
// Surrounding whitespace is stripped from every field, Title is limited to 80
// runes, Description to 256 runes and ImageURL to 2000 bytes. All cuts are made
// on rune boundaries so truncation never splits a multi-byte UTF-8 sequence.
// The argument is received by value; the returned pointer refers to the
// sanitized copy and the caller's value is not modified.
func sanitizePreview(preview linkPreview) *linkPreview {
	const (
		maxTitleRunes       = 80
		maxDescriptionRunes = 256
		maxImageURLBytes    = 2000
	)

	// clip returns s cut to at most limit runes. Ranging over the string yields
	// the byte offset of each rune, so the cut is a plain slice with no []rune
	// conversion.
	clip := func(s string, limit int) string {
		seen := 0
		for offset := range s {
			if seen == limit {
				return s[:offset]
			}
			seen++
		}
		return s
	}

	preview.Title = clip(strings.TrimSpace(preview.Title), maxTitleRunes)
	preview.Description = clip(strings.TrimSpace(preview.Description), maxDescriptionRunes)

	preview.ImageURL = strings.TrimSpace(preview.ImageURL)
	if len(preview.ImageURL) > maxImageURLBytes {
		// Back off to the start of the rune that straddles the byte limit so
		// the result stays valid UTF-8.
		cut := maxImageURLBytes
		for cut > 0 && !utf8.RuneStart(preview.ImageURL[cut]) {
			cut--
		}
		preview.ImageURL = preview.ImageURL[:cut]
	}

	return &preview
}
29 changes: 18 additions & 11 deletions server/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -202,7 +202,8 @@ var globals struct {

// URL of the main endpoint.
// TODO: implement file-serving API for gRPC and remove this feature.
servingAt string
servingAt string
linkPreviewEnabled bool
}

// Credential validator config.
Expand Down Expand Up @@ -292,16 +293,17 @@ type configType struct {
DefaultCountryCode string `json:"default_country_code"`

// Configs for subsystems
Cluster json.RawMessage `json:"cluster_config"`
Plugin json.RawMessage `json:"plugins"`
Store json.RawMessage `json:"store_config"`
Push json.RawMessage `json:"push"`
TLS json.RawMessage `json:"tls"`
Auth map[string]json.RawMessage `json:"auth_config"`
Validator map[string]*validatorConfig `json:"acc_validation"`
AccountGC *accountGcConfig `json:"acc_gc_config"`
Media *mediaConfig `json:"media"`
WebRTC json.RawMessage `json:"webrtc"`
Cluster json.RawMessage `json:"cluster_config"`
Plugin json.RawMessage `json:"plugins"`
Store json.RawMessage `json:"store_config"`
Push json.RawMessage `json:"push"`
TLS json.RawMessage `json:"tls"`
Auth map[string]json.RawMessage `json:"auth_config"`
Validator map[string]*validatorConfig `json:"acc_validation"`
AccountGC *accountGcConfig `json:"acc_gc_config"`
Media *mediaConfig `json:"media"`
WebRTC json.RawMessage `json:"webrtc"`
LinkPreviewEnabled bool `json:"link_preview_enabled"`
or-else marked this conversation as resolved.
Show resolved Hide resolved
}

func main() {
Expand Down Expand Up @@ -734,6 +736,11 @@ func main() {
mux.HandleFunc("/", serve404)
}

globals.linkPreviewEnabled = config.LinkPreviewEnabled
if config.LinkPreviewEnabled {
mux.HandleFunc(config.ApiPath+"v0/preview-link", previewLink)
}

if err = listenAndServe(config.Listen, mux, tlsConfig, signalHandler()); err != nil {
logs.Err.Fatal(err)
}
Expand Down
1 change: 1 addition & 0 deletions server/session.go
Original file line number Diff line number Diff line change
Expand Up @@ -760,6 +760,7 @@ func (s *Session) hello(msg *ClientComMessage) {
"maxTagCount": globals.maxTagCount,
"maxFileUploadSize": globals.maxFileUploadSize,
"reqCred": globals.validatorClientConfig,
"linkPreviewEnabled": globals.linkPreviewEnabled,
}
if len(globals.iceServers) > 0 {
params["iceServers"] = globals.iceServers
Expand Down
3 changes: 2 additions & 1 deletion server/tinode.conf
Original file line number Diff line number Diff line change
Expand Up @@ -678,5 +678,6 @@
// Address of the plugin.
"service_addr": "tcp://localhost:40051"
}
]
],
"link_preview_enabled":false
}