From 181fb87a7dbc20b638371f17d1be284fbdfc1eb4 Mon Sep 17 00:00:00 2001 From: everpcpc Date: Tue, 26 Nov 2024 14:41:10 +0800 Subject: [PATCH] feat: support searching character & person (#671) --- canal/canal.go | 5 +- canal/event.go | 4 + canal/on_character.go | 45 +++ canal/on_person.go | 44 +++ canal/on_subject.go | 23 +- cmd/web/cmd.go | 3 +- internal/mocks/SearchClient.go | 132 +++---- internal/search/character/client.go | 110 ++++++ internal/search/character/doc.go | 51 +++ internal/search/character/event.go | 57 +++ internal/search/character/handle.go | 128 +++++++ internal/search/client.go | 340 ------------------ internal/search/extractor.common.go | 55 --- internal/search/noop.go | 14 +- internal/search/person/client.go | 110 ++++++ internal/search/person/doc.go | 49 +++ internal/search/person/event.go | 57 +++ internal/search/person/handle.go | 121 +++++++ internal/search/search.go | 150 ++++++++ internal/search/searcher/client.go | 231 ++++++++++++ internal/search/subject/client.go | 111 ++++++ internal/search/{index.go => subject/doc.go} | 32 +- .../doc_internal_test.go} | 2 +- internal/search/subject/event.go | 58 +++ internal/search/{ => subject}/handle.go | 21 +- .../{ => subject}/handle_internal_test.go | 2 +- .../{ => subject}/index_internal_test.go | 8 +- openapi/v0.yaml | 165 ++++++++- web/handler/character/character.go | 30 -- web/handler/character/get.go | 2 +- web/handler/search.go | 14 +- web/res/character.go | 32 +- web/routes.go | 4 +- 33 files changed, 1667 insertions(+), 543 deletions(-) create mode 100644 canal/on_character.go create mode 100644 canal/on_person.go create mode 100644 internal/search/character/client.go create mode 100644 internal/search/character/doc.go create mode 100644 internal/search/character/event.go create mode 100644 internal/search/character/handle.go delete mode 100644 internal/search/client.go delete mode 100644 internal/search/extractor.common.go create mode 100644 internal/search/person/client.go create mode 100644 internal/search/person/doc.go create mode 100644 internal/search/person/event.go create mode 100644 internal/search/person/handle.go create mode 100644 internal/search/search.go create mode 100644 internal/search/searcher/client.go create mode 100644 internal/search/subject/client.go rename internal/search/{index.go => subject/doc.go} (82%) rename internal/search/{extract_internal_test.go => subject/doc_internal_test.go} (98%) create mode 100644 internal/search/subject/event.go rename internal/search/{ => subject}/handle.go (94%) rename internal/search/{ => subject}/handle_internal_test.go (98%) rename internal/search/{ => subject}/index_internal_test.go (84%) diff --git a/canal/canal.go b/canal/canal.go index 1681c5d10..9ad7ea470 100644 --- a/canal/canal.go +++ b/canal/canal.go @@ -26,6 +26,8 @@ import ( "github.com/bangumi/server/config" "github.com/bangumi/server/dal" + "github.com/bangumi/server/internal/character" + "github.com/bangumi/server/internal/person" "github.com/bangumi/server/internal/pkg/cache" "github.com/bangumi/server/internal/pkg/driver" "github.com/bangumi/server/internal/pkg/logger" @@ -62,7 +64,8 @@ func Main() error { fx.Provide( driver.NewMysqlSqlDB, driver.NewRueidisClient, logger.Copy, cache.NewRedisCache, - subject.NewMysqlRepo, search.New, session.NewMysqlRepo, session.New, + subject.NewMysqlRepo, character.NewMysqlRepo, person.NewMysqlRepo, + search.New, session.NewMysqlRepo, session.New, driver.NewS3, tag.NewCachedRepo, tag.NewMysqlRepo, diff --git a/canal/event.go b/canal/event.go index 458b49819..4d7fc3d0f 100644 --- a/canal/event.go +++ b/canal/event.go @@ -124,6 +124,10 @@ func (e *eventHandler) onMessage(key, value []byte) error { err = e.OnSubjectField(ctx, key, p) case "chii_subjects": err = e.OnSubject(ctx, key, p) + case "chii_characters": + err = e.OnCharacter(ctx, key, p) + case "chii_persons": + err = e.OnPerson(ctx, key, p) case "chii_members": err = e.OnUserChange(ctx, key, p) } diff --git a/canal/on_character.go b/canal/on_character.go new file mode 100644 index 000000000..525d1b7d8 --- /dev/null +++ b/canal/on_character.go @@ -0,0 +1,45 @@ +package canal + +import ( + "context" + "encoding/json" + + "github.com/trim21/errgo" + "go.uber.org/zap" + + "github.com/bangumi/server/internal/model" + "github.com/bangumi/server/internal/search" +) + +type CharacterKey struct { + ID model.CharacterID `json:"crt_id"` +} + +func (e *eventHandler) OnCharacter(ctx context.Context, key json.RawMessage, payload Payload) error { + var k CharacterKey + if err := json.Unmarshal(key, &k); err != nil { + return err + } + return e.onCharacterChange(ctx, k.ID, payload.Op) +} + +func (e *eventHandler) onCharacterChange(ctx context.Context, characterID model.CharacterID, op string) error { + switch op { + case opCreate: + if err := e.search.EventAdded(ctx, characterID, search.SearchTargetCharacter); err != nil { + return errgo.Wrap(err, "search.OnCharacterAdded") + } + case opUpdate, opSnapshot: + if err := e.search.EventUpdate(ctx, characterID, search.SearchTargetCharacter); err != nil { + return errgo.Wrap(err, "search.OnCharacterUpdate") + } + case opDelete: + if err := e.search.EventDelete(ctx, characterID, search.SearchTargetCharacter); err != nil { + return errgo.Wrap(err, "search.OnCharacterDelete") + } + default: + e.log.Warn("unexpected operator", zap.String("op", op)) + } + + return nil +} diff --git a/canal/on_person.go b/canal/on_person.go new file mode 100644 index 000000000..f5f75b4fa --- /dev/null +++ b/canal/on_person.go @@ -0,0 +1,44 @@ +package canal + +import ( + "context" + "encoding/json" + + "github.com/trim21/errgo" + "go.uber.org/zap" + + "github.com/bangumi/server/internal/model" + "github.com/bangumi/server/internal/search" +) + +type PersonKey struct { + ID model.PersonID `json:"prsn_id"` +} + +func (e *eventHandler) OnPerson(ctx context.Context, key json.RawMessage, payload Payload) error { + var k PersonKey + if err := json.Unmarshal(key, &k); err != nil { + return err + } + return e.onPersonChange(ctx, k.ID, payload.Op) +} + +func (e *eventHandler) onPersonChange(ctx context.Context, personID model.PersonID, op string) error { + switch op { + case opCreate: + if err := e.search.EventAdded(ctx, personID, search.SearchTargetPerson); err != nil { + return errgo.Wrap(err, "search.OnPersonAdded") + } + case opUpdate, opSnapshot: + if err := e.search.EventUpdate(ctx, personID, search.SearchTargetPerson); err != nil { + return errgo.Wrap(err, "search.OnPersonUpdate") + } + case opDelete: + if err := e.search.EventDelete(ctx, personID, search.SearchTargetPerson); err != nil { + return errgo.Wrap(err, "search.OnPersonDelete") + } + default: + e.log.Warn("unexpected operator", zap.String("op", op)) + } + return nil +} diff --git a/canal/on_subject.go b/canal/on_subject.go index f96b7354b..5ecf0b406 100644 --- a/canal/on_subject.go +++ b/canal/on_subject.go @@ -22,8 +22,17 @@ import ( "go.uber.org/zap" "github.com/bangumi/server/internal/model" + "github.com/bangumi/server/internal/search" ) +type SubjectKey struct { + ID model.SubjectID `json:"subject_id"` +} + +type SubjectFieldKey struct { + ID model.SubjectID `json:"field_sid"` +} + func (e *eventHandler) OnSubject(ctx context.Context, key json.RawMessage, payload Payload) error { var k SubjectKey if err := json.Unmarshal(key, &k); err != nil { @@ -45,15 +54,15 @@ func (e *eventHandler) OnSubjectField(ctx context.Context, key json.RawMessage, func (e *eventHandler) onSubjectChange(ctx context.Context, subjectID model.SubjectID, op string) error { switch op { case opCreate: - if err := e.search.OnSubjectAdded(ctx, subjectID); err != nil { + if err := e.search.EventAdded(ctx, subjectID, search.SearchTargetSubject); err != nil { return errgo.Wrap(err, "search.OnSubjectAdded") } case opUpdate, opSnapshot: - if err := e.search.OnSubjectUpdate(ctx, subjectID); err != nil { + if err := e.search.EventUpdate(ctx, subjectID, search.SearchTargetSubject); err != nil { return errgo.Wrap(err, "search.OnSubjectUpdate") } case opDelete: - if err := e.search.OnSubjectDelete(ctx, subjectID); err != nil { + if err := e.search.EventDelete(ctx, subjectID, search.SearchTargetSubject); err != nil { return errgo.Wrap(err, "search.OnSubjectDelete") } default: @@ -62,11 +71,3 @@ func (e *eventHandler) onSubjectChange(ctx context.Context, subjectID model.Subj return nil } - -type SubjectKey struct { - ID model.SubjectID `json:"subject_id"` -} - -type SubjectFieldKey struct { - ID model.SubjectID `json:"field_sid"` -} diff --git a/cmd/web/cmd.go b/cmd/web/cmd.go index 080542909..ea11b7a55 100644 --- a/cmd/web/cmd.go +++ b/cmd/web/cmd.go @@ -88,7 +88,8 @@ func start() error { index.NewMysqlRepo, auth.NewMysqlRepo, episode.NewMysqlRepo, revision.NewMysqlRepo, infra.NewMysqlRepo, timeline.NewMysqlRepo, pm.NewMysqlRepo, notification.NewMysqlRepo, - dam.New, subject.NewMysqlRepo, subject.NewCachedRepo, person.NewMysqlRepo, + dam.New, subject.NewMysqlRepo, subject.NewCachedRepo, + character.NewMysqlRepo, person.NewMysqlRepo, tag.NewCachedRepo, tag.NewMysqlRepo, diff --git a/internal/mocks/SearchClient.go b/internal/mocks/SearchClient.go index 2311ae084..41241d903 100644 --- a/internal/mocks/SearchClient.go +++ b/internal/mocks/SearchClient.go @@ -7,6 +7,8 @@ import ( echo "github.com/labstack/echo/v4" mock "github.com/stretchr/testify/mock" + + search "github.com/bangumi/server/internal/search" ) // SearchClient is an autogenerated mock type for the Client type @@ -54,17 +56,17 @@ func (_c *SearchClient_Close_Call) RunAndReturn(run func()) *SearchClient_Close_ return _c } -// Handle provides a mock function with given fields: c -func (_m *SearchClient) Handle(c echo.Context) error { - ret := _m.Called(c) +// EventAdded provides a mock function with given fields: ctx, id, target +func (_m *SearchClient) EventAdded(ctx context.Context, id uint32, target search.SearchTarget) error { + ret := _m.Called(ctx, id, target) if len(ret) == 0 { - panic("no return value specified for Handle") + panic("no return value specified for EventAdded") } var r0 error - if rf, ok := ret.Get(0).(func(echo.Context) error); ok { - r0 = rf(c) + if rf, ok := ret.Get(0).(func(context.Context, uint32, search.SearchTarget) error); ok { + r0 = rf(ctx, id, target) } else { r0 = ret.Error(0) } @@ -72,45 +74,47 @@ func (_m *SearchClient) Handle(c echo.Context) error { return r0 } -// SearchClient_Handle_Call is a *mock.Call that shadows Run/Return methods with type explicit version for method 'Handle' -type SearchClient_Handle_Call struct { +// SearchClient_EventAdded_Call is a *mock.Call that shadows Run/Return methods with type explicit version for method 'EventAdded' +type SearchClient_EventAdded_Call struct { *mock.Call } -// Handle is a helper method to define mock.On call -// - c echo.Context -func (_e *SearchClient_Expecter) Handle(c interface{}) *SearchClient_Handle_Call { - return &SearchClient_Handle_Call{Call: _e.mock.On("Handle", c)} +// EventAdded is a helper method to define mock.On call +// - ctx context.Context +// - id uint32 +// - target search.SearchTarget +func (_e *SearchClient_Expecter) EventAdded(ctx interface{}, id interface{}, target interface{}) *SearchClient_EventAdded_Call { + return &SearchClient_EventAdded_Call{Call: _e.mock.On("EventAdded", ctx, id, target)} } -func (_c *SearchClient_Handle_Call) Run(run func(c echo.Context)) *SearchClient_Handle_Call { +func (_c *SearchClient_EventAdded_Call) Run(run func(ctx context.Context, id uint32, target search.SearchTarget)) *SearchClient_EventAdded_Call { _c.Call.Run(func(args mock.Arguments) { - run(args[0].(echo.Context)) + run(args[0].(context.Context), args[1].(uint32), args[2].(search.SearchTarget)) }) return _c } -func (_c *SearchClient_Handle_Call) Return(_a0 error) *SearchClient_Handle_Call { +func (_c *SearchClient_EventAdded_Call) Return(_a0 error) *SearchClient_EventAdded_Call { _c.Call.Return(_a0) return _c } -func (_c *SearchClient_Handle_Call) RunAndReturn(run func(echo.Context) error) *SearchClient_Handle_Call { +func (_c *SearchClient_EventAdded_Call) RunAndReturn(run func(context.Context, uint32, search.SearchTarget) error) *SearchClient_EventAdded_Call { _c.Call.Return(run) return _c } -// OnSubjectAdded provides a mock function with given fields: ctx, id -func (_m *SearchClient) OnSubjectAdded(ctx context.Context, id uint32) error { - ret := _m.Called(ctx, id) +// EventDelete provides a mock function with given fields: ctx, id, target +func (_m *SearchClient) EventDelete(ctx context.Context, id uint32, target search.SearchTarget) error { + ret := _m.Called(ctx, id, target) if len(ret) == 0 { - panic("no return value specified for OnSubjectAdded") + panic("no return value specified for EventDelete") } var r0 error - if rf, ok := ret.Get(0).(func(context.Context, uint32) error); ok { - r0 = rf(ctx, id) + if rf, ok := ret.Get(0).(func(context.Context, uint32, search.SearchTarget) error); ok { + r0 = rf(ctx, id, target) } else { r0 = ret.Error(0) } @@ -118,46 +122,47 @@ func (_m *SearchClient) OnSubjectAdded(ctx context.Context, id uint32) error { return r0 } -// SearchClient_OnSubjectAdded_Call is a *mock.Call that shadows Run/Return methods with type explicit version for method 'OnSubjectAdded' -type SearchClient_OnSubjectAdded_Call struct { +// SearchClient_EventDelete_Call is a *mock.Call that shadows Run/Return methods with type explicit version for method 'EventDelete' +type SearchClient_EventDelete_Call struct { *mock.Call } -// OnSubjectAdded is a helper method to define mock.On call +// EventDelete is a helper method to define mock.On call // - ctx context.Context // - id uint32 -func (_e *SearchClient_Expecter) OnSubjectAdded(ctx interface{}, id interface{}) *SearchClient_OnSubjectAdded_Call { - return &SearchClient_OnSubjectAdded_Call{Call: _e.mock.On("OnSubjectAdded", ctx, id)} +// - target search.SearchTarget +func (_e *SearchClient_Expecter) EventDelete(ctx interface{}, id interface{}, target interface{}) *SearchClient_EventDelete_Call { + return &SearchClient_EventDelete_Call{Call: _e.mock.On("EventDelete", ctx, id, target)} } -func (_c *SearchClient_OnSubjectAdded_Call) Run(run func(ctx context.Context, id uint32)) *SearchClient_OnSubjectAdded_Call { +func (_c *SearchClient_EventDelete_Call) Run(run func(ctx context.Context, id uint32, target search.SearchTarget)) *SearchClient_EventDelete_Call { _c.Call.Run(func(args mock.Arguments) { - run(args[0].(context.Context), args[1].(uint32)) + run(args[0].(context.Context), args[1].(uint32), args[2].(search.SearchTarget)) }) return _c } -func (_c *SearchClient_OnSubjectAdded_Call) Return(_a0 error) *SearchClient_OnSubjectAdded_Call { +func (_c *SearchClient_EventDelete_Call) Return(_a0 error) *SearchClient_EventDelete_Call { _c.Call.Return(_a0) return _c } -func (_c *SearchClient_OnSubjectAdded_Call) RunAndReturn(run func(context.Context, uint32) error) *SearchClient_OnSubjectAdded_Call { +func (_c *SearchClient_EventDelete_Call) RunAndReturn(run func(context.Context, uint32, search.SearchTarget) error) *SearchClient_EventDelete_Call { _c.Call.Return(run) return _c } -// OnSubjectDelete provides a mock function with given fields: ctx, id -func (_m *SearchClient) OnSubjectDelete(ctx context.Context, id uint32) error { - ret := _m.Called(ctx, id) +// EventUpdate provides a mock function with given fields: ctx, id, target +func (_m *SearchClient) EventUpdate(ctx context.Context, id uint32, target search.SearchTarget) error { + ret := _m.Called(ctx, id, target) if len(ret) == 0 { - panic("no return value specified for OnSubjectDelete") + panic("no return value specified for EventUpdate") } var r0 error - if rf, ok := ret.Get(0).(func(context.Context, uint32) error); ok { - r0 = rf(ctx, id) + if rf, ok := ret.Get(0).(func(context.Context, uint32, search.SearchTarget) error); ok { + r0 = rf(ctx, id, target) } else { r0 = ret.Error(0) } @@ -165,46 +170,47 @@ func (_m *SearchClient) OnSubjectDelete(ctx context.Context, id uint32) error { return r0 } -// SearchClient_OnSubjectDelete_Call is a *mock.Call that shadows Run/Return methods with type explicit version for method 'OnSubjectDelete' -type SearchClient_OnSubjectDelete_Call struct { +// SearchClient_EventUpdate_Call is a *mock.Call that shadows Run/Return methods with type explicit version for method 'EventUpdate' +type SearchClient_EventUpdate_Call struct { *mock.Call } -// OnSubjectDelete is a helper method to define mock.On call +// EventUpdate is a helper method to define mock.On call // - ctx context.Context // - id uint32 -func (_e *SearchClient_Expecter) OnSubjectDelete(ctx interface{}, id interface{}) *SearchClient_OnSubjectDelete_Call { - return &SearchClient_OnSubjectDelete_Call{Call: _e.mock.On("OnSubjectDelete", ctx, id)} +// - target search.SearchTarget +func (_e *SearchClient_Expecter) EventUpdate(ctx interface{}, id interface{}, target interface{}) *SearchClient_EventUpdate_Call { + return &SearchClient_EventUpdate_Call{Call: _e.mock.On("EventUpdate", ctx, id, target)} } -func (_c *SearchClient_OnSubjectDelete_Call) Run(run func(ctx context.Context, id uint32)) *SearchClient_OnSubjectDelete_Call { +func (_c *SearchClient_EventUpdate_Call) Run(run func(ctx context.Context, id uint32, target search.SearchTarget)) *SearchClient_EventUpdate_Call { _c.Call.Run(func(args mock.Arguments) { - run(args[0].(context.Context), args[1].(uint32)) + run(args[0].(context.Context), args[1].(uint32), args[2].(search.SearchTarget)) }) return _c } -func (_c *SearchClient_OnSubjectDelete_Call) Return(_a0 error) *SearchClient_OnSubjectDelete_Call { +func (_c *SearchClient_EventUpdate_Call) Return(_a0 error) *SearchClient_EventUpdate_Call { _c.Call.Return(_a0) return _c } -func (_c *SearchClient_OnSubjectDelete_Call) RunAndReturn(run func(context.Context, uint32) error) *SearchClient_OnSubjectDelete_Call { +func (_c *SearchClient_EventUpdate_Call) RunAndReturn(run func(context.Context, uint32, search.SearchTarget) error) *SearchClient_EventUpdate_Call { _c.Call.Return(run) return _c } -// OnSubjectUpdate provides a mock function with given fields: ctx, id -func (_m *SearchClient) OnSubjectUpdate(ctx context.Context, id uint32) error { - ret := _m.Called(ctx, id) +// Handle provides a mock function with given fields: c, target +func (_m *SearchClient) Handle(c echo.Context, target search.SearchTarget) error { + ret := _m.Called(c, target) if len(ret) == 0 { - panic("no return value specified for OnSubjectUpdate") + panic("no return value specified for Handle") } var r0 error - if rf, ok := ret.Get(0).(func(context.Context, uint32) error); ok { - r0 = rf(ctx, id) + if rf, ok := ret.Get(0).(func(echo.Context, search.SearchTarget) error); ok { + r0 = rf(c, target) } else { r0 = ret.Error(0) } @@ -212,31 +218,31 @@ func (_m *SearchClient) OnSubjectUpdate(ctx context.Context, id uint32) error { return r0 } -// SearchClient_OnSubjectUpdate_Call is a *mock.Call that shadows Run/Return methods with type explicit version for method 'OnSubjectUpdate' -type SearchClient_OnSubjectUpdate_Call struct { +// SearchClient_Handle_Call is a *mock.Call that shadows Run/Return methods with type explicit version for method 'Handle' +type SearchClient_Handle_Call struct { *mock.Call } -// OnSubjectUpdate is a helper method to define mock.On call -// - ctx context.Context -// - id uint32 -func (_e *SearchClient_Expecter) OnSubjectUpdate(ctx interface{}, id interface{}) *SearchClient_OnSubjectUpdate_Call { - return &SearchClient_OnSubjectUpdate_Call{Call: _e.mock.On("OnSubjectUpdate", ctx, id)} +// Handle is a helper method to define mock.On call +// - c echo.Context +// - target search.SearchTarget +func (_e *SearchClient_Expecter) Handle(c interface{}, target interface{}) *SearchClient_Handle_Call { + return &SearchClient_Handle_Call{Call: _e.mock.On("Handle", c, target)} } -func (_c *SearchClient_OnSubjectUpdate_Call) Run(run func(ctx context.Context, id uint32)) *SearchClient_OnSubjectUpdate_Call { +func (_c *SearchClient_Handle_Call) Run(run func(c echo.Context, target search.SearchTarget)) *SearchClient_Handle_Call { _c.Call.Run(func(args mock.Arguments) { - run(args[0].(context.Context), args[1].(uint32)) + run(args[0].(echo.Context), args[1].(search.SearchTarget)) }) return _c } -func (_c *SearchClient_OnSubjectUpdate_Call) Return(_a0 error) *SearchClient_OnSubjectUpdate_Call { +func (_c *SearchClient_Handle_Call) Return(_a0 error) *SearchClient_Handle_Call { _c.Call.Return(_a0) return _c } -func (_c *SearchClient_OnSubjectUpdate_Call) RunAndReturn(run func(context.Context, uint32) error) *SearchClient_OnSubjectUpdate_Call { +func (_c *SearchClient_Handle_Call) RunAndReturn(run func(echo.Context, search.SearchTarget) error) *SearchClient_Handle_Call { _c.Call.Return(run) return _c } diff --git a/internal/search/character/client.go b/internal/search/character/client.go new file mode 100644 index 000000000..83e926243 --- /dev/null +++ b/internal/search/character/client.go @@ -0,0 +1,110 @@ +package character + +import ( + "context" + "fmt" + "reflect" + "strconv" + + "github.com/meilisearch/meilisearch-go" + "github.com/trim21/errgo" + "github.com/trim21/pkg/queue" + "go.uber.org/zap" + + "github.com/bangumi/server/config" + "github.com/bangumi/server/dal/query" + "github.com/bangumi/server/internal/character" + "github.com/bangumi/server/internal/model" + "github.com/bangumi/server/internal/search/searcher" +) + +const ( + idx = "characters" +) + +func New( + cfg config.AppConfig, + meili meilisearch.ServiceManager, + repo character.Repo, + log *zap.Logger, + query *query.Query, +) (searcher.Searcher, error) { + if repo == nil { + return nil, fmt.Errorf("nil characterRepo") + } + c := &client{ + meili: meili, + repo: repo, + index: meili.Index(idx), + log: log.Named("search").With(zap.String("index", idx)), + q: query, + } + + if cfg.AppType != config.AppTypeCanal { + return c, nil + } + + return c, c.canalInit(cfg) +} + +type client struct { + repo character.Repo + index meilisearch.IndexManager + + meili meilisearch.ServiceManager + log *zap.Logger + q *query.Query + + queue *queue.Batched[searcher.Document] +} + +func (c *client) Close() { + if c.queue != nil { + c.queue.Close() + } +} + +func (c *client) canalInit(cfg config.AppConfig) error { + if err := searcher.ValidateConfigs(cfg); err != nil { + return errgo.Wrap(err, "validate search config") + } + c.queue = searcher.NewBatchQueue(cfg, c.log, c.index) + searcher.RegisterQueueMetrics(idx, c.queue) + shouldCreateIndex, err := searcher.NeedFirstRun(c.meili, idx) + if err != nil { + return err + } + if shouldCreateIndex { + go c.firstRun() + } + return nil +} + +//nolint:funlen +func (c *client) firstRun() { + c.log.Info("search initialize") + rt := reflect.TypeOf(document{}) + searcher.InitIndex(c.log, c.meili, idx, rt, rankRule()) + + ctx := context.Background() + + maxItem, err := c.q.Character.WithContext(ctx).Limit(1).Order(c.q.Character.ID.Desc()).Take() + if err != nil { + c.log.Fatal("failed to get current max id", zap.Error(err)) + return + } + + c.log.Info(fmt.Sprintf("run full search index with max %s id %d", idx, maxItem.ID)) + + width := len(strconv.Itoa(int(maxItem.ID))) + for i := model.CharacterID(1); i <= maxItem.ID; i++ { + if i%10000 == 0 { + c.log.Info(fmt.Sprintf("progress %*d/%d", width, i, maxItem.ID)) + } + + err := c.OnUpdate(ctx, i) + if err != nil { + c.log.Error("error when updating", zap.Error(err)) + } + } +} diff --git a/internal/search/character/doc.go b/internal/search/character/doc.go new file mode 100644 index 000000000..1e1a23de8 --- /dev/null +++ b/internal/search/character/doc.go @@ -0,0 +1,51 @@ +package character + +import ( + "strconv" + + "github.com/bangumi/server/internal/model" + "github.com/bangumi/server/internal/search/searcher" + "github.com/bangumi/server/pkg/wiki" +) + +type document struct { + ID model.CharacterID `json:"id"` + Name string `json:"name" searchable:"true"` + Aliases []string `json:"aliases,omitempty" searchable:"true"` + Comment uint32 `json:"comment" sortable:"true"` + Collect uint32 `json:"collect" sortable:"true"` + NSFW bool `json:"nsfw" filterable:"true"` +} + +func (d *document) GetID() string { + return strconv.FormatUint(uint64(d.ID), 10) +} + +func rankRule() *[]string { + return &[]string{ + // 相似度最优先 + "exactness", + "words", + "typo", + "proximity", + "attribute", + "sort", + "id:asc", + "comment:desc", + "collect:desc", + "nsfw:asc", + } +} + +func extract(c *model.Character) searcher.Document { + w := wiki.ParseOmitError(c.Infobox) + + return &document{ + ID: c.ID, + Name: c.Name, + Aliases: searcher.ExtractAliases(w), + Comment: c.CommentCount, + Collect: c.CollectCount, + NSFW: c.NSFW, + } +} diff --git a/internal/search/character/event.go b/internal/search/character/event.go new file mode 100644 index 000000000..10c3dad66 --- /dev/null +++ b/internal/search/character/event.go @@ -0,0 +1,57 @@ +package character + +import ( + "context" + "errors" + "strconv" + + "github.com/trim21/errgo" + + "github.com/bangumi/server/domain/gerr" + "github.com/bangumi/server/internal/model" +) + +func (c *client) OnAdded(ctx context.Context, id model.CharacterID) error { + s, err := c.repo.Get(ctx, id) + if err != nil { + if errors.Is(err, gerr.ErrNotFound) { + return nil + } + return errgo.Wrap(err, "characterRepo.Get") + } + + if s.Redirect != 0 { + return c.OnDelete(ctx, id) + } + + extracted := extract(&s) + + _, err = c.index.UpdateDocumentsWithContext(ctx, extracted, "id") + return err +} + +func (c *client) OnUpdate(ctx context.Context, id model.CharacterID) error { + s, err := c.repo.Get(ctx, id) + if err != nil { + if errors.Is(err, gerr.ErrNotFound) { + return nil + } + return errgo.Wrap(err, "characterRepo.Get") + } + + if s.Redirect != 0 { + return c.OnDelete(ctx, id) + } + + extracted := extract(&s) + + c.queue.Push(extracted) + + return nil +} + +func (c *client) OnDelete(ctx context.Context, id model.CharacterID) error { + _, err := c.index.DeleteDocumentWithContext(ctx, strconv.FormatUint(uint64(id), 10)) + + return errgo.Wrap(err, "search") +} diff --git a/internal/search/character/handle.go b/internal/search/character/handle.go new file mode 100644 index 000000000..660b2704e --- /dev/null +++ b/internal/search/character/handle.go @@ -0,0 +1,128 @@ +package character + +import ( + "encoding/json" + "fmt" + "net/http" + + "github.com/labstack/echo/v4" + "github.com/meilisearch/meilisearch-go" + "github.com/trim21/errgo" + + "github.com/bangumi/server/internal/model" + "github.com/bangumi/server/internal/pkg/generic/slice" + "github.com/bangumi/server/internal/pkg/null" + "github.com/bangumi/server/web/accessor" + "github.com/bangumi/server/web/req" + "github.com/bangumi/server/web/res" +) + +const defaultLimit = 10 +const maxLimit = 20 + +type Req struct { + Keyword string `json:"keyword"` + Filter ReqFilter `json:"filter"` +} + +type ReqFilter struct { //nolint:musttag + NSFW null.Bool `json:"nsfw"` +} + +type hit struct { + ID model.CharacterID `json:"id"` +} + +//nolint:funlen +func (c *client) Handle(ctx echo.Context) error { + auth := accessor.GetFromCtx(ctx) + q, err := req.GetPageQuerySoftLimit(ctx, defaultLimit, maxLimit) + if err != nil { + return err + } + + var r Req + if err = json.NewDecoder(ctx.Request().Body).Decode(&r); err != nil { + return res.JSONError(ctx, err) + } + + if !auth.AllowNSFW() { + r.Filter.NSFW = null.Bool{Set: true, Value: false} + } + + result, err := c.doSearch(r.Keyword, filterToMeiliFilter(r.Filter), q.Limit, q.Offset) + if err != nil { + return errgo.Wrap(err, "search") + } + + var hits []hit + if err = json.Unmarshal(result.Hits, &hits); err != nil { + return errgo.Wrap(err, "json.Unmarshal") + } + ids := slice.Map(hits, func(h hit) model.SubjectID { return h.ID }) + + characters, err := c.repo.GetByIDs(ctx.Request().Context(), ids) + if err != nil { + return errgo.Wrap(err, "characterRepo.GetByIDs") + } + + var data = make([]res.CharacterV0, 0, len(characters)) + for _, id := range ids { + s, ok := characters[id] + if !ok { + continue + } + character := res.ConvertModelCharacter(s) + data = append(data, character) + } + + return ctx.JSON(http.StatusOK, res.Paged{ + Data: data, + Total: result.EstimatedTotalHits, + Limit: q.Limit, + Offset: q.Offset, + }) +} + +func (c *client) doSearch( + words string, + filter [][]string, + limit, offset int, +) (*meiliSearchResponse, error) { + if limit == 0 { + limit = 10 + } else if limit > 50 { + limit = 50 + } + + raw, err := c.index.SearchRaw(words, &meilisearch.SearchRequest{ + Offset: int64(offset), + Limit: int64(limit), + Filter: filter, + }) + if err != nil { + return nil, errgo.Wrap(err, "meilisearch search") + } + + var r meiliSearchResponse + if err := json.Unmarshal(*raw, &r); err != nil { + return nil, errgo.Wrap(err, "json.Unmarshal") + } + + return &r, nil +} + +type meiliSearchResponse struct { + Hits json.RawMessage `json:"hits"` + EstimatedTotalHits int64 `json:"estimatedTotalHits"` //nolint:tagliatelle +} + +func filterToMeiliFilter(req ReqFilter) [][]string { + var filter = make([][]string, 0, 1) + + if req.NSFW.Set { + filter = append(filter, []string{fmt.Sprintf("nsfw = %t", req.NSFW.Value)}) + } + + return filter +} diff --git a/internal/search/client.go b/internal/search/client.go deleted file mode 100644 index d92627b94..000000000 --- a/internal/search/client.go +++ /dev/null @@ -1,340 +0,0 @@ -// SPDX-License-Identifier: AGPL-3.0-only -// -// This program is free software: you can redistribute it and/or modify -// it under the terms of the GNU Affero General Public License as published -// by the Free Software Foundation, version 3. -// -// This program is distributed in the hope that it will be useful, -// but WITHOUT ANY WARRANTY; without even the implied warranty of -// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. -// See the GNU Affero General Public License for more details. -// -// You should have received a copy of the GNU Affero General Public License -// along with this program. If not, see - -package search - -import ( - "context" - "errors" - "fmt" - "net/http" - "net/url" - "os" - "reflect" - "strconv" - "strings" - "time" - - "github.com/avast/retry-go/v4" - "github.com/meilisearch/meilisearch-go" - "github.com/prometheus/client_golang/prometheus" - "github.com/samber/lo" - "github.com/trim21/errgo" - "github.com/trim21/pkg/queue" - "go.uber.org/zap" - - "github.com/bangumi/server/config" - "github.com/bangumi/server/dal/query" - "github.com/bangumi/server/domain/gerr" - "github.com/bangumi/server/internal/model" - "github.com/bangumi/server/internal/subject" -) - -// New provide a search app is AppConfig.MeiliSearchURL is empty string, return nope search client. -// -// see `MeiliSearchURL` and `MeiliSearchKey` in [config.AppConfig]. -func New( - cfg config.AppConfig, - subjectRepo subject.Repo, - log *zap.Logger, - query *query.Query, -) (Client, error) { - if cfg.Search.MeiliSearch.URL == "" { - return NoopClient{}, nil - } - - if subjectRepo == nil { - panic("nil SubjectRepo") - } - if _, err := url.Parse(cfg.Search.MeiliSearch.URL); err != nil { - return nil, errgo.Wrap(err, "url.Parse") - } - - meili := meilisearch.New( - cfg.Search.MeiliSearch.URL, - meilisearch.WithAPIKey(cfg.Search.MeiliSearch.Key), - meilisearch.WithCustomClient(&http.Client{Timeout: cfg.Search.MeiliSearch.Timeout}), - ) - - if _, err := meili.Version(); err != nil { - return nil, errgo.Wrap(err, "meilisearch") - } - - c := &client{ - meili: meili, - q: query, - subject: "subjects", - subjectIndex: meili.Index("subjects"), - log: log.Named("search"), - subjectRepo: subjectRepo, - } - - if cfg.AppType != config.AppTypeCanal { - return c, nil - } - - return c, c.canalInit(cfg) -} - -func (c *client) canalInit(cfg config.AppConfig) error { - if cfg.Search.SearchBatchSize <= 0 { - // nolint: goerr113 - return fmt.Errorf("config.SearchBatchSize should >= 0, current %d", cfg.Search.SearchBatchSize) - } - - if cfg.Search.SearchBatchInterval <= 0 { - // nolint: goerr113 - return fmt.Errorf("config.SearchBatchInterval should >= 0, current %d", cfg.Search.SearchBatchInterval) - } - - c.queue = queue.NewBatchedDedupe[subjectIndex]( - c.sendBatch, - cfg.Search.SearchBatchSize, - cfg.Search.SearchBatchInterval, - func(items []subjectIndex) []subjectIndex { - // lo.UniqBy 会保留第一次出现的元素,reverse 之后会保留新的数据 - return lo.UniqBy(lo.Reverse(items), func(item subjectIndex) model.SubjectID { - return item.ID - }) - }, - ) - - prometheus.DefaultRegisterer.MustRegister( - prometheus.NewGaugeFunc( - prometheus.GaugeOpts{ - Namespace: "chii", - Name: "meilisearch_queue_batch", - Help: "meilisearch update queue batch size", - }, - func() float64 { - return float64(c.queue.Len()) - }, - )) - - shouldCreateIndex, err := c.needFirstRun() - if err != nil { - return err - } - - if shouldCreateIndex { - go c.firstRun() - } - - return nil -} - -type client struct { - subjectRepo subject.Repo - meili meilisearch.ServiceManager - q *query.Query - subjectIndex meilisearch.IndexManager - log *zap.Logger - subject string - queue *queue.Batched[subjectIndex] -} - -func (c *client) Close() { - if c.queue != nil { - c.queue.Close() - } -} - -// OnSubjectAdded is the hook called by canal. -func (c *client) OnSubjectAdded(ctx context.Context, id model.SubjectID) error { - s, err := c.subjectRepo.Get(ctx, id, subject.Filter{}) - if err != nil { - if errors.Is(err, gerr.ErrNotFound) { - return nil - } - return errgo.Wrap(err, "subjectRepo.Get") - } - - if s.Redirect != 0 || s.Ban != 0 { - return c.OnSubjectDelete(ctx, id) - } - - extracted := extractSubject(&s) - - _, err = c.subjectIndex.UpdateDocumentsWithContext(ctx, extracted, "id") - return err -} - -// OnSubjectUpdate is the hook called by canal. -func (c *client) OnSubjectUpdate(ctx context.Context, id model.SubjectID) error { - s, err := c.subjectRepo.Get(ctx, id, subject.Filter{}) - if err != nil { - if errors.Is(err, gerr.ErrNotFound) { - return nil - } - return errgo.Wrap(err, "subjectRepo.Get") - } - - if s.Redirect != 0 || s.Ban != 0 { - return c.OnSubjectDelete(ctx, id) - } - - extracted := extractSubject(&s) - - c.queue.Push(extracted) - - return nil -} - -// OnSubjectDelete is the hook called by canal. -func (c *client) OnSubjectDelete(ctx context.Context, id model.SubjectID) error { - _, err := c.subjectIndex.DeleteDocumentWithContext(ctx, strconv.FormatUint(uint64(id), 10)) - - return errgo.Wrap(err, "search") -} - -// UpsertSubject add subject to search backend. -func (c *client) sendBatch(items []subjectIndex) { - c.log.Debug("send batch to meilisearch", zap.Int("len", len(items))) - err := retry.Do( - func() error { - _, err := c.subjectIndex.UpdateDocuments(items, "id") - return err - }, - retry.OnRetry(func(n uint, err error) { - c.log.Warn("failed to send batch", zap.Uint("attempt", n), zap.Error(err)) - }), - - retry.DelayType(retry.BackOffDelay), - retry.Delay(time.Microsecond*100), - retry.Attempts(5), //nolint:mnd - retry.RetryIf(func(err error) bool { - var r = &meilisearch.Error{} - return errors.As(err, &r) - }), - ) - - if err != nil { - c.log.Error("failed to send batch", zap.Error(err)) - } -} - -func (c *client) needFirstRun() (bool, error) { - if os.Getenv("CHII_SEARCH_INIT") == "true" { - return true, nil - } - - index, err := c.meili.GetIndex("subjects") - if err != nil { - var e *meilisearch.Error - if errors.As(err, &e) { - return true, nil - } - return false, errgo.Wrap(err, "get subjects index") - } - - stat, err := index.GetStats() - if err != nil { - return false, errgo.Wrap(err, "get subjects index stats") - } - - return stat.NumberOfDocuments == 0, nil -} - -//nolint:funlen -func (c *client) firstRun() { - c.log.Info("search initialize") - _, err := c.meili.CreateIndex(&meilisearch.IndexConfig{ - Uid: "subjects", - PrimaryKey: "id", - }) - if err != nil { - c.log.Fatal("failed to create search subject index", zap.Error(err)) - return - } - - subjectIndex := c.meili.Index("subjects") - - c.log.Info("set sortable attributes", zap.Strings("attributes", *getAttributes("sortable"))) - _, err = subjectIndex.UpdateSortableAttributes(getAttributes("sortable")) - if err != nil { - c.log.Fatal("failed to update search index sortable attributes", zap.Error(err)) - return - } - - c.log.Info("set filterable attributes", zap.Strings("attributes", *getAttributes("filterable"))) - _, err = subjectIndex.UpdateFilterableAttributes(getAttributes("filterable")) - if err != nil { - c.log.Fatal("failed to update search index filterable attributes", zap.Error(err)) - return - } - - c.log.Info("set searchable attributes", zap.Strings("attributes", *getAttributes("searchable"))) - _, err = subjectIndex.UpdateSearchableAttributes(getAttributes("searchable")) - if err != nil { - c.log.Fatal("failed to update search index searchable attributes", zap.Error(err)) - return - } - - c.log.Info("set ranking rules", zap.Strings("rule", *rankRule())) - _, err = subjectIndex.UpdateRankingRules(rankRule()) - if err != nil { - c.log.Fatal("failed to update search index searchable attributes", zap.Error(err)) - return - } - - ctx := context.Background() - - maxSubject, err := c.q.Subject.WithContext(ctx).Limit(1).Order(c.q.Subject.ID.Desc()).Take() - if err != nil { - c.log.Fatal("failed to get current max subject id", zap.Error(err)) - return - } - - c.log.Info(fmt.Sprintf("run full search index with max subject id %d", maxSubject.ID)) - - width := len(strconv.Itoa(int(maxSubject.ID))) - for i := model.SubjectID(1); i <= maxSubject.ID; i++ { - if i%10000 == 0 { - c.log.Info(fmt.Sprintf("progress %*d/%d", width, i, maxSubject.ID)) - } - - err := c.OnSubjectUpdate(ctx, i) - if err != nil { - c.log.Error("error when updating subject", zap.Error(err)) - } - } -} - -func getAttributes(tag string) *[]string { - rt := reflect.TypeOf(subjectIndex{}) - var s []string - for i := 0; i < rt.NumField(); i++ { - t, ok := rt.Field(i).Tag.Lookup(tag) - if !ok { - continue - } - - if t != "true" { - continue - } - - s = append(s, getJSONFieldName(rt.Field(i))) - } - - return &s -} - -func getJSONFieldName(f reflect.StructField) string { - t := f.Tag.Get("json") - if t == "" { - return f.Name - } - - return strings.Split(t, ",")[0] -} diff --git a/internal/search/extractor.common.go b/internal/search/extractor.common.go deleted file mode 100644 index 6213919d1..000000000 --- a/internal/search/extractor.common.go +++ /dev/null @@ -1,55 +0,0 @@ -// SPDX-License-Identifier: AGPL-3.0-only -// -// This program is free software: you can redistribute it and/or modify -// it under the terms of the GNU Affero General Public License as published -// by the Free Software Foundation, version 3. -// -// This program is distributed in the hope that it will be useful, -// but WITHOUT ANY WARRANTY; without even the implied warranty of -// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. -// See the GNU Affero General Public License for more details. -// -// You should have received a copy of the GNU Affero General Public License -// along with this program. If not, see - -package search - -import ( - "github.com/bangumi/server/internal/model" - "github.com/bangumi/server/pkg/wiki" -) - -func heat(s *model.Subject) uint32 { - return s.OnHold + s.Doing + s.Dropped + s.Wish + s.Collect -} - -func extractAliases(s *model.Subject, w wiki.Wiki) []string { - var aliases = make([]string, 0, 2) - if s.NameCN != "" { - aliases = append(aliases, s.NameCN) - } - - for _, field := range w.Fields { - if field.Key == "别名" { - aliases = append(aliases, getValues(field)...) - } - } - - return aliases -} - -func getValues(f wiki.Field) []string { - if f.Null { - return nil - } - - if !f.Array { - return []string{f.Value} - } - - var s = make([]string, len(f.Values)) - for i, value := range f.Values { - s[i] = value.Value - } - return s -} diff --git a/internal/search/noop.go b/internal/search/noop.go index bf68f32d8..56ca35a8e 100644 --- a/internal/search/noop.go +++ b/internal/search/noop.go @@ -19,8 +19,6 @@ import ( "net/http" "github.com/labstack/echo/v4" - - "github.com/bangumi/server/internal/model" ) var _ Client = NoopClient{} @@ -28,17 +26,19 @@ var _ Client = NoopClient{} type NoopClient struct { } -func (n NoopClient) OnSubjectAdded(ctx context.Context, id model.SubjectID) error { return nil } - -func (n NoopClient) Handle(c echo.Context) error { +func (n NoopClient) Handle(c echo.Context, _ SearchTarget) error { return c.String(http.StatusOK, "search is not enable") } -func (n NoopClient) OnSubjectUpdate(_ context.Context, _ model.SubjectID) error { +func (n NoopClient) EventAdded(ctx context.Context, _ uint32, _ SearchTarget) error { + return nil +} + +func (n NoopClient) EventUpdate(_ context.Context, _ uint32, _ SearchTarget) error { return nil } -func (n NoopClient) OnSubjectDelete(_ context.Context, _ model.SubjectID) error { +func (n NoopClient) EventDelete(_ context.Context, _ uint32, _ SearchTarget) error { return nil } diff --git a/internal/search/person/client.go b/internal/search/person/client.go new file mode 100644 index 000000000..a7a454005 --- /dev/null +++ b/internal/search/person/client.go @@ -0,0 +1,110 @@ +package person + +import ( + "context" + "fmt" + "reflect" + "strconv" + + "github.com/meilisearch/meilisearch-go" + "github.com/trim21/errgo" + "github.com/trim21/pkg/queue" + "go.uber.org/zap" + + "github.com/bangumi/server/config" + "github.com/bangumi/server/dal/query" + "github.com/bangumi/server/internal/model" + "github.com/bangumi/server/internal/person" + "github.com/bangumi/server/internal/search/searcher" +) + +const ( + idx = "persons" +) + +func New( + cfg config.AppConfig, + meili meilisearch.ServiceManager, + repo person.Repo, + log *zap.Logger, + query *query.Query, +) (searcher.Searcher, error) { + if repo == nil { + return nil, fmt.Errorf("nil personRepo") + } + c := &client{ + meili: meili, + repo: repo, + index: meili.Index("persons"), + log: log.Named("search").With(zap.String("index", idx)), + q: query, + } + + if cfg.AppType != config.AppTypeCanal { + return c, nil + } + + return c, c.canalInit(cfg) +} + +type client struct { + repo person.Repo + index meilisearch.IndexManager + + meili meilisearch.ServiceManager + log *zap.Logger + q *query.Query + + queue *queue.Batched[searcher.Document] +} + +func (c *client) Close() { + if c.queue != nil { + c.queue.Close() + } +} + +func (c *client) canalInit(cfg config.AppConfig) error { + if err := searcher.ValidateConfigs(cfg); err != nil { + return errgo.Wrap(err, "validate search config") + } + c.queue = searcher.NewBatchQueue(cfg, c.log, c.index) + searcher.RegisterQueueMetrics(idx, c.queue) + shouldCreateIndex, err := searcher.NeedFirstRun(c.meili, idx) + if err != nil { + return err + } + if shouldCreateIndex { + go c.firstRun() + } + return nil +} + +//nolint:funlen +func (c *client) firstRun() { + c.log.Info("search initialize") + rt := reflect.TypeOf(document{}) + searcher.InitIndex(c.log, c.meili, idx, rt, rankRule()) + + ctx := context.Background() + + maxItem, err := c.q.Person.WithContext(ctx).Limit(1).Order(c.q.Person.ID.Desc()).Take() + if err != nil { + c.log.Fatal("failed to get current max id", zap.Error(err)) + return + } + + c.log.Info(fmt.Sprintf("run full search index with max %s id %d", idx, maxItem.ID)) + + width := len(strconv.Itoa(int(maxItem.ID))) + for i := model.PersonID(1); i <= maxItem.ID; i++ { + if i%10000 == 0 { + c.log.Info(fmt.Sprintf("progress %*d/%d", width, i, maxItem.ID)) + } + + err := c.OnUpdate(ctx, i) + if err != nil { + c.log.Error("error when updating", zap.Error(err)) + } + } +} diff --git a/internal/search/person/doc.go b/internal/search/person/doc.go new file mode 100644 index 000000000..c19e3a7ef --- /dev/null +++ b/internal/search/person/doc.go @@ -0,0 +1,49 @@ +package person + +import ( + "strconv" + + "github.com/bangumi/server/internal/model" + "github.com/bangumi/server/internal/search/searcher" + "github.com/bangumi/server/pkg/wiki" +) + +type document struct { + ID model.PersonID `json:"id"` + Name string `json:"name" searchable:"true"` + Aliases []string `json:"aliases,omitempty" searchable:"true"` + Comment uint32 `json:"comment" sortable:"true"` + Collect uint32 `json:"collect" sortable:"true"` + Career []string `json:"career,omitempty" filterable:"true"` +} + +func (d *document) GetID() string { + return strconv.FormatUint(uint64(d.ID), 10) +} + +func rankRule() *[]string { + return &[]string{ + // 相似度最优先 + "exactness", + "words", + "typo", + "proximity", + "attribute", + "sort", + "id:asc", + "comment:desc", + "collect:desc", + } +} + +func extract(c *model.Person) searcher.Document { + w := wiki.ParseOmitError(c.Infobox) + + return &document{ + ID: c.ID, + Name: c.Name, + Aliases: searcher.ExtractAliases(w), + Comment: c.CommentCount, + Collect: c.CollectCount, + } +} diff --git a/internal/search/person/event.go b/internal/search/person/event.go new file mode 100644 index 000000000..467c2fdf2 --- /dev/null +++ b/internal/search/person/event.go @@ -0,0 +1,57 @@ +package person + +import ( + "context" + "errors" + "strconv" + + "github.com/trim21/errgo" + + "github.com/bangumi/server/domain/gerr" + "github.com/bangumi/server/internal/model" +) + +func (c *client) OnAdded(ctx context.Context, id model.PersonID) error { + s, err := c.repo.Get(ctx, id) + if err != nil { + if errors.Is(err, gerr.ErrNotFound) { + return nil + } + return errgo.Wrap(err, "characterRepo.Get") + } + + if s.Redirect != 0 { + return c.OnDelete(ctx, id) + } + + extracted := extract(&s) + + _, err = c.index.UpdateDocumentsWithContext(ctx, extracted, "id") + return err +} + +func (c *client) OnUpdate(ctx context.Context, id model.PersonID) error { + s, err := c.repo.Get(ctx, id) + if err != nil { + if errors.Is(err, gerr.ErrNotFound) { + return nil + } + return errgo.Wrap(err, "characterRepo.Get") + } + + if s.Redirect != 0 { + return c.OnDelete(ctx, id) + } + + extracted := extract(&s) + + c.queue.Push(extracted) + + return nil +} + +func (c *client) OnDelete(ctx context.Context, id model.PersonID) error { + _, err := c.index.DeleteDocumentWithContext(ctx, strconv.FormatUint(uint64(id), 10)) + + return errgo.Wrap(err, "search") +} diff --git a/internal/search/person/handle.go b/internal/search/person/handle.go new file mode 100644 index 000000000..3f4bf1023 --- /dev/null +++ b/internal/search/person/handle.go @@ -0,0 +1,121 @@ +package person + +import ( + "encoding/json" + "net/http" + "strconv" + + "github.com/labstack/echo/v4" + "github.com/meilisearch/meilisearch-go" + "github.com/trim21/errgo" + + "github.com/bangumi/server/internal/model" + "github.com/bangumi/server/internal/pkg/generic/slice" + "github.com/bangumi/server/web/req" + "github.com/bangumi/server/web/res" +) + +const defaultLimit = 10 +const maxLimit = 20 + +type Req struct { + Keyword string `json:"keyword"` + Filter ReqFilter `json:"filter"` +} + +type ReqFilter struct { //nolint:musttag + Careers []string `json:"meta_tags"` // and +} + +type hit struct { + ID model.PersonID `json:"id"` +} + +//nolint:funlen +func (c *client) Handle(ctx echo.Context) error { + q, err := req.GetPageQuerySoftLimit(ctx, defaultLimit, maxLimit) + if err != nil { + return err + } + + var r Req + if err = json.NewDecoder(ctx.Request().Body).Decode(&r); err != nil { + return res.JSONError(ctx, err) + } + + result, err := c.doSearch(r.Keyword, filterToMeiliFilter(r.Filter), q.Limit, q.Offset) + if err != nil { + return errgo.Wrap(err, "search") + } + + var hits []hit + if err = json.Unmarshal(result.Hits, &hits); err != nil { + return errgo.Wrap(err, "json.Unmarshal") + } + ids := slice.Map(hits, func(h hit) model.SubjectID { return h.ID }) + + persons, err := c.repo.GetByIDs(ctx.Request().Context(), ids) + if err != nil { + return errgo.Wrap(err, "personRepo.GetByIDs") + } + + var data = make([]res.PersonV0, 0, len(persons)) + for _, id := range ids { + s, ok := persons[id] + if !ok { + continue + } + person := res.ConvertModelPerson(s) + data = append(data, person) + } + + return ctx.JSON(http.StatusOK, res.Paged{ + Data: data, + Total: result.EstimatedTotalHits, + Limit: q.Limit, + Offset: q.Offset, + }) +} + +func (c *client) doSearch( + words string, + filter [][]string, + limit, offset int, +) (*meiliSearchResponse, error) { + if limit == 0 { + limit = 10 + } else if limit > 50 { + limit = 50 + } + + raw, err := c.index.SearchRaw(words, &meilisearch.SearchRequest{ + Offset: int64(offset), + Limit: int64(limit), + Filter: filter, + }) + if err != nil { + return nil, errgo.Wrap(err, "meilisearch search") + } + + var r meiliSearchResponse + if err := json.Unmarshal(*raw, &r); err != nil { + return nil, errgo.Wrap(err, "json.Unmarshal") + } + + return &r, nil +} + +type meiliSearchResponse struct { + Hits json.RawMessage `json:"hits"` + EstimatedTotalHits int64 `json:"estimatedTotalHits"` //nolint:tagliatelle +} + +func filterToMeiliFilter(req ReqFilter) [][]string { + var filter = make([][]string, 0, len(req.Careers)) + + for _, career := range req.Careers { + filter = append(filter, []string{"career = " + strconv.Quote(career)}) + } + + return filter +} diff --git a/internal/search/search.go b/internal/search/search.go new file mode 100644 index 000000000..8c80027be --- /dev/null +++ b/internal/search/search.go @@ -0,0 +1,150 @@ +// SPDX-License-Identifier: AGPL-3.0-only +// +// This program is free software: you can redistribute it and/or modify +// it under the terms of the GNU Affero General Public License as published +// by the Free Software Foundation, version 3. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +// See the GNU Affero General Public License for more details. +// +// You should have received a copy of the GNU Affero General Public License +// along with this program. If not, see + +package search + +import ( + "context" + "fmt" + "net/http" + "net/url" + + "github.com/labstack/echo/v4" + "github.com/meilisearch/meilisearch-go" + "github.com/trim21/errgo" + "go.uber.org/zap" + + "github.com/bangumi/server/config" + "github.com/bangumi/server/dal/query" + "github.com/bangumi/server/internal/character" + "github.com/bangumi/server/internal/person" + characterSearcher "github.com/bangumi/server/internal/search/character" + personSearcher "github.com/bangumi/server/internal/search/person" + "github.com/bangumi/server/internal/search/searcher" + subjectSearcher "github.com/bangumi/server/internal/search/subject" + "github.com/bangumi/server/internal/subject" +) + +type SearchTarget string + +const ( + SearchTargetSubject SearchTarget = "subject" + SearchTargetCharacter SearchTarget = "character" + SearchTargetPerson SearchTarget = "person" +) + +type Client interface { + Handle(c echo.Context, target SearchTarget) error + Close() + + EventAdded(ctx context.Context, id uint32, target SearchTarget) error + EventUpdate(ctx context.Context, id uint32, target SearchTarget) error + EventDelete(ctx context.Context, id uint32, target SearchTarget) error +} + +type Handler interface { + Handle(c echo.Context, target SearchTarget) error +} + +type Search struct { + searchers map[SearchTarget]searcher.Searcher +} + +// New provide a search app is AppConfig.MeiliSearchURL is empty string, return nope search client. +// +// see `MeiliSearchURL` and `MeiliSearchKey` in [config.AppConfig]. +func New( + cfg config.AppConfig, + subjectRepo subject.Repo, + characterRepo character.Repo, + personRepo person.Repo, + log *zap.Logger, + query *query.Query, +) (Client, error) { + if cfg.Search.MeiliSearch.URL == "" { + return NoopClient{}, nil + } + if _, err := url.Parse(cfg.Search.MeiliSearch.URL); err != nil { + return nil, errgo.Wrap(err, "url.Parse") + } + meili := meilisearch.New( + cfg.Search.MeiliSearch.URL, + meilisearch.WithAPIKey(cfg.Search.MeiliSearch.Key), + meilisearch.WithCustomClient(&http.Client{Timeout: cfg.Search.MeiliSearch.Timeout}), + ) + if _, err := meili.Version(); err != nil { + return nil, errgo.Wrap(err, "meilisearch") + } + + subject, err := subjectSearcher.New(cfg, meili, subjectRepo, log, query) + if err != nil { + return nil, errgo.Wrap(err, "subject search") + } + character, err := characterSearcher.New(cfg, meili, characterRepo, log, query) + if err != nil { + return nil, errgo.Wrap(err, "character search") + } + person, err := personSearcher.New(cfg, meili, personRepo, log, query) + if err != nil { + return nil, errgo.Wrap(err, "person search") + } + + searchers := map[SearchTarget]searcher.Searcher{ + SearchTargetSubject: subject, + SearchTargetCharacter: character, + SearchTargetPerson: person, + } + s := &Search{ + searchers: searchers, + } + return s, nil +} + +func (s *Search) Handle(c echo.Context, target SearchTarget) error { + searcher := s.searchers[target] + if searcher == nil { + return fmt.Errorf("searcher not found for %s", target) + } + return searcher.Handle(c) +} + +func (s *Search) EventAdded(ctx context.Context, id uint32, target SearchTarget) error { + searcher := s.searchers[target] + if searcher == nil { + return fmt.Errorf("searcher not found for %s", target) + } + return searcher.OnAdded(ctx, id) +} + +func (s *Search) EventUpdate(ctx context.Context, id uint32, target SearchTarget) error { + searcher := s.searchers[target] + if searcher == nil { + return fmt.Errorf("searcher not found for %s", target) + } + return searcher.OnUpdate(ctx, id) +} + +func (s *Search) EventDelete(ctx context.Context, id uint32, target SearchTarget) error { + searcher := s.searchers[target] + if searcher == nil { + return fmt.Errorf("searcher not found for %s", target) + } + return searcher.OnDelete(ctx, id) +} + +func (s *Search) Close() { + for _, searcher := range s.searchers { + searcher.Close() + } +} diff --git a/internal/search/searcher/client.go b/internal/search/searcher/client.go new file mode 100644 index 000000000..d18498cb5 --- /dev/null +++ b/internal/search/searcher/client.go @@ -0,0 +1,231 @@ +package searcher + +import ( + "context" + "errors" + "fmt" + "os" + "reflect" + "strings" + "time" + + "github.com/avast/retry-go/v4" + "github.com/labstack/echo/v4" + "github.com/meilisearch/meilisearch-go" + "github.com/prometheus/client_golang/prometheus" + "github.com/samber/lo" + "github.com/trim21/errgo" + "github.com/trim21/pkg/queue" + "go.uber.org/zap" + + "github.com/bangumi/server/config" + "github.com/bangumi/server/pkg/wiki" +) + +type Searcher interface { + Handle(c echo.Context) error + + Close() + + OnAdded(ctx context.Context, id uint32) error + OnUpdate(ctx context.Context, id uint32) error + OnDelete(ctx context.Context, id uint32) error +} + +type Document interface { + GetID() string +} + +func NeedFirstRun(meili meilisearch.ServiceManager, idx string) (bool, error) { + if os.Getenv("CHII_SEARCH_INIT") == "true" { + return true, nil + } + + index, err := meili.GetIndex(idx) + if err != nil { + var e *meilisearch.Error + if errors.As(err, &e) { + return true, nil + } + return false, errgo.Wrap(err, fmt.Sprintf("get index %s", idx)) + } + + stat, err := index.GetStats() + if err != nil { + return false, errgo.Wrap(err, fmt.Sprintf("get index %s stats", idx)) + } + + return stat.NumberOfDocuments == 0, nil +} + +func ValidateConfigs(cfg config.AppConfig) error { + if cfg.Search.SearchBatchSize <= 0 { + // nolint: goerr113 + return fmt.Errorf("config.SearchBatchSize should >= 0, current %d", cfg.Search.SearchBatchSize) + } + + if cfg.Search.SearchBatchInterval <= 0 { + // nolint: goerr113 + return fmt.Errorf("config.SearchBatchInterval should >= 0, current %d", cfg.Search.SearchBatchInterval) + } + + return nil +} + +func ExtractAliases(w wiki.Wiki) []string { + aliases := []string{} + for _, field := range w.Fields { + if field.Key == "中文名" { + aliases = append(aliases, GetWikiValues(field)...) + } + if field.Key == "简体中文名" { + aliases = append(aliases, GetWikiValues(field)...) + } + } + for _, field := range w.Fields { + if field.Key == "别名" { + aliases = append(aliases, GetWikiValues(field)...) + } + } + return aliases +} + +func GetWikiValues(f wiki.Field) []string { + if f.Null { + return nil + } + + if !f.Array { + return []string{f.Value} + } + + var s = make([]string, len(f.Values)) + for i, value := range f.Values { + s[i] = value.Value + } + return s +} + +func NewSendBatch(log *zap.Logger, index meilisearch.IndexManager) func([]Document) { + return func(items []Document) { + log.Debug("send batch to meilisearch", zap.Int("len", len(items))) + err := retry.Do( + func() error { + _, err := index.UpdateDocuments(items, "id") + return err + }, + retry.OnRetry(func(n uint, err error) { + log.Warn("failed to send batch", zap.Uint("attempt", n), zap.Error(err)) + }), + retry.DelayType(retry.BackOffDelay), + retry.Delay(time.Microsecond*100), + retry.Attempts(5), //nolint:mnd + retry.RetryIf(func(err error) bool { + var r = &meilisearch.Error{} + return errors.As(err, &r) + }), + ) + if err != nil { + log.Error("failed to send batch", zap.Error(err)) + } + } +} + +func NewDedupeFunc() func([]Document) []Document { + return func(items []Document) []Document { + // lo.UniqBy 会保留第一次出现的元素,reverse 之后会保留新的数据 + return lo.UniqBy(lo.Reverse(items), func(item Document) string { + return item.GetID() + }) + } +} + +func NewBatchQueue(cfg config.AppConfig, log *zap.Logger, index meilisearch.IndexManager) *queue.Batched[Document] { + return queue.NewBatchedDedupe( + NewSendBatch(log, index), + cfg.Search.SearchBatchSize, + cfg.Search.SearchBatchInterval, + NewDedupeFunc(), + ) +} + +func RegisterQueueMetrics(idx string, queue *queue.Batched[Document]) { + prometheus.DefaultRegisterer.MustRegister( + prometheus.NewGaugeFunc( + prometheus.GaugeOpts{ + Namespace: "chii", + Name: "meilisearch_queue_batch", + Help: "meilisearch update queue batch size", + ConstLabels: prometheus.Labels{ + "index": idx, + }, + }, + func() float64 { + return float64(queue.Len()) + }, + )) +} + +func GetAttributes(rt reflect.Type, tag string) *[]string { + var s []string + for i := 0; i < rt.NumField(); i++ { + t, ok := rt.Field(i).Tag.Lookup(tag) + if !ok { + continue + } + if t != "true" { + continue + } + s = append(s, getJSONFieldName(rt.Field(i))) + } + return &s +} + +func getJSONFieldName(f reflect.StructField) string { + t := f.Tag.Get("json") + if t == "" { + return f.Name + } + return strings.Split(t, ",")[0] +} + +func InitIndex(log *zap.Logger, meili meilisearch.ServiceManager, idx string, rt reflect.Type, rankRule *[]string) { + _, err := meili.CreateIndex(&meilisearch.IndexConfig{ + Uid: idx, + PrimaryKey: "id", + }) + if err != nil { + log.Fatal("failed to create search index", zap.Error(err)) + return + } + + index := meili.Index(idx) + + log.Info("set sortable attributes", zap.Strings("attributes", *GetAttributes(rt, "sortable"))) + _, err = index.UpdateSortableAttributes(GetAttributes(rt, "sortable")) + if err != nil { + log.Fatal("failed to update search index sortable attributes", zap.Error(err)) + return + } + + log.Info("set filterable attributes", zap.Strings("attributes", *GetAttributes(rt, "filterable"))) + _, err = index.UpdateFilterableAttributes(GetAttributes(rt, "filterable")) + if err != nil { + log.Fatal("failed to update search index filterable attributes", zap.Error(err)) + return + } + + log.Info("set searchable attributes", zap.Strings("attributes", *GetAttributes(rt, "searchable"))) + _, err = index.UpdateSearchableAttributes(GetAttributes(rt, "searchable")) + if err != nil { + log.Fatal("failed to update search index searchable attributes", zap.Error(err)) + return + } + + log.Info("set ranking rules", zap.Strings("rule", *rankRule)) + _, err = index.UpdateRankingRules(rankRule) + if err != nil { + log.Fatal("failed to update search index searchable attributes", zap.Error(err)) + return + } +} diff --git a/internal/search/subject/client.go b/internal/search/subject/client.go new file mode 100644 index 000000000..b7c899740 --- /dev/null +++ b/internal/search/subject/client.go @@ -0,0 +1,111 @@ +package subject + +import ( + "context" + "fmt" + "reflect" + "strconv" + + "github.com/meilisearch/meilisearch-go" + "github.com/trim21/errgo" + "github.com/trim21/pkg/queue" + "go.uber.org/zap" + + "github.com/bangumi/server/config" + "github.com/bangumi/server/dal/query" + "github.com/bangumi/server/internal/model" + "github.com/bangumi/server/internal/search/searcher" + "github.com/bangumi/server/internal/subject" +) + +const ( + idx = "subjects" +) + +func New( + cfg config.AppConfig, + meili meilisearch.ServiceManager, + repo subject.Repo, + log *zap.Logger, + query *query.Query, +) (searcher.Searcher, error) { + if repo == nil { + return nil, fmt.Errorf("nil subjectRepo") + } + c := &client{ + meili: meili, + repo: repo, + index: meili.Index(idx), + log: log.Named("search").With(zap.String("index", idx)), + q: query, + } + + if cfg.AppType != config.AppTypeCanal { + return c, nil + } + + return c, c.canalInit(cfg) +} + +type client struct { + repo subject.Repo + index meilisearch.IndexManager + + meili meilisearch.ServiceManager + log *zap.Logger + q *query.Query + + queue *queue.Batched[searcher.Document] +} + +func (c *client) Close() { + if c.queue != nil { + c.queue.Close() + } +} + +func (c *client) canalInit(cfg config.AppConfig) error { + if err := searcher.ValidateConfigs(cfg); err != nil { + return errgo.Wrap(err, "validate search config") + } + c.queue = searcher.NewBatchQueue(cfg, c.log, c.index) + searcher.RegisterQueueMetrics(idx, c.queue) + + shouldCreateIndex, err := searcher.NeedFirstRun(c.meili, idx) + if err != nil { + return err + } + if shouldCreateIndex { + go c.firstRun() + } + return nil +} + +//nolint:funlen +func (c *client) firstRun() { + c.log.Info("search initialize") + rt := reflect.TypeOf(document{}) + searcher.InitIndex(c.log, c.meili, idx, rt, rankRule()) + + ctx := context.Background() + + maxItem, err := c.q.Subject.WithContext(ctx).Limit(1).Order(c.q.Subject.ID.Desc()).Take() + if err != nil { + c.log.Fatal("failed to get current max id", zap.Error(err)) + return + } + + c.log.Info(fmt.Sprintf("run full search index with max %s id %d", idx, maxItem.ID)) + + width := len(strconv.Itoa(int(maxItem.ID))) + for i := model.SubjectID(1); i <= maxItem.ID; i++ { + if i%10000 == 0 { + c.log.Info(fmt.Sprintf("progress %*d/%d", width, i, maxItem.ID)) + } + + err := c.OnUpdate(ctx, i) + if err != nil { + c.log.Error("error when updating", zap.Error(err)) + } + } +} diff --git a/internal/search/index.go b/internal/search/subject/doc.go similarity index 82% rename from internal/search/index.go rename to internal/search/subject/doc.go index d185c5a49..ec7b083a4 100644 --- a/internal/search/index.go +++ b/internal/search/subject/doc.go @@ -12,13 +12,14 @@ // You should have received a copy of the GNU Affero General Public License // along with this program. If not, see -package search +package subject import ( "strconv" "strings" "github.com/bangumi/server/internal/model" + "github.com/bangumi/server/internal/search/searcher" "github.com/bangumi/server/pkg/wiki" ) @@ -26,7 +27,7 @@ import ( // 使用 `filterable:"true"`, `sortable:"true"` // 两种 tag 来设置是否可以被索引和排序. // 搜索字段因为带有排序,所以定义在 [search.searchAbleAttribute] 中. -type subjectIndex struct { +type document struct { ID model.SubjectID `json:"id"` Tag []string `json:"tag,omitempty" filterable:"true"` MetaTags []string `json:"meta_tag" filterable:"true"` @@ -42,6 +43,10 @@ type subjectIndex struct { NSFW bool `json:"nsfw" filterable:"true"` } +func (d *document) GetID() string { + return strconv.FormatUint(uint64(d.ID), 10) +} + func rankRule() *[]string { return &[]string{ // 相似度最优先 @@ -60,7 +65,11 @@ func rankRule() *[]string { } } -func extractSubject(s *model.Subject) subjectIndex { +func heat(s *model.Subject) uint32 { + return s.OnHold + s.Doing + s.Dropped + s.Wish + s.Collect +} + +func extract(s *model.Subject) searcher.Document { tags := s.Tags w := wiki.ParseOmitError(s.Infobox) @@ -72,7 +81,7 @@ func extractSubject(s *model.Subject) subjectIndex { tagNames[i] = tag.Name } - return subjectIndex{ + return &document{ ID: s.ID, Name: s.Name, Aliases: extractAliases(s, w), @@ -89,6 +98,21 @@ func extractSubject(s *model.Subject) subjectIndex { } } +func extractAliases(s *model.Subject, w wiki.Wiki) []string { + var aliases = make([]string, 0, 2) + if s.NameCN != "" { + aliases = append(aliases, s.NameCN) + } + + for _, field := range w.Fields { + if field.Key == "别名" { + aliases = append(aliases, searcher.GetWikiValues(field)...) + } + } + + return aliases +} + func parseDateVal(date string) int { if len(date) < 10 { return 0 diff --git a/internal/search/extract_internal_test.go b/internal/search/subject/doc_internal_test.go similarity index 98% rename from internal/search/extract_internal_test.go rename to internal/search/subject/doc_internal_test.go index f84874568..6af854169 100644 --- a/internal/search/extract_internal_test.go +++ b/internal/search/subject/doc_internal_test.go @@ -12,7 +12,7 @@ // You should have received a copy of the GNU Affero General Public License // along with this program. If not, see -package search +package subject import ( "testing" diff --git a/internal/search/subject/event.go b/internal/search/subject/event.go new file mode 100644 index 000000000..268338403 --- /dev/null +++ b/internal/search/subject/event.go @@ -0,0 +1,58 @@ +package subject + +import ( + "context" + "errors" + "strconv" + + "github.com/trim21/errgo" + + "github.com/bangumi/server/domain/gerr" + "github.com/bangumi/server/internal/model" + "github.com/bangumi/server/internal/subject" +) + +func (c *client) OnAdded(ctx context.Context, id model.SubjectID) error { + s, err := c.repo.Get(ctx, id, subject.Filter{}) + if err != nil { + if errors.Is(err, gerr.ErrNotFound) { + return nil + } + return errgo.Wrap(err, "subjectRepo.Get") + } + + if s.Redirect != 0 || s.Ban != 0 { + return c.OnDelete(ctx, id) + } + + extracted := extract(&s) + + _, err = c.index.UpdateDocumentsWithContext(ctx, extracted, "id") + return err +} + +func (c *client) OnUpdate(ctx context.Context, id model.SubjectID) error { + s, err := c.repo.Get(ctx, id, subject.Filter{}) + if err != nil { + if errors.Is(err, gerr.ErrNotFound) { + return nil + } + return errgo.Wrap(err, "subjectRepo.Get") + } + + if s.Redirect != 0 || s.Ban != 0 { + return c.OnDelete(ctx, id) + } + + extracted := extract(&s) + + c.queue.Push(extracted) + + return nil +} + +func (c *client) OnDelete(ctx context.Context, id model.SubjectID) error { + _, err := c.index.DeleteDocumentWithContext(ctx, strconv.FormatUint(uint64(id), 10)) + + return errgo.Wrap(err, "search") +} diff --git a/internal/search/handle.go b/internal/search/subject/handle.go similarity index 94% rename from internal/search/handle.go rename to internal/search/subject/handle.go index dc7978c23..bc524a8e8 100644 --- a/internal/search/handle.go +++ b/internal/search/subject/handle.go @@ -13,10 +13,9 @@ // along with this program. If not, see // Package search 基于 meilisearch 提供搜索功能 -package search +package subject import ( - "context" "encoding/json" "fmt" "net/http" @@ -40,20 +39,6 @@ import ( "github.com/bangumi/server/web/res" ) -type Client interface { - Handler - Close() - - OnSubjectAdded(ctx context.Context, id model.SubjectID) error - OnSubjectUpdate(ctx context.Context, id model.SubjectID) error - OnSubjectDelete(ctx context.Context, id model.SubjectID) error -} - -// Handler TODO: 想个办法挪到 web 里面去. -type Handler interface { - Handle(c echo.Context) error -} - const defaultLimit = 10 const maxLimit = 20 @@ -130,7 +115,7 @@ func (c *client) Handle(ctx echo.Context) error { } ids := slice.Map(hits, func(h hit) model.SubjectID { return h.ID }) - subjects, err := c.subjectRepo.GetByIDs(ctx.Request().Context(), ids, subject.Filter{}) + subjects, err := c.repo.GetByIDs(ctx.Request().Context(), ids, subject.Filter{}) if err != nil { return errgo.Wrap(err, "subjectRepo.GetByIDs") } @@ -187,7 +172,7 @@ func (c *client) doSearch( return nil, res.BadRequest("sort not supported") } - raw, err := c.subjectIndex.SearchRaw(words, &meilisearch.SearchRequest{ + raw, err := c.index.SearchRaw(words, &meilisearch.SearchRequest{ Offset: int64(offset), Limit: int64(limit), Filter: filter, diff --git a/internal/search/handle_internal_test.go b/internal/search/subject/handle_internal_test.go similarity index 98% rename from internal/search/handle_internal_test.go rename to internal/search/subject/handle_internal_test.go index fb54defb2..5eb9e839f 100644 --- a/internal/search/handle_internal_test.go +++ b/internal/search/subject/handle_internal_test.go @@ -12,7 +12,7 @@ // You should have received a copy of the GNU Affero General Public License // along with this program. If not, see -package search +package subject import ( "testing" diff --git a/internal/search/index_internal_test.go b/internal/search/subject/index_internal_test.go similarity index 84% rename from internal/search/index_internal_test.go rename to internal/search/subject/index_internal_test.go index 5049c767b..1da67e001 100644 --- a/internal/search/index_internal_test.go +++ b/internal/search/subject/index_internal_test.go @@ -12,19 +12,23 @@ // You should have received a copy of the GNU Affero General Public License // along with this program. If not, see -package search +package subject import ( + "reflect" "sort" "testing" "github.com/stretchr/testify/require" + + "github.com/bangumi/server/internal/search/searcher" ) func TestIndexFilter(t *testing.T) { t.Parallel() - actual := *(getAttributes("filterable")) + rt := reflect.TypeOf(document{}) + actual := *(searcher.GetAttributes(rt, "filterable")) expected := []string{"date", "meta_tag", "score", "rank", "type", "nsfw", "tag"} sort.Strings(expected) diff --git a/openapi/v0.yaml b/openapi/v0.yaml index 98584a8d2..69addf9f0 100644 --- a/openapi/v0.yaml +++ b/openapi/v0.yaml @@ -131,7 +131,6 @@ paths: `true` 只会返回 R18 条目。 `false` 只会返回非 R18 条目。 - responses: 200: description: 返回搜索结果 @@ -140,6 +139,120 @@ paths: schema: "$ref": "#/components/schemas/Paged_Subject" + "/v0/search/characters": + post: + tags: + - 角色 + summary: 角色搜索 + operationId: searchCharacters + description: | + ## 实验性 API, 本 schema 和实际的 API 行为都可能随时发生改动 + + 目前支持的筛选条件包括: + - `nsfw`: 使用 `include` 包含NSFW搜索结果。默认排除搜索NSFW条目。无权限情况下忽略此选项,不会返回NSFW条目。 + + parameters: + - name: limit + in: query + description: 分页参数 + required: false + schema: + type: integer + - name: offset + in: query + description: 分页参数 + required: false + schema: + type: integer + requestBody: + content: + "application/json": + schema: + type: object + required: + - keyword + properties: + keyword: + type: string + filter: + type: object + description: 不同条件之间是 `且` 的关系 + properties: + nsfw: + type: boolean + description: | + 无权限的用户会直接忽略此字段,不会返回 R18 角色。 + + 默认或者 `null` 会返回包含 R18 的所有搜索结果。 + + `true` 只会返回 R18 角色。 + + `false` 只会返回非 R18 角色。 + responses: + 200: + description: 返回搜索结果 + content: + application/json: + schema: + "$ref": "#/components/schemas/Paged_Character" + + "/v0/search/persons": + post: + tags: + - 人物 + summary: 人物搜索 + operationId: searchPersons + description: | + ## 实验性 API, 本 schema 和实际的 API 行为都可能随时发生改动 + + 目前支持的筛选条件包括: + - `career`: 职业,可以多次出现。`且` 关系。 + + 不同筛选条件之间为 `且` + + parameters: + - name: limit + in: query + description: 分页参数 + required: false + schema: + type: integer + - name: offset + in: query + description: 分页参数 + required: false + schema: + type: integer + requestBody: + content: + "application/json": + schema: + type: object + required: + - keyword + properties: + keyword: + type: string + filter: + type: object + description: 不同条件之间是 `且` 的关系 + properties: + career: + type: array + items: + type: string + example: + - artist + - director + description: 职业,可以多次出现。多值之间为 `且` 关系。 + responses: + 200: + description: 返回搜索结果 + content: + application/json: + schema: + "$ref": "#/components/schemas/Paged_Person" + "/v0/subjects": get: tags: @@ -479,7 +592,7 @@ paths: content: application/json: schema: - "$ref": "#/components/schemas/CharacterDetail" + "$ref": "#/components/schemas/Character" "404": description: Not Found content: @@ -1962,8 +2075,8 @@ components: - B - AB - O - CharacterDetail: - title: CharacterDetail + Character: + title: Character required: - id - name @@ -2526,6 +2639,50 @@ components: items: "$ref": "#/components/schemas/Subject" default: [] + Paged_Character: + title: Paged[Character] + type: object + properties: + total: + title: Total + type: integer + default: 0 + limit: + title: Limit + type: integer + default: 0 + offset: + title: Offset + type: integer + default: 0 + data: + title: Data + type: array + items: + "$ref": "#/components/schemas/Character" + default: [] + Paged_Person: + title: Paged[Person] + type: object + properties: + total: + title: Total + type: integer + default: 0 + limit: + title: Limit + type: integer + default: 0 + offset: + title: Offset + type: integer + default: 0 + data: + title: Data + type: array + items: + "$ref": "#/components/schemas/Person" + default: [] Paged_Episode: title: Paged[Episode] type: object diff --git a/web/handler/character/character.go b/web/handler/character/character.go index 8578489f9..eacf54fd4 100644 --- a/web/handler/character/character.go +++ b/web/handler/character/character.go @@ -21,13 +21,8 @@ import ( "github.com/bangumi/server/ctrl" "github.com/bangumi/server/internal/character" "github.com/bangumi/server/internal/collections" - "github.com/bangumi/server/internal/model" "github.com/bangumi/server/internal/person" - "github.com/bangumi/server/internal/pkg/compat" - "github.com/bangumi/server/internal/pkg/null" "github.com/bangumi/server/internal/subject" - "github.com/bangumi/server/pkg/wiki" - "github.com/bangumi/server/web/res" ) type Character struct { @@ -58,28 +53,3 @@ func New( cfg: config.AppConfig{}, }, nil } - -func convertModelCharacter(s model.Character) res.CharacterV0 { - img := res.PersonImage(s.Image) - - return res.CharacterV0{ - ID: s.ID, - Type: s.Type, - Name: s.Name, - NSFW: s.NSFW, - Images: img, - Summary: s.Summary, - Infobox: compat.V0Wiki(wiki.ParseOmitError(s.Infobox).NonZero()), - Gender: null.NilString(res.GenderMap[s.FieldGender]), - BloodType: null.NilUint8(s.FieldBloodType), - BirthYear: null.NilUint16(s.FieldBirthYear), - BirthMon: null.NilUint8(s.FieldBirthMon), - BirthDay: null.NilUint8(s.FieldBirthDay), - Stat: res.Stat{ - Comments: s.CommentCount, - Collects: s.CollectCount, - }, - Redirect: s.Redirect, - Locked: s.Locked, - } -} diff --git a/web/handler/character/get.go b/web/handler/character/get.go index 08547ddca..8e9fde474 100644 --- a/web/handler/character/get.go +++ b/web/handler/character/get.go @@ -53,7 +53,7 @@ func (h Character) Get(c echo.Context) error { return res.ErrNotFound } - return c.JSON(http.StatusOK, convertModelCharacter(r)) + return c.JSON(http.StatusOK, res.ConvertModelCharacter(r)) } func (h Character) GetImage(c echo.Context) error { diff --git a/web/handler/search.go b/web/handler/search.go index 38895045f..cf0aeec0c 100644 --- a/web/handler/search.go +++ b/web/handler/search.go @@ -16,8 +16,18 @@ package handler import ( "github.com/labstack/echo/v4" + + "github.com/bangumi/server/internal/search" ) -func (h Handler) Search(c echo.Context) error { - return h.search.Handle(c) //nolint:wrapcheck +func (h Handler) SearchSubjects(c echo.Context) error { + return h.search.Handle(c, search.SearchTargetSubject) //nolint:wrapcheck +} + +func (h Handler) SearchCharacters(c echo.Context) error { + return h.search.Handle(c, search.SearchTargetCharacter) //nolint:wrapcheck +} + +func (h Handler) SearchPersons(c echo.Context) error { + return h.search.Handle(c, search.SearchTargetPerson) //nolint:wrapcheck } diff --git a/web/res/character.go b/web/res/character.go index 2f6ac467d..4acdd7e04 100644 --- a/web/res/character.go +++ b/web/res/character.go @@ -14,7 +14,12 @@ package res -import "github.com/bangumi/server/internal/model" +import ( + "github.com/bangumi/server/internal/model" + "github.com/bangumi/server/internal/pkg/compat" + "github.com/bangumi/server/internal/pkg/null" + "github.com/bangumi/server/pkg/wiki" +) type CharacterV0 struct { BirthMon *uint8 `json:"birth_mon"` @@ -52,3 +57,28 @@ func CharacterStaffString(i uint8) string { return "" } + +func ConvertModelCharacter(s model.Character) CharacterV0 { + img := PersonImage(s.Image) + + return CharacterV0{ + ID: s.ID, + Type: s.Type, + Name: s.Name, + NSFW: s.NSFW, + Images: img, + Summary: s.Summary, + Infobox: compat.V0Wiki(wiki.ParseOmitError(s.Infobox).NonZero()), + Gender: null.NilString(GenderMap[s.FieldGender]), + BloodType: null.NilUint8(s.FieldBloodType), + BirthYear: null.NilUint16(s.FieldBirthYear), + BirthMon: null.NilUint8(s.FieldBirthMon), + BirthDay: null.NilUint8(s.FieldBirthDay), + Stat: Stat{ + Comments: s.CommentCount, + Collects: s.CollectCount, + }, + Redirect: s.Redirect, + Locked: s.Locked, + } +} diff --git a/web/routes.go b/web/routes.go index f3a2b169c..2c3808fde 100644 --- a/web/routes.go +++ b/web/routes.go @@ -58,7 +58,9 @@ func AddRouters( v0 := app.Group("/v0", common.MiddlewareAccessTokenAuth) - v0.POST("/search/subjects", h.Search) + v0.POST("/search/subjects", h.SearchSubjects) + v0.POST("/search/characters", h.SearchCharacters) + v0.POST("/search/persons", h.SearchPersons) subjectHandler.Routes(v0)