diff options
Diffstat (limited to 'modules/indexer/issues')
-rw-r--r-- | modules/indexer/issues/bleve/bleve.go | 300 | ||||
-rw-r--r-- | modules/indexer/issues/bleve/bleve_test.go | 18 | ||||
-rw-r--r-- | modules/indexer/issues/db/db.go | 107 | ||||
-rw-r--r-- | modules/indexer/issues/db/options.go | 112 | ||||
-rw-r--r-- | modules/indexer/issues/dboptions.go | 100 | ||||
-rw-r--r-- | modules/indexer/issues/elasticsearch/elasticsearch.go | 290 | ||||
-rw-r--r-- | modules/indexer/issues/elasticsearch/elasticsearch_test.go | 48 | ||||
-rw-r--r-- | modules/indexer/issues/indexer.go | 315 | ||||
-rw-r--r-- | modules/indexer/issues/indexer_test.go | 404 | ||||
-rw-r--r-- | modules/indexer/issues/internal/indexer.go | 42 | ||||
-rw-r--r-- | modules/indexer/issues/internal/model.go | 150 | ||||
-rw-r--r-- | modules/indexer/issues/internal/tests/tests.go | 771 | ||||
-rw-r--r-- | modules/indexer/issues/meilisearch/meilisearch.go | 301 | ||||
-rw-r--r-- | modules/indexer/issues/meilisearch/meilisearch_test.go | 97 | ||||
-rw-r--r-- | modules/indexer/issues/util.go | 193 |
15 files changed, 3248 insertions, 0 deletions
diff --git a/modules/indexer/issues/bleve/bleve.go b/modules/indexer/issues/bleve/bleve.go new file mode 100644 index 00000000..b20fcc6f --- /dev/null +++ b/modules/indexer/issues/bleve/bleve.go @@ -0,0 +1,300 @@ +// Copyright 2018 The Gitea Authors. All rights reserved. +// SPDX-License-Identifier: MIT + +package bleve + +import ( + "context" + + indexer_internal "code.gitea.io/gitea/modules/indexer/internal" + inner_bleve "code.gitea.io/gitea/modules/indexer/internal/bleve" + "code.gitea.io/gitea/modules/indexer/issues/internal" + + "github.com/blevesearch/bleve/v2" + "github.com/blevesearch/bleve/v2/analysis/analyzer/custom" + "github.com/blevesearch/bleve/v2/analysis/token/camelcase" + "github.com/blevesearch/bleve/v2/analysis/token/lowercase" + "github.com/blevesearch/bleve/v2/analysis/token/unicodenorm" + "github.com/blevesearch/bleve/v2/analysis/tokenizer/unicode" + "github.com/blevesearch/bleve/v2/mapping" + "github.com/blevesearch/bleve/v2/search/query" +) + +const ( + issueIndexerAnalyzer = "issueIndexer" + issueIndexerDocType = "issueIndexerDocType" + issueIndexerLatestVersion = 4 +) + +const unicodeNormalizeName = "unicodeNormalize" + +func addUnicodeNormalizeTokenFilter(m *mapping.IndexMappingImpl) error { + return m.AddCustomTokenFilter(unicodeNormalizeName, map[string]any{ + "type": unicodenorm.Name, + "form": unicodenorm.NFC, + }) +} + +const ( + maxBatchSize = 16 + // fuzzyDenominator determines the levenshtein distance per each character of a keyword + fuzzyDenominator = 4 + // see https://github.com/blevesearch/bleve/issues/1563#issuecomment-786822311 + maxFuzziness = 2 +) + +// IndexerData an update to the issue indexer +type IndexerData internal.IndexerData + +// Type returns the document type, for bleve's mapping.Classifier interface. +func (i *IndexerData) Type() string { + return issueIndexerDocType +} + +// generateIssueIndexMapping generates the bleve index mapping for issues +func generateIssueIndexMapping() (mapping.IndexMapping, error) { + mapping := bleve.NewIndexMapping() + docMapping := bleve.NewDocumentMapping() + + numericFieldMapping := bleve.NewNumericFieldMapping() + numericFieldMapping.Store = false + numericFieldMapping.IncludeInAll = false + docMapping.AddFieldMappingsAt("repo_id", numericFieldMapping) + + textFieldMapping := bleve.NewTextFieldMapping() + textFieldMapping.Store = false + textFieldMapping.IncludeInAll = false + + boolFieldMapping := bleve.NewBooleanFieldMapping() + boolFieldMapping.Store = false + boolFieldMapping.IncludeInAll = false + + numberFieldMapping := bleve.NewNumericFieldMapping() + numberFieldMapping.Store = false + numberFieldMapping.IncludeInAll = false + + docMapping.AddFieldMappingsAt("is_public", boolFieldMapping) + + docMapping.AddFieldMappingsAt("title", textFieldMapping) + docMapping.AddFieldMappingsAt("content", textFieldMapping) + docMapping.AddFieldMappingsAt("comments", textFieldMapping) + + docMapping.AddFieldMappingsAt("is_pull", boolFieldMapping) + docMapping.AddFieldMappingsAt("is_closed", boolFieldMapping) + docMapping.AddFieldMappingsAt("label_ids", numberFieldMapping) + docMapping.AddFieldMappingsAt("no_label", boolFieldMapping) + docMapping.AddFieldMappingsAt("milestone_id", numberFieldMapping) + docMapping.AddFieldMappingsAt("project_id", numberFieldMapping) + docMapping.AddFieldMappingsAt("project_board_id", numberFieldMapping) + docMapping.AddFieldMappingsAt("poster_id", numberFieldMapping) + docMapping.AddFieldMappingsAt("assignee_id", numberFieldMapping) + docMapping.AddFieldMappingsAt("mention_ids", numberFieldMapping) + docMapping.AddFieldMappingsAt("reviewed_ids", numberFieldMapping) + docMapping.AddFieldMappingsAt("review_requested_ids", numberFieldMapping) + docMapping.AddFieldMappingsAt("subscriber_ids", numberFieldMapping) + docMapping.AddFieldMappingsAt("updated_unix", numberFieldMapping) + + docMapping.AddFieldMappingsAt("created_unix", numberFieldMapping) + docMapping.AddFieldMappingsAt("deadline_unix", numberFieldMapping) + docMapping.AddFieldMappingsAt("comment_count", numberFieldMapping) + + if err := addUnicodeNormalizeTokenFilter(mapping); err != nil { + return nil, err + } else if err = mapping.AddCustomAnalyzer(issueIndexerAnalyzer, map[string]any{ + "type": custom.Name, + "char_filters": []string{}, + "tokenizer": unicode.Name, + "token_filters": []string{unicodeNormalizeName, camelcase.Name, lowercase.Name}, + }); err != nil { + return nil, err + } + + mapping.DefaultAnalyzer = issueIndexerAnalyzer + mapping.AddDocumentMapping(issueIndexerDocType, docMapping) + mapping.AddDocumentMapping("_all", bleve.NewDocumentDisabledMapping()) + mapping.DefaultMapping = bleve.NewDocumentDisabledMapping() // disable default mapping, avoid indexing unexpected structs + + return mapping, nil +} + +var _ internal.Indexer = &Indexer{} + +// Indexer implements Indexer interface +type Indexer struct { + inner *inner_bleve.Indexer + indexer_internal.Indexer // do not composite inner_bleve.Indexer directly to avoid exposing too much +} + +// NewIndexer creates a new bleve local indexer +func NewIndexer(indexDir string) *Indexer { + inner := inner_bleve.NewIndexer(indexDir, issueIndexerLatestVersion, generateIssueIndexMapping) + return &Indexer{ + Indexer: inner, + inner: inner, + } +} + +// Index will save the index data +func (b *Indexer) Index(_ context.Context, issues ...*internal.IndexerData) error { + batch := inner_bleve.NewFlushingBatch(b.inner.Indexer, maxBatchSize) + for _, issue := range issues { + if err := batch.Index(indexer_internal.Base36(issue.ID), (*IndexerData)(issue)); err != nil { + return err + } + } + return batch.Flush() +} + +// Delete deletes indexes by ids +func (b *Indexer) Delete(_ context.Context, ids ...int64) error { + batch := inner_bleve.NewFlushingBatch(b.inner.Indexer, maxBatchSize) + for _, id := range ids { + if err := batch.Delete(indexer_internal.Base36(id)); err != nil { + return err + } + } + return batch.Flush() +} + +// Search searches for issues by given conditions. +// Returns the matching issue IDs +func (b *Indexer) Search(ctx context.Context, options *internal.SearchOptions) (*internal.SearchResult, error) { + var queries []query.Query + + if options.Keyword != "" { + fuzziness := 0 + if options.IsFuzzyKeyword { + fuzziness = min(maxFuzziness, len(options.Keyword)/fuzzyDenominator) + } + + queries = append(queries, bleve.NewDisjunctionQuery([]query.Query{ + inner_bleve.MatchPhraseQuery(options.Keyword, "title", issueIndexerAnalyzer, fuzziness), + inner_bleve.MatchPhraseQuery(options.Keyword, "content", issueIndexerAnalyzer, fuzziness), + inner_bleve.MatchPhraseQuery(options.Keyword, "comments", issueIndexerAnalyzer, fuzziness), + }...)) + } + + if len(options.RepoIDs) > 0 || options.AllPublic { + var repoQueries []query.Query + for _, repoID := range options.RepoIDs { + repoQueries = append(repoQueries, inner_bleve.NumericEqualityQuery(repoID, "repo_id")) + } + if options.AllPublic { + repoQueries = append(repoQueries, inner_bleve.BoolFieldQuery(true, "is_public")) + } + queries = append(queries, bleve.NewDisjunctionQuery(repoQueries...)) + } + + if options.IsPull.Has() { + queries = append(queries, inner_bleve.BoolFieldQuery(options.IsPull.Value(), "is_pull")) + } + if options.IsClosed.Has() { + queries = append(queries, inner_bleve.BoolFieldQuery(options.IsClosed.Value(), "is_closed")) + } + + if options.NoLabelOnly { + queries = append(queries, inner_bleve.BoolFieldQuery(true, "no_label")) + } else { + if len(options.IncludedLabelIDs) > 0 { + var includeQueries []query.Query + for _, labelID := range options.IncludedLabelIDs { + includeQueries = append(includeQueries, inner_bleve.NumericEqualityQuery(labelID, "label_ids")) + } + queries = append(queries, bleve.NewConjunctionQuery(includeQueries...)) + } else if len(options.IncludedAnyLabelIDs) > 0 { + var includeQueries []query.Query + for _, labelID := range options.IncludedAnyLabelIDs { + includeQueries = append(includeQueries, inner_bleve.NumericEqualityQuery(labelID, "label_ids")) + } + queries = append(queries, bleve.NewDisjunctionQuery(includeQueries...)) + } + if len(options.ExcludedLabelIDs) > 0 { + var excludeQueries []query.Query + for _, labelID := range options.ExcludedLabelIDs { + q := bleve.NewBooleanQuery() + q.AddMustNot(inner_bleve.NumericEqualityQuery(labelID, "label_ids")) + excludeQueries = append(excludeQueries, q) + } + queries = append(queries, bleve.NewConjunctionQuery(excludeQueries...)) + } + } + + if len(options.MilestoneIDs) > 0 { + var milestoneQueries []query.Query + for _, milestoneID := range options.MilestoneIDs { + milestoneQueries = append(milestoneQueries, inner_bleve.NumericEqualityQuery(milestoneID, "milestone_id")) + } + queries = append(queries, bleve.NewDisjunctionQuery(milestoneQueries...)) + } + + if options.ProjectID.Has() { + queries = append(queries, inner_bleve.NumericEqualityQuery(options.ProjectID.Value(), "project_id")) + } + if options.ProjectColumnID.Has() { + queries = append(queries, inner_bleve.NumericEqualityQuery(options.ProjectColumnID.Value(), "project_board_id")) + } + + if options.PosterID.Has() { + queries = append(queries, inner_bleve.NumericEqualityQuery(options.PosterID.Value(), "poster_id")) + } + + if options.AssigneeID.Has() { + queries = append(queries, inner_bleve.NumericEqualityQuery(options.AssigneeID.Value(), "assignee_id")) + } + + if options.MentionID.Has() { + queries = append(queries, inner_bleve.NumericEqualityQuery(options.MentionID.Value(), "mention_ids")) + } + + if options.ReviewedID.Has() { + queries = append(queries, inner_bleve.NumericEqualityQuery(options.ReviewedID.Value(), "reviewed_ids")) + } + if options.ReviewRequestedID.Has() { + queries = append(queries, inner_bleve.NumericEqualityQuery(options.ReviewRequestedID.Value(), "review_requested_ids")) + } + + if options.SubscriberID.Has() { + queries = append(queries, inner_bleve.NumericEqualityQuery(options.SubscriberID.Value(), "subscriber_ids")) + } + + if options.UpdatedAfterUnix.Has() || options.UpdatedBeforeUnix.Has() { + queries = append(queries, inner_bleve.NumericRangeInclusiveQuery( + options.UpdatedAfterUnix, + options.UpdatedBeforeUnix, + "updated_unix")) + } + + var indexerQuery query.Query = bleve.NewConjunctionQuery(queries...) + if len(queries) == 0 { + indexerQuery = bleve.NewMatchAllQuery() + } + + skip, limit := indexer_internal.ParsePaginator(options.Paginator) + search := bleve.NewSearchRequestOptions(indexerQuery, limit, skip, false) + + if options.SortBy == "" { + options.SortBy = internal.SortByCreatedAsc + } + + search.SortBy([]string{string(options.SortBy), "-_id"}) + + result, err := b.inner.Indexer.SearchInContext(ctx, search) + if err != nil { + return nil, err + } + + ret := &internal.SearchResult{ + Total: int64(result.Total), + Hits: make([]internal.Match, 0, len(result.Hits)), + } + for _, hit := range result.Hits { + id, err := indexer_internal.ParseBase36(hit.ID) + if err != nil { + return nil, err + } + ret.Hits = append(ret.Hits, internal.Match{ + ID: id, + }) + } + return ret, nil +} diff --git a/modules/indexer/issues/bleve/bleve_test.go b/modules/indexer/issues/bleve/bleve_test.go new file mode 100644 index 00000000..908514a0 --- /dev/null +++ b/modules/indexer/issues/bleve/bleve_test.go @@ -0,0 +1,18 @@ +// Copyright 2018 The Gitea Authors. All rights reserved. +// SPDX-License-Identifier: MIT + +package bleve + +import ( + "testing" + + "code.gitea.io/gitea/modules/indexer/issues/internal/tests" +) + +func TestBleveIndexer(t *testing.T) { + dir := t.TempDir() + indexer := NewIndexer(dir) + defer indexer.Close() + + tests.TestIndexer(t, indexer) +} diff --git a/modules/indexer/issues/db/db.go b/modules/indexer/issues/db/db.go new file mode 100644 index 00000000..05ec5484 --- /dev/null +++ b/modules/indexer/issues/db/db.go @@ -0,0 +1,107 @@ +// Copyright 2019 The Gitea Authors. All rights reserved. +// SPDX-License-Identifier: MIT + +package db + +import ( + "context" + + "code.gitea.io/gitea/models/db" + issue_model "code.gitea.io/gitea/models/issues" + indexer_internal "code.gitea.io/gitea/modules/indexer/internal" + inner_db "code.gitea.io/gitea/modules/indexer/internal/db" + "code.gitea.io/gitea/modules/indexer/issues/internal" + + "xorm.io/builder" +) + +var _ internal.Indexer = &Indexer{} + +// Indexer implements Indexer interface to use database's like search +type Indexer struct { + indexer_internal.Indexer +} + +func NewIndexer() *Indexer { + return &Indexer{ + Indexer: &inner_db.Indexer{}, + } +} + +// Index dummy function +func (i *Indexer) Index(_ context.Context, _ ...*internal.IndexerData) error { + return nil +} + +// Delete dummy function +func (i *Indexer) Delete(_ context.Context, _ ...int64) error { + return nil +} + +// Search searches for issues +func (i *Indexer) Search(ctx context.Context, options *internal.SearchOptions) (*internal.SearchResult, error) { + // FIXME: I tried to avoid importing models here, but it seems to be impossible. + // We can provide a function to register the search function, so models/issues can register it. + // So models/issues will import modules/indexer/issues, it's OK because it's by design. + // But modules/indexer/issues has already imported models/issues to do UpdateRepoIndexer and UpdateIssueIndexer. + // And to avoid circular import, we have to move the functions to another package. + // I believe it should be services/indexer, sounds great! + // But the two functions are used in modules/notification/indexer, that means we will import services/indexer in modules/notification/indexer. + // So that's the root problem: + // The notification is defined in modules, but it's using lots of things should be in services. + + cond := builder.NewCond() + + if options.Keyword != "" { + repoCond := builder.In("repo_id", options.RepoIDs) + if len(options.RepoIDs) == 1 { + repoCond = builder.Eq{"repo_id": options.RepoIDs[0]} + } + subQuery := builder.Select("id").From("issue").Where(repoCond) + + cond = builder.Or( + db.BuildCaseInsensitiveLike("issue.name", options.Keyword), + db.BuildCaseInsensitiveLike("issue.content", options.Keyword), + builder.In("issue.id", builder.Select("issue_id"). + From("comment"). + Where(builder.And( + builder.Eq{"type": issue_model.CommentTypeComment}, + builder.In("issue_id", subQuery), + db.BuildCaseInsensitiveLike("content", options.Keyword), + )), + ), + ) + } + + opt, err := ToDBOptions(ctx, options) + if err != nil { + return nil, err + } + + // If pagesize == 0, return total count only. It's a special case for search count. + if options.Paginator != nil && options.Paginator.PageSize == 0 { + total, err := issue_model.CountIssues(ctx, opt, cond) + if err != nil { + return nil, err + } + return &internal.SearchResult{ + Total: total, + }, nil + } + + ids, total, err := issue_model.IssueIDs(ctx, opt, cond) + if err != nil { + return nil, err + } + + hits := make([]internal.Match, 0, len(ids)) + for _, id := range ids { + hits = append(hits, internal.Match{ + ID: id, + }) + } + return &internal.SearchResult{ + Total: total, + Hits: hits, + }, nil +} diff --git a/modules/indexer/issues/db/options.go b/modules/indexer/issues/db/options.go new file mode 100644 index 00000000..875a4ca2 --- /dev/null +++ b/modules/indexer/issues/db/options.go @@ -0,0 +1,112 @@ +// Copyright 2023 The Gitea Authors. All rights reserved. +// SPDX-License-Identifier: MIT + +package db + +import ( + "context" + "fmt" + + "code.gitea.io/gitea/models/db" + issue_model "code.gitea.io/gitea/models/issues" + "code.gitea.io/gitea/modules/container" + "code.gitea.io/gitea/modules/indexer/issues/internal" + "code.gitea.io/gitea/modules/optional" +) + +func ToDBOptions(ctx context.Context, options *internal.SearchOptions) (*issue_model.IssuesOptions, error) { + var sortType string + switch options.SortBy { + case internal.SortByCreatedAsc: + sortType = "oldest" + case internal.SortByUpdatedAsc: + sortType = "leastupdate" + case internal.SortByCommentsAsc: + sortType = "leastcomment" + case internal.SortByDeadlineDesc: + sortType = "farduedate" + case internal.SortByCreatedDesc: + sortType = "newest" + case internal.SortByUpdatedDesc: + sortType = "recentupdate" + case internal.SortByCommentsDesc: + sortType = "mostcomment" + case internal.SortByDeadlineAsc: + sortType = "nearduedate" + default: + sortType = "newest" + } + + // See the comment of issues_model.SearchOptions for the reason why we need to convert + convertID := func(id optional.Option[int64]) int64 { + if !id.Has() { + return 0 + } + value := id.Value() + if value == 0 { + return db.NoConditionID + } + return value + } + + opts := &issue_model.IssuesOptions{ + Paginator: options.Paginator, + RepoIDs: options.RepoIDs, + AllPublic: options.AllPublic, + RepoCond: nil, + AssigneeID: convertID(options.AssigneeID), + PosterID: convertID(options.PosterID), + MentionedID: convertID(options.MentionID), + ReviewRequestedID: convertID(options.ReviewRequestedID), + ReviewedID: convertID(options.ReviewedID), + SubscriberID: convertID(options.SubscriberID), + ProjectID: convertID(options.ProjectID), + ProjectColumnID: convertID(options.ProjectColumnID), + IsClosed: options.IsClosed, + IsPull: options.IsPull, + IncludedLabelNames: nil, + ExcludedLabelNames: nil, + IncludeMilestones: nil, + SortType: sortType, + IssueIDs: nil, + UpdatedAfterUnix: options.UpdatedAfterUnix.Value(), + UpdatedBeforeUnix: options.UpdatedBeforeUnix.Value(), + PriorityRepoID: 0, + IsArchived: optional.None[bool](), + Org: nil, + Team: nil, + User: nil, + } + + if len(options.MilestoneIDs) == 1 && options.MilestoneIDs[0] == 0 { + opts.MilestoneIDs = []int64{db.NoConditionID} + } else { + opts.MilestoneIDs = options.MilestoneIDs + } + + if options.NoLabelOnly { + opts.LabelIDs = []int64{0} // Be careful, it's zero, not db.NoConditionID + } else { + opts.LabelIDs = make([]int64, 0, len(options.IncludedLabelIDs)+len(options.ExcludedLabelIDs)) + opts.LabelIDs = append(opts.LabelIDs, options.IncludedLabelIDs...) + for _, id := range options.ExcludedLabelIDs { + opts.LabelIDs = append(opts.LabelIDs, -id) + } + + if len(options.IncludedLabelIDs) == 0 && len(options.IncludedAnyLabelIDs) > 0 { + labels, err := issue_model.GetLabelsByIDs(ctx, options.IncludedAnyLabelIDs, "name") + if err != nil { + return nil, fmt.Errorf("GetLabelsByIDs: %v", err) + } + set := container.Set[string]{} + for _, label := range labels { + if !set.Contains(label.Name) { + set.Add(label.Name) + opts.IncludedLabelNames = append(opts.IncludedLabelNames, label.Name) + } + } + } + } + + return opts, nil +} diff --git a/modules/indexer/issues/dboptions.go b/modules/indexer/issues/dboptions.go new file mode 100644 index 00000000..50916024 --- /dev/null +++ b/modules/indexer/issues/dboptions.go @@ -0,0 +1,100 @@ +// Copyright 2023 The Gitea Authors. All rights reserved. +// SPDX-License-Identifier: MIT + +package issues + +import ( + "code.gitea.io/gitea/models/db" + issues_model "code.gitea.io/gitea/models/issues" + "code.gitea.io/gitea/modules/optional" +) + +func ToSearchOptions(keyword string, opts *issues_model.IssuesOptions) *SearchOptions { + searchOpt := &SearchOptions{ + Keyword: keyword, + RepoIDs: opts.RepoIDs, + AllPublic: opts.AllPublic, + IsPull: opts.IsPull, + IsClosed: opts.IsClosed, + } + + if len(opts.LabelIDs) == 1 && opts.LabelIDs[0] == 0 { + searchOpt.NoLabelOnly = true + } else { + for _, labelID := range opts.LabelIDs { + if labelID > 0 { + searchOpt.IncludedLabelIDs = append(searchOpt.IncludedLabelIDs, labelID) + } else { + searchOpt.ExcludedLabelIDs = append(searchOpt.ExcludedLabelIDs, -labelID) + } + } + // opts.IncludedLabelNames and opts.ExcludedLabelNames are not supported here. + // It's not a TO DO, it's just unnecessary. + } + + if len(opts.MilestoneIDs) == 1 && opts.MilestoneIDs[0] == db.NoConditionID { + searchOpt.MilestoneIDs = []int64{0} + } else { + searchOpt.MilestoneIDs = opts.MilestoneIDs + } + + if opts.ProjectID > 0 { + searchOpt.ProjectID = optional.Some(opts.ProjectID) + } else if opts.ProjectID == -1 { // FIXME: this is inconsistent from other places + searchOpt.ProjectID = optional.Some[int64](0) // Those issues with no project(projectid==0) + } + + // See the comment of issues_model.SearchOptions for the reason why we need to convert + convertID := func(id int64) optional.Option[int64] { + if id > 0 { + return optional.Some(id) + } + if id == db.NoConditionID { + return optional.None[int64]() + } + return nil + } + + searchOpt.ProjectColumnID = convertID(opts.ProjectColumnID) + searchOpt.PosterID = convertID(opts.PosterID) + searchOpt.AssigneeID = convertID(opts.AssigneeID) + searchOpt.MentionID = convertID(opts.MentionedID) + searchOpt.ReviewedID = convertID(opts.ReviewedID) + searchOpt.ReviewRequestedID = convertID(opts.ReviewRequestedID) + searchOpt.SubscriberID = convertID(opts.SubscriberID) + + if opts.UpdatedAfterUnix > 0 { + searchOpt.UpdatedAfterUnix = optional.Some(opts.UpdatedAfterUnix) + } + if opts.UpdatedBeforeUnix > 0 { + searchOpt.UpdatedBeforeUnix = optional.Some(opts.UpdatedBeforeUnix) + } + + searchOpt.Paginator = opts.Paginator + + switch opts.SortType { + case "", "latest": + searchOpt.SortBy = SortByCreatedDesc + case "oldest": + searchOpt.SortBy = SortByCreatedAsc + case "recentupdate": + searchOpt.SortBy = SortByUpdatedDesc + case "leastupdate": + searchOpt.SortBy = SortByUpdatedAsc + case "mostcomment": + searchOpt.SortBy = SortByCommentsDesc + case "leastcomment": + searchOpt.SortBy = SortByCommentsAsc + case "nearduedate": + searchOpt.SortBy = SortByDeadlineAsc + case "farduedate": + searchOpt.SortBy = SortByDeadlineDesc + case "priority", "priorityrepo", "project-column-sorting": + // Unsupported sort type for search + fallthrough + default: + searchOpt.SortBy = SortByUpdatedDesc + } + + return searchOpt +} diff --git a/modules/indexer/issues/elasticsearch/elasticsearch.go b/modules/indexer/issues/elasticsearch/elasticsearch.go new file mode 100644 index 00000000..42e709a5 --- /dev/null +++ b/modules/indexer/issues/elasticsearch/elasticsearch.go @@ -0,0 +1,290 @@ +// Copyright 2019 The Gitea Authors. All rights reserved. +// SPDX-License-Identifier: MIT + +package elasticsearch + +import ( + "context" + "fmt" + "strconv" + "strings" + + "code.gitea.io/gitea/modules/graceful" + indexer_internal "code.gitea.io/gitea/modules/indexer/internal" + inner_elasticsearch "code.gitea.io/gitea/modules/indexer/internal/elasticsearch" + "code.gitea.io/gitea/modules/indexer/issues/internal" + + "github.com/olivere/elastic/v7" +) + +const ( + issueIndexerLatestVersion = 1 + // multi-match-types, currently only 2 types are used + // Reference: https://www.elastic.co/guide/en/elasticsearch/reference/7.0/query-dsl-multi-match-query.html#multi-match-types + esMultiMatchTypeBestFields = "best_fields" + esMultiMatchTypePhrasePrefix = "phrase_prefix" +) + +var _ internal.Indexer = &Indexer{} + +// Indexer implements Indexer interface +type Indexer struct { + inner *inner_elasticsearch.Indexer + indexer_internal.Indexer // do not composite inner_elasticsearch.Indexer directly to avoid exposing too much +} + +// NewIndexer creates a new elasticsearch indexer +func NewIndexer(url, indexerName string) *Indexer { + inner := inner_elasticsearch.NewIndexer(url, indexerName, issueIndexerLatestVersion, defaultMapping) + indexer := &Indexer{ + inner: inner, + Indexer: inner, + } + return indexer +} + +const ( + defaultMapping = ` +{ + "mappings": { + "properties": { + "id": { "type": "long", "index": true }, + "repo_id": { "type": "long", "index": true }, + "is_public": { "type": "boolean", "index": true }, + + "title": { "type": "text", "index": true }, + "content": { "type": "text", "index": true }, + "comments": { "type" : "text", "index": true }, + + "is_pull": { "type": "boolean", "index": true }, + "is_closed": { "type": "boolean", "index": true }, + "label_ids": { "type": "long", "index": true }, + "no_label": { "type": "boolean", "index": true }, + "milestone_id": { "type": "long", "index": true }, + "project_id": { "type": "long", "index": true }, + "project_board_id": { "type": "long", "index": true }, + "poster_id": { "type": "long", "index": true }, + "assignee_id": { "type": "long", "index": true }, + "mention_ids": { "type": "long", "index": true }, + "reviewed_ids": { "type": "long", "index": true }, + "review_requested_ids": { "type": "long", "index": true }, + "subscriber_ids": { "type": "long", "index": true }, + "updated_unix": { "type": "long", "index": true }, + + "created_unix": { "type": "long", "index": true }, + "deadline_unix": { "type": "long", "index": true }, + "comment_count": { "type": "long", "index": true } + } + } +} +` +) + +// Index will save the index data +func (b *Indexer) Index(ctx context.Context, issues ...*internal.IndexerData) error { + if len(issues) == 0 { + return nil + } else if len(issues) == 1 { + issue := issues[0] + _, err := b.inner.Client.Index(). + Index(b.inner.VersionedIndexName()). + Id(fmt.Sprintf("%d", issue.ID)). + BodyJson(issue). + Do(ctx) + return err + } + + reqs := make([]elastic.BulkableRequest, 0) + for _, issue := range issues { + reqs = append(reqs, + elastic.NewBulkIndexRequest(). + Index(b.inner.VersionedIndexName()). + Id(fmt.Sprintf("%d", issue.ID)). + Doc(issue), + ) + } + + _, err := b.inner.Client.Bulk(). + Index(b.inner.VersionedIndexName()). + Add(reqs...). + Do(graceful.GetManager().HammerContext()) + return err +} + +// Delete deletes indexes by ids +func (b *Indexer) Delete(ctx context.Context, ids ...int64) error { + if len(ids) == 0 { + return nil + } else if len(ids) == 1 { + _, err := b.inner.Client.Delete(). + Index(b.inner.VersionedIndexName()). + Id(fmt.Sprintf("%d", ids[0])). + Do(ctx) + return err + } + + reqs := make([]elastic.BulkableRequest, 0) + for _, id := range ids { + reqs = append(reqs, + elastic.NewBulkDeleteRequest(). + Index(b.inner.VersionedIndexName()). + Id(fmt.Sprintf("%d", id)), + ) + } + + _, err := b.inner.Client.Bulk(). + Index(b.inner.VersionedIndexName()). + Add(reqs...). + Do(graceful.GetManager().HammerContext()) + return err +} + +// Search searches for issues by given conditions. +// Returns the matching issue IDs +func (b *Indexer) Search(ctx context.Context, options *internal.SearchOptions) (*internal.SearchResult, error) { + query := elastic.NewBoolQuery() + + if options.Keyword != "" { + searchType := esMultiMatchTypePhrasePrefix + if options.IsFuzzyKeyword { + searchType = esMultiMatchTypeBestFields + } + + query.Must(elastic.NewMultiMatchQuery(options.Keyword, "title", "content", "comments").Type(searchType)) + } + + if len(options.RepoIDs) > 0 { + q := elastic.NewBoolQuery() + q.Should(elastic.NewTermsQuery("repo_id", toAnySlice(options.RepoIDs)...)) + if options.AllPublic { + q.Should(elastic.NewTermQuery("is_public", true)) + } + query.Must(q) + } + + if options.IsPull.Has() { + query.Must(elastic.NewTermQuery("is_pull", options.IsPull.Value())) + } + if options.IsClosed.Has() { + query.Must(elastic.NewTermQuery("is_closed", options.IsClosed.Value())) + } + + if options.NoLabelOnly { + query.Must(elastic.NewTermQuery("no_label", true)) + } else { + if len(options.IncludedLabelIDs) > 0 { + q := elastic.NewBoolQuery() + for _, labelID := range options.IncludedLabelIDs { + q.Must(elastic.NewTermQuery("label_ids", labelID)) + } + query.Must(q) + } else if len(options.IncludedAnyLabelIDs) > 0 { + query.Must(elastic.NewTermsQuery("label_ids", toAnySlice(options.IncludedAnyLabelIDs)...)) + } + if len(options.ExcludedLabelIDs) > 0 { + q := elastic.NewBoolQuery() + for _, labelID := range options.ExcludedLabelIDs { + q.MustNot(elastic.NewTermQuery("label_ids", labelID)) + } + query.Must(q) + } + } + + if len(options.MilestoneIDs) > 0 { + query.Must(elastic.NewTermsQuery("milestone_id", toAnySlice(options.MilestoneIDs)...)) + } + + if options.ProjectID.Has() { + query.Must(elastic.NewTermQuery("project_id", options.ProjectID.Value())) + } + if options.ProjectColumnID.Has() { + query.Must(elastic.NewTermQuery("project_board_id", options.ProjectColumnID.Value())) + } + + if options.PosterID.Has() { + query.Must(elastic.NewTermQuery("poster_id", options.PosterID.Value())) + } + + if options.AssigneeID.Has() { + query.Must(elastic.NewTermQuery("assignee_id", options.AssigneeID.Value())) + } + + if options.MentionID.Has() { + query.Must(elastic.NewTermQuery("mention_ids", options.MentionID.Value())) + } + + if options.ReviewedID.Has() { + query.Must(elastic.NewTermQuery("reviewed_ids", options.ReviewedID.Value())) + } + if options.ReviewRequestedID.Has() { + query.Must(elastic.NewTermQuery("review_requested_ids", options.ReviewRequestedID.Value())) + } + + if options.SubscriberID.Has() { + query.Must(elastic.NewTermQuery("subscriber_ids", options.SubscriberID.Value())) + } + + if options.UpdatedAfterUnix.Has() || options.UpdatedBeforeUnix.Has() { + q := elastic.NewRangeQuery("updated_unix") + if options.UpdatedAfterUnix.Has() { + q.Gte(options.UpdatedAfterUnix.Value()) + } + if options.UpdatedBeforeUnix.Has() { + q.Lte(options.UpdatedBeforeUnix.Value()) + } + query.Must(q) + } + + if options.SortBy == "" { + options.SortBy = internal.SortByCreatedAsc + } + sortBy := []elastic.Sorter{ + parseSortBy(options.SortBy), + elastic.NewFieldSort("id").Desc(), + } + + // See https://stackoverflow.com/questions/35206409/elasticsearch-2-1-result-window-is-too-large-index-max-result-window/35221900 + // TODO: make it configurable since it's configurable in elasticsearch + const maxPageSize = 10000 + + skip, limit := indexer_internal.ParsePaginator(options.Paginator, maxPageSize) + searchResult, err := b.inner.Client.Search(). + Index(b.inner.VersionedIndexName()). + Query(query). + SortBy(sortBy...). + From(skip).Size(limit). + Do(ctx) + if err != nil { + return nil, err + } + + hits := make([]internal.Match, 0, limit) + for _, hit := range searchResult.Hits.Hits { + id, _ := strconv.ParseInt(hit.Id, 10, 64) + hits = append(hits, internal.Match{ + ID: id, + }) + } + + return &internal.SearchResult{ + Total: searchResult.TotalHits(), + Hits: hits, + }, nil +} + +func toAnySlice[T any](s []T) []any { + ret := make([]any, 0, len(s)) + for _, item := range s { + ret = append(ret, item) + } + return ret +} + +func parseSortBy(sortBy internal.SortBy) elastic.Sorter { + field := strings.TrimPrefix(string(sortBy), "-") + ret := elastic.NewFieldSort(field) + if strings.HasPrefix(string(sortBy), "-") { + ret.Desc() + } + return ret +} diff --git a/modules/indexer/issues/elasticsearch/elasticsearch_test.go b/modules/indexer/issues/elasticsearch/elasticsearch_test.go new file mode 100644 index 00000000..4ed0b844 --- /dev/null +++ b/modules/indexer/issues/elasticsearch/elasticsearch_test.go @@ -0,0 +1,48 @@ +// Copyright 2023 The Gitea Authors. All rights reserved. +// SPDX-License-Identifier: MIT + +package elasticsearch + +import ( + "fmt" + "net/http" + "os" + "testing" + "time" + + "code.gitea.io/gitea/modules/indexer/issues/internal/tests" +) + +func TestElasticsearchIndexer(t *testing.T) { + // The elasticsearch instance started by testing.yml > test-unit > services > elasticsearch + url := "http://elastic:changeme@elasticsearch:9200" + + if os.Getenv("CI") == "" { + // Make it possible to run tests against a local elasticsearch instance + url = os.Getenv("TEST_ELASTICSEARCH_URL") + if url == "" { + t.Skip("TEST_ELASTICSEARCH_URL not set and not running in CI") + return + } + } + + ok := false + for i := 0; i < 60; i++ { + resp, err := http.Get(url) + if err == nil && resp.StatusCode == http.StatusOK { + ok = true + break + } + t.Logf("Waiting for elasticsearch to be up: %v", err) + time.Sleep(time.Second) + } + if !ok { + t.Fatalf("Failed to wait for elasticsearch to be up") + return + } + + indexer := NewIndexer(url, fmt.Sprintf("test_elasticsearch_indexer_%d", time.Now().Unix())) + defer indexer.Close() + + tests.TestIndexer(t, indexer) +} diff --git a/modules/indexer/issues/indexer.go b/modules/indexer/issues/indexer.go new file mode 100644 index 00000000..d7310529 --- /dev/null +++ b/modules/indexer/issues/indexer.go @@ -0,0 +1,315 @@ +// Copyright 2018 The Gitea Authors. All rights reserved. +// SPDX-License-Identifier: MIT + +package issues + +import ( + "context" + "fmt" + "os" + "runtime/pprof" + "sync/atomic" + "time" + + db_model "code.gitea.io/gitea/models/db" + repo_model "code.gitea.io/gitea/models/repo" + "code.gitea.io/gitea/modules/graceful" + "code.gitea.io/gitea/modules/indexer/issues/bleve" + "code.gitea.io/gitea/modules/indexer/issues/db" + "code.gitea.io/gitea/modules/indexer/issues/elasticsearch" + "code.gitea.io/gitea/modules/indexer/issues/internal" + "code.gitea.io/gitea/modules/indexer/issues/meilisearch" + "code.gitea.io/gitea/modules/log" + "code.gitea.io/gitea/modules/optional" + "code.gitea.io/gitea/modules/process" + "code.gitea.io/gitea/modules/queue" + "code.gitea.io/gitea/modules/setting" +) + +// IndexerMetadata is used to send data to the queue, so it contains only the ids. +// It may look weird, because it has to be compatible with the old queue data format. +// If the IsDelete flag is true, the IDs specify the issues to delete from the index without querying the database. +// If the IsDelete flag is false, the ID specify the issue to index, so Indexer will query the database to get the issue data. +// It should be noted that if the id is not existing in the database, it's index will be deleted too even if IsDelete is false. +// Valid values: +// - IsDelete = true, IDs = [1, 2, 3], and ID will be ignored +// - IsDelete = false, ID = 1, and IDs will be ignored +type IndexerMetadata struct { + ID int64 `json:"id"` + + IsDelete bool `json:"is_delete"` + IDs []int64 `json:"ids"` +} + +var ( + // issueIndexerQueue queue of issue ids to be updated + issueIndexerQueue *queue.WorkerPoolQueue[*IndexerMetadata] + // globalIndexer is the global indexer, it cannot be nil. + // When the real indexer is not ready, it will be a dummy indexer which will return error to explain it's not ready. + // So it's always safe use it as *globalIndexer.Load() and call its methods. + globalIndexer atomic.Pointer[internal.Indexer] + dummyIndexer *internal.Indexer +) + +func init() { + i := internal.NewDummyIndexer() + dummyIndexer = &i + globalIndexer.Store(dummyIndexer) +} + +// InitIssueIndexer initialize issue indexer, syncReindex is true then reindex until +// all issue index done. +func InitIssueIndexer(syncReindex bool) { + ctx, _, finished := process.GetManager().AddTypedContext(context.Background(), "Service: IssueIndexer", process.SystemProcessType, false) + + indexerInitWaitChannel := make(chan time.Duration, 1) + + // Create the Queue + issueIndexerQueue = queue.CreateUniqueQueue(ctx, "issue_indexer", getIssueIndexerQueueHandler(ctx)) + + graceful.GetManager().RunAtTerminate(finished) + + // Create the Indexer + go func() { + pprof.SetGoroutineLabels(ctx) + start := time.Now() + log.Info("PID %d: Initializing Issue Indexer: %s", os.Getpid(), setting.Indexer.IssueType) + var ( + issueIndexer internal.Indexer + existed bool + err error + ) + switch setting.Indexer.IssueType { + case "bleve": + defer func() { + if err := recover(); err != nil { + log.Error("PANIC whilst initializing issue indexer: %v\nStacktrace: %s", err, log.Stack(2)) + log.Error("The indexer files are likely corrupted and may need to be deleted") + log.Error("You can completely remove the %q directory to make Forgejo recreate the indexes", setting.Indexer.IssuePath) + globalIndexer.Store(dummyIndexer) + log.Fatal("PID: %d Unable to initialize the Bleve Issue Indexer at path: %s Error: %v", os.Getpid(), setting.Indexer.IssuePath, err) + } + }() + issueIndexer = bleve.NewIndexer(setting.Indexer.IssuePath) + existed, err = issueIndexer.Init(ctx) + if err != nil { + log.Fatal("Unable to initialize Bleve Issue Indexer at path: %s Error: %v", setting.Indexer.IssuePath, err) + } + case "elasticsearch": + issueIndexer = elasticsearch.NewIndexer(setting.Indexer.IssueConnStr, setting.Indexer.IssueIndexerName) + existed, err = issueIndexer.Init(ctx) + if err != nil { + log.Fatal("Unable to issueIndexer.Init with connection %s Error: %v", setting.Indexer.IssueConnStr, err) + } + case "db": + issueIndexer = db.NewIndexer() + case "meilisearch": + issueIndexer = meilisearch.NewIndexer(setting.Indexer.IssueConnStr, setting.Indexer.IssueConnAuth, setting.Indexer.IssueIndexerName) + existed, err = issueIndexer.Init(ctx) + if err != nil { + log.Fatal("Unable to issueIndexer.Init with connection %s Error: %v", setting.Indexer.IssueConnStr, err) + } + default: + log.Fatal("Unknown issue indexer type: %s", setting.Indexer.IssueType) + } + globalIndexer.Store(&issueIndexer) + + graceful.GetManager().RunAtTerminate(func() { + log.Debug("Closing issue indexer") + (*globalIndexer.Load()).Close() + log.Info("PID: %d Issue Indexer closed", os.Getpid()) + }) + + // Start processing the queue + go graceful.GetManager().RunWithCancel(issueIndexerQueue) + + // Populate the index + if !existed { + if syncReindex { + graceful.GetManager().RunWithShutdownContext(populateIssueIndexer) + } else { + go graceful.GetManager().RunWithShutdownContext(populateIssueIndexer) + } + } + + indexerInitWaitChannel <- time.Since(start) + close(indexerInitWaitChannel) + }() + + if syncReindex { + select { + case <-indexerInitWaitChannel: + case <-graceful.GetManager().IsShutdown(): + } + } else if setting.Indexer.StartupTimeout > 0 { + go func() { + pprof.SetGoroutineLabels(ctx) + timeout := setting.Indexer.StartupTimeout + if graceful.GetManager().IsChild() && setting.GracefulHammerTime > 0 { + timeout += setting.GracefulHammerTime + } + select { + case duration := <-indexerInitWaitChannel: + log.Info("Issue Indexer Initialization took %v", duration) + case <-graceful.GetManager().IsShutdown(): + log.Warn("Shutdown occurred before issue index initialisation was complete") + case <-time.After(timeout): + issueIndexerQueue.ShutdownWait(5 * time.Second) + log.Fatal("Issue Indexer Initialization timed-out after: %v", timeout) + } + }() + } +} + +func getIssueIndexerQueueHandler(ctx context.Context) func(items ...*IndexerMetadata) []*IndexerMetadata { + return func(items ...*IndexerMetadata) []*IndexerMetadata { + var unhandled []*IndexerMetadata + + indexer := *globalIndexer.Load() + for _, item := range items { + log.Trace("IndexerMetadata Process: %d %v %t", item.ID, item.IDs, item.IsDelete) + if item.IsDelete { + if err := indexer.Delete(ctx, item.IDs...); err != nil { + log.Error("Issue indexer handler: failed to from index: %v Error: %v", item.IDs, err) + unhandled = append(unhandled, item) + } + continue + } + data, existed, err := getIssueIndexerData(ctx, item.ID) + if err != nil { + log.Error("Issue indexer handler: failed to get issue data of %d: %v", item.ID, err) + unhandled = append(unhandled, item) + continue + } + if !existed { + if err := indexer.Delete(ctx, item.ID); err != nil { + log.Error("Issue indexer handler: failed to delete issue %d from index: %v", item.ID, err) + unhandled = append(unhandled, item) + } + continue + } + if err := indexer.Index(ctx, data); err != nil { + log.Error("Issue indexer handler: failed to index issue %d: %v", item.ID, err) + unhandled = append(unhandled, item) + continue + } + } + + return unhandled + } +} + +// populateIssueIndexer populate the issue indexer with issue data +func populateIssueIndexer(ctx context.Context) { + ctx, _, finished := process.GetManager().AddTypedContext(ctx, "Service: PopulateIssueIndexer", process.SystemProcessType, true) + defer finished() + ctx = contextWithKeepRetry(ctx) // keep retrying since it's a background task + if err := PopulateIssueIndexer(ctx); err != nil { + log.Error("Issue indexer population failed: %v", err) + } +} + +func PopulateIssueIndexer(ctx context.Context) error { + for page := 1; ; page++ { + select { + case <-ctx.Done(): + return fmt.Errorf("shutdown before completion: %w", ctx.Err()) + default: + } + repos, _, err := repo_model.SearchRepositoryByName(ctx, &repo_model.SearchRepoOptions{ + ListOptions: db_model.ListOptions{Page: page, PageSize: repo_model.RepositoryListDefaultPageSize}, + OrderBy: db_model.SearchOrderByID, + Private: true, + Collaborate: optional.Some(false), + }) + if err != nil { + log.Error("SearchRepositoryByName: %v", err) + continue + } + if len(repos) == 0 { + log.Debug("Issue Indexer population complete") + return nil + } + + for _, repo := range repos { + if err := updateRepoIndexer(ctx, repo.ID); err != nil { + return fmt.Errorf("populate issue indexer for repo %d: %v", repo.ID, err) + } + } + } +} + +// UpdateRepoIndexer add/update all issues of the repositories +func UpdateRepoIndexer(ctx context.Context, repoID int64) { + if err := updateRepoIndexer(ctx, repoID); err != nil { + log.Error("Unable to push repo %d to issue indexer: %v", repoID, err) + } +} + +// UpdateIssueIndexer add/update an issue to the issue indexer +func UpdateIssueIndexer(ctx context.Context, issueID int64) { + if err := updateIssueIndexer(ctx, issueID); err != nil { + log.Error("Unable to push issue %d to issue indexer: %v", issueID, err) + } +} + +// DeleteRepoIssueIndexer deletes repo's all issues indexes +func DeleteRepoIssueIndexer(ctx context.Context, repoID int64) { + if err := deleteRepoIssueIndexer(ctx, repoID); err != nil { + log.Error("Unable to push deleted repo %d to issue indexer: %v", repoID, err) + } +} + +// IsAvailable checks if issue indexer is available +func IsAvailable(ctx context.Context) bool { + return (*globalIndexer.Load()).Ping(ctx) == nil +} + +// SearchOptions indicates the options for searching issues +type SearchOptions = internal.SearchOptions + +const ( + SortByCreatedDesc = internal.SortByCreatedDesc + SortByUpdatedDesc = internal.SortByUpdatedDesc + SortByCommentsDesc = internal.SortByCommentsDesc + SortByDeadlineDesc = internal.SortByDeadlineDesc + SortByCreatedAsc = internal.SortByCreatedAsc + SortByUpdatedAsc = internal.SortByUpdatedAsc + SortByCommentsAsc = internal.SortByCommentsAsc + SortByDeadlineAsc = internal.SortByDeadlineAsc +) + +// SearchIssues search issues by options. +func SearchIssues(ctx context.Context, opts *SearchOptions) ([]int64, int64, error) { + indexer := *globalIndexer.Load() + + if opts.Keyword == "" { + // This is a conservative shortcut. + // If the keyword is empty, db has better (at least not worse) performance to filter issues. + // When the keyword is empty, it tends to listing rather than searching issues. + // So if the user creates an issue and list issues immediately, the issue may not be listed because the indexer needs time to index the issue. + // Even worse, the external indexer like elastic search may not be available for a while, + // and the user may not be able to list issues completely until it is available again. + indexer = db.NewIndexer() + } + + result, err := indexer.Search(ctx, opts) + if err != nil { + return nil, 0, err + } + + ret := make([]int64, 0, len(result.Hits)) + for _, hit := range result.Hits { + ret = append(ret, hit.ID) + } + + return ret, result.Total, nil +} + +// CountIssues counts issues by options. It is a shortcut of SearchIssues(ctx, opts) but only returns the total count. +func CountIssues(ctx context.Context, opts *SearchOptions) (int64, error) { + opts = opts.Copy(func(options *SearchOptions) { options.Paginator = &db_model.ListOptions{PageSize: 0} }) + + _, total, err := SearchIssues(ctx, opts) + return total, err +} diff --git a/modules/indexer/issues/indexer_test.go b/modules/indexer/issues/indexer_test.go new file mode 100644 index 00000000..f43ddca0 --- /dev/null +++ b/modules/indexer/issues/indexer_test.go @@ -0,0 +1,404 @@ +// Copyright 2019 The Gitea Authors. All rights reserved. +// SPDX-License-Identifier: MIT + +package issues + +import ( + "context" + "testing" + + "code.gitea.io/gitea/models/db" + "code.gitea.io/gitea/models/unittest" + "code.gitea.io/gitea/modules/indexer/issues/internal" + "code.gitea.io/gitea/modules/optional" + "code.gitea.io/gitea/modules/setting" + + _ "code.gitea.io/gitea/models" + _ "code.gitea.io/gitea/models/actions" + _ "code.gitea.io/gitea/models/activities" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +func TestMain(m *testing.M) { + unittest.MainTest(m) +} + +func TestDBSearchIssues(t *testing.T) { + require.NoError(t, unittest.PrepareTestDatabase()) + + setting.Indexer.IssueType = "db" + InitIssueIndexer(true) + + t.Run("search issues with keyword", searchIssueWithKeyword) + t.Run("search issues in repo", searchIssueInRepo) + t.Run("search issues by ID", searchIssueByID) + t.Run("search issues is pr", searchIssueIsPull) + t.Run("search issues is closed", searchIssueIsClosed) + t.Run("search issues by milestone", searchIssueByMilestoneID) + t.Run("search issues by label", searchIssueByLabelID) + t.Run("search issues by time", searchIssueByTime) + t.Run("search issues with order", searchIssueWithOrder) + t.Run("search issues in project", searchIssueInProject) + t.Run("search issues with paginator", searchIssueWithPaginator) +} + +func searchIssueWithKeyword(t *testing.T) { + tests := []struct { + opts SearchOptions + expectedIDs []int64 + }{ + { + SearchOptions{ + Keyword: "issue2", + RepoIDs: []int64{1}, + }, + []int64{2}, + }, + { + SearchOptions{ + Keyword: "first", + RepoIDs: []int64{1}, + }, + []int64{1}, + }, + { + SearchOptions{ + Keyword: "for", + RepoIDs: []int64{1}, + }, + []int64{11, 5, 3, 2, 1}, + }, + { + SearchOptions{ + Keyword: "good", + RepoIDs: []int64{1}, + }, + []int64{1}, + }, + } + + for _, test := range tests { + issueIDs, _, err := SearchIssues(context.TODO(), &test.opts) + require.NoError(t, err) + + assert.Equal(t, test.expectedIDs, issueIDs) + } +} + +func searchIssueInRepo(t *testing.T) { + tests := []struct { + opts SearchOptions + expectedIDs []int64 + }{ + { + SearchOptions{ + RepoIDs: []int64{1}, + }, + []int64{11, 5, 3, 2, 1}, + }, + { + SearchOptions{ + RepoIDs: []int64{2}, + }, + []int64{7, 4}, + }, + { + SearchOptions{ + RepoIDs: []int64{3}, + }, + []int64{12, 6}, + }, + { + SearchOptions{ + RepoIDs: []int64{4}, + }, + []int64{}, + }, + { + SearchOptions{ + RepoIDs: []int64{5}, + }, + []int64{15}, + }, + } + + for _, test := range tests { + issueIDs, _, err := SearchIssues(context.TODO(), &test.opts) + require.NoError(t, err) + + assert.Equal(t, test.expectedIDs, issueIDs) + } +} + +func searchIssueByID(t *testing.T) { + tests := []struct { + opts SearchOptions + expectedIDs []int64 + }{ + { + opts: SearchOptions{ + PosterID: optional.Some(int64(1)), + }, + expectedIDs: []int64{11, 6, 3, 2, 1}, + }, + { + opts: SearchOptions{ + AssigneeID: optional.Some(int64(1)), + }, + expectedIDs: []int64{6, 1}, + }, + { + opts: SearchOptions{ + MentionID: optional.Some(int64(4)), + }, + expectedIDs: []int64{1}, + }, + { + opts: SearchOptions{ + ReviewedID: optional.Some(int64(1)), + }, + expectedIDs: []int64{}, + }, + { + opts: SearchOptions{ + ReviewRequestedID: optional.Some(int64(1)), + }, + expectedIDs: []int64{12}, + }, + { + opts: SearchOptions{ + SubscriberID: optional.Some(int64(1)), + }, + expectedIDs: []int64{11, 6, 5, 3, 2, 1}, + }, + { + // issue 20 request user 15 and team 5 which user 15 belongs to + // the review request number of issue 20 should be 1 + opts: SearchOptions{ + ReviewRequestedID: optional.Some(int64(15)), + }, + expectedIDs: []int64{12, 20}, + }, + { + // user 20 approved the issue 20, so return nothing + opts: SearchOptions{ + ReviewRequestedID: optional.Some(int64(20)), + }, + expectedIDs: []int64{}, + }, + } + + for _, test := range tests { + issueIDs, _, err := SearchIssues(context.TODO(), &test.opts) + require.NoError(t, err) + assert.Equal(t, test.expectedIDs, issueIDs) + } +} + +func searchIssueIsPull(t *testing.T) { + tests := []struct { + opts SearchOptions + expectedIDs []int64 + }{ + { + SearchOptions{ + IsPull: optional.Some(false), + }, + []int64{17, 16, 15, 14, 13, 6, 5, 18, 10, 7, 4, 1}, + }, + { + SearchOptions{ + IsPull: optional.Some(true), + }, + []int64{22, 21, 12, 11, 20, 19, 9, 8, 3, 2}, + }, + } + for _, test := range tests { + issueIDs, _, err := SearchIssues(context.TODO(), &test.opts) + require.NoError(t, err) + + assert.Equal(t, test.expectedIDs, issueIDs) + } +} + +func searchIssueIsClosed(t *testing.T) { + tests := []struct { + opts SearchOptions + expectedIDs []int64 + }{ + { + SearchOptions{ + IsClosed: optional.Some(false), + }, + []int64{22, 21, 17, 16, 15, 14, 13, 12, 11, 20, 6, 19, 18, 10, 7, 9, 8, 3, 2, 1}, + }, + { + SearchOptions{ + IsClosed: optional.Some(true), + }, + []int64{5, 4}, + }, + } + for _, test := range tests { + issueIDs, _, err := SearchIssues(context.TODO(), &test.opts) + require.NoError(t, err) + assert.Equal(t, test.expectedIDs, issueIDs) + } +} + +func searchIssueByMilestoneID(t *testing.T) { + tests := []struct { + opts SearchOptions + expectedIDs []int64 + }{ + { + SearchOptions{ + MilestoneIDs: []int64{1}, + }, + []int64{2}, + }, + { + SearchOptions{ + MilestoneIDs: []int64{3}, + }, + []int64{3}, + }, + } + for _, test := range tests { + issueIDs, _, err := SearchIssues(context.TODO(), &test.opts) + require.NoError(t, err) + + assert.Equal(t, test.expectedIDs, issueIDs) + } +} + +func searchIssueByLabelID(t *testing.T) { + tests := []struct { + opts SearchOptions + expectedIDs []int64 + }{ + { + SearchOptions{ + IncludedLabelIDs: []int64{1}, + }, + []int64{2, 1}, + }, + { + SearchOptions{ + IncludedLabelIDs: []int64{4}, + }, + []int64{2}, + }, + { + SearchOptions{ + ExcludedLabelIDs: []int64{1}, + }, + []int64{22, 21, 17, 16, 15, 14, 13, 12, 11, 20, 6, 5, 19, 18, 10, 7, 4, 9, 8, 3}, + }, + } + for _, test := range tests { + issueIDs, _, err := SearchIssues(context.TODO(), &test.opts) + require.NoError(t, err) + + assert.Equal(t, test.expectedIDs, issueIDs) + } +} + +func searchIssueByTime(t *testing.T) { + tests := []struct { + opts SearchOptions + expectedIDs []int64 + }{ + { + SearchOptions{ + UpdatedAfterUnix: optional.Some(int64(0)), + }, + []int64{22, 21, 17, 16, 15, 14, 13, 12, 11, 20, 6, 5, 19, 18, 10, 7, 4, 9, 8, 3, 2, 1}, + }, + } + for _, test := range tests { + issueIDs, _, err := SearchIssues(context.TODO(), &test.opts) + require.NoError(t, err) + + assert.Equal(t, test.expectedIDs, issueIDs) + } +} + +func searchIssueWithOrder(t *testing.T) { + tests := []struct { + opts SearchOptions + expectedIDs []int64 + }{ + { + SearchOptions{ + SortBy: internal.SortByCreatedAsc, + }, + []int64{1, 2, 3, 8, 9, 4, 7, 10, 18, 19, 5, 6, 20, 11, 12, 13, 14, 15, 16, 17, 21, 22}, + }, + } + for _, test := range tests { + issueIDs, _, err := SearchIssues(context.TODO(), &test.opts) + require.NoError(t, err) + + assert.Equal(t, test.expectedIDs, issueIDs) + } +} + +func searchIssueInProject(t *testing.T) { + tests := []struct { + opts SearchOptions + expectedIDs []int64 + }{ + { + SearchOptions{ + ProjectID: optional.Some(int64(1)), + }, + []int64{5, 3, 2, 1}, + }, + { + SearchOptions{ + ProjectColumnID: optional.Some(int64(1)), + }, + []int64{1}, + }, + { + SearchOptions{ + ProjectColumnID: optional.Some(int64(0)), // issue with in default column + }, + []int64{2}, + }, + } + for _, test := range tests { + issueIDs, _, err := SearchIssues(context.TODO(), &test.opts) + require.NoError(t, err) + + assert.Equal(t, test.expectedIDs, issueIDs) + } +} + +func searchIssueWithPaginator(t *testing.T) { + tests := []struct { + opts SearchOptions + expectedIDs []int64 + expectedTotal int64 + }{ + { + SearchOptions{ + Paginator: &db.ListOptions{ + PageSize: 5, + }, + }, + []int64{22, 21, 17, 16, 15}, + 22, + }, + } + for _, test := range tests { + issueIDs, total, err := SearchIssues(context.TODO(), &test.opts) + require.NoError(t, err) + + assert.Equal(t, test.expectedIDs, issueIDs) + assert.Equal(t, test.expectedTotal, total) + } +} diff --git a/modules/indexer/issues/internal/indexer.go b/modules/indexer/issues/internal/indexer.go new file mode 100644 index 00000000..95740bc5 --- /dev/null +++ b/modules/indexer/issues/internal/indexer.go @@ -0,0 +1,42 @@ +// Copyright 2023 The Gitea Authors. All rights reserved. +// SPDX-License-Identifier: MIT + +package internal + +import ( + "context" + "fmt" + + "code.gitea.io/gitea/modules/indexer/internal" +) + +// Indexer defines an interface to indexer issues contents +type Indexer interface { + internal.Indexer + Index(ctx context.Context, issue ...*IndexerData) error + Delete(ctx context.Context, ids ...int64) error + Search(ctx context.Context, options *SearchOptions) (*SearchResult, error) +} + +// NewDummyIndexer returns a dummy indexer +func NewDummyIndexer() Indexer { + return &dummyIndexer{ + Indexer: internal.NewDummyIndexer(), + } +} + +type dummyIndexer struct { + internal.Indexer +} + +func (d *dummyIndexer) Index(_ context.Context, _ ...*IndexerData) error { + return fmt.Errorf("indexer is not ready") +} + +func (d *dummyIndexer) Delete(_ context.Context, _ ...int64) error { + return fmt.Errorf("indexer is not ready") +} + +func (d *dummyIndexer) Search(_ context.Context, _ *SearchOptions) (*SearchResult, error) { + return nil, fmt.Errorf("indexer is not ready") +} diff --git a/modules/indexer/issues/internal/model.go b/modules/indexer/issues/internal/model.go new file mode 100644 index 00000000..2dfee8b7 --- /dev/null +++ b/modules/indexer/issues/internal/model.go @@ -0,0 +1,150 @@ +// Copyright 2023 The Gitea Authors. All rights reserved. +// SPDX-License-Identifier: MIT + +package internal + +import ( + "code.gitea.io/gitea/models/db" + "code.gitea.io/gitea/modules/optional" + "code.gitea.io/gitea/modules/timeutil" +) + +// IndexerData data stored in the issue indexer +type IndexerData struct { + ID int64 `json:"id"` + RepoID int64 `json:"repo_id"` + IsPublic bool `json:"is_public"` // If the repo is public + + // Fields used for keyword searching + Title string `json:"title"` + Content string `json:"content"` + Comments []string `json:"comments"` + + // Fields used for filtering + IsPull bool `json:"is_pull"` + IsClosed bool `json:"is_closed"` + LabelIDs []int64 `json:"label_ids"` + NoLabel bool `json:"no_label"` // True if LabelIDs is empty + MilestoneID int64 `json:"milestone_id"` + ProjectID int64 `json:"project_id"` + ProjectColumnID int64 `json:"project_board_id"` // the key should be kept as project_board_id to keep compatible + PosterID int64 `json:"poster_id"` + AssigneeID int64 `json:"assignee_id"` + MentionIDs []int64 `json:"mention_ids"` + ReviewedIDs []int64 `json:"reviewed_ids"` + ReviewRequestedIDs []int64 `json:"review_requested_ids"` + SubscriberIDs []int64 `json:"subscriber_ids"` + UpdatedUnix timeutil.TimeStamp `json:"updated_unix"` + + // Fields used for sorting + // UpdatedUnix is both used for filtering and sorting. + // ID is used for sorting too, to make the sorting stable. + CreatedUnix timeutil.TimeStamp `json:"created_unix"` + DeadlineUnix timeutil.TimeStamp `json:"deadline_unix"` + CommentCount int64 `json:"comment_count"` +} + +// Match represents on search result +type Match struct { + ID int64 `json:"id"` + Score float64 `json:"score"` +} + +// SearchResult represents search results +type SearchResult struct { + Total int64 + Hits []Match +} + +// SearchOptions represents search options. +// +// It has a slightly different design from database query options. +// In database query options, a field is never a pointer, so it could be confusing when it's zero value: +// Do you want to find data with a field value of 0, or do you not specify the field in the options? +// To avoid this confusion, db introduced db.NoConditionID(-1). +// So zero value means the field is not specified in the search options, and db.NoConditionID means "== 0" or "id NOT IN (SELECT id FROM ...)" +// It's still not ideal, it trapped developers many times. +// And sometimes -1 could be a valid value, like issue ID, negative numbers indicate exclusion. +// Since db.NoConditionID is for "db" (the package name is db), it makes sense not to use it in the indexer: +// Why do bleve/elasticsearch/meilisearch indexers need to know about db.NoConditionID? +// So in SearchOptions, we use pointer for fields which could be not specified, +// and always use the value to filter if it's not nil, even if it's zero or negative. +// It can handle almost all cases, if there is an exception, we can add a new field, like NoLabelOnly. +// Unfortunately, we still use db for the indexer and have to convert between db.NoConditionID and nil for legacy reasons. +type SearchOptions struct { + Keyword string // keyword to search + + IsFuzzyKeyword bool // if false the levenshtein distance is 0 + + RepoIDs []int64 // repository IDs which the issues belong to + AllPublic bool // if include all public repositories + + IsPull optional.Option[bool] // if the issues is a pull request + IsClosed optional.Option[bool] // if the issues is closed + + IncludedLabelIDs []int64 // labels the issues have + ExcludedLabelIDs []int64 // labels the issues don't have + IncludedAnyLabelIDs []int64 // labels the issues have at least one. It will be ignored if IncludedLabelIDs is not empty. It's an uncommon filter, but it has been supported accidentally by issues.IssuesOptions.IncludedLabelNames. + NoLabelOnly bool // if the issues have no label, if true, IncludedLabelIDs and ExcludedLabelIDs, IncludedAnyLabelIDs will be ignored + + MilestoneIDs []int64 // milestones the issues have + + ProjectID optional.Option[int64] // project the issues belong to + ProjectColumnID optional.Option[int64] // project column the issues belong to + + PosterID optional.Option[int64] // poster of the issues + + AssigneeID optional.Option[int64] // assignee of the issues, zero means no assignee + + MentionID optional.Option[int64] // mentioned user of the issues + + ReviewedID optional.Option[int64] // reviewer of the issues + ReviewRequestedID optional.Option[int64] // requested reviewer of the issues + + SubscriberID optional.Option[int64] // subscriber of the issues + + UpdatedAfterUnix optional.Option[int64] + UpdatedBeforeUnix optional.Option[int64] + + Paginator *db.ListOptions + + SortBy SortBy // sort by field +} + +// Copy returns a copy of the options. +// Be careful, it's not a deep copy, so `SearchOptions.RepoIDs = {...}` is OK while `SearchOptions.RepoIDs[0] = ...` is not. +func (o *SearchOptions) Copy(edit ...func(options *SearchOptions)) *SearchOptions { + if o == nil { + return nil + } + v := *o + for _, e := range edit { + e(&v) + } + return &v +} + +type SortBy string + +const ( + SortByCreatedDesc SortBy = "-created_unix" + SortByUpdatedDesc SortBy = "-updated_unix" + SortByCommentsDesc SortBy = "-comment_count" + SortByDeadlineDesc SortBy = "-deadline_unix" + SortByCreatedAsc SortBy = "created_unix" + SortByUpdatedAsc SortBy = "updated_unix" + SortByCommentsAsc SortBy = "comment_count" + SortByDeadlineAsc SortBy = "deadline_unix" + // Unsupported sort types which are supported by issues.IssuesOptions.SortType: + // + // - "priorityrepo": + // It's impossible to support it in the indexer. + // It is based on the specified repository in the request, so we cannot add static field to the indexer. + // If we do something like that query the issues in the specified repository first then append other issues, + // it will break the pagination. + // + // - "project-column-sorting": + // Although it's possible to support it by adding project.ProjectIssue.Sorting to the indexer, + // but what if the issue belongs to multiple projects? + // Since it's unsupported to search issues with keyword in project page, we don't need to support it. +) diff --git a/modules/indexer/issues/internal/tests/tests.go b/modules/indexer/issues/internal/tests/tests.go new file mode 100644 index 00000000..a93b2913 --- /dev/null +++ b/modules/indexer/issues/internal/tests/tests.go @@ -0,0 +1,771 @@ +// Copyright 2023 The Gitea Authors. All rights reserved. +// SPDX-License-Identifier: MIT + +// This package contains tests for issues indexer modules. +// All the code in this package is only used for testing. +// Do not put any production code in this package to avoid it being included in the final binary. + +package tests + +import ( + "context" + "fmt" + "slices" + "testing" + "time" + + "code.gitea.io/gitea/models/db" + "code.gitea.io/gitea/modules/indexer/issues/internal" + "code.gitea.io/gitea/modules/optional" + "code.gitea.io/gitea/modules/timeutil" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +func TestIndexer(t *testing.T, indexer internal.Indexer) { + _, err := indexer.Init(context.Background()) + require.NoError(t, err) + + require.NoError(t, indexer.Ping(context.Background())) + + var ( + ids []int64 + data = map[int64]*internal.IndexerData{} + ) + { + d := generateDefaultIndexerData() + for _, v := range d { + ids = append(ids, v.ID) + data[v.ID] = v + } + require.NoError(t, indexer.Index(context.Background(), d...)) + require.NoError(t, waitData(indexer, int64(len(data)))) + } + + defer func() { + require.NoError(t, indexer.Delete(context.Background(), ids...)) + }() + + for _, c := range cases { + t.Run(c.Name, func(t *testing.T) { + if len(c.ExtraData) > 0 { + require.NoError(t, indexer.Index(context.Background(), c.ExtraData...)) + for _, v := range c.ExtraData { + data[v.ID] = v + } + require.NoError(t, waitData(indexer, int64(len(data)))) + defer func() { + for _, v := range c.ExtraData { + require.NoError(t, indexer.Delete(context.Background(), v.ID)) + delete(data, v.ID) + } + require.NoError(t, waitData(indexer, int64(len(data)))) + }() + } + + result, err := indexer.Search(context.Background(), c.SearchOptions) + require.NoError(t, err) + + if c.Expected != nil { + c.Expected(t, data, result) + } else { + ids := make([]int64, 0, len(result.Hits)) + for _, hit := range result.Hits { + ids = append(ids, hit.ID) + } + assert.Equal(t, c.ExpectedIDs, ids) + assert.Equal(t, c.ExpectedTotal, result.Total) + } + + // test counting + c.SearchOptions.Paginator = &db.ListOptions{PageSize: 0} + countResult, err := indexer.Search(context.Background(), c.SearchOptions) + require.NoError(t, err) + assert.Empty(t, countResult.Hits) + assert.Equal(t, result.Total, countResult.Total) + }) + } +} + +var cases = []*testIndexerCase{ + { + Name: "default", + SearchOptions: &internal.SearchOptions{}, + Expected: func(t *testing.T, data map[int64]*internal.IndexerData, result *internal.SearchResult) { + assert.Equal(t, len(data), len(result.Hits)) + assert.Equal(t, len(data), int(result.Total)) + }, + }, + { + Name: "empty", + SearchOptions: &internal.SearchOptions{ + Keyword: "f1dfac73-fda6-4a6b-b8a4-2408fcb8ef69", + }, + ExpectedIDs: []int64{}, + ExpectedTotal: 0, + }, + { + Name: "with limit", + SearchOptions: &internal.SearchOptions{ + Paginator: &db.ListOptions{ + PageSize: 5, + }, + }, + Expected: func(t *testing.T, data map[int64]*internal.IndexerData, result *internal.SearchResult) { + assert.Len(t, result.Hits, 5) + assert.Equal(t, len(data), int(result.Total)) + }, + }, + { + Name: "Keyword", + ExtraData: []*internal.IndexerData{ + {ID: 1000, Title: "hi hello world"}, + {ID: 1001, Content: "hi hello world"}, + {ID: 1002, Comments: []string{"hi", "hello world"}}, + }, + SearchOptions: &internal.SearchOptions{ + Keyword: "hello", + }, + ExpectedIDs: []int64{1002, 1001, 1000}, + ExpectedTotal: 3, + }, + { + Name: "Keyword Fuzzy", + ExtraData: []*internal.IndexerData{ + {ID: 1000, Title: "hi hello world"}, + {ID: 1001, Content: "hi hello world"}, + {ID: 1002, Comments: []string{"hi", "hello world"}}, + }, + SearchOptions: &internal.SearchOptions{ + Keyword: "hello world", + IsFuzzyKeyword: true, + }, + ExpectedIDs: []int64{1002, 1001, 1000}, + ExpectedTotal: 3, + }, + { + Name: "RepoIDs", + ExtraData: []*internal.IndexerData{ + {ID: 1001, Title: "hello world", RepoID: 1, IsPublic: false}, + {ID: 1002, Title: "hello world", RepoID: 1, IsPublic: false}, + {ID: 1003, Title: "hello world", RepoID: 2, IsPublic: true}, + {ID: 1004, Title: "hello world", RepoID: 2, IsPublic: true}, + {ID: 1005, Title: "hello world", RepoID: 3, IsPublic: true}, + {ID: 1006, Title: "hello world", RepoID: 4, IsPublic: false}, + {ID: 1007, Title: "hello world", RepoID: 5, IsPublic: false}, + }, + SearchOptions: &internal.SearchOptions{ + Keyword: "hello", + RepoIDs: []int64{1, 4}, + }, + ExpectedIDs: []int64{1006, 1002, 1001}, + ExpectedTotal: 3, + }, + { + Name: "RepoIDs and AllPublic", + ExtraData: []*internal.IndexerData{ + {ID: 1001, Title: "hello world", RepoID: 1, IsPublic: false}, + {ID: 1002, Title: "hello world", RepoID: 1, IsPublic: false}, + {ID: 1003, Title: "hello world", RepoID: 2, IsPublic: true}, + {ID: 1004, Title: "hello world", RepoID: 2, IsPublic: true}, + {ID: 1005, Title: "hello world", RepoID: 3, IsPublic: true}, + {ID: 1006, Title: "hello world", RepoID: 4, IsPublic: false}, + {ID: 1007, Title: "hello world", RepoID: 5, IsPublic: false}, + }, + SearchOptions: &internal.SearchOptions{ + Keyword: "hello", + RepoIDs: []int64{1, 4}, + AllPublic: true, + }, + ExpectedIDs: []int64{1006, 1005, 1004, 1003, 1002, 1001}, + ExpectedTotal: 6, + }, + { + Name: "issue only", + SearchOptions: &internal.SearchOptions{ + Paginator: &db.ListOptions{ + PageSize: 5, + }, + IsPull: optional.Some(false), + }, + Expected: func(t *testing.T, data map[int64]*internal.IndexerData, result *internal.SearchResult) { + assert.Len(t, result.Hits, 5) + for _, v := range result.Hits { + assert.False(t, data[v.ID].IsPull) + } + assert.Equal(t, countIndexerData(data, func(v *internal.IndexerData) bool { return !v.IsPull }), result.Total) + }, + }, + { + Name: "pull only", + SearchOptions: &internal.SearchOptions{ + Paginator: &db.ListOptions{ + PageSize: 5, + }, + IsPull: optional.Some(true), + }, + Expected: func(t *testing.T, data map[int64]*internal.IndexerData, result *internal.SearchResult) { + assert.Len(t, result.Hits, 5) + for _, v := range result.Hits { + assert.True(t, data[v.ID].IsPull) + } + assert.Equal(t, countIndexerData(data, func(v *internal.IndexerData) bool { return v.IsPull }), result.Total) + }, + }, + { + Name: "opened only", + SearchOptions: &internal.SearchOptions{ + Paginator: &db.ListOptions{ + PageSize: 5, + }, + IsClosed: optional.Some(false), + }, + Expected: func(t *testing.T, data map[int64]*internal.IndexerData, result *internal.SearchResult) { + assert.Len(t, result.Hits, 5) + for _, v := range result.Hits { + assert.False(t, data[v.ID].IsClosed) + } + assert.Equal(t, countIndexerData(data, func(v *internal.IndexerData) bool { return !v.IsClosed }), result.Total) + }, + }, + { + Name: "closed only", + SearchOptions: &internal.SearchOptions{ + Paginator: &db.ListOptions{ + PageSize: 5, + }, + IsClosed: optional.Some(true), + }, + Expected: func(t *testing.T, data map[int64]*internal.IndexerData, result *internal.SearchResult) { + assert.Len(t, result.Hits, 5) + for _, v := range result.Hits { + assert.True(t, data[v.ID].IsClosed) + } + assert.Equal(t, countIndexerData(data, func(v *internal.IndexerData) bool { return v.IsClosed }), result.Total) + }, + }, + { + Name: "labels", + ExtraData: []*internal.IndexerData{ + {ID: 1000, Title: "hello a", LabelIDs: []int64{2000, 2001, 2002}}, + {ID: 1001, Title: "hello b", LabelIDs: []int64{2000, 2001}}, + {ID: 1002, Title: "hello c", LabelIDs: []int64{2000, 2001, 2003}}, + {ID: 1003, Title: "hello d", LabelIDs: []int64{2000}}, + {ID: 1004, Title: "hello e", LabelIDs: []int64{}}, + }, + SearchOptions: &internal.SearchOptions{ + Keyword: "hello", + IncludedLabelIDs: []int64{2000, 2001}, + ExcludedLabelIDs: []int64{2003}, + }, + ExpectedIDs: []int64{1001, 1000}, + ExpectedTotal: 2, + }, + { + Name: "include any labels", + ExtraData: []*internal.IndexerData{ + {ID: 1000, Title: "hello a", LabelIDs: []int64{2000, 2001, 2002}}, + {ID: 1001, Title: "hello b", LabelIDs: []int64{2001}}, + {ID: 1002, Title: "hello c", LabelIDs: []int64{2000, 2001, 2003}}, + {ID: 1003, Title: "hello d", LabelIDs: []int64{2002}}, + {ID: 1004, Title: "hello e", LabelIDs: []int64{}}, + }, + SearchOptions: &internal.SearchOptions{ + Keyword: "hello", + IncludedAnyLabelIDs: []int64{2001, 2002}, + ExcludedLabelIDs: []int64{2003}, + }, + ExpectedIDs: []int64{1003, 1001, 1000}, + ExpectedTotal: 3, + }, + { + Name: "MilestoneIDs", + SearchOptions: &internal.SearchOptions{ + Paginator: &db.ListOptions{ + PageSize: 5, + }, + MilestoneIDs: []int64{1, 2, 6}, + }, + Expected: func(t *testing.T, data map[int64]*internal.IndexerData, result *internal.SearchResult) { + assert.Len(t, result.Hits, 5) + for _, v := range result.Hits { + assert.Contains(t, []int64{1, 2, 6}, data[v.ID].MilestoneID) + } + assert.Equal(t, countIndexerData(data, func(v *internal.IndexerData) bool { + return v.MilestoneID == 1 || v.MilestoneID == 2 || v.MilestoneID == 6 + }), result.Total) + }, + }, + { + Name: "no MilestoneIDs", + SearchOptions: &internal.SearchOptions{ + Paginator: &db.ListOptions{ + PageSize: 5, + }, + MilestoneIDs: []int64{0}, + }, + Expected: func(t *testing.T, data map[int64]*internal.IndexerData, result *internal.SearchResult) { + assert.Len(t, result.Hits, 5) + for _, v := range result.Hits { + assert.Equal(t, int64(0), data[v.ID].MilestoneID) + } + assert.Equal(t, countIndexerData(data, func(v *internal.IndexerData) bool { + return v.MilestoneID == 0 + }), result.Total) + }, + }, + { + Name: "ProjectID", + SearchOptions: &internal.SearchOptions{ + Paginator: &db.ListOptions{ + PageSize: 5, + }, + ProjectID: optional.Some(int64(1)), + }, + Expected: func(t *testing.T, data map[int64]*internal.IndexerData, result *internal.SearchResult) { + assert.Len(t, result.Hits, 5) + for _, v := range result.Hits { + assert.Equal(t, int64(1), data[v.ID].ProjectID) + } + assert.Equal(t, countIndexerData(data, func(v *internal.IndexerData) bool { + return v.ProjectID == 1 + }), result.Total) + }, + }, + { + Name: "no ProjectID", + SearchOptions: &internal.SearchOptions{ + Paginator: &db.ListOptions{ + PageSize: 5, + }, + ProjectID: optional.Some(int64(0)), + }, + Expected: func(t *testing.T, data map[int64]*internal.IndexerData, result *internal.SearchResult) { + assert.Len(t, result.Hits, 5) + for _, v := range result.Hits { + assert.Equal(t, int64(0), data[v.ID].ProjectID) + } + assert.Equal(t, countIndexerData(data, func(v *internal.IndexerData) bool { + return v.ProjectID == 0 + }), result.Total) + }, + }, + { + Name: "ProjectColumnID", + SearchOptions: &internal.SearchOptions{ + Paginator: &db.ListOptions{ + PageSize: 5, + }, + ProjectColumnID: optional.Some(int64(1)), + }, + Expected: func(t *testing.T, data map[int64]*internal.IndexerData, result *internal.SearchResult) { + assert.Len(t, result.Hits, 5) + for _, v := range result.Hits { + assert.Equal(t, int64(1), data[v.ID].ProjectColumnID) + } + assert.Equal(t, countIndexerData(data, func(v *internal.IndexerData) bool { + return v.ProjectColumnID == 1 + }), result.Total) + }, + }, + { + Name: "no ProjectColumnID", + SearchOptions: &internal.SearchOptions{ + Paginator: &db.ListOptions{ + PageSize: 5, + }, + ProjectColumnID: optional.Some(int64(0)), + }, + Expected: func(t *testing.T, data map[int64]*internal.IndexerData, result *internal.SearchResult) { + assert.Len(t, result.Hits, 5) + for _, v := range result.Hits { + assert.Equal(t, int64(0), data[v.ID].ProjectColumnID) + } + assert.Equal(t, countIndexerData(data, func(v *internal.IndexerData) bool { + return v.ProjectColumnID == 0 + }), result.Total) + }, + }, + { + Name: "PosterID", + SearchOptions: &internal.SearchOptions{ + Paginator: &db.ListOptions{ + PageSize: 5, + }, + PosterID: optional.Some(int64(1)), + }, + Expected: func(t *testing.T, data map[int64]*internal.IndexerData, result *internal.SearchResult) { + assert.Len(t, result.Hits, 5) + for _, v := range result.Hits { + assert.Equal(t, int64(1), data[v.ID].PosterID) + } + assert.Equal(t, countIndexerData(data, func(v *internal.IndexerData) bool { + return v.PosterID == 1 + }), result.Total) + }, + }, + { + Name: "AssigneeID", + SearchOptions: &internal.SearchOptions{ + Paginator: &db.ListOptions{ + PageSize: 5, + }, + AssigneeID: optional.Some(int64(1)), + }, + Expected: func(t *testing.T, data map[int64]*internal.IndexerData, result *internal.SearchResult) { + assert.Len(t, result.Hits, 5) + for _, v := range result.Hits { + assert.Equal(t, int64(1), data[v.ID].AssigneeID) + } + assert.Equal(t, countIndexerData(data, func(v *internal.IndexerData) bool { + return v.AssigneeID == 1 + }), result.Total) + }, + }, + { + Name: "no AssigneeID", + SearchOptions: &internal.SearchOptions{ + Paginator: &db.ListOptions{ + PageSize: 5, + }, + AssigneeID: optional.Some(int64(0)), + }, + Expected: func(t *testing.T, data map[int64]*internal.IndexerData, result *internal.SearchResult) { + assert.Len(t, result.Hits, 5) + for _, v := range result.Hits { + assert.Equal(t, int64(0), data[v.ID].AssigneeID) + } + assert.Equal(t, countIndexerData(data, func(v *internal.IndexerData) bool { + return v.AssigneeID == 0 + }), result.Total) + }, + }, + { + Name: "MentionID", + SearchOptions: &internal.SearchOptions{ + Paginator: &db.ListOptions{ + PageSize: 5, + }, + MentionID: optional.Some(int64(1)), + }, + Expected: func(t *testing.T, data map[int64]*internal.IndexerData, result *internal.SearchResult) { + assert.Len(t, result.Hits, 5) + for _, v := range result.Hits { + assert.Contains(t, data[v.ID].MentionIDs, int64(1)) + } + assert.Equal(t, countIndexerData(data, func(v *internal.IndexerData) bool { + return slices.Contains(v.MentionIDs, 1) + }), result.Total) + }, + }, + { + Name: "ReviewedID", + SearchOptions: &internal.SearchOptions{ + Paginator: &db.ListOptions{ + PageSize: 5, + }, + ReviewedID: optional.Some(int64(1)), + }, + Expected: func(t *testing.T, data map[int64]*internal.IndexerData, result *internal.SearchResult) { + assert.Len(t, result.Hits, 5) + for _, v := range result.Hits { + assert.Contains(t, data[v.ID].ReviewedIDs, int64(1)) + } + assert.Equal(t, countIndexerData(data, func(v *internal.IndexerData) bool { + return slices.Contains(v.ReviewedIDs, 1) + }), result.Total) + }, + }, + { + Name: "ReviewRequestedID", + SearchOptions: &internal.SearchOptions{ + Paginator: &db.ListOptions{ + PageSize: 5, + }, + ReviewRequestedID: optional.Some(int64(1)), + }, + Expected: func(t *testing.T, data map[int64]*internal.IndexerData, result *internal.SearchResult) { + assert.Len(t, result.Hits, 5) + for _, v := range result.Hits { + assert.Contains(t, data[v.ID].ReviewRequestedIDs, int64(1)) + } + assert.Equal(t, countIndexerData(data, func(v *internal.IndexerData) bool { + return slices.Contains(v.ReviewRequestedIDs, 1) + }), result.Total) + }, + }, + { + Name: "SubscriberID", + SearchOptions: &internal.SearchOptions{ + Paginator: &db.ListOptions{ + PageSize: 5, + }, + SubscriberID: optional.Some(int64(1)), + }, + Expected: func(t *testing.T, data map[int64]*internal.IndexerData, result *internal.SearchResult) { + assert.Len(t, result.Hits, 5) + for _, v := range result.Hits { + assert.Contains(t, data[v.ID].SubscriberIDs, int64(1)) + } + assert.Equal(t, countIndexerData(data, func(v *internal.IndexerData) bool { + return slices.Contains(v.SubscriberIDs, 1) + }), result.Total) + }, + }, + { + Name: "updated", + SearchOptions: &internal.SearchOptions{ + Paginator: &db.ListOptions{ + PageSize: 5, + }, + UpdatedAfterUnix: optional.Some(int64(20)), + UpdatedBeforeUnix: optional.Some(int64(30)), + }, + Expected: func(t *testing.T, data map[int64]*internal.IndexerData, result *internal.SearchResult) { + assert.Len(t, result.Hits, 5) + for _, v := range result.Hits { + assert.GreaterOrEqual(t, data[v.ID].UpdatedUnix, int64(20)) + assert.LessOrEqual(t, data[v.ID].UpdatedUnix, int64(30)) + } + assert.Equal(t, countIndexerData(data, func(v *internal.IndexerData) bool { + return data[v.ID].UpdatedUnix >= 20 && data[v.ID].UpdatedUnix <= 30 + }), result.Total) + }, + }, + { + Name: "SortByCreatedDesc", + SearchOptions: &internal.SearchOptions{ + Paginator: &db.ListOptionsAll, + SortBy: internal.SortByCreatedDesc, + }, + Expected: func(t *testing.T, data map[int64]*internal.IndexerData, result *internal.SearchResult) { + assert.Equal(t, len(data), len(result.Hits)) + assert.Equal(t, len(data), int(result.Total)) + for i, v := range result.Hits { + if i < len(result.Hits)-1 { + assert.GreaterOrEqual(t, data[v.ID].CreatedUnix, data[result.Hits[i+1].ID].CreatedUnix) + } + } + }, + }, + { + Name: "SortByUpdatedDesc", + SearchOptions: &internal.SearchOptions{ + Paginator: &db.ListOptionsAll, + SortBy: internal.SortByUpdatedDesc, + }, + Expected: func(t *testing.T, data map[int64]*internal.IndexerData, result *internal.SearchResult) { + assert.Equal(t, len(data), len(result.Hits)) + assert.Equal(t, len(data), int(result.Total)) + for i, v := range result.Hits { + if i < len(result.Hits)-1 { + assert.GreaterOrEqual(t, data[v.ID].UpdatedUnix, data[result.Hits[i+1].ID].UpdatedUnix) + } + } + }, + }, + { + Name: "SortByCommentsDesc", + SearchOptions: &internal.SearchOptions{ + Paginator: &db.ListOptionsAll, + SortBy: internal.SortByCommentsDesc, + }, + Expected: func(t *testing.T, data map[int64]*internal.IndexerData, result *internal.SearchResult) { + assert.Equal(t, len(data), len(result.Hits)) + assert.Equal(t, len(data), int(result.Total)) + for i, v := range result.Hits { + if i < len(result.Hits)-1 { + assert.GreaterOrEqual(t, data[v.ID].CommentCount, data[result.Hits[i+1].ID].CommentCount) + } + } + }, + }, + { + Name: "SortByDeadlineDesc", + SearchOptions: &internal.SearchOptions{ + Paginator: &db.ListOptionsAll, + SortBy: internal.SortByDeadlineDesc, + }, + Expected: func(t *testing.T, data map[int64]*internal.IndexerData, result *internal.SearchResult) { + assert.Equal(t, len(data), len(result.Hits)) + assert.Equal(t, len(data), int(result.Total)) + for i, v := range result.Hits { + if i < len(result.Hits)-1 { + assert.GreaterOrEqual(t, data[v.ID].DeadlineUnix, data[result.Hits[i+1].ID].DeadlineUnix) + } + } + }, + }, + { + Name: "SortByCreatedAsc", + SearchOptions: &internal.SearchOptions{ + Paginator: &db.ListOptionsAll, + SortBy: internal.SortByCreatedAsc, + }, + Expected: func(t *testing.T, data map[int64]*internal.IndexerData, result *internal.SearchResult) { + assert.Equal(t, len(data), len(result.Hits)) + assert.Equal(t, len(data), int(result.Total)) + for i, v := range result.Hits { + if i < len(result.Hits)-1 { + assert.LessOrEqual(t, data[v.ID].CreatedUnix, data[result.Hits[i+1].ID].CreatedUnix) + } + } + }, + }, + { + Name: "SortByUpdatedAsc", + SearchOptions: &internal.SearchOptions{ + Paginator: &db.ListOptionsAll, + SortBy: internal.SortByUpdatedAsc, + }, + Expected: func(t *testing.T, data map[int64]*internal.IndexerData, result *internal.SearchResult) { + assert.Equal(t, len(data), len(result.Hits)) + assert.Equal(t, len(data), int(result.Total)) + for i, v := range result.Hits { + if i < len(result.Hits)-1 { + assert.LessOrEqual(t, data[v.ID].UpdatedUnix, data[result.Hits[i+1].ID].UpdatedUnix) + } + } + }, + }, + { + Name: "SortByCommentsAsc", + SearchOptions: &internal.SearchOptions{ + Paginator: &db.ListOptionsAll, + SortBy: internal.SortByCommentsAsc, + }, + Expected: func(t *testing.T, data map[int64]*internal.IndexerData, result *internal.SearchResult) { + assert.Equal(t, len(data), len(result.Hits)) + assert.Equal(t, len(data), int(result.Total)) + for i, v := range result.Hits { + if i < len(result.Hits)-1 { + assert.LessOrEqual(t, data[v.ID].CommentCount, data[result.Hits[i+1].ID].CommentCount) + } + } + }, + }, + { + Name: "SortByDeadlineAsc", + SearchOptions: &internal.SearchOptions{ + Paginator: &db.ListOptionsAll, + SortBy: internal.SortByDeadlineAsc, + }, + Expected: func(t *testing.T, data map[int64]*internal.IndexerData, result *internal.SearchResult) { + assert.Equal(t, len(data), len(result.Hits)) + assert.Equal(t, len(data), int(result.Total)) + for i, v := range result.Hits { + if i < len(result.Hits)-1 { + assert.LessOrEqual(t, data[v.ID].DeadlineUnix, data[result.Hits[i+1].ID].DeadlineUnix) + } + } + }, + }, +} + +type testIndexerCase struct { + Name string + ExtraData []*internal.IndexerData + + SearchOptions *internal.SearchOptions + + Expected func(t *testing.T, data map[int64]*internal.IndexerData, result *internal.SearchResult) // if nil, use ExpectedIDs, ExpectedTotal + ExpectedIDs []int64 + ExpectedTotal int64 +} + +func generateDefaultIndexerData() []*internal.IndexerData { + var id int64 + var data []*internal.IndexerData + for repoID := int64(1); repoID <= 10; repoID++ { + for issueIndex := int64(1); issueIndex <= 20; issueIndex++ { + id++ + + comments := make([]string, id%4) + for i := range comments { + comments[i] = fmt.Sprintf("comment%d", i) + } + + labelIDs := make([]int64, id%5) + for i := range labelIDs { + labelIDs[i] = int64(i) + 1 // LabelID should not be 0 + } + mentionIDs := make([]int64, id%6) + for i := range mentionIDs { + mentionIDs[i] = int64(i) + 1 // MentionID should not be 0 + } + reviewedIDs := make([]int64, id%7) + for i := range reviewedIDs { + reviewedIDs[i] = int64(i) + 1 // ReviewID should not be 0 + } + reviewRequestedIDs := make([]int64, id%8) + for i := range reviewRequestedIDs { + reviewRequestedIDs[i] = int64(i) + 1 // ReviewRequestedID should not be 0 + } + subscriberIDs := make([]int64, id%9) + for i := range subscriberIDs { + subscriberIDs[i] = int64(i) + 1 // SubscriberID should not be 0 + } + + data = append(data, &internal.IndexerData{ + ID: id, + RepoID: repoID, + IsPublic: repoID%2 == 0, + Title: fmt.Sprintf("issue%d of repo%d", issueIndex, repoID), + Content: fmt.Sprintf("content%d", issueIndex), + Comments: comments, + IsPull: issueIndex%2 == 0, + IsClosed: issueIndex%3 == 0, + LabelIDs: labelIDs, + NoLabel: len(labelIDs) == 0, + MilestoneID: issueIndex % 4, + ProjectID: issueIndex % 5, + ProjectColumnID: issueIndex % 6, + PosterID: id%10 + 1, // PosterID should not be 0 + AssigneeID: issueIndex % 10, + MentionIDs: mentionIDs, + ReviewedIDs: reviewedIDs, + ReviewRequestedIDs: reviewRequestedIDs, + SubscriberIDs: subscriberIDs, + UpdatedUnix: timeutil.TimeStamp(id + issueIndex), + CreatedUnix: timeutil.TimeStamp(id), + DeadlineUnix: timeutil.TimeStamp(id + issueIndex + repoID), + CommentCount: int64(len(comments)), + }) + } + } + + return data +} + +func countIndexerData(data map[int64]*internal.IndexerData, f func(v *internal.IndexerData) bool) int64 { + var count int64 + for _, v := range data { + if f(v) { + count++ + } + } + return count +} + +// waitData waits for the indexer to index all data. +// Some engines like Elasticsearch index data asynchronously, so we need to wait for a while. +func waitData(indexer internal.Indexer, total int64) error { + var actual int64 + for i := 0; i < 100; i++ { + result, err := indexer.Search(context.Background(), &internal.SearchOptions{ + Paginator: &db.ListOptions{ + PageSize: 0, + }, + }) + if err != nil { + return err + } + actual = result.Total + if actual == total { + return nil + } + time.Sleep(100 * time.Millisecond) + } + return fmt.Errorf("waitData: expected %d, actual %d", total, actual) +} diff --git a/modules/indexer/issues/meilisearch/meilisearch.go b/modules/indexer/issues/meilisearch/meilisearch.go new file mode 100644 index 00000000..93323193 --- /dev/null +++ b/modules/indexer/issues/meilisearch/meilisearch.go @@ -0,0 +1,301 @@ +// Copyright 2023 The Gitea Authors. All rights reserved. +// SPDX-License-Identifier: MIT + +package meilisearch + +import ( + "context" + "errors" + "fmt" + "strconv" + "strings" + + indexer_internal "code.gitea.io/gitea/modules/indexer/internal" + inner_meilisearch "code.gitea.io/gitea/modules/indexer/internal/meilisearch" + "code.gitea.io/gitea/modules/indexer/issues/internal" + + "github.com/meilisearch/meilisearch-go" +) + +const ( + issueIndexerLatestVersion = 3 + + // TODO: make this configurable if necessary + maxTotalHits = 10000 +) + +// ErrMalformedResponse is never expected as we initialize the indexer ourself and so define the types. +var ErrMalformedResponse = errors.New("meilisearch returned unexpected malformed content") + +var _ internal.Indexer = &Indexer{} + +// Indexer implements Indexer interface +type Indexer struct { + inner *inner_meilisearch.Indexer + indexer_internal.Indexer // do not composite inner_meilisearch.Indexer directly to avoid exposing too much +} + +// NewIndexer creates a new meilisearch indexer +func NewIndexer(url, apiKey, indexerName string) *Indexer { + settings := &meilisearch.Settings{ + // The default ranking rules of meilisearch are: ["words", "typo", "proximity", "attribute", "sort", "exactness"] + // So even if we specify the sort order, it could not be respected because the priority of "sort" is so low. + // So we need to specify the ranking rules to make sure the sort order is respected. + // See https://www.meilisearch.com/docs/learn/core_concepts/relevancy + RankingRules: []string{"sort", // make sure "sort" has the highest priority + "words", "typo", "proximity", "attribute", "exactness"}, + + SearchableAttributes: []string{ + "title", + "content", + "comments", + }, + DisplayedAttributes: []string{ + "id", + "title", + "content", + "comments", + }, + FilterableAttributes: []string{ + "repo_id", + "is_public", + "is_pull", + "is_closed", + "label_ids", + "no_label", + "milestone_id", + "project_id", + "project_board_id", + "poster_id", + "assignee_id", + "mention_ids", + "reviewed_ids", + "review_requested_ids", + "subscriber_ids", + "updated_unix", + }, + SortableAttributes: []string{ + "updated_unix", + "created_unix", + "deadline_unix", + "comment_count", + "id", + }, + Pagination: &meilisearch.Pagination{ + MaxTotalHits: maxTotalHits, + }, + } + + inner := inner_meilisearch.NewIndexer(url, apiKey, indexerName, issueIndexerLatestVersion, settings) + indexer := &Indexer{ + inner: inner, + Indexer: inner, + } + return indexer +} + +// Index will save the index data +func (b *Indexer) Index(_ context.Context, issues ...*internal.IndexerData) error { + if len(issues) == 0 { + return nil + } + for _, issue := range issues { + _, err := b.inner.Client.Index(b.inner.VersionedIndexName()).AddDocuments(issue) + if err != nil { + return err + } + } + // TODO: bulk send index data + return nil +} + +// Delete deletes indexes by ids +func (b *Indexer) Delete(_ context.Context, ids ...int64) error { + if len(ids) == 0 { + return nil + } + + for _, id := range ids { + _, err := b.inner.Client.Index(b.inner.VersionedIndexName()).DeleteDocument(strconv.FormatInt(id, 10)) + if err != nil { + return err + } + } + // TODO: bulk send deletes + return nil +} + +// Search searches for issues by given conditions. +// Returns the matching issue IDs +func (b *Indexer) Search(ctx context.Context, options *internal.SearchOptions) (*internal.SearchResult, error) { + query := inner_meilisearch.FilterAnd{} + + if len(options.RepoIDs) > 0 { + q := &inner_meilisearch.FilterOr{} + q.Or(inner_meilisearch.NewFilterIn("repo_id", options.RepoIDs...)) + if options.AllPublic { + q.Or(inner_meilisearch.NewFilterEq("is_public", true)) + } + query.And(q) + } + + if options.IsPull.Has() { + query.And(inner_meilisearch.NewFilterEq("is_pull", options.IsPull.Value())) + } + if options.IsClosed.Has() { + query.And(inner_meilisearch.NewFilterEq("is_closed", options.IsClosed.Value())) + } + + if options.NoLabelOnly { + query.And(inner_meilisearch.NewFilterEq("no_label", true)) + } else { + if len(options.IncludedLabelIDs) > 0 { + q := &inner_meilisearch.FilterAnd{} + for _, labelID := range options.IncludedLabelIDs { + q.And(inner_meilisearch.NewFilterEq("label_ids", labelID)) + } + query.And(q) + } else if len(options.IncludedAnyLabelIDs) > 0 { + query.And(inner_meilisearch.NewFilterIn("label_ids", options.IncludedAnyLabelIDs...)) + } + if len(options.ExcludedLabelIDs) > 0 { + q := &inner_meilisearch.FilterAnd{} + for _, labelID := range options.ExcludedLabelIDs { + q.And(inner_meilisearch.NewFilterNot(inner_meilisearch.NewFilterEq("label_ids", labelID))) + } + query.And(q) + } + } + + if len(options.MilestoneIDs) > 0 { + query.And(inner_meilisearch.NewFilterIn("milestone_id", options.MilestoneIDs...)) + } + + if options.ProjectID.Has() { + query.And(inner_meilisearch.NewFilterEq("project_id", options.ProjectID.Value())) + } + if options.ProjectColumnID.Has() { + query.And(inner_meilisearch.NewFilterEq("project_board_id", options.ProjectColumnID.Value())) + } + + if options.PosterID.Has() { + query.And(inner_meilisearch.NewFilterEq("poster_id", options.PosterID.Value())) + } + + if options.AssigneeID.Has() { + query.And(inner_meilisearch.NewFilterEq("assignee_id", options.AssigneeID.Value())) + } + + if options.MentionID.Has() { + query.And(inner_meilisearch.NewFilterEq("mention_ids", options.MentionID.Value())) + } + + if options.ReviewedID.Has() { + query.And(inner_meilisearch.NewFilterEq("reviewed_ids", options.ReviewedID.Value())) + } + if options.ReviewRequestedID.Has() { + query.And(inner_meilisearch.NewFilterEq("review_requested_ids", options.ReviewRequestedID.Value())) + } + + if options.SubscriberID.Has() { + query.And(inner_meilisearch.NewFilterEq("subscriber_ids", options.SubscriberID.Value())) + } + + if options.UpdatedAfterUnix.Has() { + query.And(inner_meilisearch.NewFilterGte("updated_unix", options.UpdatedAfterUnix.Value())) + } + if options.UpdatedBeforeUnix.Has() { + query.And(inner_meilisearch.NewFilterLte("updated_unix", options.UpdatedBeforeUnix.Value())) + } + + if options.SortBy == "" { + options.SortBy = internal.SortByCreatedAsc + } + sortBy := []string{ + parseSortBy(options.SortBy), + "id:desc", + } + + skip, limit := indexer_internal.ParsePaginator(options.Paginator, maxTotalHits) + + counting := limit == 0 + if counting { + // If set limit to 0, it will be 20 by default, and -1 is not allowed. + // See https://www.meilisearch.com/docs/reference/api/search#limit + // So set limit to 1 to make the cost as low as possible, then clear the result before returning. + limit = 1 + } + + keyword := options.Keyword + if !options.IsFuzzyKeyword { + // to make it non fuzzy ("typo tolerance" in meilisearch terms), we have to quote the keyword(s) + // https://www.meilisearch.com/docs/reference/api/search#phrase-search + keyword = doubleQuoteKeyword(keyword) + } + + searchRes, err := b.inner.Client.Index(b.inner.VersionedIndexName()).Search(keyword, &meilisearch.SearchRequest{ + Filter: query.Statement(), + Limit: int64(limit), + Offset: int64(skip), + Sort: sortBy, + MatchingStrategy: "all", + }) + if err != nil { + return nil, err + } + + if counting { + searchRes.Hits = nil + } + + hits, err := convertHits(searchRes) + if err != nil { + return nil, err + } + + return &internal.SearchResult{ + Total: searchRes.EstimatedTotalHits, + Hits: hits, + }, nil +} + +func parseSortBy(sortBy internal.SortBy) string { + field := strings.TrimPrefix(string(sortBy), "-") + if strings.HasPrefix(string(sortBy), "-") { + return field + ":desc" + } + return field + ":asc" +} + +func doubleQuoteKeyword(k string) string { + kp := strings.Split(k, " ") + parts := 0 + for i := range kp { + part := strings.Trim(kp[i], "\"") + if part != "" { + kp[parts] = fmt.Sprintf(`"%s"`, part) + parts++ + } + } + return strings.Join(kp[:parts], " ") +} + +func convertHits(searchRes *meilisearch.SearchResponse) ([]internal.Match, error) { + hits := make([]internal.Match, 0, len(searchRes.Hits)) + for _, hit := range searchRes.Hits { + hit, ok := hit.(map[string]any) + if !ok { + return nil, ErrMalformedResponse + } + + issueID, ok := hit["id"].(float64) + if !ok { + return nil, ErrMalformedResponse + } + + hits = append(hits, internal.Match{ + ID: int64(issueID), + }) + } + return hits, nil +} diff --git a/modules/indexer/issues/meilisearch/meilisearch_test.go b/modules/indexer/issues/meilisearch/meilisearch_test.go new file mode 100644 index 00000000..349102b7 --- /dev/null +++ b/modules/indexer/issues/meilisearch/meilisearch_test.go @@ -0,0 +1,97 @@ +// Copyright 2023 The Gitea Authors. All rights reserved. +// SPDX-License-Identifier: MIT + +package meilisearch + +import ( + "fmt" + "net/http" + "os" + "testing" + "time" + + "code.gitea.io/gitea/modules/indexer/issues/internal" + "code.gitea.io/gitea/modules/indexer/issues/internal/tests" + + "github.com/meilisearch/meilisearch-go" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +func TestMeilisearchIndexer(t *testing.T) { + t.Skip("meilisearch not found in Forgejo test yet") + // The meilisearch instance started by pull-db-tests.yml > test-unit > services > meilisearch + url := "http://meilisearch:7700" + key := "" // auth has been disabled in test environment + + if os.Getenv("CI") == "" { + // Make it possible to run tests against a local meilisearch instance + url = os.Getenv("TEST_MEILISEARCH_URL") + if url == "" { + t.Skip("TEST_MEILISEARCH_URL not set and not running in CI") + return + } + key = os.Getenv("TEST_MEILISEARCH_KEY") + } + + ok := false + for i := 0; i < 60; i++ { + resp, err := http.Get(url) + if err == nil && resp.StatusCode == http.StatusOK { + ok = true + break + } + t.Logf("Waiting for meilisearch to be up: %v", err) + time.Sleep(time.Second) + } + if !ok { + t.Fatalf("Failed to wait for meilisearch to be up") + return + } + + indexer := NewIndexer(url, key, fmt.Sprintf("test_meilisearch_indexer_%d", time.Now().Unix())) + defer indexer.Close() + + tests.TestIndexer(t, indexer) +} + +func TestConvertHits(t *testing.T) { + _, err := convertHits(&meilisearch.SearchResponse{ + Hits: []any{"aa", "bb", "cc", "dd"}, + }) + require.ErrorIs(t, err, ErrMalformedResponse) + + validResponse := &meilisearch.SearchResponse{ + Hits: []any{ + map[string]any{ + "id": float64(11), + "title": "a title", + "content": "issue body with no match", + "comments": []any{"hey what's up?", "I'm currently bowling", "nice"}, + }, + map[string]any{ + "id": float64(22), + "title": "Bowling as title", + "content": "", + "comments": []any{}, + }, + map[string]any{ + "id": float64(33), + "title": "Bowl-ing as fuzzy match", + "content": "", + "comments": []any{}, + }, + }, + } + hits, err := convertHits(validResponse) + require.NoError(t, err) + assert.EqualValues(t, []internal.Match{{ID: 11}, {ID: 22}, {ID: 33}}, hits) +} + +func TestDoubleQuoteKeyword(t *testing.T) { + assert.EqualValues(t, "", doubleQuoteKeyword("")) + assert.EqualValues(t, `"a" "b" "c"`, doubleQuoteKeyword("a b c")) + assert.EqualValues(t, `"a" "d" "g"`, doubleQuoteKeyword("a d g")) + assert.EqualValues(t, `"a" "d" "g"`, doubleQuoteKeyword("a d g")) + assert.EqualValues(t, `"a" "d" "g"`, doubleQuoteKeyword(`a "" "d" """g`)) +} diff --git a/modules/indexer/issues/util.go b/modules/indexer/issues/util.go new file mode 100644 index 00000000..e752ae6f --- /dev/null +++ b/modules/indexer/issues/util.go @@ -0,0 +1,193 @@ +// Copyright 2023 The Gitea Authors. All rights reserved. +// SPDX-License-Identifier: MIT + +package issues + +import ( + "context" + "errors" + "fmt" + + "code.gitea.io/gitea/models/db" + issue_model "code.gitea.io/gitea/models/issues" + "code.gitea.io/gitea/modules/container" + "code.gitea.io/gitea/modules/indexer/issues/internal" + "code.gitea.io/gitea/modules/log" + "code.gitea.io/gitea/modules/queue" +) + +// getIssueIndexerData returns the indexer data of an issue and a bool value indicating whether the issue exists. +func getIssueIndexerData(ctx context.Context, issueID int64) (*internal.IndexerData, bool, error) { + issue, err := issue_model.GetIssueByID(ctx, issueID) + if err != nil { + if issue_model.IsErrIssueNotExist(err) { + return nil, false, nil + } + return nil, false, err + } + + // FIXME: what if users want to search for a review comment of a pull request? + // The comment type is CommentTypeCode or CommentTypeReview. + // But LoadDiscussComments only loads CommentTypeComment. + if err := issue.LoadDiscussComments(ctx); err != nil { + return nil, false, err + } + + comments := make([]string, 0, len(issue.Comments)) + for _, comment := range issue.Comments { + if comment.Content != "" { + // what ever the comment type is, index the content if it is not empty. + comments = append(comments, comment.Content) + } + } + + if err := issue.LoadAttributes(ctx); err != nil { + return nil, false, err + } + + labels := make([]int64, 0, len(issue.Labels)) + for _, label := range issue.Labels { + labels = append(labels, label.ID) + } + + mentionIDs, err := issue_model.GetIssueMentionIDs(ctx, issueID) + if err != nil { + return nil, false, err + } + + var ( + reviewedIDs []int64 + reviewRequestedIDs []int64 + ) + { + reviews, err := issue_model.FindReviews(ctx, issue_model.FindReviewOptions{ + ListOptions: db.ListOptionsAll, + IssueID: issueID, + OfficialOnly: false, + }) + if err != nil { + return nil, false, err + } + + reviewedIDsSet := make(container.Set[int64], len(reviews)) + reviewRequestedIDsSet := make(container.Set[int64], len(reviews)) + for _, review := range reviews { + if review.Type == issue_model.ReviewTypeRequest { + reviewRequestedIDsSet.Add(review.ReviewerID) + } else { + reviewedIDsSet.Add(review.ReviewerID) + } + } + reviewedIDs = reviewedIDsSet.Values() + reviewRequestedIDs = reviewRequestedIDsSet.Values() + } + + subscriberIDs, err := issue_model.GetIssueWatchersIDs(ctx, issue.ID, true) + if err != nil { + return nil, false, err + } + + var projectID int64 + if issue.Project != nil { + projectID = issue.Project.ID + } + + return &internal.IndexerData{ + ID: issue.ID, + RepoID: issue.RepoID, + IsPublic: !issue.Repo.IsPrivate, + Title: issue.Title, + Content: issue.Content, + Comments: comments, + IsPull: issue.IsPull, + IsClosed: issue.IsClosed, + LabelIDs: labels, + NoLabel: len(labels) == 0, + MilestoneID: issue.MilestoneID, + ProjectID: projectID, + ProjectColumnID: issue.ProjectColumnID(ctx), + PosterID: issue.PosterID, + AssigneeID: issue.AssigneeID, + MentionIDs: mentionIDs, + ReviewedIDs: reviewedIDs, + ReviewRequestedIDs: reviewRequestedIDs, + SubscriberIDs: subscriberIDs, + UpdatedUnix: issue.UpdatedUnix, + CreatedUnix: issue.CreatedUnix, + DeadlineUnix: issue.DeadlineUnix, + CommentCount: int64(len(issue.Comments)), + }, true, nil +} + +func updateRepoIndexer(ctx context.Context, repoID int64) error { + ids, err := issue_model.GetIssueIDsByRepoID(ctx, repoID) + if err != nil { + return fmt.Errorf("issue_model.GetIssueIDsByRepoID: %w", err) + } + for _, id := range ids { + if err := updateIssueIndexer(ctx, id); err != nil { + return err + } + } + return nil +} + +func updateIssueIndexer(ctx context.Context, issueID int64) error { + return pushIssueIndexerQueue(ctx, &IndexerMetadata{ID: issueID}) +} + +func deleteRepoIssueIndexer(ctx context.Context, repoID int64) error { + var ids []int64 + ids, err := issue_model.GetIssueIDsByRepoID(ctx, repoID) + if err != nil { + return fmt.Errorf("issue_model.GetIssueIDsByRepoID: %w", err) + } + + if len(ids) == 0 { + return nil + } + return pushIssueIndexerQueue(ctx, &IndexerMetadata{ + IDs: ids, + IsDelete: true, + }) +} + +type keepRetryKey struct{} + +// contextWithKeepRetry returns a context with a key indicating that the indexer should keep retrying. +// Please note that it's for background tasks only, and it should not be used for user requests, or it may cause blocking. +func contextWithKeepRetry(ctx context.Context) context.Context { + return context.WithValue(ctx, keepRetryKey{}, true) +} + +func pushIssueIndexerQueue(ctx context.Context, data *IndexerMetadata) error { + if issueIndexerQueue == nil { + // Some unit tests will trigger indexing, but the queue is not initialized. + // It's OK to ignore it, but log a warning message in case it's not a unit test. + log.Warn("Trying to push %+v to issue indexer queue, but the queue is not initialized, it's OK if it's a unit test", data) + return nil + } + + for { + select { + case <-ctx.Done(): + return ctx.Err() + default: + } + err := issueIndexerQueue.Push(data) + if errors.Is(err, queue.ErrAlreadyInQueue) { + return nil + } + if errors.Is(err, context.DeadlineExceeded) { // the queue is full + log.Warn("It seems that issue indexer is slow and the queue is full. Please check the issue indexer or increase the queue size.") + if ctx.Value(keepRetryKey{}) == nil { + return err + } + // It will be better to increase the queue size instead of retrying, but users may ignore the previous warning message. + // However, even it retries, it may still cause index loss when there's a deadline in the context. + log.Debug("Retry to push %+v to issue indexer queue", data) + continue + } + return err + } +} |