Adding upstream version 8.0.3.HEAD upstream/8.0.3 upstream debian

Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
author: Daniel Baumann <daniel.baumann@progress-linux.org> 2024-10-11 10:27:00 +0000
committer: Daniel Baumann <daniel.baumann@progress-linux.org> 2024-10-11 10:27:00 +0000
commit: 65aa53fc52ff15efe54df4147564828d535837f8 (patch)
tree: 31c51dad04fdcca80e6d3043c8bd49d2f1a51f83 /modules/indexer/issues/elasticsearch/elasticsearch.go
parent: Initial commit. (diff)
download: forgejo-debian.tar.xz
forgejo-debian.zip
1 files changed, 290 insertions, 0 deletions
diff --git a/modules/indexer/issues/elasticsearch/elasticsearch.go b/modules/indexer/issues/elasticsearch/elasticsearch.go
new file mode 100644
index 00000000..42e709a5
--- /dev/null
+++ b/modules/indexer/issues/elasticsearch/elasticsearch.go
@@ -0,0 +1,290 @@
+// Copyright 2019 The Gitea Authors. All rights reserved.
+// SPDX-License-Identifier: MIT
+
+package elasticsearch
+
+import (
+	"context"
+	"fmt"
+	"strconv"
+	"strings"
+
+	"code.gitea.io/gitea/modules/graceful"
+	indexer_internal "code.gitea.io/gitea/modules/indexer/internal"
+	inner_elasticsearch "code.gitea.io/gitea/modules/indexer/internal/elasticsearch"
+	"code.gitea.io/gitea/modules/indexer/issues/internal"
+
+	"github.com/olivere/elastic/v7"
+)
+
+const (
+	issueIndexerLatestVersion = 1
+	// multi-match-types, currently only 2 types are used
+	// Reference: https://www.elastic.co/guide/en/elasticsearch/reference/7.0/query-dsl-multi-match-query.html#multi-match-types
+	esMultiMatchTypeBestFields   = "best_fields"
+	esMultiMatchTypePhrasePrefix = "phrase_prefix"
+)
+
+var _ internal.Indexer = &Indexer{}
+
+// Indexer implements Indexer interface
+type Indexer struct {
+	inner                    *inner_elasticsearch.Indexer
+	indexer_internal.Indexer // do not composite inner_elasticsearch.Indexer directly to avoid exposing too much
+}
+
+// NewIndexer creates a new elasticsearch indexer
+func NewIndexer(url, indexerName string) *Indexer {
+	inner := inner_elasticsearch.NewIndexer(url, indexerName, issueIndexerLatestVersion, defaultMapping)
+	indexer := &Indexer{
+		inner:   inner,
+		Indexer: inner,
+	}
+	return indexer
+}
+
+const (
+	defaultMapping = `
+{
+	"mappings": {
+		"properties": {
+			"id": { "type": "long", "index": true },
+			"repo_id": { "type": "long", "index": true },
+			"is_public": { "type": "boolean", "index": true },
+
+			"title": {  "type": "text", "index": true },
+			"content": { "type": "text", "index": true },
+			"comments": { "type" : "text", "index": true },
+
+			"is_pull": { "type": "boolean", "index": true },
+			"is_closed": { "type": "boolean", "index": true },
+			"label_ids": { "type": "long", "index": true },
+			"no_label": { "type": "boolean", "index": true },
+			"milestone_id": { "type": "long", "index": true },
+			"project_id": { "type": "long", "index": true },
+			"project_board_id": { "type": "long", "index": true },
+			"poster_id": { "type": "long", "index": true },
+			"assignee_id": { "type": "long", "index": true },
+			"mention_ids": { "type": "long", "index": true },
+			"reviewed_ids": { "type": "long", "index": true },
+			"review_requested_ids": { "type": "long", "index": true },
+			"subscriber_ids": { "type": "long", "index": true },
+			"updated_unix": { "type": "long", "index": true },
+
+			"created_unix": { "type": "long", "index": true },
+			"deadline_unix": { "type": "long", "index": true },
+			"comment_count": { "type": "long", "index": true }
+		}
+	}
+}
+`
+)
+
+// Index will save the index data
+func (b *Indexer) Index(ctx context.Context, issues ...*internal.IndexerData) error {
+	if len(issues) == 0 {
+		return nil
+	} else if len(issues) == 1 {
+		issue := issues[0]
+		_, err := b.inner.Client.Index().
+			Index(b.inner.VersionedIndexName()).
+			Id(fmt.Sprintf("%d", issue.ID)).
+			BodyJson(issue).
+			Do(ctx)
+		return err
+	}
+
+	reqs := make([]elastic.BulkableRequest, 0)
+	for _, issue := range issues {
+		reqs = append(reqs,
+			elastic.NewBulkIndexRequest().
+				Index(b.inner.VersionedIndexName()).
+				Id(fmt.Sprintf("%d", issue.ID)).
+				Doc(issue),
+		)
+	}
+
+	_, err := b.inner.Client.Bulk().
+		Index(b.inner.VersionedIndexName()).
+		Add(reqs...).
+		Do(graceful.GetManager().HammerContext())
+	return err
+}
+
+// Delete deletes indexes by ids
+func (b *Indexer) Delete(ctx context.Context, ids ...int64) error {
+	if len(ids) == 0 {
+		return nil
+	} else if len(ids) == 1 {
+		_, err := b.inner.Client.Delete().
+			Index(b.inner.VersionedIndexName()).
+			Id(fmt.Sprintf("%d", ids[0])).
+			Do(ctx)
+		return err
+	}
+
+	reqs := make([]elastic.BulkableRequest, 0)
+	for _, id := range ids {
+		reqs = append(reqs,
+			elastic.NewBulkDeleteRequest().
+				Index(b.inner.VersionedIndexName()).
+				Id(fmt.Sprintf("%d", id)),
+		)
+	}
+
+	_, err := b.inner.Client.Bulk().
+		Index(b.inner.VersionedIndexName()).
+		Add(reqs...).
+		Do(graceful.GetManager().HammerContext())
+	return err
+}
+
+// Search searches for issues by given conditions.
+// Returns the matching issue IDs
+func (b *Indexer) Search(ctx context.Context, options *internal.SearchOptions) (*internal.SearchResult, error) {
+	query := elastic.NewBoolQuery()
+
+	if options.Keyword != "" {
+		searchType := esMultiMatchTypePhrasePrefix
+		if options.IsFuzzyKeyword {
+			searchType = esMultiMatchTypeBestFields
+		}
+
+		query.Must(elastic.NewMultiMatchQuery(options.Keyword, "title", "content", "comments").Type(searchType))
+	}
+
+	if len(options.RepoIDs) > 0 {
+		q := elastic.NewBoolQuery()
+		q.Should(elastic.NewTermsQuery("repo_id", toAnySlice(options.RepoIDs)...))
+		if options.AllPublic {
+			q.Should(elastic.NewTermQuery("is_public", true))
+		}
+		query.Must(q)
+	}
+
+	if options.IsPull.Has() {
+		query.Must(elastic.NewTermQuery("is_pull", options.IsPull.Value()))
+	}
+	if options.IsClosed.Has() {
+		query.Must(elastic.NewTermQuery("is_closed", options.IsClosed.Value()))
+	}
+
+	if options.NoLabelOnly {
+		query.Must(elastic.NewTermQuery("no_label", true))
+	} else {
+		if len(options.IncludedLabelIDs) > 0 {
+			q := elastic.NewBoolQuery()
+			for _, labelID := range options.IncludedLabelIDs {
+				q.Must(elastic.NewTermQuery("label_ids", labelID))
+			}
+			query.Must(q)
+		} else if len(options.IncludedAnyLabelIDs) > 0 {
+			query.Must(elastic.NewTermsQuery("label_ids", toAnySlice(options.IncludedAnyLabelIDs)...))
+		}
+		if len(options.ExcludedLabelIDs) > 0 {
+			q := elastic.NewBoolQuery()
+			for _, labelID := range options.ExcludedLabelIDs {
+				q.MustNot(elastic.NewTermQuery("label_ids", labelID))
+			}
+			query.Must(q)
+		}
+	}
+
+	if len(options.MilestoneIDs) > 0 {
+		query.Must(elastic.NewTermsQuery("milestone_id", toAnySlice(options.MilestoneIDs)...))
+	}
+
+	if options.ProjectID.Has() {
+		query.Must(elastic.NewTermQuery("project_id", options.ProjectID.Value()))
+	}
+	if options.ProjectColumnID.Has() {
+		query.Must(elastic.NewTermQuery("project_board_id", options.ProjectColumnID.Value()))
+	}
+
+	if options.PosterID.Has() {
+		query.Must(elastic.NewTermQuery("poster_id", options.PosterID.Value()))
+	}
+
+	if options.AssigneeID.Has() {
+		query.Must(elastic.NewTermQuery("assignee_id", options.AssigneeID.Value()))
+	}
+
+	if options.MentionID.Has() {
+		query.Must(elastic.NewTermQuery("mention_ids", options.MentionID.Value()))
+	}
+
+	if options.ReviewedID.Has() {
+		query.Must(elastic.NewTermQuery("reviewed_ids", options.ReviewedID.Value()))
+	}
+	if options.ReviewRequestedID.Has() {
+		query.Must(elastic.NewTermQuery("review_requested_ids", options.ReviewRequestedID.Value()))
+	}
+
+	if options.SubscriberID.Has() {
+		query.Must(elastic.NewTermQuery("subscriber_ids", options.SubscriberID.Value()))
+	}
+
+	if options.UpdatedAfterUnix.Has() || options.UpdatedBeforeUnix.Has() {
+		q := elastic.NewRangeQuery("updated_unix")
+		if options.UpdatedAfterUnix.Has() {
+			q.Gte(options.UpdatedAfterUnix.Value())
+		}
+		if options.UpdatedBeforeUnix.Has() {
+			q.Lte(options.UpdatedBeforeUnix.Value())
+		}
+		query.Must(q)
+	}
+
+	if options.SortBy == "" {
+		options.SortBy = internal.SortByCreatedAsc
+	}
+	sortBy := []elastic.Sorter{
+		parseSortBy(options.SortBy),
+		elastic.NewFieldSort("id").Desc(),
+	}
+
+	// See https://stackoverflow.com/questions/35206409/elasticsearch-2-1-result-window-is-too-large-index-max-result-window/35221900
+	// TODO: make it configurable since it's configurable in elasticsearch
+	const maxPageSize = 10000
+
+	skip, limit := indexer_internal.ParsePaginator(options.Paginator, maxPageSize)
+	searchResult, err := b.inner.Client.Search().
+		Index(b.inner.VersionedIndexName()).
+		Query(query).
+		SortBy(sortBy...).
+		From(skip).Size(limit).
+		Do(ctx)
+	if err != nil {
+		return nil, err
+	}
+
+	hits := make([]internal.Match, 0, limit)
+	for _, hit := range searchResult.Hits.Hits {
+		id, _ := strconv.ParseInt(hit.Id, 10, 64)
+		hits = append(hits, internal.Match{
+			ID: id,
+		})
+	}
+
+	return &internal.SearchResult{
+		Total: searchResult.TotalHits(),
+		Hits:  hits,
+	}, nil
+}
+
+func toAnySlice[T any](s []T) []any {
+	ret := make([]any, 0, len(s))
+	for _, item := range s {
+		ret = append(ret, item)
+	}
+	return ret
+}
+
+func parseSortBy(sortBy internal.SortBy) elastic.Sorter {
+	field := strings.TrimPrefix(string(sortBy), "-")
+	ret := elastic.NewFieldSort(field)
+	if strings.HasPrefix(string(sortBy), "-") {
+		ret.Desc()
+	}
+	return ret
+}
author	Daniel Baumann <daniel.baumann@progress-linux.org>	2024-10-11 10:27:00 +0000
committer	Daniel Baumann <daniel.baumann@progress-linux.org>	2024-10-11 10:27:00 +0000
commit	65aa53fc52ff15efe54df4147564828d535837f8 (patch)
tree	31c51dad04fdcca80e6d3043c8bd49d2f1a51f83 /modules/indexer/issues/elasticsearch/elasticsearch.go
parent	Initial commit. (diff)
download	forgejo-debian.tar.xz forgejo-debian.zip