summaryrefslogtreecommitdiffstats
path: root/include/git2/sys/hashsig.h
blob: 09c19aec0751c5924a2c56ea708b76979a819107 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
/*
 * Copyright (C) the libgit2 contributors. All rights reserved.
 *
 * This file is part of libgit2, distributed under the GNU GPL v2 with
 * a Linking Exception. For full terms see the included COPYING file.
 */
#ifndef INCLUDE_sys_hashsig_h__
#define INCLUDE_sys_hashsig_h__

#include "git2/common.h"

GIT_BEGIN_DECL

/**
 * Similarity signature of arbitrary text content based on line hashes
 *
 * This is an opaque type: this header only forward-declares the struct,
 * so callers work with it exclusively through pointers. Signatures are
 * produced by `git_hashsig_create` or `git_hashsig_create_fromfile` and
 * released with `git_hashsig_free`.
 */
typedef struct git_hashsig git_hashsig;

/**
 * Flags that control how a hashsig is computed
 *
 * GIT_HASHSIG_NORMAL, GIT_HASHSIG_IGNORE_WHITESPACE and
 * GIT_HASHSIG_SMART_WHITESPACE select the whitespace handling mode; they
 * are mutually exclusive and must not be combined with one another.
 * GIT_HASHSIG_ALLOW_SMALL_FILES occupies its own bit and is not part of
 * that exclusive group.
 */
typedef enum {
	/** Use all data */
	GIT_HASHSIG_NORMAL = 0,
	/** Ignore whitespace */
	GIT_HASHSIG_IGNORE_WHITESPACE = 1 << 0,
	/** Ignore \r and all space after \n */
	GIT_HASHSIG_SMART_WHITESPACE = 1 << 1,
	/** Allow hashing of small files */
	GIT_HASHSIG_ALLOW_SMALL_FILES = 1 << 2
} git_hashsig_option_t;

/**
 * Compute a similarity signature for a text buffer
 *
 * If you have passed the option GIT_HASHSIG_IGNORE_WHITESPACE, then the
 * whitespace will be removed from the buffer while it is being processed,
 * modifying the buffer in place. Sorry about that!
 *
 * NOTE(review): `buf` is declared `const char *`, which is at odds with
 * the in-place modification described above. Confirm against the
 * implementation which of the two is accurate.
 *
 * @param out Receives the computed similarity signature; release it with
 * `git_hashsig_free` when no longer needed.
 * @param buf The input buffer.
 * @param buflen The input buffer size (presumably in bytes; TODO confirm).
 * @param opts The signature computation options (see
 * `git_hashsig_option_t`).
 * @return 0 on success, GIT_EBUFS if the buffer doesn't contain enough data to
 * compute a valid signature (unless GIT_HASHSIG_ALLOW_SMALL_FILES is set), or
 * error code.
 */
GIT_EXTERN(int) git_hashsig_create(
	git_hashsig **out,
	const char *buf,
	size_t buflen,
	git_hashsig_option_t opts);

/**
 * Compute a similarity signature for a text file
 *
 * This walks through the file, only loading a maximum of 4K of file data at
 * a time. Otherwise, it acts just like `git_hashsig_create`.
 *
 * @param out Receives the computed similarity signature; release it with
 * `git_hashsig_free` when no longer needed.
 * @param path The path to the input file.
 * @param opts The signature computation options (see
 * `git_hashsig_option_t`).
 * @return 0 on success, GIT_EBUFS if the file doesn't contain enough data to
 * compute a valid signature (unless GIT_HASHSIG_ALLOW_SMALL_FILES is set), or
 * error code.
 */
GIT_EXTERN(int) git_hashsig_create_fromfile(
	git_hashsig **out,
	const char *path,
	git_hashsig_option_t opts);

/**
 * Release memory for a content similarity signature
 *
 * Use this on signatures returned through the `out` parameter of
 * `git_hashsig_create` or `git_hashsig_create_fromfile`.
 *
 * @param sig The similarity signature to free.
 */
GIT_EXTERN(void) git_hashsig_free(git_hashsig *sig);

/**
 * Measure similarity score between two similarity signatures
 *
 * Both signatures are taken by pointer-to-const.
 *
 * NOTE(review): presumably a higher score means the contents are more
 * alike (100 being the closest match); confirm against the implementation.
 *
 * @param a The first similarity signature to compare.
 * @param b The second similarity signature to compare.
 * @return [0 to 100] on success as the similarity score, or error code.
 */
GIT_EXTERN(int) git_hashsig_compare(
	const git_hashsig *a,
	const git_hashsig *b);

GIT_END_DECL

#endif