1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
|
// Copyright (C) 2012 Davis E. King (davis@dlib.net)
// License: Boost Software License See LICENSE.txt for the full license.
#undef DLIB_CHINESE_WHISPErS_ABSTRACT_Hh_
#ifdef DLIB_CHINESE_WHISPErS_ABSTRACT_Hh_
#include <vector>
#include "../rand.h"
#include "../graph_utils/ordered_sample_pair_abstract.h"
#include "../graph_utils/sample_pair_abstract.h"
namespace dlib
{
// ----------------------------------------------------------------------------------------
unsigned long chinese_whispers (
const std::vector<ordered_sample_pair>& edges,
std::vector<unsigned long>& labels,
const unsigned long num_iterations,
dlib::rand& rnd
);
/*!
requires
- is_ordered_by_index(edges) == true
ensures
- This function implements the graph clustering algorithm described in the
paper: Chinese Whispers - an Efficient Graph Clustering Algorithm and its
Application to Natural Language Processing Problems by Chris Biemann.
- Interprets edges as a directed graph. That is, it contains the edges on the
said graph and the ordered_sample_pair::distance() values define the edge
weights (larger values indicating a stronger edge connection between the
nodes). If an edge has a distance() value of infinity then it is considered
a "must link" edge.
- returns the number of clusters found.
- #labels.size() == max_index_plus_one(edges)
- for all valid i:
- #labels[i] == the cluster ID of the node with index i in the graph.
- 0 <= #labels[i] < the number of clusters found
(i.e. cluster IDs are assigned contiguously and start at 0)
- Duplicate edges are interpreted as if there had been just one edge with a
distance value equal to the sum of all the duplicate edge's distance values.
- The algorithm performs exactly num_iterations passes over the graph before
terminating.
!*/
// ----------------------------------------------------------------------------------------
unsigned long chinese_whispers (
const std::vector<sample_pair>& edges,
std::vector<unsigned long>& labels,
const unsigned long num_iterations,
dlib::rand& rnd
);
/*!
ensures
- This function is identical to the above chinese_whispers() routine except
that it operates on a vector of sample_pair objects instead of
ordered_sample_pairs. Therefore, this is simply a convenience routine. In
particular, it is implemented by transforming the given edges into
ordered_sample_pairs and then calling the chinese_whispers() routine defined
above.
!*/
// ----------------------------------------------------------------------------------------
unsigned long chinese_whispers (
const std::vector<ordered_sample_pair>& edges,
std::vector<unsigned long>& labels,
const unsigned long num_iterations = 100
);
/*!
requires
- is_ordered_by_index(edges) == true
ensures
- performs: return chinese_whispers(edges, labels, num_iterations, rnd)
where rnd is a default initialized dlib::rand object.
!*/
// ----------------------------------------------------------------------------------------
unsigned long chinese_whispers (
const std::vector<sample_pair>& edges,
std::vector<unsigned long>& labels,
const unsigned long num_iterations = 100
);
/*!
ensures
- performs: return chinese_whispers(edges, labels, num_iterations, rnd)
where rnd is a default initialized dlib::rand object.
!*/
// ----------------------------------------------------------------------------------------
}
#endif // DLIB_CHINESE_WHISPErS_ABSTRACT_Hh_
|