summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorDaniel Baumann <daniel.baumann@progress-linux.org>2020-04-25 04:33:52 +0000
committerDaniel Baumann <daniel.baumann@progress-linux.org>2020-04-25 04:33:52 +0000
commitbc13feb12b2c679b0e1d4f54d80d5019f6635d7b (patch)
tree9e8d9f823d6dd987c6f6041829d5ed26ea6c12a0
parentInitial commit. (diff)
downloadcluster-bc13feb12b2c679b0e1d4f54d80d5019f6635d7b.tar.xz
cluster-bc13feb12b2c679b0e1d4f54d80d5019f6635d7b.zip
Adding upstream version 2.1.0. (refs: upstream/2.1.0, upstream)
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
-rw-r--r--ChangeLog1654
-rw-r--r--DESCRIPTION57
-rw-r--r--INDEX65
-rw-r--r--MD5144
-rw-r--r--NAMESPACE86
-rw-r--r--PORTING114
-rw-r--r--R/0aaa.R21
-rw-r--r--R/agnes.q183
-rw-r--r--R/clara.q201
-rw-r--r--R/clusGap.R174
-rw-r--r--R/clusGapGen.R141
-rw-r--r--R/coef.R43
-rw-r--r--R/daisy.q212
-rw-r--r--R/diana.q144
-rw-r--r--R/ellipsoidhull.R129
-rw-r--r--R/fanny.q241
-rw-r--r--R/internal.R39
-rw-r--r--R/mona.q111
-rw-r--r--R/pam.q207
-rw-r--r--R/plothier.q216
-rw-r--r--R/plotpart.q520
-rw-r--r--R/silhouette.R254
-rw-r--r--R/zzz.R7
-rw-r--r--README42
-rw-r--r--build/partial.rdbbin0 -> 46746 bytes
-rw-r--r--data/agriculture.tab13
-rw-r--r--data/animals.tab21
-rw-r--r--data/chorSub.rdabin0 -> 1385 bytes
-rw-r--r--data/flower.R21
-rw-r--r--data/plantTraits.rdabin0 -> 3440 bytes
-rw-r--r--data/pluton.tab46
-rw-r--r--data/ruspini.tab76
-rw-r--r--data/votes.repub.tab51
-rw-r--r--data/xclara.rdabin0 -> 36820 bytes
-rw-r--r--inst/CITATION26
-rw-r--r--inst/NEWS.Rd455
-rw-r--r--inst/po/de/LC_MESSAGES/R-cluster.mobin0 -> 12996 bytes
-rw-r--r--inst/po/de/LC_MESSAGES/cluster.mobin0 -> 1284 bytes
-rw-r--r--inst/po/en@quot/LC_MESSAGES/R-cluster.mobin0 -> 12414 bytes
-rw-r--r--inst/po/en@quot/LC_MESSAGES/cluster.mobin0 -> 1211 bytes
-rw-r--r--inst/po/fr/LC_MESSAGES/R-cluster.mobin0 -> 12322 bytes
-rw-r--r--inst/po/ko/LC_MESSAGES/R-cluster.mobin0 -> 14128 bytes
-rw-r--r--inst/po/ko/LC_MESSAGES/cluster.mobin0 -> 1052 bytes
-rw-r--r--inst/po/pl/LC_MESSAGES/R-cluster.mobin0 -> 12385 bytes
-rw-r--r--inst/test-tools.R10
-rw-r--r--man/agnes.Rd274
-rw-r--r--man/agnes.object.Rd90
-rw-r--r--man/agriculture.Rd49
-rw-r--r--man/animals.Rd42
-rw-r--r--man/bannerplot.Rd62
-rw-r--r--man/chorSub.Rd34
-rw-r--r--man/clara.Rd190
-rw-r--r--man/clara.object.Rd55
-rw-r--r--man/clusGap.Rd259
-rw-r--r--man/clusplot.default.Rd266
-rw-r--r--man/clusplot.partition.Rd69
-rw-r--r--man/cluster-internal.Rd13
-rw-r--r--man/coef.hclust.Rd62
-rw-r--r--man/daisy.Rd218
-rw-r--r--man/diana.Rd169
-rw-r--r--man/dissimilarity.object.Rd61
-rw-r--r--man/ellipsoidhull.Rd108
-rw-r--r--man/fanny.Rd146
-rw-r--r--man/fanny.object.Rd69
-rw-r--r--man/flower.Rd53
-rw-r--r--man/lower.to.upper.tri.inds.Rd34
-rw-r--r--man/mona.Rd99
-rw-r--r--man/mona.object.Rd44
-rw-r--r--man/pam.Rd211
-rw-r--r--man/pam.object.Rd80
-rw-r--r--man/partition.object.Rd68
-rw-r--r--man/plantTraits.Rd97
-rw-r--r--man/plot.agnes.Rd105
-rw-r--r--man/plot.diana.Rd83
-rw-r--r--man/plot.mona.Rd54
-rw-r--r--man/plot.partition.Rd104
-rw-r--r--man/pltree.Rd65
-rw-r--r--man/pluton.Rd50
-rw-r--r--man/predict.ellipsoid.Rd60
-rw-r--r--man/print.agnes.Rd24
-rw-r--r--man/print.clara.Rd25
-rw-r--r--man/print.diana.Rd24
-rw-r--r--man/print.dissimilarity.Rd44
-rw-r--r--man/print.fanny.Rd29
-rw-r--r--man/print.mona.Rd23
-rw-r--r--man/print.pam.Rd23
-rw-r--r--man/ruspini.Rd31
-rw-r--r--man/silhouette.Rd193
-rw-r--r--man/sizeDiss.Rd32
-rw-r--r--man/summary.agnes.Rd25
-rw-r--r--man/summary.clara.Rd42
-rw-r--r--man/summary.diana.Rd17
-rw-r--r--man/summary.mona.Rd16
-rw-r--r--man/summary.pam.Rd20
-rw-r--r--man/twins.object.Rd26
-rw-r--r--man/volume.ellipsoid.Rd43
-rw-r--r--man/votes.repub.Rd19
-rw-r--r--man/xclara.Rd66
-rw-r--r--po/R-cluster.pot296
-rw-r--r--po/R-de.po433
-rw-r--r--po/R-en@quot.po337
-rw-r--r--po/R-fr.po432
-rw-r--r--po/R-ko.po393
-rw-r--r--po/R-pl.po1144
-rw-r--r--po/cluster.pot54
-rw-r--r--po/de.po56
-rw-r--r--po/ko.po60
-rwxr-xr-xpo/update-me.sh29
-rw-r--r--src/clara.c1036
-rw-r--r--src/cluster.h198
-rw-r--r--src/daisy.f124
-rw-r--r--src/dysta.f56
-rw-r--r--src/fanny.c507
-rw-r--r--src/ind_2.h42
-rw-r--r--src/init.c98
-rw-r--r--src/mona.c313
-rw-r--r--src/pam.c1107
-rw-r--r--src/sildist.c79
-rw-r--r--src/spannel.c154
-rw-r--r--src/twins.c527
-rw-r--r--tests/agnes-ex.R89
-rw-r--r--tests/agnes-ex.Rout.save782
-rw-r--r--tests/clara-NAs.R47
-rw-r--r--tests/clara-NAs.Rout.save658
-rw-r--r--tests/clara-ex.R47
-rw-r--r--tests/clara.R108
-rw-r--r--tests/clara.Rout.save1495
-rw-r--r--tests/clusplot-out.R29
-rw-r--r--tests/clusplot-out.Rout.save123
-rw-r--r--tests/daisy-ex.R110
-rw-r--r--tests/daisy-ex.Rout.save929
-rw-r--r--tests/diana-boots.R32
-rw-r--r--tests/diana-ex.R31
-rw-r--r--tests/diana-ex.Rout.save547
-rw-r--r--tests/ellipsoid-ex.R34
-rw-r--r--tests/ellipsoid-ex.Rout.save224
-rw-r--r--tests/fanny-ex.R72
-rw-r--r--tests/mona.R82
-rw-r--r--tests/mona.Rout.save2809
-rw-r--r--tests/pam.R215
-rw-r--r--tests/pam.Rout.save1562
-rw-r--r--tests/silhouette-default.R83
-rw-r--r--tests/silhouette-default.Rout.save618
-rw-r--r--tests/sweep-ex.R69
-rw-r--r--tests/withAutoprint.R268
145 files changed, 28427 insertions, 0 deletions
diff --git a/ChangeLog b/ChangeLog
new file mode 100644
index 0000000..d16e165
--- /dev/null
+++ b/ChangeLog
@@ -0,0 +1,1654 @@
+2014-03-27 Martin Maechler
+
+ * NOTE: All newer changes are summarized in the NEWS file
+ (./inst/NEWS.Rd) and detailed & timestamped in the svn logs.
+
+
+2014-03-26 Martin Maechler <maechler@stat.math.ethz.ch>
+
+ * DESCRIPTION (Version): 1.15.2
+ * R/*.q updates to make messages translatable; proposed by Lukasz Daniel
+
+2014-03-12 Martin Maechler <maechler@stat.math.ethz.ch>
+
+ * DESCRIPTION (Version): 1.15.1
+
+ * man/mona.Rd: anyNA2[cbind(<i>, <j>)] <- NA fails in R < 3.0.x
+
+2013-11-06 Martin Maechler <maechler@stat.math.ethz.ch>
+
+ * DESCRIPTION (Version): 1.15.0; also using Authors@R
+
+ * R/agnes.q (agnes): method "gaverage" contributed by Pierre Roudier.
+
+2013-03-26 Martin Maechler <maechler@stat.math.ethz.ch>
+
+ * DESCRIPTION (Version): 1.14.5, never released
+
+2013-03-26 Martin Maechler <maechler@stat.math.ethz.ch>
+
+ * DESCRIPTION (Version): 1.14.4, released to CRAN, 2013-03-26
+ * po/R-de.po: trivial update from Detlef,
+
+2012-02-06 Martin Maechler <maechler@stat.math.ethz.ch>
+
+ * DESCRIPTION (Version): 1.14.2, released to CRAN today
+ * R/clusGap.R (maxSE): with 5 methods to compute "maximal gap",
+ used also in print() method.
+
+2012-01-21 Martin Maechler <maechler@stat.math.ethz.ch>
+
+ * R/clusGap.R (print.clusGap, plot.clusGap): added
+
+2012-01-17 Martin Maechler <maechler@stat.math.ethz.ch>
+
+ * R/clusGap.R: new clusGap() for computing the cluster gap statistic.
+
+2011-12-21 Martin Maechler <maechler@stat.math.ethz.ch>
+
+ * R/coef.R (coefHier): newly export coefHier(), interfacing to C's bncoef().
+
+ * tests/diana-boots.R (dianaBoot): add a "bootstrap", for considerably more tests.
+ * tests/diana-ex.R: reorganized
+
+ * src/twins.c: newly translated from Fortran, for more future flexibility
+ * src/twins.f: no longer there
+
+ * R/agnes.q (as.dendrogram.twins): define for 'twins' rather than
+ just 'agnes'.
+
+2011-12-03 Martin Maechler <maechler@stat.math.ethz.ch>
+
+ * R/plotpart.q (clusplot.default): xlab, and ylab arguments;
+ new 's.x.2d' argument which allows specifying the plot setup in detail.
+ (clas.snijpunt): also check for NAs in x[1:2,m].
+ Suggestions originally from Peter Ruckdeschel
+
+2011-10-16 Martin Maechler <maechler@stat.math.ethz.ch>
+
+ * po/: setup for translation
+
+2011-09-14 Martin Maechler <maechler@stat.math.ethz.ch>
+
+ * inst/CITATION: update according to state-of-the-art.
+
+2011-08-29 Martin Maechler <maechler@stat.math.ethz.ch>
+
+ * DESCRIPTION (Version): 1.14.1, released to CRAN, 2011-10-16
+
+ * R/silhouette.R (plot.silhouette): col = <one per cluster> had
+ ordering bug.
+ * man/silhouette.Rd: added example
+
+2011-06-07 Martin Maechler <maechler@stat.math.ethz.ch>
+
+ * DESCRIPTION (Version): 1.14.0, released to CRAN, 2011-06-07
+
+ * R/plotpart.q (clusplot.default): use message() and warning()
+ appropriately instead of just cat(). Further, add a 'add = FALSE'
+ argument which allows *adding* ellipses to an existing plot.
+
+2011-06-06 Martin Maechler <maechler@stat.math.ethz.ch>
+
+ * R/plotpart.q (clusplot.default): new 'cex' and 'cex.txt', the
+ latter for the "point" labels.
+
+2011-05-20 Martin Maechler <maechler@stat.math.ethz.ch>
+
+ * src/pam.c (cl_pam): trace_lev: also trace the computation of
+ dissimilarities (a very expensive part, for large n).
+
+ * R/pam.q (pam): cluster.only=TRUE: if the *computed*
+ dissimilarities have NA's the necessary error was not signalled.
+
+2011-03-07 Martin Maechler <maechler@stat.math.ethz.ch>
+
+ * tests/*.Rout.save: update after R-devel's "scientific()" patch.
+ * DESCRIPTION (Version): 1.13.4, *not* released
+
+2011-02-21 Martin Maechler <maechler@stat.math.ethz.ch>
+
+ * DESCRIPTION (Version): 1.13.3, released to CRAN today
+
+2010-12-12 Martin Maechler <maechler@stat.math.ethz.ch>
+
+ * R/ellipsoidhull.R, *: use "full argument names"
+
+2010-11-09 Martin Maechler <maechler@stat.math.ethz.ch>
+
+ * R/clara.q (clara): enforce sampsize > k {as the help file always said};
+ allow disabling 'DUP=FALSE' via env variable.
+
+2010-11-08 Martin Maechler <maechler@stat.math.ethz.ch>
+
+ * src/clara.c (cl_clara): inserting rand_k into nsel[]: simplified
+ and avoiding (typically inconsequential) out_of_bound array access.
+
+2010-10-21 Martin Maechler <maechler@stat.math.ethz.ch>
+
+ * DESCRIPTION (Version): 1.13.2 ; Enhances: MASS; released to CRAN, 2010-11-10
+
+ * src/cluster.h, src/daisy.f, src/mona.f:
+ * src/init.c (FortEntries): s/cl_/cl/ for now,
+ as some f77/g77 setups cannot deal with it correctly.
+
+2010-06-24 Martin Maechler <maechler@stat.math.ethz.ch>
+
+ * DESCRIPTION (Version): 1.13.1
+
+2010-06-23 Martin Maechler <maechler@stat.math.ethz.ch>
+
+ * man/daisy.Rd: add our *generalized* Gower formula (LaTeX), and
+ update the detailed description accordingly.
+
+2010-06-02 Martin Maechler <maechler@stat.math.ethz.ch>
+
+ * DESCRIPTION (Version): 1.13.0
+
+ * src/daisy.f (cl_daisy): renamed from daisy
+ * src/init.c, src/*.[chf]: renamed others, so we can use
+ * NAMESPACE (useDynLib): .registration = TRUE
+
+2010-05-28 Martin Maechler <maechler@stat.math.ethz.ch>
+
+ * R/daisy.q (daisy): using variable weights (for gower).
+ * src/daisy.f: ditto
+
+2010-03-29 Martin Maechler <maechler@stat.math.ethz.ch>
+
+ * DESCRIPTION (Version): 1.12.3
+ * R/plothier.q (bannerplot): do not use formals(pretty)
+
+2009-12-05 Martin Maechler <maechler@stat.math.ethz.ch>
+
+ * man/daisy.Rd: fix page number for Gower (1971) reference.
+
+2009-10-05 Martin Maechler <maechler@stat.math.ethz.ch>
+
+ * DESCRIPTION (Version): 1.12.1
+
+ * man/ellipsoidhull.Rd: fix missing \link (parts of former 'graphics'
+ have long been moved to 'grDevices').
+
+2009-05-13 Martin Maechler <maechler@stat.math.ethz.ch>
+
+ * DESCRIPTION (Version): 1.12.0 -- released to CRAN
+
+ * R/silhouette.R (silhouette.default, print.summary.*, plot.*):
+ silhouette(x, *) and its methods now also work when x uses integer
+ codes different than 1:k. Previously, this could seg.fault.
+
+ * tests/silhouette-default.R: test it.
+
+
+2009-03-12 Martin Maechler <maechler@stat.math.ethz.ch>
+
+ * man/daisy.Rd: fix typo/thinko (thanks to Felix Mueller).
+
+2009-01-21 Martin Maechler <maechler@stat.math.ethz.ch>
+
+ * R/agnes.q (as.dendrogram.agnes): new utility
+ * NAMESPACE: exported
+
+2009-01-17 Martin Maechler <maechler@stat.math.ethz.ch>
+
+ * man/plot.agnes.Rd: link to pltree.twins() directly
+
+ * R/agnes.q (print.summary.agnes): output 'agnes' instead of `agnes'
+
+2009-01-07 Martin Maechler <maechler@stat.math.ethz.ch>
+
+ * DESCRIPTION (Version, Date): 1.11.12 and CRAN-released
+
+2009-01-05 Martin Maechler <maechler@stat.math.ethz.ch>
+
+ * man/ellipsoidhull.Rd: fix for more stringent Rd parsing
+
+2008-09-16 Martin Maechler <maechler@stat.math.ethz.ch>
+
+ * R/ellipsoidhull.R (print.ellipsoid): and others: replace
+ backquotes ( ` ) by regular quotes.
+
+2008-06-15 Martin Maechler <maechler@stat.math.ethz.ch>
+
+ * R/silhouette.R (sortSilhouette): do *not* overwrite the
+ correctly reordered rownames: Fixes very long standing bug
+ which led to wrong observation labels in silhouette plots.
+
+2008-04-10 Martin Maechler <maechler@stat.math.ethz.ch>
+
+ * R/zzz.R (.onUnload): add ..dynam.unload()ing
+
+2008-02-29 Martin Maechler <maechler@stat.math.ethz.ch>
+
+ * DESCRIPTION (Date, Version): 1.11.10 and release
+
+2007-12-19 Martin Maechler <maechler@stat.math.ethz.ch>
+
+ * src/pam.c (cstat, dark): move variable declarations to local context.
+ (bswap): add a few R_CheckUserInterrupt() calls
+
+ * R/pam.q (pam): new argument 'do.swap = TRUE'. Setting it to
+ FALSE allows skipping the more expensive "SWAP" phase.
+ * man/pam.Rd: (ditto)
+
+2007-12-18 Martin Maechler <maechler@stat.math.ethz.ch>
+
+ * src/ind_2.h (ind_2): for m := max(i,j) > 46342, (m-2)*(m-1) give
+ integer overflow which leads to a seg.fault. Now fixed.
+ * src/pam.c (bswap): slightly improve trace output.
+ * tests/pam.R, tests/pam.Rout.save: corresponding slight adaptions.
+
+ * man/pam.Rd: "large" and
+ * man/clara.Rd: "small" datasets; pam() works with thousands ...
+
+ * R/clara.q (clara): error out when users try clara(<dist>, ...).
+
+2007-09-28 Martin Maechler <maechler@stat.math.ethz.ch>
+
+ * DESCRIPTION (Date, Version): 1.11.9
+
+ * man/pam.Rd: in invisible example, do not use partially matched [[.
+
+2007-09-04 Martin Maechler <maechler@stat.math.ethz.ch>
+
+ * src/mona.f (mona): dummy initializations for "-Wall"
+ * src/fanny.c (fygur): dummy init 'lang'
+
+2007-08-23 Martin Maechler <maechler@stat.math.ethz.ch>
+
+ * src/daisy.f et al: de-tabify the fortran sources
+
+2007-06-05 Martin Maechler <maechler@stat.math.ethz.ch>
+
+ * DESCRIPTION (Version): 1.11.7 (mainly for R-devel)
+
+2007-06-02 Martin Maechler <maechler@stat.math.ethz.ch>
+
+ * man/plot.diana.Rd, etc: s/dendrogramm/dendrogram/
+
+2007-05-31 Martin Maechler <maechler@stat.math.ethz.ch>
+
+ * man/plantTraits.Rd: add \encoding{latin1}
+
+2007-04-09 Martin Maechler <maechler@stat.math.ethz.ch>
+
+ * DESCRIPTION (Version): 1.11.6 - needed
+ for R version >= 2.6.0.
+
+ * R/daisy.q (daisy): for 2-valued variables only warn about
+ non-specified type when n >= 10.
+
+2007-04-07 Martin Maechler <maechler@stat.math.ethz.ch>
+
+ * R/diana.q (diana): don't use as.double(.) and DUP=FALSE !
+
+2007-03-30 Martin Maechler <maechler@stat.math.ethz.ch>
+
+ * man/diana.Rd: merge man/diana.object.Rd into *one* help file
+
+ * DESCRIPTION (Version): 1.11.5 - for R 2.5.0
+
+2007-03-29 Martin Maechler <maechler@stat.math.ethz.ch>
+
+ * R/silhouette.R (silhouette.default): use x$clustering only when x is
+ a list.
+
+2006-12-11 Martin Maechler <maechler@stat.math.ethz.ch>
+
+ * DESCRIPTION (Version): 1.11.4 -- ready
+
+ * tests/to-25x, tests/to-24x: different outputs for R 2.5.0 (unstable)
+ * tests/clara.Rout.save-R25x,..: and R-2.4.x
+
+ * tests/silhouette-default.R: use [as.integer(rownames(.)), ]
+ since we cannot guarantee rownames there.
+
+2006-12-05 Martin Maechler <maechler@stat.math.ethz.ch>
+
+ * src/fanny.c (fuzzy): 14 instead of 15 sign.digits for trace_lev >= 2
+ printing.
+
+ * R/clara.q (stop): message for NA-observations: only show first
+ dozen of observation numbers.
+ * tests/clara-NAs.R: test the above
+ * tests/clara-NAs.Rout.save: ditto
+
+2006-12-02 Martin Maechler <maechler@stat.math.ethz.ch>
+
+ * DESCRIPTION (Version): 1.11.3 -- released to CRAN
+
+ * R/daisy.q (daisy): finish metric "gower", and test an example
+
+2006-12-01 Martin Maechler <maechler@stat.math.ethz.ch>
+
+ * man/daisy.Rd: more content about Gower; thanks to Gavin Simpson
+
+2006-11-23 Martin Maechler <maechler@stat.math.ethz.ch>
+
+ * R/daisy.q (daisy): metric = "gower" -- not yet finished
+
+2006-09-07 Martin Maechler <maechler@stat.math.ethz.ch>
+
+ * DESCRIPTION (Version): 1.11.2 released
+
+ * src/init.c: register dysta3 as C, not Fortran; drop trailing "_"
+ ==> need to adapt these:
+ * src/fanny.c:
+ * src/cluster.h:
+ * tests/dysta-ex.R:
+ * tests/dysta-ex.Rout.save:
+
+2006-08-24 Martin Maechler <maechler@stat.math.ethz.ch>
+
+ * DESCRIPTION (Version): 1.11.1 released to CRAN
+
+ * R/silhouette.R (sortSilhouette): make sure "iOrd" is there also
+ if "Ordered" is TRUE. (fixes plotting w/ col = 'clusterwise'.)
+
+2006-08-23 Martin Maechler <maechler@stat.math.ethz.ch>
+
+ * tests/mona.R: update (slightly extend) regression test
+ * tests/mona.Rout.save: consequence
+
+ * R/mona.q (mona): replace apply(<matrix>, 2, factor) with safer
+ code, to work correctly in R 2.4.0 and later (thanks to Brian).
+
+2006-06-12 Martin Maechler <maechler@stat.math.ethz.ch>
+
+ * R/plotpart.q (clusplot.default): new argument 'sub'; default is
+ unchanged, but you can suppress the subtitle.
+
+2006-05-17 Martin Maechler <maechler@stat.math.ethz.ch>
+
+ * src/fanny.c (fygur): a little more cleanup --
+ for compatibility with dark() in src/pam.c
+
+ * DESCRIPTION (Version): 1.11.0, to be released to CRAN
+
+2006-05-03 Martin Maechler <maechler@stat.math.ethz.ch>
+
+ * DESCRIPTION (Depends): require R >= 2.2.1 -- which should have
+ happened earlier, since registration of Fortran symbols failed before.
+
+2006-04-18 Martin Maechler <maechler@stat.math.ethz.ch>
+
+ * man/plantTraits.Rd: new dataset with many variables, including
+ all kinds of factors, provided by Jeanne Vallet.
+ * data/plantTraits.rda: useful for daisy() illustrations
+
+2006-04-12 Martin Maechler <maechler@stat.math.ethz.ch>
+
+ * R/fanny.q (fanny): several new options: metric = "SqEuclidean",
+ iniMem.p: initial membership matrix can be specified;
+ cluster.only, keep.diss, keep.data: for saving memory/speed.
+ warning when k.crisp < k.
+ printing of $membership is now in rounded percent
+ * man/fanny.Rd: document the above
+ * tests/fanny-ex.R: much extended
+
+2006-04-11 Martin Maechler <maechler@stat.math.ethz.ch>
+
+ * man/chorSub.Rd: new dataset;
+ * data/chorSub.rda: useful for fanny() illustrations
+
+ * R/fanny.q (fanny): Dunn's partition coefficient (and its
+ normalization) now always use sum.. u.. ^ 2 (and not "^ memb.exp").
+ option 'trace.lev' for iteration monitoring
+ * src/fanny.c (fuzzy): (ditto)
+
+2006-04-08 Martin Maechler <maechler@stat.math.ethz.ch>
+
+ * src/fanny.c: Translated from Fortran (fanny.f); potentially with
+ more options, etc.
+
+2006-03-20 Martin Maechler <maechler@stat.math.ethz.ch>
+
+ * DESCRIPTION (Version): 1.10.5 for CRAN
+
+ * .rsync-exclude: new file useful since I moved my RCS archive to SVN
+ (using 'cvs2svn' and a perl script on 'svn-stat').
+
+2006-02-27 Martin Maechler <maechler@stat.math.ethz.ch>
+
+ * src/sildist.c (sildist): 2nd round (by Romain and me)
+
+2006-02-25 Martin Maechler <maechler@stat.math.ethz.ch>
+
+ * R/silhouette.R (silhouette.default): new C version in file
+ * src/sildist.c (sildist): provided by Romain Francois
+
+2006-02-04 Martin Maechler <maechler@stat.math.ethz.ch>
+
+ * R/ellipsoidhull.R (print.ellipsoid): also work without 'conv' slot
+
+ * man/ellipsoidhull.Rd: finally explain 'd2'; and \link to other
+ ellipse implementations.
+
+ * man/volume.ellipsoid.Rd: + example of 'ellipsoid' construction
+
+2006-01-31 Martin Maechler <maechler@stat.math.ethz.ch>
+
+ * man/bannerplot.Rd: (et al.): use \dQuote{} and \sQuote{}
+
+2006-01-27 Martin Maechler <maechler@stat.math.ethz.ch>
+
+ * man/pltree.twins.Rd: explain the as.hclust() dispatch; add
+ example, particularly for plot(as.dendrogram(.)).
+
+2006-01-26 Martin Maechler <maechler@stat.math.ethz.ch>
+
+ * DESCRIPTION (Version): 1.10.4 -- released to CRAN
+
+2006-01-25 Martin Maechler <maechler@stat.math.ethz.ch>
+
+ * R/silhouette.R (silhouette.clara): added (for "full = TRUE" option)
+
+2006-01-07 Martin Maechler <maechler@stat.math.ethz.ch>
+
+ * DESCRIPTION (Version): 1.10.3
+
+ * src/init.c: registering all (R-callable) C and Fortran routines
+
+ * src/cluster.h: declare the remaining callable routines
+
+2005-12-30 Martin Maechler <maechler@stat.math.ethz.ch>
+
+ * inst/CITATION: first version
+
+ * R/silhouette.R (plot.silhouette): 'nmax.lab' was not obeyed
+ properly (labels were drawn for large n when the
+ silhouette had rownames).
+
+ * man/silhouette.Rd: explain clara's silhouette and "full" one.
+
+2005-09-06 Martin Maechler <maechler@stat.math.ethz.ch>
+
+ * man/pluton.Rd: \source: new URL @ www.agoras.ua.ac.be
+ * man/clusplot.default.Rd: ditto
+ * man/ellipsoidhull.Rd: ""
+ * README: ""
+
+2005-08-31 Martin Maechler <maechler@stat.math.ethz.ch>
+
+ * DESCRIPTION (Version): 1.10.2 released to CRAN
+
+ * src/clara.c (bswap2): minor cosmetic
+
+2005-08-30 Martin Maechler <maechler@stat.math.ethz.ch>
+
+ * R/daisy.q (daisy): the case of a *binary*-only matrix (not
+ data frame), used to fail, now works fine, tested in
+ * tests/daisy-ex.R:
+
+ * src/twins.f: get rid of gfortran warnings
+ * src/mona.f: ditto
+ * src/fanny.f: "
+ * src/meet.f: "
+
+2005-08-06 Martin Maechler <maechler@stat.math.ethz.ch>
+
+ * tests/clara*: adapt to slightly changed output
+
+ * src/clara.c (clara), (bswap2): better trace_lev printing;
+ code more in line with 'pam'.
+
+2005-08-05 Martin Maechler <maechler@stat.math.ethz.ch>
+
+ * DESCRIPTION (Version): 1.10.2 {not released}
+
+ * R/clara.q (clara): no longer support deprecated 'keepdata';
+ also return 'i.med'; new argument 'medoids.x' allowing to
+ save even more memory; do not assign 'data' when
+ keep.data=FALSE.
+
+ * man/clara.Rd: + 'i.med' + 'medoids.x' - 'keepdata'
+ * man/clara.object.Rd: ditto
+
+2005-07-19 Martin Maechler <maechler@stat.math.ethz.ch>
+
+ * tests/silhouette-default.R: if(FALSE) library(*, lib="..MM..")
+
+2005-06-30 Martin Maechler <maechler@stat.math.ethz.ch>
+
+ * tests/silhouette-default.R (isTRUE): need for R < 2.1.x
+
+2005-06-29 Martin Maechler <maechler@stat.math.ethz.ch>
+
+ * DESCRIPTION (Date):
+ * DESCRIPTION (Version): 1.10.1
+
+ * R/silhouette.R (plot.silhouette): format() clus.avg.widths more nicely
+ * R/silhouette.R (silhouette.default): don't use max.col() which
+ returns random.
+ * tests/silhouette-default.R: test only 3rd column difference
+ * tests/silhouette-default.Rout: update {for non-random sil*.default()}
+
+2005-06-20 Martin Maechler <maechler@stat.math.ethz.ch>
+
+ * tests/silhouette-default.R: added test for equivalence of
+ silhouette.default() and silhouette.partition()
+ * tests/silhouette-default.Rout.save: new output
+ --> The silhouette() results are *still* not always the same (-> fixed)
+
+ * R/silhouette.R (silhouette.default): if Nj := #{obs. in C_j} = 1
+ the silhouette width is 0, not 1, as in pam's original code.
+
+ * man/silhouette.Rd: document the case of 1-obs-clusters.
+
+2005-06-09 Martin Maechler <maechler@stat.math.ethz.ch>
+
+ * DESCRIPTION (Version): 1.10.0
+
+2005-06-08 Martin Maechler <maechler@stat.math.ethz.ch>
+
+ * R/pam.q (pam): new argument 'trace.lev = 0'
+
+ * man/pam.object.Rd: pam() also return $medID
+
+ * src/pam.c (dark): take special care of empty clusters (ntt == 0)
+ * src/pam.c (pam): also work when medoids are not increasing.
+
+ * R/pam.q (pam): user-supplied 'medoids': even more checks
+
+2005-06-02 Martin Maechler <maechler@stat.math.ethz.ch>
+
+ * tests/fanny-ex.R: use new RNG (hence change example); add samples
+ of memb.exp -> 1
+
+ * DESCRIPTION (Author): new formulation for better auto-citation()
+
+ * src/fanny.f (fanny): 'r' ("membership exponent") becomes
+ user-specifiable argument; ditto for 'maxit'
+
+ * R/fanny.q (fanny): new arguments 'memb.exp' ("membership exponent");
+ 'tol' and 'maxit'
+ * R/fanny.q (.print.fanny): utility to be used in both print.*()
+ methods; nicer, using formatC() and 'digits' explicitly.
+
+ * man/fanny.Rd: new arguments, see above
+
+ * man/fanny.object.Rd: mention new '$ objective' result component
+
+ * man/print.fanny.Rd: now also for [print.]summary.fanny();
+ mention new explicit 'digits' argument to print methods.
+
+2005-04-04 Martin Maechler <maechler@stat.math.ethz.ch>
+
+ * DESCRIPTION (Version): 1.9.8
+
+ * R/daisy.q (print.dissimilarity): rewritten, now based on
+ print.hclust().
+
+ * R/zzz.R (gettextf): ..etc: define substitutes such as to allow
+ running in R < 2.1.0
+
+ * man/ellipsoidhull.Rd: fixed outdated "link[pkg]"
+ * man/clusplot.default.Rd: ditto
+ * man/*.Rd : etc
+
+2005-03-22 Martin Maechler <maechler@stat.math.ethz.ch>
+
+ * R/clara.q (clara): start to 'rationalize' messages for future
+ translation.
+
+ * src/cluster.h: define the _(String) macro (for i18n), not yet used.
+
+2005-01-21 Martin Maechler <maechler@stat.math.ethz.ch>
+
+ * man/clara.Rd: get rid of iso-latin1 character
+
+2004-12-14 Martin Maechler <maechler@stat.math.ethz.ch>
+
+ * man/agnes.Rd: concept{UPGMA ..}
+
+2004-11-27 Martin Maechler <maechler@stat.math.ethz.ch>
+
+ * DESCRIPTION (Version): 1.9.7 (not yet CRAN)
+
+ * R/plothier.q (bannerplot): use default 'main' and 'sub'
+ * man/bannerplot.Rd: + example
+
+ * R/coef.R (coef.hclust): Implement R function for
+ computing the agglomerative/divisive coefficient.
+ * man/coef.hclust.Rd: new; including example
+
+ * man/predict.ellipsoid.Rd: wrong link
+
+2004-11-26 Martin Maechler <maechler@stat.math.ethz.ch>
+
+ * src/twins.f (bncoef): drop unused argument 'ner'
+
+2004-11-20 Martin Maechler <maechler@stat.math.ethz.ch>
+
+ * man/clusplot.default.Rd: fix typo
+
+2004-10-28 Martin Maechler <maechler@stat.math.ethz.ch>
+
+ * man/predict.ellipsoid.Rd: add nice example
+
+2004-08-19 Martin Maechler <maechler@stat.math.ethz.ch>
+
+ * DESCRIPTION (LazyData): yes
+
+2004-08-12 Martin Maechler <maechler@stat.math.ethz.ch>
+
+ * DESCRIPTION (Version): 1.9.6
+ (Depends): added standard packages
+
+ * R/daisy.q (daisy): scale=TRUE didn't collaborate with NAs
+ (since 2003-11-29).
+ * tests/daisy-ex.R: add an example, testing the above
+
+
+2004-08-03 Martin Maechler <maechler@stat.math.ethz.ch>
+
+ * DESCRIPTION (Version): 1.9.5 --> to CRAN
+
+2004-07-21 Martin Maechler <maechler@stat.math.ethz.ch>
+
+ * man/mona.Rd: drop an extraneous "}\n" that Kurt's new tools found
+ * man/twins.object.Rd: ditto
+
+2004-07-06 Martin Maechler <maechler@stat.math.ethz.ch>
+
+ * DESCRIPTION (Version): 1.9.5 (not yet released)
+
+ * man/pam.Rd: note on 'medoids'
+
+2004-07-02 Martin Maechler <maechler@stat.math.ethz.ch>
+
+ * R/pam.q: new argument 'medoids'
+ * man/pam.Rd: ex.with 'medoids'
+ * src/pam.c (bswap): new argument 'med_given'
+
+2004-06-29 Martin Maechler <maechler@stat.math.ethz.ch>
+
+ * src/pam.c (bswap): moved ammax/nmax computation into same loop
+ as beter[]. added many comments.
+ GOAL: Allow initial medoids user-specification.
+ >>> TODO: do the analogous thing in src/clara.c 's bswap2()
+
+2004-06-24 Martin Maechler <maechler@stat.math.ethz.ch>
+
+ * DESCRIPTION (Version): 1.9.4 -- fix dysta() using jtmd[] problems
+ (thanks to valgrind and BDR).
+
+ * R/agnes.q (agnes): initialize 'jtmd' to length p in any case
+ * R/diana.q (diana): ditto
+ * R/fanny.q (fanny): "
+ * R/pam.q (pam): "
+
+ * R/daisy.q (daisy): pass 'mdata' to Fortran
+ * src/daisy.f (daisy): new "boolean" argument 'mdata'
+
+ * src/clara.c (clara): pass has_NA to dysta2() and don't use
+ jtmd[] and valmd[] if(!has_NA)
+ * src/cluster.h (dysta2): new arg 'has_NA'
+
+
+2004-06-18 Martin Maechler <maechler@stat.math.ethz.ch>
+
+ * DESCRIPTION (Version): 1.9.3 {only change = below}
+ * src/ind_2.h (ind_2): use __inline__ only for gcc (__GNUC__)
+
+2004-06-11 Martin Maechler <maechler@stat.math.ethz.ch>
+
+ * man/clara.Rd: finish \note{} about 'rngR' (thanks to Christian H).
+
+2004-06-01 Martin Maechler <maechler@stat.math.ethz.ch>
+
+ * src/ind_2.h: new file for __inline__ definition of ind_2(),
+ #included from these two files:
+ * src/clara.c:
+ * src/pam.c:
+
+ * tests/agnes-ex.R: test for "weighted" == "flexible"(0.5)
+
+2004-05-28 Martin Maechler <maechler@stat.math.ethz.ch>
+
+ * DESCRIPTION (Version): 1.9.2 (not yet released)
+
+ * R/agnes.q (agnes): New method = "flexible" (and 'par.method = ..')
+ * src/twins.f (averl): implementing "flexible" Lance-Williams formula.
+ * man/agnes.Rd:
+
+2004-05-25 Martin Maechler <maechler@stat.math.ethz.ch>
+
+ * R/silhouette.R (silhouette.default): give silhouette 0 (not NaN)
+ when a_i = b_i {thanks to example and suggestions by Witek Wolsky}.
+
+2004-04-26 Martin Maechler <maechler@stat.math.ethz.ch>
+
+ * src/pam.c: cosmetic efficiency improvement from Li Long
+
+2004-03-11 Martin Maechler <maechler@stat.math.ethz.ch>
+
+ * DESCRIPTION (Version): 1.8.1 for R versions <= 1.8.1
+ 1.9.1 for R versions >= 1.9.0
+ -- released to CRAN
+
+ * src/clara.c (clara): fixed second NA-handling bug:
+ the test should be "nunfs+1 != jran"
+ Fixed "rngR = TRUE" bug: accidentally still used internal RNG.
+ Fixed 3rd bug if (lrg_sam) : need jk < n_sam for nsel[jk].
+ More & nicer printing for 'trace > 0'.
+
+ * R/clara.q (clara): more useful error msg for too many NAs (jstop=1)
+
+
+2004-03-10 Martin Maechler <maechler@stat.math.ethz.ch>
+
+ * src/clara.c (clara): finally fixed longstanding bug which could
+ cause wrong results and segmentation faults, instead of ending
+ with the following error:
+
+ * tests/clara-NAs.R: new file w/ former seg.fault
+
+ * src/cluster.h (ind_2): new function instead of F77_CALL(meet)
+ * src/clara.c (clara): defined here (and used).
+ * src/pam.c (pam): used here
+
+2004-03-09 Martin Maechler <maechler@stat.math.ethz.ch>
+
+ * R/clara.q (clara): sampsize = min(n, <old_default>)
+
+2004-03-08 Martin Maechler <maechler@stat.math.ethz.ch>
+
+ * tests/agnes-ex.R: do not use object.size(), since
+ * tests/diana-ex.R: it's different on 64 bit, e.g.
+
+ * R/agnes.q (agnes): \ when diss = TRUE,
+ * R/diana.q (diana): \ but 'x' is not a "dissimilarity",
+ * R/fanny.q (fanny): / try as.dist(x)
+ * R/pam.q (pam): / and be consistent in all 4 functions
+
+2004-01-23 Martin Maechler <maechler@stat.math.ethz.ch>
+
+ * R/silhouette.R (silhouette.default): do not require cluster
+ codes in 1:k (Murad Nayal <mn216@columbia.edu>).
+ * man/silhouette.Rd: accordingly; mention 2 <= k <= n-1
+
+2003-12-22 Martin Maechler <maechler@stat.math.ethz.ch>
+
+ * DESCRIPTION (Version): 1.8.0 for R versions <= 1.8.1
+ 1.9.0 for R versions >= 1.9.0
+
+2003-12-19 Martin Maechler <maechler@stat.math.ethz.ch>
+
+ * NAMESPACE: Finally make a namespaced package
+
+2003-12-01 Martin Maechler <maechler@stat.math.ethz.ch>
+
+ * R/pam.q (print.summary.pam): readable indication *no* L/L*
+ clusters.
+ (print.pam): improve as well.
+
+ * man/pam.Rd: documenting `cluster.only'
+
+2003-11-29 Martin Maechler <maechler@stat.math.ethz.ch>
+
+ * R/daisy.q (daisy): better checking & handling if(stand)
+
+ * R/pam.q (pam): new argument `cluster.only = FALSE'. If true,
+ only return the clustering vector, no other info.
+
+ * src/pam.c: translated from src/pam.f and "cleaned";
+ `all_stats' := NOT(cluster_only).
+
+ * src/cluster.h: new pam.c related headers
+ * src/dysta.f (dysta): new file: part of former src/pam.f
+ * R/pam.q (pam): use .C
+
+2003-11-28 Martin Maechler <maechler@stat.math.ethz.ch>
+
+ * R/daisy.q (daisy): fix the new scale 0 code (from 11-17)
+
+2003-11-17 Martin Maechler <maechler@stat.math.ethz.ch>
+
+ * DESCRIPTION (Version): 1.7.7 -- (not released)
+
+ * tests/daisy-ex.R: new test for this:
+
+ * R/daisy.q (daisy): scale 0 does not give NaN dissimilarities
+ anymore. Use pColl() in messages
+
+ * R/zzz.R (sQuote): & dQuote() for R version < 1.8
+
+2003-10-25 Martin Maechler <maechler@stat.math.ethz.ch>
+
+ * man/fanny.Rd: typo in the text-part of \deqn{}
+
+2003-09-24 Martin Maechler <maechler@stat.math.ethz.ch>
+
+ * DESCRIPTION (Date): Sept.24: 20th anniversary w/ Lilo! -- release
+
+ * man/plot.mona.Rd: yet another codoc
+
+2003-09-23 Martin Maechler <maechler@stat.math.ethz.ch>
+
+ * DESCRIPTION (Version): 1.7.6 (for R 1.8.0)
+
+ * man/daisy.Rd: codoc difference for argument *defaults*
+ * man/ellipsoidhull.Rd: " " " "
+ * man/plot.mona.Rd: " " " "
+ * man/plot.diana.Rd: " " " "
+ use \method{..}{..} (more!) also for these:
+ * man/predict.ellipsoid.Rd
+ * man/silhouette.Rd:
+ * man/plot.agnes.Rd:
+ * man/plot.partition.Rd:
+ * man/ellipsoidhull.Rd:
+
+
+
+2003-09-03 Martin Maechler <maechler@stat.math.ethz.ch>
+
+ * DESCRIPTION (Date): -> release
+
+2003-08-27 Martin Maechler <maechler@stat.math.ethz.ch>
+
+ * man/flower.Rd: use V1-V8 in doc (since can't change the data).
+
+2003-08-19 Martin Maechler <maechler@stat.math.ethz.ch>
+
+ * DESCRIPTION (Version): 1.7.5 -- not yet released
+
+2003-08-13 Martin Maechler <maechler@stat.math.ethz.ch>
+
+ __ All these are thanks to Luke Tierney's checkUsagePackage() !! __
+
+ * R/silhouette.R (plot.silhouette): `clw'
+ (sortSilhouette): `clid' & `k'
+ (summary.silhouette): `n'
+ * R/ellipsoidhull.R (ellipsoidhull): `ina' unneeded
+ * R/plotpart.q (clusplot.default): extraneous if() in "funny case"
+
+2003-07-18 Martin Maechler <maechler@stat.math.ethz.ch>
+
+ * DESCRIPTION (Date): updated --> release to CRAN
+
+ * R/daisy.q (daisy): checking "> 2 levels" for binary vars
+ (gave wrong error when only 1 level; thanks to
+ Duncan.Mackay@flinders.edu.au).
+ Now allow "only 1 level" for binary vars
+
+2003-07-10 Martin Maechler <maechler@stat.math.ethz.ch>
+
+ * DESCRIPTION (Date): update; not released, but put up on FTP
+ * R/silhouette.R (sortSilhouette): keep ordering; use it to
+ * R/silhouette.R (plot.silhouette): order colors, allow cluster colors
+
+2003-07-09 Martin Maechler <maechler@stat.math.ethz.ch>
+
+ * DESCRIPTION (Version): 1.7.4 -- not yet released
+
+ * R/daisy.q (daisy): better error message for invalid type
+ components; now also works for
+ * man/daisy.Rd: new example `dfl3'
+
+2003-06-10 Martin Maechler <maechler@stat.math.ethz.ch>
+
+ * DESCRIPTION (Version): 1.7.3
+
+ * R/silhouette.R (silhouette.default): fix bugs for case "Nj == 1"
+ * tests/silhouette-default.Rout.save:
+ * tests/silhouette-default.R: new test for the above
+
+2003-06-04 Martin Maechler <maechler@stat.math.ethz.ch>
+
+ * tests/clara-ex.R and man/clara.Rd: add "drop = FALSE"
+
+2003-06-03 Martin Maechler <maechler@stat.math.ethz.ch>
+
+ * man/clara.Rd: the "simulation" example is now correct for any seed.
+ * tests/clara-ex.R: using "correct" ex. above
+
+2003-06-02 Martin Maechler <maechler@stat.math.ethz.ch>
+
+ * R/daisy.q (daisy): s/codes/unclass/
+
+ * tests/ellipsoid-ex.R: no need for try() anymore; use new RNG
+
+ * tests/clara.R:
+ * tests/clara-ex.R: better if(...) RNGdefault("1.6")
+ * tests/fanny-ex.R:
+ * tests/mona.R:
+
+2003-05-28 Martin Maechler <maechler@stat.math.ethz.ch>
+
+ * DESCRIPTION (Version): 1.7.2
+
+ * R/plotpart.q: clusplot.partition(): try to find `x$diss' by
+ looking up x$call.
+
+2003-04-30 Martin Maechler <maechler@stat.math.ethz.ch>
+
+ * DESCRIPTION (Version): 1.7.1
+
+ * man/pam.object.Rd: add example about assessing #{clusters} via
+ silhouette widths from Ch.Hennig.
+
+ * R/plotpart.q (plot.partition): new argument `dist'
+ It doesn't try a clusplot if `keep.diss' was FALSE and no `dist'
+ is specified.
+ * R/plotpart.q (clusplot.partition): new `dist' argument
+
+ * R/pam.q (pam): keep.diss passed wrong `jdyss' to Fortran!
+
+2003-04-08 Martin Maechler <maechler@stat.math.ethz.ch>
+
+ * R/plothier.q (pltree.twins): simplified label construction;
+ call plot( <hclust object> ) instead of plot.hclust().
+
+2003-03-26 Martin Maechler <maechler@stat.math.ethz.ch>
+
+ * tests/clara-ex.R: new, because of unclear non-reproducibility
+ * tests/clara.R: not example(clara) and smaller, moved some to above
+
+ * DESCRIPTION (Version): 1.7.0 (for R 1.7.0)
+ * DESCRIPTION (Depends): at least R 1.4
+ * R/zzz.R: requiring 1.4 needs much less
+
+ * R/zzz.R: define colSums() substitute for old R versions
+ * tests/ : updated *.Rout.save files
+ * .Rbuildignore: R/README-Splus is way outdated and irrelevant
+
+2003-03-17 Martin Maechler <maechler@stat.math.ethz.ch>
+
+ * R/0aaa.R: Make sure assigning class c("dissimilarity", "dist")
+ (had many cases where only "dissim.." was used!).
+ * R/*.q: assign (dissim.., dist) class
+
+2003-03-11 Martin Maechler <maechler@stat.math.ethz.ch>
+
+ * R/clara.q (clara): new argument `rngR' allowing to use R's RNG
+ instead of the primitive builtin randm().
+ * man/clara.Rd: example showing its use.
+ * src/clara.c (clara):
+
+ * R/pam.q (pam): new `keep.data' and DUP=FALSE in .Fortran
+
+
+2003-03-05 Martin Maechler <maechler@stat.math.ethz.ch>
+
+ * R/agnes.q (agnes): use DUP = FALSE
+ * R/diana.q (diana): for both "twins()" routines --> 3x less memory!
+
+2003-02-22 Martin Maechler <maechler@stat.math.ethz.ch>
+
+ * R/clara.q (clara): do *not* transpose the x[,] matrix anymore
+ * src/clara.c (clara): C code accesses un-transposed x[,] \
+ --> + 10% speed
+
+2003-02-07 Martin Maechler <maechler@stat.math.ethz.ch>
+
+ * R/silhouette.R (silhouette.default): fixed k=2 case;
+ * man/silhouette.Rd: new argument `dmatrix'.
+
+2003-02-06 Martin Maechler <maechler@stat.math.ethz.ch>
+
+ * man/plot.partition.Rd: new `main' argument (instead of "...")
+ * R/plotpart.q: passed to plot.silhouette().
+
+2003-01-27 Martin Maechler <maechler@stat.math.ethz.ch>
+
+ * R/agnes.q (agnes): store `method' with object for as.hclust.twins()!
+ \ new logical args `keep.diss' and `keep.data'
+ * R/diana.q (diana): > if (!keep.diss), do not need "dis2"
+ * src/twins.f (twins):/ and don't copy the dist()s here.
+
+ * man/pltree.twins.Rd: mention new `ylab' argument.
+ * R/plothier.q: __unfinished__ : xlab/ylab depend on "which.plot"!
+
+2002-12-30 Martin Maechler <maechler@stat.math.ethz.ch>
+
+ * man/bannerplot.Rd: and
+ * R/plothier.q (bannerplot): new `labels' e.g. passed from plot.agnes().
+
+ * man/plot.agnes.Rd: finally added `labels = ' example
+
+ * tests/clara.R: for() loops were silly; + other ex + comments
+ * src/clara.c: cosmetic
+ * src/fanny.f: and
+ * src/pam.f: cosmetic "syncing" of almost identical parts
+
+2002-12-28 Martin Maechler <maechler@stat.math.ethz.ch>
+
+ * tests/clara.Rout.save:
+ * tests/clara.R: add ruspini examples
+
+ * src/clara.c: finally found & fixed "clara(ruspini, 4)" problem
+
+2002-12-21 Martin Maechler <maechler@stat.math.ethz.ch>
+
+ * DESCRIPTION (Version): 1.6-4 {for R 1.6.2}
+
+ * R/diana.q (diana): since 1.6-1, integer x was not coerced
+
+ * tests/agnes-ex.R,
+ * tests/diana-ex.R, and *.Rout.save: new test files
+
+ * src/twins.f: some comments added; gotos removed
+ banag() and bandy() were identical --> renamed to bncoef()
+
+2002-12-17 Martin Maechler <maechler@stat.math.ethz.ch>
+
+ * R/agnes.q: agnes(*, method = "no") no longer segfaults
+
+ * R/silhouette.R (plot.silhouette): adj=1.04 (not "1.05")
+
+ * R/plothier.q (pltree.twins): new `labels' arg. such that it can
+ be given to plot.agnes() e.g.
+
+2002-12-05 Martin Maechler <maechler@stat.math.ethz.ch>
+
+ * DESCRIPTION (Date, Version): 1.6-3
+
+ * src/cluster.h: F77_NAME(meet)
+ * src/clara.c: F77_CALL(meet)
+
+2002-10-28 Martin Maechler <maechler@stat.math.ethz.ch>
+
+ * src/pam.f (pam): comments, (bswap): variable renaming
+
+2002-10-26 Martin Maechler <maechler@stat.math.ethz.ch>
+
+ * DESCRIPTION (Version): 1.6-003 := pre-1.6-3
+
+2002-10-23 Martin Maechler <maechler@stat.math.ethz.ch>
+
+ * R/agnes.q (print.summary.agnes): oops: had `clara' there!
+
+2002-10-23 Martin Maechler <maechler@stat.math.ethz.ch>
+
+ * Version 1.6-2 ==> released to CRAN
+
+2002-10-21 Martin Maechler <maechler@stat.math.ethz.ch>
+
+ * R/daisy.q (daisy): allow "+- Inf" in `x' (gave error from Fortran)
+
+2002-10-19 Martin Maechler <maechler@stat.math.ethz.ch>
+
+ * R/silhouette.R (plot.silhouette): also use border = 0 and new
+ default col = "gray".
+
+ * R/plothier.q (bannerplot): arguments inside = . , border = 0
+ work with R 1.6.1's barplot() {give "not yet .." otherwise}.
+
+ * tests/mona.R and mona.Rout.save: new tests
+
+ * R/mona.q (mona): get rid of 1:nchar() warnings; other cosmetic
+ * src/mona.f (mona): variable renaming; logical cleanup in NA dealing
+
+ * R/zzz.R (stop): multi-argument version for R versions < 1.6
+
+2002-10-18 Martin Maechler <maechler@stat.math.ethz.ch>
+
+ * R CMD check now okay.
+
+ * man/plot.mona.Rd: update (for bannerplot() usage), same for
+ * man/plot.agnes.Rd and
+ * man/plot.diana.Rd.
+
+ * R/plothier.q (bannerplot): finally working for all three;
+ argument `xax.pretty = TRUE' (set to FALSE for old behavior).
+ (plot.mona): using bannerplot(); {and is.na(.) <- .}
+
+2002-10-16 Martin Maechler <maechler@stat.math.ethz.ch>
+
+ * R/plothier.q (bannerplot): newly standalone function,
+ called from plot.diana(), plot.agnes() -- not yet plot.mona()
+
+2002-10-15 Martin Maechler <maechler@stat.math.ethz.ch>
+
+ * DESCRIPTION (Version): 1.6-2 (not yet released)
+
+ * R/plothier.q (plot.diana, bannerplot): rev(.) x-axis labels
+ --> fixing bug introduced at/before version 1.2-5
+
+ * R/plothier.q (plot.agnes, bannerplot) and plot.diana:
+ add "las = 2" to vertical axis(), and add space to right
+ for plot.diana.
+
+2002-09-12 Martin Maechler <maechler@stat.math.ethz.ch>
+
+ * DESCRIPTION (Date): -- released locally only
+
+2002-09-11 Martin Maechler <maechler@stat.math.ethz.ch>
+
+ * R/clara.q (clara): for the case of k = 1 (and diss=FALSE),
+ * R/pam.q (pam): medoids should stay matrix
+
+ * src/pam.f (pam): put under RCS; replaced goto by if..else..
+
+ * man/pam.Rd: mention silhouette() directly
+ * man/silhouette.Rd: document summary(); + 1ex
+
+2002-09-09 Martin Maechler <maechler@stat.math.ethz.ch>
+
+ * DESCRIPTION (Version): 1.6-1 ==> for CRAN and R 1.6.0
+
+2002-09-07 Martin Maechler <maechler@stat.math.ethz.ch>
+
+ * R/silhouette.R (silhouette): New class, generic generator, methods;
+ particularly, plot.silhouette() was plot.partition internal.
+ improve documentation about silhouette info; update
+ * R/clara.q:
+ * R/plotpart.q (plot.partition):
+
+2002-09-06 Martin Maechler <maechler@stat.math.ethz.ch>
+
+ * man/partition.object.Rd,
+ * man/pam.object.Rd, man/clara.object.Rd, man/fanny.object.Rd,
+ * man/plot.partition.Rd:
+ reorganize documentation on silhouette information, thanks to
+ thoughts from Christian Hennig.
+
+2002-09-04 Martin Maechler <maechler@stat.math.ethz.ch>
+
+ * man/agnes.object.Rd: mention as.hclust() properly
+
+ * src/spannel.c (spannel): bail out also when *last* deter became <= 0
+ * tests/ellipsoid-ex.R (and *.Rout.save): updated
+
+ * DESCRIPTION (Version): 1.6-0
+
+ * R/zzz.R: dev.interactive(), warnings(),... for old R versions
+ * man/summary.clara.Rd: make example run for R versions < 1.5.0
+
+2002-09-03 Martin Maechler <maechler@stat.math.ethz.ch>
+
+ * R/plotpart.q (plot.partition): new `data' argument and
+ smart `which.plots' default for clara(*, keep = FALSE)
+
+ * R/daisy.q (summary.dissimilarity): fix ``number of dissim''
+
+ * R/*.q: 1) use apply(inax,2, any)--- really want colNAs()
+ 2) use better "valmisdat" (missing value).
+
+ * tests/dysta-ex.R (dysta): new file, explore "dysta" & "dysta3"
+ in Fortran
+
+ * src/fanny.f: get rid of many gotos
+
+2002-09-02 Martin Maechler <maechler@stat.math.ethz.ch>
+
+ * tests/fanny-ex.R: and ...Rout.save : new files
+
+ * src/twins.f (twins): drop dysta4(), use dysta() from pam.f
+
+2002-08-31 Martin Maechler <maechler@stat.math.ethz.ch>
+
+ * DESCRIPTION (Version): 1.5-4 -- have achieved something {want more}
+
+ * src/clara.c (clara): finally found and fixed the rare segfault bug:
+ jran == 1 was wrong test (when 1st sample was unusable)
+
+ (randm): faster implementation of primitive RNG
+
+ * src/spannel.c: new translated from src/spannel.f
+ in order to debug the compiler-dependent outputs in
+ * tests/ellipsoid-ex.Rout.save etc
+ * tests/sweep-ex.R (.C)
+
+2002-08-30 Martin Maechler <maechler@stat.math.ethz.ch>
+
+ * src/clara.c (clara):
+ * man/clara.object.Rd:
+ * R/clara.q: new argument `keepdata' and `trace'
+
+ * R/plotpart.q (clusplot.default): xlim, ylim: believe user
+
+2002-08-27 Martin Maechler <maechler@stat.math.ethz.ch>
+
+ * src/clara.c (clara): new file, translated from Fortran; easier
+ to debug the (rare!) seg.fault. "make check fine"
+
+ * DESCRIPTION (Depends): R >= 1.2
+ * R/plothier.q (pltree.twins): (drop code for R version < 1.2.0)
+
+2002-08-23 Martin Maechler <maechler@stat.math.ethz.ch>
+
+ * DESCRIPTION (Version): 1.5-3 {released only locally}
+
+ * R/plotpart.q (clusplot.default): xlim, ylim had "<" and ">" reversed
+ * man/clusplot.default.Rd: fix interactive(), add x- and ylim example
+
+2002-08-02 Martin Maechler <maechler@stat.math.ethz.ch>
+
+ * src/twins.f (twins) et al:
+ * src/fanny.f (fanny) et al:
+ * src/mona.f (mona):
+ * src/daisy.f (daisy): explicit instead of "implicit" var.declaration
+ * src/spannel.f (spannel) et al: no "implicit none"
+
+2002-07-29 Martin Maechler <maechler@stat.math.ethz.ch>
+
+ * DESCRIPTION (Version): 1.5-2
+
+
+ * R/daisy.q (print.summary.dissimilarity): new function and
+ summary.dissimilarity() now returns a classed object.
+
+ * R/agnes.q (print.summary.agnes),
+ * R/clara.q (print.summary.clara),
+ * R/diana.q (print.summary.diana),
+ * R/fanny.q (print.summary.fanny),
+ * R/pam.q (print.summary.pam):
+ print.summary.*() now only *summarizes* dissimilarities (if at all)
+
+2002-07-27 Martin Maechler <maechler@stat.math.ethz.ch>
+
+ * tests/pam.R and tests/pam.Rout.save : new files
+
+ * man/summary.agnes.Rd: + example
+ * man/summary.clara.Rd: + example
+
+ * R/clara.q (print.clara): improvements: call, no long clus. vector
+ * R/agnes.q (print.agnes): similar
+
+ * man/daisy.Rd : added "[mva]" to \link{}s. The same in:
+ * man/clusplot.default.Rd: new `col.clus' argument,
+ new option `labels = 5'.
+ * R/plotpart.q (clusplot.default): cosmetic cleaning;
+ `col.p' is now vectorized for point coloring.
+ The cluster ellipse is now labeled with font=4 and proper color
+
+2002-06-17 Martin Maechler <maechler@stat.math.ethz.ch>
+
+ * man/sizeDiss.Rd: fix \value{}
+
+ * tests/daisy-ex.R: new file (and *.Rout.save)
+
+2002-06-17 Martin Maechler <maechler@stat.math.ethz.ch>
+
+ * submit to CRAN -- won't be in R's 1.5.1-recommended
+
+ * R/daisy.q (daisy): make sure "0","1" factors are valid binary vars
+ several extra checks {incl apply(<empty>) `bug'}
+
+ * man/diana.object.Rd: show how to use cutree().
+
+2002-05-22 Martin Maechler <maechler@stat.math.ethz.ch>
+
+ * R/daisy.q (daisy): warn if binary variables have non-{0,1} values.
+
+ * src/pam.f (cstat) et al: eliminated many gotos; +comments
+ * src/meet.f (meet): + comment
+
+2002-05-21 Martin Maechler <maechler@stat.math.ethz.ch>
+
+
+ * DESCRIPTION (Version): 1.5-1
+ new daisy() behavior for binary variables
+
+ * src/daisy.f (daisy): add comments; use if..else.. instead of goto
+
+ * man/dissimilarity.object.Rd: new "Types" attribute in mixed case.
+ * man/daisy.Rd:
+ * R/daisy.q (daisy): fix data.class "integer",
+ allow type = "symm"; return types used in mixed case;
+ correctly modify jtmd[] for binary variables (!)
+
+
+2002-03-30 Martin Maechler <maechler@stat.math.ethz.ch>
+
+ * R/plotpart.q: replace `` == "NA" '' by is.na(.)
+ * R/mona.q (mona): <ditto>
+
+2002-03-04 Martin Maechler <maechler@stat.math.ethz.ch>
+
+ * DESCRIPTION (Version): 1.4-1
+
+ * R/zzz.R (.First.lib),
+ * R/plothier.q: replace plclust() by plot[.hclust]() everywhere.
+
+2002-01-29 Martin Maechler <maechler@stat.math.ethz.ch>
+
+ * R/pam.q (pam): comment on "valmisdat"; same in
+ * R/fanny.q, R/agnes.q, R/clara.q, R/diana.q
+
+ * src/pam.f (dysta): comment + white space
+ * src/fanny.f (fanny): lowercase and indent + comments
+
+2002-01-24 Martin Maechler <maechler@stat.math.ethz.ch>
+
+ * man/agnes.Rd, diana.Rd, pam.Rd, clara.Rd, mona.Rd, fanny.Rd :
+ Reference and BACKGROUND section only in agnes.Rd;
+ the others refer to agnes.
+
+ * man/fanny.Rd: clean
+
+ * R/agnes.q (agnes): \ ``diss = inherits(x, "dist")''
+ * R/diana.q (diana): > instead of "diss = FALSE"
+ * R/fanny.q (fanny): / as we have changed pam() already in 1.4-0
+
+2002-01-23 Martin Maechler <maechler@stat.math.ethz.ch>
+
+ * DESCRIPTION (Version): 1.4-0
+
+ * man/ellipsoidhull.Rd: example
+
+ * tests/ellipsoid-ex.R and *.Rout: finalized
+
+ * man/pluton.Rd: work around Rdconv \eqn{.}{.} bug.
+
+2002-01-22 Martin Maechler <maechler@stat.math.ethz.ch>
+
+ * R/ellipsehull.R (ellipsehull) et al:
+ generalized from 2 to p dimensions. -- points generation: not yet!
+
+ * tests/ellipsehull.R: new test file
+
+ * man/clusplot.partition.Rd: clean up
+ * man/clusplot.default.Rd: proper reference to Pison et al
+ * man/clusplot.Rd: clean
+
+2002-01-21 Martin Maechler <maechler@stat.math.ethz.ch>
+
+ * R/ellipsehull.R (ellipsehull) and others: new functions
+ * R/plotpart.q (clusplot.default) use new ellipsePoints();
+ simplification by using "d2" (= dist^2) instead of "dist".
+
+2002-01-19 Martin Maechler <maechler@stat.math.ethz.ch>
+
+ * R/plotpart.q (clusplot.default) re-scale cov.wt() result:
+ Finally return the smallest possible ellipses.
+
+ NOTA BENE ===> (numerical) results are *different* !
+
+2002-01-18 Martin Maechler <maechler@stat.math.ethz.ch>
+
+ * R/plotpart.q (clusplot.default) {spannel}: Finally found why our
+ ellipses are not quite ok : R's cov.wt() differs from S-plus' !
+
+ * src/spannel.f (spannel): new argument `maxit' (was 5000).
+
+ * R/plotpart.q (clusplot.default): cleanup, eliminating internal
+ kleur() & plotje(); and "spannel" arguments; new maxit; lower eps
+ use which.min() and which.max(); ...
+
+ * R/pam.q (pam): diss has new default = inherits(x, "dist") which
+ is TRUE therefore for dissimilarity or dist objects.
+
+2002-01-12 Martin Maechler <maechler@stat.math.ethz.ch>
+
+ * R/agnes.q, R/diana.q : a bit of cleanup in the two twins
+ calling functions.
+
+ * man/lower.to.upper.tri.inds.Rd,
+ * man/cluster-internal.Rd: new for formerly local functions, now in
+ * R/internal.R (sizeDiss),
+ * R/internal.R (lower.to.upper.tri.inds), and upper.to...:
+ new functions instead of local functions in several places, e.g.,
+ * R/diana.q, R/fanny.q, ...
+
+ * R/plotpart.q (clusplot.default):
+ fix bug PR#1249: cmd() != cmdscale(); use new cmdscale(*, add=TRUE)
+ ---> (slightly) different result sometimes
+ fix long-standing typo in NA case + more cleanup
+
+ * R/plotpart.q (clusplot.partition):
+ explicit `main' argument with better default.
+
+
+2001-12-06 Martin Maechler <maechler@stat.math.ethz.ch>
+
+ * DESCRIPTION (Version): 1.3-6
+
+ * R/plotpart.q: enable `density =' for polygon shading.
+
+
+2001-11-27 Martin Maechler <maechler@stat.math.ethz.ch>
+
+ * R/zzz.R: get rid of .Alias
+
+2001-11-06 Martin Maechler <maechler@stat.math.ethz.ch>
+
+ * DESCRIPTION (Version): 1.3-5
+
+ * R/plothier.q: Fix menu() bug thanks to Richard Rowe.
+ * R/plotpart.q: ditto
+
+ * R/agnes.q: properly allow integer matrix input:
+ don't use all(sapply(x, data.class) == "numeric") anymore.
+ * R/clara.q, R/diana.q, R/fanny.q, R/pam.q: ditto
+
+2001-11-05 Martin Maechler <maechler@stat.math.ethz.ch>
+
+ * R/pam.q: `call' via match.call() instead of sys.call, and as
+ list component instead of attribute. [R philosophy compatibility]
+ * R/mona.q: ditto
+ * R/fanny.q, R/diana.q, R/clara.q, R/agnes.q,
+ * R/plothier.q, R/plotpart.q: ditto
+
+
+2001-10-09 Martin Maechler <maechler@stat.math.ethz.ch>
+
+ * DESCRIPTION (Version): 1.3-5b (beta) for sending to Robert G
+
+ * R/plothier.q: plot.diana() must have main=NULL
+
+ * R/diana.q: minor code cleanup
+
+2001-08-27 Martin Maechler <maechler@stat.math.ethz.ch>
+
+ * README.orig: renamed from R/README-splus
+
+2001-08-22 Martin Maechler <maechler@stat.math.ethz.ch>
+
+ * DESCRIPTION (Version): New version is 1.3-4
+
+ * man/flower.Rd: nicer using \describe{}
+
+ * man/plot.partition.Rd (and R/plotpart.q): new argument `which.plots'
+ (as in the other plot.* functions).
+ * R/plothier.q:
+ All plot.* methods which produce more than one plot now call
+ par(ask = TRUE) automagically when `needed' (as e.g., in plot.lm()).
+
+ * man/*.Rd: document all arguments; a bit more cleanup.
+ R (1.4.0) CMD check is now okay.
+
+2001-08-18 Martin Maechler <maechler@stat.math.ethz.ch>
+
+ * R/*.q and man/*.Rd: generic/method argument fixes
+
+2001-05-26 Martin Maechler <maechler@stat.math.ethz.ch>
+
+ * man/*.Rd: indenting in all dataset examples
+ * man/votes.repub.Rd: usage fix
+
+2001-05-23 Martin Maechler <maechler@stat.math.ethz.ch>
+
+ * INDEX: structured logically, rather than alphabetically
+ * tests/clara.R: new test
+ * src/clara.f (clara): added comments
+ * R/clara.q (clara) : comments and cleanup
+
+2001-05-22 Martin Maechler <maechler@stat.math.ethz.ch>
+
+ * DESCRIPTION (Version): New version is 1.3-3.
+
+ * R/agnes.q and all others: `components' not `arguments' in print.*()
+ * src/meet.f (meet): use [if then else] instead of goto
+ * src/clara.f (clara): all declarations explicit; some cleanup
+
+2001-05-21 Martin Maechler <maechler@stat.math.ethz.ch>
+
+ * DESCRIPTION (Package): licence changed to GPL (Rousseeuw's e-mail)
+
+ * R/pam.q: minor code cleanup for Fortran interface
+ * src/pam.f (pam): all declarations explicit
+
+ * README: integrated former ./README_MM
+ * src/twins.f, many R/*.q and
+ * man/pltree.Rd: replace s/S-plus/S/ in many places
+
+
+2001-03-21 Martin Maechler <maechler@stat.math.ethz.ch>
+
+ * man/print.summary.FOO.Rd: drop these files, move some to FOO.Rd
+ * man/print*.Rd: cleanup, use \method{}
+
+2001-01-04 Martin Maechler <maechler@stat.math.ethz.ch>
+
+ * DESCRIPTION (Version): New version is 1.3-2.
+
+ * man/print*.Rd: Better \title{.}, hence
+ * INDEX
+ * man/*.Rd: Remove \keyword{libcluster}; we have {cluster}.
+
+2001-01-03 Martin Maechler <maechler@stat.math.ethz.ch>
+
+ * DESCRIPTION (Version): New version is 1.3-1.
+
+2001-01-02 Martin Maechler <maechler@stat.math.ethz.ch>
+
+ * man/*.Rd: fixes for codoc()
+
+ * src/spannel.f (spannel): improve readability, indent properly,
+ add a few comments
+ * src/clara.f: <ditto>
+ * src/pam.f: <ditto>
+ * src/twins.f: <ditto>
+
+ * R/*.q : Added PACKAGE = .. to all .Fortran() calls
+ ===== Many codoc() fixes; particularly summary.*(*, ...)
+
+ * R/plotpart.q: (clusplot.partition): simplified
+ * R/agnes.q: T/F -> TRUE/FALSE and more
+ * R/clara.q: <ditto>
+ * R/diana.q: <ditto>
+ * R/fanny.q: <ditto>
+ * R/mona.q: <ditto>
+ * R/pam.q: <ditto>
+
+2000-12-30 Martin Maechler <maechler@stat.math.ethz.ch>
+
+ * DESCRIPTION (Version): New version is 1.2-5.
+
+2000-12-24 Kurt Hornik <Kurt.Hornik@ci.tuwien.ac.at>
+
+ * DESCRIPTION (Version): New version is 1.2-4.
+ (Maintainer): New entry.
+
+2000-12-14 Martin Maechler <maechler@stat.math.ethz.ch>
+
+ * src/daisy.f: indented do loops; one if / else.
+
+ * R/daisy.q: __ daisy() __
+ - "ordratio" |-> "T", was "O" erroneously!
+ - `metric' and `list' argument checking
+
+ * man/clusplot.default.Rd: updated and cleaned.
+
+2000-12-02 Martin Maechler <maechler@stat.math.ethz.ch>
+
+ * R/plothier.q: plot.agnes() & plot.diana() :
+ main=NULL defaults to two different titles for both plots
+
+2000-11-30 Martin Maechler <maechler@stat.math.ethz.ch>
+
+ * man/...Rd:
+ - \section{NOTE} becomes \note
+ - fix most T/F to TRUE/FALSE, ..
+
+ * R/plothier.q:
+ - cleanup (T/F); indenting
+ - plot.mona(): las = 1 for axis; allow main=
+ - plot.diana(): `which.plot' and main= and sub=
+ - plot.agnes(): `which.plot' and main= and sub=
+ - pltree.twins(): allow main= ; rm"plot = TRUE" (warn)
+ --> now depends on R 1.2's plot.hclust()
+
+ * R/plotpart.q: clusplot.default() -- now works!
+ - *much* clean up
+ - color choice such as to see points
+ - got rid of NaN warning
+ - eliminated "polygon(*,density.) warnings by '##no yet'"
+
+2000-11-29 Martin Maechler <maechler@stat.math.ethz.ch>
+
+ * R/daisy.q: add "dist" class (and fix T/F to TRUE/FALSE etc)
+ * R/daisy.q and
+ * man/print.dissimilarity.Rd: add summary.dissimilarity()
+ * man/dissimilarity.object.Rd: cleanup, use \value{.}, doc. "dist"
+ * man/daisy.Rd: cleanup, use \value{.}
+ * man/agnes.Rd: cleanup.
+ * man/*.object.Rd: cleanup, use \value{.}
+
+Thu Feb 17 22:56:58 2000 Kurt Hornik <Kurt.Hornik@ci.tuwien.ac.at>
+
+ * DESCRIPTION (Version): New version is 1.2-3.
+
+ * src/Makefile: Removed.
+
+Tue Dec 28 18:41:09 1999 Kurt Hornik <Kurt.Hornik@ci.tuwien.ac.at>
+
+ * DESCRIPTION (Version): New version is 1.2-2.
+
+ * data/00Index: Added entry for `xclara'.
+ * man/xclara.Rd: New file.
+
+ * data/figure2.R:
+ * data/table4.R:
+ Removed as unused and undocumented.
+
+Sun Dec 5 20:14:45 1999 Kurt Hornik <Kurt.Hornik@ci.tuwien.ac.at>
+
+ * DESCRIPTION (Version): New version is 1.2-1.
+
+ * R/daisy.q:
+ * src/daisy.f:
+ * PORTING:
+ Rewrite to pass integers rather than character strings to Fortran
+ (changes provided by BDR).
+
+Sun Apr 11 04:21:03 1999 Kurt Hornik <Kurt.Hornik@ci.tuwien.ac.at>
+
+ * DESCRIPTION (Version): New version is 1.2-0.
+
+ * R/plotpart.q: Replace rep.int() by rep().
+
+ * R/zzz.R: Make .First.lib() use plot.hclust() for plclust() which
+ seems to do the job, sort of.
+
+ * data/animals.R: Replaced by `animals.tab'.
+ * data/ruspini.R: Replaced by `ruspini.tab'.
+ * data/votes.repub.tab: New file.
+
+ * man/agriculture.Rd: New file.
+ * man/animals.Rd: New file.
+ * man/flower.Rd: New file.
+ * man/ruspini.Rd: New file.
+ * man/votes.repub.Rd: New file.
+ * man/*: Hand-edit all examples to make executable.
+
+Fri Nov 27 23:53:11 1998 Kurt Hornik <Kurt.Hornik@ci.tuwien.ac.at>
+
+ * DESCRIPTION (Version): New version is 1.1-3.
+
+ * R/mona.q: Barf only if neither a matrix nor a data frame
+ (remember that in S, is.matrix() is TRUE for data frames).
+
+ * man/*: Converted anew via `Sd2Rd -x' using Sd2Rd 0.3-2.
+
+ * TODO: Removed.
+
+Tue Jun 16 09:23:15 1998 Kurt Hornik <Kurt.Hornik@ci.tuwien.ac.at>
+
+ * DESCRIPTION (Version): New version is 1.1-2.
+
+ * DESCRIPTION:
+ * PORTING:
+ * TITLE:
+ * R/zzz.R:
+ * src/Makefile:
+ Change old `clus' to new name `cluster'.
+
+Mon Jun 15 11:01:52 1998 Kurt Hornik <Kurt.Hornik@ci.tuwien.ac.at>
+
+ * ChangeLog: Finally started, current version is 1.1-1.
+
diff --git a/DESCRIPTION b/DESCRIPTION
new file mode 100644
index 0000000..4fe37dc
--- /dev/null
+++ b/DESCRIPTION
@@ -0,0 +1,57 @@
+Package: cluster
+Version: 2.1.0
+Date: 2019-06-07
+Priority: recommended
+Title: "Finding Groups in Data": Cluster Analysis Extended Rousseeuw et
+ al.
+Description: Methods for Cluster analysis. Much extended the original from
+ Peter Rousseeuw, Anja Struyf and Mia Hubert,
+ based on Kaufman and Rousseeuw (1990) "Finding Groups in Data".
+Maintainer: Martin Maechler <maechler@stat.math.ethz.ch>
+Authors@R: c(person("Martin","Maechler", role = c("aut","cre"),
+ email="maechler@stat.math.ethz.ch", comment = c(ORCID = "0000-0002-8685-9910"))
+ ,person("Peter", "Rousseeuw", role="aut", email="peter.rousseeuw@kuleuven.be",
+ comment = c("Fortran original", ORCID = "0000-0002-3807-5353"))
+ ,person("Anja", "Struyf", role="aut", comment= "S original")
+ ,person("Mia", "Hubert", role="aut", email= "Mia.Hubert@uia.ua.ac.be",
+ comment = c("S original", ORCID = "0000-0001-6398-4850"))
+ ,person("Kurt", "Hornik", role=c("trl", "ctb"),
+ email="Kurt.Hornik@R-project.org",
+ comment=c("port to R; maintenance(1999-2000)", ORCID="0000-0003-4198-9911"))
+ ,person("Matthias", "Studer", role="ctb")
+ ,person("Pierre", "Roudier", role="ctb")
+ ,person("Juan", "Gonzalez", role="ctb")
+ ,person("Kamil", "Kozlowski", role="ctb")
+ ,person("Erich", "Schubert", role="ctb", comment = c("fastpam options for pam()",
+ ORCID = "0000-0001-9143-4880"))
+ ,person("Keefe", "Murphy", role="ctb", comment = "volume.ellipsoid({d >= 3})")
+ )
+Depends: R (>= 3.3.0)
+Imports: graphics, grDevices, stats, utils
+Suggests: MASS, Matrix
+SuggestsNote: MASS: two examples using cov.rob() and mvrnorm(); Matrix
+ tools for testing
+LazyLoad: yes
+LazyData: yes
+ByteCompile: yes
+BuildResaveData: no
+License: GPL (>= 2)
+URL: https://svn.r-project.org/R-packages/trunk/cluster
+NeedsCompilation: yes
+Packaged: 2019-06-19 08:21:30 UTC; maechler
+Author: Martin Maechler [aut, cre] (<https://orcid.org/0000-0002-8685-9910>),
+ Peter Rousseeuw [aut] (Fortran original,
+ <https://orcid.org/0000-0002-3807-5353>),
+ Anja Struyf [aut] (S original),
+ Mia Hubert [aut] (S original, <https://orcid.org/0000-0001-6398-4850>),
+ Kurt Hornik [trl, ctb] (port to R; maintenance(1999-2000),
+ <https://orcid.org/0000-0003-4198-9911>),
+ Matthias Studer [ctb],
+ Pierre Roudier [ctb],
+ Juan Gonzalez [ctb],
+ Kamil Kozlowski [ctb],
+ Erich Schubert [ctb] (fastpam options for pam(),
+ <https://orcid.org/0000-0001-9143-4880>),
+ Keefe Murphy [ctb] (volume.ellipsoid({d >= 3}))
+Repository: CRAN
+Date/Publication: 2019-06-19 11:10:03 UTC
diff --git a/INDEX b/INDEX
new file mode 100644
index 0000000..795d54e
--- /dev/null
+++ b/INDEX
@@ -0,0 +1,65 @@
+agnes Agglomerative Nesting
+clara Clustering Large Applications
+daisy Dissimilarity Matrix Calculation
+diana DIvisive ANAlysis Clustering
+fanny Fuzzy Analysis Clustering
+mona MONothetic Analysis Clustering of Binary Variables
+pam Partitioning Around Medoids
+
+dissimilarity.object Dissimilarity Matrix Object
+partition.object Partitioning Object
+twins.object Hierarchical Clustering Object
+
+agnes.object Agglomerative Nesting (AGNES) Object
+clara.object Clustering Large Applications (CLARA) Object
+diana.object Divisive Analysis (DIANA) Object
+fanny.object Fuzzy Analysis (FANNY) Object
+mona.object Monothetic Analysis (MONA) Object
+pam.object Partitioning Around Medoids (PAM) Object
+sizeDiss Sample Size of Dissimilarity Like Object
+
+clusplot Cluster Plot - Generic Function
+clusplot.default Bivariate Cluster Plot (Clusplot) Default Method
+clusplot.partition Bivariate Clusplot of a Partitioning Object
+coef.hclust Agglomerative Coefficient for 'hclust' Objects
+pltree Clustering Trees - Generic Function
+pltree.twins Clustering Tree of a Hierarchical Clustering
+
+bannerplot Plot Banner (of Hierarchical Clustering)
+silhouette Compute or Extract Silhouette Information from
+ Clustering
+ellipsoidhull Compute the Ellipsoid Hull or Spanning Ellipsoid
+ of a Point Set
+predict.ellipsoid Predict Method for Ellipsoid Objects
+volume.ellipsoid Compute the Volume of Planar Object
+lower.to.upper.tri.inds Permute Indices for Triangular Matrices
+
+plot.agnes Plots of an Agglomerative Hierarchical Clustering
+plot.diana Plots of a Divisive Hierarchical Clustering
+plot.mona Banner of Monothetic Divisive Hierarchical Clusterings
+plot.partition Plot of a Partition of the Data Set
+print.dissimilarity Print and Summary Methods for Dissimilarity Objects
+print.agnes Print Method for AGNES Objects
+print.clara Print Method for CLARA Objects
+print.diana Print Method for DIANA Objects
+print.fanny Print Method for FANNY Objects
+print.mona Print Method for MONA Objects
+print.pam Print Method for PAM Objects
+summary.agnes Summary Method for 'agnes' Objects
+summary.clara Summary Method for 'clara' Objects
+summary.diana Summary Method for 'diana' Objects
+summary.fanny Summary Method for 'fanny' Objects
+summary.mona Summary Method for 'mona' Objects
+summary.pam Summary Method for PAM Objects
+cluster-internal Internal cluster functions
+
+
+ DATASETS
+
+agriculture European Union Agricultural Workforces
+animals Attributes of Animals
+flower Flower Characteristics
+pluton Isotopic Composition Plutonium Batches
+ruspini Ruspini Data
+votes.repub Votes for Republican Candidate in Presidential Elections
+xclara Bivariate Data Set with 3 Clusters
diff --git a/MD5 b/MD5
new file mode 100644
index 0000000..ab5f63a
--- /dev/null
+++ b/MD5
@@ -0,0 +1,144 @@
+6da12079996ad0aa5be2f30ee7a9e457 *ChangeLog
+8872b411b2ca48c4da38b5496f37a88a *DESCRIPTION
+824c6b9820b7fc963b52b185b8284481 *INDEX
+ca2be9171db9bc45a268f08f5edf9539 *NAMESPACE
+8de82c7d42bd4a27c27c25462ae2a307 *PORTING
+5025b8448cdc0d97d545b677c6cbdd5b *R/0aaa.R
+9a36ddc2124718c79ca517ff09b7cdc0 *R/agnes.q
+e1e9efce2aeb4a81809d557911b0c9db *R/clara.q
+85821d54da55bfb93cf9acd1f8bbcc3a *R/clusGap.R
+a6de142daacb2b59b3f4426bafc30244 *R/clusGapGen.R
+e93355a6f7ad8d8486e6db4db52200a9 *R/coef.R
+4a0496467e11d4f71dcf66d0fa9a7bf9 *R/daisy.q
+3a809204470b7b268e57a376bb9cee60 *R/diana.q
+086784f30e63fed4d559d1a329afbd90 *R/ellipsoidhull.R
+86a134c01b58fae094b8b064c4af3d60 *R/fanny.q
+ff1cec6103f81407f16981ffd44d1fd5 *R/internal.R
+6a2322a6a4344438437bdc60ab1067fa *R/mona.q
+4e789a76352d94a8f74ca60c8394ab6a *R/pam.q
+4202616d1b9da174d3870a9866a467b9 *R/plothier.q
+f7f2f0ab06414c020a2f22a7914d82b9 *R/plotpart.q
+ac6ea20501b14230d4f2fcf97abda367 *R/silhouette.R
+f1d53c5f24897b5ab0e6014be9be7844 *R/zzz.R
+74315cfbf492da8e8b2c7687ecbdc750 *README
+265d36817b510dcb404a8aeb769d7dfd *build/partial.rdb
+ac189f8e6e1314c01a700f6a31ee4506 *data/agriculture.tab
+1046f7a750dbbac2724fd910aff561e7 *data/animals.tab
+c8f60e65b0356698f6b08c23c07e2a94 *data/chorSub.rda
+0f34ac1e10d178fa04a4babb705d5b27 *data/flower.R
+0772c2f9932873c7dd6551208cd0725b *data/plantTraits.rda
+1022570c32295066506ad7b26439f7bf *data/pluton.tab
+eda904e011e79ea5a25602125d558d04 *data/ruspini.tab
+e816e6c67444107ab47ff3bf2f6023f2 *data/votes.repub.tab
+be46f45035b177b940393db459789282 *data/xclara.rda
+2d1d8b81f92e1e97b282390e90941497 *inst/CITATION
+b3c2cffc49197619d987992da637178e *inst/NEWS.Rd
+bfd0bc51a62786ea02f5ce8197adffa7 *inst/po/de/LC_MESSAGES/R-cluster.mo
+be3ce6c5e3a99c4769b31c674953d202 *inst/po/de/LC_MESSAGES/cluster.mo
+a56541211dac9565caf5115dfb1b2038 *inst/po/en@quot/LC_MESSAGES/R-cluster.mo
+9ad4e319bf7879130579cec25b007948 *inst/po/en@quot/LC_MESSAGES/cluster.mo
+b67eda048b99b3c480521223434e0468 *inst/po/fr/LC_MESSAGES/R-cluster.mo
+443e95f6bf0021d9fddc51795d050292 *inst/po/ko/LC_MESSAGES/R-cluster.mo
+bf74a2bf26d5a8bbed974ddb2c96c565 *inst/po/ko/LC_MESSAGES/cluster.mo
+670a90c8ba5e7b207a0a92e115e0e84a *inst/po/pl/LC_MESSAGES/R-cluster.mo
+fb8ab3d3e7dbe2dcdf9462fbd46d9917 *inst/test-tools.R
+29bc302e877a84517f5926260bbf3f96 *man/agnes.Rd
+29d26f3fb10f3462291c10d441865e71 *man/agnes.object.Rd
+7db03338761019b70d064ffe1eddcc5d *man/agriculture.Rd
+ba26ba311f46bfec382d98fbc1f00e15 *man/animals.Rd
+80586a34dc1e14f840ebae455aeb9736 *man/bannerplot.Rd
+0b1033484c0b66ff9451427930e92987 *man/chorSub.Rd
+ef3bfda0b97e29b76a423be6966f6252 *man/clara.Rd
+b86f299b6994b98e2112843516f3108a *man/clara.object.Rd
+249613cb2696ca593c01719a44496590 *man/clusGap.Rd
+1dee90909b5f299ecea5ccb9384ed9ac *man/clusplot.default.Rd
+ea3ea6469c8f57eafa1229f82b78c30c *man/clusplot.partition.Rd
+c7341c96f49e5b288448c4cb9436c2fa *man/cluster-internal.Rd
+20b35f88ced8e679778892a732a43369 *man/coef.hclust.Rd
+142571ea1370f764930411710ade3dbc *man/daisy.Rd
+8950b1b554e916f9d1fefe1dec38a9c8 *man/diana.Rd
+aa9c2fe350e02eb23f211a44e40c8a90 *man/dissimilarity.object.Rd
+b9c9048164c736e8b79abb240056711d *man/ellipsoidhull.Rd
+6fcf7eee2ff0505f51021bf65f468fbf *man/fanny.Rd
+7d549aed091402cecc8a398085e4bb86 *man/fanny.object.Rd
+94bfe5845b4efa6bffec6c455081a237 *man/flower.Rd
+f9c1ca445301e6c2ed69986d96ab5676 *man/lower.to.upper.tri.inds.Rd
+b75d0f093fc09421ee5d5d12ddd7fcc1 *man/mona.Rd
+546379a2e048bf7ef7a69aff87f4ca50 *man/mona.object.Rd
+487fc9a6e03d886ba1e99bbea09c4517 *man/pam.Rd
+21795cc8b4bd9b63b24f44e5ffeeccb2 *man/pam.object.Rd
+351d76eba52f0dff7f468b04c4d52fcd *man/partition.object.Rd
+40fe00964d4215ce1b2390e8f18e9dc0 *man/plantTraits.Rd
+2b217b89f71c10165081f884475f5a1d *man/plot.agnes.Rd
+3cc230e8488d6cd611fb210860fa648c *man/plot.diana.Rd
+936341a14d58811b41e45b09fd8b37bb *man/plot.mona.Rd
+ee6a690d0f2c7a25f3b8f77881778137 *man/plot.partition.Rd
+9b7b312a6d216468cf9bf3c90ab349b9 *man/pltree.Rd
+84b2723e904c2b1897a00043106b458e *man/pluton.Rd
+d7edca4aea0edca6e7139092e85e67db *man/predict.ellipsoid.Rd
+ece1532629f0e06a65f6670e5b9bd459 *man/print.agnes.Rd
+b6384eb685030609ae9edd06434949b0 *man/print.clara.Rd
+e0c63f114cc0bf71fe53964b5f883255 *man/print.diana.Rd
+b32046c766441c7fc75f644734c690b1 *man/print.dissimilarity.Rd
+1ce3568140412485f727b2d9193ba09c *man/print.fanny.Rd
+0dcf3dd3c5afbb216939ca9158c32192 *man/print.mona.Rd
+b1c1625935a5c22d81aa2e73d3b80457 *man/print.pam.Rd
+7cd999938af26fb5c97e4d45ab9ea982 *man/ruspini.Rd
+6ad2387b68543d1dc634ab9257f987fb *man/silhouette.Rd
+8beea8b2090f7e91d0a0b69ec2013718 *man/sizeDiss.Rd
+0df193ca0559bef700c60048d76d0516 *man/summary.agnes.Rd
+f52e4e7889d246480ead5358d1771d58 *man/summary.clara.Rd
+964d099b6a9ab924dfe49779e8f13f03 *man/summary.diana.Rd
+6a4b775e10738bf3472bcc35a64c7623 *man/summary.mona.Rd
+5cc8d9a8fa53b437121d841475d46b46 *man/summary.pam.Rd
+1f622b89b4b8b0e93e3f0abd65122ee4 *man/twins.object.Rd
+d030948d78b63ac02577e92e6bd02fbf *man/volume.ellipsoid.Rd
+0510d0816550f6d3c258652912053a1d *man/votes.repub.Rd
+0ebf97dd08eb6f33180418891f1cbbdb *man/xclara.Rd
+bb2efd38c18e95860753233bdb8484b1 *po/R-cluster.pot
+34ddd859c7d1aef8e72608cee2fdd81a *po/R-de.po
+e9b5293e63746638be1f3570dbeb4fe3 *po/R-en@quot.po
+8d93f676bad944d685834dea6cf969cf *po/R-fr.po
+d86baa23f82be8f6ac7ffd9d3f084150 *po/R-ko.po
+016adbe973d691c77060973c4c4be594 *po/R-pl.po
+3475fd30b58e36b81d6b0a45f5408dcf *po/cluster.pot
+ecc678170701fb328651c672bf7cb9e6 *po/de.po
+3eb57b26cbc2e7babd9cf816f2b517d8 *po/ko.po
+86e308f92ccc269bcb494de5ac0b2481 *po/update-me.sh
+e93bebd2b125848e9f8dfcbf0f705956 *src/clara.c
+304d9fd1f9b4df3f16149b8aeadf7330 *src/cluster.h
+c323dd9c80537ef46976cdd47c9056e6 *src/daisy.f
+20241ba8aa6ae6296c5acec015f1f906 *src/dysta.f
+f974c4dae4c6b2398bf32648d6671f46 *src/fanny.c
+23cea00d2feab57a92e8c2393c7f4b8a *src/ind_2.h
+d1b66d40049abd7161731a0a02e1a381 *src/init.c
+93dff342c334fec621bda03dd76abc6f *src/mona.c
+d6f0c229f9914351e49fd84cbbb80b96 *src/pam.c
+6f11dc74b1636e3ac848563d570f0030 *src/sildist.c
+f42f05132aaf001ddd333e3e109692e0 *src/spannel.c
+420078424e521d6279a9a14127ec61cb *src/twins.c
+ed02b9443ca7d93b7e71290d81016b41 *tests/agnes-ex.R
+1741b34a32d36470ba193c7129f338b8 *tests/agnes-ex.Rout.save
+f21e11b8d426840d72a35aeb1c9f2c5c *tests/clara-NAs.R
+5c8c511ac9cffb74ce778388daa7ebcb *tests/clara-NAs.Rout.save
+39e1daac5198726c9d921a5d5bb122cb *tests/clara-ex.R
+75f50fcefc8b754be68d03b2b9c2d0de *tests/clara.R
+1eb7bdcda5423bc2f4127081e03ad00b *tests/clara.Rout.save
+79c68eadc1e1f72d909b7c015a278cc7 *tests/clusplot-out.R
+ea8a86f78e84fb61bdeff42f43a0a767 *tests/clusplot-out.Rout.save
+35fd3185d685723e528a435514b38604 *tests/daisy-ex.R
+1a0390912ecd601ea7e5ff44e2eda5ef *tests/daisy-ex.Rout.save
+27d4307ca493cd273dc2e944d5bbc955 *tests/diana-boots.R
+4fc11382af41801e16128f96e17a70e7 *tests/diana-ex.R
+474c2d78169cdf88d34616a9bada8b31 *tests/diana-ex.Rout.save
+91d99ad3fb3f40d86d99f6f22486f061 *tests/ellipsoid-ex.R
+cb6e997786f07076f6a6bcf959cdeb12 *tests/ellipsoid-ex.Rout.save
+52b341bc06eb5692a73dec2be2cd7e5a *tests/fanny-ex.R
+290bc5b0aa385784ecd22974f9781ff3 *tests/mona.R
+de3bfe9b4a3e222eba5c9c9d8ed8c891 *tests/mona.Rout.save
+0ee9c09116ed658c860c0c869fb22f77 *tests/pam.R
+ec46d40c043dd4c393ed24d8bfb3a286 *tests/pam.Rout.save
+477cd7fd12117a6cbcdfc9d5944fbd39 *tests/silhouette-default.R
+11875bfedd807686d13d469c123770e1 *tests/silhouette-default.Rout.save
+d9cdce1776e344a6f4f2574cee6ef487 *tests/sweep-ex.R
+255c65bce3503b3b2ffe0ab0e2dcc2bc *tests/withAutoprint.R
diff --git a/NAMESPACE b/NAMESPACE
new file mode 100644
index 0000000..c0645b8
--- /dev/null
+++ b/NAMESPACE
@@ -0,0 +1,86 @@
+useDynLib(cluster, .registration=TRUE)
+
+## S3 Generics:
+export(clusplot, pltree, silhouette, volume)
+## Normal functions (non-generics, non-methods):
+## The original constructors:
+export(agnes, clara, daisy, diana, fanny, mona, pam)
+## and the rest
+export(bannerplot, ellipsoidhull, ellipsoidPoints,
+ clusGap, maxSE,
+ lower.to.upper.tri.inds, upper.to.lower.tri.inds,
+ meanabsdev, sizeDiss, sortSilhouette)
+
+## Methods also useful as 'standalone functions':
+export(predict.ellipsoid)
+export(coef.hclust, coefHier)
+
+importFrom("stats", as.hclust, as.dist, as.dendrogram, cmdscale,
+ coef, cov.wt, dist,
+ mahalanobis, median, na.omit,
+ princomp, runif, setNames, var, weighted.mean)
+## For now, we keep *depending* on 'stats' just so that
+## S3 methods *.hclust, *.dist etc will work
+
+importFrom("graphics",
+ arrows, axis, barplot, boxplot,
+ identify, lines, matplot, mtext,
+ par, plot, points, polygon, rect, segments, text, title)
+
+importFrom("grDevices", dev.interactive)
+
+importFrom("utils", menu, str)
+
+###---- Methods ---- all documented but not exported
+
+## of own generics
+S3method(clusplot, default)
+S3method(clusplot, partition)
+S3method(pltree, twins)
+S3method(silhouette, default)
+S3method(silhouette, clara)
+S3method(silhouette, partition)
+S3method(volume, ellipsoid)
+
+## Register all the methods for generics defined elsewhere, in case the
+## namespace is loaded but not currently attached.
+S3method(as.dendrogram, twins)
+S3method(coef, hclust)
+S3method(coef, twins)
+
+S3method(plot, agnes)
+S3method(plot, diana)
+S3method(plot, mona)
+S3method(plot, partition)
+S3method(plot, silhouette)
+S3method(plot, clusGap)
+
+#exported:
+S3method(predict, ellipsoid)
+
+S3method(print, agnes)
+S3method(print, clara)
+S3method(print, diana)
+S3method(print, dissimilarity)
+S3method(print, ellipsoid)
+S3method(print, fanny)
+S3method(print, mona)
+S3method(print, pam)
+S3method(print, clusGap)
+S3method(print, summary.agnes)
+S3method(print, summary.clara)
+S3method(print, summary.diana)
+S3method(print, summary.dissimilarity)
+S3method(print, summary.fanny)
+S3method(print, summary.mona)
+S3method(print, summary.pam)
+S3method(print, summary.silhouette)
+
+S3method(summary, agnes)
+S3method(summary, clara)
+S3method(summary, diana)
+S3method(summary, dissimilarity)
+S3method(summary, fanny)
+S3method(summary, mona)
+S3method(summary, pam)
+S3method(summary, silhouette)
diff --git a/PORTING b/PORTING
new file mode 100644
index 0000000..ced5ad7
--- /dev/null
+++ b/PORTING
@@ -0,0 +1,114 @@
+* R/mona.q:
+
+Replace
+ if(!is.matrix(x))
+ stop(message = "x is not a matrix.")
+by
+ if(!is.matrix(x) && !is.data.frame(x))
+ stop("x must be a matrix or data frame.")
+
+Comment
+ x2 <- paste(x2, collapse = "")
+and replace
+ storage.mode(x2) <- "character"
+by
+ storage.mode(x2) <- "integer"
+
+* R/plothier.q:
+
+Replace `pick <- 2' by `pick <- 3'.
+(Undo when plclust is available.) -- undone for version 1.3-2
+
+Replace
+ invisible(return(x))
+by
+ return(invisible(x))
+
+* R/plotpart.q:
+
+Replace
+ invisible(return(x))
+by
+ return(invisible(x))
+
+Replace `pick <- 2' by `pick <- 4'.
+(Undo when clusplots really work.)
+
+In code for clusplot.default(), remove cmd() and replace
+ x1 <- cmd(x, k = 2, eig = T, add = T)
+ if(x1$ac < 0)
+ x1 <- cmd(x, k = 2, eig = T)
+by
+ x1 <- cmdscale(x, k = 2, eig = T)
+(Unfix when we have the `add' argument to cmdscale().)
+
+Replace `rep.int' by `rep'.
+
+Replace `text.default' by `text'.
+
+* R/zzz.R:
+Add
+ .First.lib <- function(lib, pkg) {
+ require(mva)
+ library.dynam("cluster", pkg, lib)
+ assign("plclust", .Alias(plot.hclust), pos = "package:cluster")
+ }
+
+* src/mona.f:
+Replace
+ CHARACTER KX(NN,JPP),NZF
+by
+ INTEGER KX(NN,JPP),NZF
+Change all '0' to 0.
+Change all '1' to 1.
+
+* R/daisy.q:
+* src/daisy.f:
+
+(BDR) Rewrite to pass integers rather than C character strings to
+Fortran.
+
+************************************************************************
+
+The directory `man' contains R documentation sources converted via
+`Sd2Rd -x' from the S documentation sources. (In earlier versions, it
+was helpful to run `.CONV/FIXME.pl' before converting.)
+
+* man/fanny.Rd:
+Replace the displayed equation by
+ \deqn{\sum_{v=1}^k
+ \frac{\sum_{i=1}^n\sum_{j=1}^n u_{iv}^2 u_{jv}^2 d(i,j)}{
+ 2 \sum_{j=1}^n u_{jv}^2}}{
+ SUM_v (SUM_(i,j) u(i,v)^2 u(j,v)^2 d(i,j)) / (2 SUM_j u(j,v)^2)}
+
+All examples hand-edited!
+
+************************************************************************
+
+============== Martin Maechler (many things are in ChangeLog!) ===========
+
+src/clara.f :
+~~~~~~~~~~~
+
+ *) to lowercase only :
+
+ tr A-Z a-z < clara.f.~1~ > clara.f.~2~
+
+ 1) to lowercase
+ and change initial comments to 'c' (because of Emacs' indentation):
+
+ tr A-Z a-z < clara.f.~1~ | sed '/^cc/s//c/'> clara.f.~2~
+
+ 2) Inside Emacs of clara.f.~2~
+
+ C-x C-w (write-file "clara.f.~3~")
+
+ Repeat about 6 times
+ M-C-q (fortran-indent-subprogram)
+ M-C-e (end-of-fortran-subprogram)
+
+ M-> (end-of-buffer)
+ C-x C-o (delete-blank-lines)
+
+ C-x h (mark-whole-buffer)
+ M-x tabify (tabify (point-min) (point-max))
diff --git a/R/0aaa.R b/R/0aaa.R
new file mode 100644
index 0000000..c7dd670
--- /dev/null
+++ b/R/0aaa.R
@@ -0,0 +1,21 @@
+## Class vector shared by all dissimilarity objects created in this package;
+## kept as a namespace-private "global" so that it is consistent everywhere.
+dissiCl <- c("dissimilarity", "dist")
+
+## Back-ports of base R functions for old R versions.  'Rv' is only a
+## temporary helper and is removed again afterwards.
+Rv <- getRversion()
+if(Rv < "3.2.1") {
+    ## lengths() is not available before R 3.2.1
+    lengths <- function (x, use.names = TRUE)
+        vapply(x, length, 1L, USE.NAMES = use.names)
+    if(Rv < "3.1.0") {
+        ## anyNA() is not available before R 3.1.0
+        anyNA <- function(x) any(is.na(x))
+        ## Even older versions would additionally need (currently disabled):
+        ##   R < 3.0.0 : rep_len <- function(x, length.out) rep(x, length.out=length.out)
+        ##   R < 2.15  : paste0  <- function(...) paste(..., sep = '')
+    }
+}
+rm(Rv)
+
+##' Not exported; decides whether the "extra" (slower) checks should be run,
+##' which allows CRAN checks to run faster.
+##' @return TRUE if the session is interactive, or if the environment variable
+##'   R_CLUSTER_CHECK_EXTRA is non-empty, or if R_PKG_CHECKING_doExtras is
+##'   exactly "true"; FALSE otherwise.
+doExtras <- function() {
+    if(interactive())
+        return(TRUE)
+    if(nzchar(Sys.getenv("R_CLUSTER_CHECK_EXTRA")))
+        return(TRUE)
+    identical("true", unname(Sys.getenv("R_PKG_CHECKING_doExtras")))
+}
+
diff --git a/R/agnes.q b/R/agnes.q
new file mode 100644
index 0000000..66484e4
--- /dev/null
+++ b/R/agnes.q
@@ -0,0 +1,183 @@
+#### $Id: agnes.q 6953 2015-06-18 09:30:24Z maechler $
+
+## AGNES: agglomerative hierarchical clustering; the computation itself is
+## done in compiled code via .C(twins, ...) with jalg = 1.
+##
+## Arguments (as used by the code below):
+##  x          : numeric data matrix / data frame, or (if 'diss') a
+##               dissimilarity object, a matrix convertible by as.dist(),
+##               or a plain numeric vector of valid length (see sizeDiss()).
+##  diss       : logical; is 'x' a dissimilarity?
+##  metric     : "manhattan" selects ndyst = 2, anything else ndyst = 1;
+##               only used when 'x' is data.
+##  stand      : if true, columns are centered and divided by meanabsdev().
+##  method     : one of METHODS below; partial matching via pmatch().
+##  par.method : coefficient(s) for methods "flexible" (required) and
+##               "gaverage" (optional); length 1, 3 or 4.
+##  keep.diss  : keep the dissimilarities in the result?
+##  keep.data  : keep the (possibly standardized) data in the result?
+##  trace.lev  : integer of length 1, passed on to the compiled code.
+##
+## Value: a list of class c("agnes", "twins") with components
+##  order, height, ac, merge, diss, call, method and, where available,
+##  order.lab and data.
+agnes <- function(x, diss = inherits(x, "dist"), metric = "euclidean",
+                  stand = FALSE, method = "average", par.method,
+                  keep.diss = n < 100, keep.data = !diss, trace.lev = 0)
+{
+    METHODS <- c("average", "single","complete", "ward","weighted", "flexible", "gaverage")
+    ## hclust has more;  1          2         3         4       5         6          7
+    meth <- pmatch(method, METHODS)
+    if(is.na(meth)) stop("invalid clustering method")
+    ## Legacy check: R's pmatch() returns NA (caught above) for ambiguous
+    ## partial matches as well, so this branch is not expected to trigger.
+    if(meth == -1) stop("ambiguous clustering method")
+    cl. <- match.call()
+    method <- METHODS[meth]
+    if(method == "flexible") {
+        ## Lance-Williams formula (but *constant* coefficients):
+        stopifnot((np <- length(a <- as.numeric(par.method))) >= 1)
+        attr(method,"par") <- par.method <-
+            if(np == 1)## default (a1= a, a2= a, b= 1-2a, c = 0)
+                c(a, a, 1-2*a, 0)
+            else if(np == 3)
+                c(a, 0)
+            else if(np == 4) a
+            else stop("'par.method' must be of length 1, 3, or 4")
+        ## if(any(par.method[1:2]) < 0)
+        ##     warning("method \"flexible\": alpha_1 or alpha_2 < 0 can give invalid dendrograms"
+    } else if (method == "gaverage") {
+        attr(method,"par") <- par.method <- if (missing(par.method)) {
+            ## Default par.method: Using beta = -0.1 as advised in Belbin et al. (1992)
+            beta <- -0.1
+            c(1-beta, 1-beta, beta, 0)
+        } else {
+            stopifnot((np <- length(b <- as.numeric(par.method))) >= 1)
+            if(np == 1)## default (a1= 1-b, a2= 1-b, b= b, c= 0)
+                c(1-b, 1-b, b, 0)
+            else if(np == 3)
+                c(b, 0)
+            else if(np == 4) b
+            else stop("'par.method' must be of length 1, 3, or 4")
+        }
+        ## if(any(par.method[1:2]) < 0)
+        ##     warning("method \"gaverage\": alpha_1 or alpha_2 < 0 can give invalid dendrograms"
+    } else ## dummy (passed to C)
+        par.method <- double()
+
+    if((diss <- as.logical(diss))) {
+        ## check type of input vector
+        if(anyNA(x)) stop("NA-values in the dissimilarity matrix not allowed.")
+        if(data.class(x) != "dissimilarity") { # try to convert to
+            if(!is.null(dim(x))) {
+                x <- as.dist(x) # or give an error
+            } else {
+                ## possibly convert input *vector*
+                if(!is.numeric(x) || is.na(n <- sizeDiss(x)))
+                    stop("'x' is not and cannot be converted to class \"dissimilarity\"")
+                attr(x, "Size") <- n
+            }
+            class(x) <- dissiCl
+            if(is.null(attr(x,"Metric"))) attr(x, "Metric") <- "unspecified"
+        }
+        n <- attr(x, "Size")
+        dv <- x[lower.to.upper.tri.inds(n)]
+        ## prepare arguments for the Fortran call
+        dv <- c(0., dv)# "double", 1st elem. "only for Fortran" (?)
+        jp <- 1L
+        mdata <- FALSE
+        ndyst <- 0
+        x2 <- double(1)
+    }
+    else {
+        ## check input matrix and standardize, if necessary
+        x <- data.matrix(x)
+        if(!is.numeric(x)) stop("x is not a numeric dataframe or matrix.")
+        x2 <- if(stand) scale(x, scale = apply(x, 2, meanabsdev)) else x
+        storage.mode(x2) <- "double"
+        ndyst <- if(metric == "manhattan") 2 else 1
+        n <- nrow(x2)
+        jp <- ncol(x2)
+        if((mdata <- any(inax <- is.na(x2)))) { # TRUE if x[] has any NAs
+            jtmd <- integer(jp)
+            jtmd[apply(inax, 2L, any)] <- -1L
+            ## VALue for MISsing DATa
+            valmisdat <- 1.1* max(abs(range(x2, na.rm=TRUE)))
+            x2[inax] <- valmisdat
+        }
+        dv <- double(1 + (n * (n - 1))/2)
+    }
+    if(n <= 1) stop("need at least 2 objects to cluster")
+    stopifnot(length(trace.lev <- as.integer(trace.lev)) == 1)
+    ## the compiled code only needs to return dissimilarities it computed itself
+    C.keep.diss <- keep.diss && !diss
+    res <- .C(twins,
+              as.integer(n),
+              as.integer(jp),
+              x2,
+              dv,
+              dis = double(if(C.keep.diss) length(dv) else 1),
+              jdyss = if(C.keep.diss) diss + 10L else as.integer(diss),
+              if(mdata) rep(valmisdat, jp) else double(1),
+              if(mdata) jtmd else integer(jp),
+              as.integer(ndyst),
+              1L,# jalg = 1 <==> AGNES
+              meth,# integer
+              integer(n),
+              ner = integer(n),
+              ban = double(n),
+              ac = double(1),
+              par.method,
+              merge = matrix(0L, n - 1, 2), # integer
+              trace = trace.lev)
+    if(!diss) {
+        ##give warning if some dissimilarities are missing.
+        if(res$jdyss == -1)
+            stop("No clustering performed, NA-values in the dissimilarity matrix.\n" )
+        if(keep.diss) {
+            ## adapt Fortran output to S:
+            ## convert lower matrix,read by rows, to upper matrix, read by rows.
+            disv <- res$dis[-1]
+            disv[disv == -1] <- NA
+            disv <- disv[upper.to.lower.tri.inds(n)]
+            class(disv) <- dissiCl
+            attr(disv, "Size") <- nrow(x)
+            attr(disv, "Metric") <- metric
+            attr(disv, "Labels") <- dimnames(x)[[1]]
+        }
+        ##add labels to Fortran output
+        if(length(dimnames(x)[[1]]) != 0)
+            order.lab <- dimnames(x)[[1]][res$ner]
+    }
+    else {
+        if(keep.diss) disv <- x
+        ##add labels to Fortran output
+        if(length(attr(x, "Labels")) != 0)
+            order.lab <- attr(x, "Labels")[res$ner]
+    }
+    clustering <- list(order = res$ner, height = res$ban[-1], ac = res$ac,
+                       merge = res$merge, diss = if(keep.diss)disv,
+                       call = cl., method = METHODS[meth])
+    ## Only look in *this* function's frame: without 'inherits = FALSE', an
+    ## unrelated 'order.lab' in an enclosing environment (e.g. the user's
+    ## workspace) would be picked up when the input has no labels.
+    if(exists("order.lab", inherits = FALSE))
+        clustering$order.lab <- order.lab
+    if(keep.data && !diss) {
+        ## restore the NAs that were replaced by 'valmisdat' for the C code
+        if(mdata) x2[x2 == valmisdat] <- NA
+        clustering$data <- x2
+    }
+    class(clustering) <- c("agnes", "twins")
+    clustering
+}
+
+## summary() method for "agnes" objects: the object itself, re-classed to
+## "summary.agnes" so that print.summary.agnes() is used for printing.
+summary.agnes <- function(object, ...)
+    structure(object, class = "summary.agnes")
+
+## print() method for "agnes" objects: call, agglomerative coefficient,
+## order of the objects (labels if available), a summary of the heights and
+## the names of the components.  Returns 'x' invisibly.
+print.agnes <- function(x, ...)
+{
+    ## prefer the labels over the plain integer order when there are any
+    ord <- if(length(x$order.lab) != 0) x$order.lab else x$order
+    cat("Call: ", deparse(x$call),
+        "\nAgglomerative coefficient: ", format(x$ac, ...),
+        "\nOrder of objects:\n")
+    print(ord, quote = FALSE, ...)
+    cat("Height (summary):\n")
+    print(summary(x$height), ...)
+    cat("\nAvailable components:\n")
+    print(names(x), ...)
+    invisible(x)
+}
+
+## print() method for "summary.agnes" objects; shows a bit more than
+## print.agnes(): the full merge matrix and heights and, if present, a
+## summary of the dissimilarities.  Returns 'x' invisibly.
+print.summary.agnes <- function(x, ...)
+{
+    ord <- if(length(x$order.lab) != 0) x$order.lab else x$order
+    cat("Object of class 'agnes' from call:\n", deparse(x$call),
+        "\nAgglomerative coefficient: ", format(x$ac, ...),
+        "\nOrder of objects:\n")
+    print(ord, quote = FALSE, ...)
+    cat("Merge:\n")
+    print(x$merge, ...)
+    cat("Height:\n")
+    print(x$height, ...)
+    d <- x$diss
+    if(!is.null(d)) { ## Dissimilarities:
+        cat("\n")
+        print(summary(d, ...))
+    }
+    cat("\nAvailable components:\n")
+    print(names(x), ...)
+    invisible(x)
+}
+
+## as.dendrogram() method for "twins" objects: goes via as.hclust().
+## '...' is passed on to as.dendrogram(); really only 'hang' is of interest.
+as.dendrogram.twins <- function(object, ...)
+{
+    hc <- as.hclust(object)
+    as.dendrogram(hc, ...)
+}
diff --git a/R/clara.q b/R/clara.q
new file mode 100644
index 0000000..c163973
--- /dev/null
+++ b/R/clara.q
@@ -0,0 +1,201 @@
+#### CLARA := Clustering LARge Applications
+####
+#### Note that the algorithm is O(n), but O(ns^2) where ns == sampsize
+
+## CLARA: cluster the rows of 'x' into 'k' clusters, working on 'samples'
+## sub-samples of size 'sampsize' each; the computation itself is done in
+## compiled code via .C(cl_clara, ...).
+##
+## Arguments (as used by the code below):
+##  x         : numeric data matrix or data frame (a "dist" object is an error)
+##  k         : number of clusters, 1 <= k <= n-1  where n = nrow(x)
+##  metric    : "euclidean", "manhattan" or "jaccard" (checked by match.arg())
+##  stand     : if true, columns are centered and divided by their meanabsdev()
+##  samples   : number of sub-samples (>= 1)
+##  sampsize  : sub-sample size,  max(2, k+1) <= sampsize <= n
+##  trace     : trace level, passed on to C as integer
+##  medoids.x : if true, keep the original 'x' and return the medoid rows of it
+##  keep.data : if true, return the (possibly standardized) data as 'data'
+##  rngR, pamLike : flags passed on to C as logicals (rng_R, pam_like)
+##  correct.d : only looked at when 'x' contains NAs; passed to C as d_flag
+##
+## Value: a list of class c("clara", "partition"); see the end of the function.
+clara <- function(x, k,
+ metric = c("euclidean", "manhattan", "jaccard"),
+ stand = FALSE,
+ samples = 5, sampsize = min(n, 40 + 2 * k), trace = 0,
+ medoids.x = TRUE, keep.data = medoids.x, rngR = FALSE,
+ pamLike = FALSE, correct.d = TRUE)
+{
+ ## check type of input matrix and values of input numbers
+ if(inherits(x, "dist"))# catch user error
+ stop("'x' is a \"dist\" object, but should be a data matrix or frame")
+ x <- data.matrix(x)
+ if(!is.numeric(x)) stop("x is not a numeric dataframe or matrix.")
+ n <- nrow(x)
+ if((k <- as.integer(k)) < 1 || k > n - 1)
+ stop("The number of cluster should be at least 1 and at most n-1." )
+ if((sampsize <- as.integer(sampsize)) < max(2,k+1))
+ stop(gettextf("'sampsize' should be at least %d = max(2, 1+ number of clusters)",
+ max(2,k+1)), domain=NA)
+ if(n < sampsize)
+ stop(gettextf("'sampsize' = %d should not be larger than the number of objects, %d",
+ sampsize, n), domain=NA)
+ if((samples <- as.integer(samples)) < 1)
+ stop("'samples' should be at least 1")
+
+ jp <- ncol(x)
+ namx <- dimnames(x)[[1]]
+ ## standardize, if necessary {careful not to copy unnecessarily}:
+ if(medoids.x) ## need to save original 'x'
+ ox <- x
+ else if(keep.data)
+ stop("when 'medoids.x' is FALSE, 'keep.data' must be too")
+ metric <- match.arg(metric)
+ if(stand)
+ x <- scale(x, scale = apply(x, 2, meanabsdev))
+ ## NB: 'data' is stored *after* standardization and *before* the NA replacement below
+ if(keep.data)
+ data <- x
+ ## put info about metric, size and NAs in arguments for the .C call
+
+ dFlag <- -1L # not used (in C code)
+ if((mdata <- any(inax <- is.na(x)))) { # TRUE if x[] has any NAs
+ ## jtmd[j] is -1 for every column j that contains at least one NA, 0 otherwise
+ jtmd <- integer(jp)
+ jtmd[apply(inax, 2L, any)] <- -1L
+ ## VALue for MISsing DATa
+ valmisdat <- 1.1* max(abs(range(x, na.rm=TRUE)))
+ x[inax] <- valmisdat
+ if(missing(correct.d))
+ warning("Distance computations with NAs: using correct instead of pre-2016 wrong formula.
+Use 'correct.d=FALSE' to get previous results or set 'correct.d=TRUE' explicitly
+to suppress this warning.")
+ else if(!is.finite(dFlag <- as.integer(correct.d)))
+ stop("invalid 'correct.d'")
+ } else rm(inax) # save space
+
+ ## NB: the arguments are matched by *position* on the C side; the names
+ ## only serve to extract components of 'res' below.
+ res <- .C(cl_clara,
+ n,
+ jp,
+ k, ## 3
+ clu = as.double(x),
+ samples, # = nran
+ sampsize, # = nsam ## 6
+ dis = double(1 + (sampsize * (sampsize - 1))/2),
+ as.integer(mdata), # = mdata
+ valmd = if(mdata) rep(valmisdat, jp) else -1., ## 9
+ jtmd = if(mdata) jtmd else integer(1),
+ c("euclidean" = 1L, "manhattan" = 2L, "jaccard" = 3L)[[metric]],
+ # = diss_kind (DISS_KIND : ../src/cluster.h)
+ as.logical(rngR[1]), # = rng_R ## 12
+ as.logical(pamLike[1]), # = pam_like
+ as.integer(dFlag), # = d_flag
+ integer(sampsize), # = nrepr ## 15
+ integer(sampsize), # = nsel
+ sample= integer(sampsize),# = nbest
+ integer(k), # = ## 18
+ imed = integer(k), # = nrx
+ double(k), # = radus
+ double(k), # = ttd ## 21
+ double(k), # = ratt
+ avdis = double(k), # = ttbes
+ maxdis = double(k), # = rdbes ## 24
+ ratdis = double(k), # = rabes
+ size = integer(k), # = mtt
+ obj = double(1), ## 27
+ avsil = double(k),
+ ttsil = double(1),
+ silinf = matrix(0, sampsize, 4), ## 30
+ jstop = integer(1),
+ as.integer(trace), # = trace_lev
+ double (3 * sampsize), # = tmp ## 33
+ integer(6 * sampsize)) # = itmp
+ ## signal an error when errors occurred (non-zero 'jstop')
+ ## res[] components really used below:
+ ## jstop, clu, silinf, dis, sample, med, imed, obj, size, maxdis, avdis, ratdis,
+ ## avsil, ttsil
+ if(res$jstop) {
+ ## first give a more specific message if some rows consist of NAs only
+ if(mdata && any(aNA <- apply(inax,1, all))) {
+ i <- which(aNA)
+ nNA <- length(i)
+ pasteC <- function(...) paste(..., collapse= ",")
+ if(nNA < 13)
+ stop(sprintf(ngettext(nNA,
+ "Observation %s has *only* NAs --> omit it for clustering",
+ "Observations %s have *only* NAs --> omit them for clustering!"),
+ pasteC(i)), domain = NA)
+ else
+ ## too many to list: only show the first 12 row indices
+ stop(sprintf(ngettext(nNA,
+ "%d observation (%s) has *only* NAs --> omit them for clustering!",
+ "%d observations (%s ...) have *only* NAs --> omit them for clustering!"),
+ nNA, pasteC(i[1:12])), domain = NA)
+ } ## else
+ if(res$jstop == 1)
+ stop("Each of the random samples contains objects between which no distance can be computed.")
+ if(res$jstop == 2)
+ stop(gettextf("For each of the %d samples, at least one object was found which could not be assigned to a cluster (because of missing values).", samples))
+ ## else {cannot happen}
+ stop("invalid 'jstop' from .C(cl_clara,.): ", res$jstop)
+ }
+ ## 'res$clu' is still large; cut down ASAP
+ res$clu <- as.integer(res$clu[1:n])
+ ## 4th column of 'silinf' is used as row index (into 'namx') / row name below
+ sildim <- res$silinf[, 4]
+ ## adapt C output to S:
+ ## convert lower matrix, read by rows, to upper matrix, read by rows.
+ disv <- res$dis[-1]
+ disv[disv == -1] <- NA
+ disv <- disv[upper.to.lower.tri.inds(sampsize)]
+ class(disv) <- dissiCl
+ attr(disv, "Size") <- sampsize
+ attr(disv, "Metric") <- metric
+ attr(disv, "Labels") <- namx[res$sample]
+ ## medoids are taken from the *original* data 'ox'; NULL if !medoids.x
+ res$med <- if(medoids.x) ox[res$imed, , drop = FALSE]
+ ## add labels to C output
+ if(!is.null(namx)) {
+ sildim <- namx[sildim]
+ res$sample <- namx[res$sample]
+ names(res$clu) <- namx
+ }
+ r <- list(sample = res$sample, medoids = res$med, i.med = res$imed,
+ clustering = res$clu, objective = res$obj,
+ clusinfo = cbind(size = res$size, "max_diss" = res$maxdis,
+ "av_diss" = res$avdis, isolation = res$ratdis),
+ diss = disv, call = match.call())
+ ## add dimnames to C output
+ ## (silhouette information is only returned for k > 1)
+ if(k > 1) {
+ dimnames(res$silinf) <- list(sildim,
+ c("cluster", "neighbor", "sil_width", ""))
+ r$silinfo <- list(widths = res$silinf[, -4],
+ clus.avg.widths = res$avsil,
+ avg.width = res$ttsil)
+ }
+ if(keep.data) r$data <- data
+ class(r) <- c("clara", "partition")
+ r
+}
+
+## print() method for "clara" objects: call, medoids, objective function,
+## a compact str() of the clustering vector, the cluster sizes, the best
+## sample and the names of the components.  Returns 'x' invisibly.
+print.clara <- function(x, ...)
+{
+    cat("Call: ", deparse(x$call),
+        "\nMedoids:\n")
+    print(x$medoids, ...)
+    cat("Objective function:\t ", format(x$objective, ...),"\n",
+        "Clustering vector: \t", sep="")
+    str(x$clustering, vec.len = 7)
+    ## first column of 'clusinfo' holds the cluster sizes
+    sizes <- x$clusinfo[,1]
+    cat("Cluster sizes: \t", sizes,
+        "\nBest sample:\n")
+    print(x$sample, quote = FALSE, ...)
+    cat("\nAvailable components:\n")
+    print(names(x), ...)
+    invisible(x)
+}
+
+## summary() method for "clara" objects: the object itself, re-classed to
+## "summary.clara" so that print.summary.clara() is used for printing.
+summary.clara <- function(object, ...)
+    structure(object, class = "summary.clara")
+
+## print() method for "summary.clara" objects: everything print.clara()
+## shows plus per-cluster information, the silhouette information (when
+## component 'silinfo' is present) and a summary of the dissimilarities
+## (when component 'diss' is present).  Returns 'x' invisibly.
+print.summary.clara <- function(x, ...)
+{
+    cat("Object of class 'clara' from call:\n", deparse(x$call),
+        "\nMedoids:\n")
+    print(x$medoids, ...)
+    cat("Objective function:\t ", format(x$objective, ...),
+        "\nNumerical information per cluster:\n")
+    print(x$clusinfo, ...)
+    ## silinfo = list(<widths>, <per-cluster averages>, <overall average>)
+    sil <- x$silinfo
+    has.sil <- !is.null(sil)
+    if(has.sil) {
+        cat("Average silhouette width per cluster:\n")
+        print(sil[[2]], ...)
+        cat("Average silhouette width of best sample:",
+            format(sil[[3]], ...), "\n")
+    }
+    cat("\nBest sample:\n")
+    print(x$sample, quote = FALSE, ...)
+    cat("Clustering vector:\n")
+    print(x$clustering, ...)
+    if(has.sil) {
+        cat("\nSilhouette plot information for best sample:\n")
+        print(sil[[1]], ...)
+    }
+    d <- x$diss
+    if(!is.null(d)) { ## Dissimilarities:
+        cat("\n")
+        print(summary(d, ...))
+    }
+    cat("\nAvailable components:\n")
+    print(names(x), ...)
+    invisible(x)
+}
+
diff --git a/R/clusGap.R b/R/clusGap.R
new file mode 100644
index 0000000..38631c7
--- /dev/null
+++ b/R/clusGap.R
@@ -0,0 +1,174 @@
+#### Originally from orphaned package SLmisc
+#### (Version: 1.4.1, 2007-04-12, Maintainer: Matthias Kohl <kohl@sirs-lab.com>)
+#### License: GPL (version 2 or later)
+####
+#### which said
+#### "function corresponds to function gap in package SAGx"
+
+## MM: SAGx is now in Bioconductor --- 1.10.1{devel} or 1.11.1{release}
+## had gap() *corrected* to re-cluster using FUNcluster --> see ./gap-SAGx.R.~orig~
+##
+## MM: Package 'lga' -- has gap() and lga and robust lga [-> UBC]
+## - it uses boot() nicely [2012-01: ORPHANED because Justin Harrington is amiss]
+## MM: renamed arguments, and changed almost everything
+
+## Gap statistic for estimating the number of clusters.
+##
+##  x          : data matrix or data frame (2-dimensional, >= 1 row and column)
+##  FUNcluster : function(X, k, ...) returning a list with component 'cluster'
+##  K.max      : maximal number of clusters considered (>= 2)
+##  B          : number of simulated reference data sets (positive integer)
+##  d.power    : power applied to the distances inside W.k()
+##  spaceH0    : space in which the uniform reference data are generated
+##  verbose    : print progress information?
+##  ...        : passed on to FUNcluster()
+##
+## Value: list of class "clusGap" with components
+##   Tab (K.max x 4 matrix: logW, E.logW, gap, SE.sim), call, spaceH0, n, B
+##   and FUNcluster.
+clusGap <- function (x, FUNcluster, K.max, B = 100, d.power = 1,
+                     spaceH0 = c("scaledPCA", "original"),
+                     verbose = interactive(), ...)
+{
+    stopifnot(is.function(FUNcluster), length(dim(x)) == 2, K.max >= 2,
+              (n <- nrow(x)) >= 1, ncol(x) >= 1)
+    B. <- as.integer(B)
+    if(B != B. || B. <= 0)
+        stop("'B' has to be a positive integer")
+    B <- B.
+    cl. <- match.call()
+
+    if(is.data.frame(x))
+        x <- as.matrix(x)
+    ii <- seq_len(n)
+    ## W.k(X, kk): half the sum, over the clusters of a kk-clustering of X, of
+    ## the cluster-size-scaled sum of (powered) pairwise distances.
+    W.k <- function(X, kk) {
+        ## kmeans() has 'cluster'; pam() 'clustering'
+        clus <- if(kk > 1) FUNcluster(X, kk, ...)$cluster else rep.int(1L, nrow(X))
+        disp1 <- function(I) {
+            xs <- X[I,, drop=FALSE]
+            sum(dist(xs)^d.power/nrow(xs))
+        }
+        0.5* sum(vapply(split(ii, clus), disp1, 0.))
+    }
+    logW <- numeric(K.max)
+    if(verbose) cat("Clustering k = 1,2,..., K.max (= ",K.max,"): .. ", sep='')
+    for(k in 1:K.max)
+        logW[k] <- log(W.k(x, k))
+    if(verbose) cat("done\n")
+
+    spaceH0 <- match.arg(spaceH0)
+    usePCA <- identical(spaceH0, "scaledPCA") # otherwise: "original"
+    ## Scale 'x' into hypercube -- later fill with H0-generated data
+    xs <- scale(x, center=TRUE, scale=FALSE)
+    m.x <- rep(attr(xs,"scaled:center"), each = n) # for back-trafo later
+    if(usePCA) {
+        ## (These & (xs,m.x) above basically do stats:::prcomp.default()
+        V.sx <- svd(xs, nu=0)$v
+        xs <- xs %*% V.sx # = transformed(x)
+    }
+
+    rng.x1 <- apply(xs, 2L, range)
+    logWks <- matrix(0, B, K.max)
+    if(verbose) cat("Bootstrapping, b = 1,2,..., B (= ", B,
+                    ") [one \".\" per sample]:\n", sep="")
+    ## uniform sample of size 'nn' on the range M = c(min, max)
+    runifRange <- function(M, nn) runif(nn, min=M[1], max=M[2])
+    for (b in 1:B) {
+        ## Generate "H0"-data as "parametric bootstrap sample" :
+        z1 <- apply(rng.x1, 2, runifRange, nn=n)
+        z <- (if(usePCA) tcrossprod(z1, V.sx) # back transformed
+              else z1) + m.x
+        for(k in 1:K.max)
+            logWks[b,k] <- log(W.k(z, k))
+        if(verbose) cat(".", if(b %% 50 == 0) paste(b,"\n"))
+    }
+    if(verbose && (B %% 50 != 0)) cat("",B,"\n")
+    E.logW <- colMeans(logWks)
+    SE.sim <- sqrt((1 + 1/B) * apply(logWks, 2, var))
+    ## NB: the column names of 'Tab' come from the variable names used here
+    res <- list(Tab = cbind(logW, E.logW, gap = E.logW - logW, SE.sim),
+                ## K.max == nrow(T)
+                call = cl., spaceH0=spaceH0,
+                n = n, B = B, FUNcluster=FUNcluster)
+    class(res) <- "clusGap"
+    res
+}
+
+## lga/R/gap.R --- has for Tibshirani et al (2001):
+ ## ElogWks[k,] <- c(mean(BootOutput), sqrt(var(BootOutput)*(1+1/B)))
+ ## GAP[k] <- ElogWks[k,1] - logWks[k]
+ ## if (k > 1)
+ ## if(GAP[k-1] >= GAP[k]-ElogWks[k,2] & !doall)
+ ## finished <- TRUE
+## so they effectively only look for the *first* (local) maximum which ..
+## MM: <==> diff(GAP) = GAP[k] - GAP[k-1] <= +SE.sim[k]
+
+
+## criteria.DandF() -- Dudoit and Fridlyand (2002)
+## ---------------- looks at the *global* maximum and then to the left..
+ ## y <- x$data
+ ## crit <- diff(y[which.max(y[,"Gap"]), c("Sks", "Gap")])
+ ## nclust <- min(which(y[,"Gap"] > crit))
+ ## return(ifelse(nclust == nrow(y), NA, nclust))
+
+## Determine the "optimal" index k of f[1:K], optionally taking the standard
+## errors SE.f[] into account.
+##
+##  f         : numeric vector of (e.g. gap) values, length K >= 1
+##  SE.f      : non-negative standard errors of f[], same length as f
+##  method    : the rule to use, see the comments in the code
+##  SE.factor : non-negative multiplier for SE.f
+##
+## Value: the chosen index in 1:K.
+maxSE <- function(f, SE.f,
+                  method = c("firstSEmax", "Tibs2001SEmax",
+                             "globalSEmax", "firstmax", "globalmax"),
+                  SE.factor = 1)
+{
+    method <- match.arg(method)
+    stopifnot((K <- length(f)) >= 1, K == length(SE.f), SE.f >= 0, SE.factor >= 0)
+    fSE <- SE.factor * SE.f
+
+    ## index of the first local maximum of f[], or K if f[] is increasing
+    firstLocalMax <- function() {
+        decr <- diff(f) <= 0 # length K-1
+        if(any(decr)) which.max(decr) else K # the first TRUE, or K
+    }
+    ## starting at 'nc', look to the left: the smallest k < nc with
+    ## f[k] >= f[nc] - fSE[nc], or 'nc' itself if there is none
+    leftWithinSE <- function(nc) {
+        mp <- f[seq_len(nc - 1)] >= f[nc] - fSE[nc]
+        if(any(mp)) which(mp)[1] else nc
+    }
+
+    switch(method,
+           ## the first local maximum (== firstSEmax with SE.factor == 0)
+           "firstmax" = firstLocalMax(),
+           "globalmax" = which.max(f),
+           "Tibs2001SEmax" = { ## The one Tibshirani et al (2001) proposed:
+               ## "the smallest k such that f(k) >= f(k+1) - s_{k+1}"
+               mp <- f[-K] >= (f - fSE)[-1]
+               if(any(mp)) which.max(mp) else K
+           },
+           ## M.Maechler(2012): rather ..
+           ## look at the first *local* maximum and then to the left ..:
+           "firstSEmax" = leftWithinSE(firstLocalMax()),
+           ## Dudoit and Fridlyand (2002) *thought* Tibshirani proposed..
+           ## in 'lga', see criteria.DandF():
+           ## looks at the *global* maximum and then to the left..
+           "globalSEmax" = leftWithinSE(which.max(f)))
+}
+
+## print() method for "clusGap" objects: the call, the simulation settings,
+## the number of clusters chosen by maxSE(<gap>, <SE.sim>, method, SE.factor)
+## and the table of results.  Returns 'x' invisibly.
+print.clusGap <- function(x, method="firstSEmax", SE.factor = 1, ...)
+{
+    ## the valid methods are exactly those of maxSE()
+    method <- match.arg(method, choices = eval(formals(maxSE)$method))
+    stopifnot((K <- nrow(T <- x$Tab)) >= 1, SE.factor >= 0)
+    settings <- sprintf("\nB=%d simulated reference sets, k = 1..%d; spaceH0=\"%s\"\n",
+                        x$B, K, x$spaceH0)
+    cat("Clustering Gap statistic [\"clusGap\"] from call:\n", deparse(x$call),
+        settings, sep="")
+    nc <- maxSE(f = T[,"gap"], SE.f = T[,"SE.sim"],
+                method=method, SE.factor=SE.factor)
+    ## only mention SE.factor for the methods which make use of it
+    seTxt <- if(grepl("SE", method)) sprintf(", SE.factor=%g",SE.factor) else ""
+    cat(sprintf(" --> Number of clusters (method '%s'%s): %d\n",
+                method, seTxt, nc))
+    print(T, ...)
+    invisible(x)
+}
+
+## plot() method for "clusGap" objects: the gap values against k, by default
+## with error bars of +/- one SE.sim drawn via arrows().
+##
+##  main      : title; by default built from the (deparsed and wrapped) call
+##  do.arrows : draw the error bars?
+##  arrowArgs : list of further arguments passed to arrows()
+##  ...       : passed on to plot()
+##
+## Returns NULL invisibly.
+plot.clusGap <- function(x, type="b", xlab = "k", ylab = expression(Gap[k]),
+                         main = NULL,
+                         do.arrows = TRUE,
+                         arrowArgs = list(col="red3", length=1/16, angle=90, code=3),
+                         ...)
+{
+    stopifnot(is.matrix(Tab <- x$Tab), is.numeric(Tab))
+    k <- seq_len(nrow(Tab)) # == 1,2,... K
+    if(is.null(main)) {
+        callTxt <- deparse(x$call, 150)[1]
+        main <- paste(strwrap(callTxt, width = 60, exdent = 7),
+                      collapse="\n")
+    }
+    gap <- Tab[, "gap"]
+    plot(k, gap, type=type, xlab=xlab, ylab=ylab, main=main, ...)
+    if(do.arrows) {
+        se <- Tab[, "SE.sim"]
+        ends <- list(k, gap+ se, k, gap- se)
+        do.call(arrows, c(ends, arrowArgs))
+    }
+    invisible()
+}
diff --git a/R/clusGapGen.R b/R/clusGapGen.R
new file mode 100644
index 0000000..aeb93cd
--- /dev/null
+++ b/R/clusGapGen.R
@@ -0,0 +1,141 @@
+### From Master Thesis of Emmanuel Profumo (w/ M.Maechler) Autumn 2016--March 2017
+### "Generalized clusGap()" : We cannot be 100% compatible to clusGap()
+
+#' @param x the data, can be a data frame or a matrix
+#' @param algo, a clustering algorithm function taking the prepared data and
+#' a number of clusters as arguments
+#' @param index, a function taking a clustering vector and the prepared data
+#' which returns the value of a validity index. Index can also be a list of such
+#' functions to obtain results for different indices.
+#' For coherence with the Gap Statistic originally proposed by Tibshirani et al.,
+#' a LOWER value of the validity index implies a better clustering quality, so
+#' indices such as the average silhouette width should be negated.
+#' This can be changed by setting the argument low=FALSE.
+#' @param modelH0, a function which takes as argument at least the data x,
+#' parameters estimated from the data, and further arguments in ...
+#' @param K.max, number of different clusters for which the index should be
+#' evaluated.
+#' @param B, the number of bootstrap samples.
+#' @param transformData, a function which takes the data x as argument and
+#' processes it for clustering
+#' @param modelH0Param, a function which takes as argument the data x and returns
+#' a list of modelH0 parameters with matching names.
+#' @param low, logical, if FALSE a HIGHER value of the index or the indices in the
+#' user provided list implies a better clustering quality
+#'
+#' @return if index is just one function a list with components:
+#' Indks, the bootstrap validity plots
+#' Ind, the validity plot corresponding to the data
+#' E.Ind, the sample mean of the bootstrap validity plots
+#' gap, the calibrated validity plot, difference between E.Ind and Ind
+#' gapHen, the gap divided by the standard deviation of the bootstraps validity plots
+#' SE.sim, the sample standard error of the bootstrap validity plots with a correction
+#' term for bootstrap estimation
+#' SE, the sample standard error
+#' if index is a list of index then each of the components above are lists with values
+#' for each index
+clusGapGen <- function(x, algo, index, modelH0, K.max, B = 100,
+ transformData = identity,
+ modelH0Param = function(y) list(),
+ low=TRUE, verbose = interactive(), ...)
+{
+ ## remember whether the caller passed a list: this decides the return shape
+ ind.isList <- is.list(index)
+ if (is.function(index))
+ index <- list(index)
+ else if (!ind.isList || !all(vapply(index, is.function, NA)))
+ stop("index has to be a function or a list of function")
+
+ ## result containers, shaped like 'index' (so its names are kept)
+ Ind <- E.Ind <- SE.sim <- SE <- index
+ for (i in seq_along(index)) Ind[[i]] <- E.Ind[[i]] <- SE.sim[[i]] <- numeric(K.max)
+
+ ## 1) index values on the (transformed) observed data, for k = 1..K.max
+ if(verbose) cat("Clustering k = 1,2,..., K.max (= ",K.max,"): .. ", sep='')
+ xt <- transformData(x)
+ for(k in 1:K.max){
+ cls <- algo(xt,k)
+ for (i in seq_along(index))
+ Ind[[i]][k] <- index[[i]](cls,xt)
+ }
+ if(verbose) cat("done\n")
+ ## 2) index values on B data sets simulated from modelH0: one B x K.max matrix per index
+ Indks <- index
+ for (i in seq_along(index)) Indks[[i]] <- matrix(0, B, K.max)
+
+ ## the H0 parameters are computed once, from the untransformed data
+ param <- modelH0Param(x)
+ if(verbose) cat("Bootstrapping, b = 1,2,..., B (= ", B,
+ ") [one \".\" per sample]:\n", sep="")
+ for (b in 1:B) {
+ z <- do.call(modelH0,c(list(x=x),param,list(...)))
+ zt <- transformData(z)
+ for(k in 1:K.max) {
+ cls <- algo(zt,k)
+ for (i in seq_along(index))
+ Indks[[i]][b,k] <- index[[i]](cls,zt)
+ }
+ if(verbose) cat(".", if(b %% 50 == 0) paste(b,"\n"))
+ }
+ if(verbose && (B %% 50 != 0)) cat("",B,"\n")
+ ## 3) summaries per index: column means, SDs and the gap = E.Ind - Ind
+ gap <- gapHen <- index
+ for (i in seq_along(index)){
+ E.Ind[[i]] <- colMeans(Indks[[i]])
+ var.i <- apply(Indks[[i]], 2, var)
+ SE[[i]] <- sqrt(var.i)
+ SE.sim[[i]] <- sqrt((1 + 1/B) * var.i)
+ gap[[i]] <- gap.i <- E.Ind[[i]] - Ind[[i]]
+ gapHen[[i]] <- gap.i/SE[[i]]
+ ## low = FALSE (higher index is better): flip the sign of both gaps
+ if (!low) {
+ gap[[i]] <- -gap[[i]]
+ gapHen[[i]] <- -gapHen[[i]]
+ }
+ }
+ ## TODO: really? make distinction of *list* of indices vs 1 index?
+ ## --- well maybe, keep it: *The* usual case = _one_ index (or not?)
+ if (ind.isList) {
+ list(Indks=Indks,Ind=Ind, E.Ind=E.Ind, gap = gap,
+ gapHen = gapHen ,SE.sim=SE.sim,SE=SE)
+ }
+ else {list(Indks=Indks[[1]],Ind=Ind[[1]], E.Ind=E.Ind[[1]], gap = gap[[1]],
+ gapHen = gapHen[[1]] ,SE.sim=SE.sim[[1]],SE=SE[[1]])
+ }
+}
+
+
+#' @param clusGapRes, a list returned by a call of function clusGapGen
+#' @param main, the main title to the plots
+#' @param divBySd, logical, if TRUE plot the standardized version of the gap
+
+## For every index in 'clusGapRes' draw two plots: (1) the simulated index
+## curves together with their mean (white) and the observed curve, and
+## (2) boxplots of (simulated - observed), optionally divided by SE.sim,
+## overlaid with the "gap" (or "gapHen") curve.
+clusGapGen.plot <- function(clusGapRes,divBySd=FALSE,main=""){
+
+
+ ## a single-index result is wrapped so every component is a one-element list
+ if (!is.list(clusGapRes$Ind)) clusGapRes <- lapply(clusGapRes,
+ function(el) list(el))
+ for (i in seq_along(clusGapRes$Ind)){
+ B <- nrow(clusGapRes$Indks[[i]])
+ K.max <- ncol(clusGapRes$Indks[[i]])
+ ## default scaling: B x K.max matrix of ones, i.e. no standardization
+ std <- t(replicate(B,rep(1,K.max)))
+ ylm <- range(rbind(clusGapRes$Indks[[i]],clusGapRes$Ind[[i]]),na.rm=TRUE)
+ ylb <- names(clusGapRes$Ind[i])
+ if (is.null(ylb)) ylb <- paste("Index",as.character(i))
+ gp <- "gap"
+ namegap <- paste(gp,ylb)
+ if (divBySd) {
+ ## NOTE(review): the boxplots are scaled by SE.sim here, whereas
+ ## clusGapGen() computes 'gapHen' by dividing by SE -- confirm intended
+ std <- t(replicate(B,clusGapRes$SE.sim[[i]]))
+ gp <- "gapHen"
+ namegap <- paste(gp,ylb)
+ }
+ matplot(replicate(B,1:K.max),t(clusGapRes$Indks[[i]]),
+ pch = "-",xlab = "k",ylab = ylb,
+ type="l",ylim=ylm,main=main
+ )
+
+ lines(1:K.max,clusGapRes$E.Ind[[i]],type="l",col="white",lwd=2)
+ lines(1:K.max,clusGapRes$Ind[[i]],lwd=2)
+
+ boxplot((clusGapRes$Indks[[i]]-t(replicate(B,clusGapRes$Ind[[i]])))/std,
+ pch = "*",xlab = "k", ylab = namegap,type="l",col=c("light blue"),
+ notch=TRUE, border="grey",main=main
+ )
+
+ lines(1:K.max,clusGapRes[[gp]][[i]],type="l",xlab="k",ylab="",
+ col="orangered",lwd=1.5)
+
+ }
+}
diff --git a/R/coef.R b/R/coef.R
new file mode 100644
index 0000000..8f840dc
--- /dev/null
+++ b/R/coef.R
@@ -0,0 +1,43 @@
+#### R-interface to Agglomerative / Divisive coefficient
+####
+coef.twins <- function(object, ...)
+{
+    ## return the stored coefficient: $ac for "agnes", $dc for "diana" objects
+    if(inherits(object, "agnes"))
+        return(object$ac)
+    if(inherits(object, "diana"))
+        return(object$dc)
+    stop("invalid 'twins' object")
+}
+
+coef.hclust <- function(object, ...)
+{
+    ## Author: Martin Maechler, Date: 27 Nov 2004
+    ## Coefficient computed from $merge _and_ $height, which are assumed
+    ## to match (heights must be sorted increasingly).
+    mrg <- object$merge
+    ht <- object$height
+    nh <- length(ht)
+    stopifnot(nh > 0, is.matrix(mrg), dim(mrg) == c(nh,2),
+              is.numeric(ht), is.numeric(mrg),
+              !is.unsorted(ht))# then they match with merge
+    ## stopifnot(all.equal(1:n, sort(-mrg[mrg < 0])))
+
+    ## negative entries of a merge row count how often each height is used
+    nNeg <- rowSums(mrg < 0)
+    1 - sum(nNeg * ht) / max(ht) / (nh+1)
+}
+
+
+## Note this is (the only!) direct interface to bncoef(),
+## ---- which is used internally both in agnes() and diana() :
+coefHier <- function(object)
+{
+    ## Purpose: Compute agglomerative *or* divisive coefficient from hclust/agnes/diana
+    ## ----------------------------------------------------------------------
+    ## Author: Martin Maechler, Date: 27 Nov 2004
+    ht <- object$height
+    nh <- length(ht)
+    stopifnot(nh > 0, is.numeric(ht))
+    ## the heights are passed with a leading 0, hence n = nh + 1 entries
+    out <- .C(R_bncoef,
+              n = as.integer(nh + 1L),
+              ban= as.double(c(0., ht)),# <-- is this really the ban[]nner, as in ../src/twins.c ?
+              cf = double(1))
+    out$cf
+}
+
diff --git a/R/daisy.q b/R/daisy.q
new file mode 100644
index 0000000..9c470cf
--- /dev/null
+++ b/R/daisy.q
@@ -0,0 +1,212 @@
+
+## Compute all pairwise dissimilarities between the rows of 'x'.
+## If every column ends up as type "I" and 'metric' is not "gower", the
+## euclidean / manhattan code (ndyst = 1 / 2) is used, otherwise the mixed
+## "gower" code (ndyst = 0).  Returns the dissimilarities as a vector of class
+## 'dissiCl' with attributes "Labels", "Size", "Metric" (and "Types" in the
+## mixed case, "NA.message" if some dissimilarities are NA).
+daisy <- function(x, metric = c("euclidean", "manhattan", "gower"),
+ stand = FALSE, type = list(), weights = rep.int(1, p),
+ warnBin = warnType, warnAsym = warnType, warnConst = warnType,
+ warnType = TRUE)
+{
+ ## check type of input matrix
+ if(length(dx <- dim(x)) != 2 || !(is.data.frame(x) || is.numeric(x)))
+ stop("x is not a dataframe or a numeric matrix.")
+ n <- dx[1]# nrow
+ p <- dx[2]# ncol
+ varnms <- dimnames(x)[[2]]
+ pColl <- function(n) paste(n, collapse = ", ")
+ if(length(type)) {
+ if(!is.list(type) || is.null(ntyp <- names(type)) || any(ntyp == ""))
+ stop(gettextf("invalid %s; must be named list", sQuote("type")))
+ ## check each component to be valid column names or numbers:
+ for(nt in ntyp) {
+ cvec <- type[[nt]]
+ ct <- paste0("type$", nt)
+ if(is.character(cvec)) {
+ if(!is.null(varnms) && !all(cvec %in% varnms))
+ stop(gettextf("%s has invalid column names", ct))
+ }
+ else if(is.numeric(cvec)) {
+ if(!all(1 <= cvec & cvec <= p))
+ stop(gettextf("%s must be in 1:ncol(x)", ct))
+ }
+ else stop(gettextf("%s must contain column names or numbers", ct))
+ }
+ tA <- type$asymm
+ tS <- type$symm
+ if(!is.null(tA) || !is.null(tS)) {
+ ## tA and tS might be character and integer!
+ d.bin <- cbind(as.data.frame(x[, tA, drop= FALSE]),
+ x[, tS, drop= FALSE])
+ lenB <- sapply(lapply(d.bin, function(y)
+ levels(as.factor(y))), length)
+ if(any(lenB > 2))
+ stop("at least one binary variable has more than 2 levels.")
+ if(any(lenB < 2))
+ warning("at least one binary variable has not 2 different levels.")
+ ## Convert factors to integer, such that ("0","1") --> (0,1):
+ if(any(is.f <- sapply(d.bin, is.factor)))
+ d.bin[is.f] <- lapply(d.bin[is.f],
+ function(f) as.integer(as.character(f)))
+ if(!all(sapply(d.bin, function(y)
+ is.logical(y) ||
+ all(sort(unique(as.numeric(y[!is.na(y)])))%in% 0:1))))
+ stop("at least one binary variable has values not in {0,1,NA}")
+ }
+ }
+ ## transform variables and construct 'type' vector
+ if(is.data.frame(x)) {
+ type2 <- sapply(x, data.class)
+ x <- data.matrix(x)
+ } else { ## matrix
+ type2 <- rep("numeric", p)
+ names(type2) <- colnames(x)
+ }
+ if(length(type)) {
+ tT <- type$ ordratio
+ tL <- type$ logratio
+ x[, names(type2[tT])] <- unclass(as.ordered(x[, names(type2[tT])]))
+ x[, names(type2[tL])] <- log10( x[, names(type2[tL])])
+ type2[tA] <- "A"
+ type2[tS] <- "S"
+ type2[tT] <- "T" # was "O" (till 2000-12-14) accidentally !
+ }
+ type2[tI <- type2 %in% c("numeric", "integer") ] <- "I"
+ ## warn about "I" columns having exactly two distinct values (only if n > 9)
+ if(warnBin && n > 9 && any(tI) &&
+ any(iBin <- apply(x[, tI, drop = FALSE], 2,
+ function(v) length(table(v)) == 2)))
+ warning(gettextf("binary variable(s) %s treated as interval scaled",
+ pColl(which(tI)[iBin])))
+
+ type2[type2 == "ordered"] <- "O"
+ type2[type2 == "factor"] <- "N"
+ if(any(ilog <- type2 == "logical")) {
+ if(warnAsym) warning(sprintf(ngettext(sum(ilog),
+ "setting 'logical' variable %s to type 'asymm'",
+ "setting 'logical' variables %s to type 'asymm'"),
+ pColl(which(ilog))), domain = NA)
+ type2[ilog] <- "A"
+ }
+ ## Note: We have 2 status codings: ndyst = (0,1,2) and jdat = (1,2);
+ ## the latter is superfluous in principle
+
+ ## standardize, if necessary
+ all.I <- all(type2 == "I")
+ ## NB: 'metric' is only match.arg()ed here, i.e., when all.I is TRUE
+ if(all.I && { metric <- match.arg(metric); metric != "gower" }) {
+ if(stand) {
+ x <- scale(x, center = TRUE, scale = FALSE) #-> 0-means
+ sx <- colMeans(abs(x), na.rm = TRUE)# can still have NA's
+ if(0 %in% sx) {
+ if(warnConst) warning(gettextf(
+ "%s has constant columns %s; these are standardized to 0",
+ sQuote("x"), pColl(which(sx == 0))))
+ sx[sx == 0] <- 1
+ }
+ x <- scale(x, center = FALSE, scale = sx)
+ }
+ jdat <- 2L
+ ndyst <- if(metric == "manhattan") 2L else 1L # == diss_kind
+ }
+ else { ## mixed case or explicit "gower"
+ if(!missing(metric) && metric != "gower" && !all.I)
+ warning("with mixed variables, metric \"gower\" is used automatically")
+ ## FIXME: think of a robust alternative scaling to
+ ## Gower's (x - min(x)) / (max(x) - min(x))
+ colR <- apply(x, 2, range, na.rm = TRUE)
+ colmin <- colR[1,]
+ sx <- colR[2,] - colmin
+ ## constant columns (range 0) are divided by 1 instead of 0
+ if(any(sx == 0))
+ sx[sx == 0] <- 1
+ x <- scale(x, center = colmin, scale = sx)
+ jdat <- 1L
+ ndyst <- 0L ## diss_kind = "mixed | gower"
+ ## weights only used in this "gower" case
+ if(length(weights) == 1)
+ weights <- rep.int(weights, p)
+ else if(length(weights) != p)
+ stop("'weights' must be of length p (or 1)")
+ }
+
+ ## type2 <- paste(type2, collapse = "")
+ typeCodes <- c('A','S','N','O','I','T')
+ ## 1 2 3 4 5 6 --> passed to Fortran below
+ type3 <- match(type2, typeCodes)# integer
+ if(any(ina <- is.na(type3)))
+ stop(gettextf("invalid type %s for column numbers %s",
+ type2[ina], pColl(which(ina))))
+ if((mdata <- any(inax <- is.na(x)))) { # TRUE if x[] has any NAs
+ ## jtmd[j] = -1 marks a column containing NAs, 0 otherwise
+ jtmd <- integer(p)
+ jtmd[apply(inax, 2L, any)] <- -1L
+ ## VALue for MISsing DATa
+ valmisdat <- 1.1* max(abs(range(x, na.rm=TRUE)))
+ x[inax] <- valmisdat
+ }
+ ## call Fortran routine
+ storage.mode(x) <- "double"
+ disv <- .Fortran(cl_daisy, ## -> ../src/daisy.f
+ n,
+ p,
+ x,
+ if(mdata) rep(valmisdat, p) else double(1),
+ as.double(weights),
+ if(mdata) jtmd else integer(1),
+ jdat,
+ type3, # vtype
+ ndyst,
+ as.integer(mdata),
+ dis = double((n * (n - 1))/2),
+ NAOK = TRUE# only to allow "+- Inf"
+ )$dis
+ ## adapt Fortran output to S:
+ ## convert lower matrix, read by rows, to upper matrix, read by rows.
+ disv[disv == -1] <- NA
+ full <- matrix(0, n, n)
+ full[!lower.tri(full, diag = TRUE)] <- disv
+ disv <- t(full)[lower.tri(full)]
+ ## give warning if some dissimilarities are missing
+ if(anyNA(disv)) attr(disv, "NA.message") <-
+ "NA-values in the dissimilarity matrix !"
+ ## construct S object -- "dist" methods are *there* !
+ class(disv) <- dissiCl # see ./0aaa.R
+ attr(disv, "Labels") <- dimnames(x)[[1]]
+ attr(disv, "Size") <- n
+ attr(disv, "Metric") <- if(!ndyst) "mixed" else metric
+ if(!ndyst) attr(disv, "Types") <- typeCodes[type3]
+ disv
+}
+
+## Print method for "dissimilarity" objects: the dissimilarities themselves
+## (via the next print method), then the NA warning (if any), the metric
+## (plus variable types, if present) and the number of objects.
+## Returns 'x' invisibly.
+print.dissimilarity <-
+    function(x, diag = NULL, upper = NULL,
+             digits = getOption("digits"), justify = "none", right = TRUE, ...)
+{
+    cat("Dissimilarities :\n")
+    NextMethod("print")##-> stats:::print.dist(..)
+    cat("\n")
+    ## the attribute is spelled "NA.message" (as set by daisy()); attribute
+    ## names are case sensitive, so testing "na.message" never matched
+    if(!is.null(naMsg <- attr(x, "NA.message")))
+        cat("Warning : ", naMsg, "\n")
+    cat("Metric : ", attr(x, "Metric"),
+        if(!is.null(aT <- attr(x,"Types")))
+            paste("; Types =", paste(aT, collapse=", ")), "\n")
+    cat("Number of objects : ", attr(x, "Size"), "\n", sep="")
+    invisible(x)
+}
+
+summary.dissimilarity <-
+    function(object, digits = max(3, getOption("digits") - 2), ...)
+    ## 'digits': want a bit higher precision
+{
+    ## Result: list with the numeric summary ('summ'), the number of
+    ## dissimilarities ('n') and all attributes of 'object' except "class".
+    attrs <- attributes(object)
+    keep <- names(attrs) != "class"
+    res <- c(list(summ = summary(as.vector(object), digits = digits, ...),
+                  n = length(object)),
+             attrs[keep])
+    class(res) <- "summary.dissimilarity"
+    res
+}
+
+## Print method for "summary.dissimilarity" objects: the numeric summary,
+## the metric (plus variable types, if present), the number of objects and,
+## if present, the NA warning.  Returns 'x' invisibly.
+print.summary.dissimilarity <- function(x, ...)
+{
+    cat(x$n, "dissimilarities, summarized :\n")
+    print(x$summ, ...)
+    cat("Metric : ", x $ Metric,
+        if(!is.null(aT <- x $ Types))
+            paste("; Types =", paste(aT, collapse=", ")), "\n")
+    cat("Number of objects : ", x $ Size, "\n", sep="")
+    ## the component is called "NA.message" (copied from the attribute of
+    ## that name); `$` is case sensitive, so "na.message" never matched
+    if(!is.null(naMsg <- x[["NA.message"]]))
+        cat("Warning : ", naMsg, "\n")
+    invisible(x)
+}
diff --git a/R/diana.q b/R/diana.q
new file mode 100644
index 0000000..806b328
--- /dev/null
+++ b/R/diana.q
@@ -0,0 +1,144 @@
+### $Id: diana.q 7237 2016-06-23 00:42:33Z maechler $
+
+## Divisive hierarchical clustering, computed by the C routine 'twins'
+## (called with jalg = 2).
+##  x         : data matrix / data frame, or (if 'diss') a dissimilarity
+##              object, matrix or vector
+##  diss      : logical, is 'x' a dissimilarity?
+##  metric    : "manhattan" selects ndyst = 2, anything else ndyst = 1
+##              (only used when 'diss' is false)
+##  stand     : standardize the columns of 'x' (by mean absolute deviation)?
+##  stop.at.k : FALSE, or a number in 1..n passed on to the C code
+##  keep.diss, keep.data : keep dissimilarities / data in the result?
+##  trace.lev : integer trace level passed on to the C code
+## Returns a list of class c("diana", "twins") with components order, height,
+## dc, merge, diss, call and, where applicable, order.lab and data.
+diana <- function(x, diss = inherits(x, "dist"),
+                  metric = "euclidean", stand = FALSE,
+                  stop.at.k = FALSE,
+                  keep.diss = n < 100, keep.data = !diss, trace.lev = 0)
+{
+    if((diss <- as.logical(diss))) {
+        ## check type of input vector
+        if(anyNA(x)) stop("NA values in the dissimilarity matrix not allowed.")
+        if(data.class(x) != "dissimilarity") { # try to convert to
+            if(!is.null(dim(x))) {
+                x <- as.dist(x) # or give an error
+            } else {
+                ## possibly convert input *vector*
+                if(!is.numeric(x) || is.na(n <- sizeDiss(x)))
+                    stop("'x' is not and cannot be converted to class \"dissimilarity\"")
+                attr(x, "Size") <- n
+            }
+            class(x) <- dissiCl
+            if(is.null(attr(x,"Metric"))) attr(x, "Metric") <- "unspecified"
+        }
+        n <- as.integer(attr(x, "Size"))
+        dv <- x[lower.to.upper.tri.inds(n)]
+        ## prepare arguments for the Fortran call
+        dv <- c(0., dv)# double
+        jp <- 1L
+        mdata <- FALSE
+        ndyst <- 0
+        x2 <- double(1)
+    }
+    else {
+        ## check input matrix and standardize, if necessary
+        x <- data.matrix(x)
+        if(!is.numeric(x)) stop("x is not a numeric dataframe or matrix.")
+        x2 <- if(stand) scale(x, scale = apply(x, 2, meanabsdev)) else x
+        ndyst <- if(metric == "manhattan") 2 else 1
+        n <- nrow(x2)
+        jp <- ncol(x2)
+        if((mdata <- any(inax <- is.na(x2)))) { # TRUE if x[] has any NAs
+            ## jtmd[j] = -1 marks a column containing NAs, 0 otherwise
+            jtmd <- integer(jp)
+            jtmd[apply(inax, 2L, any)] <- -1L
+            ## VALue for MISsing DATa
+            valmisdat <- 1.1* max(abs(range(x2, na.rm=TRUE)))
+            x2[inax] <- valmisdat
+        }
+        dv <- double(1 + (n * (n - 1))/2)
+    }
+    stopifnot(length(trace.lev <- as.integer(trace.lev)) == 1)
+    stopifnot(is.logical(stop.at.k) ||
+              (is.numeric(stop.at.k) && 1 <= stop.at.k && stop.at.k <= n))
+    ## dissimilarities are only returned from C when computed there
+    C.keep.diss <- keep.diss && !diss
+    res <- .C(twins,
+              n,
+              jp,
+              as.double(x2),
+              dv,
+              dis = double(if(C.keep.diss) length(dv) else 1),
+              jdyss = if(C.keep.diss) diss + 10L else as.integer(diss),
+              if(mdata) rep(valmisdat, jp) else double(1),
+              if(mdata) jtmd else integer(jp),
+              as.integer(ndyst),
+              2L,# jalg = 2 <==> DIANA
+              as.integer(stop.at.k),# 'method'; default = 0L : do *not* stop early
+              integer(n),
+              ner = integer(n),
+              ban = double(n),
+              dc = double(1),
+              double(1), # { unused for diana() }
+              merge = matrix(0L, n - 1, 2), # integer
+              trace = trace.lev)
+    ## Labels in clustering order; stays NULL when there are no labels.
+    ## (Initialized explicitly: the previous exists("order.lab") test also
+    ##  found an unrelated 'order.lab' in an enclosing / the global env.)
+    order.lab <- NULL
+    if(!diss) {
+        ## give warning if some dissimilarities are missing.
+        if(res$jdyss == -1)
+            stop("No clustering performed, NA's in dissimilarity matrix.\n")
+        if(keep.diss) {
+            ## adapt Fortran output to S:
+            ## convert lower matrix, read by rows, to upper matrix, read by rows.
+            disv <- res$dis[-1]
+            disv[disv == -1] <- NA
+            disv <- disv[upper.to.lower.tri.inds(n)]
+            class(disv) <- dissiCl
+            attr(disv, "Size") <- nrow(x)
+            attr(disv, "Metric") <- metric
+            attr(disv, "Labels") <- dimnames(x)[[1]]
+        }
+        ## add labels to Fortran output
+        if(length(dimnames(x)[[1]]) != 0)
+            order.lab <- dimnames(x)[[1]][res$ner]
+    }
+    else {
+        if(keep.diss) disv <- x
+        ## add labels to Fortran output
+        if(length(attr(x, "Labels")) != 0)
+            order.lab <- attr(x, "Labels")[res$ner]
+    }
+    clustering <- list(order = res$ner, height = res$ban[-1], dc = res$dc,
+                       merge = res$merge, diss = if(keep.diss)disv,
+                       call = match.call())
+    if(!is.null(order.lab))
+        clustering$order.lab <- order.lab
+    if(keep.data && !diss) {
+        ## restore the NAs that were replaced by 'valmisdat' above
+        if(mdata) x2[x2 == valmisdat] <- NA
+        clustering$data <- x2
+    }
+    class(clustering) <- c("diana", "twins")
+    clustering
+}
+
+print.diana <- function(x, ...)
+{
+    ## Print merge matrix, object order (labels if available), heights,
+    ## divisive coefficient and the component names; returns 'x' invisibly.
+    cat("Merge:\n"); print(x$merge, ...)
+    cat("Order of objects:\n")
+    ord <- if (length(x$order.lab) != 0) x$order.lab else x$order
+    print(ord, quote = FALSE, ...)
+    cat("Height:\n"); print(x$height, ...)
+    cat("Divisive coefficient:\n"); print(x$dc, ...)
+    cat("\nAvailable components:\n"); print(names(x), ...)
+    invisible(x)
+}
+
+summary.diana <- function(object, ...)
+{
+    ## same object, only re-classed so that print.summary.diana() is used
+    `class<-`(object, "summary.diana")
+}
+
+print.summary.diana <- function(x, ...)
+{
+    ## Like print.diana(), plus a summary of the dissimilarities if kept.
+    cat("Merge:\n")
+    print(x$merge, ...)
+    cat("Order of objects:\n")
+    ord <- if(length(x$order.lab)) x$order.lab else x$order
+    print(ord, quote = FALSE, ...)
+    cat("Height:\n")
+    print(x$height, ...)
+    cat("Divisive coefficient:\n")
+    print(x$dc, ...)
+    dissim <- x$diss
+    if(!is.null(dissim)) { ## Dissimilarities:
+        cat("\n")
+        print(summary(dissim, ...))
+    }
+    cat("\nAvailable components:\n")
+    print(names(x), ...)
+    invisible(x)
+}
diff --git a/R/ellipsoidhull.R b/R/ellipsoidhull.R
new file mode 100644
index 0000000..9bc46c6
--- /dev/null
+++ b/R/ellipsoidhull.R
@@ -0,0 +1,129 @@
+#### ellipsoidhull : Find (and optionally draw)
+#### ----------- the smallest ellipsoid containing a set of points
+####
+#### Just making the algorithms in clusplot() available more generally
+#### ( --> ./plotpart.q )
+
+### Author: Martin Maechler, Date: 21 Jan 2002, 15:41
+
+## Compute an "ellipsoid" object (center 'loc', shape matrix 'cov', squared
+## radius 'd2', ...) for the rows of the numeric matrix 'x', using the
+## compiled routine 'spannel'.
+##  tol, maxit : tolerance and maximal number of iterations passed to 'spannel'
+##  ret.wt, ret.sqdist, ret.pr : also return weights / squared distances /
+##                               probabilities (otherwise these are NULL)
+ellipsoidhull <-
+ function(x, tol = 0.01, maxit = 5000,
+ ret.wt = FALSE, ret.sqdist = FALSE, ret.pr = FALSE)
+{
+ if(!is.matrix(x) || !is.numeric(x))
+ stop("'x' must be numeric n x p matrix")
+ if(anyNA(x)) {
+ warning("omitting NAs")
+ x <- na.omit(x)
+ }
+ n <- nrow(x)
+ if(n == 0) stop("no points without missing values")
+ p <- ncol(x)
+
+ res <- .C(spannel,
+ n,
+ ndep= p,
+ dat = cbind(1., x),
+ sqdist = double(n),
+ l1 = double((p+1) ^ 2),
+ double(p),
+ double(p),
+ prob = double(n),
+ double(p+1),
+ eps = as.double(tol),
+ maxit = as.integer(maxit),
+ ierr = integer(1))# 0 or non-zero
+ ## a non-zero 'ierr' is only reported via cat() here (no R condition);
+ ## it is also stored in the result and makes 'conv' FALSE below
+ if(res$ierr != 0)
+ cat("Error in Fortran routine computing the spanning ellipsoid,",
+ "\n probably collinear data\n", sep="")
+ if(any(res$prob < 0) || all(res$prob == 0))
+ stop("computed some negative or all 0 probabilities")
+ ## presumably 'maxit' is overwritten by the routine with the number of
+ ## iterations used (it is returned as 'it' below) -- TODO confirm
+ conv <- res$maxit < maxit
+ if(!conv)
+ warning(gettextf("algorithm possibly not converged in %d iterations", maxit))
+ conv <- conv && res$ierr == 0
+
+ cov <- cov.wt(x, res$prob)
+ ## cov.wt() in R has extra wt[] scaling; revert here
+ ## NB: 'res' is re-used for the return value; the right hand side still
+ ## refers to the .C() result
+ res <- list(loc = cov$center,
+ cov = cov$cov * (1 - sum(cov$wt^2)),
+ d2 = weighted.mean(res$sqdist, res$prob),
+ wt = if(ret.wt) cov$wt,
+ sqdist = if(ret.sqdist) res$sqdist,
+ prob= if(ret.pr) res$prob,
+ tol = tol,
+ eps = max(res$sqdist) - p,
+ it = res$maxit,
+ maxit= maxit,
+ ierr = res$ierr,
+ conv = conv)
+ class(res) <- "ellipsoid"
+ res
+}
+
+
+print.ellipsoid <- function(x, digits = max(1, getOption("digits") - 2), ...)
+{
+    ## Print center, squared average radius, shape matrix and volume (called
+    ## "area" in 2 dimensions) of an "ellipsoid"; returns 'x' invisibly.
+    d <- length(x$loc)
+    cat("'ellipsoid' in", d, "dimensions:\n center = (",
+        format(x$loc, digits=digits),
+        "); squared ave.radius d^2 = ", format(x$d2, digits=digits),
+        "\n and shape matrix =\n")
+    print(x$cov, digits = digits, ...)
+    Vx <- volume(x)
+    ## a non-finite volume is shown as exp(<log volume>) instead
+    chV <- if(is.finite(Vx)) {
+               format(Vx, digits=digits)
+           } else {
+               paste0("exp(", format(volume(x, log=TRUE), digits=digits),")")
+           }
+    what <- if(d==2) "area" else "volume"
+    cat(" hence,", what, " = ", chV, "\n")
+    if(!is.null(x$conv) && !x$conv) {
+        reason <- if(x$ierr) {
+                      "most probably because of collinear data"
+                  } else "(in the available number of iterations)"
+        cat("\n** Warning: ** the algorithm did not terminate reliably!\n ",
+            reason, "\n")
+    }
+    invisible(x)
+}
+
+## S3 generic: dispatches on the class of 'object'
+volume <- function(object, ...) UseMethod("volume")
+
+## The definition below is never evaluated (guarded by 'if(FALSE)'); it is
+## only kept for reference:
+if(FALSE) ## correct only for dimension d = 2 -- was used up to May 2019 :
+volume.ellipsoid <- function(object) {
+ A <- object$cov
+ pi * object$d2 * sqrt(det(A))
+}
+
+## modified MM from a proposal by Keefe Murphy, e-mail 2019-05-15
+volume.ellipsoid <- function(object, log=FALSE, ...) {
+    ## Computed on the log scale; returns the log-volume if 'log' is true.
+    stopifnot((p <- length(object$loc)) >= 1)
+    ## = log(sqrt(det(.)))
+    halfLogDet <- as.numeric(determinant(object$cov)$modulus) / 2
+    logVol <- p/2 * log(pi * object$d2) + halfLogDet - lgamma(p/2 + 1)
+    if(log) return(logVol)
+    exp(logVol)
+}
+
+
+## For p = 2 :
+## Return (x[i],y[i]) points, i = 1:n, on boundary of ellipse, given
+## by 2 x 2 matrix A[], origin 'loc' and d(xy, loc) ^2 = 'd2'
+ellipsoidPoints <- function(A, d2, loc, n.half = 201)
+{
+    ## Only implemented for 2 x 2 'A': returns a (2 * n.half) x 2 matrix of
+    ## boundary points, first along one half of the ellipse and then, in
+    ## reversed order, along the other half.
+    dimA <- dim(A)
+    if(length(dimA) != 2 || (p <- dimA[1]) != dimA[2])
+        stop("'A' must be p x p cov-matrix defining an ellipsoid")
+    if(p != 2) { ## p >= 3
+        detA <- det(A)
+        ##-- need something like polar coordinates
+        stop("ellipsoidPoints() not yet implemented for p >= 3 dim.")
+    }
+    a12 <- A[1, 2]
+    a22 <- A[2, 2]
+    detA <- A[1, 1] * a22 - a12^2
+    yl2 <- a22 * d2 # = (y_max - y_loc)^2
+    yMax <- sqrt(yl2)
+    ## NB: the local must be called 'y' -- cbind() takes the column name from it
+    y <- seq(-yMax, yMax, length = n.half)
+    halfWidth <- sqrt(detA * pmax(0, yl2 - y^2))/a22
+    halfWidth[c(1, n.half)] <- 0 # the two end points lie exactly on the boundary
+    b <- loc[1] + a12/a22 * y
+    y <- loc[2] + y
+    rbind(cbind(b - halfWidth, y),
+          cbind(rev(b + halfWidth), rev(y)))
+}
+
+predict.ellipsoid <- function(object, n.out = 201, ...)
+{
+    ## boundary points of the ellipsoid, as computed by ellipsoidPoints()
+    ellipsoidPoints(A = object$cov, d2 = object$d2, loc = object$loc,
+                    n.half = n.out)
+}
diff --git a/R/fanny.q b/R/fanny.q
new file mode 100644
index 0000000..ba3fff2
--- /dev/null
+++ b/R/fanny.q
@@ -0,0 +1,241 @@
+#### $Id: fanny.q 6953 2015-06-18 09:30:24Z maechler $
+## Fuzzy clustering into 'k' clusters, computed by the C routine 'cl_fanny'.
+##  x         : data matrix / data frame, or (if 'diss') a dissimilarity
+##              object, matrix or vector
+##  k         : number of clusters, must be in 1 .. n/2 - 1
+##  memb.exp  : membership exponent, a finite number strictly larger than 1
+##  metric    : used when 'diss' is false; its position among the choices
+##              (1, 2 or 3) is passed to C
+##  stand     : standardize the columns of 'x' (by mean absolute deviation)?
+##  iniMem.p  : NULL, or a nonnegative n x k matrix with row sums 1
+##  cluster.only, keep.diss, keep.data, maxit, tol, trace.lev : see the code
+## Returns a list of class c("fanny", "partition") with components membership,
+## coeff, memb.exp, clustering, k.crisp, objective, convergence, diss, call
+## and, where applicable, silinfo and data.
+fanny <- function(x, k, diss = inherits(x, "dist"), memb.exp = 2,
+                  metric = c("euclidean", "manhattan", "SqEuclidean"),
+                  stand = FALSE, iniMem.p = NULL, cluster.only = FALSE,
+                  keep.diss = !diss && !cluster.only && n < 100,
+                  keep.data = !diss && !cluster.only,
+                  maxit = 500, tol = 1e-15, trace.lev = 0)
+{
+    if((diss <- as.logical(diss))) {
+        ## check type of input vector
+        if(anyNA(x)) stop("NA values in the dissimilarity matrix not allowed.")
+        if(data.class(x) != "dissimilarity") { # try to convert to
+            if(!is.null(dim(x))) {
+                x <- as.dist(x) # or give an error
+            } else {
+                ## possibly convert input *vector*
+                if(!is.numeric(x) || is.na(n <- sizeDiss(x)))
+                    stop("'x' is not and cannot be converted to class \"dissimilarity\"")
+                attr(x, "Size") <- n
+            }
+            class(x) <- dissiCl
+            if(is.null(attr(x,"Metric"))) attr(x, "Metric") <- "unspecified"
+        }
+        ## prepare arguments for the Fortran call
+        n <- attr(x, "Size")
+        dv <- as.double(c(x, 0))# add extra one
+        jp <- 1
+        mdata <- FALSE
+        ndyst <- 0L
+        x2 <- double(n)
+        jdyss <- 1
+    }
+    else {
+        ## check input matrix and standardize, if necessary
+        x <- data.matrix(x)
+        if(!is.numeric(x)) stop("x is not a numeric dataframe or matrix.")
+        x2 <- if(stand) scale(x, scale = apply(x, 2, meanabsdev)) else x
+        metric <- match.arg(metric)
+        ## put info about metric, size and NAs in arguments for the Fortran call
+        ndyst <- which(metric == eval(formals()$metric))# 1, 2, or 3
+        n <- nrow(x2)
+        jp <- ncol(x2)
+        if((mdata <- any(inax <- is.na(x2)))) { # TRUE if x[] has any NAs
+            ## jtmd[j] = -1 marks a column containing NAs, +1 otherwise
+            jtmd <- as.integer(ifelse(apply(inax, 2, any), -1, 1))
+            ## VALue for MISsing DATa
+            valmisdat <- 1.1* max(abs(range(x2, na.rm=TRUE)))
+            x2[inax] <- valmisdat
+        }
+        dv <- double(1 + (n * (n - 1))/2)
+        jdyss <- 0
+    }
+    if((k <- as.integer(k)) < 1 || k > n%/%2 - 1)
+        stop("'k' (number of clusters) must be in {1,2, .., n/2 -1}")
+    ## memb.exp must be *strictly* larger than 1, as the message says
+    ## (the previous test '< 1' wrongly accepted memb.exp == 1)
+    if(length(memb.exp) != 1 || (memb.exp <- as.double(memb.exp)) <= 1
+       || memb.exp == Inf)
+        stop("'memb.exp' must be a finite number > 1")
+    if((maxit <- as.integer(maxit)[1]) < 0)
+        stop("'maxit' must be non-negative integer")
+    computeP <- is.null(iniMem.p) # default: determine initial membership in C
+    if(computeP)# default: determine initial membership in C
+        iniMem.p <- matrix(0., n, k)# all 0 -> will be used as 'code'
+    else {
+        dm <- dim(iniMem.p)
+        if(length(dm) !=2 || !all(dm == c(n,k)) ||
+           !is.numeric(iniMem.p) || any(iniMem.p < 0) ||
+           !isTRUE(all.equal(unname(rowSums(iniMem.p)), rep(1, n))))
+            stop("'iniMem.p' must be a nonnegative n * k matrix with rowSums == 1")
+        if(!is.double(iniMem.p)) storage.mode(iniMem.p) <- "double"
+    }
+    stopifnot(length(cluster.only) == 1)
+    stopifnot(length(trace.lev) == 1)
+
+    ## call Fortran routine
+    storage.mode(x2) <- "double"
+    res <- .C(cl_fanny,
+              as.integer(n),
+              as.integer(jp),
+              k,
+              x2,
+              dis = dv,
+              ok = as.integer(jdyss),
+              if(mdata) rep(valmisdat, jp) else double(1),
+              if(mdata) jtmd else integer(jp),
+              ndyst,
+              integer(n), # nsend
+              integer(n), # nelem
+              integer(n), # negbr
+              double(n), # syl
+              p = iniMem.p,
+              dp = matrix(0., n, k),# < must all be 0 on entry!
+              avsil = double(k),# 'pt'
+              integer(k), # nfuzz
+              double(k), # esp
+              double(k), # ef
+              double(n), # dvec
+              ttsil = as.double(0),
+              obj = as.double(c(cluster.only, trace.lev, computeP, 0)),# in & out!
+              clu = integer(n),
+              silinf = if(cluster.only) 0. else matrix(0., n, 4),
+              memb.exp = memb.exp,# = 'r'
+              tol = as.double(tol),
+              maxit = maxit)
+
+    ## a returned 'maxit' that is not positive is taken as "not converged"
+    if(!(converged <- res$maxit > 0)) {
+        warning(gettextf(
+            "FANNY algorithm has not converged in 'maxit' = %d iterations",
+            maxit))
+    }
+
+    if(!cluster.only) sildim <- res$silinf[, 4]
+    if(diss) {
+        if(keep.diss) disv <- x
+        labs <- attr(x, "Labels")
+    }
+    else {
+        ## give warning if some dissimilarities are missing.
+        if(res$ok == -1)
+            stop("No clustering performed, NA-values in the dissimilarity matrix.")
+        labs <- dimnames(x)[[1]]
+        if(keep.diss) {
+            disv <- res$dis[ - (1 + (n * (n - 1))/2)] # drop the extra one
+            disv[disv == -1] <- NA
+            class(disv) <- dissiCl
+            attr(disv, "Size") <- nrow(x)
+            attr(disv, "Metric") <- metric
+            attr(disv, "Labels") <- labs
+        }
+    }
+    ## add labels, dimnames, etc to Fortran output:
+    if(length(labs) != 0) {
+        if(!cluster.only) sildim <- labs[sildim]
+        dimnames(res$p) <- list(labs, NULL)
+        names(res$clu) <- labs
+    }
+    coeff <- if(memb.exp == 2) res$obj[3:4] else {
+        ## usual partition coefficient with " ^ 2 " :
+        cf <- sum(res$p ^ 2) / n
+        c(cf, (k * cf - 1)/(k - 1))
+    }
+    names(coeff) <- c("dunn_coeff", "normalized")
+    if(abs(coeff["normalized"]) < 1e-7)
+        warning("the memberships are all very close to 1/k. Maybe decrease 'memb.exp' ?")
+    k.crisp <- res$obj[1]
+    res$obj <- c("objective" = res$obj[2])
+
+    r <- list(membership = res$p, coeff = coeff, memb.exp = memb.exp,
+              clustering = res$clu, k.crisp = k.crisp,
+              # 'obj*': also containing iterations for back compatibility:
+              objective = c(res$obj, "tolerance" = res$tol),
+              convergence = c(iterations = res$maxit, converged = converged, maxit = maxit),
+              diss = if(keep.diss) disv,
+              call = match.call())
+    if(k != 1 && !cluster.only) {
+        dimnames(res$silinf) <- list(sildim,
+                                     c("cluster", "neighbor", "sil_width", ""))
+        r$silinfo <- list(widths = res$silinf[, -4],
+                          clus.avg.widths = res$avsil[1:k],
+                          avg.width = res$ttsil)
+    }
+    if(keep.data && !diss) {
+        ## restore the NAs that were replaced by 'valmisdat' above
+        if(mdata) x2[x2 == valmisdat] <- NA
+        r$data <- x2
+    }
+    class(r) <- c("fanny", "partition")
+    r
+}
+
+## non-exported:
+.print.fanny <- function(x, digits = getOption("digits"), ...) {
+    ## Common part of print.fanny() and print.summary.fanny()
+    cat("Fuzzy Clustering object of class 'fanny' :")
+    info <- c("m.ship.expon." = x$memb.exp,
+              x$objective[c("objective", "tolerance")],
+              x$convergence, "n" = nrow(x$membership))
+    print(formatC(cbind(" " = info), digits = digits),
+          quote = FALSE, ...)
+    cat("Membership coefficients (in %, rounded):\n")
+    print(round(100 * x$membership), ...)
+    cat("Fuzzyness coefficients:\n")
+    print(x$coeff, digits = digits, ...)
+    cat("Closest hard clustering:\n")
+    print(x$clustering, ...)
+    nClus <- ncol(x$membership)
+    if(x$k.crisp < nClus)
+        cat(sprintf("k_crisp (= %d) < k !!\n", x$k.crisp))
+}
+
+print.fanny <- function(x, digits = getOption("digits"), ...)
+{
+    ## common fanny output, followed by the names of the components
+    .print.fanny(x, digits = digits, ...)
+    cat("\nAvailable components:\n"); print(names(x), ...)
+    invisible(x)
+}
+
+summary.fanny <- function(object, ...)
+{
+    ## same object, only re-classed so that print.summary.fanny() is used
+    `class<-`(object, "summary.fanny")
+}
+
+print.summary.fanny <- function(x, digits = getOption("digits"), ...)
+{
+    ## common fanny output, then silhouette information and a summary of the
+    ## dissimilarities (each only if present), then the component names
+    .print.fanny(x, digits = digits, ...)
+    sil <- x$silinfo
+    if(length(sil) != 0) {
+        cat("\nSilhouette plot information:\n")
+        print(sil[[1]], ...)
+        cat("Average silhouette width per cluster:\n")
+        print(sil[[2]], ...)
+        cat("Average silhouette width of total data set:\n")
+        print(sil[[3]], ...)
+    }
+    dissim <- x$diss
+    if(!is.null(dissim)) { ## Dissimilarities:
+        cat("\n")
+        print(summary(dissim, ...))
+    }
+    cat("\nAvailable components:\n")
+    print(names(x), ...)
+    invisible(x)
+}
+
+## FIXME: Export and document these! -----------------------
+
+## Convert crisp clustering vector to fuzzy membership matrix
+as.membership <- function(clustering, keep.names = TRUE) {
+    ## Result: n x k integer 0/1 matrix with exactly one 1 per row; column j
+    ## belongs to the j-th smallest distinct value of 'clustering'.
+    stopifnot(is.numeric(clustering), clustering == round(clustering))
+    lev <- sort(unique(clustering))
+    n <- length(clustering)
+    k <- length(lev)
+    memb <- matrix(0L, n, k)
+    if(k == 0 || n == 0) return(memb)
+    if(keep.names)
+        dimnames(memb) <- list(names(clustering), NULL)
+    ## recode the labels to 1:k, unless they already are exactly 1:k
+    colInd <- if(any(lev != 1:k)) match(clustering, lev) else clustering
+    memb[cbind(1:n, colInd)] <- 1L
+    memb
+}
+
+## "Generalized Inverse" transformation:
+## Convert fuzzy membership matrix to closest crisp clustering vector
+toCrisp <- function(m)
+{
+    ## per row, the index of the (first) largest membership
+    dm <- dim(m)
+    ok <- length(dm) == 2 && is.numeric(m) && !any(m < 0) &&
+        isTRUE(all.equal(unname(rowSums(m)), rep(1, dm[1])))
+    if(!ok)
+        stop("'m', a membership matrix, must be nonnegative with rowSums == 1")
+    apply(m, 1, which.max)
+}
diff --git a/R/internal.R b/R/internal.R
new file mode 100644
index 0000000..40398e3
--- /dev/null
+++ b/R/internal.R
@@ -0,0 +1,39 @@
+#### Cluster - Internal Utilities
+#### ============================ (new by Martin Maechler)
+
+## This was size(); seems slightly useful in general
+sizeDiss <- function(d)
+{
+    ## find 'n' such that length(d) == n(n-1)/2, i.e.
+    ## n = (1 + sqrt(1 + 8 * length(d))) / 2;  NA if that is not an integer
+    discr <- 1 + 8 * length(d)
+    root <- round(sqrt(discr))
+    if(root^2 != discr) return(NA)
+    (1 + root)/2
+}
+
+##' Return indices to *permute* "dissimilarity" / "dist" entries for C (ex-Fortran) code setup
+##'
+##' Currently always used as:
+##' n <- attr(x, "Size")
+##' dv <- x[lower.to.upper.tri.inds(n)]
+##' -->> FIXME: eventually do the above directly in C
+##' @param n "Size" = number of objects, underlying the dist/dissimilarity
+##' used in ./agnes.q, ./clara.q, ./diana.q und ./pam.q :
+##' *somewhat* related to Matrix:::indTri()
+lower.to.upper.tri.inds <- function(n)
+{
+    ## index vector of length n(n-1)/2 (just 1L for n == 2)
+    n1 <- as.integer(n - 1)
+    if(n1 < 1) stop("'n' must be >= 2")
+    if(n1 == 1) return(1L)
+    offsets <- lapply(2:n1, function(k) cumsum(c(0L, (n - 2L):(n - k))))
+    rep(seq_len(n1), seq_len(n1)) + c(0L, unlist(offsets))
+}
+
+upper.to.lower.tri.inds <- function(n)
+{
+    ## index vector of length n(n-1)/2
+    n2 <- as.integer(n - 2L)
+    if(n2 < 0) stop("'n' must be >= 2")
+    starts <- 1L + cumsum(0:n2)
+    steps <- lapply(0:n2, function(k) cumsum(k:n2))
+    rep(starts, (n - 1):1) + unlist(steps)
+}
+
+
+meanabsdev <- function(y)
+{
+    ## mean absolute deviation from the mean; NAs are dropped in both steps
+    center <- mean(y, na.rm = TRUE)
+    mean(abs(y - center), na.rm = TRUE)
+}
diff --git a/R/mona.q b/R/mona.q
new file mode 100644
index 0000000..3f83a9f
--- /dev/null
+++ b/R/mona.q
@@ -0,0 +1,111 @@
+
+## mona(): clustering of binary variables via the Fortran routine 'cl_mona'.
+##   x         : matrix or data frame; every column must have exactly two
+##               distinct non-NA values, and there must be at least 2 columns.
+##   trace.lev : handed to Fortran (as integer) in the 'error' argument, which
+##               carries the error code on return.
+## Returns a list of class "mona" with components
+##   data, hasNA, order, variable, step, order.lab, call.
+mona <- function(x, trace.lev = 0)
+{
+ ## check type of input matrix
+ if(!(iM <- is.matrix(x)) && !is.data.frame(x))
+ stop("x must be a matrix or data frame.")
+ ## levels(as.factor(.)) ignores NA, so this counts the distinct non-NA values
+ if(!all(vapply(lapply(as.data.frame(x),
+ function(y) levels(as.factor(y))),
+ length, 1) == 2))
+ stop("All variables must be binary (e.g., a factor with 2 levels, both present).")
+ n <- nrow(x)
+ p <- ncol(x)
+ if(p < 2)
+ stop("mona() needs at least p >= 2 variables (in current implementation)")
+ dnx <- dimnames(x)
+ ## Change levels of input matrix to {0,1, NA=2}:
+ iF <- function(.) as.integer(as.factor(.))
+ x <- (if(iM) apply(x, 2, iF) else vapply(x, iF, integer(n))) - 1L
+ hasNA <- anyNA(x)
+ if(hasNA) x[is.na(x)] <- 2L
+## was
+## x <- apply(as.matrix(x), 2, factor)
+## x[x == "1"] <- "0"
+## x[x == "2"] <- "1"
+## x[is.na(x)] <- "2"
+## storage.mode(x) <- "integer"
+
+ ## call Fortran routine
+ res <- .Fortran(cl_mona,
+ as.integer(n),
+ as.integer(p),
+ x = x,
+ error = as.integer(trace.lev),
+ nban = integer(n),
+ ner = integer(n),
+ integer(n),
+ lava = integer(n), # => variable numbers in every step; 0: no variable
+ integer(p))
+
+ ## stop with a message when too many missing values:
+ if(res$error != 0) {
+ ## NB: Need "full simple strings below, to keep it translatable":
+ ## switch() on an integer picks the res$error-th alternative below
+ switch(res$error
+ ## 1 :
+ , stop("No clustering performed, an object was found with all values missing.")
+ ## 2 :
+ , stop("No clustering performed, found variable with more than half values missing.")
+ ## 3 : never triggers because of binary check above
+ , stop("No clustering performed, a variable was found with all non missing values identical.")
+ ## 4 :
+ , stop("No clustering performed, all variables have at least one missing value.")
+ ## 5: -- cannot trigger here: already handled above
+ , stop("mona() needs at least p >= 2 variables (in current implementation)")
+ )
+ }
+ ##O res$x <- matrix(as.numeric(substring(res$x,
+ ##O 1:nchar(res$x), 1:nchar(res$x))),
+ ##O n, p)
+ ## storage.mode(res$x) <- "integer" # keeping dim()
+ dimnames(res$x) <- dnx
+ ## add labels to Fortran output
+ if(length(dnx[[2]]) != 0) {
+ lava <- as.character(res$lava)
+ ## zero entries of res$lava select nothing when used as an index, so the
+ ## right-hand side lines up with the non-"0" positions on the left
+ lava[lava != "0"] <- dnx[[2]][res$lava]
+ lava[lava == "0"] <- "NULL"
+ res$lava <- lava
+ }
+ ## construct "mona" object
+ ## (the first entry of 'lava' and 'nban' is dropped; order.lab is NULL
+ ##  when 'x' has no row names)
+ structure(class = "mona",
+ list(data = res$x, hasNA = hasNA, order = res$ner,
+ variable = res$lava[-1], step = res$nban[-1],
+ order.lab = if(length(dnx[[1]]) != 0) dnx[[1]][res$ner],
+ call = match.call()))
+}
+
+## Print method for "mona" objects: dimension of the data, the (revised) data
+## when NAs were present, object order, variables used, separation steps and
+## the component names.  Returns 'x' invisibly.
+print.mona <- function(x, ...)
+{
+    ## FIXME: 1) Printing this is non-sense in the case where the data is unchanged
+    ##        2) If it was changed, mona(), i.e. 'x' here should contain the info!
+    dims <- dim(x$data) # TODO: maybe *not* keep 'data', but keep 'dim'
+    cat("mona(x, ..) fit; x of dimension ", dims[1],"x",dims[2],"\n", sep="")
+    if(x$hasNA) {
+        cat("Because of NA's, revised data:\n")
+        print(x$data, quote = FALSE, ...)
+    }
+
+    cat("Order of objects:\n")
+    ## prefer the labels when there are any
+    objOrder <- if(length(x$order.lab) != 0) x$order.lab else x$order
+    print(objOrder, quote = FALSE, ...)
+
+    cat("Variable used:\n")
+    print(x$variable, quote = FALSE, ...)
+
+    cat("Separation step:\n")
+    print(x$step, ...)
+
+    cat("\nAvailable components:\n")
+    print(names(x), ...)
+
+    invisible(x)
+}
+
+## FIXME: print(summary(.)) should differ from print()
+
+## summary() method: the object itself, re-classed as "summary.mona".
+summary.mona <- function(object, ...)
+    `class<-`(object, "summary.mona")
+
+## Print method for "summary.mona": currently the same output as print.mona()
+## (see the FIXME above).  Returns 'x' invisibly.
+print.summary.mona <- function(x, ...)
+{
+ print.mona(x, ...)
+ invisible(x)
+}
+
diff --git a/R/pam.q b/R/pam.q
new file mode 100644
index 0000000..83b3b41
--- /dev/null
+++ b/R/pam.q
@@ -0,0 +1,207 @@
+#### PAM : Partitioning Around Medoids
+#### --- $Id: pam.q 7509 2018-03-29 14:44:04Z maechler $
+## pam(): Partitioning Around Medoids, computed by the C routine 'cl_Pam'.
+##   x            : data matrix / data frame, or (if 'diss') a dissimilarity:
+##                  "dissimilarity"/"dist" object, matrix, or plain numeric
+##                  vector of length n(n-1)/2
+##   k            : number of clusters, must be in {1, .., n-1}
+##   diss         : logical, is 'x' a dissimilarity?
+##   metric       : "euclidean" or "manhattan" (only used when !diss)
+##   medoids      : NULL or k distinct observation indices used as initial medoids
+##   stand        : if TRUE (and !diss) columns are centered and divided by
+##                  their mean absolute deviation
+##   cluster.only : if TRUE only the clustering vector is returned
+##   do.swap, pamonce, trace.lev : passed on to the C code
+##   keep.diss, keep.data : keep dissimilarities / data in the result
+## Returns the clustering vector (cluster.only) or a list of class
+## c("pam", "partition").
+pam <- function(x, k, diss = inherits(x, "dist"),
+ metric = c("euclidean", "manhattan"), ## FIXME: add "jaccard"
+ medoids = NULL,
+ stand = FALSE, cluster.only = FALSE, do.swap = TRUE,
+ keep.diss = !diss && !cluster.only && n < 100,
+ keep.data = !diss && !cluster.only,
+ pamonce = FALSE, trace.lev = 0)
+{
+ ## NB: the defaults of 'keep.diss' / 'keep.data' are evaluated lazily, i.e.
+ ##     only once 'diss' has been coerced and 'n' has been set below.
+ stopifnot(length(cluster.only) == 1, length(trace.lev) == 1)
+ nMax <- 65536 # 2^16 (as 1+ n(n-1)/2 must be < max_int = 2^31-1)
+ if((diss <- as.logical(diss))) {
+ ## check type of input vector
+ if(anyNA(x)) stop("NA values in the dissimilarity matrix not allowed.")
+ if(data.class(x) != "dissimilarity") { # try to convert to
+ if(!is.null(dim(x))) {
+ x <- as.dist(x) # or give an error
+ } else {
+ ## possibly convert input *vector*
+ if(!is.numeric(x) || is.na(n <- sizeDiss(x)))
+ stop("'x' is not and cannot be converted to class \"dissimilarity\"")
+ attr(x, "Size") <- n
+ }
+ ## 'dissiCl' is not defined in this function (defined elsewhere)
+ class(x) <- dissiCl
+ if(is.null(attr(x,"Metric"))) attr(x, "Metric") <- "unspecified"
+ }
+ if(keep.data) stop("Cannot keep data when 'x' is a dissimilarity!")
+ ## adapt S dissimilarities to Fortran:
+ ## convert upper matrix, read by rows, to lower matrix, read by rows.
+ n <- attr(x, "Size")
+ if(n > nMax)
+ stop(gettextf("have %d observations, but not more than %d are allowed",
+ n, nMax))
+ dv <- x[lower.to.upper.tri.inds(n)]
+ ## prepare arguments for the Fortran call
+ dv <- c(0, dv) ## <- internally needed {FIXME! memory hog!}
+ storage.mode(dv) <- "double"
+ jp <- 1
+ mdata <- FALSE
+ ndyst <- 0
+ }
+ else {
+ ## check input matrix and standardize, if necessary
+ x <- data.matrix(x)# dropping "automatic rownames" compatibly with daisy()
+ if(!is.numeric(x)) stop("x is not a numeric dataframe or matrix.")
+ ## 'x2' is the working copy passed to C; 'x' keeps its dimnames for labelling
+ x2 <- x ; dimnames(x2) <- NULL
+ n <- nrow(x2)
+ if(n > nMax)
+ stop(gettextf("have %d observations, but not more than %d are allowed",
+ n, nMax))
+ if(stand) x2 <- scale(x2, scale = apply(x2, 2, meanabsdev))
+ ## put info about metric, size and NAs in arguments for the Fortran call
+ metric <- match.arg(metric)
+ ndyst <- c("euclidean" = 1L, "manhattan" = 2L)[[metric]]
+ jp <- ncol(x2)
+ if((mdata <- any(inax <- is.na(x2)))) { # TRUE if x[] has any NAs
+ ## jtmd[j] is -1 for every column j containing an NA, 0 otherwise
+ jtmd <- integer(jp)
+ jtmd[apply(inax, 2L, any)] <- -1L
+ ## VALue for MISsing DATa
+ ## (a value larger in absolute size than every observed one)
+ valmisdat <- 1.1* max(abs(range(x2, na.rm=TRUE)))
+ x2[inax] <- valmisdat
+ }
+ storage.mode(x2) <- "double"
+ }
+ if((k <- as.integer(k)) < 1 || k >= n)
+ stop("Number of clusters 'k' must be in {1,2, .., n-1}; hence n >= 2")
+ if(!is.null(medoids)) { # non-default: check provided medoids
+ ## 'fixme': consider sort(medoids) {and rely on it in ../src/pam.c }
+ if(!is.integer(medoids))
+ medoids <- as.integer(medoids)
+ if(length(medoids) != k || any(medoids < 1L) || any(medoids > n) ||
+ any(duplicated(medoids)))
+ stop(gettextf(
+ "'medoids' must be NULL or vector of %d distinct indices in {1,2, .., n}, n=%d",
+ k, n))
+ ## use observation numbers 'medoids' as starting medoids for 'swap' only
+ }
+ ## NOTE(review): 'nisol' is not referenced again in this function (it is not
+ ## among the .Call() arguments below) -- looks like a leftover; confirm.
+ nisol <- integer(if(cluster.only) 1 else k)
+ if(do.swap) nisol[1] <- 1L
+
+ res <- .Call(cl_Pam, k, n,
+ !diss, # == do_diss: compute d[i,j] from x2[] and allocate them in C
+ if(diss) dv else x2,
+ !cluster.only, ## == all_stats == "old" obj[1+ 0] == 0
+ medoids,
+ do.swap, trace.lev, keep.diss, pamonce,
+ ## only needed if(!diss) [ <=> if(do_diss) ] :
+ if(mdata) rep(valmisdat, jp) else double(1), # valmd
+ if(mdata) jtmd else integer(jp), # jtmd
+ ndyst) # dist_kind
+
+ ## Error if have NA's in diss:
+ ## (signalled by an integer instead of a list result)
+ if(!diss && is.integer(res))
+ stop("No clustering performed, NAs in the computed dissimilarity matrix.")
+
+ xLab <- if(diss) attr(x, "Labels") else dimnames(x)[[1]]
+ r.clu <- res$clu
+ if(length(xLab) > 0)
+ names(r.clu) <- xLab
+
+ if(cluster.only)
+ return(r.clu)
+
+ ## Else, usually
+ medID <- res$med
+ if(any(medID <= 0))
+ stop("error from .C(cl_pam, *): invalid medID's")
+ ## 4th column of 'silinf' holds observation indices; they become the
+ ## row names of the silhouette matrix below
+ sildim <- res$silinf[, 4]
+ if(diss) {
+ ## add labels to Fortran output
+ r.med <- if(length(xLab) > 0) {
+ sildim <- xLab[sildim]
+ xLab[medID]
+ } else medID
+ }
+ else {
+ if(keep.diss) {
+ ## adapt Fortran output to S:
+ ## convert lower matrix, read by rows, to upper matrix, read by rows.
+ disv <- res$dys[-1]
+ disv[disv == -1] <- NA
+ disv <- disv[upper.to.lower.tri.inds(n)]
+ class(disv) <- dissiCl
+ attr(disv, "Size") <- nrow(x)
+ attr(disv, "Metric") <- metric
+ attr(disv, "Labels") <- dimnames(x)[[1]]
+ }
+ ## add labels to Fortran output
+ r.med <- x[medID, , drop=FALSE]
+ if(length(xLab) > 0)
+ sildim <- xLab[sildim]
+ }
+ ## add names & dimnames to Fortran output
+ r.obj <- structure(res$obj, .Names = c("build", "swap"))
+ r.isol <- factor(res$isol, levels = 0:2, labels = c("no", "L", "L*"))
+ names(r.isol) <- 1:k
+ r.clusinf <- res$clusinf
+ dimnames(r.clusinf) <- list(NULL, c("size", "max_diss", "av_diss",
+ "diameter", "separation"))
+ ## construct S object
+ ## ('silinfo' is NULL when k == 1; 'diss' is NULL unless keep.diss)
+ r <-
+ list(medoids = r.med, id.med = medID, clustering = r.clu,
+ objective = r.obj, isolation = r.isol,
+ clusinfo = r.clusinf,
+ silinfo = if(k != 1) {
+ silinf <- res$silinf[, -4, drop=FALSE]
+ dimnames(silinf) <-
+ list(sildim, c("cluster", "neighbor", "sil_width"))
+ list(widths = silinf,
+ clus.avg.widths = res$avsil[1:k],
+ avg.width = res$ttsil)
+ },
+ diss = if(keep.diss) { if(diss) x else disv },
+ call = match.call())
+ if(keep.data) { ## have !diss
+ ## put the NAs back where the placeholder value had been inserted
+ if(mdata) x2[x2 == valmisdat] <- NA
+ r$data <- structure(x2, dimnames = dimnames(x))
+ }
+ class(r) <- c("pam", "partition")
+ r
+}
+
+## non-exported:
+## Shared part of print.pam() and print.summary.pam():
+## medoids (with their IDs), clustering vector and objective function.
+.print.pam <- function(x, ...) {
+    cat("Medoids:\n")
+    print(cbind(ID = x$id.med, x$medoids), ...)
+
+    cat("Clustering vector:\n")
+    print(x$clustering, ...)
+
+    cat("Objective function:\n")
+    print(x$objective, ...)
+}
+
+## Print method for "pam" objects: the common .print.pam() output followed by
+## the names of the available components.  Returns 'x' invisibly.
+print.pam <- function(x, ...)
+{
+ .print.pam(x, ...)
+ cat("\nAvailable components:\n")
+ print(names(x), ...)
+ invisible(x)
+}
+
+## summary() method: the object itself, re-classed as "summary.pam".
+summary.pam <- function(object, ...)
+    `class<-`(object, "summary.pam")
+
+## Print method for "summary.pam": the .print.pam() output plus per-cluster
+## information, isolated clusters, silhouette information (if present), a
+## summary of the dissimilarities (if kept) and the component names.
+## Returns 'x' invisibly.
+print.summary.pam <- function(x, ...)
+{
+    .print.pam(x, ...)
+
+    cat("\nNumerical information per cluster:\n")
+    print(x$clusinfo, ...)
+
+    cat("\nIsolated clusters:\n L-clusters: ")
+    print(names(x$isolation[x$isolation == "L"]), quote = FALSE, ...)
+    cat(" L*-clusters: ")
+    print(names(x$isolation[x$isolation == "L*"]), quote = FALSE, ...)
+
+    if(length(x$silinfo) != 0) {
+        ## components are accessed by position: widths, per-cluster, overall
+        cat("\nSilhouette plot information:\n")
+        print(x$silinfo[[1]], ...)
+        cat("Average silhouette width per cluster:\n")
+        print(x$silinfo[[2]], ...)
+        cat("Average silhouette width of total data set:\n")
+        print(x$silinfo[[3]], ...)
+    }
+
+    if(!is.null(x$diss)) { ## Dissimilarities:
+        cat("\n")
+        print(summary(x$diss, ...))
+    }
+
+    cat("\nAvailable components:\n")
+    print(names(x), ...)
+    invisible(x)
+}
+
diff --git a/R/plothier.q b/R/plothier.q
new file mode 100644
index 0000000..e410df5
--- /dev/null
+++ b/R/plothier.q
@@ -0,0 +1,216 @@
+### $Id: plothier.q 6800 2014-09-04 08:29:53Z maechler $
+
+## S3 generic; dispatches on the class of 'x' (method below: pltree.twins()).
+pltree <- function(x, ...) UseMethod("pltree")
+
+## pltree() method for "twins" objects: converts 'x' with as.hclust() and
+## plots the result via the plot() generic.
+## note: pltree() can have an 'xlab' in "..." (plot.hclust has an explicit one)
+pltree.twins <- function(x, main = paste("Dendrogram of ", deparse(x$call)),
+                         labels = NULL, ylab = "Height", ...)
+{
+    plot(as.hclust(x), labels = labels, main = main, ylab = ylab, ...)
+}
+
+## bannerplot(): draw a "banner", a horizontal two-colour barplot of 'w'.
+##   x          : list providing 'height' (default for 'w'), 'order' and
+##                optionally 'order.lab' (for the y axis labels)
+##   w          : bar widths; the bars are complemented to the maximum max(w)
+##   fromLeft   : if TRUE the 'w' part is drawn first (left), else last
+##   xax.pretty : FALSE: old style axis (11 equispaced ticks plus the maximum);
+##                TRUE: pretty() ticks; a number: used as 'n' for pretty()
+##   rev.xax    : (only if !fromLeft) reverse the x axis labelling
+##   yax.do, yaxRight, y.mar, nmax.lab, max.strlen, labels : y axis labelling
+##   ...        : passed to barplot() and axis()
+## Returns NULL invisibly.
+bannerplot <-
+function(x, w = rev(x$height), fromLeft = TRUE, main=NULL, sub=NULL,
+ xlab = "Height", adj = 0, col = c(2, 0), border = 0,
+ axes = TRUE, frame.plot = axes, rev.xax = !fromLeft, xax.pretty = TRUE,
+ labels = NULL, nmax.lab = 35, max.strlen = 5,
+ yax.do = axes && length(x$order) <= nmax.lab,
+ yaxRight = fromLeft, y.mar = 2.4 + max.strlen / 2.5, ...)
+{
+ m <- max(w)
+ if(axes) {
+ ## compute tick positions 'at.vals' and their labels 'lab.vals'
+ if(xax.pretty) {
+ at.vals <- if(!is.logical(xax.pretty))
+ pretty(c(0,w), n = xax.pretty) else pretty(c(0,w))
+ n <- length(at.vals <- at.vals[at.vals <= m])
+ ## add a tick at the maximum itself unless the last pretty tick is
+ ## within 1% of it
+ if(at.vals[n] * 1.01 < m) {
+ lab.vals <- c(at.vals, signif(m, 3))
+ at.vals <- c(at.vals, m)
+ } else lab.vals <- at.vals
+ } else { # old default for plot.agnes() and plot.diana()
+ ss <- seq(0, floor(m), length = 11)# => intervals = 1/10 {total}
+ at.vals <- c(ss, m)
+ lab.vals <- round(at.vals, 2)
+ }
+ }
+ ## 'w' becomes a 2-row matrix: each bar consists of two stacked segments
+ if(fromLeft) {
+ w <- rbind(w, m - w)
+ ## only the *default* colours are swapped, a user supplied 'col' is kept
+ if(missing(col)) col <- rev(col)
+ } else { ## from Right
+ w <- rbind(m - w, w)
+ if(axes && rev.xax) {
+ at.vals <- m - rev(at.vals)## == c(0, ss + m - floor(m))
+ lab.vals <- rev(lab.vals)
+ }
+ }
+ if(yax.do) {
+ ax <- if(yaxRight)
+ list(side = 4, pos = m)
+ else list(side = 2, pos = 0)
+ if((pm <- par("mar"))[ax$side] < y.mar) {
+ ## need space besides y axis for labeling
+ pm[ax$side] <- y.mar
+ op <- par(mar = pm)
+ on.exit(par(op))
+ }
+ }
+ barplot(w, xlab = xlab, horiz = TRUE, space = 0, axes = FALSE,
+ col = col, border = border, mgp = c(2.5, 1, 0), ...)
+ ## draw an explicit frame when the bar borders are invisible
+ if(frame.plot && (border == 0 || border == par("bg")))
+ rect(0, 0, m, ncol(w))
+
+ title(main = main, sub = sub, adj = adj)
+ if(axes) {
+ axis(1, at = at.vals, labels = lab.vals, ...)
+ if(yax.do) {
+ ## default labels: (truncated) object labels or order numbers, reversed
+ if(is.null(labels))
+ labels <- rev(if (length(x$order.lab) != 0)
+ substring(x$order.lab, 1,max.strlen) else x$order)
+ axis(ax$side, at = 0:(length(x$order) - 1), las = 1,
+ labels = labels, pos = ax$pos, mgp = c(3, 1.25, 0), ...)
+ }
+ }
+ invisible()
+}
+
+## plot.diana() [further down] & plot.agnes() are almost identical;
+## -- made bannerplot() a stand-alone function
+## --> maybe *merge* these two into one plot.twins()
+## plot() method for "agnes" objects: banner plot (1) and clustering tree (2).
+##   ask         : if TRUE and 'which.plots' is NULL, choose plots from a menu
+##   which.plots : integer vector with values in 1:2, or NULL (=> both)
+##   main        : common title; if NULL, separate defaults built from x$call
+##   sub, adj, nmax.lab, max.strlen : passed on to bannerplot() / pltree()
+##   xax.pretty  : passed on to bannerplot().  When it is not specified, 10 is
+##                 used as before; previously an explicitly given value was
+##                 silently ignored.
+## Returns NULL invisibly.
+plot.agnes <-
+function(x, ask = FALSE, which.plots = NULL, main = NULL,
+         sub = paste("Agglomerative Coefficient = ", round(x$ac, digits = 2)),
+         adj = 0, nmax.lab = 35, max.strlen = 5, xax.pretty = TRUE, ...)
+{
+    if(is.null(main)) {
+        cl <- paste(strwrap(deparse(x$call, 150)[1], width = 60, exdent = 7),
+                    collapse="\n")
+        ## Different default for banner & pltree:
+        main1 <- paste("Banner of ", cl)
+        main2 <- paste("Dendrogram of ", cl)
+    }
+    else { # same title for both
+        main1 <- main2 <- main
+    }
+    ## honour a user supplied 'xax.pretty'; keep the historical 10 otherwise
+    xaxP <- if(missing(xax.pretty)) 10 else xax.pretty
+
+    if(is.null(which.plots) && !ask)
+        which.plots <- 1:2
+    if(ask && is.null(which.plots)) { ## Use 'menu' ..
+        tmenu <- paste("plot ", ## choices :
+                       c("All", "Banner", "Clustering Tree"))
+        do.all <- FALSE
+        repeat {
+            if(!do.all)
+                pick <- menu(tmenu, title =
+                             "\nMake a plot selection (or 0 to exit):\n") + 1
+            switch(pick,
+                   return(invisible()), # 0 -> exit loop
+                   do.all <- TRUE,# 1 : All
+                   bannerplot(x, fromLeft = TRUE,
+                              main = main1, sub = sub, adj = adj,
+                              xax.pretty = xaxP,
+                              nmax.lab= nmax.lab, max.strlen= max.strlen, ...),
+                   pltree (x, main = main2, sub = sub, ...) # 3
+                   )
+            ## "All": step through the remaining menu entries without asking
+            if(do.all) { pick <- pick + 1; do.all <- pick <= length(tmenu) + 1}
+        }
+    }
+    else {
+        ## pause between plots when they do not all fit on one page
+        ask <- prod(par("mfcol")) < length(which.plots) && dev.interactive()
+        if(ask) {
+            op <- par(ask = TRUE)
+            on.exit(par(op))
+        }
+        for(i in which.plots)
+            switch(i,
+                   bannerplot(x, fromLeft = TRUE,
+                              main = main1, sub = sub, adj = adj,
+                              xax.pretty = xaxP,
+                              nmax.lab = nmax.lab, max.strlen = max.strlen, ...),
+                   pltree (x, main = main2, sub = sub, ...)
+                   )
+    }
+    invisible()
+}
+
+## plot() method for "diana" objects: banner plot (1, drawn from the right)
+## and clustering tree (2).
+##   ask         : if TRUE and 'which.plots' is NULL, choose plots from a menu
+##   which.plots : integer vector with values in 1:2, or NULL (=> both)
+##   main        : common title; if NULL, separate defaults built from x$call
+##   sub, adj, nmax.lab, max.strlen : passed on to bannerplot() / pltree()
+##   xax.pretty  : passed on to bannerplot().  When it is not specified, 10 is
+##                 used as before; previously an explicitly given value was
+##                 silently ignored.
+## Returns NULL invisibly.
+plot.diana <-
+function(x, ask = FALSE, which.plots = NULL, main = NULL,
+         sub = paste("Divisive Coefficient = ", round(x$dc, digits = 2)),
+         adj = 0, nmax.lab = 35, max.strlen = 5, xax.pretty = TRUE, ...)
+{
+    if(is.null(main)) {
+        cl <- paste(strwrap(deparse(x$call, 150)[1], width = 60, exdent = 7),
+                    collapse="\n")
+        ## Different default for banner & pltree:
+        main1 <- paste("Banner of ", cl)
+        main2 <- paste("Dendrogram of ", cl)
+    }
+    else { # same title for both
+        main1 <- main2 <- main
+    }
+    ## honour a user supplied 'xax.pretty'; keep the historical 10 otherwise
+    xaxP <- if(missing(xax.pretty)) 10 else xax.pretty
+
+    if(is.null(which.plots) && !ask)
+        which.plots <- 1:2
+    if(ask && is.null(which.plots)) { ## Use 'menu' ..
+        tmenu <- paste("plot ", ## choices :
+                       c("All", "Banner", "Clustering Tree"))
+        do.all <- FALSE
+        repeat {
+            if(!do.all)
+                pick <- menu(tmenu, title =
+                             "\nMake a plot selection (or 0 to exit):\n") + 1
+            switch(pick,
+                   return(invisible()), # 0 -> exit loop
+                   do.all <- TRUE,# 1 : All
+                   bannerplot(x, fromLeft = FALSE,
+                              main = main1, sub = sub, adj = adj,
+                              xax.pretty = xaxP,
+                              nmax.lab= nmax.lab, max.strlen= max.strlen, ...),
+                   pltree (x, main = main2, sub = sub, ...)
+                   )
+            ## "All": step through the remaining menu entries without asking
+            if(do.all) { pick <- pick + 1; do.all <- pick <= length(tmenu) + 1}
+        }
+    }
+    else {
+        ## pause between plots when they do not all fit on one page
+        ask <- prod(par("mfcol")) < length(which.plots) && dev.interactive()
+        if(ask) {
+            op <- par(ask = TRUE)
+            on.exit(par(op))
+        }
+        for(i in which.plots)
+            switch(i,
+                   bannerplot(x, fromLeft = FALSE, main = main1, sub = sub,
+                              adj = adj, xax.pretty = xaxP,
+                              nmax.lab = nmax.lab, max.strlen = max.strlen, ...),# 1
+                   pltree (x, main = main2, sub = sub, ...) # i = 2
+                   )
+    }
+    invisible()
+}
+
+## plot() method for "mona" objects: a banner of the separation steps with the
+## name of the variable used written next to each bar.
+plot.mona <- function(x, main = paste("Banner of ", deparse(x$call)),
+                      sub = NULL, xlab = "Separation step",
+                      col = c(2,0), axes = TRUE, adj = 0,
+                      nmax.lab = 35, max.strlen = 5, ...)
+{
+    w <- rev(x$step)
+    m <- max(w)
+    ## steps equal to 0 are drawn as full-width bars, one beyond the maximum
+    i0 <- w == 0
+    if(any(i0)) {
+        m <- m+1
+        w[i0] <- m
+    }
+    bannerplot(x[c("order","order.lab")], w = w, fromLeft = TRUE,
+               yaxRight = FALSE, col = col, main = main, sub = sub,
+               xlab = xlab, adj = adj, axes = axes,
+               nmax.lab = nmax.lab, max.strlen = max.strlen,
+               xax.pretty = m+1, ...)
+    ## variable names at the end of each bar; none for the step-0 bars
+    varLabs <- paste(" ", rev(x$variable))
+    is.na(varLabs) <- i0
+    text(w, 1:length(varLabs) - 0.5, varLabs, adj = 0, col = col[1], ...)
+}
diff --git a/R/plotpart.q b/R/plotpart.q
new file mode 100644
index 0000000..5c74e07
--- /dev/null
+++ b/R/plotpart.q
@@ -0,0 +1,520 @@
+### $Id: plotpart.q 7668 2019-05-15 17:07:45Z maechler $
+## plot() method for "partition" objects: clusplot (1) and silhouette plot (2).
+##   ask         : if TRUE and 'which.plots' is NULL, choose plots from a menu
+##   which.plots : integer vector with values in 1:2, or NULL
+##   data, dist  : data / dissimilarities to use when 'x' does not contain them
+##   nmax.lab, max.strlen : passed (by position) to the silhouette plot
+##   stand, lines, shade, color, labels, plotchar, span, xlim, ylim, main, ... :
+##                 passed to clusplot()
+plot.partition <-
+function(x, ask = FALSE, which.plots = NULL,
+ nmax.lab = 40, max.strlen = 5, data = x$data, dist = NULL,
+ stand = FALSE, lines = 2,
+ shade = FALSE, color = FALSE, labels = 0, plotchar = TRUE,
+ span = TRUE, xlim = NULL, ylim = NULL, main = NULL, ...)
+{
+ if(is.null(x$data))# data not kept
+ x$data <- data
+ if(is.null(x$data) && !is.null(dist))
+ x$diss <- dist
+ if(is.null(which.plots) && !ask)
+ which.plots <- {
+ ## without data, and without usable dissimilarities, only the
+ ## silhouette plot can be drawn
+ if(is.null(x$data) && (is.null(x$diss) || inherits(x, "clara")))
+ 2 ## no clusplot
+ else 1:2
+ }
+ if(ask && is.null(which.plots)) { ## Use 'menu' ..
+ tmenu <- paste("plot ", ## choices :
+ c("All", "Clusplot", "Silhouette Plot"))
+ do.all <- FALSE
+ repeat {
+ if(!do.all)
+ pick <- menu(tmenu, title =
+ "\nMake a plot selection (or 0 to exit):\n") + 1
+ switch(pick,
+ return(invisible())# 0 -> exit loop
+ ,
+ do.all <- TRUE# 1 : All
+ ,
+ clusplot(x, stand = stand, lines = lines,
+ shade = shade, color = color, labels = labels,
+ plotchar = plotchar, span = span,
+ xlim = xlim, ylim = ylim, main = main, ...)
+ ,
+ plot(silhouette(x), nmax.lab, max.strlen, main = main)
+ )
+ ## "All": step through the remaining menu entries without asking
+ if(do.all) { pick <- pick + 1; do.all <- pick <= length(tmenu) + 1}
+ }
+ ## the repeat loop above is only left via return()
+ invisible()
+ }
+ else {
+ ## pause between plots when they do not all fit on one page
+ ask <- prod(par("mfcol")) < length(which.plots) && dev.interactive()
+ if(ask) { op <- par(ask = TRUE); on.exit(par(op)) }
+ for(i in which.plots)
+ switch(i,
+ clusplot(x, stand = stand, lines = lines,
+ shade = shade, color = color, labels = labels,
+ plotchar = plotchar, span = span,
+ xlim = xlim, ylim = ylim, main = main, ...)
+ ,
+ plot(silhouette(x), nmax.lab, max.strlen, main = main)
+ ) ## and return() whatever *plot(..) returns
+ }
+}
+
+## S3 generic; methods below: clusplot.default() and clusplot.partition().
+clusplot <- function(x, ...) UseMethod("clusplot")
+
+
+##' @title Make/Check the (n x 2) matrix needed for clusplot.default():
+##' @param x numeric matrix or dissimilarity matrix (-> clusplot.default())
+##' @param diss logical indicating if 'x' is dissimilarity matrix.  In that case,
+##'   'cmdscale()' is used, otherwise (typically) 'princomp()'.
+##' @return a list with components
+##'   x      : (n x 2) numeric matrix;
+##'   var.dec: a number (in [0,1]), the "variance explained"
+##'   labs   : the point labels (possibly 1:n)
+##' @author Martin Maechler
+mkCheckX <- function(x, diss) {
+    if(diss) {
+        if(anyNA(x))
+            stop("NA-values are not allowed in dist-like 'x'.")
+        if(inherits(x, "dist")) {
+            n <- attr(x, "Size")
+            labs <- attr(x, "Labels")
+        }
+        else { # x (num.vector or square matrix) must be transformed into diss.
+            siz <- sizeDiss(x)
+            if(is.na(siz)) {
+                ## length(x) is not n(n-1)/2: only a square matrix is acceptable.
+                ## is.null(dim(.)) catches plain vectors, for which nrow()/ncol()
+                ## are NULL and the comparison would fail with an obscure error.
+                if(is.null(dim(x)) || (n <- nrow(x)) != ncol(x))
+                    stop("Distances must be result of dist or a square matrix.")
+                ## all.equal() returns TRUE or a character vector of possibly
+                ## several messages, hence must be wrapped in isTRUE():
+                if(!isTRUE(all.equal(x, t(x))))
+                    stop("the square matrix is not symmetric.")
+                labs <- dimnames(x)[[1]]
+            }
+            else {
+                if(!is.vector(x)) {
+                    labs <- attr(x, "Labels") # possibly NULL
+                    x <- as.matrix(x)
+                    if((n <- nrow(x)) == ncol(x) && isTRUE(all.equal(x, t(x)))) {
+                        labs <- dimnames(x)[[1]]
+                    }
+                    else {
+                        ## Hmm, when does this ever happen :
+                        ## numeric, not-dist, non-vector, not symmetric matrix ?
+                        warning(">>>>> funny case in clusplot.default() -- please report!\n")
+                        ## if(n != sizeDiss(x)) ...
+                        attr(x, "Size") <- siz <- sizeDiss(x)
+                        if(is.null(labs)) labs <- 1:siz
+                    }
+                }
+                else {
+                    ## plain vector of length n(n-1)/2: no labels available;
+                    ## 'labs' must still be defined for the result below
+                    attr(x, "Size") <- n <- siz
+                    labs <- NULL
+                }
+            }
+        }
+        x1 <- cmdscale(x, k = 2, add = TRUE)
+        if(x1$ac < 0) ## Rarely ! (FIXME: need and test example!)
+            x1 <- cmdscale(x, k = 2, eig = TRUE)# TODO: not 'eig', but list. = TRUE for R >= 3.2.2
+        var.dec <- x1$GOF[2] # always in [0,1]
+        x1 <- x1$points
+    }
+    else { ## Not (diss)
+        if(!is.matrix(x)) stop("x is not a data matrix")
+        if(anyNA(x)) {
+            y <- is.na(x)
+            if(any(apply(y, 1, all)))
+                stop("one or more objects contain only missing values")
+            if(any(apply(y, 2, all)))
+                stop("one or more variables contain only missing values")
+            ## impute every NA by the median of its column
+            x <- apply(x, 2, function(x)
+                   { x[is.na(x)] <- median(x, na.rm = TRUE); x } )
+            message("Missing values were displaced by the median of the corresponding variable(s)")
+        }
+        n <- nrow(x)
+        labs <- dimnames(x)[[1]]
+
+        x1 <- if(ncol(x) <= 1) {
+            ## a single variable: use it as first, and zeros as second coordinate
+            var.dec <- 1
+            matrix(c(t(x), rep(0, length(x))), ncol = 2)
+        }
+        else {
+            prim.pr <- princomp(x, scores = TRUE, cor = ncol(x) > 2)
+            sd2 <- prim.pr$sdev^2
+            ## proportion of variance of the first two components
+            var.dec <- cumsum(sd2/sum(sd2))[2]
+            prim.pr$scores[, 1:2]
+        }
+    }
+    list(x = x1, var.dec = var.dec, labs = if(is.null(labs)) 1:n else labs)
+} ## mkCheckX()
+
+## TODO: allow components (2,3) or (1,3) instead of always (1,2) => drop 'var.dec', 'sub'
+## clusplot.default(): 2-d plot of a clustering; every cluster is drawn as an
+## ellipse around its points.
+##   x       : numeric data matrix / data frame, or dissimilarities (if 'diss')
+##   clus    : clustering vector, one entry per point
+##   s.x.2d  : list with components 'x' (n x 2 coordinates), 'labs', 'var.dec'
+##   stand   : if TRUE the 2-d coordinates are scale()d
+##   lines   : 1 or 2: draw segments between clusters (see below), else none
+##   shade, color, col.clus : ellipse shading density / colours
+##   labels  : 0: none; 1: identify() points and ellipses; 2: all points and
+##             ellipses labelled; 3: points only; 4: ellipses only;
+##             5: ellipses labelled plus identify() for points
+##   plotchar: if TRUE each cluster gets its own plotting symbol
+##   span    : if TRUE use the spanning ellipse (C routine 'spannel'),
+##             otherwise one based on mean and covariance of the cluster
+##   add     : if TRUE add to an existing plot
+## Returns invisibly list(Distances, Shading).
+## (Local Dutch names: aantal = number of points, oppervlak = area,
+##  verhoud = points per area, afstand = distance.)
 ## TODO: allow components (2,3) or (1,3) instead of always (1,2) => drop 'var.dec', 'sub'
+clusplot.default <-
+function(x, clus, diss = FALSE, s.x.2d = mkCheckX(x, diss),
+ stand = FALSE, lines = 2,
+ shade = FALSE, color = FALSE, labels = 0, plotchar = TRUE,
+ col.p = "dark green", # was 5 (= shaded col)
+ col.txt = col.p, col.clus = if(color) c(2, 4, 6, 3) else 5,
+ cex = 1, cex.txt = cex,
+ span = TRUE, add = FALSE, xlim = NULL, ylim = NULL,
+ main = paste("CLUSPLOT(", deparse(substitute(x)),")"),
+ sub = paste("These two components explain",
+ round(100 * var.dec, digits = 2), "% of the point variability."),
+ xlab = "Component 1", ylab = "Component 2",
+ verbose = getOption("verbose"),
+ ...)
+{
+ ## evaluate the default 'main' now, before 'x' is modified below
+ force(main)
+ if(is.data.frame(x))
+ x <- data.matrix(x)
+ if(!is.numeric(x))
+ stop("x is not numeric")
+## FIXME: - if labels == 0 or == 4, do not need "labs"
+## - if !missing(sub), do not need "var.dec"
+ stopifnot(is.list(s.x.2d),
+ c("x","labs","var.dec") %in% names(s.x.2d),
+ (n <- nrow(x1 <- s.x.2d[["x"]])) > 0)
+ labels1 <- s.x.2d[["labs"]]
+ var.dec <- s.x.2d[["var.dec"]]
+ ## --- The 2D space is setup and points are in x1[,] (n x 2) ---
+
+ clus <- as.vector(clus)
+ if(length(clus) != n)
+ stop("The clustering vector is of incorrect length")
+ clus <- as.factor(clus)
+ if(anyNA(clus))
+ stop("NA-values are not allowed in clustering vector")
+ if(stand)
+ x1 <- scale(x1)
+
+ levclus <- levels(clus)
+ nC <- length(levclus) # the number of clusters
+
+ d.x <- diff(range(x1[, 1]))
+ d.y <- diff(range(x1[, 2]))
+ ## per cluster: z = ellipse boundary points, A = shape matrix,
+ ## loc = center, d2 = squared "radius", verhoud = points per area
+ z <- A <- vector("list", nC)
+ loc <- matrix(0, nrow = nC, ncol = 2)
+ d2 <- verhoud <- numeric(nC)
+ ## num1 .. num6 : all used only once -- there are more constants anyway
+ num3 <- 90
+ num6 <- 70
+
+ for(i in 1:nC) { ##------------- i-th cluster --------------
+ x <- x1[clus == levclus[i],, drop = FALSE ]
+ aantal <- nrow(x) # number of observations in cluster [i]
+ cov <- var(if(aantal == 1) {
+ if(verbose)
+ cat("cluster",i," has only one observation ..\n")
+ rbind(x, c(0, 0))
+ } else x)
+ x.1 <- range(x[, 1])
+ y.1 <- range(x[, 2])
+ ## degenerate cluster: covariance matrix is not of full rank
+ notrank2 <- qr(cov, tol = 0.001)$rank != 2
+ if(!span && notrank2) {
+ d2[i] <- 1
+ if((abs(diff(x.1)) > d.x/70) ||
+ (abs(diff(y.1)) > d.y/50)) {
+ loc[i, ] <- c(x.1[1] + diff(x.1)/2, y.1[1] + diff(y.1)/2)
+ a <- sqrt((loc[i, 1] - x.1[1])^2 +
+ (loc[i, 2] - y.1[1])^2)
+ a <- a + 0.05 * a
+ num2 <- 40
+ if(abs(diff(x.1)) > d.x/70 ) {
+ ind1 <- which.max(x[,1])
+ ind2 <- which.min(x[,1])
+ q <- atan((x[ind1, 2] - x[ind2, 2])/
+ (x[ind1, 1] - x[ind2, 1]))
+ b <-
+ if(d.y == 0)
+ 1
+ else if(abs(diff(y.1)) > d.y/50)
+ diff(y.1)/10 ## num1 <- 10
+ else d.y/num2
+ }
+ else {
+ b <- if(d.x == 0) 1 else d.x/num2
+ q <- pi/2
+ }
+ ## ellipse with half axes a, b, rotated by the angle q
+ D <- diag(c(a^2, b^2))
+ R <- rbind(c(cos(q), -sin(q)),
+ c(sin(q), cos(q)))
+ A[[i]] <- (R %*% D) %*% t(R)
+ }
+ else {
+ a <- d.x/num3
+ b <- d.y/num6
+ if(a == 0) a <- 1
+ if(b == 0) b <- 1
+ A[[i]] <- diag(c(a^2, b^2))
+ loc[i, ] <- x[1, ]
+ }
+ oppervlak <- pi * a * b
+ }
+ else if(span && notrank2) {
+ d2[i] <- 1
+ if(sum(x[, 1] != x[1, 1]) != 0 ||
+ sum(x[, 2] != x[1, 2]) != 0) {
+ loc[i, ] <- c(x.1[1] + diff(x.1)/2,
+ y.1[1] + diff(y.1)/2)
+ a <- sqrt((loc[i, 1] - x.1[1])^2 +
+ (loc[i, 2] - y.1[1])^2)
+ if(any(x[, 1] != x[1, 1])) {
+ ind1 <- which.max(x[,1])
+ ind2 <- which.min(x[,1])
+ q <- atan((x[ind1, 2] - x[ind2, 2])/
+ (x[ind1, 1] - x[ind2, 1]))
+ }
+ else {
+ q <- pi/2
+ }
+ b <- 1e-7
+ D <- diag(c(a^2, b^2))
+ R <- rbind(c(cos(q), -sin(q)),
+ c(sin(q), cos(q)))
+ A[[i]] <- (R %*% D) %*% t(R)
+ }
+ else {
+ a <- d.x/num3
+ b <- d.y/num6
+ if(a == 0) a <- 1
+ if(b == 0) b <- 1
+ A[[i]] <- diag(c(a^2, b^2))
+ loc[i, ] <- x[1, ]
+ }
+ oppervlak <- pi * a * b
+
+ }
+ else { ## rank2
+ if(!span) {
+ loc[i, ] <- colMeans(x)
+ d2[i] <- max(mahalanobis(x, loc[i, ], cov))
+ ## * (1+ 0.01)^2 --- dropped factor for back-compatibility
+ }
+ else { ## span and rank2
+ if(verbose)
+ cat("span & rank2 : calling \"spannel\" ..\n")
+ k <- 2L
+ res <- .C(spannel,
+ aantal,
+ ndep= k,
+ dat = cbind(1., x),
+ sqdist = double(aantal),
+ l1 = double((k+1) ^ 2),
+ double(k),
+ double(k),
+ prob = double(aantal),
+ double(k+1),
+ eps = (0.01),## convergence tol.
+ maxit = 5000L,
+ ierr = integer(1))
+ if(res$ierr != 0)
+ ## MM : exactmve not available here !
+ warning("Error in C routine for the spanning ellipsoid,\n rank problem??")
+
+ cov <- cov.wt(x, res$prob)
+ loc[i, ] <- cov$center
+ ## NB: cov.wt() in R has extra wt[] scaling; revert here:
+ cov <- cov$cov * (1 - sum(cov$wt^2))
+ d2[i] <- weighted.mean(res$sqdist, res$prob)
+
+ if(verbose)
+ cat("ellipse( A= (", format(cov[1,]),"*", format(cov[2,2]),
+ "),\n\td2=", format(d2[i]),
+ ", loc[]=", format(loc[i, ]), ")\n")
+ }
+ A[[i]] <- cov
+ ## oppervlak (flam.) = area (Engl.)
+ oppervlak <- pi * d2[i] * sqrt(cov[1, 1] * cov[2, 2] - cov[1, 2]^2)
+ }
+
+ z[[i]] <- ellipsoidPoints(A[[i]], d2[i], loc[i, ], n.half= 201)
+ verhoud[i] <- aantal/oppervlak
+ } ## end for( i-th cluster )
+
+ x.range <- do.call(range, lapply(z, `[`, i=TRUE, j = 1))
+ y.range <- do.call(range, lapply(z, `[`, i=TRUE, j = 2))
+ ## shading density per cluster: between 3 and 41, growing with the
+ ## cluster's share of the summed (non-huge) points-per-area values
+ verhouding <- sum(verhoud[verhoud < 1e7])
+ if(verhouding == 0) verhouding <- 1
+ ## num4 <- 37 ; num5 <- 3 --- but '41' is another constant
+ density <- 3 + (verhoud * 37)/verhouding
+ density[density > 41] <- 41
+ if (span) {
+ if (d.x == 0) ## diff(range(x[,1]) == 0 : x-coords all the same
+ x.range <- x1[1, 1] + c(-1,1)
+ if (d.y == 0) ## diff(range(x[,2]) == 0 : y-coords all the same
+ y.range <- x1[1, 2] + c(-1,1)
+ }
+ if(is.null(xlim)) xlim <- x.range
+ if(is.null(ylim)) ylim <- y.range
+ if(length(col.p) < n) col.p <- rep(col.p, length= n)
+
+ ## --- Now plotting starts ---
+
+ ## "Main plot" --
+ if(!add) {
+ plot(x1, xlim = xlim, ylim = ylim,
+ xlab = xlab, ylab = ylab, main = main,
+ type = if(plotchar) "n" else "p", # if(plotchar) add points later
+ col = col.p, cex = cex, ...)
+ if(!is.null(sub) && !is.na(sub) && nchar(sub) > 0)
+ title(sub = sub, adj = 0)
+ }
+ if(color) {
+ if(length(col.clus) < min(4,nC))
+ stop("'col.clus' should have length 4 when color is TRUE")
+ ## clusters are coloured by density: with more than 4 clusters the
+ ## sorted densities are themselves grouped into 4 classes by pam()
+ i.verh <- order(verhoud)
+ jInd <- if(nC > 4) pam(verhoud[i.verh], 4)$clustering else 1:nC
+ for(i in 1:nC) {
+ k <- i.verh[i]
+ polygon(z[[k]], density = if(shade) density[k] else 0,
+ col = col.clus[jInd[i]], ...)
+ }
+ ## colours per cluster, back in the original cluster order
+ col.clus <- col.clus[jInd][order(i.verh)]
+ }
+ else {
+ for(i in 1:nC)
+ polygon(z[[i]], density = if(shade) density[i] else 0,
+ col = col.clus, ...)
+ }
+
+ ## points after polygon in order to write ON TOP:
+ if(plotchar) {
+ ## plotting symbols 1..19 are recycled over the clusters
+ karakter <- 1:19
+ for(i in 1:nC) {
+ iC <- clus == levclus[i]
+ points(x1[iC, , drop = FALSE], cex = cex,
+ pch = karakter[1+(i-1) %% 19], col = col.p[iC], ...)
+ }
+ }
+
+ if(nC > 1 && (lines == 1 || lines == 2)) {
+ ## Draw lines between all pairs of the nC cluster (centers)
+
+ ## utilities for computing ellipse intersections:
+ ## clas.snijpunt(): of the two candidate points (rows of x), return the
+ ## one whose m-th coordinate lies between those of centers n and p
+ clas.snijpunt <- function(x, loc, m, n, p)
+ {
+ if ( !is.na(xm <- x[1,m]) && loc[n, m] <= xm && xm <= loc[p, m]) x[1, ]
+ else if(!is.na(xm <- x[2,m]) && loc[n, m] <= xm && xm <= loc[p, m]) x[2, ]
+ else NA
+ }
+ coord.snijp1 <- function(x, gemid)
+ x[2, 2] - 2 * x[1, 2] * gemid + x[1, 1] * gemid^2
+ coord.snijp2 <- function(x, d2, y)
+ ((x[1, 1] * x[2, 2] - x[1, 2]^2) * d2)/y
+ coord.snijp3 <- function(xx, y, gemid)
+ {
+ sy <- sqrt(y)
+ sy <- c(sy, -sy)
+ cbind(xx[1] + sy,
+ xx[2] + gemid*sy)
+ }
+
+ afstand <- matrix(0, ncol = nC, nrow = nC)
+ for(i in 1:(nC - 1)) {
+ for(j in (i + 1):nC) {
+ ## slope of the line through the two cluster centers
+ gemid <- (loc[j, 2] - loc[i, 2])/(loc[j, 1] - loc[i, 1])
+ s0 <- coord.snijp1(A[[i]], gemid)
+ b0 <- coord.snijp2(A[[i]], d2[i], s0)
+ snijp.1 <- coord.snijp3(loc[i,], y=b0, gemid)
+ s1 <- coord.snijp1(A[[j]], gemid)
+ b1 <- coord.snijp2(A[[j]], d2[j], s1)
+ snijp.2 <- coord.snijp3(loc[j,], y=b1, gemid)
+ if(loc[i, 1] != loc[j, 1]) {
+ if(loc[i, 1] < loc[j, 1]) {
+ punt.1 <- clas.snijpunt(snijp.1, loc, 1, i, j)
+ punt.2 <- clas.snijpunt(snijp.2, loc, 1, i, j)
+ }
+ else {
+ punt.1 <- clas.snijpunt(snijp.1, loc, 1, j, i)
+ punt.2 <- clas.snijpunt(snijp.2, loc, 1, j, i)
+ }
+ }
+ else {
+ if(loc[i, 2] < loc[j, 2]) {
+ punt.1 <- clas.snijpunt(snijp.1, loc, 2, i, j)
+ punt.2 <- clas.snijpunt(snijp.2, loc, 2, i, j)
+ }
+ else {
+ punt.1 <- clas.snijpunt(snijp.1, loc, 2, j, i)
+ punt.2 <- clas.snijpunt(snijp.2, loc, 2, j, i)
+ }
+ }
+ ## no segment (distance NA) when an intersection point is missing
+ ## or the two center-to-boundary distances together exceed the
+ ## distance between the centers
+ if(is.na(punt.1[1]) || is.na(punt.2[1]) ||
+ (sqrt((punt.1[1] - loc[i, 1])^2 +
+ (punt.1[2] - loc[i, 2])^2) +
+ sqrt((punt.2[1] - loc[j, 1])^2 +
+ (punt.2[2] - loc[j, 2])^2)) >
+ sqrt((loc[j, 1] - loc[i, 1])^2 +
+ (loc[j, 2] - loc[i, 2])^2))
+ {
+ afstand[i, j] <- NA
+ }
+ else if(lines == 1) {
+ ## segment and distance between the two centers
+ afstand[i, j] <- sqrt((loc[i, 1] - loc[j, 1])^2 +
+ (loc[i, 2] - loc[j, 2])^2)
+ segments(loc[i, 1], loc[i, 2],
+ loc[j, 1], loc[j, 2], col = 6, ...)
+ }
+ else { ## lines == 2
+ ## segment and distance between the two boundary points
+ afstand[i, j] <- sqrt((punt.1[1] - punt.2[1])^2 +
+ (punt.1[2] - punt.2[2])^2)
+ segments(punt.1[1], punt.1[2],
+ punt.2[1], punt.2[2], col = 6, ...)
+ }
+ }
+ }
+ ## only the upper triangle was filled: symmetrize
+ afstand <- t(afstand) + afstand
+ }
+ else afstand <- NULL
+
+ if(labels) {
+ if(labels == 1) {
+ for(i in 1:nC) { ## add cluster border points
+ m <- nrow(z[[i]])
+ ni <- length(ii <- seq(1, m, by = max(1, m %/% 40)))
+ x1 <- rbind(x1, z[[i]][ii, ])
+ labels1 <- c(labels1, rep(levclus[i], ni))
+ ## identify() only allows one color:
+ ##col.txt <- c(col.txt, rep(col.clus[if(color) i else 1], ni))
+ }
+ identify(x1, labels = labels1, col = col.txt[1])
+ }
+ else {
+### FIXME --- 'cex.txt' but also allow to specify 'cex' (for the points) ???
+ Stext <- function(xy, labs, ...) {
+ ## FIXME: these displacements are not quite ok!
+ xy[, 1] <- xy[, 1] + diff(x.range)/130
+ xy[, 2] <- xy[, 2] + diff(y.range)/50
+ text(xy, labels = labs, ...)
+ }
+ if(labels == 3 || labels == 2)
+ Stext(x1, labels1, col = col.txt, cex = cex.txt, ...)
+ if(labels %in% c(2,4,5)) {
+ ## one fixed boundary point (row 201) per ellipse carries its label
+ maxima <- t(sapply(z, `[`, i=201, j=1:2))
+ Stext(maxima, levclus, font = 4, col = col.clus, cex = cex, ...)
+ }
+ if(labels == 5)
+ identify(x1, labels = labels1, col = col.txt[1])
+ }
+ }
+ ## densities that were capped at 41 are reported as NA
+ density[density == 41] <- NA
+ invisible(list(Distances = afstand, Shading = density))
+}
+
+## clusplot() method for "partition" objects.  Calls clusplot.default() with
+## the first available of
+##   1. x$data (if without NAs, or if 'x' is a "clara" object),
+##   2. the 'dist' argument,
+##   3. x$diss,
+##   4. the first argument of the original call, re-evaluated in the caller's
+##      frame, provided it yields a "dist" object.
+## 'main' defaults to a title built from x$call.
+clusplot.partition <- function(x, main = NULL, dist = NULL, ...)
+{
+ if(is.null(main) && !is.null(x$call))
+ main <- paste("clusplot(",format(x$call),")", sep="")
+ if(length(x$data) != 0 &&
+ (!anyNA(x$data) || data.class(x) == "clara"))
+ clusplot.default(x$data, x$clustering, diss = FALSE, main = main, ...)
+ else if(!is.null(dist))
+ clusplot.default(dist, x$clustering, diss = TRUE, main = main, ...)
+ else if(!is.null(x$diss))
+ clusplot.default(x$diss, x$clustering, diss = TRUE, main = main, ...)
+ else { ## try to find "x$diss" by looking at the pam() call:
+ if(!is.null(x$call)) {
+ ## x$call[[2]] is the first argument of the original call
+ xD <- try(eval(x$call[[2]], envir = parent.frame()))
+ if(inherits(xD, "try-error") || !inherits(xD, "dist"))
+ stop(gettextf("no diss nor data found, nor the original argument of %s",
+ deparse(x$call)))
+ ## else
+ ## warning("both 'x$diss' and 'dist' are empty; ",
+ ## "trying to find the first argument of ", deparse(x$call))
+ clusplot.default(xD, x$clustering, diss = TRUE, main = main, ...)
+ }
+ else stop("no diss nor data found for clusplot()'")
+ }
+}
diff --git a/R/silhouette.R b/R/silhouette.R
new file mode 100644
index 0000000..cca20e6
--- /dev/null
+++ b/R/silhouette.R
@@ -0,0 +1,254 @@
+## S3 generic: dispatches on the class of 'x'.
+silhouette <- function(x, ...)
+{
+    UseMethod("silhouette")
+}
+
+## Accessor (plus a bit more): return the silhouette widths already stored in
+## a partition object, x$silinfo$widths, marked as a sorted "silhouette"
+## object that remembers the call which produced 'x'.
+silhouette.partition <- function(x, ...)
+{
+    if(is.null(sil <- x$silinfo$widths))
+        stop("invalid partition object")
+    ## stored widths are ordered: cluster <increasing>, s.i <decreasing>
+    attr(sil, "Ordered") <- TRUE
+    attr(sil, "call") <- x$call
+    class(sil) <- "silhouette"
+    sil
+}
+
+## silhouette() method for "clara" objects.
+##   full = FALSE : defer to the next method (silhouette.partition()).
+##   full = TRUE  : compute the silhouette of the complete clustering from
+##                  x$clustering and daisy() dissimilarities of x$data; this
+##                  needs the data to have been kept in 'x'.
+silhouette.clara <- function(x, full = FALSE, ...)
+{
+    if(full) {
+        if(is.null(x$data))
+            stop("full silhouette is only available for results of 'clara(*, keep.data = TRUE)'")
+        ## "full" silhouette -- from clustering + full distances:
+        dd <- daisy(x$data, metric = attr(x, "Metric"))
+        sil <- silhouette(x$clustering, dd)
+        ## record a call that shows how this result was obtained
+        attr(sil, "call") <-
+            substitute(silhouette(CL, full = TRUE), list(CL = x$call))
+        sil
+    }
+    else NextMethod() ##-> silh*.partition()
+}
+
+## R-only implementation -- no longer used nor exported:
+##
+## Computes silhouette information for a clustering from a dissimilarity.
+##   x       : vector of integer-valued cluster codes, or a list with a
+##             'clustering' component (which is then used instead).
+##   dist    : dissimilarity, coerced by as.dist(); used whenever it is given.
+##   dmatrix : n x n dissimilarity matrix; only looked at when 'dist' is missing.
+## Value: NA when there are fewer than two clusters or at least as many
+##   clusters as observations; otherwise an n x 3 matrix of class "silhouette"
+##   with columns "cluster", "neighbor", "sil_width" and attributes
+##   "Ordered" (FALSE) and "call" (the matched call).
+silhouette.default.R <- function(x, dist, dmatrix, ...) {
+ cll <- match.call()
+ if(is.list(x) && !is.null(cl <- x$clustering)) x <- cl
+ n <- length(x)
+ if(!all(x == round(x))) stop("'x' must only have integer codes")
+ ## clid: the sorted distinct cluster codes;  k: how many there are
+ k <- length(clid <- sort(unique(x)))
+ if(k <= 1 || k >= n)
+ return(NA)
+ ## check dist/dmatrix
+ if(missing(dist)) {
+ if(missing(dmatrix))
+ stop("Need either a dissimilarity 'dist' or diss.matrix 'dmatrix'")
+ if(is.null(dm <- dim(dmatrix)) || length(dm) != 2 || !all(n == dm))
+ stop("'dmatrix' is not a dissimilarity matrix compatible to 'x'")
+ } else { # 'dist'
+ dist <- as.dist(dist) # hopefully
+ if(n != attr(dist, "Size"))
+ stop("clustering 'x' and dissimilarity 'dist' are incompatible")
+ dmatrix <- as.matrix(dist)# so we can apply(.) below
+ }
+ ## result matrix, filled cluster by cluster in the loop below
+ wds <- matrix(NA, n,3, dimnames =
+ list(names(x), c("cluster","neighbor","sil_width")))
+ for(j in 1:k) { # j-th cluster:
+ ## iC: logical membership indicator of cluster j;  Nj: its size
+ Nj <- sum(iC <- x == clid[j])
+ wds[iC, "cluster"] <- clid[j]
+ ## mean dissimilarity of each member of cluster j to each of the other clusters:
+ diC <- rbind(apply(dmatrix[!iC, iC, drop = FALSE], 2,
+ function(r) tapply(r, x[!iC], mean)))# (k-1) x Nj
+ ## max.col() breaks ties at random; rather do not want random
+ ## behavior of silhouette, (but rather "pam" compatible one):
+ minC <- apply(diC, 2, which.min)
+ ## FIXME minC <- max.col(-t(diC))
+ ## FIXME : extend max.col(*, ties.method = "min") {or similar} !
+ ## neighbor = the other cluster with the smallest mean dissimilarity
+ wds[iC,"neighbor"] <- clid[-j][minC]
+ ## s.i = (b.i - a.i) / max(a.i, b.i), set to 0 when a.i == b.i;
+ ## a singleton cluster (Nj == 1) gets width 0.
+ s.i <- if(Nj > 1) {
+ ## within-cluster column sums over (Nj - 1): the mean dissimilarity to the
+ ## other members, presumably relying on a zero diagonal -- TODO confirm
+ a.i <- colSums(dmatrix[iC, iC])/(Nj - 1) # length(a.i)= Nj
+ ## b.i: for each member, its mean dissimilarity to the chosen neighbor
+ b.i <- diC[cbind(minC, seq(along = minC))]
+ ifelse(a.i != b.i, (b.i - a.i) / pmax(b.i, a.i), 0)
+ } else 0
+ wds[iC,"sil_width"] <- s.i
+ }
+ attr(wds, "Ordered") <- FALSE
+ attr(wds, "call") <- cll
+ class(wds) <- "silhouette"
+ wds
+} ## silhouette.default.R
+
+## Default silhouette() method: silhouette information for a clustering 'x'
+## and a dissimilarity, computed by the native routine 'sildist'.
+##   x       : vector of integer-valued cluster codes, or a list with a
+##             'clustering' component (which is then used instead).
+##   dist    : dissimilarity, coerced by as.dist(); used whenever it is given.
+##   dmatrix : n x n dissimilarity matrix; only looked at when 'dist' is missing.
+## Value: NA for trivial clusterings (k <= 1 or k >= n); otherwise an n x 3
+##   matrix of class "silhouette" with columns "cluster", "neighbor",
+##   "sil_width", attributes "Ordered" (FALSE) and "call", and additionally
+##   "codes" when the cluster codes had to be recoded.
+silhouette.default <- function(x, dist, dmatrix, ...) {
+ cll <- match.call()
+ if(is.list(x) && !is.null(cl <- x$clustering)) x <- cl
+ n <- length(x)
+ if(!all(x == round(x))) stop("'x' must only have integer codes")
+ ## x is coerced to integer;  ux: its distinct codes,  k: their number
+ k <- length(ux <- unique(x <- as.integer(x)))
+ if(k <= 1 || k >= n) # silhouette undefined for trivial clusterings
+ return(NA)
+ ## codes outside 1..k are mapped to 1..k via factor(); 'fx' keeps the
+ ## original codes (as levels) so they can be restored further down
+ doRecode <- (any(ux < 1) || any(ux > k)) ## need to recode
+ if(doRecode)
+ x <- as.integer(fx <- factor(x)) # now *has* values in 1:k
+
+ ## check dist/dmatrix
+ ## has.dmatrix is TRUE exactly when 'dist' was not supplied
+ has.dmatrix <- missing(dist)
+ if(has.dmatrix) {
+ if(missing(dmatrix))
+ stop("Need either a dissimilarity 'dist' or diss.matrix 'dmatrix'")
+ if(is.null(dm <- dim(dmatrix)) || length(dm) != 2 || !all(n == dm))
+ stop("'dmatrix' is not a dissimilarity matrix compatible to 'x'")
+ } else { # 'dist'
+ dist <- as.dist(dist) # hopefully
+ if(n != attr(dist, "Size"))
+ stop("clustering 'x' and dissimilarity 'dist' are incompatible")
+ }
+
+ ## 'sildist' is defined outside this file chunk; the remaining arguments are
+ ## work/result vectors of the sizes shown.  Only the "si" and "neighbor"
+ ## components of the returned list are kept.
+ ## NOTE(review): 'ismat' presumably tells the routine whether 'd' is a full
+ ## matrix or a "dist" vector -- confirm against the C source.
+ out <- .C(sildist,
+ d = as.numeric(if(has.dmatrix) dmatrix else dist),
+ as.integer(n),
+ x,
+ as.integer(k),
+ diC = numeric(n*k),
+ counts = integer(k),
+ si = numeric(n),
+ neighbor = integer(n),
+ ismat = has.dmatrix)[c("si", "neighbor")]
+
+ ## translate the 1..k codes back to the user's original cluster codes
+ if(doRecode) {
+ code.x <- as.integer(levels(fx))
+ x <- code.x[x]
+ }
+ wds <- cbind(cluster = x,
+ neighbor = if(doRecode) code.x[out$neighbor] else out$neighbor,
+ "sil_width" = out$si)
+ if(doRecode)
+ attr(wds, "codes") <- code.x
+ attr(wds, "Ordered") <- FALSE
+ attr(wds, "call") <- cll
+ class(wds) <- "silhouette"
+ wds
+}
+
+
+## Sort the rows of a "silhouette" matrix by cluster (increasing) and, within
+## a cluster, by decreasing silhouette width.  The permutation used is stored
+## in attribute "iOrd" and "Ordered" is set to TRUE; an object that is already
+## marked as ordered is returned as is (with "iOrd" = 1:n added if missing).
+sortSilhouette <- function(object, ...)
+{
+    nobs <- nrow(object)
+    if(is.null(nobs) || nobs < 1)
+        stop("invalid silhouette structure")
+    if(attr(object, "Ordered")) {
+        ## nothing to sort; just make sure a permutation is recorded
+        if(is.null(attr(object, "iOrd")))
+            attr(object, "iOrd") <- 1:nobs
+        return(object)
+    }
+    ## unordered: give rows default names so they stay identifiable
+    if(is.null(rownames(object)))
+        rownames(object) <- as.character(1:nobs)
+    iOrd <- order(object[, "cluster"], - object[, "sil_width"])
+    sorted <- object[iOrd, , drop = FALSE]
+    ## subsetting dropped the extra attributes of 'object'; copy them over,
+    ## but leave the (already permuted) dim / dimnames alone:
+    old <- attributes(object)
+    for(nm in setdiff(names(old), c("dim", "dimnames", "iOrd", "Ordered")))
+        attr(sorted, nm) <- old[[nm]]
+    attr(sorted, "iOrd") <- iOrd # the ordering
+    attr(sorted, "Ordered") <- TRUE
+    sorted
+}
+
+## summary() method for "silhouette" objects.
+##   object : 3-column silhouette matrix (columns "cluster", "sil_width" used).
+##   FUN    : summary function applied to the widths, overall and per cluster.
+##   ...    : passed to summary() of the widths.
+## Value: list of class "summary.silhouette" with the summary of the widths,
+##   per-cluster FUN values and sizes, the overall FUN value, and the "call",
+##   "codes" and "Ordered" attributes of 'object'.
+summary.silhouette <- function(object, FUN = mean, ...)
+{
+    if(ncol(object) != 3) stop("invalid 'silhouette' object")
+    cl <- object[, "cluster"]
+    widths <- object[, "sil_width"]
+    structure(list(si.summary      = summary(widths, ...),
+                   clus.avg.widths = tapply(widths, cl, FUN),
+                   clus.sizes      = table(cl),
+                   avg.width       = FUN(widths),
+                   call            = attr(object, "call"),
+                   codes           = attr(object, "codes"),
+                   Ordered         = attr(object, "Ordered")),
+              class = "summary.silhouette")
+}
+
+## print() method for "summary.silhouette" objects: a header line with the
+## number of units and clusters (and the originating call, if recorded), the
+## per-cluster average widths labelled by cluster size, and the summary of
+## the individual widths.  Returns 'x' invisibly.
+print.summary.silhouette <- function(x, ...)
+{
+    sizes <- x$clus.sizes
+    nclus <- length(sizes)
+    ## mention the original cluster ids only when codes were recorded
+    if(is.null(x$codes)) {
+        heading <- "Cluster sizes"
+    } else {
+        heading <- paste0("Cluster sizes",
+                          ", ids = (", paste(x$codes, collapse=", "), "),")
+    }
+    origin <- if(!is.null(x$call)) paste("from", deparse(x$call))
+    cat("Silhouette of", sum(sizes), "units in", nclus, "clusters",
+        origin, ":\n",
+        heading, "and average silhouette widths:\n")
+    avgw <- x$clus.avg.widths
+    names(avgw) <- sizes
+    print(avgw, ...)
+    cat("Individual silhouette widths:\n")
+    print(x$si.summary, ...)
+    invisible(x)
+}
+
+
+## This was the internal function silhouPlot() in plot.partition() :
+##
+## plot() method for "silhouette" objects: a horizontal barplot of the
+## silhouette widths of 'x' after sortSilhouette(x).
+##   x            : 3-column silhouette matrix (anything else is an error).
+##   nmax.lab     : bar labels (the row names) are drawn only if
+##                  nrow(x) < nmax.lab,
+##   max.strlen   : and are then cut to their first 'max.strlen' characters.
+##   main, sub    : titles; when NULL, 'main' is built from the "call"
+##                  attribute of 'x' and 'sub' shows the rounded average width.
+##   xlab         : x-axis label.
+##   col          : bar colour(s).
+##   do.col.sort  : if true and more than one colour is given, the colours are
+##                  assigned per cluster (length(col) == k) or per unit.
+##   border, cex.names : passed to barplot().
+##   do.n.k       : add "n = <n>" and "<k> clusters" as margin text.
+##   do.clus.stat : annotate each cluster with its size and average width.
+##   ...          : further arguments passed to barplot().
+plot.silhouette <-
+ function(x, nmax.lab = 40, max.strlen = 5,
+ main = NULL, sub = NULL,
+ xlab = expression("Silhouette width " * s[i]),
+ col = "gray", do.col.sort = length(col) > 1,
+ border = 0, cex.names = par("cex.axis"),
+ do.n.k = TRUE, do.clus.stat = TRUE, ...)
+{
+ if(!is.matrix(x) || ncol(x) != 3)
+ stop("No valid silhouette information (#{clusters} =? 1)")
+ n <- nrow(x)
+ x <- sortSilhouette(x)
+ ## widths (and below: labels, colours) are reversed before being handed to
+ ## barplot() -- presumably so that the first cluster is drawn at the top
+ s <- rev(x[, "sil_width"])
+ ## diff(cli) is non-zero exactly where the (sorted) cluster code changes
+ space <- c(0, rev(diff(cli <- x[, "cluster"])))
+ space[space != 0] <- 0.5 # gap between clusters
+ axisnames <- (n < nmax.lab)
+ if(axisnames)
+ names <- substring(rev(rownames(x)), 1, max.strlen)
+ if(is.null(main)) {
+ main <- "Silhouette plot"
+ if(!is.null(cll <- attr(x,"call"))) { # drop initial "silhouette":
+ ## the function name is replaced by the marker "FF", which sub() then
+ ## strips from the deparsed call, leaving only "(<arguments>)"
+ if(!is.na(charmatch("silhouette", deparse(cll[[1]]))))
+ cll[[1]] <- as.name("FF")
+ main <- paste(main, "of", sub("^FF","", deparse(cll)))
+ }
+ }
+ smry <- summary(x)
+ k <- length(nj <- smry$clus.sizes) # k clusters
+ if(is.null(sub))
+ sub <- paste("Average silhouette width : ",
+ round(smry$avg.width, digits = 2))
+ if(do.col.sort && (lc <- length(col)) > 1) {
+ if(lc == k)# cluster wise coloring
+ col <- col[cli]
+ else ## unit wise coloring
+ if(lc != n)
+ col <- rep(col, length = n)
+ col <- rev(col) # was rev(col[attr(x, "iOrd")])
+ }
+ ## y: what barplot() returns; used below as the vertical bar positions
+ y <- barplot(s, space = space, names = names, xlab = xlab,
+ xlim = c(min(0, min(s)), 1),
+ horiz = TRUE, las = 1, mgp = c(2.5, 1, 0),
+ col = col, border = border, cex.names = cex.names,
+ axisnames = axisnames, ...)
+ title(main = main, sub = sub, adj = 0)
+ if(do.n.k) {
+ mtext(paste("n =", n), adj = 0)
+ mtext(substitute(k ~~ "clusters" ~~ C[j], list(k=k)), adj= 1)
+ }
+ if(do.clus.stat) {
+ mtext(expression(paste(j," : ", n[j]," | ", ave[i %in% Cj] ~~ s[i])),
+ adj = 1.04, line = -1.2)
+ ## undo the reversal so that y[i] corresponds to row i of 'x' (and cli[i])
+ y <- rev(y)
+ hasCodes <- !is.null(cx <- attr(x,"codes"))
+ for(j in 1:k) {
+ ## j. : the label of the j-th cluster (its original code if recoded)
+ j. <- if(hasCodes) cx[j] else j
+ ## place the text at the mean bar position of that cluster
+ yj <- mean(y[cli == j.])
+ text(1, yj,
+ paste(j.,": ", nj[j]," | ",
+ format(smry$clus.avg.widths[j], digits = 1, nsmall = 2)),
+ xpd = NA, adj = 0.8)
+ }
+ }
+}
diff --git a/R/zzz.R b/R/zzz.R
new file mode 100644
index 0000000..43c2883
--- /dev/null
+++ b/R/zzz.R
@@ -0,0 +1,7 @@
+## Namespace hook: when the package is unloaded, also unload its compiled
+## code (the "cluster" shared library found under 'libpath').
+.onUnload <- function(libpath) library.dynam.unload("cluster", libpath)
+
+## no S4 methodology here; speedup :
+## NOTE(review): presumably this flag lets R skip looking for S4 generics
+## when the package is attached -- confirm in ?library.
+.noGenerics <- TRUE
diff --git a/README b/README
new file mode 100644
index 0000000..299bb21
--- /dev/null
+++ b/README
@@ -0,0 +1,42 @@
+ORIGINAL README :
+
+ This directory contains code, help and examples for CLUS, an S-PLUS
+ package for clustering, as described in ``Clustering in an
+ Object-Oriented Environment'' by Anja Struyf, Mia Hubert, and Peter
+ J. Rousseeuw (Journal of Statistical Software, volume 1).
+------------------------------------------------------------------------
+
+See http://www.stat.ucla.edu/journals/jss/ for the original version.
+
+The current port is based on material now on
+ http://www.agoras.ua.ac.be/
+
+KH <Kurt.Hornik@ci.tuwien.ac.at> 1998/05/21
+
+---------
+
+For historical reasons,
+we keep R/README-Splus which has no relevance to the R package.
+
+---------------------
+
+TODO {see ./TODO-MM for MM's private 'todo' list; ./DONE-MM for things done}
+
+ 3) daisy() for the case of mixed variables should allow
+ a weight vector (of length p = #vars) for up- or down-weighting variables.
+ daisy() really should accept the other methods mva's dist() does _and_
+ it should use dist's C API -- but we have no C API for package code, ARRGH!
+
+ 4) Eliminate the many Fortran (g77 -Wall) warnings of the form
+ >> mona.f:101: warning: `jma' might be used uninitialized in this function
+
+ 6) Mona objects describe a hierarchical clustering; they could also inherit
+ from twins, and hence have a pltree() method for plotting the
+ hierarchical tree.
+
+ 8b) Think about "merging" the plot.agnes and plot.diana methods.
+
+------------------
+
+Martin <maechler@stat.math.ethz.ch>, since 1999
+
diff --git a/build/partial.rdb b/build/partial.rdb
new file mode 100644
index 0000000..93a3fa9
--- /dev/null
+++ b/build/partial.rdb
Binary files differ
diff --git a/data/agriculture.tab b/data/agriculture.tab
new file mode 100644
index 0000000..737c8a4
--- /dev/null
+++ b/data/agriculture.tab
@@ -0,0 +1,13 @@
+ x y
+ B 16.8 2.7
+ DK 21.3 5.7
+ D 18.7 3.5
+ GR 5.9 22.2
+ E 11.4 10.9
+ F 17.8 6.0
+IRL 10.9 14.0
+ I 16.6 8.5
+ L 21.0 3.5
+ NL 16.4 4.3
+ P 7.8 17.4
+ UK 14.0 2.3
diff --git a/data/animals.tab b/data/animals.tab
new file mode 100644
index 0000000..e5a7aa4
--- /dev/null
+++ b/data/animals.tab
@@ -0,0 +1,21 @@
+ war fly ver end gro hai
+ant 1 1 1 1 2 1
+bee 1 2 1 1 2 2
+cat 2 1 2 1 1 2
+cpl 1 1 1 1 1 2
+chi 2 1 2 2 2 2
+cow 2 1 2 1 2 2
+duc 2 2 2 1 2 1
+eag 2 2 2 2 1 1
+ele 2 1 2 2 2 1
+fly 1 2 1 1 1 1
+fro 1 1 2 2 NA 1
+her 1 1 2 1 2 1
+lio 2 1 2 NA 2 2
+liz 1 1 2 1 1 1
+lob 1 1 1 1 NA 1
+man 2 1 2 2 2 2
+rab 2 1 2 1 2 2
+sal 1 1 2 1 NA 1
+spi 1 1 1 NA 1 2
+wha 2 1 2 2 2 1
diff --git a/data/chorSub.rda b/data/chorSub.rda
new file mode 100644
index 0000000..e21aa57
--- /dev/null
+++ b/data/chorSub.rda
Binary files differ
diff --git a/data/flower.R b/data/flower.R
new file mode 100644
index 0000000..321852f
--- /dev/null
+++ b/data/flower.R
@@ -0,0 +1,21 @@
+## The 'flower' data set: a data frame with 18 rows and 8 variables of
+## mixed type --
+##   V1, V2, V3 : two-level factors with labels "0" / "1",
+##   V4         : factor (unordered),
+##   V5, V6     : ordered factors,
+##   V7, V8     : numeric.
+flower <-
+data.frame(V1 =
+ factor(c(1, 2, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1),
+ labels = 0:1),
+ V2 =
+ factor(c(2, 1, 2, 1, 2, 2, 1, 1, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1),
+ labels = 0:1),
+ V3 =
+ factor(c(2, 1, 1, 2, 1, 1, 1, 2, 1, 1, 2, 2, 1, 2, 1, 1, 1, 2),
+ labels = 0:1),
+ V4 =
+ factor(c(4, 2, 3, 4, 5, 4, 4, 2, 3, 5, 5, 1, 1, 4, 3, 4, 2, 2)),
+ V5 =
+ ordered(c(3, 1, 3, 2, 2, 3, 3, 2, 1, 2, 3, 2, 2, 2, 2, 2, 2, 1)),
+ V6 =
+ ordered(c(15, 3,1,16, 2,12,13, 7, 4,14, 8, 9, 6,11,10,18,17, 5)),
+ V7 = c(25, 150, 150, 125, 20, 50, 40, 100, 25,
+ 100, 45, 90, 20, 80, 40, 200, 150, 25),
+ V8 = c(15, 50, 50, 50, 15, 40, 20, 15, 15, 60,
+ 10, 25, 10, 30, 20, 60, 60, 10)
+ )
diff --git a/data/plantTraits.rda b/data/plantTraits.rda
new file mode 100644
index 0000000..4ec6f5d
--- /dev/null
+++ b/data/plantTraits.rda
Binary files differ
diff --git a/data/pluton.tab b/data/pluton.tab
new file mode 100644
index 0000000..ab8d574
--- /dev/null
+++ b/data/pluton.tab
@@ -0,0 +1,46 @@
+Pu238 Pu239 Pu240 Pu241
+0.126 75.804 21.204 2.18
+0.133 75.515 21.408 2.24
+0.127 75.175 21.668 2.305
+0.156 78.872 18.428 1.906
+0.503 73.317 20.223 4.128
+0.113 79.116 18.548 1.69
+0.129 75.751 21.162 2.26
+0.124 75.326 21.557 2.282
+1.022 63.287 24.493 6.99
+1.412 59.553 25.576 8.027
+1.533 58.688 25.719 8.279
+1.534 58.758 25.692 8.261
+1.437 59.728 25.146 8.377
+1.439 59.544 25.126 8.569
+1.375 59.877 25.128 8.428
+1.153 61.182 25.1 7.802
+0.201 78.244 18.488 2.351
+0.176 78.166 18.629 2.365
+0.239 74.254 21.515 2.901
+0.102 79.84 17.872 1.674
+1.07 62.455 24.656 7.512
+0.851 73.189 18.285 5.597
+0.125 75.968 20.794 2.407
+0.142 75.957 20.867 2.341
+0.352 72.885 21.718 3.618
+0.351 72.907 21.721 3.601
+0.346 72.919 21.713 3.6
+0.217 76.089 20.225 2.556
+1.068 70.129 18.573 7.689
+1.171 69.273 18.633 8.3
+1.213 69.147 18.64 8.363
+1.226 68.294 18.869 8.826
+1.111 71.076 18.122 7.248
+0.183 75.714 20.75 2.488
+0.162 76.15 20.345 2.524
+0.113 77.845 19.108 2.275
+1.309 62.382 22.754 9.311
+1.638 60.112 23.32 9.972
+1.589 60.519 23.128 9.97
+1.411 61.585 23.133 9.339
+1.457 61.332 23.239 9.321
+0.397 72.291 21.761 3.836
+0.328 73.451 21.429 3.419
+0.242 74.888 20.939 2.875
+1.367 60.507 23.603 9.839
diff --git a/data/ruspini.tab b/data/ruspini.tab
new file mode 100644
index 0000000..f149395
--- /dev/null
+++ b/data/ruspini.tab
@@ -0,0 +1,76 @@
+ x y
+1 4 53
+2 5 63
+3 10 59
+4 9 77
+5 13 49
+6 13 69
+7 12 88
+8 15 75
+9 18 61
+10 19 65
+11 22 74
+12 27 72
+13 28 76
+14 24 58
+15 27 55
+16 28 60
+17 30 52
+18 31 60
+19 32 61
+20 36 72
+21 28 147
+22 32 149
+23 35 153
+24 33 154
+25 38 151
+26 41 150
+27 38 145
+28 38 143
+29 32 143
+30 34 141
+31 44 156
+32 44 149
+33 44 143
+34 46 142
+35 47 149
+36 49 152
+37 50 142
+38 53 144
+39 52 152
+40 55 155
+41 54 124
+42 60 136
+43 63 139
+44 86 132
+45 85 115
+46 85 96
+47 78 94
+48 74 96
+49 97 122
+50 98 116
+51 98 124
+52 99 119
+53 99 128
+54 101 115
+55 108 111
+56 110 111
+57 108 116
+58 111 126
+59 115 117
+60 117 115
+61 70 4
+62 77 12
+63 83 21
+64 61 15
+65 69 15
+66 78 16
+67 66 18
+68 58 13
+69 64 20
+70 69 21
+71 66 23
+72 61 25
+73 76 27
+74 72 31
+75 64 30
diff --git a/data/votes.repub.tab b/data/votes.repub.tab
new file mode 100644
index 0000000..0436a4e
--- /dev/null
+++ b/data/votes.repub.tab
@@ -0,0 +1,51 @@
+"1856" "1860" "1864" "1868" "1872" "1876" "1880" "1884" "1888" "1892" "1896" "1900" "1904" "1908" "1912" "1916" "1920" "1924" "1928" "1932" "1936" "1940" "1944" "1948" "1952" "1956" "1960" "1964" "1968" "1972" "1976"
+"Alabama" NA NA NA 51.44 53.19 40.02 36.98 38.44 32.28 3.95 28.13 34.67 20.65 24.38 8.26 21.97 30.98 27.01 48.49 14.15 12.82 14.34 18.2 19.04 35.02 39.39 41.75 69.5 14 72.4 43.48
+"Alaska" NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA 50.94 34.1 45.3 58.1 62.91
+"Arizona" NA NA NA NA NA NA NA NA NA NA NA NA NA NA 12.74 35.37 55.41 41.26 57.57 30.53 26.93 36.01 40.9 43.82 58.35 60.99 55.52 50.4 54.8 64.7 58.62
+"Arkansas" NA NA NA 53.73 52.17 39.88 39.55 40.5 38.07 32.01 25.11 35.04 40.25 37.31 19.73 28.01 38.73 29.28 39.33 12.91 17.86 20.87 29.84 21.02 43.76 45.82 43.06 43.9 30.8 68.9 34.97
+"California" 18.77 32.96 58.63 50.24 56.38 50.88 48.92 52.08 49.95 43.76 49.13 54.48 61.9 55.46 0.58 46.26 66.24 57.21 64.7 37.4 31.7 41.35 42.99 47.14 56.39 55.4 50.1 40.9 47.8 55 50.89
+"Colorado" NA NA NA NA NA NA 51.28 54.39 55.31 41.13 13.84 42.04 55.27 46.88 21.88 34.75 59.32 57.02 64.72 41.43 37.09 50.92 53.21 46.52 60.27 59.49 54.63 38.7 50.5 62.6 55.89
+"Connecticut" 53.18 53.86 51.38 51.54 52.25 48.34 50.52 48.01 48.44 46.8 63.24 56.94 58.13 59.43 35.88 49.8 62.72 61.54 53.63 48.54 40.35 46.3 46.94 49.55 55.7 63.73 46.27 32.2 44.3 58.6 52.64
+"Delaware" 2.11 23.71 48.2 40.98 50.99 44.55 47.86 42.75 43.55 48.55 52.94 53.65 54.04 52.09 32.85 50.2 55.71 57.71 65.03 50.55 43.43 45.05 45.27 50.04 51.75 55.09 49 39.1 45.1 59.6 47.27
+"Florida" NA NA NA NA 53.52 50.99 45.83 46.82 39.94 NA 24.3 19.03 21.15 21.58 8.25 18.08 30.79 28.06 56.83 25.04 23.85 25.99 29.68 33.63 55.01 57.2 51.51 48.9 40.5 71.9 46.83
+"Georgia" NA NA NA 35.72 43.77 27.94 34.33 33.84 28.33 21.8 36.82 28.56 18.32 31.4 4.27 7.07 28.57 18.19 43.37 7.77 12.6 14.84 18.25 18.31 30.34 33.22 37.44 54.1 30.4 75 33.02
+"Hawaii" NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA 49.97 21.2 38.7 62.5 48.72
+"Idaho" NA NA NA NA NA NA NA NA NA 44.87 21.3 47.14 65.84 54.09 31.02 41.13 66.02 47.12 64.74 38.23 33.19 45.31 48.07 47.26 65.42 61.18 53.78 49.1 56.8 64.2 61.77
+"Illinois" 40.25 50.68 54.41 55.69 56.27 50.09 51.11 50.16 49.54 45.7 55.65 52.83 58.77 54.52 22.13 52.56 67.81 58.84 56.93 42.04 39.69 48.54 48.05 49.24 54.84 59.52 49.8 40.5 47.1 59 51.11
+"Indiana" 40.03 51.09 53.6 51.39 53 48.27 49.33 48.16 49.05 46.17 50.81 50.6 53.99 48.4 23.11 47.44 55.14 55.25 59.68 42.94 41.89 50.45 52.38 49.58 58.11 59.9 55.03 44 50.3 66.1 53.77
+"Iowa" 49.13 54.87 64.23 61.92 64.18 58.58 56.85 52.42 52.36 49.6 55.46 57.99 63.37 55.62 24.3 54.06 70.91 55.06 61.8 39.98 42.7 52.03 51.99 47.58 63.76 59.06 56.71 38.1 53 57.6 50.51
+"Kansas" NA NA 78.61 68.89 66.64 63.1 60.4 58.18 55.39 48.4 47.46 52.56 64.81 52.46 20.48 44.09 64.76 61.54 72.02 44.13 45.95 56.86 60.25 53.63 68.77 65.44 60.45 45.9 54.8 67.7 53.91
+"Kentucky" 0.26 0.93 30.17 25.45 46.45 37.61 39.8 42.81 44.99 39.74 48.92 48.5 47.11 48.04 25.46 46.52 49.25 48.93 59.33 40.15 39.92 42.3 45.22 41.48 49.84 54.3 53.59 36 43.8 63.4 46.24
+"Louisiana" NA NA NA 29.31 55.69 51.57 37.1 42.39 26.48 23.59 21.79 20.96 9.65 11.92 4.83 6.95 30.57 20.23 23.71 7.03 11.16 14.09 19.4 17.45 47.08 53.28 28.59 56.8 23.5 65.3 47
+"Maine" 61.37 64.15 60.22 62.42 67.86 56.73 51.45 55.35 57.49 54.06 67.9 61.89 67.1 63 20.48 50.95 68.92 72.03 68.63 55.83 55.49 51.1 52.44 56.74 66.05 70.87 57.05 31.2 43.1 61.5 50.34
+"Maryland" 0.32 3.11 55.1 32.8 49.65 43.94 45.37 46.09 47.4 43.48 54.6 51.5 48.83 48.85 23.69 44.78 55.11 45.29 57.06 36.04 37.04 40.83 48.15 49.4 55.36 60.04 46.39 34.5 41.9 61.3 46.87
+"Massachusetts" 64.72 62.75 72.22 69.67 69.25 57.74 58.5 48.32 53.38 51.85 69.36 57.67 57.92 58.21 31.89 50.54 68.55 62.26 49.15 46.64 41.76 46.36 47 43.17 54.22 59.33 39.55 23.8 32.9 45.2 41.93
+"Michigan" 56.98 57.18 55.89 56.98 62.67 52.45 52.54 47.89 49.73 47.81 53.85 58.1 69.5 61.93 27.63 52.09 72.8 75.37 70.36 44.45 38.76 49.85 49.18 49.23 55.44 55.63 48.84 33.3 41.5 56.2 52.68
+"Minnesota" NA 63.42 59.06 60.8 61.55 58.77 62.28 58.8 54.12 45.96 56.62 60.21 73.95 59.11 19.25 46.35 70.78 51.18 57.77 36.29 31.01 47.66 46.86 39.89 55.33 53.68 49.16 36.2 41.5 51.6 44.3
+"Mississippi" NA NA NA NA 63.47 31.92 29.94 36.25 26.63 2.64 7.27 9.73 5.46 6.56 2.47 4.91 14.03 7.6 17.9 3.55 2.74 4.19 6.44 2.62 39.56 24.46 24.67 87.1 13.5 78.2 49.21
+"Missouri" NA 10.29 70.17 58.9 43.65 41.23 38.65 46.01 45.31 41.97 45.24 45.94 49.92 48.5 29.75 46.94 54.57 49.58 55.58 35.08 38.16 47.5 48.43 41.5 50.71 49.87 49.74 36 44.9 62.2 48.22
+"Montana" NA NA NA NA NA NA NA NA NA 42.54 19.72 39.84 54.21 46.98 23.19 37.57 61.13 42.5 58.37 36.07 27.59 40.17 44.93 43.15 59.39 57.13 51.1 41.1 50.6 57.9 53.65
+"Nebraska" NA NA NA 64.14 70.12 59.65 62.87 57.33 53.51 43.57 45.98 50.46 61.38 47.6 21.73 41.08 64.68 47.09 63.19 35.29 40.74 57.19 58.58 54.15 69.15 65.51 62.07 47.4 59.8 70.5 60.31
+"Nevada" NA NA 59.84 55.39 57.43 52.73 47.6 56.21 57.23 25.84 18.79 37.75 56.66 43.93 15.89 36.4 56.92 41.76 56.54 30.59 27.19 39.92 45.38 47.26 61.45 57.97 48.84 41.4 47.5 63.7 52.27
+"New Hampshire" 53.59 56.89 52.56 55.02 53.95 51.84 51.94 51.15 50.35 51.11 68.74 59.34 60.14 59.32 37.43 49.06 59.84 59.83 58.66 50.42 47.98 46.78 47.87 52.41 60.92 66.11 53.42 36.1 52.1 64 55.68
+"New Jersey" 28.52 48.13 47.16 49.12 54.22 47 49.02 47.31 47.52 46.24 59.66 55.27 56.68 56.79 20.54 54.35 67.6 62.16 59.77 47.59 39.57 47.93 48.95 50.33 56.81 64.68 49.16 34.4 46.1 61.6 50.99
+"New Mexico" NA NA NA NA NA NA NA NA NA NA NA NA NA NA 35.91 46.53 54.68 48.52 59.01 35.76 36.5 43.28 46.44 42.93 55.39 57.81 49.41 41 51.8 61 51.04
+"New York" 46.14 53.71 50.46 49.41 53.12 48.07 50.32 48.15 49.2 45.59 57.55 53.1 53.13 53.11 28.68 51.53 64.56 55.76 49.79 41.33 38.97 48.04 47.3 45.99 55.45 61.2 47.27 31.4 44.3 58.5 47.84
+"North Carolina" NA NA NA 53.37 57.48 46.36 48.04 46.58 47.14 35.79 46.87 45.47 39.7 45.55 11.94 41.73 43.22 39.72 54.87 29.28 26.6 25.97 33.29 32.68 46.09 49.34 47.89 43.8 39.5 69.5 44.43
+"North Dakota" NA NA NA NA NA NA NA NA NA 48.5 55.58 62.11 74.83 60.87 26.67 49.2 77.79 47.68 54.8 28 26.58 55.06 53.84 52.17 70.97 61.72 55.42 42 55.9 62.1 52.93
+"Ohio" 48.49 52.33 56.33 53.97 53.24 50.21 51.73 50.99 49.51 47.66 51.86 52.3 59.73 51.03 26.82 44.18 58.47 58.33 64.89 47.04 37.43 47.8 50.18 49.24 56.76 61.11 53.28 37.1 45.2 59.6 49.9
+"Oklahoma" NA NA NA NA NA NA NA NA NA NA NA NA NA 43.03 35.69 33.21 50.11 42.82 63.72 26.7 32.69 42.23 44.2 37.25 54.59 55.13 59.02 44.3 47.7 73.7 50.52
+"Oregon" NA 34.48 53.9 49.63 58.74 50.91 50.51 50.99 53.82 44.59 50.07 55.25 67.06 56.39 25.3 48.47 60.2 51.01 64.18 36.89 29.64 45.63 46.94 49.78 60.54 55.25 52.62 36.3 49.8 52.4 50.01
+"Pennsylvania" 33.95 56.25 51.75 52.2 62.18 50.61 50.84 52.68 52.73 51.45 60.98 60.74 67.99 58.84 22.4 54.25 65.8 65.35 65.24 50.84 40.85 46.34 48.36 50.93 52.74 56.49 48.74 34.5 44 59.1 48.57
+"Rhode Island" 57.85 61.22 62.2 66.49 71.94 59.29 62.25 58.07 53.89 50.71 68.33 59.74 60.6 60.76 35.57 51.08 63.97 59.63 49.55 43.31 40.18 43.24 41.26 41.44 50.89 58.31 36.37 19.1 31.8 53 44.24
+"South Carolina" NA NA NA 57.93 75.95 50.26 33.97 23.72 17.27 18.99 13.51 7.04 4.63 5.97 1.06 2.43 3.9 2.21 8.54 1.89 1.43 4.37 4.46 3.78 49.28 25.18 48.76 58.9 38.1 70.8 43.54
+"South Dakota" NA NA NA NA NA NA NA NA NA 49.48 49.48 56.73 71.09 58.84 NA 49.8 60.74 49.69 60.18 34.4 42.49 57.41 58.33 51.84 69.27 58.39 58.21 44.4 53.3 54.2 50.92
+"Tennessee" NA NA NA 68.33 47.57 40.21 44.53 47.83 45.85 37.51 46.23 44.93 43.4 45.95 23.84 42.7 51.28 43.59 53.76 32.46 30.81 32.36 39.22 36.87 49.98 49.21 52.92 44.5 37.8 67.7 43.21
+"Texas" NA NA NA NA 40.71 29.96 23.95 28.63 24.73 19.28 30.75 30.83 21.9 22.35 8.77 17.45 23.54 19.78 51.77 11.35 12.31 19.13 16.64 24.6 53.13 55.27 48.52 36.7 39.9 66.2 48.01
+"Utah" NA NA NA NA NA NA NA NA NA NA 17.27 50.59 61.45 56.19 37.46 37.82 55.93 49.26 53.58 41.05 29.79 37.59 39.42 45.52 58.93 64.56 54.81 45.3 56.5 67.6 64.94
+"Vermont" 78.23 75.79 76.1 78.57 78.26 68.58 69.88 66.54 71.24 68.1 80.1 75.79 77.98 75.12 37.13 62.44 75.87 78.22 66.88 57.66 56.44 54.79 57.06 61.55 71.46 72.18 58.65 33.7 52.8 62.7 56.01
+"Virginia" 0.19 1.15 NA NA 50.48 40.62 39.52 48.9 49.47 38.75 45.9 43.81 36.67 38.36 17 32.05 37.85 32.79 53.91 30.09 29.39 31.55 37.39 41.04 56.32 55.37 52.44 46.5 41.4 67.8 50.73
+"Washington" NA NA NA NA NA NA NA NA NA 41.45 41.84 53.44 69.95 57.47 21.82 43.89 55.96 52.24 67.06 33.94 29.88 40.58 42.24 42.68 54.33 53.91 50.68 38 45.1 56.9 51.37
+"West Virginia" NA NA 68.95 58.84 51.82 42.47 41.03 47.74 49.03 46.94 52.23 54.27 55.26 53.41 21.1 49.38 55.3 49.45 58.43 44.47 39.2 42.9 45.11 42.24 48.08 54.08 47.27 32.1 40.8 63.6 41.95
+"Wisconsin" 55.29 56.58 55.88 56.24 54.6 50.9 54.04 50.38 49.77 46.02 59.93 60.04 63.24 54.52 32.68 49.25 70.65 37.06 53.52 31.19 30.26 48.32 50.37 46.28 60.95 61.58 51.77 37.9 47.9 53.4 49.16
+"Wyoming" NA NA NA NA NA NA NA NA NA NA NA NA NA NA 34.42 41.86 62.38 52.39 63.68 40.82 37.47 46.89 51.23 47.27 62.71 60.04 55.05 43.4 55.8 69 59.85
diff --git a/data/xclara.rda b/data/xclara.rda
new file mode 100644
index 0000000..8568fd8
--- /dev/null
+++ b/data/xclara.rda
Binary files differ
diff --git a/inst/CITATION b/inst/CITATION
new file mode 100644
index 0000000..dde2b3a
--- /dev/null
+++ b/inst/CITATION
@@ -0,0 +1,26 @@
+## -*- R -*-
+## Citation information for the 'cluster' package.  The object 'meta' (with
+## components 'Date' and 'Version') is not defined here; it is expected to be
+## supplied by whatever evaluates this file.
+citHeader("To cite the R package 'cluster' in publications use:")
+
+## year: the 4-digit "2ddd" that precedes a "-" in the Date field
+year <- sub(".*(2[[:digit:]]{3})-.*", "\\1", meta$Date)
+vers <- paste("R package version", meta$Version)
+
+citEntry(entry = "Manual",
+         title = "cluster: Cluster Analysis Basics and Extensions",
+         author = c(
+             person("Martin", "Maechler", email="maechler@stat.math.ethz.ch",
+                    role = c("aut", "cre"),
+                    comment = "enhancements, speed improvements, bug fixes, since 2000"),
+             person("Peter", "Rousseeuw", email="rousse@uia.ua.ac.be", role="aut"),
+             person("Anja", "Struyf", email="Anja.Struyf@uia.ua.ac.be", role="aut"),
+             person("Mia", "Hubert", email="Mia.Hubert@uia.ua.ac.be", role="aut"),
+             person("Kurt", "Hornik", role=c("trl","ctb"), comment = "R port; and much initial help file fixing, 1998--2000")
+             ),
+         year = year,
+         ## the file shipped with the package sources is spelled 'ChangeLog'
+         note = paste(vers,"---",
+                      "For new features, see the 'ChangeLog' file (in the package source)"),
+## FIXME: rather give the URL to the manual on CRAN ???
+##       url = "http://stat.ethz.ch/CRAN/src/contrib/........",
+         ## pieces are pasted with sep = "", so the blank before "(" has to be
+         ## part of the string: "... Hornik, K. (<year>). cluster: ..."
+         textVersion = paste(
+             "Maechler, M., Rousseeuw, P., Struyf, A., Hubert, M., Hornik, K. (",
+             year, "). cluster: Cluster Analysis Basics and Extensions. ",
+             vers, ".", sep=""))
diff --git a/inst/NEWS.Rd b/inst/NEWS.Rd
new file mode 100644
index 0000000..a8d502c
--- /dev/null
+++ b/inst/NEWS.Rd
@@ -0,0 +1,455 @@
+% Check from R:
+% news(db = tools:::.build_news_db_from_package_NEWS_Rd("~/R/Pkgs/cluster/inst/NEWS.Rd"))!
+\name{NEWS}
+\title{News for \R Package \pkg{cluster}}% MM: look into ../svn-log-from.all
+\encoding{UTF-8}
+\newcommand{\CRANpkg}{\href{http://CRAN.R-project.org/package=#1}{\pkg{#1}}}
+%% NB: The date (yyyy-mm-dd) is the "Packaged:" date in ../DESCRIPTION
+
+
+\section{Changes in version 2.1.0 (2019-06-07, svn r7674)}{
+ \subsection{Bug Fixes}{
+ \itemize{
+ \item \code{volume(obj)} is now correct also for ellipsoids of
+ dimension \eqn{d > 2}.
+
+ \item \command{--enable-lto} compilation revealed missing
+ \code{weights} argument in \file{src/cluster.h}'s declaration of
+ \code{cldaisy()}.
+ }
+ }
+ \subsection{Tweaks}{
+ \itemize{
+ \item Replaced many \dQuote{old style} \verb{`<word>'} quotations.
+ }
+ }
+}
+
+\section{Changes in version 2.0.9 (2019-05-01, svn r7663)}{
+ \subsection{Tweaks}{
+ \itemize{
+ \item \file{src/mona.c} now uses correct \emph{void} \code{F77_NAME}
+ and source cleaned.
+ }
+ }
+}
+
+\section{Changes in version 2.0.8 (2019-04-02, svn r7643)}{
+ \subsection{New Features}{
+ \itemize{
+ \item \code{pam()} gets new \code{pamonce} options, for values 3,
+ 4, or 5, choosing versions of \command{fastpam}, contributed by Erich
+ Schubert, Univ. Dortmund.
+
+ \item update \file{tests/*} to work with R >= 3.6.0 (sample.kind)
+ }
+ }
+ \subsection{Bug Fixes}{
+ \itemize{
+ \item correct thinko in \code{?clusGap}, the help page.
+ }
+ }
+}
+
+\section{Changes in version 2.0.7 (2018-03-29, svn r7509)}{
+ \subsection{New Features}{
+ \itemize{
+ \item \code{clara()} gets new option \code{metric = "jaccard"},
+ contributed by Kamil Kozlowski and Kamil Jadszko.
+ %% FIXME: Also add for pam() !!
+
+ \item \code{pam()} and \code{clara()} use \code{match.arg(metric)}
+ and hence \code{metric} can be abbreviated (and invalid strings
+ give an error instead of being interpreted as \code{"euclidean"}).
+ }
+ }
+ \subsection{Bug Fixes}{
+ \itemize{
+ \item The bug fix of \code{clara(*, correct.d = TRUE)} (from
+ version 2.0.4) for the NA-data case now also applies to the
+ internal C function \code{selec()}.
+ }
+ }
+}
+
+\section{Changes in version 2.0.6 (2017-03-10, svn r7332)}{
+ \subsection{New Features}{
+ \itemize{
+ \item \code{mona()} now C- instead of Fortran-based (having used
+ f2c etc) and now has a \code{trace.lev} option which allows
+ progress reporting; it also
+ \dQuote{remembers} if the original data had missing values.
+ }
+ }
+ \subsection{Bug Fixes}{
+ \itemize{
+ \item \code{mona(<1-column>)} no longer loops infinitely but signals
+ an error.
+ }
+ }
+}
+
+\section{Changes in version 2.0.5 (2016-10-07, svn r7278)}{
+ \subsection{New Features}{
+ \itemize{
+ \item \code{clusGap()} gets a new option \code{scaleH0}, and
+ \code{scaleH0 = "original"} is an alternative to the default PCA
+ rotation.%% still see ../TODO-MM !
+
+ \item \code{clusGap()} now also stores its \code{call} and uses
+ that for \code{print()}ing and (by default in the \code{main} title)
+ for \code{plot()}ing \code{"clusGap"} objects.
+
+ \item
+ __ MOSTLY NOT IMPLEMENTED yet __ %%% TODO !!!
+
+ \code{diana()} gets new optional argument \code{stop.at.k}.
+ When a positive integer, the DIANA algorithm will stop early, which
+ is very desirable for large \eqn{n}.
+ }
+ }
+ \subsection{Bug Fixes}{
+ \itemize{
+ \item \code{daisy()} gets 3+1 new options \code{warn*} which allow
+ to suppress three different kinds of warnings, as these are
+ undesirable in some cases. With thanks to Kirill Müller for the
+ convincing context.
+
+ \item \code{pam()} now signals an error when there are more than
+ 65536 observational units (whereas it could segfault previously),
+ thanks to a patch from Mikko Korpela, Helsinki.
+ }
+ }
+}
+
+\section{Changes in version 2.0.4 (2016-04-16, svn r7186)}{
+ \subsection{New Features}{
+ \itemize{
+ \item \code{clusGap()} gets a new option \code{d.power = 1}
+ allowing to choose the basic weight statistic as it was originally
+ proposed, namely \emph{squared} distances by setting \code{d.power = 2}.
+ %% ~/R/MM/Pkg-ex/cluster/Gonzalez-on-clusGap.R <--
+ }
+ }
+ \subsection{Bug Fixes}{
+ \itemize{
+ \item fix small glitch in silhouette's help page.
+
+ \item Finally fixed a bug (in the original Fortran code from
+ Rousseeuw!) in clara's distance computation when there are
+ \code{NA}s in the data. As the fix is not backward compatible,
+ a warning is produced (for the time being) if there \emph{are}
+ \code{NA}s and the user does not explicitly use \code{clara(*, correct.d = TRUE)}.
+ }
+ }
+}
+
+\section{Changes in version 2.0.3 (2015-07-20, svn r6985)}{
+ \subsection{New Features}{
+ \itemize{
+ \item This new \file{NEWS.Rd} file -- going to replace \file{ChangeLog}
+ eventually.
+ }
+ }
+ \subsection{Bug Fixes}{
+ \itemize{
+ \item import all we need (but not more) from the "base" pkgs
+ (stats, graphics, ...).
+ }
+ }
+}
+
+\section{Changes in version 2.0.2 (2015-06-18, svn r6955)}{
+ \subsection{New Features}{
+ \itemize{
+ \item using new \code{anyNA()} where appropriate.
+ \item New Korean translations, thanks to Chel Hee Lee.
+ \item \code{plotpart()}: \code{cmdscale()} tweaks.
+ }
+ }
+ \subsection{Bug Fixes}{
+ \itemize{
+ \item valgrind detected missing allocation (\code{nisol["1"]} for k=1).
+ \item typo R/daisy.q (R bug %once we require R >= 3.2.0: \PR{16430}
+ \Sexpr[results=rd]{tools:::Rd_expr_PR(16430)}).
+ }
+ }
+}
+
+\section{Changes in version 2.0.1 (2015-01-31, svn r6877)}{
+ \subsection{Bug Fixes}{
+ \itemize{
+ \item Fix \code{silhouette( obj )} for \code{obj <- pam(x, k = 1)}.
+ }
+ }
+}
+
+\section{Changes in version 2.0.0 (2015-01-29, svn r6874)}{
+ \subsection{New Features}{
+ \itemize{
+ \item \code{pam()} now using \code{.Call()} instead of
+ \code{.C()} is potentially considerably more efficient.
+ \item \code{agnes()} has improved \code{trace} behaviour; also,
+ some invalid \code{par.method = *} settings now give an early and
+ understandable error message.
+ \item \code{lower.to.upper.tri.inds()} (etc) now returns \code{integer}.
+ }
+ }
+ \subsection{Bug Fixes}{
+ \itemize{
+ \item \code{.C(..)} and \code{.Fortran(..)}: no longer using
+ \code{DUP=FALSE} as that has become deprecated.
+ }
+ }
+}
+
+\section{Changes in version 1.15.3 (2014-09-04, svn r6804)}{
+ \subsection{New Features}{
+ \itemize{
+ \item \code{agnes()} and \code{diana()} finally get, respectively
+ work with a \code{trace.lev} option.
+ \item \code{plot.(agnes|diana)()} now deals well with long
+ \code{call}s, by using multiple title lines.
+ \item Message translations now also for C level error messages.
+ }
+ }
+ \subsection{Bug Fixes}{
+ \itemize{
+ \item \code{agnes(*, method="flexible", par.method = c(a1, a2, b, c))},
+ i.e., \code{length(alpha) == 4}, finally works \emph{correctly}.
+ }
+ }
+}
+
+\section{Changes in version 1.15.2 (2014-03-31, svn r6724)}{
+ \subsection{New Features}{
+ \itemize{
+ \item Rewrote parts of the R level messages so they are more
+ easily translatable, thanks to proposals by Lukasz Daniel.
+ \item French translations from Philippe Grosjean.
+ }
+ }
+}
+
+\section{Changes in version 1.15.1 (2014-03-13, svn r6676)}{
+ \subsection{Bug Fixes}{
+ \itemize{
+ \item \code{mona} example not working in \R < 3.0.x.
+ }
+ }
+}
+
+\section{Changes in version 1.15.0 (2014-03-11, svn r6672)}{
+ \subsection{New Features}{
+ \itemize{
+ \item \code{agnes(*, method = "gaverage")} contributed by Pierre
+ Roudier.
+ \item documentation improvements;
+ \item better translatable messages and translation updates.
+ }
+ }
+}
+
+
+%% ============================== FIXME ===========================
+%% ~~~~~~~~~
+%% use ../ChangeLog
+%% ~~~~~~~~~
+%% and then
+%%
+%% use ../svn-log-from.all
+%% ~~~~~~~~~~~~~~~~
+%% and ../../cluster_Archive.lst {~= CRAN src/contrib/Archive/cluster/ :
+%%
+\section{Changes in version 1.14.4 (2013-03-26, svn r....)}{
+ \subsection{New Features}{
+ \itemize{
+ \item -
+ }
+ }
+ \subsection{Bug Fixes}{
+ \itemize{
+ \item -
+ }
+ }
+}
+
+\section{Changes in version 1.14.3 (2012-10-14, svn r....)}{
+ \subsection{New Features}{
+ \itemize{
+ \item Polish translations from Lukasz Daniel.
+ }
+ }
+ \subsection{Bug Fixes}{
+ \itemize{
+ \item -
+ }
+ }
+}
+
+
+\section{Changes in version 1.14.2 (2012-02-06, svn r....)}{
+ \subsection{New Features}{
+ \itemize{
+ \item New \code{clusGap()} to compute the \dQuote{cluster Gap}
+ goodness-of-fit statistic.
+ }
+ }
+ \subsection{Bug Fixes}{
+ \itemize{
+ \item -
+ }
+ }
+}
+
+\section{Changes in version 1.14.1 (2011-10-16, svn r....)}{
+ \subsection{New Features}{
+ \itemize{
+ \item First translations (into German, thanks to Detlef Steuer).
+ \item better \code{citation("cluster")}
+ }
+ }
+ \subsection{Bug Fixes}{
+ \itemize{
+ \item \code{plot.silhouette(..., col = <one per cluster>)} had
+ ordering bug.
+ }
+ }
+}
+
+\section{Changes in version 1.14.0 (2011-06-07, svn r....)}{
+}
+%% -> /sfs/w/ftp/CRAN/src/contrib/Archive/cluster/
+
+%% 214765 Feb 21 2011 cluster_1.13.3.tar.gz
+%% 213663 Nov 10 2010 cluster_1.13.2.tar.gz
+%% 214083 Jun 25 2010 cluster_1.13.1.tar.gz
+%% 214677 Apr 2 2010 cluster_1.12.3.tar.gz
+%% 214577 Oct 6 2009 cluster_1.12.1.tar.gz
+%% 215041 May 13 2009 cluster_1.12.0.tar.gz
+%% 211085 Mar 31 2009 cluster_1.11.13.tar.gz
+%% 321990 Jan 7 2009 cluster_1.11.12.tar.gz
+%% 245055 Jun 16 2008 cluster_1.11.11.tar.gz
+%% 243446 Feb 29 2008 cluster_1.11.10.tar.gz
+%% 216573 Oct 2 2007 cluster_1.11.9.tar.gz
+%% 215257 Sep 4 2007 cluster_1.11.8.tar.gz
+%% 216815 Jun 5 2007 cluster_1.11.7.tar.gz
+%% 216729 Apr 27 2007 cluster_1.11.6.tar.gz
+%% 211615 Mar 31 2007 cluster_1.11.5.tar.gz
+%% 211634 Dec 12 2006 cluster_1.11.4.tar.gz
+%% 203692 Dec 2 2006 cluster_1.11.3.tar.gz
+%% 210927 Sep 7 2006 cluster_1.11.2.tar.gz
+%% 210091 Aug 25 2006 cluster_1.11.1.tar.gz
+%% 210215 May 18 2006 cluster_1.11.0.tar.gz
+%% 195962 Mar 21 2006 cluster_1.10.5.tar.gz
+%% 197577 Jan 26 2006 cluster_1.10.4.tar.gz
+%% 197853 Jan 26 2006 cluster_1.10.3.tar.gz
+%% 190839 Aug 31 2005 cluster_1.10.2.tar.gz
+%% 190975 Jul 3 2005 cluster_1.10.1.tar.gz
+%% 189042 Jun 13 2005 cluster_1.10.0.tar.gz
+%% 179723 Apr 4 2005 cluster_1.9.8.tar.gz
+%% 176832 Jan 24 2005 cluster_1.9.7.tar.gz
+%% 174742 Aug 24 2004 cluster_1.9.6.tar.gz
+%% 174218 Aug 4 2004 cluster_1.9.5.tar.gz
+%% 175565 Jun 26 2004 cluster_1.9.4.tar.gz
+%% 173097 Jun 18 2004 cluster_1.9.3.tar.gz
+%% 173251 Jun 13 2004 cluster_1.9.2.tar.gz
+%% 169773 Apr 12 2004 cluster_1.9.1.tar.gz
+%% 170071 Mar 14 2004 cluster_1.8.1.tar.gz
+%% 165322 Jan 22 2004 cluster_1.8.0.tar.gz
+%% 161548 Sep 24 2003 cluster_1.7.6.tar.gz
+%% 161359 Sep 3 2003 cluster_1.7.5.tar.gz
+%% 161257 Jul 18 2003 cluster_1.7.4.tar.gz
+%% 160252 Jun 11 2003 cluster_1.7.3.tar.gz
+%% 158265 Jun 4 2003 cluster_1.7.2.tar.gz
+%% 157386 May 1 2003 cluster_1.7.1.tar.gz
+%% 155161 Mar 26 2003 cluster_1.7.0.tar.gz
+%% 154089 Dec 31 2002 cluster_1.6-4.tar.gz
+%% 154987 Dec 5 2002 cluster_1.6-3.tar.gz
+%% 154261 Oct 23 2002 cluster_1.6-2.tar.gz
+%% 147063 Sep 10 2002 cluster_1.6-1.tar.gz
+%% 131808 Jul 30 2002 cluster_1.5-2.tar.gz
+%% 116292 Jun 19 2002 cluster_1.5-1.tar.gz
+%% 113972 Mar 31 2002 cluster_1.4-2.tar.gz
+%% 113889 Mar 7 2002 cluster_1.4-1.tar.gz
+%% 116698 Jan 24 2002 cluster_1.4-0.tar.gz
+%% 105552 Dec 19 2001 cluster_1.3-6.tar.gz
+%% 105390 Nov 7 2001 cluster_1.3-5.tar.gz
+%% 105275 Aug 24 2001 cluster_1.3-4.tar.gz
+%% 103626 Jun 8 2001 cluster_1.3-3.tar.gz
+%% 99698 Jan 4 2001 cluster_1.3-2.tar.gz
+%% 91608 Feb 18 2000 cluster_1.2-3.tar.gz
+%% 91736 Dec 29 1999 cluster_1.2-2.tar.gz
+%% 93048 Dec 5 1999 cluster_1.2-1.tar.gz
+
+%% ============================== FIXME ===========================
+
+
+
+\section{Version 1.2-1}{
+ \subsection{Versions 1.2-1, ... 1.13-3}{
+ \itemize{
+ \item 60 more CRAN releases of the package \pkg{cluster}
+ from Dec 1999 to Feb 2011, see also the \file{ChangeLog} file and
+ \command{svn log}.
+ }
+ }
+}
+
+% How can I add vertical space ?
+% \preformatted{} is not allowed, nor is \cr
+
+
+\section{Version 1.2-0 (1999-04-11)}{
+ \subsection{First CRAN release of the \pkg{cluster} package, by Kurt Hornik}{
+ \itemize{
+ \item Martin Maechler had his own version independently.
+ \item Both closely modeled after \code{clus} the tarball off JSS.
+ }}
+
+ \subsection{R Functions -- Fortran Files}{
+ \itemize{
+ \item \code{agnes()} -- \file{twins.f} for the \dQuote{twins} \code{agnes} and \code{diana}.
+ \item \code{clara()} -- \file{clara.f}
+ \item \code{daisy()} -- \file{daisy.f} (and \file{meet.f})
+ \item \code{diana()} -- (twins.f)
+ \item \code{fanny()} -- \file{fanny.f}
+ \item \code{mona()} -- \file{mona.f}
+ \item \code{pam()} -- \file{pam.f}
+ }
+ }
+ \subsection{Data Sets}{
+ \itemize{
+ \item agriculture
+ \item animals
+ \item flower
+ \item ruspini
+ \item votes.repub
+ }
+ }
+
+ \subsection{Further Features}{
+ \itemize{
+ \item all Examples in \file{man/*.Rd} hand edited to become
+ executable.
+ \item \code{summary()}, \code{print()} (and
+ \code{print.summary.**()} methods) for the six basic \R functions above.
+ }
+ }
+}
+
+
+
+\section{Version 1.1-2 (1998-06-16)}{
+ \subsection{Renamed previous \pkg{clus} to \pkg{cluster}}{
+ \itemize{ \item . }
+ }
+}
+\section{Version 1.1-1 (1998-06-15)}{
+ \subsection{New Features}{
+ \itemize{
+ \item started \file{ChangeLog}
+ }
+ }
+}
+
diff --git a/inst/po/de/LC_MESSAGES/R-cluster.mo b/inst/po/de/LC_MESSAGES/R-cluster.mo
new file mode 100644
index 0000000..bfc1c09
--- /dev/null
+++ b/inst/po/de/LC_MESSAGES/R-cluster.mo
Binary files differ
diff --git a/inst/po/de/LC_MESSAGES/cluster.mo b/inst/po/de/LC_MESSAGES/cluster.mo
new file mode 100644
index 0000000..6debc9e
--- /dev/null
+++ b/inst/po/de/LC_MESSAGES/cluster.mo
Binary files differ
diff --git a/inst/po/en@quot/LC_MESSAGES/R-cluster.mo b/inst/po/en@quot/LC_MESSAGES/R-cluster.mo
new file mode 100644
index 0000000..0e7bbaf
--- /dev/null
+++ b/inst/po/en@quot/LC_MESSAGES/R-cluster.mo
Binary files differ
diff --git a/inst/po/en@quot/LC_MESSAGES/cluster.mo b/inst/po/en@quot/LC_MESSAGES/cluster.mo
new file mode 100644
index 0000000..efb4789
--- /dev/null
+++ b/inst/po/en@quot/LC_MESSAGES/cluster.mo
Binary files differ
diff --git a/inst/po/fr/LC_MESSAGES/R-cluster.mo b/inst/po/fr/LC_MESSAGES/R-cluster.mo
new file mode 100644
index 0000000..9e91cc7
--- /dev/null
+++ b/inst/po/fr/LC_MESSAGES/R-cluster.mo
Binary files differ
diff --git a/inst/po/ko/LC_MESSAGES/R-cluster.mo b/inst/po/ko/LC_MESSAGES/R-cluster.mo
new file mode 100644
index 0000000..2b546d6
--- /dev/null
+++ b/inst/po/ko/LC_MESSAGES/R-cluster.mo
Binary files differ
diff --git a/inst/po/ko/LC_MESSAGES/cluster.mo b/inst/po/ko/LC_MESSAGES/cluster.mo
new file mode 100644
index 0000000..4433f7a
--- /dev/null
+++ b/inst/po/ko/LC_MESSAGES/cluster.mo
Binary files differ
diff --git a/inst/po/pl/LC_MESSAGES/R-cluster.mo b/inst/po/pl/LC_MESSAGES/R-cluster.mo
new file mode 100644
index 0000000..9817eb4
--- /dev/null
+++ b/inst/po/pl/LC_MESSAGES/R-cluster.mo
Binary files differ
diff --git a/inst/test-tools.R b/inst/test-tools.R
new file mode 100644
index 0000000..673a6a6
--- /dev/null
+++ b/inst/test-tools.R
@@ -0,0 +1,10 @@
+#### Will be sourced by several R scripts in ../tests/
+
+### ------- General test "tools" (from the Matrix package):
+### ==> 'Suggests: Matrix' in ../DESCRIPTION
+source(system.file("test-tools-1.R", package = "Matrix", lib.loc = .Library),
+ keep.source = FALSE)
+
+if(doExtras <- cluster:::doExtras())## from ../R/0aaa.R
+ cat("doExtras <- cluster:::doExtras() : TRUE\n")
+
diff --git a/man/agnes.Rd b/man/agnes.Rd
new file mode 100644
index 0000000..c73aec4
--- /dev/null
+++ b/man/agnes.Rd
@@ -0,0 +1,274 @@
+\name{agnes}
+\alias{agnes}
+\title{Agglomerative Nesting (Hierarchical Clustering)}
+\concept{UPGMA clustering}
+\description{
+ Computes agglomerative hierarchical clustering of the dataset.
+}
+\usage{
+agnes(x, diss = inherits(x, "dist"), metric = "euclidean",
+ stand = FALSE, method = "average", par.method,
+ keep.diss = n < 100, keep.data = !diss, trace.lev = 0)
+}
+\arguments{
+ \item{x}{
+ data matrix or data frame, or dissimilarity matrix, depending on the
+ value of the \code{diss} argument.
+
+ In case of a matrix or data frame, each row corresponds to an observation,
+ and each column corresponds to a variable. All variables must be numeric.
+ Missing values (NAs) are allowed.
+
+ In case of a dissimilarity matrix, \code{x} is typically the output of
+ \code{\link{daisy}} or \code{\link{dist}}.
+ Also a vector with length n*(n-1)/2 is allowed (where n is the number
+ of observations), and will be interpreted in the same way as the
+ output of the above-mentioned functions. Missing values (NAs) are not
+ allowed.
+ }
+ \item{diss}{
+ logical flag: if TRUE (default for \code{dist} or
+ \code{dissimilarity} objects), then \code{x} is assumed to be a
+ dissimilarity matrix. If FALSE, then \code{x} is treated as
+ a matrix of observations by variables.
+ }
+ \item{metric}{
+ character string specifying the metric to be used for calculating
+ dissimilarities between observations.
+ The currently available options are \code{"euclidean"} and \code{"manhattan"}.
+ Euclidean distances are root sum-of-squares of differences, and
+ manhattan distances are the sum of absolute differences.
+ If \code{x} is already a dissimilarity matrix, then this argument will
+ be ignored.
+ }
+ \item{stand}{
+ logical flag: if TRUE, then the measurements in \code{x} are
+ standardized before calculating the dissimilarities. Measurements
+ are standardized for each variable (column), by subtracting the
+ variable's mean value and dividing by the variable's mean absolute
+ deviation. If \code{x} is already a dissimilarity matrix, then this
+ argument will be ignored.
+ }
+ \item{method}{
+ character string defining the clustering method. The six methods
+ implemented are
+ \code{"average"} ([unweighted pair-]group [arithMetic] average method, aka \sQuote{UPGMA}),
+ \code{"single"} (single linkage), \code{"complete"} (complete linkage),
+ \code{"ward"} (Ward's method),
+ \code{"weighted"} (weighted average linkage, aka \sQuote{WPGMA}), its generalization
+ \code{"flexible"} which uses (a constant version of)
+ the Lance-Williams formula and the \code{par.method} argument, and
+ \code{"gaverage"} a generalized \code{"average"} aka \dQuote{flexible
+ UPGMA} method also using the Lance-Williams formula and \code{par.method}.
+
+ The default is \code{"average"}.
+ }
+ \item{par.method}{
+ If \code{method} is \code{"flexible"} or \code{"gaverage"}, a numeric
+ vector of length 1, 3, or 4, (with a default for \code{"gaverage"}), see in
+ the details section.
+ }
+ \item{keep.diss, keep.data}{logicals indicating if the dissimilarities
+ and/or input data \code{x} should be kept in the result. Setting
+ these to \code{FALSE} can give much smaller results and hence even save
+ memory allocation \emph{time}.}
+ \item{trace.lev}{integer specifying a trace level for printing
+ diagnostics during the algorithm. Default \code{0} does not print
+ anything; higher values print increasingly more.}
+}
+\value{
+ an object of class \code{"agnes"} (which extends \code{"twins"})
+ representing the clustering. See \code{\link{agnes.object}} for
+ details, and methods applicable.
+}
+\author{
+ Method \code{"gaverage"} has been contributed by Pierre Roudier, Landcare
+ Research, New Zealand.
+}
+\details{
+ \code{agnes} is fully described in chapter 5 of Kaufman and Rousseeuw (1990).
+ Compared to other agglomerative clustering methods such as \code{hclust},
+ \code{agnes} has the following features: (a) it yields the
+ agglomerative coefficient (see \code{\link{agnes.object}})
+ which measures the amount of clustering structure found; and (b)
+ apart from the usual tree it also provides the banner, a novel
+ graphical display (see \code{\link{plot.agnes}}).
+
+ The \code{agnes}-algorithm constructs a hierarchy of clusterings.\cr
+ At first, each observation is a small cluster by itself. Clusters are
+ merged until only one large cluster remains which contains all the
+ observations. At each stage the two \emph{nearest} clusters are combined
+ to form one larger cluster.
+
+ For \code{method="average"}, the distance between two clusters is the
+ average of the dissimilarities between the points in one cluster and the
+ points in the other cluster.
+ \cr
+ In \code{method="single"}, we use the smallest dissimilarity between a
+ point in the first cluster and a point in the second cluster (nearest
+ neighbor method).
+ \cr
+ When \code{method="complete"}, we use the largest dissimilarity
+ between a point in the first cluster and a point in the second cluster
+ (furthest neighbor method).
+
+ The \code{method = "flexible"} allows (and requires) more details:
+ The Lance-Williams formula specifies how dissimilarities are
+ computed when clusters are agglomerated (equation (32) in K&R(1990),
+ p.237). If clusters \eqn{C_1} and \eqn{C_2} are agglomerated into a
+ new cluster, the dissimilarity between their union and another
+ cluster \eqn{Q} is given by
+ \deqn{
+ D(C_1 \cup C_2, Q) = \alpha_1 * D(C_1, Q) + \alpha_2 * D(C_2, Q) +
+ \beta * D(C_1,C_2) + \gamma * |D(C_1, Q) - D(C_2, Q)|,
+ }
+ where the four coefficients \eqn{(\alpha_1, \alpha_2, \beta, \gamma)}
+ are specified by the vector \code{par.method}, either directly as vector of
+ length 4, or (more conveniently) if \code{par.method} is of length 1,
+ say \eqn{= \alpha}, \code{par.method} is extended to
+ give the \dQuote{Flexible Strategy} (K&R(1990), p.236 f) with
+ Lance-Williams coefficients \eqn{(\alpha_1 = \alpha_2 = \alpha, \beta =
+ 1 - 2\alpha, \gamma=0)}.\cr
+ Also, if \code{length(par.method) == 3}, \eqn{\gamma = 0} is set.
+
+ \bold{Care} and expertise is probably needed when using \code{method = "flexible"}
+ particularly for the case when \code{par.method} is specified of
+ longer length than one. Since \pkg{cluster} version 2.0, choices
+ leading to invalid \code{merge} structures now signal an error (from
+ the C code already).
+ The \emph{weighted average} (\code{method="weighted"}) is the same as
+ \code{method="flexible", par.method = 0.5}. Further,
+ \code{method= "single"} is equivalent to \code{method="flexible", par.method = c(.5,.5,0,-.5)}, and
+ \code{method="complete"} is equivalent to \code{method="flexible", par.method = c(.5,.5,0,+.5)}.
+
+ The \code{method = "gaverage"} is a generalization of \code{"average"}, aka
+ \dQuote{flexible UPGMA} method, and is (a generalization of the approach)
+ detailed in Belbin et al. (1992). As \code{"flexible"}, it uses the
+ Lance-Williams formula above for dissimilarity updating, but with
+ \eqn{\alpha_1} and \eqn{\alpha_2} not constant, but \emph{proportional} to
+ the \emph{sizes} \eqn{n_1} and \eqn{n_2} of the clusters \eqn{C_1} and
+ \eqn{C_2} respectively, i.e.,
+ \deqn{\alpha_j = \alpha'_j \frac{n_j}{n_1+n_2},}{%
+ \alpha_j = \alpha'_j * n_j/(n_1 + n_2),}
+ where \eqn{\alpha'_1}, \eqn{\alpha'_2} are determined from \code{par.method},
+ either directly as \eqn{(\alpha_1, \alpha_2, \beta, \gamma)} or
+ \eqn{(\alpha_1, \alpha_2, \beta)} with \eqn{\gamma = 0}, or (less flexibly,
+ but more conveniently) as follows:
+
+ Belbin et al proposed \dQuote{flexible beta}, i.e. the user would only
+ specify \eqn{\beta} (as \code{par.method}), sensibly in
+ \deqn{-1 \leq \beta < 1,}{-1 \le \beta < 1,}
+ and \eqn{\beta} determines \eqn{\alpha'_1} and \eqn{\alpha'_2} as
+ \deqn{\alpha'_j = 1 - \beta,} and \eqn{\gamma = 0}.
+
+ This \eqn{\beta} may be specified by \code{par.method} (as length 1 vector),
+ and if \code{par.method} is not specified, a default value of -0.1 is used,
+ as Belbin et al recommend taking a \eqn{\beta} value around -0.1 as a general
+ agglomerative hierarchical clustering strategy.
+
+ Note that \code{method = "gaverage", par.method = 0} (or \code{par.method =
+ c(1,1,0,0)}) is equivalent to the \code{agnes()} default method \code{"average"}.
+}
+\section{BACKGROUND}{
+ Cluster analysis divides a dataset into groups (clusters) of
+ observations that are similar to each other.
+ \describe{
+ \item{Hierarchical methods}{like
+ \code{agnes}, \code{\link{diana}}, and \code{\link{mona}}
+ construct a hierarchy of clusterings, with the number of clusters
+ ranging from one to the number of observations.}
+ \item{Partitioning methods}{like
+ \code{\link{pam}}, \code{\link{clara}}, and \code{\link{fanny}}
+ require that the number of clusters be given by the user.}
+ }
+}
+\references{
+ Kaufman, L. and Rousseeuw, P.J. (1990). (=: \dQuote{K&R(1990)})
+ \emph{Finding Groups in Data: An Introduction to Cluster Analysis}.
+ Wiley, New York.
+
+ Anja Struyf, Mia Hubert and Peter J. Rousseeuw (1996)
+ Clustering in an Object-Oriented Environment.
+ \emph{Journal of Statistical Software} \bold{1}.
+ \doi{10.18637/jss.v001.i04}
+
+ Struyf, A., Hubert, M. and Rousseeuw, P.J. (1997). Integrating
+ Robust Clustering Techniques in S-PLUS,
+ \emph{Computational Statistics and Data Analysis}, \bold{26}, 17--37.
+
+ Lance, G.N., and W.T. Williams (1966).
+ A General Theory of Classificatory Sorting Strategies, I. Hierarchical
+ Systems.
+ \emph{Computer J.} \bold{9}, 373--380.
+
+ Belbin, L., Faith, D.P. and Milligan, G.W. (1992). A Comparison of
+ Two Approaches to Beta-Flexible Clustering.
+ \emph{Multivariate Behavioral Research}, \bold{27}, 417--433.
+
+}
+\seealso{
+ \code{\link{agnes.object}}, \code{\link{daisy}}, \code{\link{diana}},
+ \code{\link{dist}}, \code{\link{hclust}}, \code{\link{plot.agnes}},
+ \code{\link{twins.object}}.
+}
+\examples{
+data(votes.repub)
+agn1 <- agnes(votes.repub, metric = "manhattan", stand = TRUE)
+agn1
+plot(agn1)
+
+op <- par(mfrow=c(2,2))
+agn2 <- agnes(daisy(votes.repub), diss = TRUE, method = "complete")
+plot(agn2)
+## alpha = 0.625 ==> beta = -1/4 is "recommended" by some
+agnS <- agnes(votes.repub, method = "flexible", par.meth = 0.625)
+plot(agnS)
+par(op)
+
+## "show" equivalence of three "flexible" special cases
+d.vr <- daisy(votes.repub)
+a.wgt <- agnes(d.vr, method = "weighted")
+a.sing <- agnes(d.vr, method = "single")
+a.comp <- agnes(d.vr, method = "complete")
+iC <- -(6:7) # not using 'call' and 'method' for comparisons
+stopifnot(
+ all.equal(a.wgt [iC], agnes(d.vr, method="flexible", par.method = 0.5)[iC]) ,
+ all.equal(a.sing[iC], agnes(d.vr, method="flex", par.method= c(.5,.5,0, -.5))[iC]),
+ all.equal(a.comp[iC], agnes(d.vr, method="flex", par.method= c(.5,.5,0, +.5))[iC]))
+
+## Exploring the dendrogram structure
+(d2 <- as.dendrogram(agn2)) # two main branches
+d2[[1]] # the first branch
+d2[[2]] # the 2nd one { 8 + 42 = 50 }
+d2[[1]][[1]]# first sub-branch of branch 1 .. and shorter form
+identical(d2[[c(1,1)]],
+ d2[[1]][[1]])
+## a "textual picture" of the dendrogram :
+str(d2)
+
+data(agriculture)
+
+## Plot similar to Figure 7 in ref
+\dontrun{plot(agnes(agriculture), ask = TRUE)}
+\dontshow{plot(agnes(agriculture))}
+
+data(animals)
+aa.a <- agnes(animals) # default method = "average"
+aa.ga <- agnes(animals, method = "gaverage")
+op <- par(mfcol=1:2, mgp=c(1.5, 0.6, 0), mar=c(.1+ c(4,3,2,1)),
+ cex.main=0.8)
+plot(aa.a, which.plot = 2)
+plot(aa.ga, which.plot = 2)
+par(op)
+\dontshow{## equivalence
+stopifnot( ## below show ave == gave(0); here ave == gave(c(1,1,0,0)):
+ all.equal(aa.a [iC], agnes(animals, method="gave", par.meth= c(1,1,0,0))[iC]),
+ all.equal(aa.ga[iC], agnes(animals, method="gave", par.meth= -0.1)[iC]),
+ all.equal(aa.ga[iC], agnes(animals, method="gav", par.m= c(1.1,1.1,-0.1,0))[iC]))
+}
+
+## Show how "gaverage" is a "generalized average":
+aa.ga.0 <- agnes(animals, method = "gaverage", par.method = 0)
+stopifnot(all.equal(aa.ga.0[iC], aa.a[iC]))
+}
+\keyword{cluster}
diff --git a/man/agnes.object.Rd b/man/agnes.object.Rd
new file mode 100644
index 0000000..21b7343
--- /dev/null
+++ b/man/agnes.object.Rd
@@ -0,0 +1,90 @@
+\name{agnes.object}
+\alias{agnes.object}
+\title{Agglomerative Nesting (AGNES) Object}
+\description{
+ The objects of class \code{"agnes"}
+ represent an agglomerative hierarchical clustering of a dataset.
+}
+\section{GENERATION}{
+ This class of objects is returned from \code{\link{agnes}}.
+}
+\section{METHODS}{
+ The \code{"agnes"} class has methods for the following generic functions:
+ \code{print}, \code{summary}, \code{plot}, and
+ \code{\link{as.dendrogram}}.
+
+ In addition, \code{\link{cutree}(x, *)} can be used to \dQuote{cut}
+ the dendrogram in order to produce cluster assignments.
+}
+\section{INHERITANCE}{
+ The class \code{"agnes"} inherits from \code{"twins"}.
+ Therefore, the generic functions \code{\link{pltree}} and
+ \code{\link{as.hclust}} are available for \code{agnes} objects.
+ After applying \code{as.hclust()}, all \emph{its} methods are
+ available, of course.
+}
+\value{
+ A legitimate \code{agnes} object is a list with the following components:
+ \item{order}{
+ a vector giving a permutation of the original observations to allow
+ for plotting, in the sense that the branches of a clustering tree
+ will not cross.}
+ \item{order.lab}{
+ a vector similar to \code{order}, but containing observation labels
+ instead of observation numbers. This component is only available if
+ the original observations were labelled.
+ }
+ \item{height}{
+ a vector with the distances between merging clusters at the successive
+ stages.
+ }
+ \item{ac}{
+ the agglomerative coefficient, measuring the clustering structure of the
+ dataset.
+
+ For each observation i, denote by m(i) its dissimilarity to the
+ first cluster it is merged with, divided by the dissimilarity of the
+ merger in the final step of the algorithm. The \code{ac} is the
+ average of all 1 - m(i). It can also be seen as the average width
+ (or the percentage filled) of the banner plot. Because \code{ac}
+ grows with the number of observations, this measure should not
+ be used to compare datasets of very different sizes.
+ }
+ \item{merge}{
+ an (n-1) by 2 matrix, where n is the number of observations. Row i
+ of \code{merge} describes the merging of clusters at step i of the
+ clustering. If a number j in the row is negative, then the single
+ observation |j| is merged at this stage. If j is positive, then the
+ merger is with the cluster formed at stage j of the algorithm.
+ }
+ \item{diss}{
+ an object of class \code{"dissimilarity"} (see
+ \code{\link{dissimilarity.object}}), representing the total
+ dissimilarity matrix of the dataset.
+ }
+ \item{data}{
+ a matrix containing the original or standardized measurements, depending
+ on the \code{stand} option of the function \code{agnes}. If a
+ dissimilarity matrix was given as input structure, then this
+ component is not available.
+ }
+}
+\seealso{
+ \code{\link{agnes}}, \code{\link{diana}},
+ \code{\link{as.hclust}}, \code{\link{hclust}},
+ \code{\link{plot.agnes}}, \code{\link{twins.object}}.
+
+ \code{\link{cutree}}.
+}
+\examples{
+data(agriculture)
+ag.ag <- agnes(agriculture)
+class(ag.ag)
+pltree(ag.ag) # the dendrogram
+
+## cut the dendrogram -> get cluster assignments:
+(ck3 <- cutree(ag.ag, k = 3))
+(ch6 <- cutree(as.hclust(ag.ag), h = 6))
+stopifnot(identical(unname(ch6), ck3))
+}
+\keyword{cluster}
diff --git a/man/agriculture.Rd b/man/agriculture.Rd
new file mode 100644
index 0000000..c6d1c23
--- /dev/null
+++ b/man/agriculture.Rd
@@ -0,0 +1,49 @@
+\name{agriculture}
+\alias{agriculture}
+\title{European Union Agricultural Workforces}
+\usage{data(agriculture)}
+\description{
+ Gross National Product (GNP) per capita and percentage of the
+ population working in agriculture for each country belonging to the
+ European Union in 1993.
+}
+\format{
+ A data frame with 12 observations on 2 variables:
+ \tabular{rlll}{
+ [ , 1] \tab \code{x} \tab numeric \tab per capita GNP \cr
+ [ , 2] \tab \code{y} \tab numeric \tab percentage in agriculture
+ }
+ The row names of the data frame indicate the countries.
+}
+\source{
+ Eurostat (European Statistical Agency, 1994):
+ \emph{Cijfers en feiten: Een statistisch portret van de Europese Unie}.
+}
+\details{
+ The data seem to show two clusters, the \dQuote{more agricultural} one
+ consisting of Greece, Portugal, Spain, and Ireland.
+}
+\seealso{\code{\link{agnes}}, \code{\link{daisy}}, \code{\link{diana}}.
+}
+\references{
+ see those in \code{\link{agnes}}.
+}
+\examples{
+data(agriculture)
+
+## Compute the dissimilarities using Euclidean metric and without
+## standardization
+daisy(agriculture, metric = "euclidean", stand = FALSE)
+
+## 2nd plot is similar to Figure 3 in Struyf et al (1996)
+plot(pam(agriculture, 2))
+
+## Plot similar to Figure 7 in Struyf et al (1996)
+\dontrun{plot(agnes(agriculture), ask = TRUE)}
+\dontshow{plot(agnes(agriculture))}
+
+## Plot similar to Figure 8 in Struyf et al (1996)
+\dontrun{plot(diana(agriculture), ask = TRUE)}
+\dontshow{plot(diana(agriculture))}
+}
+\keyword{datasets}
diff --git a/man/animals.Rd b/man/animals.Rd
new file mode 100644
index 0000000..6195e42
--- /dev/null
+++ b/man/animals.Rd
@@ -0,0 +1,42 @@
+\name{animals}
+\alias{animals}
+\title{Attributes of Animals}
+\usage{data(animals)}
+\description{
+ This data set considers 6 binary attributes for 20 animals.
+}
+\format{
+ A data frame with 20 observations on 6 variables:
+ \tabular{rll}{
+ [ , 1] \tab war \tab warm-blooded \cr
+ [ , 2] \tab fly \tab can fly \cr
+ [ , 3] \tab ver \tab vertebrate \cr
+ [ , 4] \tab end \tab endangered \cr
+ [ , 5] \tab gro \tab live in groups \cr
+ [ , 6] \tab hai \tab have hair \cr
+ }
+ All variables are encoded as 1 = 'no', 2 = 'yes'.
+}
+\source{
+ Leonard Kaufman and Peter J. Rousseeuw (1990):
+ \emph{Finding Groups in Data}
+ (pp 297ff).
+ New York: Wiley.
+}
+\details{
+ This dataset is useful for illustrating monothetic (only a single
+ variable is used for each split) hierarchical clustering.
+}
+\references{
+ see Struyf, Hubert & Rousseeuw (1996), in \code{\link{agnes}}.
+}
+\examples{
+data(animals)
+apply(animals,2, table) # simple overview
+
+ma <- mona(animals)
+ma
+## Plot similar to Figure 10 in Struyf et al (1996)
+plot(ma)
+}
+\keyword{datasets}
diff --git a/man/bannerplot.Rd b/man/bannerplot.Rd
new file mode 100644
index 0000000..cfa7e63
--- /dev/null
+++ b/man/bannerplot.Rd
@@ -0,0 +1,62 @@
+\name{bannerplot}
+\alias{bannerplot}
+\title{Plot Banner (of Hierarchical Clustering)}
+\description{
+ Draws a \dQuote{banner}, i.e. basically a horizontal \code{\link{barplot}}
+ visualizing the (agglomerative or divisive) hierarchical clustering or
+ another binary dendrogram structure.
+}
+\usage{
+bannerplot(x, w = rev(x$height), fromLeft = TRUE,
+ main=NULL, sub=NULL, xlab = "Height", adj = 0,
+ col = c(2, 0), border = 0, axes = TRUE, frame.plot = axes,
+ rev.xax = !fromLeft, xax.pretty = TRUE,
+ labels = NULL, nmax.lab = 35, max.strlen = 5,
+ yax.do = axes && length(x$order) <= nmax.lab,
+ yaxRight = fromLeft, y.mar = 2.4 + max.strlen/2.5, \dots)
+}
+\arguments{
+ \item{x}{a list with components \code{order}, \code{order.lab} and
+ \code{height} when \code{w}, the next argument, is not specified.}
+ \item{w}{non-negative numeric vector of bar widths.}
+ \item{fromLeft}{logical, indicating if the banner is from the left or not.}
+ \item{main,sub}{main and sub titles, see \code{\link{title}}.}
+ \item{xlab}{x axis label (with \sQuote{correct} default e.g. for
+ \code{plot.agnes}).}
+ \item{adj}{passed to \code{\link{title}(main,sub)} for string adjustment.}
+ \item{col}{vector of length 2, for two horizontal segments.}
+ \item{border}{color for bar border; now defaults to background (no border).}
+ \item{axes}{logical indicating if axes (and labels) should be drawn at all.}
+ \item{frame.plot}{logical indicating the banner should be framed;
+ mainly used when \code{border = 0} (as per default).}
+ \item{rev.xax}{logical indicating if the x axis should be reversed (as
+ in \code{plot.diana}).}
+ \item{xax.pretty}{logical or integer indicating if
+ \code{\link{pretty}()} should be used for the x axis.
+ \code{xax.pretty = FALSE} is mainly for back compatibility.}
+ \item{labels}{labels to use on y-axis; the default is constructed from
+ \code{x}.}
+ \item{nmax.lab}{integer indicating the number of labels which is
+ considered too large for single-name labelling the banner plot.}
+ \item{max.strlen}{positive integer giving the length to which
+ strings are truncated in banner plot labeling.}
+ \item{yax.do}{logical indicating if a y axis and banner labels should
+ be drawn.}
+ \item{yaxRight}{logical indicating if the y axis is on the right or left.}
+ \item{y.mar}{positive number specifying the margin width to use when
+ banners are labeled (along a y-axis). The default adapts to the
+ string width and optimally would also depend on the font.}
+ \item{\dots}{graphical parameters (see \code{\link{par}}) may also
+ be supplied as arguments to this function.}
+}
+\author{Martin Maechler (from original code of Kaufman and Rousseeuw).}
+\note{This is mainly a utility called from \code{\link{plot.agnes}},
+ \code{\link{plot.diana}} and \code{\link{plot.mona}}.
+}% also serves as \seealso{*}
+\examples{
+data(agriculture)
+bannerplot(agnes(agriculture), main = "Bannerplot")
+}
+\keyword{hplot}
+\keyword{cluster}
+\keyword{utilities}
diff --git a/man/chorSub.Rd b/man/chorSub.Rd
new file mode 100644
index 0000000..7e68646
--- /dev/null
+++ b/man/chorSub.Rd
@@ -0,0 +1,34 @@
+\name{chorSub}
+\alias{chorSub}
+\docType{data}
+\title{Subset of C-horizon of Kola Data}
+\description{
+ This is a small rounded subset of the C-horizon data
+ \code{\link[mvoutlier]{chorizon}} from package \pkg{mvoutlier}.
+}
+\usage{data(chorSub)}
+\format{
+ A data frame with 61 observations on 10 variables. The variables
+ contain scaled concentrations of chemical elements.
+}
+\details{
+ This data set was produced from \code{chorizon} via these statements:
+ \preformatted{
+ data(chorizon, package = "mvoutlier")
+ chorSub <- round(100*scale(chorizon[,101:110]))[190:250,]
+ storage.mode(chorSub) <- "integer"
+ colnames(chorSub) <- gsub("_.*", '', colnames(chorSub))
+ }
+}
+\source{Kola Project (1993-1998)
+}
+\seealso{
+ \code{\link[mvoutlier]{chorizon}} in package \pkg{mvoutlier} and other
+ Kola data in the same package.
+}
+\examples{
+data(chorSub)
+summary(chorSub)
+pairs(chorSub, gap= .1)# some outliers
+}
+\keyword{datasets}
diff --git a/man/clara.Rd b/man/clara.Rd
new file mode 100644
index 0000000..3a4a25e
--- /dev/null
+++ b/man/clara.Rd
@@ -0,0 +1,190 @@
+\name{clara}
+\alias{clara}
+\title{Clustering Large Applications}
+\description{
+ Computes a \code{"clara"} object, a list representing a clustering of
+ the data into \code{k} clusters.
+}
+\usage{
+clara(x, k, metric = c("euclidean", "manhattan", "jaccard"),
+ stand = FALSE, samples = 5,
+ sampsize = min(n, 40 + 2 * k), trace = 0, medoids.x = TRUE,
+ keep.data = medoids.x, rngR = FALSE, pamLike = FALSE, correct.d = TRUE)
+}
+\arguments{
+ \item{x}{
+ data matrix or data frame, each row corresponds to an observation,
+ and each column corresponds to a variable. All variables must be numeric.
+ Missing values (NAs) are allowed.}
+ \item{k}{integer, the number of clusters.
+ It is required that \eqn{0 < k < n} where \eqn{n} is the number of
+ observations (i.e., n = \code{nrow(x)}).}
+ \item{metric}{
+ character string specifying the metric to be used for calculating
+ dissimilarities between observations.
+ The currently available options are "euclidean", "manhattan", and
+ "jaccard".
+
+ Euclidean distances are root sum-of-squares of differences, and
+ manhattan distances are the sum of absolute differences.
+ }
+ \item{stand}{logical, indicating if the measurements in \code{x} are
+ standardized before calculating the dissimilarities. Measurements
+ are standardized for each variable (column), by subtracting the
+ variable's mean value and dividing by the variable's mean absolute
+ deviation.
+ }
+ \item{samples}{integer, say \eqn{N}, the number of samples to be drawn from the
+ dataset. The default, \code{N = 5}, is rather small for historical (and
+ now back compatibility) reasons and we \emph{recommend to set
+ \code{samples} an order of magnitude larger}.
+ }
+ \item{sampsize}{integer, say \eqn{j}, the number of observations in each
+ sample. \code{sampsize} should be higher than the number of clusters
+ (\code{k}) and at most the number of observations (n =
+ \code{nrow(x)}). While computational effort is proportional to \eqn{j^2},
+ see note below, it may still be advisable to set
+ \eqn{j = }\code{sampsize} to a \emph{larger} value than the (historical) default.}
+ \item{trace}{integer indicating a \emph{trace level} for diagnostic
+ output during the algorithm.}
+ \item{medoids.x}{logical indicating if the medoids should be
+ returned, identically to some rows of the input data \code{x}. If
+ \code{FALSE}, \code{keep.data} must be false as well, and the medoid
+ indices, i.e., row numbers of the medoids will still be returned
+ (\code{i.med} component), and the algorithm saves space by needing
+ one copy less of \code{x}.}
+ \item{keep.data}{logical indicating if the (\emph{scaled} if
+ \code{stand} is true) data should be kept in the result.
+% (\code{keepdata} is equivalent to \code{keep.data} where the former
+% is deprecated.)
+ Setting this to \code{FALSE} saves memory (and hence time), but
+ disables \code{\link{clusplot}()}ing of the result. Use
+ \code{medoids.x = FALSE} to save even more memory.}
+ \item{rngR}{logical indicating if \R's random number generator should
+ be used instead of the primitive clara()-builtin one. If true, this
+ also means that each call to \code{clara()} returns a different result
+ -- though only slightly different in good situations.}
+ \item{pamLike}{logical indicating if the \dQuote{swap} phase (see
+ \code{\link{pam}}, in C code) should use the same algorithm as
+ \code{\link{pam}()}. Note that from Kaufman and Rousseeuw's
+ description this \emph{should} have been true always, but as the
+ original Fortran code and the subsequent port to C have always
+ contained a small one-letter change (a typo according to Martin Maechler)
+ with respect to PAM, the default, \code{pamLike = FALSE} has been chosen to
+ remain back compatible rather than \dQuote{PAM compatible}.}
+ \item{correct.d}{logical or integer indicating that---only in the case
+ of \code{NA}s present in \code{x}---the correct distance computation
+ should be used instead of the wrong formula which has been present
+ in the original Fortran code and been in use up to early 2016.
+
+ Because the new correct formula is not back compatible, for the time
+ being, a warning is signalled in this case, unless the user explicitly
+ specifies \code{correct.d}.}
+}
+\value{
+ an object of class \code{"clara"} representing the clustering. See
+ \code{\link{clara.object}} for details.
+}
+\details{
+ \code{clara} is fully described in chapter 3 of Kaufman and Rousseeuw (1990).
+ Compared to other partitioning methods such as \code{pam}, it can deal with
+ much larger datasets. Internally, this is achieved by considering
+ sub-datasets of fixed size (\code{sampsize}) such that the time and
+ storage requirements become linear in \eqn{n} rather than quadratic.
+
+ Each sub-dataset is partitioned into \code{k} clusters using the same
+ algorithm as in \code{\link{pam}}.\cr
+ Once \code{k} representative objects have been selected from the
+ sub-dataset, each observation of the entire dataset is assigned
+ to the nearest medoid.
+
+ The mean (equivalent to the sum) of the dissimilarities of the
+ observations to their closest medoid is used as a measure of the
+ quality of the clustering. The sub-dataset for which the mean (or
+ sum) is minimal, is retained. A further analysis is carried out on
+ the final partition.
+
+ Each sub-dataset is forced to contain the medoids obtained from the
+ best sub-dataset until then. Randomly drawn observations are added to
+ this set until \code{sampsize} has been reached.
+}
+\note{
+%% mostly by Martin Maechler :
+ By default, the random sampling is implemented with a \emph{very}
+ simple scheme (with period \eqn{2^{16} = 65536}) inside the Fortran
+ code, independently of \R's random number generation, and as a matter
+ of fact, deterministically. Alternatively, we recommend setting
+ \code{rngR = TRUE} which uses \R's random number generators. Then,
+ \code{clara()} results are made reproducible typically by using
+ \code{\link{set.seed}()} before calling \code{clara}.
+
+ The storage requirement of \code{clara} computation (for small
+ \code{k}) is about
+ \eqn{O(n \times p) + O(j^2)}{O(n * p) + O(j^2)} where
+ \eqn{j = \code{sampsize}}, and \eqn{(n,p) = \code{dim(x)}}.
+ The CPU computing time (again assuming small \code{k}) is about
+ \eqn{O(n \times p \times j^2 \times N)}{O(n * p * j^2 * N)}, where
+ \eqn{N = \code{samples}}.
+
+ For \dQuote{small} datasets, the function \code{\link{pam}} can be used
+ directly. What can be considered \emph{small}, is really a function
+ of available computing power, both memory (RAM) and speed.
+ Originally (1990), \dQuote{small} meant less than 100 observations;
+ in 1997, the authors said \emph{\dQuote{small (say with fewer than 200
+ observations)}}; as of 2006, you can use \code{\link{pam}} with
+ several thousand observations.
+}
+\author{
+ Kaufman and Rousseeuw (see \code{\link{agnes}}), originally.
+ Metric \code{"jaccard"}: Kamil Kozlowski (\code{@ownedoutcomes.com})
+ and Kamil Jadeszko.
+ All arguments from \code{trace} on, and most \R documentation and all
+ tests by Martin Maechler.
+}
+\seealso{
+ \code{\link{agnes}} for background and references;
+ \code{\link{clara.object}}, \code{\link{pam}},
+ \code{\link{partition.object}}, \code{\link{plot.partition}}.
+}
+\examples{
+## generate 500 objects, divided into 2 clusters.
+x <- rbind(cbind(rnorm(200,0,8), rnorm(200,0,8)),
+ cbind(rnorm(300,50,8), rnorm(300,50,8)))
+clarax <- clara(x, 2, samples=50)
+clarax
+clarax$clusinfo
+## using pamLike=TRUE gives the same (apart from the 'call'):
+all.equal(clarax[-8],
+ clara(x, 2, samples=50, pamLike = TRUE)[-8])
+plot(clarax)
+
+## 'xclara' is an artificial data set with 3 clusters of 1000 bivariate
+## objects each.
+data(xclara)
+(clx3 <- clara(xclara, 3))
+## "better" number of samples
+cl.3 <- clara(xclara, 3, samples=100)
+## but that did not change the result here:
+stopifnot(cl.3$clustering == clx3$clustering)
+## Plot similar to Figure 5 in Struyf et al (1996)
+\dontrun{plot(clx3, ask = TRUE)}
+\dontshow{plot(clx3)}
+
+## Try 100 times *different* random samples -- for reliability:
+nSim <- 100
+nCl <- 3 # = no.classes
+set.seed(421)# (reproducibility)
+cl <- matrix(NA,nrow(xclara), nSim)
+for(i in 1:nSim)
+ cl[,i] <- clara(xclara, nCl, medoids.x = FALSE, rngR = TRUE)$cluster
+tcl <- apply(cl,1, tabulate, nbins = nCl)
+## those that are not always in same cluster (5 out of 3000 for this seed):
+(iDoubt <- which(apply(tcl,2, function(n) all(n < nSim))))
+if(length(iDoubt)) { # (not for all seeds)
+ tabD <- tcl[,iDoubt, drop=FALSE]
+ dimnames(tabD) <- list(cluster = paste(1:nCl), obs = format(iDoubt))
+ t(tabD) # how many times in which clusters
+}
+}% end{examples}
+
+\keyword{cluster}
diff --git a/man/clara.object.Rd b/man/clara.object.Rd
new file mode 100644
index 0000000..c90e8d3
--- /dev/null
+++ b/man/clara.object.Rd
@@ -0,0 +1,55 @@
+\name{clara.object}
+\alias{clara.object}
+\title{Clustering Large Applications (CLARA) Object}
+\description{
+ The objects of class \code{"clara"} represent a partitioning of a large
+ dataset into clusters and are typically returned from \code{\link{clara}}.
+}
+\section{Methods, Inheritance}{
+ The \code{"clara"} class has methods for the following generic functions:
+ \code{print}, \code{summary}.
+
+ The class \code{"clara"} inherits from \code{"partition"}.
+ Therefore, the generic functions \code{plot} and \code{clusplot} can
+ be used on a \code{clara} object.
+}
+\value{
+ A legitimate \code{clara} object is a list with the following components:
+
+ \item{sample}{
+ labels or case numbers of the observations in the best sample, that is,
+ the sample used by the \code{clara} algorithm for the final partition.}
+ \item{medoids}{the medoids or representative objects of the clusters.
+ It is a matrix with in each row the coordinates of one medoid.
+ Possibly \code{NULL}, namely when the object resulted from
+ \code{clara(*, medoids.x=FALSE)}. Use the following \code{i.med} in
+ that case.}
+ \item{i.med}{
+ the \emph{indices} of the \code{medoids} above: \code{medoids <- x[i.med,]}
+ where \code{x} is the original data matrix in \code{clara(x,*)}.}
+ \item{clustering}{the clustering vector, see \code{\link{partition.object}}.}
+ \item{objective}{the objective function for the final clustering of
+ the entire dataset.}
+ \item{clusinfo}{
+ matrix, each row gives numerical information for one cluster. These
+ are the cardinality of the cluster (number of observations), the
+ maximal and average dissimilarity between the observations in the
+ cluster and the cluster's medoid. %% FIXME: Now differs from pam.object.Rd:
+ The last column is the maximal
+ dissimilarity between the observations in the cluster and the
+ cluster's medoid, divided by the minimal dissimilarity between the
+ cluster's medoid and the medoid of any other cluster. If this ratio
+ is small, the cluster is well-separated from the other clusters.
+ }
+ \item{diss}{dissimilarity (maybe NULL), see \code{\link{partition.object}}.}
+ \item{silinfo}{list with silhouette width information for the best sample, see
+ \code{\link{partition.object}}.}
+ \item{call}{generating call, see \code{\link{partition.object}}.}
+ \item{data}{matrix, possibly standardized, or NULL, see
+ \code{\link{partition.object}}.}
+}
+\seealso{
+ \code{\link{clara}}, \code{\link{dissimilarity.object}},
+ \code{\link{partition.object}}, \code{\link{plot.partition}}.
+}
+\keyword{cluster}
diff --git a/man/clusGap.Rd b/man/clusGap.Rd
new file mode 100644
index 0000000..5a3b480
--- /dev/null
+++ b/man/clusGap.Rd
@@ -0,0 +1,259 @@
+\name{clusGap}
+\title{Gap Statistic for Estimating the Number of Clusters}
+\alias{clusGap}
+\alias{maxSE}
+\alias{print.clusGap}
+\alias{plot.clusGap}
+\description{
+ \code{clusGap()} calculates a goodness of clustering measure, the
+ \dQuote{gap} statistic. For each number of clusters \eqn{k}, it
+ compares \eqn{\log(W(k))}{log(W(k))} with
+ \eqn{E^*[\log(W(k))]}{E*[log(W(k))]} where the latter is defined via
+ bootstrapping, i.e., simulating from a reference (\eqn{H_0})
+ distribution, a uniform distribution on the hypercube determined by
+ the ranges of \code{x}, after first centering, and then
+ \code{\link{svd}} (aka \sQuote{PCA})-rotating them when (as by
+ default) \code{spaceH0 = "scaledPCA"}.
+
+ \code{maxSE(f, SE.f)} determines the location of the \bold{maximum}
+ of \code{f}, taking a \dQuote{1-SE rule} into account for the
+ \code{*SE*} methods. The default method \code{"firstSEmax"} looks for
+ the smallest \eqn{k} such that its value \eqn{f(k)} is not more than 1
+ standard error away from the first local maximum.
+ This is similar but not the same as \code{"Tibs2001SEmax"}, Tibshirani
+ et al's recommendation of determining the number of clusters from the
+ gap statistics and their standard deviations.
+}
+\usage{
+clusGap(x, FUNcluster, K.max, B = 100, d.power = 1,
+ spaceH0 = c("scaledPCA", "original"),
+ verbose = interactive(), \dots)
+
+maxSE(f, SE.f,
+ method = c("firstSEmax", "Tibs2001SEmax", "globalSEmax",
+ "firstmax", "globalmax"),
+ SE.factor = 1)
+
+\S3method{print}{clusGap}(x, method = "firstSEmax", SE.factor = 1, \dots)
+
+\S3method{plot}{clusGap}(x, type = "b", xlab = "k", ylab = expression(Gap[k]),
+ main = NULL, do.arrows = TRUE,
+ arrowArgs = list(col="red3", length=1/16, angle=90, code=3), \dots)
+}
+\arguments{
+ \item{x}{numeric matrix or \code{\link{data.frame}}.}
+ \item{FUNcluster}{a \code{\link{function}} which accepts as first
+ argument a (data) matrix like \code{x}, second argument, say
+ \eqn{k, k\geq 2}{k, k >= 2}, the number of clusters desired,
+ and returns a \code{\link{list}} with a component named (or shortened to)
+ \code{cluster} which is a vector of length \code{n = nrow(x)} of
+ integers in \code{1:k} determining the clustering or grouping of the
+ \code{n} observations.}
+ \item{K.max}{the maximum number of clusters to consider, must be at
+ least two.}
+ \item{B}{integer, number of Monte Carlo (\dQuote{bootstrap}) samples.}
+ \item{d.power}{a positive integer specifying the power \eqn{p} which
+ is applied to the euclidean distances (\code{\link{dist}}) before
+ they are summed up to give \eqn{W(k)}. The default, \code{d.power = 1},
+ corresponds to the \dQuote{historical} \R implementation, whereas
+ \code{d.power = 2} corresponds to what Tibshirani et al had
+ proposed. This was found by Juan Gonzalez, in 2016-02.}%Feb.\sspace{}2016.}
+ \item{spaceH0}{a \code{\link{character}} string specifying the
+ space of the \eqn{H_0} distribution (of \emph{no} cluster). Both
+ \code{"scaledPCA"} and \code{"original"} use a uniform distribution
+ in a hyper cube and had been mentioned in the reference;
+ \code{"original"} has been added after a proposal (including code) by
+ Juan Gonzalez.}
+ \item{verbose}{integer or logical, determining if \dQuote{progress}
+ output should be printed. The default prints one bit per bootstrap
+ sample.}
+ \item{\dots}{(for \code{clusGap()}:) optionally further arguments for
+ \code{FUNcluster()}, see \code{kmeans} example below.}
+ \item{f}{numeric vector of \sQuote{function values}, of length
+ \eqn{K}, whose (\dQuote{1 SE respected}) maximum we want.}
+ \item{SE.f}{numeric vector of length \eqn{K} of standard errors of \code{f}.}
+ \item{method}{character string indicating how the \dQuote{optimal}
+ number of clusters, \eqn{\hat k}{k^}, is computed from the gap
+ statistics (and their standard deviations), or more generally how
+ the location \eqn{\hat k}{k^} of the maximum of \eqn{f_k}{f[k]}
+ should be determined.
+
+ %% -> ../R/clusGap.R
+ \describe{
+ \item{\code{"globalmax"}:}{simply corresponds to the global maximum,
+ i.e., is \code{which.max(f)}}
+ \item{\code{"firstmax"}:}{gives the location of the first \emph{local}
+ maximum.}
+ \item{\code{"Tibs2001SEmax"}:}{uses the criterion, Tibshirani et
+ al (2001) proposed: \dQuote{the smallest \eqn{k} such that \eqn{f(k)
+ \ge f(k+1) - s_{k+1}}}. Note that this chooses \eqn{k = 1}
+ when all standard deviations are larger than the differences
+ \eqn{f(k+1) - f(k)}.}
+ \item{\code{"firstSEmax"}:}{location of the first \eqn{f()} value
+ which is not smaller than the first \emph{local} maximum minus
+ \code{SE.factor * SE.f[]}, i.e., within an \dQuote{f S.E.} range
+ of that maximum (see also \code{SE.factor}).
+
+ This, the default, has been proposed by Martin Maechler in 2012,
+ when adding \code{clusGap()} to the \pkg{cluster} package, after
+ having seen the \code{"globalSEmax"} proposal (in code) and read
+ the \code{"Tibs2001SEmax"} proposal.}
+
+ \item{\code{"globalSEmax"}:}{(used in Dudoit and Fridlyand (2002),
+ supposedly following Tibshirani's proposition):
+ location of the first \eqn{f()} value which is not smaller than
+ the \emph{global} maximum minus \code{SE.factor * SE.f[]}, i.e.,
+ within an \dQuote{f S.E.} range of that maximum (see also
+ \code{SE.factor}).}
+ }
+ See the examples for a comparison in a simple case.
+ }
+ \item{SE.factor}{[When \code{method} contains \code{"SE"}] Determining
+ the optimal number of clusters, Tibshirani et al. proposed the
+ \dQuote{1 S.E.}-rule. Using an \code{SE.factor} \eqn{f}, the
+ \dQuote{f S.E.}-rule is used, more generally.}
+ %% plot():
+ \item{type, xlab, ylab, main}{arguments with the same meaning as in
+ \code{\link{plot.default}()}, with different defaults.}
+ \item{do.arrows}{logical indicating if (1 SE -)\dQuote{error bars}
+ should be drawn, via \code{\link{arrows}()}.}
+ \item{arrowArgs}{a list of arguments passed to \code{\link{arrows}()};
+ the default, notably \code{angle} and \code{code}, provide a style
+ matching usual error bars.}
+}
+\details{
+ The main result \code{<res>$Tab[,"gap"]} of course is from
+ bootstrapping aka Monte Carlo simulation and hence random, or
+ equivalently, depending on the initial random seed (see
+ \code{\link{set.seed}()}).
+ On the other hand, in our experience, using \code{B = 500} gives
+ quite precise results such that the gap plot is basically unchanged
+ after another run.
+}
+\value{
+ \code{clusGap(..)} returns an object of S3 class \code{"clusGap"},
+ basically a list with components
+ \item{Tab}{a matrix with \code{K.max} rows and 4 columns, named
+ "logW", "E.logW", "gap", and "SE.sim",
+ where \code{gap = E.logW - logW}, and \code{SE.sim} corresponds to
+ the standard error of \code{gap}, \code{SE.sim[k]=}\eqn{s_k}{s[k]},
+ where \eqn{s_k := \sqrt{1 + 1/B} sd^*(gap_j)}{s[k] := sqrt(1 + 1/B)
+ sd^*(gap[])}, and \eqn{sd^*()} is the standard deviation of the
+ simulated (\dQuote{bootstrapped}) gap values.
+ }
+ \item{call}{the \code{clusGap(..)} \code{\link{call}}.}
+ \item{spaceH0}{the \code{spaceH0} argument (\code{\link{match.arg}()}ed).}
+ \item{n}{number of observations, i.e., \code{nrow(x)}.}
+ \item{B}{input \code{B}}
+ \item{FUNcluster}{input function \code{FUNcluster}}
+}
+\references{
+ Tibshirani, R., Walther, G. and Hastie, T. (2001).
+ Estimating the number of data clusters via the Gap statistic.
+ \emph{Journal of the Royal Statistical Society B}, \bold{63}, 411--423.
+
+ Tibshirani, R., Walther, G. and Hastie, T. (2000).
+ Estimating the number of clusters in a dataset via the Gap statistic.
+ Technical Report. Stanford.
+
+ Dudoit, S. and Fridlyand, J. (2002)
+ A prediction-based resampling method for estimating the number of clusters in a
+ dataset. \emph{Genome Biology} \bold{3}(7).
+ \doi{10.1186/gb-2002-3-7-research0036}
+
+ Per Broberg (2006). SAGx: Statistical Analysis of the GeneChip.
+ R package version 1.9.7.% moved to Bioconductor sometime after 2006
+ % Martin Morgan (2018-10-15): Last change was in 2011
+ % URL <= ~2018: \url{http://home.swipnet.se/pibroberg/expression_hemsida1.html}
+ \url{http://www.bioconductor.org/packages/release/bioc/html/SAGx.html}
+}
+\author{
+ This function is originally based on the functions \code{gap} of
+ (Bioconductor) package \pkg{SAGx} by Per Broberg,
+ \code{gapStat()} from former package \pkg{SLmisc} by Matthias Kohl
+ and ideas from \code{gap()} and its methods of package \pkg{lga} by
+ Justin Harrington.
+
+ The current implementation is by Martin Maechler.
+
+ The implementation of \code{spaceH0 = "original"} is based on code
+ proposed by Juan Gonzalez.
+}
+\seealso{
+ \code{\link{silhouette}} for a much simpler less sophisticated
+ goodness of clustering measure.
+
+ \code{\link[fpc]{cluster.stats}()} in package \pkg{fpc} for
+ alternative measures.
+
+ %\code{\link[SAGx]{gap}} in Bioconductor package \pkg{SAGx}.
+}
+\examples{
+### --- maxSE() methods -------------------------------------------
+(mets <- eval(formals(maxSE)$method))
+fk <- c(2,3,5,4,7,8,5,4)
+sk <- c(1,1,2,1,1,3,1,1)/2
+## use plot.clusGap():
+plot(structure(class="clusGap", list(Tab = cbind(gap=fk, SE.sim=sk))))
+## Note that 'firstmax' and 'globalmax' are always at 3 and 6 :
+sapply(c(1/4, 1,2,4), function(SEf)
+ sapply(mets, function(M) maxSE(fk, sk, method = M, SE.factor = SEf)))
+
+### --- clusGap() -------------------------------------------------
+## ridiculously nicely separated clusters in 3 D :
+x <- rbind(matrix(rnorm(150, sd = 0.1), ncol = 3),
+ matrix(rnorm(150, mean = 1, sd = 0.1), ncol = 3),
+ matrix(rnorm(150, mean = 2, sd = 0.1), ncol = 3),
+ matrix(rnorm(150, mean = 3, sd = 0.1), ncol = 3))
+
+## Slightly faster way to use pam (see below)
+pam1 <- function(x,k) list(cluster = pam(x,k, cluster.only=TRUE))
+
+## We do not recommend using hier.clustering here, but if you want,
+## there is factoextra::hcut () or a cheap version of it
+hclusCut <- function(x, k, d.meth = "euclidean", ...)
+ list(cluster = cutree(hclust(dist(x, method=d.meth), ...), k=k))
+
+## You can manually set it before running this : doExtras <- TRUE # or FALSE
+if(!(exists("doExtras") && is.logical(doExtras)))
+ doExtras <- cluster:::doExtras()
+
+if(doExtras) {
+ ## Note we use B = 60 in the following examples to keep them "speedy".
+ ## ---- rather use the default B = 100 or more (e.g., B = 500) for your analysis!
+
+ ## note we can pass 'nstart = 20' to kmeans() :
+ gskmn <- clusGap(x, FUN = kmeans, nstart = 20, K.max = 8, B = 60)
+ gskmn #-> its print() method
+ plot(gskmn, main = "clusGap(., FUN = kmeans, n.start=20, B= 60)")
+ set.seed(12); system.time(
+ gsPam0 <- clusGap(x, FUN = pam, K.max = 8, B = 60)
+ )
+ set.seed(12); system.time(
+ gsPam1 <- clusGap(x, FUN = pam1, K.max = 8, B = 60)
+ )
+ ## and show that it gives the "same":
+ not.eq <- c("call", "FUNcluster"); n <- names(gsPam0)
+ eq <- n[!(n \%in\% not.eq)]
+ stopifnot(identical(gsPam1[eq], gsPam0[eq]))
+ print(gsPam1, method="globalSEmax")
+ print(gsPam1, method="globalmax")
+
+ print(gsHc <- clusGap(x, FUN = hclusCut, K.max = 8, B = 60))
+
+}# end {doExtras}
+
+gs.pam.RU <- clusGap(ruspini, FUN = pam1, K.max = 8, B = 60)
+gs.pam.RU
+plot(gs.pam.RU, main = "Gap statistic for the 'ruspini' data")
+mtext("k = 4 is best .. and k = 5 pretty close")
+
+\donttest{## This takes a minute..
+## No clustering ==> k = 1 ("one cluster") should be optimal:
+Z <- matrix(rnorm(256*3), 256,3)
+gsP.Z <- clusGap(Z, FUN = pam1, K.max = 8, B = 200)
+plot(gsP.Z, main = "clusGap(<iid_rnorm_p=3>) ==> k = 1 cluster is optimal")
+gsP.Z
+}%end{dont..}
+}
+\keyword{cluster}
diff --git a/man/clusplot.default.Rd b/man/clusplot.default.Rd
new file mode 100644
index 0000000..145be75
--- /dev/null
+++ b/man/clusplot.default.Rd
@@ -0,0 +1,266 @@
+\name{clusplot.default}
+\alias{clusplot.default}
+\title{Bivariate Cluster Plot (clusplot) Default Method}
+\description{
+ Creates a bivariate plot visualizing a partition (clustering) of the data. All
+ observation are represented by points in the plot, using principal
+ components or multidimensional scaling. Around each cluster an ellipse
+ is drawn.
+}
+\usage{
+\method{clusplot}{default}(x, clus, diss = FALSE,
+ s.x.2d = mkCheckX(x, diss), stand = FALSE,
+ lines = 2, shade = FALSE, color = FALSE,
+ labels= 0, plotchar = TRUE,
+ col.p = "dark green", col.txt = col.p,
+ col.clus = if(color) c(2, 4, 6, 3) else 5, cex = 1, cex.txt = cex,
+ span = TRUE,
+ add = FALSE,
+ xlim = NULL, ylim = NULL,
+ main = paste("CLUSPLOT(", deparse(substitute(x)),")"),
+ sub = paste("These two components explain",
+ round(100 * var.dec, digits = 2), "\% of the point variability."),
+ xlab = "Component 1", ylab = "Component 2",
+ verbose = getOption("verbose"),
+ \dots)
+}
+\arguments{
+ \item{x}{matrix or data frame, or dissimilarity matrix, depending on
+ the value of the \code{diss} argument.
+
+ In case of a matrix (alike), each row corresponds to an observation,
+ and each column corresponds to a variable. All variables must be
+ numeric. Missing values (\code{\link{NA}}s) are allowed. They are
+ replaced by the median of the corresponding variable. When some
+ variables or some observations contain only missing values, the
+ function stops with a warning message.
+
+ In case of a dissimilarity matrix, \code{x} is the output of
+ \code{\link{daisy}} or \code{\link{dist}} or a symmetric matrix. Also,
+ a vector of length \eqn{n*(n-1)/2} is allowed (where \eqn{n} is the
+ number of observations), and will be interpreted in the same way as
+ the output of the above-mentioned functions. Missing values (NAs)
+ are not allowed.
+ }
+ \item{clus}{
+ a vector of length n representing a clustering of \code{x}. For
+ each observation the vector lists the number or name of the cluster
+ to which it has been assigned. \code{clus} is often the clustering
+ component of the output of \code{\link{pam}}, \code{\link{fanny}} or
+ \code{\link{clara}}.}
+ \item{diss}{
+ logical indicating if \code{x} will be considered as a dissimilarity
+ matrix or a matrix of observations by variables (see \code{x}
+ argument above).}
+ \item{s.x.2d}{a \code{\link{list}} with components named \code{x} (a \eqn{n
+ \times 2}{n x 2} matrix; typically something like principal components of
+ original data), \code{labs} and \code{var.dec}.}% FIXME: 'labs' and 'var.dec' are not always needed
+ \item{stand}{
+ logical flag: if true, then the representations of the n observations in the
+ 2-dimensional plot are standardized.
+ }
+ \item{lines}{
+ integer out of \code{0, 1, 2}, used to obtain an idea of the
+ distances between ellipses. The distance between two ellipses E1
+ and E2 is measured along the line connecting the centers \eqn{m1}
+ and \eqn{m2} of the two ellipses.
+
+ In case E1 and E2 overlap on the line through \eqn{m1} and \eqn{m2},
+ no line is drawn. Otherwise, the result depends on the value of
+ \code{lines}: If
+ \describe{
+ \item{lines = 0,}{no distance lines will appear on the plot;}
+ \item{lines = 1,}{the line segment between \eqn{m1} and \eqn{m2} is drawn;}
+ \item{lines = 2,}{a line segment between the boundaries of E1 and
+ E2 is drawn (along the line connecting \eqn{m1} and \eqn{m2}).}
+ }
+ }
+ \item{shade}{
+ logical flag: if TRUE, then the ellipses are shaded in relation to their
+ density. The density is the number of points in the cluster divided by the
+ area of the ellipse.
+ }
+ \item{color}{
+ logical flag: if TRUE, then the ellipses are colored with respect to their
+ density. With increasing density, the colors are light blue, light
+ green, red and purple. To see these colors on the graphics device, an
+ appropriate color scheme should be selected (we recommend a white
+ background).}
+ \item{labels}{
+ integer code, currently one of 0,1,2,3,4 and 5. If
+ \describe{
+ \item{labels= 0,}{no labels are placed in the plot;}
+ \item{labels= 1,}{points and ellipses can be identified in the plot (see
+ \code{\link{identify}});}
+ \item{labels= 2,}{all points and ellipses are labelled in the plot;}
+ \item{labels= 3,}{only the points are labelled in the plot;}
+ \item{labels= 4,}{only the ellipses are labelled in the plot.}
+ \item{labels= 5,}{the ellipses are labelled in the plot, and
+ points can be identified.}
+ }
+ The levels of the vector \code{clus} are taken as labels for the
+ clusters. The labels
+ of the points are the rownames of \code{x} if \code{x} is matrix like.
+ Otherwise (\code{diss = TRUE}), \code{x} is a vector, point labels
+ can be attached to \code{x} as a "Labels" attribute
+ (\code{attr(x,"Labels")}), as is done for the output of
+ \code{\link{daisy}}.
+
+ A possible \code{\link{names}} attribute of \code{clus} will not
+ be taken into account.
+ }
+ \item{plotchar}{
+ logical flag: if TRUE, then the plotting symbols differ for points belonging
+ to different clusters.
+ }
+ \item{span}{
+ logical flag: if TRUE, then each cluster is represented by the ellipse with
+ smallest area containing all its points. (This is a special case of the
+ minimum volume ellipsoid.)\cr
+ If FALSE, the ellipse is based on the mean and covariance matrix of the
+ same points. While this is faster to compute, it often yields a much
+ larger ellipse.
+
+ There are also some special cases: When a cluster consists of only
+ one point, a tiny circle is drawn around it. When the points of a
+ cluster fall on a straight line, \code{span=FALSE} draws a narrow
+ ellipse around it and \code{span=TRUE} gives the exact line segment.
+ }
+ \item{add}{logical indicating if ellipses (and labels if \code{labels}
+ is true) should be \emph{added} to an already existing plot. If
+ false, neither a \code{\link{title}} nor sub title, see \code{sub},
+ is written.}
+ \item{col.p}{color code(s) used for the observation points.}
+ \item{col.txt}{color code(s) used for the labels (if \code{labels >= 2}).}
+ \item{col.clus}{color code for the ellipses (and their labels);
+ only one if color is false (as per default).}
+ \item{cex, cex.txt}{character \bold{ex}pansion (size), for the point
+ symbols and point labels, respectively.}
+ \item{xlim, ylim}{numeric vectors of length 2, giving the x- and y-
+ ranges as in \code{\link{plot.default}}.}
+ \item{main}{main title for the plot; by default, one is constructed.}
+ \item{sub}{sub title for the plot; by default, one is constructed.}
+ \item{xlab, ylab}{x- and y- axis labels for the plot, with defaults.}
+ \item{verbose}{a logical indicating, if there should be extra
+ diagnostic output; mainly for \sQuote{debugging}.}
+ \item{\dots}{Further graphical parameters may also be supplied, see
+ \code{\link{par}}.}
+}% End Arguments
+
+\value{
+ An invisible list with components:
+ \item{Distances}{
+ When \code{lines} is 1 or 2 we obtain a k by k matrix (k is the number of
+ clusters). The element in \code{[i,j]} is the distance between ellipse
+ i and ellipse j.\cr
+ If \code{lines = 0}, then the value of this component is \code{NA}.
+ }
+ \item{Shading}{
+ A vector of length k (where k is the number of clusters), containing the
+ amount of shading per cluster. Let y be a vector where element i is the
+ ratio between the number of points in cluster i and the area of ellipse i.
+ When the cluster i is a line segment, y[i] and the density of the cluster are
+ set to \code{NA}. Let z be the sum of all the elements of y without the NAs.
+ Then we put shading = y/z *37 + 3 .
+ }
+}
+
+\section{Side Effects}{
+ a visual display of the clustering is plotted on the current graphics device.
+}
+\details{
+ \code{clusplot} uses function calls
+ \code{\link{princomp}(*, cor = (ncol(x) > 2))} or
+ \code{\link{cmdscale}(*, add=TRUE)}, respectively, depending on
+ \code{diss} being false or true. These functions are data reduction
+ techniques to represent the data in a bivariate plot.
+
+ Ellipses are then drawn to indicate the clusters. The further layout of the
+ plot is determined by the optional arguments.
+}
+\note{
+ When we have 4 or fewer clusters, then \code{color=TRUE} gives
+ every cluster a different color. When there are more than 4 clusters,
+ clusplot uses the function \code{\link{pam}} to cluster the
+ densities into 4 groups such that ellipses with nearly the same
+ density get the same color. \code{col.clus} specifies the colors used.
+
+ The \code{col.p} and \code{col.txt} arguments, added for \R,
+ are recycled to have length the number of observations.
+ If \code{col.p} has more than one value, using \code{color = TRUE} can
+ be confusing because of a mix of point and ellipse colors.
+}
+\references{
+ Pison, G., Struyf, A. and Rousseeuw, P.J. (1999)
+ Displaying a Clustering with CLUSPLOT,
+ \emph{Computational Statistics and Data Analysis}, \bold{30}, 381--392.\cr
+%% Jan.2015 : no longer there:
+ %% A version of this is available as technical report from
+ %% \url{http://www.agoras.ua.ac.be/abstract/Disclu99.htm}
+
+ Kaufman, L. and Rousseeuw, P.J. (1990).
+ \emph{Finding Groups in Data: An Introduction to Cluster Analysis.}
+ Wiley, New York.
+
+ Struyf, A., Hubert, M. and Rousseeuw, P.J. (1997).
+ Integrating Robust Clustering Techniques in S-PLUS,
+ \emph{Computational Statistics and Data Analysis}, \bold{26}, 17--37.
+}
+\seealso{
+ \code{\link{princomp}}, \code{\link{cmdscale}}, \code{\link{pam}},
+ \code{\link{clara}}, \code{\link{daisy}}, \code{\link{par}},
+ \code{\link{identify}}, \code{\link[MASS]{cov.mve}},
+ \code{\link{clusplot.partition}}.
+}
+\examples{
+## plotting votes.diss(dissimilarity) in a bivariate plot and
+## partitioning into 2 clusters
+data(votes.repub)
+votes.diss <- daisy(votes.repub)
+pamv <- pam(votes.diss, 2, diss = TRUE)
+clusplot(pamv, shade = TRUE)
+## is the same as
+votes.clus <- pamv$clustering
+clusplot(votes.diss, votes.clus, diss = TRUE, shade = TRUE)
+## Now look at components 3 and 2 instead of 1 and 2:
+str(cMDS <- cmdscale(votes.diss, k=3, add=TRUE))
+clusplot(pamv, s.x.2d = list(x=cMDS$points[, c(3,2)],
+ labs=rownames(votes.repub), var.dec=NA),
+ shade = TRUE, col.p = votes.clus,
+ sub="", xlab = "Component 3", ylab = "Component 2")
+
+clusplot(pamv, col.p = votes.clus, labels = 4)# color points and label ellipses
+# "simple" cheap ellipses: larger than minimum volume:
+# here they are *added* to the previous plot:
+clusplot(pamv, span = FALSE, add = TRUE, col.clus = "midnightblue")
+
+## Setting a small *label* size:
+clusplot(votes.diss, votes.clus, diss = TRUE, labels = 3, cex.txt = 0.6)
+
+if(dev.interactive()) { # uses identify() *interactively* :
+ clusplot(votes.diss, votes.clus, diss = TRUE, shade = TRUE, labels = 1)
+ clusplot(votes.diss, votes.clus, diss = TRUE, labels = 5)# ident. only points
+}
+
+## plotting iris (data frame) in a 2-dimensional plot and partitioning
+## into 3 clusters.
+data(iris)
+iris.x <- iris[, 1:4]
+cl3 <- pam(iris.x, 3)$clustering
+op <- par(mfrow= c(2,2))
+clusplot(iris.x, cl3, color = TRUE)
+U <- par("usr")
+## zoom in :
+rect(0,-1, 2,1, border = "orange", lwd=2)
+clusplot(iris.x, cl3, color = TRUE, xlim = c(0,2), ylim = c(-1,1))
+box(col="orange",lwd=2); mtext("sub region", font = 4, cex = 2)
+## or zoom out :
+clusplot(iris.x, cl3, color = TRUE, xlim = c(-4,4), ylim = c(-4,4))
+mtext("'super' region", font = 4, cex = 2)
+rect(U[1],U[3], U[2],U[4], lwd=2, lty = 3)
+
+# reset graphics
+par(op)
+}
+\keyword{cluster}
+\keyword{hplot}
diff --git a/man/clusplot.partition.Rd b/man/clusplot.partition.Rd
new file mode 100644
index 0000000..8d8ebca
--- /dev/null
+++ b/man/clusplot.partition.Rd
@@ -0,0 +1,69 @@
+\name{clusplot}
+\alias{clusplot}
+\alias{clusplot.partition}
+\title{Bivariate Cluster Plot (of a Partitioning Object)}
+\description{
+ Draws a 2-dimensional \dQuote{clusplot} (clustering plot) on the
+ current graphics device.
+ The generic function has a default and a \code{partition} method.
+}
+\usage{
+clusplot(x, \dots)
+
+\method{clusplot}{partition}(x, main = NULL, dist = NULL, \dots)
+}
+\arguments{
+ \item{x}{an \R object, here, specifically an object of class
+ \code{"partition"}, e.g. created by one of the functions
+ \code{\link{pam}}, \code{\link{clara}}, or \code{\link{fanny}}.}
+ \item{main}{title for the plot; when \code{NULL} (by default), a title
+ is constructed, using \code{x$call}.}
+ \item{dist}{when \code{x} has neither a \code{diss} nor a
+ \code{data} component, e.g., for \code{\link{pam}(dist(*),
+ keep.diss=FALSE)}, \code{dist} must specify the dissimilarity for the
+ clusplot.}
+ \item{\dots}{optional arguments passed to methods, notably the
+ \code{\link{clusplot.default}} method (except for the \code{diss}
+ one) may also be supplied to this function. Many graphical parameters
+ (see \code{\link{par}}) may also be supplied as arguments here.}
+}
+\section{Side Effects}{
+ a 2-dimensional clusplot is created on the current graphics device.
+}
+\value{
+ For the \code{partition} (and \code{default}) method: An invisible
+ list with components \code{Distances} and \code{Shading}, as for
+ \code{\link{clusplot.default}}, see there.
+}
+\details{
+ The \code{clusplot.partition()} method relies on \code{\link{clusplot.default}}.
+
+ If the clustering algorithms \code{pam}, \code{fanny} and \code{clara}
+ are applied to a data matrix of observations-by-variables then a
+ clusplot of the resulting clustering can always be drawn. When the
+ data matrix contains missing values and the clustering is performed
+ with \code{\link{pam}} or \code{\link{fanny}}, the dissimilarity
+ matrix will be given as input to \code{clusplot}. When the clustering
+ algorithm \code{\link{clara}} was applied to a data matrix with NAs
+ then clusplot will replace the missing values as described in
+ \code{\link{clusplot.default}}, because a dissimilarity matrix is not
+ available.
+}
+\seealso{\code{\link{clusplot.default}} for references;
+ \code{\link{partition.object}}, \code{\link{pam}},
+ \code{\link{pam.object}}, \code{\link{clara}},
+ \code{\link{clara.object}}, \code{\link{fanny}},
+ \code{\link{fanny.object}}, \code{\link{par}}.
+}
+\examples{ ## For more, see ?clusplot.default
+
+## generate 25 objects, divided into 2 clusters.
+x <- rbind(cbind(rnorm(10,0,0.5), rnorm(10,0,0.5)),
+ cbind(rnorm(15,5,0.5), rnorm(15,5,0.5)))
+clusplot(pam(x, 2))
+## add noise, and try again :
+x4 <- cbind(x, rnorm(25), rnorm(25))
+clusplot(pam(x4, 2))
+}
+\keyword{cluster}
+\keyword{hplot}
diff --git a/man/cluster-internal.Rd b/man/cluster-internal.Rd
new file mode 100644
index 0000000..f01d71b
--- /dev/null
+++ b/man/cluster-internal.Rd
@@ -0,0 +1,13 @@
+\name{cluster-internal}
+\alias{meanabsdev}
+\title{Internal cluster functions}
+\description{
+ Internal cluster functions.
+}
+\usage{
+meanabsdev(y)
+}
+\details{
+ These are not to be called by the user.
+}
+\keyword{internal}
diff --git a/man/coef.hclust.Rd b/man/coef.hclust.Rd
new file mode 100644
index 0000000..e4c6a94
--- /dev/null
+++ b/man/coef.hclust.Rd
@@ -0,0 +1,62 @@
+\name{coef.hclust}
+\alias{coefHier}
+\alias{coef.hclust}
+\alias{coef.twins}
+\title{Agglomerative / Divisive Coefficient for 'hclust' Objects}
+\description{
+ Computes the \dQuote{agglomerative coefficient} (aka \dQuote{divisive
+ coefficient} for \code{\link{diana}}), measuring the
+ clustering structure of the dataset.
+
+ For each observation i, denote by \eqn{m(i)} its dissimilarity to the
+ first cluster it is merged with, divided by the dissimilarity of the
+ merger in the final step of the algorithm. The agglomerative
+ coefficient is the average of all \eqn{1 - m(i)}. It can also be seen
+ as the average width (or the percentage filled) of the banner plot.
+
+ \code{coefHier()} directly interfaces to the underlying C code, and
+ \dQuote{proves} that \emph{only} \code{object$height} is needed to
+ compute the coefficient.
+
+ Because it grows with the number of observations, this measure should not
+ be used to compare datasets of very different sizes.
+}
+\usage{
+coefHier(object)
+coef.hclust(object, \dots)%-- we export this, on purpose
+\method{coef}{hclust}(object, \dots)
+\method{coef}{twins}(object, \dots)
+}
+\arguments{
+ \item{object}{an object of class \code{"hclust"} or \code{"twins"},
+ i.e., typically the result of
+ \code{\link{hclust}(.)},\code{\link{agnes}(.)}, or \code{\link{diana}(.)}.
+
+ Since \code{coef.hclust} only uses \code{object$height} and
+ \code{object$merge}, \code{object} can be any list-like object with
+ appropriate \code{merge} and \code{height} components.
+
+ For \code{coefHier}, only \code{object$height} is needed.
+ }
+ \item{\dots}{currently unused potential further arguments}
+}
+\value{
+ a number specifying the \emph{agglomerative} (or \emph{divisive} for
+ \code{diana} objects) coefficient as defined by Kaufman and Rousseeuw,
+ see \code{\link{agnes.object} $ ac} or \code{\link{diana.object} $ dc}.
+}
+\examples{
+data(agriculture)
+aa <- agnes(agriculture)
+coef(aa) # really just extracts aa$ac
+coef(as.hclust(aa))# recomputes
+coefHier(aa) # ditto
+\dontshow{
+ stopifnot(all.equal(coef(aa), coefHier(aa)))
+ d.a <- dist(agriculture, "manhattan")
+ for (m in c("average", "single", "complete"))
+ stopifnot(all.equal(coef(hclust(d.a, method=m)),
+ coef(agnes (d.a, method=m)), tol=1e-13))
+}
+}
+\keyword{cluster}
diff --git a/man/daisy.Rd b/man/daisy.Rd
new file mode 100644
index 0000000..3609a53
--- /dev/null
+++ b/man/daisy.Rd
@@ -0,0 +1,218 @@
+\name{daisy}
+\alias{daisy}
+\title{Dissimilarity Matrix Calculation}
+\concept{Gower's formula}
+\concept{Gower's distance}
+\concept{Gower's coefficient}% FIXME: see ../TODO-MM
+\description{
+ Compute all the pairwise dissimilarities (distances) between observations
+ in the data set. The original variables may be of mixed types. In
+ that case, or whenever \code{metric = "gower"} is set, a
+ generalization of Gower's formula is used, see \sQuote{Details}
+ below.
+}
+\usage{
+daisy(x, metric = c("euclidean", "manhattan", "gower"),
+ stand = FALSE, type = list(), weights = rep.int(1, p),
+ warnBin = warnType, warnAsym = warnType, warnConst = warnType,
+ warnType = TRUE)
+}
+\arguments{
+ \item{x}{
+ numeric matrix or data frame, of dimension \eqn{n\times p}{n x p},
+ say. Dissimilarities will be computed
+ between the rows of \code{x}. Columns of mode \code{numeric}
+ (i.e. all columns when \code{x} is a matrix) will be recognized as
+ interval scaled variables, columns of class \code{factor} will be
+ recognized as nominal variables, and columns of class \code{ordered}
+ will be recognized as ordinal variables. Other variable types
+ should be specified with the \code{type} argument. Missing values
+ (\code{\link{NA}}s) are allowed.
+ }
+ \item{metric}{
+ character string specifying the metric to be used.
+ The currently available options are \code{"euclidean"} (the default),
+ \code{"manhattan"} and \code{"gower"}.\cr
+ Euclidean distances are root sum-of-squares of differences, and
+ manhattan distances are the sum of absolute differences.
+
+ \dQuote{Gower's distance} is chosen by metric \code{"gower"}
+ or automatically if some columns of \code{x} are not numeric. Also
+ known as Gower's coefficient (1971),
+ expressed as a dissimilarity, this implies that a particular
+ standardisation will be applied to each variable, and the
+ \dQuote{distance} between two units is the sum of all the
+ variable-specific distances, see the details section.
+ }
+ \item{stand}{logical flag: if TRUE, then the measurements in \code{x}
+ are standardized before calculating the
+ dissimilarities. Measurements are standardized for each variable
+ (column), by subtracting the variable's mean value and dividing by
+ the variable's mean absolute deviation.
+
+ If not all columns of \code{x} are numeric, \code{stand} will
+ be ignored and Gower's standardization (based on the
+ \code{\link{range}}) will be applied in any case, see argument
+ \code{metric}, above, and the details section.
+ }
+ \item{type}{list for specifying some (or all) of the types of the
+ variables (columns) in \code{x}. The list may contain the following
+ components: \code{"ordratio"} (ratio scaled variables to be treated as
+ ordinal variables), \code{"logratio"} (ratio scaled variables that
+ must be logarithmically transformed), \code{"asymm"} (asymmetric
+ binary) and \code{"symm"} (symmetric binary variables). Each
+ component's value is a vector, containing the names or the numbers
+ of the corresponding columns of \code{x}.
+ Variables not mentioned in the \code{type} list are interpreted as
+ usual (see argument \code{x}).
+ }
+ \item{weights}{an optional numeric vector of length \eqn{p}(=\code{ncol(x)}); to
+ be used in \dQuote{case 2} (mixed variables, or \code{metric = "gower"}),
+ specifying a weight for each variable (\code{x[,k]}) instead of
+ \eqn{1} in Gower's original formula.}
+ \item{warnBin, warnAsym, warnConst}{logicals indicating if the
+ corresponding type checking warnings should be signalled (when found).}
+ \item{warnType}{logical indicating if \emph{all} the type checking
+ warnings should be active or not.}
+}% end{arg..}
+
+\value{
+ an object of class \code{"dissimilarity"} containing the
+ dissimilarities among the rows of \code{x}. This is typically the
+ input for the functions \code{pam}, \code{fanny}, \code{agnes} or
+ \code{diana}. For more details, see \code{\link{dissimilarity.object}}.
+}
+\details{
+ The original version of \code{daisy} is fully described in chapter 1
+ of Kaufman and Rousseeuw (1990).
+ Compared to \code{\link{dist}} whose input must be numeric
+ variables, the main feature of \code{daisy} is its ability to handle
+ other variable types as well (e.g. nominal, ordinal, (a)symmetric
+ binary) even when different types occur in the same data set.
+
+ The handling of nominal, ordinal, and (a)symmetric binary data is
+ achieved by using the general dissimilarity coefficient of Gower
+ (1971). If \code{x} contains any columns of these
+ data-types, both arguments \code{metric} and \code{stand} will be
+ ignored and Gower's coefficient will be used as the metric. This can
+ also be activated for purely numeric data by \code{metric = "gower"}.
+ With that, each variable (column) is first standardized by dividing
+ each entry by the range of the corresponding variable, after
+ subtracting the minimum value; consequently the rescaled variable has
+ range \eqn{[0,1]}, exactly.
+ %% FIXME: Use something like "gowerRob" which uses *robust* rescaling
+
+ Note that setting the type to \code{symm} (symmetric binary) gives the
+ same dissimilarities as using \emph{nominal} (which is chosen for
+ non-ordered factors) only when no missing values are present, and more
+ efficiently.
+
+ Note that \code{daisy} signals a warning when 2-valued numerical
+ variables do not have an explicit \code{type} specified, because the
+ reference authors recommend to consider using \code{"asymm"}; the
+ warning may be silenced by \code{warnBin = FALSE}.
+
+ In the \code{daisy} algorithm, missing values in a row of x are not
+ included in the dissimilarities involving that row. There are two
+ main cases,
+ \enumerate{
+ \item If all variables are interval scaled (and \code{metric} is
+ \emph{not} \code{"gower"}), the metric is "euclidean", and
+ \eqn{n_g} is the number of columns in which
+ neither row i nor j has NAs, then the dissimilarity d(i,j) returned is
+ \eqn{\sqrt{p/n_g}}{sqrt(p/n_g)} (\eqn{p=}ncol(x)) times the
+ Euclidean distance between the two vectors of length \eqn{n_g}
+ shortened to exclude NAs. The rule is similar for the "manhattan"
+ metric, except that the coefficient is \eqn{p/n_g}. If \eqn{n_g = 0},
+ the dissimilarity is NA.
+
+ \item When some variables have a type other than interval scaled, or
+ if \code{metric = "gower"} is specified, the
+ dissimilarity between two rows is the weighted mean of the contributions of
+ each variable. Specifically,
+ \deqn{d_{ij} = d(i,j) = \frac{\sum_{k=1}^p w_k \delta_{ij}^{(k)} d_{ij}^{(k)}}{
+ \sum_{k=1}^p w_k \delta_{ij}^{(k)}}.
+ }{d_ij = d(i,j) = sum(k=1:p; w_k delta(ij;k) d(ij,k)) / sum(k=1:p; w_k delta(ij;k)).}
+ In other words, \eqn{d_{ij}}{d_ij} is a weighted mean of
+ \eqn{d_{ij}^{(k)}}{d(ij,k)} with weights \eqn{w_k \delta_{ij}^{(k)}}{w_k delta(ij;k)},
+ where \eqn{w_k}\code{= weights[k]},
+ \eqn{\delta_{ij}^{(k)}}{delta(ij;k)} is 0 or 1, and
+ \eqn{d_{ij}^{(k)}}{d(ij,k)}, the k-th variable contribution to the
+ total distance, is a distance between \code{x[i,k]} and \code{x[j,k]},
+ see below.
+
+ The 0-1 weight \eqn{\delta_{ij}^{(k)}}{delta(ij;k)} becomes zero
+ when the variable \code{x[,k]} is missing in either or both rows
+ (i and j), or when the variable is asymmetric binary and both
+ values are zero. In all other situations it is 1.
+
+ The contribution \eqn{d_{ij}^{(k)}}{d(ij,k)} of a nominal or binary variable to the total
+ dissimilarity is 0 if both values are equal, 1 otherwise.
+ The contribution of other variables is the absolute difference of
+ both values, divided by the total range of that variable. Note
+ that \dQuote{standard scoring} is applied to ordinal variables,
+ i.e., they are replaced by their integer codes \code{1:K}. Note
+ that this is not the same as using their ranks (since there
+ typically are ties).
+ % contrary to what Kaufman and Rousseeuw write in their book, and
+ % the original help page.
+
+ As the individual contributions \eqn{d_{ij}^{(k)}}{d(ij,k)} are in
+ \eqn{[0,1]}, the dissimilarity \eqn{d_{ij}}{d_ij} will remain in
+ this range.
+ If all weights \eqn{w_k \delta_{ij}^{(k)}}{w_k delta(ij;k)} are zero,
+ the dissimilarity is set to \code{\link{NA}}.
+ }
+}
+\section{Background}{
+ Dissimilarities are used as inputs to cluster analysis and
+ multidimensional scaling. The choice of metric may have a
+ large impact.
+}
+\references{
+ Gower, J. C. (1971)
+ A general coefficient of similarity and some of its properties,
+ \emph{Biometrics} \bold{27}, 857--874.
+
+ Kaufman, L. and Rousseeuw, P.J. (1990)
+ \emph{Finding Groups in Data: An Introduction to Cluster Analysis}.
+ Wiley, New York.
+
+ Struyf, A., Hubert, M. and Rousseeuw, P.J. (1997)
+ Integrating Robust Clustering Techniques in S-PLUS,
+ \emph{Computational Statistics and Data Analysis} \bold{26}, 17--37.
+}
+\author{
+ Anja Struyf, Mia Hubert, and Peter Rousseeuw, for the original
+ version.
+ \cr
+ Martin Maechler improved the \code{\link{NA}} handling and
+ \code{type} specification checking, and extended functionality to
+ \code{metric = "gower"} and the optional \code{weights} argument.
+}
+\seealso{
+ \code{\link{dissimilarity.object}}, \code{\link{dist}},
+ \code{\link{pam}}, \code{\link{fanny}}, \code{\link{clara}},
+ \code{\link{agnes}}, \code{\link{diana}}.
+}
+\examples{
+data(agriculture)
+## Example 1 in ref:
+## Dissimilarities using Euclidean metric and without standardization
+d.agr <- daisy(agriculture, metric = "euclidean", stand = FALSE)
+d.agr
+as.matrix(d.agr)[,"DK"] # via as.matrix.dist(.)
+## compare with
+as.matrix(daisy(agriculture, metric = "gower"))
+
+data(flower)
+## Example 2 in ref
+summary(dfl1 <- daisy(flower, type = list(asymm = 3)))
+summary(dfl2 <- daisy(flower, type = list(asymm = c(1, 3), ordratio = 7)))
+## this failed earlier:
+summary(dfl3 <- daisy(flower,
+ type = list(asymm = c("V1", "V3"), symm= 2,
+ ordratio= 7, logratio= 8)))
+
+}
+\keyword{cluster}
diff --git a/man/diana.Rd b/man/diana.Rd
new file mode 100644
index 0000000..2519c2a
--- /dev/null
+++ b/man/diana.Rd
@@ -0,0 +1,169 @@
+\name{diana}
+\title{DIvisive ANAlysis Clustering}
+\alias{diana}
+\alias{diana.object}
+\description{
+ Computes a divisive hierarchical clustering of the dataset
+ returning an object of class \code{diana}.
+}
+\usage{
+diana(x, diss = inherits(x, "dist"), metric = "euclidean", stand = FALSE,
+ stop.at.k = FALSE,
+ keep.diss = n < 100, keep.data = !diss, trace.lev = 0)
+}
+\arguments{
+ \item{x}{
+ data matrix or data frame, or dissimilarity matrix or object,
+ depending on the value of the \code{diss} argument.
+
+ In case of a matrix or data frame, each row corresponds to an observation,
+ and each column corresponds to a variable. All variables must be numeric.
+ Missing values (\code{\link{NA}}s) \emph{are} allowed.
+
+ In case of a dissimilarity matrix, \code{x} is typically the output
+ of \code{\link{daisy}} or \code{\link{dist}}. Also a vector of
+ length n*(n-1)/2 is allowed (where n is the number of observations),
+ and will be interpreted in the same way as the output of the
+ above-mentioned functions. Missing values (NAs) are \emph{not} allowed.
+ }
+ \item{diss}{
+ logical flag: if TRUE (default for \code{dist} or
+ \code{dissimilarity} objects), then \code{x} will be considered as a
+ dissimilarity matrix. If FALSE, then \code{x} will be considered as
+ a matrix of observations by variables.
+ }
+ \item{metric}{
+ character string specifying the metric to be used for calculating
+ dissimilarities between observations.\cr
+ The currently available options are "euclidean" and
+ "manhattan". Euclidean distances are root sum-of-squares of
+ differences, and manhattan distances are the sum of absolute
+ differences. If \code{x} is already a dissimilarity matrix, then
+ this argument will be ignored.
+ }
+ \item{stand}{logical; if true, the measurements in \code{x} are
+ standardized before calculating the dissimilarities. Measurements
+ are standardized for each variable (column), by subtracting the
+ variable's mean value and dividing by the variable's mean absolute
+ deviation. If \code{x} is already a dissimilarity matrix, then this
+ argument will be ignored.}
+ \item{stop.at.k}{logical or integer, \code{FALSE} by default.
+ Otherwise must be integer, say \eqn{k}, in \eqn{\{1,2,..,n\}},
+ specifying that the \code{diana} algorithm should stop early.
+ %_TODO_ namely after k splits _OR_ with k final clusters
+ Non-default NOT YET IMPLEMENTED.}
+ \item{keep.diss, keep.data}{logicals indicating if the dissimilarities
+ and/or input data \code{x} should be kept in the result. Setting
+ these to \code{FALSE} can give much smaller results and hence even save
+ memory allocation \emph{time}.}
+ \item{trace.lev}{integer specifying a trace level for printing
+ diagnostics during the algorithm. Default \code{0} does not print
+ anything; higher values print increasingly more.}
+}
+\value{
+ an object of class \code{"diana"} representing the clustering;
+ this class has methods for the following generic functions:
+ \code{print}, \code{summary}, \code{plot}.
+
+ Further, the class \code{"diana"} inherits from
+ \code{"twins"}. Therefore, the generic function \code{\link{pltree}} can be
+ used on a \code{diana} object, and \code{\link{as.hclust}} and
+ \code{\link{as.dendrogram}} methods are available.
+
+ A legitimate \code{diana} object is a list with the following components:
+ \item{order}{
+ a vector giving a permutation of the original observations to allow for
+ plotting, in the sense that the branches of a clustering tree will
+ not cross.
+ }
+ \item{order.lab}{
+ a vector similar to \code{order}, but containing observation labels
+ instead of observation numbers. This component is only available if
+ the original observations were labelled.
+ }
+ \item{height}{a vector with the diameters of the clusters prior to splitting.
+ }
+ \item{dc}{
+ the divisive coefficient, measuring the clustering structure of the
+ dataset. For each observation i, denote by \eqn{d(i)} the diameter of the
+ last cluster to which it belongs (before being split off as a single
+ observation), divided by the diameter of the whole dataset. The
+ \code{dc} is the average of all \eqn{1 - d(i)}. It can also be seen
+ as the average width (or the percentage filled) of the banner plot.
+ Because \code{dc} grows with the number of observations, this
+ measure should not be used to compare datasets of very different
+ sizes.
+ }
+ \item{merge}{
+ an (n-1) by 2 matrix, where n is the number of
+ observations. Row i of \code{merge} describes the split at step n-i of
+ the clustering. If a number \eqn{j} in row r is negative, then the single
+ observation \eqn{|j|} is split off at stage n-r. If j is positive, then the
+ cluster that will be split at stage n-j (described by row j), is
+ split off at stage n-r.
+ }
+ \item{diss}{
+ an object of class \code{"dissimilarity"}, representing the total
+ dissimilarity matrix of the dataset.
+ }
+ \item{data}{
+ a matrix containing the original or standardized measurements, depending
+ on the \code{stand} option of the function \code{diana}. If a
+ dissimilarity matrix was given as input structure, then this component
+ is not available.
+ }
+
+}
+\details{
+\code{diana} is fully described in chapter 6 of Kaufman and Rousseeuw (1990).
+It is probably unique in computing a divisive hierarchy, whereas most
+other software for hierarchical clustering is agglomerative.
+Moreover, \code{diana} provides (a) the divisive coefficient
+(see \code{diana.object}) which measures the amount of clustering structure
+found; and (b) the banner, a novel graphical display
+(see \code{plot.diana}).
+
+The \code{diana}-algorithm constructs a hierarchy of clusterings,
+starting with one large
+cluster containing all n observations. Clusters are divided until each cluster
+contains only a single observation.\cr
+At each stage, the cluster with the largest diameter is selected.
+(The diameter of a cluster is the largest dissimilarity between any
+two of its observations.)\cr
+To divide the selected cluster, the algorithm first looks for its most
+disparate observation (i.e., which has the largest average dissimilarity to the
+other observations of the selected cluster). This observation initiates the
+"splinter group". In subsequent steps, the algorithm reassigns observations
+that are closer to the "splinter group" than to the "old party". The result
+is a division of the selected cluster into two new clusters.
+}
+\seealso{
+ \code{\link{agnes}} also for background and references;
+ \code{\link{cutree}} (and \code{\link{as.hclust}}) for grouping
+ extraction; \code{\link{daisy}}, \code{\link{dist}},
+ \code{\link{plot.diana}}, \code{\link{twins.object}}.
+}
+\examples{
+data(votes.repub)
+dv <- diana(votes.repub, metric = "manhattan", stand = TRUE)
+print(dv)
+plot(dv)
+
+## Cut into 2 groups:
+dv2 <- cutree(as.hclust(dv), k = 2)
+table(dv2) # 8 and 42 group members
+rownames(votes.repub)[dv2 == 1]
+
+## For two groups, does the metric matter ?
+dv0 <- diana(votes.repub, stand = TRUE) # default: Euclidean
+dv.2 <- cutree(as.hclust(dv0), k = 2)
+table(dv2 == dv.2)## identical group assignments
+
+str(as.dendrogram(dv0)) # {via as.dendrogram.twins() method}
+
+data(agriculture)
+## Plot similar to Figure 8 in ref
+\dontrun{plot(diana(agriculture), ask = TRUE)}
+\dontshow{plot(diana(agriculture))}
+}
+\keyword{cluster}
diff --git a/man/dissimilarity.object.Rd b/man/dissimilarity.object.Rd
new file mode 100644
index 0000000..c8b5f0b
--- /dev/null
+++ b/man/dissimilarity.object.Rd
@@ -0,0 +1,61 @@
+\name{dissimilarity.object}
+\alias{dissimilarity.object}
+\title{Dissimilarity Matrix Object}
+\description{
+ Objects of class \code{"dissimilarity"} representing the dissimilarity
+ matrix of a dataset.
+}
+\section{GENERATION}{
+ \code{\link{daisy}} returns this class of objects.
+ Also the functions \code{pam}, \code{clara}, \code{fanny},
+ \code{agnes}, and \code{diana} return a \code{dissimilarity} object,
+ as one component of their return objects.
+}
+\section{METHODS}{
+ The \code{"dissimilarity"} class has methods for the following generic
+ functions: \code{print}, \code{summary}.
+}
+\value{
+ The dissimilarity matrix is symmetric, and hence its lower triangle
+ (column wise) is represented as a vector to save storage space.
+ If the object is called \code{do}, and \code{n} the number of
+ observations, i.e., \code{n <- attr(do, "Size")}, then
+ for \eqn{i < j <= n}, the dissimilarity between (row) i and j is
+ \code{do[n*(i-1) - i*(i-1)/2 + j-i]}.
+ The length of the vector is \eqn{n*(n-1)/2}, i.e., of order \eqn{n^2}.
+
+ \code{"dissimilarity"} objects also inherit from class
+ \code{\link{dist}} and can use \code{dist} methods, in
+ particular, \code{\link{as.matrix}}, such that \eqn{d_{ij}}{d(i,j)}
+ from above is just \code{as.matrix(do)[i,j]}.
+
+ The object has the following attributes:
+ \item{Size}{the number of observations in the dataset.}
+ \item{Metric}{the metric used for calculating the
+ dissimilarities. Possible values are "euclidean", "manhattan",
+ "mixed" (if variables of different types were present in the
+ dataset), and "unspecified".}
+ \item{Labels}{optionally, contains the labels, if any, of the
+ observations of the dataset.}
+ \item{NA.message}{optionally, if a dissimilarity could not be
+ computed, because of too many missing values for some observations
+ of the dataset.}
+ \item{Types}{when a mixed metric was used, the types for each
+ variable as one-letter codes (as in the book, e.g. p.54):
+ \describe{
+ \item{A}{Asymmetric binary}
+ \item{S}{Symmetric binary}
+ \item{N}{Nominal (factor)}
+ \item{O}{Ordinal (ordered factor)}
+ \item{I}{Interval scaled (numeric)}
+ \item{T}{raTio to be log transformed (positive numeric)}
+ }.}
+}
+\seealso{
+ \code{\link{daisy}}, \code{\link{dist}},
+ \code{\link{pam}}, \code{\link{clara}}, \code{\link{fanny}},
+ \code{\link{agnes}}, \code{\link{diana}}.
+}
+%\examples{} --> ./daisy.Rd
+\keyword{cluster}
+
diff --git a/man/ellipsoidhull.Rd b/man/ellipsoidhull.Rd
new file mode 100644
index 0000000..71f0b53
--- /dev/null
+++ b/man/ellipsoidhull.Rd
@@ -0,0 +1,108 @@
+\name{ellipsoidhull}
+\alias{ellipsoidhull}
+\alias{print.ellipsoid}
+\title{Compute the Ellipsoid Hull or Spanning Ellipsoid of a Point Set}
+\description{
+ Compute the \dQuote{ellipsoid hull} or \dQuote{spanning ellipsoid}, i.e. the
+ ellipsoid of minimal volume (\sQuote{area} in 2D) such that all given points
+ lie just inside or on the boundary of the ellipsoid.
+}
+\usage{
+ellipsoidhull(x, tol=0.01, maxit=5000,
+ ret.wt = FALSE, ret.sqdist = FALSE, ret.pr = FALSE)
+\method{print}{ellipsoid}(x, digits = max(1, getOption("digits") - 2), \dots)
+}
+\arguments{
+ \item{x}{the \eqn{n} \eqn{p}-dimensional points as a numeric
+ \eqn{n\times p}{n x p} matrix.}
+ \item{tol}{convergence tolerance for Titterington's algorithm.
+ Setting this to much smaller values may drastically increase the number of
+ iterations needed, and you may want to increase \code{maxit} as well.}
+ \item{maxit}{integer giving the maximal number of iteration steps for
+ the algorithm.}
+ \item{ret.wt, ret.sqdist, ret.pr}{logicals indicating if additional
+ information should be returned, \code{ret.wt} specifying the
+ \emph{weights}, \code{ret.sqdist} the \emph{\bold{sq}uared
+ \bold{dist}ances} and \code{ret.pr} the final \bold{pr}obabilities
+ in the algorithms.}
+ \item{digits,\dots}{the usual arguments to \code{\link{print}} methods.}
+}
+\details{
+ The \dQuote{spanning ellipsoid} algorithm is said to stem from
+ Titterington (1976), in Pison et al (1999) who use it for
+ \code{\link{clusplot.default}}.\cr
+ The problem can be seen as a special case of the \dQuote{Min.Vol.}
+ ellipsoid of which a more flexible and general implementation is
+ \code{\link[MASS]{cov.mve}} in the \code{MASS} package.
+}
+\value{
+ an object of class \code{"ellipsoid"}, basically a \code{\link{list}}
+ with several components, comprising at least
+ \item{cov}{\eqn{p\times p}{p x p} \emph{covariance} matrix describing
+ the ellipsoid.}
+ \item{loc}{\eqn{p}-dimensional location of the ellipsoid center.}
+ \item{d2}{average squared radius. Further, \eqn{d2 = t^2}, where
+ \eqn{t} is \dQuote{the value of a t-statistic on the ellipse
+ boundary} (from \code{\link[ellipse]{ellipse}} in the
+ \pkg{ellipse} package), and hence, more usefully,
+ \code{d2 = qchisq(alpha, df = p)}, where \code{alpha} is the
+ confidence level for p-variate normally distributed data with
+ location and covariance \code{loc} and \code{cov} to lie inside the
+ ellipsoid.}
+ \item{wt}{the vector of weights iff \code{ret.wt} was true.}
+ \item{sqdist}{the vector of squared distances iff \code{ret.sqdist} was true.}
+ \item{prob}{the vector of algorithm probabilities iff \code{ret.pr} was true.}
+ \item{it}{number of iterations used.}
+ \item{tol, maxit}{just the input argument, see above.}
+ \item{eps}{the achieved tolerance which is the maximal squared radius
+ minus \eqn{p}.}
+ \item{ierr}{error code as from the algorithm; \code{0} means \emph{ok}.}
+ \item{conv}{logical indicating if the algorithm converged. This is defined as
+ \code{it < maxit && ierr == 0}.}
+}
+\references{
+ Pison, G., Struyf, A. and Rousseeuw, P.J. (1999)
+ Displaying a Clustering with CLUSPLOT,
+ \emph{Computational Statistics and Data Analysis}, \bold{30}, 381--392.\cr
+%% Jan.2015 : no longer there:
+ %% A version of this is available as technical report from
+ %% \url{http://www.agoras.ua.ac.be/abstract/Disclu99.htm}
+
+ D.M. Titterington (1976)
+ Algorithms for computing D-optimal design on finite design spaces. In
+ \emph{Proc.\ of the 1976 Conf.\ on Information Science and Systems},
+ 213--216; Johns Hopkins University.
+}
+
+\author{Martin Maechler did the present class implementation; Rousseeuw
+ et al did the underlying original code.}
+\seealso{\code{\link{predict.ellipsoid}} which is also the
+ \code{\link{predict}} method for \code{ellipsoid} objects.
+ \code{\link{volume.ellipsoid}} for an example of \sQuote{manual}
+ \code{ellipsoid} object construction;\cr
+ further \code{\link[ellipse]{ellipse}} from package \pkg{ellipse}
+ and \code{\link[sfsmisc]{ellipsePoints}} from package \pkg{sfsmisc}.
+
+ \code{\link[grDevices]{chull}} for the convex hull,
+ \code{\link{clusplot}} which makes use of this; \code{\link[MASS]{cov.mve}}.
+}
+\examples{
+x <- rnorm(100)
+xy <- unname(cbind(x, rnorm(100) + 2*x + 10))
+exy. <- ellipsoidhull(xy)
+exy. # >> calling print.ellipsoid()
+
+plot(xy, main = "ellipsoidhull(<Gauss data>) -- 'spanning points'")
+lines(predict(exy.), col="blue")
+points(rbind(exy.$loc), col = "red", cex = 3, pch = 13)
+
+exy <- ellipsoidhull(xy, tol = 1e-7, ret.wt = TRUE, ret.sq = TRUE)
+str(exy) # had small 'tol', hence many iterations
+(ii <- which(zapsmall(exy $ wt) > 1e-6))
+## --> only about 4 to 6 "spanning ellipsoid" points
+round(exy$wt[ii],3); sum(exy$wt[ii]) # weights summing to 1
+points(xy[ii,], pch = 21, cex = 2,
+ col="blue", bg = adjustcolor("blue",0.25))
+}
+\keyword{dplot}
+\keyword{hplot}% << ? chull has "hplot" as well.
diff --git a/man/fanny.Rd b/man/fanny.Rd
new file mode 100644
index 0000000..6b73d5e
--- /dev/null
+++ b/man/fanny.Rd
@@ -0,0 +1,146 @@
+\name{fanny}
+\alias{fanny}
+\title{Fuzzy Analysis Clustering}
+\description{
+ Computes a fuzzy clustering of the data into \code{k} clusters.
+}
+\usage{
+fanny(x, k, diss = inherits(x, "dist"), memb.exp = 2,
+ metric = c("euclidean", "manhattan", "SqEuclidean"),
+ stand = FALSE, iniMem.p = NULL, cluster.only = FALSE,
+ keep.diss = !diss && !cluster.only && n < 100,
+ keep.data = !diss && !cluster.only,
+ maxit = 500, tol = 1e-15, trace.lev = 0)
+}
+\arguments{
+ \item{x}{
+ data matrix or data frame, or dissimilarity matrix, depending on the
+ value of the \code{diss} argument.
+
+ In case of a matrix or data frame, each row corresponds to an observation,
+ and each column corresponds to a variable. All variables must be numeric.
+ Missing values (NAs) are allowed.
+
+ In case of a dissimilarity matrix, \code{x} is typically the output
+ of \code{\link{daisy}} or \code{\link{dist}}. Also a vector of
+ length n*(n-1)/2 is allowed (where n is the number of observations),
+ and will be interpreted in the same way as the output of the
+ above-mentioned functions. Missing values (NAs) are not allowed.
+ }
+ \item{k}{integer giving the desired number of clusters. It is
+ required that \eqn{0 < k < n/2} where \eqn{n} is the number of
+ observations.}
+ \item{diss}{
+ logical flag: if TRUE (default for \code{dist} or
+ \code{dissimilarity} objects), then \code{x} is assumed to be a
+ dissimilarity matrix. If FALSE, then \code{x} is treated as
+ a matrix of observations by variables.
+ }
+ \item{memb.exp}{number \eqn{r} strictly larger than 1 specifying the
+ \emph{membership exponent} used in the fit criterion; see the
+ \sQuote{Details} below. Default: \code{2} which used to be hardwired
+ inside FANNY.}
+ \item{metric}{character string specifying the metric to be used for
+ calculating dissimilarities between observations. Options are
+ \code{"euclidean"} (default), \code{"manhattan"}, and
+ \code{"SqEuclidean"}. Euclidean distances are root sum-of-squares
+ of differences, and manhattan distances are the sum of absolute
+ differences, and \code{"SqEuclidean"}, the \emph{squared} euclidean
+ distances are sum-of-squares of differences. Using this last option is
+ equivalent (but somewhat slower) to computing so called \dQuote{fuzzy C-means}.
+ \cr
+ If \code{x} is already a dissimilarity matrix, then this argument will
+ be ignored.
+ }
+ \item{stand}{logical; if true, the measurements in \code{x} are
+ standardized before calculating the dissimilarities. Measurements
+ are standardized for each variable (column), by subtracting the
+ variable's mean value and dividing by the variable's mean absolute
+ deviation. If \code{x} is already a dissimilarity matrix, then this
+ argument will be ignored.}
+ \item{iniMem.p}{numeric \eqn{n \times k}{n x k} matrix or \code{NULL}
+ (by default); can be used to specify a starting \code{membership}
+ matrix, i.e., a matrix of non-negative numbers, each row summing to
+ one.
+ } %% FIXME: add example
+ \item{cluster.only}{logical; if true, no silhouette information will be
+ computed and returned, see details.}%% FIXME: add example
+ \item{keep.diss, keep.data}{logicals indicating if the dissimilarities
+ and/or input data \code{x} should be kept in the result. Setting
+ these to \code{FALSE} can give smaller results and hence also save
+ memory allocation \emph{time}.}
+ \item{maxit, tol}{maximal number of iterations and default tolerance
+ for convergence (relative convergence of the fit criterion) for the
+ FANNY algorithm. The defaults \code{maxit = 500} and \code{tol =
+ 1e-15} used to be hardwired inside the algorithm.}
+ \item{trace.lev}{integer specifying a trace level for printing
+ diagnostics during the C-internal algorithm.
+ Default \code{0} does not print anything; higher values print
+ increasingly more.}
+}
+\value{
+ an object of class \code{"fanny"} representing the clustering.
+ See \code{\link{fanny.object}} for details.
+}
+\details{
+ In a fuzzy clustering, each observation is \dQuote{spread out} over
+ the various clusters. Denote by \eqn{u_{iv}}{u(i,v)} the membership
+ of observation \eqn{i} to cluster \eqn{v}.
+
+ The memberships are nonnegative, and for a fixed observation i they sum to 1.
+ The particular method \code{fanny} stems from chapter 4 of
+ Kaufman and Rousseeuw (1990) (see the references in
+ \code{\link{daisy}}) and has been extended by Martin Maechler to allow
+ user specified \code{memb.exp}, \code{iniMem.p}, \code{maxit},
+ \code{tol}, etc.
+
+ Fanny aims to minimize the objective function
+ \deqn{\sum_{v=1}^k
+ \frac{\sum_{i=1}^n\sum_{j=1}^n u_{iv}^r u_{jv}^r d(i,j)}{
+ 2 \sum_{j=1}^n u_{jv}^r}}{%
+ SUM_[v=1..k] (SUM_(i,j) u(i,v)^r u(j,v)^r d(i,j)) / (2 SUM_j u(j,v)^r)}
+ where \eqn{n} is the number of observations, \eqn{k} is the number of
+ clusters, \eqn{r} is the membership exponent \code{memb.exp} and
+ \eqn{d(i,j)} is the dissimilarity between observations \eqn{i} and \eqn{j}.
+ \cr Note that \eqn{r \to 1}{r -> 1} gives increasingly crisper
+ clusterings whereas \eqn{r \to \infty}{r -> Inf} leads to complete
+ fuzziness. K&R(1990), p.191 note that values too close to 1 can lead
+ to slow convergence. Further note that even the default, \eqn{r = 2}
+ can lead to complete fuzziness, i.e., memberships \eqn{u_{iv} \equiv
+ 1/k}{u(i,v) == 1/k}. In that case a warning is signalled and the
+ user is advised to choose a smaller \code{memb.exp} (\eqn{=r}).
+
+ Compared to other fuzzy clustering methods, \code{fanny} has the following
+ features: (a) it also accepts a dissimilarity matrix; (b) it is
+ more robust to the \code{spherical cluster} assumption; (c) it provides
+ a novel graphical display, the silhouette plot (see
+ \code{\link{plot.partition}}).
+}
+\seealso{
+ \code{\link{agnes}} for background and references;
+ \code{\link{fanny.object}}, \code{\link{partition.object}},
+ \code{\link{plot.partition}}, \code{\link{daisy}}, \code{\link{dist}}.
+}
+\examples{
+## generate 10+15 objects in two clusters, plus 3 objects lying
+## between those clusters.
+x <- rbind(cbind(rnorm(10, 0, 0.5), rnorm(10, 0, 0.5)),
+ cbind(rnorm(15, 5, 0.5), rnorm(15, 5, 0.5)),
+ cbind(rnorm( 3,3.2,0.5), rnorm( 3,3.2,0.5)))
+fannyx <- fanny(x, 2)
+## Note that observations 26:28 are "fuzzy" (closer to # 2):
+fannyx
+summary(fannyx)
+plot(fannyx)
+
+(fan.x.15 <- fanny(x, 2, memb.exp = 1.5)) # 'crisper' for obs. 26:28
+(fanny(x, 2, memb.exp = 3)) # more fuzzy in general
+
+data(ruspini)
+f4 <- fanny(ruspini, 4)
+stopifnot(rle(f4$clustering)$lengths == c(20,23,17,15))
+plot(f4, which = 1)
+## Plot similar to Figure 6 in Struyf et al (1996)
+plot(fanny(ruspini, 5))
+}
+\keyword{cluster}
diff --git a/man/fanny.object.Rd b/man/fanny.object.Rd
new file mode 100644
index 0000000..2d49795
--- /dev/null
+++ b/man/fanny.object.Rd
@@ -0,0 +1,69 @@
+\name{fanny.object}
+\alias{fanny.object}
+\title{Fuzzy Analysis (FANNY) Object}
+\description{
+ The objects of class \code{"fanny"} represent a fuzzy clustering of a
+ dataset.
+}
+\section{GENERATION}{
+ These objects are returned from \code{\link{fanny}}.
+}
+\section{METHODS}{
+ The \code{"fanny"} class has methods for the following generic functions:
+ \code{print}, \code{summary}.
+}
+\section{INHERITANCE}{
+ The class \code{"fanny"} inherits from \code{"partition"}.
+ Therefore, the generic functions \code{plot} and \code{clusplot} can
+ be used on a \code{fanny} object.
+}
+\value{
+ A legitimate \code{fanny} object is a list with the following components:
+ \item{membership}{
+ matrix containing the memberships for each pair consisting of an
+ observation and a cluster.
+ }
+ \item{memb.exp}{the membership exponent used in the fitting criterion.}
+ \item{coeff}{
+ Dunn's partition coefficient \eqn{F(k)} of the clustering, where
+ \eqn{k} is the number of clusters. \eqn{F(k)} is the sum of all
+ \emph{squared} membership coefficients, divided by the number of
+ observations. Its value is between \eqn{1/k} and 1.
+
+ The normalized form of the coefficient is also given. It is defined
+ as \eqn{(F(k) - 1/k) / (1 - 1/k)}, and ranges between 0 and 1.
+ A low value of Dunn's coefficient indicates a very fuzzy clustering,
+ whereas a value close to 1 indicates a near-crisp clustering.
+ }
+ \item{clustering}{
+ the clustering vector of the nearest crisp clustering, see
+ \code{\link{partition.object}}.}
+ \item{k.crisp}{integer (\eqn{\le k}{<= k}) giving the number of \emph{crisp}
+ clusters; can be less than \eqn{k}, where it's recommended to
+ decrease \code{memb.exp}.}
+ \item{objective}{
+ named vector containing the minimal value of the objective function
+ reached by the FANNY algorithm and the relative convergence
+ tolerance \code{tol} used.% + still has 'iterations' for back-compatibility
+ }
+ \item{convergence}{
+ named vector with \code{iterations}, the number of iterations needed
+ and \code{converged} indicating if the algorithm converged (in
+ \code{maxit} iterations within convergence tolerance \code{tol}).
+ }
+ \item{diss}{
+ an object of class \code{"dissimilarity"}, see
+ \code{\link{partition.object}}.}
+ \item{call}{generating call, see \code{\link{partition.object}}.}
+ \item{silinfo}{
+ list with silhouette information of the nearest crisp clustering, see
+ \code{\link{partition.object}}.}
+ \item{data}{matrix, possibly standardized, or NULL, see
+ \code{\link{partition.object}}.}
+}
+\seealso{
+ \code{\link{fanny}}, \code{\link{print.fanny}},
+ \code{\link{dissimilarity.object}},
+ \code{\link{partition.object}}, \code{\link{plot.partition}}.
+}
+\keyword{cluster}
diff --git a/man/flower.Rd b/man/flower.Rd
new file mode 100644
index 0000000..95c9aa1
--- /dev/null
+++ b/man/flower.Rd
@@ -0,0 +1,53 @@
+\name{flower}
+\alias{flower}
+\title{Flower Characteristics}
+\usage{data(flower)}
+\description{8 characteristics for 18 popular flowers.}
+\format{
+ A data frame with 18 observations on 8 variables:
+ \tabular{rll}{
+ [ , "V1"] \tab factor \tab winters \cr
+ [ , "V2"] \tab factor \tab shadow \cr
+ [ , "V3"] \tab factor \tab tubers \cr
+ [ , "V4"] \tab factor \tab color \cr
+ [ , "V5"] \tab ordered \tab soil \cr
+ [ , "V6"] \tab ordered \tab preference \cr
+ [ , "V7"] \tab numeric \tab height \cr
+ [ , "V8"] \tab numeric \tab distance
+ }
+
+ \describe{
+ \item{V1}{winters, is binary and indicates whether the plant may be left
+ in the garden when it freezes.}
+
+ \item{V2}{shadow, is binary and shows whether the plant needs to stand
+ in the shadow.}
+
+ \item{V3}{tubers, is asymmetric binary and distinguishes between plants
+ with tubers and plants that grow in any other way.}
+
+ \item{V4}{color, is nominal and specifies the flower's color (1 = white,
+ 2 = yellow, 3 = pink, 4 = red, 5 = blue).}
+
+ \item{V5}{soil, is ordinal and indicates whether the plant grows in dry
+ (1), normal (2), or wet (3) soil.}
+
+ \item{V6}{preference, is ordinal and gives someone's preference ranking
+ going from 1 to 18.}
+
+ \item{V7}{height, is interval scaled, the plant's height in centimeters.}
+
+ \item{V8}{distance, is interval scaled, the distance in centimeters that
+ should be left between the plants.}
+ }
+}
+\references{
+ Struyf, Hubert and Rousseeuw (1996), see \code{\link{agnes}}.
+}
+\examples{
+data(flower)
+## Example 2 in ref
+daisy(flower, type = list(asymm = 3))
+daisy(flower, type = list(asymm = c(1, 3), ordratio = 7))
+}
+\keyword{datasets}
diff --git a/man/lower.to.upper.tri.inds.Rd b/man/lower.to.upper.tri.inds.Rd
new file mode 100644
index 0000000..f56e2e4
--- /dev/null
+++ b/man/lower.to.upper.tri.inds.Rd
@@ -0,0 +1,34 @@
+\name{lower.to.upper.tri.inds}
+\alias{lower.to.upper.tri.inds}
+\alias{upper.to.lower.tri.inds}
+\title{Permute Indices for Triangular Matrices}
+\description{
+ Compute index vectors for extracting or reordering of lower or upper
+ triangular matrices that are stored as contiguous vectors.
+}
+\usage{
+lower.to.upper.tri.inds(n)
+upper.to.lower.tri.inds(n)
+}
+\arguments{
+ \item{n}{integer larger than 1.}
+}
+\value{
+ integer vector containing a permutation of \code{1:N} where
+ \eqn{N = n(n-1)/2}.
+}
+\seealso{\code{\link{upper.tri}}, \code{\link{lower.tri}} with a related
+ purpose.}
+\examples{
+m5 <- matrix(NA,5,5)
+m <- m5; m[lower.tri(m)] <- upper.to.lower.tri.inds(5); m
+m <- m5; m[upper.tri(m)] <- lower.to.upper.tri.inds(5); m
+
+stopifnot(lower.to.upper.tri.inds(2) == 1,
+ lower.to.upper.tri.inds(3) == 1:3,
+ upper.to.lower.tri.inds(3) == 1:3,
+ sort(upper.to.lower.tri.inds(5)) == 1:10,
+ sort(lower.to.upper.tri.inds(6)) == 1:15)
+}
+\keyword{array}
+\keyword{utilities}
diff --git a/man/mona.Rd b/man/mona.Rd
new file mode 100644
index 0000000..2930b89
--- /dev/null
+++ b/man/mona.Rd
@@ -0,0 +1,99 @@
+\name{mona}
+\alias{mona}
+\title{MONothetic Analysis Clustering of Binary Variables}
+
+\description{
+ Returns a list representing a divisive hierarchical clustering of
+ a dataset with binary variables only.
+}
+\usage{
+mona(x, trace.lev = 0)% FIXME: allow early stopping
+}
+\arguments{
+ \item{x}{
+ data matrix or data frame in which each row corresponds to an
+ observation, and each column corresponds to a variable. All
+ variables must be binary. A limited number of missing values (\code{NA}s)
+ is allowed. Every observation must have at least one value different
+ from \code{\link{NA}}. No variable should have half of its values
+ missing. There must be at least one variable which has no missing
+ values. A variable with all its non-missing values identical is
+ not allowed.}
+ \item{trace.lev}{logical or integer indicating if (and how much) the
+ algorithm should produce progress output.}
+}
+\value{
+ an object of class \code{"mona"} representing the clustering.
+ See \code{\link{mona.object}} for details.
+}
+\details{
+\code{mona} is fully described in chapter 7 of Kaufman and Rousseeuw (1990).
+It is \dQuote{monothetic} in the sense that each division is based on a
+single (well-chosen) variable, whereas most other hierarchical methods
+(including \code{agnes} and \code{diana}) are \dQuote{polythetic}, i.e. they use
+all variables together.
+
+The \code{mona}-algorithm constructs a hierarchy of clusterings,
+starting with one large cluster. Clusters are divided until all
+observations in the same cluster have identical values for all variables.
+\cr
+At each stage, all clusters are divided according to the values of one
+variable. A cluster is divided into one cluster with all observations having
+value 1 for that variable, and another cluster with all observations having
+value 0 for that variable.
+
+The variable used for splitting a cluster is the variable with the maximal
+total association to the other variables, according to the observations in the
+cluster to be split. The association between variables f and g
+is given by a(f,g)*d(f,g) - b(f,g)*c(f,g), where a(f,g), b(f,g), c(f,g),
+and d(f,g) are the numbers in the contingency table of f and g.
+[That is, a(f,g) (resp. d(f,g)) is the number of observations for which f and g
+both have value 0 (resp. value 1); b(f,g) (resp. c(f,g)) is the number of
+observations for which f has value 0 (resp. 1) and g has value 1 (resp. 0).]
+The total association of a variable f is the sum of its associations to all
+variables.
+}
+\section{Missing Values (\code{\link{NA}}s)}{
+ The mona-algorithm requires \dQuote{pure} 0-1 values. However,
+ \code{mona(x)} allows \code{x} to contain (not too many)
+ \code{\link{NA}}s. In a preliminary step, these are \dQuote{imputed},
+ i.e., all missing values are filled in. To do this, the same measure
+ of association between variables is used as in the algorithm. When variable
+ f has missing values, the variable g with the largest absolute association
+ to f is looked up. When the association between f and g is positive,
+ any missing value of f is replaced by the value of g for the same
+ observation. If the association between f and g is negative, then any missing
+ value of f is replaced by the value of 1-g for the same
+ observation.
+}
+\note{
+ In \pkg{cluster} versions before 2.0.6, the algorithm entered an
+ infinite loop in the boundary case of one variable, i.e.,
+ \code{ncol(x) == 1}, which currently signals an error (because the
+ algorithm, now in C, has not correctly taken account of this special case).
+ %% FIXME ("patches are welcome")
+}
+\seealso{
+ \code{\link{agnes}} for background and references;
+ \code{\link{mona.object}}, \code{\link{plot.mona}}.
+}
+\examples{
+data(animals)
+ma <- mona(animals)
+ma
+## Plot similar to Figure 10 in Struyf et al (1996)
+plot(ma)
+
+## One place to see if/how error messages are *translated* (to 'de' / 'pl'):
+ani.NA <- animals; ani.NA[4,] <- NA
+aniNA <- within(animals, { end[2:9] <- NA })
+aniN2 <- animals; aniN2[cbind(1:6, c(3, 1, 4:6, 2))] <- NA
+ani.non2 <- within(animals, end[7] <- 3 )
+ani.idNA <- within(animals, end[!is.na(end)] <- 1 )
+try( mona(ani.NA) ) ## error: .. object with all values missing
+try( mona(aniNA) ) ## error: .. more than half missing values
+try( mona(aniN2) ) ## error: all have at least one missing
+try( mona(ani.non2) ) ## error: all must be binary
+try( mona(ani.idNA) ) ## error: ditto
+}
+\keyword{cluster}
diff --git a/man/mona.object.Rd b/man/mona.object.Rd
new file mode 100644
index 0000000..c12e5ff
--- /dev/null
+++ b/man/mona.object.Rd
@@ -0,0 +1,44 @@
+\name{mona.object}
+\alias{mona.object}
+\title{Monothetic Analysis (MONA) Object}
+\description{
+ The objects of class \code{"mona"} represent the divisive
+ hierarchical clustering of a dataset with only binary variables
+ (measurements). This class of objects is returned from
+ \code{\link{mona}}.
+}
+\section{METHODS}{
+ The \code{"mona"} class has methods for the following generic functions:
+ \code{print}, \code{summary}, \code{plot}.
+}
+\value{
+ A legitimate \code{mona} object is a list with the following components:
+
+ \item{data}{
+ matrix with the same dimensions as the original data matrix,
+ but with factors coded as 0 and 1, and all missing values replaced.
+ }
+ \item{order}{
+ a vector giving a permutation of the original observations to allow
+ for plotting, in the sense that the branches of a clustering tree
+ will not cross.
+ }
+ \item{order.lab}{
+ a vector similar to \code{order}, but containing observation labels
+ instead of observation numbers. This component is only available if
+ the original observations were labelled.
+ }
+ \item{variable}{
+ vector of length n-1 where n is the number of observations,
+ specifying the variables used to separate the observations of \code{order}.
+ }
+ \item{step}{
+ vector of length n-1 where n is the number of observations,
+ specifying the separation steps at which the observations of
+ \code{order} are separated.
+ }
+}
+\seealso{\code{\link{mona}} for examples etc, \code{\link{plot.mona}}.
+}
+\keyword{cluster}
+
diff --git a/man/pam.Rd b/man/pam.Rd
new file mode 100644
index 0000000..a3ba26a
--- /dev/null
+++ b/man/pam.Rd
@@ -0,0 +1,211 @@
+\name{pam}
+\alias{pam}
+\title{Partitioning Around Medoids}
+\description{
+ Partitioning (clustering) of the data into \code{k} clusters \dQuote{around
+ medoids}, a more robust version of K-means.
+}
+\usage{
+pam(x, k, diss = inherits(x, "dist"),
+ metric = c("euclidean", "manhattan"), %% FIXME: add "jaccard"
+ medoids = NULL, stand = FALSE, cluster.only = FALSE,
+ do.swap = TRUE,
+ keep.diss = !diss && !cluster.only && n < 100,
+ keep.data = !diss && !cluster.only,
+ pamonce = FALSE, trace.lev = 0)
+}
+\arguments{
+ \item{x}{
+ data matrix or data frame, or dissimilarity matrix or object,
+ depending on the value of the \code{diss} argument.
+
+ In case of a matrix or data frame, each row corresponds to an
+ observation, and each column corresponds to a variable. All
+ variables must be numeric. Missing values (\code{\link{NA}}s)
+ \emph{are} allowed---as long as every pair of observations has at
+ least one case not missing.
+
+ In case of a dissimilarity matrix, \code{x} is typically the output
+ of \code{\link{daisy}} or \code{\link{dist}}. Also a vector of
+ length n*(n-1)/2 is allowed (where n is the number of observations),
+ and will be interpreted in the same way as the output of the
+ above-mentioned functions. Missing values (\code{\link{NA}}s) are
+ \emph{not} allowed.
+ }
+ \item{k}{positive integer specifying the number of clusters, less than
+ the number of observations.}
+ \item{diss}{
+ logical flag: if TRUE (default for \code{dist} or
+ \code{dissimilarity} objects), then \code{x} will be considered as a
+ dissimilarity matrix. If FALSE, then \code{x} will be considered as
+ a matrix of observations by variables.
+ }
+ \item{metric}{
+ character string specifying the metric to be used for calculating
+ dissimilarities between observations.\cr
+ The currently available options are "euclidean" and
+ "manhattan". Euclidean distances are root sum-of-squares of
+ differences, and manhattan distances are the sum of absolute
+ differences. If \code{x} is already a dissimilarity matrix, then
+ this argument will be ignored.
+ }
+ \item{medoids}{NULL (default) or length-\code{k} vector of integer
+ indices (in \code{1:n}) specifying initial medoids instead of using
+ the \sQuote{\emph{build}} algorithm.}
+ \item{stand}{logical; if true, the measurements in \code{x} are
+ standardized before calculating the dissimilarities. Measurements
+ are standardized for each variable (column), by subtracting the
+ variable's mean value and dividing by the variable's mean absolute
+ deviation. If \code{x} is already a dissimilarity matrix, then this
+ argument will be ignored.}
+ \item{cluster.only}{logical; if true, only the clustering will be
+ computed and returned, see details.}
+ \item{do.swap}{logical indicating if the \bold{swap} phase should
+ happen. The default, \code{TRUE}, corresponds to the
+ original algorithm. On the other hand, the \bold{swap} phase is
+ much more computer intensive than the \bold{build} one for large
+ \eqn{n}, so can be skipped by \code{do.swap = FALSE}.}
+ \item{keep.diss, keep.data}{logicals indicating if the dissimilarities
+ and/or input data \code{x} should be kept in the result. Setting
+ these to \code{FALSE} can give much smaller results and hence even save
+ memory allocation \emph{time}.}
+ \item{pamonce}{logical or integer in \code{0:5} specifying algorithmic
+ short cuts as proposed by Reynolds et al. (2006), and
+ Schubert and Rousseeuw (2019) see below.}
+ \item{trace.lev}{integer specifying a trace level for printing
+ diagnostics during the build and swap phase of the algorithm.
+ Default \code{0} does not print anything; higher values print
+ increasingly more.}
+}
+\value{
+ an object of class \code{"pam"} representing the clustering. See
+ \code{?\link{pam.object}} for details.
+}
+\details{
+ The basic \code{pam} algorithm is fully described in chapter 2 of
+ Kaufman and Rousseeuw (1990). Compared to the k-means approach in \code{kmeans}, the
+ function \code{pam} has the following features: (a) it also accepts a
+ dissimilarity matrix; (b) it is more robust because it minimizes a sum
+ of dissimilarities instead of a sum of squared euclidean distances;
+ (c) it provides a novel graphical display, the silhouette plot (see
+ \code{plot.partition}); (d) it allows selecting the number of clusters
+ using \code{mean(\link{silhouette}(pr)[, "sil_width"])} on the result
+ \code{pr <- pam(..)}, or directly its component
+ \code{pr$silinfo$avg.width}, see also \code{\link{pam.object}}.
+
+ When \code{cluster.only} is true, the result is simply a (possibly
+ named) integer vector specifying the clustering, i.e.,\cr
+ \code{pam(x,k, cluster.only=TRUE)} is the same as \cr
+ \code{pam(x,k)$clustering} but computed more efficiently.
+
+ The \code{pam}-algorithm is based on the search for \code{k}
+ representative objects or medoids among the observations of the
+ dataset. These observations should represent the structure of the
+ data. After finding a set of \code{k} medoids, \code{k} clusters are
+ constructed by assigning each observation to the nearest medoid. The
+ goal is to find \code{k} representative objects which minimize the sum
+ of the dissimilarities of the observations to their closest
+ representative object.
+ \cr
+ By default, when \code{medoids} are not specified, the algorithm first
+ looks for a good initial set of medoids (this is called the
+ \bold{build} phase). Then it finds a local minimum for the
+ objective function, that is, a solution such that there is no single
+ switch of an observation with a medoid that will decrease the
+ objective (this is called the \bold{swap} phase).
+
+ When the \code{medoids} are specified, their order does \emph{not}
+ matter; in general, the algorithms have been designed to not depend on
+ the order of the observations.
+
+ The \code{pamonce} option, new in cluster 1.14.2 (Jan. 2012), has been
+ proposed by Matthias Studer, University of Geneva, based on the
+ findings by Reynolds et al. (2006) and was extended by Erich Schubert,
+ TU Dortmund, with the FastPAM optimizations.
+
+ The default \code{FALSE} (or integer \code{0}) corresponds to the
+ original \dQuote{swap} algorithm, whereas \code{pamonce = 1} (or
+ \code{TRUE}), corresponds to the first proposal .... %% FIXME
+ and \code{pamonce = 2} additionally implements the second proposal as
+ well. % FIXME more details
+
+ The key ideas of FastPAM (Schubert and Rousseeuw, 2019) are implemented
+ except for the linear approximate build as follows:
+ \describe{
+ \item{\code{pamonce = 3}:}{
+ reduces the runtime by a factor of O(k) by exploiting
+ that points cannot be closest to all current medoids at the same time.}
+ \item{\code{pamonce = 4}:}{ additionally allows executing multiple swaps
+ per iteration, usually reducing the number of iterations.}
+ \item{\code{pamonce = 5}:}{ adds minor optimizations copied from the
+ \code{pamonce = 2} approach, and is expected to be the fastest
+ variant.}
+ }
+}
+\note{
+ For large datasets, \code{pam} may need too much memory or too much
+ computation time since both are \eqn{O(n^2)}. Then,
+ \code{\link{clara}()} is preferable, see its documentation.
+
+ There is a hard limit currently, \eqn{n \le 65536}{n <= 65536}, at
+ \eqn{2^{16}} because for larger \eqn{n}, \eqn{n(n-1)/2} is larger than
+ the maximal integer (\code{\link{.Machine}$integer.max} = \eqn{2^{31} - 1}).
+}
+\author{Kaufman and Rousseeuw's original Fortran code was translated to C
+ and augmented in several ways, e.g. to allow \code{cluster.only=TRUE}
+ or \code{do.swap=FALSE}, by Martin Maechler.
+ \cr
+ Matthias Studer, Univ.Geneva provided the \code{pamonce} (\code{1} and \code{2})
+ implementation.
+}
+\references{
+%% the pamonce=1,2 options :
+ Reynolds, A., Richards, G., de la Iglesia, B. and Rayward-Smith, V. (2006)
+ Clustering rules: A comparison of partitioning and hierarchical
+ clustering algorithms;
+ \emph{Journal of Mathematical Modelling and Algorithms} \bold{5},
+ 475--504. \doi{10.1007/s10852-005-9022-1}.
+
+%% the pamonce=3,4,5 (FastPAM) options:
+ Erich Schubert and Peter J. Rousseeuw (2019)
+ Faster k-Medoids Clustering:
+ Improving the PAM, CLARA, and CLARANS Algorithms;
+ Preprint, (\url{https://arxiv.org/abs/1810.05691}).
+}
+\seealso{
+ \code{\link{agnes}} for background and references;
+ \code{\link{pam.object}}, \code{\link{clara}}, \code{\link{daisy}},
+ \code{\link{partition.object}}, \code{\link{plot.partition}},
+ \code{\link{dist}}.
+}
+\examples{
+## generate 25 objects, divided into 2 clusters.
+x <- rbind(cbind(rnorm(10,0,0.5), rnorm(10,0,0.5)),
+ cbind(rnorm(15,5,0.5), rnorm(15,5,0.5)))
+pamx <- pam(x, 2)
+pamx # Medoids: '7' and '25' ...
+summary(pamx)
+plot(pamx)
+## use obs. 1 & 16 as starting medoids -- same result (typically)
+(p2m <- pam(x, 2, medoids = c(1,16)))
+## no _build_ *and* no _swap_ phase: just cluster all obs. around (1, 16):
+p2.s <- pam(x, 2, medoids = c(1,16), do.swap = FALSE)
+p2.s
+
+p3m <- pam(x, 3, trace = 2)
+## rather stupid initial medoids:
+(p3m. <- pam(x, 3, medoids = 3:1, trace = 1))
+
+\dontshow{
+ ii <- pmatch(c("obj","call"), names(pamx))
+ stopifnot(all.equal(pamx [-ii], p2m [-ii], tolerance=1e-14),
+ all.equal(pamx$objective[2], p2m$objective[2], tolerance=1e-14))
+}
+pam(daisy(x, metric = "manhattan"), 2, diss = TRUE)
+
+data(ruspini)
+## Plot similar to Figure 4 in Struyf et al (1996)
+\dontrun{plot(pam(ruspini, 4), ask = TRUE)}
+\dontshow{plot(pam(ruspini, 4))}
+}
+\keyword{cluster}
diff --git a/man/pam.object.Rd b/man/pam.object.Rd
new file mode 100644
index 0000000..778cf59
--- /dev/null
+++ b/man/pam.object.Rd
@@ -0,0 +1,80 @@
+\name{pam.object}
+\alias{pam.object}
+\title{Partitioning Around Medoids (PAM) Object}
+\description{
+ The objects of class \code{"pam"} represent a partitioning of a
+ dataset into clusters.
+}
+\section{GENERATION}{
+ These objects are returned from \code{\link{pam}}.}
+\section{METHODS}{
+ The \code{"pam"} class has methods for the following generic functions:
+ \code{print}, \code{summary}.
+}
+\section{INHERITANCE}{
+ The class \code{"pam"} inherits from \code{"partition"}.
+ Therefore, the generic functions \code{plot} and \code{clusplot} can
+ be used on a \code{pam} object.
+}
+\value{
+ A legitimate \code{pam} object is a \code{\link{list}} with the following components:
+ \item{medoids}{
+ the medoids or representative objects of the
+ clusters. If a dissimilarity matrix was given as input to
+ \code{pam}, then a vector of numbers or labels of observations is
+ given, else \code{medoids} is a \code{\link{matrix}} with in each
+ row the coordinates of one medoid.}
+ \item{id.med}{integer vector of \emph{indices} giving the medoid
+ observation numbers.}
+ \item{clustering}{the clustering vector, see \code{\link{partition.object}}.}
+ \item{objective}{the objective function after the first and second
+ step of the \code{pam} algorithm.}
+ \item{isolation}{
+ vector with length equal to the number of clusters, specifying which
+ clusters are isolated clusters (L- or L*-clusters) and which clusters are
+ not isolated.\cr
+ A cluster is an L*-cluster iff its diameter is smaller than its
+ separation. A cluster is an L-cluster iff for each observation i
+ the maximal dissimilarity between i and any other observation of the
+ cluster is smaller than the minimal dissimilarity between i and any
+ observation of another cluster. Clearly each L*-cluster is also an
+ L-cluster.
+ }
+ \item{clusinfo}{
+ matrix, each row gives numerical information for one cluster. These
+ are the cardinality of the cluster (number of observations), the
+ maximal and average dissimilarity between the observations in the
+ cluster and the cluster's medoid, %% FIXME: Now differs from clara.object.Rd:
+ the diameter of the cluster
+ (maximal dissimilarity between two observations of the cluster), and
+ the separation of the cluster (minimal dissimilarity between an
+ observation of the cluster and an observation of another cluster).
+ }
+ \item{silinfo}{list with silhouette width information, see
+ \code{\link{partition.object}}.}
+ \item{diss}{dissimilarity (maybe NULL), see \code{\link{partition.object}}.}
+ \item{call}{generating call, see \code{\link{partition.object}}.}
+ \item{data}{(possibly standardized) see \code{\link{partition.object}}.}
+}
+\seealso{
+ \code{\link{pam}}, \code{\link{dissimilarity.object}},
+ \code{\link{partition.object}}, \code{\link{plot.partition}}.
+}
+\examples{
+## Use the silhouette widths for assessing the best number of clusters,
+## following a one-dimensional example from Christian Hennig :
+##
+x <- c(rnorm(50), rnorm(50,mean=5), rnorm(30,mean=15))
+asw <- numeric(20)
+## Note that "k=1" won't work!
+for (k in 2:20)
+ asw[k] <- pam(x, k) $ silinfo $ avg.width
+k.best <- which.max(asw)
+cat("silhouette-optimal number of clusters:", k.best, "\n")
+
+plot(1:20, asw, type= "h", main = "pam() clustering assessment",
+ xlab= "k (# clusters)", ylab = "average silhouette width")
+axis(1, k.best, paste("best",k.best,sep="\n"), col = "red", col.axis = "red")
+}
+\keyword{cluster}
+
diff --git a/man/partition.object.Rd b/man/partition.object.Rd
new file mode 100644
index 0000000..b695161
--- /dev/null
+++ b/man/partition.object.Rd
@@ -0,0 +1,68 @@
+\name{partition.object}
+\alias{partition}% == class
+\alias{partition.object}
+\title{Partitioning Object}
+\description{
+ The objects of class \code{"partition"} represent a partitioning of a
+ dataset into clusters.
+}
+\section{GENERATION}{
+ These objects are returned from \code{pam}, \code{clara} or \code{fanny}.
+}
+\section{METHODS}{
+ The \code{"partition"} class has a method for the following generic functions:
+ \code{plot}, \code{clusplot}.
+}
+\section{INHERITANCE}{
+ The following classes inherit from class \code{"partition"} :
+ \code{"pam"}, \code{"clara"} and \code{"fanny"}.
+
+ See \code{\link{pam.object}}, \code{\link{clara.object}} and
+ \code{\link{fanny.object}} for details.
+}
+\value{a \code{"partition"} object is a list with the following
+ (and typically more) components:
+ \item{clustering}{
+ the clustering vector. An integer vector of length \eqn{n}, the number of
+ observations, giving for each observation the number ('id') of the
+ cluster to which it belongs.}
+ \item{call}{the matched \code{\link{call}} generating the object.}
+ \item{silinfo}{
+ a list with all \emph{silhouette} information, only available when
+ the number of clusters is non-trivial, i.e., \eqn{1 < k < n} and
+ then has the following components, see \code{\link{silhouette}}
+ \describe{
+ \item{widths}{an (n x 3) matrix, as returned by
+ \code{\link{silhouette}()}, with for each observation i the
+ cluster to which i belongs, as well as the neighbor cluster of i
+ (the cluster, not containing i, for which the average
+ dissimilarity between its observations and i is minimal), and
+ the silhouette width \eqn{s(i)} of the observation.
+ }
+ \item{clus.avg.widths}{the average silhouette width per cluster.}
+ \item{avg.width}{the average silhouette width for the dataset, i.e.,
+ simply the average of \eqn{s(i)} over all observations \eqn{i}.}
+ }% describe
+ This information is also needed to construct a \emph{silhouette plot} of
+ the clustering, see \code{\link{plot.partition}}.
+
+ Note that \code{avg.width} can be maximized over different
+ clusterings (e.g. with varying number of clusters) to choose an
+ \emph{optimal} clustering.%% see an example or a demo << FIXME >>
+ }
+ \item{objective}{value of criterion maximized during the
+ partitioning algorithm, may have more than one entry for different stages.}
+ \item{diss}{
+ an object of class \code{"dissimilarity"}, representing the total
+ dissimilarity matrix of the dataset (or relevant subset, e.g. for
+ \code{clara}).
+ }
+ \item{data}{
+ a matrix containing the original or standardized data. This might
+ be missing to save memory or when a dissimilarity matrix was given
+ as input structure to the clustering method.
+ }
+}
+\seealso{\code{\link{pam}}, \code{\link{clara}}, \code{\link{fanny}}.
+}
+\keyword{cluster}
diff --git a/man/plantTraits.Rd b/man/plantTraits.Rd
new file mode 100644
index 0000000..b903076
--- /dev/null
+++ b/man/plantTraits.Rd
@@ -0,0 +1,97 @@
+\name{plantTraits}
+\alias{plantTraits}
+\title{Plant Species Traits Data}
+\docType{data}
+\encoding{latin1}
+\description{
+ This dataset constitutes a description of 136 plant species
+ according to biological attributes (morphological or reproductive)
+}
+\usage{data(plantTraits)
+}
+\format{
+ A data frame with 136 observations on the following 31 variables.
+ \describe{
+ \item{\code{pdias}}{Diaspore mass (mg)}
+ \item{\code{longindex}}{Seed bank longevity}
+ \item{\code{durflow}}{Flowering duration}
+ \item{\code{height}}{Plant height, an ordered factor with levels
+ \code{1} < \code{2} < \dots < \code{8}.}
+% Plant height}{an ordered factor with levels \code{1} <10cm < \code{2} 10-30cm< \code{3} 30-60cm< \code{4}60-100cm < \code{5}1-3m < \code{6}3-6m < \code{7}:6-15m < \code{8}>15m}
+
+ \item{\code{begflow}}{Time of first flowering, an ordered factor with levels \code{1} < \code{2} < \code{3} < \code{4} < \code{5} < \code{6} < \code{7} < \code{8} < \code{9}}
+% {\code{begflow}}{an ordered factor with levels \code{1} january< \code{2} february< \code{3} march< \code{4}april < \code{5} may< \code{6} june< \code{7} july< \code{8}august < \code{9}september}
+
+ \item{\code{mycor}}{Mycorrhizas, an ordered factor with levels \code{0}never < \code{1} sometimes< \code{2}always}
+
+ \item{\code{vegaer}}{aerial vegetative propagation, an ordered
+ factor with levels \code{0}never < \code{1} present but limited< \code{2}important.}
+
+ \item{\code{vegsout}}{underground vegetative propagation, an ordered
+ factor with 3 levels identical to \code{vegaer} above.}
+
+ \item{\code{autopoll}}{selfing pollination, an ordered factor with
+ levels \code{0}never < \code{1}rare < \code{2} often< \code{3}the rule}
+
+ \item{\code{insects}}{insect pollination, an ordered factor with 5 levels \code{0} < \dots < \code{4}.}
+ \item{\code{wind}}{wind pollination, an ordered factor with 5 levels \code{0} < \dots < \code{4}.}
+ \item{\code{lign}}{a binary factor with levels \code{0:1},
+ indicating if plant is woody.}
+ \item{\code{piq}}{a binary factor indicating if plant is thorny.}
+ \item{\code{ros}}{a binary factor indicating if plant is rosette.}
+ \item{\code{semiros}}{semi-rosette plant, a binary factor (\code{0}:
+ no; \code{1}: yes).}
+ \item{\code{leafy}}{leafy plant, a binary factor.}
+ \item{\code{suman}}{summer annual, a binary factor.}
+ \item{\code{winan}}{winter annual, a binary factor.}
+ \item{\code{monocarp}}{monocarpic perennial, a binary factor.}
+ \item{\code{polycarp}}{polycarpic perennial, a binary factor.}
+ \item{\code{seasaes}}{seasonal aestival leaves, a binary factor.}
+ \item{\code{seashiv}}{seasonal hibernal leaves, a binary factor.}
+ \item{\code{seasver}}{seasonal vernal leaves, a binary factor.}
+ \item{\code{everalw}}{leaves always evergreen, a binary factor.}
+ \item{\code{everparti}}{leaves partially evergreen, a binary factor.}
+ \item{\code{elaio}}{fruits with an elaiosome (dispersed by ants), a binary factor.}
+ \item{\code{endozoo}}{endozoochorous fruits, a binary factor.}
+ \item{\code{epizoo}}{epizoochorous fruits, a binary factor.}
+ \item{\code{aquat}}{aquatic dispersal fruits, a binary factor.}
+ \item{\code{windgl}}{wind dispersed fruits, a binary factor.}
+ \item{\code{unsp}}{unspecialized mechanism of seed dispersal, a binary factor.}
+ }
+}
+\details{
+ Most of the factor attributes are not disjunctive. For example, a plant can be usually
+ pollinated by insects but sometimes self-pollination can occur.
+}
+\source{
+ Vallet, Jeanne (2005)
+ \emph{Structuration de communautés végétales et analyse comparative de
+ traits biologiques le long d'un gradient d'urbanisation}.
+ Mémoire de Master 2 'Ecologie-Biodiversité-Evolution';
+ Université Paris Sud XI, 30p.+ annexes (in French)
+}
+% \references{
+% ~~ possibly secondary sources and usages ~~
+% }
+\examples{
+data(plantTraits)
+
+## Calculation of a dissimilarity matrix
+library(cluster)
+dai.b <- daisy(plantTraits,
+ type = list(ordratio = 4:11, symm = 12:13, asymm = 14:31))
+
+## Hierarchical classification
+agn.trts <- agnes(dai.b, method="ward")
+plot(agn.trts, which.plots = 2, cex= 0.6)
+plot(agn.trts, which.plots = 1)
+cutree6 <- cutree(agn.trts, k=6)
+cutree6
+
+## Principal Coordinate Analysis
+cmdsdai.b <- cmdscale(dai.b, k=6)
+plot(cmdsdai.b[, 1:2], asp = 1, col = cutree6)
+}
+\keyword{datasets}
+% plant attribute database, mixed type variables, dissimilarity matrix (DAISY), Hierarchical Classification (AGNES)
+% Principal Coordinates Analysis (CMDSCALE)
diff --git a/man/plot.agnes.Rd b/man/plot.agnes.Rd
new file mode 100644
index 0000000..7ee7f5e
--- /dev/null
+++ b/man/plot.agnes.Rd
@@ -0,0 +1,105 @@
+\name{plot.agnes}
+%% almost identical to ./plot.diana.Rd and quite similar to ./plot.mona.Rd
+\alias{plot.agnes}
+\title{Plots of an Agglomerative Hierarchical Clustering}
+\description{
+ Creates plots for visualizing an \code{agnes} object.
+}
+\usage{
+\method{plot}{agnes}(x, ask = FALSE, which.plots = NULL, main = NULL,
+ sub = paste("Agglomerative Coefficient = ",round(x$ac, digits = 2)),
+ adj = 0, nmax.lab = 35, max.strlen = 5, xax.pretty = TRUE, \dots)
+}
+\arguments{
+ \item{x}{an object of class \code{"agnes"}, typically created by
+ \code{\link{agnes}(.)}.}
+ \item{ask}{logical; if true and \code{which.plots} is \code{NULL},
+ \code{plot.agnes} operates in interactive mode, via \code{\link{menu}}.}
+ \item{which.plots}{integer vector or NULL (default), the latter
+ producing both plots. Otherwise, \code{which.plots}
+ must contain integers of \code{1} for a \emph{banner} plot or \code{2} for a
+ dendrogram or \dQuote{clustering tree}.}
+ \item{main, sub}{main and sub title for the plot, with convenient
+ defaults. See documentation for these arguments in \code{\link{plot.default}}.}
+ \item{adj}{for label adjustment in \code{\link{bannerplot}()}.}
+ \item{nmax.lab}{integer indicating the number of labels which is
+ considered too large for single-name labelling the banner plot.}
+ \item{max.strlen}{positive integer giving the length to which
+ strings are truncated in banner plot labeling.}
+ \item{xax.pretty}{logical or integer indicating if
+ \code{\link{pretty}(*, n = xax.pretty)} should be used for the x axis.
+ \code{xax.pretty = FALSE} is for back compatibility.}
+ \item{\dots}{graphical parameters (see \code{\link{par}}) may also
+ be supplied and are passed to \code{\link{bannerplot}()} or
+ \code{pltree()} (see \code{\link{pltree.twins}}), respectively.}
+}
+\section{Side Effects}{
+ Appropriate plots are produced on the current graphics device. This can
+ be one or both of the following choices:
+ \cr Banner
+ \cr Clustering tree
+}
+\details{
+ When \code{ask = TRUE}, rather than producing each plot sequentially,
+ \code{plot.agnes} displays a menu listing all the plots that can be produced.
+ If the menu is not desired but a pause between plots is still wanted
+ one must set \code{par(ask= TRUE)} before invoking the plot command.
+
+ The banner displays the hierarchy of clusters, and is equivalent to a tree.
+ See Rousseeuw (1986) or chapter 5 of Kaufman and Rousseeuw (1990).
+ The banner plots distances at which observations and clusters are merged.
+ The observations are listed in the order found by the \code{agnes} algorithm,
+ and the numbers in the \code{height} vector are represented as bars
+ between the observations.
+
+ The leaves of the clustering tree are the original observations. Two
+ branches come together at the distance between the two clusters being merged.
+
+ For more customization of the plots, rather call
+ \code{\link{bannerplot}} and \code{pltree()}, i.e., its method
+ \code{\link{pltree.twins}}, respectively,
+ directly with
+ corresponding arguments,
+ e.g., \code{xlab} or \code{ylab}.
+}
+\note{
+ In the banner plot, observation labels are only printed when the
+ number of observations is less than \code{nmax.lab} (35, by
+ default), for readability. Moreover, observation labels are truncated
+ to maximally \code{max.strlen} (5) characters.
+
+ For the dendrogram, more flexibility than via \code{pltree()} is
+ provided by \code{dg <- \link{as.dendrogram}(x)} and
+ plotting \code{dg} via \code{\link[stats]{plot.dendrogram}}.
+}
+\references{
+ Kaufman, L. and Rousseeuw, P.J. (1990)
+ \emph{Finding Groups in Data: An Introduction to Cluster Analysis}.
+ Wiley, New York.
+
+ Rousseeuw, P.J. (1986). A visual display for hierarchical classification,
+ in \emph{Data Analysis and Informatics 4}; edited by E. Diday,
+ Y. Escoufier, L. Lebart, J. Pages, Y. Schektman, and R. Tomassone.
+ North-Holland, Amsterdam, 743--748.
+
+ Struyf, A., Hubert, M. and Rousseeuw, P.J. (1997)
+ Integrating Robust Clustering Techniques in S-PLUS,
+ \emph{Computational Statistics and Data Analysis}, \bold{26}, 17--37.
+}
+\seealso{
+ \code{\link{agnes}} and \code{\link{agnes.object}};
+ \code{\link{bannerplot}}, \code{\link{pltree.twins}},
+ and \code{\link{par}}.
+}
+\examples{
+## Can also pass 'labels' to pltree() and bannerplot():
+data(iris)
+cS <- as.character(Sp <- iris$Species)
+cS[Sp == "setosa"] <- "S"
+cS[Sp == "versicolor"] <- "V"
+cS[Sp == "virginica"] <- "g"
+ai <- agnes(iris[, 1:4])
+plot(ai, labels = cS, nmax = 150)# bannerplot labels are mess
+}
+\keyword{cluster}
+\keyword{hplot}
diff --git a/man/plot.diana.Rd b/man/plot.diana.Rd
new file mode 100644
index 0000000..39e09ef
--- /dev/null
+++ b/man/plot.diana.Rd
@@ -0,0 +1,83 @@
+\name{plot.diana}
+%% almost identical to ./plot.agnes.Rd and quite similar to ./plot.mona.Rd
+\alias{plot.diana}
+\title{Plots of a Divisive Hierarchical Clustering}
+\description{
+ Creates plots for visualizing a \code{diana} object.
+}
+\usage{
+\method{plot}{diana}(x, ask = FALSE, which.plots = NULL, main = NULL,
+ sub = paste("Divisive Coefficient = ", round(x$dc, digits = 2)),
+ adj = 0, nmax.lab = 35, max.strlen = 5, xax.pretty = TRUE, \dots)
+}
+\arguments{
+ \item{x}{an object of class \code{"diana"}, typically created by
+ \code{\link{diana}(.)}.}
+ \item{ask}{logical; if true and \code{which.plots} is \code{NULL},
+ \code{plot.diana} operates in interactive mode, via \code{\link{menu}}.}
+ \item{which.plots}{integer vector or NULL (default), the latter
+ producing both plots. Otherwise, \code{which.plots}
+ must contain integers of \code{1} for a \emph{banner} plot or \code{2} for a
+ dendrogram or \dQuote{clustering tree}.}
+ \item{main, sub}{main and sub title for the plot, each with a convenient
+ default. See documentation for these arguments in
+ \code{\link{plot.default}}.}
+ \item{adj}{for label adjustment in \code{\link{bannerplot}()}.}
+ \item{nmax.lab}{integer indicating the number of labels which is
+ considered too large for single-name labelling the banner plot.}
+ \item{max.strlen}{positive integer giving the length to which
+ strings are truncated in banner plot labeling.}
+ \item{xax.pretty}{logical or integer indicating if
+ \code{\link{pretty}(*, n = xax.pretty)} should be used for the x axis.
+ \code{xax.pretty = FALSE} is for back compatibility.}
+ \item{\dots}{graphical parameters (see \code{\link{par}}) may also
+ be supplied and are passed to \code{\link{bannerplot}()} or
+ \code{\link{pltree}()}, respectively.}
+}
+\section{Side Effects}{
+ An appropriate plot is produced on the current graphics device. This can
+ be one or both of the following choices:
+ \cr Banner
+ \cr Clustering tree
+}
+\details{
+When \code{ask = TRUE}, rather than producing each plot sequentially,
+\code{plot.diana} displays a menu listing all the plots that can be produced.
+If the menu is not desired but a pause between plots is still wanted
+one must set \code{par(ask= TRUE)} before invoking the plot command.
+
+The banner displays the hierarchy of clusters, and is equivalent to a tree.
+See Rousseeuw (1986) or chapter 6 of Kaufman and Rousseeuw (1990).
+The banner plots the diameter of each cluster being split.
+The observations are listed in the order found by the \code{diana}
+algorithm, and the numbers in the \code{height} vector are represented
+as bars between the observations.
+
+The leaves of the clustering tree are the original observations.
+A branch splits up at the diameter of the cluster being split.
+}
+\note{
+ In the banner plot,
+ observation labels are only printed when the number of observations is
+ less than \code{nmax.lab} (35, by default), for readability.
+ Moreover, observation labels are truncated to maximally
+ \code{max.strlen} (5) characters.
+}
+\references{see those in \code{\link{plot.agnes}}.}
+\seealso{
+ \code{\link{diana}}, \code{\link{diana.object}},
+ \code{\link{twins.object}}, \code{\link{par}}.
+}
+\examples{
+example(diana)# -> dv <- diana(....)
+
+plot(dv, which = 1, nmax.lab = 100)
+
+## wider labels :
+op <- par(mar = par("mar") + c(0, 2, 0,0))
+plot(dv, which = 1, nmax.lab = 100, max.strlen = 12)
+par(op)
+}
+\keyword{cluster}
+\keyword{hplot}
+
diff --git a/man/plot.mona.Rd b/man/plot.mona.Rd
new file mode 100644
index 0000000..57ddf42
--- /dev/null
+++ b/man/plot.mona.Rd
@@ -0,0 +1,54 @@
+\name{plot.mona}
+\alias{plot.mona}
+\title{Banner of Monothetic Divisive Hierarchical Clusterings}
+\description{
+ Creates the banner of a \code{mona} object.
+}
+\usage{
+\method{plot}{mona}(x, main = paste("Banner of ", deparse(x$call)),
+ sub = NULL, xlab = "Separation step",
+ col = c(2,0), axes = TRUE, adj = 0,
+ nmax.lab = 35, max.strlen = 5, \dots)
+}
+\arguments{
+ \item{x}{an object of class \code{"mona"}, typically created by
+ \code{\link{mona}(.)}.}
+ \item{main,sub}{main and sub titles for the plot, with convenient
+ defaults. See documentation in \code{\link{plot.default}}.}
+ \item{xlab}{x axis label, see \code{\link{title}}.}
+ \item{col,adj}{graphical parameters passed to \code{\link{bannerplot}()}.}
+ \item{axes}{logical, indicating if (labeled) axes should be drawn.}
+ \item{nmax.lab}{integer indicating the number of labels which is
+ considered too large for labeling.}
+ \item{max.strlen}{positive integer giving the length to which
+ strings are truncated in labeling.}
+ \item{\dots}{further graphical arguments are passed to
+ \code{\link{bannerplot}()} and \code{\link{text}}.}
+}
+\section{Side Effects}{
+ A banner is plotted on the current graphics device.
+}
+\details{
+ Plots the separation step at which clusters are split. The
+ observations are given in the order found by the \code{mona}
+ algorithm, the numbers in the \code{step} vector are represented as
+ bars between the observations.
+
+ When a long bar is drawn between two observations,
+ those observations have the same value for each variable.
+ See chapter 7 of Kaufman and Rousseeuw (1990).
+}
+\note{
+ In the banner plot,
+ observation labels are only printed when the number of observations is
+ less than \code{nmax.lab} (35, by default), for readability.
+ Moreover, observation labels are truncated to maximally
+ \code{max.strlen} (5) characters.
+}
+\references{see those in \code{\link{plot.agnes}}.}
+\seealso{
+ \code{\link{mona}}, \code{\link{mona.object}}, \code{\link{par}}.
+}
+\keyword{cluster}
+\keyword{hplot}
+% Converted by Sd2Rd version 0.3-2.
diff --git a/man/plot.partition.Rd b/man/plot.partition.Rd
new file mode 100644
index 0000000..01592b2
--- /dev/null
+++ b/man/plot.partition.Rd
@@ -0,0 +1,104 @@
+\name{plot.partition}
+\alias{plot.partition}
+\title{Plot of a Partition of the Data Set}
+\description{Creates plots for visualizing a \code{partition} object.}
+\usage{
+\method{plot}{partition}(x, ask = FALSE, which.plots = NULL,
+ nmax.lab = 40, max.strlen = 5, data = x$data, dist = NULL,
+ stand = FALSE, lines = 2,
+ shade = FALSE, color = FALSE, labels = 0, plotchar = TRUE,
+ span = TRUE, xlim = NULL, ylim = NULL, main = NULL, \dots)
+}
+\arguments{
+ \item{x}{an object of class \code{"partition"}, typically created by the
+ functions \code{\link{pam}}, \code{\link{clara}}, or \code{\link{fanny}}.}
+ \item{ask}{logical; if true and \code{which.plots} is \code{NULL},
+ \code{plot.partition} operates in interactive mode, via \code{\link{menu}}.}
+ \item{which.plots}{integer vector or NULL (default), the latter
+ producing both plots. Otherwise, \code{which.plots} must contain
+ integers of \code{1} for a \emph{clusplot} or \code{2} for
+ \emph{silhouette}.}
+ \item{nmax.lab}{integer indicating the number of labels which is
+ considered too large for single-name labeling the silhouette plot.}
+ \item{max.strlen}{positive integer giving the length to which
+ strings are truncated in silhouette plot labeling.}
+ \item{data}{numeric matrix with the scaled data; per default taken
+ from the partition object \code{x}, but can be specified explicitly.}
+ \item{dist}{when \code{x} does not have a \code{diss} component as for
+ \code{\link{pam}(*, keep.diss=FALSE)}, \code{dist} must be the
+ dissimilarity if a clusplot is desired.}
+ \item{stand,lines,shade,color,labels,plotchar,span,xlim,ylim,main, \dots}{
+ All optional arguments available for the \code{\link{clusplot.default}}
+ function (except for the \code{diss} one) and graphical parameters
+ (see \code{\link{par}}) may also be supplied as arguments to this function.}
+}
+\section{Side Effects}{
+ An appropriate plot is produced on the current graphics device. This
+ can be one or both of the following choices:
+ \cr Clusplot
+ \cr Silhouette plot
+}
+\details{
+ When \code{ask= TRUE}, rather than producing each plot sequentially,
+ \code{plot.partition} displays a menu listing all the plots that can
+ be produced.
+ If the menu is not desired but a pause between plots is still wanted,
+ call \code{par(ask= TRUE)} before invoking the plot command.
+
+ The \emph{clusplot} of a cluster partition consists of a two-dimensional
+ representation of the observations, in which the clusters are
+ indicated by ellipses (see \code{\link{clusplot.partition}} for more
+ details).
+
+ The \emph{silhouette plot} of a nonhierarchical clustering is fully
+ described in Rousseeuw (1987) and in chapter 2 of Kaufman and
+ Rousseeuw (1990).
+ For each observation i, a bar is drawn, representing its silhouette
+ width s(i), see \code{\link{silhouette}} for details.
+ Observations are grouped per cluster, starting with cluster 1 at the
+ top. Observations with a large s(i) (almost 1) are very well
+ clustered, a small s(i) (around 0) means that the observation lies
+ between two clusters, and observations with a negative s(i) are
+ probably placed in the wrong cluster.
+
+ A clustering can be performed for several values of \code{k} (the number of
+ clusters). Finally, choose the value of \code{k} with the largest overall
+ average silhouette width.
+}
+\note{
+ In the silhouette plot, observation labels are only printed when the
+ number of observations is less than \code{nmax.lab} (40, by default),
+ for readability. Moreover, observation labels are truncated to
+ maximally \code{max.strlen} (5) characters. \cr
+ For more flexibility, use \code{plot(silhouette(x), ...)}, see
+ \code{\link{plot.silhouette}}.
+}
+\references{
+ Rousseeuw, P.J. (1987)
+ Silhouettes: A graphical aid to the interpretation and validation of
+ cluster analysis. \emph{J. Comput. Appl. Math.}, \bold{20}, 53--65.
+
+ Further, the references in \code{\link{plot.agnes}}.
+}
+\seealso{
+ \code{\link{partition.object}}, \code{\link{clusplot.partition}},
+ \code{\link{clusplot.default}}, \code{\link{pam}},
+ \code{\link{pam.object}}, \code{\link{clara}},
+ \code{\link{clara.object}}, \code{\link{fanny}},
+ \code{\link{fanny.object}}, \code{\link{par}}.
+}
+\examples{
+## generate 25 objects, divided into 2 clusters.
+x <- rbind(cbind(rnorm(10,0,0.5), rnorm(10,0,0.5)),
+ cbind(rnorm(15,5,0.5), rnorm(15,5,0.5)))
+plot(pam(x, 2))
+
+## Save space not keeping data in clus.object, and still clusplot() it:
+data(xclara)
+cx <- clara(xclara, 3, keep.data = FALSE)
+cx$data # is NULL
+plot(cx, data = xclara)
+}
+\keyword{cluster}
+\keyword{hplot}
+% Converted by Sd2Rd version 0.3-2.
diff --git a/man/pltree.Rd b/man/pltree.Rd
new file mode 100644
index 0000000..3bd8396
--- /dev/null
+++ b/man/pltree.Rd
@@ -0,0 +1,65 @@
+\name{pltree}
+\alias{pltree}
+\alias{pltree.twins}
+\title{Plot Clustering Tree of a Hierarchical Clustering}
+\description{
+ \code{pltree()} draws a clustering tree (\dQuote{dendrogram}) on the
+ current graphics device. We provide the \code{twins} method, which draws the
+ tree of a \code{twins} object, i.e., hierarchical clustering,
+ typically resulting from \code{\link{agnes}()} or \code{\link{diana}()}.
+}
+\usage{
+pltree(x, \dots)
+\method{pltree}{twins}(x, main = paste("Dendrogram of ", deparse(x$call)),
+ labels = NULL, ylab = "Height", \dots)
+}
+\arguments{
+ \item{x}{in general, an \R object for which a \code{pltree} method is
+ defined; specifically, an object of class \code{"twins"}, typically
+ created by either \code{\link{agnes}()} or \code{\link{diana}()}.}
+ \item{main}{main title with a sensible default.}
+ \item{labels}{labels to use; the default is constructed from \code{x}.}
+ \item{ylab}{label for y-axis.}
+ \item{\dots}{graphical parameters (see \code{\link{par}}) may also
+ be supplied as arguments to this function.}
+}
+\value{
+ a NULL value is returned.
+}
+\details{
+ Creates a plot of a clustering tree given a \code{twins} object. The
+ leaves of the tree are the original observations. In case of an
+ agglomerative clustering, two branches come together at the distance
+ between the two clusters being merged. For a divisive clustering, a
+ branch splits up at the diameter of the cluster being split.
+
+ Note that currently the method function simply calls
+ \code{plot(\link[stats]{as.hclust}(x), ...)}, which dispatches to
+ \code{\link{plot.hclust}(..)}. If more flexible plots are needed,
+ consider \code{xx <- \link{as.dendrogram}(\link{as.hclust}(x))} and plotting
+ \code{xx}, see \code{\link{plot.dendrogram}}.
+}
+\seealso{
+ \code{\link{agnes}}, \code{\link{agnes.object}}, \code{\link{diana}},
+ \code{\link{diana.object}}, \code{\link{hclust}}, \code{\link{par}},
+ \code{\link{plot.agnes}}, \code{\link{plot.diana}}.
+}
+\examples{
+data(votes.repub)
+agn <- agnes(votes.repub)
+pltree(agn)
+
+dagn <- as.dendrogram(as.hclust(agn))
+dagn2 <- as.dendrogram(as.hclust(agn), hang = 0.2)
+op <- par(mar = par("mar") + c(0,0,0, 2)) # more space to the right
+plot(dagn2, horiz = TRUE)
+plot(dagn, horiz = TRUE, center = TRUE,
+ nodePar = list(lab.cex = 0.6, lab.col = "forest green", pch = NA),
+ main = deparse(agn$call))
+par(op)
+}
+\keyword{cluster}
+\keyword{hplot}
+
+
+
diff --git a/man/pluton.Rd b/man/pluton.Rd
new file mode 100644
index 0000000..18faa8d
--- /dev/null
+++ b/man/pluton.Rd
@@ -0,0 +1,50 @@
+\name{pluton}
+\alias{pluton}
+\title{Isotopic Composition Plutonium Batches}
+\usage{data(pluton)}
+\description{
+ The \code{pluton} data frame has 45 rows and 4 columns,
+ containing percentages of isotopic composition of 45 Plutonium
+ batches.
+}
+\format{
+ This data frame contains the following columns:
+ \describe{
+ \item{Pu238}{the percentages of \eqn{\ ^{238}Pu}{(238)Pu},
+ always less than 2 percent.}
+ \item{Pu239}{the percentages of \eqn{\ ^{239}Pu}{(239)Pu},
+ typically between 60 and 80 percent (from neutron capture of Uranium,
+ \eqn{\ ^{238}U}{(238)U}).}
+ \item{Pu240}{percentage of the plutonium 240 isotope.}
+ \item{Pu241}{percentage of the plutonium 241 isotope.}
+ }
+}
+\details{
+ Note that the percentage of plutonium~242 can be computed from the
+ other four percentages, see the examples.
+
+ In the reference below it is explained why it is very desirable to
+ combine these plutonium batches in three groups of similar size.
+}
+\source{
+ Available as \file{pluton.dat} from the archive of the University of Antwerpen,
+ %% originally at
+ %% \url{http://win-www.uia.ac.be/u/statis/datasets/clusplot-examples.tar.gz},
+ %% no longer: Jan.2015:
+ %% currently \url{http://www.agoras.ua.ac.be/datasets/clusplot-examples.tar.gz}.
+ \file{..../datasets/clusplot-examples.tar.gz}, no longer available.
+}
+\references{
+ Rousseeuw, P.J. and Kaufman, L. and Trauwaert, E. (1996)
+ Fuzzy clustering using scatter matrices,
+ \emph{Computational Statistics and Data Analysis} \bold{23}(1), 135--151.
+}
+\examples{
+data(pluton)
+
+hist(apply(pluton,1,sum), col = "gray") # between 94\% and 100\%
+pu5 <- pluton
+pu5$Pu242 <- 100 - apply(pluton,1,sum) # the remaining isotope.
+pairs(pu5)
+}
+\keyword{datasets}
diff --git a/man/predict.ellipsoid.Rd b/man/predict.ellipsoid.Rd
new file mode 100644
index 0000000..c4781d3
--- /dev/null
+++ b/man/predict.ellipsoid.Rd
@@ -0,0 +1,60 @@
+\name{predict.ellipsoid}
+\alias{predict.ellipsoid}
+\alias{ellipsoidPoints}
+\title{Predict Method for Ellipsoid Objects}
+\description{
+ Compute points on the ellipsoid boundary, mostly for drawing.
+}
+\usage{
+% method *and* stand alone function on purpose :
+predict.ellipsoid(object, n.out=201, \dots)
+\method{predict}{ellipsoid}(object, n.out=201, \dots)
+ellipsoidPoints(A, d2, loc, n.half = 201)
+}
+\arguments{
+ \item{object}{an object of class \code{ellipsoid}, typically from
+ \code{\link{ellipsoidhull}()}; alternatively any list-like object
+ with proper components, see details below.}
+ \item{n.out, n.half}{half the number of points to create.}
+ \item{A, d2, loc}{arguments of the auxiliary \code{ellipsoidPoints},
+ see below.}
+ \item{\dots}{passed to and from methods.}
+}
+\details{
+ Note \code{ellipsoidPoints} is the workhorse function of
+ \code{predict.ellipsoid}, a standalone function and method for
+ \code{ellipsoid} objects, see \code{\link{ellipsoidhull}}.
+ The class of \code{object} is not checked; it must solely have valid
+ components \code{loc} (length \eqn{p}), the \eqn{p \times p}{p x p}
+ matrix \code{cov} (corresponding to \code{A}) and \code{d2} for the
+ center, the shape (\dQuote{covariance}) matrix and the squared average
+ radius (or distance) or \code{\link{qchisq}(*, p)} quantile.
+
+ Unfortunately, this is only implemented for \eqn{p = 2}, currently;
+ contributions for \eqn{p \ge 3}{p >= 3} are \emph{very welcome}.
+}
+\value{
+ a numeric matrix of dimension \code{2*n.out} times \eqn{p}.
+}
+\seealso{\code{\link{ellipsoidhull}}, \code{\link{volume.ellipsoid}}.
+}
+\examples{
+ ## see also example(ellipsoidhull)
+
+## Robust vs. L.S. covariance matrix
+set.seed(143)
+x <- rt(200, df=3)
+y <- 3*x + rt(200, df=2)
+plot(x,y, main="non-normal data (N=200)")
+mtext("with classical and robust cov.matrix ellipsoids")
+X <- cbind(x,y)
+C.ls <- cov(X) ; m.ls <- colMeans(X)
+d2.99 <- qchisq(0.99, df = 2)
+lines(ellipsoidPoints(C.ls, d2.99, loc=m.ls), col="green")
+if(require(MASS)) {
+ Cxy <- cov.rob(cbind(x,y))
+ lines(ellipsoidPoints(Cxy$cov, d2 = d2.99, loc=Cxy$center), col="red")
+}# MASS
+}
+\keyword{dplot}
+\keyword{utilities}
diff --git a/man/print.agnes.Rd b/man/print.agnes.Rd
new file mode 100644
index 0000000..087bbe8
--- /dev/null
+++ b/man/print.agnes.Rd
@@ -0,0 +1,24 @@
+\name{print.agnes}
+\alias{print.agnes}
+\title{Print Method for AGNES Objects}
+\description{
+ Prints the call, agglomerative coefficient, ordering of objects and
+ distances between merging clusters ('Height') of an \code{agnes} object.
+
+ This is a method for the generic \code{\link{print}()} function for objects
+ inheriting from class \code{agnes}, see \code{\link{agnes.object}}.
+}
+\usage{
+\method{print}{agnes}(x, \dots)
+}
+\arguments{
+ \item{x}{an agnes object.}
+ \item{\dots}{potential further arguments (required by generic).}
+}
+\seealso{
+ \code{\link{summary.agnes}} producing more output;
+ \code{\link{agnes}}, \code{\link{agnes.object}}, \code{\link{print}},
+ \code{\link{print.default}}.
+}
+\keyword{cluster}
+\keyword{print}
diff --git a/man/print.clara.Rd b/man/print.clara.Rd
new file mode 100644
index 0000000..ccde9ea
--- /dev/null
+++ b/man/print.clara.Rd
@@ -0,0 +1,25 @@
+\name{print.clara}
+\alias{print.clara}
+\title{Print Method for CLARA Objects}
+\description{
+ Prints the best sample, medoids, clustering vector and objective function
+ of a \code{clara} object.
+
+ This is a method for the function \code{\link{print}()} for objects
+ inheriting from class \code{\link{clara}}.
+}
+\usage{
+\method{print}{clara}(x, \dots)
+}
+\arguments{
+ \item{x}{a clara object.}
+ \item{\dots}{potential further arguments (required by generic).}
+}
+\seealso{
+ \code{\link{summary.clara}} producing more output;
+ \code{\link{clara}}, \code{\link{clara.object}}, \code{\link{print}},
+ \code{\link{print.default}}.
+}
+\keyword{cluster}
+\keyword{print}
+% Converted by Sd2Rd version 0.3-2.
diff --git a/man/print.diana.Rd b/man/print.diana.Rd
new file mode 100644
index 0000000..9cea46c
--- /dev/null
+++ b/man/print.diana.Rd
@@ -0,0 +1,24 @@
+\name{print.diana}
+\alias{print.diana}
+\title{Print Method for DIANA Objects}
+\description{
+ Prints the ordering of objects, diameters of split clusters,
+ and divisive coefficient of a \code{diana} object.
+
+ This is a method for the function \code{\link{print}()} for objects
+ inheriting from class \code{\link{diana}}.
+}
+\usage{
+\method{print}{diana}(x, \dots)
+}
+\arguments{
+ \item{x}{a diana object.}
+ \item{\dots}{potential further arguments (required by generic).}
+}
+\seealso{
+ \code{\link{diana}}, \code{\link{diana.object}}, \code{\link{print}},
+ \code{\link{print.default}}.
+}
+\keyword{cluster}
+\keyword{print}
+
diff --git a/man/print.dissimilarity.Rd b/man/print.dissimilarity.Rd
new file mode 100644
index 0000000..92d04fc
--- /dev/null
+++ b/man/print.dissimilarity.Rd
@@ -0,0 +1,44 @@
+\title{Print and Summary Methods for Dissimilarity Objects}
+\name{print.dissimilarity}
+\alias{print.dissimilarity}
+\alias{summary.dissimilarity}
+\alias{print.summary.dissimilarity}
+\description{
+ Print or summarize the distances and the attributes of a
+ \code{dissimilarity} object.
+
+ These are methods for the functions \code{print()} and \code{summary()} for
+ \code{dissimilarity} objects. See \code{print}, \code{print.default},
+ or \code{summary} for the general behavior of these.
+}
+\usage{
+\method{print}{dissimilarity}(x, diag = NULL, upper = NULL,
+ digits = getOption("digits"), justify = "none", right = TRUE, \dots)
+\method{summary}{dissimilarity}(object,
+ digits = max(3, getOption("digits") - 2), \dots)
+\method{print}{summary.dissimilarity}(x, \dots)
+}
+\arguments{
+ \item{x, object}{a \code{dissimilarity} object or a
+ \code{summary.dissimilarity} one for \code{print.summary.dissimilarity()}.}
+ \item{digits}{the number of digits to use, see \code{\link{print.default}}.}
+ \item{diag, upper, justify, right}{optional arguments specifying how
+ the triangular dissimilarity matrix is printed; see
+ \code{\link[stats]{print.dist}}.}
+ \item{\dots}{potential further arguments (required by generic).}
+}
+\seealso{
+ \code{\link{daisy}}, \code{\link{dissimilarity.object}},
+ \code{\link{print}}, \code{\link{print.default}}, \code{\link{print.dist}}.
+}
+\examples{
+ ## See example(daisy)
+
+ sd <- summary(daisy(matrix(rnorm(100), 20,5)))
+ sd # -> print.summary.dissimilarity(.)
+ str(sd)
+}
+\keyword{cluster}
+\keyword{print}
+
+
diff --git a/man/print.fanny.Rd b/man/print.fanny.Rd
new file mode 100644
index 0000000..fe5b0d3
--- /dev/null
+++ b/man/print.fanny.Rd
@@ -0,0 +1,29 @@
+\name{print.fanny}
+\alias{print.fanny}
+\alias{summary.fanny}
+\alias{print.summary.fanny}
+\title{Print and Summary Methods for FANNY Objects}
+\description{
+ Prints the objective function, membership coefficients and clustering vector
+ of a \code{fanny} object.
+
+ This is a method for the function \code{\link{print}()} for objects
+ inheriting from class \code{\link{fanny}}.
+}
+\usage{
+\method{print}{fanny}(x, digits = getOption("digits"), \dots)
+\method{summary}{fanny}(object, \dots)
+\method{print}{summary.fanny}(x, digits = getOption("digits"), \dots)
+}
+\arguments{
+ \item{x, object}{a \code{\link{fanny}} object.}
+ \item{digits}{number of significant digits for printing, see
+ \code{\link{print.default}}.}
+ \item{\dots}{potential further arguments (required by generic).}
+}
+\seealso{
+ \code{\link{fanny}}, \code{\link{fanny.object}}, \code{\link{print}},
+ \code{\link{print.default}}.
+}
+\keyword{cluster}
+\keyword{print}
diff --git a/man/print.mona.Rd b/man/print.mona.Rd
new file mode 100644
index 0000000..d8bd67c
--- /dev/null
+++ b/man/print.mona.Rd
@@ -0,0 +1,23 @@
+\name{print.mona}
+\alias{print.mona}
+\title{Print Method for MONA Objects}
+\description{
+ Prints the ordering of objects, separation steps, and used variables
+ of a \code{mona} object.
+
+ This is a method for the function \code{\link{print}()} for objects
+ inheriting from class \code{\link{mona}}.
+}
+\usage{
+\method{print}{mona}(x, \dots)
+}
+\arguments{
+ \item{x}{a mona object.}
+ \item{\dots}{potential further arguments (required by generic).}
+}
+\seealso{
+ \code{\link{mona}}, \code{\link{mona.object}}, \code{\link{print}},
+ \code{\link{print.default}}.
+}
+\keyword{cluster}
+\keyword{print}
diff --git a/man/print.pam.Rd b/man/print.pam.Rd
new file mode 100644
index 0000000..4b427f8
--- /dev/null
+++ b/man/print.pam.Rd
@@ -0,0 +1,23 @@
+\name{print.pam}
+\alias{print.pam}
+\title{Print Method for PAM Objects}
+\description{
+ Prints the medoids, clustering vector and objective function
+ of a \code{pam} object.
+
+ This is a method for the function \code{\link{print}()} for objects
+ inheriting from class \code{\link{pam}}.
+}
+\usage{
+\method{print}{pam}(x, \dots)
+}
+\arguments{
+ \item{x}{a pam object.}
+ \item{\dots}{potential further arguments (required by generic).}
+}
+\seealso{
+ \code{\link{pam}}, \code{\link{pam.object}}, \code{\link{print}},
+ \code{\link{print.default}}.
+}
+\keyword{cluster}
+\keyword{print}
diff --git a/man/ruspini.Rd b/man/ruspini.Rd
new file mode 100644
index 0000000..7a5abe2
--- /dev/null
+++ b/man/ruspini.Rd
@@ -0,0 +1,31 @@
+\name{ruspini}
+\alias{ruspini}
+\title{Ruspini Data}
+\usage{data(ruspini)}
+\description{
+ The Ruspini data set, consisting of 75 points in four groups that is
+ popular for illustrating clustering techniques.
+}
+\format{
+ A data frame with 75 observations on 2 variables giving the x and y
+ coordinates of the points, respectively.
+}
+\source{
+ E. H. Ruspini (1970)
+ Numerical methods for fuzzy clustering.
+ \emph{Inform. Sci.} \bold{2}, 319--350.
+}
+\references{
+ see those in \code{\link{agnes}}.
+}
+\examples{
+data(ruspini)
+
+## Plot similar to Figure 4 in Struyf et al (1996)
+\dontrun{plot(pam(ruspini, 4), ask = TRUE)}
+\dontshow{plot(pam(ruspini, 4))}
+
+## Plot similar to Figure 6 in Struyf et al (1996)
+plot(fanny(ruspini, 5))
+}
+\keyword{datasets}
diff --git a/man/silhouette.Rd b/man/silhouette.Rd
new file mode 100644
index 0000000..235bb69
--- /dev/null
+++ b/man/silhouette.Rd
@@ -0,0 +1,193 @@
+\name{silhouette}
+\alias{silhouette}
+\alias{silhouette.clara}
+\alias{silhouette.default}
+\alias{silhouette.partition}
+\alias{sortSilhouette}
+\alias{summary.silhouette}
+\alias{print.summary.silhouette}
+\alias{plot.silhouette}
+\title{Compute or Extract Silhouette Information from Clustering}
+\description{
+ Compute silhouette information according to a given clustering in
+ \eqn{k} clusters.
+}
+\usage{
+silhouette(x, \dots)
+\method{silhouette}{default} (x, dist, dmatrix, \dots)
+\method{silhouette}{partition}(x, \dots)
+\method{silhouette}{clara}(x, full = FALSE, \dots)
+
+sortSilhouette(object, \dots)
+\method{summary}{silhouette}(object, FUN = mean, \dots)
+\method{plot}{silhouette}(x, nmax.lab = 40, max.strlen = 5,
+ main = NULL, sub = NULL, xlab = expression("Silhouette width "* s[i]),
+ col = "gray", do.col.sort = length(col) > 1, border = 0,
+ cex.names = par("cex.axis"), do.n.k = TRUE, do.clus.stat = TRUE, \dots)
+}
+\arguments{
+ \item{x}{an object of appropriate class; for the \code{default}
+ method an integer vector with \eqn{k} different integer cluster
+ codes or a list with such an \code{x$clustering}
+ component. Note that silhouette statistics are only defined if
+ \eqn{2 \le k \le n-1}{2 <= k <= n-1}.}
+ \item{dist}{a dissimilarity object inheriting from class
+ \code{\link{dist}} or coercible to one. If not specified,
+ \code{dmatrix} must be.}
+ \item{dmatrix}{a symmetric dissimilarity matrix (\eqn{n \times n}{n x n}),
+ specified instead of \code{dist}, which can be more efficient.}
+ \item{full}{logical specifying if a \emph{full} silhouette should be
+ computed for \code{\link{clara}} object. Note that this requires
+ \eqn{O(n^2)} memory, since the full dissimilarity (see
+ \code{\link{daisy}}) is needed internally.}
+ \item{object}{an object of class \code{silhouette}.}
+ \item{\dots}{further arguments passed to and from methods.}
+ \item{FUN}{function used to summarize silhouette widths.}
+ \item{nmax.lab}{integer indicating the number of labels which is
+ considered too large for single-name labeling the silhouette plot.}
+ \item{max.strlen}{positive integer giving the length to which
+ strings are truncated in silhouette plot labeling.}
+ \item{main, sub, xlab}{arguments to \code{\link{title}}; have a
+ sensible non-NULL default here.}
+ \item{col, border, cex.names}{arguments passed to
+ \code{\link{barplot}()}; note that the default used to be \code{col
+ = heat.colors(n), border = par("fg")} instead.\cr
+ \code{col} can also be a color vector of length \eqn{k} for
+ clusterwise coloring, see also \code{do.col.sort}:
+ }
+ \item{do.col.sort}{logical indicating if the colors \code{col} should
+ be sorted \dQuote{along} the silhouette; this is useful for casewise or
+ clusterwise coloring.}
+ \item{do.n.k}{logical indicating if \eqn{n} and \eqn{k} \dQuote{title text}
+ should be written.}
+ \item{do.clus.stat}{logical indicating if cluster size and averages
+ should be written to the right of the silhouettes.}
+}
+\details{
+ For each observation i, the \emph{silhouette width} \eqn{s(i)} is
+ defined as follows: \cr
+ Put a(i) = average dissimilarity between i and all other points of the
+ cluster to which i belongs (if i is the \emph{only} observation in
+ its cluster, \eqn{s(i) := 0} without further calculations).
+ For all \emph{other} clusters C, put \eqn{d(i,C)} = average
+ dissimilarity of i to all observations of C. The smallest of these
+ \eqn{d(i,C)} is \eqn{b(i) := \min_C d(i,C)},
+ and can be seen as the dissimilarity between i and its \dQuote{neighbor}
+ cluster, i.e., the nearest one to which it does \emph{not} belong.
+ Finally, \deqn{s(i) := \frac{b(i) - a(i) }{\max(a(i), b(i))}.}{%
+ s(i) := ( b(i) - a(i) ) / max( a(i), b(i) ).}
+
+ \code{silhouette.default()} is now based on C code donated by Romain
+ Francois (the R version being still available as
+ \code{cluster:::silhouette.default.R}).
+
+ Observations with a large \eqn{s(i)} (almost 1) are very well
+ clustered, a small \eqn{s(i)} (around 0) means that the observation
+ lies between two clusters, and observations with a negative
+ \eqn{s(i)} are probably placed in the wrong cluster.
+}
+\note{
+ While \code{silhouette()} is \emph{intrinsic} to the
+ \code{\link{partition}} clusterings, and hence has a (trivial) method
+ for these, it is straightforward to get silhouettes from hierarchical
+ clusterings from \code{silhouette.default()} with
+ \code{\link{cutree}()} and distance as input.
+
+ By default, for \code{\link{clara}()} partitions, the silhouette is
+ just for the best random \emph{subset} used. Use \code{full = TRUE}
+ to compute (and later possibly plot) the full silhouette.
+}
+\value{
+ \code{silhouette()} returns an object, \code{sil}, of class
+ \code{silhouette} which is an \eqn{n \times 3}{n x 3} matrix with
+ attributes. For each observation i, \code{sil[i,]} contains the
+ cluster to which i belongs as well as the neighbor cluster of i (the
+ cluster, not containing i, for which the average dissimilarity between its
+ observations and i is minimal), and the silhouette width \eqn{s(i)} of
+ the observation. The \code{\link{colnames}} correspondingly are
+ \code{c("cluster", "neighbor", "sil_width")}.
+
+ \code{summary(sil)} returns an object of class
+ \code{summary.silhouette}, a list with components
+ \describe{
+ \item{\code{si.summary}:}{numerical \code{\link{summary}} of the
+ individual silhouette widths \eqn{s(i)}.}
+ \item{\code{clus.avg.widths}:}{numeric (rank 1) array of clusterwise
+ \emph{means} of silhouette widths where \code{mean = FUN} is used.}
+ \item{\code{avg.width}:}{the total mean \code{FUN(s)} where
+ \code{s} are the individual silhouette widths.}
+ \item{\code{clus.sizes}:}{\code{\link{table}} of the \eqn{k} cluster sizes.}
+ \item{\code{call}:}{if available, the \code{\link{call}} creating \code{sil}.}
+ \item{\code{Ordered}:}{logical identical to \code{attr(sil, "Ordered")},
+ see below.}
+ }
+
+ \code{sortSilhouette(sil)} orders the rows of \code{sil} as in the
+ silhouette plot, by cluster (increasingly) and decreasing silhouette
+ width \eqn{s(i)}.
+ \cr
+ \code{attr(sil, "Ordered")} is a logical indicating if \code{sil} \emph{is}
+ ordered as by \code{sortSilhouette()}. In that case,
+ \code{rownames(sil)} will contain case labels or numbers, and \cr
+ \code{attr(sil, "iOrd")} the ordering index vector.
+}
+\references{
+ Rousseeuw, P.J. (1987)
+ Silhouettes: A graphical aid to the interpretation and validation of
+ cluster analysis. \emph{J. Comput. Appl. Math.}, \bold{20}, 53--65.
+
+ chapter 2 of Kaufman and Rousseeuw (1990), see
+ the references in \code{\link{plot.agnes}}.
+}
+\seealso{\code{\link{partition.object}}, \code{\link{plot.partition}}.
+}
+\examples{
+data(ruspini)
+pr4 <- pam(ruspini, 4)
+str(si <- silhouette(pr4))
+(ssi <- summary(si))
+plot(si) # silhouette plot
+plot(si, col = c("red", "green", "blue", "purple"))# with cluster-wise coloring
+
+si2 <- silhouette(pr4$clustering, dist(ruspini, "canberra"))
+summary(si2) # has small values: "canberra"'s fault
+plot(si2, nmax= 80, cex.names=0.6)
+
+op <- par(mfrow= c(3,2), oma= c(0,0, 3, 0),
+ mgp= c(1.6,.8,0), mar= .1+c(4,2,2,2))
+for(k in 2:6)
+ plot(silhouette(pam(ruspini, k=k)), main = paste("k = ",k), do.n.k=FALSE)
+mtext("PAM(Ruspini) as in Kaufman & Rousseeuw, p.101",
+ outer = TRUE, font = par("font.main"), cex = par("cex.main")); frame()
+
+## the same with cluster-wise colours:
+c6 <- c("tomato", "forest green", "dark blue", "purple2", "goldenrod4", "gray20")
+for(k in 2:6)
+ plot(silhouette(pam(ruspini, k=k)), main = paste("k = ",k), do.n.k=FALSE,
+ col = c6[1:k])
+par(op)
+
+## clara(): standard silhouette is just for the best random subset
+data(xclara)
+set.seed(7)
+str(xc1k <- xclara[ sample(nrow(xclara), size = 1000) ,]) # rownames == indices
+cl3 <- clara(xc1k, 3)
+plot(silhouette(cl3))# only of the "best" subset of 46
+## The full silhouette: internally needs large (36 MB) dist object:
+sf <- silhouette(cl3, full = TRUE) ## this is the same as
+s.full <- silhouette(cl3$clustering, daisy(xc1k))
+stopifnot(all.equal(sf, s.full, check.attributes = FALSE, tolerance = 0))
+## color dependent on original "3 groups of each 1000": % __FIXME ??__
+plot(sf, col = 2+ as.integer(names(cl3$clustering) ) \%/\% 1000,
+ main ="plot(silhouette(clara(.), full = TRUE))")
+
+## Silhouette for a hierarchical clustering:
+ar <- agnes(ruspini)
+si3 <- silhouette(cutree(ar, k = 5), # k = 4 gave the same as pam() above
+ daisy(ruspini))
+plot(si3, nmax = 80, cex.names = 0.5)
+## 2 groups: Agnes() wasn't too good:
+si4 <- silhouette(cutree(ar, k = 2), daisy(ruspini))
+plot(si4, nmax = 80, cex.names = 0.5)
+}
+\keyword{cluster}
diff --git a/man/sizeDiss.Rd b/man/sizeDiss.Rd
new file mode 100644
index 0000000..8f78146
--- /dev/null
+++ b/man/sizeDiss.Rd
@@ -0,0 +1,32 @@
+\name{sizeDiss}
+\alias{sizeDiss}
+\title{Sample Size of Dissimilarity Like Object}
+\description{
+ Returns the number of observations (\emph{sample size}) corresponding
+ to a dissimilarity like object, or equivalently,
+ the number of rows or columns of a matrix
+ when only the lower or upper triangular part (without diagonal) is given.
+
+ It is nothing else but the inverse function of \eqn{f(n) = n(n-1)/2}.
+}
+\usage{
+sizeDiss(d)
+}
+\arguments{
+ \item{d}{any \R object with length (typically) \eqn{n(n-1)/2}.}
+}
+\value{
+ a number; \eqn{n} if \code{length(d) == n(n-1)/2}, \code{NA} otherwise.
+}
+\seealso{\code{\link{dissimilarity.object}} and also
+ \code{\link{as.dist}} for class \code{dissimilarity} and
+ \code{dist} objects which have a \code{Size} attribute.}
+\examples{
+sizeDiss(1:10)# 5, since 10 == 5 * (5 - 1) / 2
+sizeDiss(1:9) # NA
+
+n <- 1:100
+stopifnot(n == sapply( n*(n-1)/2, function(n) sizeDiss(logical(n))))
+}
+\keyword{utilities}
+\keyword{arith}
diff --git a/man/summary.agnes.Rd b/man/summary.agnes.Rd
new file mode 100644
index 0000000..a6b184c
--- /dev/null
+++ b/man/summary.agnes.Rd
@@ -0,0 +1,25 @@
+\name{summary.agnes}
+\alias{summary.agnes}
+\alias{print.summary.agnes}
+\title{Summary Method for 'agnes' Objects}
+\description{
+ Returns (and prints) a summary list for an \code{agnes} object.
+ Printing gives more output than the corresponding
+ \code{\link{print.agnes}} method.
+}
+\usage{
+\method{summary}{agnes}(object, \dots)
+\method{print}{summary.agnes}(x, \dots)
+}
+\arguments{
+ \item{x, object}{an \code{\link{agnes}} object.}
+ \item{\dots}{potential further arguments (required by generic).}
+}
+\seealso{\code{\link{agnes}}, \code{\link{agnes.object}}.}
+\examples{
+data(agriculture)
+summary(agnes(agriculture))
+}
+\keyword{cluster}
+\keyword{print}
+
diff --git a/man/summary.clara.Rd b/man/summary.clara.Rd
new file mode 100644
index 0000000..4292f30
--- /dev/null
+++ b/man/summary.clara.Rd
@@ -0,0 +1,42 @@
+\name{summary.clara}
+\alias{summary.clara}
+\alias{print.summary.clara}
+\title{Summary Method for 'clara' Objects}
+\description{
+ Returns (and prints) a summary list for a \code{clara} object.
+ Printing gives more output than the corresponding
+ \code{\link{print.clara}} method.
+}
+\usage{
+\method{summary}{clara}(object, \dots)
+\method{print}{summary.clara}(x, \dots)
+}
+\arguments{
+ \item{x, object}{a \code{\link{clara}} object.}
+ \item{\dots}{potential further arguments (required by generic).}
+}
+\seealso{\code{\link{clara.object}}}
+\examples{
+## generate 2000 objects, divided into 5 clusters.
+set.seed(47)
+x <- rbind(cbind(rnorm(400, 0,4), rnorm(400, 0,4)),
+ cbind(rnorm(400,10,8), rnorm(400,40,6)),
+ cbind(rnorm(400,30,4), rnorm(400, 0,4)),
+ cbind(rnorm(400,40,4), rnorm(400,20,2)),
+ cbind(rnorm(400,50,4), rnorm(400,50,4))
+)
+clx5 <- clara(x, 5)
+## Mis'classification' table:
+% R version >= 1.5 :
+% table(rep(1:5, each = 400), clx5$clust) # -> 1 "error"
+table(rep(1:5, rep(400,5)), clx5$clust) # -> 1 "error"
+summary(clx5)
+
+## Graphically:
+par(mfrow = c(3,1), mgp = c(1.5, 0.6, 0), mar = par("mar") - c(0,0,2,0))
+%>1.5: plot(x, col = rep(2:6, each = 400))
+plot(x, col = rep(2:6, rep(400,5)))
+plot(clx5)
+}
+\keyword{cluster}
+\keyword{print}
diff --git a/man/summary.diana.Rd b/man/summary.diana.Rd
new file mode 100644
index 0000000..2a4623d
--- /dev/null
+++ b/man/summary.diana.Rd
@@ -0,0 +1,17 @@
+\name{summary.diana}
+\alias{summary.diana}
+\alias{print.summary.diana}
+\title{Summary Method for 'diana' Objects}
+\description{Returns (and prints) a summary list for a \code{diana} object.}
+\usage{
+\method{summary}{diana}(object, \dots)
+\method{print}{summary.diana}(x, \dots)
+}
+\arguments{
+ \item{x, object}{a \code{\link{diana}} object.}
+ \item{\dots}{potential further arguments (required by generic).}
+}
+\seealso{\code{\link{diana}}, \code{\link{diana.object}}.}
+\keyword{cluster}
+\keyword{print}
+
diff --git a/man/summary.mona.Rd b/man/summary.mona.Rd
new file mode 100644
index 0000000..6dd3ec4
--- /dev/null
+++ b/man/summary.mona.Rd
@@ -0,0 +1,16 @@
+\name{summary.mona}
+\alias{summary.mona}
+\alias{print.summary.mona}
+\title{Summary Method for 'mona' Objects}
+\description{Returns (and prints) a summary list for a \code{mona} object.}
+\usage{
+\method{summary}{mona}(object, \dots)
+\method{print}{summary.mona}(x, \dots)
+}
+\arguments{
+ \item{x, object}{a \code{\link{mona}} object.}
+ \item{\dots}{potential further arguments (required by generic).}
+}
+\seealso{\code{\link{mona}}, \code{\link{mona.object}}.}
+\keyword{cluster}
+\keyword{print}
diff --git a/man/summary.pam.Rd b/man/summary.pam.Rd
new file mode 100644
index 0000000..4440dc6
--- /dev/null
+++ b/man/summary.pam.Rd
@@ -0,0 +1,20 @@
+\name{summary.pam}
+\alias{summary.pam}
+\alias{print.summary.pam}
+\title{Summary Method for PAM Objects}
+\description{Summarize a \code{\link{pam}} object and return an object
+ of class \code{summary.pam}.
+ There's a \code{\link{print}} method for the latter.
+}
+\usage{
+\method{summary}{pam}(object, \dots)
+\method{print}{summary.pam}(x, \dots)
+}
+\arguments{
+ \item{x, object}{a \code{\link{pam}} object.}
+ \item{\dots}{potential further arguments (required by generic).}
+}
+\seealso{\code{\link{pam}}, \code{\link{pam.object}}.
+}
+\keyword{cluster}
+
diff --git a/man/twins.object.Rd b/man/twins.object.Rd
new file mode 100644
index 0000000..3ad6db2
--- /dev/null
+++ b/man/twins.object.Rd
@@ -0,0 +1,26 @@
+\name{twins.object}
+\alias{twins.object}
+\alias{twins}% == class
+\title{Hierarchical Clustering Object}
+
+\description{
+ The objects of class \code{"twins"} represent an agglomerative or
+ divisive (polythetic) hierarchical clustering of a dataset.
+}
+\section{GENERATION}{
+ This class of objects is returned from \code{agnes} or \code{diana}.
+}
+\section{METHODS}{
+ The \code{"twins"} class has a method for the following generic function:
+ \code{pltree}.
+}
+\section{INHERITANCE}{
+ The following classes inherit from class \code{"twins"} :
+ \code{"agnes"} and \code{"diana"}.
+}
+\value{
+ See \code{\link{agnes.object}} and \code{\link{diana.object}} for details.
+}
+\seealso{\code{\link{agnes}},\code{\link{diana}}.
+}
+\keyword{cluster}
diff --git a/man/volume.ellipsoid.Rd b/man/volume.ellipsoid.Rd
new file mode 100644
index 0000000..da50b63
--- /dev/null
+++ b/man/volume.ellipsoid.Rd
@@ -0,0 +1,43 @@
+\name{volume.ellipsoid}
+\alias{volume}
+\alias{volume.ellipsoid}
+\title{Compute the Volume (of an Ellipsoid)}
+\description{
+ Compute the volume of a geometric \R object.
+ This is a generic function and has a method for \code{ellipsoid} objects
+ (typically resulting from \code{\link{ellipsoidhull}()}).
+}
+\usage{
+volume(object, \dots)
+\method{volume}{ellipsoid}(object, log = FALSE, \dots)
+}
+\arguments{
+ \item{object}{an \R object the volume of which is wanted; for the
+ \code{ellipsoid} method, an object of that class (see
+ \code{\link{ellipsoidhull}} or the example below).}
+ \item{log}{\code{\link{logical}} indicating if the volume should be
+ returned in log scale. May be needed in largish dimensions.}
+ \item{\dots}{potential further arguments of methods, e.g. \code{log}.}
+}
+\value{
+ a number, the volume \eqn{V} (or \eqn{\log(V)} if \code{log = TRUE}) of
+ the given \code{object}.
+}
+\author{
+ Martin Maechler (2002, extracting from former \code{\link{clusplot}} code);
+ Keefe Murphy (2019) provided code for dimensions \eqn{d > 2}.
+}
+\seealso{\code{\link{ellipsoidhull}} for spanning ellipsoid computation.}
+\examples{
+## example(ellipsoidhull) # which defines 'ellipsoid' object <namefoo>
+
+myEl <- structure(list(cov = rbind(c(3,1),1:2), loc = c(0,0), d2 = 10),
+ class = "ellipsoid")
+volume(myEl)# i.e. "area" here (d = 2)
+myEl # also mentions the "volume"
+
+set.seed(1)
+d5 <- matrix(rt(500, df=3), 100,5)
+e5 <- ellipsoidhull(d5)
+}
+\keyword{utilities}
diff --git a/man/votes.repub.Rd b/man/votes.repub.Rd
new file mode 100644
index 0000000..33b373e
--- /dev/null
+++ b/man/votes.repub.Rd
@@ -0,0 +1,19 @@
+\name{votes.repub}
+\alias{votes.repub}
+\title{Votes for Republican Candidate in Presidential Elections}
+\usage{data(votes.repub)}
+\description{
+ A data frame with the percents of votes given to the republican
+ candidate in presidential elections from 1856 to 1976. Rows
+ represent the 50 states, and columns the 31 elections.
+}
+\source{
+ S. Peterson (1973):
+ \emph{A Statistical History of the American Presidential Elections}.
+ New York: Frederick Ungar Publishing Co.
+
+ Data from 1964 to 1976 is from R. M. Scammon,
+ \emph{American Votes 12}, Congressional Quarterly.
+}
+\keyword{datasets}
+
diff --git a/man/xclara.Rd b/man/xclara.Rd
new file mode 100644
index 0000000..ad0c15d
--- /dev/null
+++ b/man/xclara.Rd
@@ -0,0 +1,66 @@
+\name{xclara}
+\alias{xclara}
+\title{Bivariate Data Set with 3 Clusters}
+\description{
+ An artificial data set consisting of 3000 points in 3 quite well-separated
+ clusters.
+}
+\usage{data(xclara)}
+\format{
+ A data frame with 3000 observations on 2 numeric variables (named
+ \code{V1} and \code{V2}) giving the
+ \eqn{x} and \eqn{y} coordinates of the points, respectively.
+}
+\source{
+ Sample data set accompanying the reference below (file
+ \file{xclara.dat} inside \file{clus_examples.tar.gz}).
+}
+\note{
+ Our version of the \code{xclara} is slightly more rounded than the one
+ from \code{\link{read.table}("xclara.dat")} and the relative
+ difference measured by \code{\link{all.equal}} is \code{1.15e-7} for
+ \code{V1} and \code{1.17e-7} for \code{V2} which suggests that our
+ version has been the result of a \code{\link{options}(digits = 7)}
+ formatting.
+
+ Previously (before May 2017), it was claimed the three clusters were
+ each of size 1000, which is clearly wrong. \code{\link{pam}(*, 3)}
+ gives cluster sizes of 899, 1149, and 952, which apart from seven
+ \dQuote{outliers} (or \dQuote{mislabellings}) correspond to
+ observation indices \eqn{\{1:900\}}{1:900}, \eqn{\{901:2050\}}{901:2050}, and
+ \eqn{\{2051:3000\}}{2051:3000}, see the example.
+}
+\references{
+ Anja Struyf, Mia Hubert & Peter J. Rousseeuw (1996)
+ Clustering in an Object-Oriented Environment.
+ \emph{Journal of Statistical Software} \bold{1}.
+ \doi{10.18637/jss.v001.i04}
+}
+\keyword{datasets}
+\examples{
+## Visualization: Assuming groups are defined as {1:1000}, {1001:2000}, {2001:3000}
+plot(xclara, cex = 3/4, col = rep(1:3, each=1000))
+p.ID <- c(78, 1411, 2535) ## PAM's medoid indices == pam(xclara, 3)$id.med
+text(xclara[p.ID,], labels = 1:3, cex=2, col=1:3)
+\donttest{%% TODO: a clara() call with the _identical_ clustering (but faster!)
+ px <- pam(xclara, 3) ## takes ~2 seconds
+ cxcl <- px$clustering ; iCl <- split(seq_along(cxcl), cxcl)
+ boxplot(iCl, range = 0.7, horizontal=TRUE,
+ main = "Indices of the 3 clusters of pam(xclara, 3)")
+
+ ## Look more closely now:
+ bxCl <- boxplot(iCl, range = 0.7, plot=FALSE)
+ ## We see 3 + 2 + 2 = 7 clear "outlier"s or "wrong group" observations:
+ with(bxCl, rbind(out, group))
+ ## out 1038 1451 1610 30 327 562 770
+ ## group 1 1 1 2 2 3 3
+ ## Apart from these, what are the robust ranges of indices? -- Robust range:
+ t(iR <- bxCl$stats[c(1,5),])
+ ## 1 900
+ ## 901 2050
+ ## 2051 3000
+ gc <- adjustcolor("gray20",1/2)
+ abline(v = iR, col = gc, lty=3)
+ axis(3, at = c(0, iR[2,]), padj = 1.2, col=gc, col.axis=gc)
+}% dont
+} \ No newline at end of file
diff --git a/po/R-cluster.pot b/po/R-cluster.pot
new file mode 100644
index 0000000..97896df
--- /dev/null
+++ b/po/R-cluster.pot
@@ -0,0 +1,296 @@
+msgid ""
+msgstr ""
+"Project-Id-Version: cluster 2.0.8\n"
+"POT-Creation-Date: 2019-04-02 17:09\n"
+"PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n"
+"Last-Translator: FULL NAME <EMAIL@ADDRESS>\n"
+"Language-Team: LANGUAGE <LL@li.org>\n"
+"MIME-Version: 1.0\n"
+"Content-Type: text/plain; charset=CHARSET\n"
+"Content-Transfer-Encoding: 8bit\n"
+
+
+msgid "invalid clustering method"
+msgstr ""
+
+msgid "ambiguous clustering method"
+msgstr ""
+
+msgid "'par.method' must be of length 1, 3, or 4"
+msgstr ""
+
+msgid "NA-values in the dissimilarity matrix not allowed."
+msgstr ""
+
+msgid "'x' is not and cannot be converted to class \"dissimilarity\""
+msgstr ""
+
+msgid "x is not a numeric dataframe or matrix."
+msgstr ""
+
+msgid "need at least 2 objects to cluster"
+msgstr ""
+
+msgid "No clustering performed, NA-values in the dissimilarity matrix."
+msgstr ""
+
+msgid "'x' is a \"dist\" object, but should be a data matrix or frame"
+msgstr ""
+
+msgid "The number of cluster should be at least 1 and at most n-1."
+msgstr ""
+
+msgid "'sampsize' should be at least %d = max(2, 1+ number of clusters)"
+msgstr ""
+
+msgid "'sampsize' = %d should not be larger than the number of objects, %d"
+msgstr ""
+
+msgid "'samples' should be at least 1"
+msgstr ""
+
+msgid "when 'medoids.x' is FALSE, 'keep.data' must be too"
+msgstr ""
+
+msgid "Distance computations with NAs: using correct instead of pre-2016 wrong formula.\nUse 'correct.d=FALSE' to get previous results or set 'correct.d=TRUE' explicitly\nto suppress this warning."
+msgstr ""
+
+msgid "invalid 'correct.d'"
+msgstr ""
+
+msgid "Each of the random samples contains objects between which no distance can be computed."
+msgstr ""
+
+msgid "For each of the %d samples, at least one object was found which could not be assigned to a cluster (because of missing values)."
+msgstr ""
+
+msgid "invalid 'jstop' from .C(cl_clara,.):"
+msgstr ""
+
+msgid "'B' has to be a positive integer"
+msgstr ""
+
+msgid "invalid 'spaceH0':"
+msgstr ""
+
+msgid "index has to be a function or a list of function"
+msgstr ""
+
+msgid "invalid 'twins' object"
+msgstr ""
+
+msgid "x is not a dataframe or a numeric matrix."
+msgstr ""
+
+msgid "invalid %s; must be named list"
+msgstr ""
+
+msgid "%s has invalid column names"
+msgstr ""
+
+msgid "%s must be in 1:ncol(x)"
+msgstr ""
+
+msgid "%s must contain column names or numbers"
+msgstr ""
+
+msgid "at least one binary variable has more than 2 levels."
+msgstr ""
+
+msgid "at least one binary variable has not 2 different levels."
+msgstr ""
+
+msgid "at least one binary variable has values not in {0,1,NA}"
+msgstr ""
+
+msgid "binary variable(s) %s treated as interval scaled"
+msgstr ""
+
+msgid "%s has constant columns %s; these are standardized to 0"
+msgstr ""
+
+msgid "with mixed variables, metric \"gower\" is used automatically"
+msgstr ""
+
+msgid "'weights' must be of length p (or 1)"
+msgstr ""
+
+msgid "invalid type %s for column numbers %s"
+msgstr ""
+
+msgid "NA values in the dissimilarity matrix not allowed."
+msgstr ""
+
+msgid "No clustering performed, NA's in dissimilarity matrix."
+msgstr ""
+
+msgid "'x' must be numeric n x p matrix"
+msgstr ""
+
+msgid "omitting NAs"
+msgstr ""
+
+msgid "no points without missing values"
+msgstr ""
+
+msgid "computed some negative or all 0 probabilities"
+msgstr ""
+
+msgid "algorithm possibly not converged in %d iterations"
+msgstr ""
+
+msgid "'A' must be p x p cov-matrix defining an ellipsoid"
+msgstr ""
+
+msgid "ellipsoidPoints() not yet implemented for p >= 3 dim."
+msgstr ""
+
+msgid "'k' (number of clusters) must be in {1,2, .., n/2 -1}"
+msgstr ""
+
+msgid "'memb.exp' must be a finite number > 1"
+msgstr ""
+
+msgid "'maxit' must be non-negative integer"
+msgstr ""
+
+msgid "'iniMem.p' must be a nonnegative n * k matrix with rowSums == 1"
+msgstr ""
+
+msgid "FANNY algorithm has not converged in 'maxit' = %d iterations"
+msgstr ""
+
+msgid "the memberships are all very close to 1/k. Maybe decrease 'memb.exp' ?"
+msgstr ""
+
+msgid "'m', a membership matrix, must be nonnegative with rowSums == 1"
+msgstr ""
+
+msgid "'n' must be >= 2"
+msgstr ""
+
+msgid "x must be a matrix or data frame."
+msgstr ""
+
+msgid "All variables must be binary (e.g., a factor with 2 levels, both present)."
+msgstr ""
+
+msgid "mona() needs at least p >= 2 variables (in current implementation)"
+msgstr ""
+
+msgid "No clustering performed, an object was found with all values missing."
+msgstr ""
+
+msgid "No clustering performed, found variable with more than half values missing."
+msgstr ""
+
+msgid "No clustering performed, a variable was found with all non missing values identical."
+msgstr ""
+
+msgid "No clustering performed, all variables have at least one missing value."
+msgstr ""
+
+msgid "Cannot keep data when 'x' is a dissimilarity!"
+msgstr ""
+
+msgid "have %d observations, but not more than %d are allowed"
+msgstr ""
+
+msgid "Number of clusters 'k' must be in {1,2, .., n-1}; hence n >= 2"
+msgstr ""
+
+msgid "'medoids' must be NULL or vector of %d distinct indices in {1,2, .., n}, n=%d"
+msgstr ""
+
+msgid "No clustering performed, NAs in the computed dissimilarity matrix."
+msgstr ""
+
+msgid "error from .C(cl_pam, *): invalid medID's"
+msgstr ""
+
+msgid "NA-values are not allowed in dist-like 'x'."
+msgstr ""
+
+msgid "Distances must be result of dist or a square matrix."
+msgstr ""
+
+msgid "the square matrix is not symmetric."
+msgstr ""
+
+msgid ">>>>> funny case in clusplot.default() -- please report!"
+msgstr ""
+
+msgid "x is not a data matrix"
+msgstr ""
+
+msgid "one or more objects contain only missing values"
+msgstr ""
+
+msgid "one or more variables contain only missing values"
+msgstr ""
+
+msgid "Missing values were displaced by the median of the corresponding variable(s)"
+msgstr ""
+
+msgid "x is not numeric"
+msgstr ""
+
+msgid "The clustering vector is of incorrect length"
+msgstr ""
+
+msgid "NA-values are not allowed in clustering vector"
+msgstr ""
+
+msgid "Error in Fortran routine for the spanning ellipsoid,\n rank problem??"
+msgstr ""
+
+msgid "'col.clus' should have length 4 when color is TRUE"
+msgstr ""
+
+msgid "no diss nor data found, nor the original argument of %s"
+msgstr ""
+
+msgid "no diss nor data found for clusplot()'"
+msgstr ""
+
+msgid "invalid partition object"
+msgstr ""
+
+msgid "full silhouette is only available for results of 'clara(*, keep.data = TRUE)'"
+msgstr ""
+
+msgid "'x' must only have integer codes"
+msgstr ""
+
+msgid "Need either a dissimilarity 'dist' or diss.matrix 'dmatrix'"
+msgstr ""
+
+msgid "'dmatrix' is not a dissimilarity matrix compatible to 'x'"
+msgstr ""
+
+msgid "clustering 'x' and dissimilarity 'dist' are incompatible"
+msgstr ""
+
+msgid "invalid silhouette structure"
+msgstr ""
+
+msgid "invalid 'silhouette' object"
+msgstr ""
+
+msgid "No valid silhouette information (#{clusters} =? 1)"
+msgstr ""
+
+msgid "Observation %s has *only* NAs --> omit it for clustering"
+msgid_plural "Observations %s have *only* NAs --> omit them for clustering!"
+msgstr[0] ""
+msgstr[1] ""
+
+msgid "%d observation (%s) has *only* NAs --> omit them for clustering!"
+msgid_plural "%d observations (%s ...) have *only* NAs --> omit them for clustering!"
+msgstr[0] ""
+msgstr[1] ""
+
+msgid "setting 'logical' variable %s to type 'asymm'"
+msgid_plural "setting 'logical' variables %s to type 'asymm'"
+msgstr[0] ""
+msgstr[1] ""
diff --git a/po/R-de.po b/po/R-de.po
new file mode 100644
index 0000000..1b6eef8
--- /dev/null
+++ b/po/R-de.po
@@ -0,0 +1,433 @@
+# Translation of src/library/Recommended/cluster/po/R-cluster.pot to German
+# Copyright (C) 2013-2019 The R Foundation
+# This file is distributed under the same license as the R package.
+msgid ""
+msgstr ""
+"Project-Id-Version: R 3.6.0 cluster 2.0.8\n"
+"Report-Msgid-Bugs-To: bugs.r-project.org\n"
+"POT-Creation-Date: 2018-04-06 23:26\n"
+"PO-Revision-Date: 2019-04-02 13:23+0200\n"
+"Last-Translator: Detlef Steuer <steuer@hsu-hh.de>\n"
+"Language-Team: R Core Team <r-core@r-project.org>\n"
+"Language: de\n"
+"MIME-Version: 1.0\n"
+"Content-Type: text/plain; charset=UTF-8\n"
+"Content-Transfer-Encoding: 8bit\n"
+"Plural-Forms: nplurals=2; plural=n == 1 ? 0 : 1;\n"
+
+msgid "invalid clustering method"
+msgstr "unzulässige Clustermethode"
+
+msgid "ambiguous clustering method"
+msgstr "zweideutige Clustermethode"
+
+msgid "'par.method' must be of length 1, 3, or 4"
+msgstr "'par.method' muss Länge 1, 3 oder 4 haben"
+
+msgid "NA-values in the dissimilarity matrix not allowed."
+msgstr "NAs in der Unähnlichkeitsmatrix nicht zulässig."
+
+msgid "'x' is not and cannot be converted to class \"dissimilarity\""
+msgstr ""
+"'x' ist nicht und kann auch nicht umgewandelt werden in Klasse "
+"\"dissimilarity\""
+
+msgid "x is not a numeric dataframe or matrix."
+msgstr "x ist weder numerischer Dataframe noch Matrix"
+
+msgid "need at least 2 objects to cluster"
+msgstr "benötige zum Clustern mindestens 2 Objekte"
+
+msgid "No clustering performed, NA-values in the dissimilarity matrix."
+msgstr "Keine Clusterung durchgeführt. NAs in der Unähnlichkeitsmatrix."
+
+msgid "'x' is a \"dist\" object, but should be a data matrix or frame"
+msgstr "'x' ist ein \"dist\"-Objekt, sollte aber Datenmatrix oder -frame sein"
+
+msgid "The number of cluster should be at least 1 and at most n-1."
+msgstr "Die Anzahl der Cluster sollte mindestens 1, höchstens n-1 sein."
+
+msgid "'sampsize' should be at least %d = max(2, 1+ number of clusters)"
+msgstr "'sampsize' sollte mindestens %d = max(2, 1+ Anzahl Cluster) sein"
+
+msgid "'sampsize' = %d should not be larger than the number of objects, %d"
+msgstr "'sampsize' = %d; sollte nicht größer sein als die Zahl der Objekte %d"
+
+msgid "'samples' should be at least 1"
+msgstr "'samples' sollte mindestens 1 sein"
+
+msgid "when 'medoids.x' is FALSE, 'keep.data' must be too"
+msgstr "wenn 'medoids.x' FALSE ist, dann muss es auch 'keep.data' sein"
+
+msgid ""
+"Distance computations with NAs: using correct instead of pre-2016 wrong "
+"formula.\n"
+"Use 'correct.d=FALSE' to get previous results or set 'correct.d=TRUE' "
+"explicitly\n"
+"to suppress this warning."
+msgstr ""
+"Abstandsberechnungen mit NAs: nutze korrekte anstelle der falschen Formel, "
+"wie vor 2016.\n"
+" Nutze 'correct.d=FALSE', um die alten, falschen Ergebnisse zu bekommen oder "
+"'correct.d=TRUE', um diese Warnung zu unterdrücken."
+
+msgid "invalid 'correct.d'"
+msgstr "unzulässiges 'correct.d'"
+
+msgid ""
+"Each of the random samples contains objects between which no distance can be "
+"computed."
+msgstr ""
+"Jede der Zufallsstichproben enthält Objekte, zwischen denen kein Abstand "
+"berechnet werden kann"
+
+msgid ""
+"For each of the %d samples, at least one object was found which could not be "
+"assigned to a cluster (because of missing values)."
+msgstr ""
+"Für jede der %d Stichproben wurde mindestens ein Objekt gefunden, das nicht "
+"einem Cluster zugeordnet werden konnte (wegen fehlender Werte)"
+
+msgid "invalid 'jstop' from .C(cl_clara,.):"
+msgstr "unzulässiger 'jstop' aus .C(cl_clara,.):"
+
+msgid "'B' has to be a positive integer"
+msgstr "'B' muss eine positive ganze Zahl sein"
+
+msgid "invalid 'spaceH0':"
+msgstr "unzulässiger 'spaceH0':"
+
+msgid "index has to be a function or a list of function"
+msgstr "index muss eine Funktion oder eine Liste von Funktionen sein"
+
+msgid "invalid 'twins' object"
+msgstr "unzulässiges 'twins'-Objekt"
+
+msgid "x is not a dataframe or a numeric matrix."
+msgstr "x ist weder Dataframe noch numerische Matrix"
+
+msgid "invalid %s; must be named list"
+msgstr "unzulässige %s; muss eine benannte Liste sein"
+
+msgid "%s has invalid column names"
+msgstr "%s hat unzulässige Spaltennamen"
+
+msgid "%s must be in 1:ncol(x)"
+msgstr "%s muss aus 1:ncol(x) sein"
+
+msgid "%s must contain column names or numbers"
+msgstr "%s muss Spaltennamen oder Zahlen enthalten"
+
+msgid "at least one binary variable has more than 2 levels."
+msgstr "mindestens eine binäre Variable hat mehr als 2 Stufen."
+
+msgid "at least one binary variable has not 2 different levels."
+msgstr "mindestens eine binäre Variable hat keine 2 verschiedenen Stufen."
+
+msgid "at least one binary variable has values not in {0,1,NA}"
+msgstr "mindestens eine binäre Variable hat Werte nicht aus {0, 1, NA}"
+
+msgid "binary variable(s) %s treated as interval scaled"
+msgstr "Binärvariable %s als intervallskaliert behandelt"
+
+msgid "%s has constant columns %s; these are standardized to 0"
+msgstr "%s hat konstante Spalten %s; diese werden standardisiert auf 0"
+
+msgid "with mixed variables, metric \"gower\" is used automatically"
+msgstr "mit gemischten Variablen wird automatisch \"gower\" genutzt"
+
+msgid "'weights' must be of length p (or 1)"
+msgstr "'weights' muss von Länge p (oder 1) sein"
+
+msgid "invalid type %s for column numbers %s"
+msgstr "ungültiger Typ %s für Spaltennummern %s"
+
+msgid "NA values in the dissimilarity matrix not allowed."
+msgstr "NAs in der Unähnlichkeitsmatrix nicht erlaubt."
+
+msgid "No clustering performed, NA's in dissimilarity matrix."
+msgstr "Keine Clusterung durchgeführt, NAs in Unähnlichkeitsmatrix."
+
+msgid "'x' must be numeric n x p matrix"
+msgstr "'x' muss numerische n x p - Matrix sein"
+
+msgid "omitting NAs"
+msgstr "NAs ausgelassen"
+
+msgid "no points without missing values"
+msgstr "keine Punkte ohne fehlende Werte"
+
+msgid "computed some negative or all 0 probabilities"
+msgstr "einige negative Wahrscheinlichkeiten oder alle zu 0 berechnet"
+
+msgid "algorithm possibly not converged in %d iterations"
+msgstr "Algorithmus ist in %d Iterationen möglicherweise nicht konvergiert"
+
+msgid "'A' must be p x p cov-matrix defining an ellipsoid"
+msgstr ""
+"'A' muss eine p x p Kovarianzmatrix sein, die einen Ellipsoid definiert"
+
+msgid "ellipsoidPoints() not yet implemented for p >= 3 dim."
+msgstr "ellipsoidPoints() noch nicht für Dimensionen p>=3 implementiert"
+
+msgid "'k' (number of clusters) must be in {1,2, .., n/2 -1}"
+msgstr "'k' (Anzahl Cluster) muss aus {1, 2, ..., n/2 -1} sein"
+
+msgid "'memb.exp' must be a finite number > 1"
+msgstr "'memb.exp' muss endliche Zahl > 1 sein"
+
+msgid "'maxit' must be non-negative integer"
+msgstr "'maxit' muss nicht-negative ganze Zahl sein"
+
+msgid "'iniMem.p' must be a nonnegative n * k matrix with rowSums == 1"
+msgstr ""
+"'iniMem.p' muss eine nicht-negative n x k Matrix mit Zeilensummen == 1 sein"
+
+msgid "FANNY algorithm has not converged in 'maxit' = %d iterations"
+msgstr "FANNY Algorithmus ist in 'maxit' = %d Iterationen nicht konvergiert"
+
+msgid "the memberships are all very close to 1/k. Maybe decrease 'memb.exp' ?"
+msgstr ""
+"die Mitgliedswerte sind alle sehr nah an 1/k. Evtl. 'memb.exp' reduzieren?"
+
+msgid "'m', a membership matrix, must be nonnegative with rowSums == 1"
+msgstr ""
+"'m' ist eine Mitgliedswertmatrix, muss nicht-negativ sein mit Zeilensummen "
+"== 1"
+
+msgid "'n' must be >= 2"
+msgstr "'n' muss >= 2 sein"
+
+msgid "x must be a matrix or data frame."
+msgstr "x muss eine Matrix oder Dataframe sein"
+
+msgid ""
+"All variables must be binary (e.g., a factor with 2 levels, both present)."
+msgstr "Alle Variablen müssen binär sein (z.B. Faktor mit 2 vorhandenen Stufen)."
+
+msgid "mona() needs at least p >= 2 variables (in current implementation)"
+msgstr "mona() (in der aktuellen Implementierung) benötigt mindestens p >= 2 Variablen"
+
+msgid "No clustering performed, an object was found with all values missing."
+msgstr ""
+"Keine Clusterung durchgeführt. Objekt gefunden, bei dem alle Werte fehlend "
+"sind."
+
+msgid ""
+"No clustering performed, found variable with more than half values missing."
+msgstr ""
+"Keine Clusterung durchgeführt. Variable gefunden, mit mehr als der Hälfte "
+"fehlenden Werten."
+
+msgid ""
+"No clustering performed, a variable was found with all non missing values "
+"identical."
+msgstr ""
+"Keine Clusterung durchgeführt. Variable gefunden, bei der alle nicht "
+"fehlenden Werte identisch sind."
+
+msgid "No clustering performed, all variables have at least one missing value."
+msgstr ""
+"Keine Clusterung durchgeführt. Alle Variablen haben mindestens einen "
+"fehlenden Wert."
+
+msgid "Cannot keep data when 'x' is a dissimilarity!"
+msgstr ""
+"Kann Datenmatrix 'data' nicht beibehalten wenn 'x' eine 'dissimilarity' ist!"
+
+msgid "have %d observations, but not more than %d are allowed"
+msgstr "habe %d Beobachtungen, aber mehr als %d nicht erlaubt"
+
+msgid "Number of clusters 'k' must be in {1,2, .., n-1}; hence n >= 2"
+msgstr "Anzahl der Cluster 'k' muss aus {1, 2, ..., n-1} sein; deshalb n >= 2"
+
+msgid ""
+"'medoids' must be NULL or vector of %d distinct indices in {1,2, .., n}, n=%d"
+msgstr ""
+"'medoids' muss NULL oder ein Vektor von %d verschiedenen Indizes aus {1, "
+"2,..., n}, n=%d sein"
+
+msgid "No clustering performed, NAs in the computed dissimilarity matrix."
+msgstr ""
+"Keine Clusterung durchgeführt, NAs in der berechneten Unähnlichkeitsmatrix."
+
+msgid "error from .C(cl_pam, *): invalid medID's"
+msgstr "Fehler aus .C(cl_pam, *): unzulässige medID's"
+
+msgid "NA-values are not allowed in dist-like 'x'."
+msgstr "NAs nicht erlaubt in dist-ähnlichem 'x'."
+
+msgid "Distances must be result of dist or a square matrix."
+msgstr ""
+"Distanzen müssen ein Ergebnis von dist oder eine quadratische Matrix sein."
+
+msgid "the square matrix is not symmetric."
+msgstr "Die quadratische Matrix ist nicht symmetrisch."
+
+msgid ">>>>> funny case in clusplot.default() -- please report!"
+msgstr ""
+">>>>> komische Sache in clusplot.default() -- bitte an den Entwickler senden!"
+
+msgid "x is not a data matrix"
+msgstr "x ist keine Datenmatrix"
+
+msgid "one or more objects contain only missing values"
+msgstr "eines oder mehrere Objekte enthalten nur fehlende Werte"
+
+msgid "one or more variables contain only missing values"
+msgstr "eine oder mehrere Variablen enthalten nur fehlende Werte"
+
+msgid ""
+"Missing values were displaced by the median of the corresponding variable(s)"
+msgstr ""
+"Fehlende Werte wurden durch den Median der korrespondierenden Variable(n) "
+"ersetzt"
+
+msgid "x is not numeric"
+msgstr "x ist nicht numerisch"
+
+msgid "The clustering vector is of incorrect length"
+msgstr "Der Clustervektor hat eine falsche Länge"
+
+msgid "NA-values are not allowed in clustering vector"
+msgstr "NAs im Clustervektor nicht erlaubt"
+
+msgid ""
+"Error in Fortran routine for the spanning ellipsoid,\n"
+" rank problem??"
+msgstr "Fehler im Fortran-Kode für den aufspannenden Ellipsoiden, Rangproblem?"
+
+msgid "'col.clus' should have length 4 when color is TRUE"
+msgstr "'col.clus' sollte Länge 4 haben, wenn color auf TRUE gesetzt ist"
+
+msgid "no diss nor data found, nor the original argument of %s"
+msgstr ""
+"weder diss noch data gefunden, ebensowenig das ursprüngliche Argument von %s"
+
+msgid "no diss nor data found for clusplot()'"
+msgstr "weder diss noch data für 'clusplot()' gefunden"
+
+msgid "invalid partition object"
+msgstr "unzulässiges Partitionsobjekt"
+
+msgid ""
+"full silhouette is only available for results of 'clara(*, keep.data = TRUE)'"
+msgstr ""
+"die volle Silhouette ist nur verfügbar für Resultate von 'clara(*, keep."
+"data=TRUE)'"
+
+msgid "'x' must only have integer codes"
+msgstr "'x' darf nur ganzzahlige Kodes enthalten"
+
+msgid "Need either a dissimilarity 'dist' or diss.matrix 'dmatrix'"
+msgstr ""
+"Benötige entweder Unähnlichkeitsmatrix 'dist' oder diss.matrix 'dmatrix'"
+
+msgid "'dmatrix' is not a dissimilarity matrix compatible to 'x'"
+msgstr "'dmatrix' ist keine zu 'x' kompatible Unähnlichkeitsmatrix"
+
+msgid "clustering 'x' and dissimilarity 'dist' are incompatible"
+msgstr "Clusterung 'x' und Unähnlichkeitsmatrix 'dist' sind inkompatibel"
+
+msgid "invalid silhouette structure"
+msgstr "unzulässige Silhouettenstruktur"
+
+msgid "invalid 'silhouette' object"
+msgstr "unzulässiges 'silhouette' Objekt"
+
+msgid "No valid silhouette information (#{clusters} =? 1)"
+msgstr "keine gültige Silhouetteninformation (#{clusters} =? 1)"
+
+msgid "Observation %s has *only* NAs --> omit it for clustering"
+msgid_plural "Observations %s have *only* NAs --> omit them for clustering!"
+msgstr[0] "Beobachtung %s hat *nur* NAs --> ausgelassen für Clustering"
+msgstr[1] "Beobachtungen %s haben *nur* NAs --> ausgelassen für Clustering"
+
+msgid "%d observation (%s) has *only* NAs --> omit them for clustering!"
+msgid_plural ""
+"%d observations (%s ...) have *only* NAs --> omit them for clustering!"
+msgstr[0] "%d Beobachtung (%s) hat *nur* NAs --> ausgelassen für Clustering"
+msgstr[1] ""
+"%d Beobachtungen (%s ...) haben *nur* NAs --> ausgelassen für Clustering"
+
+msgid "setting 'logical' variable %s to type 'asymm'"
+msgid_plural "setting 'logical' variables %s to type 'asymm'"
+msgstr[0] "setze 'logical' Variable %s auf Typ 'asymm'"
+msgstr[1] "setze 'logical' Variablen %s auf Typ 'asymm'"
+
+#~ msgid "NAdiss"
+#~ msgstr "NAdiss"
+
+#~ msgid "non.diss"
+#~ msgstr "non.diss"
+
+#~ msgid "no distance can be computed."
+#~ msgstr "keine Entfernung berechnent werden kann"
+
+#~ msgid "For each of the"
+#~ msgstr "Für jede der"
+
+#~ msgid ""
+#~ "samples, at least one object was found which\n"
+#~ " could not"
+#~ msgstr "Stichproben wurde mindestens ein Objekt gefunden, das nicht"
+
+#~ msgid "be assigned to a cluster (because of missing values)."
+#~ msgstr "einem Cluster zugeordnet werden konnte (wegen fehlender Werte)"
+
+#~ msgid "invalid"
+#~ msgstr "unzulässiger"
+
+#~ msgid "type"
+#~ msgstr "Typ"
+
+#~ msgid "type$"
+#~ msgstr "type$"
+
+#~ msgid "binary variable(s)"
+#~ msgstr "binäre Variable(n)"
+
+#~ msgid "x"
+#~ msgstr "x"
+
+#~ msgid "has constant columns"
+#~ msgstr "hat konstante Spalten"
+
+#~ msgid "possibly not converged in"
+#~ msgstr "evtl nicht konvergiert in "
+
+#~ msgid "iterations"
+#~ msgstr "Iterationen"
+
+#~ msgid "'medoids' must be NULL or vector of"
+#~ msgstr "'medoids' muss NULL sein oder ein Vektor von"
+
+#~ msgid "rank problem??"
+#~ msgstr "evtl. Probleme mit dem Rang?"
+
+#~ msgid "'clara(*, keep.data = TRUE)'"
+#~ msgstr "'clara(*, keep.data = TRUE)'"
+
+#~ msgid ""
+#~ "No clustering performed, a variable was found with at least 50% missing "
+#~ "values."
+#~ msgstr ""
+#~ "Keine Clusterung durchgeführt. Variable mit mehr als 50% fehlender Werte."
+
+#~ msgid "No clustering performed,"
+#~ msgstr "Clustering nicht durchgeführt,"
+
+#~ msgid "Observations %s"
+#~ msgstr "Beobachtungen %s"
+
+#~ msgid "%d observations (%s ...)"
+#~ msgstr "%d Beobachtungen (%s ...)"
+
+#~ msgid "have *only* NAs --> na.omit() them for clustering!"
+#~ msgstr "haben *nur* NAs --> na.omit() diese für das Clustern"
+
+#~ msgid "s"
+#~ msgstr "n"
+
+#~ msgid "to type 'asymm'"
+#~ msgstr "auf Typ 'asymm'"
diff --git a/po/R-en@quot.po b/po/R-en@quot.po
new file mode 100644
index 0000000..c1ef892
--- /dev/null
+++ b/po/R-en@quot.po
@@ -0,0 +1,337 @@
+# All this catalog "translates" are quotation characters.
+# The msgids must be ASCII and therefore cannot contain real quotation
+# characters, only substitutes like grave accent (0x60), apostrophe (0x27)
+# and double quote (0x22). These substitutes look strange; see
+# http://www.cl.cam.ac.uk/~mgk25/ucs/quotes.html
+#
+# This catalog translates grave accent (0x60) and apostrophe (0x27) to
+# left single quotation mark (U+2018) and right single quotation mark (U+2019).
+# It also translates pairs of apostrophe (0x27) to
+# left single quotation mark (U+2018) and right single quotation mark (U+2019)
+# and pairs of quotation mark (0x22) to
+# left double quotation mark (U+201C) and right double quotation mark (U+201D).
+#
+# When output to an UTF-8 terminal, the quotation characters appear perfectly.
+# When output to an ISO-8859-1 terminal, the single quotation marks are
+# transliterated to apostrophes (by iconv in glibc 2.2 or newer) or to
+# grave/acute accent (by libiconv), and the double quotation marks are
+# transliterated to 0x22.
+# When output to an ASCII terminal, the single quotation marks are
+# transliterated to apostrophes, and the double quotation marks are
+# transliterated to 0x22.
+#
+msgid ""
+msgstr ""
+"Project-Id-Version: R 2.15.1\n"
+"Report-Msgid-Bugs-To: bugs.r-project.org\n"
+"POT-Creation-Date: 2012-08-21 22:49\n"
+"PO-Revision-Date: 2012-08-21 22:49\n"
+"Last-Translator: Automatically generated\n"
+"Language-Team: none\n"
+"MIME-Version: 1.0\n"
+"Content-Type: text/plain; charset=UTF-8\n"
+"Content-Transfer-Encoding: 8bit\n"
+"Language: en\n"
+"Plural-Forms: nplurals=2; plural=(n != 1);\n"
+
+msgid "invalid clustering method"
+msgstr "invalid clustering method"
+
+msgid "ambiguous clustering method"
+msgstr "ambiguous clustering method"
+
+msgid "'par.method' must be of length 1, 3, or 4"
+msgstr "‘par.method’ must be of length 1, 3, or 4"
+
+msgid "NAdiss"
+msgstr "NAdiss"
+
+msgid "non.diss"
+msgstr "non.diss"
+
+msgid "x is not a numeric dataframe or matrix."
+msgstr "x is not a numeric dataframe or matrix."
+
+msgid "need at least 2 objects to cluster"
+msgstr "need at least 2 objects to cluster"
+
+msgid "No clustering performed, NA-values in the dissimilarity matrix."
+msgstr "No clustering performed, NA-values in the dissimilarity matrix."
+
+msgid "'x' is a \"dist\" object, but should be a data matrix or frame"
+msgstr "‘x’ is a \"dist\" object, but should be a data matrix or frame"
+
+msgid "The number of cluster should be at least 1 and at most n-1."
+msgstr "The number of cluster should be at least 1 and at most n-1."
+
+msgid "'sampsize' should be at least %d = max(2, 1+ number of clusters)"
+msgstr "‘sampsize’ should be at least %d = max(2, 1+ number of clusters)"
+
+msgid "'sampsize' = %d should not be larger than the number of objects, %d"
+msgstr "‘sampsize’ = %d should not be larger than the number of objects, %d"
+
+msgid "'samples' should be at least 1"
+msgstr "‘samples’ should be at least 1"
+
+msgid "when 'medoids.x' is FALSE, 'keep.data' must be too"
+msgstr "when ‘medoids.x’ is FALSE, ‘keep.data’ must be too"
+
+msgid "Each of the random samples contains objects between which"
+msgstr "Each of the random samples contains objects between which"
+
+msgid "no distance can be computed."
+msgstr "no distance can be computed."
+
+msgid "For each of the"
+msgstr "For each of the"
+
+msgid ""
+"samples, at least one object was found which\n"
+" could not"
+msgstr ""
+"samples, at least one object was found which\n"
+" could not"
+
+msgid "be assigned to a cluster (because of missing values)."
+msgstr "be assigned to a cluster (because of missing values)."
+
+msgid "invalid 'jstop' from .C(cl_clara,.):"
+msgstr "invalid ‘jstop’ from .C(cl_clara,.):"
+
+msgid "'B' has to be a positive integer"
+msgstr "‘B’ has to be a positive integer"
+
+msgid "invalid 'twins' object"
+msgstr "invalid ‘twins’ object"
+
+msgid "x is not a dataframe or a numeric matrix."
+msgstr "x is not a dataframe or a numeric matrix."
+
+msgid "invalid"
+msgstr "invalid"
+
+msgid "type"
+msgstr "type"
+
+msgid "; must be named list"
+msgstr "; must be named list"
+
+msgid "type$"
+msgstr "type$"
+
+msgid "has invalid column names"
+msgstr "has invalid column names"
+
+msgid "must be in 1:ncol(x)"
+msgstr "must be in 1:ncol(x)"
+
+msgid "must contain column names or numbers"
+msgstr "must contain column names or numbers"
+
+msgid "at least one binary variable has more than 2 levels."
+msgstr "at least one binary variable has more than 2 levels."
+
+msgid "at least one binary variable has not 2 different levels."
+msgstr "at least one binary variable has not 2 different levels."
+
+msgid "at least one binary variable has values not in {0,1,NA}"
+msgstr "at least one binary variable has values not in {0,1,NA}"
+
+msgid "binary variable(s)"
+msgstr "binary variable(s)"
+
+msgid "treated as interval scaled"
+msgstr "treated as interval scaled"
+
+msgid "x"
+msgstr "x"
+
+msgid "has constant columns"
+msgstr "has constant columns"
+
+msgid "; these are standardized to 0"
+msgstr "; these are standardized to 0"
+
+msgid "with mixed variables, metric \"gower\" is used automatically"
+msgstr "with mixed variables, metric \"gower\" is used automatically"
+
+msgid "'weights' must be of length p (or 1)"
+msgstr "‘weights’ must be of length p (or 1)"
+
+msgid "invalid type"
+msgstr "invalid type"
+
+msgid "for column numbers"
+msgstr "for column numbers"
+
+msgid "No clustering performed, NA's in dissimilarity matrix."
+msgstr "No clustering performed, NA's in dissimilarity matrix."
+
+msgid "'x' must be numeric n x p matrix"
+msgstr "‘x’ must be numeric n x p matrix"
+
+msgid "omitting NAs"
+msgstr "omitting NAs"
+
+msgid "no points without missing values"
+msgstr "no points without missing values"
+
+msgid "computed some negative or all 0 'prob'abilities"
+msgstr "computed some negative or all 0 'prob'abilities"
+
+msgid "possibly not converged in"
+msgstr "possibly not converged in"
+
+msgid "iterations"
+msgstr "iterations"
+
+msgid "'A' must be p x p cov-matrix defining an ellipsoid"
+msgstr "‘A’ must be p x p cov-matrix defining an ellipsoid"
+
+msgid "ellipsoidPoints() not yet implemented for p >= 3 dim."
+msgstr "ellipsoidPoints() not yet implemented for p >= 3 dim."
+
+msgid "'k' (number of clusters) must be in {1,2, .., n/2 -1}"
+msgstr "‘k’ (number of clusters) must be in {1,2, .., n/2 -1}"
+
+msgid "'memb.exp' must be a finite number > 1"
+msgstr "‘memb.exp’ must be a finite number > 1"
+
+msgid "'maxit' must be non-negative integer"
+msgstr "‘maxit’ must be non-negative integer"
+
+msgid "'iniMem.p' must be a nonnegative n * k matrix with rowSums == 1"
+msgstr "‘iniMem.p’ must be a nonnegative n * k matrix with rowSums == 1"
+
+msgid "FANNY algorithm has not converged in 'maxit' = %d iterations"
+msgstr "FANNY algorithm has not converged in ‘maxit’ = %d iterations"
+
+msgid "the memberships are all very close to 1/k. Maybe decrease 'memb.exp' ?"
+msgstr "the memberships are all very close to 1/k. Maybe decrease ‘memb.exp’ ?"
+
+msgid "'m', a membership matrix, must be nonnegative with rowSums == 1"
+msgstr "'m', a membership matrix, must be nonnegative with rowSums == 1"
+
+msgid "'n' must be >= 2"
+msgstr "‘n’ must be >= 2"
+
+msgid "x must be a matrix or data frame."
+msgstr "x must be a matrix or data frame."
+
+msgid "All variables must be binary (factor with 2 levels)."
+msgstr "All variables must be binary (factor with 2 levels)."
+
+msgid "No clustering performed,"
+msgstr "No clustering performed,"
+
+msgid "an object was found with all values missing."
+msgstr "an object was found with all values missing."
+
+msgid "a variable was found with at least 50% missing values."
+msgstr "a variable was found with at least 50% missing values."
+
+msgid "a variable was found with all non missing values identical."
+msgstr "a variable was found with all non missing values identical."
+
+msgid "all variables have at least one missing value."
+msgstr "all variables have at least one missing value."
+
+msgid "Number of clusters 'k' must be in {1,2, .., n-1}; hence n >= 2"
+msgstr "Number of clusters ‘k’ must be in {1,2, .., n-1}; hence n >= 2"
+
+msgid "'medoids' must be NULL or vector of"
+msgstr "‘medoids’ must be NULL or vector of"
+
+msgid "distinct indices in {1,2, .., n}, n="
+msgstr "distinct indices in {1,2, .., n}, n="
+
+msgid "No clustering performed, NAs in the computed dissimilarity matrix."
+msgstr "No clustering performed, NAs in the computed dissimilarity matrix."
+
+msgid "error from .C(cl_pam, *): invalid medID's"
+msgstr "error from .C(cl_pam, *): invalid medID's"
+
+msgid "NA-values are not allowed in dist-like 'x'."
+msgstr "NA-values are not allowed in dist-like ‘x’."
+
+msgid "Distances must be result of dist or a square matrix."
+msgstr "Distances must be result of dist or a square matrix."
+
+msgid "the square matrix is not symmetric."
+msgstr "the square matrix is not symmetric."
+
+msgid ">>>>> funny case in clusplot.default() -- please report!"
+msgstr ">>>>> funny case in clusplot.default() -- please report!"
+
+msgid "x is not a data matrix"
+msgstr "x is not a data matrix"
+
+msgid "one or more objects contain only missing values"
+msgstr "one or more objects contain only missing values"
+
+msgid "one or more variables contain only missing values"
+msgstr "one or more variables contain only missing values"
+
+msgid ""
+"Missing values were displaced by the median of the corresponding variable(s)"
+msgstr ""
+"Missing values were displaced by the median of the corresponding variable(s)"
+
+msgid "x is not numeric"
+msgstr "x is not numeric"
+
+msgid "The clustering vector is of incorrect length"
+msgstr "The clustering vector is of incorrect length"
+
+msgid "NA-values are not allowed in clustering vector"
+msgstr "NA-values are not allowed in clustering vector"
+
+msgid "Error in Fortran routine for the spanning ellipsoid,"
+msgstr "Error in Fortran routine for the spanning ellipsoid,"
+
+msgid "rank problem??"
+msgstr "rank problem??"
+
+msgid "'col.clus' should have length 4 when color is TRUE"
+msgstr "‘col.clus’ should have length 4 when color is TRUE"
+
+msgid "no diss nor data found, nor the original argument of"
+msgstr "no diss nor data found, nor the original argument of"
+
+msgid "no diss nor data found for clusplot()'"
+msgstr "no diss nor data found for clusplot()'"
+
+msgid "invalid partition object"
+msgstr "invalid partition object"
+
+msgid "full silhouette is only available for results of"
+msgstr "full silhouette is only available for results of"
+
+msgid "'clara(*, keep.data = TRUE)'"
+msgstr "'clara(*, keep.data = TRUE)'"
+
+msgid "'x' must only have integer codes"
+msgstr "‘x’ must only have integer codes"
+
+msgid "Need either a dissimilarity 'dist' or diss.matrix 'dmatrix'"
+msgstr "Need either a dissimilarity ‘dist’ or diss.matrix ‘dmatrix’"
+
+msgid "'dmatrix' is not a dissimilarity matrix compatible to 'x'"
+msgstr "‘dmatrix’ is not a dissimilarity matrix compatible to ‘x’"
+
+msgid "clustering 'x' and dissimilarity 'dist' are incompatible"
+msgstr "clustering ‘x’ and dissimilarity ‘dist’ are incompatible"
+
+msgid "invalid silhouette structure"
+msgstr "invalid silhouette structure"
+
+msgid "invalid 'silhouette' object"
+msgstr "invalid ‘silhouette’ object"
+
+msgid "No valid silhouette information (#{clusters} =? 1)"
+msgstr "No valid silhouette information (#{clusters} =? 1)"
+
+msgid "setting 'logical' variable %s to type 'asymm'"
+msgid_plural "setting 'logical' variables %s to type 'asymm'"
+msgstr[0] "setting ‘logical’ variable %s to type ‘asymm’"
+msgstr[1] "setting ‘logical’ variables %s to type ‘asymm’"
diff --git a/po/R-fr.po b/po/R-fr.po
new file mode 100644
index 0000000..48b255e
--- /dev/null
+++ b/po/R-fr.po
@@ -0,0 +1,432 @@
+# Translation of src/library/Recommended/cluster/po/R-cluster.pot to French
+# Copyright (C) 2013 The R Foundation
+# This file is distributed under the same license as the R package.
+# Philippe.Grosjean@umons.ac.be, 2014--
+msgid ""
+msgstr ""
+"Project-Id-Version: cluster 1.14.5\n"
+"POT-Creation-Date: 2018-04-06 23:26\n"
+"PO-Revision-Date: 2014-03-30 09:03+0100\n"
+"Last-Translator: Philippe Grosjean <phgrosjean@sciviews.org>\n"
+"Language-Team: none\n"
+"Language: fr\n"
+"MIME-Version: 1.0\n"
+"Content-Type: text/plain; charset=UTF-8\n"
+"Content-Transfer-Encoding: 8bit\n"
+"X-Generator: Poedit 1.6.4\n"
+"Plural-Forms: nplurals=2; plural=(n > 1);\n"
+
+msgid "invalid clustering method"
+msgstr "méthode d'agrégation incorrecte"
+
+msgid "ambiguous clustering method"
+msgstr "méthode d'agrégation ambigüe"
+
+msgid "'par.method' must be of length 1, 3, or 4"
+msgstr "'par.method' doit être de longueur 1, 3 ou 4"
+
+msgid "NA-values in the dissimilarity matrix not allowed."
+msgstr ""
+"les valeurs manquantes (NA) ne sont pas autorisées dans la matrice de "
+"dissimilarité."
+
+msgid "'x' is not and cannot be converted to class \"dissimilarity\""
+msgstr ""
+"'x' n'est pas et ne peut pas être converti en un objet de classe "
+"\"dissimilarity\""
+
+msgid "x is not a numeric dataframe or matrix."
+msgstr ""
+"x n'est pas un tableau de données (data frame) ou une matrice numérique."
+
+msgid "need at least 2 objects to cluster"
+msgstr "au moins deux objets sont nécessaires pour effectuer une agrégation"
+
+msgid "No clustering performed, NA-values in the dissimilarity matrix."
+msgstr ""
+"Aucune agrégation n'est réalisée, présence de NAs dans la matrice de "
+"dissimilarité."
+
+msgid "'x' is a \"dist\" object, but should be a data matrix or frame"
+msgstr ""
+"'x' est un objet \"dist\", mais il faut une matrice ou un tableau de données"
+
+msgid "The number of cluster should be at least 1 and at most n-1."
+msgstr "Le nombre de groupes doit être compris entre 1 et n-1."
+
+msgid "'sampsize' should be at least %d = max(2, 1+ number of clusters)"
+msgstr "'sampsize' doit être au minimum %d = max(2, 1+ nombre de groupes)"
+
+msgid "'sampsize' = %d should not be larger than the number of objects, %d"
+msgstr "'sampsize' = %d ne peut être plus grand que le nombre d'objets, %d"
+
+msgid "'samples' should be at least 1"
+msgstr "'samples' doit valoir au moins 1"
+
+msgid "when 'medoids.x' is FALSE, 'keep.data' must be too"
+msgstr "lorsque 'medoids.x' est FALSE, 'keep.data' doit l'être aussi"
+
+msgid ""
+"Distance computations with NAs: using correct instead of pre-2016 wrong "
+"formula.\n"
+"Use 'correct.d=FALSE' to get previous results or set 'correct.d=TRUE' "
+"explicitly\n"
+"to suppress this warning."
+msgstr ""
+
+#, fuzzy
+msgid "invalid 'correct.d'"
+msgstr "objet 'twins' incorrect"
+
+msgid ""
+"Each of the random samples contains objects between which no distance can be "
+"computed."
+msgstr ""
+"Chacun des échantillons aléatoires contient des objets entre lesquels la "
+"distance ne peut être calculée."
+
+msgid ""
+"For each of the %d samples, at least one object was found which could not be "
+"assigned to a cluster (because of missing values)."
+msgstr ""
+"Dans chacun des %d échantillons, au moins un objet ne peut être assigné à un "
+"groupe (parce qu'il contient des valeurs manquantes)"
+
+msgid "invalid 'jstop' from .C(cl_clara,.):"
+msgstr "'jstop' incorrect obtenu de .C(cl_clara,.) :"
+
+msgid "'B' has to be a positive integer"
+msgstr "'B' doit être un entier positif"
+
+#, fuzzy
+msgid "invalid 'spaceH0':"
+msgstr "type incorrect"
+
+msgid "index has to be a function or a list of function"
+msgstr ""
+
+msgid "invalid 'twins' object"
+msgstr "objet 'twins' incorrect"
+
+msgid "x is not a dataframe or a numeric matrix."
+msgstr ""
+"x n'est pas un tableau de données (data frame) ou une matrice numérique."
+
+msgid "invalid %s; must be named list"
+msgstr "%s incorrect ; doit être une liste nommée"
+
+msgid "%s has invalid column names"
+msgstr "%s a des noms de colonnes incorrects"
+
+msgid "%s must be in 1:ncol(x)"
+msgstr "%s doit être compris dans 1:ncol(x)"
+
+msgid "%s must contain column names or numbers"
+msgstr "%s doit contenir des noms de colonnes ou des nombres"
+
+msgid "at least one binary variable has more than 2 levels."
+msgstr "au moins une des variables binaires a plus de deux niveaux."
+
+msgid "at least one binary variable has not 2 different levels."
+msgstr "au moins une variable binaire n'a pas deux niveaux différents."
+
+msgid "at least one binary variable has values not in {0,1,NA}"
+msgstr "au moins une variable binaire a des valeurs autres que {0,1,NA}"
+
+msgid "binary variable(s) %s treated as interval scaled"
+msgstr ""
+"la ou les variables binaires %s sont traitées comme intervalles standardisés"
+
+msgid "%s has constant columns %s; these are standardized to 0"
+msgstr "%s a des colonnes constantes %s ; elles sont standardisées à 0"
+
+msgid "with mixed variables, metric \"gower\" is used automatically"
+msgstr ""
+"avec des variables mélangées, la métrique \"gower\" est utilisée "
+"automatiquement"
+
+msgid "'weights' must be of length p (or 1)"
+msgstr "'weights' doit être de longueur p (ou 1)"
+
+msgid "invalid type %s for column numbers %s"
+msgstr "type inadéquat %s pour les numéros de colonnes %s"
+
+msgid "NA values in the dissimilarity matrix not allowed."
+msgstr ""
+"les valeurs manquantes (NA) ne sont pas admises dans la matrice de "
+"dissimilarité."
+
+msgid "No clustering performed, NA's in dissimilarity matrix."
+msgstr ""
+"Aucune agrégation n'est réalisée, NAs dans la matrice de dissimilarité."
+
+msgid "'x' must be numeric n x p matrix"
+msgstr "'x' doit être une matrice numérique n x p"
+
+msgid "omitting NAs"
+msgstr "valeurs NAs ignorées"
+
+msgid "no points without missing values"
+msgstr "aucun point sans valeurs manquantes"
+
+msgid "computed some negative or all 0 probabilities"
+msgstr "des probabilités négatives ou égales à zéro ont été calculées"
+
+msgid "algorithm possibly not converged in %d iterations"
+msgstr "l'algorithme n'a vraisemblablement pas convergé en %d itérations"
+
+msgid "'A' must be p x p cov-matrix defining an ellipsoid"
+msgstr "'A' doit être une matrice de covariance p x p définissant un ellipsoïde"
+
+msgid "ellipsoidPoints() not yet implemented for p >= 3 dim."
+msgstr "ellipsoidPoints() non implémenté pour p >= 3 dim."
+
+msgid "'k' (number of clusters) must be in {1,2, .., n/2 -1}"
+msgstr "'k' (nombre de groupes) doit être dans {1,2,…, n/2 -1}"
+
+msgid "'memb.exp' must be a finite number > 1"
+msgstr "'memb.exp' doit être un nombre fini > 1"
+
+msgid "'maxit' must be non-negative integer"
+msgstr "'maxit' doit être un entier non négatif"
+
+msgid "'iniMem.p' must be a nonnegative n * k matrix with rowSums == 1"
+msgstr "'iniMem.p' doit être une matrice n * k non négative avec rowSums == 1"
+
+msgid "FANNY algorithm has not converged in 'maxit' = %d iterations"
+msgstr "l'algorithme FANNY n'a pas convergé en 'maxit' = %d itérations"
+
+msgid "the memberships are all very close to 1/k. Maybe decrease 'memb.exp' ?"
+msgstr ""
+"les appartenances sont toutes très proches de 1/k. Essayez en diminuant "
+"'memb.exp' ?"
+
+msgid "'m', a membership matrix, must be nonnegative with rowSums == 1"
+msgstr ""
+"'m', une matrice d'appartenance, doit être non négative avec rowSums == 1"
+
+msgid "'n' must be >= 2"
+msgstr "'n' doit être >= 2"
+
+msgid "x must be a matrix or data frame."
+msgstr "x doit être une matrice ou un tableau de données (data frame)."
+
+#, fuzzy
+msgid ""
+"All variables must be binary (e.g., a factor with 2 levels, both present)."
+msgstr ""
+"Toutes les variables doivent être binaires (c'est-à-dire, des variables "
+"facteur à 2 niveaux)."
+
+msgid "mona() needs at least p >= 2 variables (in current implementation)"
+msgstr ""
+
+msgid "No clustering performed, an object was found with all values missing."
+msgstr ""
+"Aucune agrégation n'a été effectuée, un objet a toutes ses valeurs "
+"manquantes."
+
+msgid ""
+"No clustering performed, found variable with more than half values missing."
+msgstr ""
+"Aucune agrégation n'a été effectuée, une variable a plus de la moitié de ses "
+"valeurs manquantes."
+
+msgid ""
+"No clustering performed, a variable was found with all non missing values "
+"identical."
+msgstr ""
+"Aucune agrégation n'a été effectuée, une variable a toutes ses valeurs non "
+"manquantes identiques."
+
+msgid "No clustering performed, all variables have at least one missing value."
+msgstr ""
+"Aucune agrégation n'a été effectuée, toutes les variables ont au moins une "
+"valeur manquante."
+
+msgid "Cannot keep data when 'x' is a dissimilarity!"
+msgstr ""
+
+msgid "have %d observations, but not more than %d are allowed"
+msgstr ""
+
+msgid "Number of clusters 'k' must be in {1,2, .., n-1}; hence n >= 2"
+msgstr "Le nombre de groupes 'k' doit être dans {1,2, …, n-1} ; donc n >= 2"
+
+msgid ""
+"'medoids' must be NULL or vector of %d distinct indices in {1,2, .., n}, n=%d"
+msgstr ""
+"'medoids' doit être NULL ou un vecteur de %d valeurs d'indice distinctes "
+"dans {1, 2, …, n}, n=%d"
+
+msgid "No clustering performed, NAs in the computed dissimilarity matrix."
+msgstr ""
+"Aucune agrégation n'a été effectuée, NAs dans la matrice de dissimilarité "
+"calculée."
+
+msgid "error from .C(cl_pam, *): invalid medID's"
+msgstr "erreur depuis .C(cl_pam, *) : medIDs incorrects"
+
+msgid "NA-values are not allowed in dist-like 'x'."
+msgstr ""
+"des valeurs manquantes NA ne sont pas autorisées dans 'x' de type dist."
+
+msgid "Distances must be result of dist or a square matrix."
+msgstr ""
+"Les distances doivent résulter d'un objet dist ou d'une matrice carrée."
+
+msgid "the square matrix is not symmetric."
+msgstr "la matrice carrée n'est pas symétrique."
+
+msgid ">>>>> funny case in clusplot.default() -- please report!"
+msgstr ""
+">>>>> cas pathologique dans clusplot.default() -- veuillez envoyer un "
+"rapport de bug !"
+
+msgid "x is not a data matrix"
+msgstr "x n'est pas une matrice de données"
+
+msgid "one or more objects contain only missing values"
+msgstr "un ou plusieurs objets ne contiennent que des valeurs manquantes"
+
+msgid "one or more variables contain only missing values"
+msgstr "une ou plusieurs variables ne contiennent que des valeurs manquantes"
+
+msgid ""
+"Missing values were displaced by the median of the corresponding variable(s)"
+msgstr ""
+"Les valeurs manquantes ont été remplacées par la médiane de la ou des "
+"variables correspondantes"
+
+msgid "x is not numeric"
+msgstr "x n'est pas numérique"
+
+msgid "The clustering vector is of incorrect length"
+msgstr "Le vecteur d'agrégation est de longueur incorrecte"
+
+msgid "NA-values are not allowed in clustering vector"
+msgstr ""
+"Les valeurs manquantes NA ne sont pas autorisées dans le vecteur d'agrégation"
+
+msgid ""
+"Error in Fortran routine for the spanning ellipsoid,\n"
+" rank problem??"
+msgstr ""
+"Erreur dans la routine Fortran pour obtenir l'ellipsoïde de dispersion,\n"
+" problème de rang??"
+
+msgid "'col.clus' should have length 4 when color is TRUE"
+msgstr "'col.clus' doit avoir une longueur de 4 lorsque color est TRUE"
+
+msgid "no diss nor data found, nor the original argument of %s"
+msgstr "pas de diss ou de données trouvées, même pas l'argument original de %s"
+
+msgid "no diss nor data found for clusplot()'"
+msgstr "pas de diss ou de données trouvées pour clusplot()'"
+
+msgid "invalid partition object"
+msgstr "objet de partitionnement incorrect"
+
+msgid ""
+"full silhouette is only available for results of 'clara(*, keep.data = TRUE)'"
+msgstr ""
+"la silhouette complète n'est disponible que pour les résultats de 'clara(*, "
+"keep.data = TRUE)'"
+
+msgid "'x' must only have integer codes"
+msgstr "'x' doit n'avoir que des codes entiers"
+
+msgid "Need either a dissimilarity 'dist' or diss.matrix 'dmatrix'"
+msgstr ""
+"Il faut soit un objet 'dist' de dissimilarité ou une matrice de "
+"dissimilarité 'dmatrix'"
+
+msgid "'dmatrix' is not a dissimilarity matrix compatible to 'x'"
+msgstr "'dmatrix' n'est pas une matrice de dissimilarité compatible avec 'x'"
+
+msgid "clustering 'x' and dissimilarity 'dist' are incompatible"
+msgstr ""
+"l'agrégation 'x' et la matrice de dissimilarité 'dist' sont incompatibles"
+
+msgid "invalid silhouette structure"
+msgstr "structure de silhouette incorrecte"
+
+msgid "invalid 'silhouette' object"
+msgstr "objet 'silhouette' incorrect"
+
+msgid "No valid silhouette information (#{clusters} =? 1)"
+msgstr "Aucune valeur de silhouette n'est correcte (#{groupes} =? 1)"
+
+msgid "Observation %s has *only* NAs --> omit it for clustering"
+msgid_plural "Observations %s have *only* NAs --> omit them for clustering!"
+msgstr[0] "L'observation %s n'a *que* des NAs --> ignorée pour le regroupement"
+msgstr[1] ""
+"Les observations %s n'ont *que* des NAs --> ignorées pour le regroupement!"
+
+msgid "%d observation (%s) has *only* NAs --> omit them for clustering!"
+msgid_plural ""
+"%d observations (%s ...) have *only* NAs --> omit them for clustering!"
+msgstr[0] ""
+"%d observation (%s) n'a *que* des NAs --> ignorée pour le regroupement!"
+msgstr[1] ""
+"%d observations (%s) n'ont *que* des NAs --> ignorées pour le regroupement!"
+
+msgid "setting 'logical' variable %s to type 'asymm'"
+msgid_plural "setting 'logical' variables %s to type 'asymm'"
+msgstr[0] "la variable 'logical' %s est transformée en type 'asymm'"
+msgstr[1] "les variables 'logical' %s sont transformées en type 'asymm'"
+
+#~ msgid "NAdiss"
+#~ msgstr "NAdiss"
+
+#~ msgid "non.diss"
+#~ msgstr "non.diss"
+
+#~ msgid "no distance can be computed."
+#~ msgstr "aucune distance n'a été calculée."
+
+#~ msgid "For each of the"
+#~ msgstr "Pour chacun des"
+
+#~ msgid ""
+#~ "samples, at least one object was found which\n"
+#~ " could not"
+#~ msgstr ""
+#~ "échantillons, au moins un objet a été trouvé qui\n"
+#~ " ne peut"
+
+#~ msgid "be assigned to a cluster (because of missing values)."
+#~ msgstr "être assigné à un groupe (à cause de valeurs manquantes)."
+
+#~ msgid "invalid"
+#~ msgstr "incorrect"
+
+#~ msgid "type"
+#~ msgstr "type"
+
+#~ msgid "type$"
+#~ msgstr "type$"
+
+#~ msgid "binary variable(s)"
+#~ msgstr "variable(s) binaire(s)"
+
+#~ msgid "x"
+#~ msgstr "x"
+
+#~ msgid "has constant columns"
+#~ msgstr "a des colonnes constantes"
+
+#~ msgid "possibly not converged in"
+#~ msgstr "probablement pas de convergence en"
+
+#~ msgid "iterations"
+#~ msgstr "itérations"
+
+#~ msgid "'medoids' must be NULL or vector of"
+#~ msgstr "'medoids' doit être NULL ou un vecteur de"
+
+#~ msgid "rank problem??"
+#~ msgstr "problème de rang ??"
+
+#~ msgid "'clara(*, keep.data = TRUE)'"
+#~ msgstr "'clara(*, keep.data = TRUE)'"
diff --git a/po/R-ko.po b/po/R-ko.po
new file mode 100644
index 0000000..b351542
--- /dev/null
+++ b/po/R-ko.po
@@ -0,0 +1,393 @@
+# Korean translations for cluster package.
+# Recommended/cluster/po/R-ko.po
+# Maintainer: Martin Maechler <maechler@stat.math.ethz.ch>
+#
+# This file is distributed under the same license as the R cluster package.
+# Chel Hee Lee <chl948@mail.usask.ca>, 2013-2015.
+#
+# Reviewing process is completed (15-JAN-2015).
+# The original source code is reviewed (26-JAN-2015).
+# QC: PASS
+# Freezing on 06-FEB-2015 for R-3.1.3
+#
+msgid ""
+msgstr ""
+"Project-Id-Version: cluster 1.15.2\n"
+"POT-Creation-Date: 2018-04-06 23:26\n"
+"PO-Revision-Date: 2015-02-06 21:56-0600\n"
+"Last-Translator: Chel Hee Lee <chl948@mail.usask.ca>\n"
+"Language-Team: Chel Hee Lee <chl948@mail.usask.ca>\n"
+"Language: ko\n"
+"MIME-Version: 1.0\n"
+"Content-Type: text/plain; charset=UTF-8\n"
+"Content-Transfer-Encoding: 8bit\n"
+"Plural-Forms: nplurals=1; plural=0;\n"
+
+msgid "invalid clustering method"
+msgstr "군집방법(clustering method)의 이름이 올바르지 않습니다."
+
+msgid "ambiguous clustering method"
+msgstr "불분명한 군집방법(clustering method)입니다."
+
+msgid "'par.method' must be of length 1, 3, or 4"
+msgstr "'par.method'의 길이는 반드시 1, 3, 또는 4이어야 합니다."
+
+msgid "NA-values in the dissimilarity matrix not allowed."
+msgstr "NA의 값은 비유사성 행렬(dissimilarity matrix)에 사용될 수 없습니다."
+
+msgid "'x' is not and cannot be converted to class \"dissimilarity\""
+msgstr ""
+"'x'는 \"dissimilarity\"이라는 클래스가 아니거나 클래스 \"dissimilarity\"로 전"
+"환할 수 없습니다."
+
+msgid "x is not a numeric dataframe or matrix."
+msgstr "x는 수치형 데이터 프레임 또는 행렬이 아닙니다."
+
+msgid "need at least 2 objects to cluster"
+msgstr "군집(cluster)는 적어도 2개의 객체를 필요로 합니다."
+
+msgid "No clustering performed, NA-values in the dissimilarity matrix."
+msgstr ""
+"비유사성 행렬(dissimilarity matrix)에서 NA 값이 발견되었기 때문에 군집화 과정"
+"이 실행되지 않았습니다."
+
+msgid "'x' is a \"dist\" object, but should be a data matrix or frame"
+msgstr ""
+"'x'는 클래스 \"dist\"를 가지는 객체이지만, 데이터 행렬 또는 프레임이어야 합니"
+"다."
+
+msgid "The number of cluster should be at least 1 and at most n-1."
+msgstr ""
+"군집(cluster)의 개수는 적어도 1 이상이며 최대 n-1 이내에 있어야 합니다."
+
+msgid "'sampsize' should be at least %d = max(2, 1+ number of clusters)"
+msgstr "'sampsize'는 최소 %d = max(2, 1 + 군집의 개수)가 되어야 합니다."
+
+msgid "'sampsize' = %d should not be larger than the number of objects, %d"
+msgstr "'sampsize' = %1$d는 객체의 개수 %2$d보다 클 수 없습니다."
+
+msgid "'samples' should be at least 1"
+msgstr "'samples'는 적어도 1 이상 이어야 합니다."
+
+msgid "when 'medoids.x' is FALSE, 'keep.data' must be too"
+msgstr "'medoids.x'가 FALSE인 경우에는 'keep.data' 역시 FALSE이어야 합니다."
+
+msgid ""
+"Distance computations with NAs: using correct instead of pre-2016 wrong "
+"formula.\n"
+"Use 'correct.d=FALSE' to get previous results or set 'correct.d=TRUE' "
+"explicitly\n"
+"to suppress this warning."
+msgstr ""
+
+#, fuzzy
+msgid "invalid 'correct.d'"
+msgstr "올바른 'twins' 객체가 아닙니다."
+
+msgid ""
+"Each of the random samples contains objects between which no distance can be "
+"computed."
+msgstr ""
+"각각의 무작위 표본은 서로간의 거리를 계산할 수 없는 객체들을 포함하고 있습니"
+"다."
+
+msgid ""
+"For each of the %d samples, at least one object was found which could not be "
+"assigned to a cluster (because of missing values)."
+msgstr ""
+"%d개의 표본 각각에 대해서 결측값으로 인하여 어느 군집에도 배정할 수 없는 객체"
+"를 적어도 하나 이상 발견하였습니다."
+
+msgid "invalid 'jstop' from .C(cl_clara,.):"
+msgstr ""
+".C(cl_clara,.)으로부터 얻어진 'jstop'는 다음과 같은 이유로 이상합니다: "
+
+msgid "'B' has to be a positive integer"
+msgstr "'B'는 반드시 양의 정수이어야 합니다."
+
+#, fuzzy
+msgid "invalid 'spaceH0':"
+msgstr "올바른 'twins' 객체가 아닙니다."
+
+msgid "index has to be a function or a list of function"
+msgstr ""
+
+msgid "invalid 'twins' object"
+msgstr "올바른 'twins' 객체가 아닙니다."
+
+msgid "x is not a dataframe or a numeric matrix."
+msgstr "x는 데이터 프레임이 아니거나 수치형 행렬이 아닙니다."
+
+msgid "invalid %s; must be named list"
+msgstr ""
+"사용할 수 있는 %s가 아닙니다. 반드시 구성요소에 이름이 부여된 리스트(named "
+"list)이여야 합니다."
+
+msgid "%s has invalid column names"
+msgstr "%s는 올바른 열이름을 가지고 있지 않습니다."
+
+msgid "%s must be in 1:ncol(x)"
+msgstr "%s는 반드시 1:ncol(x)내에 있어야 합니다."
+
+msgid "%s must contain column names or numbers"
+msgstr "%s는 반드시 열 이름 또는 번호를 포함해야 합니다."
+
+msgid "at least one binary variable has more than 2 levels."
+msgstr ""
+"적어도 하나 이상의 이항변수(binary variable)가 두 가지 이상의 수준(levels)을 "
+"가지고 있습니다."
+
+msgid "at least one binary variable has not 2 different levels."
+msgstr ""
+"적어도 하나 이상의 이항변수(binary variable)이 서로 다른 두 가지 수준을 가지"
+"고 있지 않습니다."
+
+msgid "at least one binary variable has values not in {0,1,NA}"
+msgstr ""
+"적어도 하나 이상의 이항변수(binary variable)이 {0,1,NA} 외의 값을 가지고 있습"
+"니다."
+
+msgid "binary variable(s) %s treated as interval scaled"
+msgstr ""
+"이항변수(binary variable) %s는 구간척도(interval scale)로서 다루어집니다. "
+
+msgid "%s has constant columns %s; these are standardized to 0"
+msgstr ""
+"%1$s는 상수(constant)값을 가지는 열 %2$s를 가집니다. 이들은 0으로 표준화"
+"(standardized)됩니다."
+
+msgid "with mixed variables, metric \"gower\" is used automatically"
+msgstr ""
+"혼합형 변수(mixed variables)를 이용할 때는 metric은 자동으로 \"gower\"가 사용"
+"됩니다."
+
+msgid "'weights' must be of length p (or 1)"
+msgstr "'weights'의 길이는 반드시 p (또는 1)이어야 합니다."
+
+msgid "invalid type %s for column numbers %s"
+msgstr "열번호 %2$s에 잘못된 유형(type) %1$s이 주어졌습니다."
+
+msgid "NA values in the dissimilarity matrix not allowed."
+msgstr "비유사성 행렬(dissimilarity matrix)는 NA 값을 가질 수 없습니다."
+
+msgid "No clustering performed, NA's in dissimilarity matrix."
+msgstr ""
+"비유사성 행렬(dissimilarity matrix)에 NA가 있기 때문에, 군집화 과정이 실행되"
+"지 않았습니다."
+
+msgid "'x' must be numeric n x p matrix"
+msgstr "'x'는 반드시 크기가 n x p인 수치형 행렬이어야 합니다."
+
+msgid "omitting NAs"
+msgstr "NA를 삭제합니다."
+
+msgid "no points without missing values"
+msgstr "결측값들을 제외하면 사용가능한 포인트들이 없습니다."
+
+msgid "computed some negative or all 0 probabilities"
+msgstr "확률값이 모두 0이거나 일부가 음수로 산출되었습니다."
+
+msgid "algorithm possibly not converged in %d iterations"
+msgstr "알고리즘의 %d번의 반복수행에도 수렴하지 않을 수 있습니다."
+
+msgid "'A' must be p x p cov-matrix defining an ellipsoid"
+msgstr ""
+"'A'는 반드시 타원(ellipsoid)를 정의하는 크기가 p x p인 공분산행렬(cov-matrix)"
+"이어야 합니다."
+
+msgid "ellipsoidPoints() not yet implemented for p >= 3 dim."
+msgstr "ellipsoidPoints()는 p >= 3 인경우에는 아직 구현되지 않았습니다."
+
+msgid "'k' (number of clusters) must be in {1,2, .., n/2 -1}"
+msgstr "'k' (군집의 개수)는 반드시 {1,2, .., n/2 -1} 내에 존재해야 합니다."
+
+msgid "'memb.exp' must be a finite number > 1"
+msgstr "'memb.exp'는 반드시 1보다 큰 유한한(finite) 숫자이어야 합니다."
+
+msgid "'maxit' must be non-negative integer"
+msgstr "'maxit'은 반드시 음이 아닌 정수이어야 합니다."
+
+msgid "'iniMem.p' must be a nonnegative n * k matrix with rowSums == 1"
+msgstr ""
+"'iniMem.p'는 반드시 크기가 n * k 인 비음수 행렬(nonnegative matrix)이어야 하"
+"며, 이 행렬의 rowSums == 1 이어야 합니다."
+
+msgid "FANNY algorithm has not converged in 'maxit' = %d iterations"
+msgstr "FANNY 알고리즘은 'maxit' = %d번의 반복수행에도 수렴하지 않았습니다."
+
+msgid "the memberships are all very close to 1/k. Maybe decrease 'memb.exp' ?"
+msgstr ""
+"멤버쉽(membership) 전부가 1/k에 매우 가깝습니다. 아마도 'memb.exp'를 줄여보"
+"는 것은 어떨까요?"
+
+msgid "'m', a membership matrix, must be nonnegative with rowSums == 1"
+msgstr ""
+"멤버쉽 행렬(membership matrix) 'm'은 반드시 음수를 가지지 않으며 rowSums == 1"
+"이어야 합니다."
+
+msgid "'n' must be >= 2"
+msgstr "'n'는 반드시 2보다 크거나 같아야 합니다."
+
+msgid "x must be a matrix or data frame."
+msgstr "x는 반드시 행렬 또는 데이터 프레임이어야 합니다."
+
+#, fuzzy
+msgid ""
+"All variables must be binary (e.g., a factor with 2 levels, both present)."
+msgstr ""
+"모든 변수들은 반드시 2개의 수준(levels)으로 이루어진 요인(factor)이어야 합니"
+"다."
+
+msgid "mona() needs at least p >= 2 variables (in current implementation)"
+msgstr ""
+
+msgid "No clustering performed, an object was found with all values missing."
+msgstr "모든 값이 결측된 객체가 발견되어 군집화 과정이 수행되지 않았습니다."
+
+msgid ""
+"No clustering performed, found variable with more than half values missing."
+msgstr ""
+"절반 이상의 값들이 결측된 변수가 발견되어 군집화 과정이 수행되지 않았습니다."
+
+msgid ""
+"No clustering performed, a variable was found with all non missing values "
+"identical."
+msgstr ""
+"결측되지 않은 모든 값들이 동일한 변수가 발견되어 군집화 과정이 수행되지 않았"
+"습니다."
+
+msgid "No clustering performed, all variables have at least one missing value."
+msgstr ""
+"모든 변수들이 적어도 하나 이상의 결측값을 가지기 때문에 군집화 과정이 수행되"
+"지 않았습니다."
+
+msgid "Cannot keep data when 'x' is a dissimilarity!"
+msgstr ""
+
+msgid "have %d observations, but not more than %d are allowed"
+msgstr ""
+
+msgid "Number of clusters 'k' must be in {1,2, .., n-1}; hence n >= 2"
+msgstr ""
+"군집(clusters)의 개수 'k'는 반드시 {1,2, .., n-1}내에 존재해야 하므로 n >= 2 "
+"입니다."
+
+msgid ""
+"'medoids' must be NULL or vector of %d distinct indices in {1,2, .., n}, n=%d"
+msgstr ""
+"'medoids'는 반드시 NULL 또는 {1,2, .., n}으로부터 %1$d개의 구분되는 인덱스를 "
+"가진 벡터입니다 (n=%2$d). "
+
+msgid "No clustering performed, NAs in the computed dissimilarity matrix."
+msgstr ""
+"계산된 비유사성 행렬(dissimilarity matrix) 내에 NA가 존재하여 군집화 과정이 "
+"수행되지 않았습니다."
+
+msgid "error from .C(cl_pam, *): invalid medID's"
+msgstr ".C(cl_pam, *)으로부터 에러가 발생했습니다: medID가 올바르지 않습니다."
+
+msgid "NA-values are not allowed in dist-like 'x'."
+msgstr "'x'는 NA를 가질 수 없습니다."
+
+msgid "Distances must be result of dist or a square matrix."
+msgstr ""
+"거리(distances)는 반드시 dist 또는 정방행렬(square matrix)의 결과이어야 합니"
+"다."
+
+msgid "the square matrix is not symmetric."
+msgstr "대칭(symmetric)적인 정방행렬이 아닙니다."
+
+msgid ">>>>> funny case in clusplot.default() -- please report!"
+msgstr ""
+">>>>> clusplot.default()에서 예상치 못한 경우가 발생했습니다 -- 보고해 주시"
+"길 부탁드립니다!"
+
+msgid "x is not a data matrix"
+msgstr "x는 데이터 행렬(data matrix)이 아닙니다."
+
+msgid "one or more objects contain only missing values"
+msgstr "하나 또는 그 이상의 객체들이 오로지 결측값만을 포함하고 있습니다."
+
+msgid "one or more variables contain only missing values"
+msgstr "하나 또는 그 이상의 변수들이 오로지 결측값만을 포함하고 있습니다."
+
+msgid ""
+"Missing values were displaced by the median of the corresponding variable(s)"
+msgstr "결측값들은 대응변수(들)의 중앙값으로 대체되었습니다."
+
+msgid "x is not numeric"
+msgstr "x는 수치형(numeric)이 아닙니다."
+
+msgid "The clustering vector is of incorrect length"
+msgstr "군집벡터(clustering vector)의 길이가 올바르지 않습니다."
+
+msgid "NA-values are not allowed in clustering vector"
+msgstr "군집벡터(clustering vector)에서는 NA가 허용되지 않습니다."
+
+msgid ""
+"Error in Fortran routine for the spanning ellipsoid,\n"
+" rank problem??"
+msgstr ""
+"스패닝 타원(spanning ellipsoid)를 생성하는 포트란 루틴(Fortran routine)에서 "
+"에러가 발생했습니다. \n"
+" 위수(rank) 문제인가요??"
+
+msgid "'col.clus' should have length 4 when color is TRUE"
+msgstr "color가 TRUE일 때, 'col.clus'의 길이는 반드시 4이어야 합니다."
+
+msgid "no diss nor data found, nor the original argument of %s"
+msgstr ""
+"diss와 data 모두 찾을 수 없을 뿐만 아니라 원래의 인자 %s 또한 찾을 수 없습니"
+"다."
+
+msgid "no diss nor data found for clusplot()'"
+msgstr "clusplot()에 사용될 diss와 data 모두 찾을 수 없습니다."
+
+msgid "invalid partition object"
+msgstr "partition 객체가 유효하지 않습니다."
+
+msgid ""
+"full silhouette is only available for results of 'clara(*, keep.data = TRUE)'"
+msgstr ""
+"full silhouette는 'clara(*, keep.data = TRUE)'의 결과만에 오로지 사용할 수 있"
+"습니다."
+
+msgid "'x' must only have integer codes"
+msgstr "'x'는 오로지 정수형 코드(codes)만을 가질 수 있습니다."
+
+msgid "Need either a dissimilarity 'dist' or diss.matrix 'dmatrix'"
+msgstr ""
+"dissimilarity 'dist' 또는 diss.matrix 'dmatrix' 둘 중에 하나가 필요합니다."
+
+msgid "'dmatrix' is not a dissimilarity matrix compatible to 'x'"
+msgstr ""
+"'dmatrix'는 'x'에 부합하는 (또는 사용할 수 있는) 비유사성 행렬(dissimilarity "
+"matrix)이 아닙니다."
+
+msgid "clustering 'x' and dissimilarity 'dist' are incompatible"
+msgstr "'x'와 'dist'가 서로 부합하지 않습니다."
+
+msgid "invalid silhouette structure"
+msgstr "silhouette 구조가 올바르지 않습니다."
+
+msgid "invalid 'silhouette' object"
+msgstr "'silhouette' 객체가 올바르지 않습니다."
+
+msgid "No valid silhouette information (#{clusters} =? 1)"
+msgstr "유효한 silhouette 정보가 없습니다 (#{clusters} =? 1)"
+
+msgid "Observation %s has *only* NAs --> omit it for clustering"
+msgid_plural "Observations %s have *only* NAs --> omit them for clustering!"
+msgstr[0] ""
+"관측값 %s는 *오로지* NA만을 가집니다 --> 군집화를 위하여 이것들을 제거합니다!"
+
+msgid "%d observation (%s) has *only* NAs --> omit them for clustering!"
+msgid_plural ""
+"%d observations (%s ...) have *only* NAs --> omit them for clustering!"
+msgstr[0] ""
+"%d개의 관측값들이 (%s) *오로지* NA만을 가집니다 --> 군집화를 위하여 이들을 제"
+"거합니다!"
+
+msgid "setting 'logical' variable %s to type 'asymm'"
+msgid_plural "setting 'logical' variables %s to type 'asymm'"
+msgstr[0] "'logical' 변수 %s를 유형(type) 'asymm'으로 설정합니다."
diff --git a/po/R-pl.po b/po/R-pl.po
new file mode 100644
index 0000000..e5ef8f0
--- /dev/null
+++ b/po/R-pl.po
@@ -0,0 +1,1144 @@
+msgid ""
+msgstr ""
+"Project-Id-Version: cluster 1.15.1\n"
+"Report-Msgid-Bugs-To: bugs.r-project.org\n"
+"POT-Creation-Date: 2018-04-06 23:26\n"
+"PO-Revision-Date: 2014-03-27 17:34+0100\n"
+"Last-Translator: Łukasz Daniel <lukasz.daniel@gmail.com>\n"
+"Language-Team: Łukasz Daniel <lukasz.daniel@gmail.com>\n"
+"Language: pl_PL\n"
+"MIME-Version: 1.0\n"
+"Content-Type: text/plain; charset=UTF-8\n"
+"Content-Transfer-Encoding: 8bit\n"
+"Plural-Forms: nplurals=3; plural=(n==1 ? 0 : n%10>=2 && n%10<=4 && (n%100<10 "
+"|| n%100>=20) ? 1 : 2);\n"
+"X-Poedit-SourceCharset: iso-8859-1\n"
+"X-Generator: Poedit 1.5.4\n"
+
+# cluster/R/agnes.R: 10
+# stop("invalid clustering method")
+msgid "invalid clustering method"
+msgstr "niepoprawna metoda grupowania"
+
+# cluster/R/agnes.R: 11
+# stop("ambiguous clustering method")
+msgid "ambiguous clustering method"
+msgstr "niejednoznaczna metoda grupowania"
+
+# cluster/R/agnes.R: 22
+# stop("'par.method' must be of length 1, 3, or 4")
+msgid "'par.method' must be of length 1, 3, or 4"
+msgstr "'par.method' musi być długości 1, 3, lub 4"
+
+# cluster/R/agnes.R: 28
+# stop("NA values in the dissimilarity matrix not allowed.")
+# cluster/R/diana.R: 11
+# stop("NA values in the dissimilarity matrix not allowed.")
+# cluster/R/pam.R: 13
+# stop("NA values in the dissimilarity matrix not allowed.")
+# cluster/R/fanny.R: 12
+# stop("NA values in the dissimilarity matrix not allowed.")
+msgid "NA-values in the dissimilarity matrix not allowed."
+msgstr "wartości NA w macierzy różnic nie są dozwolone."
+
+# cluster/R/agnes.R: 35
+# stop(gettextf("%s is not and cannot be converted to class \"dissimilarity\"", dataname))
+# cluster/R/diana.R: 18
+# stop(gettextf("%s is not and cannot be converted to class \"dissimilarity\"", dataname))
+# cluster/R/pam.R: 20
+# stop(gettextf("%s is not and cannot be converted to class \"dissimilarity\"", dataname))
+# cluster/R/fanny.R: 19
+# stop(gettextf("%s is not and cannot be converted to class \"dissimilarity\"", dataname))
+msgid "'x' is not and cannot be converted to class \"dissimilarity\""
+msgstr ""
+"argument 'x' nie jest i nie może być przekształcony na obiekt klasy "
+"\"dissimilarity\""
+
+# cluster/R/agnes.R: 53
+# stop(gettextf("%s is not a numeric dataframe or matrix.", dataname))
+# cluster/R/clara.R: 15
+# stop(gettextf("%s is not a numeric dataframe or matrix.", dataname))
+# cluster/R/diana.R: 36
+# stop(gettextf("%s is not a numeric dataframe or matrix.", dataname))
+# cluster/R/pam.R: 40
+# stop(gettextf("%s is not a numeric dataframe or matrix.", dataname))
+# cluster/R/fanny.R: 37
+# stop(gettextf("%s is not a numeric dataframe or matrix.", dataname))
+msgid "x is not a numeric dataframe or matrix."
+msgstr "argument 'x' nie jest ramką liczbową ani też macierzą"
+
+# cluster/R/agnes.R: 68
+# stop("need at least 2 objects to cluster")
+msgid "need at least 2 objects to cluster"
+msgstr "potrzeba co najmniej 2 obiektów do grupowania"
+
+# cluster/R/agnes.R: 92
+# stop("No clustering performed, NA values in the dissimilarity matrix.", "\n", sep = "" )
+# cluster/R/fanny.R: 120
+# stop("No clustering performed, NA values in the dissimilarity matrix.")
+msgid "No clustering performed, NA-values in the dissimilarity matrix."
+msgstr "Nie wykonano grupowania, wartości NA w macierzy różnic."
+
+# cluster/R/clara.R: 13
+# stop(gettextf("%s is a \"dist\" object, but should be a data matrix or frame", dataname))
+msgid "'x' is a \"dist\" object, but should be a data matrix or frame"
+msgstr "'x' jest obiektem klasy \"dist\", ale powinien być macierzą lub ramką"
+
+# cluster/R/clara.R: 18
+# stop("The number of cluster should be at least 1 and at most n-1." )
+msgid "The number of cluster should be at least 1 and at most n-1."
+msgstr "Liczba grup powinna wynosić co najmniej 1 oraz co najwyżej n-1."
+
+# cluster/R/clara.R: 20
+# stop(gettextf("'sampsize' should be at least %d = max(2, 1+ number of clusters)", max(2,k+1)), domain = "R-cluster")
+msgid "'sampsize' should be at least %d = max(2, 1+ number of clusters)"
+msgstr "'sampsize' powinien być co najmniej %d = max(2, 1+ liczba grup)"
+
+# cluster/R/clara.R: 22
+# stop(gettextf("'sampsize' = %d should not be larger than the number of objects, %d", sampsize, n), domain = "R-cluster")
+msgid "'sampsize' = %d should not be larger than the number of objects, %d"
+msgstr "'sampsize' = %d nie powinien być większy niż liczba obiektów, %d"
+
+# cluster/R/clara.R: 24
+# stop("'samples' should be at least 1")
+msgid "'samples' should be at least 1"
+msgstr "'samples' powinno wynosić przynajmniej 1"
+
+# cluster/R/clara.R: 32
+# stop("when 'medoids.x' is FALSE, 'keep.data' must be too")
+msgid "when 'medoids.x' is FALSE, 'keep.data' must be too"
+msgstr "kiedy 'medoids.x' jest FALSE, 'keep.data' musi być również FALSE"
+
+msgid ""
+"Distance computations with NAs: using correct instead of pre-2016 wrong "
+"formula.\n"
+"Use 'correct.d=FALSE' to get previous results or set 'correct.d=TRUE' "
+"explicitly\n"
+"to suppress this warning."
+msgstr ""
+
+# cluster/R/coef.R: 10
+# stop("invalid 'twins' object")
+#, fuzzy
+msgid "invalid 'correct.d'"
+msgstr "niepoprawny obiekt 'twins'"
+
+# cluster/R/clara.R: 96
+# stop("Each of the random samples contains objects between which no distance can be computed.")
+msgid ""
+"Each of the random samples contains objects between which no distance can be "
+"computed."
+msgstr ""
+"Każda z losowych próbek zawiera obiekty pomiędzy którymi żadna odległość nie "
+"może być obliczona."
+
+# cluster/R/clara.R: 98
+# stop(gettextf("For each of the %d samples, at least one object was found which could not be assigned to a cluster (because of missing values).", samples))
+msgid ""
+"For each of the %d samples, at least one object was found which could not be "
+"assigned to a cluster (because of missing values)."
+msgstr ""
+"Dla każdej z %d próbek, co najmniej jeden obiekt został znaleziony, który "
+"nie mógł być przypisany do grupy (z uwagi na brakujące wartości)."
+
+# cluster/R/clara.R: 100
+# stop(gettextf("invalid 'jstop' from .C(cl_clara,.): %s", res$jstop))
+msgid "invalid 'jstop' from .C(cl_clara,.):"
+msgstr "niepoprawny 'jstop' z '.C(cl_clara,.)':"
+
+# cluster/R/clusGap.R: 20
+# stop("'B' has to be a positive integer")
+msgid "'B' has to be a positive integer"
+msgstr "'B' musi być dodatnią liczbą całkowitą"
+
+#, fuzzy
+msgid "invalid 'spaceH0':"
+msgstr "niepoprawny typ"
+
+msgid "index has to be a function or a list of function"
+msgstr ""
+
+# cluster/R/coef.R: 10
+# stop("invalid 'twins' object")
+msgid "invalid 'twins' object"
+msgstr "niepoprawny obiekt 'twins'"
+
+# cluster/R/daisy.R: 8
+# stop(gettextf("%s is not a dataframe or a numeric matrix.", dataname))
+msgid "x is not a dataframe or a numeric matrix."
+msgstr "argument 'x' nie jest ramką danych ani też macierzą liczbową"
+
+# cluster/R/daisy.R: 15
+# stop(gettextf("invalid %s; must be named list", sQuote("type")))
+msgid "invalid %s; must be named list"
+msgstr "niepoprawne %s; musi być nazwaną listą"
+
+# cluster/R/daisy.R: 21
+# stop(gettextf("%s has invalid column names", paste0("type$", nt)))
+msgid "%s has invalid column names"
+msgstr "%s posiada niepoprawne nazwy kolumn"
+
+# cluster/R/daisy.R: 25
+# stop(gettextf("%s must be in 1:ncol(x)", paste0("type$", nt)))
+msgid "%s must be in 1:ncol(x)"
+msgstr "%s musi być w przedziale 1:ncol(x)"
+
+# cluster/R/daisy.R: 27
+# stop(gettextf("%s must contain column names or numbers", paste0("type$", nt)))
+msgid "%s must contain column names or numbers"
+msgstr "%s musi zawierać nazwy kolumn lub liczby"
+
+# cluster/R/daisy.R: 38
+# stop("at least one binary variable has more than 2 levels.")
+msgid "at least one binary variable has more than 2 levels."
+msgstr "przynajmniej jedna zmienna binarna posiada więcej niż 2 poziomy."
+
+# cluster/R/daisy.R: 40
+# warning("at least one binary variable has not 2 different levels.")
+msgid "at least one binary variable has not 2 different levels."
+msgstr "przynajmniej jedna zmienna binarna nie posiada 2 różnych poziomów."
+
+# cluster/R/daisy.R: 48
+# stop("at least one binary variable has values not in {0,1,NA}")
+msgid "at least one binary variable has values not in {0,1,NA}"
+msgstr "przynajmniej jedna zmienna binarna posiada wartości poza {0, 1, NA}"
+
+# cluster/R/daisy.R: 71
+# warning(gettextf("binary variable(s) %s treated as interval scaled", pColl(which(tI)[iBin])))
+msgid "binary variable(s) %s treated as interval scaled"
+msgstr "zmienne binarne %s są traktowane jako zmienne w skali przedziałowej"
+
+# cluster/R/daisy.R: 92
+# warning(gettextf("%s has constant columns %s; these are standardized to 0", sQuote("x"), pColl(which(sx == 0))))
+msgid "%s has constant columns %s; these are standardized to 0"
+msgstr "%s posiada stałe kolumny %s; zostały one ustandaryzowane do zera"
+
+# cluster/R/daisy.R: 102
+# warning("with mixed variables, metric \"gower\" is used automatically")
+msgid "with mixed variables, metric \"gower\" is used automatically"
+msgstr "z mieszanymi zmiennymi, metryka 'gower' jest używana automatycznie"
+
+# cluster/R/daisy.R: 117
+# stop("'weights' must be of length p (or 1)")
+msgid "'weights' must be of length p (or 1)"
+msgstr "'weights' musi być o długości 'p' (lub 1)"
+
+# cluster/R/daisy.R: 125
+# stop(gettextf("invalid type %s for column numbers %s", type2[ina], pColl(which(is.na))))
+msgid "invalid type %s for column numbers %s"
+msgstr "niepoprawny typ %s dla liczb kolumn %s"
+
+# cluster/R/agnes.R: 28
+# stop("NA values in the dissimilarity matrix not allowed.")
+# cluster/R/diana.R: 11
+# stop("NA values in the dissimilarity matrix not allowed.")
+# cluster/R/pam.R: 13
+# stop("NA values in the dissimilarity matrix not allowed.")
+# cluster/R/fanny.R: 12
+# stop("NA values in the dissimilarity matrix not allowed.")
+msgid "NA values in the dissimilarity matrix not allowed."
+msgstr "wartości NA w macierzy różnic nie są dozwolone."
+
+# cluster/R/diana.R: 76
+# stop("No clustering performed, NA's in dissimilarity matrix.\n")
+msgid "No clustering performed, NA's in dissimilarity matrix."
+msgstr "Nie wykonano grupowania, wartości NA w macierzy różnic"
+
+# cluster/R/ellipsoidhull.R: 14
+# stop("'x' must be numeric n x p matrix")
+msgid "'x' must be numeric n x p matrix"
+msgstr "'x' musi być liczbową macierzą n x p"
+
+# cluster/R/ellipsoidhull.R: 16
+# warning("omitting NAs")
+msgid "omitting NAs"
+msgstr "pomijanie wartości NA"
+
+# cluster/R/ellipsoidhull.R: 20
+# stop("no points without missing values")
+msgid "no points without missing values"
+msgstr "brak punktów bez brakujących wartości"
+
+# cluster/R/ellipsoidhull.R: 39
+# stop("computed some negative or all 0 probabilities")
+msgid "computed some negative or all 0 probabilities"
+msgstr ""
+"niektóre wyliczone prawdopodobieństwa są ujemne lub wszystkie są zerami"
+
+# cluster/R/fanny.R: 107
+# warning(gettextf(
+# "FANNY algorithm has not converged in 'maxit' = %d iterations",
+# maxit))
+msgid "algorithm possibly not converged in %d iterations"
+msgstr "algorytm prawdopodobnie nie uzbieżnił się w %d iteracjach"
+
+# cluster/R/ellipsoidhull.R: 92
+# stop("'A' must be p x p cov-matrix defining an ellipsoid")
+msgid "'A' must be p x p cov-matrix defining an ellipsoid"
+msgstr "'A' musi być macierzą kowariancji p x p określającą elipsoidę"
+
+# cluster/R/ellipsoidhull.R: 106
+# stop("ellipsoidPoints() not yet implemented for p >= 3 dim.")
+msgid "ellipsoidPoints() not yet implemented for p >= 3 dim."
+msgstr ""
+"'ellipsoidPoints()' nie została jeszcze zaimplementowana dla p >= 3 wymiary."
+
+# cluster/R/fanny.R: 55
+# stop("'k' (number of clusters) must be in {1,2, .., n/2 -1}")
+msgid "'k' (number of clusters) must be in {1,2, .., n/2 -1}"
+msgstr "'k' (liczba grup) musi mieścić się w przedziale {1,2, .., n/2 -1}"
+
+# cluster/R/fanny.R: 58
+# stop("'memb.exp' must be a finite number > 1")
+msgid "'memb.exp' must be a finite number > 1"
+msgstr "'memb.exp' musi być skończoną liczbą > 1"
+
+# cluster/R/fanny.R: 60
+# stop("'maxit' must be non-negative integer")
+msgid "'maxit' must be non-negative integer"
+msgstr "'maxit' musi być nieujemną liczbą całkowitą"
+
+# cluster/R/fanny.R: 69
+# stop("'iniMem.p' must be a nonnegative n * k matrix with rowSums == 1")
+msgid "'iniMem.p' must be a nonnegative n * k matrix with rowSums == 1"
+msgstr "'iniMem.p' musi być nieujemną macierzą n x k z rowSums == 1"
+
+# cluster/R/fanny.R: 107
+# warning(gettextf(
+# "FANNY algorithm has not converged in 'maxit' = %d iterations",
+# maxit))
+msgid "FANNY algorithm has not converged in 'maxit' = %d iterations"
+msgstr "algorytm FANNY nie uzbieżnił się w 'maxit' = %d iteracjach"
+
+# cluster/R/fanny.R: 144
+# warning("the memberships are all very close to 1/k. Maybe decrease 'memb.exp' ?")
+msgid "the memberships are all very close to 1/k. Maybe decrease 'memb.exp' ?"
+msgstr "przynależności są bardzo bliskie 1/k. Może zmniejszyć 'memb.exp'?"
+
+# cluster/R/fanny.R: 241
+# stop("'m', a membership matrix, must be nonnegative with rowSums == 1")
+msgid "'m', a membership matrix, must be nonnegative with rowSums == 1"
+msgstr "macierz przynależności 'm' musi być nieujemna z rowSums == 1"
+
+# cluster/R/internal.R: 18
+# stop("'n' argument must be >= 2")
+# cluster/R/internal.R: 26
+# stop("'n' argument must be >= 2")
+msgid "'n' must be >= 2"
+msgstr "argument 'n' musi być >= 2"
+
+# cluster/R/mona.R: 6
+# stop("'x' must be a matrix or data frame.")
+msgid "x must be a matrix or data frame."
+msgstr "argument 'x' musi być macierzą lub ramką danych."
+
+# cluster/R/mona.R: 10
+# stop("All variables must be binary (factor with 2 levels).")
+#, fuzzy
+msgid ""
+"All variables must be binary (e.g., a factor with 2 levels, both present)."
+msgstr "Wszystkie zmienne muszą być binarne (czynnik z dwoma poziomami)"
+
+msgid "mona() needs at least p >= 2 variables (in current implementation)"
+msgstr ""
+
+# cluster/R/mona.R: 40
+# stop("No clustering performed, an object was found with all values missing.")
+msgid "No clustering performed, an object was found with all values missing."
+msgstr ""
+"Nie wykonano grupowania, znaleziono obiekt któremu brakowało wszystkich "
+"wartości."
+
+# cluster/R/mona.R: 40
+# stop("No clustering performed, an object was found with all values missing.")
+msgid ""
+"No clustering performed, found variable with more than half values missing."
+msgstr ""
+"Nie wykonano grupowania, znaleziono zmienną, w której brakuje ponad połowy "
+"wartości."
+
+# cluster/R/mona.R: 44
+# stop("No clustering performed, a variable was found with all non missing values identical.")
+msgid ""
+"No clustering performed, a variable was found with all non missing values "
+"identical."
+msgstr ""
+"Nie wykonano grupowania, znaleziono zmienną z identycznymi niebrakującymi "
+"wartościami."
+
+# cluster/R/mona.R: 46
+# stop("No clustering performed, all variables have at least one missing value.")
+msgid "No clustering performed, all variables have at least one missing value."
+msgstr ""
+"Nie wykonano grupowania, wszystkie zmienne mają co najmniej jedną brakującą "
+"wartość."
+
+msgid "Cannot keep data when 'x' is a dissimilarity!"
+msgstr ""
+
+msgid "have %d observations, but not more than %d are allowed"
+msgstr ""
+
+# cluster/R/pam.R: 56
+# stop("Number of clusters 'k' must be in {1,2, .., n-1}; hence n >= 2")
+msgid "Number of clusters 'k' must be in {1,2, .., n-1}; hence n >= 2"
+msgstr ""
+"Liczba grup 'k' musi zawierać się w zbiorze {1,2, .., n-1}; tak więc n >= 2"
+
+# cluster/R/pam.R: 64
+# stop(gettextf("'medoids' must be NULL or vector of %d distinct indices in {1,2, .., n}, n=%d", k, n))
+msgid ""
+"'medoids' must be NULL or vector of %d distinct indices in {1,2, .., n}, n=%d"
+msgstr ""
+"argument 'medoids' musi być wartością NULL lub wektorem %d różnych indeksów "
+"w {1,2, .., n}, n=%d"
+
+# cluster/R/pam.R: 109
+# stop("No clustering performed, NAs in the computed dissimilarity matrix.")
+msgid "No clustering performed, NAs in the computed dissimilarity matrix."
+msgstr "Nie wykonano grupowania, wyliczono wartości NA w macierzy różnic."
+
+# cluster/R/pam.R: 116
+# stop("error from .C(cl_pam, *): invalid medID's")
+msgid "error from .C(cl_pam, *): invalid medID's"
+msgstr "błąd w '.C(cl_pam, *)': niepoprawne 'medID'"
+
+# cluster/R/plotpart.R: 70
+# stop("NA values are not allowed in dist-like 'x'.")
+msgid "NA-values are not allowed in dist-like 'x'."
+msgstr "wartości NA nie są dozwolone w 'x' typu odległości."
+
+# cluster/R/plotpart.R: 79
+# stop("Distances must be result of dist or a square matrix.")
+msgid "Distances must be result of dist or a square matrix."
+msgstr "Odległości muszą być wynikiem 'dist' lub macierzy kwadratowej."
+
+# cluster/R/plotpart.R: 81
+# stop("the square matrix is not symmetric.")
+msgid "the square matrix is not symmetric."
+msgstr "macierz kwadratowa nie jest symetryczna."
+
+# cluster/R/plotpart.R: 94
+# warning(">>>>> funny case in clusplot.default() -- please report!\n")
+msgid ">>>>> funny case in clusplot.default() -- please report!"
+msgstr ""
+">>>>> zabawny przypadek w 'clusplot.default()' -- proszę zgłosić raport!"
+
+# cluster/R/plotpart.R: 116
+# stop("'x' is not a data matrix")
+msgid "x is not a data matrix"
+msgstr "argument 'x' nie jest macierzą danych"
+
+# cluster/R/plotpart.R: 120
+# stop("one or more objects contain only missing values")
+msgid "one or more objects contain only missing values"
+msgstr "jeden lub więcej obiektów zawierają jedynie wartości brakujące"
+
+# cluster/R/plotpart.R: 122
+# stop("one or more variables contain only missing values")
+msgid "one or more variables contain only missing values"
+msgstr "jeden lub więcej zmiennych zawiera jedynie wartości brakujące"
+
+# cluster/R/plotpart.R: 125
+# message("Missing values were displaced by the median of the corresponding variable(s)")
+msgid ""
+"Missing values were displaced by the median of the corresponding variable(s)"
+msgstr ""
+"Brakujące wartości zostały zastąpione przez medianę odpowiednich zmiennych"
+
+# cluster/R/plotpart.R: 164
+# stop("'x' is not numeric")
+msgid "x is not numeric"
+msgstr "argument 'x' nie jest liczbą"
+
+# cluster/R/plotpart.R: 174
+# stop("The clustering vector is of incorrect length")
+msgid "The clustering vector is of incorrect length"
+msgstr "Wektor grupujący posiada niepoprawną długość"
+
+# cluster/R/plotpart.R: 177
+# stop("NA values are not allowed in clustering vector")
+msgid "NA-values are not allowed in clustering vector"
+msgstr "wartości NA są niedozwolone w wektorze grupującym"
+
+# cluster/R/plotpart.R: 303
+# warning("Error in Fortran routine for the spanning ellipsoid,\n rank problem??")
+msgid ""
+"Error in Fortran routine for the spanning ellipsoid,\n"
+" rank problem??"
+msgstr ""
+"Błąd w procedurze Fortran dla elipsoidy obejmującej,\n"
+" problem rang?"
+
+# cluster/R/plotpart.R: 353
+# stop("'col.clus' argument should have length 4 when color is TRUE")
+msgid "'col.clus' should have length 4 when color is TRUE"
+msgstr ""
+"argument 'col.clus' powinien mieć długość 4, gdy 'color' ma wartość TRUE"
+
+# cluster/R/plotpart.R: 508
+# stop(gettextf("no diss nor data found, nor the original argument of %s", deparse(x$call)))
+msgid "no diss nor data found, nor the original argument of %s"
+msgstr "nie znaleziono różnic ani danych, ani oryginalnego argumentu %s"
+
+# cluster/R/plotpart.R: 514
+# stop("no diss nor data found for 'clusplot()' function")
+msgid "no diss nor data found for clusplot()'"
+msgstr "nie znaleziono różnic ani danych dla funkcji 'clusplot()'"
+
+# cluster/R/silhouette.R: 7
+# stop("invalid partition object")
+msgid "invalid partition object"
+msgstr "niepoprawny obiekt podziału"
+
+# cluster/R/silhouette.R: 21
+# stop("full silhouette is only available for results of 'clara(*, keep.data = TRUE)'")
+msgid ""
+"full silhouette is only available for results of 'clara(*, keep.data = TRUE)'"
+msgstr ""
+"pełna sylwetka jest dostępna jedynie dla wyników 'clara(*, keep.data = TRUE)'"
+
+# cluster/R/silhouette.R: 35
+# stop("'x' must only have integer codes")
+# cluster/R/silhouette.R: 82
+# stop("'x' must only have integer codes")
+msgid "'x' must only have integer codes"
+msgstr "'x' musi posiadać tylko kody będące liczbami całkowitymi"
+
+# cluster/R/silhouette.R: 42
+# stop("Need either a dissimilarity 'dist' or diss.matrix 'dmatrix'")
+# cluster/R/silhouette.R: 94
+# stop("Need either a dissimilarity 'dist' or diss.matrix 'dmatrix'")
+msgid "Need either a dissimilarity 'dist' or diss.matrix 'dmatrix'"
+msgstr "Potrzeba albo różnic 'dist' lub diss.matrix 'dmatrix'"
+
+# cluster/R/silhouette.R: 44
+# stop("'dmatrix' is not a dissimilarity matrix compatible to 'x'")
+# cluster/R/silhouette.R: 96
+# stop("'dmatrix' is not a dissimilarity matrix compatible to 'x'")
+msgid "'dmatrix' is not a dissimilarity matrix compatible to 'x'"
+msgstr "'dmatrix' nie jest macierzą różnic kompatybilną z 'x'"
+
+# cluster/R/silhouette.R: 48
+# stop("clustering 'x' and dissimilarity 'dist' are incompatible")
+# cluster/R/silhouette.R: 100
+# stop("clustering 'x' and dissimilarity 'dist' are incompatible")
+msgid "clustering 'x' and dissimilarity 'dist' are incompatible"
+msgstr "grupowane 'x' oraz różnice 'dist' nie są kompatybilne"
+
+# cluster/R/silhouette.R: 134
+# stop("invalid silhouette structure")
+msgid "invalid silhouette structure"
+msgstr "niepoprawna struktura 'silhouette'"
+
+# cluster/R/silhouette.R: 158
+# stop("invalid 'silhouette' object")
+msgid "invalid 'silhouette' object"
+msgstr "niepoprawny obiekt 'silhouette'"
+
+# cluster/R/silhouette.R: 210
+# stop("No valid silhouette information (#{clusters} =? 1)")
+msgid "No valid silhouette information (#{clusters} =? 1)"
+msgstr "Brak poprawnej informacji o sylwetce (czy liczba grup =? 1)"
+
+# cluster/R/clara.R: 91
+# stop(sprintf(ngettext(nNA, "Observation %s has *only* NAs --> omit it for clustering", "Observations %s have *only* NAs --> omit them for clustering!", domain = "R-cluster"), pasteC(i)), domain = NA)
+msgid "Observation %s has *only* NAs --> omit it for clustering"
+msgid_plural "Observations %s have *only* NAs --> omit them for clustering!"
+msgstr[0] ""
+"Obserwacja %s posiada *tylko* wartości NA --> pomijanie jej w grupowaniu"
+msgstr[1] ""
+"Obserwacje %s posiadają *tylko* wartości NA --> pomijanie ich w grupowaniu"
+msgstr[2] ""
+"Obserwacje %s posiadają *tylko* wartości NA --> pomijanie ich w grupowaniu"
+
+# cluster/R/clara.R: 93
+# stop(sprintf(ngettext(nNA, "%d observation (%s) has *only* NAs --> omit them for clustering!", "%d observations (%s ...) have *only* NAs --> omit them for clustering!", domain = "R-cluster"), nNA, pasteC(i[1:12])), domain = NA)
+msgid "%d observation (%s) has *only* NAs --> omit them for clustering!"
+msgid_plural ""
+"%d observations (%s ...) have *only* NAs --> omit them for clustering!"
+msgstr[0] ""
+"%d obserwacja (%s) posiada *tylko* wartości NA --> pomijanie jej w grupowaniu"
+msgstr[1] ""
+"%d obserwacje (%s ...) posiadają *tylko* wartości NA --> pomijanie ich w "
+"grupowaniu"
+msgstr[2] ""
+"%d obserwacji (%s ...) posiadają *tylko* wartości NA --> pomijanie ich w "
+"grupowaniu"
+
+# cluster/R/daisy.R: 76
+# warning(sprintf(ngettext(sum(ilog),
+# "setting 'logical' variable %s to type 'asymm'",
+# "setting 'logical' variables %s to type 'asymm'", domain = "R-cluster"),
+# pColl(which(ilog))), domain = NA)
+msgid "setting 'logical' variable %s to type 'asymm'"
+msgid_plural "setting 'logical' variables %s to type 'asymm'"
+msgstr[0] "ustawianie zmiennej 'logical' %s na typ 'asymm'"
+msgstr[1] "ustawianie zmiennych 'logical' %s na typ 'asymm'"
+msgstr[2] "ustawianie zmiennych 'logical' %s na typ 'asymm'"
+
+#~ msgid "NAdiss"
+#~ msgstr "NAdiss"
+
+#~ msgid "non.diss"
+#~ msgstr "non.diss"
+
+#~ msgid "no distance can be computed."
+#~ msgstr "żadna odległość nie może zostać obliczona."
+
+#~ msgid "For each of the"
+#~ msgstr "Dla każdej z"
+
+#~ msgid ""
+#~ "samples, at least one object was found which\n"
+#~ " could not"
+#~ msgstr "próbek, co najmniej jeden obiekt został znaleziony, który nie mógł"
+
+#~ msgid "be assigned to a cluster (because of missing values)."
+#~ msgstr "być przypisany do grupy (z powodu brakujących wartości)."
+
+#~ msgid "invalid"
+#~ msgstr "niepoprawny argument"
+
+#~ msgid "type"
+#~ msgstr "type"
+
+#~ msgid "type$"
+#~ msgstr "type$"
+
+#~ msgid "binary variable(s)"
+#~ msgstr "zmienne binarne"
+
+#~ msgid "x"
+#~ msgstr "x"
+
+#~ msgid "has constant columns"
+#~ msgstr "posiada stałe kolumny"
+
+# cluster/R/ellipsoidhull.R: 42
+# warning(gettextf("algorithm possibly not converged in %d iterations", maxit))
+#~ msgid "possibly not converged in"
+#~ msgstr "algorytm prawdopodobnie nie uzbieżnił się w"
+
+# cluster/man/plot.mona.Rd: 9
+# gettext("Separation step", domain = "R-cluster")
+# cluster/R/plothier.R: 199
+# gettext("Separation step", domain = "R-cluster")
+#~ msgid "iterations"
+#~ msgstr "iteracjach"
+
+# cluster/R/pam.R: 64
+# stop(gettextf("'medoids' must be NULL or vector of %d distinct indices in {1,2, .., n}, n=%d", k, n))
+#~ msgid "'medoids' must be NULL or vector of"
+#~ msgstr "'medoids' musi być wartością NULL lub wektorem"
+
+#~ msgid "rank problem??"
+#~ msgstr "problem rang?"
+
+#~ msgid "'clara(*, keep.data = TRUE)'"
+#~ msgstr "'clara(*, keep.data = TRUE)'"
+
+# cluster/R/agnes.R: 135
+# gettext("Call: ", domain = "R-cluster")
+# cluster/R/clara.R: 141
+# gettext("Call: ", domain = "R-cluster")
+#~ msgid "Call:"
+#~ msgstr "Wywołanie:"
+
+# cluster/R/agnes.R: 136
+# gettext("Agglomerative coefficient: ", domain = "R-cluster")
+# cluster/R/agnes.R: 149
+# gettext("Agglomerative coefficient: ", domain = "R-cluster")
+#~ msgid "Agglomerative coefficient:"
+#~ msgstr "Współczynnik aglomeracyjny:"
+
+# cluster/R/agnes.R: 137
+# gettext("Order of objects:", domain = "R-cluster")
+# cluster/R/agnes.R: 150
+# gettext("Order of objects:", domain = "R-cluster")
+# cluster/R/mona.R: 75
+# gettext("Order of objects:", domain = "R-cluster")
+# cluster/R/diana.R: 115
+# gettext("Order of objects:", domain = "R-cluster")
+# cluster/R/diana.R: 136
+# gettext("Order of objects:", domain = "R-cluster")
+#~ msgid "Order of objects:"
+#~ msgstr "Kolejność (rząd) obiektów:"
+
+# cluster/R/agnes.R: 140
+# gettext("Height (summary):", domain = "R-cluster")
+#~ msgid "Height (summary):"
+#~ msgstr "Wysokość (podsumowanie):"
+
+# cluster/R/agnes.R: 141
+# gettext("Available components:", domain = "R-cluster")
+# cluster/R/agnes.R: 158
+# gettext("Available components:", domain = "R-cluster")
+# cluster/R/mona.R: 82
+# gettext("Available components:", domain = "R-cluster")
+# cluster/R/clara.R: 147
+# gettext("Available components:", domain = "R-cluster")
+# cluster/R/clara.R: 179
+# gettext("Available components:", domain = "R-cluster")
+# cluster/R/diana.R: 122
+# gettext("Available components:", domain = "R-cluster")
+# cluster/R/diana.R: 143
+# gettext("Available components:", domain = "R-cluster")
+# cluster/R/pam.R: 183
+# gettext("Available components:", domain = "R-cluster")
+# cluster/R/pam.R: 213
+# gettext("Available components:", domain = "R-cluster")
+# cluster/R/fanny.R: 189
+# gettext("Available components:", domain = "R-cluster")
+# cluster/R/fanny.R: 214
+# gettext("Available components:", domain = "R-cluster")
+#~ msgid "Available components:"
+#~ msgstr "Dostępne komponenty:"
+
+# cluster/R/agnes.R: 148
+# gettext("Object of class 'agnes' from call:", domain = "R-cluster")
+#~ msgid "Object of class 'agnes' from call:"
+#~ msgstr "Obiekt klasy \"agnes\" z wywołania:"
+
+# cluster/R/agnes.R: 153
+# gettext("Merge:", domain = "R-cluster")
+# cluster/R/diana.R: 113
+# gettext("Merge:", domain = "R-cluster")
+# cluster/R/diana.R: 135
+# gettext("Merge:", domain = "R-cluster")
+#~ msgid "Merge:"
+#~ msgstr "Złączenie:"
+
+# cluster/R/agnes.R: 154
+# gettext("Height:", domain = "R-cluster")
+# cluster/R/diana.R: 118
+# gettext("Height:", domain = "R-cluster")
+# cluster/R/diana.R: 138
+# gettext("Height:", domain = "R-cluster")
+#~ msgid "Height:"
+#~ msgstr "Wysokość:"
+
+# cluster/R/clara.R: 48
+# gettextf("calling .C(cl_clara, ..., DUP = %s):", doDUP, domain = "R-cluster")
+#~ msgid "calling .C(cl_clara, ..., DUP = %s):"
+#~ msgstr "wywoływanie .C(cl_clara, ..., DUP = %s):"
+
+# cluster/R/clara.R: 142
+# gettext("Medoids:", domain = "R-cluster")
+# cluster/R/clara.R: 160
+# gettext("Medoids:", domain = "R-cluster")
+# cluster/R/pam.R: 175
+# gettext("Medoids:", domain = "R-cluster")
+#~ msgid "Medoids:"
+#~ msgstr "Medoidy:"
+
+# cluster/R/clara.R: 143
+# gettext("Objective function:", domain = "R-cluster")
+# cluster/R/clara.R: 161
+# gettext("Objective function:", domain = "R-cluster")
+# cluster/R/pam.R: 177
+# gettext("Objective function:", domain = "R-cluster")
+#~ msgid "Objective function:"
+#~ msgstr "Funkcja celu:"
+
+# cluster/R/clara.R: 144
+# gettext("Clustering vector:", domain = "R-cluster")
+# cluster/R/clara.R: 171
+# gettext("Clustering vector:", domain = "R-cluster")
+# cluster/R/pam.R: 176
+# gettext("Clustering vector:", domain = "R-cluster")
+#~ msgid "Clustering vector:"
+#~ msgstr "Wektor grupujący:"
+
+# cluster/R/clara.R: 145
+# gettext("Cluster sizes:", domain = "R-cluster")
+#~ msgid "Cluster sizes:"
+#~ msgstr "Rozmiary grup:"
+
+# cluster/R/clara.R: 146
+# gettext("Best sample:", domain = "R-cluster")
+# cluster/R/clara.R: 170
+# gettext("Best sample:", domain = "R-cluster")
+#~ msgid "Best sample:"
+#~ msgstr "Najlepsza próbka:"
+
+# cluster/R/clara.R: 159
+# gettext("Object of class 'clara' from call:", domain = "R-cluster")
+#~ msgid "Object of class 'clara' from call:"
+#~ msgstr "Obiekt klasy \"clara\" z wywołania:"
+
+# cluster/R/clara.R: 162
+# gettext("Numerical information per cluster:", domain = "R-cluster")
+# cluster/R/pam.R: 197
+# gettext("Numerical information per cluster:", domain = "R-cluster")
+#~ msgid "Numerical information per cluster:"
+#~ msgstr "Numeryczna informacja na grupę:"
+
+# cluster/R/clara.R: 165
+# gettext("Average silhouette width per cluster:", domain = "R-cluster")
+# cluster/R/pam.R: 205
+# gettext("Average silhouette width per cluster:", domain = "R-cluster")
+# cluster/R/fanny.R: 206
+# gettext("Average silhouette width per cluster:", domain = "R-cluster")
+#~ msgid "Average silhouette width per cluster:"
+#~ msgstr "Przeciętna szerokość sylwetki na grupę:"
+
+# cluster/R/clara.R: 167
+# gettext("Average silhouette width of best sample: ", domain = "R-cluster")
+#~ msgid "Average silhouette width of best sample:"
+#~ msgstr "Przeciętna szerokość sylwetki dla najlepszej próbki:"
+
+# cluster/R/clara.R: 173
+# gettext("Silhouette plot information for best sample:", domain = "R-cluster")
+#~ msgid "Silhouette plot information for best sample:"
+#~ msgstr "Informacja o wykresie sylwetki dla najlepszej próbki:"
+
+# cluster/R/clusGap.R: 33
+# gettextf("Clustering k = 1,2,..., K.max (= %d): .. ", K.max, domain = "R-cluster")
+#~ msgid "Clustering k = 1,2,..., K.max (= %d): .."
+#~ msgstr "Grupowanie k = 1,2,..., K.max (= %d): .."
+
+# cluster/R/clusGap.R: 36
+# gettext("done", domain = "R-cluster")
+#~ msgid "done"
+#~ msgstr "wykonano"
+
+# cluster/R/clusGap.R: 46
+# gettextf("Bootstrapping, b = 1,2,..., B (= %d) [one \".\" per sample]:", B, domain = "R-cluster")
+#~ msgid "Bootstrapping, b = 1,2,..., B (= %d) [one \".\" per sample]:"
+#~ msgstr "Bootstrapowanie, b = 1,2,..., B (= %d) [jeden \".\" na próbkę]:"
+
+# cluster/R/clusGap.R: 127
+# gettext("Clustering Gap statistic [\"clusGap\"].", domain = "R-cluster")
+#~ msgid "Clustering Gap statistic [\"clusGap\"]."
+#~ msgstr "Statystyka przerwy grupowania [\"clusGap\"]."
+
+# cluster/R/clusGap.R: 128
+# gettextf("B=%d simulated reference sets, k = 1..%d", x$B, K, domain = "R-cluster")
+#~ msgid "B=%d simulated reference sets, k = 1..%d"
+#~ msgstr "B=%d symulowane zbiory referencyjne, k = 1..%d"
+
+# cluster/R/clusGap.R: 132
+# gettextf(" --> Number of clusters (method '%s', SE.factor=%g): %d", method, SE.factor, nc, domain = "R-cluster")
+#~ msgid "--> Number of clusters (method '%s', SE.factor=%g): %d"
+#~ msgstr "--> Liczba grup (metoda '%s', SE.factor=%g): %d"
+
+# cluster/R/clusGap.R: 134
+# gettextf(" --> Number of clusters (method '%s'): %d", method, nc, domain = "R-cluster")
+#~ msgid "--> Number of clusters (method '%s'): %d"
+#~ msgstr "--> Liczba grup (metoda '%s'): %d"
+
+# cluster/R/daisy.R: 157
+# gettext("NA values in the dissimilarity matrix!")
+#~ msgid "NA values in the dissimilarity matrix!"
+#~ msgstr "Wartości NA w macierzy odmienności!"
+
+# cluster/R/daisy.R: 171
+# gettext("Dissimilarities:", domain = "R-cluster")
+#~ msgid "Dissimilarities:"
+#~ msgstr "Odmienności:"
+
+# cluster/R/daisy.R: 178
+# gettext("Warning: ", domain = "R-cluster")
+# cluster/R/daisy.R: 208
+# gettext("Warning: ", domain = "R-cluster")
+#~ msgid "Warning:"
+#~ msgstr "Ostrzeżenie:"
+
+# cluster/R/daisy.R: 179
+# gettext("Metric: ", domain = "R-cluster")
+# cluster/R/daisy.R: 202
+# gettext("Metric: ", domain = "R-cluster")
+#~ msgid "Metric:"
+#~ msgstr "Metryka:"
+
+# cluster/R/daisy.R: 181
+# gettextf("Types = %s", paste(aT, collapse = ", "), domain = "R-cluster")
+# cluster/R/daisy.R: 204
+# gettextf("Types = %s", paste(aT, collapse = ", "), domain = "R-cluster")
+#~ msgid "Types = %s"
+#~ msgstr "Typy = %s"
+
+# cluster/R/daisy.R: 183
+# gettext("Number of objects:", domain = "R-cluster")
+# cluster/R/daisy.R: 206
+# gettext("Number of objects:", domain = "R-cluster")
+#~ msgid "Number of objects:"
+#~ msgstr "Liczba obiektów:"
+
+# cluster/R/diana.R: 120
+# gettext("Divisive coefficient:", domain = "R-cluster")
+# cluster/R/diana.R: 139
+# gettext("Divisive coefficient:", domain = "R-cluster")
+#~ msgid "Divisive coefficient:"
+#~ msgstr "Współczynnik podziału:"
+
+# cluster/R/ellipsoidhull.R: 37
+# gettext("Error in Fortran routine computing the spanning ellipsoid. Probably collinear data", domain = "R-cluster")
+#~ msgid ""
+#~ "Error in Fortran routine computing the spanning ellipsoid. Probably "
+#~ "collinear data"
+#~ msgstr ""
+#~ "Błąd w procedurze Fortran dla elipsoidy obejmującej, prawdopodobnie "
+#~ "współliniowe dane"
+
+#~ msgid ""
+#~ "ellipsoid in %d dimensions:\n"
+#~ " center = (%s); squared ave.radius d^2 = %s\n"
+#~ " and shape matrix ="
+#~ msgstr ""
+#~ "elipsoida w %d wymiarach:\n"
+#~ " centrum = (%s); kwadrat przeciętnego promienia d^2 = %s\n"
+#~ " oraz macierz kształtu ="
+
+# cluster/R/ellipsoidhull.R: 69
+# gettextf(" ellipsoid's area = %s", format(volume(x), digits=digits), domain = "R-cluster")
+#~ msgid "ellipsoid's area = %s"
+#~ msgstr "powierzchnia elipsoidy = %s"
+
+# cluster/R/ellipsoidhull.R: 70
+# gettextf(" ellipsoid's volume = %s", format(volume(x), digits=digits), domain = "R-cluster")
+#~ msgid "ellipsoid's volume = %s"
+#~ msgstr "objętość elipsoidy = %s"
+
+# cluster/R/ellipsoidhull.R: 73
+# gettext("** Warning: ** the algorithm did not terminate reliably!\n most probably because of collinear data", domain = "R-cluster")
+#~ msgid ""
+#~ "** Warning: ** the algorithm did not terminate reliably!\n"
+#~ " most probably because of collinear data"
+#~ msgstr ""
+#~ "** Ostrzeżenie: ** algorytm nie zakończył się w sposób wiarygodny!\n"
+#~ " prawdopodobnie z powodu współliniowych danych"
+
+# cluster/R/ellipsoidhull.R: 75
+# gettext("** Warning: ** the algorithm did not terminate reliably!\n (in the available number of iterations)", domain = "R-cluster")
+#~ msgid ""
+#~ "** Warning: ** the algorithm did not terminate reliably!\n"
+#~ " (in the available number of iterations)"
+#~ msgstr ""
+#~ "** Ostrzeżenie: ** algorytm nie zakończył się w sposób wiarygodny!\n"
+#~ " (w dostępnej liczbie iteracji)"
+
+# cluster/R/fanny.R: 172
+# gettext("Fuzzy Clustering object of class 'fanny': ", domain = "R-cluster")
+#~ msgid "Fuzzy Clustering object of class 'fanny':"
+#~ msgstr "Obiekt rozmytego grupowania klasy \"fanny\":"
+
+# cluster/R/fanny.R: 179
+# gettext("Membership coefficients (in percent, rounded):", domain = "R-cluster")
+#~ msgid "Membership coefficients (in percent, rounded):"
+#~ msgstr "Współczynnik członkostwa (w procentach, zaokrąglony):"
+
+# cluster/R/fanny.R: 180
+# gettext("Fuzzyness coefficients:", domain = "R-cluster")
+#~ msgid "Fuzzyness coefficients:"
+#~ msgstr "Współczynniki rozmycia:"
+
+# cluster/R/fanny.R: 181
+# gettext("Closest hard clustering:", domain = "R-cluster")
+#~ msgid "Closest hard clustering:"
+#~ msgstr "Najbliższe twarde grupowanie:"
+
+# cluster/R/fanny.R: 183
+# gettextf("k_crisp (= %d) < k !!", x$k.crisp, domain = "R-cluster")
+#~ msgid "k_crisp (= %d) < k !!"
+#~ msgstr "k_crisp (= %d) < k !!"
+
+# cluster/R/pam.R: 203
+# gettext("Silhouette plot information:", domain = "R-cluster")
+# cluster/R/fanny.R: 204
+# gettext("Silhouette plot information:", domain = "R-cluster")
+#~ msgid "Silhouette plot information:"
+#~ msgstr "Informacje o wykresie sylwetek:"
+
+# cluster/R/pam.R: 207
+# gettext("Average silhouette width of total data set:", domain = "R-cluster")
+# cluster/R/fanny.R: 208
+# gettext("Average silhouette width of total data set:", domain = "R-cluster")
+#~ msgid "Average silhouette width of total data set:"
+#~ msgstr "Przeciętna szerokość sylwetki pełnego zbioru danych:"
+
+# cluster/R/mona.R: 42
+# stop("No clustering performed, a variable was found with at least 50 percent missing values.")
+#~ msgid ""
+#~ "No clustering performed, a variable was found with at least 50 percent "
+#~ "missing values."
+#~ msgstr ""
+#~ "Nie wykonano grupowania, znaleziono zmienną z co najmniej 50 procent "
+#~ "brakujących wartości."
+
+# cluster/R/mona.R: 73
+# gettext("Revised data:", domain = "R-cluster")
+#~ msgid "Revised data:"
+#~ msgstr "Przeglądnięte dane:"
+
+# cluster/R/mona.R: 78
+# gettext("Variable used:", domain = "R-cluster")
+#~ msgid "Variable used:"
+#~ msgstr "Użyte zmienne:"
+
+# cluster/R/mona.R: 80
+# gettext("Separation step:", domain = "R-cluster")
+#~ msgid "Separation step:"
+#~ msgstr "Krok separacji:"
+
+# cluster/R/pam.R: 198
+# gettext("Isolated clusters:", domain = "R-cluster")
+#~ msgid "Isolated clusters:"
+#~ msgstr "Izolowane grupy:"
+
+#~ msgid "L-clusters:"
+#~ msgstr "L-grupy:"
+
+# cluster/R/pam.R: 200
+# gettext(" L*-clusters: ", domain = "R-cluster")
+#~ msgid "L*-clusters:"
+#~ msgstr "L*-grupy:"
+
+# cluster/R/plothier.R: 6
+# gettextf("Dendrogram of %s", paste(deparse(x$call), collapse = ""), domain = "R-cluster")
+# cluster/R/plothier.R: 98
+# gettextf("Dendrogram of %s", cl, domain = "R-cluster")
+# cluster/R/plothier.R: 153
+# gettextf("Dendrogram of %s", cl, domain = "R-cluster")
+#~ msgid "Dendrogram of %s"
+#~ msgstr "Dendrogram %s"
+
+# cluster/man/pltree.twins.Rd: 11
+# gettext("Height", domain = "R-cluster")
+# cluster/man/bannerplot.Rd: 11
+# gettext("Height", domain = "R-cluster")
+# cluster/R/plothier.R: 7
+# gettext("Height", domain = "R-cluster")
+# cluster/R/plothier.R: 24
+# gettext("Height", domain = "R-cluster")
+#~ msgid "Height"
+#~ msgstr "Wysokość"
+
+# cluster/R/plothier.R: 91
+# gettextf("Agglomerative Coefficient = %s", round(x$ac, digits = 2), domain = "R-cluster")
+#~ msgid "Agglomerative Coefficient = %s"
+#~ msgstr "Współczynnik aglomeracyjny = %s"
+
+# cluster/R/plothier.R: 97
+# gettextf("Banner of %s", cl, domain = "R-cluster")
+# cluster/R/plothier.R: 152
+# gettextf("Banner of %s", cl, domain = "R-cluster")
+# cluster/R/plothier.R: 198
+# gettextf("Banner of %s", deparse(x$call), domain = "R-cluster")
+#~ msgid "Banner of %s"
+#~ msgstr "Baner %s"
+
+# cluster/R/plothier.R: 113
+# gettext("Make a plot selection (or 0 to exit):", domain = "R-cluster")
+# cluster/R/plothier.R: 168
+# gettext("Make a plot selection (or 0 to exit):", domain = "R-cluster")
+# cluster/R/plotpart.R: 26
+# gettext("Make a plot selection (or 0 to exit):", domain = "R-cluster")
+#~ msgid "Make a plot selection (or 0 to exit):"
+#~ msgstr "Wybierz wykres (lub 0 aby wyjść):"
+
+# cluster/R/plothier.R: 146
+# gettextf("Divisive Coefficient = %s", round(x$dc, digits = 2), domain = "R-cluster")
+#~ msgid "Divisive Coefficient = %s"
+#~ msgstr "Współczynnik podziału = %s"
+
+# cluster/R/plotpart.R: 154
+# gettextf("CLUSPLOT(%s)", deparse(substitute(x)))
+#~ msgid "CLUSPLOT(%s)"
+#~ msgstr "CLUSPLOT(%s)"
+
+# cluster/R/plotpart.R: 155
+# gettextf("These two components explain %s percent of the point variability.", round(100 * var.dec, digits = 2))
+#~ msgid "These two components explain %s percent of the point variability."
+#~ msgstr "Te dwa komponenty wyjaśniają %s procent zmienności punktu."
+
+# cluster/man/clusplot.default.Rd: 23
+# gettext("Component 1", domain = "R-cluster")
+# cluster/R/plotpart.R: 156
+# gettext("Component 1", domain = "R-cluster")
+#~ msgid "Component 1"
+#~ msgstr "Komponent 1"
+
+#~ msgid "Component 2"
+#~ msgstr "Komponent 2"
+
+# cluster/R/plotpart.R: 198
+# gettextf("cluster %d has only one observation ..", i, domain = "R-cluster")
+#~ msgid "cluster %d has only one observation .."
+#~ msgstr "grupa %d ma tylko jedną obserwację .."
+
+# cluster/R/plotpart.R: 286
+# gettext("span & rank2 : calling \"spannel\" ..", domain = "R-cluster")
+#~ msgid "span & rank2 : calling \"spannel\" .."
+#~ msgstr "span & rank2 : wywoływanie \"spannel\" .."
+
+# cluster/R/silhouette.R: 178
+# gettextf("Silhouette of %d units in %d clusters from %s:", sum(csiz), k, deparse(x$call), domain = "R-cluster")
+# cluster/R/silhouette.R: 181
+# gettextf("Silhouette of %d units in %d clusters from %s:", sum(csiz), k, deparse(x$call), domain = "R-cluster")
+#~ msgid "Silhouette of %d units in %d clusters from %s:"
+#~ msgstr "Sylwetka %d jednostek w %d klastrach z %s:"
+
+# cluster/R/silhouette.R: 179
+# gettextf("Cluster sizes, ids = (%s), and average silhouette widths:", paste(x$codes, collapse=", "), domain = "R-cluster")
+# cluster/R/silhouette.R: 186
+# gettextf("Cluster sizes, ids = (%s), and average silhouette widths:", paste(x$codes, collapse=", "), domain = "R-cluster")
+#~ msgid "Cluster sizes, ids = (%s), and average silhouette widths:"
+#~ msgstr "Rozmiary grup, ids = (%s), oraz przeciętne szerokości sylwetek:"
+
+# cluster/R/silhouette.R: 182
+# gettextf("Cluster sizes and average silhouette widths:", domain = "R-cluster")
+# cluster/R/silhouette.R: 189
+# gettext("Cluster sizes and average silhouette widths:", domain = "R-cluster")
+#~ msgid "Cluster sizes and average silhouette widths:"
+#~ msgstr "Rozmiary grup oraz przeciętne szerokości sylwetek:"
+
+# cluster/R/silhouette.R: 185
+# gettextf("Silhouette of %d units in %d clusters:", sum(csiz), k, domain = "R-cluster")
+# cluster/R/silhouette.R: 188
+# gettextf("Silhouette of %d units in %d clusters:", sum(csiz), k, domain = "R-cluster")
+#~ msgid "Silhouette of %d units in %d clusters:"
+#~ msgstr "Sylwetka %d jednostek w %d klastrach:"
+
+# cluster/R/silhouette.R: 194
+# gettext("Individual silhouette widths:", domain = "R-cluster")
+#~ msgid "Individual silhouette widths:"
+#~ msgstr "Indywidualne szerokości sylwetki:"
+
+# cluster/R/silhouette.R: 220
+# gettext("Silhouette plot", domain = "R-cluster")
+#~ msgid "Silhouette plot"
+#~ msgstr "Wykres sylwetki"
+
+# cluster/R/silhouette.R: 224
+# gettextf("Silhouette plot of %s", sub("^FF","", deparse(cll)), domain = "R-cluster")
+#~ msgid "Silhouette plot of %s"
+#~ msgstr "Wykres sylwetki %s"
+
+# cluster/R/silhouette.R: 230
+# gettext("Average silhouette width:", domain = "R-cluster")
+#~ msgid "Average silhouette width:"
+#~ msgstr "Przeciętna szerokość sylwetki:"
+
+# cluster/R/daisy.R: 200
+# sprintf(ngettext(x$n, "%d dissimilarity, summarized:", "%d dissimilarities, summarized:", domain = "R-cluster"), x$n)
+#~ msgid "%d dissimilarity, summarized:"
+#~ msgid_plural "%d dissimilarities, summarized:"
+#~ msgstr[0] "%d odmienność, podsumowanie:"
+#~ msgstr[1] "%d odmienności, podsumowanie:"
+#~ msgstr[2] "%d odmienności, podsumowanie:"
+
+#~ msgid ""
+#~ "%d observations (%s ...)\n"
+#~ "\thave *only* NAs --> na.omit() them for clustering!"
+#~ msgstr ""
+#~ "%d obserwacji (%s ...)\n"
+#~ "\tmają *tylko* wartości NA --> pomijanie ich w grupowaniu"
+
+#~ msgid "hence, area = %s"
+#~ msgstr "tak więc powierzchnia = %s"
+
+#, fuzzy
+#~ msgid "R-cluster"
+#~ msgstr "L-grupy:"
diff --git a/po/cluster.pot b/po/cluster.pot
new file mode 100644
index 0000000..2aa7710
--- /dev/null
+++ b/po/cluster.pot
@@ -0,0 +1,54 @@
+# SOME DESCRIPTIVE TITLE.
+# Copyright (C) YEAR THE PACKAGE'S COPYRIGHT HOLDER
+# This file is distributed under the same license as the cluster package.
+# FIRST AUTHOR <EMAIL@ADDRESS>, YEAR.
+#
+#, fuzzy
+msgid ""
+msgstr ""
+"Project-Id-Version: cluster 2.0.8\n"
+"Report-Msgid-Bugs-To: \n"
+"POT-Creation-Date: 2019-04-02 17:09+0200\n"
+"PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n"
+"Last-Translator: FULL NAME <EMAIL@ADDRESS>\n"
+"Language-Team: LANGUAGE <LL@li.org>\n"
+"Language: \n"
+"MIME-Version: 1.0\n"
+"Content-Type: text/plain; charset=CHARSET\n"
+"Content-Transfer-Encoding: 8bit\n"
+
+#: clara.c:101
+#, c-format
+msgid "C level clara(): random k=%d > n **\n"
+msgstr ""
+
+#: clara.c:312
+#, c-format
+msgid ""
+"clara()'s C level dysta2(nsam=%d, p=%d, nbest=%d, n=%d) gave 'toomany_NA'"
+msgstr ""
+
+#: clara.c:348 clara.c:353
+#, c-format
+msgid "C level dysta2(): nsel[%s= %d] = %d is outside 0..n, n=%d"
+msgstr ""
+
+#: pam.c:161
+msgid "Invalid 'medoids'"
+msgstr ""
+
+#: pam.c:887
+#, c-format
+msgid "pam(): Bug in C level cstat(), k=%d: ntt=0"
+msgstr ""
+
+#: twins.c:153
+#, c-format
+msgid ""
+"agnes(method=%d, par.method=*) lead to invalid merge; step %d, D(.,.)=%g"
+msgstr ""
+
+#: twins.c:260
+#, c-format
+msgid "invalid method (code %d)"
+msgstr ""
diff --git a/po/de.po b/po/de.po
new file mode 100644
index 0000000..f82adb9
--- /dev/null
+++ b/po/de.po
@@ -0,0 +1,56 @@
+# # Translation of src/library/Recommended/cluster/po/cluster.pot to German
+# # Copyright (C) 2013 The R Foundation
+# # This file is distributed under the same license as the R package.
+# # Detlef Steuer <detlef.steuer@hsu-hh.de>, 2013-2015.
+msgid ""
+msgstr ""
+"Project-Id-Version: cluster 2.0.1\n"
+"Report-Msgid-Bugs-To: \n"
+"POT-Creation-Date: 2019-04-02 17:09+0200\n"
+"PO-Revision-Date: 2015-02-02 12:30+0100\n"
+"Last-Translator: Detlef Steuer <steuer@hsu-hh.de>\n"
+"Language-Team: R Core <r-core@r-project.org>\n"
+"Language: DE\n"
+"MIME-Version: 1.0\n"
+"Content-Type: text/plain; charset=utf-8\n"
+"Content-Transfer-Encoding: 8bit\n"
+"Plural-Forms: nplurals=2; plural=n == 1 ? 0 : 1;\n"
+
+#: clara.c:101
+#, c-format
+msgid "C level clara(): random k=%d > n **\n"
+msgstr "C Level clara(): random k=%d > n **\n"
+
+#: clara.c:312
+#, c-format
+msgid ""
+"clara()'s C level dysta2(nsam=%d, p=%d, nbest=%d, n=%d) gave 'toomany_NA'"
+msgstr ""
+"clara()'s C Level dysta2(nsam=%d, p=%d, nbest=%d, n=%d) ergab 'toomany_NA'"
+
+#: clara.c:348 clara.c:353
+#, c-format
+msgid "C level dysta2(): nsel[%s= %d] = %d is outside 0..n, n=%d"
+msgstr "C Level dysta2(): nsel[%s= %d] = %d ist außerhalb von 0..n, n=%d"
+
+#: pam.c:161
+msgid "Invalid 'medoids'"
+msgstr "unzulässige 'medoids'"
+
+#: pam.c:887
+#, c-format
+msgid "pam(): Bug in C level cstat(), k=%d: ntt=0"
+msgstr "pam(): Bug in C Level cstat(), k=%d: ntt=0"
+
+#: twins.c:153
+#, c-format
+msgid ""
+"agnes(method=%d, par.method=*) lead to invalid merge; step %d, D(.,.)=%g"
+msgstr ""
+"agnes(method=%d, par.method=*) führte zu unzulässigem Zusammenfassen;\n"
+"Schritt %d, D(.,.)=%g"
+
+#: twins.c:260
+#, c-format
+msgid "invalid method (code %d)"
+msgstr "unzulässige Methode (Kode %d)"
diff --git a/po/ko.po b/po/ko.po
new file mode 100644
index 0000000..78eaf9f
--- /dev/null
+++ b/po/ko.po
@@ -0,0 +1,60 @@
+# Korean translations for cluster package.
+# Recommended/cluster/po/ko.po
+# Maintainer: Martin Maechler <maechler@stat.math.ethz.ch>
+#
+# This file is distributed under the same license as the R cluster package.
+# Chel Hee Lee <chl948@mail.usask.ca>, 2013-2015.
+# Reviewing process is completed (15-JAN-2015)
+# QC: PASS
+# Freezing on 06-FEB-2015 for R-3.1.3
+#
+msgid ""
+msgstr ""
+"Project-Id-Version: cluster 1.15.2\n"
+"Report-Msgid-Bugs-To: \n"
+"POT-Creation-Date: 2019-04-02 17:09+0200\n"
+"PO-Revision-Date: 2015-02-06 21:56-0600\n"
+"Last-Translator: Chel Hee Lee <chl948@mail.usask.ca>\n"
+"Language-Team: Chel Hee Lee <chl948@mail.usask.ca>\n"
+"Language: ko\n"
+"MIME-Version: 1.0\n"
+"Content-Type: text/plain; charset=UTF-8\n"
+"Content-Transfer-Encoding: 8bit\n"
+"Plural-Forms: nplurals=1; plural=0;\n"
+
+#: clara.c:101
+#, c-format
+msgid "C level clara(): random k=%d > n **\n"
+msgstr "C level clara(): random k=%d > n **\n"
+
+#: clara.c:312
+#, c-format
+msgid ""
+"clara()'s C level dysta2(nsam=%d, p=%d, nbest=%d, n=%d) gave 'toomany_NA'"
+msgstr ""
+"clara()'s C level dysta2(nsam=%d, p=%d, nbest=%d, n=%d) gave 'toomany_NA'"
+
+#: clara.c:348 clara.c:353
+#, c-format
+msgid "C level dysta2(): nsel[%s= %d] = %d is outside 0..n, n=%d"
+msgstr "C level dysta2(): nsel[%s= %d] = %d is outside 0..n, n=%d"
+
+#: pam.c:161
+msgid "Invalid 'medoids'"
+msgstr ""
+
+#: pam.c:887
+#, c-format
+msgid "pam(): Bug in C level cstat(), k=%d: ntt=0"
+msgstr "pam(): Bug in C level cstat(), k=%d: ntt=0"
+
+#: twins.c:153
+#, c-format
+msgid ""
+"agnes(method=%d, par.method=*) lead to invalid merge; step %d, D(.,.)=%g"
+msgstr ""
+
+#: twins.c:260
+#, c-format
+msgid "invalid method (code %d)"
+msgstr "메소드가 올바르지 않습니다 (code %d)."
diff --git a/po/update-me.sh b/po/update-me.sh
new file mode 100755
index 0000000..6a5b186
--- /dev/null
+++ b/po/update-me.sh
@@ -0,0 +1,29 @@
+#!/bin/sh
+#
+#__>> Keep in sync with ~/R/Pkgs/Matrix/po/update-me.sh <<__
+#
+## Script for updating package-specific *.pot files
+## written such that it should work for any package
+#
+## Usage: run from anywhere; set R=<command> in the environment to use a
+## different R executable (default: R).  Output is also written to update.log
+## inside this po/ directory.
+#
+R=${R:-R}
+# Switch to the directory containing this script and make the path absolute.
+# Stop right away if that fails: everything below is relative to it.
+thisdir=`dirname "$0"` ; cd "$thisdir" || exit 1; thisdir=`pwd`
+echo "R = '$R' (`$R --version | head -n 1`)
+ preliminary thisdir='$thisdir'"
+# The package directory is the parent of po/, and its basename the package name
+pkgDIR=`dirname "$thisdir"`
+pkg=`basename "$pkgDIR"`
+echo ' --> pkgDIR='"$pkgDIR"' ; pkg='"$pkg"
+# echo ''; echo '## FIXME ## use new Scheme from R 3.0.x on'
+# cd `$R RHOME`/po
+# make pkg-update PKG=$pkg PKGDIR=$pkgDIR
+L=update.log
+# R code that is piped into R below
+Rcd="require('tools'); update_pkg_po('$pkgDIR')"
+## -------------------------------- as of R 3.0.0
+# Start a fresh log with the command itself, then append R's output to it.
+# $R is deliberately left unquoted so that it may carry extra arguments.
+echo "$Rcd" > "$L"
+echo "$Rcd" | $R --slave 2>&1 | tee -a "$L"
+echo 'end{make pkg-update}' ; echo ''
+echo 'Test with (e.g.)'
+echo ' LANGUAGE=de R --no-environ --no-save' ; echo ''
+echo 'and then something like'
+echo ' Matrix(1:6, 2,3) %*% Matrix(1:4, 2)'; echo ''
+echo 'Commit with something like'
+echo " svn ci -m'translation updates' po inst/po"; echo ''
diff --git a/src/clara.c b/src/clara.c
new file mode 100644
index 0000000..267ae5e
--- /dev/null
+++ b/src/clara.c
@@ -0,0 +1,1036 @@
+
+/* Clustering LARge Applications
+ ~ ~~~ ~
+ Clustering program based upon the k-medoid approach,
+ and suitable for data sets of at least 100 objects.
+ (for smaller data sets, please use program pam.)
+ */
+
+/* original Id: clara.f,v 1.10 2002/08/27 15:43:58 maechler translated by
+ * f2c (version 20010821) and run through f2c-clean,v 1.10 2002/03/28
+ */
+
+#include <math.h>
+
+#include <R_ext/Print.h>/* for diagnostics */
+#include <R_ext/Random.h>/* when R's RNG is used */
+#include <R_ext/Utils.h>/* for interrupting */
+
+#include "cluster.h"
+#include "ind_2.h"
+
+/**
+ * C entry point of clara(): draw up to *nran sub-samples of *nsam objects,
+ * run dysta2(), bswap2() and selec() on each of them, remember the sample
+ * with the smallest objective value 'zb', and finally use that best sample
+ * for resul() and (when *kk > 1) black().
+ *
+ * All arguments are pointers; see the per-argument comments below.
+ *
+ * On return *jstop is
+ *   0  when a best sample was found and the final results were computed,
+ *   1  when the number of unusable samples reached *nran,
+ *   2  when no valid sample was found otherwise;
+ * in the last two cases the function returns before *obj, resul() and
+ * black() are evaluated.
+ */
+void cl_clara(int *n,  /* = number of objects */
+              int *jpp,/* = number of variables */
+              int *kk, /* = number of clusters, 1 <= kk <= n-1 */
+              double *x, /* Input:  the data x[n, jpp] _rowwise_ (transposed)
+                          * Output: the first `n' values are the `clustering'
+                          *         (integers in 1,2,..,kk) */
+              int *nran,  /* = #{random samples} drawn (= `samples' in R)*/
+              int *nsam,  /* = #{objects} drawn from data set (`sampsize' in R) */
+              double *dys,/* [1:(1 + (nsam * (nsam - 1))/2)]
+                           * Output: to contain the distances */
+              int *mdata, /*= {0,1}; 1: min(x) is missing value (NA);  0: no NA */
+              double *valmd,/*[j]= missing value code (instead of NA) for x[,j]*/
+              int *jtmd,  /* [j]= {-1,1};  -1: x[,j] has NA; 1: no NAs in x[,j] */
+              DISS_KIND *diss_kind, // = {EUCLIDEAN, MANHATTAN, JACCARD}
+              int/*logical*/ *rng_R,/*= {0,1};  0 : use clara's internal weak RNG;
+                                     *          1 : use R's RNG (and seed) */
+              int/*logical*/ *pam_like,/* if (1), we do "swap()" as in pam(), otherwise
+                                          use the code as it was in clara() "forever"
+                                          upto 2011-04 */
+              int *correct_d,/* option for dist.computation: if (0), use the "fishy"
+                                formula to update distances in the NA-case,
+                                if (1), use a dysta2()-compatible formula */
+              int *nrepr, /* logical (0/1): 1 = "is representative object"  */
+              int *nsel,
+              int *nbest,/* x[nbest[j],] : the j-th obs in the final sample */
+              int *nr, int *nrx,/* prov. and final "medoids" aka representatives */
+              double *radus, double *ttd, double *ratt,
+              double *ttbes, double *rdbes, double *rabes,
+              int *mtt, double *obj,
+              double *avsyl, double *ttsyl, double *sylinf,
+              int *jstop, int *trace_lev,
+              double *tmp, /* = double [ 3 * nsam ] */
+              int *itmp    /* = integer[ 6 * nsam ] */
+    )
+{
+
+/* Views into the two work arrays tmp[] and itmp[], each slice of length nsam */
+#define tmp1 tmp
+#define tmp2 &tmp[*nsam]
+
+#define ntmp1 itmp
+#define ntmp2 &itmp[*nsam]
+#define ntmp3 &itmp[nsamb]
+#define ntmp4 &itmp[nsamb+ *nsam]
+#define ntmp5 &itmp[2*nsamb]
+#define ntmp6 &itmp[2*nsamb+ *nsam]
+
+    /* Local variables */
+
+    Rboolean nafs, kall, full_sample, lrg_sam, dyst_toomany_NA,
+        has_NA = *mdata;
+    int j, jk, jkk, js, jsm, jran, l, n_sam;
+    int nsm, ntt, rand_k, nrun, n_dys, nsamb, nunfs;
+    double rnn, sky, zb, s, sx = -1., zba = -1.;/* Wall */
+
+    *jstop = 0;
+    rnn = (double) (*n);
+
+    /* n_dys := size of distance array dys[] */
+    n_dys = *nsam * (*nsam - 1) / 2 + 1;/* >= 1 */
+    full_sample = (*n == *nsam);/* only one sub sample == full data */
+    nsamb = *nsam * 2;
+    lrg_sam = (*n < nsamb);/* sample more than *n/2 */
+    if (lrg_sam)/* generate indices for the other, smaller half */
+        n_sam = *n - *nsam;
+    else
+        n_sam = *nsam;
+
+    if(*trace_lev) Rprintf("C clara(): (nsam,nran,n) = (%d,%d,%d);%s\n",
+                           *nsam, *nran, *n,
+                           full_sample ? " 'full_sample',":
+                           (lrg_sam ? " 'large_sample',": ""));
+    if(*rng_R && !full_sample)
+        GetRNGstate();
+    else /* << initialize `random seed' of the very simple randm() below */
+        nrun = 0;
+
+/* Draw rand_k in 1..n from R's RNG or from randm(), clamp it to n, and
+ * (for trace_lev >= 4) report it; _nr_ is only used to tag the trace output */
+#define NEW_rand_k_trace_print(_nr_)                                    \
+        rand_k= 1+ (int)(rnn* ((*rng_R)? unif_rand(): randm(&nrun)));   \
+        if (rand_k > *n) {/* should never happen */                     \
+            warning(_("C level clara(): random k=%d > n **\n"), rand_k); \
+            rand_k = *n;                                                \
+        }                                                               \
+        if(*trace_lev >= 4) {                                           \
+            Rprintf("... {" #_nr_ "}");                                 \
+            if(*rng_R) Rprintf("R unif_rand()");                        \
+            else       Rprintf("nrun=%5d", nrun);                       \
+            Rprintf(" -> k{ran}=%d\n", rand_k);                         \
+        }
+
+/* __LOOP__ :  random subsamples are drawn and partitioned into kk clusters */
+
+    kall = FALSE; /* kall becomes TRUE iff we've found a "valid sample",
+                     i.e. one for which all d(j,k) can be computed */
+    nunfs = 0;    /* counts the samples that turned out to be unusable */
+    dyst_toomany_NA = FALSE;
+    for (jran = 1; jran <= *nran; ++jran) {
+        if(*trace_lev) Rprintf("C clara(): sample %d ", jran);
+        if (!full_sample) {/* `real' case: sample size < n */
+            ntt = 0;
+            if (kall && nunfs+1 != jran && !lrg_sam) {
+                /* Have had (at least) one valid sample; use its representatives
+                 * nrx[] :  nsel[] := sort(nrx[])  for the first j=1:k */
+                if(*trace_lev >= 2) Rprintf(" if (kall && nunfs...): \n");
+
+                for (jk = 0; jk < *kk; ++jk)
+                    nsel[jk] = nrx[jk];
+                for (jk = 0; jk < *kk-1; ++jk) { /* sort(nsel[0:(kk-1)] */
+                    /* FIXME: nsel[] is 0-indexed, but *contains* 1-indices*/
+                    nsm = nsel[jk];
+                    jsm = jk;
+                    for (jkk = jk + 1; jkk < *kk; ++jkk) {
+                        if (nsm > nsel[jkk]) {
+                            nsm = nsel[jkk];
+                            jsm = jkk;
+                        }
+                    }
+                    nsel[jsm] = nsel[jk]; nsel[jk] = nsm;
+                }
+                ntt = *kk;
+            }
+            else { /* no valid sample  _OR_  lrg_sam */
+                if(*trace_lev >= 2) Rprintf(" finding 1st... new k{ran}:\n");
+
+                /* Loop finding random index `rand_k' not yet in nrx[0:(*kk-1)] : */
+            L180:
+                NEW_rand_k_trace_print(180)
+
+                if (kall) {
+                    for (jk = 0; jk < *kk; ++jk)
+                        if (rand_k == nrx[jk])
+                            goto L180;
+                }
+                /* end Loop */
+
+                nsel[ntt] = rand_k;
+                if (++ntt == n_sam)
+                    goto L295;
+            }
+
+            if(*trace_lev >= 2) {
+                Rprintf(".. kall: %s, ", (kall) ? "T" : "FALSE");
+                if(*trace_lev == 2) {
+                    /* NOTE(review): ntt was already advanced above, so
+                     * nsel[ntt] is the slot to be filled next, not the value
+                     * stored last -- confirm this is what should be shown */
+                    Rprintf("nsel[ntt=%d] = %d\n", ntt, nsel[ntt]);
+                } else { /* trace_lev >= 3 */
+                    Rprintf("\n... nrx [0:%d]= ",*kk-1);
+                    for (jk = 0; jk < *kk; jk++) Rprintf("%d ",nrx[jk]);
+                    Rprintf("\n... nsel[0:%d]= ",ntt-1);
+                    for (jk = 0; jk < ntt; jk++) Rprintf("%d ",nsel[jk]);
+                    Rprintf("\n");
+                }
+            }
+
+            do {
+                /* Loop finding random index 'rand_k' in {1:n},
+                 * not in nrx[0:(k-1)] nor nsel[1:ntt] : */
+            L210:
+                NEW_rand_k_trace_print(210)
+
+                if (kall && lrg_sam) {
+                    for (jk = 0; jk < *kk; ++jk) {
+                        if (rand_k == nrx[jk])
+                            goto L210;
+                    }
+                }
+                /* insert rand_k into nsel[1:ntt] or after  and increase ntt : */
+                for (int ka = 0; ka < ntt; ++ka)
+                    if (nsel[ka] >= rand_k) {
+                        if (nsel[ka] == rand_k)
+                            goto L210; /* already sampled: draw again */
+                        else {// nsel[ka] > rand_k :
+                            /* shift the tail up by one to keep nsel[] sorted */
+                            for (int na = ntt; na > ka; --na)
+                                nsel[na] = nsel[na-1];
+                            nsel[ka] = rand_k;
+                            /* continue _outer_ loop */ goto L290;
+                        }
+                    }
+                // else: rand_k > nsel[ka]  for all ka = 0:(ntt-1) :
+                nsel[ntt] = rand_k;
+
+            L290:
+                ++ntt;
+            } while (ntt < n_sam);
+
+        L295:
+            if(*trace_lev) Rprintf(" {295} [ntt=%d, nunfs=%d] ", ntt, nunfs);
+            if (lrg_sam) {
+                /* have indices for smaller _nonsampled_ half; revert this: */
+                for (j = 1, jk = 0, js = 0; j <= *n; j++) {
+                    if (jk < n_sam && nsel[jk] == j)
+                        ++jk;
+                    else
+                        nrepr[js++] = j;
+                }
+                for (j = 0; j < *nsam; ++j)
+                    nsel[j] = nrepr[j];
+            }
+            if(*trace_lev >= 3) {
+                Rprintf(".. nsel[1:%d]= ", *nsam);
+                for (jk = 0; jk < *nsam; jk++) Rprintf("%d ",nsel[jk]);
+            }
+            if(*trace_lev) Rprintf(" -> dysta2()\n");
+        }
+        else { /* full_sample : *n = *nsam -- one sample is enough ! */
+            for (j = 0; j < *nsam; ++j)
+                nsel[j] = j+1;/* <- uses 1-indices for its *values*! */
+        }
+
+        dysta2(*nsam, *jpp, nsel, x, *n, dys, *diss_kind,
+               jtmd, valmd, has_NA, &dyst_toomany_NA);
+        if(dyst_toomany_NA) {
+            if(*trace_lev)
+                Rprintf("  dysta2() gave dyst_toomany_NA --> new sample\n");
+            dyst_toomany_NA = FALSE;
+            ++nunfs;
+            continue;/* random sample*/
+        }
+
+        /* s := largest dissimilarity within the current sample */
+        s = 0.;
+        for(l = 1; l < n_dys; l++) /* dys[0] is not used here */
+            if (s < dys[l])
+                s = dys[l];
+        if(*trace_lev >= 2)
+            Rprintf(". clara(): s:= max dys[1..%d] = %g;", l-1,s);
+
+        bswap2(*kk, *nsam, s, dys, *pam_like, *trace_lev,
+               /* --> */ &sky, nrepr,
+               /* dysma */tmp1, /*dysmb*/tmp2,
+               /* beter[], only used here */&tmp[nsamb]);
+
+        if(*trace_lev >= 2)
+            Rprintf("end{bswap2}: sky = %g\n", sky);
+
+        selec(*kk, *n, *jpp, *diss_kind, &zb, *nsam, has_NA, jtmd, valmd,
+              *trace_lev, nrepr, nsel, dys, x, nr, &nafs, ttd, radus, ratt,
+              ntmp1, ntmp2, ntmp3, ntmp4, ntmp5, ntmp6, tmp1, tmp2, *correct_d);
+
+        if (nafs) { /* couldn't assign some observation (to a cluster)
+                     * because of too many NA s */
+            ++nunfs;
+            if(*trace_lev >= 2)
+                Rprintf(" selec() -> 'NAfs'");
+        }
+        else if(!kall || zba > zb) { /* 1st proper sample  or  new best */
+            kall = TRUE;
+            if(*trace_lev >= 2) Rprintf(" 1st proper or new best:");
+            zba = zb;
+            for (jk = 0; jk < *kk; ++jk) {
+                ttbes[jk] = ttd  [jk];
+                rdbes[jk] = radus[jk];
+                rabes[jk] = ratt [jk];
+                nrx  [jk] = nr   [jk];
+            }
+            for (js = 0; js < *nsam; ++js)
+                nbest[js] = nsel[js];
+            sx = s;
+        }
+        if(*trace_lev >= 2) Rprintf(" obj= %g\n", zb/rnn);
+
+        if(full_sample) break; /* out of resampling */
+    }
+/* --- end random sampling loop */
+    if(*rng_R && !full_sample)
+        PutRNGstate();
+
+    if (nunfs >= *nran) { *jstop = 1; return; }
+    /* else */
+    if (!kall) { *jstop = 2; return; }
+
+    if(*trace_lev) {
+        Rprintf("C clara(): best sample _found_ ");
+        if(*trace_lev >= 2) {
+            Rprintf("; nbest[1:%d] =\n c(", *nsam);
+            for (js = 0; js < *nsam; ++js) {
+                Rprintf("%d", nbest[js]);
+                if(js+1 < *nsam) Rprintf(",");
+            }
+            Rprintf(")\n");
+        }
+        Rprintf(" --> dysta2(nbest), resul(), end\n");
+    }
+
+
+/*     for the best subsample, the objects of the entire data set
+     are assigned to their clusters */
+
+    *obj = zba / rnn;
+    dysta2(*nsam, *jpp, nbest, x, *n, dys, *diss_kind, jtmd, valmd,
+           has_NA, &dyst_toomany_NA);
+    if(dyst_toomany_NA) {
+        /* nbest is an int array: "%d" needs an int, so report its first
+         * entry; passing the pointer itself is undefined behaviour */
+        error(_(
+          "clara()'s C level dysta2(nsam=%d, p=%d, nbest=%d, n=%d) gave 'toomany_NA'"),
+              *nsam, *jpp, nbest[0], *n );
+    }
+    resul(*kk, *n, *jpp, *diss_kind, has_NA, jtmd, valmd, x, nrx, mtt, *correct_d);
+
+    if (*kk > 1)
+        black(*kk, *jpp, *nsam, nbest, dys, sx, x,
+              /* compute --> */
+              avsyl, ttsyl, sylinf,
+              ntmp1, ntmp2, ntmp3, ntmp4, /* syl[] */ tmp1, tmp2);
+    return;
+} /* End clara() ---------------------------------------------------*/
+#undef tmp1
+#undef tmp2
+
+#undef ntmp1
+#undef ntmp2
+#undef ntmp3
+#undef ntmp4
+#undef ntmp5
+#undef ntmp6
+
+
+
+/**
+ * Compute the dissimilarities between all pairs of the nsam objects whose
+ * (1-based) row numbers are listed in nsel[], and store them in dys[]:
+ * dys[0] is set to 0, and the pair (l, k), k < l, goes to consecutive
+ * positions 1, 2, ... in the order l = 1..nsam-1, k = 0..l-1.
+ *
+ * x[] is read with stride n per variable; variables j with jtmd[j] < 0 are
+ * skipped for a pair when either value equals the missing code valmd[j]
+ * (only if has_NA).  When no variable is left for a pair, *toomany_NA is set
+ * to TRUE and -1 is stored for that pair.  Raises an R error when an entry
+ * of nsel[] is not in 1..n.
+ */
+void dysta2(int nsam, int jpp, int *nsel,
+            double *x, int n, double *dys, DISS_KIND diss_kind,
+            int *jtmd, double *valmd, Rboolean has_NA, Rboolean *toomany_NA)
+{
+    int pos = 0; /* position in dys[] of the pair handled last */
+
+    dys[0] = 0.;/* very first index; *is* used because ind_2(i,i) |-> 0 ! */
+    for (int l = 1; l < nsam; ++l) {
+        const int obj_l = nsel[l];
+        if (obj_l <= 0 || obj_l > n)
+            error(_("C level dysta2(): nsel[%s= %d] = %d is outside 0..n, n=%d"),
+                  "l", l, obj_l, n);
+
+        for (int k = 0; k < l; ++k) { /* d(nsel[l], nsel[k]), if computable */
+            const int obj_k = nsel[k];
+            if (obj_k <= 0 || obj_k > n)
+                error(_("C level dysta2(): nsel[%s= %d] = %d is outside 0..n, n=%d"),
+                      "k", k, obj_k, n);
+            ++pos;
+
+            int n_used = 0;  /* variables present in both objects */
+            int n_ones = 0;  /* Jaccard: variables where at least one is "1" */
+            double acc = 0.; /* accumulated (unscaled) distance / #{both "1"} */
+            int il = obj_l - 1, ik = obj_k - 1; /* offsets into x[] */
+
+            for (int j = 0; j < jpp; ++j, il += n, ik += n) {
+                /* x[,j] has some missing values: skip j if either is missing */
+                if (has_NA && jtmd[j] < 0 &&
+                    (x[il] == valmd[j] || x[ik] == valmd[j]))
+                    continue;
+
+                ++n_used;
+                if (diss_kind == EUCLIDEAN) {
+                    acc += (x[il] - x[ik]) * (x[il] - x[ik]);
+                } else if (diss_kind == JACCARD) {
+                    if (x[il] > 0.9 && x[ik] > 0.9) { /* both "are 1" */
+                        acc++;
+                        n_ones++;
+                    } else if (x[il] > 0.9 || x[ik] > 0.9) { /* exactly one is */
+                        n_ones++;
+                    }
+                } else { /* MANHATTAN */
+                    acc += fabs(x[il] - x[ik]);
+                }
+            }
+
+            if (n_used == 0) { /* too many NA: d(.,.) cannot be computed */
+                *toomany_NA = TRUE;
+                dys[pos] = -1.;
+                continue;
+            }
+
+            /* scale up to all jpp variables when some had to be skipped */
+            const double scaled = acc * (jpp / (double) n_used);
+            if (diss_kind == EUCLIDEAN)
+                dys[pos] = sqrt(scaled);
+            else if (diss_kind == JACCARD)
+                dys[pos] = 1 - acc / (double) n_ones;
+            else /* MANHATTAN */
+                dys[pos] = scaled;
+        } /* for( k ) */
+    } /* for( l ) */
+} /* End dysta2() -----------------------------------------------------------*/
+
+/* clara's own very simple random number generator.
+ *
+ * Advances the state *nrun by  nrun <- (5761 * nrun + 999) mod 2^16  and
+ * returns the new state divided by 65536.
+ *
+ * The original authors wrote it themselves to be machine independent: the
+ * largest int used is less than 2^30, and the period is 2^16 = 65536, which
+ * they considered good enough for this purpose.
+ */
+double randm(int *nrun)
+{
+    /* keeping only the low 16 bits is the same as taking "% 65536" */
+    const int state = (*nrun * 5761 + 999) & 0xFFFF;
+
+    *nrun = state;
+    return (double) state / 65536.;
+} /* randm() */
+
+/* bswap2() : called once [per random sample] from clara() :
+ *
+ * Chooses kk representatives ("medoids") among the n sample objects in two
+ * phases: BUILD (greedy selection of one representative at a time), then
+ * SWAP (carry out the best single exchange representative <-> other object
+ * as long as one still decreases the total dissimilarity).
+ *
+ * Arguments, as used by the code below:
+ *  kk         number of representatives to select
+ *  n          number of objects in the sample
+ *  s          input bound; rescaled to s * 1.1 + 1 and then used as the
+ *             initial "larger than any dissimilarity" value
+ *  dys        dissimilarities, looked up as dys[ind_2(i, j)] with 1-based
+ *             i, j (ind_2() is defined outside this function)
+ *  pam_like   selects which of two formulas is used inside the SWAP phase
+ *  trace_lev  >= 2 prints progress via Rprintf(), >= 3 prints more detail
+ *  sky        output: sum of dysma[] after BUILD, plus the (negative) dzsky
+ *             of every swap that was carried out
+ *  nrepr      output, length n: 1 for representatives, 0 otherwise
+ *  dysma, dysmb, beter   work arrays, each indexed 1..n
+ */
+void bswap2(int kk, int n, /* == nsam == 'sampsize', here in clara */
+ double s, const double dys[],
+ Rboolean pam_like, int trace_lev,
+ // result (though *only* nrepr[] is really used in caller:)
+ double *sky, int *nrepr,
+ double *dysma, double *dysmb, double *beter)
+{
+ int i, j, ij, k,h, hbest = -1, nbest = -1;/* init for -Wall */
+ double dzsky;
+
+ /* Parameter adjustments: shift the pointers so the arrays are indexed 1..n below */
+ --nrepr;
+ --beter;
+
+ --dysma; --dysmb;
+
+ if(trace_lev >= 2) {
+ if(trace_lev == 2)
+ Rprintf("\n bswap2():");
+ else
+ Rprintf("\nclara()'s bswap2(*, s=%g): ", s);
+ }
+
+ s = s * 1.1 + 1.;/* value larger than all dissimilarities */
+
+/* ====== first algorithm: BUILD. ====== */
+
+ for (i = 1; i <= n; ++i) {
+ nrepr[i] = 0;
+ dysma[i] = s;
+ }
+
+ for(k = 0; k < kk; k++) { /* each pass adds one representative */
+ int nmax = -1; /* -Wall */
+ double ammax = 0.;
+ for (i = 1; i <= n; ++i) {
+ if (nrepr[i] == 0) {
+ beter[i] = 0.; /* beter[i] := total decrease of dysma[] if i were added */
+ for (j = 1; j <= n; ++j) {
+ double cmd = dysma[j] - dys[ ind_2(i, j)];
+ if (cmd > 0.)
+ beter[i] += cmd;
+ }
+ if (ammax <= beter[i]) {
+ /* does < (instead of <= ) work too? -- NO! */
+ ammax = beter[i];
+ nmax = i;
+ }
+ }
+ }
+
+ nrepr[nmax] = 1;/* = .true. : found new representative */
+ if(trace_lev >= 2) {
+ if(trace_lev == 2)
+ Rprintf(" %d", nmax);
+ else
+ Rprintf(" new repr. %d\n", nmax);
+ }
+
+ /* update dysma[] : dysma[j] = D(j, nearest_representative) */
+ for (j = 1; j <= n; ++j) {
+ ij = ind_2(nmax, j);
+ if (dysma[j] > dys[ij])
+ dysma[j] = dys[ij];
+ }
+ }
+ // output of the above loop: nrepr[], dysma[], ...
+
+ *sky = 0.;
+ for (j = 1; j <= n; ++j)
+ *sky += dysma[j];
+
+ if(trace_lev >= 2) /* >= 2 (?) */ {
+ Rprintf(" after build: medoids are");
+ for (i = 1; i <= n; ++i)
+ if(nrepr[i] == 1) Rprintf(" %2d", i);
+ if(trace_lev >= 3) {
+ Rprintf("\n and min.dist dysma[1:n] are\n");
+ for (i = 1; i <= n; ++i) {
+ Rprintf(" %6.3g", dysma[i]);
+ if(i % 10 == 0) Rprintf("\n");
+ }
+ if(n % 10 != 0) Rprintf("\n");
+ } else Rprintf("\n");
+ Rprintf(" --> sky = sum_j D_j= %g\n", *sky);
+ }
+
+ if (kk == 1) /* with a single representative there is nothing to swap */
+ return;
+
+// asky = *sky / ((double) n);
+
+/* ====== second algorithm: SWAP. ====== */
+
+/* Big LOOP : re-entered through "goto L60" after every swap carried out */
+L60:
+
+ for (j = 1; j <= n; ++j) {
+ /* dysma[j] := D_j d(j, <closest medi>) [KR p.102, 104]
+ * dysmb[j] := E_j d(j, <2-nd cl.medi>) [p.103] */
+ dysma[j] = s;
+ dysmb[j] = s;
+ for (i = 1; i <= n; ++i) {
+ if (nrepr[i]) {
+ ij = ind_2(i, j);
+ if (dysma[j] > dys[ij]) {
+ dysmb[j] = dysma[j];
+ dysma[j] = dys[ij];
+ } else if (dysmb[j] > dys[ij]) {
+ dysmb[j] = dys[ij];
+ }
+ }
+ }
+ }
+
+ dzsky = 1.; /* 1 is arbitrary > 0; only dzsky < 0 matters in the end */
+ for (h = 1; h <= n; ++h) if (!nrepr[h]) { /* h: candidate to become a representative */
+ for (i = 1; i <= n; ++i) if (nrepr[i]) { /* i: representative that would be dropped */
+ double dz = 0.;
+ /* dz := T_{ih} := sum_j C_{jih} [p.104] : */
+ for (j = 1; j <= n; ++j) {
+ int ij = ind_2(i, j), /* (this local ij shadows the function-level one) */
+ hj = ind_2(h, j);
+ if (dys[ij] == dysma[j]) {
+ double small;
+ if(pam_like)
+ small = dysmb[j] > dys[hj] ? dys[hj] : dysmb[j];
+ else // old clara code which differs from pam()'s
+ // and seems a bit illogical:
+ small = dysmb[j] > dys[ij] ? dys[hj] : dysmb[j];
+ dz += (- dysma[j] + small);
+ }
+ else if (dys[hj] < dysma[j])
+ dz += (- dysma[j] + dys[hj]);
+ }
+ if (dzsky > dz) {
+ dzsky = dz; // dzsky := min_{i,h} T_{i,h}
+ hbest = h;
+ nbest = i;
+ }
+ }
+ }
+
+ /* once had some 64-bit compiler / data configuration that looped forever*/
+ R_CheckUserInterrupt();
+
+ if (dzsky < 0.) { /* found an improving swap */
+ if(trace_lev >= 3)
+ Rprintf( " swp new %d <-> %d old; decreasing diss. by %g\n",
+ hbest, nbest, dzsky);
+ nrepr[hbest] = 1;
+ nrepr[nbest] = 0;
+ *sky += dzsky;
+ goto L60;
+ }
+ if(trace_lev >= 2 && hbest != -1) // in my examples hbest == -1 and it does not print:
+ Rprintf( " Last swap: new %d <-> %d old; decreasing diss. by %g\n",
+ hbest, nbest, dzsky);
+
+} /* End of bswap2() -------------------------------------------------- */
+
+/* selec() : called once [per random sample] from clara()
+ *
+ * Given the representatives flagged in nrepr[1..nsam], assigns each of the n
+ * objects of the whole data set to its nearest representative and collects
+ * per-cluster statistics.
+ *
+ *  x        data, read as x[(obj - 1) + var * n], obj in 1..n, var in 0..jpp-1
+ *  nsel     nsel[j] = object number (1..n) of the j-th sampled object
+ *  dys      dissimilarities within the sample, looked up via ind_2()
+ *           (defined outside this function); only used for ratt[]
+ *  jtmd, valmd   with has_NA: variable jp is skipped for a pair of objects
+ *           when jtmd[jp] < 0 and either value equals valmd[jp]
+ *  correct_d     with has_NA: nonzero rescales the sum by jpp / nobs,
+ *           zero by nobs / jpp (nobs = number of variables not skipped)
+ *  trace_lev     not used in this function
+ * Output:
+ *  zb       sum over all objects of the distance to the assigned representative
+ *  nr       object numbers of the kk representatives
+ *  ttd      per cluster: average distance to its representative
+ *  radus    per cluster: largest distance to its representative
+ *  ratt     per cluster (only if kk > 1): radus / (smallest dissimilarity to
+ *           another representative), or -1. if that dissimilarity is 0
+ *  nafs     TRUE, with immediate return, if some object cannot be compared
+ *           with any representative
+ * After the assignment the clusters are renumbered in the order in which
+ * they are first hit while scanning the objects 1..n.
+ */
+void selec(int kk, int n, int jpp, DISS_KIND diss_kind,
+ double *zb, int nsam, Rboolean has_NA, int *jtmd, double *valmd,
+ int trace_lev,
+ int *nrepr, int *nsel, double *dys, double *x, int *nr,
+ Rboolean *nafs, /* := TRUE if a distance cannot be calculated */
+ double *ttd, double *radus, double *ratt,
+ // [i]tmp* for clara(), i.e. not used later!
+ int *nrnew, int *nsnew, int *npnew, int *ns, int *np, int *new,
+ double *ttnew, double *rdnew, int correct_d)
+{
+
+ /* Local variables */
+ int j, jk, i, jp, jnew, jkabc = -1/* -Wall */;
+ int newf, nr_k, na, nb;
+
+ double pp = (double) (jpp);
+
+/* Parameter adjustments: the arrays below are indexed from 1 (x, jtmd, valmd stay 0-based) */
+ --nsel; --nrepr;
+
+ --ratt;
+ --radus; --ttd; --np; --nr; --ns;
+
+ --rdnew; --ttnew; --npnew; --nrnew; --nsnew;
+ --new;
+
+ /* nafs := TRUE if a distance cannot be calculated (because of NA s)*/
+ *nafs = FALSE;
+
+ /* identification of representative objects, and initializations */
+ jk = 0;
+ for (j = 1; j <= nsam; ++j) {
+ if (nrepr[j] != 0) {
+ ++jk;
+ nr [jk] = nsel[j];
+ ns [jk] = 0;
+ ttd [jk] = 0.;
+ radus[jk] = -1.;
+ np [jk] = j; /* position of this representative within the sample */
+ }
+ }
+
+/* - assignment of the objects of the entire data set to a cluster,
+ * - computation of some statistics,
+ * - determination of the new ordering of the clusters */
+
+ *zb = 0.;
+ newf = 0; /* number of clusters entered into new[] so far */
+
+ for(i = 1; i <= n; i++) {
+ double dsum, dnull = -9./* -Wall */;
+ if (!has_NA) {
+ for (jk = 1; jk <= kk; ++jk) {
+ dsum = 0.;
+ nr_k = nr[jk];
+ if (nr_k != i) { /* otherwise i is this representative itself: dsum stays 0 */
+ int N_ones = 0;
+ double tra = 0.; // init only for JACCARD
+ for (jp = 0; jp < jpp; ++jp) {
+ na = (nr_k - 1) + jp * n;
+ nb = (i - 1) + jp * n;
+ if (diss_kind == JACCARD) {
+ if(x[na] > 0.9 && x[nb] > 0.9) {
+ // both "are 1" - increment numerator (and denom.)
+ tra += 1; N_ones ++;
+ } else if( x[na] > 0.9 || x[nb] > 0.9) {
+ // any is 1 - increment denominator N_ones
+ N_ones ++;
+ }
+ } else { // Euclidean or Manhattan
+ tra = fabs(x[na] - x[nb]);
+ if (diss_kind == EUCLIDEAN)
+ tra *= tra;
+ dsum += tra;
+ }
+ }
+ if (diss_kind == JACCARD)
+ dsum = 1 - tra / (double)N_ones; /* NOTE(review): 0/0 when neither object has a "1" */
+
+ if (jk != 1 && dsum >= dnull)
+ continue /* next jk */;
+ }
+ // new best: dsum < "previous" dnull
+ dnull = dsum;
+ jkabc = jk;
+ }
+ }
+ else { // _has_ missing data
+ Rboolean first = TRUE;
+ for (jk = 1; jk <= kk; ++jk) {
+ dsum = 0.;
+ nr_k = nr[jk];
+ if (nr_k != i) {
+ int nobs = 0, N_ones = 0;
+ double tra = 0.; // init only for JACCARD
+ for (jp = 0; jp < jpp; ++jp) {
+ na = (nr_k - 1) + jp * n;
+ nb = (i - 1) + jp * n;
+ if (jtmd[jp] < 0) {
+ if (x[na] == valmd[jp] || x[nb] == valmd[jp])
+ continue /* next jp */;
+ }
+ nobs++;
+ if (diss_kind == JACCARD) {
+ if(x[na] > 0.9 && x[nb] > 0.9) {
+ // both "are 1" - increment numerator (and denom.)
+ tra += 1; N_ones ++;
+ } else if( x[na] > 0.9 || x[nb] > 0.9) {
+ // any is 1 - increment denominator N_ones
+ N_ones ++;
+ }
+ } else { // Euclidean or Manhattan
+ tra = fabs(x[na] - x[nb]);
+ if (diss_kind == EUCLIDEAN)
+ tra *= tra;
+ dsum += tra;
+ }
+ }
+ if (nobs == 0) /* all pairs partially missing */
+ continue /* next jk */;
+ if (diss_kind == JACCARD)
+ dsum = 1 - tra / (double)N_ones;
+ if(correct_d) // correct -- only since 2017-06
+ dsum *= (pp / nobs);
+ else
+ dsum *= (nobs / pp);
+ }
+ if (first)
+ first = FALSE;
+ else if (dnull <= dsum)
+ continue /* next jk */;
+ /* here : first was TRUE {i.e. 1st time} or
+ * dnull > dsum {i.e. new best} */
+ dnull = dsum;
+ jkabc = jk;
+ }/* for(jk ..) */
+
+ if (first) { /* found nothing */
+ *nafs = TRUE; return;
+ }
+ } /* else: has_NA */
+
+ if (diss_kind == EUCLIDEAN) /* the sums above are of squares; take the root only for the winner */
+ dnull = sqrt(dnull);
+
+ *zb += dnull;
+ ttd[jkabc] += dnull;
+ if (radus[jkabc] < dnull)
+ radus[jkabc] = dnull;
+
+ ++ns[jkabc];
+ if (newf < kk) { /* record jkabc in new[] unless it is already there */
+ if (newf != 0) {
+ for (jnew = 1; jnew <= newf; ++jnew) {
+ if (jkabc == new[jnew])
+ goto L90;/* next i */
+ }
+ }
+ ++newf;
+ new[newf] = jkabc;
+ }
+ L90:
+ ;
+ } /* for( i = 1..n ) */
+
+
+/* a permutation is carried out on vectors nr,ns,np,ttd,radus
+ using the information in vector new. */
+
+ for (jk = 1; jk <= kk; ++jk) {
+ int njk = new[jk];
+ nrnew[jk] = nr[njk];
+ nsnew[jk] = ns[njk];
+ npnew[jk] = np[njk];
+ ttnew[jk] = ttd[njk];
+ rdnew[jk] = radus[njk];
+ }
+ for (jk = 1; jk <= kk; ++jk) {
+ nr[jk] = nrnew[jk];
+ ns[jk] = nsnew[jk];
+ np[jk] = npnew[jk];
+ ttd[jk] = ttnew[jk];
+ radus[jk] = rdnew[jk];
+ }
+ for (j = 1; j <= kk; ++j) { /* turn the per-cluster sums into averages */
+ ttd[j] /= (double) ns[j];
+ }
+
+ if (kk > 1) {
+
+ /* computation of ratt[ka] := minimal distance of medoid ka to any
+ other medoid for comparison with the radius of cluster ka. */
+
+ for (int ka = 1; ka <= kk; ++ka) {
+ Rboolean first = TRUE;
+ int npa = np[ka];
+ for (int kb = 1; kb <= kk; ++kb) {
+ if (kb == ka)
+ continue /* next kb */;
+
+ int npb = np[kb],
+ npab = ind_2(npa, npb);
+ if (first)
+ first = FALSE;
+ else if (dys[npab] >= ratt[ka])
+ continue /* next kb */;
+
+ ratt[ka] = dys[npab];
+ if (ratt[ka] == 0.) /* -1. marks "coincides with another medoid" and prevents the division below */
+ ratt[ka] = -1.;
+ }
+ if (ratt[ka] > -0.5)
+ ratt[ka] = radus[ka] / ratt[ka];
+ }
+ }
+ return;
+} /* End selec() -----------------------------------------------------------*/
+
+void resul(int kk, int n, int jpp, DISS_KIND diss_kind, Rboolean has_NA,
+           int *jtmd, double *valmd, double *x, int *nrx, int *mtt, int correct_d)
+{
+    /* Final clustering: assign each of the n objects to its nearest medoid.
+     *
+     *  x      data, read as x[obj + var * n] (obj in 0..n-1, var in 0..jpp-1).
+     *         On return x[0..n-1] holds the cluster number (1..kk) of each
+     *         object, i.e. the first variable's values are overwritten.
+     *  nrx    nrx[jk] = object number (1..n) of the medoid of cluster jk+1
+     *  mtt    output: mtt[jk] = number of objects in cluster jk+1
+     *  jtmd, valmd   with has_NA: variable j is skipped for a pair when
+     *         jtmd[j] < 0 and either value equals valmd[j]
+     *  correct_d     option for the distance computation in the NA case:
+     *         if (0), use the "fishy" formula  sqrt(sum) * nobs / jpp  (kept
+     *                 unchanged for reproducibility of old results),
+     *         if (1), use the dysta2()-compatible formula, i.e. rescale the
+     *                 sum by jpp / nobs *before* taking the square root, as
+     *                 dysta2() and selec() do.
+     *
+     * As in selec(), a medoid that shares no observed variable with the
+     * object (nobs == 0) is not a candidate; it used to enter the comparison
+     * with a distance of 0 (or NaN) and could win it.
+     */
+
+// __FIXME__ "Jaccard" not yet supported ! _______
+//           (diss_kind == JACCARD is treated like MANHATTAN below)
+
+    const double pp = (double) jpp;
+
+/* clustering vector is incorporated into x, and ``printed''. */
+
+    for (int i = 0; i < n; i++) {
+
+        /* medoids get their cluster number in the loop further below */
+        Rboolean is_medoid = FALSE;
+        for (int jk = 0; jk < kk; ++jk) {
+            if (nrx[jk] == i + 1) { /* 1-indexing */
+                is_medoid = TRUE;
+                break;
+            }
+        }
+        if (is_medoid)
+            continue;
+
+        int jksky = 0; /* fallback (first cluster) if no medoid is comparable */
+        double dnull = -9.;
+        Rboolean first = TRUE;
+
+        for (int jk = 0; jk < kk; ++jk) {
+            const int nrjk = nrx[jk] - 1;
+            double dsum = 0.;
+            int nobs = 0;
+
+            for (int j = 0; j < jpp; ++j) {
+                const int na = nrjk + j * n,
+                          nb = i    + j * n;
+                if (has_NA && jtmd[j] < 0 &&
+                    (x[na] == valmd[j] || x[nb] == valmd[j]))
+                    continue /* next j */;
+                nobs++;
+                double tra = fabs(x[na] - x[nb]);
+                if (diss_kind == EUCLIDEAN)
+                    tra *= tra;
+                dsum += tra;
+            }
+
+            if (has_NA) {
+                if (nobs == 0) /* nothing observed in both: not comparable */
+                    continue /* next jk */;
+                if (correct_d) { // dysta2()-compatible: scale, then root
+                    dsum *= (pp / nobs);
+                    if (diss_kind == EUCLIDEAN)
+                        dsum = sqrt(dsum);
+                } else { // MM: "fishy" (had note since r4321, 2007-05-01 !)
+                    if (diss_kind == EUCLIDEAN)
+                        dsum = sqrt(dsum);
+                    dsum *= (nobs / pp);
+                }
+            }
+            else if (diss_kind == EUCLIDEAN)
+                dsum = sqrt(dsum);
+
+            if (first || dnull > dsum) { // have new best
+                first = FALSE;
+                dnull = dsum;
+                jksky = jk;
+            }
+        }
+        x[i] = (double) jksky + 1;/* 1-indexing */
+    } /* for(i = 0; i < n ..)*/
+
+    for (int jk = 0; jk < kk; ++jk)
+        x[nrx[jk] - 1] = (double) jk + 1;/* 1-indexing */
+
+    /* mtt[k] := size(k-th cluster) : */
+    for (int ka = 0; ka < kk; ++ka) {
+        mtt[ka] = 0;
+        for (int i = 0; i < n; i++) {
+            if (((int) x[i]) == ka + 1)/* 1-indexing */
+                ++mtt[ka];
+        }
+    }
+    return;
+} /* end resul() -----------------------------------------------------------*/
+
+
+// called 'dark()' in ./pam.c
+void black(int kk, int jpp, int nsam, int *nbest,
+ double *dys, double s, double *x,
+ /* --> Output : */
+ double *avsyl, double *ttsyl, double *sylinf,
+ /* but the following output vectors are never used by clara() : */
+ int *ncluv, int *nsend, int *nelem, int *negbr,
+ double *syl, double *srank)
+{
+/* Silhouettes computation and "drawing" --> syl[] and sylinf[]
+ *
+ * Works on the nsam objects of the sample nbest[]; the cluster number of
+ * sample object l is read from x[nbest[l] - 1]. For object j of a cluster:
+ *   dysa = (sum of dissimilarities to all members of its own cluster)
+ *          / (cluster size - 1)
+ *   dysb = smallest average dissimilarity to the members of another
+ *          cluster; that cluster is stored in negbr[j]
+ * and, when both are > 0, the silhouette width is 1 - dysa/dysb if
+ * dysa < dysb, dysb/dysa - 1 if dysa > dysb, and 0 if they are equal.
+ * A one-object cluster gets width 0.
+ *
+ *  avsyl[numcl]  average width within cluster numcl
+ *  *ttsyl        sum of all widths divided by nsam
+ *  sylinf        filled as an nsam x 4 column-major table with columns
+ *                cluster, neighbor cluster, width, object number (nbest[.]);
+ *                rows are grouped by cluster and, within a cluster, ordered
+ *                by decreasing width
+ *  s             s * 1.1 + 1 serves as the starting value when minimizing dysb
+ *  dys           looked up via ind_2(), which is defined outside this function
+ *  jpp           not used in this function
+ */
+
+ /* System generated locals */
+ int sylinf_dim1, sylinf_offset;
+
+ /* Local variables */
+
+ double att, btt, db, dysa, dysb, symax;
+ int lang = -1/* -Wall */;
+ int j, l, lplac, nj, nl, nbb, ncase, nclu, numcl, nsylr, ntt;
+
+/* Parameter adjustments: shift the pointers so the arrays are indexed from 1 (x stays 0-based) */
+ --avsyl;
+
+ --srank; --syl;
+ --negbr; --nelem; --nsend;
+ --ncluv; --nbest;
+
+ sylinf_dim1 = nsam;
+ sylinf_offset = 1 + sylinf_dim1 * 1;
+ sylinf -= sylinf_offset; /* now sylinf[row + nsam * col] with row, col counted from 1 */
+
+/*
+ construction of clustering vector (ncluv)
+ of selected sample (nbest).
+*/
+
+ /* Function Body */
+ for (l = 1; l <= nsam; ++l) {
+ ncase = nbest[l];
+ ncluv[l] = (int) x[ncase - 1];
+ }
+
+/* drawing of the silhouettes */
+
+ nsylr = 0; /* number of sylinf rows written so far */
+ *ttsyl = 0.;
+ for (numcl = 1; numcl <= kk; ++numcl) {
+ ntt = 0; /* nelem[1..ntt] := sample indices of the members of cluster numcl */
+ for (j = 1; j <= nsam; ++j) {
+ if (ncluv[j] == numcl) {
+ ++ntt;
+ nelem[ntt] = j;
+ }
+ }
+ for (j = 1; j <= ntt; ++j) {
+ nj = nelem[j];
+ dysb = s * 1.1 + 1.;
+ negbr[j] = -1;
+
+ for (nclu = 1; nclu <= kk; ++nclu) {
+ if (nclu != numcl) {
+ nbb = 0;
+ db = 0.;
+ for (l = 1; l <= nsam; ++l) {
+ if (ncluv[l] == nclu) {
+ ++nbb;
+ db += dys[ind_2(nj, l)];
+ }
+ }
+ btt = (double) nbb;
+ db /= btt; /* NOTE(review): 0/0 if cluster nclu has no member in the sample */
+ if (db < dysb) {
+ dysb = db;
+ negbr[j] = nclu;
+ }
+ }
+ }
+
+ if (ntt == 1) {
+ syl[j] = 0.; continue /* j */;
+ }
+ dysa = 0.;
+ for (l = 1; l <= ntt; ++l) {
+ nl = nelem[l];
+ dysa += dys[ind_2(nj, nl)];
+ }
+ att = (double) (ntt - 1);
+ dysa /= att;
+ if (dysa <= 0.) {
+ if (dysb > 0.)
+ syl[j] = 1.;
+ else
+ syl[j] = 0.;
+
+ continue /* j */;
+ }
+
+ if (dysb > 0.) {
+ if (dysb > dysa)
+ syl[j] = 1. - dysa / dysb;
+ else if (dysb < dysa)
+ syl[j] = dysb / dysa - 1.;
+ else /* (dysb == dysa) */
+ syl[j] = 0.;
+
+ if (syl[j] < -1.)
+ syl[j] = -1.;
+ else if (syl[j] > 1.)
+ syl[j] = 1.;
+ }
+ else {
+ syl[j] = -1.;
+ }
+
+ } /* for(j ..) */
+
+ avsyl[numcl] = 0.;
+ for (j = 1; j <= ntt; ++j) { /* selection sort: pick the largest remaining width each time */
+ symax = -2.;
+ for (l = 1; l <= ntt; ++l) {
+ if (syl[l] > symax) {
+ symax = syl[l];
+ lang = l;
+ }
+ }
+ nsend[j] = lang;
+ srank[j] = syl[lang];
+ avsyl[numcl] += srank[j];
+ syl[lang] = -3.; /* below symax's start value: never picked again */
+ }
+ *ttsyl += avsyl[numcl];
+ avsyl[numcl] /= ntt;
+
+ if (ntt >= 2) {
+ for (l = 1; l <= ntt; ++l) {
+ lplac = nsend[l];
+ ncase = nelem[lplac];
+ ++nsylr;
+ sylinf[nsylr + sylinf_dim1] = (double) numcl;
+ sylinf[nsylr + (sylinf_dim1 << 1)] = (double) negbr[lplac];
+ sylinf[nsylr + sylinf_dim1 * 3] = srank[l];
+ sylinf[nsylr + (sylinf_dim1 << 2)] = (double) nbest[ncase];
+ }
+ }
+ else {
+ ncase = nelem[1];
+ ++nsylr;
+ sylinf[nsylr + sylinf_dim1] = (double) numcl;
+ sylinf[nsylr + (sylinf_dim1 << 1)] = (double) negbr[1];
+ sylinf[nsylr + sylinf_dim1 * 3] = 0.;
+ sylinf[nsylr + (sylinf_dim1 << 2)] = (double) nbest[ncase];
+ }
+
+ }
+ *ttsyl /= (double) (nsam);
+ return;
+} /* black */
diff --git a/src/cluster.h b/src/cluster.h
new file mode 100644
index 0000000..d9fe1e6
--- /dev/null
+++ b/src/cluster.h
@@ -0,0 +1,198 @@
+/* Declare everything, Fortran & C -- so we can register them
+ *
+ * Shared header of the package's C sources: the message-translation macro
+ * _(), the DISS_KIND codes, and the prototypes of the C and Fortran
+ * routines, grouped by the source file named in each section comment. */
+
+#include <R.h>
+#include <Rinternals.h>
+/* -> Rconfig.h, but also Boolean.h RS.h */
+
+#ifdef ENABLE_NLS
+#include <libintl.h>
+#define _(String) dgettext ("cluster", String) /* look String up in the "cluster" message domain */
+#else
+#define _(String) (String) /* without NLS: the string itself */
+#endif
+
+// These codes must match those in ../R/clara.q <==> 'diss_kind'
+typedef enum {
+ EUCLIDEAN = 1,
+ MANHATTAN = 2,
+ JACCARD = 3
+} DISS_KIND;
+
+/* --------- ./clara.c ------------------*/
+
+double randm(int *nrun); /* returns a value in [0, 1) and updates the state *nrun */
+
+void cl_clara(int *n, /* = number of objects */
+ int *jpp,/* = number of variables */
+ int *kk, /* = number of clusters, 1 <= kk <= n-1 */
+ double *x, /* Input: the data x[n, jpp] _rowwise_ (transposed)
+ * Output: the first `n' values are the `clustering'
+ * (integers in 1,2,..,kk) */
+ int *nran, /* = #{random samples} drawn (= `samples' in R)*/
+ int *nsam, /* = #{objects} drawn from data set (`sampsize' in R) */
+ double *dys,/* [1:(1 + (nsam * (nsam - 1))/2)]
+ * Output: to contain the distances */
+ int *mdata, /*= {0,1}; 1: min(x) is missing value (NA); 0: no NA */
+ double *valmd,/*[j]= missing value code (instead of NA) for x[,j]*/
+ int *jtmd, /* [j]= {-1,1}; -1: x[,j] has NA; 1: no NAs in x[,j] */
+ DISS_KIND *diss_kind, // = {EUCLIDEAN, MANHATTAN, JACCARD}
+ int/*logical*/ *rng_R,/*= {0,1}; 0 : use clara's internal weak RNG;
+ * 1 : use R's RNG (and seed) */
+ int/*logical*/ *pam_like,/* if (1), we do "swap()" as in pam(), otherwise
+ use the code as it was in clara() "forever"
+ up to 2011-04 */
+ int *correct_d,/* option for dist.computation: if (0), use the "fishy"
+ formula to update distances in the NA-case,
+ if (1), use a dysta2()-compatible formula */
+ int *nrepr, /* logical (0/1): 1 = "is representative object" */
+ int *nsel,
+ int *nbest,/* x[nbest[j],] : the j-th obs in the final sample */
+ int *nr, int *nrx,/* prov. and final "medoids" aka representatives */
+ double *radus, double *ttd, double *ratt,
+ double *ttbes, double *rdbes, double *rabes,
+ int *mtt, double *obj,
+ double *avsyl, double *ttsyl, double *sylinf,
+ int *jstop, int *trace_lev,
+ double *tmp, /* = double [ 3 * nsam ] */
+ int *itmp /* = integer[ 6 * nsam ] */
+ );
+
+/* The following helpers take their scalar arguments by value (unlike
+ * cl_clara() above, where every argument is a pointer). */
+void dysta2(int nsam, int jpp, int *nsel,
+ double *x, int n, double *dys, DISS_KIND diss_kind,
+ int *jtmd, double *valmd, Rboolean has_NA, Rboolean *toomany_NA);
+
+
+void bswap2(int kk, int nsam, double s, const double dys[],
+ Rboolean pam_like, int trace_lev,
+ // result:
+ double *sky, int *nrepr,
+ double *dysma, double *dysmb, double *beter);
+
+void selec(int kk, int n, int jpp, DISS_KIND diss_kind,
+ double *zb, int nsam, Rboolean has_NA, int *jtmd, double *valmd,
+ int trace_lev,
+ int *nrepr, int *nsel, double *dys, double *x, int *nr,
+ Rboolean *nafs, double *ttd, double *radus, double *ratt,
+ int *nrnew, int *nsnew, int *npnew, int *ns, int *np, int *new,
+ double *ttnew, double *rdnew, int correct_d);
+
+void resul(int kk, int n, int jpp, DISS_KIND diss_kind, Rboolean has_NA,
+ int *jtmd, double *valmd, double *x, int *nrx, int *mtt, int correct_d);
+
+void black(int kk, int jpp, int nsam, int *nbest,
+ double *dys, double s, double *x,
+ /* --> Output : */
+ double *avsyl, double *ttsyl, double *sylinf,
+ int *ncluv, int *nsend, int *nelem, int *negbr,
+ double *syl, double *srank);
+
+/* -------- ./dysta.f --- (was in pam.f) --------------------
+ * Fortran subroutine: every argument is passed by pointer. */
+void F77_NAME(dysta)(int *nn, int *jpp, double *x, double *dys, int *ndyst,
+ int *jtmd, double *valmd, int *jhalt);
+/* --------- ./pam.c ------------------*/
+
+#ifdef _UNUSED_C_pam /* this prototype is only visible when _UNUSED_C_pam is defined */
+void cl_pam(int *nn, int *jpp, int *kk, double *x, double *dys,
+ int *jdyss, /* jdyss = 0 : compute distances from x
+ * = 1 : distances provided in x */
+ double *valmd, int *jtmd,
+ int *ndyst, int *nsend, int *nrepr, int *nelem,
+ double *radus, double *damer, double *ttd, double *separ,
+ double *ttsyl, double *obj, int *med, int *ncluv,
+ double *clusinf, double *sylinf, int *nisol, int* optim);
+#endif
+
+/* SEXP-based entry point: takes and returns R objects */
+SEXP cl_Pam(SEXP k_, SEXP n_,
+ SEXP do_diss_, /* == !diss; if true, compute distances from x (= x_or_diss);
+ otherwise distances provided by x_or_diss */
+ SEXP x_or_diss,// this "is" if(do_diss) "x[]" (n x p) else "dys[]"
+ SEXP all_stats_, // all_stats == !cluster.only
+ SEXP medoids, // NULL or integer(k) subset {1:n}
+ SEXP do_swap_, SEXP trace_lev_,
+ SEXP keep_diss_, SEXP pam_once_,
+
+ // the next 3 are only needed if(do_diss)
+ SEXP val_md, SEXP j_md, // "md" := [m]issing [d]ata
+ SEXP dist_kind); // = 1 ("euclidean") or 2 ("manhattan")
+
+void bswap(int kk, int nsam, int *nrepr,
+ /* nrepr[]: here is boolean (0/1): 1 = "is representative object" */
+ Rboolean med_given, Rboolean do_swap, int trace_lev,
+ double *dysma, double *dysmb, double *beter,
+ const double dys[], double s, double *obj, int pamonce);
+
+void cstat(int kk, int nn, int *nsend, int *nrepr, Rboolean all_stats,
+ double *radus, double *damer, double *ttd, double *separ, double *s,
+ double *dys, int *ncluv, int *nelem, int *med, int *nisol);
+
+/* dark(): the routine that clara.c refers to as the pam.c counterpart of its black() */
+void dark(int kk, int nn, const int ncluv[], const double dys[], double s,
+ int *nsend, int *nelem, int *negbr,
+ double *syl, double *srank, double *avsyl, double *ttsyl,
+ double *sylinf);
+
+
+/* --------- ./spannel.c ------------------*/
+
+void cl_sweep(double *, int *, int *, int *, double *);
+
+void spannel(int *ncas, /* = number of objects */
+ int *ndep, /* = number of variables */
+ double *dat,/* [ncas, 0:ndep] */
+ double *dstopt, /* = squared distances [1:ncas] */
+ double *cov,/* matrix [0:ndep, 0:ndep] */
+ double *varsum, /* [1:ndep] */
+ double *varss, /* [1:ndep] */
+ double *prob, /* [1:ncas] */
+ double *work, /* [0:ndep] */
+ double *eps,
+ int *maxit, /* = maximal # iterations (and returns #{iter.})*/
+ int *ierr);
+
+void sildist(double *d, /* distance : in matrix or dist format; i.e.,
+ of length n^2 or n*(n-1)/2; see 'ismat' */
+ int *n, /* number of Subjects (attr(d,'Size')) */
+ int *clustering,/* clustering vector, values from {1..k} */
+ int *k, /* number of clusters */
+ double *diC, /* diC */
+ int *counts, /* counts[k] := #{cluster k} */
+ double *si, /* (a_i - b_i) / max(ai,bi) */
+ int *neighbor, /* neighbor */
+ int *ismat); /* boolean : is 'd' a matrix or 'dist' ? */
+
+/* defined in ./fanny.c */
+void cl_fanny(int *nn, int *jpp, int *kk,
+ double *x, double *dss, int *jdyss, double *valmd,
+ int *jtmd, int *ndyst, int *nsend, int *nelem,
+ int *negbr, double *syl, double *p, double *dp,
+ double *pt, int *nfuzz, double *esp, double *ef,
+ double *dvec, double *ttsyl, double *obj,
+ int *ncluv, double *sylinf, double *r, double *tol, int *maxit);
+
+
+/* ================= Fortran things (remainder) ======================== */
+
+/* -------- ./daisy.f ---------------------------------- */
+void F77_NAME(cldaisy)(int *nn, int *jpp, double *x,
+ double *valmd, double *weights,
+ int *jtmd, int *jdat, int *vtype,
+ int *ndyst, int *mdata, double *disv);
+
+/* -------- ./fanny.c ---------------------------------- */
+/* R-level: called only from ../tests/dysta-ex.R (now via .C()):
+ * (*jhalt is set to 1 when some pair of objects has no variable that is
+ * observed in both) */
+void dysta3(int *nn, int *p, double *x, double *dys,
+ int *ndyst, int *jtmd, double *valmd, int *jhalt);
+
+/* -------- ./mona.f ---------------------------------- */
+void F77_NAME(clmona)(int *nn, int *pp, int *x, int *jerr,
+ int *nban, int *ner, int *kwan, int *lava, int *jlack);
+
+/* -------- ./twins.c ---------------------------------- */
+void R_bncoef(int *nn, double *ban, double *cf);
+double bncoef(int nn, double *ban);
+
+void twins(int *nn, int *jpp, double *x,
+ double *dys, double *dys2, int *jdyss, double *valmd,
+ int *jtmd, int *ndyst, int *jalg, int *method,
+ int *kwan, int *ner, double *ban, double *coef,
+ double *alpha, int *merge, int *trace_lev);
+
diff --git a/src/daisy.f b/src/daisy.f
new file mode 100644
index 0000000..1c3b38c
--- /dev/null
+++ b/src/daisy.f
@@ -0,0 +1,124 @@
+
+ subroutine cldaisy(nn,jpp, x, valmd,weights,
+ + jtmd,jdat,vtype,ndyst,mdata, disv) ! result: disv(), one value per pair (l,k) with k < l
+c c
+c c Calculating dissimilarities between objects or variables
+c c
+
+ integer nn, jpp
+c c nn = number of objects
+c c jpp = number of variables used for the calculations
+
+c c The following vectors and matrices must be dimensioned in the
+c c main program :
+ double precision x(nn,jpp), valmd(jpp), weights(jpp)
+ integer jtmd(jpp), jdat, vtype(jpp), ndyst, mdata
+ double precision disv(1+nn*(nn-1)/2)
+
+c vtype was character originally
+c vtype(j) is the type of variable j:
+c = 1 (A) for an Asymmetric binary variable
+c = 2 (S) for a Symmetric binary variable
+c = 3 (N) for a Nominal variable
+c = 4 (O) for an Ordinal variable
+c = 5 (I) for an Interval variable (additive)
+c = 6 (T) for a raTio variable (log transformed)
+
+c ndyst is the "kind of dissimilarity/distance" aka 'diss_type'
+c = 0 "mixed" / gower == here treated _identically_ to "manhattan" (L1) !
+c = 1 "euclidean" (= L2 )
+c = 2 "manhattan" (= L1 )
+
+c vector jtmd is only read if there are missing values : if(mdata)
+c jtmd(j) = 0 if variable j is binary
+c = -1 if variable j is not binary and has missing values
+c = +1 if variable j is not binary and has no missing values
+c VAR
+ double precision clk,dlk, pp,ppa, rpres
+ integer j,k,l,la, lsubt, nlk, nbad, npres
+ logical hasNA
+
+ hasNA = (mdata .ne. 0) ! valmd()/jtmd() are consulted only when mdata is nonzero
+
+c calculation of the dissimilarities
+ nlk=0 ! index of the current pair in disv(); incremented before each store
+ if(jdat .eq. 1) then
+c Case I: `mixed' type variables
+ nbad=0 ! counts pairs that get -1; only counted, not passed back to the caller
+ do 450 l=2,nn
+ la=l-1
+ do 440 k=1,la
+ nlk=nlk+1
+ ppa=0. ! sum of the weights of the variables contributing to this pair
+ dlk=0. ! weighted sum of the per-variable differences
+c Dissimilarity between obs. l & k
+ do 420 j=1,jpp
+ if(vtype(j) .ge. 3) then ! nominal, ordinal, interval or ratio variable
+ if (hasNA) then
+ if(jtmd(j).lt.0) then
+ if(x(l,j).eq.valmd(j)) goto 420
+ if(x(k,j).eq.valmd(j)) goto 420
+ endif
+ endif
+ ppa=ppa + weights(j)
+ if(vtype(j).eq.3) then
+ if(x(l,j).ne.x(k,j)) dlk=dlk+ weights(j)
+ else
+ dlk=dlk+ weights(j)*dabs(x(l,j)-x(k,j))
+ endif
+ else
+c binary variable x(*,j)
+ if(x(l,j).ne.0..and.x(l,j).ne.1.) goto 420 ! any value other than 0 or 1 makes the variable be skipped
+ if(x(k,j).ne.0..and.x(k,j).ne.1.) goto 420
+ if(vtype(j).eq.2.or.x(l,j).ne.0.or.x(k,j).ne.0)
+ * ppa=ppa+weights(j) ! an asymmetric binary 0-0 match adds no weight
+ if(x(l,j).ne.x(k,j)) dlk=dlk+ weights(j)
+ endif
+ 420 continue
+ if(ppa.le.0.5) then ! contributing weight too small: store -1 instead of a dissimilarity
+ nbad=nbad+1
+ disv(nlk)=-1
+ else
+ disv(nlk)=dlk/ppa
+ endif
+ 440 continue
+ 450 continue
+
+ else
+c Case II : jdat != 1: all variables are interval scaled
+c ------- ~~~~~~~~~ { basically === dysta() in ./dysta.f
+c FIXME: common code! }
+ pp=jpp ! double precision copy of jpp for the rescaling below
+ do l=2,nn
+ lsubt=l-1
+ do k=1,lsubt
+ clk=0.0
+ nlk=nlk+1
+ npres=0 ! number of variables usable for this pair
+ do 530 j=1,jpp
+ if (hasNA) then
+ if(jtmd(j).lt.0) then
+ if(x(l,j).eq.valmd(j)) goto 530
+ if(x(k,j).eq.valmd(j)) goto 530
+ endif
+ endif
+ npres=npres+1
+ if(ndyst.eq.1) then
+ clk=clk+ (x(l,j)-x(k,j))*(x(l,j)-x(k,j))
+ else
+ clk=clk+ dabs(x(l,j)-x(k,j))
+ endif
+ 530 continue
+ rpres=npres
+ if(npres.eq.0)then
+ disv(nlk)=-1.0
+ else if(ndyst.eq.1) then
+ disv(nlk)=dsqrt(clk*(pp/rpres)) ! rescale for the skipped variables, then take the root
+ else
+ disv(nlk)=clk*(pp/rpres)
+ endif
+ end do
+ end do
+ endif
+
+ end
diff --git a/src/dysta.f b/src/dysta.f
new file mode 100644
index 0000000..114701a
--- /dev/null
+++ b/src/dysta.f
@@ -0,0 +1,56 @@
+
+c Dysta() :
+c
+c Compute Distances from X matrix {also for agnes() and diana()}:
+c -----------------------------------------------------------
+c
+c was part of pam.f --- now called both from Fortran & C
+c "keep in sync" with daisy.f {move both to C or replace by R's dist!}
+c
+ subroutine dysta(nn,p,x,dys,ndyst,jtmd,valmd,jhalt) ! fills dys(); sets jhalt=1 if some pair cannot be computed
+
+ integer nn, p, ndyst, jtmd(p), jhalt
+ double precision x(nn,p), dys(1+nn*(nn-1)/2), valmd(p)
+c ndyst = 1 : euclidean
+c "else"(2) : manhattan
+
+c VARs
+ integer nlk,j,l,k, lsubt, npres
+ double precision pp, clk, rpres
+
+ nlk=1 ! dys(1) is reserved (set to 0 below); pair values start at dys(2)
+ dys(1)=0.0
+c ---------- is used potentially for d[i,i] == dys[1] == 0
+ pp=p ! double precision copy of p for the rescaling below
+ do 100 l=2,nn
+ lsubt=l-1
+ do 20 k=1,lsubt
+ clk=0.0
+ nlk=nlk+1
+ npres=0 ! number of variables usable for the pair (l,k)
+ do 30 j=1,p
+ if(jtmd(j).lt.0) then ! some x(*,j) are missing (NA)
+ if(x(l,j).eq.valmd(j))goto 30
+ if(x(k,j).eq.valmd(j))goto 30
+ endif
+ npres=npres+1
+ if(ndyst.eq.1) then
+ clk=clk+ (x(l,j)-x(k,j))*(x(l,j)-x(k,j))
+ else
+ clk=clk+ dabs(x(l,j)-x(k,j))
+ endif
+ 30 continue
+ rpres=npres
+ if(npres.eq.0) then
+ jhalt=1 ! no variable is observed in both objects; jhalt is never reset to 0 here
+ dys(nlk)=-1.0
+ else
+ if(ndyst.eq.1) then
+ dys(nlk)= dsqrt(clk*(pp/rpres)) ! rescale for the skipped variables, then take the root
+ else
+ dys(nlk)= clk*(pp/rpres)
+ endif
+ endif
+ 20 continue
+ 100 continue
+ end
diff --git a/src/fanny.c b/src/fanny.c
new file mode 100644
index 0000000..bc54cdd
--- /dev/null
+++ b/src/fanny.c
@@ -0,0 +1,507 @@
+/* FANNY : program for Fuzzy cluster ANalysis */
+
+/* was $Id: fanny.c 7627 2019-02-12 19:16:51Z maechler $
+ * fanny.f -- translated by f2c (version 20020621).
+ * and treated by f2c-clean v 1.10, and manually by Martin Maechler
+ */
+
+#include <Rmath.h>
+#include <R_ext/Print.h>/* for diagnostics */
+
+#include "cluster.h"
+/* dysta3() declared in cluster.h */
+
+static void /* fuzzy(): iterative computation of the membership matrix p[,] */
+fuzzy(int nn, int k, double *p,
+ double *dp, double *pt, double *dss, double *esp,
+ double *ef, double *obj,
+ double r, double tol, int *nit, int trace_lev);
+
+static void /* caddy(): called by cl_fanny() right after fuzzy(); it sets *ktrue */
+caddy(int nn, int k, double *p, int *ktrue,
+ int *nfuzz, int *ncluv, double *rdraw, int trace_lev);
+
+static void /* fygur(): called by cl_fanny() for the "silhouette" part */
+fygur(int kk, int nn, int ncluv[], double dss[], double s,
+ int *nsend, int *nelem, int *negbr,
+ double *syl, double *srank, double *avsyl, double *ttsyl,
+ double *sylinf);
+
+
+void cl_fanny(int *nn,  /* = number of objects */
+              int *jpp, /* = number of variables for clustering */
+              int *kk,  /* = number of clusters */
+              double *x, double *dss, int *jdyss, double *valmd,
+              int *jtmd, int *ndyst, int *nsend, int *nelem,
+              int *negbr, double *syl, double *p, double *dp,
+              double *pt, int *nfuzz, double *esp, double *ef,
+              double *dvec, double *ttsyl,
+              double *obj, /* input/output; see fuzzy() below */
+              int *ncluv, double *sylinf,
+              double *r, double *tol, int *maxit)
+{
+    /* Driver of fanny(): optional dissimilarity computation, the fuzzy
+     * iterations, the "hard" clustering, and (if wanted and possible) the
+     * silhouette information.
+     *
+     * obj[] also carries input flags, so they are read first: fuzzy()
+     * overwrites obj[1..3] and obj[0] is replaced by ktrue below. */
+    const int trace_lev = (int) obj[1];
+    const Rboolean all_stats = (obj[0] == 0.);/* TODO: consider *not* doing caddy() */
+    int ktrue;
+
+    if (*jdyss != 1) { /* dissimilarities are not given: compute them from x */
+        int jhalt = 0;
+        dysta3(nn, jpp, x, dss, ndyst, jtmd, valmd, &jhalt);
+        if (jhalt != 0) {
+            /* some dissimilarity could not be computed: signal it and stop */
+            *jdyss = -1;
+            return;
+        }
+    }
+
+    fuzzy(*nn, *kk, p, dp, pt, dss, esp,
+          ef, obj, *r, *tol, maxit, trace_lev);
+
+    caddy(*nn, *kk, p, /* -> */ &ktrue, nfuzz, ncluv, pt, trace_lev);
+
+    obj[0] = (double) ktrue;
+
+    /* The "silhouette" is computed only on request and for 2 <= ktrue < nn */
+    if (!all_stats || ktrue < 2 || ktrue >= *nn)
+        return;
+
+    /* s := max( dss[i,j] ), with 0 as lower bound */
+    const int n_pairs = *nn * (*nn - 1) / 2;
+    double s = 0.;
+    for (int idx = 0; idx < n_pairs; idx++)
+        s = (s < dss[idx]) ? dss[idx] : s;
+
+    fygur(ktrue, *nn, ncluv, dss, s,
+          nsend, nelem, negbr, syl, dvec, pt, ttsyl, sylinf);
+} /* cl_fanny */
+
+
+void dysta3(int *nn, int *p, double *x, double *dys,
+            int *ndyst, int *jtmd, double *valmd, int *jhalt)
+{
+    /* Dissimilarities between all pairs of the *nn objects (rows of the
+     * column-major *nn x *p matrix x), stored in dys[] in the order
+     * (0,1), (0,2), ..., (0,nn-1), (1,2), ...
+     *
+     *  *ndyst == 1 : root of the rescaled sum of squares,
+     *         == 2 : rescaled sum of absolute differences,
+     *   otherwise  : rescaled sum of squares (no root).
+     * A variable v with jtmd[v] < 0 is left out for a pair when either value
+     * equals valmd[v]; the sum is rescaled by *p / (number of variables
+     * used). If no variable can be used, the entry is -1 and *jhalt is set
+     * to 1 (it is never reset here).
+     */
+    const int n = *nn, nvar = *p, kind = *ndyst;
+    int pos = 0; /* next free slot of dys[] */
+
+    for (int a = 0; a < n - 1; a++) {
+        for (int b = a + 1; b < n; b++) {
+            double acc = 0.;
+            int n_used = 0;
+
+            for (int v = 0; v < nvar; v++) {
+                const double xa = x[a + v * n], xb = x[b + v * n];
+                if (jtmd[v] < 0 && (xa == valmd[v] || xb == valmd[v]))
+                    continue; /* next v */
+                n_used++;
+                const double diff = xa - xb;
+                acc += (kind == 2) ? fabs(diff) : diff * diff;
+            }
+
+            if (n_used > 0) {
+                acc *= nvar / (double) n_used;
+                dys[pos] = (kind == 1) ? sqrt(acc) : /* kind = 2 or 3 */ acc;
+            } else {
+                dys[pos] = -1.;
+                *jhalt = 1;
+            }
+            pos++;
+        }
+    }
+} /* dysta3 */
+
+
+static /* fuzzy(): compute the fuzzy membership matrix p[,] iteratively.
+ *
+ * p[m + j * nn] is the membership of object m (0..nn-1) in cluster j
+ * (0..k-1). During the iterations p[] holds the memberships raised to the
+ * power r; they are converted back (power 1/r) just before returning.
+ * dss[] holds the dissimilarities, indexed for a pair m != i as
+ *   min * nn - (min + 1) * (min + 2) / 2 + max,   min/max of (m, i).
+ * Quantities kept up to date per cluster j:
+ *   esp[j] = sum_m p[m,j]
+ *   ef[j]  = sum_{m != i} p[i,j] * p[m,j] * d(m,i)
+ *   dp[m + j * nn] = sum_{i != m} p[i,j] * d(m,i)
+ * and the criterion is  sum_j ef[j] / (2 * esp[j]).
+ * pt[] is work space of length k.
+ * Iterations stop when |change of criterion| <= tol * criterion.
+ * On return *nit is the number of iterations used, or -1 if there was no
+ * convergence within the *nit iterations allowed on entry.
+ */
+void fuzzy(int nn, int k, double *p,
+ double *dp, double *pt, double *dss, double *esp, double *ef,
+ double *obj,/* of length 4;
+ * in : (cluster_only, trace_lev, compute_p, 0)
+ * out: (ktrue , cryt, PC ("dunn"), normalized_PC)
+ * (obj[0] is not written in this function)
+ */
+ double r, /* the exponent, > 1. -- was fixed to 2 originally */
+ double tol,/* the precision for the iterations */
+ int *nit, /* the maximal number of iterations --
+ originally fixed to 500 */
+ int trace_lev)
+{
+ double dt, xx, ddd, crt, reen, cryt;
+ int p_d = nn, dp_d = nn; /* column strides of p[,] and dp[,] */
+ int i, j, m, mi, it;
+ Rboolean converged = FALSE, compute_p = (int)obj[2];
+
+ if(trace_lev)
+ Rprintf("fanny()'s fuzzy(n = %d, k = %d):\n", nn, k);
+
+ if(compute_p) {
+ /* Compute initial fuzzy clustering, i.e. membership matrix p[,] */
+ int nd, ndk;
+ double p0 = 0.1 / (k - 1); /* NOTE(review): infinite for k == 1 -- confirm callers ensure k >= 2 */
+ for (m = 0; m < nn; ++m)
+ for (j = 0; j < k; ++j)
+ p[m + j * p_d] = p0;
+
+ ndk = nn / k; /* objects are dealt to the clusters in consecutive runs of ndk */
+ nd = ndk;
+ j = 0;
+ for (m = 0; m < nn; ++m) {
+ int jj;
+ p[m + j * p_d] = 0.9;
+ if (m+1 >= nd) {
+ ++j;
+ if (j+1 == k) /* reset */
+ nd = nn;
+ else nd += ndk;
+ }
+ for (jj = 0; jj < k; ++jj)
+ p[m + jj * p_d] = pow(p[m + jj * p_d], r);
+ }
+ }
+ else { /* p[,] already contains memberships */
+
+ for (m = 0; m < nn; ++m)
+ for (j = 0; j < k; ++j)
+ p[m + j * p_d] = pow(p[m + j * p_d], r);
+ }
+
+/* initial criterion value */
+
+ cryt = 0.;
+ for (j = 0; j < k; ++j) {
+ esp[j] = 0.;
+ ef[j] = 0.;
+ for (m = 0; m < nn; ++m) {
+ esp[j] += p[m + j * p_d];
+ for (i = 0; i < nn; ++i) {
+ if (i != m) {
+ mi = imin2(m,i);
+ mi = mi * nn - (mi + 1) * (mi + 2) / 2 + imax2(m,i); /* index of the pair (m,i) in dss[] */
+ dp[m + j * dp_d] += p[i + j * p_d] * dss[mi]; /* dp[] is only added to: assumes the caller passes it zero-filled -- TODO confirm */
+ ef[j] += p[i + j * p_d] * p[m + j * p_d] * dss[mi];
+ }
+ }
+ }
+ cryt += ef[j] / (esp[j] * 2.);
+ }
+ crt = cryt;
+
+ if(trace_lev) {
+ Rprintf("fuzzy(): initial obj = %g\n", cryt);
+ if(trace_lev >= 2) {
+ Rprintf(" ef[]= (");
+ for(j=0; j < k; j++) Rprintf(" %g%s", ef[j], ((j < k-1)? "," : ")\n"));
+ Rprintf(" esp[]= (");
+ for(j=0; j < k; j++) Rprintf(" %g%s",esp[j], ((j < k-1)? "," : ")\n"));
+ }
+ }
+
+ reen = 1. / (r - 1.);
+
+ it = 0;
+ while(++it <= *nit) { /* . . . . . iterations . . . . . . . . . . . . . */
+
+ for(m = 0; m < nn; m++) {
+ /* the new membership coefficients of the objects are calculated,
+ and the resulting value of the criterion is computed. */
+ dt = 0.;
+ for (j = 0; j < k; ++j) {
+ pt[j] = pow(esp[j] / (dp[m + j * dp_d] - ef[j] / (2 * esp[j])),
+ reen);
+ dt += pt[j];
+ }
+ xx = 0.;
+ for (j = 0; j < k; ++j) {
+ pt[j] /= dt;
+ if (pt[j] < 0.)
+ xx += pt[j];
+ }
+ /* now: sum_j (pt[j]) == 1; xx := sum_{pt[j] < 0} pt[j] */
+ for (j = 0; j < k; ++j) {
+ double d_mj;
+ pt[j] = (pt[j] > 0.) ? pow(pt[j] / (1 - xx), r) : 0.;
+ d_mj = pt[j] - p[m + j * p_d]; /* change of p[m,j]; esp, dp and ef are updated incrementally with it */
+ esp[j] += d_mj;
+ for (i = 0; i < nn; ++i) {
+ if (i != m) {
+ mi = imin2(m,i);
+ mi = mi * nn - (mi + 1) * (mi + 2) / 2 + imax2(m,i);
+ ddd = d_mj * dss[mi];
+ dp[i + j * dp_d] += ddd;
+ ef[j] += p[i + j * p_d] * 2. * ddd;
+ }
+ }
+ p[m + j * p_d] = pt[j];
+ }
+
+ if(trace_lev >= 3) {
+ Rprintf(" pt[m= %d, *]: ",m);
+ for (j = 0; j < k; ++j)
+ Rprintf(" %g%s", pt[j], ((j < k-1)? "," : "\n"));
+ }
+ }
+
+ /* m == nn */
+ cryt = 0.;
+ for (j = 0; j < k; ++j)
+ cryt += ef[j] / (esp[j] * 2.);
+
+ if(trace_lev >= 2) Rprintf(" m == n: obj = %#20.14g", cryt);
+
+ /* Convergence check */
+ if((converged = (fabs(cryt - crt) <= tol * cryt)))
+ break;
+
+ if(trace_lev >= 2) Rprintf(" not converged: it = %d\n", it);
+ crt = cryt;
+
+ } /* while */
+
+ *nit = (converged)? it : -1;
+
+ if(trace_lev) {
+ Rprintf("%s%sonverged after %d iterations, obj = %#20.*g\n",
+ trace_lev >=2 ? "\n" : "", (converged) ? "C" : "NOT c",
+ it, (int)((trace_lev >= 2)? 20 : 7), cryt);
+ }
+
+ /* obj[0] = (double) it; << no longer; return it via *nit ! */
+ obj[1] = cryt;
+ /* PC (partition coefficient), "non-fuzzyness index" of libert is computed
+ * C = 1/n sum_{i,j} u_{i,j} ^ r fulfills
+ * 1 >= C >= sum_j (1/k)^r = k * k^-r = k^(1-r)
+ * ==> normalization (C - k^(1-r)) / (1 - k^(1-r)) = (k^(r-1) * C - 1) / (k^(r-1) - 1)
+ */
+ for (j = 0, crt = 0.; j < k; ++j)
+ crt += esp[j];
+ crt /= nn;
+ obj[2] = crt; /* the PC */
+ xx = pow((double)k, r - 1.);
+ obj[3] = (xx * crt - 1.) / (xx - 1.);
+ /* Note however, that for r != 2, MM rather prefers to use
+ * the "original definition" C = 1/n sum_{i,j} u_{i,j} ^ 2, and its normalization */
+
+ /* p[m,j] := (u_{m,j} ^ r) ^{1/r} == u_{m,j} : */
+ xx = 1. / r;
+ for (m = 0; m < nn; ++m)
+ for (j = 0; j < k; ++j)
+ p[m + j * p_d] = pow(p[m + j * p_d], xx);
+
+} /* fuzzy */
+
+
+static
+void caddy(int nn, int k, double *p, int *ktrue,
+           int *nfuzz, int *ncluv, double *rdraw, int trace_lev)
+{
+    /* Derive a crisp clustering from the membership matrix p[nn, k]
+     * (column-major, element (m, j) at p[m + j * nn]).
+     *
+     * On return:
+     *   ncluv[m]  = crisp cluster number (1-based) of object m; clusters are
+     *               numbered in order of first appearance.
+     *   *ktrue    = number of distinct crisp clusters found.
+     *   nfuzz[c]  = 1-based column of p[] that became crisp cluster c+1;
+     *               slots ktrue..k-1 are filled with the unused columns in
+     *               increasing order, so nfuzz[] is a permutation of 1..k.
+     *   p[]       = columns reordered according to nfuzz[].
+     * rdraw[] is scratch space of length k.
+     */
+    int obs, col, slot;
+
+    if (trace_lev)
+        Rprintf("fanny()'s caddy(*, k = %d):\n", k);
+
+    /* The first object is always processed (do-while): it opens cluster 1. */
+    *ktrue = 0;
+    obs = 0;
+    do {
+        /* column with the largest membership; the first one wins on ties */
+        int top = 1;
+        double top_val = p[obs];
+        for (col = 1; col < k; ++col) {
+            double cand = p[obs + col * nn];
+            if (top_val < cand) {
+                top_val = cand;
+                top = col + 1;
+            }
+        }
+
+        /* has that column already been given a crisp cluster number? */
+        for (slot = 0; slot < *ktrue; ++slot)
+            if (nfuzz[slot] == top)
+                break;
+        if (slot == *ktrue) { /* no: open a new crisp cluster */
+            nfuzz[slot] = top;
+            ++(*ktrue);
+        }
+        ncluv[obs] = slot + 1;
+    } while (++obs < nn);
+
+    if (trace_lev)
+        Rprintf(" -> k_true (crisp) = %d", *ktrue);
+
+    if (*ktrue >= k) {
+        if (trace_lev)
+            Rprintf("\n");
+    } else {
+        if (trace_lev)
+            Rprintf(" < k (= %d) !!\n", k);
+
+        /* complete nfuzz[] with the columns that never were a maximum */
+        for (slot = *ktrue; slot < k; ++slot) {
+            int label;
+            for (label = 1; label <= k; ++label) {
+                int seen = 0;
+                while (seen < slot && nfuzz[seen] != label)
+                    ++seen;
+                if (seen == slot) { /* 'label' is not used yet */
+                    nfuzz[slot] = label;
+                    break;
+                }
+            }
+        }
+    }
+
+    /* permute the columns of p[] row by row, going through rdraw[] */
+    for (obs = 0; obs < nn; ++obs) {
+        for (col = 0; col < k; ++col)
+            rdraw[col] = p[obs + (nfuzz[col] - 1) * nn];
+        for (col = 0; col < k; ++col)
+            p[obs + col * nn] = rdraw[col];
+    }
+} /* caddy */
+
+/* -----------------------------------------------------------
+
+ Compute Silhouette Information :
+
+ TODO cleanup: this is almost identical to dark() in ./pam.c
+ -- difference : different dys[] / dss[] indexing, but that
+ -- dss[] indexing change needs to be "synchronized" in all functions here
+*/
+/* fygur(): silhouette information for the crisp clustering ncluv[].
+ *
+ * Input:
+ *   kk       number of clusters; the code loops over k = 1..kk
+ *   nn       number of observations
+ *   ncluv[]  cluster number of each observation (compared against 1..kk)
+ *   dss[]    dissimilarities in packed triangular form; after the '--dss'
+ *            shift below, d(l, j) for l < j is read from
+ *            dss[nn*(l-1) + j - l*(l+1)/2]
+ *   s        only used to build the starting value 's * 1.1 + 1' of the
+ *            minimum search (presumably the largest dissimilarity -- confirm
+ *            against the caller)
+ * Work space: nsend[], nelem[], negbr[], syl[], srank[]
+ * Output:
+ *   avsyl[k-1]  average silhouette width of cluster k
+ *   *ttsyl      sum of all silhouette widths divided by nn
+ *   sylinf[]    nn x 4 matrix stored by column: cluster, neighbor cluster,
+ *               silhouette width, observation index (1-based); inside each
+ *               cluster the rows are in decreasing order of silhouette width
+ *
+ * NOTE(review): if some cluster k_ in 1..kk has no member, nbb stays 0 and
+ * 'db /= nbb' is 0/0; likewise 'avsyl[k] /= ntt' for ntt == 0.  Presumably the
+ * caller guarantees that every cluster 1..kk is non-empty -- confirm.
+ */
+static
+void fygur(int kk, int nn, int ncluv[], double dss[], double s,
+ int *nsend, int *nelem, int *negbr,
+ double *syl, double *srank, double *avsyl, double *ttsyl,
+ double *sylinf)
+{
+ int sylinf_d = nn; /* sylinf[nn, 4] */
+ int j, l, k, k_, nj, ntt, nsylr;
+ double dysa, dysb;
+ /* pointers to sylinf[] columns:*/
+ double *sylinf_2, *sylinf_3, *sylinf_4;
+ sylinf_2 = sylinf + sylinf_d;
+ sylinf_3 = sylinf_2 + sylinf_d;
+ sylinf_4 = sylinf_3 + sylinf_d;
+
+ /* Parameter adjustments */
+ /* (shift the pointers so that avsyl[], ncluv[] and dss[] are 1-based below) */
+ --avsyl;
+ --ncluv;
+
+ --dss;
+
+ nsylr = 0; /* next free row of sylinf[] */
+ *ttsyl = 0.;
+ for (k = 1; k <= kk; ++k) {
+
+ /* nelem[0:(ntt-1)] := indices (1-based) of obs. in cluster k : */
+ ntt = 0;
+ for (j = 1; j <= nn; ++j) {
+ if (ncluv[j] == k) {
+ nelem[ntt] = j;
+ ++ntt;
+ }
+ }
+
+ for (j = 0; j < ntt; ++j) {/* (j+1)-th obs. in cluster k */
+ nj = nelem[j];
+ /* dysb := min over other clusters of the mean dissimilarity to nj;
+ * it starts at 's * 1.1 + 1' */
+ dysb = s * 1.1 + 1.;
+ negbr[j] = -1;
+ /* for all clusters k_ != k : */
+ for (k_ = 1; k_ <= kk; ++k_) if (k_ != k) {
+ int nbb = 0;
+ double db = 0.;
+ for (l = 1; l <= nn; ++l) {
+ if (ncluv[l] == k_) {
+ ++nbb;
+ if (l < nj) {
+ db += dss[nn * (l - 1) + nj - l * (l + 1) / 2];
+ } else if (l > nj) {
+ db += dss[nn * (nj - 1) + l - nj * (nj + 1) / 2];
+ } /* else dss(.)=0 ; nothing to add */
+ }
+ }
+ db /= nbb; /* now db(k_) := mean( d[j, l]; l in C_{k_} ) */
+ if (dysb > db) {
+ dysb = db;
+ negbr[j] = k_;
+ }
+ }/* negbr[j] := arg min_{k_} db(k_) */
+ if (ntt > 1) {
+ /* dysa := mean dissimilarity of nj to the other members of cluster k */
+ dysa = 0.;
+ for (l = 0; l < ntt; ++l) {
+ int nl = nelem[l];
+ if (nj < nl) {
+ dysa += dss[nn * (nj - 1) + nl - nj * (nj + 1) / 2];
+ } else if (nj > nl) {
+ dysa += dss[nn * (nl - 1) + nj - nl * (nl + 1) / 2];
+ }/* else dss(.)=0 ; nothing to add */
+ }
+ dysa /= ntt - 1;
+ /* syl[j] := (dysb - dysa) / max(dysa, dysb), clamped to [-1, 1],
+ * with the zero cases handled explicitly */
+ if (dysa > 0.) {
+ if (dysb > 0.) {
+ if (dysb > dysa)
+ syl[j] = 1. - dysa / dysb;
+ else if (dysb < dysa)
+ syl[j] = dysb / dysa - 1.;
+ else /* dysb == dysa: */
+ syl[j] = 0.;
+
+ if (syl[j] < -1.)
+ syl[j] = -1.;
+ else if (syl[j] > 1.)
+ syl[j] = 1.;
+
+ } else {
+ syl[j] = -1.;
+ }
+ }
+ else /* dysa == 0 */ if (dysb > 0.)
+ syl[j] = 1.;
+ else
+ syl[j] = 0.;
+ }
+ else { /* ntt == 1: */
+ syl[j] = 0.;
+ }
+ } /* for( j ) */
+ avsyl[k] = 0.;
+ /* selection sort: repeatedly take the largest remaining syl[] and mark
+ * it as used with -3 (below every valid value and below the start -2) */
+ for (j = 0; j < ntt; ++j) {
+ int lang = 0 /* -Wall */;
+ double symax = -2.;
+ for (l = 0; l < ntt; ++l) {
+ if (symax < syl[l]) {
+ symax = syl[l];
+ lang = l;
+ }
+ }
+ nsend[j] = lang;
+ srank[j] = symax; /* = syl[lang] */
+ avsyl[k] += srank[j];
+ syl[lang] = -3.;
+ }
+ *ttsyl += avsyl[k];
+ avsyl[k] /= (double) ntt;
+ /* append the rows of cluster k to sylinf[] */
+ if (ntt < 2) {
+ sylinf [nsylr] = (double) k;
+ sylinf_2[nsylr] = (double) negbr[0];
+ sylinf_3[nsylr] = 0.;
+ sylinf_4[nsylr] = (double) nelem[0];
+ ++nsylr;
+ }
+ else {
+ for (j = 0; j < ntt; ++j) {
+ nj = nsend[j];
+ sylinf [nsylr] = (double) k;
+ sylinf_2[nsylr] = (double) negbr[nj];
+ sylinf_3[nsylr] = srank[j];
+ sylinf_4[nsylr] = (double) nelem[nj];
+ ++nsylr;
+ }
+ }
+ } /* for (k) */
+ *ttsyl /= nn;
+} /* fygur */
diff --git a/src/ind_2.h b/src/ind_2.h
new file mode 100644
index 0000000..b491314
--- /dev/null
+++ b/src/ind_2.h
@@ -0,0 +1,42 @@
+/* inlined, to be included in pam.c and clara.c */
+
+static
+#ifdef __GNUC__
+__inline__
+#endif
+int ind_2(int l, int j)
+{
+/* Index into the packed dissimilarity vector dys[]:
+ *
+ *     d(l,j) == dys[ind_2(l,j)]      for 1-based l, j  (dys[] is 0-origin)
+ *
+ * The function is symmetric in (l, j); for l == j it returns 0, i.e. it
+ * relies on dys[0] being permanently 0.
+ *
+ * History: originally FORTRAN "meet" (MEET(), MEET2(), MEET3() in the three
+ * source files), called from CLARA, PAM & TWINS.
+ */
+#ifdef was_orig
+    if (l == j)
+        return 0;/* and the first element, dys[0] is := 0. permanently! */
+    if (l > j)
+        return (l-2)*(l-1)/2 + j;
+    /* l < j */
+    return (j-2)*(j-1)/2 + l;
+#else
+    /* from Li Long -- optimizes particularly well on Itanium2 */
+
+    /* max_m check by MMächler: max_m is the largest integer m
+     *       -----  such that (m-2)*(m-1) < MAX_INT : */
+#define max_m 46342
+
+    int hi, lo;
+
+    if (l == j)
+        return 0;
+
+    if (l > j) {
+        hi = l;
+        lo = j;
+    } else {
+        hi = j;
+        lo = l;
+    }
+
+    if (hi <= max_m) /* the product still fits into an int */
+        return (hi-2)*(hi-1)/2 + lo;
+
+    /* otherwise form the product in double precision */
+    return (int) (((double) hi-2)*(hi-1)/2 + lo);
+#endif
+}
+
diff --git a/src/init.c b/src/init.c
new file mode 100644
index 0000000..8b4b81e
--- /dev/null
+++ b/src/init.c
@@ -0,0 +1,98 @@
+#include <R.h>
+#include <Rinternals.h>
+
+#include "cluster.h"
+
+#include <R_ext/Rdynload.h>
+
+/* CDEF(name): registration entry for a .C() routine 'name'; the number of
+ * arguments is taken from the length of the type table 'name_t' below.
+ * CALLDEF(name, n): registration entry for a routine 'name' with n arguments
+ * (no type table). */
+#define CDEF(name) {#name, (DL_FUNC) &name, sizeof(name ## _t)/sizeof(name ## _t[0]), name ##_t}
+#define CALLDEF(name, n) {#name, (DL_FUNC) &name, n}
+
+
+/* Argument type tables '<routine>_t', one element per argument, in argument
+ * order; they are picked up by CDEF() above. */
+static R_NativePrimitiveArgType R_bncoef_t[3] = {
+ INTSXP, REALSXP, REALSXP
+};
+
+static R_NativePrimitiveArgType cl_clara_t[34] = {
+ /*n:*/ INTSXP, INTSXP, INTSXP, REALSXP, INTSXP, INTSXP, REALSXP, INTSXP,
+ /*valmd:*/ REALSXP, INTSXP, INTSXP, /* rng_R: */ LGLSXP, /* pam_like:*/ LGLSXP,
+ /*d_flag:*/ INTSXP,
+ /*nrepr: */ INTSXP, INTSXP, INTSXP, INTSXP, INTSXP,
+ /*radus:*/ REALSXP, REALSXP, REALSXP, REALSXP, REALSXP, REALSXP, INTSXP,
+ /*obj: */ REALSXP, REALSXP, REALSXP, REALSXP, INTSXP, INTSXP,
+ /*tmp: */ REALSXP,INTSXP
+};
+
+static R_NativePrimitiveArgType cl_fanny_t[27] = {
+ INTSXP, INTSXP, INTSXP, REALSXP, REALSXP,
+ /*jdyss: */ INTSXP, REALSXP, INTSXP, INTSXP, INTSXP, INTSXP,
+ /*negbr: */ INTSXP, /*syl: */ REALSXP, REALSXP, REALSXP, REALSXP,
+ /*nfuzz: */ INTSXP, REALSXP, REALSXP, REALSXP, REALSXP,
+ /*obj: */ REALSXP, INTSXP, REALSXP, REALSXP, REALSXP, INTSXP
+};
+
+/* only needed when the .C() version of pam is compiled in (see CEntries[]) */
+#ifdef _UNUSED_C_pam
+static R_NativePrimitiveArgType cl_pam_t[24] = {
+ INTSXP, INTSXP, INTSXP, REALSXP, REALSXP,
+ /*jdyss: */ INTSXP, REALSXP, INTSXP, INTSXP, INTSXP,
+ /*nrepr: */ LGLSXP, INTSXP, /*radus: */ REALSXP, REALSXP, REALSXP, REALSXP,
+ /*ttsyl: */ REALSXP, REALSXP, /*medoids*/ INTSXP, INTSXP, REALSXP, REALSXP, INTSXP,
+ /*optim: */ INTSXP
+};
+#endif
+
+static R_NativePrimitiveArgType spannel_t[12] = { // ./spannel.c :
+ INTSXP, INTSXP, REALSXP, REALSXP, REALSXP, REALSXP,
+ /*varss: */ REALSXP, REALSXP, REALSXP, REALSXP, INTSXP, INTSXP
+};
+
+/* no explicit length here: CDEF() derives it from the initializer (9 entries) */
+static R_NativePrimitiveArgType sildist_t[] = {
+ REALSXP, INTSXP, INTSXP, INTSXP, REALSXP, INTSXP,
+ /* si: */ REALSXP, INTSXP, LGLSXP
+};
+
+static R_NativePrimitiveArgType twins_t[18] = {
+ INTSXP, INTSXP, REALSXP, REALSXP, REALSXP,
+ /* jdiss: */ INTSXP, REALSXP,
+ INTSXP, INTSXP, INTSXP, INTSXP,
+ /* kwan: */ INTSXP, INTSXP, REALSXP, REALSXP,
+ REALSXP, INTSXP, INTSXP
+};
+
+/* is only .C()-called from ../tests/sweep-ex.R : */
+static R_NativePrimitiveArgType cl_sweep_t[5] = {
+ REALSXP, INTSXP, INTSXP, INTSXP, REALSXP
+};
+
+/* Table of the .C() routines passed to R_registerRoutines() below.
+ * CDEF(name) expands to {"name", &name, <length of name_t>, name_t};
+ * "dysta3" is listed by hand with 8 arguments and no type table.
+ * The table ends with the {NULL, NULL, 0} sentinel. */
+static const R_CMethodDef CEntries[] = {
+ CDEF(R_bncoef),
+ CDEF(cl_clara),
+ {"dysta3", (DL_FUNC) &dysta3, 8},/* ./fanny.c */
+ CDEF(cl_fanny),
+#ifdef _UNUSED_C_pam
+ CDEF(cl_pam),
+#endif
+ CDEF(spannel),
+ CDEF(cl_sweep),
+ CDEF(sildist),
+ CDEF(twins),
+ {NULL, NULL, 0}
+};
+
+/* Table of the .Call() routines passed to R_registerRoutines() below:
+ * cl_Pam takes 13 arguments.  Ends with the {NULL, NULL, 0} sentinel.
+ * The table is never modified, hence 'const' (as for CEntries[]). */
+static const R_CallMethodDef CallEntries[] = {
+    CALLDEF(cl_Pam, 13),
+    {NULL, NULL, 0}
+};
+
+/* Table of the .Fortran() routines passed to R_registerRoutines() below:
+ * registered name, F77_SUB() symbol, number of arguments.
+ * Ends with the {NULL, NULL, 0} sentinel.
+ * The table is never modified, hence 'const' (as for CEntries[]). */
+static const R_FortranMethodDef FortEntries[] = {
+    {"cl_daisy", (DL_FUNC) &F77_SUB(cldaisy), 11},
+    {"cl_mona",  (DL_FUNC) &F77_SUB(clmona),   9},
+    {"dysta",    (DL_FUNC) &F77_SUB(dysta),    8},
+    {NULL, NULL, 0}
+};
+
+/* Initialization hook of the 'cluster' shared library: registers the three
+ * routine tables defined in this file (no .External() routines) and then
+ * switches dynamic symbol lookup off for this DLL. */
+void R_init_cluster(DllInfo *dll)
+{
+    R_registerRoutines(dll,
+                       CEntries,     /* .C()       */
+                       CallEntries,  /* .Call()    */
+                       FortEntries,  /* .Fortran() */
+                       NULL);        /* .External(): none */
+    R_useDynamicSymbols(dll, FALSE);
+}
diff --git a/src/mona.c b/src/mona.c
new file mode 100644
index 0000000..8575969
--- /dev/null
+++ b/src/mona.c
@@ -0,0 +1,313 @@
+/* MONothetic Analysis --- MONA
+
+ Program for divisive hierarchical clustering of binary data,
+ using association analysis.
+*/
+
+/* mona.f -- translated by f2c (version 20031025).
+ Source for libf2c is in /netlib/f2c/libf2c.zip, e.g.,
+ http://www.netlib.org/f2c/libf2c.zip
+ Further transformed by
+ $Id: f2c-clean,v 1.11 2012/05/04 19:34:33 maechler Exp $
+*/
+
+#include <R.h>
+// #include <Rinternals.h>
+
+#include <R_ext/Print.h>/* for diagnostics */
+#include <R_ext/Utils.h>/* for interrupting */
+
+// iabs(j) = |j|
+static int iabs(int j)
+{
+    /* absolute value of an int, as an explicit branch */
+    if (j < 0)
+        return -j;
+    return j;
+}
+
+/* clmona_(): MONA, divisive clustering of the binary matrix x[nn, pp].
+ *
+ * jerr[0] is read on entry as the verbosity level and is overwritten with the
+ * return code: 0 = ok, 1..5 = the error conditions flagged in the body.
+ * As far as the code below shows, the results are
+ *   ner[]   the objects (1..nn), reordered so that each cluster occupies a
+ *           contiguous range of positions
+ *   kwan[]  kwan[ka] = size of the cluster starting at position ka; the sign
+ *           is flipped once such a cluster cannot be split any further
+ *   nban[]  nban[km] = number of the pass in which a new cluster was split
+ *           off at position km
+ *   lava[]  lava[km] = variable used for that split
+ *   jlack[] number of missing values per variable (only filled in when
+ *           missing values are present)
+ * Missing values in x[] are overwritten in place (see "Filling in" below).
+ */
+void clmona_(int *nn, // = number of objects
+ int *pp, // = number of variables
+ int *x, /* x[i,j]: binary (0/1/NA) data (obs. i, var.j)
+ where NA = missing values, are all values > 1 ;
+ actually are NA == 2 when called from R's mona() */
+ // int jerr[1] : verbose(ness) in {0, 1, ..}
+
+ // Result / Output / Return Values:
+ int *jerr, // error return code in {1,2,3,4,5}
+ int *nban, // [1:nn]
+ int *ner, // [1:nn]
+ int *kwan, // [1:nn]
+ int *lava, // [1:nn]
+ int *jlack)// [1:pp] : jlack[j] := #{NA's in x[,j]}
+{
+ int verbose = jerr[0];
+ int j, j0, j1, jnat, jma = -1, jtel = -1, jtelz = -1, lama;
+
+ /* Parameter adjustments */
+ /* (shift the pointers so that all arrays, and X(i,j), are 1-based below) */
+ --lava;
+ --kwan;
+ --ner;
+ --nban;
+ --jlack;
+ int x_dim1 = *nn,
+ x_offset = 1 + x_dim1;
+ x -= x_offset;
+#define X(i,j) x[i + j * x_dim1]
+
+ /* Function Body */
+ if(*pp < 2) {
+ *jerr = 5; return; // not implemented currently (patches welcome!)
+ }
+
+ const int nhalf = (*nn + 1) / 2;
+ // jptwe = (*pp + 4) / 5;
+ Rboolean has_NA = FALSE;
+ /* NOTE(review): this scan stops at the first object that has any missing
+ * value, so the "all variables missing" test (jerr = 1) is not applied to
+ * the objects after it -- confirm that this is intended. */
+ for (int l = 1; l <= *nn; ++l) {
+ int n_miss = 0;
+ for (j = 1; j <= *pp; ++j) {
+ if (X(l,j) > 1) ++n_miss;
+ }
+ if (n_miss == *pp) { // all variables missing for this object
+ *jerr = 1; return;
+ }
+ if(n_miss) {
+ has_NA = TRUE; break;
+ }
+ }
+
+ if (has_NA) { // -------------- Missing Values Treatment -----------
+
+ int lack = 0; // number of variables that have missing values
+ for (j = 1; j <= *pp; ++j) {
+ j0 = 0;
+ j1 = 0;
+ for (int l = 1; l <= *nn; ++l) {
+ if (X(l,j) == 0) ++j0;
+ else if (X(l,j) == 1) ++j1;
+ }
+ jlack[j] = *nn - j0 - j1;
+ if (jlack[j] != 0) {
+ ++lack;
+ }
+ if (jlack[j] >= nhalf) {
+ // at least 50% of the objects have missing values for this variable
+ *jerr = 2; return;
+ }
+ if (j0 == 0 || j1 == 0) {
+ // all non missing values are identical for this variable
+ *jerr = 3; return;
+ }
+ }
+ if (lack == *pp) { /* all variables have missing values */
+ *jerr = 4; return;
+ }
+
+ /* ---------- Filling in missing values --------------------- */
+
+ /* For each variable j with missing values: among the complete variables ja
+ * take the one (jma) with the largest |a*d - b*c| computed from the 2 x 2
+ * counts of x[,j] versus x[,ja]; missing x[k,j] are then replaced by
+ * x[k,jma] (syn) or by its complement (!syn). */
+ for (j = 1; j <= *pp; ++j)
+ if (jlack[j] != 0) {
+ lama = -1;
+ Rboolean syn = TRUE;
+ for (int ja = 1; ja <= *pp; ++ja)
+ if (jlack[ja] == 0) { /* no missing in x[, ja] */
+ int a = 0,
+ b = 0,
+ c = 0,
+ d = 0;
+ for (int k = 1; k <= *nn; ++k) {
+ if (X(k,j) != 1) { // x[k,j] is 0 or missing
+ if (X(k,ja) == 0) ++a;
+ else if (X(k,ja) == 1) ++b;
+ } else { // x[...] == 1
+ if (X(k,ja) == 0) ++c;
+ else if (X(k,ja) == 1) ++d;
+ }
+ }
+ int kal = a * d - b * c,
+ kalf = iabs(kal);
+ /* NOTE(review): 'syn' is only ever switched to FALSE; it is not set
+ * back to TRUE when a later 'ja' with kal >= 0 becomes the new
+ * maximum -- confirm that this is intended. */
+ if (kalf >= lama) {
+ lama = kalf;
+ jma = ja;
+ if (kal < 0)
+ syn = FALSE;
+ }
+ }
+
+ for (int k = 1; k <= *nn; ++k)
+ if (X(k,j) > 1) { // missing
+ if (syn) {
+ X(k,j) = X(k,jma);
+ } else {
+ if (X(k,jma) == 1)
+ X(k,j) = 0;
+ if (X(k,jma) == 0)
+ X(k,j) = 1;
+ }
+ }
+ }
+
+ } /* --- end of treating missing values ---- */
+ *jerr = 0; // it may have had "verbose"
+
+/* initialization --------------------------- */
+
+ for (int k = 1; k <= *nn; ++k) {
+ kwan[k] = 0;
+ ner[k] = k;
+ lava[k] = 0;
+ }
+ int npass = 1; // number of passes
+ kwan[1] = *nn; // at the start: one cluster holding all nn objects
+
+/* algorithm -------------------------------- */
+
+ int nclu = 1;
+ int ka = 1;
+/* --- Loop --- */
+/* each round treats the cluster at the positions ka..kb of ner[] */
+L310:
+ R_CheckUserInterrupt(); // (had infinite loop whenever pp == 1 !)
+
+ int kb = ka + kwan[ka] - 1;
+ if(verbose) Rprintf("Loop npass = %d: (ka,kb) = (%d,%d)\n", npass, ka, kb);
+ lama = -1;
+ jnat = *pp; // decremented for every variable that is usable for a split
+ for (j = 1; j <= *pp; ++j) {
+ if (nclu == 1) {
+ goto L330; // jump *inside* if(.) .. hmm...
+ }
+ j0 = 0;
+ j1 = 0;
+ for (int k = ka; k <= kb; ++k) {
+ int n_k = ner[k];
+ if (X(n_k,j) == 0) ++j0;
+ else if (X(n_k,j) == 1) ++j1;
+ }
+ if (j1 != 0 && j0 != 0) { // variable j takes both values in this cluster
+ L330:
+ --jnat;
+ int a = 0,
+ b = 0,
+ c = 0,
+ d = 0,
+ lams = 0; // sum over jb != j of |a*d - b*c|
+ for (int jb = 1; jb <= *pp; ++jb) {
+ if (jb != j) { // FIXME: if (p == 1) have j == jb == 1 here
+ // then this branch is never used ==> lama = -1 < lams = 0
+ // but (a,b,c,d) would remain uninitialized
+ // -Wall in Fortran did make us initialize them to 0
+ a = 0;
+ b = 0;
+ c = 0;
+ d = 0;
+ for (int k = ka; k <= kb; ++k) {
+ int n_k = ner[k];
+ if (X(n_k,j) == 0) {
+ if (X(n_k,jb) == 0) ++a;
+ else if (X(n_k,jb) == 1) ++b;
+ } else {
+ if (X(n_k,jb) == 0) ++c;
+ else if (X(n_k,jb) == 1) ++d;
+ }
+ }
+ lams += iabs(a * d - b * c);
+ }
+ }
+ if (lama < lams) {
+ // best variable so far; (a,b,c,d) still hold the counts of the last jb
+ lama = lams;
+ jtel = c + d;
+ jtelz = a + b;
+ jma = j;
+ }
+ }
+ } // end -- for(j in 1:p)
+
+ if(verbose) Rprintf(" for(j ..) -> jma=%d, jtel(.,z) = (%d, %d)", jma, jtel, jtelz);
+
+ if (jnat < *pp) {
+
+ // ---- Splitting -------------------------------
+ int nzf, jtel2,
+ // L375:
+ nel = ner[ka];
+ // the group that contains the first object, ner[ka], stays in front;
+ // nzf is the value of x[,jma] in the other group
+ if (X(nel,jma) == 1) {
+ nzf = 0;
+ jtel2 = jtel;
+ } else {
+ nzf = 1;
+ jtel2 = jtelz;
+ }
+ int jres = kb - ka + 1 - jtel2,
+ km = ka + jtel2;
+ if(verbose)
+ Rprintf(" --> splitting: ka=%d, ner[ka]=%d => (nzf, jtel2, jres, km) = (%d, %d, %d, %d)\n",
+ ka, nel, nzf, jtel2, jres, km);
+
+ /*------- inner loop ------------------ */
+ if(verbose >= 2) Rprintf(" inner loop: for(k in ka:km) use ner[k]: ");
+ int k = ka,
+ n_b = 0;
+ do { // L378: almost for(k=ka; k < km; ++k) (but see 'continue' below)
+ int n_k = ner[k];
+ if(verbose >= 2) Rprintf(" %d", n_k);
+
+ if (X(n_k,jma) == nzf) {
+ // find the next object n_b (at position c+1) with x[,jma] != nzf,
+ // shift ner[k..c] one place up and put n_b at position k
+ int c = 0;
+ for (int b = k; b <= kb; ++b) {
+ n_b = ner[b];
+ if (X(n_b,jma) != nzf) {
+ c = b - 1;
+ break; // goto L382;
+ }
+ }
+ // L382:
+ for (int a = k; a <= c; ++a) {
+ int d = c + k - a;
+ ner[d + 1] = ner[d];
+ }
+ ner[k] = n_b;
+ continue; // the inner loop _without_ increasing 'k' !
+ }
+ ++k;
+ } while (k < km);
+ if(verbose >= 2) Rprintf(" -> 'nelbb' = n_b = %d\n", n_b);
+ /*------- end{inner loop} -- */
+
+ /* L390: */
+ ++nclu;
+ nban[km] = npass;
+ kwan[ka] = jtel2;
+ kwan[km] = jres;
+ lava[km] = jma;
+ ka += kwan[ka];
+
+ } else { // jnat == *pp
+ if(verbose) Rprintf(" --> _NO_ splitting\n");
+ kwan[ka] = -kwan[ka];
+ }
+ if(verbose) Rprintf(" --> nclu = %d, kwan[ka] = %d\n", nclu, kwan[ka]);
+
+ // L400:
+ if (kb == *nn) {
+ goto L500;
+ }
+
+ // advance ka to the start of the next cluster with at least 2 objects
+ do {// L410:
+ ka += iabs(kwan[ka]);
+ if (ka > *nn) {
+ goto L500;
+ }
+ } while (kwan[ka] < 2);
+
+ goto L310;
+ //-----------> Loop
+
+// end of a pass: start another one if some cluster still has kwan[] >= 2
+L500:
+ ++npass;
+ for (ka = 1; ka <= *nn; ++ka) {
+ if (kwan[ka] >= 2) {
+ if(verbose) Rprintf("L500; found kwan[%d] = %d >= 2 ==> Loop again\n",
+ ka, kwan[ka]);
+ goto L310;
+ //-----------> Loop
+ }
+ }
+ return;
+} /* clmona_ */
diff --git a/src/pam.c b/src/pam.c
new file mode 100644
index 0000000..27a9e62
--- /dev/null
+++ b/src/pam.c
@@ -0,0 +1,1107 @@
+/*
+ * PAM := Partitioning Around Medoids
+ *
+ * original Id: pam.f,v 1.16 2003/06/03 13:40:56 maechler translated by
+ * f2c (version 20031025) and run through f2c-clean,v 1.10 2002/03/28
+ */
+
+#include <float.h>
+
+#include <Rmath.h>
+#include <Rinternals.h>
+
+#include <R_ext/Print.h>/* for diagnostics */
+#include <R_ext/Utils.h>/* for interrupting */
+
+#include "cluster.h"
+#include "ind_2.h"
+
+// carries out a clustering using the k-medoid approach
+
+#ifdef _UNUSED_C_pam
+/* The .C() version --- no longer used, since Jan.2015
+ *
+ * Steps: 1. unless *jdyss == 1, compute the dissimilarities dys[] from x[]
+ *           by dysta(); on failure set *jdyss = -1 and return;
+ *        2. s := max(dys[]);
+ *        3. bswap(): build + swap;
+ *        4. cstat(): clustering and statistics;
+ *        5. if all statistics are wanted: fill clusinf[kk, 5] and, for
+ *           1 < kk < nn, compute the silhouette information by dark().
+ *
+ * Some arguments carry options on input:
+ *   obj[0] == 0.  <==> all_stats;   obj[1] = trace level;
+ *   med[0] != 0   <==> med[] contains the initial medoids;
+ *   nisol[0] != 0 <==> do the swap phase.
+ */
+void cl_pam(int *nn, int *p, int *kk, double *x, double *dys,
+            int *jdyss, /* jdyss = 0 : compute distances from x
+                         *       = 1 : distances provided in dys */
+            double *valmd, int *jtmd, int *ndyst,
+            int *nsend, int/*logical*/ *nrepr, int *nelem,
+            double *radus, double *damer, double *avsyl, double *separ,
+            double *ttsyl, double *obj, int *med, int *ncluv,
+            double *clusinf, double *sylinf, int *nisol, int* pamonce)
+{
+    int clusinf_dim1 = *kk;
+
+    /* Local variables */
+    Rboolean all_stats = (obj[0] == 0.),// all_stats == !cluster.only
+        med_given = (med[0] != 0),/* if true, med[] contain initial medoids */
+        do_swap = (nisol[0] != 0);
+    int k, i, nhalf, trace_lev = (int) obj[1];
+    const int n = *nn;
+    double s;
+
+    /* Function Body */
+    /* nhalf := #{distances}+1 = length(dys) = n(n-1)/2 + 1.
+     * Halve the even factor first: n * (n - 1) itself overflows an int for
+     * n >= 46342, long before the result does (same form as in cl_Pam()). */
+    if (n % 2 == 0)
+        nhalf = n / 2 * (n - 1) + 1;
+    else
+        nhalf = (n - 1) / 2 * n + 1;
+
+    if (*jdyss != 1) {
+        int jhalt = 0;
+        if(trace_lev)
+            Rprintf("C pam(): computing %d dissimilarities from %d x %d matrix: ",
+                    nhalf, *nn, *p);
+        F77_CALL(dysta)(nn, p, x, dys, ndyst, jtmd, valmd, &jhalt);
+        if (jhalt != 0) {
+            if(trace_lev) Rprintf(" dysta()-error: jhalt=%d\n", jhalt);
+            *jdyss = -1; return; /* the error is signalled through *jdyss */
+        }
+        // else
+        if(trace_lev) Rprintf("[Ok]\n");
+    }
+
+    /* s := max( dys[.] ), the largest distance */
+    for (i = 1, s = 0.; i < nhalf; ++i) /* dys[0] == 0. not used here */
+        if (s < dys[i])
+            s = dys[i];
+
+    /* FIXME: work with med[] = (i_1, i_2, ..., i_k)
+     * -----  instead nrepr[] = (b_1, ... b_n)   b_i in {0,1} */
+    for (i = 0; i < *nn; ++i)
+        nrepr[i] = 0;
+    if(med_given) { /* if true, med[] contain initial medoids */
+
+        /* for the moment, translate these to nrepr[] 0/1 :
+         * not assuming that the med[] indices are sorted */
+        for (k = 0; k < *kk; k++)
+            nrepr[med[k] - 1] = 1;
+    }
+
+/*  Build + Swap [but no build if(med_given); swap only if(do_swap) : */
+
+    bswap(*kk, *nn, nrepr,
+          med_given, do_swap, trace_lev,
+          radus, damer, avsyl, dys, s, obj, *pamonce);
+
+    if(trace_lev) Rprintf("end{bswap()}, ");
+/*  Compute Clustering & STATs if(all_stats): */
+    cstat(*kk, *nn, nsend, nrepr, all_stats,
+          radus, damer, avsyl, separ, &s, dys, ncluv, nelem, med, nisol);
+    if(trace_lev) Rprintf("end{cstat()}\n");
+    if(all_stats) {
+        /* clusinf[kk, 5], stored by column */
+        for (k = 0; k < *kk; ++k) {
+            clusinf[k]= (double) nrepr[k];
+            clusinf[k + clusinf_dim1] = radus[k];
+            clusinf[k + (clusinf_dim1 << 1)] = avsyl[k];
+            clusinf[k + clusinf_dim1 * 3] = damer[k];
+            clusinf[k + (clusinf_dim1 << 2)] = separ[k];
+        }
+        if (1 < *kk && *kk < *nn) {
+            /* Compute Silhouette info : */
+            dark(*kk, *nn, ncluv, dys, s,
+                 // -->
+                 nsend, nelem, nrepr, radus, damer, avsyl, ttsyl, sylinf);
+        }
+    }
+} /* cl_pam */
+#endif
+
+// The .Call() version
+/* cl_Pam(): PAM (partitioning around medoids), called via .Call().
+ *
+ * Returns a named list with the components
+ *   "clu", "med", "silinf", "obj", "isol", "clusinf", "avsil", "ttsil"
+ * and, if(keep_diss), "dys".  "silinf" and "ttsil" are only filled
+ * if(all_stats && 1 < k < n).
+ * If the dissimilarities are computed here and dysta() fails, its integer
+ * error code is returned instead of the list.
+ * An R error is raised if 'medoids' is neither NULL nor an integer vector of
+ * length k with all entries in 1..n.
+ */
+SEXP cl_Pam(SEXP k_, SEXP n_,
+            SEXP do_diss_, /* == !diss; if true, compute distances from x (= x_or_diss);
+                              otherwise distances provided by x_or_diss */
+            SEXP x_or_diss,// this "is" if(do_diss) "x[]" (n x p) else "dys[]"
+            SEXP all_stats_, // all_stats == !cluster.only
+            SEXP medoids, // NULL or integer(k) subset {1:n}
+            SEXP do_swap_, SEXP trace_lev_,
+            SEXP keep_diss_, SEXP pam_once_,
+
+            // the next 3 are only needed if(do_diss)
+            SEXP val_md, SEXP j_md, // "md" := [m]issing [d]ata
+            SEXP diss_kind_) // = 1 ("euclidean") or 2 ("manhattan")
+{
+    const int kk = asInteger(k_), n = asInteger(n_),
+        pam_once = asInteger(pam_once_),
+        trace_lev = asInteger(trace_lev_);
+    const Rboolean all_stats = asLogical(all_stats_)
+        , med_given = (medoids != R_NilValue) // if true, med[] contain initial medoids
+        , do_diss = asLogical(do_diss_)
+        , do_swap = asLogical(do_swap_)
+        , keep_diss = asLogical(keep_diss_) // only if(keep_diss) return dys[] ..
+        , do_syl = all_stats && (1 < kk && kk < n);
+
+
+#ifdef once_we_get_n_from_args
+    int n, p = NA_INTEGER;
+    if (do_diss) { // <-- was 'jdyss != 1' i.e. jdyss == 0
+        SEXP dims = getAttrib(x_or_diss, R_DimSymbol);
+        n = INTEGER(dims)[0];
+        p = INTEGER(dims)[1];
+    } else {
+        n = asInteger(getAttrib(x_or_diss, install("Size")));
+    }
+#endif
+
+    int i, nhalf; // nhalf := #{distances}+1 = length(dys)
+    double s;
+    if (n % 2 == 0) { // avoid overflow of n * (n - 1)
+        nhalf = n / 2 * (n - 1) + 1;
+    } else {
+        nhalf = (n - 1) / 2 * n + 1;
+    }
+
+    int *nsend = (int*) R_alloc(n, sizeof(int))
+        , *nelem = (int*) R_alloc(all_stats ? n : 1, sizeof(int)) /* Rboolean */
+        , *nrepr = (int*) R_alloc(n, sizeof(int))
+        , *med
+        ;
+    double
+        *radus = (double*) R_alloc( n, sizeof(double)),
+        *damer = (double*) R_alloc( n, sizeof(double)),
+        *separ = (double*) R_alloc(kk, sizeof(double));
+    int clusinf_dim1 = kk;
+
+    if(med_given) {
+        if(TYPEOF(medoids) != INTSXP || LENGTH(medoids) != kk)
+            error(_("Invalid 'medoids'"));
+        /* Each medoid is used below as the index  nrepr[med[k] - 1] :
+         * anything outside 1..n (including NA_INTEGER) would write outside
+         * of nrepr[], so it is rejected here. */
+        const int *med0 = INTEGER(medoids);
+        for (int k = 0; k < kk; k++)
+            if (med0[k] < 1 || med0[k] > n)
+                error(_("Invalid 'medoids'"));
+        PROTECT(medoids = duplicate(medoids));
+    } else {
+        PROTECT(medoids = allocVector(INTSXP, kk));
+    }
+    med = INTEGER(medoids);
+
+    SEXP nms,
+        ans = PROTECT(allocVector(VECSXP, keep_diss ? 9 : 9-1));
+    setAttrib(ans, R_NamesSymbol,
+              nms = allocVector(STRSXP, keep_diss ? 9 : 9-1));
+    int nprot = 2; // <- ++ for each PROTECT() below
+    /* sylinf_ and ttsyl_ are only allocated (and used) if(do_syl) */
+    SEXP dys_, avsyl_, obj_, clu_, clusinf_, nisol_,
+        sylinf_ = R_NilValue, ttsyl_ = R_NilValue;
+
+    // these are only used if(do_diss) :
+    double *valmd = NULL; int *jtmd = NULL; int *diss_kind = NULL;
+    if (do_diss) { // <-- was 'jdyss != 1' i.e. jdyss == 0
+        PROTECT(dys_ = allocVector(REALSXP, nhalf)); nprot++;
+        valmd = REAL(val_md);
+        jtmd = INTEGER(j_md);
+        diss_kind = INTEGER(diss_kind_); // = 1 ("euclidean") or 2 ("manhattan")
+    } else {
+        dys_ = x_or_diss; // a pointer to the same thing
+    }
+    // Creating the SEXPs as list components, so they are auto-PROTECTed:
+    SET_STRING_ELT(nms, 0, mkChar("clu"));
+    SET_VECTOR_ELT(ans, 0, clu_ = allocVector(INTSXP, n));
+    SET_STRING_ELT(nms, 1, mkChar("med")); SET_VECTOR_ELT(ans, 1, medoids);
+    SET_STRING_ELT(nms, 2, mkChar("silinf"));
+    if(do_syl)
+        SET_VECTOR_ELT(ans, 2, sylinf_ = all_stats ? allocMatrix(REALSXP, n, 4)
+                       : allocVector(REALSXP, 1));
+    SET_STRING_ELT(nms, 3, mkChar("obj"));
+    SET_VECTOR_ELT(ans, 3, obj_ = allocVector(REALSXP, 2));
+    SET_STRING_ELT(nms, 4, mkChar("isol"));
+    SET_VECTOR_ELT(ans, 4, nisol_ = allocVector(INTSXP, all_stats ? kk : 1));
+    SET_STRING_ELT(nms, 5, mkChar("clusinf"));
+    SET_VECTOR_ELT(ans, 5, clusinf_ = all_stats ? allocMatrix(REALSXP, kk, 5)
+                   : allocVector(REALSXP, 1));
+    SET_STRING_ELT(nms, 6, mkChar("avsil"));
+    SET_VECTOR_ELT(ans, 6, avsyl_ = allocVector(REALSXP, n));
+    SET_STRING_ELT(nms, 7, mkChar("ttsil"));
+    if(do_syl)
+        SET_VECTOR_ELT(ans, 7, ttsyl_ = allocVector(REALSXP, 1));
+    if(keep_diss) {
+        SET_STRING_ELT(nms, 8, mkChar("dys")); SET_VECTOR_ELT(ans, 8, dys_);
+    }
+
+    int *ncluv = INTEGER(clu_),
+        *nisol = INTEGER(nisol_);
+    double
+        *dys = REAL(dys_),
+        *avsyl = REAL(avsyl_),
+        *obj = REAL(obj_),
+        *clusinf= REAL(clusinf_);
+
+    if (do_diss) { // <-- was 'jdyss != 1' i.e. jdyss == 0
+        double *x = REAL(x_or_diss);
+        int jhalt = 0;
+        SEXP dims = getAttrib(x_or_diss, R_DimSymbol);
+        int p = INTEGER(dims)[1];
+        if(trace_lev)
+            Rprintf("C pam(): computing %d dissimilarities from %d x %d matrix: ",
+                    nhalf, n, p);
+        F77_CALL(dysta)((int*)&n, &p, x, dys, diss_kind, jtmd, valmd, &jhalt);
+        if (jhalt != 0) {
+            if(trace_lev) Rprintf(" dysta()-error: jhalt=%d\n", jhalt);
+            UNPROTECT(nprot);
+            return ScalarInteger(jhalt); // i.e., integer error code instead of a named list
+        }
+        // else
+        if(trace_lev) Rprintf("[Ok]\n");
+    }
+
+    /* s := max( dys[.] ), the largest distance */
+    for (i = 1, s = 0.; i < nhalf; ++i) /* dys[0] == 0. not used here */
+        if (s < dys[i])
+            s = dys[i];
+
+    /* FIXME: work with med[] = (i_1, i_2, ..., i_k)
+     * -----  instead nrepr[] = (b_1, ... b_n)   b_i in {0,1} */
+    for (i = 0; i < n; ++i)
+        nrepr[i] = 0;
+    if(med_given) { /* if true, med[] contain initial medoids */
+
+        /* for the moment, translate these to nrepr[] 0/1 :
+         * not assuming that the med[] indices are sorted
+         * (their range 1..n has been checked above) */
+        for (int k = 0; k < kk; k++)
+            nrepr[med[k] - 1] = 1;
+    }
+
+/*  Build + Swap [but no build if(med_given); swap only if(do_swap) : */
+
+    bswap(kk, n, nrepr, // <- 3
+          med_given, do_swap, trace_lev,// <- 6
+          radus, damer, avsyl, // <- 9
+          dys, s, obj, // <- 12
+          pam_once);
+
+    if(trace_lev) Rprintf("end{bswap()}, ");
+/*  Compute Clustering & STATs if(all_stats): */
+    cstat(kk, n, nsend, nrepr, all_stats,
+          radus, damer, avsyl, separ, &s, dys, ncluv, nelem, med, nisol);
+    if(trace_lev) Rprintf("end{cstat()}\n");
+    if(all_stats) {
+        /* clusinf[kk, 5], stored by column */
+        for (int k = 0; k < kk; ++k) {
+            clusinf[k]= (double) nrepr[k];
+            clusinf[k + clusinf_dim1] = radus[k];
+            clusinf[k + (clusinf_dim1 << 1)] = avsyl[k];
+            clusinf[k + clusinf_dim1 * 3] = damer[k];
+            clusinf[k + (clusinf_dim1 << 2)] = separ[k];
+        }
+        if (do_syl) { // Compute Silhouette info :
+            double
+                *ttsyl = REAL(ttsyl_),
+                *sylinf = REAL(sylinf_);
+
+            dark(kk, n, ncluv, dys, s,
+                 // -->
+                 nsend, nelem, nrepr, radus, damer, avsyl, ttsyl, sylinf);
+        }
+    }
+    UNPROTECT(nprot);
+    return ans;
+} /* cl_Pam */
+
+
+
+/* -----------------------------------------------------------
+ bswap(): the clustering algorithm in 2 parts: I. build, II. swap
+*/
+void bswap(int kk, int n, int *nrepr,
+ Rboolean med_given, Rboolean do_swap, int trace_lev,
+ /* nrepr[]: here is boolean (0/1): 1 = "is representative object" */
+ double *dysma, double *dysmb, double *beter,
+ const double dys[], double s, double *obj, int pamonce)
+{
+ int i, j, ij, k,h, dig_n;
+ double sky;
+
+ /* Parameter adjustments */
+ --nrepr;
+ --beter;
+
+ --dysma; --dysmb;
+
+ if(trace_lev) Rprintf("pam()'s bswap(*, s=%g, pamonce=%d): ", s, pamonce);
+
+ s = s * 1.1 + 1.;// larger than all dys[] (but DBL_MAX is too large)
+
+
+/* IDEA: when n is large compared to k (= kk),
+ * ---- rather use a "sparse" representation:
+ * instead of boolean vector nrepr[] , use ind_repr <- which(nrepr) !!
+ */
+ for (i = 1; i <= n; ++i)
+ dysma[i] = s;
+
+ if(med_given) {
+ if(trace_lev) Rprintf("medoids given\n");
+
+ /* compute dysma[] : dysma[j] = D(j, nearest_representative) */
+ for (i = 1; i <= n; ++i) {
+ if (nrepr[i] == 1)
+ for (j = 1; j <= n; ++j) {
+ ij = ind_2(i, j);
+ if (dysma[j] > dys[ij])
+ dysma[j] = dys[ij];
+ }
+ }
+ }
+ else {
+
+/* ====== first algorithm: BUILD. ====== */
+
+ if(trace_lev) Rprintf("build %d medoids:\n", kk);
+
+ /* find kk representatives aka medoids : */
+
+ for (k = 1; k <= kk; ++k) {
+
+ R_CheckUserInterrupt();
+
+ /* compute beter[i] for all non-representatives:
+ * also find ammax := max_{..} and nmax := argmax_i{beter[i]} ... */
+ int nmax = -1; /* -Wall */
+ double ammax, cmd;
+ ammax = 0.;
+ for (i = 1; i <= n; ++i) {
+ if (nrepr[i] == 0) {
+ beter[i] = 0.;
+ for (j = 1; j <= n; ++j) {
+ cmd = dysma[j] - dys[ind_2(i, j)];
+ if (cmd > 0.)
+ beter[i] += cmd;
+ }
+ if (ammax <= beter[i]) {
+ /* does < (instead of <= ) work too? -- NO! */
+ ammax = beter[i];
+ nmax = i;
+ }
+ }
+ }
+
+ nrepr[nmax] = 1;/* = .true. : found new representative */
+ if (trace_lev >= 2)
+ Rprintf(" new repr. %d\n", nmax);
+
+ /* update dysma[] : dysma[j] = D(j, nearest_representative) */
+ for (j = 1; j <= n; ++j) {
+ ij = ind_2(nmax, j);
+ if (dysma[j] > dys[ij])
+ dysma[j] = dys[ij];
+ }
+ }
+ /* output of the above loop: nrepr[], dysma[], ... */
+ }
+
+ if(trace_lev) /* >= 2 (?) */ {
+ dig_n = 1+floor(log10(n));
+ Rprintf(" after build: medoids are");
+ for (i = 1; i <= n; ++i)
+ if(nrepr[i] == 1) Rprintf(" %*d", dig_n, i);
+ if(trace_lev >= 3) {
+ Rprintf("\n and min.dist dysma[1:n] are\n");
+ for (i = 1; i <= n; ++i) {
+ Rprintf(" %6.3g", dysma[i]);
+ if(i % 10 == 0) Rprintf("\n");
+ }
+ if(n % 10 != 0) Rprintf("\n");
+ } else Rprintf("\n");
+ } else dig_n = 1;// -Wall
+
+ sky = 0.;
+ for (j = 1; j <= n; ++j)
+ sky += dysma[j];
+ obj[0] = sky / n;
+
+ if (do_swap && (kk > 1 || med_given)) {
+
+ double dzsky;
+ int hbest = -1, nbest = -1, kbest= -1; // -Wall
+ int *medoids, *clustmembership;
+ double *fvect;
+ if(pamonce) {
+ // +1 --> use 1-based indices (as R)
+ medoids = (int*) R_alloc(kk+1, sizeof(int));
+ clustmembership = (int*) R_alloc(n+1, sizeof(int));
+ fvect = (double*) R_alloc(n+1, sizeof(double));
+ for (int k = 1, i = 1; i <= n; ++i) {
+ if (nrepr[i]) {
+ medoids[k] = i;
+ k++;
+ }
+ }
+ } else { // -Wall :
+ clustmembership = medoids = (int*) NULL;
+ fvect = (double*) NULL;
+ }
+ int *best_h = NULL; double *best_d = NULL;
+ if (pamonce == 4 || pamonce == 5) { // Schubert and Rousseeuw 2019 FastPAM2
+ best_h = (int*) R_alloc(kk+1, sizeof(int));
+ best_d = (double*) R_alloc(kk+1, sizeof(double));
+ }
+
+/* ====== second algorithm: SWAP. ====== */
+
+ /* Hmm: In the following, we RE-compute dysma[];
+ * don't need it first time; then only need *update* after swap */
+
+/*-- Loop : */
+ L60:
+ if(pamonce == 0) { // original algorihtm
+ for (j = 1; j <= n; ++j) {
+ /* dysma[j] := D_j d(j, <closest medi>) [KR p.102, 104]
+ * dysmb[j] := E_j d(j, <2-nd cl.medi>) [p.103] */
+ dysma[j] = s;
+ dysmb[j] = s;
+ for (i = 1; i <= n; ++i) {
+ if (nrepr[i]) {
+ ij = ind_2(i, j);
+ if (dysma[j] > dys[ij]) {
+ dysmb[j] = dysma[j];
+ dysma[j] = dys[ij];
+ } else if (dysmb[j] > dys[ij]) {