summaryrefslogtreecommitdiffstats
path: root/libsmartcols/src/calculate.c
diff options
context:
space:
mode:
Diffstat (limited to 'libsmartcols/src/calculate.c')
-rw-r--r--libsmartcols/src/calculate.c608
1 files changed, 608 insertions, 0 deletions
diff --git a/libsmartcols/src/calculate.c b/libsmartcols/src/calculate.c
new file mode 100644
index 0000000..ad0b15d
--- /dev/null
+++ b/libsmartcols/src/calculate.c
@@ -0,0 +1,608 @@
+#include "smartcolsP.h"
+#include "mbsalign.h"
+
+static void dbg_column(struct libscols_table *tb, struct libscols_column *cl)
+{
+ struct libscols_wstat *st;
+
+ if (scols_column_is_hidden(cl)) {
+ DBG(COL, ul_debugobj(cl, "%s (hidden) ignored", cl->header.data));
+ return;
+ }
+
+ st = &cl->wstat;
+
+ DBG(COL, ul_debugobj(cl, "%15s seq=%zu, width=%zd, "
+ "hint=%d, max=%zu, min=%zu, "
+ "0x04%x [%s%s%s]",
+
+ cl->header.data, cl->seqnum, cl->width,
+ cl->width_hint >= 1.0 ? (int) cl->width_hint :
+ (int) (cl->width_hint * tb->termwidth),
+ st->width_max,
+ st->width_min,
+ cl->flags,
+ cl->flags & SCOLS_FL_TRUNC ? "trunc" : "",
+ scols_column_is_right(cl) ? " right" : "",
+ scols_column_is_noextremes(cl) ? " noextrem" : ""));
+}
+
+static void dbg_columns(struct libscols_table *tb)
+{
+ struct libscols_iter itr;
+ struct libscols_column *cl;
+
+ scols_reset_iter(&itr, SCOLS_ITER_FORWARD);
+ while (scols_table_next_column(tb, &itr, &cl) == 0)
+ dbg_column(tb, cl);
+}
+
+static int count_cell_width(struct libscols_table *tb,
+ struct libscols_line *ln,
+ struct libscols_column *cl,
+ struct ul_buffer *buf)
+{
+ size_t len;
+ char *data;
+ int rc;
+ struct libscols_cell *ce;
+ struct libscols_wstat *st;
+
+ rc = __cell_to_buffer(tb, ln, cl, buf);
+ if (rc)
+ return rc;
+
+ data = ul_buffer_get_data(buf, NULL, NULL);
+ if (!data)
+ len = 0;
+ else if (scols_column_is_customwrap(cl))
+ len = cl->wrap_chunksize(cl, data, cl->wrapfunc_data);
+ else if (scols_table_is_noencoding(tb))
+ len = mbs_width(data);
+ else
+ len = mbs_safe_width(data);
+
+ if (len == (size_t) -1) /* ignore broken multibyte strings */
+ len = 0;
+
+ ce = scols_line_get_cell(ln, cl->seqnum);
+ ce->width = len;
+
+ st = &cl->wstat;
+ st->width_max = max(len, st->width_max);
+
+ if (scols_column_is_tree(cl)) {
+ size_t treewidth = ul_buffer_get_safe_pointer_width(buf, SCOLS_BUFPTR_TREEEND);
+ cl->width_treeart = max(cl->width_treeart, treewidth);
+ }
+
+ return 0;
+}
+
+static int walk_count_cell_width(struct libscols_table *tb,
+ struct libscols_line *ln,
+ struct libscols_column *cl,
+ void *data)
+{
+ return count_cell_width(tb, ln, cl, (struct ul_buffer *) data);
+}
+
+static double sqrtroot(double num)
+{
+ double tmp = 0, sq = num / 2;
+
+ while (sq != tmp){
+ tmp = sq;
+ sq = (num / tmp + tmp) / 2;
+ }
+ return sq;
+}
+
+static void count_column_deviation(struct libscols_table *tb, struct libscols_column *cl)
+{
+ struct libscols_wstat *st;
+ struct libscols_iter itr;
+ struct libscols_line *ln;
+ struct libscols_cell *ce;
+ size_t sum = 0, n = 0, extra = 0;
+
+ st = &cl->wstat;
+
+ if (scols_column_is_tree(cl) && has_groups(tb))
+ extra = tb->grpset_size + 1;
+
+ /* count average */
+ scols_reset_iter(&itr, SCOLS_ITER_FORWARD);
+ while (scols_table_next_line(tb, &itr, &ln) == 0) {
+ ce = scols_line_get_cell(ln, cl->seqnum);
+
+ n++;
+ sum += ce->width + extra;
+ }
+
+ if (n)
+ st->width_avg = sum / n;
+
+ /* count deviation */
+ if (n > 1) {
+ double variance;
+
+ scols_reset_iter(&itr, SCOLS_ITER_FORWARD);
+ while (scols_table_next_line(tb, &itr, &ln) == 0) {
+ double diff;
+ ce = scols_line_get_cell(ln, cl->seqnum);
+
+ diff = (double) ce->width - st->width_avg;
+ st->width_sqr_sum += diff * diff; /* aka pow(x, 2) */
+ }
+
+ variance = st->width_sqr_sum / (n - 1);
+ st->width_deviation = sqrtroot(variance);
+ }
+
+ DBG(COL, ul_debugobj(cl, "%15s avg=%g, deviation=%g",
+ cl->header.data,
+ st->width_avg,
+ st->width_deviation));
+}
+
+/*
+ * This function counts column width.
+ */
+static int count_column_width(struct libscols_table *tb,
+ struct libscols_column *cl,
+ struct ul_buffer *buf)
+{
+ int rc = 0, no_header = 0;
+ const char *data;
+ struct libscols_wstat *st;
+ struct libscols_iter itr;
+ struct libscols_line *ln;
+
+ assert(tb);
+ assert(cl);
+
+ st = &cl->wstat;
+
+ cl->width = 0;
+ memset(st, 0, sizeof(struct libscols_wstat));
+
+ /* set minimal width according to width_hint */
+ if (cl->width_hint < 1 && scols_table_is_maxout(tb) && tb->is_term) {
+ st->width_min = (size_t) (cl->width_hint * tb->termwidth);
+ if (st->width_min && !is_last_column(cl))
+ st->width_min--;
+ }
+
+ /* set minimal width according to header width */
+ data = scols_cell_get_data(&cl->header);
+ if (data) {
+ size_t len = scols_table_is_noencoding(tb) ?
+ mbs_width(data) : mbs_safe_width(data);
+
+ st->width_min = max(st->width_min, len);
+ } else
+ no_header = 1;
+
+ if (!st->width_min)
+ st->width_min = 1;
+
+ /* count width according to cells data */
+ if (scols_table_is_tree(tb)) {
+ /* Count width for tree */
+ rc = scols_walk_tree(tb, cl, walk_count_cell_width, (void *) buf);
+ if (rc)
+ goto done;
+ } else {
+ /* Count width for list */
+ scols_reset_iter(&itr, SCOLS_ITER_FORWARD);
+ while (scols_table_next_line(tb, &itr, &ln) == 0) {
+ rc = count_cell_width(tb, ln, cl, buf);
+ if (rc)
+ goto done;
+ }
+ }
+
+ if (scols_column_is_tree(cl) && has_groups(tb)) {
+ /* We don't fill buffer with groups tree ascii art during width
+ * calculation. The print function only enlarge grpset[] and we
+ * calculate final width from grpset_size.
+ */
+ size_t gprwidth = tb->grpset_size + 1;
+ st->width_treeart += gprwidth;
+ st->width_max += gprwidth;
+ }
+
+ /* this is default, may be later reduced */
+ cl->width = st->width_max;
+
+ /* enlarge to minimal width */
+ if (cl->width < st->width_min && !scols_column_is_strict_width(cl))
+ cl->width = st->width_min;
+
+ /* use absolute size for large columns */
+ else if (cl->width_hint >= 1 && cl->width < (size_t) cl->width_hint
+ && st->width_min < (size_t) cl->width_hint)
+
+ cl->width = (size_t) cl->width_hint;
+
+
+ /* Column without header and data, set minimal size to zero (default is 1) */
+ if (st->width_max == 0 && no_header && st->width_min == 1 && cl->width <= 1)
+ cl->width = st->width_min = 0;
+
+done:
+ ON_DBG(COL, dbg_column(tb, cl));
+ return rc;
+}
+
+static int cmp_deviation(struct list_head *a, struct list_head *b,
+ void *data __attribute__((__unused__)))
+{
+ struct libscols_column *ca = list_entry(a, struct libscols_column, cl_columns);
+ struct libscols_column *cb = list_entry(b, struct libscols_column, cl_columns);
+
+ double xa = ca->wstat.width_avg + (3*ca->wstat.width_deviation);
+ double xb = cb->wstat.width_avg + (3*cb->wstat.width_deviation);
+
+ return cmp_numbers(xa, xb);
+}
+
+static int cmp_seqnum(struct list_head *a, struct list_head *b,
+ void *data __attribute__((__unused__)))
+{
+ struct libscols_column *ca = list_entry(a, struct libscols_column, cl_columns);
+ struct libscols_column *cb = list_entry(b, struct libscols_column, cl_columns);
+
+ return cmp_numbers(ca->seqnum, cb->seqnum);
+}
+
+static inline void sort_columns(struct libscols_table *tb,
+ int (*cmp)(struct list_head *, struct list_head *, void *))
+{
+ list_sort(&tb->tb_columns, cmp, NULL);
+}
+
+
+/* 68%–95%–99% rule (aka empirical rule) defines relation ship between
+ * mean (avg) and and standard deviation.
+ *
+ * avg + (n * deviation)
+ *
+ * n=1 : covers 68% of data
+ * n=2 : covers 95% of data
+ * n=3 : covers 99.7% of data
+ *
+ */
+static void reduce_to_68(struct libscols_column *cl, size_t wanted)
+{
+ struct libscols_wstat *st = &cl->wstat;
+ size_t new;
+
+ if (st->width_deviation < 1.0)
+ return;
+
+ new = st->width_avg + st->width_deviation;
+ if (new < st->width_min)
+ new = st->width_min;
+
+ if (cl->width - new > wanted)
+ cl->width -= wanted;
+ else
+ cl->width = new;
+}
+
+static int reduce_column(struct libscols_table *tb,
+ struct libscols_column *cl,
+ size_t *width,
+ int stage,
+ int nth)
+{
+ struct libscols_wstat *st = &cl->wstat;
+ size_t wanted, org_width, reduce = 1;
+ int is_trunc = 0;
+
+ if (tb->termwidth >= *width)
+ return 1;
+ /* ignore hidden columns */
+ if (scols_column_is_hidden(cl))
+ return 0;
+ /* never truncate if already minimal width */
+ if (cl->width == cl->wstat.width_min)
+ return 0;
+ /* nothing to truncate */
+ if (cl->width == 0)
+ return 0;
+ /* never truncate the tree */
+ if (scols_column_is_tree(cl) && *width <= cl->width_treeart)
+ return 0;
+
+ org_width = cl->width;
+ wanted = *width - tb->termwidth;
+
+ is_trunc = scols_column_is_trunc(cl)
+ || (scols_column_is_wrap(cl) && !scols_column_is_customwrap(cl));
+
+ switch (stage) {
+ case 0:
+ /* reduce 1st column if with trunc or extreme flag (the
+ * columns are sorted by deviation, so 1st is the worst) */
+ if (!is_trunc && !scols_column_is_noextremes(cl))
+ break;
+ if (nth != 0)
+ break;
+ reduce_to_68(cl, wanted);
+ break;
+
+ case 1:
+ /* reduce extreme columns with large width deviation */
+ if (st->width_deviation < st->width_avg / 2.0)
+ break;
+ /* fallthrough */
+ case 2:
+ /* reduce extreme columns */
+ if (!scols_column_is_noextremes(cl))
+ break;
+ reduce_to_68(cl, wanted);
+ break;
+
+ case 3:
+ /* reduce columns with trunc flag and relative whint and large width deviation */
+ if (st->width_deviation < st->width_avg / 2.0)
+ break;
+ /* fallthrough */
+ case 4:
+ /* reduce columns with trunc flag and relative whint */
+ if (!is_trunc)
+ break;
+ if (cl->width_hint <= 0 || cl->width_hint >= 1)
+ break;
+ if (cl->width < (size_t) (cl->width_hint * tb->termwidth))
+ break;
+ reduce_to_68(cl, wanted);
+ break;
+
+ case 5:
+ /* reduce all columns with trunc flag large width deviation */
+ if (st->width_deviation < st->width_avg / 2.2)
+ break;
+ /* fallthrough */
+ case 6:
+ /* reduce all columns with trunc flag */
+ if (!is_trunc && !scols_column_is_noextremes(cl))
+ break;
+ if (nth == 0)
+ /* columns are reduced in "bad first" way, be more
+ * agresive for the the worst column */
+ reduce = 3;
+ if (cl->width - reduce < st->width_min)
+ reduce = cl->width - st->width_min;
+ cl->width -= reduce;
+ break;
+ default:
+ return -1; /* no more stages */
+ }
+
+ /* hide zero width columns */
+ if (cl->width == 0)
+ cl->flags |= SCOLS_FL_HIDDEN;
+
+ if (cl->width != org_width)
+ DBG(COL, ul_debugobj(cl, " [%02zd] %s reduced %zu-->%zu",
+ cl->seqnum,
+ cl->header.data, org_width, cl->width));
+
+ *width -= org_width - cl->width;
+ return 0;
+}
+
+/*
+ * This is core of the scols_* voodoo...
+ */
+int __scols_calculate(struct libscols_table *tb, struct ul_buffer *buf)
+{
+ struct libscols_column *cl, *last_cl;
+ struct libscols_iter itr;
+ size_t width = 0, width_min = 0; /* output width */
+ int stage = 0, rc = 0;
+ int ignore_extremes = 0, group_ncolumns = 0;
+ size_t colsepsz;
+ int sorted = 0;
+
+
+ DBG(TAB, ul_debugobj(tb, "-----calculate-(termwidth=%zu)-----", tb->termwidth));
+ tb->is_dummy_print = 1;
+
+ colsepsz = scols_table_is_noencoding(tb) ?
+ mbs_width(colsep(tb)) :
+ mbs_safe_width(colsep(tb));
+
+ if (has_groups(tb))
+ group_ncolumns = 1;
+
+ /* set basic columns width
+ */
+ scols_reset_iter(&itr, SCOLS_ITER_FORWARD);
+ while (scols_table_next_column(tb, &itr, &cl) == 0) {
+ int is_last;
+
+ if (scols_column_is_hidden(cl))
+ continue;
+
+ /* we print groups chart only for the for the first tree column */
+ if (scols_column_is_tree(cl) && group_ncolumns == 1) {
+ cl->is_groups = 1;
+ group_ncolumns++;
+ }
+
+ rc = count_column_width(tb, cl, buf);
+ if (rc)
+ goto done;
+
+ is_last = is_last_column(cl);
+
+ width += cl->width + (is_last ? 0 : colsepsz); /* separator for non-last column */
+ width_min += cl->wstat.width_min + (is_last ? 0 : colsepsz);
+ }
+
+ if (!tb->is_term) {
+ DBG(TAB, ul_debugobj(tb, " non-terminal output"));
+ goto done;
+ }
+
+ /* be paranoid */
+ if (width_min > tb->termwidth && scols_table_is_maxout(tb)) {
+ DBG(TAB, ul_debugobj(tb, " min width larger than terminal! [width=%zu, term=%zu]", width_min, tb->termwidth));
+ scols_reset_iter(&itr, SCOLS_ITER_FORWARD);
+ while (width_min > tb->termwidth
+ && scols_table_next_column(tb, &itr, &cl) == 0) {
+
+ if (scols_column_is_hidden(cl))
+ continue;
+ width_min--;
+ cl->wstat.width_min--;
+ }
+ DBG(TAB, ul_debugobj(tb, " min width reduced to %zu", width_min));
+ }
+
+ /* calculate statistics */
+ scols_reset_iter(&itr, SCOLS_ITER_FORWARD);
+ while (scols_table_next_column(tb, &itr, &cl) == 0) {
+
+ count_column_deviation(tb, cl);
+
+ if (scols_column_is_noextremes(cl))
+ ignore_extremes++;
+ }
+
+ /* remember last column before we sort columns */
+ last_cl = list_entry(tb->tb_columns.prev, struct libscols_column, cl_columns);
+
+ /* reduce columns width */
+ while (width > tb->termwidth) {
+ size_t org_width = width;
+ int rc = 0, n = 0;
+
+ if (!sorted) {
+ DBG(TAB, ul_debugobj(tb, "sorting by deviation"));
+ sort_columns(tb, cmp_deviation);
+ ON_DBG(TAB, dbg_columns(tb));
+ sorted = 1;
+ }
+
+ DBG(TAB, ul_debugobj(tb, "#%d reduce stage (width=%zu, term=%zu)",
+ stage, width, tb->termwidth));
+
+ scols_reset_iter(&itr, SCOLS_ITER_BACKWARD);
+
+ while (width > tb->termwidth
+ && rc == 0
+ && scols_table_next_column(tb, &itr, &cl) == 0) {
+ rc = reduce_column(tb, cl, &width, stage, n++);
+ }
+
+ if (rc != 0)
+ break;
+ if (org_width == width)
+ stage++;
+ }
+
+ /* enlarge */
+ if (width < tb->termwidth) {
+ if (ignore_extremes) {
+ if (!sorted) {
+ sort_columns(tb, cmp_deviation);
+ sorted = 1;
+ }
+
+ scols_reset_iter(&itr, SCOLS_ITER_BACKWARD);
+ while (scols_table_next_column(tb, &itr, &cl) == 0) {
+ size_t add;
+
+ if (!scols_column_is_noextremes(cl) || scols_column_is_hidden(cl))
+ continue;
+ if (cl->wstat.width_min == 0 && cl->width == 0)
+ continue;
+
+ add = tb->termwidth - width;
+ if (add && cl->wstat.width_max &&
+ cl->width + add > cl->wstat.width_max)
+ add = cl->wstat.width_max - cl->width;
+ if (!add)
+ continue;
+ DBG(TAB, ul_debugobj(tb, " add +%zd (extreme %s)",
+ add, cl->header.data));
+ cl->width += add;
+ width += add;
+
+ if (width == tb->termwidth)
+ break;
+ }
+ }
+
+ if (width < tb->termwidth && scols_table_is_maxout(tb)) {
+ DBG(TAB, ul_debugobj(tb, " enlarge width (max-out)"));
+
+ /* try enlarging all columns */
+ while (width < tb->termwidth) {
+ scols_reset_iter(&itr, SCOLS_ITER_BACKWARD);
+ while (scols_table_next_column(tb, &itr, &cl) == 0) {
+ if (scols_column_is_hidden(cl))
+ continue;
+ DBG(TAB, ul_debugobj(tb, " enlarge (max-out %s)",
+ cl->header.data));
+ cl->width++;
+ width++;
+ if (width == tb->termwidth)
+ break;
+ }
+ }
+ } else if (width < tb->termwidth) {
+ /* enlarge the last column */
+ DBG(TAB, ul_debugobj(tb, " enlarge width (last column)"));
+
+ if (!scols_column_is_right(last_cl)) {
+ last_cl->width += tb->termwidth - width;
+ width = tb->termwidth;
+ }
+ }
+ }
+
+
+ if (sorted) {
+ sort_columns(tb, cmp_seqnum);
+ sorted = 0;
+ }
+
+ /* ignore last column(s) or force last column to be truncated if
+ * nowrap mode enabled */
+ if (tb->no_wrap && width > tb->termwidth) {
+ scols_reset_iter(&itr, SCOLS_ITER_BACKWARD);
+ while (scols_table_next_column(tb, &itr, &cl) == 0) {
+
+ if (scols_column_is_hidden(cl))
+ continue;
+ if (width <= tb->termwidth)
+ break;
+ if (width - cl->width < tb->termwidth) {
+ size_t r = width - tb->termwidth;
+
+ cl->flags |= SCOLS_FL_TRUNC;
+ cl->width -= r;
+ width -= r;
+ } else {
+ cl->flags |= SCOLS_FL_HIDDEN;
+ width -= cl->width + colsepsz;
+ }
+ }
+ }
+done:
+ if (sorted)
+ sort_columns(tb, cmp_seqnum);
+
+ tb->is_dummy_print = 0;
+ DBG(TAB, ul_debugobj(tb, "-----final width: %zu (rc=%d)-----", width, rc));
+ ON_DBG(TAB, dbg_columns(tb));
+
+ return rc;
+}