diff options
Diffstat (limited to '')
-rw-r--r-- | doc/devel/variadic_debug/03-libldap_Debug.cocci | 70 | ||||
-rw-r--r-- | doc/devel/variadic_debug/04-variadic.cocci | 165 | ||||
-rw-r--r-- | doc/devel/variadic_debug/07-shortcut.cocci | 216 | ||||
-rw-r--r-- | doc/devel/variadic_debug/09-merge.cocci | 147 | ||||
-rw-r--r-- | doc/devel/variadic_debug/README | 39 | ||||
-rw-r--r-- | doc/devel/variadic_debug/equivalence.iso | 12 | ||||
-rw-r--r-- | doc/devel/variadic_debug/macros.h | 23 | ||||
-rwxr-xr-x | doc/devel/variadic_debug/script.sh | 73 |
8 files changed, 745 insertions, 0 deletions
diff --git a/doc/devel/variadic_debug/03-libldap_Debug.cocci b/doc/devel/variadic_debug/03-libldap_Debug.cocci new file mode 100644 index 0000000..8353e64 --- /dev/null +++ b/doc/devel/variadic_debug/03-libldap_Debug.cocci @@ -0,0 +1,70 @@ +using "equivalence.iso" + +@initialize:ocaml@ +@@ +// count the number of % characters in the format string +let fmtn(fmt,n) = + List.length (Str.split_delim (Str.regexp_string "%") fmt) = n + 1 + +# replace osip_debug/oslocal_debug with Debug() macros first +@@ +expression E; +expression list args; +@@ +( +-osip_debug +| +-oslocal_debug +) ++Debug + ( +-E, ++LDAP_DEBUG_TRACE, + args ); + +// replace Debug( ..., arg1, arg2, 0 ) with Debug2( ..., arg1, arg2 ) +@@ +char[] fmt : script:ocaml() { fmtn(fmt,2) }; +expression list[2] args; +expression E; +@@ + +-Debug ++Debug2 + ( E, _(fmt), args +-, 0 + ); + +// replace Debug( ..., arg1, 0, 0 ) with Debug1() +@@ +char[] fmt : script:ocaml() { fmtn(fmt,1) }; +expression list[1] args; +expression E; +@@ + +-Debug ++Debug1 + ( E, _(fmt), args +-, 0, 0 + ); + +// Zero-argument Debug() -> Debug0() +@@ +expression E, S; +@@ + +-Debug ++Debug0 + ( E, S +-, 0, 0, 0 + ); + +// everything else is a regular 3-argument debug macro, replace with Debug3() +@@ +expression E, S; +expression list[3] args; +@@ + +-Debug ++Debug3 + ( E, S, args ); diff --git a/doc/devel/variadic_debug/04-variadic.cocci b/doc/devel/variadic_debug/04-variadic.cocci new file mode 100644 index 0000000..bd5fbea --- /dev/null +++ b/doc/devel/variadic_debug/04-variadic.cocci @@ -0,0 +1,165 @@ +@initialize:ocaml@ +@@ +// count the number of % characters in the format string +let fmtn(fmt,n) = + List.length (Str.split_delim (Str.regexp_string "%") fmt) = n + 1 + +@@ +identifier Logs =~ "Log[0-9]"; +@@ +-Logs ++Log + +@@ +@@ +-StatslogTest ++LogTest + +// Process two-argument Debug() macros with an extra zero +@@ +char[] fmt : script:ocaml() { fmtn(fmt,2) }; +expression list[2] args; +expression E; +@@ + +Debug( E, fmt, args +-, 0 + ); + +@@ +char[] fmt : script:ocaml() { fmtn(fmt,2) }; +expression list[2] args; +expression E; +@@ + +Debug( E, fmt, args +-, NULL + ); + +// Single argument Debug() macros with two extra zeroes +@@ +char[] fmt : script:ocaml() { fmtn(fmt,1) }; +expression list[1] args; +expression E; +@@ + +Debug( E, fmt, args +-, 0, 0 + ); + +@@ +char[] fmt : script:ocaml() { fmtn(fmt,1) }; +expression list[1] args; +expression E; +@@ + +Debug( E, fmt, args +-, NULL, NULL + ); + +// Debug() macros with no arguments just padded with zeroes +@@ +expression E, S; +@@ + +Debug( E, S +-, 0, 0, 0 + ); + +@@ +expression E, S; +@@ + +Debug( E, S +-, NULL, NULL, NULL + ); + +// Similar to above, just for Statslog +@@ +char[] fmt : script:ocaml() { fmtn(fmt,5) }; +expression list[5] args; +expression E; +@@ + +-Statslog ++Debug + ( E, fmt, args ); + +@@ +char[] fmt : script:ocaml() { fmtn(fmt,4) }; +expression list[4] args; +expression E; +@@ + +-Statslog ++Debug + ( E, fmt, args +-, 0 + ); + +@@ +char[] fmt : script:ocaml() { fmtn(fmt,3) }; +expression list[3] args; +expression E; +@@ + +-Statslog ++Debug + ( E, fmt, args +-, 0, 0 + ); + +@@ +char[] fmt : script:ocaml() { fmtn(fmt,2) }; +expression list[2] args; +expression E; +@@ + +-Statslog ++Debug + ( E, fmt, args +-, 0, 0, 0 + ); + +@@ +char[] fmt : script:ocaml() { fmtn(fmt,1) }; +expression list[1] args; +expression E; +@@ + +-Statslog ++Debug + ( E, fmt, args +-, 0, 0, 0, 0 + ); + +@@ +expression E, S; +@@ + +-Statslog ++Debug + ( E, S +-, 0, 0, 0, 0, 0 + ); + +// And StatslogEtime +@@ +char[] fmt : script:ocaml() { fmtn(fmt,4) }; +expression list[4] args; +expression E; +@@ + +StatslogEtime( E, fmt, args +-, 0 + ); + +@@ +identifier Stats =~ "^Statslog"; +@@ +( + StatslogEtime +| +-Stats ++Debug +) diff --git a/doc/devel/variadic_debug/07-shortcut.cocci b/doc/devel/variadic_debug/07-shortcut.cocci new file mode 100644 index 0000000..99b3b55 --- /dev/null +++ b/doc/devel/variadic_debug/07-shortcut.cocci @@ -0,0 +1,216 @@ +// Splice string `s` into the format string `fmtstring` replacing the +// %-parameter at position `pos` +@initialize:python@ +@@ + +# regex from https://stackoverflow.com/questions/30011379/how-can-i-parse-a-c-format-string-in-python +import re +fmtstring = '''\ +( # start of capture group 1 +% # literal "%" +(?: # first option +(?:[-+0 #]{0,5}) # optional flags +(?:\d+|\*)? # width +(?:\.(?:\d+|\*))? # precision +(?:h|l|ll|w|I|I32|I64)? # size +[cCdiouxXeEfgGaAnpsSZ] # type +) | # OR +%%) # literal "%%" +''' + +regex = re.compile(fmtstring, re.X) + +def parse_format(f): + return tuple((m.span(), m.group()) for m in + regex.finditer(f)) + +def insert_at_pos(fmt, s, pos): + formats = parse_format(fmt) + span, format = formats[pos] + acc = fmt[:span[0]] + if s.startswith('"'): + acc += s[1:] + else: + acc += '" ' + acc += s + if acc.endswith('"'): + acc = acc[:-1] + fmt[span[1]:] + else: + acc += ' "' + acc += fmt[span[1]:] + return acc + +// rest of the file implements the same as 09-merge.cocci +// The main difference is that we only match on snprintf and Debug that are +// directly adjacent, not based on control flow information which trips +// coccinelle's model-checker +@shortcut@ +identifier buf; +expression E, L; +expression list args_before, args, args_after; +expression format1, format2; +position p1, p2; +@@ + +snprintf@p1( buf, E, format1, args ); +Debug@p2( L, format2, args_before, buf, args_after ); + +// use insert_at_pos above to construct the new format-string +@script:python shortcut_process@ +format1 << shortcut.format1; +format2 << shortcut.format2; +args_before << shortcut.args_before; +merged; +@@ + +pos = len(args_before.elements) +coccinelle.merged = insert_at_pos(format2, format1, pos) + +@shortcut_replace@ +position shortcut.p1, shortcut.p2; +identifier shortcut_process.merged; + +identifier buf; +expression E, L; +expression list args_before, args, args_after; +expression format1, format2; +@@ + +-snprintf@p1( buf, E, format1, args ); +-Debug@p2( L, format2, args_before, buf, args_after ); ++Debug( L, merged, args_before, args, args_after ); + +@shortcut_locked@ +identifier buf; +expression E, L, lock; +expression list args_before, args, args_after; +expression format1, format2; +position p1, p2; +@@ + +ldap_pvt_thread_mutex_lock(lock); +snprintf@p1( buf, E, format1, args ); +ldap_pvt_thread_mutex_unlock(lock); +Debug@p2( L, format2, args_before, buf, args_after ); + +// use insert_at_pos above to construct the new format-string +@script:python shortcut_locked_process@ +format1 << shortcut_locked.format1; +format2 << shortcut_locked.format2; +args_before << shortcut_locked.args_before; +merged; +@@ + +pos = len(args_before.elements) +coccinelle.merged = insert_at_pos(format2, format1, pos) + +@shortcut_locked_replace@ +position shortcut_locked.p1, shortcut_locked.p2; +identifier shortcut_locked_process.merged; + +identifier buf; +expression E, L, lock; +expression list args_before, args, args_after; +expression format1, format2; +@@ + +ldap_pvt_thread_mutex_lock(lock); +-snprintf@p1( buf, E, format1, args ); ++Debug( L, merged, args_before, args, args_after ); +ldap_pvt_thread_mutex_unlock(lock); +-Debug@p2( L, format2, args_before, buf, args_after ); + +// so long as we don't reference 'buf' afterwards, no need to keep it defined. +// A lot of pattern-matching is spelled out explicitly to work around the fact +// that the state space doesn't get compressed otherwise. +@@ +type T; +identifier buf, id; +expression E, lock; +initializer I; +@@ +{ +-\( T buf = I; \| T buf; \) +( + ldap_pvt_thread_mutex_lock(lock); +| +) +( + Debug( ... ); +& + ... when != buf +) +( + ldap_pvt_thread_mutex_unlock(lock); +| +) +( +| + continue; +| + break; +| + goto id; +| + \( + return E; + \& + ... when != buf + \) +) +} + +// the rest identifies and removes a (newly-)redundant LogTest check +@if_guard@ +position p; +statement s; +@@ + +( + if ( ... ) {@p + Debug( ... ); + } else s +| + if ( ... ) {@p + Debug( ... ); + } +) + +@else_guard@ +position p; +statement s; +@@ + +if ( ... ) s +else {@p + Debug( ... ); +} + +@loop_guard@ +position p; +@@ + +( + while ( ... ) {@p + Debug( ... ); + } +| + for ( ...;...;... ) {@p + Debug( ... ); + } +) + +@@ +position p != { if_guard.p , else_guard.p, loop_guard.p }; +@@ +-{@p + Debug( ... ); +-} + +@useless_if@ +expression L; +@@ + +-if ( LogTest( L ) ) { + Debug( L, ... ); +-} diff --git a/doc/devel/variadic_debug/09-merge.cocci b/doc/devel/variadic_debug/09-merge.cocci new file mode 100644 index 0000000..4b0c1b2 --- /dev/null +++ b/doc/devel/variadic_debug/09-merge.cocci @@ -0,0 +1,147 @@ +// Note that this file has not actually been used in the end, since +// 07-shortcut.cocci covers everything we needed in the project, but being +// simpler, it makes the intent of 07-shortcut.cocci clearer + + +// Splice string `s` into the format string `fmtstring` replacing the +// %-parameter at position `pos` +@initialize:python@ +@@ + +#regex from https://stackoverflow.com/questions/30011379/how-can-i-parse-a-c-format-string-in-python +import re +fmtstring = '''\ +( # start of capture group 1 +% # literal "%" +(?: # first option +(?:[-+0 #]{0,5}) # optional flags +(?:\d+|\*)? # width +(?:\.(?:\d+|\*))? # precision +(?:h|l|ll|w|I|I32|I64)? # size +[cCdiouxXeEfgGaAnpsSZ] # type +) | # OR +%%) # literal "%%" +''' + +regex = re.compile(fmtstring, re.X) + +def parse_format(f): + return tuple((m.span(), m.group()) for m in + regex.finditer(f)) + +def insert_at_pos(fmt, s, pos): + formats = parse_format(fmt) + span, format = formats[pos] + acc = fmt[:span[0]] + if s.startswith('"'): + acc += s[1:] + else: + acc += '" ' + acc += s + if acc.endswith('"'): + acc = acc[:-1] + fmt[span[1]:] + else: + acc += ' "' + acc += fmt[span[1]:] + return acc + +// Identify the redundant snprintfs (within a locked region) +@a exists@ +expression lock, E, L; +expression list args_before, args, args_after; +identifier buf; +expression format1, format2; +type T; +position p1, p2; +@@ + +{ +... +T buf; +... +ldap_pvt_thread_mutex_lock(lock); +... +snprintf@p1( buf, E, format1, args ); +... +ldap_pvt_thread_mutex_unlock(lock); +... +Debug@p2( L, format2, args_before, buf, args_after ); +... +} + +// Merge the format strings with insert_at_pos above +@script:python a_process@ +format1 << a.format1; +format2 << a.format2; +args_before << a.args_before; +merged; +@@ + +pos = len(args_before.elements) +coccinelle.merged = insert_at_pos(format2, format1, pos) + +// And merge the two together, replacing the extra buffer that's not used anymore +@a_replace@ +position a.p1, a.p2; +identifier a_process.merged; + +expression lock, E, L; +expression list args_before, args, args_after; +identifier buf; +expression format1, format2; +type T; +@@ + +{ +... +-T buf; +... +ldap_pvt_thread_mutex_lock(lock); +... +-snprintf@p1( buf, E, format1, args ); ++Debug( L, merged, args_before, args, args_after ); +... +ldap_pvt_thread_mutex_unlock(lock); +... +-Debug@p2( L, format2, args_before, buf, args_after ); +... +} + +// Once again (same as the 'a' series above, but those that remain to be sorted +// now don't need to stay within a locked region +@b exists@ +expression E, L; +expression list args_before, args, args_after; +identifier buf; +expression format1, format2; +position p1, p2; +@@ + +snprintf@p1( buf, E, format1, args ); +... +Debug@p2( L, format2, args_before, buf, args_after ); + +@script:python b_process@ +format1 << b.format1; +format2 << b.format2; +args_before << b.args_before; +merged; +@@ + +pos = len(args_before.elements) +coccinelle.merged = insert_at_pos(format2, format1, pos) + +@b_replace@ +position b.p1, b.p2; +identifier b_process.merged; + +expression E, L; +expression list args_before, args, args_after; +identifier buf; +expression format1, format2; +@@ + +-snprintf@p1( buf, E, format1, args ); ++Debug( L, merged, args_before, args, args_after ); +... +-Debug@p2( L, format2, args_before, buf, args_after ); diff --git a/doc/devel/variadic_debug/README b/doc/devel/variadic_debug/README new file mode 100644 index 0000000..3ccbea2 --- /dev/null +++ b/doc/devel/variadic_debug/README @@ -0,0 +1,39 @@ +Most of the project now depends on the compiler supporting C99 variadic +macros. This is used in the Debug() macro everywhere except libldap and +its dependencies. + +From now on, any time Debug( level, fmt, args... ) is used, you can and +should provide the appropriate number of arguments. The coccinelle +patches in this directory implement the transformations used to bring +the project in line with this. + +As we still aim to support libldap on platforms that only provide C89, +Debug0/1/2/3 macros are used instead. + +If you need to adapt your own fork, see ITS#8731, the rest of this +README and scripts in this directory on what you'll need to achieve +this. + +Coccinelle as of git hash e65a7bdc04ac9122acdae2353422c5736b7998ba from +https://github.com/coccinelle/coccinelle has been used to run the +transformations performed. One notable limitation at the time of writing +is that multi-part (format) strings are always merged onto the same line. + +Some sources cannot be processed, nssov overlay being a prime example, +being wrapped in non-trivial macros. + +The following semantic patches are involved: +- 03-libldap_Debug.cocci: converts the libraries to use the Debug[0123] + macros as appropriate +- 04-variadic.cocci: converts the rest of the project to use the Debug + macro with the right number of arguments (as opposed to padding with + zeroes) +- 09-merge.cocci will merge an 'snprintf(s, len, "fmt", args...); + Debug(level, "... %s ...", ..., s, ...);' sequence together +- 07-shortcut.cocci is actually used to apply the above since + coccinelle's model-checker seems to struggle with state space + explosion in some of the very long and complex functions we have - + 09-merge.cocci doesn't finish in any reasonable time + +The equivalence.iso and macros.h files aid coccinelle to parse our +sources correctly and simplify the semantic patches. diff --git a/doc/devel/variadic_debug/equivalence.iso b/doc/devel/variadic_debug/equivalence.iso new file mode 100644 index 0000000..07372fb --- /dev/null +++ b/doc/devel/variadic_debug/equivalence.iso @@ -0,0 +1,12 @@ +Expression +@ NULL @ +@@ + +NULL <=> 0 + +Expression +@ underscore_func @ +expression E; +@@ + +_(E) => E diff --git a/doc/devel/variadic_debug/macros.h b/doc/devel/variadic_debug/macros.h new file mode 100644 index 0000000..265c549 --- /dev/null +++ b/doc/devel/variadic_debug/macros.h @@ -0,0 +1,23 @@ +#define LDAP_PF_LOCAL_SENDMSG_ARG(x) + +#define LDAP_P(x) x +#define LDAP_F(x) extern x +#define LDAP_V(x) extern x + +#define LDAP_GCCATTR(x) +#define LDAP_XSTRING(x) "" +#define LDAP_CONCAT(x,y) x + +#define LDAP_CONST const +#define LDAP_BEGIN_DECL +#define LDAP_END_DECL + +#define SLAP_EVENT_DECL +#define SLAP_EVENT_FNAME + +/* contrib/slapd-modules/smbk5pwd/smbk5pwd.c */ +#define HDB int* + +#define BACKSQL_ARBITRARY_KEY +#define BACKSQL_IDNUMFMT "%llu" +#define BACKSQL_IDFMT "%s" diff --git a/doc/devel/variadic_debug/script.sh b/doc/devel/variadic_debug/script.sh new file mode 100755 index 0000000..b9fd9f0 --- /dev/null +++ b/doc/devel/variadic_debug/script.sh @@ -0,0 +1,73 @@ +#!/bin/bash + +set -e + +PATCH_DIR=doc/devel/variadic_debug + +SPATCH=${SPATCH:-spatch} +SPATCH_OPTS=( --macro-file-builtins "$PATCH_DIR/macros.h" ) +#SPATCH_OPTS+=( --timeout 300 ) + +SED_TRANSFORMATIONS=() + +# split out multipart strings back to original form (one per line) +SED_TRANSFORMATIONS+=( -e 's/^\(+\s*\)\(.*"\) \(".*\)"$/\1\2\n+\1\3/' ) + +# re-add whitespace around parentheses +SED_TRANSFORMATIONS+=( -e 's/^\(+.*Debug[0-3]\?(\)\s*/\1 /' ) +SED_TRANSFORMATIONS+=( -e 's/^\(+.*[^ ]\));$/\1 );/' ) + +# strip trailing whitespace copied from source on affected lines +SED_TRANSFORMATIONS+=( -e 's/^\(+.*\)\s\+$/\1/' ) + +# fix whitespace errors in source we touch +SED_TRANSFORMATIONS+=( -e 's/^\(+.*\) \t/\1\t\t/' ) +SED_TRANSFORMATIONS+=( -e 's/^\(+\t*\) \{1,3\}\t/\1\t/' ) + +normalise() { + patch="$1" + shift + + # iterate until we've reached fixpoint + while ! cmp "$patch" "${patch}.new" 2>/dev/null; do + if [ -e "${patch}.new" ]; then + mv -- "${patch}.new" "$patch" + fi + sed "${SED_TRANSFORMATIONS[@]}" -- "$patch" >"${patch}.new" + done + rediff "$patch" >"${patch}.new" + mv -- "${patch}.new" "$patch" +} + +git add "$PATCH_DIR" +git commit -m "ITS#8731 Add the documentation and scripts" + +git am "$PATCH_DIR/00-fixes.patch" +git am "$PATCH_DIR/01-logging.patch" +git am "$PATCH_DIR/02-manual.patch" + +$SPATCH "${SPATCH_OPTS[@]}" -sp_file "$PATCH_DIR/03-libldap_Debug.cocci" \ + -dir libraries/libldap \ + >"$PATCH_DIR/03-libldap_Debug.patch" +normalise "$PATCH_DIR/03-libldap_Debug.patch" +git apply --index --directory libraries/libldap "$PATCH_DIR/03-libldap_Debug.patch" +git commit -m "ITS#8731 Apply $PATCH_DIR/03-libldap_Debug.cocci" + +$SPATCH "${SPATCH_OPTS[@]}" -sp_file "$PATCH_DIR/04-variadic.cocci" \ + -dir . \ + >"$PATCH_DIR/04-variadic.patch" +normalise "$PATCH_DIR/04-variadic.patch" +git apply --index "$PATCH_DIR/04-variadic.patch" +git commit -m "ITS#8731 Apply $PATCH_DIR/04-variadic.cocci" + +git am "$PATCH_DIR/05-back-sql.patch" +git am "$PATCH_DIR/06-nssov.patch" + +$SPATCH "${SPATCH_OPTS[@]}" -sp_file "$PATCH_DIR/07-shortcut.cocci" \ + -dir . \ + >"$PATCH_DIR/07-shortcut.patch" +normalise "$PATCH_DIR/07-shortcut.patch" +git apply --index "$PATCH_DIR/07-shortcut.patch" +git commit -m "ITS#8731 Apply $PATCH_DIR/07-shortcut.cocci" + +git am "$PATCH_DIR/08-snprintf-manual.patch" |