diff options
author | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-07 16:35:32 +0000 |
---|---|---|
committer | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-07 16:35:32 +0000 |
commit | 5ea77a75dd2d2158401331879f3c8f47940a732c (patch) | |
tree | d89dc06e9f4850a900f161e25f84e922c4f86cc8 /doc/devel | |
parent | Initial commit. (diff) | |
download | openldap-5ea77a75dd2d2158401331879f3c8f47940a732c.tar.xz openldap-5ea77a75dd2d2158401331879f3c8f47940a732c.zip |
Adding upstream version 2.5.13+dfsg. (refs: upstream/2.5.13+dfsg, upstream)
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'doc/devel')
-rw-r--r-- | doc/devel/OIDs | 119 | ||||
-rw-r--r-- | doc/devel/README | 9 | ||||
-rw-r--r-- | doc/devel/args | 64 | ||||
-rw-r--r-- | doc/devel/lloadd/design.md | 282 | ||||
-rw-r--r-- | doc/devel/template.c | 26 | ||||
-rw-r--r-- | doc/devel/todo | 67 | ||||
-rw-r--r-- | doc/devel/toolargs | 31 | ||||
-rw-r--r-- | doc/devel/utfconv.txt | 291 | ||||
-rw-r--r-- | doc/devel/variadic_debug/03-libldap_Debug.cocci | 70 | ||||
-rw-r--r-- | doc/devel/variadic_debug/04-variadic.cocci | 165 | ||||
-rw-r--r-- | doc/devel/variadic_debug/07-shortcut.cocci | 216 | ||||
-rw-r--r-- | doc/devel/variadic_debug/09-merge.cocci | 147 | ||||
-rw-r--r-- | doc/devel/variadic_debug/README | 39 | ||||
-rw-r--r-- | doc/devel/variadic_debug/equivalence.iso | 12 | ||||
-rw-r--r-- | doc/devel/variadic_debug/macros.h | 23 | ||||
-rwxr-xr-x | doc/devel/variadic_debug/script.sh | 73 |
16 files changed, 1634 insertions, 0 deletions
diff --git a/doc/devel/OIDs b/doc/devel/OIDs new file mode 100644 index 0000000..af2bf88 --- /dev/null +++ b/doc/devel/OIDs @@ -0,0 +1,119 @@ +OpenLDAProot 1.3.6.1.4.1.4203 + +OpenLDAP OpenLDAProot:1 + +OpenLDAPsyntax OpenLDAP:1 + authPasswordSyntax OpenLDAPsyntax:2 + +OpenLDAPmatchingrule OpenLDAP:2 + authPasswordExactMatch OpenLDAPmatchingrule:2 + authPasswordMatch OpenLDAPmatchingrule:3 + +OpenLDAPattributeType OpenLDAP:3 + supportedAuthPasswordSchemas OpenLDAPattributeType:3 + authPassword OpenLDAPattributeType:4 + supportedFeatures OpenLDAPattributeType:5 + +OpenLDAPobjectClass OpenLDAP:4 + OpenLDAPorg OpenLDAPObjectClass:3 + OpenLDAPou OpenLDAPObjectClass:4 + OpenLDAPperson OpenLDAPObjectClass:5 + OpenLDAPdisplayableObject OpenLDAPObjectClass:6 + authPasswordObject OpenLDAPobjectClass:7 + +OpenLDAPfeatures OpenLDAP:5 + allOperationalAttrs OpenLDAPfeatures:1 + OC AD lists OpenLDAPfeatures:2 + TrueFalseFilters OpenLDAPfeatures:3 + languageTagOptions OpenLDAPfeatures:4 + languageRangetags OpenLDAPfeatures:5 + +Syncrepl OpenLDAP:9 + +OpenLDAPcontrol OpenLDAP:10 + SubentriesControl OpenLDAPcontrol:1 + +OpenLDAPexop OpenLDAP:11 + passwordModify OpenLDAPexop:1 + whoAmI OpenLDAPexop:3 + +OpenLDAPinternal OpenLDAP:12 + OpenLDAPtesting OpenLDAPinternal:1 + OpenLDAPconfig OpenLDAPinternal:2 + + +OpenLDAPexperimental OpenLDAProot:666 + +ExperimentalAttr OpenLDAPexperimental:1 + OpenLDAPaci ExperimentalAttr:5 + entryCSN ExperimentalAttr:7 + authzTo ExperimentalAttr:8 + authzFrom ExperimentalAttr:9 + monitorContext ExperimentalAttr:10 + superiorUUID ExperimentalAttr:11 check - is this dup of parentUUID? 
+ namingCSN ExperimentalAttr:13 + syncreplCookie ExperimentalAttr:23 + contextCSN ExperimentalAttr:25 + syncTimestamp ExperimentalAttr:26 + lastmodDN ExperimentalAttr:28 (contrib/slapd-modules/lastmod) + lastmodType ExperimentalAttr:29 + lastmodEnabled ExperimentalAttr:30 + monitorAttrs ExperimentalAttr:55 (back-monitor) + entryExpireTimestamp ExperimentalAttr:57 (slapo-dds) + rdnValue ExperimentalAttr:58 (contrib/slapd-modules/samba4) + parentUUID ExperimentalAttr:59 (...samba4) + x509PrivateKey ExperimentalAttr:60 + + +ExperimentalSyntax OpenLDAPexperimental:2 + ACIsyntax ExperimentalSyntax:1 + authPassword ExperimentalSyntax:2 check - this was promoted to RFC3112 + authz ExperimentalSyntax:7 + privateKey ExperimentalSyntax:13 + +ExperimentalObjectClass OpenLDAPexperimental:3 + glue ExperimentalObjectClass:4 + syncConsumerSubentry ExperimentalObjectClass:5 + syncProviderSubentry ExperimentalObjectClass:6 + lastmod ExperimentalObjectClass:13 + monitorClasses ExperimentalObjectClass:16 + +ExperimentalMatchingRule OpenLDAPexperimental:4 + authPasswordMatch ExperimentalMatchingRule:1 check - this was promoted to RFC3112 + ACImatch ExperimentalMatchingRule:2 + directoryStringApproxMatch ExperimentalMatchingRule:4 + IA5stringApproxMatch ExperimentalMatchingRule:5 + dnOneLevelMatch ExperimentalMatchingRule:8 + dnSubtreeMatch ExperimentalMatchingRule:9 + dnSubordinateMatch ExperimentalMatchingRule:10 + dnSuperiorMatch ExperimentalMatchingRule:11 + authzMatch ExperimentalMatchingRule:12 + privateKeyMatch ExperimentalMatchingRule:13 + +ExperimentalControl OpenLDAPexperimental:5 + noop ExperimentalControl:2 + noSubordinates ExperimentalControl:11 + relax ExperimentalControl:12 + slurp ExperimentalControl:13 + valsort ExperimentalControl:14 + deref ExperimentalControl:16 + whatfailed ExperimentalControl:17 + noopsrch ExperimentalControl:18 + +ExperimentalExop OpenLDAPexperimental:6 + verifyCredentials ExperimentalExop:5 + +ExperimentalFeatures OpenLDAPexperimental:8 + 
subordinateScope ExperimentalFeatures:1 + +SelfContainedWorks OpenLDAPexperimental:11 + CSNs SelfContainedWorks:2 + chaining SelfContainedWorks:3 + retcode SelfContainedWorks:4 + accesslog SelfContainedWorks:5 + distProc SelfContainedWorks:6 + LDAP txns SelfContainedWorks:7 (replaced by 1.3.6.1.1.21 RFC 5805) + dyngroup SelfContainedWorks:8 + proxyCache SelfContainedWorks:9 + X509 PMI SelfContainedWorks:10 + autoca SelfContainedWorks:11 diff --git a/doc/devel/README b/doc/devel/README new file mode 100644 index 0000000..3a0cb3d --- /dev/null +++ b/doc/devel/README @@ -0,0 +1,9 @@ +The OpenLDAP Developer's FAQ is available at: + http://www.openldap.org/faq/index.cgi?file=4 + +Additional developer pages are at: + http://www.openldap.org/devel/ + + +--- +$OpenLDAP$ diff --git a/doc/devel/args b/doc/devel/args new file mode 100644 index 0000000..c5aa02f --- /dev/null +++ b/doc/devel/args @@ -0,0 +1,64 @@ +Tools ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz +ldapcompare * DE**HI** MNOPQR UVWXYZ de *h*** *nop* vwxyz +ldapdelete *CDE**HI** MNOPQR UVWXYZ cdef*h*** *nop* vwxyz +ldapexop * D **HI** NO QR UVWXYZ de *h*** *nop vwxy +ldapmodify *CDE**HI** MNOPQRS UVWXYZabcde *h*** *nop*r t vwxy +ldapmodrdn *CDE**HI** MNOPQR UVWXYZ cdef*h*** *nop*rs vwxy +ldappasswd A*CDE**HI** NO QRS UVWXYZa def*h*** * o * s vwxy +ldapsearch A*CDE**HI**LMNOPQRSTUVWXYZab def*h***l*nop* stuvwxyz +ldapurl * E**H ** S ab f*h*** * p* s +ldapvc * DE**HI** NO QRS UVWXYZa cdef*h*** *nop* vwxy +ldapwhoami * DE**HI** NO QR UVWXYZ def*h*** *nop* vwxy + + +* reserved + BFGJgijmq01235789 + +* General flags: + -C Chase Referrals + -D Bind DN + -E Tool-specific Extensions (e.g., -E <[!]oid[=options]>*) + -e General Extensions (e.g., -e <[!]oid[=options]>*) + -f file + -H URI + -P protocol version + -V version information + -W prompt for bind password + -d debug + -h host + -n no-op + -N no (SASLprep) normalization of simple bind password + -o general libldap options (plus ldif_wrap and nettimeout for 
backwards comp.) + -p port + -v verbose + -V version + -x simple bind + -y Bind password-file + -w Bind password + +Not used + -4 IPv4 only + -6 IPv6 only + + +* LDAPv3 Only + -M ManageDSAIT + -Z StartTLS + + -Y SASL Mechanism (defaults to "best") + -R SASL Realm (defaults to empty) + -O SASL Security Options (defaults to "noanonymous,noplain") + -U SASL Authentication Identity (defaults to USER) + -X SASL Authorization Identity (defaults to empty) + + -I SASL interactive mode (default: automatic) + -Q SASL quiet mode (default: automatic) + + +* LDAPv2+ Only (REMOVED) + -K LDAPv2 Kerberos Bind (Step 1 only) + -k LDAPv2 Kerberos Bind + + +--- +$OpenLDAP$ diff --git a/doc/devel/lloadd/design.md b/doc/devel/lloadd/design.md new file mode 100644 index 0000000..62fcd88 --- /dev/null +++ b/doc/devel/lloadd/design.md @@ -0,0 +1,282 @@ +TODO: +- [ ] keep a global op in-flight counter? (might need locking) +- [-] scheduling (who does what, more than one select thread? How does the proxy + work get distributed between threads?) +- [ ] managing timeouts? +- [X] outline locking policy: seems like there might be a lock inversion in the + design looming: when working with op, might need a lock on both client and + upstream but depending on where we started, we might want to start with + locking one, then other +- [ ] how to deal with the balancer running out of fds? Especially when we hit + the limit, then lose an upstream connection and accept() a client, we + wouldn't be able to initiate a new one. A bit of a DoS... But probably not + a concern for Ericsson +- [ ] non-Linux? No idea how anything other than poll works (moot if building a + libevent/libuv-based load balancer since they take care of that, except + edge-triggered I/O?) +- [-] rootDSE? Controls and exops might have different semantics and need + binding to the same upstream connection. +- [ ] Just piggybacking on OpenLDAP as a module? 
Would still need some updates + in the core and the module/subsystem would be a very invasive one. On the + other hand, allows to expose live configuration and monitoring over LDAP + over the current slapd listeners without re-inventing the wheel. + + +Expecting to handle only LDAPv3 + +terms: + server - configured target + upstream - a single connection to a server + client - an incoming connection + +To maintain fairness `G( requested => ( F( progressed | failed ) ) )`, use +queues and put timeouts in + +Runtime organisation +------ +- main thread with its own event base handling signals +- one thread (later possibly more) listening on the rendezvous sockets, handing + the new sockets to worker threads +- n worker threads dealing with client and server I/O (dispatching actual work + to the thread pool most likely) +- a thread pool to handle actual work + +Operational behaviour +------ + +- client read -> upstream write: + - client read: + - if TLS_SETUP, keep processing, set state back when finished and note that + we're under TLS + - ber_get_next(), if we don't have a tag, finished (unless we have true + edge-triggered I/O, also put the fd back into the ones we're waiting for) + - peek at op tag: + - unbind: + - with a single lock, mark all pending ops in upstreams abandoned, clear + client link (would it be fast enough if we remove them from upstream + map instead?) + - locked per op: + - remove op from upstream map + - check upstream is not write-suspended, if it is ... + - try to write the abandon op to upstream, suspend upstream if not + fully sent + - remove op from client map (how if we're in avl_apply?, another pass?) 
+ - would be nice if we could wipe the complete client map then, otherwise + we need to queue it to have it freed when all abandons get passed onto + the upstream (just dropping them might put extra strain on upstreams, + will probably have a queue on each client/upstream anyway, not just a + single Ber) + - bind: + - check mechanism is not EXTERNAL (or implement it) + - abandon existing ops (see unbind) + - set state to BINDING, put DN into authzid + - pick upstream, create PDU and send + - abandon: + - find op, mark for abandon, send to appropriate upstream + - Exop: + - check not BINDING (unless it's a cancel?) + - check OID: + - STARTTLS: + - check we don't have TLS yet + - abandon all + - set state to TLS_SETUP + - send the hello + - VC(?): + - similar to bind except for the abandons/state change + - other: + - check not BINDING + - pick an upstream + - create a PDU, send (marking upstream suspended if not written in full) + - check if should read again (keep a counter of number of times to read + off a connection in a single pass so that we maintain fairness) + - if read enough requests and can still read, re-queue ourselves (if we + don't have true edge-triggered I/O, we can just register the fd again) + - upstream write (only when suspended): + - flush the current BER + - there shouldn't be anything else? 
+- upstream read -> client write: + - upstream read: + - ber_get_next(), if we don't have a tag, finished (unless we have true + edge-triggered I/O, also put the fd back into the ones we're waiting for) + - when we get it, peek at msgid, resolve client connection, lock, check: + - if unsolicited, handle as close (and mark connection closing) + - if op is abandoned or does not exist, drop PDU and op, update counters + - if client backlogged, suspend upstream, register callback to unsuspend + (on progress when writing to client or abandon from client (connection + death, abandon proper, ...)) + - reconstruct final PDU, write BER to client, if did not write fully, + suspend client + - if a final response, decrement operation counts on upstream and client + - check if should read again (keep a counter of number of responses to read + off a connection in a single pass so that we don't starve any?) + - client write ready (only checked for when suspended): + - write the rest of pending BER if any + - on successful write, pick all pending ops that need failure response, push + to client (are there any controls that need to be present in response even + in the case of failure?, what to do with them?) + - on successfully flushing them, walk through suspended upstreams, picking + the pending PDU (unsuspending the upstream) and writing, if PDU flushed + successfully, pick next upstream + - if we successfully flushed all suspended upstreams, unsuspend client + (and disable the write callback) +- upstream close/error: + - look up pending ops, try to write to clients, mark clients suspended that + have ops that need responses (another queue associated with client to speed + up?) 
+ - schedule a new connection open +- client close/error: + - same as unbind +- client inactive (no pending ops and nothing happened in x seconds) + - might just send notice of disconnection and close +- op timeout handling: + - mark for abandon + - send abandon + - send timeLimitExceeded/adminLimitExceeded to client + +Picking an upstream: +- while there is a level available: + - pick a random ordering of upstreams based on weights + - while there is an upstream in the level: + - check number of ops in-flight (this is where we lock the upstream map) + - find the least busy connection (and check if a new connection should be + opened) + - try to lock for socket write, if available (no BER queued) we have our + upstream + +PDU processing: +- request (have an upstream selected): + - get new msgid from upstream + - create an Op structure (actually, with the need for freelist lock, we can + make it a cache for freed operation structures, avoiding some malloc + traffic, to reset, we need slap_sl_mem_create( ,,, 1 )) + - check proxyauthz is not present? or just let upstream reject it if there are + two? + - add own controls at the end: + - construct proxyauthz from authzid + - construct session tracking from remote IP, own name, authzid + - send over + - insert Op into client and upstream maps +- response/intermediate/entry: + - look up Op in upstream's map + - write old msgid, rest of the response can go unchanged + - if a response, remove Op from all maps (client and upstream) + +Managing upstreams: +- async connect up to min_connections (is there a point in having a connection + count range if we can't use it when needed since all of the below is async?) 
+- when connected, set up TLS (if requested) +- when done, send a bind +- go for the bind interaction +- when done, add it to the upstream's connection list +- (if a connection is suspended or connections are over 75 % op limit, schedule + creating a new connection setup unless connection limit has been hit) + +Managing timeouts: +- two options: + - maintain a separate locked priority queue to give a perfect ordering to when + each operation is to time out, would need to maintain yet another place + where operations can be found. + - the locking protocol for disposing of the operation would need to be + adjusted and might become even more complicated, might do the alternative + initially and then attempt this if it helps performance + - just do a sweep over all clients (that mutex is less contended) every so + often. With many in-flight operations might be a lot of wasted work. + - we still need to sweep over all clients to check if they should be killed + anyway + +Dispatcher thread (2^n of them, fd x is handled by thread no x % (2^n)): +- poll on all registered fds +- remove each fd that's ready from the registered list and schedule the work +- work threads can put their fd back in if they deem necessary (=not suspended) +- this works as a poor man's edge-triggered polling, with enough workers, should + we do proper edge triggered I/O? What about non-Linux? + +Listener thread: +- slapd has just one, which then reassigns the sockets to separate I/O + threads + +Threading: +- if using slap_sl_malloc, how much perf do we gain? To allocate a context per + op, we should have a dedicated parent context so that when we free it, we can + use that exclusively. The parent context's parent would be the main thread's + context. This implies a lot of slap_sl_mem_setctx/slap_sl_mem_create( ,,, 0 ) + and making sure an op does not allocate/free things from two threads at the + same time (might need an Op mutex after all? 
Not such a huge cost if we + routinely reuse Op structures) + +Locking policy: +- read mutexes are unnecessary, we only have one thread receiving data from the + connection - the one started from the dispatcher +- two reference counters of operation structures (an op is accessible from + client and upstream map, each counter is consistent when thread has a lock on + corresponding map), when decreasing the counter to zero, start freeing + procedure +- place to mark disposal finished for each side, consistency enforced by holding + the freelist lock when reading/manipulating +- when op is created, we already have a write lock on upstream socket and map, + start writing, insert to upstream map with upstream refcount 1, unlock, lock + client, insert (client refcount 0), unlock, lock upstream, decrement refcount + (triggers a test if we need to drop it now), unlock upstream, done +- when upstream processes a PDU, locks its map, increments counter, (potentially + removes if it's a response), unlocks, locks client's map, write mutex (this + order?) 
and full client mutex (if a bind response) +- when client side wants to work with a PDU (abandon, (un)bind), locks its map, + increase refcount, unlocks, locks upstream map, write mutex, sends or queues + abandon, unlocks write mutex, initiates freeing procedure from upstream side + (or if having to remember we've already increased client-side refcount, mark + for deletion, lose upstream lock, lock client, decref, either triggering + deletion from client or mark for it) +- if we have operation lock, we can simplify a bit (no need for three-stage + locking above) + +Shutdown: +- stop accept() thread(s) - potentially add a channel to hand these listening + sockets over for zero-downtime restart +- if very gentle, mark connections as closing, start timeout and: + - when a new non-abandon PDU comes in from client - return LDAP_UNAVAILABLE + - when receiving a PDU from upstream, send over to client, if no ops pending, + send unsolicited response and close (RFC4511 suggests unsolicited response + is the last PDU coming from the upstream and libldap agrees, so we can't + send it for a socket we want to shut down more gracefully) +- gentle (or very gentle timed out): + - set timeout + - mark all ops as abandoned + - send unbind to all upstreams + - send unsolicited to all clients +- imminent (or gentle timed out): + - async close all connections? + - exit() + +RootDSE: +- default option is not to care and if a control/exop has special restrictions, + it is the admin's job to flag it as such in the load-balancer's config +- another is not to care about the search request but check each search entry + being passed back, check DN and if it's a rootDSE, filter the list of + controls/exops/sasl mechs (external!) 
that are supported +- last one is to check all search requests for the DN/scope and synthesise the + response locally - probably not (would need to configure the complete list of + controls, exops, sasl mechs, naming contexts in the balancer) + +Potential red flags: +- we suspend upstreams, if we ever suspend clients we need to be sure we can't + create dependency cycles + - is this an issue when only suspending the read side of each? Because even if + we stop reading from everything, we should eventually flush data to those we + can still talk to, as upstreams are flushed, we can start sending new + requests from live clients (those that are suspended are due to their own + inability to accept data) + - we might need to suspend a client if there is a reason to choose a + particular upstream (multi-request operation - bind, VC, PR, TXN, ...) + - a SASL bind, but that means there are no outstanding ops to receive + it holds that !suspended(client) \or !suspended(upstream), so they + cannot participate in a cycle + - VC - multiple binds at the same time - !!! more analysis needed + - PR - should only be able to have one per connection (that's a problem + for later, maybe even needs a dedicated upstream connection) + - TXN - ??? probably same situation as PR + - or if we have a queue for pending Bers on the server, we do not need to suspend + clients, upstream is only chosen if the queue is free or there is a reason + to send it to that particular upstream (multi-stage bind/VC, PR, ...), but + that still makes it possible for a client to exhaust all our memory by + sending requests (VC or other ones bound to a slow upstream or by not + reading the responses at all) diff --git a/doc/devel/template.c b/doc/devel/template.c new file mode 100644 index 0000000..28e028d --- /dev/null +++ b/doc/devel/template.c @@ -0,0 +1,26 @@ +/* template.c -- example OpenLDAP source file */ +/* $OpenLDAP$ */ +/* This work is part of OpenLDAP Software <http://www.openldap.org/>. 
+ * + * Copyright YEAR The OpenLDAP Foundation. + * Portions Copyright YEAR Secondary Rights Holder. + * Portions Copyright YEAR Another Rights Holder. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted only as authorized by the OpenLDAP + * Public License. + * + * A copy of this license is available in the file LICENSE in the + * top-level directory of the distribution or, alternatively, at + * <http://www.OpenLDAP.org/license.html>. + */ +/* Additional (custom) notices (where necessary). + * Please consult Kurt Zeilenga <kurt@openldap.org> before adding + * additional notices. + */ +/* ACKNOWLEDGEMENTS: + * This work was initially developed by Jane Doe for inclusion in + * OpenLDAP Software. Additional significant contributors include: + * John Doe + */ diff --git a/doc/devel/todo b/doc/devel/todo new file mode 100644 index 0000000..670e9cc --- /dev/null +++ b/doc/devel/todo @@ -0,0 +1,67 @@ +OpenLDAP Software To Do List +---------------------------- + +This is a list of projects that need getting done. They are defined +by scale of the effort as opposed to priority. Contribute to +projects based upon your personal priorities. + +If you would like to work on any of these projects, please coordinate +by posting to OpenLDAP-devel mailing list: + http://www.OpenLDAP.org/lists + +If you have a project you'd like added to the list, talk it up on +Developer's list or just do it. + +Please read: + http://www.OpenLDAP.org/devel/programming.html + http://www.OpenLDAP.org/devel/contributing.html + + +OpenLDAP 2.x Projects +--------------------- + SLAPD + Complete Unicode Support (ACLs, etc.) + client C API update + Implement per referral/continuation callback + clients (e.g. ldapsearch(1)) + Implement referral chasing options w/ referral callback + Update manual pages + + +Large projects +-------------- +Implement character string localization +Implement X.500 administrative models (e.g. 
subentries (RFC 3672), etc.) +Implement LDAP sorted search results control (RFC 2891) + + +Medium projects +--------------- +Add syncrepl turn +Implement DIT Structure Rules and Name Forms +Implement LDAPprep +Implement native support for simple SASL mechanisms + (e.g. EXTERNAL and PLAIN) +Redesign slapd memory allocation fault handling +Localize tools + + +Small projects +-------------- +Add DSML capabilities to command line tools +Add LDIFv2 (XML) support to command line tools +Implement authPassword (RFC 3112) +Implement SASLprep (RFC 4013) for LDAP (draft-ietf-ldapbis-*) +Implement additional matching rules (RFC 3698) +Add dumpasn1 logging support +Add tests to test suite +Recode linked-list structs to use <ldap_queue.h> macros +Convert utfconv.txt into man page(s). +Update manual pages as needed. + + +For additional TODO items, see: + https://bugs.openldap.org + +--- +$OpenLDAP$ diff --git a/doc/devel/toolargs b/doc/devel/toolargs new file mode 100644 index 0000000..f0f8d9f --- /dev/null +++ b/doc/devel/toolargs @@ -0,0 +1,31 @@ +Tools ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz +slapacl D F U X b d f o uv +slapadd F S bcd fg j l no q s uvw +slapauth F M R U X d f o v +slapcat F H abcd fg l no s v +slapdn F N P d f o v +slapindex F bcd fg no q t v +slapmodify F S bcd fg j l no q s uvw +slappasswd T c h s uv +slapschema F H abcd fg l no s v +slaptest F Q d f no uv + +* General flags: + -F config directory + -U authcID + -X authzID + -b suffix (slapacl: entryDN) + -c continue mode + -d debug level + -f config file + -g disable subordinate gluing + -l LDIF file + -n database number + -o options + -q "quick" mode + -s disable schema checking (slapcat: subtree, slappasswd: secret) + -u dryrun (slappasswd: RFC2307 userPassword) + -v verbose + +--- +$OpenLDAP$ diff --git a/doc/devel/utfconv.txt b/doc/devel/utfconv.txt new file mode 100644 index 0000000..1adaab5 --- /dev/null +++ b/doc/devel/utfconv.txt @@ -0,0 +1,291 @@ + Dec 5, 2000 + Dave Steck + Novell, 
Inc. + + UTF-8 Conversion Functions + + +1. Strings in the LDAP C SDK should be encoded in UTF-8 format. + However, most platforms do not provide APIs for converting to + this format. If they do, they are platform-specific. + + As a result, most applications (knowingly or not) use local strings + with LDAP functions. This works fine for 7-bit ASCII characters, + but will fail with 8-bit European characters, Asian characters, etc. + + We propose adding the following platform-independent conversion functions + to the OpenLDAP SDK. There are 4 functions for converting between UTF-8 + and wide characters, and 4 functions for converting between UTF-8 and + multibyte characters. + + For multibyte to UTF-8 conversions, charset translation is necessary. + While a full charset translator is not practical or appropriate for the + LDAP SDK, we can pass the translator function in as an argument. + A NULL for this argument will use the ANSI C functions mbtowc, mbstowcs, + wctomb, and wcstombs. + +2. UTF-8 <--> Wide Character conversions + +The following new conversion routines will be added, following the pattern of +the ANSI C conversion routines (mbtowc, mbstowcs, etc). These routines use +the wchar_t type. wchar_t is 2 bytes on some systems and 4 bytes on others. +However the advantage of using wchar_t is that all the standard wide character +string functions may be used on these strings: wcslen, wcscpy, etc. + + int ldap_x_utf8_to_wc - Convert a single UTF-8 encoded character to a wide character. + int ldap_x_utf8s_to_wcs - Convert a UTF-8 string to a wide character string. + int ldap_x_wc_to_utf8 - Convert a single wide character to a UTF-8 sequence. + int ldap_x_wcs_to_utf8s - Convert a wide character string to a UTF-8 string. + + +2.1 ldap_x_utf8_to_wc - Convert a single UTF-8 encoded character to a wide character. + +int ldap_x_utf8_to_wc ( wchar_t *wchar, const char *utf8char ) + + wchar (OUT) Points to a wide character code to receive the + converted character. 
+ + utf8char (IN) Address of the UTF8 sequence of bytes. + +Return Value: + If successful, the function returns the length in + bytes of the UTF-8 input character. + + If utf8char is NULL or points to an empty string, the + function returns 1 and a NULL is written to wchar. + + If utf8char contains an invalid UTF-8 sequence, -1 is returned. + + +2.2 ldap_x_utf8s_to_wcs - Convert a UTF-8 string to a wide character string. + +int ldap_x_utf8s_to_wcs (wchar_t *wcstr, const char *utf8str, size_t count) + + wcstr (OUT) Points to a wide char buffer to receive the + converted wide char string. The output string will be + null terminated if there is space for it in the + buffer. + + utf8str (IN) Address of the null-terminated UTF-8 string to convert. + + count (IN) The number of UTF-8 characters to convert, or + equivalently, the size of the output buffer in wide + characters. + +Return Value: + If successful, the function returns the number of wide + characters written to wcstr, excluding the null termination + character, if any. + + If wcstr is NULL, the function returns the number of wide + characters required to contain the converted string, + excluding the null termination character. + + If an invalid UTF-8 sequence is encountered, the + function returns -1. + + If the return value equals count, there was not enough space to fit the + string and the null terminator in the buffer. + + +2.3 ldap_x_wc_to_utf8 - Convert a single wide character to a UTF-8 sequence. + +int ldap_x_wc_to_utf8 ( char *utf8char, wchar_t wchar, size_t count ) + + utf8char (OUT) Points to a byte array to receive the converted UTF-8 + string. + + wchar (IN) The wide character to convert. + + count (IN) The maximum number of bytes to write to the output + buffer. Normally set this to LDAP_MAX_UTF8_LEN, which + is defined as 3 or 6 depending on the size of wchar_t. + A partial character will not be written. 
+ +Return Value: + If successful, the function returns the length in bytes of + the converted UTF-8 output character. + + If wchar is NULL, the function returns 1 and a NULL is + written to utf8char. + + If wchar cannot be converted to a UTF-8 character, the + function returns -1. + + +2.4 int ldap_x_wcs_to_utf8s - Convert a wide character string to a UTF-8 string. + +int ldap_x_wcs_to_utf8s (char *utf8str, const wchar_t *wcstr, size_t count) + + utf8str (OUT) Points to a byte array to receive the converted + UTF-8 string. The output string will be null + terminated if there is space for it in the + buffer. + + + wcstr (IN) Address of the null-terminated wide char string to convert. + + count (IN) The size of the output buffer in bytes. + +Return Value: + If successful, the function returns the number of bytes + written to utf8str, excluding the null termination + character, if any. + + If utf8str is NULL, the function returns the number of + bytes required to contain the converted string, excluding + the null termination character. The 'count' parameter is ignored. + + If the function encounters a wide character that cannot + be mapped to a UTF-8 sequence, the function returns -1. + + If the return value equals count, there was not enough space to fit + the string and the null terminator in the buffer. + + + +3. Multi-byte <--> UTF-8 Conversions + +These functions convert the string in a two-step process, from multibyte +to Wide, then from Wide to UTF8, or vice versa. This conversion requires a +charset translation routine, which is passed in as an argument. + + ldap_x_mb_to_utf8 - Convert a multi-byte character to a UTF-8 character. + ldap_x_mbs_to_utf8s - Convert a multi-byte string to a UTF-8 string. + ldap_x_utf8_to_mb - Convert a UTF-8 character to a multi-byte character. + ldap_x_utf8s_to_mbs - Convert a UTF-8 string to a multi-byte string. + +3.1 ldap_x_mb_to_utf8 - Convert a multi-byte character to a UTF-8 character. 
+ +int ldap_x_mb_to_utf8 ( char *utf8char, const char *mbchar, size_t mbsize, int (*f_mbtowc)(wchar_t *wchar, const char *mbchar, size_t count) ) + + utf8char (OUT) Points to a byte buffer to receive the converted + UTF-8 character. May be NULL. The output is not + null-terminated. + + mbchar (IN) Address of a sequence of bytes forming a multibyte character. + + mbsize (IN) The maximum number of bytes of the mbchar argument to + check. This should normally be MB_CUR_MAX. + + f_mbtowc (IN) The function to use for converting a multibyte + character to a wide character. If NULL, the local + ANSI C routine mbtowc is used. + +Return Value: + If successful, the function returns the length in bytes of + the UTF-8 output character. + + If utf8char is NULL, count is ignored and the function + returns the number of bytes that would be written to the + output char. + + If count is zero, 0 is returned and nothing is written to + utf8char. + + If mbchar is NULL or points to an empty string, the + function returns 1 and a null byte is written to utf8char. + + If mbchar contains an invalid multi-byte character, -1 is returned. + + +3.2 ldap_x_mbs_to_utf8s - Convert a multi-byte string to a UTF-8 string. + +int ldap_x_mbs_to_utf8s (char *utf8str, const char *mbstr, size_t count, + size_t (*f_mbstowcs)(wchar_t *wcstr, const char *mbstr, size_t count)) + +utf8str (OUT) Points to a buffer to receive the converted UTF-8 string. + May be NULL. + + mbstr (IN) Address of the null-terminated multi-byte input string. + + count (IN) The size of the output buffer in bytes. + + f_mbstowcs (IN) The function to use for converting a multibyte string + to a wide character string. If NULL, the local ANSI + C routine mbstowcs is used. + +Return Value: + If successful, the function returns the length in + bytes of the UTF-8 output string, excluding the null + terminator, if present. 
+ + If utf8str is NULL, count is ignored and the function + returns the number of bytes required for the output string, + excluding the null terminator. + + If count is zero, 0 is returned and nothing is written to utf8str. + + If mbstr is NULL or points to an empty string, the + function returns 1 and a null byte is written to utf8str. + + If mbstr contains an invalid multi-byte character, -1 is returned. + + If the returned value is equal to count, the entire null-terminated + string would not fit in the output buffer. + + +3.3 ldap_x_utf8_to_mb - Convert a UTF-8 character to a multi-byte character. + +int ldap_x_utf8_to_mb ( char *mbchar, const char *utf8char, + int (*f_wctomb)(char *mbchar, wchar_t wchar) ) + +mbchar (OUT) Points to a byte buffer to receive the converted multi-byte + character. May be NULL. + + utf8char (IN) Address of the UTF-8 character sequence. + + f_wctomb (IN) The function to use for converting a wide character + to a multibyte character. If NULL, the local + ANSI C routine wctomb is used. + + +Return Value: + If successful, the function returns the length in + bytes of the multi-byte output character. + + If utf8char is NULL or points to an empty string, the + function returns 1 and a null byte is written to mbchar. + + If utf8char contains an invalid UTF-8 sequence, -1 is returned. + + +3.4 ldap_x_utf8s_to_mbs - Convert a UTF-8 string to a multi-byte string. + + +int ldap_x_utf8s_to_mbs ( char *mbstr, const char *utf8str, size_t count, + size_t (*f_wcstombs)(char *mbstr, const wchar_t *wcstr, size_t count) ) + + mbstr (OUT) Points to a byte buffer to receive the converted + multi-byte string. May be NULL. + + utf8str (IN) Address of the null-terminated UTF-8 string to convert. + + count (IN) The size of the output buffer in bytes. + + f_wcstombs (IN) The function to use for converting a wide character + string to a multibyte string. If NULL, the local + ANSI C routine wcstombs is used. 
+ +Return Value: + If successful, the function returns the number of bytes + written to mbstr, excluding the null termination + character, if any. + + If mbstr is NULL, count is ignored and the function + returns the number of bytes required for the output string, + excluding the NULL. + + If count is zero, 0 is returned and nothing is written to + mbstr. + + If utf8str is NULL or points to an empty string, the + function returns 1 and a null byte is written to mbstr. + + If an invalid UTF-8 character is encountered, the + function returns -1. + +The output string will be null terminated if there is space for it in +the output buffer. + + diff --git a/doc/devel/variadic_debug/03-libldap_Debug.cocci b/doc/devel/variadic_debug/03-libldap_Debug.cocci new file mode 100644 index 0000000..8353e64 --- /dev/null +++ b/doc/devel/variadic_debug/03-libldap_Debug.cocci @@ -0,0 +1,70 @@ +using "equivalence.iso" + +@initialize:ocaml@ +@@ +// count the number of % characters in the format string +let fmtn(fmt,n) = + List.length (Str.split_delim (Str.regexp_string "%") fmt) = n + 1 + +# replace osip_debug/oslocal_debug with Debug() macros first +@@ +expression E; +expression list args; +@@ +( +-osip_debug +| +-oslocal_debug +) ++Debug + ( +-E, ++LDAP_DEBUG_TRACE, + args ); + +// replace Debug( ..., arg1, arg2, 0 ) with Debug2( ..., arg1, arg2 ) +@@ +char[] fmt : script:ocaml() { fmtn(fmt,2) }; +expression list[2] args; +expression E; +@@ + +-Debug ++Debug2 + ( E, _(fmt), args +-, 0 + ); + +// replace Debug( ..., arg1, 0, 0 ) with Debug1() +@@ +char[] fmt : script:ocaml() { fmtn(fmt,1) }; +expression list[1] args; +expression E; +@@ + +-Debug ++Debug1 + ( E, _(fmt), args +-, 0, 0 + ); + +// Zero-argument Debug() -> Debug0() +@@ +expression E, S; +@@ + +-Debug ++Debug0 + ( E, S +-, 0, 0, 0 + ); + +// everything else is a regular 3-argument debug macro, replace with Debug3() +@@ +expression E, S; +expression list[3] args; +@@ + +-Debug ++Debug3 + ( E, S, args ); diff --git 
a/doc/devel/variadic_debug/04-variadic.cocci b/doc/devel/variadic_debug/04-variadic.cocci new file mode 100644 index 0000000..bd5fbea --- /dev/null +++ b/doc/devel/variadic_debug/04-variadic.cocci @@ -0,0 +1,165 @@ +@initialize:ocaml@ +@@ +// count the number of % characters in the format string +let fmtn(fmt,n) = + List.length (Str.split_delim (Str.regexp_string "%") fmt) = n + 1 + +@@ +identifier Logs =~ "Log[0-9]"; +@@ +-Logs ++Log + +@@ +@@ +-StatslogTest ++LogTest + +// Process two-argument Debug() macros with an extra zero +@@ +char[] fmt : script:ocaml() { fmtn(fmt,2) }; +expression list[2] args; +expression E; +@@ + +Debug( E, fmt, args +-, 0 + ); + +@@ +char[] fmt : script:ocaml() { fmtn(fmt,2) }; +expression list[2] args; +expression E; +@@ + +Debug( E, fmt, args +-, NULL + ); + +// Single argument Debug() macros with two extra zeroes +@@ +char[] fmt : script:ocaml() { fmtn(fmt,1) }; +expression list[1] args; +expression E; +@@ + +Debug( E, fmt, args +-, 0, 0 + ); + +@@ +char[] fmt : script:ocaml() { fmtn(fmt,1) }; +expression list[1] args; +expression E; +@@ + +Debug( E, fmt, args +-, NULL, NULL + ); + +// Debug() macros with no arguments just padded with zeroes +@@ +expression E, S; +@@ + +Debug( E, S +-, 0, 0, 0 + ); + +@@ +expression E, S; +@@ + +Debug( E, S +-, NULL, NULL, NULL + ); + +// Similar to above, just for Statslog +@@ +char[] fmt : script:ocaml() { fmtn(fmt,5) }; +expression list[5] args; +expression E; +@@ + +-Statslog ++Debug + ( E, fmt, args ); + +@@ +char[] fmt : script:ocaml() { fmtn(fmt,4) }; +expression list[4] args; +expression E; +@@ + +-Statslog ++Debug + ( E, fmt, args +-, 0 + ); + +@@ +char[] fmt : script:ocaml() { fmtn(fmt,3) }; +expression list[3] args; +expression E; +@@ + +-Statslog ++Debug + ( E, fmt, args +-, 0, 0 + ); + +@@ +char[] fmt : script:ocaml() { fmtn(fmt,2) }; +expression list[2] args; +expression E; +@@ + +-Statslog ++Debug + ( E, fmt, args +-, 0, 0, 0 + ); + +@@ +char[] fmt : script:ocaml() { fmtn(fmt,1) }; 
+expression list[1] args; +expression E; +@@ + +-Statslog ++Debug + ( E, fmt, args +-, 0, 0, 0, 0 + ); + +@@ +expression E, S; +@@ + +-Statslog ++Debug + ( E, S +-, 0, 0, 0, 0, 0 + ); + +// And StatslogEtime +@@ +char[] fmt : script:ocaml() { fmtn(fmt,4) }; +expression list[4] args; +expression E; +@@ + +StatslogEtime( E, fmt, args +-, 0 + ); + +@@ +identifier Stats =~ "^Statslog"; +@@ +( + StatslogEtime +| +-Stats ++Debug +) diff --git a/doc/devel/variadic_debug/07-shortcut.cocci b/doc/devel/variadic_debug/07-shortcut.cocci new file mode 100644 index 0000000..99b3b55 --- /dev/null +++ b/doc/devel/variadic_debug/07-shortcut.cocci @@ -0,0 +1,216 @@ +// Splice string `s` into the format string `fmtstring` replacing the +// %-parameter at position `pos` +@initialize:python@ +@@ + +# regex from https://stackoverflow.com/questions/30011379/how-can-i-parse-a-c-format-string-in-python +import re +fmtstring = '''\ +( # start of capture group 1 +% # literal "%" +(?: # first option +(?:[-+0 #]{0,5}) # optional flags +(?:\d+|\*)? # width +(?:\.(?:\d+|\*))? # precision +(?:h|l|ll|w|I|I32|I64)? 
# size +[cCdiouxXeEfgGaAnpsSZ] # type +) | # OR +%%) # literal "%%" +''' + +regex = re.compile(fmtstring, re.X) + +def parse_format(f): + return tuple((m.span(), m.group()) for m in + regex.finditer(f)) + +def insert_at_pos(fmt, s, pos): + formats = parse_format(fmt) + span, format = formats[pos] + acc = fmt[:span[0]] + if s.startswith('"'): + acc += s[1:] + else: + acc += '" ' + acc += s + if acc.endswith('"'): + acc = acc[:-1] + fmt[span[1]:] + else: + acc += ' "' + acc += fmt[span[1]:] + return acc + +// rest of the file implements the same as 09-merge.cocci +// The main difference is that we only match on snprintf and Debug that are +// directly adjacent, not based on control flow information which trips +// coccinelle's model-checker +@shortcut@ +identifier buf; +expression E, L; +expression list args_before, args, args_after; +expression format1, format2; +position p1, p2; +@@ + +snprintf@p1( buf, E, format1, args ); +Debug@p2( L, format2, args_before, buf, args_after ); + +// use insert_at_pos above to construct the new format-string +@script:python shortcut_process@ +format1 << shortcut.format1; +format2 << shortcut.format2; +args_before << shortcut.args_before; +merged; +@@ + +pos = len(args_before.elements) +coccinelle.merged = insert_at_pos(format2, format1, pos) + +@shortcut_replace@ +position shortcut.p1, shortcut.p2; +identifier shortcut_process.merged; + +identifier buf; +expression E, L; +expression list args_before, args, args_after; +expression format1, format2; +@@ + +-snprintf@p1( buf, E, format1, args ); +-Debug@p2( L, format2, args_before, buf, args_after ); ++Debug( L, merged, args_before, args, args_after ); + +@shortcut_locked@ +identifier buf; +expression E, L, lock; +expression list args_before, args, args_after; +expression format1, format2; +position p1, p2; +@@ + +ldap_pvt_thread_mutex_lock(lock); +snprintf@p1( buf, E, format1, args ); +ldap_pvt_thread_mutex_unlock(lock); +Debug@p2( L, format2, args_before, buf, args_after ); + +// use 
insert_at_pos above to construct the new format-string +@script:python shortcut_locked_process@ +format1 << shortcut_locked.format1; +format2 << shortcut_locked.format2; +args_before << shortcut_locked.args_before; +merged; +@@ + +pos = len(args_before.elements) +coccinelle.merged = insert_at_pos(format2, format1, pos) + +@shortcut_locked_replace@ +position shortcut_locked.p1, shortcut_locked.p2; +identifier shortcut_locked_process.merged; + +identifier buf; +expression E, L, lock; +expression list args_before, args, args_after; +expression format1, format2; +@@ + +ldap_pvt_thread_mutex_lock(lock); +-snprintf@p1( buf, E, format1, args ); ++Debug( L, merged, args_before, args, args_after ); +ldap_pvt_thread_mutex_unlock(lock); +-Debug@p2( L, format2, args_before, buf, args_after ); + +// so long as we don't reference 'buf' afterwards, no need to keep it defined. +// A lot of pattern-matching is spelled out explicitly to work around the fact +// that the state space doesn't get compressed otherwise. +@@ +type T; +identifier buf, id; +expression E, lock; +initializer I; +@@ +{ +-\( T buf = I; \| T buf; \) +( + ldap_pvt_thread_mutex_lock(lock); +| +) +( + Debug( ... ); +& + ... when != buf +) +( + ldap_pvt_thread_mutex_unlock(lock); +| +) +( +| + continue; +| + break; +| + goto id; +| + \( + return E; + \& + ... when != buf + \) +) +} + +// the rest identifies and removes a (newly-)redundant LogTest check +@if_guard@ +position p; +statement s; +@@ + +( + if ( ... ) {@p + Debug( ... ); + } else s +| + if ( ... ) {@p + Debug( ... ); + } +) + +@else_guard@ +position p; +statement s; +@@ + +if ( ... ) s +else {@p + Debug( ... ); +} + +@loop_guard@ +position p; +@@ + +( + while ( ... ) {@p + Debug( ... ); + } +| + for ( ...;...;... ) {@p + Debug( ... ); + } +) + +@@ +position p != { if_guard.p , else_guard.p, loop_guard.p }; +@@ +-{@p + Debug( ... ); +-} + +@useless_if@ +expression L; +@@ + +-if ( LogTest( L ) ) { + Debug( L, ... 
); +-} diff --git a/doc/devel/variadic_debug/09-merge.cocci b/doc/devel/variadic_debug/09-merge.cocci new file mode 100644 index 0000000..4b0c1b2 --- /dev/null +++ b/doc/devel/variadic_debug/09-merge.cocci @@ -0,0 +1,147 @@ +// Note that this file has not actually been used in the end, since +// 07-shortcut.cocci covers everything we needed in the project, but being +// simpler, it makes the intent of 07-shortcut.cocci clearer + + +// Splice string `s` into the format string `fmtstring` replacing the +// %-parameter at position `pos` +@initialize:python@ +@@ + +#regex from https://stackoverflow.com/questions/30011379/how-can-i-parse-a-c-format-string-in-python +import re +fmtstring = '''\ +( # start of capture group 1 +% # literal "%" +(?: # first option +(?:[-+0 #]{0,5}) # optional flags +(?:\d+|\*)? # width +(?:\.(?:\d+|\*))? # precision +(?:h|l|ll|w|I|I32|I64)? # size +[cCdiouxXeEfgGaAnpsSZ] # type +) | # OR +%%) # literal "%%" +''' + +regex = re.compile(fmtstring, re.X) + +def parse_format(f): + return tuple((m.span(), m.group()) for m in + regex.finditer(f)) + +def insert_at_pos(fmt, s, pos): + formats = parse_format(fmt) + span, format = formats[pos] + acc = fmt[:span[0]] + if s.startswith('"'): + acc += s[1:] + else: + acc += '" ' + acc += s + if acc.endswith('"'): + acc = acc[:-1] + fmt[span[1]:] + else: + acc += ' "' + acc += fmt[span[1]:] + return acc + +// Identify the redundant snprintfs (within a locked region) +@a exists@ +expression lock, E, L; +expression list args_before, args, args_after; +identifier buf; +expression format1, format2; +type T; +position p1, p2; +@@ + +{ +... +T buf; +... +ldap_pvt_thread_mutex_lock(lock); +... +snprintf@p1( buf, E, format1, args ); +... +ldap_pvt_thread_mutex_unlock(lock); +... +Debug@p2( L, format2, args_before, buf, args_after ); +... 
+} + +// Merge the format strings with insert_at_pos above +@script:python a_process@ +format1 << a.format1; +format2 << a.format2; +args_before << a.args_before; +merged; +@@ + +pos = len(args_before.elements) +coccinelle.merged = insert_at_pos(format2, format1, pos) + +// And merge the two together, replacing the extra buffer that's not used anymore +@a_replace@ +position a.p1, a.p2; +identifier a_process.merged; + +expression lock, E, L; +expression list args_before, args, args_after; +identifier buf; +expression format1, format2; +type T; +@@ + +{ +... +-T buf; +... +ldap_pvt_thread_mutex_lock(lock); +... +-snprintf@p1( buf, E, format1, args ); ++Debug( L, merged, args_before, args, args_after ); +... +ldap_pvt_thread_mutex_unlock(lock); +... +-Debug@p2( L, format2, args_before, buf, args_after ); +... +} + +// Once again (same as the 'a' series above, but those that remain to be sorted +// now don't need to stay within a locked region +@b exists@ +expression E, L; +expression list args_before, args, args_after; +identifier buf; +expression format1, format2; +position p1, p2; +@@ + +snprintf@p1( buf, E, format1, args ); +... +Debug@p2( L, format2, args_before, buf, args_after ); + +@script:python b_process@ +format1 << b.format1; +format2 << b.format2; +args_before << b.args_before; +merged; +@@ + +pos = len(args_before.elements) +coccinelle.merged = insert_at_pos(format2, format1, pos) + +@b_replace@ +position b.p1, b.p2; +identifier b_process.merged; + +expression E, L; +expression list args_before, args, args_after; +identifier buf; +expression format1, format2; +@@ + +-snprintf@p1( buf, E, format1, args ); ++Debug( L, merged, args_before, args, args_after ); +... 
+-Debug@p2( L, format2, args_before, buf, args_after ); diff --git a/doc/devel/variadic_debug/README b/doc/devel/variadic_debug/README new file mode 100644 index 0000000..3ccbea2 --- /dev/null +++ b/doc/devel/variadic_debug/README @@ -0,0 +1,39 @@ +Most of the project now depends on the compiler supporting C99 variadic +macros. This is used in the Debug() macro everywhere except libldap and +its dependencies. + +From now on, any time Debug( level, fmt, args... ) is used, you can and +should provide the appropriate number of arguments. The coccinelle +patches in this directory implement the transformations used to bring +the project in line with this. + +As we still aim to support libldap on platforms that only provide C89, +Debug0/1/2/3 macros are used instead. + +If you need to adapt your own fork, see ITS#8731, the rest of this +README and scripts in this directory on what you'll need to achieve +this. + +Coccinelle as of git hash e65a7bdc04ac9122acdae2353422c5736b7998ba from +https://github.com/coccinelle/coccinelle has been used to run the +transformations performed. One notable limitation at the time of writing +is that multi-part (format) strings are always merged onto the same line. + +Some sources cannot be processed, nssov overlay being a prime example, +being wrapped in non-trivial macros. + +The following semantic patches are involved: +- 03-libldap_Debug.cocci: converts the libraries to use the Debug[0123] + macros as appropriate +- 04-variadic.cocci: converts the rest of the project to use the Debug + macro with the right number of arguments (as opposed to padding with + zeroes) +- 09-merge.cocci will merge an 'snprintf(s, len, "fmt", args...); + Debug(level, "... 
%s ...", ..., s, ...);' sequence together +- 07-shortcut.cocci is actually used to apply the above since + coccinelle's model-checker seems to struggle with state space + explosion in some of the very long and complex functions we have - + 09-merge.cocci doesn't finish in any reasonable time + +The equivalence.iso and macros.h files aid coccinelle to parse our +sources correctly and simplify the semantic patches. diff --git a/doc/devel/variadic_debug/equivalence.iso b/doc/devel/variadic_debug/equivalence.iso new file mode 100644 index 0000000..07372fb --- /dev/null +++ b/doc/devel/variadic_debug/equivalence.iso @@ -0,0 +1,12 @@ +Expression +@ NULL @ +@@ + +NULL <=> 0 + +Expression +@ underscore_func @ +expression E; +@@ + +_(E) => E diff --git a/doc/devel/variadic_debug/macros.h b/doc/devel/variadic_debug/macros.h new file mode 100644 index 0000000..265c549 --- /dev/null +++ b/doc/devel/variadic_debug/macros.h @@ -0,0 +1,23 @@ +#define LDAP_PF_LOCAL_SENDMSG_ARG(x) + +#define LDAP_P(x) x +#define LDAP_F(x) extern x +#define LDAP_V(x) extern x + +#define LDAP_GCCATTR(x) +#define LDAP_XSTRING(x) "" +#define LDAP_CONCAT(x,y) x + +#define LDAP_CONST const +#define LDAP_BEGIN_DECL +#define LDAP_END_DECL + +#define SLAP_EVENT_DECL +#define SLAP_EVENT_FNAME + +/* contrib/slapd-modules/smbk5pwd/smbk5pwd.c */ +#define HDB int* + +#define BACKSQL_ARBITRARY_KEY +#define BACKSQL_IDNUMFMT "%llu" +#define BACKSQL_IDFMT "%s" diff --git a/doc/devel/variadic_debug/script.sh b/doc/devel/variadic_debug/script.sh new file mode 100755 index 0000000..b9fd9f0 --- /dev/null +++ b/doc/devel/variadic_debug/script.sh @@ -0,0 +1,73 @@ +#!/bin/bash + +set -e + +PATCH_DIR=doc/devel/variadic_debug + +SPATCH=${SPATCH:-spatch} +SPATCH_OPTS=( --macro-file-builtins "$PATCH_DIR/macros.h" ) +#SPATCH_OPTS+=( --timeout 300 ) + +SED_TRANSFORMATIONS=() + +# split out multipart strings back to original form (one per line) +SED_TRANSFORMATIONS+=( -e 's/^\(+\s*\)\(.*"\) \(".*\)"$/\1\2\n+\1\3/' ) + +# re-add 
whitespace around parentheses +SED_TRANSFORMATIONS+=( -e 's/^\(+.*Debug[0-3]\?(\)\s*/\1 /' ) +SED_TRANSFORMATIONS+=( -e 's/^\(+.*[^ ]\));$/\1 );/' ) + +# strip trailing whitespace copied from source on affected lines +SED_TRANSFORMATIONS+=( -e 's/^\(+.*\)\s\+$/\1/' ) + +# fix whitespace errors in source we touch +SED_TRANSFORMATIONS+=( -e 's/^\(+.*\) \t/\1\t\t/' ) +SED_TRANSFORMATIONS+=( -e 's/^\(+\t*\) \{1,3\}\t/\1\t/' ) + +normalise() { + patch="$1" + shift + + # iterate until we've reached fixpoint + while ! cmp "$patch" "${patch}.new" 2>/dev/null; do + if [ -e "${patch}.new" ]; then + mv -- "${patch}.new" "$patch" + fi + sed "${SED_TRANSFORMATIONS[@]}" -- "$patch" >"${patch}.new" + done + rediff "$patch" >"${patch}.new" + mv -- "${patch}.new" "$patch" +} + +git add "$PATCH_DIR" +git commit -m "ITS#8731 Add the documentation and scripts" + +git am "$PATCH_DIR/00-fixes.patch" +git am "$PATCH_DIR/01-logging.patch" +git am "$PATCH_DIR/02-manual.patch" + +$SPATCH "${SPATCH_OPTS[@]}" -sp_file "$PATCH_DIR/03-libldap_Debug.cocci" \ + -dir libraries/libldap \ + >"$PATCH_DIR/03-libldap_Debug.patch" +normalise "$PATCH_DIR/03-libldap_Debug.patch" +git apply --index --directory libraries/libldap "$PATCH_DIR/03-libldap_Debug.patch" +git commit -m "ITS#8731 Apply $PATCH_DIR/03-libldap_Debug.cocci" + +$SPATCH "${SPATCH_OPTS[@]}" -sp_file "$PATCH_DIR/04-variadic.cocci" \ + -dir . \ + >"$PATCH_DIR/04-variadic.patch" +normalise "$PATCH_DIR/04-variadic.patch" +git apply --index "$PATCH_DIR/04-variadic.patch" +git commit -m "ITS#8731 Apply $PATCH_DIR/04-variadic.cocci" + +git am "$PATCH_DIR/05-back-sql.patch" +git am "$PATCH_DIR/06-nssov.patch" + +$SPATCH "${SPATCH_OPTS[@]}" -sp_file "$PATCH_DIR/07-shortcut.cocci" \ + -dir . \ + >"$PATCH_DIR/07-shortcut.patch" +normalise "$PATCH_DIR/07-shortcut.patch" +git apply --index "$PATCH_DIR/07-shortcut.patch" +git commit -m "ITS#8731 Apply $PATCH_DIR/07-shortcut.cocci" + +git am "$PATCH_DIR/08-snprintf-manual.patch" |