diff options
author | Federico Ceratto <federico.ceratto@gmail.com> | 2018-03-27 21:28:21 +0000 |
---|---|---|
committer | Federico Ceratto <federico.ceratto@gmail.com> | 2018-03-27 21:28:21 +0000 |
commit | d4dd00f58a502c9ca4b63e36ce6bc7a9945dc63c (patch) | |
tree | faac99f51f182bb8c0a03e95e393d421ac9ddf42 /python.d | |
parent | New upstream version 1.9.0+dfsg (diff) | |
download | netdata-d4dd00f58a502c9ca4b63e36ce6bc7a9945dc63c.tar.xz netdata-d4dd00f58a502c9ca4b63e36ce6bc7a9945dc63c.zip |
New upstream version 1.10.0+dfsgupstream/1.10.0+dfsg
Diffstat (limited to 'python.d')
57 files changed, 14895 insertions, 359 deletions
diff --git a/python.d/Makefile.am b/python.d/Makefile.am index 1c84cddb4..a5fcc7394 100644 --- a/python.d/Makefile.am +++ b/python.d/Makefile.am @@ -16,6 +16,7 @@ dist_python_DATA = \ apache.chart.py \ beanstalk.chart.py \ bind_rndc.chart.py \ + ceph.chart.py \ chrony.chart.py \ couchdb.chart.py \ cpufreq.chart.py \ @@ -31,6 +32,8 @@ dist_python_DATA = \ go_expvar.chart.py \ haproxy.chart.py \ hddtemp.chart.py \ + httpcheck.chart.py \ + icecast.chart.py \ ipfs.chart.py \ isc_dhcpd.chart.py \ mdstat.chart.py \ @@ -38,9 +41,12 @@ dist_python_DATA = \ mongodb.chart.py \ mysql.chart.py \ nginx.chart.py \ + nginx_plus.chart.py \ nsd.chart.py \ + ntpd.chart.py \ ovpn_status_log.chart.py \ phpfpm.chart.py \ + portcheck.chart.py \ postfix.chart.py \ postgres.chart.py \ powerdns.chart.py \ @@ -49,9 +55,11 @@ dist_python_DATA = \ retroshare.chart.py \ samba.chart.py \ sensors.chart.py \ + springboot.chart.py \ squid.chart.py \ smartd_log.chart.py \ tomcat.chart.py \ + traefik.chart.py \ varnish.chart.py \ web_log.chart.py \ $(NULL) @@ -194,4 +202,3 @@ dist_python_urllib3_securetransport_DATA = \ python_modules/urllib3/contrib/_securetransport/bindings.py \ python_modules/urllib3/contrib/_securetransport/low_level.py \ $(NULL) - diff --git a/python.d/Makefile.in b/python.d/Makefile.in index dda54e1a5..d6e11d0cf 100644 --- a/python.d/Makefile.in +++ b/python.d/Makefile.in @@ -336,6 +336,7 @@ dist_python_DATA = \ apache.chart.py \ beanstalk.chart.py \ bind_rndc.chart.py \ + ceph.chart.py \ chrony.chart.py \ couchdb.chart.py \ cpufreq.chart.py \ @@ -351,6 +352,8 @@ dist_python_DATA = \ go_expvar.chart.py \ haproxy.chart.py \ hddtemp.chart.py \ + httpcheck.chart.py \ + icecast.chart.py \ ipfs.chart.py \ isc_dhcpd.chart.py \ mdstat.chart.py \ @@ -358,9 +361,12 @@ dist_python_DATA = \ mongodb.chart.py \ mysql.chart.py \ nginx.chart.py \ + nginx_plus.chart.py \ nsd.chart.py \ + ntpd.chart.py \ ovpn_status_log.chart.py \ phpfpm.chart.py \ + portcheck.chart.py \ postfix.chart.py \ postgres.chart.py \ powerdns.chart.py \ @@ -369,9 +375,11 @@ dist_python_DATA = \ retroshare.chart.py \ samba.chart.py \ sensors.chart.py \ + springboot.chart.py \ squid.chart.py \ smartd_log.chart.py \ tomcat.chart.py \ + traefik.chart.py \ varnish.chart.py \ web_log.chart.py \ $(NULL) diff --git a/python.d/README.md b/python.d/README.md index 009265f72..faabba2c7 100644 --- a/python.d/README.md +++ b/python.d/README.md @@ -29,7 +29,7 @@ local: # job name update_every : 5 # job update frequency other_var1 : some_val # module specific variable -other_job: +other_job: priority : 5 # job position on dashboard retries : 20 # job retries other_var2 : val # module specific variable @@ -43,7 +43,7 @@ The following python.d modules are supported: # apache -This module will monitor one or more apache servers depending on configuration. +This module will monitor one or more apache servers depending on configuration. **Requirements:** * apache with enabled `mod_status` @@ -60,20 +60,20 @@ It produces the following charts: * keepalive * closing * writing - + 4. **Bandwidth** in kilobytes/s * sent - + 5. **Workers** * idle * busy - + 6. **Lifetime Avg. Requests/s** in requests/s * requests_sec - + 7. **Lifetime Avg. Bandwidth/s** in kilobytes/s * size_sec - + 8. **Lifetime Avg. Response Size** in bytes/request * size_req @@ -109,7 +109,7 @@ Module monitors apache mod_cache log and produces only one chart: * hit * miss * other - + ### configuration Sample: @@ -138,14 +138,14 @@ Module provides server and tube level statistics: 1. **Cpu usage** in cpu time * user * system - + 2. **Jobs rate** in jobs/s * total * timeouts - + 3. **Connections rate** in connections/s * connections - + 4. **Commands rate** in commands/s * put * peek @@ -167,27 +167,27 @@ Module provides server and tube level statistics: * list-tube-used * list-tubes-watched * pause-tube - + 5. **Current tubes** in tubes * tubes - + 6. **Current jobs** in jobs * urgent * ready * reserved * delayed * buried - + 7. **Current connections** in connections * written * producers * workers * waiting - + 8. **Binlog** in records/s * written * migrated - + 9. **Uptime** in seconds * uptime @@ -195,7 +195,7 @@ Module provides server and tube level statistics: 1. **Jobs rate** in jobs/s * jobs - + 2. **Jobs** in jobs * using * ready @@ -211,12 +211,12 @@ Module provides server and tube level statistics: 4. **Commands** in commands/s * deletes * pauses - + 5. **Pause** in seconds * since * left - + ### configuration Sample: @@ -252,7 +252,7 @@ It produces: * recursion * duplicate * rejections - + 2. **Incoming queries** * RESERVED0 * A @@ -273,7 +273,7 @@ It produces: * SPF * ANY * DLV - + 3. **Outgoing queries** * Same as Incoming queries @@ -323,6 +323,39 @@ local: --- +# ceph + +This module monitors the ceph cluster usage and consuption data of a server. + +It produces: + +* Cluster statistics (usage, available, latency, objects, read/write rate) +* OSD usage +* OSD latency +* Pool usage +* Pool read/write operations +* Pool read/write rate +* number of objects per pool + +**Requirements:** + +- `rados` python module +- Granting read permissions to ceph group from keyring file +```shell +# chmod 640 /etc/ceph/ceph.client.admin.keyring +``` + +### Configuration + +Sample: +```yaml +local: + config_file: '/etc/ceph/ceph.conf' + keyring_file: '/etc/ceph/ceph.client.admin.keyring' +``` + +--- + # couchdb This module monitors vital statistics of a local Apache CouchDB 2.x server, including: @@ -467,13 +500,13 @@ localhost: # dovecot -This module provides statistics information from dovecot server. +This module provides statistics information from dovecot server. Statistics are taken from dovecot socket by executing `EXPORT global` command. More information about dovecot stats can be found on [project wiki page.](http://wiki2.dovecot.org/Statistics) **Requirement:** Dovecot unix socket with R/W permissions for user netdata or dovecot with configured TCP/IP socket. - + Module gives information with following charts: 1. **sessions** @@ -482,25 +515,25 @@ Module gives information with following charts: 2. **logins** * logins -3. **commands** - number of IMAP commands +3. **commands** - number of IMAP commands * commands - + 4. **Faults** * minor * major - -5. **Context Switches** + +5. **Context Switches** * volountary * involountary - + 6. **disk** in bytes/s * read * write - + 7. **bytes** in bytes/s * read * write - + 8. **number of syscalls** in syscalls/s * read * write @@ -509,7 +542,7 @@ Module gives information with following charts: * path * attr -10. **hits** - number of cache hits +10. **hits** - number of cache hits * hits 11. **attempts** - authorization attemts @@ -519,7 +552,7 @@ Module gives information with following charts: 12. **cache** - cached authorization hits * hit * miss - + ### configuration Sample: @@ -561,7 +594,7 @@ It produces: * Time spent on garbage collections 4. **Host metrics** charts: - * Available file descriptors in percent + * Available file descriptors in percent * Opened HTTP connections * Cluster communication transport metrics @@ -602,7 +635,7 @@ If no configuration is given, module will fail to run. # exim -Simple module executing `exim -bpc` to grab exim queue. +Simple module executing `exim -bpc` to grab exim queue. This command can take a lot of time to finish its execution thus it is not recommended to run it every second. It produces only one chart: @@ -616,13 +649,13 @@ Configuration is not needed. # fail2ban -Module monitor fail2ban log file to show all bans for all active jails +Module monitor fail2ban log file to show all bans for all active jails **Requirements:** * fail2ban.log file MUST BE readable by netdata (A good idea is to add **create 0640 root netdata** to fail2ban conf at logrotate.d) - + It produces one chart with multiple lines (one line per jail) - + ### configuration Sample: @@ -691,14 +724,14 @@ local: port : '18121' secret : 'adminsecret' acct : False # Freeradius accounting statistics. - proxy_auth : False # Freeradius proxy authentication statistics. + proxy_auth : False # Freeradius proxy authentication statistics. proxy_acct : False # Freeradius proxy accounting statistics. ``` **Freeradius server configuration:** The configuration for the status server is automatically created in the sites-available directory. -By default, server is enabled and can be queried from every client. +By default, server is enabled and can be queried from every client. FreeRADIUS will only respond to status-server messages, if the status-server virtual server has been enabled. To do this, create a link from the sites-enabled directory to the status file in the sites-available directory: @@ -721,30 +754,30 @@ For the memory statistics, it produces the following charts: 1. **Heap allocations** in kB * alloc: size of objects allocated on the heap - * inuse: size of allocated heap spans - + * inuse: size of allocated heap spans + 2. **Stack allocations** in kB * inuse: size of allocated stack spans - + 3. **MSpan allocations** in kB * inuse: size of allocated mspan structures - + 4. **MCache allocations** in kB * inuse: size of allocated mcache structures - + 5. **Virtual memory** in kB * sys: size of reserved virtual address space - + 6. **Live objects** * live: number of live objects in memory - + 7. **GC pauses average** in ns * avg: average duration of all GC stop-the-world pauses - + ### configuration - + Please see the [wiki page](https://github.com/firehol/netdata/wiki/Monitoring-Go-Applications#using-netdata-go_expvar-module) for detailed info about module configuration. - + --- # haproxy @@ -760,13 +793,13 @@ Socket MUST be readable AND writable by netdata user. It produces: 1. **Frontend** family charts - * Kilobytes in/s + * Kilobytes in/s * Kilobytes out/s * Sessions current * Sessions in queue current 2. **Backend** family charts - * Kilobytes in/s + * Kilobytes in/s * Kilobytes out/s * Sessions current * Sessions in queue current @@ -798,7 +831,7 @@ If no configuration is given, module will fail to run. --- # hddtemp - + Module monitors disk temperatures from one or more hddtemp daemons. **Requirement:** @@ -820,6 +853,75 @@ If no configuration is given, module will attempt to connect to hddtemp daemon o --- +# httpcheck + +Module monitors remote http server for availability and response time. + +Following charts are drawn per job: + +1. **Response time** ms + * Time in 0.1 ms resolution in which the server responds. + If the connection failed, the value is missing. + +2. **Status** boolean + * Connection successful + * Unexpected content: No Regex match found in the response + * Unexpected status code: Do we get 500 errors? + * Connection failed: port not listening or blocked + * Connection timed out: host or port unreachable + +### configuration + +Sample configuration and their default values. + +```yaml +server: + url: 'http://host:port/path' # required + status_accepted: # optional + - 200 + timeout: 1 # optional, supports decimals (e.g. 0.2) + update_every: 3 # optional + regex: 'REGULAR_EXPRESSION' # optional, see https://docs.python.org/3/howto/regex.html + redirect: yes # optional +``` + +### notes + + * The status chart is primarily intended for alarms, badges or for access via API. + * A system/service/firewall might block netdata's access if a portscan or + similar is detected. + * This plugin is meant for simple use cases. Currently, the accuracy of the + response time is low and should be used as reference only. + +--- + +# icecast + +This module will monitor number of listeners for active sources. + +**Requirements:** + * icecast version >= 2.4.0 + +It produces the following charts: + +1. **Listeners** in listeners + * source number + +### configuration + +Needs only `url` to server's `/status-json.xsl` + +Here is an example for remote server: + +```yaml +remote: + url : 'http://1.2.3.4:8443/status-json.xsl' +``` + +Without configuration, module attempts to connect to `http://localhost:8443/status-json.xsl` + +--- + # IPFS Module monitors [IPFS](https://ipfs.io) basic information. @@ -827,13 +929,13 @@ Module monitors [IPFS](https://ipfs.io) basic information. 1. **Bandwidth** in kbits/s * in * out - + 2. **Peers** * peers - + ### configuration -Only url to IPFS server is needed. +Only url to IPFS server is needed. Sample: @@ -860,11 +962,11 @@ It produces: 2. **Total leases** * leases (overall number of leases for all pools) - + 3. **Active leases** for every pools * leases (number of active leases in pool) - + ### configuration Sample: @@ -888,8 +990,8 @@ Module monitor /proc/mdstat It produces: 1. **Health** Number of failed disks in every array (aggregate chart). - -2. **Disks stats** + +2. **Disks stats** * total (number of devices array ideally would have) * inuse (number of devices currently are in use) @@ -898,11 +1000,11 @@ It produces: * recovery in percent * reshape in percent * check in percent - + 4. **Operation status** (if resync/recovery/reshape/check is active) * finish in minutes * speed in megabytes/s - + ### configuration No configuration is needed. @@ -915,20 +1017,20 @@ Memcached monitoring module. Data grabbed from [stats interface](https://github. 1. **Network** in kilobytes/s * read * written - + 2. **Connections** per second * current * rejected * total - + 3. **Items** in cluster * current * total - + 4. **Evicted and Reclaimed** items * evicted * reclaimed - + 5. **GET** requests/s * hits * misses @@ -938,7 +1040,7 @@ Memcached monitoring module. Data grabbed from [stats interface](https://github. 7. **SET rate** rate in requests/s * rate - + 8. **DELETE** requests/s * hits * misses @@ -947,22 +1049,22 @@ Memcached monitoring module. Data grabbed from [stats interface](https://github. * hits * misses * bad value - + 10. **Increment** requests/s * hits * misses - + 11. **Decrement** requests/s * hits * misses - + 12. **Touch** requests/s * hits * misses - + 13. **Touch rate** rate in requests/s * rate - + ### configuration Sample: @@ -1214,7 +1316,7 @@ If no configuration is given, module will attempt to connect to mysql server via # nginx -This module will monitor one or more nginx servers depending on configuration. Servers can be either local or remote. +This module will monitor one or more nginx servers depending on configuration. Servers can be either local or remote. **Requirements:** * nginx with configured 'ngx_http_stub_status_module' @@ -1234,11 +1336,11 @@ It produces following charts: * reading * writing * waiting - + 4. **Connections Rate** in connections/s * accepts * handled - + ### configuration Needs only `url` to server's `stub_status` @@ -1258,6 +1360,132 @@ Without configuration, module attempts to connect to `http://localhost/stub_stat --- +# nginx_plus + +This module will monitor one or more nginx_plus servers depending on configuration. +Servers can be either local or remote. + +Example nginx_plus configuration can be found in 'python.d/nginx_plus.conf' + +It produces following charts: + +1. **Requests total** in requests/s + * total + +2. **Requests current** in requests + * current + +3. **Connection Statistics** in connections/s + * accepted + * dropped + +4. **Workers Statistics** in workers + * idle + * active + +5. **SSL Handshakes** in handshakes/s + * successful + * failed + +6. **SSL Session Reuses** in sessions/s + * reused + +7. **SSL Memory Usage** in percent + * usage + +8. **Processes** in processes + * respawned + +For every server zone: + +1. **Processing** in requests + * processing + +2. **Requests** in requests/s + * requests + +3. **Responses** in requests/s + * 1xx + * 2xx + * 3xx + * 4xx + * 5xx + +4. **Traffic** in kilobits/s + * received + * sent + +For every upstream: + +1. **Peers Requests** in requests/s + * peer name (dimension per peer) + +2. **All Peers Responses** in responses/s + * 1xx + * 2xx + * 3xx + * 4xx + * 5xx + +3. **Peer Responses** in requests/s (for every peer) + * 1xx + * 2xx + * 3xx + * 4xx + * 5xx + +4. **Peers Connections** in active + * peer name (dimension per peer) + +5. **Peers Connections Usage** in percent + * peer name (dimension per peer) + +6. **All Peers Traffic** in KB + * received + * sent + +7. **Peer Traffic** in KB/s (for every peer) + * received + * sent + +8. **Peer Timings** in ms (for every peer) + * header + * response + +9. **Memory Usage** in percent + * usage + +10. **Peers Status** in state + * peer name (dimension per peer) + +11. **Peers Total Downtime** in seconds + * peer name (dimension per peer) + +For every cache: + +1. **Traffic** in KB + * served + * written + * bypass + +2. **Memory Usage** in percent + * usage + +### configuration + +Needs only `url` to server's `status` + +Here is an example for local server: + +```yaml +local: + url : 'http://localhost/status' +``` + +Without configuration, module fail to start. + +--- + # nsd Module uses the `nsd-control stats_noreset` command to provide `nsd` statistics. @@ -1313,9 +1541,81 @@ Configuration is not needed. --- +# ntpd + +Module monitors the system variables of the local `ntpd` daemon (optional incl. variables of the polled peers) using the NTP Control Message Protocol via UDP socket, similar to `ntpq`, the [standard NTP query program](http://doc.ntp.org/current-stable/ntpq.html). + +**Requirements:** + * Version: `NTPv4` + * Local interrogation allowed in `/etc/ntp.conf` (default): + +``` +# Local users may interrogate the ntp server more closely. +restrict 127.0.0.1 +restrict ::1 +``` + +It produces: + +1. system + * offset + * jitter + * frequency + * delay + * dispersion + * stratum + * tc + * precision + +2. peers + * offset + * delay + * dispersion + * jitter + * rootdelay + * rootdispersion + * stratum + * hmode + * pmode + * hpoll + * ppoll + * precision + +**configuration** + +Sample: + +```yaml +update_every: 10 + +host: 'localhost' +port: '123' +show_peers: yes +# hide peers with source address in ranges 127.0.0.0/8 and 192.168.0.0/16 +peer_filter: '(127\..*)|(192\.168\..*)' +# check for new/changed peers every 60 updates +peer_rescan: 60 +``` + +Sample (multiple jobs): + +Note: `ntp.conf` on the host `otherhost` must be configured to allow queries from our local host by including a line like `restrict <IP> nomodify notrap nopeer`. + +```yaml +local: + host: 'localhost' + +otherhost: + host: 'otherhost' +``` + +If no configuration is given, module will attempt to connect to `ntpd` on `::1:123` or `127.0.0.1:123` and show charts for the systemvars. Use `show_peers: yes` to also show the charts for configured peers. Local peers in the range `127.0.0.0/8` are hidden by default, use `peer_filter: ''` to show all peers. + +--- + # ovpn_status_log -Module monitor openvpn-status log file. +Module monitor openvpn-status log file. **Requirements:** @@ -1325,16 +1625,16 @@ Module monitor openvpn-status log file. * Make sure NETDATA USER CAN READ openvpn-status.log * Update_every interval MUST MATCH interval on which OpenVPN writes operational status to log file. - + It produces: 1. **Users** OpenVPN active users * users - + 2. **Traffic** OpenVPN overall bandwidth usage in kilobit/s * in * out - + ### configuration Sample: @@ -1348,12 +1648,12 @@ default # phpfpm -This module will monitor one or more php-fpm instances depending on configuration. +This module will monitor one or more php-fpm instances depending on configuration. **Requirements:** * php-fpm with enabled `status` page * access to `status` page via web server - + It produces following charts: 1. **Active Connections** @@ -1363,15 +1663,15 @@ It produces following charts: 2. **Requests** in requests/s * requests - + 3. **Performance** * reached * slow - + ### configuration Needs only `url` to server's `status` - + Here is an example for local instance: ```yaml @@ -1387,6 +1687,42 @@ Without configuration, module attempts to connect to `http://localhost/status` --- +# portcheck + +Module monitors a remote TCP service. + +Following charts are drawn per host: + +1. **Latency** ms + * Time required to connect to a TCP port. + Displays latency in 0.1 ms resolution. If the connection failed, the value is missing. + +2. **Status** boolean + * Connection successful + * Could not create socket: possible DNS problems + * Connection refused: port not listening or blocked + * Connection timed out: host or port unreachable + + +### configuration + +```yaml +server: + host: 'dns or ip' # required + port: 22 # required + timeout: 1 # optional + update_every: 1 # optional +``` + +### notes + + * The error chart is intended for alarms, badges or for access via API. + * A system/service/firewall might block netdata's access if a portscan or + similar is detected. + * Currently, the accuracy of the latency is low and should be used as reference only. + +--- + # postfix Simple module executing `postfix -p` to grab postfix queue. @@ -1395,7 +1731,7 @@ It produces only two charts: 1. **Postfix Queue Emails** * emails - + 2. **Postfix Queue Emails Size** in KB * size @@ -1427,10 +1763,10 @@ Following charts are drawn: 4. **Checkpoints** writes/s * scheduled * requested - + 5. **Current connections to db** count * connections - + 6. **Tuples returned from db** tuples/s * sequential * bitmap @@ -1451,7 +1787,7 @@ Following charts are drawn: 10. **Locks on db** count per type * locks - + ### configuration ```yaml @@ -1543,10 +1879,13 @@ Following charts are drawn: 6. **Erlang processes** * used processes -7. **Memory** +7. **Erlang run queue** + * Erlang run queue + +8. **Memory** * free memory in megabytes -8. **Disk Space** +9. **Disk Space** * free disk space in gigabytes ### configuration @@ -1581,16 +1920,16 @@ Following charts are drawn: * total * lua -4. **Database keys** +4. **Database keys** * lines are creates dynamically based on how many databases are there - + 5. **Clients** * connected * blocked - + 6. **Slaves** * connected - + ### configuration ```yaml @@ -1688,6 +2027,39 @@ Please join this discussion for help. --- +# springboot + +This module will monitor one or more Java Spring-boot applications depending on configuration. + +It produces following charts: + +1. **Response Codes** in requests/s + * 1xx + * 2xx + * 3xx + * 4xx + * 5xx + * others + +2. **Threads** + * daemon + * total + +3. **GC Time** in milliseconds and **GC Operations** in operations/s + * Copy + * MarkSweep + * ... + +4. **Heap Mmeory Usage** in KB + * used + * committed + +### configuration + +Please see the [Monitoring Java Spring Boot Applications](https://github.com/firehol/netdata/wiki/Monitoring-Java-Spring-Boot-Applications) page for detailed info about module configuration. + +--- + # squid This module will monitor one or more squid instances depending on configuration. @@ -1707,11 +2079,11 @@ It produces following charts: 3. **Server Bandwidth** in kilobits/s * in * out - + 4. **Server Requests** in requests/s * requests * errors - + ### configuration ```yaml @@ -1724,7 +2096,7 @@ local: ``` Without any configuration module will try to autodetect where squid presents its `counters` data - + --- # smartd_log @@ -1738,7 +2110,7 @@ It produces following charts (you can add additional attributes in the module co 2. **Start/Stop Count** attribute 4 3. **Reallocated Sectors Count** attribute 5 - + 4. **Seek Error Rate** attribute 7 5. **Power-On Hours Count** attribute 9 @@ -1750,11 +2122,11 @@ It produces following charts (you can add additional attributes in the module co 8. **Temperature** attribute 194 9. **Current Pending Sectors** attribute 197 - + 10. **Off-Line Uncorrectable** attribute 198 11. **Write Error Rate** attribute 200 - + ### configuration ```yaml @@ -1763,7 +2135,7 @@ local: ``` If no configuration is given, module will attempt to read log files in /var/log/smartd/ directory. - + --- # tomcat @@ -1781,10 +2153,10 @@ Charts: 3. **Threads** * current * busy - + 4. **JVM Free Memory** in MB * jvm - + ### configuration ```yaml @@ -1795,10 +2167,65 @@ localhost: pass : 'secret_tomcat_password' ``` -Without configuration, module attempts to connect to `http://localhost:8080/manager/status?XML=true`, without any credentials. +Without configuration, module attempts to connect to `http://localhost:8080/manager/status?XML=true`, without any credentials. So it will probably fail. ---- +--- + +# Traefik + +Module uses the `health` API to provide statistics. + +It produces: + +1. **Responses** by statuses + * success (1xx, 2xx, 304) + * error (5xx) + * redirect (3xx except 304) + * bad (4xx) + * other (all other responses) + +2. **Responses** by codes + * 2xx (successful) + * 5xx (internal server errors) + * 3xx (redirect) + * 4xx (bad) + * 1xx (informational) + * other (non-standart responses) + +3. **Detailed Response Codes** requests/s (number of responses for each response code family individually) + +4. **Requests**/s + * request statistics + +5. **Total response time** + * sum of all response time + +6. **Average response time** + +7. **Average response time per iteration** + +8. **Uptime** + * Traefik server uptime + +### configuration + +Needs only `url` to server's `health` + +Here is an example for local server: + +```yaml +update_every : 1 +priority : 60000 + +local: + url : 'http://localhost:8080/health' + retries : 10 +``` + +Without configuration, module attempts to connect to `http://localhost:8080/health`. + +--- # varnish cache @@ -1825,7 +2252,7 @@ It produces: 5. **Expired Objects** in expired/s * objects - + 6. **Least Recently Used Nuked Objects** in nuked/s * objects @@ -1837,7 +2264,7 @@ It produces: * created * failed * limited - + 9. **Current Queue Length** in requests * in queue @@ -1848,22 +2275,22 @@ It produces: * closed * resycled * failed - + 10. **Requests To The Backend** in requests/s * received - + 11. **ESI Statistics** in problems/s * errors * warnings - + 12. **Memory Usage** in MB * free * allocated - + 13. **Uptime** in seconds * uptime - - + + ### configuration No configuration is needed. @@ -1893,7 +2320,7 @@ It produces following charts: * unmatched (the lines in the log file that are not matched) 3. **Detailed Response Codes** requests/s (number of responses for each response code family individually) - + 4. **Bandwidth** KB/s * received (bandwidth of requests) * send (bandwidth of responses) @@ -1915,7 +2342,7 @@ It produces following charts: 11. **All Time Unique Client IPs** unique ips/s (unique client IPs since the last restart of netdata) - + ### configuration ```yaml @@ -1933,4 +2360,4 @@ apache_log: Module has preconfigured jobs for nginx, apache and gunicorn on various distros. ---- +--- diff --git a/python.d/ceph.chart.py b/python.d/ceph.chart.py new file mode 100644 index 000000000..fb78397d0 --- /dev/null +++ b/python.d/ceph.chart.py @@ -0,0 +1,313 @@ +# -*- coding: utf-8 -*- +# Description: ceph netdata python.d module +# Author: Luis Eduardo (lets00) + +try: + import rados + CEPH = True +except ImportError: + CEPH = False + +import json +from bases.FrameworkServices.SimpleService import SimpleService + +# default module values (can be overridden per job in `config`) +update_every = 10 +priority = 60000 +retries = 60 + +ORDER = ['general_usage', 'general_objects', 'general_bytes', 'general_operations', + 'general_latency', 'pool_usage', 'pool_objects', 'pool_read_bytes', + 'pool_write_bytes', 'pool_read_operations', 'pool_write_operations', 'osd_usage', + 'osd_apply_latency', 'osd_commit_latency'] + +CHARTS = { + 'general_usage': { + 'options': [None, 'Ceph General Space', 'KB', 'general', 'ceph.general_usage', 'stacked'], + 'lines': [ + ['general_available', 'avail', 'absolute', 1, 1024], + ['general_usage', 'used', 'absolute', 1, 1024] + ] + }, + 'general_objects': { + 'options': [None, 'Ceph General Objects', 'objects', 'general', 'ceph.general_objects', 'area'], + 'lines': [ + ['general_objects', 'cluster', 'absolute'] + ] + }, + 'general_bytes': { + 'options': [None, 'Ceph General Read/Write Data/s', 'KB', 'general', 'ceph.general_bytes', + 'area'], + 'lines': [ + ['general_read_bytes', 'read', 'absolute', 1, 1024], + ['general_write_bytes', 'write', 'absolute', -1, 1024] + ] + }, + 'general_operations': { + 'options': [None, 'Ceph General Read/Write Operations/s', 'operations', 'general', 'ceph.general_operations', + 'area'], + 'lines': [ + ['general_read_operations', 'read', 'absolute', 1], + ['general_write_operations', 'write', 'absolute', -1] + ] + }, + 'general_latency': { + 'options': [None, 'Ceph General Apply/Commit latency', 'milliseconds', 'general', 'ceph.general_latency', + 'area'], + 'lines': [ + ['general_apply_latency', 'apply', 'absolute'], + ['general_commit_latency', 'commit', 'absolute'] + ] + }, + 'pool_usage': { + 'options': [None, 'Ceph Pools', 'KB', 'pool', 'ceph.pool_usage', 'line'], + 'lines': [] + }, + 'pool_objects': { + 'options': [None, 'Ceph Pools', 'objects', 'pool', 'ceph.pool_objects', 'line'], + 'lines': [] + }, + 'pool_read_bytes': { + 'options': [None, 'Ceph Read Pool Data/s', 'KB', 'pool', 'ceph.pool_read_bytes', 'area'], + 'lines': [] + }, + 'pool_write_bytes': { + 'options': [None, 'Ceph Write Pool Data/s', 'KB', 'pool', 'ceph.pool_write_bytes', 'area'], + 'lines': [] + }, + 'pool_read_operations': { + 'options': [None, 'Ceph Read Pool Operations/s', 'operations', 'pool', 'ceph.pool_read_operations', 'area'], + 'lines': [] + }, + 'pool_write_operations': { + 'options': [None, 'Ceph Write Pool Operations/s', 'operations', 'pool', 'ceph.pool_write_operations', 'area'], + 'lines': [] + }, + 'osd_usage': { + 'options': [None, 'Ceph OSDs', 'KB', 'osd', 'ceph.osd_usage', 'line'], + 'lines': [] + }, + 'osd_apply_latency': { + 'options': [None, 'Ceph OSDs apply latency', 'milliseconds', 'osd', 'ceph.apply_latency', 'line'], + 'lines': [] + }, + 'osd_commit_latency': { + 'options': [None, 'Ceph OSDs commit latency', 'milliseconds', 'osd', 'ceph.commit_latency', 'line'], + 'lines': [] + } + +} + + +class Service(SimpleService): + def __init__(self, configuration=None, name=None): + SimpleService.__init__(self, configuration=configuration, name=name) + self.order = ORDER + self.definitions = CHARTS + self.config_file = self.configuration.get('config_file') + self.keyring_file = self.configuration.get('keyring_file') + + def check(self): + """ + Checks module + :return: + """ + if not CEPH: + self.error('rados module is needed to use ceph.chart.py') + return False + if not (self.config_file and self.keyring_file): + self.error('config_file and/or keyring_file is not defined') + return False + try: + self.cluster = rados.Rados(conffile=self.config_file, + conf=dict(keyring=self.keyring_file)) + self.cluster.connect() + except rados.Error as error: + self.error(error) + return False + self.create_definitions() + return True + + def create_definitions(self): + """ + Create dynamically charts options + :return: None + """ + # Pool lines + for pool in sorted(self._get_df()['pools']): + self.definitions['pool_usage']['lines'].append([pool['name'], + pool['name'], + 'absolute']) + self.definitions['pool_objects']['lines'].append(["obj_{0}".format(pool['name']), + pool['name'], + 'absolute']) + self.definitions['pool_read_bytes']['lines'].append(['read_{0}'.format(pool['name']), + pool['name'], + 'absolute', 1, 1024]) + self.definitions['pool_write_bytes']['lines'].append(['write_{0}'.format(pool['name']), + pool['name'], + 'absolute', 1, 1024]) + self.definitions['pool_read_operations']['lines'].append(['read_operations_{0}'.format(pool['name']), + pool['name'], + 'absolute']) + self.definitions['pool_write_operations']['lines'].append(['write_operations_{0}'.format(pool['name']), + pool['name'], + 'absolute']) + + # OSD lines + for osd in sorted(self._get_osd_df()['nodes']): + self.definitions['osd_usage']['lines'].append([osd['name'], + osd['name'], + 'absolute']) + self.definitions['osd_apply_latency']['lines'].append(['apply_latency_{0}'.format(osd['name']), + osd['name'], + 'absolute']) + self.definitions['osd_commit_latency']['lines'].append(['commit_latency_{0}'.format(osd['name']), + osd['name'], + 'absolute']) + + def get_data(self): + """ + Catch all ceph data + :return: dict + """ + try: + data = {} + df = self._get_df() + osd_df = self._get_osd_df() + osd_perf = self._get_osd_perf() + pool_stats = self._get_osd_pool_stats() + data.update(self._get_general(osd_perf, pool_stats)) + for pool in df['pools']: + data.update(self._get_pool_usage(pool)) + data.update(self._get_pool_objects(pool)) + for pool_io in pool_stats: + data.update(self._get_pool_rw(pool_io)) + for osd in osd_df['nodes']: + data.update(self._get_osd_usage(osd)) + for osd_apply_commit in osd_perf['osd_perf_infos']: + data.update(self._get_osd_latency(osd_apply_commit)) + return data + except (ValueError, AttributeError) as error: + self.error(error) + return None + + def _get_general(self, osd_perf, pool_stats): + """ + Get ceph's general usage + :return: dict + """ + status = self.cluster.get_cluster_stats() + read_bytes_sec = 0 + write_bytes_sec = 0 + read_op_per_sec = 0 + write_op_per_sec = 0 + apply_latency = 0 + commit_latency = 0 + + for pool_rw_io_b in pool_stats: + read_bytes_sec += pool_rw_io_b['client_io_rate'].get('read_bytes_sec', 0) + write_bytes_sec += pool_rw_io_b['client_io_rate'].get('write_bytes_sec', 0) + read_op_per_sec += pool_rw_io_b['client_io_rate'].get('read_op_per_sec', 0) + write_op_per_sec += pool_rw_io_b['client_io_rate'].get('write_op_per_sec', 0) + for perf in osd_perf['osd_perf_infos']: + apply_latency += perf['perf_stats']['apply_latency_ms'] + commit_latency += perf['perf_stats']['commit_latency_ms'] + + return {'general_usage': int(status['kb_used']), + 'general_available': int(status['kb_avail']), + 'general_objects': int(status['num_objects']), + 'general_read_bytes': read_bytes_sec, + 'general_write_bytes': write_bytes_sec, + 'general_read_operations': read_op_per_sec, + 'general_write_operations': write_op_per_sec, + 'general_apply_latency': apply_latency, + 'general_commit_latency': commit_latency + } + + @staticmethod + def _get_pool_usage(pool): + """ + Process raw data into pool usage dict information + :return: A pool dict with pool name's key and usage bytes' value + """ + return {pool['name']: pool['stats']['kb_used']} + + @staticmethod + def _get_pool_objects(pool): + """ + Process raw data into pool usage dict information + :return: A pool dict with pool name's key and object numbers + """ + return {'obj_{0}'.format(pool['name']): pool['stats']['objects']} + + @staticmethod + def _get_pool_rw(pool): + """ + Get read/write kb and operations in a pool + :return: A pool dict with both read/write bytes and operations. + """ + return {'read_{0}'.format(pool['pool_name']): int(pool['client_io_rate'].get('read_bytes_sec', 0)), + 'write_{0}'.format(pool['pool_name']): int(pool['client_io_rate'].get('write_bytes_sec', 0)), + 'read_operations_{0}'.format(pool['pool_name']): int(pool['client_io_rate'].get('read_op_per_sec', 0)), + 'write_operations_{0}'.format(pool['pool_name']): int(pool['client_io_rate'].get('write_op_per_sec', 0)) + } + + @staticmethod + def _get_osd_usage(osd): + """ + Process raw data into osd dict information to get osd usage + :return: A osd dict with osd name's key and usage bytes' value + """ + return {osd['name']: float(osd['kb_used'])} + + @staticmethod + def _get_osd_latency(osd): + """ + Get ceph osd apply and commit latency + :return: A osd dict with osd name's key with both apply and commit latency values + """ + return {'apply_latency_osd.{0}'.format(osd['id']): osd['perf_stats']['apply_latency_ms'], + 'commit_latency_osd.{0}'.format(osd['id']): osd['perf_stats']['commit_latency_ms']} + + def _get_df(self): + """ + Get ceph df output + :return: ceph df --format json + """ + return json.loads(self.cluster.mon_command(json.dumps({ + 'prefix': 'df', + 'format': 'json' + }), '')[1]) + + def _get_osd_df(self): + """ + Get ceph osd df output + :return: ceph osd df --format json + """ + return json.loads(self.cluster.mon_command(json.dumps({ + 'prefix': 'osd df', + 'format': 'json' + }), '')[1]) + + def _get_osd_perf(self): + """ + Get ceph osd performance + :return: ceph osd perf --format json + """ + return json.loads(self.cluster.mon_command(json.dumps({ + 'prefix': 'osd perf', + 'format': 'json' + }), '')[1]) + + def _get_osd_pool_stats(self): + """ + Get ceph osd pool status. + This command is used to get information about both + read/write operation and bytes per second on each pool + :return: ceph osd pool stats --format json + """ + return json.loads(self.cluster.mon_command(json.dumps({ + 'prefix': 'osd pool stats', + 'format': 'json' + }), '')[1]) diff --git a/python.d/elasticsearch.chart.py b/python.d/elasticsearch.chart.py index afdf0f1b4..9c2c58944 100644 --- a/python.d/elasticsearch.chart.py +++ b/python.d/elasticsearch.chart.py @@ -32,15 +32,36 @@ NODE_STATS = [ 'indices.indexing.index_time_in_millis', 'indices.refresh.total', 'indices.refresh.total_time_in_millis', - 'indices.flush.total' + 'indices.flush.total', 'indices.flush.total_time_in_millis', + 'indices.translog.operations', + 'indices.translog.size_in_bytes', + 'indices.translog.uncommitted_operations', + 'indices.translog.uncommitted_size_in_bytes', + 'indices.segments.count', + 'indices.segments.terms_memory_in_bytes', + 'indices.segments.stored_fields_memory_in_bytes', + 'indices.segments.term_vectors_memory_in_bytes', + 'indices.segments.norms_memory_in_bytes', + 'indices.segments.points_memory_in_bytes', + 'indices.segments.doc_values_memory_in_bytes', + 'indices.segments.index_writer_memory_in_bytes', + 'indices.segments.version_map_memory_in_bytes', + 'indices.segments.fixed_bit_set_memory_in_bytes', 'jvm.gc.collectors.young.collection_count', 'jvm.gc.collectors.old.collection_count', 'jvm.gc.collectors.young.collection_time_in_millis', 'jvm.gc.collectors.old.collection_time_in_millis', 'jvm.mem.heap_used_percent', + 'jvm.mem.heap_used_in_bytes', 'jvm.mem.heap_committed_in_bytes', - 'thread_pool.bulk.queue' + 'jvm.buffer_pools.direct.count', + 'jvm.buffer_pools.direct.used_in_bytes', + 'jvm.buffer_pools.direct.total_capacity_in_bytes', + 'jvm.buffer_pools.mapped.count', + 'jvm.buffer_pools.mapped.used_in_bytes', + 'jvm.buffer_pools.mapped.total_capacity_in_bytes', + 'thread_pool.bulk.queue', 'thread_pool.bulk.rejected', 'thread_pool.index.queue', 'thread_pool.index.rejected', @@ -103,7 +124,9 @@ LATENCY = { # charts order (can be overridden if you want less charts, or different order) ORDER = ['search_performance_total', 'search_performance_current', 'search_performance_time', 'search_latency', 'index_performance_total', 'index_performance_current', 'index_performance_time', - 'index_latency', 'jvm_mem_heap', 'jvm_gc_count', 'jvm_gc_time', 'host_metrics_file_descriptors', + 'index_latency', 'index_translog_operations', 'index_translog_size', 'index_segments_count', 'index_segments_memory_writer', + 'index_segments_memory', 'jvm_mem_heap', 'jvm_mem_heap_bytes', 'jvm_buffer_pool_count', + 'jvm_direct_buffers_memory', 'jvm_mapped_buffers_memory', 'jvm_gc_count', 'jvm_gc_time', 'host_metrics_file_descriptors', 'host_metrics_http', 'host_metrics_transport', 'thread_pool_queued', 'thread_pool_rejected', 'fielddata_cache', 'fielddata_evictions_tripped', 'cluster_health_status', 'cluster_health_nodes', 'cluster_health_shards', 'cluster_stats_nodes', 'cluster_stats_query_cache', 'cluster_stats_docs', @@ -166,12 +189,78 @@ CHARTS = { ['indexing_latency', 'indexing', 'absolute', 1, 1000], ['flushing_latency', 'flushing', 'absolute', 1, 1000] ]}, + 'index_translog_operations': { + 'options': [None, 'Translog Operations', 'count', 'translog', + 'elastic.index_translog_operations', 'area'], + 'lines': [ + ['indices_translog_operations', 'total', 'absolute'], + ['indices_translog_uncommitted_operations', 'uncommited', 'absolute'] + ]}, + 'index_translog_size': { + 'options': [None, 'Translog Size', 'MB', 'translog', + 'elastic.index_translog_size', 'area'], + 'lines': [ + ['indices_translog_size_in_bytes', 'total', 'absolute', 1, 1048567], + ['indices_translog_uncommitted_size_in_bytes', 'uncommited', 'absolute', 1, 1048567] + ]}, + 'index_segments_count': { + 'options': [None, 'Total Number Of Indices Segments', 'count', 'indices segments', + 'elastic.index_segments_count', 'line'], + 'lines': [ + ['indices_segments_count', 'segments', 'absolute'] + ]}, + 'index_segments_memory_writer': { + 'options': [None, 'Index Writer Memory Usage', 'MB', 'indices segments', + 'elastic.index_segments_memory_writer', 'area'], + 'lines': [ + ['indices_segments_index_writer_memory_in_bytes', 'total', 'absolute', 1, 1048567] + ]}, + 'index_segments_memory': { + 'options': [None, 'Indices Segments Memory Usage', 'MB', 'indices segments', + 'elastic.index_segments_memory', 'stacked'], + 'lines': [ + ['indices_segments_terms_memory_in_bytes', 'terms', 'absolute', 1, 1048567], + ['indices_segments_stored_fields_memory_in_bytes', 'stored fields', 'absolute', 1, 1048567], + ['indices_segments_term_vectors_memory_in_bytes', 'term vectors', 'absolute', 1, 1048567], + ['indices_segments_norms_memory_in_bytes', 'norms', 'absolute', 1, 1048567], + ['indices_segments_points_memory_in_bytes', 'points', 'absolute', 1, 1048567], + ['indices_segments_doc_values_memory_in_bytes', 'doc values', 'absolute', 1, 1048567], + ['indices_segments_version_map_memory_in_bytes', 'version map', 'absolute', 1, 1048567], + ['indices_segments_fixed_bit_set_memory_in_bytes', 'fixed bit set', 'absolute', 1, 1048567] + ]}, 'jvm_mem_heap': { - 'options': [None, 'JVM Heap Currently in Use/Committed', 'percent/MB', 'memory usage and gc', + 'options': [None, 'JVM Heap Percentage Currently in Use', 'percent', 'memory usage and gc', 'elastic.jvm_heap', 'area'], 'lines': [ - ['jvm_mem_heap_used_percent', 'inuse', 'absolute'], - ['jvm_mem_heap_committed_in_bytes', 'commit', 'absolute', -1, 1048576] + ['jvm_mem_heap_used_percent', 'inuse', 'absolute'] + ]}, + 'jvm_mem_heap_bytes': { + 'options': [None, 'JVM Heap Commit And Usage', 'MB', 'memory usage and gc', + 'elastic.jvm_heap_bytes', 'area'], + 'lines': [ + ['jvm_mem_heap_committed_in_bytes', 'commited', 'absolute', 1, 1048576], + ['jvm_mem_heap_used_in_bytes', 'used', 'absolute', 1, 1048576] + ]}, + 'jvm_buffer_pool_count': { + 'options': [None, 'JVM Buffers', 'count', 'memory usage and gc', + 'elastic.jvm_buffer_pool_count', 'line'], + 'lines': [ + ['jvm_buffer_pools_direct_count', 'direct', 'absolute'], + ['jvm_buffer_pools_mapped_count', 'mapped', 'absolute'] + ]}, + 'jvm_direct_buffers_memory': { + 'options': [None, 'JVM Direct Buffers Memory', 'MB', 'memory usage and gc', + 'elastic.jvm_direct_buffers_memory', 'area'], + 'lines': [ + ['jvm_buffer_pools_direct_used_in_bytes', 'used', 'absolute', 1, 1048567], + ['jvm_buffer_pools_direct_total_capacity_in_bytes', 'total capacity', 'absolute', 1, 1048567] + ]}, + 'jvm_mapped_buffers_memory': { + 'options': [None, 'JVM Mapped Buffers Memory', 'MB', 'memory usage and gc', + 'elastic.jvm_mapped_buffers_memory', 'area'], + 'lines': [ + ['jvm_buffer_pools_mapped_used_in_bytes', 'used', 'absolute', 1, 1048567], + ['jvm_buffer_pools_mapped_total_capacity_in_bytes', 'total capacity', 'absolute', 1, 1048567] ]}, 'jvm_gc_count': { 'options': [None, 'Garbage Collections', 'counts', 'memory usage and gc', 'elastic.gc_count', 'stacked'], diff --git a/python.d/haproxy.chart.py b/python.d/haproxy.chart.py index e72698d10..3061f5ef2 100644 --- a/python.d/haproxy.chart.py +++ b/python.d/haproxy.chart.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # Description: haproxy netdata python.d module -# Author: l2isbad +# Author: l2isbad, ktarasz from collections import defaultdict from re import compile as re_compile @@ -20,8 +20,13 @@ priority = 60000 retries = 60 # charts order (can be overridden if you want less charts, or different order) -ORDER = ['fbin', 'fbout', 'fscur', 'fqcur', 'bbin', 'bbout', 'bscur', 'bqcur', - 'health_sdown', 'health_bdown', 'health_idle'] +ORDER = ['fbin', 'fbout', 'fscur', 'fqcur', + 'fhrsp_1xx', 'fhrsp_2xx', 'fhrsp_3xx', 'fhrsp_4xx', 'fhrsp_5xx', 'fhrsp_other', 'fhrsp_total', + 'bbin', 'bbout', 'bscur', 'bqcur', + 'bhrsp_1xx', 'bhrsp_2xx', 'bhrsp_3xx', 'bhrsp_4xx', 'bhrsp_5xx', 'bhrsp_other', 'bhrsp_total', + 'bqtime', 'bttime', 'brtime', 'bctime', + 'health_sup', 'health_sdown', 'health_bdown', 'health_idle'] + CHARTS = { 'fbin': { 'options': [None, "Kilobytes In", "KB/s", 'frontend', 'haproxy_f.bin', 'line'], @@ -39,6 +44,34 @@ CHARTS = { 'options': [None, "Session In Queue", "sessions", 'frontend', 'haproxy_f.qcur', 'line'], 'lines': [ ]}, + 'fhrsp_1xx': { + 'options': [None, "HTTP responses with 1xx code", "responses/s", 'frontend', 'haproxy_f.hrsp_1xx', 'line'], + 'lines': [ + ]}, + 'fhrsp_2xx': { + 'options': [None, "HTTP responses with 2xx code", "responses/s", 'frontend', 'haproxy_f.hrsp_2xx', 'line'], + 'lines': [ + ]}, + 'fhrsp_3xx': { + 'options': [None, "HTTP responses with 3xx code", "responses/s", 'frontend', 'haproxy_f.hrsp_3xx', 'line'], + 'lines': [ + ]}, + 'fhrsp_4xx': { + 'options': [None, "HTTP responses with 4xx code", "responses/s", 'frontend', 'haproxy_f.hrsp_4xx', 'line'], + 'lines': [ + ]}, + 'fhrsp_5xx': { + 'options': [None, "HTTP responses with 5xx code", "responses/s", 'frontend', 'haproxy_f.hrsp_5xx', 'line'], + 'lines': [ + ]}, + 'fhrsp_other': { + 'options': [None, "HTTP responses with other codes (protocol error)", "responses/s", 'frontend', 'haproxy_f.hrsp_other', 'line'], + 'lines': [ + ]}, + 'fhrsp_total': { + 'options': [None, "HTTP responses", "responses", 'frontend', 'haproxy_f.hrsp_total', 'line'], + 'lines': [ + ]}, 'bbin': { 'options': [None, "Kilobytes In", "KB/s", 'backend', 'haproxy_b.bin', 'line'], 'lines': [ @@ -55,11 +88,64 @@ CHARTS = { 'options': [None, "Sessions In Queue", "sessions", 'backend', 'haproxy_b.qcur', 'line'], 'lines': [ ]}, + 'bhrsp_1xx': { + 'options': [None, "HTTP responses with 1xx code", "responses/s", 'backend', 'haproxy_b.hrsp_1xx', 'line'], + 'lines': [ + ]}, + 'bhrsp_2xx': { + 'options': [None, "HTTP responses with 2xx code", "responses/s", 'backend', 'haproxy_b.hrsp_2xx', 'line'], + 'lines': [ + ]}, + 'bhrsp_3xx': { + 'options': [None, "HTTP responses with 3xx code", "responses/s", 'backend', 'haproxy_b.hrsp_3xx', 'line'], + 'lines': [ + ]}, + 'bhrsp_4xx': { + 'options': [None, "HTTP responses with 4xx code", "responses/s", 'backend', 'haproxy_b.hrsp_4xx', 'line'], + 'lines': [ + ]}, + 'bhrsp_5xx': { + 'options': [None, "HTTP responses with 5xx code", "responses/s", 'backend', 'haproxy_b.hrsp_5xx', 'line'], + 'lines': [ + ]}, + 'bhrsp_other': { + 'options': [None, "HTTP responses with other codes (protocol error)", "responses/s", 'backend', + 'haproxy_b.hrsp_other', 'line'], + 'lines': [ + ]}, + 'bhrsp_total': { + 'options': [None, "HTTP responses (total)", "responses/s", 'backend', 'haproxy_b.hrsp_total', 'line'], + 'lines': [ + ]}, + 'bqtime': { + 'options': [None, "The average queue time over the 1024 last requests", "ms", 'backend', 'haproxy_b.qtime', 'line'], + 'lines': [ + ]}, + 'bctime': { + 'options': [None, "The average connect time over the 1024 last requests", "ms", 'backend', + 'haproxy_b.ctime', 'line'], + 'lines': [ + ]}, + 'brtime': { + 'options': [None, "The average response time over the 1024 last requests", "ms", 'backend', + 'haproxy_b.rtime', 'line'], + 'lines': [ + ]}, + 'bttime': { + 'options': [None, "The average total session time over the 1024 last requests", "ms", 'backend', + 'haproxy_b.ttime', 'line'], + 'lines': [ + ]}, 'health_sdown': { 'options': [None, "Backend Servers In DOWN State", "failed servers", 'health', 'haproxy_hs.down', 'line'], 'lines': [ ]}, + 'health_sup': { + 'options': [None, "Backend Servers In UP State", "health servers", 'health', + 'haproxy_hs.up', 'line'], + 'lines': [ + ]}, 'health_bdown': { 'options': [None, "Is Backend Alive? 1 = DOWN", "failed backend", 'health', 'haproxy_hb.down', 'line'], 'lines': [ @@ -71,10 +157,27 @@ CHARTS = { ]} } + METRICS = {'bin': {'algorithm': 'incremental', 'divisor': 1024}, 'bout': {'algorithm': 'incremental', 'divisor': 1024}, 'scur': {'algorithm': 'absolute', 'divisor': 1}, - 'qcur': {'algorithm': 'absolute', 'divisor': 1}} + 'qcur': {'algorithm': 'absolute', 'divisor': 1}, + 'hrsp_1xx': {'algorithm': 'incremental', 'divisor': 1}, + 'hrsp_2xx': {'algorithm': 'incremental', 'divisor': 1}, + 'hrsp_3xx': {'algorithm': 'incremental', 'divisor': 1}, + 'hrsp_4xx': {'algorithm': 'incremental', 'divisor': 1}, + 'hrsp_5xx': {'algorithm': 'incremental', 'divisor': 1}, + 'hrsp_other': {'algorithm': 'incremental', 'divisor': 1}, + } + + +BACKEND_METRICS = { + 'qtime': {'algorithm': 'absolute', 'divisor': 1}, + 'ctime': {'algorithm': 'absolute', 'divisor': 1}, + 'rtime': {'algorithm': 'absolute', 'divisor': 1}, + 'ttime': {'algorithm': 'absolute', 'divisor': 1} +} + REGEX = dict(url=re_compile(r'idle = (?P<idle>[0-9]+)'), socket=re_compile(r'Idle_pct: (?P<idle>[0-9]+)')) @@ -139,11 +242,19 @@ class Service(UrlService, SocketService): for backend in self.data['backend']: name, idx = backend['# pxname'], backend['# pxname'].replace('.', '_') + stat_data['hsup_' + idx] = len([server for server in self.data['servers'] + if server_status(server, name, 'UP')]) stat_data['hsdown_' + idx] = len([server for server in self.data['servers'] - if server_down(server, name)]) + if server_status(server, name, 'DOWN')]) stat_data['hbdown_' + idx] = 1 if backend.get('status') == 'DOWN' else 0 + for metric in BACKEND_METRICS: + stat_data['_'.join(['backend', metric, idx])] = backend.get(metric) or 0 + hrsp_total = 0 for metric in METRICS: stat_data['_'.join(['backend', metric, idx])] = backend.get(metric) or 0 + if metric.startswith('hrsp_'): + hrsp_total += int(backend.get(metric) or 0) + stat_data['_'.join(['backend', 'hrsp_total', idx])] = hrsp_total return stat_data def _get_info_data(self, regex): @@ -173,12 +284,21 @@ class Service(UrlService, SocketService): self.definitions['f' + metric]['lines'].append(['_'.join(['frontend', metric, idx]), name, METRICS[metric]['algorithm'], 1, METRICS[metric]['divisor']]) + self.definitions['fhrsp_total']['lines'].append(['_'.join(['frontend', 'hrsp_total', idx]), + name, 'incremental', 1, 1]) for back in self.data['backend']: name, idx = back['# pxname'], back['# pxname'].replace('.', '_') for metric in METRICS: self.definitions['b' + metric]['lines'].append(['_'.join(['backend', metric, idx]), name, METRICS[metric]['algorithm'], 1, METRICS[metric]['divisor']]) + self.definitions['bhrsp_total']['lines'].append(['_'.join(['backend', 'hrsp_total', idx]), + name, 'incremental', 1, 1]) + for metric in BACKEND_METRICS: + self.definitions['b' + metric]['lines'].append(['_'.join(['backend', metric, idx]), + name, BACKEND_METRICS[metric]['algorithm'], 1, + BACKEND_METRICS[metric]['divisor']]) + self.definitions['health_sup']['lines'].append(['hsup_' + idx, name, 'absolute']) self.definitions['health_sdown']['lines'].append(['hsdown_' + idx, name, 'absolute']) self.definitions['health_bdown']['lines'].append(['hbdown_' + idx, name, 'absolute']) @@ -210,8 +330,8 @@ def parse_data_(data): return result or None -def server_down(server, backend_name): - return server.get('# pxname') == backend_name and server.get('status') == 'DOWN' +def server_status(server, backend_name, status='DOWN'): + return server.get('# pxname') == backend_name and server.get('status') == status def url_remove_params(url): diff --git a/python.d/httpcheck.chart.py b/python.d/httpcheck.chart.py new file mode 100644 index 000000000..b0177ff90 --- /dev/null +++ b/python.d/httpcheck.chart.py @@ -0,0 +1,117 @@ +# -*- coding: utf-8 -*- +# Description: http check netdata python.d module +# Original Author: ccremer (github.com/ccremer) + +import urllib3 +import re + +try: + from time import monotonic as time +except ImportError: + from time import time + +from bases.FrameworkServices.UrlService import UrlService + +# default module values (can be overridden per job in `config`) +update_every = 3 +priority = 60000 +retries = 60 + +# Response +HTTP_RESPONSE_TIME = 'time' +HTTP_RESPONSE_LENGTH = 'length' + +# Status dimensions +HTTP_SUCCESS = 'success' +HTTP_BAD_CONTENT = 'bad_content' +HTTP_BAD_STATUS = 'bad_status' +HTTP_TIMEOUT = 'timeout' +HTTP_NO_CONNECTION = 'no_connection' + +ORDER = ['response_time', 'response_length', 'status'] + +CHARTS = { + 'response_time': { + 'options': [None, 'HTTP response time', 'ms', 'response', 'httpcheck.responsetime', 'line'], + 'lines': [ + [HTTP_RESPONSE_TIME, 'time', 'absolute', 100, 1000] + ]}, + 'response_length': { + 'options': [None, 'HTTP response body length', 'characters', 'response', 'httpcheck.responselength', 'line'], + 'lines': [ + [HTTP_RESPONSE_LENGTH, 'length', 'absolute'] + ]}, + 'status': { + 'options': [None, 'HTTP status', 'boolean', 'status', 'httpcheck.status', 'line'], + 'lines': [ + [HTTP_SUCCESS, 'success', 'absolute'], + [HTTP_BAD_CONTENT, 'bad content', 'absolute'], + [HTTP_BAD_STATUS, 'bad status', 'absolute'], + [HTTP_TIMEOUT, 'timeout', 'absolute'], + [HTTP_NO_CONNECTION, 'no connection', 'absolute'] + ]} +} + + +class Service(UrlService): + def __init__(self, configuration=None, name=None): + UrlService.__init__(self, configuration=configuration, name=name) + pattern = self.configuration.get('regex') + self.regex = re.compile(pattern) if pattern else None + self.status_codes_accepted = self.configuration.get('status_accepted', [200]) + self.follow_redirect = self.configuration.get('redirect', True) + self.order = ORDER + self.definitions = CHARTS + + def _get_data(self): + """ + Format data received from http request + :return: dict + """ + data = dict() + data[HTTP_SUCCESS] = 0 + data[HTTP_BAD_CONTENT] = 0 + data[HTTP_BAD_STATUS] = 0 + data[HTTP_TIMEOUT] = 0 + data[HTTP_NO_CONNECTION] = 0 + url = self.url + try: + start = time() + status, content = self._get_raw_data_with_status(retries=1 if self.follow_redirect else False, + redirect=self.follow_redirect) + diff = time() - start + data[HTTP_RESPONSE_TIME] = max(round(diff * 10000), 0) + self.debug('Url: {url}. Host responded with status code {code} in {diff} s'.format( + url=url, code=status, diff=diff + )) + self.process_response(content, data, status) + + except urllib3.exceptions.NewConnectionError as error: + self.debug("Connection failed: {url}. Error: {error}".format(url=url, error=error)) + data[HTTP_NO_CONNECTION] = 1 + + except (urllib3.exceptions.TimeoutError, urllib3.exceptions.PoolError) as error: + self.debug("Connection timed out: {url}. Error: {error}".format(url=url, error=error)) + data[HTTP_TIMEOUT] = 1 + + except urllib3.exceptions.HTTPError as error: + self.debug("Connection failed: {url}. Error: {error}".format(url=url, error=error)) + data[HTTP_NO_CONNECTION] = 1 + + except (TypeError, AttributeError) as error: + self.error('Url: {url}. Error: {error}'.format(url=url, error=error)) + return None + + return data + + def process_response(self, content, data, status): + data[HTTP_RESPONSE_LENGTH] = len(content) + self.debug('Content: \n\n{content}\n'.format(content=content)) + if status in self.status_codes_accepted: + if self.regex and self.regex.search(content) is None: + self.debug("No match for regex '{regex}' found".format(regex=self.regex.pattern)) + data[HTTP_BAD_CONTENT] = 1 + else: + data[HTTP_SUCCESS] = 1 + else: + data[HTTP_BAD_STATUS] = 1 diff --git a/python.d/icecast.chart.py b/python.d/icecast.chart.py new file mode 100644 index 000000000..792b99f3f --- /dev/null +++ b/python.d/icecast.chart.py @@ -0,0 +1,92 @@ +# -*- coding: utf-8 -*- +# Description: icecast netdata python.d module +# Author: Ilya Mashchenko (l2isbad) + +import json + +from bases.FrameworkServices.UrlService import UrlService + + +priority = 60000 +retries = 60 + +# charts order (can be overridden if you want less charts, or different order) +ORDER = ['listeners'] + +CHARTS = { + 'listeners': { + 'options': [None, 'Number Of Listeners', 'listeners', + 'listeners', 'icecast.listeners', 'line'], + 'lines': [ + ]} +} + + +class Source: + def __init__(self, idx, data): + self.name = 'source_{0}'.format(idx) + self.is_active = data.get('stream_start') and data.get('server_name') + self.listeners = data['listeners'] + + +class Service(UrlService): + def __init__(self, configuration=None, name=None): + UrlService.__init__(self, configuration=configuration, name=name) + self.order = ORDER + self.definitions = CHARTS + self.url = self.configuration.get('url') + self._manager = self._build_manager() + + def check(self): + """ + Add active sources to the "listeners" chart + :return: bool + """ + sources = self.get_sources() + if not sources: + return None + + active_sources = 0 + for idx, raw_source in enumerate(sources): + if Source(idx, raw_source).is_active: + active_sources += 1 + dim_id = 'source_{0}'.format(idx) + dim = 'source {0}'.format(idx) + self.definitions['listeners']['lines'].append([dim_id, dim]) + + return bool(active_sources) + + def _get_data(self): + """ + Get number of listeners for every source + :return: dict + """ + sources = self.get_sources() + if not sources: + return None + + data = dict() + + for idx, raw_source in enumerate(sources): + source = Source(idx, raw_source) + data[source.name] = source.listeners + + return data + + def get_sources(self): + """ + Format data received from http request and return list of sources + :return: list + """ + + raw_data = self._get_raw_data() + if not raw_data: + return None + + try: + data = json.loads(raw_data) + except ValueError as error: + self.error("JSON decode error:", error) + return None + + return data['icestats'].get('source') diff --git a/python.d/isc_dhcpd.chart.py b/python.d/isc_dhcpd.chart.py index 60995342c..eb6338452 100644 --- a/python.d/isc_dhcpd.chart.py +++ b/python.d/isc_dhcpd.chart.py @@ -2,165 +2,192 @@ # Description: isc dhcpd lease netdata python.d module # Author: l2isbad -from time import mktime, strptime, gmtime, time -from os import stat, access, R_OK -from os.path import isfile -try: - from ipaddress import ip_network, ip_address - HAVE_IPADDRESS = True -except ImportError: - HAVE_IPADDRESS = False +import os +import re +import time + + try: - from itertools import filterfalse + import ipaddress + HAVE_IP_ADDRESS = True except ImportError: - from itertools import ifilterfalse as filterfalse + HAVE_IP_ADDRESS = False + +from collections import defaultdict +from copy import deepcopy from bases.FrameworkServices.SimpleService import SimpleService priority = 60000 retries = 60 -update_every = 5 -ORDER = ['pools_utilization', 'pools_active_leases', 'leases_total', 'parse_time', 'leases_size'] +ORDER = ['pools_utilization', 'pools_active_leases', 'leases_total'] CHARTS = { 'pools_utilization': { - 'options': [None, 'Pools Utilization', 'used in percent', 'utilization', + 'options': [None, 'Pools Utilization', '%', 'utilization', 'isc_dhcpd.utilization', 'line'], 'lines': []}, 'pools_active_leases': { - 'options': [None, 'Active Leases', 'leases per pool', 'active leases', + 'options': [None, 'Active Leases Per Pool', 'leases', 'active leases', 'isc_dhcpd.active_leases', 'line'], 'lines': []}, 'leases_total': { - 'options': [None, 'Total All Pools', 'number', 'active leases', + 'options': [None, 'All Active Leases', 'leases', 'active leases', 'isc_dhcpd.leases_total', 'line'], - 'lines': [['leases_total', 'leases', 'absolute']]}, - 'parse_time': { - 'options': [None, 'Parse Time', 'ms', 'parse stats', - 'isc_dhcpd.parse_time', 'line'], - 'lines': [['parse_time', 'time', 'absolute']]}, - 'leases_size': { - 'options': [None, 'Dhcpd Leases File Size', 'kilobytes', - 'parse stats', 'isc_dhcpd.leases_size', 'line'], - 'lines': [['leases_size', 'size', 'absolute', 1, 1024]]}} + 'lines': [['leases_total', 'leases', 'absolute']], + 'variables': [ + ['leases_size'] + ] + } +} + + +class DhcpdLeasesFile: + def __init__(self, path): + self.path = path + self.mod_time = 0 + self.size = 0 + + def is_valid(self): + return os.path.isfile(self.path) and os.access(self.path, os.R_OK) + + def is_changed(self): + mod_time = os.path.getmtime(self.path) + if mod_time != self.mod_time: + self.mod_time = mod_time + self.size = int(os.path.getsize(self.path) / 1024) + return True + return False + + def get_data(self): + try: + with open(self.path) as leases: + result = defaultdict(dict) + for row in leases: + row = row.strip() + if row.startswith('lease'): + address = row[6:-2] + elif row.startswith('iaaddr'): + address = row[7:-2] + elif row.startswith('ends'): + result[address]['ends'] = row[5:-1] + elif row.startswith('binding state'): + result[address]['state'] = row[14:-1] + return dict((k, v) for k, v in result.items() if len(v) == 2) + except (OSError, IOError): + return None + + +class Pool: + def __init__(self, name, network): + self.id = re.sub(r'[:/.-]+', '_', name) + self.name = name + self.network = ipaddress.ip_network(address=u'%s' % network) + + def num_hosts(self): + return self.network.num_addresses - 2 + + def __contains__(self, item): + return item.address in self.network + + +class Lease: + def __init__(self, address, ends, state): + self.address = ipaddress.ip_address(address=u'%s' % address) + self.ends = ends + self.state = state + + def is_active(self, current_time): + # lease_end_time might be epoch + if self.ends.startswith('epoch'): + epoch = int(self.ends.split()[1].replace(';', '')) + return epoch - current_time > 0 + # max. int for lease-time causes lease to expire in year 2038. + # dhcpd puts 'never' in the ends section of active lease + elif self.ends == 'never': + return True + return time.mktime(time.strptime(self.ends, '%w %Y/%m/%d %H:%M:%S')) - current_time > 0 + + def is_valid(self): + return self.state == 'active' class Service(SimpleService): def __init__(self, configuration=None, name=None): SimpleService.__init__(self, configuration=configuration, name=name) - self.leases_path = self.configuration.get('leases_path', '/var/lib/dhcp/dhcpd.leases') self.order = ORDER - self.definitions = CHARTS - self.pools = dict() + self.definitions = deepcopy(CHARTS) + + lease_path = self.configuration.get('leases_path', '/var/lib/dhcp/dhcpd.leases') + self.dhcpd_leases = DhcpdLeasesFile(path=lease_path) + self.pools = list() + self.data = dict() # Will work only with 'default' db-time-format (weekday year/month/day hour:minute:second) # TODO: update algorithm to parse correctly 'local' db-time-format - # Also only ipv4 supported def check(self): - if not HAVE_IPADDRESS: - self.error('\'python-ipaddress\' module is needed') + if not HAVE_IP_ADDRESS: + self.error("'python-ipaddress' module is needed") return False - if not (isfile(self.leases_path) and access(self.leases_path, R_OK)): - self.error('Make sure leases_path is correct and leases log file is readable by netdata') + + if not self.dhcpd_leases.is_valid(): + self.error("Make sure '{path}' is exist and readable by netdata".format(path=self.dhcpd_leases.path)) return False - if not self.configuration.get('pools'): + + pools = self.configuration.get('pools') + if not pools: self.error('Pools are not defined') return False - if not isinstance(self.configuration['pools'], dict): - self.error('Invalid \'pools\' format') - return False - for pool in self.configuration['pools']: + for pool in pools: try: - net = ip_network(u'%s' % self.configuration['pools'][pool]) - self.pools[pool] = dict(net=net, num_hosts=net.num_addresses - 2) + new_pool = Pool(name=pool, network=pools[pool]) except ValueError as error: - self.error('%s removed, error: %s' % (self.configuration['pools'][pool], error)) + self.error("'{pool}' was removed, error: {error}".format(pool=pools[pool], error=error)) + else: + self.pools.append(new_pool) - if not self.pools: - return False self.create_charts() - return True + return bool(self.pools) - def _get_raw_data(self): - """ - Parses log file - :return: tuple( - [ipaddress, lease end time, ...], - time to parse leases file - ) - """ - try: - with open(self.leases_path) as leases: - time_start = time() - part1 = filterfalse(find_lease, leases) - part2 = filterfalse(find_ends, leases) - result = dict(zip(part1, part2)) - time_end = time() - file_parse_time = round((time_end - time_start) * 1000) - return result, file_parse_time - except (OSError, IOError) as error: - self.error("Failed to parse leases file:", str(error)) - return None - - def _get_data(self): + def get_data(self): """ :return: dict """ - raw_data = self._get_raw_data() - if not raw_data: - return None + if not self.dhcpd_leases.is_changed(): + return self.data - raw_leases, parse_time = raw_data[0], raw_data[1] + raw_leases = self.dhcpd_leases.get_data() + if not raw_leases: + self.data = dict() + return None - # Result: {ipaddress: end lease time, ...} - active_leases, to_netdata = list(), dict() - current_time = mktime(gmtime()) + active_leases = list() + current_time = time.mktime(time.gmtime()) - for ip, lease_end_time in raw_leases.items(): - # Result: [active binding, active binding....]. (Expire time (ends date;) - current time > 0) - if binding_active(lease_end_time=lease_end_time[7:-2], - current_time=current_time): - active_leases.append(ip_address(u'%s' % ip[6:-3])) + for address in raw_leases: + try: + new_lease = Lease(address, **raw_leases[address]) + except ValueError: + continue + else: + if new_lease.is_active(current_time) and new_lease.is_valid(): + active_leases.append(new_lease) for pool in self.pools: - dim_id = pool.replace('.', '_') - pool_leases_count = len([ip for ip in active_leases if ip in self.pools[pool]['net']]) - to_netdata[dim_id + '_active_leases'] = pool_leases_count - to_netdata[dim_id + '_utilization'] = float(pool_leases_count) / self.pools[pool]['num_hosts'] * 10000 + count = len([ip for ip in active_leases if ip in pool]) + self.data[pool.id + '_active_leases'] = count + self.data[pool.id + '_utilization'] = float(count) / pool.num_hosts() * 10000 + + self.data['leases_size'] = self.dhcpd_leases.size + self.data['leases_total'] = len(active_leases) - to_netdata['leases_total'] = len(active_leases) - to_netdata['leases_size'] = stat(self.leases_path)[6] - to_netdata['parse_time'] = parse_time - return to_netdata + return self.data def create_charts(self): for pool in self.pools: - dim, dim_id = pool, pool.replace('.', '_') - self.definitions['pools_utilization']['lines'].append([dim_id + '_utilization', - dim, 'absolute', 1, 100]) - self.definitions['pools_active_leases']['lines'].append([dim_id + '_active_leases', - dim, 'absolute']) - - -def binding_active(lease_end_time, current_time): - # lease_end_time might be epoch - if lease_end_time.startswith('epoch'): - epoch = int(lease_end_time.split()[1].replace(';','')) - return epoch - current_time > 0 - # max. int for lease-time causes lease to expire in year 2038. - # dhcpd puts 'never' in the ends section of active lease - elif lease_end_time == 'never': - return True - else: - return mktime(strptime(lease_end_time, '%w %Y/%m/%d %H:%M:%S')) - current_time > 0 - - -def find_lease(value): - return value[0:3] != 'lea' - - -def find_ends(value): - return value[2:6] != 'ends' + self.definitions['pools_utilization']['lines'].append([pool.id + '_utilization', pool.name, + 'absolute', 1, 100]) + self.definitions['pools_active_leases']['lines'].append([pool.id + '_active_leases', pool.name]) diff --git a/python.d/mdstat.chart.py b/python.d/mdstat.chart.py index 794d25641..35ba9058f 100644 --- a/python.d/mdstat.chart.py +++ b/python.d/mdstat.chart.py @@ -2,8 +2,9 @@ # Description: mdstat netdata python.d module # Author: l2isbad +import re + from collections import defaultdict -from re import compile as re_compile from bases.FrameworkServices.SimpleService import SimpleService @@ -11,28 +12,96 @@ priority = 60000 retries = 60 update_every = 1 +ORDER = ['mdstat_health'] +CHARTS = { + 'mdstat_health': { + 'options': [None, 'Faulty Devices In MD', 'failed disks', 'health', 'md.health', 'line'], + 'lines': list() + } +} + OPERATIONS = ('check', 'resync', 'reshape', 'recovery', 'finish', 'speed') +RE_DISKS = re.compile(r' (?P<array>[a-zA-Z_0-9]+) : active .+\[' + r'(?P<total_disks>[0-9]+)/' + r'(?P<inuse_disks>[0-9]+)\]') + +RE_STATUS = re.compile(r' (?P<array>[a-zA-Z_0-9]+) : active .+ ' + r'(?P<operation>[a-z]+) =[ ]{1,2}' + r'(?P<operation_status>[0-9.]+).+finish=' + r'(?P<finish>([0-9.]+))min speed=' + r'(?P<speed>[0-9]+)') + + +def md_charts(md): + order = ['{0}_disks'.format(md.name), + '{0}_operation'.format(md.name), + '{0}_finish'.format(md.name), + '{0}_speed'.format(md.name) + ] + + charts = dict() + charts[order[0]] = { + 'options': [None, 'Disks Stats', 'disks', md.name, 'md.disks', 'stacked'], + 'lines': [ + ['{0}_total_disks'.format(md.name), 'total', 'absolute'], + ['{0}_inuse_disks'.format(md.name), 'inuse', 'absolute'] + ] + } + + charts['_'.join([md.name, 'operation'])] = { + 'options': [None, 'Current Status', 'percent', md.name, 'md.status', 'line'], + 'lines': [ + ['{0}_resync'.format(md.name), 'resync', 'absolute', 1, 100], + ['{0}_recovery'.format(md.name), 'recovery', 'absolute', 1, 100], + ['{0}_reshape'.format(md.name), 'reshape', 'absolute', 1, 100], + ['{0}_check'.format(md.name), 'check', 'absolute', 1, 100] + ] + } + + charts['_'.join([md.name, 'finish'])] = { + 'options': [None, 'Approximate Time Until Finish', 'seconds', md.name, 'md.rate', 'line'], + 'lines': [ + ['{0}_finish'.format(md.name), 'finish in', 'absolute', 1, 1000] + ] + } + + charts['_'.join([md.name, 'speed'])] = { + 'options': [None, 'Operation Speed', 'KB/s', md.name, 'md.rate', 'line'], + 'lines': [ + ['{0}_speed'.format(md.name), 'speed', 'absolute', 1, 1000] + ] + } + + return order, charts + + +class MD: + def __init__(self, name, stats): + self.name = name + self.stats = stats + + def update_stats(self, stats): + self.stats = stats + + def data(self): + stats = dict(('_'.join([self.name, k]), v) for k, v in self.stats.items()) + stats['{0}_health'.format(self.name)] = int(self.stats['total_disks']) - int(self.stats['inuse_disks']) + return stats + class Service(SimpleService): def __init__(self, configuration=None, name=None): SimpleService.__init__(self, configuration=configuration, name=name) - self.regex = dict(disks=re_compile(r' (?P<array>[a-zA-Z_0-9]+) : active .+\[' - r'(?P<total_disks>[0-9]+)/' - r'(?P<inuse_disks>[0-9]+)\]'), - status=re_compile(r' (?P<array>[a-zA-Z_0-9]+) : active .+ ' - r'(?P<operation>[a-z]+) =[ ]{1,2}' - r'(?P<operation_status>[0-9.]+).+finish=' - r'(?P<finish>([0-9.]+))min speed=' - r'(?P<speed>[0-9]+)')) + self.order = ORDER + self.definitions = CHARTS + self.mds = dict() def check(self): - arrays = find_arrays(self._get_raw_data(), self.regex) + arrays = find_arrays(self._get_raw_data()) if not arrays: self.error('Failed to read data from /proc/mdstat or there is no active arrays') return None - - self.order, self.definitions = create_charts(arrays.keys()) return True @staticmethod @@ -47,25 +116,44 @@ class Service(SimpleService): except (OSError, IOError): return None - def _get_data(self): + def get_data(self): """ Parse data from _get_raw_data() :return: dict """ - raw_data = self._get_raw_data() - arrays = find_arrays(raw_data, self.regex) + arrays = find_arrays(self._get_raw_data()) if not arrays: return None - to_netdata = dict() + data = dict() for array, values in arrays.items(): - for key, value in values.items(): - to_netdata['_'.join([array, key])] = value - return to_netdata + if array not in self.mds: + md = MD(array, values) + self.mds[md.name] = md + self.create_new_array_charts(md) + else: + md = self.mds[array] + md.update_stats(values) + + data.update(md.data()) + + return data + def create_new_array_charts(self, md): + order, charts = md_charts(md) -def find_arrays(raw_data, regex): + self.charts['mdstat_health'].add_dimension(['{0}_health'.format(md.name), md.name]) + for chart_name in order: + params = [chart_name] + charts[chart_name]['options'] + dimensions = charts[chart_name]['lines'] + + new_chart = self.charts.add_chart(params) + for dimension in dimensions: + new_chart.add_dimension(dimension) + + +def find_arrays(raw_data): if raw_data is None: return None data = defaultdict(str) @@ -79,49 +167,23 @@ def find_arrays(raw_data, regex): arrays = dict() for value in data.values(): - match = regex['disks'].search(value) + match = RE_DISKS.search(value) if not match: continue match = match.groupdict() array = match.pop('array') arrays[array] = match - arrays[array]['health'] = int(match['total_disks']) - int(match['inuse_disks']) for operation in OPERATIONS: arrays[array][operation] = 0 - match = regex['status'].search(value) + match = RE_STATUS.search(value) if match: match = match.groupdict() if match['operation'] in OPERATIONS: + arrays[array]['operation'] = match['operation'] arrays[array][match['operation']] = float(match['operation_status']) * 100 - arrays[array]['finish'] = float(match['finish']) * 100 - arrays[array]['speed'] = float(match['speed']) / 1000 * 100 + arrays[array]['finish'] = float(match['finish']) * 1000 * 60 + arrays[array]['speed'] = float(match['speed']) * 1000 return arrays or None - - -def create_charts(arrays): - order = ['mdstat_health'] - definitions = dict(mdstat_health={'options': [None, 'Faulty devices in MD', 'failed disks', - 'health', 'md.health', 'line'], - 'lines': []}) - for md in arrays: - order.append(md) - order.append(md + '_status') - order.append(md + '_rate') - definitions['mdstat_health']['lines'].append([md + '_health', md, 'absolute']) - definitions[md] = {'options': [None, '%s disks stats' % md, 'disks', md, 'md.disks', 'stacked'], - 'lines': [[md + '_total_disks', 'total', 'absolute'], - [md + '_inuse_disks', 'inuse', 'absolute']]} - definitions[md + '_status'] = {'options': [None, '%s current status' % md, - 'percent', md, 'md.status', 'line'], - 'lines': [[md + '_resync', 'resync', 'absolute', 1, 100], - [md + '_recovery', 'recovery', 'absolute', 1, 100], - [md + '_reshape', 'reshape', 'absolute', 1, 100], - [md + '_check', 'check', 'absolute', 1, 100]]} - definitions[md + '_rate'] = {'options': [None, '%s operation status' % md, - 'rate', md, 'md.rate', 'line'], - 'lines': [[md + '_finish', 'finish min', 'absolute', 1, 100], - [md + '_speed', 'MB/s', 'absolute', -1, 100]]} - return order, definitions diff --git a/python.d/nginx_plus.chart.py b/python.d/nginx_plus.chart.py new file mode 100644 index 000000000..509ddd380 --- /dev/null +++ b/python.d/nginx_plus.chart.py @@ -0,0 +1,491 @@ +# -*- coding: utf-8 -*- +# Description: nginx_plus netdata python.d module +# Author: Ilya Mashchenko (l2isbad) + +import re + +from collections import defaultdict +from copy import deepcopy +from json import loads + +try: + from collections import OrderedDict +except ImportError: + from third_party.ordereddict import OrderedDict + +from bases.FrameworkServices.UrlService import UrlService + +# default module values (can be overridden per job in `config`) +update_every = 1 +priority = 60000 +retries = 60 + +# charts order (can be overridden if you want less charts, or different order) +ORDER = ['requests_total', 'requests_current', + 'connections_statistics', 'connections_workers', + 'ssl_handshakes', 'ssl_session_reuses', 'ssl_memory_usage', + 'processes'] + +CHARTS = { + 'requests_total': { + 'options': [None, 'Requests Total', 'requests/s', + 'requests', 'nginx_plus.requests_total', 'line'], + 'lines': [ + ['requests_total', 'total', 'incremental'] + ]}, + 'requests_current': { + 'options': [None, 'Requests Current', 'requests', + 'requests', 'nginx_plus.requests_current', 'line'], + 'lines': [ + ['requests_current', 'current'] + ]}, + 'connections_statistics': { + 'options': [None, 'Connections Statistics', 'connections/s', + 'connections', 'nginx_plus.connections_statistics', 'stacked'], + 'lines': [ + ['connections_accepted', 'accepted', 'incremental'], + ['connections_dropped', 'dropped', 'incremental'] + ]}, + 'connections_workers': { + 'options': [None, 'Workers Statistics', 'workers', + 'connections', 'nginx_plus.connections_workers', 'stacked'], + 'lines': [ + ['connections_idle', 'idle'], + ['connections_active', 'active'] + ]}, + 'ssl_handshakes': { + 'options': [None, 'SSL Handshakes', 'handshakes/s', + 'ssl', 'nginx_plus.ssl_handshakes', 'stacked'], + 'lines': [ + ['ssl_handshakes', 'successful', 'incremental'], + ['ssl_handshakes_failed', 'failed', 'incremental'] + ]}, + 'ssl_session_reuses': { + 'options': [None, 'Session Reuses', 'sessions/s', + 'ssl', 'nginx_plus.ssl_session_reuses', 'line'], + 'lines': [ + ['ssl_session_reuses', 'reused', 'incremental'] + ]}, + 'ssl_memory_usage': { + 'options': [None, 'Memory Usage', '%', + 'ssl', 'nginx_plus.ssl_memory_usage', 'area'], + 'lines': [ + ['ssl_memory_usage', 'usage', 'absolute', 1, 100] + ]}, + 'processes': { + 'options': [None, 'Processes', 'processes', + 'processes', 'nginx_plus.processes', 'line'], + 'lines': [ + ['processes_respawned', 'respawned'] + ]} +} + + +def cache_charts(cache): + family = 'cache {0}'.format(cache.real_name) + charts = OrderedDict() + + charts['{0}_traffic'.format(cache.name)] = { + 'options': [None, 'Traffic', 'KB', family, + 'nginx_plus.cache_traffic', 'stacked'], + 'lines': [ + ['_'.join([cache.name, 'hit_bytes']), 'served', 'absolute', 1, 1024], + ['_'.join([cache.name, 'miss_bytes_written']), 'written', 'absolute', 1, 1024], + ['_'.join([cache.name, 'miss_bytes']), 'bypass', 'absolute', 1, 1024] + ] + } + charts['{0}_memory_usage'.format(cache.name)] = { + 'options': [None, 'Memory Usage', '%', family, + 'nginx_plus.cache_memory_usage', 'area'], + 'lines': [ + ['_'.join([cache.name, 'memory_usage']), 'usage', 'absolute', 1, 100], + ] + } + return charts + + +def web_zone_charts(wz): + charts = OrderedDict() + family = 'web zone {name}'.format(name=wz.real_name) + + # Processing + charts['zone_{name}_processing'.format(name=wz.name)] = { + 'options': [None, 'Zone "{name}" Processing'.format(name=wz.name), 'requests', family, + 'nginx_plus.web_zone_processing', 'line'], + 'lines': [ + ['_'.join([wz.name, 'processing']), 'processing'] + ] + } + # Requests + charts['zone_{name}_requests'.format(name=wz.name)] = { + 'options': [None, 'Zone "{name}" Requests'.format(name=wz.name), 'requests/s', family, + 'nginx_plus.web_zone_requests', 'line'], + 'lines': [ + ['_'.join([wz.name, 'requests']), 'requests', 'incremental'] + ] + } + # Response Codes + charts['zone_{name}_responses'.format(name=wz.name)] = { + 'options': [None, 'Zone "{name}" Responses'.format(name=wz.name), 'requests/s', family, + 'nginx_plus.web_zone_responses', 'stacked'], + 'lines': [ + ['_'.join([wz.name, 'responses_2xx']), '2xx', 'incremental'], + ['_'.join([wz.name, 'responses_5xx']), '5xx', 'incremental'], + ['_'.join([wz.name, 'responses_3xx']), '3xx', 'incremental'], + ['_'.join([wz.name, 'responses_4xx']), '4xx', 'incremental'], + ['_'.join([wz.name, 'responses_1xx']), '1xx', 'incremental'] + ] + } + # Traffic + charts['zone_{name}_net'.format(name=wz.name)] = { + 'options': [None, 'Zone "{name}" Traffic'.format(name=wz.name), 'kilobits/s', family, + 'nginx_plus.zone_net', 'area'], + 'lines': [ + ['_'.join([wz.name, 'received']), 'received', 'incremental', 1, 1000], + ['_'.join([wz.name, 'sent']), 'sent', 'incremental', -1, 1000] + ] + } + return charts + + +def web_upstream_charts(wu): + def dimensions(value, a='absolute', m=1, d=1): + dims = list() + for p in wu: + dims.append(['_'.join([wu.name, p.server, value]), p.real_server, a, m, d]) + return dims + + charts = OrderedDict() + family = 'web upstream {name}'.format(name=wu.real_name) + + # Requests + charts['web_upstream_{name}_requests'.format(name=wu.name)] = { + 'options': [None, 'Peers Requests', 'requests/s', family, + 'nginx_plus.web_upstream_requests', 'line'], + 'lines': dimensions('requests', 'incremental') + } + # Responses Codes + charts['web_upstream_{name}_all_responses'.format(name=wu.name)] = { + 'options': [None, 'All Peers Responses', 'responses/s', family, + 'nginx_plus.web_upstream_all_responses', 'stacked'], + 'lines': [ + ['_'.join([wu.name, 'responses_2xx']), '2xx', 'incremental'], + ['_'.join([wu.name, 'responses_5xx']), '5xx', 'incremental'], + ['_'.join([wu.name, 'responses_3xx']), '3xx', 'incremental'], + ['_'.join([wu.name, 'responses_4xx']), '4xx', 'incremental'], + ['_'.join([wu.name, 'responses_1xx']), '1xx', 'incremental'], + ] + } + for peer in wu: + charts['web_upstream_{0}_{1}_responses'.format(wu.name, peer.id)] = { + 'options': [None, 'Peer "{0}" Responses'.format(peer.real_server), 'responses/s', family, + 'nginx_plus.web_upstream_peer_responses', 'stacked'], + 'lines': [ + ['_'.join([wu.name, peer.server, 'responses_2xx']), '2xx', 'incremental'], + ['_'.join([wu.name, peer.server, 'responses_5xx']), '5xx', 'incremental'], + ['_'.join([wu.name, peer.server, 'responses_3xx']), '3xx', 'incremental'], + ['_'.join([wu.name, peer.server, 'responses_4xx']), '4xx', 'incremental'], + ['_'.join([wu.name, peer.server, 'responses_1xx']), '1xx', 'incremental'] + ] + } + # Connections + charts['web_upstream_{name}_connections'.format(name=wu.name)] = { + 'options': [None, 'Peers Connections', 'active', family, + 'nginx_plus.web_upstream_connections', 'line'], + 'lines': dimensions('active') + } + charts['web_upstream_{name}_connections_usage'.format(name=wu.name)] = { + 'options': [None, 'Peers Connections Usage', '%', family, + 'nginx_plus.web_upstream_connections_usage', 'line'], + 'lines': dimensions('connections_usage', d=100) + } + # Traffic + charts['web_upstream_{0}_all_net'.format(wu.name)] = { + 'options': [None, 'All Peers Traffic', 'kilobits/s', family, + 'nginx_plus.web_upstream_all_net', 'area'], + 'lines': [ + ['{0}_received'.format(wu.name), 'received', 'incremental', 1, 1000], + ['{0}_sent'.format(wu.name), 'sent', 'incremental', -1, 1000] + ] + } + for peer in wu: + charts['web_upstream_{0}_{1}_net'.format(wu.name, peer.id)] = { + 'options': [None, 'Peer "{0}" Traffic'.format(peer.real_server), 'kilobits/s', family, + 'nginx_plus.web_upstream_peer_traffic', 'area'], + 'lines': [ + ['{0}_{1}_received'.format(wu.name, peer.server), 'received', 'incremental', 1, 1000], + ['{0}_{1}_sent'.format(wu.name, peer.server), 'sent', 'incremental', -1, 1000] + ] + } + # Response Time + for peer in wu: + charts['web_upstream_{0}_{1}_timings'.format(wu.name, peer.id)] = { + 'options': [None, 'Peer "{0}" Timings'.format(peer.real_server), 'ms', family, + 'nginx_plus.web_upstream_peer_timings', 'line'], + 'lines': [ + ['_'.join([wu.name, peer.server, 'header_time']), 'header'], + ['_'.join([wu.name, peer.server, 'response_time']), 'response'] + ] + } + # Memory Usage + charts['web_upstream_{name}_memory_usage'.format(name=wu.name)] = { + 'options': [None, 'Memory Usage', '%', family, + 'nginx_plus.web_upstream_memory_usage', 'area'], + 'lines': [ + ['_'.join([wu.name, 'memory_usage']), 'usage', 'absolute', 1, 100] + ] + } + # State + charts['web_upstream_{name}_status'.format(name=wu.name)] = { + 'options': [None, 'Peers Status', 'state', family, + 'nginx_plus.web_upstream_status', 'line'], + 'lines': dimensions('state') + } + # Downtime + charts['web_upstream_{name}_downtime'.format(name=wu.name)] = { + 'options': [None, 'Peers Downtime', 'seconds', family, + 'nginx_plus.web_upstream_peer_downtime', 'line'], + 'lines': dimensions('downtime', d=1000) + } + + return charts + + +METRICS = dict( + SERVER=[ + 'processes.respawned', + 'connections.accepted', + 'connections.dropped', + 'connections.active', + 'connections.idle', + 'ssl.handshakes', + 'ssl.handshakes_failed', + 'ssl.session_reuses', + 'requests.total', + 'requests.current', + 'slabs.SSL.pages.free', + 'slabs.SSL.pages.used' + ], + WEB_ZONE=[ + 'processing', + 'requests', + 'responses.1xx', + 'responses.2xx', + 'responses.3xx', + 'responses.4xx', + 'responses.5xx', + 'discarded', + 'received', + 'sent' + ], + WEB_UPSTREAM_PEER=[ + 'id', + 'server', + 'name', + 'state', + 'active', + 'max_conns', + 'requests', + 'header_time', # alive only + 'response_time', # alive only + 'responses.1xx', + 'responses.2xx', + 'responses.3xx', + 'responses.4xx', + 'responses.5xx', + 'sent', + 'received', + 'downtime' + ], + WEB_UPSTREAM_SUMMARY=[ + 'responses.1xx', + 'responses.2xx', + 'responses.3xx', + 'responses.4xx', + 'responses.5xx', + 'sent', + 'received' + ], + CACHE=[ + 'hit.bytes', # served + 'miss.bytes_written', # written + 'miss.bytes' # bypass + + ] +) + +BAD_SYMBOLS = re.compile(r'[:/.-]+') + + +class Cache: + key = 'caches' + charts = cache_charts + + def __init__(self, **kw): + self.real_name = kw['name'] + self.name = BAD_SYMBOLS.sub('_', self.real_name) + + def memory_usage(self, data): + used = data['slabs'][self.real_name]['pages']['used'] + free = data['slabs'][self.real_name]['pages']['free'] + return used / float(free + used) * 1e4 + + def get_data(self, raw_data): + zone_data = raw_data['caches'][self.real_name] + data = parse_json(zone_data, METRICS['CACHE']) + data['memory_usage'] = self.memory_usage(raw_data) + return dict(('_'.join([self.name, k]), v) for k, v in data.items()) + + +class WebZone: + key = 'server_zones' + charts = web_zone_charts + + def __init__(self, **kw): + self.real_name = kw['name'] + self.name = BAD_SYMBOLS.sub('_', self.real_name) + + def get_data(self, raw_data): + zone_data = raw_data['server_zones'][self.real_name] + data = parse_json(zone_data, METRICS['WEB_ZONE']) + return dict(('_'.join([self.name, k]), v) for k, v in data.items()) + + +class WebUpstream: + key = 'upstreams' + charts = web_upstream_charts + + def __init__(self, **kw): + self.real_name = kw['name'] + self.name = BAD_SYMBOLS.sub('_', self.real_name) + self.peers = OrderedDict() + + peers = kw['response']['upstreams'][self.real_name]['peers'] + for peer in peers: + self.add_peer(peer['id'], peer['server']) + + def __iter__(self): + return iter(self.peers.values()) + + def add_peer(self, idx, server): + peer = WebUpstreamPeer(idx, server) + self.peers[peer.real_server] = peer + return peer + + def peers_stats(self, peers): + data = dict() + for peer in self.peers.values(): + if not peer.active: + continue + try: + data.update(peer.get_data(peers[peer.id])) + except KeyError: + peer.active = False + return data + + def memory_usage(self, data): + used = data['slabs'][self.real_name]['pages']['used'] + free = data['slabs'][self.real_name]['pages']['free'] + return used / float(free + used) * 1e4 + + def summary_stats(self, data): + rv = defaultdict(int) + for metric in METRICS['WEB_UPSTREAM_SUMMARY']: + for peer in self.peers.values(): + if peer.active: + metric = '_'.join(metric.split('.')) + rv[metric] += data['_'.join([peer.server, metric])] + return rv + + def get_data(self, raw_data): + data = dict() + peers = raw_data['upstreams'][self.real_name]['peers'] + data.update(self.peers_stats(peers)) + data.update(self.summary_stats(data)) + data['memory_usage'] = self.memory_usage(raw_data) + return dict(('_'.join([self.name, k]), v) for k, v in data.items()) + + +class WebUpstreamPeer: + def __init__(self, idx, server): + self.id = idx + self.real_server = server + self.server = BAD_SYMBOLS.sub('_', self.real_server) + self.active = True + + def get_data(self, raw): + data = dict(header_time=0, response_time=0, max_conns=0) + data.update(parse_json(raw, METRICS['WEB_UPSTREAM_PEER'])) + data['connections_usage'] = 0 if not data['max_conns'] else data['active'] / float(data['max_conns']) * 1e4 + data['state'] = int(data['state'] == 'up') + return dict(('_'.join([self.server, k]), v) for k, v in data.items()) + + +class Service(UrlService): + def __init__(self, configuration=None, name=None): + UrlService.__init__(self, configuration=configuration, name=name) + self.order = list(ORDER) + self.definitions = deepcopy(CHARTS) + self.objects = dict() + + def check(self): + if not self.url: + self.error('URL is not defined') + return None + + self._manager = self._build_manager() + if not self._manager: + return None + + raw_data = self._get_raw_data() + if not raw_data: + return None + + try: + response = loads(raw_data) + except ValueError: + return None + + for obj_cls in [WebZone, WebUpstream, Cache]: + for obj_name in response.get(obj_cls.key, list()): + obj = obj_cls(name=obj_name, response=response) + self.objects[obj.real_name] = obj + charts = obj_cls.charts(obj) + for chart in charts: + self.order.append(chart) + self.definitions[chart] = charts[chart] + + return bool(self.objects) + + def _get_data(self): + """ + Format data received from http request + :return: dict + """ + raw_data = self._get_raw_data() + if not raw_data: + return None + response = loads(raw_data) + + data = parse_json(response, METRICS['SERVER']) + data['ssl_memory_usage'] = data['slabs_SSL_pages_used'] / float(data['slabs_SSL_pages_free']) * 1e4 + + for obj in self.objects.values(): + if obj.real_name in response[obj.key]: + data.update(obj.get_data(response)) + + return data + + +def parse_json(raw_data, metrics): + data = dict() + for metric in metrics: + value = raw_data + metrics_list = metric.split('.') + try: + for m in metrics_list: + value = value[m] + except KeyError: + continue + data['_'.join(metrics_list)] = value + return data diff --git a/python.d/ntpd.chart.py b/python.d/ntpd.chart.py new file mode 100644 index 000000000..05209da87 --- /dev/null +++ b/python.d/ntpd.chart.py @@ -0,0 +1,380 @@ +# -*- coding: utf-8 -*- +# Description: ntpd netdata python.d module +# Author: Sven Mäder (rda0) +# Author: Ilya Mashchenko (l2isbad) + +import struct +import re + +from bases.FrameworkServices.SocketService import SocketService + +# default module values +update_every = 1 +priority = 60000 +retries = 60 + +# NTP Control Message Protocol constants +MODE = 6 +HEADER_FORMAT = '!BBHHHHH' +HEADER_LEN = 12 +OPCODES = { + 'readstat': 1, + 'readvar': 2 +} + +# Maximal dimension precision +PRECISION = 1000000 + +# Static charts +ORDER = [ + 'sys_offset', + 'sys_jitter', + 'sys_frequency', + 'sys_wander', + 'sys_rootdelay', + 'sys_rootdisp', + 'sys_stratum', + 'sys_tc', + 'sys_precision', + 'peer_offset', + 'peer_delay', + 'peer_dispersion', + 'peer_jitter', + 'peer_xleave', + 'peer_rootdelay', + 'peer_rootdisp', + 'peer_stratum', + 'peer_hmode', + 'peer_pmode', + 'peer_hpoll', + 'peer_ppoll', + 'peer_precision' +] + +CHARTS = { + 'sys_offset': { + 'options': [None, 'Combined offset of server relative to this host', 'ms', 'system', 'ntpd.sys_offset', 'area'], + 'lines': [ + ['offset', 'offset', 'absolute', 1, PRECISION] + ]}, + 'sys_jitter': { + 'options': [None, 'Combined system jitter and clock jitter', 'ms', 'system', 'ntpd.sys_jitter', 'line'], + 'lines': [ + ['sys_jitter', 'system', 'absolute', 1, PRECISION], + ['clk_jitter', 'clock', 'absolute', 1, PRECISION] + ]}, + 'sys_frequency': { + 'options': [None, 'Frequency offset relative to hardware clock', 'ppm', 'system', 'ntpd.sys_frequency', 'area'], + 'lines': [ + ['frequency', 'frequency', 'absolute', 1, PRECISION] + ]}, + 'sys_wander': { + 'options': [None, 'Clock frequency wander', 'ppm', 'system', 'ntpd.sys_wander', 'area'], + 'lines': [ + ['clk_wander', 'clock', 'absolute', 1, PRECISION] + ]}, + 'sys_rootdelay': { + 'options': [None, 'Total roundtrip delay to the primary reference clock', 'ms', 'system', + 'ntpd.sys_rootdelay', 'area'], + 'lines': [ + ['rootdelay', 'delay', 'absolute', 1, PRECISION] + ]}, + 'sys_rootdisp': { + 'options': [None, 'Total root dispersion to the primary reference clock', 'ms', 'system', + 'ntpd.sys_rootdisp', 'area'], + 'lines': [ + ['rootdisp', 'dispersion', 'absolute', 1, PRECISION] + ]}, + 'sys_stratum': { + 'options': [None, 'Stratum (1-15)', 'stratum', 'system', 'ntpd.sys_stratum', 'line'], + 'lines': [ + ['stratum', 'stratum', 'absolute', 1, PRECISION] + ]}, + 'sys_tc': { + 'options': [None, 'Time constant and poll exponent (3-17)', 'log2 s', 'system', 'ntpd.sys_tc', 'line'], + 'lines': [ + ['tc', 'current', 'absolute', 1, PRECISION], + ['mintc', 'minimum', 'absolute', 1, PRECISION] + ]}, + 'sys_precision': { + 'options': [None, 'Precision', 'log2 s', 'system', 'ntpd.sys_precision', 'line'], + 'lines': [ + ['precision', 'precision', 'absolute', 1, PRECISION] + ]} +} + +PEER_CHARTS = { + 'peer_offset': { + 'options': [None, 'Filter offset', 'ms', 'peers', 'ntpd.peer_offset', 'line'], + 'lines': [ + ]}, + 'peer_delay': { + 'options': [None, 'Filter delay', 'ms', 'peers', 'ntpd.peer_delay', 'line'], + 'lines': [ + ]}, + 'peer_dispersion': { + 'options': [None, 'Filter dispersion', 'ms', 'peers', 'ntpd.peer_dispersion', 'line'], + 'lines': [ + ]}, + 'peer_jitter': { + 'options': [None, 'Filter jitter', 'ms', 'peers', 'ntpd.peer_jitter', 'line'], + 'lines': [ + ]}, + 'peer_xleave': { + 'options': [None, 'Interleave delay', 'ms', 'peers', 'ntpd.peer_xleave', 'line'], + 'lines': [ + ]}, + 'peer_rootdelay': { + 'options': [None, 'Total roundtrip delay to the primary reference clock', 'ms', 'peers', + 'ntpd.peer_rootdelay', 'line'], + 'lines': [ + ]}, + 'peer_rootdisp': { + 'options': [None, 'Total root dispersion to the primary reference clock', 'ms', 'peers', + 'ntpd.peer_rootdisp', 'line'], + 'lines': [ + ]}, + 'peer_stratum': { + 'options': [None, 'Stratum (1-15)', 'stratum', 'peers', 'ntpd.peer_stratum', 'line'], + 'lines': [ + ]}, + 'peer_hmode': { + 'options': [None, 'Host mode (1-6)', 'hmode', 'peers', 'ntpd.peer_hmode', 'line'], + 'lines': [ + ]}, + 'peer_pmode': { + 'options': [None, 'Peer mode (1-5)', 'pmode', 'peers', 'ntpd.peer_pmode', 'line'], + 'lines': [ + ]}, + 'peer_hpoll': { + 'options': [None, 'Host poll exponent', 'log2 s', 'peers', 'ntpd.peer_hpoll', 'line'], + 'lines': [ + ]}, + 'peer_ppoll': { + 'options': [None, 'Peer poll exponent', 'log2 s', 'peers', 'ntpd.peer_ppoll', 'line'], + 'lines': [ + ]}, + 'peer_precision': { + 'options': [None, 'Precision', 'log2 s', 'peers', 'ntpd.peer_precision', 'line'], + 'lines': [ + ]} +} + + +class Base: + regex = re.compile(r'([a-z_]+)=((?:-)?[0-9]+(?:\.[0-9]+)?)') + + @staticmethod + def get_header(associd=0, operation='readvar'): + """ + Constructs the NTP Control Message header: + 0 1 2 3 + 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + |LI | VN |Mode |R|E|M| OpCode | Sequence Number | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | Status | Association ID | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | Offset | Count | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + """ + version = 2 + sequence = 1 + status = 0 + offset = 0 + count = 0 + header = struct.pack(HEADER_FORMAT, (version << 3 | MODE), OPCODES[operation], + sequence, status, associd, offset, count) + return header + + +class System(Base): + def __init__(self): + self.request = self.get_header() + + def get_data(self, raw): + """ + Extracts key=value pairs with float/integer from ntp response packet data. + """ + data = dict() + for key, value in self.regex.findall(raw): + data[key] = float(value) * PRECISION + return data + + +class Peer(Base): + def __init__(self, idx, name): + self.id = idx + self.real_name = name + self.name = name.replace('.', '_') + self.request = self.get_header(self.id) + + def get_data(self, raw): + """ + Extracts key=value pairs with float/integer from ntp response packet data. + """ + data = dict() + for key, value in self.regex.findall(raw): + dimension = '_'.join([self.name, key]) + data[dimension] = float(value) * PRECISION + return data + + +class Service(SocketService): + def __init__(self, configuration=None, name=None): + SocketService.__init__(self, configuration=configuration, name=name) + self.order = list(ORDER) + self.definitions = dict(CHARTS) + + self.port = 'ntp' + self.dgram_socket = True + self.system = System() + self.peers = dict() + self.request = str() + self.retries = 0 + self.show_peers = self.configuration.get('show_peers', False) + self.peer_rescan = self.configuration.get('peer_rescan', 60) + + if self.show_peers: + self.definitions.update(PEER_CHARTS) + + def check(self): + """ + Checks if we can get valid systemvars. + If not, returns None to disable module. + """ + self._parse_config() + + peer_filter = self.configuration.get('peer_filter', r'127\..*') + try: + self.peer_filter = re.compile(r'^((0\.0\.0\.0)|({0}))$'.format(peer_filter)) + except re.error as error: + self.error('Compile pattern error (peer_filter) : {0}'.format(error)) + return None + + self.request = self.system.request + raw_systemvars = self._get_raw_data() + + if not self.system.get_data(raw_systemvars): + return None + + return True + + def get_data(self): + """ + Gets systemvars data on each update. + Gets peervars data for all peers on each update. + """ + data = dict() + + self.request = self.system.request + raw = self._get_raw_data() + if not raw: + return None + + data.update(self.system.get_data(raw)) + + if not self.show_peers: + return data + + if not self.peers or self.runs_counter % self.peer_rescan == 0 or self.retries > 8: + self.find_new_peers() + + for peer in self.peers.values(): + self.request = peer.request + peer_data = peer.get_data(self._get_raw_data()) + if peer_data: + data.update(peer_data) + else: + self.retries += 1 + + return data + + def find_new_peers(self): + new_peers = dict((p.real_name, p) for p in self.get_peers()) + if new_peers: + + peers_to_remove = set(self.peers) - set(new_peers) + peers_to_add = set(new_peers) - set(self.peers) + + for peer_name in peers_to_remove: + self.hide_old_peer_from_charts(self.peers[peer_name]) + del self.peers[peer_name] + + for peer_name in peers_to_add: + self.add_new_peer_to_charts(new_peers[peer_name]) + + self.peers.update(new_peers) + self.retries = 0 + + def add_new_peer_to_charts(self, peer): + for chart_id in set(self.charts.charts) & set(PEER_CHARTS): + dim_id = peer.name + chart_id[4:] + if dim_id not in self.charts[chart_id]: + self.charts[chart_id].add_dimension([dim_id, peer.real_name, 'absolute', 1, PRECISION]) + else: + self.charts[chart_id].hide_dimension(dim_id, reverse=True) + + def hide_old_peer_from_charts(self, peer): + for chart_id in set(self.charts.charts) & set(PEER_CHARTS): + dim_id = peer.name + chart_id[4:] + self.charts[chart_id].hide_dimension(dim_id) + + def get_peers(self): + self.request = Base.get_header(operation='readstat') + + raw_data = self._get_raw_data(raw=True) + if not raw_data: + return list() + + peer_ids = self.get_peer_ids(raw_data) + if not peer_ids: + return list() + + new_peers = list() + for peer_id in peer_ids: + self.request = Base.get_header(peer_id) + raw_peer_data = self._get_raw_data() + if not raw_peer_data: + continue + srcadr = re.search(r'(srcadr)=([^,]+)', raw_peer_data) + if not srcadr: + continue + srcadr = srcadr.group(2) + if self.peer_filter.search(srcadr): + continue + stratum = re.search(r'(stratum)=([^,]+)', raw_peer_data) + if not stratum: + continue + if int(stratum.group(2)) > 15: + continue + + new_peer = Peer(idx=peer_id, name=srcadr) + new_peers.append(new_peer) + return new_peers + + def get_peer_ids(self, res): + """ + Unpack the NTP Control Message header + Get data length from header + Get list of association ids returned in the readstat response + """ + + try: + count = struct.unpack(HEADER_FORMAT, res[:HEADER_LEN])[6] + except struct.error as error: + self.error('error unpacking header: {0}'.format(error)) + return None + if not count: + self.error('empty data field in NTP control packet') + return None + + data_end = HEADER_LEN + count + data = res[HEADER_LEN:data_end] + data_format = ''.join(['!', 'H' * int(count / 2)]) + try: + peer_ids = list(struct.unpack(data_format, data))[::2] + except struct.error as error: + self.error('error unpacking data: {0}'.format(error)) + return None + return peer_ids diff --git a/python.d/portcheck.chart.py b/python.d/portcheck.chart.py new file mode 100644 index 000000000..0a312210d --- /dev/null +++ b/python.d/portcheck.chart.py @@ -0,0 +1,159 @@ +# -*- coding: utf-8 -*- +# Description: simple port check netdata python.d module +# Original Author: ccremer (github.com/ccremer) + +import socket + +try: + from time import monotonic as time +except ImportError: + from time import time + +from bases.FrameworkServices.SimpleService import SimpleService + +# default module values (can be overridden per job in `config`) +priority = 60000 +retries = 60 + +PORT_LATENCY = 'connect' + +PORT_SUCCESS = 'success' +PORT_TIMEOUT = 'timeout' +PORT_FAILED = 'no_connection' + +ORDER = ['latency', 'status'] + +CHARTS = { + 'latency': { + 'options': [None, 'TCP connect latency', 'ms', 'latency', 'portcheck.latency', 'line'], + 'lines': [ + [PORT_LATENCY, 'connect', 'absolute', 100, 1000] + ] + }, + 'status': { + 'options': [None, 'Portcheck status', 'boolean', 'status', 'portcheck.status', 'line'], + 'lines': [ + [PORT_SUCCESS, 'success', 'absolute'], + [PORT_TIMEOUT, 'timeout', 'absolute'], + [PORT_FAILED, 'no connection', 'absolute'] + ]} +} + + +# Not deriving from SocketService, too much is different +class Service(SimpleService): + def __init__(self, configuration=None, name=None): + SimpleService.__init__(self, configuration=configuration, name=name) + self.order = ORDER + self.definitions = CHARTS + self.host = self.configuration.get('host') + self.port = self.configuration.get('port') + self.timeout = self.configuration.get('timeout', 1) + + def check(self): + """ + Parse configuration, check if configuration is available, and dynamically create chart lines data + :return: boolean + """ + if self.host is None or self.port is None: + self.error("Host or port missing") + return False + if not isinstance(self.port, int): + self.error('"port" is not an integer. Specify a numerical value, not service name.') + return False + + self.debug("Enabled portcheck: {host}:{port}, update every {update}s, timeout: {timeout}s".format( + host=self.host, port=self.port, update=self.update_every, timeout=self.timeout + )) + # We will accept any (valid-ish) configuration, even if initial connection fails (a service might be down from + # the beginning) + return True + + def _get_data(self): + """ + Get data from socket + :return: dict + """ + data = dict() + data[PORT_SUCCESS] = 0 + data[PORT_TIMEOUT] = 0 + data[PORT_FAILED] = 0 + + success = False + try: + for socket_config in socket.getaddrinfo(self.host, self.port, socket.AF_UNSPEC, socket.SOCK_STREAM): + # use first working socket + sock = self._create_socket(socket_config) + if sock is not None: + self._connect2socket(data, socket_config, sock) + self._disconnect(sock) + success = True + break + except socket.gaierror as error: + self.debug('Failed to connect to "{host}:{port}", error: {error}'.format( + host=self.host, port=self.port, error=error + )) + + # We could not connect + if not success: + data[PORT_FAILED] = 1 + + return data + + def _create_socket(self, socket_config): + af, sock_type, proto, canon_name, sa = socket_config + try: + self.debug('Creating socket to "{address}", port {port}'.format(address=sa[0], port=sa[1])) + sock = socket.socket(af, sock_type, proto) + sock.settimeout(self.timeout) + return sock + except socket.error as error: + self.debug('Failed to create socket "{address}", port {port}, error: {error}'.format( + address=sa[0], port=sa[1], error=error + )) + return None + + def _connect2socket(self, data, socket_config, sock): + """ + Connect to a socket, passing the result of getaddrinfo() + :return: dict + """ + + af, sock_type, proto, canon_name, sa = socket_config + port = str(sa[1]) + try: + self.debug('Connecting socket to "{address}", port {port}'.format(address=sa[0], port=port)) + start = time() + sock.connect(sa) + diff = time() - start + self.debug('Connected to "{address}", port {port}, latency {latency}'.format( + address=sa[0], port=port, latency=diff + )) + # we will set it at least 0.1 ms. 0.0 would mean failed connection (handy for 3rd-party-APIs) + data[PORT_LATENCY] = max(round(diff * 10000), 0) + data[PORT_SUCCESS] = 1 + + except socket.timeout as error: + self.debug('Socket timed out on "{address}", port {port}, error: {error}'.format( + address=sa[0], port=port, error=error + )) + data[PORT_TIMEOUT] = 1 + + except socket.error as error: + self.debug('Failed to connect to "{address}", port {port}, error: {error}'.format( + address=sa[0], port=port, error=error + )) + data[PORT_FAILED] = 1 + + def _disconnect(self, sock): + """ + Close socket connection + :return: + """ + if sock is not None: + try: + self.debug('Closing socket') + sock.shutdown(2) # 0 - read, 1 - write, 2 - all + sock.close() + except socket.error: + pass diff --git a/python.d/postgres.chart.py b/python.d/postgres.chart.py index ef69a9c77..0522b1938 100644 --- a/python.d/postgres.chart.py +++ b/python.d/postgres.chart.py @@ -32,6 +32,8 @@ METRICS = dict( 'tup_updated', 'tup_deleted', 'conflicts', + 'temp_files', + 'temp_bytes', 'size'], BACKENDS=['backends_active', 'backends_idle'], @@ -39,11 +41,21 @@ METRICS = dict( 'index_size'], TABLE_STATS=['table_size', 'table_count'], + WAL=['written_wal', + 'recycled_wal', + 'total_wal'], + WAL_WRITES=['wal_writes'], ARCHIVE=['ready_count', 'done_count', 'file_count'], - BGWRITER=['writer_scheduled', - 'writer_requested'], + BGWRITER=['checkpoint_scheduled', + 'checkpoint_requested', + 'buffers_checkpoint', + 'buffers_clean', + 'maxwritten_clean', + 'buffers_backend', + 'buffers_alloc', + 'buffers_backend_fsync'], LOCKS=['ExclusiveLock', 'RowShareLock', 'SIReadLock', @@ -52,17 +64,44 @@ METRICS = dict( 'AccessShareLock', 'ShareRowExclusiveLock', 'ShareLock', - 'RowExclusiveLock'] + 'RowExclusiveLock'], + AUTOVACUUM=['analyze', + 'vacuum_analyze', + 'vacuum', + 'vacuum_freeze', + 'brin_summarize'], + STANDBY_DELTA=['sent_delta', + 'write_delta', + 'flush_delta', + 'replay_delta'], + REPSLOT_FILES=['replslot_wal_keep', + 'replslot_files'] + ) QUERIES = dict( + WAL=""" +SELECT + count(*) as total_wal, + count(*) FILTER (WHERE type = 'recycled') AS recycled_wal, + count(*) FILTER (WHERE type = 'written') AS written_wal +FROM + (SELECT wal.name, + pg_{0}file_name(CASE pg_is_in_recovery() WHEN true THEN NULL ELSE pg_current_{0}_{1}() END ), + CASE WHEN wal.name > pg_{0}file_name(CASE pg_is_in_recovery() WHEN true THEN NULL ELSE pg_current_{0}_{1}() END ) THEN 'recycled' + ELSE 'written' + END AS type + FROM pg_catalog.pg_ls_dir('pg_{0}') AS wal(name) + WHERE name ~ '^[0-9A-F]{{24}}$' + ORDER BY (pg_stat_file('pg_{0}/'||name)).modification, wal.name DESC) sub; +""", ARCHIVE=""" SELECT CAST(COUNT(*) AS INT) AS file_count, CAST(COALESCE(SUM(CAST(archive_file ~ $r$\.ready$$r$ as INT)), 0) AS INT) AS ready_count, CAST(COALESCE(SUM(CAST(archive_file ~ $r$\.done$$r$ AS INT)), 0) AS INT) AS done_count FROM - pg_catalog.pg_ls_dir('{0}/archive_status') AS archive_files (archive_file); + pg_catalog.pg_ls_dir('pg_{0}/archive_status') AS archive_files (archive_file); """, BACKENDS=""" SELECT @@ -86,28 +125,37 @@ WHERE relkind = 'i';""", DATABASE=""" SELECT datname AS database_name, - sum(numbackends) AS connections, - sum(xact_commit) AS xact_commit, - sum(xact_rollback) AS xact_rollback, - sum(blks_read) AS blks_read, - sum(blks_hit) AS blks_hit, - sum(tup_returned) AS tup_returned, - sum(tup_fetched) AS tup_fetched, - sum(tup_inserted) AS tup_inserted, - sum(tup_updated) AS tup_updated, - sum(tup_deleted) AS tup_deleted, - sum(conflicts) AS conflicts, - pg_database_size(datname) AS size + numbackends AS connections, + xact_commit AS xact_commit, + xact_rollback AS xact_rollback, + blks_read AS blks_read, + blks_hit AS blks_hit, + tup_returned AS tup_returned, + tup_fetched AS tup_fetched, + tup_inserted AS tup_inserted, + tup_updated AS tup_updated, + tup_deleted AS tup_deleted, + conflicts AS conflicts, + pg_database_size(datname) AS size, + temp_files AS temp_files, + temp_bytes AS temp_bytes FROM pg_stat_database WHERE datname IN %(databases)s -GROUP BY datname; +; """, BGWRITER=""" SELECT - checkpoints_timed AS writer_scheduled, - checkpoints_req AS writer_requested -FROM pg_stat_bgwriter;""", - LOCKS=""" + checkpoints_timed AS checkpoint_scheduled, + checkpoints_req AS checkpoint_requested, + buffers_checkpoint * current_setting('block_size')::numeric buffers_checkpoint, + buffers_clean * current_setting('block_size')::numeric buffers_clean, + maxwritten_clean, + buffers_backend * current_setting('block_size')::numeric buffers_backend, + buffers_alloc * current_setting('block_size')::numeric buffers_alloc, + buffers_backend_fsync +FROM pg_stat_bgwriter; +""", + LOCKS=""" SELECT pg_database.datname as database_name, mode, @@ -123,12 +171,69 @@ FROM pg_stat_database WHERE has_database_privilege((SELECT current_user), datname, 'connect') AND NOT datname ~* '^template\d+'; """, + FIND_STANDBY=""" +SELECT application_name +FROM pg_stat_replication +WHERE application_name IS NOT NULL +GROUP BY application_name; +""", + FIND_REPLICATION_SLOT=""" +SELECT slot_name +FROM pg_replication_slots; +""", + STANDBY_DELTA=""" +SELECT application_name, + pg_{0}_{1}_diff(CASE pg_is_in_recovery() WHEN true THEN pg_last_{0}_receive_{1}() ELSE pg_current_{0}_{1}() END , sent_{1}) AS sent_delta, + pg_{0}_{1}_diff(CASE pg_is_in_recovery() WHEN true THEN pg_last_{0}_receive_{1}() ELSE pg_current_{0}_{1}() END , write_{1}) AS write_delta, + pg_{0}_{1}_diff(CASE pg_is_in_recovery() WHEN true THEN pg_last_{0}_receive_{1}() ELSE pg_current_{0}_{1}() END , flush_{1}) AS flush_delta, + pg_{0}_{1}_diff(CASE pg_is_in_recovery() WHEN true THEN pg_last_{0}_receive_{1}() ELSE pg_current_{0}_{1}() END , replay_{1}) AS replay_delta +FROM pg_stat_replication +WHERE application_name IS NOT NULL; +""", + REPSLOT_FILES=""" +WITH wal_size AS ( + SELECT current_setting('wal_block_size')::INT * setting::INT AS val + FROM pg_settings + WHERE name = 'wal_segment_size' +) +SELECT slot_name, slot_type, replslot_wal_keep, count(slot_file) AS replslot_files +FROM ( + SELECT slot.slot_name, CASE WHEN slot_file <> 'state' THEN 1 END AS slot_file , slot_type, + COALESCE (floor((pg_wal_lsn_diff (pg_current_wal_lsn (), + slot.restart_lsn) - (pg_walfile_name_offset (restart_lsn)).file_offset) / (s.val)), + 0) AS replslot_wal_keep + FROM pg_replication_slots slot + LEFT JOIN ( + SELECT slot2.slot_name, + pg_ls_dir('pg_replslot/' || slot2.slot_name) AS slot_file + FROM pg_replication_slots slot2 + ) files (slot_name, slot_file) + ON slot.slot_name = files.slot_name + CROSS JOIN wal_size s) AS d +GROUP BY slot_name, slot_type, replslot_wal_keep; +""", IF_SUPERUSER=""" SELECT current_setting('is_superuser') = 'on' AS is_superuser; - """, +""", DETECT_SERVER_VERSION=""" SHOW server_version_num; - """ +""", + AUTOVACUUM=""" +SELECT + count(*) FILTER (WHERE query LIKE 'autovacuum: ANALYZE%%') AS analyze, + count(*) FILTER (WHERE query LIKE 'autovacuum: VACUUM ANALYZE%%') AS vacuum_analyze, + count(*) FILTER (WHERE query LIKE 'autovacuum: VACUUM%%' + AND query NOT LIKE 'autovacuum: VACUUM ANALYZE%%' + AND query NOT LIKE '%%to prevent wraparound%%') AS vacuum, + count(*) FILTER (WHERE query LIKE '%%to prevent wraparound%%') AS vacuum_freeze, + count(*) FILTER (WHERE query LIKE 'autovacuum: BRIN summarize%%') AS brin_summarize +FROM pg_stat_activity +WHERE query NOT LIKE '%%pg_stat_activity%%'; +""", + DIFF_LSN=""" +SELECT pg_{0}_{1}_diff(CASE pg_is_in_recovery() WHEN true THEN pg_last_{0}_receive_{1}() ELSE pg_current_{0}_{1}() END, '0/0') as wal_writes ; +""" + ) @@ -138,8 +243,11 @@ QUERY_STATS = { QUERIES['LOCKS']: METRICS['LOCKS'] } -ORDER = ['db_stat_transactions', 'db_stat_tuple_read', 'db_stat_tuple_returned', 'db_stat_tuple_write', 'database_size', - 'backend_process', 'index_count', 'index_size', 'table_count', 'table_size', 'wal', 'background_writer'] +ORDER = ['db_stat_temp_files', 'db_stat_temp_bytes', 'db_stat_blks', 'db_stat_tuple_returned', 'db_stat_tuple_write', + 'db_stat_transactions','db_stat_connections', 'database_size', 'backend_process', 'index_count', 'index_size', + 'table_count', 'table_size', 'wal', 'wal_writes', 'archive_wal', 'checkpointer', 'stat_bgwriter_alloc', 'stat_bgwriter_checkpoint', + 'stat_bgwriter_backend', 'stat_bgwriter_backend_fsync' , 'stat_bgwriter_bgwriter', 'stat_bgwriter_maxwritten', + 'replication_slot', 'standby_delta', 'autovacuum'] CHARTS = { 'db_stat_transactions': { @@ -155,8 +263,8 @@ CHARTS = { 'lines': [ ['connections', 'connections', 'absolute'] ]}, - 'db_stat_tuple_read': { - 'options': [None, 'Tuple reads from db', 'reads/s', 'db statistics', 'postgres.db_stat_tuple_read', 'line'], + 'db_stat_blks': { + 'options': [None, 'Disk blocks reads from db', 'reads/s', 'db statistics', 'postgres.db_stat_blks', 'line'], 'lines': [ ['blks_read', 'disk', 'incremental'], ['blks_hit', 'cache', 'incremental'] @@ -176,6 +284,16 @@ CHARTS = { ['tup_deleted', 'deleted', 'incremental'], ['conflicts', 'conflicts', 'incremental'] ]}, + 'db_stat_temp_bytes': { + 'options': [None, 'Temp files written to disk', 'KB/s', 'db statistics', 'postgres.db_stat_temp_bytes', 'line'], + 'lines': [ + ['temp_bytes', 'size', 'incremental', 1, 1024] + ]}, + 'db_stat_temp_files': { + 'options': [None, 'Temp files written to disk', 'files', 'db statistics', 'postgres.db_stat_temp_files', 'line'], + 'lines': [ + ['temp_files', 'files', 'incremental'] + ]}, 'database_size': { 'options': [None, 'Database size', 'MB', 'database size', 'postgres.db_size', 'stacked'], 'lines': [ @@ -208,17 +326,82 @@ CHARTS = { ['table_size', 'size', 'absolute', 1, 1024 * 1024] ]}, 'wal': { - 'options': [None, 'Write-Ahead Logging Statistics', 'files/s', 'write ahead log', 'postgres.wal', 'line'], + 'options': [None, 'Write-Ahead Logs', 'files', 'wal', 'postgres.wal', 'line'], + 'lines': [ + ['written_wal', 'written', 'absolute'], + ['recycled_wal', 'recycled', 'absolute'], + ['total_wal', 'total', 'absolute'] + ]}, + 'wal_writes': { + 'options': [None, 'Write-Ahead Logs', 'kilobytes/s', 'wal_writes', 'postgres.wal_writes', 'line'], + 'lines': [ + ['wal_writes', 'writes', 'incremental', 1, 1024] + ]}, + 'archive_wal': { + 'options': [None, 'Archive Write-Ahead Logs', 'files/s', 'archive wal', 'postgres.archive_wal', 'line'], 'lines': [ ['file_count', 'total', 'incremental'], ['ready_count', 'ready', 'incremental'], ['done_count', 'done', 'incremental'] ]}, - 'background_writer': { - 'options': [None, 'Checkpoints', 'writes/s', 'background writer', 'postgres.background_writer', 'line'], + 'checkpointer': { + 'options': [None, 'Checkpoints', 'writes', 'checkpointer', 'postgres.checkpointer', 'line'], + 'lines': [ + ['checkpoint_scheduled', 'scheduled', 'incremental'], + ['checkpoint_requested', 'requested', 'incremental'] + ]}, + 'stat_bgwriter_alloc': { + 'options': [None, 'Buffers allocated', 'kilobytes/s', 'bgwriter', 'postgres.stat_bgwriter_alloc', 'line'], + 'lines': [ + ['buffers_alloc', 'alloc', 'incremental', 1, 1024] + ]}, + 'stat_bgwriter_checkpoint': { + 'options': [None, 'Buffers written during checkpoints', 'kilobytes/s', 'bgwriter', 'postgres.stat_bgwriter_checkpoint', 'line'], + 'lines': [ + ['buffers_checkpoint', 'checkpoint', 'incremental', 1, 1024] + ]}, + 'stat_bgwriter_backend': { + 'options': [None, 'Buffers written directly by a backend', 'kilobytes/s', 'bgwriter', 'postgres.stat_bgwriter_backend', 'line'], + 'lines': [ + ['buffers_backend', 'backend', 'incremental', 1, 1024] + ]}, + 'stat_bgwriter_backend_fsync': { + 'options': [None, 'Fsync by backend', 'times', 'bgwriter', 'postgres.stat_bgwriter_backend_fsync', 'line'], + 'lines': [ + ['buffers_backend_fsync', 'backend fsync', 'incremental'] + ]}, + 'stat_bgwriter_bgwriter': { + 'options': [None, 'Buffers written by the background writer', 'kilobytes/s', 'bgwriter', 'postgres.bgwriter_bgwriter', 'line'], + 'lines': [ + ['buffers_clean', 'clean', 'incremental', 1, 1024] + ]}, + 'stat_bgwriter_maxwritten': { + 'options': [None, 'Too many buffers written', 'times', 'bgwriter', 'postgres.stat_bgwriter_maxwritten', 'line'], + 'lines': [ + ['maxwritten_clean', 'maxwritten', 'incremental'] + ]}, + 'autovacuum': { + 'options': [None, 'Autovacuum workers', 'workers', 'autovacuum', 'postgres.autovacuum', 'line'], 'lines': [ - ['writer_scheduled', 'scheduled', 'incremental'], - ['writer_requested', 'requested', 'incremental'] + ['analyze', 'analyze', 'absolute'], + ['vacuum', 'vacuum', 'absolute'], + ['vacuum_analyze', 'vacuum analyze', 'absolute'], + ['vacuum_freeze', 'vacuum freeze', 'absolute'], + ['brin_summarize', 'brin summarize', 'absolute'] + ]}, + 'standby_delta': { + 'options': [None, 'Standby delta', 'kilobytes', 'replication delta', 'postgres.standby_delta', 'line'], + 'lines': [ + ['sent_delta', 'sent delta', 'absolute', 1, 1024], + ['write_delta', 'write delta', 'absolute', 1, 1024], + ['flush_delta', 'flush delta', 'absolute', 1, 1024], + ['replay_delta', 'replay delta', 'absolute', 1, 1024] + ]}, + 'replication_slot': { + 'options': [None, 'Replication slot files', 'files', 'replication slot', 'postgres.replication_slot', 'line'], + 'lines': [ + ['replslot_wal_keep', 'wal keeped', 'absolute'], + ['replslot_files', 'pg_replslot files', 'absolute'] ]} } @@ -237,6 +420,8 @@ class Service(SimpleService): self.data = dict() self.locks_zeroed = dict() self.databases = list() + self.secondaries = list() + self.replication_slots = list() self.queries = QUERY_STATS.copy() def _connect(self): @@ -270,7 +455,10 @@ class Service(SimpleService): cursor = self.connection.cursor() self.databases = discover_databases_(cursor, QUERIES['FIND_DATABASES']) is_superuser = check_if_superuser_(cursor, QUERIES['IF_SUPERUSER']) + self.secondaries = discover_secondaries_(cursor, QUERIES['FIND_STANDBY']) self.server_version = detect_server_version(cursor, QUERIES['DETECT_SERVER_VERSION']) + if self.server_version >= 94000: + self.replication_slots = discover_replication_slots_(cursor, QUERIES['FIND_REPLICATION_SLOT']) cursor.close() if self.database_poll and isinstance(self.database_poll, str): @@ -286,29 +474,51 @@ class Service(SimpleService): return False def add_additional_queries_(self, is_superuser): + + if self.server_version >= 100000: + wal = 'wal' + lsn = 'lsn' + else: + wal = 'xlog' + lsn = 'location' + self.queries[QUERIES['BGWRITER']] = METRICS['BGWRITER'] + self.queries[QUERIES['DIFF_LSN'].format(wal,lsn)] = METRICS['WAL_WRITES'] + self.queries[QUERIES['STANDBY_DELTA'].format(wal,lsn)] = METRICS['STANDBY_DELTA'] + if self.index_stats: self.queries[QUERIES['INDEX_STATS']] = METRICS['INDEX_STATS'] if self.table_stats: self.queries[QUERIES['TABLE_STATS']] = METRICS['TABLE_STATS'] if is_superuser: - self.queries[QUERIES['BGWRITER']] = METRICS['BGWRITER'] + self.queries[QUERIES['ARCHIVE'].format(wal)] = METRICS['ARCHIVE'] + if self.server_version >= 90400: + self.queries[QUERIES['WAL'].format(wal,lsn)] = METRICS['WAL'] if self.server_version >= 100000: - wal_dir_name = 'pg_wal' - else: - wal_dir_name = 'pg_xlog' - self.queries[QUERIES['ARCHIVE'].format(wal_dir_name)] = METRICS['ARCHIVE'] + self.queries[QUERIES['REPSLOT_FILES']] = METRICS['REPSLOT_FILES'] + if self.server_version >= 90400: + self.queries[QUERIES['AUTOVACUUM']] = METRICS['AUTOVACUUM'] def create_dynamic_charts_(self): for database_name in self.databases[::-1]: self.definitions['database_size']['lines'].append([database_name + '_size', database_name, 'absolute', 1, 1024 * 1024]) - for chart_name in [name for name in CHARTS if name.startswith('db_stat')]: + for chart_name in [name for name in self.order if name.startswith('db_stat')]: add_database_stat_chart_(order=self.order, definitions=self.definitions, name=chart_name, database_name=database_name) add_database_lock_chart_(order=self.order, definitions=self.definitions, database_name=database_name) + for application_name in self.secondaries[::-1]: + add_replication_delta_chart_(order=self.order, definitions=self.definitions, + name='standby_delta', application_name=application_name) + + for slot_name in self.replication_slots[::-1]: + add_replication_slot_chart_(order=self.order, definitions=self.definitions, + name='replication_slot', slot_name=slot_name) + + + def _get_data(self): result, error = self._connect() if result: @@ -332,7 +542,14 @@ class Service(SimpleService): cursor.execute(query, dict(databases=tuple(self.databases))) for row in cursor: for metric in metrics: - dimension_id = '_'.join([row['database_name'], metric]) if 'database_name' in row else metric + if 'database_name' in row: + dimension_id = '_'.join([row['database_name'], metric]) + elif 'application_name' in row: + dimension_id = '_'.join([row['application_name'], metric]) + elif 'slot_name' in row: + dimension_id = '_'.join([row['slot_name'], metric]) + else: + dimension_id = metric if metric in row: self.data[dimension_id] = int(row[metric]) elif 'locks_count' in row: @@ -347,6 +564,21 @@ def discover_databases_(cursor, query): result.append(db) return result +def discover_secondaries_(cursor, query): + cursor.execute(query) + result = list() + for sc in [standby[0] for standby in cursor]: + if sc not in result: + result.append(sc) + return result + +def discover_replication_slots_(cursor, query): + cursor.execute(query) + result = list() + for slot in [replication_slot[0] for replication_slot in cursor]: + if slot not in result: + result.append(slot) + return result def check_if_superuser_(cursor, query): cursor.execute(query) @@ -398,3 +630,37 @@ def add_database_stat_chart_(order, definitions, name, database_name): definitions[chart_name] = { 'options': [name, title + ': ' + database_name, units, 'db ' + database_name, context, chart_type], 'lines': create_lines(database_name, chart_template['lines'])} + +def add_replication_delta_chart_(order, definitions, name, application_name): + def create_lines(standby, lines): + result = list() + for line in lines: + new_line = ['_'.join([standby, line[0]])] + line[1:] + result.append(new_line) + return result + + chart_template = CHARTS[name] + chart_name = '_'.join([application_name, name]) + position = order.index('database_size') + order.insert(position, chart_name) + name, title, units, family, context, chart_type = chart_template['options'] + definitions[chart_name] = { + 'options': [name, title + ': ' + application_name, units, 'replication delta', context, chart_type], + 'lines': create_lines(application_name, chart_template['lines'])} + +def add_replication_slot_chart_(order, definitions, name, slot_name): + def create_lines(slot, lines): + result = list() + for line in lines: + new_line = ['_'.join([slot, line[0]])] + line[1:] + result.append(new_line) + return result + + chart_template = CHARTS[name] + chart_name = '_'.join([slot_name, name]) + position = order.index('database_size') + order.insert(position, chart_name) + name, title, units, family, context, chart_type = chart_template['options'] + definitions[chart_name] = { + 'options': [name, title + ': ' + slot_name, units, 'replication slot files', context, chart_type], + 'lines': create_lines(slot_name, chart_template['lines'])} diff --git a/python.d/python_modules/bases/FrameworkServices/ExecutableService.py b/python.d/python_modules/bases/FrameworkServices/ExecutableService.py index b6d7871fa..a71f2bfd2 100644 --- a/python.d/python_modules/bases/FrameworkServices/ExecutableService.py +++ b/python.d/python_modules/bases/FrameworkServices/ExecutableService.py @@ -30,7 +30,10 @@ class ExecutableService(SimpleService): data = list() std = p.stderr if stderr else p.stdout for line in std: - data.append(line.decode()) + try: + data.append(line.decode('utf-8')) + except TypeError: + continue return data or None diff --git a/python.d/python_modules/bases/FrameworkServices/SimpleService.py b/python.d/python_modules/bases/FrameworkServices/SimpleService.py index 14c839101..177332c1f 100644 --- a/python.d/python_modules/bases/FrameworkServices/SimpleService.py +++ b/python.d/python_modules/bases/FrameworkServices/SimpleService.py @@ -33,6 +33,7 @@ class RuntimeCounters: self.RETRIES = 0 self.RETRIES_MAX = configuration.pop('retries') self.PENALTY = 0 + self.RUNS = 1 def is_sleep_time(self): return self.START_RUN < self.NEXT_RUN @@ -82,6 +83,10 @@ class SimpleService(Thread, PythonDLimitedLogger, OldVersionCompatibility, objec return self.fake_name or self.name @property + def runs_counter(self): + return self._runtime_counters.RUNS + + @property def update_every(self): return self._runtime_counters.FREQ @@ -178,6 +183,8 @@ class SimpleService(Thread, PythonDLimitedLogger, OldVersionCompatibility, objec self.error('update() unhandled exception: {error}'.format(error=error)) updated = False + job.RUNS += 1 + if not updated: if not self.manage_retries(): return @@ -209,7 +216,10 @@ class SimpleService(Thread, PythonDLimitedLogger, OldVersionCompatibility, objec for chart in self.charts: if chart.flags.obsoleted: - continue + if chart.can_be_updated(data): + chart.refresh() + else: + continue elif self.charts.cleanup and chart.penalty >= self.charts.cleanup: chart.obsolete() self.error("chart '{0}' was suppressed due to non updating".format(chart.name)) diff --git a/python.d/python_modules/bases/FrameworkServices/SocketService.py b/python.d/python_modules/bases/FrameworkServices/SocketService.py index 90631df16..8d27ae660 100644 --- a/python.d/python_modules/bases/FrameworkServices/SocketService.py +++ b/python.d/python_modules/bases/FrameworkServices/SocketService.py @@ -14,6 +14,7 @@ class SocketService(SimpleService): self.host = 'localhost' self.port = None self.unix_socket = None + self.dgram_socket = False self.request = '' self.__socket_config = None self.__empty_request = "".encode() @@ -115,7 +116,11 @@ class SocketService(SimpleService): if self.__socket_config is not None: self._connect2socket() else: - for res in socket.getaddrinfo(self.host, self.port, socket.AF_UNSPEC, socket.SOCK_STREAM): + if self.dgram_socket: + sock_type = socket.SOCK_DGRAM + else: + sock_type = socket.SOCK_STREAM + for res in socket.getaddrinfo(self.host, self.port, socket.AF_UNSPEC, sock_type): if self._connect2socket(res): break @@ -158,12 +163,15 @@ class SocketService(SimpleService): return False return True - def _receive(self): + def _receive(self, raw=False): """ Receive data from socket - :return: str + :param raw: set `True` to return bytes + :type raw: bool + :return: decoded str or raw bytes + :rtype: str/bytes """ - data = "" + data = "" if not raw else b"" while True: self.debug('receiving response') try: @@ -174,7 +182,7 @@ class SocketService(SimpleService): break if buf is None or len(buf) == 0: # handle server disconnect - if data == "": + if data == "" or data == b"": self._socket_error('unexpectedly disconnected') else: self.debug('server closed the connection') @@ -182,17 +190,20 @@ class SocketService(SimpleService): break self.debug('received data') - data += buf.decode('utf-8', 'ignore') + data += buf.decode('utf-8', 'ignore') if not raw else buf if self._check_raw_data(data): break self.debug('final response: {0}'.format(data)) return data - def _get_raw_data(self): + def _get_raw_data(self, raw=False): """ Get raw data with low-level "socket" module. - :return: str + :param raw: set `True` to return bytes + :type raw: bool + :return: decoded data (str) or raw data (bytes) + :rtype: str/bytes """ if self._sock is None: self._connect() @@ -203,7 +214,7 @@ class SocketService(SimpleService): if not self._send(): return None - data = self._receive() + data = self._receive(raw) if not self._keep_alive: self._disconnect() diff --git a/python.d/python_modules/bases/FrameworkServices/UrlService.py b/python.d/python_modules/bases/FrameworkServices/UrlService.py index 0941ab168..bb340ba3b 100644 --- a/python.d/python_modules/bases/FrameworkServices/UrlService.py +++ b/python.d/python_modules/bases/FrameworkServices/UrlService.py @@ -75,20 +75,31 @@ class UrlService(SimpleService): :return: str """ try: - url = url or self.url - manager = manager or self._manager - response = manager.request(method='GET', - url=url, - timeout=self.request_timeout, - retries=1, - headers=manager.headers) + status, data = self._get_raw_data_with_status(url, manager) except (urllib3.exceptions.HTTPError, TypeError, AttributeError) as error: self.error('Url: {url}. Error: {error}'.format(url=url, error=error)) return None - if response.status == 200: - return response.data.decode() - self.debug('Url: {url}. Http response status code: {code}'.format(url=url, code=response.status)) - return None + + if status == 200: + return data.decode() + else: + self.debug('Url: {url}. Http response status code: {code}'.format(url=url, code=status)) + return None + + def _get_raw_data_with_status(self, url=None, manager=None, retries=1, redirect=True): + """ + Get status and response body content from http request. Does not catch exceptions + :return: int, str + """ + url = url or self.url + manager = manager or self._manager + response = manager.request(method='GET', + url=url, + timeout=self.request_timeout, + retries=retries, + headers=manager.headers, + redirect=redirect) + return response.status, response.data def check(self): """ diff --git a/python.d/python_modules/bases/charts.py b/python.d/python_modules/bases/charts.py index 1e9348e59..5394fbf64 100644 --- a/python.d/python_modules/bases/charts.py +++ b/python.d/python_modules/bases/charts.py @@ -217,6 +217,12 @@ class Chart: safe_print(chart + dimensions + variables) + def can_be_updated(self, data): + for dim in self.dimensions: + if dim.get_value(data) is not None: + return True + return False + def update(self, data, interval): updated_dimensions, updated_variables = str(), str() diff --git a/python.d/python_modules/pyyaml2/__init__.py b/python.d/python_modules/pyyaml2/__init__.py new file mode 100644 index 000000000..76e19e13f --- /dev/null +++ b/python.d/python_modules/pyyaml2/__init__.py @@ -0,0 +1,315 @@ + +from error import * + +from tokens import * +from events import * +from nodes import * + +from loader import * +from dumper import * + +__version__ = '3.11' + +try: + from cyaml import * + __with_libyaml__ = True +except ImportError: + __with_libyaml__ = False + +def scan(stream, Loader=Loader): + """ + Scan a YAML stream and produce scanning tokens. + """ + loader = Loader(stream) + try: + while loader.check_token(): + yield loader.get_token() + finally: + loader.dispose() + +def parse(stream, Loader=Loader): + """ + Parse a YAML stream and produce parsing events. + """ + loader = Loader(stream) + try: + while loader.check_event(): + yield loader.get_event() + finally: + loader.dispose() + +def compose(stream, Loader=Loader): + """ + Parse the first YAML document in a stream + and produce the corresponding representation tree. + """ + loader = Loader(stream) + try: + return loader.get_single_node() + finally: + loader.dispose() + +def compose_all(stream, Loader=Loader): + """ + Parse all YAML documents in a stream + and produce corresponding representation trees. + """ + loader = Loader(stream) + try: + while loader.check_node(): + yield loader.get_node() + finally: + loader.dispose() + +def load(stream, Loader=Loader): + """ + Parse the first YAML document in a stream + and produce the corresponding Python object. + """ + loader = Loader(stream) + try: + return loader.get_single_data() + finally: + loader.dispose() + +def load_all(stream, Loader=Loader): + """ + Parse all YAML documents in a stream + and produce corresponding Python objects. + """ + loader = Loader(stream) + try: + while loader.check_data(): + yield loader.get_data() + finally: + loader.dispose() + +def safe_load(stream): + """ + Parse the first YAML document in a stream + and produce the corresponding Python object. + Resolve only basic YAML tags. + """ + return load(stream, SafeLoader) + +def safe_load_all(stream): + """ + Parse all YAML documents in a stream + and produce corresponding Python objects. + Resolve only basic YAML tags. + """ + return load_all(stream, SafeLoader) + +def emit(events, stream=None, Dumper=Dumper, + canonical=None, indent=None, width=None, + allow_unicode=None, line_break=None): + """ + Emit YAML parsing events into a stream. + If stream is None, return the produced string instead. + """ + getvalue = None + if stream is None: + from StringIO import StringIO + stream = StringIO() + getvalue = stream.getvalue + dumper = Dumper(stream, canonical=canonical, indent=indent, width=width, + allow_unicode=allow_unicode, line_break=line_break) + try: + for event in events: + dumper.emit(event) + finally: + dumper.dispose() + if getvalue: + return getvalue() + +def serialize_all(nodes, stream=None, Dumper=Dumper, + canonical=None, indent=None, width=None, + allow_unicode=None, line_break=None, + encoding='utf-8', explicit_start=None, explicit_end=None, + version=None, tags=None): + """ + Serialize a sequence of representation trees into a YAML stream. + If stream is None, return the produced string instead. + """ + getvalue = None + if stream is None: + if encoding is None: + from StringIO import StringIO + else: + from cStringIO import StringIO + stream = StringIO() + getvalue = stream.getvalue + dumper = Dumper(stream, canonical=canonical, indent=indent, width=width, + allow_unicode=allow_unicode, line_break=line_break, + encoding=encoding, version=version, tags=tags, + explicit_start=explicit_start, explicit_end=explicit_end) + try: + dumper.open() + for node in nodes: + dumper.serialize(node) + dumper.close() + finally: + dumper.dispose() + if getvalue: + return getvalue() + +def serialize(node, stream=None, Dumper=Dumper, **kwds): + """ + Serialize a representation tree into a YAML stream. + If stream is None, return the produced string instead. + """ + return serialize_all([node], stream, Dumper=Dumper, **kwds) + +def dump_all(documents, stream=None, Dumper=Dumper, + default_style=None, default_flow_style=None, + canonical=None, indent=None, width=None, + allow_unicode=None, line_break=None, + encoding='utf-8', explicit_start=None, explicit_end=None, + version=None, tags=None): + """ + Serialize a sequence of Python objects into a YAML stream. + If stream is None, return the produced string instead. + """ + getvalue = None + if stream is None: + if encoding is None: + from StringIO import StringIO + else: + from cStringIO import StringIO + stream = StringIO() + getvalue = stream.getvalue + dumper = Dumper(stream, default_style=default_style, + default_flow_style=default_flow_style, + canonical=canonical, indent=indent, width=width, + allow_unicode=allow_unicode, line_break=line_break, + encoding=encoding, version=version, tags=tags, + explicit_start=explicit_start, explicit_end=explicit_end) + try: + dumper.open() + for data in documents: + dumper.represent(data) + dumper.close() + finally: + dumper.dispose() + if getvalue: + return getvalue() + +def dump(data, stream=None, Dumper=Dumper, **kwds): + """ + Serialize a Python object into a YAML stream. + If stream is None, return the produced string instead. + """ + return dump_all([data], stream, Dumper=Dumper, **kwds) + +def safe_dump_all(documents, stream=None, **kwds): + """ + Serialize a sequence of Python objects into a YAML stream. + Produce only basic YAML tags. + If stream is None, return the produced string instead. + """ + return dump_all(documents, stream, Dumper=SafeDumper, **kwds) + +def safe_dump(data, stream=None, **kwds): + """ + Serialize a Python object into a YAML stream. + Produce only basic YAML tags. + If stream is None, return the produced string instead. + """ + return dump_all([data], stream, Dumper=SafeDumper, **kwds) + +def add_implicit_resolver(tag, regexp, first=None, + Loader=Loader, Dumper=Dumper): + """ + Add an implicit scalar detector. + If an implicit scalar value matches the given regexp, + the corresponding tag is assigned to the scalar. + first is a sequence of possible initial characters or None. + """ + Loader.add_implicit_resolver(tag, regexp, first) + Dumper.add_implicit_resolver(tag, regexp, first) + +def add_path_resolver(tag, path, kind=None, Loader=Loader, Dumper=Dumper): + """ + Add a path based resolver for the given tag. + A path is a list of keys that forms a path + to a node in the representation tree. + Keys can be string values, integers, or None. + """ + Loader.add_path_resolver(tag, path, kind) + Dumper.add_path_resolver(tag, path, kind) + +def add_constructor(tag, constructor, Loader=Loader): + """ + Add a constructor for the given tag. + Constructor is a function that accepts a Loader instance + and a node object and produces the corresponding Python object. + """ + Loader.add_constructor(tag, constructor) + +def add_multi_constructor(tag_prefix, multi_constructor, Loader=Loader): + """ + Add a multi-constructor for the given tag prefix. + Multi-constructor is called for a node if its tag starts with tag_prefix. + Multi-constructor accepts a Loader instance, a tag suffix, + and a node object and produces the corresponding Python object. + """ + Loader.add_multi_constructor(tag_prefix, multi_constructor) + +def add_representer(data_type, representer, Dumper=Dumper): + """ + Add a representer for the given type. + Representer is a function accepting a Dumper instance + and an instance of the given data type + and producing the corresponding representation node. + """ + Dumper.add_representer(data_type, representer) + +def add_multi_representer(data_type, multi_representer, Dumper=Dumper): + """ + Add a representer for the given type. + Multi-representer is a function accepting a Dumper instance + and an instance of the given data type or subtype + and producing the corresponding representation node. + """ + Dumper.add_multi_representer(data_type, multi_representer) + +class YAMLObjectMetaclass(type): + """ + The metaclass for YAMLObject. + """ + def __init__(cls, name, bases, kwds): + super(YAMLObjectMetaclass, cls).__init__(name, bases, kwds) + if 'yaml_tag' in kwds and kwds['yaml_tag'] is not None: + cls.yaml_loader.add_constructor(cls.yaml_tag, cls.from_yaml) + cls.yaml_dumper.add_representer(cls, cls.to_yaml) + +class YAMLObject(object): + """ + An object that can dump itself to a YAML stream + and load itself from a YAML stream. + """ + + __metaclass__ = YAMLObjectMetaclass + __slots__ = () # no direct instantiation, so allow immutable subclasses + + yaml_loader = Loader + yaml_dumper = Dumper + + yaml_tag = None + yaml_flow_style = None + + def from_yaml(cls, loader, node): + """ + Convert a representation node to a Python object. + """ + return loader.construct_yaml_object(node, cls) + from_yaml = classmethod(from_yaml) + + def to_yaml(cls, dumper, data): + """ + Convert a Python object to a representation node. + """ + return dumper.represent_yaml_object(cls.yaml_tag, data, cls, + flow_style=cls.yaml_flow_style) + to_yaml = classmethod(to_yaml) + diff --git a/python.d/python_modules/pyyaml2/composer.py b/python.d/python_modules/pyyaml2/composer.py new file mode 100644 index 000000000..06e5ac782 --- /dev/null +++ b/python.d/python_modules/pyyaml2/composer.py @@ -0,0 +1,139 @@ + +__all__ = ['Composer', 'ComposerError'] + +from error import MarkedYAMLError +from events import * +from nodes import * + +class ComposerError(MarkedYAMLError): + pass + +class Composer(object): + + def __init__(self): + self.anchors = {} + + def check_node(self): + # Drop the STREAM-START event. + if self.check_event(StreamStartEvent): + self.get_event() + + # If there are more documents available? + return not self.check_event(StreamEndEvent) + + def get_node(self): + # Get the root node of the next document. + if not self.check_event(StreamEndEvent): + return self.compose_document() + + def get_single_node(self): + # Drop the STREAM-START event. + self.get_event() + + # Compose a document if the stream is not empty. + document = None + if not self.check_event(StreamEndEvent): + document = self.compose_document() + + # Ensure that the stream contains no more documents. + if not self.check_event(StreamEndEvent): + event = self.get_event() + raise ComposerError("expected a single document in the stream", + document.start_mark, "but found another document", + event.start_mark) + + # Drop the STREAM-END event. + self.get_event() + + return document + + def compose_document(self): + # Drop the DOCUMENT-START event. + self.get_event() + + # Compose the root node. + node = self.compose_node(None, None) + + # Drop the DOCUMENT-END event. + self.get_event() + + self.anchors = {} + return node + + def compose_node(self, parent, index): + if self.check_event(AliasEvent): + event = self.get_event() + anchor = event.anchor + if anchor not in self.anchors: + raise ComposerError(None, None, "found undefined alias %r" + % anchor.encode('utf-8'), event.start_mark) + return self.anchors[anchor] + event = self.peek_event() + anchor = event.anchor + if anchor is not None: + if anchor in self.anchors: + raise ComposerError("found duplicate anchor %r; first occurence" + % anchor.encode('utf-8'), self.anchors[anchor].start_mark, + "second occurence", event.start_mark) + self.descend_resolver(parent, index) + if self.check_event(ScalarEvent): + node = self.compose_scalar_node(anchor) + elif self.check_event(SequenceStartEvent): + node = self.compose_sequence_node(anchor) + elif self.check_event(MappingStartEvent): + node = self.compose_mapping_node(anchor) + self.ascend_resolver() + return node + + def compose_scalar_node(self, anchor): + event = self.get_event() + tag = event.tag + if tag is None or tag == u'!': + tag = self.resolve(ScalarNode, event.value, event.implicit) + node = ScalarNode(tag, event.value, + event.start_mark, event.end_mark, style=event.style) + if anchor is not None: + self.anchors[anchor] = node + return node + + def compose_sequence_node(self, anchor): + start_event = self.get_event() + tag = start_event.tag + if tag is None or tag == u'!': + tag = self.resolve(SequenceNode, None, start_event.implicit) + node = SequenceNode(tag, [], + start_event.start_mark, None, + flow_style=start_event.flow_style) + if anchor is not None: + self.anchors[anchor] = node + index = 0 + while not self.check_event(SequenceEndEvent): + node.value.append(self.compose_node(node, index)) + index += 1 + end_event = self.get_event() + node.end_mark = end_event.end_mark + return node + + def compose_mapping_node(self, anchor): + start_event = self.get_event() + tag = start_event.tag + if tag is None or tag == u'!': + tag = self.resolve(MappingNode, None, start_event.implicit) + node = MappingNode(tag, [], + start_event.start_mark, None, + flow_style=start_event.flow_style) + if anchor is not None: + self.anchors[anchor] = node + while not self.check_event(MappingEndEvent): + #key_event = self.peek_event() + item_key = self.compose_node(node, None) + #if item_key in node.value: + # raise ComposerError("while composing a mapping", start_event.start_mark, + # "found duplicate key", key_event.start_mark) + item_value = self.compose_node(node, item_key) + #node.value[item_key] = item_value + node.value.append((item_key, item_value)) + end_event = self.get_event() + node.end_mark = end_event.end_mark + return node + diff --git a/python.d/python_modules/pyyaml2/constructor.py b/python.d/python_modules/pyyaml2/constructor.py new file mode 100644 index 000000000..635faac3e --- /dev/null +++ b/python.d/python_modules/pyyaml2/constructor.py @@ -0,0 +1,675 @@ + +__all__ = ['BaseConstructor', 'SafeConstructor', 'Constructor', + 'ConstructorError'] + +from error import * +from nodes import * + +import datetime + +import binascii, re, sys, types + +class ConstructorError(MarkedYAMLError): + pass + +class BaseConstructor(object): + + yaml_constructors = {} + yaml_multi_constructors = {} + + def __init__(self): + self.constructed_objects = {} + self.recursive_objects = {} + self.state_generators = [] + self.deep_construct = False + + def check_data(self): + # If there are more documents available? + return self.check_node() + + def get_data(self): + # Construct and return the next document. + if self.check_node(): + return self.construct_document(self.get_node()) + + def get_single_data(self): + # Ensure that the stream contains a single document and construct it. + node = self.get_single_node() + if node is not None: + return self.construct_document(node) + return None + + def construct_document(self, node): + data = self.construct_object(node) + while self.state_generators: + state_generators = self.state_generators + self.state_generators = [] + for generator in state_generators: + for dummy in generator: + pass + self.constructed_objects = {} + self.recursive_objects = {} + self.deep_construct = False + return data + + def construct_object(self, node, deep=False): + if node in self.constructed_objects: + return self.constructed_objects[node] + if deep: + old_deep = self.deep_construct + self.deep_construct = True + if node in self.recursive_objects: + raise ConstructorError(None, None, + "found unconstructable recursive node", node.start_mark) + self.recursive_objects[node] = None + constructor = None + tag_suffix = None + if node.tag in self.yaml_constructors: + constructor = self.yaml_constructors[node.tag] + else: + for tag_prefix in self.yaml_multi_constructors: + if node.tag.startswith(tag_prefix): + tag_suffix = node.tag[len(tag_prefix):] + constructor = self.yaml_multi_constructors[tag_prefix] + break + else: + if None in self.yaml_multi_constructors: + tag_suffix = node.tag + constructor = self.yaml_multi_constructors[None] + elif None in self.yaml_constructors: + constructor = self.yaml_constructors[None] + elif isinstance(node, ScalarNode): + constructor = self.__class__.construct_scalar + elif isinstance(node, SequenceNode): + constructor = self.__class__.construct_sequence + elif isinstance(node, MappingNode): + constructor = self.__class__.construct_mapping + if tag_suffix is None: + data = constructor(self, node) + else: + data = constructor(self, tag_suffix, node) + if isinstance(data, types.GeneratorType): + generator = data + data = generator.next() + if self.deep_construct: + for dummy in generator: + pass + else: + self.state_generators.append(generator) + self.constructed_objects[node] = data + del self.recursive_objects[node] + if deep: + self.deep_construct = old_deep + return data + + def construct_scalar(self, node): + if not isinstance(node, ScalarNode): + raise ConstructorError(None, None, + "expected a scalar node, but found %s" % node.id, + node.start_mark) + return node.value + + def construct_sequence(self, node, deep=False): + if not isinstance(node, SequenceNode): + raise ConstructorError(None, None, + "expected a sequence node, but found %s" % node.id, + node.start_mark) + return [self.construct_object(child, deep=deep) + for child in node.value] + + def construct_mapping(self, node, deep=False): + if not isinstance(node, MappingNode): + raise ConstructorError(None, None, + "expected a mapping node, but found %s" % node.id, + node.start_mark) + mapping = {} + for key_node, value_node in node.value: + key = self.construct_object(key_node, deep=deep) + try: + hash(key) + except TypeError, exc: + raise ConstructorError("while constructing a mapping", node.start_mark, + "found unacceptable key (%s)" % exc, key_node.start_mark) + value = self.construct_object(value_node, deep=deep) + mapping[key] = value + return mapping + + def construct_pairs(self, node, deep=False): + if not isinstance(node, MappingNode): + raise ConstructorError(None, None, + "expected a mapping node, but found %s" % node.id, + node.start_mark) + pairs = [] + for key_node, value_node in node.value: + key = self.construct_object(key_node, deep=deep) + value = self.construct_object(value_node, deep=deep) + pairs.append((key, value)) + return pairs + + def add_constructor(cls, tag, constructor): + if not 'yaml_constructors' in cls.__dict__: + cls.yaml_constructors = cls.yaml_constructors.copy() + cls.yaml_constructors[tag] = constructor + add_constructor = classmethod(add_constructor) + + def add_multi_constructor(cls, tag_prefix, multi_constructor): + if not 'yaml_multi_constructors' in cls.__dict__: + cls.yaml_multi_constructors = cls.yaml_multi_constructors.copy() + cls.yaml_multi_constructors[tag_prefix] = multi_constructor + add_multi_constructor = classmethod(add_multi_constructor) + +class SafeConstructor(BaseConstructor): + + def construct_scalar(self, node): + if isinstance(node, MappingNode): + for key_node, value_node in node.value: + if key_node.tag == u'tag:yaml.org,2002:value': + return self.construct_scalar(value_node) + return BaseConstructor.construct_scalar(self, node) + + def flatten_mapping(self, node): + merge = [] + index = 0 + while index < len(node.value): + key_node, value_node = node.value[index] + if key_node.tag == u'tag:yaml.org,2002:merge': + del node.value[index] + if isinstance(value_node, MappingNode): + self.flatten_mapping(value_node) + merge.extend(value_node.value) + elif isinstance(value_node, SequenceNode): + submerge = [] + for subnode in value_node.value: + if not isinstance(subnode, MappingNode): + raise ConstructorError("while constructing a mapping", + node.start_mark, + "expected a mapping for merging, but found %s" + % subnode.id, subnode.start_mark) + self.flatten_mapping(subnode) + submerge.append(subnode.value) + submerge.reverse() + for value in submerge: + merge.extend(value) + else: + raise ConstructorError("while constructing a mapping", node.start_mark, + "expected a mapping or list of mappings for merging, but found %s" + % value_node.id, value_node.start_mark) + elif key_node.tag == u'tag:yaml.org,2002:value': + key_node.tag = u'tag:yaml.org,2002:str' + index += 1 + else: + index += 1 + if merge: + node.value = merge + node.value + + def construct_mapping(self, node, deep=False): + if isinstance(node, MappingNode): + self.flatten_mapping(node) + return BaseConstructor.construct_mapping(self, node, deep=deep) + + def construct_yaml_null(self, node): + self.construct_scalar(node) + return None + + bool_values = { + u'yes': True, + u'no': False, + u'true': True, + u'false': False, + u'on': True, + u'off': False, + } + + def construct_yaml_bool(self, node): + value = self.construct_scalar(node) + return self.bool_values[value.lower()] + + def construct_yaml_int(self, node): + value = str(self.construct_scalar(node)) + value = value.replace('_', '') + sign = +1 + if value[0] == '-': + sign = -1 + if value[0] in '+-': + value = value[1:] + if value == '0': + return 0 + elif value.startswith('0b'): + return sign*int(value[2:], 2) + elif value.startswith('0x'): + return sign*int(value[2:], 16) + elif value[0] == '0': + return sign*int(value, 8) + elif ':' in value: + digits = [int(part) for part in value.split(':')] + digits.reverse() + base = 1 + value = 0 + for digit in digits: + value += digit*base + base *= 60 + return sign*value + else: + return sign*int(value) + + inf_value = 1e300 + while inf_value != inf_value*inf_value: + inf_value *= inf_value + nan_value = -inf_value/inf_value # Trying to make a quiet NaN (like C99). + + def construct_yaml_float(self, node): + value = str(self.construct_scalar(node)) + value = value.replace('_', '').lower() + sign = +1 + if value[0] == '-': + sign = -1 + if value[0] in '+-': + value = value[1:] + if value == '.inf': + return sign*self.inf_value + elif value == '.nan': + return self.nan_value + elif ':' in value: + digits = [float(part) for part in value.split(':')] + digits.reverse() + base = 1 + value = 0.0 + for digit in digits: + value += digit*base + base *= 60 + return sign*value + else: + return sign*float(value) + + def construct_yaml_binary(self, node): + value = self.construct_scalar(node) + try: + return str(value).decode('base64') + except (binascii.Error, UnicodeEncodeError), exc: + raise ConstructorError(None, None, + "failed to decode base64 data: %s" % exc, node.start_mark) + + timestamp_regexp = re.compile( + ur'''^(?P<year>[0-9][0-9][0-9][0-9]) + -(?P<month>[0-9][0-9]?) + -(?P<day>[0-9][0-9]?) + (?:(?:[Tt]|[ \t]+) + (?P<hour>[0-9][0-9]?) + :(?P<minute>[0-9][0-9]) + :(?P<second>[0-9][0-9]) + (?:\.(?P<fraction>[0-9]*))? + (?:[ \t]*(?P<tz>Z|(?P<tz_sign>[-+])(?P<tz_hour>[0-9][0-9]?) + (?::(?P<tz_minute>[0-9][0-9]))?))?)?$''', re.X) + + def construct_yaml_timestamp(self, node): + value = self.construct_scalar(node) + match = self.timestamp_regexp.match(node.value) + values = match.groupdict() + year = int(values['year']) + month = int(values['month']) + day = int(values['day']) + if not values['hour']: + return datetime.date(year, month, day) + hour = int(values['hour']) + minute = int(values['minute']) + second = int(values['second']) + fraction = 0 + if values['fraction']: + fraction = values['fraction'][:6] + while len(fraction) < 6: + fraction += '0' + fraction = int(fraction) + delta = None + if values['tz_sign']: + tz_hour = int(values['tz_hour']) + tz_minute = int(values['tz_minute'] or 0) + delta = datetime.timedelta(hours=tz_hour, minutes=tz_minute) + if values['tz_sign'] == '-': + delta = -delta + data = datetime.datetime(year, month, day, hour, minute, second, fraction) + if delta: + data -= delta + return data + + def construct_yaml_omap(self, node): + # Note: we do not check for duplicate keys, because it's too + # CPU-expensive. + omap = [] + yield omap + if not isinstance(node, SequenceNode): + raise ConstructorError("while constructing an ordered map", node.start_mark, + "expected a sequence, but found %s" % node.id, node.start_mark) + for subnode in node.value: + if not isinstance(subnode, MappingNode): + raise ConstructorError("while constructing an ordered map", node.start_mark, + "expected a mapping of length 1, but found %s" % subnode.id, + subnode.start_mark) + if len(subnode.value) != 1: + raise ConstructorError("while constructing an ordered map", node.start_mark, + "expected a single mapping item, but found %d items" % len(subnode.value), + subnode.start_mark) + key_node, value_node = subnode.value[0] + key = self.construct_object(key_node) + value = self.construct_object(value_node) + omap.append((key, value)) + + def construct_yaml_pairs(self, node): + # Note: the same code as `construct_yaml_omap`. + pairs = [] + yield pairs + if not isinstance(node, SequenceNode): + raise ConstructorError("while constructing pairs", node.start_mark, + "expected a sequence, but found %s" % node.id, node.start_mark) + for subnode in node.value: + if not isinstance(subnode, MappingNode): + raise ConstructorError("while constructing pairs", node.start_mark, + "expected a mapping of length 1, but found %s" % subnode.id, + subnode.start_mark) + if len(subnode.value) != 1: + raise ConstructorError("while constructing pairs", node.start_mark, + "expected a single mapping item, but found %d items" % len(subnode.value), + subnode.start_mark) + key_node, value_node = subnode.value[0] + key = self.construct_object(key_node) + value = self.construct_object(value_node) + pairs.append((key, value)) + + def construct_yaml_set(self, node): + data = set() + yield data + value = self.construct_mapping(node) + data.update(value) + + def construct_yaml_str(self, node): + value = self.construct_scalar(node) + try: + return value.encode('ascii') + except UnicodeEncodeError: + return value + + def construct_yaml_seq(self, node): + data = [] + yield data + data.extend(self.construct_sequence(node)) + + def construct_yaml_map(self, node): + data = {} + yield data + value = self.construct_mapping(node) + data.update(value) + + def construct_yaml_object(self, node, cls): + data = cls.__new__(cls) + yield data + if hasattr(data, '__setstate__'): + state = self.construct_mapping(node, deep=True) + data.__setstate__(state) + else: + state = self.construct_mapping(node) + data.__dict__.update(state) + + def construct_undefined(self, node): + raise ConstructorError(None, None, + "could not determine a constructor for the tag %r" % node.tag.encode('utf-8'), + node.start_mark) + +SafeConstructor.add_constructor( + u'tag:yaml.org,2002:null', + SafeConstructor.construct_yaml_null) + +SafeConstructor.add_constructor( + u'tag:yaml.org,2002:bool', + SafeConstructor.construct_yaml_bool) + +SafeConstructor.add_constructor( + u'tag:yaml.org,2002:int', + SafeConstructor.construct_yaml_int) + +SafeConstructor.add_constructor( + u'tag:yaml.org,2002:float', + SafeConstructor.construct_yaml_float) + +SafeConstructor.add_constructor( + u'tag:yaml.org,2002:binary', + SafeConstructor.construct_yaml_binary) + +SafeConstructor.add_constructor( + u'tag:yaml.org,2002:timestamp', + SafeConstructor.construct_yaml_timestamp) + +SafeConstructor.add_constructor( + u'tag:yaml.org,2002:omap', + SafeConstructor.construct_yaml_omap) + +SafeConstructor.add_constructor( + u'tag:yaml.org,2002:pairs', + SafeConstructor.construct_yaml_pairs) + +SafeConstructor.add_constructor( + u'tag:yaml.org,2002:set', + SafeConstructor.construct_yaml_set) + +SafeConstructor.add_constructor( + u'tag:yaml.org,2002:str', + SafeConstructor.construct_yaml_str) + +SafeConstructor.add_constructor( + u'tag:yaml.org,2002:seq', + SafeConstructor.construct_yaml_seq) + +SafeConstructor.add_constructor( + u'tag:yaml.org,2002:map', + SafeConstructor.construct_yaml_map) + +SafeConstructor.add_constructor(None, + SafeConstructor.construct_undefined) + +class Constructor(SafeConstructor): + + def construct_python_str(self, node): + return self.construct_scalar(node).encode('utf-8') + + def construct_python_unicode(self, node): + return self.construct_scalar(node) + + def construct_python_long(self, node): + return long(self.construct_yaml_int(node)) + + def construct_python_complex(self, node): + return complex(self.construct_scalar(node)) + + def construct_python_tuple(self, node): + return tuple(self.construct_sequence(node)) + + def find_python_module(self, name, mark): + if not name: + raise ConstructorError("while constructing a Python module", mark, + "expected non-empty name appended to the tag", mark) + try: + __import__(name) + except ImportError, exc: + raise ConstructorError("while constructing a Python module", mark, + "cannot find module %r (%s)" % (name.encode('utf-8'), exc), mark) + return sys.modules[name] + + def find_python_name(self, name, mark): + if not name: + raise ConstructorError("while constructing a Python object", mark, + "expected non-empty name appended to the tag", mark) + if u'.' in name: + module_name, object_name = name.rsplit('.', 1) + else: + module_name = '__builtin__' + object_name = name + try: + __import__(module_name) + except ImportError, exc: + raise ConstructorError("while constructing a Python object", mark, + "cannot find module %r (%s)" % (module_name.encode('utf-8'), exc), mark) + module = sys.modules[module_name] + if not hasattr(module, object_name): + raise ConstructorError("while constructing a Python object", mark, + "cannot find %r in the module %r" % (object_name.encode('utf-8'), + module.__name__), mark) + return getattr(module, object_name) + + def construct_python_name(self, suffix, node): + value = self.construct_scalar(node) + if value: + raise ConstructorError("while constructing a Python name", node.start_mark, + "expected the empty value, but found %r" % value.encode('utf-8'), + node.start_mark) + return self.find_python_name(suffix, node.start_mark) + + def construct_python_module(self, suffix, node): + value = self.construct_scalar(node) + if value: + raise ConstructorError("while constructing a Python module", node.start_mark, + "expected the empty value, but found %r" % value.encode('utf-8'), + node.start_mark) + return self.find_python_module(suffix, node.start_mark) + + class classobj: pass + + def make_python_instance(self, suffix, node, + args=None, kwds=None, newobj=False): + if not args: + args = [] + if not kwds: + kwds = {} + cls = self.find_python_name(suffix, node.start_mark) + if newobj and isinstance(cls, type(self.classobj)) \ + and not args and not kwds: + instance = self.classobj() + instance.__class__ = cls + return instance + elif newobj and isinstance(cls, type): + return cls.__new__(cls, *args, **kwds) + else: + return cls(*args, **kwds) + + def set_python_instance_state(self, instance, state): + if hasattr(instance, '__setstate__'): + instance.__setstate__(state) + else: + slotstate = {} + if isinstance(state, tuple) and len(state) == 2: + state, slotstate = state + if hasattr(instance, '__dict__'): + instance.__dict__.update(state) + elif state: + slotstate.update(state) + for key, value in slotstate.items(): + setattr(object, key, value) + + def construct_python_object(self, suffix, node): + # Format: + # !!python/object:module.name { ... state ... } + instance = self.make_python_instance(suffix, node, newobj=True) + yield instance + deep = hasattr(instance, '__setstate__') + state = self.construct_mapping(node, deep=deep) + self.set_python_instance_state(instance, state) + + def construct_python_object_apply(self, suffix, node, newobj=False): + # Format: + # !!python/object/apply # (or !!python/object/new) + # args: [ ... arguments ... ] + # kwds: { ... keywords ... } + # state: ... state ... + # listitems: [ ... listitems ... ] + # dictitems: { ... dictitems ... } + # or short format: + # !!python/object/apply [ ... arguments ... ] + # The difference between !!python/object/apply and !!python/object/new + # is how an object is created, check make_python_instance for details. + if isinstance(node, SequenceNode): + args = self.construct_sequence(node, deep=True) + kwds = {} + state = {} + listitems = [] + dictitems = {} + else: + value = self.construct_mapping(node, deep=True) + args = value.get('args', []) + kwds = value.get('kwds', {}) + state = value.get('state', {}) + listitems = value.get('listitems', []) + dictitems = value.get('dictitems', {}) + instance = self.make_python_instance(suffix, node, args, kwds, newobj) + if state: + self.set_python_instance_state(instance, state) + if listitems: + instance.extend(listitems) + if dictitems: + for key in dictitems: + instance[key] = dictitems[key] + return instance + + def construct_python_object_new(self, suffix, node): + return self.construct_python_object_apply(suffix, node, newobj=True) + +Constructor.add_constructor( + u'tag:yaml.org,2002:python/none', + Constructor.construct_yaml_null) + +Constructor.add_constructor( + u'tag:yaml.org,2002:python/bool', + Constructor.construct_yaml_bool) + +Constructor.add_constructor( + u'tag:yaml.org,2002:python/str', + Constructor.construct_python_str) + +Constructor.add_constructor( + u'tag:yaml.org,2002:python/unicode', + Constructor.construct_python_unicode) + +Constructor.add_constructor( + u'tag:yaml.org,2002:python/int', + Constructor.construct_yaml_int) + +Constructor.add_constructor( + u'tag:yaml.org,2002:python/long', + Constructor.construct_python_long) + +Constructor.add_constructor( + u'tag:yaml.org,2002:python/float', + Constructor.construct_yaml_float) + +Constructor.add_constructor( + u'tag:yaml.org,2002:python/complex', + Constructor.construct_python_complex) + +Constructor.add_constructor( + u'tag:yaml.org,2002:python/list', + Constructor.construct_yaml_seq) + +Constructor.add_constructor( + u'tag:yaml.org,2002:python/tuple', + Constructor.construct_python_tuple) + +Constructor.add_constructor( + u'tag:yaml.org,2002:python/dict', + Constructor.construct_yaml_map) + +Constructor.add_multi_constructor( + u'tag:yaml.org,2002:python/name:', + Constructor.construct_python_name) + +Constructor.add_multi_constructor( + u'tag:yaml.org,2002:python/module:', + Constructor.construct_python_module) + +Constructor.add_multi_constructor( + u'tag:yaml.org,2002:python/object:', + Constructor.construct_python_object) + +Constructor.add_multi_constructor( + u'tag:yaml.org,2002:python/object/apply:', + Constructor.construct_python_object_apply) + +Constructor.add_multi_constructor( + u'tag:yaml.org,2002:python/object/new:', + Constructor.construct_python_object_new) + diff --git a/python.d/python_modules/pyyaml2/cyaml.py b/python.d/python_modules/pyyaml2/cyaml.py new file mode 100644 index 000000000..68dcd7519 --- /dev/null +++ b/python.d/python_modules/pyyaml2/cyaml.py @@ -0,0 +1,85 @@ + +__all__ = ['CBaseLoader', 'CSafeLoader', 'CLoader', + 'CBaseDumper', 'CSafeDumper', 'CDumper'] + +from _yaml import CParser, CEmitter + +from constructor import * + +from serializer import * +from representer import * + +from resolver import * + +class CBaseLoader(CParser, BaseConstructor, BaseResolver): + + def __init__(self, stream): + CParser.__init__(self, stream) + BaseConstructor.__init__(self) + BaseResolver.__init__(self) + +class CSafeLoader(CParser, SafeConstructor, Resolver): + + def __init__(self, stream): + CParser.__init__(self, stream) + SafeConstructor.__init__(self) + Resolver.__init__(self) + +class CLoader(CParser, Constructor, Resolver): + + def __init__(self, stream): + CParser.__init__(self, stream) + Constructor.__init__(self) + Resolver.__init__(self) + +class CBaseDumper(CEmitter, BaseRepresenter, BaseResolver): + + def __init__(self, stream, + default_style=None, default_flow_style=None, + canonical=None, indent=None, width=None, + allow_unicode=None, line_break=None, + encoding=None, explicit_start=None, explicit_end=None, + version=None, tags=None): + CEmitter.__init__(self, stream, canonical=canonical, + indent=indent, width=width, encoding=encoding, + allow_unicode=allow_unicode, line_break=line_break, + explicit_start=explicit_start, explicit_end=explicit_end, + version=version, tags=tags) + Representer.__init__(self, default_style=default_style, + default_flow_style=default_flow_style) + Resolver.__init__(self) + +class CSafeDumper(CEmitter, SafeRepresenter, Resolver): + + def __init__(self, stream, + default_style=None, default_flow_style=None, + canonical=None, indent=None, width=None, + allow_unicode=None, line_break=None, + encoding=None, explicit_start=None, explicit_end=None, + version=None, tags=None): + CEmitter.__init__(self, stream, canonical=canonical, + indent=indent, width=width, encoding=encoding, + allow_unicode=allow_unicode, line_break=line_break, + explicit_start=explicit_start, explicit_end=explicit_end, + version=version, tags=tags) + SafeRepresenter.__init__(self, default_style=default_style, + default_flow_style=default_flow_style) + Resolver.__init__(self) + +class CDumper(CEmitter, Serializer, Representer, Resolver): + + def __init__(self, stream, + default_style=None, default_flow_style=None, + canonical=None, indent=None, width=None, + allow_unicode=None, line_break=None, + encoding=None, explicit_start=None, explicit_end=None, + version=None, tags=None): + CEmitter.__init__(self, stream, canonical=canonical, + indent=indent, width=width, encoding=encoding, + allow_unicode=allow_unicode, line_break=line_break, + explicit_start=explicit_start, explicit_end=explicit_end, + version=version, tags=tags) + Representer.__init__(self, default_style=default_style, + default_flow_style=default_flow_style) + Resolver.__init__(self) + diff --git a/python.d/python_modules/pyyaml2/dumper.py b/python.d/python_modules/pyyaml2/dumper.py new file mode 100644 index 000000000..f811d2c91 --- /dev/null +++ b/python.d/python_modules/pyyaml2/dumper.py @@ -0,0 +1,62 @@ + +__all__ = ['BaseDumper', 'SafeDumper', 'Dumper'] + +from emitter import * +from serializer import * +from representer import * +from resolver import * + +class BaseDumper(Emitter, Serializer, BaseRepresenter, BaseResolver): + + def __init__(self, stream, + default_style=None, default_flow_style=None, + canonical=None, indent=None, width=None, + allow_unicode=None, line_break=None, + encoding=None, explicit_start=None, explicit_end=None, + version=None, tags=None): + Emitter.__init__(self, stream, canonical=canonical, + indent=indent, width=width, + allow_unicode=allow_unicode, line_break=line_break) + Serializer.__init__(self, encoding=encoding, + explicit_start=explicit_start, explicit_end=explicit_end, + version=version, tags=tags) + Representer.__init__(self, default_style=default_style, + default_flow_style=default_flow_style) + Resolver.__init__(self) + +class SafeDumper(Emitter, Serializer, SafeRepresenter, Resolver): + + def __init__(self, stream, + default_style=None, default_flow_style=None, + canonical=None, indent=None, width=None, + allow_unicode=None, line_break=None, + encoding=None, explicit_start=None, explicit_end=None, + version=None, tags=None): + Emitter.__init__(self, stream, canonical=canonical, + indent=indent, width=width, + allow_unicode=allow_unicode, line_break=line_break) + Serializer.__init__(self, encoding=encoding, + explicit_start=explicit_start, explicit_end=explicit_end, + version=version, tags=tags) + SafeRepresenter.__init__(self, default_style=default_style, + default_flow_style=default_flow_style) + Resolver.__init__(self) + +class Dumper(Emitter, Serializer, Representer, Resolver): + + def __init__(self, stream, + default_style=None, default_flow_style=None, + canonical=None, indent=None, width=None, + allow_unicode=None, line_break=None, + encoding=None, explicit_start=None, explicit_end=None, + version=None, tags=None): + Emitter.__init__(self, stream, canonical=canonical, + indent=indent, width=width, + allow_unicode=allow_unicode, line_break=line_break) + Serializer.__init__(self, encoding=encoding, + explicit_start=explicit_start, explicit_end=explicit_end, + version=version, tags=tags) + Representer.__init__(self, default_style=default_style, + default_flow_style=default_flow_style) + Resolver.__init__(self) + diff --git a/python.d/python_modules/pyyaml2/emitter.py b/python.d/python_modules/pyyaml2/emitter.py new file mode 100644 index 000000000..e5bcdcccb --- /dev/null +++ b/python.d/python_modules/pyyaml2/emitter.py @@ -0,0 +1,1140 @@ + +# Emitter expects events obeying the following grammar: +# stream ::= STREAM-START document* STREAM-END +# document ::= DOCUMENT-START node DOCUMENT-END +# node ::= SCALAR | sequence | mapping +# sequence ::= SEQUENCE-START node* SEQUENCE-END +# mapping ::= MAPPING-START (node node)* MAPPING-END + +__all__ = ['Emitter', 'EmitterError'] + +from error import YAMLError +from events import * + +class EmitterError(YAMLError): + pass + +class ScalarAnalysis(object): + def __init__(self, scalar, empty, multiline, + allow_flow_plain, allow_block_plain, + allow_single_quoted, allow_double_quoted, + allow_block): + self.scalar = scalar + self.empty = empty + self.multiline = multiline + self.allow_flow_plain = allow_flow_plain + self.allow_block_plain = allow_block_plain + self.allow_single_quoted = allow_single_quoted + self.allow_double_quoted = allow_double_quoted + self.allow_block = allow_block + +class Emitter(object): + + DEFAULT_TAG_PREFIXES = { + u'!' : u'!', + u'tag:yaml.org,2002:' : u'!!', + } + + def __init__(self, stream, canonical=None, indent=None, width=None, + allow_unicode=None, line_break=None): + + # The stream should have the methods `write` and possibly `flush`. + self.stream = stream + + # Encoding can be overriden by STREAM-START. + self.encoding = None + + # Emitter is a state machine with a stack of states to handle nested + # structures. + self.states = [] + self.state = self.expect_stream_start + + # Current event and the event queue. + self.events = [] + self.event = None + + # The current indentation level and the stack of previous indents. + self.indents = [] + self.indent = None + + # Flow level. + self.flow_level = 0 + + # Contexts. + self.root_context = False + self.sequence_context = False + self.mapping_context = False + self.simple_key_context = False + + # Characteristics of the last emitted character: + # - current position. + # - is it a whitespace? + # - is it an indention character + # (indentation space, '-', '?', or ':')? + self.line = 0 + self.column = 0 + self.whitespace = True + self.indention = True + + # Whether the document requires an explicit document indicator + self.open_ended = False + + # Formatting details. + self.canonical = canonical + self.allow_unicode = allow_unicode + self.best_indent = 2 + if indent and 1 < indent < 10: + self.best_indent = indent + self.best_width = 80 + if width and width > self.best_indent*2: + self.best_width = width + self.best_line_break = u'\n' + if line_break in [u'\r', u'\n', u'\r\n']: + self.best_line_break = line_break + + # Tag prefixes. + self.tag_prefixes = None + + # Prepared anchor and tag. + self.prepared_anchor = None + self.prepared_tag = None + + # Scalar analysis and style. + self.analysis = None + self.style = None + + def dispose(self): + # Reset the state attributes (to clear self-references) + self.states = [] + self.state = None + + def emit(self, event): + self.events.append(event) + while not self.need_more_events(): + self.event = self.events.pop(0) + self.state() + self.event = None + + # In some cases, we wait for a few next events before emitting. + + def need_more_events(self): + if not self.events: + return True + event = self.events[0] + if isinstance(event, DocumentStartEvent): + return self.need_events(1) + elif isinstance(event, SequenceStartEvent): + return self.need_events(2) + elif isinstance(event, MappingStartEvent): + return self.need_events(3) + else: + return False + + def need_events(self, count): + level = 0 + for event in self.events[1:]: + if isinstance(event, (DocumentStartEvent, CollectionStartEvent)): + level += 1 + elif isinstance(event, (DocumentEndEvent, CollectionEndEvent)): + level -= 1 + elif isinstance(event, StreamEndEvent): + level = -1 + if level < 0: + return False + return (len(self.events) < count+1) + + def increase_indent(self, flow=False, indentless=False): + self.indents.append(self.indent) + if self.indent is None: + if flow: + self.indent = self.best_indent + else: + self.indent = 0 + elif not indentless: + self.indent += self.best_indent + + # States. + + # Stream handlers. + + def expect_stream_start(self): + if isinstance(self.event, StreamStartEvent): + if self.event.encoding and not getattr(self.stream, 'encoding', None): + self.encoding = self.event.encoding + self.write_stream_start() + self.state = self.expect_first_document_start + else: + raise EmitterError("expected StreamStartEvent, but got %s" + % self.event) + + def expect_nothing(self): + raise EmitterError("expected nothing, but got %s" % self.event) + + # Document handlers. + + def expect_first_document_start(self): + return self.expect_document_start(first=True) + + def expect_document_start(self, first=False): + if isinstance(self.event, DocumentStartEvent): + if (self.event.version or self.event.tags) and self.open_ended: + self.write_indicator(u'...', True) + self.write_indent() + if self.event.version: + version_text = self.prepare_version(self.event.version) + self.write_version_directive(version_text) + self.tag_prefixes = self.DEFAULT_TAG_PREFIXES.copy() + if self.event.tags: + handles = self.event.tags.keys() + handles.sort() + for handle in handles: + prefix = self.event.tags[handle] + self.tag_prefixes[prefix] = handle + handle_text = self.prepare_tag_handle(handle) + prefix_text = self.prepare_tag_prefix(prefix) + self.write_tag_directive(handle_text, prefix_text) + implicit = (first and not self.event.explicit and not self.canonical + and not self.event.version and not self.event.tags + and not self.check_empty_document()) + if not implicit: + self.write_indent() + self.write_indicator(u'---', True) + if self.canonical: + self.write_indent() + self.state = self.expect_document_root + elif isinstance(self.event, StreamEndEvent): + if self.open_ended: + self.write_indicator(u'...', True) + self.write_indent() + self.write_stream_end() + self.state = self.expect_nothing + else: + raise EmitterError("expected DocumentStartEvent, but got %s" + % self.event) + + def expect_document_end(self): + if isinstance(self.event, DocumentEndEvent): + self.write_indent() + if self.event.explicit: + self.write_indicator(u'...', True) + self.write_indent() + self.flush_stream() + self.state = self.expect_document_start + else: + raise EmitterError("expected DocumentEndEvent, but got %s" + % self.event) + + def expect_document_root(self): + self.states.append(self.expect_document_end) + self.expect_node(root=True) + + # Node handlers. + + def expect_node(self, root=False, sequence=False, mapping=False, + simple_key=False): + self.root_context = root + self.sequence_context = sequence + self.mapping_context = mapping + self.simple_key_context = simple_key + if isinstance(self.event, AliasEvent): + self.expect_alias() + elif isinstance(self.event, (ScalarEvent, CollectionStartEvent)): + self.process_anchor(u'&') + self.process_tag() + if isinstance(self.event, ScalarEvent): + self.expect_scalar() + elif isinstance(self.event, SequenceStartEvent): + if self.flow_level or self.canonical or self.event.flow_style \ + or self.check_empty_sequence(): + self.expect_flow_sequence() + else: + self.expect_block_sequence() + elif isinstance(self.event, MappingStartEvent): + if self.flow_level or self.canonical or self.event.flow_style \ + or self.check_empty_mapping(): + self.expect_flow_mapping() + else: + self.expect_block_mapping() + else: + raise EmitterError("expected NodeEvent, but got %s" % self.event) + + def expect_alias(self): + if self.event.anchor is None: + raise EmitterError("anchor is not specified for alias") + self.process_anchor(u'*') + self.state = self.states.pop() + + def expect_scalar(self): + self.increase_indent(flow=True) + self.process_scalar() + self.indent = self.indents.pop() + self.state = self.states.pop() + + # Flow sequence handlers. + + def expect_flow_sequence(self): + self.write_indicator(u'[', True, whitespace=True) + self.flow_level += 1 + self.increase_indent(flow=True) + self.state = self.expect_first_flow_sequence_item + + def expect_first_flow_sequence_item(self): + if isinstance(self.event, SequenceEndEvent): + self.indent = self.indents.pop() + self.flow_level -= 1 + self.write_indicator(u']', False) + self.state = self.states.pop() + else: + if self.canonical or self.column > self.best_width: + self.write_indent() + self.states.append(self.expect_flow_sequence_item) + self.expect_node(sequence=True) + + def expect_flow_sequence_item(self): + if isinstance(self.event, SequenceEndEvent): + self.indent = self.indents.pop() + self.flow_level -= 1 + if self.canonical: + self.write_indicator(u',', False) + self.write_indent() + self.write_indicator(u']', False) + self.state = self.states.pop() + else: + self.write_indicator(u',', False) + if self.canonical or self.column > self.best_width: + self.write_indent() + self.states.append(self.expect_flow_sequence_item) + self.expect_node(sequence=True) + + # Flow mapping handlers. + + def expect_flow_mapping(self): + self.write_indicator(u'{', True, whitespace=True) + self.flow_level += 1 + self.increase_indent(flow=True) + self.state = self.expect_first_flow_mapping_key + + def expect_first_flow_mapping_key(self): + if isinstance(self.event, MappingEndEvent): + self.indent = self.indents.pop() + self.flow_level -= 1 + self.write_indicator(u'}', False) + self.state = self.states.pop() + else: + if self.canonical or self.column > self.best_width: + self.write_indent() + if not self.canonical and self.check_simple_key(): + self.states.append(self.expect_flow_mapping_simple_value) + self.expect_node(mapping=True, simple_key=True) + else: + self.write_indicator(u'?', True) + self.states.append(self.expect_flow_mapping_value) + self.expect_node(mapping=True) + + def expect_flow_mapping_key(self): + if isinstance(self.event, MappingEndEvent): + self.indent = self.indents.pop() + self.flow_level -= 1 + if self.canonical: + self.write_indicator(u',', False) + self.write_indent() + self.write_indicator(u'}', False) + self.state = self.states.pop() + else: + self.write_indicator(u',', False) + if self.canonical or self.column > self.best_width: + self.write_indent() + if not self.canonical and self.check_simple_key(): + self.states.append(self.expect_flow_mapping_simple_value) + self.expect_node(mapping=True, simple_key=True) + else: + self.write_indicator(u'?', True) + self.states.append(self.expect_flow_mapping_value) + self.expect_node(mapping=True) + + def expect_flow_mapping_simple_value(self): + self.write_indicator(u':', False) + self.states.append(self.expect_flow_mapping_key) + self.expect_node(mapping=True) + + def expect_flow_mapping_value(self): + if self.canonical or self.column > self.best_width: + self.write_indent() + self.write_indicator(u':', True) + self.states.append(self.expect_flow_mapping_key) + self.expect_node(mapping=True) + + # Block sequence handlers. + + def expect_block_sequence(self): + indentless = (self.mapping_context and not self.indention) + self.increase_indent(flow=False, indentless=indentless) + self.state = self.expect_first_block_sequence_item + + def expect_first_block_sequence_item(self): + return self.expect_block_sequence_item(first=True) + + def expect_block_sequence_item(self, first=False): + if not first and isinstance(self.event, SequenceEndEvent): + self.indent = self.indents.pop() + self.state = self.states.pop() + else: + self.write_indent() + self.write_indicator(u'-', True, indention=True) + self.states.append(self.expect_block_sequence_item) + self.expect_node(sequence=True) + + # Block mapping handlers. + + def expect_block_mapping(self): + self.increase_indent(flow=False) + self.state = self.expect_first_block_mapping_key + + def expect_first_block_mapping_key(self): + return self.expect_block_mapping_key(first=True) + + def expect_block_mapping_key(self, first=False): + if not first and isinstance(self.event, MappingEndEvent): + self.indent = self.indents.pop() + self.state = self.states.pop() + else: + self.write_indent() + if self.check_simple_key(): + self.states.append(self.expect_block_mapping_simple_value) + self.expect_node(mapping=True, simple_key=True) + else: + self.write_indicator(u'?', True, indention=True) + self.states.append(self.expect_block_mapping_value) + self.expect_node(mapping=True) + + def expect_block_mapping_simple_value(self): + self.write_indicator(u':', False) + self.states.append(self.expect_block_mapping_key) + self.expect_node(mapping=True) + + def expect_block_mapping_value(self): + self.write_indent() + self.write_indicator(u':', True, indention=True) + self.states.append(self.expect_block_mapping_key) + self.expect_node(mapping=True) + + # Checkers. + + def check_empty_sequence(self): + return (isinstance(self.event, SequenceStartEvent) and self.events + and isinstance(self.events[0], SequenceEndEvent)) + + def check_empty_mapping(self): + return (isinstance(self.event, MappingStartEvent) and self.events + and isinstance(self.events[0], MappingEndEvent)) + + def check_empty_document(self): + if not isinstance(self.event, DocumentStartEvent) or not self.events: + return False + event = self.events[0] + return (isinstance(event, ScalarEvent) and event.anchor is None + and event.tag is None and event.implicit and event.value == u'') + + def check_simple_key(self): + length = 0 + if isinstance(self.event, NodeEvent) and self.event.anchor is not None: + if self.prepared_anchor is None: + self.prepared_anchor = self.prepare_anchor(self.event.anchor) + length += len(self.prepared_anchor) + if isinstance(self.event, (ScalarEvent, CollectionStartEvent)) \ + and self.event.tag is not None: + if self.prepared_tag is None: + self.prepared_tag = self.prepare_tag(self.event.tag) + length += len(self.prepared_tag) + if isinstance(self.event, ScalarEvent): + if self.analysis is None: + self.analysis = self.analyze_scalar(self.event.value) + length += len(self.analysis.scalar) + return (length < 128 and (isinstance(self.event, AliasEvent) + or (isinstance(self.event, ScalarEvent) + and not self.analysis.empty and not self.analysis.multiline) + or self.check_empty_sequence() or self.check_empty_mapping())) + + # Anchor, Tag, and Scalar processors. + + def process_anchor(self, indicator): + if self.event.anchor is None: + self.prepared_anchor = None + return + if self.prepared_anchor is None: + self.prepared_anchor = self.prepare_anchor(self.event.anchor) + if self.prepared_anchor: + self.write_indicator(indicator+self.prepared_anchor, True) + self.prepared_anchor = None + + def process_tag(self): + tag = self.event.tag + if isinstance(self.event, ScalarEvent): + if self.style is None: + self.style = self.choose_scalar_style() + if ((not self.canonical or tag is None) and + ((self.style == '' and self.event.implicit[0]) + or (self.style != '' and self.event.implicit[1]))): + self.prepared_tag = None + return + if self.event.implicit[0] and tag is None: + tag = u'!' + self.prepared_tag = None + else: + if (not self.canonical or tag is None) and self.event.implicit: + self.prepared_tag = None + return + if tag is None: + raise EmitterError("tag is not specified") + if self.prepared_tag is None: + self.prepared_tag = self.prepare_tag(tag) + if self.prepared_tag: + self.write_indicator(self.prepared_tag, True) + self.prepared_tag = None + + def choose_scalar_style(self): + if self.analysis is None: + self.analysis = self.analyze_scalar(self.event.value) + if self.event.style == '"' or self.canonical: + return '"' + if not self.event.style and self.event.implicit[0]: + if (not (self.simple_key_context and + (self.analysis.empty or self.analysis.multiline)) + and (self.flow_level and self.analysis.allow_flow_plain + or (not self.flow_level and self.analysis.allow_block_plain))): + return '' + if self.event.style and self.event.style in '|>': + if (not self.flow_level and not self.simple_key_context + and self.analysis.allow_block): + return self.event.style + if not self.event.style or self.event.style == '\'': + if (self.analysis.allow_single_quoted and + not (self.simple_key_context and self.analysis.multiline)): + return '\'' + return '"' + + def process_scalar(self): + if self.analysis is None: + self.analysis = self.analyze_scalar(self.event.value) + if self.style is None: + self.style = self.choose_scalar_style() + split = (not self.simple_key_context) + #if self.analysis.multiline and split \ + # and (not self.style or self.style in '\'\"'): + # self.write_indent() + if self.style == '"': + self.write_double_quoted(self.analysis.scalar, split) + elif self.style == '\'': + self.write_single_quoted(self.analysis.scalar, split) + elif self.style == '>': + self.write_folded(self.analysis.scalar) + elif self.style == '|': + self.write_literal(self.analysis.scalar) + else: + self.write_plain(self.analysis.scalar, split) + self.analysis = None + self.style = None + + # Analyzers. + + def prepare_version(self, version): + major, minor = version + if major != 1: + raise EmitterError("unsupported YAML version: %d.%d" % (major, minor)) + return u'%d.%d' % (major, minor) + + def prepare_tag_handle(self, handle): + if not handle: + raise EmitterError("tag handle must not be empty") + if handle[0] != u'!' or handle[-1] != u'!': + raise EmitterError("tag handle must start and end with '!': %r" + % (handle.encode('utf-8'))) + for ch in handle[1:-1]: + if not (u'0' <= ch <= u'9' or u'A' <= ch <= u'Z' or u'a' <= ch <= u'z' \ + or ch in u'-_'): + raise EmitterError("invalid character %r in the tag handle: %r" + % (ch.encode('utf-8'), handle.encode('utf-8'))) + return handle + + def prepare_tag_prefix(self, prefix): + if not prefix: + raise EmitterError("tag prefix must not be empty") + chunks = [] + start = end = 0 + if prefix[0] == u'!': + end = 1 + while end < len(prefix): + ch = prefix[end] + if u'0' <= ch <= u'9' or u'A' <= ch <= u'Z' or u'a' <= ch <= u'z' \ + or ch in u'-;/?!:@&=+$,_.~*\'()[]': + end += 1 + else: + if start < end: + chunks.append(prefix[start:end]) + start = end = end+1 + data = ch.encode('utf-8') + for ch in data: + chunks.append(u'%%%02X' % ord(ch)) + if start < end: + chunks.append(prefix[start:end]) + return u''.join(chunks) + + def prepare_tag(self, tag): + if not tag: + raise EmitterError("tag must not be empty") + if tag == u'!': + return tag + handle = None + suffix = tag + prefixes = self.tag_prefixes.keys() + prefixes.sort() + for prefix in prefixes: + if tag.startswith(prefix) \ + and (prefix == u'!' or len(prefix) < len(tag)): + handle = self.tag_prefixes[prefix] + suffix = tag[len(prefix):] + chunks = [] + start = end = 0 + while end < len(suffix): + ch = suffix[end] + if u'0' <= ch <= u'9' or u'A' <= ch <= u'Z' or u'a' <= ch <= u'z' \ + or ch in u'-;/?:@&=+$,_.~*\'()[]' \ + or (ch == u'!' and handle != u'!'): + end += 1 + else: + if start < end: + chunks.append(suffix[start:end]) + start = end = end+1 + data = ch.encode('utf-8') + for ch in data: + chunks.append(u'%%%02X' % ord(ch)) + if start < end: + chunks.append(suffix[start:end]) + suffix_text = u''.join(chunks) + if handle: + return u'%s%s' % (handle, suffix_text) + else: + return u'!<%s>' % suffix_text + + def prepare_anchor(self, anchor): + if not anchor: + raise EmitterError("anchor must not be empty") + for ch in anchor: + if not (u'0' <= ch <= u'9' or u'A' <= ch <= u'Z' or u'a' <= ch <= u'z' \ + or ch in u'-_'): + raise EmitterError("invalid character %r in the anchor: %r" + % (ch.encode('utf-8'), anchor.encode('utf-8'))) + return anchor + + def analyze_scalar(self, scalar): + + # Empty scalar is a special case. + if not scalar: + return ScalarAnalysis(scalar=scalar, empty=True, multiline=False, + allow_flow_plain=False, allow_block_plain=True, + allow_single_quoted=True, allow_double_quoted=True, + allow_block=False) + + # Indicators and special characters. + block_indicators = False + flow_indicators = False + line_breaks = False + special_characters = False + + # Important whitespace combinations. + leading_space = False + leading_break = False + trailing_space = False + trailing_break = False + break_space = False + space_break = False + + # Check document indicators. + if scalar.startswith(u'---') or scalar.startswith(u'...'): + block_indicators = True + flow_indicators = True + + # First character or preceded by a whitespace. + preceeded_by_whitespace = True + + # Last character or followed by a whitespace. + followed_by_whitespace = (len(scalar) == 1 or + scalar[1] in u'\0 \t\r\n\x85\u2028\u2029') + + # The previous character is a space. + previous_space = False + + # The previous character is a break. + previous_break = False + + index = 0 + while index < len(scalar): + ch = scalar[index] + + # Check for indicators. + if index == 0: + # Leading indicators are special characters. + if ch in u'#,[]{}&*!|>\'\"%@`': + flow_indicators = True + block_indicators = True + if ch in u'?:': + flow_indicators = True + if followed_by_whitespace: + block_indicators = True + if ch == u'-' and followed_by_whitespace: + flow_indicators = True + block_indicators = True + else: + # Some indicators cannot appear within a scalar as well. + if ch in u',?[]{}': + flow_indicators = True + if ch == u':': + flow_indicators = True + if followed_by_whitespace: + block_indicators = True + if ch == u'#' and preceeded_by_whitespace: + flow_indicators = True + block_indicators = True + + # Check for line breaks, special, and unicode characters. + if ch in u'\n\x85\u2028\u2029': + line_breaks = True + if not (ch == u'\n' or u'\x20' <= ch <= u'\x7E'): + if (ch == u'\x85' or u'\xA0' <= ch <= u'\uD7FF' + or u'\uE000' <= ch <= u'\uFFFD') and ch != u'\uFEFF': + unicode_characters = True + if not self.allow_unicode: + special_characters = True + else: + special_characters = True + + # Detect important whitespace combinations. + if ch == u' ': + if index == 0: + leading_space = True + if index == len(scalar)-1: + trailing_space = True + if previous_break: + break_space = True + previous_space = True + previous_break = False + elif ch in u'\n\x85\u2028\u2029': + if index == 0: + leading_break = True + if index == len(scalar)-1: + trailing_break = True + if previous_space: + space_break = True + previous_space = False + previous_break = True + else: + previous_space = False + previous_break = False + + # Prepare for the next character. + index += 1 + preceeded_by_whitespace = (ch in u'\0 \t\r\n\x85\u2028\u2029') + followed_by_whitespace = (index+1 >= len(scalar) or + scalar[index+1] in u'\0 \t\r\n\x85\u2028\u2029') + + # Let's decide what styles are allowed. + allow_flow_plain = True + allow_block_plain = True + allow_single_quoted = True + allow_double_quoted = True + allow_block = True + + # Leading and trailing whitespaces are bad for plain scalars. + if (leading_space or leading_break + or trailing_space or trailing_break): + allow_flow_plain = allow_block_plain = False + + # We do not permit trailing spaces for block scalars. + if trailing_space: + allow_block = False + + # Spaces at the beginning of a new line are only acceptable for block + # scalars. + if break_space: + allow_flow_plain = allow_block_plain = allow_single_quoted = False + + # Spaces followed by breaks, as well as special character are only + # allowed for double quoted scalars. + if space_break or special_characters: + allow_flow_plain = allow_block_plain = \ + allow_single_quoted = allow_block = False + + # Although the plain scalar writer supports breaks, we never emit + # multiline plain scalars. + if line_breaks: + allow_flow_plain = allow_block_plain = False + + # Flow indicators are forbidden for flow plain scalars. + if flow_indicators: + allow_flow_plain = False + + # Block indicators are forbidden for block plain scalars. + if block_indicators: + allow_block_plain = False + + return ScalarAnalysis(scalar=scalar, + empty=False, multiline=line_breaks, + allow_flow_plain=allow_flow_plain, + allow_block_plain=allow_block_plain, + allow_single_quoted=allow_single_quoted, + allow_double_quoted=allow_double_quoted, + allow_block=allow_block) + + # Writers. + + def flush_stream(self): + if hasattr(self.stream, 'flush'): + self.stream.flush() + + def write_stream_start(self): + # Write BOM if needed. + if self.encoding and self.encoding.startswith('utf-16'): + self.stream.write(u'\uFEFF'.encode(self.encoding)) + + def write_stream_end(self): + self.flush_stream() + + def write_indicator(self, indicator, need_whitespace, + whitespace=False, indention=False): + if self.whitespace or not need_whitespace: + data = indicator + else: + data = u' '+indicator + self.whitespace = whitespace + self.indention = self.indention and indention + self.column += len(data) + self.open_ended = False + if self.encoding: + data = data.encode(self.encoding) + self.stream.write(data) + + def write_indent(self): + indent = self.indent or 0 + if not self.indention or self.column > indent \ + or (self.column == indent and not self.whitespace): + self.write_line_break() + if self.column < indent: + self.whitespace = True + data = u' '*(indent-self.column) + self.column = indent + if self.encoding: + data = data.encode(self.encoding) + self.stream.write(data) + + def write_line_break(self, data=None): + if data is None: + data = self.best_line_break + self.whitespace = True + self.indention = True + self.line += 1 + self.column = 0 + if self.encoding: + data = data.encode(self.encoding) + self.stream.write(data) + + def write_version_directive(self, version_text): + data = u'%%YAML %s' % version_text + if self.encoding: + data = data.encode(self.encoding) + self.stream.write(data) + self.write_line_break() + + def write_tag_directive(self, handle_text, prefix_text): + data = u'%%TAG %s %s' % (handle_text, prefix_text) + if self.encoding: + data = data.encode(self.encoding) + self.stream.write(data) + self.write_line_break() + + # Scalar streams. + + def write_single_quoted(self, text, split=True): + self.write_indicator(u'\'', True) + spaces = False + breaks = False + start = end = 0 + while end <= len(text): + ch = None + if end < len(text): + ch = text[end] + if spaces: + if ch is None or ch != u' ': + if start+1 == end and self.column > self.best_width and split \ + and start != 0 and end != len(text): + self.write_indent() + else: + data = text[start:end] + self.column += len(data) + if self.encoding: + data = data.encode(self.encoding) + self.stream.write(data) + start = end + elif breaks: + if ch is None or ch not in u'\n\x85\u2028\u2029': + if text[start] == u'\n': + self.write_line_break() + for br in text[start:end]: + if br == u'\n': + self.write_line_break() + else: + self.write_line_break(br) + self.write_indent() + start = end + else: + if ch is None or ch in u' \n\x85\u2028\u2029' or ch == u'\'': + if start < end: + data = text[start:end] + self.column += len(data) + if self.encoding: + data = data.encode(self.encoding) + self.stream.write(data) + start = end + if ch == u'\'': + data = u'\'\'' + self.column += 2 + if self.encoding: + data = data.encode(self.encoding) + self.stream.write(data) + start = end + 1 + if ch is not None: + spaces = (ch == u' ') + breaks = (ch in u'\n\x85\u2028\u2029') + end += 1 + self.write_indicator(u'\'', False) + + ESCAPE_REPLACEMENTS = { + u'\0': u'0', + u'\x07': u'a', + u'\x08': u'b', + u'\x09': u't', + u'\x0A': u'n', + u'\x0B': u'v', + u'\x0C': u'f', + u'\x0D': u'r', + u'\x1B': u'e', + u'\"': u'\"', + u'\\': u'\\', + u'\x85': u'N', + u'\xA0': u'_', + u'\u2028': u'L', + u'\u2029': u'P', + } + + def write_double_quoted(self, text, split=True): + self.write_indicator(u'"', True) + start = end = 0 + while end <= len(text): + ch = None + if end < len(text): + ch = text[end] + if ch is None or ch in u'"\\\x85\u2028\u2029\uFEFF' \ + or not (u'\x20' <= ch <= u'\x7E' + or (self.allow_unicode + and (u'\xA0' <= ch <= u'\uD7FF' + or u'\uE000' <= ch <= u'\uFFFD'))): + if start < end: + data = text[start:end] + self.column += len(data) + if self.encoding: + data = data.encode(self.encoding) + self.stream.write(data) + start = end + if ch is not None: + if ch in self.ESCAPE_REPLACEMENTS: + data = u'\\'+self.ESCAPE_REPLACEMENTS[ch] + elif ch <= u'\xFF': + data = u'\\x%02X' % ord(ch) + elif ch <= u'\uFFFF': + data = u'\\u%04X' % ord(ch) + else: + data = u'\\U%08X' % ord(ch) + self.column += len(data) + if self.encoding: + data = data.encode(self.encoding) + self.stream.write(data) + start = end+1 + if 0 < end < len(text)-1 and (ch == u' ' or start >= end) \ + and self.column+(end-start) > self.best_width and split: + data = text[start:end]+u'\\' + if start < end: + start = end + self.column += len(data) + if self.encoding: + data = data.encode(self.encoding) + self.stream.write(data) + self.write_indent() + self.whitespace = False + self.indention = False + if text[start] == u' ': + data = u'\\' + self.column += len(data) + if self.encoding: + data = data.encode(self.encoding) + self.stream.write(data) + end += 1 + self.write_indicator(u'"', False) + + def determine_block_hints(self, text): + hints = u'' + if text: + if text[0] in u' \n\x85\u2028\u2029': + hints += unicode(self.best_indent) + if text[-1] not in u'\n\x85\u2028\u2029': + hints += u'-' + elif len(text) == 1 or text[-2] in u'\n\x85\u2028\u2029': + hints += u'+' + return hints + + def write_folded(self, text): + hints = self.determine_block_hints(text) + self.write_indicator(u'>'+hints, True) + if hints[-1:] == u'+': + self.open_ended = True + self.write_line_break() + leading_space = True + spaces = False + breaks = True + start = end = 0 + while end <= len(text): + ch = None + if end < len(text): + ch = text[end] + if breaks: + if ch is None or ch not in u'\n\x85\u2028\u2029': + if not leading_space and ch is not None and ch != u' ' \ + and text[start] == u'\n': + self.write_line_break() + leading_space = (ch == u' ') + for br in text[start:end]: + if br == u'\n': + self.write_line_break() + else: + self.write_line_break(br) + if ch is not None: + self.write_indent() + start = end + elif spaces: + if ch != u' ': + if start+1 == end and self.column > self.best_width: + self.write_indent() + else: + data = text[start:end] + self.column += len(data) + if self.encoding: + data = data.encode(self.encoding) + self.stream.write(data) + start = end + else: + if ch is None or ch in u' \n\x85\u2028\u2029': + data = text[start:end] + self.column += len(data) + if self.encoding: + data = data.encode(self.encoding) + self.stream.write(data) + if ch is None: + self.write_line_break() + start = end + if ch is not None: + breaks = (ch in u'\n\x85\u2028\u2029') + spaces = (ch == u' ') + end += 1 + + def write_literal(self, text): + hints = self.determine_block_hints(text) + self.write_indicator(u'|'+hints, True) + if hints[-1:] == u'+': + self.open_ended = True + self.write_line_break() + breaks = True + start = end = 0 + while end <= len(text): + ch = None + if end < len(text): + ch = text[end] + if breaks: + if ch is None or ch not in u'\n\x85\u2028\u2029': + for br in text[start:end]: + if br == u'\n': + self.write_line_break() + else: + self.write_line_break(br) + if ch is not None: + self.write_indent() + start = end + else: + if ch is None or ch in u'\n\x85\u2028\u2029': + data = text[start:end] + if self.encoding: + data = data.encode(self.encoding) + self.stream.write(data) + if ch is None: + self.write_line_break() + start = end + if ch is not None: + breaks = (ch in u'\n\x85\u2028\u2029') + end += 1 + + def write_plain(self, text, split=True): + if self.root_context: + self.open_ended = True + if not text: + return + if not self.whitespace: + data = u' ' + self.column += len(data) + if self.encoding: + data = data.encode(self.encoding) + self.stream.write(data) + self.whitespace = False + self.indention = False + spaces = False + breaks = False + start = end = 0 + while end <= len(text): + ch = None + if end < len(text): + ch = text[end] + if spaces: + if ch != u' ': + if start+1 == end and self.column > self.best_width and split: + self.write_indent() + self.whitespace = False + self.indention = False + else: + data = text[start:end] + self.column += len(data) + if self.encoding: + data = data.encode(self.encoding) + self.stream.write(data) + start = end + elif breaks: + if ch not in u'\n\x85\u2028\u2029': + if text[start] == u'\n': + self.write_line_break() + for br in text[start:end]: + if br == u'\n': + self.write_line_break() + else: + self.write_line_break(br) + self.write_indent() + self.whitespace = False + self.indention = False + start = end + else: + if ch is None or ch in u' \n\x85\u2028\u2029': + data = text[start:end] + self.column += len(data) + if self.encoding: + data = data.encode(self.encoding) + self.stream.write(data) + start = end + if ch is not None: + spaces = (ch == u' ') + breaks = (ch in u'\n\x85\u2028\u2029') + end += 1 + diff --git a/python.d/python_modules/pyyaml2/error.py b/python.d/python_modules/pyyaml2/error.py new file mode 100644 index 000000000..577686db5 --- /dev/null +++ b/python.d/python_modules/pyyaml2/error.py @@ -0,0 +1,75 @@ + +__all__ = ['Mark', 'YAMLError', 'MarkedYAMLError'] + +class Mark(object): + + def __init__(self, name, index, line, column, buffer, pointer): + self.name = name + self.index = index + self.line = line + self.column = column + self.buffer = buffer + self.pointer = pointer + + def get_snippet(self, indent=4, max_length=75): + if self.buffer is None: + return None + head = '' + start = self.pointer + while start > 0 and self.buffer[start-1] not in u'\0\r\n\x85\u2028\u2029': + start -= 1 + if self.pointer-start > max_length/2-1: + head = ' ... ' + start += 5 + break + tail = '' + end = self.pointer + while end < len(self.buffer) and self.buffer[end] not in u'\0\r\n\x85\u2028\u2029': + end += 1 + if end-self.pointer > max_length/2-1: + tail = ' ... ' + end -= 5 + break + snippet = self.buffer[start:end].encode('utf-8') + return ' '*indent + head + snippet + tail + '\n' \ + + ' '*(indent+self.pointer-start+len(head)) + '^' + + def __str__(self): + snippet = self.get_snippet() + where = " in \"%s\", line %d, column %d" \ + % (self.name, self.line+1, self.column+1) + if snippet is not None: + where += ":\n"+snippet + return where + +class YAMLError(Exception): + pass + +class MarkedYAMLError(YAMLError): + + def __init__(self, context=None, context_mark=None, + problem=None, problem_mark=None, note=None): + self.context = context + self.context_mark = context_mark + self.problem = problem + self.problem_mark = problem_mark + self.note = note + + def __str__(self): + lines = [] + if self.context is not None: + lines.append(self.context) + if self.context_mark is not None \ + and (self.problem is None or self.problem_mark is None + or self.context_mark.name != self.problem_mark.name + or self.context_mark.line != self.problem_mark.line + or self.context_mark.column != self.problem_mark.column): + lines.append(str(self.context_mark)) + if self.problem is not None: + lines.append(self.problem) + if self.problem_mark is not None: + lines.append(str(self.problem_mark)) + if self.note is not None: + lines.append(self.note) + return '\n'.join(lines) + diff --git a/python.d/python_modules/pyyaml2/events.py b/python.d/python_modules/pyyaml2/events.py new file mode 100644 index 000000000..f79ad389c --- /dev/null +++ b/python.d/python_modules/pyyaml2/events.py @@ -0,0 +1,86 @@ + +# Abstract classes. + +class Event(object): + def __init__(self, start_mark=None, end_mark=None): + self.start_mark = start_mark + self.end_mark = end_mark + def __repr__(self): + attributes = [key for key in ['anchor', 'tag', 'implicit', 'value'] + if hasattr(self, key)] + arguments = ', '.join(['%s=%r' % (key, getattr(self, key)) + for key in attributes]) + return '%s(%s)' % (self.__class__.__name__, arguments) + +class NodeEvent(Event): + def __init__(self, anchor, start_mark=None, end_mark=None): + self.anchor = anchor + self.start_mark = start_mark + self.end_mark = end_mark + +class CollectionStartEvent(NodeEvent): + def __init__(self, anchor, tag, implicit, start_mark=None, end_mark=None, + flow_style=None): + self.anchor = anchor + self.tag = tag + self.implicit = implicit + self.start_mark = start_mark + self.end_mark = end_mark + self.flow_style = flow_style + +class CollectionEndEvent(Event): + pass + +# Implementations. + +class StreamStartEvent(Event): + def __init__(self, start_mark=None, end_mark=None, encoding=None): + self.start_mark = start_mark + self.end_mark = end_mark + self.encoding = encoding + +class StreamEndEvent(Event): + pass + +class DocumentStartEvent(Event): + def __init__(self, start_mark=None, end_mark=None, + explicit=None, version=None, tags=None): + self.start_mark = start_mark + self.end_mark = end_mark + self.explicit = explicit + self.version = version + self.tags = tags + +class DocumentEndEvent(Event): + def __init__(self, start_mark=None, end_mark=None, + explicit=None): + self.start_mark = start_mark + self.end_mark = end_mark + self.explicit = explicit + +class AliasEvent(NodeEvent): + pass + +class ScalarEvent(NodeEvent): + def __init__(self, anchor, tag, implicit, value, + start_mark=None, end_mark=None, style=None): + self.anchor = anchor + self.tag = tag + self.implicit = implicit + self.value = value + self.start_mark = start_mark + self.end_mark = end_mark + self.style = style + +class SequenceStartEvent(CollectionStartEvent): + pass + +class SequenceEndEvent(CollectionEndEvent): + pass + +class MappingStartEvent(CollectionStartEvent): + pass + +class MappingEndEvent(CollectionEndEvent): + pass + diff --git a/python.d/python_modules/pyyaml2/loader.py b/python.d/python_modules/pyyaml2/loader.py new file mode 100644 index 000000000..293ff467b --- /dev/null +++ b/python.d/python_modules/pyyaml2/loader.py @@ -0,0 +1,40 @@ + +__all__ = ['BaseLoader', 'SafeLoader', 'Loader'] + +from reader import * +from scanner import * +from parser import * +from composer import * +from constructor import * +from resolver import * + +class BaseLoader(Reader, Scanner, Parser, Composer, BaseConstructor, BaseResolver): + + def __init__(self, stream): + Reader.__init__(self, stream) + Scanner.__init__(self) + Parser.__init__(self) + Composer.__init__(self) + BaseConstructor.__init__(self) + BaseResolver.__init__(self) + +class SafeLoader(Reader, Scanner, Parser, Composer, SafeConstructor, Resolver): + + def __init__(self, stream): + Reader.__init__(self, stream) + Scanner.__init__(self) + Parser.__init__(self) + Composer.__init__(self) + SafeConstructor.__init__(self) + Resolver.__init__(self) + +class Loader(Reader, Scanner, Parser, Composer, Constructor, Resolver): + + def __init__(self, stream): + Reader.__init__(self, stream) + Scanner.__init__(self) + Parser.__init__(self) + Composer.__init__(self) + Constructor.__init__(self) + Resolver.__init__(self) + diff --git a/python.d/python_modules/pyyaml2/nodes.py b/python.d/python_modules/pyyaml2/nodes.py new file mode 100644 index 000000000..c4f070c41 --- /dev/null +++ b/python.d/python_modules/pyyaml2/nodes.py @@ -0,0 +1,49 @@ + +class Node(object): + def __init__(self, tag, value, start_mark, end_mark): + self.tag = tag + self.value = value + self.start_mark = start_mark + self.end_mark = end_mark + def __repr__(self): + value = self.value + #if isinstance(value, list): + # if len(value) == 0: + # value = '<empty>' + # elif len(value) == 1: + # value = '<1 item>' + # else: + # value = '<%d items>' % len(value) + #else: + # if len(value) > 75: + # value = repr(value[:70]+u' ... ') + # else: + # value = repr(value) + value = repr(value) + return '%s(tag=%r, value=%s)' % (self.__class__.__name__, self.tag, value) + +class ScalarNode(Node): + id = 'scalar' + def __init__(self, tag, value, + start_mark=None, end_mark=None, style=None): + self.tag = tag + self.value = value + self.start_mark = start_mark + self.end_mark = end_mark + self.style = style + +class CollectionNode(Node): + def __init__(self, tag, value, + start_mark=None, end_mark=None, flow_style=None): + self.tag = tag + self.value = value + self.start_mark = start_mark + self.end_mark = end_mark + self.flow_style = flow_style + +class SequenceNode(CollectionNode): + id = 'sequence' + +class MappingNode(CollectionNode): + id = 'mapping' + diff --git a/python.d/python_modules/pyyaml2/parser.py b/python.d/python_modules/pyyaml2/parser.py new file mode 100644 index 000000000..f9e3057f3 --- /dev/null +++ b/python.d/python_modules/pyyaml2/parser.py @@ -0,0 +1,589 @@ + +# The following YAML grammar is LL(1) and is parsed by a recursive descent +# parser. +# +# stream ::= STREAM-START implicit_document? explicit_document* STREAM-END +# implicit_document ::= block_node DOCUMENT-END* +# explicit_document ::= DIRECTIVE* DOCUMENT-START block_node? DOCUMENT-END* +# block_node_or_indentless_sequence ::= +# ALIAS +# | properties (block_content | indentless_block_sequence)? +# | block_content +# | indentless_block_sequence +# block_node ::= ALIAS +# | properties block_content? +# | block_content +# flow_node ::= ALIAS +# | properties flow_content? +# | flow_content +# properties ::= TAG ANCHOR? | ANCHOR TAG? +# block_content ::= block_collection | flow_collection | SCALAR +# flow_content ::= flow_collection | SCALAR +# block_collection ::= block_sequence | block_mapping +# flow_collection ::= flow_sequence | flow_mapping +# block_sequence ::= BLOCK-SEQUENCE-START (BLOCK-ENTRY block_node?)* BLOCK-END +# indentless_sequence ::= (BLOCK-ENTRY block_node?)+ +# block_mapping ::= BLOCK-MAPPING_START +# ((KEY block_node_or_indentless_sequence?)? +# (VALUE block_node_or_indentless_sequence?)?)* +# BLOCK-END +# flow_sequence ::= FLOW-SEQUENCE-START +# (flow_sequence_entry FLOW-ENTRY)* +# flow_sequence_entry? +# FLOW-SEQUENCE-END +# flow_sequence_entry ::= flow_node | KEY flow_node? (VALUE flow_node?)? +# flow_mapping ::= FLOW-MAPPING-START +# (flow_mapping_entry FLOW-ENTRY)* +# flow_mapping_entry? +# FLOW-MAPPING-END +# flow_mapping_entry ::= flow_node | KEY flow_node? (VALUE flow_node?)? +# +# FIRST sets: +# +# stream: { STREAM-START } +# explicit_document: { DIRECTIVE DOCUMENT-START } +# implicit_document: FIRST(block_node) +# block_node: { ALIAS TAG ANCHOR SCALAR BLOCK-SEQUENCE-START BLOCK-MAPPING-START FLOW-SEQUENCE-START FLOW-MAPPING-START } +# flow_node: { ALIAS ANCHOR TAG SCALAR FLOW-SEQUENCE-START FLOW-MAPPING-START } +# block_content: { BLOCK-SEQUENCE-START BLOCK-MAPPING-START FLOW-SEQUENCE-START FLOW-MAPPING-START SCALAR } +# flow_content: { FLOW-SEQUENCE-START FLOW-MAPPING-START SCALAR } +# block_collection: { BLOCK-SEQUENCE-START BLOCK-MAPPING-START } +# flow_collection: { FLOW-SEQUENCE-START FLOW-MAPPING-START } +# block_sequence: { BLOCK-SEQUENCE-START } +# block_mapping: { BLOCK-MAPPING-START } +# block_node_or_indentless_sequence: { ALIAS ANCHOR TAG SCALAR BLOCK-SEQUENCE-START BLOCK-MAPPING-START FLOW-SEQUENCE-START FLOW-MAPPING-START BLOCK-ENTRY } +# indentless_sequence: { ENTRY } +# flow_collection: { FLOW-SEQUENCE-START FLOW-MAPPING-START } +# flow_sequence: { FLOW-SEQUENCE-START } +# flow_mapping: { FLOW-MAPPING-START } +# flow_sequence_entry: { ALIAS ANCHOR TAG SCALAR FLOW-SEQUENCE-START FLOW-MAPPING-START KEY } +# flow_mapping_entry: { ALIAS ANCHOR TAG SCALAR FLOW-SEQUENCE-START FLOW-MAPPING-START KEY } + +__all__ = ['Parser', 'ParserError'] + +from error import MarkedYAMLError +from tokens import * +from events import * +from scanner import * + +class ParserError(MarkedYAMLError): + pass + +class Parser(object): + # Since writing a recursive-descendant parser is a straightforward task, we + # do not give many comments here. + + DEFAULT_TAGS = { + u'!': u'!', + u'!!': u'tag:yaml.org,2002:', + } + + def __init__(self): + self.current_event = None + self.yaml_version = None + self.tag_handles = {} + self.states = [] + self.marks = [] + self.state = self.parse_stream_start + + def dispose(self): + # Reset the state attributes (to clear self-references) + self.states = [] + self.state = None + + def check_event(self, *choices): + # Check the type of the next event. + if self.current_event is None: + if self.state: + self.current_event = self.state() + if self.current_event is not None: + if not choices: + return True + for choice in choices: + if isinstance(self.current_event, choice): + return True + return False + + def peek_event(self): + # Get the next event. + if self.current_event is None: + if self.state: + self.current_event = self.state() + return self.current_event + + def get_event(self): + # Get the next event and proceed further. + if self.current_event is None: + if self.state: + self.current_event = self.state() + value = self.current_event + self.current_event = None + return value + + # stream ::= STREAM-START implicit_document? explicit_document* STREAM-END + # implicit_document ::= block_node DOCUMENT-END* + # explicit_document ::= DIRECTIVE* DOCUMENT-START block_node? DOCUMENT-END* + + def parse_stream_start(self): + + # Parse the stream start. + token = self.get_token() + event = StreamStartEvent(token.start_mark, token.end_mark, + encoding=token.encoding) + + # Prepare the next state. + self.state = self.parse_implicit_document_start + + return event + + def parse_implicit_document_start(self): + + # Parse an implicit document. + if not self.check_token(DirectiveToken, DocumentStartToken, + StreamEndToken): + self.tag_handles = self.DEFAULT_TAGS + token = self.peek_token() + start_mark = end_mark = token.start_mark + event = DocumentStartEvent(start_mark, end_mark, + explicit=False) + + # Prepare the next state. + self.states.append(self.parse_document_end) + self.state = self.parse_block_node + + return event + + else: + return self.parse_document_start() + + def parse_document_start(self): + + # Parse any extra document end indicators. + while self.check_token(DocumentEndToken): + self.get_token() + + # Parse an explicit document. + if not self.check_token(StreamEndToken): + token = self.peek_token() + start_mark = token.start_mark + version, tags = self.process_directives() + if not self.check_token(DocumentStartToken): + raise ParserError(None, None, + "expected '<document start>', but found %r" + % self.peek_token().id, + self.peek_token().start_mark) + token = self.get_token() + end_mark = token.end_mark + event = DocumentStartEvent(start_mark, end_mark, + explicit=True, version=version, tags=tags) + self.states.append(self.parse_document_end) + self.state = self.parse_document_content + else: + # Parse the end of the stream. + token = self.get_token() + event = StreamEndEvent(token.start_mark, token.end_mark) + assert not self.states + assert not self.marks + self.state = None + return event + + def parse_document_end(self): + + # Parse the document end. + token = self.peek_token() + start_mark = end_mark = token.start_mark + explicit = False + if self.check_token(DocumentEndToken): + token = self.get_token() + end_mark = token.end_mark + explicit = True + event = DocumentEndEvent(start_mark, end_mark, + explicit=explicit) + + # Prepare the next state. + self.state = self.parse_document_start + + return event + + def parse_document_content(self): + if self.check_token(DirectiveToken, + DocumentStartToken, DocumentEndToken, StreamEndToken): + event = self.process_empty_scalar(self.peek_token().start_mark) + self.state = self.states.pop() + return event + else: + return self.parse_block_node() + + def process_directives(self): + self.yaml_version = None + self.tag_handles = {} + while self.check_token(DirectiveToken): + token = self.get_token() + if token.name == u'YAML': + if self.yaml_version is not None: + raise ParserError(None, None, + "found duplicate YAML directive", token.start_mark) + major, minor = token.value + if major != 1: + raise ParserError(None, None, + "found incompatible YAML document (version 1.* is required)", + token.start_mark) + self.yaml_version = token.value + elif token.name == u'TAG': + handle, prefix = token.value + if handle in self.tag_handles: + raise ParserError(None, None, + "duplicate tag handle %r" % handle.encode('utf-8'), + token.start_mark) + self.tag_handles[handle] = prefix + if self.tag_handles: + value = self.yaml_version, self.tag_handles.copy() + else: + value = self.yaml_version, None + for key in self.DEFAULT_TAGS: + if key not in self.tag_handles: + self.tag_handles[key] = self.DEFAULT_TAGS[key] + return value + + # block_node_or_indentless_sequence ::= ALIAS + # | properties (block_content | indentless_block_sequence)? + # | block_content + # | indentless_block_sequence + # block_node ::= ALIAS + # | properties block_content? + # | block_content + # flow_node ::= ALIAS + # | properties flow_content? + # | flow_content + # properties ::= TAG ANCHOR? | ANCHOR TAG? + # block_content ::= block_collection | flow_collection | SCALAR + # flow_content ::= flow_collection | SCALAR + # block_collection ::= block_sequence | block_mapping + # flow_collection ::= flow_sequence | flow_mapping + + def parse_block_node(self): + return self.parse_node(block=True) + + def parse_flow_node(self): + return self.parse_node() + + def parse_block_node_or_indentless_sequence(self): + return self.parse_node(block=True, indentless_sequence=True) + + def parse_node(self, block=False, indentless_sequence=False): + if self.check_token(AliasToken): + token = self.get_token() + event = AliasEvent(token.value, token.start_mark, token.end_mark) + self.state = self.states.pop() + else: + anchor = None + tag = None + start_mark = end_mark = tag_mark = None + if self.check_token(AnchorToken): + token = self.get_token() + start_mark = token.start_mark + end_mark = token.end_mark + anchor = token.value + if self.check_token(TagToken): + token = self.get_token() + tag_mark = token.start_mark + end_mark = token.end_mark + tag = token.value + elif self.check_token(TagToken): + token = self.get_token() + start_mark = tag_mark = token.start_mark + end_mark = token.end_mark + tag = token.value + if self.check_token(AnchorToken): + token = self.get_token() + end_mark = token.end_mark + anchor = token.value + if tag is not None: + handle, suffix = tag + if handle is not None: + if handle not in self.tag_handles: + raise ParserError("while parsing a node", start_mark, + "found undefined tag handle %r" % handle.encode('utf-8'), + tag_mark) + tag = self.tag_handles[handle]+suffix + else: + tag = suffix + #if tag == u'!': + # raise ParserError("while parsing a node", start_mark, + # "found non-specific tag '!'", tag_mark, + # "Please check 'http://pyyaml.org/wiki/YAMLNonSpecificTag' and share your opinion.") + if start_mark is None: + start_mark = end_mark = self.peek_token().start_mark + event = None + implicit = (tag is None or tag == u'!') + if indentless_sequence and self.check_token(BlockEntryToken): + end_mark = self.peek_token().end_mark + event = SequenceStartEvent(anchor, tag, implicit, + start_mark, end_mark) + self.state = self.parse_indentless_sequence_entry + else: + if self.check_token(ScalarToken): + token = self.get_token() + end_mark = token.end_mark + if (token.plain and tag is None) or tag == u'!': + implicit = (True, False) + elif tag is None: + implicit = (False, True) + else: + implicit = (False, False) + event = ScalarEvent(anchor, tag, implicit, token.value, + start_mark, end_mark, style=token.style) + self.state = self.states.pop() + elif self.check_token(FlowSequenceStartToken): + end_mark = self.peek_token().end_mark + event = SequenceStartEvent(anchor, tag, implicit, + start_mark, end_mark, flow_style=True) + self.state = self.parse_flow_sequence_first_entry + elif self.check_token(FlowMappingStartToken): + end_mark = self.peek_token().end_mark + event = MappingStartEvent(anchor, tag, implicit, + start_mark, end_mark, flow_style=True) + self.state = self.parse_flow_mapping_first_key + elif block and self.check_token(BlockSequenceStartToken): + end_mark = self.peek_token().start_mark + event = SequenceStartEvent(anchor, tag, implicit, + start_mark, end_mark, flow_style=False) + self.state = self.parse_block_sequence_first_entry + elif block and self.check_token(BlockMappingStartToken): + end_mark = self.peek_token().start_mark + event = MappingStartEvent(anchor, tag, implicit, + start_mark, end_mark, flow_style=False) + self.state = self.parse_block_mapping_first_key + elif anchor is not None or tag is not None: + # Empty scalars are allowed even if a tag or an anchor is + # specified. + event = ScalarEvent(anchor, tag, (implicit, False), u'', + start_mark, end_mark) + self.state = self.states.pop() + else: + if block: + node = 'block' + else: + node = 'flow' + token = self.peek_token() + raise ParserError("while parsing a %s node" % node, start_mark, + "expected the node content, but found %r" % token.id, + token.start_mark) + return event + + # block_sequence ::= BLOCK-SEQUENCE-START (BLOCK-ENTRY block_node?)* BLOCK-END + + def parse_block_sequence_first_entry(self): + token = self.get_token() + self.marks.append(token.start_mark) + return self.parse_block_sequence_entry() + + def parse_block_sequence_entry(self): + if self.check_token(BlockEntryToken): + token = self.get_token() + if not self.check_token(BlockEntryToken, BlockEndToken): + self.states.append(self.parse_block_sequence_entry) + return self.parse_block_node() + else: + self.state = self.parse_block_sequence_entry + return self.process_empty_scalar(token.end_mark) + if not self.check_token(BlockEndToken): + token = self.peek_token() + raise ParserError("while parsing a block collection", self.marks[-1], + "expected <block end>, but found %r" % token.id, token.start_mark) + token = self.get_token() + event = SequenceEndEvent(token.start_mark, token.end_mark) + self.state = self.states.pop() + self.marks.pop() + return event + + # indentless_sequence ::= (BLOCK-ENTRY block_node?)+ + + def parse_indentless_sequence_entry(self): + if self.check_token(BlockEntryToken): + token = self.get_token() + if not self.check_token(BlockEntryToken, + KeyToken, ValueToken, BlockEndToken): + self.states.append(self.parse_indentless_sequence_entry) + return self.parse_block_node() + else: + self.state = self.parse_indentless_sequence_entry + return self.process_empty_scalar(token.end_mark) + token = self.peek_token() + event = SequenceEndEvent(token.start_mark, token.start_mark) + self.state = self.states.pop() + return event + + # block_mapping ::= BLOCK-MAPPING_START + # ((KEY block_node_or_indentless_sequence?)? + # (VALUE block_node_or_indentless_sequence?)?)* + # BLOCK-END + + def parse_block_mapping_first_key(self): + token = self.get_token() + self.marks.append(token.start_mark) + return self.parse_block_mapping_key() + + def parse_block_mapping_key(self): + if self.check_token(KeyToken): + token = self.get_token() + if not self.check_token(KeyToken, ValueToken, BlockEndToken): + self.states.append(self.parse_block_mapping_value) + return self.parse_block_node_or_indentless_sequence() + else: + self.state = self.parse_block_mapping_value + return self.process_empty_scalar(token.end_mark) + if not self.check_token(BlockEndToken): + token = self.peek_token() + raise ParserError("while parsing a block mapping", self.marks[-1], + "expected <block end>, but found %r" % token.id, token.start_mark) + token = self.get_token() + event = MappingEndEvent(token.start_mark, token.end_mark) + self.state = self.states.pop() + self.marks.pop() + return event + + def parse_block_mapping_value(self): + if self.check_token(ValueToken): + token = self.get_token() + if not self.check_token(KeyToken, ValueToken, BlockEndToken): + self.states.append(self.parse_block_mapping_key) + return self.parse_block_node_or_indentless_sequence() + else: + self.state = self.parse_block_mapping_key + return self.process_empty_scalar(token.end_mark) + else: + self.state = self.parse_block_mapping_key + token = self.peek_token() + return self.process_empty_scalar(token.start_mark) + + # flow_sequence ::= FLOW-SEQUENCE-START + # (flow_sequence_entry FLOW-ENTRY)* + # flow_sequence_entry? + # FLOW-SEQUENCE-END + # flow_sequence_entry ::= flow_node | KEY flow_node? (VALUE flow_node?)? + # + # Note that while production rules for both flow_sequence_entry and + # flow_mapping_entry are equal, their interpretations are different. + # For `flow_sequence_entry`, the part `KEY flow_node? (VALUE flow_node?)?` + # generate an inline mapping (set syntax). + + def parse_flow_sequence_first_entry(self): + token = self.get_token() + self.marks.append(token.start_mark) + return self.parse_flow_sequence_entry(first=True) + + def parse_flow_sequence_entry(self, first=False): + if not self.check_token(FlowSequenceEndToken): + if not first: + if self.check_token(FlowEntryToken): + self.get_token() + else: + token = self.peek_token() + raise ParserError("while parsing a flow sequence", self.marks[-1], + "expected ',' or ']', but got %r" % token.id, token.start_mark) + + if self.check_token(KeyToken): + token = self.peek_token() + event = MappingStartEvent(None, None, True, + token.start_mark, token.end_mark, + flow_style=True) + self.state = self.parse_flow_sequence_entry_mapping_key + return event + elif not self.check_token(FlowSequenceEndToken): + self.states.append(self.parse_flow_sequence_entry) + return self.parse_flow_node() + token = self.get_token() + event = SequenceEndEvent(token.start_mark, token.end_mark) + self.state = self.states.pop() + self.marks.pop() + return event + + def parse_flow_sequence_entry_mapping_key(self): + token = self.get_token() + if not self.check_token(ValueToken, + FlowEntryToken, FlowSequenceEndToken): + self.states.append(self.parse_flow_sequence_entry_mapping_value) + return self.parse_flow_node() + else: + self.state = self.parse_flow_sequence_entry_mapping_value + return self.process_empty_scalar(token.end_mark) + + def parse_flow_sequence_entry_mapping_value(self): + if self.check_token(ValueToken): + token = self.get_token() + if not self.check_token(FlowEntryToken, FlowSequenceEndToken): + self.states.append(self.parse_flow_sequence_entry_mapping_end) + return self.parse_flow_node() + else: + self.state = self.parse_flow_sequence_entry_mapping_end + return self.process_empty_scalar(token.end_mark) + else: + self.state = self.parse_flow_sequence_entry_mapping_end + token = self.peek_token() + return self.process_empty_scalar(token.start_mark) + + def parse_flow_sequence_entry_mapping_end(self): + self.state = self.parse_flow_sequence_entry + token = self.peek_token() + return MappingEndEvent(token.start_mark, token.start_mark) + + # flow_mapping ::= FLOW-MAPPING-START + # (flow_mapping_entry FLOW-ENTRY)* + # flow_mapping_entry? + # FLOW-MAPPING-END + # flow_mapping_entry ::= flow_node | KEY flow_node? (VALUE flow_node?)? + + def parse_flow_mapping_first_key(self): + token = self.get_token() + self.marks.append(token.start_mark) + return self.parse_flow_mapping_key(first=True) + + def parse_flow_mapping_key(self, first=False): + if not self.check_token(FlowMappingEndToken): + if not first: + if self.check_token(FlowEntryToken): + self.get_token() + else: + token = self.peek_token() + raise ParserError("while parsing a flow mapping", self.marks[-1], + "expected ',' or '}', but got %r" % token.id, token.start_mark) + if self.check_token(KeyToken): + token = self.get_token() + if not self.check_token(ValueToken, + FlowEntryToken, FlowMappingEndToken): + self.states.append(self.parse_flow_mapping_value) + return self.parse_flow_node() + else: + self.state = self.parse_flow_mapping_value + return self.process_empty_scalar(token.end_mark) + elif not self.check_token(FlowMappingEndToken): + self.states.append(self.parse_flow_mapping_empty_value) + return self.parse_flow_node() + token = self.get_token() + event = MappingEndEvent(token.start_mark, token.end_mark) + self.state = self.states.pop() + self.marks.pop() + return event + + def parse_flow_mapping_value(self): + if self.check_token(ValueToken): + token = self.get_token() + if not self.check_token(FlowEntryToken, FlowMappingEndToken): + self.states.append(self.parse_flow_mapping_key) + return self.parse_flow_node() + else: + self.state = self.parse_flow_mapping_key + return self.process_empty_scalar(token.end_mark) + else: + self.state = self.parse_flow_mapping_key + token = self.peek_token() + return self.process_empty_scalar(token.start_mark) + + def parse_flow_mapping_empty_value(self): + self.state = self.parse_flow_mapping_key + return self.process_empty_scalar(self.peek_token().start_mark) + + def process_empty_scalar(self, mark): + return ScalarEvent(None, None, (True, False), u'', mark, mark) + diff --git a/python.d/python_modules/pyyaml2/reader.py b/python.d/python_modules/pyyaml2/reader.py new file mode 100644 index 000000000..3249e6b9f --- /dev/null +++ b/python.d/python_modules/pyyaml2/reader.py @@ -0,0 +1,190 @@ +# This module contains abstractions for the input stream. You don't have to +# looks further, there are no pretty code. +# +# We define two classes here. +# +# Mark(source, line, column) +# It's just a record and its only use is producing nice error messages. +# Parser does not use it for any other purposes. +# +# Reader(source, data) +# Reader determines the encoding of `data` and converts it to unicode. +# Reader provides the following methods and attributes: +# reader.peek(length=1) - return the next `length` characters +# reader.forward(length=1) - move the current position to `length` characters. +# reader.index - the number of the current character. +# reader.line, stream.column - the line and the column of the current character. + +__all__ = ['Reader', 'ReaderError'] + +from error import YAMLError, Mark + +import codecs, re + +class ReaderError(YAMLError): + + def __init__(self, name, position, character, encoding, reason): + self.name = name + self.character = character + self.position = position + self.encoding = encoding + self.reason = reason + + def __str__(self): + if isinstance(self.character, str): + return "'%s' codec can't decode byte #x%02x: %s\n" \ + " in \"%s\", position %d" \ + % (self.encoding, ord(self.character), self.reason, + self.name, self.position) + else: + return "unacceptable character #x%04x: %s\n" \ + " in \"%s\", position %d" \ + % (self.character, self.reason, + self.name, self.position) + +class Reader(object): + # Reader: + # - determines the data encoding and converts it to unicode, + # - checks if characters are in allowed range, + # - adds '\0' to the end. + + # Reader accepts + # - a `str` object, + # - a `unicode` object, + # - a file-like object with its `read` method returning `str`, + # - a file-like object with its `read` method returning `unicode`. + + # Yeah, it's ugly and slow. + + def __init__(self, stream): + self.name = None + self.stream = None + self.stream_pointer = 0 + self.eof = True + self.buffer = u'' + self.pointer = 0 + self.raw_buffer = None + self.raw_decode = None + self.encoding = None + self.index = 0 + self.line = 0 + self.column = 0 + if isinstance(stream, unicode): + self.name = "<unicode string>" + self.check_printable(stream) + self.buffer = stream+u'\0' + elif isinstance(stream, str): + self.name = "<string>" + self.raw_buffer = stream + self.determine_encoding() + else: + self.stream = stream + self.name = getattr(stream, 'name', "<file>") + self.eof = False + self.raw_buffer = '' + self.determine_encoding() + + def peek(self, index=0): + try: + return self.buffer[self.pointer+index] + except IndexError: + self.update(index+1) + return self.buffer[self.pointer+index] + + def prefix(self, length=1): + if self.pointer+length >= len(self.buffer): + self.update(length) + return self.buffer[self.pointer:self.pointer+length] + + def forward(self, length=1): + if self.pointer+length+1 >= len(self.buffer): + self.update(length+1) + while length: + ch = self.buffer[self.pointer] + self.pointer += 1 + self.index += 1 + if ch in u'\n\x85\u2028\u2029' \ + or (ch == u'\r' and self.buffer[self.pointer] != u'\n'): + self.line += 1 + self.column = 0 + elif ch != u'\uFEFF': + self.column += 1 + length -= 1 + + def get_mark(self): + if self.stream is None: + return Mark(self.name, self.index, self.line, self.column, + self.buffer, self.pointer) + else: + return Mark(self.name, self.index, self.line, self.column, + None, None) + + def determine_encoding(self): + while not self.eof and len(self.raw_buffer) < 2: + self.update_raw() + if not isinstance(self.raw_buffer, unicode): + if self.raw_buffer.startswith(codecs.BOM_UTF16_LE): + self.raw_decode = codecs.utf_16_le_decode + self.encoding = 'utf-16-le' + elif self.raw_buffer.startswith(codecs.BOM_UTF16_BE): + self.raw_decode = codecs.utf_16_be_decode + self.encoding = 'utf-16-be' + else: + self.raw_decode = codecs.utf_8_decode + self.encoding = 'utf-8' + self.update(1) + + NON_PRINTABLE = re.compile(u'[^\x09\x0A\x0D\x20-\x7E\x85\xA0-\uD7FF\uE000-\uFFFD]') + def check_printable(self, data): + match = self.NON_PRINTABLE.search(data) + if match: + character = match.group() + position = self.index+(len(self.buffer)-self.pointer)+match.start() + raise ReaderError(self.name, position, ord(character), + 'unicode', "special characters are not allowed") + + def update(self, length): + if self.raw_buffer is None: + return + self.buffer = self.buffer[self.pointer:] + self.pointer = 0 + while len(self.buffer) < length: + if not self.eof: + self.update_raw() + if self.raw_decode is not None: + try: + data, converted = self.raw_decode(self.raw_buffer, + 'strict', self.eof) + except UnicodeDecodeError, exc: + character = exc.object[exc.start] + if self.stream is not None: + position = self.stream_pointer-len(self.raw_buffer)+exc.start + else: + position = exc.start + raise ReaderError(self.name, position, character, + exc.encoding, exc.reason) + else: + data = self.raw_buffer + converted = len(data) + self.check_printable(data) + self.buffer += data + self.raw_buffer = self.raw_buffer[converted:] + if self.eof: + self.buffer += u'\0' + self.raw_buffer = None + break + + def update_raw(self, size=1024): + data = self.stream.read(size) + if data: + self.raw_buffer += data + self.stream_pointer += len(data) + else: + self.eof = True + +#try: +# import psyco +# psyco.bind(Reader) +#except ImportError: +# pass + diff --git a/python.d/python_modules/pyyaml2/representer.py b/python.d/python_modules/pyyaml2/representer.py new file mode 100644 index 000000000..5f4fc70db --- /dev/null +++ b/python.d/python_modules/pyyaml2/representer.py @@ -0,0 +1,484 @@ + +__all__ = ['BaseRepresenter', 'SafeRepresenter', 'Representer', + 'RepresenterError'] + +from error import * +from nodes import * + +import datetime + +import sys, copy_reg, types + +class RepresenterError(YAMLError): + pass + +class BaseRepresenter(object): + + yaml_representers = {} + yaml_multi_representers = {} + + def __init__(self, default_style=None, default_flow_style=None): + self.default_style = default_style + self.default_flow_style = default_flow_style + self.represented_objects = {} + self.object_keeper = [] + self.alias_key = None + + def represent(self, data): + node = self.represent_data(data) + self.serialize(node) + self.represented_objects = {} + self.object_keeper = [] + self.alias_key = None + + def get_classobj_bases(self, cls): + bases = [cls] + for base in cls.__bases__: + bases.extend(self.get_classobj_bases(base)) + return bases + + def represent_data(self, data): + if self.ignore_aliases(data): + self.alias_key = None + else: + self.alias_key = id(data) + if self.alias_key is not None: + if self.alias_key in self.represented_objects: + node = self.represented_objects[self.alias_key] + #if node is None: + # raise RepresenterError("recursive objects are not allowed: %r" % data) + return node + #self.represented_objects[alias_key] = None + self.object_keeper.append(data) + data_types = type(data).__mro__ + if type(data) is types.InstanceType: + data_types = self.get_classobj_bases(data.__class__)+list(data_types) + if data_types[0] in self.yaml_representers: + node = self.yaml_representers[data_types[0]](self, data) + else: + for data_type in data_types: + if data_type in self.yaml_multi_representers: + node = self.yaml_multi_representers[data_type](self, data) + break + else: + if None in self.yaml_multi_representers: + node = self.yaml_multi_representers[None](self, data) + elif None in self.yaml_representers: + node = self.yaml_representers[None](self, data) + else: + node = ScalarNode(None, unicode(data)) + #if alias_key is not None: + # self.represented_objects[alias_key] = node + return node + + def add_representer(cls, data_type, representer): + if not 'yaml_representers' in cls.__dict__: + cls.yaml_representers = cls.yaml_representers.copy() + cls.yaml_representers[data_type] = representer + add_representer = classmethod(add_representer) + + def add_multi_representer(cls, data_type, representer): + if not 'yaml_multi_representers' in cls.__dict__: + cls.yaml_multi_representers = cls.yaml_multi_representers.copy() + cls.yaml_multi_representers[data_type] = representer + add_multi_representer = classmethod(add_multi_representer) + + def represent_scalar(self, tag, value, style=None): + if style is None: + style = self.default_style + node = ScalarNode(tag, value, style=style) + if self.alias_key is not None: + self.represented_objects[self.alias_key] = node + return node + + def represent_sequence(self, tag, sequence, flow_style=None): + value = [] + node = SequenceNode(tag, value, flow_style=flow_style) + if self.alias_key is not None: + self.represented_objects[self.alias_key] = node + best_style = True + for item in sequence: + node_item = self.represent_data(item) + if not (isinstance(node_item, ScalarNode) and not node_item.style): + best_style = False + value.append(node_item) + if flow_style is None: + if self.default_flow_style is not None: + node.flow_style = self.default_flow_style + else: + node.flow_style = best_style + return node + + def represent_mapping(self, tag, mapping, flow_style=None): + value = [] + node = MappingNode(tag, value, flow_style=flow_style) + if self.alias_key is not None: + self.represented_objects[self.alias_key] = node + best_style = True + if hasattr(mapping, 'items'): + mapping = mapping.items() + mapping.sort() + for item_key, item_value in mapping: + node_key = self.represent_data(item_key) + node_value = self.represent_data(item_value) + if not (isinstance(node_key, ScalarNode) and not node_key.style): + best_style = False + if not (isinstance(node_value, ScalarNode) and not node_value.style): + best_style = False + value.append((node_key, node_value)) + if flow_style is None: + if self.default_flow_style is not None: + node.flow_style = self.default_flow_style + else: + node.flow_style = best_style + return node + + def ignore_aliases(self, data): + return False + +class SafeRepresenter(BaseRepresenter): + + def ignore_aliases(self, data): + if data in [None, ()]: + return True + if isinstance(data, (str, unicode, bool, int, float)): + return True + + def represent_none(self, data): + return self.represent_scalar(u'tag:yaml.org,2002:null', + u'null') + + def represent_str(self, data): + tag = None + style = None + try: + data = unicode(data, 'ascii') + tag = u'tag:yaml.org,2002:str' + except UnicodeDecodeError: + try: + data = unicode(data, 'utf-8') + tag = u'tag:yaml.org,2002:str' + except UnicodeDecodeError: + data = data.encode('base64') + tag = u'tag:yaml.org,2002:binary' + style = '|' + return self.represent_scalar(tag, data, style=style) + + def represent_unicode(self, data): + return self.represent_scalar(u'tag:yaml.org,2002:str', data) + + def represent_bool(self, data): + if data: + value = u'true' + else: + value = u'false' + return self.represent_scalar(u'tag:yaml.org,2002:bool', value) + + def represent_int(self, data): + return self.represent_scalar(u'tag:yaml.org,2002:int', unicode(data)) + + def represent_long(self, data): + return self.represent_scalar(u'tag:yaml.org,2002:int', unicode(data)) + + inf_value = 1e300 + while repr(inf_value) != repr(inf_value*inf_value): + inf_value *= inf_value + + def represent_float(self, data): + if data != data or (data == 0.0 and data == 1.0): + value = u'.nan' + elif data == self.inf_value: + value = u'.inf' + elif data == -self.inf_value: + value = u'-.inf' + else: + value = unicode(repr(data)).lower() + # Note that in some cases `repr(data)` represents a float number + # without the decimal parts. For instance: + # >>> repr(1e17) + # '1e17' + # Unfortunately, this is not a valid float representation according + # to the definition of the `!!float` tag. We fix this by adding + # '.0' before the 'e' symbol. + if u'.' not in value and u'e' in value: + value = value.replace(u'e', u'.0e', 1) + return self.represent_scalar(u'tag:yaml.org,2002:float', value) + + def represent_list(self, data): + #pairs = (len(data) > 0 and isinstance(data, list)) + #if pairs: + # for item in data: + # if not isinstance(item, tuple) or len(item) != 2: + # pairs = False + # break + #if not pairs: + return self.represent_sequence(u'tag:yaml.org,2002:seq', data) + #value = [] + #for item_key, item_value in data: + # value.append(self.represent_mapping(u'tag:yaml.org,2002:map', + # [(item_key, item_value)])) + #return SequenceNode(u'tag:yaml.org,2002:pairs', value) + + def represent_dict(self, data): + return self.represent_mapping(u'tag:yaml.org,2002:map', data) + + def represent_set(self, data): + value = {} + for key in data: + value[key] = None + return self.represent_mapping(u'tag:yaml.org,2002:set', value) + + def represent_date(self, data): + value = unicode(data.isoformat()) + return self.represent_scalar(u'tag:yaml.org,2002:timestamp', value) + + def represent_datetime(self, data): + value = unicode(data.isoformat(' ')) + return self.represent_scalar(u'tag:yaml.org,2002:timestamp', value) + + def represent_yaml_object(self, tag, data, cls, flow_style=None): + if hasattr(data, '__getstate__'): + state = data.__getstate__() + else: + state = data.__dict__.copy() + return self.represent_mapping(tag, state, flow_style=flow_style) + + def represent_undefined(self, data): + raise RepresenterError("cannot represent an object: %s" % data) + +SafeRepresenter.add_representer(type(None), + SafeRepresenter.represent_none) + +SafeRepresenter.add_representer(str, + SafeRepresenter.represent_str) + +SafeRepresenter.add_representer(unicode, + SafeRepresenter.represent_unicode) + +SafeRepresenter.add_representer(bool, + SafeRepresenter.represent_bool) + +SafeRepresenter.add_representer(int, + SafeRepresenter.represent_int) + +SafeRepresenter.add_representer(long, + SafeRepresenter.represent_long) + +SafeRepresenter.add_representer(float, + SafeRepresenter.represent_float) + +SafeRepresenter.add_representer(list, + SafeRepresenter.represent_list) + +SafeRepresenter.add_representer(tuple, + SafeRepresenter.represent_list) + +SafeRepresenter.add_representer(dict, + SafeRepresenter.represent_dict) + +SafeRepresenter.add_representer(set, + SafeRepresenter.represent_set) + +SafeRepresenter.add_representer(datetime.date, + SafeRepresenter.represent_date) + +SafeRepresenter.add_representer(datetime.datetime, + SafeRepresenter.represent_datetime) + +SafeRepresenter.add_representer(None, + SafeRepresenter.represent_undefined) + +class Representer(SafeRepresenter): + + def represent_str(self, data): + tag = None + style = None + try: + data = unicode(data, 'ascii') + tag = u'tag:yaml.org,2002:str' + except UnicodeDecodeError: + try: + data = unicode(data, 'utf-8') + tag = u'tag:yaml.org,2002:python/str' + except UnicodeDecodeError: + data = data.encode('base64') + tag = u'tag:yaml.org,2002:binary' + style = '|' + return self.represent_scalar(tag, data, style=style) + + def represent_unicode(self, data): + tag = None + try: + data.encode('ascii') + tag = u'tag:yaml.org,2002:python/unicode' + except UnicodeEncodeError: + tag = u'tag:yaml.org,2002:str' + return self.represent_scalar(tag, data) + + def represent_long(self, data): + tag = u'tag:yaml.org,2002:int' + if int(data) is not data: + tag = u'tag:yaml.org,2002:python/long' + return self.represent_scalar(tag, unicode(data)) + + def represent_complex(self, data): + if data.imag == 0.0: + data = u'%r' % data.real + elif data.real == 0.0: + data = u'%rj' % data.imag + elif data.imag > 0: + data = u'%r+%rj' % (data.real, data.imag) + else: + data = u'%r%rj' % (data.real, data.imag) + return self.represent_scalar(u'tag:yaml.org,2002:python/complex', data) + + def represent_tuple(self, data): + return self.represent_sequence(u'tag:yaml.org,2002:python/tuple', data) + + def represent_name(self, data): + name = u'%s.%s' % (data.__module__, data.__name__) + return self.represent_scalar(u'tag:yaml.org,2002:python/name:'+name, u'') + + def represent_module(self, data): + return self.represent_scalar( + u'tag:yaml.org,2002:python/module:'+data.__name__, u'') + + def represent_instance(self, data): + # For instances of classic classes, we use __getinitargs__ and + # __getstate__ to serialize the data. + + # If data.__getinitargs__ exists, the object must be reconstructed by + # calling cls(**args), where args is a tuple returned by + # __getinitargs__. Otherwise, the cls.__init__ method should never be + # called and the class instance is created by instantiating a trivial + # class and assigning to the instance's __class__ variable. + + # If data.__getstate__ exists, it returns the state of the object. + # Otherwise, the state of the object is data.__dict__. + + # We produce either a !!python/object or !!python/object/new node. + # If data.__getinitargs__ does not exist and state is a dictionary, we + # produce a !!python/object node . Otherwise we produce a + # !!python/object/new node. + + cls = data.__class__ + class_name = u'%s.%s' % (cls.__module__, cls.__name__) + args = None + state = None + if hasattr(data, '__getinitargs__'): + args = list(data.__getinitargs__()) + if hasattr(data, '__getstate__'): + state = data.__getstate__() + else: + state = data.__dict__ + if args is None and isinstance(state, dict): + return self.represent_mapping( + u'tag:yaml.org,2002:python/object:'+class_name, state) + if isinstance(state, dict) and not state: + return self.represent_sequence( + u'tag:yaml.org,2002:python/object/new:'+class_name, args) + value = {} + if args: + value['args'] = args + value['state'] = state + return self.represent_mapping( + u'tag:yaml.org,2002:python/object/new:'+class_name, value) + + def represent_object(self, data): + # We use __reduce__ API to save the data. data.__reduce__ returns + # a tuple of length 2-5: + # (function, args, state, listitems, dictitems) + + # For reconstructing, we calls function(*args), then set its state, + # listitems, and dictitems if they are not None. + + # A special case is when function.__name__ == '__newobj__'. In this + # case we create the object with args[0].__new__(*args). + + # Another special case is when __reduce__ returns a string - we don't + # support it. + + # We produce a !!python/object, !!python/object/new or + # !!python/object/apply node. + + cls = type(data) + if cls in copy_reg.dispatch_table: + reduce = copy_reg.dispatch_table[cls](data) + elif hasattr(data, '__reduce_ex__'): + reduce = data.__reduce_ex__(2) + elif hasattr(data, '__reduce__'): + reduce = data.__reduce__() + else: + raise RepresenterError("cannot represent object: %r" % data) + reduce = (list(reduce)+[None]*5)[:5] + function, args, state, listitems, dictitems = reduce + args = list(args) + if state is None: + state = {} + if listitems is not None: + listitems = list(listitems) + if dictitems is not None: + dictitems = dict(dictitems) + if function.__name__ == '__newobj__': + function = args[0] + args = args[1:] + tag = u'tag:yaml.org,2002:python/object/new:' + newobj = True + else: + tag = u'tag:yaml.org,2002:python/object/apply:' + newobj = False + function_name = u'%s.%s' % (function.__module__, function.__name__) + if not args and not listitems and not dictitems \ + and isinstance(state, dict) and newobj: + return self.represent_mapping( + u'tag:yaml.org,2002:python/object:'+function_name, state) + if not listitems and not dictitems \ + and isinstance(state, dict) and not state: + return self.represent_sequence(tag+function_name, args) + value = {} + if args: + value['args'] = args + if state or not isinstance(state, dict): + value['state'] = state + if listitems: + value['listitems'] = listitems + if dictitems: + value['dictitems'] = dictitems + return self.represent_mapping(tag+function_name, value) + +Representer.add_representer(str, + Representer.represent_str) + +Representer.add_representer(unicode, + Representer.represent_unicode) + +Representer.add_representer(long, + Representer.represent_long) + +Representer.add_representer(complex, + Representer.represent_complex) + +Representer.add_representer(tuple, + Representer.represent_tuple) + +Representer.add_representer(type, + Representer.represent_name) + +Representer.add_representer(types.ClassType, + Representer.represent_name) + +Representer.add_representer(types.FunctionType, + Representer.represent_name) + +Representer.add_representer(types.BuiltinFunctionType, + Representer.represent_name) + +Representer.add_representer(types.ModuleType, + Representer.represent_module) + +Representer.add_multi_representer(types.InstanceType, + Representer.represent_instance) + +Representer.add_multi_representer(object, + Representer.represent_object) + diff --git a/python.d/python_modules/pyyaml2/resolver.py b/python.d/python_modules/pyyaml2/resolver.py new file mode 100644 index 000000000..6b5ab8759 --- /dev/null +++ b/python.d/python_modules/pyyaml2/resolver.py @@ -0,0 +1,224 @@ + +__all__ = ['BaseResolver', 'Resolver'] + +from error import * +from nodes import * + +import re + +class ResolverError(YAMLError): + pass + +class BaseResolver(object): + + DEFAULT_SCALAR_TAG = u'tag:yaml.org,2002:str' + DEFAULT_SEQUENCE_TAG = u'tag:yaml.org,2002:seq' + DEFAULT_MAPPING_TAG = u'tag:yaml.org,2002:map' + + yaml_implicit_resolvers = {} + yaml_path_resolvers = {} + + def __init__(self): + self.resolver_exact_paths = [] + self.resolver_prefix_paths = [] + + def add_implicit_resolver(cls, tag, regexp, first): + if not 'yaml_implicit_resolvers' in cls.__dict__: + cls.yaml_implicit_resolvers = cls.yaml_implicit_resolvers.copy() + if first is None: + first = [None] + for ch in first: + cls.yaml_implicit_resolvers.setdefault(ch, []).append((tag, regexp)) + add_implicit_resolver = classmethod(add_implicit_resolver) + + def add_path_resolver(cls, tag, path, kind=None): + # Note: `add_path_resolver` is experimental. The API could be changed. + # `new_path` is a pattern that is matched against the path from the + # root to the node that is being considered. `node_path` elements are + # tuples `(node_check, index_check)`. `node_check` is a node class: + # `ScalarNode`, `SequenceNode`, `MappingNode` or `None`. `None` + # matches any kind of a node. `index_check` could be `None`, a boolean + # value, a string value, or a number. `None` and `False` match against + # any _value_ of sequence and mapping nodes. `True` matches against + # any _key_ of a mapping node. A string `index_check` matches against + # a mapping value that corresponds to a scalar key which content is + # equal to the `index_check` value. An integer `index_check` matches + # against a sequence value with the index equal to `index_check`. + if not 'yaml_path_resolvers' in cls.__dict__: + cls.yaml_path_resolvers = cls.yaml_path_resolvers.copy() + new_path = [] + for element in path: + if isinstance(element, (list, tuple)): + if len(element) == 2: + node_check, index_check = element + elif len(element) == 1: + node_check = element[0] + index_check = True + else: + raise ResolverError("Invalid path element: %s" % element) + else: + node_check = None + index_check = element + if node_check is str: + node_check = ScalarNode + elif node_check is list: + node_check = SequenceNode + elif node_check is dict: + node_check = MappingNode + elif node_check not in [ScalarNode, SequenceNode, MappingNode] \ + and not isinstance(node_check, basestring) \ + and node_check is not None: + raise ResolverError("Invalid node checker: %s" % node_check) + if not isinstance(index_check, (basestring, int)) \ + and index_check is not None: + raise ResolverError("Invalid index checker: %s" % index_check) + new_path.append((node_check, index_check)) + if kind is str: + kind = ScalarNode + elif kind is list: + kind = SequenceNode + elif kind is dict: + kind = MappingNode + elif kind not in [ScalarNode, SequenceNode, MappingNode] \ + and kind is not None: + raise ResolverError("Invalid node kind: %s" % kind) + cls.yaml_path_resolvers[tuple(new_path), kind] = tag + add_path_resolver = classmethod(add_path_resolver) + + def descend_resolver(self, current_node, current_index): + if not self.yaml_path_resolvers: + return + exact_paths = {} + prefix_paths = [] + if current_node: + depth = len(self.resolver_prefix_paths) + for path, kind in self.resolver_prefix_paths[-1]: + if self.check_resolver_prefix(depth, path, kind, + current_node, current_index): + if len(path) > depth: + prefix_paths.append((path, kind)) + else: + exact_paths[kind] = self.yaml_path_resolvers[path, kind] + else: + for path, kind in self.yaml_path_resolvers: + if not path: + exact_paths[kind] = self.yaml_path_resolvers[path, kind] + else: + prefix_paths.append((path, kind)) + self.resolver_exact_paths.append(exact_paths) + self.resolver_prefix_paths.append(prefix_paths) + + def ascend_resolver(self): + if not self.yaml_path_resolvers: + return + self.resolver_exact_paths.pop() + self.resolver_prefix_paths.pop() + + def check_resolver_prefix(self, depth, path, kind, + current_node, current_index): + node_check, index_check = path[depth-1] + if isinstance(node_check, basestring): + if current_node.tag != node_check: + return + elif node_check is not None: + if not isinstance(current_node, node_check): + return + if index_check is True and current_index is not None: + return + if (index_check is False or index_check is None) \ + and current_index is None: + return + if isinstance(index_check, basestring): + if not (isinstance(current_index, ScalarNode) + and index_check == current_index.value): + return + elif isinstance(index_check, int) and not isinstance(index_check, bool): + if index_check != current_index: + return + return True + + def resolve(self, kind, value, implicit): + if kind is ScalarNode and implicit[0]: + if value == u'': + resolvers = self.yaml_implicit_resolvers.get(u'', []) + else: + resolvers = self.yaml_implicit_resolvers.get(value[0], []) + resolvers += self.yaml_implicit_resolvers.get(None, []) + for tag, regexp in resolvers: + if regexp.match(value): + return tag + implicit = implicit[1] + if self.yaml_path_resolvers: + exact_paths = self.resolver_exact_paths[-1] + if kind in exact_paths: + return exact_paths[kind] + if None in exact_paths: + return exact_paths[None] + if kind is ScalarNode: + return self.DEFAULT_SCALAR_TAG + elif kind is SequenceNode: + return self.DEFAULT_SEQUENCE_TAG + elif kind is MappingNode: + return self.DEFAULT_MAPPING_TAG + +class Resolver(BaseResolver): + pass + +Resolver.add_implicit_resolver( + u'tag:yaml.org,2002:bool', + re.compile(ur'''^(?:yes|Yes|YES|no|No|NO + |true|True|TRUE|false|False|FALSE + |on|On|ON|off|Off|OFF)$''', re.X), + list(u'yYnNtTfFoO')) + +Resolver.add_implicit_resolver( + u'tag:yaml.org,2002:float', + re.compile(ur'''^(?:[-+]?(?:[0-9][0-9_]*)\.[0-9_]*(?:[eE][-+][0-9]+)? + |\.[0-9_]+(?:[eE][-+][0-9]+)? + |[-+]?[0-9][0-9_]*(?::[0-5]?[0-9])+\.[0-9_]* + |[-+]?\.(?:inf|Inf|INF) + |\.(?:nan|NaN|NAN))$''', re.X), + list(u'-+0123456789.')) + +Resolver.add_implicit_resolver( + u'tag:yaml.org,2002:int', + re.compile(ur'''^(?:[-+]?0b[0-1_]+ + |[-+]?0[0-7_]+ + |[-+]?(?:0|[1-9][0-9_]*) + |[-+]?0x[0-9a-fA-F_]+ + |[-+]?[1-9][0-9_]*(?::[0-5]?[0-9])+)$''', re.X), + list(u'-+0123456789')) + +Resolver.add_implicit_resolver( + u'tag:yaml.org,2002:merge', + re.compile(ur'^(?:<<)$'), + [u'<']) + +Resolver.add_implicit_resolver( + u'tag:yaml.org,2002:null', + re.compile(ur'''^(?: ~ + |null|Null|NULL + | )$''', re.X), + [u'~', u'n', u'N', u'']) + +Resolver.add_implicit_resolver( + u'tag:yaml.org,2002:timestamp', + re.compile(ur'''^(?:[0-9][0-9][0-9][0-9]-[0-9][0-9]-[0-9][0-9] + |[0-9][0-9][0-9][0-9] -[0-9][0-9]? -[0-9][0-9]? + (?:[Tt]|[ \t]+)[0-9][0-9]? + :[0-9][0-9] :[0-9][0-9] (?:\.[0-9]*)? + (?:[ \t]*(?:Z|[-+][0-9][0-9]?(?::[0-9][0-9])?))?)$''', re.X), + list(u'0123456789')) + +Resolver.add_implicit_resolver( + u'tag:yaml.org,2002:value', + re.compile(ur'^(?:=)$'), + [u'=']) + +# The following resolver is only for documentation purposes. It cannot work +# because plain scalars cannot start with '!', '&', or '*'. +Resolver.add_implicit_resolver( + u'tag:yaml.org,2002:yaml', + re.compile(ur'^(?:!|&|\*)$'), + list(u'!&*')) + diff --git a/python.d/python_modules/pyyaml2/scanner.py b/python.d/python_modules/pyyaml2/scanner.py new file mode 100644 index 000000000..5228fad65 --- /dev/null +++ b/python.d/python_modules/pyyaml2/scanner.py @@ -0,0 +1,1457 @@ + +# Scanner produces tokens of the following types: +# STREAM-START +# STREAM-END +# DIRECTIVE(name, value) +# DOCUMENT-START +# DOCUMENT-END +# BLOCK-SEQUENCE-START +# BLOCK-MAPPING-START +# BLOCK-END +# FLOW-SEQUENCE-START +# FLOW-MAPPING-START +# FLOW-SEQUENCE-END +# FLOW-MAPPING-END +# BLOCK-ENTRY +# FLOW-ENTRY +# KEY +# VALUE +# ALIAS(value) +# ANCHOR(value) +# TAG(value) +# SCALAR(value, plain, style) +# +# Read comments in the Scanner code for more details. +# + +__all__ = ['Scanner', 'ScannerError'] + +from error import MarkedYAMLError +from tokens import * + +class ScannerError(MarkedYAMLError): + pass + +class SimpleKey(object): + # See below simple keys treatment. + + def __init__(self, token_number, required, index, line, column, mark): + self.token_number = token_number + self.required = required + self.index = index + self.line = line + self.column = column + self.mark = mark + +class Scanner(object): + + def __init__(self): + """Initialize the scanner.""" + # It is assumed that Scanner and Reader will have a common descendant. + # Reader do the dirty work of checking for BOM and converting the + # input data to Unicode. It also adds NUL to the end. + # + # Reader supports the following methods + # self.peek(i=0) # peek the next i-th character + # self.prefix(l=1) # peek the next l characters + # self.forward(l=1) # read the next l characters and move the pointer. + + # Had we reached the end of the stream? + self.done = False + + # The number of unclosed '{' and '['. `flow_level == 0` means block + # context. + self.flow_level = 0 + + # List of processed tokens that are not yet emitted. + self.tokens = [] + + # Add the STREAM-START token. + self.fetch_stream_start() + + # Number of tokens that were emitted through the `get_token` method. + self.tokens_taken = 0 + + # The current indentation level. + self.indent = -1 + + # Past indentation levels. + self.indents = [] + + # Variables related to simple keys treatment. + + # A simple key is a key that is not denoted by the '?' indicator. + # Example of simple keys: + # --- + # block simple key: value + # ? not a simple key: + # : { flow simple key: value } + # We emit the KEY token before all keys, so when we find a potential + # simple key, we try to locate the corresponding ':' indicator. + # Simple keys should be limited to a single line and 1024 characters. + + # Can a simple key start at the current position? A simple key may + # start: + # - at the beginning of the line, not counting indentation spaces + # (in block context), + # - after '{', '[', ',' (in the flow context), + # - after '?', ':', '-' (in the block context). + # In the block context, this flag also signifies if a block collection + # may start at the current position. + self.allow_simple_key = True + + # Keep track of possible simple keys. This is a dictionary. The key + # is `flow_level`; there can be no more that one possible simple key + # for each level. The value is a SimpleKey record: + # (token_number, required, index, line, column, mark) + # A simple key may start with ALIAS, ANCHOR, TAG, SCALAR(flow), + # '[', or '{' tokens. + self.possible_simple_keys = {} + + # Public methods. + + def check_token(self, *choices): + # Check if the next token is one of the given types. + while self.need_more_tokens(): + self.fetch_more_tokens() + if self.tokens: + if not choices: + return True + for choice in choices: + if isinstance(self.tokens[0], choice): + return True + return False + + def peek_token(self): + # Return the next token, but do not delete if from the queue. + while self.need_more_tokens(): + self.fetch_more_tokens() + if self.tokens: + return self.tokens[0] + + def get_token(self): + # Return the next token. + while self.need_more_tokens(): + self.fetch_more_tokens() + if self.tokens: + self.tokens_taken += 1 + return self.tokens.pop(0) + + # Private methods. + + def need_more_tokens(self): + if self.done: + return False + if not self.tokens: + return True + # The current token may be a potential simple key, so we + # need to look further. + self.stale_possible_simple_keys() + if self.next_possible_simple_key() == self.tokens_taken: + return True + + def fetch_more_tokens(self): + + # Eat whitespaces and comments until we reach the next token. + self.scan_to_next_token() + + # Remove obsolete possible simple keys. + self.stale_possible_simple_keys() + + # Compare the current indentation and column. It may add some tokens + # and decrease the current indentation level. + self.unwind_indent(self.column) + + # Peek the next character. + ch = self.peek() + + # Is it the end of stream? + if ch == u'\0': + return self.fetch_stream_end() + + # Is it a directive? + if ch == u'%' and self.check_directive(): + return self.fetch_directive() + + # Is it the document start? + if ch == u'-' and self.check_document_start(): + return self.fetch_document_start() + + # Is it the document end? + if ch == u'.' and self.check_document_end(): + return self.fetch_document_end() + + # TODO: support for BOM within a stream. + #if ch == u'\uFEFF': + # return self.fetch_bom() <-- issue BOMToken + + # Note: the order of the following checks is NOT significant. + + # Is it the flow sequence start indicator? + if ch == u'[': + return self.fetch_flow_sequence_start() + + # Is it the flow mapping start indicator? + if ch == u'{': + return self.fetch_flow_mapping_start() + + # Is it the flow sequence end indicator? + if ch == u']': + return self.fetch_flow_sequence_end() + + # Is it the flow mapping end indicator? + if ch == u'}': + return self.fetch_flow_mapping_end() + + # Is it the flow entry indicator? + if ch == u',': + return self.fetch_flow_entry() + + # Is it the block entry indicator? + if ch == u'-' and self.check_block_entry(): + return self.fetch_block_entry() + + # Is it the key indicator? + if ch == u'?' and self.check_key(): + return self.fetch_key() + + # Is it the value indicator? + if ch == u':' and self.check_value(): + return self.fetch_value() + + # Is it an alias? + if ch == u'*': + return self.fetch_alias() + + # Is it an anchor? + if ch == u'&': + return self.fetch_anchor() + + # Is it a tag? + if ch == u'!': + return self.fetch_tag() + + # Is it a literal scalar? + if ch == u'|' and not self.flow_level: + return self.fetch_literal() + + # Is it a folded scalar? + if ch == u'>' and not self.flow_level: + return self.fetch_folded() + + # Is it a single quoted scalar? + if ch == u'\'': + return self.fetch_single() + + # Is it a double quoted scalar? + if ch == u'\"': + return self.fetch_double() + + # It must be a plain scalar then. + if self.check_plain(): + return self.fetch_plain() + + # No? It's an error. Let's produce a nice error message. + raise ScannerError("while scanning for the next token", None, + "found character %r that cannot start any token" + % ch.encode('utf-8'), self.get_mark()) + + # Simple keys treatment. + + def next_possible_simple_key(self): + # Return the number of the nearest possible simple key. Actually we + # don't need to loop through the whole dictionary. We may replace it + # with the following code: + # if not self.possible_simple_keys: + # return None + # return self.possible_simple_keys[ + # min(self.possible_simple_keys.keys())].token_number + min_token_number = None + for level in self.possible_simple_keys: + key = self.possible_simple_keys[level] + if min_token_number is None or key.token_number < min_token_number: + min_token_number = key.token_number + return min_token_number + + def stale_possible_simple_keys(self): + # Remove entries that are no longer possible simple keys. According to + # the YAML specification, simple keys + # - should be limited to a single line, + # - should be no longer than 1024 characters. + # Disabling this procedure will allow simple keys of any length and + # height (may cause problems if indentation is broken though). + for level in self.possible_simple_keys.keys(): + key = self.possible_simple_keys[level] + if key.line != self.line \ + or self.index-key.index > 1024: + if key.required: + raise ScannerError("while scanning a simple key", key.mark, + "could not found expected ':'", self.get_mark()) + del self.possible_simple_keys[level] + + def save_possible_simple_key(self): + # The next token may start a simple key. We check if it's possible + # and save its position. This function is called for + # ALIAS, ANCHOR, TAG, SCALAR(flow), '[', and '{'. + + # Check if a simple key is required at the current position. + required = not self.flow_level and self.indent == self.column + + # A simple key is required only if it is the first token in the current + # line. Therefore it is always allowed. + assert self.allow_simple_key or not required + + # The next token might be a simple key. Let's save it's number and + # position. + if self.allow_simple_key: + self.remove_possible_simple_key() + token_number = self.tokens_taken+len(self.tokens) + key = SimpleKey(token_number, required, + self.index, self.line, self.column, self.get_mark()) + self.possible_simple_keys[self.flow_level] = key + + def remove_possible_simple_key(self): + # Remove the saved possible key position at the current flow level. + if self.flow_level in self.possible_simple_keys: + key = self.possible_simple_keys[self.flow_level] + + if key.required: + raise ScannerError("while scanning a simple key", key.mark, + "could not found expected ':'", self.get_mark()) + + del self.possible_simple_keys[self.flow_level] + + # Indentation functions. + + def unwind_indent(self, column): + + ## In flow context, tokens should respect indentation. + ## Actually the condition should be `self.indent >= column` according to + ## the spec. But this condition will prohibit intuitively correct + ## constructions such as + ## key : { + ## } + #if self.flow_level and self.indent > column: + # raise ScannerError(None, None, + # "invalid intendation or unclosed '[' or '{'", + # self.get_mark()) + + # In the flow context, indentation is ignored. We make the scanner less + # restrictive then specification requires. + if self.flow_level: + return + + # In block context, we may need to issue the BLOCK-END tokens. + while self.indent > column: + mark = self.get_mark() + self.indent = self.indents.pop() + self.tokens.append(BlockEndToken(mark, mark)) + + def add_indent(self, column): + # Check if we need to increase indentation. + if self.indent < column: + self.indents.append(self.indent) + self.indent = column + return True + return False + + # Fetchers. + + def fetch_stream_start(self): + # We always add STREAM-START as the first token and STREAM-END as the + # last token. + + # Read the token. + mark = self.get_mark() + + # Add STREAM-START. + self.tokens.append(StreamStartToken(mark, mark, + encoding=self.encoding)) + + + def fetch_stream_end(self): + + # Set the current intendation to -1. + self.unwind_indent(-1) + + # Reset simple keys. + self.remove_possible_simple_key() + self.allow_simple_key = False + self.possible_simple_keys = {} + + # Read the token. + mark = self.get_mark() + + # Add STREAM-END. + self.tokens.append(StreamEndToken(mark, mark)) + + # The steam is finished. + self.done = True + + def fetch_directive(self): + + # Set the current intendation to -1. + self.unwind_indent(-1) + + # Reset simple keys. + self.remove_possible_simple_key() + self.allow_simple_key = False + + # Scan and add DIRECTIVE. + self.tokens.append(self.scan_directive()) + + def fetch_document_start(self): + self.fetch_document_indicator(DocumentStartToken) + + def fetch_document_end(self): + self.fetch_document_indicator(DocumentEndToken) + + def fetch_document_indicator(self, TokenClass): + + # Set the current intendation to -1. + self.unwind_indent(-1) + + # Reset simple keys. Note that there could not be a block collection + # after '---'. + self.remove_possible_simple_key() + self.allow_simple_key = False + + # Add DOCUMENT-START or DOCUMENT-END. + start_mark = self.get_mark() + self.forward(3) + end_mark = self.get_mark() + self.tokens.append(TokenClass(start_mark, end_mark)) + + def fetch_flow_sequence_start(self): + self.fetch_flow_collection_start(FlowSequenceStartToken) + + def fetch_flow_mapping_start(self): + self.fetch_flow_collection_start(FlowMappingStartToken) + + def fetch_flow_collection_start(self, TokenClass): + + # '[' and '{' may start a simple key. + self.save_possible_simple_key() + + # Increase the flow level. + self.flow_level += 1 + + # Simple keys are allowed after '[' and '{'. + self.allow_simple_key = True + + # Add FLOW-SEQUENCE-START or FLOW-MAPPING-START. + start_mark = self.get_mark() + self.forward() + end_mark = self.get_mark() + self.tokens.append(TokenClass(start_mark, end_mark)) + + def fetch_flow_sequence_end(self): + self.fetch_flow_collection_end(FlowSequenceEndToken) + + def fetch_flow_mapping_end(self): + self.fetch_flow_collection_end(FlowMappingEndToken) + + def fetch_flow_collection_end(self, TokenClass): + + # Reset possible simple key on the current level. + self.remove_possible_simple_key() + + # Decrease the flow level. + self.flow_level -= 1 + + # No simple keys after ']' or '}'. + self.allow_simple_key = False + + # Add FLOW-SEQUENCE-END or FLOW-MAPPING-END. + start_mark = self.get_mark() + self.forward() + end_mark = self.get_mark() + self.tokens.append(TokenClass(start_mark, end_mark)) + + def fetch_flow_entry(self): + + # Simple keys are allowed after ','. + self.allow_simple_key = True + + # Reset possible simple key on the current level. + self.remove_possible_simple_key() + + # Add FLOW-ENTRY. + start_mark = self.get_mark() + self.forward() + end_mark = self.get_mark() + self.tokens.append(FlowEntryToken(start_mark, end_mark)) + + def fetch_block_entry(self): + + # Block context needs additional checks. + if not self.flow_level: + + # Are we allowed to start a new entry? + if not self.allow_simple_key: + raise ScannerError(None, None, + "sequence entries are not allowed here", + self.get_mark()) + + # We may need to add BLOCK-SEQUENCE-START. + if self.add_indent(self.column): + mark = self.get_mark() + self.tokens.append(BlockSequenceStartToken(mark, mark)) + + # It's an error for the block entry to occur in the flow context, + # but we let the parser detect this. + else: + pass + + # Simple keys are allowed after '-'. + self.allow_simple_key = True + + # Reset possible simple key on the current level. + self.remove_possible_simple_key() + + # Add BLOCK-ENTRY. + start_mark = self.get_mark() + self.forward() + end_mark = self.get_mark() + self.tokens.append(BlockEntryToken(start_mark, end_mark)) + + def fetch_key(self): + + # Block context needs additional checks. + if not self.flow_level: + + # Are we allowed to start a key (not nessesary a simple)? + if not self.allow_simple_key: + raise ScannerError(None, None, + "mapping keys are not allowed here", + self.get_mark()) + + # We may need to add BLOCK-MAPPING-START. + if self.add_indent(self.column): + mark = self.get_mark() + self.tokens.append(BlockMappingStartToken(mark, mark)) + + # Simple keys are allowed after '?' in the block context. + self.allow_simple_key = not self.flow_level + + # Reset possible simple key on the current level. + self.remove_possible_simple_key() + + # Add KEY. + start_mark = self.get_mark() + self.forward() + end_mark = self.get_mark() + self.tokens.append(KeyToken(start_mark, end_mark)) + + def fetch_value(self): + + # Do we determine a simple key? + if self.flow_level in self.possible_simple_keys: + + # Add KEY. + key = self.possible_simple_keys[self.flow_level] + del self.possible_simple_keys[self.flow_level] + self.tokens.insert(key.token_number-self.tokens_taken, + KeyToken(key.mark, key.mark)) + + # If this key starts a new block mapping, we need to add + # BLOCK-MAPPING-START. + if not self.flow_level: + if self.add_indent(key.column): + self.tokens.insert(key.token_number-self.tokens_taken, + BlockMappingStartToken(key.mark, key.mark)) + + # There cannot be two simple keys one after another. + self.allow_simple_key = False + + # It must be a part of a complex key. + else: + + # Block context needs additional checks. + # (Do we really need them? They will be catched by the parser + # anyway.) + if not self.flow_level: + + # We are allowed to start a complex value if and only if + # we can start a simple key. + if not self.allow_simple_key: + raise ScannerError(None, None, + "mapping values are not allowed here", + self.get_mark()) + + # If this value starts a new block mapping, we need to add + # BLOCK-MAPPING-START. It will be detected as an error later by + # the parser. + if not self.flow_level: + if self.add_indent(self.column): + mark = self.get_mark() + self.tokens.append(BlockMappingStartToken(mark, mark)) + + # Simple keys are allowed after ':' in the block context. + self.allow_simple_key = not self.flow_level + + # Reset possible simple key on the current level. + self.remove_possible_simple_key() + + # Add VALUE. + start_mark = self.get_mark() + self.forward() + end_mark = self.get_mark() + self.tokens.append(ValueToken(start_mark, end_mark)) + + def fetch_alias(self): + + # ALIAS could be a simple key. + self.save_possible_simple_key() + + # No simple keys after ALIAS. + self.allow_simple_key = False + + # Scan and add ALIAS. + self.tokens.append(self.scan_anchor(AliasToken)) + + def fetch_anchor(self): + + # ANCHOR could start a simple key. + self.save_possible_simple_key() + + # No simple keys after ANCHOR. + self.allow_simple_key = False + + # Scan and add ANCHOR. + self.tokens.append(self.scan_anchor(AnchorToken)) + + def fetch_tag(self): + + # TAG could start a simple key. + self.save_possible_simple_key() + + # No simple keys after TAG. + self.allow_simple_key = False + + # Scan and add TAG. + self.tokens.append(self.scan_tag()) + + def fetch_literal(self): + self.fetch_block_scalar(style='|') + + def fetch_folded(self): + self.fetch_block_scalar(style='>') + + def fetch_block_scalar(self, style): + + # A simple key may follow a block scalar. + self.allow_simple_key = True + + # Reset possible simple key on the current level. + self.remove_possible_simple_key() + + # Scan and add SCALAR. + self.tokens.append(self.scan_block_scalar(style)) + + def fetch_single(self): + self.fetch_flow_scalar(style='\'') + + def fetch_double(self): + self.fetch_flow_scalar(style='"') + + def fetch_flow_scalar(self, style): + + # A flow scalar could be a simple key. + self.save_possible_simple_key() + + # No simple keys after flow scalars. + self.allow_simple_key = False + + # Scan and add SCALAR. + self.tokens.append(self.scan_flow_scalar(style)) + + def fetch_plain(self): + + # A plain scalar could be a simple key. + self.save_possible_simple_key() + + # No simple keys after plain scalars. But note that `scan_plain` will + # change this flag if the scan is finished at the beginning of the + # line. + self.allow_simple_key = False + + # Scan and add SCALAR. May change `allow_simple_key`. + self.tokens.append(self.scan_plain()) + + # Checkers. + + def check_directive(self): + + # DIRECTIVE: ^ '%' ... + # The '%' indicator is already checked. + if self.column == 0: + return True + + def check_document_start(self): + + # DOCUMENT-START: ^ '---' (' '|'\n') + if self.column == 0: + if self.prefix(3) == u'---' \ + and self.peek(3) in u'\0 \t\r\n\x85\u2028\u2029': + return True + + def check_document_end(self): + + # DOCUMENT-END: ^ '...' (' '|'\n') + if self.column == 0: + if self.prefix(3) == u'...' \ + and self.peek(3) in u'\0 \t\r\n\x85\u2028\u2029': + return True + + def check_block_entry(self): + + # BLOCK-ENTRY: '-' (' '|'\n') + return self.peek(1) in u'\0 \t\r\n\x85\u2028\u2029' + + def check_key(self): + + # KEY(flow context): '?' + if self.flow_level: + return True + + # KEY(block context): '?' (' '|'\n') + else: + return self.peek(1) in u'\0 \t\r\n\x85\u2028\u2029' + + def check_value(self): + + # VALUE(flow context): ':' + if self.flow_level: + return True + + # VALUE(block context): ':' (' '|'\n') + else: + return self.peek(1) in u'\0 \t\r\n\x85\u2028\u2029' + + def check_plain(self): + + # A plain scalar may start with any non-space character except: + # '-', '?', ':', ',', '[', ']', '{', '}', + # '#', '&', '*', '!', '|', '>', '\'', '\"', + # '%', '@', '`'. + # + # It may also start with + # '-', '?', ':' + # if it is followed by a non-space character. + # + # Note that we limit the last rule to the block context (except the + # '-' character) because we want the flow context to be space + # independent. + ch = self.peek() + return ch not in u'\0 \t\r\n\x85\u2028\u2029-?:,[]{}#&*!|>\'\"%@`' \ + or (self.peek(1) not in u'\0 \t\r\n\x85\u2028\u2029' + and (ch == u'-' or (not self.flow_level and ch in u'?:'))) + + # Scanners. + + def scan_to_next_token(self): + # We ignore spaces, line breaks and comments. + # If we find a line break in the block context, we set the flag + # `allow_simple_key` on. + # The byte order mark is stripped if it's the first character in the + # stream. We do not yet support BOM inside the stream as the + # specification requires. Any such mark will be considered as a part + # of the document. + # + # TODO: We need to make tab handling rules more sane. A good rule is + # Tabs cannot precede tokens + # BLOCK-SEQUENCE-START, BLOCK-MAPPING-START, BLOCK-END, + # KEY(block), VALUE(block), BLOCK-ENTRY + # So the checking code is + # if <TAB>: + # self.allow_simple_keys = False + # We also need to add the check for `allow_simple_keys == True` to + # `unwind_indent` before issuing BLOCK-END. + # Scanners for block, flow, and plain scalars need to be modified. + + if self.index == 0 and self.peek() == u'\uFEFF': + self.forward() + found = False + while not found: + while self.peek() == u' ': + self.forward() + if self.peek() == u'#': + while self.peek() not in u'\0\r\n\x85\u2028\u2029': + self.forward() + if self.scan_line_break(): + if not self.flow_level: + self.allow_simple_key = True + else: + found = True + + def scan_directive(self): + # See the specification for details. + start_mark = self.get_mark() + self.forward() + name = self.scan_directive_name(start_mark) + value = None + if name == u'YAML': + value = self.scan_yaml_directive_value(start_mark) + end_mark = self.get_mark() + elif name == u'TAG': + value = self.scan_tag_directive_value(start_mark) + end_mark = self.get_mark() + else: + end_mark = self.get_mark() + while self.peek() not in u'\0\r\n\x85\u2028\u2029': + self.forward() + self.scan_directive_ignored_line(start_mark) + return DirectiveToken(name, value, start_mark, end_mark) + + def scan_directive_name(self, start_mark): + # See the specification for details. + length = 0 + ch = self.peek(length) + while u'0' <= ch <= u'9' or u'A' <= ch <= u'Z' or u'a' <= ch <= u'z' \ + or ch in u'-_': + length += 1 + ch = self.peek(length) + if not length: + raise ScannerError("while scanning a directive", start_mark, + "expected alphabetic or numeric character, but found %r" + % ch.encode('utf-8'), self.get_mark()) + value = self.prefix(length) + self.forward(length) + ch = self.peek() + if ch not in u'\0 \r\n\x85\u2028\u2029': + raise ScannerError("while scanning a directive", start_mark, + "expected alphabetic or numeric character, but found %r" + % ch.encode('utf-8'), self.get_mark()) + return value + + def scan_yaml_directive_value(self, start_mark): + # See the specification for details. + while self.peek() == u' ': + self.forward() + major = self.scan_yaml_directive_number(start_mark) + if self.peek() != '.': + raise ScannerError("while scanning a directive", start_mark, + "expected a digit or '.', but found %r" + % self.peek().encode('utf-8'), + self.get_mark()) + self.forward() + minor = self.scan_yaml_directive_number(start_mark) + if self.peek() not in u'\0 \r\n\x85\u2028\u2029': + raise ScannerError("while scanning a directive", start_mark, + "expected a digit or ' ', but found %r" + % self.peek().encode('utf-8'), + self.get_mark()) + return (major, minor) + + def scan_yaml_directive_number(self, start_mark): + # See the specification for details. + ch = self.peek() + if not (u'0' <= ch <= u'9'): + raise ScannerError("while scanning a directive", start_mark, + "expected a digit, but found %r" % ch.encode('utf-8'), + self.get_mark()) + length = 0 + while u'0' <= self.peek(length) <= u'9': + length += 1 + value = int(self.prefix(length)) + self.forward(length) + return value + + def scan_tag_directive_value(self, start_mark): + # See the specification for details. + while self.peek() == u' ': + self.forward() + handle = self.scan_tag_directive_handle(start_mark) + while self.peek() == u' ': + self.forward() + prefix = self.scan_tag_directive_prefix(start_mark) + return (handle, prefix) + + def scan_tag_directive_handle(self, start_mark): + # See the specification for details. + value = self.scan_tag_handle('directive', start_mark) + ch = self.peek() + if ch != u' ': + raise ScannerError("while scanning a directive", start_mark, + "expected ' ', but found %r" % ch.encode('utf-8'), + self.get_mark()) + return value + + def scan_tag_directive_prefix(self, start_mark): + # See the specification for details. + value = self.scan_tag_uri('directive', start_mark) + ch = self.peek() + if ch not in u'\0 \r\n\x85\u2028\u2029': + raise ScannerError("while scanning a directive", start_mark, + "expected ' ', but found %r" % ch.encode('utf-8'), + self.get_mark()) + return value + + def scan_directive_ignored_line(self, start_mark): + # See the specification for details. + while self.peek() == u' ': + self.forward() + if self.peek() == u'#': + while self.peek() not in u'\0\r\n\x85\u2028\u2029': + self.forward() + ch = self.peek() + if ch not in u'\0\r\n\x85\u2028\u2029': + raise ScannerError("while scanning a directive", start_mark, + "expected a comment or a line break, but found %r" + % ch.encode('utf-8'), self.get_mark()) + self.scan_line_break() + + def scan_anchor(self, TokenClass): + # The specification does not restrict characters for anchors and + # aliases. This may lead to problems, for instance, the document: + # [ *alias, value ] + # can be interpteted in two ways, as + # [ "value" ] + # and + # [ *alias , "value" ] + # Therefore we restrict aliases to numbers and ASCII letters. + start_mark = self.get_mark() + indicator = self.peek() + if indicator == u'*': + name = 'alias' + else: + name = 'anchor' + self.forward() + length = 0 + ch = self.peek(length) + while u'0' <= ch <= u'9' or u'A' <= ch <= u'Z' or u'a' <= ch <= u'z' \ + or ch in u'-_': + length += 1 + ch = self.peek(length) + if not length: + raise ScannerError("while scanning an %s" % name, start_mark, + "expected alphabetic or numeric character, but found %r" + % ch.encode('utf-8'), self.get_mark()) + value = self.prefix(length) + self.forward(length) + ch = self.peek() + if ch not in u'\0 \t\r\n\x85\u2028\u2029?:,]}%@`': + raise ScannerError("while scanning an %s" % name, start_mark, + "expected alphabetic or numeric character, but found %r" + % ch.encode('utf-8'), self.get_mark()) + end_mark = self.get_mark() + return TokenClass(value, start_mark, end_mark) + + def scan_tag(self): + # See the specification for details. + start_mark = self.get_mark() + ch = self.peek(1) + if ch == u'<': + handle = None + self.forward(2) + suffix = self.scan_tag_uri('tag', start_mark) + if self.peek() != u'>': + raise ScannerError("while parsing a tag", start_mark, + "expected '>', but found %r" % self.peek().encode('utf-8'), + self.get_mark()) + self.forward() + elif ch in u'\0 \t\r\n\x85\u2028\u2029': + handle = None + suffix = u'!' + self.forward() + else: + length = 1 + use_handle = False + while ch not in u'\0 \r\n\x85\u2028\u2029': + if ch == u'!': + use_handle = True + break + length += 1 + ch = self.peek(length) + handle = u'!' + if use_handle: + handle = self.scan_tag_handle('tag', start_mark) + else: + handle = u'!' + self.forward() + suffix = self.scan_tag_uri('tag', start_mark) + ch = self.peek() + if ch not in u'\0 \r\n\x85\u2028\u2029': + raise ScannerError("while scanning a tag", start_mark, + "expected ' ', but found %r" % ch.encode('utf-8'), + self.get_mark()) + value = (handle, suffix) + end_mark = self.get_mark() + return TagToken(value, start_mark, end_mark) + + def scan_block_scalar(self, style): + # See the specification for details. + + if style == '>': + folded = True + else: + folded = False + + chunks = [] + start_mark = self.get_mark() + + # Scan the header. + self.forward() + chomping, increment = self.scan_block_scalar_indicators(start_mark) + self.scan_block_scalar_ignored_line(start_mark) + + # Determine the indentation level and go to the first non-empty line. + min_indent = self.indent+1 + if min_indent < 1: + min_indent = 1 + if increment is None: + breaks, max_indent, end_mark = self.scan_block_scalar_indentation() + indent = max(min_indent, max_indent) + else: + indent = min_indent+increment-1 + breaks, end_mark = self.scan_block_scalar_breaks(indent) + line_break = u'' + + # Scan the inner part of the block scalar. + while self.column == indent and self.peek() != u'\0': + chunks.extend(breaks) + leading_non_space = self.peek() not in u' \t' + length = 0 + while self.peek(length) not in u'\0\r\n\x85\u2028\u2029': + length += 1 + chunks.append(self.prefix(length)) + self.forward(length) + line_break = self.scan_line_break() + breaks, end_mark = self.scan_block_scalar_breaks(indent) + if self.column == indent and self.peek() != u'\0': + + # Unfortunately, folding rules are ambiguous. + # + # This is the folding according to the specification: + + if folded and line_break == u'\n' \ + and leading_non_space and self.peek() not in u' \t': + if not breaks: + chunks.append(u' ') + else: + chunks.append(line_break) + + # This is Clark Evans's interpretation (also in the spec + # examples): + # + #if folded and line_break == u'\n': + # if not breaks: + # if self.peek() not in ' \t': + # chunks.append(u' ') + # else: + # chunks.append(line_break) + #else: + # chunks.append(line_break) + else: + break + + # Chomp the tail. + if chomping is not False: + chunks.append(line_break) + if chomping is True: + chunks.extend(breaks) + + # We are done. + return ScalarToken(u''.join(chunks), False, start_mark, end_mark, + style) + + def scan_block_scalar_indicators(self, start_mark): + # See the specification for details. + chomping = None + increment = None + ch = self.peek() + if ch in u'+-': + if ch == '+': + chomping = True + else: + chomping = False + self.forward() + ch = self.peek() + if ch in u'0123456789': + increment = int(ch) + if increment == 0: + raise ScannerError("while scanning a block scalar", start_mark, + "expected indentation indicator in the range 1-9, but found 0", + self.get_mark()) + self.forward() + elif ch in u'0123456789': + increment = int(ch) + if increment == 0: + raise ScannerError("while scanning a block scalar", start_mark, + "expected indentation indicator in the range 1-9, but found 0", + self.get_mark()) + self.forward() + ch = self.peek() + if ch in u'+-': + if ch == '+': + chomping = True + else: + chomping = False + self.forward() + ch = self.peek() + if ch not in u'\0 \r\n\x85\u2028\u2029': + raise ScannerError("while scanning a block scalar", start_mark, + "expected chomping or indentation indicators, but found %r" + % ch.encode('utf-8'), self.get_mark()) + return chomping, increment + + def scan_block_scalar_ignored_line(self, start_mark): + # See the specification for details. + while self.peek() == u' ': + self.forward() + if self.peek() == u'#': + while self.peek() not in u'\0\r\n\x85\u2028\u2029': + self.forward() + ch = self.peek() + if ch not in u'\0\r\n\x85\u2028\u2029': + raise ScannerError("while scanning a block scalar", start_mark, + "expected a comment or a line break, but found %r" + % ch.encode('utf-8'), self.get_mark()) + self.scan_line_break() + + def scan_block_scalar_indentation(self): + # See the specification for details. + chunks = [] + max_indent = 0 + end_mark = self.get_mark() + while self.peek() in u' \r\n\x85\u2028\u2029': + if self.peek() != u' ': + chunks.append(self.scan_line_break()) + end_mark = self.get_mark() + else: + self.forward() + if self.column > max_indent: + max_indent = self.column + return chunks, max_indent, end_mark + + def scan_block_scalar_breaks(self, indent): + # See the specification for details. + chunks = [] + end_mark = self.get_mark() + while self.column < indent and self.peek() == u' ': + self.forward() + while self.peek() in u'\r\n\x85\u2028\u2029': + chunks.append(self.scan_line_break()) + end_mark = self.get_mark() + while self.column < indent and self.peek() == u' ': + self.forward() + return chunks, end_mark + + def scan_flow_scalar(self, style): + # See the specification for details. + # Note that we loose indentation rules for quoted scalars. Quoted + # scalars don't need to adhere indentation because " and ' clearly + # mark the beginning and the end of them. Therefore we are less + # restrictive then the specification requires. We only need to check + # that document separators are not included in scalars. + if style == '"': + double = True + else: + double = False + chunks = [] + start_mark = self.get_mark() + quote = self.peek() + self.forward() + chunks.extend(self.scan_flow_scalar_non_spaces(double, start_mark)) + while self.peek() != quote: + chunks.extend(self.scan_flow_scalar_spaces(double, start_mark)) + chunks.extend(self.scan_flow_scalar_non_spaces(double, start_mark)) + self.forward() + end_mark = self.get_mark() + return ScalarToken(u''.join(chunks), False, start_mark, end_mark, + style) + + ESCAPE_REPLACEMENTS = { + u'0': u'\0', + u'a': u'\x07', + u'b': u'\x08', + u't': u'\x09', + u'\t': u'\x09', + u'n': u'\x0A', + u'v': u'\x0B', + u'f': u'\x0C', + u'r': u'\x0D', + u'e': u'\x1B', + u' ': u'\x20', + u'\"': u'\"', + u'\\': u'\\', + u'N': u'\x85', + u'_': u'\xA0', + u'L': u'\u2028', + u'P': u'\u2029', + } + + ESCAPE_CODES = { + u'x': 2, + u'u': 4, + u'U': 8, + } + + def scan_flow_scalar_non_spaces(self, double, start_mark): + # See the specification for details. + chunks = [] + while True: + length = 0 + while self.peek(length) not in u'\'\"\\\0 \t\r\n\x85\u2028\u2029': + length += 1 + if length: + chunks.append(self.prefix(length)) + self.forward(length) + ch = self.peek() + if not double and ch == u'\'' and self.peek(1) == u'\'': + chunks.append(u'\'') + self.forward(2) + elif (double and ch == u'\'') or (not double and ch in u'\"\\'): + chunks.append(ch) + self.forward() + elif double and ch == u'\\': + self.forward() + ch = self.peek() + if ch in self.ESCAPE_REPLACEMENTS: + chunks.append(self.ESCAPE_REPLACEMENTS[ch]) + self.forward() + elif ch in self.ESCAPE_CODES: + length = self.ESCAPE_CODES[ch] + self.forward() + for k in range(length): + if self.peek(k) not in u'0123456789ABCDEFabcdef': + raise ScannerError("while scanning a double-quoted scalar", start_mark, + "expected escape sequence of %d hexdecimal numbers, but found %r" % + (length, self.peek(k).encode('utf-8')), self.get_mark()) + code = int(self.prefix(length), 16) + chunks.append(unichr(code)) + self.forward(length) + elif ch in u'\r\n\x85\u2028\u2029': + self.scan_line_break() + chunks.extend(self.scan_flow_scalar_breaks(double, start_mark)) + else: + raise ScannerError("while scanning a double-quoted scalar", start_mark, + "found unknown escape character %r" % ch.encode('utf-8'), self.get_mark()) + else: + return chunks + + def scan_flow_scalar_spaces(self, double, start_mark): + # See the specification for details. + chunks = [] + length = 0 + while self.peek(length) in u' \t': + length += 1 + whitespaces = self.prefix(length) + self.forward(length) + ch = self.peek() + if ch == u'\0': + raise ScannerError("while scanning a quoted scalar", start_mark, + "found unexpected end of stream", self.get_mark()) + elif ch in u'\r\n\x85\u2028\u2029': + line_break = self.scan_line_break() + breaks = self.scan_flow_scalar_breaks(double, start_mark) + if line_break != u'\n': + chunks.append(line_break) + elif not breaks: + chunks.append(u' ') + chunks.extend(breaks) + else: + chunks.append(whitespaces) + return chunks + + def scan_flow_scalar_breaks(self, double, start_mark): + # See the specification for details. + chunks = [] + while True: + # Instead of checking indentation, we check for document + # separators. + prefix = self.prefix(3) + if (prefix == u'---' or prefix == u'...') \ + and self.peek(3) in u'\0 \t\r\n\x85\u2028\u2029': + raise ScannerError("while scanning a quoted scalar", start_mark, + "found unexpected document separator", self.get_mark()) + while self.peek() in u' \t': + self.forward() + if self.peek() in u'\r\n\x85\u2028\u2029': + chunks.append(self.scan_line_break()) + else: + return chunks + + def scan_plain(self): + # See the specification for details. + # We add an additional restriction for the flow context: + # plain scalars in the flow context cannot contain ',', ':' and '?'. + # We also keep track of the `allow_simple_key` flag here. + # Indentation rules are loosed for the flow context. + chunks = [] + start_mark = self.get_mark() + end_mark = start_mark + indent = self.indent+1 + # We allow zero indentation for scalars, but then we need to check for + # document separators at the beginning of the line. + #if indent == 0: + # indent = 1 + spaces = [] + while True: + length = 0 + if self.peek() == u'#': + break + while True: + ch = self.peek(length) + if ch in u'\0 \t\r\n\x85\u2028\u2029' \ + or (not self.flow_level and ch == u':' and + self.peek(length+1) in u'\0 \t\r\n\x85\u2028\u2029') \ + or (self.flow_level and ch in u',:?[]{}'): + break + length += 1 + # It's not clear what we should do with ':' in the flow context. + if (self.flow_level and ch == u':' + and self.peek(length+1) not in u'\0 \t\r\n\x85\u2028\u2029,[]{}'): + self.forward(length) + raise ScannerError("while scanning a plain scalar", start_mark, + "found unexpected ':'", self.get_mark(), + "Please check http://pyyaml.org/wiki/YAMLColonInFlowContext for details.") + if length == 0: + break + self.allow_simple_key = False + chunks.extend(spaces) + chunks.append(self.prefix(length)) + self.forward(length) + end_mark = self.get_mark() + spaces = self.scan_plain_spaces(indent, start_mark) + if not spaces or self.peek() == u'#' \ + or (not self.flow_level and self.column < indent): + break + return ScalarToken(u''.join(chunks), True, start_mark, end_mark) + + def scan_plain_spaces(self, indent, start_mark): + # See the specification for details. + # The specification is really confusing about tabs in plain scalars. + # We just forbid them completely. Do not use tabs in YAML! + chunks = [] + length = 0 + while self.peek(length) in u' ': + length += 1 + whitespaces = self.prefix(length) + self.forward(length) + ch = self.peek() + if ch in u'\r\n\x85\u2028\u2029': + line_break = self.scan_line_break() + self.allow_simple_key = True + prefix = self.prefix(3) + if (prefix == u'---' or prefix == u'...') \ + and self.peek(3) in u'\0 \t\r\n\x85\u2028\u2029': + return + breaks = [] + while self.peek() in u' \r\n\x85\u2028\u2029': + if self.peek() == ' ': + self.forward() + else: + breaks.append(self.scan_line_break()) + prefix = self.prefix(3) + if (prefix == u'---' or prefix == u'...') \ + and self.peek(3) in u'\0 \t\r\n\x85\u2028\u2029': + return + if line_break != u'\n': + chunks.append(line_break) + elif not breaks: + chunks.append(u' ') + chunks.extend(breaks) + elif whitespaces: + chunks.append(whitespaces) + return chunks + + def scan_tag_handle(self, name, start_mark): + # See the specification for details. + # For some strange reasons, the specification does not allow '_' in + # tag handles. I have allowed it anyway. + ch = self.peek() + if ch != u'!': + raise ScannerError("while scanning a %s" % name, start_mark, + "expected '!', but found %r" % ch.encode('utf-8'), + self.get_mark()) + length = 1 + ch = self.peek(length) + if ch != u' ': + while u'0' <= ch <= u'9' or u'A' <= ch <= u'Z' or u'a' <= ch <= u'z' \ + or ch in u'-_': + length += 1 + ch = self.peek(length) + if ch != u'!': + self.forward(length) + raise ScannerError("while scanning a %s" % name, start_mark, + "expected '!', but found %r" % ch.encode('utf-8'), + self.get_mark()) + length += 1 + value = self.prefix(length) + self.forward(length) + return value + + def scan_tag_uri(self, name, start_mark): + # See the specification for details. + # Note: we do not check if URI is well-formed. + chunks = [] + length = 0 + ch = self.peek(length) + while u'0' <= ch <= u'9' or u'A' <= ch <= u'Z' or u'a' <= ch <= u'z' \ + or ch in u'-;/?:@&=+$,_.!~*\'()[]%': + if ch == u'%': + chunks.append(self.prefix(length)) + self.forward(length) + length = 0 + chunks.append(self.scan_uri_escapes(name, start_mark)) + else: + length += 1 + ch = self.peek(length) + if length: + chunks.append(self.prefix(length)) + self.forward(length) + length = 0 + if not chunks: + raise ScannerError("while parsing a %s" % name, start_mark, + "expected URI, but found %r" % ch.encode('utf-8'), + self.get_mark()) + return u''.join(chunks) + + def scan_uri_escapes(self, name, start_mark): + # See the specification for details. + bytes = [] + mark = self.get_mark() + while self.peek() == u'%': + self.forward() + for k in range(2): + if self.peek(k) not in u'0123456789ABCDEFabcdef': + raise ScannerError("while scanning a %s" % name, start_mark, + "expected URI escape sequence of 2 hexdecimal numbers, but found %r" % + (self.peek(k).encode('utf-8')), self.get_mark()) + bytes.append(chr(int(self.prefix(2), 16))) + self.forward(2) + try: + value = unicode(''.join(bytes), 'utf-8') + except UnicodeDecodeError, exc: + raise ScannerError("while scanning a %s" % name, start_mark, str(exc), mark) + return value + + def scan_line_break(self): + # Transforms: + # '\r\n' : '\n' + # '\r' : '\n' + # '\n' : '\n' + # '\x85' : '\n' + # '\u2028' : '\u2028' + # '\u2029 : '\u2029' + # default : '' + ch = self.peek() + if ch in u'\r\n\x85': + if self.prefix(2) == u'\r\n': + self.forward(2) + else: + self.forward() + return u'\n' + elif ch in u'\u2028\u2029': + self.forward() + return ch + return u'' + +#try: +# import psyco +# psyco.bind(Scanner) +#except ImportError: +# pass + diff --git a/python.d/python_modules/pyyaml2/serializer.py b/python.d/python_modules/pyyaml2/serializer.py new file mode 100644 index 000000000..0bf1e96dc --- /dev/null +++ b/python.d/python_modules/pyyaml2/serializer.py @@ -0,0 +1,111 @@ + +__all__ = ['Serializer', 'SerializerError'] + +from error import YAMLError +from events import * +from nodes import * + +class SerializerError(YAMLError): + pass + +class Serializer(object): + + ANCHOR_TEMPLATE = u'id%03d' + + def __init__(self, encoding=None, + explicit_start=None, explicit_end=None, version=None, tags=None): + self.use_encoding = encoding + self.use_explicit_start = explicit_start + self.use_explicit_end = explicit_end + self.use_version = version + self.use_tags = tags + self.serialized_nodes = {} + self.anchors = {} + self.last_anchor_id = 0 + self.closed = None + + def open(self): + if self.closed is None: + self.emit(StreamStartEvent(encoding=self.use_encoding)) + self.closed = False + elif self.closed: + raise SerializerError("serializer is closed") + else: + raise SerializerError("serializer is already opened") + + def close(self): + if self.closed is None: + raise SerializerError("serializer is not opened") + elif not self.closed: + self.emit(StreamEndEvent()) + self.closed = True + + #def __del__(self): + # self.close() + + def serialize(self, node): + if self.closed is None: + raise SerializerError("serializer is not opened") + elif self.closed: + raise SerializerError("serializer is closed") + self.emit(DocumentStartEvent(explicit=self.use_explicit_start, + version=self.use_version, tags=self.use_tags)) + self.anchor_node(node) + self.serialize_node(node, None, None) + self.emit(DocumentEndEvent(explicit=self.use_explicit_end)) + self.serialized_nodes = {} + self.anchors = {} + self.last_anchor_id = 0 + + def anchor_node(self, node): + if node in self.anchors: + if self.anchors[node] is None: + self.anchors[node] = self.generate_anchor(node) + else: + self.anchors[node] = None + if isinstance(node, SequenceNode): + for item in node.value: + self.anchor_node(item) + elif isinstance(node, MappingNode): + for key, value in node.value: + self.anchor_node(key) + self.anchor_node(value) + + def generate_anchor(self, node): + self.last_anchor_id += 1 + return self.ANCHOR_TEMPLATE % self.last_anchor_id + + def serialize_node(self, node, parent, index): + alias = self.anchors[node] + if node in self.serialized_nodes: + self.emit(AliasEvent(alias)) + else: + self.serialized_nodes[node] = True + self.descend_resolver(parent, index) + if isinstance(node, ScalarNode): + detected_tag = self.resolve(ScalarNode, node.value, (True, False)) + default_tag = self.resolve(ScalarNode, node.value, (False, True)) + implicit = (node.tag == detected_tag), (node.tag == default_tag) + self.emit(ScalarEvent(alias, node.tag, implicit, node.value, + style=node.style)) + elif isinstance(node, SequenceNode): + implicit = (node.tag + == self.resolve(SequenceNode, node.value, True)) + self.emit(SequenceStartEvent(alias, node.tag, implicit, + flow_style=node.flow_style)) + index = 0 + for item in node.value: + self.serialize_node(item, node, index) + index += 1 + self.emit(SequenceEndEvent()) + elif isinstance(node, MappingNode): + implicit = (node.tag + == self.resolve(MappingNode, node.value, True)) + self.emit(MappingStartEvent(alias, node.tag, implicit, + flow_style=node.flow_style)) + for key, value in node.value: + self.serialize_node(key, node, None) + self.serialize_node(value, node, key) + self.emit(MappingEndEvent()) + self.ascend_resolver() + diff --git a/python.d/python_modules/pyyaml2/tokens.py b/python.d/python_modules/pyyaml2/tokens.py new file mode 100644 index 000000000..4d0b48a39 --- /dev/null +++ b/python.d/python_modules/pyyaml2/tokens.py @@ -0,0 +1,104 @@ + +class Token(object): + def __init__(self, start_mark, end_mark): + self.start_mark = start_mark + self.end_mark = end_mark + def __repr__(self): + attributes = [key for key in self.__dict__ + if not key.endswith('_mark')] + attributes.sort() + arguments = ', '.join(['%s=%r' % (key, getattr(self, key)) + for key in attributes]) + return '%s(%s)' % (self.__class__.__name__, arguments) + +#class BOMToken(Token): +# id = '<byte order mark>' + +class DirectiveToken(Token): + id = '<directive>' + def __init__(self, name, value, start_mark, end_mark): + self.name = name + self.value = value + self.start_mark = start_mark + self.end_mark = end_mark + +class DocumentStartToken(Token): + id = '<document start>' + +class DocumentEndToken(Token): + id = '<document end>' + +class StreamStartToken(Token): + id = '<stream start>' + def __init__(self, start_mark=None, end_mark=None, + encoding=None): + self.start_mark = start_mark + self.end_mark = end_mark + self.encoding = encoding + +class StreamEndToken(Token): + id = '<stream end>' + +class BlockSequenceStartToken(Token): + id = '<block sequence start>' + +class BlockMappingStartToken(Token): + id = '<block mapping start>' + +class BlockEndToken(Token): + id = '<block end>' + +class FlowSequenceStartToken(Token): + id = '[' + +class FlowMappingStartToken(Token): + id = '{' + +class FlowSequenceEndToken(Token): + id = ']' + +class FlowMappingEndToken(Token): + id = '}' + +class KeyToken(Token): + id = '?' + +class ValueToken(Token): + id = ':' + +class BlockEntryToken(Token): + id = '-' + +class FlowEntryToken(Token): + id = ',' + +class AliasToken(Token): + id = '<alias>' + def __init__(self, value, start_mark, end_mark): + self.value = value + self.start_mark = start_mark + self.end_mark = end_mark + +class AnchorToken(Token): + id = '<anchor>' + def __init__(self, value, start_mark, end_mark): + self.value = value + self.start_mark = start_mark + self.end_mark = end_mark + +class TagToken(Token): + id = '<tag>' + def __init__(self, value, start_mark, end_mark): + self.value = value + self.start_mark = start_mark + self.end_mark = end_mark + +class ScalarToken(Token): + id = '<scalar>' + def __init__(self, value, plain, start_mark, end_mark, style=None): + self.value = value + self.plain = plain + self.start_mark = start_mark + self.end_mark = end_mark + self.style = style + diff --git a/python.d/python_modules/pyyaml3/__init__.py b/python.d/python_modules/pyyaml3/__init__.py new file mode 100644 index 000000000..a5e20f94d --- /dev/null +++ b/python.d/python_modules/pyyaml3/__init__.py @@ -0,0 +1,312 @@ + +from .error import * + +from .tokens import * +from .events import * +from .nodes import * + +from .loader import * +from .dumper import * + +__version__ = '3.11' +try: + from .cyaml import * + __with_libyaml__ = True +except ImportError: + __with_libyaml__ = False + +import io + +def scan(stream, Loader=Loader): + """ + Scan a YAML stream and produce scanning tokens. + """ + loader = Loader(stream) + try: + while loader.check_token(): + yield loader.get_token() + finally: + loader.dispose() + +def parse(stream, Loader=Loader): + """ + Parse a YAML stream and produce parsing events. + """ + loader = Loader(stream) + try: + while loader.check_event(): + yield loader.get_event() + finally: + loader.dispose() + +def compose(stream, Loader=Loader): + """ + Parse the first YAML document in a stream + and produce the corresponding representation tree. + """ + loader = Loader(stream) + try: + return loader.get_single_node() + finally: + loader.dispose() + +def compose_all(stream, Loader=Loader): + """ + Parse all YAML documents in a stream + and produce corresponding representation trees. + """ + loader = Loader(stream) + try: + while loader.check_node(): + yield loader.get_node() + finally: + loader.dispose() + +def load(stream, Loader=Loader): + """ + Parse the first YAML document in a stream + and produce the corresponding Python object. + """ + loader = Loader(stream) + try: + return loader.get_single_data() + finally: + loader.dispose() + +def load_all(stream, Loader=Loader): + """ + Parse all YAML documents in a stream + and produce corresponding Python objects. + """ + loader = Loader(stream) + try: + while loader.check_data(): + yield loader.get_data() + finally: + loader.dispose() + +def safe_load(stream): + """ + Parse the first YAML document in a stream + and produce the corresponding Python object. + Resolve only basic YAML tags. + """ + return load(stream, SafeLoader) + +def safe_load_all(stream): + """ + Parse all YAML documents in a stream + and produce corresponding Python objects. + Resolve only basic YAML tags. + """ + return load_all(stream, SafeLoader) + +def emit(events, stream=None, Dumper=Dumper, + canonical=None, indent=None, width=None, + allow_unicode=None, line_break=None): + """ + Emit YAML parsing events into a stream. + If stream is None, return the produced string instead. + """ + getvalue = None + if stream is None: + stream = io.StringIO() + getvalue = stream.getvalue + dumper = Dumper(stream, canonical=canonical, indent=indent, width=width, + allow_unicode=allow_unicode, line_break=line_break) + try: + for event in events: + dumper.emit(event) + finally: + dumper.dispose() + if getvalue: + return getvalue() + +def serialize_all(nodes, stream=None, Dumper=Dumper, + canonical=None, indent=None, width=None, + allow_unicode=None, line_break=None, + encoding=None, explicit_start=None, explicit_end=None, + version=None, tags=None): + """ + Serialize a sequence of representation trees into a YAML stream. + If stream is None, return the produced string instead. + """ + getvalue = None + if stream is None: + if encoding is None: + stream = io.StringIO() + else: + stream = io.BytesIO() + getvalue = stream.getvalue + dumper = Dumper(stream, canonical=canonical, indent=indent, width=width, + allow_unicode=allow_unicode, line_break=line_break, + encoding=encoding, version=version, tags=tags, + explicit_start=explicit_start, explicit_end=explicit_end) + try: + dumper.open() + for node in nodes: + dumper.serialize(node) + dumper.close() + finally: + dumper.dispose() + if getvalue: + return getvalue() + +def serialize(node, stream=None, Dumper=Dumper, **kwds): + """ + Serialize a representation tree into a YAML stream. + If stream is None, return the produced string instead. + """ + return serialize_all([node], stream, Dumper=Dumper, **kwds) + +def dump_all(documents, stream=None, Dumper=Dumper, + default_style=None, default_flow_style=None, + canonical=None, indent=None, width=None, + allow_unicode=None, line_break=None, + encoding=None, explicit_start=None, explicit_end=None, + version=None, tags=None): + """ + Serialize a sequence of Python objects into a YAML stream. + If stream is None, return the produced string instead. + """ + getvalue = None + if stream is None: + if encoding is None: + stream = io.StringIO() + else: + stream = io.BytesIO() + getvalue = stream.getvalue + dumper = Dumper(stream, default_style=default_style, + default_flow_style=default_flow_style, + canonical=canonical, indent=indent, width=width, + allow_unicode=allow_unicode, line_break=line_break, + encoding=encoding, version=version, tags=tags, + explicit_start=explicit_start, explicit_end=explicit_end) + try: + dumper.open() + for data in documents: + dumper.represent(data) + dumper.close() + finally: + dumper.dispose() + if getvalue: + return getvalue() + +def dump(data, stream=None, Dumper=Dumper, **kwds): + """ + Serialize a Python object into a YAML stream. + If stream is None, return the produced string instead. + """ + return dump_all([data], stream, Dumper=Dumper, **kwds) + +def safe_dump_all(documents, stream=None, **kwds): + """ + Serialize a sequence of Python objects into a YAML stream. + Produce only basic YAML tags. + If stream is None, return the produced string instead. + """ + return dump_all(documents, stream, Dumper=SafeDumper, **kwds) + +def safe_dump(data, stream=None, **kwds): + """ + Serialize a Python object into a YAML stream. + Produce only basic YAML tags. + If stream is None, return the produced string instead. + """ + return dump_all([data], stream, Dumper=SafeDumper, **kwds) + +def add_implicit_resolver(tag, regexp, first=None, + Loader=Loader, Dumper=Dumper): + """ + Add an implicit scalar detector. + If an implicit scalar value matches the given regexp, + the corresponding tag is assigned to the scalar. + first is a sequence of possible initial characters or None. + """ + Loader.add_implicit_resolver(tag, regexp, first) + Dumper.add_implicit_resolver(tag, regexp, first) + +def add_path_resolver(tag, path, kind=None, Loader=Loader, Dumper=Dumper): + """ + Add a path based resolver for the given tag. + A path is a list of keys that forms a path + to a node in the representation tree. + Keys can be string values, integers, or None. + """ + Loader.add_path_resolver(tag, path, kind) + Dumper.add_path_resolver(tag, path, kind) + +def add_constructor(tag, constructor, Loader=Loader): + """ + Add a constructor for the given tag. + Constructor is a function that accepts a Loader instance + and a node object and produces the corresponding Python object. + """ + Loader.add_constructor(tag, constructor) + +def add_multi_constructor(tag_prefix, multi_constructor, Loader=Loader): + """ + Add a multi-constructor for the given tag prefix. + Multi-constructor is called for a node if its tag starts with tag_prefix. + Multi-constructor accepts a Loader instance, a tag suffix, + and a node object and produces the corresponding Python object. + """ + Loader.add_multi_constructor(tag_prefix, multi_constructor) + +def add_representer(data_type, representer, Dumper=Dumper): + """ + Add a representer for the given type. + Representer is a function accepting a Dumper instance + and an instance of the given data type + and producing the corresponding representation node. + """ + Dumper.add_representer(data_type, representer) + +def add_multi_representer(data_type, multi_representer, Dumper=Dumper): + """ + Add a representer for the given type. + Multi-representer is a function accepting a Dumper instance + and an instance of the given data type or subtype + and producing the corresponding representation node. + """ + Dumper.add_multi_representer(data_type, multi_representer) + +class YAMLObjectMetaclass(type): + """ + The metaclass for YAMLObject. + """ + def __init__(cls, name, bases, kwds): + super(YAMLObjectMetaclass, cls).__init__(name, bases, kwds) + if 'yaml_tag' in kwds and kwds['yaml_tag'] is not None: + cls.yaml_loader.add_constructor(cls.yaml_tag, cls.from_yaml) + cls.yaml_dumper.add_representer(cls, cls.to_yaml) + +class YAMLObject(metaclass=YAMLObjectMetaclass): + """ + An object that can dump itself to a YAML stream + and load itself from a YAML stream. + """ + + __slots__ = () # no direct instantiation, so allow immutable subclasses + + yaml_loader = Loader + yaml_dumper = Dumper + + yaml_tag = None + yaml_flow_style = None + + @classmethod + def from_yaml(cls, loader, node): + """ + Convert a representation node to a Python object. + """ + return loader.construct_yaml_object(node, cls) + + @classmethod + def to_yaml(cls, dumper, data): + """ + Convert a Python object to a representation node. + """ + return dumper.represent_yaml_object(cls.yaml_tag, data, cls, + flow_style=cls.yaml_flow_style) + diff --git a/python.d/python_modules/pyyaml3/composer.py b/python.d/python_modules/pyyaml3/composer.py new file mode 100644 index 000000000..d5c6a7acd --- /dev/null +++ b/python.d/python_modules/pyyaml3/composer.py @@ -0,0 +1,139 @@ + +__all__ = ['Composer', 'ComposerError'] + +from .error import MarkedYAMLError +from .events import * +from .nodes import * + +class ComposerError(MarkedYAMLError): + pass + +class Composer: + + def __init__(self): + self.anchors = {} + + def check_node(self): + # Drop the STREAM-START event. + if self.check_event(StreamStartEvent): + self.get_event() + + # If there are more documents available? + return not self.check_event(StreamEndEvent) + + def get_node(self): + # Get the root node of the next document. + if not self.check_event(StreamEndEvent): + return self.compose_document() + + def get_single_node(self): + # Drop the STREAM-START event. + self.get_event() + + # Compose a document if the stream is not empty. + document = None + if not self.check_event(StreamEndEvent): + document = self.compose_document() + + # Ensure that the stream contains no more documents. + if not self.check_event(StreamEndEvent): + event = self.get_event() + raise ComposerError("expected a single document in the stream", + document.start_mark, "but found another document", + event.start_mark) + + # Drop the STREAM-END event. + self.get_event() + + return document + + def compose_document(self): + # Drop the DOCUMENT-START event. + self.get_event() + + # Compose the root node. + node = self.compose_node(None, None) + + # Drop the DOCUMENT-END event. + self.get_event() + + self.anchors = {} + return node + + def compose_node(self, parent, index): + if self.check_event(AliasEvent): + event = self.get_event() + anchor = event.anchor + if anchor not in self.anchors: + raise ComposerError(None, None, "found undefined alias %r" + % anchor, event.start_mark) + return self.anchors[anchor] + event = self.peek_event() + anchor = event.anchor + if anchor is not None: + if anchor in self.anchors: + raise ComposerError("found duplicate anchor %r; first occurence" + % anchor, self.anchors[anchor].start_mark, + "second occurence", event.start_mark) + self.descend_resolver(parent, index) + if self.check_event(ScalarEvent): + node = self.compose_scalar_node(anchor) + elif self.check_event(SequenceStartEvent): + node = self.compose_sequence_node(anchor) + elif self.check_event(MappingStartEvent): + node = self.compose_mapping_node(anchor) + self.ascend_resolver() + return node + + def compose_scalar_node(self, anchor): + event = self.get_event() + tag = event.tag + if tag is None or tag == '!': + tag = self.resolve(ScalarNode, event.value, event.implicit) + node = ScalarNode(tag, event.value, + event.start_mark, event.end_mark, style=event.style) + if anchor is not None: + self.anchors[anchor] = node + return node + + def compose_sequence_node(self, anchor): + start_event = self.get_event() + tag = start_event.tag + if tag is None or tag == '!': + tag = self.resolve(SequenceNode, None, start_event.implicit) + node = SequenceNode(tag, [], + start_event.start_mark, None, + flow_style=start_event.flow_style) + if anchor is not None: + self.anchors[anchor] = node + index = 0 + while not self.check_event(SequenceEndEvent): + node.value.append(self.compose_node(node, index)) + index += 1 + end_event = self.get_event() + node.end_mark = end_event.end_mark + return node + + def compose_mapping_node(self, anchor): + start_event = self.get_event() + tag = start_event.tag + if tag is None or tag == '!': + tag = self.resolve(MappingNode, None, start_event.implicit) + node = MappingNode(tag, [], + start_event.start_mark, None, + flow_style=start_event.flow_style) + if anchor is not None: + self.anchors[anchor] = node + while not self.check_event(MappingEndEvent): + #key_event = self.peek_event() + item_key = self.compose_node(node, None) + #if item_key in node.value: + # raise ComposerError("while composing a mapping", start_event.start_mark, + # "found duplicate key", key_event.start_mark) + item_value = self.compose_node(node, item_key) + #node.value[item_key] = item_value + node.value.append((item_key, item_value)) + end_event = self.get_event() + node.end_mark = end_event.end_mark + return node + diff --git a/python.d/python_modules/pyyaml3/constructor.py b/python.d/python_modules/pyyaml3/constructor.py new file mode 100644 index 000000000..981543aeb --- /dev/null +++ b/python.d/python_modules/pyyaml3/constructor.py @@ -0,0 +1,686 @@ + +__all__ = ['BaseConstructor', 'SafeConstructor', 'Constructor', + 'ConstructorError'] + +from .error import * +from .nodes import * + +import collections, datetime, base64, binascii, re, sys, types + +class ConstructorError(MarkedYAMLError): + pass + +class BaseConstructor: + + yaml_constructors = {} + yaml_multi_constructors = {} + + def __init__(self): + self.constructed_objects = {} + self.recursive_objects = {} + self.state_generators = [] + self.deep_construct = False + + def check_data(self): + # If there are more documents available? + return self.check_node() + + def get_data(self): + # Construct and return the next document. + if self.check_node(): + return self.construct_document(self.get_node()) + + def get_single_data(self): + # Ensure that the stream contains a single document and construct it. + node = self.get_single_node() + if node is not None: + return self.construct_document(node) + return None + + def construct_document(self, node): + data = self.construct_object(node) + while self.state_generators: + state_generators = self.state_generators + self.state_generators = [] + for generator in state_generators: + for dummy in generator: + pass + self.constructed_objects = {} + self.recursive_objects = {} + self.deep_construct = False + return data + + def construct_object(self, node, deep=False): + if node in self.constructed_objects: + return self.constructed_objects[node] + if deep: + old_deep = self.deep_construct + self.deep_construct = True + if node in self.recursive_objects: + raise ConstructorError(None, None, + "found unconstructable recursive node", node.start_mark) + self.recursive_objects[node] = None + constructor = None + tag_suffix = None + if node.tag in self.yaml_constructors: + constructor = self.yaml_constructors[node.tag] + else: + for tag_prefix in self.yaml_multi_constructors: + if node.tag.startswith(tag_prefix): + tag_suffix = node.tag[len(tag_prefix):] + constructor = self.yaml_multi_constructors[tag_prefix] + break + else: + if None in self.yaml_multi_constructors: + tag_suffix = node.tag + constructor = self.yaml_multi_constructors[None] + elif None in self.yaml_constructors: + constructor = self.yaml_constructors[None] + elif isinstance(node, ScalarNode): + constructor = self.__class__.construct_scalar + elif isinstance(node, SequenceNode): + constructor = self.__class__.construct_sequence + elif isinstance(node, MappingNode): + constructor = self.__class__.construct_mapping + if tag_suffix is None: + data = constructor(self, node) + else: + data = constructor(self, tag_suffix, node) + if isinstance(data, types.GeneratorType): + generator = data + data = next(generator) + if self.deep_construct: + for dummy in generator: + pass + else: + self.state_generators.append(generator) + self.constructed_objects[node] = data + del self.recursive_objects[node] + if deep: + self.deep_construct = old_deep + return data + + def construct_scalar(self, node): + if not isinstance(node, ScalarNode): + raise ConstructorError(None, None, + "expected a scalar node, but found %s" % node.id, + node.start_mark) + return node.value + + def construct_sequence(self, node, deep=False): + if not isinstance(node, SequenceNode): + raise ConstructorError(None, None, + "expected a sequence node, but found %s" % node.id, + node.start_mark) + return [self.construct_object(child, deep=deep) + for child in node.value] + + def construct_mapping(self, node, deep=False): + if not isinstance(node, MappingNode): + raise ConstructorError(None, None, + "expected a mapping node, but found %s" % node.id, + node.start_mark) + mapping = {} + for key_node, value_node in node.value: + key = self.construct_object(key_node, deep=deep) + if not isinstance(key, collections.Hashable): + raise ConstructorError("while constructing a mapping", node.start_mark, + "found unhashable key", key_node.start_mark) + value = self.construct_object(value_node, deep=deep) + mapping[key] = value + return mapping + + def construct_pairs(self, node, deep=False): + if not isinstance(node, MappingNode): + raise ConstructorError(None, None, + "expected a mapping node, but found %s" % node.id, + node.start_mark) + pairs = [] + for key_node, value_node in node.value: + key = self.construct_object(key_node, deep=deep) + value = self.construct_object(value_node, deep=deep) + pairs.append((key, value)) + return pairs + + @classmethod + def add_constructor(cls, tag, constructor): + if not 'yaml_constructors' in cls.__dict__: + cls.yaml_constructors = cls.yaml_constructors.copy() + cls.yaml_constructors[tag] = constructor + + @classmethod + def add_multi_constructor(cls, tag_prefix, multi_constructor): + if not 'yaml_multi_constructors' in cls.__dict__: + cls.yaml_multi_constructors = cls.yaml_multi_constructors.copy() + cls.yaml_multi_constructors[tag_prefix] = multi_constructor + +class SafeConstructor(BaseConstructor): + + def construct_scalar(self, node): + if isinstance(node, MappingNode): + for key_node, value_node in node.value: + if key_node.tag == 'tag:yaml.org,2002:value': + return self.construct_scalar(value_node) + return super().construct_scalar(node) + + def flatten_mapping(self, node): + merge = [] + index = 0 + while index < len(node.value): + key_node, value_node = node.value[index] + if key_node.tag == 'tag:yaml.org,2002:merge': + del node.value[index] + if isinstance(value_node, MappingNode): + self.flatten_mapping(value_node) + merge.extend(value_node.value) + elif isinstance(value_node, SequenceNode): + submerge = [] + for subnode in value_node.value: + if not isinstance(subnode, MappingNode): + raise ConstructorError("while constructing a mapping", + node.start_mark, + "expected a mapping for merging, but found %s" + % subnode.id, subnode.start_mark) + self.flatten_mapping(subnode) + submerge.append(subnode.value) + submerge.reverse() + for value in submerge: + merge.extend(value) + else: + raise ConstructorError("while constructing a mapping", node.start_mark, + "expected a mapping or list of mappings for merging, but found %s" + % value_node.id, value_node.start_mark) + elif key_node.tag == 'tag:yaml.org,2002:value': + key_node.tag = 'tag:yaml.org,2002:str' + index += 1 + else: + index += 1 + if merge: + node.value = merge + node.value + + def construct_mapping(self, node, deep=False): + if isinstance(node, MappingNode): + self.flatten_mapping(node) + return super().construct_mapping(node, deep=deep) + + def construct_yaml_null(self, node): + self.construct_scalar(node) + return None + + bool_values = { + 'yes': True, + 'no': False, + 'true': True, + 'false': False, + 'on': True, + 'off': False, + } + + def construct_yaml_bool(self, node): + value = self.construct_scalar(node) + return self.bool_values[value.lower()] + + def construct_yaml_int(self, node): + value = self.construct_scalar(node) + value = value.replace('_', '') + sign = +1 + if value[0] == '-': + sign = -1 + if value[0] in '+-': + value = value[1:] + if value == '0': + return 0 + elif value.startswith('0b'): + return sign*int(value[2:], 2) + elif value.startswith('0x'): + return sign*int(value[2:], 16) + elif value[0] == '0': + return sign*int(value, 8) + elif ':' in value: + digits = [int(part) for part in value.split(':')] + digits.reverse() + base = 1 + value = 0 + for digit in digits: + value += digit*base + base *= 60 + return sign*value + else: + return sign*int(value) + + inf_value = 1e300 + while inf_value != inf_value*inf_value: + inf_value *= inf_value + nan_value = -inf_value/inf_value # Trying to make a quiet NaN (like C99). + + def construct_yaml_float(self, node): + value = self.construct_scalar(node) + value = value.replace('_', '').lower() + sign = +1 + if value[0] == '-': + sign = -1 + if value[0] in '+-': + value = value[1:] + if value == '.inf': + return sign*self.inf_value + elif value == '.nan': + return self.nan_value + elif ':' in value: + digits = [float(part) for part in value.split(':')] + digits.reverse() + base = 1 + value = 0.0 + for digit in digits: + value += digit*base + base *= 60 + return sign*value + else: + return sign*float(value) + + def construct_yaml_binary(self, node): + try: + value = self.construct_scalar(node).encode('ascii') + except UnicodeEncodeError as exc: + raise ConstructorError(None, None, + "failed to convert base64 data into ascii: %s" % exc, + node.start_mark) + try: + if hasattr(base64, 'decodebytes'): + return base64.decodebytes(value) + else: + return base64.decodestring(value) + except binascii.Error as exc: + raise ConstructorError(None, None, + "failed to decode base64 data: %s" % exc, node.start_mark) + + timestamp_regexp = re.compile( + r'''^(?P<year>[0-9][0-9][0-9][0-9]) + -(?P<month>[0-9][0-9]?) + -(?P<day>[0-9][0-9]?) + (?:(?:[Tt]|[ \t]+) + (?P<hour>[0-9][0-9]?) + :(?P<minute>[0-9][0-9]) + :(?P<second>[0-9][0-9]) + (?:\.(?P<fraction>[0-9]*))? + (?:[ \t]*(?P<tz>Z|(?P<tz_sign>[-+])(?P<tz_hour>[0-9][0-9]?) + (?::(?P<tz_minute>[0-9][0-9]))?))?)?$''', re.X) + + def construct_yaml_timestamp(self, node): + value = self.construct_scalar(node) + match = self.timestamp_regexp.match(node.value) + values = match.groupdict() + year = int(values['year']) + month = int(values['month']) + day = int(values['day']) + if not values['hour']: + return datetime.date(year, month, day) + hour = int(values['hour']) + minute = int(values['minute']) + second = int(values['second']) + fraction = 0 + if values['fraction']: + fraction = values['fraction'][:6] + while len(fraction) < 6: + fraction += '0' + fraction = int(fraction) + delta = None + if values['tz_sign']: + tz_hour = int(values['tz_hour']) + tz_minute = int(values['tz_minute'] or 0) + delta = datetime.timedelta(hours=tz_hour, minutes=tz_minute) + if values['tz_sign'] == '-': + delta = -delta + data = datetime.datetime(year, month, day, hour, minute, second, fraction) + if delta: + data -= delta + return data + + def construct_yaml_omap(self, node): + # Note: we do not check for duplicate keys, because it's too + # CPU-expensive. + omap = [] + yield omap + if not isinstance(node, SequenceNode): + raise ConstructorError("while constructing an ordered map", node.start_mark, + "expected a sequence, but found %s" % node.id, node.start_mark) + for subnode in node.value: + if not isinstance(subnode, MappingNode): + raise ConstructorError("while constructing an ordered map", node.start_mark, + "expected a mapping of length 1, but found %s" % subnode.id, + subnode.start_mark) + if len(subnode.value) != 1: + raise ConstructorError("while constructing an ordered map", node.start_mark, + "expected a single mapping item, but found %d items" % len(subnode.value), + subnode.start_mark) + key_node, value_node = subnode.value[0] + key = self.construct_object(key_node) + value = self.construct_object(value_node) + omap.append((key, value)) + + def construct_yaml_pairs(self, node): + # Note: the same code as `construct_yaml_omap`. + pairs = [] + yield pairs + if not isinstance(node, SequenceNode): + raise ConstructorError("while constructing pairs", node.start_mark, + "expected a sequence, but found %s" % node.id, node.start_mark) + for subnode in node.value: + if not isinstance(subnode, MappingNode): + raise ConstructorError("while constructing pairs", node.start_mark, + "expected a mapping of length 1, but found %s" % subnode.id, + subnode.start_mark) + if len(subnode.value) != 1: + raise ConstructorError("while constructing pairs", node.start_mark, + "expected a single mapping item, but found %d items" % len(subnode.value), + subnode.start_mark) + key_node, value_node = subnode.value[0] + key = self.construct_object(key_node) + value = self.construct_object(value_node) + pairs.append((key, value)) + + def construct_yaml_set(self, node): + data = set() + yield data + value = self.construct_mapping(node) + data.update(value) + + def construct_yaml_str(self, node): + return self.construct_scalar(node) + + def construct_yaml_seq(self, node): + data = [] + yield data + data.extend(self.construct_sequence(node)) + + def construct_yaml_map(self, node): + data = {} + yield data + value = self.construct_mapping(node) + data.update(value) + + def construct_yaml_object(self, node, cls): + data = cls.__new__(cls) + yield data + if hasattr(data, '__setstate__'): + state = self.construct_mapping(node, deep=True) + data.__setstate__(state) + else: + state = self.construct_mapping(node) + data.__dict__.update(state) + + def construct_undefined(self, node): + raise ConstructorError(None, None, + "could not determine a constructor for the tag %r" % node.tag, + node.start_mark) + +SafeConstructor.add_constructor( + 'tag:yaml.org,2002:null', + SafeConstructor.construct_yaml_null) + +SafeConstructor.add_constructor( + 'tag:yaml.org,2002:bool', + SafeConstructor.construct_yaml_bool) + +SafeConstructor.add_constructor( + 'tag:yaml.org,2002:int', + SafeConstructor.construct_yaml_int) + +SafeConstructor.add_constructor( + 'tag:yaml.org,2002:float', + SafeConstructor.construct_yaml_float) + +SafeConstructor.add_constructor( + 'tag:yaml.org,2002:binary', + SafeConstructor.construct_yaml_binary) + +SafeConstructor.add_constructor( + 'tag:yaml.org,2002:timestamp', + SafeConstructor.construct_yaml_timestamp) + +SafeConstructor.add_constructor( + 'tag:yaml.org,2002:omap', + SafeConstructor.construct_yaml_omap) + +SafeConstructor.add_constructor( + 'tag:yaml.org,2002:pairs', + SafeConstructor.construct_yaml_pairs) + +SafeConstructor.add_constructor( + 'tag:yaml.org,2002:set', + SafeConstructor.construct_yaml_set) + +SafeConstructor.add_constructor( + 'tag:yaml.org,2002:str', + SafeConstructor.construct_yaml_str) + +SafeConstructor.add_constructor( + 'tag:yaml.org,2002:seq', + SafeConstructor.construct_yaml_seq) + +SafeConstructor.add_constructor( + 'tag:yaml.org,2002:map', + SafeConstructor.construct_yaml_map) + +SafeConstructor.add_constructor(None, + SafeConstructor.construct_undefined) + +class Constructor(SafeConstructor): + + def construct_python_str(self, node): + return self.construct_scalar(node) + + def construct_python_unicode(self, node): + return self.construct_scalar(node) + + def construct_python_bytes(self, node): + try: + value = self.construct_scalar(node).encode('ascii') + except UnicodeEncodeError as exc: + raise ConstructorError(None, None, + "failed to convert base64 data into ascii: %s" % exc, + node.start_mark) + try: + if hasattr(base64, 'decodebytes'): + return base64.decodebytes(value) + else: + return base64.decodestring(value) + except binascii.Error as exc: + raise ConstructorError(None, None, + "failed to decode base64 data: %s" % exc, node.start_mark) + + def construct_python_long(self, node): + return self.construct_yaml_int(node) + + def construct_python_complex(self, node): + return complex(self.construct_scalar(node)) + + def construct_python_tuple(self, node): + return tuple(self.construct_sequence(node)) + + def find_python_module(self, name, mark): + if not name: + raise ConstructorError("while constructing a Python module", mark, + "expected non-empty name appended to the tag", mark) + try: + __import__(name) + except ImportError as exc: + raise ConstructorError("while constructing a Python module", mark, + "cannot find module %r (%s)" % (name, exc), mark) + return sys.modules[name] + + def find_python_name(self, name, mark): + if not name: + raise ConstructorError("while constructing a Python object", mark, + "expected non-empty name appended to the tag", mark) + if '.' in name: + module_name, object_name = name.rsplit('.', 1) + else: + module_name = 'builtins' + object_name = name + try: + __import__(module_name) + except ImportError as exc: + raise ConstructorError("while constructing a Python object", mark, + "cannot find module %r (%s)" % (module_name, exc), mark) + module = sys.modules[module_name] + if not hasattr(module, object_name): + raise ConstructorError("while constructing a Python object", mark, + "cannot find %r in the module %r" + % (object_name, module.__name__), mark) + return getattr(module, object_name) + + def construct_python_name(self, suffix, node): + value = self.construct_scalar(node) + if value: + raise ConstructorError("while constructing a Python name", node.start_mark, + "expected the empty value, but found %r" % value, node.start_mark) + return self.find_python_name(suffix, node.start_mark) + + def construct_python_module(self, suffix, node): + value = self.construct_scalar(node) + if value: + raise ConstructorError("while constructing a Python module", node.start_mark, + "expected the empty value, but found %r" % value, node.start_mark) + return self.find_python_module(suffix, node.start_mark) + + def make_python_instance(self, suffix, node, + args=None, kwds=None, newobj=False): + if not args: + args = [] + if not kwds: + kwds = {} + cls = self.find_python_name(suffix, node.start_mark) + if newobj and isinstance(cls, type): + return cls.__new__(cls, *args, **kwds) + else: + return cls(*args, **kwds) + + def set_python_instance_state(self, instance, state): + if hasattr(instance, '__setstate__'): + instance.__setstate__(state) + else: + slotstate = {} + if isinstance(state, tuple) and len(state) == 2: + state, slotstate = state + if hasattr(instance, '__dict__'): + instance.__dict__.update(state) + elif state: + slotstate.update(state) + for key, value in slotstate.items(): + setattr(object, key, value) + + def construct_python_object(self, suffix, node): + # Format: + # !!python/object:module.name { ... state ... } + instance = self.make_python_instance(suffix, node, newobj=True) + yield instance + deep = hasattr(instance, '__setstate__') + state = self.construct_mapping(node, deep=deep) + self.set_python_instance_state(instance, state) + + def construct_python_object_apply(self, suffix, node, newobj=False): + # Format: + # !!python/object/apply # (or !!python/object/new) + # args: [ ... arguments ... ] + # kwds: { ... keywords ... } + # state: ... state ... + # listitems: [ ... listitems ... ] + # dictitems: { ... dictitems ... } + # or short format: + # !!python/object/apply [ ... arguments ... ] + # The difference between !!python/object/apply and !!python/object/new + # is how an object is created, check make_python_instance for details. + if isinstance(node, SequenceNode): + args = self.construct_sequence(node, deep=True) + kwds = {} + state = {} + listitems = [] + dictitems = {} + else: + value = self.construct_mapping(node, deep=True) + args = value.get('args', []) + kwds = value.get('kwds', {}) + state = value.get('state', {}) + listitems = value.get('listitems', []) + dictitems = value.get('dictitems', {}) + instance = self.make_python_instance(suffix, node, args, kwds, newobj) + if state: + self.set_python_instance_state(instance, state) + if listitems: + instance.extend(listitems) + if dictitems: + for key in dictitems: + instance[key] = dictitems[key] + return instance + + def construct_python_object_new(self, suffix, node): + return self.construct_python_object_apply(suffix, node, newobj=True) + +Constructor.add_constructor( + 'tag:yaml.org,2002:python/none', + Constructor.construct_yaml_null) + +Constructor.add_constructor( + 'tag:yaml.org,2002:python/bool', + Constructor.construct_yaml_bool) + +Constructor.add_constructor( + 'tag:yaml.org,2002:python/str', + Constructor.construct_python_str) + +Constructor.add_constructor( + 'tag:yaml.org,2002:python/unicode', + Constructor.construct_python_unicode) + +Constructor.add_constructor( + 'tag:yaml.org,2002:python/bytes', + Constructor.construct_python_bytes) + +Constructor.add_constructor( + 'tag:yaml.org,2002:python/int', + Constructor.construct_yaml_int) + +Constructor.add_constructor( + 'tag:yaml.org,2002:python/long', + Constructor.construct_python_long) + +Constructor.add_constructor( + 'tag:yaml.org,2002:python/float', + Constructor.construct_yaml_float) + +Constructor.add_constructor( + 'tag:yaml.org,2002:python/complex', + Constructor.construct_python_complex) + +Constructor.add_constructor( + 'tag:yaml.org,2002:python/list', + Constructor.construct_yaml_seq) + +Constructor.add_constructor( + 'tag:yaml.org,2002:python/tuple', + Constructor.construct_python_tuple) + +Constructor.add_constructor( + 'tag:yaml.org,2002:python/dict', + Constructor.construct_yaml_map) + +Constructor.add_multi_constructor( + 'tag:yaml.org,2002:python/name:', + Constructor.construct_python_name) + +Constructor.add_multi_constructor( + 'tag:yaml.org,2002:python/module:', + Constructor.construct_python_module) + +Constructor.add_multi_constructor( + 'tag:yaml.org,2002:python/object:', + Constructor.construct_python_object) + +Constructor.add_multi_constructor( + 'tag:yaml.org,2002:python/object/apply:', + Constructor.construct_python_object_apply) + +Constructor.add_multi_constructor( + 'tag:yaml.org,2002:python/object/new:', + Constructor.construct_python_object_new) + diff --git a/python.d/python_modules/pyyaml3/cyaml.py b/python.d/python_modules/pyyaml3/cyaml.py new file mode 100644 index 000000000..d5cb87e99 --- /dev/null +++ b/python.d/python_modules/pyyaml3/cyaml.py @@ -0,0 +1,85 @@ + +__all__ = ['CBaseLoader', 'CSafeLoader', 'CLoader', + 'CBaseDumper', 'CSafeDumper', 'CDumper'] + +from _yaml import CParser, CEmitter + +from .constructor import * + +from .serializer import * +from .representer import * + +from .resolver import * + +class CBaseLoader(CParser, BaseConstructor, BaseResolver): + + def __init__(self, stream): + CParser.__init__(self, stream) + BaseConstructor.__init__(self) + BaseResolver.__init__(self) + +class CSafeLoader(CParser, SafeConstructor, Resolver): + + def __init__(self, stream): + CParser.__init__(self, stream) + SafeConstructor.__init__(self) + Resolver.__init__(self) + +class CLoader(CParser, Constructor, Resolver): + + def __init__(self, stream): + CParser.__init__(self, stream) + Constructor.__init__(self) + Resolver.__init__(self) + +class CBaseDumper(CEmitter, BaseRepresenter, BaseResolver): + + def __init__(self, stream, + default_style=None, default_flow_style=None, + canonical=None, indent=None, width=None, + allow_unicode=None, line_break=None, + encoding=None, explicit_start=None, explicit_end=None, + version=None, tags=None): + CEmitter.__init__(self, stream, canonical=canonical, + indent=indent, width=width, encoding=encoding, + allow_unicode=allow_unicode, line_break=line_break, + explicit_start=explicit_start, explicit_end=explicit_end, + version=version, tags=tags) + Representer.__init__(self, default_style=default_style, + default_flow_style=default_flow_style) + Resolver.__init__(self) + +class CSafeDumper(CEmitter, SafeRepresenter, Resolver): + + def __init__(self, stream, + default_style=None, default_flow_style=None, + canonical=None, indent=None, width=None, + allow_unicode=None, line_break=None, + encoding=None, explicit_start=None, explicit_end=None, + version=None, tags=None): + CEmitter.__init__(self, stream, canonical=canonical, + indent=indent, width=width, encoding=encoding, + allow_unicode=allow_unicode, line_break=line_break, + explicit_start=explicit_start, explicit_end=explicit_end, + version=version, tags=tags) + SafeRepresenter.__init__(self, default_style=default_style, + default_flow_style=default_flow_style) + Resolver.__init__(self) + +class CDumper(CEmitter, Serializer, Representer, Resolver): + + def __init__(self, stream, + default_style=None, default_flow_style=None, + canonical=None, indent=None, width=None, + allow_unicode=None, line_break=None, + encoding=None, explicit_start=None, explicit_end=None, + version=None, tags=None): + CEmitter.__init__(self, stream, canonical=canonical, + indent=indent, width=width, encoding=encoding, + allow_unicode=allow_unicode, line_break=line_break, + explicit_start=explicit_start, explicit_end=explicit_end, + version=version, tags=tags) + Representer.__init__(self, default_style=default_style, + default_flow_style=default_flow_style) + Resolver.__init__(self) + diff --git a/python.d/python_modules/pyyaml3/dumper.py b/python.d/python_modules/pyyaml3/dumper.py new file mode 100644 index 000000000..0b6912877 --- /dev/null +++ b/python.d/python_modules/pyyaml3/dumper.py @@ -0,0 +1,62 @@ + +__all__ = ['BaseDumper', 'SafeDumper', 'Dumper'] + +from .emitter import * +from .serializer import * +from .representer import * +from .resolver import * + +class BaseDumper(Emitter, Serializer, BaseRepresenter, BaseResolver): + + def __init__(self, stream, + default_style=None, default_flow_style=None, + canonical=None, indent=None, width=None, + allow_unicode=None, line_break=None, + encoding=None, explicit_start=None, explicit_end=None, + version=None, tags=None): + Emitter.__init__(self, stream, canonical=canonical, + indent=indent, width=width, + allow_unicode=allow_unicode, line_break=line_break) + Serializer.__init__(self, encoding=encoding, + explicit_start=explicit_start, explicit_end=explicit_end, + version=version, tags=tags) + Representer.__init__(self, default_style=default_style, + default_flow_style=default_flow_style) + Resolver.__init__(self) + +class SafeDumper(Emitter, Serializer, SafeRepresenter, Resolver): + + def __init__(self, stream, + default_style=None, default_flow_style=None, + canonical=None, indent=None, width=None, + allow_unicode=None, line_break=None, + encoding=None, explicit_start=None, explicit_end=None, + version=None, tags=None): + Emitter.__init__(self, stream, canonical=canonical, + indent=indent, width=width, + allow_unicode=allow_unicode, line_break=line_break) + Serializer.__init__(self, encoding=encoding, + explicit_start=explicit_start, explicit_end=explicit_end, + version=version, tags=tags) + SafeRepresenter.__init__(self, default_style=default_style, + default_flow_style=default_flow_style) + Resolver.__init__(self) + +class Dumper(Emitter, Serializer, Representer, Resolver): + + def __init__(self, stream, + default_style=None, default_flow_style=None, + canonical=None, indent=None, width=None, + allow_unicode=None, line_break=None, + encoding=None, explicit_start=None, explicit_end=None, + version=None, tags=None): + Emitter.__init__(self, stream, canonical=canonical, + indent=indent, width=width, + allow_unicode=allow_unicode, line_break=line_break) + Serializer.__init__(self, encoding=encoding, + explicit_start=explicit_start, explicit_end=explicit_end, + version=version, tags=tags) + Representer.__init__(self, default_style=default_style, + default_flow_style=default_flow_style) + Resolver.__init__(self) + diff --git a/python.d/python_modules/pyyaml3/emitter.py b/python.d/python_modules/pyyaml3/emitter.py new file mode 100644 index 000000000..34cb145a5 --- /dev/null +++ b/python.d/python_modules/pyyaml3/emitter.py @@ -0,0 +1,1137 @@ + +# Emitter expects events obeying the following grammar: +# stream ::= STREAM-START document* STREAM-END +# document ::= DOCUMENT-START node DOCUMENT-END +# node ::= SCALAR | sequence | mapping +# sequence ::= SEQUENCE-START node* SEQUENCE-END +# mapping ::= MAPPING-START (node node)* MAPPING-END + +__all__ = ['Emitter', 'EmitterError'] + +from .error import YAMLError +from .events import * + +class EmitterError(YAMLError): + pass + +class ScalarAnalysis: + def __init__(self, scalar, empty, multiline, + allow_flow_plain, allow_block_plain, + allow_single_quoted, allow_double_quoted, + allow_block): + self.scalar = scalar + self.empty = empty + self.multiline = multiline + self.allow_flow_plain = allow_flow_plain + self.allow_block_plain = allow_block_plain + self.allow_single_quoted = allow_single_quoted + self.allow_double_quoted = allow_double_quoted + self.allow_block = allow_block + +class Emitter: + + DEFAULT_TAG_PREFIXES = { + '!' : '!', + 'tag:yaml.org,2002:' : '!!', + } + + def __init__(self, stream, canonical=None, indent=None, width=None, + allow_unicode=None, line_break=None): + + # The stream should have the methods `write` and possibly `flush`. + self.stream = stream + + # Encoding can be overriden by STREAM-START. + self.encoding = None + + # Emitter is a state machine with a stack of states to handle nested + # structures. + self.states = [] + self.state = self.expect_stream_start + + # Current event and the event queue. + self.events = [] + self.event = None + + # The current indentation level and the stack of previous indents. + self.indents = [] + self.indent = None + + # Flow level. + self.flow_level = 0 + + # Contexts. + self.root_context = False + self.sequence_context = False + self.mapping_context = False + self.simple_key_context = False + + # Characteristics of the last emitted character: + # - current position. + # - is it a whitespace? + # - is it an indention character + # (indentation space, '-', '?', or ':')? + self.line = 0 + self.column = 0 + self.whitespace = True + self.indention = True + + # Whether the document requires an explicit document indicator + self.open_ended = False + + # Formatting details. + self.canonical = canonical + self.allow_unicode = allow_unicode + self.best_indent = 2 + if indent and 1 < indent < 10: + self.best_indent = indent + self.best_width = 80 + if width and width > self.best_indent*2: + self.best_width = width + self.best_line_break = '\n' + if line_break in ['\r', '\n', '\r\n']: + self.best_line_break = line_break + + # Tag prefixes. + self.tag_prefixes = None + + # Prepared anchor and tag. + self.prepared_anchor = None + self.prepared_tag = None + + # Scalar analysis and style. + self.analysis = None + self.style = None + + def dispose(self): + # Reset the state attributes (to clear self-references) + self.states = [] + self.state = None + + def emit(self, event): + self.events.append(event) + while not self.need_more_events(): + self.event = self.events.pop(0) + self.state() + self.event = None + + # In some cases, we wait for a few next events before emitting. + + def need_more_events(self): + if not self.events: + return True + event = self.events[0] + if isinstance(event, DocumentStartEvent): + return self.need_events(1) + elif isinstance(event, SequenceStartEvent): + return self.need_events(2) + elif isinstance(event, MappingStartEvent): + return self.need_events(3) + else: + return False + + def need_events(self, count): + level = 0 + for event in self.events[1:]: + if isinstance(event, (DocumentStartEvent, CollectionStartEvent)): + level += 1 + elif isinstance(event, (DocumentEndEvent, CollectionEndEvent)): + level -= 1 + elif isinstance(event, StreamEndEvent): + level = -1 + if level < 0: + return False + return (len(self.events) < count+1) + + def increase_indent(self, flow=False, indentless=False): + self.indents.append(self.indent) + if self.indent is None: + if flow: + self.indent = self.best_indent + else: + self.indent = 0 + elif not indentless: + self.indent += self.best_indent + + # States. + + # Stream handlers. + + def expect_stream_start(self): + if isinstance(self.event, StreamStartEvent): + if self.event.encoding and not hasattr(self.stream, 'encoding'): + self.encoding = self.event.encoding + self.write_stream_start() + self.state = self.expect_first_document_start + else: + raise EmitterError("expected StreamStartEvent, but got %s" + % self.event) + + def expect_nothing(self): + raise EmitterError("expected nothing, but got %s" % self.event) + + # Document handlers. + + def expect_first_document_start(self): + return self.expect_document_start(first=True) + + def expect_document_start(self, first=False): + if isinstance(self.event, DocumentStartEvent): + if (self.event.version or self.event.tags) and self.open_ended: + self.write_indicator('...', True) + self.write_indent() + if self.event.version: + version_text = self.prepare_version(self.event.version) + self.write_version_directive(version_text) + self.tag_prefixes = self.DEFAULT_TAG_PREFIXES.copy() + if self.event.tags: + handles = sorted(self.event.tags.keys()) + for handle in handles: + prefix = self.event.tags[handle] + self.tag_prefixes[prefix] = handle + handle_text = self.prepare_tag_handle(handle) + prefix_text = self.prepare_tag_prefix(prefix) + self.write_tag_directive(handle_text, prefix_text) + implicit = (first and not self.event.explicit and not self.canonical + and not self.event.version and not self.event.tags + and not self.check_empty_document()) + if not implicit: + self.write_indent() + self.write_indicator('---', True) + if self.canonical: + self.write_indent() + self.state = self.expect_document_root + elif isinstance(self.event, StreamEndEvent): + if self.open_ended: + self.write_indicator('...', True) + self.write_indent() + self.write_stream_end() + self.state = self.expect_nothing + else: + raise EmitterError("expected DocumentStartEvent, but got %s" + % self.event) + + def expect_document_end(self): + if isinstance(self.event, DocumentEndEvent): + self.write_indent() + if self.event.explicit: + self.write_indicator('...', True) + self.write_indent() + self.flush_stream() + self.state = self.expect_document_start + else: + raise EmitterError("expected DocumentEndEvent, but got %s" + % self.event) + + def expect_document_root(self): + self.states.append(self.expect_document_end) + self.expect_node(root=True) + + # Node handlers. + + def expect_node(self, root=False, sequence=False, mapping=False, + simple_key=False): + self.root_context = root + self.sequence_context = sequence + self.mapping_context = mapping + self.simple_key_context = simple_key + if isinstance(self.event, AliasEvent): + self.expect_alias() + elif isinstance(self.event, (ScalarEvent, CollectionStartEvent)): + self.process_anchor('&') + self.process_tag() + if isinstance(self.event, ScalarEvent): + self.expect_scalar() + elif isinstance(self.event, SequenceStartEvent): + if self.flow_level or self.canonical or self.event.flow_style \ + or self.check_empty_sequence(): + self.expect_flow_sequence() + else: + self.expect_block_sequence() + elif isinstance(self.event, MappingStartEvent): + if self.flow_level or self.canonical or self.event.flow_style \ + or self.check_empty_mapping(): + self.expect_flow_mapping() + else: + self.expect_block_mapping() + else: + raise EmitterError("expected NodeEvent, but got %s" % self.event) + + def expect_alias(self): + if self.event.anchor is None: + raise EmitterError("anchor is not specified for alias") + self.process_anchor('*') + self.state = self.states.pop() + + def expect_scalar(self): + self.increase_indent(flow=True) + self.process_scalar() + self.indent = self.indents.pop() + self.state = self.states.pop() + + # Flow sequence handlers. + + def expect_flow_sequence(self): + self.write_indicator('[', True, whitespace=True) + self.flow_level += 1 + self.increase_indent(flow=True) + self.state = self.expect_first_flow_sequence_item + + def expect_first_flow_sequence_item(self): + if isinstance(self.event, SequenceEndEvent): + self.indent = self.indents.pop() + self.flow_level -= 1 + self.write_indicator(']', False) + self.state = self.states.pop() + else: + if self.canonical or self.column > self.best_width: + self.write_indent() + self.states.append(self.expect_flow_sequence_item) + self.expect_node(sequence=True) + + def expect_flow_sequence_item(self): + if isinstance(self.event, SequenceEndEvent): + self.indent = self.indents.pop() + self.flow_level -= 1 + if self.canonical: + self.write_indicator(',', False) + self.write_indent() + self.write_indicator(']', False) + self.state = self.states.pop() + else: + self.write_indicator(',', False) + if self.canonical or self.column > self.best_width: + self.write_indent() + self.states.append(self.expect_flow_sequence_item) + self.expect_node(sequence=True) + + # Flow mapping handlers. + + def expect_flow_mapping(self): + self.write_indicator('{', True, whitespace=True) + self.flow_level += 1 + self.increase_indent(flow=True) + self.state = self.expect_first_flow_mapping_key + + def expect_first_flow_mapping_key(self): + if isinstance(self.event, MappingEndEvent): + self.indent = self.indents.pop() + self.flow_level -= 1 + self.write_indicator('}', False) + self.state = self.states.pop() + else: + if self.canonical or self.column > self.best_width: + self.write_indent() + if not self.canonical and self.check_simple_key(): + self.states.append(self.expect_flow_mapping_simple_value) + self.expect_node(mapping=True, simple_key=True) + else: + self.write_indicator('?', True) + self.states.append(self.expect_flow_mapping_value) + self.expect_node(mapping=True) + + def expect_flow_mapping_key(self): + if isinstance(self.event, MappingEndEvent): + self.indent = self.indents.pop() + self.flow_level -= 1 + if self.canonical: + self.write_indicator(',', False) + self.write_indent() + self.write_indicator('}', False) + self.state = self.states.pop() + else: + self.write_indicator(',', False) + if self.canonical or self.column > self.best_width: + self.write_indent() + if not self.canonical and self.check_simple_key(): + self.states.append(self.expect_flow_mapping_simple_value) + self.expect_node(mapping=True, simple_key=True) + else: + self.write_indicator('?', True) + self.states.append(self.expect_flow_mapping_value) + self.expect_node(mapping=True) + + def expect_flow_mapping_simple_value(self): + self.write_indicator(':', False) + self.states.append(self.expect_flow_mapping_key) + self.expect_node(mapping=True) + + def expect_flow_mapping_value(self): + if self.canonical or self.column > self.best_width: + self.write_indent() + self.write_indicator(':', True) + self.states.append(self.expect_flow_mapping_key) + self.expect_node(mapping=True) + + # Block sequence handlers. + + def expect_block_sequence(self): + indentless = (self.mapping_context and not self.indention) + self.increase_indent(flow=False, indentless=indentless) + self.state = self.expect_first_block_sequence_item + + def expect_first_block_sequence_item(self): + return self.expect_block_sequence_item(first=True) + + def expect_block_sequence_item(self, first=False): + if not first and isinstance(self.event, SequenceEndEvent): + self.indent = self.indents.pop() + self.state = self.states.pop() + else: + self.write_indent() + self.write_indicator('-', True, indention=True) + self.states.append(self.expect_block_sequence_item) + self.expect_node(sequence=True) + + # Block mapping handlers. + + def expect_block_mapping(self): + self.increase_indent(flow=False) + self.state = self.expect_first_block_mapping_key + + def expect_first_block_mapping_key(self): + return self.expect_block_mapping_key(first=True) + + def expect_block_mapping_key(self, first=False): + if not first and isinstance(self.event, MappingEndEvent): + self.indent = self.indents.pop() + self.state = self.states.pop() + else: + self.write_indent() + if self.check_simple_key(): + self.states.append(self.expect_block_mapping_simple_value) + self.expect_node(mapping=True, simple_key=True) + else: + self.write_indicator('?', True, indention=True) + self.states.append(self.expect_block_mapping_value) + self.expect_node(mapping=True) + + def expect_block_mapping_simple_value(self): + self.write_indicator(':', False) + self.states.append(self.expect_block_mapping_key) + self.expect_node(mapping=True) + + def expect_block_mapping_value(self): + self.write_indent() + self.write_indicator(':', True, indention=True) + self.states.append(self.expect_block_mapping_key) + self.expect_node(mapping=True) + + # Checkers. + + def check_empty_sequence(self): + return (isinstance(self.event, SequenceStartEvent) and self.events + and isinstance(self.events[0], SequenceEndEvent)) + + def check_empty_mapping(self): + return (isinstance(self.event, MappingStartEvent) and self.events + and isinstance(self.events[0], MappingEndEvent)) + + def check_empty_document(self): + if not isinstance(self.event, DocumentStartEvent) or not self.events: + return False + event = self.events[0] + return (isinstance(event, ScalarEvent) and event.anchor is None + and event.tag is None and event.implicit and event.value == '') + + def check_simple_key(self): + length = 0 + if isinstance(self.event, NodeEvent) and self.event.anchor is not None: + if self.prepared_anchor is None: + self.prepared_anchor = self.prepare_anchor(self.event.anchor) + length += len(self.prepared_anchor) + if isinstance(self.event, (ScalarEvent, CollectionStartEvent)) \ + and self.event.tag is not None: + if self.prepared_tag is None: + self.prepared_tag = self.prepare_tag(self.event.tag) + length += len(self.prepared_tag) + if isinstance(self.event, ScalarEvent): + if self.analysis is None: + self.analysis = self.analyze_scalar(self.event.value) + length += len(self.analysis.scalar) + return (length < 128 and (isinstance(self.event, AliasEvent) + or (isinstance(self.event, ScalarEvent) + and not self.analysis.empty and not self.analysis.multiline) + or self.check_empty_sequence() or self.check_empty_mapping())) + + # Anchor, Tag, and Scalar processors. + + def process_anchor(self, indicator): + if self.event.anchor is None: + self.prepared_anchor = None + return + if self.prepared_anchor is None: + self.prepared_anchor = self.prepare_anchor(self.event.anchor) + if self.prepared_anchor: + self.write_indicator(indicator+self.prepared_anchor, True) + self.prepared_anchor = None + + def process_tag(self): + tag = self.event.tag + if isinstance(self.event, ScalarEvent): + if self.style is None: + self.style = self.choose_scalar_style() + if ((not self.canonical or tag is None) and + ((self.style == '' and self.event.implicit[0]) + or (self.style != '' and self.event.implicit[1]))): + self.prepared_tag = None + return + if self.event.implicit[0] and tag is None: + tag = '!' + self.prepared_tag = None + else: + if (not self.canonical or tag is None) and self.event.implicit: + self.prepared_tag = None + return + if tag is None: + raise EmitterError("tag is not specified") + if self.prepared_tag is None: + self.prepared_tag = self.prepare_tag(tag) + if self.prepared_tag: + self.write_indicator(self.prepared_tag, True) + self.prepared_tag = None + + def choose_scalar_style(self): + if self.analysis is None: + self.analysis = self.analyze_scalar(self.event.value) + if self.event.style == '"' or self.canonical: + return '"' + if not self.event.style and self.event.implicit[0]: + if (not (self.simple_key_context and + (self.analysis.empty or self.analysis.multiline)) + and (self.flow_level and self.analysis.allow_flow_plain + or (not self.flow_level and self.analysis.allow_block_plain))): + return '' + if self.event.style and self.event.style in '|>': + if (not self.flow_level and not self.simple_key_context + and self.analysis.allow_block): + return self.event.style + if not self.event.style or self.event.style == '\'': + if (self.analysis.allow_single_quoted and + not (self.simple_key_context and self.analysis.multiline)): + return '\'' + return '"' + + def process_scalar(self): + if self.analysis is None: + self.analysis = self.analyze_scalar(self.event.value) + if self.style is None: + self.style = self.choose_scalar_style() + split = (not self.simple_key_context) + #if self.analysis.multiline and split \ + # and (not self.style or self.style in '\'\"'): + # self.write_indent() + if self.style == '"': + self.write_double_quoted(self.analysis.scalar, split) + elif self.style == '\'': + self.write_single_quoted(self.analysis.scalar, split) + elif self.style == '>': + self.write_folded(self.analysis.scalar) + elif self.style == '|': + self.write_literal(self.analysis.scalar) + else: + self.write_plain(self.analysis.scalar, split) + self.analysis = None + self.style = None + + # Analyzers. + + def prepare_version(self, version): + major, minor = version + if major != 1: + raise EmitterError("unsupported YAML version: %d.%d" % (major, minor)) + return '%d.%d' % (major, minor) + + def prepare_tag_handle(self, handle): + if not handle: + raise EmitterError("tag handle must not be empty") + if handle[0] != '!' or handle[-1] != '!': + raise EmitterError("tag handle must start and end with '!': %r" % handle) + for ch in handle[1:-1]: + if not ('0' <= ch <= '9' or 'A' <= ch <= 'Z' or 'a' <= ch <= 'z' \ + or ch in '-_'): + raise EmitterError("invalid character %r in the tag handle: %r" + % (ch, handle)) + return handle + + def prepare_tag_prefix(self, prefix): + if not prefix: + raise EmitterError("tag prefix must not be empty") + chunks = [] + start = end = 0 + if prefix[0] == '!': + end = 1 + while end < len(prefix): + ch = prefix[end] + if '0' <= ch <= '9' or 'A' <= ch <= 'Z' or 'a' <= ch <= 'z' \ + or ch in '-;/?!:@&=+$,_.~*\'()[]': + end += 1 + else: + if start < end: + chunks.append(prefix[start:end]) + start = end = end+1 + data = ch.encode('utf-8') + for ch in data: + chunks.append('%%%02X' % ord(ch)) + if start < end: + chunks.append(prefix[start:end]) + return ''.join(chunks) + + def prepare_tag(self, tag): + if not tag: + raise EmitterError("tag must not be empty") + if tag == '!': + return tag + handle = None + suffix = tag + prefixes = sorted(self.tag_prefixes.keys()) + for prefix in prefixes: + if tag.startswith(prefix) \ + and (prefix == '!' or len(prefix) < len(tag)): + handle = self.tag_prefixes[prefix] + suffix = tag[len(prefix):] + chunks = [] + start = end = 0 + while end < len(suffix): + ch = suffix[end] + if '0' <= ch <= '9' or 'A' <= ch <= 'Z' or 'a' <= ch <= 'z' \ + or ch in '-;/?:@&=+$,_.~*\'()[]' \ + or (ch == '!' and handle != '!'): + end += 1 + else: + if start < end: + chunks.append(suffix[start:end]) + start = end = end+1 + data = ch.encode('utf-8') + for ch in data: + chunks.append('%%%02X' % ord(ch)) + if start < end: + chunks.append(suffix[start:end]) + suffix_text = ''.join(chunks) + if handle: + return '%s%s' % (handle, suffix_text) + else: + return '!<%s>' % suffix_text + + def prepare_anchor(self, anchor): + if not anchor: + raise EmitterError("anchor must not be empty") + for ch in anchor: + if not ('0' <= ch <= '9' or 'A' <= ch <= 'Z' or 'a' <= ch <= 'z' \ + or ch in '-_'): + raise EmitterError("invalid character %r in the anchor: %r" + % (ch, anchor)) + return anchor + + def analyze_scalar(self, scalar): + + # Empty scalar is a special case. + if not scalar: + return ScalarAnalysis(scalar=scalar, empty=True, multiline=False, + allow_flow_plain=False, allow_block_plain=True, + allow_single_quoted=True, allow_double_quoted=True, + allow_block=False) + + # Indicators and special characters. + block_indicators = False + flow_indicators = False + line_breaks = False + special_characters = False + + # Important whitespace combinations. + leading_space = False + leading_break = False + trailing_space = False + trailing_break = False + break_space = False + space_break = False + + # Check document indicators. + if scalar.startswith('---') or scalar.startswith('...'): + block_indicators = True + flow_indicators = True + + # First character or preceded by a whitespace. + preceeded_by_whitespace = True + + # Last character or followed by a whitespace. + followed_by_whitespace = (len(scalar) == 1 or + scalar[1] in '\0 \t\r\n\x85\u2028\u2029') + + # The previous character is a space. + previous_space = False + + # The previous character is a break. + previous_break = False + + index = 0 + while index < len(scalar): + ch = scalar[index] + + # Check for indicators. + if index == 0: + # Leading indicators are special characters. + if ch in '#,[]{}&*!|>\'\"%@`': + flow_indicators = True + block_indicators = True + if ch in '?:': + flow_indicators = True + if followed_by_whitespace: + block_indicators = True + if ch == '-' and followed_by_whitespace: + flow_indicators = True + block_indicators = True + else: + # Some indicators cannot appear within a scalar as well. + if ch in ',?[]{}': + flow_indicators = True + if ch == ':': + flow_indicators = True + if followed_by_whitespace: + block_indicators = True + if ch == '#' and preceeded_by_whitespace: + flow_indicators = True + block_indicators = True + + # Check for line breaks, special, and unicode characters. + if ch in '\n\x85\u2028\u2029': + line_breaks = True + if not (ch == '\n' or '\x20' <= ch <= '\x7E'): + if (ch == '\x85' or '\xA0' <= ch <= '\uD7FF' + or '\uE000' <= ch <= '\uFFFD') and ch != '\uFEFF': + unicode_characters = True + if not self.allow_unicode: + special_characters = True + else: + special_characters = True + + # Detect important whitespace combinations. + if ch == ' ': + if index == 0: + leading_space = True + if index == len(scalar)-1: + trailing_space = True + if previous_break: + break_space = True + previous_space = True + previous_break = False + elif ch in '\n\x85\u2028\u2029': + if index == 0: + leading_break = True + if index == len(scalar)-1: + trailing_break = True + if previous_space: + space_break = True + previous_space = False + previous_break = True + else: + previous_space = False + previous_break = False + + # Prepare for the next character. + index += 1 + preceeded_by_whitespace = (ch in '\0 \t\r\n\x85\u2028\u2029') + followed_by_whitespace = (index+1 >= len(scalar) or + scalar[index+1] in '\0 \t\r\n\x85\u2028\u2029') + + # Let's decide what styles are allowed. + allow_flow_plain = True + allow_block_plain = True + allow_single_quoted = True + allow_double_quoted = True + allow_block = True + + # Leading and trailing whitespaces are bad for plain scalars. + if (leading_space or leading_break + or trailing_space or trailing_break): + allow_flow_plain = allow_block_plain = False + + # We do not permit trailing spaces for block scalars. + if trailing_space: + allow_block = False + + # Spaces at the beginning of a new line are only acceptable for block + # scalars. + if break_space: + allow_flow_plain = allow_block_plain = allow_single_quoted = False + + # Spaces followed by breaks, as well as special character are only + # allowed for double quoted scalars. + if space_break or special_characters: + allow_flow_plain = allow_block_plain = \ + allow_single_quoted = allow_block = False + + # Although the plain scalar writer supports breaks, we never emit + # multiline plain scalars. + if line_breaks: + allow_flow_plain = allow_block_plain = False + + # Flow indicators are forbidden for flow plain scalars. + if flow_indicators: + allow_flow_plain = False + + # Block indicators are forbidden for block plain scalars. + if block_indicators: + allow_block_plain = False + + return ScalarAnalysis(scalar=scalar, + empty=False, multiline=line_breaks, + allow_flow_plain=allow_flow_plain, + allow_block_plain=allow_block_plain, + allow_single_quoted=allow_single_quoted, + allow_double_quoted=allow_double_quoted, + allow_block=allow_block) + + # Writers. + + def flush_stream(self): + if hasattr(self.stream, 'flush'): + self.stream.flush() + + def write_stream_start(self): + # Write BOM if needed. + if self.encoding and self.encoding.startswith('utf-16'): + self.stream.write('\uFEFF'.encode(self.encoding)) + + def write_stream_end(self): + self.flush_stream() + + def write_indicator(self, indicator, need_whitespace, + whitespace=False, indention=False): + if self.whitespace or not need_whitespace: + data = indicator + else: + data = ' '+indicator + self.whitespace = whitespace + self.indention = self.indention and indention + self.column += len(data) + self.open_ended = False + if self.encoding: + data = data.encode(self.encoding) + self.stream.write(data) + + def write_indent(self): + indent = self.indent or 0 + if not self.indention or self.column > indent \ + or (self.column == indent and not self.whitespace): + self.write_line_break() + if self.column < indent: + self.whitespace = True + data = ' '*(indent-self.column) + self.column = indent + if self.encoding: + data = data.encode(self.encoding) + self.stream.write(data) + + def write_line_break(self, data=None): + if data is None: + data = self.best_line_break + self.whitespace = True + self.indention = True + self.line += 1 + self.column = 0 + if self.encoding: + data = data.encode(self.encoding) + self.stream.write(data) + + def write_version_directive(self, version_text): + data = '%%YAML %s' % version_text + if self.encoding: + data = data.encode(self.encoding) + self.stream.write(data) + self.write_line_break() + + def write_tag_directive(self, handle_text, prefix_text): + data = '%%TAG %s %s' % (handle_text, prefix_text) + if self.encoding: + data = data.encode(self.encoding) + self.stream.write(data) + self.write_line_break() + + # Scalar streams. + + def write_single_quoted(self, text, split=True): + self.write_indicator('\'', True) + spaces = False + breaks = False + start = end = 0 + while end <= len(text): + ch = None + if end < len(text): + ch = text[end] + if spaces: + if ch is None or ch != ' ': + if start+1 == end and self.column > self.best_width and split \ + and start != 0 and end != len(text): + self.write_indent() + else: + data = text[start:end] + self.column += len(data) + if self.encoding: + data = data.encode(self.encoding) + self.stream.write(data) + start = end + elif breaks: + if ch is None or ch not in '\n\x85\u2028\u2029': + if text[start] == '\n': + self.write_line_break() + for br in text[start:end]: + if br == '\n': + self.write_line_break() + else: + self.write_line_break(br) + self.write_indent() + start = end + else: + if ch is None or ch in ' \n\x85\u2028\u2029' or ch == '\'': + if start < end: + data = text[start:end] + self.column += len(data) + if self.encoding: + data = data.encode(self.encoding) + self.stream.write(data) + start = end + if ch == '\'': + data = '\'\'' + self.column += 2 + if self.encoding: + data = data.encode(self.encoding) + self.stream.write(data) + start = end + 1 + if ch is not None: + spaces = (ch == ' ') + breaks = (ch in '\n\x85\u2028\u2029') + end += 1 + self.write_indicator('\'', False) + + ESCAPE_REPLACEMENTS = { + '\0': '0', + '\x07': 'a', + '\x08': 'b', + '\x09': 't', + '\x0A': 'n', + '\x0B': 'v', + '\x0C': 'f', + '\x0D': 'r', + '\x1B': 'e', + '\"': '\"', + '\\': '\\', + '\x85': 'N', + '\xA0': '_', + '\u2028': 'L', + '\u2029': 'P', + } + + def write_double_quoted(self, text, split=True): + self.write_indicator('"', True) + start = end = 0 + while end <= len(text): + ch = None + if end < len(text): + ch = text[end] + if ch is None or ch in '"\\\x85\u2028\u2029\uFEFF' \ + or not ('\x20' <= ch <= '\x7E' + or (self.allow_unicode + and ('\xA0' <= ch <= '\uD7FF' + or '\uE000' <= ch <= '\uFFFD'))): + if start < end: + data = text[start:end] + self.column += len(data) + if self.encoding: + data = data.encode(self.encoding) + self.stream.write(data) + start = end + if ch is not None: + if ch in self.ESCAPE_REPLACEMENTS: + data = '\\'+self.ESCAPE_REPLACEMENTS[ch] + elif ch <= '\xFF': + data = '\\x%02X' % ord(ch) + elif ch <= '\uFFFF': + data = '\\u%04X' % ord(ch) + else: + data = '\\U%08X' % ord(ch) + self.column += len(data) + if self.encoding: + data = data.encode(self.encoding) + self.stream.write(data) + start = end+1 + if 0 < end < len(text)-1 and (ch == ' ' or start >= end) \ + and self.column+(end-start) > self.best_width and split: + data = text[start:end]+'\\' + if start < end: + start = end + self.column += len(data) + if self.encoding: + data = data.encode(self.encoding) + self.stream.write(data) + self.write_indent() + self.whitespace = False + self.indention = False + if text[start] == ' ': + data = '\\' + self.column += len(data) + if self.encoding: + data = data.encode(self.encoding) + self.stream.write(data) + end += 1 + self.write_indicator('"', False) + + def determine_block_hints(self, text): + hints = '' + if text: + if text[0] in ' \n\x85\u2028\u2029': + hints += str(self.best_indent) + if text[-1] not in '\n\x85\u2028\u2029': + hints += '-' + elif len(text) == 1 or text[-2] in '\n\x85\u2028\u2029': + hints += '+' + return hints + + def write_folded(self, text): + hints = self.determine_block_hints(text) + self.write_indicator('>'+hints, True) + if hints[-1:] == '+': + self.open_ended = True + self.write_line_break() + leading_space = True + spaces = False + breaks = True + start = end = 0 + while end <= len(text): + ch = None + if end < len(text): + ch = text[end] + if breaks: + if ch is None or ch not in '\n\x85\u2028\u2029': + if not leading_space and ch is not None and ch != ' ' \ + and text[start] == '\n': + self.write_line_break() + leading_space = (ch == ' ') + for br in text[start:end]: + if br == '\n': + self.write_line_break() + else: + self.write_line_break(br) + if ch is not None: + self.write_indent() + start = end + elif spaces: + if ch != ' ': + if start+1 == end and self.column > self.best_width: + self.write_indent() + else: + data = text[start:end] + self.column += len(data) + if self.encoding: + data = data.encode(self.encoding) + self.stream.write(data) + start = end + else: + if ch is None or ch in ' \n\x85\u2028\u2029': + data = text[start:end] + self.column += len(data) + if self.encoding: + data = data.encode(self.encoding) + self.stream.write(data) + if ch is None: + self.write_line_break() + start = end + if ch is not None: + breaks = (ch in '\n\x85\u2028\u2029') + spaces = (ch == ' ') + end += 1 + + def write_literal(self, text): + hints = self.determine_block_hints(text) + self.write_indicator('|'+hints, True) + if hints[-1:] == '+': + self.open_ended = True + self.write_line_break() + breaks = True + start = end = 0 + while end <= len(text): + ch = None + if end < len(text): + ch = text[end] + if breaks: + if ch is None or ch not in '\n\x85\u2028\u2029': + for br in text[start:end]: + if br == '\n': + self.write_line_break() + else: + self.write_line_break(br) + if ch is not None: + self.write_indent() + start = end + else: + if ch is None or ch in '\n\x85\u2028\u2029': + data = text[start:end] + if self.encoding: + data = data.encode(self.encoding) + self.stream.write(data) + if ch is None: + self.write_line_break() + start = end + if ch is not None: + breaks = (ch in '\n\x85\u2028\u2029') + end += 1 + + def write_plain(self, text, split=True): + if self.root_context: + self.open_ended = True + if not text: + return + if not self.whitespace: + data = ' ' + self.column += len(data) + if self.encoding: + data = data.encode(self.encoding) + self.stream.write(data) + self.whitespace = False + self.indention = False + spaces = False + breaks = False + start = end = 0 + while end <= len(text): + ch = None + if end < len(text): + ch = text[end] + if spaces: + if ch != ' ': + if start+1 == end and self.column > self.best_width and split: + self.write_indent() + self.whitespace = False + self.indention = False + else: + data = text[start:end] + self.column += len(data) + if self.encoding: + data = data.encode(self.encoding) + self.stream.write(data) + start = end + elif breaks: + if ch not in '\n\x85\u2028\u2029': + if text[start] == '\n': + self.write_line_break() + for br in text[start:end]: + if br == '\n': + self.write_line_break() + else: + self.write_line_break(br) + self.write_indent() + self.whitespace = False + self.indention = False + start = end + else: + if ch is None or ch in ' \n\x85\u2028\u2029': + data = text[start:end] + self.column += len(data) + if self.encoding: + data = data.encode(self.encoding) + self.stream.write(data) + start = end + if ch is not None: + spaces = (ch == ' ') + breaks = (ch in '\n\x85\u2028\u2029') + end += 1 + diff --git a/python.d/python_modules/pyyaml3/error.py b/python.d/python_modules/pyyaml3/error.py new file mode 100644 index 000000000..b796b4dc5 --- /dev/null +++ b/python.d/python_modules/pyyaml3/error.py @@ -0,0 +1,75 @@ + +__all__ = ['Mark', 'YAMLError', 'MarkedYAMLError'] + +class Mark: + + def __init__(self, name, index, line, column, buffer, pointer): + self.name = name + self.index = index + self.line = line + self.column = column + self.buffer = buffer + self.pointer = pointer + + def get_snippet(self, indent=4, max_length=75): + if self.buffer is None: + return None + head = '' + start = self.pointer + while start > 0 and self.buffer[start-1] not in '\0\r\n\x85\u2028\u2029': + start -= 1 + if self.pointer-start > max_length/2-1: + head = ' ... ' + start += 5 + break + tail = '' + end = self.pointer + while end < len(self.buffer) and self.buffer[end] not in '\0\r\n\x85\u2028\u2029': + end += 1 + if end-self.pointer > max_length/2-1: + tail = ' ... ' + end -= 5 + break + snippet = self.buffer[start:end] + return ' '*indent + head + snippet + tail + '\n' \ + + ' '*(indent+self.pointer-start+len(head)) + '^' + + def __str__(self): + snippet = self.get_snippet() + where = " in \"%s\", line %d, column %d" \ + % (self.name, self.line+1, self.column+1) + if snippet is not None: + where += ":\n"+snippet + return where + +class YAMLError(Exception): + pass + +class MarkedYAMLError(YAMLError): + + def __init__(self, context=None, context_mark=None, + problem=None, problem_mark=None, note=None): + self.context = context + self.context_mark = context_mark + self.problem = problem + self.problem_mark = problem_mark + self.note = note + + def __str__(self): + lines = [] + if self.context is not None: + lines.append(self.context) + if self.context_mark is not None \ + and (self.problem is None or self.problem_mark is None + or self.context_mark.name != self.problem_mark.name + or self.context_mark.line != self.problem_mark.line + or self.context_mark.column != self.problem_mark.column): + lines.append(str(self.context_mark)) + if self.problem is not None: + lines.append(self.problem) + if self.problem_mark is not None: + lines.append(str(self.problem_mark)) + if self.note is not None: + lines.append(self.note) + return '\n'.join(lines) + diff --git a/python.d/python_modules/pyyaml3/events.py b/python.d/python_modules/pyyaml3/events.py new file mode 100644 index 000000000..f79ad389c --- /dev/null +++ b/python.d/python_modules/pyyaml3/events.py @@ -0,0 +1,86 @@ + +# Abstract classes. + +class Event(object): + def __init__(self, start_mark=None, end_mark=None): + self.start_mark = start_mark + self.end_mark = end_mark + def __repr__(self): + attributes = [key for key in ['anchor', 'tag', 'implicit', 'value'] + if hasattr(self, key)] + arguments = ', '.join(['%s=%r' % (key, getattr(self, key)) + for key in attributes]) + return '%s(%s)' % (self.__class__.__name__, arguments) + +class NodeEvent(Event): + def __init__(self, anchor, start_mark=None, end_mark=None): + self.anchor = anchor + self.start_mark = start_mark + self.end_mark = end_mark + +class CollectionStartEvent(NodeEvent): + def __init__(self, anchor, tag, implicit, start_mark=None, end_mark=None, + flow_style=None): + self.anchor = anchor + self.tag = tag + self.implicit = implicit + self.start_mark = start_mark + self.end_mark = end_mark + self.flow_style = flow_style + +class CollectionEndEvent(Event): + pass + +# Implementations. + +class StreamStartEvent(Event): + def __init__(self, start_mark=None, end_mark=None, encoding=None): + self.start_mark = start_mark + self.end_mark = end_mark + self.encoding = encoding + +class StreamEndEvent(Event): + pass + +class DocumentStartEvent(Event): + def __init__(self, start_mark=None, end_mark=None, + explicit=None, version=None, tags=None): + self.start_mark = start_mark + self.end_mark = end_mark + self.explicit = explicit + self.version = version + self.tags = tags + +class DocumentEndEvent(Event): + def __init__(self, start_mark=None, end_mark=None, + explicit=None): + self.start_mark = start_mark + self.end_mark = end_mark + self.explicit = explicit + +class AliasEvent(NodeEvent): + pass + +class ScalarEvent(NodeEvent): + def __init__(self, anchor, tag, implicit, value, + start_mark=None, end_mark=None, style=None): + self.anchor = anchor + self.tag = tag + self.implicit = implicit + self.value = value + self.start_mark = start_mark + self.end_mark = end_mark + self.style = style + +class SequenceStartEvent(CollectionStartEvent): + pass + +class SequenceEndEvent(CollectionEndEvent): + pass + +class MappingStartEvent(CollectionStartEvent): + pass + +class MappingEndEvent(CollectionEndEvent): + pass + diff --git a/python.d/python_modules/pyyaml3/loader.py b/python.d/python_modules/pyyaml3/loader.py new file mode 100644 index 000000000..08c8f01b3 --- /dev/null +++ b/python.d/python_modules/pyyaml3/loader.py @@ -0,0 +1,40 @@ + +__all__ = ['BaseLoader', 'SafeLoader', 'Loader'] + +from .reader import * +from .scanner import * +from .parser import * +from .composer import * +from .constructor import * +from .resolver import * + +class BaseLoader(Reader, Scanner, Parser, Composer, BaseConstructor, BaseResolver): + + def __init__(self, stream): + Reader.__init__(self, stream) + Scanner.__init__(self) + Parser.__init__(self) + Composer.__init__(self) + BaseConstructor.__init__(self) + BaseResolver.__init__(self) + +class SafeLoader(Reader, Scanner, Parser, Composer, SafeConstructor, Resolver): + + def __init__(self, stream): + Reader.__init__(self, stream) + Scanner.__init__(self) + Parser.__init__(self) + Composer.__init__(self) + SafeConstructor.__init__(self) + Resolver.__init__(self) + +class Loader(Reader, Scanner, Parser, Composer, Constructor, Resolver): + + def __init__(self, stream): + Reader.__init__(self, stream) + Scanner.__init__(self) + Parser.__init__(self) + Composer.__init__(self) + Constructor.__init__(self) + Resolver.__init__(self) + diff --git a/python.d/python_modules/pyyaml3/nodes.py b/python.d/python_modules/pyyaml3/nodes.py new file mode 100644 index 000000000..c4f070c41 --- /dev/null +++ b/python.d/python_modules/pyyaml3/nodes.py @@ -0,0 +1,49 @@ + +class Node(object): + def __init__(self, tag, value, start_mark, end_mark): + self.tag = tag + self.value = value + self.start_mark = start_mark + self.end_mark = end_mark + def __repr__(self): + value = self.value + #if isinstance(value, list): + # if len(value) == 0: + # value = '<empty>' + # elif len(value) == 1: + # value = '<1 item>' + # else: + # value = '<%d items>' % len(value) + #else: + # if len(value) > 75: + # value = repr(value[:70]+u' ... ') + # else: + # value = repr(value) + value = repr(value) + return '%s(tag=%r, value=%s)' % (self.__class__.__name__, self.tag, value) + +class ScalarNode(Node): + id = 'scalar' + def __init__(self, tag, value, + start_mark=None, end_mark=None, style=None): + self.tag = tag + self.value = value + self.start_mark = start_mark + self.end_mark = end_mark + self.style = style + +class CollectionNode(Node): + def __init__(self, tag, value, + start_mark=None, end_mark=None, flow_style=None): + self.tag = tag + self.value = value + self.start_mark = start_mark + self.end_mark = end_mark + self.flow_style = flow_style + +class SequenceNode(CollectionNode): + id = 'sequence' + +class MappingNode(CollectionNode): + id = 'mapping' + diff --git a/python.d/python_modules/pyyaml3/parser.py b/python.d/python_modules/pyyaml3/parser.py new file mode 100644 index 000000000..13a5995d2 --- /dev/null +++ b/python.d/python_modules/pyyaml3/parser.py @@ -0,0 +1,589 @@ + +# The following YAML grammar is LL(1) and is parsed by a recursive descent +# parser. +# +# stream ::= STREAM-START implicit_document? explicit_document* STREAM-END +# implicit_document ::= block_node DOCUMENT-END* +# explicit_document ::= DIRECTIVE* DOCUMENT-START block_node? DOCUMENT-END* +# block_node_or_indentless_sequence ::= +# ALIAS +# | properties (block_content | indentless_block_sequence)? +# | block_content +# | indentless_block_sequence +# block_node ::= ALIAS +# | properties block_content? +# | block_content +# flow_node ::= ALIAS +# | properties flow_content? +# | flow_content +# properties ::= TAG ANCHOR? | ANCHOR TAG? +# block_content ::= block_collection | flow_collection | SCALAR +# flow_content ::= flow_collection | SCALAR +# block_collection ::= block_sequence | block_mapping +# flow_collection ::= flow_sequence | flow_mapping +# block_sequence ::= BLOCK-SEQUENCE-START (BLOCK-ENTRY block_node?)* BLOCK-END +# indentless_sequence ::= (BLOCK-ENTRY block_node?)+ +# block_mapping ::= BLOCK-MAPPING_START +# ((KEY block_node_or_indentless_sequence?)? +# (VALUE block_node_or_indentless_sequence?)?)* +# BLOCK-END +# flow_sequence ::= FLOW-SEQUENCE-START +# (flow_sequence_entry FLOW-ENTRY)* +# flow_sequence_entry? +# FLOW-SEQUENCE-END +# flow_sequence_entry ::= flow_node | KEY flow_node? (VALUE flow_node?)? +# flow_mapping ::= FLOW-MAPPING-START +# (flow_mapping_entry FLOW-ENTRY)* +# flow_mapping_entry? +# FLOW-MAPPING-END +# flow_mapping_entry ::= flow_node | KEY flow_node? (VALUE flow_node?)? +# +# FIRST sets: +# +# stream: { STREAM-START } +# explicit_document: { DIRECTIVE DOCUMENT-START } +# implicit_document: FIRST(block_node) +# block_node: { ALIAS TAG ANCHOR SCALAR BLOCK-SEQUENCE-START BLOCK-MAPPING-START FLOW-SEQUENCE-START FLOW-MAPPING-START } +# flow_node: { ALIAS ANCHOR TAG SCALAR FLOW-SEQUENCE-START FLOW-MAPPING-START } +# block_content: { BLOCK-SEQUENCE-START BLOCK-MAPPING-START FLOW-SEQUENCE-START FLOW-MAPPING-START SCALAR } +# flow_content: { FLOW-SEQUENCE-START FLOW-MAPPING-START SCALAR } +# block_collection: { BLOCK-SEQUENCE-START BLOCK-MAPPING-START } +# flow_collection: { FLOW-SEQUENCE-START FLOW-MAPPING-START } +# block_sequence: { BLOCK-SEQUENCE-START } +# block_mapping: { BLOCK-MAPPING-START } +# block_node_or_indentless_sequence: { ALIAS ANCHOR TAG SCALAR BLOCK-SEQUENCE-START BLOCK-MAPPING-START FLOW-SEQUENCE-START FLOW-MAPPING-START BLOCK-ENTRY } +# indentless_sequence: { ENTRY } +# flow_collection: { FLOW-SEQUENCE-START FLOW-MAPPING-START } +# flow_sequence: { FLOW-SEQUENCE-START } +# flow_mapping: { FLOW-MAPPING-START } +# flow_sequence_entry: { ALIAS ANCHOR TAG SCALAR FLOW-SEQUENCE-START FLOW-MAPPING-START KEY } +# flow_mapping_entry: { ALIAS ANCHOR TAG SCALAR FLOW-SEQUENCE-START FLOW-MAPPING-START KEY } + +__all__ = ['Parser', 'ParserError'] + +from .error import MarkedYAMLError +from .tokens import * +from .events import * +from .scanner import * + +class ParserError(MarkedYAMLError): + pass + +class Parser: + # Since writing a recursive-descendant parser is a straightforward task, we + # do not give many comments here. + + DEFAULT_TAGS = { + '!': '!', + '!!': 'tag:yaml.org,2002:', + } + + def __init__(self): + self.current_event = None + self.yaml_version = None + self.tag_handles = {} + self.states = [] + self.marks = [] + self.state = self.parse_stream_start + + def dispose(self): + # Reset the state attributes (to clear self-references) + self.states = [] + self.state = None + + def check_event(self, *choices): + # Check the type of the next event. + if self.current_event is None: + if self.state: + self.current_event = self.state() + if self.current_event is not None: + if not choices: + return True + for choice in choices: + if isinstance(self.current_event, choice): + return True + return False + + def peek_event(self): + # Get the next event. + if self.current_event is None: + if self.state: + self.current_event = self.state() + return self.current_event + + def get_event(self): + # Get the next event and proceed further. + if self.current_event is None: + if self.state: + self.current_event = self.state() + value = self.current_event + self.current_event = None + return value + + # stream ::= STREAM-START implicit_document? explicit_document* STREAM-END + # implicit_document ::= block_node DOCUMENT-END* + # explicit_document ::= DIRECTIVE* DOCUMENT-START block_node? DOCUMENT-END* + + def parse_stream_start(self): + + # Parse the stream start. + token = self.get_token() + event = StreamStartEvent(token.start_mark, token.end_mark, + encoding=token.encoding) + + # Prepare the next state. + self.state = self.parse_implicit_document_start + + return event + + def parse_implicit_document_start(self): + + # Parse an implicit document. + if not self.check_token(DirectiveToken, DocumentStartToken, + StreamEndToken): + self.tag_handles = self.DEFAULT_TAGS + token = self.peek_token() + start_mark = end_mark = token.start_mark + event = DocumentStartEvent(start_mark, end_mark, + explicit=False) + + # Prepare the next state. + self.states.append(self.parse_document_end) + self.state = self.parse_block_node + + return event + + else: + return self.parse_document_start() + + def parse_document_start(self): + + # Parse any extra document end indicators. + while self.check_token(DocumentEndToken): + self.get_token() + + # Parse an explicit document. + if not self.check_token(StreamEndToken): + token = self.peek_token() + start_mark = token.start_mark + version, tags = self.process_directives() + if not self.check_token(DocumentStartToken): + raise ParserError(None, None, + "expected '<document start>', but found %r" + % self.peek_token().id, + self.peek_token().start_mark) + token = self.get_token() + end_mark = token.end_mark + event = DocumentStartEvent(start_mark, end_mark, + explicit=True, version=version, tags=tags) + self.states.append(self.parse_document_end) + self.state = self.parse_document_content + else: + # Parse the end of the stream. + token = self.get_token() + event = StreamEndEvent(token.start_mark, token.end_mark) + assert not self.states + assert not self.marks + self.state = None + return event + + def parse_document_end(self): + + # Parse the document end. + token = self.peek_token() + start_mark = end_mark = token.start_mark + explicit = False + if self.check_token(DocumentEndToken): + token = self.get_token() + end_mark = token.end_mark + explicit = True + event = DocumentEndEvent(start_mark, end_mark, + explicit=explicit) + + # Prepare the next state. + self.state = self.parse_document_start + + return event + + def parse_document_content(self): + if self.check_token(DirectiveToken, + DocumentStartToken, DocumentEndToken, StreamEndToken): + event = self.process_empty_scalar(self.peek_token().start_mark) + self.state = self.states.pop() + return event + else: + return self.parse_block_node() + + def process_directives(self): + self.yaml_version = None + self.tag_handles = {} + while self.check_token(DirectiveToken): + token = self.get_token() + if token.name == 'YAML': + if self.yaml_version is not None: + raise ParserError(None, None, + "found duplicate YAML directive", token.start_mark) + major, minor = token.value + if major != 1: + raise ParserError(None, None, + "found incompatible YAML document (version 1.* is required)", + token.start_mark) + self.yaml_version = token.value + elif token.name == 'TAG': + handle, prefix = token.value + if handle in self.tag_handles: + raise ParserError(None, None, + "duplicate tag handle %r" % handle, + token.start_mark) + self.tag_handles[handle] = prefix + if self.tag_handles: + value = self.yaml_version, self.tag_handles.copy() + else: + value = self.yaml_version, None + for key in self.DEFAULT_TAGS: + if key not in self.tag_handles: + self.tag_handles[key] = self.DEFAULT_TAGS[key] + return value + + # block_node_or_indentless_sequence ::= ALIAS + # | properties (block_content | indentless_block_sequence)? + # | block_content + # | indentless_block_sequence + # block_node ::= ALIAS + # | properties block_content? + # | block_content + # flow_node ::= ALIAS + # | properties flow_content? + # | flow_content + # properties ::= TAG ANCHOR? | ANCHOR TAG? + # block_content ::= block_collection | flow_collection | SCALAR + # flow_content ::= flow_collection | SCALAR + # block_collection ::= block_sequence | block_mapping + # flow_collection ::= flow_sequence | flow_mapping + + def parse_block_node(self): + return self.parse_node(block=True) + + def parse_flow_node(self): + return self.parse_node() + + def parse_block_node_or_indentless_sequence(self): + return self.parse_node(block=True, indentless_sequence=True) + + def parse_node(self, block=False, indentless_sequence=False): + if self.check_token(AliasToken): + token = self.get_token() + event = AliasEvent(token.value, token.start_mark, token.end_mark) + self.state = self.states.pop() + else: + anchor = None + tag = None + start_mark = end_mark = tag_mark = None + if self.check_token(AnchorToken): + token = self.get_token() + start_mark = token.start_mark + end_mark = token.end_mark + anchor = token.value + if self.check_token(TagToken): + token = self.get_token() + tag_mark = token.start_mark + end_mark = token.end_mark + tag = token.value + elif self.check_token(TagToken): + token = self.get_token() + start_mark = tag_mark = token.start_mark + end_mark = token.end_mark + tag = token.value + if self.check_token(AnchorToken): + token = self.get_token() + end_mark = token.end_mark + anchor = token.value + if tag is not None: + handle, suffix = tag + if handle is not None: + if handle not in self.tag_handles: + raise ParserError("while parsing a node", start_mark, + "found undefined tag handle %r" % handle, + tag_mark) + tag = self.tag_handles[handle]+suffix + else: + tag = suffix + #if tag == '!': + # raise ParserError("while parsing a node", start_mark, + # "found non-specific tag '!'", tag_mark, + # "Please check 'http://pyyaml.org/wiki/YAMLNonSpecificTag' and share your opinion.") + if start_mark is None: + start_mark = end_mark = self.peek_token().start_mark + event = None + implicit = (tag is None or tag == '!') + if indentless_sequence and self.check_token(BlockEntryToken): + end_mark = self.peek_token().end_mark + event = SequenceStartEvent(anchor, tag, implicit, + start_mark, end_mark) + self.state = self.parse_indentless_sequence_entry + else: + if self.check_token(ScalarToken): + token = self.get_token() + end_mark = token.end_mark + if (token.plain and tag is None) or tag == '!': + implicit = (True, False) + elif tag is None: + implicit = (False, True) + else: + implicit = (False, False) + event = ScalarEvent(anchor, tag, implicit, token.value, + start_mark, end_mark, style=token.style) + self.state = self.states.pop() + elif self.check_token(FlowSequenceStartToken): + end_mark = self.peek_token().end_mark + event = SequenceStartEvent(anchor, tag, implicit, + start_mark, end_mark, flow_style=True) + self.state = self.parse_flow_sequence_first_entry + elif self.check_token(FlowMappingStartToken): + end_mark = self.peek_token().end_mark + event = MappingStartEvent(anchor, tag, implicit, + start_mark, end_mark, flow_style=True) + self.state = self.parse_flow_mapping_first_key + elif block and self.check_token(BlockSequenceStartToken): + end_mark = self.peek_token().start_mark + event = SequenceStartEvent(anchor, tag, implicit, + start_mark, end_mark, flow_style=False) + self.state = self.parse_block_sequence_first_entry + elif block and self.check_token(BlockMappingStartToken): + end_mark = self.peek_token().start_mark + event = MappingStartEvent(anchor, tag, implicit, + start_mark, end_mark, flow_style=False) + self.state = self.parse_block_mapping_first_key + elif anchor is not None or tag is not None: + # Empty scalars are allowed even if a tag or an anchor is + # specified. + event = ScalarEvent(anchor, tag, (implicit, False), '', + start_mark, end_mark) + self.state = self.states.pop() + else: + if block: + node = 'block' + else: + node = 'flow' + token = self.peek_token() + raise ParserError("while parsing a %s node" % node, start_mark, + "expected the node content, but found %r" % token.id, + token.start_mark) + return event + + # block_sequence ::= BLOCK-SEQUENCE-START (BLOCK-ENTRY block_node?)* BLOCK-END + + def parse_block_sequence_first_entry(self): + token = self.get_token() + self.marks.append(token.start_mark) + return self.parse_block_sequence_entry() + + def parse_block_sequence_entry(self): + if self.check_token(BlockEntryToken): + token = self.get_token() + if not self.check_token(BlockEntryToken, BlockEndToken): + self.states.append(self.parse_block_sequence_entry) + return self.parse_block_node() + else: + self.state = self.parse_block_sequence_entry + return self.process_empty_scalar(token.end_mark) + if not self.check_token(BlockEndToken): + token = self.peek_token() + raise ParserError("while parsing a block collection", self.marks[-1], + "expected <block end>, but found %r" % token.id, token.start_mark) + token = self.get_token() + event = SequenceEndEvent(token.start_mark, token.end_mark) + self.state = self.states.pop() + self.marks.pop() + return event + + # indentless_sequence ::= (BLOCK-ENTRY block_node?)+ + + def parse_indentless_sequence_entry(self): + if self.check_token(BlockEntryToken): + token = self.get_token() + if not self.check_token(BlockEntryToken, + KeyToken, ValueToken, BlockEndToken): + self.states.append(self.parse_indentless_sequence_entry) + return self.parse_block_node() + else: + self.state = self.parse_indentless_sequence_entry + return self.process_empty_scalar(token.end_mark) + token = self.peek_token() + event = SequenceEndEvent(token.start_mark, token.start_mark) + self.state = self.states.pop() + return event + + # block_mapping ::= BLOCK-MAPPING_START + # ((KEY block_node_or_indentless_sequence?)? + # (VALUE block_node_or_indentless_sequence?)?)* + # BLOCK-END + + def parse_block_mapping_first_key(self): + token = self.get_token() + self.marks.append(token.start_mark) + return self.parse_block_mapping_key() + + def parse_block_mapping_key(self): + if self.check_token(KeyToken): + token = self.get_token() + if not self.check_token(KeyToken, ValueToken, BlockEndToken): + self.states.append(self.parse_block_mapping_value) + return self.parse_block_node_or_indentless_sequence() + else: + self.state = self.parse_block_mapping_value + return self.process_empty_scalar(token.end_mark) + if not self.check_token(BlockEndToken): + token = self.peek_token() + raise ParserError("while parsing a block mapping", self.marks[-1], + "expected <block end>, but found %r" % token.id, token.start_mark) + token = self.get_token() + event = MappingEndEvent(token.start_mark, token.end_mark) + self.state = self.states.pop() + self.marks.pop() + return event + + def parse_block_mapping_value(self): + if self.check_token(ValueToken): + token = self.get_token() + if not self.check_token(KeyToken, ValueToken, BlockEndToken): + self.states.append(self.parse_block_mapping_key) + return self.parse_block_node_or_indentless_sequence() + else: + self.state = self.parse_block_mapping_key + return self.process_empty_scalar(token.end_mark) + else: + self.state = self.parse_block_mapping_key + token = self.peek_token() + return self.process_empty_scalar(token.start_mark) + + # flow_sequence ::= FLOW-SEQUENCE-START + # (flow_sequence_entry FLOW-ENTRY)* + # flow_sequence_entry? + # FLOW-SEQUENCE-END + # flow_sequence_entry ::= flow_node | KEY flow_node? (VALUE flow_node?)? + # + # Note that while production rules for both flow_sequence_entry and + # flow_mapping_entry are equal, their interpretations are different. + # For `flow_sequence_entry`, the part `KEY flow_node? (VALUE flow_node?)?` + # generate an inline mapping (set syntax). + + def parse_flow_sequence_first_entry(self): + token = self.get_token() + self.marks.append(token.start_mark) + return self.parse_flow_sequence_entry(first=True) + + def parse_flow_sequence_entry(self, first=False): + if not self.check_token(FlowSequenceEndToken): + if not first: + if self.check_token(FlowEntryToken): + self.get_token() + else: + token = self.peek_token() + raise ParserError("while parsing a flow sequence", self.marks[-1], + "expected ',' or ']', but got %r" % token.id, token.start_mark) + + if self.check_token(KeyToken): + token = self.peek_token() + event = MappingStartEvent(None, None, True, + token.start_mark, token.end_mark, + flow_style=True) + self.state = self.parse_flow_sequence_entry_mapping_key + return event + elif not self.check_token(FlowSequenceEndToken): + self.states.append(self.parse_flow_sequence_entry) + return self.parse_flow_node() + token = self.get_token() + event = SequenceEndEvent(token.start_mark, token.end_mark) + self.state = self.states.pop() + self.marks.pop() + return event + + def parse_flow_sequence_entry_mapping_key(self): + token = self.get_token() + if not self.check_token(ValueToken, + FlowEntryToken, FlowSequenceEndToken): + self.states.append(self.parse_flow_sequence_entry_mapping_value) + return self.parse_flow_node() + else: + self.state = self.parse_flow_sequence_entry_mapping_value + return self.process_empty_scalar(token.end_mark) + + def parse_flow_sequence_entry_mapping_value(self): + if self.check_token(ValueToken): + token = self.get_token() + if not self.check_token(FlowEntryToken, FlowSequenceEndToken): + self.states.append(self.parse_flow_sequence_entry_mapping_end) + return self.parse_flow_node() + else: + self.state = self.parse_flow_sequence_entry_mapping_end + return self.process_empty_scalar(token.end_mark) + else: + self.state = self.parse_flow_sequence_entry_mapping_end + token = self.peek_token() + return self.process_empty_scalar(token.start_mark) + + def parse_flow_sequence_entry_mapping_end(self): + self.state = self.parse_flow_sequence_entry + token = self.peek_token() + return MappingEndEvent(token.start_mark, token.start_mark) + + # flow_mapping ::= FLOW-MAPPING-START + # (flow_mapping_entry FLOW-ENTRY)* + # flow_mapping_entry? + # FLOW-MAPPING-END + # flow_mapping_entry ::= flow_node | KEY flow_node? (VALUE flow_node?)? + + def parse_flow_mapping_first_key(self): + token = self.get_token() + self.marks.append(token.start_mark) + return self.parse_flow_mapping_key(first=True) + + def parse_flow_mapping_key(self, first=False): + if not self.check_token(FlowMappingEndToken): + if not first: + if self.check_token(FlowEntryToken): + self.get_token() + else: + token = self.peek_token() + raise ParserError("while parsing a flow mapping", self.marks[-1], + "expected ',' or '}', but got %r" % token.id, token.start_mark) + if self.check_token(KeyToken): + token = self.get_token() + if not self.check_token(ValueToken, + FlowEntryToken, FlowMappingEndToken): + self.states.append(self.parse_flow_mapping_value) + return self.parse_flow_node() + else: + self.state = self.parse_flow_mapping_value + return self.process_empty_scalar(token.end_mark) + elif not self.check_token(FlowMappingEndToken): + self.states.append(self.parse_flow_mapping_empty_value) + return self.parse_flow_node() + token = self.get_token() + event = MappingEndEvent(token.start_mark, token.end_mark) + self.state = self.states.pop() + self.marks.pop() + return event + + def parse_flow_mapping_value(self): + if self.check_token(ValueToken): + token = self.get_token() + if not self.check_token(FlowEntryToken, FlowMappingEndToken): + self.states.append(self.parse_flow_mapping_key) + return self.parse_flow_node() + else: + self.state = self.parse_flow_mapping_key + return self.process_empty_scalar(token.end_mark) + else: + self.state = self.parse_flow_mapping_key + token = self.peek_token() + return self.process_empty_scalar(token.start_mark) + + def parse_flow_mapping_empty_value(self): + self.state = self.parse_flow_mapping_key + return self.process_empty_scalar(self.peek_token().start_mark) + + def process_empty_scalar(self, mark): + return ScalarEvent(None, None, (True, False), '', mark, mark) + diff --git a/python.d/python_modules/pyyaml3/reader.py b/python.d/python_modules/pyyaml3/reader.py new file mode 100644 index 000000000..f70e920f4 --- /dev/null +++ b/python.d/python_modules/pyyaml3/reader.py @@ -0,0 +1,192 @@ +# This module contains abstractions for the input stream. You don't have to +# looks further, there are no pretty code. +# +# We define two classes here. +# +# Mark(source, line, column) +# It's just a record and its only use is producing nice error messages. +# Parser does not use it for any other purposes. +# +# Reader(source, data) +# Reader determines the encoding of `data` and converts it to unicode. +# Reader provides the following methods and attributes: +# reader.peek(length=1) - return the next `length` characters +# reader.forward(length=1) - move the current position to `length` characters. +# reader.index - the number of the current character. +# reader.line, stream.column - the line and the column of the current character. + +__all__ = ['Reader', 'ReaderError'] + +from .error import YAMLError, Mark + +import codecs, re + +class ReaderError(YAMLError): + + def __init__(self, name, position, character, encoding, reason): + self.name = name + self.character = character + self.position = position + self.encoding = encoding + self.reason = reason + + def __str__(self): + if isinstance(self.character, bytes): + return "'%s' codec can't decode byte #x%02x: %s\n" \ + " in \"%s\", position %d" \ + % (self.encoding, ord(self.character), self.reason, + self.name, self.position) + else: + return "unacceptable character #x%04x: %s\n" \ + " in \"%s\", position %d" \ + % (self.character, self.reason, + self.name, self.position) + +class Reader(object): + # Reader: + # - determines the data encoding and converts it to a unicode string, + # - checks if characters are in allowed range, + # - adds '\0' to the end. + + # Reader accepts + # - a `bytes` object, + # - a `str` object, + # - a file-like object with its `read` method returning `str`, + # - a file-like object with its `read` method returning `unicode`. + + # Yeah, it's ugly and slow. + + def __init__(self, stream): + self.name = None + self.stream = None + self.stream_pointer = 0 + self.eof = True + self.buffer = '' + self.pointer = 0 + self.raw_buffer = None + self.raw_decode = None + self.encoding = None + self.index = 0 + self.line = 0 + self.column = 0 + if isinstance(stream, str): + self.name = "<unicode string>" + self.check_printable(stream) + self.buffer = stream+'\0' + elif isinstance(stream, bytes): + self.name = "<byte string>" + self.raw_buffer = stream + self.determine_encoding() + else: + self.stream = stream + self.name = getattr(stream, 'name', "<file>") + self.eof = False + self.raw_buffer = None + self.determine_encoding() + + def peek(self, index=0): + try: + return self.buffer[self.pointer+index] + except IndexError: + self.update(index+1) + return self.buffer[self.pointer+index] + + def prefix(self, length=1): + if self.pointer+length >= len(self.buffer): + self.update(length) + return self.buffer[self.pointer:self.pointer+length] + + def forward(self, length=1): + if self.pointer+length+1 >= len(self.buffer): + self.update(length+1) + while length: + ch = self.buffer[self.pointer] + self.pointer += 1 + self.index += 1 + if ch in '\n\x85\u2028\u2029' \ + or (ch == '\r' and self.buffer[self.pointer] != '\n'): + self.line += 1 + self.column = 0 + elif ch != '\uFEFF': + self.column += 1 + length -= 1 + + def get_mark(self): + if self.stream is None: + return Mark(self.name, self.index, self.line, self.column, + self.buffer, self.pointer) + else: + return Mark(self.name, self.index, self.line, self.column, + None, None) + + def determine_encoding(self): + while not self.eof and (self.raw_buffer is None or len(self.raw_buffer) < 2): + self.update_raw() + if isinstance(self.raw_buffer, bytes): + if self.raw_buffer.startswith(codecs.BOM_UTF16_LE): + self.raw_decode = codecs.utf_16_le_decode + self.encoding = 'utf-16-le' + elif self.raw_buffer.startswith(codecs.BOM_UTF16_BE): + self.raw_decode = codecs.utf_16_be_decode + self.encoding = 'utf-16-be' + else: + self.raw_decode = codecs.utf_8_decode + self.encoding = 'utf-8' + self.update(1) + + NON_PRINTABLE = re.compile('[^\x09\x0A\x0D\x20-\x7E\x85\xA0-\uD7FF\uE000-\uFFFD]') + def check_printable(self, data): + match = self.NON_PRINTABLE.search(data) + if match: + character = match.group() + position = self.index+(len(self.buffer)-self.pointer)+match.start() + raise ReaderError(self.name, position, ord(character), + 'unicode', "special characters are not allowed") + + def update(self, length): + if self.raw_buffer is None: + return + self.buffer = self.buffer[self.pointer:] + self.pointer = 0 + while len(self.buffer) < length: + if not self.eof: + self.update_raw() + if self.raw_decode is not None: + try: + data, converted = self.raw_decode(self.raw_buffer, + 'strict', self.eof) + except UnicodeDecodeError as exc: + character = self.raw_buffer[exc.start] + if self.stream is not None: + position = self.stream_pointer-len(self.raw_buffer)+exc.start + else: + position = exc.start + raise ReaderError(self.name, position, character, + exc.encoding, exc.reason) + else: + data = self.raw_buffer + converted = len(data) + self.check_printable(data) + self.buffer += data + self.raw_buffer = self.raw_buffer[converted:] + if self.eof: + self.buffer += '\0' + self.raw_buffer = None + break + + def update_raw(self, size=4096): + data = self.stream.read(size) + if self.raw_buffer is None: + self.raw_buffer = data + else: + self.raw_buffer += data + self.stream_pointer += len(data) + if not data: + self.eof = True + +#try: +# import psyco +# psyco.bind(Reader) +#except ImportError: +# pass + diff --git a/python.d/python_modules/pyyaml3/representer.py b/python.d/python_modules/pyyaml3/representer.py new file mode 100644 index 000000000..67cd6fd25 --- /dev/null +++ b/python.d/python_modules/pyyaml3/representer.py @@ -0,0 +1,374 @@ + +__all__ = ['BaseRepresenter', 'SafeRepresenter', 'Representer', + 'RepresenterError'] + +from .error import * +from .nodes import * + +import datetime, sys, copyreg, types, base64 + +class RepresenterError(YAMLError): + pass + +class BaseRepresenter: + + yaml_representers = {} + yaml_multi_representers = {} + + def __init__(self, default_style=None, default_flow_style=None): + self.default_style = default_style + self.default_flow_style = default_flow_style + self.represented_objects = {} + self.object_keeper = [] + self.alias_key = None + + def represent(self, data): + node = self.represent_data(data) + self.serialize(node) + self.represented_objects = {} + self.object_keeper = [] + self.alias_key = None + + def represent_data(self, data): + if self.ignore_aliases(data): + self.alias_key = None + else: + self.alias_key = id(data) + if self.alias_key is not None: + if self.alias_key in self.represented_objects: + node = self.represented_objects[self.alias_key] + #if node is None: + # raise RepresenterError("recursive objects are not allowed: %r" % data) + return node + #self.represented_objects[alias_key] = None + self.object_keeper.append(data) + data_types = type(data).__mro__ + if data_types[0] in self.yaml_representers: + node = self.yaml_representers[data_types[0]](self, data) + else: + for data_type in data_types: + if data_type in self.yaml_multi_representers: + node = self.yaml_multi_representers[data_type](self, data) + break + else: + if None in self.yaml_multi_representers: + node = self.yaml_multi_representers[None](self, data) + elif None in self.yaml_representers: + node = self.yaml_representers[None](self, data) + else: + node = ScalarNode(None, str(data)) + #if alias_key is not None: + # self.represented_objects[alias_key] = node + return node + + @classmethod + def add_representer(cls, data_type, representer): + if not 'yaml_representers' in cls.__dict__: + cls.yaml_representers = cls.yaml_representers.copy() + cls.yaml_representers[data_type] = representer + + @classmethod + def add_multi_representer(cls, data_type, representer): + if not 'yaml_multi_representers' in cls.__dict__: + cls.yaml_multi_representers = cls.yaml_multi_representers.copy() + cls.yaml_multi_representers[data_type] = representer + + def represent_scalar(self, tag, value, style=None): + if style is None: + style = self.default_style + node = ScalarNode(tag, value, style=style) + if self.alias_key is not None: + self.represented_objects[self.alias_key] = node + return node + + def represent_sequence(self, tag, sequence, flow_style=None): + value = [] + node = SequenceNode(tag, value, flow_style=flow_style) + if self.alias_key is not None: + self.represented_objects[self.alias_key] = node + best_style = True + for item in sequence: + node_item = self.represent_data(item) + if not (isinstance(node_item, ScalarNode) and not node_item.style): + best_style = False + value.append(node_item) + if flow_style is None: + if self.default_flow_style is not None: + node.flow_style = self.default_flow_style + else: + node.flow_style = best_style + return node + + def represent_mapping(self, tag, mapping, flow_style=None): + value = [] + node = MappingNode(tag, value, flow_style=flow_style) + if self.alias_key is not None: + self.represented_objects[self.alias_key] = node + best_style = True + if hasattr(mapping, 'items'): + mapping = list(mapping.items()) + try: + mapping = sorted(mapping) + except TypeError: + pass + for item_key, item_value in mapping: + node_key = self.represent_data(item_key) + node_value = self.represent_data(item_value) + if not (isinstance(node_key, ScalarNode) and not node_key.style): + best_style = False + if not (isinstance(node_value, ScalarNode) and not node_value.style): + best_style = False + value.append((node_key, node_value)) + if flow_style is None: + if self.default_flow_style is not None: + node.flow_style = self.default_flow_style + else: + node.flow_style = best_style + return node + + def ignore_aliases(self, data): + return False + +class SafeRepresenter(BaseRepresenter): + + def ignore_aliases(self, data): + if data in [None, ()]: + return True + if isinstance(data, (str, bytes, bool, int, float)): + return True + + def represent_none(self, data): + return self.represent_scalar('tag:yaml.org,2002:null', 'null') + + def represent_str(self, data): + return self.represent_scalar('tag:yaml.org,2002:str', data) + + def represent_binary(self, data): + if hasattr(base64, 'encodebytes'): + data = base64.encodebytes(data).decode('ascii') + else: + data = base64.encodestring(data).decode('ascii') + return self.represent_scalar('tag:yaml.org,2002:binary', data, style='|') + + def represent_bool(self, data): + if data: + value = 'true' + else: + value = 'false' + return self.represent_scalar('tag:yaml.org,2002:bool', value) + + def represent_int(self, data): + return self.represent_scalar('tag:yaml.org,2002:int', str(data)) + + inf_value = 1e300 + while repr(inf_value) != repr(inf_value*inf_value): + inf_value *= inf_value + + def represent_float(self, data): + if data != data or (data == 0.0 and data == 1.0): + value = '.nan' + elif data == self.inf_value: + value = '.inf' + elif data == -self.inf_value: + value = '-.inf' + else: + value = repr(data).lower() + # Note that in some cases `repr(data)` represents a float number + # without the decimal parts. For instance: + # >>> repr(1e17) + # '1e17' + # Unfortunately, this is not a valid float representation according + # to the definition of the `!!float` tag. We fix this by adding + # '.0' before the 'e' symbol. + if '.' not in value and 'e' in value: + value = value.replace('e', '.0e', 1) + return self.represent_scalar('tag:yaml.org,2002:float', value) + + def represent_list(self, data): + #pairs = (len(data) > 0 and isinstance(data, list)) + #if pairs: + # for item in data: + # if not isinstance(item, tuple) or len(item) != 2: + # pairs = False + # break + #if not pairs: + return self.represent_sequence('tag:yaml.org,2002:seq', data) + #value = [] + #for item_key, item_value in data: + # value.append(self.represent_mapping(u'tag:yaml.org,2002:map', + # [(item_key, item_value)])) + #return SequenceNode(u'tag:yaml.org,2002:pairs', value) + + def represent_dict(self, data): + return self.represent_mapping('tag:yaml.org,2002:map', data) + + def represent_set(self, data): + value = {} + for key in data: + value[key] = None + return self.represent_mapping('tag:yaml.org,2002:set', value) + + def represent_date(self, data): + value = data.isoformat() + return self.represent_scalar('tag:yaml.org,2002:timestamp', value) + + def represent_datetime(self, data): + value = data.isoformat(' ') + return self.represent_scalar('tag:yaml.org,2002:timestamp', value) + + def represent_yaml_object(self, tag, data, cls, flow_style=None): + if hasattr(data, '__getstate__'): + state = data.__getstate__() + else: + state = data.__dict__.copy() + return self.represent_mapping(tag, state, flow_style=flow_style) + + def represent_undefined(self, data): + raise RepresenterError("cannot represent an object: %s" % data) + +SafeRepresenter.add_representer(type(None), + SafeRepresenter.represent_none) + +SafeRepresenter.add_representer(str, + SafeRepresenter.represent_str) + +SafeRepresenter.add_representer(bytes, + SafeRepresenter.represent_binary) + +SafeRepresenter.add_representer(bool, + SafeRepresenter.represent_bool) + +SafeRepresenter.add_representer(int, + SafeRepresenter.represent_int) + +SafeRepresenter.add_representer(float, + SafeRepresenter.represent_float) + +SafeRepresenter.add_representer(list, + SafeRepresenter.represent_list) + +SafeRepresenter.add_representer(tuple, + SafeRepresenter.represent_list) + +SafeRepresenter.add_representer(dict, + SafeRepresenter.represent_dict) + +SafeRepresenter.add_representer(set, + SafeRepresenter.represent_set) + +SafeRepresenter.add_representer(datetime.date, + SafeRepresenter.represent_date) + +SafeRepresenter.add_representer(datetime.datetime, + SafeRepresenter.represent_datetime) + +SafeRepresenter.add_representer(None, + SafeRepresenter.represent_undefined) + +class Representer(SafeRepresenter): + + def represent_complex(self, data): + if data.imag == 0.0: + data = '%r' % data.real + elif data.real == 0.0: + data = '%rj' % data.imag + elif data.imag > 0: + data = '%r+%rj' % (data.real, data.imag) + else: + data = '%r%rj' % (data.real, data.imag) + return self.represent_scalar('tag:yaml.org,2002:python/complex', data) + + def represent_tuple(self, data): + return self.represent_sequence('tag:yaml.org,2002:python/tuple', data) + + def represent_name(self, data): + name = '%s.%s' % (data.__module__, data.__name__) + return self.represent_scalar('tag:yaml.org,2002:python/name:'+name, '') + + def represent_module(self, data): + return self.represent_scalar( + 'tag:yaml.org,2002:python/module:'+data.__name__, '') + + def represent_object(self, data): + # We use __reduce__ API to save the data. data.__reduce__ returns + # a tuple of length 2-5: + # (function, args, state, listitems, dictitems) + + # For reconstructing, we calls function(*args), then set its state, + # listitems, and dictitems if they are not None. + + # A special case is when function.__name__ == '__newobj__'. In this + # case we create the object with args[0].__new__(*args). + + # Another special case is when __reduce__ returns a string - we don't + # support it. + + # We produce a !!python/object, !!python/object/new or + # !!python/object/apply node. + + cls = type(data) + if cls in copyreg.dispatch_table: + reduce = copyreg.dispatch_table[cls](data) + elif hasattr(data, '__reduce_ex__'): + reduce = data.__reduce_ex__(2) + elif hasattr(data, '__reduce__'): + reduce = data.__reduce__() + else: + raise RepresenterError("cannot represent object: %r" % data) + reduce = (list(reduce)+[None]*5)[:5] + function, args, state, listitems, dictitems = reduce + args = list(args) + if state is None: + state = {} + if listitems is not None: + listitems = list(listitems) + if dictitems is not None: + dictitems = dict(dictitems) + if function.__name__ == '__newobj__': + function = args[0] + args = args[1:] + tag = 'tag:yaml.org,2002:python/object/new:' + newobj = True + else: + tag = 'tag:yaml.org,2002:python/object/apply:' + newobj = False + function_name = '%s.%s' % (function.__module__, function.__name__) + if not args and not listitems and not dictitems \ + and isinstance(state, dict) and newobj: + return self.represent_mapping( + 'tag:yaml.org,2002:python/object:'+function_name, state) + if not listitems and not dictitems \ + and isinstance(state, dict) and not state: + return self.represent_sequence(tag+function_name, args) + value = {} + if args: + value['args'] = args + if state or not isinstance(state, dict): + value['state'] = state + if listitems: + value['listitems'] = listitems + if dictitems: + value['dictitems'] = dictitems + return self.represent_mapping(tag+function_name, value) + +Representer.add_representer(complex, + Representer.represent_complex) + +Representer.add_representer(tuple, + Representer.represent_tuple) + +Representer.add_representer(type, + Representer.represent_name) + +Representer.add_representer(types.FunctionType, + Representer.represent_name) + +Representer.add_representer(types.BuiltinFunctionType, + Representer.represent_name) + +Representer.add_representer(types.ModuleType, + Representer.represent_module) + +Representer.add_multi_representer(object, + Representer.represent_object) + diff --git a/python.d/python_modules/pyyaml3/resolver.py b/python.d/python_modules/pyyaml3/resolver.py new file mode 100644 index 000000000..0eece2582 --- /dev/null +++ b/python.d/python_modules/pyyaml3/resolver.py @@ -0,0 +1,224 @@ + +__all__ = ['BaseResolver', 'Resolver'] + +from .error import * +from .nodes import * + +import re + +class ResolverError(YAMLError): + pass + +class BaseResolver: + + DEFAULT_SCALAR_TAG = 'tag:yaml.org,2002:str' + DEFAULT_SEQUENCE_TAG = 'tag:yaml.org,2002:seq' + DEFAULT_MAPPING_TAG = 'tag:yaml.org,2002:map' + + yaml_implicit_resolvers = {} + yaml_path_resolvers = {} + + def __init__(self): + self.resolver_exact_paths = [] + self.resolver_prefix_paths = [] + + @classmethod + def add_implicit_resolver(cls, tag, regexp, first): + if not 'yaml_implicit_resolvers' in cls.__dict__: + cls.yaml_implicit_resolvers = cls.yaml_implicit_resolvers.copy() + if first is None: + first = [None] + for ch in first: + cls.yaml_implicit_resolvers.setdefault(ch, []).append((tag, regexp)) + + @classmethod + def add_path_resolver(cls, tag, path, kind=None): + # Note: `add_path_resolver` is experimental. The API could be changed. + # `new_path` is a pattern that is matched against the path from the + # root to the node that is being considered. `node_path` elements are + # tuples `(node_check, index_check)`. `node_check` is a node class: + # `ScalarNode`, `SequenceNode`, `MappingNode` or `None`. `None` + # matches any kind of a node. `index_check` could be `None`, a boolean + # value, a string value, or a number. `None` and `False` match against + # any _value_ of sequence and mapping nodes. `True` matches against + # any _key_ of a mapping node. A string `index_check` matches against + # a mapping value that corresponds to a scalar key which content is + # equal to the `index_check` value. An integer `index_check` matches + # against a sequence value with the index equal to `index_check`. + if not 'yaml_path_resolvers' in cls.__dict__: + cls.yaml_path_resolvers = cls.yaml_path_resolvers.copy() + new_path = [] + for element in path: + if isinstance(element, (list, tuple)): + if len(element) == 2: + node_check, index_check = element + elif len(element) == 1: + node_check = element[0] + index_check = True + else: + raise ResolverError("Invalid path element: %s" % element) + else: + node_check = None + index_check = element + if node_check is str: + node_check = ScalarNode + elif node_check is list: + node_check = SequenceNode + elif node_check is dict: + node_check = MappingNode + elif node_check not in [ScalarNode, SequenceNode, MappingNode] \ + and not isinstance(node_check, str) \ + and node_check is not None: + raise ResolverError("Invalid node checker: %s" % node_check) + if not isinstance(index_check, (str, int)) \ + and index_check is not None: + raise ResolverError("Invalid index checker: %s" % index_check) + new_path.append((node_check, index_check)) + if kind is str: + kind = ScalarNode + elif kind is list: + kind = SequenceNode + elif kind is dict: + kind = MappingNode + elif kind not in [ScalarNode, SequenceNode, MappingNode] \ + and kind is not None: + raise ResolverError("Invalid node kind: %s" % kind) + cls.yaml_path_resolvers[tuple(new_path), kind] = tag + + def descend_resolver(self, current_node, current_index): + if not self.yaml_path_resolvers: + return + exact_paths = {} + prefix_paths = [] + if current_node: + depth = len(self.resolver_prefix_paths) + for path, kind in self.resolver_prefix_paths[-1]: + if self.check_resolver_prefix(depth, path, kind, + current_node, current_index): + if len(path) > depth: + prefix_paths.append((path, kind)) + else: + exact_paths[kind] = self.yaml_path_resolvers[path, kind] + else: + for path, kind in self.yaml_path_resolvers: + if not path: + exact_paths[kind] = self.yaml_path_resolvers[path, kind] + else: + prefix_paths.append((path, kind)) + self.resolver_exact_paths.append(exact_paths) + self.resolver_prefix_paths.append(prefix_paths) + + def ascend_resolver(self): + if not self.yaml_path_resolvers: + return + self.resolver_exact_paths.pop() + self.resolver_prefix_paths.pop() + + def check_resolver_prefix(self, depth, path, kind, + current_node, current_index): + node_check, index_check = path[depth-1] + if isinstance(node_check, str): + if current_node.tag != node_check: + return + elif node_check is not None: + if not isinstance(current_node, node_check): + return + if index_check is True and current_index is not None: + return + if (index_check is False or index_check is None) \ + and current_index is None: + return + if isinstance(index_check, str): + if not (isinstance(current_index, ScalarNode) + and index_check == current_index.value): + return + elif isinstance(index_check, int) and not isinstance(index_check, bool): + if index_check != current_index: + return + return True + + def resolve(self, kind, value, implicit): + if kind is ScalarNode and implicit[0]: + if value == '': + resolvers = self.yaml_implicit_resolvers.get('', []) + else: + resolvers = self.yaml_implicit_resolvers.get(value[0], []) + resolvers += self.yaml_implicit_resolvers.get(None, []) + for tag, regexp in resolvers: + if regexp.match(value): + return tag + implicit = implicit[1] + if self.yaml_path_resolvers: + exact_paths = self.resolver_exact_paths[-1] + if kind in exact_paths: + return exact_paths[kind] + if None in exact_paths: + return exact_paths[None] + if kind is ScalarNode: + return self.DEFAULT_SCALAR_TAG + elif kind is SequenceNode: + return self.DEFAULT_SEQUENCE_TAG + elif kind is MappingNode: + return self.DEFAULT_MAPPING_TAG + +class Resolver(BaseResolver): + pass + +Resolver.add_implicit_resolver( + 'tag:yaml.org,2002:bool', + re.compile(r'''^(?:yes|Yes|YES|no|No|NO + |true|True|TRUE|false|False|FALSE + |on|On|ON|off|Off|OFF)$''', re.X), + list('yYnNtTfFoO')) + +Resolver.add_implicit_resolver( + 'tag:yaml.org,2002:float', + re.compile(r'''^(?:[-+]?(?:[0-9][0-9_]*)\.[0-9_]*(?:[eE][-+][0-9]+)? + |\.[0-9_]+(?:[eE][-+][0-9]+)? + |[-+]?[0-9][0-9_]*(?::[0-5]?[0-9])+\.[0-9_]* + |[-+]?\.(?:inf|Inf|INF) + |\.(?:nan|NaN|NAN))$''', re.X), + list('-+0123456789.')) + +Resolver.add_implicit_resolver( + 'tag:yaml.org,2002:int', + re.compile(r'''^(?:[-+]?0b[0-1_]+ + |[-+]?0[0-7_]+ + |[-+]?(?:0|[1-9][0-9_]*) + |[-+]?0x[0-9a-fA-F_]+ + |[-+]?[1-9][0-9_]*(?::[0-5]?[0-9])+)$''', re.X), + list('-+0123456789')) + +Resolver.add_implicit_resolver( + 'tag:yaml.org,2002:merge', + re.compile(r'^(?:<<)$'), + ['<']) + +Resolver.add_implicit_resolver( + 'tag:yaml.org,2002:null', + re.compile(r'''^(?: ~ + |null|Null|NULL + | )$''', re.X), + ['~', 'n', 'N', '']) + +Resolver.add_implicit_resolver( + 'tag:yaml.org,2002:timestamp', + re.compile(r'''^(?:[0-9][0-9][0-9][0-9]-[0-9][0-9]-[0-9][0-9] + |[0-9][0-9][0-9][0-9] -[0-9][0-9]? -[0-9][0-9]? + (?:[Tt]|[ \t]+)[0-9][0-9]? + :[0-9][0-9] :[0-9][0-9] (?:\.[0-9]*)? + (?:[ \t]*(?:Z|[-+][0-9][0-9]?(?::[0-9][0-9])?))?)$''', re.X), + list('0123456789')) + +Resolver.add_implicit_resolver( + 'tag:yaml.org,2002:value', + re.compile(r'^(?:=)$'), + ['=']) + +# The following resolver is only for documentation purposes. It cannot work +# because plain scalars cannot start with '!', '&', or '*'. +Resolver.add_implicit_resolver( + 'tag:yaml.org,2002:yaml', + re.compile(r'^(?:!|&|\*)$'), + list('!&*')) + diff --git a/python.d/python_modules/pyyaml3/scanner.py b/python.d/python_modules/pyyaml3/scanner.py new file mode 100644 index 000000000..494d975ba --- /dev/null +++ b/python.d/python_modules/pyyaml3/scanner.py @@ -0,0 +1,1448 @@ + +# Scanner produces tokens of the following types: +# STREAM-START +# STREAM-END +# DIRECTIVE(name, value) +# DOCUMENT-START +# DOCUMENT-END +# BLOCK-SEQUENCE-START +# BLOCK-MAPPING-START +# BLOCK-END +# FLOW-SEQUENCE-START +# FLOW-MAPPING-START +# FLOW-SEQUENCE-END +# FLOW-MAPPING-END +# BLOCK-ENTRY +# FLOW-ENTRY +# KEY +# VALUE +# ALIAS(value) +# ANCHOR(value) +# TAG(value) +# SCALAR(value, plain, style) +# +# Read comments in the Scanner code for more details. +# + +__all__ = ['Scanner', 'ScannerError'] + +from .error import MarkedYAMLError +from .tokens import * + +class ScannerError(MarkedYAMLError): + pass + +class SimpleKey: + # See below simple keys treatment. + + def __init__(self, token_number, required, index, line, column, mark): + self.token_number = token_number + self.required = required + self.index = index + self.line = line + self.column = column + self.mark = mark + +class Scanner: + + def __init__(self): + """Initialize the scanner.""" + # It is assumed that Scanner and Reader will have a common descendant. + # Reader do the dirty work of checking for BOM and converting the + # input data to Unicode. It also adds NUL to the end. + # + # Reader supports the following methods + # self.peek(i=0) # peek the next i-th character + # self.prefix(l=1) # peek the next l characters + # self.forward(l=1) # read the next l characters and move the pointer. + + # Had we reached the end of the stream? + self.done = False + + # The number of unclosed '{' and '['. `flow_level == 0` means block + # context. + self.flow_level = 0 + + # List of processed tokens that are not yet emitted. + self.tokens = [] + + # Add the STREAM-START token. + self.fetch_stream_start() + + # Number of tokens that were emitted through the `get_token` method. + self.tokens_taken = 0 + + # The current indentation level. + self.indent = -1 + + # Past indentation levels. + self.indents = [] + + # Variables related to simple keys treatment. + + # A simple key is a key that is not denoted by the '?' indicator. + # Example of simple keys: + # --- + # block simple key: value + # ? not a simple key: + # : { flow simple key: value } + # We emit the KEY token before all keys, so when we find a potential + # simple key, we try to locate the corresponding ':' indicator. + # Simple keys should be limited to a single line and 1024 characters. + + # Can a simple key start at the current position? A simple key may + # start: + # - at the beginning of the line, not counting indentation spaces + # (in block context), + # - after '{', '[', ',' (in the flow context), + # - after '?', ':', '-' (in the block context). + # In the block context, this flag also signifies if a block collection + # may start at the current position. + self.allow_simple_key = True + + # Keep track of possible simple keys. This is a dictionary. The key + # is `flow_level`; there can be no more that one possible simple key + # for each level. The value is a SimpleKey record: + # (token_number, required, index, line, column, mark) + # A simple key may start with ALIAS, ANCHOR, TAG, SCALAR(flow), + # '[', or '{' tokens. + self.possible_simple_keys = {} + + # Public methods. + + def check_token(self, *choices): + # Check if the next token is one of the given types. + while self.need_more_tokens(): + self.fetch_more_tokens() + if self.tokens: + if not choices: + return True + for choice in choices: + if isinstance(self.tokens[0], choice): + return True + return False + + def peek_token(self): + # Return the next token, but do not delete if from the queue. + while self.need_more_tokens(): + self.fetch_more_tokens() + if self.tokens: + return self.tokens[0] + + def get_token(self): + # Return the next token. + while self.need_more_tokens(): + self.fetch_more_tokens() + if self.tokens: + self.tokens_taken += 1 + return self.tokens.pop(0) + + # Private methods. + + def need_more_tokens(self): + if self.done: + return False + if not self.tokens: + return True + # The current token may be a potential simple key, so we + # need to look further. + self.stale_possible_simple_keys() + if self.next_possible_simple_key() == self.tokens_taken: + return True + + def fetch_more_tokens(self): + + # Eat whitespaces and comments until we reach the next token. + self.scan_to_next_token() + + # Remove obsolete possible simple keys. + self.stale_possible_simple_keys() + + # Compare the current indentation and column. It may add some tokens + # and decrease the current indentation level. + self.unwind_indent(self.column) + + # Peek the next character. + ch = self.peek() + + # Is it the end of stream? + if ch == '\0': + return self.fetch_stream_end() + + # Is it a directive? + if ch == '%' and self.check_directive(): + return self.fetch_directive() + + # Is it the document start? + if ch == '-' and self.check_document_start(): + return self.fetch_document_start() + + # Is it the document end? + if ch == '.' and self.check_document_end(): + return self.fetch_document_end() + + # TODO: support for BOM within a stream. + #if ch == '\uFEFF': + # return self.fetch_bom() <-- issue BOMToken + + # Note: the order of the following checks is NOT significant. + + # Is it the flow sequence start indicator? + if ch == '[': + return self.fetch_flow_sequence_start() + + # Is it the flow mapping start indicator? + if ch == '{': + return self.fetch_flow_mapping_start() + + # Is it the flow sequence end indicator? + if ch == ']': + return self.fetch_flow_sequence_end() + + # Is it the flow mapping end indicator? + if ch == '}': + return self.fetch_flow_mapping_end() + + # Is it the flow entry indicator? + if ch == ',': + return self.fetch_flow_entry() + + # Is it the block entry indicator? + if ch == '-' and self.check_block_entry(): + return self.fetch_block_entry() + + # Is it the key indicator? + if ch == '?' and self.check_key(): + return self.fetch_key() + + # Is it the value indicator? + if ch == ':' and self.check_value(): + return self.fetch_value() + + # Is it an alias? + if ch == '*': + return self.fetch_alias() + + # Is it an anchor? + if ch == '&': + return self.fetch_anchor() + + # Is it a tag? + if ch == '!': + return self.fetch_tag() + + # Is it a literal scalar? + if ch == '|' and not self.flow_level: + return self.fetch_literal() + + # Is it a folded scalar? + if ch == '>' and not self.flow_level: + return self.fetch_folded() + + # Is it a single quoted scalar? + if ch == '\'': + return self.fetch_single() + + # Is it a double quoted scalar? + if ch == '\"': + return self.fetch_double() + + # It must be a plain scalar then. + if self.check_plain(): + return self.fetch_plain() + + # No? It's an error. Let's produce a nice error message. + raise ScannerError("while scanning for the next token", None, + "found character %r that cannot start any token" % ch, + self.get_mark()) + + # Simple keys treatment. + + def next_possible_simple_key(self): + # Return the number of the nearest possible simple key. Actually we + # don't need to loop through the whole dictionary. We may replace it + # with the following code: + # if not self.possible_simple_keys: + # return None + # return self.possible_simple_keys[ + # min(self.possible_simple_keys.keys())].token_number + min_token_number = None + for level in self.possible_simple_keys: + key = self.possible_simple_keys[level] + if min_token_number is None or key.token_number < min_token_number: + min_token_number = key.token_number + return min_token_number + + def stale_possible_simple_keys(self): + # Remove entries that are no longer possible simple keys. According to + # the YAML specification, simple keys + # - should be limited to a single line, + # - should be no longer than 1024 characters. + # Disabling this procedure will allow simple keys of any length and + # height (may cause problems if indentation is broken though). + for level in list(self.possible_simple_keys): + key = self.possible_simple_keys[level] + if key.line != self.line \ + or self.index-key.index > 1024: + if key.required: + raise ScannerError("while scanning a simple key", key.mark, + "could not found expected ':'", self.get_mark()) + del self.possible_simple_keys[level] + + def save_possible_simple_key(self): + # The next token may start a simple key. We check if it's possible + # and save its position. This function is called for + # ALIAS, ANCHOR, TAG, SCALAR(flow), '[', and '{'. + + # Check if a simple key is required at the current position. + required = not self.flow_level and self.indent == self.column + + # A simple key is required only if it is the first token in the current + # line. Therefore it is always allowed. + assert self.allow_simple_key or not required + + # The next token might be a simple key. Let's save it's number and + # position. + if self.allow_simple_key: + self.remove_possible_simple_key() + token_number = self.tokens_taken+len(self.tokens) + key = SimpleKey(token_number, required, + self.index, self.line, self.column, self.get_mark()) + self.possible_simple_keys[self.flow_level] = key + + def remove_possible_simple_key(self): + # Remove the saved possible key position at the current flow level. + if self.flow_level in self.possible_simple_keys: + key = self.possible_simple_keys[self.flow_level] + + if key.required: + raise ScannerError("while scanning a simple key", key.mark, + "could not found expected ':'", self.get_mark()) + + del self.possible_simple_keys[self.flow_level] + + # Indentation functions. + + def unwind_indent(self, column): + + ## In flow context, tokens should respect indentation. + ## Actually the condition should be `self.indent >= column` according to + ## the spec. But this condition will prohibit intuitively correct + ## constructions such as + ## key : { + ## } + #if self.flow_level and self.indent > column: + # raise ScannerError(None, None, + # "invalid intendation or unclosed '[' or '{'", + # self.get_mark()) + + # In the flow context, indentation is ignored. We make the scanner less + # restrictive then specification requires. + if self.flow_level: + return + + # In block context, we may need to issue the BLOCK-END tokens. + while self.indent > column: + mark = self.get_mark() + self.indent = self.indents.pop() + self.tokens.append(BlockEndToken(mark, mark)) + + def add_indent(self, column): + # Check if we need to increase indentation. + if self.indent < column: + self.indents.append(self.indent) + self.indent = column + return True + return False + + # Fetchers. + + def fetch_stream_start(self): + # We always add STREAM-START as the first token and STREAM-END as the + # last token. + + # Read the token. + mark = self.get_mark() + + # Add STREAM-START. + self.tokens.append(StreamStartToken(mark, mark, + encoding=self.encoding)) + + + def fetch_stream_end(self): + + # Set the current intendation to -1. + self.unwind_indent(-1) + + # Reset simple keys. + self.remove_possible_simple_key() + self.allow_simple_key = False + self.possible_simple_keys = {} + + # Read the token. + mark = self.get_mark() + + # Add STREAM-END. + self.tokens.append(StreamEndToken(mark, mark)) + + # The steam is finished. + self.done = True + + def fetch_directive(self): + + # Set the current intendation to -1. + self.unwind_indent(-1) + + # Reset simple keys. + self.remove_possible_simple_key() + self.allow_simple_key = False + + # Scan and add DIRECTIVE. + self.tokens.append(self.scan_directive()) + + def fetch_document_start(self): + self.fetch_document_indicator(DocumentStartToken) + + def fetch_document_end(self): + self.fetch_document_indicator(DocumentEndToken) + + def fetch_document_indicator(self, TokenClass): + + # Set the current intendation to -1. + self.unwind_indent(-1) + + # Reset simple keys. Note that there could not be a block collection + # after '---'. + self.remove_possible_simple_key() + self.allow_simple_key = False + + # Add DOCUMENT-START or DOCUMENT-END. + start_mark = self.get_mark() + self.forward(3) + end_mark = self.get_mark() + self.tokens.append(TokenClass(start_mark, end_mark)) + + def fetch_flow_sequence_start(self): + self.fetch_flow_collection_start(FlowSequenceStartToken) + + def fetch_flow_mapping_start(self): + self.fetch_flow_collection_start(FlowMappingStartToken) + + def fetch_flow_collection_start(self, TokenClass): + + # '[' and '{' may start a simple key. + self.save_possible_simple_key() + + # Increase the flow level. + self.flow_level += 1 + + # Simple keys are allowed after '[' and '{'. + self.allow_simple_key = True + + # Add FLOW-SEQUENCE-START or FLOW-MAPPING-START. + start_mark = self.get_mark() + self.forward() + end_mark = self.get_mark() + self.tokens.append(TokenClass(start_mark, end_mark)) + + def fetch_flow_sequence_end(self): + self.fetch_flow_collection_end(FlowSequenceEndToken) + + def fetch_flow_mapping_end(self): + self.fetch_flow_collection_end(FlowMappingEndToken) + + def fetch_flow_collection_end(self, TokenClass): + + # Reset possible simple key on the current level. + self.remove_possible_simple_key() + + # Decrease the flow level. + self.flow_level -= 1 + + # No simple keys after ']' or '}'. + self.allow_simple_key = False + + # Add FLOW-SEQUENCE-END or FLOW-MAPPING-END. + start_mark = self.get_mark() + self.forward() + end_mark = self.get_mark() + self.tokens.append(TokenClass(start_mark, end_mark)) + + def fetch_flow_entry(self): + + # Simple keys are allowed after ','. + self.allow_simple_key = True + + # Reset possible simple key on the current level. + self.remove_possible_simple_key() + + # Add FLOW-ENTRY. + start_mark = self.get_mark() + self.forward() + end_mark = self.get_mark() + self.tokens.append(FlowEntryToken(start_mark, end_mark)) + + def fetch_block_entry(self): + + # Block context needs additional checks. + if not self.flow_level: + + # Are we allowed to start a new entry? + if not self.allow_simple_key: + raise ScannerError(None, None, + "sequence entries are not allowed here", + self.get_mark()) + + # We may need to add BLOCK-SEQUENCE-START. + if self.add_indent(self.column): + mark = self.get_mark() + self.tokens.append(BlockSequenceStartToken(mark, mark)) + + # It's an error for the block entry to occur in the flow context, + # but we let the parser detect this. + else: + pass + + # Simple keys are allowed after '-'. + self.allow_simple_key = True + + # Reset possible simple key on the current level. + self.remove_possible_simple_key() + + # Add BLOCK-ENTRY. + start_mark = self.get_mark() + self.forward() + end_mark = self.get_mark() + self.tokens.append(BlockEntryToken(start_mark, end_mark)) + + def fetch_key(self): + + # Block context needs additional checks. + if not self.flow_level: + + # Are we allowed to start a key (not nessesary a simple)? + if not self.allow_simple_key: + raise ScannerError(None, None, + "mapping keys are not allowed here", + self.get_mark()) + + # We may need to add BLOCK-MAPPING-START. + if self.add_indent(self.column): + mark = self.get_mark() + self.tokens.append(BlockMappingStartToken(mark, mark)) + + # Simple keys are allowed after '?' in the block context. + self.allow_simple_key = not self.flow_level + + # Reset possible simple key on the current level. + self.remove_possible_simple_key() + + # Add KEY. + start_mark = self.get_mark() + self.forward() + end_mark = self.get_mark() + self.tokens.append(KeyToken(start_mark, end_mark)) + + def fetch_value(self): + + # Do we determine a simple key? + if self.flow_level in self.possible_simple_keys: + + # Add KEY. + key = self.possible_simple_keys[self.flow_level] + del self.possible_simple_keys[self.flow_level] + self.tokens.insert(key.token_number-self.tokens_taken, + KeyToken(key.mark, key.mark)) + + # If this key starts a new block mapping, we need to add + # BLOCK-MAPPING-START. + if not self.flow_level: + if self.add_indent(key.column): + self.tokens.insert(key.token_number-self.tokens_taken, + BlockMappingStartToken(key.mark, key.mark)) + + # There cannot be two simple keys one after another. + self.allow_simple_key = False + + # It must be a part of a complex key. + else: + + # Block context needs additional checks. + # (Do we really need them? They will be catched by the parser + # anyway.) + if not self.flow_level: + + # We are allowed to start a complex value if and only if + # we can start a simple key. + if not self.allow_simple_key: + raise ScannerError(None, None, + "mapping values are not allowed here", + self.get_mark()) + + # If this value starts a new block mapping, we need to add + # BLOCK-MAPPING-START. It will be detected as an error later by + # the parser. + if not self.flow_level: + if self.add_indent(self.column): + mark = self.get_mark() + self.tokens.append(BlockMappingStartToken(mark, mark)) + + # Simple keys are allowed after ':' in the block context. + self.allow_simple_key = not self.flow_level + + # Reset possible simple key on the current level. + self.remove_possible_simple_key() + + # Add VALUE. + start_mark = self.get_mark() + self.forward() + end_mark = self.get_mark() + self.tokens.append(ValueToken(start_mark, end_mark)) + + def fetch_alias(self): + + # ALIAS could be a simple key. + self.save_possible_simple_key() + + # No simple keys after ALIAS. + self.allow_simple_key = False + + # Scan and add ALIAS. + self.tokens.append(self.scan_anchor(AliasToken)) + + def fetch_anchor(self): + + # ANCHOR could start a simple key. + self.save_possible_simple_key() + + # No simple keys after ANCHOR. + self.allow_simple_key = False + + # Scan and add ANCHOR. + self.tokens.append(self.scan_anchor(AnchorToken)) + + def fetch_tag(self): + + # TAG could start a simple key. + self.save_possible_simple_key() + + # No simple keys after TAG. + self.allow_simple_key = False + + # Scan and add TAG. + self.tokens.append(self.scan_tag()) + + def fetch_literal(self): + self.fetch_block_scalar(style='|') + + def fetch_folded(self): + self.fetch_block_scalar(style='>') + + def fetch_block_scalar(self, style): + + # A simple key may follow a block scalar. + self.allow_simple_key = True + + # Reset possible simple key on the current level. + self.remove_possible_simple_key() + + # Scan and add SCALAR. + self.tokens.append(self.scan_block_scalar(style)) + + def fetch_single(self): + self.fetch_flow_scalar(style='\'') + + def fetch_double(self): + self.fetch_flow_scalar(style='"') + + def fetch_flow_scalar(self, style): + + # A flow scalar could be a simple key. + self.save_possible_simple_key() + + # No simple keys after flow scalars. + self.allow_simple_key = False + + # Scan and add SCALAR. + self.tokens.append(self.scan_flow_scalar(style)) + + def fetch_plain(self): + + # A plain scalar could be a simple key. + self.save_possible_simple_key() + + # No simple keys after plain scalars. But note that `scan_plain` will + # change this flag if the scan is finished at the beginning of the + # line. + self.allow_simple_key = False + + # Scan and add SCALAR. May change `allow_simple_key`. + self.tokens.append(self.scan_plain()) + + # Checkers. + + def check_directive(self): + + # DIRECTIVE: ^ '%' ... + # The '%' indicator is already checked. + if self.column == 0: + return True + + def check_document_start(self): + + # DOCUMENT-START: ^ '---' (' '|'\n') + if self.column == 0: + if self.prefix(3) == '---' \ + and self.peek(3) in '\0 \t\r\n\x85\u2028\u2029': + return True + + def check_document_end(self): + + # DOCUMENT-END: ^ '...' (' '|'\n') + if self.column == 0: + if self.prefix(3) == '...' \ + and self.peek(3) in '\0 \t\r\n\x85\u2028\u2029': + return True + + def check_block_entry(self): + + # BLOCK-ENTRY: '-' (' '|'\n') + return self.peek(1) in '\0 \t\r\n\x85\u2028\u2029' + + def check_key(self): + + # KEY(flow context): '?' + if self.flow_level: + return True + + # KEY(block context): '?' (' '|'\n') + else: + return self.peek(1) in '\0 \t\r\n\x85\u2028\u2029' + + def check_value(self): + + # VALUE(flow context): ':' + if self.flow_level: + return True + + # VALUE(block context): ':' (' '|'\n') + else: + return self.peek(1) in '\0 \t\r\n\x85\u2028\u2029' + + def check_plain(self): + + # A plain scalar may start with any non-space character except: + # '-', '?', ':', ',', '[', ']', '{', '}', + # '#', '&', '*', '!', '|', '>', '\'', '\"', + # '%', '@', '`'. + # + # It may also start with + # '-', '?', ':' + # if it is followed by a non-space character. + # + # Note that we limit the last rule to the block context (except the + # '-' character) because we want the flow context to be space + # independent. + ch = self.peek() + return ch not in '\0 \t\r\n\x85\u2028\u2029-?:,[]{}#&*!|>\'\"%@`' \ + or (self.peek(1) not in '\0 \t\r\n\x85\u2028\u2029' + and (ch == '-' or (not self.flow_level and ch in '?:'))) + + # Scanners. + + def scan_to_next_token(self): + # We ignore spaces, line breaks and comments. + # If we find a line break in the block context, we set the flag + # `allow_simple_key` on. + # The byte order mark is stripped if it's the first character in the + # stream. We do not yet support BOM inside the stream as the + # specification requires. Any such mark will be considered as a part + # of the document. + # + # TODO: We need to make tab handling rules more sane. A good rule is + # Tabs cannot precede tokens + # BLOCK-SEQUENCE-START, BLOCK-MAPPING-START, BLOCK-END, + # KEY(block), VALUE(block), BLOCK-ENTRY + # So the checking code is + # if <TAB>: + # self.allow_simple_keys = False + # We also need to add the check for `allow_simple_keys == True` to + # `unwind_indent` before issuing BLOCK-END. + # Scanners for block, flow, and plain scalars need to be modified. + + if self.index == 0 and self.peek() == '\uFEFF': + self.forward() + found = False + while not found: + while self.peek() == ' ': + self.forward() + if self.peek() == '#': + while self.peek() not in '\0\r\n\x85\u2028\u2029': + self.forward() + if self.scan_line_break(): + if not self.flow_level: + self.allow_simple_key = True + else: + found = True + + def scan_directive(self): + # See the specification for details. + start_mark = self.get_mark() + self.forward() + name = self.scan_directive_name(start_mark) + value = None + if name == 'YAML': + value = self.scan_yaml_directive_value(start_mark) + end_mark = self.get_mark() + elif name == 'TAG': + value = self.scan_tag_directive_value(start_mark) + end_mark = self.get_mark() + else: + end_mark = self.get_mark() + while self.peek() not in '\0\r\n\x85\u2028\u2029': + self.forward() + self.scan_directive_ignored_line(start_mark) + return DirectiveToken(name, value, start_mark, end_mark) + + def scan_directive_name(self, start_mark): + # See the specification for details. + length = 0 + ch = self.peek(length) + while '0' <= ch <= '9' or 'A' <= ch <= 'Z' or 'a' <= ch <= 'z' \ + or ch in '-_': + length += 1 + ch = self.peek(length) + if not length: + raise ScannerError("while scanning a directive", start_mark, + "expected alphabetic or numeric character, but found %r" + % ch, self.get_mark()) + value = self.prefix(length) + self.forward(length) + ch = self.peek() + if ch not in '\0 \r\n\x85\u2028\u2029': + raise ScannerError("while scanning a directive", start_mark, + "expected alphabetic or numeric character, but found %r" + % ch, self.get_mark()) + return value + + def scan_yaml_directive_value(self, start_mark): + # See the specification for details. + while self.peek() == ' ': + self.forward() + major = self.scan_yaml_directive_number(start_mark) + if self.peek() != '.': + raise ScannerError("while scanning a directive", start_mark, + "expected a digit or '.', but found %r" % self.peek(), + self.get_mark()) + self.forward() + minor = self.scan_yaml_directive_number(start_mark) + if self.peek() not in '\0 \r\n\x85\u2028\u2029': + raise ScannerError("while scanning a directive", start_mark, + "expected a digit or ' ', but found %r" % self.peek(), + self.get_mark()) + return (major, minor) + + def scan_yaml_directive_number(self, start_mark): + # See the specification for details. + ch = self.peek() + if not ('0' <= ch <= '9'): + raise ScannerError("while scanning a directive", start_mark, + "expected a digit, but found %r" % ch, self.get_mark()) + length = 0 + while '0' <= self.peek(length) <= '9': + length += 1 + value = int(self.prefix(length)) + self.forward(length) + return value + + def scan_tag_directive_value(self, start_mark): + # See the specification for details. + while self.peek() == ' ': + self.forward() + handle = self.scan_tag_directive_handle(start_mark) + while self.peek() == ' ': + self.forward() + prefix = self.scan_tag_directive_prefix(start_mark) + return (handle, prefix) + + def scan_tag_directive_handle(self, start_mark): + # See the specification for details. + value = self.scan_tag_handle('directive', start_mark) + ch = self.peek() + if ch != ' ': + raise ScannerError("while scanning a directive", start_mark, + "expected ' ', but found %r" % ch, self.get_mark()) + return value + + def scan_tag_directive_prefix(self, start_mark): + # See the specification for details. + value = self.scan_tag_uri('directive', start_mark) + ch = self.peek() + if ch not in '\0 \r\n\x85\u2028\u2029': + raise ScannerError("while scanning a directive", start_mark, + "expected ' ', but found %r" % ch, self.get_mark()) + return value + + def scan_directive_ignored_line(self, start_mark): + # See the specification for details. + while self.peek() == ' ': + self.forward() + if self.peek() == '#': + while self.peek() not in '\0\r\n\x85\u2028\u2029': + self.forward() + ch = self.peek() + if ch not in '\0\r\n\x85\u2028\u2029': + raise ScannerError("while scanning a directive", start_mark, + "expected a comment or a line break, but found %r" + % ch, self.get_mark()) + self.scan_line_break() + + def scan_anchor(self, TokenClass): + # The specification does not restrict characters for anchors and + # aliases. This may lead to problems, for instance, the document: + # [ *alias, value ] + # can be interpteted in two ways, as + # [ "value" ] + # and + # [ *alias , "value" ] + # Therefore we restrict aliases to numbers and ASCII letters. + start_mark = self.get_mark() + indicator = self.peek() + if indicator == '*': + name = 'alias' + else: + name = 'anchor' + self.forward() + length = 0 + ch = self.peek(length) + while '0' <= ch <= '9' or 'A' <= ch <= 'Z' or 'a' <= ch <= 'z' \ + or ch in '-_': + length += 1 + ch = self.peek(length) + if not length: + raise ScannerError("while scanning an %s" % name, start_mark, + "expected alphabetic or numeric character, but found %r" + % ch, self.get_mark()) + value = self.prefix(length) + self.forward(length) + ch = self.peek() + if ch not in '\0 \t\r\n\x85\u2028\u2029?:,]}%@`': + raise ScannerError("while scanning an %s" % name, start_mark, + "expected alphabetic or numeric character, but found %r" + % ch, self.get_mark()) + end_mark = self.get_mark() + return TokenClass(value, start_mark, end_mark) + + def scan_tag(self): + # See the specification for details. + start_mark = self.get_mark() + ch = self.peek(1) + if ch == '<': + handle = None + self.forward(2) + suffix = self.scan_tag_uri('tag', start_mark) + if self.peek() != '>': + raise ScannerError("while parsing a tag", start_mark, + "expected '>', but found %r" % self.peek(), + self.get_mark()) + self.forward() + elif ch in '\0 \t\r\n\x85\u2028\u2029': + handle = None + suffix = '!' + self.forward() + else: + length = 1 + use_handle = False + while ch not in '\0 \r\n\x85\u2028\u2029': + if ch == '!': + use_handle = True + break + length += 1 + ch = self.peek(length) + handle = '!' + if use_handle: + handle = self.scan_tag_handle('tag', start_mark) + else: + handle = '!' + self.forward() + suffix = self.scan_tag_uri('tag', start_mark) + ch = self.peek() + if ch not in '\0 \r\n\x85\u2028\u2029': + raise ScannerError("while scanning a tag", start_mark, + "expected ' ', but found %r" % ch, self.get_mark()) + value = (handle, suffix) + end_mark = self.get_mark() + return TagToken(value, start_mark, end_mark) + + def scan_block_scalar(self, style): + # See the specification for details. + + if style == '>': + folded = True + else: + folded = False + + chunks = [] + start_mark = self.get_mark() + + # Scan the header. + self.forward() + chomping, increment = self.scan_block_scalar_indicators(start_mark) + self.scan_block_scalar_ignored_line(start_mark) + + # Determine the indentation level and go to the first non-empty line. + min_indent = self.indent+1 + if min_indent < 1: + min_indent = 1 + if increment is None: + breaks, max_indent, end_mark = self.scan_block_scalar_indentation() + indent = max(min_indent, max_indent) + else: + indent = min_indent+increment-1 + breaks, end_mark = self.scan_block_scalar_breaks(indent) + line_break = '' + + # Scan the inner part of the block scalar. + while self.column == indent and self.peek() != '\0': + chunks.extend(breaks) + leading_non_space = self.peek() not in ' \t' + length = 0 + while self.peek(length) not in '\0\r\n\x85\u2028\u2029': + length += 1 + chunks.append(self.prefix(length)) + self.forward(length) + line_break = self.scan_line_break() + breaks, end_mark = self.scan_block_scalar_breaks(indent) + if self.column == indent and self.peek() != '\0': + + # Unfortunately, folding rules are ambiguous. + # + # This is the folding according to the specification: + + if folded and line_break == '\n' \ + and leading_non_space and self.peek() not in ' \t': + if not breaks: + chunks.append(' ') + else: + chunks.append(line_break) + + # This is Clark Evans's interpretation (also in the spec + # examples): + # + #if folded and line_break == '\n': + # if not breaks: + # if self.peek() not in ' \t': + # chunks.append(' ') + # else: + # chunks.append(line_break) + #else: + # chunks.append(line_break) + else: + break + + # Chomp the tail. + if chomping is not False: + chunks.append(line_break) + if chomping is True: + chunks.extend(breaks) + + # We are done. + return ScalarToken(''.join(chunks), False, start_mark, end_mark, + style) + + def scan_block_scalar_indicators(self, start_mark): + # See the specification for details. + chomping = None + increment = None + ch = self.peek() + if ch in '+-': + if ch == '+': + chomping = True + else: + chomping = False + self.forward() + ch = self.peek() + if ch in '0123456789': + increment = int(ch) + if increment == 0: + raise ScannerError("while scanning a block scalar", start_mark, + "expected indentation indicator in the range 1-9, but found 0", + self.get_mark()) + self.forward() + elif ch in '0123456789': + increment = int(ch) + if increment == 0: + raise ScannerError("while scanning a block scalar", start_mark, + "expected indentation indicator in the range 1-9, but found 0", + self.get_mark()) + self.forward() + ch = self.peek() + if ch in '+-': + if ch == '+': + chomping = True + else: + chomping = False + self.forward() + ch = self.peek() + if ch not in '\0 \r\n\x85\u2028\u2029': + raise ScannerError("while scanning a block scalar", start_mark, + "expected chomping or indentation indicators, but found %r" + % ch, self.get_mark()) + return chomping, increment + + def scan_block_scalar_ignored_line(self, start_mark): + # See the specification for details. + while self.peek() == ' ': + self.forward() + if self.peek() == '#': + while self.peek() not in '\0\r\n\x85\u2028\u2029': + self.forward() + ch = self.peek() + if ch not in '\0\r\n\x85\u2028\u2029': + raise ScannerError("while scanning a block scalar", start_mark, + "expected a comment or a line break, but found %r" % ch, + self.get_mark()) + self.scan_line_break() + + def scan_block_scalar_indentation(self): + # See the specification for details. + chunks = [] + max_indent = 0 + end_mark = self.get_mark() + while self.peek() in ' \r\n\x85\u2028\u2029': + if self.peek() != ' ': + chunks.append(self.scan_line_break()) + end_mark = self.get_mark() + else: + self.forward() + if self.column > max_indent: + max_indent = self.column + return chunks, max_indent, end_mark + + def scan_block_scalar_breaks(self, indent): + # See the specification for details. + chunks = [] + end_mark = self.get_mark() + while self.column < indent and self.peek() == ' ': + self.forward() + while self.peek() in '\r\n\x85\u2028\u2029': + chunks.append(self.scan_line_break()) + end_mark = self.get_mark() + while self.column < indent and self.peek() == ' ': + self.forward() + return chunks, end_mark + + def scan_flow_scalar(self, style): + # See the specification for details. + # Note that we loose indentation rules for quoted scalars. Quoted + # scalars don't need to adhere indentation because " and ' clearly + # mark the beginning and the end of them. Therefore we are less + # restrictive then the specification requires. We only need to check + # that document separators are not included in scalars. + if style == '"': + double = True + else: + double = False + chunks = [] + start_mark = self.get_mark() + quote = self.peek() + self.forward() + chunks.extend(self.scan_flow_scalar_non_spaces(double, start_mark)) + while self.peek() != quote: + chunks.extend(self.scan_flow_scalar_spaces(double, start_mark)) + chunks.extend(self.scan_flow_scalar_non_spaces(double, start_mark)) + self.forward() + end_mark = self.get_mark() + return ScalarToken(''.join(chunks), False, start_mark, end_mark, + style) + + ESCAPE_REPLACEMENTS = { + '0': '\0', + 'a': '\x07', + 'b': '\x08', + 't': '\x09', + '\t': '\x09', + 'n': '\x0A', + 'v': '\x0B', + 'f': '\x0C', + 'r': '\x0D', + 'e': '\x1B', + ' ': '\x20', + '\"': '\"', + '\\': '\\', + 'N': '\x85', + '_': '\xA0', + 'L': '\u2028', + 'P': '\u2029', + } + + ESCAPE_CODES = { + 'x': 2, + 'u': 4, + 'U': 8, + } + + def scan_flow_scalar_non_spaces(self, double, start_mark): + # See the specification for details. + chunks = [] + while True: + length = 0 + while self.peek(length) not in '\'\"\\\0 \t\r\n\x85\u2028\u2029': + length += 1 + if length: + chunks.append(self.prefix(length)) + self.forward(length) + ch = self.peek() + if not double and ch == '\'' and self.peek(1) == '\'': + chunks.append('\'') + self.forward(2) + elif (double and ch == '\'') or (not double and ch in '\"\\'): + chunks.append(ch) + self.forward() + elif double and ch == '\\': + self.forward() + ch = self.peek() + if ch in self.ESCAPE_REPLACEMENTS: + chunks.append(self.ESCAPE_REPLACEMENTS[ch]) + self.forward() + elif ch in self.ESCAPE_CODES: + length = self.ESCAPE_CODES[ch] + self.forward() + for k in range(length): + if self.peek(k) not in '0123456789ABCDEFabcdef': + raise ScannerError("while scanning a double-quoted scalar", start_mark, + "expected escape sequence of %d hexdecimal numbers, but found %r" % + (length, self.peek(k)), self.get_mark()) + code = int(self.prefix(length), 16) + chunks.append(chr(code)) + self.forward(length) + elif ch in '\r\n\x85\u2028\u2029': + self.scan_line_break() + chunks.extend(self.scan_flow_scalar_breaks(double, start_mark)) + else: + raise ScannerError("while scanning a double-quoted scalar", start_mark, + "found unknown escape character %r" % ch, self.get_mark()) + else: + return chunks + + def scan_flow_scalar_spaces(self, double, start_mark): + # See the specification for details. + chunks = [] + length = 0 + while self.peek(length) in ' \t': + length += 1 + whitespaces = self.prefix(length) + self.forward(length) + ch = self.peek() + if ch == '\0': + raise ScannerError("while scanning a quoted scalar", start_mark, + "found unexpected end of stream", self.get_mark()) + elif ch in '\r\n\x85\u2028\u2029': + line_break = self.scan_line_break() + breaks = self.scan_flow_scalar_breaks(double, start_mark) + if line_break != '\n': + chunks.append(line_break) + elif not breaks: + chunks.append(' ') + chunks.extend(breaks) + else: + chunks.append(whitespaces) + return chunks + + def scan_flow_scalar_breaks(self, double, start_mark): + # See the specification for details. + chunks = [] + while True: + # Instead of checking indentation, we check for document + # separators. + prefix = self.prefix(3) + if (prefix == '---' or prefix == '...') \ + and self.peek(3) in '\0 \t\r\n\x85\u2028\u2029': + raise ScannerError("while scanning a quoted scalar", start_mark, + "found unexpected document separator", self.get_mark()) + while self.peek() in ' \t': + self.forward() + if self.peek() in '\r\n\x85\u2028\u2029': + chunks.append(self.scan_line_break()) + else: + return chunks + + def scan_plain(self): + # See the specification for details. + # We add an additional restriction for the flow context: + # plain scalars in the flow context cannot contain ',', ':' and '?'. + # We also keep track of the `allow_simple_key` flag here. + # Indentation rules are loosed for the flow context. + chunks = [] + start_mark = self.get_mark() + end_mark = start_mark + indent = self.indent+1 + # We allow zero indentation for scalars, but then we need to check for + # document separators at the beginning of the line. + #if indent == 0: + # indent = 1 + spaces = [] + while True: + length = 0 + if self.peek() == '#': + break + while True: + ch = self.peek(length) + if ch in '\0 \t\r\n\x85\u2028\u2029' \ + or (not self.flow_level and ch == ':' and + self.peek(length+1) in '\0 \t\r\n\x85\u2028\u2029') \ + or (self.flow_level and ch in ',:?[]{}'): + break + length += 1 + # It's not clear what we should do with ':' in the flow context. + if (self.flow_level and ch == ':' + and self.peek(length+1) not in '\0 \t\r\n\x85\u2028\u2029,[]{}'): + self.forward(length) + raise ScannerError("while scanning a plain scalar", start_mark, + "found unexpected ':'", self.get_mark(), + "Please check http://pyyaml.org/wiki/YAMLColonInFlowContext for details.") + if length == 0: + break + self.allow_simple_key = False + chunks.extend(spaces) + chunks.append(self.prefix(length)) + self.forward(length) + end_mark = self.get_mark() + spaces = self.scan_plain_spaces(indent, start_mark) + if not spaces or self.peek() == '#' \ + or (not self.flow_level and self.column < indent): + break + return ScalarToken(''.join(chunks), True, start_mark, end_mark) + + def scan_plain_spaces(self, indent, start_mark): + # See the specification for details. + # The specification is really confusing about tabs in plain scalars. + # We just forbid them completely. Do not use tabs in YAML! + chunks = [] + length = 0 + while self.peek(length) in ' ': + length += 1 + whitespaces = self.prefix(length) + self.forward(length) + ch = self.peek() + if ch in '\r\n\x85\u2028\u2029': + line_break = self.scan_line_break() + self.allow_simple_key = True + prefix = self.prefix(3) + if (prefix == '---' or prefix == '...') \ + and self.peek(3) in '\0 \t\r\n\x85\u2028\u2029': + return + breaks = [] + while self.peek() in ' \r\n\x85\u2028\u2029': + if self.peek() == ' ': + self.forward() + else: + breaks.append(self.scan_line_break()) + prefix = self.prefix(3) + if (prefix == '---' or prefix == '...') \ + and self.peek(3) in '\0 \t\r\n\x85\u2028\u2029': + return + if line_break != '\n': + chunks.append(line_break) + elif not breaks: + chunks.append(' ') + chunks.extend(breaks) + elif whitespaces: + chunks.append(whitespaces) + return chunks + + def scan_tag_handle(self, name, start_mark): + # See the specification for details. + # For some strange reasons, the specification does not allow '_' in + # tag handles. I have allowed it anyway. + ch = self.peek() + if ch != '!': + raise ScannerError("while scanning a %s" % name, start_mark, + "expected '!', but found %r" % ch, self.get_mark()) + length = 1 + ch = self.peek(length) + if ch != ' ': + while '0' <= ch <= '9' or 'A' <= ch <= 'Z' or 'a' <= ch <= 'z' \ + or ch in '-_': + length += 1 + ch = self.peek(length) + if ch != '!': + self.forward(length) + raise ScannerError("while scanning a %s" % name, start_mark, + "expected '!', but found %r" % ch, self.get_mark()) + length += 1 + value = self.prefix(length) + self.forward(length) + return value + + def scan_tag_uri(self, name, start_mark): + # See the specification for details. + # Note: we do not check if URI is well-formed. + chunks = [] + length = 0 + ch = self.peek(length) + while '0' <= ch <= '9' or 'A' <= ch <= 'Z' or 'a' <= ch <= 'z' \ + or ch in '-;/?:@&=+$,_.!~*\'()[]%': + if ch == '%': + chunks.append(self.prefix(length)) + self.forward(length) + length = 0 + chunks.append(self.scan_uri_escapes(name, start_mark)) + else: + length += 1 + ch = self.peek(length) + if length: + chunks.append(self.prefix(length)) + self.forward(length) + length = 0 + if not chunks: + raise ScannerError("while parsing a %s" % name, start_mark, + "expected URI, but found %r" % ch, self.get_mark()) + return ''.join(chunks) + + def scan_uri_escapes(self, name, start_mark): + # See the specification for details. + codes = [] + mark = self.get_mark() + while self.peek() == '%': + self.forward() + for k in range(2): + if self.peek(k) not in '0123456789ABCDEFabcdef': + raise ScannerError("while scanning a %s" % name, start_mark, + "expected URI escape sequence of 2 hexdecimal numbers, but found %r" + % self.peek(k), self.get_mark()) + codes.append(int(self.prefix(2), 16)) + self.forward(2) + try: + value = bytes(codes).decode('utf-8') + except UnicodeDecodeError as exc: + raise ScannerError("while scanning a %s" % name, start_mark, str(exc), mark) + return value + + def scan_line_break(self): + # Transforms: + # '\r\n' : '\n' + # '\r' : '\n' + # '\n' : '\n' + # '\x85' : '\n' + # '\u2028' : '\u2028' + # '\u2029 : '\u2029' + # default : '' + ch = self.peek() + if ch in '\r\n\x85': + if self.prefix(2) == '\r\n': + self.forward(2) + else: + self.forward() + return '\n' + elif ch in '\u2028\u2029': + self.forward() + return ch + return '' + +#try: +# import psyco +# psyco.bind(Scanner) +#except ImportError: +# pass + diff --git a/python.d/python_modules/pyyaml3/serializer.py b/python.d/python_modules/pyyaml3/serializer.py new file mode 100644 index 000000000..fe911e67a --- /dev/null +++ b/python.d/python_modules/pyyaml3/serializer.py @@ -0,0 +1,111 @@ + +__all__ = ['Serializer', 'SerializerError'] + +from .error import YAMLError +from .events import * +from .nodes import * + +class SerializerError(YAMLError): + pass + +class Serializer: + + ANCHOR_TEMPLATE = 'id%03d' + + def __init__(self, encoding=None, + explicit_start=None, explicit_end=None, version=None, tags=None): + self.use_encoding = encoding + self.use_explicit_start = explicit_start + self.use_explicit_end = explicit_end + self.use_version = version + self.use_tags = tags + self.serialized_nodes = {} + self.anchors = {} + self.last_anchor_id = 0 + self.closed = None + + def open(self): + if self.closed is None: + self.emit(StreamStartEvent(encoding=self.use_encoding)) + self.closed = False + elif self.closed: + raise SerializerError("serializer is closed") + else: + raise SerializerError("serializer is already opened") + + def close(self): + if self.closed is None: + raise SerializerError("serializer is not opened") + elif not self.closed: + self.emit(StreamEndEvent()) + self.closed = True + + #def __del__(self): + # self.close() + + def serialize(self, node): + if self.closed is None: + raise SerializerError("serializer is not opened") + elif self.closed: + raise SerializerError("serializer is closed") + self.emit(DocumentStartEvent(explicit=self.use_explicit_start, + version=self.use_version, tags=self.use_tags)) + self.anchor_node(node) + self.serialize_node(node, None, None) + self.emit(DocumentEndEvent(explicit=self.use_explicit_end)) + self.serialized_nodes = {} + self.anchors = {} + self.last_anchor_id = 0 + + def anchor_node(self, node): + if node in self.anchors: + if self.anchors[node] is None: + self.anchors[node] = self.generate_anchor(node) + else: + self.anchors[node] = None + if isinstance(node, SequenceNode): + for item in node.value: + self.anchor_node(item) + elif isinstance(node, MappingNode): + for key, value in node.value: + self.anchor_node(key) + self.anchor_node(value) + + def generate_anchor(self, node): + self.last_anchor_id += 1 + return self.ANCHOR_TEMPLATE % self.last_anchor_id + + def serialize_node(self, node, parent, index): + alias = self.anchors[node] + if node in self.serialized_nodes: + self.emit(AliasEvent(alias)) + else: + self.serialized_nodes[node] = True + self.descend_resolver(parent, index) + if isinstance(node, ScalarNode): + detected_tag = self.resolve(ScalarNode, node.value, (True, False)) + default_tag = self.resolve(ScalarNode, node.value, (False, True)) + implicit = (node.tag == detected_tag), (node.tag == default_tag) + self.emit(ScalarEvent(alias, node.tag, implicit, node.value, + style=node.style)) + elif isinstance(node, SequenceNode): + implicit = (node.tag + == self.resolve(SequenceNode, node.value, True)) + self.emit(SequenceStartEvent(alias, node.tag, implicit, + flow_style=node.flow_style)) + index = 0 + for item in node.value: + self.serialize_node(item, node, index) + index += 1 + self.emit(SequenceEndEvent()) + elif isinstance(node, MappingNode): + implicit = (node.tag + == self.resolve(MappingNode, node.value, True)) + self.emit(MappingStartEvent(alias, node.tag, implicit, + flow_style=node.flow_style)) + for key, value in node.value: + self.serialize_node(key, node, None) + self.serialize_node(value, node, key) + self.emit(MappingEndEvent()) + self.ascend_resolver() + diff --git a/python.d/python_modules/pyyaml3/tokens.py b/python.d/python_modules/pyyaml3/tokens.py new file mode 100644 index 000000000..4d0b48a39 --- /dev/null +++ b/python.d/python_modules/pyyaml3/tokens.py @@ -0,0 +1,104 @@ + +class Token(object): + def __init__(self, start_mark, end_mark): + self.start_mark = start_mark + self.end_mark = end_mark + def __repr__(self): + attributes = [key for key in self.__dict__ + if not key.endswith('_mark')] + attributes.sort() + arguments = ', '.join(['%s=%r' % (key, getattr(self, key)) + for key in attributes]) + return '%s(%s)' % (self.__class__.__name__, arguments) + +#class BOMToken(Token): +# id = '<byte order mark>' + +class DirectiveToken(Token): + id = '<directive>' + def __init__(self, name, value, start_mark, end_mark): + self.name = name + self.value = value + self.start_mark = start_mark + self.end_mark = end_mark + +class DocumentStartToken(Token): + id = '<document start>' + +class DocumentEndToken(Token): + id = '<document end>' + +class StreamStartToken(Token): + id = '<stream start>' + def __init__(self, start_mark=None, end_mark=None, + encoding=None): + self.start_mark = start_mark + self.end_mark = end_mark + self.encoding = encoding + +class StreamEndToken(Token): + id = '<stream end>' + +class BlockSequenceStartToken(Token): + id = '<block sequence start>' + +class BlockMappingStartToken(Token): + id = '<block mapping start>' + +class BlockEndToken(Token): + id = '<block end>' + +class FlowSequenceStartToken(Token): + id = '[' + +class FlowMappingStartToken(Token): + id = '{' + +class FlowSequenceEndToken(Token): + id = ']' + +class FlowMappingEndToken(Token): + id = '}' + +class KeyToken(Token): + id = '?' + +class ValueToken(Token): + id = ':' + +class BlockEntryToken(Token): + id = '-' + +class FlowEntryToken(Token): + id = ',' + +class AliasToken(Token): + id = '<alias>' + def __init__(self, value, start_mark, end_mark): + self.value = value + self.start_mark = start_mark + self.end_mark = end_mark + +class AnchorToken(Token): + id = '<anchor>' + def __init__(self, value, start_mark, end_mark): + self.value = value + self.start_mark = start_mark + self.end_mark = end_mark + +class TagToken(Token): + id = '<tag>' + def __init__(self, value, start_mark, end_mark): + self.value = value + self.start_mark = start_mark + self.end_mark = end_mark + +class ScalarToken(Token): + id = '<scalar>' + def __init__(self, value, plain, start_mark, end_mark, style=None): + self.value = value + self.plain = plain + self.start_mark = start_mark + self.end_mark = end_mark + self.style = style + diff --git a/python.d/rabbitmq.chart.py b/python.d/rabbitmq.chart.py index eef472bf2..b8847e9f8 100644 --- a/python.d/rabbitmq.chart.py +++ b/python.d/rabbitmq.chart.py @@ -24,7 +24,8 @@ NODE_STATS = ['fd_used', 'mem_used', 'sockets_used', 'proc_used', - 'disk_free' + 'disk_free', + 'run_queue' ] OVERVIEW_STATS = ['object_totals.channels', 'object_totals.consumers', @@ -39,7 +40,7 @@ OVERVIEW_STATS = ['object_totals.channels', 'message_stats.publish' ] ORDER = ['queued_messages', 'message_rates', 'global_counts', - 'file_descriptors', 'socket_descriptors', 'erlang_processes', 'memory', 'disk_space'] + 'file_descriptors', 'socket_descriptors', 'erlang_processes', 'erlang_run_queue', 'memory', 'disk_space'] CHARTS = { 'file_descriptors': { @@ -72,6 +73,12 @@ CHARTS = { 'lines': [ ['proc_used', 'used', 'absolute'] ]}, + 'erlang_run_queue': { + 'options': [None, 'Erlang Run Queue', 'processes', 'overview', + 'rabbitmq.erlang_run_queue', 'line'], + 'lines': [ + ['run_queue',' length', 'absolute'] + ]}, 'global_counts': { 'options': [None, 'Global Counts', 'counts', 'overview', 'rabbitmq.global_counts', 'line'], diff --git a/python.d/springboot.chart.py b/python.d/springboot.chart.py new file mode 100644 index 000000000..60ad0cccb --- /dev/null +++ b/python.d/springboot.chart.py @@ -0,0 +1,151 @@ +# -*- coding: utf-8 -*- +# Description: tomcat netdata python.d module +# Author: Wing924 + +import json +from bases.FrameworkServices.UrlService import UrlService + +# default module values (can be overridden per job in `config`) +# update_every = 2 +priority = 60000 +retries = 60 + + +DEFAULT_ORDER = ['response_code', 'threads', 'gc_time', 'gc_ope', 'heap'] + +DEFAULT_CHARTS = { + 'response_code': { + 'options': [None, "Response Codes", "requests/s", "response", "springboot.response_code", "stacked"], + 'lines': [ + ["resp_other", 'Other', 'incremental'], + ["resp_1xx", '1xx', 'incremental'], + ["resp_2xx", '2xx', 'incremental'], + ["resp_3xx", '3xx', 'incremental'], + ["resp_4xx", '4xx', 'incremental'], + ["resp_5xx", '5xx', 'incremental'], + ]}, + 'threads': { + 'options': [None, "Threads", "current threads", "threads", "springboot.threads", "area"], + 'lines': [ + ["threads_daemon", 'daemon', 'absolute'], + ["threads", 'total', 'absolute'], + ]}, + 'gc_time': { + 'options': [None, "GC Time", "milliseconds", "garbage collection", "springboot.gc_time", "stacked"], + 'lines': [ + ["gc_copy_time", 'Copy', 'incremental'], + ["gc_marksweepcompact_time", 'MarkSweepCompact', 'incremental'], + ["gc_parnew_time", 'ParNew', 'incremental'], + ["gc_concurrentmarksweep_time", 'ConcurrentMarkSweep', 'incremental'], + ["gc_ps_scavenge_time", 'PS Scavenge', 'incremental'], + ["gc_ps_marksweep_time", 'PS MarkSweep', 'incremental'], + ["gc_g1_young_generation_time", 'G1 Young Generation', 'incremental'], + ["gc_g1_old_generation_time", 'G1 Old Generation', 'incremental'], + ]}, + 'gc_ope': { + 'options': [None, "GC Operations", "operations/s", "garbage collection", "springboot.gc_ope", "stacked"], + 'lines': [ + ["gc_copy_count", 'Copy', 'incremental'], + ["gc_marksweepcompact_count", 'MarkSweepCompact', 'incremental'], + ["gc_parnew_count", 'ParNew', 'incremental'], + ["gc_concurrentmarksweep_count", 'ConcurrentMarkSweep', 'incremental'], + ["gc_ps_scavenge_count", 'PS Scavenge', 'incremental'], + ["gc_ps_marksweep_count", 'PS MarkSweep', 'incremental'], + ["gc_g1_young_generation_count", 'G1 Young Generation', 'incremental'], + ["gc_g1_old_generation_count", 'G1 Old Generation', 'incremental'], + ]}, + 'heap': { + 'options': [None, "Heap Memory Usage", "KB", "heap memory", "springboot.heap", "area"], + 'lines': [ + ["heap_committed", 'committed', "absolute"], + ["heap_used", 'used', "absolute"], + ]}, +} + +class ExtraChartError(ValueError): + pass + +class Service(UrlService): + def __init__(self, configuration=None, name=None): + UrlService.__init__(self, configuration=configuration, name=name) + self.url = self.configuration.get('url', "http://localhost:8080/metrics") + self._setup_charts() + + def _get_data(self): + """ + Format data received from http request + :return: dict + """ + raw_data = self._get_raw_data() + if not raw_data: + return None + + try: + data = json.loads(raw_data) + except ValueError: + self.debug('%s is not a vaild JSON page' % self.url) + return None + + result = { + 'resp_1xx': 0, + 'resp_2xx': 0, + 'resp_3xx': 0, + 'resp_4xx': 0, + 'resp_5xx': 0, + 'resp_other': 0, + } + + for key, value in data.iteritems(): + if 'counter.status.' in key: + status_type = key[15:16] + 'xx' + if status_type[0] not in '12345': + status_type = 'other' + result['resp_' + status_type] += value + else: + result[key.replace('.', '_')] = value + + return result or None + + def _setup_charts(self): + self.order = [] + self.definitions = {} + defaults = self.configuration.get('defaults', {}) + + for chart in DEFAULT_ORDER: + if defaults.get(chart, True): + self.order.append(chart) + self.definitions[chart] = DEFAULT_CHARTS[chart] + + for extra in self.configuration.get('extras', []): + self._add_extra_chart(extra) + self.order.append(extra['id']) + + def _add_extra_chart(self, chart): + chart_id = chart.get('id', None) or die('id is not defined in extra chart') + options = chart.get('options', None) or die('option is not defined in extra chart: %s' % chart_id) + lines = chart.get('lines', None) or die('lines is not defined in extra chart: %s' % chart_id) + + title = options.get('title', None) or die('title is missing: %s' % chart_id) + units = options.get('units', None) or die('units is missing: %s' % chart_id) + family = options.get('family', title) + context = options.get('context', 'springboot.' + title) + charttype = options.get('charttype', 'line') + + result = { + 'options': [None, title, units, family, context, charttype], + 'lines': [], + } + + for line in lines: + dimension = line.get('dimension', None) or die('dimension is missing: %s' % chart_id) + name = line.get('name', dimension) + algorithm = line.get('algorithm', 'absolute') + multiplier = line.get('multiplier', 1) + divisor = line.get('divisor', 1) + result['lines'].append([dimension, name, algorithm, multiplier, divisor]) + + self.definitions[chart_id] = result + + @staticmethod + def die(error_message): + raise ExtraChartError(error_message) diff --git a/python.d/traefik.chart.py b/python.d/traefik.chart.py new file mode 100644 index 000000000..f7c3e223b --- /dev/null +++ b/python.d/traefik.chart.py @@ -0,0 +1,181 @@ +# -*- coding: utf-8 -*- +# Description: traefik netdata python.d module +# Author: Alexandre Menezes (@ale_menezes) + +from json import loads +from collections import defaultdict +from bases.FrameworkServices.UrlService import UrlService + +# default module values (can be overridden per job in `config`) +update_every = 1 +priority = 60000 +retries = 10 + +# charts order (can be overridden if you want less charts, or different order) +ORDER = [ + 'response_statuses', + 'response_codes', + 'detailed_response_codes', + 'requests', + 'total_response_time', + 'average_response_time', + 'average_response_time_per_iteration', + 'uptime' +] + +CHARTS = { + 'response_statuses': { + 'options': [None, 'Response statuses', 'requests/s', 'responses', 'traefik.response_statuses', 'stacked'], + 'lines': [ + ['successful_requests', 'success', 'incremental'], + ['server_errors', 'error', 'incremental'], + ['redirects', 'redirect', 'incremental'], + ['bad_requests', 'bad', 'incremental'], + ['other_requests', 'other', 'incremental'] + ]}, + 'response_codes': { + 'options': [None, 'Responses by codes', 'requests/s', 'responses', 'traefik.response_codes', 'stacked'], + 'lines': [ + ['2xx', None, 'incremental'], + ['5xx', None, 'incremental'], + ['3xx', None, 'incremental'], + ['4xx', None, 'incremental'], + ['1xx', None, 'incremental'], + ['other', None, 'incremental'] + ]}, + 'detailed_response_codes': { + 'options': [None, 'Detailed response codes', 'requests/s', 'responses', 'traefik.detailed_response_codes', 'stacked'], + 'lines': [ + ]}, + 'requests': { + 'options': [None, 'Requests', 'requests/s', 'requests', 'traefik.requests', 'line'], + 'lines': [ + ['total_count', 'requests', 'incremental'] + ]}, + 'total_response_time': { + 'options': [None, 'Total response time', 'seconds', 'timings', 'traefik.total_response_time', 'line'], + 'lines': [ + ['total_response_time_sec', 'response', 'absolute', 1, 10000] + ]}, + 'average_response_time': { + 'options': [None, 'Average response time', 'milliseconds', 'timings', 'traefik.average_response_time', 'line'], + 'lines': [ + ['average_response_time_sec', 'response', 'absolute', 1, 1000] + ]}, + 'average_response_time_per_iteration': { + 'options': [None, 'Average response time per iteration', 'milliseconds', 'timings', 'traefik.average_response_time_per_iteration', 'line'], + 'lines': [ + ['average_response_time_per_iteration_sec', 'response', 'incremental', 1, 10000] + ]}, + 'uptime': { + 'options': [None, 'Uptime', 'seconds', 'uptime', 'traefik.uptime', 'line'], + 'lines': [ + ['uptime_sec', 'uptime', 'absolute'] + ]} + } + +HEALTH_STATS = [ + 'uptime_sec', + 'average_response_time_sec', + 'total_response_time_sec', + 'total_count', + 'total_status_code_count' +] + +class Service(UrlService): + def __init__(self, configuration=None, name=None): + UrlService.__init__(self, configuration=configuration, name=name) + self.url = self.configuration.get('url', 'http://localhost:8080/health') + self.order = ORDER + self.definitions = CHARTS + self.data = { + 'successful_requests': 0, 'redirects': 0, 'bad_requests': 0, + 'server_errors': 0, 'other_requests': 0, '1xx': 0, '2xx': 0, + '3xx': 0, '4xx': 0, '5xx': 0, 'other': 0, + 'average_response_time_per_iteration_sec': 0 + } + self.last_total_response_time = 0 + self.last_total_count = 0 + + def _get_data(self): + data = self._get_raw_data() + + if not data: + return None + + data = loads(data) + + self.get_data_per_code_status(raw_data=data) + + self.get_data_per_code_family(raw_data=data) + + self.get_data_per_code(raw_data=data) + + self.data.update(fetch_data_(raw_data=data, metrics=HEALTH_STATS)) + + self.data['average_response_time_sec'] *= 1000000 + self.data['total_response_time_sec'] *= 10000 + if data['total_count'] != self.last_total_count: + self.data['average_response_time_per_iteration_sec'] = (data['total_response_time_sec'] - self.last_total_response_time) * 1000000 / (data['total_count'] - self.last_total_count) + else: + self.data['average_response_time_per_iteration_sec'] = 0 + self.last_total_response_time = data['total_response_time_sec'] + self.last_total_count = data['total_count'] + + return self.data or None + + def get_data_per_code_status(self, raw_data): + data = defaultdict(int) + for code, value in raw_data['total_status_code_count'].items(): + code_prefix = code[0] + if code_prefix == '1' or code_prefix == '2' or code == '304': + data['successful_requests'] += value + elif code_prefix == '3': + data['redirects'] += value + elif code_prefix == '4': + data['bad_requests'] += value + elif code_prefix == '5': + data['server_errors'] += value + else: + data['other_requests'] += value + self.data.update(data) + + def get_data_per_code_family(self, raw_data): + data = defaultdict(int) + for code, value in raw_data['total_status_code_count'].items(): + code_prefix = code[0] + if code_prefix == '1': + data['1xx'] += value + elif code_prefix == '2': + data['2xx'] += value + elif code_prefix == '3': + data['3xx'] += value + elif code_prefix == '4': + data['4xx'] += value + elif code_prefix == '5': + data['5xx'] += value + else: + data['other'] += value + self.data.update(data) + + def get_data_per_code(self, raw_data): + for code, value in raw_data['total_status_code_count'].items(): + if self.charts: + if code not in self.data: + self.charts['detailed_response_codes'].add_dimension([code, code, 'incremental']) + self.data[code] = value + +def fetch_data_(raw_data, metrics): + data = dict() + + for metric in metrics: + value = raw_data + metrics_list = metric.split('.') + try: + for m in metrics_list: + value = value[m] + except KeyError: + continue + data['_'.join(metrics_list)] = value + + return data diff --git a/python.d/web_log.chart.py b/python.d/web_log.chart.py index 954ecd41d..be9baba92 100644 --- a/python.d/web_log.chart.py +++ b/python.d/web_log.chart.py @@ -5,6 +5,7 @@ import bisect import re import os +import sys from collections import namedtuple, defaultdict from copy import deepcopy @@ -21,7 +22,8 @@ from bases.FrameworkServices.LogService import LogService ORDER_APACHE_CACHE = ['apache_cache'] -ORDER_WEB = ['response_statuses', 'response_codes', 'bandwidth', 'response_time', 'response_time_upstream', +ORDER_WEB = ['response_statuses', 'response_codes', 'bandwidth', + 'response_time', 'response_time_hist', 'response_time_upstream', 'response_time_upstream_hist', 'requests_per_url', 'requests_per_user_defined', 'http_method', 'http_version', 'requests_per_ipproto', 'clients', 'clients_all'] @@ -55,6 +57,10 @@ CHARTS_WEB = { ['resp_time_max', 'max', 'incremental', 1, 1000], ['resp_time_avg', 'avg', 'incremental', 1, 1000] ]}, + 'response_time_hist': { + 'options': [None, 'Processing Time Histogram', 'requests/s', 'timings', 'web_log.response_time_hist', 'line'], + 'lines': [ + ]}, 'response_time_upstream': { 'options': [None, 'Processing Time Upstream', 'milliseconds', 'timings', 'web_log.response_time_upstream', 'area'], @@ -63,6 +69,11 @@ CHARTS_WEB = { ['resp_time_upstream_max', 'max', 'incremental', 1, 1000], ['resp_time_upstream_avg', 'avg', 'incremental', 1, 1000] ]}, + 'response_time_upstream_hist': { + 'options': [None, 'Processing Time Histogram', 'requests/s', 'timings', + 'web_log.response_time_upstream_hist', 'line'], + 'lines': [ + ]}, 'clients': { 'options': [None, 'Current Poll Unique Client IPs', 'unique ips', 'clients', 'web_log.clients', 'stacked'], 'lines': [ @@ -347,8 +358,30 @@ class Web: """ if 'resp_time' not in match_dict: self.order.remove('response_time') + self.order.remove('response_time_hist') if 'resp_time_upstream' not in match_dict: self.order.remove('response_time_upstream') + self.order.remove('response_time_upstream_hist') + + # Add 'response_time_hist' and 'response_time_upstream_hist' charts if is specified in the configuration + histogram = self.configuration.get('histogram', None) + if isinstance(histogram, list): + self.storage['bucket_index'] = histogram[:] + self.storage['bucket_index'].append(sys.maxint) + self.storage['buckets'] = [0] * (len(histogram) + 1) + self.storage['upstream_buckets'] = [0] * (len(histogram) + 1) + hist_lines = self.definitions['response_time_hist']['lines'] + upstream_hist_lines = self.definitions['response_time_upstream_hist']['lines'] + for i, le in enumerate(histogram): + hist_key = "response_time_hist_%d" % i + upstream_hist_key = "response_time_upstream_hist_%d" % i + hist_lines.append([hist_key, str(le), 'incremental', 1, 1]) + upstream_hist_lines.append([upstream_hist_key, str(le), 'incremental', 1, 1]) + + hist_lines.append(["response_time_hist_%d" % len(histogram), '+Inf', 'incremental', 1, 1]) + upstream_hist_lines.append(["response_time_upstream_hist_%d" % len(histogram), '+Inf', 'incremental', 1, 1]) + elif histogram is not None: + self.error("expect histogram list, but was {0}".format(type(histogram))) if not self.configuration.get('all_time', True): self.order.remove('clients_all') @@ -431,11 +464,15 @@ class Web: resp_length = match_dict['resp_length'] if '-' not in match_dict['resp_length'] else 0 self.data['resp_length'] += int(resp_length) if 'resp_time' in match_dict: - get_timings(timings=timings['resp_time'], - time=self.storage['func_resp_time'](float(match_dict['resp_time']))) + resp_time = self.storage['func_resp_time'](float(match_dict['resp_time'])) + get_timings(timings=timings['resp_time'], time=resp_time) + if 'bucket_index' in self.storage: + get_hist(self.storage['bucket_index'], self.storage['buckets'], resp_time / 1000) if 'resp_time_upstream' in match_dict and match_dict['resp_time_upstream'] != '-': - get_timings(timings=timings['resp_time_upstream'], - time=self.storage['func_resp_time'](float(match_dict['resp_time_upstream']))) + resp_time_upstream = self.storage['func_resp_time'](float(match_dict['resp_time_upstream'])) + get_timings(timings=timings['resp_time_upstream'], time=resp_time_upstream) + if 'bucket_index' in self.storage: + get_hist(self.storage['bucket_index'], self.storage['upstream_buckets'], resp_time / 1000) # requests per ip proto proto = 'ipv6' if ':' in match_dict['address'] else 'ipv4' self.data['req_' + proto] += 1 @@ -456,6 +493,17 @@ class Web: self.data[elem + '_min'] += timings[elem]['minimum'] self.data[elem + '_avg'] += timings[elem]['summary'] / timings[elem]['count'] self.data[elem + '_max'] += timings[elem]['maximum'] + + # histogram + if 'bucket_index' in self.storage: + buckets = self.storage['buckets'] + upstream_buckets = self.storage['upstream_buckets'] + for i in range(0, len(self.storage['bucket_index'])): + hist_key = "response_time_hist_%d" % i + upstream_hist_key = "response_time_upstream_hist_%d" % i + self.data[hist_key] = buckets[i] + self.data[upstream_hist_key] = upstream_buckets[i] + return self.data def find_regex(self, last_line): @@ -903,6 +951,18 @@ def get_timings(timings, time): timings['summary'] += time timings['count'] += 1 +def get_hist(index, buckets, time): + """ + :param index: histogram index (Ex. [10, 50, 100, 150, ...]) + :param buckets: histogram buckets + :param time: time + :return: None + """ + for i in range(len(index)-1, -1, -1): + if time <= index[i]: + buckets[i] += 1 + else: + break def address_not_in_pool(pool, address, pool_size): """ |