# netdata python.d.plugin configuration for pandas
#
# This file is in YaML format. Generally the format is:
#
# name: value
#
# There are 2 sections:
#  - global variables
#  - one or more JOBS
#
# JOBS allow you to collect values from multiple sources.
# Each source will have its own set of charts.
#
# JOB parameters have to be indented (using spaces only, example below).

# ----------------------------------------------------------------------
# Global Variables
# These variables set the defaults for all JOBs, however each JOB
# may define its own, overriding the defaults.

# update_every sets the default data collection frequency.
# If unset, the python.d.plugin default is used.
update_every: 5

# priority controls the order of charts at the netdata dashboard.
# Lower numbers move the charts towards the top of the page.
# If unset, the default for python.d.plugin is used.
# priority: 60000

# penalty indicates whether to apply penalty to update_every in case of failures.
# Penalty will increase every 5 failed updates in a row. Maximum penalty is 10 minutes.
# penalty: yes

# autodetection_retry sets the job re-check interval in seconds.
# The job is not deleted if check fails.
# Attempts to start the job are made once every autodetection_retry.
# This feature is disabled by default.
# autodetection_retry: 0

# ----------------------------------------------------------------------
# JOBS (data collection sources)
#
# The default JOBS share the same *name*. JOBS with the same name
# are mutually exclusive. Only one of them will be allowed running at
# any time. This allows autodetection to try several alternatives and
# pick the one that works.
#
# Any number of jobs is supported.
#
# All python.d.plugin JOBS (for all its modules) support a set of
# predefined parameters. These are:
#
# job_name:
#     name: myname            # the JOB's name as it will appear on the dashboard
#                             # (by default is the job_name)
#                             # JOBs sharing a name are mutually exclusive
#     update_every: 1         # the JOB's data collection frequency
#     priority: 60000         # the JOB's order on the dashboard
#     penalty: yes            # the JOB's penalty
#     autodetection_retry: 0  # the JOB's re-check interval in seconds
#
# Additionally to the above, pandas also supports the following:
#
#     line_sep: ";"           # the separator between the steps in df_steps (defaults to ';')
#     chart_configs:          # a list of chart configurations; each entry takes:
#       - name: ...           # the chart's name
#         title: ...          # the chart's title
#         family: ...         # the chart's family
#         context: ...        # the chart's context
#         type: ...           # the chart's type (e.g. line, area)
#         units: ...          # the chart's units
#         df_steps: ...       # a line_sep separated sequence of pandas steps that
#                             # builds the dataframe to be charted
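# ----------------------------------------------------------------------
# A note on df_steps: each step separated by line_sep is evaluated in turn,
# and its result is assigned back to df, which is why every step after the
# first references the df produced by the step before it. As an illustrative
# sketch only (not the collector's exact source), the evaluation behaves
# roughly like:
#
#     df = None
#     for step in df_steps.split(line_sep):
#         df = eval(step)   # each step sees the df left by the previous one
#
# The examples below therefore finish by reshaping df into one wide row,
# since the final df's columns map to the chart's dimensions.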
# ----------------------------------------------------------------------
# AUTO-DETECTION JOBS

# Some example configurations follow. To enable this collector, uncomment
# an example below and restart netdata.

# example pulling some hourly temperature data: one chart for today's
# forecast (mean, min, max) by city and another chart for the current
# temperature by city.

# temperature:
#     name: "temperature"
#     update_every: 5
#     chart_configs:
#       - name: "temperature_forecast_by_city"
#         title: "Temperature By City - Today Forecast"
#         family: "temperature.today"
#         context: "pandas.temperature"
#         type: "line"
#         units: "Celsius"
#         df_steps: >
#           pd.DataFrame.from_dict(
#             {city: requests.get(f'https://api.open-meteo.com/v1/forecast?latitude={lat}&longitude={lng}&hourly=temperature_2m').json()['hourly']['temperature_2m']
#             for (city,lat,lng)
#             in [
#                 ('dublin', 53.3441, -6.2675),
#                 ('athens', 37.9792, 23.7166),
#                 ('london', 51.5002, -0.1262),
#                 ('berlin', 52.5235, 13.4115),
#                 ('paris', 48.8567, 2.3510),
#                 ('madrid', 40.4167, -3.7033),
#                 ('new_york', 40.71, -74.01),
#                 ('los_angeles', 34.05, -118.24),
#                 ]
#             }
#             );
#           df.describe();                                            # get aggregate stats for each city
#           df.transpose()[['mean', 'max', 'min']].reset_index();     # just take mean, min, max
#           df.rename(columns={'index':'city'});                      # some column renaming
#           df.pivot(columns='city').mean().to_frame().reset_index(); # force to be one row per city
#           df.rename(columns={0:'degrees'});                         # some column renaming
#           pd.concat([df, df['city']+'_'+df['level_0']], axis=1);    # add new column combining city and summary measurement label
#           df.rename(columns={0:'measurement'});                     # some column renaming
#           df[['measurement', 'degrees']].set_index('measurement'); # just take the two columns we want
#           df.sort_index();                                          # sort by city name
#           df.transpose();                                           # transpose so it's just one wide row
#       - name: "temperature_current_by_city"
#         title: "Temperature By City - Current"
#         family: "temperature.current"
#         context: "pandas.temperature"
#         type: "line"
#         units: "Celsius"
#         df_steps: >
#           pd.DataFrame.from_dict(
#             {city: requests.get(f'https://api.open-meteo.com/v1/forecast?latitude={lat}&longitude={lng}&current_weather=true').json()['current_weather']
#             for (city,lat,lng)
#             in [
#                 ('dublin', 53.3441, -6.2675),
#                 ('athens', 37.9792, 23.7166),
#                 ('london', 51.5002, -0.1262),
#                 ('berlin', 52.5235, 13.4115),
#                 ('paris', 48.8567, 2.3510),
#                 ('madrid', 40.4167, -3.7033),
#                 ('new_york', 40.71, -74.01),
#                 ('los_angeles', 34.05, -118.24),
#                 ]
#             }
#             );
#           df.transpose();
#           df[['temperature']];
#           df.transpose();

# example showing a read_csv from a url and some light pandas data wrangling:
# pull data in csv format from the london demo server and chart the ratio of
# user cpu over system cpu, averaged over the last 60 seconds.

# example_csv:
#     name: "example_csv"
#     update_every: 2
#     chart_configs:
#       - name: "london_system_cpu"
#         title: "London System CPU - Ratios"
#         family: "london_system_cpu"
#         context: "pandas"
#         type: "line"
#         units: "n"
#         df_steps: >
#           pd.read_csv('https://london.my-netdata.io/api/v1/data?chart=system.cpu&format=csv&after=-60', storage_options={'User-Agent': 'netdata'});
#           df.drop('time', axis=1);
#           df.mean().to_frame().transpose();
#           df.apply(lambda row: (row.user / row.system), axis=1).to_frame();
#           df.rename(columns={0:'average_user_system_ratio'});
#           df*100;
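# A df_steps pipeline can be prototyped in a plain Python shell before being
# added here. As a sketch, the example_csv job above corresponds roughly to
# the following (assuming pandas is installed and the london demo server is
# reachable):
#
#     import pandas as pd
#     df = pd.read_csv(
#         'https://london.my-netdata.io/api/v1/data?chart=system.cpu&format=csv&after=-60',
#         storage_options={'User-Agent': 'netdata'},
#     )
#     df = df.drop('time', axis=1)                  # drop the timestamp column
#     df = df.mean().to_frame().transpose()         # average each cpu state over the window
#     df = df.apply(lambda row: (row.user / row.system), axis=1).to_frame()
#     df = df.rename(columns={0: 'average_user_system_ratio'})
#     df = df * 100
#     print(df)   # one wide row; its columns map to chart dimensions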
# example showing a read_json from a url and some light pandas data wrangling:
# pull data in json format from the london demo server (use requests.get() when
# the json data is too complex for pd.read_json()) and work out 'total_bandwidth'.

# example_json:
#     name: "example_json"
#     update_every: 2
#     chart_configs:
#       - name: "london_system_net"
#         title: "London System Net - Total Bandwidth"
#         family: "london_system_net"
#         context: "pandas"
#         type: "area"
#         units: "kilobits/s"
#         df_steps: >
#           pd.DataFrame(requests.get('https://london.my-netdata.io/api/v1/data?chart=system.net&format=json&after=-1').json()['data'], columns=requests.get('https://london.my-netdata.io/api/v1/data?chart=system.net&format=json&after=-1').json()['labels']);
#           df.drop('time', axis=1);
#           abs(df);
#           df.sum(axis=1).to_frame();
#           df.rename(columns={0:'total_bandwidth'});

# example showing a read_xml from a url and some light pandas data wrangling:
# pull weather forecast data in xml format and use xpath to pull out the
# temperature forecast. line_sep is set to '|' here because the url itself
# contains the default ';' separator.

# example_xml:
#     name: "example_xml"
#     update_every: 2
#     line_sep: "|"
#     chart_configs:
#       - name: "temperature_forecast"
#         title: "Temperature Forecast"
#         family: "temp"
#         context: "pandas.temp"
#         type: "line"
#         units: "celsius"
#         df_steps: >
#           pd.read_xml('http://metwdb-openaccess.ichec.ie/metno-wdb2ts/locationforecast?lat=54.7210798611;long=-8.7237392806', xpath='./product/time[1]/location/temperature', parser='etree')|
#           df.rename(columns={'value': 'dublin'})|
#           df[['dublin']]|

# example showing a read_sql from a postgres database using sqlalchemy.
# note: this example assumes a postgres db running on localhost with a
# 'netdata' user whose password is 'netdata'.

# sql:
#     name: "sql"
#     update_every: 5
#     chart_configs:
#       - name: "sql"
#         title: "SQL Example"
#         family: "sql.example"
#         context: "example"
#         type: "line"
#         units: "percent"
#         df_steps: >
#           pd.read_sql_query(
#             sql='\
#               select \
#                 random()*100 as metric_1, \
#                 random()*100 as metric_2 \
#             ',
#             con=create_engine('postgresql://localhost/postgres?user=netdata&password=netdata')
#             );
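# The sql job above can be dry-run the same way. As a sketch (assuming
# sqlalchemy is installed and a postgres instance matching the example's
# connection string is reachable):
#
#     import pandas as pd
#     from sqlalchemy import create_engine
#     df = pd.read_sql_query(
#         sql='select random()*100 as metric_1, random()*100 as metric_2',
#         con=create_engine('postgresql://localhost/postgres?user=netdata&password=netdata'),
#     )
#     print(df)   # one row, two columns -> two dimensions on the chart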