diff options
author | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-27 18:24:20 +0000 |
---|---|---|
committer | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-27 18:24:20 +0000 |
commit | 483eb2f56657e8e7f419ab1a4fab8dce9ade8609 (patch) | |
tree | e5d88d25d870d5dedacb6bbdbe2a966086a0a5cf /src/spdk/dpdk/examples/vm_power_manager | |
parent | Initial commit. (diff) | |
download | ceph-upstream.tar.xz ceph-upstream.zip |
Adding upstream version 14.2.21. (tags: upstream/14.2.21, upstream)
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'src/spdk/dpdk/examples/vm_power_manager')
22 files changed, 4595 insertions, 0 deletions
diff --git a/src/spdk/dpdk/examples/vm_power_manager/Makefile b/src/spdk/dpdk/examples/vm_power_manager/Makefile new file mode 100644 index 00000000..13a5205b --- /dev/null +++ b/src/spdk/dpdk/examples/vm_power_manager/Makefile @@ -0,0 +1,58 @@ +# SPDX-License-Identifier: BSD-3-Clause +# Copyright(c) 2010-2014 Intel Corporation + +ifneq ($(shell pkg-config --atleast-version=0.9.3 libvirt; echo $$?), 0) +$(error vm_power_manager requires libvirt >= 0.9.3) +else + +ifeq ($(RTE_SDK),) +$(error "Please define RTE_SDK environment variable") +endif + +# Default target, can be overridden by command line or environment +RTE_TARGET ?= x86_64-native-linuxapp-gcc + +include $(RTE_SDK)/mk/rte.vars.mk + +# binary name +APP = vm_power_mgr + +# all source are stored in SRCS-y +SRCS-y := main.c vm_power_cli.c power_manager.c channel_manager.c +SRCS-y += channel_monitor.c parse.c +ifeq ($(CONFIG_RTE_ARCH_X86_64),y) +SRCS-y += oob_monitor_x86.c +else +SRCS-y += oob_monitor_nop.c +endif + +CFLAGS += -O3 -I$(RTE_SDK)/lib/librte_power/ +CFLAGS += $(WERROR_FLAGS) + +LDLIBS += -lvirt + +ifeq ($(CONFIG_RTE_BUILD_SHARED_LIB),y) + +ifeq ($(CONFIG_RTE_LIBRTE_IXGBE_PMD),y) +LDLIBS += -lrte_pmd_ixgbe +endif + +ifeq ($(CONFIG_RTE_LIBRTE_I40E_PMD),y) +LDLIBS += -lrte_pmd_i40e +endif + +ifeq ($(CONFIG_RTE_LIBRTE_BNXT_PMD),y) +LDLIBS += -lrte_pmd_bnxt +endif + +endif + +# workaround for a gcc bug with noreturn attribute +# http://gcc.gnu.org/bugzilla/show_bug.cgi?id=12603 +ifeq ($(CONFIG_RTE_TOOLCHAIN_GCC),y) +CFLAGS_main.o += -Wno-return-type +endif + +include $(RTE_SDK)/mk/rte.extapp.mk + +endif # libvirt check diff --git a/src/spdk/dpdk/examples/vm_power_manager/channel_manager.c b/src/spdk/dpdk/examples/vm_power_manager/channel_manager.c new file mode 100644 index 00000000..927fc35a --- /dev/null +++ b/src/spdk/dpdk/examples/vm_power_manager/channel_manager.c @@ -0,0 +1,844 @@ +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright(c) 2010-2014 Intel Corporation + */ + +#include <stdio.h> 
+#include <stdlib.h> +#include <sys/un.h> +#include <fcntl.h> +#include <unistd.h> +#include <inttypes.h> +#include <dirent.h> +#include <errno.h> + +#include <sys/queue.h> +#include <sys/types.h> +#include <sys/socket.h> +#include <sys/select.h> + +#include <rte_malloc.h> +#include <rte_memory.h> +#include <rte_mempool.h> +#include <rte_log.h> +#include <rte_atomic.h> +#include <rte_spinlock.h> + +#include <libvirt/libvirt.h> + +#include "channel_manager.h" +#include "channel_commands.h" +#include "channel_monitor.h" + + +#define RTE_LOGTYPE_CHANNEL_MANAGER RTE_LOGTYPE_USER1 + +#define ITERATIVE_BITMASK_CHECK_64(mask_u64b, i) \ + for (i = 0; mask_u64b; mask_u64b &= ~(1ULL << i++)) \ + if ((mask_u64b >> i) & 1) \ + +/* Global pointer to libvirt connection */ +static virConnectPtr global_vir_conn_ptr; + +static unsigned char *global_cpumaps; +static virVcpuInfo *global_vircpuinfo; +static size_t global_maplen; + +static unsigned global_n_host_cpus; + +/* + * Represents a single Virtual Machine + */ +struct virtual_machine_info { + char name[CHANNEL_MGR_MAX_NAME_LEN]; + rte_atomic64_t pcpu_mask[CHANNEL_CMDS_MAX_CPUS]; + struct channel_info *channels[CHANNEL_CMDS_MAX_VM_CHANNELS]; + uint64_t channel_mask; + uint8_t num_channels; + enum vm_status status; + virDomainPtr domainPtr; + virDomainInfo info; + rte_spinlock_t config_spinlock; + LIST_ENTRY(virtual_machine_info) vms_info; +}; + +LIST_HEAD(, virtual_machine_info) vm_list_head; + +static struct virtual_machine_info * +find_domain_by_name(const char *name) +{ + struct virtual_machine_info *info; + LIST_FOREACH(info, &vm_list_head, vms_info) { + if (!strncmp(info->name, name, CHANNEL_MGR_MAX_NAME_LEN-1)) + return info; + } + return NULL; +} + +static int +update_pcpus_mask(struct virtual_machine_info *vm_info) +{ + virVcpuInfoPtr cpuinfo; + unsigned i, j; + int n_vcpus; + uint64_t mask; + + memset(global_cpumaps, 0, CHANNEL_CMDS_MAX_CPUS*global_maplen); + + if (!virDomainIsActive(vm_info->domainPtr)) { + n_vcpus = 
virDomainGetVcpuPinInfo(vm_info->domainPtr, + vm_info->info.nrVirtCpu, global_cpumaps, global_maplen, + VIR_DOMAIN_AFFECT_CONFIG); + if (n_vcpus < 0) { + RTE_LOG(ERR, CHANNEL_MANAGER, "Error getting vCPU info for " + "in-active VM '%s'\n", vm_info->name); + return -1; + } + goto update_pcpus; + } + + memset(global_vircpuinfo, 0, sizeof(*global_vircpuinfo)* + CHANNEL_CMDS_MAX_CPUS); + + cpuinfo = global_vircpuinfo; + + n_vcpus = virDomainGetVcpus(vm_info->domainPtr, cpuinfo, + CHANNEL_CMDS_MAX_CPUS, global_cpumaps, global_maplen); + if (n_vcpus < 0) { + RTE_LOG(ERR, CHANNEL_MANAGER, "Error getting vCPU info for " + "active VM '%s'\n", vm_info->name); + return -1; + } +update_pcpus: + if (n_vcpus >= CHANNEL_CMDS_MAX_CPUS) { + RTE_LOG(ERR, CHANNEL_MANAGER, "Number of vCPUS(%u) is out of range " + "0...%d\n", n_vcpus, CHANNEL_CMDS_MAX_CPUS-1); + return -1; + } + if (n_vcpus != vm_info->info.nrVirtCpu) { + RTE_LOG(INFO, CHANNEL_MANAGER, "Updating the number of vCPUs for VM '%s" + " from %d -> %d\n", vm_info->name, vm_info->info.nrVirtCpu, + n_vcpus); + vm_info->info.nrVirtCpu = n_vcpus; + } + for (i = 0; i < vm_info->info.nrVirtCpu; i++) { + mask = 0; + for (j = 0; j < global_n_host_cpus; j++) { + if (VIR_CPU_USABLE(global_cpumaps, global_maplen, i, j) > 0) { + mask |= 1ULL << j; + } + } + rte_atomic64_set(&vm_info->pcpu_mask[i], mask); + } + return 0; +} + +int +set_pcpus_mask(char *vm_name, unsigned vcpu, uint64_t core_mask) +{ + unsigned i = 0; + int flags = VIR_DOMAIN_AFFECT_LIVE|VIR_DOMAIN_AFFECT_CONFIG; + struct virtual_machine_info *vm_info; + uint64_t mask = core_mask; + + if (vcpu >= CHANNEL_CMDS_MAX_CPUS) { + RTE_LOG(ERR, CHANNEL_MANAGER, "vCPU(%u) exceeds max allowable(%d)\n", + vcpu, CHANNEL_CMDS_MAX_CPUS-1); + return -1; + } + + vm_info = find_domain_by_name(vm_name); + if (vm_info == NULL) { + RTE_LOG(ERR, CHANNEL_MANAGER, "VM '%s' not found\n", vm_name); + return -1; + } + + if (!virDomainIsActive(vm_info->domainPtr)) { + RTE_LOG(ERR, CHANNEL_MANAGER, 
"Unable to set vCPU(%u) to pCPU " + "mask(0x%"PRIx64") for VM '%s', VM is not active\n", + vcpu, core_mask, vm_info->name); + return -1; + } + + if (vcpu >= vm_info->info.nrVirtCpu) { + RTE_LOG(ERR, CHANNEL_MANAGER, "vCPU(%u) exceeds the assigned number of " + "vCPUs(%u)\n", vcpu, vm_info->info.nrVirtCpu); + return -1; + } + memset(global_cpumaps, 0 , CHANNEL_CMDS_MAX_CPUS * global_maplen); + ITERATIVE_BITMASK_CHECK_64(mask, i) { + VIR_USE_CPU(global_cpumaps, i); + if (i >= global_n_host_cpus) { + RTE_LOG(ERR, CHANNEL_MANAGER, "CPU(%u) exceeds the available " + "number of CPUs(%u)\n", i, global_n_host_cpus); + return -1; + } + } + if (virDomainPinVcpuFlags(vm_info->domainPtr, vcpu, global_cpumaps, + global_maplen, flags) < 0) { + RTE_LOG(ERR, CHANNEL_MANAGER, "Unable to set vCPU(%u) to pCPU " + "mask(0x%"PRIx64") for VM '%s'\n", vcpu, core_mask, + vm_info->name); + return -1; + } + rte_atomic64_set(&vm_info->pcpu_mask[vcpu], core_mask); + return 0; + +} + +int +set_pcpu(char *vm_name, unsigned vcpu, unsigned core_num) +{ + uint64_t mask = 1ULL << core_num; + + return set_pcpus_mask(vm_name, vcpu, mask); +} + +uint64_t +get_pcpus_mask(struct channel_info *chan_info, unsigned vcpu) +{ + struct virtual_machine_info *vm_info = + (struct virtual_machine_info *)chan_info->priv_info; + return rte_atomic64_read(&vm_info->pcpu_mask[vcpu]); +} + +static inline int +channel_exists(struct virtual_machine_info *vm_info, unsigned channel_num) +{ + rte_spinlock_lock(&(vm_info->config_spinlock)); + if (vm_info->channel_mask & (1ULL << channel_num)) { + rte_spinlock_unlock(&(vm_info->config_spinlock)); + return 1; + } + rte_spinlock_unlock(&(vm_info->config_spinlock)); + return 0; +} + + + +static int +open_non_blocking_channel(struct channel_info *info) +{ + int ret, flags; + struct sockaddr_un sock_addr; + fd_set soc_fd_set; + struct timeval tv; + + info->fd = socket(AF_UNIX, SOCK_STREAM, 0); + if (info->fd == -1) { + RTE_LOG(ERR, CHANNEL_MANAGER, "Error(%s) creating socket for 
'%s'\n", + strerror(errno), + info->channel_path); + return -1; + } + sock_addr.sun_family = AF_UNIX; + memcpy(&sock_addr.sun_path, info->channel_path, + strlen(info->channel_path)+1); + + /* Get current flags */ + flags = fcntl(info->fd, F_GETFL, 0); + if (flags < 0) { + RTE_LOG(WARNING, CHANNEL_MANAGER, "Error(%s) fcntl get flags socket for" + "'%s'\n", strerror(errno), info->channel_path); + return 1; + } + /* Set to Non Blocking */ + flags |= O_NONBLOCK; + if (fcntl(info->fd, F_SETFL, flags) < 0) { + RTE_LOG(WARNING, CHANNEL_MANAGER, "Error(%s) setting non-blocking " + "socket for '%s'\n", strerror(errno), info->channel_path); + return -1; + } + ret = connect(info->fd, (struct sockaddr *)&sock_addr, + sizeof(sock_addr)); + if (ret < 0) { + /* ECONNREFUSED error is given when VM is not active */ + if (errno == ECONNREFUSED) { + RTE_LOG(WARNING, CHANNEL_MANAGER, "VM is not active or has not " + "activated its endpoint to channel %s\n", + info->channel_path); + return -1; + } + /* Wait for tv_sec if in progress */ + else if (errno == EINPROGRESS) { + tv.tv_sec = 2; + tv.tv_usec = 0; + FD_ZERO(&soc_fd_set); + FD_SET(info->fd, &soc_fd_set); + if (select(info->fd+1, NULL, &soc_fd_set, NULL, &tv) > 0) { + RTE_LOG(WARNING, CHANNEL_MANAGER, "Timeout or error on channel " + "'%s'\n", info->channel_path); + return -1; + } + } else { + /* Any other error */ + RTE_LOG(WARNING, CHANNEL_MANAGER, "Error(%s) connecting socket" + " for '%s'\n", strerror(errno), info->channel_path); + return -1; + } + } + return 0; +} + +static int +setup_channel_info(struct virtual_machine_info **vm_info_dptr, + struct channel_info **chan_info_dptr, unsigned channel_num) +{ + struct channel_info *chan_info = *chan_info_dptr; + struct virtual_machine_info *vm_info = *vm_info_dptr; + + chan_info->channel_num = channel_num; + chan_info->priv_info = (void *)vm_info; + chan_info->status = CHANNEL_MGR_CHANNEL_DISCONNECTED; + if (open_non_blocking_channel(chan_info) < 0) { + RTE_LOG(ERR, 
CHANNEL_MANAGER, "Could not open channel: " + "'%s' for VM '%s'\n", + chan_info->channel_path, vm_info->name); + return -1; + } + if (add_channel_to_monitor(&chan_info) < 0) { + RTE_LOG(ERR, CHANNEL_MANAGER, "Could add channel: " + "'%s' to epoll ctl for VM '%s'\n", + chan_info->channel_path, vm_info->name); + return -1; + + } + rte_spinlock_lock(&(vm_info->config_spinlock)); + vm_info->num_channels++; + vm_info->channel_mask |= 1ULL << channel_num; + vm_info->channels[channel_num] = chan_info; + chan_info->status = CHANNEL_MGR_CHANNEL_CONNECTED; + rte_spinlock_unlock(&(vm_info->config_spinlock)); + return 0; +} + +int +add_all_channels(const char *vm_name) +{ + DIR *d; + struct dirent *dir; + struct virtual_machine_info *vm_info; + struct channel_info *chan_info; + char *token, *remaining, *tail_ptr; + char socket_name[PATH_MAX]; + unsigned channel_num; + int num_channels_enabled = 0; + + /* verify VM exists */ + vm_info = find_domain_by_name(vm_name); + if (vm_info == NULL) { + RTE_LOG(ERR, CHANNEL_MANAGER, "VM: '%s' not found" + " during channel discovery\n", vm_name); + return 0; + } + if (!virDomainIsActive(vm_info->domainPtr)) { + RTE_LOG(ERR, CHANNEL_MANAGER, "VM: '%s' is not active\n", vm_name); + vm_info->status = CHANNEL_MGR_VM_INACTIVE; + return 0; + } + d = opendir(CHANNEL_MGR_SOCKET_PATH); + if (d == NULL) { + RTE_LOG(ERR, CHANNEL_MANAGER, "Error opening directory '%s': %s\n", + CHANNEL_MGR_SOCKET_PATH, strerror(errno)); + return -1; + } + while ((dir = readdir(d)) != NULL) { + if (!strncmp(dir->d_name, ".", 1) || + !strncmp(dir->d_name, "..", 2)) + continue; + + snprintf(socket_name, sizeof(socket_name), "%s", dir->d_name); + remaining = socket_name; + /* Extract vm_name from "<vm_name>.<channel_num>" */ + token = strsep(&remaining, "."); + if (remaining == NULL) + continue; + if (strncmp(vm_name, token, CHANNEL_MGR_MAX_NAME_LEN)) + continue; + + /* remaining should contain only <channel_num> */ + errno = 0; + channel_num = (unsigned)strtol(remaining, 
&tail_ptr, 0); + if ((errno != 0) || (remaining[0] == '\0') || + tail_ptr == NULL || (*tail_ptr != '\0')) { + RTE_LOG(WARNING, CHANNEL_MANAGER, "Malformed channel name" + "'%s' found it should be in the form of " + "'<guest_name>.<channel_num>(decimal)'\n", + dir->d_name); + continue; + } + if (channel_num >= CHANNEL_CMDS_MAX_VM_CHANNELS) { + RTE_LOG(WARNING, CHANNEL_MANAGER, "Channel number(%u) is " + "greater than max allowable: %d, skipping '%s%s'\n", + channel_num, CHANNEL_CMDS_MAX_VM_CHANNELS-1, + CHANNEL_MGR_SOCKET_PATH, dir->d_name); + continue; + } + /* if channel has not been added previously */ + if (channel_exists(vm_info, channel_num)) + continue; + + chan_info = rte_malloc(NULL, sizeof(*chan_info), + RTE_CACHE_LINE_SIZE); + if (chan_info == NULL) { + RTE_LOG(ERR, CHANNEL_MANAGER, "Error allocating memory for " + "channel '%s%s'\n", CHANNEL_MGR_SOCKET_PATH, dir->d_name); + continue; + } + + snprintf(chan_info->channel_path, + sizeof(chan_info->channel_path), "%s%s", + CHANNEL_MGR_SOCKET_PATH, dir->d_name); + + if (setup_channel_info(&vm_info, &chan_info, channel_num) < 0) { + rte_free(chan_info); + continue; + } + + num_channels_enabled++; + } + closedir(d); + return num_channels_enabled; +} + +int +add_channels(const char *vm_name, unsigned *channel_list, + unsigned len_channel_list) +{ + struct virtual_machine_info *vm_info; + struct channel_info *chan_info; + char socket_path[PATH_MAX]; + unsigned i; + int num_channels_enabled = 0; + + vm_info = find_domain_by_name(vm_name); + if (vm_info == NULL) { + RTE_LOG(ERR, CHANNEL_MANAGER, "Unable to add channels: VM '%s' " + "not found\n", vm_name); + return 0; + } + + if (!virDomainIsActive(vm_info->domainPtr)) { + RTE_LOG(ERR, CHANNEL_MANAGER, "VM: '%s' is not active\n", vm_name); + vm_info->status = CHANNEL_MGR_VM_INACTIVE; + return 0; + } + + for (i = 0; i < len_channel_list; i++) { + + if (channel_list[i] >= CHANNEL_CMDS_MAX_VM_CHANNELS) { + RTE_LOG(INFO, CHANNEL_MANAGER, "Channel(%u) is out of range " 
+ "0...%d\n", channel_list[i], + CHANNEL_CMDS_MAX_VM_CHANNELS-1); + continue; + } + if (channel_exists(vm_info, channel_list[i])) { + RTE_LOG(INFO, CHANNEL_MANAGER, "Channel already exists, skipping " + "'%s.%u'\n", vm_name, i); + continue; + } + + snprintf(socket_path, sizeof(socket_path), "%s%s.%u", + CHANNEL_MGR_SOCKET_PATH, vm_name, channel_list[i]); + errno = 0; + if (access(socket_path, F_OK) < 0) { + RTE_LOG(ERR, CHANNEL_MANAGER, "Channel path '%s' error: " + "%s\n", socket_path, strerror(errno)); + continue; + } + chan_info = rte_malloc(NULL, sizeof(*chan_info), + RTE_CACHE_LINE_SIZE); + if (chan_info == NULL) { + RTE_LOG(ERR, CHANNEL_MANAGER, "Error allocating memory for " + "channel '%s'\n", socket_path); + continue; + } + snprintf(chan_info->channel_path, + sizeof(chan_info->channel_path), "%s%s.%u", + CHANNEL_MGR_SOCKET_PATH, vm_name, channel_list[i]); + if (setup_channel_info(&vm_info, &chan_info, channel_list[i]) < 0) { + rte_free(chan_info); + continue; + } + num_channels_enabled++; + + } + return num_channels_enabled; +} + +int +remove_channel(struct channel_info **chan_info_dptr) +{ + struct virtual_machine_info *vm_info; + struct channel_info *chan_info = *chan_info_dptr; + + close(chan_info->fd); + + vm_info = (struct virtual_machine_info *)chan_info->priv_info; + + rte_spinlock_lock(&(vm_info->config_spinlock)); + vm_info->channel_mask &= ~(1ULL << chan_info->channel_num); + vm_info->num_channels--; + rte_spinlock_unlock(&(vm_info->config_spinlock)); + + rte_free(chan_info); + return 0; +} + +int +set_channel_status_all(const char *vm_name, enum channel_status status) +{ + struct virtual_machine_info *vm_info; + unsigned i; + uint64_t mask; + int num_channels_changed = 0; + + if (!(status == CHANNEL_MGR_CHANNEL_CONNECTED || + status == CHANNEL_MGR_CHANNEL_DISABLED)) { + RTE_LOG(ERR, CHANNEL_MANAGER, "Channels can only be enabled or " + "disabled: Unable to change status for VM '%s'\n", vm_name); + } + vm_info = find_domain_by_name(vm_name); + if 
(vm_info == NULL) { + RTE_LOG(ERR, CHANNEL_MANAGER, "Unable to disable channels: VM '%s' " + "not found\n", vm_name); + return 0; + } + + rte_spinlock_lock(&(vm_info->config_spinlock)); + mask = vm_info->channel_mask; + ITERATIVE_BITMASK_CHECK_64(mask, i) { + vm_info->channels[i]->status = status; + num_channels_changed++; + } + rte_spinlock_unlock(&(vm_info->config_spinlock)); + return num_channels_changed; + +} + +int +set_channel_status(const char *vm_name, unsigned *channel_list, + unsigned len_channel_list, enum channel_status status) +{ + struct virtual_machine_info *vm_info; + unsigned i; + int num_channels_changed = 0; + + if (!(status == CHANNEL_MGR_CHANNEL_CONNECTED || + status == CHANNEL_MGR_CHANNEL_DISABLED)) { + RTE_LOG(ERR, CHANNEL_MANAGER, "Channels can only be enabled or " + "disabled: Unable to change status for VM '%s'\n", vm_name); + } + vm_info = find_domain_by_name(vm_name); + if (vm_info == NULL) { + RTE_LOG(ERR, CHANNEL_MANAGER, "Unable to add channels: VM '%s' " + "not found\n", vm_name); + return 0; + } + for (i = 0; i < len_channel_list; i++) { + if (channel_exists(vm_info, channel_list[i])) { + rte_spinlock_lock(&(vm_info->config_spinlock)); + vm_info->channels[channel_list[i]]->status = status; + rte_spinlock_unlock(&(vm_info->config_spinlock)); + num_channels_changed++; + } + } + return num_channels_changed; +} + +void +get_all_vm(int *num_vm, int *num_vcpu) +{ + + virNodeInfo node_info; + virDomainPtr *domptr; + uint64_t mask; + int i, ii, numVcpus[MAX_VCPUS], cpu, n_vcpus; + unsigned int jj; + const char *vm_name; + unsigned int domain_flags = VIR_CONNECT_LIST_DOMAINS_RUNNING | + VIR_CONNECT_LIST_DOMAINS_PERSISTENT; + unsigned int domain_flag = VIR_DOMAIN_VCPU_CONFIG; + + + memset(global_cpumaps, 0, CHANNEL_CMDS_MAX_CPUS*global_maplen); + if (virNodeGetInfo(global_vir_conn_ptr, &node_info)) { + RTE_LOG(ERR, CHANNEL_MANAGER, "Unable to retrieve node Info\n"); + return; + } + + /* Returns number of pcpus */ + global_n_host_cpus = 
(unsigned int)node_info.cpus; + + /* Returns number of active domains */ + *num_vm = virConnectListAllDomains(global_vir_conn_ptr, &domptr, + domain_flags); + if (*num_vm <= 0) { + RTE_LOG(ERR, CHANNEL_MANAGER, "No Active Domains Running\n"); + return; + } + + for (i = 0; i < *num_vm; i++) { + + /* Get Domain Names */ + vm_name = virDomainGetName(domptr[i]); + lvm_info[i].vm_name = vm_name; + + /* Get Number of Vcpus */ + numVcpus[i] = virDomainGetVcpusFlags(domptr[i], domain_flag); + + /* Get Number of VCpus & VcpuPinInfo */ + n_vcpus = virDomainGetVcpuPinInfo(domptr[i], + numVcpus[i], global_cpumaps, + global_maplen, domain_flag); + + if ((int)n_vcpus > 0) { + *num_vcpu = n_vcpus; + lvm_info[i].num_cpus = n_vcpus; + } + + /* Save pcpu in use by libvirt VMs */ + for (ii = 0; ii < n_vcpus; ii++) { + mask = 0; + for (jj = 0; jj < global_n_host_cpus; jj++) { + if (VIR_CPU_USABLE(global_cpumaps, + global_maplen, ii, jj) > 0) { + mask |= 1ULL << jj; + } + } + ITERATIVE_BITMASK_CHECK_64(mask, cpu) { + lvm_info[i].pcpus[ii] = cpu; + } + } + } +} + +int +get_info_vm(const char *vm_name, struct vm_info *info) +{ + struct virtual_machine_info *vm_info; + unsigned i, channel_num = 0; + uint64_t mask; + + vm_info = find_domain_by_name(vm_name); + if (vm_info == NULL) { + RTE_LOG(ERR, CHANNEL_MANAGER, "VM '%s' not found\n", vm_name); + return -1; + } + info->status = CHANNEL_MGR_VM_ACTIVE; + if (!virDomainIsActive(vm_info->domainPtr)) + info->status = CHANNEL_MGR_VM_INACTIVE; + + rte_spinlock_lock(&(vm_info->config_spinlock)); + + mask = vm_info->channel_mask; + ITERATIVE_BITMASK_CHECK_64(mask, i) { + info->channels[channel_num].channel_num = i; + memcpy(info->channels[channel_num].channel_path, + vm_info->channels[i]->channel_path, UNIX_PATH_MAX); + info->channels[channel_num].status = vm_info->channels[i]->status; + info->channels[channel_num].fd = vm_info->channels[i]->fd; + channel_num++; + } + + info->num_channels = channel_num; + info->num_vcpus = 
vm_info->info.nrVirtCpu; + rte_spinlock_unlock(&(vm_info->config_spinlock)); + + memcpy(info->name, vm_info->name, sizeof(vm_info->name)); + for (i = 0; i < info->num_vcpus; i++) { + info->pcpu_mask[i] = rte_atomic64_read(&vm_info->pcpu_mask[i]); + } + return 0; +} + +int +add_vm(const char *vm_name) +{ + struct virtual_machine_info *new_domain; + virDomainPtr dom_ptr; + int i; + + if (find_domain_by_name(vm_name) != NULL) { + RTE_LOG(ERR, CHANNEL_MANAGER, "Unable to add VM: VM '%s' " + "already exists\n", vm_name); + return -1; + } + + if (global_vir_conn_ptr == NULL) { + RTE_LOG(ERR, CHANNEL_MANAGER, "No connection to hypervisor exists\n"); + return -1; + } + dom_ptr = virDomainLookupByName(global_vir_conn_ptr, vm_name); + if (dom_ptr == NULL) { + RTE_LOG(ERR, CHANNEL_MANAGER, "Error on VM lookup with libvirt: " + "VM '%s' not found\n", vm_name); + return -1; + } + + new_domain = rte_malloc("virtual_machine_info", sizeof(*new_domain), + RTE_CACHE_LINE_SIZE); + if (new_domain == NULL) { + RTE_LOG(ERR, CHANNEL_MANAGER, "Unable to allocate memory for VM " + "info\n"); + return -1; + } + new_domain->domainPtr = dom_ptr; + if (virDomainGetInfo(new_domain->domainPtr, &new_domain->info) != 0) { + RTE_LOG(ERR, CHANNEL_MANAGER, "Unable to get libvirt VM info\n"); + rte_free(new_domain); + return -1; + } + if (new_domain->info.nrVirtCpu > CHANNEL_CMDS_MAX_CPUS) { + RTE_LOG(ERR, CHANNEL_MANAGER, "Error the number of virtual CPUs(%u) is " + "greater than allowable(%d)\n", new_domain->info.nrVirtCpu, + CHANNEL_CMDS_MAX_CPUS); + rte_free(new_domain); + return -1; + } + + for (i = 0; i < CHANNEL_CMDS_MAX_CPUS; i++) { + rte_atomic64_init(&new_domain->pcpu_mask[i]); + } + if (update_pcpus_mask(new_domain) < 0) { + RTE_LOG(ERR, CHANNEL_MANAGER, "Error getting physical CPU pinning\n"); + rte_free(new_domain); + return -1; + } + strncpy(new_domain->name, vm_name, sizeof(new_domain->name)); + new_domain->name[sizeof(new_domain->name) - 1] = '\0'; + new_domain->channel_mask = 0; + 
new_domain->num_channels = 0; + + if (!virDomainIsActive(dom_ptr)) + new_domain->status = CHANNEL_MGR_VM_INACTIVE; + else + new_domain->status = CHANNEL_MGR_VM_ACTIVE; + + rte_spinlock_init(&(new_domain->config_spinlock)); + LIST_INSERT_HEAD(&vm_list_head, new_domain, vms_info); + return 0; +} + +int +remove_vm(const char *vm_name) +{ + struct virtual_machine_info *vm_info = find_domain_by_name(vm_name); + + if (vm_info == NULL) { + RTE_LOG(ERR, CHANNEL_MANAGER, "Unable to remove VM: VM '%s' " + "not found\n", vm_name); + return -1; + } + rte_spinlock_lock(&vm_info->config_spinlock); + if (vm_info->num_channels != 0) { + RTE_LOG(ERR, CHANNEL_MANAGER, "Unable to remove VM '%s', there are " + "%"PRId8" channels still active\n", + vm_name, vm_info->num_channels); + rte_spinlock_unlock(&vm_info->config_spinlock); + return -1; + } + LIST_REMOVE(vm_info, vms_info); + rte_spinlock_unlock(&vm_info->config_spinlock); + rte_free(vm_info); + return 0; +} + +static void +disconnect_hypervisor(void) +{ + if (global_vir_conn_ptr != NULL) { + virConnectClose(global_vir_conn_ptr); + global_vir_conn_ptr = NULL; + } +} + +static int +connect_hypervisor(const char *path) +{ + if (global_vir_conn_ptr != NULL) { + RTE_LOG(ERR, CHANNEL_MANAGER, "Error connecting to %s, connection " + "already established\n", path); + return -1; + } + global_vir_conn_ptr = virConnectOpen(path); + if (global_vir_conn_ptr == NULL) { + RTE_LOG(ERR, CHANNEL_MANAGER, "Error failed to open connection to " + "Hypervisor '%s'\n", path); + return -1; + } + return 0; +} + +int +channel_manager_init(const char *path) +{ + virNodeInfo info; + + LIST_INIT(&vm_list_head); + if (connect_hypervisor(path) < 0) { + RTE_LOG(ERR, CHANNEL_MANAGER, "Unable to initialize channel manager\n"); + return -1; + } + + global_maplen = VIR_CPU_MAPLEN(CHANNEL_CMDS_MAX_CPUS); + + global_vircpuinfo = rte_zmalloc(NULL, sizeof(*global_vircpuinfo) * + CHANNEL_CMDS_MAX_CPUS, RTE_CACHE_LINE_SIZE); + if (global_vircpuinfo == NULL) { + 
RTE_LOG(ERR, CHANNEL_MANAGER, "Error allocating memory for CPU Info\n"); + goto error; + } + global_cpumaps = rte_zmalloc(NULL, CHANNEL_CMDS_MAX_CPUS * global_maplen, + RTE_CACHE_LINE_SIZE); + if (global_cpumaps == NULL) { + goto error; + } + + if (virNodeGetInfo(global_vir_conn_ptr, &info)) { + RTE_LOG(ERR, CHANNEL_MANAGER, "Unable to retrieve node Info\n"); + goto error; + } + + global_n_host_cpus = (unsigned)info.cpus; + + if (global_n_host_cpus > CHANNEL_CMDS_MAX_CPUS) { + RTE_LOG(WARNING, CHANNEL_MANAGER, "The number of host CPUs(%u) exceeds the " + "maximum of %u. No cores over %u should be used.\n", + global_n_host_cpus, CHANNEL_CMDS_MAX_CPUS, + CHANNEL_CMDS_MAX_CPUS - 1); + global_n_host_cpus = CHANNEL_CMDS_MAX_CPUS; + } + + return 0; +error: + disconnect_hypervisor(); + return -1; +} + +void +channel_manager_exit(void) +{ + unsigned i; + uint64_t mask; + struct virtual_machine_info *vm_info; + + LIST_FOREACH(vm_info, &vm_list_head, vms_info) { + + rte_spinlock_lock(&(vm_info->config_spinlock)); + + mask = vm_info->channel_mask; + ITERATIVE_BITMASK_CHECK_64(mask, i) { + remove_channel_from_monitor(vm_info->channels[i]); + close(vm_info->channels[i]->fd); + rte_free(vm_info->channels[i]); + } + rte_spinlock_unlock(&(vm_info->config_spinlock)); + + LIST_REMOVE(vm_info, vms_info); + rte_free(vm_info); + } + + rte_free(global_cpumaps); + rte_free(global_vircpuinfo); + disconnect_hypervisor(); +} diff --git a/src/spdk/dpdk/examples/vm_power_manager/channel_manager.h b/src/spdk/dpdk/examples/vm_power_manager/channel_manager.h new file mode 100644 index 00000000..872ec614 --- /dev/null +++ b/src/spdk/dpdk/examples/vm_power_manager/channel_manager.h @@ -0,0 +1,322 @@ +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright(c) 2010-2014 Intel Corporation + */ + +#ifndef CHANNEL_MANAGER_H_ +#define CHANNEL_MANAGER_H_ + +#ifdef __cplusplus +extern "C" { +#endif + +#include <linux/limits.h> +#include <sys/un.h> +#include <rte_atomic.h> + +/* Maximum number of CPUs */ 
+#define CHANNEL_CMDS_MAX_CPUS 64 +#if CHANNEL_CMDS_MAX_CPUS > 64 +#error Maximum number of cores is 64, overflow is guaranteed to \ + cause problems with VM Power Management +#endif + +/* Maximum name length including '\0' terminator */ +#define CHANNEL_MGR_MAX_NAME_LEN 64 + +/* Maximum number of channels to each Virtual Machine */ +#define CHANNEL_MGR_MAX_CHANNELS 64 + +/* Hypervisor Path for libvirt(qemu/KVM) */ +#define CHANNEL_MGR_DEFAULT_HV_PATH "qemu:///system" + +/* File socket directory */ +#define CHANNEL_MGR_SOCKET_PATH "/tmp/powermonitor/" + +#ifndef UNIX_PATH_MAX +struct sockaddr_un _sockaddr_un; +#define UNIX_PATH_MAX sizeof(_sockaddr_un.sun_path) +#endif + +#define MAX_VMS 4 +#define MAX_VCPUS 20 + + +struct libvirt_vm_info { + const char *vm_name; + unsigned int pcpus[MAX_VCPUS]; + uint8_t num_cpus; +}; + +struct libvirt_vm_info lvm_info[MAX_VMS]; +/* Communication Channel Status */ +enum channel_status { CHANNEL_MGR_CHANNEL_DISCONNECTED = 0, + CHANNEL_MGR_CHANNEL_CONNECTED, + CHANNEL_MGR_CHANNEL_DISABLED, + CHANNEL_MGR_CHANNEL_PROCESSING}; + +/* VM libvirt(qemu/KVM) connection status */ +enum vm_status { CHANNEL_MGR_VM_INACTIVE = 0, CHANNEL_MGR_VM_ACTIVE}; + +/* + * Represents a single and exclusive VM channel that exists between a guest and + * the host. 
+ */ +struct channel_info { + char channel_path[UNIX_PATH_MAX]; /**< Path to host socket */ + volatile uint32_t status; /**< Connection status(enum channel_status) */ + int fd; /**< AF_UNIX socket fd */ + unsigned channel_num; /**< CHANNEL_MGR_SOCKET_PATH/<vm_name>.channel_num */ + void *priv_info; /**< Pointer to private info, do not modify */ +}; + +/* Represents a single VM instance used to return internal information about + * a VM */ +struct vm_info { + char name[CHANNEL_MGR_MAX_NAME_LEN]; /**< VM name */ + enum vm_status status; /**< libvirt status */ + uint64_t pcpu_mask[CHANNEL_CMDS_MAX_CPUS]; /**< pCPU mask for each vCPU */ + unsigned num_vcpus; /**< number of vCPUS */ + struct channel_info channels[CHANNEL_MGR_MAX_CHANNELS]; /**< Array of channel_info */ + unsigned num_channels; /**< Number of channels */ +}; + +/** + * Initialize the Channel Manager resources and connect to the Hypervisor + * specified in path. + * This must be successfully called first before calling any other functions. + * It must only be call once; + * + * @param path + * Must be a local path, e.g. qemu:///system. + * + * @return + * - 0 on success. + * - Negative on error. + */ +int channel_manager_init(const char *path); + +/** + * Free resources associated with the Channel Manager. + * + * @param path + * Must be a local path, e.g. qemu:///system. + * + * @return + * None + */ +void channel_manager_exit(void); + +/** + * Get the Physical CPU mask for VM lcore channel(vcpu), result is assigned to + * core_mask. + * It is not thread-safe. + * + * @param chan_info + * Pointer to struct channel_info + * + * @param vcpu + * The virtual CPU to query. + * + * + * @return + * - 0 on error. + * - >0 on success. + */ +uint64_t get_pcpus_mask(struct channel_info *chan_info, unsigned vcpu); + +/** + * Set the Physical CPU mask for the specified vCPU. + * It is not thread-safe. + * + * @param name + * Virtual Machine name to lookup + * + * @param vcpu + * The virtual CPU to set. 
+ * + * @param core_mask + * The core mask of the physical CPU(s) to bind the vCPU + * + * @return + * - 0 on success. + * - Negative on error. + */ +int set_pcpus_mask(char *vm_name, unsigned vcpu, uint64_t core_mask); + +/** + * Set the Physical CPU for the specified vCPU. + * It is not thread-safe. + * + * @param name + * Virtual Machine name to lookup + * + * @param vcpu + * The virtual CPU to set. + * + * @param core_num + * The core number of the physical CPU(s) to bind the vCPU + * + * @return + * - 0 on success. + * - Negative on error. + */ +int set_pcpu(char *vm_name, unsigned vcpu, unsigned core_num); +/** + * Add a VM as specified by name to the Channel Manager. The name must + * correspond to a valid libvirt domain name. + * This is required prior to adding channels. + * It is not thread-safe. + * + * @param name + * Virtual Machine name to lookup. + * + * @return + * - 0 on success. + * - Negative on error. + */ +int add_vm(const char *name); + +/** + * Remove a previously added Virtual Machine from the Channel Manager + * It is not thread-safe. + * + * @param name + * Virtual Machine name to lookup. + * + * @return + * - 0 on success. + * - Negative on error. + */ +int remove_vm(const char *name); + +/** + * Add all available channels to the VM as specified by name. + * Channels in the form of paths + * (CHANNEL_MGR_SOCKET_PATH/<vm_name>.<channel_number>) will only be parsed. + * It is not thread-safe. + * + * @param name + * Virtual Machine name to lookup. + * + * @return + * - N the number of channels added for the VM + */ +int add_all_channels(const char *vm_name); + +/** + * Add the channel numbers in channel_list to the domain specified by name. + * Channels in the form of paths + * (CHANNEL_MGR_SOCKET_PATH/<vm_name>.<channel_number>) will only be parsed. + * It is not thread-safe. + * + * @param name + * Virtual Machine name to add channels. 
+ * + * @param channel_list + * Pointer to list of unsigned integers, representing the channel number to add + * It must be allocated outside of this function. + * + * @param num_channels + * The amount of channel numbers in channel_list + * + * @return + * - N the number of channels added for the VM + * - 0 for error + */ +int add_channels(const char *vm_name, unsigned *channel_list, + unsigned num_channels); + +/** + * Remove a channel definition from the channel manager. This must only be + * called from the channel monitor thread. + * + * @param chan_info + * Pointer to a valid struct channel_info. + * + * @return + * - 0 on success. + * - Negative on error. + */ +int remove_channel(struct channel_info **chan_info_dptr); + +/** + * For all channels associated with a Virtual Machine name, update the + * connection status. Valid states are CHANNEL_MGR_CHANNEL_CONNECTED or + * CHANNEL_MGR_CHANNEL_DISABLED only. + * + * + * @param name + * Virtual Machine name to modify all channels. + * + * @param status + * The status to set each channel + * + * @param num_channels + * The amount of channel numbers in channel_list + * + * @return + * - N the number of channels added for the VM + * - 0 for error + */ +int set_channel_status_all(const char *name, enum channel_status status); + +/** + * For all channels in channel_list associated with a Virtual Machine name + * update the connection status of each. + * Valid states are CHANNEL_MGR_CHANNEL_CONNECTED or + * CHANNEL_MGR_CHANNEL_DISABLED only. + * It is not thread-safe. + * + * @param name + * Virtual Machine name to add channels. + * + * @param channel_list + * Pointer to list of unsigned integers, representing the channel numbers to + * modify. + * It must be allocated outside of this function. 
+ * + * @param num_channels + * The amount of channel numbers in channel_list + * + * @return + * - N the number of channels modified for the VM + * - 0 for error + */ +int set_channel_status(const char *vm_name, unsigned *channel_list, + unsigned len_channel_list, enum channel_status status); + +/** + * Populates a pointer to struct vm_info associated with vm_name. + * + * @param vm_name + * The name of the virtual machine to lookup. + * + * @param vm_info + * Pointer to a struct vm_info, this must be allocated prior to calling this + * function. + * + * @return + * - 0 on success. + * - Negative on error. + */ +int get_info_vm(const char *vm_name, struct vm_info *info); + +/** + * Populates a table with all domains running and their physical cpu. + * All information is gathered through libvirt api. + * + * @param num_vm + * modified to store number of active VMs + * + * @param num_vcpu + modified to store number of vcpus active + * + * @return + * void + */ +void get_all_vm(int *num_vm, int *num_vcpu); +#ifdef __cplusplus +} +#endif + +#endif /* CHANNEL_MANAGER_H_ */ diff --git a/src/spdk/dpdk/examples/vm_power_manager/channel_monitor.c b/src/spdk/dpdk/examples/vm_power_manager/channel_monitor.c new file mode 100644 index 00000000..7fa47ba9 --- /dev/null +++ b/src/spdk/dpdk/examples/vm_power_manager/channel_monitor.c @@ -0,0 +1,544 @@ +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright(c) 2010-2014 Intel Corporation + */ + +#include <unistd.h> +#include <stdio.h> +#include <stdlib.h> +#include <stdint.h> +#include <signal.h> +#include <errno.h> +#include <string.h> +#include <sys/types.h> +#include <sys/epoll.h> +#include <sys/queue.h> +#include <sys/time.h> + +#include <rte_log.h> +#include <rte_memory.h> +#include <rte_malloc.h> +#include <rte_atomic.h> +#include <rte_cycles.h> +#include <rte_ethdev.h> +#include <rte_pmd_i40e.h> + +#include <libvirt/libvirt.h> +#include "channel_monitor.h" +#include "channel_commands.h" +#include "channel_manager.h" 
+#include "power_manager.h" +#include "oob_monitor.h" + +#define RTE_LOGTYPE_CHANNEL_MONITOR RTE_LOGTYPE_USER1 + +#define MAX_EVENTS 256 + +uint64_t vsi_pkt_count_prev[384]; +uint64_t rdtsc_prev[384]; + +double time_period_ms = 1; +static volatile unsigned run_loop = 1; +static int global_event_fd; +static unsigned int policy_is_set; +static struct epoll_event *global_events_list; +static struct policy policies[MAX_VMS]; + +void channel_monitor_exit(void) +{ + run_loop = 0; + rte_free(global_events_list); +} + +static void +core_share(int pNo, int z, int x, int t) +{ + if (policies[pNo].core_share[z].pcpu == lvm_info[x].pcpus[t]) { + if (strcmp(policies[pNo].pkt.vm_name, + lvm_info[x].vm_name) != 0) { + policies[pNo].core_share[z].status = 1; + power_manager_scale_core_max( + policies[pNo].core_share[z].pcpu); + } + } +} + +static void +core_share_status(int pNo) +{ + + int noVms, noVcpus, z, x, t; + + get_all_vm(&noVms, &noVcpus); + + /* Reset Core Share Status. */ + for (z = 0; z < noVcpus; z++) + policies[pNo].core_share[z].status = 0; + + /* Foreach vcpu in a policy. */ + for (z = 0; z < policies[pNo].pkt.num_vcpu; z++) { + /* Foreach VM on the platform. */ + for (x = 0; x < noVms; x++) { + /* Foreach vcpu of VMs on platform. */ + for (t = 0; t < lvm_info[x].num_cpus; t++) + core_share(pNo, z, x, t); + } + } +} + +static void +get_pcpu_to_control(struct policy *pol) +{ + + /* Convert vcpu to pcpu. 
*/ + struct vm_info info; + int pcpu, count; + uint64_t mask_u64b; + struct core_info *ci; + int ret; + + ci = get_core_info(); + + RTE_LOG(INFO, CHANNEL_MONITOR, "Looking for pcpu for %s\n", + pol->pkt.vm_name); + get_info_vm(pol->pkt.vm_name, &info); + + for (count = 0; count < pol->pkt.num_vcpu; count++) { + mask_u64b = info.pcpu_mask[pol->pkt.vcpu_to_control[count]]; + for (pcpu = 0; mask_u64b; mask_u64b &= ~(1ULL << pcpu++)) { + if ((mask_u64b >> pcpu) & 1) { + if (pol->pkt.policy_to_use == BRANCH_RATIO) { + ci->cd[pcpu].oob_enabled = 1; + ret = add_core_to_monitor(pcpu); + if (ret == 0) + printf("Monitoring pcpu %d via Branch Ratio\n", + pcpu); + else + printf("Failed to start OOB Monitoring pcpu %d\n", + pcpu); + + } else { + pol->core_share[count].pcpu = pcpu; + printf("Monitoring pcpu %d\n", pcpu); + } + } + } + } +} + +static int +get_pfid(struct policy *pol) +{ + + int i, x, ret = 0; + + for (i = 0; i < pol->pkt.nb_mac_to_monitor; i++) { + + RTE_ETH_FOREACH_DEV(x) { + ret = rte_pmd_i40e_query_vfid_by_mac(x, + (struct ether_addr *)&(pol->pkt.vfid[i])); + if (ret != -EINVAL) { + pol->port[i] = x; + break; + } + } + if (ret == -EINVAL || ret == -ENOTSUP || ret == ENODEV) { + RTE_LOG(INFO, CHANNEL_MONITOR, + "Error with Policy. 
MAC not found on " + "attached ports "); + pol->enabled = 0; + return ret; + } + pol->pfid[i] = ret; + } + return 1; +} + +static int +update_policy(struct channel_packet *pkt) +{ + + unsigned int updated = 0; + int i; + + for (i = 0; i < MAX_VMS; i++) { + if (strcmp(policies[i].pkt.vm_name, pkt->vm_name) == 0) { + policies[i].pkt = *pkt; + get_pcpu_to_control(&policies[i]); + if (get_pfid(&policies[i]) == -1) { + updated = 1; + break; + } + core_share_status(i); + policies[i].enabled = 1; + updated = 1; + } + } + if (!updated) { + for (i = 0; i < MAX_VMS; i++) { + if (policies[i].enabled == 0) { + policies[i].pkt = *pkt; + get_pcpu_to_control(&policies[i]); + if (get_pfid(&policies[i]) == -1) + break; + core_share_status(i); + policies[i].enabled = 1; + break; + } + } + } + return 0; +} + +static uint64_t +get_pkt_diff(struct policy *pol) +{ + + uint64_t vsi_pkt_count, + vsi_pkt_total = 0, + vsi_pkt_count_prev_total = 0; + double rdtsc_curr, rdtsc_diff, diff; + int x; + struct rte_eth_stats vf_stats; + + for (x = 0; x < pol->pkt.nb_mac_to_monitor; x++) { + + /*Read vsi stats*/ + if (rte_pmd_i40e_get_vf_stats(x, pol->pfid[x], &vf_stats) == 0) + vsi_pkt_count = vf_stats.ipackets; + else + vsi_pkt_count = -1; + + vsi_pkt_total += vsi_pkt_count; + + vsi_pkt_count_prev_total += vsi_pkt_count_prev[pol->pfid[x]]; + vsi_pkt_count_prev[pol->pfid[x]] = vsi_pkt_count; + } + + rdtsc_curr = rte_rdtsc_precise(); + rdtsc_diff = rdtsc_curr - rdtsc_prev[pol->pfid[x-1]]; + rdtsc_prev[pol->pfid[x-1]] = rdtsc_curr; + + diff = (vsi_pkt_total - vsi_pkt_count_prev_total) * + ((double)rte_get_tsc_hz() / rdtsc_diff); + + return diff; +} + +static void +apply_traffic_profile(struct policy *pol) +{ + + int count; + uint64_t diff = 0; + + diff = get_pkt_diff(pol); + + RTE_LOG(INFO, CHANNEL_MONITOR, "Applying traffic profile\n"); + + if (diff >= (pol->pkt.traffic_policy.max_max_packet_thresh)) { + for (count = 0; count < pol->pkt.num_vcpu; count++) { + if (pol->core_share[count].status != 1) 
+ power_manager_scale_core_max( + pol->core_share[count].pcpu); + } + } else if (diff >= (pol->pkt.traffic_policy.avg_max_packet_thresh)) { + for (count = 0; count < pol->pkt.num_vcpu; count++) { + if (pol->core_share[count].status != 1) + power_manager_scale_core_med( + pol->core_share[count].pcpu); + } + } else if (diff < (pol->pkt.traffic_policy.avg_max_packet_thresh)) { + for (count = 0; count < pol->pkt.num_vcpu; count++) { + if (pol->core_share[count].status != 1) + power_manager_scale_core_min( + pol->core_share[count].pcpu); + } + } +} + +static void +apply_time_profile(struct policy *pol) +{ + + int count, x; + struct timeval tv; + struct tm *ptm; + char time_string[40]; + + /* Obtain the time of day, and convert it to a tm struct. */ + gettimeofday(&tv, NULL); + ptm = localtime(&tv.tv_sec); + /* Format the date and time, down to a single second. */ + strftime(time_string, sizeof(time_string), "%Y-%m-%d %H:%M:%S", ptm); + + for (x = 0; x < HOURS; x++) { + + if (ptm->tm_hour == pol->pkt.timer_policy.busy_hours[x]) { + for (count = 0; count < pol->pkt.num_vcpu; count++) { + if (pol->core_share[count].status != 1) { + power_manager_scale_core_max( + pol->core_share[count].pcpu); + RTE_LOG(INFO, CHANNEL_MONITOR, + "Scaling up core %d to max\n", + pol->core_share[count].pcpu); + } + } + break; + } else if (ptm->tm_hour == + pol->pkt.timer_policy.quiet_hours[x]) { + for (count = 0; count < pol->pkt.num_vcpu; count++) { + if (pol->core_share[count].status != 1) { + power_manager_scale_core_min( + pol->core_share[count].pcpu); + RTE_LOG(INFO, CHANNEL_MONITOR, + "Scaling down core %d to min\n", + pol->core_share[count].pcpu); + } + } + break; + } else if (ptm->tm_hour == + pol->pkt.timer_policy.hours_to_use_traffic_profile[x]) { + apply_traffic_profile(pol); + break; + } + } +} + +static void +apply_workload_profile(struct policy *pol) +{ + + int count; + + if (pol->pkt.workload == HIGH) { + for (count = 0; count < pol->pkt.num_vcpu; count++) { + if 
(pol->core_share[count].status != 1) + power_manager_scale_core_max( + pol->core_share[count].pcpu); + } + } else if (pol->pkt.workload == MEDIUM) { + for (count = 0; count < pol->pkt.num_vcpu; count++) { + if (pol->core_share[count].status != 1) + power_manager_scale_core_med( + pol->core_share[count].pcpu); + } + } else if (pol->pkt.workload == LOW) { + for (count = 0; count < pol->pkt.num_vcpu; count++) { + if (pol->core_share[count].status != 1) + power_manager_scale_core_min( + pol->core_share[count].pcpu); + } + } +} + +static void +apply_policy(struct policy *pol) +{ + + struct channel_packet *pkt = &pol->pkt; + + /*Check policy to use*/ + if (pkt->policy_to_use == TRAFFIC) + apply_traffic_profile(pol); + else if (pkt->policy_to_use == TIME) + apply_time_profile(pol); + else if (pkt->policy_to_use == WORKLOAD) + apply_workload_profile(pol); +} + + +static int +process_request(struct channel_packet *pkt, struct channel_info *chan_info) +{ + uint64_t core_mask; + + if (chan_info == NULL) + return -1; + + if (rte_atomic32_cmpset(&(chan_info->status), CHANNEL_MGR_CHANNEL_CONNECTED, + CHANNEL_MGR_CHANNEL_PROCESSING) == 0) + return -1; + + if (pkt->command == CPU_POWER) { + core_mask = get_pcpus_mask(chan_info, pkt->resource_id); + if (core_mask == 0) { + RTE_LOG(ERR, CHANNEL_MONITOR, "Error get physical CPU mask for " + "channel '%s' using vCPU(%u)\n", chan_info->channel_path, + (unsigned)pkt->unit); + return -1; + } + if (__builtin_popcountll(core_mask) == 1) { + + unsigned core_num = __builtin_ffsll(core_mask) - 1; + + switch (pkt->unit) { + case(CPU_POWER_SCALE_MIN): + power_manager_scale_core_min(core_num); + break; + case(CPU_POWER_SCALE_MAX): + power_manager_scale_core_max(core_num); + break; + case(CPU_POWER_SCALE_DOWN): + power_manager_scale_core_down(core_num); + break; + case(CPU_POWER_SCALE_UP): + power_manager_scale_core_up(core_num); + break; + case(CPU_POWER_ENABLE_TURBO): + power_manager_enable_turbo_core(core_num); + break; + 
case(CPU_POWER_DISABLE_TURBO): + power_manager_disable_turbo_core(core_num); + break; + default: + break; + } + } else { + switch (pkt->unit) { + case(CPU_POWER_SCALE_MIN): + power_manager_scale_mask_min(core_mask); + break; + case(CPU_POWER_SCALE_MAX): + power_manager_scale_mask_max(core_mask); + break; + case(CPU_POWER_SCALE_DOWN): + power_manager_scale_mask_down(core_mask); + break; + case(CPU_POWER_SCALE_UP): + power_manager_scale_mask_up(core_mask); + break; + case(CPU_POWER_ENABLE_TURBO): + power_manager_enable_turbo_mask(core_mask); + break; + case(CPU_POWER_DISABLE_TURBO): + power_manager_disable_turbo_mask(core_mask); + break; + default: + break; + } + + } + } + + if (pkt->command == PKT_POLICY) { + RTE_LOG(INFO, CHANNEL_MONITOR, "\nProcessing Policy request from Guest\n"); + update_policy(pkt); + policy_is_set = 1; + } + + /* Return is not checked as channel status may have been set to DISABLED + * from management thread + */ + rte_atomic32_cmpset(&(chan_info->status), CHANNEL_MGR_CHANNEL_PROCESSING, + CHANNEL_MGR_CHANNEL_CONNECTED); + return 0; + +} + +int +add_channel_to_monitor(struct channel_info **chan_info) +{ + struct channel_info *info = *chan_info; + struct epoll_event event; + + event.events = EPOLLIN; + event.data.ptr = info; + if (epoll_ctl(global_event_fd, EPOLL_CTL_ADD, info->fd, &event) < 0) { + RTE_LOG(ERR, CHANNEL_MONITOR, "Unable to add channel '%s' " + "to epoll\n", info->channel_path); + return -1; + } + return 0; +} + +int +remove_channel_from_monitor(struct channel_info *chan_info) +{ + if (epoll_ctl(global_event_fd, EPOLL_CTL_DEL, chan_info->fd, NULL) < 0) { + RTE_LOG(ERR, CHANNEL_MONITOR, "Unable to remove channel '%s' " + "from epoll\n", chan_info->channel_path); + return -1; + } + return 0; +} + +int +channel_monitor_init(void) +{ + global_event_fd = epoll_create1(0); + if (global_event_fd == 0) { + RTE_LOG(ERR, CHANNEL_MONITOR, "Error creating epoll context with " + "error %s\n", strerror(errno)); + return -1; + } + 
global_events_list = rte_malloc("epoll_events", sizeof(*global_events_list) + * MAX_EVENTS, RTE_CACHE_LINE_SIZE); + if (global_events_list == NULL) { + RTE_LOG(ERR, CHANNEL_MONITOR, "Unable to rte_malloc for " + "epoll events\n"); + return -1; + } + return 0; +} + +void +run_channel_monitor(void) +{ + while (run_loop) { + int n_events, i; + + n_events = epoll_wait(global_event_fd, global_events_list, + MAX_EVENTS, 1); + if (!run_loop) + break; + for (i = 0; i < n_events; i++) { + struct channel_info *chan_info = (struct channel_info *) + global_events_list[i].data.ptr; + if ((global_events_list[i].events & EPOLLERR) || + (global_events_list[i].events & EPOLLHUP)) { + RTE_LOG(DEBUG, CHANNEL_MONITOR, "Remote closed connection for " + "channel '%s'\n", + chan_info->channel_path); + remove_channel(&chan_info); + continue; + } + if (global_events_list[i].events & EPOLLIN) { + + int n_bytes, err = 0; + struct channel_packet pkt; + void *buffer = &pkt; + int buffer_len = sizeof(pkt); + + while (buffer_len > 0) { + n_bytes = read(chan_info->fd, + buffer, buffer_len); + if (n_bytes == buffer_len) + break; + if (n_bytes == -1) { + err = errno; + RTE_LOG(DEBUG, CHANNEL_MONITOR, + "Received error on " + "channel '%s' read: %s\n", + chan_info->channel_path, + strerror(err)); + remove_channel(&chan_info); + break; + } + buffer = (char *)buffer + n_bytes; + buffer_len -= n_bytes; + } + if (!err) + process_request(&pkt, chan_info); + } + } + rte_delay_us(time_period_ms*1000); + if (policy_is_set) { + int j; + + for (j = 0; j < MAX_VMS; j++) { + if (policies[j].enabled == 1) + apply_policy(&policies[j]); + } + } + } +} diff --git a/src/spdk/dpdk/examples/vm_power_manager/channel_monitor.h b/src/spdk/dpdk/examples/vm_power_manager/channel_monitor.h new file mode 100644 index 00000000..7362a80d --- /dev/null +++ b/src/spdk/dpdk/examples/vm_power_manager/channel_monitor.h @@ -0,0 +1,91 @@ +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright(c) 2010-2014 Intel Corporation + */ + 
+#ifndef CHANNEL_MONITOR_H_ +#define CHANNEL_MONITOR_H_ + +#include "channel_manager.h" +#include "channel_commands.h" + +struct core_share { + unsigned int pcpu; + /* + * 1 CORE SHARE + * 0 NOT SHARED + */ + int status; +}; + +struct policy { + struct channel_packet pkt; + uint32_t pfid[MAX_VFS]; + uint32_t port[MAX_VFS]; + unsigned int enabled; + struct core_share core_share[MAX_VCPU_PER_VM]; +}; + +#ifdef __cplusplus +extern "C" { +#endif + +/** + * Setup the Channel Monitor resources required to initialize epoll. + * Must be called first before calling other functions. + * + * @return + * - 0 on success. + * - Negative on error. + */ +int channel_monitor_init(void); + +/** + * Run the channel monitor, loops forever on on epoll_wait. + * + * + * @return + * None + */ +void run_channel_monitor(void); + +/** + * Exit the Channel Monitor, exiting the epoll_wait loop and events processing. + * + * @return + * - 0 on success. + * - Negative on error. + */ +void channel_monitor_exit(void); + +/** + * Add an open channel to monitor via epoll. A pointer to struct channel_info + * will be registered with epoll for event processing. + * It is thread-safe. + * + * @param chan_info + * Pointer to struct channel_info pointer. + * + * @return + * - 0 on success. + * - Negative on error. + */ +int add_channel_to_monitor(struct channel_info **chan_info); + +/** + * Remove a previously added channel from epoll control. + * + * @param chan_info + * Pointer to struct channel_info. + * + * @return + * - 0 on success. + * - Negative on error. 
+ */ +int remove_channel_from_monitor(struct channel_info *chan_info); + +#ifdef __cplusplus +} +#endif + + +#endif /* CHANNEL_MONITOR_H_ */ diff --git a/src/spdk/dpdk/examples/vm_power_manager/guest_cli/Makefile b/src/spdk/dpdk/examples/vm_power_manager/guest_cli/Makefile new file mode 100644 index 00000000..8b1db861 --- /dev/null +++ b/src/spdk/dpdk/examples/vm_power_manager/guest_cli/Makefile @@ -0,0 +1,28 @@ +# SPDX-License-Identifier: BSD-3-Clause +# Copyright(c) 2010-2014 Intel Corporation + +ifeq ($(RTE_SDK),) +$(error "Please define RTE_SDK environment variable") +endif + +# Default target, can be overridden by command line or environment +RTE_TARGET ?= x86_64-native-linuxapp-gcc + +include $(RTE_SDK)/mk/rte.vars.mk + +# binary name +APP = guest_vm_power_mgr + +# all source are stored in SRCS-y +SRCS-y := main.c vm_power_cli_guest.c parse.c + +CFLAGS += -O3 -I$(RTE_SDK)/lib/librte_power/ +CFLAGS += $(WERROR_FLAGS) + +# workaround for a gcc bug with noreturn attribute +# http://gcc.gnu.org/bugzilla/show_bug.cgi?id=12603 +ifeq ($(CONFIG_RTE_TOOLCHAIN_GCC),y) +CFLAGS_main.o += -Wno-return-type +endif + +include $(RTE_SDK)/mk/rte.extapp.mk diff --git a/src/spdk/dpdk/examples/vm_power_manager/guest_cli/main.c b/src/spdk/dpdk/examples/vm_power_manager/guest_cli/main.c new file mode 100644 index 00000000..36365b12 --- /dev/null +++ b/src/spdk/dpdk/examples/vm_power_manager/guest_cli/main.c @@ -0,0 +1,190 @@ +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright(c) 2010-2014 Intel Corporation + */ + +#include <stdio.h> +#include <stdlib.h> +#include <signal.h> +#include <getopt.h> +#include <string.h> + +#include <rte_lcore.h> +#include <rte_power.h> +#include <rte_debug.h> +#include <rte_eal.h> +#include <rte_log.h> + +#include "vm_power_cli_guest.h" +#include "parse.h" + +static void +sig_handler(int signo) +{ + printf("Received signal %d, exiting...\n", signo); + unsigned lcore_id; + + RTE_LCORE_FOREACH(lcore_id) { + rte_power_exit(lcore_id); + } + +} + 
+#define MAX_HOURS 24 + +/* Parse the argument given in the command line of the application */ +static int +parse_args(int argc, char **argv) +{ + int opt, ret; + char **argvopt; + int option_index; + char *prgname = argv[0]; + const struct option lgopts[] = { + { "vm-name", required_argument, 0, 'n'}, + { "busy-hours", required_argument, 0, 'b'}, + { "quiet-hours", required_argument, 0, 'q'}, + { "port-list", required_argument, 0, 'p'}, + { "vcpu-list", required_argument, 0, 'l'}, + { "policy", required_argument, 0, 'o'}, + {NULL, 0, 0, 0} + }; + struct channel_packet *policy; + unsigned short int hours[MAX_HOURS]; + unsigned short int cores[MAX_VCPU_PER_VM]; + unsigned short int ports[MAX_VCPU_PER_VM]; + int i, cnt, idx; + + policy = get_policy(); + set_policy_defaults(policy); + + argvopt = argv; + + while ((opt = getopt_long(argc, argvopt, "n:b:q:p:", + lgopts, &option_index)) != EOF) { + + switch (opt) { + /* portmask */ + case 'n': + strcpy(policy->vm_name, optarg); + printf("Setting VM Name to [%s]\n", policy->vm_name); + break; + case 'b': + case 'q': + //printf("***Processing set using [%s]\n", optarg); + cnt = parse_set(optarg, hours, MAX_HOURS); + if (cnt < 0) { + printf("Invalid value passed to quiet/busy hours - [%s]\n", + optarg); + break; + } + idx = 0; + for (i = 0; i < MAX_HOURS; i++) { + if (hours[i]) { + if (opt == 'b') { + printf("***Busy Hour %d\n", i); + policy->timer_policy.busy_hours + [idx++] = i; + } else { + printf("***Quiet Hour %d\n", i); + policy->timer_policy.quiet_hours + [idx++] = i; + } + } + } + break; + case 'l': + cnt = parse_set(optarg, cores, MAX_VCPU_PER_VM); + if (cnt < 0) { + printf("Invalid value passed to vcpu-list - [%s]\n", + optarg); + break; + } + idx = 0; + for (i = 0; i < MAX_VCPU_PER_VM; i++) { + if (cores[i]) { + printf("***Using core %d\n", i); + policy->vcpu_to_control[idx++] = i; + } + } + policy->num_vcpu = idx; + printf("Total cores: %d\n", idx); + break; + case 'p': + cnt = parse_set(optarg, ports, 
MAX_VCPU_PER_VM); + if (cnt < 0) { + printf("Invalid value passed to port-list - [%s]\n", + optarg); + break; + } + idx = 0; + for (i = 0; i < MAX_VCPU_PER_VM; i++) { + if (ports[i]) { + printf("***Using port %d\n", i); + set_policy_mac(i, idx++); + } + } + policy->nb_mac_to_monitor = idx; + printf("Total Ports: %d\n", idx); + break; + case 'o': + if (!strcmp(optarg, "TRAFFIC")) + policy->policy_to_use = TRAFFIC; + else if (!strcmp(optarg, "TIME")) + policy->policy_to_use = TIME; + else if (!strcmp(optarg, "WORKLOAD")) + policy->policy_to_use = WORKLOAD; + else if (!strcmp(optarg, "BRANCH_RATIO")) + policy->policy_to_use = BRANCH_RATIO; + else { + printf("Invalid policy specified: %s\n", + optarg); + return -1; + } + break; + /* long options */ + + case 0: + break; + + default: + return -1; + } + } + + if (optind >= 0) + argv[optind-1] = prgname; + + ret = optind-1; + optind = 0; /* reset getopt lib */ + return ret; +} + +int +main(int argc, char **argv) +{ + int ret; + unsigned lcore_id; + + ret = rte_eal_init(argc, argv); + if (ret < 0) + rte_panic("Cannot init EAL\n"); + + signal(SIGINT, sig_handler); + signal(SIGTERM, sig_handler); + + argc -= ret; + argv += ret; + + /* parse application arguments (after the EAL ones) */ + ret = parse_args(argc, argv); + if (ret < 0) + rte_exit(EXIT_FAILURE, "Invalid arguments\n"); + + rte_power_set_env(PM_ENV_KVM_VM); + RTE_LCORE_FOREACH(lcore_id) { + rte_power_init(lcore_id); + } + run_cli(NULL); + + return 0; +} diff --git a/src/spdk/dpdk/examples/vm_power_manager/guest_cli/parse.c b/src/spdk/dpdk/examples/vm_power_manager/guest_cli/parse.c new file mode 100644 index 00000000..528df6d6 --- /dev/null +++ b/src/spdk/dpdk/examples/vm_power_manager/guest_cli/parse.c @@ -0,0 +1,82 @@ +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright(c) 2010-2014 Intel Corporation. + * Copyright(c) 2014 6WIND S.A. 
+ */ + +#include <stdlib.h> +#include <string.h> +#include <rte_log.h> +#include "parse.h" + +/* + * Parse elem, the elem could be single number/range or group + * 1) A single number elem, it's just a simple digit. e.g. 9 + * 2) A single range elem, two digits with a '-' between. e.g. 2-6 + * 3) A group elem, combines multiple 1) or 2) e.g 0,2-4,6 + * Within group, '-' used for a range separator; + * ',' used for a single number. + */ +int +parse_set(const char *input, uint16_t set[], unsigned int num) +{ + unsigned int idx; + const char *str = input; + char *end = NULL; + unsigned int min, max; + + memset(set, 0, num * sizeof(uint16_t)); + + while (isblank(*str)) + str++; + + /* only digit or left bracket is qualify for start point */ + if (!isdigit(*str) || *str == '\0') + return -1; + + while (isblank(*str)) + str++; + if (*str == '\0') + return -1; + + min = num; + do { + + /* go ahead to the first digit */ + while (isblank(*str)) + str++; + if (!isdigit(*str)) + return -1; + + /* get the digit value */ + errno = 0; + idx = strtoul(str, &end, 10); + if (errno || end == NULL || idx >= num) + return -1; + + /* go ahead to separator '-' and ',' */ + while (isblank(*end)) + end++; + if (*end == '-') { + if (min == num) + min = idx; + else /* avoid continuous '-' */ + return -1; + } else if ((*end == ',') || (*end == '\0')) { + max = idx; + + if (min == num) + min = idx; + + for (idx = RTE_MIN(min, max); + idx <= RTE_MAX(min, max); idx++) { + set[idx] = 1; + } + min = num; + } else + return -1; + + str = end + 1; + } while (*end != '\0'); + + return str - input; +} diff --git a/src/spdk/dpdk/examples/vm_power_manager/guest_cli/parse.h b/src/spdk/dpdk/examples/vm_power_manager/guest_cli/parse.h new file mode 100644 index 00000000..c8aa0ea5 --- /dev/null +++ b/src/spdk/dpdk/examples/vm_power_manager/guest_cli/parse.h @@ -0,0 +1,19 @@ +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright(c) 2018 Intel Corporation + */ + +#ifndef PARSE_H_ +#define PARSE_H_ + +#ifdef 
__cplusplus +extern "C" { +#endif + +int +parse_set(const char *, uint16_t [], unsigned int); + +#ifdef __cplusplus +} +#endif + +#endif /* PARSE_H_ */ diff --git a/src/spdk/dpdk/examples/vm_power_manager/guest_cli/vm_power_cli_guest.c b/src/spdk/dpdk/examples/vm_power_manager/guest_cli/vm_power_cli_guest.c new file mode 100644 index 00000000..0db1b804 --- /dev/null +++ b/src/spdk/dpdk/examples/vm_power_manager/guest_cli/vm_power_cli_guest.c @@ -0,0 +1,251 @@ +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright(c) 2010-2014 Intel Corporation + */ + + +#include <stdint.h> +#include <string.h> +#include <stdio.h> +#include <termios.h> + +#include <cmdline_rdline.h> +#include <cmdline_parse.h> +#include <cmdline_parse_string.h> +#include <cmdline_parse_num.h> +#include <cmdline_socket.h> +#include <cmdline.h> +#include <rte_log.h> +#include <rte_lcore.h> +#include <rte_ethdev.h> + +#include <rte_power.h> +#include <guest_channel.h> + +#include "vm_power_cli_guest.h" + + +#define CHANNEL_PATH "/dev/virtio-ports/virtio.serial.port.poweragent" + + +#define RTE_LOGTYPE_GUEST_CHANNEL RTE_LOGTYPE_USER1 + +struct cmd_quit_result { + cmdline_fixed_string_t quit; +}; + +union PFID { + struct ether_addr addr; + uint64_t pfid; +}; + +static struct channel_packet policy; + +struct channel_packet * +get_policy(void) +{ + return &policy; +} + +int +set_policy_mac(int port, int idx) +{ + struct channel_packet *policy; + union PFID pfid; + + /* Use port MAC address as the vfid */ + rte_eth_macaddr_get(port, &pfid.addr); + + printf("Port %u MAC: %02" PRIx8 ":%02" PRIx8 ":%02" PRIx8 ":" + "%02" PRIx8 ":%02" PRIx8 ":%02" PRIx8 "\n", + port, + pfid.addr.addr_bytes[0], pfid.addr.addr_bytes[1], + pfid.addr.addr_bytes[2], pfid.addr.addr_bytes[3], + pfid.addr.addr_bytes[4], pfid.addr.addr_bytes[5]); + policy = get_policy(); + policy->vfid[idx] = pfid.pfid; + return 0; +} + +void +set_policy_defaults(struct channel_packet *pkt) +{ + set_policy_mac(0, 0); + pkt->nb_mac_to_monitor = 1; + + 
pkt->t_boost_status.tbEnabled = false; + + pkt->vcpu_to_control[0] = 0; + pkt->vcpu_to_control[1] = 1; + pkt->num_vcpu = 2; + /* Dummy Population. */ + pkt->traffic_policy.min_packet_thresh = 96000; + pkt->traffic_policy.avg_max_packet_thresh = 1800000; + pkt->traffic_policy.max_max_packet_thresh = 2000000; + + pkt->timer_policy.busy_hours[0] = 3; + pkt->timer_policy.busy_hours[1] = 4; + pkt->timer_policy.busy_hours[2] = 5; + pkt->timer_policy.quiet_hours[0] = 11; + pkt->timer_policy.quiet_hours[1] = 12; + pkt->timer_policy.quiet_hours[2] = 13; + + pkt->timer_policy.hours_to_use_traffic_profile[0] = 8; + pkt->timer_policy.hours_to_use_traffic_profile[1] = 10; + + pkt->workload = LOW; + pkt->policy_to_use = TIME; + pkt->command = PKT_POLICY; + strcpy(pkt->vm_name, "ubuntu2"); +} + +static void cmd_quit_parsed(__attribute__((unused)) void *parsed_result, + __attribute__((unused)) struct cmdline *cl, + __attribute__((unused)) void *data) +{ + unsigned lcore_id; + + RTE_LCORE_FOREACH(lcore_id) { + rte_power_exit(lcore_id); + } + cmdline_quit(cl); +} + +cmdline_parse_token_string_t cmd_quit_quit = + TOKEN_STRING_INITIALIZER(struct cmd_quit_result, quit, "quit"); + +cmdline_parse_inst_t cmd_quit = { + .f = cmd_quit_parsed, /* function to call */ + .data = NULL, /* 2nd arg of func */ + .help_str = "close the application", + .tokens = { /* token list, NULL terminated */ + (void *)&cmd_quit_quit, + NULL, + }, +}; + +/* *** VM operations *** */ + +struct cmd_set_cpu_freq_result { + cmdline_fixed_string_t set_cpu_freq; + uint8_t lcore_id; + cmdline_fixed_string_t cmd; +}; + +static void +cmd_set_cpu_freq_parsed(void *parsed_result, struct cmdline *cl, + __attribute__((unused)) void *data) +{ + int ret = -1; + struct cmd_set_cpu_freq_result *res = parsed_result; + + if (!strcmp(res->cmd , "up")) + ret = rte_power_freq_up(res->lcore_id); + else if (!strcmp(res->cmd , "down")) + ret = rte_power_freq_down(res->lcore_id); + else if (!strcmp(res->cmd , "min")) + ret = 
rte_power_freq_min(res->lcore_id); + else if (!strcmp(res->cmd , "max")) + ret = rte_power_freq_max(res->lcore_id); + else if (!strcmp(res->cmd, "enable_turbo")) + ret = rte_power_freq_enable_turbo(res->lcore_id); + else if (!strcmp(res->cmd, "disable_turbo")) + ret = rte_power_freq_disable_turbo(res->lcore_id); + if (ret != 1) + cmdline_printf(cl, "Error sending message: %s\n", strerror(ret)); +} + +cmdline_parse_token_string_t cmd_set_cpu_freq = + TOKEN_STRING_INITIALIZER(struct cmd_set_cpu_freq_result, + set_cpu_freq, "set_cpu_freq"); +cmdline_parse_token_string_t cmd_set_cpu_freq_core_num = + TOKEN_NUM_INITIALIZER(struct cmd_set_cpu_freq_result, + lcore_id, UINT8); +cmdline_parse_token_string_t cmd_set_cpu_freq_cmd_cmd = + TOKEN_STRING_INITIALIZER(struct cmd_set_cpu_freq_result, + cmd, "up#down#min#max#enable_turbo#disable_turbo"); + +cmdline_parse_inst_t cmd_set_cpu_freq_set = { + .f = cmd_set_cpu_freq_parsed, + .data = NULL, + .help_str = "set_cpu_freq <core_num> " + "<up|down|min|max|enable_turbo|disable_turbo>, " + "adjust the frequency for the specified core.", + .tokens = { + (void *)&cmd_set_cpu_freq, + (void *)&cmd_set_cpu_freq_core_num, + (void *)&cmd_set_cpu_freq_cmd_cmd, + NULL, + }, +}; + +struct cmd_send_policy_result { + cmdline_fixed_string_t send_policy; + cmdline_fixed_string_t cmd; +}; + +static inline int +send_policy(struct channel_packet *pkt) +{ + int ret; + + ret = rte_power_guest_channel_send_msg(pkt, 1); + if (ret == 0) + return 1; + RTE_LOG(DEBUG, POWER, "Error sending message: %s\n", + ret > 0 ? 
strerror(ret) : "channel not connected"); + return -1; +} + +static void +cmd_send_policy_parsed(void *parsed_result, struct cmdline *cl, + __attribute__((unused)) void *data) +{ + int ret = -1; + struct cmd_send_policy_result *res = parsed_result; + + if (!strcmp(res->cmd, "now")) { + printf("Sending Policy down now!\n"); + ret = send_policy(&policy); + } + if (ret != 1) + cmdline_printf(cl, "Error sending message: %s\n", + strerror(ret)); +} + +cmdline_parse_token_string_t cmd_send_policy = + TOKEN_STRING_INITIALIZER(struct cmd_send_policy_result, + send_policy, "send_policy"); +cmdline_parse_token_string_t cmd_send_policy_cmd_cmd = + TOKEN_STRING_INITIALIZER(struct cmd_send_policy_result, + cmd, "now"); + +cmdline_parse_inst_t cmd_send_policy_set = { + .f = cmd_send_policy_parsed, + .data = NULL, + .help_str = "send_policy now", + .tokens = { + (void *)&cmd_send_policy, + (void *)&cmd_send_policy_cmd_cmd, + NULL, + }, +}; + +cmdline_parse_ctx_t main_ctx[] = { + (cmdline_parse_inst_t *)&cmd_quit, + (cmdline_parse_inst_t *)&cmd_send_policy_set, + (cmdline_parse_inst_t *)&cmd_set_cpu_freq_set, + NULL, +}; + +void +run_cli(__attribute__((unused)) void *arg) +{ + struct cmdline *cl; + + cl = cmdline_stdin_new(main_ctx, "vmpower(guest)> "); + if (cl == NULL) + return; + + cmdline_interact(cl); + cmdline_stdin_exit(cl); +} diff --git a/src/spdk/dpdk/examples/vm_power_manager/guest_cli/vm_power_cli_guest.h b/src/spdk/dpdk/examples/vm_power_manager/guest_cli/vm_power_cli_guest.h new file mode 100644 index 00000000..fd77f6a6 --- /dev/null +++ b/src/spdk/dpdk/examples/vm_power_manager/guest_cli/vm_power_cli_guest.h @@ -0,0 +1,26 @@ +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright(c) 2010-2014 Intel Corporation + */ + +#ifndef VM_POWER_CLI_H_ +#define VM_POWER_CLI_H_ + +#ifdef __cplusplus +extern "C" { +#endif + +#include "channel_commands.h" + +struct channel_packet *get_policy(void); + +int set_policy_mac(int port, int idx); + +void set_policy_defaults(struct 
channel_packet *pkt); + +void run_cli(__attribute__((unused)) void *arg); + +#ifdef __cplusplus +} +#endif + +#endif /* VM_POWER_CLI_H_ */ diff --git a/src/spdk/dpdk/examples/vm_power_manager/main.c b/src/spdk/dpdk/examples/vm_power_manager/main.c new file mode 100644 index 00000000..58c5fa45 --- /dev/null +++ b/src/spdk/dpdk/examples/vm_power_manager/main.c @@ -0,0 +1,436 @@ +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright(c) 2010-2014 Intel Corporation + */ + +#include <stdio.h> +#include <string.h> +#include <stdint.h> +#include <sys/epoll.h> +#include <fcntl.h> +#include <unistd.h> +#include <stdlib.h> +#include <signal.h> +#include <errno.h> + +#include <sys/queue.h> + +#include <rte_common.h> +#include <rte_eal.h> +#include <rte_launch.h> +#include <rte_log.h> +#include <rte_per_lcore.h> +#include <rte_lcore.h> +#include <rte_ethdev.h> +#include <getopt.h> +#include <rte_cycles.h> +#include <rte_debug.h> + +#include "channel_manager.h" +#include "channel_monitor.h" +#include "power_manager.h" +#include "vm_power_cli.h" +#include "oob_monitor.h" +#include "parse.h" +#include <rte_pmd_ixgbe.h> +#include <rte_pmd_i40e.h> +#include <rte_pmd_bnxt.h> + +#define RX_RING_SIZE 1024 +#define TX_RING_SIZE 1024 + +#define NUM_MBUFS 8191 +#define MBUF_CACHE_SIZE 250 +#define BURST_SIZE 32 + +static uint32_t enabled_port_mask; +static volatile bool force_quit; + +/****************/ +static const struct rte_eth_conf port_conf_default = { + .rxmode = { + .max_rx_pkt_len = ETHER_MAX_LEN, + }, +}; + +static inline int +port_init(uint16_t port, struct rte_mempool *mbuf_pool) +{ + struct rte_eth_conf port_conf = port_conf_default; + const uint16_t rx_rings = 1, tx_rings = 1; + int retval; + uint16_t q; + struct rte_eth_dev_info dev_info; + struct rte_eth_txconf txq_conf; + + if (!rte_eth_dev_is_valid_port(port)) + return -1; + + rte_eth_dev_info_get(port, &dev_info); + if (dev_info.tx_offload_capa & DEV_TX_OFFLOAD_MBUF_FAST_FREE) + port_conf.txmode.offloads |= + 
DEV_TX_OFFLOAD_MBUF_FAST_FREE; + + /* Configure the Ethernet device. */ + retval = rte_eth_dev_configure(port, rx_rings, tx_rings, &port_conf); + if (retval != 0) + return retval; + + /* Allocate and set up 1 RX queue per Ethernet port. */ + for (q = 0; q < rx_rings; q++) { + retval = rte_eth_rx_queue_setup(port, q, RX_RING_SIZE, + rte_eth_dev_socket_id(port), NULL, mbuf_pool); + if (retval < 0) + return retval; + } + + txq_conf = dev_info.default_txconf; + txq_conf.offloads = port_conf.txmode.offloads; + /* Allocate and set up 1 TX queue per Ethernet port. */ + for (q = 0; q < tx_rings; q++) { + retval = rte_eth_tx_queue_setup(port, q, TX_RING_SIZE, + rte_eth_dev_socket_id(port), &txq_conf); + if (retval < 0) + return retval; + } + + /* Start the Ethernet port. */ + retval = rte_eth_dev_start(port); + if (retval < 0) + return retval; + + /* Display the port MAC address. */ + struct ether_addr addr; + rte_eth_macaddr_get(port, &addr); + printf("Port %u MAC: %02" PRIx8 " %02" PRIx8 " %02" PRIx8 + " %02" PRIx8 " %02" PRIx8 " %02" PRIx8 "\n", + (unsigned int)port, + addr.addr_bytes[0], addr.addr_bytes[1], + addr.addr_bytes[2], addr.addr_bytes[3], + addr.addr_bytes[4], addr.addr_bytes[5]); + + /* Enable RX in promiscuous mode for the Ethernet device. 
/*
 * Parse a hexadecimal port-mask string.
 *
 * @param portmask
 *   NUL-terminated string, e.g. "0xf" or "f".
 * @return
 *   The parsed mask (> 0) on success, 0 on any error (empty string,
 *   trailing garbage, out-of-range value, or an explicit zero mask).
 *
 * Fix: the previous version returned -1 on error, but the only caller
 * stores the result in a uint32_t and tests "== 0", so -1 became
 * 0xFFFFFFFF and invalid input was silently accepted as a valid mask.
 * Returning 0 (already rejected as "no ports") makes errors detectable.
 * Also bound the value to UINT32_MAX and check errno for overflow.
 */
static int
parse_portmask(const char *portmask)
{
	char *end = NULL;
	unsigned long pm;

	if (portmask == NULL || portmask[0] == '\0')
		return 0;

	/* parse hexadecimal string */
	errno = 0;
	pm = strtoul(portmask, &end, 16);
	if (errno != 0 || end == NULL || *end != '\0')
		return 0;

	/* zero is not a usable mask; larger values cannot fit the
	 * 32-bit enabled_port_mask */
	if (pm == 0 || pm > UINT32_MAX)
		return 0;

	return (int)pm;
}
+ branch_ratio); + break; + /* long options */ + case 0: + break; + + default: + return -1; + } + } + + if (optind >= 0) + argv[optind-1] = prgname; + + ret = optind-1; + optind = 0; /* reset getopt lib */ + return ret; +} + +static void +check_all_ports_link_status(uint32_t port_mask) +{ +#define CHECK_INTERVAL 100 /* 100ms */ +#define MAX_CHECK_TIME 90 /* 9s (90 * 100ms) in total */ + uint16_t portid, count, all_ports_up, print_flag = 0; + struct rte_eth_link link; + + printf("\nChecking link status"); + fflush(stdout); + for (count = 0; count <= MAX_CHECK_TIME; count++) { + if (force_quit) + return; + all_ports_up = 1; + RTE_ETH_FOREACH_DEV(portid) { + if (force_quit) + return; + if ((port_mask & (1 << portid)) == 0) + continue; + memset(&link, 0, sizeof(link)); + rte_eth_link_get_nowait(portid, &link); + /* print link status if flag set */ + if (print_flag == 1) { + if (link.link_status) + printf("Port %d Link Up - speed %u " + "Mbps - %s\n", (uint16_t)portid, + (unsigned int)link.link_speed, + (link.link_duplex == ETH_LINK_FULL_DUPLEX) ? 
/*
 * lcore entry point: bring up the channel monitor and then hand the
 * lcore over to its event loop (does not return while running).
 */
static int
run_monitor(__attribute__((unused)) void *arg)
{
	int rc = channel_monitor_init();

	if (rc < 0) {
		printf("Unable to initialize channel monitor\n");
		return -1;
	}

	run_channel_monitor();
	return 0;
}
RTE_MBUF_DEFAULT_BUF_SIZE, rte_socket_id()); + + if (mbuf_pool == NULL) + rte_exit(EXIT_FAILURE, "Cannot create mbuf pool\n"); + + /* Initialize ports. */ + RTE_ETH_FOREACH_DEV(portid) { + struct ether_addr eth; + int w, j; + int ret; + + if ((enabled_port_mask & (1 << portid)) == 0) + continue; + + eth.addr_bytes[0] = 0xe0; + eth.addr_bytes[1] = 0xe0; + eth.addr_bytes[2] = 0xe0; + eth.addr_bytes[3] = 0xe0; + eth.addr_bytes[4] = portid + 0xf0; + + if (port_init(portid, mbuf_pool) != 0) + rte_exit(EXIT_FAILURE, + "Cannot init port %"PRIu8 "\n", + portid); + + for (w = 0; w < MAX_VFS; w++) { + eth.addr_bytes[5] = w + 0xf0; + + ret = rte_pmd_ixgbe_set_vf_mac_addr(portid, + w, ð); + if (ret == -ENOTSUP) + ret = rte_pmd_i40e_set_vf_mac_addr( + portid, w, ð); + if (ret == -ENOTSUP) + ret = rte_pmd_bnxt_set_vf_mac_addr( + portid, w, ð); + + switch (ret) { + case 0: + printf("Port %d VF %d MAC: ", + portid, w); + for (j = 0; j < 5; j++) { + printf("%02x:", + eth.addr_bytes[j]); + } + printf("%02x\n", eth.addr_bytes[5]); + break; + } + printf("\n"); + break; + } + } + } + + check_all_ports_link_status(enabled_port_mask); + + lcore_id = rte_get_next_lcore(-1, 1, 0); + if (lcore_id == RTE_MAX_LCORE) { + RTE_LOG(ERR, EAL, "A minimum of three cores are required to run " + "application\n"); + return 0; + } + printf("Running channel monitor on lcore id %d\n", lcore_id); + rte_eal_remote_launch(run_monitor, NULL, lcore_id); + + lcore_id = rte_get_next_lcore(lcore_id, 1, 0); + if (lcore_id == RTE_MAX_LCORE) { + RTE_LOG(ERR, EAL, "A minimum of three cores are required to run " + "application\n"); + return 0; + } + if (power_manager_init() < 0) { + printf("Unable to initialize power manager\n"); + return -1; + } + if (channel_manager_init(CHANNEL_MGR_DEFAULT_HV_PATH) < 0) { + printf("Unable to initialize channel manager\n"); + return -1; + } + + printf("Running core monitor on lcore id %d\n", lcore_id); + rte_eal_remote_launch(run_core_monitor, NULL, lcore_id); + + run_cli(NULL); + + 
branch_monitor_exit(); + + rte_eal_mp_wait_lcore(); + + free(ci->cd); + + return 0; +} diff --git a/src/spdk/dpdk/examples/vm_power_manager/meson.build b/src/spdk/dpdk/examples/vm_power_manager/meson.build new file mode 100644 index 00000000..c370d747 --- /dev/null +++ b/src/spdk/dpdk/examples/vm_power_manager/meson.build @@ -0,0 +1,10 @@ +# SPDX-License-Identifier: BSD-3-Clause +# Copyright(c) 2018 Intel Corporation + +# meson file, for building this example as part of a main DPDK build. +# +# To build this example as a standalone application with an already-installed +# DPDK instance, use 'make' + +# Example app currently unsupported by meson build +build = false diff --git a/src/spdk/dpdk/examples/vm_power_manager/oob_monitor.h b/src/spdk/dpdk/examples/vm_power_manager/oob_monitor.h new file mode 100644 index 00000000..b96e08df --- /dev/null +++ b/src/spdk/dpdk/examples/vm_power_manager/oob_monitor.h @@ -0,0 +1,68 @@ +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright(c) 2018 Intel Corporation + */ + +#ifndef OOB_MONITOR_H_ +#define OOB_MONITOR_H_ + +#ifdef __cplusplus +extern "C" { +#endif + +/** + * Setup the Branch Monitor resources required to initialize epoll. + * Must be called first before calling other functions. + * + * @return + * - 0 on success. + * - Negative on error. + */ +int branch_monitor_init(void); + +/** + * Run the OOB branch monitor, loops forever on on epoll_wait. + * + * + * @return + * None + */ +void run_branch_monitor(void); + +/** + * Exit the OOB Branch Monitor. + * + * @return + * None + */ +void branch_monitor_exit(void); + +/** + * Add a core to the list of cores to monitor. + * + * @param core + * Core Number + * + * @return + * - 0 on success. + * - Negative on error. + */ +int add_core_to_monitor(int core); + +/** + * Remove a previously added core from core list. + * + * @param core + * Core Number + * + * @return + * - 0 on success. + * - Negative on error. 
+ */ +int remove_core_from_monitor(int core); + +#ifdef __cplusplus +} +#endif + + +#endif /* OOB_MONITOR_H_ */ diff --git a/src/spdk/dpdk/examples/vm_power_manager/oob_monitor_nop.c b/src/spdk/dpdk/examples/vm_power_manager/oob_monitor_nop.c new file mode 100644 index 00000000..7e7b8bc1 --- /dev/null +++ b/src/spdk/dpdk/examples/vm_power_manager/oob_monitor_nop.c @@ -0,0 +1,38 @@ +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright(c) 2010-2014 Intel Corporation + */ + +#include "oob_monitor.h" + +void branch_monitor_exit(void) +{ +} + +__attribute__((unused)) static float +apply_policy(__attribute__((unused)) int core) +{ + return 0.0; +} + +int +add_core_to_monitor(__attribute__((unused)) int core) +{ + return 0; +} + +int +remove_core_from_monitor(__attribute__((unused)) int core) +{ + return 0; +} + +int +branch_monitor_init(void) +{ + return 0; +} + +void +run_branch_monitor(void) +{ +} diff --git a/src/spdk/dpdk/examples/vm_power_manager/oob_monitor_x86.c b/src/spdk/dpdk/examples/vm_power_manager/oob_monitor_x86.c new file mode 100644 index 00000000..589c604e --- /dev/null +++ b/src/spdk/dpdk/examples/vm_power_manager/oob_monitor_x86.c @@ -0,0 +1,258 @@ +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright(c) 2018 Intel Corporation + */ + +#include <unistd.h> +#include <fcntl.h> +#include <rte_log.h> + +#include "oob_monitor.h" +#include "power_manager.h" +#include "channel_manager.h" + +static volatile unsigned run_loop = 1; +static uint64_t g_branches, g_branch_misses; +static int g_active; + +void branch_monitor_exit(void) +{ + run_loop = 0; +} + +/* Number of microseconds between each poll */ +#define INTERVAL 100 +#define PRINT_LOOP_COUNT (1000000/INTERVAL) +#define IA32_PERFEVTSEL0 0x186 +#define IA32_PERFEVTSEL1 0x187 +#define IA32_PERFCTR0 0xc1 +#define IA32_PERFCTR1 0xc2 +#define IA32_PERFEVT_BRANCH_HITS 0x05300c4 +#define IA32_PERFEVT_BRANCH_MISS 0x05300c5 + +static float +apply_policy(int core) +{ + struct core_info *ci; + uint64_t counter; + 
uint64_t branches, branch_misses; + uint32_t last_branches, last_branch_misses; + int hits_diff, miss_diff; + float ratio; + int ret; + + g_active = 0; + ci = get_core_info(); + + last_branches = ci->cd[core].last_branches; + last_branch_misses = ci->cd[core].last_branch_misses; + + ret = pread(ci->cd[core].msr_fd, &counter, + sizeof(counter), IA32_PERFCTR0); + if (ret < 0) + RTE_LOG(ERR, POWER_MANAGER, + "unable to read counter for core %u\n", + core); + branches = counter; + + ret = pread(ci->cd[core].msr_fd, &counter, + sizeof(counter), IA32_PERFCTR1); + if (ret < 0) + RTE_LOG(ERR, POWER_MANAGER, + "unable to read counter for core %u\n", + core); + branch_misses = counter; + + + ci->cd[core].last_branches = branches; + ci->cd[core].last_branch_misses = branch_misses; + + hits_diff = (int)branches - (int)last_branches; + if (hits_diff <= 0) { + /* Likely a counter overflow condition, skip this round */ + return -1.0; + } + + miss_diff = (int)branch_misses - (int)last_branch_misses; + if (miss_diff <= 0) { + /* Likely a counter overflow condition, skip this round */ + return -1.0; + } + + g_branches = hits_diff; + g_branch_misses = miss_diff; + + if (hits_diff < (INTERVAL*100)) { + /* Likely no workload running on this core. Skip. 
*/ + return -1.0; + } + + ratio = (float)miss_diff * (float)100 / (float)hits_diff; + + if (ratio < ci->branch_ratio_threshold) + power_manager_scale_core_min(core); + else + power_manager_scale_core_max(core); + + g_active = 1; + return ratio; +} + +int +add_core_to_monitor(int core) +{ + struct core_info *ci; + char proc_file[UNIX_PATH_MAX]; + int ret; + + ci = get_core_info(); + + if (core < ci->core_count) { + long setup; + + snprintf(proc_file, UNIX_PATH_MAX, "/dev/cpu/%d/msr", core); + ci->cd[core].msr_fd = open(proc_file, O_RDWR | O_SYNC); + if (ci->cd[core].msr_fd < 0) { + RTE_LOG(ERR, POWER_MANAGER, + "Error opening MSR file for core %d " + "(is msr kernel module loaded?)\n", + core); + return -1; + } + /* + * Set up branch counters + */ + setup = IA32_PERFEVT_BRANCH_HITS; + ret = pwrite(ci->cd[core].msr_fd, &setup, + sizeof(setup), IA32_PERFEVTSEL0); + if (ret < 0) { + RTE_LOG(ERR, POWER_MANAGER, + "unable to set counter for core %u\n", + core); + return ret; + } + setup = IA32_PERFEVT_BRANCH_MISS; + ret = pwrite(ci->cd[core].msr_fd, &setup, + sizeof(setup), IA32_PERFEVTSEL1); + if (ret < 0) { + RTE_LOG(ERR, POWER_MANAGER, + "unable to set counter for core %u\n", + core); + return ret; + } + /* + * Close the file and re-open as read only so + * as not to hog the resource + */ + close(ci->cd[core].msr_fd); + ci->cd[core].msr_fd = open(proc_file, O_RDONLY); + if (ci->cd[core].msr_fd < 0) { + RTE_LOG(ERR, POWER_MANAGER, + "Error opening MSR file for core %d " + "(is msr kernel module loaded?)\n", + core); + return -1; + } + ci->cd[core].oob_enabled = 1; + } + return 0; +} + +int +remove_core_from_monitor(int core) +{ + struct core_info *ci; + char proc_file[UNIX_PATH_MAX]; + int ret; + + ci = get_core_info(); + + if (ci->cd[core].oob_enabled) { + long setup; + + /* + * close the msr file, then reopen rw so we can + * disable the counters + */ + if (ci->cd[core].msr_fd != 0) + close(ci->cd[core].msr_fd); + snprintf(proc_file, UNIX_PATH_MAX, "/dev/cpu/%d/msr", 
core); + ci->cd[core].msr_fd = open(proc_file, O_RDWR | O_SYNC); + if (ci->cd[core].msr_fd < 0) { + RTE_LOG(ERR, POWER_MANAGER, + "Error opening MSR file for core %d " + "(is msr kernel module loaded?)\n", + core); + return -1; + } + setup = 0x0; /* clear event */ + ret = pwrite(ci->cd[core].msr_fd, &setup, + sizeof(setup), IA32_PERFEVTSEL0); + if (ret < 0) { + RTE_LOG(ERR, POWER_MANAGER, + "unable to set counter for core %u\n", + core); + return ret; + } + setup = 0x0; /* clear event */ + ret = pwrite(ci->cd[core].msr_fd, &setup, + sizeof(setup), IA32_PERFEVTSEL1); + if (ret < 0) { + RTE_LOG(ERR, POWER_MANAGER, + "unable to set counter for core %u\n", + core); + return ret; + } + + close(ci->cd[core].msr_fd); + ci->cd[core].msr_fd = 0; + ci->cd[core].oob_enabled = 0; + } + return 0; +} + +int +branch_monitor_init(void) +{ + return 0; +} + +void +run_branch_monitor(void) +{ + struct core_info *ci; + int print = 0; + float ratio; + int printed; + int reads = 0; + + ci = get_core_info(); + + while (run_loop) { + + if (!run_loop) + break; + usleep(INTERVAL); + int j; + print++; + printed = 0; + for (j = 0; j < ci->core_count; j++) { + if (ci->cd[j].oob_enabled) { + ratio = apply_policy(j); + if ((print > PRINT_LOOP_COUNT) && (g_active)) { + printf(" %d: %.4f {%lu} {%d}", j, + ratio, g_branches, + reads); + printed = 1; + reads = 0; + } else { + reads++; + } + } + } + if (print > PRINT_LOOP_COUNT) { + if (printed) + printf("\n"); + print = 0; + } + } +} diff --git a/src/spdk/dpdk/examples/vm_power_manager/parse.c b/src/spdk/dpdk/examples/vm_power_manager/parse.c new file mode 100644 index 00000000..8231533b --- /dev/null +++ b/src/spdk/dpdk/examples/vm_power_manager/parse.c @@ -0,0 +1,81 @@ +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright(c) 2010-2014 Intel Corporation. + * Copyright(c) 2014 6WIND S.A. 
/*
 * Parse elem, the elem could be single number/range or group
 * 1) A single number elem, it's just a simple digit. e.g. 9
 * 2) A single range elem, two digits with a '-' between. e.g. 2-6
 * 3) A group elem, combines multiple 1) or 2) e.g 0,2-4,6
 * Within group, '-' used for a range separator;
 * ',' used for a single number.
 *
 * On success set[] holds 1 for every selected index and the number of
 * characters consumed (may be input length + 1) is returned; -1 on any
 * parse error or out-of-range index (idx >= num).
 *
 * Fixes: ctype.h classifiers are now fed an unsigned char — passing a
 * plain (possibly negative) char is undefined behavior; the duplicated
 * dead start-of-string checks were removed; the comment no longer
 * mentions a "left bracket" the code never accepted.
 */
int
parse_set(const char *input, uint16_t set[], unsigned int num)
{
	unsigned int idx;
	const char *str = input;
	char *end = NULL;
	unsigned int min, max;

	memset(set, 0, num * sizeof(uint16_t));

	while (isblank((unsigned char)*str))
		str++;

	/* only a digit qualifies as a start point */
	if (!isdigit((unsigned char)*str))
		return -1;

	min = num;	/* sentinel: "no range start seen yet" */
	do {
		/* go ahead to the first digit */
		while (isblank((unsigned char)*str))
			str++;
		if (!isdigit((unsigned char)*str))
			return -1;

		/* get the digit value */
		errno = 0;
		idx = strtoul(str, &end, 10);
		if (errno || end == NULL || idx >= num)
			return -1;

		/* go ahead to separator '-' and ',' */
		while (isblank((unsigned char)*end))
			end++;
		if (*end == '-') {
			if (min == num)
				min = idx;
			else /* avoid continuous '-' */
				return -1;
		} else if ((*end == ',') || (*end == '\0')) {
			unsigned int lo, hi;

			max = idx;
			if (min == num)
				min = idx;

			/* ranges may be given in either order */
			lo = min < max ? min : max;
			hi = min < max ? max : min;
			for (idx = lo; idx <= hi; idx++)
				set[idx] = 1;

			min = num;
		} else
			return -1;

		str = end + 1;
	} while (*end != '\0');

	return str - input;
}
+parse_set(const char *, uint16_t [], unsigned int); + +#ifdef __cplusplus +} +#endif + + +#endif /* PARSE_H_ */ diff --git a/src/spdk/dpdk/examples/vm_power_manager/power_manager.c b/src/spdk/dpdk/examples/vm_power_manager/power_manager.c new file mode 100644 index 00000000..b7769c3c --- /dev/null +++ b/src/spdk/dpdk/examples/vm_power_manager/power_manager.c @@ -0,0 +1,314 @@ +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright(c) 2010-2014 Intel Corporation + */ + +#include <stdio.h> +#include <stdlib.h> +#include <stdint.h> +#include <inttypes.h> +#include <sys/un.h> +#include <fcntl.h> +#include <unistd.h> +#include <dirent.h> +#include <errno.h> + +#include <sys/sysinfo.h> +#include <sys/types.h> + +#include <rte_log.h> +#include <rte_power.h> +#include <rte_spinlock.h> + +#include "channel_manager.h" +#include "power_manager.h" +#include "oob_monitor.h" + +#define POWER_SCALE_CORE(DIRECTION, core_num , ret) do { \ + if (core_num >= ci.core_count) \ + return -1; \ + if (!(ci.cd[core_num].global_enabled_cpus)) \ + return -1; \ + rte_spinlock_lock(&global_core_freq_info[core_num].power_sl); \ + ret = rte_power_freq_##DIRECTION(core_num); \ + rte_spinlock_unlock(&global_core_freq_info[core_num].power_sl); \ +} while (0) + +#define POWER_SCALE_MASK(DIRECTION, core_mask, ret) do { \ + int i; \ + for (i = 0; core_mask; core_mask &= ~(1 << i++)) { \ + if ((core_mask >> i) & 1) { \ + if (!(ci.cd[i].global_enabled_cpus)) \ + continue; \ + rte_spinlock_lock(&global_core_freq_info[i].power_sl); \ + if (rte_power_freq_##DIRECTION(i) != 1) \ + ret = -1; \ + rte_spinlock_unlock(&global_core_freq_info[i].power_sl); \ + } \ + } \ +} while (0) + +struct freq_info { + rte_spinlock_t power_sl; + uint32_t freqs[RTE_MAX_LCORE_FREQS]; + unsigned num_freqs; +} __rte_cache_aligned; + +static struct freq_info global_core_freq_info[POWER_MGR_MAX_CPUS]; + +struct core_info ci; + +#define SYSFS_CPU_PATH "/sys/devices/system/cpu/cpu%u/topology/core_id" + +struct core_info * 
+get_core_info(void) +{ + return &ci; +} + +int +core_info_init(void) +{ + struct core_info *ci; + int i; + + ci = get_core_info(); + + ci->core_count = get_nprocs_conf(); + ci->branch_ratio_threshold = BRANCH_RATIO_THRESHOLD; + ci->cd = malloc(ci->core_count * sizeof(struct core_details)); + if (!ci->cd) { + RTE_LOG(ERR, POWER_MANAGER, "Failed to allocate memory for core info."); + return -1; + } + for (i = 0; i < ci->core_count; i++) { + ci->cd[i].global_enabled_cpus = 1; + ci->cd[i].oob_enabled = 0; + ci->cd[i].msr_fd = 0; + } + printf("%d cores in system\n", ci->core_count); + return 0; +} + +int +power_manager_init(void) +{ + unsigned int i, num_cpus = 0, num_freqs = 0; + int ret = 0; + struct core_info *ci; + + rte_power_set_env(PM_ENV_ACPI_CPUFREQ); + + ci = get_core_info(); + if (!ci) { + RTE_LOG(ERR, POWER_MANAGER, + "Failed to get core info!\n"); + return -1; + } + + for (i = 0; i < ci->core_count; i++) { + if (ci->cd[i].global_enabled_cpus) { + if (rte_power_init(i) < 0) + RTE_LOG(ERR, POWER_MANAGER, + "Unable to initialize power manager " + "for core %u\n", i); + num_cpus++; + num_freqs = rte_power_freqs(i, + global_core_freq_info[i].freqs, + RTE_MAX_LCORE_FREQS); + if (num_freqs == 0) { + RTE_LOG(ERR, POWER_MANAGER, + "Unable to get frequency list for core %u\n", + i); + ci->cd[i].oob_enabled = 0; + ret = -1; + } + global_core_freq_info[i].num_freqs = num_freqs; + + rte_spinlock_init(&global_core_freq_info[i].power_sl); + } + if (ci->cd[i].oob_enabled) + add_core_to_monitor(i); + } + RTE_LOG(INFO, POWER_MANAGER, "Managing %u cores out of %u available host cores\n", + num_cpus, ci->core_count); + return ret; + +} + +uint32_t +power_manager_get_current_frequency(unsigned core_num) +{ + uint32_t freq, index; + + if (core_num >= POWER_MGR_MAX_CPUS) { + RTE_LOG(ERR, POWER_MANAGER, "Core(%u) is out of range 0...%d\n", + core_num, POWER_MGR_MAX_CPUS-1); + return -1; + } + if (!(ci.cd[core_num].global_enabled_cpus)) + return 0; + + 
rte_spinlock_lock(&global_core_freq_info[core_num].power_sl); + index = rte_power_get_freq(core_num); + rte_spinlock_unlock(&global_core_freq_info[core_num].power_sl); + if (index >= POWER_MGR_MAX_CPUS) + freq = 0; + else + freq = global_core_freq_info[core_num].freqs[index]; + + return freq; +} + +int +power_manager_exit(void) +{ + unsigned int i; + int ret = 0; + struct core_info *ci; + + ci = get_core_info(); + if (!ci) { + RTE_LOG(ERR, POWER_MANAGER, + "Failed to get core info!\n"); + return -1; + } + + for (i = 0; i < ci->core_count; i++) { + if (ci->cd[i].global_enabled_cpus) { + if (rte_power_exit(i) < 0) { + RTE_LOG(ERR, POWER_MANAGER, "Unable to shutdown power manager " + "for core %u\n", i); + ret = -1; + } + ci->cd[i].global_enabled_cpus = 0; + } + remove_core_from_monitor(i); + } + return ret; +} + +int +power_manager_scale_mask_up(uint64_t core_mask) +{ + int ret = 0; + + POWER_SCALE_MASK(up, core_mask, ret); + return ret; +} + +int +power_manager_scale_mask_down(uint64_t core_mask) +{ + int ret = 0; + + POWER_SCALE_MASK(down, core_mask, ret); + return ret; +} + +int +power_manager_scale_mask_min(uint64_t core_mask) +{ + int ret = 0; + + POWER_SCALE_MASK(min, core_mask, ret); + return ret; +} + +int +power_manager_scale_mask_max(uint64_t core_mask) +{ + int ret = 0; + + POWER_SCALE_MASK(max, core_mask, ret); + return ret; +} + +int +power_manager_enable_turbo_mask(uint64_t core_mask) +{ + int ret = 0; + + POWER_SCALE_MASK(enable_turbo, core_mask, ret); + return ret; +} + +int +power_manager_disable_turbo_mask(uint64_t core_mask) +{ + int ret = 0; + + POWER_SCALE_MASK(disable_turbo, core_mask, ret); + return ret; +} + +int +power_manager_scale_core_up(unsigned core_num) +{ + int ret = 0; + + POWER_SCALE_CORE(up, core_num, ret); + return ret; +} + +int +power_manager_scale_core_down(unsigned core_num) +{ + int ret = 0; + + POWER_SCALE_CORE(down, core_num, ret); + return ret; +} + +int +power_manager_scale_core_min(unsigned core_num) +{ + int ret = 0; + + 
POWER_SCALE_CORE(min, core_num, ret); + return ret; +} + +int +power_manager_scale_core_max(unsigned core_num) +{ + int ret = 0; + + POWER_SCALE_CORE(max, core_num, ret); + return ret; +} + +int +power_manager_enable_turbo_core(unsigned int core_num) +{ + int ret = 0; + + POWER_SCALE_CORE(enable_turbo, core_num, ret); + return ret; +} + +int +power_manager_disable_turbo_core(unsigned int core_num) +{ + int ret = 0; + + POWER_SCALE_CORE(disable_turbo, core_num, ret); + return ret; +} + +int +power_manager_scale_core_med(unsigned int core_num) +{ + int ret = 0; + struct core_info *ci; + + ci = get_core_info(); + if (core_num >= POWER_MGR_MAX_CPUS) + return -1; + if (!(ci->cd[core_num].global_enabled_cpus)) + return -1; + rte_spinlock_lock(&global_core_freq_info[core_num].power_sl); + ret = rte_power_set_freq(core_num, + global_core_freq_info[core_num].num_freqs / 2); + rte_spinlock_unlock(&global_core_freq_info[core_num].power_sl); + return ret; +} diff --git a/src/spdk/dpdk/examples/vm_power_manager/power_manager.h b/src/spdk/dpdk/examples/vm_power_manager/power_manager.h new file mode 100644 index 00000000..605b3c8f --- /dev/null +++ b/src/spdk/dpdk/examples/vm_power_manager/power_manager.h @@ -0,0 +1,247 @@ +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright(c) 2010-2014 Intel Corporation + */ + +#ifndef POWER_MANAGER_H_ +#define POWER_MANAGER_H_ + +#ifdef __cplusplus +extern "C" { +#endif +struct core_details { + uint64_t last_branches; + uint64_t last_branch_misses; + uint16_t global_enabled_cpus; + uint16_t oob_enabled; + int msr_fd; +}; + +struct core_info { + uint16_t core_count; + struct core_details *cd; + float branch_ratio_threshold; +}; + +#define BRANCH_RATIO_THRESHOLD 0.1 + +struct core_info * +get_core_info(void); + +int +core_info_init(void); + +#define RTE_LOGTYPE_POWER_MANAGER RTE_LOGTYPE_USER1 + +/* Maximum number of CPUS to manage */ +#define POWER_MGR_MAX_CPUS 64 +/** + * Initialize power management. 
+ * Initializes resources and verifies the number of CPUs on the system. + * Wraps librte_power int rte_power_init(unsigned lcore_id); + * + * @return + * - 0 on success. + * - Negative on error. + */ +int power_manager_init(void); + +/** + * Exit power management. Must be called prior to exiting the application. + * + * @return + * - 0 on success. + * - Negative on error. + */ +int power_manager_exit(void); + +/** + * Scale up the frequency of the cores specified in core_mask. + * It is thread-safe. + * + * @param core_mask + * The uint64_t bit-mask of cores to change frequency. + * + * @return + * - 1 on success. + * - Negative on error. + */ +int power_manager_scale_mask_up(uint64_t core_mask); + +/** + * Scale down the frequency of the cores specified in core_mask. + * It is thread-safe. + * + * @param core_mask + * The uint64_t bit-mask of cores to change frequency. + * + * @return + * - 1 on success. + * - Negative on error. + */ +int power_manager_scale_mask_down(uint64_t core_mask); + +/** + * Scale to the minimum frequency of the cores specified in core_mask. + * It is thread-safe. + * + * @param core_mask + * The uint64_t bit-mask of cores to change frequency. + * + * @return + * - 1 on success. + * - Negative on error. + */ +int power_manager_scale_mask_min(uint64_t core_mask); + +/** + * Scale to the maximum frequency of the cores specified in core_mask. + * It is thread-safe. + * + * @param core_mask + * The uint64_t bit-mask of cores to change frequency. + * + * @return + * - 1 on success. + * - Negative on error. + */ +int power_manager_scale_mask_max(uint64_t core_mask); + +/** + * Enable Turbo Boost on the cores specified in core_mask. + * It is thread-safe. + * + * @param core_mask + * The uint64_t bit-mask of cores to change frequency. + * + * @return + * - 1 on success. + * - Negative on error. + */ +int power_manager_enable_turbo_mask(uint64_t core_mask); + +/** + * Disable Turbo Boost on the cores specified in core_mask. + * It is thread-safe. 
+ * + * @param core_mask + * The uint64_t bit-mask of cores to change frequency. + * + * @return + * - 1 on success. + * - Negative on error. + */ +int power_manager_disable_turbo_mask(uint64_t core_mask); + +/** + * Scale up frequency for the core specified by core_num. + * It is thread-safe. + * + * @param core_num + * The core number to change frequency + * + * @return + * - 1 on success. + * - Negative on error. + */ +int power_manager_scale_core_up(unsigned core_num); + +/** + * Scale down frequency for the core specified by core_num. + * It is thread-safe. + * + * @param core_num + * The core number to change frequency + * + * @return + * - 1 on success. + * - 0 if frequency not changed. + * - Negative on error. + */ +int power_manager_scale_core_down(unsigned core_num); + +/** + * Scale to minimum frequency for the core specified by core_num. + * It is thread-safe. + * + * @param core_num + * The core number to change frequency + * + * @return + * - 1 on success. + * - 0 if frequency not changed. + * - Negative on error. + */ +int power_manager_scale_core_min(unsigned core_num); + +/** + * Scale to maximum frequency for the core specified by core_num. + * It is thread-safe. + * + * @param core_num + * The core number to change frequency + * + * @return + * - 1 on success. + * - 0 if frequency not changed. + * - Negative on error. + */ +int power_manager_scale_core_max(unsigned core_num); + +/** + * Enable Turbo Boost for the core specified by core_num. + * It is thread-safe. + * + * @param core_num + * The core number to boost + * + * @return + * - 1 on success. + * - Negative on error. + */ +int power_manager_enable_turbo_core(unsigned int core_num); + +/** + * Disable Turbo Boost for the core specified by core_num. + * It is thread-safe. + * + * @param core_num + * The core number to boost + * + * @return + * - 1 on success. + * - Negative on error. 
+ */ +int power_manager_disable_turbo_core(unsigned int core_num); + +/** + * Get the current freuency of the core specified by core_num + * + * @param core_num + * The core number to get the current frequency + * + * @return + * - 0 on error + * - >0 for current frequency. + */ +uint32_t power_manager_get_current_frequency(unsigned core_num); + +/** + * Scale to medium frequency for the core specified by core_num. + * It is thread-safe. + * + * @param core_num + * The core number to change frequency + * + * @return + * - 1 on success. + * - 0 if frequency not changed. + * - Negative on error. + */ +int power_manager_scale_core_med(unsigned int core_num); + +#ifdef __cplusplus +} +#endif + + +#endif /* POWER_MANAGER_H_ */ diff --git a/src/spdk/dpdk/examples/vm_power_manager/vm_power_cli.c b/src/spdk/dpdk/examples/vm_power_manager/vm_power_cli.c new file mode 100644 index 00000000..d588d38a --- /dev/null +++ b/src/spdk/dpdk/examples/vm_power_manager/vm_power_cli.c @@ -0,0 +1,650 @@ +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright(c) 2010-2014 Intel Corporation + */ + +#include <stdlib.h> +#include <stdint.h> +#include <inttypes.h> +#include <stdio.h> +#include <string.h> +#include <termios.h> +#include <errno.h> + +#include <cmdline_rdline.h> +#include <cmdline_parse.h> +#include <cmdline_parse_string.h> +#include <cmdline_parse_num.h> +#include <cmdline_socket.h> +#include <cmdline.h> + +#include "vm_power_cli.h" +#include "channel_manager.h" +#include "channel_monitor.h" +#include "power_manager.h" +#include "channel_commands.h" + +struct cmd_quit_result { + cmdline_fixed_string_t quit; +}; + +static void cmd_quit_parsed(__attribute__((unused)) void *parsed_result, + struct cmdline *cl, + __attribute__((unused)) void *data) +{ + channel_monitor_exit(); + channel_manager_exit(); + power_manager_exit(); + cmdline_quit(cl); +} + +cmdline_parse_token_string_t cmd_quit_quit = + TOKEN_STRING_INITIALIZER(struct cmd_quit_result, quit, "quit"); + 
+cmdline_parse_inst_t cmd_quit = { + .f = cmd_quit_parsed, /* function to call */ + .data = NULL, /* 2nd arg of func */ + .help_str = "close the application", + .tokens = { /* token list, NULL terminated */ + (void *)&cmd_quit_quit, + NULL, + }, +}; + +/* *** VM operations *** */ +struct cmd_show_vm_result { + cmdline_fixed_string_t show_vm; + cmdline_fixed_string_t vm_name; +}; + +static void +cmd_show_vm_parsed(void *parsed_result, struct cmdline *cl, + __attribute__((unused)) void *data) +{ + struct cmd_show_vm_result *res = parsed_result; + struct vm_info info; + unsigned i; + + if (get_info_vm(res->vm_name, &info) != 0) + return; + cmdline_printf(cl, "VM: '%s', status = ", info.name); + if (info.status == CHANNEL_MGR_VM_ACTIVE) + cmdline_printf(cl, "ACTIVE\n"); + else + cmdline_printf(cl, "INACTIVE\n"); + cmdline_printf(cl, "Channels %u\n", info.num_channels); + for (i = 0; i < info.num_channels; i++) { + cmdline_printf(cl, " [%u]: %s, status = ", i, + info.channels[i].channel_path); + switch (info.channels[i].status) { + case CHANNEL_MGR_CHANNEL_CONNECTED: + cmdline_printf(cl, "CONNECTED\n"); + break; + case CHANNEL_MGR_CHANNEL_DISCONNECTED: + cmdline_printf(cl, "DISCONNECTED\n"); + break; + case CHANNEL_MGR_CHANNEL_DISABLED: + cmdline_printf(cl, "DISABLED\n"); + break; + case CHANNEL_MGR_CHANNEL_PROCESSING: + cmdline_printf(cl, "PROCESSING\n"); + break; + default: + cmdline_printf(cl, "UNKNOWN\n"); + break; + } + } + cmdline_printf(cl, "Virtual CPU(s): %u\n", info.num_vcpus); + for (i = 0; i < info.num_vcpus; i++) { + cmdline_printf(cl, " [%u]: Physical CPU Mask 0x%"PRIx64"\n", i, + info.pcpu_mask[i]); + } +} + + + +cmdline_parse_token_string_t cmd_vm_show = + TOKEN_STRING_INITIALIZER(struct cmd_show_vm_result, + show_vm, "show_vm"); +cmdline_parse_token_string_t cmd_show_vm_name = + TOKEN_STRING_INITIALIZER(struct cmd_show_vm_result, + vm_name, NULL); + +cmdline_parse_inst_t cmd_show_vm_set = { + .f = cmd_show_vm_parsed, + .data = NULL, + .help_str = 
"show_vm <vm_name>, prints the information on the " + "specified VM(s), the information lists the number of vCPUS, the " + "pinning to pCPU(s) as a bit mask, along with any communication " + "channels associated with each VM", + .tokens = { + (void *)&cmd_vm_show, + (void *)&cmd_show_vm_name, + NULL, + }, +}; + +/* *** vCPU to pCPU mapping operations *** */ +struct cmd_set_pcpu_mask_result { + cmdline_fixed_string_t set_pcpu_mask; + cmdline_fixed_string_t vm_name; + uint8_t vcpu; + uint64_t core_mask; +}; + +static void +cmd_set_pcpu_mask_parsed(void *parsed_result, struct cmdline *cl, + __attribute__((unused)) void *data) +{ + struct cmd_set_pcpu_mask_result *res = parsed_result; + + if (set_pcpus_mask(res->vm_name, res->vcpu, res->core_mask) == 0) + cmdline_printf(cl, "Pinned vCPU(%"PRId8") to pCPU core " + "mask(0x%"PRIx64")\n", res->vcpu, res->core_mask); + else + cmdline_printf(cl, "Unable to pin vCPU(%"PRId8") to pCPU core " + "mask(0x%"PRIx64")\n", res->vcpu, res->core_mask); +} + +cmdline_parse_token_string_t cmd_set_pcpu_mask = + TOKEN_STRING_INITIALIZER(struct cmd_set_pcpu_mask_result, + set_pcpu_mask, "set_pcpu_mask"); +cmdline_parse_token_string_t cmd_set_pcpu_mask_vm_name = + TOKEN_STRING_INITIALIZER(struct cmd_set_pcpu_mask_result, + vm_name, NULL); +cmdline_parse_token_num_t set_pcpu_mask_vcpu = + TOKEN_NUM_INITIALIZER(struct cmd_set_pcpu_mask_result, + vcpu, UINT8); +cmdline_parse_token_num_t set_pcpu_mask_core_mask = + TOKEN_NUM_INITIALIZER(struct cmd_set_pcpu_mask_result, + core_mask, UINT64); + + +cmdline_parse_inst_t cmd_set_pcpu_mask_set = { + .f = cmd_set_pcpu_mask_parsed, + .data = NULL, + .help_str = "set_pcpu_mask <vm_name> <vcpu> <pcpu>, Set the binding " + "of Virtual CPU on VM to the Physical CPU mask.", + .tokens = { + (void *)&cmd_set_pcpu_mask, + (void *)&cmd_set_pcpu_mask_vm_name, + (void *)&set_pcpu_mask_vcpu, + (void *)&set_pcpu_mask_core_mask, + NULL, + }, +}; + +struct cmd_set_pcpu_result { + cmdline_fixed_string_t set_pcpu; + 
cmdline_fixed_string_t vm_name; + uint8_t vcpu; + uint8_t core; +}; + +static void +cmd_set_pcpu_parsed(void *parsed_result, struct cmdline *cl, + __attribute__((unused)) void *data) +{ + struct cmd_set_pcpu_result *res = parsed_result; + + if (set_pcpu(res->vm_name, res->vcpu, res->core) == 0) + cmdline_printf(cl, "Pinned vCPU(%"PRId8") to pCPU core " + "%"PRId8")\n", res->vcpu, res->core); + else + cmdline_printf(cl, "Unable to pin vCPU(%"PRId8") to pCPU core " + "%"PRId8")\n", res->vcpu, res->core); +} + +cmdline_parse_token_string_t cmd_set_pcpu = + TOKEN_STRING_INITIALIZER(struct cmd_set_pcpu_result, + set_pcpu, "set_pcpu"); +cmdline_parse_token_string_t cmd_set_pcpu_vm_name = + TOKEN_STRING_INITIALIZER(struct cmd_set_pcpu_result, + vm_name, NULL); +cmdline_parse_token_num_t set_pcpu_vcpu = + TOKEN_NUM_INITIALIZER(struct cmd_set_pcpu_result, + vcpu, UINT8); +cmdline_parse_token_num_t set_pcpu_core = + TOKEN_NUM_INITIALIZER(struct cmd_set_pcpu_result, + core, UINT64); + + +cmdline_parse_inst_t cmd_set_pcpu_set = { + .f = cmd_set_pcpu_parsed, + .data = NULL, + .help_str = "set_pcpu <vm_name> <vcpu> <pcpu>, Set the binding " + "of Virtual CPU on VM to the Physical CPU.", + .tokens = { + (void *)&cmd_set_pcpu, + (void *)&cmd_set_pcpu_vm_name, + (void *)&set_pcpu_vcpu, + (void *)&set_pcpu_core, + NULL, + }, +}; + +struct cmd_vm_op_result { + cmdline_fixed_string_t op_vm; + cmdline_fixed_string_t vm_name; +}; + +static void +cmd_vm_op_parsed(void *parsed_result, struct cmdline *cl, + __attribute__((unused)) void *data) +{ + struct cmd_vm_op_result *res = parsed_result; + + if (!strcmp(res->op_vm, "add_vm")) { + if (add_vm(res->vm_name) < 0) + cmdline_printf(cl, "Unable to add VM '%s'\n", res->vm_name); + } else if (remove_vm(res->vm_name) < 0) + cmdline_printf(cl, "Unable to remove VM '%s'\n", res->vm_name); +} + +cmdline_parse_token_string_t cmd_vm_op = + TOKEN_STRING_INITIALIZER(struct cmd_vm_op_result, + op_vm, "add_vm#rm_vm"); +cmdline_parse_token_string_t 
cmd_vm_name = + TOKEN_STRING_INITIALIZER(struct cmd_vm_op_result, + vm_name, NULL); + +cmdline_parse_inst_t cmd_vm_op_set = { + .f = cmd_vm_op_parsed, + .data = NULL, + .help_str = "add_vm|rm_vm <name>, add a VM for " + "subsequent operations with the CLI or remove a previously added " + "VM from the VM Power Manager", + .tokens = { + (void *)&cmd_vm_op, + (void *)&cmd_vm_name, + NULL, + }, +}; + +/* *** VM channel operations *** */ +struct cmd_channels_op_result { + cmdline_fixed_string_t op; + cmdline_fixed_string_t vm_name; + cmdline_fixed_string_t channel_list; +}; +static void +cmd_channels_op_parsed(void *parsed_result, struct cmdline *cl, + __attribute__((unused)) void *data) +{ + unsigned num_channels = 0, channel_num, i; + int channels_added; + unsigned channel_list[CHANNEL_CMDS_MAX_VM_CHANNELS]; + char *token, *remaining, *tail_ptr; + struct cmd_channels_op_result *res = parsed_result; + + if (!strcmp(res->channel_list, "all")) { + channels_added = add_all_channels(res->vm_name); + cmdline_printf(cl, "Added %d channels for VM '%s'\n", + channels_added, res->vm_name); + return; + } + + remaining = res->channel_list; + while (1) { + if (remaining == NULL || remaining[0] == '\0') + break; + + token = strsep(&remaining, ","); + if (token == NULL) + break; + errno = 0; + channel_num = (unsigned)strtol(token, &tail_ptr, 10); + if ((errno != 0) || tail_ptr == NULL || (*tail_ptr != '\0')) + break; + + if (channel_num == CHANNEL_CMDS_MAX_VM_CHANNELS) { + cmdline_printf(cl, "Channel number '%u' exceeds the maximum number " + "of allowable channels(%u) for VM '%s'\n", channel_num, + CHANNEL_CMDS_MAX_VM_CHANNELS, res->vm_name); + return; + } + channel_list[num_channels++] = channel_num; + } + for (i = 0; i < num_channels; i++) + cmdline_printf(cl, "[%u]: Adding channel %u\n", i, channel_list[i]); + + channels_added = add_channels(res->vm_name, channel_list, + num_channels); + cmdline_printf(cl, "Enabled %d channels for '%s'\n", channels_added, + res->vm_name); +} + 
+cmdline_parse_token_string_t cmd_channels_op = + TOKEN_STRING_INITIALIZER(struct cmd_channels_op_result, + op, "add_channels"); +cmdline_parse_token_string_t cmd_channels_vm_name = + TOKEN_STRING_INITIALIZER(struct cmd_channels_op_result, + vm_name, NULL); +cmdline_parse_token_string_t cmd_channels_list = + TOKEN_STRING_INITIALIZER(struct cmd_channels_op_result, + channel_list, NULL); + +cmdline_parse_inst_t cmd_channels_op_set = { + .f = cmd_channels_op_parsed, + .data = NULL, + .help_str = "add_channels <vm_name> <list>|all, add " + "communication channels for the specified VM, the " + "virtio channels must be enabled in the VM " + "configuration(qemu/libvirt) and the associated VM must be active. " + "<list> is a comma-separated list of channel numbers to add, using " + "the keyword 'all' will attempt to add all channels for the VM", + .tokens = { + (void *)&cmd_channels_op, + (void *)&cmd_channels_vm_name, + (void *)&cmd_channels_list, + NULL, + }, +}; + +struct cmd_channels_status_op_result { + cmdline_fixed_string_t op; + cmdline_fixed_string_t vm_name; + cmdline_fixed_string_t channel_list; + cmdline_fixed_string_t status; +}; + +static void +cmd_channels_status_op_parsed(void *parsed_result, struct cmdline *cl, + __attribute__((unused)) void *data) +{ + unsigned num_channels = 0, channel_num; + int changed; + unsigned channel_list[CHANNEL_CMDS_MAX_VM_CHANNELS]; + char *token, *remaining, *tail_ptr; + struct cmd_channels_status_op_result *res = parsed_result; + enum channel_status status; + + if (!strcmp(res->status, "enabled")) + status = CHANNEL_MGR_CHANNEL_CONNECTED; + else + status = CHANNEL_MGR_CHANNEL_DISABLED; + + if (!strcmp(res->channel_list, "all")) { + changed = set_channel_status_all(res->vm_name, status); + cmdline_printf(cl, "Updated status of %d channels " + "for VM '%s'\n", changed, res->vm_name); + return; + } + remaining = res->channel_list; + while (1) { + if (remaining == NULL || remaining[0] == '\0') + break; + token = 
strsep(&remaining, ","); + if (token == NULL) + break; + errno = 0; + channel_num = (unsigned)strtol(token, &tail_ptr, 10); + if ((errno != 0) || tail_ptr == NULL || (*tail_ptr != '\0')) + break; + + if (channel_num == CHANNEL_CMDS_MAX_VM_CHANNELS) { + cmdline_printf(cl, "%u exceeds the maximum number of allowable " + "channels(%u) for VM '%s'\n", channel_num, + CHANNEL_CMDS_MAX_VM_CHANNELS, res->vm_name); + return; + } + channel_list[num_channels++] = channel_num; + } + changed = set_channel_status(res->vm_name, channel_list, num_channels, + status); + cmdline_printf(cl, "Updated status of %d channels " + "for VM '%s'\n", changed, res->vm_name); +} + +cmdline_parse_token_string_t cmd_channels_status_op = + TOKEN_STRING_INITIALIZER(struct cmd_channels_status_op_result, + op, "set_channel_status"); +cmdline_parse_token_string_t cmd_channels_status_vm_name = + TOKEN_STRING_INITIALIZER(struct cmd_channels_status_op_result, + vm_name, NULL); +cmdline_parse_token_string_t cmd_channels_status_list = + TOKEN_STRING_INITIALIZER(struct cmd_channels_status_op_result, + channel_list, NULL); +cmdline_parse_token_string_t cmd_channels_status = + TOKEN_STRING_INITIALIZER(struct cmd_channels_status_op_result, + status, "enabled#disabled"); + +cmdline_parse_inst_t cmd_channels_status_op_set = { + .f = cmd_channels_status_op_parsed, + .data = NULL, + .help_str = "set_channel_status <vm_name> <list>|all enabled|disabled, " + " enable or disable the communication channels in " + "list(comma-separated) for the specified VM, alternatively " + "list can be replaced with keyword 'all'. " + "Disabled channels will still receive packets on the host, " + "however the commands they specify will be ignored. 
" + "Set status to 'enabled' to begin processing requests again.", + .tokens = { + (void *)&cmd_channels_status_op, + (void *)&cmd_channels_status_vm_name, + (void *)&cmd_channels_status_list, + (void *)&cmd_channels_status, + NULL, + }, +}; + +/* *** CPU Frequency operations *** */ +struct cmd_show_cpu_freq_mask_result { + cmdline_fixed_string_t show_cpu_freq_mask; + uint64_t core_mask; +}; + +static void +cmd_show_cpu_freq_mask_parsed(void *parsed_result, struct cmdline *cl, + __attribute__((unused)) void *data) +{ + struct cmd_show_cpu_freq_mask_result *res = parsed_result; + unsigned i; + uint64_t mask = res->core_mask; + uint32_t freq; + + for (i = 0; mask; mask &= ~(1ULL << i++)) { + if ((mask >> i) & 1) { + freq = power_manager_get_current_frequency(i); + if (freq > 0) + cmdline_printf(cl, "Core %u: %"PRId32"\n", i, freq); + } + } +} + +cmdline_parse_token_string_t cmd_show_cpu_freq_mask = + TOKEN_STRING_INITIALIZER(struct cmd_show_cpu_freq_mask_result, + show_cpu_freq_mask, "show_cpu_freq_mask"); +cmdline_parse_token_num_t cmd_show_cpu_freq_mask_core_mask = + TOKEN_NUM_INITIALIZER(struct cmd_show_cpu_freq_mask_result, + core_mask, UINT64); + +cmdline_parse_inst_t cmd_show_cpu_freq_mask_set = { + .f = cmd_show_cpu_freq_mask_parsed, + .data = NULL, + .help_str = "show_cpu_freq_mask <mask>, Get the current frequency for each " + "core specified in the mask", + .tokens = { + (void *)&cmd_show_cpu_freq_mask, + (void *)&cmd_show_cpu_freq_mask_core_mask, + NULL, + }, +}; + +struct cmd_set_cpu_freq_mask_result { + cmdline_fixed_string_t set_cpu_freq_mask; + uint64_t core_mask; + cmdline_fixed_string_t cmd; +}; + +static void +cmd_set_cpu_freq_mask_parsed(void *parsed_result, struct cmdline *cl, + __attribute__((unused)) void *data) +{ + struct cmd_set_cpu_freq_mask_result *res = parsed_result; + int ret = -1; + + if (!strcmp(res->cmd , "up")) + ret = power_manager_scale_mask_up(res->core_mask); + else if (!strcmp(res->cmd , "down")) + ret = 
power_manager_scale_mask_down(res->core_mask); + else if (!strcmp(res->cmd , "min")) + ret = power_manager_scale_mask_min(res->core_mask); + else if (!strcmp(res->cmd , "max")) + ret = power_manager_scale_mask_max(res->core_mask); + else if (!strcmp(res->cmd, "enable_turbo")) + ret = power_manager_enable_turbo_mask(res->core_mask); + else if (!strcmp(res->cmd, "disable_turbo")) + ret = power_manager_disable_turbo_mask(res->core_mask); + if (ret < 0) { + cmdline_printf(cl, "Error scaling core_mask(0x%"PRIx64") '%s' , not " + "all cores specified have been scaled\n", + res->core_mask, res->cmd); + }; +} + +cmdline_parse_token_string_t cmd_set_cpu_freq_mask = + TOKEN_STRING_INITIALIZER(struct cmd_set_cpu_freq_mask_result, + set_cpu_freq_mask, "set_cpu_freq_mask"); +cmdline_parse_token_num_t cmd_set_cpu_freq_mask_core_mask = + TOKEN_NUM_INITIALIZER(struct cmd_set_cpu_freq_mask_result, + core_mask, UINT64); +cmdline_parse_token_string_t cmd_set_cpu_freq_mask_result = + TOKEN_STRING_INITIALIZER(struct cmd_set_cpu_freq_mask_result, + cmd, "up#down#min#max#enable_turbo#disable_turbo"); + +cmdline_parse_inst_t cmd_set_cpu_freq_mask_set = { + .f = cmd_set_cpu_freq_mask_parsed, + .data = NULL, + .help_str = "set_cpu_freq <core_mask> <up|down|min|max|enable_turbo|disable_turbo>, adjust the current " + "frequency for the cores specified in <core_mask>", + .tokens = { + (void *)&cmd_set_cpu_freq_mask, + (void *)&cmd_set_cpu_freq_mask_core_mask, + (void *)&cmd_set_cpu_freq_mask_result, + NULL, + }, +}; + + + +struct cmd_show_cpu_freq_result { + cmdline_fixed_string_t show_cpu_freq; + uint8_t core_num; +}; + +static void +cmd_show_cpu_freq_parsed(void *parsed_result, struct cmdline *cl, + __attribute__((unused)) void *data) +{ + struct cmd_show_cpu_freq_result *res = parsed_result; + uint32_t curr_freq = power_manager_get_current_frequency(res->core_num); + + if (curr_freq == 0) { + cmdline_printf(cl, "Unable to get frequency for core %u\n", + res->core_num); + return; + } + 
cmdline_printf(cl, "Core %u frequency: %"PRId32"\n", res->core_num, + curr_freq); +} + +cmdline_parse_token_string_t cmd_show_cpu_freq = + TOKEN_STRING_INITIALIZER(struct cmd_show_cpu_freq_result, + show_cpu_freq, "show_cpu_freq"); + +cmdline_parse_token_num_t cmd_show_cpu_freq_core_num = + TOKEN_NUM_INITIALIZER(struct cmd_show_cpu_freq_result, + core_num, UINT8); + +cmdline_parse_inst_t cmd_show_cpu_freq_set = { + .f = cmd_show_cpu_freq_parsed, + .data = NULL, + .help_str = "Get the current frequency for the specified core", + .tokens = { + (void *)&cmd_show_cpu_freq, + (void *)&cmd_show_cpu_freq_core_num, + NULL, + }, +}; + +struct cmd_set_cpu_freq_result { + cmdline_fixed_string_t set_cpu_freq; + uint8_t core_num; + cmdline_fixed_string_t cmd; +}; + +static void +cmd_set_cpu_freq_parsed(void *parsed_result, struct cmdline *cl, + __attribute__((unused)) void *data) +{ + int ret = -1; + struct cmd_set_cpu_freq_result *res = parsed_result; + + if (!strcmp(res->cmd , "up")) + ret = power_manager_scale_core_up(res->core_num); + else if (!strcmp(res->cmd , "down")) + ret = power_manager_scale_core_down(res->core_num); + else if (!strcmp(res->cmd , "min")) + ret = power_manager_scale_core_min(res->core_num); + else if (!strcmp(res->cmd , "max")) + ret = power_manager_scale_core_max(res->core_num); + else if (!strcmp(res->cmd, "enable_turbo")) + ret = power_manager_enable_turbo_core(res->core_num); + else if (!strcmp(res->cmd, "disable_turbo")) + ret = power_manager_disable_turbo_core(res->core_num); + if (ret < 0) { + cmdline_printf(cl, "Error scaling core(%u) '%s'\n", res->core_num, + res->cmd); + } +} + +cmdline_parse_token_string_t cmd_set_cpu_freq = + TOKEN_STRING_INITIALIZER(struct cmd_set_cpu_freq_result, + set_cpu_freq, "set_cpu_freq"); +cmdline_parse_token_num_t cmd_set_cpu_freq_core_num = + TOKEN_NUM_INITIALIZER(struct cmd_set_cpu_freq_result, + core_num, UINT8); +cmdline_parse_token_string_t cmd_set_cpu_freq_cmd_cmd = + TOKEN_STRING_INITIALIZER(struct 
cmd_set_cpu_freq_result, + cmd, "up#down#min#max#enable_turbo#disable_turbo"); + +cmdline_parse_inst_t cmd_set_cpu_freq_set = { + .f = cmd_set_cpu_freq_parsed, + .data = NULL, + .help_str = "set_cpu_freq <core_num> <up|down|min|max|enable_turbo|disable_turbo>, adjust the current " + "frequency for the specified core", + .tokens = { + (void *)&cmd_set_cpu_freq, + (void *)&cmd_set_cpu_freq_core_num, + (void *)&cmd_set_cpu_freq_cmd_cmd, + NULL, + }, +}; + +cmdline_parse_ctx_t main_ctx[] = { + (cmdline_parse_inst_t *)&cmd_quit, + (cmdline_parse_inst_t *)&cmd_vm_op_set, + (cmdline_parse_inst_t *)&cmd_channels_op_set, + (cmdline_parse_inst_t *)&cmd_channels_status_op_set, + (cmdline_parse_inst_t *)&cmd_show_vm_set, + (cmdline_parse_inst_t *)&cmd_show_cpu_freq_mask_set, + (cmdline_parse_inst_t *)&cmd_set_cpu_freq_mask_set, + (cmdline_parse_inst_t *)&cmd_show_cpu_freq_set, + (cmdline_parse_inst_t *)&cmd_set_cpu_freq_set, + (cmdline_parse_inst_t *)&cmd_set_pcpu_mask_set, + (cmdline_parse_inst_t *)&cmd_set_pcpu_set, + NULL, +}; + +void +run_cli(__attribute__((unused)) void *arg) +{ + struct cmdline *cl; + + cl = cmdline_stdin_new(main_ctx, "vmpower> "); + if (cl == NULL) + return; + + cmdline_interact(cl); + cmdline_stdin_exit(cl); +} diff --git a/src/spdk/dpdk/examples/vm_power_manager/vm_power_cli.h b/src/spdk/dpdk/examples/vm_power_manager/vm_power_cli.h new file mode 100644 index 00000000..075c255e --- /dev/null +++ b/src/spdk/dpdk/examples/vm_power_manager/vm_power_cli.h @@ -0,0 +1,18 @@ +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright(c) 2010-2014 Intel Corporation + */ + +#ifndef VM_POWER_CLI_H_ +#define VM_POWER_CLI_H_ + +#ifdef __cplusplus +extern "C" { +#endif + +void run_cli(__attribute__((unused)) void *arg); + +#ifdef __cplusplus +} +#endif + +#endif /* VM_POWER_CLI_H_ */ |