// SPDX-License-Identifier: Apache-2.0
/*
 * XIVE2: eXternal Interrupt Virtualization Engine. POWER10 interrupt
 * controller
 *
 * Copyright (c) 2016-2019, IBM Corporation.
 */

#define pr_fmt(fmt) "XIVE: " fmt

#include <skiboot.h>
#include <slw.h>
#include <xscom.h>
#include <chip.h>
#include <io.h>
#include <xive.h>
#include <xive2-regs.h>
#include <xscom-p10-regs.h>
#include <interrupts.h>
#include <timebase.h>
#include <bitmap.h>
#include <buddy.h>
#include <phys-map.h>
#include <p10_stop_api.H>


/* Verbose debug */
#undef XIVE_VERBOSE_DEBUG

/* Extra debug options used in debug builds */
#ifdef DEBUG
#define XIVE_CHECK_LOCKS
#define XIVE_DEBUG_INIT_CACHE_UPDATES
#define XIVE_EXTRA_CHECK_INIT_CACHE
#else
#undef XIVE_CHECK_LOCKS
#undef XIVE_DEBUG_INIT_CACHE_UPDATES
#undef XIVE_EXTRA_CHECK_INIT_CACHE
#endif

/*
 * VSDs, blocks, set translation etc...
 *
 * For the following data structures, the XIVE uses a mechanism called
 * Virtualization Structure Tables (VST) to manage the memory layout
 * and access: ESBs (Event State Buffers), EAS (Event assignment
 * structures), ENDs (Event Notification Descriptors) and NVT/NVP
 * (Notification Virtual Targets/Processors).
 *
 * These structures divide those tables into 16 "blocks". Each XIVE
 * instance has a definition for all 16 blocks that can either represent
 * an actual table in memory or a remote XIVE MMIO port to access a
 * block that is owned by that remote XIVE.
 *
 * Our SW design will consist of allocating one block per chip (and thus
 * per XIVE instance) for now, thus giving us up to 16 supported chips in
 * the system. We may have to revisit that if we ever support systems with
 * more than 16 chips (not on our radar at the moment), or if we want to
 * do what pHyp does on some machines and dedicate 2 blocks per chip for
 * some structures.
 *
 * Thus we need to be careful that we never expose to Linux the concept
 * of block and block boundaries, but instead we provide full number ranges
 * so that consecutive blocks can be supported.
 *
 * Similarly, for MMIO access, the BARs support what is called "set
 * translation" which allows the BAR to be divided into a certain
 * number of sets. Each "set" can be routed to a specific block and
 * offset within a block.
 */

#define XIVE_MAX_BLOCKS 16
#define XIVE_VSD_SIZE 8

/*
 * Max number of ESBs. (direct table)
 *
 * The max number of ESBs supported in the P10 MMIO space is 1TB/128K: 8M.
 *
 * 1M is our current top limit of ESB entries and EAS entries
 * pre-allocated per chip. That allocates 256KB per chip for the state
 * bits and 8M per chip for the EAS.
 */

#define XIVE_INT_ORDER 20 /* 1M interrupts */
#define XIVE_INT_COUNT (1ul << XIVE_INT_ORDER)

/*
 * First interrupt number, also the first logical interrupt number
 * allocated by Linux (maximum ISA interrupt number + 1)
 */
#define XIVE_INT_FIRST 0x10

/* Corresponding direct table sizes */
#define XIVE_ESB_SIZE (XIVE_INT_COUNT / 4)
#define XIVE_EAT_SIZE (XIVE_INT_COUNT * 8)
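
/*
 * Worked example (illustrative): with XIVE_INT_ORDER = 20 we have 1M
 * interrupts. Each ESB entry is a 2-bit PQ state, i.e. 4 per byte, so
 * XIVE_ESB_SIZE is 1M / 4 = 256KB. Each EAS entry is 8 bytes, so
 * XIVE_EAT_SIZE is 1M * 8 = 8MB, matching the figures quoted above.
 */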

/* Use 64K for everything by default */
#define XIVE_ESB_SHIFT (16 + 1) /* trigger + mgmt pages */
#define XIVE_ESB_PAGE_SIZE (1ul << XIVE_ESB_SHIFT) /* 2 pages */

/*
 * Max number of ENDs. (indirect table)
 *
 * The max number of ENDs supported in the P10 MMIO space is 2TB/128K: 16M.
 * Since one END is 32 bytes, a 64K indirect subpage can hold 2K ENDs.
 * We need 8192 subpages, ie, 64K of memory for the indirect table.
 */
#define END_PER_PAGE (PAGE_SIZE / sizeof(struct xive_end))

#define XIVE_END_ORDER 23 /* 8M ENDs */
#define XIVE_END_COUNT (1ul << XIVE_END_ORDER)
#define XIVE_END_TABLE_SIZE ((XIVE_END_COUNT / END_PER_PAGE) * XIVE_VSD_SIZE)
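
/*
 * Worked example (illustrative, using the figures quoted above): one
 * END is 32 bytes, so a 64K subpage holds 2K ENDs (END_PER_PAGE).
 * With XIVE_END_ORDER = 23 (8M ENDs), the indirect table needs
 * 8M / 2K = 4096 VSD pointers, i.e. XIVE_END_TABLE_SIZE = 32KB.
 */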

#define XIVE_END_SHIFT (16 + 1) /* ESn + ESe pages */

/* One bit per number of priorities configured */
#define xive_end_bitmap_size(x) (XIVE_END_COUNT >> xive_cfg_vp_prio_shift(x))

/* Number of priorities (and thus ENDs) we allocate for each VP */
#define xive_cfg_vp_prio_shift(x) GETFIELD(CQ_XIVE_CFG_VP_INT_PRIO, (x)->config)
#define xive_cfg_vp_prio(x) (1 << xive_cfg_vp_prio_shift(x))

/* Max priority number */
#define xive_max_prio(x) (xive_cfg_vp_prio(x) - 1)

/* Priority used for gather/silent escalation (KVM) */
#define xive_escalation_prio(x) xive_max_prio(x)
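
/*
 * Worked example (illustrative): with the default 8 priorities per VP,
 * xive_cfg_vp_prio_shift() is 3 and xive_cfg_vp_prio() is 8, so
 * xive_max_prio() and xive_escalation_prio() are both 7 and the END
 * allocation bitmap has XIVE_END_COUNT >> 3 = 1M bits, one per group
 * of 8 ENDs.
 */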

/*
 * Max number of VPs. (indirect table)
 *
 * The max number of NVPs we support in our MMIO space is 1TB/128K: 8M.
 * Since one NVP is 32 bytes, a 64K indirect subpage can hold 2K NVPs.
 * We need 4096 pointers, ie, 32K of memory for the indirect table.
 *
 * However, we use 8 priorities (by default) per NVP and the number of
 * ENDs is configured to 8M. Therefore, our VP space is limited to 1M.
 */
#define VP_PER_PAGE (PAGE_SIZE / sizeof(struct xive_nvp))

#define XIVE_VP_ORDER(x) (XIVE_END_ORDER - xive_cfg_vp_prio_shift(x))
#define XIVE_VP_COUNT(x) (1ul << XIVE_VP_ORDER(x))
#define XIVE_VP_TABLE_SIZE(x) ((XIVE_VP_COUNT(x) / VP_PER_PAGE) * XIVE_VSD_SIZE)
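
/*
 * Worked example (illustrative): with 8 priorities (shift of 3),
 * XIVE_VP_ORDER is 23 - 3 = 20, i.e. the 1M VPs mentioned above. A
 * 64K subpage holds 2K NVPs of 32 bytes, so the indirect table needs
 * 1M / 2K = 512 VSD pointers, i.e. XIVE_VP_TABLE_SIZE = 4KB.
 */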

#define XIVE_NVP_SHIFT 17 /* NVPG BAR: two pages, even NVP, odd NVG */

/* VP Space maximums in Gen1 and Gen2 modes */
#define VP_SHIFT_GEN1 19 /* in sync with END_W6_VP_OFFSET_GEN1 */
#define VP_SHIFT_GEN2 24 /* in sync with END_W6_VP_OFFSET */

/*
 * VP ids for HW threads.
 *
 * Depends on the thread id bits configuration of the IC. 8bit is the
 * default for P10 and 7bit for p9.
 *
 * These values are global because they should be common to all chips
 */
static uint32_t xive_threadid_shift;
static uint32_t xive_hw_vp_base;
static uint32_t xive_hw_vp_count;

/*
 * The XIVE operation mode indicates the active "API" and corresponds
 * to the "version/mode" parameter of the opal_xive_reset() call
 */
static enum {
	/* No XICS emulation */
	XIVE_MODE_EXPL = OPAL_XIVE_MODE_EXPL, /* default */
	XIVE_MODE_NONE,
} xive_mode = XIVE_MODE_NONE;

/*
 * The XIVE exploitation mode options indicate the active features and
 * are part of the mode parameter of the opal_xive_reset() call
 */
static uint64_t xive_expl_options;

#define XIVE_EXPL_ALL_OPTIONS 0

/*
 * Each source controller has one of these. There's one embedded in
 * the XIVE struct for IPIs
 */
struct xive_src {
	struct irq_source is;
	const struct irq_source_ops *orig_ops;
	struct xive *xive;
	void *esb_mmio;
	uint32_t esb_base;
	uint32_t esb_shift;
	uint32_t flags;
};

struct xive_cpu_state {
	struct xive *xive;
	void *tm_ring1;

	/* Base HW VP and associated queues */
	uint32_t vp_blk;
	uint32_t vp_idx;
	uint32_t end_blk;
	uint32_t end_idx; /* Base end index of a block of 8 */

	struct lock lock;
};

enum xive_generation {
	XIVE_GEN1 = 1, /* P9 compat mode */
	XIVE_GEN2 = 2, /* P10 default */
};

enum xive_quirks {
	/* HW527671 - 8bits Hardwired Thread Id range not implemented */
	XIVE_QUIRK_THREADID_7BITS = 0x00000001,
	/* HW542974 - interrupt command priority checker not working properly */
	XIVE_QUIRK_BROKEN_PRIO_CHECK = 0x00000002,
};

struct xive {
	uint32_t chip_id;
	uint32_t block_id;
	struct dt_node *x_node;

	enum xive_generation generation;
	uint64_t capabilities;
	uint64_t config;

	uint64_t xscom_base;

	/* MMIO regions */
	void *ic_base;
	uint64_t ic_size;
	uint32_t ic_shift;
	void *ic_tm_direct_base;

	void *tm_base;
	uint64_t tm_size;
	uint32_t tm_shift;
	void *nvp_base;
	uint64_t nvp_size;
	void *esb_base;
	uint64_t esb_size;
	void *end_base;
	uint64_t end_size;

	/* Set on XSCOM register access error */
	bool last_reg_error;

	/* Per-XIVE mutex */
	struct lock lock;

	/* Pre-allocated tables.
	 *
	 * We set up all the VSDs for actual tables (ie, as opposed to
	 * forwarding ports) as either direct pre-allocated or indirect
	 * and partially populated.
	 *
	 * Currently, the ESB and the EAS tables are direct and fully
	 * pre-allocated based on XIVE_INT_COUNT.
	 *
	 * The other tables are indirect, we thus pre-allocate the indirect
	 * table (ie, pages of pointers) and populate enough of the pages
	 * for our basic setup using 64K subpages.
	 *
	 * The sizes of the indirect tables are driven by XIVE_VP_COUNT
	 * and XIVE_END_COUNT. The number of pre-allocated ones is
	 * driven by xive_hw_vp_count for the HW threads. The number
	 * of ENDs depends on the number of VPs.
	 */

	/* Direct SBE and EAT tables */
	void *sbe_base;
	void *eat_base;

	/* Indirect END table. NULL entries are unallocated, count is
	 * the number of pointers (ie, sub page placeholders).
	 */
	beint64_t *end_ind_base;
	uint32_t end_ind_count;
	uint64_t end_ind_size;

	/* END allocation bitmap. Each bit represents #priority ENDs */
	bitmap_t *end_map;

	/* Indirect NVT/VP table. NULL entries are unallocated, count is
	 * the number of pointers (ie, sub page placeholders).
	 */
	beint64_t *vp_ind_base;
	uint32_t vp_ind_count;
	uint64_t vp_ind_size;

	/* VP space size. Depends on Gen1/2 mode */
	uint32_t vp_shift;

	/* Pool of donated pages for provisioning indirect END and VP pages */
	struct list_head donated_pages;

	/* To ease a possible change to supporting more than one block of
	 * interrupts per chip, we store here the "base" global number
	 * and max number of interrupts for this chip. The global number
	 * encompasses the block number and index.
	 */
	uint32_t int_base;
	uint32_t int_count;

	/* Due to the overlap between IPIs and HW sources in the EAS table,
	 * we keep some kind of top-down allocator. It is used for HW sources
	 * to "allocate" interrupt entries and will limit what can be handed
	 * out as IPIs. Of course this assumes we "allocate" all HW sources
	 * before we start handing out IPIs.
	 *
	 * Note: The numbers here are global interrupt numbers so that we can
	 * potentially handle more than one block per chip in the future.
	 */
	uint32_t int_hw_bot; /* Bottom of HW allocation */
	uint32_t int_ipi_top; /* Highest IPI handed out so far + 1 */

	/* The IPI allocation bitmap */
	bitmap_t *ipi_alloc_map;

	/* We keep track of which interrupts were ever enabled to
	 * speed up xive_reset
	 */
	bitmap_t *int_enabled_map;

	/* Embedded source IPIs */
	struct xive_src ipis;

	/* Embedded escalation interrupts */
	struct xive_src esc_irqs;

	/* In memory queue overflow */
	void *q_ovf;

	/* Cache/sync injection */
	uint64_t sync_inject_size;
	void *sync_inject;

	/* INT HW Errata */
	uint64_t quirks;
};

/* First XIVE unit configured on the system */
static struct xive *one_xive;

/* Global DT node */
static struct dt_node *xive_dt_node;

/* Block <-> Chip conversions.
 *
 * As chipIDs may not be within the range of 16 block IDs supported by XIVE,
 * we have a 2 way conversion scheme.
 *
 * From block to chip, use the global table below.
 *
 * From chip to block, a field in struct proc_chip contains the first block
 * of that chip. For now we only support one block per chip but that might
 * change in the future.
 */
#define XIVE_INVALID_CHIP 0xffffffff
#define XIVE_MAX_CHIPS 16
static uint32_t xive_block_to_chip[XIVE_MAX_CHIPS];
static uint32_t xive_block_count;

static uint32_t xive_chip_to_block(uint32_t chip_id)
{
	struct proc_chip *c = get_chip(chip_id);

	assert(c);
	assert(c->xive);
	return c->xive->block_id;
}

/*
 * Conversion between GIRQ and block/index.
 *
 *    ------------------------------------
 *    |000E|BLOC|                   INDEX|
 *    ------------------------------------
 *       4    4                        24
 *
 * The E bit indicates that this is an escalation interrupt, in
 * that case, the BLOC/INDEX represents the END containing the
 * corresponding escalation descriptor.
 *
 * Global interrupt numbers for non-escalation interrupts are thus
 * limited to 28 bits.
 */

#define INT_SHIFT 24
#define INT_ESC_SHIFT (INT_SHIFT + 4) /* 4bits block id */

#if XIVE_INT_ORDER > INT_SHIFT
#error "Too many ESBs for IRQ encoding"
#endif

#if XIVE_END_ORDER > INT_SHIFT
#error "Too many ENDs for escalation IRQ number encoding"
#endif

#define GIRQ_TO_BLK(__g) (((__g) >> INT_SHIFT) & 0xf)
#define GIRQ_TO_IDX(__g) ((__g) & ((1 << INT_SHIFT) - 1))
#define BLKIDX_TO_GIRQ(__b,__i) (((uint32_t)(__b)) << INT_SHIFT | (__i))

#define GIRQ_IS_ESCALATION(__g) ((__g) & (1 << INT_ESC_SHIFT))
#define MAKE_ESCALATION_GIRQ(__b,__i)(BLKIDX_TO_GIRQ(__b,__i) | (1 << INT_ESC_SHIFT))
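
/*
 * Worked example (illustrative): for block 2 and index 0x345,
 * BLKIDX_TO_GIRQ(2, 0x345) = 0x02000345 and GIRQ_TO_BLK()/GIRQ_TO_IDX()
 * recover 2 and 0x345. The matching escalation number sets the E bit
 * (bit 28): MAKE_ESCALATION_GIRQ(2, 0x345) = 0x12000345.
 */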


/* Block/IRQ to chip# conversions */
#define PC_BLK_TO_CHIP(__b) (xive_block_to_chip[__b])
#define VC_BLK_TO_CHIP(__b) (xive_block_to_chip[__b])
#define GIRQ_TO_CHIP(__isn) (VC_BLK_TO_CHIP(GIRQ_TO_BLK(__isn)))

/* Routing of physical processors to VPs */
#define PIR2VP_IDX(__pir) (xive_hw_vp_base | P10_PIR2LOCALCPU(__pir))
#define PIR2VP_BLK(__pir) (xive_chip_to_block(P10_PIR2GCID(__pir)))
#define VP2PIR(__blk, __idx) (P10_PIRFROMLOCALCPU(VC_BLK_TO_CHIP(__blk), (__idx) & 0xff))

/* Decoding of OPAL API VP IDs. The VP IDs are encoded as follows
 *
 * Block group mode:
 *
 *    -----------------------------------
 *    |GVEOOOOO|                   INDEX|
 *    -----------------------------------
 *     ||   |
 *     ||  Order
 *     |Virtual
 *     Group
 *
 * G (Group)   : Set to 1 for a group VP (not currently supported)
 * V (Virtual) : Set to 1 for an allocated VP (vs. a physical processor ID)
 * E (Error)   : Should never be 1, used internally for errors
 * O (Order)   : Allocation order of the VP block
 *
 * The conversion is thus done as follows (groups aren't implemented yet)
 *
 * If V=0, O must be 0 and the 24-bit INDEX value is the PIR
 * If V=1, the order O group is allocated such that if N is the number of
 *         chip bits considered for allocation (*)
 *         then the INDEX is constructed as follows (bit numbers such as 0=LSB)
 *         - the bottom O-N bits are the index within the "VP block"
 *         - the next N bits are the XIVE blockID of the VP
 *         - the remaining bits are the per-chip "base"
 *         so the conversion consists of "extracting" the block ID and moving
 *         down the upper bits by N bits.
 *
 * In non-block-group mode, the difference is that the blockID is
 * on the left of the index (the entire VP block is in a single
 * block ID)
 */

#define VP_GROUP_SHIFT 31
#define VP_VIRTUAL_SHIFT 30
#define VP_ERROR_SHIFT 29
#define VP_ORDER_SHIFT 24

#define vp_group(vp) (((vp) >> VP_GROUP_SHIFT) & 1)
#define vp_virtual(vp) (((vp) >> VP_VIRTUAL_SHIFT) & 1)
#define vp_order(vp) (((vp) >> VP_ORDER_SHIFT) & 0x1f)
#define vp_index(vp) ((vp) & ((1 << VP_ORDER_SHIFT) - 1))

/* VP allocation */
static uint32_t xive_chips_alloc_bits = 0;
static struct buddy *xive_vp_buddy;
static struct lock xive_buddy_lock = LOCK_UNLOCKED;

/* VP# decoding/encoding */
static bool xive_decode_vp(uint32_t vp, uint32_t *blk, uint32_t *idx,
			   uint8_t *order, bool *group)
{
	uint32_t o = vp_order(vp);
	uint32_t n = xive_chips_alloc_bits;
	uint32_t index = vp_index(vp);
	uint32_t imask = (1 << (o - n)) - 1;

	/* Groups not supported yet */
	if (vp_group(vp))
		return false;
	if (group)
		*group = false;

	/* PIR case */
	if (!vp_virtual(vp)) {
		if (find_cpu_by_pir(index) == NULL)
			return false;
		if (blk)
			*blk = PIR2VP_BLK(index);
		if (idx)
			*idx = PIR2VP_IDX(index);
		return true;
	}

	/* Ensure o > n, we have *at least* 2 VPs per block */
	if (o <= n)
		return false;

	/* Combine the index base and index */
	if (idx)
		*idx = ((index >> n) & ~imask) | (index & imask);
	/* Extract block ID */
	if (blk)
		*blk = (index >> (o - n)) & ((1 << n) - 1);

	/* Return order as well if asked for */
	if (order)
		*order = o;

	return true;
}

static uint32_t xive_encode_vp(uint32_t blk, uint32_t idx, uint32_t order)
{
	uint32_t vp = (1 << VP_VIRTUAL_SHIFT) | (order << VP_ORDER_SHIFT);
	uint32_t n = xive_chips_alloc_bits;
	uint32_t imask = (1 << (order - n)) - 1;

	vp |= (idx & ~imask) << n;
	vp |= blk << (order - n);
	vp |= idx & imask;
	return vp;
}
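
/*
 * Worked example (illustrative): on a 2-chip system,
 * xive_chips_alloc_bits = 1. xive_encode_vp(1, 5, 6) returns
 * 0x46000025: V=1 and order 6 in the top byte, the block ID in bit 5
 * and the within-block index in the low bits. xive_decode_vp() on
 * that value gives back blk = 1, idx = 5, order = 6.
 */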

/*
 * XSCOM/MMIO helpers
 */
#define XIVE_NO_MMIO -1

#define xive_regw(__x, __r, __v) \
	__xive_regw(__x, __r, X_##__r, __v, #__r)
#define xive_regr(__x, __r) \
	__xive_regr(__x, __r, X_##__r, #__r)
#define xive_regwx(__x, __r, __v) \
	__xive_regw(__x, XIVE_NO_MMIO, X_##__r, __v, #__r)
#define xive_regrx(__x, __r) \
	__xive_regr(__x, XIVE_NO_MMIO, X_##__r, #__r)
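
/*
 * For instance (illustrative), xive_regr(x, VC_VSD_TABLE_ADDR) expands
 * to __xive_regr(x, VC_VSD_TABLE_ADDR, X_VC_VSD_TABLE_ADDR,
 * "VC_VSD_TABLE_ADDR"): the MMIO offset, the matching XSCOM register
 * and the name used for error reporting. The xive_regwx()/xive_regrx()
 * variants force the XSCOM path, which is needed before the IC BAR is
 * configured.
 */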

#ifdef XIVE_VERBOSE_DEBUG
#define xive_vdbg(__x,__fmt,...) prlog(PR_DEBUG,"[ IC %02x ] " __fmt, (__x)->chip_id, ##__VA_ARGS__)
#define xive_cpu_vdbg(__c,__fmt,...) prlog(PR_DEBUG,"[CPU %04x] " __fmt, (__c)->pir, ##__VA_ARGS__)
#else
#define xive_vdbg(x,fmt,...) do { } while(0)
#define xive_cpu_vdbg(x,fmt,...) do { } while(0)
#endif

#define xive_dbg(__x,__fmt,...) prlog(PR_DEBUG,"[ IC %02x ] " __fmt, (__x)->chip_id, ##__VA_ARGS__)
#define xive_cpu_dbg(__c,__fmt,...) prlog(PR_DEBUG,"[CPU %04x] " __fmt, (__c)->pir, ##__VA_ARGS__)
#define xive_notice(__x,__fmt,...) prlog(PR_NOTICE,"[ IC %02x ] " __fmt, (__x)->chip_id, ##__VA_ARGS__)
#define xive_cpu_notice(__c,__fmt,...) prlog(PR_NOTICE,"[CPU %04x] " __fmt, (__c)->pir, ##__VA_ARGS__)
#define xive_warn(__x,__fmt,...) prlog(PR_WARNING,"[ IC %02x ] " __fmt, (__x)->chip_id, ##__VA_ARGS__)
#define xive_cpu_warn(__c,__fmt,...) prlog(PR_WARNING,"[CPU %04x] " __fmt, (__c)->pir, ##__VA_ARGS__)
#define xive_err(__x,__fmt,...) prlog(PR_ERR,"[ IC %02x ] " __fmt, (__x)->chip_id, ##__VA_ARGS__)
#define xive_cpu_err(__c,__fmt,...) prlog(PR_ERR,"[CPU %04x] " __fmt, (__c)->pir, ##__VA_ARGS__)

/*
 * The XIVE subengine being accessed can be deduced from the XSCOM
 * reg, and from there, the page offset in the IC BAR.
 */
static void* xive_ic_page(struct xive *x, uint32_t x_reg)
{
	uint64_t pgoff = (x_reg >> 8) & 0x3;

	return x->ic_base + (pgoff << x->ic_shift);
}

static void __xive_regw(struct xive *x, uint32_t m_reg, uint32_t x_reg, uint64_t v,
			const char *rname)
{
	bool use_xscom = (m_reg == XIVE_NO_MMIO) || !x->ic_base;
	int64_t rc;

	x->last_reg_error = false;

	assert(x_reg != 0);

	if (use_xscom) {
		rc = xscom_write(x->chip_id, x->xscom_base + x_reg, v);
		if (rc) {
			if (!rname)
				rname = "???";
			xive_err(x, "Error writing register %s\n", rname);
			/* Anything else we can do here ? */
			x->last_reg_error = true;
		}
	} else {
		out_be64(xive_ic_page(x, x_reg) + m_reg, v);
	}
}

static uint64_t __xive_regr(struct xive *x, uint32_t m_reg, uint32_t x_reg,
			    const char *rname)
{
	bool use_xscom = (m_reg == XIVE_NO_MMIO) || !x->ic_base;
	int64_t rc;
	uint64_t val;

	x->last_reg_error = false;

	assert(x_reg != 0);

	if (use_xscom) {
		rc = xscom_read(x->chip_id, x->xscom_base + x_reg, &val);
		if (rc) {
			if (!rname)
				rname = "???";
			xive_err(x, "Error reading register %s\n", rname);
			/* Anything else we can do here ? */
			x->last_reg_error = true;
			return -1ull;
		}
	} else {
		val = in_be64(xive_ic_page(x, x_reg) + m_reg);
	}
	return val;
}

/* Locate a controller from an IRQ number */
static struct xive *xive_from_isn(uint32_t isn)
{
	uint32_t chip_id = GIRQ_TO_CHIP(isn);
	struct proc_chip *c = get_chip(chip_id);

	if (!c)
		return NULL;
	return c->xive;
}

static struct xive *xive_from_pc_blk(uint32_t blk)
{
	uint32_t chip_id = PC_BLK_TO_CHIP(blk);
	struct proc_chip *c = get_chip(chip_id);

	if (!c)
		return NULL;
	return c->xive;
}

static struct xive *xive_from_vc_blk(uint32_t blk)
{
	uint32_t chip_id = VC_BLK_TO_CHIP(blk);
	struct proc_chip *c = get_chip(chip_id);

	if (!c)
		return NULL;
	return c->xive;
}

static struct xive_end *xive_get_end(struct xive *x, unsigned int idx)
{
	struct xive_end *p;

	if (idx >= (x->end_ind_count * END_PER_PAGE))
		return NULL;
	p = (struct xive_end *)(be64_to_cpu(x->end_ind_base[idx / END_PER_PAGE]) &
				VSD_ADDRESS_MASK);
	if (!p)
		return NULL;

	return &p[idx % END_PER_PAGE];
}

static struct xive_eas *xive_get_eas(struct xive *x, unsigned int isn)
{
	struct xive_eas *eat;
	uint32_t idx = GIRQ_TO_IDX(isn);

	if (GIRQ_IS_ESCALATION(isn)) {
		/* Alright, an escalation EAS is buried inside an END, let's
		 * try to find it
		 */
		struct xive_end *end;

		if (x->chip_id != VC_BLK_TO_CHIP(GIRQ_TO_BLK(isn))) {
			xive_err(x, "%s, ESC ISN 0x%x not on right chip\n",
				 __func__, isn);
			return NULL;
		}
		end = xive_get_end(x, idx);
		if (!end) {
			xive_err(x, "%s, ESC ISN 0x%x END not found\n",
				 __func__, isn);
			return NULL;
		}

		/* If using single-escalation, don't let anybody get
		 * to the individual escalation interrupts
		 */
		if (xive_get_field32(END_W0_UNCOND_ESCALATE, end->w0))
			return NULL;

		/* Grab the escalation END */
		return (struct xive_eas *)(char *)&end->w4;
	} else {
		/* Check the block matches */
		if (isn < x->int_base || isn >= x->int_count) {
			xive_err(x, "%s, ISN 0x%x not on right chip\n",
				 __func__, isn);
			return NULL;
		}
		assert (idx < XIVE_INT_COUNT);

		/* If we support >1 block per chip, this should still
		 * work as we are likely to make the table contiguous
		 * anyway
		 */
		eat = x->eat_base;
		assert(eat);

		return eat + idx;
	}
}

static struct xive_nvp *xive_get_vp(struct xive *x, unsigned int idx)
{
	struct xive_nvp *p;

	assert(idx < (x->vp_ind_count * VP_PER_PAGE));
	p = (struct xive_nvp *)(be64_to_cpu(x->vp_ind_base[idx / VP_PER_PAGE]) &
				VSD_ADDRESS_MASK);
	if (!p)
		return NULL;

	return &p[idx % VP_PER_PAGE];
}

/*
 * Store the END base of the VP in W5, using the new architected field
 * in P10. Used to be the pressure relief interrupt field on P9.
 */
static void xive_vp_set_end_base(struct xive_nvp *vp,
				 uint32_t end_blk, uint32_t end_idx)
{
	vp->w5 = xive_set_field32(NVP_W5_VP_END_BLOCK, 0, end_blk) |
		xive_set_field32(NVP_W5_VP_END_INDEX, 0, end_idx);

	/* This is the criterion used to know if a VP was allocated */
	assert(vp->w5 != 0);
}

static void xive_init_default_vp(struct xive_nvp *vp,
				 uint32_t end_blk, uint32_t end_idx)
{
	memset(vp, 0, sizeof(struct xive_nvp));

	xive_vp_set_end_base(vp, end_blk, end_idx);

	vp->w0 = xive_set_field32(NVP_W0_VALID, 0, 1);
}

/*
 * VPs of the HW threads have their own set of ENDs which is allocated
 * when XIVE is initialized. These are tagged with a FIRMWARE bit so
 * that they can be identified when the driver is reset (kexec).
 */
static void xive_init_hw_end(struct xive_end *end)
{
	memset(end, 0, sizeof(struct xive_end));
	end->w0 = xive_set_field32(END_W0_FIRMWARE1, 0, 1);
}

static void *xive_get_donated_page(struct xive *x)
{
	return (void *)list_pop_(&x->donated_pages, 0);
}

#define XIVE_ALLOC_IS_ERR(_idx) ((_idx) >= 0xfffffff0)

#define XIVE_ALLOC_NO_SPACE 0xffffffff /* No possible space */
#define XIVE_ALLOC_NO_IND 0xfffffffe /* Indirect needs provisioning */
#define XIVE_ALLOC_NO_MEM 0xfffffffd /* Local allocation failed */
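
/*
 * Callers are expected to test the returned index with
 * XIVE_ALLOC_IS_ERR() before using it, along the lines of
 * (illustrative sketch):
 *
 *	ends = xive_alloc_end_set(x, false);
 *	if (XIVE_ALLOC_IS_ERR(ends)) {
 *		if (ends == XIVE_ALLOC_NO_IND)
 *			provision more donated pages and retry;
 *		else
 *			fail;
 *	}
 */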

static uint32_t xive_alloc_end_set(struct xive *x, bool alloc_indirect)
{
	uint32_t ind_idx;
	int idx;
	int end_base_idx;

	xive_vdbg(x, "Allocating END set...\n");

	assert(x->end_map);

	/* Allocate from the END bitmap. Each bit is 8 ENDs */
	idx = bitmap_find_zero_bit(*x->end_map, 0, xive_end_bitmap_size(x));
	if (idx < 0) {
		xive_dbg(x, "Allocation from END bitmap failed !\n");
		return XIVE_ALLOC_NO_SPACE;
	}

	end_base_idx = idx << xive_cfg_vp_prio_shift(x);

	xive_vdbg(x, "Got ENDs 0x%x..0x%x\n", end_base_idx,
		  end_base_idx + xive_max_prio(x));

	/* Calculate the indirect page where the ENDs reside */
	ind_idx = end_base_idx / END_PER_PAGE;

	/* Is there an indirect page ? If not, check if we can provision it */
	if (!x->end_ind_base[ind_idx]) {
		/* Default flags */
		uint64_t vsd_flags = SETFIELD(VSD_TSIZE, 0ull, 4) |
			SETFIELD(VSD_MODE, 0ull, VSD_MODE_EXCLUSIVE);
		void *page;

		/* If alloc_indirect is set, allocate the memory from OPAL's
		 * own pool, otherwise try to provision from the donated pool
		 */
		if (alloc_indirect) {
			/* Allocate/provision indirect page during boot only */
			xive_vdbg(x, "Indirect empty, provisioning from local pool\n");
			page = local_alloc(x->chip_id, PAGE_SIZE, PAGE_SIZE);
			if (!page) {
				xive_dbg(x, "provisioning failed !\n");
				return XIVE_ALLOC_NO_MEM;
			}
			vsd_flags |= VSD_FIRMWARE;
		} else {
			xive_vdbg(x, "Indirect empty, provisioning from donated pages\n");
			page = xive_get_donated_page(x);
			if (!page) {
				xive_vdbg(x, "no indirect pages available !\n");
				return XIVE_ALLOC_NO_IND;
			}
		}
		memset(page, 0, PAGE_SIZE);
		x->end_ind_base[ind_idx] = cpu_to_be64(vsd_flags |
			(((uint64_t)page) & VSD_ADDRESS_MASK));
		/* Any cache scrub needed ? */
	}

	bitmap_set_bit(*x->end_map, idx);
	return end_base_idx;
}

static void xive_free_end_set(struct xive *x, uint32_t ends)
{
	uint32_t idx;
	uint8_t prio_mask = xive_max_prio(x);

	xive_vdbg(x, "Freeing END 0x%x..0x%x\n", ends, ends + xive_max_prio(x));

	assert((ends & prio_mask) == 0);
	assert(x->end_map);

	idx = ends >> xive_cfg_vp_prio_shift(x);
	bitmap_clr_bit(*x->end_map, idx);
}

static bool xive_provision_vp_ind(struct xive *x, uint32_t vp_idx, uint32_t order)
{
	uint32_t pbase, pend, i;

	pbase = vp_idx / VP_PER_PAGE;
	pend = (vp_idx + (1 << order)) / VP_PER_PAGE;

	for (i = pbase; i <= pend; i++) {
		void *page;
		u64 vsd;

		/* Already provisioned ? */
		if (x->vp_ind_base[i])
			continue;

		/* Try to grab a donated page */
		page = xive_get_donated_page(x);
		if (!page)
			return false;

		/* Install the page */
		memset(page, 0, PAGE_SIZE);
		vsd = ((uint64_t)page) & VSD_ADDRESS_MASK;
		vsd |= SETFIELD(VSD_TSIZE, 0ull, 4);
		vsd |= SETFIELD(VSD_MODE, 0ull, VSD_MODE_EXCLUSIVE);
		x->vp_ind_base[i] = cpu_to_be64(vsd);
	}
	return true;
}

static void xive_init_vp_allocator(void)
{
	/* Initialize chip alloc bits */
	xive_chips_alloc_bits = ilog2(xive_block_count);

	prlog(PR_INFO, "%d chips considered for VP allocations\n",
	      1 << xive_chips_alloc_bits);

	/* Allocate a buddy big enough for XIVE_VP_ORDER allocations.
	 *
	 * Each bit in the buddy represents 1 << xive_chips_alloc_bits
	 * VPs.
	 */
	xive_vp_buddy = buddy_create(XIVE_VP_ORDER(one_xive));
	assert(xive_vp_buddy);

	/*
	 * We reserve the whole range of VP ids representing HW threads.
	 */
	assert(buddy_reserve(xive_vp_buddy, xive_hw_vp_base,
			     xive_threadid_shift));
}

static uint32_t xive_alloc_vps(uint32_t order)
{
	uint32_t local_order, i;
	int vp;

	/* The minimum order is 2 VPs per chip */
	if (order < (xive_chips_alloc_bits + 1))
		order = xive_chips_alloc_bits + 1;

	/* We split the allocation */
	local_order = order - xive_chips_alloc_bits;

	/* We grab that in the global buddy */
	assert(xive_vp_buddy);
	lock(&xive_buddy_lock);
	vp = buddy_alloc(xive_vp_buddy, local_order);
	unlock(&xive_buddy_lock);
	if (vp < 0)
		return XIVE_ALLOC_NO_SPACE;

	/* Provision on every chip considered for allocation */
	for (i = 0; i < (1 << xive_chips_alloc_bits); i++) {
		struct xive *x = xive_from_pc_blk(i);
		bool success;

		/* Return internal error & log rather than assert ? */
		assert(x);
		lock(&x->lock);
		success = xive_provision_vp_ind(x, vp, local_order);
		unlock(&x->lock);
		if (!success) {
			lock(&xive_buddy_lock);
			buddy_free(xive_vp_buddy, vp, local_order);
			unlock(&xive_buddy_lock);
			return XIVE_ALLOC_NO_IND;
		}
	}

	/* Encode the VP number. "blk" is 0 as this represents
	 * all blocks and the allocation always starts at 0
	 */
	return xive_encode_vp(0, vp, order);
}
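
/*
 * Worked example (illustrative): on a 2-chip system
 * (xive_chips_alloc_bits = 1), a request for order 6 (64 VPs) becomes
 * a local_order 5 allocation in the shared buddy, and each of the two
 * chips provisions indirect pages for its 32 VPs. The returned ID
 * encodes order 6 with block 0, as per the VP ID encoding described
 * above.
 */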

static void xive_free_vps(uint32_t vp)
{
	uint32_t idx;
	uint8_t order, local_order;

	assert(xive_decode_vp(vp, NULL, &idx, &order, NULL));

	/* We split the allocation */
	local_order = order - xive_chips_alloc_bits;

	/* Free that in the buddy */
	lock(&xive_buddy_lock);
	buddy_free(xive_vp_buddy, idx, local_order);
	unlock(&xive_buddy_lock);
}

enum xive_cache_type {
|
|
xive_cache_easc,
|
|
xive_cache_esbc,
|
|
xive_cache_endc,
|
|
xive_cache_nxc,
|
|
};
|
|
|
|
/*
|
|
* Cache update
|
|
*/
|
|
|
|
#define FLUSH_CTRL_POLL_VALID PPC_BIT(0) /* POLL bit is the same for all */
|
|
|
|
static int64_t __xive_cache_scrub(struct xive *x,
|
|
enum xive_cache_type ctype,
|
|
uint64_t block, uint64_t idx,
|
|
bool want_inval __unused, bool want_disable __unused)
|
|
{
|
|
uint64_t ctrl_reg, x_ctrl_reg;
|
|
uint64_t poll_val, ctrl_val;
|
|
|
|
#ifdef XIVE_CHECK_LOCKS
|
|
assert(lock_held_by_me(&x->lock));
|
|
#endif
|
|
switch (ctype) {
|
|
case xive_cache_easc:
|
|
poll_val =
|
|
SETFIELD(VC_EASC_FLUSH_POLL_BLOCK_ID, 0ll, block) |
|
|
SETFIELD(VC_EASC_FLUSH_POLL_OFFSET, 0ll, idx) |
|
|
VC_EASC_FLUSH_POLL_BLOCK_ID_MASK |
|
|
VC_EASC_FLUSH_POLL_OFFSET_MASK;
|
|
xive_regw(x, VC_EASC_FLUSH_POLL, poll_val);
|
|
ctrl_reg = VC_EASC_FLUSH_CTRL;
|
|
x_ctrl_reg = X_VC_EASC_FLUSH_CTRL;
|
|
break;
|
|
case xive_cache_esbc:
|
|
poll_val =
|
|
SETFIELD(VC_ESBC_FLUSH_POLL_BLOCK_ID, 0ll, block) |
|
|
SETFIELD(VC_ESBC_FLUSH_POLL_OFFSET, 0ll, idx) |
|
|
VC_ESBC_FLUSH_POLL_BLOCK_ID_MASK |
|
|
VC_ESBC_FLUSH_POLL_OFFSET_MASK;
|
|
xive_regw(x, VC_ESBC_FLUSH_POLL, poll_val);
|
|
ctrl_reg = VC_ESBC_FLUSH_CTRL;
|
|
x_ctrl_reg = X_VC_ESBC_FLUSH_CTRL;
|
|
break;
|
|
case xive_cache_endc:
|
|
poll_val =
|
|
SETFIELD(VC_ENDC_FLUSH_POLL_BLOCK_ID, 0ll, block) |
|
|
SETFIELD(VC_ENDC_FLUSH_POLL_OFFSET, 0ll, idx) |
|
|
VC_ENDC_FLUSH_POLL_BLOCK_ID_MASK |
|
|
VC_ENDC_FLUSH_POLL_OFFSET_MASK;
|
|
xive_regw(x, VC_ENDC_FLUSH_POLL, poll_val);
|
|
ctrl_reg = VC_ENDC_FLUSH_CTRL;
|
|
x_ctrl_reg = X_VC_ENDC_FLUSH_CTRL;
|
|
break;
|
|
case xive_cache_nxc:
|
|
poll_val =
|
|
SETFIELD(PC_NXC_FLUSH_POLL_BLOCK_ID, 0ll, block) |
|
|
SETFIELD(PC_NXC_FLUSH_POLL_OFFSET, 0ll, idx) |
|
|
PC_NXC_FLUSH_POLL_BLOCK_ID_MASK |
|
|
PC_NXC_FLUSH_POLL_OFFSET_MASK;
|
|
xive_regw(x, PC_NXC_FLUSH_POLL, poll_val);
|
|
ctrl_reg = PC_NXC_FLUSH_CTRL;
|
|
x_ctrl_reg = X_PC_NXC_FLUSH_CTRL;
|
|
break;
|
|
default:
|
|
return OPAL_INTERNAL_ERROR;
|
|
}
|
|
|
|
/* XXX Add timeout !!! */
|
|
for (;;) {
|
|
ctrl_val = __xive_regr(x, ctrl_reg, x_ctrl_reg, NULL);
|
|
if (!(ctrl_val & FLUSH_CTRL_POLL_VALID))
|
|
break;
|
|
/* Small delay */
|
|
time_wait(100);
|
|
}
|
|
sync();
|
|
return 0;
|
|
}
|
|
|
|
static int64_t xive_easc_scrub(struct xive *x, uint64_t block, uint64_t idx)
|
|
{
|
|
return __xive_cache_scrub(x, xive_cache_easc, block, idx, false, false);
|
|
}
|
|
|
|
static int64_t xive_nxc_scrub(struct xive *x, uint64_t block, uint64_t idx)
|
|
{
|
|
return __xive_cache_scrub(x, xive_cache_nxc, block, idx, false, false);
|
|
}
|
|
|
|
static int64_t xive_nxc_scrub_clean(struct xive *x, uint64_t block, uint64_t idx)
|
|
{
|
|
return __xive_cache_scrub(x, xive_cache_nxc, block, idx, true, false);
|
|
}
|
|
|
|
static int64_t xive_endc_scrub(struct xive *x, uint64_t block, uint64_t idx)
|
|
{
|
|
return __xive_cache_scrub(x, xive_cache_endc, block, idx, false, false);
|
|
}
|
|
|
|
#define XIVE_CACHE_WATCH_MAX_RETRIES 10
|
|
|
|
static int64_t __xive_cache_watch(struct xive *x, enum xive_cache_type ctype,
|
|
uint64_t block, uint64_t idx,
|
|
uint32_t start_dword, uint32_t dword_count,
|
|
beint64_t *new_data, bool light_watch,
|
|
bool synchronous)
|
|
{
|
|
uint64_t sreg, sregx, dreg0, dreg0x;
|
|
uint64_t dval0, sval, status;
|
|
int64_t i;
|
|
int retries = 0;
|
|
|
|
#ifdef XIVE_CHECK_LOCKS
|
|
assert(lock_held_by_me(&x->lock));
|
|
#endif
|
|
switch (ctype) {
|
|
case xive_cache_endc:
|
|
sreg = VC_ENDC_WATCH0_SPEC;
|
|
sregx = X_VC_ENDC_WATCH0_SPEC;
|
|
dreg0 = VC_ENDC_WATCH0_DATA0;
|
|
dreg0x = X_VC_ENDC_WATCH0_DATA0;
|
|
sval = SETFIELD(VC_ENDC_WATCH_BLOCK_ID, idx, block);
|
|
break;
|
|
case xive_cache_nxc:
|
|
sreg = PC_NXC_WATCH0_SPEC;
|
|
sregx = X_PC_NXC_WATCH0_SPEC;
|
|
dreg0 = PC_NXC_WATCH0_DATA0;
|
|
dreg0x = X_PC_NXC_WATCH0_DATA0;
|
|
sval = SETFIELD(PC_NXC_WATCH_BLOCK_ID, idx, block);
|
|
break;
|
|
default:
|
|
return OPAL_INTERNAL_ERROR;
|
|
}
|
|
|
|
/* The full bit is in the same position for ENDC and NXC */
|
|
if (!light_watch)
|
|
sval |= VC_ENDC_WATCH_FULL;
|
|
|
|
for (;;) {
|
|
/* Write the cache watch spec */
|
|
__xive_regw(x, sreg, sregx, sval, NULL);
|
|
|
|
/* Load data0 register to populate the watch */
|
|
dval0 = __xive_regr(x, dreg0, dreg0x, NULL);
|
|
|
|
/* If new_data is NULL, this is a dummy watch used as a
|
|
* workaround for a HW bug
|
|
*/
|
|
if (!new_data) {
|
|
__xive_regw(x, dreg0, dreg0x, dval0, NULL);
|
|
return 0;
|
|
}
|
|
|
|
/* Write the words into the watch facility. We write in reverse
|
|
* order in case word 0 is part of it as it must be the last
|
|
* one written.
|
|
*/
|
|
for (i = start_dword + dword_count - 1; i >= start_dword ;i--) {
|
|
uint64_t dw = be64_to_cpu(new_data[i - start_dword]);
|
|
__xive_regw(x, dreg0 + i * 8, dreg0x + i, dw, NULL);
|
|
}
|
|
|
|
/* Write data0 register to trigger the update if word 0 wasn't
|
|
* written above
|
|
*/
|
|
if (start_dword > 0)
|
|
__xive_regw(x, dreg0, dreg0x, dval0, NULL);
|
|
|
|
/* This may not be necessary for light updates (it's possible
|
|
		 * that a sync is sufficient, TBD). Ensure the above is
|
|
* complete and check the status of the watch.
|
|
*/
|
|
status = __xive_regr(x, sreg, sregx, NULL);
|
|
|
|
/* Bits FULL and CONFLICT are in the same position in
|
|
* ENDC and NXC
|
|
*/
|
|
if (!(status & VC_ENDC_WATCH_FULL) ||
|
|
!(status & VC_ENDC_WATCH_CONFLICT))
|
|
break;
|
|
if (!synchronous)
|
|
return OPAL_BUSY;
|
|
|
|
if (++retries == XIVE_CACHE_WATCH_MAX_RETRIES) {
|
|
xive_err(x, "Reached maximum retries %d when doing "
|
|
"a %s cache update\n", retries,
|
|
ctype == xive_cache_endc ? "ENDC" : "NXC");
|
|
return OPAL_BUSY;
|
|
}
|
|
}
|
|
|
|
/* Perform a scrub with "want_invalidate" set to false to push the
|
|
* cache updates to memory as well
|
|
*/
|
|
return __xive_cache_scrub(x, ctype, block, idx, false, false);
|
|
}
|
|
|
|
#ifdef XIVE_DEBUG_INIT_CACHE_UPDATES
|
|
static bool xive_check_endc_update(struct xive *x, uint32_t idx, struct xive_end *end)
|
|
{
|
|
struct xive_end *end_p = xive_get_end(x, idx);
|
|
struct xive_end end2;
|
|
|
|
assert(end_p);
|
|
end2 = *end_p;
|
|
end2.w0 &= ~END_W0_RESERVED;
|
|
end2.w1 &= ~END_W1_RESERVED;
|
|
end2.w7 &= ~END_W7_F0_RESERVED;
|
|
if (memcmp(end, &end2, sizeof(struct xive_end)) != 0) {
|
|
xive_err(x, "END update mismatch idx %d\n", idx);
|
|
xive_err(x, "want: %08x %08x %08x %08x\n",
|
|
end->w0, end->w1, end->w2, end->w3);
|
|
xive_err(x, " %08x %08x %08x %08x\n",
|
|
end->w4, end->w5, end->w6, end->w7);
|
|
xive_err(x, "got : %08x %08x %08x %08x\n",
|
|
end2.w0, end2.w1, end2.w2, end2.w3);
|
|
xive_err(x, " %08x %08x %08x %08x\n",
|
|
end2.w4, end2.w5, end2.w6, end2.w7);
|
|
return false;
|
|
}
|
|
return true;
|
|
}
|
|
|
|
static bool xive_check_nxc_update(struct xive *x, uint32_t idx, struct xive_nvp *vp)
|
|
{
|
|
struct xive_nvp *vp_p = xive_get_vp(x, idx);
|
|
struct xive_nvp vp2;
|
|
|
|
assert(vp_p);
|
|
vp2 = *vp_p;
|
|
if (memcmp(vp, &vp2, sizeof(struct xive_nvp)) != 0) {
|
|
xive_err(x, "VP update mismatch idx %d\n", idx);
|
|
xive_err(x, "want: %08x %08x %08x %08x\n",
|
|
vp->w0, vp->w1, vp->w2, vp->w3);
|
|
xive_err(x, " %08x %08x %08x %08x\n",
|
|
vp->w4, vp->w5, vp->w6, vp->w7);
|
|
xive_err(x, "got : %08x %08x %08x %08x\n",
|
|
vp2.w0, vp2.w1, vp2.w2, vp2.w3);
|
|
xive_err(x, " %08x %08x %08x %08x\n",
|
|
vp2.w4, vp2.w5, vp2.w6, vp2.w7);
|
|
return false;
|
|
}
|
|
return true;
|
|
}
|
|
#else
|
|
static inline bool xive_check_endc_update(struct xive *x __unused,
|
|
uint32_t idx __unused,
|
|
struct xive_end *end __unused)
|
|
{
|
|
return true;
|
|
}
|
|
|
|
static inline bool xive_check_nxc_update(struct xive *x __unused,
|
|
uint32_t idx __unused,
|
|
struct xive_nvp *vp __unused)
|
|
{
|
|
return true;
|
|
}
|
|
#endif
|
|
|
|
static int64_t xive_escalation_ive_cache_update(struct xive *x, uint64_t block,
|
|
uint64_t idx, struct xive_eas *eas,
|
|
bool synchronous)
|
|
{
|
|
return __xive_cache_watch(x, xive_cache_endc, block, idx,
|
|
2, 1, &eas->w, true, synchronous);
|
|
}
|
|
|
|
static int64_t xive_endc_cache_update(struct xive *x, uint64_t block,
|
|
uint64_t idx, struct xive_end *end,
|
|
bool synchronous)
|
|
{
|
|
int64_t ret;
|
|
|
|
ret = __xive_cache_watch(x, xive_cache_endc, block, idx,
|
|
0, 4, (beint64_t *)end, false, synchronous);
|
|
xive_check_endc_update(x, idx, end);
|
|
return ret;
|
|
}
|
|
|
|
static int64_t xive_nxc_cache_update(struct xive *x, uint64_t block,
|
|
uint64_t idx, struct xive_nvp *vp,
|
|
bool synchronous)
|
|
{
|
|
int64_t ret;
|
|
|
|
ret = __xive_cache_watch(x, xive_cache_nxc, block, idx,
|
|
0, 4, (beint64_t *)vp, false, synchronous);
|
|
xive_check_nxc_update(x, idx, vp);
|
|
return ret;
|
|
}
|
|
|
|
/*
|
|
* VSD
|
|
*/
|
|
static bool xive_set_vsd(struct xive *x, uint32_t tbl, uint32_t idx, uint64_t v)
|
|
{
|
|
/* Set VC subengine */
|
|
xive_regw(x, VC_VSD_TABLE_ADDR,
|
|
SETFIELD(VC_VSD_TABLE_SELECT, 0ull, tbl) |
|
|
SETFIELD(VC_VSD_TABLE_ADDRESS, 0ull, idx));
|
|
if (x->last_reg_error)
|
|
return false;
|
|
xive_regw(x, VC_VSD_TABLE_DATA, v);
|
|
if (x->last_reg_error)
|
|
return false;
|
|
|
|
/* also set PC subengine if table is used */
|
|
if (tbl == VST_EAS || tbl == VST_ERQ || tbl == VST_IC)
|
|
return true;
|
|
|
|
xive_regw(x, PC_VSD_TABLE_ADDR,
|
|
SETFIELD(PC_VSD_TABLE_SELECT, 0ull, tbl) |
|
|
SETFIELD(PC_VSD_TABLE_ADDRESS, 0ull, idx));
|
|
if (x->last_reg_error)
|
|
return false;
|
|
xive_regw(x, PC_VSD_TABLE_DATA, v);
|
|
if (x->last_reg_error)
|
|
return false;
|
|
return true;
|
|
}
|
|
|
|
static bool xive_set_local_tables(struct xive *x)
|
|
{
|
|
uint64_t base, i;
|
|
|
|
/* These have to be power of 2 sized */
|
|
assert(is_pow2(XIVE_ESB_SIZE));
|
|
assert(is_pow2(XIVE_EAT_SIZE));
|
|
|
|
/* All tables set as exclusive */
|
|
base = SETFIELD(VSD_MODE, 0ull, VSD_MODE_EXCLUSIVE);
|
|
|
|
/* ESB: direct mode */
|
|
if (!xive_set_vsd(x, VST_ESB, x->block_id, base |
|
|
(((uint64_t)x->sbe_base) & VSD_ADDRESS_MASK) |
|
|
SETFIELD(VSD_TSIZE, 0ull, ilog2(XIVE_ESB_SIZE) - 12)))
|
|
return false;
|
|
|
|
/* EAS: direct mode */
|
|
if (!xive_set_vsd(x, VST_EAS, x->block_id, base |
|
|
(((uint64_t)x->eat_base) & VSD_ADDRESS_MASK) |
|
|
SETFIELD(VSD_TSIZE, 0ull, ilog2(XIVE_EAT_SIZE) - 12)))
|
|
return false;
|
|
|
|
/* END: indirect mode with 64K subpages */
|
|
if (!xive_set_vsd(x, VST_END, x->block_id, base |
|
|
(((uint64_t)x->end_ind_base) & VSD_ADDRESS_MASK) |
|
|
VSD_INDIRECT | SETFIELD(VSD_TSIZE, 0ull,
|
|
ilog2(x->end_ind_size) - 12)))
|
|
return false;
|
|
|
|
/* NVP: indirect mode with 64K subpages */
|
|
if (!xive_set_vsd(x, VST_NVP, x->block_id, base |
|
|
(((uint64_t)x->vp_ind_base) & VSD_ADDRESS_MASK) |
|
|
VSD_INDIRECT | SETFIELD(VSD_TSIZE, 0ull,
|
|
ilog2(x->vp_ind_size) - 12)))
|
|
return false;
|
|
|
|
/* NVG: not used */
|
|
/* NVC: not used */
|
|
|
|
/* INT and SYNC: indexed with the Topology# */
|
|
if (!xive_set_vsd(x, VST_IC, x->chip_id, base |
|
|
(((uint64_t)x->ic_base) & VSD_ADDRESS_MASK) |
|
|
SETFIELD(VSD_TSIZE, 0ull, ilog2(x->ic_size) - 12)))
|
|
return false;
|
|
|
|
if (!xive_set_vsd(x, VST_SYNC, x->chip_id, base |
|
|
(((uint64_t)x->sync_inject) & VSD_ADDRESS_MASK) |
|
|
SETFIELD(VSD_TSIZE, 0ull, ilog2(x->sync_inject_size) - 12)))
|
|
return false;
|
|
|
|
/*
|
|
* ERQ: one 64K page for each queue overflow. Indexed with :
|
|
*
|
|
* 0:IPI, 1:HWD, 2:NxC, 3:INT, 4:OS-Queue, 5:Pool-Queue, 6:Hard-Queue
|
|
*/
|
|
for (i = 0; i < VC_QUEUE_COUNT; i++) {
|
|
u64 addr = ((uint64_t)x->q_ovf) + i * PAGE_SIZE;
|
|
u64 cfg, sreg, sregx;
|
|
|
|
if (!xive_set_vsd(x, VST_ERQ, i, base |
|
|
(addr & VSD_ADDRESS_MASK) |
|
|
SETFIELD(VSD_TSIZE, 0ull, 4)))
|
|
return false;
|
|
|
|
sreg = VC_QUEUES_CFG_REM0 + i * 8;
|
|
sregx = X_VC_QUEUES_CFG_REM0 + i;
|
|
cfg = __xive_regr(x, sreg, sregx, NULL);
|
|
cfg |= VC_QUEUES_CFG_MEMB_EN;
|
|
cfg = SETFIELD(VC_QUEUES_CFG_MEMB_SZ, cfg, 4);
|
|
__xive_regw(x, sreg, sregx, cfg, NULL);
|
|
}
|
|
|
|
return true;
|
|
}
|
|
|
|
|
|
/*
|
|
* IC BAR layout
|
|
*
|
|
* Page 0: Internal CQ register accesses (reads & writes)
|
|
* Page 1: Internal PC register accesses (reads & writes)
|
|
* Page 2: Internal VC register accesses (reads & writes)
|
|
* Page 3: Internal TCTXT (TIMA) reg accesses (read & writes)
|
|
* Page 4: Notify Port page (writes only, w/data),
|
|
* Page 5: Reserved
|
|
* Page 6: Sync Poll page (writes only, dataless)
|
|
* Page 7: Sync Inject page (writes only, dataless)
|
|
* Page 8: LSI Trigger page (writes only, dataless)
|
|
* Page 9: LSI SB Management page (reads & writes dataless)
|
|
* Pages 10-255: Reserved
|
|
* Pages 256-383: Direct mapped Thread Context Area (reads & writes)
|
|
* covering the 128 threads in P10.
|
|
* Pages 384-511: Reserved
|
|
*/
|
|
|
|
#define XIVE_IC_CQ_PGOFF 0
|
|
#define XIVE_IC_PC_PGOFF 1
|
|
#define XIVE_IC_VC_PGOFF 2
|
|
#define XIVE_IC_TCTXT_PGOFF 3
|
|
#define XIVE_NOTIFY_PGOFF 4
|
|
#define XIVE_SYNC_POLL_PGOFF 6
|
|
#define XIVE_SYNC_INJECT_PGOFF 7
|
|
#define XIVE_LSI_TRIGGER_PGOFF 8
|
|
#define XIVE_LSI_MGMT_PGOFF 9
|
|
#define XIVE_IC_TM_DIRECT_PGOFF 256
|
|
|
|
static bool xive_configure_ic_bars(struct xive *x)
|
|
{
|
|
uint64_t chip_id = x->chip_id;
|
|
uint64_t val;
|
|
|
|
/* Reset all bars to zero */
|
|
xive_regwx(x, CQ_RST_CTL, CQ_RST_PB_BAR_RESET);
|
|
|
|
/* IC BAR */
|
|
phys_map_get(chip_id, XIVE_IC, 0, (uint64_t *)&x->ic_base, &x->ic_size);
|
|
val = (uint64_t)x->ic_base | CQ_IC_BAR_VALID | CQ_IC_BAR_64K;
|
|
x->ic_shift = 16;
|
|
|
|
xive_regwx(x, CQ_IC_BAR, val);
|
|
if (x->last_reg_error)
|
|
return false;
|
|
|
|
/*
|
|
* TM BAR, same address for each chip. Hence we create a fake
|
|
* chip 0 and use that for all phys_map_get(XIVE_TM) calls.
|
|
*/
|
|
phys_map_get(0, XIVE_TM, 0, (uint64_t *)&x->tm_base, &x->tm_size);
|
|
val = (uint64_t)x->tm_base | CQ_TM_BAR_VALID | CQ_TM_BAR_64K;
|
|
x->tm_shift = 16;
|
|
|
|
xive_regwx(x, CQ_TM_BAR, val);
|
|
if (x->last_reg_error)
|
|
return false;
|
|
|
|
/* IC BAR sub-pages shortcuts */
|
|
x->ic_tm_direct_base = x->ic_base +
|
|
(XIVE_IC_TM_DIRECT_PGOFF << x->ic_shift);
|
|
|
|
return true;
|
|
}
|
|
|
|
/*
|
|
* NVPG, NVC, ESB, END BARs have common attributes: 64k page and only
|
|
* one set covering the whole BAR.
|
|
*/
|
|
static bool xive_configure_bars(struct xive *x)
|
|
{
|
|
uint64_t chip_id = x->chip_id;
|
|
uint64_t val;
|
|
uint64_t esb_size;
|
|
uint64_t end_size;
|
|
uint64_t nvp_size;
|
|
|
|
x->nvp_size = XIVE_VP_COUNT(x) << XIVE_NVP_SHIFT;
|
|
x->esb_size = XIVE_INT_COUNT << XIVE_ESB_SHIFT;
|
|
x->end_size = XIVE_END_COUNT << XIVE_END_SHIFT;
|
|
|
|
/*
|
|
* NVC BAR is not configured because we do not use the XIVE2
|
|
* Crowd capability.
|
|
*/
|
|
|
|
/* NVPG BAR: two pages, even NVP, odd NVG */
|
|
phys_map_get(chip_id, XIVE_NVPG, 0, (uint64_t *)&x->nvp_base, &nvp_size);
|
|
if (x->nvp_size > nvp_size) {
|
|
xive_err(x, "NVP table is larger than default: "
|
|
"0x%012llx > 0x%012llx\n", x->nvp_size, nvp_size);
|
|
return false;
|
|
}
|
|
|
|
val = (uint64_t)x->nvp_base | CQ_BAR_VALID | CQ_BAR_64K |
|
|
SETFIELD(CQ_BAR_RANGE, 0ull, ilog2(x->nvp_size) - 24);
|
|
xive_regwx(x, CQ_NVPG_BAR, val);
|
|
if (x->last_reg_error)
|
|
return false;
|
|
|
|
/* ESB BAR */
|
|
phys_map_get(chip_id, XIVE_ESB, 0, (uint64_t *)&x->esb_base, &esb_size);
|
|
if (x->esb_size > esb_size) {
|
|
xive_err(x, "ESB table is larger than default: "
|
|
"0x%012llx > 0x%012llx\n", x->esb_size, esb_size);
|
|
return false;
|
|
}
|
|
|
|
val = (uint64_t)x->esb_base | CQ_BAR_VALID | CQ_BAR_64K |
|
|
SETFIELD(CQ_BAR_RANGE, 0ull, ilog2(x->esb_size) - 24);
|
|
xive_regwx(x, CQ_ESB_BAR, val);
|
|
if (x->last_reg_error)
|
|
return false;
|
|
|
|
/* END BAR */
|
|
phys_map_get(chip_id, XIVE_END, 0, (uint64_t *)&x->end_base, &end_size);
|
|
if (x->end_size > end_size) {
|
|
xive_err(x, "END table is larger than default: "
|
|
"0x%012llx > 0x%012llx\n", x->end_size, end_size);
|
|
return false;
|
|
}
|
|
|
|
val = (uint64_t)x->end_base | CQ_BAR_VALID | CQ_BAR_64K |
|
|
SETFIELD(CQ_BAR_RANGE, 0ull, ilog2(x->end_size) - 24);
|
|
xive_regwx(x, CQ_END_BAR, val);
|
|
if (x->last_reg_error)
|
|
return false;
|
|
|
|
xive_dbg(x, "IC: %14p [0x%012llx]\n", x->ic_base, x->ic_size);
|
|
xive_dbg(x, "TM: %14p [0x%012llx]\n", x->tm_base, x->tm_size);
|
|
xive_dbg(x, "NVP: %14p [0x%012llx]\n", x->nvp_base, x->nvp_size);
|
|
xive_dbg(x, "ESB: %14p [0x%012llx]\n", x->esb_base, x->esb_size);
|
|
xive_dbg(x, "END: %14p [0x%012llx]\n", x->end_base, x->end_size);
|
|
xive_dbg(x, "OVF: %14p [0x%012x]\n", x->q_ovf,
|
|
VC_QUEUE_COUNT * PAGE_SIZE);
|
|
|
|
return true;
|
|
}
|
|
|
|
static void xive_dump_mmio(struct xive *x)
|
|
{
|
|
prlog(PR_DEBUG, " CQ_CFG_PB_GEN = %016llx\n",
|
|
in_be64(x->ic_base + CQ_CFG_PB_GEN));
|
|
prlog(PR_DEBUG, " CQ_MSGSND = %016llx\n",
|
|
in_be64(x->ic_base + CQ_MSGSND));
|
|
}
|
|
|
|
static const struct {
|
|
uint64_t bitmask;
|
|
const char *name;
|
|
} xive_capabilities[] = {
|
|
{ CQ_XIVE_CAP_PHB_PQ_DISABLE, "PHB PQ disable mode support" },
|
|
{ CQ_XIVE_CAP_PHB_ABT, "PHB address based trigger mode support" },
|
|
{ CQ_XIVE_CAP_EXPLOITATION_MODE, "Exploitation mode" },
|
|
{ CQ_XIVE_CAP_STORE_EOI, "StoreEOI mode support" },
|
|
{ CQ_XIVE_CAP_VP_SAVE_RESTORE, "VP Context Save and Restore" },
|
|
};
|
|
|
|
static void xive_dump_capabilities(struct xive *x, uint64_t cap_val)
|
|
{
|
|
int i;
|
|
|
|
xive_dbg(x, "capabilities: %016llx\n", cap_val);
|
|
xive_dbg(x, "\tVersion: %lld\n",
|
|
GETFIELD(CQ_XIVE_CAP_VERSION, cap_val));
|
|
xive_dbg(x, "\tUser interrupt priorities: [ 1 - %d ]\n",
|
|
1 << GETFIELD(CQ_XIVE_CAP_USER_INT_PRIO, cap_val));
|
|
xive_dbg(x, "\tVP interrupt priorities: [ %d - 8 ]\n",
|
|
1 << GETFIELD(CQ_XIVE_CAP_VP_INT_PRIO, cap_val));
|
|
xive_dbg(x, "\tExtended Blockid bits: %lld\n",
|
|
4 + GETFIELD(CQ_XIVE_CAP_BLOCK_ID_WIDTH, cap_val));
|
|
|
|
for (i = 0; i < ARRAY_SIZE(xive_capabilities); i++) {
|
|
if (xive_capabilities[i].bitmask & cap_val)
|
|
xive_dbg(x, "\t%s\n", xive_capabilities[i].name);
|
|
}
|
|
}
|
|
|
|
static const struct {
|
|
uint64_t bitmask;
|
|
const char *name;
|
|
} xive_configs[] = {
|
|
{ CQ_XIVE_CFG_GEN1_TIMA_OS, "Gen1 mode TIMA OS" },
|
|
{ CQ_XIVE_CFG_GEN1_TIMA_HYP, "Gen1 mode TIMA Hyp" },
|
|
{ CQ_XIVE_CFG_GEN1_TIMA_HYP_BLK0, "Gen1 mode TIMA General Hypervisor Block0" },
|
|
{ CQ_XIVE_CFG_GEN1_TIMA_CROWD_DIS, "Gen1 mode TIMA Crowd disable" },
|
|
{ CQ_XIVE_CFG_GEN1_END_ESX, "Gen1 mode END ESx" },
|
|
{ CQ_XIVE_CFG_EN_VP_SAVE_RESTORE, "VP Context Save and Restore" },
|
|
{ CQ_XIVE_CFG_EN_VP_SAVE_REST_STRICT, "VP Context Save and Restore strict" },
|
|
};
|
|
|
|
static void xive_dump_configuration(struct xive *x, const char *prefix,
|
|
uint64_t cfg_val)
|
|
{
|
|
	int i;
|
|
|
|
xive_dbg(x, "%s configuration: %016llx\n", prefix, cfg_val);
|
|
xive_dbg(x, "\tHardwired Thread Id range: %lld bits\n",
|
|
7 + GETFIELD(CQ_XIVE_CFG_HYP_HARD_RANGE, cfg_val));
|
|
xive_dbg(x, "\tUser Interrupt priorities: [ 1 - %d ]\n",
|
|
1 << GETFIELD(CQ_XIVE_CFG_USER_INT_PRIO, cfg_val));
|
|
xive_dbg(x, "\tVP Interrupt priorities: [ 0 - %d ]\n", xive_max_prio(x));
|
|
xive_dbg(x, "\tBlockId bits: %lld bits\n",
|
|
4 + GETFIELD(CQ_XIVE_CFG_BLOCK_ID_WIDTH, cfg_val));
|
|
if (CQ_XIVE_CFG_HYP_HARD_BLKID_OVERRIDE & cfg_val)
|
|
xive_dbg(x, "\tHardwired BlockId: %lld\n",
|
|
GETFIELD(CQ_XIVE_CFG_HYP_HARD_BLOCK_ID, cfg_val));
|
|
|
|
for (i = 0; i < ARRAY_SIZE(xive_configs); i++) {
|
|
if (xive_configs[i].bitmask & cfg_val)
|
|
xive_dbg(x, "\t%s\n", xive_configs[i].name);
|
|
}
|
|
}
|
|
|
|
/*
|
|
* Default XIVE configuration
|
|
*/
|
|
#define XIVE_CONFIGURATION \
|
|
(SETFIELD(CQ_XIVE_CFG_HYP_HARD_RANGE, 0ull, CQ_XIVE_CFG_THREADID_8BITS) | \
|
|
SETFIELD(CQ_XIVE_CFG_VP_INT_PRIO, 0ull, CQ_XIVE_CFG_INT_PRIO_8))
|
|
|
|
/*
|
|
* Gen1 configuration for tests (QEMU)
|
|
*/
|
|
#define XIVE_CONFIGURATION_GEN1 \
|
|
(SETFIELD(CQ_XIVE_CFG_HYP_HARD_RANGE, 0ull, CQ_XIVE_CFG_THREADID_7BITS) | \
|
|
SETFIELD(CQ_XIVE_CFG_VP_INT_PRIO, 0ull, CQ_XIVE_CFG_INT_PRIO_8) | \
|
|
CQ_XIVE_CFG_GEN1_TIMA_OS | \
|
|
CQ_XIVE_CFG_GEN1_TIMA_HYP | \
|
|
CQ_XIVE_CFG_GEN1_TIMA_HYP_BLK0 | \
|
|
CQ_XIVE_CFG_GEN1_TIMA_CROWD_DIS | \
|
|
CQ_XIVE_CFG_GEN1_END_ESX)
|
|
|
|
static bool xive_has_cap(struct xive *x, uint64_t cap)
|
|
{
|
|
return !!x && !!(x->capabilities & cap);
|
|
}
|
|
|
|
#define XIVE_CAN_STORE_EOI(x) xive_has_cap(x, CQ_XIVE_CAP_STORE_EOI)
|
|
|
|
static bool xive_cfg_save_restore(struct xive *x)
|
|
{
|
|
return !!(x->config & CQ_XIVE_CFG_EN_VP_SAVE_RESTORE);
|
|
}
|
|
|
|
/*
|
|
* When PQ_disable is available, configure the ESB cache to improve
|
|
* performance for PHB ESBs.
|
|
*
|
|
* split_mode :
|
|
* 1/3rd of the cache is reserved for PHB ESBs and the rest to
|
|
* IPIs. This is sufficient to keep all the PHB ESBs in cache and
|
|
* avoid ESB cache misses during IO interrupt processing.
|
|
*
|
|
* hash_array_enable :
|
|
* Internal cache hashing optimization. The hash_array tracks for
|
|
* ESBs where the original trigger came from so that we avoid
|
|
* getting the EAS into the cache twice.
|
|
*/
|
|
static void xive_config_esb_cache(struct xive *x)
|
|
{
|
|
uint64_t val = xive_regr(x, VC_ESBC_CFG);
|
|
|
|
if (xive_has_cap(x, CQ_XIVE_CAP_PHB_PQ_DISABLE)) {
|
|
val |= VC_ESBC_CFG_SPLIT_MODE | VC_ESBC_CFG_HASH_ARRAY_ENABLE;
|
|
val = SETFIELD(VC_ESBC_CFG_MAX_ENTRIES_IN_MODIFIED, val, 0xE);
|
|
xive_dbg(x, "ESB cache configured with split mode "
|
|
"and hash array. VC_ESBC_CFG=%016llx\n", val);
|
|
} else
|
|
val &= ~VC_ESBC_CFG_SPLIT_MODE;
|
|
|
|
xive_regw(x, VC_ESBC_CFG, val);
|
|
}
|
|
|
|
static void xive_config_fused_core(struct xive *x)
|
|
{
|
|
uint64_t val = xive_regr(x, TCTXT_CFG);
|
|
|
|
if (this_cpu()->is_fused_core) {
|
|
val |= TCTXT_CFG_FUSE_CORE_EN;
|
|
xive_dbg(x, "configured for fused cores. "
|
|
"PC_TCTXT_CFG=%016llx\n", val);
|
|
} else
|
|
val &= ~TCTXT_CFG_FUSE_CORE_EN;
|
|
xive_regw(x, TCTXT_CFG, val);
|
|
}
|
|
|
|
static void xive_config_reduced_priorities_fixup(struct xive *x)
|
|
{
|
|
if (xive_cfg_vp_prio_shift(x) < CQ_XIVE_CFG_INT_PRIO_8 &&
|
|
x->quirks & XIVE_QUIRK_BROKEN_PRIO_CHECK) {
|
|
uint64_t val = xive_regr(x, PC_ERR1_CFG1);
|
|
|
|
val &= ~PC_ERR1_CFG1_INTERRUPT_INVALID_PRIO;
|
|
xive_dbg(x, "workaround for reduced priorities. "
|
|
"PC_ERR1_CFG1=%016llx\n", val);
|
|
xive_regw(x, PC_ERR1_CFG1, val);
|
|
}
|
|
}
|
|
|
|
static bool xive_config_init(struct xive *x)
|
|
{
|
|
x->capabilities = xive_regr(x, CQ_XIVE_CAP);
|
|
xive_dump_capabilities(x, x->capabilities);
|
|
|
|
x->generation = GETFIELD(CQ_XIVE_CAP_VERSION, x->capabilities);
|
|
|
|
/*
|
|
* Allow QEMU to override version for tests
|
|
*/
|
|
if (x->generation != XIVE_GEN2 && !chip_quirk(QUIRK_QEMU)) {
|
|
xive_err(x, "Invalid XIVE controller version %d\n",
|
|
x->generation);
|
|
return false;
|
|
}
|
|
|
|
x->config = xive_regr(x, CQ_XIVE_CFG);
|
|
xive_dump_configuration(x, "default", x->config);
|
|
|
|
/* Start with default settings */
|
|
x->config = x->generation == XIVE_GEN1 ? XIVE_CONFIGURATION_GEN1 :
|
|
XIVE_CONFIGURATION;
|
|
|
|
if (x->quirks & XIVE_QUIRK_THREADID_7BITS)
|
|
x->config = SETFIELD(CQ_XIVE_CFG_HYP_HARD_RANGE, x->config,
|
|
CQ_XIVE_CFG_THREADID_7BITS);
|
|
|
|
/*
|
|
* Hardwire the block ID. The default value is the topology ID
|
|
* of the chip which is different from the block.
|
|
*/
|
|
x->config |= CQ_XIVE_CFG_HYP_HARD_BLKID_OVERRIDE |
|
|
SETFIELD(CQ_XIVE_CFG_HYP_HARD_BLOCK_ID, 0ull, x->block_id);
|
|
|
|
/*
|
|
	 * Enable "VP Context Save and Restore" by default. It is
|
|
* compatible with KVM which currently does the context
|
|
* save&restore in the entry/exit path of the vCPU
|
|
*/
|
|
if (x->capabilities & CQ_XIVE_CAP_VP_SAVE_RESTORE)
|
|
x->config |= CQ_XIVE_CFG_EN_VP_SAVE_RESTORE;
|
|
|
|
xive_dump_configuration(x, "new", x->config);
|
|
xive_regw(x, CQ_XIVE_CFG, x->config);
|
|
if (xive_regr(x, CQ_XIVE_CFG) != x->config) {
|
|
xive_err(x, "configuration setting failed\n");
|
|
}
|
|
|
|
/*
|
|
* Disable error reporting in the FIR for info errors from the VC.
|
|
*/
|
|
xive_regw(x, CQ_FIRMASK_OR, CQ_FIR_VC_INFO_ERROR_0_2);
|
|
|
|
/*
|
|
* Mask CI Load and Store to bad location, as IPI trigger
|
|
* pages may be mapped to user space, and a read on the
|
|
* trigger page causes a checkstop
|
|
*/
|
|
xive_regw(x, CQ_FIRMASK_OR, CQ_FIR_PB_RCMDX_CI_ERR1);
|
|
|
|
/*
|
|
* VP space settings. P9 mode is 19bits.
|
|
*/
|
|
x->vp_shift = x->generation == XIVE_GEN1 ?
|
|
VP_SHIFT_GEN1 : VP_SHIFT_GEN2;
|
|
|
|
/*
|
|
* VP ids for HW threads. These values are hardcoded in the
|
|
* CAM line of the HW context
|
|
*
|
|
	 *   POWER10    |chip|0000000000000001|threadid|
	 *   28bits       4          16            8
	 *
	 *   POWER9     |chip|000000000001|thrdid |
	 *   23bits       4        12         7
|
|
*/
|
|
|
|
/* TODO (cosmetic): set VP ids for HW threads only once */
|
|
xive_threadid_shift = 7 + GETFIELD(CQ_XIVE_CFG_HYP_HARD_RANGE,
|
|
x->config);
|
|
|
|
xive_hw_vp_base = 1 << xive_threadid_shift;
|
|
xive_hw_vp_count = 1 << xive_threadid_shift;
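
	/*
	 * Illustrative: with the default 8-bit thread id range on P10,
	 * xive_threadid_shift is 8, so the HW thread VPs use ids
	 * 0x100..0x1ff (xive_hw_vp_base = 0x100, xive_hw_vp_count = 256),
	 * matching the "1" guard bit in the CAM line layout above.
	 */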
|
|
|
|
xive_dbg(x, "store EOI is %savailable\n",
|
|
XIVE_CAN_STORE_EOI(x) ? "" : "not ");
|
|
|
|
xive_config_fused_core(x);
|
|
|
|
xive_config_esb_cache(x);
|
|
|
|
xive_config_reduced_priorities_fixup(x);
|
|
|
|
return true;
|
|
}
|
|
|
|
/* Set Translation tables : 1 block per chip */
|
|
static bool xive_setup_set_xlate(struct xive *x)
|
|
{
|
|
unsigned int i;
|
|
|
|
/* Configure ESBs */
|
|
xive_regw(x, CQ_TAR,
|
|
CQ_TAR_AUTOINC | SETFIELD(CQ_TAR_SELECT, 0ull, CQ_TAR_ESB));
|
|
if (x->last_reg_error)
|
|
return false;
|
|
for (i = 0; i < XIVE_MAX_BLOCKS; i++) {
|
|
xive_regw(x, CQ_TDR, CQ_TDR_VALID |
|
|
SETFIELD(CQ_TDR_BLOCK_ID, 0ull, x->block_id));
|
|
if (x->last_reg_error)
|
|
return false;
|
|
}
|
|
|
|
/* Configure ENDs */
|
|
xive_regw(x, CQ_TAR,
|
|
CQ_TAR_AUTOINC | SETFIELD(CQ_TAR_SELECT, 0ull, CQ_TAR_END));
|
|
if (x->last_reg_error)
|
|
return false;
|
|
for (i = 0; i < XIVE_MAX_BLOCKS; i++) {
|
|
xive_regw(x, CQ_TDR, CQ_TDR_VALID |
|
|
SETFIELD(CQ_TDR_BLOCK_ID, 0ull, x->block_id));
|
|
if (x->last_reg_error)
|
|
return false;
|
|
}
|
|
|
|
/* Configure NVPs */
|
|
xive_regw(x, CQ_TAR,
|
|
CQ_TAR_AUTOINC | SETFIELD(CQ_TAR_SELECT, 0ull, CQ_TAR_NVPG));
|
|
if (x->last_reg_error)
|
|
return false;
|
|
for (i = 0; i < XIVE_MAX_BLOCKS; i++) {
|
|
xive_regw(x, CQ_TDR, CQ_TDR_VALID |
|
|
SETFIELD(CQ_TDR_BLOCK_ID, 0ull, x->block_id));
|
|
if (x->last_reg_error)
|
|
return false;
|
|
}
|
|
return true;
|
|
}
|
|
|
|
static bool xive_prealloc_tables(struct xive *x)
|
|
{
|
|
uint32_t i;
|
|
uint32_t pbase, pend;
|
|
|
|
/* ESB has 4 entries per byte */
|
|
x->sbe_base = local_alloc(x->chip_id, XIVE_ESB_SIZE, XIVE_ESB_SIZE);
|
|
if (!x->sbe_base) {
|
|
xive_err(x, "Failed to allocate SBE\n");
|
|
return false;
|
|
}
|
|
|
|
/* PQs are initialized to 0b01 which corresponds to "ints off" */
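	/* (0x55 is 0b01010101: four 2-bit PQ entries of 01 per byte) */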
|
|
memset(x->sbe_base, 0x55, XIVE_ESB_SIZE);
|
|
xive_dbg(x, "SBE at %p size 0x%lx\n", x->sbe_base, XIVE_ESB_SIZE);
|
|
|
|
/* EAS entries are 8 bytes */
|
|
x->eat_base = local_alloc(x->chip_id, XIVE_EAT_SIZE, XIVE_EAT_SIZE);
|
|
if (!x->eat_base) {
|
|
xive_err(x, "Failed to allocate EAS\n");
|
|
return false;
|
|
}
|
|
|
|
/*
|
|
* We clear the entries (non-valid). They will be initialized
|
|
* when actually used
|
|
*/
|
|
memset(x->eat_base, 0, XIVE_EAT_SIZE);
|
|
xive_dbg(x, "EAT at %p size 0x%lx\n", x->eat_base, XIVE_EAT_SIZE);
|
|
|
|
/* Indirect END table. Limited to one top page. */
|
|
x->end_ind_size = ALIGN_UP(XIVE_END_TABLE_SIZE, PAGE_SIZE);
|
|
if (x->end_ind_size > PAGE_SIZE) {
|
|
xive_err(x, "END indirect table is too big !\n");
|
|
return false;
|
|
}
|
|
x->end_ind_base = local_alloc(x->chip_id, x->end_ind_size,
|
|
x->end_ind_size);
|
|
if (!x->end_ind_base) {
|
|
xive_err(x, "Failed to allocate END indirect table\n");
|
|
return false;
|
|
}
|
|
memset(x->end_ind_base, 0, x->end_ind_size);
|
|
xive_dbg(x, "ENDi at %p size 0x%llx #%ld entries\n", x->end_ind_base,
|
|
x->end_ind_size, XIVE_END_COUNT);
|
|
x->end_ind_count = XIVE_END_TABLE_SIZE / XIVE_VSD_SIZE;
|
|
|
|
/* Indirect VP table. Limited to one top page. */
|
|
x->vp_ind_size = ALIGN_UP(XIVE_VP_TABLE_SIZE(x), PAGE_SIZE);
|
|
if (x->vp_ind_size > PAGE_SIZE) {
|
|
xive_err(x, "VP indirect table is too big !\n");
|
|
return false;
|
|
}
|
|
x->vp_ind_base = local_alloc(x->chip_id, x->vp_ind_size,
|
|
x->vp_ind_size);
|
|
if (!x->vp_ind_base) {
|
|
xive_err(x, "Failed to allocate VP indirect table\n");
|
|
return false;
|
|
}
|
|
xive_dbg(x, "VPi at %p size 0x%llx #%ld entries\n", x->vp_ind_base,
|
|
x->vp_ind_size, XIVE_VP_COUNT(x));
|
|
x->vp_ind_count = XIVE_VP_TABLE_SIZE(x) / XIVE_VSD_SIZE;
|
|
memset(x->vp_ind_base, 0, x->vp_ind_size);
|
|
|
|
/* Allocate pages for the VP ids representing HW threads */
|
|
pbase = xive_hw_vp_base / VP_PER_PAGE;
|
|
pend = (xive_hw_vp_base + xive_hw_vp_count) / VP_PER_PAGE;
|
|
|
|
xive_dbg(x, "Allocating pages %d to %d of VPs (for %d VPs)\n",
|
|
pbase, pend, xive_hw_vp_count);
|
|
for (i = pbase; i <= pend; i++) {
|
|
void *page;
|
|
u64 vsd;
|
|
|
|
/* Indirect entries have a VSD format */
|
|
page = local_alloc(x->chip_id, PAGE_SIZE, PAGE_SIZE);
|
|
if (!page) {
|
|
xive_err(x, "Failed to allocate VP page\n");
|
|
return false;
|
|
}
|
|
xive_dbg(x, "VP%d at %p size 0x%x\n", i, page, PAGE_SIZE);
|
|
memset(page, 0, PAGE_SIZE);
|
|
vsd = ((uint64_t)page) & VSD_ADDRESS_MASK;
|
|
|
|
vsd |= SETFIELD(VSD_TSIZE, 0ull, 4);
|
|
vsd |= SETFIELD(VSD_MODE, 0ull, VSD_MODE_EXCLUSIVE);
|
|
vsd |= VSD_FIRMWARE;
|
|
x->vp_ind_base[i] = cpu_to_be64(vsd);
|
|
}
|
|
|
|
/*
|
|
* Allocate page for cache and sync injection (512 * 128 hw
|
|
* threads) + one extra page for future use
|
|
*/
|
|
x->sync_inject_size = PAGE_SIZE + PAGE_SIZE;
|
|
x->sync_inject = local_alloc(x->chip_id, x->sync_inject_size,
|
|
x->sync_inject_size);
|
|
if (!x->sync_inject) {
|
|
xive_err(x, "Failed to allocate sync pages\n");
|
|
return false;
|
|
}
|
|
|
|
/*
|
|
* The Memory Coherence Directory uses 16M "granule" to track
|
|
* shared copies of a cache line. If any cache line within the
|
|
* 16M range gets touched by someone outside of the group, the
|
|
* MCD forces accesses to any cache line within the range to
|
|
* include everyone that might have a shared copy.
|
|
*/
|
|
#define QUEUE_OVF_ALIGN (16 << 20) /* MCD granule size */
|
|
|
|
/*
|
|
* Allocate the queue overflow pages and use a 16M alignment
|
|
* to avoid sharing with other structures and reduce traffic
|
|
* on the PowerBus.
|
|
*/
|
|
x->q_ovf = local_alloc(x->chip_id, VC_QUEUE_COUNT * PAGE_SIZE,
|
|
QUEUE_OVF_ALIGN);
|
|
if (!x->q_ovf) {
|
|
xive_err(x, "Failed to allocate queue overflow\n");
|
|
return false;
|
|
}
|
|
return true;
|
|
}
|
|
|
|
static void xive_add_provisioning_properties(void)
|
|
{
|
|
beint32_t chips[XIVE_MAX_CHIPS];
|
|
uint32_t i, count;
|
|
|
|
dt_add_property_cells(xive_dt_node,
|
|
"ibm,xive-provision-page-size", PAGE_SIZE);
|
|
|
|
count = 1 << xive_chips_alloc_bits;
|
|
for (i = 0; i < count; i++)
|
|
chips[i] = cpu_to_be32(xive_block_to_chip[i]);
|
|
dt_add_property(xive_dt_node, "ibm,xive-provision-chips",
|
|
chips, 4 * count);
|
|
}
|
|
|
|
static void xive_create_mmio_dt_node(struct xive *x)
|
|
{
|
|
uint64_t tb = (uint64_t)x->tm_base;
|
|
uint32_t stride = 1u << x->tm_shift;
|
|
|
|
xive_dt_node = dt_new_addr(dt_root, "interrupt-controller", tb);
|
|
assert(xive_dt_node);
|
|
|
|
dt_add_property_u64s(xive_dt_node, "reg",
|
|
tb + 0 * stride, stride,
|
|
tb + 1 * stride, stride,
|
|
tb + 2 * stride, stride,
|
|
tb + 3 * stride, stride);
|
|
|
|
dt_add_property_strings(xive_dt_node, "compatible",
|
|
"ibm,opal-xive-pe", "ibm,opal-xive-vc",
|
|
"ibm,opal-intc");
|
|
|
|
dt_add_property(xive_dt_node, "interrupt-controller", NULL, 0);
|
|
dt_add_property_cells(xive_dt_node, "#address-cells", 0);
|
|
dt_add_property_cells(xive_dt_node, "#interrupt-cells", 2);
|
|
|
|
dt_add_property_cells(xive_dt_node, "ibm,xive-eq-sizes",
|
|
12, 16, 21, 24);
|
|
|
|
dt_add_property_cells(xive_dt_node, "ibm,xive-#priorities",
|
|
xive_cfg_vp_prio(x));
|
|
|
|
dt_add_property(xive_dt_node, "single-escalation-support", NULL, 0);
|
|
|
|
if (XIVE_CAN_STORE_EOI(x))
|
|
dt_add_property(xive_dt_node, "store-eoi", NULL, 0);
|
|
|
|
if (xive_cfg_save_restore(x))
|
|
dt_add_property(xive_dt_node, "vp-save-restore", NULL, 0);
|
|
|
|
xive_add_provisioning_properties();
|
|
|
|
}
|
|
|
|
uint32_t xive2_get_phandle(void)
|
|
{
|
|
if (!xive_dt_node)
|
|
return 0;
|
|
|
|
return xive_dt_node->phandle;
|
|
}
|
|
|
|
static void xive_setup_forward_ports(struct xive *x, struct proc_chip *remote_chip)
|
|
{
|
|
struct xive *remote_xive = remote_chip->xive;
|
|
uint64_t base = SETFIELD(VSD_MODE, 0ull, VSD_MODE_FORWARD);
|
|
|
|
if (!xive_set_vsd(x, VST_ESB, remote_xive->block_id,
|
|
base | ((uint64_t)remote_xive->esb_base) |
|
|
SETFIELD(VSD_TSIZE, 0ull, ilog2(x->esb_size) - 12)))
|
|
goto error;
|
|
|
|
/* EAS: No remote */
|
|
|
|
if (!xive_set_vsd(x, VST_END, remote_xive->block_id,
|
|
base | ((uint64_t)remote_xive->end_base) |
|
|
SETFIELD(VSD_TSIZE, 0ull, ilog2(x->end_size) - 12)))
|
|
goto error;
|
|
|
|
if (!xive_set_vsd(x, VST_NVP, remote_xive->block_id,
|
|
base | ((uint64_t)remote_xive->nvp_base) |
|
|
SETFIELD(VSD_TSIZE, 0ull, ilog2(x->nvp_size) - 12)))
|
|
goto error;
|
|
|
|
/* NVG: not used */
|
|
/* NVC: not used */
|
|
|
|
if (!xive_set_vsd(x, VST_IC, remote_xive->chip_id,
|
|
base | ((uint64_t)remote_xive->ic_base) |
|
|
SETFIELD(VSD_TSIZE, 0ull, ilog2(x->ic_size) - 12)))
|
|
goto error;
|
|
|
|
if (!xive_set_vsd(x, VST_SYNC, remote_xive->chip_id,
|
|
base | ((uint64_t)remote_xive->sync_inject) |
|
|
SETFIELD(VSD_TSIZE, 0ull, ilog2(x->sync_inject_size) - 12)))
|
|
goto error;
|
|
|
|
/* ERQ: No remote */
|
|
|
|
return;
|
|
|
|
error:
|
|
xive_err(x, "Failure configuring forwarding ports\n");
|
|
}
|
|
|
|
static void late_init_one_xive(struct xive *x)
|
|
{
|
|
struct proc_chip *chip;
|
|
|
|
	/* We need to set up the cross-chip forward ports. Let's
	 * iterate over all chips and set them up accordingly
	 */
|
|
for_each_chip(chip) {
|
|
/* We skip ourselves or chips without a xive */
|
|
if (chip->xive == x || !chip->xive)
|
|
continue;
|
|
|
|
/* Setup our forward ports to that chip */
|
|
xive_setup_forward_ports(x, chip);
|
|
}
|
|
}
|
|
|
|
static bool xive_check_ipi_free(struct xive *x, uint32_t irq, uint32_t count)
|
|
{
|
|
uint32_t i, idx = GIRQ_TO_IDX(irq);
|
|
|
|
for (i = 0; i < count; i++)
|
|
if (bitmap_tst_bit(*x->ipi_alloc_map, idx + i))
|
|
return false;
|
|
return true;
|
|
}
|
|
|
|
uint32_t xive2_alloc_hw_irqs(uint32_t chip_id, uint32_t count,
|
|
uint32_t align)
|
|
{
|
|
struct proc_chip *chip = get_chip(chip_id);
|
|
struct xive *x;
|
|
uint32_t base, i;
|
|
|
|
assert(chip);
|
|
assert(is_pow2(align));
|
|
|
|
x = chip->xive;
|
|
assert(x);
|
|
|
|
lock(&x->lock);
|
|
|
|
/* Allocate the HW interrupts */
|
|
base = x->int_hw_bot - count;
|
|
base &= ~(align - 1);
|
|
if (base < x->int_ipi_top) {
|
|
xive_err(x,
|
|
"HW alloc request for %d interrupts aligned to %d failed\n",
|
|
count, align);
|
|
unlock(&x->lock);
|
|
return XIVE_IRQ_ERROR;
|
|
}
|
|
if (!xive_check_ipi_free(x, base, count)) {
|
|
xive_err(x, "HWIRQ boot allocator request overlaps dynamic allocator\n");
|
|
unlock(&x->lock);
|
|
return XIVE_IRQ_ERROR;
|
|
}
|
|
|
|
x->int_hw_bot = base;
|
|
|
|
	/* Initialize the corresponding EAS entries to sane defaults,
	 * i.e. the entry is valid, not routed and masked, and the END
	 * data is set to the GIRQ number.
	 */
|
|
for (i = 0; i < count; i++) {
|
|
struct xive_eas *eas = xive_get_eas(x, base + i);
|
|
|
|
eas->w = xive_set_field64(EAS_VALID, 0, 1) |
|
|
xive_set_field64(EAS_MASKED, 0, 1) |
|
|
xive_set_field64(EAS_END_DATA, 0, base + i);
|
|
}
|
|
|
|
unlock(&x->lock);
|
|
return base;
|
|
}
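
/*
 * Illustrative sketch (not compiled): a HW source driver typically
 * pairs this boot-time allocator with source registration, e.g.:
 *
 *	uint32_t girq = xive2_alloc_hw_irqs(chip_id, 8, 8);
 *
 *	if (girq != XIVE_IRQ_ERROR)
 *		xive2_register_hw_source(girq, 8, 16, mmio, flags,
 *					 data, ops);
 *
 * mmio/flags/data/ops are placeholders for whatever the source
 * provides, see xive2_register_hw_source() further down.
 */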
|
|
|
|
uint32_t xive2_alloc_ipi_irqs(uint32_t chip_id, uint32_t count,
|
|
uint32_t align)
|
|
{
|
|
struct proc_chip *chip = get_chip(chip_id);
|
|
struct xive *x;
|
|
uint32_t base, i;
|
|
|
|
assert(chip);
|
|
assert(is_pow2(align));
|
|
|
|
x = chip->xive;
|
|
assert(x);
|
|
|
|
lock(&x->lock);
|
|
|
|
/* Allocate the IPI interrupts */
|
|
base = x->int_ipi_top + (align - 1);
|
|
base &= ~(align - 1);
|
|
if (base >= x->int_hw_bot) {
|
|
xive_err(x,
|
|
"IPI alloc request for %d interrupts aligned to %d failed\n",
|
|
count, align);
|
|
unlock(&x->lock);
|
|
return XIVE_IRQ_ERROR;
|
|
}
|
|
if (!xive_check_ipi_free(x, base, count)) {
|
|
xive_err(x, "IPI boot allocator request overlaps dynamic allocator\n");
|
|
unlock(&x->lock);
|
|
return XIVE_IRQ_ERROR;
|
|
}
|
|
|
|
x->int_ipi_top = base + count;
|
|
|
|
	/* Initialize the corresponding EAS entries to sane defaults,
	 * i.e. the entry is valid, not routed and masked, and the END
	 * data is set to the GIRQ number.
	 */
|
|
for (i = 0; i < count; i++) {
|
|
struct xive_eas *eas = xive_get_eas(x, base + i);
|
|
|
|
eas->w = xive_set_field64(EAS_VALID, 0, 1) |
|
|
xive_set_field64(EAS_MASKED, 0, 1) |
|
|
xive_set_field64(EAS_END_DATA, 0, base + i);
|
|
}
|
|
|
|
unlock(&x->lock);
|
|
return base;
|
|
}
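
/*
 * Illustrative sketch (not compiled): IPI allocation is normally
 * followed by registering the ESB-backed source, e.g.:
 *
 *	uint32_t girq = xive2_alloc_ipi_irqs(chip_id, count, 0x10);
 *
 *	if (girq != XIVE_IRQ_ERROR)
 *		xive2_register_ipi_source(girq, count, data, ops);
 */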
|
|
|
|
void *xive2_get_trigger_port(uint32_t girq)
|
|
{
|
|
uint32_t idx = GIRQ_TO_IDX(girq);
|
|
struct xive *x;
|
|
|
|
/* Find XIVE on which the EAS resides */
|
|
x = xive_from_isn(girq);
|
|
if (!x)
|
|
return NULL;
|
|
|
|
if (GIRQ_IS_ESCALATION(girq)) {
|
|
/* There is no trigger page for escalation interrupts */
|
|
return NULL;
|
|
} else {
|
|
/* Make sure it's an IPI on that chip */
|
|
if (girq < x->int_base ||
|
|
girq >= x->int_ipi_top)
|
|
return NULL;
|
|
|
|
return x->esb_base + idx * XIVE_ESB_PAGE_SIZE;
|
|
}
|
|
}
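
/*
 * Illustrative sketch (not compiled): an IPI is raised with a store
 * to its trigger page, the data itself being ignored, e.g.:
 *
 *	void *trig = xive2_get_trigger_port(girq);
 *
 *	if (trig)
 *		out_be64(trig, 0);
 */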
|
|
|
|
/*
|
|
* Notify Port page (writes only, w/data), separated into two
|
|
* categories, both sent to VC:
|
|
* - IPI queue (Addr bit 52 = 0) (for NPU)
|
|
* - HW queue (Addr bit 52 = 1)
|
|
*/
|
|
uint64_t xive2_get_notify_port(uint32_t chip_id, uint32_t ent)
|
|
{
|
|
struct proc_chip *chip = get_chip(chip_id);
|
|
struct xive *x;
|
|
uint32_t offset = 0;
|
|
|
|
assert(chip);
|
|
x = chip->xive;
|
|
assert(x);
|
|
|
|
/* This is where we can assign a different HW queue to a different
|
|
* source by offsetting into the cache lines of the notify port
|
|
*
|
|
* For now we keep it very basic, this will have to be looked at
|
|
* again on real HW with some proper performance analysis.
|
|
*
|
|
* Here's what Florian says on the matter:
|
|
*
|
|
* <<
|
|
* The first 2k of the notify port page can all be used for PCIe triggers
|
|
*
|
|
* However the idea would be that we try to use the first 4 cache lines to
|
|
* balance the PCIe Interrupt requests to use the least used snoop buses
|
|
* (we went from 2 to 4 snoop buses for P9). snoop 0 is heavily used
|
|
* (I think TLBIs are using that in addition to the normal addresses),
|
|
	 * snoop 3 is used for all Int commands, so I think snoop 2 (CL 2 in the
	 * page) is the least used overall. So we probably should use that one for
	 * the Int commands from PCIe.
|
|
*
|
|
* In addition, our EAS cache supports hashing to provide "private" cache
|
|
* areas for the PHBs in the shared 1k EAS cache. This allows e.g. to avoid
|
|
* that one "thrashing" PHB thrashes the EAS cache for everyone, or provide
|
|
* a PHB with a private area that would allow high cache hits in case of a
|
|
* device using very few interrupts. The hashing is based on the offset within
|
|
* the cache line. So using that, you can e.g. set the EAS cache up so that
|
|
* IPIs use 512 entries, the x16 PHB uses 256 entries and the x8 PHBs 128
|
|
* entries each - or IPIs using all entries and sharing with PHBs, so PHBs
|
|
* would use 512 entries and 256 entries respectively.
|
|
*
|
|
* This is a tuning we would probably do later in the lab, but as a "prep"
|
|
* we should set up the different PHBs such that they are using different
|
|
	 * 8B-aligned offsets within the cache line, so e.g.
	 * PH4_0 addr 0x100 (CL 2 DW0)
	 * PH4_1 addr 0x108 (CL 2 DW1)
	 * PH4_2 addr 0x110 (CL 2 DW2)
	 * etc.
|
|
* >>
|
|
*
|
|
* I'm using snoop1 for PHB0 and snoop2 for everybody else.
|
|
*/
|
|
|
|
/* Florian adds :
|
|
*
|
|
* we just set them up for a start to have different offsets
|
|
* within the cache line so that we could use the allocation
|
|
* restrictions that can be enforced in the interrupt
|
|
* controller
|
|
*
|
|
* P10 might now be randomizing the cache line bits in HW to
|
|
* balance snoop bus usage
|
|
*/
|
|
switch(ent) {
|
|
case XIVE_HW_SRC_PHBn(0):
|
|
offset = 0x800;
|
|
break;
|
|
case XIVE_HW_SRC_PHBn(1):
|
|
offset = 0x908;
|
|
break;
|
|
case XIVE_HW_SRC_PHBn(2):
|
|
offset = 0x910;
|
|
break;
|
|
case XIVE_HW_SRC_PHBn(3):
|
|
offset = 0x918;
|
|
break;
|
|
case XIVE_HW_SRC_PHBn(4):
|
|
offset = 0x920;
|
|
break;
|
|
case XIVE_HW_SRC_PHBn(5):
|
|
offset = 0x928;
|
|
break;
|
|
case XIVE_HW_SRC_PSI:
|
|
offset = 0x930;
|
|
break;
|
|
default:
|
|
assert(false);
|
|
return 0;
|
|
}
|
|
|
|
return ((uint64_t)x->ic_base) +
|
|
(XIVE_NOTIFY_PGOFF << x->ic_shift) + offset;
|
|
}
|
|
|
|
/* Manufacture the powerbus packet bits 32:63 */
|
|
__attrconst uint32_t xive2_get_notify_base(uint32_t girq)
|
|
{
|
|
return (GIRQ_TO_BLK(girq) << 28) | GIRQ_TO_IDX(girq);
|
|
}
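
/*
 * Illustrative sketch (not compiled): a HW source is given both
 * values above, the notify port address and the 32-bit notify data,
 * and raises an interrupt by storing the latter to the former:
 *
 *	uint64_t port = xive2_get_notify_port(chip_id, XIVE_HW_SRC_PSI);
 *	uint32_t data = xive2_get_notify_base(girq);
 *
 *	out_be32((void *)port, data);
 *
 * Real callers (PSI, PHBs) program port and data into their own
 * trigger registers instead of issuing the store from the CPU; the
 * store above only illustrates how the two values pair up.
 */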
|
|
|
|
static bool xive_get_irq_targetting(uint32_t isn, uint32_t *out_target,
|
|
uint8_t *out_prio, uint32_t *out_lirq)
|
|
{
|
|
struct xive_eas *eas;
|
|
struct xive *x, *end_x;
|
|
struct xive_end *end;
|
|
uint32_t end_blk, end_idx;
|
|
uint32_t vp_blk, vp_idx;
|
|
uint32_t prio, server;
|
|
bool is_escalation = GIRQ_IS_ESCALATION(isn);
|
|
|
|
/* Find XIVE on which the EAS resides */
|
|
x = xive_from_isn(isn);
|
|
if (!x)
|
|
return false;
|
|
/* Grab the EAS */
|
|
eas = xive_get_eas(x, isn);
|
|
if (!eas)
|
|
return false;
|
|
if (!xive_get_field64(EAS_VALID, eas->w) && !is_escalation) {
|
|
		xive_err(x, "ISN %x leads to an invalid EAS !\n", isn);
|
|
return false;
|
|
}
|
|
|
|
if (out_lirq)
|
|
*out_lirq = xive_get_field64(EAS_END_DATA, eas->w);
|
|
|
|
/* Find the END and its xive instance */
|
|
end_blk = xive_get_field64(EAS_END_BLOCK, eas->w);
|
|
end_idx = xive_get_field64(EAS_END_INDEX, eas->w);
|
|
end_x = xive_from_vc_blk(end_blk);
|
|
|
|
/* This can fail if the interrupt hasn't been initialized yet
|
|
* but it should also be masked, so fail silently
|
|
*/
|
|
if (!end_x)
|
|
goto pick_default;
|
|
end = xive_get_end(end_x, end_idx);
|
|
if (!end)
|
|
goto pick_default;
|
|
|
|
/* XXX Check valid and format 0 */
|
|
|
|
/* No priority conversion, return the actual one ! */
|
|
if (xive_get_field64(EAS_MASKED, eas->w))
|
|
prio = 0xff;
|
|
else
|
|
prio = xive_get_field32(END_W7_F0_PRIORITY, end->w7);
|
|
if (out_prio)
|
|
*out_prio = prio;
|
|
|
|
vp_blk = xive_get_field32(END_W6_VP_BLOCK, end->w6);
|
|
vp_idx = xive_get_field32(END_W6_VP_OFFSET, end->w6);
|
|
server = VP2PIR(vp_blk, vp_idx);
|
|
|
|
if (out_target)
|
|
*out_target = server;
|
|
|
|
xive_vdbg(end_x, "END info for ISN %x: prio=%d, server=0x%x (VP %x/%x)\n",
|
|
isn, prio, server, vp_blk, vp_idx);
|
|
return true;
|
|
|
|
pick_default:
|
|
xive_vdbg(end_x, "END info for ISN %x: Using masked defaults\n", isn);
|
|
|
|
if (out_prio)
|
|
*out_prio = 0xff;
|
|
	/* Pick a default: the current CPU (me) will be fine ... */
|
|
if (out_target)
|
|
*out_target = mfspr(SPR_PIR);
|
|
return true;
|
|
}
|
|
|
|
static inline bool xive_end_for_target(uint32_t target, uint8_t prio,
|
|
uint32_t *out_end_blk,
|
|
uint32_t *out_end_idx)
|
|
{
|
|
struct xive *x;
|
|
struct xive_nvp *vp;
|
|
uint32_t vp_blk, vp_idx;
|
|
uint32_t end_blk, end_idx;
|
|
|
|
if (prio > xive_max_prio(one_xive))
|
|
return false;
|
|
|
|
/* Get the VP block/index from the target word */
|
|
if (!xive_decode_vp(target, &vp_blk, &vp_idx, NULL, NULL))
|
|
return false;
|
|
|
|
/* Grab the target VP's XIVE */
|
|
x = xive_from_pc_blk(vp_blk);
|
|
if (!x)
|
|
return false;
|
|
|
|
	/* Find the VP structure where we stashed the END number */
|
|
vp = xive_get_vp(x, vp_idx);
|
|
if (!vp)
|
|
return false;
|
|
|
|
end_blk = xive_get_field32(NVP_W5_VP_END_BLOCK, vp->w5);
|
|
end_idx = xive_get_field32(NVP_W5_VP_END_INDEX, vp->w5);
|
|
|
|
/* Currently the END block and VP block should be the same */
|
|
if (end_blk != vp_blk) {
|
|
xive_err(x, "end_blk != vp_blk (%d vs. %d) for target 0x%08x/%d\n",
|
|
end_blk, vp_blk, target, prio);
|
|
assert(false);
|
|
}
|
|
|
|
if (out_end_blk)
|
|
*out_end_blk = end_blk;
|
|
if (out_end_idx)
|
|
*out_end_idx = end_idx + prio;
|
|
|
|
return true;
|
|
}
|
|
|
|
static int64_t xive_set_irq_targetting(uint32_t isn, uint32_t target,
|
|
uint8_t prio, uint32_t lirq,
|
|
bool synchronous)
|
|
{
|
|
struct xive *x;
|
|
struct xive_eas *eas, new_eas;
|
|
uint32_t end_blk, end_idx;
|
|
bool is_escalation = GIRQ_IS_ESCALATION(isn);
|
|
int64_t rc;
|
|
|
|
/* Find XIVE on which the EAS resides */
|
|
x = xive_from_isn(isn);
|
|
if (!x)
|
|
return OPAL_PARAMETER;
|
|
/* Grab the EAS */
|
|
eas = xive_get_eas(x, isn);
|
|
if (!eas)
|
|
return OPAL_PARAMETER;
|
|
if (!xive_get_field64(EAS_VALID, eas->w) && !is_escalation) {
|
|
		xive_err(x, "ISN %x leads to an invalid EAS !\n", isn);
|
|
return OPAL_PARAMETER;
|
|
}
|
|
|
|
lock(&x->lock);
|
|
|
|
/* Read existing EAS */
|
|
new_eas = *eas;
|
|
|
|
/* Are we masking ? */
|
|
if (prio == 0xff && !is_escalation) {
|
|
new_eas.w = xive_set_field64(EAS_MASKED, new_eas.w, 1);
|
|
xive_vdbg(x, "ISN %x masked !\n", isn);
|
|
|
|
/* Put prio 7 in the END */
|
|
prio = xive_max_prio(x);
|
|
} else {
|
|
/* Unmasking */
|
|
new_eas.w = xive_set_field64(EAS_MASKED, new_eas.w, 0);
|
|
xive_vdbg(x, "ISN %x unmasked !\n", isn);
|
|
|
|
/* For normal interrupt sources, keep track of which ones
|
|
* we ever enabled since the last reset
|
|
*/
|
|
if (!is_escalation)
|
|
bitmap_set_bit(*x->int_enabled_map, GIRQ_TO_IDX(isn));
|
|
}
|
|
|
|
	/* If prio isn't 0xff, re-target the EAS. First find the END
	 * corresponding to the target
	 */
|
|
if (prio != 0xff) {
|
|
if (!xive_end_for_target(target, prio, &end_blk, &end_idx)) {
|
|
xive_err(x, "Can't find END for target/prio 0x%x/%d\n",
|
|
target, prio);
|
|
unlock(&x->lock);
|
|
return OPAL_PARAMETER;
|
|
}
|
|
|
|
/* Try to update it atomically to avoid an intermediary
|
|
* stale state
|
|
*/
|
|
new_eas.w = xive_set_field64(EAS_END_BLOCK, new_eas.w, end_blk);
|
|
new_eas.w = xive_set_field64(EAS_END_INDEX, new_eas.w, end_idx);
|
|
}
|
|
new_eas.w = xive_set_field64(EAS_END_DATA, new_eas.w, lirq);
|
|
|
|
xive_vdbg(x,"ISN %x routed to end %x/%x lirq=%08x EAS=%016llx !\n",
|
|
isn, end_blk, end_idx, lirq, new_eas.w);
|
|
|
|
/* Updating the cache differs between real EAS and escalation
|
|
* EAS inside an END
|
|
*/
|
|
if (is_escalation) {
|
|
rc = xive_escalation_ive_cache_update(x, x->block_id,
|
|
GIRQ_TO_IDX(isn), &new_eas, synchronous);
|
|
} else {
|
|
sync();
|
|
*eas = new_eas;
|
|
rc = xive_easc_scrub(x, x->block_id, GIRQ_TO_IDX(isn));
|
|
}
|
|
|
|
unlock(&x->lock);
|
|
return rc;
|
|
}
|
|
|
|
static void xive_update_irq_mask(struct xive_src *s, uint32_t idx, bool masked)
|
|
{
|
|
void *mmio_base = s->esb_mmio + (1ul << s->esb_shift) * idx;
|
|
uint32_t offset;
|
|
|
|
	/* XXX FIXME: A quick mask/unmask can make us shoot an interrupt
	 * more than once to a queue. We need to keep track better
	 */
|
|
if (s->flags & XIVE_SRC_EOI_PAGE1)
|
|
mmio_base += 1ull << (s->esb_shift - 1);
|
|
if (masked)
|
|
offset = XIVE_ESB_SET_PQ_01;
|
|
else
|
|
offset = XIVE_ESB_SET_PQ_00;
|
|
|
|
in_be64(mmio_base + offset);
|
|
}
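
/*
 * Illustrative sketch (not compiled): the ESB management page
 * decodes part of the address as a command, so the "special loads"
 * above both set the new PQ state and return the previous one in
 * the two low bits, e.g.:
 *
 *	uint8_t old_pq = in_be64(mgmt_page + XIVE_ESB_SET_PQ_01) & 0x3;
 *
 * 'mgmt_page' stands for the per-interrupt management page computed
 * in xive_update_irq_mask(); that function simply discards the
 * returned value.
 */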
|
|
|
|
#define XIVE_SYNC_IPI 0x000
|
|
#define XIVE_SYNC_HW 0x080
|
|
#define XIVE_SYNC_NxC 0x100
|
|
#define XIVE_SYNC_INT 0x180
|
|
#define XIVE_SYNC_OS_ESC 0x200
|
|
#define XIVE_SYNC_POOL_ESC 0x280
|
|
#define XIVE_SYNC_HARD_ESC 0x300
|
|
|
|
static int64_t xive_sync(struct xive *x)
|
|
{
|
|
uint64_t r;
|
|
void *sync_base;
|
|
|
|
lock(&x->lock);
|
|
|
|
sync_base = x->ic_base + (XIVE_SYNC_POLL_PGOFF << x->ic_shift);
|
|
|
|
out_be64(sync_base + XIVE_SYNC_IPI, 0);
|
|
out_be64(sync_base + XIVE_SYNC_HW, 0);
|
|
out_be64(sync_base + XIVE_SYNC_NxC, 0);
|
|
out_be64(sync_base + XIVE_SYNC_INT, 0);
|
|
out_be64(sync_base + XIVE_SYNC_OS_ESC, 0);
|
|
out_be64(sync_base + XIVE_SYNC_POOL_ESC, 0);
|
|
out_be64(sync_base + XIVE_SYNC_HARD_ESC, 0);
|
|
|
|
/* XXX Add timeout */
|
|
for (;;) {
|
|
r = xive_regr(x, VC_ENDC_SYNC_DONE);
|
|
if ((r & VC_ENDC_SYNC_POLL_DONE) == VC_ENDC_SYNC_POLL_DONE)
|
|
break;
|
|
cpu_relax();
|
|
}
|
|
xive_regw(x, VC_ENDC_SYNC_DONE, r & ~VC_ENDC_SYNC_POLL_DONE);
|
|
|
|
/*
|
|
* Do a read after clearing the sync done bit to prevent any
|
|
* race between CI write and next sync command
|
|
*/
|
|
xive_regr(x, VC_ENDC_SYNC_DONE);
|
|
|
|
unlock(&x->lock);
|
|
return 0;
|
|
}
|
|
|
|
static int64_t __xive_set_irq_config(struct irq_source *is, uint32_t girq,
|
|
uint64_t vp, uint8_t prio, uint32_t lirq,
|
|
bool update_esb, bool sync)
|
|
{
|
|
struct xive_src *s = container_of(is, struct xive_src, is);
|
|
uint32_t old_target, vp_blk;
|
|
u8 old_prio;
|
|
int64_t rc;
|
|
|
|
/* Grab existing target */
|
|
if (!xive_get_irq_targetting(girq, &old_target, &old_prio, NULL))
|
|
return OPAL_PARAMETER;
|
|
|
|
/* Let XIVE configure the END. We do the update without the
|
|
* synchronous flag, thus a cache update failure will result
|
|
* in us returning OPAL_BUSY
|
|
*/
|
|
rc = xive_set_irq_targetting(girq, vp, prio, lirq, false);
|
|
if (rc)
|
|
return rc;
|
|
|
|
/* Do we need to update the mask ? */
|
|
if (old_prio != prio && (old_prio == 0xff || prio == 0xff)) {
|
|
/* The source has special variants of masking/unmasking */
|
|
if (update_esb) {
|
|
/* Ensure it's enabled/disabled in the source
|
|
* controller
|
|
*/
|
|
xive_update_irq_mask(s, girq - s->esb_base,
|
|
prio == 0xff);
|
|
}
|
|
}
|
|
|
|
	/*
	 * Synchronize the source and old target XIVEs to ensure that
	 * all pending interrupts to the old target have reached their
	 * respective queue.
	 *
	 * WARNING: This assumes the VP and its queues are on the same
	 * XIVE instance !
	 */
|
|
if (!sync)
|
|
return OPAL_SUCCESS;
|
|
xive_sync(s->xive);
|
|
if (xive_decode_vp(old_target, &vp_blk, NULL, NULL, NULL)) {
|
|
struct xive *x = xive_from_pc_blk(vp_blk);
|
|
if (x)
|
|
xive_sync(x);
|
|
}
|
|
|
|
return OPAL_SUCCESS;
|
|
}
|
|
|
|
static int64_t xive_set_irq_config(uint32_t girq, uint64_t vp, uint8_t prio,
|
|
uint32_t lirq, bool update_esb)
|
|
{
|
|
struct irq_source *is = irq_find_source(girq);
|
|
|
|
return __xive_set_irq_config(is, girq, vp, prio, lirq, update_esb,
|
|
true);
|
|
}
|
|
|
|
static void xive_source_interrupt(struct irq_source *is, uint32_t isn)
|
|
{
|
|
struct xive_src *s = container_of(is, struct xive_src, is);
|
|
|
|
if (!s->orig_ops || !s->orig_ops->interrupt)
|
|
return;
|
|
s->orig_ops->interrupt(is, isn);
|
|
}
|
|
|
|
static uint64_t xive_source_attributes(struct irq_source *is, uint32_t isn)
|
|
{
|
|
struct xive_src *s = container_of(is, struct xive_src, is);
|
|
|
|
if (!s->orig_ops || !s->orig_ops->attributes)
|
|
return IRQ_ATTR_TARGET_LINUX;
|
|
return s->orig_ops->attributes(is, isn);
|
|
}
|
|
|
|
static char *xive_source_name(struct irq_source *is, uint32_t isn)
|
|
{
|
|
struct xive_src *s = container_of(is, struct xive_src, is);
|
|
|
|
if (!s->orig_ops || !s->orig_ops->name)
|
|
return NULL;
|
|
return s->orig_ops->name(is, isn);
|
|
}
|
|
|
|
void xive2_source_mask(struct irq_source *is, uint32_t isn)
|
|
{
|
|
struct xive_src *s = container_of(is, struct xive_src, is);
|
|
|
|
xive_update_irq_mask(s, isn - s->esb_base, true);
|
|
}
|
|
|
|
static bool xive_has_opal_interrupts(struct irq_source *is)
|
|
{
|
|
struct xive_src *s = container_of(is, struct xive_src, is);
|
|
|
|
if (!s->orig_ops || !s->orig_ops->attributes || !s->orig_ops->interrupt)
|
|
return false;
|
|
return true;
|
|
}
|
|
|
|
static const struct irq_source_ops xive_irq_source_ops = {
|
|
.interrupt = xive_source_interrupt,
|
|
.attributes = xive_source_attributes,
|
|
.name = xive_source_name,
|
|
.has_opal_interrupts = xive_has_opal_interrupts,
|
|
};
|
|
|
|
static void __xive_register_source(struct xive *x, struct xive_src *s,
|
|
uint32_t base, uint32_t count,
|
|
uint32_t shift, void *mmio, uint32_t flags,
|
|
bool secondary, void *data,
|
|
const struct irq_source_ops *orig_ops)
|
|
{
|
|
s->esb_base = base;
|
|
s->esb_shift = shift;
|
|
s->esb_mmio = mmio;
|
|
s->flags = flags;
|
|
s->orig_ops = orig_ops;
|
|
s->xive = x;
|
|
s->is.start = base;
|
|
s->is.end = base + count;
|
|
s->is.ops = &xive_irq_source_ops;
|
|
s->is.data = data;
|
|
|
|
__register_irq_source(&s->is, secondary);
|
|
}
|
|
|
|
void xive2_register_hw_source(uint32_t base, uint32_t count, uint32_t shift,
|
|
void *mmio, uint32_t flags, void *data,
|
|
const struct irq_source_ops *ops)
|
|
{
|
|
struct xive_src *s;
|
|
struct xive *x = xive_from_isn(base);
|
|
|
|
assert(x);
|
|
|
|
s = malloc(sizeof(struct xive_src));
|
|
assert(s);
|
|
__xive_register_source(x, s, base, count, shift, mmio, flags,
|
|
false, data, ops);
|
|
}
|
|
|
|
static void __xive2_register_esb_source(uint32_t base, uint32_t count,
|
|
void *data, const struct irq_source_ops *ops)
|
|
{
|
|
struct xive_src *s;
|
|
struct xive *x = xive_from_isn(base);
|
|
uint32_t base_idx = GIRQ_TO_IDX(base);
|
|
void *mmio_base;
|
|
uint32_t flags = XIVE_SRC_EOI_PAGE1 | XIVE_SRC_TRIGGER_PAGE;
|
|
|
|
assert(x);
|
|
|
|
s = malloc(sizeof(struct xive_src));
|
|
assert(s);
|
|
|
|
if (XIVE_CAN_STORE_EOI(x))
|
|
flags |= XIVE_SRC_STORE_EOI;
|
|
|
|
/* Callbacks assume the MMIO base corresponds to the first
|
|
* interrupt of that source structure so adjust it
|
|
*/
|
|
mmio_base = x->esb_base + (1ul << XIVE_ESB_SHIFT) * base_idx;
|
|
__xive_register_source(x, s, base, count, XIVE_ESB_SHIFT, mmio_base,
|
|
flags, false, data, ops);
|
|
}
|
|
|
|
/*
|
|
* Check that IPI sources have interrupt numbers in the IPI interrupt
|
|
* number range
|
|
*/
|
|
void xive2_register_ipi_source(uint32_t base, uint32_t count, void *data,
|
|
const struct irq_source_ops *ops)
|
|
{
|
|
struct xive *x = xive_from_isn(base);
|
|
|
|
assert(x);
|
|
assert(base >= x->int_base && (base + count) <= x->int_ipi_top);
|
|
|
|
__xive2_register_esb_source(base, count, data, ops);
|
|
}
|
|
|
|
/*
|
|
* Some HW sources (PHB) can disable the use of their own ESB pages
|
|
* and offload all the checks on ESB pages of the IC. The interrupt
|
|
* numbers are not necessarily in the IPI range.
|
|
*/
|
|
void xive2_register_esb_source(uint32_t base, uint32_t count)
|
|
{
|
|
__xive2_register_esb_source(base, count, NULL, NULL);
|
|
}
|
|
|
|
uint64_t xive2_get_esb_base(uint32_t base)
|
|
{
|
|
struct xive *x = xive_from_isn(base);
|
|
uint32_t base_idx = GIRQ_TO_IDX(base);
|
|
|
|
assert(x);
|
|
|
|
return (uint64_t) x->esb_base + (1ul << XIVE_ESB_SHIFT) * base_idx;
|
|
}
|
|
|
|
static void xive_set_quirks(struct xive *x, struct proc_chip *chip __unused)
|
|
{
|
|
uint64_t quirks = 0;
|
|
|
|
/* This extension is dropped for P10 */
|
|
if (proc_gen == proc_gen_p10 || proc_gen == proc_gen_p11)
|
|
quirks |= XIVE_QUIRK_THREADID_7BITS;
|
|
|
|
/* Broken check on invalid priority when reduced priorities is in use */
|
|
if (proc_gen == proc_gen_p10 || proc_gen == proc_gen_p11)
|
|
quirks |= XIVE_QUIRK_BROKEN_PRIO_CHECK;
|
|
|
|
xive_dbg(x, "setting XIVE quirks to %016llx\n", quirks);
|
|
x->quirks = quirks;
|
|
}
|
|
|
|
static struct xive *init_one_xive(struct dt_node *np)
|
|
{
|
|
struct xive *x;
|
|
struct proc_chip *chip;
|
|
uint32_t flags;
|
|
|
|
x = zalloc(sizeof(struct xive));
|
|
assert(x);
|
|
x->x_node = np;
|
|
x->xscom_base = dt_get_address(np, 0, NULL);
|
|
x->chip_id = dt_get_chip_id(np);
|
|
|
|
/* "Allocate" a new block ID for the chip */
|
|
x->block_id = xive_block_count++;
|
|
assert (x->block_id < XIVE_MAX_CHIPS);
|
|
xive_block_to_chip[x->block_id] = x->chip_id;
|
|
init_lock(&x->lock);
|
|
|
|
chip = get_chip(x->chip_id);
|
|
assert(chip);
|
|
|
|
xive_notice(x, "Initializing XIVE block ID %d...\n", x->block_id);
|
|
chip->xive = x;
|
|
|
|
xive_set_quirks(x, chip);
|
|
|
|
list_head_init(&x->donated_pages);
|
|
|
|
/* Base interrupt numbers and allocator init */
|
|
|
|
x->int_base = BLKIDX_TO_GIRQ(x->block_id, 0);
|
|
x->int_count = x->int_base + XIVE_INT_COUNT;
|
|
x->int_hw_bot = x->int_count;
|
|
x->int_ipi_top = x->int_base;
|
|
|
|
if (x->int_ipi_top < XIVE_INT_FIRST)
|
|
x->int_ipi_top = XIVE_INT_FIRST;
|
|
|
|
/* Allocate a few bitmaps */
|
|
x->end_map = local_alloc(x->chip_id, BITMAP_BYTES(xive_end_bitmap_size(x)), PAGE_SIZE);
|
|
assert(x->end_map);
|
|
memset(x->end_map, 0, BITMAP_BYTES(xive_end_bitmap_size(x)));
|
|
|
|
	/*
	 * Allocate END index 0 to make sure it cannot be used as an
	 * END base for a VP. This is the criterion to know whether a
	 * VP was allocated.
	 */
|
|
bitmap_set_bit(*x->end_map, 0);
|
|
|
|
x->int_enabled_map = local_alloc(x->chip_id, BITMAP_BYTES(XIVE_INT_COUNT), PAGE_SIZE);
|
|
assert(x->int_enabled_map);
|
|
memset(x->int_enabled_map, 0, BITMAP_BYTES(XIVE_INT_COUNT));
|
|
x->ipi_alloc_map = local_alloc(x->chip_id, BITMAP_BYTES(XIVE_INT_COUNT), PAGE_SIZE);
|
|
assert(x->ipi_alloc_map);
|
|
memset(x->ipi_alloc_map, 0, BITMAP_BYTES(XIVE_INT_COUNT));
|
|
|
|
xive_dbg(x, "Handling interrupts [%08x..%08x]\n",
|
|
x->int_base, x->int_count - 1);
|
|
|
|
/* Setup the IC BARs */
|
|
if (!xive_configure_ic_bars(x))
|
|
goto fail;
|
|
|
|
/* Some basic global inits such as page sizes etc... */
|
|
if (!xive_config_init(x))
|
|
goto fail;
|
|
|
|
/* Configure the set translations for MMIO */
|
|
if (!xive_setup_set_xlate(x))
|
|
goto fail;
|
|
|
|
/* Dump some MMIO registers for diagnostics */
|
|
xive_dump_mmio(x);
|
|
|
|
/* Pre-allocate a number of tables */
|
|
if (!xive_prealloc_tables(x))
|
|
goto fail;
|
|
|
|
/* Setup the XIVE structures BARs */
|
|
if (!xive_configure_bars(x))
|
|
goto fail;
|
|
|
|
/*
|
|
* Configure local tables in VSDs (forward ports will be
|
|
* handled later)
|
|
*/
|
|
if (!xive_set_local_tables(x))
|
|
goto fail;
|
|
|
|
/* Register built-in source controllers (aka IPIs) */
|
|
flags = XIVE_SRC_EOI_PAGE1 | XIVE_SRC_TRIGGER_PAGE;
|
|
if (XIVE_CAN_STORE_EOI(x))
|
|
flags |= XIVE_SRC_STORE_EOI;
|
|
__xive_register_source(x, &x->ipis, x->int_base,
|
|
x->int_hw_bot - x->int_base, XIVE_ESB_SHIFT,
|
|
x->esb_base, flags, true, NULL, NULL);
|
|
|
|
/* Register escalation sources (ENDs)
|
|
*
|
|
* The ESe PQ bits are used for coalescing and the END ESB for
|
|
* interrupt management. The word 4&5 of the END is the EAS
|
|
* for the escalation source and the indexing is the same as
|
|
* the END.
|
|
*
|
|
* This is an OPAL primary source, IPIs are secondary.
|
|
*/
|
|
__xive_register_source(x, &x->esc_irqs,
|
|
MAKE_ESCALATION_GIRQ(x->block_id, 0),
|
|
XIVE_END_COUNT, XIVE_END_SHIFT,
|
|
x->end_base, XIVE_SRC_EOI_PAGE1,
|
|
false, NULL, NULL);
|
|
|
|
|
|
return x;
|
|
fail:
|
|
xive_err(x, "Initialization failed...\n");
|
|
|
|
/* Should this be fatal ? */
|
|
//assert(false);
|
|
return NULL;
|
|
}
|
|
|
|
static void xive_reset_enable_thread(struct cpu_thread *c)
|
|
{
|
|
struct proc_chip *chip = get_chip(c->chip_id);
|
|
struct xive *x = chip->xive;
|
|
uint32_t fc, bit;
|
|
uint64_t enable;
|
|
|
|
/* Get fused core number */
|
|
fc = (c->pir >> 3) & 0xf;
|
|
|
|
/* Get bit in register */
|
|
bit = c->pir & 0x3f;
|
|
|
|
/* Get which register to access */
|
|
if (fc < 8) {
|
|
xive_regw(x, TCTXT_EN0_RESET, PPC_BIT(bit));
|
|
xive_regw(x, TCTXT_EN0_SET, PPC_BIT(bit));
|
|
|
|
enable = xive_regr(x, TCTXT_EN0);
|
|
if (!(enable & PPC_BIT(bit)))
|
|
xive_cpu_err(c, "Failed to enable thread\n");
|
|
} else {
|
|
xive_regw(x, TCTXT_EN1_RESET, PPC_BIT(bit));
|
|
xive_regw(x, TCTXT_EN1_SET, PPC_BIT(bit));
|
|
|
|
enable = xive_regr(x, TCTXT_EN1);
|
|
if (!(enable & PPC_BIT(bit)))
|
|
xive_cpu_err(c, "Failed to enable thread\n");
|
|
}
|
|
}
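
/*
 * Worked example for the PIR math above (hypothetical PIR): for
 * PIR 0x47, fc = (0x47 >> 3) & 0xf = 8 and bit = 0x47 & 0x3f = 7,
 * so the thread is enabled through TCTXT_EN1 with PPC_BIT(7).
 */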
|
|
|
|
void xive2_cpu_callin(struct cpu_thread *cpu)
|
|
{
|
|
struct xive_cpu_state *xs = cpu->xstate;
|
|
uint8_t old_w2 __unused, w2 __unused;
|
|
|
|
if (!xs)
|
|
return;
|
|
|
|
/* Reset the HW thread context and enable it */
|
|
xive_reset_enable_thread(cpu);
|
|
|
|
/* Set VT to 1 */
|
|
old_w2 = in_8(xs->tm_ring1 + TM_QW3_HV_PHYS + TM_WORD2);
|
|
out_8(xs->tm_ring1 + TM_QW3_HV_PHYS + TM_WORD2, 0x80);
|
|
w2 = in_8(xs->tm_ring1 + TM_QW3_HV_PHYS + TM_WORD2);
|
|
|
|
xive_cpu_vdbg(cpu, "Initialized TIMA VP=%x/%x W01=%016llx W2=%02x->%02x\n",
|
|
xs->vp_blk, xs->vp_idx,
|
|
in_be64(xs->tm_ring1 + TM_QW3_HV_PHYS),
|
|
old_w2, w2);
|
|
}
|
|
|
|
#ifdef XIVE_EXTRA_CHECK_INIT_CACHE
|
|
#define CHECK_INIT_CACHE_LOOP 0x100
|
|
static void xive_special_cache_check(struct xive *x, uint32_t blk, uint32_t idx)
|
|
{
|
|
struct xive_nvp vp = {0};
|
|
uint32_t i;
|
|
|
|
/*
|
|
* SIMICS checks the value of reserved fields
|
|
*/
|
|
if (chip_quirk(QUIRK_SIMICS))
|
|
return;
|
|
|
|
for (i = 0; i < CHECK_INIT_CACHE_LOOP; i++) {
|
|
struct xive_nvp *vp_m = xive_get_vp(x, idx);
|
|
|
|
memset(vp_m, (~i) & 0xff, sizeof(*vp_m));
|
|
vp_m->w0 = xive_set_field32(NVP_W0_VALID, vp_m->w0, 0);
|
|
sync();
|
|
vp.w1 = (i << 16) | i;
|
|
assert(!xive_nxc_cache_update(x, blk, idx, &vp, true));
|
|
if (!xive_check_nxc_update(x, idx, &vp)) {
|
|
xive_dbg(x, "NXC update test failed at %d iterations\n", i);
|
|
return;
|
|
}
|
|
}
|
|
xive_dbg(x, "NXC update test passed for %d/0x%x\n", blk, idx);
|
|
}
|
|
#else
|
|
static inline void xive_special_cache_check(struct xive *x __unused,
|
|
uint32_t blk __unused,
|
|
uint32_t idx __unused)
|
|
{
|
|
}
|
|
#endif
|
|
|
|
static void xive_init_cpu_exploitation(struct xive_cpu_state *xs)
|
|
{
|
|
struct xive_end end;
|
|
struct xive_nvp vp;
|
|
struct xive *x_vp, *x_end;
|
|
int i;
|
|
|
|
/* Grab the XIVE where the VP resides. It could be different from
|
|
* the local chip XIVE if not using block group mode
|
|
*/
|
|
x_vp = xive_from_pc_blk(xs->vp_blk);
|
|
assert(x_vp);
|
|
|
|
/* Grab the XIVE where the END resides. It should be the same
|
|
* as the VP.
|
|
*/
|
|
x_end = xive_from_vc_blk(xs->end_blk);
|
|
assert(x_end);
|
|
|
|
xive_init_hw_end(&end);
|
|
|
|
/* Use the cache watch to update all ENDs reserved for HW VPs */
|
|
lock(&x_end->lock);
|
|
for (i = 0; i < xive_cfg_vp_prio(x_end); i++)
|
|
xive_endc_cache_update(x_end, xs->end_blk, xs->end_idx + i,
|
|
&end, true);
|
|
unlock(&x_end->lock);
|
|
|
|
/* Initialize/enable the VP */
|
|
xive_init_default_vp(&vp, xs->end_blk, xs->end_idx);
|
|
|
|
/* Use the cache watch to write it out */
|
|
lock(&x_vp->lock);
|
|
xive_special_cache_check(x_vp, xs->vp_blk, xs->vp_idx);
|
|
xive_nxc_cache_update(x_vp, xs->vp_blk, xs->vp_idx, &vp, true);
|
|
unlock(&x_vp->lock);
|
|
}
|
|
|
|
static void xive_configure_ex_special_bar(struct xive *x, struct cpu_thread *c)
|
|
{
|
|
uint64_t xa, val;
|
|
int64_t rc;
|
|
|
|
xive_cpu_vdbg(c, "Setting up special BAR\n");
|
|
xa = XSCOM_ADDR_P10_NCU(pir_to_core_id(c->pir), P10_NCU_SPEC_BAR);
|
|
val = (uint64_t)x->tm_base | P10_NCU_SPEC_BAR_ENABLE;
|
|
if (x->tm_shift == 16)
|
|
val |= P10_NCU_SPEC_BAR_256K;
|
|
xive_cpu_vdbg(c, "NCU_SPEC_BAR_XA[%08llx]=%016llx\n", xa, val);
|
|
rc = xscom_write(c->chip_id, xa, val);
|
|
if (rc) {
|
|
xive_cpu_err(c, "Failed to setup NCU_SPEC_BAR\n");
|
|
		/* XXX what to do now ? */
|
|
}
|
|
}
|
|
|
|
void xive2_late_init(void)
|
|
{
|
|
struct cpu_thread *c;
|
|
|
|
prlog(PR_INFO, "SLW: Configuring self-restore for NCU_SPEC_BAR\n");
|
|
for_each_present_cpu(c) {
|
|
if(cpu_is_thread0(c)) {
|
|
struct proc_chip *chip = get_chip(c->chip_id);
|
|
struct xive *x = chip->xive;
|
|
uint64_t xa, val, rc;
|
|
xa = XSCOM_ADDR_P10_NCU(pir_to_core_id(c->pir), P10_NCU_SPEC_BAR);
|
|
val = (uint64_t)x->tm_base | P10_NCU_SPEC_BAR_ENABLE;
|
|
/* Bail out if wakeup engine has already failed */
|
|
if (wakeup_engine_state != WAKEUP_ENGINE_PRESENT) {
|
|
prlog(PR_ERR, "XIVE proc_stop_api fail detected\n");
|
|
break;
|
|
}
|
|
rc = proc_stop_save_scom((void *)chip->homer_base, xa, val,
|
|
PROC_STOP_SCOM_REPLACE, PROC_STOP_SECTION_L3);
|
|
if (rc) {
|
|
xive_cpu_err(c, "proc_stop_save_scom failed for NCU_SPEC_BAR rc=%lld\n",
|
|
rc);
|
|
wakeup_engine_state = WAKEUP_ENGINE_FAILED;
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
static void xive_provision_cpu(struct xive_cpu_state *xs, struct cpu_thread *c)
|
|
{
|
|
struct xive *x;
|
|
|
|
/* VP ids for HW threads are pre-allocated */
|
|
xs->vp_blk = PIR2VP_BLK(c->pir);
|
|
xs->vp_idx = PIR2VP_IDX(c->pir);
|
|
|
|
/* For now we use identical block IDs for VC and PC but that might
|
|
* change. We allocate the ENDs on the same XIVE as the VP.
|
|
*/
|
|
xs->end_blk = xs->vp_blk;
|
|
|
|
/* Grab the XIVE where the END resides. It could be different from
|
|
* the local chip XIVE if not using block group mode
|
|
*/
|
|
x = xive_from_vc_blk(xs->end_blk);
|
|
assert(x);
|
|
|
|
/* Allocate a set of ENDs for that VP */
|
|
xs->end_idx = xive_alloc_end_set(x, true);
|
|
assert(!XIVE_ALLOC_IS_ERR(xs->end_idx));
|
|
}
|
|
|
|
static void xive_init_cpu(struct cpu_thread *c)
|
|
{
|
|
struct proc_chip *chip = get_chip(c->chip_id);
|
|
struct xive *x = chip->xive;
|
|
struct xive_cpu_state *xs;
|
|
|
|
if (!x)
|
|
return;
|
|
|
|
/*
|
|
* Each core pair (EX) needs this special BAR setup to have the
|
|
* right powerbus cycle for the TM area (as it has the same address
|
|
* on all chips so it's somewhat special).
|
|
*
|
|
* Because we don't want to bother trying to figure out which core
|
|
* of a pair is present we just do the setup for each of them, which
|
|
* is harmless.
|
|
*/
|
|
if (cpu_is_thread0(c) || cpu_is_core_chiplet_primary(c))
|
|
xive_configure_ex_special_bar(x, c);
|
|
|
|
/* Initialize the state structure */
|
|
c->xstate = xs = local_alloc(c->chip_id, sizeof(struct xive_cpu_state), 1);
|
|
assert(xs);
|
|
memset(xs, 0, sizeof(struct xive_cpu_state));
|
|
xs->xive = x;
|
|
|
|
init_lock(&xs->lock);
|
|
|
|
/* Shortcut to TM HV ring */
|
|
xs->tm_ring1 = x->tm_base + (1u << x->tm_shift);
|
|
|
|
/* Provision a VP id and some ENDs for a HW thread */
|
|
xive_provision_cpu(xs, c);
|
|
|
|
xive_init_cpu_exploitation(xs);
|
|
}
|
|
|
|
static uint64_t xive_convert_irq_flags(uint64_t iflags)
|
|
{
|
|
uint64_t oflags = 0;
|
|
|
|
if (iflags & XIVE_SRC_STORE_EOI)
|
|
oflags |= OPAL_XIVE_IRQ_STORE_EOI2;
|
|
|
|
/* OPAL_XIVE_IRQ_TRIGGER_PAGE is only meant to be set if
|
|
* the interrupt has a *separate* trigger page.
|
|
*/
|
|
if ((iflags & XIVE_SRC_EOI_PAGE1) &&
|
|
(iflags & XIVE_SRC_TRIGGER_PAGE))
|
|
oflags |= OPAL_XIVE_IRQ_TRIGGER_PAGE;
|
|
|
|
if (iflags & XIVE_SRC_LSI)
|
|
oflags |= OPAL_XIVE_IRQ_LSI;
|
|
|
|
return oflags;
|
|
}
|
|
|
|
static int64_t opal_xive_get_irq_info(uint32_t girq,
|
|
beint64_t *out_flags,
|
|
beint64_t *out_eoi_page,
|
|
beint64_t *out_trig_page,
|
|
beint32_t *out_esb_shift,
|
|
beint32_t *out_src_chip)
|
|
{
|
|
struct irq_source *is = irq_find_source(girq);
|
|
struct xive_src *s = container_of(is, struct xive_src, is);
|
|
uint32_t idx;
|
|
uint64_t mm_base;
|
|
uint64_t eoi_page = 0, trig_page = 0;
|
|
|
|
if (xive_mode != XIVE_MODE_EXPL)
|
|
return OPAL_WRONG_STATE;
|
|
if (is == NULL || out_flags == NULL)
|
|
return OPAL_PARAMETER;
|
|
assert(is->ops == &xive_irq_source_ops);
|
|
|
|
if (out_flags)
|
|
*out_flags = cpu_to_be64(xive_convert_irq_flags(s->flags));
|
|
|
|
idx = girq - s->esb_base;
|
|
|
|
if (out_esb_shift)
|
|
*out_esb_shift = cpu_to_be32(s->esb_shift);
|
|
|
|
mm_base = (uint64_t)s->esb_mmio + (1ull << s->esb_shift) * idx;
|
|
|
|
/* The EOI page can either be the first or second page */
|
|
if (s->flags & XIVE_SRC_EOI_PAGE1) {
|
|
uint64_t p1off = 1ull << (s->esb_shift - 1);
|
|
eoi_page = mm_base + p1off;
|
|
} else
|
|
eoi_page = mm_base;
|
|
|
|
/* The trigger page, if it exists, is always the first page */
|
|
if (s->flags & XIVE_SRC_TRIGGER_PAGE)
|
|
trig_page = mm_base;
|
|
|
|
if (out_eoi_page)
|
|
*out_eoi_page = cpu_to_be64(eoi_page);
|
|
if (out_trig_page)
|
|
*out_trig_page = cpu_to_be64(trig_page);
|
|
if (out_src_chip)
|
|
*out_src_chip = cpu_to_be32(GIRQ_TO_CHIP(girq));
|
|
|
|
return OPAL_SUCCESS;
|
|
}
|
|
|
|
static int64_t opal_xive_get_irq_config(uint32_t girq,
|
|
beint64_t *out_vp,
|
|
uint8_t *out_prio,
|
|
beint32_t *out_lirq)
|
|
{
|
|
uint32_t vp;
|
|
uint32_t lirq;
|
|
uint8_t prio;
|
|
|
|
if (xive_mode != XIVE_MODE_EXPL)
|
|
return OPAL_WRONG_STATE;
|
|
|
|
if (xive_get_irq_targetting(girq, &vp, &prio, &lirq)) {
|
|
*out_vp = cpu_to_be64(vp);
|
|
*out_prio = prio;
|
|
*out_lirq = cpu_to_be32(lirq);
|
|
return OPAL_SUCCESS;
|
|
} else
|
|
return OPAL_PARAMETER;
|
|
}
|
|
|
|
static int64_t opal_xive_set_irq_config(uint32_t girq,
|
|
uint64_t vp,
|
|
uint8_t prio,
|
|
uint32_t lirq)
|
|
{
|
|
/*
|
|
* This variant is meant for a XIVE-aware OS, thus it will
|
|
* *not* affect the ESB state of the interrupt. If used with
|
|
* a prio of FF, the EAS will be masked. In that case the
|
|
* races have to be handled by the OS.
|
|
*/
|
|
if (xive_mode != XIVE_MODE_EXPL)
|
|
return OPAL_WRONG_STATE;
|
|
|
|
return xive_set_irq_config(girq, vp, prio, lirq, false);
|
|
}
|
|
|
|
static int64_t opal_xive_get_queue_info(uint64_t vp, uint32_t prio,
|
|
beint64_t *out_qpage,
|
|
beint64_t *out_qsize,
|
|
beint64_t *out_qeoi_page,
|
|
beint32_t *out_escalate_irq,
|
|
beint64_t *out_qflags)
|
|
{
|
|
uint32_t blk, idx;
|
|
struct xive *x;
|
|
struct xive_end *end;
|
|
|
|
if (xive_mode != XIVE_MODE_EXPL)
|
|
return OPAL_WRONG_STATE;
|
|
|
|
if (!xive_end_for_target(vp, prio, &blk, &idx))
|
|
return OPAL_PARAMETER;
|
|
|
|
x = xive_from_vc_blk(blk);
|
|
if (!x)
|
|
return OPAL_PARAMETER;
|
|
|
|
end = xive_get_end(x, idx);
|
|
if (!end)
|
|
return OPAL_PARAMETER;
|
|
|
|
if (out_escalate_irq) {
|
|
uint32_t esc_idx = idx;
|
|
|
|
/* If escalations are routed to a single queue, fix up
|
|
* the escalation interrupt number here.
|
|
*/
|
|
if (xive_get_field32(END_W0_UNCOND_ESCALATE, end->w0))
|
|
esc_idx |= xive_escalation_prio(x);
|
|
*out_escalate_irq =
|
|
cpu_to_be32(MAKE_ESCALATION_GIRQ(blk, esc_idx));
|
|
}
|
|
|
|
/* If this is a single-escalation gather queue, that's all
|
|
* there is to return
|
|
*/
|
|
if (xive_get_field32(END_W0_SILENT_ESCALATE, end->w0)) {
|
|
if (out_qflags)
|
|
*out_qflags = 0;
|
|
if (out_qpage)
|
|
*out_qpage = 0;
|
|
if (out_qsize)
|
|
*out_qsize = 0;
|
|
if (out_qeoi_page)
|
|
*out_qeoi_page = 0;
|
|
return OPAL_SUCCESS;
|
|
}
|
|
|
|
if (out_qpage) {
|
|
if (xive_get_field32(END_W0_ENQUEUE, end->w0))
|
|
*out_qpage = cpu_to_be64(
|
|
((uint64_t)xive_get_field32(END_W2_EQ_ADDR_HI, end->w2) << 32) |
|
|
xive_get_field32(END_W3_EQ_ADDR_LO, end->w3));
|
|
else
|
|
*out_qpage = 0;
|
|
}
|
|
if (out_qsize) {
|
|
if (xive_get_field32(END_W0_ENQUEUE, end->w0))
|
|
*out_qsize = cpu_to_be64(xive_get_field32(END_W3_QSIZE, end->w3) + 12);
|
|
else
|
|
*out_qsize = 0;
|
|
}
|
|
if (out_qeoi_page) {
|
|
*out_qeoi_page = cpu_to_be64(
|
|
(uint64_t)x->end_base + idx * XIVE_ESB_PAGE_SIZE);
|
|
}
|
|
if (out_qflags) {
|
|
*out_qflags = 0;
|
|
if (xive_get_field32(END_W0_VALID, end->w0))
|
|
*out_qflags |= cpu_to_be64(OPAL_XIVE_EQ_ENABLED);
|
|
if (xive_get_field32(END_W0_UCOND_NOTIFY, end->w0))
|
|
*out_qflags |= cpu_to_be64(OPAL_XIVE_EQ_ALWAYS_NOTIFY);
|
|
if (xive_get_field32(END_W0_ESCALATE_CTL, end->w0))
|
|
*out_qflags |= cpu_to_be64(OPAL_XIVE_EQ_ESCALATE);
|
|
}
|
|
|
|
return OPAL_SUCCESS;
|
|
}
|
|
|
|
static void xive_cleanup_end(struct xive_end *end)
|
|
{
|
|
end->w0 = xive_set_field32(END_W0_FIRMWARE1, 0, xive_end_is_firmware1(end));
|
|
end->w1 = xive_set_field32(END_W1_ESe_Q, 0, 1) |
|
|
xive_set_field32(END_W1_ESn_Q, 0, 1);
|
|
end->w2 = end->w3 = end->w4 = end->w5 = end->w6 = end->w7 = 0;
|
|
}
|
|
|
|
static int64_t opal_xive_set_queue_info(uint64_t vp, uint32_t prio,
|
|
uint64_t qpage,
|
|
uint64_t qsize,
|
|
uint64_t qflags)
|
|
{
|
|
uint32_t blk, idx;
|
|
struct xive *x;
|
|
struct xive_end *old_end;
|
|
struct xive_end end;
|
|
uint32_t vp_blk, vp_idx;
|
|
bool group;
|
|
int64_t rc;
|
|
|
|
if (!xive_end_for_target(vp, prio, &blk, &idx))
|
|
return OPAL_PARAMETER;
|
|
|
|
x = xive_from_vc_blk(blk);
|
|
if (!x)
|
|
return OPAL_PARAMETER;
|
|
|
|
old_end = xive_get_end(x, idx);
|
|
if (!old_end)
|
|
return OPAL_PARAMETER;
|
|
|
|
/* If this is a silent escalation queue, it cannot be
|
|
* configured directly
|
|
*/
|
|
if (xive_get_field32(END_W0_SILENT_ESCALATE, old_end->w0))
|
|
return OPAL_PARAMETER;
|
|
|
|
/* This shouldn't fail or xive_end_for_target would have
|
|
* failed already
|
|
*/
|
|
if (!xive_decode_vp(vp, &vp_blk, &vp_idx, NULL, &group))
|
|
return OPAL_PARAMETER;
|
|
|
|
/*
|
|
* Make a local copy which we will later try to commit using
|
|
* the cache watch facility
|
|
*/
|
|
end = *old_end;
|
|
|
|
if (qflags & OPAL_XIVE_EQ_ENABLED) {
|
|
switch(qsize) {
|
|
/* Supported sizes */
|
|
case 12:
|
|
case 16:
|
|
case 21:
|
|
case 24:
|
|
end.w3 = cpu_to_be32(qpage & END_W3_EQ_ADDR_LO);
|
|
end.w2 = cpu_to_be32((qpage >> 32) & END_W2_EQ_ADDR_HI);
|
|
end.w3 = xive_set_field32(END_W3_QSIZE, end.w3, qsize - 12);
|
|
end.w0 = xive_set_field32(END_W0_ENQUEUE, end.w0, 1);
|
|
break;
|
|
case 0:
|
|
end.w2 = end.w3 = 0;
|
|
end.w0 = xive_set_field32(END_W0_ENQUEUE, end.w0, 0);
|
|
break;
|
|
default:
|
|
return OPAL_PARAMETER;
|
|
}
|
|
|
|
		/* Ensure the priority and target are correctly set (they will
		 * not be right after allocation)
		 */
|
|
end.w6 = xive_set_field32(END_W6_VP_BLOCK, 0, vp_blk) |
|
|
xive_set_field32(END_W6_VP_OFFSET, 0, vp_idx);
|
|
end.w7 = xive_set_field32(END_W7_F0_PRIORITY, 0, prio);
|
|
/* XXX Handle group i bit when needed */
|
|
|
|
/* Always notify flag */
|
|
if (qflags & OPAL_XIVE_EQ_ALWAYS_NOTIFY)
|
|
end.w0 = xive_set_field32(END_W0_UCOND_NOTIFY, end.w0, 1);
|
|
else
|
|
end.w0 = xive_set_field32(END_W0_UCOND_NOTIFY, end.w0, 0);
|
|
|
|
/* Escalation flag */
|
|
if (qflags & OPAL_XIVE_EQ_ESCALATE)
|
|
end.w0 = xive_set_field32(END_W0_ESCALATE_CTL, end.w0, 1);
|
|
else
|
|
end.w0 = xive_set_field32(END_W0_ESCALATE_CTL, end.w0, 0);
|
|
|
|
/* Unconditionally clear the current queue pointer, set
|
|
* generation to 1 and disable escalation interrupts.
|
|
*/
|
|
end.w1 = xive_set_field32(END_W1_GENERATION, 0, 1) |
|
|
xive_set_field32(END_W1_ES, 0, xive_get_field32(END_W1_ES, old_end->w1));
|
|
|
|
/* Enable. We always enable backlog for an enabled queue
|
|
* otherwise escalations won't work.
|
|
*/
|
|
end.w0 = xive_set_field32(END_W0_VALID, end.w0, 1);
|
|
end.w0 = xive_set_field32(END_W0_BACKLOG, end.w0, 1);
|
|
} else
|
|
xive_cleanup_end(&end);
|
|
|
|
/* Update END, non-synchronous */
|
|
lock(&x->lock);
|
|
rc = xive_endc_cache_update(x, blk, idx, &end, false);
|
|
unlock(&x->lock);
|
|
|
|
return rc;
|
|
}
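
/*
 * Illustrative sketch (not compiled): a XIVE-aware OS enables a 64kB
 * event queue for (vp, prio) roughly as follows, qpage being the
 * physical address of a zeroed, size-aligned buffer it owns:
 *
 *	rc = opal_xive_set_queue_info(vp, prio, qpage, 16,
 *				      OPAL_XIVE_EQ_ENABLED |
 *				      OPAL_XIVE_EQ_ALWAYS_NOTIFY);
 *
 * (the OS actually goes through the OPAL call interface; this only
 * shows the parameter encoding: qsize 16 selects a 2^16 byte queue.)
 */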
|
|
|
|
static int64_t opal_xive_get_queue_state(uint64_t vp, uint32_t prio,
|
|
beint32_t *out_qtoggle,
|
|
beint32_t *out_qindex)
|
|
{
|
|
uint32_t blk, idx;
|
|
struct xive *x;
|
|
struct xive_end *end;
|
|
int64_t rc;
|
|
|
|
if (xive_mode != XIVE_MODE_EXPL)
|
|
return OPAL_WRONG_STATE;
|
|
|
|
if (!out_qtoggle || !out_qindex ||
|
|
!xive_end_for_target(vp, prio, &blk, &idx))
|
|
return OPAL_PARAMETER;
|
|
|
|
x = xive_from_vc_blk(blk);
|
|
if (!x)
|
|
return OPAL_PARAMETER;
|
|
|
|
end = xive_get_end(x, idx);
|
|
if (!end)
|
|
return OPAL_PARAMETER;
|
|
|
|
/* Scrub the queue */
|
|
lock(&x->lock);
|
|
rc = xive_endc_scrub(x, blk, idx);
|
|
unlock(&x->lock);
|
|
if (rc)
|
|
return rc;
|
|
|
|
	/* We don't deal with disabled queues */
|
|
if (!xive_get_field32(END_W0_VALID, end->w0))
|
|
return OPAL_WRONG_STATE;
|
|
|
|
*out_qtoggle = cpu_to_be32(xive_get_field32(END_W1_GENERATION, end->w1));
|
|
*out_qindex = cpu_to_be32(xive_get_field32(END_W1_PAGE_OFF, end->w1));
|
|
|
|
return OPAL_SUCCESS;
|
|
}
|
|
|
|
static int64_t opal_xive_set_queue_state(uint64_t vp, uint32_t prio,
|
|
uint32_t qtoggle, uint32_t qindex)
|
|
{
|
|
uint32_t blk, idx;
|
|
struct xive *x;
|
|
struct xive_end *end, new_end;
|
|
int64_t rc;
|
|
|
|
if (xive_mode != XIVE_MODE_EXPL)
|
|
return OPAL_WRONG_STATE;
|
|
|
|
if (!xive_end_for_target(vp, prio, &blk, &idx))
|
|
return OPAL_PARAMETER;
|
|
|
|
x = xive_from_vc_blk(blk);
|
|
if (!x)
|
|
return OPAL_PARAMETER;
|
|
|
|
end = xive_get_end(x, idx);
|
|
if (!end)
|
|
return OPAL_PARAMETER;
|
|
|
|
	/* We don't deal with disabled queues */
|
|
if (!xive_get_field32(END_W0_VALID, end->w0))
|
|
return OPAL_WRONG_STATE;
|
|
|
|
new_end = *end;
|
|
|
|
new_end.w1 = xive_set_field32(END_W1_GENERATION, new_end.w1, qtoggle);
|
|
new_end.w1 = xive_set_field32(END_W1_PAGE_OFF, new_end.w1, qindex);
|
|
|
|
lock(&x->lock);
|
|
rc = xive_endc_cache_update(x, blk, idx, &new_end, false);
|
|
unlock(&x->lock);
|
|
|
|
return rc;
|
|
}
|
|
|
|
static int64_t opal_xive_donate_page(uint32_t chip_id, uint64_t addr)
|
|
{
|
|
struct proc_chip *c = get_chip(chip_id);
|
|
struct list_node *n;
|
|
|
|
if (xive_mode != XIVE_MODE_EXPL)
|
|
return OPAL_WRONG_STATE;
|
|
if (!c)
|
|
return OPAL_PARAMETER;
|
|
if (!c->xive)
|
|
return OPAL_PARAMETER;
|
|
if (addr & 0xffff)
|
|
return OPAL_PARAMETER;
|
|
|
|
n = (struct list_node *)addr;
|
|
lock(&c->xive->lock);
|
|
list_add(&c->xive->donated_pages, n);
|
|
unlock(&c->xive->lock);
|
|
return OPAL_SUCCESS;
|
|
}
|
|
|
|
static int64_t opal_xive_get_vp_info(uint64_t vp_id,
|
|
beint64_t *out_flags,
|
|
beint64_t *out_cam_value,
|
|
beint64_t *out_report_cl_pair,
|
|
beint32_t *out_chip_id)
|
|
{
|
|
struct xive *x;
|
|
struct xive_nvp *vp;
|
|
uint32_t blk, idx;
|
|
bool group;
|
|
|
|
if (!xive_decode_vp(vp_id, &blk, &idx, NULL, &group))
|
|
return OPAL_PARAMETER;
|
|
/* We don't do groups yet */
|
|
if (group)
|
|
return OPAL_PARAMETER;
|
|
x = xive_from_pc_blk(blk);
|
|
if (!x)
|
|
return OPAL_PARAMETER;
|
|
vp = xive_get_vp(x, idx);
|
|
if (!vp)
|
|
return OPAL_PARAMETER;
|
|
|
|
if (out_flags) {
|
|
uint32_t end_blk, end_idx;
|
|
struct xive_end *end;
|
|
struct xive *end_x;
|
|
*out_flags = 0;
|
|
|
|
		/*
		 * We would like a way to stash a SW bit in the VP
		 * to know whether silent escalation is enabled or
		 * not, but unlike what happens with ENDs, the PC
		 * cache watch doesn't implement the reserved bit in
		 * the VPs... so we have to go look at END 7 instead.
		 */
|
|
|
|
/* Grab END for prio 7 to check for silent escalation */
|
|
if (!xive_end_for_target(vp_id, xive_escalation_prio(x),
|
|
&end_blk, &end_idx))
|
|
return OPAL_PARAMETER;
|
|
|
|
end_x = xive_from_vc_blk(end_blk);
|
|
if (!end_x)
|
|
return OPAL_PARAMETER;
|
|
|
|
end = xive_get_end(x, end_idx);
|
|
if (!end)
|
|
return OPAL_PARAMETER;
|
|
if (xive_get_field32(NVP_W0_VALID, vp->w0))
|
|
*out_flags |= cpu_to_be64(OPAL_XIVE_VP_ENABLED);
|
|
if (xive_cfg_save_restore(x))
|
|
*out_flags |= cpu_to_be64(OPAL_XIVE_VP_SAVE_RESTORE);
|
|
if (xive_get_field32(END_W0_SILENT_ESCALATE, end->w0))
|
|
*out_flags |= cpu_to_be64(OPAL_XIVE_VP_SINGLE_ESCALATION);
|
|
}
|
|
|
|
if (out_cam_value) {
|
|
uint64_t cam_value;
|
|
|
|
cam_value = (blk << x->vp_shift) | idx;
|
|
|
|
/*
|
|
* If save-restore is enabled, force the CAM line
|
|
* value with the H bit.
|
|
*/
|
|
if (xive_cfg_save_restore(x))
|
|
cam_value |= TM10_QW1W2_HO;
|
|
|
|
*out_cam_value = cpu_to_be64(cam_value);
|
|
}
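
	/*
	 * Illustrative note: the OS writes the CAM value built above
	 * (plus the H bit when save-restore is enabled) into the
	 * thread context to dispatch the VP; decoding it back is
	 * simply blk = cam >> vp_shift and
	 * idx = cam & ((1 << vp_shift) - 1), ignoring the H bit.
	 */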
|
|
|
|
if (out_report_cl_pair) {
|
|
uint64_t report_cl_pair;
|
|
|
|
report_cl_pair = ((uint64_t)(be32_to_cpu(vp->w6) & 0x0fffffff)) << 32;
|
|
report_cl_pair |= be32_to_cpu(vp->w7) & 0xffffff00;
|
|
|
|
*out_report_cl_pair = cpu_to_be64(report_cl_pair);
|
|
}
|
|
|
|
if (out_chip_id)
|
|
*out_chip_id = cpu_to_be32(xive_block_to_chip[blk]);
|
|
|
|
return OPAL_SUCCESS;
|
|
}
|
|
|
|
static int64_t xive_setup_silent_gather(uint64_t vp_id, bool enable)
|
|
{
|
|
uint32_t blk, idx, i;
|
|
struct xive_end *end_orig;
|
|
struct xive_end end;
|
|
struct xive *x;
|
|
int64_t rc;
|
|
|
|
/* Get base END block */
|
|
if (!xive_end_for_target(vp_id, 0, &blk, &idx)) {
|
|
prlog(PR_ERR, "%s: Invalid VP 0x%08llx\n", __func__, vp_id);
|
|
return OPAL_PARAMETER;
|
|
}
|
|
x = xive_from_vc_blk(blk);
|
|
if (!x) {
|
|
prlog(PR_ERR, "%s: VP 0x%08llx has invalid block %d\n", __func__,
|
|
vp_id, blk);
|
|
return OPAL_PARAMETER;
|
|
}
|
|
|
|
/* Grab prio 7 */
|
|
end_orig = xive_get_end(x, idx + xive_escalation_prio(x));
|
|
if (!end_orig) {
|
|
xive_err(x, "Failed to get silent gather END 0x%x for VP 0x%08llx\n",
|
|
idx + xive_escalation_prio(x), vp_id);
|
|
return OPAL_PARAMETER;
|
|
}
|
|
|
|
/* If trying to enable silent gather, make sure prio 7 is not
|
|
* already enabled as a normal queue
|
|
*/
|
|
if (enable && xive_get_field32(END_W0_VALID, end_orig->w0) &&
|
|
!xive_get_field32(END_W0_SILENT_ESCALATE, end_orig->w0)) {
|
|
xive_err(x, "silent gather END 0x%x already in use\n",
|
|
idx + xive_escalation_prio(x));
|
|
return OPAL_PARAMETER;
|
|
}
|
|
|
|
end = *end_orig;
|
|
|
|
if (enable) {
|
|
/* W0: Enabled and "s" set, no other bit */
|
|
end.w0 = xive_set_field32(END_W0_FIRMWARE1, end.w0, 0);
|
|
end.w0 = xive_set_field32(END_W0_VALID, end.w0, 1);
|
|
end.w0 = xive_set_field32(END_W0_SILENT_ESCALATE, end.w0, 1);
|
|
end.w0 = xive_set_field32(END_W0_ESCALATE_CTL, end.w0, 1);
|
|
end.w0 = xive_set_field32(END_W0_BACKLOG, end.w0, 1);
|
|
|
|
/* Set new "N" for END escalation (vs. ESB) */
|
|
end.w0 = xive_set_field32(END_W0_ESCALATE_END, end.w0, 1);
|
|
|
|
/* W1: Mark ESn as 01, ESe as 00 */
|
|
end.w1 = xive_set_field32(END_W1_ESn_P, end.w1, 0);
|
|
end.w1 = xive_set_field32(END_W1_ESn_Q, end.w1, 1);
|
|
end.w1 = xive_set_field32(END_W1_ESe, end.w1, 0);
|
|
} else if (xive_get_field32(END_W0_SILENT_ESCALATE, end.w0))
|
|
xive_cleanup_end(&end);
|
|
|
|
if (!memcmp(end_orig, &end, sizeof(end)))
|
|
rc = 0;
|
|
else
|
|
rc = xive_endc_cache_update(x, blk, idx + xive_escalation_prio(x),
|
|
&end, false);
|
|
if (rc)
|
|
return rc;
|
|
|
|
/* Mark/unmark all other prios with the new "u" bit and update
|
|
* escalation
|
|
*/
|
|
for (i = 0; i < xive_cfg_vp_prio(x); i++) {
|
|
if (i == xive_escalation_prio(x))
|
|
continue;
|
|
end_orig = xive_get_end(x, idx + i);
|
|
if (!end_orig)
|
|
continue;
|
|
end = *end_orig;
|
|
if (enable) {
|
|
/* Set "u" bit */
|
|
end.w0 = xive_set_field32(END_W0_UNCOND_ESCALATE, end.w0, 1);
|
|
|
|
/* Set new "N" for END escalation (vs. ESB) */
|
|
/* TODO (Gen2+) : use ESB escalation configuration */
|
|
end.w0 = xive_set_field32(END_W0_ESCALATE_END, end.w0, 1);
|
|
|
|
/* Re-route escalation interrupt (previous
|
|
* route is lost !) to the gather queue
|
|
*/
|
|
end.w4 = xive_set_field32(END_W4_END_BLOCK, end.w4, blk);
|
|
end.w4 = xive_set_field32(END_W4_ESC_END_INDEX,
|
|
end.w4, idx + xive_escalation_prio(x));
|
|
} else if (xive_get_field32(END_W0_UNCOND_ESCALATE, end.w0)) {
|
|
/* Clear the "u" bit, disable escalations if it was set */
|
|
end.w0 = xive_set_field32(END_W0_UNCOND_ESCALATE, end.w0, 0);
|
|
end.w0 = xive_set_field32(END_W0_ESCALATE_CTL, end.w0, 0);
|
|
}
|
|
if (!memcmp(end_orig, &end, sizeof(end)))
|
|
continue;
|
|
rc = xive_endc_cache_update(x, blk, idx + i, &end, false);
|
|
if (rc)
|
|
break;
|
|
}
|
|
|
|
return rc;
|
|
}
|
|
|
|
static int64_t opal_xive_set_vp_info(uint64_t vp_id,
				     uint64_t flags,
				     uint64_t report_cl_pair)
{
	struct xive *x;
	struct xive_nvp *vp, vp_new;
	uint32_t blk, idx;
	bool group;
	int64_t rc;

	if (!xive_decode_vp(vp_id, &blk, &idx, NULL, &group))
		return OPAL_PARAMETER;
	/* We don't do groups yet */
	if (group)
		return OPAL_PARAMETER;
	if (report_cl_pair & 0xff)
		return OPAL_PARAMETER;
	x = xive_from_pc_blk(blk);
	if (!x)
		return OPAL_PARAMETER;
	vp = xive_get_vp(x, idx);
	if (!vp)
		return OPAL_PARAMETER;

	/* Consistency check. */
	if ((flags & OPAL_XIVE_VP_SAVE_RESTORE) && !xive_cfg_save_restore(x))
		return OPAL_PARAMETER;

	lock(&x->lock);

	vp_new = *vp;
	if (flags & OPAL_XIVE_VP_ENABLED) {
		vp_new.w0 = xive_set_field32(NVP_W0_VALID, vp_new.w0, 1);
		vp_new.w6 = cpu_to_be32(report_cl_pair >> 32);
		vp_new.w7 = cpu_to_be32(report_cl_pair & 0xffffffff);

		if (flags & OPAL_XIVE_VP_SINGLE_ESCALATION)
			rc = xive_setup_silent_gather(vp_id, true);
		else
			rc = xive_setup_silent_gather(vp_id, false);

		/*
		 * Prepare NVP to be HW owned for automatic save-restore
		 */
		if (xive_cfg_save_restore(x)) {
			/*
			 * Set NVP privilege level. Default to OS.
			 * This check only makes sense for KVM guests
			 * currently. We would need an extra flag to
			 * distinguish from pool level.
			 */
			vp_new.w0 = xive_set_field32(NVP_W0_VPRIV, vp_new.w0, 0);

			vp_new.w2 = xive_set_field32(NVP_W2_CPPR, vp_new.w2, 0xFF);
			vp_new.w0 = xive_set_field32(NVP_W0_HW, vp_new.w0, 1);
		}
	} else {
		/*
		 * TODO (kvm): disabling a VP invalidates the associated ENDs.
		 *
		 * The loads then return all 1s which can be an issue for the
		 * Linux code to handle.
		 */

		vp_new.w0 = vp_new.w6 = vp_new.w7 = 0;
		rc = xive_setup_silent_gather(vp_id, false);
	}

	if (rc) {
		if (rc != OPAL_BUSY)
			xive_dbg(x, "Silent gather setup failed with err %lld\n", rc);
		goto bail;
	}

	rc = xive_nxc_cache_update(x, blk, idx, &vp_new, false);
	if (rc)
		goto bail;

	/* When disabling, we scrub clean (invalidate the entry) so
	 * we can avoid cache ops in alloc/free
	 */
	if (!(flags & OPAL_XIVE_VP_ENABLED))
		xive_nxc_scrub_clean(x, blk, idx);

bail:
	unlock(&x->lock);
	return rc;
}

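/*
 * OPAL call returning the interrupt state (CPPR, IPB, LSMFB) of an
 * enabled NVP, laid out like words 0-1 of the TIMA. The NVP is first
 * scrubbed out of the cache so that the in-memory copy is up to date.
 */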
static int64_t opal_xive_get_vp_state(uint64_t vp_id, beint64_t *out_state)
{
	struct xive *x;
	struct xive_nvp *vp;
	uint32_t blk, idx;
	int64_t rc;
	bool group;

	if (!out_state || !xive_decode_vp(vp_id, &blk, &idx, NULL, &group))
		return OPAL_PARAMETER;
	if (group)
		return OPAL_PARAMETER;
	x = xive_from_pc_blk(blk);
	if (!x)
		return OPAL_PARAMETER;
	vp = xive_get_vp(x, idx);
	if (!vp)
		return OPAL_PARAMETER;

	/* Scrub the vp */
	lock(&x->lock);
	rc = xive_nxc_scrub(x, blk, idx);
	unlock(&x->lock);
	if (rc)
		return rc;

	if (!xive_get_field32(NVP_W0_VALID, vp->w0))
		return OPAL_WRONG_STATE;

	/*
	 * return a state matching the layout of WORD 0-1 of the TIMA
	 * as this is expected by current implementation.
	 */
	*out_state = cpu_to_be64(((uint64_t) 0x0) << 54 |
		(uint64_t)xive_get_field32(NVP_W2_CPPR, vp->w2) << 48 |
		(uint64_t)xive_get_field32(NVP_W2_IPB, vp->w2) << 40 |
		(uint64_t)xive_get_field32(NVP_W2_LSMFB, vp->w2) << 32);

	return OPAL_SUCCESS;
}

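/*
 * Return the "direct" TIMA page of the interrupt controller for a
 * given thread, indexed by the low byte of its PIR.
 */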
static void *xive_cpu_get_tima(struct cpu_thread *c)
{
	struct xive_cpu_state *xs = c->xstate;
	struct xive *x = xs->xive;

	return x->ic_tm_direct_base + ((c->pir & 0xff) << x->ic_shift);
}

static void xive_cleanup_cpu_tima(struct cpu_thread *c)
{
	struct xive_cpu_state *xs __unused = c->xstate;
	void *cpu_tm_base = xive_cpu_get_tima(c);
	uint8_t old_w2 __unused, w2 __unused;

	/* Reset the HW context */
	xive_reset_enable_thread(c);

	/* Set VT to 1 */
	old_w2 = in_8(cpu_tm_base + TM_QW3_HV_PHYS + TM_WORD2);
	out_8(cpu_tm_base + TM_QW3_HV_PHYS + TM_WORD2, 0x80);
	w2 = in_8(cpu_tm_base + TM_QW3_HV_PHYS + TM_WORD2);

	/* Dump HV state */
	xive_cpu_vdbg(c, "[reset] VP TIMA VP=%x/%x W01=%016llx W2=%02x->%02x\n",
		      xs->vp_blk, xs->vp_idx,
		      in_be64(cpu_tm_base + TM_QW3_HV_PHYS),
		      old_w2, w2);
}

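/*
 * Invalidate the VC (resp. PC) indirect cache for a given VST type.
 * The kill mask is cleared first so the whole cache for that type is
 * targeted, then we poll until HW drops the VALID bit of the kill
 * register to signal completion.
 */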
static int64_t xive_vc_ind_cache_kill(struct xive *x, uint64_t type)
{
	uint64_t val;

	/* We clear the whole thing */
	xive_regw(x, VC_AT_MACRO_KILL_MASK, 0);
	xive_regw(x, VC_AT_MACRO_KILL, VC_AT_MACRO_KILL_VALID |
		  SETFIELD(VC_AT_MACRO_KILL_VSD, 0ull, type));

	/* XXX Add timeout */
	for (;;) {
		val = xive_regr(x, VC_AT_MACRO_KILL);
		if (!(val & VC_AT_MACRO_KILL_VALID))
			break;
	}
	return 0;
}

static int64_t xive_pc_ind_cache_kill(struct xive *x)
{
	uint64_t val;

	/* We clear the whole thing */
	xive_regw(x, PC_AT_KILL_MASK, 0);
	xive_regw(x, PC_AT_KILL, PC_AT_KILL_VALID |
		  SETFIELD(VC_AT_MACRO_KILL_VSD, 0ull, VST_NVP));

	/* XXX Add timeout */
	for (;;) {
		val = xive_regr(x, PC_AT_KILL);
		if (!(val & PC_AT_KILL_VALID))
			break;
	}
	return 0;
}

static void xive_cleanup_vp_ind(struct xive *x)
{
	int i;

	xive_dbg(x, "Cleaning up %d VP ind entries...\n", x->vp_ind_count);
	for (i = 0; i < x->vp_ind_count; i++) {
		if (be64_to_cpu(x->vp_ind_base[i]) & VSD_FIRMWARE) {
			xive_dbg(x, " %04x ... skip (firmware)\n", i);
			continue;
		}
		if (x->vp_ind_base[i] != 0) {
			x->vp_ind_base[i] = 0;
			xive_dbg(x, " %04x ... cleaned\n", i);
		}
	}
	xive_pc_ind_cache_kill(x);
}

static void xive_cleanup_end_ind(struct xive *x)
{
	int i;

	xive_dbg(x, "Cleaning up %d END ind entries...\n", x->end_ind_count);
	for (i = 0; i < x->end_ind_count; i++) {
		if (be64_to_cpu(x->end_ind_base[i]) & VSD_FIRMWARE) {
			xive_dbg(x, " %04x ... skip (firmware)\n", i);
			continue;
		}
		if (x->end_ind_base[i] != 0) {
			x->end_ind_base[i] = 0;
			xive_dbg(x, " %04x ... cleaned\n", i);
		}
	}
	xive_vc_ind_cache_kill(x, VST_END);
}

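/*
 * Reset a single XIVE instance: clear the IPI allocation map, clean up
 * and free all non-firmware ENDs, pull the HW threads out of their VP
 * context, invalidate all user-allocated VPs, drop the donated
 * indirect pages and finally re-initialize the per-cpu exploitation
 * state.
 */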
static void xive_reset_one(struct xive *x)
{
	struct cpu_thread *c;
	bool end_firmware;
	int i;

	xive_notice(x, "Resetting one xive...\n");

	lock(&x->lock);

	/* Check all interrupts are disabled */
	i = bitmap_find_one_bit(*x->int_enabled_map, 0, XIVE_INT_COUNT);
	if (i >= 0)
		xive_warn(x, "Interrupt %d (and maybe more) not disabled"
			  " at reset !\n", i);

	/* Reset IPI allocation */
	xive_dbg(x, "freeing alloc map %p/%p\n",
		 x->ipi_alloc_map, *x->ipi_alloc_map);
	memset(x->ipi_alloc_map, 0, BITMAP_BYTES(XIVE_INT_COUNT));

	xive_dbg(x, "Resetting ENDs...\n");

	/* Reset all allocated ENDs and free the user ones */
	bitmap_for_each_one(*x->end_map, xive_end_bitmap_size(x), i) {
		struct xive_end end0;
		struct xive_end *end;
		int j;

		if (i == 0)
			continue;
		end_firmware = false;
		for (j = 0; j < xive_cfg_vp_prio(x); j++) {
			uint32_t idx = (i << xive_cfg_vp_prio_shift(x)) | j;

			end = xive_get_end(x, idx);
			if (!end)
				continue;

			/* We need to preserve the firmware bit, otherwise
			 * we will incorrectly free the ENDs that are reserved
			 * for the physical CPUs
			 */
			if (xive_get_field32(END_W0_VALID, end->w0)) {
				if (!xive_end_is_firmware1(end))
					xive_dbg(x, "END 0x%x:0x%x is valid at reset: %08x %08x\n",
						 x->block_id, idx, end->w0, end->w1);
				end0 = *end;
				xive_cleanup_end(&end0);
				xive_endc_cache_update(x, x->block_id, idx, &end0, true);
			}
			if (xive_end_is_firmware1(end))
				end_firmware = true;
		}
		if (!end_firmware)
			bitmap_clr_bit(*x->end_map, i);
	}

	/* Take out all VPs from HW and reset all CPPRs to 0 */
	for_each_present_cpu(c) {
		if (c->chip_id != x->chip_id)
			continue;
		if (!c->xstate)
			continue;
		xive_cleanup_cpu_tima(c);
	}

	/* Reset all user-allocated VPs. This is inefficient, we should
	 * either keep a bitmap of allocated VPs or add an iterator to
	 * the buddy which is trickier but doable.
	 */
	for (i = 0; i < XIVE_VP_COUNT(x); i++) {
		struct xive_nvp *vp;
		struct xive_nvp vp0 = {0};

		/* Ignore the physical CPU VPs */
		if (i >= xive_hw_vp_base &&
		    i < (xive_hw_vp_base + xive_hw_vp_count))
			continue;

		/* Is the VP valid ? */
		vp = xive_get_vp(x, i);
		if (!vp || !xive_get_field32(NVP_W0_VALID, vp->w0))
			continue;

		/* Clear it */
		xive_dbg(x, "VP 0x%x:0x%x is valid at reset\n", x->block_id, i);
		xive_nxc_cache_update(x, x->block_id, i, &vp0, true);
	}

	/* Forget about remaining donated pages */
	list_head_init(&x->donated_pages);

	/* And cleanup donated indirect VP and END pages */
	xive_cleanup_vp_ind(x);
	xive_cleanup_end_ind(x);

	/* The rest must not be called with the lock held */
	unlock(&x->lock);

	/* Re-configure VPs */
	for_each_present_cpu(c) {
		struct xive_cpu_state *xs = c->xstate;

		if (c->chip_id != x->chip_id || !xs)
			continue;

		xive_init_cpu_exploitation(xs);
	}
}

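/*
 * irq_for_each_source() callback used at reset time: mask every
 * XIVE-owned, non-escalation source that was ever enabled and clear
 * its bit in the enabled map.
 */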
static void xive_reset_mask_source_cb(struct irq_source *is,
				      void *data __unused)
{
	struct xive_src *s = container_of(is, struct xive_src, is);
	struct xive *x;
	uint32_t isn;

	if (is->ops != &xive_irq_source_ops)
		return;

	/* Skip escalation sources */
	if (GIRQ_IS_ESCALATION(is->start))
		return;

	x = s->xive;

	/* Iterate all interrupts */
	for (isn = is->start; isn < is->end; isn++) {
		/* Has it ever been enabled ? */
		if (!bitmap_tst_bit(*x->int_enabled_map, GIRQ_TO_IDX(isn)))
			continue;
		/* Mask it and clear the enabled map bit */
		xive_vdbg(x, "[reset] disabling source 0x%x\n", isn);
		__xive_set_irq_config(is, isn, 0, 0xff, isn, true, false);
		bitmap_clr_bit(*x->int_enabled_map, GIRQ_TO_IDX(isn));
	}
}

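/*
 * Reset the calling thread's HW interrupt context: set the physical
 * ring CPPR to 0 and pull the pool context with a (discarded) load
 * from the TM_SPC_PULL_POOL_CTX special offset.
 */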
void xive2_cpu_reset(void)
{
	struct cpu_thread *c = this_cpu();
	struct xive_cpu_state *xs = c->xstate;

	out_8(xs->tm_ring1 + TM_QW3_HV_PHYS + TM_CPPR, 0);

	in_be64(xs->tm_ring1 + TM_SPC_PULL_POOL_CTX);
}

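/*
 * Common reset path: mask all sources, sync then reset every XIVE
 * instance, and rebuild the global VP allocator with the HW thread
 * range reserved.
 */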
static int64_t __xive_reset(uint64_t mode)
{
	struct proc_chip *chip;

	xive_mode = mode;

	/* Mask all interrupt sources */
	irq_for_each_source(xive_reset_mask_source_cb, NULL);

	/* For each XIVE do a sync... */
	for_each_chip(chip) {
		if (!chip->xive)
			continue;
		xive_sync(chip->xive);
	}

	/* For each XIVE reset everything else... */
	for_each_chip(chip) {
		if (!chip->xive)
			continue;
		xive_reset_one(chip->xive);
	}

	/* Cleanup global VP allocator */
	buddy_reset(xive_vp_buddy);

	/*
	 * We reserve the whole range of VP ids for HW threads.
	 */
	assert(buddy_reserve(xive_vp_buddy, xive_hw_vp_base, xive_threadid_shift));

	return OPAL_SUCCESS;
}

/* Called by fast reboot */
int64_t xive2_reset(void)
{
	if (xive_mode == XIVE_MODE_NONE)
		return OPAL_SUCCESS;
	return __xive_reset(XIVE_MODE_EXPL);
}

static int64_t opal_xive_reset(uint64_t mode)
{
	prlog(PR_DEBUG, "XIVE reset. mode = %llx\n", mode);

	if (!(mode & XIVE_MODE_EXPL)) {
		prlog(PR_NOTICE, "No emulation mode. XIVE exploitation mode "
		      "is the default\n");
	}

	xive_expl_options = mode & ~XIVE_MODE_EXPL;
	if (xive_expl_options & ~XIVE_EXPL_ALL_OPTIONS) {
		prerror("invalid XIVE exploitation mode option %016llx\n",
			xive_expl_options);
		return OPAL_PARAMETER;
	}

	return __xive_reset(XIVE_MODE_EXPL);
}

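/*
 * OPAL call freeing a block of VPs previously returned by
 * opal_xive_alloc_vp_block(). All VPs must have been disabled first;
 * any END still found valid is cleaned up before the END sets and the
 * VP ids are returned to their allocators.
 */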
static int64_t opal_xive_free_vp_block(uint64_t vp_base)
{
	uint32_t blk, idx, i, j, count;
	uint8_t order;
	bool group;

	if (xive_mode != XIVE_MODE_EXPL)
		return OPAL_WRONG_STATE;

	if (!xive_decode_vp(vp_base, &blk, &idx, &order, &group))
		return OPAL_PARAMETER;
	if (group)
		return OPAL_PARAMETER;
	if (blk)
		return OPAL_PARAMETER;
	if (order < (xive_chips_alloc_bits + 1))
		return OPAL_PARAMETER;
	if (idx & ((1 << (order - xive_chips_alloc_bits)) - 1))
		return OPAL_PARAMETER;

	count = 1 << order;
	for (i = 0; i < count; i++) {
		uint32_t vp_id = vp_base + i;
		uint32_t blk, idx, end_blk, end_idx;
		struct xive *x;
		struct xive_nvp *vp;

		if (!xive_decode_vp(vp_id, &blk, &idx, NULL, NULL)) {
			prerror("Couldn't decode VP id %u\n", vp_id);
			return OPAL_INTERNAL_ERROR;
		}
		x = xive_from_pc_blk(blk);
		if (!x) {
			prerror("Instance not found for deallocated VP"
				" block %d\n", blk);
			return OPAL_INTERNAL_ERROR;
		}
		vp = xive_get_vp(x, idx);
		if (!vp) {
			prerror("VP not found for deallocation !");
			return OPAL_INTERNAL_ERROR;
		}

		/* VP must be disabled */
		if (xive_get_field32(NVP_W0_VALID, vp->w0)) {
			prlog(PR_ERR, "freeing active VP %d\n", vp_id);
			return OPAL_XIVE_FREE_ACTIVE;
		}

		/* Not populated */
		if (vp->w5 == 0)
			continue;

		end_blk = xive_get_field32(NVP_W5_VP_END_BLOCK, vp->w5);
		end_idx = xive_get_field32(NVP_W5_VP_END_INDEX, vp->w5);

		lock(&x->lock);

		/* Ensure ENDs are disabled and cleaned up. Ideally the caller
		 * should have done it but we double check it here
		 */
		for (j = 0; j < xive_cfg_vp_prio(x); j++) {
			struct xive *end_x = xive_from_vc_blk(end_blk);
			struct xive_end end, *orig_end = xive_get_end(end_x, end_idx + j);

			if (!xive_get_field32(END_W0_VALID, orig_end->w0))
				continue;

			prlog(PR_WARNING, "freeing VP %d with queue %d active\n",
			      vp_id, j);
			end = *orig_end;
			xive_cleanup_end(&end);
			xive_endc_cache_update(x, end_blk, end_idx + j, &end, true);
		}

		/* Mark it not populated so we don't try to free it again */
		vp->w5 = 0;

		if (end_blk != blk) {
			prerror("Block mismatch trying to free ENDs\n");
			unlock(&x->lock);
			return OPAL_INTERNAL_ERROR;
		}

		xive_free_end_set(x, end_idx);
		unlock(&x->lock);
	}

	xive_free_vps(vp_base);

	return OPAL_SUCCESS;
}

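/*
 * OPAL call allocating a block of 2^alloc_order VPs. For each VP, an
 * END set is allocated on the chip owning it and its base is recorded
 * in word 5 of the NVP (a P10 addition). On any failure the whole
 * block is freed again.
 */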
static int64_t opal_xive_alloc_vp_block(uint32_t alloc_order)
{
	uint32_t vp_base, ends, count, i;
	int64_t rc;

	if (xive_mode != XIVE_MODE_EXPL)
		return OPAL_WRONG_STATE;

	prlog(PR_TRACE, "opal_xive_alloc_vp_block(%d)\n", alloc_order);

	vp_base = xive_alloc_vps(alloc_order);
	if (XIVE_ALLOC_IS_ERR(vp_base)) {
		if (vp_base == XIVE_ALLOC_NO_IND)
			return OPAL_XIVE_PROVISIONING;
		return OPAL_RESOURCE;
	}

	/* Allocate ENDs and initialize VPs */
	count = 1 << alloc_order;
	for (i = 0; i < count; i++) {
		uint32_t vp_id = vp_base + i;
		uint32_t blk, idx;
		struct xive *x;
		struct xive_nvp *vp;

		if (!xive_decode_vp(vp_id, &blk, &idx, NULL, NULL)) {
			prerror("Couldn't decode VP id %u\n", vp_id);
			return OPAL_INTERNAL_ERROR;
		}
		x = xive_from_pc_blk(blk);
		if (!x) {
			prerror("Instance not found for allocated VP"
				" block %d\n", blk);
			rc = OPAL_INTERNAL_ERROR;
			goto fail;
		}
		vp = xive_get_vp(x, idx);
		if (!vp) {
			prerror("VP not found after allocation !");
			rc = OPAL_INTERNAL_ERROR;
			goto fail;
		}

		/* Allocate ENDs, if fails, free the VPs and return */
		lock(&x->lock);
		ends = xive_alloc_end_set(x, false);
		unlock(&x->lock);
		if (XIVE_ALLOC_IS_ERR(ends)) {
			if (ends == XIVE_ALLOC_NO_IND)
				rc = OPAL_XIVE_PROVISIONING;
			else
				rc = OPAL_RESOURCE;
			goto fail;
		}

		/* Initialize the VP structure. We don't use a cache watch
		 * as we have made sure when freeing the entries to scrub
		 * it out of the cache.
		 */
		memset(vp, 0, sizeof(*vp));

		/* Store the END base of the VP in W5 (new in p10) */
		xive_vp_set_end_base(vp, blk, ends);
	}
	return vp_base;
fail:
	opal_xive_free_vp_block(vp_base);

	return rc;
}

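/*
 * Allocate one IPI from this chip's pool (between int_ipi_top and
 * int_hw_bot) and mark its EAS valid but masked. The HW cache is not
 * updated here, that happens when the interrupt is configured.
 */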
static int64_t xive_try_allocate_irq(struct xive *x)
{
	int idx, base_idx, max_count, girq;
	struct xive_eas *eas;

	lock(&x->lock);

	base_idx = x->int_ipi_top - x->int_base;
	max_count = x->int_hw_bot - x->int_ipi_top;

	idx = bitmap_find_zero_bit(*x->ipi_alloc_map, base_idx, max_count);
	if (idx < 0) {
		unlock(&x->lock);
		return OPAL_RESOURCE;
	}
	bitmap_set_bit(*x->ipi_alloc_map, idx);
	girq = x->int_base + idx;

	/* Mark the EAS valid. Don't bother with the HW cache, it's
	 * still masked anyway, the cache will be updated when unmasked
	 * and configured.
	 */
	eas = xive_get_eas(x, girq);
	if (!eas) {
		bitmap_clr_bit(*x->ipi_alloc_map, idx);
		unlock(&x->lock);
		return OPAL_PARAMETER;
	}
	eas->w = xive_set_field64(EAS_VALID, 0, 1) |
		 xive_set_field64(EAS_MASKED, 0, 1) |
		 xive_set_field64(EAS_END_DATA, 0, girq);
	unlock(&x->lock);

	return girq;
}

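/*
 * OPAL call allocating an IPI on the requested chip. With
 * OPAL_XIVE_ANY_CHIP, start with the caller's chip and fall back to
 * any chip with a XIVE if that one is exhausted.
 */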
static int64_t opal_xive_allocate_irq(uint32_t chip_id)
{
	struct proc_chip *chip;
	bool try_all = false;
	int64_t rc;

	if (xive_mode != XIVE_MODE_EXPL)
		return OPAL_WRONG_STATE;

	if (chip_id == OPAL_XIVE_ANY_CHIP) {
		try_all = true;
		chip_id = this_cpu()->chip_id;
	}
	chip = get_chip(chip_id);
	if (!chip)
		return OPAL_PARAMETER;

	/* Try initial target chip */
	if (!chip->xive)
		rc = OPAL_PARAMETER;
	else
		rc = xive_try_allocate_irq(chip->xive);
	if (rc >= 0 || !try_all)
		return rc;

	/* Failed and we try all... do so */
	for_each_chip(chip) {
		if (!chip->xive)
			continue;
		rc = xive_try_allocate_irq(chip->xive);
		if (rc >= 0)
			break;
	}
	return rc;
}

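/*
 * OPAL call returning an IPI to its chip's pool: the source is masked,
 * the EAS is reset to valid+masked and scrubbed, then the allocation
 * and enabled bitmaps are cleared.
 */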
static int64_t opal_xive_free_irq(uint32_t girq)
{
	struct irq_source *is = irq_find_source(girq);
	struct xive_src *s = container_of(is, struct xive_src, is);
	struct xive *x = xive_from_isn(girq);
	struct xive_eas *eas;
	uint32_t idx;

	if (xive_mode != XIVE_MODE_EXPL)
		return OPAL_WRONG_STATE;
	if (!x || !is)
		return OPAL_PARAMETER;

	idx = GIRQ_TO_IDX(girq);

	lock(&x->lock);

	eas = xive_get_eas(x, girq);
	if (!eas) {
		unlock(&x->lock);
		return OPAL_PARAMETER;
	}

	/* Mask the interrupt source */
	xive_update_irq_mask(s, girq - s->esb_base, true);

	/* Mark the EAS masked and invalid */
	eas->w = xive_set_field64(EAS_VALID, 0, 1) |
		 xive_set_field64(EAS_MASKED, 0, 1);
	xive_easc_scrub(x, x->block_id, idx);

	/* Free it */
	if (!bitmap_tst_bit(*x->ipi_alloc_map, idx)) {
		unlock(&x->lock);
		return OPAL_PARAMETER;
	}
	bitmap_clr_bit(*x->ipi_alloc_map, idx);
	bitmap_clr_bit(*x->int_enabled_map, idx);
	unlock(&x->lock);

	return OPAL_SUCCESS;
}

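/*
 * Debug helper for the OPAL_XIVE_DUMP call: print one TIMA queue word
 * (QW) of a thread. The upper half is read as a single byte for the HV
 * physical ring and as a 32-bit word otherwise.
 */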
static int64_t opal_xive_dump_tm(uint32_t offset, const char *n, uint32_t pir)
{
	struct cpu_thread *c = find_cpu_by_pir(pir);
	struct xive_cpu_state *xs;
	struct xive *x;
	void *cpu_tm_base;
	uint64_t v0,v1;

	if (!c)
		return OPAL_PARAMETER;
	xs = c->xstate;
	if (!xs || !xs->tm_ring1)
		return OPAL_INTERNAL_ERROR;
	x = xs->xive;
	cpu_tm_base = xive_cpu_get_tima(c);

	lock(&x->lock);
	v0 = in_be64(cpu_tm_base + offset);
	if (offset == TM_QW3_HV_PHYS) {
		v1 = in_8(cpu_tm_base + offset + 8);
		v1 <<= 56;
	} else {
		v1 = in_be32(cpu_tm_base + offset + 8);
		v1 <<= 32;
	}
	prlog(PR_INFO, "CPU[%04x]: TM state for QW %s\n", pir, n);
	prlog(PR_INFO, "CPU[%04x]: NSR CPPR IPB LSMFB ACK# INC AGE PIPR"
	      " W2 W3\n", pir);
	prlog(PR_INFO, "CPU[%04x]: %02x %02x %02x %02x %02x "
	      "%02x %02x %02x %08x %08x\n", pir,
	      (uint8_t)(v0 >> 56) & 0xff, (uint8_t)(v0 >> 48) & 0xff,
	      (uint8_t)(v0 >> 40) & 0xff, (uint8_t)(v0 >> 32) & 0xff,
	      (uint8_t)(v0 >> 24) & 0xff, (uint8_t)(v0 >> 16) & 0xff,
	      (uint8_t)(v0 >> 8) & 0xff, (uint8_t)(v0) & 0xff,
	      (uint32_t)(v1 >> 32) & 0xffffffff,
	      (uint32_t)(v1 & 0xffffffff));
	unlock(&x->lock);

	return OPAL_SUCCESS;
}

static int64_t opal_xive_dump_vp(uint32_t vp_id)
{
	uint32_t blk, idx;
	uint8_t order;
	bool group;
	struct xive *x;
	struct xive_nvp *vp;
	uint32_t *vpw;

	if (!xive_decode_vp(vp_id, &blk, &idx, &order, &group))
		return OPAL_PARAMETER;

	x = xive_from_vc_blk(blk);
	if (!x)
		return OPAL_PARAMETER;
	vp = xive_get_vp(x, idx);
	if (!vp)
		return OPAL_PARAMETER;
	lock(&x->lock);

	xive_nxc_scrub_clean(x, blk, idx);

	vpw = ((uint32_t *)vp) + (group ? 8 : 0);
	prlog(PR_INFO, "VP[%08x]: 0..3: %08x %08x %08x %08x\n", vp_id,
	      vpw[0], vpw[1], vpw[2], vpw[3]);
	prlog(PR_INFO, "VP[%08x]: 4..7: %08x %08x %08x %08x\n", vp_id,
	      vpw[4], vpw[5], vpw[6], vpw[7]);
	unlock(&x->lock);

	return OPAL_SUCCESS;
}

static int64_t opal_xive_sync_irq_src(uint32_t girq)
{
	struct xive *x = xive_from_isn(girq);

	if (!x)
		return OPAL_PARAMETER;
	return xive_sync(x);
}

static int64_t opal_xive_sync_irq_target(uint32_t girq)
{
	uint32_t target, vp_blk;
	struct xive *x;

	if (!xive_get_irq_targetting(girq, &target, NULL, NULL))
		return OPAL_PARAMETER;
	if (!xive_decode_vp(target, &vp_blk, NULL, NULL, NULL))
		return OPAL_PARAMETER;
	x = xive_from_pc_blk(vp_blk);
	if (!x)
		return OPAL_PARAMETER;
	return xive_sync(x);
}

static int64_t opal_xive_sync(uint32_t type, uint32_t id)
{
	int64_t rc = OPAL_SUCCESS;

	if (type & XIVE_SYNC_EAS)
		rc = opal_xive_sync_irq_src(id);
	if (rc)
		return rc;
	if (type & XIVE_SYNC_QUEUE)
		rc = opal_xive_sync_irq_target(id);
	if (rc)
		return rc;

	/* Add more ... */

	return rc;
}

static int64_t opal_xive_dump(uint32_t type, uint32_t id)
{
	switch (type) {
	case XIVE_DUMP_TM_HYP:
		return opal_xive_dump_tm(TM_QW3_HV_PHYS, "PHYS", id);
	case XIVE_DUMP_TM_POOL:
		return opal_xive_dump_tm(TM_QW2_HV_POOL, "POOL", id);
	case XIVE_DUMP_TM_OS:
		return opal_xive_dump_tm(TM_QW1_OS, "OS ", id);
	case XIVE_DUMP_TM_USER:
		return opal_xive_dump_tm(TM_QW0_USER, "USER", id);
	case XIVE_DUMP_VP:
		return opal_xive_dump_vp(id);
	default:
		return OPAL_PARAMETER;
	}
}

static void xive_init_globals(void)
{
	uint32_t i;

	for (i = 0; i < XIVE_MAX_CHIPS; i++)
		xive_block_to_chip[i] = XIVE_INVALID_CHIP;
}

/*
 * The global availability of some capabilities used in other drivers
 * (PHB, PSI) is deduced from the capabilities of the first XIVE chip
 * of the system. It should be common to all chips.
 */
bool xive2_cap_phb_pq_disable(void)
{
	return xive_has_cap(one_xive, CQ_XIVE_CAP_PHB_PQ_DISABLE);
}

bool xive2_cap_phb_abt(void)
{
	if (!xive_has_cap(one_xive, CQ_XIVE_CAP_PHB_ABT))
		return false;

	/*
	 * We need 'PQ disable' to use ABT mode, else the OS will use
	 * two different sets of ESB pages (PHB and IC) to control the
	 * interrupt sources. Can not work.
	 */
	if (!xive2_cap_phb_pq_disable()) {
		prlog_once(PR_ERR, "ABT mode is set without PQ disable. "
			   "Ignoring bogus configuration\n");
		return false;
	}

	return true;
}

bool xive2_cap_store_eoi(void)
{
	return xive_has_cap(one_xive, CQ_XIVE_CAP_STORE_EOI);
}

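/*
 * Main XIVE2 initialization: discover the XIVE units in the device
 * tree, set up the VP allocator and the MMIO device-tree node, finish
 * per-chip and per-cpu setup, and register the OPAL XIVE calls.
 */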
void xive2_init(void)
{
	struct dt_node *np;
	struct proc_chip *chip;
	struct cpu_thread *cpu;
	bool first = true;

	/* Look for xive nodes and do basic inits */
	dt_for_each_compatible(dt_root, np, "ibm,power10-xive-x") {
		struct xive *x;

		/* Initialize some global stuff */
		if (first)
			xive_init_globals();

		/* Create/initialize the xive instance */
		x = init_one_xive(np);
		if (first)
			one_xive = x;
		first = false;
	}
	if (first)
		return;

	/*
	 * P8 emulation is not supported on P10 anymore. Exploitation
	 * is the default XIVE mode. We might introduce a GEN2 mode.
	 */
	xive_mode = XIVE_MODE_EXPL;

	/* Init VP allocator */
	xive_init_vp_allocator();

	/* Create a device-tree node for Linux use */
	xive_create_mmio_dt_node(one_xive);

	/* Some inits must be done after all xive have been created
	 * such as setting up the forwarding ports
	 */
	for_each_chip(chip) {
		if (chip->xive)
			late_init_one_xive(chip->xive);
	}

	/* Initialize per-cpu structures */
	for_each_present_cpu(cpu) {
		xive_init_cpu(cpu);
	}

	/* Calling boot CPU */
	xive2_cpu_callin(this_cpu());

	/* Register XIVE exploitation calls */
	opal_register(OPAL_XIVE_RESET, opal_xive_reset, 1);
	opal_register(OPAL_XIVE_GET_IRQ_INFO, opal_xive_get_irq_info, 6);
	opal_register(OPAL_XIVE_GET_IRQ_CONFIG, opal_xive_get_irq_config, 4);
	opal_register(OPAL_XIVE_SET_IRQ_CONFIG, opal_xive_set_irq_config, 4);
	opal_register(OPAL_XIVE_GET_QUEUE_INFO, opal_xive_get_queue_info, 7);
	opal_register(OPAL_XIVE_SET_QUEUE_INFO, opal_xive_set_queue_info, 5);
	opal_register(OPAL_XIVE_DONATE_PAGE, opal_xive_donate_page, 2);
	opal_register(OPAL_XIVE_ALLOCATE_IRQ, opal_xive_allocate_irq, 1);
	opal_register(OPAL_XIVE_FREE_IRQ, opal_xive_free_irq, 1);
	opal_register(OPAL_XIVE_ALLOCATE_VP_BLOCK, opal_xive_alloc_vp_block, 1);
	opal_register(OPAL_XIVE_FREE_VP_BLOCK, opal_xive_free_vp_block, 1);
	opal_register(OPAL_XIVE_GET_VP_INFO, opal_xive_get_vp_info, 5);
	opal_register(OPAL_XIVE_SET_VP_INFO, opal_xive_set_vp_info, 3);
	opal_register(OPAL_XIVE_SYNC, opal_xive_sync, 2);
	opal_register(OPAL_XIVE_DUMP, opal_xive_dump, 2);
	opal_register(OPAL_XIVE_GET_QUEUE_STATE, opal_xive_get_queue_state, 4);
	opal_register(OPAL_XIVE_SET_QUEUE_STATE, opal_xive_set_queue_state, 4);
	opal_register(OPAL_XIVE_GET_VP_STATE, opal_xive_get_vp_state, 2);
}