1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
|
/* SPDX-License-Identifier: GPL-2.0 */
#ifndef __POWERNV_PCI_H
#define __POWERNV_PCI_H
#include <linux/compiler.h> /* for __printf */
#include <linux/iommu.h>
#include <asm/iommu.h>
#include <asm/msi_bitmap.h>
struct pci_dn;
/*
 * Kind of PHB that a struct pnv_phb describes; held in its 'type' field.
 * Every enumerator carries an explicit numeric value.
 * NOTE(review): presumably something relies on these exact numbers --
 * confirm before renumbering or inserting a value in the middle.
 */
enum pnv_phb_type {
PNV_PHB_IODA1 = 0,
PNV_PHB_IODA2 = 1,
PNV_PHB_NPU_NVLINK = 2,
PNV_PHB_NPU_OCAPI = 3,
};
/*
 * Exact PHB hardware model, used for error management.
 *
 * The enumerators are numbered consecutively from zero, with
 * PNV_PHB_MODEL_UNKNOWN being the zero value.
 */
enum pnv_phb_model {
	PNV_PHB_MODEL_UNKNOWN	= 0,
	PNV_PHB_MODEL_P7IOC	= 1,
	PNV_PHB_MODEL_PHB3	= 2,
	PNV_PHB_MODEL_NPU	= 3,
	PNV_PHB_MODEL_NPU2	= 4,
};
/*
 * Size used for PHB diagnostic buffers.
 * NOTE(review): unit is presumably bytes -- confirm at the allocation site.
 */
#define PNV_PCI_DIAG_BUF_SIZE 8192
/*
 * PNV_IODA_PE_* flags: six single-bit values occupying bits 0..5, so they
 * can be OR-ed together and tested individually.
 * NOTE(review): presumably stored in pnv_ioda_pe.flags -- confirm at users.
 */
#define PNV_IODA_PE_DEV (1 << 0) /* PE has single PCI device */
#define PNV_IODA_PE_BUS (1 << 1) /* PE has primary PCI bus */
#define PNV_IODA_PE_BUS_ALL (1 << 2) /* PE has subordinate buses */
#define PNV_IODA_PE_MASTER (1 << 3) /* Master PE in compound case */
#define PNV_IODA_PE_SLAVE (1 << 4) /* Slave PE in compound case */
#define PNV_IODA_PE_VF (1 << 5) /* PE for one VF */
/*
* A brief note on PNV_IODA_PE_BUS_ALL
*
* This is needed because of the behaviour of PCIe-to-PCI bridges. The PHB uses
* the Requester ID field of the PCIe request header to determine the device
* (and PE) that initiated a DMA. In legacy PCI, individual memory read/write
* requests aren't tagged with the RID. To work around this the PCIe-to-PCI
* bridge will use (secondary_bus_no << 8) | 0x00 as the RID on the PCIe side.
*
* PCIe-to-X bridges have a similar issue even though PCI-X requests also have
* a RID in the transaction header. The PCIe-to-X bridge is permitted to "take
* ownership" of a transaction by a PCI-X device when forwarding it to the PCIe
* side of the bridge.
*
* To work around these problems we use the BUS_ALL flag since every subordinate
* bus of the bridge should go into the same PE.
*/
/*
 * Indicates operations are frozen for a PE: MMIO in PESTA & DMA in PESTB.
 * The value has only the most significant bit (bit 63) of a 64-bit word set.
 */
#define PNV_IODA_STOPPED_STATE 0x8000000000000000
/*
 * Data associated with a PE, including IOMMU tracking etc.
 *
 * struct pnv_phb is forward-declared first because this structure points
 * back at it while struct pnv_phb itself is only defined further down.
 */
struct pnv_phb;
struct pnv_ioda_pe {
/* NOTE(review): presumably a mask of PNV_IODA_PE_* bits -- confirm */
unsigned long flags;
/* NOTE(review): presumably the PHB this PE was allocated from -- confirm */
struct pnv_phb *phb;
int device_count;
/* A PE can be associated with a single device or an
 * entire bus (& children). In the former case, pdev
 * is populated, in the latter case, pbus is.
 */
/* parent_dev only exists when the kernel is built with CONFIG_PCI_IOV. */
#ifdef CONFIG_PCI_IOV
struct pci_dev *parent_dev;
#endif
struct pci_dev *pdev;
struct pci_bus *pbus;
/* Effective RID (device RID for a device PE and base bus
 * RID with devfn 0 for a bus PE)
 */
unsigned int rid;
/* PE number */
unsigned int pe_number;
/* "Base" iommu table, ie, 4K TCEs, 32-bit DMA */
struct iommu_table_group table_group;
/* struct npu_comp is not defined in this header; only the pointer is kept. */
struct npu_comp *npucomp;
/* 64-bit TCE bypass region */
bool tce_bypass_enabled;
uint64_t tce_bypass_base;
/*
 * Used to track whether we've done DMA setup for this PE or not. We
 * want to defer allocating TCE tables, etc until we've added a
 * non-bridge device to the PE.
 */
bool dma_setup_done;
/* MSIs. MVE index is identical for 32 and 64 bit MSI
 * and -1 if not supported. (It's actually identical to the
 * PE number)
 */
int mve_number;
/* PEs in compound case */
struct pnv_ioda_pe *master;
struct list_head slaves;
/* Link in list of PE#s
 * NOTE(review): presumably chained on pnv_phb.ioda.pe_list -- confirm
 */
struct list_head list;
};
/* NOTE(review): presumably a bit for pnv_phb.flags -- confirm at the users */
#define PNV_PHB_FLAG_EEH (1 << 0)
/*
 * Per-PHB state: identification, register mapping, MSI bookkeeping, a set
 * of callbacks, the embedded 'ioda' sub-structure with window/PE/segment
 * tracking, and a diagnostic data buffer.
 */
struct pnv_phb {
struct pci_controller *hose;
enum pnv_phb_type type;
enum pnv_phb_model model;
u64 hub_id;
u64 opal_id;
int flags;
/* Register block: 'regs' is the __iomem mapping, 'regs_phys' a u64 address. */
void __iomem *regs;
u64 regs_phys;
int initialized;
spinlock_t lock;
/* debugfs members are only present when built with CONFIG_DEBUG_FS. */
#ifdef CONFIG_DEBUG_FS
int has_dbgfs;
struct dentry *dbgfs;
#endif
/* MSI bookkeeping */
unsigned int msi_base;
unsigned int msi32_support;
struct msi_bitmap msi_bmp;
/*
 * Callbacks; each one receives the PHB it is invoked on as first argument.
 * They are only declared here -- which implementations get installed is
 * decided outside this header.
 */
int (*msi_setup)(struct pnv_phb *phb, struct pci_dev *dev,
unsigned int hwirq, unsigned int virq,
unsigned int is_64, struct msi_msg *msg);
int (*init_m64)(struct pnv_phb *phb);
int (*get_pe_state)(struct pnv_phb *phb, int pe_no);
void (*freeze_pe)(struct pnv_phb *phb, int pe_no);
int (*unfreeze_pe)(struct pnv_phb *phb, int pe_no, int opt);
struct {
/* Global bridge info */
unsigned int total_pe_num;
unsigned int reserved_pe_idx;
unsigned int root_pe_idx;
/* 32-bit MMIO window */
unsigned int m32_size;
unsigned int m32_segsize;
unsigned int m32_pci_base;
/* 64-bit MMIO window */
unsigned int m64_bar_idx;
unsigned long m64_size;
unsigned long m64_segsize;
unsigned long m64_base;
/*
 * Although written inside the struct body, this is a preprocessor macro and
 * is therefore visible to everything after this line (struct pnv_iov_data
 * sizes its used_m64_bar_mask bitmap with it).
 * NOTE(review): m64_bar_alloc is presumably a bitmask of allocated M64
 * BARs -- confirm at the users.
 */
#define MAX_M64_BARS 64
unsigned long m64_bar_alloc;
/* IO ports */
unsigned int io_size;
unsigned int io_segsize;
unsigned int io_pci_base;
/* PE allocation */
struct mutex pe_alloc_mutex;
unsigned long *pe_alloc;
struct pnv_ioda_pe *pe_array;
/* M64, M32 & IO segment maps */
unsigned int *m64_segmap;
unsigned int *m32_segmap;
unsigned int *io_segmap;
/* DMA32 segment maps - IODA1 only */
unsigned int dma32_count;
unsigned int *dma32_segmap;
/* IRQ chip */
int irq_chip_init;
struct irq_chip irq_chip;
/* Sorted list of used PE's based
 * on the sequence of creation
 */
struct list_head pe_list;
struct mutex pe_list_mutex;
/* Reverse map of PEs, indexed by {bus, devfn}.
 * 0x10000 entries, i.e. one slot for every possible 16-bit index.
 */
unsigned int pe_rmap[0x10000];
} ioda;
/* PHB and hub diagnostics */
unsigned int diag_data_size;
u8 *diag_data;
};
/* IODA PE management */
/*
 * Tell whether resource @r starts inside the 64-bit MMIO window of @phb.
 *
 * The resource flags are deliberately ignored: the Linux PCI allocation
 * code sometimes places a 64-bit prefetchable BAR in the 32-bit window,
 * so only an address comparison is trustworthy. For simplicity just the
 * start address of @r is examined, never its end.
 *
 * Returns true when m64_base <= r->start < m64_base + m64_size.
 */
static inline bool pnv_pci_is_m64(struct pnv_phb *phb, struct resource *r)
{
	unsigned long window_start = phb->ioda.m64_base;
	unsigned long window_end = window_start + phb->ioda.m64_size;

	/* Below the window: cannot be an M64 resource. */
	if (r->start < window_start)
		return false;

	return r->start < window_end;
}
/*
 * Tell whether @resource_flags has BOTH IORESOURCE_MEM_64 and
 * IORESOURCE_PREFETCH set; having only one of the two yields false.
 */
static inline bool pnv_pci_is_m64_flags(unsigned long resource_flags)
{
	const unsigned long required = IORESOURCE_MEM_64 | IORESOURCE_PREFETCH;

	/* True only if no required bit is missing from resource_flags. */
	return !(required & ~resource_flags);
}
/*
 * PE configuration, DMA setup/release and PE allocation entry points.
 * Only the prototypes are visible in this header; the definitions live
 * elsewhere.
 * NOTE(review): the int-returning functions presumably return 0 on success
 * and a negative errno otherwise, and pnv_ioda_alloc_pe() presumably
 * returns NULL on failure -- confirm against the definitions.
 */
int pnv_ioda_configure_pe(struct pnv_phb *phb, struct pnv_ioda_pe *pe);
int pnv_ioda_deconfigure_pe(struct pnv_phb *phb, struct pnv_ioda_pe *pe);
void pnv_pci_ioda2_setup_dma_pe(struct pnv_phb *phb, struct pnv_ioda_pe *pe);
void pnv_pci_ioda2_release_pe_dma(struct pnv_ioda_pe *pe);
struct pnv_ioda_pe *pnv_ioda_alloc_pe(struct pnv_phb *phb, int count);
void pnv_ioda_free_pe(struct pnv_ioda_pe *pe);
#ifdef CONFIG_PCI_IOV
/*
* For SR-IOV we want to put each VF's MMIO resource into a separate PE.
* This requires a bit of acrobatics with the MMIO -> PE configuration
* and this structure is used to keep track of it all.
*/
/*
 * Per-device SR-IOV bookkeeping; pnv_iov_get() reads it back from
 * pdev->dev.archdata.iov_data. The per-BAR arrays have
 * PCI_SRIOV_NUM_BARS entries each.
 */
struct pnv_iov_data {
/* number of VFs enabled */
u16 num_vfs;
/* pointer to the array of VF PEs, num_vfs entries long */
struct pnv_ioda_pe *vf_pe_arr;
/* Did we map the VF BAR with single-PE IODA BARs? */
bool m64_single_mode[PCI_SRIOV_NUM_BARS];
/*
 * True if we're using any segmented windows. In that case we need
 * to shift the start of the IOV resource to the segment corresponding
 * to the allocated PE.
 */
bool need_shift;
/*
 * Bit mask used to track which m64 windows are used to map the
 * SR-IOV BARs for this device. It is MAX_M64_BARS bits wide.
 */
DECLARE_BITMAP(used_m64_bar_mask, MAX_M64_BARS);
/*
 * If we map the SR-IOV BARs with a segmented window then
 * parts of that window will be "claimed" by other PEs.
 *
 * "holes" here is used to reserve the leading portion
 * of the window that is used by other (non VF) PEs.
 */
struct resource holes[PCI_SRIOV_NUM_BARS];
};
/*
 * Fetch the SR-IOV bookkeeping attached to @pdev, i.e. the pointer stored
 * in pdev->dev.archdata.iov_data. The stored value is handed back as-is;
 * no NULL check is performed here.
 */
static inline struct pnv_iov_data *pnv_iov_get(struct pci_dev *pdev)
{
	struct pnv_iov_data *iov = pdev->dev.archdata.iov_data;

	return iov;
}
/*
 * SR-IOV entry points, declared only when CONFIG_PCI_IOV is set (they sit
 * inside the surrounding #ifdef). Definitions are outside this header.
 * NOTE(review): the pcibios_sriov_* names suggest these back the generic
 * pcibios SR-IOV enable/disable hooks -- confirm at the point of use.
 */
void pnv_pci_ioda_fixup_iov(struct pci_dev *pdev);
resource_size_t pnv_pci_iov_resource_alignment(struct pci_dev *pdev, int resno);
int pnv_pcibios_sriov_enable(struct pci_dev *pdev, u16 num_vfs);
int pnv_pcibios_sriov_disable(struct pci_dev *pdev);
#endif /* CONFIG_PCI_IOV */
/*
 * Platform-wide declarations shared between the powernv PCI source files.
 * Everything below is declaration-only; definitions live elsewhere.
 */
extern struct pci_ops pnv_pci_ops;
/* NOTE(review): log_buff presumably holds PNV_PCI_DIAG_BUF_SIZE bytes -- confirm */
void pnv_pci_dump_phb_diag_data(struct pci_controller *hose,
unsigned char *log_buff);
/*
 * Config space accessors keyed by a struct pci_dn: the read variant returns
 * the value through *val, the write variant takes it by value.
 * NOTE(review): 'size' is presumably the access width in bytes -- confirm.
 */
int pnv_pci_cfg_read(struct pci_dn *pdn,
int where, int size, u32 *val);
int pnv_pci_cfg_write(struct pci_dn *pdn,
int where, int size, u32 val);
/* NOTE(review): 'nid' is presumably a NUMA node id -- confirm */
extern struct iommu_table *pnv_pci_table_alloc(int nid);
/* PHB/hub initialisation, each taking the device-tree node to set up from */
extern void pnv_pci_init_ioda_hub(struct device_node *np);
extern void pnv_pci_init_ioda2_phb(struct device_node *np);
extern void pnv_pci_init_npu_phb(struct device_node *np);
extern void pnv_pci_init_npu2_opencapi_phb(struct device_node *np);
extern void pnv_npu2_map_lpar(struct pnv_ioda_pe *gpe, unsigned long msr);
extern void pnv_pci_reset_secondary_bus(struct pci_dev *dev);
extern int pnv_eeh_phb_reset(struct pci_controller *hose, int option);
/* MSI setup and teardown for a device */
extern int pnv_setup_msi_irqs(struct pci_dev *pdev, int nvec, int type);
extern void pnv_teardown_msi_irqs(struct pci_dev *pdev);
/*
 * PE lookup, either from a 16-bit bus/devfn number on a given PHB or from
 * a pci_dev.
 * NOTE(review): presumably both may return NULL when no PE exists -- confirm.
 */
extern struct pnv_ioda_pe *pnv_pci_bdfn_to_pe(struct pnv_phb *phb, u16 bdfn);
extern struct pnv_ioda_pe *pnv_ioda_get_pe(struct pci_dev *dev);
extern void pnv_set_msi_irq_chip(struct pnv_phb *phb, unsigned int virq);
extern unsigned long pnv_pci_ioda2_get_table_size(__u32 page_shift,
__u64 window_size, __u32 levels);
extern int pnv_eeh_post_init(void);
/*
 * printk-style logging helper that takes a PE.
 *
 * @pe:    PE the message is about
 * @level: log level string (the wrappers below pass KERN_ERR,
 *         KERN_WARNING or KERN_INFO)
 * @fmt:   printf-style format, followed by its arguments
 *
 * __printf(3, 4) marks parameter 3 (@fmt) as the format string and
 * parameter 4 as the start of the variadic arguments, so the compiler can
 * type-check callers.
 */
__printf(3, 4)
extern void pe_level_printk(const struct pnv_ioda_pe *pe, const char *level,
const char *fmt, ...);
/*
 * Convenience wrappers that fix the log level. '##__VA_ARGS__' lets them be
 * used with a bare format string and no extra arguments.
 */
#define pe_err(pe, fmt, ...) \
pe_level_printk(pe, KERN_ERR, fmt, ##__VA_ARGS__)
#define pe_warn(pe, fmt, ...) \
pe_level_printk(pe, KERN_WARNING, fmt, ##__VA_ARGS__)
#define pe_info(pe, fmt, ...) \
pe_level_printk(pe, KERN_INFO, fmt, ##__VA_ARGS__)
/*
 * Nvlink functions
 *
 * Declaration-only; the definitions are not part of this header.
 * NOTE(review): 'rm' in pnv_pci_ioda2_tce_invalidate_entire() presumably
 * selects real-mode operation -- confirm against the definition.
 */
extern void pnv_npu_try_dma_set_bypass(struct pci_dev *gpdev, bool bypass);
extern void pnv_pci_ioda2_tce_invalidate_entire(struct pnv_phb *phb, bool rm);
extern void pnv_pci_npu_setup_iommu_groups(void);
/*
 * pci-ioda-tce.c
 *
 * TCE table helpers operating on a struct iommu_table.
 */
/*
 * NOTE(review): presumably the default and the upper bound for the 'levels'
 * argument taken by the table allocation helpers below -- confirm.
 */
#define POWERNV_IOMMU_DEFAULT_LEVELS 2
#define POWERNV_IOMMU_MAX_LEVELS 5
/* Entry-level operations; 'index' and 'npages' are passed as long. */
extern int pnv_tce_build(struct iommu_table *tbl, long index, long npages,
unsigned long uaddr, enum dma_data_direction direction,
unsigned long attrs);
extern void pnv_tce_free(struct iommu_table *tbl, long index, long npages);
/*
 * NOTE(review): *hpa and *direction are non-const pointers, so they are
 * presumably in/out parameters (new value in, old value out) -- confirm.
 */
extern int pnv_tce_xchg(struct iommu_table *tbl, long index,
unsigned long *hpa, enum dma_data_direction *direction,
bool alloc);
extern __be64 *pnv_tce_useraddrptr(struct iommu_table *tbl, long index,
bool alloc);
extern unsigned long pnv_tce_get(struct iommu_table *tbl, long index);
/* Backing-page allocation and release for a table */
extern long pnv_pci_ioda2_table_alloc_pages(int nid, __u64 bus_offset,
__u32 page_shift, __u64 window_size, __u32 levels,
bool alloc_userspace_copy, struct iommu_table *tbl);
extern void pnv_pci_ioda2_table_free_pages(struct iommu_table *tbl);
/* Attach a table to, or detach it from, an iommu_table_group */
extern long pnv_pci_link_table_and_group(int node, int num,
struct iommu_table *tbl,
struct iommu_table_group *table_group);
extern void pnv_pci_unlink_table_and_group(struct iommu_table *tbl,
struct iommu_table_group *table_group);
extern void pnv_pci_setup_iommu_table(struct iommu_table *tbl,
void *tce_mem, u64 tce_size,
u64 dma_offset, unsigned int page_shift);
extern unsigned long pnv_ioda_parse_tce_sizes(struct pnv_phb *phb);
/*
 * Map a PCI bus to its pnv_phb.
 *
 * The bus' sysdata is taken as the pci_controller; when it is set, the
 * controller's private_data is returned, otherwise NULL. @bus itself is
 * dereferenced unconditionally and must not be NULL.
 */
static inline struct pnv_phb *pci_bus_to_pnvhb(struct pci_bus *bus)
{
	struct pci_controller *controller = bus->sysdata;

	return controller ? controller->private_data : NULL;
}
#endif /* __POWERNV_PCI_H */
|