diff options
Diffstat (limited to 'arch/ia64/sn')
42 files changed, 11079 insertions, 0 deletions
diff --git a/arch/ia64/sn/Makefile b/arch/ia64/sn/Makefile new file mode 100644 index 000000000..79a7df02e --- /dev/null +++ b/arch/ia64/sn/Makefile @@ -0,0 +1,12 @@ +# arch/ia64/sn/Makefile +# +# This file is subject to the terms and conditions of the GNU General Public +# License. See the file "COPYING" in the main directory of this archive +# for more details. +# +# Copyright (C) 2004 Silicon Graphics, Inc. All Rights Reserved. +# +# Makefile for the sn ia64 subplatform +# + +obj-y += kernel/ pci/ diff --git a/arch/ia64/sn/include/ioerror.h b/arch/ia64/sn/include/ioerror.h new file mode 100644 index 000000000..e68f2b078 --- /dev/null +++ b/arch/ia64/sn/include/ioerror.h @@ -0,0 +1,81 @@ +/* + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. + * + * Copyright (C) 1992 - 1997, 2000-2003 Silicon Graphics, Inc. All rights reserved. + */ +#ifndef _ASM_IA64_SN_IOERROR_H +#define _ASM_IA64_SN_IOERROR_H + +/* + * IO error structure. + * + * This structure would expand to hold the information retrieved from + * all IO related error registers. + * + * This structure is defined to hold all system specific + * information related to a single error. + * + * This serves a couple of purpose. + * - Error handling often involves translating one form of address to other + * form. So, instead of having different data structures at each level, + * we have a single structure, and the appropriate fields get filled in + * at each layer. + * - This provides a way to dump all error related information in any layer + * of erorr handling (debugging aid). + * + * A second possibility is to allow each layer to define its own error + * data structure, and fill in the proper fields. This has the advantage + * of isolating the layers. + * A big concern is the potential stack usage (and overflow), if each layer + * defines these structures on stack (assuming we don't want to do kmalloc. + * + * Any layer wishing to pass extra information to a layer next to it in + * error handling hierarchy, can do so as a separate parameter. + */ + +typedef struct io_error_s { + /* Bit fields indicating which structure fields are valid */ + union { + struct { + unsigned ievb_errortype:1; + unsigned ievb_widgetnum:1; + unsigned ievb_widgetdev:1; + unsigned ievb_srccpu:1; + unsigned ievb_srcnode:1; + unsigned ievb_errnode:1; + unsigned ievb_sysioaddr:1; + unsigned ievb_xtalkaddr:1; + unsigned ievb_busspace:1; + unsigned ievb_busaddr:1; + unsigned ievb_vaddr:1; + unsigned ievb_memaddr:1; + unsigned ievb_epc:1; + unsigned ievb_ef:1; + unsigned ievb_tnum:1; + } iev_b; + unsigned iev_a; + } ie_v; + + short ie_errortype; /* error type: extra info about error */ + short ie_widgetnum; /* Widget number that's in error */ + short ie_widgetdev; /* Device within widget in error */ + cpuid_t ie_srccpu; /* CPU on srcnode generating error */ + cnodeid_t ie_srcnode; /* Node which caused the error */ + cnodeid_t ie_errnode; /* Node where error was noticed */ + iopaddr_t ie_sysioaddr; /* Sys specific IO address */ + iopaddr_t ie_xtalkaddr; /* Xtalk (48bit) addr of Error */ + iopaddr_t ie_busspace; /* Bus specific address space */ + iopaddr_t ie_busaddr; /* Bus specific address */ + caddr_t ie_vaddr; /* Virtual address of error */ + iopaddr_t ie_memaddr; /* Physical memory address */ + caddr_t ie_epc; /* pc when error reported */ + caddr_t ie_ef; /* eframe when error reported */ + short ie_tnum; /* Xtalk TNUM field */ +} ioerror_t; + +#define IOERROR_INIT(e) do { (e)->ie_v.iev_a = 0; } while (0) +#define IOERROR_SETVALUE(e,f,v) do { (e)->ie_ ## f = (v); (e)->ie_v.iev_b.ievb_ ## f = 1; } while (0) + +#endif /* _ASM_IA64_SN_IOERROR_H */ diff --git a/arch/ia64/sn/include/tio.h b/arch/ia64/sn/include/tio.h new file mode 100644 index 000000000..6b2e7b75e --- /dev/null +++ b/arch/ia64/sn/include/tio.h @@ -0,0 +1,41 @@ +/* + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. + * + * Copyright (C) 2000-2005 Silicon Graphics, Inc. All rights reserved. + */ + +#ifndef _ASM_IA64_SN_TIO_H +#define _ASM_IA64_SN_TIO_H + +#define TIO_MMR_ADDR_MOD + +#define TIO_NODE_ID TIO_MMR_ADDR_MOD(0x0000000090060e80) + +#define TIO_ITTE_BASE 0xb0008800 /* base of translation table entries */ +#define TIO_ITTE(bigwin) (TIO_ITTE_BASE + 8*(bigwin)) + +#define TIO_ITTE_OFFSET_BITS 8 /* size of offset field */ +#define TIO_ITTE_OFFSET_MASK ((1<<TIO_ITTE_OFFSET_BITS)-1) +#define TIO_ITTE_OFFSET_SHIFT 0 + +#define TIO_ITTE_WIDGET_BITS 2 /* size of widget field */ +#define TIO_ITTE_WIDGET_MASK ((1<<TIO_ITTE_WIDGET_BITS)-1) +#define TIO_ITTE_WIDGET_SHIFT 12 +#define TIO_ITTE_VALID_MASK 0x1 +#define TIO_ITTE_VALID_SHIFT 16 + +#define TIO_ITTE_WIDGET(itte) \ + (((itte) >> TIO_ITTE_WIDGET_SHIFT) & TIO_ITTE_WIDGET_MASK) +#define TIO_ITTE_VALID(itte) \ + (((itte) >> TIO_ITTE_VALID_SHIFT) & TIO_ITTE_VALID_MASK) + +#define TIO_ITTE_PUT(nasid, bigwin, widget, addr, valid) \ + REMOTE_HUB_S((nasid), TIO_ITTE(bigwin), \ + (((((addr) >> TIO_BWIN_SIZE_BITS) & \ + TIO_ITTE_OFFSET_MASK) << TIO_ITTE_OFFSET_SHIFT) | \ + (((widget) & TIO_ITTE_WIDGET_MASK) << TIO_ITTE_WIDGET_SHIFT)) | \ + (( (valid) & TIO_ITTE_VALID_MASK) << TIO_ITTE_VALID_SHIFT)) + +#endif /* _ASM_IA64_SN_TIO_H */ diff --git a/arch/ia64/sn/include/xtalk/hubdev.h b/arch/ia64/sn/include/xtalk/hubdev.h new file mode 100644 index 000000000..8182583c7 --- /dev/null +++ b/arch/ia64/sn/include/xtalk/hubdev.h @@ -0,0 +1,91 @@ +/* + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. + * + * Copyright (C) 1992 - 1997, 2000-2005 Silicon Graphics, Inc. All rights reserved. + */ +#ifndef _ASM_IA64_SN_XTALK_HUBDEV_H +#define _ASM_IA64_SN_XTALK_HUBDEV_H + +#include "xtalk/xwidgetdev.h" + +#define HUB_WIDGET_ID_MAX 0xf +#define DEV_PER_WIDGET (2*2*8) +#define IIO_ITTE_WIDGET_BITS 4 /* size of widget field */ +#define IIO_ITTE_WIDGET_MASK ((1<<IIO_ITTE_WIDGET_BITS)-1) +#define IIO_ITTE_WIDGET_SHIFT 8 + +#define IIO_ITTE_WIDGET(itte) \ + (((itte) >> IIO_ITTE_WIDGET_SHIFT) & IIO_ITTE_WIDGET_MASK) + +/* + * Use the top big window as a surrogate for the first small window + */ +#define SWIN0_BIGWIN HUB_NUM_BIG_WINDOW +#define IIO_NUM_ITTES 7 +#define HUB_NUM_BIG_WINDOW (IIO_NUM_ITTES - 1) + +/* This struct is shared between the PROM and the kernel. + * Changes to this struct will require corresponding changes to the kernel. + */ +struct sn_flush_device_common { + int sfdl_bus; + int sfdl_slot; + int sfdl_pin; + struct common_bar_list { + unsigned long start; + unsigned long end; + } sfdl_bar_list[6]; + unsigned long sfdl_force_int_addr; + unsigned long sfdl_flush_value; + volatile unsigned long *sfdl_flush_addr; + u32 sfdl_persistent_busnum; + u32 sfdl_persistent_segment; + struct pcibus_info *sfdl_pcibus_info; +}; + +/* This struct is kernel only and is not used by the PROM */ +struct sn_flush_device_kernel { + spinlock_t sfdl_flush_lock; + struct sn_flush_device_common *common; +}; + +/* 01/16/06 This struct is the old PROM/kernel struct and needs to be included + * for older official PROMs to function on the new kernel base. This struct + * will be removed when the next official PROM release occurs. */ + +struct sn_flush_device_war { + struct sn_flush_device_common common; + u32 filler; /* older PROMs expect the default size of a spinlock_t */ +}; + +/* + * **widget_p - Used as an array[wid_num][device] of sn_flush_device_kernel. + */ +struct sn_flush_nasid_entry { + struct sn_flush_device_kernel **widget_p; // Used as an array of wid_num + u64 iio_itte[8]; +}; + +struct hubdev_info { + geoid_t hdi_geoid; + short hdi_nasid; + short hdi_peer_nasid; /* Dual Porting Peer */ + + struct sn_flush_nasid_entry hdi_flush_nasid_list; + struct xwidget_info hdi_xwidget_info[HUB_WIDGET_ID_MAX + 1]; + + + void *hdi_nodepda; + void *hdi_node_vertex; + u32 max_segment_number; + u32 max_pcibus_number; +}; + +extern void hubdev_init_node(nodepda_t *, cnodeid_t); +extern void hub_error_init(struct hubdev_info *); +extern void ice_error_init(struct hubdev_info *); + + +#endif /* _ASM_IA64_SN_XTALK_HUBDEV_H */ diff --git a/arch/ia64/sn/include/xtalk/xbow.h b/arch/ia64/sn/include/xtalk/xbow.h new file mode 100644 index 000000000..90f37a413 --- /dev/null +++ b/arch/ia64/sn/include/xtalk/xbow.h @@ -0,0 +1,301 @@ +/* + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. + * + * Copyright (C) 1992-1997,2000-2006 Silicon Graphics, Inc. All Rights + * Reserved. + */ +#ifndef _ASM_IA64_SN_XTALK_XBOW_H +#define _ASM_IA64_SN_XTALK_XBOW_H + +#define XBOW_PORT_8 0x8 +#define XBOW_PORT_C 0xc +#define XBOW_PORT_F 0xf + +#define MAX_XBOW_PORTS 8 /* number of ports on xbow chip */ +#define BASE_XBOW_PORT XBOW_PORT_8 /* Lowest external port */ + +#define XBOW_CREDIT 4 + +#define MAX_XBOW_NAME 16 + +/* Register set for each xbow link */ +typedef volatile struct xb_linkregs_s { +/* + * we access these through synergy unswizzled space, so the address + * gets twiddled (i.e. references to 0x4 actually go to 0x0 and vv.) + * That's why we put the register first and filler second. + */ + u32 link_ibf; + u32 filler0; /* filler for proper alignment */ + u32 link_control; + u32 filler1; + u32 link_status; + u32 filler2; + u32 link_arb_upper; + u32 filler3; + u32 link_arb_lower; + u32 filler4; + u32 link_status_clr; + u32 filler5; + u32 link_reset; + u32 filler6; + u32 link_aux_status; + u32 filler7; +} xb_linkregs_t; + +typedef volatile struct xbow_s { + /* standard widget configuration 0x000000-0x000057 */ + struct widget_cfg xb_widget; /* 0x000000 */ + + /* helper fieldnames for accessing bridge widget */ + +#define xb_wid_id xb_widget.w_id +#define xb_wid_stat xb_widget.w_status +#define xb_wid_err_upper xb_widget.w_err_upper_addr +#define xb_wid_err_lower xb_widget.w_err_lower_addr +#define xb_wid_control xb_widget.w_control +#define xb_wid_req_timeout xb_widget.w_req_timeout +#define xb_wid_int_upper xb_widget.w_intdest_upper_addr +#define xb_wid_int_lower xb_widget.w_intdest_lower_addr +#define xb_wid_err_cmdword xb_widget.w_err_cmd_word +#define xb_wid_llp xb_widget.w_llp_cfg +#define xb_wid_stat_clr xb_widget.w_tflush + +/* + * we access these through synergy unswizzled space, so the address + * gets twiddled (i.e. references to 0x4 actually go to 0x0 and vv.) + * That's why we put the register first and filler second. + */ + /* xbow-specific widget configuration 0x000058-0x0000FF */ + u32 xb_wid_arb_reload; /* 0x00005C */ + u32 _pad_000058; + u32 xb_perf_ctr_a; /* 0x000064 */ + u32 _pad_000060; + u32 xb_perf_ctr_b; /* 0x00006c */ + u32 _pad_000068; + u32 xb_nic; /* 0x000074 */ + u32 _pad_000070; + + /* Xbridge only */ + u32 xb_w0_rst_fnc; /* 0x00007C */ + u32 _pad_000078; + u32 xb_l8_rst_fnc; /* 0x000084 */ + u32 _pad_000080; + u32 xb_l9_rst_fnc; /* 0x00008c */ + u32 _pad_000088; + u32 xb_la_rst_fnc; /* 0x000094 */ + u32 _pad_000090; + u32 xb_lb_rst_fnc; /* 0x00009c */ + u32 _pad_000098; + u32 xb_lc_rst_fnc; /* 0x0000a4 */ + u32 _pad_0000a0; + u32 xb_ld_rst_fnc; /* 0x0000ac */ + u32 _pad_0000a8; + u32 xb_le_rst_fnc; /* 0x0000b4 */ + u32 _pad_0000b0; + u32 xb_lf_rst_fnc; /* 0x0000bc */ + u32 _pad_0000b8; + u32 xb_lock; /* 0x0000c4 */ + u32 _pad_0000c0; + u32 xb_lock_clr; /* 0x0000cc */ + u32 _pad_0000c8; + /* end of Xbridge only */ + u32 _pad_0000d0[12]; + + /* Link Specific Registers, port 8..15 0x000100-0x000300 */ + xb_linkregs_t xb_link_raw[MAX_XBOW_PORTS]; +} xbow_t; + +#define xb_link(p) xb_link_raw[(p) & (MAX_XBOW_PORTS - 1)] + +#define XB_FLAGS_EXISTS 0x1 /* device exists */ +#define XB_FLAGS_MASTER 0x2 +#define XB_FLAGS_SLAVE 0x0 +#define XB_FLAGS_GBR 0x4 +#define XB_FLAGS_16BIT 0x8 +#define XB_FLAGS_8BIT 0x0 + +/* is widget port number valid? (based on version 7.0 of xbow spec) */ +#define XBOW_WIDGET_IS_VALID(wid) ((wid) >= XBOW_PORT_8 && (wid) <= XBOW_PORT_F) + +/* whether to use upper or lower arbitration register, given source widget id */ +#define XBOW_ARB_IS_UPPER(wid) ((wid) >= XBOW_PORT_8 && (wid) <= XBOW_PORT_B) +#define XBOW_ARB_IS_LOWER(wid) ((wid) >= XBOW_PORT_C && (wid) <= XBOW_PORT_F) + +/* offset of arbitration register, given source widget id */ +#define XBOW_ARB_OFF(wid) (XBOW_ARB_IS_UPPER(wid) ? 0x1c : 0x24) + +#define XBOW_WID_ID WIDGET_ID +#define XBOW_WID_STAT WIDGET_STATUS +#define XBOW_WID_ERR_UPPER WIDGET_ERR_UPPER_ADDR +#define XBOW_WID_ERR_LOWER WIDGET_ERR_LOWER_ADDR +#define XBOW_WID_CONTROL WIDGET_CONTROL +#define XBOW_WID_REQ_TO WIDGET_REQ_TIMEOUT +#define XBOW_WID_INT_UPPER WIDGET_INTDEST_UPPER_ADDR +#define XBOW_WID_INT_LOWER WIDGET_INTDEST_LOWER_ADDR +#define XBOW_WID_ERR_CMDWORD WIDGET_ERR_CMD_WORD +#define XBOW_WID_LLP WIDGET_LLP_CFG +#define XBOW_WID_STAT_CLR WIDGET_TFLUSH +#define XBOW_WID_ARB_RELOAD 0x5c +#define XBOW_WID_PERF_CTR_A 0x64 +#define XBOW_WID_PERF_CTR_B 0x6c +#define XBOW_WID_NIC 0x74 + +/* Xbridge only */ +#define XBOW_W0_RST_FNC 0x00007C +#define XBOW_L8_RST_FNC 0x000084 +#define XBOW_L9_RST_FNC 0x00008c +#define XBOW_LA_RST_FNC 0x000094 +#define XBOW_LB_RST_FNC 0x00009c +#define XBOW_LC_RST_FNC 0x0000a4 +#define XBOW_LD_RST_FNC 0x0000ac +#define XBOW_LE_RST_FNC 0x0000b4 +#define XBOW_LF_RST_FNC 0x0000bc +#define XBOW_RESET_FENCE(x) ((x) > 7 && (x) < 16) ? \ + (XBOW_W0_RST_FNC + ((x) - 7) * 8) : \ + ((x) == 0) ? XBOW_W0_RST_FNC : 0 +#define XBOW_LOCK 0x0000c4 +#define XBOW_LOCK_CLR 0x0000cc +/* End of Xbridge only */ + +/* used only in ide, but defined here within the reserved portion */ +/* of the widget0 address space (before 0xf4) */ +#define XBOW_WID_UNDEF 0xe4 + +/* xbow link register set base, legal value for x is 0x8..0xf */ +#define XB_LINK_BASE 0x100 +#define XB_LINK_OFFSET 0x40 +#define XB_LINK_REG_BASE(x) (XB_LINK_BASE + ((x) & (MAX_XBOW_PORTS - 1)) * XB_LINK_OFFSET) + +#define XB_LINK_IBUF_FLUSH(x) (XB_LINK_REG_BASE(x) + 0x4) +#define XB_LINK_CTRL(x) (XB_LINK_REG_BASE(x) + 0xc) +#define XB_LINK_STATUS(x) (XB_LINK_REG_BASE(x) + 0x14) +#define XB_LINK_ARB_UPPER(x) (XB_LINK_REG_BASE(x) + 0x1c) +#define XB_LINK_ARB_LOWER(x) (XB_LINK_REG_BASE(x) + 0x24) +#define XB_LINK_STATUS_CLR(x) (XB_LINK_REG_BASE(x) + 0x2c) +#define XB_LINK_RESET(x) (XB_LINK_REG_BASE(x) + 0x34) +#define XB_LINK_AUX_STATUS(x) (XB_LINK_REG_BASE(x) + 0x3c) + +/* link_control(x) */ +#define XB_CTRL_LINKALIVE_IE 0x80000000 /* link comes alive */ +/* reserved: 0x40000000 */ +#define XB_CTRL_PERF_CTR_MODE_MSK 0x30000000 /* perf counter mode */ +#define XB_CTRL_IBUF_LEVEL_MSK 0x0e000000 /* input packet buffer + level */ +#define XB_CTRL_8BIT_MODE 0x01000000 /* force link into 8 + bit mode */ +#define XB_CTRL_BAD_LLP_PKT 0x00800000 /* force bad LLP + packet */ +#define XB_CTRL_WIDGET_CR_MSK 0x007c0000 /* LLP widget credit + mask */ +#define XB_CTRL_WIDGET_CR_SHFT 18 /* LLP widget credit + shift */ +#define XB_CTRL_ILLEGAL_DST_IE 0x00020000 /* illegal destination + */ +#define XB_CTRL_OALLOC_IBUF_IE 0x00010000 /* overallocated input + buffer */ +/* reserved: 0x0000fe00 */ +#define XB_CTRL_BNDWDTH_ALLOC_IE 0x00000100 /* bandwidth alloc */ +#define XB_CTRL_RCV_CNT_OFLOW_IE 0x00000080 /* rcv retry overflow */ +#define XB_CTRL_XMT_CNT_OFLOW_IE 0x00000040 /* xmt retry overflow */ +#define XB_CTRL_XMT_MAX_RTRY_IE 0x00000020 /* max transmit retry */ +#define XB_CTRL_RCV_IE 0x00000010 /* receive */ +#define XB_CTRL_XMT_RTRY_IE 0x00000008 /* transmit retry */ +/* reserved: 0x00000004 */ +#define XB_CTRL_MAXREQ_TOUT_IE 0x00000002 /* maximum request + timeout */ +#define XB_CTRL_SRC_TOUT_IE 0x00000001 /* source timeout */ + +/* link_status(x) */ +#define XB_STAT_LINKALIVE XB_CTRL_LINKALIVE_IE +/* reserved: 0x7ff80000 */ +#define XB_STAT_MULTI_ERR 0x00040000 /* multi error */ +#define XB_STAT_ILLEGAL_DST_ERR XB_CTRL_ILLEGAL_DST_IE +#define XB_STAT_OALLOC_IBUF_ERR XB_CTRL_OALLOC_IBUF_IE +#define XB_STAT_BNDWDTH_ALLOC_ID_MSK 0x0000ff00 /* port bitmask */ +#define XB_STAT_RCV_CNT_OFLOW_ERR XB_CTRL_RCV_CNT_OFLOW_IE +#define XB_STAT_XMT_CNT_OFLOW_ERR XB_CTRL_XMT_CNT_OFLOW_IE +#define XB_STAT_XMT_MAX_RTRY_ERR XB_CTRL_XMT_MAX_RTRY_IE +#define XB_STAT_RCV_ERR XB_CTRL_RCV_IE +#define XB_STAT_XMT_RTRY_ERR XB_CTRL_XMT_RTRY_IE +/* reserved: 0x00000004 */ +#define XB_STAT_MAXREQ_TOUT_ERR XB_CTRL_MAXREQ_TOUT_IE +#define XB_STAT_SRC_TOUT_ERR XB_CTRL_SRC_TOUT_IE + +/* link_aux_status(x) */ +#define XB_AUX_STAT_RCV_CNT 0xff000000 +#define XB_AUX_STAT_XMT_CNT 0x00ff0000 +#define XB_AUX_STAT_TOUT_DST 0x0000ff00 +#define XB_AUX_LINKFAIL_RST_BAD 0x00000040 +#define XB_AUX_STAT_PRESENT 0x00000020 +#define XB_AUX_STAT_PORT_WIDTH 0x00000010 +/* reserved: 0x0000000f */ + +/* + * link_arb_upper/link_arb_lower(x), (reg) should be the link_arb_upper + * register if (x) is 0x8..0xb, link_arb_lower if (x) is 0xc..0xf + */ +#define XB_ARB_GBR_MSK 0x1f +#define XB_ARB_RR_MSK 0x7 +#define XB_ARB_GBR_SHFT(x) (((x) & 0x3) * 8) +#define XB_ARB_RR_SHFT(x) (((x) & 0x3) * 8 + 5) +#define XB_ARB_GBR_CNT(reg,x) ((reg) >> XB_ARB_GBR_SHFT(x) & XB_ARB_GBR_MSK) +#define XB_ARB_RR_CNT(reg,x) ((reg) >> XB_ARB_RR_SHFT(x) & XB_ARB_RR_MSK) + +/* XBOW_WID_STAT */ +#define XB_WID_STAT_LINK_INTR_SHFT (24) +#define XB_WID_STAT_LINK_INTR_MASK (0xFF << XB_WID_STAT_LINK_INTR_SHFT) +#define XB_WID_STAT_LINK_INTR(x) \ + (0x1 << (((x)&7) + XB_WID_STAT_LINK_INTR_SHFT)) +#define XB_WID_STAT_WIDGET0_INTR 0x00800000 +#define XB_WID_STAT_SRCID_MASK 0x000003c0 /* Xbridge only */ +#define XB_WID_STAT_REG_ACC_ERR 0x00000020 +#define XB_WID_STAT_RECV_TOUT 0x00000010 /* Xbridge only */ +#define XB_WID_STAT_ARB_TOUT 0x00000008 /* Xbridge only */ +#define XB_WID_STAT_XTALK_ERR 0x00000004 +#define XB_WID_STAT_DST_TOUT 0x00000002 /* Xbridge only */ +#define XB_WID_STAT_MULTI_ERR 0x00000001 + +#define XB_WID_STAT_SRCID_SHFT 6 + +/* XBOW_WID_CONTROL */ +#define XB_WID_CTRL_REG_ACC_IE XB_WID_STAT_REG_ACC_ERR +#define XB_WID_CTRL_RECV_TOUT XB_WID_STAT_RECV_TOUT +#define XB_WID_CTRL_ARB_TOUT XB_WID_STAT_ARB_TOUT +#define XB_WID_CTRL_XTALK_IE XB_WID_STAT_XTALK_ERR + +/* XBOW_WID_INT_UPPER */ +/* defined in xwidget.h for WIDGET_INTDEST_UPPER_ADDR */ + +/* XBOW WIDGET part number, in the ID register */ +#define XBOW_WIDGET_PART_NUM 0x0 /* crossbow */ +#define XXBOW_WIDGET_PART_NUM 0xd000 /* Xbridge */ +#define XBOW_WIDGET_MFGR_NUM 0x0 +#define XXBOW_WIDGET_MFGR_NUM 0x0 +#define PXBOW_WIDGET_PART_NUM 0xd100 /* PIC */ + +#define XBOW_REV_1_0 0x1 /* xbow rev 1.0 is "1" */ +#define XBOW_REV_1_1 0x2 /* xbow rev 1.1 is "2" */ +#define XBOW_REV_1_2 0x3 /* xbow rev 1.2 is "3" */ +#define XBOW_REV_1_3 0x4 /* xbow rev 1.3 is "4" */ +#define XBOW_REV_2_0 0x5 /* xbow rev 2.0 is "5" */ + +#define XXBOW_PART_REV_1_0 (XXBOW_WIDGET_PART_NUM << 4 | 0x1 ) +#define XXBOW_PART_REV_2_0 (XXBOW_WIDGET_PART_NUM << 4 | 0x2 ) + +/* XBOW_WID_ARB_RELOAD */ +#define XBOW_WID_ARB_RELOAD_INT 0x3f /* GBR reload interval */ + +#define IS_XBRIDGE_XBOW(wid) \ + (XWIDGET_PART_NUM(wid) == XXBOW_WIDGET_PART_NUM && \ + XWIDGET_MFG_NUM(wid) == XXBOW_WIDGET_MFGR_NUM) + +#define IS_PIC_XBOW(wid) \ + (XWIDGET_PART_NUM(wid) == PXBOW_WIDGET_PART_NUM && \ + XWIDGET_MFG_NUM(wid) == XXBOW_WIDGET_MFGR_NUM) + +#define XBOW_WAR_ENABLED(pv, widid) ((1 << XWIDGET_REV_NUM(widid)) & pv) + +#endif /* _ASM_IA64_SN_XTALK_XBOW_H */ diff --git a/arch/ia64/sn/include/xtalk/xwidgetdev.h b/arch/ia64/sn/include/xtalk/xwidgetdev.h new file mode 100644 index 000000000..2800eda0f --- /dev/null +++ b/arch/ia64/sn/include/xtalk/xwidgetdev.h @@ -0,0 +1,70 @@ +/* + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. + * + * Copyright (C) 1992-1997,2000-2003 Silicon Graphics, Inc. All Rights Reserved. + */ +#ifndef _ASM_IA64_SN_XTALK_XWIDGET_H +#define _ASM_IA64_SN_XTALK_XWIDGET_H + +/* WIDGET_ID */ +#define WIDGET_REV_NUM 0xf0000000 +#define WIDGET_PART_NUM 0x0ffff000 +#define WIDGET_MFG_NUM 0x00000ffe +#define WIDGET_REV_NUM_SHFT 28 +#define WIDGET_PART_NUM_SHFT 12 +#define WIDGET_MFG_NUM_SHFT 1 + +#define XWIDGET_PART_NUM(widgetid) (((widgetid) & WIDGET_PART_NUM) >> WIDGET_PART_NUM_SHFT) +#define XWIDGET_REV_NUM(widgetid) (((widgetid) & WIDGET_REV_NUM) >> WIDGET_REV_NUM_SHFT) +#define XWIDGET_MFG_NUM(widgetid) (((widgetid) & WIDGET_MFG_NUM) >> WIDGET_MFG_NUM_SHFT) +#define XWIDGET_PART_REV_NUM(widgetid) ((XWIDGET_PART_NUM(widgetid) << 4) | \ + XWIDGET_REV_NUM(widgetid)) +#define XWIDGET_PART_REV_NUM_REV(partrev) (partrev & 0xf) + +/* widget configuration registers */ +struct widget_cfg{ + u32 w_id; /* 0x04 */ + u32 w_pad_0; /* 0x00 */ + u32 w_status; /* 0x0c */ + u32 w_pad_1; /* 0x08 */ + u32 w_err_upper_addr; /* 0x14 */ + u32 w_pad_2; /* 0x10 */ + u32 w_err_lower_addr; /* 0x1c */ + u32 w_pad_3; /* 0x18 */ + u32 w_control; /* 0x24 */ + u32 w_pad_4; /* 0x20 */ + u32 w_req_timeout; /* 0x2c */ + u32 w_pad_5; /* 0x28 */ + u32 w_intdest_upper_addr; /* 0x34 */ + u32 w_pad_6; /* 0x30 */ + u32 w_intdest_lower_addr; /* 0x3c */ + u32 w_pad_7; /* 0x38 */ + u32 w_err_cmd_word; /* 0x44 */ + u32 w_pad_8; /* 0x40 */ + u32 w_llp_cfg; /* 0x4c */ + u32 w_pad_9; /* 0x48 */ + u32 w_tflush; /* 0x54 */ + u32 w_pad_10; /* 0x50 */ +}; + +/* + * Crosstalk Widget Hardware Identification, as defined in the Crosstalk spec. + */ +struct xwidget_hwid{ + int mfg_num; + int rev_num; + int part_num; +}; + +struct xwidget_info{ + + struct xwidget_hwid xwi_hwid; /* Widget Identification */ + char xwi_masterxid; /* Hub's Widget Port Number */ + void *xwi_hubinfo; /* Hub's provider private info */ + u64 *xwi_hub_provider; /* prom provider functions */ + void *xwi_vertex; +}; + +#endif /* _ASM_IA64_SN_XTALK_XWIDGET_H */ diff --git a/arch/ia64/sn/kernel/Makefile b/arch/ia64/sn/kernel/Makefile new file mode 100644 index 000000000..d27df1d45 --- /dev/null +++ b/arch/ia64/sn/kernel/Makefile @@ -0,0 +1,18 @@ +# arch/ia64/sn/kernel/Makefile +# +# This file is subject to the terms and conditions of the GNU General Public +# License. See the file "COPYING" in the main directory of this archive +# for more details. +# +# Copyright (C) 1999,2001-2006,2008 Silicon Graphics, Inc. All Rights Reserved. +# + +ccflags-y := -Iarch/ia64/sn/include + +obj-y += setup.o bte.o bte_error.o irq.o mca.o idle.o \ + huberror.o io_acpi_init.o io_common.o \ + io_init.o iomv.o klconflib.o pio_phys.o \ + sn2/ +obj-$(CONFIG_IA64_GENERIC) += machvec.o +obj-$(CONFIG_SGI_TIOCX) += tiocx.o +obj-$(CONFIG_PCI_MSI) += msi_sn.o diff --git a/arch/ia64/sn/kernel/bte.c b/arch/ia64/sn/kernel/bte.c new file mode 100644 index 000000000..9146192b8 --- /dev/null +++ b/arch/ia64/sn/kernel/bte.c @@ -0,0 +1,475 @@ +/* + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. + * + * Copyright (c) 2000-2007 Silicon Graphics, Inc. All Rights Reserved. + */ + +#include <linux/module.h> +#include <asm/sn/nodepda.h> +#include <asm/sn/addrs.h> +#include <asm/sn/arch.h> +#include <asm/sn/sn_cpuid.h> +#include <asm/sn/pda.h> +#include <asm/sn/shubio.h> +#include <asm/nodedata.h> +#include <asm/delay.h> + +#include <linux/bootmem.h> +#include <linux/string.h> +#include <linux/sched.h> +#include <linux/slab.h> + +#include <asm/sn/bte.h> + +#ifndef L1_CACHE_MASK +#define L1_CACHE_MASK (L1_CACHE_BYTES - 1) +#endif + +/* two interfaces on two btes */ +#define MAX_INTERFACES_TO_TRY 4 +#define MAX_NODES_TO_TRY 2 + +static struct bteinfo_s *bte_if_on_node(nasid_t nasid, int interface) +{ + nodepda_t *tmp_nodepda; + + if (nasid_to_cnodeid(nasid) == -1) + return (struct bteinfo_s *)NULL; + + tmp_nodepda = NODEPDA(nasid_to_cnodeid(nasid)); + return &tmp_nodepda->bte_if[interface]; + +} + +static inline void bte_start_transfer(struct bteinfo_s *bte, u64 len, u64 mode) +{ + if (is_shub2()) { + BTE_CTRL_STORE(bte, (IBLS_BUSY | ((len) | (mode) << 24))); + } else { + BTE_LNSTAT_STORE(bte, len); + BTE_CTRL_STORE(bte, mode); + } +} + +/************************************************************************ + * Block Transfer Engine copy related functions. + * + ***********************************************************************/ + +/* + * bte_copy(src, dest, len, mode, notification) + * + * Use the block transfer engine to move kernel memory from src to dest + * using the assigned mode. + * + * Parameters: + * src - physical address of the transfer source. + * dest - physical address of the transfer destination. + * len - number of bytes to transfer from source to dest. + * mode - hardware defined. See reference information + * for IBCT0/1 in the SHUB Programmers Reference + * notification - kernel virtual address of the notification cache + * line. If NULL, the default is used and + * the bte_copy is synchronous. + * + * NOTE: This function requires src, dest, and len to + * be cacheline aligned. + */ +bte_result_t bte_copy(u64 src, u64 dest, u64 len, u64 mode, void *notification) +{ + u64 transfer_size; + u64 transfer_stat; + u64 notif_phys_addr; + struct bteinfo_s *bte; + bte_result_t bte_status; + unsigned long irq_flags; + unsigned long itc_end = 0; + int nasid_to_try[MAX_NODES_TO_TRY]; + int my_nasid = cpuid_to_nasid(raw_smp_processor_id()); + int bte_if_index, nasid_index; + int bte_first, btes_per_node = BTES_PER_NODE; + + BTE_PRINTK(("bte_copy(0x%lx, 0x%lx, 0x%lx, 0x%lx, 0x%p)\n", + src, dest, len, mode, notification)); + + if (len == 0) { + return BTE_SUCCESS; + } + + BUG_ON(len & L1_CACHE_MASK); + BUG_ON(src & L1_CACHE_MASK); + BUG_ON(dest & L1_CACHE_MASK); + BUG_ON(len > BTE_MAX_XFER); + + /* + * Start with interface corresponding to cpu number + */ + bte_first = raw_smp_processor_id() % btes_per_node; + + if (mode & BTE_USE_DEST) { + /* try remote then local */ + nasid_to_try[0] = NASID_GET(dest); + if (mode & BTE_USE_ANY) { + nasid_to_try[1] = my_nasid; + } else { + nasid_to_try[1] = 0; + } + } else { + /* try local then remote */ + nasid_to_try[0] = my_nasid; + if (mode & BTE_USE_ANY) { + nasid_to_try[1] = NASID_GET(dest); + } else { + nasid_to_try[1] = 0; + } + } + +retry_bteop: + do { + local_irq_save(irq_flags); + + bte_if_index = bte_first; + nasid_index = 0; + + /* Attempt to lock one of the BTE interfaces. */ + while (nasid_index < MAX_NODES_TO_TRY) { + bte = bte_if_on_node(nasid_to_try[nasid_index],bte_if_index); + + if (bte == NULL) { + nasid_index++; + continue; + } + + if (spin_trylock(&bte->spinlock)) { + if (!(*bte->most_rcnt_na & BTE_WORD_AVAILABLE) || + (BTE_LNSTAT_LOAD(bte) & BTE_ACTIVE)) { + /* Got the lock but BTE still busy */ + spin_unlock(&bte->spinlock); + } else { + /* we got the lock and it's not busy */ + break; + } + } + + bte_if_index = (bte_if_index + 1) % btes_per_node; /* Next interface */ + if (bte_if_index == bte_first) { + /* + * We've tried all interfaces on this node + */ + nasid_index++; + } + + bte = NULL; + } + + if (bte != NULL) { + break; + } + + local_irq_restore(irq_flags); + + if (!(mode & BTE_WACQUIRE)) { + return BTEFAIL_NOTAVAIL; + } + } while (1); + + if (notification == NULL) { + /* User does not want to be notified. */ + bte->most_rcnt_na = &bte->notify; + } else { + bte->most_rcnt_na = notification; + } + + /* Calculate the number of cache lines to transfer. */ + transfer_size = ((len >> L1_CACHE_SHIFT) & BTE_LEN_MASK); + + /* Initialize the notification to a known value. */ + *bte->most_rcnt_na = BTE_WORD_BUSY; + notif_phys_addr = (u64)bte->most_rcnt_na; + + /* Set the source and destination registers */ + BTE_PRINTKV(("IBSA = 0x%lx)\n", src)); + BTE_SRC_STORE(bte, src); + BTE_PRINTKV(("IBDA = 0x%lx)\n", dest)); + BTE_DEST_STORE(bte, dest); + + /* Set the notification register */ + BTE_PRINTKV(("IBNA = 0x%lx)\n", notif_phys_addr)); + BTE_NOTIF_STORE(bte, notif_phys_addr); + + /* Initiate the transfer */ + BTE_PRINTK(("IBCT = 0x%lx)\n", BTE_VALID_MODE(mode))); + bte_start_transfer(bte, transfer_size, BTE_VALID_MODE(mode)); + + itc_end = ia64_get_itc() + (40000000 * local_cpu_data->cyc_per_usec); + + spin_unlock_irqrestore(&bte->spinlock, irq_flags); + + if (notification != NULL) { + return BTE_SUCCESS; + } + + while ((transfer_stat = *bte->most_rcnt_na) == BTE_WORD_BUSY) { + cpu_relax(); + if (ia64_get_itc() > itc_end) { + BTE_PRINTK(("BTE timeout nasid 0x%x bte%d IBLS = 0x%lx na 0x%lx\n", + NASID_GET(bte->bte_base_addr), bte->bte_num, + BTE_LNSTAT_LOAD(bte), *bte->most_rcnt_na) ); + bte->bte_error_count++; + bte->bh_error = IBLS_ERROR; + bte_error_handler(NODEPDA(bte->bte_cnode)); + *bte->most_rcnt_na = BTE_WORD_AVAILABLE; + goto retry_bteop; + } + } + + BTE_PRINTKV((" Delay Done. IBLS = 0x%lx, most_rcnt_na = 0x%lx\n", + BTE_LNSTAT_LOAD(bte), *bte->most_rcnt_na)); + + if (transfer_stat & IBLS_ERROR) { + bte_status = BTE_GET_ERROR_STATUS(transfer_stat); + } else { + bte_status = BTE_SUCCESS; + } + *bte->most_rcnt_na = BTE_WORD_AVAILABLE; + + BTE_PRINTK(("Returning status is 0x%lx and most_rcnt_na is 0x%lx\n", + BTE_LNSTAT_LOAD(bte), *bte->most_rcnt_na)); + + return bte_status; +} + +EXPORT_SYMBOL(bte_copy); + +/* + * bte_unaligned_copy(src, dest, len, mode) + * + * use the block transfer engine to move kernel + * memory from src to dest using the assigned mode. + * + * Parameters: + * src - physical address of the transfer source. + * dest - physical address of the transfer destination. + * len - number of bytes to transfer from source to dest. + * mode - hardware defined. See reference information + * for IBCT0/1 in the SGI documentation. + * + * NOTE: If the source, dest, and len are all cache line aligned, + * then it would be _FAR_ preferable to use bte_copy instead. + */ +bte_result_t bte_unaligned_copy(u64 src, u64 dest, u64 len, u64 mode) +{ + int destFirstCacheOffset; + u64 headBteSource; + u64 headBteLen; + u64 headBcopySrcOffset; + u64 headBcopyDest; + u64 headBcopyLen; + u64 footBteSource; + u64 footBteLen; + u64 footBcopyDest; + u64 footBcopyLen; + bte_result_t rv; + char *bteBlock, *bteBlock_unaligned; + + if (len == 0) { + return BTE_SUCCESS; + } + + /* temporary buffer used during unaligned transfers */ + bteBlock_unaligned = kmalloc(len + 3 * L1_CACHE_BYTES, GFP_KERNEL); + if (bteBlock_unaligned == NULL) { + return BTEFAIL_NOTAVAIL; + } + bteBlock = (char *)L1_CACHE_ALIGN((u64) bteBlock_unaligned); + + headBcopySrcOffset = src & L1_CACHE_MASK; + destFirstCacheOffset = dest & L1_CACHE_MASK; + + /* + * At this point, the transfer is broken into + * (up to) three sections. The first section is + * from the start address to the first physical + * cache line, the second is from the first physical + * cache line to the last complete cache line, + * and the third is from the last cache line to the + * end of the buffer. The first and third sections + * are handled by bte copying into a temporary buffer + * and then bcopy'ing the necessary section into the + * final location. The middle section is handled with + * a standard bte copy. + * + * One nasty exception to the above rule is when the + * source and destination are not symmetrically + * mis-aligned. If the source offset from the first + * cache line is different from the destination offset, + * we make the first section be the entire transfer + * and the bcopy the entire block into place. + */ + if (headBcopySrcOffset == destFirstCacheOffset) { + + /* + * Both the source and destination are the same + * distance from a cache line boundary so we can + * use the bte to transfer the bulk of the + * data. + */ + headBteSource = src & ~L1_CACHE_MASK; + headBcopyDest = dest; + if (headBcopySrcOffset) { + headBcopyLen = + (len > + (L1_CACHE_BYTES - + headBcopySrcOffset) ? L1_CACHE_BYTES + - headBcopySrcOffset : len); + headBteLen = L1_CACHE_BYTES; + } else { + headBcopyLen = 0; + headBteLen = 0; + } + + if (len > headBcopyLen) { + footBcopyLen = (len - headBcopyLen) & L1_CACHE_MASK; + footBteLen = L1_CACHE_BYTES; + + footBteSource = src + len - footBcopyLen; + footBcopyDest = dest + len - footBcopyLen; + + if (footBcopyDest == (headBcopyDest + headBcopyLen)) { + /* + * We have two contiguous bcopy + * blocks. Merge them. + */ + headBcopyLen += footBcopyLen; + headBteLen += footBteLen; + } else if (footBcopyLen > 0) { + rv = bte_copy(footBteSource, + ia64_tpa((unsigned long)bteBlock), + footBteLen, mode, NULL); + if (rv != BTE_SUCCESS) { + kfree(bteBlock_unaligned); + return rv; + } + + memcpy(__va(footBcopyDest), + (char *)bteBlock, footBcopyLen); + } + } else { + footBcopyLen = 0; + footBteLen = 0; + } + + if (len > (headBcopyLen + footBcopyLen)) { + /* now transfer the middle. */ + rv = bte_copy((src + headBcopyLen), + (dest + + headBcopyLen), + (len - headBcopyLen - + footBcopyLen), mode, NULL); + if (rv != BTE_SUCCESS) { + kfree(bteBlock_unaligned); + return rv; + } + + } + } else { + + /* + * The transfer is not symmetric, we will + * allocate a buffer large enough for all the + * data, bte_copy into that buffer and then + * bcopy to the destination. + */ + + headBcopySrcOffset = src & L1_CACHE_MASK; + headBcopyDest = dest; + headBcopyLen = len; + + headBteSource = src - headBcopySrcOffset; + /* Add the leading and trailing bytes from source */ + headBteLen = L1_CACHE_ALIGN(len + headBcopySrcOffset); + } + + if (headBcopyLen > 0) { + rv = bte_copy(headBteSource, + ia64_tpa((unsigned long)bteBlock), headBteLen, + mode, NULL); + if (rv != BTE_SUCCESS) { + kfree(bteBlock_unaligned); + return rv; + } + + memcpy(__va(headBcopyDest), ((char *)bteBlock + + headBcopySrcOffset), headBcopyLen); + } + kfree(bteBlock_unaligned); + return BTE_SUCCESS; +} + +EXPORT_SYMBOL(bte_unaligned_copy); + +/************************************************************************ + * Block Transfer Engine initialization functions. + * + ***********************************************************************/ +static void bte_recovery_timeout(struct timer_list *t) +{ + struct nodepda_s *nodepda = from_timer(nodepda, t, bte_recovery_timer); + + bte_error_handler(nodepda); +} + +/* + * bte_init_node(nodepda, cnode) + * + * Initialize the nodepda structure with BTE base addresses and + * spinlocks. + */ +void bte_init_node(nodepda_t * mynodepda, cnodeid_t cnode) +{ + int i; + + /* + * Indicate that all the block transfer engines on this node + * are available. + */ + + /* + * Allocate one bte_recover_t structure per node. It holds + * the recovery lock for node. All the bte interface structures + * will point at this one bte_recover structure to get the lock. + */ + spin_lock_init(&mynodepda->bte_recovery_lock); + timer_setup(&mynodepda->bte_recovery_timer, bte_recovery_timeout, 0); + + for (i = 0; i < BTES_PER_NODE; i++) { + u64 *base_addr; + + /* Which link status register should we use? */ + base_addr = (u64 *) + REMOTE_HUB_ADDR(cnodeid_to_nasid(cnode), BTE_BASE_ADDR(i)); + mynodepda->bte_if[i].bte_base_addr = base_addr; + mynodepda->bte_if[i].bte_source_addr = BTE_SOURCE_ADDR(base_addr); + mynodepda->bte_if[i].bte_destination_addr = BTE_DEST_ADDR(base_addr); + mynodepda->bte_if[i].bte_control_addr = BTE_CTRL_ADDR(base_addr); + mynodepda->bte_if[i].bte_notify_addr = BTE_NOTIF_ADDR(base_addr); + + /* + * Initialize the notification and spinlock + * so the first transfer can occur. + */ + mynodepda->bte_if[i].most_rcnt_na = + &(mynodepda->bte_if[i].notify); + mynodepda->bte_if[i].notify = BTE_WORD_AVAILABLE; + spin_lock_init(&mynodepda->bte_if[i].spinlock); + + mynodepda->bte_if[i].bte_cnode = cnode; + mynodepda->bte_if[i].bte_error_count = 0; + mynodepda->bte_if[i].bte_num = i; + mynodepda->bte_if[i].cleanup_active = 0; + mynodepda->bte_if[i].bh_error = 0; + } + +} diff --git a/arch/ia64/sn/kernel/bte_error.c b/arch/ia64/sn/kernel/bte_error.c new file mode 100644 index 000000000..d92786c09 --- /dev/null +++ b/arch/ia64/sn/kernel/bte_error.c @@ -0,0 +1,255 @@ +/* + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. + * + * Copyright (c) 2000-2007 Silicon Graphics, Inc. All Rights Reserved. + */ + +#include <linux/types.h> +#include <asm/sn/sn_sal.h> +#include "ioerror.h" +#include <asm/sn/addrs.h> +#include <asm/sn/shubio.h> +#include <asm/sn/geo.h> +#include "xtalk/xwidgetdev.h" +#include "xtalk/hubdev.h" +#include <asm/sn/bte.h> +#include <asm/param.h> + +/* + * Bte error handling is done in two parts. The first captures + * any crb related errors. Since there can be multiple crbs per + * interface and multiple interfaces active, we need to wait until + * all active crbs are completed. This is the first job of the + * second part error handler. When all bte related CRBs are cleanly + * completed, it resets the interfaces and gets them ready for new + * transfers to be queued. + */ + +/* + * Wait until all BTE related CRBs are completed + * and then reset the interfaces. + */ +static int shub1_bte_error_handler(struct nodepda_s *err_nodepda) +{ + struct timer_list *recovery_timer = &err_nodepda->bte_recovery_timer; + nasid_t nasid; + int i; + int valid_crbs; + ii_imem_u_t imem; /* II IMEM Register */ + ii_icrb0_d_u_t icrbd; /* II CRB Register D */ + ii_ibcr_u_t ibcr; + ii_icmr_u_t icmr; + ii_ieclr_u_t ieclr; + + BTE_PRINTK(("shub1_bte_error_handler(%p) - %d\n", err_nodepda, + smp_processor_id())); + + if ((err_nodepda->bte_if[0].bh_error == BTE_SUCCESS) && + (err_nodepda->bte_if[1].bh_error == BTE_SUCCESS)) { + BTE_PRINTK(("eh:%p:%d Nothing to do.\n", err_nodepda, + smp_processor_id())); + return 1; + } + + /* Determine information about our hub */ + nasid = cnodeid_to_nasid(err_nodepda->bte_if[0].bte_cnode); + + /* + * A BTE transfer can use multiple CRBs. We need to make sure + * that all the BTE CRBs are complete (or timed out) before + * attempting to clean up the error. Resetting the BTE while + * there are still BTE CRBs active will hang the BTE. + * We should look at all the CRBs to see if they are allocated + * to the BTE and see if they are still active. When none + * are active, we can continue with the cleanup. + * + * We also want to make sure that the local NI port is up. + * When a router resets the NI port can go down, while it + * goes through the LLP handshake, but then comes back up. + */ + icmr.ii_icmr_regval = REMOTE_HUB_L(nasid, IIO_ICMR); + if (icmr.ii_icmr_fld_s.i_crb_mark != 0) { + /* + * There are errors which still need to be cleaned up by + * hubiio_crb_error_handler + */ + mod_timer(recovery_timer, jiffies + (HZ * 5)); + BTE_PRINTK(("eh:%p:%d Marked Giving up\n", err_nodepda, + smp_processor_id())); + return 1; + } + if (icmr.ii_icmr_fld_s.i_crb_vld != 0) { + + valid_crbs = icmr.ii_icmr_fld_s.i_crb_vld; + + for (i = 0; i < IIO_NUM_CRBS; i++) { + if (!((1 << i) & valid_crbs)) { + /* This crb was not marked as valid, ignore */ + continue; + } + icrbd.ii_icrb0_d_regval = + REMOTE_HUB_L(nasid, IIO_ICRB_D(i)); + if (icrbd.d_bteop) { + mod_timer(recovery_timer, jiffies + (HZ * 5)); + BTE_PRINTK(("eh:%p:%d Valid %d, Giving up\n", + err_nodepda, smp_processor_id(), + i)); + return 1; + } + } + } + + BTE_PRINTK(("eh:%p:%d Cleaning up\n", err_nodepda, smp_processor_id())); + /* Re-enable both bte interfaces */ + imem.ii_imem_regval = REMOTE_HUB_L(nasid, IIO_IMEM); + imem.ii_imem_fld_s.i_b0_esd = imem.ii_imem_fld_s.i_b1_esd = 1; + REMOTE_HUB_S(nasid, IIO_IMEM, imem.ii_imem_regval); + + /* Clear BTE0/1 error bits */ + ieclr.ii_ieclr_regval = 0; + if (err_nodepda->bte_if[0].bh_error != BTE_SUCCESS) + ieclr.ii_ieclr_fld_s.i_e_bte_0 = 1; + if (err_nodepda->bte_if[1].bh_error != BTE_SUCCESS) + ieclr.ii_ieclr_fld_s.i_e_bte_1 = 1; + REMOTE_HUB_S(nasid, IIO_IECLR, ieclr.ii_ieclr_regval); + + /* Reinitialize both BTE state machines. */ + ibcr.ii_ibcr_regval = REMOTE_HUB_L(nasid, IIO_IBCR); + ibcr.ii_ibcr_fld_s.i_soft_reset = 1; + REMOTE_HUB_S(nasid, IIO_IBCR, ibcr.ii_ibcr_regval); + + del_timer(recovery_timer); + return 0; +} + +/* + * Wait until all BTE related CRBs are completed + * and then reset the interfaces. + */ +static int shub2_bte_error_handler(struct nodepda_s *err_nodepda) +{ + struct timer_list *recovery_timer = &err_nodepda->bte_recovery_timer; + struct bteinfo_s *bte; + nasid_t nasid; + u64 status; + int i; + + nasid = cnodeid_to_nasid(err_nodepda->bte_if[0].bte_cnode); + + /* + * Verify that all the BTEs are complete + */ + for (i = 0; i < BTES_PER_NODE; i++) { + bte = &err_nodepda->bte_if[i]; + status = BTE_LNSTAT_LOAD(bte); + if (status & IBLS_ERROR) { + bte->bh_error = BTE_SHUB2_ERROR(status); + continue; + } + if (!(status & IBLS_BUSY)) + continue; + mod_timer(recovery_timer, jiffies + (HZ * 5)); + BTE_PRINTK(("eh:%p:%d Marked Giving up\n", err_nodepda, + smp_processor_id())); + return 1; + } + if (ia64_sn_bte_recovery(nasid)) + panic("bte_error_handler(): Fatal BTE Error"); + + del_timer(recovery_timer); + return 0; +} + +/* + * Wait until all BTE related CRBs are completed + * and then reset the interfaces. + */ +void bte_error_handler(struct nodepda_s *err_nodepda) +{ + spinlock_t *recovery_lock = &err_nodepda->bte_recovery_lock; + int i; + unsigned long irq_flags; + volatile u64 *notify; + bte_result_t bh_error; + + BTE_PRINTK(("bte_error_handler(%p) - %d\n", err_nodepda, + smp_processor_id())); + + spin_lock_irqsave(recovery_lock, irq_flags); + + /* + * Lock all interfaces on this node to prevent new transfers + * from being queued. + */ + for (i = 0; i < BTES_PER_NODE; i++) { + if (err_nodepda->bte_if[i].cleanup_active) { + continue; + } + spin_lock(&err_nodepda->bte_if[i].spinlock); + BTE_PRINTK(("eh:%p:%d locked %d\n", err_nodepda, + smp_processor_id(), i)); + err_nodepda->bte_if[i].cleanup_active = 1; + } + + if (is_shub1()) { + if (shub1_bte_error_handler(err_nodepda)) { + spin_unlock_irqrestore(recovery_lock, irq_flags); + return; + } + } else { + if (shub2_bte_error_handler(err_nodepda)) { + spin_unlock_irqrestore(recovery_lock, irq_flags); + return; + } + } + + for (i = 0; i < BTES_PER_NODE; i++) { + bh_error = err_nodepda->bte_if[i].bh_error; + if (bh_error != BTE_SUCCESS) { + /* There is an error which needs to be notified */ + notify = err_nodepda->bte_if[i].most_rcnt_na; + BTE_PRINTK(("cnode %d bte %d error=0x%lx\n", + err_nodepda->bte_if[i].bte_cnode, + err_nodepda->bte_if[i].bte_num, + IBLS_ERROR | (u64) bh_error)); + *notify = IBLS_ERROR | bh_error; + err_nodepda->bte_if[i].bh_error = BTE_SUCCESS; + } + + err_nodepda->bte_if[i].cleanup_active = 0; + BTE_PRINTK(("eh:%p:%d Unlocked %d\n", err_nodepda, + smp_processor_id(), i)); + spin_unlock(&err_nodepda->bte_if[i].spinlock); + } + + spin_unlock_irqrestore(recovery_lock, irq_flags); +} + +/* + * First part error handler. This is called whenever any error CRB interrupt + * is generated by the II. + */ +void +bte_crb_error_handler(cnodeid_t cnode, int btenum, + int crbnum, ioerror_t * ioe, int bteop) +{ + struct bteinfo_s *bte; + + + bte = &(NODEPDA(cnode)->bte_if[btenum]); + + /* + * The caller has already figured out the error type, we save that + * in the bte handle structure for the thread exercising the + * interface to consume. + */ + bte->bh_error = ioe->ie_errortype + BTEFAIL_OFFSET; + bte->bte_error_count++; + + BTE_PRINTK(("Got an error on cnode %d bte %d: HW error type 0x%x\n", + bte->bte_cnode, bte->bte_num, ioe->ie_errortype)); + bte_error_handler(NODEPDA(cnode)); +} + diff --git a/arch/ia64/sn/kernel/huberror.c b/arch/ia64/sn/kernel/huberror.c new file mode 100644 index 000000000..97fa56ddd --- /dev/null +++ b/arch/ia64/sn/kernel/huberror.c @@ -0,0 +1,220 @@ +/* + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. + * + * Copyright (C) 1992 - 1997, 2000,2002-2007 Silicon Graphics, Inc. All rights reserved. + */ + +#include <linux/types.h> +#include <linux/interrupt.h> +#include <asm/delay.h> +#include <asm/sn/sn_sal.h> +#include "ioerror.h" +#include <asm/sn/addrs.h> +#include <asm/sn/shubio.h> +#include <asm/sn/geo.h> +#include "xtalk/xwidgetdev.h" +#include "xtalk/hubdev.h" +#include <asm/sn/bte.h> + +void hubiio_crb_error_handler(struct hubdev_info *hubdev_info); +extern void bte_crb_error_handler(cnodeid_t, int, int, ioerror_t *, + int); +static irqreturn_t hub_eint_handler(int irq, void *arg) +{ + struct hubdev_info *hubdev_info; + struct ia64_sal_retval ret_stuff; + nasid_t nasid; + + ret_stuff.status = 0; + ret_stuff.v0 = 0; + hubdev_info = (struct hubdev_info *)arg; + nasid = hubdev_info->hdi_nasid; + + if (is_shub1()) { + SAL_CALL_NOLOCK(ret_stuff, SN_SAL_HUB_ERROR_INTERRUPT, + (u64) nasid, 0, 0, 0, 0, 0, 0); + + if ((int)ret_stuff.v0) + panic("%s: Fatal %s Error", __func__, + ((nasid & 1) ? "TIO" : "HUBII")); + + if (!(nasid & 1)) /* Not a TIO, handle CRB errors */ + (void)hubiio_crb_error_handler(hubdev_info); + } else + if (nasid & 1) { /* TIO errors */ + SAL_CALL_NOLOCK(ret_stuff, SN_SAL_HUB_ERROR_INTERRUPT, + (u64) nasid, 0, 0, 0, 0, 0, 0); + + if ((int)ret_stuff.v0) + panic("%s: Fatal TIO Error", __func__); + } else + bte_error_handler(NODEPDA(nasid_to_cnodeid(nasid))); + + return IRQ_HANDLED; +} + +/* + * Free the hub CRB "crbnum" which encountered an error. + * Assumption is, error handling was successfully done, + * and we now want to return the CRB back to Hub for normal usage. + * + * In order to free the CRB, all that's needed is to de-allocate it + * + * Assumption: + * No other processor is mucking around with the hub control register. + * So, upper layer has to single thread this. + */ +void hubiio_crb_free(struct hubdev_info *hubdev_info, int crbnum) +{ + ii_icrb0_b_u_t icrbb; + + /* + * The hardware does NOT clear the mark bit, so it must get cleared + * here to be sure the error is not processed twice. + */ + icrbb.ii_icrb0_b_regval = REMOTE_HUB_L(hubdev_info->hdi_nasid, + IIO_ICRB_B(crbnum)); + icrbb.b_mark = 0; + REMOTE_HUB_S(hubdev_info->hdi_nasid, IIO_ICRB_B(crbnum), + icrbb.ii_icrb0_b_regval); + /* + * Deallocate the register wait till hub indicates it's done. + */ + REMOTE_HUB_S(hubdev_info->hdi_nasid, IIO_ICDR, (IIO_ICDR_PND | crbnum)); + while (REMOTE_HUB_L(hubdev_info->hdi_nasid, IIO_ICDR) & IIO_ICDR_PND) + cpu_relax(); + +} + +/* + * hubiio_crb_error_handler + * + * This routine gets invoked when a hub gets an error + * interrupt. So, the routine is running in interrupt context + * at error interrupt level. + * Action: + * It's responsible for identifying ALL the CRBs that are marked + * with error, and process them. + * + * If you find the CRB that's marked with error, map this to the + * reason it caused error, and invoke appropriate error handler. + * + * XXX Be aware of the information in the context register. + * + * NOTE: + * Use REMOTE_HUB_* macro instead of LOCAL_HUB_* so that the interrupt + * handler can be run on any node. (not necessarily the node + * corresponding to the hub that encountered error). + */ + +void hubiio_crb_error_handler(struct hubdev_info *hubdev_info) +{ + nasid_t nasid; + ii_icrb0_a_u_t icrba; /* II CRB Register A */ + ii_icrb0_b_u_t icrbb; /* II CRB Register B */ + ii_icrb0_c_u_t icrbc; /* II CRB Register C */ + ii_icrb0_d_u_t icrbd; /* II CRB Register D */ + ii_icrb0_e_u_t icrbe; /* II CRB Register D */ + int i; + int num_errors = 0; /* Num of errors handled */ + ioerror_t ioerror; + + nasid = hubdev_info->hdi_nasid; + + /* + * XXX - Add locking for any recovery actions + */ + /* + * Scan through all CRBs in the Hub, and handle the errors + * in any of the CRBs marked. + */ + for (i = 0; i < IIO_NUM_CRBS; i++) { + /* Check this crb entry to see if it is in error. */ + icrbb.ii_icrb0_b_regval = REMOTE_HUB_L(nasid, IIO_ICRB_B(i)); + + if (icrbb.b_mark == 0) { + continue; + } + + icrba.ii_icrb0_a_regval = REMOTE_HUB_L(nasid, IIO_ICRB_A(i)); + + IOERROR_INIT(&ioerror); + + /* read other CRB error registers. */ + icrbc.ii_icrb0_c_regval = REMOTE_HUB_L(nasid, IIO_ICRB_C(i)); + icrbd.ii_icrb0_d_regval = REMOTE_HUB_L(nasid, IIO_ICRB_D(i)); + icrbe.ii_icrb0_e_regval = REMOTE_HUB_L(nasid, IIO_ICRB_E(i)); + + IOERROR_SETVALUE(&ioerror, errortype, icrbb.b_ecode); + + /* Check if this error is due to BTE operation, + * and handle it separately. + */ + if (icrbd.d_bteop || + ((icrbb.b_initiator == IIO_ICRB_INIT_BTE0 || + icrbb.b_initiator == IIO_ICRB_INIT_BTE1) && + (icrbb.b_imsgtype == IIO_ICRB_IMSGT_BTE || + icrbb.b_imsgtype == IIO_ICRB_IMSGT_SN1NET))) { + + int bte_num; + + if (icrbd.d_bteop) + bte_num = icrbc.c_btenum; + else /* b_initiator bit 2 gives BTE number */ + bte_num = (icrbb.b_initiator & 0x4) >> 2; + + hubiio_crb_free(hubdev_info, i); + + bte_crb_error_handler(nasid_to_cnodeid(nasid), bte_num, + i, &ioerror, icrbd.d_bteop); + num_errors++; + continue; + } + } +} + +/* + * Function : hub_error_init + * Purpose : initialize the error handling requirements for a given hub. + * Parameters : cnode, the compact nodeid. + * Assumptions : Called only once per hub, either by a local cpu. Or by a + * remote cpu, when this hub is headless.(cpuless) + * Returns : None + */ +void hub_error_init(struct hubdev_info *hubdev_info) +{ + + if (request_irq(SGI_II_ERROR, hub_eint_handler, IRQF_SHARED, + "SN_hub_error", hubdev_info)) { + printk(KERN_ERR "hub_error_init: Failed to request_irq for 0x%p\n", + hubdev_info); + return; + } + irq_set_handler(SGI_II_ERROR, handle_level_irq); + sn_set_err_irq_affinity(SGI_II_ERROR); +} + + +/* + * Function : ice_error_init + * Purpose : initialize the error handling requirements for a given tio. + * Parameters : cnode, the compact nodeid. + * Assumptions : Called only once per tio. + * Returns : None + */ +void ice_error_init(struct hubdev_info *hubdev_info) +{ + + if (request_irq + (SGI_TIO_ERROR, (void *)hub_eint_handler, IRQF_SHARED, "SN_TIO_error", + (void *)hubdev_info)) { + printk("ice_error_init: request_irq() error hubdev_info 0x%p\n", + hubdev_info); + return; + } + irq_set_handler(SGI_TIO_ERROR, handle_level_irq); + sn_set_err_irq_affinity(SGI_TIO_ERROR); +} + diff --git a/arch/ia64/sn/kernel/idle.c b/arch/ia64/sn/kernel/idle.c new file mode 100644 index 000000000..49d178f02 --- /dev/null +++ b/arch/ia64/sn/kernel/idle.c @@ -0,0 +1,30 @@ +/* + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. + * + * Copyright (c) 2001-2004 Silicon Graphics, Inc. All rights reserved. + */ + +#include <asm/sn/leds.h> + +void snidle(int state) +{ + if (state) { + if (pda->idle_flag == 0) { + /* + * Turn the activity LED off. + */ + set_led_bits(0, LED_CPU_ACTIVITY); + } + + pda->idle_flag = 1; + } else { + /* + * Turn the activity LED on. + */ + set_led_bits(LED_CPU_ACTIVITY, LED_CPU_ACTIVITY); + + pda->idle_flag = 0; + } +} diff --git a/arch/ia64/sn/kernel/io_acpi_init.c b/arch/ia64/sn/kernel/io_acpi_init.c new file mode 100644 index 000000000..c31fe637b --- /dev/null +++ b/arch/ia64/sn/kernel/io_acpi_init.c @@ -0,0 +1,513 @@ +/* + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. + * + * Copyright (C) 2006 Silicon Graphics, Inc. All rights reserved. + */ + +#include <asm/sn/types.h> +#include <asm/sn/addrs.h> +#include <asm/sn/pcidev.h> +#include <asm/sn/pcibus_provider_defs.h> +#include <asm/sn/sn_sal.h> +#include "xtalk/hubdev.h" +#include <linux/acpi.h> +#include <linux/slab.h> +#include <linux/export.h> + + +/* + * The code in this file will only be executed when running with + * a PROM that has ACPI IO support. (i.e., SN_ACPI_BASE_SUPPORT() == 1) + */ + + +/* + * This value must match the UUID the PROM uses + * (io/acpi/defblk.c) when building a vendor descriptor. + */ +struct acpi_vendor_uuid sn_uuid = { + .subtype = 0, + .data = { 0x2c, 0xc6, 0xa6, 0xfe, 0x9c, 0x44, 0xda, 0x11, + 0xa2, 0x7c, 0x08, 0x00, 0x69, 0x13, 0xea, 0x51 }, +}; + +struct sn_pcidev_match { + u8 bus; + unsigned int devfn; + acpi_handle handle; +}; + +/* + * Perform the early IO init in PROM. + */ +static long +sal_ioif_init(u64 *result) +{ + struct ia64_sal_retval isrv = {0,0,0,0}; + + SAL_CALL_NOLOCK(isrv, + SN_SAL_IOIF_INIT, 0, 0, 0, 0, 0, 0, 0); + *result = isrv.v0; + return isrv.status; +} + +/* + * sn_acpi_hubdev_init() - This function is called by acpi_ns_get_device_callback() + * for all SGIHUB and SGITIO acpi devices defined in the + * DSDT. It obtains the hubdev_info pointer from the + * ACPI vendor resource, which the PROM setup, and sets up the + * hubdev_info in the pda. + */ + +static acpi_status __init +sn_acpi_hubdev_init(acpi_handle handle, u32 depth, void *context, void **ret) +{ + struct acpi_buffer buffer = { ACPI_ALLOCATE_BUFFER, NULL }; + struct acpi_buffer name_buffer = { ACPI_ALLOCATE_BUFFER, NULL }; + u64 addr; + struct hubdev_info *hubdev; + struct hubdev_info *hubdev_ptr; + int i; + u64 nasid; + struct acpi_resource *resource; + acpi_status status; + struct acpi_resource_vendor_typed *vendor; + extern void sn_common_hubdev_init(struct hubdev_info *); + + status = acpi_get_vendor_resource(handle, METHOD_NAME__CRS, + &sn_uuid, &buffer); + if (ACPI_FAILURE(status)) { + acpi_get_name(handle, ACPI_FULL_PATHNAME, &name_buffer); + printk(KERN_ERR + "sn_acpi_hubdev_init: acpi_get_vendor_resource() " + "(0x%x) failed for: %s\n", status, + (char *)name_buffer.pointer); + kfree(name_buffer.pointer); + return AE_OK; /* Continue walking namespace */ + } + + resource = buffer.pointer; + vendor = &resource->data.vendor_typed; + if ((vendor->byte_length - sizeof(struct acpi_vendor_uuid)) != + sizeof(struct hubdev_info *)) { + acpi_get_name(handle, ACPI_FULL_PATHNAME, &name_buffer); + printk(KERN_ERR + "sn_acpi_hubdev_init: Invalid vendor data length: " + "%d for: %s\n", + vendor->byte_length, (char *)name_buffer.pointer); + kfree(name_buffer.pointer); + goto exit; + } + + memcpy(&addr, vendor->byte_data, sizeof(struct hubdev_info *)); + hubdev_ptr = __va((struct hubdev_info *) addr); + + nasid = hubdev_ptr->hdi_nasid; + i = nasid_to_cnodeid(nasid); + hubdev = (struct hubdev_info *)(NODEPDA(i)->pdinfo); + *hubdev = *hubdev_ptr; + sn_common_hubdev_init(hubdev); + +exit: + kfree(buffer.pointer); + return AE_OK; /* Continue walking namespace */ +} + +/* + * sn_get_bussoft_ptr() - The pcibus_bussoft pointer is found in + * the ACPI Vendor resource for this bus. + */ +static struct pcibus_bussoft * +sn_get_bussoft_ptr(struct pci_bus *bus) +{ + u64 addr; + struct acpi_buffer buffer = { ACPI_ALLOCATE_BUFFER, NULL }; + struct acpi_buffer name_buffer = { ACPI_ALLOCATE_BUFFER, NULL }; + acpi_handle handle; + struct pcibus_bussoft *prom_bussoft_ptr; + struct acpi_resource *resource; + acpi_status status; + struct acpi_resource_vendor_typed *vendor; + + + handle = acpi_device_handle(PCI_CONTROLLER(bus)->companion); + status = acpi_get_vendor_resource(handle, METHOD_NAME__CRS, + &sn_uuid, &buffer); + if (ACPI_FAILURE(status)) { + acpi_get_name(handle, ACPI_FULL_PATHNAME, &name_buffer); + printk(KERN_ERR "%s: " + "acpi_get_vendor_resource() failed (0x%x) for: %s\n", + __func__, status, (char *)name_buffer.pointer); + kfree(name_buffer.pointer); + return NULL; + } + resource = buffer.pointer; + vendor = &resource->data.vendor_typed; + + if ((vendor->byte_length - sizeof(struct acpi_vendor_uuid)) != + sizeof(struct pcibus_bussoft *)) { + printk(KERN_ERR + "%s: Invalid vendor data length %d\n", + __func__, vendor->byte_length); + kfree(buffer.pointer); + return NULL; + } + memcpy(&addr, vendor->byte_data, sizeof(struct pcibus_bussoft *)); + prom_bussoft_ptr = __va((struct pcibus_bussoft *) addr); + kfree(buffer.pointer); + + return prom_bussoft_ptr; +} + +/* + * sn_extract_device_info - Extract the pcidev_info and the sn_irq_info + * pointers from the vendor resource using the + * provided acpi handle, and copy the structures + * into the argument buffers. + */ +static int +sn_extract_device_info(acpi_handle handle, struct pcidev_info **pcidev_info, + struct sn_irq_info **sn_irq_info) +{ + u64 addr; + struct acpi_buffer buffer = { ACPI_ALLOCATE_BUFFER, NULL }; + struct acpi_buffer name_buffer = { ACPI_ALLOCATE_BUFFER, NULL }; + struct sn_irq_info *irq_info, *irq_info_prom; + struct pcidev_info *pcidev_ptr, *pcidev_prom_ptr; + struct acpi_resource *resource; + int ret = 0; + acpi_status status; + struct acpi_resource_vendor_typed *vendor; + + /* + * The pointer to this device's pcidev_info structure in + * the PROM, is in the vendor resource. + */ + status = acpi_get_vendor_resource(handle, METHOD_NAME__CRS, + &sn_uuid, &buffer); + if (ACPI_FAILURE(status)) { + acpi_get_name(handle, ACPI_FULL_PATHNAME, &name_buffer); + printk(KERN_ERR + "%s: acpi_get_vendor_resource() failed (0x%x) for: %s\n", + __func__, status, (char *)name_buffer.pointer); + kfree(name_buffer.pointer); + return 1; + } + + resource = buffer.pointer; + vendor = &resource->data.vendor_typed; + if ((vendor->byte_length - sizeof(struct acpi_vendor_uuid)) != + sizeof(struct pci_devdev_info *)) { + acpi_get_name(handle, ACPI_FULL_PATHNAME, &name_buffer); + printk(KERN_ERR + "%s: Invalid vendor data length: %d for: %s\n", + __func__, vendor->byte_length, + (char *)name_buffer.pointer); + kfree(name_buffer.pointer); + ret = 1; + goto exit; + } + + pcidev_ptr = kzalloc(sizeof(struct pcidev_info), GFP_KERNEL); + if (!pcidev_ptr) + panic("%s: Unable to alloc memory for pcidev_info", __func__); + + memcpy(&addr, vendor->byte_data, sizeof(struct pcidev_info *)); + pcidev_prom_ptr = __va(addr); + memcpy(pcidev_ptr, pcidev_prom_ptr, sizeof(struct pcidev_info)); + + /* Get the IRQ info */ + irq_info = kzalloc(sizeof(struct sn_irq_info), GFP_KERNEL); + if (!irq_info) + panic("%s: Unable to alloc memory for sn_irq_info", __func__); + + if (pcidev_ptr->pdi_sn_irq_info) { + irq_info_prom = __va(pcidev_ptr->pdi_sn_irq_info); + memcpy(irq_info, irq_info_prom, sizeof(struct sn_irq_info)); + } + + *pcidev_info = pcidev_ptr; + *sn_irq_info = irq_info; + +exit: + kfree(buffer.pointer); + return ret; +} + +static unsigned int +get_host_devfn(acpi_handle device_handle, acpi_handle rootbus_handle) +{ + unsigned long long adr; + acpi_handle child; + unsigned int devfn; + int function; + acpi_handle parent; + int slot; + acpi_status status; + struct acpi_buffer name_buffer = { ACPI_ALLOCATE_BUFFER, NULL }; + + acpi_get_name(device_handle, ACPI_FULL_PATHNAME, &name_buffer); + + /* + * Do an upward search to find the root bus device, and + * obtain the host devfn from the previous child device. + */ + child = device_handle; + while (child) { + status = acpi_get_parent(child, &parent); + if (ACPI_FAILURE(status)) { + printk(KERN_ERR "%s: acpi_get_parent() failed " + "(0x%x) for: %s\n", __func__, status, + (char *)name_buffer.pointer); + panic("%s: Unable to find host devfn\n", __func__); + } + if (parent == rootbus_handle) + break; + child = parent; + } + if (!child) { + printk(KERN_ERR "%s: Unable to find root bus for: %s\n", + __func__, (char *)name_buffer.pointer); + BUG(); + } + + status = acpi_evaluate_integer(child, METHOD_NAME__ADR, NULL, &adr); + if (ACPI_FAILURE(status)) { + printk(KERN_ERR "%s: Unable to get _ADR (0x%x) for: %s\n", + __func__, status, (char *)name_buffer.pointer); + panic("%s: Unable to find host devfn\n", __func__); + } + + kfree(name_buffer.pointer); + + slot = (adr >> 16) & 0xffff; + function = adr & 0xffff; + devfn = PCI_DEVFN(slot, function); + return devfn; +} + +/* + * find_matching_device - Callback routine to find the ACPI device + * that matches up with our pci_dev device. + * Matching is done on bus number and devfn. + * To find the bus number for a particular + * ACPI device, we must look at the _BBN method + * of its parent. + */ +static acpi_status +find_matching_device(acpi_handle handle, u32 lvl, void *context, void **rv) +{ + unsigned long long bbn = -1; + unsigned long long adr; + acpi_handle parent = NULL; + acpi_status status; + unsigned int devfn; + int function; + int slot; + struct sn_pcidev_match *info = context; + struct acpi_buffer name_buffer = { ACPI_ALLOCATE_BUFFER, NULL }; + + status = acpi_evaluate_integer(handle, METHOD_NAME__ADR, NULL, + &adr); + if (ACPI_SUCCESS(status)) { + status = acpi_get_parent(handle, &parent); + if (ACPI_FAILURE(status)) { + acpi_get_name(handle, ACPI_FULL_PATHNAME, &name_buffer); + printk(KERN_ERR + "%s: acpi_get_parent() failed (0x%x) for: %s\n", + __func__, status, (char *)name_buffer.pointer); + kfree(name_buffer.pointer); + return AE_OK; + } + status = acpi_evaluate_integer(parent, METHOD_NAME__BBN, + NULL, &bbn); + if (ACPI_FAILURE(status)) { + acpi_get_name(handle, ACPI_FULL_PATHNAME, &name_buffer); + printk(KERN_ERR + "%s: Failed to find _BBN in parent of: %s\n", + __func__, (char *)name_buffer.pointer); + kfree(name_buffer.pointer); + return AE_OK; + } + + slot = (adr >> 16) & 0xffff; + function = adr & 0xffff; + devfn = PCI_DEVFN(slot, function); + if ((info->devfn == devfn) && (info->bus == bbn)) { + /* We have a match! */ + info->handle = handle; + return 1; + } + } + return AE_OK; +} + +/* + * sn_acpi_get_pcidev_info - Search ACPI namespace for the acpi + * device matching the specified pci_dev, + * and return the pcidev info and irq info. + */ +int +sn_acpi_get_pcidev_info(struct pci_dev *dev, struct pcidev_info **pcidev_info, + struct sn_irq_info **sn_irq_info) +{ + unsigned int host_devfn; + struct sn_pcidev_match pcidev_match; + acpi_handle rootbus_handle; + unsigned long long segment; + acpi_status status; + struct acpi_buffer name_buffer = { ACPI_ALLOCATE_BUFFER, NULL }; + + rootbus_handle = acpi_device_handle(PCI_CONTROLLER(dev)->companion); + status = acpi_evaluate_integer(rootbus_handle, METHOD_NAME__SEG, NULL, + &segment); + if (ACPI_SUCCESS(status)) { + if (segment != pci_domain_nr(dev)) { + acpi_get_name(rootbus_handle, ACPI_FULL_PATHNAME, + &name_buffer); + printk(KERN_ERR + "%s: Segment number mismatch, 0x%llx vs 0x%x for: %s\n", + __func__, segment, pci_domain_nr(dev), + (char *)name_buffer.pointer); + kfree(name_buffer.pointer); + return 1; + } + } else { + acpi_get_name(rootbus_handle, ACPI_FULL_PATHNAME, &name_buffer); + printk(KERN_ERR "%s: Unable to get __SEG from: %s\n", + __func__, (char *)name_buffer.pointer); + kfree(name_buffer.pointer); + return 1; + } + + /* + * We want to search all devices in this segment/domain + * of the ACPI namespace for the matching ACPI device, + * which holds the pcidev_info pointer in its vendor resource. + */ + pcidev_match.bus = dev->bus->number; + pcidev_match.devfn = dev->devfn; + pcidev_match.handle = NULL; + + acpi_walk_namespace(ACPI_TYPE_DEVICE, rootbus_handle, ACPI_UINT32_MAX, + find_matching_device, NULL, &pcidev_match, NULL); + + if (!pcidev_match.handle) { + printk(KERN_ERR + "%s: Could not find matching ACPI device for %s.\n", + __func__, pci_name(dev)); + return 1; + } + + if (sn_extract_device_info(pcidev_match.handle, pcidev_info, sn_irq_info)) + return 1; + + /* Build up the pcidev_info.pdi_slot_host_handle */ + host_devfn = get_host_devfn(pcidev_match.handle, rootbus_handle); + (*pcidev_info)->pdi_slot_host_handle = + ((unsigned long) pci_domain_nr(dev) << 40) | + /* bus == 0 */ + host_devfn; + return 0; +} + +/* + * sn_acpi_slot_fixup - Obtain the pcidev_info and sn_irq_info. + * Perform any SN specific slot fixup. + * At present there does not appear to be + * any generic way to handle a ROM image + * that has been shadowed by the PROM, so + * we pass a pointer to it within the + * pcidev_info structure. + */ + +void +sn_acpi_slot_fixup(struct pci_dev *dev) +{ + struct pcidev_info *pcidev_info = NULL; + struct sn_irq_info *sn_irq_info = NULL; + struct resource *res; + size_t size; + + if (sn_acpi_get_pcidev_info(dev, &pcidev_info, &sn_irq_info)) { + panic("%s: Failure obtaining pcidev_info for %s\n", + __func__, pci_name(dev)); + } + + if (pcidev_info->pdi_pio_mapped_addr[PCI_ROM_RESOURCE]) { + /* + * A valid ROM image exists and has been shadowed by the + * PROM. Setup the pci_dev ROM resource with the address + * of the shadowed copy, and the actual length of the ROM image. + */ + size = pci_resource_len(dev, PCI_ROM_RESOURCE); + + res = &dev->resource[PCI_ROM_RESOURCE]; + + pci_disable_rom(dev); + if (res->parent) + release_resource(res); + + res->start = pcidev_info->pdi_pio_mapped_addr[PCI_ROM_RESOURCE]; + res->end = res->start + size - 1; + res->flags = IORESOURCE_MEM | IORESOURCE_ROM_SHADOW | + IORESOURCE_PCI_FIXED; + } + sn_pci_fixup_slot(dev, pcidev_info, sn_irq_info); +} +EXPORT_SYMBOL(sn_acpi_slot_fixup); + + +/* + * sn_acpi_bus_fixup - Perform SN specific setup of software structs + * (pcibus_bussoft, pcidev_info) and hardware + * registers, for the specified bus and devices under it. + */ +void +sn_acpi_bus_fixup(struct pci_bus *bus) +{ + struct pci_dev *pci_dev = NULL; + struct pcibus_bussoft *prom_bussoft_ptr; + + if (!bus->parent) { /* If root bus */ + prom_bussoft_ptr = sn_get_bussoft_ptr(bus); + if (prom_bussoft_ptr == NULL) { + printk(KERN_ERR + "%s: 0x%04x:0x%02x Unable to " + "obtain prom_bussoft_ptr\n", + __func__, pci_domain_nr(bus), bus->number); + return; + } + sn_common_bus_fixup(bus, prom_bussoft_ptr); + } + list_for_each_entry(pci_dev, &bus->devices, bus_list) { + sn_acpi_slot_fixup(pci_dev); + } +} + +/* + * sn_io_acpi_init - PROM has ACPI support for IO, defining at a minimum the + * nodes and root buses in the DSDT. As a result, bus scanning + * will be initiated by the Linux ACPI code. + */ + +void __init +sn_io_acpi_init(void) +{ + u64 result; + long status; + + /* SN Altix does not follow the IOSAPIC IRQ routing model */ + acpi_irq_model = ACPI_IRQ_MODEL_PLATFORM; + + /* Setup hubdev_info for all SGIHUB/SGITIO devices */ + acpi_get_devices("SGIHUB", sn_acpi_hubdev_init, NULL, NULL); + acpi_get_devices("SGITIO", sn_acpi_hubdev_init, NULL, NULL); + + status = sal_ioif_init(&result); + if (status || result) + panic("sal_ioif_init failed: [%lx] %s\n", + status, ia64_sal_strerror(status)); +} diff --git a/arch/ia64/sn/kernel/io_common.c b/arch/ia64/sn/kernel/io_common.c new file mode 100644 index 000000000..102aabad6 --- /dev/null +++ b/arch/ia64/sn/kernel/io_common.c @@ -0,0 +1,559 @@ +/* + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. + * + * Copyright (C) 2006 Silicon Graphics, Inc. All rights reserved. + */ + +#include <linux/bootmem.h> +#include <linux/export.h> +#include <linux/slab.h> +#include <asm/sn/types.h> +#include <asm/sn/addrs.h> +#include <asm/sn/sn_feature_sets.h> +#include <asm/sn/geo.h> +#include <asm/sn/io.h> +#include <asm/sn/l1.h> +#include <asm/sn/module.h> +#include <asm/sn/pcibr_provider.h> +#include <asm/sn/pcibus_provider_defs.h> +#include <asm/sn/pcidev.h> +#include <asm/sn/simulator.h> +#include <asm/sn/sn_sal.h> +#include <asm/sn/tioca_provider.h> +#include <asm/sn/tioce_provider.h> +#include "xtalk/hubdev.h" +#include "xtalk/xwidgetdev.h" +#include <linux/acpi.h> +#include <asm/sn/sn2/sn_hwperf.h> +#include <asm/sn/acpi.h> + +extern void sn_init_cpei_timer(void); +extern void register_sn_procfs(void); +extern void sn_io_acpi_init(void); +extern void sn_io_init(void); + + +static struct list_head sn_sysdata_list; + +/* sysdata list struct */ +struct sysdata_el { + struct list_head entry; + void *sysdata; +}; + +int sn_ioif_inited; /* SN I/O infrastructure initialized? */ + +int sn_acpi_rev; /* SN ACPI revision */ +EXPORT_SYMBOL_GPL(sn_acpi_rev); + +struct sn_pcibus_provider *sn_pci_provider[PCIIO_ASIC_MAX_TYPES]; /* indexed by asic type */ + +/* + * Hooks and struct for unsupported pci providers + */ + +static dma_addr_t +sn_default_pci_map(struct pci_dev *pdev, unsigned long paddr, size_t size, int type) +{ + return 0; +} + +static void +sn_default_pci_unmap(struct pci_dev *pdev, dma_addr_t addr, int direction) +{ + return; +} + +static void * +sn_default_pci_bus_fixup(struct pcibus_bussoft *soft, struct pci_controller *controller) +{ + return NULL; +} + +static struct sn_pcibus_provider sn_pci_default_provider = { + .dma_map = sn_default_pci_map, + .dma_map_consistent = sn_default_pci_map, + .dma_unmap = sn_default_pci_unmap, + .bus_fixup = sn_default_pci_bus_fixup, +}; + +/* + * Retrieve the DMA Flush List given nasid, widget, and device. + * This list is needed to implement the WAR - Flush DMA data on PIO Reads. + */ +static inline u64 +sal_get_device_dmaflush_list(u64 nasid, u64 widget_num, u64 device_num, + u64 address) +{ + struct ia64_sal_retval ret_stuff; + ret_stuff.status = 0; + ret_stuff.v0 = 0; + + SAL_CALL_NOLOCK(ret_stuff, + (u64) SN_SAL_IOIF_GET_DEVICE_DMAFLUSH_LIST, + (u64) nasid, (u64) widget_num, + (u64) device_num, (u64) address, 0, 0, 0); + return ret_stuff.status; +} + +/* + * sn_pcidev_info_get() - Retrieve the pcidev_info struct for the specified + * device. + */ +inline struct pcidev_info * +sn_pcidev_info_get(struct pci_dev *dev) +{ + struct pcidev_info *pcidev; + + list_for_each_entry(pcidev, + &(SN_PLATFORM_DATA(dev)->pcidev_info), pdi_list) { + if (pcidev->pdi_linux_pcidev == dev) + return pcidev; + } + return NULL; +} + +/* Older PROM flush WAR + * + * 01/16/06 -- This war will be in place until a new official PROM is released. + * Additionally note that the struct sn_flush_device_war also has to be + * removed from arch/ia64/sn/include/xtalk/hubdev.h + */ + +static s64 sn_device_fixup_war(u64 nasid, u64 widget, int device, + struct sn_flush_device_common *common) +{ + struct sn_flush_device_war *war_list; + struct sn_flush_device_war *dev_entry; + struct ia64_sal_retval isrv = {0,0,0,0}; + + printk_once(KERN_WARNING + "PROM version < 4.50 -- implementing old PROM flush WAR\n"); + + war_list = kcalloc(DEV_PER_WIDGET, sizeof(*war_list), GFP_KERNEL); + BUG_ON(!war_list); + + SAL_CALL_NOLOCK(isrv, SN_SAL_IOIF_GET_WIDGET_DMAFLUSH_LIST, + nasid, widget, __pa(war_list), 0, 0, 0 ,0); + if (isrv.status) + panic("sn_device_fixup_war failed: %s\n", + ia64_sal_strerror(isrv.status)); + + dev_entry = war_list + device; + memcpy(common,dev_entry, sizeof(*common)); + kfree(war_list); + + return isrv.status; +} + +/* + * sn_common_hubdev_init() - This routine is called to initialize the HUB data + * structure for each node in the system. + */ +void __init +sn_common_hubdev_init(struct hubdev_info *hubdev) +{ + + struct sn_flush_device_kernel *sn_flush_device_kernel; + struct sn_flush_device_kernel *dev_entry; + s64 status; + int widget, device, size; + + /* Attach the error interrupt handlers */ + if (hubdev->hdi_nasid & 1) /* If TIO */ + ice_error_init(hubdev); + else + hub_error_init(hubdev); + + for (widget = 0; widget <= HUB_WIDGET_ID_MAX; widget++) + hubdev->hdi_xwidget_info[widget].xwi_hubinfo = hubdev; + + if (!hubdev->hdi_flush_nasid_list.widget_p) + return; + + size = (HUB_WIDGET_ID_MAX + 1) * + sizeof(struct sn_flush_device_kernel *); + hubdev->hdi_flush_nasid_list.widget_p = + kzalloc(size, GFP_KERNEL); + BUG_ON(!hubdev->hdi_flush_nasid_list.widget_p); + + for (widget = 0; widget <= HUB_WIDGET_ID_MAX; widget++) { + size = DEV_PER_WIDGET * + sizeof(struct sn_flush_device_kernel); + sn_flush_device_kernel = kzalloc(size, GFP_KERNEL); + BUG_ON(!sn_flush_device_kernel); + + dev_entry = sn_flush_device_kernel; + for (device = 0; device < DEV_PER_WIDGET; + device++, dev_entry++) { + size = sizeof(struct sn_flush_device_common); + dev_entry->common = kzalloc(size, GFP_KERNEL); + BUG_ON(!dev_entry->common); + if (sn_prom_feature_available(PRF_DEVICE_FLUSH_LIST)) + status = sal_get_device_dmaflush_list( + hubdev->hdi_nasid, widget, device, + (u64)(dev_entry->common)); + else + status = sn_device_fixup_war(hubdev->hdi_nasid, + widget, device, + dev_entry->common); + if (status != SALRET_OK) + panic("SAL call failed: %s\n", + ia64_sal_strerror(status)); + + spin_lock_init(&dev_entry->sfdl_flush_lock); + } + + if (sn_flush_device_kernel) + hubdev->hdi_flush_nasid_list.widget_p[widget] = + sn_flush_device_kernel; + } +} + +void sn_pci_unfixup_slot(struct pci_dev *dev) +{ + struct pci_dev *host_pci_dev = SN_PCIDEV_INFO(dev)->host_pci_dev; + + sn_irq_unfixup(dev); + pci_dev_put(host_pci_dev); + pci_dev_put(dev); +} + +/* + * sn_pci_fixup_slot() + */ +void sn_pci_fixup_slot(struct pci_dev *dev, struct pcidev_info *pcidev_info, + struct sn_irq_info *sn_irq_info) +{ + int segment = pci_domain_nr(dev->bus); + struct pcibus_bussoft *bs; + struct pci_dev *host_pci_dev; + unsigned int bus_no, devfn; + + pci_dev_get(dev); /* for the sysdata pointer */ + + /* Add pcidev_info to list in pci_controller.platform_data */ + list_add_tail(&pcidev_info->pdi_list, + &(SN_PLATFORM_DATA(dev->bus)->pcidev_info)); + /* + * Using the PROMs values for the PCI host bus, get the Linux + * PCI host_pci_dev struct and set up host bus linkages + */ + + bus_no = (pcidev_info->pdi_slot_host_handle >> 32) & 0xff; + devfn = pcidev_info->pdi_slot_host_handle & 0xffffffff; + host_pci_dev = pci_get_domain_bus_and_slot(segment, bus_no, devfn); + + pcidev_info->host_pci_dev = host_pci_dev; + pcidev_info->pdi_linux_pcidev = dev; + pcidev_info->pdi_host_pcidev_info = SN_PCIDEV_INFO(host_pci_dev); + bs = SN_PCIBUS_BUSSOFT(dev->bus); + pcidev_info->pdi_pcibus_info = bs; + + if (bs && bs->bs_asic_type < PCIIO_ASIC_MAX_TYPES) { + SN_PCIDEV_BUSPROVIDER(dev) = sn_pci_provider[bs->bs_asic_type]; + } else { + SN_PCIDEV_BUSPROVIDER(dev) = &sn_pci_default_provider; + } + + /* Only set up IRQ stuff if this device has a host bus context */ + if (bs && sn_irq_info->irq_irq) { + pcidev_info->pdi_sn_irq_info = sn_irq_info; + dev->irq = pcidev_info->pdi_sn_irq_info->irq_irq; + sn_irq_fixup(dev, sn_irq_info); + } else { + pcidev_info->pdi_sn_irq_info = NULL; + kfree(sn_irq_info); + } +} + +/* + * sn_common_bus_fixup - Perform platform specific bus fixup. + * Execute the ASIC specific fixup routine + * for this bus. + */ +void +sn_common_bus_fixup(struct pci_bus *bus, + struct pcibus_bussoft *prom_bussoft_ptr) +{ + int cnode; + struct pci_controller *controller; + struct hubdev_info *hubdev_info; + int nasid; + void *provider_soft; + struct sn_pcibus_provider *provider; + struct sn_platform_data *sn_platform_data; + + controller = PCI_CONTROLLER(bus); + /* + * Per-provider fixup. Copies the bus soft structure from prom + * to local area and links SN_PCIBUS_BUSSOFT(). + */ + + if (prom_bussoft_ptr->bs_asic_type >= PCIIO_ASIC_MAX_TYPES) { + printk(KERN_WARNING "sn_common_bus_fixup: Unsupported asic type, %d", + prom_bussoft_ptr->bs_asic_type); + return; + } + + if (prom_bussoft_ptr->bs_asic_type == PCIIO_ASIC_TYPE_PPB) + return; /* no further fixup necessary */ + + provider = sn_pci_provider[prom_bussoft_ptr->bs_asic_type]; + if (provider == NULL) + panic("sn_common_bus_fixup: No provider registered for this asic type, %d", + prom_bussoft_ptr->bs_asic_type); + + if (provider->bus_fixup) + provider_soft = (*provider->bus_fixup) (prom_bussoft_ptr, + controller); + else + provider_soft = NULL; + + /* + * Generic bus fixup goes here. Don't reference prom_bussoft_ptr + * after this point. + */ + controller->platform_data = kzalloc(sizeof(struct sn_platform_data), + GFP_KERNEL); + BUG_ON(controller->platform_data == NULL); + sn_platform_data = + (struct sn_platform_data *) controller->platform_data; + sn_platform_data->provider_soft = provider_soft; + INIT_LIST_HEAD(&((struct sn_platform_data *) + controller->platform_data)->pcidev_info); + nasid = NASID_GET(SN_PCIBUS_BUSSOFT(bus)->bs_base); + cnode = nasid_to_cnodeid(nasid); + hubdev_info = (struct hubdev_info *)(NODEPDA(cnode)->pdinfo); + SN_PCIBUS_BUSSOFT(bus)->bs_xwidget_info = + &(hubdev_info->hdi_xwidget_info[SN_PCIBUS_BUSSOFT(bus)->bs_xid]); + + /* + * If the node information we obtained during the fixup phase is + * invalid then set controller->node to -1 (undetermined) + */ + if (controller->node >= num_online_nodes()) { + struct pcibus_bussoft *b = SN_PCIBUS_BUSSOFT(bus); + + printk(KERN_WARNING "Device ASIC=%u XID=%u PBUSNUM=%u " + "L_IO=%llx L_MEM=%llx BASE=%llx\n", + b->bs_asic_type, b->bs_xid, b->bs_persist_busnum, + b->bs_legacy_io, b->bs_legacy_mem, b->bs_base); + printk(KERN_WARNING "on node %d but only %d nodes online." + "Association set to undetermined.\n", + controller->node, num_online_nodes()); + controller->node = -1; + } +} + +void sn_bus_store_sysdata(struct pci_dev *dev) +{ + struct sysdata_el *element; + + element = kzalloc(sizeof(struct sysdata_el), GFP_KERNEL); + if (!element) { + dev_dbg(&dev->dev, "%s: out of memory!\n", __func__); + return; + } + element->sysdata = SN_PCIDEV_INFO(dev); + list_add(&element->entry, &sn_sysdata_list); +} + +void sn_bus_free_sysdata(void) +{ + struct sysdata_el *element; + struct list_head *list, *safe; + + list_for_each_safe(list, safe, &sn_sysdata_list) { + element = list_entry(list, struct sysdata_el, entry); + list_del(&element->entry); + list_del(&(((struct pcidev_info *) + (element->sysdata))->pdi_list)); + kfree(element->sysdata); + kfree(element); + } + return; +} + +/* + * hubdev_init_node() - Creates the HUB data structure and link them to it's + * own NODE specific data area. + */ +void __init hubdev_init_node(nodepda_t * npda, cnodeid_t node) +{ + struct hubdev_info *hubdev_info; + int size; + pg_data_t *pg; + + size = sizeof(struct hubdev_info); + + if (node >= num_online_nodes()) /* Headless/memless IO nodes */ + pg = NODE_DATA(0); + else + pg = NODE_DATA(node); + + hubdev_info = (struct hubdev_info *)alloc_bootmem_node(pg, size); + + npda->pdinfo = (void *)hubdev_info; +} + +geoid_t +cnodeid_get_geoid(cnodeid_t cnode) +{ + struct hubdev_info *hubdev; + + hubdev = (struct hubdev_info *)(NODEPDA(cnode)->pdinfo); + return hubdev->hdi_geoid; +} + +void sn_generate_path(struct pci_bus *pci_bus, char *address) +{ + nasid_t nasid; + cnodeid_t cnode; + geoid_t geoid; + moduleid_t moduleid; + u16 bricktype; + + nasid = NASID_GET(SN_PCIBUS_BUSSOFT(pci_bus)->bs_base); + cnode = nasid_to_cnodeid(nasid); + geoid = cnodeid_get_geoid(cnode); + moduleid = geo_module(geoid); + + sprintf(address, "module_%c%c%c%c%.2d", + '0'+RACK_GET_CLASS(MODULE_GET_RACK(moduleid)), + '0'+RACK_GET_GROUP(MODULE_GET_RACK(moduleid)), + '0'+RACK_GET_NUM(MODULE_GET_RACK(moduleid)), + MODULE_GET_BTCHAR(moduleid), MODULE_GET_BPOS(moduleid)); + + /* Tollhouse requires slot id to be displayed */ + bricktype = MODULE_GET_BTYPE(moduleid); + if ((bricktype == L1_BRICKTYPE_191010) || + (bricktype == L1_BRICKTYPE_1932)) + sprintf(address + strlen(address), "^%d", + geo_slot(geoid)); +} + +void sn_pci_fixup_bus(struct pci_bus *bus) +{ + + if (SN_ACPI_BASE_SUPPORT()) + sn_acpi_bus_fixup(bus); + else + sn_bus_fixup(bus); +} + +/* + * sn_io_early_init - Perform early IO (and some non-IO) initialization. + * In particular, setup the sn_pci_provider[] array. + * This needs to be done prior to any bus scanning + * (acpi_scan_init()) in the ACPI case, as the SN + * bus fixup code will reference the array. + */ +static int __init +sn_io_early_init(void) +{ + int i; + + if (!ia64_platform_is("sn2") || IS_RUNNING_ON_FAKE_PROM()) + return 0; + + /* we set the acpi revision to that of the DSDT table OEM rev. */ + { + struct acpi_table_header *header = NULL; + + acpi_get_table(ACPI_SIG_DSDT, 1, &header); + BUG_ON(header == NULL); + sn_acpi_rev = header->oem_revision; + } + + /* + * prime sn_pci_provider[]. Individual provider init routines will + * override their respective default entries. + */ + + for (i = 0; i < PCIIO_ASIC_MAX_TYPES; i++) + sn_pci_provider[i] = &sn_pci_default_provider; + + pcibr_init_provider(); + tioca_init_provider(); + tioce_init_provider(); + + sn_irq_lh_init(); + INIT_LIST_HEAD(&sn_sysdata_list); + sn_init_cpei_timer(); + +#ifdef CONFIG_PROC_FS + register_sn_procfs(); +#endif + + { + struct acpi_table_header *header; + (void)acpi_get_table(ACPI_SIG_DSDT, 1, &header); + printk(KERN_INFO "ACPI DSDT OEM Rev 0x%x\n", + header->oem_revision); + } + if (SN_ACPI_BASE_SUPPORT()) + sn_io_acpi_init(); + else + sn_io_init(); + return 0; +} + +arch_initcall(sn_io_early_init); + +/* + * sn_io_late_init() - Perform any final platform specific IO initialization. + */ + +int __init +sn_io_late_init(void) +{ + struct pci_bus *bus; + struct pcibus_bussoft *bussoft; + cnodeid_t cnode; + nasid_t nasid; + cnodeid_t near_cnode; + + if (!ia64_platform_is("sn2") || IS_RUNNING_ON_FAKE_PROM()) + return 0; + + /* + * Setup closest node in pci_controller->node for + * PIC, TIOCP, TIOCE (TIOCA does it during bus fixup using + * info from the PROM). + */ + bus = NULL; + while ((bus = pci_find_next_bus(bus)) != NULL) { + bussoft = SN_PCIBUS_BUSSOFT(bus); + nasid = NASID_GET(bussoft->bs_base); + cnode = nasid_to_cnodeid(nasid); + if ((bussoft->bs_asic_type == PCIIO_ASIC_TYPE_TIOCP) || + (bussoft->bs_asic_type == PCIIO_ASIC_TYPE_TIOCE) || + (bussoft->bs_asic_type == PCIIO_ASIC_TYPE_PIC)) { + /* PCI Bridge: find nearest node with CPUs */ + int e = sn_hwperf_get_nearest_node(cnode, NULL, + &near_cnode); + if (e < 0) { + near_cnode = (cnodeid_t)-1; /* use any node */ + printk(KERN_WARNING "sn_io_late_init: failed " + "to find near node with CPUs for " + "node %d, err=%d\n", cnode, e); + } + PCI_CONTROLLER(bus)->node = near_cnode; + } + } + + sn_ioif_inited = 1; /* SN I/O infrastructure now initialized */ + + return 0; +} + +fs_initcall(sn_io_late_init); + +EXPORT_SYMBOL(sn_pci_unfixup_slot); +EXPORT_SYMBOL(sn_bus_store_sysdata); +EXPORT_SYMBOL(sn_bus_free_sysdata); +EXPORT_SYMBOL(sn_generate_path); + diff --git a/arch/ia64/sn/kernel/io_init.c b/arch/ia64/sn/kernel/io_init.c new file mode 100644 index 000000000..d63809a6a --- /dev/null +++ b/arch/ia64/sn/kernel/io_init.c @@ -0,0 +1,308 @@ +/* + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. + * + * Copyright (C) 1992 - 1997, 2000-2006 Silicon Graphics, Inc. All rights reserved. + */ + +#include <linux/slab.h> +#include <linux/export.h> +#include <asm/sn/types.h> +#include <asm/sn/addrs.h> +#include <asm/sn/io.h> +#include <asm/sn/module.h> +#include <asm/sn/intr.h> +#include <asm/sn/pcibus_provider_defs.h> +#include <asm/sn/pcidev.h> +#include <asm/sn/sn_sal.h> +#include "xtalk/hubdev.h" + +/* + * The code in this file will only be executed when running with + * a PROM that does _not_ have base ACPI IO support. + * (i.e., SN_ACPI_BASE_SUPPORT() == 0) + */ + +static int max_segment_number; /* Default highest segment number */ +static int max_pcibus_number = 255; /* Default highest pci bus number */ + + +/* + * Retrieve the hub device info structure for the given nasid. + */ +static inline u64 sal_get_hubdev_info(u64 handle, u64 address) +{ + struct ia64_sal_retval ret_stuff; + ret_stuff.status = 0; + ret_stuff.v0 = 0; + + SAL_CALL_NOLOCK(ret_stuff, + (u64) SN_SAL_IOIF_GET_HUBDEV_INFO, + (u64) handle, (u64) address, 0, 0, 0, 0, 0); + return ret_stuff.v0; +} + +/* + * Retrieve the pci bus information given the bus number. + */ +static inline u64 sal_get_pcibus_info(u64 segment, u64 busnum, u64 address) +{ + struct ia64_sal_retval ret_stuff; + ret_stuff.status = 0; + ret_stuff.v0 = 0; + + SAL_CALL_NOLOCK(ret_stuff, + (u64) SN_SAL_IOIF_GET_PCIBUS_INFO, + (u64) segment, (u64) busnum, (u64) address, 0, 0, 0, 0); + return ret_stuff.v0; +} + +/* + * Retrieve the pci device information given the bus and device|function number. + */ +static inline u64 +sal_get_pcidev_info(u64 segment, u64 bus_number, u64 devfn, u64 pci_dev, + u64 sn_irq_info) +{ + struct ia64_sal_retval ret_stuff; + ret_stuff.status = 0; + ret_stuff.v0 = 0; + + SAL_CALL_NOLOCK(ret_stuff, + (u64) SN_SAL_IOIF_GET_PCIDEV_INFO, + (u64) segment, (u64) bus_number, (u64) devfn, + (u64) pci_dev, + sn_irq_info, 0, 0); + return ret_stuff.v0; +} + + +/* + * sn_fixup_ionodes() - This routine initializes the HUB data structure for + * each node in the system. This function is only + * executed when running with a non-ACPI capable PROM. + */ +static void __init sn_fixup_ionodes(void) +{ + + struct hubdev_info *hubdev; + u64 status; + u64 nasid; + int i; + extern void sn_common_hubdev_init(struct hubdev_info *); + + /* + * Get SGI Specific HUB chipset information. + * Inform Prom that this kernel can support domain bus numbering. + */ + for (i = 0; i < num_cnodes; i++) { + hubdev = (struct hubdev_info *)(NODEPDA(i)->pdinfo); + nasid = cnodeid_to_nasid(i); + hubdev->max_segment_number = 0xffffffff; + hubdev->max_pcibus_number = 0xff; + status = sal_get_hubdev_info(nasid, (u64) __pa(hubdev)); + if (status) + continue; + + /* Save the largest Domain and pcibus numbers found. */ + if (hubdev->max_segment_number) { + /* + * Dealing with a Prom that supports segments. + */ + max_segment_number = hubdev->max_segment_number; + max_pcibus_number = hubdev->max_pcibus_number; + } + sn_common_hubdev_init(hubdev); + } +} + +/* + * sn_pci_legacy_window_fixup - Setup PCI resources for + * legacy IO and MEM space. This needs to + * be done here, as the PROM does not have + * ACPI support defining the root buses + * and their resources (_CRS), + */ +static void +sn_legacy_pci_window_fixup(struct resource *res, + u64 legacy_io, u64 legacy_mem) +{ + res[0].name = "legacy_io"; + res[0].flags = IORESOURCE_IO; + res[0].start = legacy_io; + res[0].end = res[0].start + 0xffff; + res[0].parent = &ioport_resource; + res[1].name = "legacy_mem"; + res[1].flags = IORESOURCE_MEM; + res[1].start = legacy_mem; + res[1].end = res[1].start + (1024 * 1024) - 1; + res[1].parent = &iomem_resource; +} + +/* + * sn_io_slot_fixup() - We are not running with an ACPI capable PROM, + * and need to convert the pci_dev->resource + * 'start' and 'end' addresses to mapped addresses, + * and setup the pci_controller->window array entries. + */ +void +sn_io_slot_fixup(struct pci_dev *dev) +{ + int idx; + struct resource *res; + unsigned long size; + struct pcidev_info *pcidev_info; + struct sn_irq_info *sn_irq_info; + int status; + + pcidev_info = kzalloc(sizeof(struct pcidev_info), GFP_KERNEL); + if (!pcidev_info) + panic("%s: Unable to alloc memory for pcidev_info", __func__); + + sn_irq_info = kzalloc(sizeof(struct sn_irq_info), GFP_KERNEL); + if (!sn_irq_info) + panic("%s: Unable to alloc memory for sn_irq_info", __func__); + + /* Call to retrieve pci device information needed by kernel. */ + status = sal_get_pcidev_info((u64) pci_domain_nr(dev), + (u64) dev->bus->number, + dev->devfn, + (u64) __pa(pcidev_info), + (u64) __pa(sn_irq_info)); + + BUG_ON(status); /* Cannot get platform pci device information */ + + + /* Copy over PIO Mapped Addresses */ + for (idx = 0; idx <= PCI_ROM_RESOURCE; idx++) { + if (!pcidev_info->pdi_pio_mapped_addr[idx]) + continue; + + res = &dev->resource[idx]; + + size = res->end - res->start; + if (size == 0) + continue; + + res->start = pcidev_info->pdi_pio_mapped_addr[idx]; + res->end = res->start + size; + + /* + * if it's already in the device structure, remove it before + * inserting + */ + if (res->parent && res->parent->child) + release_resource(res); + + if (res->flags & IORESOURCE_IO) + insert_resource(&ioport_resource, res); + else + insert_resource(&iomem_resource, res); + /* + * If ROM, mark as shadowed in PROM. + */ + if (idx == PCI_ROM_RESOURCE) { + pci_disable_rom(dev); + res->flags = IORESOURCE_MEM | IORESOURCE_ROM_SHADOW | + IORESOURCE_PCI_FIXED; + } + } + + sn_pci_fixup_slot(dev, pcidev_info, sn_irq_info); +} +EXPORT_SYMBOL(sn_io_slot_fixup); + +/* + * sn_pci_controller_fixup() - This routine sets up a bus's resources + * consistent with the Linux PCI abstraction layer. + */ +static void __init +sn_pci_controller_fixup(int segment, int busnum, struct pci_bus *bus) +{ + s64 status = 0; + struct pci_controller *controller; + struct pcibus_bussoft *prom_bussoft_ptr; + struct resource *res; + LIST_HEAD(resources); + + status = sal_get_pcibus_info((u64) segment, (u64) busnum, + (u64) ia64_tpa(&prom_bussoft_ptr)); + if (status > 0) + return; /*bus # does not exist */ + prom_bussoft_ptr = __va(prom_bussoft_ptr); + + controller = kzalloc(sizeof(*controller), GFP_KERNEL); + BUG_ON(!controller); + controller->segment = segment; + + res = kcalloc(2, sizeof(struct resource), GFP_KERNEL); + BUG_ON(!res); + + /* + * Temporarily save the prom_bussoft_ptr for use by sn_bus_fixup(). + * (platform_data will be overwritten later in sn_common_bus_fixup()) + */ + controller->platform_data = prom_bussoft_ptr; + + sn_legacy_pci_window_fixup(res, + prom_bussoft_ptr->bs_legacy_io, + prom_bussoft_ptr->bs_legacy_mem); + pci_add_resource_offset(&resources, &res[0], + prom_bussoft_ptr->bs_legacy_io); + pci_add_resource_offset(&resources, &res[1], + prom_bussoft_ptr->bs_legacy_mem); + + bus = pci_scan_root_bus(NULL, busnum, &pci_root_ops, controller, + &resources); + if (bus == NULL) { + kfree(res); + kfree(controller); + return; + } + pci_bus_add_devices(bus); +} + +/* + * sn_bus_fixup + */ +void +sn_bus_fixup(struct pci_bus *bus) +{ + struct pci_dev *pci_dev = NULL; + struct pcibus_bussoft *prom_bussoft_ptr; + + if (!bus->parent) { /* If root bus */ + prom_bussoft_ptr = PCI_CONTROLLER(bus)->platform_data; + if (prom_bussoft_ptr == NULL) { + printk(KERN_ERR + "sn_bus_fixup: 0x%04x:0x%02x Unable to " + "obtain prom_bussoft_ptr\n", + pci_domain_nr(bus), bus->number); + return; + } + sn_common_bus_fixup(bus, prom_bussoft_ptr); + } + list_for_each_entry(pci_dev, &bus->devices, bus_list) { + sn_io_slot_fixup(pci_dev); + } + +} + +/* + * sn_io_init - PROM does not have ACPI support to define nodes or root buses, + * so we need to do things the hard way, including initiating the + * bus scanning ourselves. + */ + +void __init sn_io_init(void) +{ + int i, j; + + sn_fixup_ionodes(); + + /* busses are not known yet ... */ + for (i = 0; i <= max_segment_number; i++) + for (j = 0; j <= max_pcibus_number; j++) + sn_pci_controller_fixup(i, j, NULL); +} diff --git a/arch/ia64/sn/kernel/iomv.c b/arch/ia64/sn/kernel/iomv.c new file mode 100644 index 000000000..2b22a7166 --- /dev/null +++ b/arch/ia64/sn/kernel/iomv.c @@ -0,0 +1,82 @@ +/* + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. + * + * Copyright (C) 2000-2003, 2006 Silicon Graphics, Inc. All rights reserved. + */ + +#include <linux/module.h> +#include <linux/acpi.h> +#include <asm/io.h> +#include <asm/delay.h> +#include <asm/vga.h> +#include <asm/sn/nodepda.h> +#include <asm/sn/simulator.h> +#include <asm/sn/pda.h> +#include <asm/sn/sn_cpuid.h> +#include <asm/sn/shub_mmr.h> +#include <asm/sn/acpi.h> + +#define IS_LEGACY_VGA_IOPORT(p) \ + (((p) >= 0x3b0 && (p) <= 0x3bb) || ((p) >= 0x3c0 && (p) <= 0x3df)) + +/** + * sn_io_addr - convert an in/out port to an i/o address + * @port: port to convert + * + * Legacy in/out instructions are converted to ld/st instructions + * on IA64. This routine will convert a port number into a valid + * SN i/o address. Used by sn_in*() and sn_out*(). + */ + +void *sn_io_addr(unsigned long port) +{ + if (!IS_RUNNING_ON_SIMULATOR()) { + if (IS_LEGACY_VGA_IOPORT(port)) + return (__ia64_mk_io_addr(port)); + /* On sn2, legacy I/O ports don't point at anything */ + if (port < (64 * 1024)) + return NULL; + if (SN_ACPI_BASE_SUPPORT()) + return (__ia64_mk_io_addr(port)); + else + return ((void *)(port | __IA64_UNCACHED_OFFSET)); + } else { + /* but the simulator uses them... */ + unsigned long addr; + + /* + * word align port, but need more than 10 bits + * for accessing registers in bedrock local block + * (so we don't do port&0xfff) + */ + addr = (is_shub2() ? 0xc00000028c000000UL : 0xc0000087cc000000UL) | ((port >> 2) << 12); + if ((port >= 0x1f0 && port <= 0x1f7) || port == 0x3f6 || port == 0x3f7) + addr |= port; + return (void *)addr; + } +} + +EXPORT_SYMBOL(sn_io_addr); + +/** + * __sn_mmiowb - I/O space memory barrier + * + * See arch/ia64/include/asm/io.h and Documentation/driver-api/device-io.rst + * for details. + * + * On SN2, we wait for the PIO_WRITE_STATUS SHub register to clear. + * See PV 871084 for details about the WAR about zero value. + * + */ +void __sn_mmiowb(void) +{ + volatile unsigned long *adr = pda->pio_write_status_addr; + unsigned long val = pda->pio_write_status_val; + + while ((*adr & SH_PIO_WRITE_STATUS_PENDING_WRITE_COUNT_MASK) != val) + cpu_relax(); +} + +EXPORT_SYMBOL(__sn_mmiowb); diff --git a/arch/ia64/sn/kernel/irq.c b/arch/ia64/sn/kernel/irq.c new file mode 100644 index 000000000..d9b576df4 --- /dev/null +++ b/arch/ia64/sn/kernel/irq.c @@ -0,0 +1,489 @@ +/* + * Platform dependent support for SGI SN + * + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. + * + * Copyright (c) 2000-2008 Silicon Graphics, Inc. All Rights Reserved. + */ + +#include <linux/irq.h> +#include <linux/spinlock.h> +#include <linux/init.h> +#include <linux/rculist.h> +#include <linux/slab.h> +#include <asm/sn/addrs.h> +#include <asm/sn/arch.h> +#include <asm/sn/intr.h> +#include <asm/sn/pcibr_provider.h> +#include <asm/sn/pcibus_provider_defs.h> +#include <asm/sn/pcidev.h> +#include <asm/sn/shub_mmr.h> +#include <asm/sn/sn_sal.h> +#include <asm/sn/sn_feature_sets.h> + +static void register_intr_pda(struct sn_irq_info *sn_irq_info); +static void unregister_intr_pda(struct sn_irq_info *sn_irq_info); + +extern int sn_ioif_inited; +struct list_head **sn_irq_lh; +static DEFINE_SPINLOCK(sn_irq_info_lock); /* non-IRQ lock */ + +u64 sn_intr_alloc(nasid_t local_nasid, int local_widget, + struct sn_irq_info *sn_irq_info, + int req_irq, nasid_t req_nasid, + int req_slice) +{ + struct ia64_sal_retval ret_stuff; + ret_stuff.status = 0; + ret_stuff.v0 = 0; + + SAL_CALL_NOLOCK(ret_stuff, (u64) SN_SAL_IOIF_INTERRUPT, + (u64) SAL_INTR_ALLOC, (u64) local_nasid, + (u64) local_widget, __pa(sn_irq_info), (u64) req_irq, + (u64) req_nasid, (u64) req_slice); + + return ret_stuff.status; +} + +void sn_intr_free(nasid_t local_nasid, int local_widget, + struct sn_irq_info *sn_irq_info) +{ + struct ia64_sal_retval ret_stuff; + ret_stuff.status = 0; + ret_stuff.v0 = 0; + + SAL_CALL_NOLOCK(ret_stuff, (u64) SN_SAL_IOIF_INTERRUPT, + (u64) SAL_INTR_FREE, (u64) local_nasid, + (u64) local_widget, (u64) sn_irq_info->irq_irq, + (u64) sn_irq_info->irq_cookie, 0, 0); +} + +u64 sn_intr_redirect(nasid_t local_nasid, int local_widget, + struct sn_irq_info *sn_irq_info, + nasid_t req_nasid, int req_slice) +{ + struct ia64_sal_retval ret_stuff; + ret_stuff.status = 0; + ret_stuff.v0 = 0; + + SAL_CALL_NOLOCK(ret_stuff, (u64) SN_SAL_IOIF_INTERRUPT, + (u64) SAL_INTR_REDIRECT, (u64) local_nasid, + (u64) local_widget, __pa(sn_irq_info), + (u64) req_nasid, (u64) req_slice, 0); + + return ret_stuff.status; +} + +static unsigned int sn_startup_irq(struct irq_data *data) +{ + return 0; +} + +static void sn_shutdown_irq(struct irq_data *data) +{ +} + +extern void ia64_mca_register_cpev(int); + +static void sn_disable_irq(struct irq_data *data) +{ + if (data->irq == local_vector_to_irq(IA64_CPE_VECTOR)) + ia64_mca_register_cpev(0); +} + +static void sn_enable_irq(struct irq_data *data) +{ + if (data->irq == local_vector_to_irq(IA64_CPE_VECTOR)) + ia64_mca_register_cpev(data->irq); +} + +static void sn_ack_irq(struct irq_data *data) +{ + u64 event_occurred, mask; + unsigned int irq = data->irq & 0xff; + + event_occurred = HUB_L((u64*)LOCAL_MMR_ADDR(SH_EVENT_OCCURRED)); + mask = event_occurred & SH_ALL_INT_MASK; + HUB_S((u64*)LOCAL_MMR_ADDR(SH_EVENT_OCCURRED_ALIAS), mask); + __set_bit(irq, (volatile void *)pda->sn_in_service_ivecs); + + irq_move_irq(data); +} + +struct sn_irq_info *sn_retarget_vector(struct sn_irq_info *sn_irq_info, + nasid_t nasid, int slice) +{ + int vector; + int cpuid; +#ifdef CONFIG_SMP + int cpuphys; +#endif + int64_t bridge; + int local_widget, status; + nasid_t local_nasid; + struct sn_irq_info *new_irq_info; + struct sn_pcibus_provider *pci_provider; + + bridge = (u64) sn_irq_info->irq_bridge; + if (!bridge) { + return NULL; /* irq is not a device interrupt */ + } + + local_nasid = NASID_GET(bridge); + + if (local_nasid & 1) + local_widget = TIO_SWIN_WIDGETNUM(bridge); + else + local_widget = SWIN_WIDGETNUM(bridge); + vector = sn_irq_info->irq_irq; + + /* Make use of SAL_INTR_REDIRECT if PROM supports it */ + status = sn_intr_redirect(local_nasid, local_widget, sn_irq_info, nasid, slice); + if (!status) { + new_irq_info = sn_irq_info; + goto finish_up; + } + + /* + * PROM does not support SAL_INTR_REDIRECT, or it failed. + * Revert to old method. + */ + new_irq_info = kmemdup(sn_irq_info, sizeof(struct sn_irq_info), + GFP_ATOMIC); + if (new_irq_info == NULL) + return NULL; + + /* Free the old PROM new_irq_info structure */ + sn_intr_free(local_nasid, local_widget, new_irq_info); + unregister_intr_pda(new_irq_info); + + /* allocate a new PROM new_irq_info struct */ + status = sn_intr_alloc(local_nasid, local_widget, + new_irq_info, vector, + nasid, slice); + + /* SAL call failed */ + if (status) { + kfree(new_irq_info); + return NULL; + } + + register_intr_pda(new_irq_info); + spin_lock(&sn_irq_info_lock); + list_replace_rcu(&sn_irq_info->list, &new_irq_info->list); + spin_unlock(&sn_irq_info_lock); + kfree_rcu(sn_irq_info, rcu); + + +finish_up: + /* Update kernels new_irq_info with new target info */ + cpuid = nasid_slice_to_cpuid(new_irq_info->irq_nasid, + new_irq_info->irq_slice); + new_irq_info->irq_cpuid = cpuid; + + pci_provider = sn_pci_provider[new_irq_info->irq_bridge_type]; + + /* + * If this represents a line interrupt, target it. If it's + * an msi (irq_int_bit < 0), it's already targeted. + */ + if (new_irq_info->irq_int_bit >= 0 && + pci_provider && pci_provider->target_interrupt) + (pci_provider->target_interrupt)(new_irq_info); + +#ifdef CONFIG_SMP + cpuphys = cpu_physical_id(cpuid); + set_irq_affinity_info((vector & 0xff), cpuphys, 0); +#endif + + return new_irq_info; +} + +static int sn_set_affinity_irq(struct irq_data *data, + const struct cpumask *mask, bool force) +{ + struct sn_irq_info *sn_irq_info, *sn_irq_info_safe; + unsigned int irq = data->irq; + nasid_t nasid; + int slice; + + nasid = cpuid_to_nasid(cpumask_first_and(mask, cpu_online_mask)); + slice = cpuid_to_slice(cpumask_first_and(mask, cpu_online_mask)); + + list_for_each_entry_safe(sn_irq_info, sn_irq_info_safe, + sn_irq_lh[irq], list) + (void)sn_retarget_vector(sn_irq_info, nasid, slice); + + return 0; +} + +#ifdef CONFIG_SMP +void sn_set_err_irq_affinity(unsigned int irq) +{ + /* + * On systems which support CPU disabling (SHub2), all error interrupts + * are targeted at the boot CPU. + */ + if (is_shub2() && sn_prom_feature_available(PRF_CPU_DISABLE_SUPPORT)) + set_irq_affinity_info(irq, cpu_physical_id(0), 0); +} +#else +void sn_set_err_irq_affinity(unsigned int irq) { } +#endif + +static void +sn_mask_irq(struct irq_data *data) +{ +} + +static void +sn_unmask_irq(struct irq_data *data) +{ +} + +struct irq_chip irq_type_sn = { + .name = "SN hub", + .irq_startup = sn_startup_irq, + .irq_shutdown = sn_shutdown_irq, + .irq_enable = sn_enable_irq, + .irq_disable = sn_disable_irq, + .irq_ack = sn_ack_irq, + .irq_mask = sn_mask_irq, + .irq_unmask = sn_unmask_irq, + .irq_set_affinity = sn_set_affinity_irq +}; + +ia64_vector sn_irq_to_vector(int irq) +{ + if (irq >= IA64_NUM_VECTORS) + return 0; + return (ia64_vector)irq; +} + +unsigned int sn_local_vector_to_irq(u8 vector) +{ + return (CPU_VECTOR_TO_IRQ(smp_processor_id(), vector)); +} + +void sn_irq_init(void) +{ + int i; + + ia64_first_device_vector = IA64_SN2_FIRST_DEVICE_VECTOR; + ia64_last_device_vector = IA64_SN2_LAST_DEVICE_VECTOR; + + for (i = 0; i < NR_IRQS; i++) { + if (irq_get_chip(i) == &no_irq_chip) + irq_set_chip(i, &irq_type_sn); + } +} + +static void register_intr_pda(struct sn_irq_info *sn_irq_info) +{ + int irq = sn_irq_info->irq_irq; + int cpu = sn_irq_info->irq_cpuid; + + if (pdacpu(cpu)->sn_last_irq < irq) { + pdacpu(cpu)->sn_last_irq = irq; + } + + if (pdacpu(cpu)->sn_first_irq == 0 || pdacpu(cpu)->sn_first_irq > irq) + pdacpu(cpu)->sn_first_irq = irq; +} + +static void unregister_intr_pda(struct sn_irq_info *sn_irq_info) +{ + int irq = sn_irq_info->irq_irq; + int cpu = sn_irq_info->irq_cpuid; + struct sn_irq_info *tmp_irq_info; + int i, foundmatch; + + rcu_read_lock(); + if (pdacpu(cpu)->sn_last_irq == irq) { + foundmatch = 0; + for (i = pdacpu(cpu)->sn_last_irq - 1; + i && !foundmatch; i--) { + list_for_each_entry_rcu(tmp_irq_info, + sn_irq_lh[i], + list) { + if (tmp_irq_info->irq_cpuid == cpu) { + foundmatch = 1; + break; + } + } + } + pdacpu(cpu)->sn_last_irq = i; + } + + if (pdacpu(cpu)->sn_first_irq == irq) { + foundmatch = 0; + for (i = pdacpu(cpu)->sn_first_irq + 1; + i < NR_IRQS && !foundmatch; i++) { + list_for_each_entry_rcu(tmp_irq_info, + sn_irq_lh[i], + list) { + if (tmp_irq_info->irq_cpuid == cpu) { + foundmatch = 1; + break; + } + } + } + pdacpu(cpu)->sn_first_irq = ((i == NR_IRQS) ? 0 : i); + } + rcu_read_unlock(); +} + +void sn_irq_fixup(struct pci_dev *pci_dev, struct sn_irq_info *sn_irq_info) +{ + nasid_t nasid = sn_irq_info->irq_nasid; + int slice = sn_irq_info->irq_slice; + int cpu = nasid_slice_to_cpuid(nasid, slice); +#ifdef CONFIG_SMP + int cpuphys; +#endif + + pci_dev_get(pci_dev); + sn_irq_info->irq_cpuid = cpu; + sn_irq_info->irq_pciioinfo = SN_PCIDEV_INFO(pci_dev); + + /* link it into the sn_irq[irq] list */ + spin_lock(&sn_irq_info_lock); + list_add_rcu(&sn_irq_info->list, sn_irq_lh[sn_irq_info->irq_irq]); + reserve_irq_vector(sn_irq_info->irq_irq); + if (sn_irq_info->irq_int_bit != -1) + irq_set_handler(sn_irq_info->irq_irq, handle_level_irq); + spin_unlock(&sn_irq_info_lock); + + register_intr_pda(sn_irq_info); +#ifdef CONFIG_SMP + cpuphys = cpu_physical_id(cpu); + set_irq_affinity_info(sn_irq_info->irq_irq, cpuphys, 0); + /* + * Affinity was set by the PROM, prevent it from + * being reset by the request_irq() path. + */ + irqd_mark_affinity_was_set(irq_get_irq_data(sn_irq_info->irq_irq)); +#endif +} + +void sn_irq_unfixup(struct pci_dev *pci_dev) +{ + struct sn_irq_info *sn_irq_info; + + /* Only cleanup IRQ stuff if this device has a host bus context */ + if (!SN_PCIDEV_BUSSOFT(pci_dev)) + return; + + sn_irq_info = SN_PCIDEV_INFO(pci_dev)->pdi_sn_irq_info; + if (!sn_irq_info) + return; + if (!sn_irq_info->irq_irq) { + kfree(sn_irq_info); + return; + } + + unregister_intr_pda(sn_irq_info); + spin_lock(&sn_irq_info_lock); + list_del_rcu(&sn_irq_info->list); + spin_unlock(&sn_irq_info_lock); + if (list_empty(sn_irq_lh[sn_irq_info->irq_irq])) + free_irq_vector(sn_irq_info->irq_irq); + kfree_rcu(sn_irq_info, rcu); + pci_dev_put(pci_dev); + +} + +static inline void +sn_call_force_intr_provider(struct sn_irq_info *sn_irq_info) +{ + struct sn_pcibus_provider *pci_provider; + + pci_provider = sn_pci_provider[sn_irq_info->irq_bridge_type]; + + /* Don't force an interrupt if the irq has been disabled */ + if (!irqd_irq_disabled(irq_get_irq_data(sn_irq_info->irq_irq)) && + pci_provider && pci_provider->force_interrupt) + (*pci_provider->force_interrupt)(sn_irq_info); +} + +/* + * Check for lost interrupts. If the PIC int_status reg. says that + * an interrupt has been sent, but not handled, and the interrupt + * is not pending in either the cpu irr regs or in the soft irr regs, + * and the interrupt is not in service, then the interrupt may have + * been lost. Force an interrupt on that pin. It is possible that + * the interrupt is in flight, so we may generate a spurious interrupt, + * but we should never miss a real lost interrupt. + */ +static void sn_check_intr(int irq, struct sn_irq_info *sn_irq_info) +{ + u64 regval; + struct pcidev_info *pcidev_info; + struct pcibus_info *pcibus_info; + + /* + * Bridge types attached to TIO (anything but PIC) do not need this WAR + * since they do not target Shub II interrupt registers. If that + * ever changes, this check needs to accommodate. + */ + if (sn_irq_info->irq_bridge_type != PCIIO_ASIC_TYPE_PIC) + return; + + pcidev_info = (struct pcidev_info *)sn_irq_info->irq_pciioinfo; + if (!pcidev_info) + return; + + pcibus_info = + (struct pcibus_info *)pcidev_info->pdi_host_pcidev_info-> + pdi_pcibus_info; + regval = pcireg_intr_status_get(pcibus_info); + + if (!ia64_get_irr(irq_to_vector(irq))) { + if (!test_bit(irq, pda->sn_in_service_ivecs)) { + regval &= 0xff; + if (sn_irq_info->irq_int_bit & regval & + sn_irq_info->irq_last_intr) { + regval &= ~(sn_irq_info->irq_int_bit & regval); + sn_call_force_intr_provider(sn_irq_info); + } + } + } + sn_irq_info->irq_last_intr = regval; +} + +void sn_lb_int_war_check(void) +{ + struct sn_irq_info *sn_irq_info; + int i; + + if (!sn_ioif_inited || pda->sn_first_irq == 0) + return; + + rcu_read_lock(); + for (i = pda->sn_first_irq; i <= pda->sn_last_irq; i++) { + list_for_each_entry_rcu(sn_irq_info, sn_irq_lh[i], list) { + sn_check_intr(i, sn_irq_info); + } + } + rcu_read_unlock(); +} + +void __init sn_irq_lh_init(void) +{ + int i; + + sn_irq_lh = kmalloc_array(NR_IRQS, sizeof(struct list_head *), + GFP_KERNEL); + if (!sn_irq_lh) + panic("SN PCI INIT: Failed to allocate memory for PCI init\n"); + + for (i = 0; i < NR_IRQS; i++) { + sn_irq_lh[i] = kmalloc(sizeof(struct list_head), GFP_KERNEL); + if (!sn_irq_lh[i]) + panic("SN PCI INIT: Failed IRQ memory allocation\n"); + + INIT_LIST_HEAD(sn_irq_lh[i]); + } +} diff --git a/arch/ia64/sn/kernel/klconflib.c b/arch/ia64/sn/kernel/klconflib.c new file mode 100644 index 000000000..87682b48e --- /dev/null +++ b/arch/ia64/sn/kernel/klconflib.c @@ -0,0 +1,107 @@ +/* + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. + * + * Copyright (C) 1992 - 1997, 2000-2004 Silicon Graphics, Inc. All rights reserved. + */ + +#include <linux/types.h> +#include <linux/ctype.h> +#include <linux/string.h> +#include <linux/kernel.h> +#include <asm/sn/types.h> +#include <asm/sn/module.h> +#include <asm/sn/l1.h> + +char brick_types[MAX_BRICK_TYPES + 1] = "cri.xdpn%#=vo^kjbf890123456789..."; +/* + * Format a module id for printing. + * + * There are three possible formats: + * + * MODULE_FORMAT_BRIEF is the brief 6-character format, including + * the actual brick-type as recorded in the + * moduleid_t, eg. 002c15 for a C-brick, or + * 101#17 for a PX-brick. + * + * MODULE_FORMAT_LONG is the hwgraph format, eg. rack/002/bay/15 + * of rack/101/bay/17 (note that the brick + * type does not appear in this format). + * + * MODULE_FORMAT_LCD is like MODULE_FORMAT_BRIEF, except that it + * ensures that the module id provided appears + * exactly as it would on the LCD display of + * the corresponding brick, eg. still 002c15 + * for a C-brick, but 101p17 for a PX-brick. + * + * maule (9/13/04): Removed top-level check for (fmt == MODULE_FORMAT_LCD) + * making MODULE_FORMAT_LCD equivalent to MODULE_FORMAT_BRIEF. It was + * decided that all callers should assume the returned string should be what + * is displayed on the brick L1 LCD. + */ +void +format_module_id(char *buffer, moduleid_t m, int fmt) +{ + int rack, position; + unsigned char brickchar; + + rack = MODULE_GET_RACK(m); + brickchar = MODULE_GET_BTCHAR(m); + + /* Be sure we use the same brick type character as displayed + * on the brick's LCD + */ + switch (brickchar) + { + case L1_BRICKTYPE_GA: + case L1_BRICKTYPE_OPUS_TIO: + brickchar = L1_BRICKTYPE_C; + break; + + case L1_BRICKTYPE_PX: + case L1_BRICKTYPE_PE: + case L1_BRICKTYPE_PA: + case L1_BRICKTYPE_SA: /* we can move this to the "I's" later + * if that makes more sense + */ + brickchar = L1_BRICKTYPE_P; + break; + + case L1_BRICKTYPE_IX: + case L1_BRICKTYPE_IA: + + brickchar = L1_BRICKTYPE_I; + break; + } + + position = MODULE_GET_BPOS(m); + + if ((fmt == MODULE_FORMAT_BRIEF) || (fmt == MODULE_FORMAT_LCD)) { + /* Brief module number format, eg. 002c15 */ + + /* Decompress the rack number */ + *buffer++ = '0' + RACK_GET_CLASS(rack); + *buffer++ = '0' + RACK_GET_GROUP(rack); + *buffer++ = '0' + RACK_GET_NUM(rack); + + /* Add the brick type */ + *buffer++ = brickchar; + } + else if (fmt == MODULE_FORMAT_LONG) { + /* Fuller hwgraph format, eg. rack/002/bay/15 */ + + strcpy(buffer, "rack" "/"); buffer += strlen(buffer); + + *buffer++ = '0' + RACK_GET_CLASS(rack); + *buffer++ = '0' + RACK_GET_GROUP(rack); + *buffer++ = '0' + RACK_GET_NUM(rack); + + strcpy(buffer, "/" "bay" "/"); buffer += strlen(buffer); + } + + /* Add the bay position, using at least two digits */ + if (position < 10) + *buffer++ = '0'; + sprintf(buffer, "%d", position); +} diff --git a/arch/ia64/sn/kernel/machvec.c b/arch/ia64/sn/kernel/machvec.c new file mode 100644 index 000000000..02bb91558 --- /dev/null +++ b/arch/ia64/sn/kernel/machvec.c @@ -0,0 +1,11 @@ +/* + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. + * + * Copyright (c) 2002-2003 Silicon Graphics, Inc. All Rights Reserved. + */ + +#define MACHVEC_PLATFORM_NAME sn2 +#define MACHVEC_PLATFORM_HEADER <asm/machvec_sn2.h> +#include <asm/machvec_init.h> diff --git a/arch/ia64/sn/kernel/mca.c b/arch/ia64/sn/kernel/mca.c new file mode 100644 index 000000000..bc3bd930c --- /dev/null +++ b/arch/ia64/sn/kernel/mca.c @@ -0,0 +1,144 @@ +/* + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. + * + * Copyright (c) 2000-2006 Silicon Graphics, Inc. All Rights Reserved. + */ + +#include <linux/types.h> +#include <linux/kernel.h> +#include <linux/timer.h> +#include <linux/vmalloc.h> +#include <linux/mutex.h> +#include <asm/mca.h> +#include <asm/sal.h> +#include <asm/sn/sn_sal.h> + +/* + * Interval for calling SAL to poll for errors that do NOT cause error + * interrupts. SAL will raise a CPEI if any errors are present that + * need to be logged. + */ +#define CPEI_INTERVAL (5*HZ) + +struct timer_list sn_cpei_timer; +void sn_init_cpei_timer(void); + +/* Printing oemdata from mca uses data that is not passed through SAL, it is + * global. Only one user at a time. + */ +static DEFINE_MUTEX(sn_oemdata_mutex); +static u8 **sn_oemdata; +static u64 *sn_oemdata_size, sn_oemdata_bufsize; + +/* + * print_hook + * + * This function is the callback routine that SAL calls to log error + * info for platform errors. buf is appended to sn_oemdata, resizing as + * required. + * Note: this is a SAL to OS callback, running under the same rules as the SAL + * code. SAL calls are run with preempt disabled so this routine must not + * sleep. vmalloc can sleep so print_hook cannot resize the output buffer + * itself, instead it must set the required size and return to let the caller + * resize the buffer then redrive the SAL call. + */ +static int print_hook(const char *fmt, ...) +{ + char buf[400]; + int len; + va_list args; + va_start(args, fmt); + vsnprintf(buf, sizeof(buf), fmt, args); + va_end(args); + len = strlen(buf); + if (*sn_oemdata_size + len <= sn_oemdata_bufsize) + memcpy(*sn_oemdata + *sn_oemdata_size, buf, len); + *sn_oemdata_size += len; + return 0; +} + +static void sn_cpei_handler(int irq, void *devid, struct pt_regs *regs) +{ + /* + * this function's sole purpose is to call SAL when we receive + * a CE interrupt from SHUB or when the timer routine decides + * we need to call SAL to check for CEs. + */ + + /* CALL SAL_LOG_CE */ + + ia64_sn_plat_cpei_handler(); +} + +static void sn_cpei_timer_handler(struct timer_list *unused) +{ + sn_cpei_handler(-1, NULL, NULL); + mod_timer(&sn_cpei_timer, jiffies + CPEI_INTERVAL); +} + +void sn_init_cpei_timer(void) +{ + timer_setup(&sn_cpei_timer, sn_cpei_timer_handler, 0); + sn_cpei_timer.expires = jiffies + CPEI_INTERVAL; + add_timer(&sn_cpei_timer); +} + +static int +sn_platform_plat_specific_err_print(const u8 * sect_header, u8 ** oemdata, + u64 * oemdata_size) +{ + mutex_lock(&sn_oemdata_mutex); + sn_oemdata = oemdata; + sn_oemdata_size = oemdata_size; + sn_oemdata_bufsize = 0; + *sn_oemdata_size = PAGE_SIZE; /* first guess at how much data will be generated */ + while (*sn_oemdata_size > sn_oemdata_bufsize) { + u8 *newbuf = vmalloc(*sn_oemdata_size); + if (!newbuf) { + mutex_unlock(&sn_oemdata_mutex); + printk(KERN_ERR "%s: unable to extend sn_oemdata\n", + __func__); + return 1; + } + vfree(*sn_oemdata); + *sn_oemdata = newbuf; + sn_oemdata_bufsize = *sn_oemdata_size; + *sn_oemdata_size = 0; + ia64_sn_plat_specific_err_print(print_hook, (char *)sect_header); + } + mutex_unlock(&sn_oemdata_mutex); + return 0; +} + +/* Callback when userspace salinfo wants to decode oem data via the platform + * kernel and/or prom. + */ +int sn_salinfo_platform_oemdata(const u8 *sect_header, u8 **oemdata, u64 *oemdata_size) +{ + efi_guid_t guid = *(efi_guid_t *)sect_header; + int valid = 0; + *oemdata_size = 0; + vfree(*oemdata); + *oemdata = NULL; + if (efi_guidcmp(guid, SAL_PLAT_SPECIFIC_ERR_SECT_GUID) == 0) { + sal_log_plat_specific_err_info_t *psei = (sal_log_plat_specific_err_info_t *)sect_header; + valid = psei->valid.oem_data; + } else if (efi_guidcmp(guid, SAL_PLAT_MEM_DEV_ERR_SECT_GUID) == 0) { + sal_log_mem_dev_err_info_t *mdei = (sal_log_mem_dev_err_info_t *)sect_header; + valid = mdei->valid.oem_data; + } + if (valid) + return sn_platform_plat_specific_err_print(sect_header, oemdata, oemdata_size); + else + return 0; +} + +static int __init sn_salinfo_init(void) +{ + if (ia64_platform_is("sn2")) + salinfo_platform_oemdata = &sn_salinfo_platform_oemdata; + return 0; +} +device_initcall(sn_salinfo_init); diff --git a/arch/ia64/sn/kernel/msi_sn.c b/arch/ia64/sn/kernel/msi_sn.c new file mode 100644 index 000000000..fb25065b2 --- /dev/null +++ b/arch/ia64/sn/kernel/msi_sn.c @@ -0,0 +1,238 @@ +/* + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. + * + * Copyright (C) 2006 Silicon Graphics, Inc. All Rights Reserved. + */ + +#include <linux/types.h> +#include <linux/irq.h> +#include <linux/pci.h> +#include <linux/cpumask.h> +#include <linux/msi.h> +#include <linux/slab.h> + +#include <asm/sn/addrs.h> +#include <asm/sn/intr.h> +#include <asm/sn/pcibus_provider_defs.h> +#include <asm/sn/pcidev.h> +#include <asm/sn/nodepda.h> + +struct sn_msi_info { + u64 pci_addr; + struct sn_irq_info *sn_irq_info; +}; + +static struct sn_msi_info sn_msi_info[NR_IRQS]; + +static struct irq_chip sn_msi_chip; + +void sn_teardown_msi_irq(unsigned int irq) +{ + nasid_t nasid; + int widget; + struct pci_dev *pdev; + struct pcidev_info *sn_pdev; + struct sn_irq_info *sn_irq_info; + struct pcibus_bussoft *bussoft; + struct sn_pcibus_provider *provider; + + sn_irq_info = sn_msi_info[irq].sn_irq_info; + if (sn_irq_info == NULL || sn_irq_info->irq_int_bit >= 0) + return; + + sn_pdev = (struct pcidev_info *)sn_irq_info->irq_pciioinfo; + pdev = sn_pdev->pdi_linux_pcidev; + provider = SN_PCIDEV_BUSPROVIDER(pdev); + + (*provider->dma_unmap)(pdev, + sn_msi_info[irq].pci_addr, + PCI_DMA_FROMDEVICE); + sn_msi_info[irq].pci_addr = 0; + + bussoft = SN_PCIDEV_BUSSOFT(pdev); + nasid = NASID_GET(bussoft->bs_base); + widget = (nasid & 1) ? + TIO_SWIN_WIDGETNUM(bussoft->bs_base) : + SWIN_WIDGETNUM(bussoft->bs_base); + + sn_intr_free(nasid, widget, sn_irq_info); + sn_msi_info[irq].sn_irq_info = NULL; + + destroy_irq(irq); +} + +int sn_setup_msi_irq(struct pci_dev *pdev, struct msi_desc *entry) +{ + struct msi_msg msg; + int widget; + int status; + nasid_t nasid; + u64 bus_addr; + struct sn_irq_info *sn_irq_info; + struct pcibus_bussoft *bussoft = SN_PCIDEV_BUSSOFT(pdev); + struct sn_pcibus_provider *provider = SN_PCIDEV_BUSPROVIDER(pdev); + int irq; + + if (!entry->msi_attrib.is_64) + return -EINVAL; + + if (bussoft == NULL) + return -EINVAL; + + if (provider == NULL || provider->dma_map_consistent == NULL) + return -EINVAL; + + irq = create_irq(); + if (irq < 0) + return irq; + + /* + * Set up the vector plumbing. Let the prom (via sn_intr_alloc) + * decide which cpu to direct this msi at by default. + */ + + nasid = NASID_GET(bussoft->bs_base); + widget = (nasid & 1) ? + TIO_SWIN_WIDGETNUM(bussoft->bs_base) : + SWIN_WIDGETNUM(bussoft->bs_base); + + sn_irq_info = kzalloc(sizeof(struct sn_irq_info), GFP_KERNEL); + if (! sn_irq_info) { + destroy_irq(irq); + return -ENOMEM; + } + + status = sn_intr_alloc(nasid, widget, sn_irq_info, irq, -1, -1); + if (status) { + kfree(sn_irq_info); + destroy_irq(irq); + return -ENOMEM; + } + + sn_irq_info->irq_int_bit = -1; /* mark this as an MSI irq */ + sn_irq_fixup(pdev, sn_irq_info); + + /* Prom probably should fill these in, but doesn't ... */ + sn_irq_info->irq_bridge_type = bussoft->bs_asic_type; + sn_irq_info->irq_bridge = (void *)bussoft->bs_base; + + /* + * Map the xio address into bus space + */ + bus_addr = (*provider->dma_map_consistent)(pdev, + sn_irq_info->irq_xtalkaddr, + sizeof(sn_irq_info->irq_xtalkaddr), + SN_DMA_MSI|SN_DMA_ADDR_XIO); + if (! bus_addr) { + sn_intr_free(nasid, widget, sn_irq_info); + kfree(sn_irq_info); + destroy_irq(irq); + return -ENOMEM; + } + + sn_msi_info[irq].sn_irq_info = sn_irq_info; + sn_msi_info[irq].pci_addr = bus_addr; + + msg.address_hi = (u32)(bus_addr >> 32); + msg.address_lo = (u32)(bus_addr & 0x00000000ffffffff); + + /* + * In the SN platform, bit 16 is a "send vector" bit which + * must be present in order to move the vector through the system. + */ + msg.data = 0x100 + irq; + + irq_set_msi_desc(irq, entry); + pci_write_msi_msg(irq, &msg); + irq_set_chip_and_handler(irq, &sn_msi_chip, handle_edge_irq); + + return 0; +} + +#ifdef CONFIG_SMP +static int sn_set_msi_irq_affinity(struct irq_data *data, + const struct cpumask *cpu_mask, bool force) +{ + struct msi_msg msg; + int slice; + nasid_t nasid; + u64 bus_addr; + struct pci_dev *pdev; + struct pcidev_info *sn_pdev; + struct sn_irq_info *sn_irq_info; + struct sn_irq_info *new_irq_info; + struct sn_pcibus_provider *provider; + unsigned int cpu, irq = data->irq; + + cpu = cpumask_first_and(cpu_mask, cpu_online_mask); + sn_irq_info = sn_msi_info[irq].sn_irq_info; + if (sn_irq_info == NULL || sn_irq_info->irq_int_bit >= 0) + return -1; + + /* + * Release XIO resources for the old MSI PCI address + */ + + __get_cached_msi_msg(irq_data_get_msi_desc(data), &msg); + sn_pdev = (struct pcidev_info *)sn_irq_info->irq_pciioinfo; + pdev = sn_pdev->pdi_linux_pcidev; + provider = SN_PCIDEV_BUSPROVIDER(pdev); + + bus_addr = (u64)(msg.address_hi) << 32 | (u64)(msg.address_lo); + (*provider->dma_unmap)(pdev, bus_addr, PCI_DMA_FROMDEVICE); + sn_msi_info[irq].pci_addr = 0; + + nasid = cpuid_to_nasid(cpu); + slice = cpuid_to_slice(cpu); + + new_irq_info = sn_retarget_vector(sn_irq_info, nasid, slice); + sn_msi_info[irq].sn_irq_info = new_irq_info; + if (new_irq_info == NULL) + return -1; + + /* + * Map the xio address into bus space + */ + + bus_addr = (*provider->dma_map_consistent)(pdev, + new_irq_info->irq_xtalkaddr, + sizeof(new_irq_info->irq_xtalkaddr), + SN_DMA_MSI|SN_DMA_ADDR_XIO); + + sn_msi_info[irq].pci_addr = bus_addr; + msg.address_hi = (u32)(bus_addr >> 32); + msg.address_lo = (u32)(bus_addr & 0x00000000ffffffff); + + pci_write_msi_msg(irq, &msg); + cpumask_copy(irq_data_get_affinity_mask(data), cpu_mask); + + return 0; +} +#endif /* CONFIG_SMP */ + +static void sn_ack_msi_irq(struct irq_data *data) +{ + irq_move_irq(data); + ia64_eoi(); +} + +static int sn_msi_retrigger_irq(struct irq_data *data) +{ + unsigned int vector = data->irq; + ia64_resend_irq(vector); + + return 1; +} + +static struct irq_chip sn_msi_chip = { + .name = "PCI-MSI", + .irq_mask = pci_msi_mask_irq, + .irq_unmask = pci_msi_unmask_irq, + .irq_ack = sn_ack_msi_irq, +#ifdef CONFIG_SMP + .irq_set_affinity = sn_set_msi_irq_affinity, +#endif + .irq_retrigger = sn_msi_retrigger_irq, +}; diff --git a/arch/ia64/sn/kernel/pio_phys.S b/arch/ia64/sn/kernel/pio_phys.S new file mode 100644 index 000000000..3c7d48d6e --- /dev/null +++ b/arch/ia64/sn/kernel/pio_phys.S @@ -0,0 +1,71 @@ +/* + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. + * + * Copyright (C) 2000-2005 Silicon Graphics, Inc. All rights reserved. + * + * This file contains macros used to access MMR registers via + * uncached physical addresses. + * pio_phys_read_mmr - read an MMR + * pio_phys_write_mmr - write an MMR + * pio_atomic_phys_write_mmrs - atomically write 1 or 2 MMRs with psr.ic=0 + * Second MMR will be skipped if address is NULL + * + * Addresses passed to these routines should be uncached physical addresses + * ie., 0x80000.... + */ + + + +#include <asm/asmmacro.h> +#include <asm/page.h> + +GLOBAL_ENTRY(pio_phys_read_mmr) + .prologue + .regstk 1,0,0,0 + .body + mov r2=psr + rsm psr.i | psr.dt + ;; + srlz.d + ld8.acq r8=[r32] + ;; + mov psr.l=r2;; + srlz.d + br.ret.sptk.many rp +END(pio_phys_read_mmr) + +GLOBAL_ENTRY(pio_phys_write_mmr) + .prologue + .regstk 2,0,0,0 + .body + mov r2=psr + rsm psr.i | psr.dt + ;; + srlz.d + st8.rel [r32]=r33 + ;; + mov psr.l=r2;; + srlz.d + br.ret.sptk.many rp +END(pio_phys_write_mmr) + +GLOBAL_ENTRY(pio_atomic_phys_write_mmrs) + .prologue + .regstk 4,0,0,0 + .body + mov r2=psr + cmp.ne p9,p0=r34,r0; + rsm psr.i | psr.dt | psr.ic + ;; + srlz.d + st8.rel [r32]=r33 +(p9) st8.rel [r34]=r35 + ;; + mov psr.l=r2;; + srlz.d + br.ret.sptk.many rp +END(pio_atomic_phys_write_mmrs) + + diff --git a/arch/ia64/sn/kernel/setup.c b/arch/ia64/sn/kernel/setup.c new file mode 100644 index 000000000..5f6b6b48c --- /dev/null +++ b/arch/ia64/sn/kernel/setup.c @@ -0,0 +1,775 @@ +/* + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. + * + * Copyright (C) 1999,2001-2006 Silicon Graphics, Inc. All rights reserved. + */ + +#include <linux/module.h> +#include <linux/init.h> +#include <linux/delay.h> +#include <linux/kernel.h> +#include <linux/kdev_t.h> +#include <linux/string.h> +#include <linux/screen_info.h> +#include <linux/console.h> +#include <linux/timex.h> +#include <linux/sched.h> +#include <linux/ioport.h> +#include <linux/mm.h> +#include <linux/serial.h> +#include <linux/irq.h> +#include <linux/bootmem.h> +#include <linux/mmzone.h> +#include <linux/interrupt.h> +#include <linux/acpi.h> +#include <linux/compiler.h> +#include <linux/root_dev.h> +#include <linux/nodemask.h> +#include <linux/pm.h> +#include <linux/efi.h> + +#include <asm/io.h> +#include <asm/sal.h> +#include <asm/machvec.h> +#include <asm/processor.h> +#include <asm/vga.h> +#include <asm/setup.h> +#include <asm/sn/arch.h> +#include <asm/sn/addrs.h> +#include <asm/sn/pda.h> +#include <asm/sn/nodepda.h> +#include <asm/sn/sn_cpuid.h> +#include <asm/sn/simulator.h> +#include <asm/sn/leds.h> +#include <asm/sn/bte.h> +#include <asm/sn/shub_mmr.h> +#include <asm/sn/clksupport.h> +#include <asm/sn/sn_sal.h> +#include <asm/sn/geo.h> +#include <asm/sn/sn_feature_sets.h> +#include "xtalk/xwidgetdev.h" +#include "xtalk/hubdev.h" +#include <asm/sn/klconfig.h> + + +DEFINE_PER_CPU(struct pda_s, pda_percpu); + +#define MAX_PHYS_MEMORY (1UL << IA64_MAX_PHYS_BITS) /* Max physical address supported */ + +extern void bte_init_node(nodepda_t *, cnodeid_t); + +extern void sn_timer_init(void); +extern unsigned long last_time_offset; +extern void (*ia64_mark_idle) (int); +extern void snidle(int); + +unsigned long sn_rtc_cycles_per_second; +EXPORT_SYMBOL(sn_rtc_cycles_per_second); + +DEFINE_PER_CPU(struct sn_hub_info_s, __sn_hub_info); +EXPORT_PER_CPU_SYMBOL(__sn_hub_info); + +DEFINE_PER_CPU(short, __sn_cnodeid_to_nasid[MAX_COMPACT_NODES]); +EXPORT_PER_CPU_SYMBOL(__sn_cnodeid_to_nasid); + +DEFINE_PER_CPU(struct nodepda_s *, __sn_nodepda); +EXPORT_PER_CPU_SYMBOL(__sn_nodepda); + +char sn_system_serial_number_string[128]; +EXPORT_SYMBOL(sn_system_serial_number_string); +u64 sn_partition_serial_number; +EXPORT_SYMBOL(sn_partition_serial_number); +u8 sn_partition_id; +EXPORT_SYMBOL(sn_partition_id); +u8 sn_system_size; +EXPORT_SYMBOL(sn_system_size); +u8 sn_sharing_domain_size; +EXPORT_SYMBOL(sn_sharing_domain_size); +u8 sn_coherency_id; +EXPORT_SYMBOL(sn_coherency_id); +u8 sn_region_size; +EXPORT_SYMBOL(sn_region_size); +int sn_prom_type; /* 0=hardware, 1=medusa/realprom, 2=medusa/fakeprom */ + +short physical_node_map[MAX_NUMALINK_NODES]; +static unsigned long sn_prom_features[MAX_PROM_FEATURE_SETS]; + +EXPORT_SYMBOL(physical_node_map); + +int num_cnodes; + +static void sn_init_pdas(char **); +static void build_cnode_tables(void); + +static nodepda_t *nodepdaindr[MAX_COMPACT_NODES]; + +/* + * The format of "screen_info" is strange, and due to early i386-setup + * code. This is just enough to make the console code think we're on a + * VGA color display. + */ +struct screen_info sn_screen_info = { + .orig_x = 0, + .orig_y = 0, + .orig_video_mode = 3, + .orig_video_cols = 80, + .orig_video_ega_bx = 3, + .orig_video_lines = 25, + .orig_video_isVGA = 1, + .orig_video_points = 16 +}; + +/* + * This routine can only be used during init, since + * smp_boot_data is an init data structure. + * We have to use smp_boot_data.cpu_phys_id to find + * the physical id of the processor because the normal + * cpu_physical_id() relies on data structures that + * may not be initialized yet. + */ + +static int __init pxm_to_nasid(int pxm) +{ + int i; + int nid; + + nid = pxm_to_node(pxm); + for (i = 0; i < num_node_memblks; i++) { + if (node_memblk[i].nid == nid) { + return NASID_GET(node_memblk[i].start_paddr); + } + } + return -1; +} + +/** + * early_sn_setup - early setup routine for SN platforms + * + * Sets up an initial console to aid debugging. Intended primarily + * for bringup. See start_kernel() in init/main.c. + */ + +void __init early_sn_setup(void) +{ + efi_system_table_t *efi_systab; + efi_config_table_t *config_tables; + struct ia64_sal_systab *sal_systab; + struct ia64_sal_desc_entry_point *ep; + char *p; + int i, j; + + /* + * Parse enough of the SAL tables to locate the SAL entry point. Since, console + * IO on SN2 is done via SAL calls, early_printk won't work without this. + * + * This code duplicates some of the ACPI table parsing that is in efi.c & sal.c. + * Any changes to those file may have to be made here as well. + */ + efi_systab = (efi_system_table_t *) __va(ia64_boot_param->efi_systab); + config_tables = __va(efi_systab->tables); + for (i = 0; i < efi_systab->nr_tables; i++) { + if (efi_guidcmp(config_tables[i].guid, SAL_SYSTEM_TABLE_GUID) == + 0) { + sal_systab = __va(config_tables[i].table); + p = (char *)(sal_systab + 1); + for (j = 0; j < sal_systab->entry_count; j++) { + if (*p == SAL_DESC_ENTRY_POINT) { + ep = (struct ia64_sal_desc_entry_point + *)p; + ia64_sal_handler_init(__va + (ep->sal_proc), + __va(ep->gp)); + return; + } + p += SAL_DESC_SIZE(*p); + } + } + } + /* Uh-oh, SAL not available?? */ + printk(KERN_ERR "failed to find SAL entry point\n"); +} + +extern int platform_intr_list[]; +static int shub_1_1_found; + +/* + * sn_check_for_wars + * + * Set flag for enabling shub specific wars + */ + +static inline int is_shub_1_1(int nasid) +{ + unsigned long id; + int rev; + + if (is_shub2()) + return 0; + id = REMOTE_HUB_L(nasid, SH1_SHUB_ID); + rev = (id & SH1_SHUB_ID_REVISION_MASK) >> SH1_SHUB_ID_REVISION_SHFT; + return rev <= 2; +} + +static void sn_check_for_wars(void) +{ + int cnode; + + if (is_shub2()) { + /* none yet */ + } else { + for_each_online_node(cnode) { + if (is_shub_1_1(cnodeid_to_nasid(cnode))) + shub_1_1_found = 1; + } + } +} + +/* + * Scan the EFI PCDP table (if it exists) for an acceptable VGA console + * output device. If one exists, pick it and set sn_legacy_{io,mem} to + * reflect the bus offsets needed to address it. + * + * Since pcdp support in SN is not supported in the 2.4 kernel (or at least + * the one lbs is based on) just declare the needed structs here. + * + * Reference spec http://www.dig64.org/specifications/DIG64_PCDPv20.pdf + * + * Returns 0 if no acceptable vga is found, !0 otherwise. + * + * Note: This stuff is duped here because Altix requires the PCDP to + * locate a usable VGA device due to lack of proper ACPI support. Structures + * could be used from drivers/firmware/pcdp.h, but it was decided that moving + * this file to a more public location just for Altix use was undesirable. + */ + +struct hcdp_uart_desc { + u8 pad[45]; +}; + +struct pcdp { + u8 signature[4]; /* should be 'HCDP' */ + u32 length; + u8 rev; /* should be >=3 for pcdp, <3 for hcdp */ + u8 sum; + u8 oem_id[6]; + u64 oem_tableid; + u32 oem_rev; + u32 creator_id; + u32 creator_rev; + u32 num_type0; + struct hcdp_uart_desc uart[0]; /* num_type0 of these */ + /* pcdp descriptors follow */ +} __attribute__((packed)); + +struct pcdp_device_desc { + u8 type; + u8 primary; + u16 length; + u16 index; + /* interconnect specific structure follows */ + /* device specific structure follows that */ +} __attribute__((packed)); + +struct pcdp_interface_pci { + u8 type; /* 1 == pci */ + u8 reserved; + u16 length; + u8 segment; + u8 bus; + u8 dev; + u8 fun; + u16 devid; + u16 vendid; + u32 acpi_interrupt; + u64 mmio_tra; + u64 ioport_tra; + u8 flags; + u8 translation; +} __attribute__((packed)); + +struct pcdp_vga_device { + u8 num_eas_desc; + /* ACPI Extended Address Space Desc follows */ +} __attribute__((packed)); + +/* from pcdp_device_desc.primary */ +#define PCDP_PRIMARY_CONSOLE 0x01 + +/* from pcdp_device_desc.type */ +#define PCDP_CONSOLE_INOUT 0x0 +#define PCDP_CONSOLE_DEBUG 0x1 +#define PCDP_CONSOLE_OUT 0x2 +#define PCDP_CONSOLE_IN 0x3 +#define PCDP_CONSOLE_TYPE_VGA 0x8 + +#define PCDP_CONSOLE_VGA (PCDP_CONSOLE_TYPE_VGA | PCDP_CONSOLE_OUT) + +/* from pcdp_interface_pci.type */ +#define PCDP_IF_PCI 1 + +/* from pcdp_interface_pci.translation */ +#define PCDP_PCI_TRANS_IOPORT 0x02 +#define PCDP_PCI_TRANS_MMIO 0x01 + +#if defined(CONFIG_VT) && defined(CONFIG_VGA_CONSOLE) +static void +sn_scan_pcdp(void) +{ + u8 *bp; + struct pcdp *pcdp; + struct pcdp_device_desc device; + struct pcdp_interface_pci if_pci; + extern struct efi efi; + + if (efi.hcdp == EFI_INVALID_TABLE_ADDR) + return; /* no hcdp/pcdp table */ + + pcdp = __va(efi.hcdp); + + if (pcdp->rev < 3) + return; /* only support PCDP (rev >= 3) */ + + for (bp = (u8 *)&pcdp->uart[pcdp->num_type0]; + bp < (u8 *)pcdp + pcdp->length; + bp += device.length) { + memcpy(&device, bp, sizeof(device)); + if (! (device.primary & PCDP_PRIMARY_CONSOLE)) + continue; /* not primary console */ + + if (device.type != PCDP_CONSOLE_VGA) + continue; /* not VGA descriptor */ + + memcpy(&if_pci, bp+sizeof(device), sizeof(if_pci)); + if (if_pci.type != PCDP_IF_PCI) + continue; /* not PCI interconnect */ + + if (if_pci.translation & PCDP_PCI_TRANS_IOPORT) + vga_console_iobase = if_pci.ioport_tra; + + if (if_pci.translation & PCDP_PCI_TRANS_MMIO) + vga_console_membase = + if_pci.mmio_tra | __IA64_UNCACHED_OFFSET; + + break; /* once we find the primary, we're done */ + } +} +#endif + +static unsigned long sn2_rtc_initial; + +/** + * sn_setup - SN platform setup routine + * @cmdline_p: kernel command line + * + * Handles platform setup for SN machines. This includes determining + * the RTC frequency (via a SAL call), initializing secondary CPUs, and + * setting up per-node data areas. The console is also initialized here. + */ +void __init sn_setup(char **cmdline_p) +{ + long status, ticks_per_sec, drift; + u32 version = sn_sal_rev(); + extern void sn_cpu_init(void); + + sn2_rtc_initial = rtc_time(); + ia64_sn_plat_set_error_handling_features(); // obsolete + ia64_sn_set_os_feature(OSF_MCA_SLV_TO_OS_INIT_SLV); + ia64_sn_set_os_feature(OSF_FEAT_LOG_SBES); + /* + * Note: The calls to notify the PROM of ACPI and PCI Segment + * support must be done prior to acpi_load_tables(), as + * an ACPI capable PROM will rebuild the DSDT as result + * of the call. + */ + ia64_sn_set_os_feature(OSF_PCISEGMENT_ENABLE); + ia64_sn_set_os_feature(OSF_ACPI_ENABLE); + + /* Load the new DSDT and SSDT tables into the global table list. */ + acpi_table_init(); + +#if defined(CONFIG_VT) && defined(CONFIG_VGA_CONSOLE) + /* + * Handle SN vga console. + * + * SN systems do not have enough ACPI table information + * being passed from prom to identify VGA adapters and the legacy + * addresses to access them. Until that is done, SN systems rely + * on the PCDP table to identify the primary VGA console if one + * exists. + * + * However, kernel PCDP support is optional, and even if it is built + * into the kernel, it will not be used if the boot cmdline contains + * console= directives. + * + * So, to work around this mess, we duplicate some of the PCDP code + * here so that the primary VGA console (as defined by PCDP) will + * work on SN systems even if a different console (e.g. serial) is + * selected on the boot line (or CONFIG_EFI_PCDP is off). + */ + + if (! vga_console_membase) + sn_scan_pcdp(); + + /* + * Setup legacy IO space. + * vga_console_iobase maps to PCI IO Space address 0 on the + * bus containing the VGA console. + */ + if (vga_console_iobase) { + io_space[0].mmio_base = + (unsigned long) ioremap(vga_console_iobase, 0); + io_space[0].sparse = 0; + } + + if (vga_console_membase) { + /* usable vga ... make tty0 the preferred default console */ + if (!strstr(*cmdline_p, "console=")) + add_preferred_console("tty", 0, NULL); + } else { + printk(KERN_DEBUG "SGI: Disabling VGA console\n"); + if (!strstr(*cmdline_p, "console=")) + add_preferred_console("ttySG", 0, NULL); +#ifdef CONFIG_DUMMY_CONSOLE + conswitchp = &dummy_con; +#else + conswitchp = NULL; +#endif /* CONFIG_DUMMY_CONSOLE */ + } +#endif /* def(CONFIG_VT) && def(CONFIG_VGA_CONSOLE) */ + + MAX_DMA_ADDRESS = PAGE_OFFSET + MAX_PHYS_MEMORY; + + /* + * Build the tables for managing cnodes. + */ + build_cnode_tables(); + + status = + ia64_sal_freq_base(SAL_FREQ_BASE_REALTIME_CLOCK, &ticks_per_sec, + &drift); + if (status != 0 || ticks_per_sec < 100000) { + printk(KERN_WARNING + "unable to determine platform RTC clock frequency, guessing.\n"); + /* PROM gives wrong value for clock freq. so guess */ + sn_rtc_cycles_per_second = 1000000000000UL / 30000UL; + } else + sn_rtc_cycles_per_second = ticks_per_sec; + + platform_intr_list[ACPI_INTERRUPT_CPEI] = IA64_CPE_VECTOR; + + printk("SGI SAL version %x.%02x\n", version >> 8, version & 0x00FF); + + /* + * we set the default root device to /dev/hda + * to make simulation easy + */ + ROOT_DEV = Root_HDA1; + + /* + * Create the PDAs and NODEPDAs for all the cpus. + */ + sn_init_pdas(cmdline_p); + + ia64_mark_idle = &snidle; + + /* + * For the bootcpu, we do this here. All other cpus will make the + * call as part of cpu_init in slave cpu initialization. + */ + sn_cpu_init(); + +#ifdef CONFIG_SMP + init_smp_config(); +#endif + screen_info = sn_screen_info; + + sn_timer_init(); + + /* + * set pm_power_off to a SAL call to allow + * sn machines to power off. The SAL call can be replaced + * by an ACPI interface call when ACPI is fully implemented + * for sn. + */ + pm_power_off = ia64_sn_power_down; + current->thread.flags |= IA64_THREAD_MIGRATION; +} + +/** + * sn_init_pdas - setup node data areas + * + * One time setup for Node Data Area. Called by sn_setup(). + */ +static void __init sn_init_pdas(char **cmdline_p) +{ + cnodeid_t cnode; + + /* + * Allocate & initialize the nodepda for each node. + */ + for_each_online_node(cnode) { + nodepdaindr[cnode] = + alloc_bootmem_node(NODE_DATA(cnode), sizeof(nodepda_t)); + memset(nodepdaindr[cnode]->phys_cpuid, -1, + sizeof(nodepdaindr[cnode]->phys_cpuid)); + spin_lock_init(&nodepdaindr[cnode]->ptc_lock); + } + + /* + * Allocate & initialize nodepda for TIOs. For now, put them on node 0. + */ + for (cnode = num_online_nodes(); cnode < num_cnodes; cnode++) + nodepdaindr[cnode] = + alloc_bootmem_node(NODE_DATA(0), sizeof(nodepda_t)); + + /* + * Now copy the array of nodepda pointers to each nodepda. + */ + for (cnode = 0; cnode < num_cnodes; cnode++) + memcpy(nodepdaindr[cnode]->pernode_pdaindr, nodepdaindr, + sizeof(nodepdaindr)); + + /* + * Set up IO related platform-dependent nodepda fields. + * The following routine actually sets up the hubinfo struct + * in nodepda. + */ + for_each_online_node(cnode) { + bte_init_node(nodepdaindr[cnode], cnode); + } + + /* + * Initialize the per node hubdev. This includes IO Nodes and + * headless/memless nodes. + */ + for (cnode = 0; cnode < num_cnodes; cnode++) { + hubdev_init_node(nodepdaindr[cnode], cnode); + } +} + +/** + * sn_cpu_init - initialize per-cpu data areas + * @cpuid: cpuid of the caller + * + * Called during cpu initialization on each cpu as it starts. + * Currently, initializes the per-cpu data area for SNIA. + * Also sets up a few fields in the nodepda. Also known as + * platform_cpu_init() by the ia64 machvec code. + */ +void sn_cpu_init(void) +{ + int cpuid; + int cpuphyid; + int nasid; + int subnode; + int slice; + int cnode; + int i; + static int wars_have_been_checked, set_cpu0_number; + + cpuid = smp_processor_id(); + if (cpuid == 0 && IS_MEDUSA()) { + if (ia64_sn_is_fake_prom()) + sn_prom_type = 2; + else + sn_prom_type = 1; + printk(KERN_INFO "Running on medusa with %s PROM\n", + (sn_prom_type == 1) ? "real" : "fake"); + } + + memset(pda, 0, sizeof(*pda)); + if (ia64_sn_get_sn_info(0, &sn_hub_info->shub2, + &sn_hub_info->nasid_bitmask, + &sn_hub_info->nasid_shift, + &sn_system_size, &sn_sharing_domain_size, + &sn_partition_id, &sn_coherency_id, + &sn_region_size)) + BUG(); + sn_hub_info->as_shift = sn_hub_info->nasid_shift - 2; + + /* + * Don't check status. The SAL call is not supported on all PROMs + * but a failure is harmless. + * Architecturally, cpu_init is always called twice on cpu 0. We + * should set cpu_number on cpu 0 once. + */ + if (cpuid == 0) { + if (!set_cpu0_number) { + (void) ia64_sn_set_cpu_number(cpuid); + set_cpu0_number = 1; + } + } else + (void) ia64_sn_set_cpu_number(cpuid); + + /* + * The boot cpu makes this call again after platform initialization is + * complete. + */ + if (nodepdaindr[0] == NULL) + return; + + for (i = 0; i < MAX_PROM_FEATURE_SETS; i++) + if (ia64_sn_get_prom_feature_set(i, &sn_prom_features[i]) != 0) + break; + + cpuphyid = get_sapicid(); + + if (ia64_sn_get_sapic_info(cpuphyid, &nasid, &subnode, &slice)) + BUG(); + + for (i=0; i < MAX_NUMNODES; i++) { + if (nodepdaindr[i]) { + nodepdaindr[i]->phys_cpuid[cpuid].nasid = nasid; + nodepdaindr[i]->phys_cpuid[cpuid].slice = slice; + nodepdaindr[i]->phys_cpuid[cpuid].subnode = subnode; + } + } + + cnode = nasid_to_cnodeid(nasid); + + __this_cpu_write(__sn_nodepda, nodepdaindr[cnode]); + + pda->led_address = + (typeof(pda->led_address)) (LED0 + (slice << LED_CPU_SHIFT)); + pda->led_state = LED_ALWAYS_SET; + pda->hb_count = HZ / 2; + pda->hb_state = 0; + pda->idle_flag = 0; + + if (cpuid != 0) { + /* copy cpu 0's sn_cnodeid_to_nasid table to this cpu's */ + memcpy(sn_cnodeid_to_nasid, + (&per_cpu(__sn_cnodeid_to_nasid, 0)), + sizeof(__ia64_per_cpu_var(__sn_cnodeid_to_nasid))); + } + + /* + * Check for WARs. + * Only needs to be done once, on BSP. + * Has to be done after loop above, because it uses this cpu's + * sn_cnodeid_to_nasid table which was just initialized if this + * isn't cpu 0. + * Has to be done before assignment below. + */ + if (!wars_have_been_checked) { + sn_check_for_wars(); + wars_have_been_checked = 1; + } + sn_hub_info->shub_1_1_found = shub_1_1_found; + + /* + * Set up addresses of PIO/MEM write status registers. + */ + { + u64 pio1[] = {SH1_PIO_WRITE_STATUS_0, 0, SH1_PIO_WRITE_STATUS_1, 0}; + u64 pio2[] = {SH2_PIO_WRITE_STATUS_0, SH2_PIO_WRITE_STATUS_2, + SH2_PIO_WRITE_STATUS_1, SH2_PIO_WRITE_STATUS_3}; + u64 *pio; + pio = is_shub1() ? pio1 : pio2; + pda->pio_write_status_addr = + (volatile unsigned long *)GLOBAL_MMR_ADDR(nasid, pio[slice]); + pda->pio_write_status_val = is_shub1() ? SH_PIO_WRITE_STATUS_PENDING_WRITE_COUNT_MASK : 0; + } + + /* + * WAR addresses for SHUB 1.x. + */ + if (local_node_data->active_cpu_count++ == 0 && is_shub1()) { + int buddy_nasid; + buddy_nasid = + cnodeid_to_nasid(numa_node_id() == + num_online_nodes() - 1 ? 0 : numa_node_id() + 1); + pda->pio_shub_war_cam_addr = + (volatile unsigned long *)GLOBAL_MMR_ADDR(nasid, + SH1_PI_CAM_CONTROL); + } +} + +/* + * Build tables for converting between NASIDs and cnodes. + */ +static inline int __init board_needs_cnode(int type) +{ + return (type == KLTYPE_SNIA || type == KLTYPE_TIO); +} + +void __init build_cnode_tables(void) +{ + int nasid; + int node; + lboard_t *brd; + + memset(physical_node_map, -1, sizeof(physical_node_map)); + memset(sn_cnodeid_to_nasid, -1, + sizeof(__ia64_per_cpu_var(__sn_cnodeid_to_nasid))); + + /* + * First populate the tables with C/M bricks. This ensures that + * cnode == node for all C & M bricks. + */ + for_each_online_node(node) { + nasid = pxm_to_nasid(node_to_pxm(node)); + sn_cnodeid_to_nasid[node] = nasid; + physical_node_map[nasid] = node; + } + + /* + * num_cnodes is total number of C/M/TIO bricks. Because of the 256 node + * limit on the number of nodes, we can't use the generic node numbers + * for this. Note that num_cnodes is incremented below as TIOs or + * headless/memoryless nodes are discovered. + */ + num_cnodes = num_online_nodes(); + + /* fakeprom does not support klgraph */ + if (IS_RUNNING_ON_FAKE_PROM()) + return; + + /* Find TIOs & headless/memoryless nodes and add them to the tables */ + for_each_online_node(node) { + kl_config_hdr_t *klgraph_header; + nasid = cnodeid_to_nasid(node); + klgraph_header = ia64_sn_get_klconfig_addr(nasid); + BUG_ON(klgraph_header == NULL); + brd = NODE_OFFSET_TO_LBOARD(nasid, klgraph_header->ch_board_info); + while (brd) { + if (board_needs_cnode(brd->brd_type) && physical_node_map[brd->brd_nasid] < 0) { + sn_cnodeid_to_nasid[num_cnodes] = brd->brd_nasid; + physical_node_map[brd->brd_nasid] = num_cnodes++; + } + brd = find_lboard_next(brd); + } + } +} + +int +nasid_slice_to_cpuid(int nasid, int slice) +{ + long cpu; + + for (cpu = 0; cpu < nr_cpu_ids; cpu++) + if (cpuid_to_nasid(cpu) == nasid && + cpuid_to_slice(cpu) == slice) + return cpu; + + return -1; +} + +int sn_prom_feature_available(int id) +{ + if (id >= BITS_PER_LONG * MAX_PROM_FEATURE_SETS) + return 0; + return test_bit(id, sn_prom_features); +} + +void +sn_kernel_launch_event(void) +{ + /* ignore status until we understand possible failure, if any*/ + if (ia64_sn_kernel_launch_event()) + printk(KERN_ERR "KEXEC is not supported in this PROM, Please update the PROM.\n"); +} +EXPORT_SYMBOL(sn_prom_feature_available); + diff --git a/arch/ia64/sn/kernel/sn2/Makefile b/arch/ia64/sn/kernel/sn2/Makefile new file mode 100644 index 000000000..3d09108d4 --- /dev/null +++ b/arch/ia64/sn/kernel/sn2/Makefile @@ -0,0 +1,15 @@ +# arch/ia64/sn/kernel/sn2/Makefile +# +# This file is subject to the terms and conditions of the GNU General Public +# License. See the file "COPYING" in the main directory of this archive +# for more details. +# +# Copyright (C) 1999,2001-2002 Silicon Graphics, Inc. All rights reserved. +# +# sn2 specific kernel files +# + +ccflags-y := -Iarch/ia64/sn/include + +obj-y += cache.o io.o ptc_deadlock.o sn2_smp.o sn_proc_fs.o \ + prominfo_proc.o timer.o timer_interrupt.o sn_hwperf.o diff --git a/arch/ia64/sn/kernel/sn2/cache.c b/arch/ia64/sn/kernel/sn2/cache.c new file mode 100644 index 000000000..2862cb330 --- /dev/null +++ b/arch/ia64/sn/kernel/sn2/cache.c @@ -0,0 +1,41 @@ +/* + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. + * + * Copyright (C) 2001-2003, 2006 Silicon Graphics, Inc. All rights reserved. + * + */ +#include <linux/module.h> +#include <asm/pgalloc.h> +#include <asm/sn/arch.h> + +/** + * sn_flush_all_caches - flush a range of address from all caches (incl. L4) + * @flush_addr: identity mapped region 7 address to start flushing + * @bytes: number of bytes to flush + * + * Flush a range of addresses from all caches including L4. + * All addresses fully or partially contained within + * @flush_addr to @flush_addr + @bytes are flushed + * from all caches. + */ +void +sn_flush_all_caches(long flush_addr, long bytes) +{ + unsigned long addr = flush_addr; + + /* SHub1 requires a cached address */ + if (is_shub1() && (addr & RGN_BITS) == RGN_BASE(RGN_UNCACHED)) + addr = (addr - RGN_BASE(RGN_UNCACHED)) + RGN_BASE(RGN_KERNEL); + + flush_icache_range(addr, addr + bytes); + /* + * The last call may have returned before the caches + * were actually flushed, so we call it again to make + * sure. + */ + flush_icache_range(addr, addr + bytes); + mb(); +} +EXPORT_SYMBOL(sn_flush_all_caches); diff --git a/arch/ia64/sn/kernel/sn2/io.c b/arch/ia64/sn/kernel/sn2/io.c new file mode 100644 index 000000000..a12c0586d --- /dev/null +++ b/arch/ia64/sn/kernel/sn2/io.c @@ -0,0 +1,101 @@ +/* + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. + * + * Copyright (C) 2003 Silicon Graphics, Inc. All rights reserved. + * + * The generic kernel requires function pointers to these routines, so + * we wrap the inlines from asm/ia64/sn/sn2/io.h here. + */ + +#include <asm/sn/io.h> + +#ifdef CONFIG_IA64_GENERIC + +#undef __sn_inb +#undef __sn_inw +#undef __sn_inl +#undef __sn_outb +#undef __sn_outw +#undef __sn_outl +#undef __sn_readb +#undef __sn_readw +#undef __sn_readl +#undef __sn_readq +#undef __sn_readb_relaxed +#undef __sn_readw_relaxed +#undef __sn_readl_relaxed +#undef __sn_readq_relaxed + +unsigned int __sn_inb(unsigned long port) +{ + return ___sn_inb(port); +} + +unsigned int __sn_inw(unsigned long port) +{ + return ___sn_inw(port); +} + +unsigned int __sn_inl(unsigned long port) +{ + return ___sn_inl(port); +} + +void __sn_outb(unsigned char val, unsigned long port) +{ + ___sn_outb(val, port); +} + +void __sn_outw(unsigned short val, unsigned long port) +{ + ___sn_outw(val, port); +} + +void __sn_outl(unsigned int val, unsigned long port) +{ + ___sn_outl(val, port); +} + +unsigned char __sn_readb(void __iomem *addr) +{ + return ___sn_readb(addr); +} + +unsigned short __sn_readw(void __iomem *addr) +{ + return ___sn_readw(addr); +} + +unsigned int __sn_readl(void __iomem *addr) +{ + return ___sn_readl(addr); +} + +unsigned long __sn_readq(void __iomem *addr) +{ + return ___sn_readq(addr); +} + +unsigned char __sn_readb_relaxed(void __iomem *addr) +{ + return ___sn_readb_relaxed(addr); +} + +unsigned short __sn_readw_relaxed(void __iomem *addr) +{ + return ___sn_readw_relaxed(addr); +} + +unsigned int __sn_readl_relaxed(void __iomem *addr) +{ + return ___sn_readl_relaxed(addr); +} + +unsigned long __sn_readq_relaxed(void __iomem *addr) +{ + return ___sn_readq_relaxed(addr); +} + +#endif diff --git a/arch/ia64/sn/kernel/sn2/prominfo_proc.c b/arch/ia64/sn/kernel/sn2/prominfo_proc.c new file mode 100644 index 000000000..e15457bf2 --- /dev/null +++ b/arch/ia64/sn/kernel/sn2/prominfo_proc.c @@ -0,0 +1,207 @@ +/* + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. + * + * Copyright (C) 1999,2001-2004, 2006 Silicon Graphics, Inc. All Rights Reserved. + * + * Module to export the system's Firmware Interface Tables, including + * PROM revision numbers and banners, in /proc + */ +#include <linux/module.h> +#include <linux/slab.h> +#include <linux/proc_fs.h> +#include <linux/seq_file.h> +#include <linux/nodemask.h> +#include <asm/io.h> +#include <asm/sn/sn_sal.h> +#include <asm/sn/sn_cpuid.h> +#include <asm/sn/addrs.h> + +MODULE_DESCRIPTION("PROM version reporting for /proc"); +MODULE_AUTHOR("Chad Talbott"); +MODULE_LICENSE("GPL"); + +/* Standard Intel FIT entry types */ +#define FIT_ENTRY_FIT_HEADER 0x00 /* FIT header entry */ +#define FIT_ENTRY_PAL_B 0x01 /* PAL_B entry */ +/* Entries 0x02 through 0x0D reserved by Intel */ +#define FIT_ENTRY_PAL_A_PROC 0x0E /* Processor-specific PAL_A entry */ +#define FIT_ENTRY_PAL_A 0x0F /* PAL_A entry, same as... */ +#define FIT_ENTRY_PAL_A_GEN 0x0F /* ...Generic PAL_A entry */ +#define FIT_ENTRY_UNUSED 0x7F /* Unused (reserved by Intel?) */ +/* OEM-defined entries range from 0x10 to 0x7E. */ +#define FIT_ENTRY_SAL_A 0x10 /* SAL_A entry */ +#define FIT_ENTRY_SAL_B 0x11 /* SAL_B entry */ +#define FIT_ENTRY_SALRUNTIME 0x12 /* SAL runtime entry */ +#define FIT_ENTRY_EFI 0x1F /* EFI entry */ +#define FIT_ENTRY_FPSWA 0x20 /* embedded fpswa entry */ +#define FIT_ENTRY_VMLINUX 0x21 /* embedded vmlinux entry */ + +#define FIT_MAJOR_SHIFT (32 + 8) +#define FIT_MAJOR_MASK ((1 << 8) - 1) +#define FIT_MINOR_SHIFT 32 +#define FIT_MINOR_MASK ((1 << 8) - 1) + +#define FIT_MAJOR(q) \ + ((unsigned) ((q) >> FIT_MAJOR_SHIFT) & FIT_MAJOR_MASK) +#define FIT_MINOR(q) \ + ((unsigned) ((q) >> FIT_MINOR_SHIFT) & FIT_MINOR_MASK) + +#define FIT_TYPE_SHIFT (32 + 16) +#define FIT_TYPE_MASK ((1 << 7) - 1) + +#define FIT_TYPE(q) \ + ((unsigned) ((q) >> FIT_TYPE_SHIFT) & FIT_TYPE_MASK) + +struct fit_type_map_t { + unsigned char type; + const char *name; +}; + +static const struct fit_type_map_t fit_entry_types[] = { + {FIT_ENTRY_FIT_HEADER, "FIT Header"}, + {FIT_ENTRY_PAL_A_GEN, "Generic PAL_A"}, + {FIT_ENTRY_PAL_A_PROC, "Processor-specific PAL_A"}, + {FIT_ENTRY_PAL_A, "PAL_A"}, + {FIT_ENTRY_PAL_B, "PAL_B"}, + {FIT_ENTRY_SAL_A, "SAL_A"}, + {FIT_ENTRY_SAL_B, "SAL_B"}, + {FIT_ENTRY_SALRUNTIME, "SAL runtime"}, + {FIT_ENTRY_EFI, "EFI"}, + {FIT_ENTRY_VMLINUX, "Embedded Linux"}, + {FIT_ENTRY_FPSWA, "Embedded FPSWA"}, + {FIT_ENTRY_UNUSED, "Unused"}, + {0xff, "Error"}, +}; + +static const char *fit_type_name(unsigned char type) +{ + struct fit_type_map_t const *mapp; + + for (mapp = fit_entry_types; mapp->type != 0xff; mapp++) + if (type == mapp->type) + return mapp->name; + + if ((type > FIT_ENTRY_PAL_A) && (type < FIT_ENTRY_UNUSED)) + return "OEM type"; + if ((type > FIT_ENTRY_PAL_B) && (type < FIT_ENTRY_PAL_A)) + return "Reserved"; + + return "Unknown type"; +} + +static int +get_fit_entry(unsigned long nasid, int index, unsigned long *fentry, + char *banner, int banlen) +{ + return ia64_sn_get_fit_compt(nasid, index, fentry, banner, banlen); +} + + +/* + * These two routines display the FIT table for each node. + */ +static void dump_fit_entry(struct seq_file *m, unsigned long *fentry) +{ + unsigned type; + + type = FIT_TYPE(fentry[1]); + seq_printf(m, "%02x %-25s %x.%02x %016lx %u\n", + type, + fit_type_name(type), + FIT_MAJOR(fentry[1]), FIT_MINOR(fentry[1]), + fentry[0], + /* mult by sixteen to get size in bytes */ + (unsigned)(fentry[1] & 0xffffff) * 16); +} + + +/* + * We assume that the fit table will be small enough that we can print + * the whole thing into one page. (This is true for our default 16kB + * pages -- each entry is about 60 chars wide when printed.) I read + * somewhere that the maximum size of the FIT is 128 entries, so we're + * OK except for 4kB pages (and no one is going to do that on SN + * anyway). + */ +static int proc_fit_show(struct seq_file *m, void *v) +{ + unsigned long nasid = (unsigned long)m->private; + unsigned long fentry[2]; + int index; + + for (index=0;;index++) { + BUG_ON(index * 60 > PAGE_SIZE); + if (get_fit_entry(nasid, index, fentry, NULL, 0)) + break; + dump_fit_entry(m, fentry); + } + return 0; +} + +static int proc_version_show(struct seq_file *m, void *v) +{ + unsigned long nasid = (unsigned long)m->private; + unsigned long fentry[2]; + char banner[128]; + int index; + + for (index = 0; ; index++) { + if (get_fit_entry(nasid, index, fentry, banner, + sizeof(banner))) + return 0; + if (FIT_TYPE(fentry[1]) == FIT_ENTRY_SAL_A) + break; + } + + seq_printf(m, "%x.%02x\n", FIT_MAJOR(fentry[1]), FIT_MINOR(fentry[1])); + + if (banner[0]) + seq_printf(m, "%s\n", banner); + return 0; +} + +/* module entry points */ +int __init prominfo_init(void); +void __exit prominfo_exit(void); + +module_init(prominfo_init); +module_exit(prominfo_exit); + +#define NODE_NAME_LEN 11 + +int __init prominfo_init(void) +{ + struct proc_dir_entry *sgi_prominfo_entry; + cnodeid_t cnodeid; + + if (!ia64_platform_is("sn2")) + return 0; + + sgi_prominfo_entry = proc_mkdir("sgi_prominfo", NULL); + if (!sgi_prominfo_entry) + return -ENOMEM; + + for_each_online_node(cnodeid) { + struct proc_dir_entry *dir; + unsigned long nasid; + char name[NODE_NAME_LEN]; + + sprintf(name, "node%d", cnodeid); + dir = proc_mkdir(name, sgi_prominfo_entry); + if (!dir) + continue; + nasid = cnodeid_to_nasid(cnodeid); + proc_create_single_data("fit", 0, dir, proc_fit_show, + (void *)nasid); + proc_create_single_data("version", 0, dir, proc_version_show, + (void *)nasid); + } + return 0; +} + +void __exit prominfo_exit(void) +{ + remove_proc_subtree("sgi_prominfo", NULL); +} diff --git a/arch/ia64/sn/kernel/sn2/ptc_deadlock.S b/arch/ia64/sn/kernel/sn2/ptc_deadlock.S new file mode 100644 index 000000000..bebbcc4f8 --- /dev/null +++ b/arch/ia64/sn/kernel/sn2/ptc_deadlock.S @@ -0,0 +1,92 @@ +/* + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. + * + * Copyright (C) 2000-2005 Silicon Graphics, Inc. All rights reserved. + */ + +#include <asm/types.h> +#include <asm/sn/shub_mmr.h> + +#define DEADLOCKBIT SH_PIO_WRITE_STATUS_WRITE_DEADLOCK_SHFT +#define WRITECOUNTMASK SH_PIO_WRITE_STATUS_PENDING_WRITE_COUNT_MASK +#define ALIAS_OFFSET 8 + + + .global sn2_ptc_deadlock_recovery_core + .proc sn2_ptc_deadlock_recovery_core + +sn2_ptc_deadlock_recovery_core: + .regstk 6,0,0,0 + + ptc0 = in0 + data0 = in1 + ptc1 = in2 + data1 = in3 + piowc = in4 + zeroval = in5 + piowcphy = r30 + psrsave = r2 + scr1 = r16 + scr2 = r17 + mask = r18 + + + extr.u piowcphy=piowc,0,61;; // Convert piowc to uncached physical address + dep piowcphy=-1,piowcphy,63,1 + movl mask=WRITECOUNTMASK + mov r8=r0 + +1: + cmp.ne p8,p9=r0,ptc1 // Test for shub type (ptc1 non-null on shub1) + // p8 = 1 if shub1, p9 = 1 if shub2 + + add scr2=ALIAS_OFFSET,piowc // Address of WRITE_STATUS alias register + mov scr1=7;; // Clear DEADLOCK, WRITE_ERROR, MULTI_WRITE_ERROR +(p8) st8.rel [scr2]=scr1;; +(p9) ld8.acq scr1=[scr2];; + +5: ld8.acq scr1=[piowc];; // Wait for PIOs to complete. + hint @pause + and scr2=scr1,mask;; // mask of writecount bits + cmp.ne p6,p0=zeroval,scr2 +(p6) br.cond.sptk 5b + + + + ////////////// BEGIN PHYSICAL MODE //////////////////// + mov psrsave=psr // Disable IC (no PMIs) + rsm psr.i | psr.dt | psr.ic;; + srlz.i;; + + st8.rel [ptc0]=data0 // Write PTC0 & wait for completion. + +5: ld8.acq scr1=[piowcphy];; // Wait for PIOs to complete. + hint @pause + and scr2=scr1,mask;; // mask of writecount bits + cmp.ne p6,p0=zeroval,scr2 +(p6) br.cond.sptk 5b;; + + tbit.nz p8,p7=scr1,DEADLOCKBIT;;// Test for DEADLOCK +(p7) cmp.ne p7,p0=r0,ptc1;; // Test for non-null ptc1 + +(p7) st8.rel [ptc1]=data1;; // Now write PTC1. + +5: ld8.acq scr1=[piowcphy];; // Wait for PIOs to complete. + hint @pause + and scr2=scr1,mask;; // mask of writecount bits + cmp.ne p6,p0=zeroval,scr2 +(p6) br.cond.sptk 5b + + tbit.nz p8,p0=scr1,DEADLOCKBIT;;// Test for DEADLOCK + + mov psr.l=psrsave;; // Reenable IC + srlz.i;; + ////////////// END PHYSICAL MODE //////////////////// + +(p8) add r8=1,r8 +(p8) br.cond.spnt 1b;; // Repeat if DEADLOCK occurred. + + br.ret.sptk rp + .endp sn2_ptc_deadlock_recovery_core diff --git a/arch/ia64/sn/kernel/sn2/sn2_smp.c b/arch/ia64/sn/kernel/sn2/sn2_smp.c new file mode 100644 index 000000000..b73b0ebf8 --- /dev/null +++ b/arch/ia64/sn/kernel/sn2/sn2_smp.c @@ -0,0 +1,584 @@ +/* + * SN2 Platform specific SMP Support + * + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. + * + * Copyright (C) 2000-2006 Silicon Graphics, Inc. All rights reserved. + */ + +#include <linux/init.h> +#include <linux/kernel.h> +#include <linux/spinlock.h> +#include <linux/threads.h> +#include <linux/sched.h> +#include <linux/mm_types.h> +#include <linux/smp.h> +#include <linux/interrupt.h> +#include <linux/irq.h> +#include <linux/mmzone.h> +#include <linux/module.h> +#include <linux/bitops.h> +#include <linux/nodemask.h> +#include <linux/proc_fs.h> +#include <linux/seq_file.h> + +#include <asm/processor.h> +#include <asm/irq.h> +#include <asm/sal.h> +#include <asm/delay.h> +#include <asm/io.h> +#include <asm/smp.h> +#include <asm/tlb.h> +#include <asm/numa.h> +#include <asm/hw_irq.h> +#include <asm/current.h> +#include <asm/sn/sn_cpuid.h> +#include <asm/sn/sn_sal.h> +#include <asm/sn/addrs.h> +#include <asm/sn/shub_mmr.h> +#include <asm/sn/nodepda.h> +#include <asm/sn/rw_mmr.h> +#include <asm/sn/sn_feature_sets.h> + +DEFINE_PER_CPU(struct ptc_stats, ptcstats); +DECLARE_PER_CPU(struct ptc_stats, ptcstats); + +static __cacheline_aligned DEFINE_SPINLOCK(sn2_global_ptc_lock); + +/* 0 = old algorithm (no IPI flushes), 1 = ipi deadlock flush, 2 = ipi instead of SHUB ptc, >2 = always ipi */ +static int sn2_flush_opt = 0; + +extern unsigned long +sn2_ptc_deadlock_recovery_core(volatile unsigned long *, unsigned long, + volatile unsigned long *, unsigned long, + volatile unsigned long *, unsigned long); +void +sn2_ptc_deadlock_recovery(nodemask_t, short, short, int, + volatile unsigned long *, unsigned long, + volatile unsigned long *, unsigned long); + +/* + * Note: some is the following is captured here to make degugging easier + * (the macros make more sense if you see the debug patch - not posted) + */ +#define sn2_ptctest 0 +#define local_node_uses_ptc_ga(sh1) ((sh1) ? 1 : 0) +#define max_active_pio(sh1) ((sh1) ? 32 : 7) +#define reset_max_active_on_deadlock() 1 +#define PTC_LOCK(sh1) ((sh1) ? &sn2_global_ptc_lock : &sn_nodepda->ptc_lock) + +struct ptc_stats { + unsigned long ptc_l; + unsigned long change_rid; + unsigned long shub_ptc_flushes; + unsigned long nodes_flushed; + unsigned long deadlocks; + unsigned long deadlocks2; + unsigned long lock_itc_clocks; + unsigned long shub_itc_clocks; + unsigned long shub_itc_clocks_max; + unsigned long shub_ptc_flushes_not_my_mm; + unsigned long shub_ipi_flushes; + unsigned long shub_ipi_flushes_itc_clocks; +}; + +#define sn2_ptctest 0 + +static inline unsigned long wait_piowc(void) +{ + volatile unsigned long *piows; + unsigned long zeroval, ws; + + piows = pda->pio_write_status_addr; + zeroval = pda->pio_write_status_val; + do { + cpu_relax(); + } while (((ws = *piows) & SH_PIO_WRITE_STATUS_PENDING_WRITE_COUNT_MASK) != zeroval); + return (ws & SH_PIO_WRITE_STATUS_WRITE_DEADLOCK_MASK) != 0; +} + +/** + * sn_migrate - SN-specific task migration actions + * @task: Task being migrated to new CPU + * + * SN2 PIO writes from separate CPUs are not guaranteed to arrive in order. + * Context switching user threads which have memory-mapped MMIO may cause + * PIOs to issue from separate CPUs, thus the PIO writes must be drained + * from the previous CPU's Shub before execution resumes on the new CPU. + */ +void sn_migrate(struct task_struct *task) +{ + pda_t *last_pda = pdacpu(task_thread_info(task)->last_cpu); + volatile unsigned long *adr = last_pda->pio_write_status_addr; + unsigned long val = last_pda->pio_write_status_val; + + /* Drain PIO writes from old CPU's Shub */ + while (unlikely((*adr & SH_PIO_WRITE_STATUS_PENDING_WRITE_COUNT_MASK) + != val)) + cpu_relax(); +} + +void sn_tlb_migrate_finish(struct mm_struct *mm) +{ + /* flush_tlb_mm is inefficient if more than 1 users of mm */ + if (mm == current->mm && mm && atomic_read(&mm->mm_users) == 1) + flush_tlb_mm(mm); +} + +static void +sn2_ipi_flush_all_tlb(struct mm_struct *mm) +{ + unsigned long itc; + + itc = ia64_get_itc(); + smp_flush_tlb_cpumask(*mm_cpumask(mm)); + itc = ia64_get_itc() - itc; + __this_cpu_add(ptcstats.shub_ipi_flushes_itc_clocks, itc); + __this_cpu_inc(ptcstats.shub_ipi_flushes); +} + +/** + * sn2_global_tlb_purge - globally purge translation cache of virtual address range + * @mm: mm_struct containing virtual address range + * @start: start of virtual address range + * @end: end of virtual address range + * @nbits: specifies number of bytes to purge per instruction (num = 1<<(nbits & 0xfc)) + * + * Purges the translation caches of all processors of the given virtual address + * range. + * + * Note: + * - cpu_vm_mask is a bit mask that indicates which cpus have loaded the context. + * - cpu_vm_mask is converted into a nodemask of the nodes containing the + * cpus in cpu_vm_mask. + * - if only one bit is set in cpu_vm_mask & it is the current cpu & the + * process is purging its own virtual address range, then only the + * local TLB needs to be flushed. This flushing can be done using + * ptc.l. This is the common case & avoids the global spinlock. + * - if multiple cpus have loaded the context, then flushing has to be + * done with ptc.g/MMRs under protection of the global ptc_lock. + */ + +void +sn2_global_tlb_purge(struct mm_struct *mm, unsigned long start, + unsigned long end, unsigned long nbits) +{ + int i, ibegin, shub1, cnode, mynasid, cpu, lcpu = 0, nasid; + int mymm = (mm == current->active_mm && mm == current->mm); + int use_cpu_ptcga; + volatile unsigned long *ptc0, *ptc1; + unsigned long itc, itc2, flags, data0 = 0, data1 = 0, rr_value, old_rr = 0; + short nix; + nodemask_t nodes_flushed; + int active, max_active, deadlock, flush_opt = sn2_flush_opt; + + if (flush_opt > 2) { + sn2_ipi_flush_all_tlb(mm); + return; + } + + nodes_clear(nodes_flushed); + i = 0; + + for_each_cpu(cpu, mm_cpumask(mm)) { + cnode = cpu_to_node(cpu); + node_set(cnode, nodes_flushed); + lcpu = cpu; + i++; + } + + if (i == 0) + return; + + preempt_disable(); + + if (likely(i == 1 && lcpu == smp_processor_id() && mymm)) { + do { + ia64_ptcl(start, nbits << 2); + start += (1UL << nbits); + } while (start < end); + ia64_srlz_i(); + __this_cpu_inc(ptcstats.ptc_l); + preempt_enable(); + return; + } + + if (atomic_read(&mm->mm_users) == 1 && mymm) { + flush_tlb_mm(mm); + __this_cpu_inc(ptcstats.change_rid); + preempt_enable(); + return; + } + + if (flush_opt == 2) { + sn2_ipi_flush_all_tlb(mm); + preempt_enable(); + return; + } + + itc = ia64_get_itc(); + nix = nodes_weight(nodes_flushed); + + rr_value = (mm->context << 3) | REGION_NUMBER(start); + + shub1 = is_shub1(); + if (shub1) { + data0 = (1UL << SH1_PTC_0_A_SHFT) | + (nbits << SH1_PTC_0_PS_SHFT) | + (rr_value << SH1_PTC_0_RID_SHFT) | + (1UL << SH1_PTC_0_START_SHFT); + ptc0 = (long *)GLOBAL_MMR_PHYS_ADDR(0, SH1_PTC_0); + ptc1 = (long *)GLOBAL_MMR_PHYS_ADDR(0, SH1_PTC_1); + } else { + data0 = (1UL << SH2_PTC_A_SHFT) | + (nbits << SH2_PTC_PS_SHFT) | + (1UL << SH2_PTC_START_SHFT); + ptc0 = (long *)GLOBAL_MMR_PHYS_ADDR(0, SH2_PTC + + (rr_value << SH2_PTC_RID_SHFT)); + ptc1 = NULL; + } + + + mynasid = get_nasid(); + use_cpu_ptcga = local_node_uses_ptc_ga(shub1); + max_active = max_active_pio(shub1); + + itc = ia64_get_itc(); + spin_lock_irqsave(PTC_LOCK(shub1), flags); + itc2 = ia64_get_itc(); + + __this_cpu_add(ptcstats.lock_itc_clocks, itc2 - itc); + __this_cpu_inc(ptcstats.shub_ptc_flushes); + __this_cpu_add(ptcstats.nodes_flushed, nix); + if (!mymm) + __this_cpu_inc(ptcstats.shub_ptc_flushes_not_my_mm); + + if (use_cpu_ptcga && !mymm) { + old_rr = ia64_get_rr(start); + ia64_set_rr(start, (old_rr & 0xff) | (rr_value << 8)); + ia64_srlz_d(); + } + + wait_piowc(); + do { + if (shub1) + data1 = start | (1UL << SH1_PTC_1_START_SHFT); + else + data0 = (data0 & ~SH2_PTC_ADDR_MASK) | (start & SH2_PTC_ADDR_MASK); + deadlock = 0; + active = 0; + ibegin = 0; + i = 0; + for_each_node_mask(cnode, nodes_flushed) { + nasid = cnodeid_to_nasid(cnode); + if (use_cpu_ptcga && unlikely(nasid == mynasid)) { + ia64_ptcga(start, nbits << 2); + ia64_srlz_i(); + } else { + ptc0 = CHANGE_NASID(nasid, ptc0); + if (ptc1) + ptc1 = CHANGE_NASID(nasid, ptc1); + pio_atomic_phys_write_mmrs(ptc0, data0, ptc1, data1); + active++; + } + if (active >= max_active || i == (nix - 1)) { + if ((deadlock = wait_piowc())) { + if (flush_opt == 1) + goto done; + sn2_ptc_deadlock_recovery(nodes_flushed, ibegin, i, mynasid, ptc0, data0, ptc1, data1); + if (reset_max_active_on_deadlock()) + max_active = 1; + } + active = 0; + ibegin = i + 1; + } + i++; + } + start += (1UL << nbits); + } while (start < end); + +done: + itc2 = ia64_get_itc() - itc2; + __this_cpu_add(ptcstats.shub_itc_clocks, itc2); + if (itc2 > __this_cpu_read(ptcstats.shub_itc_clocks_max)) + __this_cpu_write(ptcstats.shub_itc_clocks_max, itc2); + + if (old_rr) { + ia64_set_rr(start, old_rr); + ia64_srlz_d(); + } + + spin_unlock_irqrestore(PTC_LOCK(shub1), flags); + + if (flush_opt == 1 && deadlock) { + __this_cpu_inc(ptcstats.deadlocks); + sn2_ipi_flush_all_tlb(mm); + } + + preempt_enable(); +} + +/* + * sn2_ptc_deadlock_recovery + * + * Recover from PTC deadlocks conditions. Recovery requires stepping thru each + * TLB flush transaction. The recovery sequence is somewhat tricky & is + * coded in assembly language. + */ + +void +sn2_ptc_deadlock_recovery(nodemask_t nodes, short ib, short ie, int mynasid, + volatile unsigned long *ptc0, unsigned long data0, + volatile unsigned long *ptc1, unsigned long data1) +{ + short nasid, i; + int cnode; + unsigned long *piows, zeroval, n; + + __this_cpu_inc(ptcstats.deadlocks); + + piows = (unsigned long *) pda->pio_write_status_addr; + zeroval = pda->pio_write_status_val; + + i = 0; + for_each_node_mask(cnode, nodes) { + if (i < ib) + goto next; + + if (i > ie) + break; + + nasid = cnodeid_to_nasid(cnode); + if (local_node_uses_ptc_ga(is_shub1()) && nasid == mynasid) + goto next; + + ptc0 = CHANGE_NASID(nasid, ptc0); + if (ptc1) + ptc1 = CHANGE_NASID(nasid, ptc1); + + n = sn2_ptc_deadlock_recovery_core(ptc0, data0, ptc1, data1, piows, zeroval); + __this_cpu_add(ptcstats.deadlocks2, n); +next: + i++; + } + +} + +/** + * sn_send_IPI_phys - send an IPI to a Nasid and slice + * @nasid: nasid to receive the interrupt (may be outside partition) + * @physid: physical cpuid to receive the interrupt. + * @vector: command to send + * @delivery_mode: delivery mechanism + * + * Sends an IPI (interprocessor interrupt) to the processor specified by + * @physid + * + * @delivery_mode can be one of the following + * + * %IA64_IPI_DM_INT - pend an interrupt + * %IA64_IPI_DM_PMI - pend a PMI + * %IA64_IPI_DM_NMI - pend an NMI + * %IA64_IPI_DM_INIT - pend an INIT interrupt + */ +void sn_send_IPI_phys(int nasid, long physid, int vector, int delivery_mode) +{ + long val; + unsigned long flags = 0; + volatile long *p; + + p = (long *)GLOBAL_MMR_PHYS_ADDR(nasid, SH_IPI_INT); + val = (1UL << SH_IPI_INT_SEND_SHFT) | + (physid << SH_IPI_INT_PID_SHFT) | + ((long)delivery_mode << SH_IPI_INT_TYPE_SHFT) | + ((long)vector << SH_IPI_INT_IDX_SHFT) | + (0x000feeUL << SH_IPI_INT_BASE_SHFT); + + mb(); + if (enable_shub_wars_1_1()) { + spin_lock_irqsave(&sn2_global_ptc_lock, flags); + } + pio_phys_write_mmr(p, val); + if (enable_shub_wars_1_1()) { + wait_piowc(); + spin_unlock_irqrestore(&sn2_global_ptc_lock, flags); + } + +} + +EXPORT_SYMBOL(sn_send_IPI_phys); + +/** + * sn2_send_IPI - send an IPI to a processor + * @cpuid: target of the IPI + * @vector: command to send + * @delivery_mode: delivery mechanism + * @redirect: redirect the IPI? + * + * Sends an IPI (InterProcessor Interrupt) to the processor specified by + * @cpuid. @vector specifies the command to send, while @delivery_mode can + * be one of the following + * + * %IA64_IPI_DM_INT - pend an interrupt + * %IA64_IPI_DM_PMI - pend a PMI + * %IA64_IPI_DM_NMI - pend an NMI + * %IA64_IPI_DM_INIT - pend an INIT interrupt + */ +void sn2_send_IPI(int cpuid, int vector, int delivery_mode, int redirect) +{ + long physid; + int nasid; + + physid = cpu_physical_id(cpuid); + nasid = cpuid_to_nasid(cpuid); + + /* the following is used only when starting cpus at boot time */ + if (unlikely(nasid == -1)) + ia64_sn_get_sapic_info(physid, &nasid, NULL, NULL); + + sn_send_IPI_phys(nasid, physid, vector, delivery_mode); +} + +#ifdef CONFIG_HOTPLUG_CPU +/** + * sn_cpu_disable_allowed - Determine if a CPU can be disabled. + * @cpu - CPU that is requested to be disabled. + * + * CPU disable is only allowed on SHub2 systems running with a PROM + * that supports CPU disable. It is not permitted to disable the boot processor. + */ +bool sn_cpu_disable_allowed(int cpu) +{ + if (is_shub2() && sn_prom_feature_available(PRF_CPU_DISABLE_SUPPORT)) { + if (cpu != 0) + return true; + else + printk(KERN_WARNING + "Disabling the boot processor is not allowed.\n"); + + } else + printk(KERN_WARNING + "CPU disable is not supported on this system.\n"); + + return false; +} +#endif /* CONFIG_HOTPLUG_CPU */ + +#ifdef CONFIG_PROC_FS + +#define PTC_BASENAME "sgi_sn/ptc_statistics" + +static void *sn2_ptc_seq_start(struct seq_file *file, loff_t * offset) +{ + if (*offset < nr_cpu_ids) + return offset; + return NULL; +} + +static void *sn2_ptc_seq_next(struct seq_file *file, void *data, loff_t * offset) +{ + (*offset)++; + if (*offset < nr_cpu_ids) + return offset; + return NULL; +} + +static void sn2_ptc_seq_stop(struct seq_file *file, void *data) +{ +} + +static int sn2_ptc_seq_show(struct seq_file *file, void *data) +{ + struct ptc_stats *stat; + int cpu; + + cpu = *(loff_t *) data; + + if (!cpu) { + seq_printf(file, + "# cpu ptc_l newrid ptc_flushes nodes_flushed deadlocks lock_nsec shub_nsec shub_nsec_max not_my_mm deadlock2 ipi_fluches ipi_nsec\n"); + seq_printf(file, "# ptctest %d, flushopt %d\n", sn2_ptctest, sn2_flush_opt); + } + + if (cpu < nr_cpu_ids && cpu_online(cpu)) { + stat = &per_cpu(ptcstats, cpu); + seq_printf(file, "cpu %d %ld %ld %ld %ld %ld %ld %ld %ld %ld %ld %ld %ld\n", cpu, stat->ptc_l, + stat->change_rid, stat->shub_ptc_flushes, stat->nodes_flushed, + stat->deadlocks, + 1000 * stat->lock_itc_clocks / per_cpu(ia64_cpu_info, cpu).cyc_per_usec, + 1000 * stat->shub_itc_clocks / per_cpu(ia64_cpu_info, cpu).cyc_per_usec, + 1000 * stat->shub_itc_clocks_max / per_cpu(ia64_cpu_info, cpu).cyc_per_usec, + stat->shub_ptc_flushes_not_my_mm, + stat->deadlocks2, + stat->shub_ipi_flushes, + 1000 * stat->shub_ipi_flushes_itc_clocks / per_cpu(ia64_cpu_info, cpu).cyc_per_usec); + } + return 0; +} + +static ssize_t sn2_ptc_proc_write(struct file *file, const char __user *user, size_t count, loff_t *data) +{ + int cpu; + char optstr[64]; + + if (count == 0 || count > sizeof(optstr)) + return -EINVAL; + if (copy_from_user(optstr, user, count)) + return -EFAULT; + optstr[count - 1] = '\0'; + sn2_flush_opt = simple_strtoul(optstr, NULL, 0); + + for_each_online_cpu(cpu) + memset(&per_cpu(ptcstats, cpu), 0, sizeof(struct ptc_stats)); + + return count; +} + +static const struct seq_operations sn2_ptc_seq_ops = { + .start = sn2_ptc_seq_start, + .next = sn2_ptc_seq_next, + .stop = sn2_ptc_seq_stop, + .show = sn2_ptc_seq_show +}; + +static int sn2_ptc_proc_open(struct inode *inode, struct file *file) +{ + return seq_open(file, &sn2_ptc_seq_ops); +} + +static const struct file_operations proc_sn2_ptc_operations = { + .open = sn2_ptc_proc_open, + .read = seq_read, + .write = sn2_ptc_proc_write, + .llseek = seq_lseek, + .release = seq_release, +}; + +static struct proc_dir_entry *proc_sn2_ptc; + +static int __init sn2_ptc_init(void) +{ + if (!ia64_platform_is("sn2")) + return 0; + + proc_sn2_ptc = proc_create(PTC_BASENAME, 0444, + NULL, &proc_sn2_ptc_operations); + if (!proc_sn2_ptc) { + printk(KERN_ERR "unable to create %s proc entry", PTC_BASENAME); + return -EINVAL; + } + spin_lock_init(&sn2_global_ptc_lock); + return 0; +} + +static void __exit sn2_ptc_exit(void) +{ + remove_proc_entry(PTC_BASENAME, NULL); +} + +module_init(sn2_ptc_init); +module_exit(sn2_ptc_exit); +#endif /* CONFIG_PROC_FS */ + diff --git a/arch/ia64/sn/kernel/sn2/sn_hwperf.c b/arch/ia64/sn/kernel/sn2/sn_hwperf.c new file mode 100644 index 000000000..55febd659 --- /dev/null +++ b/arch/ia64/sn/kernel/sn2/sn_hwperf.c @@ -0,0 +1,1004 @@ +/* + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. + * + * Copyright (C) 2004-2006 Silicon Graphics, Inc. All rights reserved. + * + * SGI Altix topology and hardware performance monitoring API. + * Mark Goodwin <markgw@sgi.com>. + * + * Creates /proc/sgi_sn/sn_topology (read-only) to export + * info about Altix nodes, routers, CPUs and NumaLink + * interconnection/topology. + * + * Also creates a dynamic misc device named "sn_hwperf" + * that supports an ioctl interface to call down into SAL + * to discover hw objects, topology and to read/write + * memory mapped registers, e.g. for performance monitoring. + * The "sn_hwperf" device is registered only after the procfs + * file is first opened, i.e. only if/when it's needed. + * + * This API is used by SGI Performance Co-Pilot and other + * tools, see http://oss.sgi.com/projects/pcp + */ + +#include <linux/fs.h> +#include <linux/slab.h> +#include <linux/export.h> +#include <linux/vmalloc.h> +#include <linux/seq_file.h> +#include <linux/miscdevice.h> +#include <linux/utsname.h> +#include <linux/cpumask.h> +#include <linux/nodemask.h> +#include <linux/smp.h> +#include <linux/mutex.h> + +#include <asm/processor.h> +#include <asm/topology.h> +#include <linux/uaccess.h> +#include <asm/sal.h> +#include <asm/sn/io.h> +#include <asm/sn/sn_sal.h> +#include <asm/sn/module.h> +#include <asm/sn/geo.h> +#include <asm/sn/sn2/sn_hwperf.h> +#include <asm/sn/addrs.h> + +static void *sn_hwperf_salheap = NULL; +static int sn_hwperf_obj_cnt = 0; +static nasid_t sn_hwperf_master_nasid = INVALID_NASID; +static int sn_hwperf_init(void); +static DEFINE_MUTEX(sn_hwperf_init_mutex); + +#define cnode_possible(n) ((n) < num_cnodes) + +static int sn_hwperf_enum_objects(int *nobj, struct sn_hwperf_object_info **ret) +{ + int e; + u64 sz; + struct sn_hwperf_object_info *objbuf = NULL; + + if ((e = sn_hwperf_init()) < 0) { + printk(KERN_ERR "sn_hwperf_init failed: err %d\n", e); + goto out; + } + + sz = sn_hwperf_obj_cnt * sizeof(struct sn_hwperf_object_info); + objbuf = vmalloc(sz); + if (objbuf == NULL) { + printk("sn_hwperf_enum_objects: vmalloc(%d) failed\n", (int)sz); + e = -ENOMEM; + goto out; + } + + e = ia64_sn_hwperf_op(sn_hwperf_master_nasid, SN_HWPERF_ENUM_OBJECTS, + 0, sz, (u64) objbuf, 0, 0, NULL); + if (e != SN_HWPERF_OP_OK) { + e = -EINVAL; + vfree(objbuf); + } + +out: + *nobj = sn_hwperf_obj_cnt; + *ret = objbuf; + return e; +} + +static int sn_hwperf_location_to_bpos(char *location, + int *rack, int *bay, int *slot, int *slab) +{ + char type; + + /* first scan for an old style geoid string */ + if (sscanf(location, "%03d%c%02d#%d", + rack, &type, bay, slab) == 4) + *slot = 0; + else /* scan for a new bladed geoid string */ + if (sscanf(location, "%03d%c%02d^%02d#%d", + rack, &type, bay, slot, slab) != 5) + return -1; + /* success */ + return 0; +} + +static int sn_hwperf_geoid_to_cnode(char *location) +{ + int cnode; + geoid_t geoid; + moduleid_t module_id; + int rack, bay, slot, slab; + int this_rack, this_bay, this_slot, this_slab; + + if (sn_hwperf_location_to_bpos(location, &rack, &bay, &slot, &slab)) + return -1; + + /* + * FIXME: replace with cleaner for_each_XXX macro which addresses + * both compute and IO nodes once ACPI3.0 is available. + */ + for (cnode = 0; cnode < num_cnodes; cnode++) { + geoid = cnodeid_get_geoid(cnode); + module_id = geo_module(geoid); + this_rack = MODULE_GET_RACK(module_id); + this_bay = MODULE_GET_BPOS(module_id); + this_slot = geo_slot(geoid); + this_slab = geo_slab(geoid); + if (rack == this_rack && bay == this_bay && + slot == this_slot && slab == this_slab) { + break; + } + } + + return cnode_possible(cnode) ? cnode : -1; +} + +static int sn_hwperf_obj_to_cnode(struct sn_hwperf_object_info * obj) +{ + if (!SN_HWPERF_IS_NODE(obj) && !SN_HWPERF_IS_IONODE(obj)) + BUG(); + if (SN_HWPERF_FOREIGN(obj)) + return -1; + return sn_hwperf_geoid_to_cnode(obj->location); +} + +static int sn_hwperf_generic_ordinal(struct sn_hwperf_object_info *obj, + struct sn_hwperf_object_info *objs) +{ + int ordinal; + struct sn_hwperf_object_info *p; + + for (ordinal=0, p=objs; p != obj; p++) { + if (SN_HWPERF_FOREIGN(p)) + continue; + if (SN_HWPERF_SAME_OBJTYPE(p, obj)) + ordinal++; + } + + return ordinal; +} + +static const char *slabname_node = "node"; /* SHub asic */ +static const char *slabname_ionode = "ionode"; /* TIO asic */ +static const char *slabname_router = "router"; /* NL3R or NL4R */ +static const char *slabname_other = "other"; /* unknown asic */ + +static const char *sn_hwperf_get_slabname(struct sn_hwperf_object_info *obj, + struct sn_hwperf_object_info *objs, int *ordinal) +{ + int isnode; + const char *slabname = slabname_other; + + if ((isnode = SN_HWPERF_IS_NODE(obj)) || SN_HWPERF_IS_IONODE(obj)) { + slabname = isnode ? slabname_node : slabname_ionode; + *ordinal = sn_hwperf_obj_to_cnode(obj); + } + else { + *ordinal = sn_hwperf_generic_ordinal(obj, objs); + if (SN_HWPERF_IS_ROUTER(obj)) + slabname = slabname_router; + } + + return slabname; +} + +static void print_pci_topology(struct seq_file *s) +{ + char *p; + size_t sz; + int e; + + for (sz = PAGE_SIZE; sz < 16 * PAGE_SIZE; sz += PAGE_SIZE) { + if (!(p = kmalloc(sz, GFP_KERNEL))) + break; + e = ia64_sn_ioif_get_pci_topology(__pa(p), sz); + if (e == SALRET_OK) + seq_puts(s, p); + kfree(p); + if (e == SALRET_OK || e == SALRET_NOT_IMPLEMENTED) + break; + } +} + +static inline int sn_hwperf_has_cpus(cnodeid_t node) +{ + return node < MAX_NUMNODES && node_online(node) && nr_cpus_node(node); +} + +static inline int sn_hwperf_has_mem(cnodeid_t node) +{ + return node < MAX_NUMNODES && node_online(node) && NODE_DATA(node)->node_present_pages; +} + +static struct sn_hwperf_object_info * +sn_hwperf_findobj_id(struct sn_hwperf_object_info *objbuf, + int nobj, int id) +{ + int i; + struct sn_hwperf_object_info *p = objbuf; + + for (i=0; i < nobj; i++, p++) { + if (p->id == id) + return p; + } + + return NULL; + +} + +static int sn_hwperf_get_nearest_node_objdata(struct sn_hwperf_object_info *objbuf, + int nobj, cnodeid_t node, cnodeid_t *near_mem_node, cnodeid_t *near_cpu_node) +{ + int e; + struct sn_hwperf_object_info *nodeobj = NULL; + struct sn_hwperf_object_info *op; + struct sn_hwperf_object_info *dest; + struct sn_hwperf_object_info *router; + struct sn_hwperf_port_info ptdata[16]; + int sz, i, j; + cnodeid_t c; + int found_mem = 0; + int found_cpu = 0; + + if (!cnode_possible(node)) + return -EINVAL; + + if (sn_hwperf_has_cpus(node)) { + if (near_cpu_node) + *near_cpu_node = node; + found_cpu++; + } + + if (sn_hwperf_has_mem(node)) { + if (near_mem_node) + *near_mem_node = node; + found_mem++; + } + + if (found_cpu && found_mem) + return 0; /* trivially successful */ + + /* find the argument node object */ + for (i=0, op=objbuf; i < nobj; i++, op++) { + if (!SN_HWPERF_IS_NODE(op) && !SN_HWPERF_IS_IONODE(op)) + continue; + if (node == sn_hwperf_obj_to_cnode(op)) { + nodeobj = op; + break; + } + } + if (!nodeobj) { + e = -ENOENT; + goto err; + } + + /* get it's interconnect topology */ + sz = op->ports * sizeof(struct sn_hwperf_port_info); + BUG_ON(sz > sizeof(ptdata)); + e = ia64_sn_hwperf_op(sn_hwperf_master_nasid, + SN_HWPERF_ENUM_PORTS, nodeobj->id, sz, + (u64)&ptdata, 0, 0, NULL); + if (e != SN_HWPERF_OP_OK) { + e = -EINVAL; + goto err; + } + + /* find nearest node with cpus and nearest memory */ + for (router=NULL, j=0; j < op->ports; j++) { + dest = sn_hwperf_findobj_id(objbuf, nobj, ptdata[j].conn_id); + if (dest && SN_HWPERF_IS_ROUTER(dest)) + router = dest; + if (!dest || SN_HWPERF_FOREIGN(dest) || + !SN_HWPERF_IS_NODE(dest) || SN_HWPERF_IS_IONODE(dest)) { + continue; + } + c = sn_hwperf_obj_to_cnode(dest); + if (!found_cpu && sn_hwperf_has_cpus(c)) { + if (near_cpu_node) + *near_cpu_node = c; + found_cpu++; + } + if (!found_mem && sn_hwperf_has_mem(c)) { + if (near_mem_node) + *near_mem_node = c; + found_mem++; + } + } + + if (router && (!found_cpu || !found_mem)) { + /* search for a node connected to the same router */ + sz = router->ports * sizeof(struct sn_hwperf_port_info); + BUG_ON(sz > sizeof(ptdata)); + e = ia64_sn_hwperf_op(sn_hwperf_master_nasid, + SN_HWPERF_ENUM_PORTS, router->id, sz, + (u64)&ptdata, 0, 0, NULL); + if (e != SN_HWPERF_OP_OK) { + e = -EINVAL; + goto err; + } + for (j=0; j < router->ports; j++) { + dest = sn_hwperf_findobj_id(objbuf, nobj, + ptdata[j].conn_id); + if (!dest || dest->id == node || + SN_HWPERF_FOREIGN(dest) || + !SN_HWPERF_IS_NODE(dest) || + SN_HWPERF_IS_IONODE(dest)) { + continue; + } + c = sn_hwperf_obj_to_cnode(dest); + if (!found_cpu && sn_hwperf_has_cpus(c)) { + if (near_cpu_node) + *near_cpu_node = c; + found_cpu++; + } + if (!found_mem && sn_hwperf_has_mem(c)) { + if (near_mem_node) + *near_mem_node = c; + found_mem++; + } + if (found_cpu && found_mem) + break; + } + } + + if (!found_cpu || !found_mem) { + /* resort to _any_ node with CPUs and memory */ + for (i=0, op=objbuf; i < nobj; i++, op++) { + if (SN_HWPERF_FOREIGN(op) || + SN_HWPERF_IS_IONODE(op) || + !SN_HWPERF_IS_NODE(op)) { + continue; + } + c = sn_hwperf_obj_to_cnode(op); + if (!found_cpu && sn_hwperf_has_cpus(c)) { + if (near_cpu_node) + *near_cpu_node = c; + found_cpu++; + } + if (!found_mem && sn_hwperf_has_mem(c)) { + if (near_mem_node) + *near_mem_node = c; + found_mem++; + } + if (found_cpu && found_mem) + break; + } + } + + if (!found_cpu || !found_mem) + e = -ENODATA; + +err: + return e; +} + + +static int sn_topology_show(struct seq_file *s, void *d) +{ + int sz; + int pt; + int e = 0; + int i; + int j; + const char *slabname; + int ordinal; + char slice; + struct cpuinfo_ia64 *c; + struct sn_hwperf_port_info *ptdata; + struct sn_hwperf_object_info *p; + struct sn_hwperf_object_info *obj = d; /* this object */ + struct sn_hwperf_object_info *objs = s->private; /* all objects */ + u8 shubtype; + u8 system_size; + u8 sharing_size; + u8 partid; + u8 coher; + u8 nasid_shift; + u8 region_size; + u16 nasid_mask; + int nasid_msb; + + if (obj == objs) { + seq_printf(s, "# sn_topology version 2\n"); + seq_printf(s, "# objtype ordinal location partition" + " [attribute value [, ...]]\n"); + + if (ia64_sn_get_sn_info(0, + &shubtype, &nasid_mask, &nasid_shift, &system_size, + &sharing_size, &partid, &coher, ®ion_size)) + BUG(); + for (nasid_msb=63; nasid_msb > 0; nasid_msb--) { + if (((u64)nasid_mask << nasid_shift) & (1ULL << nasid_msb)) + break; + } + seq_printf(s, "partition %u %s local " + "shubtype %s, " + "nasid_mask 0x%016llx, " + "nasid_bits %d:%d, " + "system_size %d, " + "sharing_size %d, " + "coherency_domain %d, " + "region_size %d\n", + + partid, utsname()->nodename, + shubtype ? "shub2" : "shub1", + (u64)nasid_mask << nasid_shift, nasid_msb, nasid_shift, + system_size, sharing_size, coher, region_size); + + print_pci_topology(s); + } + + if (SN_HWPERF_FOREIGN(obj)) { + /* private in another partition: not interesting */ + return 0; + } + + for (i = 0; i < SN_HWPERF_MAXSTRING && obj->name[i]; i++) { + if (obj->name[i] == ' ') + obj->name[i] = '_'; + } + + slabname = sn_hwperf_get_slabname(obj, objs, &ordinal); + seq_printf(s, "%s %d %s %s asic %s", slabname, ordinal, obj->location, + obj->sn_hwp_this_part ? "local" : "shared", obj->name); + + if (ordinal < 0 || (!SN_HWPERF_IS_NODE(obj) && !SN_HWPERF_IS_IONODE(obj))) + seq_putc(s, '\n'); + else { + cnodeid_t near_mem = -1; + cnodeid_t near_cpu = -1; + + seq_printf(s, ", nasid 0x%x", cnodeid_to_nasid(ordinal)); + + if (sn_hwperf_get_nearest_node_objdata(objs, sn_hwperf_obj_cnt, + ordinal, &near_mem, &near_cpu) == 0) { + seq_printf(s, ", near_mem_nodeid %d, near_cpu_nodeid %d", + near_mem, near_cpu); + } + + if (!SN_HWPERF_IS_IONODE(obj)) { + for_each_online_node(i) { + seq_printf(s, i ? ":%d" : ", dist %d", + node_distance(ordinal, i)); + } + } + + seq_putc(s, '\n'); + + /* + * CPUs on this node, if any + */ + if (!SN_HWPERF_IS_IONODE(obj)) { + for_each_cpu_and(i, cpu_online_mask, + cpumask_of_node(ordinal)) { + slice = 'a' + cpuid_to_slice(i); + c = cpu_data(i); + seq_printf(s, "cpu %d %s%c local" + " freq %luMHz, arch ia64", + i, obj->location, slice, + c->proc_freq / 1000000); + for_each_online_cpu(j) { + seq_printf(s, j ? ":%d" : ", dist %d", + node_distance( + cpu_to_node(i), + cpu_to_node(j))); + } + seq_putc(s, '\n'); + } + } + } + + if (obj->ports) { + /* + * numalink ports + */ + sz = obj->ports * sizeof(struct sn_hwperf_port_info); + if ((ptdata = kmalloc(sz, GFP_KERNEL)) == NULL) + return -ENOMEM; + e = ia64_sn_hwperf_op(sn_hwperf_master_nasid, + SN_HWPERF_ENUM_PORTS, obj->id, sz, + (u64) ptdata, 0, 0, NULL); + if (e != SN_HWPERF_OP_OK) + return -EINVAL; + for (ordinal=0, p=objs; p != obj; p++) { + if (!SN_HWPERF_FOREIGN(p)) + ordinal += p->ports; + } + for (pt = 0; pt < obj->ports; pt++) { + for (p = objs, i = 0; i < sn_hwperf_obj_cnt; i++, p++) { + if (ptdata[pt].conn_id == p->id) { + break; + } + } + seq_printf(s, "numalink %d %s-%d", + ordinal+pt, obj->location, ptdata[pt].port); + + if (i >= sn_hwperf_obj_cnt) { + /* no connection */ + seq_puts(s, " local endpoint disconnected" + ", protocol unknown\n"); + continue; + } + + if (obj->sn_hwp_this_part && p->sn_hwp_this_part) + /* both ends local to this partition */ + seq_puts(s, " local"); + else if (SN_HWPERF_FOREIGN(p)) + /* both ends of the link in foreign partition */ + seq_puts(s, " foreign"); + else + /* link straddles a partition */ + seq_puts(s, " shared"); + + /* + * Unlikely, but strictly should query the LLP config + * registers because an NL4R can be configured to run + * NL3 protocol, even when not talking to an NL3 router. + * Ditto for node-node. + */ + seq_printf(s, " endpoint %s-%d, protocol %s\n", + p->location, ptdata[pt].conn_port, + (SN_HWPERF_IS_NL3ROUTER(obj) || + SN_HWPERF_IS_NL3ROUTER(p)) ? "LLP3" : "LLP4"); + } + kfree(ptdata); + } + + return 0; +} + +static void *sn_topology_start(struct seq_file *s, loff_t * pos) +{ + struct sn_hwperf_object_info *objs = s->private; + + if (*pos < sn_hwperf_obj_cnt) + return (void *)(objs + *pos); + + return NULL; +} + +static void *sn_topology_next(struct seq_file *s, void *v, loff_t * pos) +{ + ++*pos; + return sn_topology_start(s, pos); +} + +static void sn_topology_stop(struct seq_file *m, void *v) +{ + return; +} + +/* + * /proc/sgi_sn/sn_topology, read-only using seq_file + */ +static const struct seq_operations sn_topology_seq_ops = { + .start = sn_topology_start, + .next = sn_topology_next, + .stop = sn_topology_stop, + .show = sn_topology_show +}; + +struct sn_hwperf_op_info { + u64 op; + struct sn_hwperf_ioctl_args *a; + void *p; + int *v0; + int ret; +}; + +static void sn_hwperf_call_sal(void *info) +{ + struct sn_hwperf_op_info *op_info = info; + int r; + + r = ia64_sn_hwperf_op(sn_hwperf_master_nasid, op_info->op, + op_info->a->arg, op_info->a->sz, + (u64) op_info->p, 0, 0, op_info->v0); + op_info->ret = r; +} + +static long sn_hwperf_call_sal_work(void *info) +{ + sn_hwperf_call_sal(info); + return 0; +} + +static int sn_hwperf_op_cpu(struct sn_hwperf_op_info *op_info) +{ + u32 cpu; + u32 use_ipi; + int r = 0; + + cpu = (op_info->a->arg & SN_HWPERF_ARG_CPU_MASK) >> 32; + use_ipi = op_info->a->arg & SN_HWPERF_ARG_USE_IPI_MASK; + op_info->a->arg &= SN_HWPERF_ARG_OBJID_MASK; + + if (cpu != SN_HWPERF_ARG_ANY_CPU) { + if (cpu >= nr_cpu_ids || !cpu_online(cpu)) { + r = -EINVAL; + goto out; + } + } + + if (cpu == SN_HWPERF_ARG_ANY_CPU) { + /* don't care which cpu */ + sn_hwperf_call_sal(op_info); + } else if (cpu == get_cpu()) { + /* already on correct cpu */ + sn_hwperf_call_sal(op_info); + put_cpu(); + } else { + put_cpu(); + if (use_ipi) { + /* use an interprocessor interrupt to call SAL */ + smp_call_function_single(cpu, sn_hwperf_call_sal, + op_info, 1); + } else { + /* Call on the target CPU */ + work_on_cpu_safe(cpu, sn_hwperf_call_sal_work, op_info); + } + } + r = op_info->ret; + +out: + return r; +} + +/* map SAL hwperf error code to system error code */ +static int sn_hwperf_map_err(int hwperf_err) +{ + int e; + + switch(hwperf_err) { + case SN_HWPERF_OP_OK: + e = 0; + break; + + case SN_HWPERF_OP_NOMEM: + e = -ENOMEM; + break; + + case SN_HWPERF_OP_NO_PERM: + e = -EPERM; + break; + + case SN_HWPERF_OP_IO_ERROR: + e = -EIO; + break; + + case SN_HWPERF_OP_BUSY: + e = -EBUSY; + break; + + case SN_HWPERF_OP_RECONFIGURE: + e = -EAGAIN; + break; + + case SN_HWPERF_OP_INVAL: + default: + e = -EINVAL; + break; + } + + return e; +} + +/* + * ioctl for "sn_hwperf" misc device + */ +static long sn_hwperf_ioctl(struct file *fp, u32 op, unsigned long arg) +{ + struct sn_hwperf_ioctl_args a; + struct cpuinfo_ia64 *cdata; + struct sn_hwperf_object_info *objs; + struct sn_hwperf_object_info *cpuobj; + struct sn_hwperf_op_info op_info; + void *p = NULL; + int nobj; + char slice; + int node; + int r; + int v0; + int i; + int j; + + /* only user requests are allowed here */ + if ((op & SN_HWPERF_OP_MASK) < 10) { + r = -EINVAL; + goto error; + } + r = copy_from_user(&a, (const void __user *)arg, + sizeof(struct sn_hwperf_ioctl_args)); + if (r != 0) { + r = -EFAULT; + goto error; + } + + /* + * Allocate memory to hold a kernel copy of the user buffer. The + * buffer contents are either copied in or out (or both) of user + * space depending on the flags encoded in the requested operation. + */ + if (a.ptr) { + p = vmalloc(a.sz); + if (!p) { + r = -ENOMEM; + goto error; + } + } + + if (op & SN_HWPERF_OP_MEM_COPYIN) { + r = copy_from_user(p, (const void __user *)a.ptr, a.sz); + if (r != 0) { + r = -EFAULT; + goto error; + } + } + + switch (op) { + case SN_HWPERF_GET_CPU_INFO: + if (a.sz == sizeof(u64)) { + /* special case to get size needed */ + *(u64 *) p = (u64) num_online_cpus() * + sizeof(struct sn_hwperf_object_info); + } else + if (a.sz < num_online_cpus() * sizeof(struct sn_hwperf_object_info)) { + r = -ENOMEM; + goto error; + } else + if ((r = sn_hwperf_enum_objects(&nobj, &objs)) == 0) { + int cpuobj_index = 0; + + memset(p, 0, a.sz); + for (i = 0; i < nobj; i++) { + if (!SN_HWPERF_IS_NODE(objs + i)) + continue; + node = sn_hwperf_obj_to_cnode(objs + i); + for_each_online_cpu(j) { + if (node != cpu_to_node(j)) + continue; + cpuobj = (struct sn_hwperf_object_info *) p + cpuobj_index++; + slice = 'a' + cpuid_to_slice(j); + cdata = cpu_data(j); + cpuobj->id = j; + snprintf(cpuobj->name, + sizeof(cpuobj->name), + "CPU %luMHz %s", + cdata->proc_freq / 1000000, + cdata->vendor); + snprintf(cpuobj->location, + sizeof(cpuobj->location), + "%s%c", objs[i].location, + slice); + } + } + + vfree(objs); + } + break; + + case SN_HWPERF_GET_NODE_NASID: + if (a.sz != sizeof(u64) || + (node = a.arg) < 0 || !cnode_possible(node)) { + r = -EINVAL; + goto error; + } + *(u64 *)p = (u64)cnodeid_to_nasid(node); + break; + + case SN_HWPERF_GET_OBJ_NODE: + i = a.arg; + if (a.sz != sizeof(u64) || i < 0) { + r = -EINVAL; + goto error; + } + if ((r = sn_hwperf_enum_objects(&nobj, &objs)) == 0) { + if (i >= nobj) { + r = -EINVAL; + vfree(objs); + goto error; + } + if (objs[i].id != a.arg) { + for (i = 0; i < nobj; i++) { + if (objs[i].id == a.arg) + break; + } + } + if (i == nobj) { + r = -EINVAL; + vfree(objs); + goto error; + } + + if (!SN_HWPERF_IS_NODE(objs + i) && + !SN_HWPERF_IS_IONODE(objs + i)) { + r = -ENOENT; + vfree(objs); + goto error; + } + + *(u64 *)p = (u64)sn_hwperf_obj_to_cnode(objs + i); + vfree(objs); + } + break; + + case SN_HWPERF_GET_MMRS: + case SN_HWPERF_SET_MMRS: + case SN_HWPERF_OBJECT_DISTANCE: + op_info.p = p; + op_info.a = &a; + op_info.v0 = &v0; + op_info.op = op; + r = sn_hwperf_op_cpu(&op_info); + if (r) { + r = sn_hwperf_map_err(r); + a.v0 = v0; + goto error; + } + break; + + default: + /* all other ops are a direct SAL call */ + r = ia64_sn_hwperf_op(sn_hwperf_master_nasid, op, + a.arg, a.sz, (u64) p, 0, 0, &v0); + if (r) { + r = sn_hwperf_map_err(r); + goto error; + } + a.v0 = v0; + break; + } + + if (op & SN_HWPERF_OP_MEM_COPYOUT) { + r = copy_to_user((void __user *)a.ptr, p, a.sz); + if (r != 0) { + r = -EFAULT; + goto error; + } + } + +error: + vfree(p); + + return r; +} + +static const struct file_operations sn_hwperf_fops = { + .unlocked_ioctl = sn_hwperf_ioctl, + .llseek = noop_llseek, +}; + +static struct miscdevice sn_hwperf_dev = { + MISC_DYNAMIC_MINOR, + "sn_hwperf", + &sn_hwperf_fops +}; + +static int sn_hwperf_init(void) +{ + u64 v; + int salr; + int e = 0; + + /* single threaded, once-only initialization */ + mutex_lock(&sn_hwperf_init_mutex); + + if (sn_hwperf_salheap) { + mutex_unlock(&sn_hwperf_init_mutex); + return e; + } + + /* + * The PROM code needs a fixed reference node. For convenience the + * same node as the console I/O is used. + */ + sn_hwperf_master_nasid = (nasid_t) ia64_sn_get_console_nasid(); + + /* + * Request the needed size and install the PROM scratch area. + * The PROM keeps various tracking bits in this memory area. + */ + salr = ia64_sn_hwperf_op(sn_hwperf_master_nasid, + (u64) SN_HWPERF_GET_HEAPSIZE, 0, + (u64) sizeof(u64), (u64) &v, 0, 0, NULL); + if (salr != SN_HWPERF_OP_OK) { + e = -EINVAL; + goto out; + } + + if ((sn_hwperf_salheap = vmalloc(v)) == NULL) { + e = -ENOMEM; + goto out; + } + salr = ia64_sn_hwperf_op(sn_hwperf_master_nasid, + SN_HWPERF_INSTALL_HEAP, 0, v, + (u64) sn_hwperf_salheap, 0, 0, NULL); + if (salr != SN_HWPERF_OP_OK) { + e = -EINVAL; + goto out; + } + + salr = ia64_sn_hwperf_op(sn_hwperf_master_nasid, + SN_HWPERF_OBJECT_COUNT, 0, + sizeof(u64), (u64) &v, 0, 0, NULL); + if (salr != SN_HWPERF_OP_OK) { + e = -EINVAL; + goto out; + } + sn_hwperf_obj_cnt = (int)v; + +out: + if (e < 0 && sn_hwperf_salheap) { + vfree(sn_hwperf_salheap); + sn_hwperf_salheap = NULL; + sn_hwperf_obj_cnt = 0; + } + mutex_unlock(&sn_hwperf_init_mutex); + return e; +} + +int sn_topology_open(struct inode *inode, struct file *file) +{ + int e; + struct seq_file *seq; + struct sn_hwperf_object_info *objbuf; + int nobj; + + if ((e = sn_hwperf_enum_objects(&nobj, &objbuf)) == 0) { + e = seq_open(file, &sn_topology_seq_ops); + seq = file->private_data; + seq->private = objbuf; + } + + return e; +} + +int sn_topology_release(struct inode *inode, struct file *file) +{ + struct seq_file *seq = file->private_data; + + vfree(seq->private); + return seq_release(inode, file); +} + +int sn_hwperf_get_nearest_node(cnodeid_t node, + cnodeid_t *near_mem_node, cnodeid_t *near_cpu_node) +{ + int e; + int nobj; + struct sn_hwperf_object_info *objbuf; + + if ((e = sn_hwperf_enum_objects(&nobj, &objbuf)) == 0) { + e = sn_hwperf_get_nearest_node_objdata(objbuf, nobj, + node, near_mem_node, near_cpu_node); + vfree(objbuf); + } + + return e; +} + +static int sn_hwperf_misc_register_init(void) +{ + int e; + + if (!ia64_platform_is("sn2")) + return 0; + + sn_hwperf_init(); + + /* + * Register a dynamic misc device for hwperf ioctls. Platforms + * supporting hotplug will create /dev/sn_hwperf, else user + * can to look up the minor number in /proc/misc. + */ + if ((e = misc_register(&sn_hwperf_dev)) != 0) { + printk(KERN_ERR "sn_hwperf_misc_register_init: failed to " + "register misc device for \"%s\"\n", sn_hwperf_dev.name); + } + + return e; +} + +device_initcall(sn_hwperf_misc_register_init); /* after misc_init() */ +EXPORT_SYMBOL(sn_hwperf_get_nearest_node); diff --git a/arch/ia64/sn/kernel/sn2/sn_proc_fs.c b/arch/ia64/sn/kernel/sn2/sn_proc_fs.c new file mode 100644 index 000000000..c2a4d8429 --- /dev/null +++ b/arch/ia64/sn/kernel/sn2/sn_proc_fs.c @@ -0,0 +1,69 @@ +/* + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. + * + * Copyright (C) 2000-2005 Silicon Graphics, Inc. All rights reserved. + */ + +#ifdef CONFIG_PROC_FS +#include <linux/proc_fs.h> +#include <linux/seq_file.h> +#include <linux/uaccess.h> +#include <asm/sn/sn_sal.h> + +static int partition_id_show(struct seq_file *s, void *p) +{ + seq_printf(s, "%d\n", sn_partition_id); + return 0; +} + +static int system_serial_number_show(struct seq_file *s, void *p) +{ + seq_printf(s, "%s\n", sn_system_serial_number()); + return 0; +} + +static int licenseID_show(struct seq_file *s, void *p) +{ + seq_printf(s, "0x%llx\n", sn_partition_serial_number_val()); + return 0; +} + +static int coherence_id_show(struct seq_file *s, void *p) +{ + seq_printf(s, "%d\n", partition_coherence_id()); + + return 0; +} + +/* /proc/sgi_sn/sn_topology uses seq_file, see sn_hwperf.c */ +extern int sn_topology_open(struct inode *, struct file *); +extern int sn_topology_release(struct inode *, struct file *); + +static const struct file_operations proc_sn_topo_fops = { + .open = sn_topology_open, + .read = seq_read, + .llseek = seq_lseek, + .release = sn_topology_release, +}; + +void register_sn_procfs(void) +{ + static struct proc_dir_entry *sgi_proc_dir = NULL; + + BUG_ON(sgi_proc_dir != NULL); + if (!(sgi_proc_dir = proc_mkdir("sgi_sn", NULL))) + return; + + proc_create_single("partition_id", 0444, sgi_proc_dir, + partition_id_show); + proc_create_single("system_serial_number", 0444, sgi_proc_dir, + system_serial_number_show); + proc_create_single("licenseID", 0444, sgi_proc_dir, licenseID_show); + proc_create_single("coherence_id", 0444, sgi_proc_dir, + coherence_id_show); + proc_create("sn_topology", 0444, sgi_proc_dir, &proc_sn_topo_fops); +} + +#endif /* CONFIG_PROC_FS */ diff --git a/arch/ia64/sn/kernel/sn2/timer.c b/arch/ia64/sn/kernel/sn2/timer.c new file mode 100644 index 000000000..3009d9d86 --- /dev/null +++ b/arch/ia64/sn/kernel/sn2/timer.c @@ -0,0 +1,61 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * linux/arch/ia64/sn/kernel/sn2/timer.c + * + * Copyright (C) 2003 Silicon Graphics, Inc. + * Copyright (C) 2003 Hewlett-Packard Co + * David Mosberger <davidm@hpl.hp.com>: updated for new timer-interpolation infrastructure + */ + +#include <linux/init.h> +#include <linux/kernel.h> +#include <linux/sched.h> +#include <linux/time.h> +#include <linux/interrupt.h> +#include <linux/clocksource.h> + +#include <asm/hw_irq.h> +#include <asm/timex.h> + +#include <asm/sn/leds.h> +#include <asm/sn/shub_mmr.h> +#include <asm/sn/clksupport.h> + +extern unsigned long sn_rtc_cycles_per_second; + +static u64 read_sn2(struct clocksource *cs) +{ + return (u64)readq(RTC_COUNTER_ADDR); +} + +static struct clocksource clocksource_sn2 = { + .name = "sn2_rtc", + .rating = 450, + .read = read_sn2, + .mask = (1LL << 55) - 1, + .flags = CLOCK_SOURCE_IS_CONTINUOUS, +}; + +/* + * sn udelay uses the RTC instead of the ITC because the ITC is not + * synchronized across all CPUs, and the thread may migrate to another CPU + * if preemption is enabled. + */ +static void +ia64_sn_udelay (unsigned long usecs) +{ + unsigned long start = rtc_time(); + unsigned long end = start + + usecs * sn_rtc_cycles_per_second / 1000000; + + while (time_before((unsigned long)rtc_time(), end)) + cpu_relax(); +} + +void __init sn_timer_init(void) +{ + clocksource_sn2.archdata.fsys_mmio = RTC_COUNTER_ADDR; + clocksource_register_hz(&clocksource_sn2, sn_rtc_cycles_per_second); + + ia64_udelay = &ia64_sn_udelay; +} diff --git a/arch/ia64/sn/kernel/sn2/timer_interrupt.c b/arch/ia64/sn/kernel/sn2/timer_interrupt.c new file mode 100644 index 000000000..103d6ea8e --- /dev/null +++ b/arch/ia64/sn/kernel/sn2/timer_interrupt.c @@ -0,0 +1,60 @@ +/* + * + * + * Copyright (c) 2005, 2006 Silicon Graphics, Inc. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of version 2 of the GNU General Public License + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it would be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * Further, this software is distributed without any warranty that it is + * free of the rightful claim of any third person regarding infringement + * or the like. Any license provided herein, whether implied or + * otherwise, applies only to this software file. Patent licenses, if + * any, provided herein do not apply to combinations of this program with + * other software, or any other product whatsoever. + * + * You should have received a copy of the GNU General Public + * License along with this program; if not, write the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston MA 02111-1307, USA. + * + * For further information regarding this notice, see: + * + * http://oss.sgi.com/projects/GenInfo/NoticeExplan + */ + +#include <linux/interrupt.h> +#include <asm/sn/pda.h> +#include <asm/sn/leds.h> + +extern void sn_lb_int_war_check(void); +extern irqreturn_t timer_interrupt(int irq, void *dev_id, struct pt_regs *regs); + +#define SN_LB_INT_WAR_INTERVAL 100 + +void sn_timer_interrupt(int irq, void *dev_id) +{ + /* LED blinking */ + if (!pda->hb_count--) { + pda->hb_count = HZ / 2; + set_led_bits(pda->hb_state ^= + LED_CPU_HEARTBEAT, LED_CPU_HEARTBEAT); + } + + if (is_shub1()) { + if (enable_shub_wars_1_1()) { + /* Bugfix code for SHUB 1.1 */ + if (pda->pio_shub_war_cam_addr) + *pda->pio_shub_war_cam_addr = 0x8000000000000010UL; + } + if (pda->sn_lb_int_war_ticks == 0) + sn_lb_int_war_check(); + pda->sn_lb_int_war_ticks++; + if (pda->sn_lb_int_war_ticks >= SN_LB_INT_WAR_INTERVAL) + pda->sn_lb_int_war_ticks = 0; + } +} diff --git a/arch/ia64/sn/kernel/tiocx.c b/arch/ia64/sn/kernel/tiocx.c new file mode 100644 index 000000000..32d0380eb --- /dev/null +++ b/arch/ia64/sn/kernel/tiocx.c @@ -0,0 +1,569 @@ +/* + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. + * + * Copyright (c) 2005 Silicon Graphics, Inc. All rights reserved. + */ + +#include <linux/module.h> +#include <linux/kernel.h> +#include <linux/slab.h> +#include <linux/spinlock.h> +#include <linux/proc_fs.h> +#include <linux/capability.h> +#include <linux/device.h> +#include <linux/delay.h> +#include <linux/uaccess.h> +#include <asm/sn/sn_sal.h> +#include <asm/sn/addrs.h> +#include <asm/sn/io.h> +#include <asm/sn/types.h> +#include <asm/sn/shubio.h> +#include <asm/sn/tiocx.h> +#include <asm/sn/l1.h> +#include <asm/sn/module.h> +#include "tio.h" +#include "xtalk/xwidgetdev.h" +#include "xtalk/hubdev.h" + +#define CX_DEV_NONE 0 +#define DEVICE_NAME "tiocx" +#define WIDGET_ID 0 +#define TIOCX_DEBUG 0 + +#if TIOCX_DEBUG +#define DBG(fmt...) printk(KERN_ALERT fmt) +#else +#define DBG(fmt...) +#endif + +struct device_attribute dev_attr_cxdev_control; + +/** + * tiocx_match - Try to match driver id list with device. + * @dev: device pointer + * @drv: driver pointer + * + * Returns 1 if match, 0 otherwise. + */ +static int tiocx_match(struct device *dev, struct device_driver *drv) +{ + struct cx_dev *cx_dev = to_cx_dev(dev); + struct cx_drv *cx_drv = to_cx_driver(drv); + const struct cx_device_id *ids = cx_drv->id_table; + + if (!ids) + return 0; + + while (ids->part_num) { + if (ids->part_num == cx_dev->cx_id.part_num) + return 1; + ids++; + } + return 0; + +} + +static int tiocx_uevent(struct device *dev, struct kobj_uevent_env *env) +{ + return -ENODEV; +} + +static void tiocx_bus_release(struct device *dev) +{ + kfree(to_cx_dev(dev)); +} + +/** + * cx_device_match - Find cx_device in the id table. + * @ids: id table from driver + * @cx_device: part/mfg id for the device + * + */ +static const struct cx_device_id *cx_device_match(const struct cx_device_id + *ids, + struct cx_dev *cx_device) +{ + /* + * NOTES: We may want to check for CX_ANY_ID too. + * Do we want to match against nasid too? + * CX_DEV_NONE == 0, if the driver tries to register for + * part/mfg == 0 we should return no-match (NULL) here. + */ + while (ids->part_num && ids->mfg_num) { + if (ids->part_num == cx_device->cx_id.part_num && + ids->mfg_num == cx_device->cx_id.mfg_num) + return ids; + ids++; + } + + return NULL; +} + +/** + * cx_device_probe - Look for matching device. + * Call driver probe routine if found. + * @cx_driver: driver table (cx_drv struct) from driver + * @cx_device: part/mfg id for the device + */ +static int cx_device_probe(struct device *dev) +{ + const struct cx_device_id *id; + struct cx_drv *cx_drv = to_cx_driver(dev->driver); + struct cx_dev *cx_dev = to_cx_dev(dev); + int error = 0; + + if (!cx_dev->driver && cx_drv->probe) { + id = cx_device_match(cx_drv->id_table, cx_dev); + if (id) { + if ((error = cx_drv->probe(cx_dev, id)) < 0) + return error; + else + cx_dev->driver = cx_drv; + } + } + + return error; +} + +/** + * cx_driver_remove - Remove driver from device struct. + * @dev: device + */ +static int cx_driver_remove(struct device *dev) +{ + struct cx_dev *cx_dev = to_cx_dev(dev); + struct cx_drv *cx_drv = cx_dev->driver; + if (cx_drv->remove) + cx_drv->remove(cx_dev); + cx_dev->driver = NULL; + return 0; +} + +struct bus_type tiocx_bus_type = { + .name = "tiocx", + .match = tiocx_match, + .uevent = tiocx_uevent, + .probe = cx_device_probe, + .remove = cx_driver_remove, +}; + +/** + * cx_driver_register - Register the driver. + * @cx_driver: driver table (cx_drv struct) from driver + * + * Called from the driver init routine to register a driver. + * The cx_drv struct contains the driver name, a pointer to + * a table of part/mfg numbers and a pointer to the driver's + * probe/attach routine. + */ +int cx_driver_register(struct cx_drv *cx_driver) +{ + cx_driver->driver.name = cx_driver->name; + cx_driver->driver.bus = &tiocx_bus_type; + + return driver_register(&cx_driver->driver); +} + +/** + * cx_driver_unregister - Unregister the driver. + * @cx_driver: driver table (cx_drv struct) from driver + */ +int cx_driver_unregister(struct cx_drv *cx_driver) +{ + driver_unregister(&cx_driver->driver); + return 0; +} + +/** + * cx_device_register - Register a device. + * @nasid: device's nasid + * @part_num: device's part number + * @mfg_num: device's manufacturer number + * @hubdev: hub info associated with this device + * @bt: board type of the device + * + */ +int +cx_device_register(nasid_t nasid, int part_num, int mfg_num, + struct hubdev_info *hubdev, int bt) +{ + struct cx_dev *cx_dev; + int r; + + cx_dev = kzalloc(sizeof(struct cx_dev), GFP_KERNEL); + DBG("cx_dev= 0x%p\n", cx_dev); + if (cx_dev == NULL) + return -ENOMEM; + + cx_dev->cx_id.part_num = part_num; + cx_dev->cx_id.mfg_num = mfg_num; + cx_dev->cx_id.nasid = nasid; + cx_dev->hubdev = hubdev; + cx_dev->bt = bt; + + cx_dev->dev.parent = NULL; + cx_dev->dev.bus = &tiocx_bus_type; + cx_dev->dev.release = tiocx_bus_release; + dev_set_name(&cx_dev->dev, "%d", cx_dev->cx_id.nasid); + r = device_register(&cx_dev->dev); + if (r) { + kfree(cx_dev); + return r; + } + get_device(&cx_dev->dev); + + device_create_file(&cx_dev->dev, &dev_attr_cxdev_control); + + return 0; +} + +/** + * cx_device_unregister - Unregister a device. + * @cx_dev: part/mfg id for the device + */ +int cx_device_unregister(struct cx_dev *cx_dev) +{ + put_device(&cx_dev->dev); + device_unregister(&cx_dev->dev); + return 0; +} + +/** + * cx_device_reload - Reload the device. + * @nasid: device's nasid + * @part_num: device's part number + * @mfg_num: device's manufacturer number + * + * Remove the device associated with 'nasid' from device list and then + * call device-register with the given part/mfg numbers. + */ +static int cx_device_reload(struct cx_dev *cx_dev) +{ + cx_device_unregister(cx_dev); + return cx_device_register(cx_dev->cx_id.nasid, cx_dev->cx_id.part_num, + cx_dev->cx_id.mfg_num, cx_dev->hubdev, + cx_dev->bt); +} + +static inline u64 tiocx_intr_alloc(nasid_t nasid, int widget, + u64 sn_irq_info, + int req_irq, nasid_t req_nasid, + int req_slice) +{ + struct ia64_sal_retval rv; + rv.status = 0; + rv.v0 = 0; + + ia64_sal_oemcall_nolock(&rv, SN_SAL_IOIF_INTERRUPT, + SAL_INTR_ALLOC, nasid, + widget, sn_irq_info, req_irq, + req_nasid, req_slice); + return rv.status; +} + +static inline void tiocx_intr_free(nasid_t nasid, int widget, + struct sn_irq_info *sn_irq_info) +{ + struct ia64_sal_retval rv; + rv.status = 0; + rv.v0 = 0; + + ia64_sal_oemcall_nolock(&rv, SN_SAL_IOIF_INTERRUPT, + SAL_INTR_FREE, nasid, + widget, sn_irq_info->irq_irq, + sn_irq_info->irq_cookie, 0, 0); +} + +struct sn_irq_info *tiocx_irq_alloc(nasid_t nasid, int widget, int irq, + nasid_t req_nasid, int slice) +{ + struct sn_irq_info *sn_irq_info; + int status; + int sn_irq_size = sizeof(struct sn_irq_info); + + if ((nasid & 1) == 0) + return NULL; + + sn_irq_info = kzalloc(sn_irq_size, GFP_KERNEL); + if (sn_irq_info == NULL) + return NULL; + + status = tiocx_intr_alloc(nasid, widget, __pa(sn_irq_info), irq, + req_nasid, slice); + if (status) { + kfree(sn_irq_info); + return NULL; + } else { + return sn_irq_info; + } +} + +void tiocx_irq_free(struct sn_irq_info *sn_irq_info) +{ + u64 bridge = (u64) sn_irq_info->irq_bridge; + nasid_t nasid = NASID_GET(bridge); + int widget; + + if (nasid & 1) { + widget = TIO_SWIN_WIDGETNUM(bridge); + tiocx_intr_free(nasid, widget, sn_irq_info); + kfree(sn_irq_info); + } +} + +u64 tiocx_dma_addr(u64 addr) +{ + return PHYS_TO_TIODMA(addr); +} + +u64 tiocx_swin_base(int nasid) +{ + return TIO_SWIN_BASE(nasid, TIOCX_CORELET); +} + +EXPORT_SYMBOL(cx_driver_register); +EXPORT_SYMBOL(cx_driver_unregister); +EXPORT_SYMBOL(cx_device_register); +EXPORT_SYMBOL(cx_device_unregister); +EXPORT_SYMBOL(tiocx_irq_alloc); +EXPORT_SYMBOL(tiocx_irq_free); +EXPORT_SYMBOL(tiocx_bus_type); +EXPORT_SYMBOL(tiocx_dma_addr); +EXPORT_SYMBOL(tiocx_swin_base); + +static void tio_conveyor_set(nasid_t nasid, int enable_flag) +{ + u64 ice_frz; + u64 disable_cb = (1ull << 61); + + if (!(nasid & 1)) + return; + + ice_frz = REMOTE_HUB_L(nasid, TIO_ICE_FRZ_CFG); + if (enable_flag) { + if (!(ice_frz & disable_cb)) /* already enabled */ + return; + ice_frz &= ~disable_cb; + } else { + if (ice_frz & disable_cb) /* already disabled */ + return; + ice_frz |= disable_cb; + } + DBG(KERN_ALERT "TIO_ICE_FRZ_CFG= 0x%lx\n", ice_frz); + REMOTE_HUB_S(nasid, TIO_ICE_FRZ_CFG, ice_frz); +} + +#define tio_conveyor_enable(nasid) tio_conveyor_set(nasid, 1) +#define tio_conveyor_disable(nasid) tio_conveyor_set(nasid, 0) + +static void tio_corelet_reset(nasid_t nasid, int corelet) +{ + if (!(nasid & 1)) + return; + + REMOTE_HUB_S(nasid, TIO_ICE_PMI_TX_CFG, 1 << corelet); + udelay(2000); + REMOTE_HUB_S(nasid, TIO_ICE_PMI_TX_CFG, 0); + udelay(2000); +} + +static int is_fpga_tio(int nasid, int *bt) +{ + u16 uninitialized_var(ioboard_type); /* GCC be quiet */ + long rc; + + rc = ia64_sn_sysctl_ioboard_get(nasid, &ioboard_type); + if (rc) { + printk(KERN_WARNING "ia64_sn_sysctl_ioboard_get failed: %ld\n", + rc); + return 0; + } + + switch (ioboard_type) { + case L1_BRICKTYPE_SA: + case L1_BRICKTYPE_ATHENA: + case L1_BOARDTYPE_DAYTONA: + *bt = ioboard_type; + return 1; + } + + return 0; +} + +static int bitstream_loaded(nasid_t nasid) +{ + u64 cx_credits; + + cx_credits = REMOTE_HUB_L(nasid, TIO_ICE_PMI_TX_DYN_CREDIT_STAT_CB3); + cx_credits &= TIO_ICE_PMI_TX_DYN_CREDIT_STAT_CB3_CREDIT_CNT_MASK; + DBG("cx_credits= 0x%lx\n", cx_credits); + + return (cx_credits == 0xf) ? 1 : 0; +} + +static int tiocx_reload(struct cx_dev *cx_dev) +{ + int part_num = CX_DEV_NONE; + int mfg_num = CX_DEV_NONE; + nasid_t nasid = cx_dev->cx_id.nasid; + + if (bitstream_loaded(nasid)) { + u64 cx_id; + int rv; + + rv = ia64_sn_sysctl_tio_clock_reset(nasid); + if (rv) { + printk(KERN_ALERT "CX port JTAG reset failed.\n"); + } else { + cx_id = *(volatile u64 *) + (TIO_SWIN_BASE(nasid, TIOCX_CORELET) + + WIDGET_ID); + part_num = XWIDGET_PART_NUM(cx_id); + mfg_num = XWIDGET_MFG_NUM(cx_id); + DBG("part= 0x%x, mfg= 0x%x\n", part_num, mfg_num); + /* just ignore it if it's a CE */ + if (part_num == TIO_CE_ASIC_PARTNUM) + return 0; + } + } + + cx_dev->cx_id.part_num = part_num; + cx_dev->cx_id.mfg_num = mfg_num; + + /* + * Delete old device and register the new one. It's ok if + * part_num/mfg_num == CX_DEV_NONE. We want to register + * devices in the table even if a bitstream isn't loaded. + * That allows use to see that a bitstream isn't loaded via + * TIOCX_IOCTL_DEV_LIST. + */ + return cx_device_reload(cx_dev); +} + +static ssize_t show_cxdev_control(struct device *dev, struct device_attribute *attr, char *buf) +{ + struct cx_dev *cx_dev = to_cx_dev(dev); + + return sprintf(buf, "0x%x 0x%x 0x%x 0x%x\n", + cx_dev->cx_id.nasid, + cx_dev->cx_id.part_num, cx_dev->cx_id.mfg_num, + cx_dev->bt); +} + +static ssize_t store_cxdev_control(struct device *dev, struct device_attribute *attr, const char *buf, + size_t count) +{ + int n; + struct cx_dev *cx_dev = to_cx_dev(dev); + + if (!capable(CAP_SYS_ADMIN)) + return -EPERM; + + if (count <= 0) + return 0; + + n = simple_strtoul(buf, NULL, 0); + + switch (n) { + case 1: + tio_corelet_reset(cx_dev->cx_id.nasid, TIOCX_CORELET); + tiocx_reload(cx_dev); + break; + case 2: + tiocx_reload(cx_dev); + break; + case 3: + tio_corelet_reset(cx_dev->cx_id.nasid, TIOCX_CORELET); + break; + default: + break; + } + + return count; +} + +DEVICE_ATTR(cxdev_control, 0644, show_cxdev_control, store_cxdev_control); + +static int __init tiocx_init(void) +{ + cnodeid_t cnodeid; + int found_tiocx_device = 0; + int err; + + if (!ia64_platform_is("sn2")) + return 0; + + err = bus_register(&tiocx_bus_type); + if (err) + return err; + + for (cnodeid = 0; cnodeid < num_cnodes; cnodeid++) { + nasid_t nasid; + int bt; + + nasid = cnodeid_to_nasid(cnodeid); + + if ((nasid & 0x1) && is_fpga_tio(nasid, &bt)) { + struct hubdev_info *hubdev; + struct xwidget_info *widgetp; + + DBG("Found TIO at nasid 0x%x\n", nasid); + + hubdev = + (struct hubdev_info *)(NODEPDA(cnodeid)->pdinfo); + + widgetp = &hubdev->hdi_xwidget_info[TIOCX_CORELET]; + + /* The CE hangs off of the CX port but is not an FPGA */ + if (widgetp->xwi_hwid.part_num == TIO_CE_ASIC_PARTNUM) + continue; + + tio_corelet_reset(nasid, TIOCX_CORELET); + tio_conveyor_enable(nasid); + + if (cx_device_register + (nasid, widgetp->xwi_hwid.part_num, + widgetp->xwi_hwid.mfg_num, hubdev, bt) < 0) + return -ENXIO; + else + found_tiocx_device++; + } + } + + /* It's ok if we find zero devices. */ + DBG("found_tiocx_device= %d\n", found_tiocx_device); + + return 0; +} + +static int cx_remove_device(struct device * dev, void * data) +{ + struct cx_dev *cx_dev = to_cx_dev(dev); + device_remove_file(dev, &dev_attr_cxdev_control); + cx_device_unregister(cx_dev); + return 0; +} + +static void __exit tiocx_exit(void) +{ + DBG("tiocx_exit\n"); + + /* + * Unregister devices. + */ + bus_for_each_dev(&tiocx_bus_type, NULL, NULL, cx_remove_device); + bus_unregister(&tiocx_bus_type); +} + +fs_initcall(tiocx_init); +module_exit(tiocx_exit); + +/************************************************************************ + * Module licensing and description + ************************************************************************/ +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Bruce Losure <blosure@sgi.com>"); +MODULE_DESCRIPTION("TIOCX module"); +MODULE_SUPPORTED_DEVICE(DEVICE_NAME); diff --git a/arch/ia64/sn/pci/Makefile b/arch/ia64/sn/pci/Makefile new file mode 100644 index 000000000..df2a90145 --- /dev/null +++ b/arch/ia64/sn/pci/Makefile @@ -0,0 +1,12 @@ +# +# This file is subject to the terms and conditions of the GNU General Public +# License. See the file "COPYING" in the main directory of this archive +# for more details. +# +# Copyright (C) 2000-2004 Silicon Graphics, Inc. All Rights Reserved. +# +# Makefile for the sn pci general routines. + +ccflags-y := -Iarch/ia64/sn/include + +obj-y := pci_dma.o tioca_provider.o tioce_provider.o pcibr/ diff --git a/arch/ia64/sn/pci/pci_dma.c b/arch/ia64/sn/pci/pci_dma.c new file mode 100644 index 000000000..74c934a99 --- /dev/null +++ b/arch/ia64/sn/pci/pci_dma.c @@ -0,0 +1,481 @@ +/* + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. + * + * Copyright (C) 2000,2002-2005 Silicon Graphics, Inc. All rights reserved. + * + * Routines for PCI DMA mapping. See Documentation/DMA-API.txt for + * a description of how these routines should be used. + */ + +#include <linux/gfp.h> +#include <linux/module.h> +#include <linux/dma-mapping.h> +#include <asm/dma.h> +#include <asm/sn/intr.h> +#include <asm/sn/pcibus_provider_defs.h> +#include <asm/sn/pcidev.h> +#include <asm/sn/sn_sal.h> + +#define SG_ENT_VIRT_ADDRESS(sg) (sg_virt((sg))) +#define SG_ENT_PHYS_ADDRESS(SG) virt_to_phys(SG_ENT_VIRT_ADDRESS(SG)) + +/** + * sn_dma_supported - test a DMA mask + * @dev: device to test + * @mask: DMA mask to test + * + * Return whether the given PCI device DMA address mask can be supported + * properly. For example, if your device can only drive the low 24-bits + * during PCI bus mastering, then you would pass 0x00ffffff as the mask to + * this function. Of course, SN only supports devices that have 32 or more + * address bits when using the PMU. + */ +static int sn_dma_supported(struct device *dev, u64 mask) +{ + BUG_ON(!dev_is_pci(dev)); + + if (mask < 0x7fffffff) + return 0; + return 1; +} + +/** + * sn_dma_set_mask - set the DMA mask + * @dev: device to set + * @dma_mask: new mask + * + * Set @dev's DMA mask if the hw supports it. + */ +int sn_dma_set_mask(struct device *dev, u64 dma_mask) +{ + BUG_ON(!dev_is_pci(dev)); + + if (!sn_dma_supported(dev, dma_mask)) + return 0; + + *dev->dma_mask = dma_mask; + return 1; +} +EXPORT_SYMBOL(sn_dma_set_mask); + +/** + * sn_dma_alloc_coherent - allocate memory for coherent DMA + * @dev: device to allocate for + * @size: size of the region + * @dma_handle: DMA (bus) address + * @flags: memory allocation flags + * + * dma_alloc_coherent() returns a pointer to a memory region suitable for + * coherent DMA traffic to/from a PCI device. On SN platforms, this means + * that @dma_handle will have the %PCIIO_DMA_CMD flag set. + * + * This interface is usually used for "command" streams (e.g. the command + * queue for a SCSI controller). See Documentation/DMA-API.txt for + * more information. + */ +static void *sn_dma_alloc_coherent(struct device *dev, size_t size, + dma_addr_t * dma_handle, gfp_t flags, + unsigned long attrs) +{ + void *cpuaddr; + unsigned long phys_addr; + int node; + struct pci_dev *pdev = to_pci_dev(dev); + struct sn_pcibus_provider *provider = SN_PCIDEV_BUSPROVIDER(pdev); + + BUG_ON(!dev_is_pci(dev)); + + /* + * Allocate the memory. + */ + node = pcibus_to_node(pdev->bus); + if (likely(node >=0)) { + struct page *p = __alloc_pages_node(node, + flags, get_order(size)); + + if (likely(p)) + cpuaddr = page_address(p); + else + return NULL; + } else + cpuaddr = (void *)__get_free_pages(flags, get_order(size)); + + if (unlikely(!cpuaddr)) + return NULL; + + memset(cpuaddr, 0x0, size); + + /* physical addr. of the memory we just got */ + phys_addr = __pa(cpuaddr); + + /* + * 64 bit address translations should never fail. + * 32 bit translations can fail if there are insufficient mapping + * resources. + */ + + *dma_handle = provider->dma_map_consistent(pdev, phys_addr, size, + SN_DMA_ADDR_PHYS); + if (!*dma_handle) { + printk(KERN_ERR "%s: out of ATEs\n", __func__); + free_pages((unsigned long)cpuaddr, get_order(size)); + return NULL; + } + + return cpuaddr; +} + +/** + * sn_pci_free_coherent - free memory associated with coherent DMAable region + * @dev: device to free for + * @size: size to free + * @cpu_addr: kernel virtual address to free + * @dma_handle: DMA address associated with this region + * + * Frees the memory allocated by dma_alloc_coherent(), potentially unmapping + * any associated IOMMU mappings. + */ +static void sn_dma_free_coherent(struct device *dev, size_t size, void *cpu_addr, + dma_addr_t dma_handle, unsigned long attrs) +{ + struct pci_dev *pdev = to_pci_dev(dev); + struct sn_pcibus_provider *provider = SN_PCIDEV_BUSPROVIDER(pdev); + + BUG_ON(!dev_is_pci(dev)); + + provider->dma_unmap(pdev, dma_handle, 0); + free_pages((unsigned long)cpu_addr, get_order(size)); +} + +/** + * sn_dma_map_single_attrs - map a single page for DMA + * @dev: device to map for + * @cpu_addr: kernel virtual address of the region to map + * @size: size of the region + * @direction: DMA direction + * @attrs: optional dma attributes + * + * Map the region pointed to by @cpu_addr for DMA and return the + * DMA address. + * + * We map this to the one step pcibr_dmamap_trans interface rather than + * the two step pcibr_dmamap_alloc/pcibr_dmamap_addr because we have + * no way of saving the dmamap handle from the alloc to later free + * (which is pretty much unacceptable). + * + * mappings with the DMA_ATTR_WRITE_BARRIER get mapped with + * dma_map_consistent() so that writes force a flush of pending DMA. + * (See "SGI Altix Architecture Considerations for Linux Device Drivers", + * Document Number: 007-4763-001) + * + * TODO: simplify our interface; + * figure out how to save dmamap handle so can use two step. + */ +static dma_addr_t sn_dma_map_page(struct device *dev, struct page *page, + unsigned long offset, size_t size, + enum dma_data_direction dir, + unsigned long attrs) +{ + void *cpu_addr = page_address(page) + offset; + dma_addr_t dma_addr; + unsigned long phys_addr; + struct pci_dev *pdev = to_pci_dev(dev); + struct sn_pcibus_provider *provider = SN_PCIDEV_BUSPROVIDER(pdev); + + BUG_ON(!dev_is_pci(dev)); + + phys_addr = __pa(cpu_addr); + if (attrs & DMA_ATTR_WRITE_BARRIER) + dma_addr = provider->dma_map_consistent(pdev, phys_addr, + size, SN_DMA_ADDR_PHYS); + else + dma_addr = provider->dma_map(pdev, phys_addr, size, + SN_DMA_ADDR_PHYS); + + if (!dma_addr) { + printk(KERN_ERR "%s: out of ATEs\n", __func__); + return 0; + } + return dma_addr; +} + +/** + * sn_dma_unmap_single_attrs - unamp a DMA mapped page + * @dev: device to sync + * @dma_addr: DMA address to sync + * @size: size of region + * @direction: DMA direction + * @attrs: optional dma attributes + * + * This routine is supposed to sync the DMA region specified + * by @dma_handle into the coherence domain. On SN, we're always cache + * coherent, so we just need to free any ATEs associated with this mapping. + */ +static void sn_dma_unmap_page(struct device *dev, dma_addr_t dma_addr, + size_t size, enum dma_data_direction dir, + unsigned long attrs) +{ + struct pci_dev *pdev = to_pci_dev(dev); + struct sn_pcibus_provider *provider = SN_PCIDEV_BUSPROVIDER(pdev); + + BUG_ON(!dev_is_pci(dev)); + + provider->dma_unmap(pdev, dma_addr, dir); +} + +/** + * sn_dma_unmap_sg - unmap a DMA scatterlist + * @dev: device to unmap + * @sg: scatterlist to unmap + * @nhwentries: number of scatterlist entries + * @direction: DMA direction + * @attrs: optional dma attributes + * + * Unmap a set of streaming mode DMA translations. + */ +static void sn_dma_unmap_sg(struct device *dev, struct scatterlist *sgl, + int nhwentries, enum dma_data_direction dir, + unsigned long attrs) +{ + int i; + struct pci_dev *pdev = to_pci_dev(dev); + struct sn_pcibus_provider *provider = SN_PCIDEV_BUSPROVIDER(pdev); + struct scatterlist *sg; + + BUG_ON(!dev_is_pci(dev)); + + for_each_sg(sgl, sg, nhwentries, i) { + provider->dma_unmap(pdev, sg->dma_address, dir); + sg->dma_address = (dma_addr_t) NULL; + sg->dma_length = 0; + } +} + +/** + * sn_dma_map_sg - map a scatterlist for DMA + * @dev: device to map for + * @sg: scatterlist to map + * @nhwentries: number of entries + * @direction: direction of the DMA transaction + * @attrs: optional dma attributes + * + * mappings with the DMA_ATTR_WRITE_BARRIER get mapped with + * dma_map_consistent() so that writes force a flush of pending DMA. + * (See "SGI Altix Architecture Considerations for Linux Device Drivers", + * Document Number: 007-4763-001) + * + * Maps each entry of @sg for DMA. + */ +static int sn_dma_map_sg(struct device *dev, struct scatterlist *sgl, + int nhwentries, enum dma_data_direction dir, + unsigned long attrs) +{ + unsigned long phys_addr; + struct scatterlist *saved_sg = sgl, *sg; + struct pci_dev *pdev = to_pci_dev(dev); + struct sn_pcibus_provider *provider = SN_PCIDEV_BUSPROVIDER(pdev); + int i; + + BUG_ON(!dev_is_pci(dev)); + + /* + * Setup a DMA address for each entry in the scatterlist. + */ + for_each_sg(sgl, sg, nhwentries, i) { + dma_addr_t dma_addr; + phys_addr = SG_ENT_PHYS_ADDRESS(sg); + if (attrs & DMA_ATTR_WRITE_BARRIER) + dma_addr = provider->dma_map_consistent(pdev, + phys_addr, + sg->length, + SN_DMA_ADDR_PHYS); + else + dma_addr = provider->dma_map(pdev, phys_addr, + sg->length, + SN_DMA_ADDR_PHYS); + + sg->dma_address = dma_addr; + if (!sg->dma_address) { + printk(KERN_ERR "%s: out of ATEs\n", __func__); + + /* + * Free any successfully allocated entries. + */ + if (i > 0) + sn_dma_unmap_sg(dev, saved_sg, i, dir, attrs); + return 0; + } + + sg->dma_length = sg->length; + } + + return nhwentries; +} + +static void sn_dma_sync_single_for_cpu(struct device *dev, dma_addr_t dma_handle, + size_t size, enum dma_data_direction dir) +{ + BUG_ON(!dev_is_pci(dev)); +} + +static void sn_dma_sync_single_for_device(struct device *dev, dma_addr_t dma_handle, + size_t size, + enum dma_data_direction dir) +{ + BUG_ON(!dev_is_pci(dev)); +} + +static void sn_dma_sync_sg_for_cpu(struct device *dev, struct scatterlist *sg, + int nelems, enum dma_data_direction dir) +{ + BUG_ON(!dev_is_pci(dev)); +} + +static void sn_dma_sync_sg_for_device(struct device *dev, struct scatterlist *sg, + int nelems, enum dma_data_direction dir) +{ + BUG_ON(!dev_is_pci(dev)); +} + +static int sn_dma_mapping_error(struct device *dev, dma_addr_t dma_addr) +{ + return 0; +} + +u64 sn_dma_get_required_mask(struct device *dev) +{ + return DMA_BIT_MASK(64); +} +EXPORT_SYMBOL_GPL(sn_dma_get_required_mask); + +char *sn_pci_get_legacy_mem(struct pci_bus *bus) +{ + if (!SN_PCIBUS_BUSSOFT(bus)) + return ERR_PTR(-ENODEV); + + return (char *)(SN_PCIBUS_BUSSOFT(bus)->bs_legacy_mem | __IA64_UNCACHED_OFFSET); +} + +int sn_pci_legacy_read(struct pci_bus *bus, u16 port, u32 *val, u8 size) +{ + unsigned long addr; + int ret; + struct ia64_sal_retval isrv; + + /* + * First, try the SN_SAL_IOIF_PCI_SAFE SAL call which can work + * around hw issues at the pci bus level. SGI proms older than + * 4.10 don't implement this. + */ + + SAL_CALL(isrv, SN_SAL_IOIF_PCI_SAFE, + pci_domain_nr(bus), bus->number, + 0, /* io */ + 0, /* read */ + port, size, __pa(val)); + + if (isrv.status == 0) + return size; + + /* + * If the above failed, retry using the SAL_PROBE call which should + * be present in all proms (but which cannot work round PCI chipset + * bugs). This code is retained for compatibility with old + * pre-4.10 proms, and should be removed at some point in the future. + */ + + if (!SN_PCIBUS_BUSSOFT(bus)) + return -ENODEV; + + addr = SN_PCIBUS_BUSSOFT(bus)->bs_legacy_io | __IA64_UNCACHED_OFFSET; + addr += port; + + ret = ia64_sn_probe_mem(addr, (long)size, (void *)val); + + if (ret == 2) + return -EINVAL; + + if (ret == 1) + *val = -1; + + return size; +} + +int sn_pci_legacy_write(struct pci_bus *bus, u16 port, u32 val, u8 size) +{ + int ret = size; + unsigned long paddr; + unsigned long *addr; + struct ia64_sal_retval isrv; + + /* + * First, try the SN_SAL_IOIF_PCI_SAFE SAL call which can work + * around hw issues at the pci bus level. SGI proms older than + * 4.10 don't implement this. + */ + + SAL_CALL(isrv, SN_SAL_IOIF_PCI_SAFE, + pci_domain_nr(bus), bus->number, + 0, /* io */ + 1, /* write */ + port, size, __pa(&val)); + + if (isrv.status == 0) + return size; + + /* + * If the above failed, retry using the SAL_PROBE call which should + * be present in all proms (but which cannot work round PCI chipset + * bugs). This code is retained for compatibility with old + * pre-4.10 proms, and should be removed at some point in the future. + */ + + if (!SN_PCIBUS_BUSSOFT(bus)) { + ret = -ENODEV; + goto out; + } + + /* Put the phys addr in uncached space */ + paddr = SN_PCIBUS_BUSSOFT(bus)->bs_legacy_io | __IA64_UNCACHED_OFFSET; + paddr += port; + addr = (unsigned long *)paddr; + + switch (size) { + case 1: + *(volatile u8 *)(addr) = (u8)(val); + break; + case 2: + *(volatile u16 *)(addr) = (u16)(val); + break; + case 4: + *(volatile u32 *)(addr) = (u32)(val); + break; + default: + ret = -EINVAL; + break; + } + out: + return ret; +} + +static struct dma_map_ops sn_dma_ops = { + .alloc = sn_dma_alloc_coherent, + .free = sn_dma_free_coherent, + .map_page = sn_dma_map_page, + .unmap_page = sn_dma_unmap_page, + .map_sg = sn_dma_map_sg, + .unmap_sg = sn_dma_unmap_sg, + .sync_single_for_cpu = sn_dma_sync_single_for_cpu, + .sync_sg_for_cpu = sn_dma_sync_sg_for_cpu, + .sync_single_for_device = sn_dma_sync_single_for_device, + .sync_sg_for_device = sn_dma_sync_sg_for_device, + .mapping_error = sn_dma_mapping_error, + .dma_supported = sn_dma_supported, +}; + +void sn_dma_init(void) +{ + dma_ops = &sn_dma_ops; +} diff --git a/arch/ia64/sn/pci/pcibr/Makefile b/arch/ia64/sn/pci/pcibr/Makefile new file mode 100644 index 000000000..396bcae36 --- /dev/null +++ b/arch/ia64/sn/pci/pcibr/Makefile @@ -0,0 +1,13 @@ +# +# This file is subject to the terms and conditions of the GNU General Public +# License. See the file "COPYING" in the main directory of this archive +# for more details. +# +# Copyright (C) 2002-2004 Silicon Graphics, Inc. All Rights Reserved. +# +# Makefile for the sn2 io routines. + +ccflags-y := -Iarch/ia64/sn/include + +obj-y += pcibr_dma.o pcibr_reg.o \ + pcibr_ate.o pcibr_provider.o diff --git a/arch/ia64/sn/pci/pcibr/pcibr_ate.c b/arch/ia64/sn/pci/pcibr/pcibr_ate.c new file mode 100644 index 000000000..b67bb4cb7 --- /dev/null +++ b/arch/ia64/sn/pci/pcibr/pcibr_ate.c @@ -0,0 +1,177 @@ +/* + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. + * + * Copyright (C) 2001-2006 Silicon Graphics, Inc. All rights reserved. + */ + +#include <linux/types.h> +#include <asm/sn/sn_sal.h> +#include <asm/sn/pcibr_provider.h> +#include <asm/sn/pcibus_provider_defs.h> +#include <asm/sn/pcidev.h> + +int pcibr_invalidate_ate; /* by default don't invalidate ATE on free */ + +/* + * mark_ate: Mark the ate as either free or inuse. + */ +static void mark_ate(struct ate_resource *ate_resource, int start, int number, + u64 value) +{ + u64 *ate = ate_resource->ate; + int index; + int length = 0; + + for (index = start; length < number; index++, length++) + ate[index] = value; +} + +/* + * find_free_ate: Find the first free ate index starting from the given + * index for the desired consecutive count. + */ +static int find_free_ate(struct ate_resource *ate_resource, int start, + int count) +{ + u64 *ate = ate_resource->ate; + int index; + int start_free; + + for (index = start; index < ate_resource->num_ate;) { + if (!ate[index]) { + int i; + int free; + free = 0; + start_free = index; /* Found start free ate */ + for (i = start_free; i < ate_resource->num_ate; i++) { + if (!ate[i]) { /* This is free */ + if (++free == count) + return start_free; + } else { + index = i + 1; + break; + } + } + if (i >= ate_resource->num_ate) + return -1; + } else + index++; /* Try next ate */ + } + + return -1; +} + +/* + * free_ate_resource: Free the requested number of ATEs. + */ +static inline void free_ate_resource(struct ate_resource *ate_resource, + int start) +{ + mark_ate(ate_resource, start, ate_resource->ate[start], 0); + if ((ate_resource->lowest_free_index > start) || + (ate_resource->lowest_free_index < 0)) + ate_resource->lowest_free_index = start; +} + +/* + * alloc_ate_resource: Allocate the requested number of ATEs. + */ +static inline int alloc_ate_resource(struct ate_resource *ate_resource, + int ate_needed) +{ + int start_index; + + /* + * Check for ate exhaustion. + */ + if (ate_resource->lowest_free_index < 0) + return -1; + + /* + * Find the required number of free consecutive ates. + */ + start_index = + find_free_ate(ate_resource, ate_resource->lowest_free_index, + ate_needed); + if (start_index >= 0) + mark_ate(ate_resource, start_index, ate_needed, ate_needed); + + ate_resource->lowest_free_index = + find_free_ate(ate_resource, ate_resource->lowest_free_index, 1); + + return start_index; +} + +/* + * Allocate "count" contiguous Bridge Address Translation Entries + * on the specified bridge to be used for PCI to XTALK mappings. + * Indices in rm map range from 1..num_entries. Indices returned + * to caller range from 0..num_entries-1. + * + * Return the start index on success, -1 on failure. + */ +int pcibr_ate_alloc(struct pcibus_info *pcibus_info, int count) +{ + int status; + unsigned long flags; + + spin_lock_irqsave(&pcibus_info->pbi_lock, flags); + status = alloc_ate_resource(&pcibus_info->pbi_int_ate_resource, count); + spin_unlock_irqrestore(&pcibus_info->pbi_lock, flags); + + return status; +} + +/* + * Setup an Address Translation Entry as specified. Use either the Bridge + * internal maps or the external map RAM, as appropriate. + */ +static inline u64 __iomem *pcibr_ate_addr(struct pcibus_info *pcibus_info, + int ate_index) +{ + if (ate_index < pcibus_info->pbi_int_ate_size) { + return pcireg_int_ate_addr(pcibus_info, ate_index); + } + panic("pcibr_ate_addr: invalid ate_index 0x%x", ate_index); +} + +/* + * Update the ate. + */ +inline void +ate_write(struct pcibus_info *pcibus_info, int ate_index, int count, + volatile u64 ate) +{ + while (count-- > 0) { + if (ate_index < pcibus_info->pbi_int_ate_size) { + pcireg_int_ate_set(pcibus_info, ate_index, ate); + } else { + panic("ate_write: invalid ate_index 0x%x", ate_index); + } + ate_index++; + ate += IOPGSIZE; + } + + pcireg_tflush_get(pcibus_info); /* wait until Bridge PIO complete */ +} + +void pcibr_ate_free(struct pcibus_info *pcibus_info, int index) +{ + + volatile u64 ate; + int count; + unsigned long flags; + + if (pcibr_invalidate_ate) { + /* For debugging purposes, clear the valid bit in the ATE */ + ate = *pcibr_ate_addr(pcibus_info, index); + count = pcibus_info->pbi_int_ate_resource.ate[index]; + ate_write(pcibus_info, index, count, (ate & ~PCI32_ATE_V)); + } + + spin_lock_irqsave(&pcibus_info->pbi_lock, flags); + free_ate_resource(&pcibus_info->pbi_int_ate_resource, index); + spin_unlock_irqrestore(&pcibus_info->pbi_lock, flags); +} diff --git a/arch/ia64/sn/pci/pcibr/pcibr_dma.c b/arch/ia64/sn/pci/pcibr/pcibr_dma.c new file mode 100644 index 000000000..1e863b277 --- /dev/null +++ b/arch/ia64/sn/pci/pcibr/pcibr_dma.c @@ -0,0 +1,413 @@ +/* + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. + * + * Copyright (C) 2001-2005 Silicon Graphics, Inc. All rights reserved. + */ + +#include <linux/types.h> +#include <linux/pci.h> +#include <linux/export.h> +#include <asm/sn/addrs.h> +#include <asm/sn/geo.h> +#include <asm/sn/pcibr_provider.h> +#include <asm/sn/pcibus_provider_defs.h> +#include <asm/sn/pcidev.h> +#include <asm/sn/pic.h> +#include <asm/sn/sn_sal.h> +#include <asm/sn/tiocp.h> +#include "tio.h" +#include "xtalk/xwidgetdev.h" +#include "xtalk/hubdev.h" + +extern int sn_ioif_inited; + +/* ===================================================================== + * DMA MANAGEMENT + * + * The Bridge ASIC provides three methods of doing DMA: via a "direct map" + * register available in 32-bit PCI space (which selects a contiguous 2G + * address space on some other widget), via "direct" addressing via 64-bit + * PCI space (all destination information comes from the PCI address, + * including transfer attributes), and via a "mapped" region that allows + * a bunch of different small mappings to be established with the PMU. + * + * For efficiency, we most prefer to use the 32bit direct mapping facility, + * since it requires no resource allocations. The advantage of using the + * PMU over the 64-bit direct is that single-cycle PCI addressing can be + * used; the advantage of using 64-bit direct over PMU addressing is that + * we do not have to allocate entries in the PMU. + */ + +static dma_addr_t +pcibr_dmamap_ate32(struct pcidev_info *info, + u64 paddr, size_t req_size, u64 flags, int dma_flags) +{ + + struct pcidev_info *pcidev_info = info->pdi_host_pcidev_info; + struct pcibus_info *pcibus_info = (struct pcibus_info *)pcidev_info-> + pdi_pcibus_info; + u8 internal_device = (PCI_SLOT(pcidev_info->pdi_host_pcidev_info-> + pdi_linux_pcidev->devfn)) - 1; + int ate_count; + int ate_index; + u64 ate_flags = flags | PCI32_ATE_V; + u64 ate; + u64 pci_addr; + u64 xio_addr; + u64 offset; + + /* PIC in PCI-X mode does not supports 32bit PageMap mode */ + if (IS_PIC_SOFT(pcibus_info) && IS_PCIX(pcibus_info)) { + return 0; + } + + /* Calculate the number of ATEs needed. */ + if (!(MINIMAL_ATE_FLAG(paddr, req_size))) { + ate_count = IOPG((IOPGSIZE - 1) /* worst case start offset */ + +req_size /* max mapping bytes */ + - 1) + 1; /* round UP */ + } else { /* assume requested target is page aligned */ + ate_count = IOPG(req_size /* max mapping bytes */ + - 1) + 1; /* round UP */ + } + + /* Get the number of ATEs required. */ + ate_index = pcibr_ate_alloc(pcibus_info, ate_count); + if (ate_index < 0) + return 0; + + /* In PCI-X mode, Prefetch not supported */ + if (IS_PCIX(pcibus_info)) + ate_flags &= ~(PCI32_ATE_PREF); + + if (SN_DMA_ADDRTYPE(dma_flags == SN_DMA_ADDR_PHYS)) + xio_addr = IS_PIC_SOFT(pcibus_info) ? PHYS_TO_DMA(paddr) : + PHYS_TO_TIODMA(paddr); + else + xio_addr = paddr; + + offset = IOPGOFF(xio_addr); + ate = ate_flags | (xio_addr - offset); + + /* If PIC, put the targetid in the ATE */ + if (IS_PIC_SOFT(pcibus_info)) { + ate |= (pcibus_info->pbi_hub_xid << PIC_ATE_TARGETID_SHFT); + } + + /* + * If we're mapping for MSI, set the MSI bit in the ATE. If it's a + * TIOCP based pci bus, we also need to set the PIO bit in the ATE. + */ + if (dma_flags & SN_DMA_MSI) { + ate |= PCI32_ATE_MSI; + if (IS_TIOCP_SOFT(pcibus_info)) + ate |= PCI32_ATE_PIO; + } + + ate_write(pcibus_info, ate_index, ate_count, ate); + + /* + * Set up the DMA mapped Address. + */ + pci_addr = PCI32_MAPPED_BASE + offset + IOPGSIZE * ate_index; + + /* + * If swap was set in device in pcibr_endian_set() + * we need to turn swapping on. + */ + if (pcibus_info->pbi_devreg[internal_device] & PCIBR_DEV_SWAP_DIR) + ATE_SWAP_ON(pci_addr); + + + return pci_addr; +} + +static dma_addr_t +pcibr_dmatrans_direct64(struct pcidev_info * info, u64 paddr, + u64 dma_attributes, int dma_flags) +{ + struct pcibus_info *pcibus_info = (struct pcibus_info *) + ((info->pdi_host_pcidev_info)->pdi_pcibus_info); + u64 pci_addr; + + /* Translate to Crosstalk View of Physical Address */ + if (SN_DMA_ADDRTYPE(dma_flags) == SN_DMA_ADDR_PHYS) + pci_addr = IS_PIC_SOFT(pcibus_info) ? + PHYS_TO_DMA(paddr) : + PHYS_TO_TIODMA(paddr); + else + pci_addr = paddr; + pci_addr |= dma_attributes; + + /* Handle Bus mode */ + if (IS_PCIX(pcibus_info)) + pci_addr &= ~PCI64_ATTR_PREF; + + /* Handle Bridge Chipset differences */ + if (IS_PIC_SOFT(pcibus_info)) { + pci_addr |= + ((u64) pcibus_info-> + pbi_hub_xid << PIC_PCI64_ATTR_TARG_SHFT); + } else + pci_addr |= (dma_flags & SN_DMA_MSI) ? + TIOCP_PCI64_CMDTYPE_MSI : + TIOCP_PCI64_CMDTYPE_MEM; + + /* If PCI mode, func zero uses VCHAN0, every other func uses VCHAN1 */ + if (!IS_PCIX(pcibus_info) && PCI_FUNC(info->pdi_linux_pcidev->devfn)) + pci_addr |= PCI64_ATTR_VIRTUAL; + + return pci_addr; +} + +static dma_addr_t +pcibr_dmatrans_direct32(struct pcidev_info * info, + u64 paddr, size_t req_size, u64 flags, int dma_flags) +{ + struct pcidev_info *pcidev_info = info->pdi_host_pcidev_info; + struct pcibus_info *pcibus_info = (struct pcibus_info *)pcidev_info-> + pdi_pcibus_info; + u64 xio_addr; + + u64 xio_base; + u64 offset; + u64 endoff; + + if (IS_PCIX(pcibus_info)) { + return 0; + } + + if (dma_flags & SN_DMA_MSI) + return 0; + + if (SN_DMA_ADDRTYPE(dma_flags) == SN_DMA_ADDR_PHYS) + xio_addr = IS_PIC_SOFT(pcibus_info) ? PHYS_TO_DMA(paddr) : + PHYS_TO_TIODMA(paddr); + else + xio_addr = paddr; + + xio_base = pcibus_info->pbi_dir_xbase; + offset = xio_addr - xio_base; + endoff = req_size + offset; + if ((req_size > (1ULL << 31)) || /* Too Big */ + (xio_addr < xio_base) || /* Out of range for mappings */ + (endoff > (1ULL << 31))) { /* Too Big */ + return 0; + } + + return PCI32_DIRECT_BASE | offset; +} + +/* + * Wrapper routine for freeing DMA maps + * DMA mappings for Direct 64 and 32 do not have any DMA maps. + */ +void +pcibr_dma_unmap(struct pci_dev *hwdev, dma_addr_t dma_handle, int direction) +{ + struct pcidev_info *pcidev_info = SN_PCIDEV_INFO(hwdev); + struct pcibus_info *pcibus_info = + (struct pcibus_info *)pcidev_info->pdi_pcibus_info; + + if (IS_PCI32_MAPPED(dma_handle)) { + int ate_index; + + ate_index = + IOPG((ATE_SWAP_OFF(dma_handle) - PCI32_MAPPED_BASE)); + pcibr_ate_free(pcibus_info, ate_index); + } +} + +/* + * On SN systems there is a race condition between a PIO read response and + * DMA's. In rare cases, the read response may beat the DMA, causing the + * driver to think that data in memory is complete and meaningful. This code + * eliminates that race. This routine is called by the PIO read routines + * after doing the read. For PIC this routine then forces a fake interrupt + * on another line, which is logically associated with the slot that the PIO + * is addressed to. It then spins while watching the memory location that + * the interrupt is targeted to. When the interrupt response arrives, we + * are sure that the DMA has landed in memory and it is safe for the driver + * to proceed. For TIOCP use the Device(x) Write Request Buffer Flush + * Bridge register since it ensures the data has entered the coherence domain, + * unlike the PIC Device(x) Write Request Buffer Flush register. + */ + +void sn_dma_flush(u64 addr) +{ + nasid_t nasid; + int is_tio; + int wid_num; + int i, j; + unsigned long flags; + u64 itte; + struct hubdev_info *hubinfo; + struct sn_flush_device_kernel *p; + struct sn_flush_device_common *common; + struct sn_flush_nasid_entry *flush_nasid_list; + + if (!sn_ioif_inited) + return; + + nasid = NASID_GET(addr); + if (-1 == nasid_to_cnodeid(nasid)) + return; + + hubinfo = (NODEPDA(nasid_to_cnodeid(nasid)))->pdinfo; + + BUG_ON(!hubinfo); + + flush_nasid_list = &hubinfo->hdi_flush_nasid_list; + if (flush_nasid_list->widget_p == NULL) + return; + + is_tio = (nasid & 1); + if (is_tio) { + int itte_index; + + if (TIO_HWIN(addr)) + itte_index = 0; + else if (TIO_BWIN_WINDOWNUM(addr)) + itte_index = TIO_BWIN_WINDOWNUM(addr); + else + itte_index = -1; + + if (itte_index >= 0) { + itte = flush_nasid_list->iio_itte[itte_index]; + if (! TIO_ITTE_VALID(itte)) + return; + wid_num = TIO_ITTE_WIDGET(itte); + } else + wid_num = TIO_SWIN_WIDGETNUM(addr); + } else { + if (BWIN_WINDOWNUM(addr)) { + itte = flush_nasid_list->iio_itte[BWIN_WINDOWNUM(addr)]; + wid_num = IIO_ITTE_WIDGET(itte); + } else + wid_num = SWIN_WIDGETNUM(addr); + } + if (flush_nasid_list->widget_p[wid_num] == NULL) + return; + p = &flush_nasid_list->widget_p[wid_num][0]; + + /* find a matching BAR */ + for (i = 0; i < DEV_PER_WIDGET; i++,p++) { + common = p->common; + for (j = 0; j < PCI_ROM_RESOURCE; j++) { + if (common->sfdl_bar_list[j].start == 0) + break; + if (addr >= common->sfdl_bar_list[j].start + && addr <= common->sfdl_bar_list[j].end) + break; + } + if (j < PCI_ROM_RESOURCE && common->sfdl_bar_list[j].start != 0) + break; + } + + /* if no matching BAR, return without doing anything. */ + if (i == DEV_PER_WIDGET) + return; + + /* + * For TIOCP use the Device(x) Write Request Buffer Flush Bridge + * register since it ensures the data has entered the coherence + * domain, unlike PIC. + */ + if (is_tio) { + /* + * Note: devices behind TIOCE should never be matched in the + * above code, and so the following code is PIC/CP centric. + * If CE ever needs the sn_dma_flush mechanism, we will have + * to account for that here and in tioce_bus_fixup(). + */ + u32 tio_id = HUB_L(TIO_IOSPACE_ADDR(nasid, TIO_NODE_ID)); + u32 revnum = XWIDGET_PART_REV_NUM(tio_id); + + /* TIOCP BRINGUP WAR (PV907516): Don't write buffer flush reg */ + if ((1 << XWIDGET_PART_REV_NUM_REV(revnum)) & PV907516) { + return; + } else { + pcireg_wrb_flush_get(common->sfdl_pcibus_info, + (common->sfdl_slot - 1)); + } + } else { + spin_lock_irqsave(&p->sfdl_flush_lock, flags); + *common->sfdl_flush_addr = 0; + + /* force an interrupt. */ + *(volatile u32 *)(common->sfdl_force_int_addr) = 1; + + /* wait for the interrupt to come back. */ + while (*(common->sfdl_flush_addr) != 0x10f) + cpu_relax(); + + /* okay, everything is synched up. */ + spin_unlock_irqrestore(&p->sfdl_flush_lock, flags); + } + return; +} + +/* + * DMA interfaces. Called from pci_dma.c routines. + */ + +dma_addr_t +pcibr_dma_map(struct pci_dev * hwdev, unsigned long phys_addr, size_t size, int dma_flags) +{ + dma_addr_t dma_handle; + struct pcidev_info *pcidev_info = SN_PCIDEV_INFO(hwdev); + + /* SN cannot support DMA addresses smaller than 32 bits. */ + if (hwdev->dma_mask < 0x7fffffff) { + return 0; + } + + if (hwdev->dma_mask == ~0UL) { + /* + * Handle the most common case: 64 bit cards. This + * call should always succeed. + */ + + dma_handle = pcibr_dmatrans_direct64(pcidev_info, phys_addr, + PCI64_ATTR_PREF, dma_flags); + } else { + /* Handle 32-63 bit cards via direct mapping */ + dma_handle = pcibr_dmatrans_direct32(pcidev_info, phys_addr, + size, 0, dma_flags); + if (!dma_handle) { + /* + * It is a 32 bit card and we cannot do direct mapping, + * so we use an ATE. + */ + + dma_handle = pcibr_dmamap_ate32(pcidev_info, phys_addr, + size, PCI32_ATE_PREF, + dma_flags); + } + } + + return dma_handle; +} + +dma_addr_t +pcibr_dma_map_consistent(struct pci_dev * hwdev, unsigned long phys_addr, + size_t size, int dma_flags) +{ + dma_addr_t dma_handle; + struct pcidev_info *pcidev_info = SN_PCIDEV_INFO(hwdev); + + if (hwdev->dev.coherent_dma_mask == ~0UL) { + dma_handle = pcibr_dmatrans_direct64(pcidev_info, phys_addr, + PCI64_ATTR_BAR, dma_flags); + } else { + dma_handle = (dma_addr_t) pcibr_dmamap_ate32(pcidev_info, + phys_addr, size, + PCI32_ATE_BAR, dma_flags); + } + + return dma_handle; +} + +EXPORT_SYMBOL(sn_dma_flush); diff --git a/arch/ia64/sn/pci/pcibr/pcibr_provider.c b/arch/ia64/sn/pci/pcibr/pcibr_provider.c new file mode 100644 index 000000000..7195df1da --- /dev/null +++ b/arch/ia64/sn/pci/pcibr/pcibr_provider.c @@ -0,0 +1,265 @@ +/* + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. + * + * Copyright (C) 2001-2004, 2006 Silicon Graphics, Inc. All rights reserved. + */ + +#include <linux/interrupt.h> +#include <linux/types.h> +#include <linux/slab.h> +#include <linux/pci.h> +#include <linux/export.h> +#include <asm/sn/addrs.h> +#include <asm/sn/geo.h> +#include <asm/sn/pcibr_provider.h> +#include <asm/sn/pcibus_provider_defs.h> +#include <asm/sn/pcidev.h> +#include <asm/sn/sn_sal.h> +#include <asm/sn/pic.h> +#include <asm/sn/sn2/sn_hwperf.h> +#include "xtalk/xwidgetdev.h" +#include "xtalk/hubdev.h" + +int +sal_pcibr_slot_enable(struct pcibus_info *soft, int device, void *resp, + char **ssdt) +{ + struct ia64_sal_retval ret_stuff; + u64 busnum; + u64 segment; + + ret_stuff.status = 0; + ret_stuff.v0 = 0; + + segment = soft->pbi_buscommon.bs_persist_segment; + busnum = soft->pbi_buscommon.bs_persist_busnum; + SAL_CALL_NOLOCK(ret_stuff, (u64) SN_SAL_IOIF_SLOT_ENABLE, segment, + busnum, (u64) device, (u64) resp, (u64)ia64_tpa(ssdt), + 0, 0); + + return (int)ret_stuff.v0; +} + +int +sal_pcibr_slot_disable(struct pcibus_info *soft, int device, int action, + void *resp) +{ + struct ia64_sal_retval ret_stuff; + u64 busnum; + u64 segment; + + ret_stuff.status = 0; + ret_stuff.v0 = 0; + + segment = soft->pbi_buscommon.bs_persist_segment; + busnum = soft->pbi_buscommon.bs_persist_busnum; + SAL_CALL_NOLOCK(ret_stuff, (u64) SN_SAL_IOIF_SLOT_DISABLE, + segment, busnum, (u64) device, (u64) action, + (u64) resp, 0, 0); + + return (int)ret_stuff.v0; +} + +static int sal_pcibr_error_interrupt(struct pcibus_info *soft) +{ + struct ia64_sal_retval ret_stuff; + u64 busnum; + int segment; + ret_stuff.status = 0; + ret_stuff.v0 = 0; + + segment = soft->pbi_buscommon.bs_persist_segment; + busnum = soft->pbi_buscommon.bs_persist_busnum; + SAL_CALL_NOLOCK(ret_stuff, + (u64) SN_SAL_IOIF_ERROR_INTERRUPT, + (u64) segment, (u64) busnum, 0, 0, 0, 0, 0); + + return (int)ret_stuff.v0; +} + +u16 sn_ioboard_to_pci_bus(struct pci_bus *pci_bus) +{ + long rc; + u16 uninitialized_var(ioboard); /* GCC be quiet */ + nasid_t nasid = NASID_GET(SN_PCIBUS_BUSSOFT(pci_bus)->bs_base); + + rc = ia64_sn_sysctl_ioboard_get(nasid, &ioboard); + if (rc) { + printk(KERN_WARNING "ia64_sn_sysctl_ioboard_get failed: %ld\n", + rc); + return 0; + } + + return ioboard; +} + +/* + * PCI Bridge Error interrupt handler. Gets invoked whenever a PCI + * bridge sends an error interrupt. + */ +static irqreturn_t +pcibr_error_intr_handler(int irq, void *arg) +{ + struct pcibus_info *soft = arg; + + if (sal_pcibr_error_interrupt(soft) < 0) + panic("pcibr_error_intr_handler(): Fatal Bridge Error"); + + return IRQ_HANDLED; +} + +void * +pcibr_bus_fixup(struct pcibus_bussoft *prom_bussoft, struct pci_controller *controller) +{ + int nasid, cnode, j; + struct hubdev_info *hubdev_info; + struct pcibus_info *soft; + struct sn_flush_device_kernel *sn_flush_device_kernel; + struct sn_flush_device_common *common; + + if (! IS_PCI_BRIDGE_ASIC(prom_bussoft->bs_asic_type)) { + return NULL; + } + + /* + * Allocate kernel bus soft and copy from prom. + */ + + soft = kmemdup(prom_bussoft, sizeof(struct pcibus_info), GFP_KERNEL); + if (!soft) { + return NULL; + } + + soft->pbi_buscommon.bs_base = (unsigned long) + ioremap(REGION_OFFSET(soft->pbi_buscommon.bs_base), + sizeof(struct pic)); + + spin_lock_init(&soft->pbi_lock); + + /* + * register the bridge's error interrupt handler + */ + if (request_irq(SGI_PCIASIC_ERROR, pcibr_error_intr_handler, + IRQF_SHARED, "PCIBR error", (void *)(soft))) { + printk(KERN_WARNING + "pcibr cannot allocate interrupt for error handler\n"); + } + irq_set_handler(SGI_PCIASIC_ERROR, handle_level_irq); + sn_set_err_irq_affinity(SGI_PCIASIC_ERROR); + + /* + * Update the Bridge with the "kernel" pagesize + */ + if (PAGE_SIZE < 16384) { + pcireg_control_bit_clr(soft, PCIBR_CTRL_PAGE_SIZE); + } else { + pcireg_control_bit_set(soft, PCIBR_CTRL_PAGE_SIZE); + } + + nasid = NASID_GET(soft->pbi_buscommon.bs_base); + cnode = nasid_to_cnodeid(nasid); + hubdev_info = (struct hubdev_info *)(NODEPDA(cnode)->pdinfo); + + if (hubdev_info->hdi_flush_nasid_list.widget_p) { + sn_flush_device_kernel = hubdev_info->hdi_flush_nasid_list. + widget_p[(int)soft->pbi_buscommon.bs_xid]; + if (sn_flush_device_kernel) { + for (j = 0; j < DEV_PER_WIDGET; + j++, sn_flush_device_kernel++) { + common = sn_flush_device_kernel->common; + if (common->sfdl_slot == -1) + continue; + if ((common->sfdl_persistent_segment == + soft->pbi_buscommon.bs_persist_segment) && + (common->sfdl_persistent_busnum == + soft->pbi_buscommon.bs_persist_busnum)) + common->sfdl_pcibus_info = + soft; + } + } + } + + /* Setup the PMU ATE map */ + soft->pbi_int_ate_resource.lowest_free_index = 0; + soft->pbi_int_ate_resource.ate = + kcalloc(soft->pbi_int_ate_size, sizeof(u64), GFP_KERNEL); + + if (!soft->pbi_int_ate_resource.ate) { + kfree(soft); + return NULL; + } + + return soft; +} + +void pcibr_force_interrupt(struct sn_irq_info *sn_irq_info) +{ + struct pcidev_info *pcidev_info; + struct pcibus_info *pcibus_info; + int bit = sn_irq_info->irq_int_bit; + + if (! sn_irq_info->irq_bridge) + return; + + pcidev_info = (struct pcidev_info *)sn_irq_info->irq_pciioinfo; + if (pcidev_info) { + pcibus_info = + (struct pcibus_info *)pcidev_info->pdi_host_pcidev_info-> + pdi_pcibus_info; + pcireg_force_intr_set(pcibus_info, bit); + } +} + +void pcibr_target_interrupt(struct sn_irq_info *sn_irq_info) +{ + struct pcidev_info *pcidev_info; + struct pcibus_info *pcibus_info; + int bit = sn_irq_info->irq_int_bit; + u64 xtalk_addr = sn_irq_info->irq_xtalkaddr; + + pcidev_info = (struct pcidev_info *)sn_irq_info->irq_pciioinfo; + if (pcidev_info) { + pcibus_info = + (struct pcibus_info *)pcidev_info->pdi_host_pcidev_info-> + pdi_pcibus_info; + + /* Disable the device's IRQ */ + pcireg_intr_enable_bit_clr(pcibus_info, (1 << bit)); + + /* Change the device's IRQ */ + pcireg_intr_addr_addr_set(pcibus_info, bit, xtalk_addr); + + /* Re-enable the device's IRQ */ + pcireg_intr_enable_bit_set(pcibus_info, (1 << bit)); + + pcibr_force_interrupt(sn_irq_info); + } +} + +/* + * Provider entries for PIC/CP + */ + +struct sn_pcibus_provider pcibr_provider = { + .dma_map = pcibr_dma_map, + .dma_map_consistent = pcibr_dma_map_consistent, + .dma_unmap = pcibr_dma_unmap, + .bus_fixup = pcibr_bus_fixup, + .force_interrupt = pcibr_force_interrupt, + .target_interrupt = pcibr_target_interrupt +}; + +int +pcibr_init_provider(void) +{ + sn_pci_provider[PCIIO_ASIC_TYPE_PIC] = &pcibr_provider; + sn_pci_provider[PCIIO_ASIC_TYPE_TIOCP] = &pcibr_provider; + + return 0; +} + +EXPORT_SYMBOL_GPL(sal_pcibr_slot_enable); +EXPORT_SYMBOL_GPL(sal_pcibr_slot_disable); +EXPORT_SYMBOL_GPL(sn_ioboard_to_pci_bus); diff --git a/arch/ia64/sn/pci/pcibr/pcibr_reg.c b/arch/ia64/sn/pci/pcibr/pcibr_reg.c new file mode 100644 index 000000000..8b8bbd51d --- /dev/null +++ b/arch/ia64/sn/pci/pcibr/pcibr_reg.c @@ -0,0 +1,285 @@ +/* + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. + * + * Copyright (C) 2004 Silicon Graphics, Inc. All rights reserved. + */ + +#include <linux/interrupt.h> +#include <linux/types.h> +#include <asm/sn/io.h> +#include <asm/sn/pcibr_provider.h> +#include <asm/sn/pcibus_provider_defs.h> +#include <asm/sn/pcidev.h> +#include <asm/sn/pic.h> +#include <asm/sn/tiocp.h> + +union br_ptr { + struct tiocp tio; + struct pic pic; +}; + +/* + * Control Register Access -- Read/Write 0000_0020 + */ +void pcireg_control_bit_clr(struct pcibus_info *pcibus_info, u64 bits) +{ + union br_ptr __iomem *ptr = (union br_ptr __iomem *)pcibus_info->pbi_buscommon.bs_base; + + if (pcibus_info) { + switch (pcibus_info->pbi_bridge_type) { + case PCIBR_BRIDGETYPE_TIOCP: + __sn_clrq_relaxed(&ptr->tio.cp_control, bits); + break; + case PCIBR_BRIDGETYPE_PIC: + __sn_clrq_relaxed(&ptr->pic.p_wid_control, bits); + break; + default: + panic + ("pcireg_control_bit_clr: unknown bridgetype bridge 0x%p", + ptr); + } + } +} + +void pcireg_control_bit_set(struct pcibus_info *pcibus_info, u64 bits) +{ + union br_ptr __iomem *ptr = (union br_ptr __iomem *)pcibus_info->pbi_buscommon.bs_base; + + if (pcibus_info) { + switch (pcibus_info->pbi_bridge_type) { + case PCIBR_BRIDGETYPE_TIOCP: + __sn_setq_relaxed(&ptr->tio.cp_control, bits); + break; + case PCIBR_BRIDGETYPE_PIC: + __sn_setq_relaxed(&ptr->pic.p_wid_control, bits); + break; + default: + panic + ("pcireg_control_bit_set: unknown bridgetype bridge 0x%p", + ptr); + } + } +} + +/* + * PCI/PCIX Target Flush Register Access -- Read Only 0000_0050 + */ +u64 pcireg_tflush_get(struct pcibus_info *pcibus_info) +{ + union br_ptr __iomem *ptr = (union br_ptr __iomem *)pcibus_info->pbi_buscommon.bs_base; + u64 ret = 0; + + if (pcibus_info) { + switch (pcibus_info->pbi_bridge_type) { + case PCIBR_BRIDGETYPE_TIOCP: + ret = __sn_readq_relaxed(&ptr->tio.cp_tflush); + break; + case PCIBR_BRIDGETYPE_PIC: + ret = __sn_readq_relaxed(&ptr->pic.p_wid_tflush); + break; + default: + panic + ("pcireg_tflush_get: unknown bridgetype bridge 0x%p", + ptr); + } + } + + /* Read of the Target Flush should always return zero */ + if (ret != 0) + panic("pcireg_tflush_get:Target Flush failed\n"); + + return ret; +} + +/* + * Interrupt Status Register Access -- Read Only 0000_0100 + */ +u64 pcireg_intr_status_get(struct pcibus_info * pcibus_info) +{ + union br_ptr __iomem *ptr = (union br_ptr __iomem *)pcibus_info->pbi_buscommon.bs_base; + u64 ret = 0; + + if (pcibus_info) { + switch (pcibus_info->pbi_bridge_type) { + case PCIBR_BRIDGETYPE_TIOCP: + ret = __sn_readq_relaxed(&ptr->tio.cp_int_status); + break; + case PCIBR_BRIDGETYPE_PIC: + ret = __sn_readq_relaxed(&ptr->pic.p_int_status); + break; + default: + panic + ("pcireg_intr_status_get: unknown bridgetype bridge 0x%p", + ptr); + } + } + return ret; +} + +/* + * Interrupt Enable Register Access -- Read/Write 0000_0108 + */ +void pcireg_intr_enable_bit_clr(struct pcibus_info *pcibus_info, u64 bits) +{ + union br_ptr __iomem *ptr = (union br_ptr __iomem *)pcibus_info->pbi_buscommon.bs_base; + + if (pcibus_info) { + switch (pcibus_info->pbi_bridge_type) { + case PCIBR_BRIDGETYPE_TIOCP: + __sn_clrq_relaxed(&ptr->tio.cp_int_enable, bits); + break; + case PCIBR_BRIDGETYPE_PIC: + __sn_clrq_relaxed(&ptr->pic.p_int_enable, bits); + break; + default: + panic + ("pcireg_intr_enable_bit_clr: unknown bridgetype bridge 0x%p", + ptr); + } + } +} + +void pcireg_intr_enable_bit_set(struct pcibus_info *pcibus_info, u64 bits) +{ + union br_ptr __iomem *ptr = (union br_ptr __iomem *)pcibus_info->pbi_buscommon.bs_base; + + if (pcibus_info) { + switch (pcibus_info->pbi_bridge_type) { + case PCIBR_BRIDGETYPE_TIOCP: + __sn_setq_relaxed(&ptr->tio.cp_int_enable, bits); + break; + case PCIBR_BRIDGETYPE_PIC: + __sn_setq_relaxed(&ptr->pic.p_int_enable, bits); + break; + default: + panic + ("pcireg_intr_enable_bit_set: unknown bridgetype bridge 0x%p", + ptr); + } + } +} + +/* + * Intr Host Address Register (int_addr) -- Read/Write 0000_0130 - 0000_0168 + */ +void pcireg_intr_addr_addr_set(struct pcibus_info *pcibus_info, int int_n, + u64 addr) +{ + union br_ptr __iomem *ptr = (union br_ptr __iomem *)pcibus_info->pbi_buscommon.bs_base; + + if (pcibus_info) { + switch (pcibus_info->pbi_bridge_type) { + case PCIBR_BRIDGETYPE_TIOCP: + __sn_clrq_relaxed(&ptr->tio.cp_int_addr[int_n], + TIOCP_HOST_INTR_ADDR); + __sn_setq_relaxed(&ptr->tio.cp_int_addr[int_n], + (addr & TIOCP_HOST_INTR_ADDR)); + break; + case PCIBR_BRIDGETYPE_PIC: + __sn_clrq_relaxed(&ptr->pic.p_int_addr[int_n], + PIC_HOST_INTR_ADDR); + __sn_setq_relaxed(&ptr->pic.p_int_addr[int_n], + (addr & PIC_HOST_INTR_ADDR)); + break; + default: + panic + ("pcireg_intr_addr_addr_get: unknown bridgetype bridge 0x%p", + ptr); + } + } +} + +/* + * Force Interrupt Register Access -- Write Only 0000_01C0 - 0000_01F8 + */ +void pcireg_force_intr_set(struct pcibus_info *pcibus_info, int int_n) +{ + union br_ptr __iomem *ptr = (union br_ptr __iomem *)pcibus_info->pbi_buscommon.bs_base; + + if (pcibus_info) { + switch (pcibus_info->pbi_bridge_type) { + case PCIBR_BRIDGETYPE_TIOCP: + writeq(1, &ptr->tio.cp_force_pin[int_n]); + break; + case PCIBR_BRIDGETYPE_PIC: + writeq(1, &ptr->pic.p_force_pin[int_n]); + break; + default: + panic + ("pcireg_force_intr_set: unknown bridgetype bridge 0x%p", + ptr); + } + } +} + +/* + * Device(x) Write Buffer Flush Reg Access -- Read Only 0000_0240 - 0000_0258 + */ +u64 pcireg_wrb_flush_get(struct pcibus_info *pcibus_info, int device) +{ + union br_ptr __iomem *ptr = (union br_ptr __iomem *)pcibus_info->pbi_buscommon.bs_base; + u64 ret = 0; + + if (pcibus_info) { + switch (pcibus_info->pbi_bridge_type) { + case PCIBR_BRIDGETYPE_TIOCP: + ret = + __sn_readq_relaxed(&ptr->tio.cp_wr_req_buf[device]); + break; + case PCIBR_BRIDGETYPE_PIC: + ret = + __sn_readq_relaxed(&ptr->pic.p_wr_req_buf[device]); + break; + default: + panic("pcireg_wrb_flush_get: unknown bridgetype bridge 0x%p", ptr); + } + + } + /* Read of the Write Buffer Flush should always return zero */ + return ret; +} + +void pcireg_int_ate_set(struct pcibus_info *pcibus_info, int ate_index, + u64 val) +{ + union br_ptr __iomem *ptr = (union br_ptr __iomem *)pcibus_info->pbi_buscommon.bs_base; + + if (pcibus_info) { + switch (pcibus_info->pbi_bridge_type) { + case PCIBR_BRIDGETYPE_TIOCP: + writeq(val, &ptr->tio.cp_int_ate_ram[ate_index]); + break; + case PCIBR_BRIDGETYPE_PIC: + writeq(val, &ptr->pic.p_int_ate_ram[ate_index]); + break; + default: + panic + ("pcireg_int_ate_set: unknown bridgetype bridge 0x%p", + ptr); + } + } +} + +u64 __iomem *pcireg_int_ate_addr(struct pcibus_info *pcibus_info, int ate_index) +{ + union br_ptr __iomem *ptr = (union br_ptr __iomem *)pcibus_info->pbi_buscommon.bs_base; + u64 __iomem *ret = NULL; + + if (pcibus_info) { + switch (pcibus_info->pbi_bridge_type) { + case PCIBR_BRIDGETYPE_TIOCP: + ret = &ptr->tio.cp_int_ate_ram[ate_index]; + break; + case PCIBR_BRIDGETYPE_PIC: + ret = &ptr->pic.p_int_ate_ram[ate_index]; + break; + default: + panic + ("pcireg_int_ate_addr: unknown bridgetype bridge 0x%p", + ptr); + } + } + return ret; +} diff --git a/arch/ia64/sn/pci/tioca_provider.c b/arch/ia64/sn/pci/tioca_provider.c new file mode 100644 index 000000000..a70b11fd5 --- /dev/null +++ b/arch/ia64/sn/pci/tioca_provider.c @@ -0,0 +1,677 @@ +/* + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. + * + * Copyright (C) 2003-2005 Silicon Graphics, Inc. All Rights Reserved. + */ + +#include <linux/types.h> +#include <linux/interrupt.h> +#include <linux/pci.h> +#include <linux/bitmap.h> +#include <linux/slab.h> +#include <linux/export.h> +#include <asm/sn/sn_sal.h> +#include <asm/sn/addrs.h> +#include <asm/sn/io.h> +#include <asm/sn/pcidev.h> +#include <asm/sn/pcibus_provider_defs.h> +#include <asm/sn/tioca_provider.h> + +u32 tioca_gart_found; +EXPORT_SYMBOL(tioca_gart_found); /* used by agp-sgi */ + +LIST_HEAD(tioca_list); +EXPORT_SYMBOL(tioca_list); /* used by agp-sgi */ + +static int tioca_gart_init(struct tioca_kernel *); + +/** + * tioca_gart_init - Initialize SGI TIOCA GART + * @tioca_common: ptr to common prom/kernel struct identifying the + * + * If the indicated tioca has devices present, initialize its associated + * GART MMR's and kernel memory. + */ +static int +tioca_gart_init(struct tioca_kernel *tioca_kern) +{ + u64 ap_reg; + u64 offset; + struct page *tmp; + struct tioca_common *tioca_common; + struct tioca __iomem *ca_base; + + tioca_common = tioca_kern->ca_common; + ca_base = (struct tioca __iomem *)tioca_common->ca_common.bs_base; + + if (list_empty(tioca_kern->ca_devices)) + return 0; + + ap_reg = 0; + + /* + * Validate aperature size + */ + + switch (CA_APERATURE_SIZE >> 20) { + case 4: + ap_reg |= (0x3ff << CA_GART_AP_SIZE_SHFT); /* 4MB */ + break; + case 8: + ap_reg |= (0x3fe << CA_GART_AP_SIZE_SHFT); /* 8MB */ + break; + case 16: + ap_reg |= (0x3fc << CA_GART_AP_SIZE_SHFT); /* 16MB */ + break; + case 32: + ap_reg |= (0x3f8 << CA_GART_AP_SIZE_SHFT); /* 32 MB */ + break; + case 64: + ap_reg |= (0x3f0 << CA_GART_AP_SIZE_SHFT); /* 64 MB */ + break; + case 128: + ap_reg |= (0x3e0 << CA_GART_AP_SIZE_SHFT); /* 128 MB */ + break; + case 256: + ap_reg |= (0x3c0 << CA_GART_AP_SIZE_SHFT); /* 256 MB */ + break; + case 512: + ap_reg |= (0x380 << CA_GART_AP_SIZE_SHFT); /* 512 MB */ + break; + case 1024: + ap_reg |= (0x300 << CA_GART_AP_SIZE_SHFT); /* 1GB */ + break; + case 2048: + ap_reg |= (0x200 << CA_GART_AP_SIZE_SHFT); /* 2GB */ + break; + case 4096: + ap_reg |= (0x000 << CA_GART_AP_SIZE_SHFT); /* 4 GB */ + break; + default: + printk(KERN_ERR "%s: Invalid CA_APERATURE_SIZE " + "0x%lx\n", __func__, (ulong) CA_APERATURE_SIZE); + return -1; + } + + /* + * Set up other aperature parameters + */ + + if (PAGE_SIZE >= 16384) { + tioca_kern->ca_ap_pagesize = 16384; + ap_reg |= CA_GART_PAGE_SIZE; + } else { + tioca_kern->ca_ap_pagesize = 4096; + } + + tioca_kern->ca_ap_size = CA_APERATURE_SIZE; + tioca_kern->ca_ap_bus_base = CA_APERATURE_BASE; + tioca_kern->ca_gart_entries = + tioca_kern->ca_ap_size / tioca_kern->ca_ap_pagesize; + + ap_reg |= (CA_GART_AP_ENB_AGP | CA_GART_AP_ENB_PCI); + ap_reg |= tioca_kern->ca_ap_bus_base; + + /* + * Allocate and set up the GART + */ + + tioca_kern->ca_gart_size = tioca_kern->ca_gart_entries * sizeof(u64); + tmp = + alloc_pages_node(tioca_kern->ca_closest_node, + GFP_KERNEL | __GFP_ZERO, + get_order(tioca_kern->ca_gart_size)); + + if (!tmp) { + printk(KERN_ERR "%s: Could not allocate " + "%llu bytes (order %d) for GART\n", + __func__, + tioca_kern->ca_gart_size, + get_order(tioca_kern->ca_gart_size)); + return -ENOMEM; + } + + tioca_kern->ca_gart = page_address(tmp); + tioca_kern->ca_gart_coretalk_addr = + PHYS_TO_TIODMA(virt_to_phys(tioca_kern->ca_gart)); + + /* + * Compute PCI/AGP convenience fields + */ + + offset = CA_PCI32_MAPPED_BASE - CA_APERATURE_BASE; + tioca_kern->ca_pciap_base = CA_PCI32_MAPPED_BASE; + tioca_kern->ca_pciap_size = CA_PCI32_MAPPED_SIZE; + tioca_kern->ca_pcigart_start = offset / tioca_kern->ca_ap_pagesize; + tioca_kern->ca_pcigart_base = + tioca_kern->ca_gart_coretalk_addr + offset; + tioca_kern->ca_pcigart = + &tioca_kern->ca_gart[tioca_kern->ca_pcigart_start]; + tioca_kern->ca_pcigart_entries = + tioca_kern->ca_pciap_size / tioca_kern->ca_ap_pagesize; + tioca_kern->ca_pcigart_pagemap = + kzalloc(tioca_kern->ca_pcigart_entries / 8, GFP_KERNEL); + if (!tioca_kern->ca_pcigart_pagemap) { + free_pages((unsigned long)tioca_kern->ca_gart, + get_order(tioca_kern->ca_gart_size)); + return -1; + } + + offset = CA_AGP_MAPPED_BASE - CA_APERATURE_BASE; + tioca_kern->ca_gfxap_base = CA_AGP_MAPPED_BASE; + tioca_kern->ca_gfxap_size = CA_AGP_MAPPED_SIZE; + tioca_kern->ca_gfxgart_start = offset / tioca_kern->ca_ap_pagesize; + tioca_kern->ca_gfxgart_base = + tioca_kern->ca_gart_coretalk_addr + offset; + tioca_kern->ca_gfxgart = + &tioca_kern->ca_gart[tioca_kern->ca_gfxgart_start]; + tioca_kern->ca_gfxgart_entries = + tioca_kern->ca_gfxap_size / tioca_kern->ca_ap_pagesize; + + /* + * various control settings: + * use agp op-combining + * use GET semantics to fetch memory + * participate in coherency domain + * DISABLE GART PREFETCHING due to hw bug tracked in SGI PV930029 + */ + + __sn_setq_relaxed(&ca_base->ca_control1, + CA_AGPDMA_OP_ENB_COMBDELAY); /* PV895469 ? */ + __sn_clrq_relaxed(&ca_base->ca_control2, CA_GART_MEM_PARAM); + __sn_setq_relaxed(&ca_base->ca_control2, + (0x2ull << CA_GART_MEM_PARAM_SHFT)); + tioca_kern->ca_gart_iscoherent = 1; + __sn_clrq_relaxed(&ca_base->ca_control2, + (CA_GART_WR_PREFETCH_ENB | CA_GART_RD_PREFETCH_ENB)); + + /* + * Unmask GART fetch error interrupts. Clear residual errors first. + */ + + writeq(CA_GART_FETCH_ERR, &ca_base->ca_int_status_alias); + writeq(CA_GART_FETCH_ERR, &ca_base->ca_mult_error_alias); + __sn_clrq_relaxed(&ca_base->ca_int_mask, CA_GART_FETCH_ERR); + + /* + * Program the aperature and gart registers in TIOCA + */ + + writeq(ap_reg, &ca_base->ca_gart_aperature); + writeq(tioca_kern->ca_gart_coretalk_addr|1, &ca_base->ca_gart_ptr_table); + + return 0; +} + +/** + * tioca_fastwrite_enable - enable AGP FW for a tioca and its functions + * @tioca_kernel: structure representing the CA + * + * Given a CA, scan all attached functions making sure they all support + * FastWrite. If so, enable FastWrite for all functions and the CA itself. + */ + +void +tioca_fastwrite_enable(struct tioca_kernel *tioca_kern) +{ + int cap_ptr; + u32 reg; + struct tioca __iomem *tioca_base; + struct pci_dev *pdev; + struct tioca_common *common; + + common = tioca_kern->ca_common; + + /* + * Scan all vga controllers on this bus making sure they all + * support FW. If not, return. + */ + + list_for_each_entry(pdev, tioca_kern->ca_devices, bus_list) { + if (pdev->class != (PCI_CLASS_DISPLAY_VGA << 8)) + continue; + + cap_ptr = pci_find_capability(pdev, PCI_CAP_ID_AGP); + if (!cap_ptr) + return; /* no AGP CAP means no FW */ + + pci_read_config_dword(pdev, cap_ptr + PCI_AGP_STATUS, ®); + if (!(reg & PCI_AGP_STATUS_FW)) + return; /* function doesn't support FW */ + } + + /* + * Set fw for all vga fn's + */ + + list_for_each_entry(pdev, tioca_kern->ca_devices, bus_list) { + if (pdev->class != (PCI_CLASS_DISPLAY_VGA << 8)) + continue; + + cap_ptr = pci_find_capability(pdev, PCI_CAP_ID_AGP); + pci_read_config_dword(pdev, cap_ptr + PCI_AGP_COMMAND, ®); + reg |= PCI_AGP_COMMAND_FW; + pci_write_config_dword(pdev, cap_ptr + PCI_AGP_COMMAND, reg); + } + + /* + * Set ca's fw to match + */ + + tioca_base = (struct tioca __iomem*)common->ca_common.bs_base; + __sn_setq_relaxed(&tioca_base->ca_control1, CA_AGP_FW_ENABLE); +} + +EXPORT_SYMBOL(tioca_fastwrite_enable); /* used by agp-sgi */ + +/** + * tioca_dma_d64 - create a DMA mapping using 64-bit direct mode + * @paddr: system physical address + * + * Map @paddr into 64-bit CA bus space. No device context is necessary. + * Bits 53:0 come from the coretalk address. We just need to mask in the + * following optional bits of the 64-bit pci address: + * + * 63:60 - Coretalk Packet Type - 0x1 for Mem Get/Put (coherent) + * 0x2 for PIO (non-coherent) + * We will always use 0x1 + * 55:55 - Swap bytes Currently unused + */ +static u64 +tioca_dma_d64(unsigned long paddr) +{ + dma_addr_t bus_addr; + + bus_addr = PHYS_TO_TIODMA(paddr); + + BUG_ON(!bus_addr); + BUG_ON(bus_addr >> 54); + + /* Set upper nibble to Cache Coherent Memory op */ + bus_addr |= (1UL << 60); + + return bus_addr; +} + +/** + * tioca_dma_d48 - create a DMA mapping using 48-bit direct mode + * @pdev: linux pci_dev representing the function + * @paddr: system physical address + * + * Map @paddr into 64-bit bus space of the CA associated with @pcidev_info. + * + * The CA agp 48 bit direct address falls out as follows: + * + * When direct mapping AGP addresses, the 48 bit AGP address is + * constructed as follows: + * + * [47:40] - Low 8 bits of the page Node ID extracted from coretalk + * address [47:40]. The upper 8 node bits are fixed + * and come from the xxx register bits [5:0] + * [39:38] - Chiplet ID extracted from coretalk address [39:38] + * [37:00] - node offset extracted from coretalk address [37:00] + * + * Since the node id in general will be non-zero, and the chiplet id + * will always be non-zero, it follows that the device must support + * a dma mask of at least 0xffffffffff (40 bits) to target node 0 + * and in general should be 0xffffffffffff (48 bits) to target nodes + * up to 255. Nodes above 255 need the support of the xxx register, + * and so a given CA can only directly target nodes in the range + * xxx - xxx+255. + */ +static u64 +tioca_dma_d48(struct pci_dev *pdev, u64 paddr) +{ + struct tioca_common *tioca_common; + struct tioca __iomem *ca_base; + u64 ct_addr; + dma_addr_t bus_addr; + u32 node_upper; + u64 agp_dma_extn; + struct pcidev_info *pcidev_info = SN_PCIDEV_INFO(pdev); + + tioca_common = (struct tioca_common *)pcidev_info->pdi_pcibus_info; + ca_base = (struct tioca __iomem *)tioca_common->ca_common.bs_base; + + ct_addr = PHYS_TO_TIODMA(paddr); + if (!ct_addr) + return 0; + + bus_addr = (dma_addr_t) (ct_addr & 0xffffffffffffUL); + node_upper = ct_addr >> 48; + + if (node_upper > 64) { + printk(KERN_ERR "%s: coretalk addr 0x%p node id out " + "of range\n", __func__, (void *)ct_addr); + return 0; + } + + agp_dma_extn = __sn_readq_relaxed(&ca_base->ca_agp_dma_addr_extn); + if (node_upper != (agp_dma_extn >> CA_AGP_DMA_NODE_ID_SHFT)) { + printk(KERN_ERR "%s: coretalk upper node (%u) " + "mismatch with ca_agp_dma_addr_extn (%llu)\n", + __func__, + node_upper, (agp_dma_extn >> CA_AGP_DMA_NODE_ID_SHFT)); + return 0; + } + + return bus_addr; +} + +/** + * tioca_dma_mapped - create a DMA mapping using a CA GART + * @pdev: linux pci_dev representing the function + * @paddr: host physical address to map + * @req_size: len (bytes) to map + * + * Map @paddr into CA address space using the GART mechanism. The mapped + * dma_addr_t is guaranteed to be contiguous in CA bus space. + */ +static dma_addr_t +tioca_dma_mapped(struct pci_dev *pdev, unsigned long paddr, size_t req_size) +{ + int ps, ps_shift, entry, entries, mapsize; + u64 xio_addr, end_xio_addr; + struct tioca_common *tioca_common; + struct tioca_kernel *tioca_kern; + dma_addr_t bus_addr = 0; + struct tioca_dmamap *ca_dmamap; + void *map; + unsigned long flags; + struct pcidev_info *pcidev_info = SN_PCIDEV_INFO(pdev); + + tioca_common = (struct tioca_common *)pcidev_info->pdi_pcibus_info; + tioca_kern = (struct tioca_kernel *)tioca_common->ca_kernel_private; + + xio_addr = PHYS_TO_TIODMA(paddr); + if (!xio_addr) + return 0; + + spin_lock_irqsave(&tioca_kern->ca_lock, flags); + + /* + * allocate a map struct + */ + + ca_dmamap = kzalloc(sizeof(struct tioca_dmamap), GFP_ATOMIC); + if (!ca_dmamap) + goto map_return; + + /* + * Locate free entries that can hold req_size. Account for + * unaligned start/length when allocating. + */ + + ps = tioca_kern->ca_ap_pagesize; /* will be power of 2 */ + ps_shift = ffs(ps) - 1; + end_xio_addr = xio_addr + req_size - 1; + + entries = (end_xio_addr >> ps_shift) - (xio_addr >> ps_shift) + 1; + + map = tioca_kern->ca_pcigart_pagemap; + mapsize = tioca_kern->ca_pcigart_entries; + + entry = bitmap_find_next_zero_area(map, mapsize, 0, entries, 0); + if (entry >= mapsize) { + kfree(ca_dmamap); + goto map_return; + } + + bitmap_set(map, entry, entries); + + bus_addr = tioca_kern->ca_pciap_base + (entry * ps); + + ca_dmamap->cad_dma_addr = bus_addr; + ca_dmamap->cad_gart_size = entries; + ca_dmamap->cad_gart_entry = entry; + list_add(&ca_dmamap->cad_list, &tioca_kern->ca_dmamaps); + + if (xio_addr % ps) { + tioca_kern->ca_pcigart[entry] = tioca_paddr_to_gart(xio_addr); + bus_addr += xio_addr & (ps - 1); + xio_addr &= ~(ps - 1); + xio_addr += ps; + entry++; + } + + while (xio_addr < end_xio_addr) { + tioca_kern->ca_pcigart[entry] = tioca_paddr_to_gart(xio_addr); + xio_addr += ps; + entry++; + } + + tioca_tlbflush(tioca_kern); + +map_return: + spin_unlock_irqrestore(&tioca_kern->ca_lock, flags); + return bus_addr; +} + +/** + * tioca_dma_unmap - release CA mapping resources + * @pdev: linux pci_dev representing the function + * @bus_addr: bus address returned by an earlier tioca_dma_map + * @dir: mapping direction (unused) + * + * Locate mapping resources associated with @bus_addr and release them. + * For mappings created using the direct modes (64 or 48) there are no + * resources to release. + */ +static void +tioca_dma_unmap(struct pci_dev *pdev, dma_addr_t bus_addr, int dir) +{ + int i, entry; + struct tioca_common *tioca_common; + struct tioca_kernel *tioca_kern; + struct tioca_dmamap *map; + struct pcidev_info *pcidev_info = SN_PCIDEV_INFO(pdev); + unsigned long flags; + + tioca_common = (struct tioca_common *)pcidev_info->pdi_pcibus_info; + tioca_kern = (struct tioca_kernel *)tioca_common->ca_kernel_private; + + /* return straight away if this isn't be a mapped address */ + + if (bus_addr < tioca_kern->ca_pciap_base || + bus_addr >= (tioca_kern->ca_pciap_base + tioca_kern->ca_pciap_size)) + return; + + spin_lock_irqsave(&tioca_kern->ca_lock, flags); + + list_for_each_entry(map, &tioca_kern->ca_dmamaps, cad_list) + if (map->cad_dma_addr == bus_addr) + break; + + BUG_ON(map == NULL); + + entry = map->cad_gart_entry; + + for (i = 0; i < map->cad_gart_size; i++, entry++) { + clear_bit(entry, tioca_kern->ca_pcigart_pagemap); + tioca_kern->ca_pcigart[entry] = 0; + } + tioca_tlbflush(tioca_kern); + + list_del(&map->cad_list); + spin_unlock_irqrestore(&tioca_kern->ca_lock, flags); + kfree(map); +} + +/** + * tioca_dma_map - map pages for PCI DMA + * @pdev: linux pci_dev representing the function + * @paddr: host physical address to map + * @byte_count: bytes to map + * + * This is the main wrapper for mapping host physical pages to CA PCI space. + * The mapping mode used is based on the devices dma_mask. As a last resort + * use the GART mapped mode. + */ +static u64 +tioca_dma_map(struct pci_dev *pdev, unsigned long paddr, size_t byte_count, int dma_flags) +{ + u64 mapaddr; + + /* + * Not supported for now ... + */ + if (dma_flags & SN_DMA_MSI) + return 0; + + /* + * If card is 64 or 48 bit addressable, use a direct mapping. 32 + * bit direct is so restrictive w.r.t. where the memory resides that + * we don't use it even though CA has some support. + */ + + if (pdev->dma_mask == ~0UL) + mapaddr = tioca_dma_d64(paddr); + else if (pdev->dma_mask == 0xffffffffffffUL) + mapaddr = tioca_dma_d48(pdev, paddr); + else + mapaddr = 0; + + /* Last resort ... use PCI portion of CA GART */ + + if (mapaddr == 0) + mapaddr = tioca_dma_mapped(pdev, paddr, byte_count); + + return mapaddr; +} + +/** + * tioca_error_intr_handler - SGI TIO CA error interrupt handler + * @irq: unused + * @arg: pointer to tioca_common struct for the given CA + * + * Handle a CA error interrupt. Simply a wrapper around a SAL call which + * defers processing to the SGI prom. + */ +static irqreturn_t +tioca_error_intr_handler(int irq, void *arg) +{ + struct tioca_common *soft = arg; + struct ia64_sal_retval ret_stuff; + u64 segment; + u64 busnum; + ret_stuff.status = 0; + ret_stuff.v0 = 0; + + segment = soft->ca_common.bs_persist_segment; + busnum = soft->ca_common.bs_persist_busnum; + + SAL_CALL_NOLOCK(ret_stuff, + (u64) SN_SAL_IOIF_ERROR_INTERRUPT, + segment, busnum, 0, 0, 0, 0, 0); + + return IRQ_HANDLED; +} + +/** + * tioca_bus_fixup - perform final PCI fixup for a TIO CA bus + * @prom_bussoft: Common prom/kernel struct representing the bus + * + * Replicates the tioca_common pointed to by @prom_bussoft in kernel + * space. Allocates and initializes a kernel-only area for a given CA, + * and sets up an irq for handling CA error interrupts. + * + * On successful setup, returns the kernel version of tioca_common back to + * the caller. + */ +static void * +tioca_bus_fixup(struct pcibus_bussoft *prom_bussoft, struct pci_controller *controller) +{ + struct tioca_common *tioca_common; + struct tioca_kernel *tioca_kern; + struct pci_bus *bus; + + /* sanity check prom rev */ + + if (is_shub1() && sn_sal_rev() < 0x0406) { + printk + (KERN_ERR "%s: SGI prom rev 4.06 or greater required " + "for tioca support\n", __func__); + return NULL; + } + + /* + * Allocate kernel bus soft and copy from prom. + */ + + tioca_common = kmemdup(prom_bussoft, sizeof(struct tioca_common), + GFP_KERNEL); + if (!tioca_common) + return NULL; + + tioca_common->ca_common.bs_base = (unsigned long) + ioremap(REGION_OFFSET(tioca_common->ca_common.bs_base), + sizeof(struct tioca_common)); + + /* init kernel-private area */ + + tioca_kern = kzalloc(sizeof(struct tioca_kernel), GFP_KERNEL); + if (!tioca_kern) { + kfree(tioca_common); + return NULL; + } + + tioca_kern->ca_common = tioca_common; + spin_lock_init(&tioca_kern->ca_lock); + INIT_LIST_HEAD(&tioca_kern->ca_dmamaps); + tioca_kern->ca_closest_node = + nasid_to_cnodeid(tioca_common->ca_closest_nasid); + tioca_common->ca_kernel_private = (u64) tioca_kern; + + bus = pci_find_bus(tioca_common->ca_common.bs_persist_segment, + tioca_common->ca_common.bs_persist_busnum); + BUG_ON(!bus); + tioca_kern->ca_devices = &bus->devices; + + /* init GART */ + + if (tioca_gart_init(tioca_kern) < 0) { + kfree(tioca_kern); + kfree(tioca_common); + return NULL; + } + + tioca_gart_found++; + list_add(&tioca_kern->ca_list, &tioca_list); + + if (request_irq(SGI_TIOCA_ERROR, + tioca_error_intr_handler, + IRQF_SHARED, "TIOCA error", (void *)tioca_common)) + printk(KERN_WARNING + "%s: Unable to get irq %d. " + "Error interrupts won't be routed for TIOCA bus %d\n", + __func__, SGI_TIOCA_ERROR, + (int)tioca_common->ca_common.bs_persist_busnum); + + irq_set_handler(SGI_TIOCA_ERROR, handle_level_irq); + sn_set_err_irq_affinity(SGI_TIOCA_ERROR); + + /* Setup locality information */ + controller->node = tioca_kern->ca_closest_node; + return tioca_common; +} + +static struct sn_pcibus_provider tioca_pci_interfaces = { + .dma_map = tioca_dma_map, + .dma_map_consistent = tioca_dma_map, + .dma_unmap = tioca_dma_unmap, + .bus_fixup = tioca_bus_fixup, + .force_interrupt = NULL, + .target_interrupt = NULL +}; + +/** + * tioca_init_provider - init SN PCI provider ops for TIO CA + */ +int +tioca_init_provider(void) +{ + sn_pci_provider[PCIIO_ASIC_TYPE_TIOCA] = &tioca_pci_interfaces; + return 0; +} diff --git a/arch/ia64/sn/pci/tioce_provider.c b/arch/ia64/sn/pci/tioce_provider.c new file mode 100644 index 000000000..3bd9abc35 --- /dev/null +++ b/arch/ia64/sn/pci/tioce_provider.c @@ -0,0 +1,1062 @@ +/* + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. + * + * Copyright (C) 2003-2006 Silicon Graphics, Inc. All Rights Reserved. + */ + +#include <linux/types.h> +#include <linux/interrupt.h> +#include <linux/slab.h> +#include <linux/pci.h> +#include <asm/sn/sn_sal.h> +#include <asm/sn/addrs.h> +#include <asm/sn/io.h> +#include <asm/sn/pcidev.h> +#include <asm/sn/pcibus_provider_defs.h> +#include <asm/sn/tioce_provider.h> + +/* + * 1/26/2006 + * + * WAR for SGI PV 944642. For revA TIOCE, need to use the following recipe + * (taken from the above PV) before and after accessing tioce internal MMR's + * to avoid tioce lockups. + * + * The recipe as taken from the PV: + * + * if(mmr address < 0x45000) { + * if(mmr address == 0 or 0x80) + * mmr wrt or read address 0xc0 + * else if(mmr address == 0x148 or 0x200) + * mmr wrt or read address 0x28 + * else + * mmr wrt or read address 0x158 + * + * do desired mmr access (rd or wrt) + * + * if(mmr address == 0x100) + * mmr wrt or read address 0x38 + * mmr wrt or read address 0xb050 + * } else + * do desired mmr access + * + * According to hw, we can use reads instead of writes to the above address + * + * Note this WAR can only to be used for accessing internal MMR's in the + * TIOCE Coretalk Address Range 0x0 - 0x07ff_ffff. This includes the + * "Local CE Registers and Memories" and "PCI Compatible Config Space" address + * spaces from table 2-1 of the "CE Programmer's Reference Overview" document. + * + * All registers defined in struct tioce will meet that criteria. + */ + +static inline void +tioce_mmr_war_pre(struct tioce_kernel *kern, void __iomem *mmr_addr) +{ + u64 mmr_base; + u64 mmr_offset; + + if (kern->ce_common->ce_rev != TIOCE_REV_A) + return; + + mmr_base = kern->ce_common->ce_pcibus.bs_base; + mmr_offset = (unsigned long)mmr_addr - mmr_base; + + if (mmr_offset < 0x45000) { + u64 mmr_war_offset; + + if (mmr_offset == 0 || mmr_offset == 0x80) + mmr_war_offset = 0xc0; + else if (mmr_offset == 0x148 || mmr_offset == 0x200) + mmr_war_offset = 0x28; + else + mmr_war_offset = 0x158; + + readq_relaxed((void __iomem *)(mmr_base + mmr_war_offset)); + } +} + +static inline void +tioce_mmr_war_post(struct tioce_kernel *kern, void __iomem *mmr_addr) +{ + u64 mmr_base; + u64 mmr_offset; + + if (kern->ce_common->ce_rev != TIOCE_REV_A) + return; + + mmr_base = kern->ce_common->ce_pcibus.bs_base; + mmr_offset = (unsigned long)mmr_addr - mmr_base; + + if (mmr_offset < 0x45000) { + if (mmr_offset == 0x100) + readq_relaxed((void __iomem *)(mmr_base + 0x38)); + readq_relaxed((void __iomem *)(mmr_base + 0xb050)); + } +} + +/* load mmr contents into a variable */ +#define tioce_mmr_load(kern, mmrp, varp) do {\ + tioce_mmr_war_pre(kern, mmrp); \ + *(varp) = readq_relaxed(mmrp); \ + tioce_mmr_war_post(kern, mmrp); \ +} while (0) + +/* store variable contents into mmr */ +#define tioce_mmr_store(kern, mmrp, varp) do {\ + tioce_mmr_war_pre(kern, mmrp); \ + writeq(*varp, mmrp); \ + tioce_mmr_war_post(kern, mmrp); \ +} while (0) + +/* store immediate value into mmr */ +#define tioce_mmr_storei(kern, mmrp, val) do {\ + tioce_mmr_war_pre(kern, mmrp); \ + writeq(val, mmrp); \ + tioce_mmr_war_post(kern, mmrp); \ +} while (0) + +/* set bits (immediate value) into mmr */ +#define tioce_mmr_seti(kern, mmrp, bits) do {\ + u64 tmp; \ + tioce_mmr_load(kern, mmrp, &tmp); \ + tmp |= (bits); \ + tioce_mmr_store(kern, mmrp, &tmp); \ +} while (0) + +/* clear bits (immediate value) into mmr */ +#define tioce_mmr_clri(kern, mmrp, bits) do { \ + u64 tmp; \ + tioce_mmr_load(kern, mmrp, &tmp); \ + tmp &= ~(bits); \ + tioce_mmr_store(kern, mmrp, &tmp); \ +} while (0) + +/** + * Bus address ranges for the 5 flavors of TIOCE DMA + */ + +#define TIOCE_D64_MIN 0x8000000000000000UL +#define TIOCE_D64_MAX 0xffffffffffffffffUL +#define TIOCE_D64_ADDR(a) ((a) >= TIOCE_D64_MIN) + +#define TIOCE_D32_MIN 0x0000000080000000UL +#define TIOCE_D32_MAX 0x00000000ffffffffUL +#define TIOCE_D32_ADDR(a) ((a) >= TIOCE_D32_MIN && (a) <= TIOCE_D32_MAX) + +#define TIOCE_M32_MIN 0x0000000000000000UL +#define TIOCE_M32_MAX 0x000000007fffffffUL +#define TIOCE_M32_ADDR(a) ((a) >= TIOCE_M32_MIN && (a) <= TIOCE_M32_MAX) + +#define TIOCE_M40_MIN 0x0000004000000000UL +#define TIOCE_M40_MAX 0x0000007fffffffffUL +#define TIOCE_M40_ADDR(a) ((a) >= TIOCE_M40_MIN && (a) <= TIOCE_M40_MAX) + +#define TIOCE_M40S_MIN 0x0000008000000000UL +#define TIOCE_M40S_MAX 0x000000ffffffffffUL +#define TIOCE_M40S_ADDR(a) ((a) >= TIOCE_M40S_MIN && (a) <= TIOCE_M40S_MAX) + +/* + * ATE manipulation macros. + */ + +#define ATE_PAGESHIFT(ps) (__ffs(ps)) +#define ATE_PAGEMASK(ps) ((ps)-1) + +#define ATE_PAGE(x, ps) ((x) >> ATE_PAGESHIFT(ps)) +#define ATE_NPAGES(start, len, pagesize) \ + (ATE_PAGE((start)+(len)-1, pagesize) - ATE_PAGE(start, pagesize) + 1) + +#define ATE_VALID(ate) ((ate) & (1UL << 63)) +#define ATE_MAKE(addr, ps, msi) \ + (((addr) & ~ATE_PAGEMASK(ps)) | (1UL << 63) | ((msi)?(1UL << 62):0)) + +/* + * Flavors of ate-based mapping supported by tioce_alloc_map() + */ + +#define TIOCE_ATE_M32 1 +#define TIOCE_ATE_M40 2 +#define TIOCE_ATE_M40S 3 + +#define KB(x) ((u64)(x) << 10) +#define MB(x) ((u64)(x) << 20) +#define GB(x) ((u64)(x) << 30) + +/** + * tioce_dma_d64 - create a DMA mapping using 64-bit direct mode + * @ct_addr: system coretalk address + * + * Map @ct_addr into 64-bit CE bus space. No device context is necessary + * and no CE mapping are consumed. + * + * Bits 53:0 come from the coretalk address. The remaining bits are set as + * follows: + * + * 63 - must be 1 to indicate d64 mode to CE hardware + * 62 - barrier bit ... controlled with tioce_dma_barrier() + * 61 - msi bit ... specified through dma_flags + * 60:54 - reserved, MBZ + */ +static u64 +tioce_dma_d64(unsigned long ct_addr, int dma_flags) +{ + u64 bus_addr; + + bus_addr = ct_addr | (1UL << 63); + if (dma_flags & SN_DMA_MSI) + bus_addr |= (1UL << 61); + + return bus_addr; +} + +/** + * pcidev_to_tioce - return misc ce related pointers given a pci_dev + * @pci_dev: pci device context + * @base: ptr to store struct tioce_mmr * for the CE holding this device + * @kernel: ptr to store struct tioce_kernel * for the CE holding this device + * @port: ptr to store the CE port number that this device is on + * + * Return pointers to various CE-related structures for the CE upstream of + * @pci_dev. + */ +static inline void +pcidev_to_tioce(struct pci_dev *pdev, struct tioce __iomem **base, + struct tioce_kernel **kernel, int *port) +{ + struct pcidev_info *pcidev_info; + struct tioce_common *ce_common; + struct tioce_kernel *ce_kernel; + + pcidev_info = SN_PCIDEV_INFO(pdev); + ce_common = (struct tioce_common *)pcidev_info->pdi_pcibus_info; + ce_kernel = (struct tioce_kernel *)ce_common->ce_kernel_private; + + if (base) + *base = (struct tioce __iomem *)ce_common->ce_pcibus.bs_base; + if (kernel) + *kernel = ce_kernel; + + /* + * we use port as a zero-based value internally, even though the + * documentation is 1-based. + */ + if (port) + *port = + (pdev->bus->number < ce_kernel->ce_port1_secondary) ? 0 : 1; +} + +/** + * tioce_alloc_map - Given a coretalk address, map it to pcie bus address + * space using one of the various ATE-based address modes. + * @ce_kern: tioce context + * @type: map mode to use + * @port: 0-based port that the requesting device is downstream of + * @ct_addr: the coretalk address to map + * @len: number of bytes to map + * + * Given the addressing type, set up various parameters that define the + * ATE pool to use. Search for a contiguous block of entries to cover the + * length, and if enough resources exist, fill in the ATEs and construct a + * tioce_dmamap struct to track the mapping. + */ +static u64 +tioce_alloc_map(struct tioce_kernel *ce_kern, int type, int port, + u64 ct_addr, int len, int dma_flags) +{ + int i; + int j; + int first; + int last; + int entries; + int nates; + u64 pagesize; + int msi_capable, msi_wanted; + u64 *ate_shadow; + u64 __iomem *ate_reg; + u64 addr; + struct tioce __iomem *ce_mmr; + u64 bus_base; + struct tioce_dmamap *map; + + ce_mmr = (struct tioce __iomem *)ce_kern->ce_common->ce_pcibus.bs_base; + + switch (type) { + case TIOCE_ATE_M32: + /* + * The first 64 entries of the ate3240 pool are dedicated to + * super-page (TIOCE_ATE_M40S) mode. + */ + first = 64; + entries = TIOCE_NUM_M3240_ATES - 64; + ate_shadow = ce_kern->ce_ate3240_shadow; + ate_reg = ce_mmr->ce_ure_ate3240; + pagesize = ce_kern->ce_ate3240_pagesize; + bus_base = TIOCE_M32_MIN; + msi_capable = 1; + break; + case TIOCE_ATE_M40: + first = 0; + entries = TIOCE_NUM_M40_ATES; + ate_shadow = ce_kern->ce_ate40_shadow; + ate_reg = ce_mmr->ce_ure_ate40; + pagesize = MB(64); + bus_base = TIOCE_M40_MIN; + msi_capable = 0; + break; + case TIOCE_ATE_M40S: + /* + * ate3240 entries 0-31 are dedicated to port1 super-page + * mappings. ate3240 entries 32-63 are dedicated to port2. + */ + first = port * 32; + entries = 32; + ate_shadow = ce_kern->ce_ate3240_shadow; + ate_reg = ce_mmr->ce_ure_ate3240; + pagesize = GB(16); + bus_base = TIOCE_M40S_MIN; + msi_capable = 0; + break; + default: + return 0; + } + + msi_wanted = dma_flags & SN_DMA_MSI; + if (msi_wanted && !msi_capable) + return 0; + + nates = ATE_NPAGES(ct_addr, len, pagesize); + if (nates > entries) + return 0; + + last = first + entries - nates; + for (i = first; i <= last; i++) { + if (ATE_VALID(ate_shadow[i])) + continue; + + for (j = i; j < i + nates; j++) + if (ATE_VALID(ate_shadow[j])) + break; + + if (j >= i + nates) + break; + } + + if (i > last) + return 0; + + map = kzalloc(sizeof(struct tioce_dmamap), GFP_ATOMIC); + if (!map) + return 0; + + addr = ct_addr; + for (j = 0; j < nates; j++) { + u64 ate; + + ate = ATE_MAKE(addr, pagesize, msi_wanted); + ate_shadow[i + j] = ate; + tioce_mmr_storei(ce_kern, &ate_reg[i + j], ate); + addr += pagesize; + } + + map->refcnt = 1; + map->nbytes = nates * pagesize; + map->ct_start = ct_addr & ~ATE_PAGEMASK(pagesize); + map->pci_start = bus_base + (i * pagesize); + map->ate_hw = &ate_reg[i]; + map->ate_shadow = &ate_shadow[i]; + map->ate_count = nates; + + list_add(&map->ce_dmamap_list, &ce_kern->ce_dmamap_list); + + return (map->pci_start + (ct_addr - map->ct_start)); +} + +/** + * tioce_dma_d32 - create a DMA mapping using 32-bit direct mode + * @pdev: linux pci_dev representing the function + * @paddr: system physical address + * + * Map @paddr into 32-bit bus space of the CE associated with @pcidev_info. + */ +static u64 +tioce_dma_d32(struct pci_dev *pdev, u64 ct_addr, int dma_flags) +{ + int dma_ok; + int port; + struct tioce __iomem *ce_mmr; + struct tioce_kernel *ce_kern; + u64 ct_upper; + u64 ct_lower; + dma_addr_t bus_addr; + + if (dma_flags & SN_DMA_MSI) + return 0; + + ct_upper = ct_addr & ~0x3fffffffUL; + ct_lower = ct_addr & 0x3fffffffUL; + + pcidev_to_tioce(pdev, &ce_mmr, &ce_kern, &port); + + if (ce_kern->ce_port[port].dirmap_refcnt == 0) { + u64 tmp; + + ce_kern->ce_port[port].dirmap_shadow = ct_upper; + tioce_mmr_storei(ce_kern, &ce_mmr->ce_ure_dir_map[port], + ct_upper); + tmp = ce_mmr->ce_ure_dir_map[port]; + dma_ok = 1; + } else + dma_ok = (ce_kern->ce_port[port].dirmap_shadow == ct_upper); + + if (dma_ok) { + ce_kern->ce_port[port].dirmap_refcnt++; + bus_addr = TIOCE_D32_MIN + ct_lower; + } else + bus_addr = 0; + + return bus_addr; +} + +/** + * tioce_dma_barrier - swizzle a TIOCE bus address to include or exclude + * the barrier bit. + * @bus_addr: bus address to swizzle + * + * Given a TIOCE bus address, set the appropriate bit to indicate barrier + * attributes. + */ +static u64 +tioce_dma_barrier(u64 bus_addr, int on) +{ + u64 barrier_bit; + + /* barrier not supported in M40/M40S mode */ + if (TIOCE_M40_ADDR(bus_addr) || TIOCE_M40S_ADDR(bus_addr)) + return bus_addr; + + if (TIOCE_D64_ADDR(bus_addr)) + barrier_bit = (1UL << 62); + else /* must be m32 or d32 */ + barrier_bit = (1UL << 30); + + return (on) ? (bus_addr | barrier_bit) : (bus_addr & ~barrier_bit); +} + +/** + * tioce_dma_unmap - release CE mapping resources + * @pdev: linux pci_dev representing the function + * @bus_addr: bus address returned by an earlier tioce_dma_map + * @dir: mapping direction (unused) + * + * Locate mapping resources associated with @bus_addr and release them. + * For mappings created using the direct modes there are no resources + * to release. + */ +void +tioce_dma_unmap(struct pci_dev *pdev, dma_addr_t bus_addr, int dir) +{ + int i; + int port; + struct tioce_kernel *ce_kern; + struct tioce __iomem *ce_mmr; + unsigned long flags; + + bus_addr = tioce_dma_barrier(bus_addr, 0); + pcidev_to_tioce(pdev, &ce_mmr, &ce_kern, &port); + + /* nothing to do for D64 */ + + if (TIOCE_D64_ADDR(bus_addr)) + return; + + spin_lock_irqsave(&ce_kern->ce_lock, flags); + + if (TIOCE_D32_ADDR(bus_addr)) { + if (--ce_kern->ce_port[port].dirmap_refcnt == 0) { + ce_kern->ce_port[port].dirmap_shadow = 0; + tioce_mmr_storei(ce_kern, &ce_mmr->ce_ure_dir_map[port], + 0); + } + } else { + struct tioce_dmamap *map; + + list_for_each_entry(map, &ce_kern->ce_dmamap_list, + ce_dmamap_list) { + u64 last; + + last = map->pci_start + map->nbytes - 1; + if (bus_addr >= map->pci_start && bus_addr <= last) + break; + } + + if (&map->ce_dmamap_list == &ce_kern->ce_dmamap_list) { + printk(KERN_WARNING + "%s: %s - no map found for bus_addr 0x%llx\n", + __func__, pci_name(pdev), bus_addr); + } else if (--map->refcnt == 0) { + for (i = 0; i < map->ate_count; i++) { + map->ate_shadow[i] = 0; + tioce_mmr_storei(ce_kern, &map->ate_hw[i], 0); + } + + list_del(&map->ce_dmamap_list); + kfree(map); + } + } + + spin_unlock_irqrestore(&ce_kern->ce_lock, flags); +} + +/** + * tioce_do_dma_map - map pages for PCI DMA + * @pdev: linux pci_dev representing the function + * @paddr: host physical address to map + * @byte_count: bytes to map + * + * This is the main wrapper for mapping host physical pages to CE PCI space. + * The mapping mode used is based on the device's dma_mask. + */ +static u64 +tioce_do_dma_map(struct pci_dev *pdev, u64 paddr, size_t byte_count, + int barrier, int dma_flags) +{ + unsigned long flags; + u64 ct_addr; + u64 mapaddr = 0; + struct tioce_kernel *ce_kern; + struct tioce_dmamap *map; + int port; + u64 dma_mask; + + dma_mask = (barrier) ? pdev->dev.coherent_dma_mask : pdev->dma_mask; + + /* cards must be able to address at least 31 bits */ + if (dma_mask < 0x7fffffffUL) + return 0; + + if (SN_DMA_ADDRTYPE(dma_flags) == SN_DMA_ADDR_PHYS) + ct_addr = PHYS_TO_TIODMA(paddr); + else + ct_addr = paddr; + + /* + * If the device can generate 64 bit addresses, create a D64 map. + */ + if (dma_mask == ~0UL) { + mapaddr = tioce_dma_d64(ct_addr, dma_flags); + if (mapaddr) + goto dma_map_done; + } + + pcidev_to_tioce(pdev, NULL, &ce_kern, &port); + + spin_lock_irqsave(&ce_kern->ce_lock, flags); + + /* + * D64 didn't work ... See if we have an existing map that covers + * this address range. Must account for devices dma_mask here since + * an existing map might have been done in a mode using more pci + * address bits than this device can support. + */ + list_for_each_entry(map, &ce_kern->ce_dmamap_list, ce_dmamap_list) { + u64 last; + + last = map->ct_start + map->nbytes - 1; + if (ct_addr >= map->ct_start && + ct_addr + byte_count - 1 <= last && + map->pci_start <= dma_mask) { + map->refcnt++; + mapaddr = map->pci_start + (ct_addr - map->ct_start); + break; + } + } + + /* + * If we don't have a map yet, and the card can generate 40 + * bit addresses, try the M40/M40S modes. Note these modes do not + * support a barrier bit, so if we need a consistent map these + * won't work. + */ + if (!mapaddr && !barrier && dma_mask >= 0xffffffffffUL) { + /* + * We have two options for 40-bit mappings: 16GB "super" ATEs + * and 64MB "regular" ATEs. We'll try both if needed for a + * given mapping but which one we try first depends on the + * size. For requests >64MB, prefer to use a super page with + * regular as the fallback. Otherwise, try in the reverse order. + */ + + if (byte_count > MB(64)) { + mapaddr = tioce_alloc_map(ce_kern, TIOCE_ATE_M40S, + port, ct_addr, byte_count, + dma_flags); + if (!mapaddr) + mapaddr = + tioce_alloc_map(ce_kern, TIOCE_ATE_M40, -1, + ct_addr, byte_count, + dma_flags); + } else { + mapaddr = tioce_alloc_map(ce_kern, TIOCE_ATE_M40, -1, + ct_addr, byte_count, + dma_flags); + if (!mapaddr) + mapaddr = + tioce_alloc_map(ce_kern, TIOCE_ATE_M40S, + port, ct_addr, byte_count, + dma_flags); + } + } + + /* + * 32-bit direct is the next mode to try + */ + if (!mapaddr && dma_mask >= 0xffffffffUL) + mapaddr = tioce_dma_d32(pdev, ct_addr, dma_flags); + + /* + * Last resort, try 32-bit ATE-based map. + */ + if (!mapaddr) + mapaddr = + tioce_alloc_map(ce_kern, TIOCE_ATE_M32, -1, ct_addr, + byte_count, dma_flags); + + spin_unlock_irqrestore(&ce_kern->ce_lock, flags); + +dma_map_done: + if (mapaddr && barrier) + mapaddr = tioce_dma_barrier(mapaddr, 1); + + return mapaddr; +} + +/** + * tioce_dma - standard pci dma map interface + * @pdev: pci device requesting the map + * @paddr: system physical address to map into pci space + * @byte_count: # bytes to map + * + * Simply call tioce_do_dma_map() to create a map with the barrier bit clear + * in the address. + */ +static u64 +tioce_dma(struct pci_dev *pdev, unsigned long paddr, size_t byte_count, int dma_flags) +{ + return tioce_do_dma_map(pdev, paddr, byte_count, 0, dma_flags); +} + +/** + * tioce_dma_consistent - consistent pci dma map interface + * @pdev: pci device requesting the map + * @paddr: system physical address to map into pci space + * @byte_count: # bytes to map + * + * Simply call tioce_do_dma_map() to create a map with the barrier bit set + * in the address. + */ +static u64 +tioce_dma_consistent(struct pci_dev *pdev, unsigned long paddr, size_t byte_count, int dma_flags) +{ + return tioce_do_dma_map(pdev, paddr, byte_count, 1, dma_flags); +} + +/** + * tioce_error_intr_handler - SGI TIO CE error interrupt handler + * @irq: unused + * @arg: pointer to tioce_common struct for the given CE + * + * Handle a CE error interrupt. Simply a wrapper around a SAL call which + * defers processing to the SGI prom. + */ +static irqreturn_t +tioce_error_intr_handler(int irq, void *arg) +{ + struct tioce_common *soft = arg; + struct ia64_sal_retval ret_stuff; + ret_stuff.status = 0; + ret_stuff.v0 = 0; + + SAL_CALL_NOLOCK(ret_stuff, (u64) SN_SAL_IOIF_ERROR_INTERRUPT, + soft->ce_pcibus.bs_persist_segment, + soft->ce_pcibus.bs_persist_busnum, 0, 0, 0, 0, 0); + + if (ret_stuff.v0) + panic("tioce_error_intr_handler: Fatal TIOCE error"); + + return IRQ_HANDLED; +} + +/** + * tioce_reserve_m32 - reserve M32 ATEs for the indicated address range + * @tioce_kernel: TIOCE context to reserve ATEs for + * @base: starting bus address to reserve + * @limit: last bus address to reserve + * + * If base/limit falls within the range of bus space mapped through the + * M32 space, reserve the resources corresponding to the range. + */ +static void +tioce_reserve_m32(struct tioce_kernel *ce_kern, u64 base, u64 limit) +{ + int ate_index, last_ate, ps; + struct tioce __iomem *ce_mmr; + + ce_mmr = (struct tioce __iomem *)ce_kern->ce_common->ce_pcibus.bs_base; + ps = ce_kern->ce_ate3240_pagesize; + ate_index = ATE_PAGE(base, ps); + last_ate = ate_index + ATE_NPAGES(base, limit-base+1, ps) - 1; + + if (ate_index < 64) + ate_index = 64; + + if (last_ate >= TIOCE_NUM_M3240_ATES) + last_ate = TIOCE_NUM_M3240_ATES - 1; + + while (ate_index <= last_ate) { + u64 ate; + + ate = ATE_MAKE(0xdeadbeef, ps, 0); + ce_kern->ce_ate3240_shadow[ate_index] = ate; + tioce_mmr_storei(ce_kern, &ce_mmr->ce_ure_ate3240[ate_index], + ate); + ate_index++; + } +} + +/** + * tioce_kern_init - init kernel structures related to a given TIOCE + * @tioce_common: ptr to a cached tioce_common struct that originated in prom + */ +static struct tioce_kernel * +tioce_kern_init(struct tioce_common *tioce_common) +{ + int i; + int ps; + int dev; + u32 tmp; + unsigned int seg, bus; + struct tioce __iomem *tioce_mmr; + struct tioce_kernel *tioce_kern; + + tioce_kern = kzalloc(sizeof(struct tioce_kernel), GFP_KERNEL); + if (!tioce_kern) { + return NULL; + } + + tioce_kern->ce_common = tioce_common; + spin_lock_init(&tioce_kern->ce_lock); + INIT_LIST_HEAD(&tioce_kern->ce_dmamap_list); + tioce_common->ce_kernel_private = (u64) tioce_kern; + + /* + * Determine the secondary bus number of the port2 logical PPB. + * This is used to decide whether a given pci device resides on + * port1 or port2. Note: We don't have enough plumbing set up + * here to use pci_read_config_xxx() so use raw_pci_read(). + */ + + seg = tioce_common->ce_pcibus.bs_persist_segment; + bus = tioce_common->ce_pcibus.bs_persist_busnum; + + raw_pci_read(seg, bus, PCI_DEVFN(2, 0), PCI_SECONDARY_BUS, 1,&tmp); + tioce_kern->ce_port1_secondary = (u8) tmp; + + /* + * Set PMU pagesize to the largest size available, and zero out + * the ATEs. + */ + + tioce_mmr = (struct tioce __iomem *)tioce_common->ce_pcibus.bs_base; + tioce_mmr_clri(tioce_kern, &tioce_mmr->ce_ure_page_map, + CE_URE_PAGESIZE_MASK); + tioce_mmr_seti(tioce_kern, &tioce_mmr->ce_ure_page_map, + CE_URE_256K_PAGESIZE); + ps = tioce_kern->ce_ate3240_pagesize = KB(256); + + for (i = 0; i < TIOCE_NUM_M40_ATES; i++) { + tioce_kern->ce_ate40_shadow[i] = 0; + tioce_mmr_storei(tioce_kern, &tioce_mmr->ce_ure_ate40[i], 0); + } + + for (i = 0; i < TIOCE_NUM_M3240_ATES; i++) { + tioce_kern->ce_ate3240_shadow[i] = 0; + tioce_mmr_storei(tioce_kern, &tioce_mmr->ce_ure_ate3240[i], 0); + } + + /* + * Reserve ATEs corresponding to reserved address ranges. These + * include: + * + * Memory space covered by each PPB mem base/limit register + * Memory space covered by each PPB prefetch base/limit register + * + * These bus ranges are for pio (downstream) traffic only, and so + * cannot be used for DMA. + */ + + for (dev = 1; dev <= 2; dev++) { + u64 base, limit; + + /* mem base/limit */ + + raw_pci_read(seg, bus, PCI_DEVFN(dev, 0), + PCI_MEMORY_BASE, 2, &tmp); + base = (u64)tmp << 16; + + raw_pci_read(seg, bus, PCI_DEVFN(dev, 0), + PCI_MEMORY_LIMIT, 2, &tmp); + limit = (u64)tmp << 16; + limit |= 0xfffffUL; + + if (base < limit) + tioce_reserve_m32(tioce_kern, base, limit); + + /* + * prefetch mem base/limit. The tioce ppb's have 64-bit + * decoders, so read the upper portions w/o checking the + * attributes. + */ + + raw_pci_read(seg, bus, PCI_DEVFN(dev, 0), + PCI_PREF_MEMORY_BASE, 2, &tmp); + base = ((u64)tmp & PCI_PREF_RANGE_MASK) << 16; + + raw_pci_read(seg, bus, PCI_DEVFN(dev, 0), + PCI_PREF_BASE_UPPER32, 4, &tmp); + base |= (u64)tmp << 32; + + raw_pci_read(seg, bus, PCI_DEVFN(dev, 0), + PCI_PREF_MEMORY_LIMIT, 2, &tmp); + + limit = ((u64)tmp & PCI_PREF_RANGE_MASK) << 16; + limit |= 0xfffffUL; + + raw_pci_read(seg, bus, PCI_DEVFN(dev, 0), + PCI_PREF_LIMIT_UPPER32, 4, &tmp); + limit |= (u64)tmp << 32; + + if ((base < limit) && TIOCE_M32_ADDR(base)) + tioce_reserve_m32(tioce_kern, base, limit); + } + + return tioce_kern; +} + +/** + * tioce_force_interrupt - implement altix force_interrupt() backend for CE + * @sn_irq_info: sn asic irq that we need an interrupt generated for + * + * Given an sn_irq_info struct, set the proper bit in ce_adm_force_int to + * force a secondary interrupt to be generated. This is to work around an + * asic issue where there is a small window of opportunity for a legacy device + * interrupt to be lost. + */ +static void +tioce_force_interrupt(struct sn_irq_info *sn_irq_info) +{ + struct pcidev_info *pcidev_info; + struct tioce_common *ce_common; + struct tioce_kernel *ce_kern; + struct tioce __iomem *ce_mmr; + u64 force_int_val; + + if (!sn_irq_info->irq_bridge) + return; + + if (sn_irq_info->irq_bridge_type != PCIIO_ASIC_TYPE_TIOCE) + return; + + pcidev_info = (struct pcidev_info *)sn_irq_info->irq_pciioinfo; + if (!pcidev_info) + return; + + ce_common = (struct tioce_common *)pcidev_info->pdi_pcibus_info; + ce_mmr = (struct tioce __iomem *)ce_common->ce_pcibus.bs_base; + ce_kern = (struct tioce_kernel *)ce_common->ce_kernel_private; + + /* + * TIOCE Rev A workaround (PV 945826), force an interrupt by writing + * the TIO_INTx register directly (1/26/2006) + */ + if (ce_common->ce_rev == TIOCE_REV_A) { + u64 int_bit_mask = (1ULL << sn_irq_info->irq_int_bit); + u64 status; + + tioce_mmr_load(ce_kern, &ce_mmr->ce_adm_int_status, &status); + if (status & int_bit_mask) { + u64 force_irq = (1 << 8) | sn_irq_info->irq_irq; + u64 ctalk = sn_irq_info->irq_xtalkaddr; + u64 nasid, offset; + + nasid = (ctalk & CTALK_NASID_MASK) >> CTALK_NASID_SHFT; + offset = (ctalk & CTALK_NODE_OFFSET); + HUB_S(TIO_IOSPACE_ADDR(nasid, offset), force_irq); + } + + return; + } + + /* + * irq_int_bit is originally set up by prom, and holds the interrupt + * bit shift (not mask) as defined by the bit definitions in the + * ce_adm_int mmr. These shifts are not the same for the + * ce_adm_force_int register, so do an explicit mapping here to make + * things clearer. + */ + + switch (sn_irq_info->irq_int_bit) { + case CE_ADM_INT_PCIE_PORT1_DEV_A_SHFT: + force_int_val = 1UL << CE_ADM_FORCE_INT_PCIE_PORT1_DEV_A_SHFT; + break; + case CE_ADM_INT_PCIE_PORT1_DEV_B_SHFT: + force_int_val = 1UL << CE_ADM_FORCE_INT_PCIE_PORT1_DEV_B_SHFT; + break; + case CE_ADM_INT_PCIE_PORT1_DEV_C_SHFT: + force_int_val = 1UL << CE_ADM_FORCE_INT_PCIE_PORT1_DEV_C_SHFT; + break; + case CE_ADM_INT_PCIE_PORT1_DEV_D_SHFT: + force_int_val = 1UL << CE_ADM_FORCE_INT_PCIE_PORT1_DEV_D_SHFT; + break; + case CE_ADM_INT_PCIE_PORT2_DEV_A_SHFT: + force_int_val = 1UL << CE_ADM_FORCE_INT_PCIE_PORT2_DEV_A_SHFT; + break; + case CE_ADM_INT_PCIE_PORT2_DEV_B_SHFT: + force_int_val = 1UL << CE_ADM_FORCE_INT_PCIE_PORT2_DEV_B_SHFT; + break; + case CE_ADM_INT_PCIE_PORT2_DEV_C_SHFT: + force_int_val = 1UL << CE_ADM_FORCE_INT_PCIE_PORT2_DEV_C_SHFT; + break; + case CE_ADM_INT_PCIE_PORT2_DEV_D_SHFT: + force_int_val = 1UL << CE_ADM_FORCE_INT_PCIE_PORT2_DEV_D_SHFT; + break; + default: + return; + } + tioce_mmr_storei(ce_kern, &ce_mmr->ce_adm_force_int, force_int_val); +} + +/** + * tioce_target_interrupt - implement set_irq_affinity for tioce resident + * functions. Note: only applies to line interrupts, not MSI's. + * + * @sn_irq_info: SN IRQ context + * + * Given an sn_irq_info, set the associated CE device's interrupt destination + * register. Since the interrupt destination registers are on a per-ce-slot + * basis, this will retarget line interrupts for all functions downstream of + * the slot. + */ +static void +tioce_target_interrupt(struct sn_irq_info *sn_irq_info) +{ + struct pcidev_info *pcidev_info; + struct tioce_common *ce_common; + struct tioce_kernel *ce_kern; + struct tioce __iomem *ce_mmr; + int bit; + u64 vector; + + pcidev_info = (struct pcidev_info *)sn_irq_info->irq_pciioinfo; + if (!pcidev_info) + return; + + ce_common = (struct tioce_common *)pcidev_info->pdi_pcibus_info; + ce_mmr = (struct tioce __iomem *)ce_common->ce_pcibus.bs_base; + ce_kern = (struct tioce_kernel *)ce_common->ce_kernel_private; + + bit = sn_irq_info->irq_int_bit; + + tioce_mmr_seti(ce_kern, &ce_mmr->ce_adm_int_mask, (1UL << bit)); + vector = (u64)sn_irq_info->irq_irq << INTR_VECTOR_SHFT; + vector |= sn_irq_info->irq_xtalkaddr; + tioce_mmr_storei(ce_kern, &ce_mmr->ce_adm_int_dest[bit], vector); + tioce_mmr_clri(ce_kern, &ce_mmr->ce_adm_int_mask, (1UL << bit)); + + tioce_force_interrupt(sn_irq_info); +} + +/** + * tioce_bus_fixup - perform final PCI fixup for a TIO CE bus + * @prom_bussoft: Common prom/kernel struct representing the bus + * + * Replicates the tioce_common pointed to by @prom_bussoft in kernel + * space. Allocates and initializes a kernel-only area for a given CE, + * and sets up an irq for handling CE error interrupts. + * + * On successful setup, returns the kernel version of tioce_common back to + * the caller. + */ +static void * +tioce_bus_fixup(struct pcibus_bussoft *prom_bussoft, struct pci_controller *controller) +{ + struct tioce_common *tioce_common; + struct tioce_kernel *tioce_kern; + struct tioce __iomem *tioce_mmr; + + /* + * Allocate kernel bus soft and copy from prom. + */ + + tioce_common = kzalloc(sizeof(struct tioce_common), GFP_KERNEL); + if (!tioce_common) + return NULL; + + memcpy(tioce_common, prom_bussoft, sizeof(struct tioce_common)); + tioce_common->ce_pcibus.bs_base = (unsigned long) + ioremap(REGION_OFFSET(tioce_common->ce_pcibus.bs_base), + sizeof(struct tioce_common)); + + tioce_kern = tioce_kern_init(tioce_common); + if (tioce_kern == NULL) { + kfree(tioce_common); + return NULL; + } + + /* + * Clear out any transient errors before registering the error + * interrupt handler. + */ + + tioce_mmr = (struct tioce __iomem *)tioce_common->ce_pcibus.bs_base; + tioce_mmr_seti(tioce_kern, &tioce_mmr->ce_adm_int_status_alias, ~0ULL); + tioce_mmr_seti(tioce_kern, &tioce_mmr->ce_adm_error_summary_alias, + ~0ULL); + tioce_mmr_seti(tioce_kern, &tioce_mmr->ce_dre_comp_err_addr, 0ULL); + + if (request_irq(SGI_PCIASIC_ERROR, + tioce_error_intr_handler, + IRQF_SHARED, "TIOCE error", (void *)tioce_common)) + printk(KERN_WARNING + "%s: Unable to get irq %d. " + "Error interrupts won't be routed for " + "TIOCE bus %04x:%02x\n", + __func__, SGI_PCIASIC_ERROR, + tioce_common->ce_pcibus.bs_persist_segment, + tioce_common->ce_pcibus.bs_persist_busnum); + + irq_set_handler(SGI_PCIASIC_ERROR, handle_level_irq); + sn_set_err_irq_affinity(SGI_PCIASIC_ERROR); + return tioce_common; +} + +static struct sn_pcibus_provider tioce_pci_interfaces = { + .dma_map = tioce_dma, + .dma_map_consistent = tioce_dma_consistent, + .dma_unmap = tioce_dma_unmap, + .bus_fixup = tioce_bus_fixup, + .force_interrupt = tioce_force_interrupt, + .target_interrupt = tioce_target_interrupt +}; + +/** + * tioce_init_provider - init SN PCI provider ops for TIO CE + */ +int +tioce_init_provider(void) +{ + sn_pci_provider[PCIIO_ASIC_TYPE_TIOCE] = &tioce_pci_interfaces; + return 0; +} |