35 files changed, 9201 insertions, 0 deletions
diff --git a/arch/arc/kernel/.gitignore b/arch/arc/kernel/.gitignore
new file mode 100644
index 000000000..c5f676c3c
--- /dev/null
+++ b/arch/arc/kernel/.gitignore
@@ -0,0 +1 @@
+vmlinux.lds
diff --git a/arch/arc/kernel/Makefile b/arch/arc/kernel/Makefile
new file mode 100644
index 000000000..2dc5f4296
--- /dev/null
+++ b/arch/arc/kernel/Makefile
@@ -0,0 +1,36 @@
+#
+# Copyright (C) 2004, 2007-2010, 2011-2012 Synopsys, Inc. (www.synopsys.com)
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License version 2 as
+# published by the Free Software Foundation.
+
+# Pass UTS_MACHINE for user_regset definition
+CFLAGS_ptrace.o		+= -DUTS_MACHINE='"$(UTS_MACHINE)"'
+
+obj-y	:= arcksyms.o setup.o irq.o reset.o ptrace.o process.o devtree.o
+obj-y	+= signal.o traps.o sys.o troubleshoot.o stacktrace.o disasm.o
+obj-$(CONFIG_ISA_ARCOMPACT)		+= entry-compact.o intc-compact.o
+obj-$(CONFIG_ISA_ARCV2)			+= entry-arcv2.o intc-arcv2.o
+
+obj-$(CONFIG_MODULES)			+= arcksyms.o module.o
+obj-$(CONFIG_SMP) 			+= smp.o
+obj-$(CONFIG_ARC_MCIP)			+= mcip.o
+obj-$(CONFIG_ARC_DW2_UNWIND)		+= unwind.o
+obj-$(CONFIG_KPROBES)      		+= kprobes.o
+obj-$(CONFIG_ARC_EMUL_UNALIGNED) 	+= unaligned.o
+obj-$(CONFIG_KGDB)			+= kgdb.o
+obj-$(CONFIG_ARC_METAWARE_HLINK)	+= arc_hostlink.o
+obj-$(CONFIG_PERF_EVENTS)		+= perf_event.o
+
+obj-$(CONFIG_ARC_FPU_SAVE_RESTORE)	+= fpu.o
+CFLAGS_fpu.o   += -mdpfp
+
+ifdef CONFIG_ARC_DW2_UNWIND
+CFLAGS_ctx_sw.o += -fno-omit-frame-pointer
+obj-y += ctx_sw.o
+else
+obj-y += ctx_sw_asm.o
+endif
+
+extra-y := vmlinux.lds head.o
diff --git a/arch/arc/kernel/arc_hostlink.c b/arch/arc/kernel/arc_hostlink.c
new file mode 100644
index 000000000..47b2a17cc
--- /dev/null
+++ b/arch/arc/kernel/arc_hostlink.c
@@ -0,0 +1,58 @@
+/*
+ * arc_hostlink.c: Pseudo-driver for Metaware provided "hostlink" facility
+ *
+ * Allows Linux userland access to host in absence of any peripherals.
+ *
+ * Copyright (C) 2004, 2007-2010, 2011-2012 Synopsys, Inc. (www.synopsys.com)
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/fs.h>		/* file_operations */
+#include <linux/miscdevice.h>
+#include <linux/mm.h>		/* VM_IO */
+#include <linux/module.h>
+#include <linux/uaccess.h>
+
+static unsigned char __HOSTLINK__[4 * PAGE_SIZE] __aligned(PAGE_SIZE);
+
+static int arc_hl_mmap(struct file *fp, struct vm_area_struct *vma)
+{
+	vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
+
+	if (io_remap_pfn_range(vma, vma->vm_start, vma->vm_pgoff,
+			       vma->vm_end - vma->vm_start,
+			       vma->vm_page_prot)) {
+		pr_warn("Hostlink buffer mmap ERROR\n");
+		return -EAGAIN;
+	}
+	return 0;
+}
+
+static long arc_hl_ioctl(struct file *file, unsigned int cmd,
+			unsigned long arg)
+{
+	/* we only support, returning the physical addr to mmap in user space */
+	put_user((unsigned int)__HOSTLINK__, (int __user *)arg);
+	return 0;
+}
+
+static const struct file_operations arc_hl_fops = {
+	.unlocked_ioctl	= arc_hl_ioctl,
+	.mmap		= arc_hl_mmap,
+};
+
+static struct miscdevice arc_hl_dev = {
+	.minor	= MISC_DYNAMIC_MINOR,
+	.name	= "hostlink",
+	.fops	= &arc_hl_fops
+};
+
+static int __init arc_hl_init(void)
+{
+	pr_info("ARC Hostlink driver mmap at 0x%p\n", __HOSTLINK__);
+	return misc_register(&arc_hl_dev);
+}
+module_init(arc_hl_init);
diff --git a/arch/arc/kernel/arcksyms.c b/arch/arc/kernel/arcksyms.c
new file mode 100644
index 000000000..000dd041a
--- /dev/null
+++ b/arch/arc/kernel/arcksyms.c
@@ -0,0 +1,58 @@
+/*
+ * arcksyms.c - Exporting symbols not exportable from their own sources
+ *
+ * Copyright (C) 2004, 2007-2010, 2011-2012 Synopsys, Inc. (www.synopsys.com)
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ */
+
+#include <linux/module.h>
+
+/* libgcc functions, not part of kernel sources */
+extern void __ashldi3(void);
+extern void __ashrdi3(void);
+extern void __divsi3(void);
+extern void __divsf3(void);
+extern void __lshrdi3(void);
+extern void __modsi3(void);
+extern void __muldi3(void);
+extern void __ucmpdi2(void);
+extern void __udivsi3(void);
+extern void __umodsi3(void);
+extern void __cmpdi2(void);
+extern void __fixunsdfsi(void);
+extern void __muldf3(void);
+extern void __divdf3(void);
+extern void __floatunsidf(void);
+extern void __floatunsisf(void);
+extern void __udivdi3(void);
+
+EXPORT_SYMBOL(__ashldi3);
+EXPORT_SYMBOL(__ashrdi3);
+EXPORT_SYMBOL(__divsi3);
+EXPORT_SYMBOL(__divsf3);
+EXPORT_SYMBOL(__lshrdi3);
+EXPORT_SYMBOL(__modsi3);
+EXPORT_SYMBOL(__muldi3);
+EXPORT_SYMBOL(__ucmpdi2);
+EXPORT_SYMBOL(__udivsi3);
+EXPORT_SYMBOL(__umodsi3);
+EXPORT_SYMBOL(__cmpdi2);
+EXPORT_SYMBOL(__fixunsdfsi);
+EXPORT_SYMBOL(__muldf3);
+EXPORT_SYMBOL(__divdf3);
+EXPORT_SYMBOL(__floatunsidf);
+EXPORT_SYMBOL(__floatunsisf);
+EXPORT_SYMBOL(__udivdi3);
+
+/* ARC optimised assembler routines */
+EXPORT_SYMBOL(memset);
+EXPORT_SYMBOL(memcpy);
+EXPORT_SYMBOL(memcmp);
+EXPORT_SYMBOL(strchr);
+EXPORT_SYMBOL(strcpy);
+EXPORT_SYMBOL(strcmp);
+EXPORT_SYMBOL(strlen);
diff --git a/arch/arc/kernel/asm-offsets.c b/arch/arc/kernel/asm-offsets.c
new file mode 100644
index 000000000..ecaf34e92
--- /dev/null
+++ b/arch/arc/kernel/asm-offsets.c
@@ -0,0 +1,68 @@
+/*
+ * Copyright (C) 2004, 2007-2010, 2011-2012 Synopsys, Inc. (www.synopsys.com)
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/sched.h>
+#include <linux/mm.h>
+#include <linux/interrupt.h>
+#include <linux/thread_info.h>
+#include <linux/kbuild.h>
+#include <linux/ptrace.h>
+#include <asm/hardirq.h>
+#include <asm/page.h>
+
+int main(void)
+{
+	DEFINE(TASK_THREAD, offsetof(struct task_struct, thread));
+	DEFINE(TASK_THREAD_INFO, offsetof(struct task_struct, stack));
+
+	BLANK();
+
+	DEFINE(THREAD_KSP, offsetof(struct thread_struct, ksp));
+	DEFINE(THREAD_CALLEE_REG, offsetof(struct thread_struct, callee_reg));
+	DEFINE(THREAD_FAULT_ADDR,
+	       offsetof(struct thread_struct, fault_address));
+
+	BLANK();
+
+	DEFINE(THREAD_INFO_FLAGS, offsetof(struct thread_info, flags));
+	DEFINE(THREAD_INFO_PREEMPT_COUNT,
+	       offsetof(struct thread_info, preempt_count));
+
+	BLANK();
+
+	DEFINE(TASK_ACT_MM, offsetof(struct task_struct, active_mm));
+	DEFINE(TASK_TGID, offsetof(struct task_struct, tgid));
+	DEFINE(TASK_PID, offsetof(struct task_struct, pid));
+	DEFINE(TASK_COMM, offsetof(struct task_struct, comm));
+
+	DEFINE(MM_CTXT, offsetof(struct mm_struct, context));
+	DEFINE(MM_PGD, offsetof(struct mm_struct, pgd));
+
+	DEFINE(MM_CTXT_ASID, offsetof(mm_context_t, asid));
+
+	BLANK();
+
+	DEFINE(PT_status32, offsetof(struct pt_regs, status32));
+	DEFINE(PT_event, offsetof(struct pt_regs, event));
+	DEFINE(PT_sp, offsetof(struct pt_regs, sp));
+	DEFINE(PT_r0, offsetof(struct pt_regs, r0));
+	DEFINE(PT_r1, offsetof(struct pt_regs, r1));
+	DEFINE(PT_r2, offsetof(struct pt_regs, r2));
+	DEFINE(PT_r3, offsetof(struct pt_regs, r3));
+	DEFINE(PT_r4, offsetof(struct pt_regs, r4));
+	DEFINE(PT_r5, offsetof(struct pt_regs, r5));
+	DEFINE(PT_r6, offsetof(struct pt_regs, r6));
+	DEFINE(PT_r7, offsetof(struct pt_regs, r7));
+	DEFINE(PT_ret, offsetof(struct pt_regs, ret));
+
+	DEFINE(SZ_CALLEE_REGS, sizeof(struct callee_regs));
+	DEFINE(SZ_PT_REGS, sizeof(struct pt_regs));
+	DEFINE(PT_user_r25, offsetof(struct pt_regs, user_r25));
+
+	return 0;
+}
diff --git a/arch/arc/kernel/ctx_sw.c b/arch/arc/kernel/ctx_sw.c
new file mode 100644
index 000000000..9e1ae9d41
--- /dev/null
+++ b/arch/arc/kernel/ctx_sw.c
@@ -0,0 +1,128 @@
+/*
+ * Copyright (C) 2004, 2007-2010, 2011-2012 Synopsys, Inc. (www.synopsys.com)
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * Vineetg: Aug 2009
+ *  -"C" version of lowest level context switch asm macro called by schedular
+ *   gcc doesn't generate the dward CFI info for hand written asm, hence can't
+ *   backtrace out of it (e.g. tasks sleeping in kernel).
+ *   So we cheat a bit by writing almost similar code in inline-asm.
+ *  -This is a hacky way of doing things, but there is no other simple way.
+ *   I don't want/intend to extend unwinding code to understand raw asm
+ */
+
+#include <asm/asm-offsets.h>
+#include <linux/sched.h>
+#include <linux/sched/debug.h>
+#ifdef CONFIG_ARC_PLAT_EZNPS
+#include <plat/ctop.h>
+#endif
+
+#define KSP_WORD_OFF 	((TASK_THREAD + THREAD_KSP) / 4)
+
+struct task_struct *__sched
+__switch_to(struct task_struct *prev_task, struct task_struct *next_task)
+{
+	unsigned int tmp;
+	unsigned int prev = (unsigned int)prev_task;
+	unsigned int next = (unsigned int)next_task;
+
+	__asm__ __volatile__(
+		/* FP/BLINK save generated by gcc (standard function prologue */
+		"st.a    r13, [sp, -4]   \n\t"
+		"st.a    r14, [sp, -4]   \n\t"
+		"st.a    r15, [sp, -4]   \n\t"
+		"st.a    r16, [sp, -4]   \n\t"
+		"st.a    r17, [sp, -4]   \n\t"
+		"st.a    r18, [sp, -4]   \n\t"
+		"st.a    r19, [sp, -4]   \n\t"
+		"st.a    r20, [sp, -4]   \n\t"
+		"st.a    r21, [sp, -4]   \n\t"
+		"st.a    r22, [sp, -4]   \n\t"
+		"st.a    r23, [sp, -4]   \n\t"
+		"st.a    r24, [sp, -4]   \n\t"
+#ifndef CONFIG_ARC_CURR_IN_REG
+		"st.a    r25, [sp, -4]   \n\t"
+#else
+		"sub     sp, sp, 4      \n\t"	/* usual r25 placeholder */
+#endif
+
+		/* set ksp of outgoing task in tsk->thread.ksp */
+#if KSP_WORD_OFF <= 255
+		"st.as   sp, [%3, %1]    \n\t"
+#else
+		/*
+		 * Workaround for NR_CPUS=4k
+		 * %1 is bigger than 255 (S9 offset for st.as)
+		 */
+		"add2    r24, %3, %1     \n\t"
+		"st      sp, [r24]       \n\t"
+#endif
+
+		/*
+		 * setup _current_task with incoming tsk.
+		 * optionally, set r25 to that as well
+		 * For SMP extra work to get to &_current_task[cpu]
+		 * (open coded SET_CURR_TASK_ON_CPU)
+		 */
+#ifndef CONFIG_SMP
+		"st  %2, [@_current_task]	\n\t"
+#else
+#ifdef CONFIG_ARC_PLAT_EZNPS
+		"lr   r24, [%4]		\n\t"
+#ifndef CONFIG_EZNPS_MTM_EXT
+		"lsr  r24, r24, 4		\n\t"
+#endif
+#else
+		"lr   r24, [identity]		\n\t"
+		"lsr  r24, r24, 8		\n\t"
+		"bmsk r24, r24, 7		\n\t"
+#endif
+		"add2 r24, @_current_task, r24	\n\t"
+		"st   %2,  [r24]		\n\t"
+#endif
+#ifdef CONFIG_ARC_CURR_IN_REG
+		"mov r25, %2   \n\t"
+#endif
+
+		/* get ksp of incoming task from tsk->thread.ksp */
+		"ld.as  sp, [%2, %1]   \n\t"
+
+		/* start loading it's CALLEE reg file */
+
+#ifndef CONFIG_ARC_CURR_IN_REG
+		"ld.ab   r25, [sp, 4]   \n\t"
+#else
+		"add    sp, sp, 4       \n\t"
+#endif
+		"ld.ab   r24, [sp, 4]   \n\t"
+		"ld.ab   r23, [sp, 4]   \n\t"
+		"ld.ab   r22, [sp, 4]   \n\t"
+		"ld.ab   r21, [sp, 4]   \n\t"
+		"ld.ab   r20, [sp, 4]   \n\t"
+		"ld.ab   r19, [sp, 4]   \n\t"
+		"ld.ab   r18, [sp, 4]   \n\t"
+		"ld.ab   r17, [sp, 4]   \n\t"
+		"ld.ab   r16, [sp, 4]   \n\t"
+		"ld.ab   r15, [sp, 4]   \n\t"
+		"ld.ab   r14, [sp, 4]   \n\t"
+		"ld.ab   r13, [sp, 4]   \n\t"
+
+		/* last (ret value) = prev : although for ARC it mov r0, r0 */
+		"mov     %0, %3        \n\t"
+
+		/* FP/BLINK restore generated by gcc (standard func epilogue */
+
+		: "=r"(tmp)
+		: "n"(KSP_WORD_OFF), "r"(next), "r"(prev)
+#ifdef CONFIG_ARC_PLAT_EZNPS
+		, "i"(CTOP_AUX_LOGIC_GLOBAL_ID)
+#endif
+		: "blink"
+	);
+
+	return (struct task_struct *)tmp;
+}
diff --git a/arch/arc/kernel/ctx_sw_asm.S b/arch/arc/kernel/ctx_sw_asm.S
new file mode 100644
index 000000000..7c1f365ef
--- /dev/null
+++ b/arch/arc/kernel/ctx_sw_asm.S
@@ -0,0 +1,63 @@
+/*
+ * Copyright (C) 2004, 2007-2010, 2011-2012 Synopsys, Inc. (www.synopsys.com)
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * Vineetg: Aug 2009
+ *  -Moved core context switch macro out of entry.S into this file.
+ *  -This is the more "natural" hand written assembler
+ */
+
+#include <linux/linkage.h>
+#include <asm/entry.h>       /* For the SAVE_* macros */
+#include <asm/asm-offsets.h>
+
+#define KSP_WORD_OFF 	((TASK_THREAD + THREAD_KSP) / 4)
+
+;################### Low Level Context Switch ##########################
+
+	.section .sched.text,"ax",@progbits
+	.align 4
+	.global __switch_to
+	.type   __switch_to, @function
+__switch_to:
+	CFI_STARTPROC
+
+	/* Save regs on kernel mode stack of task */
+	st.a    blink, [sp, -4]
+	st.a    fp, [sp, -4]
+	SAVE_CALLEE_SAVED_KERNEL
+
+	/* Save the now KSP in task->thread.ksp */
+#if KSP_WORD_OFF  <= 255
+	st.as  sp, [r0, KSP_WORD_OFF]
+#else
+	/* Workaround for NR_CPUS=4k as ST.as can only take s9 offset */
+	add2	r24, r0, KSP_WORD_OFF
+	st	sp, [r24]
+#endif
+	/*
+	* Return last task in r0 (return reg)
+	* On ARC, Return reg = First Arg reg = r0.
+	* Since we already have last task in r0,
+	* don't need to do anything special to return it
+	*/
+
+	/*
+	 * switch to new task, contained in r1
+	 * Temp reg r3 is required to get the ptr to store val
+	 */
+	SET_CURR_TASK_ON_CPU  r1, r3
+
+	/* reload SP with kernel mode stack pointer in task->thread.ksp */
+	ld.as  sp, [r1, (TASK_THREAD + THREAD_KSP)/4]
+
+	/* restore the registers */
+	RESTORE_CALLEE_SAVED_KERNEL
+	ld.ab   fp, [sp, 4]
+	ld.ab   blink, [sp, 4]
+	j       [blink]
+
+END_CFI(__switch_to)
diff --git a/arch/arc/kernel/devtree.c b/arch/arc/kernel/devtree.c
new file mode 100644
index 000000000..521ef3521
--- /dev/null
+++ b/arch/arc/kernel/devtree.c
@@ -0,0 +1,80 @@
+/*
+ * Copyright (C) 2012 Synopsys, Inc. (www.synopsys.com)
+ *
+ * Based on reduced version of METAG
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+
+#include <linux/init.h>
+#include <linux/reboot.h>
+#include <linux/memblock.h>
+#include <linux/of.h>
+#include <linux/of_fdt.h>
+#include <asm/mach_desc.h>
+
+#ifdef CONFIG_SERIAL_EARLYCON
+
+static unsigned int __initdata arc_base_baud;
+
+unsigned int __init arc_early_base_baud(void)
+{
+	return arc_base_baud/16;
+}
+
+static void __init arc_set_early_base_baud(unsigned long dt_root)
+{
+	if (of_flat_dt_is_compatible(dt_root, "abilis,arc-tb10x"))
+		arc_base_baud = 166666666;	/* Fixed 166.6MHz clk (TB10x) */
+	else if (of_flat_dt_is_compatible(dt_root, "snps,arc-sdp") ||
+		 of_flat_dt_is_compatible(dt_root, "snps,hsdk"))
+		arc_base_baud = 33333333;	/* Fixed 33MHz clk (AXS10x & HSDK) */
+	else if (of_flat_dt_is_compatible(dt_root, "ezchip,arc-nps"))
+		arc_base_baud = 800000000;      /* Fixed 800MHz clk (NPS) */
+	else
+		arc_base_baud = 50000000;	/* Fixed default 50MHz */
+}
+#else
+#define arc_set_early_base_baud(dt_root)
+#endif
+
+static const void * __init arch_get_next_mach(const char *const **match)
+{
+	static const struct machine_desc *mdesc = __arch_info_begin;
+	const struct machine_desc *m = mdesc;
+
+	if (m >= __arch_info_end)
+		return NULL;
+
+	mdesc++;
+	*match = m->dt_compat;
+	return m;
+}
+
+/**
+ * setup_machine_fdt - Machine setup when an dtb was passed to the kernel
+ * @dt:		virtual address pointer to dt blob
+ *
+ * If a dtb was passed to the kernel, then use it to choose the correct
+ * machine_desc and to setup the system.
+ */
+const struct machine_desc * __init setup_machine_fdt(void *dt)
+{
+	const struct machine_desc *mdesc;
+	unsigned long dt_root;
+
+	if (!early_init_dt_scan(dt))
+		return NULL;
+
+	mdesc = of_flat_dt_match_machine(NULL, arch_get_next_mach);
+	if (!mdesc)
+		machine_halt();
+
+	dt_root = of_get_flat_dt_root();
+	arc_set_early_base_baud(dt_root);
+
+	return mdesc;
+}
diff --git a/arch/arc/kernel/disasm.c b/arch/arc/kernel/disasm.c
new file mode 100644
index 000000000..3b7cd4864
--- /dev/null
+++ b/arch/arc/kernel/disasm.c
@@ -0,0 +1,538 @@
+/*
+ * several functions that help interpret ARC instructions
+ * used for unaligned accesses, kprobes and kgdb
+ *
+ * Copyright (C) 2004, 2007-2010, 2011-2012 Synopsys, Inc. (www.synopsys.com)
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/types.h>
+#include <linux/kprobes.h>
+#include <linux/slab.h>
+#include <linux/uaccess.h>
+#include <asm/disasm.h>
+
+#if defined(CONFIG_KGDB) || defined(CONFIG_ARC_EMUL_UNALIGNED) || \
+	defined(CONFIG_KPROBES)
+
+/* disasm_instr: Analyses instruction at addr, stores
+ * findings in *state
+ */
+void __kprobes disasm_instr(unsigned long addr, struct disasm_state *state,
+	int userspace, struct pt_regs *regs, struct callee_regs *cregs)
+{
+	int fieldA = 0;
+	int fieldC = 0, fieldCisReg = 0;
+	uint16_t word1 = 0, word0 = 0;
+	int subopcode, is_linked, op_format;
+	uint16_t *ins_ptr;
+	uint16_t ins_buf[4];
+	int bytes_not_copied = 0;
+
+	memset(state, 0, sizeof(struct disasm_state));
+
+	/* This fetches the upper part of the 32 bit instruction
+	 * in both the cases of Little Endian or Big Endian configurations. */
+	if (userspace) {
+		bytes_not_copied = copy_from_user(ins_buf,
+						(const void __user *) addr, 8);
+		if (bytes_not_copied > 6)
+			goto fault;
+		ins_ptr = ins_buf;
+	} else {
+		ins_ptr = (uint16_t *) addr;
+	}
+
+	word1 = *((uint16_t *)addr);
+
+	state->major_opcode = (word1 >> 11) & 0x1F;
+
+	/* Check if the instruction is 32 bit or 16 bit instruction */
+	if (state->major_opcode < 0x0B) {
+		if (bytes_not_copied > 4)
+			goto fault;
+		state->instr_len = 4;
+		word0 = *((uint16_t *)(addr+2));
+		state->words[0] = (word1 << 16) | word0;
+	} else {
+		state->instr_len = 2;
+		state->words[0] = word1;
+	}
+
+	/* Read the second word in case of limm */
+	word1 = *((uint16_t *)(addr + state->instr_len));
+	word0 = *((uint16_t *)(addr + state->instr_len + 2));
+	state->words[1] = (word1 << 16) | word0;
+
+	switch (state->major_opcode) {
+	case op_Bcc:
+		state->is_branch = 1;
+
+		/* unconditional branch s25, conditional branch s21 */
+		fieldA = (IS_BIT(state->words[0], 16)) ?
+			FIELD_s25(state->words[0]) :
+			FIELD_s21(state->words[0]);
+
+		state->delay_slot = IS_BIT(state->words[0], 5);
+		state->target = fieldA + (addr & ~0x3);
+		state->flow = direct_jump;
+		break;
+
+	case op_BLcc:
+		if (IS_BIT(state->words[0], 16)) {
+			/* Branch and Link*/
+			/* unconditional branch s25, conditional branch s21 */
+			fieldA = (IS_BIT(state->words[0], 17)) ?
+				(FIELD_s25(state->words[0]) & ~0x3) :
+				FIELD_s21(state->words[0]);
+
+			state->flow = direct_call;
+		} else {
+			/*Branch On Compare */
+			fieldA = FIELD_s9(state->words[0]) & ~0x3;
+			state->flow = direct_jump;
+		}
+
+		state->delay_slot = IS_BIT(state->words[0], 5);
+		state->target = fieldA + (addr & ~0x3);
+		state->is_branch = 1;
+		break;
+
+	case op_LD:  /* LD<zz> a,[b,s9] */
+		state->write = 0;
+		state->di = BITS(state->words[0], 11, 11);
+		if (state->di)
+			break;
+		state->x = BITS(state->words[0], 6, 6);
+		state->zz = BITS(state->words[0], 7, 8);
+		state->aa = BITS(state->words[0], 9, 10);
+		state->wb_reg = FIELD_B(state->words[0]);
+		if (state->wb_reg == REG_LIMM) {
+			state->instr_len += 4;
+			state->aa = 0;
+			state->src1 = state->words[1];
+		} else {
+			state->src1 = get_reg(state->wb_reg, regs, cregs);
+		}
+		state->src2 = FIELD_s9(state->words[0]);
+		state->dest = FIELD_A(state->words[0]);
+		state->pref = (state->dest == REG_LIMM);
+		break;
+
+	case op_ST:
+		state->write = 1;
+		state->di = BITS(state->words[0], 5, 5);
+		if (state->di)
+			break;
+		state->aa = BITS(state->words[0], 3, 4);
+		state->zz = BITS(state->words[0], 1, 2);
+		state->src1 = FIELD_C(state->words[0]);
+		if (state->src1 == REG_LIMM) {
+			state->instr_len += 4;
+			state->src1 = state->words[1];
+		} else {
+			state->src1 = get_reg(state->src1, regs, cregs);
+		}
+		state->wb_reg = FIELD_B(state->words[0]);
+		if (state->wb_reg == REG_LIMM) {
+			state->aa = 0;
+			state->instr_len += 4;
+			state->src2 = state->words[1];
+		} else {
+			state->src2 = get_reg(state->wb_reg, regs, cregs);
+		}
+		state->src3 = FIELD_s9(state->words[0]);
+		break;
+
+	case op_MAJOR_4:
+		subopcode = MINOR_OPCODE(state->words[0]);
+		switch (subopcode) {
+		case 32:	/* Jcc */
+		case 33:	/* Jcc.D */
+		case 34:	/* JLcc */
+		case 35:	/* JLcc.D */
+			is_linked = 0;
+
+			if (subopcode == 33 || subopcode == 35)
+				state->delay_slot = 1;
+
+			if (subopcode == 34 || subopcode == 35)
+				is_linked = 1;
+
+			fieldCisReg = 0;
+			op_format = BITS(state->words[0], 22, 23);
+			if (op_format == 0 || ((op_format == 3) &&
+				(!IS_BIT(state->words[0], 5)))) {
+				fieldC = FIELD_C(state->words[0]);
+
+				if (fieldC == REG_LIMM) {
+					fieldC = state->words[1];
+					state->instr_len += 4;
+				} else {
+					fieldCisReg = 1;
+				}
+			} else if (op_format == 1 || ((op_format == 3)
+				&& (IS_BIT(state->words[0], 5)))) {
+				fieldC = FIELD_C(state->words[0]);
+			} else  {
+				/* op_format == 2 */
+				fieldC = FIELD_s12(state->words[0]);
+			}
+
+			if (!fieldCisReg) {
+				state->target = fieldC;
+				state->flow = is_linked ?
+					direct_call : direct_jump;
+			} else {
+				state->target = get_reg(fieldC, regs, cregs);
+				state->flow = is_linked ?
+					indirect_call : indirect_jump;
+			}
+			state->is_branch = 1;
+			break;
+
+		case 40:	/* LPcc */
+			if (BITS(state->words[0], 22, 23) == 3) {
+				/* Conditional LPcc u7 */
+				fieldC = FIELD_C(state->words[0]);
+
+				fieldC = fieldC << 1;
+				fieldC += (addr & ~0x03);
+				state->is_branch = 1;
+				state->flow = direct_jump;
+				state->target = fieldC;
+			}
+			/* For Unconditional lp, next pc is the fall through
+			 * which is updated */
+			break;
+
+		case 48 ... 55:	/* LD a,[b,c] */
+			state->di = BITS(state->words[0], 15, 15);
+			if (state->di)
+				break;
+			state->x = BITS(state->words[0], 16, 16);
+			state->zz = BITS(state->words[0], 17, 18);
+			state->aa = BITS(state->words[0], 22, 23);
+			state->wb_reg = FIELD_B(state->words[0]);
+			if (state->wb_reg == REG_LIMM) {
+				state->instr_len += 4;
+				state->src1 = state->words[1];
+			} else {
+				state->src1 = get_reg(state->wb_reg, regs,
+						cregs);
+			}
+			state->src2 = FIELD_C(state->words[0]);
+			if (state->src2 == REG_LIMM) {
+				state->instr_len += 4;
+				state->src2 = state->words[1];
+			} else {
+				state->src2 = get_reg(state->src2, regs,
+					cregs);
+			}
+			state->dest = FIELD_A(state->words[0]);
+			if (state->dest == REG_LIMM)
+				state->pref = 1;
+			break;
+
+		case 10:	/* MOV */
+			/* still need to check for limm to extract instr len */
+			/* MOV is special case because it only takes 2 args */
+			switch (BITS(state->words[0], 22, 23)) {
+			case 0: /* OP a,b,c */
+				if (FIELD_C(state->words[0]) == REG_LIMM)
+					state->instr_len += 4;
+				break;
+			case 1: /* OP a,b,u6 */
+				break;
+			case 2: /* OP b,b,s12 */
+				break;
+			case 3: /* OP.cc b,b,c/u6 */
+				if ((!IS_BIT(state->words[0], 5)) &&
+				    (FIELD_C(state->words[0]) == REG_LIMM))
+					state->instr_len += 4;
+				break;
+			}
+			break;
+
+
+		default:
+			/* Not a Load, Jump or Loop instruction */
+			/* still need to check for limm to extract instr len */
+			switch (BITS(state->words[0], 22, 23)) {
+			case 0: /* OP a,b,c */
+				if ((FIELD_B(state->words[0]) == REG_LIMM) ||
+				    (FIELD_C(state->words[0]) == REG_LIMM))
+					state->instr_len += 4;
+				break;
+			case 1: /* OP a,b,u6 */
+				break;
+			case 2: /* OP b,b,s12 */
+				break;
+			case 3: /* OP.cc b,b,c/u6 */
+				if ((!IS_BIT(state->words[0], 5)) &&
+				   ((FIELD_B(state->words[0]) == REG_LIMM) ||
+				    (FIELD_C(state->words[0]) == REG_LIMM)))
+					state->instr_len += 4;
+				break;
+			}
+			break;
+		}
+		break;
+
+	/* 16 Bit Instructions */
+	case op_LD_ADD: /* LD_S|LDB_S|LDW_S a,[b,c] */
+		state->zz = BITS(state->words[0], 3, 4);
+		state->src1 = get_reg(FIELD_S_B(state->words[0]), regs, cregs);
+		state->src2 = get_reg(FIELD_S_C(state->words[0]), regs, cregs);
+		state->dest = FIELD_S_A(state->words[0]);
+		break;
+
+	case op_ADD_MOV_CMP:
+		/* check for limm, ignore mov_s h,b (== mov_s 0,b) */
+		if ((BITS(state->words[0], 3, 4) < 3) &&
+		    (FIELD_S_H(state->words[0]) == REG_LIMM))
+			state->instr_len += 4;
+		break;
+
+	case op_S:
+		subopcode = BITS(state->words[0], 5, 7);
+		switch (subopcode) {
+		case 0:	/* j_s */
+		case 1:	/* j_s.d */
+		case 2:	/* jl_s */
+		case 3:	/* jl_s.d */
+			state->target = get_reg(FIELD_S_B(state->words[0]),
+						regs, cregs);
+			state->delay_slot = subopcode & 1;
+			state->flow = (subopcode >= 2) ?
+				direct_call : indirect_jump;
+			break;
+		case 7:
+			switch (BITS(state->words[0], 8, 10)) {
+			case 4:	/* jeq_s [blink] */
+			case 5:	/* jne_s [blink] */
+			case 6:	/* j_s [blink] */
+			case 7:	/* j_s.d [blink] */
+				state->delay_slot = (subopcode == 7);
+				state->flow = indirect_jump;
+				state->target = get_reg(31, regs, cregs);
+			default:
+				break;
+			}
+		default:
+			break;
+		}
+		break;
+
+	case op_LD_S:	/* LD_S c, [b, u7] */
+		state->src1 = get_reg(FIELD_S_B(state->words[0]), regs, cregs);
+		state->src2 = FIELD_S_u7(state->words[0]);
+		state->dest = FIELD_S_C(state->words[0]);
+		break;
+
+	case op_LDB_S:
+	case op_STB_S:
+		/* no further handling required as byte accesses should not
+		 * cause an unaligned access exception */
+		state->zz = 1;
+		break;
+
+	case op_LDWX_S:	/* LDWX_S c, [b, u6] */
+		state->x = 1;
+		/* intentional fall-through */
+
+	case op_LDW_S:	/* LDW_S c, [b, u6] */
+		state->zz = 2;
+		state->src1 = get_reg(FIELD_S_B(state->words[0]), regs, cregs);
+		state->src2 = FIELD_S_u6(state->words[0]);
+		state->dest = FIELD_S_C(state->words[0]);
+		break;
+
+	case op_ST_S:	/* ST_S c, [b, u7] */
+		state->write = 1;
+		state->src1 = get_reg(FIELD_S_C(state->words[0]), regs, cregs);
+		state->src2 = get_reg(FIELD_S_B(state->words[0]), regs, cregs);
+		state->src3 = FIELD_S_u7(state->words[0]);
+		break;
+
+	case op_STW_S:	/* STW_S c,[b,u6] */
+		state->write = 1;
+		state->zz = 2;
+		state->src1 = get_reg(FIELD_S_C(state->words[0]), regs, cregs);
+		state->src2 = get_reg(FIELD_S_B(state->words[0]), regs, cregs);
+		state->src3 = FIELD_S_u6(state->words[0]);
+		break;
+
+	case op_SP:	/* LD_S|LDB_S b,[sp,u7], ST_S|STB_S b,[sp,u7] */
+		/* note: we are ignoring possibility of:
+		 * ADD_S, SUB_S, PUSH_S, POP_S as these should not
+		 * cause unaliged exception anyway */
+		state->write = BITS(state->words[0], 6, 6);
+		state->zz = BITS(state->words[0], 5, 5);
+		if (state->zz)
+			break;	/* byte accesses should not come here */
+		if (!state->write) {
+			state->src1 = get_reg(28, regs, cregs);
+			state->src2 = FIELD_S_u7(state->words[0]);
+			state->dest = FIELD_S_B(state->words[0]);
+		} else {
+			state->src1 = get_reg(FIELD_S_B(state->words[0]), regs,
+					cregs);
+			state->src2 = get_reg(28, regs, cregs);
+			state->src3 = FIELD_S_u7(state->words[0]);
+		}
+		break;
+
+	case op_GP:	/* LD_S|LDB_S|LDW_S r0,[gp,s11/s9/s10] */
+		/* note: ADD_S r0, gp, s11 is ignored */
+		state->zz = BITS(state->words[0], 9, 10);
+		state->src1 = get_reg(26, regs, cregs);
+		state->src2 = state->zz ? FIELD_S_s10(state->words[0]) :
+			FIELD_S_s11(state->words[0]);
+		state->dest = 0;
+		break;
+
+	case op_Pcl:	/* LD_S b,[pcl,u10] */
+		state->src1 = regs->ret & ~3;
+		state->src2 = FIELD_S_u10(state->words[0]);
+		state->dest = FIELD_S_B(state->words[0]);
+		break;
+
+	case op_BR_S:
+		state->target = FIELD_S_s8(state->words[0]) + (addr & ~0x03);
+		state->flow = direct_jump;
+		state->is_branch = 1;
+		break;
+
+	case op_B_S:
+		fieldA = (BITS(state->words[0], 9, 10) == 3) ?
+			FIELD_S_s7(state->words[0]) :
+			FIELD_S_s10(state->words[0]);
+		state->target = fieldA + (addr & ~0x03);
+		state->flow = direct_jump;
+		state->is_branch = 1;
+		break;
+
+	case op_BL_S:
+		state->target = FIELD_S_s13(state->words[0]) + (addr & ~0x03);
+		state->flow = direct_call;
+		state->is_branch = 1;
+		break;
+
+	default:
+		break;
+	}
+
+	if (bytes_not_copied <= (8 - state->instr_len))
+		return;
+
+fault:	state->fault = 1;
+}
+
+long __kprobes get_reg(int reg, struct pt_regs *regs,
+		       struct callee_regs *cregs)
+{
+	long *p;
+
+	if (reg <= 12) {
+		p = &regs->r0;
+		return p[-reg];
+	}
+
+	if (cregs && (reg <= 25)) {
+		p = &cregs->r13;
+		return p[13-reg];
+	}
+
+	if (reg == 26)
+		return regs->r26;
+	if (reg == 27)
+		return regs->fp;
+	if (reg == 28)
+		return regs->sp;
+	if (reg == 31)
+		return regs->blink;
+
+	return 0;
+}
+
+void __kprobes set_reg(int reg, long val, struct pt_regs *regs,
+		struct callee_regs *cregs)
+{
+	long *p;
+
+	switch (reg) {
+	case 0 ... 12:
+		p = &regs->r0;
+		p[-reg] = val;
+		break;
+	case 13 ... 25:
+		if (cregs) {
+			p = &cregs->r13;
+			p[13-reg] = val;
+		}
+		break;
+	case 26:
+		regs->r26 = val;
+		break;
+	case 27:
+		regs->fp = val;
+		break;
+	case 28:
+		regs->sp = val;
+		break;
+	case 31:
+		regs->blink = val;
+		break;
+	default:
+		break;
+	}
+}
+
+/*
+ * Disassembles the insn at @pc and sets @next_pc to next PC (which could be
+ * @pc +2/4/6 (ARCompact ISA allows free intermixing of 16/32 bit insns).
+ *
+ * If @pc is a branch
+ *	-@tgt_if_br is set to branch target.
+ *	-If branch has delay slot, @next_pc updated with actual next PC.
+ */
+int __kprobes disasm_next_pc(unsigned long pc, struct pt_regs *regs,
+			     struct callee_regs *cregs,
+			     unsigned long *next_pc, unsigned long *tgt_if_br)
+{
+	struct disasm_state instr;
+
+	memset(&instr, 0, sizeof(struct disasm_state));
+	disasm_instr(pc, &instr, 0, regs, cregs);
+
+	*next_pc = pc + instr.instr_len;
+
+	/* Instruction with possible two targets branch, jump and loop */
+	if (instr.is_branch)
+		*tgt_if_br = instr.target;
+
+	/* For the instructions with delay slots, the fall through is the
+	 * instruction following the instruction in delay slot.
+	 */
+	 if (instr.delay_slot) {
+		struct disasm_state instr_d;
+
+		disasm_instr(*next_pc, &instr_d, 0, regs, cregs);
+
+		*next_pc += instr_d.instr_len;
+	 }
+
+	 /* Zero Overhead Loop - end of the loop */
+	if (!(regs->status32 & STATUS32_L) && (*next_pc == regs->lp_end)
+		&& (regs->lp_count > 1)) {
+		*next_pc = regs->lp_start;
+	}
+
+	return instr.is_branch;
+}
+
+#endif /* CONFIG_KGDB || CONFIG_ARC_EMUL_UNALIGNED || CONFIG_KPROBES */
diff --git a/arch/arc/kernel/entry-arcv2.S b/arch/arc/kernel/entry-arcv2.S
new file mode 100644
index 000000000..562089d62
--- /dev/null
+++ b/arch/arc/kernel/entry-arcv2.S
@@ -0,0 +1,304 @@
+/*
+ * ARCv2 ISA based core Low Level Intr/Traps/Exceptions(non-TLB) Handling
+ *
+ * Copyright (C) 2013 Synopsys, Inc. (www.synopsys.com)
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/linkage.h>   /* ARC_{EXTRY,EXIT} */
+#include <asm/entry.h>       /* SAVE_ALL_{INT1,INT2,TRAP...} */
+#include <asm/errno.h>
+#include <asm/arcregs.h>
+#include <asm/irqflags.h>
+
+; A maximum number of supported interrupts in the core interrupt controller.
+; This number is not equal to the maximum interrupt number (256) because
+; first 16 lines are reserved for exceptions and are not configurable.
+#define NR_CPU_IRQS	240
+
+	.cpu HS
+
+#define VECTOR	.word
+
+;############################ Vector Table #################################
+
+	.section .vector,"a",@progbits
+	.align 4
+
+# Initial 16 slots are Exception Vectors
+VECTOR	res_service		; Reset Vector
+VECTOR	mem_service		; Mem exception
+VECTOR	instr_service		; Instrn Error
+VECTOR	EV_MachineCheck		; Fatal Machine check
+VECTOR	EV_TLBMissI		; Intruction TLB miss
+VECTOR	EV_TLBMissD		; Data TLB miss
+VECTOR	EV_TLBProtV		; Protection Violation
+VECTOR	EV_PrivilegeV		; Privilege Violation
+VECTOR	EV_SWI			; Software Breakpoint
+VECTOR	EV_Trap			; Trap exception
+VECTOR	EV_Extension		; Extn Instruction Exception
+VECTOR	EV_DivZero		; Divide by Zero
+VECTOR	EV_DCError		; Data Cache Error
+VECTOR	EV_Misaligned		; Misaligned Data Access
+VECTOR	reserved		; Reserved slots
+VECTOR	reserved		; Reserved slots
+
+# Begin Interrupt Vectors
+VECTOR	handle_interrupt	; (16) Timer0
+VECTOR	handle_interrupt	; unused (Timer1)
+VECTOR	handle_interrupt	; unused (WDT)
+VECTOR	handle_interrupt	; (19) Inter core Interrupt (IPI)
+VECTOR	handle_interrupt	; (20) perf Interrupt
+VECTOR	handle_interrupt	; (21) Software Triggered Intr (Self IPI)
+VECTOR	handle_interrupt	; unused
+VECTOR	handle_interrupt	; (23) unused
+# End of fixed IRQs
+
+.rept NR_CPU_IRQS - 8
+	VECTOR	handle_interrupt
+.endr
+
+	.section .text, "ax",@progbits
+
+reserved:
+	flag 1		; Unexpected event, halt
+
+;##################### Interrupt Handling ##############################
+
+ENTRY(handle_interrupt)
+
+	INTERRUPT_PROLOGUE  irq
+
+	# irq control APIs local_irq_save/restore/disable/enable fiddle with
+	# global interrupt enable bits in STATUS32 (.IE for 1 prio, .E[] for 2 prio)
+	# However a taken interrupt doesn't clear these bits. Thus irqs_disabled()
+	# query in hard ISR path would return false (since .IE is set) which would
+	# trips genirq interrupt handling asserts.
+	#
+	# So do a "soft" disable of interrutps here.
+	#
+	# Note this disable is only for consistent book-keeping as further interrupts
+	# will be disabled anyways even w/o this. Hardware tracks active interrupts
+	# seperately in AUX_IRQ_ACTIVE.active and will not take new interrupts
+	# unless this one returns (or higher prio becomes pending in 2-prio scheme)
+
+	IRQ_DISABLE
+
+	; icause is banked: one per priority level
+	; so a higher prio interrupt taken here won't clobber prev prio icause
+	lr  r0, [ICAUSE]
+	mov   blink, ret_from_exception
+
+	b.d  arch_do_IRQ
+	mov r1, sp
+
+END(handle_interrupt)
+
+;################### Non TLB Exception Handling #############################
+
+ENTRY(EV_SWI)
+	; TODO: implement this
+	EXCEPTION_PROLOGUE
+	b   ret_from_exception
+END(EV_SWI)
+
+ENTRY(EV_DivZero)
+	; TODO: implement this
+	EXCEPTION_PROLOGUE
+	b   ret_from_exception
+END(EV_DivZero)
+
+ENTRY(EV_DCError)
+	; TODO: implement this
+	EXCEPTION_PROLOGUE
+	b   ret_from_exception
+END(EV_DCError)
+
+; ---------------------------------------------
+; Memory Error Exception Handler
+;   - Unlike ARCompact, handles Bus errors for both User/Kernel mode,
+;     Instruction fetch or Data access, under a single Exception Vector
+; ---------------------------------------------
+
+ENTRY(mem_service)
+
+	EXCEPTION_PROLOGUE
+
+	lr  r0, [efa]
+	mov r1, sp
+
+	FAKE_RET_FROM_EXCPN
+
+	bl  do_memory_error
+	b   ret_from_exception
+END(mem_service)
+
+ENTRY(EV_Misaligned)
+
+	EXCEPTION_PROLOGUE
+
+	lr  r0, [efa]	; Faulting Data address
+	mov r1, sp
+
+	FAKE_RET_FROM_EXCPN
+
+	SAVE_CALLEE_SAVED_USER
+	mov r2, sp              ; callee_regs
+
+	bl  do_misaligned_access
+
+	; TBD: optimize - do this only if a callee reg was involved
+	; either a dst of emulated LD/ST or src with address-writeback
+	RESTORE_CALLEE_SAVED_USER
+
+	b   ret_from_exception
+END(EV_Misaligned)
+
+; ---------------------------------------------
+; Protection Violation Exception Handler
+; ---------------------------------------------
+
+ENTRY(EV_TLBProtV)
+
+	EXCEPTION_PROLOGUE
+
+	lr  r0, [efa]	; Faulting Data address
+	mov r1, sp	; pt_regs
+
+	FAKE_RET_FROM_EXCPN
+
+	mov blink, ret_from_exception
+	b   do_page_fault
+
+END(EV_TLBProtV)
+
+; From Linux standpoint Slow Path I/D TLB Miss is same a ProtV as they
+; need to call do_page_fault().
+; ECR in pt_regs provides whether access was R/W/X
+
+.global        call_do_page_fault
+.set call_do_page_fault, EV_TLBProtV
+
+;############# Common Handlers for ARCompact and ARCv2 ##############
+
+#include "entry.S"
+
+;############# Return from Intr/Excp/Trap (ARCv2 ISA Specifics) ##############
+;
+; Restore the saved sys context (common exit-path for EXCPN/IRQ/Trap)
+; IRQ shd definitely not happen between now and rtie
+; All 2 entry points to here already disable interrupts
+
+.Lrestore_regs:
+restore_regs:
+
+	# Interrpts are actually disabled from this point on, but will get
+	# reenabled after we return from interrupt/exception.
+	# But irq tracer needs to be told now...
+	TRACE_ASM_IRQ_ENABLE
+
+	ld	r0, [sp, PT_status32]	; U/K mode at time of entry
+	lr	r10, [AUX_IRQ_ACT]
+
+	bmsk	r11, r10, 15	; AUX_IRQ_ACT.ACTIVE
+	breq	r11, 0, .Lexcept_ret	; No intr active, ret from Exception
+
+;####### Return from Intr #######
+
+debug_marker_l1:
+	; bbit1.nt r0, STATUS_DE_BIT, .Lintr_ret_to_delay_slot
+	btst	r0, STATUS_DE_BIT		; Z flag set if bit clear
+	bnz	.Lintr_ret_to_delay_slot	; branch if STATUS_DE_BIT set
+
+.Lisr_ret_fast_path:
+	; Handle special case #1: (Entry via Exception, Return via IRQ)
+	;
+	; Exception in U mode, preempted in kernel, Intr taken (K mode), orig
+	; task now returning to U mode (riding the Intr)
+	; AUX_IRQ_ACTIVE won't have U bit set (since intr in K mode), hence SP
+	; won't be switched to correct U mode value (from AUX_SP)
+	; So force AUX_IRQ_ACT.U for such a case
+
+	btst	r0, STATUS_U_BIT		; Z flag set if K (Z clear for U)
+	bset.nz	r11, r11, AUX_IRQ_ACT_BIT_U	; NZ means U
+	sr	r11, [AUX_IRQ_ACT]
+
+	INTERRUPT_EPILOGUE  irq
+	rtie
+
+;####### Return from Exception / pure kernel mode #######
+
+.Lexcept_ret:	; Expects r0 has PT_status32
+
+debug_marker_syscall:
+	EXCEPTION_EPILOGUE
+	rtie
+
+;####### Return from Intr to insn in delay slot #######
+
+; Handle special case #2: (Entry via Exception in Delay Slot, Return via IRQ)
+;
+; Intr returning to a Delay Slot (DS) insn
+; (since IRQ NOT allowed in DS in ARCv2, this can only happen if orig
+; entry was via Exception in DS which got preempted in kernel).
+;
+; IRQ RTIE won't reliably restore DE bit and/or BTA, needs workaround
+;
+; Solution is return from Intr w/o any delay slot quirks into a kernel trampoline
+; and from pure kernel mode return to delay slot which handles DS bit/BTA correctly
+
+.Lintr_ret_to_delay_slot:
+debug_marker_ds:
+
+	ld	r2, [@intr_to_DE_cnt]
+	add	r2, r2, 1
+	st	r2, [@intr_to_DE_cnt]
+
+	ld	r2, [sp, PT_ret]
+	ld	r3, [sp, PT_status32]
+
+	; STAT32 for Int return created from scratch
+	; (No delay dlot, disable Further intr in trampoline)
+
+	bic  	r0, r3, STATUS_U_MASK|STATUS_DE_MASK|STATUS_IE_MASK|STATUS_L_MASK
+	st	r0, [sp, PT_status32]
+
+	mov	r1, .Lintr_ret_to_delay_slot_2
+	st	r1, [sp, PT_ret]
+
+	; Orig exception PC/STAT32 safekept @orig_r0 and @event stack slots
+	st	r2, [sp, 0]
+	st	r3, [sp, 4]
+
+	b	.Lisr_ret_fast_path
+
+.Lintr_ret_to_delay_slot_2:
+	; Trampoline to restore orig exception PC/STAT32/BTA/AUX_USER_SP
+	sub	sp, sp, SZ_PT_REGS
+	st	r9, [sp, -4]
+
+	ld	r9, [sp, 0]
+	sr	r9, [eret]
+
+	ld	r9, [sp, 4]
+	sr	r9, [erstatus]
+
+	; restore AUX_USER_SP if returning to U mode
+	bbit0	r9, STATUS_U_BIT, 1f
+	ld	r9, [sp, PT_sp]
+	sr	r9, [AUX_USER_SP]
+
+1:
+	ld	r9, [sp, 8]
+	sr	r9, [erbta]
+
+	ld	r9, [sp, -4]
+	add	sp, sp, SZ_PT_REGS
+
+	; return from pure kernel mode to delay slot
+	rtie
+
+END(ret_from_exception)
diff --git a/arch/arc/kernel/entry-compact.S b/arch/arc/kernel/entry-compact.S
new file mode 100644
index 000000000..f285dbb28
--- /dev/null
+++ b/arch/arc/kernel/entry-compact.S
@@ -0,0 +1,406 @@
+/*
+ * Low Level Interrupts/Traps/Exceptions(non-TLB) Handling for ARCompact ISA
+ *
+ * Copyright (C) 2014-15 Synopsys, Inc. (www.synopsys.com)
+ * Copyright (C) 2004, 2007-2010, 2011-2012 Synopsys, Inc. (www.synopsys.com)
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * vineetg: May 2011
+ *  -Userspace unaligned access emulation
+ *
+ * vineetg: Feb 2011 (ptrace low level code fixes)
+ *  -traced syscall return code (r0) was not saved into pt_regs for restoring
+ *   into user reg-file when traded task rets to user space.
+ *  -syscalls needing arch-wrappers (mainly for passing sp as pt_regs)
+ *   were not invoking post-syscall trace hook (jumping directly into
+ *   ret_from_system_call)
+ *
+ * vineetg: Nov 2010:
+ *  -Vector table jumps (@8 bytes) converted into branches (@4 bytes)
+ *  -To maintain the slot size of 8 bytes/vector, added nop, which is
+ *   not executed at runtime.
+ *
+ * vineetg: Nov 2009 (Everything needed for TIF_RESTORE_SIGMASK)
+ *  -do_signal()invoked upon TIF_RESTORE_SIGMASK as well
+ *  -Wrappers for sys_{,rt_}sigsuspend() no longer needed as they don't
+ *   need ptregs anymore
+ *
+ * Vineetg: Oct 2009
+ *  -In a rare scenario, Process gets a Priv-V exception and gets scheduled
+ *   out. Since we don't do FAKE RTIE for Priv-V, CPU exception state remains
+ *   active (AE bit enabled).  This causes a double fault for a subseq valid
+ *   exception. Thus FAKE RTIE needed in low level Priv-Violation handler.
+ *   Instr Error could also cause similar scenario, so same there as well.
+ *
+ * Vineetg: March 2009 (Supporting 2 levels of Interrupts)
+ *
+ * Vineetg: Aug 28th 2008: Bug #94984
+ *  -Zero Overhead Loop Context shd be cleared when entering IRQ/EXcp/Trap
+ *   Normally CPU does this automatically, however when doing FAKE rtie,
+ *   we need to explicitly do this. The problem in macros
+ *   FAKE_RET_FROM_EXCPN and FAKE_RET_FROM_EXCPN_LOCK_IRQ was that this bit
+ *   was being "CLEARED" rather then "SET". Since it is Loop INHIBIT Bit,
+ *   setting it and not clearing it clears ZOL context
+ *
+ * Vineetg: May 16th, 2008
+ *  - r25 now contains the Current Task when in kernel
+ *
+ * Vineetg: Dec 22, 2007
+ *    Minor Surgery of Low Level ISR to make it SMP safe
+ *    - MMU_SCRATCH0 Reg used for freeing up r9 in Level 1 ISR
+ *    - _current_task is made an array of NR_CPUS
+ *    - Access of _current_task wrapped inside a macro so that if hardware
+ *       team agrees for a dedicated reg, no other code is touched
+ *
+ * Amit Bhor, Rahul Trivedi, Kanika Nema, Sameer Dhavale : Codito Tech 2004
+ */
+
+#include <linux/errno.h>
+#include <linux/linkage.h>	/* {ENTRY,EXIT} */
+#include <asm/entry.h>
+#include <asm/irqflags.h>
+
+	.cpu A7
+
+;############################ Vector Table #################################
+
+.macro VECTOR  lbl
+#if 1   /* Just in case, build breaks */
+	j   \lbl
+#else
+	b   \lbl
+	nop
+#endif
+.endm
+
+	.section .vector, "ax",@progbits
+	.align 4
+
+/* Each entry in the vector table must occupy 2 words. Since it is a jump
+ * across sections (.vector to .text) we are guaranteed that 'j somewhere'
+ * will use the 'j limm' form of the instruction as long as somewhere is in
+ * a section other than .vector.
+ */
+
+; ********* Critical System Events **********************
+VECTOR   res_service             ; 0x0, Reset Vector	(0x0)
+VECTOR   mem_service             ; 0x8, Mem exception   (0x1)
+VECTOR   instr_service           ; 0x10, Instrn Error   (0x2)
+
+; ******************** Device ISRs **********************
+#ifdef CONFIG_ARC_COMPACT_IRQ_LEVELS
+VECTOR   handle_interrupt_level2
+#else
+VECTOR   handle_interrupt_level1
+#endif
+
+.rept   28
+VECTOR   handle_interrupt_level1 ; Other devices
+.endr
+
+/* FOR ARC600: timer = 0x3, uart = 0x8, emac = 0x10 */
+
+; ******************** Exceptions **********************
+VECTOR   EV_MachineCheck         ; 0x100, Fatal Machine check   (0x20)
+VECTOR   EV_TLBMissI             ; 0x108, Instruction TLB miss  (0x21)
+VECTOR   EV_TLBMissD             ; 0x110, Data TLB miss         (0x22)
+VECTOR   EV_TLBProtV             ; 0x118, Protection Violation  (0x23)
+				 ;         or Misaligned Access
+VECTOR   EV_PrivilegeV           ; 0x120, Privilege Violation   (0x24)
+VECTOR   EV_Trap                 ; 0x128, Trap exception        (0x25)
+VECTOR   EV_Extension            ; 0x130, Extn Instruction Excp (0x26)
+
+.rept   24
+VECTOR   reserved                ; Reserved Exceptions
+.endr
+
+
+;##################### Scratch Mem for IRQ stack switching #############
+
+ARCFP_DATA int1_saved_reg
+	.align 32
+	.type   int1_saved_reg, @object
+	.size   int1_saved_reg, 4
+int1_saved_reg:
+	.zero 4
+
+/* Each Interrupt level needs its own scratch */
+ARCFP_DATA int2_saved_reg
+	.type   int2_saved_reg, @object
+	.size   int2_saved_reg, 4
+int2_saved_reg:
+	.zero 4
+
+; ---------------------------------------------
+	.section .text, "ax",@progbits
+
+
+reserved:
+	flag 1		; Unexpected event, halt
+
+;##################### Interrupt Handling ##############################
+
+#ifdef CONFIG_ARC_COMPACT_IRQ_LEVELS
+; ---------------------------------------------
+;  Level 2 ISR: Can interrupt a Level 1 ISR
+; ---------------------------------------------
+ENTRY(handle_interrupt_level2)
+
+	INTERRUPT_PROLOGUE 2
+
+	;------------------------------------------------------
+	; if L2 IRQ interrupted a L1 ISR, disable preemption
+	;
+	; This is to avoid a potential L1-L2-L1 scenario
+	;  -L1 IRQ taken
+	;  -L2 interrupts L1 (before L1 ISR could run)
+	;  -preemption off IRQ, user task in syscall picked to run
+	;  -RTIE to userspace
+	;	Returns from L2 context fine
+	;	But both L1 and L2 re-enabled, so another L1 can be taken
+	;	while prev L1 is still unserviced
+	;
+	;------------------------------------------------------
+
+	; L2 interrupting L1 implies both L2 and L1 active
+	; However both A2 and A1 are NOT set in STATUS32, thus
+	; need to check STATUS32_L2 to determine if L1 was active
+
+	ld r9, [sp, PT_status32]        ; get statu32_l2 (saved in pt_regs)
+	bbit0 r9, STATUS_A1_BIT, 1f     ; L1 not active when L2 IRQ, so normal
+
+	; bump thread_info->preempt_count (Disable preemption)
+	GET_CURR_THR_INFO_FROM_SP   r10
+	ld      r9, [r10, THREAD_INFO_PREEMPT_COUNT]
+	add     r9, r9, 1
+	st      r9, [r10, THREAD_INFO_PREEMPT_COUNT]
+
+1:
+	;------------------------------------------------------
+	; setup params for Linux common ISR and invoke it
+	;------------------------------------------------------
+	lr  r0, [icause2]
+	and r0, r0, 0x1f
+
+	bl.d  @arch_do_IRQ
+	mov r1, sp
+
+	mov r8,0x2
+	sr r8, [AUX_IRQ_LV12]       ; clear bit in Sticky Status Reg
+
+	b   ret_from_exception
+
+END(handle_interrupt_level2)
+
+#endif
+
+; ---------------------------------------------
+; User Mode Memory Bus Error Interrupt Handler
+; (Kernel mode memory errors handled via separate exception vectors)
+; ---------------------------------------------
+ENTRY(mem_service)
+
+	INTERRUPT_PROLOGUE 2
+
+	mov r0, ilink2
+	mov r1, sp
+
+	; User process needs to be killed with SIGBUS, but first need to get
+	; out of the L2 interrupt context (drop to pure kernel mode) and jump
+	; off to "C" code where SIGBUS in enqueued
+	lr  r3, [status32]
+	bclr r3, r3, STATUS_A2_BIT
+	or  r3, r3, (STATUS_E1_MASK|STATUS_E2_MASK)
+	sr  r3, [status32_l2]
+	mov ilink2, 1f
+	rtie
+1:
+	bl  do_memory_error
+	b   ret_from_exception
+END(mem_service)
+
+; ---------------------------------------------
+;  Level 1 ISR
+; ---------------------------------------------
+ENTRY(handle_interrupt_level1)
+
+	INTERRUPT_PROLOGUE 1
+
+	lr  r0, [icause1]
+	and r0, r0, 0x1f
+
+#ifdef CONFIG_TRACE_IRQFLAGS
+	; icause1 needs to be read early, before calling tracing, which
+	; can clobber scratch regs, hence use of stack to stash it
+	push r0
+	TRACE_ASM_IRQ_DISABLE
+	pop  r0
+#endif
+
+	bl.d  @arch_do_IRQ
+	mov r1, sp
+
+	mov r8,0x1
+	sr r8, [AUX_IRQ_LV12]       ; clear bit in Sticky Status Reg
+
+	b   ret_from_exception
+END(handle_interrupt_level1)
+
+;################### Non TLB Exception Handling #############################
+
+; ---------------------------------------------
+; Protection Violation Exception Handler
+; ---------------------------------------------
+
+ENTRY(EV_TLBProtV)
+
+	EXCEPTION_PROLOGUE
+
+	mov r2, r9	; ECR set into r9 already
+	lr  r0, [efa]	; Faulting Data address (not part of pt_regs saved above)
+
+	; Exception auto-disables further Intr/exceptions.
+	; Re-enable them by pretending to return from exception
+	; (so rest of handler executes in pure K mode)
+
+	FAKE_RET_FROM_EXCPN
+
+	mov   r1, sp	; Handle to pt_regs
+
+	;------ (5) Type of Protection Violation? ----------
+	;
+	; ProtV Hardware Exception is triggered for Access Faults of 2 types
+	;   -Access Violation	: 00_23_(00|01|02|03)_00
+	;			         x  r  w  r+w
+	;   -Unaligned Access	: 00_23_04_00
+	;
+	bbit1 r2, ECR_C_BIT_PROTV_MISALIG_DATA, 4f
+
+	;========= (6a) Access Violation Processing ========
+	bl  do_page_fault
+	b   ret_from_exception
+
+	;========== (6b) Non aligned access ============
+4:
+
+	SAVE_CALLEE_SAVED_USER
+	mov r2, sp              ; callee_regs
+
+	bl  do_misaligned_access
+
+	; TBD: optimize - do this only if a callee reg was involved
+	; either a dst of emulated LD/ST or src with address-writeback
+	RESTORE_CALLEE_SAVED_USER
+
+	b   ret_from_exception
+
+END(EV_TLBProtV)
+
+; Wrapper for Linux page fault handler called from EV_TLBMiss*
+; Very similar to ProtV handler case (6a) above, but avoids the extra checks
+; for Misaligned access
+;
+ENTRY(call_do_page_fault)
+
+	EXCEPTION_PROLOGUE
+	lr  r0, [efa]	; Faulting Data address
+	mov   r1, sp
+	FAKE_RET_FROM_EXCPN
+
+	mov blink, ret_from_exception
+	b  do_page_fault
+
+END(call_do_page_fault)
+
+;############# Common Handlers for ARCompact and ARCv2 ##############
+
+#include "entry.S"
+
+;############# Return from Intr/Excp/Trap (ARC Specifics) ##############
+;
+; Restore the saved sys context (common exit-path for EXCPN/IRQ/Trap)
+; IRQ shd definitely not happen between now and rtie
+; All 2 entry points to here already disable interrupts
+
+.Lrestore_regs:
+
+	# Interrupts are actually disabled from this point on, but will get
+	# reenabled after we return from interrupt/exception.
+	# But irq tracer needs to be told now...
+	TRACE_ASM_IRQ_ENABLE
+
+	lr	r10, [status32]
+
+	; Restore REG File. In case multiple Events outstanding,
+	; use the same priority as rtie: EXCPN, L2 IRQ, L1 IRQ, None
+	; Note that we use realtime STATUS32 (not pt_regs->status32) to
+	; decide that.
+
+	and.f	0, r10, (STATUS_A1_MASK|STATUS_A2_MASK)
+	bz	.Lexcep_or_pure_K_ret
+
+	; Returning from Interrupts (Level 1 or 2)
+
+#ifdef CONFIG_ARC_COMPACT_IRQ_LEVELS
+
+	; Level 2 interrupt return Path - from hardware standpoint
+	bbit0  r10, STATUS_A2_BIT, not_level2_interrupt
+
+	;------------------------------------------------------------------
+	; However the context returning might not have taken L2 intr itself
+	; e.g. Task'A' user-code -> L2 intr -> schedule -> 'B' user-code ret
+	; Special considerations needed for the context which took L2 intr
+
+	ld   r9, [sp, PT_event]        ; Ensure this is L2 intr context
+	brne r9, event_IRQ2, 149f
+
+	;------------------------------------------------------------------
+	; if L2 IRQ interrupted an L1 ISR,  we'd disabled preemption earlier
+	; so that sched doesn't move to new task, causing L1 to be delayed
+	; undeterministically. Now that we've achieved that, let's reset
+	; things to what they were, before returning from L2 context
+	;----------------------------------------------------------------
+
+	ld r9, [sp, PT_status32]       ; get statu32_l2 (saved in pt_regs)
+	bbit0 r9, STATUS_A1_BIT, 149f  ; L1 not active when L2 IRQ, so normal
+
+	; decrement thread_info->preempt_count (re-enable preemption)
+	GET_CURR_THR_INFO_FROM_SP   r10
+	ld      r9, [r10, THREAD_INFO_PREEMPT_COUNT]
+
+	; paranoid check, given A1 was active when A2 happened, preempt count
+	; must not be 0 because we would have incremented it.
+	; If this does happen we simply HALT as it means a BUG !!!
+	cmp     r9, 0
+	bnz     2f
+	flag 1
+
+2:
+	sub     r9, r9, 1
+	st      r9, [r10, THREAD_INFO_PREEMPT_COUNT]
+
+149:
+	INTERRUPT_EPILOGUE 2	; return from level 2 interrupt
+debug_marker_l2:
+	rtie
+
+not_level2_interrupt:
+
+#endif
+
+	INTERRUPT_EPILOGUE 1	; return from level 1 interrupt
+debug_marker_l1:
+	rtie
+
+.Lexcep_or_pure_K_ret:
+
+	;this case is for syscalls or Exceptions or pure kernel mode
+
+	EXCEPTION_EPILOGUE
+debug_marker_syscall:
+	rtie
+
+END(ret_from_exception)
diff --git a/arch/arc/kernel/entry.S b/arch/arc/kernel/entry.S
new file mode 100644
index 000000000..fb458623f
--- /dev/null
+++ b/arch/arc/kernel/entry.S
@@ -0,0 +1,368 @@
+/*
+ * Common Low Level Interrupts/Traps/Exceptions(non-TLB) Handling for ARC
+ * (included from entry-<isa>.S
+ *
+ * Copyright (C) 2014-15 Synopsys, Inc. (www.synopsys.com)
+ * Copyright (C) 2004, 2007-2010, 2011-2012 Synopsys, Inc. (www.synopsys.com)
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+/*------------------------------------------------------------------
+ *    Function                            ABI
+ *------------------------------------------------------------------
+ *
+ *  Arguments                           r0 - r7
+ *  Caller Saved Registers              r0 - r12
+ *  Callee Saved Registers              r13- r25
+ *  Global Pointer (gp)                 r26
+ *  Frame Pointer (fp)                  r27
+ *  Stack Pointer (sp)                  r28
+ *  Branch link register (blink)        r31
+ *------------------------------------------------------------------
+ */
+
+;################### Special Sys Call Wrappers ##########################
+
+ENTRY(sys_clone_wrapper)
+	SAVE_CALLEE_SAVED_USER
+	bl  @sys_clone
+	DISCARD_CALLEE_SAVED_USER
+
+	GET_CURR_THR_INFO_FLAGS   r10
+	btst r10, TIF_SYSCALL_TRACE
+	bnz  tracesys_exit
+
+	b .Lret_from_system_call
+END(sys_clone_wrapper)
+
+ENTRY(ret_from_fork)
+	; when the forked child comes here from the __switch_to function
+	; r0 has the last task pointer.
+	; put last task in scheduler queue
+	jl   @schedule_tail
+
+	ld   r9, [sp, PT_status32]
+	brne r9, 0, 1f
+
+	jl.d [r14]		; kernel thread entry point
+	mov  r0, r13		; (see PF_KTHREAD block in copy_thread)
+
+1:
+	; Return to user space
+	; 1. Any forked task (Reach here via BRne above)
+	; 2. First ever init task (Reach here via return from JL above)
+	;    This is the historic "kernel_execve" use-case, to return to init
+	;    user mode, in a round about way since that is always done from
+	;    a kernel thread which is executed via JL above but always returns
+	;    out whenever kernel_execve (now inline do_fork()) is involved
+	b    ret_from_exception
+END(ret_from_fork)
+
+;################### Non TLB Exception Handling #############################
+
+; ---------------------------------------------
+; Instruction Error Exception Handler
+; ---------------------------------------------
+
+ENTRY(instr_service)
+
+	EXCEPTION_PROLOGUE
+
+	lr  r0, [efa]
+	mov r1, sp
+
+	FAKE_RET_FROM_EXCPN
+
+	bl  do_insterror_or_kprobe
+	b   ret_from_exception
+END(instr_service)
+
+; ---------------------------------------------
+; Machine Check Exception Handler
+; ---------------------------------------------
+
+ENTRY(EV_MachineCheck)
+
+	EXCEPTION_PROLOGUE
+
+	lr  r2, [ecr]
+	lr  r0, [efa]
+	mov r1, sp
+
+	; hardware auto-disables MMU, re-enable it to allow kernel vaddr
+	; access for say stack unwinding of modules for crash dumps
+	lr	r3, [ARC_REG_PID]
+	or	r3, r3, MMU_ENABLE
+	sr	r3, [ARC_REG_PID]
+
+	lsr  	r3, r2, 8
+	bmsk 	r3, r3, 7
+	brne    r3, ECR_C_MCHK_DUP_TLB, 1f
+
+	bl      do_tlb_overlap_fault
+	b       ret_from_exception
+
+1:
+	; DEAD END: can't do much, display Regs and HALT
+	SAVE_CALLEE_SAVED_USER
+
+	GET_CURR_TASK_FIELD_PTR   TASK_THREAD, r10
+	st  sp, [r10, THREAD_CALLEE_REG]
+
+	j  do_machine_check_fault
+
+END(EV_MachineCheck)
+
+; ---------------------------------------------
+; Privilege Violation Exception Handler
+; ---------------------------------------------
+ENTRY(EV_PrivilegeV)
+
+	EXCEPTION_PROLOGUE
+
+	lr  r0, [efa]
+	mov r1, sp
+
+	FAKE_RET_FROM_EXCPN
+
+	bl  do_privilege_fault
+	b   ret_from_exception
+END(EV_PrivilegeV)
+
+; ---------------------------------------------
+; Extension Instruction Exception Handler
+; ---------------------------------------------
+ENTRY(EV_Extension)
+
+	EXCEPTION_PROLOGUE
+
+	lr  r0, [efa]
+	mov r1, sp
+
+	FAKE_RET_FROM_EXCPN
+
+	bl  do_extension_fault
+	b   ret_from_exception
+END(EV_Extension)
+
+;################ Trap Handling (Syscall, Breakpoint) ##################
+
+; ---------------------------------------------
+; syscall Tracing
+; ---------------------------------------------
+tracesys:
+	; save EFA in case tracer wants the PC of traced task
+	; using ERET won't work since next-PC has already committed
+	lr  r12, [efa]
+	GET_CURR_TASK_FIELD_PTR   TASK_THREAD, r11
+	st  r12, [r11, THREAD_FAULT_ADDR]	; thread.fault_address
+
+	; PRE Sys Call Ptrace hook
+	mov r0, sp			; pt_regs needed
+	bl  @syscall_trace_entry
+
+	; Tracing code now returns the syscall num (orig or modif)
+	mov r8, r0
+
+	; Do the Sys Call as we normally would.
+	; Validate the Sys Call number
+	cmp     r8,  NR_syscalls - 1
+	mov.hi  r0, -ENOSYS
+	bhi     tracesys_exit
+
+	; Restore the sys-call args. Mere invocation of the hook abv could have
+	; clobbered them (since they are in scratch regs). The tracer could also
+	; have deliberately changed the syscall args: r0-r7
+	ld  r0, [sp, PT_r0]
+	ld  r1, [sp, PT_r1]
+	ld  r2, [sp, PT_r2]
+	ld  r3, [sp, PT_r3]
+	ld  r4, [sp, PT_r4]
+	ld  r5, [sp, PT_r5]
+	ld  r6, [sp, PT_r6]
+	ld  r7, [sp, PT_r7]
+	ld.as   r9, [sys_call_table, r8]
+	jl      [r9]        ; Entry into Sys Call Handler
+
+tracesys_exit:
+	st  r0, [sp, PT_r0]     ; sys call return value in pt_regs
+
+	;POST Sys Call Ptrace Hook
+	mov r0, sp		; pt_regs needed
+	bl  @syscall_trace_exit
+	b   ret_from_exception ; NOT ret_from_system_call at is saves r0 which
+	; we'd done before calling post hook above
+
+; ---------------------------------------------
+; Breakpoint TRAP
+; ---------------------------------------------
+trap_with_param:
+
+	; stop_pc info by gdb needs this info
+	lr  r0, [efa]
+	mov r1, sp
+
+	; Now that we have read EFA, it is safe to do "fake" rtie
+	;   and get out of CPU exception mode
+	FAKE_RET_FROM_EXCPN
+
+	; Save callee regs in case gdb wants to have a look
+	; SP will grow up by size of CALLEE Reg-File
+	; NOTE: clobbers r12
+	SAVE_CALLEE_SAVED_USER
+
+	; save location of saved Callee Regs @ thread_struct->pc
+	GET_CURR_TASK_FIELD_PTR   TASK_THREAD, r10
+	st  sp, [r10, THREAD_CALLEE_REG]
+
+	; Call the trap handler
+	bl  do_non_swi_trap
+
+	; unwind stack to discard Callee saved Regs
+	DISCARD_CALLEE_SAVED_USER
+
+	b   ret_from_exception
+
+; ---------------------------------------------
+; syscall TRAP
+; ABI: (r0-r7) upto 8 args, (r8) syscall number
+; ---------------------------------------------
+
+ENTRY(EV_Trap)
+
+	EXCEPTION_PROLOGUE
+
+	;============ TRAP 1   :breakpoints
+	; Check ECR for trap with arg (PROLOGUE ensures r9 has ECR)
+	bmsk.f 0, r9, 7
+	bnz    trap_with_param
+
+	;============ TRAP  (no param): syscall top level
+
+	; First return from Exception to pure K mode (Exception/IRQs renabled)
+	FAKE_RET_FROM_EXCPN
+
+	; If syscall tracing ongoing, invoke pre-post-hooks
+	GET_CURR_THR_INFO_FLAGS   r10
+	btst r10, TIF_SYSCALL_TRACE
+	bnz tracesys  ; this never comes back
+
+	;============ Normal syscall case
+
+	; syscall num shd not exceed the total system calls avail
+	cmp     r8,  NR_syscalls - 1
+	mov.hi  r0, -ENOSYS
+	bhi     .Lret_from_system_call
+
+	; Offset into the syscall_table and call handler
+	ld.as   r9,[sys_call_table, r8]
+	jl      [r9]        ; Entry into Sys Call Handler
+
+.Lret_from_system_call:
+
+	st  r0, [sp, PT_r0]     ; sys call return value in pt_regs
+
+	; fall through to ret_from_exception
+END(EV_Trap)
+
+;############# Return from Intr/Excp/Trap (Linux Specifics) ##############
+;
+; If ret to user mode do we need to handle signals, schedule() et al.
+
+ENTRY(ret_from_exception)
+
+	; Pre-{IRQ,Trap,Exception} K/U mode from pt_regs->status32
+	ld  r8, [sp, PT_status32]   ; returning to User/Kernel Mode
+
+	bbit0  r8, STATUS_U_BIT, resume_kernel_mode
+
+	; Before returning to User mode check-for-and-complete any pending work
+	; such as rescheduling/signal-delivery etc.
+resume_user_mode_begin:
+
+	; Disable IRQs to ensures that chk for pending work itself is atomic
+	; (and we don't end up missing a NEED_RESCHED/SIGPENDING due to an
+	; interim IRQ).
+	IRQ_DISABLE	r10
+
+	; Fast Path return to user mode if no pending work
+	GET_CURR_THR_INFO_FLAGS   r9
+	and.f  0,  r9, _TIF_WORK_MASK
+	bz     .Lrestore_regs
+
+	; --- (Slow Path #1) task preemption ---
+	bbit0  r9, TIF_NEED_RESCHED, .Lchk_pend_signals
+	mov    blink, resume_user_mode_begin  ; tail-call to U mode ret chks
+	j      @schedule 	; BTST+Bnz causes relo error in link
+
+.Lchk_pend_signals:
+	IRQ_ENABLE	r10
+
+	; --- (Slow Path #2) pending signal  ---
+	mov r0, sp	; pt_regs for arg to do_signal()/do_notify_resume()
+
+	GET_CURR_THR_INFO_FLAGS   r9
+	bbit0  r9, TIF_SIGPENDING, .Lchk_notify_resume
+
+	; Normal Trap/IRQ entry only saves Scratch (caller-saved) regs
+	; in pt_reg since the "C" ABI (kernel code) will automatically
+	; save/restore callee-saved regs.
+	;
+	; However, here we need to explicitly save callee regs because
+	; (i)  If this signal causes coredump - full regfile needed
+	; (ii) If signal is SIGTRAP/SIGSTOP, task is being traced thus
+	;      tracer might call PEEKUSR(CALLEE reg)
+	;
+	; NOTE: SP will grow up by size of CALLEE Reg-File
+	SAVE_CALLEE_SAVED_USER		; clobbers r12
+
+	; save location of saved Callee Regs @ thread_struct->callee
+	GET_CURR_TASK_FIELD_PTR   TASK_THREAD, r10
+	st  sp, [r10, THREAD_CALLEE_REG]
+
+	bl  @do_signal
+
+	; Ideally we want to discard the Callee reg above, however if this was
+	; a tracing signal, tracer could have done a POKEUSR(CALLEE reg)
+	RESTORE_CALLEE_SAVED_USER
+
+	b      resume_user_mode_begin	; loop back to start of U mode ret
+
+	; --- (Slow Path #3) notify_resume ---
+.Lchk_notify_resume:
+	btst   r9, TIF_NOTIFY_RESUME
+	blnz   @do_notify_resume
+	b      resume_user_mode_begin	; unconditionally back to U mode ret chks
+					; for single exit point from this block
+
+resume_kernel_mode:
+
+	; Disable Interrupts from this point on
+	; CONFIG_PREEMPT: This is a must for preempt_schedule_irq()
+	; !CONFIG_PREEMPT: To ensure restore_regs is intr safe
+	IRQ_DISABLE	r9
+
+#ifdef CONFIG_PREEMPT
+
+	; Can't preempt if preemption disabled
+	GET_CURR_THR_INFO_FROM_SP   r10
+	ld  r8, [r10, THREAD_INFO_PREEMPT_COUNT]
+	brne  r8, 0, .Lrestore_regs
+
+	; check if this task's NEED_RESCHED flag set
+	ld  r9, [r10, THREAD_INFO_FLAGS]
+	bbit0  r9, TIF_NEED_RESCHED, .Lrestore_regs
+
+	; Invoke PREEMPTION
+	jl      preempt_schedule_irq
+
+	; preempt_schedule_irq() always returns with IRQ disabled
+#endif
+
+	b	.Lrestore_regs
+
+##### DONT ADD CODE HERE - .Lrestore_regs actually follows in entry-<isa>.S
+
diff --git a/arch/arc/kernel/fpu.c b/arch/arc/kernel/fpu.c
new file mode 100644
index 000000000..f352e512c
--- /dev/null
+++ b/arch/arc/kernel/fpu.c
@@ -0,0 +1,55 @@
+/*
+ * fpu.c - save/restore of Floating Point Unit Registers on task switch
+ *
+ * Copyright (C) 2004, 2007-2010, 2011-2012 Synopsys, Inc. (www.synopsys.com)
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/sched.h>
+#include <asm/switch_to.h>
+
+/*
+ * To save/restore FPU regs, simplest scheme would use LR/SR insns.
+ * However since SR serializes the pipeline, an alternate "hack" can be used
+ * which uses the FPU Exchange insn (DEXCL) to r/w FPU regs.
+ *
+ * Store to 64bit dpfp1 reg from a pair of core regs:
+ *   dexcl1 0, r1, r0  ; where r1:r0 is the 64 bit val
+ *
+ * Read from dpfp1 into pair of core regs (w/o clobbering dpfp1)
+ *   mov_s    r3, 0
+ *   daddh11  r1, r3, r3   ; get "hi" into r1 (dpfp1 unchanged)
+ *   dexcl1   r0, r1, r3   ; get "low" into r0 (dpfp1 low clobbered)
+ *   dexcl1    0, r1, r0   ; restore dpfp1 to orig value
+ *
+ * However we can tweak the read, so that read-out of outgoing task's FPU regs
+ * and write of incoming task's regs happen in one shot. So all the work is
+ * done before context switch
+ */
+
+void fpu_save_restore(struct task_struct *prev, struct task_struct *next)
+{
+	unsigned int *saveto = &prev->thread.fpu.aux_dpfp[0].l;
+	unsigned int *readfrom = &next->thread.fpu.aux_dpfp[0].l;
+
+	const unsigned int zero = 0;
+
+	__asm__ __volatile__(
+		"daddh11  %0, %2, %2\n"
+		"dexcl1   %1, %3, %4\n"
+		: "=&r" (*(saveto + 1)), /* early clobber must here */
+		  "=&r" (*(saveto))
+		: "r" (zero), "r" (*(readfrom + 1)), "r" (*(readfrom))
+	);
+
+	__asm__ __volatile__(
+		"daddh22  %0, %2, %2\n"
+		"dexcl2   %1, %3, %4\n"
+		: "=&r"(*(saveto + 3)),	/* early clobber must here */
+		  "=&r"(*(saveto + 2))
+		: "r" (zero), "r" (*(readfrom + 3)), "r" (*(readfrom + 2))
+	);
+}
diff --git a/arch/arc/kernel/head.S b/arch/arc/kernel/head.S
new file mode 100644
index 000000000..a72bbda2f
--- /dev/null
+++ b/arch/arc/kernel/head.S
@@ -0,0 +1,144 @@
+/*
+ * ARC CPU startup Code
+ *
+ * Copyright (C) 2004, 2007-2010, 2011-2012 Synopsys, Inc. (www.synopsys.com)
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * Vineetg: Dec 2007
+ *  -Check if we are running on Simulator or on real hardware
+ *      to skip certain things during boot on simulator
+ */
+
+#include <linux/linkage.h>
+#include <asm/asm-offsets.h>
+#include <asm/entry.h>
+#include <asm/arcregs.h>
+#include <asm/cache.h>
+#include <asm/irqflags.h>
+
+.macro CPU_EARLY_SETUP
+
+	; Setting up Vectror Table (in case exception happens in early boot
+	sr	@_int_vec_base_lds, [AUX_INTR_VEC_BASE]
+
+	; Disable I-cache/D-cache if kernel so configured
+	lr	r5, [ARC_REG_IC_BCR]
+	breq    r5, 0, 1f		; I$ doesn't exist
+	lr	r5, [ARC_REG_IC_CTRL]
+#ifdef CONFIG_ARC_HAS_ICACHE
+	bclr	r5, r5, 0		; 0 - Enable, 1 is Disable
+#else
+	bset	r5, r5, 0		; I$ exists, but is not used
+#endif
+	sr	r5, [ARC_REG_IC_CTRL]
+
+1:
+	lr	r5, [ARC_REG_DC_BCR]
+	breq    r5, 0, 1f		; D$ doesn't exist
+	lr	r5, [ARC_REG_DC_CTRL]
+	bclr	r5, r5, 6		; Invalidate (discard w/o wback)
+#ifdef CONFIG_ARC_HAS_DCACHE
+	bclr	r5, r5, 0		; Enable (+Inv)
+#else
+	bset	r5, r5, 0		; Disable (+Inv)
+#endif
+	sr	r5, [ARC_REG_DC_CTRL]
+
+1:
+
+#ifdef CONFIG_ISA_ARCV2
+	; Unaligned access is disabled at reset, so re-enable early as
+	; gcc 7.3.1 (ARC GNU 2018.03) onwards generates unaligned access
+	; by default
+	lr	r5, [status32]
+	bset	r5, r5, STATUS_AD_BIT
+	kflag	r5
+#endif
+.endm
+
+	.section .init.text, "ax",@progbits
+
+;----------------------------------------------------------------
+; Default Reset Handler (jumped into from Reset vector)
+; - Don't clobber r0,r1,r2 as they might have u-boot provided args
+; - Platforms can override this weak version if needed
+;----------------------------------------------------------------
+WEAK(res_service)
+	j	stext
+END(res_service)
+
+;----------------------------------------------------------------
+; Kernel Entry point
+;----------------------------------------------------------------
+ENTRY(stext)
+
+	CPU_EARLY_SETUP
+
+#ifdef CONFIG_SMP
+	GET_CPU_ID  r5
+	cmp	r5, 0
+	mov.nz	r0, r5
+	bz	.Lmaster_proceed
+
+	; Non-Masters wait for Master to boot enough and bring them up
+	; when they resume, tail-call to entry point
+	mov	blink, @first_lines_of_secondary
+	j	arc_platform_smp_wait_to_boot
+
+.Lmaster_proceed:
+#endif
+
+	; Clear BSS before updating any globals
+	; XXX: use ZOL here
+	mov	r5, __bss_start
+	sub	r6, __bss_stop, r5
+	lsr.f	lp_count, r6, 2
+	lpnz	1f
+	st.ab   0, [r5, 4]
+1:
+
+	; Uboot - kernel ABI
+	;    r0 = [0] No uboot interaction, [1] cmdline in r2, [2] DTB in r2
+	;    r1 = magic number (always zero as of now)
+	;    r2 = pointer to uboot provided cmdline or external DTB in mem
+	; These are handled later in handle_uboot_args()
+	st	r0, [@uboot_tag]
+	st      r1, [@uboot_magic]
+	st	r2, [@uboot_arg]
+
+	; setup "current" tsk and optionally cache it in dedicated r25
+	mov	r9, @init_task
+	SET_CURR_TASK_ON_CPU  r9, r0	; r9 = tsk, r0 = scratch
+
+	; setup stack (fp, sp)
+	mov	fp, 0
+
+	; tsk->thread_info is really a PAGE, whose bottom hoists stack
+	GET_TSK_STACK_BASE r9, sp	; r9 = tsk, sp = stack base(output)
+
+	j	start_kernel	; "C" entry point
+END(stext)
+
+#ifdef CONFIG_SMP
+;----------------------------------------------------------------
+;     First lines of code run by secondary before jumping to 'C'
+;----------------------------------------------------------------
+	.section .text, "ax",@progbits
+ENTRY(first_lines_of_secondary)
+
+	; setup per-cpu idle task as "current" on this CPU
+	ld	r0, [@secondary_idle_tsk]
+	SET_CURR_TASK_ON_CPU  r0, r1
+
+	; setup stack (fp, sp)
+	mov	fp, 0
+
+	; set it's stack base to tsk->thread_info bottom
+	GET_TSK_STACK_BASE r0, sp
+
+	j	start_kernel_secondary
+END(first_lines_of_secondary)
+#endif
diff --git a/arch/arc/kernel/intc-arcv2.c b/arch/arc/kernel/intc-arcv2.c
new file mode 100644
index 000000000..cf18b3e5a
--- /dev/null
+++ b/arch/arc/kernel/intc-arcv2.c
@@ -0,0 +1,195 @@
+/*
+ * Copyright (C) 2014 Synopsys, Inc. (www.synopsys.com)
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ */
+
+#include <linux/interrupt.h>
+#include <linux/module.h>
+#include <linux/of.h>
+#include <linux/irqdomain.h>
+#include <linux/irqchip.h>
+#include <asm/irq.h>
+
+#define NR_EXCEPTIONS	16
+
+struct bcr_irq_arcv2 {
+#ifdef CONFIG_CPU_BIG_ENDIAN
+	unsigned int pad:3, firq:1, prio:4, exts:8, irqs:8, ver:8;
+#else
+	unsigned int ver:8, irqs:8, exts:8, prio:4, firq:1, pad:3;
+#endif
+};
+
+/*
+ * Early Hardware specific Interrupt setup
+ * -Called very early (start_kernel -> setup_arch -> setup_processor)
+ * -Platform Independent (must for any ARC Core)
+ * -Needed for each CPU (hence not foldable into init_IRQ)
+ */
+void arc_init_IRQ(void)
+{
+	unsigned int tmp, irq_prio, i;
+	struct bcr_irq_arcv2 irq_bcr;
+
+	struct aux_irq_ctrl {
+#ifdef CONFIG_CPU_BIG_ENDIAN
+		unsigned int res3:18, save_idx_regs:1, res2:1,
+			     save_u_to_u:1, save_lp_regs:1, save_blink:1,
+			     res:4, save_nr_gpr_pairs:5;
+#else
+		unsigned int save_nr_gpr_pairs:5, res:4,
+			     save_blink:1, save_lp_regs:1, save_u_to_u:1,
+			     res2:1, save_idx_regs:1, res3:18;
+#endif
+	} ictrl;
+
+	*(unsigned int *)&ictrl = 0;
+
+#ifndef CONFIG_ARC_IRQ_NO_AUTOSAVE
+	ictrl.save_nr_gpr_pairs = 6;	/* r0 to r11 (r12 saved manually) */
+	ictrl.save_blink = 1;
+	ictrl.save_lp_regs = 1;		/* LP_COUNT, LP_START, LP_END */
+	ictrl.save_u_to_u = 0;		/* user ctxt saved on kernel stack */
+	ictrl.save_idx_regs = 1;	/* JLI, LDI, EI */
+#endif
+
+	WRITE_AUX(AUX_IRQ_CTRL, ictrl);
+
+	/*
+	 * ARCv2 core intc provides multiple interrupt priorities (upto 16).
+	 * Typical builds though have only two levels (0-high, 1-low)
+	 * Linux by default uses lower prio 1 for most irqs, reserving 0 for
+	 * NMI style interrupts in future (say perf)
+	 */
+
+	READ_BCR(ARC_REG_IRQ_BCR, irq_bcr);
+
+	irq_prio = irq_bcr.prio;	/* Encoded as N-1 for N levels */
+	pr_info("archs-intc\t: %d priority levels (default %d)%s\n",
+		irq_prio + 1, ARCV2_IRQ_DEF_PRIO,
+		irq_bcr.firq ? " FIRQ (not used)":"");
+
+	/*
+	 * Set a default priority for all available interrupts to prevent
+	 * switching of register banks if Fast IRQ and multiple register banks
+	 * are supported by CPU.
+	 * Also disable private-per-core IRQ lines so faulty external HW won't
+	 * trigger interrupt that kernel is not ready to handle.
+	 */
+	for (i = NR_EXCEPTIONS; i < irq_bcr.irqs + NR_EXCEPTIONS; i++) {
+		write_aux_reg(AUX_IRQ_SELECT, i);
+		write_aux_reg(AUX_IRQ_PRIORITY, ARCV2_IRQ_DEF_PRIO);
+
+		/*
+		 * Only mask cpu private IRQs here.
+		 * "common" interrupts are masked at IDU, otherwise it would
+		 * need to be unmasked at each cpu, with IPIs
+		 */
+		if (i < FIRST_EXT_IRQ)
+			write_aux_reg(AUX_IRQ_ENABLE, 0);
+	}
+
+	/* setup status32, don't enable intr yet as kernel doesn't want */
+	tmp = read_aux_reg(ARC_REG_STATUS32);
+	tmp |= STATUS_AD_MASK | (ARCV2_IRQ_DEF_PRIO << 1);
+	tmp &= ~STATUS_IE_MASK;
+	asm volatile("kflag %0	\n"::"r"(tmp));
+}
+
+static void arcv2_irq_mask(struct irq_data *data)
+{
+	write_aux_reg(AUX_IRQ_SELECT, data->hwirq);
+	write_aux_reg(AUX_IRQ_ENABLE, 0);
+}
+
+static void arcv2_irq_unmask(struct irq_data *data)
+{
+	write_aux_reg(AUX_IRQ_SELECT, data->hwirq);
+	write_aux_reg(AUX_IRQ_ENABLE, 1);
+}
+
+void arcv2_irq_enable(struct irq_data *data)
+{
+	/* set default priority */
+	write_aux_reg(AUX_IRQ_SELECT, data->hwirq);
+	write_aux_reg(AUX_IRQ_PRIORITY, ARCV2_IRQ_DEF_PRIO);
+
+	/*
+	 * hw auto enables (linux unmask) all by default
+	 * So no need to do IRQ_ENABLE here
+	 * XXX: However OSCI LAN need it
+	 */
+	write_aux_reg(AUX_IRQ_ENABLE, 1);
+}
+
+static struct irq_chip arcv2_irq_chip = {
+	.name           = "ARCv2 core Intc",
+	.irq_mask	= arcv2_irq_mask,
+	.irq_unmask	= arcv2_irq_unmask,
+	.irq_enable	= arcv2_irq_enable
+};
+
+static int arcv2_irq_map(struct irq_domain *d, unsigned int irq,
+			 irq_hw_number_t hw)
+{
+	/*
+	 * core intc IRQs [16, 23]:
+	 * Statically assigned always private-per-core (Timers, WDT, IPI, PCT)
+	 */
+	if (hw < FIRST_EXT_IRQ) {
+		/*
+		 * A subsequent request_percpu_irq() fails if percpu_devid is
+		 * not set. That in turns sets NOAUTOEN, meaning each core needs
+		 * to call enable_percpu_irq()
+		 */
+		irq_set_percpu_devid(irq);
+		irq_set_chip_and_handler(irq, &arcv2_irq_chip, handle_percpu_irq);
+	} else {
+		irq_set_chip_and_handler(irq, &arcv2_irq_chip, handle_level_irq);
+	}
+
+	return 0;
+}
+
+static const struct irq_domain_ops arcv2_irq_ops = {
+	.xlate = irq_domain_xlate_onecell,
+	.map = arcv2_irq_map,
+};
+
+
+static int __init
+init_onchip_IRQ(struct device_node *intc, struct device_node *parent)
+{
+	struct irq_domain *root_domain;
+	struct bcr_irq_arcv2 irq_bcr;
+	unsigned int nr_cpu_irqs;
+
+	READ_BCR(ARC_REG_IRQ_BCR, irq_bcr);
+	nr_cpu_irqs = irq_bcr.irqs + NR_EXCEPTIONS;
+
+	if (parent)
+		panic("DeviceTree incore intc not a root irq controller\n");
+
+	root_domain = irq_domain_add_linear(intc, nr_cpu_irqs, &arcv2_irq_ops, NULL);
+	if (!root_domain)
+		panic("root irq domain not avail\n");
+
+	/*
+	 * Needed for primary domain lookup to succeed
+	 * This is a primary irqchip, and can never have a parent
+	 */
+	irq_set_default_host(root_domain);
+
+#ifdef CONFIG_SMP
+	irq_create_mapping(root_domain, IPI_IRQ);
+#endif
+	irq_create_mapping(root_domain, SOFTIRQ_IRQ);
+
+	return 0;
+}
+
+IRQCHIP_DECLARE(arc_intc, "snps,archs-intc", init_onchip_IRQ);
diff --git a/arch/arc/kernel/intc-compact.c b/arch/arc/kernel/intc-compact.c
new file mode 100644
index 000000000..47b421fa0
--- /dev/null
+++ b/arch/arc/kernel/intc-compact.c
@@ -0,0 +1,174 @@
+/*
+ * Copyright (C) 2011-12 Synopsys, Inc. (www.synopsys.com)
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ */
+
+#include <linux/interrupt.h>
+#include <linux/module.h>
+#include <linux/of.h>
+#include <linux/irqdomain.h>
+#include <linux/irqchip.h>
+#include <asm/irq.h>
+
+#define NR_CPU_IRQS	32	/* number of irq lines coming in */
+#define TIMER0_IRQ	3	/* Fixed by ISA */
+
+/*
+ * Early Hardware specific Interrupt setup
+ * -Platform independent, needed for each CPU (not foldable into init_IRQ)
+ * -Called very early (start_kernel -> setup_arch -> setup_processor)
+ *
+ * what it does ?
+ * -Optionally, setup the High priority Interrupts as Level 2 IRQs
+ */
+void arc_init_IRQ(void)
+{
+	unsigned int level_mask = 0, i;
+
+       /* Is timer high priority Interrupt (Level2 in ARCompact jargon) */
+	level_mask |= IS_ENABLED(CONFIG_ARC_COMPACT_IRQ_LEVELS) << TIMER0_IRQ;
+
+	/*
+	 * Write to register, even if no LV2 IRQs configured to reset it
+	 * in case bootloader had mucked with it
+	 */
+	write_aux_reg(AUX_IRQ_LEV, level_mask);
+
+	if (level_mask)
+		pr_info("Level-2 interrupts bitset %x\n", level_mask);
+
+	/*
+	 * Disable all IRQ lines so faulty external hardware won't
+	 * trigger interrupt that kernel is not ready to handle.
+	 */
+	for (i = TIMER0_IRQ; i < NR_CPU_IRQS; i++) {
+		unsigned int ienb;
+
+		ienb = read_aux_reg(AUX_IENABLE);
+		ienb &= ~(1 << i);
+		write_aux_reg(AUX_IENABLE, ienb);
+	}
+}
+
+/*
+ * ARC700 core includes a simple on-chip intc supporting
+ * -per IRQ enable/disable
+ * -2 levels of interrupts (high/low)
+ * -all interrupts being level triggered
+ *
+ * To reduce platform code, we assume all IRQs directly hooked-up into intc.
+ * Platforms with external intc, hence cascaded IRQs, are free to over-ride
+ * below, per IRQ.
+ */
+
+static void arc_irq_mask(struct irq_data *data)
+{
+	unsigned int ienb;
+
+	ienb = read_aux_reg(AUX_IENABLE);
+	ienb &= ~(1 << data->hwirq);
+	write_aux_reg(AUX_IENABLE, ienb);
+}
+
+static void arc_irq_unmask(struct irq_data *data)
+{
+	unsigned int ienb;
+
+	ienb = read_aux_reg(AUX_IENABLE);
+	ienb |= (1 << data->hwirq);
+	write_aux_reg(AUX_IENABLE, ienb);
+}
+
+static struct irq_chip onchip_intc = {
+	.name           = "ARC In-core Intc",
+	.irq_mask	= arc_irq_mask,
+	.irq_unmask	= arc_irq_unmask,
+};
+
+static int arc_intc_domain_map(struct irq_domain *d, unsigned int irq,
+			       irq_hw_number_t hw)
+{
+	switch (hw) {
+	case TIMER0_IRQ:
+		irq_set_percpu_devid(irq);
+		irq_set_chip_and_handler(irq, &onchip_intc, handle_percpu_irq);
+		break;
+	default:
+		irq_set_chip_and_handler(irq, &onchip_intc, handle_level_irq);
+	}
+	return 0;
+}
+
+static const struct irq_domain_ops arc_intc_domain_ops = {
+	.xlate = irq_domain_xlate_onecell,
+	.map = arc_intc_domain_map,
+};
+
+static int __init
+init_onchip_IRQ(struct device_node *intc, struct device_node *parent)
+{
+	struct irq_domain *root_domain;
+
+	if (parent)
+		panic("DeviceTree incore intc not a root irq controller\n");
+
+	root_domain = irq_domain_add_linear(intc, NR_CPU_IRQS,
+					    &arc_intc_domain_ops, NULL);
+	if (!root_domain)
+		panic("root irq domain not avail\n");
+
+	/*
+	 * Needed for primary domain lookup to succeed
+	 * This is a primary irqchip, and can never have a parent
+	 */
+	irq_set_default_host(root_domain);
+
+	return 0;
+}
+
+IRQCHIP_DECLARE(arc_intc, "snps,arc700-intc", init_onchip_IRQ);
+
+/*
+ * arch_local_irq_enable - Enable interrupts.
+ *
+ * 1. Explicitly called to re-enable interrupts
+ * 2. Implicitly called from spin_unlock_irq, write_unlock_irq etc
+ *    which maybe in hard ISR itself
+ *
+ * Semantics of this function change depending on where it is called from:
+ *
+ * -If called from hard-ISR, it must not invert interrupt priorities
+ *  e.g. suppose TIMER is high priority (Level 2) IRQ
+ *    Time hard-ISR, timer_interrupt( ) calls spin_unlock_irq several times.
+ *    Here local_irq_enable( ) shd not re-enable lower priority interrupts
+ * -If called from soft-ISR, it must re-enable all interrupts
+ *    soft ISR are low prioity jobs which can be very slow, thus all IRQs
+ *    must be enabled while they run.
+ *    Now hardware context wise we may still be in L2 ISR (not done rtie)
+ *    still we must re-enable both L1 and L2 IRQs
+ *  Another twist is prev scenario with flow being
+ *     L1 ISR ==> interrupted by L2 ISR  ==> L2 soft ISR
+ *     here we must not re-enable Ll as prev Ll Interrupt's h/w context will get
+ *     over-written (this is deficiency in ARC700 Interrupt mechanism)
+ */
+
+#ifdef CONFIG_ARC_COMPACT_IRQ_LEVELS	/* Complex version for 2 IRQ levels */
+
+void arch_local_irq_enable(void)
+{
+	unsigned long flags = arch_local_save_flags();
+
+	if (flags & STATUS_A2_MASK)
+		flags |= STATUS_E2_MASK;
+	else if (flags & STATUS_A1_MASK)
+		flags |= STATUS_E1_MASK;
+
+	arch_local_irq_restore(flags);
+}
+
+EXPORT_SYMBOL(arch_local_irq_enable);
+#endif
diff --git a/arch/arc/kernel/irq.c b/arch/arc/kernel/irq.c
new file mode 100644
index 000000000..62b185057
--- /dev/null
+++ b/arch/arc/kernel/irq.c
@@ -0,0 +1,47 @@
+/*
+ * Copyright (C) 2011-12 Synopsys, Inc. (www.synopsys.com)
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ */
+
+#include <linux/interrupt.h>
+#include <linux/irqchip.h>
+#include <asm/mach_desc.h>
+#include <asm/smp.h>
+
+/*
+ * Late Interrupt system init called from start_kernel for Boot CPU only
+ *
+ * Since slab must already be initialized, platforms can start doing any
+ * needed request_irq( )s
+ */
+void __init init_IRQ(void)
+{
+	/*
+	 * process the entire interrupt tree in one go
+	 * Any external intc will be setup provided DT chains them
+	 * properly
+	 */
+	irqchip_init();
+
+#ifdef CONFIG_SMP
+	/* a SMP H/w block could do IPI IRQ request here */
+	if (plat_smp_ops.init_per_cpu)
+		plat_smp_ops.init_per_cpu(smp_processor_id());
+#endif
+
+	if (machine_desc->init_per_cpu)
+		machine_desc->init_per_cpu(smp_processor_id());
+}
+
+/*
+ * "C" Entry point for any ARC ISR, called from low level vector handler
+ * @irq is the vector number read from ICAUSE reg of on-chip intc
+ */
+void arch_do_IRQ(unsigned int hwirq, struct pt_regs *regs)
+{
+	handle_domain_irq(NULL, hwirq, regs);
+}
diff --git a/arch/arc/kernel/kgdb.c b/arch/arc/kernel/kgdb.c
new file mode 100644
index 000000000..9a3c34af2
--- /dev/null
+++ b/arch/arc/kernel/kgdb.c
@@ -0,0 +1,214 @@
+/*
+ * kgdb support for ARC
+ *
+ * Copyright (C) 2012 Synopsys, Inc. (www.synopsys.com)
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/kgdb.h>
+#include <linux/sched.h>
+#include <linux/sched/task_stack.h>
+#include <asm/disasm.h>
+#include <asm/cacheflush.h>
+
+static void to_gdb_regs(unsigned long *gdb_regs, struct pt_regs *kernel_regs,
+			struct callee_regs *cregs)
+{
+	int regno;
+
+	for (regno = 0; regno <= 26; regno++)
+		gdb_regs[_R0 + regno] = get_reg(regno, kernel_regs, cregs);
+
+	for (regno = 27; regno < GDB_MAX_REGS; regno++)
+		gdb_regs[regno] = 0;
+
+	gdb_regs[_FP]		= kernel_regs->fp;
+	gdb_regs[__SP]		= kernel_regs->sp;
+	gdb_regs[_BLINK]	= kernel_regs->blink;
+	gdb_regs[_RET]		= kernel_regs->ret;
+	gdb_regs[_STATUS32]	= kernel_regs->status32;
+	gdb_regs[_LP_COUNT]	= kernel_regs->lp_count;
+	gdb_regs[_LP_END]	= kernel_regs->lp_end;
+	gdb_regs[_LP_START]	= kernel_regs->lp_start;
+	gdb_regs[_BTA]		= kernel_regs->bta;
+	gdb_regs[_STOP_PC]	= kernel_regs->ret;
+}
+
+static void from_gdb_regs(unsigned long *gdb_regs, struct pt_regs *kernel_regs,
+			struct callee_regs *cregs)
+{
+	int regno;
+
+	for (regno = 0; regno <= 26; regno++)
+		set_reg(regno, gdb_regs[regno + _R0], kernel_regs, cregs);
+
+	kernel_regs->fp		= gdb_regs[_FP];
+	kernel_regs->sp		= gdb_regs[__SP];
+	kernel_regs->blink	= gdb_regs[_BLINK];
+	kernel_regs->ret	= gdb_regs[_RET];
+	kernel_regs->status32	= gdb_regs[_STATUS32];
+	kernel_regs->lp_count	= gdb_regs[_LP_COUNT];
+	kernel_regs->lp_end	= gdb_regs[_LP_END];
+	kernel_regs->lp_start	= gdb_regs[_LP_START];
+	kernel_regs->bta	= gdb_regs[_BTA];
+}
+
+
+void pt_regs_to_gdb_regs(unsigned long *gdb_regs, struct pt_regs *kernel_regs)
+{
+	to_gdb_regs(gdb_regs, kernel_regs, (struct callee_regs *)
+		current->thread.callee_reg);
+}
+
+void gdb_regs_to_pt_regs(unsigned long *gdb_regs, struct pt_regs *kernel_regs)
+{
+	from_gdb_regs(gdb_regs, kernel_regs, (struct callee_regs *)
+		current->thread.callee_reg);
+}
+
+void sleeping_thread_to_gdb_regs(unsigned long *gdb_regs,
+				 struct task_struct *task)
+{
+	if (task)
+		to_gdb_regs(gdb_regs, task_pt_regs(task),
+			(struct callee_regs *) task->thread.callee_reg);
+}
+
+struct single_step_data_t {
+	uint16_t opcode[2];
+	unsigned long address[2];
+	int is_branch;
+	int armed;
+} single_step_data;
+
+static void undo_single_step(struct pt_regs *regs)
+{
+	if (single_step_data.armed) {
+		int i;
+
+		for (i = 0; i < (single_step_data.is_branch ? 2 : 1); i++) {
+			memcpy((void *) single_step_data.address[i],
+				&single_step_data.opcode[i],
+				BREAK_INSTR_SIZE);
+
+			flush_icache_range(single_step_data.address[i],
+				single_step_data.address[i] +
+				BREAK_INSTR_SIZE);
+		}
+		single_step_data.armed = 0;
+	}
+}
+
+static void place_trap(unsigned long address, void *save)
+{
+	memcpy(save, (void *) address, BREAK_INSTR_SIZE);
+	memcpy((void *) address, &arch_kgdb_ops.gdb_bpt_instr,
+		BREAK_INSTR_SIZE);
+	flush_icache_range(address, address + BREAK_INSTR_SIZE);
+}
+
+static void do_single_step(struct pt_regs *regs)
+{
+	single_step_data.is_branch = disasm_next_pc((unsigned long)
+		regs->ret, regs, (struct callee_regs *)
+		current->thread.callee_reg,
+		&single_step_data.address[0],
+		&single_step_data.address[1]);
+
+	place_trap(single_step_data.address[0], &single_step_data.opcode[0]);
+
+	if (single_step_data.is_branch) {
+		place_trap(single_step_data.address[1],
+			&single_step_data.opcode[1]);
+	}
+
+	single_step_data.armed++;
+}
+
+int kgdb_arch_handle_exception(int e_vector, int signo, int err_code,
+			       char *remcomInBuffer, char *remcomOutBuffer,
+			       struct pt_regs *regs)
+{
+	unsigned long addr;
+	char *ptr;
+
+	undo_single_step(regs);
+
+	switch (remcomInBuffer[0]) {
+	case 's':
+	case 'c':
+		ptr = &remcomInBuffer[1];
+		if (kgdb_hex2long(&ptr, &addr))
+			regs->ret = addr;
+
+	case 'D':
+	case 'k':
+		atomic_set(&kgdb_cpu_doing_single_step, -1);
+
+		if (remcomInBuffer[0] == 's') {
+			do_single_step(regs);
+			atomic_set(&kgdb_cpu_doing_single_step,
+				   smp_processor_id());
+		}
+
+		return 0;
+	}
+	return -1;
+}
+
+int kgdb_arch_init(void)
+{
+	single_step_data.armed = 0;
+	return 0;
+}
+
+void kgdb_trap(struct pt_regs *regs)
+{
+	/* trap_s 3 is used for breakpoints that overwrite existing
+	 * instructions, while trap_s 4 is used for compiled breakpoints.
+	 *
+	 * with trap_s 3 breakpoints the original instruction needs to be
+	 * restored and continuation needs to start at the location of the
+	 * breakpoint.
+	 *
+	 * with trap_s 4 (compiled) breakpoints, continuation needs to
+	 * start after the breakpoint.
+	 */
+	if (regs->ecr_param == 3)
+		instruction_pointer(regs) -= BREAK_INSTR_SIZE;
+
+	kgdb_handle_exception(1, SIGTRAP, 0, regs);
+}
+
+void kgdb_arch_exit(void)
+{
+}
+
+void kgdb_arch_set_pc(struct pt_regs *regs, unsigned long ip)
+{
+	instruction_pointer(regs) = ip;
+}
+
+static void kgdb_call_nmi_hook(void *ignored)
+{
+	kgdb_nmicallback(raw_smp_processor_id(), NULL);
+}
+
+void kgdb_roundup_cpus(unsigned long flags)
+{
+	local_irq_enable();
+	smp_call_function(kgdb_call_nmi_hook, NULL, 0);
+	local_irq_disable();
+}
+
+struct kgdb_arch arch_kgdb_ops = {
+	/* breakpoint instruction: TRAP_S 0x3 */
+#ifdef CONFIG_CPU_BIG_ENDIAN
+	.gdb_bpt_instr		= {0x78, 0x7e},
+#else
+	.gdb_bpt_instr		= {0x7e, 0x78},
+#endif
+};
diff --git a/arch/arc/kernel/kprobes.c b/arch/arc/kernel/kprobes.c
new file mode 100644
index 000000000..df35d4c0b
--- /dev/null
+++ b/arch/arc/kernel/kprobes.c
@@ -0,0 +1,483 @@
+/*
+ * Copyright (C) 2004, 2007-2010, 2011-2012 Synopsys, Inc. (www.synopsys.com)
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/types.h>
+#include <linux/kprobes.h>
+#include <linux/slab.h>
+#include <linux/module.h>
+#include <linux/kdebug.h>
+#include <linux/sched.h>
+#include <linux/uaccess.h>
+#include <asm/cacheflush.h>
+#include <asm/current.h>
+#include <asm/disasm.h>
+
+#define MIN_STACK_SIZE(addr)	min((unsigned long)MAX_STACK_SIZE, \
+		(unsigned long)current_thread_info() + THREAD_SIZE - (addr))
+
+DEFINE_PER_CPU(struct kprobe *, current_kprobe) = NULL;
+DEFINE_PER_CPU(struct kprobe_ctlblk, kprobe_ctlblk);
+
+int __kprobes arch_prepare_kprobe(struct kprobe *p)
+{
+	/* Attempt to probe at unaligned address */
+	if ((unsigned long)p->addr & 0x01)
+		return -EINVAL;
+
+	/* Address should not be in exception handling code */
+
+	p->ainsn.is_short = is_short_instr((unsigned long)p->addr);
+	p->opcode = *p->addr;
+
+	return 0;
+}
+
+void __kprobes arch_arm_kprobe(struct kprobe *p)
+{
+	*p->addr = UNIMP_S_INSTRUCTION;
+
+	flush_icache_range((unsigned long)p->addr,
+			   (unsigned long)p->addr + sizeof(kprobe_opcode_t));
+}
+
+void __kprobes arch_disarm_kprobe(struct kprobe *p)
+{
+	*p->addr = p->opcode;
+
+	flush_icache_range((unsigned long)p->addr,
+			   (unsigned long)p->addr + sizeof(kprobe_opcode_t));
+}
+
+void __kprobes arch_remove_kprobe(struct kprobe *p)
+{
+	arch_disarm_kprobe(p);
+
+	/* Can we remove the kprobe in the middle of kprobe handling? */
+	if (p->ainsn.t1_addr) {
+		*(p->ainsn.t1_addr) = p->ainsn.t1_opcode;
+
+		flush_icache_range((unsigned long)p->ainsn.t1_addr,
+				   (unsigned long)p->ainsn.t1_addr +
+				   sizeof(kprobe_opcode_t));
+
+		p->ainsn.t1_addr = NULL;
+	}
+
+	if (p->ainsn.t2_addr) {
+		*(p->ainsn.t2_addr) = p->ainsn.t2_opcode;
+
+		flush_icache_range((unsigned long)p->ainsn.t2_addr,
+				   (unsigned long)p->ainsn.t2_addr +
+				   sizeof(kprobe_opcode_t));
+
+		p->ainsn.t2_addr = NULL;
+	}
+}
+
+static void __kprobes save_previous_kprobe(struct kprobe_ctlblk *kcb)
+{
+	kcb->prev_kprobe.kp = kprobe_running();
+	kcb->prev_kprobe.status = kcb->kprobe_status;
+}
+
+static void __kprobes restore_previous_kprobe(struct kprobe_ctlblk *kcb)
+{
+	__this_cpu_write(current_kprobe, kcb->prev_kprobe.kp);
+	kcb->kprobe_status = kcb->prev_kprobe.status;
+}
+
+static inline void __kprobes set_current_kprobe(struct kprobe *p)
+{
+	__this_cpu_write(current_kprobe, p);
+}
+
+static void __kprobes resume_execution(struct kprobe *p, unsigned long addr,
+				       struct pt_regs *regs)
+{
+	/* Remove the trap instructions inserted for single step and
+	 * restore the original instructions
+	 */
+	if (p->ainsn.t1_addr) {
+		*(p->ainsn.t1_addr) = p->ainsn.t1_opcode;
+
+		flush_icache_range((unsigned long)p->ainsn.t1_addr,
+				   (unsigned long)p->ainsn.t1_addr +
+				   sizeof(kprobe_opcode_t));
+
+		p->ainsn.t1_addr = NULL;
+	}
+
+	if (p->ainsn.t2_addr) {
+		*(p->ainsn.t2_addr) = p->ainsn.t2_opcode;
+
+		flush_icache_range((unsigned long)p->ainsn.t2_addr,
+				   (unsigned long)p->ainsn.t2_addr +
+				   sizeof(kprobe_opcode_t));
+
+		p->ainsn.t2_addr = NULL;
+	}
+
+	return;
+}
+
+static void __kprobes setup_singlestep(struct kprobe *p, struct pt_regs *regs)
+{
+	unsigned long next_pc;
+	unsigned long tgt_if_br = 0;
+	int is_branch;
+	unsigned long bta;
+
+	/* Copy the opcode back to the kprobe location and execute the
+	 * instruction. Because of this we will not be able to get into the
+	 * same kprobe until this kprobe is done
+	 */
+	*(p->addr) = p->opcode;
+
+	flush_icache_range((unsigned long)p->addr,
+			   (unsigned long)p->addr + sizeof(kprobe_opcode_t));
+
+	/* Now we insert the trap at the next location after this instruction to
+	 * single step. If it is a branch we insert the trap at possible branch
+	 * targets
+	 */
+
+	bta = regs->bta;
+
+	if (regs->status32 & 0x40) {
+		/* We are in a delay slot with the branch taken */
+
+		next_pc = bta & ~0x01;
+
+		if (!p->ainsn.is_short) {
+			if (bta & 0x01)
+				regs->blink += 2;
+			else {
+				/* Branch not taken */
+				next_pc += 2;
+
+				/* next pc is taken from bta after executing the
+				 * delay slot instruction
+				 */
+				regs->bta += 2;
+			}
+		}
+
+		is_branch = 0;
+	} else
+		is_branch =
+		    disasm_next_pc((unsigned long)p->addr, regs,
+			(struct callee_regs *) current->thread.callee_reg,
+			&next_pc, &tgt_if_br);
+
+	p->ainsn.t1_addr = (kprobe_opcode_t *) next_pc;
+	p->ainsn.t1_opcode = *(p->ainsn.t1_addr);
+	*(p->ainsn.t1_addr) = TRAP_S_2_INSTRUCTION;
+
+	flush_icache_range((unsigned long)p->ainsn.t1_addr,
+			   (unsigned long)p->ainsn.t1_addr +
+			   sizeof(kprobe_opcode_t));
+
+	if (is_branch) {
+		p->ainsn.t2_addr = (kprobe_opcode_t *) tgt_if_br;
+		p->ainsn.t2_opcode = *(p->ainsn.t2_addr);
+		*(p->ainsn.t2_addr) = TRAP_S_2_INSTRUCTION;
+
+		flush_icache_range((unsigned long)p->ainsn.t2_addr,
+				   (unsigned long)p->ainsn.t2_addr +
+				   sizeof(kprobe_opcode_t));
+	}
+}
+
+int __kprobes arc_kprobe_handler(unsigned long addr, struct pt_regs *regs)
+{
+	struct kprobe *p;
+	struct kprobe_ctlblk *kcb;
+
+	preempt_disable();
+
+	kcb = get_kprobe_ctlblk();
+	p = get_kprobe((unsigned long *)addr);
+
+	if (p) {
+		/*
+		 * We have reentered the kprobe_handler, since another kprobe
+		 * was hit while within the handler, we save the original
+		 * kprobes and single step on the instruction of the new probe
+		 * without calling any user handlers to avoid recursive
+		 * kprobes.
+		 */
+		if (kprobe_running()) {
+			save_previous_kprobe(kcb);
+			set_current_kprobe(p);
+			kprobes_inc_nmissed_count(p);
+			setup_singlestep(p, regs);
+			kcb->kprobe_status = KPROBE_REENTER;
+			return 1;
+		}
+
+		set_current_kprobe(p);
+		kcb->kprobe_status = KPROBE_HIT_ACTIVE;
+
+		/* If we have no pre-handler or it returned 0, we continue with
+		 * normal processing. If we have a pre-handler and it returned
+		 * non-zero - which means user handler setup registers to exit
+		 * to another instruction, we must skip the single stepping.
+		 */
+		if (!p->pre_handler || !p->pre_handler(p, regs)) {
+			setup_singlestep(p, regs);
+			kcb->kprobe_status = KPROBE_HIT_SS;
+		} else {
+			reset_current_kprobe();
+			preempt_enable_no_resched();
+		}
+
+		return 1;
+	}
+
+	/* no_kprobe: */
+	preempt_enable_no_resched();
+	return 0;
+}
+
+static int __kprobes arc_post_kprobe_handler(unsigned long addr,
+					 struct pt_regs *regs)
+{
+	struct kprobe *cur = kprobe_running();
+	struct kprobe_ctlblk *kcb = get_kprobe_ctlblk();
+
+	if (!cur)
+		return 0;
+
+	resume_execution(cur, addr, regs);
+
+	/* Rearm the kprobe */
+	arch_arm_kprobe(cur);
+
+	/*
+	 * When we return from trap instruction we go to the next instruction
+	 * We restored the actual instruction in resume_exectuiont and we to
+	 * return to the same address and execute it
+	 */
+	regs->ret = addr;
+
+	if ((kcb->kprobe_status != KPROBE_REENTER) && cur->post_handler) {
+		kcb->kprobe_status = KPROBE_HIT_SSDONE;
+		cur->post_handler(cur, regs, 0);
+	}
+
+	if (kcb->kprobe_status == KPROBE_REENTER) {
+		restore_previous_kprobe(kcb);
+		goto out;
+	}
+
+	reset_current_kprobe();
+
+out:
+	preempt_enable_no_resched();
+	return 1;
+}
+
+/*
+ * Fault can be for the instruction being single stepped or for the
+ * pre/post handlers in the module.
+ * This is applicable for applications like user probes, where we have the
+ * probe in user space and the handlers in the kernel
+ */
+
+int __kprobes kprobe_fault_handler(struct pt_regs *regs, unsigned long trapnr)
+{
+	struct kprobe *cur = kprobe_running();
+	struct kprobe_ctlblk *kcb = get_kprobe_ctlblk();
+
+	switch (kcb->kprobe_status) {
+	case KPROBE_HIT_SS:
+	case KPROBE_REENTER:
+		/*
+		 * We are here because the instruction being single stepped
+		 * caused the fault. We reset the current kprobe and allow the
+		 * exception handler as if it is regular exception. In our
+		 * case it doesn't matter because the system will be halted
+		 */
+		resume_execution(cur, (unsigned long)cur->addr, regs);
+
+		if (kcb->kprobe_status == KPROBE_REENTER)
+			restore_previous_kprobe(kcb);
+		else
+			reset_current_kprobe();
+
+		preempt_enable_no_resched();
+		break;
+
+	case KPROBE_HIT_ACTIVE:
+	case KPROBE_HIT_SSDONE:
+		/*
+		 * We are here because the instructions in the pre/post handler
+		 * caused the fault.
+		 */
+
+		/* We increment the nmissed count for accounting,
+		 * we can also use npre/npostfault count for accounting
+		 * these specific fault cases.
+		 */
+		kprobes_inc_nmissed_count(cur);
+
+		/*
+		 * We come here because instructions in the pre/post
+		 * handler caused the page_fault, this could happen
+		 * if handler tries to access user space by
+		 * copy_from_user(), get_user() etc. Let the
+		 * user-specified handler try to fix it first.
+		 */
+		if (cur->fault_handler && cur->fault_handler(cur, regs, trapnr))
+			return 1;
+
+		/*
+		 * In case the user-specified fault handler returned zero,
+		 * try to fix up.
+		 */
+		if (fixup_exception(regs))
+			return 1;
+
+		/*
+		 * fixup_exception() could not handle it,
+		 * Let do_page_fault() fix it.
+		 */
+		break;
+
+	default:
+		break;
+	}
+	return 0;
+}
+
+int __kprobes kprobe_exceptions_notify(struct notifier_block *self,
+				       unsigned long val, void *data)
+{
+	struct die_args *args = data;
+	unsigned long addr = args->err;
+	int ret = NOTIFY_DONE;
+
+	switch (val) {
+	case DIE_IERR:
+		if (arc_kprobe_handler(addr, args->regs))
+			return NOTIFY_STOP;
+		break;
+
+	case DIE_TRAP:
+		if (arc_post_kprobe_handler(addr, args->regs))
+			return NOTIFY_STOP;
+		break;
+
+	default:
+		break;
+	}
+
+	return ret;
+}
+
+static void __used kretprobe_trampoline_holder(void)
+{
+	__asm__ __volatile__(".global kretprobe_trampoline\n"
+			     "kretprobe_trampoline:\n" "nop\n");
+}
+
+void __kprobes arch_prepare_kretprobe(struct kretprobe_instance *ri,
+				      struct pt_regs *regs)
+{
+
+	ri->ret_addr = (kprobe_opcode_t *) regs->blink;
+
+	/* Replace the return addr with trampoline addr */
+	regs->blink = (unsigned long)&kretprobe_trampoline;
+}
+
+static int __kprobes trampoline_probe_handler(struct kprobe *p,
+					      struct pt_regs *regs)
+{
+	struct kretprobe_instance *ri = NULL;
+	struct hlist_head *head, empty_rp;
+	struct hlist_node *tmp;
+	unsigned long flags, orig_ret_address = 0;
+	unsigned long trampoline_address = (unsigned long)&kretprobe_trampoline;
+
+	INIT_HLIST_HEAD(&empty_rp);
+	kretprobe_hash_lock(current, &head, &flags);
+
+	/*
+	 * It is possible to have multiple instances associated with a given
+	 * task either because an multiple functions in the call path
+	 * have a return probe installed on them, and/or more than one return
+	 * return probe was registered for a target function.
+	 *
+	 * We can handle this because:
+	 *     - instances are always inserted at the head of the list
+	 *     - when multiple return probes are registered for the same
+	 *       function, the first instance's ret_addr will point to the
+	 *       real return address, and all the rest will point to
+	 *       kretprobe_trampoline
+	 */
+	hlist_for_each_entry_safe(ri, tmp, head, hlist) {
+		if (ri->task != current)
+			/* another task is sharing our hash bucket */
+			continue;
+
+		if (ri->rp && ri->rp->handler)
+			ri->rp->handler(ri, regs);
+
+		orig_ret_address = (unsigned long)ri->ret_addr;
+		recycle_rp_inst(ri, &empty_rp);
+
+		if (orig_ret_address != trampoline_address) {
+			/*
+			 * This is the real return address. Any other
+			 * instances associated with this task are for
+			 * other calls deeper on the call stack
+			 */
+			break;
+		}
+	}
+
+	kretprobe_assert(ri, orig_ret_address, trampoline_address);
+	regs->ret = orig_ret_address;
+
+	kretprobe_hash_unlock(current, &flags);
+
+	hlist_for_each_entry_safe(ri, tmp, &empty_rp, hlist) {
+		hlist_del(&ri->hlist);
+		kfree(ri);
+	}
+
+	/* By returning a non zero value, we are telling the kprobe handler
+	 * that we don't want the post_handler to run
+	 */
+	return 1;
+}
+
+static struct kprobe trampoline_p = {
+	.addr = (kprobe_opcode_t *) &kretprobe_trampoline,
+	.pre_handler = trampoline_probe_handler
+};
+
+int __init arch_init_kprobes(void)
+{
+	/* Registering the trampoline code for the kret probe */
+	return register_kprobe(&trampoline_p);
+}
+
+int __kprobes arch_trampoline_kprobe(struct kprobe *p)
+{
+	if (p->addr == (kprobe_opcode_t *) &kretprobe_trampoline)
+		return 1;
+
+	return 0;
+}
+
+void trap_is_kprobe(unsigned long address, struct pt_regs *regs)
+{
+	notify_die(DIE_TRAP, "kprobe_trap", regs, address, 0, SIGTRAP);
+}
diff --git a/arch/arc/kernel/mcip.c b/arch/arc/kernel/mcip.c
new file mode 100644
index 000000000..5fe84e481
--- /dev/null
+++ b/arch/arc/kernel/mcip.c
@@ -0,0 +1,376 @@
+/*
+ * ARC ARConnect (MultiCore IP) support (formerly known as MCIP)
+ *
+ * Copyright (C) 2013 Synopsys, Inc. (www.synopsys.com)
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/smp.h>
+#include <linux/irq.h>
+#include <linux/irqchip/chained_irq.h>
+#include <linux/spinlock.h>
+#include <soc/arc/mcip.h>
+#include <asm/irqflags-arcv2.h>
+#include <asm/setup.h>
+
+static DEFINE_RAW_SPINLOCK(mcip_lock);
+
+#ifdef CONFIG_SMP
+
+static char smp_cpuinfo_buf[128];
+
+/*
+ * Set mask to halt GFRC if any online core in SMP cluster is halted.
+ * Only works for ARC HS v3.0+, on earlier versions has no effect.
+ */
+static void mcip_update_gfrc_halt_mask(int cpu)
+{
+	struct bcr_generic gfrc;
+	unsigned long flags;
+	u32 gfrc_halt_mask;
+
+	READ_BCR(ARC_REG_GFRC_BUILD, gfrc);
+
+	/*
+	 * CMD_GFRC_SET_CORE and CMD_GFRC_READ_CORE commands were added in
+	 * GFRC 0x3 version.
+	 */
+	if (gfrc.ver < 0x3)
+		return;
+
+	raw_spin_lock_irqsave(&mcip_lock, flags);
+
+	__mcip_cmd(CMD_GFRC_READ_CORE, 0);
+	gfrc_halt_mask = read_aux_reg(ARC_REG_MCIP_READBACK);
+	gfrc_halt_mask |= BIT(cpu);
+	__mcip_cmd_data(CMD_GFRC_SET_CORE, 0, gfrc_halt_mask);
+
+	raw_spin_unlock_irqrestore(&mcip_lock, flags);
+}
+
+static void mcip_update_debug_halt_mask(int cpu)
+{
+	u32 mcip_mask = 0;
+	unsigned long flags;
+
+	raw_spin_lock_irqsave(&mcip_lock, flags);
+
+	/*
+	 * mcip_mask is same for CMD_DEBUG_SET_SELECT and CMD_DEBUG_SET_MASK
+	 * commands. So read it once instead of reading both CMD_DEBUG_READ_MASK
+	 * and CMD_DEBUG_READ_SELECT.
+	 */
+	__mcip_cmd(CMD_DEBUG_READ_SELECT, 0);
+	mcip_mask = read_aux_reg(ARC_REG_MCIP_READBACK);
+
+	mcip_mask |= BIT(cpu);
+
+	__mcip_cmd_data(CMD_DEBUG_SET_SELECT, 0, mcip_mask);
+	/*
+	 * Parameter specified halt cause:
+	 * STATUS32[H]/actionpoint/breakpoint/self-halt
+	 * We choose all of them (0xF).
+	 */
+	__mcip_cmd_data(CMD_DEBUG_SET_MASK, 0xF, mcip_mask);
+
+	raw_spin_unlock_irqrestore(&mcip_lock, flags);
+}
+
+static void mcip_setup_per_cpu(int cpu)
+{
+	struct mcip_bcr mp;
+
+	READ_BCR(ARC_REG_MCIP_BCR, mp);
+
+	smp_ipi_irq_setup(cpu, IPI_IRQ);
+	smp_ipi_irq_setup(cpu, SOFTIRQ_IRQ);
+
+	/* Update GFRC halt mask as new CPU came online */
+	if (mp.gfrc)
+		mcip_update_gfrc_halt_mask(cpu);
+
+	/* Update MCIP debug mask as new CPU came online */
+	if (mp.dbg)
+		mcip_update_debug_halt_mask(cpu);
+}
+
+static void mcip_ipi_send(int cpu)
+{
+	unsigned long flags;
+	int ipi_was_pending;
+
+	/* ARConnect can only send IPI to others */
+	if (unlikely(cpu == raw_smp_processor_id())) {
+		arc_softirq_trigger(SOFTIRQ_IRQ);
+		return;
+	}
+
+	raw_spin_lock_irqsave(&mcip_lock, flags);
+
+	/*
+	 * If receiver already has a pending interrupt, elide sending this one.
+	 * Linux cross core calling works well with concurrent IPIs
+	 * coalesced into one
+	 * see arch/arc/kernel/smp.c: ipi_send_msg_one()
+	 */
+	__mcip_cmd(CMD_INTRPT_READ_STATUS, cpu);
+	ipi_was_pending = read_aux_reg(ARC_REG_MCIP_READBACK);
+	if (!ipi_was_pending)
+		__mcip_cmd(CMD_INTRPT_GENERATE_IRQ, cpu);
+
+	raw_spin_unlock_irqrestore(&mcip_lock, flags);
+}
+
+static void mcip_ipi_clear(int irq)
+{
+	unsigned int cpu, c;
+	unsigned long flags;
+
+	if (unlikely(irq == SOFTIRQ_IRQ)) {
+		arc_softirq_clear(irq);
+		return;
+	}
+
+	raw_spin_lock_irqsave(&mcip_lock, flags);
+
+	/* Who sent the IPI */
+	__mcip_cmd(CMD_INTRPT_CHECK_SOURCE, 0);
+
+	cpu = read_aux_reg(ARC_REG_MCIP_READBACK);	/* 1,2,4,8... */
+
+	/*
+	 * In rare case, multiple concurrent IPIs sent to same target can
+	 * possibly be coalesced by MCIP into 1 asserted IRQ, so @cpus can be
+	 * "vectored" (multiple bits sets) as opposed to typical single bit
+	 */
+	do {
+		c = __ffs(cpu);			/* 0,1,2,3 */
+		__mcip_cmd(CMD_INTRPT_GENERATE_ACK, c);
+		cpu &= ~(1U << c);
+	} while (cpu);
+
+	raw_spin_unlock_irqrestore(&mcip_lock, flags);
+}
+
+static void mcip_probe_n_setup(void)
+{
+	struct mcip_bcr mp;
+
+	READ_BCR(ARC_REG_MCIP_BCR, mp);
+
+	sprintf(smp_cpuinfo_buf,
+		"Extn [SMP]\t: ARConnect (v%d): %d cores with %s%s%s%s\n",
+		mp.ver, mp.num_cores,
+		IS_AVAIL1(mp.ipi, "IPI "),
+		IS_AVAIL1(mp.idu, "IDU "),
+		IS_AVAIL1(mp.dbg, "DEBUG "),
+		IS_AVAIL1(mp.gfrc, "GFRC"));
+
+	cpuinfo_arc700[0].extn.gfrc = mp.gfrc;
+}
+
+struct plat_smp_ops plat_smp_ops = {
+	.info		= smp_cpuinfo_buf,
+	.init_early_smp	= mcip_probe_n_setup,
+	.init_per_cpu	= mcip_setup_per_cpu,
+	.ipi_send	= mcip_ipi_send,
+	.ipi_clear	= mcip_ipi_clear,
+};
+
+#endif
+
+/***************************************************************************
+ * ARCv2 Interrupt Distribution Unit (IDU)
+ *
+ * Connects external "COMMON" IRQs to core intc, providing:
+ *  -dynamic routing (IRQ affinity)
+ *  -load balancing (Round Robin interrupt distribution)
+ *  -1:N distribution
+ *
+ * It physically resides in the MCIP hw block
+ */
+
+#include <linux/irqchip.h>
+#include <linux/of.h>
+#include <linux/of_irq.h>
+
+/*
+ * Set the DEST for @cmn_irq to @cpu_mask (1 bit per core)
+ */
+static void idu_set_dest(unsigned int cmn_irq, unsigned int cpu_mask)
+{
+	__mcip_cmd_data(CMD_IDU_SET_DEST, cmn_irq, cpu_mask);
+}
+
+static void idu_set_mode(unsigned int cmn_irq, unsigned int lvl,
+			   unsigned int distr)
+{
+	union {
+		unsigned int word;
+		struct {
+			unsigned int distr:2, pad:2, lvl:1, pad2:27;
+		};
+	} data;
+
+	data.distr = distr;
+	data.lvl = lvl;
+	__mcip_cmd_data(CMD_IDU_SET_MODE, cmn_irq, data.word);
+}
+
+static void idu_irq_mask_raw(irq_hw_number_t hwirq)
+{
+	unsigned long flags;
+
+	raw_spin_lock_irqsave(&mcip_lock, flags);
+	__mcip_cmd_data(CMD_IDU_SET_MASK, hwirq, 1);
+	raw_spin_unlock_irqrestore(&mcip_lock, flags);
+}
+
+static void idu_irq_mask(struct irq_data *data)
+{
+	idu_irq_mask_raw(data->hwirq);
+}
+
+static void idu_irq_unmask(struct irq_data *data)
+{
+	unsigned long flags;
+
+	raw_spin_lock_irqsave(&mcip_lock, flags);
+	__mcip_cmd_data(CMD_IDU_SET_MASK, data->hwirq, 0);
+	raw_spin_unlock_irqrestore(&mcip_lock, flags);
+}
+
+static int
+idu_irq_set_affinity(struct irq_data *data, const struct cpumask *cpumask,
+		     bool force)
+{
+	unsigned long flags;
+	cpumask_t online;
+	unsigned int destination_bits;
+	unsigned int distribution_mode;
+
+	/* errout if no online cpu per @cpumask */
+	if (!cpumask_and(&online, cpumask, cpu_online_mask))
+		return -EINVAL;
+
+	raw_spin_lock_irqsave(&mcip_lock, flags);
+
+	destination_bits = cpumask_bits(&online)[0];
+	idu_set_dest(data->hwirq, destination_bits);
+
+	if (ffs(destination_bits) == fls(destination_bits))
+		distribution_mode = IDU_M_DISTRI_DEST;
+	else
+		distribution_mode = IDU_M_DISTRI_RR;
+
+	idu_set_mode(data->hwirq, IDU_M_TRIG_LEVEL, distribution_mode);
+
+	raw_spin_unlock_irqrestore(&mcip_lock, flags);
+
+	return IRQ_SET_MASK_OK;
+}
+
+static void idu_irq_enable(struct irq_data *data)
+{
+	/*
+	 * By default send all common interrupts to all available online CPUs.
+	 * The affinity of common interrupts in IDU must be set manually since
+	 * in some cases the kernel will not call irq_set_affinity() by itself:
+	 *   1. When the kernel is not configured with support of SMP.
+	 *   2. When the kernel is configured with support of SMP but upper
+	 *      interrupt controllers does not support setting of the affinity
+	 *      and cannot propagate it to IDU.
+	 */
+	idu_irq_set_affinity(data, cpu_online_mask, false);
+	idu_irq_unmask(data);
+}
+
+static struct irq_chip idu_irq_chip = {
+	.name			= "MCIP IDU Intc",
+	.irq_mask		= idu_irq_mask,
+	.irq_unmask		= idu_irq_unmask,
+	.irq_enable		= idu_irq_enable,
+#ifdef CONFIG_SMP
+	.irq_set_affinity       = idu_irq_set_affinity,
+#endif
+
+};
+
+static void idu_cascade_isr(struct irq_desc *desc)
+{
+	struct irq_domain *idu_domain = irq_desc_get_handler_data(desc);
+	struct irq_chip *core_chip = irq_desc_get_chip(desc);
+	irq_hw_number_t core_hwirq = irqd_to_hwirq(irq_desc_get_irq_data(desc));
+	irq_hw_number_t idu_hwirq = core_hwirq - FIRST_EXT_IRQ;
+
+	chained_irq_enter(core_chip, desc);
+	generic_handle_irq(irq_find_mapping(idu_domain, idu_hwirq));
+	chained_irq_exit(core_chip, desc);
+}
+
+static int idu_irq_map(struct irq_domain *d, unsigned int virq, irq_hw_number_t hwirq)
+{
+	irq_set_chip_and_handler(virq, &idu_irq_chip, handle_level_irq);
+	irq_set_status_flags(virq, IRQ_MOVE_PCNTXT);
+
+	return 0;
+}
+
+static const struct irq_domain_ops idu_irq_ops = {
+	.xlate	= irq_domain_xlate_onecell,
+	.map	= idu_irq_map,
+};
+
+/*
+ * [16, 23]: Statically assigned always private-per-core (Timers, WDT, IPI)
+ * [24, 23+C]: If C > 0 then "C" common IRQs
+ * [24+C, N]: Not statically assigned, private-per-core
+ */
+
+
+static int __init
+idu_of_init(struct device_node *intc, struct device_node *parent)
+{
+	struct irq_domain *domain;
+	int nr_irqs;
+	int i, virq;
+	struct mcip_bcr mp;
+	struct mcip_idu_bcr idu_bcr;
+
+	READ_BCR(ARC_REG_MCIP_BCR, mp);
+
+	if (!mp.idu)
+		panic("IDU not detected, but DeviceTree using it");
+
+	READ_BCR(ARC_REG_MCIP_IDU_BCR, idu_bcr);
+	nr_irqs = mcip_idu_bcr_to_nr_irqs(idu_bcr);
+
+	pr_info("MCIP: IDU supports %u common irqs\n", nr_irqs);
+
+	domain = irq_domain_add_linear(intc, nr_irqs, &idu_irq_ops, NULL);
+
+	/* Parent interrupts (core-intc) are already mapped */
+
+	for (i = 0; i < nr_irqs; i++) {
+		/* Mask all common interrupts by default */
+		idu_irq_mask_raw(i);
+
+		/*
+		 * Return parent uplink IRQs (towards core intc) 24,25,.....
+		 * this step has been done before already
+		 * however we need it to get the parent virq and set IDU handler
+		 * as first level isr
+		 */
+		virq = irq_create_mapping(NULL, i + FIRST_EXT_IRQ);
+		BUG_ON(!virq);
+		irq_set_chained_handler_and_data(virq, idu_cascade_isr, domain);
+	}
+
+	__mcip_cmd(CMD_IDU_ENABLE, 0);
+
+	return 0;
+}
+IRQCHIP_DECLARE(arcv2_idu_intc, "snps,archs-idu-intc", idu_of_init);
diff --git a/arch/arc/kernel/module.c b/arch/arc/kernel/module.c
new file mode 100644
index 000000000..3d99a6091
--- /dev/null
+++ b/arch/arc/kernel/module.c
@@ -0,0 +1,149 @@
+/*
+ * Copyright (C) 2004, 2007-2010, 2011-2012 Synopsys, Inc. (www.synopsys.com)
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/module.h>
+#include <linux/moduleloader.h>
+#include <linux/kernel.h>
+#include <linux/elf.h>
+#include <linux/vmalloc.h>
+#include <linux/slab.h>
+#include <linux/fs.h>
+#include <linux/string.h>
+#include <asm/unwind.h>
+
+static inline void arc_write_me(unsigned short *addr, unsigned long value)
+{
+	*addr = (value & 0xffff0000) >> 16;
+	*(addr + 1) = (value & 0xffff);
+}
+
+/*
+ * This gets called before relocation loop in generic loader
+ * Make a note of the section index of unwinding section
+ */
+int module_frob_arch_sections(Elf_Ehdr *hdr, Elf_Shdr *sechdrs,
+			      char *secstr, struct module *mod)
+{
+#ifdef CONFIG_ARC_DW2_UNWIND
+	mod->arch.unw_sec_idx = 0;
+	mod->arch.unw_info = NULL;
+#endif
+	mod->arch.secstr = secstr;
+	return 0;
+}
+
+void module_arch_cleanup(struct module *mod)
+{
+#ifdef CONFIG_ARC_DW2_UNWIND
+	if (mod->arch.unw_info)
+		unwind_remove_table(mod->arch.unw_info, 0);
+#endif
+}
+
+int apply_relocate_add(Elf32_Shdr *sechdrs,
+		       const char *strtab,
+		       unsigned int symindex,	/* sec index for sym tbl */
+		       unsigned int relsec,	/* sec index for relo sec */
+		       struct module *module)
+{
+	int i, n, relo_type;
+	Elf32_Rela *rel_entry = (void *)sechdrs[relsec].sh_addr;
+	Elf32_Sym *sym_entry, *sym_sec;
+	Elf32_Addr relocation, location, tgt_addr;
+	unsigned int tgtsec;
+
+	/*
+	 * @relsec has relocations e.g. .rela.init.text
+	 * @tgtsec is section to patch e.g. .init.text
+	 */
+	tgtsec = sechdrs[relsec].sh_info;
+	tgt_addr = sechdrs[tgtsec].sh_addr;
+	sym_sec = (Elf32_Sym *) sechdrs[symindex].sh_addr;
+	n = sechdrs[relsec].sh_size / sizeof(*rel_entry);
+
+	pr_debug("\nSection to fixup %s @%x\n",
+		 module->arch.secstr + sechdrs[tgtsec].sh_name, tgt_addr);
+	pr_debug("=========================================================\n");
+	pr_debug("r_off\tr_add\tst_value ADDRESS  VALUE\n");
+	pr_debug("=========================================================\n");
+
+	/* Loop thru entries in relocation section */
+	for (i = 0; i < n; i++) {
+		const char *s;
+
+		/* This is where to make the change */
+		location = tgt_addr + rel_entry[i].r_offset;
+
+		/* This is the symbol it is referring to.  Note that all
+		   undefined symbols have been resolved.  */
+		sym_entry = sym_sec + ELF32_R_SYM(rel_entry[i].r_info);
+
+		relocation = sym_entry->st_value + rel_entry[i].r_addend;
+
+		if (sym_entry->st_name == 0 && ELF_ST_TYPE (sym_entry->st_info) == STT_SECTION) {
+			s = module->arch.secstr + sechdrs[sym_entry->st_shndx].sh_name;
+		} else {
+			s = strtab + sym_entry->st_name;
+		}
+
+		pr_debug("   %x\t%x\t%x %x %x [%s]\n",
+			 rel_entry[i].r_offset, rel_entry[i].r_addend,
+			 sym_entry->st_value, location, relocation, s);
+
+		/* This assumes modules are built with -mlong-calls
+		 * so any branches/jumps are absolute 32 bit jmps
+		 * global data access again is abs 32 bit.
+		 * Both of these are handled by same relocation type
+		 */
+		relo_type = ELF32_R_TYPE(rel_entry[i].r_info);
+
+		if (likely(R_ARC_32_ME == relo_type))	/* ME ( S + A ) */
+			arc_write_me((unsigned short *)location, relocation);
+		else if (R_ARC_32 == relo_type)		/* ( S + A ) */
+			*((Elf32_Addr *) location) = relocation;
+		else if (R_ARC_32_PCREL == relo_type)	/* ( S + A ) - PDATA ) */
+			*((Elf32_Addr *) location) = relocation - location;
+		else
+			goto relo_err;
+
+	}
+
+#ifdef CONFIG_ARC_DW2_UNWIND
+	if (strcmp(module->arch.secstr+sechdrs[tgtsec].sh_name, ".eh_frame") == 0)
+		module->arch.unw_sec_idx = tgtsec;
+#endif
+
+	return 0;
+
+relo_err:
+	pr_err("%s: unknown relocation: %u\n",
+		module->name, ELF32_R_TYPE(rel_entry[i].r_info));
+	return -ENOEXEC;
+
+}
+
+/* Just before lift off: After sections have been relocated, we add the
+ * dwarf section to unwinder table pool
+ * This couldn't be done in module_frob_arch_sections() because
+ * relocations had not been applied by then
+ */
+int module_finalize(const Elf32_Ehdr *hdr, const Elf_Shdr *sechdrs,
+		    struct module *mod)
+{
+#ifdef CONFIG_ARC_DW2_UNWIND
+	void *unw;
+	int unwsec = mod->arch.unw_sec_idx;
+
+	if (unwsec) {
+		unw = unwind_add_table(mod, (void *)sechdrs[unwsec].sh_addr,
+				       sechdrs[unwsec].sh_size);
+		mod->arch.unw_info = unw;
+	}
+#endif
+	return 0;
+}
diff --git a/arch/arc/kernel/perf_event.c b/arch/arc/kernel/perf_event.c
new file mode 100644
index 000000000..30f66b123
--- /dev/null
+++ b/arch/arc/kernel/perf_event.c
@@ -0,0 +1,562 @@
+/*
+ * Linux performance counter support for ARC700 series
+ *
+ * Copyright (C) 2013-2015 Synopsys, Inc. (www.synopsys.com)
+ *
+ * This code is inspired by the perf support of various other architectures.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ */
+#include <linux/errno.h>
+#include <linux/interrupt.h>
+#include <linux/module.h>
+#include <linux/of.h>
+#include <linux/perf_event.h>
+#include <linux/platform_device.h>
+#include <asm/arcregs.h>
+#include <asm/stacktrace.h>
+
+struct arc_pmu {
+	struct pmu	pmu;
+	unsigned int	irq;
+	int		n_counters;
+	u64		max_period;
+	int		ev_hw_idx[PERF_COUNT_ARC_HW_MAX];
+};
+
+struct arc_pmu_cpu {
+	/*
+	 * A 1 bit for an index indicates that the counter is being used for
+	 * an event. A 0 means that the counter can be used.
+	 */
+	unsigned long	used_mask[BITS_TO_LONGS(ARC_PERF_MAX_COUNTERS)];
+
+	/*
+	 * The events that are active on the PMU for the given index.
+	 */
+	struct perf_event *act_counter[ARC_PERF_MAX_COUNTERS];
+};
+
+struct arc_callchain_trace {
+	int depth;
+	void *perf_stuff;
+};
+
+static int callchain_trace(unsigned int addr, void *data)
+{
+	struct arc_callchain_trace *ctrl = data;
+	struct perf_callchain_entry_ctx *entry = ctrl->perf_stuff;
+	perf_callchain_store(entry, addr);
+
+	if (ctrl->depth++ < 3)
+		return 0;
+
+	return -1;
+}
+
+void
+perf_callchain_kernel(struct perf_callchain_entry_ctx *entry, struct pt_regs *regs)
+{
+	struct arc_callchain_trace ctrl = {
+		.depth = 0,
+		.perf_stuff = entry,
+	};
+
+	arc_unwind_core(NULL, regs, callchain_trace, &ctrl);
+}
+
+void
+perf_callchain_user(struct perf_callchain_entry_ctx *entry, struct pt_regs *regs)
+{
+	/*
+	 * User stack can't be unwound trivially with kernel dwarf unwinder
+	 * So for now just record the user PC
+	 */
+	perf_callchain_store(entry, instruction_pointer(regs));
+}
+
+static struct arc_pmu *arc_pmu;
+static DEFINE_PER_CPU(struct arc_pmu_cpu, arc_pmu_cpu);
+
+/* read counter #idx; note that counter# != event# on ARC! */
+static uint64_t arc_pmu_read_counter(int idx)
+{
+	uint32_t tmp;
+	uint64_t result;
+
+	/*
+	 * ARC supports making 'snapshots' of the counters, so we don't
+	 * need to care about counters wrapping to 0 underneath our feet
+	 */
+	write_aux_reg(ARC_REG_PCT_INDEX, idx);
+	tmp = read_aux_reg(ARC_REG_PCT_CONTROL);
+	write_aux_reg(ARC_REG_PCT_CONTROL, tmp | ARC_REG_PCT_CONTROL_SN);
+	result = (uint64_t) (read_aux_reg(ARC_REG_PCT_SNAPH)) << 32;
+	result |= read_aux_reg(ARC_REG_PCT_SNAPL);
+
+	return result;
+}
+
+static void arc_perf_event_update(struct perf_event *event,
+				  struct hw_perf_event *hwc, int idx)
+{
+	uint64_t prev_raw_count = local64_read(&hwc->prev_count);
+	uint64_t new_raw_count = arc_pmu_read_counter(idx);
+	int64_t delta = new_raw_count - prev_raw_count;
+
+	/*
+	 * We aren't afraid of hwc->prev_count changing beneath our feet
+	 * because there's no way for us to re-enter this function anytime.
+	 */
+	local64_set(&hwc->prev_count, new_raw_count);
+	local64_add(delta, &event->count);
+	local64_sub(delta, &hwc->period_left);
+}
+
+static void arc_pmu_read(struct perf_event *event)
+{
+	arc_perf_event_update(event, &event->hw, event->hw.idx);
+}
+
+static int arc_pmu_cache_event(u64 config)
+{
+	unsigned int cache_type, cache_op, cache_result;
+	int ret;
+
+	cache_type	= (config >>  0) & 0xff;
+	cache_op	= (config >>  8) & 0xff;
+	cache_result	= (config >> 16) & 0xff;
+	if (cache_type >= PERF_COUNT_HW_CACHE_MAX)
+		return -EINVAL;
+	if (cache_op >= PERF_COUNT_HW_CACHE_OP_MAX)
+		return -EINVAL;
+	if (cache_result >= PERF_COUNT_HW_CACHE_RESULT_MAX)
+		return -EINVAL;
+
+	ret = arc_pmu_cache_map[cache_type][cache_op][cache_result];
+
+	if (ret == CACHE_OP_UNSUPPORTED)
+		return -ENOENT;
+
+	pr_debug("init cache event: type/op/result %d/%d/%d with h/w %d \'%s\'\n",
+		 cache_type, cache_op, cache_result, ret,
+		 arc_pmu_ev_hw_map[ret]);
+
+	return ret;
+}
+
+/* initializes hw_perf_event structure if event is supported */
+static int arc_pmu_event_init(struct perf_event *event)
+{
+	struct hw_perf_event *hwc = &event->hw;
+	int ret;
+
+	if (!is_sampling_event(event)) {
+		hwc->sample_period  = arc_pmu->max_period;
+		hwc->last_period = hwc->sample_period;
+		local64_set(&hwc->period_left, hwc->sample_period);
+	}
+
+	hwc->config = 0;
+
+	if (is_isa_arcv2()) {
+		/* "exclude user" means "count only kernel" */
+		if (event->attr.exclude_user)
+			hwc->config |= ARC_REG_PCT_CONFIG_KERN;
+
+		/* "exclude kernel" means "count only user" */
+		if (event->attr.exclude_kernel)
+			hwc->config |= ARC_REG_PCT_CONFIG_USER;
+	}
+
+	switch (event->attr.type) {
+	case PERF_TYPE_HARDWARE:
+		if (event->attr.config >= PERF_COUNT_HW_MAX)
+			return -ENOENT;
+		if (arc_pmu->ev_hw_idx[event->attr.config] < 0)
+			return -ENOENT;
+		hwc->config |= arc_pmu->ev_hw_idx[event->attr.config];
+		pr_debug("init event %d with h/w %08x \'%s\'\n",
+			 (int)event->attr.config, (int)hwc->config,
+			 arc_pmu_ev_hw_map[event->attr.config]);
+		return 0;
+
+	case PERF_TYPE_HW_CACHE:
+		ret = arc_pmu_cache_event(event->attr.config);
+		if (ret < 0)
+			return ret;
+		hwc->config |= arc_pmu->ev_hw_idx[ret];
+		pr_debug("init cache event with h/w %08x \'%s\'\n",
+			 (int)hwc->config, arc_pmu_ev_hw_map[ret]);
+		return 0;
+	default:
+		return -ENOENT;
+	}
+}
+
+/* starts all counters */
+static void arc_pmu_enable(struct pmu *pmu)
+{
+	uint32_t tmp;
+	tmp = read_aux_reg(ARC_REG_PCT_CONTROL);
+	write_aux_reg(ARC_REG_PCT_CONTROL, (tmp & 0xffff0000) | 0x1);
+}
+
+/* stops all counters */
+static void arc_pmu_disable(struct pmu *pmu)
+{
+	uint32_t tmp;
+	tmp = read_aux_reg(ARC_REG_PCT_CONTROL);
+	write_aux_reg(ARC_REG_PCT_CONTROL, (tmp & 0xffff0000) | 0x0);
+}
+
+static int arc_pmu_event_set_period(struct perf_event *event)
+{
+	struct hw_perf_event *hwc = &event->hw;
+	s64 left = local64_read(&hwc->period_left);
+	s64 period = hwc->sample_period;
+	int idx = hwc->idx;
+	int overflow = 0;
+	u64 value;
+
+	if (unlikely(left <= -period)) {
+		/* left underflowed by more than period. */
+		left = period;
+		local64_set(&hwc->period_left, left);
+		hwc->last_period = period;
+		overflow = 1;
+	} else	if (unlikely(left <= 0)) {
+		/* left underflowed by less than period. */
+		left += period;
+		local64_set(&hwc->period_left, left);
+		hwc->last_period = period;
+		overflow = 1;
+	}
+
+	if (left > arc_pmu->max_period)
+		left = arc_pmu->max_period;
+
+	value = arc_pmu->max_period - left;
+	local64_set(&hwc->prev_count, value);
+
+	/* Select counter */
+	write_aux_reg(ARC_REG_PCT_INDEX, idx);
+
+	/* Write value */
+	write_aux_reg(ARC_REG_PCT_COUNTL, (u32)value);
+	write_aux_reg(ARC_REG_PCT_COUNTH, (value >> 32));
+
+	perf_event_update_userpage(event);
+
+	return overflow;
+}
+
+/*
+ * Assigns hardware counter to hardware condition.
+ * Note that there is no separate start/stop mechanism;
+ * stopping is achieved by assigning the 'never' condition
+ */
+static void arc_pmu_start(struct perf_event *event, int flags)
+{
+	struct hw_perf_event *hwc = &event->hw;
+	int idx = hwc->idx;
+
+	if (WARN_ON_ONCE(idx == -1))
+		return;
+
+	if (flags & PERF_EF_RELOAD)
+		WARN_ON_ONCE(!(hwc->state & PERF_HES_UPTODATE));
+
+	hwc->state = 0;
+
+	arc_pmu_event_set_period(event);
+
+	/* Enable interrupt for this counter */
+	if (is_sampling_event(event))
+		write_aux_reg(ARC_REG_PCT_INT_CTRL,
+			      read_aux_reg(ARC_REG_PCT_INT_CTRL) | (1 << idx));
+
+	/* enable ARC pmu here */
+	write_aux_reg(ARC_REG_PCT_INDEX, idx);		/* counter # */
+	write_aux_reg(ARC_REG_PCT_CONFIG, hwc->config);	/* condition */
+}
+
+static void arc_pmu_stop(struct perf_event *event, int flags)
+{
+	struct hw_perf_event *hwc = &event->hw;
+	int idx = hwc->idx;
+
+	/* Disable interrupt for this counter */
+	if (is_sampling_event(event)) {
+		/*
+		 * Reset interrupt flag by writing of 1. This is required
+		 * to make sure pending interrupt was not left.
+		 */
+		write_aux_reg(ARC_REG_PCT_INT_ACT, 1 << idx);
+		write_aux_reg(ARC_REG_PCT_INT_CTRL,
+			      read_aux_reg(ARC_REG_PCT_INT_CTRL) & ~(1 << idx));
+	}
+
+	if (!(event->hw.state & PERF_HES_STOPPED)) {
+		/* stop ARC pmu here */
+		write_aux_reg(ARC_REG_PCT_INDEX, idx);
+
+		/* condition code #0 is always "never" */
+		write_aux_reg(ARC_REG_PCT_CONFIG, 0);
+
+		event->hw.state |= PERF_HES_STOPPED;
+	}
+
+	if ((flags & PERF_EF_UPDATE) &&
+	    !(event->hw.state & PERF_HES_UPTODATE)) {
+		arc_perf_event_update(event, &event->hw, idx);
+		event->hw.state |= PERF_HES_UPTODATE;
+	}
+}
+
+static void arc_pmu_del(struct perf_event *event, int flags)
+{
+	struct arc_pmu_cpu *pmu_cpu = this_cpu_ptr(&arc_pmu_cpu);
+
+	arc_pmu_stop(event, PERF_EF_UPDATE);
+	__clear_bit(event->hw.idx, pmu_cpu->used_mask);
+
+	pmu_cpu->act_counter[event->hw.idx] = 0;
+
+	perf_event_update_userpage(event);
+}
+
+/* allocate hardware counter and optionally start counting */
+static int arc_pmu_add(struct perf_event *event, int flags)
+{
+	struct arc_pmu_cpu *pmu_cpu = this_cpu_ptr(&arc_pmu_cpu);
+	struct hw_perf_event *hwc = &event->hw;
+	int idx = hwc->idx;
+
+	idx = ffz(pmu_cpu->used_mask[0]);
+	if (idx == arc_pmu->n_counters)
+		return -EAGAIN;
+
+	__set_bit(idx, pmu_cpu->used_mask);
+	hwc->idx = idx;
+
+	write_aux_reg(ARC_REG_PCT_INDEX, idx);
+
+	pmu_cpu->act_counter[idx] = event;
+
+	if (is_sampling_event(event)) {
+		/* Mimic full counter overflow as other arches do */
+		write_aux_reg(ARC_REG_PCT_INT_CNTL, (u32)arc_pmu->max_period);
+		write_aux_reg(ARC_REG_PCT_INT_CNTH,
+			      (arc_pmu->max_period >> 32));
+	}
+
+	write_aux_reg(ARC_REG_PCT_CONFIG, 0);
+	write_aux_reg(ARC_REG_PCT_COUNTL, 0);
+	write_aux_reg(ARC_REG_PCT_COUNTH, 0);
+	local64_set(&hwc->prev_count, 0);
+
+	hwc->state = PERF_HES_UPTODATE | PERF_HES_STOPPED;
+	if (flags & PERF_EF_START)
+		arc_pmu_start(event, PERF_EF_RELOAD);
+
+	perf_event_update_userpage(event);
+
+	return 0;
+}
+
+#ifdef CONFIG_ISA_ARCV2
+static irqreturn_t arc_pmu_intr(int irq, void *dev)
+{
+	struct perf_sample_data data;
+	struct arc_pmu_cpu *pmu_cpu = this_cpu_ptr(&arc_pmu_cpu);
+	struct pt_regs *regs;
+	unsigned int active_ints;
+	int idx;
+
+	arc_pmu_disable(&arc_pmu->pmu);
+
+	active_ints = read_aux_reg(ARC_REG_PCT_INT_ACT);
+	if (!active_ints)
+		goto done;
+
+	regs = get_irq_regs();
+
+	do {
+		struct perf_event *event;
+		struct hw_perf_event *hwc;
+
+		idx = __ffs(active_ints);
+
+		/* Reset interrupt flag by writing of 1 */
+		write_aux_reg(ARC_REG_PCT_INT_ACT, 1 << idx);
+
+		/*
+		 * On reset of "interrupt active" bit corresponding
+		 * "interrupt enable" bit gets automatically reset as well.
+		 * Now we need to re-enable interrupt for the counter.
+		 */
+		write_aux_reg(ARC_REG_PCT_INT_CTRL,
+			read_aux_reg(ARC_REG_PCT_INT_CTRL) | (1 << idx));
+
+		event = pmu_cpu->act_counter[idx];
+		hwc = &event->hw;
+
+		WARN_ON_ONCE(hwc->idx != idx);
+
+		arc_perf_event_update(event, &event->hw, event->hw.idx);
+		perf_sample_data_init(&data, 0, hwc->last_period);
+		if (arc_pmu_event_set_period(event)) {
+			if (perf_event_overflow(event, &data, regs))
+				arc_pmu_stop(event, 0);
+		}
+
+		active_ints &= ~(1U << idx);
+	} while (active_ints);
+
+done:
+	arc_pmu_enable(&arc_pmu->pmu);
+
+	return IRQ_HANDLED;
+}
+#else
+
+static irqreturn_t arc_pmu_intr(int irq, void *dev)
+{
+	return IRQ_NONE;
+}
+
+#endif /* CONFIG_ISA_ARCV2 */
+
+static void arc_cpu_pmu_irq_init(void *data)
+{
+	int irq = *(int *)data;
+
+	enable_percpu_irq(irq, IRQ_TYPE_NONE);
+
+	/* Clear all pending interrupt flags */
+	write_aux_reg(ARC_REG_PCT_INT_ACT, 0xffffffff);
+}
+
+static int arc_pmu_device_probe(struct platform_device *pdev)
+{
+	struct arc_reg_pct_build pct_bcr;
+	struct arc_reg_cc_build cc_bcr;
+	int i, j, has_interrupts;
+	int counter_size;	/* in bits */
+
+	union cc_name {
+		struct {
+			uint32_t word0, word1;
+			char sentinel;
+		} indiv;
+		char str[9];
+	} cc_name;
+
+
+	READ_BCR(ARC_REG_PCT_BUILD, pct_bcr);
+	if (!pct_bcr.v) {
+		pr_err("This core does not have performance counters!\n");
+		return -ENODEV;
+	}
+	BUILD_BUG_ON(ARC_PERF_MAX_COUNTERS > 32);
+	BUG_ON(pct_bcr.c > ARC_PERF_MAX_COUNTERS);
+
+	READ_BCR(ARC_REG_CC_BUILD, cc_bcr);
+	BUG_ON(!cc_bcr.v); /* Counters exist but No countable conditions ? */
+
+	arc_pmu = devm_kzalloc(&pdev->dev, sizeof(struct arc_pmu), GFP_KERNEL);
+	if (!arc_pmu)
+		return -ENOMEM;
+
+	has_interrupts = is_isa_arcv2() ? pct_bcr.i : 0;
+
+	arc_pmu->n_counters = pct_bcr.c;
+	counter_size = 32 + (pct_bcr.s << 4);
+
+	arc_pmu->max_period = (1ULL << counter_size) / 2 - 1ULL;
+
+	pr_info("ARC perf\t: %d counters (%d bits), %d conditions%s\n",
+		arc_pmu->n_counters, counter_size, cc_bcr.c,
+		has_interrupts ? ", [overflow IRQ support]":"");
+
+	cc_name.str[8] = 0;
+	for (i = 0; i < PERF_COUNT_ARC_HW_MAX; i++)
+		arc_pmu->ev_hw_idx[i] = -1;
+
+	/* loop thru all available h/w condition indexes */
+	for (j = 0; j < cc_bcr.c; j++) {
+		write_aux_reg(ARC_REG_CC_INDEX, j);
+		cc_name.indiv.word0 = le32_to_cpu(read_aux_reg(ARC_REG_CC_NAME0));
+		cc_name.indiv.word1 = le32_to_cpu(read_aux_reg(ARC_REG_CC_NAME1));
+
+		/* See if it has been mapped to a perf event_id */
+		for (i = 0; i < ARRAY_SIZE(arc_pmu_ev_hw_map); i++) {
+			if (arc_pmu_ev_hw_map[i] &&
+			    !strcmp(arc_pmu_ev_hw_map[i], cc_name.str) &&
+			    strlen(arc_pmu_ev_hw_map[i])) {
+				pr_debug("mapping perf event %2d to h/w event \'%8s\' (idx %d)\n",
+					 i, cc_name.str, j);
+				arc_pmu->ev_hw_idx[i] = j;
+			}
+		}
+	}
+
+	arc_pmu->pmu = (struct pmu) {
+		.pmu_enable	= arc_pmu_enable,
+		.pmu_disable	= arc_pmu_disable,
+		.event_init	= arc_pmu_event_init,
+		.add		= arc_pmu_add,
+		.del		= arc_pmu_del,
+		.start		= arc_pmu_start,
+		.stop		= arc_pmu_stop,
+		.read		= arc_pmu_read,
+	};
+
+	if (has_interrupts) {
+		int irq = platform_get_irq(pdev, 0);
+
+		if (irq < 0) {
+			pr_err("Cannot get IRQ number for the platform\n");
+			return -ENODEV;
+		}
+
+		arc_pmu->irq = irq;
+
+		/* intc map function ensures irq_set_percpu_devid() called */
+		request_percpu_irq(irq, arc_pmu_intr, "ARC perf counters",
+				   this_cpu_ptr(&arc_pmu_cpu));
+
+		on_each_cpu(arc_cpu_pmu_irq_init, &irq, 1);
+
+	} else
+		arc_pmu->pmu.capabilities |= PERF_PMU_CAP_NO_INTERRUPT;
+
+	return perf_pmu_register(&arc_pmu->pmu, pdev->name, PERF_TYPE_RAW);
+}
+
+#ifdef CONFIG_OF
+static const struct of_device_id arc_pmu_match[] = {
+	{ .compatible = "snps,arc700-pct" },
+	{ .compatible = "snps,archs-pct" },
+	{},
+};
+MODULE_DEVICE_TABLE(of, arc_pmu_match);
+#endif
+
+static struct platform_driver arc_pmu_driver = {
+	.driver	= {
+		.name		= "arc-pct",
+		.of_match_table = of_match_ptr(arc_pmu_match),
+	},
+	.probe		= arc_pmu_device_probe,
+};
+
+module_platform_driver(arc_pmu_driver);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Mischa Jonker <mjonker@synopsys.com>");
+MODULE_DESCRIPTION("ARC PMU driver");
diff --git a/arch/arc/kernel/process.c b/arch/arc/kernel/process.c
new file mode 100644
index 000000000..8ce6e7235
--- /dev/null
+++ b/arch/arc/kernel/process.c
@@ -0,0 +1,322 @@
+/*
+ * Copyright (C) 2004, 2007-2010, 2011-2012 Synopsys, Inc. (www.synopsys.com)
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * Amit Bhor, Kanika Nema: Codito Technologies 2004
+ */
+
+#include <linux/errno.h>
+#include <linux/module.h>
+#include <linux/sched.h>
+#include <linux/sched/task.h>
+#include <linux/sched/task_stack.h>
+
+#include <linux/mm.h>
+#include <linux/fs.h>
+#include <linux/unistd.h>
+#include <linux/ptrace.h>
+#include <linux/slab.h>
+#include <linux/syscalls.h>
+#include <linux/elf.h>
+#include <linux/tick.h>
+
+SYSCALL_DEFINE1(arc_settls, void *, user_tls_data_ptr)
+{
+	task_thread_info(current)->thr_ptr = (unsigned int)user_tls_data_ptr;
+	return 0;
+}
+
+/*
+ * We return the user space TLS data ptr as sys-call return code
+ * Ideally it should be copy to user.
+ * However we can cheat by the fact that some sys-calls do return
+ * absurdly high values
+ * Since the tls dat aptr is not going to be in range of 0xFFFF_xxxx
+ * it won't be considered a sys-call error
+ * and it will be loads better than copy-to-user, which is a definite
+ * D-TLB Miss
+ */
+SYSCALL_DEFINE0(arc_gettls)
+{
+	return task_thread_info(current)->thr_ptr;
+}
+
+SYSCALL_DEFINE3(arc_usr_cmpxchg, int *, uaddr, int, expected, int, new)
+{
+	struct pt_regs *regs = current_pt_regs();
+	u32 uval;
+	int ret;
+
+	/*
+	 * This is only for old cores lacking LLOCK/SCOND, which by defintion
+	 * can't possibly be SMP. Thus doesn't need to be SMP safe.
+	 * And this also helps reduce the overhead for serializing in
+	 * the UP case
+	 */
+	WARN_ON_ONCE(IS_ENABLED(CONFIG_SMP));
+
+	/* Z indicates to userspace if operation succeded */
+	regs->status32 &= ~STATUS_Z_MASK;
+
+	ret = access_ok(VERIFY_WRITE, uaddr, sizeof(*uaddr));
+	if (!ret)
+		 goto fail;
+
+again:
+	preempt_disable();
+
+	ret = __get_user(uval, uaddr);
+	if (ret)
+		 goto fault;
+
+	if (uval != expected)
+		 goto out;
+
+	ret = __put_user(new, uaddr);
+	if (ret)
+		 goto fault;
+
+	regs->status32 |= STATUS_Z_MASK;
+
+out:
+	preempt_enable();
+	return uval;
+
+fault:
+	preempt_enable();
+
+	if (unlikely(ret != -EFAULT))
+		 goto fail;
+
+	down_read(&current->mm->mmap_sem);
+	ret = fixup_user_fault(current, current->mm, (unsigned long) uaddr,
+			       FAULT_FLAG_WRITE, NULL);
+	up_read(&current->mm->mmap_sem);
+
+	if (likely(!ret))
+		 goto again;
+
+fail:
+	force_sig(SIGSEGV, current);
+	return ret;
+}
+
+#ifdef CONFIG_ISA_ARCV2
+
+void arch_cpu_idle(void)
+{
+	/* Re-enable interrupts <= default irq priority before commiting SLEEP */
+	const unsigned int arg = 0x10 | ARCV2_IRQ_DEF_PRIO;
+
+	__asm__ __volatile__(
+		"sleep %0	\n"
+		:
+		:"I"(arg)); /* can't be "r" has to be embedded const */
+}
+
+#elif defined(CONFIG_EZNPS_MTM_EXT)	/* ARC700 variant in NPS */
+
+void arch_cpu_idle(void)
+{
+	/* only the calling HW thread needs to sleep */
+	__asm__ __volatile__(
+		".word %0	\n"
+		:
+		:"i"(CTOP_INST_HWSCHD_WFT_IE12));
+}
+
+#else	/* ARC700 */
+
+void arch_cpu_idle(void)
+{
+	/* sleep, but enable both set E1/E2 (levels of interrutps) before committing */
+	__asm__ __volatile__("sleep 0x3	\n");
+}
+
+#endif
+
+asmlinkage void ret_from_fork(void);
+
+/*
+ * Copy architecture-specific thread state
+ *
+ * Layout of Child kernel mode stack as setup at the end of this function is
+ *
+ * |     ...        |
+ * |     ...        |
+ * |    unused      |
+ * |                |
+ * ------------------
+ * |     r25        |   <==== top of Stack (thread.ksp)
+ * ~                ~
+ * |    --to--      |   (CALLEE Regs of kernel mode)
+ * |     r13        |
+ * ------------------
+ * |     fp         |
+ * |    blink       |   @ret_from_fork
+ * ------------------
+ * |                |
+ * ~                ~
+ * ~                ~
+ * |                |
+ * ------------------
+ * |     r12        |
+ * ~                ~
+ * |    --to--      |   (scratch Regs of user mode)
+ * |     r0         |
+ * ------------------
+ * |      SP        |
+ * |    orig_r0     |
+ * |    event/ECR   |
+ * |    user_r25    |
+ * ------------------  <===== END of PAGE
+ */
+int copy_thread(unsigned long clone_flags,
+		unsigned long usp, unsigned long kthread_arg,
+		struct task_struct *p)
+{
+	struct pt_regs *c_regs;        /* child's pt_regs */
+	unsigned long *childksp;       /* to unwind out of __switch_to() */
+	struct callee_regs *c_callee;  /* child's callee regs */
+	struct callee_regs *parent_callee;  /* paren't callee */
+	struct pt_regs *regs = current_pt_regs();
+
+	/* Mark the specific anchors to begin with (see pic above) */
+	c_regs = task_pt_regs(p);
+	childksp = (unsigned long *)c_regs - 2;  /* 2 words for FP/BLINK */
+	c_callee = ((struct callee_regs *)childksp) - 1;
+
+	/*
+	 * __switch_to() uses thread.ksp to start unwinding stack
+	 * For kernel threads we don't need to create callee regs, the
+	 * stack layout nevertheless needs to remain the same.
+	 * Also, since __switch_to anyways unwinds callee regs, we use
+	 * this to populate kernel thread entry-pt/args into callee regs,
+	 * so that ret_from_kernel_thread() becomes simpler.
+	 */
+	p->thread.ksp = (unsigned long)c_callee;	/* THREAD_KSP */
+
+	/* __switch_to expects FP(0), BLINK(return addr) at top */
+	childksp[0] = 0;			/* fp */
+	childksp[1] = (unsigned long)ret_from_fork; /* blink */
+
+	if (unlikely(p->flags & PF_KTHREAD)) {
+		memset(c_regs, 0, sizeof(struct pt_regs));
+
+		c_callee->r13 = kthread_arg;
+		c_callee->r14 = usp;  /* function */
+
+		return 0;
+	}
+
+	/*--------- User Task Only --------------*/
+
+	/* __switch_to expects FP(0), BLINK(return addr) at top of stack */
+	childksp[0] = 0;				/* for POP fp */
+	childksp[1] = (unsigned long)ret_from_fork;	/* for POP blink */
+
+	/* Copy parents pt regs on child's kernel mode stack */
+	*c_regs = *regs;
+
+	if (usp)
+		c_regs->sp = usp;
+
+	c_regs->r0 = 0;		/* fork returns 0 in child */
+
+	parent_callee = ((struct callee_regs *)regs) - 1;
+	*c_callee = *parent_callee;
+
+	if (unlikely(clone_flags & CLONE_SETTLS)) {
+		/*
+		 * set task's userland tls data ptr from 4th arg
+		 * clone C-lib call is difft from clone sys-call
+		 */
+		task_thread_info(p)->thr_ptr = regs->r3;
+	} else {
+		/* Normal fork case: set parent's TLS ptr in child */
+		task_thread_info(p)->thr_ptr =
+		task_thread_info(current)->thr_ptr;
+	}
+
+
+	/*
+	 * setup usermode thread pointer #1:
+	 * when child is picked by scheduler, __switch_to() uses @c_callee to
+	 * populate usermode callee regs: this works (despite being in a kernel
+	 * function) since special return path for child @ret_from_fork()
+	 * ensures those regs are not clobbered all the way to RTIE to usermode
+	 */
+	c_callee->r25 = task_thread_info(p)->thr_ptr;
+
+#ifdef CONFIG_ARC_CURR_IN_REG
+	/*
+	 * setup usermode thread pointer #2:
+	 * however for this special use of r25 in kernel, __switch_to() sets
+	 * r25 for kernel needs and only in the final return path is usermode
+	 * r25 setup, from pt_regs->user_r25. So set that up as well
+	 */
+	c_regs->user_r25 = c_callee->r25;
+#endif
+
+	return 0;
+}
+
+/*
+ * Do necessary setup to start up a new user task
+ */
+void start_thread(struct pt_regs * regs, unsigned long pc, unsigned long usp)
+{
+	regs->sp = usp;
+	regs->ret = pc;
+
+	/*
+	 * [U]ser Mode bit set
+	 * [L] ZOL loop inhibited to begin with - cleared by a LP insn
+	 * Interrupts enabled
+	 */
+	regs->status32 = STATUS_U_MASK | STATUS_L_MASK | ISA_INIT_STATUS_BITS;
+
+#ifdef CONFIG_EZNPS_MTM_EXT
+	regs->eflags = 0;
+#endif
+
+	/* bogus seed values for debugging */
+	regs->lp_start = 0x10;
+	regs->lp_end = 0x80;
+}
+
+/*
+ * Some archs flush debug and FPU info here
+ */
+void flush_thread(void)
+{
+}
+
+int dump_fpu(struct pt_regs *regs, elf_fpregset_t *fpu)
+{
+	return 0;
+}
+
+int elf_check_arch(const struct elf32_hdr *x)
+{
+	unsigned int eflags;
+
+	if (x->e_machine != EM_ARC_INUSE) {
+		pr_err("ELF not built for %s ISA\n",
+			is_isa_arcompact() ? "ARCompact":"ARCv2");
+		return 0;
+	}
+
+	eflags = x->e_flags;
+	if ((eflags & EF_ARC_OSABI_MSK) != EF_ARC_OSABI_CURRENT) {
+		pr_err("ABI mismatch - you need newer toolchain\n");
+		force_sigsegv(SIGSEGV, current);
+		return 0;
+	}
+
+	return 1;
+}
+EXPORT_SYMBOL(elf_check_arch);
diff --git a/arch/arc/kernel/ptrace.c b/arch/arc/kernel/ptrace.c
new file mode 100644
index 000000000..5ee4676f1
--- /dev/null
+++ b/arch/arc/kernel/ptrace.c
@@ -0,0 +1,305 @@
+/*
+ * Copyright (C) 2004, 2007-2010, 2011-2012 Synopsys, Inc. (www.synopsys.com)
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/ptrace.h>
+#include <linux/tracehook.h>
+#include <linux/sched/task_stack.h>
+#include <linux/regset.h>
+#include <linux/unistd.h>
+#include <linux/elf.h>
+
+static struct callee_regs *task_callee_regs(struct task_struct *tsk)
+{
+	struct callee_regs *tmp = (struct callee_regs *)tsk->thread.callee_reg;
+	return tmp;
+}
+
+static int genregs_get(struct task_struct *target,
+		       const struct user_regset *regset,
+		       unsigned int pos, unsigned int count,
+		       void *kbuf, void __user *ubuf)
+{
+	const struct pt_regs *ptregs = task_pt_regs(target);
+	const struct callee_regs *cregs = task_callee_regs(target);
+	int ret = 0;
+	unsigned int stop_pc_val;
+
+#define REG_O_CHUNK(START, END, PTR)	\
+	if (!ret)	\
+		ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf, PTR, \
+			offsetof(struct user_regs_struct, START), \
+			offsetof(struct user_regs_struct, END));
+
+#define REG_O_ONE(LOC, PTR)	\
+	if (!ret)		\
+		ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf, PTR, \
+			offsetof(struct user_regs_struct, LOC), \
+			offsetof(struct user_regs_struct, LOC) + 4);
+
+#define REG_O_ZERO(LOC)		\
+	if (!ret)		\
+		ret = user_regset_copyout_zero(&pos, &count, &kbuf, &ubuf, \
+			offsetof(struct user_regs_struct, LOC), \
+			offsetof(struct user_regs_struct, LOC) + 4);
+
+	REG_O_ZERO(pad);
+	REG_O_ONE(scratch.bta, &ptregs->bta);
+	REG_O_ONE(scratch.lp_start, &ptregs->lp_start);
+	REG_O_ONE(scratch.lp_end, &ptregs->lp_end);
+	REG_O_ONE(scratch.lp_count, &ptregs->lp_count);
+	REG_O_ONE(scratch.status32, &ptregs->status32);
+	REG_O_ONE(scratch.ret, &ptregs->ret);
+	REG_O_ONE(scratch.blink, &ptregs->blink);
+	REG_O_ONE(scratch.fp, &ptregs->fp);
+	REG_O_ONE(scratch.gp, &ptregs->r26);
+	REG_O_ONE(scratch.r12, &ptregs->r12);
+	REG_O_ONE(scratch.r11, &ptregs->r11);
+	REG_O_ONE(scratch.r10, &ptregs->r10);
+	REG_O_ONE(scratch.r9, &ptregs->r9);
+	REG_O_ONE(scratch.r8, &ptregs->r8);
+	REG_O_ONE(scratch.r7, &ptregs->r7);
+	REG_O_ONE(scratch.r6, &ptregs->r6);
+	REG_O_ONE(scratch.r5, &ptregs->r5);
+	REG_O_ONE(scratch.r4, &ptregs->r4);
+	REG_O_ONE(scratch.r3, &ptregs->r3);
+	REG_O_ONE(scratch.r2, &ptregs->r2);
+	REG_O_ONE(scratch.r1, &ptregs->r1);
+	REG_O_ONE(scratch.r0, &ptregs->r0);
+	REG_O_ONE(scratch.sp, &ptregs->sp);
+
+	REG_O_ZERO(pad2);
+
+	REG_O_ONE(callee.r25, &cregs->r25);
+	REG_O_ONE(callee.r24, &cregs->r24);
+	REG_O_ONE(callee.r23, &cregs->r23);
+	REG_O_ONE(callee.r22, &cregs->r22);
+	REG_O_ONE(callee.r21, &cregs->r21);
+	REG_O_ONE(callee.r20, &cregs->r20);
+	REG_O_ONE(callee.r19, &cregs->r19);
+	REG_O_ONE(callee.r18, &cregs->r18);
+	REG_O_ONE(callee.r17, &cregs->r17);
+	REG_O_ONE(callee.r16, &cregs->r16);
+	REG_O_ONE(callee.r15, &cregs->r15);
+	REG_O_ONE(callee.r14, &cregs->r14);
+	REG_O_ONE(callee.r13, &cregs->r13);
+
+	REG_O_ONE(efa, &target->thread.fault_address);
+
+	if (!ret) {
+		if (in_brkpt_trap(ptregs)) {
+			stop_pc_val = target->thread.fault_address;
+			pr_debug("\t\tstop_pc (brk-pt)\n");
+		} else {
+			stop_pc_val = ptregs->ret;
+			pr_debug("\t\tstop_pc (others)\n");
+		}
+
+		REG_O_ONE(stop_pc, &stop_pc_val);
+	}
+
+	return ret;
+}
+
+static int genregs_set(struct task_struct *target,
+		       const struct user_regset *regset,
+		       unsigned int pos, unsigned int count,
+		       const void *kbuf, const void __user *ubuf)
+{
+	const struct pt_regs *ptregs = task_pt_regs(target);
+	const struct callee_regs *cregs = task_callee_regs(target);
+	int ret = 0;
+
+#define REG_IN_CHUNK(FIRST, NEXT, PTR)	\
+	if (!ret)			\
+		ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, \
+			(void *)(PTR), \
+			offsetof(struct user_regs_struct, FIRST), \
+			offsetof(struct user_regs_struct, NEXT));
+
+#define REG_IN_ONE(LOC, PTR)		\
+	if (!ret)			\
+		ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, \
+			(void *)(PTR), \
+			offsetof(struct user_regs_struct, LOC), \
+			offsetof(struct user_regs_struct, LOC) + 4);
+
+#define REG_IGNORE_ONE(LOC)		\
+	if (!ret)			\
+		ret = user_regset_copyin_ignore(&pos, &count, &kbuf, &ubuf, \
+			offsetof(struct user_regs_struct, LOC), \
+			offsetof(struct user_regs_struct, LOC) + 4);
+
+	REG_IGNORE_ONE(pad);
+
+	REG_IN_ONE(scratch.bta, &ptregs->bta);
+	REG_IN_ONE(scratch.lp_start, &ptregs->lp_start);
+	REG_IN_ONE(scratch.lp_end, &ptregs->lp_end);
+	REG_IN_ONE(scratch.lp_count, &ptregs->lp_count);
+
+	REG_IGNORE_ONE(scratch.status32);
+
+	REG_IN_ONE(scratch.ret, &ptregs->ret);
+	REG_IN_ONE(scratch.blink, &ptregs->blink);
+	REG_IN_ONE(scratch.fp, &ptregs->fp);
+	REG_IN_ONE(scratch.gp, &ptregs->r26);
+	REG_IN_ONE(scratch.r12, &ptregs->r12);
+	REG_IN_ONE(scratch.r11, &ptregs->r11);
+	REG_IN_ONE(scratch.r10, &ptregs->r10);
+	REG_IN_ONE(scratch.r9, &ptregs->r9);
+	REG_IN_ONE(scratch.r8, &ptregs->r8);
+	REG_IN_ONE(scratch.r7, &ptregs->r7);
+	REG_IN_ONE(scratch.r6, &ptregs->r6);
+	REG_IN_ONE(scratch.r5, &ptregs->r5);
+	REG_IN_ONE(scratch.r4, &ptregs->r4);
+	REG_IN_ONE(scratch.r3, &ptregs->r3);
+	REG_IN_ONE(scratch.r2, &ptregs->r2);
+	REG_IN_ONE(scratch.r1, &ptregs->r1);
+	REG_IN_ONE(scratch.r0, &ptregs->r0);
+	REG_IN_ONE(scratch.sp, &ptregs->sp);
+
+	REG_IGNORE_ONE(pad2);
+
+	REG_IN_ONE(callee.r25, &cregs->r25);
+	REG_IN_ONE(callee.r24, &cregs->r24);
+	REG_IN_ONE(callee.r23, &cregs->r23);
+	REG_IN_ONE(callee.r22, &cregs->r22);
+	REG_IN_ONE(callee.r21, &cregs->r21);
+	REG_IN_ONE(callee.r20, &cregs->r20);
+	REG_IN_ONE(callee.r19, &cregs->r19);
+	REG_IN_ONE(callee.r18, &cregs->r18);
+	REG_IN_ONE(callee.r17, &cregs->r17);
+	REG_IN_ONE(callee.r16, &cregs->r16);
+	REG_IN_ONE(callee.r15, &cregs->r15);
+	REG_IN_ONE(callee.r14, &cregs->r14);
+	REG_IN_ONE(callee.r13, &cregs->r13);
+
+	REG_IGNORE_ONE(efa);			/* efa update invalid */
+	REG_IGNORE_ONE(stop_pc);		/* PC updated via @ret */
+
+	return ret;
+}
+
+#ifdef CONFIG_ISA_ARCV2
+static int arcv2regs_get(struct task_struct *target,
+		       const struct user_regset *regset,
+		       unsigned int pos, unsigned int count,
+		       void *kbuf, void __user *ubuf)
+{
+	const struct pt_regs *regs = task_pt_regs(target);
+	int ret, copy_sz;
+
+	if (IS_ENABLED(CONFIG_ARC_HAS_ACCL_REGS))
+		copy_sz = sizeof(struct user_regs_arcv2);
+	else
+		copy_sz = 4;	/* r30 only */
+
+	/*
+	 * itemized copy not needed like above as layout of regs (r30,r58,r59)
+	 * is exactly same in kernel (pt_regs) and userspace (user_regs_arcv2)
+	 */
+	ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf, &regs->r30,
+				  0, copy_sz);
+
+	return ret;
+}
+
+static int arcv2regs_set(struct task_struct *target,
+		       const struct user_regset *regset,
+		       unsigned int pos, unsigned int count,
+		       const void *kbuf, const void __user *ubuf)
+{
+	const struct pt_regs *regs = task_pt_regs(target);
+	int ret, copy_sz;
+
+	if (IS_ENABLED(CONFIG_ARC_HAS_ACCL_REGS))
+		copy_sz = sizeof(struct user_regs_arcv2);
+	else
+		copy_sz = 4;	/* r30 only */
+
+	ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, (void *)&regs->r30,
+				  0, copy_sz);
+
+	return ret;
+}
+
+#endif
+
+enum arc_getset {
+	REGSET_CMN,
+	REGSET_ARCV2,
+};
+
+static const struct user_regset arc_regsets[] = {
+	[REGSET_CMN] = {
+	       .core_note_type = NT_PRSTATUS,
+	       .n = ELF_NGREG,
+	       .size = sizeof(unsigned long),
+	       .align = sizeof(unsigned long),
+	       .get = genregs_get,
+	       .set = genregs_set,
+	},
+#ifdef CONFIG_ISA_ARCV2
+	[REGSET_ARCV2] = {
+	       .core_note_type = NT_ARC_V2,
+	       .n = ELF_ARCV2REG,
+	       .size = sizeof(unsigned long),
+	       .align = sizeof(unsigned long),
+	       .get = arcv2regs_get,
+	       .set = arcv2regs_set,
+	},
+#endif
+};
+
+static const struct user_regset_view user_arc_view = {
+	.name		= UTS_MACHINE,
+	.e_machine	= EM_ARC_INUSE,
+	.regsets	= arc_regsets,
+	.n		= ARRAY_SIZE(arc_regsets)
+};
+
+const struct user_regset_view *task_user_regset_view(struct task_struct *task)
+{
+	return &user_arc_view;
+}
+
+void ptrace_disable(struct task_struct *child)
+{
+}
+
+long arch_ptrace(struct task_struct *child, long request,
+		 unsigned long addr, unsigned long data)
+{
+	int ret = -EIO;
+
+	pr_debug("REQ=%ld: ADDR =0x%lx, DATA=0x%lx)\n", request, addr, data);
+
+	switch (request) {
+	case PTRACE_GET_THREAD_AREA:
+		ret = put_user(task_thread_info(child)->thr_ptr,
+			       (unsigned long __user *)data);
+		break;
+	default:
+		ret = ptrace_request(child, request, addr, data);
+		break;
+	}
+
+	return ret;
+}
+
+asmlinkage int syscall_trace_entry(struct pt_regs *regs)
+{
+	if (tracehook_report_syscall_entry(regs))
+		return ULONG_MAX;
+
+	return regs->r8;
+}
+
+asmlinkage void syscall_trace_exit(struct pt_regs *regs)
+{
+	tracehook_report_syscall_exit(regs, 0);
+}
diff --git a/arch/arc/kernel/reset.c b/arch/arc/kernel/reset.c
new file mode 100644
index 000000000..2768fa1e3
--- /dev/null
+++ b/arch/arc/kernel/reset.c
@@ -0,0 +1,34 @@
+/*
+ * Copyright (C) 2011-2012 Synopsys, Inc. (www.synopsys.com)
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/kernel.h>
+#include <linux/printk.h>
+#include <linux/reboot.h>
+#include <linux/pm.h>
+
+void machine_halt(void)
+{
+	/* Halt the processor */
+	__asm__ __volatile__("flag  1\n");
+}
+
+void machine_restart(char *__unused)
+{
+	/* Soft reset : jump to reset vector */
+	pr_info("Put your restart handler here\n");
+	machine_halt();
+}
+
+void machine_power_off(void)
+{
+	/* FIXME ::  power off ??? */
+	machine_halt();
+}
+
+void (*pm_power_off) (void) = NULL;
+EXPORT_SYMBOL(pm_power_off);
diff --git a/arch/arc/kernel/setup.c b/arch/arc/kernel/setup.c
new file mode 100644
index 000000000..c10994dae
--- /dev/null
+++ b/arch/arc/kernel/setup.c
@@ -0,0 +1,699 @@
+/*
+ * Copyright (C) 2004, 2007-2010, 2011-2012 Synopsys, Inc. (www.synopsys.com)
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/seq_file.h>
+#include <linux/fs.h>
+#include <linux/delay.h>
+#include <linux/root_dev.h>
+#include <linux/clk.h>
+#include <linux/clk-provider.h>
+#include <linux/clocksource.h>
+#include <linux/console.h>
+#include <linux/module.h>
+#include <linux/sizes.h>
+#include <linux/cpu.h>
+#include <linux/of_fdt.h>
+#include <linux/of.h>
+#include <linux/cache.h>
+#include <asm/sections.h>
+#include <asm/arcregs.h>
+#include <asm/tlb.h>
+#include <asm/setup.h>
+#include <asm/page.h>
+#include <asm/irq.h>
+#include <asm/unwind.h>
+#include <asm/mach_desc.h>
+#include <asm/smp.h>
+
+#define FIX_PTR(x)  __asm__ __volatile__(";" : "+r"(x))
+
+unsigned int intr_to_DE_cnt;
+
+/* Part of U-boot ABI: see head.S */
+int __initdata uboot_tag;
+int __initdata uboot_magic;
+char __initdata *uboot_arg;
+
+const struct machine_desc *machine_desc;
+
+struct task_struct *_current_task[NR_CPUS];	/* For stack switching */
+
+struct cpuinfo_arc cpuinfo_arc700[NR_CPUS];
+
+static const struct id_to_str arc_cpu_rel[] = {
+#ifdef CONFIG_ISA_ARCOMPACT
+	{ 0x34, "R4.10"},
+	{ 0x35, "R4.11"},
+#else
+	{ 0x51, "R2.0" },
+	{ 0x52, "R2.1" },
+	{ 0x53, "R3.0" },
+	{ 0x54, "R3.10a" },
+#endif
+	{ 0x00, NULL   }
+};
+
+static const struct id_to_str arc_cpu_nm[] = {
+#ifdef CONFIG_ISA_ARCOMPACT
+	{ 0x20, "ARC 600"   },
+	{ 0x30, "ARC 770"   },  /* 750 identified seperately */
+#else
+	{ 0x40, "ARC EM"  },
+	{ 0x50, "ARC HS38"  },
+	{ 0x54, "ARC HS48"  },
+#endif
+	{ 0x00, "Unknown"   }
+};
+
+static void read_decode_ccm_bcr(struct cpuinfo_arc *cpu)
+{
+	if (is_isa_arcompact()) {
+		struct bcr_iccm_arcompact iccm;
+		struct bcr_dccm_arcompact dccm;
+
+		READ_BCR(ARC_REG_ICCM_BUILD, iccm);
+		if (iccm.ver) {
+			cpu->iccm.sz = 4096 << iccm.sz;	/* 8K to 512K */
+			cpu->iccm.base_addr = iccm.base << 16;
+		}
+
+		READ_BCR(ARC_REG_DCCM_BUILD, dccm);
+		if (dccm.ver) {
+			unsigned long base;
+			cpu->dccm.sz = 2048 << dccm.sz;	/* 2K to 256K */
+
+			base = read_aux_reg(ARC_REG_DCCM_BASE_BUILD);
+			cpu->dccm.base_addr = base & ~0xF;
+		}
+	} else {
+		struct bcr_iccm_arcv2 iccm;
+		struct bcr_dccm_arcv2 dccm;
+		unsigned long region;
+
+		READ_BCR(ARC_REG_ICCM_BUILD, iccm);
+		if (iccm.ver) {
+			cpu->iccm.sz = 256 << iccm.sz00;	/* 512B to 16M */
+			if (iccm.sz00 == 0xF && iccm.sz01 > 0)
+				cpu->iccm.sz <<= iccm.sz01;
+
+			region = read_aux_reg(ARC_REG_AUX_ICCM);
+			cpu->iccm.base_addr = region & 0xF0000000;
+		}
+
+		READ_BCR(ARC_REG_DCCM_BUILD, dccm);
+		if (dccm.ver) {
+			cpu->dccm.sz = 256 << dccm.sz0;
+			if (dccm.sz0 == 0xF && dccm.sz1 > 0)
+				cpu->dccm.sz <<= dccm.sz1;
+
+			region = read_aux_reg(ARC_REG_AUX_DCCM);
+			cpu->dccm.base_addr = region & 0xF0000000;
+		}
+	}
+}
+
+static void read_arc_build_cfg_regs(void)
+{
+	struct bcr_timer timer;
+	struct bcr_generic bcr;
+	struct cpuinfo_arc *cpu = &cpuinfo_arc700[smp_processor_id()];
+	const struct id_to_str *tbl;
+	struct bcr_isa_arcv2 isa;
+
+	FIX_PTR(cpu);
+
+	READ_BCR(AUX_IDENTITY, cpu->core);
+
+	for (tbl = &arc_cpu_rel[0]; tbl->id != 0; tbl++) {
+		if (cpu->core.family == tbl->id) {
+			cpu->details = tbl->str;
+			break;
+		}
+	}
+
+	for (tbl = &arc_cpu_nm[0]; tbl->id != 0; tbl++) {
+		if ((cpu->core.family & 0xF4) == tbl->id)
+			break;
+	}
+	cpu->name = tbl->str;
+
+	READ_BCR(ARC_REG_TIMERS_BCR, timer);
+	cpu->extn.timer0 = timer.t0;
+	cpu->extn.timer1 = timer.t1;
+	cpu->extn.rtc = timer.rtc;
+
+	cpu->vec_base = read_aux_reg(AUX_INTR_VEC_BASE);
+
+	READ_BCR(ARC_REG_MUL_BCR, cpu->extn_mpy);
+
+	cpu->extn.norm = read_aux_reg(ARC_REG_NORM_BCR) > 1 ? 1 : 0; /* 2,3 */
+	cpu->extn.barrel = read_aux_reg(ARC_REG_BARREL_BCR) > 1 ? 1 : 0; /* 2,3 */
+	cpu->extn.swap = read_aux_reg(ARC_REG_SWAP_BCR) ? 1 : 0;        /* 1,3 */
+	cpu->extn.crc = read_aux_reg(ARC_REG_CRC_BCR) ? 1 : 0;
+	cpu->extn.minmax = read_aux_reg(ARC_REG_MIXMAX_BCR) > 1 ? 1 : 0; /* 2 */
+	cpu->extn.swape = (cpu->core.family >= 0x34) ? 1 :
+				IS_ENABLED(CONFIG_ARC_HAS_SWAPE);
+
+	READ_BCR(ARC_REG_XY_MEM_BCR, cpu->extn_xymem);
+
+	/* Read CCM BCRs for boot reporting even if not enabled in Kconfig */
+	read_decode_ccm_bcr(cpu);
+
+	read_decode_mmu_bcr();
+	read_decode_cache_bcr();
+
+	if (is_isa_arcompact()) {
+		struct bcr_fp_arcompact sp, dp;
+		struct bcr_bpu_arcompact bpu;
+
+		READ_BCR(ARC_REG_FP_BCR, sp);
+		READ_BCR(ARC_REG_DPFP_BCR, dp);
+		cpu->extn.fpu_sp = sp.ver ? 1 : 0;
+		cpu->extn.fpu_dp = dp.ver ? 1 : 0;
+
+		READ_BCR(ARC_REG_BPU_BCR, bpu);
+		cpu->bpu.ver = bpu.ver;
+		cpu->bpu.full = bpu.fam ? 1 : 0;
+		if (bpu.ent) {
+			cpu->bpu.num_cache = 256 << (bpu.ent - 1);
+			cpu->bpu.num_pred = 256 << (bpu.ent - 1);
+		}
+	} else {
+		struct bcr_fp_arcv2 spdp;
+		struct bcr_bpu_arcv2 bpu;
+
+		READ_BCR(ARC_REG_FP_V2_BCR, spdp);
+		cpu->extn.fpu_sp = spdp.sp ? 1 : 0;
+		cpu->extn.fpu_dp = spdp.dp ? 1 : 0;
+
+		READ_BCR(ARC_REG_BPU_BCR, bpu);
+		cpu->bpu.ver = bpu.ver;
+		cpu->bpu.full = bpu.ft;
+		cpu->bpu.num_cache = 256 << bpu.bce;
+		cpu->bpu.num_pred = 2048 << bpu.pte;
+
+		if (cpu->core.family >= 0x54) {
+
+			struct bcr_uarch_build_arcv2 uarch;
+
+			/*
+			 * The first 0x54 core (uarch maj:min 0:1 or 0:2) was
+			 * dual issue only (HS4x). But next uarch rev (1:0)
+			 * allows it be configured for single issue (HS3x)
+			 * Ensure we fiddle with dual issue only on HS4x
+			 */
+			READ_BCR(ARC_REG_MICRO_ARCH_BCR, uarch);
+
+			if (uarch.prod == 4) {
+				unsigned int exec_ctrl;
+
+				/* dual issue hardware always present */
+				cpu->extn.dual = 1;
+
+				READ_BCR(AUX_EXEC_CTRL, exec_ctrl);
+
+				/* dual issue hardware enabled ? */
+				cpu->extn.dual_enb = !(exec_ctrl & 1);
+
+			}
+		}
+	}
+
+	READ_BCR(ARC_REG_AP_BCR, bcr);
+	cpu->extn.ap = bcr.ver ? 1 : 0;
+
+	READ_BCR(ARC_REG_SMART_BCR, bcr);
+	cpu->extn.smart = bcr.ver ? 1 : 0;
+
+	READ_BCR(ARC_REG_RTT_BCR, bcr);
+	cpu->extn.rtt = bcr.ver ? 1 : 0;
+
+	cpu->extn.debug = cpu->extn.ap | cpu->extn.smart | cpu->extn.rtt;
+
+	READ_BCR(ARC_REG_ISA_CFG_BCR, isa);
+
+	/* some hacks for lack of feature BCR info in old ARC700 cores */
+	if (is_isa_arcompact()) {
+		if (!isa.ver)	/* ISA BCR absent, use Kconfig info */
+			cpu->isa.atomic = IS_ENABLED(CONFIG_ARC_HAS_LLSC);
+		else {
+			/* ARC700_BUILD only has 2 bits of isa info */
+			struct bcr_generic bcr = *(struct bcr_generic *)&isa;
+			cpu->isa.atomic = bcr.info & 1;
+		}
+
+		cpu->isa.be = IS_ENABLED(CONFIG_CPU_BIG_ENDIAN);
+
+		 /* there's no direct way to distinguish 750 vs. 770 */
+		if (unlikely(cpu->core.family < 0x34 || cpu->mmu.ver < 3))
+			cpu->name = "ARC750";
+	} else {
+		cpu->isa = isa;
+	}
+}
+
+static char *arc_cpu_mumbojumbo(int cpu_id, char *buf, int len)
+{
+	struct cpuinfo_arc *cpu = &cpuinfo_arc700[cpu_id];
+	struct bcr_identity *core = &cpu->core;
+	int i, n = 0;
+
+	FIX_PTR(cpu);
+
+	n += scnprintf(buf + n, len - n,
+		       "\nIDENTITY\t: ARCVER [%#02x] ARCNUM [%#02x] CHIPID [%#4x]\n",
+		       core->family, core->cpu_id, core->chip_id);
+
+	n += scnprintf(buf + n, len - n, "processor [%d]\t: %s %s (%s ISA) %s%s%s\n",
+		       cpu_id, cpu->name, cpu->details,
+		       is_isa_arcompact() ? "ARCompact" : "ARCv2",
+		       IS_AVAIL1(cpu->isa.be, "[Big-Endian]"),
+		       IS_AVAIL3(cpu->extn.dual, cpu->extn.dual_enb, " Dual-Issue "));
+
+	n += scnprintf(buf + n, len - n, "Timers\t\t: %s%s%s%s%s%s\nISA Extn\t: ",
+		       IS_AVAIL1(cpu->extn.timer0, "Timer0 "),
+		       IS_AVAIL1(cpu->extn.timer1, "Timer1 "),
+		       IS_AVAIL2(cpu->extn.rtc, "RTC [UP 64-bit] ", CONFIG_ARC_TIMERS_64BIT),
+		       IS_AVAIL2(cpu->extn.gfrc, "GFRC [SMP 64-bit] ", CONFIG_ARC_TIMERS_64BIT));
+
+	n += i = scnprintf(buf + n, len - n, "%s%s%s%s%s",
+			   IS_AVAIL2(cpu->isa.atomic, "atomic ", CONFIG_ARC_HAS_LLSC),
+			   IS_AVAIL2(cpu->isa.ldd, "ll64 ", CONFIG_ARC_HAS_LL64),
+			   IS_AVAIL1(cpu->isa.unalign, "unalign (not used)"));
+
+	if (i)
+		n += scnprintf(buf + n, len - n, "\n\t\t: ");
+
+	if (cpu->extn_mpy.ver) {
+		if (cpu->extn_mpy.ver <= 0x2) {	/* ARCompact */
+			n += scnprintf(buf + n, len - n, "mpy ");
+		} else {
+			int opt = 2;	/* stock MPY/MPYH */
+
+			if (cpu->extn_mpy.dsp)	/* OPT 7-9 */
+				opt = cpu->extn_mpy.dsp + 6;
+
+			n += scnprintf(buf + n, len - n, "mpy[opt %d] ", opt);
+		}
+	}
+
+	n += scnprintf(buf + n, len - n, "%s%s%s%s%s%s%s%s\n",
+		       IS_AVAIL1(cpu->isa.div_rem, "div_rem "),
+		       IS_AVAIL1(cpu->extn.norm, "norm "),
+		       IS_AVAIL1(cpu->extn.barrel, "barrel-shift "),
+		       IS_AVAIL1(cpu->extn.swap, "swap "),
+		       IS_AVAIL1(cpu->extn.minmax, "minmax "),
+		       IS_AVAIL1(cpu->extn.crc, "crc "),
+		       IS_AVAIL2(cpu->extn.swape, "swape", CONFIG_ARC_HAS_SWAPE));
+
+	if (cpu->bpu.ver)
+		n += scnprintf(buf + n, len - n,
+			      "BPU\t\t: %s%s match, cache:%d, Predict Table:%d",
+			      IS_AVAIL1(cpu->bpu.full, "full"),
+			      IS_AVAIL1(!cpu->bpu.full, "partial"),
+			      cpu->bpu.num_cache, cpu->bpu.num_pred);
+
+	if (is_isa_arcv2()) {
+		struct bcr_lpb lpb;
+
+		READ_BCR(ARC_REG_LPB_BUILD, lpb);
+		if (lpb.ver) {
+			unsigned int ctl;
+			ctl = read_aux_reg(ARC_REG_LPB_CTRL);
+
+			n += scnprintf(buf + n, len - n, " Loop Buffer:%d %s",
+				lpb.entries,
+				IS_DISABLED_RUN(!ctl));
+		}
+	}
+
+	n += scnprintf(buf + n, len - n, "\n");
+	return buf;
+}
+
+static char *arc_extn_mumbojumbo(int cpu_id, char *buf, int len)
+{
+	int n = 0;
+	struct cpuinfo_arc *cpu = &cpuinfo_arc700[cpu_id];
+
+	FIX_PTR(cpu);
+
+	n += scnprintf(buf + n, len - n, "Vector Table\t: %#x\n", cpu->vec_base);
+
+	if (cpu->extn.fpu_sp || cpu->extn.fpu_dp)
+		n += scnprintf(buf + n, len - n, "FPU\t\t: %s%s\n",
+			       IS_AVAIL1(cpu->extn.fpu_sp, "SP "),
+			       IS_AVAIL1(cpu->extn.fpu_dp, "DP "));
+
+	if (cpu->extn.debug)
+		n += scnprintf(buf + n, len - n, "DEBUG\t\t: %s%s%s\n",
+			       IS_AVAIL1(cpu->extn.ap, "ActionPoint "),
+			       IS_AVAIL1(cpu->extn.smart, "smaRT "),
+			       IS_AVAIL1(cpu->extn.rtt, "RTT "));
+
+	if (cpu->dccm.sz || cpu->iccm.sz)
+		n += scnprintf(buf + n, len - n, "Extn [CCM]\t: DCCM @ %x, %d KB / ICCM: @ %x, %d KB\n",
+			       cpu->dccm.base_addr, TO_KB(cpu->dccm.sz),
+			       cpu->iccm.base_addr, TO_KB(cpu->iccm.sz));
+
+	if (is_isa_arcv2()) {
+
+		/* Error Protection: ECC/Parity */
+		struct bcr_erp erp;
+		READ_BCR(ARC_REG_ERP_BUILD, erp);
+
+		if (erp.ver) {
+			struct  ctl_erp ctl;
+			READ_BCR(ARC_REG_ERP_CTRL, ctl);
+
+			/* inverted bits: 0 means enabled */
+			n += scnprintf(buf + n, len - n, "Extn [ECC]\t: %s%s%s%s%s%s\n",
+				IS_AVAIL3(erp.ic,  !ctl.dpi, "IC "),
+				IS_AVAIL3(erp.dc,  !ctl.dpd, "DC "),
+				IS_AVAIL3(erp.mmu, !ctl.mpd, "MMU "));
+		}
+	}
+
+	n += scnprintf(buf + n, len - n, "OS ABI [v%d]\t: %s\n",
+			EF_ARC_OSABI_CURRENT >> 8,
+			EF_ARC_OSABI_CURRENT == EF_ARC_OSABI_V3 ?
+			"no-legacy-syscalls" : "64-bit data any register aligned");
+
+	return buf;
+}
+
+static void arc_chk_core_config(void)
+{
+	struct cpuinfo_arc *cpu = &cpuinfo_arc700[smp_processor_id()];
+	int saved = 0, present = 0;
+	char *opt_nm = NULL;
+
+	if (!cpu->extn.timer0)
+		panic("Timer0 is not present!\n");
+
+	if (!cpu->extn.timer1)
+		panic("Timer1 is not present!\n");
+
+#ifdef CONFIG_ARC_HAS_DCCM
+	/*
+	 * DCCM can be arbit placed in hardware.
+	 * Make sure it's placement/sz matches what Linux is built with
+	 */
+	if ((unsigned int)__arc_dccm_base != cpu->dccm.base_addr)
+		panic("Linux built with incorrect DCCM Base address\n");
+
+	if (CONFIG_ARC_DCCM_SZ * SZ_1K != cpu->dccm.sz)
+		panic("Linux built with incorrect DCCM Size\n");
+#endif
+
+#ifdef CONFIG_ARC_HAS_ICCM
+	if (CONFIG_ARC_ICCM_SZ * SZ_1K != cpu->iccm.sz)
+		panic("Linux built with incorrect ICCM Size\n");
+#endif
+
+	/*
+	 * FP hardware/software config sanity
+	 * -If hardware present, kernel needs to save/restore FPU state
+	 * -If not, it will crash trying to save/restore the non-existant regs
+	 */
+
+	if (is_isa_arcompact()) {
+		opt_nm = "CONFIG_ARC_FPU_SAVE_RESTORE";
+		saved = IS_ENABLED(CONFIG_ARC_FPU_SAVE_RESTORE);
+
+		/* only DPDP checked since SP has no arch visible regs */
+		present = cpu->extn.fpu_dp;
+	} else {
+		opt_nm = "CONFIG_ARC_HAS_ACCL_REGS";
+		saved = IS_ENABLED(CONFIG_ARC_HAS_ACCL_REGS);
+
+		/* Accumulator Low:High pair (r58:59) present if DSP MPY or FPU */
+		present = cpu->extn_mpy.dsp | cpu->extn.fpu_sp | cpu->extn.fpu_dp;
+	}
+
+	if (present && !saved)
+		pr_warn("Enable %s for working apps\n", opt_nm);
+	else if (!present && saved)
+		panic("Disable %s, hardware NOT present\n", opt_nm);
+}
+
+/*
+ * Initialize and setup the processor core
+ * This is called by all the CPUs thus should not do special case stuff
+ *    such as only for boot CPU etc
+ */
+
+void setup_processor(void)
+{
+	char str[512];
+	int cpu_id = smp_processor_id();
+
+	read_arc_build_cfg_regs();
+	arc_init_IRQ();
+
+	pr_info("%s", arc_cpu_mumbojumbo(cpu_id, str, sizeof(str)));
+
+	arc_mmu_init();
+	arc_cache_init();
+
+	pr_info("%s", arc_extn_mumbojumbo(cpu_id, str, sizeof(str)));
+	pr_info("%s", arc_platform_smp_cpuinfo());
+
+	arc_chk_core_config();
+}
+
+static inline bool uboot_arg_invalid(unsigned long addr)
+{
+	/*
+	 * Check that it is a untranslated address (although MMU is not enabled
+	 * yet, it being a high address ensures this is not by fluke)
+	 */
+	if (addr < PAGE_OFFSET)
+		return true;
+
+	/* Check that address doesn't clobber resident kernel image */
+	return addr >= (unsigned long)_stext && addr <= (unsigned long)_end;
+}
+
+#define IGNORE_ARGS		"Ignore U-boot args: "
+
+/* uboot_tag values for U-boot - kernel ABI revision 0; see head.S */
+#define UBOOT_TAG_NONE		0
+#define UBOOT_TAG_CMDLINE	1
+#define UBOOT_TAG_DTB		2
+/* We always pass 0 as magic from U-boot */
+#define UBOOT_MAGIC_VALUE	0
+
+void __init handle_uboot_args(void)
+{
+	bool use_embedded_dtb = true;
+	bool append_cmdline = false;
+
+	/* check that we know this tag */
+	if (uboot_tag != UBOOT_TAG_NONE &&
+	    uboot_tag != UBOOT_TAG_CMDLINE &&
+	    uboot_tag != UBOOT_TAG_DTB) {
+		pr_warn(IGNORE_ARGS "invalid uboot tag: '%08x'\n", uboot_tag);
+		goto ignore_uboot_args;
+	}
+
+	if (uboot_magic != UBOOT_MAGIC_VALUE) {
+		pr_warn(IGNORE_ARGS "non zero uboot magic\n");
+		goto ignore_uboot_args;
+	}
+
+	if (uboot_tag != UBOOT_TAG_NONE &&
+            uboot_arg_invalid((unsigned long)uboot_arg)) {
+		pr_warn(IGNORE_ARGS "invalid uboot arg: '%px'\n", uboot_arg);
+		goto ignore_uboot_args;
+	}
+
+	/* see if U-boot passed an external Device Tree blob */
+	if (uboot_tag == UBOOT_TAG_DTB) {
+		machine_desc = setup_machine_fdt((void *)uboot_arg);
+
+		/* external Device Tree blob is invalid - use embedded one */
+		use_embedded_dtb = !machine_desc;
+	}
+
+	if (uboot_tag == UBOOT_TAG_CMDLINE)
+		append_cmdline = true;
+
+ignore_uboot_args:
+
+	if (use_embedded_dtb) {
+		machine_desc = setup_machine_fdt(__dtb_start);
+		if (!machine_desc)
+			panic("Embedded DT invalid\n");
+	}
+
+	/*
+	 * NOTE: @boot_command_line is populated by setup_machine_fdt() so this
+	 * append processing can only happen after.
+	 */
+	if (append_cmdline) {
+		/* Ensure a whitespace between the 2 cmdlines */
+		strlcat(boot_command_line, " ", COMMAND_LINE_SIZE);
+		strlcat(boot_command_line, uboot_arg, COMMAND_LINE_SIZE);
+	}
+}
+
+void __init setup_arch(char **cmdline_p)
+{
+	handle_uboot_args();
+
+	/* Save unparsed command line copy for /proc/cmdline */
+	*cmdline_p = boot_command_line;
+
+	/* To force early parsing of things like mem=xxx */
+	parse_early_param();
+
+	/* Platform/board specific: e.g. early console registration */
+	if (machine_desc->init_early)
+		machine_desc->init_early();
+
+	smp_init_cpus();
+
+	setup_processor();
+	setup_arch_memory();
+
+	/* copy flat DT out of .init and then unflatten it */
+	unflatten_and_copy_device_tree();
+
+	/* Can be issue if someone passes cmd line arg "ro"
+	 * But that is unlikely so keeping it as it is
+	 */
+	root_mountflags &= ~MS_RDONLY;
+
+#if defined(CONFIG_VT) && defined(CONFIG_DUMMY_CONSOLE)
+	conswitchp = &dummy_con;
+#endif
+
+	arc_unwind_init();
+}
+
+/*
+ * Called from start_kernel() - boot CPU only
+ */
+void __init time_init(void)
+{
+	of_clk_init(NULL);
+	timer_probe();
+}
+
+static int __init customize_machine(void)
+{
+	if (machine_desc->init_machine)
+		machine_desc->init_machine();
+
+	return 0;
+}
+arch_initcall(customize_machine);
+
+static int __init init_late_machine(void)
+{
+	if (machine_desc->init_late)
+		machine_desc->init_late();
+
+	return 0;
+}
+late_initcall(init_late_machine);
+/*
+ *  Get CPU information for use by the procfs.
+ */
+
+#define cpu_to_ptr(c)	((void *)(0xFFFF0000 | (unsigned int)(c)))
+#define ptr_to_cpu(p)	(~0xFFFF0000UL & (unsigned int)(p))
+
+static int show_cpuinfo(struct seq_file *m, void *v)
+{
+	char *str;
+	int cpu_id = ptr_to_cpu(v);
+	struct device *cpu_dev = get_cpu_device(cpu_id);
+	struct clk *cpu_clk;
+	unsigned long freq = 0;
+
+	if (!cpu_online(cpu_id)) {
+		seq_printf(m, "processor [%d]\t: Offline\n", cpu_id);
+		goto done;
+	}
+
+	str = (char *)__get_free_page(GFP_KERNEL);
+	if (!str)
+		goto done;
+
+	seq_printf(m, arc_cpu_mumbojumbo(cpu_id, str, PAGE_SIZE));
+
+	cpu_clk = clk_get(cpu_dev, NULL);
+	if (IS_ERR(cpu_clk)) {
+		seq_printf(m, "CPU speed \t: Cannot get clock for processor [%d]\n",
+			   cpu_id);
+	} else {
+		freq = clk_get_rate(cpu_clk);
+	}
+	if (freq)
+		seq_printf(m, "CPU speed\t: %lu.%02lu Mhz\n",
+			   freq / 1000000, (freq / 10000) % 100);
+
+	seq_printf(m, "Bogo MIPS\t: %lu.%02lu\n",
+		   loops_per_jiffy / (500000 / HZ),
+		   (loops_per_jiffy / (5000 / HZ)) % 100);
+
+	seq_printf(m, arc_mmu_mumbojumbo(cpu_id, str, PAGE_SIZE));
+	seq_printf(m, arc_cache_mumbojumbo(cpu_id, str, PAGE_SIZE));
+	seq_printf(m, arc_extn_mumbojumbo(cpu_id, str, PAGE_SIZE));
+	seq_printf(m, arc_platform_smp_cpuinfo());
+
+	free_page((unsigned long)str);
+done:
+	seq_printf(m, "\n");
+
+	return 0;
+}
+
+static void *c_start(struct seq_file *m, loff_t *pos)
+{
+	/*
+	 * Callback returns cpu-id to iterator for show routine, NULL to stop.
+	 * However since NULL is also a valid cpu-id (0), we use a round-about
+	 * way to pass it w/o having to kmalloc/free a 2 byte string.
+	 * Encode cpu-id as 0xFFcccc, which is decoded by show routine.
+	 */
+	return *pos < nr_cpu_ids ? cpu_to_ptr(*pos) : NULL;
+}
+
+static void *c_next(struct seq_file *m, void *v, loff_t *pos)
+{
+	++*pos;
+	return c_start(m, pos);
+}
+
+static void c_stop(struct seq_file *m, void *v)
+{
+}
+
+const struct seq_operations cpuinfo_op = {
+	.start	= c_start,
+	.next	= c_next,
+	.stop	= c_stop,
+	.show	= show_cpuinfo
+};
+
+static DEFINE_PER_CPU(struct cpu, cpu_topology);
+
+static int __init topology_init(void)
+{
+	int cpu;
+
+	for_each_present_cpu(cpu)
+	    register_cpu(&per_cpu(cpu_topology, cpu), cpu);
+
+	return 0;
+}
+
+subsys_initcall(topology_init);
diff --git a/arch/arc/kernel/signal.c b/arch/arc/kernel/signal.c
new file mode 100644
index 000000000..68901f6f1
--- /dev/null
+++ b/arch/arc/kernel/signal.c
@@ -0,0 +1,445 @@
+/*
+ * Signal Handling for ARC
+ *
+ * Copyright (C) 2004, 2007-2010, 2011-2012 Synopsys, Inc. (www.synopsys.com)
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * vineetg: Jan 2010 (Restarting of timer related syscalls)
+ *
+ * vineetg: Nov 2009 (Everything needed for TIF_RESTORE_SIGMASK)
+ *  -do_signal() supports TIF_RESTORE_SIGMASK
+ *  -do_signal() no loner needs oldset, required by OLD sys_sigsuspend
+ *  -sys_rt_sigsuspend() now comes from generic code, so discard arch implemen
+ *  -sys_sigsuspend() no longer needs to fudge ptregs, hence that arg removed
+ *  -sys_sigsuspend() no longer loops for do_signal(), sets TIF_xxx and leaves
+ *   the job to do_signal()
+ *
+ * vineetg: July 2009
+ *  -Modified Code to support the uClibc provided userland sigreturn stub
+ *   to avoid kernel synthesing it on user stack at runtime, costing TLB
+ *   probes and Cache line flushes.
+ *
+ * vineetg: July 2009
+ *  -In stash_usr_regs( ) and restore_usr_regs( ), save/restore of user regs
+ *   in done in block copy rather than one word at a time.
+ *   This saves around 2K of code and improves LMBench lat_sig <catch>
+ *
+ * rajeshwarr: Feb 2009
+ *  - Support for Realtime Signals
+ *
+ * vineetg: Aug 11th 2008: Bug #94183
+ *  -ViXS were still seeing crashes when using insmod to load drivers.
+ *   It turned out that the code to change Execute permssions for TLB entries
+ *   of user was not guarded for interrupts (mod_tlb_permission)
+ *   This was causing TLB entries to be overwritten on unrelated indexes
+ *
+ * Vineetg: July 15th 2008: Bug #94183
+ *  -Exception happens in Delay slot of a JMP, and before user space resumes,
+ *   Signal is delivered (Ctrl + C) = >SIGINT.
+ *   setup_frame( ) sets up PC,SP,BLINK to enable user space signal handler
+ *   to run, but doesn't clear the Delay slot bit from status32. As a result,
+ *   on resuming user mode, signal handler branches off to BTA of orig JMP
+ *  -FIX: clear the DE bit from status32 in setup_frame( )
+ *
+ * Rahul Trivedi, Kanika Nema: Codito Technologies 2004
+ */
+
+#include <linux/signal.h>
+#include <linux/ptrace.h>
+#include <linux/personality.h>
+#include <linux/uaccess.h>
+#include <linux/syscalls.h>
+#include <linux/tracehook.h>
+#include <linux/sched/task_stack.h>
+
+#include <asm/ucontext.h>
+
+struct rt_sigframe {
+	struct siginfo info;
+	struct ucontext uc;
+#define MAGIC_SIGALTSTK		0x07302004
+	unsigned int sigret_magic;
+};
+
+static int save_arcv2_regs(struct sigcontext *mctx, struct pt_regs *regs)
+{
+	int err = 0;
+#ifndef CONFIG_ISA_ARCOMPACT
+	struct user_regs_arcv2 v2abi;
+
+	v2abi.r30 = regs->r30;
+#ifdef CONFIG_ARC_HAS_ACCL_REGS
+	v2abi.r58 = regs->r58;
+	v2abi.r59 = regs->r59;
+#else
+	v2abi.r58 = v2abi.r59 = 0;
+#endif
+	err = __copy_to_user(&mctx->v2abi, &v2abi, sizeof(v2abi));
+#endif
+	return err;
+}
+
+static int restore_arcv2_regs(struct sigcontext *mctx, struct pt_regs *regs)
+{
+	int err = 0;
+#ifndef CONFIG_ISA_ARCOMPACT
+	struct user_regs_arcv2 v2abi;
+
+	err = __copy_from_user(&v2abi, &mctx->v2abi, sizeof(v2abi));
+
+	regs->r30 = v2abi.r30;
+#ifdef CONFIG_ARC_HAS_ACCL_REGS
+	regs->r58 = v2abi.r58;
+	regs->r59 = v2abi.r59;
+#endif
+#endif
+	return err;
+}
+
+static int
+stash_usr_regs(struct rt_sigframe __user *sf, struct pt_regs *regs,
+	       sigset_t *set)
+{
+	int err;
+	struct user_regs_struct uregs;
+
+	uregs.scratch.bta	= regs->bta;
+	uregs.scratch.lp_start	= regs->lp_start;
+	uregs.scratch.lp_end	= regs->lp_end;
+	uregs.scratch.lp_count	= regs->lp_count;
+	uregs.scratch.status32	= regs->status32;
+	uregs.scratch.ret	= regs->ret;
+	uregs.scratch.blink	= regs->blink;
+	uregs.scratch.fp	= regs->fp;
+	uregs.scratch.gp	= regs->r26;
+	uregs.scratch.r12	= regs->r12;
+	uregs.scratch.r11	= regs->r11;
+	uregs.scratch.r10	= regs->r10;
+	uregs.scratch.r9	= regs->r9;
+	uregs.scratch.r8	= regs->r8;
+	uregs.scratch.r7	= regs->r7;
+	uregs.scratch.r6	= regs->r6;
+	uregs.scratch.r5	= regs->r5;
+	uregs.scratch.r4	= regs->r4;
+	uregs.scratch.r3	= regs->r3;
+	uregs.scratch.r2	= regs->r2;
+	uregs.scratch.r1	= regs->r1;
+	uregs.scratch.r0	= regs->r0;
+	uregs.scratch.sp	= regs->sp;
+
+	err = __copy_to_user(&(sf->uc.uc_mcontext.regs.scratch), &uregs.scratch,
+			     sizeof(sf->uc.uc_mcontext.regs.scratch));
+
+	if (is_isa_arcv2())
+		err |= save_arcv2_regs(&(sf->uc.uc_mcontext), regs);
+
+	err |= __copy_to_user(&sf->uc.uc_sigmask, set, sizeof(sigset_t));
+
+	return err ? -EFAULT : 0;
+}
+
+static int restore_usr_regs(struct pt_regs *regs, struct rt_sigframe __user *sf)
+{
+	sigset_t set;
+	int err;
+	struct user_regs_struct uregs;
+
+	err = __copy_from_user(&set, &sf->uc.uc_sigmask, sizeof(set));
+	err |= __copy_from_user(&uregs.scratch,
+				&(sf->uc.uc_mcontext.regs.scratch),
+				sizeof(sf->uc.uc_mcontext.regs.scratch));
+
+	if (is_isa_arcv2())
+		err |= restore_arcv2_regs(&(sf->uc.uc_mcontext), regs);
+
+	if (err)
+		return -EFAULT;
+
+	set_current_blocked(&set);
+	regs->bta	= uregs.scratch.bta;
+	regs->lp_start	= uregs.scratch.lp_start;
+	regs->lp_end	= uregs.scratch.lp_end;
+	regs->lp_count	= uregs.scratch.lp_count;
+	regs->status32	= uregs.scratch.status32;
+	regs->ret	= uregs.scratch.ret;
+	regs->blink	= uregs.scratch.blink;
+	regs->fp	= uregs.scratch.fp;
+	regs->r26	= uregs.scratch.gp;
+	regs->r12	= uregs.scratch.r12;
+	regs->r11	= uregs.scratch.r11;
+	regs->r10	= uregs.scratch.r10;
+	regs->r9	= uregs.scratch.r9;
+	regs->r8	= uregs.scratch.r8;
+	regs->r7	= uregs.scratch.r7;
+	regs->r6	= uregs.scratch.r6;
+	regs->r5	= uregs.scratch.r5;
+	regs->r4	= uregs.scratch.r4;
+	regs->r3	= uregs.scratch.r3;
+	regs->r2	= uregs.scratch.r2;
+	regs->r1	= uregs.scratch.r1;
+	regs->r0	= uregs.scratch.r0;
+	regs->sp	= uregs.scratch.sp;
+
+	return 0;
+}
+
+static inline int is_do_ss_needed(unsigned int magic)
+{
+	if (MAGIC_SIGALTSTK == magic)
+		return 1;
+	else
+		return 0;
+}
+
+SYSCALL_DEFINE0(rt_sigreturn)
+{
+	struct rt_sigframe __user *sf;
+	unsigned int magic;
+	struct pt_regs *regs = current_pt_regs();
+
+	/* Always make any pending restarted system calls return -EINTR */
+	current->restart_block.fn = do_no_restart_syscall;
+
+	/* Since we stacked the signal on a word boundary,
+	 * then 'sp' should be word aligned here.  If it's
+	 * not, then the user is trying to mess with us.
+	 */
+	if (regs->sp & 3)
+		goto badframe;
+
+	sf = (struct rt_sigframe __force __user *)(regs->sp);
+
+	if (!access_ok(VERIFY_READ, sf, sizeof(*sf)))
+		goto badframe;
+
+	if (__get_user(magic, &sf->sigret_magic))
+		goto badframe;
+
+	if (unlikely(is_do_ss_needed(magic)))
+		if (restore_altstack(&sf->uc.uc_stack))
+			goto badframe;
+
+	if (restore_usr_regs(regs, sf))
+		goto badframe;
+
+	/* Don't restart from sigreturn */
+	syscall_wont_restart(regs);
+
+	/*
+	 * Ensure that sigreturn always returns to user mode (in case the
+	 * regs saved on user stack got fudged between save and sigreturn)
+	 * Otherwise it is easy to panic the kernel with a custom
+	 * signal handler and/or restorer which clobberes the status32/ret
+	 * to return to a bogus location in kernel mode.
+	 */
+	regs->status32 |= STATUS_U_MASK;
+
+	return regs->r0;
+
+badframe:
+	force_sig(SIGSEGV, current);
+	return 0;
+}
+
+/*
+ * Determine which stack to use..
+ */
+static inline void __user *get_sigframe(struct ksignal *ksig,
+					struct pt_regs *regs,
+					unsigned long framesize)
+{
+	unsigned long sp = sigsp(regs->sp, ksig);
+	void __user *frame;
+
+	/* No matter what happens, 'sp' must be word
+	 * aligned otherwise nasty things could happen
+	 */
+
+	/* ATPCS B01 mandates 8-byte alignment */
+	frame = (void __user *)((sp - framesize) & ~7);
+
+	/* Check that we can actually write to the signal frame */
+	if (!access_ok(VERIFY_WRITE, frame, framesize))
+		frame = NULL;
+
+	return frame;
+}
+
+static int
+setup_rt_frame(struct ksignal *ksig, sigset_t *set, struct pt_regs *regs)
+{
+	struct rt_sigframe __user *sf;
+	unsigned int magic = 0;
+	int err = 0;
+
+	sf = get_sigframe(ksig, regs, sizeof(struct rt_sigframe));
+	if (!sf)
+		return 1;
+
+	/*
+	 * w/o SA_SIGINFO, struct ucontext is partially populated (only
+	 * uc_mcontext/uc_sigmask) for kernel's normal user state preservation
+	 * during signal handler execution. This works for SA_SIGINFO as well
+	 * although the semantics are now overloaded (the same reg state can be
+	 * inspected by userland: but are they allowed to fiddle with it ?
+	 */
+	err |= stash_usr_regs(sf, regs, set);
+
+	/*
+	 * SA_SIGINFO requires 3 args to signal handler:
+	 *  #1: sig-no (common to any handler)
+	 *  #2: struct siginfo
+	 *  #3: struct ucontext (completely populated)
+	 */
+	if (unlikely(ksig->ka.sa.sa_flags & SA_SIGINFO)) {
+		err |= copy_siginfo_to_user(&sf->info, &ksig->info);
+		err |= __put_user(0, &sf->uc.uc_flags);
+		err |= __put_user(NULL, &sf->uc.uc_link);
+		err |= __save_altstack(&sf->uc.uc_stack, regs->sp);
+
+		/* setup args 2 and 3 for user mode handler */
+		regs->r1 = (unsigned long)&sf->info;
+		regs->r2 = (unsigned long)&sf->uc;
+
+		/*
+		 * small optim to avoid unconditonally calling do_sigaltstack
+		 * in sigreturn path, now that we only have rt_sigreturn
+		 */
+		magic = MAGIC_SIGALTSTK;
+	}
+
+	err |= __put_user(magic, &sf->sigret_magic);
+	if (err)
+		return err;
+
+	/* #1 arg to the user Signal handler */
+	regs->r0 = ksig->sig;
+
+	/* setup PC of user space signal handler */
+	regs->ret = (unsigned long)ksig->ka.sa.sa_handler;
+
+	/*
+	 * handler returns using sigreturn stub provided already by userpsace
+	 * If not, nuke the process right away
+	 */
+	if(!(ksig->ka.sa.sa_flags & SA_RESTORER))
+		return 1;
+
+	regs->blink = (unsigned long)ksig->ka.sa.sa_restorer;
+
+	/* User Stack for signal handler will be above the frame just carved */
+	regs->sp = (unsigned long)sf;
+
+	/*
+	 * Bug 94183, Clear the DE bit, so that when signal handler
+	 * starts to run, it doesn't use BTA
+	 */
+	regs->status32 &= ~STATUS_DE_MASK;
+	regs->status32 |= STATUS_L_MASK;
+
+	return err;
+}
+
+static void arc_restart_syscall(struct k_sigaction *ka, struct pt_regs *regs)
+{
+	switch (regs->r0) {
+	case -ERESTART_RESTARTBLOCK:
+	case -ERESTARTNOHAND:
+		/*
+		 * ERESTARTNOHAND means that the syscall should
+		 * only be restarted if there was no handler for
+		 * the signal, and since we only get here if there
+		 * is a handler, we don't restart
+		 */
+		regs->r0 = -EINTR;   /* ERESTART_xxx is internal */
+		break;
+
+	case -ERESTARTSYS:
+		/*
+		 * ERESTARTSYS means to restart the syscall if
+		 * there is no handler or the handler was
+		 * registered with SA_RESTART
+		 */
+		if (!(ka->sa.sa_flags & SA_RESTART)) {
+			regs->r0 = -EINTR;
+			break;
+		}
+		/* fallthrough */
+
+	case -ERESTARTNOINTR:
+		/*
+		 * ERESTARTNOINTR means that the syscall should
+		 * be called again after the signal handler returns.
+		 * Setup reg state just as it was before doing the trap
+		 * r0 has been clobbered with sys call ret code thus it
+		 * needs to be reloaded with orig first arg to syscall
+		 * in orig_r0. Rest of relevant reg-file:
+		 * r8 (syscall num) and (r1 - r7) will be reset to
+		 * their orig user space value when we ret from kernel
+		 */
+		regs->r0 = regs->orig_r0;
+		regs->ret -= is_isa_arcv2() ? 2 : 4;
+		break;
+	}
+}
+
+/*
+ * OK, we're invoking a handler
+ */
+static void
+handle_signal(struct ksignal *ksig, struct pt_regs *regs)
+{
+	sigset_t *oldset = sigmask_to_save();
+	int failed;
+
+	/* Set up the stack frame */
+	failed = setup_rt_frame(ksig, oldset, regs);
+
+	signal_setup_done(failed, ksig, 0);
+}
+
+void do_signal(struct pt_regs *regs)
+{
+	struct ksignal ksig;
+	int restart_scall;
+
+	restart_scall = in_syscall(regs) && syscall_restartable(regs);
+
+	if (get_signal(&ksig)) {
+		if (restart_scall) {
+			arc_restart_syscall(&ksig.ka, regs);
+			syscall_wont_restart(regs);	/* No more restarts */
+		}
+		handle_signal(&ksig, regs);
+		return;
+	}
+
+	if (restart_scall) {
+		/* No handler for syscall: restart it */
+		if (regs->r0 == -ERESTARTNOHAND ||
+		    regs->r0 == -ERESTARTSYS || regs->r0 == -ERESTARTNOINTR) {
+			regs->r0 = regs->orig_r0;
+			regs->ret -= is_isa_arcv2() ? 2 : 4;
+		} else if (regs->r0 == -ERESTART_RESTARTBLOCK) {
+			regs->r8 = __NR_restart_syscall;
+			regs->ret -= is_isa_arcv2() ? 2 : 4;
+		}
+		syscall_wont_restart(regs);	/* No more restarts */
+	}
+
+	/* If there's no signal to deliver, restore the saved sigmask back */
+	restore_saved_sigmask();
+}
+
+void do_notify_resume(struct pt_regs *regs)
+{
+	/*
+	 * ASM glue gaurantees that this is only called when returning to
+	 * user mode
+	 */
+	if (test_and_clear_thread_flag(TIF_NOTIFY_RESUME))
+		tracehook_notify_resume(regs);
+}
diff --git a/arch/arc/kernel/smp.c b/arch/arc/kernel/smp.c
new file mode 100644
index 000000000..21d86c366
--- /dev/null
+++ b/arch/arc/kernel/smp.c
@@ -0,0 +1,427 @@
+/*
+ * Copyright (C) 2004, 2007-2010, 2011-2012 Synopsys, Inc. (www.synopsys.com)
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * RajeshwarR: Dec 11, 2007
+ *   -- Added support for Inter Processor Interrupts
+ *
+ * Vineetg: Nov 1st, 2007
+ *    -- Initial Write (Borrowed heavily from ARM)
+ */
+
+#include <linux/spinlock.h>
+#include <linux/sched/mm.h>
+#include <linux/interrupt.h>
+#include <linux/profile.h>
+#include <linux/mm.h>
+#include <linux/cpu.h>
+#include <linux/irq.h>
+#include <linux/atomic.h>
+#include <linux/cpumask.h>
+#include <linux/reboot.h>
+#include <linux/irqdomain.h>
+#include <linux/export.h>
+#include <linux/of_fdt.h>
+
+#include <asm/processor.h>
+#include <asm/setup.h>
+#include <asm/mach_desc.h>
+
+#ifndef CONFIG_ARC_HAS_LLSC
+arch_spinlock_t smp_atomic_ops_lock = __ARCH_SPIN_LOCK_UNLOCKED;
+arch_spinlock_t smp_bitops_lock = __ARCH_SPIN_LOCK_UNLOCKED;
+
+EXPORT_SYMBOL_GPL(smp_atomic_ops_lock);
+EXPORT_SYMBOL_GPL(smp_bitops_lock);
+#endif
+
+struct plat_smp_ops  __weak plat_smp_ops;
+
+/* XXX: per cpu ? Only needed once in early seconday boot */
+struct task_struct *secondary_idle_tsk;
+
+/* Called from start_kernel */
+void __init smp_prepare_boot_cpu(void)
+{
+}
+
+static int __init arc_get_cpu_map(const char *name, struct cpumask *cpumask)
+{
+	unsigned long dt_root = of_get_flat_dt_root();
+	const char *buf;
+
+	buf = of_get_flat_dt_prop(dt_root, name, NULL);
+	if (!buf)
+		return -EINVAL;
+
+	if (cpulist_parse(buf, cpumask))
+		return -EINVAL;
+
+	return 0;
+}
+
+/*
+ * Read from DeviceTree and setup cpu possible mask. If there is no
+ * "possible-cpus" property in DeviceTree pretend all [0..NR_CPUS-1] exist.
+ */
+static void __init arc_init_cpu_possible(void)
+{
+	struct cpumask cpumask;
+
+	if (arc_get_cpu_map("possible-cpus", &cpumask)) {
+		pr_warn("Failed to get possible-cpus from dtb, pretending all %u cpus exist\n",
+			NR_CPUS);
+
+		cpumask_setall(&cpumask);
+	}
+
+	if (!cpumask_test_cpu(0, &cpumask))
+		panic("Master cpu (cpu[0]) is missed in cpu possible mask!");
+
+	init_cpu_possible(&cpumask);
+}
+
+/*
+ * Called from setup_arch() before calling setup_processor()
+ *
+ * - Initialise the CPU possible map early - this describes the CPUs
+ *   which may be present or become present in the system.
+ * - Call early smp init hook. This can initialize a specific multi-core
+ *   IP which is say common to several platforms (hence not part of
+ *   platform specific int_early() hook)
+ */
+void __init smp_init_cpus(void)
+{
+	arc_init_cpu_possible();
+
+	if (plat_smp_ops.init_early_smp)
+		plat_smp_ops.init_early_smp();
+}
+
+/* called from init ( ) =>  process 1 */
+void __init smp_prepare_cpus(unsigned int max_cpus)
+{
+	/*
+	 * if platform didn't set the present map already, do it now
+	 * boot cpu is set to present already by init/main.c
+	 */
+	if (num_present_cpus() <= 1)
+		init_cpu_present(cpu_possible_mask);
+}
+
+void __init smp_cpus_done(unsigned int max_cpus)
+{
+
+}
+
+/*
+ * Default smp boot helper for Run-on-reset case where all cores start off
+ * together. Non-masters need to wait for Master to start running.
+ * This is implemented using a flag in memory, which Non-masters spin-wait on.
+ * Master sets it to cpu-id of core to "ungate" it.
+ */
+static volatile int wake_flag;
+
+#ifdef CONFIG_ISA_ARCOMPACT
+
+#define __boot_read(f)		f
+#define __boot_write(f, v)	f = v
+
+#else
+
+#define __boot_read(f)		arc_read_uncached_32(&f)
+#define __boot_write(f, v)	arc_write_uncached_32(&f, v)
+
+#endif
+
+static void arc_default_smp_cpu_kick(int cpu, unsigned long pc)
+{
+	BUG_ON(cpu == 0);
+
+	__boot_write(wake_flag, cpu);
+}
+
+void arc_platform_smp_wait_to_boot(int cpu)
+{
+	/* for halt-on-reset, we've waited already */
+	if (IS_ENABLED(CONFIG_ARC_SMP_HALT_ON_RESET))
+		return;
+
+	while (__boot_read(wake_flag) != cpu)
+		;
+
+	__boot_write(wake_flag, 0);
+}
+
+const char *arc_platform_smp_cpuinfo(void)
+{
+	return plat_smp_ops.info ? : "";
+}
+
+/*
+ * The very first "C" code executed by secondary
+ * Called from asm stub in head.S
+ * "current"/R25 already setup by low level boot code
+ */
+void start_kernel_secondary(void)
+{
+	struct mm_struct *mm = &init_mm;
+	unsigned int cpu = smp_processor_id();
+
+	/* MMU, Caches, Vector Table, Interrupts etc */
+	setup_processor();
+
+	mmget(mm);
+	mmgrab(mm);
+	current->active_mm = mm;
+	cpumask_set_cpu(cpu, mm_cpumask(mm));
+
+	/* Some SMP H/w setup - for each cpu */
+	if (plat_smp_ops.init_per_cpu)
+		plat_smp_ops.init_per_cpu(cpu);
+
+	if (machine_desc->init_per_cpu)
+		machine_desc->init_per_cpu(cpu);
+
+	notify_cpu_starting(cpu);
+	set_cpu_online(cpu, true);
+
+	pr_info("## CPU%u LIVE ##: Executing Code...\n", cpu);
+
+	local_irq_enable();
+	preempt_disable();
+	cpu_startup_entry(CPUHP_AP_ONLINE_IDLE);
+}
+
+/*
+ * Called from kernel_init( ) -> smp_init( ) - for each CPU
+ *
+ * At this point, Secondary Processor  is "HALT"ed:
+ *  -It booted, but was halted in head.S
+ *  -It was configured to halt-on-reset
+ *  So need to wake it up.
+ *
+ * Essential requirements being where to run from (PC) and stack (SP)
+*/
+int __cpu_up(unsigned int cpu, struct task_struct *idle)
+{
+	unsigned long wait_till;
+
+	secondary_idle_tsk = idle;
+
+	pr_info("Idle Task [%d] %p", cpu, idle);
+	pr_info("Trying to bring up CPU%u ...\n", cpu);
+
+	if (plat_smp_ops.cpu_kick)
+		plat_smp_ops.cpu_kick(cpu,
+				(unsigned long)first_lines_of_secondary);
+	else
+		arc_default_smp_cpu_kick(cpu, (unsigned long)NULL);
+
+	/* wait for 1 sec after kicking the secondary */
+	wait_till = jiffies + HZ;
+	while (time_before(jiffies, wait_till)) {
+		if (cpu_online(cpu))
+			break;
+	}
+
+	if (!cpu_online(cpu)) {
+		pr_info("Timeout: CPU%u FAILED to comeup !!!\n", cpu);
+		return -1;
+	}
+
+	secondary_idle_tsk = NULL;
+
+	return 0;
+}
+
+/*
+ * not supported here
+ */
+int setup_profiling_timer(unsigned int multiplier)
+{
+	return -EINVAL;
+}
+
+/*****************************************************************************/
+/*              Inter Processor Interrupt Handling                           */
+/*****************************************************************************/
+
+enum ipi_msg_type {
+	IPI_EMPTY = 0,
+	IPI_RESCHEDULE = 1,
+	IPI_CALL_FUNC,
+	IPI_CPU_STOP,
+};
+
+/*
+ * In arches with IRQ for each msg type (above), receiver can use IRQ-id  to
+ * figure out what msg was sent. For those which don't (ARC has dedicated IPI
+ * IRQ), the msg-type needs to be conveyed via per-cpu data
+ */
+
+static DEFINE_PER_CPU(unsigned long, ipi_data);
+
+static void ipi_send_msg_one(int cpu, enum ipi_msg_type msg)
+{
+	unsigned long __percpu *ipi_data_ptr = per_cpu_ptr(&ipi_data, cpu);
+	unsigned long old, new;
+	unsigned long flags;
+
+	pr_debug("%d Sending msg [%d] to %d\n", smp_processor_id(), msg, cpu);
+
+	local_irq_save(flags);
+
+	/*
+	 * Atomically write new msg bit (in case others are writing too),
+	 * and read back old value
+	 */
+	do {
+		new = old = READ_ONCE(*ipi_data_ptr);
+		new |= 1U << msg;
+	} while (cmpxchg(ipi_data_ptr, old, new) != old);
+
+	/*
+	 * Call the platform specific IPI kick function, but avoid if possible:
+	 * Only do so if there's no pending msg from other concurrent sender(s).
+	 * Otherwise, recevier will see this msg as well when it takes the
+	 * IPI corresponding to that msg. This is true, even if it is already in
+	 * IPI handler, because !@old means it has not yet dequeued the msg(s)
+	 * so @new msg can be a free-loader
+	 */
+	if (plat_smp_ops.ipi_send && !old)
+		plat_smp_ops.ipi_send(cpu);
+
+	local_irq_restore(flags);
+}
+
+static void ipi_send_msg(const struct cpumask *callmap, enum ipi_msg_type msg)
+{
+	unsigned int cpu;
+
+	for_each_cpu(cpu, callmap)
+		ipi_send_msg_one(cpu, msg);
+}
+
+void smp_send_reschedule(int cpu)
+{
+	ipi_send_msg_one(cpu, IPI_RESCHEDULE);
+}
+
+void smp_send_stop(void)
+{
+	struct cpumask targets;
+	cpumask_copy(&targets, cpu_online_mask);
+	cpumask_clear_cpu(smp_processor_id(), &targets);
+	ipi_send_msg(&targets, IPI_CPU_STOP);
+}
+
+void arch_send_call_function_single_ipi(int cpu)
+{
+	ipi_send_msg_one(cpu, IPI_CALL_FUNC);
+}
+
+void arch_send_call_function_ipi_mask(const struct cpumask *mask)
+{
+	ipi_send_msg(mask, IPI_CALL_FUNC);
+}
+
+/*
+ * ipi_cpu_stop - handle IPI from smp_send_stop()
+ */
+static void ipi_cpu_stop(void)
+{
+	machine_halt();
+}
+
+static inline int __do_IPI(unsigned long msg)
+{
+	int rc = 0;
+
+	switch (msg) {
+	case IPI_RESCHEDULE:
+		scheduler_ipi();
+		break;
+
+	case IPI_CALL_FUNC:
+		generic_smp_call_function_interrupt();
+		break;
+
+	case IPI_CPU_STOP:
+		ipi_cpu_stop();
+		break;
+
+	default:
+		rc = 1;
+	}
+
+	return rc;
+}
+
+/*
+ * arch-common ISR to handle for inter-processor interrupts
+ * Has hooks for platform specific IPI
+ */
+irqreturn_t do_IPI(int irq, void *dev_id)
+{
+	unsigned long pending;
+	unsigned long __maybe_unused copy;
+
+	pr_debug("IPI [%ld] received on cpu %d\n",
+		 *this_cpu_ptr(&ipi_data), smp_processor_id());
+
+	if (plat_smp_ops.ipi_clear)
+		plat_smp_ops.ipi_clear(irq);
+
+	/*
+	 * "dequeue" the msg corresponding to this IPI (and possibly other
+	 * piggybacked msg from elided IPIs: see ipi_send_msg_one() above)
+	 */
+	copy = pending = xchg(this_cpu_ptr(&ipi_data), 0);
+
+	do {
+		unsigned long msg = __ffs(pending);
+		int rc;
+
+		rc = __do_IPI(msg);
+		if (rc)
+			pr_info("IPI with bogus msg %ld in %ld\n", msg, copy);
+		pending &= ~(1U << msg);
+	} while (pending);
+
+	return IRQ_HANDLED;
+}
+
+/*
+ * API called by platform code to hookup arch-common ISR to their IPI IRQ
+ *
+ * Note: If IPI is provided by platform (vs. say ARC MCIP), their intc setup/map
+ * function needs to call call irq_set_percpu_devid() for IPI IRQ, otherwise
+ * request_percpu_irq() below will fail
+ */
+static DEFINE_PER_CPU(int, ipi_dev);
+
+int smp_ipi_irq_setup(int cpu, irq_hw_number_t hwirq)
+{
+	int *dev = per_cpu_ptr(&ipi_dev, cpu);
+	unsigned int virq = irq_find_mapping(NULL, hwirq);
+
+	if (!virq)
+		panic("Cannot find virq for root domain and hwirq=%lu", hwirq);
+
+	/* Boot cpu calls request, all call enable */
+	if (!cpu) {
+		int rc;
+
+		rc = request_percpu_irq(virq, do_IPI, "IPI Interrupt", dev);
+		if (rc)
+			panic("Percpu IRQ request failed for %u\n", virq);
+	}
+
+	enable_percpu_irq(virq, 0);
+
+	return 0;
+}
diff --git a/arch/arc/kernel/stacktrace.c b/arch/arc/kernel/stacktrace.c
new file mode 100644
index 000000000..a211e87aa
--- /dev/null
+++ b/arch/arc/kernel/stacktrace.c
@@ -0,0 +1,275 @@
+/*
+ *	stacktrace.c : stacktracing APIs needed by rest of kernel
+ *			(wrappers over ARC dwarf based unwinder)
+ *
+ * Copyright (C) 2004, 2007-2010, 2011-2012 Synopsys, Inc. (www.synopsys.com)
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ *  vineetg: aug 2009
+ *  -Implemented CONFIG_STACKTRACE APIs, primarily save_stack_trace_tsk( )
+ *   for displaying task's kernel mode call stack in /proc/<pid>/stack
+ *  -Iterator based approach to have single copy of unwinding core and APIs
+ *   needing unwinding, implement the logic in iterator regarding:
+ *      = which frame onwards to start capture
+ *      = which frame to stop capturing (wchan)
+ *      = specifics of data structs where trace is saved(CONFIG_STACKTRACE etc)
+ *
+ *  vineetg: March 2009
+ *  -Implemented correct versions of thread_saved_pc() and get_wchan()
+ *
+ *  rajeshwarr: 2008
+ *  -Initial implementation
+ */
+
+#include <linux/ptrace.h>
+#include <linux/export.h>
+#include <linux/stacktrace.h>
+#include <linux/kallsyms.h>
+#include <linux/sched/debug.h>
+
+#include <asm/arcregs.h>
+#include <asm/unwind.h>
+#include <asm/switch_to.h>
+
+/*-------------------------------------------------------------------------
+ *              Unwinder Iterator
+ *-------------------------------------------------------------------------
+ */
+
+#ifdef CONFIG_ARC_DW2_UNWIND
+
+static int
+seed_unwind_frame_info(struct task_struct *tsk, struct pt_regs *regs,
+		       struct unwind_frame_info *frame_info)
+{
+	/*
+	 * synchronous unwinding (e.g. dump_stack)
+	 *  - uses current values of SP and friends
+	 */
+	if (regs == NULL && (tsk == NULL || tsk == current)) {
+		unsigned long fp, sp, blink, ret;
+		frame_info->task = current;
+
+		__asm__ __volatile__(
+			"mov %0,r27\n\t"
+			"mov %1,r28\n\t"
+			"mov %2,r31\n\t"
+			"mov %3,r63\n\t"
+			: "=r"(fp), "=r"(sp), "=r"(blink), "=r"(ret)
+		);
+
+		frame_info->regs.r27 = fp;
+		frame_info->regs.r28 = sp;
+		frame_info->regs.r31 = blink;
+		frame_info->regs.r63 = ret;
+		frame_info->call_frame = 0;
+	} else if (regs == NULL) {
+		/*
+		 * Asynchronous unwinding of a likely sleeping task
+		 *  - first ensure it is actually sleeping
+		 *  - if so, it will be in __switch_to, kernel mode SP of task
+		 *    is safe-kept and BLINK at a well known location in there
+		 */
+
+		if (tsk->state == TASK_RUNNING)
+			return -1;
+
+		frame_info->task = tsk;
+
+		frame_info->regs.r27 = TSK_K_FP(tsk);
+		frame_info->regs.r28 = TSK_K_ESP(tsk);
+		frame_info->regs.r31 = TSK_K_BLINK(tsk);
+		frame_info->regs.r63 = (unsigned int)__switch_to;
+
+		/* In the prologue of __switch_to, first FP is saved on stack
+		 * and then SP is copied to FP. Dwarf assumes cfa as FP based
+		 * but we didn't save FP. The value retrieved above is FP's
+		 * state in previous frame.
+		 * As a work around for this, we unwind from __switch_to start
+		 * and adjust SP accordingly. The other limitation is that
+		 * __switch_to macro is dwarf rules are not generated for inline
+		 * assembly code
+		 */
+		frame_info->regs.r27 = 0;
+		frame_info->regs.r28 += 60;
+		frame_info->call_frame = 0;
+
+	} else {
+		/*
+		 * Asynchronous unwinding of intr/exception
+		 *  - Just uses the pt_regs passed
+		 */
+		frame_info->task = tsk;
+
+		frame_info->regs.r27 = regs->fp;
+		frame_info->regs.r28 = regs->sp;
+		frame_info->regs.r31 = regs->blink;
+		frame_info->regs.r63 = regs->ret;
+		frame_info->call_frame = 0;
+	}
+
+	return 0;
+}
+
+#endif
+
+notrace noinline unsigned int
+arc_unwind_core(struct task_struct *tsk, struct pt_regs *regs,
+		int (*consumer_fn) (unsigned int, void *), void *arg)
+{
+#ifdef CONFIG_ARC_DW2_UNWIND
+	int ret = 0, cnt = 0;
+	unsigned int address;
+	struct unwind_frame_info frame_info;
+
+	if (seed_unwind_frame_info(tsk, regs, &frame_info))
+		return 0;
+
+	while (1) {
+		address = UNW_PC(&frame_info);
+
+		if (!address || !__kernel_text_address(address))
+			break;
+
+		if (consumer_fn(address, arg) == -1)
+			break;
+
+		ret = arc_unwind(&frame_info);
+		if (ret)
+			break;
+
+		frame_info.regs.r63 = frame_info.regs.r31;
+
+		if (cnt++ > 128) {
+			printk("unwinder looping too long, aborting !\n");
+			return 0;
+		}
+	}
+
+	return address;		/* return the last address it saw */
+#else
+	/* On ARC, only Dward based unwinder works. fp based backtracing is
+	 * not possible (-fno-omit-frame-pointer) because of the way function
+	 * prelogue is setup (callee regs saved and then fp set and not other
+	 * way around
+	 */
+	pr_warn_once("CONFIG_ARC_DW2_UNWIND needs to be enabled\n");
+	return 0;
+
+#endif
+}
+
+/*-------------------------------------------------------------------------
+ * callbacks called by unwinder iterator to implement kernel APIs
+ *
+ * The callback can return -1 to force the iterator to stop, which by default
+ * keeps going till the bottom-most frame.
+ *-------------------------------------------------------------------------
+ */
+
+/* Call-back which plugs into unwinding core to dump the stack in
+ * case of panic/OOPs/BUG etc
+ */
+static int __print_sym(unsigned int address, void *unused)
+{
+	printk("  %pS\n", (void *)address);
+	return 0;
+}
+
+#ifdef CONFIG_STACKTRACE
+
+/* Call-back which plugs into unwinding core to capture the
+ * traces needed by kernel on /proc/<pid>/stack
+ */
+static int __collect_all(unsigned int address, void *arg)
+{
+	struct stack_trace *trace = arg;
+
+	if (trace->skip > 0)
+		trace->skip--;
+	else
+		trace->entries[trace->nr_entries++] = address;
+
+	if (trace->nr_entries >= trace->max_entries)
+		return -1;
+
+	return 0;
+}
+
+static int __collect_all_but_sched(unsigned int address, void *arg)
+{
+	struct stack_trace *trace = arg;
+
+	if (in_sched_functions(address))
+		return 0;
+
+	if (trace->skip > 0)
+		trace->skip--;
+	else
+		trace->entries[trace->nr_entries++] = address;
+
+	if (trace->nr_entries >= trace->max_entries)
+		return -1;
+
+	return 0;
+}
+
+#endif
+
+static int __get_first_nonsched(unsigned int address, void *unused)
+{
+	if (in_sched_functions(address))
+		return 0;
+
+	return -1;
+}
+
+/*-------------------------------------------------------------------------
+ *              APIs expected by various kernel sub-systems
+ *-------------------------------------------------------------------------
+ */
+
+noinline void show_stacktrace(struct task_struct *tsk, struct pt_regs *regs)
+{
+	pr_info("\nStack Trace:\n");
+	arc_unwind_core(tsk, regs, __print_sym, NULL);
+}
+EXPORT_SYMBOL(show_stacktrace);
+
+/* Expected by sched Code */
+void show_stack(struct task_struct *tsk, unsigned long *sp)
+{
+	show_stacktrace(tsk, NULL);
+}
+
+/* Another API expected by schedular, shows up in "ps" as Wait Channel
+ * Of course just returning schedule( ) would be pointless so unwind until
+ * the function is not in schedular code
+ */
+unsigned int get_wchan(struct task_struct *tsk)
+{
+	return arc_unwind_core(tsk, NULL, __get_first_nonsched, NULL);
+}
+
+#ifdef CONFIG_STACKTRACE
+
+/*
+ * API required by CONFIG_STACKTRACE, CONFIG_LATENCYTOP.
+ * A typical use is when /proc/<pid>/stack is queried by userland
+ */
+void save_stack_trace_tsk(struct task_struct *tsk, struct stack_trace *trace)
+{
+	/* Assumes @tsk is sleeping so unwinds from __switch_to */
+	arc_unwind_core(tsk, NULL, __collect_all_but_sched, trace);
+}
+
+void save_stack_trace(struct stack_trace *trace)
+{
+	/* Pass NULL for task so it unwinds the current call frame */
+	arc_unwind_core(NULL, NULL, __collect_all, trace);
+}
+EXPORT_SYMBOL_GPL(save_stack_trace);
+#endif
diff --git a/arch/arc/kernel/sys.c b/arch/arc/kernel/sys.c
new file mode 100644
index 000000000..fddecc76e
--- /dev/null
+++ b/arch/arc/kernel/sys.c
@@ -0,0 +1,17 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <linux/syscalls.h>
+#include <linux/signal.h>
+#include <linux/unistd.h>
+
+#include <asm/syscalls.h>
+
+#define sys_clone	sys_clone_wrapper
+
+#undef __SYSCALL
+#define __SYSCALL(nr, call) [nr] = (call),
+
+void *sys_call_table[NR_syscalls] = {
+	[0 ... NR_syscalls-1] = sys_ni_syscall,
+#include <asm/unistd.h>
+};
diff --git a/arch/arc/kernel/traps.c b/arch/arc/kernel/traps.c
new file mode 100644
index 000000000..b123558bf
--- /dev/null
+++ b/arch/arc/kernel/traps.c
@@ -0,0 +1,173 @@
+/*
+ * Traps/Non-MMU Exception handling for ARC
+ *
+ * Copyright (C) 2004, 2007-2010, 2011-2012 Synopsys, Inc. (www.synopsys.com)
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * vineetg: May 2011
+ *  -user-space unaligned access emulation
+ *
+ * Rahul Trivedi: Codito Technologies 2004
+ */
+
+#include <linux/sched/signal.h>
+#include <linux/kdebug.h>
+#include <linux/uaccess.h>
+#include <linux/ptrace.h>
+#include <linux/kprobes.h>
+#include <linux/kgdb.h>
+#include <asm/setup.h>
+#include <asm/unaligned.h>
+#include <asm/kprobes.h>
+
+void __init trap_init(void)
+{
+	return;
+}
+
+void die(const char *str, struct pt_regs *regs, unsigned long address)
+{
+	show_kernel_fault_diag(str, regs, address);
+
+	/* DEAD END */
+	__asm__("flag 1");
+}
+
+/*
+ * Helper called for bulk of exceptions NOT needing specific handling
+ *  -for user faults enqueues requested signal
+ *  -for kernel, chk if due to copy_(to|from)_user, otherwise die()
+ */
+static noinline int
+unhandled_exception(const char *str, struct pt_regs *regs, siginfo_t *info)
+{
+	if (user_mode(regs)) {
+		struct task_struct *tsk = current;
+
+		tsk->thread.fault_address = (__force unsigned int)info->si_addr;
+
+		force_sig_info(info->si_signo, info, tsk);
+
+	} else {
+		/* If not due to copy_(to|from)_user, we are doomed */
+		if (fixup_exception(regs))
+			return 0;
+
+		die(str, regs, (unsigned long)info->si_addr);
+	}
+
+	return 1;
+}
+
+#define DO_ERROR_INFO(signr, str, name, sicode) \
+int name(unsigned long address, struct pt_regs *regs) \
+{						\
+	siginfo_t info;				\
+						\
+	clear_siginfo(&info);			\
+	info.si_signo = signr;			\
+	info.si_errno = 0;			\
+	info.si_code  = sicode;			\
+	info.si_addr = (void __user *)address;	\
+						\
+	return unhandled_exception(str, regs, &info);\
+}
+
+/*
+ * Entry points for exceptions NOT needing specific handling
+ */
+DO_ERROR_INFO(SIGILL, "Priv Op/Disabled Extn", do_privilege_fault, ILL_PRVOPC)
+DO_ERROR_INFO(SIGILL, "Invalid Extn Insn", do_extension_fault, ILL_ILLOPC)
+DO_ERROR_INFO(SIGILL, "Illegal Insn (or Seq)", insterror_is_error, ILL_ILLOPC)
+DO_ERROR_INFO(SIGBUS, "Invalid Mem Access", __weak do_memory_error, BUS_ADRERR)
+DO_ERROR_INFO(SIGTRAP, "Breakpoint Set", trap_is_brkpt, TRAP_BRKPT)
+DO_ERROR_INFO(SIGBUS, "Misaligned Access", do_misaligned_error, BUS_ADRALN)
+DO_ERROR_INFO(SIGSEGV, "gcc generated __builtin_trap", do_trap5_error, 0)
+
+/*
+ * Entry Point for Misaligned Data access Exception, for emulating in software
+ */
+int do_misaligned_access(unsigned long address, struct pt_regs *regs,
+			 struct callee_regs *cregs)
+{
+	/* If emulation not enabled, or failed, kill the task */
+	if (misaligned_fixup(address, regs, cregs) != 0)
+		return do_misaligned_error(address, regs);
+
+	return 0;
+}
+
+/*
+ * Entry point for miscll errors such as Nested Exceptions
+ *  -Duplicate TLB entry is handled seperately though
+ */
+void do_machine_check_fault(unsigned long address, struct pt_regs *regs)
+{
+	die("Unhandled Machine Check Exception", regs, address);
+}
+
+
+/*
+ * Entry point for traps induced by ARCompact TRAP_S <n> insn
+ * This is same family as TRAP0/SWI insn (use the same vector).
+ * The only difference being SWI insn take no operand, while TRAP_S does
+ * which reflects in ECR Reg as 8 bit param.
+ * Thus TRAP_S <n> can be used for specific purpose
+ *  -1 used for software breakpointing (gdb)
+ *  -2 used by kprobes
+ *  -5 __builtin_trap() generated by gcc (2018.03 onwards) for toggle such as
+ *     -fno-isolate-erroneous-paths-dereference
+ */
+void do_non_swi_trap(unsigned long address, struct pt_regs *regs)
+{
+	unsigned int param = regs->ecr_param;
+
+	switch (param) {
+	case 1:
+		trap_is_brkpt(address, regs);
+		break;
+
+	case 2:
+		trap_is_kprobe(address, regs);
+		break;
+
+	case 3:
+	case 4:
+		kgdb_trap(regs);
+		break;
+
+	case 5:
+		do_trap5_error(address, regs);
+		break;
+	default:
+		break;
+	}
+}
+
+/*
+ * Entry point for Instruction Error Exception
+ *  -For a corner case, ARC kprobes implementation resorts to using
+ *   this exception, hence the check
+ */
+void do_insterror_or_kprobe(unsigned long address, struct pt_regs *regs)
+{
+	int rc;
+
+	/* Check if this exception is caused by kprobes */
+	rc = notify_die(DIE_IERR, "kprobe_ierr", regs, address, 0, SIGILL);
+	if (rc == NOTIFY_STOP)
+		return;
+
+	insterror_is_error(address, regs);
+}
+
+/*
+ * abort() call generated by older gcc for __builtin_trap()
+ */
+void abort(void)
+{
+	__asm__ __volatile__("trap_s  5\n");
+}
diff --git a/arch/arc/kernel/troubleshoot.c b/arch/arc/kernel/troubleshoot.c
new file mode 100644
index 000000000..215f51544
--- /dev/null
+++ b/arch/arc/kernel/troubleshoot.c
@@ -0,0 +1,248 @@
+/*
+ * Copyright (C) 2004, 2007-2010, 2011-2012 Synopsys, Inc. (www.synopsys.com)
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ */
+
+#include <linux/ptrace.h>
+#include <linux/module.h>
+#include <linux/mm.h>
+#include <linux/fs.h>
+#include <linux/kdev_t.h>
+#include <linux/proc_fs.h>
+#include <linux/file.h>
+#include <linux/sched/mm.h>
+#include <linux/sched/debug.h>
+
+#include <asm/arcregs.h>
+#include <asm/irqflags.h>
+
+#define ARC_PATH_MAX	256
+
+/*
+ * Common routine to print scratch regs (r0-r12) or callee regs (r13-r25)
+ *   -Prints 3 regs per line and a CR.
+ *   -To continue, callee regs right after scratch, special handling of CR
+ */
+static noinline void print_reg_file(long *reg_rev, int start_num)
+{
+	unsigned int i;
+	char buf[512];
+	int n = 0, len = sizeof(buf);
+
+	for (i = start_num; i < start_num + 13; i++) {
+		n += scnprintf(buf + n, len - n, "r%02u: 0x%08lx\t",
+			       i, (unsigned long)*reg_rev);
+
+		if (((i + 1) % 3) == 0)
+			n += scnprintf(buf + n, len - n, "\n");
+
+		/* because pt_regs has regs reversed: r12..r0, r25..r13 */
+		if (is_isa_arcv2() && start_num == 0)
+			reg_rev++;
+		else
+			reg_rev--;
+	}
+
+	if (start_num != 0)
+		n += scnprintf(buf + n, len - n, "\n\n");
+
+	/* To continue printing callee regs on same line as scratch regs */
+	if (start_num == 0)
+		pr_info("%s", buf);
+	else
+		pr_cont("%s\n", buf);
+}
+
+static void show_callee_regs(struct callee_regs *cregs)
+{
+	print_reg_file(&(cregs->r13), 13);
+}
+
+static void print_task_path_n_nm(struct task_struct *tsk)
+{
+	char *path_nm = NULL;
+	struct mm_struct *mm;
+	struct file *exe_file;
+	char buf[ARC_PATH_MAX];
+
+	mm = get_task_mm(tsk);
+	if (!mm)
+		goto done;
+
+	exe_file = get_mm_exe_file(mm);
+	mmput(mm);
+
+	if (exe_file) {
+		path_nm = file_path(exe_file, buf, ARC_PATH_MAX-1);
+		fput(exe_file);
+	}
+
+done:
+	pr_info("Path: %s\n", !IS_ERR(path_nm) ? path_nm : "?");
+}
+
+static void show_faulting_vma(unsigned long address)
+{
+	struct vm_area_struct *vma;
+	struct mm_struct *active_mm = current->active_mm;
+
+	/* can't use print_vma_addr() yet as it doesn't check for
+	 * non-inclusive vma
+	 */
+	down_read(&active_mm->mmap_sem);
+	vma = find_vma(active_mm, address);
+
+	/* check against the find_vma( ) behaviour which returns the next VMA
+	 * if the container VMA is not found
+	 */
+	if (vma && (vma->vm_start <= address)) {
+		char buf[ARC_PATH_MAX];
+		char *nm = "?";
+
+		if (vma->vm_file) {
+			nm = file_path(vma->vm_file, buf, ARC_PATH_MAX-1);
+			if (IS_ERR(nm))
+				nm = "?";
+		}
+		pr_info("    @off 0x%lx in [%s]\n"
+			"    VMA: 0x%08lx to 0x%08lx\n",
+			vma->vm_start < TASK_UNMAPPED_BASE ?
+				address : address - vma->vm_start,
+			nm, vma->vm_start, vma->vm_end);
+	} else
+		pr_info("    @No matching VMA found\n");
+
+	up_read(&active_mm->mmap_sem);
+}
+
+static void show_ecr_verbose(struct pt_regs *regs)
+{
+	unsigned int vec, cause_code;
+	unsigned long address;
+
+	pr_info("\n[ECR   ]: 0x%08lx => ", regs->event);
+
+	/* For Data fault, this is data address not instruction addr */
+	address = current->thread.fault_address;
+
+	vec = regs->ecr_vec;
+	cause_code = regs->ecr_cause;
+
+	/* For DTLB Miss or ProtV, display the memory involved too */
+	if (vec == ECR_V_DTLB_MISS) {
+		pr_cont("Invalid %s @ 0x%08lx by insn @ 0x%08lx\n",
+		       (cause_code == 0x01) ? "Read" :
+		       ((cause_code == 0x02) ? "Write" : "EX"),
+		       address, regs->ret);
+	} else if (vec == ECR_V_ITLB_MISS) {
+		pr_cont("Insn could not be fetched\n");
+	} else if (vec == ECR_V_MACH_CHK) {
+		pr_cont("Machine Check (%s)\n", (cause_code == 0x0) ?
+					"Double Fault" : "Other Fatal Err");
+
+	} else if (vec == ECR_V_PROTV) {
+		if (cause_code == ECR_C_PROTV_INST_FETCH)
+			pr_cont("Execute from Non-exec Page\n");
+		else if (cause_code == ECR_C_PROTV_MISALIG_DATA)
+			pr_cont("Misaligned r/w from 0x%08lx\n", address);
+		else
+			pr_cont("%s access not allowed on page\n",
+				(cause_code == 0x01) ? "Read" :
+				((cause_code == 0x02) ? "Write" : "EX"));
+	} else if (vec == ECR_V_INSN_ERR) {
+		pr_cont("Illegal Insn\n");
+#ifdef CONFIG_ISA_ARCV2
+	} else if (vec == ECR_V_MEM_ERR) {
+		if (cause_code == 0x00)
+			pr_cont("Bus Error from Insn Mem\n");
+		else if (cause_code == 0x10)
+			pr_cont("Bus Error from Data Mem\n");
+		else
+			pr_cont("Bus Error, check PRM\n");
+#endif
+	} else if (vec == ECR_V_TRAP) {
+		if (regs->ecr_param == 5)
+			pr_cont("gcc generated __builtin_trap\n");
+	} else {
+		pr_cont("Check Programmer's Manual\n");
+	}
+}
+
+/************************************************************************
+ *  API called by rest of kernel
+ ***********************************************************************/
+
+void show_regs(struct pt_regs *regs)
+{
+	struct task_struct *tsk = current;
+	struct callee_regs *cregs;
+
+	/*
+	 * generic code calls us with preemption disabled, but some calls
+	 * here could sleep, so re-enable to avoid lockdep splat
+	 */
+	preempt_enable();
+
+	print_task_path_n_nm(tsk);
+	show_regs_print_info(KERN_INFO);
+
+	show_ecr_verbose(regs);
+
+	pr_info("[EFA   ]: 0x%08lx\n[BLINK ]: %pS\n[ERET  ]: %pS\n",
+		current->thread.fault_address,
+		(void *)regs->blink, (void *)regs->ret);
+
+	if (user_mode(regs))
+		show_faulting_vma(regs->ret); /* faulting code, not data */
+
+	pr_info("[STAT32]: 0x%08lx", regs->status32);
+
+#define STS_BIT(r, bit)	r->status32 & STATUS_##bit##_MASK ? #bit" " : ""
+
+#ifdef CONFIG_ISA_ARCOMPACT
+	pr_cont(" : %2s%2s%2s%2s%2s%2s%2s\n",
+			(regs->status32 & STATUS_U_MASK) ? "U " : "K ",
+			STS_BIT(regs, DE), STS_BIT(regs, AE),
+			STS_BIT(regs, A2), STS_BIT(regs, A1),
+			STS_BIT(regs, E2), STS_BIT(regs, E1));
+#else
+	pr_cont(" : %2s%2s%2s%2s\n",
+			STS_BIT(regs, IE),
+			(regs->status32 & STATUS_U_MASK) ? "U " : "K ",
+			STS_BIT(regs, DE), STS_BIT(regs, AE));
+#endif
+	pr_info("BTA: 0x%08lx\t SP: 0x%08lx\t FP: 0x%08lx\n",
+		regs->bta, regs->sp, regs->fp);
+	pr_info("LPS: 0x%08lx\tLPE: 0x%08lx\tLPC: 0x%08lx\n",
+	       regs->lp_start, regs->lp_end, regs->lp_count);
+
+	/* print regs->r0 thru regs->r12
+	 * Sequential printing was generating horrible code
+	 */
+	print_reg_file(&(regs->r0), 0);
+
+	/* If Callee regs were saved, display them too */
+	cregs = (struct callee_regs *)current->thread.callee_reg;
+	if (cregs)
+		show_callee_regs(cregs);
+
+	preempt_disable();
+}
+
+void show_kernel_fault_diag(const char *str, struct pt_regs *regs,
+			    unsigned long address)
+{
+	current->thread.fault_address = address;
+
+	/* Show fault description */
+	pr_info("\n%s\n", str);
+
+	/* Caller and Callee regs */
+	show_regs(regs);
+
+	/* Show stack trace if this Fatality happened in kernel mode */
+	if (!user_mode(regs))
+		show_stacktrace(current, regs);
+}
diff --git a/arch/arc/kernel/unaligned.c b/arch/arc/kernel/unaligned.c
new file mode 100644
index 000000000..5f69c3bd5
--- /dev/null
+++ b/arch/arc/kernel/unaligned.c
@@ -0,0 +1,266 @@
+/*
+ * Copyright (C) 2011-2012 Synopsys (www.synopsys.com)
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * vineetg : May 2011
+ *  -Adapted (from .26 to .35)
+ *  -original contribution by Tim.yao@amlogic.com
+ *
+ */
+
+#include <linux/types.h>
+#include <linux/perf_event.h>
+#include <linux/ptrace.h>
+#include <linux/uaccess.h>
+#include <asm/disasm.h>
+
+#ifdef CONFIG_CPU_BIG_ENDIAN
+#define BE		1
+#define FIRST_BYTE_16	"swap %1, %1\n swape %1, %1\n"
+#define FIRST_BYTE_32	"swape %1, %1\n"
+#else
+#define BE		0
+#define FIRST_BYTE_16
+#define FIRST_BYTE_32
+#endif
+
+#define __get8_unaligned_check(val, addr, err)		\
+	__asm__(					\
+	"1:	ldb.ab	%1, [%2, 1]\n"			\
+	"2:\n"						\
+	"	.section .fixup,\"ax\"\n"		\
+	"	.align	4\n"				\
+	"3:	mov	%0, 1\n"			\
+	"	j	2b\n"				\
+	"	.previous\n"				\
+	"	.section __ex_table,\"a\"\n"		\
+	"	.align	4\n"				\
+	"	.long	1b, 3b\n"			\
+	"	.previous\n"				\
+	: "=r" (err), "=&r" (val), "=r" (addr)		\
+	: "0" (err), "2" (addr))
+
+#define get16_unaligned_check(val, addr)		\
+	do {						\
+		unsigned int err = 0, v, a = addr;	\
+		__get8_unaligned_check(v, a, err);	\
+		val =  v << ((BE) ? 8 : 0);		\
+		__get8_unaligned_check(v, a, err);	\
+		val |= v << ((BE) ? 0 : 8);		\
+		if (err)				\
+			goto fault;			\
+	} while (0)
+
+#define get32_unaligned_check(val, addr)		\
+	do {						\
+		unsigned int err = 0, v, a = addr;	\
+		__get8_unaligned_check(v, a, err);	\
+		val =  v << ((BE) ? 24 : 0);		\
+		__get8_unaligned_check(v, a, err);	\
+		val |= v << ((BE) ? 16 : 8);		\
+		__get8_unaligned_check(v, a, err);	\
+		val |= v << ((BE) ? 8 : 16);		\
+		__get8_unaligned_check(v, a, err);	\
+		val |= v << ((BE) ? 0 : 24);		\
+		if (err)				\
+			goto fault;			\
+	} while (0)
+
+#define put16_unaligned_check(val, addr)		\
+	do {						\
+		unsigned int err = 0, v = val, a = addr;\
+							\
+		__asm__(				\
+		FIRST_BYTE_16				\
+		"1:	stb.ab	%1, [%2, 1]\n"		\
+		"	lsr %1, %1, 8\n"		\
+		"2:	stb	%1, [%2]\n"		\
+		"3:\n"					\
+		"	.section .fixup,\"ax\"\n"	\
+		"	.align	4\n"			\
+		"4:	mov	%0, 1\n"		\
+		"	j	3b\n"			\
+		"	.previous\n"			\
+		"	.section __ex_table,\"a\"\n"	\
+		"	.align	4\n"			\
+		"	.long	1b, 4b\n"		\
+		"	.long	2b, 4b\n"		\
+		"	.previous\n"			\
+		: "=r" (err), "=&r" (v), "=&r" (a)	\
+		: "0" (err), "1" (v), "2" (a));		\
+							\
+		if (err)				\
+			goto fault;			\
+	} while (0)
+
+#define put32_unaligned_check(val, addr)		\
+	do {						\
+		unsigned int err = 0, v = val, a = addr;\
+							\
+		__asm__(				\
+		FIRST_BYTE_32				\
+		"1:	stb.ab	%1, [%2, 1]\n"		\
+		"	lsr %1, %1, 8\n"		\
+		"2:	stb.ab	%1, [%2, 1]\n"		\
+		"	lsr %1, %1, 8\n"		\
+		"3:	stb.ab	%1, [%2, 1]\n"		\
+		"	lsr %1, %1, 8\n"		\
+		"4:	stb	%1, [%2]\n"		\
+		"5:\n"					\
+		"	.section .fixup,\"ax\"\n"	\
+		"	.align	4\n"			\
+		"6:	mov	%0, 1\n"		\
+		"	j	5b\n"			\
+		"	.previous\n"			\
+		"	.section __ex_table,\"a\"\n"	\
+		"	.align	4\n"			\
+		"	.long	1b, 6b\n"		\
+		"	.long	2b, 6b\n"		\
+		"	.long	3b, 6b\n"		\
+		"	.long	4b, 6b\n"		\
+		"	.previous\n"			\
+		: "=r" (err), "=&r" (v), "=&r" (a)	\
+		: "0" (err), "1" (v), "2" (a));		\
+							\
+		if (err)				\
+			goto fault;			\
+	} while (0)
+
+/* sysctl hooks */
+int unaligned_enabled __read_mostly = 1;	/* Enabled by default */
+int no_unaligned_warning __read_mostly = 1;	/* Only 1 warning by default */
+
+static void fixup_load(struct disasm_state *state, struct pt_regs *regs,
+			struct callee_regs *cregs)
+{
+	int val;
+
+	/* register write back */
+	if ((state->aa == 1) || (state->aa == 2)) {
+		set_reg(state->wb_reg, state->src1 + state->src2, regs, cregs);
+
+		if (state->aa == 2)
+			state->src2 = 0;
+	}
+
+	if (state->zz == 0) {
+		get32_unaligned_check(val, state->src1 + state->src2);
+	} else {
+		get16_unaligned_check(val, state->src1 + state->src2);
+
+		if (state->x)
+			val = (val << 16) >> 16;
+	}
+
+	if (state->pref == 0)
+		set_reg(state->dest, val, regs, cregs);
+
+	return;
+
+fault:	state->fault = 1;
+}
+
+static void fixup_store(struct disasm_state *state, struct pt_regs *regs,
+			struct callee_regs *cregs)
+{
+	/* register write back */
+	if ((state->aa == 1) || (state->aa == 2)) {
+		set_reg(state->wb_reg, state->src2 + state->src3, regs, cregs);
+
+		if (state->aa == 3)
+			state->src3 = 0;
+	} else if (state->aa == 3) {
+		if (state->zz == 2) {
+			set_reg(state->wb_reg, state->src2 + (state->src3 << 1),
+				regs, cregs);
+		} else if (!state->zz) {
+			set_reg(state->wb_reg, state->src2 + (state->src3 << 2),
+				regs, cregs);
+		} else {
+			goto fault;
+		}
+	}
+
+	/* write fix-up */
+	if (!state->zz)
+		put32_unaligned_check(state->src1, state->src2 + state->src3);
+	else
+		put16_unaligned_check(state->src1, state->src2 + state->src3);
+
+	return;
+
+fault:	state->fault = 1;
+}
+
+/*
+ * Handle an unaligned access
+ * Returns 0 if successfully handled, 1 if some error happened
+ */
+int misaligned_fixup(unsigned long address, struct pt_regs *regs,
+		     struct callee_regs *cregs)
+{
+	struct disasm_state state;
+	char buf[TASK_COMM_LEN];
+
+	/* handle user mode only and only if enabled by sysadmin */
+	if (!user_mode(regs) || !unaligned_enabled)
+		return 1;
+
+	if (no_unaligned_warning) {
+		pr_warn_once("%s(%d) made unaligned access which was emulated"
+			     " by kernel assist\n. This can degrade application"
+			     " performance significantly\n. To enable further"
+			     " logging of such instances, please \n"
+			     " echo 0 > /proc/sys/kernel/ignore-unaligned-usertrap\n",
+			     get_task_comm(buf, current), task_pid_nr(current));
+	} else {
+		/* Add rate limiting if it gets down to it */
+		pr_warn("%s(%d): unaligned access to/from 0x%lx by PC: 0x%lx\n",
+			get_task_comm(buf, current), task_pid_nr(current),
+			address, regs->ret);
+
+	}
+
+	disasm_instr(regs->ret, &state, 1, regs, cregs);
+
+	if (state.fault)
+		goto fault;
+
+	/* ldb/stb should not have unaligned exception */
+	if ((state.zz == 1) || (state.di))
+		goto fault;
+
+	if (!state.write)
+		fixup_load(&state, regs, cregs);
+	else
+		fixup_store(&state, regs, cregs);
+
+	if (state.fault)
+		goto fault;
+
+	/* clear any remanants of delay slot */
+	if (delay_mode(regs)) {
+		regs->ret = regs->bta & ~1U;
+		regs->status32 &= ~STATUS_DE_MASK;
+	} else {
+		regs->ret += state.instr_len;
+
+		/* handle zero-overhead-loop */
+		if ((regs->ret == regs->lp_end) && (regs->lp_count)) {
+			regs->ret = regs->lp_start;
+			regs->lp_count--;
+		}
+	}
+
+	perf_sw_event(PERF_COUNT_SW_ALIGNMENT_FAULTS, 1, regs, address);
+	return 0;
+
+fault:
+	pr_err("Alignment trap: fault in fix-up %08lx at [<%08lx>]\n",
+		state.words[0], address);
+
+	return 1;
+}
diff --git a/arch/arc/kernel/unwind.c b/arch/arc/kernel/unwind.c
new file mode 100644
index 000000000..9cf2ee8b4
--- /dev/null
+++ b/arch/arc/kernel/unwind.c
@@ -0,0 +1,1322 @@
+/*
+ * Copyright (C) 2007-2010, 2011-2012 Synopsys, Inc. (www.synopsys.com)
+ * Copyright (C) 2002-2006 Novell, Inc.
+ *	Jan Beulich <jbeulich@novell.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * A simple API for unwinding kernel stacks.  This is used for
+ * debugging and error reporting purposes.  The kernel doesn't need
+ * full-blown stack unwinding with all the bells and whistles, so there
+ * is not much point in implementing the full Dwarf2 unwind API.
+ */
+
+#include <linux/sched.h>
+#include <linux/module.h>
+#include <linux/bootmem.h>
+#include <linux/sort.h>
+#include <linux/slab.h>
+#include <linux/stop_machine.h>
+#include <linux/uaccess.h>
+#include <linux/ptrace.h>
+#include <asm/sections.h>
+#include <asm/unaligned.h>
+#include <asm/unwind.h>
+
+extern char __start_unwind[], __end_unwind[];
+/* extern const u8 __start_unwind_hdr[], __end_unwind_hdr[];*/
+
+/* #define UNWIND_DEBUG */
+
+#ifdef UNWIND_DEBUG
+int dbg_unw;
+#define unw_debug(fmt, ...)			\
+do {						\
+	if (dbg_unw)				\
+		pr_info(fmt, ##__VA_ARGS__);	\
+} while (0);
+#else
+#define unw_debug(fmt, ...)
+#endif
+
+#define MAX_STACK_DEPTH 8
+
+#define EXTRA_INFO(f) { \
+		BUILD_BUG_ON_ZERO(offsetof(struct unwind_frame_info, f) \
+				% FIELD_SIZEOF(struct unwind_frame_info, f)) \
+				+ offsetof(struct unwind_frame_info, f) \
+				/ FIELD_SIZEOF(struct unwind_frame_info, f), \
+				FIELD_SIZEOF(struct unwind_frame_info, f) \
+	}
+#define PTREGS_INFO(f) EXTRA_INFO(regs.f)
+
+static const struct {
+	unsigned offs:BITS_PER_LONG / 2;
+	unsigned width:BITS_PER_LONG / 2;
+} reg_info[] = {
+UNW_REGISTER_INFO};
+
+#undef PTREGS_INFO
+#undef EXTRA_INFO
+
+#ifndef REG_INVALID
+#define REG_INVALID(r) (reg_info[r].width == 0)
+#endif
+
+#define DW_CFA_nop                          0x00
+#define DW_CFA_set_loc                      0x01
+#define DW_CFA_advance_loc1                 0x02
+#define DW_CFA_advance_loc2                 0x03
+#define DW_CFA_advance_loc4                 0x04
+#define DW_CFA_offset_extended              0x05
+#define DW_CFA_restore_extended             0x06
+#define DW_CFA_undefined                    0x07
+#define DW_CFA_same_value                   0x08
+#define DW_CFA_register                     0x09
+#define DW_CFA_remember_state               0x0a
+#define DW_CFA_restore_state                0x0b
+#define DW_CFA_def_cfa                      0x0c
+#define DW_CFA_def_cfa_register             0x0d
+#define DW_CFA_def_cfa_offset               0x0e
+#define DW_CFA_def_cfa_expression           0x0f
+#define DW_CFA_expression                   0x10
+#define DW_CFA_offset_extended_sf           0x11
+#define DW_CFA_def_cfa_sf                   0x12
+#define DW_CFA_def_cfa_offset_sf            0x13
+#define DW_CFA_val_offset                   0x14
+#define DW_CFA_val_offset_sf                0x15
+#define DW_CFA_val_expression               0x16
+#define DW_CFA_lo_user                      0x1c
+#define DW_CFA_GNU_window_save              0x2d
+#define DW_CFA_GNU_args_size                0x2e
+#define DW_CFA_GNU_negative_offset_extended 0x2f
+#define DW_CFA_hi_user                      0x3f
+
+#define DW_EH_PE_FORM     0x07
+#define DW_EH_PE_native   0x00
+#define DW_EH_PE_leb128   0x01
+#define DW_EH_PE_data2    0x02
+#define DW_EH_PE_data4    0x03
+#define DW_EH_PE_data8    0x04
+#define DW_EH_PE_signed   0x08
+#define DW_EH_PE_ADJUST   0x70
+#define DW_EH_PE_abs      0x00
+#define DW_EH_PE_pcrel    0x10
+#define DW_EH_PE_textrel  0x20
+#define DW_EH_PE_datarel  0x30
+#define DW_EH_PE_funcrel  0x40
+#define DW_EH_PE_aligned  0x50
+#define DW_EH_PE_indirect 0x80
+#define DW_EH_PE_omit     0xff
+
+#define CIE_ID	0
+
+typedef unsigned long uleb128_t;
+typedef signed long sleb128_t;
+
+static struct unwind_table {
+	struct {
+		unsigned long pc;
+		unsigned long range;
+	} core, init;
+	const void *address;
+	unsigned long size;
+	const unsigned char *header;
+	unsigned long hdrsz;
+	struct unwind_table *link;
+	const char *name;
+} root_table;
+
+struct unwind_item {
+	enum item_location {
+		Nowhere,
+		Memory,
+		Register,
+		Value
+	} where;
+	uleb128_t value;
+};
+
+struct unwind_state {
+	uleb128_t loc, org;
+	const u8 *cieStart, *cieEnd;
+	uleb128_t codeAlign;
+	sleb128_t dataAlign;
+	struct cfa {
+		uleb128_t reg, offs;
+	} cfa;
+	struct unwind_item regs[ARRAY_SIZE(reg_info)];
+	unsigned stackDepth:8;
+	unsigned version:8;
+	const u8 *label;
+	const u8 *stack[MAX_STACK_DEPTH];
+};
+
+static const struct cfa badCFA = { ARRAY_SIZE(reg_info), 1 };
+
+static struct unwind_table *find_table(unsigned long pc)
+{
+	struct unwind_table *table;
+
+	for (table = &root_table; table; table = table->link)
+		if ((pc >= table->core.pc
+		     && pc < table->core.pc + table->core.range)
+		    || (pc >= table->init.pc
+			&& pc < table->init.pc + table->init.range))
+			break;
+
+	return table;
+}
+
+static unsigned long read_pointer(const u8 **pLoc,
+				  const void *end, signed ptrType);
+static void init_unwind_hdr(struct unwind_table *table,
+			    void *(*alloc) (unsigned long));
+
+/*
+ * wrappers for header alloc (vs. calling one vs. other at call site)
+ * to elide section mismatches warnings
+ */
+static void *__init unw_hdr_alloc_early(unsigned long sz)
+{
+	return __alloc_bootmem_nopanic(sz, sizeof(unsigned int),
+				       MAX_DMA_ADDRESS);
+}
+
+static void init_unwind_table(struct unwind_table *table, const char *name,
+			      const void *core_start, unsigned long core_size,
+			      const void *init_start, unsigned long init_size,
+			      const void *table_start, unsigned long table_size,
+			      const u8 *header_start, unsigned long header_size)
+{
+	const u8 *ptr = header_start + 4;
+	const u8 *end = header_start + header_size;
+
+	table->core.pc = (unsigned long)core_start;
+	table->core.range = core_size;
+	table->init.pc = (unsigned long)init_start;
+	table->init.range = init_size;
+	table->address = table_start;
+	table->size = table_size;
+
+	/* See if the linker provided table looks valid. */
+	if (header_size <= 4
+	    || header_start[0] != 1
+	    || (void *)read_pointer(&ptr, end, header_start[1]) != table_start
+	    || header_start[2] == DW_EH_PE_omit
+	    || read_pointer(&ptr, end, header_start[2]) <= 0
+	    || header_start[3] == DW_EH_PE_omit)
+		header_start = NULL;
+
+	table->hdrsz = header_size;
+	smp_wmb();
+	table->header = header_start;
+	table->link = NULL;
+	table->name = name;
+}
+
+void __init arc_unwind_init(void)
+{
+	init_unwind_table(&root_table, "kernel", _text, _end - _text, NULL, 0,
+			  __start_unwind, __end_unwind - __start_unwind,
+			  NULL, 0);
+	  /*__start_unwind_hdr, __end_unwind_hdr - __start_unwind_hdr);*/
+
+	init_unwind_hdr(&root_table, unw_hdr_alloc_early);
+}
+
+static const u32 bad_cie, not_fde;
+static const u32 *cie_for_fde(const u32 *fde, const struct unwind_table *);
+static const u32 *__cie_for_fde(const u32 *fde);
+static signed fde_pointer_type(const u32 *cie);
+
+struct eh_frame_hdr_table_entry {
+	unsigned long start, fde;
+};
+
+static int cmp_eh_frame_hdr_table_entries(const void *p1, const void *p2)
+{
+	const struct eh_frame_hdr_table_entry *e1 = p1;
+	const struct eh_frame_hdr_table_entry *e2 = p2;
+
+	return (e1->start > e2->start) - (e1->start < e2->start);
+}
+
+static void swap_eh_frame_hdr_table_entries(void *p1, void *p2, int size)
+{
+	struct eh_frame_hdr_table_entry *e1 = p1;
+	struct eh_frame_hdr_table_entry *e2 = p2;
+	unsigned long v;
+
+	v = e1->start;
+	e1->start = e2->start;
+	e2->start = v;
+	v = e1->fde;
+	e1->fde = e2->fde;
+	e2->fde = v;
+}
+
+static void init_unwind_hdr(struct unwind_table *table,
+			    void *(*alloc) (unsigned long))
+{
+	const u8 *ptr;
+	unsigned long tableSize = table->size, hdrSize;
+	unsigned n;
+	const u32 *fde;
+	struct {
+		u8 version;
+		u8 eh_frame_ptr_enc;
+		u8 fde_count_enc;
+		u8 table_enc;
+		unsigned long eh_frame_ptr;
+		unsigned int fde_count;
+		struct eh_frame_hdr_table_entry table[];
+	} __attribute__ ((__packed__)) *header;
+
+	if (table->header)
+		return;
+
+	if (table->hdrsz)
+		pr_warn(".eh_frame_hdr for '%s' present but unusable\n",
+			table->name);
+
+	if (tableSize & (sizeof(*fde) - 1))
+		return;
+
+	for (fde = table->address, n = 0;
+	     tableSize > sizeof(*fde) && tableSize - sizeof(*fde) >= *fde;
+	     tableSize -= sizeof(*fde) + *fde, fde += 1 + *fde / sizeof(*fde)) {
+		const u32 *cie = cie_for_fde(fde, table);
+		signed ptrType;
+
+		if (cie == &not_fde)
+			continue;
+		if (cie == NULL || cie == &bad_cie)
+			goto ret_err;
+		ptrType = fde_pointer_type(cie);
+		if (ptrType < 0)
+			goto ret_err;
+
+		ptr = (const u8 *)(fde + 2);
+		if (!read_pointer(&ptr, (const u8 *)(fde + 1) + *fde,
+								ptrType)) {
+			/* FIXME_Rajesh We have 4 instances of null addresses
+			 * instead of the initial loc addr
+			 * return;
+			 */
+			WARN(1, "unwinder: FDE->initial_location NULL %p\n",
+				(const u8 *)(fde + 1) + *fde);
+		}
+		++n;
+	}
+
+	if (tableSize || !n)
+		goto ret_err;
+
+	hdrSize = 4 + sizeof(unsigned long) + sizeof(unsigned int)
+	    + 2 * n * sizeof(unsigned long);
+
+	header = alloc(hdrSize);
+	if (!header)
+		goto ret_err;
+
+	header->version = 1;
+	header->eh_frame_ptr_enc = DW_EH_PE_abs | DW_EH_PE_native;
+	header->fde_count_enc = DW_EH_PE_abs | DW_EH_PE_data4;
+	header->table_enc = DW_EH_PE_abs | DW_EH_PE_native;
+	put_unaligned((unsigned long)table->address, &header->eh_frame_ptr);
+	BUILD_BUG_ON(offsetof(typeof(*header), fde_count)
+		     % __alignof(typeof(header->fde_count)));
+	header->fde_count = n;
+
+	BUILD_BUG_ON(offsetof(typeof(*header), table)
+		     % __alignof(typeof(*header->table)));
+	for (fde = table->address, tableSize = table->size, n = 0;
+	     tableSize;
+	     tableSize -= sizeof(*fde) + *fde, fde += 1 + *fde / sizeof(*fde)) {
+		const u32 *cie = __cie_for_fde(fde);
+
+		if (fde[1] == CIE_ID)
+			continue;	/* this is a CIE */
+		ptr = (const u8 *)(fde + 2);
+		header->table[n].start = read_pointer(&ptr,
+						      (const u8 *)(fde + 1) +
+						      *fde,
+						      fde_pointer_type(cie));
+		header->table[n].fde = (unsigned long)fde;
+		++n;
+	}
+	WARN_ON(n != header->fde_count);
+
+	sort(header->table,
+	     n,
+	     sizeof(*header->table),
+	     cmp_eh_frame_hdr_table_entries, swap_eh_frame_hdr_table_entries);
+
+	table->hdrsz = hdrSize;
+	smp_wmb();
+	table->header = (const void *)header;
+	return;
+
+ret_err:
+	panic("Attention !!! Dwarf FDE parsing errors\n");
+}
+
+#ifdef CONFIG_MODULES
+static void *unw_hdr_alloc(unsigned long sz)
+{
+	return kmalloc(sz, GFP_KERNEL);
+}
+
+static struct unwind_table *last_table;
+
+/* Must be called with module_mutex held. */
+void *unwind_add_table(struct module *module, const void *table_start,
+		       unsigned long table_size)
+{
+	struct unwind_table *table;
+
+	if (table_size <= 0)
+		return NULL;
+
+	table = kmalloc(sizeof(*table), GFP_KERNEL);
+	if (!table)
+		return NULL;
+
+	init_unwind_table(table, module->name,
+			  module->core_layout.base, module->core_layout.size,
+			  module->init_layout.base, module->init_layout.size,
+			  table_start, table_size,
+			  NULL, 0);
+
+	init_unwind_hdr(table, unw_hdr_alloc);
+
+#ifdef UNWIND_DEBUG
+	unw_debug("Table added for [%s] %lx %lx\n",
+		module->name, table->core.pc, table->core.range);
+#endif
+	if (last_table)
+		last_table->link = table;
+	else
+		root_table.link = table;
+	last_table = table;
+
+	return table;
+}
+
+struct unlink_table_info {
+	struct unwind_table *table;
+	int init_only;
+};
+
+static int unlink_table(void *arg)
+{
+	struct unlink_table_info *info = arg;
+	struct unwind_table *table = info->table, *prev;
+
+	for (prev = &root_table; prev->link && prev->link != table;
+	     prev = prev->link)
+		;
+
+	if (prev->link) {
+		if (info->init_only) {
+			table->init.pc = 0;
+			table->init.range = 0;
+			info->table = NULL;
+		} else {
+			prev->link = table->link;
+			if (!prev->link)
+				last_table = prev;
+		}
+	} else
+		info->table = NULL;
+
+	return 0;
+}
+
+/* Must be called with module_mutex held. */
+void unwind_remove_table(void *handle, int init_only)
+{
+	struct unwind_table *table = handle;
+	struct unlink_table_info info;
+
+	if (!table || table == &root_table)
+		return;
+
+	if (init_only && table == last_table) {
+		table->init.pc = 0;
+		table->init.range = 0;
+		return;
+	}
+
+	info.table = table;
+	info.init_only = init_only;
+
+	unlink_table(&info); /* XXX: SMP */
+	kfree(table->header);
+	kfree(table);
+}
+
+#endif /* CONFIG_MODULES */
+
+static uleb128_t get_uleb128(const u8 **pcur, const u8 *end)
+{
+	const u8 *cur = *pcur;
+	uleb128_t value;
+	unsigned shift;
+
+	for (shift = 0, value = 0; cur < end; shift += 7) {
+		if (shift + 7 > 8 * sizeof(value)
+		    && (*cur & 0x7fU) >= (1U << (8 * sizeof(value) - shift))) {
+			cur = end + 1;
+			break;
+		}
+		value |= (uleb128_t) (*cur & 0x7f) << shift;
+		if (!(*cur++ & 0x80))
+			break;
+	}
+	*pcur = cur;
+
+	return value;
+}
+
+static sleb128_t get_sleb128(const u8 **pcur, const u8 *end)
+{
+	const u8 *cur = *pcur;
+	sleb128_t value;
+	unsigned shift;
+
+	for (shift = 0, value = 0; cur < end; shift += 7) {
+		if (shift + 7 > 8 * sizeof(value)
+		    && (*cur & 0x7fU) >= (1U << (8 * sizeof(value) - shift))) {
+			cur = end + 1;
+			break;
+		}
+		value |= (sleb128_t) (*cur & 0x7f) << shift;
+		if (!(*cur & 0x80)) {
+			value |= -(*cur++ & 0x40) << shift;
+			break;
+		}
+	}
+	*pcur = cur;
+
+	return value;
+}
+
+static const u32 *__cie_for_fde(const u32 *fde)
+{
+	const u32 *cie;
+
+	cie = fde + 1 - fde[1] / sizeof(*fde);
+
+	return cie;
+}
+
+static const u32 *cie_for_fde(const u32 *fde, const struct unwind_table *table)
+{
+	const u32 *cie;
+
+	if (!*fde || (*fde & (sizeof(*fde) - 1)))
+		return &bad_cie;
+
+	if (fde[1] == CIE_ID)
+		return &not_fde;	/* this is a CIE */
+
+	if ((fde[1] & (sizeof(*fde) - 1)))
+/* || fde[1] > (unsigned long)(fde + 1) - (unsigned long)table->address) */
+		return NULL;	/* this is not a valid FDE */
+
+	cie = __cie_for_fde(fde);
+
+	if (*cie <= sizeof(*cie) + 4 || *cie >= fde[1] - sizeof(*fde)
+	    || (*cie & (sizeof(*cie) - 1))
+	    || (cie[1] != CIE_ID))
+		return NULL;	/* this is not a (valid) CIE */
+	return cie;
+}
+
+static unsigned long read_pointer(const u8 **pLoc, const void *end,
+				  signed ptrType)
+{
+	unsigned long value = 0;
+	union {
+		const u8 *p8;
+		const u16 *p16u;
+		const s16 *p16s;
+		const u32 *p32u;
+		const s32 *p32s;
+		const unsigned long *pul;
+	} ptr;
+
+	if (ptrType < 0 || ptrType == DW_EH_PE_omit)
+		return 0;
+	ptr.p8 = *pLoc;
+	switch (ptrType & DW_EH_PE_FORM) {
+	case DW_EH_PE_data2:
+		if (end < (const void *)(ptr.p16u + 1))
+			return 0;
+		if (ptrType & DW_EH_PE_signed)
+			value = get_unaligned((u16 *) ptr.p16s++);
+		else
+			value = get_unaligned((u16 *) ptr.p16u++);
+		break;
+	case DW_EH_PE_data4:
+#ifdef CONFIG_64BIT
+		if (end < (const void *)(ptr.p32u + 1))
+			return 0;
+		if (ptrType & DW_EH_PE_signed)
+			value = get_unaligned(ptr.p32s++);
+		else
+			value = get_unaligned(ptr.p32u++);
+		break;
+	case DW_EH_PE_data8:
+		BUILD_BUG_ON(sizeof(u64) != sizeof(value));
+#else
+		BUILD_BUG_ON(sizeof(u32) != sizeof(value));
+#endif
+	case DW_EH_PE_native:
+		if (end < (const void *)(ptr.pul + 1))
+			return 0;
+		value = get_unaligned((unsigned long *)ptr.pul++);
+		break;
+	case DW_EH_PE_leb128:
+		BUILD_BUG_ON(sizeof(uleb128_t) > sizeof(value));
+		value = ptrType & DW_EH_PE_signed ? get_sleb128(&ptr.p8, end)
+		    : get_uleb128(&ptr.p8, end);
+		if ((const void *)ptr.p8 > end)
+			return 0;
+		break;
+	default:
+		return 0;
+	}
+	switch (ptrType & DW_EH_PE_ADJUST) {
+	case DW_EH_PE_abs:
+		break;
+	case DW_EH_PE_pcrel:
+		value += (unsigned long)*pLoc;
+		break;
+	default:
+		return 0;
+	}
+	if ((ptrType & DW_EH_PE_indirect)
+	    && __get_user(value, (unsigned long __user *)value))
+		return 0;
+	*pLoc = ptr.p8;
+
+	return value;
+}
+
+static signed fde_pointer_type(const u32 *cie)
+{
+	const u8 *ptr = (const u8 *)(cie + 2);
+	unsigned version = *ptr;
+
+	if (*++ptr) {
+		const char *aug;
+		const u8 *end = (const u8 *)(cie + 1) + *cie;
+		uleb128_t len;
+
+		/* check if augmentation size is first (and thus present) */
+		if (*ptr != 'z')
+			return -1;
+
+		/* check if augmentation string is nul-terminated */
+		aug = (const void *)ptr;
+		ptr = memchr(aug, 0, end - ptr);
+		if (ptr == NULL)
+			return -1;
+
+		++ptr;		/* skip terminator */
+		get_uleb128(&ptr, end);	/* skip code alignment */
+		get_sleb128(&ptr, end);	/* skip data alignment */
+		/* skip return address column */
+		version <= 1 ? (void) ++ptr : (void)get_uleb128(&ptr, end);
+		len = get_uleb128(&ptr, end);	/* augmentation length */
+
+		if (ptr + len < ptr || ptr + len > end)
+			return -1;
+
+		end = ptr + len;
+		while (*++aug) {
+			if (ptr >= end)
+				return -1;
+			switch (*aug) {
+			case 'L':
+				++ptr;
+				break;
+			case 'P':{
+					signed ptrType = *ptr++;
+
+					if (!read_pointer(&ptr, end, ptrType)
+					    || ptr > end)
+						return -1;
+				}
+				break;
+			case 'R':
+				return *ptr;
+			default:
+				return -1;
+			}
+		}
+	}
+	return DW_EH_PE_native | DW_EH_PE_abs;
+}
+
+static int advance_loc(unsigned long delta, struct unwind_state *state)
+{
+	state->loc += delta * state->codeAlign;
+
+	/* FIXME_Rajesh: Probably we are defining for the initial range as well;
+	   return delta > 0;
+	 */
+	unw_debug("delta %3lu => loc 0x%lx: ", delta, state->loc);
+	return 1;
+}
+
+static void set_rule(uleb128_t reg, enum item_location where, uleb128_t value,
+		     struct unwind_state *state)
+{
+	if (reg < ARRAY_SIZE(state->regs)) {
+		state->regs[reg].where = where;
+		state->regs[reg].value = value;
+
+#ifdef UNWIND_DEBUG
+		unw_debug("r%lu: ", reg);
+		switch (where) {
+		case Nowhere:
+			unw_debug("s ");
+			break;
+		case Memory:
+			unw_debug("c(%lu) ", value);
+			break;
+		case Register:
+			unw_debug("r(%lu) ", value);
+			break;
+		case Value:
+			unw_debug("v(%lu) ", value);
+			break;
+		default:
+			break;
+		}
+#endif
+	}
+}
+
+static int processCFI(const u8 *start, const u8 *end, unsigned long targetLoc,
+		      signed ptrType, struct unwind_state *state)
+{
+	union {
+		const u8 *p8;
+		const u16 *p16;
+		const u32 *p32;
+	} ptr;
+	int result = 1;
+	u8 opcode;
+
+	if (start != state->cieStart) {
+		state->loc = state->org;
+		result =
+		    processCFI(state->cieStart, state->cieEnd, 0, ptrType,
+			       state);
+		if (targetLoc == 0 && state->label == NULL)
+			return result;
+	}
+	for (ptr.p8 = start; result && ptr.p8 < end;) {
+		switch (*ptr.p8 >> 6) {
+			uleb128_t value;
+
+		case 0:
+			opcode = *ptr.p8++;
+
+			switch (opcode) {
+			case DW_CFA_nop:
+				unw_debug("cfa nop ");
+				break;
+			case DW_CFA_set_loc:
+				state->loc = read_pointer(&ptr.p8, end,
+							  ptrType);
+				if (state->loc == 0)
+					result = 0;
+				unw_debug("cfa_set_loc: 0x%lx ", state->loc);
+				break;
+			case DW_CFA_advance_loc1:
+				unw_debug("\ncfa advance loc1:");
+				result = ptr.p8 < end
+				    && advance_loc(*ptr.p8++, state);
+				break;
+			case DW_CFA_advance_loc2:
+				value = *ptr.p8++;
+				value += *ptr.p8++ << 8;
+				unw_debug("\ncfa advance loc2:");
+				result = ptr.p8 <= end + 2
+				    /* && advance_loc(*ptr.p16++, state); */
+				    && advance_loc(value, state);
+				break;
+			case DW_CFA_advance_loc4:
+				unw_debug("\ncfa advance loc4:");
+				result = ptr.p8 <= end + 4
+				    && advance_loc(*ptr.p32++, state);
+				break;
+			case DW_CFA_offset_extended:
+				value = get_uleb128(&ptr.p8, end);
+				unw_debug("cfa_offset_extended: ");
+				set_rule(value, Memory,
+					 get_uleb128(&ptr.p8, end), state);
+				break;
+			case DW_CFA_val_offset:
+				value = get_uleb128(&ptr.p8, end);
+				set_rule(value, Value,
+					 get_uleb128(&ptr.p8, end), state);
+				break;
+			case DW_CFA_offset_extended_sf:
+				value = get_uleb128(&ptr.p8, end);
+				set_rule(value, Memory,
+					 get_sleb128(&ptr.p8, end), state);
+				break;
+			case DW_CFA_val_offset_sf:
+				value = get_uleb128(&ptr.p8, end);
+				set_rule(value, Value,
+					 get_sleb128(&ptr.p8, end), state);
+				break;
+			case DW_CFA_restore_extended:
+				unw_debug("cfa_restore_extended: ");
+			case DW_CFA_undefined:
+				unw_debug("cfa_undefined: ");
+			case DW_CFA_same_value:
+				unw_debug("cfa_same_value: ");
+				set_rule(get_uleb128(&ptr.p8, end), Nowhere, 0,
+					 state);
+				break;
+			case DW_CFA_register:
+				unw_debug("cfa_register: ");
+				value = get_uleb128(&ptr.p8, end);
+				set_rule(value,
+					 Register,
+					 get_uleb128(&ptr.p8, end), state);
+				break;
+			case DW_CFA_remember_state:
+				unw_debug("cfa_remember_state: ");
+				if (ptr.p8 == state->label) {
+					state->label = NULL;
+					return 1;
+				}
+				if (state->stackDepth >= MAX_STACK_DEPTH)
+					return 0;
+				state->stack[state->stackDepth++] = ptr.p8;
+				break;
+			case DW_CFA_restore_state:
+				unw_debug("cfa_restore_state: ");
+				if (state->stackDepth) {
+					const uleb128_t loc = state->loc;
+					const u8 *label = state->label;
+
+					state->label =
+					    state->stack[state->stackDepth - 1];
+					memcpy(&state->cfa, &badCFA,
+					       sizeof(state->cfa));
+					memset(state->regs, 0,
+					       sizeof(state->regs));
+					state->stackDepth = 0;
+					result =
+					    processCFI(start, end, 0, ptrType,
+						       state);
+					state->loc = loc;
+					state->label = label;
+				} else
+					return 0;
+				break;
+			case DW_CFA_def_cfa:
+				state->cfa.reg = get_uleb128(&ptr.p8, end);
+				unw_debug("cfa_def_cfa: r%lu ", state->cfa.reg);
+				/*nobreak*/
+			case DW_CFA_def_cfa_offset:
+				state->cfa.offs = get_uleb128(&ptr.p8, end);
+				unw_debug("cfa_def_cfa_offset: 0x%lx ",
+					  state->cfa.offs);
+				break;
+			case DW_CFA_def_cfa_sf:
+				state->cfa.reg = get_uleb128(&ptr.p8, end);
+				/*nobreak */
+			case DW_CFA_def_cfa_offset_sf:
+				state->cfa.offs = get_sleb128(&ptr.p8, end)
+				    * state->dataAlign;
+				break;
+			case DW_CFA_def_cfa_register:
+				unw_debug("cfa_def_cfa_register: ");
+				state->cfa.reg = get_uleb128(&ptr.p8, end);
+				break;
+				/*todo case DW_CFA_def_cfa_expression: */
+				/*todo case DW_CFA_expression: */
+				/*todo case DW_CFA_val_expression: */
+			case DW_CFA_GNU_args_size:
+				get_uleb128(&ptr.p8, end);
+				break;
+			case DW_CFA_GNU_negative_offset_extended:
+				value = get_uleb128(&ptr.p8, end);
+				set_rule(value,
+					 Memory,
+					 (uleb128_t) 0 - get_uleb128(&ptr.p8,
+								     end),
+					 state);
+				break;
+			case DW_CFA_GNU_window_save:
+			default:
+				unw_debug("UNKNOWN OPCODE 0x%x\n", opcode);
+				result = 0;
+				break;
+			}
+			break;
+		case 1:
+			unw_debug("\ncfa_adv_loc: ");
+			result = advance_loc(*ptr.p8++ & 0x3f, state);
+			break;
+		case 2:
+			unw_debug("cfa_offset: ");
+			value = *ptr.p8++ & 0x3f;
+			set_rule(value, Memory, get_uleb128(&ptr.p8, end),
+				 state);
+			break;
+		case 3:
+			unw_debug("cfa_restore: ");
+			set_rule(*ptr.p8++ & 0x3f, Nowhere, 0, state);
+			break;
+		}
+
+		if (ptr.p8 > end)
+			result = 0;
+		if (result && targetLoc != 0 && targetLoc < state->loc)
+			return 1;
+	}
+
+	return result && ptr.p8 == end && (targetLoc == 0 || (
+		/*todo While in theory this should apply, gcc in practice omits
+		  everything past the function prolog, and hence the location
+		  never reaches the end of the function.
+		targetLoc < state->loc && */  state->label == NULL));
+}
+
+/* Unwind to previous to frame.  Returns 0 if successful, negative
+ * number in case of an error. */
+int arc_unwind(struct unwind_frame_info *frame)
+{
+#define FRAME_REG(r, t) (((t *)frame)[reg_info[r].offs])
+	const u32 *fde = NULL, *cie = NULL;
+	const u8 *ptr = NULL, *end = NULL;
+	unsigned long pc = UNW_PC(frame) - frame->call_frame;
+	unsigned long startLoc = 0, endLoc = 0, cfa;
+	unsigned i;
+	signed ptrType = -1;
+	uleb128_t retAddrReg = 0;
+	const struct unwind_table *table;
+	struct unwind_state state;
+	unsigned long *fptr;
+	unsigned long addr;
+
+	unw_debug("\n\nUNWIND FRAME:\n");
+	unw_debug("PC: 0x%lx BLINK: 0x%lx, SP: 0x%lx, FP: 0x%x\n",
+		  UNW_PC(frame), UNW_BLINK(frame), UNW_SP(frame),
+		  UNW_FP(frame));
+
+	if (UNW_PC(frame) == 0)
+		return -EINVAL;
+
+#ifdef UNWIND_DEBUG
+	{
+		unsigned long *sptr = (unsigned long *)UNW_SP(frame);
+		unw_debug("\nStack Dump:\n");
+		for (i = 0; i < 20; i++, sptr++)
+			unw_debug("0x%p:  0x%lx\n", sptr, *sptr);
+		unw_debug("\n");
+	}
+#endif
+
+	table = find_table(pc);
+	if (table != NULL
+	    && !(table->size & (sizeof(*fde) - 1))) {
+		const u8 *hdr = table->header;
+		unsigned long tableSize;
+
+		smp_rmb();
+		if (hdr && hdr[0] == 1) {
+			switch (hdr[3] & DW_EH_PE_FORM) {
+			case DW_EH_PE_native:
+				tableSize = sizeof(unsigned long);
+				break;
+			case DW_EH_PE_data2:
+				tableSize = 2;
+				break;
+			case DW_EH_PE_data4:
+				tableSize = 4;
+				break;
+			case DW_EH_PE_data8:
+				tableSize = 8;
+				break;
+			default:
+				tableSize = 0;
+				break;
+			}
+			ptr = hdr + 4;
+			end = hdr + table->hdrsz;
+			if (tableSize && read_pointer(&ptr, end, hdr[1])
+			    == (unsigned long)table->address
+			    && (i = read_pointer(&ptr, end, hdr[2])) > 0
+			    && i == (end - ptr) / (2 * tableSize)
+			    && !((end - ptr) % (2 * tableSize))) {
+				do {
+					const u8 *cur =
+					    ptr + (i / 2) * (2 * tableSize);
+
+					startLoc = read_pointer(&cur,
+								cur + tableSize,
+								hdr[3]);
+					if (pc < startLoc)
+						i /= 2;
+					else {
+						ptr = cur - tableSize;
+						i = (i + 1) / 2;
+					}
+				} while (startLoc && i > 1);
+				if (i == 1
+				    && (startLoc = read_pointer(&ptr,
+								ptr + tableSize,
+								hdr[3])) != 0
+				    && pc >= startLoc)
+					fde = (void *)read_pointer(&ptr,
+								   ptr +
+								   tableSize,
+								   hdr[3]);
+			}
+		}
+
+		if (fde != NULL) {
+			cie = cie_for_fde(fde, table);
+			ptr = (const u8 *)(fde + 2);
+			if (cie != NULL
+			    && cie != &bad_cie
+			    && cie != &not_fde
+			    && (ptrType = fde_pointer_type(cie)) >= 0
+			    && read_pointer(&ptr,
+					    (const u8 *)(fde + 1) + *fde,
+					    ptrType) == startLoc) {
+				if (!(ptrType & DW_EH_PE_indirect))
+					ptrType &=
+					    DW_EH_PE_FORM | DW_EH_PE_signed;
+				endLoc =
+				    startLoc + read_pointer(&ptr,
+							    (const u8 *)(fde +
+									 1) +
+							    *fde, ptrType);
+				if (pc >= endLoc) {
+					fde = NULL;
+					cie = NULL;
+				}
+			} else {
+				fde = NULL;
+				cie = NULL;
+			}
+		}
+	}
+	if (cie != NULL) {
+		memset(&state, 0, sizeof(state));
+		state.cieEnd = ptr;	/* keep here temporarily */
+		ptr = (const u8 *)(cie + 2);
+		end = (const u8 *)(cie + 1) + *cie;
+		frame->call_frame = 1;
+		if (*++ptr) {
+			/* check if augmentation size is first (thus present) */
+			if (*ptr == 'z') {
+				while (++ptr < end && *ptr) {
+					switch (*ptr) {
+					/* chk for ignorable or already handled
+					 * nul-terminated augmentation string */
+					case 'L':
+					case 'P':
+					case 'R':
+						continue;
+					case 'S':
+						frame->call_frame = 0;
+						continue;
+					default:
+						break;
+					}
+					break;
+				}
+			}
+			if (ptr >= end || *ptr)
+				cie = NULL;
+		}
+		++ptr;
+	}
+	if (cie != NULL) {
+		/* get code alignment factor */
+		state.codeAlign = get_uleb128(&ptr, end);
+		/* get data alignment factor */
+		state.dataAlign = get_sleb128(&ptr, end);
+		if (state.codeAlign == 0 || state.dataAlign == 0 || ptr >= end)
+			cie = NULL;
+		else {
+			retAddrReg =
+			    state.version <= 1 ? *ptr++ : get_uleb128(&ptr,
+								      end);
+			unw_debug("CIE Frame Info:\n");
+			unw_debug("return Address register 0x%lx\n",
+				  retAddrReg);
+			unw_debug("data Align: %ld\n", state.dataAlign);
+			unw_debug("code Align: %lu\n", state.codeAlign);
+			/* skip augmentation */
+			if (((const char *)(cie + 2))[1] == 'z') {
+				uleb128_t augSize = get_uleb128(&ptr, end);
+
+				ptr += augSize;
+			}
+			if (ptr > end || retAddrReg >= ARRAY_SIZE(reg_info)
+			    || REG_INVALID(retAddrReg)
+			    || reg_info[retAddrReg].width !=
+			    sizeof(unsigned long))
+				cie = NULL;
+		}
+	}
+	if (cie != NULL) {
+		state.cieStart = ptr;
+		ptr = state.cieEnd;
+		state.cieEnd = end;
+		end = (const u8 *)(fde + 1) + *fde;
+		/* skip augmentation */
+		if (((const char *)(cie + 2))[1] == 'z') {
+			uleb128_t augSize = get_uleb128(&ptr, end);
+
+			if ((ptr += augSize) > end)
+				fde = NULL;
+		}
+	}
+	if (cie == NULL || fde == NULL) {
+#ifdef CONFIG_FRAME_POINTER
+		unsigned long top, bottom;
+
+		top = STACK_TOP_UNW(frame->task);
+		bottom = STACK_BOTTOM_UNW(frame->task);
+#if FRAME_RETADDR_OFFSET < 0
+		if (UNW_SP(frame) < top && UNW_FP(frame) <= UNW_SP(frame)
+		    && bottom < UNW_FP(frame)
+#else
+		if (UNW_SP(frame) > top && UNW_FP(frame) >= UNW_SP(frame)
+		    && bottom > UNW_FP(frame)
+#endif
+		    && !((UNW_SP(frame) | UNW_FP(frame))
+			 & (sizeof(unsigned long) - 1))) {
+			unsigned long link;
+
+			if (!__get_user(link, (unsigned long *)
+					(UNW_FP(frame) + FRAME_LINK_OFFSET))
+#if FRAME_RETADDR_OFFSET < 0
+			    && link > bottom && link < UNW_FP(frame)
+#else
+			    && link > UNW_FP(frame) && link < bottom
+#endif
+			    && !(link & (sizeof(link) - 1))
+			    && !__get_user(UNW_PC(frame),
+					   (unsigned long *)(UNW_FP(frame)
+						+ FRAME_RETADDR_OFFSET)))
+			{
+				UNW_SP(frame) =
+				    UNW_FP(frame) + FRAME_RETADDR_OFFSET
+#if FRAME_RETADDR_OFFSET < 0
+				    -
+#else
+				    +
+#endif
+				    sizeof(UNW_PC(frame));
+				UNW_FP(frame) = link;
+				return 0;
+			}
+		}
+#endif
+		return -ENXIO;
+	}
+	state.org = startLoc;
+	memcpy(&state.cfa, &badCFA, sizeof(state.cfa));
+
+	unw_debug("\nProcess instructions\n");
+
+	/* process instructions
+	 * For ARC, we optimize by having blink(retAddrReg) with
+	 * the sameValue in the leaf function, so we should not check
+	 * state.regs[retAddrReg].where == Nowhere
+	 */
+	if (!processCFI(ptr, end, pc, ptrType, &state)
+	    || state.loc > endLoc
+/*	   || state.regs[retAddrReg].where == Nowhere */
+	    || state.cfa.reg >= ARRAY_SIZE(reg_info)
+	    || reg_info[state.cfa.reg].width != sizeof(unsigned long)
+	    || state.cfa.offs % sizeof(unsigned long))
+		return -EIO;
+
+#ifdef UNWIND_DEBUG
+	unw_debug("\n");
+
+	unw_debug("\nRegister State Based on the rules parsed from FDE:\n");
+	for (i = 0; i < ARRAY_SIZE(state.regs); ++i) {
+
+		if (REG_INVALID(i))
+			continue;
+
+		switch (state.regs[i].where) {
+		case Nowhere:
+			break;
+		case Memory:
+			unw_debug(" r%d: c(%lu),", i, state.regs[i].value);
+			break;
+		case Register:
+			unw_debug(" r%d: r(%lu),", i, state.regs[i].value);
+			break;
+		case Value:
+			unw_debug(" r%d: v(%lu),", i, state.regs[i].value);
+			break;
+		}
+	}
+
+	unw_debug("\n");
+#endif
+
+	/* update frame */
+#ifndef CONFIG_AS_CFI_SIGNAL_FRAME
+	if (frame->call_frame
+	    && !UNW_DEFAULT_RA(state.regs[retAddrReg], state.dataAlign))
+		frame->call_frame = 0;
+#endif
+	cfa = FRAME_REG(state.cfa.reg, unsigned long) + state.cfa.offs;
+	startLoc = min_t(unsigned long, UNW_SP(frame), cfa);
+	endLoc = max_t(unsigned long, UNW_SP(frame), cfa);
+	if (STACK_LIMIT(startLoc) != STACK_LIMIT(endLoc)) {
+		startLoc = min(STACK_LIMIT(cfa), cfa);
+		endLoc = max(STACK_LIMIT(cfa), cfa);
+	}
+
+	unw_debug("\nCFA reg: 0x%lx, offset: 0x%lx =>  0x%lx\n",
+		  state.cfa.reg, state.cfa.offs, cfa);
+
+	for (i = 0; i < ARRAY_SIZE(state.regs); ++i) {
+		if (REG_INVALID(i)) {
+			if (state.regs[i].where == Nowhere)
+				continue;
+			return -EIO;
+		}
+		switch (state.regs[i].where) {
+		default:
+			break;
+		case Register:
+			if (state.regs[i].value >= ARRAY_SIZE(reg_info)
+			    || REG_INVALID(state.regs[i].value)
+			    || reg_info[i].width >
+			    reg_info[state.regs[i].value].width)
+				return -EIO;
+			switch (reg_info[state.regs[i].value].width) {
+			case sizeof(u8):
+				state.regs[i].value =
+				FRAME_REG(state.regs[i].value, const u8);
+				break;
+			case sizeof(u16):
+				state.regs[i].value =
+				FRAME_REG(state.regs[i].value, const u16);
+				break;
+			case sizeof(u32):
+				state.regs[i].value =
+				FRAME_REG(state.regs[i].value, const u32);
+				break;
+#ifdef CONFIG_64BIT
+			case sizeof(u64):
+				state.regs[i].value =
+				FRAME_REG(state.regs[i].value, const u64);
+				break;
+#endif
+			default:
+				return -EIO;
+			}
+			break;
+		}
+	}
+
+	unw_debug("\nRegister state after evaluation with realtime Stack:\n");
+	fptr = (unsigned long *)(&frame->regs);
+	for (i = 0; i < ARRAY_SIZE(state.regs); ++i, fptr++) {
+
+		if (REG_INVALID(i))
+			continue;
+		switch (state.regs[i].where) {
+		case Nowhere:
+			if (reg_info[i].width != sizeof(UNW_SP(frame))
+			    || &FRAME_REG(i, __typeof__(UNW_SP(frame)))
+			    != &UNW_SP(frame))
+				continue;
+			UNW_SP(frame) = cfa;
+			break;
+		case Register:
+			switch (reg_info[i].width) {
+			case sizeof(u8):
+				FRAME_REG(i, u8) = state.regs[i].value;
+				break;
+			case sizeof(u16):
+				FRAME_REG(i, u16) = state.regs[i].value;
+				break;
+			case sizeof(u32):
+				FRAME_REG(i, u32) = state.regs[i].value;
+				break;
+#ifdef CONFIG_64BIT
+			case sizeof(u64):
+				FRAME_REG(i, u64) = state.regs[i].value;
+				break;
+#endif
+			default:
+				return -EIO;
+			}
+			break;
+		case Value:
+			if (reg_info[i].width != sizeof(unsigned long))
+				return -EIO;
+			FRAME_REG(i, unsigned long) = cfa + state.regs[i].value
+			    * state.dataAlign;
+			break;
+		case Memory:
+			addr = cfa + state.regs[i].value * state.dataAlign;
+
+			if ((state.regs[i].value * state.dataAlign)
+			    % sizeof(unsigned long)
+			    || addr < startLoc
+			    || addr + sizeof(unsigned long) < addr
+			    || addr + sizeof(unsigned long) > endLoc)
+					return -EIO;
+
+			switch (reg_info[i].width) {
+			case sizeof(u8):
+				__get_user(FRAME_REG(i, u8),
+					   (u8 __user *)addr);
+				break;
+			case sizeof(u16):
+				__get_user(FRAME_REG(i, u16),
+					   (u16 __user *)addr);
+				break;
+			case sizeof(u32):
+				__get_user(FRAME_REG(i, u32),
+					   (u32 __user *)addr);
+				break;
+#ifdef CONFIG_64BIT
+			case sizeof(u64):
+				__get_user(FRAME_REG(i, u64),
+					   (u64 __user *)addr);
+				break;
+#endif
+			default:
+				return -EIO;
+			}
+
+			break;
+		}
+		unw_debug("r%d: 0x%lx ", i, *fptr);
+	}
+
+	return 0;
+#undef FRAME_REG
+}
+EXPORT_SYMBOL(arc_unwind);
diff --git a/arch/arc/kernel/vmlinux.lds.S b/arch/arc/kernel/vmlinux.lds.S
new file mode 100644
index 000000000..4d823d3f6
--- /dev/null
+++ b/arch/arc/kernel/vmlinux.lds.S
@@ -0,0 +1,161 @@
+/*
+ * Copyright (C) 2004, 2007-2010, 2011-2012 Synopsys, Inc. (www.synopsys.com)
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <asm-generic/vmlinux.lds.h>
+#include <asm/cache.h>
+#include <asm/page.h>
+#include <asm/thread_info.h>
+
+OUTPUT_ARCH(arc)
+ENTRY(res_service)
+
+#ifdef CONFIG_CPU_BIG_ENDIAN
+jiffies = jiffies_64 + 4;
+#else
+jiffies = jiffies_64;
+#endif
+
+SECTIONS
+{
+	/*
+	 * ICCM starts at 0x8000_0000. So if kernel is relocated to some other
+	 * address, make sure peripheral at 0x8z doesn't clash with ICCM
+	 * Essentially vector is also in ICCM.
+	 */
+
+	. = CONFIG_LINUX_LINK_BASE;
+
+	_int_vec_base_lds = .;
+	.vector : {
+		*(.vector)
+		. = ALIGN(PAGE_SIZE);
+	}
+
+#ifdef CONFIG_ARC_HAS_ICCM
+	.text.arcfp : {
+		*(.text.arcfp)
+		. = ALIGN(CONFIG_ARC_ICCM_SZ * 1024);
+	}
+#endif
+
+	/*
+	 * The reason for having a seperate subsection .init.ramfs is to
+	 * prevent objump from including it in kernel dumps
+	 *
+	 * Reason for having .init.ramfs above .init is to make sure that the
+	 * binary blob is tucked away to one side, reducing the displacement
+	 * between .init.text and .text, avoiding any possible relocation
+	 * errors because of calls from .init.text to .text
+	 * Yes such calls do exist. e.g.
+	 *	decompress_inflate.c:gunzip( ) -> zlib_inflate_workspace( )
+	 */
+
+	__init_begin = .;
+
+	.init.ramfs : { INIT_RAM_FS }
+
+	. = ALIGN(PAGE_SIZE);
+	_stext = .;
+
+	HEAD_TEXT_SECTION
+	INIT_TEXT_SECTION(L1_CACHE_BYTES)
+
+	/* INIT_DATA_SECTION open-coded: special INIT_RAM_FS handling */
+	.init.data : {
+		INIT_DATA
+		INIT_SETUP(L1_CACHE_BYTES)
+		INIT_CALLS
+		CON_INITCALL
+		SECURITY_INITCALL
+	}
+
+	.init.arch.info : {
+		__arch_info_begin = .;
+		*(.arch.info.init)
+		__arch_info_end = .;
+	}
+
+	PERCPU_SECTION(L1_CACHE_BYTES)
+
+	. = ALIGN(PAGE_SIZE);
+	__init_end = .;
+
+	.text : {
+		_text = .;
+		TEXT_TEXT
+		SCHED_TEXT
+		CPUIDLE_TEXT
+		LOCK_TEXT
+		KPROBES_TEXT
+		IRQENTRY_TEXT
+		SOFTIRQENTRY_TEXT
+		*(.fixup)
+		*(.gnu.warning)
+	}
+	EXCEPTION_TABLE(L1_CACHE_BYTES)
+	_etext = .;
+
+	_sdata = .;
+	RO_DATA_SECTION(PAGE_SIZE)
+
+	/*
+	 * 1. this is .data essentially
+	 * 2. THREAD_SIZE for init.task, must be kernel-stk sz aligned
+	 */
+	RW_DATA_SECTION(L1_CACHE_BYTES, PAGE_SIZE, THREAD_SIZE)
+
+	_edata = .;
+
+	BSS_SECTION(4, 4, 4)
+
+#ifdef CONFIG_ARC_DW2_UNWIND
+	. = ALIGN(PAGE_SIZE);
+	.eh_frame  : {
+		__start_unwind = .;
+		*(.eh_frame)
+		__end_unwind = .;
+	}
+#else
+	/DISCARD/ : {	*(.eh_frame) }
+#endif
+
+	NOTES
+
+	. = ALIGN(PAGE_SIZE);
+	_end = . ;
+
+	STABS_DEBUG
+	DISCARDS
+
+	.arcextmap 0 : {
+		*(.gnu.linkonce.arcextmap.*)
+		*(.arcextmap.*)
+	}
+
+#ifndef CONFIG_DEBUG_INFO
+	/DISCARD/ : { *(.debug_frame) }
+	/DISCARD/ : { *(.debug_aranges) }
+	/DISCARD/ : { *(.debug_pubnames) }
+	/DISCARD/ : { *(.debug_info) }
+	/DISCARD/ : { *(.debug_abbrev) }
+	/DISCARD/ : { *(.debug_line) }
+	/DISCARD/ : { *(.debug_str) }
+	/DISCARD/ : { *(.debug_loc) }
+	/DISCARD/ : { *(.debug_macinfo) }
+	/DISCARD/ : { *(.debug_ranges) }
+#endif
+
+#ifdef CONFIG_ARC_HAS_DCCM
+	. = CONFIG_ARC_DCCM_BASE;
+	__arc_dccm_base = .;
+	.data.arcfp : {
+		*(.data.arcfp)
+	}
+	. = ALIGN(CONFIG_ARC_DCCM_SZ * 1024);
+#endif
+}