diff options
Diffstat (limited to '')
101 files changed, 12045 insertions, 0 deletions
diff --git a/debian/patches/features/all/aufs4/aufs4-base.patch b/debian/patches/features/all/aufs4/aufs4-base.patch new file mode 100644 index 000000000..c0e036a73 --- /dev/null +++ b/debian/patches/features/all/aufs4/aufs4-base.patch @@ -0,0 +1,328 @@ +From: J. R. Okajima <hooanon05@yahoo.co.jp> +Date: Tue Sep 3 14:14:09 2019 +0900 +Subject: aufs4.19.63+ base patch +Origin: https://github.com/sfjro/aufs4-standalone/tree/6c582cc629cbfb4fac5bfc7d20db128c7d201da6 +Bug-Debian: https://bugs.debian.org/541828 + +Patch headers added by debian/bin/genpatch-aufs + +SPDX-License-Identifier: GPL-2.0 +aufs4.19.63+ base patch + +diff --git a/MAINTAINERS b/MAINTAINERS +index 11a59e82d92e..573d5b42b28b 100644 +--- a/MAINTAINERS ++++ b/MAINTAINERS +@@ -2605,6 +2605,19 @@ F: include/linux/audit.h + F: include/uapi/linux/audit.h + F: kernel/audit* + ++AUFS (advanced multi layered unification filesystem) FILESYSTEM ++M: "J. R. Okajima" <hooanon05g@gmail.com> ++L: aufs-users@lists.sourceforge.net (members only) ++L: linux-unionfs@vger.kernel.org ++W: http://aufs.sourceforge.net ++T: git://github.com/sfjro/aufs4-linux.git ++S: Supported ++F: Documentation/filesystems/aufs/ ++F: Documentation/ABI/testing/debugfs-aufs ++F: Documentation/ABI/testing/sysfs-aufs ++F: fs/aufs/ ++F: include/uapi/linux/aufs_type.h ++ + AUXILIARY DISPLAY DRIVERS + M: Miguel Ojeda Sandonis <miguel.ojeda.sandonis@gmail.com> + S: Maintained +diff --git a/drivers/block/loop.c b/drivers/block/loop.c +index f1e63eb7cbca..b732df5f14f3 100644 +--- a/drivers/block/loop.c ++++ b/drivers/block/loop.c +@@ -739,6 +739,24 @@ static int loop_change_fd(struct loop_device *lo, struct block_device *bdev, + return error; + } + ++/* ++ * for AUFS ++ * no get/put for file. 
++ */ ++struct file *loop_backing_file(struct super_block *sb) ++{ ++ struct file *ret; ++ struct loop_device *l; ++ ++ ret = NULL; ++ if (MAJOR(sb->s_dev) == LOOP_MAJOR) { ++ l = sb->s_bdev->bd_disk->private_data; ++ ret = l->lo_backing_file; ++ } ++ return ret; ++} ++EXPORT_SYMBOL_GPL(loop_backing_file); ++ + /* loop sysfs attributes */ + + static ssize_t loop_attr_show(struct device *dev, char *page, +diff --git a/fs/dcache.c b/fs/dcache.c +index 6e0022326afe..3bd53094ac3d 100644 +--- a/fs/dcache.c ++++ b/fs/dcache.c +@@ -1234,7 +1234,7 @@ enum d_walk_ret { + * + * The @enter() callbacks are called with d_lock held. + */ +-static void d_walk(struct dentry *parent, void *data, ++void d_walk(struct dentry *parent, void *data, + enum d_walk_ret (*enter)(void *, struct dentry *)) + { + struct dentry *this_parent; +diff --git a/fs/fcntl.c b/fs/fcntl.c +index 4137d96534a6..c91b3e3c4580 100644 +--- a/fs/fcntl.c ++++ b/fs/fcntl.c +@@ -32,7 +32,7 @@ + + #define SETFL_MASK (O_APPEND | O_NONBLOCK | O_NDELAY | O_DIRECT | O_NOATIME) + +-static int setfl(int fd, struct file * filp, unsigned long arg) ++int setfl(int fd, struct file * filp, unsigned long arg) + { + struct inode * inode = file_inode(filp); + int error = 0; +@@ -63,6 +63,8 @@ static int setfl(int fd, struct file * filp, unsigned long arg) + + if (filp->f_op->check_flags) + error = filp->f_op->check_flags(arg); ++ if (!error && filp->f_op->setfl) ++ error = filp->f_op->setfl(filp, arg); + if (error) + return error; + +diff --git a/fs/inode.c b/fs/inode.c +index 5c63693326bb..43046d7223e4 100644 +--- a/fs/inode.c ++++ b/fs/inode.c +@@ -1657,7 +1657,7 @@ EXPORT_SYMBOL(generic_update_time); + * This does the actual work of updating an inodes time or version. Must have + * had called mnt_want_write() before calling this. 
+ */ +-static int update_time(struct inode *inode, struct timespec64 *time, int flags) ++int update_time(struct inode *inode, struct timespec64 *time, int flags) + { + int (*update_time)(struct inode *, struct timespec64 *, int); + +diff --git a/fs/namespace.c b/fs/namespace.c +index 1fce41ba3535..fbd7edd49a2f 100644 +--- a/fs/namespace.c ++++ b/fs/namespace.c +@@ -770,6 +770,12 @@ static inline int check_mnt(struct mount *mnt) + return mnt->mnt_ns == current->nsproxy->mnt_ns; + } + ++/* for aufs, CONFIG_AUFS_BR_FUSE */ ++int is_current_mnt_ns(struct vfsmount *mnt) ++{ ++ return check_mnt(real_mount(mnt)); ++} ++ + /* + * vfsmount lock must be held for write + */ +diff --git a/fs/read_write.c b/fs/read_write.c +index 85fd7a8ee29e..c1335b4f19c0 100644 +--- a/fs/read_write.c ++++ b/fs/read_write.c +@@ -489,6 +489,28 @@ ssize_t __vfs_write(struct file *file, const char __user *p, size_t count, + return -EINVAL; + } + ++vfs_readf_t vfs_readf(struct file *file) ++{ ++ const struct file_operations *fop = file->f_op; ++ ++ if (fop->read) ++ return fop->read; ++ if (fop->read_iter) ++ return new_sync_read; ++ return ERR_PTR(-ENOSYS); ++} ++ ++vfs_writef_t vfs_writef(struct file *file) ++{ ++ const struct file_operations *fop = file->f_op; ++ ++ if (fop->write) ++ return fop->write; ++ if (fop->write_iter) ++ return new_sync_write; ++ return ERR_PTR(-ENOSYS); ++} ++ + ssize_t __kernel_write(struct file *file, const void *buf, size_t count, loff_t *pos) + { + mm_segment_t old_fs; +diff --git a/fs/splice.c b/fs/splice.c +index 485e409ef841..b2c2d320565b 100644 +--- a/fs/splice.c ++++ b/fs/splice.c +@@ -838,8 +838,8 @@ EXPORT_SYMBOL(generic_splice_sendpage); + /* + * Attempt to initiate a splice from pipe to file. 
+ */ +-static long do_splice_from(struct pipe_inode_info *pipe, struct file *out, +- loff_t *ppos, size_t len, unsigned int flags) ++long do_splice_from(struct pipe_inode_info *pipe, struct file *out, ++ loff_t *ppos, size_t len, unsigned int flags) + { + ssize_t (*splice_write)(struct pipe_inode_info *, struct file *, + loff_t *, size_t, unsigned int); +@@ -855,9 +855,9 @@ static long do_splice_from(struct pipe_inode_info *pipe, struct file *out, + /* + * Attempt to initiate a splice from a file to a pipe. + */ +-static long do_splice_to(struct file *in, loff_t *ppos, +- struct pipe_inode_info *pipe, size_t len, +- unsigned int flags) ++long do_splice_to(struct file *in, loff_t *ppos, ++ struct pipe_inode_info *pipe, size_t len, ++ unsigned int flags) + { + ssize_t (*splice_read)(struct file *, loff_t *, + struct pipe_inode_info *, size_t, unsigned int); +diff --git a/fs/sync.c b/fs/sync.c +index b54e0541ad89..28607828e96f 100644 +--- a/fs/sync.c ++++ b/fs/sync.c +@@ -28,7 +28,7 @@ + * wait == 1 case since in that case write_inode() functions do + * sync_dirty_buffer() and thus effectively write one block at a time. 
+ */ +-static int __sync_filesystem(struct super_block *sb, int wait) ++int __sync_filesystem(struct super_block *sb, int wait) + { + if (wait) + sync_inodes_sb(sb); +diff --git a/include/linux/fs.h b/include/linux/fs.h +index 92420009b9bc..ecad33c40cae 100644 +--- a/include/linux/fs.h ++++ b/include/linux/fs.h +@@ -1295,6 +1295,7 @@ extern void fasync_free(struct fasync_struct *); + /* can be called from interrupts */ + extern void kill_fasync(struct fasync_struct **, int, int); + ++extern int setfl(int fd, struct file * filp, unsigned long arg); + extern void __f_setown(struct file *filp, struct pid *, enum pid_type, int force); + extern int f_setown(struct file *filp, unsigned long arg, int force); + extern void f_delown(struct file *filp); +@@ -1759,6 +1760,7 @@ struct file_operations { + ssize_t (*sendpage) (struct file *, struct page *, int, size_t, loff_t *, int); + unsigned long (*get_unmapped_area)(struct file *, unsigned long, unsigned long, unsigned long, unsigned long); + int (*check_flags)(int); ++ int (*setfl)(struct file *, unsigned long); + int (*flock) (struct file *, int, struct file_lock *); + ssize_t (*splice_write)(struct pipe_inode_info *, struct file *, loff_t *, size_t, unsigned int); + ssize_t (*splice_read)(struct file *, loff_t *, struct pipe_inode_info *, size_t, unsigned int); +@@ -1830,6 +1832,12 @@ ssize_t rw_copy_check_uvector(int type, const struct iovec __user * uvector, + struct iovec *fast_pointer, + struct iovec **ret_pointer); + ++typedef ssize_t (*vfs_readf_t)(struct file *, char __user *, size_t, loff_t *); ++typedef ssize_t (*vfs_writef_t)(struct file *, const char __user *, size_t, ++ loff_t *); ++vfs_readf_t vfs_readf(struct file *file); ++vfs_writef_t vfs_writef(struct file *file); ++ + extern ssize_t __vfs_read(struct file *, char __user *, size_t, loff_t *); + extern ssize_t vfs_read(struct file *, char __user *, size_t, loff_t *); + extern ssize_t vfs_write(struct file *, const char __user *, size_t, loff_t *); +@@ 
-2255,6 +2263,7 @@ extern int current_umask(void); + extern void ihold(struct inode * inode); + extern void iput(struct inode *); + extern int generic_update_time(struct inode *, struct timespec64 *, int); ++extern int update_time(struct inode *, struct timespec64 *, int); + + /* /sys/fs */ + extern struct kobject *fs_kobj; +@@ -2542,6 +2551,7 @@ static inline bool sb_is_blkdev_sb(struct super_block *sb) + return false; + } + #endif ++extern int __sync_filesystem(struct super_block *, int); + extern int sync_filesystem(struct super_block *); + extern const struct file_operations def_blk_fops; + extern const struct file_operations def_chr_fops; +diff --git a/include/linux/lockdep.h b/include/linux/lockdep.h +index b0d0b51c4d85..f73ffaa0199e 100644 +--- a/include/linux/lockdep.h ++++ b/include/linux/lockdep.h +@@ -313,6 +313,8 @@ static inline int lockdep_match_key(struct lockdep_map *lock, + return lock->key == key; + } + ++struct lock_class *lockdep_hlock_class(struct held_lock *hlock); ++ + /* + * Acquire a lock. 
+ * +@@ -439,6 +441,7 @@ struct lockdep_map { }; + + #define lockdep_depth(tsk) (0) + ++#define lockdep_is_held(lock) (1) + #define lockdep_is_held_type(l, r) (1) + + #define lockdep_assert_held(l) do { (void)(l); } while (0) +diff --git a/include/linux/mnt_namespace.h b/include/linux/mnt_namespace.h +index 35942084cd40..24f5fd1a789d 100644 +--- a/include/linux/mnt_namespace.h ++++ b/include/linux/mnt_namespace.h +@@ -6,11 +6,14 @@ + struct mnt_namespace; + struct fs_struct; + struct user_namespace; ++struct vfsmount; + + extern struct mnt_namespace *copy_mnt_ns(unsigned long, struct mnt_namespace *, + struct user_namespace *, struct fs_struct *); + extern void put_mnt_ns(struct mnt_namespace *ns); + ++extern int is_current_mnt_ns(struct vfsmount *mnt); ++ + extern const struct file_operations proc_mounts_operations; + extern const struct file_operations proc_mountinfo_operations; + extern const struct file_operations proc_mountstats_operations; +diff --git a/include/linux/splice.h b/include/linux/splice.h +index 74b4911ac16d..19789fbea567 100644 +--- a/include/linux/splice.h ++++ b/include/linux/splice.h +@@ -87,4 +87,10 @@ extern void splice_shrink_spd(struct splice_pipe_desc *); + + extern const struct pipe_buf_operations page_cache_pipe_buf_ops; + extern const struct pipe_buf_operations default_pipe_buf_ops; ++ ++extern long do_splice_from(struct pipe_inode_info *pipe, struct file *out, ++ loff_t *ppos, size_t len, unsigned int flags); ++extern long do_splice_to(struct file *in, loff_t *ppos, ++ struct pipe_inode_info *pipe, size_t len, ++ unsigned int flags); + #endif +diff --git a/kernel/locking/lockdep.c b/kernel/locking/lockdep.c +index e810e8cb17e1..d0c9931e6531 100644 +--- a/kernel/locking/lockdep.c ++++ b/kernel/locking/lockdep.c +@@ -140,7 +140,7 @@ static struct lock_list list_entries[MAX_LOCKDEP_ENTRIES]; + unsigned long nr_lock_classes; + static struct lock_class lock_classes[MAX_LOCKDEP_KEYS]; + +-static inline struct lock_class *hlock_class(struct 
held_lock *hlock) ++inline struct lock_class *lockdep_hlock_class(struct held_lock *hlock) + { + if (!hlock->class_idx) { + /* +@@ -151,6 +151,7 @@ static inline struct lock_class *hlock_class(struct held_lock *hlock) + } + return lock_classes + hlock->class_idx - 1; + } ++#define hlock_class(hlock) lockdep_hlock_class(hlock) + + #ifdef CONFIG_LOCK_STAT + static DEFINE_PER_CPU(struct lock_class_stats[MAX_LOCKDEP_KEYS], cpu_lock_stats); diff --git a/debian/patches/features/all/aufs4/aufs4-mmap.patch b/debian/patches/features/all/aufs4/aufs4-mmap.patch new file mode 100644 index 000000000..98fe007a8 --- /dev/null +++ b/debian/patches/features/all/aufs4/aufs4-mmap.patch @@ -0,0 +1,406 @@ +From: J. R. Okajima <hooanon05@yahoo.co.jp> +Date: Tue Sep 3 14:14:09 2019 +0900 +Subject: aufs4.19.63+ mmap patch +Origin: https://github.com/sfjro/aufs4-standalone/tree/6c582cc629cbfb4fac5bfc7d20db128c7d201da6 +Bug-Debian: https://bugs.debian.org/541828 + +Patch headers added by debian/bin/genpatch-aufs + +SPDX-License-Identifier: GPL-2.0 +aufs4.19.63+ mmap patch + +diff --git a/fs/proc/base.c b/fs/proc/base.c +index a7fbda72afeb..9c8439a01c5b 100644 +--- a/fs/proc/base.c ++++ b/fs/proc/base.c +@@ -2018,7 +2018,7 @@ static int map_files_get_link(struct dentry *dentry, struct path *path) + rc = -ENOENT; + vma = find_exact_vma(mm, vm_start, vm_end); + if (vma && vma->vm_file) { +- *path = vma->vm_file->f_path; ++ *path = vma_pr_or_file(vma)->f_path; + path_get(path); + rc = 0; + } +diff --git a/fs/proc/nommu.c b/fs/proc/nommu.c +index 3b63be64e436..fb9913bf3d10 100644 +--- a/fs/proc/nommu.c ++++ b/fs/proc/nommu.c +@@ -45,7 +45,10 @@ static int nommu_region_show(struct seq_file *m, struct vm_region *region) + file = region->vm_file; + + if (file) { +- struct inode *inode = file_inode(region->vm_file); ++ struct inode *inode; ++ ++ file = vmr_pr_or_file(region); ++ inode = file_inode(file); + dev = inode->i_sb->s_dev; + ino = inode->i_ino; + } +diff --git a/fs/proc/task_mmu.c 
b/fs/proc/task_mmu.c +index 71aba44c4fa6..87cdce66a3f4 100644 +--- a/fs/proc/task_mmu.c ++++ b/fs/proc/task_mmu.c +@@ -309,7 +309,10 @@ show_map_vma(struct seq_file *m, struct vm_area_struct *vma) + const char *name = NULL; + + if (file) { +- struct inode *inode = file_inode(vma->vm_file); ++ struct inode *inode; ++ ++ file = vma_pr_or_file(vma); ++ inode = file_inode(file); + dev = inode->i_sb->s_dev; + ino = inode->i_ino; + pgoff = ((loff_t)vma->vm_pgoff) << PAGE_SHIFT; +@@ -1766,7 +1769,7 @@ static int show_numa_map(struct seq_file *m, void *v) + struct proc_maps_private *proc_priv = &numa_priv->proc_maps; + struct vm_area_struct *vma = v; + struct numa_maps *md = &numa_priv->md; +- struct file *file = vma->vm_file; ++ struct file *file = vma_pr_or_file(vma); + struct mm_struct *mm = vma->vm_mm; + struct mm_walk walk = { + .hugetlb_entry = gather_hugetlb_stats, +diff --git a/fs/proc/task_nommu.c b/fs/proc/task_nommu.c +index 5161894a6d62..b6d13cce45f3 100644 +--- a/fs/proc/task_nommu.c ++++ b/fs/proc/task_nommu.c +@@ -155,7 +155,10 @@ static int nommu_vma_show(struct seq_file *m, struct vm_area_struct *vma) + file = vma->vm_file; + + if (file) { +- struct inode *inode = file_inode(vma->vm_file); ++ struct inode *inode; ++ ++ file = vma_pr_or_file(vma); ++ inode = file_inode(file); + dev = inode->i_sb->s_dev; + ino = inode->i_ino; + pgoff = (loff_t)vma->vm_pgoff << PAGE_SHIFT; +diff --git a/include/linux/mm.h b/include/linux/mm.h +index bdec425c8e14..88cb95dc57dd 100644 +--- a/include/linux/mm.h ++++ b/include/linux/mm.h +@@ -1453,6 +1453,28 @@ static inline void unmap_shared_mapping_range(struct address_space *mapping, + unmap_mapping_range(mapping, holebegin, holelen, 0); + } + ++extern void vma_do_file_update_time(struct vm_area_struct *, const char[], int); ++extern struct file *vma_do_pr_or_file(struct vm_area_struct *, const char[], ++ int); ++extern void vma_do_get_file(struct vm_area_struct *, const char[], int); ++extern void vma_do_fput(struct 
vm_area_struct *, const char[], int); ++ ++#define vma_file_update_time(vma) vma_do_file_update_time(vma, __func__, \ ++ __LINE__) ++#define vma_pr_or_file(vma) vma_do_pr_or_file(vma, __func__, \ ++ __LINE__) ++#define vma_get_file(vma) vma_do_get_file(vma, __func__, __LINE__) ++#define vma_fput(vma) vma_do_fput(vma, __func__, __LINE__) ++ ++#ifndef CONFIG_MMU ++extern struct file *vmr_do_pr_or_file(struct vm_region *, const char[], int); ++extern void vmr_do_fput(struct vm_region *, const char[], int); ++ ++#define vmr_pr_or_file(region) vmr_do_pr_or_file(region, __func__, \ ++ __LINE__) ++#define vmr_fput(region) vmr_do_fput(region, __func__, __LINE__) ++#endif /* !CONFIG_MMU */ ++ + extern int access_process_vm(struct task_struct *tsk, unsigned long addr, + void *buf, int len, unsigned int gup_flags); + extern int access_remote_vm(struct mm_struct *mm, unsigned long addr, +diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h +index 5ed8f6292a53..01229754077f 100644 +--- a/include/linux/mm_types.h ++++ b/include/linux/mm_types.h +@@ -239,6 +239,7 @@ struct vm_region { + unsigned long vm_top; /* region allocated to here */ + unsigned long vm_pgoff; /* the offset in vm_file corresponding to vm_start */ + struct file *vm_file; /* the backing file or NULL */ ++ struct file *vm_prfile; /* the virtual backing file or NULL */ + + int vm_usage; /* region usage count (access under nommu_region_sem) */ + bool vm_icache_flushed : 1; /* true if the icache has been flushed for +@@ -313,6 +314,7 @@ struct vm_area_struct { + unsigned long vm_pgoff; /* Offset (within vm_file) in PAGE_SIZE + units */ + struct file * vm_file; /* File we map to (can be NULL). 
*/ ++ struct file *vm_prfile; /* shadow of vm_file */ + void * vm_private_data; /* was vm_pte (shared mem) */ + + atomic_long_t swap_readahead_info; +diff --git a/kernel/fork.c b/kernel/fork.c +index 69874db3fba8..757620e64e7b 100644 +--- a/kernel/fork.c ++++ b/kernel/fork.c +@@ -505,7 +505,7 @@ static __latent_entropy int dup_mmap(struct mm_struct *mm, + struct inode *inode = file_inode(file); + struct address_space *mapping = file->f_mapping; + +- get_file(file); ++ vma_get_file(tmp); + if (tmp->vm_flags & VM_DENYWRITE) + atomic_dec(&inode->i_writecount); + i_mmap_lock_write(mapping); +diff --git a/mm/Makefile b/mm/Makefile +index 26ef77a3883b..b2869af1ef08 100644 +--- a/mm/Makefile ++++ b/mm/Makefile +@@ -39,7 +39,7 @@ obj-y := filemap.o mempool.o oom_kill.o fadvise.o \ + mm_init.o mmu_context.o percpu.o slab_common.o \ + compaction.o vmacache.o \ + interval_tree.o list_lru.o workingset.o \ +- debug.o $(mmu-y) ++ prfile.o debug.o $(mmu-y) + + obj-y += init-mm.o + +diff --git a/mm/filemap.c b/mm/filemap.c +index 287f3fa02e5e..f96b6dd07b0b 100644 +--- a/mm/filemap.c ++++ b/mm/filemap.c +@@ -2722,7 +2722,7 @@ vm_fault_t filemap_page_mkwrite(struct vm_fault *vmf) + vm_fault_t ret = VM_FAULT_LOCKED; + + sb_start_pagefault(inode->i_sb); +- file_update_time(vmf->vma->vm_file); ++ vma_file_update_time(vmf->vma); + lock_page(page); + if (page->mapping != inode->i_mapping) { + unlock_page(page); +diff --git a/mm/mmap.c b/mm/mmap.c +index 1480880ff814..03ae15dfe614 100644 +--- a/mm/mmap.c ++++ b/mm/mmap.c +@@ -181,7 +181,7 @@ static struct vm_area_struct *remove_vma(struct vm_area_struct *vma) + if (vma->vm_ops && vma->vm_ops->close) + vma->vm_ops->close(vma); + if (vma->vm_file) +- fput(vma->vm_file); ++ vma_fput(vma); + mpol_put(vma_policy(vma)); + vm_area_free(vma); + return next; +@@ -906,7 +906,7 @@ int __vma_adjust(struct vm_area_struct *vma, unsigned long start, + if (remove_next) { + if (file) { + uprobe_munmap(next, next->vm_start, next->vm_end); +- fput(file); ++ 
vma_fput(vma); + } + if (next->anon_vma) + anon_vma_merge(vma, next); +@@ -1822,8 +1822,8 @@ unsigned long mmap_region(struct file *file, unsigned long addr, + return addr; + + unmap_and_free_vma: ++ vma_fput(vma); + vma->vm_file = NULL; +- fput(file); + + /* Undo any partial mapping done by a device driver. */ + unmap_region(mm, vma, prev, vma->vm_start, vma->vm_end); +@@ -2645,7 +2645,7 @@ int __split_vma(struct mm_struct *mm, struct vm_area_struct *vma, + goto out_free_mpol; + + if (new->vm_file) +- get_file(new->vm_file); ++ vma_get_file(new); + + if (new->vm_ops && new->vm_ops->open) + new->vm_ops->open(new); +@@ -2664,7 +2664,7 @@ int __split_vma(struct mm_struct *mm, struct vm_area_struct *vma, + if (new->vm_ops && new->vm_ops->close) + new->vm_ops->close(new); + if (new->vm_file) +- fput(new->vm_file); ++ vma_fput(new); + unlink_anon_vmas(new); + out_free_mpol: + mpol_put(vma_policy(new)); +@@ -2826,7 +2826,7 @@ SYSCALL_DEFINE5(remap_file_pages, unsigned long, start, unsigned long, size, + struct vm_area_struct *vma; + unsigned long populate = 0; + unsigned long ret = -EINVAL; +- struct file *file; ++ struct file *file, *prfile; + + pr_warn_once("%s (%d) uses deprecated remap_file_pages() syscall. See Documentation/vm/remap_file_pages.rst.\n", + current->comm, current->pid); +@@ -2901,10 +2901,27 @@ SYSCALL_DEFINE5(remap_file_pages, unsigned long, start, unsigned long, size, + } + } + +- file = get_file(vma->vm_file); ++ vma_get_file(vma); ++ file = vma->vm_file; ++ prfile = vma->vm_prfile; + ret = do_mmap_pgoff(vma->vm_file, start, size, + prot, flags, pgoff, &populate, NULL); ++ if (!IS_ERR_VALUE(ret) && file && prfile) { ++ struct vm_area_struct *new_vma; ++ ++ new_vma = find_vma(mm, ret); ++ if (!new_vma->vm_prfile) ++ new_vma->vm_prfile = prfile; ++ if (new_vma != vma) ++ get_file(prfile); ++ } ++ /* ++ * two fput()s instead of vma_fput(vma), ++ * coz vma may not be available anymore. 
++ */ + fput(file); ++ if (prfile) ++ fput(prfile); + out: + up_write(&mm->mmap_sem); + if (populate) +@@ -3210,7 +3227,7 @@ struct vm_area_struct *copy_vma(struct vm_area_struct **vmap, + if (anon_vma_clone(new_vma, vma)) + goto out_free_mempol; + if (new_vma->vm_file) +- get_file(new_vma->vm_file); ++ vma_get_file(new_vma); + if (new_vma->vm_ops && new_vma->vm_ops->open) + new_vma->vm_ops->open(new_vma); + vma_link(mm, new_vma, prev, rb_link, rb_parent); +diff --git a/mm/nommu.c b/mm/nommu.c +index 1d63ecfc98c5..15eafddeb944 100644 +--- a/mm/nommu.c ++++ b/mm/nommu.c +@@ -625,7 +625,7 @@ static void __put_nommu_region(struct vm_region *region) + up_write(&nommu_region_sem); + + if (region->vm_file) +- fput(region->vm_file); ++ vmr_fput(region); + + /* IO memory and memory shared directly out of the pagecache + * from ramfs/tmpfs mustn't be released here */ +@@ -763,7 +763,7 @@ static void delete_vma(struct mm_struct *mm, struct vm_area_struct *vma) + if (vma->vm_ops && vma->vm_ops->close) + vma->vm_ops->close(vma); + if (vma->vm_file) +- fput(vma->vm_file); ++ vma_fput(vma); + put_nommu_region(vma->vm_region); + vm_area_free(vma); + } +@@ -1286,7 +1286,7 @@ unsigned long do_mmap(struct file *file, + goto error_just_free; + } + } +- fput(region->vm_file); ++ vmr_fput(region); + kmem_cache_free(vm_region_jar, region); + region = pregion; + result = start; +@@ -1361,10 +1361,10 @@ unsigned long do_mmap(struct file *file, + up_write(&nommu_region_sem); + error: + if (region->vm_file) +- fput(region->vm_file); ++ vmr_fput(region); + kmem_cache_free(vm_region_jar, region); + if (vma->vm_file) +- fput(vma->vm_file); ++ vma_fput(vma); + vm_area_free(vma); + return ret; + +diff --git a/mm/prfile.c b/mm/prfile.c +new file mode 100644 +index 000000000000..024cdcfae1b1 +--- /dev/null ++++ b/mm/prfile.c +@@ -0,0 +1,86 @@ ++// SPDX-License-Identifier: GPL-2.0 ++/* ++ * Mainly for aufs which mmap(2) different file and wants to print different ++ * path in /proc/PID/maps. 
++ * Call these functions via macros defined in linux/mm.h. ++ * ++ * See Documentation/filesystems/aufs/design/06mmap.txt ++ * ++ * Copyright (c) 2014-2019 Junjro R. Okajima ++ * Copyright (c) 2014 Ian Campbell ++ */ ++ ++#include <linux/mm.h> ++#include <linux/file.h> ++#include <linux/fs.h> ++ ++/* #define PRFILE_TRACE */ ++static inline void prfile_trace(struct file *f, struct file *pr, ++ const char func[], int line, const char func2[]) ++{ ++#ifdef PRFILE_TRACE ++ if (pr) ++ pr_info("%s:%d: %s, %pD2\n", func, line, func2, f); ++#endif ++} ++ ++void vma_do_file_update_time(struct vm_area_struct *vma, const char func[], ++ int line) ++{ ++ struct file *f = vma->vm_file, *pr = vma->vm_prfile; ++ ++ prfile_trace(f, pr, func, line, __func__); ++ file_update_time(f); ++ if (f && pr) ++ file_update_time(pr); ++} ++ ++struct file *vma_do_pr_or_file(struct vm_area_struct *vma, const char func[], ++ int line) ++{ ++ struct file *f = vma->vm_file, *pr = vma->vm_prfile; ++ ++ prfile_trace(f, pr, func, line, __func__); ++ return (f && pr) ? pr : f; ++} ++ ++void vma_do_get_file(struct vm_area_struct *vma, const char func[], int line) ++{ ++ struct file *f = vma->vm_file, *pr = vma->vm_prfile; ++ ++ prfile_trace(f, pr, func, line, __func__); ++ get_file(f); ++ if (f && pr) ++ get_file(pr); ++} ++ ++void vma_do_fput(struct vm_area_struct *vma, const char func[], int line) ++{ ++ struct file *f = vma->vm_file, *pr = vma->vm_prfile; ++ ++ prfile_trace(f, pr, func, line, __func__); ++ fput(f); ++ if (f && pr) ++ fput(pr); ++} ++ ++#ifndef CONFIG_MMU ++struct file *vmr_do_pr_or_file(struct vm_region *region, const char func[], ++ int line) ++{ ++ struct file *f = region->vm_file, *pr = region->vm_prfile; ++ ++ prfile_trace(f, pr, func, line, __func__); ++ return (f && pr) ? 
pr : f; ++} ++ ++void vmr_do_fput(struct vm_region *region, const char func[], int line) ++{ ++ struct file *f = region->vm_file, *pr = region->vm_prfile; ++ ++ prfile_trace(f, pr, func, line, __func__); ++ fput(f); ++ if (f && pr) ++ fput(pr); ++} ++#endif /* !CONFIG_MMU */ diff --git a/debian/patches/features/all/aufs4/aufs4-standalone.patch b/debian/patches/features/all/aufs4/aufs4-standalone.patch new file mode 100644 index 000000000..0a682c2f5 --- /dev/null +++ b/debian/patches/features/all/aufs4/aufs4-standalone.patch @@ -0,0 +1,385 @@ +From: J. R. Okajima <hooanon05@yahoo.co.jp> +Date: Tue Sep 3 14:14:09 2019 +0900 +Subject: aufs4.19.63+ standalone patch +Origin: https://github.com/sfjro/aufs4-standalone/tree/6c582cc629cbfb4fac5bfc7d20db128c7d201da6 +Bug-Debian: https://bugs.debian.org/541828 + +Patch headers added by debian/bin/genpatch-aufs + +SPDX-License-Identifier: GPL-2.0 +aufs4.19.63+ standalone patch + +diff --git a/fs/dcache.c b/fs/dcache.c +index 3bd53094ac3d..d6b2f7a994f4 100644 +--- a/fs/dcache.c ++++ b/fs/dcache.c +@@ -1339,6 +1339,7 @@ void d_walk(struct dentry *parent, void *data, + seq = 1; + goto again; + } ++EXPORT_SYMBOL_GPL(d_walk); + + struct check_mount { + struct vfsmount *mnt; +@@ -2835,6 +2836,7 @@ void d_exchange(struct dentry *dentry1, struct dentry *dentry2) + + write_sequnlock(&rename_lock); + } ++EXPORT_SYMBOL_GPL(d_exchange); + + /** + * d_ancestor - search for an ancestor +diff --git a/fs/exec.c b/fs/exec.c +index 433b1257694a..504c56308700 100644 +--- a/fs/exec.c ++++ b/fs/exec.c +@@ -109,6 +109,7 @@ bool path_noexec(const struct path *path) + return (path->mnt->mnt_flags & MNT_NOEXEC) || + (path->mnt->mnt_sb->s_iflags & SB_I_NOEXEC); + } ++EXPORT_SYMBOL_GPL(path_noexec); + + #ifdef CONFIG_USELIB + /* +diff --git a/fs/fcntl.c b/fs/fcntl.c +index c91b3e3c4580..77513097f04c 100644 +--- a/fs/fcntl.c ++++ b/fs/fcntl.c +@@ -85,6 +85,7 @@ int setfl(int fd, struct file * filp, unsigned long arg) + out: + return error; + } 
++EXPORT_SYMBOL_GPL(setfl); + + static void f_modown(struct file *filp, struct pid *pid, enum pid_type type, + int force) +diff --git a/fs/file_table.c b/fs/file_table.c +index e49af4caf15d..569020fd1fb3 100644 +--- a/fs/file_table.c ++++ b/fs/file_table.c +@@ -161,6 +161,7 @@ struct file *alloc_empty_file(int flags, const struct cred *cred) + } + return ERR_PTR(-ENFILE); + } ++EXPORT_SYMBOL_GPL(alloc_empty_file); + + /* + * Variant of alloc_empty_file() that doesn't check and modify nr_files. +@@ -323,6 +324,7 @@ void flush_delayed_fput(void) + { + delayed_fput(NULL); + } ++EXPORT_SYMBOL_GPL(flush_delayed_fput); + + static DECLARE_DELAYED_WORK(delayed_fput_work, delayed_fput); + +@@ -365,6 +367,7 @@ void __fput_sync(struct file *file) + } + + EXPORT_SYMBOL(fput); ++EXPORT_SYMBOL_GPL(__fput_sync); + + void __init files_init(void) + { +diff --git a/fs/inode.c b/fs/inode.c +index 43046d7223e4..36146c757aa2 100644 +--- a/fs/inode.c ++++ b/fs/inode.c +@@ -1666,6 +1666,7 @@ int update_time(struct inode *inode, struct timespec64 *time, int flags) + + return update_time(inode, time, flags); + } ++EXPORT_SYMBOL_GPL(update_time); + + /** + * touch_atime - update the access time +diff --git a/fs/namespace.c b/fs/namespace.c +index fbd7edd49a2f..d6eca814d9fc 100644 +--- a/fs/namespace.c ++++ b/fs/namespace.c +@@ -437,6 +437,7 @@ void __mnt_drop_write(struct vfsmount *mnt) + mnt_dec_writers(real_mount(mnt)); + preempt_enable(); + } ++EXPORT_SYMBOL_GPL(__mnt_drop_write); + + /** + * mnt_drop_write - give up write access to a mount +@@ -775,6 +776,7 @@ int is_current_mnt_ns(struct vfsmount *mnt) + { + return check_mnt(real_mount(mnt)); + } ++EXPORT_SYMBOL_GPL(is_current_mnt_ns); + + /* + * vfsmount lock must be held for write +@@ -1844,6 +1846,7 @@ int iterate_mounts(int (*f)(struct vfsmount *, void *), void *arg, + } + return 0; + } ++EXPORT_SYMBOL_GPL(iterate_mounts); + + static void cleanup_group_ids(struct mount *mnt, struct mount *end) + { +diff --git a/fs/notify/group.c 
b/fs/notify/group.c +index c03b83662876..94d210ca384a 100644 +--- a/fs/notify/group.c ++++ b/fs/notify/group.c +@@ -112,6 +112,7 @@ void fsnotify_get_group(struct fsnotify_group *group) + { + refcount_inc(&group->refcnt); + } ++EXPORT_SYMBOL_GPL(fsnotify_get_group); + + /* + * Drop a reference to a group. Free it if it's through. +@@ -121,6 +122,7 @@ void fsnotify_put_group(struct fsnotify_group *group) + if (refcount_dec_and_test(&group->refcnt)) + fsnotify_final_destroy_group(group); + } ++EXPORT_SYMBOL_GPL(fsnotify_put_group); + + /* + * Create a new fsnotify_group and hold a reference for the group returned. +@@ -150,6 +152,7 @@ struct fsnotify_group *fsnotify_alloc_group(const struct fsnotify_ops *ops) + + return group; + } ++EXPORT_SYMBOL_GPL(fsnotify_alloc_group); + + int fsnotify_fasync(int fd, struct file *file, int on) + { +diff --git a/fs/notify/mark.c b/fs/notify/mark.c +index 09535f6423fc..e9401ec71fc7 100644 +--- a/fs/notify/mark.c ++++ b/fs/notify/mark.c +@@ -285,6 +285,7 @@ void fsnotify_put_mark(struct fsnotify_mark *mark) + queue_delayed_work(system_unbound_wq, &reaper_work, + FSNOTIFY_REAPER_DELAY); + } ++EXPORT_SYMBOL_GPL(fsnotify_put_mark); + + /* + * Get mark reference when we found the mark via lockless traversal of object +@@ -439,6 +440,7 @@ void fsnotify_destroy_mark(struct fsnotify_mark *mark, + mutex_unlock(&group->mark_mutex); + fsnotify_free_mark(mark); + } ++EXPORT_SYMBOL_GPL(fsnotify_destroy_mark); + + /* + * Sorting function for lists of fsnotify marks. +@@ -654,6 +656,7 @@ int fsnotify_add_mark(struct fsnotify_mark *mark, fsnotify_connp_t *connp, + mutex_unlock(&group->mark_mutex); + return ret; + } ++EXPORT_SYMBOL_GPL(fsnotify_add_mark); + + /* + * Given a list of marks, find the mark associated with given group. 
If found +@@ -777,6 +780,7 @@ void fsnotify_init_mark(struct fsnotify_mark *mark, + fsnotify_get_group(group); + mark->group = group; + } ++EXPORT_SYMBOL_GPL(fsnotify_init_mark); + + /* + * Destroy all marks in destroy_list, waits for SRCU period to finish before +diff --git a/fs/open.c b/fs/open.c +index 878478745924..ab755f585f29 100644 +--- a/fs/open.c ++++ b/fs/open.c +@@ -64,6 +64,7 @@ int do_truncate(struct dentry *dentry, loff_t length, unsigned int time_attrs, + inode_unlock(dentry->d_inode); + return ret; + } ++EXPORT_SYMBOL_GPL(do_truncate); + + long vfs_truncate(const struct path *path, loff_t length) + { +diff --git a/fs/read_write.c b/fs/read_write.c +index c1335b4f19c0..6ed1f2ddcb03 100644 +--- a/fs/read_write.c ++++ b/fs/read_write.c +@@ -459,6 +459,7 @@ ssize_t vfs_read(struct file *file, char __user *buf, size_t count, loff_t *pos) + + return ret; + } ++EXPORT_SYMBOL_GPL(vfs_read); + + static ssize_t new_sync_write(struct file *filp, const char __user *buf, size_t len, loff_t *ppos) + { +@@ -499,6 +500,7 @@ vfs_readf_t vfs_readf(struct file *file) + return new_sync_read; + return ERR_PTR(-ENOSYS); + } ++EXPORT_SYMBOL_GPL(vfs_readf); + + vfs_writef_t vfs_writef(struct file *file) + { +@@ -510,6 +512,7 @@ vfs_writef_t vfs_writef(struct file *file) + return new_sync_write; + return ERR_PTR(-ENOSYS); + } ++EXPORT_SYMBOL_GPL(vfs_writef); + + ssize_t __kernel_write(struct file *file, const void *buf, size_t count, loff_t *pos) + { +@@ -579,6 +582,7 @@ ssize_t vfs_write(struct file *file, const char __user *buf, size_t count, loff_ + + return ret; + } ++EXPORT_SYMBOL_GPL(vfs_write); + + static inline loff_t file_pos_read(struct file *file) + { +diff --git a/fs/splice.c b/fs/splice.c +index b2c2d320565b..8250f2366329 100644 +--- a/fs/splice.c ++++ b/fs/splice.c +@@ -851,6 +851,7 @@ long do_splice_from(struct pipe_inode_info *pipe, struct file *out, + + return splice_write(pipe, out, ppos, len, flags); + } ++EXPORT_SYMBOL_GPL(do_splice_from); + + /* + * 
Attempt to initiate a splice from a file to a pipe. +@@ -880,6 +881,7 @@ long do_splice_to(struct file *in, loff_t *ppos, + + return splice_read(in, ppos, pipe, len, flags); + } ++EXPORT_SYMBOL_GPL(do_splice_to); + + /** + * splice_direct_to_actor - splices data directly between two non-pipes +diff --git a/fs/sync.c b/fs/sync.c +index 28607828e96f..ffd7ea43831e 100644 +--- a/fs/sync.c ++++ b/fs/sync.c +@@ -39,6 +39,7 @@ int __sync_filesystem(struct super_block *sb, int wait) + sb->s_op->sync_fs(sb, wait); + return __sync_blockdev(sb->s_bdev, wait); + } ++EXPORT_SYMBOL_GPL(__sync_filesystem); + + /* + * Write out and wait upon all dirty data associated with this +diff --git a/fs/xattr.c b/fs/xattr.c +index 0d6a6a4af861..7ce4701b7289 100644 +--- a/fs/xattr.c ++++ b/fs/xattr.c +@@ -295,6 +295,7 @@ vfs_getxattr_alloc(struct dentry *dentry, const char *name, char **xattr_value, + *xattr_value = value; + return error; + } ++EXPORT_SYMBOL_GPL(vfs_getxattr_alloc); + + ssize_t + __vfs_getxattr(struct dentry *dentry, struct inode *inode, const char *name, +diff --git a/kernel/locking/lockdep.c b/kernel/locking/lockdep.c +index d0c9931e6531..0e5d9706723c 100644 +--- a/kernel/locking/lockdep.c ++++ b/kernel/locking/lockdep.c +@@ -151,6 +151,7 @@ inline struct lock_class *lockdep_hlock_class(struct held_lock *hlock) + } + return lock_classes + hlock->class_idx - 1; + } ++EXPORT_SYMBOL_GPL(lockdep_hlock_class); + #define hlock_class(hlock) lockdep_hlock_class(hlock) + + #ifdef CONFIG_LOCK_STAT +diff --git a/kernel/task_work.c b/kernel/task_work.c +index 0fef395662a6..83fb1ecfc33d 100644 +--- a/kernel/task_work.c ++++ b/kernel/task_work.c +@@ -116,3 +116,4 @@ void task_work_run(void) + } while (work); + } + } ++EXPORT_SYMBOL_GPL(task_work_run); +diff --git a/security/device_cgroup.c b/security/device_cgroup.c +index dc28914fa72e..c2ddfea2b280 100644 +--- a/security/device_cgroup.c ++++ b/security/device_cgroup.c +@@ -824,3 +824,4 @@ int __devcgroup_check_permission(short type, 
u32 major, u32 minor, + + return 0; + } ++EXPORT_SYMBOL_GPL(__devcgroup_check_permission); +diff --git a/security/security.c b/security/security.c +index 5ce2448f3a45..3997af3462bc 100644 +--- a/security/security.c ++++ b/security/security.c +@@ -542,6 +542,7 @@ int security_path_rmdir(const struct path *dir, struct dentry *dentry) + return 0; + return call_int_hook(path_rmdir, 0, dir, dentry); + } ++EXPORT_SYMBOL_GPL(security_path_rmdir); + + int security_path_unlink(const struct path *dir, struct dentry *dentry) + { +@@ -558,6 +559,7 @@ int security_path_symlink(const struct path *dir, struct dentry *dentry, + return 0; + return call_int_hook(path_symlink, 0, dir, dentry, old_name); + } ++EXPORT_SYMBOL_GPL(security_path_symlink); + + int security_path_link(struct dentry *old_dentry, const struct path *new_dir, + struct dentry *new_dentry) +@@ -566,6 +568,7 @@ int security_path_link(struct dentry *old_dentry, const struct path *new_dir, + return 0; + return call_int_hook(path_link, 0, old_dentry, new_dir, new_dentry); + } ++EXPORT_SYMBOL_GPL(security_path_link); + + int security_path_rename(const struct path *old_dir, struct dentry *old_dentry, + const struct path *new_dir, struct dentry *new_dentry, +@@ -593,6 +596,7 @@ int security_path_truncate(const struct path *path) + return 0; + return call_int_hook(path_truncate, 0, path); + } ++EXPORT_SYMBOL_GPL(security_path_truncate); + + int security_path_chmod(const struct path *path, umode_t mode) + { +@@ -600,6 +604,7 @@ int security_path_chmod(const struct path *path, umode_t mode) + return 0; + return call_int_hook(path_chmod, 0, path, mode); + } ++EXPORT_SYMBOL_GPL(security_path_chmod); + + int security_path_chown(const struct path *path, kuid_t uid, kgid_t gid) + { +@@ -607,6 +612,7 @@ int security_path_chown(const struct path *path, kuid_t uid, kgid_t gid) + return 0; + return call_int_hook(path_chown, 0, path, uid, gid); + } ++EXPORT_SYMBOL_GPL(security_path_chown); + + int security_path_chroot(const struct 
path *path) + { +@@ -707,6 +713,7 @@ int security_inode_permission(struct inode *inode, int mask) + return 0; + return call_int_hook(inode_permission, 0, inode, mask); + } ++EXPORT_SYMBOL_GPL(security_inode_permission); + + int security_inode_setattr(struct dentry *dentry, struct iattr *attr) + { +@@ -878,6 +885,7 @@ int security_file_permission(struct file *file, int mask) + + return fsnotify_perm(file, mask); + } ++EXPORT_SYMBOL_GPL(security_file_permission); + + int security_file_alloc(struct file *file) + { +@@ -937,6 +945,7 @@ int security_mmap_file(struct file *file, unsigned long prot, + return ret; + return ima_file_mmap(file, prot); + } ++EXPORT_SYMBOL_GPL(security_mmap_file); + + int security_mmap_addr(unsigned long addr) + { diff --git a/debian/patches/features/all/db-mok-keyring/0001-KEYS-Allow-unrestricted-boot-time-addition-of-keys-t.patch b/debian/patches/features/all/db-mok-keyring/0001-KEYS-Allow-unrestricted-boot-time-addition-of-keys-t.patch new file mode 100644 index 000000000..a67217c02 --- /dev/null +++ b/debian/patches/features/all/db-mok-keyring/0001-KEYS-Allow-unrestricted-boot-time-addition-of-keys-t.patch @@ -0,0 +1,91 @@ +From: David Howells <dhowells@redhat.com> +Date: Fri, 5 May 2017 08:21:56 +0100 +Subject: [PATCH 1/7] KEYS: Allow unrestricted boot-time addition of keys to + secondary keyring +Origin: https://git.kernel.org/pub/scm/linux/kernel/git/jforbes/linux.git/commit/?id=40db8fc497d010ae6cee6297c3882d3dc3d76d48 + +Allow keys to be added to the system secondary certificates keyring during +kernel initialisation in an unrestricted fashion. Such keys are implicitly +trusted and don't have their trust chains checked on link. + +This allows keys in the UEFI database to be added in secure boot mode for +the purposes of module signing. 
+ +Signed-off-by: David Howells <dhowells@redhat.com> +--- + certs/internal.h | 18 ++++++++++++++++++ + certs/system_keyring.c | 33 +++++++++++++++++++++++++++++++++ + 2 files changed, 51 insertions(+) + create mode 100644 certs/internal.h + +Index: linux/certs/internal.h +=================================================================== +--- /dev/null ++++ linux/certs/internal.h +@@ -0,0 +1,18 @@ ++/* Internal definitions ++ * ++ * Copyright (C) 2016 Red Hat, Inc. All Rights Reserved. ++ * Written by David Howells (dhowells@redhat.com) ++ * ++ * This program is free software; you can redistribute it and/or ++ * modify it under the terms of the GNU General Public Licence ++ * as published by the Free Software Foundation; either version ++ * 2 of the Licence, or (at your option) any later version. ++ */ ++ ++/* ++ * system_keyring.c ++ */ ++#ifdef CONFIG_SECONDARY_TRUSTED_KEYRING ++extern void __init add_trusted_secondary_key(const char *source, ++ const void *data, size_t len); ++#endif +Index: linux/certs/system_keyring.c +=================================================================== +--- linux.orig/certs/system_keyring.c ++++ linux/certs/system_keyring.c +@@ -19,6 +19,7 @@ + #include <keys/asymmetric-type.h> + #include <keys/system_keyring.h> + #include <crypto/pkcs7.h> ++#include "internal.h" + + static struct key *builtin_trusted_keys; + #ifdef CONFIG_SECONDARY_TRUSTED_KEYRING +@@ -266,3 +267,35 @@ error: + EXPORT_SYMBOL_GPL(verify_pkcs7_signature); + + #endif /* CONFIG_SYSTEM_DATA_VERIFICATION */ ++ ++#ifdef CONFIG_SECONDARY_TRUSTED_KEYRING ++/** ++ * add_trusted_secondary_key - Add to secondary keyring with no validation ++ * @source: Source of key ++ * @data: The blob holding the key ++ * @len: The length of the data blob ++ * ++ * Add a key to the secondary keyring without checking its trust chain. This ++ * is available only during kernel initialisation. 
++ */ ++void __init add_trusted_secondary_key(const char *source, ++ const void *data, size_t len) ++{ ++ key_ref_t key; ++ ++ key = key_create_or_update(make_key_ref(secondary_trusted_keys, 1), ++ "asymmetric", ++ NULL, data, len, ++ (KEY_POS_ALL & ~KEY_POS_SETATTR) | ++ KEY_USR_VIEW, ++ KEY_ALLOC_NOT_IN_QUOTA | ++ KEY_ALLOC_BYPASS_RESTRICTION); ++ ++ if (IS_ERR(key)) ++ pr_err("Problem loading %s X.509 certificate (%ld)\n", ++ source, PTR_ERR(key)); ++ else ++ pr_notice("Loaded %s cert '%s' linked to secondary sys keyring\n", ++ source, key_ref_to_ptr(key)->description); ++} ++#endif /* CONFIG_SECONDARY_TRUSTED_KEYRING */ diff --git a/debian/patches/features/all/db-mok-keyring/0001-MODSIGN-do-not-load-mok-when-secure-boot-disabled.patch b/debian/patches/features/all/db-mok-keyring/0001-MODSIGN-do-not-load-mok-when-secure-boot-disabled.patch new file mode 100644 index 000000000..d36028413 --- /dev/null +++ b/debian/patches/features/all/db-mok-keyring/0001-MODSIGN-do-not-load-mok-when-secure-boot-disabled.patch @@ -0,0 +1,64 @@ +From: "Lee, Chun-Yi" <joeyli.kernel@gmail.com> +Date: Tue, 13 Mar 2018 18:37:59 +0800 +Subject: [PATCH 1/5] MODSIGN: do not load mok when secure boot disabled +Origin: https://lore.kernel.org/patchwork/patch/933173/ + +The mok can not be trusted when the secure boot is disabled. Which +means that the kernel embedded certificate is the only trusted key. + +Due to db/dbx are authenticated variables, they needs manufacturer's +KEK for update. So db/dbx are secure when secureboot disabled. 
+ +Cc: David Howells <dhowells@redhat.com> +Cc: Josh Boyer <jwboyer@fedoraproject.org> +Cc: James Bottomley <James.Bottomley@HansenPartnership.com> +Signed-off-by: "Lee, Chun-Yi" <jlee@suse.com> +[Rebased by Luca Boccassi] +--- + certs/load_uefi.c | 26 +++++++++++++++----------- + 1 file changed, 15 insertions(+), 11 deletions(-) + +Index: linux/certs/load_uefi.c +=================================================================== +--- linux.orig/certs/load_uefi.c ++++ linux/certs/load_uefi.c +@@ -171,17 +171,6 @@ static int __init load_uefi_certs(void) + } + } + +- rc = get_cert_list(L"MokListRT", &mok_var, &moksize, &mok); +- if (rc < 0) { +- pr_info("MODSIGN: Couldn't get UEFI MokListRT\n"); +- } else if (moksize != 0) { +- rc = parse_efi_signature_list("UEFI:MokListRT", +- mok, moksize, get_handler_for_db); +- if (rc) +- pr_err("Couldn't parse MokListRT signatures: %d\n", rc); +- kfree(mok); +- } +- + rc = get_cert_list(L"dbx", &secure_var, &dbxsize, &dbx); + if (rc < 0) { + pr_info("MODSIGN: Couldn't get UEFI dbx list\n"); +@@ -194,6 +183,21 @@ static int __init load_uefi_certs(void) + kfree(dbx); + } + ++ /* the MOK can not be trusted when secure boot is disabled */ ++ if (!efi_enabled(EFI_SECURE_BOOT)) ++ return 0; ++ ++ rc = get_cert_list(L"MokListRT", &mok_var, &moksize, &mok); ++ if (rc < 0) { ++ pr_info("MODSIGN: Couldn't get UEFI MokListRT\n"); ++ } else if (moksize != 0) { ++ rc = parse_efi_signature_list("UEFI:MokListRT", ++ mok, moksize, get_handler_for_db); ++ if (rc) ++ pr_err("Couldn't parse MokListRT signatures: %d\n", rc); ++ kfree(mok); ++ } ++ + return rc; + } + late_initcall(load_uefi_certs); diff --git a/debian/patches/features/all/db-mok-keyring/0002-MODSIGN-load-blacklist-from-MOKx.patch b/debian/patches/features/all/db-mok-keyring/0002-MODSIGN-load-blacklist-from-MOKx.patch new file mode 100644 index 000000000..d23b7ac98 --- /dev/null +++ b/debian/patches/features/all/db-mok-keyring/0002-MODSIGN-load-blacklist-from-MOKx.patch @@ -0,0 
+1,60 @@ +From: "Lee, Chun-Yi" <joeyli.kernel@gmail.com> +Date: Tue, 13 Mar 2018 18:38:01 +0800 +Subject: [PATCH 2/4] MODSIGN: load blacklist from MOKx +Origin: https://lore.kernel.org/patchwork/patch/933177/ + +This patch adds the logic to load the blacklisted hash and +certificates from MOKx which is maintained by shim bootloader. + +Cc: David Howells <dhowells@redhat.com> +Cc: Josh Boyer <jwboyer@fedoraproject.org> +Cc: James Bottomley <James.Bottomley@HansenPartnership.com> +Signed-off-by: "Lee, Chun-Yi" <jlee@suse.com> +[Rebased by Luca Boccassi] +--- + certs/load_uefi.c | 16 +++++++++++++--- + 1 file changed, 13 insertions(+), 3 deletions(-) + +Index: linux/certs/load_uefi.c +=================================================================== +--- linux.orig/certs/load_uefi.c ++++ linux/certs/load_uefi.c +@@ -148,8 +148,8 @@ static int __init load_uefi_certs(void) + { + efi_guid_t secure_var = EFI_IMAGE_SECURITY_DATABASE_GUID; + efi_guid_t mok_var = EFI_SHIM_LOCK_GUID; +- void *db = NULL, *dbx = NULL, *mok = NULL; +- unsigned long dbsize = 0, dbxsize = 0, moksize = 0; ++ void *db = NULL, *dbx = NULL, *mok = NULL, *mokx = NULL; ++ unsigned long dbsize = 0, dbxsize = 0, moksize = 0, mokxsize = 0; + int rc = 0; + + if (!efi.get_variable) +@@ -183,7 +183,7 @@ static int __init load_uefi_certs(void) + kfree(dbx); + } + +- /* the MOK can not be trusted when secure boot is disabled */ ++ /* the MOK and MOKx can not be trusted when secure boot is disabled */ + if (!efi_enabled(EFI_SECURE_BOOT)) + return 0; + +@@ -198,6 +198,18 @@ static int __init load_uefi_certs(void) + kfree(mok); + } + ++ rc = get_cert_list(L"MokListXRT", &mok_var, &mokxsize, &mokx); ++ if (rc < 0) { ++ pr_info("MODSIGN: Couldn't get UEFI MokListXRT\n"); ++ } else if (mokxsize != 0) { ++ rc = parse_efi_signature_list("UEFI:mokx", ++ mokx, mokxsize, ++ get_handler_for_dbx); ++ if (rc) ++ pr_err("Couldn't parse MokListXRT signatures: %d\n", rc); ++ kfree(mokx); ++ } ++ + return rc; + } + 
late_initcall(load_uefi_certs); diff --git a/debian/patches/features/all/db-mok-keyring/0002-efi-Add-EFI-signature-data-types.patch b/debian/patches/features/all/db-mok-keyring/0002-efi-Add-EFI-signature-data-types.patch new file mode 100644 index 000000000..bf8d40768 --- /dev/null +++ b/debian/patches/features/all/db-mok-keyring/0002-efi-Add-EFI-signature-data-types.patch @@ -0,0 +1,57 @@ +From: Dave Howells <dhowells@redhat.com> +Date: Fri, 5 May 2017 08:21:58 +0100 +Subject: [PATCH 2/7] efi: Add EFI signature data types +Origin: https://git.kernel.org/pub/scm/linux/kernel/git/jforbes/linux.git/commit/?id=446e0e29d7d53fe7786d33603df5a6682dd00c12 + +Add the data types that are used for containing hashes, keys and +certificates for cryptographic verification along with their corresponding +type GUIDs. + +Signed-off-by: David Howells <dhowells@redhat.com> +--- + include/linux/efi.h | 25 +++++++++++++++++++++++++ + 1 file changed, 25 insertions(+) + +Index: linux/include/linux/efi.h +=================================================================== +--- linux.orig/include/linux/efi.h ++++ linux/include/linux/efi.h +@@ -663,6 +663,10 @@ void efi_native_runtime_setup(void); + #define EFI_IMAGE_SECURITY_DATABASE_GUID EFI_GUID(0xd719b2cb, 0x3d3a, 0x4596, 0xa3, 0xbc, 0xda, 0xd0, 0x0e, 0x67, 0x65, 0x6f) + #define EFI_SHIM_LOCK_GUID EFI_GUID(0x605dab50, 0xe046, 0x4300, 0xab, 0xb6, 0x3d, 0xd8, 0x10, 0xdd, 0x8b, 0x23) + ++#define EFI_CERT_SHA256_GUID EFI_GUID(0xc1c41626, 0x504c, 0x4092, 0xac, 0xa9, 0x41, 0xf9, 0x36, 0x93, 0x43, 0x28) ++#define EFI_CERT_X509_GUID EFI_GUID(0xa5c059a1, 0x94e4, 0x4aa7, 0x87, 0xb5, 0xab, 0x15, 0x5c, 0x2b, 0xf0, 0x72) ++#define EFI_CERT_X509_SHA256_GUID EFI_GUID(0x3bd2a492, 0x96c0, 0x4079, 0xb4, 0x20, 0xfc, 0xf9, 0x8e, 0xf1, 0x03, 0xed) ++ + /* + * This GUID is used to pass to the kernel proper the struct screen_info + * structure that was populated by the stub based on the GOP protocol instance +@@ -934,6 +938,27 @@ typedef struct { + 
efi_memory_desc_t entry[0]; + } efi_memory_attributes_table_t; + ++typedef struct { ++ efi_guid_t signature_owner; ++ u8 signature_data[]; ++} efi_signature_data_t; ++ ++typedef struct { ++ efi_guid_t signature_type; ++ u32 signature_list_size; ++ u32 signature_header_size; ++ u32 signature_size; ++ u8 signature_header[]; ++ /* efi_signature_data_t signatures[][] */ ++} efi_signature_list_t; ++ ++typedef u8 efi_sha256_hash_t[32]; ++ ++typedef struct { ++ efi_sha256_hash_t to_be_signed_hash; ++ efi_time_t time_of_revocation; ++} efi_cert_x509_sha256_t; ++ + /* + * All runtime access to EFI goes through this structure: + */ diff --git a/debian/patches/features/all/db-mok-keyring/0003-MODSIGN-checking-the-blacklisted-hash-before-loading-a-kernel-module.patch b/debian/patches/features/all/db-mok-keyring/0003-MODSIGN-checking-the-blacklisted-hash-before-loading-a-kernel-module.patch new file mode 100644 index 000000000..1a43ca553 --- /dev/null +++ b/debian/patches/features/all/db-mok-keyring/0003-MODSIGN-checking-the-blacklisted-hash-before-loading-a-kernel-module.patch @@ -0,0 +1,129 @@ +From: "Lee, Chun-Yi" <joeyli.kernel@gmail.com> +Date: Tue, 13 Mar 2018 18:38:02 +0800 +Subject: [PATCH 3/4] MODSIGN: checking the blacklisted hash before loading a + kernel module +Origin: https://lore.kernel.org/patchwork/patch/933175/ + +This patch adds the logic for checking the kernel module's hash +base on blacklist. The hash must be generated by sha256 and enrolled +to dbx/mokx. + +For example: + sha256sum sample.ko + mokutil --mokx --import-hash $HASH_RESULT + +Whether the signature on ko file is stripped or not, the hash can be +compared by kernel. 
+ +Cc: David Howells <dhowells@redhat.com> +Cc: Josh Boyer <jwboyer@fedoraproject.org> +Cc: James Bottomley <James.Bottomley@HansenPartnership.com> +Signed-off-by: "Lee, Chun-Yi" <jlee@suse.com> +[Rebased by Luca Boccassi] +--- + kernel/module_signing.c | 62 +++++++++++++++++++++++++++++++++++++++++++++++-- + 1 file changed, 60 insertions(+), 2 deletions(-) + +Index: linux/kernel/module_signing.c +=================================================================== +--- linux.orig/kernel/module_signing.c ++++ linux/kernel/module_signing.c +@@ -11,9 +11,12 @@ + + #include <linux/kernel.h> + #include <linux/errno.h> ++#include <linux/module.h> + #include <linux/string.h> + #include <linux/verification.h> + #include <crypto/public_key.h> ++#include <crypto/hash.h> ++#include <keys/system_keyring.h> + #include "module-internal.h" + + enum pkey_id_type { +@@ -42,19 +45,67 @@ struct module_signature { + __be32 sig_len; /* Length of signature data */ + }; + ++static int mod_is_hash_blacklisted(const void *mod, size_t verifylen) ++{ ++ struct crypto_shash *tfm; ++ struct shash_desc *desc; ++ size_t digest_size, desc_size; ++ u8 *digest; ++ int ret = 0; ++ ++ tfm = crypto_alloc_shash("sha256", 0, 0); ++ if (IS_ERR(tfm)) ++ goto error_return; ++ ++ desc_size = crypto_shash_descsize(tfm) + sizeof(*desc); ++ digest_size = crypto_shash_digestsize(tfm); ++ digest = kzalloc(digest_size + desc_size, GFP_KERNEL); ++ if (!digest) { ++ pr_err("digest memory buffer allocate fail\n"); ++ ret = -ENOMEM; ++ goto error_digest; ++ } ++ desc = (void *)digest + digest_size; ++ desc->tfm = tfm; ++ desc->flags = CRYPTO_TFM_REQ_MAY_SLEEP; ++ ret = crypto_shash_init(desc); ++ if (ret < 0) ++ goto error_shash; ++ ++ ret = crypto_shash_finup(desc, mod, verifylen, digest); ++ if (ret < 0) ++ goto error_shash; ++ ++ pr_debug("%zu digest: %*phN\n", verifylen, (int) digest_size, digest); ++ ++ ret = is_hash_blacklisted(digest, digest_size, "bin"); ++ if (ret == -EKEYREJECTED) ++ pr_err("Module hash
%*phN is blacklisted\n", ++ (int) digest_size, digest); ++ ++error_shash: ++ kfree(digest); ++error_digest: ++ crypto_free_shash(tfm); ++error_return: ++ return ret; ++} ++ + /* + * Verify the signature on a module. + */ + int mod_verify_sig(const void *mod, struct load_info *info) + { + struct module_signature ms; +- size_t sig_len, modlen = info->len; ++ size_t sig_len, modlen = info->len, wholelen; ++ int ret; + + pr_devel("==>%s(,%zu)\n", __func__, modlen); + + if (modlen <= sizeof(ms)) + return -EBADMSG; + ++ wholelen = modlen + sizeof(MODULE_SIG_STRING) - 1; + memcpy(&ms, mod + (modlen - sizeof(ms)), sizeof(ms)); + modlen -= sizeof(ms); + +@@ -82,8 +133,15 @@ int mod_verify_sig(const void *mod, stru + return -EBADMSG; + } + +- return verify_pkcs7_signature(mod, modlen, mod + modlen, sig_len, ++ ret = verify_pkcs7_signature(mod, modlen, mod + modlen, sig_len, + VERIFY_USE_SECONDARY_KEYRING, + VERIFYING_MODULE_SIGNATURE, + NULL, NULL); ++ pr_devel("verify_pkcs7_signature() = %d\n", ret); ++ ++ /* check whether the hash of the module is in the blacklist */ ++ if (!ret) ++ ret = mod_is_hash_blacklisted(mod, wholelen); ++ ++ return ret; + } diff --git a/debian/patches/features/all/db-mok-keyring/0003-efi-Add-an-EFI-signature-blob-parser.patch b/debian/patches/features/all/db-mok-keyring/0003-efi-Add-an-EFI-signature-blob-parser.patch new file mode 100644 index 000000000..e82287cff --- /dev/null +++ b/debian/patches/features/all/db-mok-keyring/0003-efi-Add-an-EFI-signature-blob-parser.patch @@ -0,0 +1,193 @@ +From: Dave Howells <dhowells@redhat.com> +Date: Fri, 5 May 2017 08:21:58 +0100 +Subject: [PATCH 3/7] efi: Add an EFI signature blob parser +Origin: https://git.kernel.org/pub/scm/linux/kernel/git/jforbes/linux.git/commit/?id=41a595bb0dc097c19ad377a0c32c993234aa2525 + +Add a function to parse an EFI signature blob looking for elements of +interest.
A list is made up of a series of sublists, where all the +elements in a sublist are of the same type, but sublists can be of +different types. + +For each sublist encountered, the function pointed to by the +get_handler_for_guid argument is called with the type specifier GUID and +returns either a pointer to a function to handle elements of that type or +NULL if the type is not of interest. + +If the sublist is of interest, each element is passed to the handler +function in turn. + +Signed-off-by: David Howells <dhowells@redhat.com> +--- + certs/Kconfig | 8 ++++ + certs/Makefile | 1 + + certs/efi_parser.c | 112 ++++++++++++++++++++++++++++++++++++++++++++ + include/linux/efi.h | 9 ++++ + 4 files changed, 130 insertions(+) + create mode 100644 certs/efi_parser.c + +Index: linux/certs/Kconfig +=================================================================== +--- linux.orig/certs/Kconfig ++++ linux/certs/Kconfig +@@ -83,4 +83,12 @@ config SYSTEM_BLACKLIST_HASH_LIST + wrapper to incorporate the list into the kernel. Each <hash> should + be a string of hex digits. + ++config EFI_SIGNATURE_LIST_PARSER ++ bool "EFI signature list parser" ++ depends on EFI ++ select X509_CERTIFICATE_PARSER ++ help ++ This option provides support for parsing EFI signature lists for ++ X.509 certificates and turning them into keys. 
++ + endmenu +Index: linux/certs/Makefile +=================================================================== +--- linux.orig/certs/Makefile ++++ linux/certs/Makefile +@@ -10,6 +10,7 @@ obj-$(CONFIG_SYSTEM_BLACKLIST_KEYRING) + + else + obj-$(CONFIG_SYSTEM_BLACKLIST_KEYRING) += blacklist_nohashes.o + endif ++obj-$(CONFIG_EFI_SIGNATURE_LIST_PARSER) += efi_parser.o + + ifeq ($(CONFIG_SYSTEM_TRUSTED_KEYRING),y) + +Index: linux/certs/efi_parser.c +=================================================================== +--- /dev/null ++++ linux/certs/efi_parser.c +@@ -0,0 +1,112 @@ ++/* EFI signature/key/certificate list parser ++ * ++ * Copyright (C) 2012, 2016 Red Hat, Inc. All Rights Reserved. ++ * Written by David Howells (dhowells@redhat.com) ++ * ++ * This program is free software; you can redistribute it and/or ++ * modify it under the terms of the GNU General Public Licence ++ * as published by the Free Software Foundation; either version ++ * 2 of the Licence, or (at your option) any later version. ++ */ ++ ++#define pr_fmt(fmt) "EFI: "fmt ++#include <linux/module.h> ++#include <linux/printk.h> ++#include <linux/err.h> ++#include <linux/efi.h> ++ ++/** ++ * parse_efi_signature_list - Parse an EFI signature list for certificates ++ * @source: The source of the key ++ * @data: The data blob to parse ++ * @size: The size of the data blob ++ * @get_handler_for_guid: Get the handler func for the sig type (or NULL) ++ * ++ * Parse an EFI signature list looking for elements of interest. A list is ++ * made up of a series of sublists, where all the elements in a sublist are of ++ * the same type, but sublists can be of different types. ++ * ++ * For each sublist encountered, the @get_handler_for_guid function is called ++ * with the type specifier GUID and returns either a pointer to a function to ++ * handle elements of that type or NULL if the type is not of interest. ++ * ++ * If the sublist is of interest, each element is passed to the handler ++ * function in turn. 
++ * ++ * Error EBADMSG is returned if the list doesn't parse correctly and 0 is ++ * returned if the list was parsed correctly. No error can be returned from ++ * the @get_handler_for_guid function or the element handler function it ++ * returns. ++ */ ++int __init parse_efi_signature_list( ++ const char *source, ++ const void *data, size_t size, ++ efi_element_handler_t (*get_handler_for_guid)(const efi_guid_t *)) ++{ ++ efi_element_handler_t handler; ++ unsigned offs = 0; ++ ++ pr_devel("-->%s(,%zu)\n", __func__, size); ++ ++ while (size > 0) { ++ const efi_signature_data_t *elem; ++ efi_signature_list_t list; ++ size_t lsize, esize, hsize, elsize; ++ ++ if (size < sizeof(list)) ++ return -EBADMSG; ++ ++ memcpy(&list, data, sizeof(list)); ++ pr_devel("LIST[%04x] guid=%pUl ls=%x hs=%x ss=%x\n", ++ offs, ++ list.signature_type.b, list.signature_list_size, ++ list.signature_header_size, list.signature_size); ++ ++ lsize = list.signature_list_size; ++ hsize = list.signature_header_size; ++ esize = list.signature_size; ++ elsize = lsize - sizeof(list) - hsize; ++ ++ if (lsize > size) { ++ pr_devel("<--%s() = -EBADMSG [overrun @%x]\n", ++ __func__, offs); ++ return -EBADMSG; ++ } ++ ++ if (lsize < sizeof(list) || ++ lsize - sizeof(list) < hsize || ++ esize < sizeof(*elem) || ++ elsize < esize || ++ elsize % esize != 0) { ++ pr_devel("- bad size combo @%x\n", offs); ++ return -EBADMSG; ++ } ++ ++ handler = get_handler_for_guid(&list.signature_type); ++ if (!handler) { ++ data += lsize; ++ size -= lsize; ++ offs += lsize; ++ continue; ++ } ++ ++ data += sizeof(list) + hsize; ++ size -= sizeof(list) + hsize; ++ offs += sizeof(list) + hsize; ++ ++ for (; elsize > 0; elsize -= esize) { ++ elem = data; ++ ++ pr_devel("ELEM[%04x]\n", offs); ++ handler(source, ++ &elem->signature_data, ++ esize - sizeof(*elem)); ++ ++ data += esize; ++ size -= esize; ++ offs += esize; ++ } ++ } ++ ++ return 0; ++} +Index: linux/include/linux/efi.h 
+=================================================================== +--- linux.orig/include/linux/efi.h ++++ linux/include/linux/efi.h +@@ -1139,6 +1139,15 @@ extern int efi_memattr_apply_permissions + char * __init efi_md_typeattr_format(char *buf, size_t size, + const efi_memory_desc_t *md); + ++ ++typedef void (*efi_element_handler_t)(const char *source, ++ const void *element_data, ++ size_t element_size); ++extern int __init parse_efi_signature_list( ++ const char *source, ++ const void *data, size_t size, ++ efi_element_handler_t (*get_handler_for_guid)(const efi_guid_t *)); ++ + /** + * efi_range_is_wc - check the WC bit on an address range + * @start: starting kvirt address diff --git a/debian/patches/features/all/db-mok-keyring/0004-MODSIGN-Import-certificates-from-UEFI-Secure-Boot.patch b/debian/patches/features/all/db-mok-keyring/0004-MODSIGN-Import-certificates-from-UEFI-Secure-Boot.patch new file mode 100644 index 000000000..cdb3b7a22 --- /dev/null +++ b/debian/patches/features/all/db-mok-keyring/0004-MODSIGN-Import-certificates-from-UEFI-Secure-Boot.patch @@ -0,0 +1,242 @@ +From: Josh Boyer <jwboyer@fedoraproject.org> +Date: Fri, 5 May 2017 08:21:59 +0100 +Subject: [PATCH 4/7] MODSIGN: Import certificates from UEFI Secure Boot +Origin: https://git.kernel.org/pub/scm/linux/kernel/git/jforbes/linux.git/commit/?id=7b7aae2efea13b5a7b80305856c28f235ea8b2fa + +Secure Boot stores a list of allowed certificates in the 'db' variable. +This imports those certificates into the system trusted keyring. This +allows for a third party signing certificate to be used in conjunction +with signed modules. By importing the public certificate into the 'db' +variable, a user can allow a module signed with that certificate to +load. The shim UEFI bootloader has a similar certificate list stored +in the 'MokListRT' variable. We import those as well. + +Secure Boot also maintains a list of disallowed certificates in the 'dbx' +variable. 
We load those certificates into the newly introduced system +blacklist keyring and forbid any module signed with those from loading and +forbid the use within the kernel of any key with a matching hash. + +This facility is enabled by setting CONFIG_LOAD_UEFI_KEYS. + +Signed-off-by: Josh Boyer <jwboyer@fedoraproject.org> +Signed-off-by: David Howells <dhowells@redhat.com> +--- + certs/Kconfig | 16 +++++ + certs/Makefile | 4 ++ + certs/load_uefi.c | 168 ++++++++++++++++++++++++++++++++++++++++++++++ + 3 files changed, 188 insertions(+) + create mode 100644 certs/load_uefi.c + +Index: linux/certs/Kconfig +=================================================================== +--- linux.orig/certs/Kconfig ++++ linux/certs/Kconfig +@@ -91,4 +91,20 @@ config EFI_SIGNATURE_LIST_PARSER + This option provides support for parsing EFI signature lists for + X.509 certificates and turning them into keys. + ++config LOAD_UEFI_KEYS ++ bool "Load certs and blacklist from UEFI db for module checking" ++ depends on SYSTEM_BLACKLIST_KEYRING ++ depends on SECONDARY_TRUSTED_KEYRING ++ depends on EFI ++ depends on EFI_SIGNATURE_LIST_PARSER ++ help ++ If the kernel is booted in secure boot mode, this option will cause ++ the kernel to load the certificates from the UEFI db and MokListRT ++ into the secondary trusted keyring. It will also load any X.509 ++ SHA256 hashes in the dbx list into the blacklist. ++ ++ The effect of this is that, if the kernel is booted in secure boot ++ mode, modules signed with UEFI-stored keys will be permitted to be ++ loaded and keys that match the blacklist will be rejected. 
++ + endmenu +Index: linux/certs/Makefile +=================================================================== +--- linux.orig/certs/Makefile ++++ linux/certs/Makefile +@@ -12,6 +12,10 @@ obj-$(CONFIG_SYSTEM_BLACKLIST_KEYRING) + + endif + obj-$(CONFIG_EFI_SIGNATURE_LIST_PARSER) += efi_parser.o + ++obj-$(CONFIG_LOAD_UEFI_KEYS) += load_uefi.o ++$(obj)/load_uefi.o: KBUILD_CFLAGS += -fshort-wchar ++ ++ + ifeq ($(CONFIG_SYSTEM_TRUSTED_KEYRING),y) + + $(eval $(call config_filename,SYSTEM_TRUSTED_KEYS)) +Index: linux/certs/load_uefi.c +=================================================================== +--- /dev/null ++++ linux/certs/load_uefi.c +@@ -0,0 +1,168 @@ ++#include <linux/kernel.h> ++#include <linux/sched.h> ++#include <linux/cred.h> ++#include <linux/err.h> ++#include <linux/efi.h> ++#include <linux/slab.h> ++#include <keys/asymmetric-type.h> ++#include <keys/system_keyring.h> ++#include "internal.h" ++ ++static __initdata efi_guid_t efi_cert_x509_guid = EFI_CERT_X509_GUID; ++static __initdata efi_guid_t efi_cert_x509_sha256_guid = EFI_CERT_X509_SHA256_GUID; ++static __initdata efi_guid_t efi_cert_sha256_guid = EFI_CERT_SHA256_GUID; ++ ++/* ++ * Get a certificate list blob from the named EFI variable. ++ */ ++static __init void *get_cert_list(efi_char16_t *name, efi_guid_t *guid, ++ unsigned long *size) ++{ ++ efi_status_t status; ++ unsigned long lsize = 4; ++ unsigned long tmpdb[4]; ++ void *db; ++ ++ status = efi.get_variable(name, guid, NULL, &lsize, &tmpdb); ++ if (status != EFI_BUFFER_TOO_SMALL) { ++ pr_err("Couldn't get size: 0x%lx\n", status); ++ return NULL; ++ } ++ ++ db = kmalloc(lsize, GFP_KERNEL); ++ if (!db) { ++ pr_err("Couldn't allocate memory for uefi cert list\n"); ++ return NULL; ++ } ++ ++ status = efi.get_variable(name, guid, NULL, &lsize, db); ++ if (status != EFI_SUCCESS) { ++ kfree(db); ++ pr_err("Error reading db var: 0x%lx\n", status); ++ return NULL; ++ } ++ ++ *size = lsize; ++ return db; ++} ++ ++/* ++ * Blacklist an X509 TBS hash. 
++ */ ++static __init void uefi_blacklist_x509_tbs(const char *source, ++ const void *data, size_t len) ++{ ++ char *hash, *p; ++ ++ hash = kmalloc(4 + len * 2 + 1, GFP_KERNEL); ++ if (!hash) ++ return; ++ p = memcpy(hash, "tbs:", 4); ++ p += 4; ++ bin2hex(p, data, len); ++ p += len * 2; ++ *p = 0; ++ ++ mark_hash_blacklisted(hash); ++ kfree(hash); ++} ++ ++/* ++ * Blacklist the hash of an executable. ++ */ ++static __init void uefi_blacklist_binary(const char *source, ++ const void *data, size_t len) ++{ ++ char *hash, *p; ++ ++ hash = kmalloc(4 + len * 2 + 1, GFP_KERNEL); ++ if (!hash) ++ return; ++ p = memcpy(hash, "bin:", 4); ++ p += 4; ++ bin2hex(p, data, len); ++ p += len * 2; ++ *p = 0; ++ ++ mark_hash_blacklisted(hash); ++ kfree(hash); ++} ++ ++/* ++ * Return the appropriate handler for particular signature list types found in ++ * the UEFI db and MokListRT tables. ++ */ ++static __init efi_element_handler_t get_handler_for_db(const efi_guid_t *sig_type) ++{ ++ if (efi_guidcmp(*sig_type, efi_cert_x509_guid) == 0) ++ return add_trusted_secondary_key; ++ return 0; ++} ++ ++/* ++ * Return the appropriate handler for particular signature list types found in ++ * the UEFI dbx and MokListXRT tables. ++ */ ++static __init efi_element_handler_t get_handler_for_dbx(const efi_guid_t *sig_type) ++{ ++ if (efi_guidcmp(*sig_type, efi_cert_x509_sha256_guid) == 0) ++ return uefi_blacklist_x509_tbs; ++ if (efi_guidcmp(*sig_type, efi_cert_sha256_guid) == 0) ++ return uefi_blacklist_binary; ++ return 0; ++} ++ ++/* ++ * Load the certs contained in the UEFI databases ++ */ ++static int __init load_uefi_certs(void) ++{ ++ efi_guid_t secure_var = EFI_IMAGE_SECURITY_DATABASE_GUID; ++ efi_guid_t mok_var = EFI_SHIM_LOCK_GUID; ++ void *db = NULL, *dbx = NULL, *mok = NULL; ++ unsigned long dbsize = 0, dbxsize = 0, moksize = 0; ++ int rc = 0; ++ ++ if (!efi.get_variable) ++ return false; ++ ++ /* Get db, MokListRT, and dbx. 
They might not exist, so it isn't ++ * an error if we can't get them. ++ */ ++ db = get_cert_list(L"db", &secure_var, &dbsize); ++ if (!db) { ++ pr_err("MODSIGN: Couldn't get UEFI db list\n"); ++ } else { ++ rc = parse_efi_signature_list("UEFI:db", ++ db, dbsize, get_handler_for_db); ++ if (rc) ++ pr_err("Couldn't parse db signatures: %d\n", rc); ++ kfree(db); ++ } ++ ++ mok = get_cert_list(L"MokListRT", &mok_var, &moksize); ++ if (!mok) { ++ pr_info("MODSIGN: Couldn't get UEFI MokListRT\n"); ++ } else { ++ rc = parse_efi_signature_list("UEFI:MokListRT", ++ mok, moksize, get_handler_for_db); ++ if (rc) ++ pr_err("Couldn't parse MokListRT signatures: %d\n", rc); ++ kfree(mok); ++ } ++ ++ dbx = get_cert_list(L"dbx", &secure_var, &dbxsize); ++ if (!dbx) { ++ pr_info("MODSIGN: Couldn't get UEFI dbx list\n"); ++ } else { ++ rc = parse_efi_signature_list("UEFI:dbx", ++ dbx, dbxsize, ++ get_handler_for_dbx); ++ if (rc) ++ pr_err("Couldn't parse dbx signatures: %d\n", rc); ++ kfree(dbx); ++ } ++ ++ return rc; ++} ++late_initcall(load_uefi_certs); diff --git a/debian/patches/features/all/db-mok-keyring/0004-MODSIGN-check-the-attributes-of-db-and-mok.patch b/debian/patches/features/all/db-mok-keyring/0004-MODSIGN-check-the-attributes-of-db-and-mok.patch new file mode 100644 index 000000000..57b6e61ff --- /dev/null +++ b/debian/patches/features/all/db-mok-keyring/0004-MODSIGN-check-the-attributes-of-db-and-mok.patch @@ -0,0 +1,108 @@ +From: "Lee, Chun-Yi" <joeyli.kernel@gmail.com> +Date: Tue, 13 Mar 2018 18:38:03 +0800 +Subject: [PATCH 4/4] MODSIGN: check the attributes of db and mok +Origin: https://lore.kernel.org/patchwork/patch/933176/ + +That's better for checking the attributes of db and mok variables +before loading certificates to kernel keyring. + +For db and dbx, both of them are authenticated variables. Which +means that they can only be modified by manufacturer's key. 
So +the kernel should check EFI_VARIABLE_TIME_BASED_AUTHENTICATED_WRITE_ACCESS +attribute before we trust it. + +For mok-rt and mokx-rt, both of them are created by shim boot loader +to forward the mok/mokx content to runtime. They must be runtime-volatile +variables. So kernel should check that the attributes map did not set +EFI_VARIABLE_NON_VOLATILE bit before we trust it. + +Cc: David Howells <dhowells@redhat.com> +Cc: Josh Boyer <jwboyer@fedoraproject.org> +Cc: James Bottomley <James.Bottomley@HansenPartnership.com> +Signed-off-by: "Lee, Chun-Yi" <jlee@suse.com> +[Rebased by Luca Boccassi] +--- + certs/load_uefi.c | 35 +++++++++++++++++++++++------------ + 1 file changed, 23 insertions(+), 12 deletions(-) + +Index: linux/certs/load_uefi.c +=================================================================== +--- linux.orig/certs/load_uefi.c ++++ linux/certs/load_uefi.c +@@ -36,12 +36,14 @@ static __init bool uefi_check_ignore_db( + * Get a certificate list blob from the named EFI variable.
+ */ + static __init int get_cert_list(efi_char16_t *name, efi_guid_t *guid, +- unsigned long *size, void **cert_list) ++ unsigned long *size, void **cert_list, ++ u32 pos_attr, u32 neg_attr) + { + efi_status_t status; + unsigned long lsize = 4; + unsigned long tmpdb[4]; + void *db; ++ u32 attr = 0; + + status = efi.get_variable(name, guid, NULL, &lsize, &tmpdb); + if (status == EFI_NOT_FOUND) { +@@ -61,12 +63,19 @@ static __init int get_cert_list(efi_char + return -ENOMEM; + } + +- status = efi.get_variable(name, guid, NULL, &lsize, db); ++ status = efi.get_variable(name, guid, &attr, &lsize, db); + if (status != EFI_SUCCESS) { + kfree(db); + pr_err("Error reading db var: 0x%lx\n", status); + return efi_status_to_err(status); + } ++ /* must have positive attributes and no negative attributes */ ++ if ((pos_attr && !(attr & pos_attr)) || ++ (neg_attr && (attr & neg_attr))) { ++ kfree(db); ++ pr_err("Error reading db var attributes: 0x%016x\n", attr); ++ return -1; ++ } + + *size = lsize; + *cert_list = db; +@@ -159,7 +168,8 @@ static int __init load_uefi_certs(void) + * an error if we can't get them. 
+ */ + if (!uefi_check_ignore_db()) { +- rc = get_cert_list(L"db", &secure_var, &dbsize, &db); ++ rc = get_cert_list(L"db", &secure_var, &dbsize, &db, ++ EFI_VARIABLE_TIME_BASED_AUTHENTICATED_WRITE_ACCESS, 0); + if (rc < 0) { + pr_err("MODSIGN: Couldn't get UEFI db list\n"); + } else if (dbsize != 0) { +@@ -171,7 +181,8 @@ static int __init load_uefi_certs(void) + } + } + +- rc = get_cert_list(L"dbx", &secure_var, &dbxsize, &dbx); ++ rc = get_cert_list(L"dbx", &secure_var, &dbxsize, &dbx, ++ EFI_VARIABLE_TIME_BASED_AUTHENTICATED_WRITE_ACCESS, 0); + if (rc < 0) { + pr_info("MODSIGN: Couldn't get UEFI dbx list\n"); + } else if (dbxsize != 0) { +@@ -187,7 +198,8 @@ static int __init load_uefi_certs(void) + if (!efi_enabled(EFI_SECURE_BOOT)) + return 0; + +- rc = get_cert_list(L"MokListRT", &mok_var, &moksize, &mok); ++ rc = get_cert_list(L"MokListRT", &mok_var, &moksize, &mok, ++ 0, EFI_VARIABLE_NON_VOLATILE); + if (rc < 0) { + pr_info("MODSIGN: Couldn't get UEFI MokListRT\n"); + } else if (moksize != 0) { +@@ -198,7 +210,8 @@ static int __init load_uefi_certs(void) + kfree(mok); + } + +- rc = get_cert_list(L"MokListXRT", &mok_var, &mokxsize, &mokx); ++ rc = get_cert_list(L"MokListXRT", &mok_var, &mokxsize, &mokx, ++ 0, EFI_VARIABLE_NON_VOLATILE); + if (rc < 0) { + pr_info("MODSIGN: Couldn't get UEFI MokListXRT\n"); + } else if (mokxsize != 0) { diff --git a/debian/patches/features/all/db-mok-keyring/0005-MODSIGN-Allow-the-db-UEFI-variable-to-be-suppressed.patch b/debian/patches/features/all/db-mok-keyring/0005-MODSIGN-Allow-the-db-UEFI-variable-to-be-suppressed.patch new file mode 100644 index 000000000..b5e2e843c --- /dev/null +++ b/debian/patches/features/all/db-mok-keyring/0005-MODSIGN-Allow-the-db-UEFI-variable-to-be-suppressed.patch @@ -0,0 +1,85 @@ +From: Josh Boyer <jwboyer@fedoraproject.org> +Date: Fri, 5 May 2017 08:21:59 +0100 +Subject: [PATCH 5/7] MODSIGN: Allow the "db" UEFI variable to be suppressed +Origin: 
https://git.kernel.org/pub/scm/linux/kernel/git/jforbes/linux.git/commit/?id=b51ca4e4d6c0c8000789de31a1184a41ac611d33 + +If a user tells shim to not use the certs/hashes in the UEFI db variable +for verification purposes, shim will set a UEFI variable called +MokIgnoreDB. Have the uefi import code look for this and ignore the db +variable if it is found. + +Signed-off-by: Josh Boyer <jwboyer@fedoraproject.org> +Signed-off-by: David Howells <dhowells@redhat.com> +--- + certs/load_uefi.c | 44 ++++++++++++++++++++++++++++++++++---------- + 1 file changed, 34 insertions(+), 10 deletions(-) + +Index: linux/certs/load_uefi.c +=================================================================== +--- linux.orig/certs/load_uefi.c ++++ linux/certs/load_uefi.c +@@ -13,6 +13,26 @@ static __initdata efi_guid_t efi_cert_x5 + static __initdata efi_guid_t efi_cert_sha256_guid = EFI_CERT_SHA256_GUID; + + /* ++ * Look to see if a UEFI variable called MokIgnoreDB exists and return true if ++ * it does. ++ * ++ * This UEFI variable is set by the shim if a user tells the shim to not use ++ * the certs/hashes in the UEFI db variable for verification purposes. If it ++ * is set, we should ignore the db variable also and the true return indicates ++ * this. ++ */ ++static __init bool uefi_check_ignore_db(void) ++{ ++ efi_status_t status; ++ unsigned int db = 0; ++ unsigned long size = sizeof(db); ++ efi_guid_t guid = EFI_SHIM_LOCK_GUID; ++ ++ status = efi.get_variable(L"MokIgnoreDB", &guid, NULL, &size, &db); ++ return status == EFI_SUCCESS; ++} ++ ++/* + * Get a certificate list blob from the named EFI variable. + */ + static __init void *get_cert_list(efi_char16_t *name, efi_guid_t *guid, +@@ -113,7 +133,9 @@ static __init efi_element_handler_t get_ + } + + /* +- * Load the certs contained in the UEFI databases ++ * Load the certs contained in the UEFI databases into the secondary trusted ++ * keyring and the UEFI blacklisted X.509 cert SHA256 hashes into the blacklist ++ * keyring. 
+ */ + static int __init load_uefi_certs(void) + { +@@ -129,15 +151,17 @@ static int __init load_uefi_certs(void) + /* Get db, MokListRT, and dbx. They might not exist, so it isn't + * an error if we can't get them. + */ +- db = get_cert_list(L"db", &secure_var, &dbsize); +- if (!db) { +- pr_err("MODSIGN: Couldn't get UEFI db list\n"); +- } else { +- rc = parse_efi_signature_list("UEFI:db", +- db, dbsize, get_handler_for_db); +- if (rc) +- pr_err("Couldn't parse db signatures: %d\n", rc); +- kfree(db); ++ if (!uefi_check_ignore_db()) { ++ db = get_cert_list(L"db", &secure_var, &dbsize); ++ if (!db) { ++ pr_err("MODSIGN: Couldn't get UEFI db list\n"); ++ } else { ++ rc = parse_efi_signature_list("UEFI:db", ++ db, dbsize, get_handler_for_db); ++ if (rc) ++ pr_err("Couldn't parse db signatures: %d\n", rc); ++ kfree(db); ++ } + } + + mok = get_cert_list(L"MokListRT", &mok_var, &moksize); diff --git a/debian/patches/features/all/db-mok-keyring/0006-Make-get_cert_list-not-complain-about-cert-lists-tha.patch b/debian/patches/features/all/db-mok-keyring/0006-Make-get_cert_list-not-complain-about-cert-lists-tha.patch new file mode 100644 index 000000000..c348e2f04 --- /dev/null +++ b/debian/patches/features/all/db-mok-keyring/0006-Make-get_cert_list-not-complain-about-cert-lists-tha.patch @@ -0,0 +1,106 @@ +From: Peter Jones <pjones@redhat.com> +Date: Mon, 2 Oct 2017 18:25:29 -0400 +Subject: [PATCH 6/7] Make get_cert_list() not complain about cert lists that + aren't present. 
+Origin: https://git.kernel.org/pub/scm/linux/kernel/git/jforbes/linux.git/commit/?id=0f4d5c7b49b45e7cf038bb769e33451b78a6445d + +Signed-off-by: Peter Jones <pjones@redhat.com> +--- + certs/load_uefi.c | 37 ++++++++++++++++++++++--------------- + 1 file changed, 22 insertions(+), 15 deletions(-) + +Index: linux/certs/load_uefi.c +=================================================================== +--- linux.orig/certs/load_uefi.c ++++ linux/certs/load_uefi.c +@@ -35,8 +35,8 @@ static __init bool uefi_check_ignore_db( + /* + * Get a certificate list blob from the named EFI variable. + */ +-static __init void *get_cert_list(efi_char16_t *name, efi_guid_t *guid, +- unsigned long *size) ++static __init int get_cert_list(efi_char16_t *name, efi_guid_t *guid, ++ unsigned long *size, void **cert_list) + { + efi_status_t status; + unsigned long lsize = 4; +@@ -44,26 +44,33 @@ static __init void *get_cert_list(efi_ch + void *db; + + status = efi.get_variable(name, guid, NULL, &lsize, &tmpdb); ++ if (status == EFI_NOT_FOUND) { ++ *size = 0; ++ *cert_list = NULL; ++ return 0; ++ } ++ + if (status != EFI_BUFFER_TOO_SMALL) { + pr_err("Couldn't get size: 0x%lx\n", status); +- return NULL; ++ return efi_status_to_err(status); + } + + db = kmalloc(lsize, GFP_KERNEL); + if (!db) { + pr_err("Couldn't allocate memory for uefi cert list\n"); +- return NULL; ++ return -ENOMEM; + } + + status = efi.get_variable(name, guid, NULL, &lsize, db); + if (status != EFI_SUCCESS) { + kfree(db); + pr_err("Error reading db var: 0x%lx\n", status); +- return NULL; ++ return efi_status_to_err(status); + } + + *size = lsize; +- return db; ++ *cert_list = db; ++ return 0; + } + + /* +@@ -152,10 +159,10 @@ static int __init load_uefi_certs(void) + * an error if we can't get them. 
+ */ + if (!uefi_check_ignore_db()) { +- db = get_cert_list(L"db", &secure_var, &dbsize); +- if (!db) { ++ rc = get_cert_list(L"db", &secure_var, &dbsize, &db); ++ if (rc < 0) { + pr_err("MODSIGN: Couldn't get UEFI db list\n"); +- } else { ++ } else if (dbsize != 0) { + rc = parse_efi_signature_list("UEFI:db", + db, dbsize, get_handler_for_db); + if (rc) +@@ -164,10 +171,10 @@ static int __init load_uefi_certs(void) + } + } + +- mok = get_cert_list(L"MokListRT", &mok_var, &moksize); +- if (!mok) { ++ rc = get_cert_list(L"MokListRT", &mok_var, &moksize, &mok); ++ if (rc < 0) { + pr_info("MODSIGN: Couldn't get UEFI MokListRT\n"); +- } else { ++ } else if (moksize != 0) { + rc = parse_efi_signature_list("UEFI:MokListRT", + mok, moksize, get_handler_for_db); + if (rc) +@@ -175,10 +182,10 @@ static int __init load_uefi_certs(void) + kfree(mok); + } + +- dbx = get_cert_list(L"dbx", &secure_var, &dbxsize); +- if (!dbx) { ++ rc = get_cert_list(L"dbx", &secure_var, &dbxsize, &dbx); ++ if (rc < 0) { + pr_info("MODSIGN: Couldn't get UEFI dbx list\n"); +- } else { ++ } else if (dbxsize != 0) { + rc = parse_efi_signature_list("UEFI:dbx", + dbx, dbxsize, + get_handler_for_dbx); diff --git a/debian/patches/features/all/db-mok-keyring/0007-modsign-Use-secondary-trust-keyring-for-module-signi.patch b/debian/patches/features/all/db-mok-keyring/0007-modsign-Use-secondary-trust-keyring-for-module-signi.patch new file mode 100644 index 000000000..b831869e4 --- /dev/null +++ b/debian/patches/features/all/db-mok-keyring/0007-modsign-Use-secondary-trust-keyring-for-module-signi.patch @@ -0,0 +1,28 @@ +From: Ke Wu <mikewu@google.com> +Date: Tue, 6 Nov 2018 15:21:30 -0800 +Subject: modsign: use all trusted keys to verify module signature +Origin: https://git.kernel.org/linus/e84cd7ee630e44a2cc8ae49e85920a271b214cb3 + +Make mod_verify_sig to use all trusted keys. This allows keys in +secondary_trusted_keys to be used to verify PKCS#7 signature on a +kernel module. 
+ +Signed-off-by: Ke Wu <mikewu@google.com> +Signed-off-by: Jessica Yu <jeyu@kernel.org> +--- + kernel/module_signing.c | 3 ++- + 1 file changed, 2 insertions(+), 1 deletion(-) + +Index: linux/kernel/module_signing.c +=================================================================== +--- linux.orig/kernel/module_signing.c ++++ linux/kernel/module_signing.c +@@ -83,6 +83,7 @@ int mod_verify_sig(const void *mod, stru + } + + return verify_pkcs7_signature(mod, modlen, mod + modlen, sig_len, +- NULL, VERIFYING_MODULE_SIGNATURE, ++ VERIFY_USE_SECONDARY_KEYRING, ++ VERIFYING_MODULE_SIGNATURE, + NULL, NULL); + } diff --git a/debian/patches/features/all/db-mok-keyring/modsign-make-shash-allocation-failure-fatal.patch b/debian/patches/features/all/db-mok-keyring/modsign-make-shash-allocation-failure-fatal.patch new file mode 100644 index 000000000..a0aea95a2 --- /dev/null +++ b/debian/patches/features/all/db-mok-keyring/modsign-make-shash-allocation-failure-fatal.patch @@ -0,0 +1,30 @@ +From: Ben Hutchings <ben@decadent.org.uk> +Date: Sun, 05 May 2019 13:45:06 +0100 +Subject: MODSIGN: Make shash allocation failure fatal + +mod_is_hash_blacklisted() currently returns 0 (success) if +crypto_alloc_shash() fails. This should instead be a fatal error, +so unwrap and pass up the error code.
+ +Signed-off-by: Ben Hutchings <ben@decadent.org.uk> +--- +Index: linux/kernel/module_signing.c +=================================================================== +--- linux.orig/kernel/module_signing.c ++++ linux/kernel/module_signing.c +@@ -51,11 +51,13 @@ static int mod_is_hash_blacklisted(const + struct shash_desc *desc; + size_t digest_size, desc_size; + u8 *digest; +- int ret = 0; ++ int ret; + + tfm = crypto_alloc_shash("sha256", 0, 0); +- if (IS_ERR(tfm)) ++ if (IS_ERR(tfm)) { ++ ret = PTR_ERR(tfm); + goto error_return; ++ } + + desc_size = crypto_shash_descsize(tfm) + sizeof(*desc); + digest_size = crypto_shash_digestsize(tfm); diff --git a/debian/patches/features/all/drivers-media-dvb-usb-af9005-request_firmware.patch b/debian/patches/features/all/drivers-media-dvb-usb-af9005-request_firmware.patch new file mode 100644 index 000000000..2d3b1a5da --- /dev/null +++ b/debian/patches/features/all/drivers-media-dvb-usb-af9005-request_firmware.patch @@ -0,0 +1,149 @@ +From: Ben Hutchings <ben@decadent.org.uk> +Date: Mon, 24 Aug 2009 23:19:58 +0100 +Subject: af9005: Use request_firmware() to load register init script +Forwarded: no + +Read the register init script from the Windows driver. This is sick +but should avoid the potential copyright infringement in distributing +a version of the script which is directly derived from the driver. 
+--- + drivers/media/dvb/dvb-usb/Kconfig | 2 +- + drivers/media/dvb/dvb-usb/af9005-fe.c | 66 ++++++++++++++++++++++++++------ + 2 files changed, 54 insertions(+), 14 deletions(-) + +Index: linux/drivers/media/usb/dvb-usb/Kconfig +=================================================================== +--- linux.orig/drivers/media/usb/dvb-usb/Kconfig ++++ linux/drivers/media/usb/dvb-usb/Kconfig +@@ -246,10 +246,10 @@ config DVB_USB_OPERA1 + + config DVB_USB_AF9005 + tristate "Afatech AF9005 DVB-T USB1.1 support" +- depends on BROKEN + depends on DVB_USB + select MEDIA_TUNER_MT2060 if MEDIA_SUBDRV_AUTOSELECT + select MEDIA_TUNER_QT1010 if MEDIA_SUBDRV_AUTOSELECT ++ select FW_LOADER + help + Say Y here to support the Afatech AF9005 based DVB-T USB1.1 receiver + and the TerraTec Cinergy T USB XE (Rev.1) +Index: linux/drivers/media/usb/dvb-usb/af9005-fe.c +=================================================================== +--- linux.orig/drivers/media/usb/dvb-usb/af9005-fe.c ++++ linux/drivers/media/usb/dvb-usb/af9005-fe.c +@@ -18,10 +18,26 @@ + * see Documentation/media/dvb-drivers/dvb-usb.rst for more information + */ + #include "af9005.h" +-#include "af9005-script.h" + #include "mt2060.h" + #include "qt1010.h" + #include <asm/div64.h> ++#include <linux/firmware.h> ++ ++/* Register initialisation script to be extracted from the Windows driver */ ++ ++typedef struct { ++ __le16 reg; ++ u8 pos; ++ u8 len; ++ u8 val; ++ u8 pad; ++} __packed RegDesc; ++ ++#define WIN_DRV_NAME "AF05BDA.sys" ++#define WIN_DRV_VERSION "6.3.2.1" ++#define WIN_DRV_SIZE 133504 ++#define WIN_DRV_SCRIPT_OFFSET 88316 ++#define WIN_DRV_SCRIPT_SIZE 1110 + + struct af9005_fe_state { + struct dvb_usb_device *d; +@@ -813,6 +829,8 @@ static int af9005_fe_init(struct dvb_fro + { + struct af9005_fe_state *state = fe->demodulator_priv; + struct dvb_usb_adapter *adap = fe->dvb->priv; ++ const struct firmware *fw; ++ const RegDesc *script; + int ret, i, scriptlen; + u8 temp, temp0 = 0, temp1 = 0, temp2 = 0; + 
u8 buf[2]; +@@ -965,37 +983,55 @@ static int af9005_fe_init(struct dvb_fro + if ((ret = af9005_write_ofdm_register(state->d, 0xaefb, 0x01))) + return ret; + +- /* load init script */ +- deb_info("load init script\n"); +- scriptlen = sizeof(script) / sizeof(RegDesc); ++ /* load and validate init script */ ++ deb_info("load init script from Windows driver\n"); ++ ret = request_firmware(&fw, WIN_DRV_NAME, &state->d->udev->dev); ++ if (ret) ++ return ret; ++ BUILD_BUG_ON(sizeof(RegDesc) != 6); ++ if (fw->size != WIN_DRV_SIZE || ++ memcmp(fw->data + WIN_DRV_SCRIPT_OFFSET, ++ "\x80\xa1\x00\x08\x0a\x00", 6) || ++ memcmp(fw->data + WIN_DRV_SCRIPT_OFFSET + WIN_DRV_SCRIPT_SIZE - 6, ++ "\x49\xa3\x00\x06\x02\x00", 6)) { ++ err("%s is invalid - should be version %s, size %u bytes\n", ++ WIN_DRV_NAME, WIN_DRV_VERSION, WIN_DRV_SIZE); ++ ret = -EINVAL; ++ goto fail_release; ++ } ++ ++ script = (const RegDesc *)(fw->data + WIN_DRV_SCRIPT_OFFSET); ++ scriptlen = WIN_DRV_SCRIPT_SIZE / sizeof(RegDesc); + for (i = 0; i < scriptlen; i++) { ++ u16 reg = le16_to_cpu(script[i].reg); + if ((ret = +- af9005_write_register_bits(state->d, script[i].reg, ++ af9005_write_register_bits(state->d, reg, + script[i].pos, + script[i].len, script[i].val))) +- return ret; ++ goto fail_release; + /* save 3 bytes of original fcw */ +- if (script[i].reg == 0xae18) ++ if (reg == 0xae18) + temp2 = script[i].val; +- if (script[i].reg == 0xae19) ++ if (reg == 0xae19) + temp1 = script[i].val; +- if (script[i].reg == 0xae1a) ++ if (reg == 0xae1a) + temp0 = script[i].val; + + /* save original unplug threshold */ +- if (script[i].reg == xd_p_reg_unplug_th) ++ if (reg == xd_p_reg_unplug_th) + state->original_if_unplug_th = script[i].val; +- if (script[i].reg == xd_p_reg_unplug_rf_gain_th) ++ if (reg == xd_p_reg_unplug_rf_gain_th) + state->original_rf_unplug_th = script[i].val; +- if (script[i].reg == xd_p_reg_unplug_dtop_if_gain_th) ++ if (reg == xd_p_reg_unplug_dtop_if_gain_th) + state->original_dtop_if_unplug_th 
= script[i].val; +- if (script[i].reg == xd_p_reg_unplug_dtop_rf_gain_th) ++ if (reg == xd_p_reg_unplug_dtop_rf_gain_th) + state->original_dtop_rf_unplug_th = script[i].val; + + } + state->original_fcw = + ((u32) temp2 << 16) + ((u32) temp1 << 8) + (u32) temp0; + ++ release_firmware(fw); + + /* save original TOPs */ + deb_info("save original TOPs\n"); +@@ -1075,6 +1111,10 @@ static int af9005_fe_init(struct dvb_fro + + deb_info("profit!\n"); + return 0; ++ ++fail_release: ++ release_firmware(fw); ++ return ret; + } + + static int af9005_fe_sleep(struct dvb_frontend *fe) diff --git a/debian/patches/features/all/e1000e-Add-support-for-Comet-Lake.patch b/debian/patches/features/all/e1000e-Add-support-for-Comet-Lake.patch new file mode 100644 index 000000000..2cb42334d --- /dev/null +++ b/debian/patches/features/all/e1000e-Add-support-for-Comet-Lake.patch @@ -0,0 +1,54 @@ +From: Sasha Neftin <sasha.neftin@intel.com> +Date: Thu, 10 Oct 2019 13:15:39 +0300 +Subject: e1000e: Add support for Comet Lake +Origin: https://git.kernel.org/linus/914ee9c436cbe90c8ca8a46ec8433cb614a2ada5 + +Add devices ID's for the next LOM generations that will be +available on the next Intel Client platform (Comet Lake) +This patch provides the initial support for these devices + +Signed-off-by: Sasha Neftin <sasha.neftin@intel.com> +Tested-by: Aaron Brown <aaron.f.brown@intel.com> +Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com> +--- + drivers/net/ethernet/intel/e1000e/hw.h | 6 ++++++ + drivers/net/ethernet/intel/e1000e/netdev.c | 6 ++++++ + 2 files changed, 12 insertions(+) + +diff --git a/drivers/net/ethernet/intel/e1000e/hw.h b/drivers/net/ethernet/intel/e1000e/hw.h +index eff75bd8a8f0..11fdc27faa82 100644 +--- a/drivers/net/ethernet/intel/e1000e/hw.h ++++ b/drivers/net/ethernet/intel/e1000e/hw.h +@@ -86,6 +86,12 @@ struct e1000_hw; + #define E1000_DEV_ID_PCH_ICP_I219_V8 0x15E0 + #define E1000_DEV_ID_PCH_ICP_I219_LM9 0x15E1 + #define E1000_DEV_ID_PCH_ICP_I219_V9 0x15E2 ++#define 
E1000_DEV_ID_PCH_CMP_I219_LM10 0x0D4E ++#define E1000_DEV_ID_PCH_CMP_I219_V10 0x0D4F ++#define E1000_DEV_ID_PCH_CMP_I219_LM11 0x0D4C ++#define E1000_DEV_ID_PCH_CMP_I219_V11 0x0D4D ++#define E1000_DEV_ID_PCH_CMP_I219_LM12 0x0D53 ++#define E1000_DEV_ID_PCH_CMP_I219_V12 0x0D55 + + #define E1000_REVISION_4 4 + +diff --git a/drivers/net/ethernet/intel/e1000e/netdev.c b/drivers/net/ethernet/intel/e1000e/netdev.c +index 42f57ab8fb8e..731e1b3e103a 100644 +--- a/drivers/net/ethernet/intel/e1000e/netdev.c ++++ b/drivers/net/ethernet/intel/e1000e/netdev.c +@@ -7749,6 +7749,12 @@ static const struct pci_device_id e1000_pci_tbl[] = { + { PCI_VDEVICE(INTEL, E1000_DEV_ID_PCH_ICP_I219_V8), board_pch_cnp }, + { PCI_VDEVICE(INTEL, E1000_DEV_ID_PCH_ICP_I219_LM9), board_pch_cnp }, + { PCI_VDEVICE(INTEL, E1000_DEV_ID_PCH_ICP_I219_V9), board_pch_cnp }, ++ { PCI_VDEVICE(INTEL, E1000_DEV_ID_PCH_CMP_I219_LM10), board_pch_cnp }, ++ { PCI_VDEVICE(INTEL, E1000_DEV_ID_PCH_CMP_I219_V10), board_pch_cnp }, ++ { PCI_VDEVICE(INTEL, E1000_DEV_ID_PCH_CMP_I219_LM11), board_pch_cnp }, ++ { PCI_VDEVICE(INTEL, E1000_DEV_ID_PCH_CMP_I219_V11), board_pch_cnp }, ++ { PCI_VDEVICE(INTEL, E1000_DEV_ID_PCH_CMP_I219_LM12), board_pch_spt }, ++ { PCI_VDEVICE(INTEL, E1000_DEV_ID_PCH_CMP_I219_V12), board_pch_spt }, + + { 0, 0, 0, 0, 0, 0, 0 } /* terminate list */ + }; +-- +2.27.0 + diff --git a/debian/patches/features/all/ena/0001-net-ethernet-remove-redundant-include.patch b/debian/patches/features/all/ena/0001-net-ethernet-remove-redundant-include.patch new file mode 100644 index 000000000..0c681dd2a --- /dev/null +++ b/debian/patches/features/all/ena/0001-net-ethernet-remove-redundant-include.patch @@ -0,0 +1,34 @@ +From: zhong jiang <zhongjiang@huawei.com> +Date: Wed, 28 Nov 2018 23:04:48 -0800 +Subject: [PATCH 01/19] net: ethernet: remove redundant include +Origin: https://git.kernel.org/linus/e641e99f261f5203a911a9e0db54a214460d2cc4 + +Manual cherry-pick from e641e99f261f5203a911a9e0db54a214460d2cc4: + + 
module.h already contained moduleparam.h, so it is safe to remove + the redundant include. + + The issue is detected with the help of Coccinelle. + + Signed-off-by: zhong jiang <zhongjiang@huawei.com> + Signed-off-by: David S. Miller <davem@davemloft.net> + +limited only to the amazon/ena driver + +Signed-off-by: Noah Meyerhans <noahm@debian.org> +--- + drivers/net/ethernet/amazon/ena/ena_netdev.c | 1 - + 1 file changed, 1 deletion(-) + +Index: linux/drivers/net/ethernet/amazon/ena/ena_netdev.c +=================================================================== +--- linux.orig/drivers/net/ethernet/amazon/ena/ena_netdev.c ++++ linux/drivers/net/ethernet/amazon/ena/ena_netdev.c +@@ -39,7 +39,6 @@ + #include <linux/if_vlan.h> + #include <linux/kernel.h> + #include <linux/module.h> +-#include <linux/moduleparam.h> + #include <linux/numa.h> + #include <linux/pci.h> + #include <linux/utsname.h> diff --git a/debian/patches/features/all/ena/0002-net-ena-minor-performance-improvement.patch b/debian/patches/features/all/ena/0002-net-ena-minor-performance-improvement.patch new file mode 100644 index 000000000..f4f95e9fa --- /dev/null +++ b/debian/patches/features/all/ena/0002-net-ena-minor-performance-improvement.patch @@ -0,0 +1,139 @@ +From: Arthur Kiyanovski <akiyano@amazon.com> +Date: Thu, 11 Oct 2018 11:26:16 +0300 +Subject: [PATCH 02/19] net: ena: minor performance improvement +Origin: https://git.kernel.org/linus/0e575f8542d1f4d74df30b5a9ba419c5373d01a1 + +Reduce fastpath overhead by making ena_com_tx_comp_req_id_get() inline. +Also move it to ena_eth_com.h file with its dependency function +ena_com_cq_inc_head(). + +Signed-off-by: Arthur Kiyanovski <akiyano@amazon.com> +Signed-off-by: David S. 
Miller <davem@davemloft.net> +--- + drivers/net/ethernet/amazon/ena/ena_eth_com.c | 43 ----------------- + drivers/net/ethernet/amazon/ena/ena_eth_com.h | 46 ++++++++++++++++++- + 2 files changed, 44 insertions(+), 45 deletions(-) + +Index: linux/drivers/net/ethernet/amazon/ena/ena_eth_com.c +=================================================================== +--- linux.orig/drivers/net/ethernet/amazon/ena/ena_eth_com.c ++++ linux/drivers/net/ethernet/amazon/ena/ena_eth_com.c +@@ -59,15 +59,6 @@ static inline struct ena_eth_io_rx_cdesc + return cdesc; + } + +-static inline void ena_com_cq_inc_head(struct ena_com_io_cq *io_cq) +-{ +- io_cq->head++; +- +- /* Switch phase bit in case of wrap around */ +- if (unlikely((io_cq->head & (io_cq->q_depth - 1)) == 0)) +- io_cq->phase ^= 1; +-} +- + static inline void *get_sq_desc(struct ena_com_io_sq *io_sq) + { + u16 tail_masked; +@@ -476,40 +467,6 @@ int ena_com_add_single_rx_desc(struct en + + return 0; + } +- +-int ena_com_tx_comp_req_id_get(struct ena_com_io_cq *io_cq, u16 *req_id) +-{ +- u8 expected_phase, cdesc_phase; +- struct ena_eth_io_tx_cdesc *cdesc; +- u16 masked_head; +- +- masked_head = io_cq->head & (io_cq->q_depth - 1); +- expected_phase = io_cq->phase; +- +- cdesc = (struct ena_eth_io_tx_cdesc *) +- ((uintptr_t)io_cq->cdesc_addr.virt_addr + +- (masked_head * io_cq->cdesc_entry_size_in_bytes)); +- +- /* When the current completion descriptor phase isn't the same as the +- * expected, it mean that the device still didn't update +- * this completion. 
+- */ +- cdesc_phase = READ_ONCE(cdesc->flags) & ENA_ETH_IO_TX_CDESC_PHASE_MASK; +- if (cdesc_phase != expected_phase) +- return -EAGAIN; +- +- dma_rmb(); +- if (unlikely(cdesc->req_id >= io_cq->q_depth)) { +- pr_err("Invalid req id %d\n", cdesc->req_id); +- return -EINVAL; +- } +- +- ena_com_cq_inc_head(io_cq); +- +- *req_id = READ_ONCE(cdesc->req_id); +- +- return 0; +-} + + bool ena_com_cq_empty(struct ena_com_io_cq *io_cq) + { +Index: linux/drivers/net/ethernet/amazon/ena/ena_eth_com.h +=================================================================== +--- linux.orig/drivers/net/ethernet/amazon/ena/ena_eth_com.h ++++ linux/drivers/net/ethernet/amazon/ena/ena_eth_com.h +@@ -86,8 +86,6 @@ int ena_com_add_single_rx_desc(struct en + struct ena_com_buf *ena_buf, + u16 req_id); + +-int ena_com_tx_comp_req_id_get(struct ena_com_io_cq *io_cq, u16 *req_id); +- + bool ena_com_cq_empty(struct ena_com_io_cq *io_cq); + + static inline void ena_com_unmask_intr(struct ena_com_io_cq *io_cq, +@@ -159,4 +157,48 @@ static inline void ena_com_comp_ack(stru + io_sq->next_to_comp += elem; + } + ++static inline void ena_com_cq_inc_head(struct ena_com_io_cq *io_cq) ++{ ++ io_cq->head++; ++ ++ /* Switch phase bit in case of wrap around */ ++ if (unlikely((io_cq->head & (io_cq->q_depth - 1)) == 0)) ++ io_cq->phase ^= 1; ++} ++ ++static inline int ena_com_tx_comp_req_id_get(struct ena_com_io_cq *io_cq, ++ u16 *req_id) ++{ ++ u8 expected_phase, cdesc_phase; ++ struct ena_eth_io_tx_cdesc *cdesc; ++ u16 masked_head; ++ ++ masked_head = io_cq->head & (io_cq->q_depth - 1); ++ expected_phase = io_cq->phase; ++ ++ cdesc = (struct ena_eth_io_tx_cdesc *) ++ ((uintptr_t)io_cq->cdesc_addr.virt_addr + ++ (masked_head * io_cq->cdesc_entry_size_in_bytes)); ++ ++ /* When the current completion descriptor phase isn't the same as the ++ * expected, it mean that the device still didn't update ++ * this completion. 
++ */ ++ cdesc_phase = READ_ONCE(cdesc->flags) & ENA_ETH_IO_TX_CDESC_PHASE_MASK; ++ if (cdesc_phase != expected_phase) ++ return -EAGAIN; ++ ++ dma_rmb(); ++ ++ *req_id = READ_ONCE(cdesc->req_id); ++ if (unlikely(*req_id >= io_cq->q_depth)) { ++ pr_err("Invalid req id %d\n", cdesc->req_id); ++ return -EINVAL; ++ } ++ ++ ena_com_cq_inc_head(io_cq); ++ ++ return 0; ++} ++ + #endif /* ENA_ETH_COM_H_ */ diff --git a/debian/patches/features/all/ena/0003-net-ena-complete-host-info-to-match-latest-ENA-spec.patch b/debian/patches/features/all/ena/0003-net-ena-complete-host-info-to-match-latest-ENA-spec.patch new file mode 100644 index 000000000..a7e02fd9e --- /dev/null +++ b/debian/patches/features/all/ena/0003-net-ena-complete-host-info-to-match-latest-ENA-spec.patch @@ -0,0 +1,178 @@ +From: Arthur Kiyanovski <akiyano@amazon.com> +Date: Thu, 11 Oct 2018 11:26:17 +0300 +Subject: [PATCH 03/19] net: ena: complete host info to match latest ENA spec +Origin: https://git.kernel.org/linus/095f2f1facba0c78f23750dba65c78cef722c1ea + +Add new fields and definitions to host info and fill them +according to the latest ENA spec version. + +Signed-off-by: Arthur Kiyanovski <akiyano@amazon.com> +Signed-off-by: David S. 
Miller <davem@davemloft.net> +--- + .../net/ethernet/amazon/ena/ena_admin_defs.h | 31 ++++++++++++++++++- + drivers/net/ethernet/amazon/ena/ena_com.c | 12 +++---- + .../net/ethernet/amazon/ena/ena_common_defs.h | 4 +-- + drivers/net/ethernet/amazon/ena/ena_netdev.c | 10 ++++-- + 4 files changed, 43 insertions(+), 14 deletions(-) + +Index: linux/drivers/net/ethernet/amazon/ena/ena_admin_defs.h +=================================================================== +--- linux.orig/drivers/net/ethernet/amazon/ena/ena_admin_defs.h ++++ linux/drivers/net/ethernet/amazon/ena/ena_admin_defs.h +@@ -63,6 +63,8 @@ enum ena_admin_aq_completion_status { + ENA_ADMIN_ILLEGAL_PARAMETER = 5, + + ENA_ADMIN_UNKNOWN_ERROR = 6, ++ ++ ENA_ADMIN_RESOURCE_BUSY = 7, + }; + + enum ena_admin_aq_feature_id { +@@ -702,6 +704,10 @@ enum ena_admin_os_type { + ENA_ADMIN_OS_FREEBSD = 4, + + ENA_ADMIN_OS_IPXE = 5, ++ ++ ENA_ADMIN_OS_ESXI = 6, ++ ++ ENA_ADMIN_OS_GROUPS_NUM = 6, + }; + + struct ena_admin_host_info { +@@ -723,11 +729,27 @@ struct ena_admin_host_info { + /* 7:0 : major + * 15:8 : minor + * 23:16 : sub_minor ++ * 31:24 : module_type + */ + u32 driver_version; + + /* features bitmap */ +- u32 supported_network_features[4]; ++ u32 supported_network_features[2]; ++ ++ /* ENA spec version of driver */ ++ u16 ena_spec_version; ++ ++ /* ENA device's Bus, Device and Function ++ * 2:0 : function ++ * 7:3 : device ++ * 15:8 : bus ++ */ ++ u16 bdf; ++ ++ /* Number of CPUs */ ++ u16 num_cpus; ++ ++ u16 reserved; + }; + + struct ena_admin_rss_ind_table_entry { +@@ -1008,6 +1030,13 @@ struct ena_admin_ena_mmio_req_read_less_ + #define ENA_ADMIN_HOST_INFO_MINOR_MASK GENMASK(15, 8) + #define ENA_ADMIN_HOST_INFO_SUB_MINOR_SHIFT 16 + #define ENA_ADMIN_HOST_INFO_SUB_MINOR_MASK GENMASK(23, 16) ++#define ENA_ADMIN_HOST_INFO_MODULE_TYPE_SHIFT 24 ++#define ENA_ADMIN_HOST_INFO_MODULE_TYPE_MASK GENMASK(31, 24) ++#define ENA_ADMIN_HOST_INFO_FUNCTION_MASK GENMASK(2, 0) ++#define ENA_ADMIN_HOST_INFO_DEVICE_SHIFT 3 
++#define ENA_ADMIN_HOST_INFO_DEVICE_MASK GENMASK(7, 3) ++#define ENA_ADMIN_HOST_INFO_BUS_SHIFT 8 ++#define ENA_ADMIN_HOST_INFO_BUS_MASK GENMASK(15, 8) + + /* aenq_common_desc */ + #define ENA_ADMIN_AENQ_COMMON_DESC_PHASE_MASK BIT(0) +Index: linux/drivers/net/ethernet/amazon/ena/ena_com.c +=================================================================== +--- linux.orig/drivers/net/ethernet/amazon/ena/ena_com.c ++++ linux/drivers/net/ethernet/amazon/ena/ena_com.c +@@ -41,9 +41,6 @@ + #define ENA_ASYNC_QUEUE_DEPTH 16 + #define ENA_ADMIN_QUEUE_DEPTH 32 + +-#define MIN_ENA_VER (((ENA_COMMON_SPEC_VERSION_MAJOR) << \ +- ENA_REGS_VERSION_MAJOR_VERSION_SHIFT) \ +- | (ENA_COMMON_SPEC_VERSION_MINOR)) + + #define ENA_CTRL_MAJOR 0 + #define ENA_CTRL_MINOR 0 +@@ -1400,11 +1397,6 @@ int ena_com_validate_version(struct ena_ + ENA_REGS_VERSION_MAJOR_VERSION_SHIFT, + ver & ENA_REGS_VERSION_MINOR_VERSION_MASK); + +- if (ver < MIN_ENA_VER) { +- pr_err("ENA version is lower than the minimal version the driver supports\n"); +- return -1; +- } +- + pr_info("ena controller version: %d.%d.%d implementation version %d\n", + (ctrl_ver & ENA_REGS_CONTROLLER_VERSION_MAJOR_VERSION_MASK) >> + ENA_REGS_CONTROLLER_VERSION_MAJOR_VERSION_SHIFT, +@@ -2441,6 +2433,10 @@ int ena_com_allocate_host_info(struct en + if (unlikely(!host_attr->host_info)) + return -ENOMEM; + ++ host_attr->host_info->ena_spec_version = ++ ((ENA_COMMON_SPEC_VERSION_MAJOR << ENA_REGS_VERSION_MAJOR_VERSION_SHIFT) | ++ (ENA_COMMON_SPEC_VERSION_MINOR)); ++ + return 0; + } + +Index: linux/drivers/net/ethernet/amazon/ena/ena_common_defs.h +=================================================================== +--- linux.orig/drivers/net/ethernet/amazon/ena/ena_common_defs.h ++++ linux/drivers/net/ethernet/amazon/ena/ena_common_defs.h +@@ -32,8 +32,8 @@ + #ifndef _ENA_COMMON_H_ + #define _ENA_COMMON_H_ + +-#define ENA_COMMON_SPEC_VERSION_MAJOR 0 /* */ +-#define ENA_COMMON_SPEC_VERSION_MINOR 10 /* */ ++#define 
ENA_COMMON_SPEC_VERSION_MAJOR 2 ++#define ENA_COMMON_SPEC_VERSION_MINOR 0 + + /* ENA operates with 48-bit memory addresses. ena_mem_addr_t */ + struct ena_common_mem_addr { +Index: linux/drivers/net/ethernet/amazon/ena/ena_netdev.c +=================================================================== +--- linux.orig/drivers/net/ethernet/amazon/ena/ena_netdev.c ++++ linux/drivers/net/ethernet/amazon/ena/ena_netdev.c +@@ -2206,7 +2206,8 @@ static u16 ena_select_queue(struct net_d + return qid; + } + +-static void ena_config_host_info(struct ena_com_dev *ena_dev) ++static void ena_config_host_info(struct ena_com_dev *ena_dev, ++ struct pci_dev *pdev) + { + struct ena_admin_host_info *host_info; + int rc; +@@ -2220,6 +2221,7 @@ static void ena_config_host_info(struct + + host_info = ena_dev->host_attr.host_info; + ++ host_info->bdf = (pdev->bus->number << 8) | pdev->devfn; + host_info->os_type = ENA_ADMIN_OS_LINUX; + host_info->kernel_ver = LINUX_VERSION_CODE; + strlcpy(host_info->kernel_ver_str, utsname()->version, +@@ -2230,7 +2232,9 @@ static void ena_config_host_info(struct + host_info->driver_version = + (DRV_MODULE_VER_MAJOR) | + (DRV_MODULE_VER_MINOR << ENA_ADMIN_HOST_INFO_MINOR_SHIFT) | +- (DRV_MODULE_VER_SUBMINOR << ENA_ADMIN_HOST_INFO_SUB_MINOR_SHIFT); ++ (DRV_MODULE_VER_SUBMINOR << ENA_ADMIN_HOST_INFO_SUB_MINOR_SHIFT) | ++ ("K"[0] << ENA_ADMIN_HOST_INFO_MODULE_TYPE_SHIFT); ++ host_info->num_cpus = num_online_cpus(); + + rc = ena_com_set_host_attributes(ena_dev); + if (rc) { +@@ -2454,7 +2458,7 @@ static int ena_device_init(struct ena_co + */ + ena_com_set_admin_polling_mode(ena_dev, true); + +- ena_config_host_info(ena_dev); ++ ena_config_host_info(ena_dev, pdev); + + /* Get Device Attributes*/ + rc = ena_com_get_dev_attr_feat(ena_dev, get_feat_ctx); diff --git a/debian/patches/features/all/ena/0004-net-ena-introduce-Low-Latency-Queues-data-structures.patch b/debian/patches/features/all/ena/0004-net-ena-introduce-Low-Latency-Queues-data-structures.patch new 
file mode 100644 index 000000000..51e655bab --- /dev/null +++ b/debian/patches/features/all/ena/0004-net-ena-introduce-Low-Latency-Queues-data-structures.patch @@ -0,0 +1,271 @@ +From: Arthur Kiyanovski <akiyano@amazon.com> +Date: Thu, 11 Oct 2018 11:26:18 +0300 +Subject: [PATCH 04/19] net: ena: introduce Low Latency Queues data structures + according to ENA spec +Origin: https://git.kernel.org/linus/a7982b8ec947052df6d4467b3a81571f02f528e0 + +Low Latency Queues(LLQ) allow usage of device's memory for descriptors +and headers. Such queues decrease processing time since data is already +located on the device when driver rings the doorbell. + +Signed-off-by: Arthur Kiyanovski <akiyano@amazon.com> +Signed-off-by: David S. Miller <davem@davemloft.net> +--- + .../net/ethernet/amazon/ena/ena_admin_defs.h | 90 ++++++++++++++++++- + drivers/net/ethernet/amazon/ena/ena_com.h | 38 ++++++++ + drivers/net/ethernet/amazon/ena/ena_netdev.c | 6 +- + 3 files changed, 128 insertions(+), 6 deletions(-) + +Index: linux/drivers/net/ethernet/amazon/ena/ena_admin_defs.h +=================================================================== +--- linux.orig/drivers/net/ethernet/amazon/ena/ena_admin_defs.h ++++ linux/drivers/net/ethernet/amazon/ena/ena_admin_defs.h +@@ -74,6 +74,8 @@ enum ena_admin_aq_feature_id { + + ENA_ADMIN_HW_HINTS = 3, + ++ ENA_ADMIN_LLQ = 4, ++ + ENA_ADMIN_RSS_HASH_FUNCTION = 10, + + ENA_ADMIN_STATELESS_OFFLOAD_CONFIG = 11, +@@ -485,8 +487,85 @@ struct ena_admin_device_attr_feature_des + u32 max_mtu; + }; + ++enum ena_admin_llq_header_location { ++ /* header is in descriptor list */ ++ ENA_ADMIN_INLINE_HEADER = 1, ++ /* header in a separate ring, implies 16B descriptor list entry */ ++ ENA_ADMIN_HEADER_RING = 2, ++}; ++ ++enum ena_admin_llq_ring_entry_size { ++ ENA_ADMIN_LIST_ENTRY_SIZE_128B = 1, ++ ENA_ADMIN_LIST_ENTRY_SIZE_192B = 2, ++ ENA_ADMIN_LIST_ENTRY_SIZE_256B = 4, ++}; ++ ++enum ena_admin_llq_num_descs_before_header { ++ 
ENA_ADMIN_LLQ_NUM_DESCS_BEFORE_HEADER_0 = 0, ++ ENA_ADMIN_LLQ_NUM_DESCS_BEFORE_HEADER_1 = 1, ++ ENA_ADMIN_LLQ_NUM_DESCS_BEFORE_HEADER_2 = 2, ++ ENA_ADMIN_LLQ_NUM_DESCS_BEFORE_HEADER_4 = 4, ++ ENA_ADMIN_LLQ_NUM_DESCS_BEFORE_HEADER_8 = 8, ++}; ++ ++/* packet descriptor list entry always starts with one or more descriptors, ++ * followed by a header. The rest of the descriptors are located in the ++ * beginning of the subsequent entry. Stride refers to how the rest of the ++ * descriptors are placed. This field is relevant only for inline header ++ * mode ++ */ ++enum ena_admin_llq_stride_ctrl { ++ ENA_ADMIN_SINGLE_DESC_PER_ENTRY = 1, ++ ENA_ADMIN_MULTIPLE_DESCS_PER_ENTRY = 2, ++}; ++ ++struct ena_admin_feature_llq_desc { ++ u32 max_llq_num; ++ ++ u32 max_llq_depth; ++ ++ /* specify the header locations the device supports. bitfield of ++ * enum ena_admin_llq_header_location. ++ */ ++ u16 header_location_ctrl_supported; ++ ++ /* the header location the driver selected to use. */ ++ u16 header_location_ctrl_enabled; ++ ++ /* if inline header is specified - this is the size of descriptor ++ * list entry. If header in a separate ring is specified - this is ++ * the size of header ring entry. bitfield of enum ++ * ena_admin_llq_ring_entry_size. specify the entry sizes the device ++ * supports ++ */ ++ u16 entry_size_ctrl_supported; ++ ++ /* the entry size the driver selected to use. */ ++ u16 entry_size_ctrl_enabled; ++ ++ /* valid only if inline header is specified. First entry associated ++ * with the packet includes descriptors and header. Rest of the ++ * entries occupied by descriptors. This parameter defines the max ++ * number of descriptors precedding the header in the first entry. 
++ * The field is bitfield of enum ++ * ena_admin_llq_num_descs_before_header and specify the values the ++ * device supports ++ */ ++ u16 desc_num_before_header_supported; ++ ++ /* the desire field the driver selected to use */ ++ u16 desc_num_before_header_enabled; ++ ++ /* valid only if inline was chosen. bitfield of enum ++ * ena_admin_llq_stride_ctrl ++ */ ++ u16 descriptors_stride_ctrl_supported; ++ ++ /* the stride control the driver selected to use */ ++ u16 descriptors_stride_ctrl_enabled; ++}; ++ + struct ena_admin_queue_feature_desc { +- /* including LLQs */ + u32 max_sq_num; + + u32 max_sq_depth; +@@ -495,9 +574,9 @@ struct ena_admin_queue_feature_desc { + + u32 max_cq_depth; + +- u32 max_llq_num; ++ u32 max_legacy_llq_num; + +- u32 max_llq_depth; ++ u32 max_legacy_llq_depth; + + u32 max_header_size; + +@@ -822,6 +901,8 @@ struct ena_admin_get_feat_resp { + + struct ena_admin_device_attr_feature_desc dev_attr; + ++ struct ena_admin_feature_llq_desc llq; ++ + struct ena_admin_queue_feature_desc max_queue; + + struct ena_admin_feature_aenq_desc aenq; +@@ -869,6 +950,9 @@ struct ena_admin_set_feat_cmd { + + /* rss indirection table */ + struct ena_admin_feature_rss_ind_table ind_table; ++ ++ /* LLQ configuration */ ++ struct ena_admin_feature_llq_desc llq; + } u; + }; + +Index: linux/drivers/net/ethernet/amazon/ena/ena_com.h +=================================================================== +--- linux.orig/drivers/net/ethernet/amazon/ena/ena_com.h ++++ linux/drivers/net/ethernet/amazon/ena/ena_com.h +@@ -108,6 +108,14 @@ enum ena_intr_moder_level { + ENA_INTR_MAX_NUM_OF_LEVELS, + }; + ++struct ena_llq_configurations { ++ enum ena_admin_llq_header_location llq_header_location; ++ enum ena_admin_llq_ring_entry_size llq_ring_entry_size; ++ enum ena_admin_llq_stride_ctrl llq_stride_ctrl; ++ enum ena_admin_llq_num_descs_before_header llq_num_decs_before_header; ++ u16 llq_ring_entry_size_value; ++}; ++ + struct ena_intr_moder_entry { + unsigned int 
intr_moder_interval; + unsigned int pkts_per_interval; +@@ -142,6 +150,15 @@ struct ena_com_tx_meta { + u16 l4_hdr_len; /* In words */ + }; + ++struct ena_com_llq_info { ++ u16 header_location_ctrl; ++ u16 desc_stride_ctrl; ++ u16 desc_list_entry_size_ctrl; ++ u16 desc_list_entry_size; ++ u16 descs_num_before_header; ++ u16 descs_per_entry; ++}; ++ + struct ena_com_io_cq { + struct ena_com_io_desc_addr cdesc_addr; + +@@ -179,6 +196,20 @@ struct ena_com_io_cq { + + } ____cacheline_aligned; + ++struct ena_com_io_bounce_buffer_control { ++ u8 *base_buffer; ++ u16 next_to_use; ++ u16 buffer_size; ++ u16 buffers_num; /* Must be a power of 2 */ ++}; ++ ++/* This struct is to keep tracking the current location of the next llq entry */ ++struct ena_com_llq_pkt_ctrl { ++ u8 *curr_bounce_buf; ++ u16 idx; ++ u16 descs_left_in_line; ++}; ++ + struct ena_com_io_sq { + struct ena_com_io_desc_addr desc_addr; + +@@ -190,6 +221,9 @@ struct ena_com_io_sq { + + u32 msix_vector; + struct ena_com_tx_meta cached_tx_meta; ++ struct ena_com_llq_info llq_info; ++ struct ena_com_llq_pkt_ctrl llq_buf_ctrl; ++ struct ena_com_io_bounce_buffer_control bounce_buf_ctrl; + + u16 q_depth; + u16 qid; +@@ -197,6 +231,7 @@ struct ena_com_io_sq { + u16 idx; + u16 tail; + u16 next_to_comp; ++ u16 llq_last_copy_tail; + u32 tx_max_header_size; + u8 phase; + u8 desc_entry_size; +@@ -334,6 +369,8 @@ struct ena_com_dev { + u16 intr_delay_resolution; + u32 intr_moder_tx_interval; + struct ena_intr_moder_entry *intr_moder_tbl; ++ ++ struct ena_com_llq_info llq_info; + }; + + struct ena_com_dev_get_features_ctx { +@@ -342,6 +379,7 @@ struct ena_com_dev_get_features_ctx { + struct ena_admin_feature_aenq_desc aenq; + struct ena_admin_feature_offload_desc offload; + struct ena_admin_ena_hw_hints hw_hints; ++ struct ena_admin_feature_llq_desc llq; + }; + + struct ena_com_create_io_ctx { +Index: linux/drivers/net/ethernet/amazon/ena/ena_netdev.c +=================================================================== 
+--- linux.orig/drivers/net/ethernet/amazon/ena/ena_netdev.c ++++ linux/drivers/net/ethernet/amazon/ena/ena_netdev.c +@@ -2959,7 +2959,7 @@ static int ena_calc_io_queue_num(struct + + /* In case of LLQ use the llq number in the get feature cmd */ + if (ena_dev->tx_mem_queue_type == ENA_ADMIN_PLACEMENT_POLICY_DEV) { +- io_sq_num = get_feat_ctx->max_queues.max_llq_num; ++ io_sq_num = get_feat_ctx->max_queues.max_legacy_llq_num; + + if (io_sq_num == 0) { + dev_err(&pdev->dev, +@@ -2995,7 +2995,7 @@ static void ena_set_push_mode(struct pci + has_mem_bar = pci_select_bars(pdev, IORESOURCE_MEM) & BIT(ENA_MEM_BAR); + + /* Enable push mode if device supports LLQ */ +- if (has_mem_bar && (get_feat_ctx->max_queues.max_llq_num > 0)) ++ if (has_mem_bar && get_feat_ctx->max_queues.max_legacy_llq_num > 0) + ena_dev->tx_mem_queue_type = ENA_ADMIN_PLACEMENT_POLICY_DEV; + else + ena_dev->tx_mem_queue_type = ENA_ADMIN_PLACEMENT_POLICY_HOST; +@@ -3131,7 +3131,7 @@ static int ena_calc_queue_size(struct pc + + if (ena_dev->tx_mem_queue_type == ENA_ADMIN_PLACEMENT_POLICY_DEV) + queue_size = min_t(u32, queue_size, +- get_feat_ctx->max_queues.max_llq_depth); ++ get_feat_ctx->max_queues.max_legacy_llq_depth); + + queue_size = rounddown_pow_of_two(queue_size); + diff --git a/debian/patches/features/all/ena/0005-net-ena-add-functions-for-handling-Low-Latency-Queue.patch b/debian/patches/features/all/ena/0005-net-ena-add-functions-for-handling-Low-Latency-Queue.patch new file mode 100644 index 000000000..375541b10 --- /dev/null +++ b/debian/patches/features/all/ena/0005-net-ena-add-functions-for-handling-Low-Latency-Queue.patch @@ -0,0 +1,832 @@ +From: Arthur Kiyanovski <akiyano@amazon.com> +Date: Thu, 11 Oct 2018 11:26:19 +0300 +Subject: [PATCH 05/19] net: ena: add functions for handling Low Latency Queues + in ena_com +Origin: https://git.kernel.org/linus/689b2bdaaa1480ad2c14bdc4c6eaf38284549022 + +This patch introduces APIs for detection, initialization, configuration +and actual usage of 
low latency queues(LLQ). It extends transmit API with +creation of LLQ descriptors in device memory (which include host buffers +descriptors as well as packet header) + +Signed-off-by: Arthur Kiyanovski <akiyano@amazon.com> +Signed-off-by: David S. Miller <davem@davemloft.net> +--- + drivers/net/ethernet/amazon/ena/ena_com.c | 249 +++++++++++++++++- + drivers/net/ethernet/amazon/ena/ena_com.h | 28 ++ + drivers/net/ethernet/amazon/ena/ena_eth_com.c | 231 ++++++++++++---- + drivers/net/ethernet/amazon/ena/ena_eth_com.h | 25 +- + drivers/net/ethernet/amazon/ena/ena_netdev.c | 21 +- + 5 files changed, 474 insertions(+), 80 deletions(-) + +Index: linux/drivers/net/ethernet/amazon/ena/ena_com.c +=================================================================== +--- linux.orig/drivers/net/ethernet/amazon/ena/ena_com.c ++++ linux/drivers/net/ethernet/amazon/ena/ena_com.c +@@ -58,6 +58,8 @@ + + #define ENA_MMIO_READ_TIMEOUT 0xFFFFFFFF + ++#define ENA_COM_BOUNCE_BUFFER_CNTRL_CNT 4 ++ + #define ENA_REGS_ADMIN_INTR_MASK 1 + + #define ENA_POLL_MS 5 +@@ -352,21 +354,48 @@ static int ena_com_init_io_sq(struct ena + &io_sq->desc_addr.phys_addr, + GFP_KERNEL); + } +- } else { ++ ++ if (!io_sq->desc_addr.virt_addr) { ++ pr_err("memory allocation failed"); ++ return -ENOMEM; ++ } ++ } ++ ++ if (io_sq->mem_queue_type == ENA_ADMIN_PLACEMENT_POLICY_DEV) { ++ /* Allocate bounce buffers */ ++ io_sq->bounce_buf_ctrl.buffer_size = ++ ena_dev->llq_info.desc_list_entry_size; ++ io_sq->bounce_buf_ctrl.buffers_num = ++ ENA_COM_BOUNCE_BUFFER_CNTRL_CNT; ++ io_sq->bounce_buf_ctrl.next_to_use = 0; ++ ++ size = io_sq->bounce_buf_ctrl.buffer_size * ++ io_sq->bounce_buf_ctrl.buffers_num; ++ + dev_node = dev_to_node(ena_dev->dmadev); + set_dev_node(ena_dev->dmadev, ctx->numa_node); +- io_sq->desc_addr.virt_addr = ++ io_sq->bounce_buf_ctrl.base_buffer = + devm_kzalloc(ena_dev->dmadev, size, GFP_KERNEL); + set_dev_node(ena_dev->dmadev, dev_node); +- if (!io_sq->desc_addr.virt_addr) { +- 
io_sq->desc_addr.virt_addr = ++ if (!io_sq->bounce_buf_ctrl.base_buffer) ++ io_sq->bounce_buf_ctrl.base_buffer = + devm_kzalloc(ena_dev->dmadev, size, GFP_KERNEL); ++ ++ if (!io_sq->bounce_buf_ctrl.base_buffer) { ++ pr_err("bounce buffer memory allocation failed"); ++ return -ENOMEM; + } +- } + +- if (!io_sq->desc_addr.virt_addr) { +- pr_err("memory allocation failed"); +- return -ENOMEM; ++ memcpy(&io_sq->llq_info, &ena_dev->llq_info, ++ sizeof(io_sq->llq_info)); ++ ++ /* Initiate the first bounce buffer */ ++ io_sq->llq_buf_ctrl.curr_bounce_buf = ++ ena_com_get_next_bounce_buffer(&io_sq->bounce_buf_ctrl); ++ memset(io_sq->llq_buf_ctrl.curr_bounce_buf, ++ 0x0, io_sq->llq_info.desc_list_entry_size); ++ io_sq->llq_buf_ctrl.descs_left_in_line = ++ io_sq->llq_info.descs_num_before_header; + } + + io_sq->tail = 0; +@@ -554,6 +583,156 @@ err: + return ret; + } + ++/** ++ * Set the LLQ configurations of the firmware ++ * ++ * The driver provides only the enabled feature values to the device, ++ * which in turn, checks if they are supported. 
++ */ ++static int ena_com_set_llq(struct ena_com_dev *ena_dev) ++{ ++ struct ena_com_admin_queue *admin_queue; ++ struct ena_admin_set_feat_cmd cmd; ++ struct ena_admin_set_feat_resp resp; ++ struct ena_com_llq_info *llq_info = &ena_dev->llq_info; ++ int ret; ++ ++ memset(&cmd, 0x0, sizeof(cmd)); ++ admin_queue = &ena_dev->admin_queue; ++ ++ cmd.aq_common_descriptor.opcode = ENA_ADMIN_SET_FEATURE; ++ cmd.feat_common.feature_id = ENA_ADMIN_LLQ; ++ ++ cmd.u.llq.header_location_ctrl_enabled = llq_info->header_location_ctrl; ++ cmd.u.llq.entry_size_ctrl_enabled = llq_info->desc_list_entry_size_ctrl; ++ cmd.u.llq.desc_num_before_header_enabled = llq_info->descs_num_before_header; ++ cmd.u.llq.descriptors_stride_ctrl_enabled = llq_info->desc_stride_ctrl; ++ ++ ret = ena_com_execute_admin_command(admin_queue, ++ (struct ena_admin_aq_entry *)&cmd, ++ sizeof(cmd), ++ (struct ena_admin_acq_entry *)&resp, ++ sizeof(resp)); ++ ++ if (unlikely(ret)) ++ pr_err("Failed to set LLQ configurations: %d\n", ret); ++ ++ return ret; ++} ++ ++static int ena_com_config_llq_info(struct ena_com_dev *ena_dev, ++ struct ena_admin_feature_llq_desc *llq_features, ++ struct ena_llq_configurations *llq_default_cfg) ++{ ++ struct ena_com_llq_info *llq_info = &ena_dev->llq_info; ++ u16 supported_feat; ++ int rc; ++ ++ memset(llq_info, 0, sizeof(*llq_info)); ++ ++ supported_feat = llq_features->header_location_ctrl_supported; ++ ++ if (likely(supported_feat & llq_default_cfg->llq_header_location)) { ++ llq_info->header_location_ctrl = ++ llq_default_cfg->llq_header_location; ++ } else { ++ pr_err("Invalid header location control, supported: 0x%x\n", ++ supported_feat); ++ return -EINVAL; ++ } ++ ++ if (likely(llq_info->header_location_ctrl == ENA_ADMIN_INLINE_HEADER)) { ++ supported_feat = llq_features->descriptors_stride_ctrl_supported; ++ if (likely(supported_feat & llq_default_cfg->llq_stride_ctrl)) { ++ llq_info->desc_stride_ctrl = llq_default_cfg->llq_stride_ctrl; ++ } else { ++ if 
(supported_feat & ENA_ADMIN_MULTIPLE_DESCS_PER_ENTRY) { ++ llq_info->desc_stride_ctrl = ENA_ADMIN_MULTIPLE_DESCS_PER_ENTRY; ++ } else if (supported_feat & ENA_ADMIN_SINGLE_DESC_PER_ENTRY) { ++ llq_info->desc_stride_ctrl = ENA_ADMIN_SINGLE_DESC_PER_ENTRY; ++ } else { ++ pr_err("Invalid desc_stride_ctrl, supported: 0x%x\n", ++ supported_feat); ++ return -EINVAL; ++ } ++ ++ pr_err("Default llq stride ctrl is not supported, performing fallback, default: 0x%x, supported: 0x%x, used: 0x%x\n", ++ llq_default_cfg->llq_stride_ctrl, supported_feat, ++ llq_info->desc_stride_ctrl); ++ } ++ } else { ++ llq_info->desc_stride_ctrl = 0; ++ } ++ ++ supported_feat = llq_features->entry_size_ctrl_supported; ++ if (likely(supported_feat & llq_default_cfg->llq_ring_entry_size)) { ++ llq_info->desc_list_entry_size_ctrl = llq_default_cfg->llq_ring_entry_size; ++ llq_info->desc_list_entry_size = llq_default_cfg->llq_ring_entry_size_value; ++ } else { ++ if (supported_feat & ENA_ADMIN_LIST_ENTRY_SIZE_128B) { ++ llq_info->desc_list_entry_size_ctrl = ENA_ADMIN_LIST_ENTRY_SIZE_128B; ++ llq_info->desc_list_entry_size = 128; ++ } else if (supported_feat & ENA_ADMIN_LIST_ENTRY_SIZE_192B) { ++ llq_info->desc_list_entry_size_ctrl = ENA_ADMIN_LIST_ENTRY_SIZE_192B; ++ llq_info->desc_list_entry_size = 192; ++ } else if (supported_feat & ENA_ADMIN_LIST_ENTRY_SIZE_256B) { ++ llq_info->desc_list_entry_size_ctrl = ENA_ADMIN_LIST_ENTRY_SIZE_256B; ++ llq_info->desc_list_entry_size = 256; ++ } else { ++ pr_err("Invalid entry_size_ctrl, supported: 0x%x\n", ++ supported_feat); ++ return -EINVAL; ++ } ++ ++ pr_err("Default llq ring entry size is not supported, performing fallback, default: 0x%x, supported: 0x%x, used: 0x%x\n", ++ llq_default_cfg->llq_ring_entry_size, supported_feat, ++ llq_info->desc_list_entry_size); ++ } ++ if (unlikely(llq_info->desc_list_entry_size & 0x7)) { ++ /* The desc list entry size should be whole multiply of 8 ++ * This requirement comes from __iowrite64_copy() ++ */ ++ 
pr_err("illegal entry size %d\n", ++ llq_info->desc_list_entry_size); ++ return -EINVAL; ++ } ++ ++ if (llq_info->desc_stride_ctrl == ENA_ADMIN_MULTIPLE_DESCS_PER_ENTRY) ++ llq_info->descs_per_entry = llq_info->desc_list_entry_size / ++ sizeof(struct ena_eth_io_tx_desc); ++ else ++ llq_info->descs_per_entry = 1; ++ ++ supported_feat = llq_features->desc_num_before_header_supported; ++ if (likely(supported_feat & llq_default_cfg->llq_num_decs_before_header)) { ++ llq_info->descs_num_before_header = llq_default_cfg->llq_num_decs_before_header; ++ } else { ++ if (supported_feat & ENA_ADMIN_LLQ_NUM_DESCS_BEFORE_HEADER_2) { ++ llq_info->descs_num_before_header = ENA_ADMIN_LLQ_NUM_DESCS_BEFORE_HEADER_2; ++ } else if (supported_feat & ENA_ADMIN_LLQ_NUM_DESCS_BEFORE_HEADER_1) { ++ llq_info->descs_num_before_header = ENA_ADMIN_LLQ_NUM_DESCS_BEFORE_HEADER_1; ++ } else if (supported_feat & ENA_ADMIN_LLQ_NUM_DESCS_BEFORE_HEADER_4) { ++ llq_info->descs_num_before_header = ENA_ADMIN_LLQ_NUM_DESCS_BEFORE_HEADER_4; ++ } else if (supported_feat & ENA_ADMIN_LLQ_NUM_DESCS_BEFORE_HEADER_8) { ++ llq_info->descs_num_before_header = ENA_ADMIN_LLQ_NUM_DESCS_BEFORE_HEADER_8; ++ } else { ++ pr_err("Invalid descs_num_before_header, supported: 0x%x\n", ++ supported_feat); ++ return -EINVAL; ++ } ++ ++ pr_err("Default llq num descs before header is not supported, performing fallback, default: 0x%x, supported: 0x%x, used: 0x%x\n", ++ llq_default_cfg->llq_num_decs_before_header, ++ supported_feat, llq_info->descs_num_before_header); ++ } ++ ++ rc = ena_com_set_llq(ena_dev); ++ if (rc) ++ pr_err("Cannot set LLQ configuration: %d\n", rc); ++ ++ return 0; ++} ++ + static int ena_com_wait_and_process_admin_cq_interrupts(struct ena_comp_ctx *comp_ctx, + struct ena_com_admin_queue *admin_queue) + { +@@ -725,15 +904,17 @@ static void ena_com_io_queue_free(struct + if (io_sq->desc_addr.virt_addr) { + size = io_sq->desc_entry_size * io_sq->q_depth; + +- if (io_sq->mem_queue_type == 
ENA_ADMIN_PLACEMENT_POLICY_HOST) +- dma_free_coherent(ena_dev->dmadev, size, +- io_sq->desc_addr.virt_addr, +- io_sq->desc_addr.phys_addr); +- else +- devm_kfree(ena_dev->dmadev, io_sq->desc_addr.virt_addr); ++ dma_free_coherent(ena_dev->dmadev, size, ++ io_sq->desc_addr.virt_addr, ++ io_sq->desc_addr.phys_addr); + + io_sq->desc_addr.virt_addr = NULL; + } ++ ++ if (io_sq->bounce_buf_ctrl.base_buffer) { ++ devm_kfree(ena_dev->dmadev, io_sq->bounce_buf_ctrl.base_buffer); ++ io_sq->bounce_buf_ctrl.base_buffer = NULL; ++ } + } + + static int wait_for_reset_state(struct ena_com_dev *ena_dev, u32 timeout, +@@ -1740,6 +1921,15 @@ int ena_com_get_dev_attr_feat(struct ena + else + return rc; + ++ rc = ena_com_get_feature(ena_dev, &get_resp, ENA_ADMIN_LLQ); ++ if (!rc) ++ memcpy(&get_feat_ctx->llq, &get_resp.u.llq, ++ sizeof(get_resp.u.llq)); ++ else if (rc == -EOPNOTSUPP) ++ memset(&get_feat_ctx->llq, 0x0, sizeof(get_feat_ctx->llq)); ++ else ++ return rc; ++ + return 0; + } + +@@ -2708,3 +2898,34 @@ void ena_com_get_intr_moderation_entry(s + intr_moder_tbl[level].pkts_per_interval; + entry->bytes_per_interval = intr_moder_tbl[level].bytes_per_interval; + } ++ ++int ena_com_config_dev_mode(struct ena_com_dev *ena_dev, ++ struct ena_admin_feature_llq_desc *llq_features, ++ struct ena_llq_configurations *llq_default_cfg) ++{ ++ int rc; ++ int size; ++ ++ if (!llq_features->max_llq_num) { ++ ena_dev->tx_mem_queue_type = ENA_ADMIN_PLACEMENT_POLICY_HOST; ++ return 0; ++ } ++ ++ rc = ena_com_config_llq_info(ena_dev, llq_features, llq_default_cfg); ++ if (rc) ++ return rc; ++ ++ /* Validate the descriptor is not too big */ ++ size = ena_dev->tx_max_header_size; ++ size += ena_dev->llq_info.descs_num_before_header * ++ sizeof(struct ena_eth_io_tx_desc); ++ ++ if (unlikely(ena_dev->llq_info.desc_list_entry_size < size)) { ++ pr_err("the size of the LLQ entry is smaller than needed\n"); ++ return -EINVAL; ++ } ++ ++ ena_dev->tx_mem_queue_type = ENA_ADMIN_PLACEMENT_POLICY_DEV; ++ ++ 
return 0; ++} +Index: linux/drivers/net/ethernet/amazon/ena/ena_com.h +=================================================================== +--- linux.orig/drivers/net/ethernet/amazon/ena/ena_com.h ++++ linux/drivers/net/ethernet/amazon/ena/ena_com.h +@@ -37,6 +37,7 @@ + #include <linux/delay.h> + #include <linux/dma-mapping.h> + #include <linux/gfp.h> ++#include <linux/io.h> + #include <linux/sched.h> + #include <linux/sizes.h> + #include <linux/spinlock.h> +@@ -973,6 +974,16 @@ void ena_com_get_intr_moderation_entry(s + enum ena_intr_moder_level level, + struct ena_intr_moder_entry *entry); + ++/* ena_com_config_dev_mode - Configure the placement policy of the device. ++ * @ena_dev: ENA communication layer struct ++ * @llq_features: LLQ feature descriptor, retrieve via ++ * ena_com_get_dev_attr_feat. ++ * @ena_llq_config: The default driver LLQ parameters configurations ++ */ ++int ena_com_config_dev_mode(struct ena_com_dev *ena_dev, ++ struct ena_admin_feature_llq_desc *llq_features, ++ struct ena_llq_configurations *llq_default_config); ++ + static inline bool ena_com_get_adaptive_moderation_enabled(struct ena_com_dev *ena_dev) + { + return ena_dev->adaptive_coalescing; +@@ -1082,4 +1093,21 @@ static inline void ena_com_update_intr_r + intr_reg->intr_control |= ENA_ETH_IO_INTR_REG_INTR_UNMASK_MASK; + } + ++static inline u8 *ena_com_get_next_bounce_buffer(struct ena_com_io_bounce_buffer_control *bounce_buf_ctrl) ++{ ++ u16 size, buffers_num; ++ u8 *buf; ++ ++ size = bounce_buf_ctrl->buffer_size; ++ buffers_num = bounce_buf_ctrl->buffers_num; ++ ++ buf = bounce_buf_ctrl->base_buffer + ++ (bounce_buf_ctrl->next_to_use++ & (buffers_num - 1)) * size; ++ ++ prefetchw(bounce_buf_ctrl->base_buffer + ++ (bounce_buf_ctrl->next_to_use & (buffers_num - 1)) * size); ++ ++ return buf; ++} ++ + #endif /* !(ENA_COM) */ +Index: linux/drivers/net/ethernet/amazon/ena/ena_eth_com.c +=================================================================== +--- 
linux.orig/drivers/net/ethernet/amazon/ena/ena_eth_com.c ++++ linux/drivers/net/ethernet/amazon/ena/ena_eth_com.c +@@ -59,7 +59,7 @@ static inline struct ena_eth_io_rx_cdesc + return cdesc; + } + +-static inline void *get_sq_desc(struct ena_com_io_sq *io_sq) ++static inline void *get_sq_desc_regular_queue(struct ena_com_io_sq *io_sq) + { + u16 tail_masked; + u32 offset; +@@ -71,45 +71,159 @@ static inline void *get_sq_desc(struct e + return (void *)((uintptr_t)io_sq->desc_addr.virt_addr + offset); + } + +-static inline void ena_com_copy_curr_sq_desc_to_dev(struct ena_com_io_sq *io_sq) ++static inline int ena_com_write_bounce_buffer_to_dev(struct ena_com_io_sq *io_sq, ++ u8 *bounce_buffer) + { +- u16 tail_masked = io_sq->tail & (io_sq->q_depth - 1); +- u32 offset = tail_masked * io_sq->desc_entry_size; ++ struct ena_com_llq_info *llq_info = &io_sq->llq_info; + +- /* In case this queue isn't a LLQ */ +- if (io_sq->mem_queue_type == ENA_ADMIN_PLACEMENT_POLICY_HOST) +- return; ++ u16 dst_tail_mask; ++ u32 dst_offset; + +- memcpy_toio(io_sq->desc_addr.pbuf_dev_addr + offset, +- io_sq->desc_addr.virt_addr + offset, +- io_sq->desc_entry_size); +-} ++ dst_tail_mask = io_sq->tail & (io_sq->q_depth - 1); ++ dst_offset = dst_tail_mask * llq_info->desc_list_entry_size; ++ ++ /* Make sure everything was written into the bounce buffer before ++ * writing the bounce buffer to the device ++ */ ++ wmb(); ++ ++ /* The line is completed. 
Copy it to dev */ ++ __iowrite64_copy(io_sq->desc_addr.pbuf_dev_addr + dst_offset, ++ bounce_buffer, (llq_info->desc_list_entry_size) / 8); + +-static inline void ena_com_sq_update_tail(struct ena_com_io_sq *io_sq) +-{ + io_sq->tail++; + + /* Switch phase bit in case of wrap around */ + if (unlikely((io_sq->tail & (io_sq->q_depth - 1)) == 0)) + io_sq->phase ^= 1; ++ ++ return 0; + } + +-static inline int ena_com_write_header(struct ena_com_io_sq *io_sq, +- u8 *head_src, u16 header_len) ++static inline int ena_com_write_header_to_bounce(struct ena_com_io_sq *io_sq, ++ u8 *header_src, ++ u16 header_len) ++{ ++ struct ena_com_llq_pkt_ctrl *pkt_ctrl = &io_sq->llq_buf_ctrl; ++ struct ena_com_llq_info *llq_info = &io_sq->llq_info; ++ u8 *bounce_buffer = pkt_ctrl->curr_bounce_buf; ++ u16 header_offset; ++ ++ if (unlikely(io_sq->mem_queue_type == ENA_ADMIN_PLACEMENT_POLICY_HOST)) ++ return 0; ++ ++ header_offset = ++ llq_info->descs_num_before_header * io_sq->desc_entry_size; ++ ++ if (unlikely((header_offset + header_len) > ++ llq_info->desc_list_entry_size)) { ++ pr_err("trying to write header larger than llq entry can accommodate\n"); ++ return -EFAULT; ++ } ++ ++ if (unlikely(!bounce_buffer)) { ++ pr_err("bounce buffer is NULL\n"); ++ return -EFAULT; ++ } ++ ++ memcpy(bounce_buffer + header_offset, header_src, header_len); ++ ++ return 0; ++} ++ ++static inline void *get_sq_desc_llq(struct ena_com_io_sq *io_sq) ++{ ++ struct ena_com_llq_pkt_ctrl *pkt_ctrl = &io_sq->llq_buf_ctrl; ++ u8 *bounce_buffer; ++ void *sq_desc; ++ ++ bounce_buffer = pkt_ctrl->curr_bounce_buf; ++ ++ if (unlikely(!bounce_buffer)) { ++ pr_err("bounce buffer is NULL\n"); ++ return NULL; ++ } ++ ++ sq_desc = bounce_buffer + pkt_ctrl->idx * io_sq->desc_entry_size; ++ pkt_ctrl->idx++; ++ pkt_ctrl->descs_left_in_line--; ++ ++ return sq_desc; ++} ++ ++static inline int ena_com_close_bounce_buffer(struct ena_com_io_sq *io_sq) + { +- u16 tail_masked = io_sq->tail & (io_sq->q_depth - 1); +- u8 __iomem 
*dev_head_addr = +- io_sq->header_addr + (tail_masked * io_sq->tx_max_header_size); ++ struct ena_com_llq_pkt_ctrl *pkt_ctrl = &io_sq->llq_buf_ctrl; ++ struct ena_com_llq_info *llq_info = &io_sq->llq_info; ++ int rc; + +- if (io_sq->mem_queue_type == ENA_ADMIN_PLACEMENT_POLICY_HOST) ++ if (unlikely(io_sq->mem_queue_type == ENA_ADMIN_PLACEMENT_POLICY_HOST)) + return 0; + +- if (unlikely(!io_sq->header_addr)) { +- pr_err("Push buffer header ptr is NULL\n"); +- return -EINVAL; ++ /* bounce buffer was used, so write it and get a new one */ ++ if (pkt_ctrl->idx) { ++ rc = ena_com_write_bounce_buffer_to_dev(io_sq, ++ pkt_ctrl->curr_bounce_buf); ++ if (unlikely(rc)) ++ return rc; ++ ++ pkt_ctrl->curr_bounce_buf = ++ ena_com_get_next_bounce_buffer(&io_sq->bounce_buf_ctrl); ++ memset(io_sq->llq_buf_ctrl.curr_bounce_buf, ++ 0x0, llq_info->desc_list_entry_size); + } + +- memcpy_toio(dev_head_addr, head_src, header_len); ++ pkt_ctrl->idx = 0; ++ pkt_ctrl->descs_left_in_line = llq_info->descs_num_before_header; ++ return 0; ++} ++ ++static inline void *get_sq_desc(struct ena_com_io_sq *io_sq) ++{ ++ if (io_sq->mem_queue_type == ENA_ADMIN_PLACEMENT_POLICY_DEV) ++ return get_sq_desc_llq(io_sq); ++ ++ return get_sq_desc_regular_queue(io_sq); ++} ++ ++static inline int ena_com_sq_update_llq_tail(struct ena_com_io_sq *io_sq) ++{ ++ struct ena_com_llq_pkt_ctrl *pkt_ctrl = &io_sq->llq_buf_ctrl; ++ struct ena_com_llq_info *llq_info = &io_sq->llq_info; ++ int rc; ++ ++ if (!pkt_ctrl->descs_left_in_line) { ++ rc = ena_com_write_bounce_buffer_to_dev(io_sq, ++ pkt_ctrl->curr_bounce_buf); ++ if (unlikely(rc)) ++ return rc; ++ ++ pkt_ctrl->curr_bounce_buf = ++ ena_com_get_next_bounce_buffer(&io_sq->bounce_buf_ctrl); ++ memset(io_sq->llq_buf_ctrl.curr_bounce_buf, ++ 0x0, llq_info->desc_list_entry_size); ++ ++ pkt_ctrl->idx = 0; ++ if (unlikely(llq_info->desc_stride_ctrl == ENA_ADMIN_SINGLE_DESC_PER_ENTRY)) ++ pkt_ctrl->descs_left_in_line = 1; ++ else ++ pkt_ctrl->descs_left_in_line = ++ 
llq_info->desc_list_entry_size / io_sq->desc_entry_size; ++ } ++ ++ return 0; ++} ++ ++static inline int ena_com_sq_update_tail(struct ena_com_io_sq *io_sq) ++{ ++ if (io_sq->mem_queue_type == ENA_ADMIN_PLACEMENT_POLICY_DEV) ++ return ena_com_sq_update_llq_tail(io_sq); ++ ++ io_sq->tail++; ++ ++ /* Switch phase bit in case of wrap around */ ++ if (unlikely((io_sq->tail & (io_sq->q_depth - 1)) == 0)) ++ io_sq->phase ^= 1; + + return 0; + } +@@ -177,8 +291,8 @@ static inline bool ena_com_meta_desc_cha + return false; + } + +-static inline void ena_com_create_and_store_tx_meta_desc(struct ena_com_io_sq *io_sq, +- struct ena_com_tx_ctx *ena_tx_ctx) ++static inline int ena_com_create_and_store_tx_meta_desc(struct ena_com_io_sq *io_sq, ++ struct ena_com_tx_ctx *ena_tx_ctx) + { + struct ena_eth_io_tx_meta_desc *meta_desc = NULL; + struct ena_com_tx_meta *ena_meta = &ena_tx_ctx->ena_meta; +@@ -223,8 +337,7 @@ static inline void ena_com_create_and_st + memcpy(&io_sq->cached_tx_meta, ena_meta, + sizeof(struct ena_com_tx_meta)); + +- ena_com_copy_curr_sq_desc_to_dev(io_sq); +- ena_com_sq_update_tail(io_sq); ++ return ena_com_sq_update_tail(io_sq); + } + + static inline void ena_com_rx_set_flags(struct ena_com_rx_ctx *ena_rx_ctx, +@@ -262,18 +375,19 @@ int ena_com_prepare_tx(struct ena_com_io + { + struct ena_eth_io_tx_desc *desc = NULL; + struct ena_com_buf *ena_bufs = ena_tx_ctx->ena_bufs; +- void *push_header = ena_tx_ctx->push_header; ++ void *buffer_to_push = ena_tx_ctx->push_header; + u16 header_len = ena_tx_ctx->header_len; + u16 num_bufs = ena_tx_ctx->num_bufs; +- int total_desc, i, rc; ++ u16 start_tail = io_sq->tail; ++ int i, rc; + bool have_meta; + u64 addr_hi; + + WARN(io_sq->direction != ENA_COM_IO_QUEUE_DIRECTION_TX, "wrong Q type"); + + /* num_bufs +1 for potential meta desc */ +- if (ena_com_sq_empty_space(io_sq) < (num_bufs + 1)) { +- pr_err("Not enough space in the tx queue\n"); ++ if (unlikely(!ena_com_sq_have_enough_space(io_sq, num_bufs + 1))) { ++ 
pr_debug("Not enough space in the tx queue\n"); + return -ENOMEM; + } + +@@ -283,23 +397,32 @@ int ena_com_prepare_tx(struct ena_com_io + return -EINVAL; + } + +- /* start with pushing the header (if needed) */ +- rc = ena_com_write_header(io_sq, push_header, header_len); ++ if (unlikely(io_sq->mem_queue_type == ENA_ADMIN_PLACEMENT_POLICY_DEV && ++ !buffer_to_push)) ++ return -EINVAL; ++ ++ rc = ena_com_write_header_to_bounce(io_sq, buffer_to_push, header_len); + if (unlikely(rc)) + return rc; + + have_meta = ena_tx_ctx->meta_valid && ena_com_meta_desc_changed(io_sq, + ena_tx_ctx); +- if (have_meta) +- ena_com_create_and_store_tx_meta_desc(io_sq, ena_tx_ctx); ++ if (have_meta) { ++ rc = ena_com_create_and_store_tx_meta_desc(io_sq, ena_tx_ctx); ++ if (unlikely(rc)) ++ return rc; ++ } + +- /* If the caller doesn't want send packets */ ++ /* If the caller doesn't want to send packets */ + if (unlikely(!num_bufs && !header_len)) { +- *nb_hw_desc = have_meta ? 0 : 1; +- return 0; ++ rc = ena_com_close_bounce_buffer(io_sq); ++ *nb_hw_desc = io_sq->tail - start_tail; ++ return rc; + } + + desc = get_sq_desc(io_sq); ++ if (unlikely(!desc)) ++ return -EFAULT; + memset(desc, 0x0, sizeof(struct ena_eth_io_tx_desc)); + + /* Set first desc when we don't have meta descriptor */ +@@ -351,10 +474,14 @@ int ena_com_prepare_tx(struct ena_com_io + for (i = 0; i < num_bufs; i++) { + /* The first desc share the same desc as the header */ + if (likely(i != 0)) { +- ena_com_copy_curr_sq_desc_to_dev(io_sq); +- ena_com_sq_update_tail(io_sq); ++ rc = ena_com_sq_update_tail(io_sq); ++ if (unlikely(rc)) ++ return rc; + + desc = get_sq_desc(io_sq); ++ if (unlikely(!desc)) ++ return -EFAULT; ++ + memset(desc, 0x0, sizeof(struct ena_eth_io_tx_desc)); + + desc->len_ctrl |= (io_sq->phase << +@@ -377,15 +504,14 @@ int ena_com_prepare_tx(struct ena_com_io + /* set the last desc indicator */ + desc->len_ctrl |= ENA_ETH_IO_TX_DESC_LAST_MASK; + +- ena_com_copy_curr_sq_desc_to_dev(io_sq); +- +- 
ena_com_sq_update_tail(io_sq); ++ rc = ena_com_sq_update_tail(io_sq); ++ if (unlikely(rc)) ++ return rc; + +- total_desc = max_t(u16, num_bufs, 1); +- total_desc += have_meta ? 1 : 0; ++ rc = ena_com_close_bounce_buffer(io_sq); + +- *nb_hw_desc = total_desc; +- return 0; ++ *nb_hw_desc = io_sq->tail - start_tail; ++ return rc; + } + + int ena_com_rx_pkt(struct ena_com_io_cq *io_cq, +@@ -444,15 +570,18 @@ int ena_com_add_single_rx_desc(struct en + + WARN(io_sq->direction != ENA_COM_IO_QUEUE_DIRECTION_RX, "wrong Q type"); + +- if (unlikely(ena_com_sq_empty_space(io_sq) == 0)) ++ if (unlikely(!ena_com_sq_have_enough_space(io_sq, 1))) + return -ENOSPC; + + desc = get_sq_desc(io_sq); ++ if (unlikely(!desc)) ++ return -EFAULT; ++ + memset(desc, 0x0, sizeof(struct ena_eth_io_rx_desc)); + + desc->length = ena_buf->len; + +- desc->ctrl |= ENA_ETH_IO_RX_DESC_FIRST_MASK; ++ desc->ctrl = ENA_ETH_IO_RX_DESC_FIRST_MASK; + desc->ctrl |= ENA_ETH_IO_RX_DESC_LAST_MASK; + desc->ctrl |= io_sq->phase & ENA_ETH_IO_RX_DESC_PHASE_MASK; + desc->ctrl |= ENA_ETH_IO_RX_DESC_COMP_REQ_MASK; +@@ -463,9 +592,7 @@ int ena_com_add_single_rx_desc(struct en + desc->buff_addr_hi = + ((ena_buf->paddr & GENMASK_ULL(io_sq->dma_addr_bits - 1, 32)) >> 32); + +- ena_com_sq_update_tail(io_sq); +- +- return 0; ++ return ena_com_sq_update_tail(io_sq); + } + + bool ena_com_cq_empty(struct ena_com_io_cq *io_cq) +Index: linux/drivers/net/ethernet/amazon/ena/ena_eth_com.h +=================================================================== +--- linux.orig/drivers/net/ethernet/amazon/ena/ena_eth_com.h ++++ linux/drivers/net/ethernet/amazon/ena/ena_eth_com.h +@@ -94,7 +94,7 @@ static inline void ena_com_unmask_intr(s + writel(intr_reg->intr_control, io_cq->unmask_reg); + } + +-static inline int ena_com_sq_empty_space(struct ena_com_io_sq *io_sq) ++static inline int ena_com_free_desc(struct ena_com_io_sq *io_sq) + { + u16 tail, next_to_comp, cnt; + +@@ -105,11 +105,28 @@ static inline int ena_com_sq_empty_space + 
return io_sq->q_depth - 1 - cnt; + } + +-static inline int ena_com_write_sq_doorbell(struct ena_com_io_sq *io_sq) ++/* Check if the submission queue has enough space to hold required_buffers */ ++static inline bool ena_com_sq_have_enough_space(struct ena_com_io_sq *io_sq, ++ u16 required_buffers) + { +- u16 tail; ++ int temp; + +- tail = io_sq->tail; ++ if (io_sq->mem_queue_type == ENA_ADMIN_PLACEMENT_POLICY_HOST) ++ return ena_com_free_desc(io_sq) >= required_buffers; ++ ++ /* This calculation doesn't need to be 100% accurate. So to reduce ++ * the calculation overhead just subtract 2 lines from the free descs ++ * (one for the header line and one to compensate for the division ++ * rounding down). ++ */ ++ temp = required_buffers / io_sq->llq_info.descs_per_entry + 2; ++ ++ return ena_com_free_desc(io_sq) > temp; ++} ++ ++static inline int ena_com_write_sq_doorbell(struct ena_com_io_sq *io_sq) ++{ ++ u16 tail = io_sq->tail; + + pr_debug("write submission queue doorbell for queue: %d tail: %d\n", + io_sq->qid, tail); +Index: linux/drivers/net/ethernet/amazon/ena/ena_netdev.c +=================================================================== +--- linux.orig/drivers/net/ethernet/amazon/ena/ena_netdev.c ++++ linux/drivers/net/ethernet/amazon/ena/ena_netdev.c +@@ -804,12 +804,13 @@ static int ena_clean_tx_irq(struct ena_r + */ + smp_mb(); + +- above_thresh = ena_com_sq_empty_space(tx_ring->ena_com_io_sq) > +- ENA_TX_WAKEUP_THRESH; ++ above_thresh = ena_com_sq_have_enough_space(tx_ring->ena_com_io_sq, ++ ENA_TX_WAKEUP_THRESH); + if (unlikely(netif_tx_queue_stopped(txq) && above_thresh)) { + __netif_tx_lock(txq, smp_processor_id()); +- above_thresh = ena_com_sq_empty_space(tx_ring->ena_com_io_sq) > +- ENA_TX_WAKEUP_THRESH; ++ above_thresh = ++ ena_com_sq_have_enough_space(tx_ring->ena_com_io_sq, ++ ENA_TX_WAKEUP_THRESH); + if (netif_tx_queue_stopped(txq) && above_thresh) { + netif_tx_wake_queue(txq); + u64_stats_update_begin(&tx_ring->syncp); +@@ -1101,7 +1102,7 @@
static int ena_clean_rx_irq(struct ena_r + + rx_ring->next_to_clean = next_to_clean; + +- refill_required = ena_com_sq_empty_space(rx_ring->ena_com_io_sq); ++ refill_required = ena_com_free_desc(rx_ring->ena_com_io_sq); + refill_threshold = rx_ring->ring_size / ENA_RX_REFILL_THRESH_DIVIDER; + + /* Optimization, try to batch new rx buffers */ +@@ -2115,8 +2116,8 @@ static netdev_tx_t ena_start_xmit(struct + * to sgl_size + 2. one for the meta descriptor and one for header + * (if the header is larger than tx_max_header_size). + */ +- if (unlikely(ena_com_sq_empty_space(tx_ring->ena_com_io_sq) < +- (tx_ring->sgl_size + 2))) { ++ if (unlikely(!ena_com_sq_have_enough_space(tx_ring->ena_com_io_sq, ++ tx_ring->sgl_size + 2))) { + netif_dbg(adapter, tx_queued, dev, "%s stop queue %d\n", + __func__, qid); + +@@ -2135,8 +2136,8 @@ static netdev_tx_t ena_start_xmit(struct + */ + smp_mb(); + +- if (ena_com_sq_empty_space(tx_ring->ena_com_io_sq) +- > ENA_TX_WAKEUP_THRESH) { ++ if (ena_com_sq_have_enough_space(tx_ring->ena_com_io_sq, ++ ENA_TX_WAKEUP_THRESH)) { + netif_tx_wake_queue(txq); + u64_stats_update_begin(&tx_ring->syncp); + tx_ring->tx_stats.queue_wakeup++; +@@ -2813,7 +2814,7 @@ static void check_for_empty_rx_ring(stru + rx_ring = &adapter->rx_ring[i]; + + refill_required = +- ena_com_sq_empty_space(rx_ring->ena_com_io_sq); ++ ena_com_free_desc(rx_ring->ena_com_io_sq); + if (unlikely(refill_required == (rx_ring->ring_size - 1))) { + rx_ring->empty_rx_queue++; + diff --git a/debian/patches/features/all/ena/0006-net-ena-add-functions-for-handling-Low-Latency-Queue.patch b/debian/patches/features/all/ena/0006-net-ena-add-functions-for-handling-Low-Latency-Queue.patch new file mode 100644 index 000000000..8f288529e --- /dev/null +++ b/debian/patches/features/all/ena/0006-net-ena-add-functions-for-handling-Low-Latency-Queue.patch @@ -0,0 +1,655 @@ +From: Arthur Kiyanovski <akiyano@amazon.com> +Date: Thu, 11 Oct 2018 11:26:20 +0300 +Subject: [PATCH 06/19] net: ena: add 
functions for handling Low Latency Queues + in ena_netdev +Origin: https://git.kernel.org/linus/38005ca816a7ef5516dc8e59ae95716739aa75b0 + +This patch includes all code changes necessary in ena_netdev to enable +packet sending via the LLQ placement mode. + +Signed-off-by: Arthur Kiyanovski <akiyano@amazon.com> +Signed-off-by: David S. Miller <davem@davemloft.net> +--- + drivers/net/ethernet/amazon/ena/ena_ethtool.c | 1 + + drivers/net/ethernet/amazon/ena/ena_netdev.c | 387 +++++++++++------- + drivers/net/ethernet/amazon/ena/ena_netdev.h | 6 + + 3 files changed, 251 insertions(+), 143 deletions(-) + +Index: linux/drivers/net/ethernet/amazon/ena/ena_ethtool.c +=================================================================== +--- linux.orig/drivers/net/ethernet/amazon/ena/ena_ethtool.c ++++ linux/drivers/net/ethernet/amazon/ena/ena_ethtool.c +@@ -81,6 +81,7 @@ static const struct ena_stats ena_stats_ + ENA_STAT_TX_ENTRY(doorbells), + ENA_STAT_TX_ENTRY(prepare_ctx_err), + ENA_STAT_TX_ENTRY(bad_req_id), ++ ENA_STAT_TX_ENTRY(llq_buffer_copy), + ENA_STAT_TX_ENTRY(missed_tx), + }; + +Index: linux/drivers/net/ethernet/amazon/ena/ena_netdev.c +=================================================================== +--- linux.orig/drivers/net/ethernet/amazon/ena/ena_netdev.c ++++ linux/drivers/net/ethernet/amazon/ena/ena_netdev.c +@@ -237,6 +237,17 @@ static int ena_setup_tx_resources(struct + } + } + ++ size = tx_ring->tx_max_header_size; ++ tx_ring->push_buf_intermediate_buf = vzalloc_node(size, node); ++ if (!tx_ring->push_buf_intermediate_buf) { ++ tx_ring->push_buf_intermediate_buf = vzalloc(size); ++ if (!tx_ring->push_buf_intermediate_buf) { ++ vfree(tx_ring->tx_buffer_info); ++ vfree(tx_ring->free_tx_ids); ++ return -ENOMEM; ++ } ++ } ++ + /* Req id ring for TX out of order completions */ + for (i = 0; i < tx_ring->ring_size; i++) + tx_ring->free_tx_ids[i] = i; +@@ -265,6 +276,9 @@ static void ena_free_tx_resources(struct + + vfree(tx_ring->free_tx_ids); +
tx_ring->free_tx_ids = NULL; ++ ++ vfree(tx_ring->push_buf_intermediate_buf); ++ tx_ring->push_buf_intermediate_buf = NULL; + } + + /* ena_setup_all_tx_resources - allocate I/O Tx queues resources for All queues +@@ -602,6 +616,36 @@ static void ena_free_all_rx_bufs(struct + ena_free_rx_bufs(adapter, i); + } + ++static inline void ena_unmap_tx_skb(struct ena_ring *tx_ring, ++ struct ena_tx_buffer *tx_info) ++{ ++ struct ena_com_buf *ena_buf; ++ u32 cnt; ++ int i; ++ ++ ena_buf = tx_info->bufs; ++ cnt = tx_info->num_of_bufs; ++ ++ if (unlikely(!cnt)) ++ return; ++ ++ if (tx_info->map_linear_data) { ++ dma_unmap_single(tx_ring->dev, ++ dma_unmap_addr(ena_buf, paddr), ++ dma_unmap_len(ena_buf, len), ++ DMA_TO_DEVICE); ++ ena_buf++; ++ cnt--; ++ } ++ ++ /* unmap remaining mapped pages */ ++ for (i = 0; i < cnt; i++) { ++ dma_unmap_page(tx_ring->dev, dma_unmap_addr(ena_buf, paddr), ++ dma_unmap_len(ena_buf, len), DMA_TO_DEVICE); ++ ena_buf++; ++ } ++} ++ + /* ena_free_tx_bufs - Free Tx Buffers per Queue + * @tx_ring: TX ring for which buffers be freed + */ +@@ -612,9 +656,6 @@ static void ena_free_tx_bufs(struct ena_ + + for (i = 0; i < tx_ring->ring_size; i++) { + struct ena_tx_buffer *tx_info = &tx_ring->tx_buffer_info[i]; +- struct ena_com_buf *ena_buf; +- int nr_frags; +- int j; + + if (!tx_info->skb) + continue; +@@ -630,21 +671,7 @@ static void ena_free_tx_bufs(struct ena_ + tx_ring->qid, i); + } + +- ena_buf = tx_info->bufs; +- dma_unmap_single(tx_ring->dev, +- ena_buf->paddr, +- ena_buf->len, +- DMA_TO_DEVICE); +- +- /* unmap remaining mapped pages */ +- nr_frags = tx_info->num_of_bufs - 1; +- for (j = 0; j < nr_frags; j++) { +- ena_buf++; +- dma_unmap_page(tx_ring->dev, +- ena_buf->paddr, +- ena_buf->len, +- DMA_TO_DEVICE); +- } ++ ena_unmap_tx_skb(tx_ring, tx_info); + + dev_kfree_skb_any(tx_info->skb); + } +@@ -735,8 +762,6 @@ static int ena_clean_tx_irq(struct ena_r + while (tx_pkts < budget) { + struct ena_tx_buffer *tx_info; + struct sk_buff *skb; +- struct 
ena_com_buf *ena_buf; +- int i, nr_frags; + + rc = ena_com_tx_comp_req_id_get(tx_ring->ena_com_io_cq, + &req_id); +@@ -756,24 +781,7 @@ static int ena_clean_tx_irq(struct ena_r + tx_info->skb = NULL; + tx_info->last_jiffies = 0; + +- if (likely(tx_info->num_of_bufs != 0)) { +- ena_buf = tx_info->bufs; +- +- dma_unmap_single(tx_ring->dev, +- dma_unmap_addr(ena_buf, paddr), +- dma_unmap_len(ena_buf, len), +- DMA_TO_DEVICE); +- +- /* unmap remaining mapped pages */ +- nr_frags = tx_info->num_of_bufs - 1; +- for (i = 0; i < nr_frags; i++) { +- ena_buf++; +- dma_unmap_page(tx_ring->dev, +- dma_unmap_addr(ena_buf, paddr), +- dma_unmap_len(ena_buf, len), +- DMA_TO_DEVICE); +- } +- } ++ ena_unmap_tx_skb(tx_ring, tx_info); + + netif_dbg(tx_ring->adapter, tx_done, tx_ring->netdev, + "tx_poll: q %d skb %p completed\n", tx_ring->qid, +@@ -1300,7 +1308,6 @@ static int ena_enable_msix(struct ena_ad + + /* Reserved the max msix vectors we might need */ + msix_vecs = ENA_MAX_MSIX_VEC(num_queues); +- + netif_dbg(adapter, probe, adapter->netdev, + "trying to enable MSI-X, vectors %d\n", msix_vecs); + +@@ -1591,7 +1598,7 @@ static int ena_up_complete(struct ena_ad + + static int ena_create_io_tx_queue(struct ena_adapter *adapter, int qid) + { +- struct ena_com_create_io_ctx ctx = { 0 }; ++ struct ena_com_create_io_ctx ctx; + struct ena_com_dev *ena_dev; + struct ena_ring *tx_ring; + u32 msix_vector; +@@ -1604,6 +1611,8 @@ static int ena_create_io_tx_queue(struct + msix_vector = ENA_IO_IRQ_IDX(qid); + ena_qid = ENA_IO_TXQ_IDX(qid); + ++ memset(&ctx, 0x0, sizeof(ctx)); ++ + ctx.direction = ENA_COM_IO_QUEUE_DIRECTION_TX; + ctx.qid = ena_qid; + ctx.mem_queue_type = ena_dev->tx_mem_queue_type; +@@ -1657,7 +1666,7 @@ create_err: + static int ena_create_io_rx_queue(struct ena_adapter *adapter, int qid) + { + struct ena_com_dev *ena_dev; +- struct ena_com_create_io_ctx ctx = { 0 }; ++ struct ena_com_create_io_ctx ctx; + struct ena_ring *rx_ring; + u32 msix_vector; + u16 ena_qid; +@@ -1669,6 
+1678,8 @@ static int ena_create_io_rx_queue(struct + msix_vector = ENA_IO_IRQ_IDX(qid); + ena_qid = ENA_IO_RXQ_IDX(qid); + ++ memset(&ctx, 0x0, sizeof(ctx)); ++ + ctx.qid = ena_qid; + ctx.direction = ENA_COM_IO_QUEUE_DIRECTION_RX; + ctx.mem_queue_type = ENA_ADMIN_PLACEMENT_POLICY_HOST; +@@ -1986,73 +1997,70 @@ static int ena_check_and_linearize_skb(s + return rc; + } + +-/* Called with netif_tx_lock. */ +-static netdev_tx_t ena_start_xmit(struct sk_buff *skb, struct net_device *dev) ++static int ena_tx_map_skb(struct ena_ring *tx_ring, ++ struct ena_tx_buffer *tx_info, ++ struct sk_buff *skb, ++ void **push_hdr, ++ u16 *header_len) + { +- struct ena_adapter *adapter = netdev_priv(dev); +- struct ena_tx_buffer *tx_info; +- struct ena_com_tx_ctx ena_tx_ctx; +- struct ena_ring *tx_ring; +- struct netdev_queue *txq; ++ struct ena_adapter *adapter = tx_ring->adapter; + struct ena_com_buf *ena_buf; +- void *push_hdr; +- u32 len, last_frag; +- u16 next_to_use; +- u16 req_id; +- u16 push_len; +- u16 header_len; + dma_addr_t dma; +- int qid, rc, nb_hw_desc; +- int i = -1; +- +- netif_dbg(adapter, tx_queued, dev, "%s skb %p\n", __func__, skb); +- /* Determine which tx ring we will be placed on */ +- qid = skb_get_queue_mapping(skb); +- tx_ring = &adapter->tx_ring[qid]; +- txq = netdev_get_tx_queue(dev, qid); +- +- rc = ena_check_and_linearize_skb(tx_ring, skb); +- if (unlikely(rc)) +- goto error_drop_packet; +- +- skb_tx_timestamp(skb); +- len = skb_headlen(skb); ++ u32 skb_head_len, frag_len, last_frag; ++ u16 push_len = 0; ++ u16 delta = 0; ++ int i = 0; + +- next_to_use = tx_ring->next_to_use; +- req_id = tx_ring->free_tx_ids[next_to_use]; +- tx_info = &tx_ring->tx_buffer_info[req_id]; +- tx_info->num_of_bufs = 0; +- +- WARN(tx_info->skb, "SKB isn't NULL req_id %d\n", req_id); +- ena_buf = tx_info->bufs; ++ skb_head_len = skb_headlen(skb); + tx_info->skb = skb; ++ ena_buf = tx_info->bufs; + + if (tx_ring->tx_mem_queue_type == ENA_ADMIN_PLACEMENT_POLICY_DEV) { +- /* 
prepared the push buffer */ +- push_len = min_t(u32, len, tx_ring->tx_max_header_size); +- header_len = push_len; +- push_hdr = skb->data; ++ /* When the device is in LLQ mode, the driver will copy ++ * the header into the device memory space. ++ * The ena_com layer assumes the header is in a linear ++ * memory space. ++ * This assumption might be wrong since part of the header ++ * can be in the fragmented buffers. ++ * Use skb_header_pointer to make sure the header is in a ++ * linear memory space. ++ */ ++ ++ push_len = min_t(u32, skb->len, tx_ring->tx_max_header_size); ++ *push_hdr = skb_header_pointer(skb, 0, push_len, ++ tx_ring->push_buf_intermediate_buf); ++ *header_len = push_len; ++ if (unlikely(skb->data != *push_hdr)) { ++ u64_stats_update_begin(&tx_ring->syncp); ++ tx_ring->tx_stats.llq_buffer_copy++; ++ u64_stats_update_end(&tx_ring->syncp); ++ ++ delta = push_len - skb_head_len; ++ } + } else { +- push_len = 0; +- header_len = min_t(u32, len, tx_ring->tx_max_header_size); +- push_hdr = NULL; ++ *push_hdr = NULL; ++ *header_len = min_t(u32, skb_head_len, ++ tx_ring->tx_max_header_size); + } + +- netif_dbg(adapter, tx_queued, dev, ++ netif_dbg(adapter, tx_queued, adapter->netdev, + "skb: %p header_buf->vaddr: %p push_len: %d\n", skb, +- push_hdr, push_len); ++ *push_hdr, push_len); + +- if (len > push_len) { ++ if (skb_head_len > push_len) { + dma = dma_map_single(tx_ring->dev, skb->data + push_len, +- len - push_len, DMA_TO_DEVICE); +- if (dma_mapping_error(tx_ring->dev, dma)) ++ skb_head_len - push_len, DMA_TO_DEVICE); ++ if (unlikely(dma_mapping_error(tx_ring->dev, dma))) + goto error_report_dma_error; + + ena_buf->paddr = dma; +- ena_buf->len = len - push_len; ++ ena_buf->len = skb_head_len - push_len; + + ena_buf++; + tx_info->num_of_bufs++; ++ tx_info->map_linear_data = 1; ++ } else { ++ tx_info->map_linear_data = 0; + } + + last_frag = skb_shinfo(skb)->nr_frags; +@@ -2060,18 +2068,75 @@ static netdev_tx_t ena_start_xmit(struct + for (i = 0; i <
last_frag; i++) { + const skb_frag_t *frag = &skb_shinfo(skb)->frags[i]; + +- len = skb_frag_size(frag); +- dma = skb_frag_dma_map(tx_ring->dev, frag, 0, len, +- DMA_TO_DEVICE); +- if (dma_mapping_error(tx_ring->dev, dma)) ++ frag_len = skb_frag_size(frag); ++ ++ if (unlikely(delta >= frag_len)) { ++ delta -= frag_len; ++ continue; ++ } ++ ++ dma = skb_frag_dma_map(tx_ring->dev, frag, delta, ++ frag_len - delta, DMA_TO_DEVICE); ++ if (unlikely(dma_mapping_error(tx_ring->dev, dma))) + goto error_report_dma_error; + + ena_buf->paddr = dma; +- ena_buf->len = len; ++ ena_buf->len = frag_len - delta; + ena_buf++; ++ tx_info->num_of_bufs++; ++ delta = 0; + } + +- tx_info->num_of_bufs += last_frag; ++ return 0; ++ ++error_report_dma_error: ++ u64_stats_update_begin(&tx_ring->syncp); ++ tx_ring->tx_stats.dma_mapping_err++; ++ u64_stats_update_end(&tx_ring->syncp); ++ netdev_warn(adapter->netdev, "failed to map skb\n"); ++ ++ tx_info->skb = NULL; ++ ++ tx_info->num_of_bufs += i; ++ ena_unmap_tx_skb(tx_ring, tx_info); ++ ++ return -EINVAL; ++} ++ ++/* Called with netif_tx_lock. 
*/ ++static netdev_tx_t ena_start_xmit(struct sk_buff *skb, struct net_device *dev) ++{ ++ struct ena_adapter *adapter = netdev_priv(dev); ++ struct ena_tx_buffer *tx_info; ++ struct ena_com_tx_ctx ena_tx_ctx; ++ struct ena_ring *tx_ring; ++ struct netdev_queue *txq; ++ void *push_hdr; ++ u16 next_to_use, req_id, header_len; ++ int qid, rc, nb_hw_desc; ++ ++ netif_dbg(adapter, tx_queued, dev, "%s skb %p\n", __func__, skb); ++ /* Determine which tx ring we will be placed on */ ++ qid = skb_get_queue_mapping(skb); ++ tx_ring = &adapter->tx_ring[qid]; ++ txq = netdev_get_tx_queue(dev, qid); ++ ++ rc = ena_check_and_linearize_skb(tx_ring, skb); ++ if (unlikely(rc)) ++ goto error_drop_packet; ++ ++ skb_tx_timestamp(skb); ++ ++ next_to_use = tx_ring->next_to_use; ++ req_id = tx_ring->free_tx_ids[next_to_use]; ++ tx_info = &tx_ring->tx_buffer_info[req_id]; ++ tx_info->num_of_bufs = 0; ++ ++ WARN(tx_info->skb, "SKB isn't NULL req_id %d\n", req_id); ++ ++ rc = ena_tx_map_skb(tx_ring, tx_info, skb, &push_hdr, &header_len); ++ if (unlikely(rc)) ++ goto error_drop_packet; + + memset(&ena_tx_ctx, 0x0, sizeof(struct ena_com_tx_ctx)); + ena_tx_ctx.ena_bufs = tx_info->bufs; +@@ -2087,14 +2152,22 @@ static netdev_tx_t ena_start_xmit(struct + rc = ena_com_prepare_tx(tx_ring->ena_com_io_sq, &ena_tx_ctx, + &nb_hw_desc); + ++ /* ena_com_prepare_tx() can't fail due to overflow of tx queue, ++ * since the number of free descriptors in the queue is checked ++ * after sending the previous packet. In case there isn't enough ++ * space in the queue for the next packet, it is stopped ++ * until there is again enough available space in the queue. ++ * All other failure reasons of ena_com_prepare_tx() are fatal ++ * and therefore require a device reset. 
++ */ + if (unlikely(rc)) { + netif_err(adapter, tx_queued, dev, + "failed to prepare tx bufs\n"); + u64_stats_update_begin(&tx_ring->syncp); +- tx_ring->tx_stats.queue_stop++; + tx_ring->tx_stats.prepare_ctx_err++; + u64_stats_update_end(&tx_ring->syncp); +- netif_tx_stop_queue(txq); ++ adapter->reset_reason = ENA_REGS_RESET_DRIVER_INVALID_STATE; ++ set_bit(ENA_FLAG_TRIGGER_RESET, &adapter->flags); + goto error_unmap_dma; + } + +@@ -2157,35 +2230,11 @@ static netdev_tx_t ena_start_xmit(struct + + return NETDEV_TX_OK; + +-error_report_dma_error: +- u64_stats_update_begin(&tx_ring->syncp); +- tx_ring->tx_stats.dma_mapping_err++; +- u64_stats_update_end(&tx_ring->syncp); +- netdev_warn(adapter->netdev, "failed to map skb\n"); +- +- tx_info->skb = NULL; +- + error_unmap_dma: +- if (i >= 0) { +- /* save value of frag that failed */ +- last_frag = i; +- +- /* start back at beginning and unmap skb */ +- tx_info->skb = NULL; +- ena_buf = tx_info->bufs; +- dma_unmap_single(tx_ring->dev, dma_unmap_addr(ena_buf, paddr), +- dma_unmap_len(ena_buf, len), DMA_TO_DEVICE); +- +- /* unmap remaining mapped pages */ +- for (i = 0; i < last_frag; i++) { +- ena_buf++; +- dma_unmap_page(tx_ring->dev, dma_unmap_addr(ena_buf, paddr), +- dma_unmap_len(ena_buf, len), DMA_TO_DEVICE); +- } +- } ++ ena_unmap_tx_skb(tx_ring, tx_info); ++ tx_info->skb = NULL; + + error_drop_packet: +- + dev_kfree_skb(skb); + return NETDEV_TX_OK; + } +@@ -2621,7 +2670,9 @@ static int ena_restore_device(struct ena + netif_carrier_on(adapter->netdev); + + mod_timer(&adapter->timer_service, round_jiffies(jiffies + HZ)); +- dev_err(&pdev->dev, "Device reset completed successfully\n"); ++ dev_err(&pdev->dev, ++ "Device reset completed successfully, Driver info: %s\n", ++ version); + + return rc; + err_disable_msix: +@@ -2988,18 +3039,52 @@ static int ena_calc_io_queue_num(struct + return io_queue_num; + } + +-static void ena_set_push_mode(struct pci_dev *pdev, struct ena_com_dev *ena_dev, +- struct 
ena_com_dev_get_features_ctx *get_feat_ctx) ++static int ena_set_queues_placement_policy(struct pci_dev *pdev, ++ struct ena_com_dev *ena_dev, ++ struct ena_admin_feature_llq_desc *llq, ++ struct ena_llq_configurations *llq_default_configurations) + { + bool has_mem_bar; ++ int rc; ++ u32 llq_feature_mask; ++ ++ llq_feature_mask = 1 << ENA_ADMIN_LLQ; ++ if (!(ena_dev->supported_features & llq_feature_mask)) { ++ dev_err(&pdev->dev, ++ "LLQ is not supported Fallback to host mode policy.\n"); ++ ena_dev->tx_mem_queue_type = ENA_ADMIN_PLACEMENT_POLICY_HOST; ++ return 0; ++ } + + has_mem_bar = pci_select_bars(pdev, IORESOURCE_MEM) & BIT(ENA_MEM_BAR); + +- /* Enable push mode if device supports LLQ */ +- if (has_mem_bar && get_feat_ctx->max_queues.max_legacy_llq_num > 0) +- ena_dev->tx_mem_queue_type = ENA_ADMIN_PLACEMENT_POLICY_DEV; +- else ++ rc = ena_com_config_dev_mode(ena_dev, llq, llq_default_configurations); ++ if (unlikely(rc)) { ++ dev_err(&pdev->dev, ++ "Failed to configure the device mode. Fallback to host mode policy.\n"); ++ ena_dev->tx_mem_queue_type = ENA_ADMIN_PLACEMENT_POLICY_HOST; ++ return 0; ++ } ++ ++ /* Nothing to config, exit */ ++ if (ena_dev->tx_mem_queue_type == ENA_ADMIN_PLACEMENT_POLICY_HOST) ++ return 0; ++ ++ if (!has_mem_bar) { ++ dev_err(&pdev->dev, ++ "ENA device does not expose LLQ bar. 
Fallback to host mode policy.\n"); + ena_dev->tx_mem_queue_type = ENA_ADMIN_PLACEMENT_POLICY_HOST; ++ return 0; ++ } ++ ++ ena_dev->mem_bar = devm_ioremap_wc(&pdev->dev, ++ pci_resource_start(pdev, ENA_MEM_BAR), ++ pci_resource_len(pdev, ENA_MEM_BAR)); ++ ++ if (!ena_dev->mem_bar) ++ return -EFAULT; ++ ++ return 0; + } + + static void ena_set_dev_offloads(struct ena_com_dev_get_features_ctx *feat, +@@ -3117,6 +3202,15 @@ static void ena_release_bars(struct ena_ + pci_release_selected_regions(pdev, release_bars); + } + ++static inline void set_default_llq_configurations(struct ena_llq_configurations *llq_config) ++{ ++ llq_config->llq_header_location = ENA_ADMIN_INLINE_HEADER; ++ llq_config->llq_ring_entry_size = ENA_ADMIN_LIST_ENTRY_SIZE_128B; ++ llq_config->llq_stride_ctrl = ENA_ADMIN_MULTIPLE_DESCS_PER_ENTRY; ++ llq_config->llq_num_decs_before_header = ENA_ADMIN_LLQ_NUM_DESCS_BEFORE_HEADER_2; ++ llq_config->llq_ring_entry_size_value = 128; ++} ++ + static int ena_calc_queue_size(struct pci_dev *pdev, + struct ena_com_dev *ena_dev, + u16 *max_tx_sgl_size, +@@ -3165,7 +3259,9 @@ static int ena_probe(struct pci_dev *pde + static int version_printed; + struct net_device *netdev; + struct ena_adapter *adapter; ++ struct ena_llq_configurations llq_config; + struct ena_com_dev *ena_dev = NULL; ++ char *queue_type_str; + static int adapters_found; + int io_queue_num, bars, rc; + int queue_size; +@@ -3219,16 +3315,13 @@ static int ena_probe(struct pci_dev *pde + goto err_free_region; + } + +- ena_set_push_mode(pdev, ena_dev, &get_feat_ctx); ++ set_default_llq_configurations(&llq_config); + +- if (ena_dev->tx_mem_queue_type == ENA_ADMIN_PLACEMENT_POLICY_DEV) { +- ena_dev->mem_bar = devm_ioremap_wc(&pdev->dev, +- pci_resource_start(pdev, ENA_MEM_BAR), +- pci_resource_len(pdev, ENA_MEM_BAR)); +- if (!ena_dev->mem_bar) { +- rc = -EFAULT; +- goto err_device_destroy; +- } ++ rc = ena_set_queues_placement_policy(pdev, ena_dev, &get_feat_ctx.llq, ++ &llq_config); ++ if (rc) { ++ 
dev_err(&pdev->dev, "ena device init failed\n"); ++ goto err_device_destroy; + } + + /* initial Tx interrupt delay, Assumes 1 usec granularity. +@@ -3243,8 +3336,10 @@ static int ena_probe(struct pci_dev *pde + goto err_device_destroy; + } + +- dev_info(&pdev->dev, "creating %d io queues. queue size: %d\n", +- io_queue_num, queue_size); ++ dev_info(&pdev->dev, "creating %d io queues. queue size: %d. LLQ is %s\n", ++ io_queue_num, queue_size, ++ (ena_dev->tx_mem_queue_type == ENA_ADMIN_PLACEMENT_POLICY_DEV) ? ++ "ENABLED" : "DISABLED"); + + /* dev zeroed in init_etherdev */ + netdev = alloc_etherdev_mq(sizeof(struct ena_adapter), io_queue_num); +@@ -3334,9 +3429,15 @@ static int ena_probe(struct pci_dev *pde + timer_setup(&adapter->timer_service, ena_timer_service, 0); + mod_timer(&adapter->timer_service, round_jiffies(jiffies + HZ)); + +- dev_info(&pdev->dev, "%s found at mem %lx, mac addr %pM Queues %d\n", ++ if (ena_dev->tx_mem_queue_type == ENA_ADMIN_PLACEMENT_POLICY_HOST) ++ queue_type_str = "Regular"; ++ else ++ queue_type_str = "Low Latency"; ++ ++ dev_info(&pdev->dev, ++ "%s found at mem %lx, mac addr %pM Queues %d, Placement policy: %s\n", + DEVICE_NAME, (long)pci_resource_start(pdev, 0), +- netdev->dev_addr, io_queue_num); ++ netdev->dev_addr, io_queue_num, queue_type_str); + + set_bit(ENA_FLAG_DEVICE_RUNNING, &adapter->flags); + +Index: linux/drivers/net/ethernet/amazon/ena/ena_netdev.h +=================================================================== +--- linux.orig/drivers/net/ethernet/amazon/ena/ena_netdev.h ++++ linux/drivers/net/ethernet/amazon/ena/ena_netdev.h +@@ -151,6 +151,9 @@ struct ena_tx_buffer { + /* num of buffers used by this skb */ + u32 num_of_bufs; + ++ /* Indicate if bufs[0] map the linear data of the skb. 
*/ ++ u8 map_linear_data; ++ + /* Used for detect missing tx packets to limit the number of prints */ + u32 print_once; + /* Save the last jiffies to detect missing tx packets +@@ -186,6 +189,7 @@ struct ena_stats_tx { + u64 tx_poll; + u64 doorbells; + u64 bad_req_id; ++ u64 llq_buffer_copy; + u64 missed_tx; + }; + +@@ -257,6 +261,8 @@ struct ena_ring { + struct ena_stats_tx tx_stats; + struct ena_stats_rx rx_stats; + }; ++ ++ u8 *push_buf_intermediate_buf; + int empty_rx_queue; + } ____cacheline_aligned; + diff --git a/debian/patches/features/all/ena/0007-net-ena-use-CSUM_CHECKED-device-indication-to-report.patch b/debian/patches/features/all/ena/0007-net-ena-use-CSUM_CHECKED-device-indication-to-report.patch new file mode 100644 index 000000000..cc25333b8 --- /dev/null +++ b/debian/patches/features/all/ena/0007-net-ena-use-CSUM_CHECKED-device-indication-to-report.patch @@ -0,0 +1,125 @@ +From: Arthur Kiyanovski <akiyano@amazon.com> +Date: Thu, 11 Oct 2018 11:26:21 +0300 +Subject: [PATCH 07/19] net: ena: use CSUM_CHECKED device indication to report + skb's checksum status +Origin: https://git.kernel.org/linus/cb36bb36e1f17d2a7b9a9751e5cfec4235b46c93 + +Set skb->ip_summed to the correct value as reported by the device. +Add counter for the case where rx csum offload is enabled but +device didn't check it. + +Signed-off-by: Arthur Kiyanovski <akiyano@amazon.com> +Signed-off-by: David S. 
Miller <davem@davemloft.net> +--- + drivers/net/ethernet/amazon/ena/ena_eth_com.c | 7 +++++-- + drivers/net/ethernet/amazon/ena/ena_eth_com.h | 1 + + drivers/net/ethernet/amazon/ena/ena_eth_io_defs.h | 10 ++++++++-- + drivers/net/ethernet/amazon/ena/ena_ethtool.c | 1 + + drivers/net/ethernet/amazon/ena/ena_netdev.c | 13 ++++++++++++- + drivers/net/ethernet/amazon/ena/ena_netdev.h | 1 + + 6 files changed, 28 insertions(+), 5 deletions(-) + +Index: linux/drivers/net/ethernet/amazon/ena/ena_eth_com.c +=================================================================== +--- linux.orig/drivers/net/ethernet/amazon/ena/ena_eth_com.c ++++ linux/drivers/net/ethernet/amazon/ena/ena_eth_com.c +@@ -354,6 +354,9 @@ static inline void ena_com_rx_set_flags( + ena_rx_ctx->l4_csum_err = + !!((cdesc->status & ENA_ETH_IO_RX_CDESC_BASE_L4_CSUM_ERR_MASK) >> + ENA_ETH_IO_RX_CDESC_BASE_L4_CSUM_ERR_SHIFT); ++ ena_rx_ctx->l4_csum_checked = ++ !!((cdesc->status & ENA_ETH_IO_RX_CDESC_BASE_L4_CSUM_CHECKED_MASK) >> ++ ENA_ETH_IO_RX_CDESC_BASE_L4_CSUM_CHECKED_SHIFT); + ena_rx_ctx->hash = cdesc->hash; + ena_rx_ctx->frag = + (cdesc->status & ENA_ETH_IO_RX_CDESC_BASE_IPV4_FRAG_MASK) >> +Index: linux/drivers/net/ethernet/amazon/ena/ena_eth_com.h +=================================================================== +--- linux.orig/drivers/net/ethernet/amazon/ena/ena_eth_com.h ++++ linux/drivers/net/ethernet/amazon/ena/ena_eth_com.h +@@ -67,6 +67,7 @@ struct ena_com_rx_ctx { + enum ena_eth_io_l4_proto_index l4_proto; + bool l3_csum_err; + bool l4_csum_err; ++ u8 l4_csum_checked; + /* fragmented packet */ + bool frag; + u32 hash; +Index: linux/drivers/net/ethernet/amazon/ena/ena_eth_io_defs.h +=================================================================== +--- linux.orig/drivers/net/ethernet/amazon/ena/ena_eth_io_defs.h ++++ linux/drivers/net/ethernet/amazon/ena/ena_eth_io_defs.h +@@ -242,9 +242,13 @@ struct ena_eth_io_rx_cdesc_base { + * checksum error detected, or, the controller didn't + * 
validate the checksum. This bit is valid only when + * l4_proto_idx indicates TCP/UDP packet, and, +- * ipv4_frag is not set ++ * ipv4_frag is not set. This bit is valid only when ++ * l4_csum_checked below is set. + * 15 : ipv4_frag - Indicates IPv4 fragmented packet +- * 23:16 : reserved16 ++ * 16 : l4_csum_checked - L4 checksum was verified ++ * (could be OK or error), when cleared the status of ++ * checksum is unknown ++ * 23:17 : reserved17 - MBZ + * 24 : phase + * 25 : l3_csum2 - second checksum engine result + * 26 : first - Indicates first descriptor in +@@ -390,6 +394,8 @@ struct ena_eth_io_numa_node_cfg_reg { + #define ENA_ETH_IO_RX_CDESC_BASE_L4_CSUM_ERR_MASK BIT(14) + #define ENA_ETH_IO_RX_CDESC_BASE_IPV4_FRAG_SHIFT 15 + #define ENA_ETH_IO_RX_CDESC_BASE_IPV4_FRAG_MASK BIT(15) ++#define ENA_ETH_IO_RX_CDESC_BASE_L4_CSUM_CHECKED_SHIFT 16 ++#define ENA_ETH_IO_RX_CDESC_BASE_L4_CSUM_CHECKED_MASK BIT(16) + #define ENA_ETH_IO_RX_CDESC_BASE_PHASE_SHIFT 24 + #define ENA_ETH_IO_RX_CDESC_BASE_PHASE_MASK BIT(24) + #define ENA_ETH_IO_RX_CDESC_BASE_L3_CSUM2_SHIFT 25 +Index: linux/drivers/net/ethernet/amazon/ena/ena_ethtool.c +=================================================================== +--- linux.orig/drivers/net/ethernet/amazon/ena/ena_ethtool.c ++++ linux/drivers/net/ethernet/amazon/ena/ena_ethtool.c +@@ -97,6 +97,7 @@ static const struct ena_stats ena_stats_ + ENA_STAT_RX_ENTRY(rx_copybreak_pkt), + ENA_STAT_RX_ENTRY(bad_req_id), + ENA_STAT_RX_ENTRY(empty_rx_ring), ++ ENA_STAT_RX_ENTRY(csum_unchecked), + }; + + static const struct ena_stats ena_stats_ena_com_strings[] = { +Index: linux/drivers/net/ethernet/amazon/ena/ena_netdev.c +=================================================================== +--- linux.orig/drivers/net/ethernet/amazon/ena/ena_netdev.c ++++ linux/drivers/net/ethernet/amazon/ena/ena_netdev.c +@@ -994,8 +994,19 @@ static inline void ena_rx_checksum(struc + return; + } + +- skb->ip_summed = CHECKSUM_UNNECESSARY; ++ if 
(likely(ena_rx_ctx->l4_csum_checked)) { ++ skb->ip_summed = CHECKSUM_UNNECESSARY; ++ } else { ++ u64_stats_update_begin(&rx_ring->syncp); ++ rx_ring->rx_stats.csum_unchecked++; ++ u64_stats_update_end(&rx_ring->syncp); ++ skb->ip_summed = CHECKSUM_NONE; ++ } ++ } else { ++ skb->ip_summed = CHECKSUM_NONE; ++ return; + } ++ + } + + static void ena_set_rx_hash(struct ena_ring *rx_ring, +Index: linux/drivers/net/ethernet/amazon/ena/ena_netdev.h +=================================================================== +--- linux.orig/drivers/net/ethernet/amazon/ena/ena_netdev.h ++++ linux/drivers/net/ethernet/amazon/ena/ena_netdev.h +@@ -205,6 +205,7 @@ struct ena_stats_rx { + u64 rx_copybreak_pkt; + u64 bad_req_id; + u64 empty_rx_ring; ++ u64 csum_unchecked; + }; + + struct ena_ring { diff --git a/debian/patches/features/all/ena/0008-net-ena-explicit-casting-and-initialization-and-clea.patch b/debian/patches/features/all/ena/0008-net-ena-explicit-casting-and-initialization-and-clea.patch new file mode 100644 index 000000000..59ea0566a --- /dev/null +++ b/debian/patches/features/all/ena/0008-net-ena-explicit-casting-and-initialization-and-clea.patch @@ -0,0 +1,223 @@ +From: Arthur Kiyanovski <akiyano@amazon.com> +Date: Thu, 11 Oct 2018 11:26:22 +0300 +Subject: [PATCH 08/19] net: ena: explicit casting and initialization, and + clearer error handling +Origin: https://git.kernel.org/linus/bd791175a6432d24fc5d7b348304276027372545 + +Signed-off-by: Arthur Kiyanovski <akiyano@amazon.com> +Signed-off-by: David S. 
Miller <davem@davemloft.net> +--- + drivers/net/ethernet/amazon/ena/ena_com.c | 39 ++++++++++++-------- + drivers/net/ethernet/amazon/ena/ena_netdev.c | 5 +-- + drivers/net/ethernet/amazon/ena/ena_netdev.h | 22 +++++------ + 3 files changed, 36 insertions(+), 30 deletions(-) + +Index: linux/drivers/net/ethernet/amazon/ena/ena_com.c +=================================================================== +--- linux.orig/drivers/net/ethernet/amazon/ena/ena_com.c ++++ linux/drivers/net/ethernet/amazon/ena/ena_com.c +@@ -235,7 +235,7 @@ static struct ena_comp_ctx *__ena_com_su + tail_masked = admin_queue->sq.tail & queue_size_mask; + + /* In case of queue FULL */ +- cnt = atomic_read(&admin_queue->outstanding_cmds); ++ cnt = (u16)atomic_read(&admin_queue->outstanding_cmds); + if (cnt >= admin_queue->q_depth) { + pr_debug("admin queue is full.\n"); + admin_queue->stats.out_of_space++; +@@ -304,7 +304,7 @@ static struct ena_comp_ctx *ena_com_subm + struct ena_admin_acq_entry *comp, + size_t comp_size_in_bytes) + { +- unsigned long flags; ++ unsigned long flags = 0; + struct ena_comp_ctx *comp_ctx; + + spin_lock_irqsave(&admin_queue->q_lock, flags); +@@ -332,7 +332,7 @@ static int ena_com_init_io_sq(struct ena + + memset(&io_sq->desc_addr, 0x0, sizeof(io_sq->desc_addr)); + +- io_sq->dma_addr_bits = ena_dev->dma_addr_bits; ++ io_sq->dma_addr_bits = (u8)ena_dev->dma_addr_bits; + io_sq->desc_entry_size = + (io_sq->direction == ENA_COM_IO_QUEUE_DIRECTION_TX) ? 
+ sizeof(struct ena_eth_io_tx_desc) : +@@ -486,7 +486,7 @@ static void ena_com_handle_admin_complet + + /* Go over all the completions */ + while ((READ_ONCE(cqe->acq_common_descriptor.flags) & +- ENA_ADMIN_ACQ_COMMON_DESC_PHASE_MASK) == phase) { ++ ENA_ADMIN_ACQ_COMMON_DESC_PHASE_MASK) == phase) { + /* Do not read the rest of the completion entry before the + * phase bit was validated + */ +@@ -537,7 +537,8 @@ static int ena_com_comp_status_to_errno( + static int ena_com_wait_and_process_admin_cq_polling(struct ena_comp_ctx *comp_ctx, + struct ena_com_admin_queue *admin_queue) + { +- unsigned long flags, timeout; ++ unsigned long flags = 0; ++ unsigned long timeout; + int ret; + + timeout = jiffies + usecs_to_jiffies(admin_queue->completion_timeout); +@@ -736,7 +737,7 @@ static int ena_com_config_llq_info(struc + static int ena_com_wait_and_process_admin_cq_interrupts(struct ena_comp_ctx *comp_ctx, + struct ena_com_admin_queue *admin_queue) + { +- unsigned long flags; ++ unsigned long flags = 0; + int ret; + + wait_for_completion_timeout(&comp_ctx->wait_event, +@@ -782,7 +783,7 @@ static u32 ena_com_reg_bar_read32(struct + volatile struct ena_admin_ena_mmio_req_read_less_resp *read_resp = + mmio_read->read_resp; + u32 mmio_read_reg, ret, i; +- unsigned long flags; ++ unsigned long flags = 0; + u32 timeout = mmio_read->reg_read_to; + + might_sleep(); +@@ -1426,7 +1427,7 @@ void ena_com_abort_admin_commands(struct + void ena_com_wait_for_abort_completion(struct ena_com_dev *ena_dev) + { + struct ena_com_admin_queue *admin_queue = &ena_dev->admin_queue; +- unsigned long flags; ++ unsigned long flags = 0; + + spin_lock_irqsave(&admin_queue->q_lock, flags); + while (atomic_read(&admin_queue->outstanding_cmds) != 0) { +@@ -1470,7 +1471,7 @@ bool ena_com_get_admin_running_state(str + void ena_com_set_admin_running_state(struct ena_com_dev *ena_dev, bool state) + { + struct ena_com_admin_queue *admin_queue = &ena_dev->admin_queue; +- unsigned long flags; ++ unsigned long 
flags = 0; + + spin_lock_irqsave(&admin_queue->q_lock, flags); + ena_dev->admin_queue.running_state = state; +@@ -1504,7 +1505,7 @@ int ena_com_set_aenq_config(struct ena_c + } + + if ((get_resp.u.aenq.supported_groups & groups_flag) != groups_flag) { +- pr_warn("Trying to set unsupported aenq events. supported flag: %x asked flag: %x\n", ++ pr_warn("Trying to set unsupported aenq events. supported flag: 0x%x asked flag: 0x%x\n", + get_resp.u.aenq.supported_groups, groups_flag); + return -EOPNOTSUPP; + } +@@ -1652,7 +1653,7 @@ int ena_com_mmio_reg_read_request_init(s + sizeof(*mmio_read->read_resp), + &mmio_read->read_resp_dma_addr, GFP_KERNEL); + if (unlikely(!mmio_read->read_resp)) +- return -ENOMEM; ++ goto err; + + ena_com_mmio_reg_read_request_write_dev_addr(ena_dev); + +@@ -1661,6 +1662,10 @@ int ena_com_mmio_reg_read_request_init(s + mmio_read->readless_supported = true; + + return 0; ++ ++err: ++ ++ return -ENOMEM; + } + + void ena_com_set_mmio_read_mode(struct ena_com_dev *ena_dev, bool readless_supported) +@@ -1961,6 +1966,7 @@ void ena_com_aenq_intr_handler(struct en + struct ena_admin_aenq_entry *aenq_e; + struct ena_admin_aenq_common_desc *aenq_common; + struct ena_com_aenq *aenq = &dev->aenq; ++ unsigned long long timestamp; + ena_aenq_handler handler_cb; + u16 masked_head, processed = 0; + u8 phase; +@@ -1978,10 +1984,11 @@ void ena_com_aenq_intr_handler(struct en + */ + dma_rmb(); + ++ timestamp = ++ (unsigned long long)aenq_common->timestamp_low | ++ ((unsigned long long)aenq_common->timestamp_high << 32); + pr_debug("AENQ! 
Group[%x] Syndrom[%x] timestamp: [%llus]\n", +- aenq_common->group, aenq_common->syndrom, +- (u64)aenq_common->timestamp_low + +- ((u64)aenq_common->timestamp_high << 32)); ++ aenq_common->group, aenq_common->syndrom, timestamp); + + /* Handle specific event*/ + handler_cb = ena_com_get_specific_aenq_cb(dev, +@@ -2623,8 +2630,8 @@ int ena_com_allocate_host_info(struct en + if (unlikely(!host_attr->host_info)) + return -ENOMEM; + +- host_attr->host_info->ena_spec_version = +- ((ENA_COMMON_SPEC_VERSION_MAJOR << ENA_REGS_VERSION_MAJOR_VERSION_SHIFT) | ++ host_attr->host_info->ena_spec_version = ((ENA_COMMON_SPEC_VERSION_MAJOR << ++ ENA_REGS_VERSION_MAJOR_VERSION_SHIFT) | + (ENA_COMMON_SPEC_VERSION_MINOR)); + + return 0; +Index: linux/drivers/net/ethernet/amazon/ena/ena_netdev.c +=================================================================== +--- linux.orig/drivers/net/ethernet/amazon/ena/ena_netdev.c ++++ linux/drivers/net/ethernet/amazon/ena/ena_netdev.c +@@ -2604,15 +2604,14 @@ static void ena_destroy_device(struct en + + dev_up = test_bit(ENA_FLAG_DEV_UP, &adapter->flags); + adapter->dev_up_before_reset = dev_up; +- + if (!graceful) + ena_com_set_admin_running_state(ena_dev, false); + + if (test_bit(ENA_FLAG_DEV_UP, &adapter->flags)) + ena_down(adapter); + +- /* Before releasing the ENA resources, a device reset is required. +- * (to prevent the device from accessing them). ++ /* Stop the device from sending AENQ events (in case reset flag is set ++ * and device is up, ena_close already reset the device + * In case the reset flag is set and the device is up, ena_down() + * already perform the reset, so it can be skipped. 
+ */ +Index: linux/drivers/net/ethernet/amazon/ena/ena_netdev.h +=================================================================== +--- linux.orig/drivers/net/ethernet/amazon/ena/ena_netdev.h ++++ linux/drivers/net/ethernet/amazon/ena/ena_netdev.h +@@ -61,6 +61,17 @@ + #define ENA_ADMIN_MSIX_VEC 1 + #define ENA_MAX_MSIX_VEC(io_queues) (ENA_ADMIN_MSIX_VEC + (io_queues)) + ++/* The ENA buffer length fields is 16 bit long. So when PAGE_SIZE == 64kB the ++ * driver passes 0. ++ * Since the max packet size the ENA handles is ~9kB limit the buffer length to ++ * 16kB. ++ */ ++#if PAGE_SIZE > SZ_16K ++#define ENA_PAGE_SIZE SZ_16K ++#else ++#define ENA_PAGE_SIZE PAGE_SIZE ++#endif ++ + #define ENA_MIN_MSIX_VEC 2 + + #define ENA_REG_BAR 0 +@@ -362,15 +373,4 @@ void ena_dump_stats_to_buf(struct ena_ad + + int ena_get_sset_count(struct net_device *netdev, int sset); + +-/* The ENA buffer length fields is 16 bit long. So when PAGE_SIZE == 64kB the +- * driver passas 0. +- * Since the max packet size the ENA handles is ~9kB limit the buffer length to +- * 16kB. +- */ +-#if PAGE_SIZE > SZ_16K +-#define ENA_PAGE_SIZE SZ_16K +-#else +-#define ENA_PAGE_SIZE PAGE_SIZE +-#endif +- + #endif /* !(ENA_H) */ diff --git a/debian/patches/features/all/ena/0009-net-ena-limit-refill-Rx-threshold-to-256-to-avoid-la.patch b/debian/patches/features/all/ena/0009-net-ena-limit-refill-Rx-threshold-to-256-to-avoid-la.patch new file mode 100644 index 000000000..3eaae9950 --- /dev/null +++ b/debian/patches/features/all/ena/0009-net-ena-limit-refill-Rx-threshold-to-256-to-avoid-la.patch @@ -0,0 +1,54 @@ +From: Arthur Kiyanovski <akiyano@amazon.com> +Date: Thu, 11 Oct 2018 11:26:23 +0300 +Subject: [PATCH 09/19] net: ena: limit refill Rx threshold to 256 to avoid + latency issues +Origin: https://git.kernel.org/linus/0574bb806dad29a3dada0ee42b01645477d48282 + +Currently Rx refill is done when the number of required descriptors is +above 1/8 queue size. 
With a default of 1024 entries per queue the +threshold is 128 descriptors. +There is intention to increase the queue size to 8196 entries. +In this case threshold of 1024 descriptors is too large and can hurt +latency. +Add another limitation to Rx threshold to be at most 256 descriptors. + +Signed-off-by: Arthur Kiyanovski <akiyano@amazon.com> +Signed-off-by: David S. Miller <davem@davemloft.net> +--- + drivers/net/ethernet/amazon/ena/ena_netdev.c | 4 +++- + drivers/net/ethernet/amazon/ena/ena_netdev.h | 5 +++-- + 2 files changed, 6 insertions(+), 3 deletions(-) + +Index: linux/drivers/net/ethernet/amazon/ena/ena_netdev.c +=================================================================== +--- linux.orig/drivers/net/ethernet/amazon/ena/ena_netdev.c ++++ linux/drivers/net/ethernet/amazon/ena/ena_netdev.c +@@ -1122,7 +1122,9 @@ static int ena_clean_rx_irq(struct ena_r + rx_ring->next_to_clean = next_to_clean; + + refill_required = ena_com_free_desc(rx_ring->ena_com_io_sq); +- refill_threshold = rx_ring->ring_size / ENA_RX_REFILL_THRESH_DIVIDER; ++ refill_threshold = ++ min_t(int, rx_ring->ring_size / ENA_RX_REFILL_THRESH_DIVIDER, ++ ENA_RX_REFILL_THRESH_PACKET); + + /* Optimization, try to batch new rx buffers */ + if (refill_required > refill_threshold) { +Index: linux/drivers/net/ethernet/amazon/ena/ena_netdev.h +=================================================================== +--- linux.orig/drivers/net/ethernet/amazon/ena/ena_netdev.h ++++ linux/drivers/net/ethernet/amazon/ena/ena_netdev.h +@@ -106,10 +106,11 @@ + */ + #define ENA_TX_POLL_BUDGET_DIVIDER 4 + +-/* Refill Rx queue when number of available descriptors is below +- * QUEUE_SIZE / ENA_RX_REFILL_THRESH_DIVIDER ++/* Refill Rx queue when number of required descriptors is above ++ * QUEUE_SIZE / ENA_RX_REFILL_THRESH_DIVIDER or ENA_RX_REFILL_THRESH_PACKET + */ + #define ENA_RX_REFILL_THRESH_DIVIDER 8 ++#define ENA_RX_REFILL_THRESH_PACKET 256 + + /* Number of queues to check for missing queues per timer 
service */ + #define ENA_MONITORED_TX_QUEUES 4 diff --git a/debian/patches/features/all/ena/0010-net-ena-change-rx-copybreak-default-to-reduce-kernel.patch b/debian/patches/features/all/ena/0010-net-ena-change-rx-copybreak-default-to-reduce-kernel.patch new file mode 100644 index 000000000..6e493b217 --- /dev/null +++ b/debian/patches/features/all/ena/0010-net-ena-change-rx-copybreak-default-to-reduce-kernel.patch @@ -0,0 +1,28 @@ +From: Arthur Kiyanovski <akiyano@amazon.com> +Date: Thu, 11 Oct 2018 11:26:24 +0300 +Subject: [PATCH 10/19] net: ena: change rx copybreak default to reduce kernel + memory pressure +Origin: https://git.kernel.org/linus/87731f0c681c9682c5521e5197d89e561b7da395 + +Improves socket memory utilization when receiving packets larger +than 128 bytes (the previous rx copybreak) and smaller than 256 bytes. + +Signed-off-by: Arthur Kiyanovski <akiyano@amazon.com> +Signed-off-by: David S. Miller <davem@davemloft.net> +--- + drivers/net/ethernet/amazon/ena/ena_netdev.h | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +Index: linux/drivers/net/ethernet/amazon/ena/ena_netdev.h +=================================================================== +--- linux.orig/drivers/net/ethernet/amazon/ena/ena_netdev.h ++++ linux/drivers/net/ethernet/amazon/ena/ena_netdev.h +@@ -81,7 +81,7 @@ + #define ENA_DEFAULT_RING_SIZE (1024) + + #define ENA_TX_WAKEUP_THRESH (MAX_SKB_FRAGS + 2) +-#define ENA_DEFAULT_RX_COPYBREAK (128 - NET_IP_ALIGN) ++#define ENA_DEFAULT_RX_COPYBREAK (256 - NET_IP_ALIGN) + + /* limit the buffer size to 600 bytes to handle MTU changes from very + * small to very large, in which case the number of buffers per packet diff --git a/debian/patches/features/all/ena/0011-net-ena-remove-redundant-parameter-in-ena_com_admin_.patch b/debian/patches/features/all/ena/0011-net-ena-remove-redundant-parameter-in-ena_com_admin_.patch new file mode 100644 index 000000000..ac5ab95e5 --- /dev/null +++ 
b/debian/patches/features/all/ena/0011-net-ena-remove-redundant-parameter-in-ena_com_admin_.patch @@ -0,0 +1,76 @@ +From: Arthur Kiyanovski <akiyano@amazon.com> +Date: Thu, 11 Oct 2018 11:26:25 +0300 +Subject: [PATCH 11/19] net: ena: remove redundant parameter in + ena_com_admin_init() +Origin: https://git.kernel.org/linus/f1e90f6e2c1fb0e491f910540314015324fed1e2 + +Remove redundant spinlock acquire parameter from ena_com_admin_init() + +Signed-off-by: Arthur Kiyanovski <akiyano@amazon.com> +Signed-off-by: David S. Miller <davem@davemloft.net> +--- + drivers/net/ethernet/amazon/ena/ena_com.c | 6 ++---- + drivers/net/ethernet/amazon/ena/ena_com.h | 5 +---- + drivers/net/ethernet/amazon/ena/ena_netdev.c | 2 +- + 3 files changed, 4 insertions(+), 9 deletions(-) + +Index: linux/drivers/net/ethernet/amazon/ena/ena_com.c +=================================================================== +--- linux.orig/drivers/net/ethernet/amazon/ena/ena_com.c ++++ linux/drivers/net/ethernet/amazon/ena/ena_com.c +@@ -1701,8 +1701,7 @@ void ena_com_mmio_reg_read_request_write + } + + int ena_com_admin_init(struct ena_com_dev *ena_dev, +- struct ena_aenq_handlers *aenq_handlers, +- bool init_spinlock) ++ struct ena_aenq_handlers *aenq_handlers) + { + struct ena_com_admin_queue *admin_queue = &ena_dev->admin_queue; + u32 aq_caps, acq_caps, dev_sts, addr_low, addr_high; +@@ -1728,8 +1727,7 @@ int ena_com_admin_init(struct ena_com_de + + atomic_set(&admin_queue->outstanding_cmds, 0); + +- if (init_spinlock) +- spin_lock_init(&admin_queue->q_lock); ++ spin_lock_init(&admin_queue->q_lock); + + ret = ena_com_init_comp_ctxt(admin_queue); + if (ret) +Index: linux/drivers/net/ethernet/amazon/ena/ena_com.h +=================================================================== +--- linux.orig/drivers/net/ethernet/amazon/ena/ena_com.h ++++ linux/drivers/net/ethernet/amazon/ena/ena_com.h +@@ -436,8 +436,6 @@ void ena_com_mmio_reg_read_request_destr + /* ena_com_admin_init - Init the admin and the async 
queues + * @ena_dev: ENA communication layer struct + * @aenq_handlers: Those handlers to be called upon event. +- * @init_spinlock: Indicate if this method should init the admin spinlock or +- * the spinlock was init before (for example, in a case of FLR). + * + * Initialize the admin submission and completion queues. + * Initialize the asynchronous events notification queues. +@@ -445,8 +443,7 @@ void ena_com_mmio_reg_read_request_destr + * @return - 0 on success, negative value on failure. + */ + int ena_com_admin_init(struct ena_com_dev *ena_dev, +- struct ena_aenq_handlers *aenq_handlers, +- bool init_spinlock); ++ struct ena_aenq_handlers *aenq_handlers); + + /* ena_com_admin_destroy - Destroy the admin and the async events queues. + * @ena_dev: ENA communication layer struct +Index: linux/drivers/net/ethernet/amazon/ena/ena_netdev.c +=================================================================== +--- linux.orig/drivers/net/ethernet/amazon/ena/ena_netdev.c ++++ linux/drivers/net/ethernet/amazon/ena/ena_netdev.c +@@ -2508,7 +2508,7 @@ static int ena_device_init(struct ena_co + } + + /* ENA admin level init */ +- rc = ena_com_admin_init(ena_dev, &aenq_handlers, true); ++ rc = ena_com_admin_init(ena_dev, &aenq_handlers); + if (rc) { + dev_err(dev, + "Can not initialize ena admin queue with device\n"); diff --git a/debian/patches/features/all/ena/0012-net-ena-update-driver-version-to-2.0.1.patch b/debian/patches/features/all/ena/0012-net-ena-update-driver-version-to-2.0.1.patch new file mode 100644 index 000000000..ce4d8d8e0 --- /dev/null +++ b/debian/patches/features/all/ena/0012-net-ena-update-driver-version-to-2.0.1.patch @@ -0,0 +1,28 @@ +From: Arthur Kiyanovski <akiyano@amazon.com> +Date: Thu, 11 Oct 2018 11:26:26 +0300 +Subject: [PATCH 12/19] net: ena: update driver version to 2.0.1 +Origin: https://git.kernel.org/linus/3a7b9d8ddd200bdafaa3ef75b8544d2403eaa03b + +Signed-off-by: Arthur Kiyanovski <akiyano@amazon.com> +Signed-off-by: David S. 
Miller <davem@davemloft.net> +--- + drivers/net/ethernet/amazon/ena/ena_netdev.h | 6 +++--- + 1 file changed, 3 insertions(+), 3 deletions(-) + +Index: linux/drivers/net/ethernet/amazon/ena/ena_netdev.h +=================================================================== +--- linux.orig/drivers/net/ethernet/amazon/ena/ena_netdev.h ++++ linux/drivers/net/ethernet/amazon/ena/ena_netdev.h +@@ -43,9 +43,9 @@ + #include "ena_com.h" + #include "ena_eth_com.h" + +-#define DRV_MODULE_VER_MAJOR 1 +-#define DRV_MODULE_VER_MINOR 5 +-#define DRV_MODULE_VER_SUBMINOR 0 ++#define DRV_MODULE_VER_MAJOR 2 ++#define DRV_MODULE_VER_MINOR 0 ++#define DRV_MODULE_VER_SUBMINOR 1 + + #define DRV_MODULE_NAME "ena" + #ifndef DRV_MODULE_VERSION diff --git a/debian/patches/features/all/ena/0013-net-ena-fix-indentations-in-ena_defs-for-better-read.patch b/debian/patches/features/all/ena/0013-net-ena-fix-indentations-in-ena_defs-for-better-read.patch new file mode 100644 index 000000000..2e0ad4279 --- /dev/null +++ b/debian/patches/features/all/ena/0013-net-ena-fix-indentations-in-ena_defs-for-better-read.patch @@ -0,0 +1,1000 @@ +From: Arthur Kiyanovski <akiyano@amazon.com> +Date: Thu, 11 Oct 2018 11:26:27 +0300 +Subject: [PATCH 13/19] net: ena: fix indentations in ena_defs for better + readability +Origin: https://git.kernel.org/linus/be26667cb3947c90322467f1d15ad86b02350e00 + +Signed-off-by: Arthur Kiyanovski <akiyano@amazon.com> +Signed-off-by: David S. 
Miller <davem@davemloft.net> +--- + .../net/ethernet/amazon/ena/ena_admin_defs.h | 334 +++++++----------- + .../net/ethernet/amazon/ena/ena_eth_io_defs.h | 223 ++++++------ + .../net/ethernet/amazon/ena/ena_regs_defs.h | 206 +++++------ + 3 files changed, 338 insertions(+), 425 deletions(-) + +Index: linux/drivers/net/ethernet/amazon/ena/ena_admin_defs.h +=================================================================== +--- linux.orig/drivers/net/ethernet/amazon/ena/ena_admin_defs.h ++++ linux/drivers/net/ethernet/amazon/ena/ena_admin_defs.h +@@ -32,119 +32,81 @@ + #ifndef _ENA_ADMIN_H_ + #define _ENA_ADMIN_H_ + +-enum ena_admin_aq_opcode { +- ENA_ADMIN_CREATE_SQ = 1, +- +- ENA_ADMIN_DESTROY_SQ = 2, +- +- ENA_ADMIN_CREATE_CQ = 3, +- +- ENA_ADMIN_DESTROY_CQ = 4, +- +- ENA_ADMIN_GET_FEATURE = 8, + +- ENA_ADMIN_SET_FEATURE = 9, +- +- ENA_ADMIN_GET_STATS = 11, ++enum ena_admin_aq_opcode { ++ ENA_ADMIN_CREATE_SQ = 1, ++ ENA_ADMIN_DESTROY_SQ = 2, ++ ENA_ADMIN_CREATE_CQ = 3, ++ ENA_ADMIN_DESTROY_CQ = 4, ++ ENA_ADMIN_GET_FEATURE = 8, ++ ENA_ADMIN_SET_FEATURE = 9, ++ ENA_ADMIN_GET_STATS = 11, + }; + + enum ena_admin_aq_completion_status { +- ENA_ADMIN_SUCCESS = 0, +- +- ENA_ADMIN_RESOURCE_ALLOCATION_FAILURE = 1, +- +- ENA_ADMIN_BAD_OPCODE = 2, +- +- ENA_ADMIN_UNSUPPORTED_OPCODE = 3, +- +- ENA_ADMIN_MALFORMED_REQUEST = 4, +- ++ ENA_ADMIN_SUCCESS = 0, ++ ENA_ADMIN_RESOURCE_ALLOCATION_FAILURE = 1, ++ ENA_ADMIN_BAD_OPCODE = 2, ++ ENA_ADMIN_UNSUPPORTED_OPCODE = 3, ++ ENA_ADMIN_MALFORMED_REQUEST = 4, + /* Additional status is provided in ACQ entry extended_status */ +- ENA_ADMIN_ILLEGAL_PARAMETER = 5, +- +- ENA_ADMIN_UNKNOWN_ERROR = 6, +- +- ENA_ADMIN_RESOURCE_BUSY = 7, ++ ENA_ADMIN_ILLEGAL_PARAMETER = 5, ++ ENA_ADMIN_UNKNOWN_ERROR = 6, ++ ENA_ADMIN_RESOURCE_BUSY = 7, + }; + + enum ena_admin_aq_feature_id { +- ENA_ADMIN_DEVICE_ATTRIBUTES = 1, +- +- ENA_ADMIN_MAX_QUEUES_NUM = 2, +- +- ENA_ADMIN_HW_HINTS = 3, +- +- ENA_ADMIN_LLQ = 4, +- +- ENA_ADMIN_RSS_HASH_FUNCTION = 10, +- +- 
ENA_ADMIN_STATELESS_OFFLOAD_CONFIG = 11, +- +- ENA_ADMIN_RSS_REDIRECTION_TABLE_CONFIG = 12, +- +- ENA_ADMIN_MTU = 14, +- +- ENA_ADMIN_RSS_HASH_INPUT = 18, +- +- ENA_ADMIN_INTERRUPT_MODERATION = 20, +- +- ENA_ADMIN_AENQ_CONFIG = 26, +- +- ENA_ADMIN_LINK_CONFIG = 27, +- +- ENA_ADMIN_HOST_ATTR_CONFIG = 28, +- +- ENA_ADMIN_FEATURES_OPCODE_NUM = 32, ++ ENA_ADMIN_DEVICE_ATTRIBUTES = 1, ++ ENA_ADMIN_MAX_QUEUES_NUM = 2, ++ ENA_ADMIN_HW_HINTS = 3, ++ ENA_ADMIN_LLQ = 4, ++ ENA_ADMIN_RSS_HASH_FUNCTION = 10, ++ ENA_ADMIN_STATELESS_OFFLOAD_CONFIG = 11, ++ ENA_ADMIN_RSS_REDIRECTION_TABLE_CONFIG = 12, ++ ENA_ADMIN_MTU = 14, ++ ENA_ADMIN_RSS_HASH_INPUT = 18, ++ ENA_ADMIN_INTERRUPT_MODERATION = 20, ++ ENA_ADMIN_AENQ_CONFIG = 26, ++ ENA_ADMIN_LINK_CONFIG = 27, ++ ENA_ADMIN_HOST_ATTR_CONFIG = 28, ++ ENA_ADMIN_FEATURES_OPCODE_NUM = 32, + }; + + enum ena_admin_placement_policy_type { + /* descriptors and headers are in host memory */ +- ENA_ADMIN_PLACEMENT_POLICY_HOST = 1, +- ++ ENA_ADMIN_PLACEMENT_POLICY_HOST = 1, + /* descriptors and headers are in device memory (a.k.a Low Latency + * Queue) + */ +- ENA_ADMIN_PLACEMENT_POLICY_DEV = 3, ++ ENA_ADMIN_PLACEMENT_POLICY_DEV = 3, + }; + + enum ena_admin_link_types { +- ENA_ADMIN_LINK_SPEED_1G = 0x1, +- +- ENA_ADMIN_LINK_SPEED_2_HALF_G = 0x2, +- +- ENA_ADMIN_LINK_SPEED_5G = 0x4, +- +- ENA_ADMIN_LINK_SPEED_10G = 0x8, +- +- ENA_ADMIN_LINK_SPEED_25G = 0x10, +- +- ENA_ADMIN_LINK_SPEED_40G = 0x20, +- +- ENA_ADMIN_LINK_SPEED_50G = 0x40, +- +- ENA_ADMIN_LINK_SPEED_100G = 0x80, +- +- ENA_ADMIN_LINK_SPEED_200G = 0x100, +- +- ENA_ADMIN_LINK_SPEED_400G = 0x200, ++ ENA_ADMIN_LINK_SPEED_1G = 0x1, ++ ENA_ADMIN_LINK_SPEED_2_HALF_G = 0x2, ++ ENA_ADMIN_LINK_SPEED_5G = 0x4, ++ ENA_ADMIN_LINK_SPEED_10G = 0x8, ++ ENA_ADMIN_LINK_SPEED_25G = 0x10, ++ ENA_ADMIN_LINK_SPEED_40G = 0x20, ++ ENA_ADMIN_LINK_SPEED_50G = 0x40, ++ ENA_ADMIN_LINK_SPEED_100G = 0x80, ++ ENA_ADMIN_LINK_SPEED_200G = 0x100, ++ ENA_ADMIN_LINK_SPEED_400G = 0x200, + }; + + enum 
ena_admin_completion_policy_type { + /* completion queue entry for each sq descriptor */ +- ENA_ADMIN_COMPLETION_POLICY_DESC = 0, +- ++ ENA_ADMIN_COMPLETION_POLICY_DESC = 0, + /* completion queue entry upon request in sq descriptor */ +- ENA_ADMIN_COMPLETION_POLICY_DESC_ON_DEMAND = 1, +- ++ ENA_ADMIN_COMPLETION_POLICY_DESC_ON_DEMAND = 1, + /* current queue head pointer is updated in OS memory upon sq + * descriptor request + */ +- ENA_ADMIN_COMPLETION_POLICY_HEAD_ON_DEMAND = 2, +- ++ ENA_ADMIN_COMPLETION_POLICY_HEAD_ON_DEMAND = 2, + /* current queue head pointer is updated in OS memory for each sq + * descriptor + */ +- ENA_ADMIN_COMPLETION_POLICY_HEAD = 3, ++ ENA_ADMIN_COMPLETION_POLICY_HEAD = 3, + }; + + /* basic stats return ena_admin_basic_stats while extanded stats return a +@@ -152,15 +114,13 @@ enum ena_admin_completion_policy_type { + * device id + */ + enum ena_admin_get_stats_type { +- ENA_ADMIN_GET_STATS_TYPE_BASIC = 0, +- +- ENA_ADMIN_GET_STATS_TYPE_EXTENDED = 1, ++ ENA_ADMIN_GET_STATS_TYPE_BASIC = 0, ++ ENA_ADMIN_GET_STATS_TYPE_EXTENDED = 1, + }; + + enum ena_admin_get_stats_scope { +- ENA_ADMIN_SPECIFIC_QUEUE = 0, +- +- ENA_ADMIN_ETH_TRAFFIC = 1, ++ ENA_ADMIN_SPECIFIC_QUEUE = 0, ++ ENA_ADMIN_ETH_TRAFFIC = 1, + }; + + struct ena_admin_aq_common_desc { +@@ -231,7 +191,9 @@ struct ena_admin_acq_common_desc { + + u16 extended_status; + +- /* serves as a hint what AQ entries can be revoked */ ++ /* indicates to the driver which AQ entry has been consumed by the ++ * device and could be reused ++ */ + u16 sq_head_indx; + }; + +@@ -300,9 +262,8 @@ struct ena_admin_aq_create_sq_cmd { + }; + + enum ena_admin_sq_direction { +- ENA_ADMIN_SQ_DIRECTION_TX = 1, +- +- ENA_ADMIN_SQ_DIRECTION_RX = 2, ++ ENA_ADMIN_SQ_DIRECTION_TX = 1, ++ ENA_ADMIN_SQ_DIRECTION_RX = 2, + }; + + struct ena_admin_acq_create_sq_resp_desc { +@@ -664,9 +625,8 @@ struct ena_admin_feature_offload_desc { + }; + + enum ena_admin_hash_functions { +- ENA_ADMIN_TOEPLITZ = 1, +- +- ENA_ADMIN_CRC32 = 
2, ++ ENA_ADMIN_TOEPLITZ = 1, ++ ENA_ADMIN_CRC32 = 2, + }; + + struct ena_admin_feature_rss_flow_hash_control { +@@ -692,50 +652,35 @@ struct ena_admin_feature_rss_flow_hash_f + + /* RSS flow hash protocols */ + enum ena_admin_flow_hash_proto { +- ENA_ADMIN_RSS_TCP4 = 0, +- +- ENA_ADMIN_RSS_UDP4 = 1, +- +- ENA_ADMIN_RSS_TCP6 = 2, +- +- ENA_ADMIN_RSS_UDP6 = 3, +- +- ENA_ADMIN_RSS_IP4 = 4, +- +- ENA_ADMIN_RSS_IP6 = 5, +- +- ENA_ADMIN_RSS_IP4_FRAG = 6, +- +- ENA_ADMIN_RSS_NOT_IP = 7, +- ++ ENA_ADMIN_RSS_TCP4 = 0, ++ ENA_ADMIN_RSS_UDP4 = 1, ++ ENA_ADMIN_RSS_TCP6 = 2, ++ ENA_ADMIN_RSS_UDP6 = 3, ++ ENA_ADMIN_RSS_IP4 = 4, ++ ENA_ADMIN_RSS_IP6 = 5, ++ ENA_ADMIN_RSS_IP4_FRAG = 6, ++ ENA_ADMIN_RSS_NOT_IP = 7, + /* TCPv6 with extension header */ +- ENA_ADMIN_RSS_TCP6_EX = 8, +- ++ ENA_ADMIN_RSS_TCP6_EX = 8, + /* IPv6 with extension header */ +- ENA_ADMIN_RSS_IP6_EX = 9, +- +- ENA_ADMIN_RSS_PROTO_NUM = 16, ++ ENA_ADMIN_RSS_IP6_EX = 9, ++ ENA_ADMIN_RSS_PROTO_NUM = 16, + }; + + /* RSS flow hash fields */ + enum ena_admin_flow_hash_fields { + /* Ethernet Dest Addr */ +- ENA_ADMIN_RSS_L2_DA = BIT(0), +- ++ ENA_ADMIN_RSS_L2_DA = BIT(0), + /* Ethernet Src Addr */ +- ENA_ADMIN_RSS_L2_SA = BIT(1), +- ++ ENA_ADMIN_RSS_L2_SA = BIT(1), + /* ipv4/6 Dest Addr */ +- ENA_ADMIN_RSS_L3_DA = BIT(2), +- ++ ENA_ADMIN_RSS_L3_DA = BIT(2), + /* ipv4/6 Src Addr */ +- ENA_ADMIN_RSS_L3_SA = BIT(3), +- ++ ENA_ADMIN_RSS_L3_SA = BIT(3), + /* tcp/udp Dest Port */ +- ENA_ADMIN_RSS_L4_DP = BIT(4), +- ++ ENA_ADMIN_RSS_L4_DP = BIT(4), + /* tcp/udp Src Port */ +- ENA_ADMIN_RSS_L4_SP = BIT(5), ++ ENA_ADMIN_RSS_L4_SP = BIT(5), + }; + + struct ena_admin_proto_input { +@@ -774,19 +719,13 @@ struct ena_admin_feature_rss_flow_hash_i + }; + + enum ena_admin_os_type { +- ENA_ADMIN_OS_LINUX = 1, +- +- ENA_ADMIN_OS_WIN = 2, +- +- ENA_ADMIN_OS_DPDK = 3, +- +- ENA_ADMIN_OS_FREEBSD = 4, +- +- ENA_ADMIN_OS_IPXE = 5, +- +- ENA_ADMIN_OS_ESXI = 6, +- +- ENA_ADMIN_OS_GROUPS_NUM = 6, ++ ENA_ADMIN_OS_LINUX = 1, ++ ENA_ADMIN_OS_WIN 
= 2, ++ ENA_ADMIN_OS_DPDK = 3, ++ ENA_ADMIN_OS_FREEBSD = 4, ++ ENA_ADMIN_OS_IPXE = 5, ++ ENA_ADMIN_OS_ESXI = 6, ++ ENA_ADMIN_OS_GROUPS_NUM = 6, + }; + + struct ena_admin_host_info { +@@ -981,25 +920,18 @@ struct ena_admin_aenq_common_desc { + + /* asynchronous event notification groups */ + enum ena_admin_aenq_group { +- ENA_ADMIN_LINK_CHANGE = 0, +- +- ENA_ADMIN_FATAL_ERROR = 1, +- +- ENA_ADMIN_WARNING = 2, +- +- ENA_ADMIN_NOTIFICATION = 3, +- +- ENA_ADMIN_KEEP_ALIVE = 4, +- +- ENA_ADMIN_AENQ_GROUPS_NUM = 5, ++ ENA_ADMIN_LINK_CHANGE = 0, ++ ENA_ADMIN_FATAL_ERROR = 1, ++ ENA_ADMIN_WARNING = 2, ++ ENA_ADMIN_NOTIFICATION = 3, ++ ENA_ADMIN_KEEP_ALIVE = 4, ++ ENA_ADMIN_AENQ_GROUPS_NUM = 5, + }; + + enum ena_admin_aenq_notification_syndrom { +- ENA_ADMIN_SUSPEND = 0, +- +- ENA_ADMIN_RESUME = 1, +- +- ENA_ADMIN_UPDATE_HINTS = 2, ++ ENA_ADMIN_SUSPEND = 0, ++ ENA_ADMIN_RESUME = 1, ++ ENA_ADMIN_UPDATE_HINTS = 2, + }; + + struct ena_admin_aenq_entry { +@@ -1034,27 +966,27 @@ struct ena_admin_ena_mmio_req_read_less_ + }; + + /* aq_common_desc */ +-#define ENA_ADMIN_AQ_COMMON_DESC_COMMAND_ID_MASK GENMASK(11, 0) +-#define ENA_ADMIN_AQ_COMMON_DESC_PHASE_MASK BIT(0) +-#define ENA_ADMIN_AQ_COMMON_DESC_CTRL_DATA_SHIFT 1 +-#define ENA_ADMIN_AQ_COMMON_DESC_CTRL_DATA_MASK BIT(1) +-#define ENA_ADMIN_AQ_COMMON_DESC_CTRL_DATA_INDIRECT_SHIFT 2 +-#define ENA_ADMIN_AQ_COMMON_DESC_CTRL_DATA_INDIRECT_MASK BIT(2) ++#define ENA_ADMIN_AQ_COMMON_DESC_COMMAND_ID_MASK GENMASK(11, 0) ++#define ENA_ADMIN_AQ_COMMON_DESC_PHASE_MASK BIT(0) ++#define ENA_ADMIN_AQ_COMMON_DESC_CTRL_DATA_SHIFT 1 ++#define ENA_ADMIN_AQ_COMMON_DESC_CTRL_DATA_MASK BIT(1) ++#define ENA_ADMIN_AQ_COMMON_DESC_CTRL_DATA_INDIRECT_SHIFT 2 ++#define ENA_ADMIN_AQ_COMMON_DESC_CTRL_DATA_INDIRECT_MASK BIT(2) + + /* sq */ +-#define ENA_ADMIN_SQ_SQ_DIRECTION_SHIFT 5 +-#define ENA_ADMIN_SQ_SQ_DIRECTION_MASK GENMASK(7, 5) ++#define ENA_ADMIN_SQ_SQ_DIRECTION_SHIFT 5 ++#define ENA_ADMIN_SQ_SQ_DIRECTION_MASK GENMASK(7, 5) + + /* acq_common_desc 
*/ +-#define ENA_ADMIN_ACQ_COMMON_DESC_COMMAND_ID_MASK GENMASK(11, 0) +-#define ENA_ADMIN_ACQ_COMMON_DESC_PHASE_MASK BIT(0) ++#define ENA_ADMIN_ACQ_COMMON_DESC_COMMAND_ID_MASK GENMASK(11, 0) ++#define ENA_ADMIN_ACQ_COMMON_DESC_PHASE_MASK BIT(0) + + /* aq_create_sq_cmd */ +-#define ENA_ADMIN_AQ_CREATE_SQ_CMD_SQ_DIRECTION_SHIFT 5 +-#define ENA_ADMIN_AQ_CREATE_SQ_CMD_SQ_DIRECTION_MASK GENMASK(7, 5) +-#define ENA_ADMIN_AQ_CREATE_SQ_CMD_PLACEMENT_POLICY_MASK GENMASK(3, 0) +-#define ENA_ADMIN_AQ_CREATE_SQ_CMD_COMPLETION_POLICY_SHIFT 4 +-#define ENA_ADMIN_AQ_CREATE_SQ_CMD_COMPLETION_POLICY_MASK GENMASK(6, 4) ++#define ENA_ADMIN_AQ_CREATE_SQ_CMD_SQ_DIRECTION_SHIFT 5 ++#define ENA_ADMIN_AQ_CREATE_SQ_CMD_SQ_DIRECTION_MASK GENMASK(7, 5) ++#define ENA_ADMIN_AQ_CREATE_SQ_CMD_PLACEMENT_POLICY_MASK GENMASK(3, 0) ++#define ENA_ADMIN_AQ_CREATE_SQ_CMD_COMPLETION_POLICY_SHIFT 4 ++#define ENA_ADMIN_AQ_CREATE_SQ_CMD_COMPLETION_POLICY_MASK GENMASK(6, 4) + #define ENA_ADMIN_AQ_CREATE_SQ_CMD_IS_PHYSICALLY_CONTIGUOUS_MASK BIT(0) + + /* aq_create_cq_cmd */ +@@ -1063,12 +995,12 @@ struct ena_admin_ena_mmio_req_read_less_ + #define ENA_ADMIN_AQ_CREATE_CQ_CMD_CQ_ENTRY_SIZE_WORDS_MASK GENMASK(4, 0) + + /* get_set_feature_common_desc */ +-#define ENA_ADMIN_GET_SET_FEATURE_COMMON_DESC_SELECT_MASK GENMASK(1, 0) ++#define ENA_ADMIN_GET_SET_FEATURE_COMMON_DESC_SELECT_MASK GENMASK(1, 0) + + /* get_feature_link_desc */ +-#define ENA_ADMIN_GET_FEATURE_LINK_DESC_AUTONEG_MASK BIT(0) +-#define ENA_ADMIN_GET_FEATURE_LINK_DESC_DUPLEX_SHIFT 1 +-#define ENA_ADMIN_GET_FEATURE_LINK_DESC_DUPLEX_MASK BIT(1) ++#define ENA_ADMIN_GET_FEATURE_LINK_DESC_AUTONEG_MASK BIT(0) ++#define ENA_ADMIN_GET_FEATURE_LINK_DESC_DUPLEX_SHIFT 1 ++#define ENA_ADMIN_GET_FEATURE_LINK_DESC_DUPLEX_MASK BIT(1) + + /* feature_offload_desc */ + #define ENA_ADMIN_FEATURE_OFFLOAD_DESC_TX_L3_CSUM_IPV4_MASK BIT(0) +@@ -1080,19 +1012,19 @@ struct ena_admin_ena_mmio_req_read_less_ + #define ENA_ADMIN_FEATURE_OFFLOAD_DESC_TX_L4_IPV6_CSUM_PART_MASK 
BIT(3) + #define ENA_ADMIN_FEATURE_OFFLOAD_DESC_TX_L4_IPV6_CSUM_FULL_SHIFT 4 + #define ENA_ADMIN_FEATURE_OFFLOAD_DESC_TX_L4_IPV6_CSUM_FULL_MASK BIT(4) +-#define ENA_ADMIN_FEATURE_OFFLOAD_DESC_TSO_IPV4_SHIFT 5 +-#define ENA_ADMIN_FEATURE_OFFLOAD_DESC_TSO_IPV4_MASK BIT(5) +-#define ENA_ADMIN_FEATURE_OFFLOAD_DESC_TSO_IPV6_SHIFT 6 +-#define ENA_ADMIN_FEATURE_OFFLOAD_DESC_TSO_IPV6_MASK BIT(6) +-#define ENA_ADMIN_FEATURE_OFFLOAD_DESC_TSO_ECN_SHIFT 7 +-#define ENA_ADMIN_FEATURE_OFFLOAD_DESC_TSO_ECN_MASK BIT(7) ++#define ENA_ADMIN_FEATURE_OFFLOAD_DESC_TSO_IPV4_SHIFT 5 ++#define ENA_ADMIN_FEATURE_OFFLOAD_DESC_TSO_IPV4_MASK BIT(5) ++#define ENA_ADMIN_FEATURE_OFFLOAD_DESC_TSO_IPV6_SHIFT 6 ++#define ENA_ADMIN_FEATURE_OFFLOAD_DESC_TSO_IPV6_MASK BIT(6) ++#define ENA_ADMIN_FEATURE_OFFLOAD_DESC_TSO_ECN_SHIFT 7 ++#define ENA_ADMIN_FEATURE_OFFLOAD_DESC_TSO_ECN_MASK BIT(7) + #define ENA_ADMIN_FEATURE_OFFLOAD_DESC_RX_L3_CSUM_IPV4_MASK BIT(0) + #define ENA_ADMIN_FEATURE_OFFLOAD_DESC_RX_L4_IPV4_CSUM_SHIFT 1 + #define ENA_ADMIN_FEATURE_OFFLOAD_DESC_RX_L4_IPV4_CSUM_MASK BIT(1) + #define ENA_ADMIN_FEATURE_OFFLOAD_DESC_RX_L4_IPV6_CSUM_SHIFT 2 + #define ENA_ADMIN_FEATURE_OFFLOAD_DESC_RX_L4_IPV6_CSUM_MASK BIT(2) +-#define ENA_ADMIN_FEATURE_OFFLOAD_DESC_RX_HASH_SHIFT 3 +-#define ENA_ADMIN_FEATURE_OFFLOAD_DESC_RX_HASH_MASK BIT(3) ++#define ENA_ADMIN_FEATURE_OFFLOAD_DESC_RX_HASH_SHIFT 3 ++#define ENA_ADMIN_FEATURE_OFFLOAD_DESC_RX_HASH_MASK BIT(3) + + /* feature_rss_flow_hash_function */ + #define ENA_ADMIN_FEATURE_RSS_FLOW_HASH_FUNCTION_FUNCS_MASK GENMASK(7, 0) +@@ -1100,32 +1032,32 @@ struct ena_admin_ena_mmio_req_read_less_ + + /* feature_rss_flow_hash_input */ + #define ENA_ADMIN_FEATURE_RSS_FLOW_HASH_INPUT_L3_SORT_SHIFT 1 +-#define ENA_ADMIN_FEATURE_RSS_FLOW_HASH_INPUT_L3_SORT_MASK BIT(1) ++#define ENA_ADMIN_FEATURE_RSS_FLOW_HASH_INPUT_L3_SORT_MASK BIT(1) + #define ENA_ADMIN_FEATURE_RSS_FLOW_HASH_INPUT_L4_SORT_SHIFT 2 +-#define ENA_ADMIN_FEATURE_RSS_FLOW_HASH_INPUT_L4_SORT_MASK BIT(2) 
++#define ENA_ADMIN_FEATURE_RSS_FLOW_HASH_INPUT_L4_SORT_MASK BIT(2) + #define ENA_ADMIN_FEATURE_RSS_FLOW_HASH_INPUT_ENABLE_L3_SORT_SHIFT 1 + #define ENA_ADMIN_FEATURE_RSS_FLOW_HASH_INPUT_ENABLE_L3_SORT_MASK BIT(1) + #define ENA_ADMIN_FEATURE_RSS_FLOW_HASH_INPUT_ENABLE_L4_SORT_SHIFT 2 + #define ENA_ADMIN_FEATURE_RSS_FLOW_HASH_INPUT_ENABLE_L4_SORT_MASK BIT(2) + + /* host_info */ +-#define ENA_ADMIN_HOST_INFO_MAJOR_MASK GENMASK(7, 0) +-#define ENA_ADMIN_HOST_INFO_MINOR_SHIFT 8 +-#define ENA_ADMIN_HOST_INFO_MINOR_MASK GENMASK(15, 8) +-#define ENA_ADMIN_HOST_INFO_SUB_MINOR_SHIFT 16 +-#define ENA_ADMIN_HOST_INFO_SUB_MINOR_MASK GENMASK(23, 16) +-#define ENA_ADMIN_HOST_INFO_MODULE_TYPE_SHIFT 24 +-#define ENA_ADMIN_HOST_INFO_MODULE_TYPE_MASK GENMASK(31, 24) +-#define ENA_ADMIN_HOST_INFO_FUNCTION_MASK GENMASK(2, 0) +-#define ENA_ADMIN_HOST_INFO_DEVICE_SHIFT 3 +-#define ENA_ADMIN_HOST_INFO_DEVICE_MASK GENMASK(7, 3) +-#define ENA_ADMIN_HOST_INFO_BUS_SHIFT 8 +-#define ENA_ADMIN_HOST_INFO_BUS_MASK GENMASK(15, 8) ++#define ENA_ADMIN_HOST_INFO_MAJOR_MASK GENMASK(7, 0) ++#define ENA_ADMIN_HOST_INFO_MINOR_SHIFT 8 ++#define ENA_ADMIN_HOST_INFO_MINOR_MASK GENMASK(15, 8) ++#define ENA_ADMIN_HOST_INFO_SUB_MINOR_SHIFT 16 ++#define ENA_ADMIN_HOST_INFO_SUB_MINOR_MASK GENMASK(23, 16) ++#define ENA_ADMIN_HOST_INFO_MODULE_TYPE_SHIFT 24 ++#define ENA_ADMIN_HOST_INFO_MODULE_TYPE_MASK GENMASK(31, 24) ++#define ENA_ADMIN_HOST_INFO_FUNCTION_MASK GENMASK(2, 0) ++#define ENA_ADMIN_HOST_INFO_DEVICE_SHIFT 3 ++#define ENA_ADMIN_HOST_INFO_DEVICE_MASK GENMASK(7, 3) ++#define ENA_ADMIN_HOST_INFO_BUS_SHIFT 8 ++#define ENA_ADMIN_HOST_INFO_BUS_MASK GENMASK(15, 8) + + /* aenq_common_desc */ +-#define ENA_ADMIN_AENQ_COMMON_DESC_PHASE_MASK BIT(0) ++#define ENA_ADMIN_AENQ_COMMON_DESC_PHASE_MASK BIT(0) + + /* aenq_link_change_desc */ +-#define ENA_ADMIN_AENQ_LINK_CHANGE_DESC_LINK_STATUS_MASK BIT(0) ++#define ENA_ADMIN_AENQ_LINK_CHANGE_DESC_LINK_STATUS_MASK BIT(0) + + #endif /*_ENA_ADMIN_H_ */ +Index: 
linux/drivers/net/ethernet/amazon/ena/ena_eth_io_defs.h +=================================================================== +--- linux.orig/drivers/net/ethernet/amazon/ena/ena_eth_io_defs.h ++++ linux/drivers/net/ethernet/amazon/ena/ena_eth_io_defs.h +@@ -33,25 +33,18 @@ + #define _ENA_ETH_IO_H_ + + enum ena_eth_io_l3_proto_index { +- ENA_ETH_IO_L3_PROTO_UNKNOWN = 0, +- +- ENA_ETH_IO_L3_PROTO_IPV4 = 8, +- +- ENA_ETH_IO_L3_PROTO_IPV6 = 11, +- +- ENA_ETH_IO_L3_PROTO_FCOE = 21, +- +- ENA_ETH_IO_L3_PROTO_ROCE = 22, ++ ENA_ETH_IO_L3_PROTO_UNKNOWN = 0, ++ ENA_ETH_IO_L3_PROTO_IPV4 = 8, ++ ENA_ETH_IO_L3_PROTO_IPV6 = 11, ++ ENA_ETH_IO_L3_PROTO_FCOE = 21, ++ ENA_ETH_IO_L3_PROTO_ROCE = 22, + }; + + enum ena_eth_io_l4_proto_index { +- ENA_ETH_IO_L4_PROTO_UNKNOWN = 0, +- +- ENA_ETH_IO_L4_PROTO_TCP = 12, +- +- ENA_ETH_IO_L4_PROTO_UDP = 13, +- +- ENA_ETH_IO_L4_PROTO_ROUTEABLE_ROCE = 23, ++ ENA_ETH_IO_L4_PROTO_UNKNOWN = 0, ++ ENA_ETH_IO_L4_PROTO_TCP = 12, ++ ENA_ETH_IO_L4_PROTO_UDP = 13, ++ ENA_ETH_IO_L4_PROTO_ROUTEABLE_ROCE = 23, + }; + + struct ena_eth_io_tx_desc { +@@ -307,116 +300,116 @@ struct ena_eth_io_numa_node_cfg_reg { + }; + + /* tx_desc */ +-#define ENA_ETH_IO_TX_DESC_LENGTH_MASK GENMASK(15, 0) +-#define ENA_ETH_IO_TX_DESC_REQ_ID_HI_SHIFT 16 +-#define ENA_ETH_IO_TX_DESC_REQ_ID_HI_MASK GENMASK(21, 16) +-#define ENA_ETH_IO_TX_DESC_META_DESC_SHIFT 23 +-#define ENA_ETH_IO_TX_DESC_META_DESC_MASK BIT(23) +-#define ENA_ETH_IO_TX_DESC_PHASE_SHIFT 24 +-#define ENA_ETH_IO_TX_DESC_PHASE_MASK BIT(24) +-#define ENA_ETH_IO_TX_DESC_FIRST_SHIFT 26 +-#define ENA_ETH_IO_TX_DESC_FIRST_MASK BIT(26) +-#define ENA_ETH_IO_TX_DESC_LAST_SHIFT 27 +-#define ENA_ETH_IO_TX_DESC_LAST_MASK BIT(27) +-#define ENA_ETH_IO_TX_DESC_COMP_REQ_SHIFT 28 +-#define ENA_ETH_IO_TX_DESC_COMP_REQ_MASK BIT(28) +-#define ENA_ETH_IO_TX_DESC_L3_PROTO_IDX_MASK GENMASK(3, 0) +-#define ENA_ETH_IO_TX_DESC_DF_SHIFT 4 +-#define ENA_ETH_IO_TX_DESC_DF_MASK BIT(4) +-#define ENA_ETH_IO_TX_DESC_TSO_EN_SHIFT 7 +-#define 
ENA_ETH_IO_TX_DESC_TSO_EN_MASK BIT(7) +-#define ENA_ETH_IO_TX_DESC_L4_PROTO_IDX_SHIFT 8 +-#define ENA_ETH_IO_TX_DESC_L4_PROTO_IDX_MASK GENMASK(12, 8) +-#define ENA_ETH_IO_TX_DESC_L3_CSUM_EN_SHIFT 13 +-#define ENA_ETH_IO_TX_DESC_L3_CSUM_EN_MASK BIT(13) +-#define ENA_ETH_IO_TX_DESC_L4_CSUM_EN_SHIFT 14 +-#define ENA_ETH_IO_TX_DESC_L4_CSUM_EN_MASK BIT(14) +-#define ENA_ETH_IO_TX_DESC_ETHERNET_FCS_DIS_SHIFT 15 +-#define ENA_ETH_IO_TX_DESC_ETHERNET_FCS_DIS_MASK BIT(15) +-#define ENA_ETH_IO_TX_DESC_L4_CSUM_PARTIAL_SHIFT 17 +-#define ENA_ETH_IO_TX_DESC_L4_CSUM_PARTIAL_MASK BIT(17) +-#define ENA_ETH_IO_TX_DESC_REQ_ID_LO_SHIFT 22 +-#define ENA_ETH_IO_TX_DESC_REQ_ID_LO_MASK GENMASK(31, 22) +-#define ENA_ETH_IO_TX_DESC_ADDR_HI_MASK GENMASK(15, 0) +-#define ENA_ETH_IO_TX_DESC_HEADER_LENGTH_SHIFT 24 +-#define ENA_ETH_IO_TX_DESC_HEADER_LENGTH_MASK GENMASK(31, 24) ++#define ENA_ETH_IO_TX_DESC_LENGTH_MASK GENMASK(15, 0) ++#define ENA_ETH_IO_TX_DESC_REQ_ID_HI_SHIFT 16 ++#define ENA_ETH_IO_TX_DESC_REQ_ID_HI_MASK GENMASK(21, 16) ++#define ENA_ETH_IO_TX_DESC_META_DESC_SHIFT 23 ++#define ENA_ETH_IO_TX_DESC_META_DESC_MASK BIT(23) ++#define ENA_ETH_IO_TX_DESC_PHASE_SHIFT 24 ++#define ENA_ETH_IO_TX_DESC_PHASE_MASK BIT(24) ++#define ENA_ETH_IO_TX_DESC_FIRST_SHIFT 26 ++#define ENA_ETH_IO_TX_DESC_FIRST_MASK BIT(26) ++#define ENA_ETH_IO_TX_DESC_LAST_SHIFT 27 ++#define ENA_ETH_IO_TX_DESC_LAST_MASK BIT(27) ++#define ENA_ETH_IO_TX_DESC_COMP_REQ_SHIFT 28 ++#define ENA_ETH_IO_TX_DESC_COMP_REQ_MASK BIT(28) ++#define ENA_ETH_IO_TX_DESC_L3_PROTO_IDX_MASK GENMASK(3, 0) ++#define ENA_ETH_IO_TX_DESC_DF_SHIFT 4 ++#define ENA_ETH_IO_TX_DESC_DF_MASK BIT(4) ++#define ENA_ETH_IO_TX_DESC_TSO_EN_SHIFT 7 ++#define ENA_ETH_IO_TX_DESC_TSO_EN_MASK BIT(7) ++#define ENA_ETH_IO_TX_DESC_L4_PROTO_IDX_SHIFT 8 ++#define ENA_ETH_IO_TX_DESC_L4_PROTO_IDX_MASK GENMASK(12, 8) ++#define ENA_ETH_IO_TX_DESC_L3_CSUM_EN_SHIFT 13 ++#define ENA_ETH_IO_TX_DESC_L3_CSUM_EN_MASK BIT(13) ++#define ENA_ETH_IO_TX_DESC_L4_CSUM_EN_SHIFT 14 
++#define ENA_ETH_IO_TX_DESC_L4_CSUM_EN_MASK BIT(14) ++#define ENA_ETH_IO_TX_DESC_ETHERNET_FCS_DIS_SHIFT 15 ++#define ENA_ETH_IO_TX_DESC_ETHERNET_FCS_DIS_MASK BIT(15) ++#define ENA_ETH_IO_TX_DESC_L4_CSUM_PARTIAL_SHIFT 17 ++#define ENA_ETH_IO_TX_DESC_L4_CSUM_PARTIAL_MASK BIT(17) ++#define ENA_ETH_IO_TX_DESC_REQ_ID_LO_SHIFT 22 ++#define ENA_ETH_IO_TX_DESC_REQ_ID_LO_MASK GENMASK(31, 22) ++#define ENA_ETH_IO_TX_DESC_ADDR_HI_MASK GENMASK(15, 0) ++#define ENA_ETH_IO_TX_DESC_HEADER_LENGTH_SHIFT 24 ++#define ENA_ETH_IO_TX_DESC_HEADER_LENGTH_MASK GENMASK(31, 24) + + /* tx_meta_desc */ +-#define ENA_ETH_IO_TX_META_DESC_REQ_ID_LO_MASK GENMASK(9, 0) +-#define ENA_ETH_IO_TX_META_DESC_EXT_VALID_SHIFT 14 +-#define ENA_ETH_IO_TX_META_DESC_EXT_VALID_MASK BIT(14) +-#define ENA_ETH_IO_TX_META_DESC_MSS_HI_SHIFT 16 +-#define ENA_ETH_IO_TX_META_DESC_MSS_HI_MASK GENMASK(19, 16) +-#define ENA_ETH_IO_TX_META_DESC_ETH_META_TYPE_SHIFT 20 +-#define ENA_ETH_IO_TX_META_DESC_ETH_META_TYPE_MASK BIT(20) +-#define ENA_ETH_IO_TX_META_DESC_META_STORE_SHIFT 21 +-#define ENA_ETH_IO_TX_META_DESC_META_STORE_MASK BIT(21) +-#define ENA_ETH_IO_TX_META_DESC_META_DESC_SHIFT 23 +-#define ENA_ETH_IO_TX_META_DESC_META_DESC_MASK BIT(23) +-#define ENA_ETH_IO_TX_META_DESC_PHASE_SHIFT 24 +-#define ENA_ETH_IO_TX_META_DESC_PHASE_MASK BIT(24) +-#define ENA_ETH_IO_TX_META_DESC_FIRST_SHIFT 26 +-#define ENA_ETH_IO_TX_META_DESC_FIRST_MASK BIT(26) +-#define ENA_ETH_IO_TX_META_DESC_LAST_SHIFT 27 +-#define ENA_ETH_IO_TX_META_DESC_LAST_MASK BIT(27) +-#define ENA_ETH_IO_TX_META_DESC_COMP_REQ_SHIFT 28 +-#define ENA_ETH_IO_TX_META_DESC_COMP_REQ_MASK BIT(28) +-#define ENA_ETH_IO_TX_META_DESC_REQ_ID_HI_MASK GENMASK(5, 0) +-#define ENA_ETH_IO_TX_META_DESC_L3_HDR_LEN_MASK GENMASK(7, 0) +-#define ENA_ETH_IO_TX_META_DESC_L3_HDR_OFF_SHIFT 8 +-#define ENA_ETH_IO_TX_META_DESC_L3_HDR_OFF_MASK GENMASK(15, 8) +-#define ENA_ETH_IO_TX_META_DESC_L4_HDR_LEN_IN_WORDS_SHIFT 16 +-#define ENA_ETH_IO_TX_META_DESC_L4_HDR_LEN_IN_WORDS_MASK GENMASK(21, 
16) +-#define ENA_ETH_IO_TX_META_DESC_MSS_LO_SHIFT 22 +-#define ENA_ETH_IO_TX_META_DESC_MSS_LO_MASK GENMASK(31, 22) ++#define ENA_ETH_IO_TX_META_DESC_REQ_ID_LO_MASK GENMASK(9, 0) ++#define ENA_ETH_IO_TX_META_DESC_EXT_VALID_SHIFT 14 ++#define ENA_ETH_IO_TX_META_DESC_EXT_VALID_MASK BIT(14) ++#define ENA_ETH_IO_TX_META_DESC_MSS_HI_SHIFT 16 ++#define ENA_ETH_IO_TX_META_DESC_MSS_HI_MASK GENMASK(19, 16) ++#define ENA_ETH_IO_TX_META_DESC_ETH_META_TYPE_SHIFT 20 ++#define ENA_ETH_IO_TX_META_DESC_ETH_META_TYPE_MASK BIT(20) ++#define ENA_ETH_IO_TX_META_DESC_META_STORE_SHIFT 21 ++#define ENA_ETH_IO_TX_META_DESC_META_STORE_MASK BIT(21) ++#define ENA_ETH_IO_TX_META_DESC_META_DESC_SHIFT 23 ++#define ENA_ETH_IO_TX_META_DESC_META_DESC_MASK BIT(23) ++#define ENA_ETH_IO_TX_META_DESC_PHASE_SHIFT 24 ++#define ENA_ETH_IO_TX_META_DESC_PHASE_MASK BIT(24) ++#define ENA_ETH_IO_TX_META_DESC_FIRST_SHIFT 26 ++#define ENA_ETH_IO_TX_META_DESC_FIRST_MASK BIT(26) ++#define ENA_ETH_IO_TX_META_DESC_LAST_SHIFT 27 ++#define ENA_ETH_IO_TX_META_DESC_LAST_MASK BIT(27) ++#define ENA_ETH_IO_TX_META_DESC_COMP_REQ_SHIFT 28 ++#define ENA_ETH_IO_TX_META_DESC_COMP_REQ_MASK BIT(28) ++#define ENA_ETH_IO_TX_META_DESC_REQ_ID_HI_MASK GENMASK(5, 0) ++#define ENA_ETH_IO_TX_META_DESC_L3_HDR_LEN_MASK GENMASK(7, 0) ++#define ENA_ETH_IO_TX_META_DESC_L3_HDR_OFF_SHIFT 8 ++#define ENA_ETH_IO_TX_META_DESC_L3_HDR_OFF_MASK GENMASK(15, 8) ++#define ENA_ETH_IO_TX_META_DESC_L4_HDR_LEN_IN_WORDS_SHIFT 16 ++#define ENA_ETH_IO_TX_META_DESC_L4_HDR_LEN_IN_WORDS_MASK GENMASK(21, 16) ++#define ENA_ETH_IO_TX_META_DESC_MSS_LO_SHIFT 22 ++#define ENA_ETH_IO_TX_META_DESC_MSS_LO_MASK GENMASK(31, 22) + + /* tx_cdesc */ +-#define ENA_ETH_IO_TX_CDESC_PHASE_MASK BIT(0) ++#define ENA_ETH_IO_TX_CDESC_PHASE_MASK BIT(0) + + /* rx_desc */ +-#define ENA_ETH_IO_RX_DESC_PHASE_MASK BIT(0) +-#define ENA_ETH_IO_RX_DESC_FIRST_SHIFT 2 +-#define ENA_ETH_IO_RX_DESC_FIRST_MASK BIT(2) +-#define ENA_ETH_IO_RX_DESC_LAST_SHIFT 3 +-#define ENA_ETH_IO_RX_DESC_LAST_MASK 
BIT(3) +-#define ENA_ETH_IO_RX_DESC_COMP_REQ_SHIFT 4 +-#define ENA_ETH_IO_RX_DESC_COMP_REQ_MASK BIT(4) ++#define ENA_ETH_IO_RX_DESC_PHASE_MASK BIT(0) ++#define ENA_ETH_IO_RX_DESC_FIRST_SHIFT 2 ++#define ENA_ETH_IO_RX_DESC_FIRST_MASK BIT(2) ++#define ENA_ETH_IO_RX_DESC_LAST_SHIFT 3 ++#define ENA_ETH_IO_RX_DESC_LAST_MASK BIT(3) ++#define ENA_ETH_IO_RX_DESC_COMP_REQ_SHIFT 4 ++#define ENA_ETH_IO_RX_DESC_COMP_REQ_MASK BIT(4) + + /* rx_cdesc_base */ +-#define ENA_ETH_IO_RX_CDESC_BASE_L3_PROTO_IDX_MASK GENMASK(4, 0) +-#define ENA_ETH_IO_RX_CDESC_BASE_SRC_VLAN_CNT_SHIFT 5 +-#define ENA_ETH_IO_RX_CDESC_BASE_SRC_VLAN_CNT_MASK GENMASK(6, 5) +-#define ENA_ETH_IO_RX_CDESC_BASE_L4_PROTO_IDX_SHIFT 8 +-#define ENA_ETH_IO_RX_CDESC_BASE_L4_PROTO_IDX_MASK GENMASK(12, 8) +-#define ENA_ETH_IO_RX_CDESC_BASE_L3_CSUM_ERR_SHIFT 13 +-#define ENA_ETH_IO_RX_CDESC_BASE_L3_CSUM_ERR_MASK BIT(13) +-#define ENA_ETH_IO_RX_CDESC_BASE_L4_CSUM_ERR_SHIFT 14 +-#define ENA_ETH_IO_RX_CDESC_BASE_L4_CSUM_ERR_MASK BIT(14) +-#define ENA_ETH_IO_RX_CDESC_BASE_IPV4_FRAG_SHIFT 15 +-#define ENA_ETH_IO_RX_CDESC_BASE_IPV4_FRAG_MASK BIT(15) +-#define ENA_ETH_IO_RX_CDESC_BASE_L4_CSUM_CHECKED_SHIFT 16 +-#define ENA_ETH_IO_RX_CDESC_BASE_L4_CSUM_CHECKED_MASK BIT(16) +-#define ENA_ETH_IO_RX_CDESC_BASE_PHASE_SHIFT 24 +-#define ENA_ETH_IO_RX_CDESC_BASE_PHASE_MASK BIT(24) +-#define ENA_ETH_IO_RX_CDESC_BASE_L3_CSUM2_SHIFT 25 +-#define ENA_ETH_IO_RX_CDESC_BASE_L3_CSUM2_MASK BIT(25) +-#define ENA_ETH_IO_RX_CDESC_BASE_FIRST_SHIFT 26 +-#define ENA_ETH_IO_RX_CDESC_BASE_FIRST_MASK BIT(26) +-#define ENA_ETH_IO_RX_CDESC_BASE_LAST_SHIFT 27 +-#define ENA_ETH_IO_RX_CDESC_BASE_LAST_MASK BIT(27) +-#define ENA_ETH_IO_RX_CDESC_BASE_BUFFER_SHIFT 30 +-#define ENA_ETH_IO_RX_CDESC_BASE_BUFFER_MASK BIT(30) ++#define ENA_ETH_IO_RX_CDESC_BASE_L3_PROTO_IDX_MASK GENMASK(4, 0) ++#define ENA_ETH_IO_RX_CDESC_BASE_SRC_VLAN_CNT_SHIFT 5 ++#define ENA_ETH_IO_RX_CDESC_BASE_SRC_VLAN_CNT_MASK GENMASK(6, 5) ++#define ENA_ETH_IO_RX_CDESC_BASE_L4_PROTO_IDX_SHIFT 
8 ++#define ENA_ETH_IO_RX_CDESC_BASE_L4_PROTO_IDX_MASK GENMASK(12, 8) ++#define ENA_ETH_IO_RX_CDESC_BASE_L3_CSUM_ERR_SHIFT 13 ++#define ENA_ETH_IO_RX_CDESC_BASE_L3_CSUM_ERR_MASK BIT(13) ++#define ENA_ETH_IO_RX_CDESC_BASE_L4_CSUM_ERR_SHIFT 14 ++#define ENA_ETH_IO_RX_CDESC_BASE_L4_CSUM_ERR_MASK BIT(14) ++#define ENA_ETH_IO_RX_CDESC_BASE_IPV4_FRAG_SHIFT 15 ++#define ENA_ETH_IO_RX_CDESC_BASE_IPV4_FRAG_MASK BIT(15) ++#define ENA_ETH_IO_RX_CDESC_BASE_L4_CSUM_CHECKED_SHIFT 16 ++#define ENA_ETH_IO_RX_CDESC_BASE_L4_CSUM_CHECKED_MASK BIT(16) ++#define ENA_ETH_IO_RX_CDESC_BASE_PHASE_SHIFT 24 ++#define ENA_ETH_IO_RX_CDESC_BASE_PHASE_MASK BIT(24) ++#define ENA_ETH_IO_RX_CDESC_BASE_L3_CSUM2_SHIFT 25 ++#define ENA_ETH_IO_RX_CDESC_BASE_L3_CSUM2_MASK BIT(25) ++#define ENA_ETH_IO_RX_CDESC_BASE_FIRST_SHIFT 26 ++#define ENA_ETH_IO_RX_CDESC_BASE_FIRST_MASK BIT(26) ++#define ENA_ETH_IO_RX_CDESC_BASE_LAST_SHIFT 27 ++#define ENA_ETH_IO_RX_CDESC_BASE_LAST_MASK BIT(27) ++#define ENA_ETH_IO_RX_CDESC_BASE_BUFFER_SHIFT 30 ++#define ENA_ETH_IO_RX_CDESC_BASE_BUFFER_MASK BIT(30) + + /* intr_reg */ +-#define ENA_ETH_IO_INTR_REG_RX_INTR_DELAY_MASK GENMASK(14, 0) +-#define ENA_ETH_IO_INTR_REG_TX_INTR_DELAY_SHIFT 15 +-#define ENA_ETH_IO_INTR_REG_TX_INTR_DELAY_MASK GENMASK(29, 15) +-#define ENA_ETH_IO_INTR_REG_INTR_UNMASK_SHIFT 30 +-#define ENA_ETH_IO_INTR_REG_INTR_UNMASK_MASK BIT(30) ++#define ENA_ETH_IO_INTR_REG_RX_INTR_DELAY_MASK GENMASK(14, 0) ++#define ENA_ETH_IO_INTR_REG_TX_INTR_DELAY_SHIFT 15 ++#define ENA_ETH_IO_INTR_REG_TX_INTR_DELAY_MASK GENMASK(29, 15) ++#define ENA_ETH_IO_INTR_REG_INTR_UNMASK_SHIFT 30 ++#define ENA_ETH_IO_INTR_REG_INTR_UNMASK_MASK BIT(30) + + /* numa_node_cfg_reg */ +-#define ENA_ETH_IO_NUMA_NODE_CFG_REG_NUMA_MASK GENMASK(7, 0) +-#define ENA_ETH_IO_NUMA_NODE_CFG_REG_ENABLED_SHIFT 31 +-#define ENA_ETH_IO_NUMA_NODE_CFG_REG_ENABLED_MASK BIT(31) ++#define ENA_ETH_IO_NUMA_NODE_CFG_REG_NUMA_MASK GENMASK(7, 0) ++#define ENA_ETH_IO_NUMA_NODE_CFG_REG_ENABLED_SHIFT 31 ++#define 
ENA_ETH_IO_NUMA_NODE_CFG_REG_ENABLED_MASK BIT(31) + + #endif /*_ENA_ETH_IO_H_ */ +Index: linux/drivers/net/ethernet/amazon/ena/ena_regs_defs.h +=================================================================== +--- linux.orig/drivers/net/ethernet/amazon/ena/ena_regs_defs.h ++++ linux/drivers/net/ethernet/amazon/ena/ena_regs_defs.h +@@ -33,137 +33,125 @@ + #define _ENA_REGS_H_ + + enum ena_regs_reset_reason_types { +- ENA_REGS_RESET_NORMAL = 0, +- +- ENA_REGS_RESET_KEEP_ALIVE_TO = 1, +- +- ENA_REGS_RESET_ADMIN_TO = 2, +- +- ENA_REGS_RESET_MISS_TX_CMPL = 3, +- +- ENA_REGS_RESET_INV_RX_REQ_ID = 4, +- +- ENA_REGS_RESET_INV_TX_REQ_ID = 5, +- +- ENA_REGS_RESET_TOO_MANY_RX_DESCS = 6, +- +- ENA_REGS_RESET_INIT_ERR = 7, +- +- ENA_REGS_RESET_DRIVER_INVALID_STATE = 8, +- +- ENA_REGS_RESET_OS_TRIGGER = 9, +- +- ENA_REGS_RESET_OS_NETDEV_WD = 10, +- +- ENA_REGS_RESET_SHUTDOWN = 11, +- +- ENA_REGS_RESET_USER_TRIGGER = 12, +- +- ENA_REGS_RESET_GENERIC = 13, +- +- ENA_REGS_RESET_MISS_INTERRUPT = 14, ++ ENA_REGS_RESET_NORMAL = 0, ++ ENA_REGS_RESET_KEEP_ALIVE_TO = 1, ++ ENA_REGS_RESET_ADMIN_TO = 2, ++ ENA_REGS_RESET_MISS_TX_CMPL = 3, ++ ENA_REGS_RESET_INV_RX_REQ_ID = 4, ++ ENA_REGS_RESET_INV_TX_REQ_ID = 5, ++ ENA_REGS_RESET_TOO_MANY_RX_DESCS = 6, ++ ENA_REGS_RESET_INIT_ERR = 7, ++ ENA_REGS_RESET_DRIVER_INVALID_STATE = 8, ++ ENA_REGS_RESET_OS_TRIGGER = 9, ++ ENA_REGS_RESET_OS_NETDEV_WD = 10, ++ ENA_REGS_RESET_SHUTDOWN = 11, ++ ENA_REGS_RESET_USER_TRIGGER = 12, ++ ENA_REGS_RESET_GENERIC = 13, ++ ENA_REGS_RESET_MISS_INTERRUPT = 14, + }; + + /* ena_registers offsets */ +-#define ENA_REGS_VERSION_OFF 0x0 +-#define ENA_REGS_CONTROLLER_VERSION_OFF 0x4 +-#define ENA_REGS_CAPS_OFF 0x8 +-#define ENA_REGS_CAPS_EXT_OFF 0xc +-#define ENA_REGS_AQ_BASE_LO_OFF 0x10 +-#define ENA_REGS_AQ_BASE_HI_OFF 0x14 +-#define ENA_REGS_AQ_CAPS_OFF 0x18 +-#define ENA_REGS_ACQ_BASE_LO_OFF 0x20 +-#define ENA_REGS_ACQ_BASE_HI_OFF 0x24 +-#define ENA_REGS_ACQ_CAPS_OFF 0x28 +-#define ENA_REGS_AQ_DB_OFF 0x2c +-#define 
ENA_REGS_ACQ_TAIL_OFF 0x30 +-#define ENA_REGS_AENQ_CAPS_OFF 0x34 +-#define ENA_REGS_AENQ_BASE_LO_OFF 0x38 +-#define ENA_REGS_AENQ_BASE_HI_OFF 0x3c +-#define ENA_REGS_AENQ_HEAD_DB_OFF 0x40 +-#define ENA_REGS_AENQ_TAIL_OFF 0x44 +-#define ENA_REGS_INTR_MASK_OFF 0x4c +-#define ENA_REGS_DEV_CTL_OFF 0x54 +-#define ENA_REGS_DEV_STS_OFF 0x58 +-#define ENA_REGS_MMIO_REG_READ_OFF 0x5c +-#define ENA_REGS_MMIO_RESP_LO_OFF 0x60 +-#define ENA_REGS_MMIO_RESP_HI_OFF 0x64 +-#define ENA_REGS_RSS_IND_ENTRY_UPDATE_OFF 0x68 ++ ++/* 0 base */ ++#define ENA_REGS_VERSION_OFF 0x0 ++#define ENA_REGS_CONTROLLER_VERSION_OFF 0x4 ++#define ENA_REGS_CAPS_OFF 0x8 ++#define ENA_REGS_CAPS_EXT_OFF 0xc ++#define ENA_REGS_AQ_BASE_LO_OFF 0x10 ++#define ENA_REGS_AQ_BASE_HI_OFF 0x14 ++#define ENA_REGS_AQ_CAPS_OFF 0x18 ++#define ENA_REGS_ACQ_BASE_LO_OFF 0x20 ++#define ENA_REGS_ACQ_BASE_HI_OFF 0x24 ++#define ENA_REGS_ACQ_CAPS_OFF 0x28 ++#define ENA_REGS_AQ_DB_OFF 0x2c ++#define ENA_REGS_ACQ_TAIL_OFF 0x30 ++#define ENA_REGS_AENQ_CAPS_OFF 0x34 ++#define ENA_REGS_AENQ_BASE_LO_OFF 0x38 ++#define ENA_REGS_AENQ_BASE_HI_OFF 0x3c ++#define ENA_REGS_AENQ_HEAD_DB_OFF 0x40 ++#define ENA_REGS_AENQ_TAIL_OFF 0x44 ++#define ENA_REGS_INTR_MASK_OFF 0x4c ++#define ENA_REGS_DEV_CTL_OFF 0x54 ++#define ENA_REGS_DEV_STS_OFF 0x58 ++#define ENA_REGS_MMIO_REG_READ_OFF 0x5c ++#define ENA_REGS_MMIO_RESP_LO_OFF 0x60 ++#define ENA_REGS_MMIO_RESP_HI_OFF 0x64 ++#define ENA_REGS_RSS_IND_ENTRY_UPDATE_OFF 0x68 + + /* version register */ +-#define ENA_REGS_VERSION_MINOR_VERSION_MASK 0xff +-#define ENA_REGS_VERSION_MAJOR_VERSION_SHIFT 8 +-#define ENA_REGS_VERSION_MAJOR_VERSION_MASK 0xff00 ++#define ENA_REGS_VERSION_MINOR_VERSION_MASK 0xff ++#define ENA_REGS_VERSION_MAJOR_VERSION_SHIFT 8 ++#define ENA_REGS_VERSION_MAJOR_VERSION_MASK 0xff00 + + /* controller_version register */ +-#define ENA_REGS_CONTROLLER_VERSION_SUBMINOR_VERSION_MASK 0xff +-#define ENA_REGS_CONTROLLER_VERSION_MINOR_VERSION_SHIFT 8 +-#define 
ENA_REGS_CONTROLLER_VERSION_MINOR_VERSION_MASK 0xff00 +-#define ENA_REGS_CONTROLLER_VERSION_MAJOR_VERSION_SHIFT 16 +-#define ENA_REGS_CONTROLLER_VERSION_MAJOR_VERSION_MASK 0xff0000 +-#define ENA_REGS_CONTROLLER_VERSION_IMPL_ID_SHIFT 24 +-#define ENA_REGS_CONTROLLER_VERSION_IMPL_ID_MASK 0xff000000 ++#define ENA_REGS_CONTROLLER_VERSION_SUBMINOR_VERSION_MASK 0xff ++#define ENA_REGS_CONTROLLER_VERSION_MINOR_VERSION_SHIFT 8 ++#define ENA_REGS_CONTROLLER_VERSION_MINOR_VERSION_MASK 0xff00 ++#define ENA_REGS_CONTROLLER_VERSION_MAJOR_VERSION_SHIFT 16 ++#define ENA_REGS_CONTROLLER_VERSION_MAJOR_VERSION_MASK 0xff0000 ++#define ENA_REGS_CONTROLLER_VERSION_IMPL_ID_SHIFT 24 ++#define ENA_REGS_CONTROLLER_VERSION_IMPL_ID_MASK 0xff000000 + + /* caps register */ +-#define ENA_REGS_CAPS_CONTIGUOUS_QUEUE_REQUIRED_MASK 0x1 +-#define ENA_REGS_CAPS_RESET_TIMEOUT_SHIFT 1 +-#define ENA_REGS_CAPS_RESET_TIMEOUT_MASK 0x3e +-#define ENA_REGS_CAPS_DMA_ADDR_WIDTH_SHIFT 8 +-#define ENA_REGS_CAPS_DMA_ADDR_WIDTH_MASK 0xff00 +-#define ENA_REGS_CAPS_ADMIN_CMD_TO_SHIFT 16 +-#define ENA_REGS_CAPS_ADMIN_CMD_TO_MASK 0xf0000 ++#define ENA_REGS_CAPS_CONTIGUOUS_QUEUE_REQUIRED_MASK 0x1 ++#define ENA_REGS_CAPS_RESET_TIMEOUT_SHIFT 1 ++#define ENA_REGS_CAPS_RESET_TIMEOUT_MASK 0x3e ++#define ENA_REGS_CAPS_DMA_ADDR_WIDTH_SHIFT 8 ++#define ENA_REGS_CAPS_DMA_ADDR_WIDTH_MASK 0xff00 ++#define ENA_REGS_CAPS_ADMIN_CMD_TO_SHIFT 16 ++#define ENA_REGS_CAPS_ADMIN_CMD_TO_MASK 0xf0000 + + /* aq_caps register */ +-#define ENA_REGS_AQ_CAPS_AQ_DEPTH_MASK 0xffff +-#define ENA_REGS_AQ_CAPS_AQ_ENTRY_SIZE_SHIFT 16 +-#define ENA_REGS_AQ_CAPS_AQ_ENTRY_SIZE_MASK 0xffff0000 ++#define ENA_REGS_AQ_CAPS_AQ_DEPTH_MASK 0xffff ++#define ENA_REGS_AQ_CAPS_AQ_ENTRY_SIZE_SHIFT 16 ++#define ENA_REGS_AQ_CAPS_AQ_ENTRY_SIZE_MASK 0xffff0000 + + /* acq_caps register */ +-#define ENA_REGS_ACQ_CAPS_ACQ_DEPTH_MASK 0xffff +-#define ENA_REGS_ACQ_CAPS_ACQ_ENTRY_SIZE_SHIFT 16 +-#define ENA_REGS_ACQ_CAPS_ACQ_ENTRY_SIZE_MASK 0xffff0000 ++#define 
ENA_REGS_ACQ_CAPS_ACQ_DEPTH_MASK 0xffff ++#define ENA_REGS_ACQ_CAPS_ACQ_ENTRY_SIZE_SHIFT 16 ++#define ENA_REGS_ACQ_CAPS_ACQ_ENTRY_SIZE_MASK 0xffff0000 + + /* aenq_caps register */ +-#define ENA_REGS_AENQ_CAPS_AENQ_DEPTH_MASK 0xffff +-#define ENA_REGS_AENQ_CAPS_AENQ_ENTRY_SIZE_SHIFT 16 +-#define ENA_REGS_AENQ_CAPS_AENQ_ENTRY_SIZE_MASK 0xffff0000 ++#define ENA_REGS_AENQ_CAPS_AENQ_DEPTH_MASK 0xffff ++#define ENA_REGS_AENQ_CAPS_AENQ_ENTRY_SIZE_SHIFT 16 ++#define ENA_REGS_AENQ_CAPS_AENQ_ENTRY_SIZE_MASK 0xffff0000 + + /* dev_ctl register */ +-#define ENA_REGS_DEV_CTL_DEV_RESET_MASK 0x1 +-#define ENA_REGS_DEV_CTL_AQ_RESTART_SHIFT 1 +-#define ENA_REGS_DEV_CTL_AQ_RESTART_MASK 0x2 +-#define ENA_REGS_DEV_CTL_QUIESCENT_SHIFT 2 +-#define ENA_REGS_DEV_CTL_QUIESCENT_MASK 0x4 +-#define ENA_REGS_DEV_CTL_IO_RESUME_SHIFT 3 +-#define ENA_REGS_DEV_CTL_IO_RESUME_MASK 0x8 +-#define ENA_REGS_DEV_CTL_RESET_REASON_SHIFT 28 +-#define ENA_REGS_DEV_CTL_RESET_REASON_MASK 0xf0000000 ++#define ENA_REGS_DEV_CTL_DEV_RESET_MASK 0x1 ++#define ENA_REGS_DEV_CTL_AQ_RESTART_SHIFT 1 ++#define ENA_REGS_DEV_CTL_AQ_RESTART_MASK 0x2 ++#define ENA_REGS_DEV_CTL_QUIESCENT_SHIFT 2 ++#define ENA_REGS_DEV_CTL_QUIESCENT_MASK 0x4 ++#define ENA_REGS_DEV_CTL_IO_RESUME_SHIFT 3 ++#define ENA_REGS_DEV_CTL_IO_RESUME_MASK 0x8 ++#define ENA_REGS_DEV_CTL_RESET_REASON_SHIFT 28 ++#define ENA_REGS_DEV_CTL_RESET_REASON_MASK 0xf0000000 + + /* dev_sts register */ +-#define ENA_REGS_DEV_STS_READY_MASK 0x1 +-#define ENA_REGS_DEV_STS_AQ_RESTART_IN_PROGRESS_SHIFT 1 +-#define ENA_REGS_DEV_STS_AQ_RESTART_IN_PROGRESS_MASK 0x2 +-#define ENA_REGS_DEV_STS_AQ_RESTART_FINISHED_SHIFT 2 +-#define ENA_REGS_DEV_STS_AQ_RESTART_FINISHED_MASK 0x4 +-#define ENA_REGS_DEV_STS_RESET_IN_PROGRESS_SHIFT 3 +-#define ENA_REGS_DEV_STS_RESET_IN_PROGRESS_MASK 0x8 +-#define ENA_REGS_DEV_STS_RESET_FINISHED_SHIFT 4 +-#define ENA_REGS_DEV_STS_RESET_FINISHED_MASK 0x10 +-#define ENA_REGS_DEV_STS_FATAL_ERROR_SHIFT 5 +-#define ENA_REGS_DEV_STS_FATAL_ERROR_MASK 0x20 
+-#define ENA_REGS_DEV_STS_QUIESCENT_STATE_IN_PROGRESS_SHIFT 6 +-#define ENA_REGS_DEV_STS_QUIESCENT_STATE_IN_PROGRESS_MASK 0x40 +-#define ENA_REGS_DEV_STS_QUIESCENT_STATE_ACHIEVED_SHIFT 7 +-#define ENA_REGS_DEV_STS_QUIESCENT_STATE_ACHIEVED_MASK 0x80 ++#define ENA_REGS_DEV_STS_READY_MASK 0x1 ++#define ENA_REGS_DEV_STS_AQ_RESTART_IN_PROGRESS_SHIFT 1 ++#define ENA_REGS_DEV_STS_AQ_RESTART_IN_PROGRESS_MASK 0x2 ++#define ENA_REGS_DEV_STS_AQ_RESTART_FINISHED_SHIFT 2 ++#define ENA_REGS_DEV_STS_AQ_RESTART_FINISHED_MASK 0x4 ++#define ENA_REGS_DEV_STS_RESET_IN_PROGRESS_SHIFT 3 ++#define ENA_REGS_DEV_STS_RESET_IN_PROGRESS_MASK 0x8 ++#define ENA_REGS_DEV_STS_RESET_FINISHED_SHIFT 4 ++#define ENA_REGS_DEV_STS_RESET_FINISHED_MASK 0x10 ++#define ENA_REGS_DEV_STS_FATAL_ERROR_SHIFT 5 ++#define ENA_REGS_DEV_STS_FATAL_ERROR_MASK 0x20 ++#define ENA_REGS_DEV_STS_QUIESCENT_STATE_IN_PROGRESS_SHIFT 6 ++#define ENA_REGS_DEV_STS_QUIESCENT_STATE_IN_PROGRESS_MASK 0x40 ++#define ENA_REGS_DEV_STS_QUIESCENT_STATE_ACHIEVED_SHIFT 7 ++#define ENA_REGS_DEV_STS_QUIESCENT_STATE_ACHIEVED_MASK 0x80 + + /* mmio_reg_read register */ +-#define ENA_REGS_MMIO_REG_READ_REQ_ID_MASK 0xffff +-#define ENA_REGS_MMIO_REG_READ_REG_OFF_SHIFT 16 +-#define ENA_REGS_MMIO_REG_READ_REG_OFF_MASK 0xffff0000 ++#define ENA_REGS_MMIO_REG_READ_REQ_ID_MASK 0xffff ++#define ENA_REGS_MMIO_REG_READ_REG_OFF_SHIFT 16 ++#define ENA_REGS_MMIO_REG_READ_REG_OFF_MASK 0xffff0000 + + /* rss_ind_entry_update register */ +-#define ENA_REGS_RSS_IND_ENTRY_UPDATE_INDEX_MASK 0xffff +-#define ENA_REGS_RSS_IND_ENTRY_UPDATE_CQ_IDX_SHIFT 16 +-#define ENA_REGS_RSS_IND_ENTRY_UPDATE_CQ_IDX_MASK 0xffff0000 ++#define ENA_REGS_RSS_IND_ENTRY_UPDATE_INDEX_MASK 0xffff ++#define ENA_REGS_RSS_IND_ENTRY_UPDATE_CQ_IDX_SHIFT 16 ++#define ENA_REGS_RSS_IND_ENTRY_UPDATE_CQ_IDX_MASK 0xffff0000 + + #endif /*_ENA_REGS_H_ */ diff --git a/debian/patches/features/all/ena/0015-net-ena-enable-Low-Latency-Queues.patch 
b/debian/patches/features/all/ena/0015-net-ena-enable-Low-Latency-Queues.patch new file mode 100644 index 000000000..6161a87ba --- /dev/null +++ b/debian/patches/features/all/ena/0015-net-ena-enable-Low-Latency-Queues.patch @@ -0,0 +1,50 @@ +From: Arthur Kiyanovski <akiyano@amazon.com> +Date: Wed, 17 Oct 2018 15:33:23 +0300 +Subject: [PATCH 15/19] net: ena: enable Low Latency Queues +Origin: https://git.kernel.org/linus/9fd255928d7ffb56d8466fab3331d0b2f40aa8c7 + +Use the new API to enable usage of LLQ. + +Signed-off-by: Arthur Kiyanovski <akiyano@amazon.com> +Signed-off-by: David S. Miller <davem@davemloft.net> +--- + drivers/net/ethernet/amazon/ena/ena_netdev.c | 18 ++++-------------- + 1 file changed, 4 insertions(+), 14 deletions(-) + +Index: linux/drivers/net/ethernet/amazon/ena/ena_netdev.c +=================================================================== +--- linux.orig/drivers/net/ethernet/amazon/ena/ena_netdev.c ++++ linux/drivers/net/ethernet/amazon/ena/ena_netdev.c +@@ -3022,20 +3022,10 @@ static int ena_calc_io_queue_num(struct + int io_sq_num, io_queue_num; + + /* In case of LLQ use the llq number in the get feature cmd */ +- if (ena_dev->tx_mem_queue_type == ENA_ADMIN_PLACEMENT_POLICY_DEV) { +- io_sq_num = get_feat_ctx->max_queues.max_legacy_llq_num; +- +- if (io_sq_num == 0) { +- dev_err(&pdev->dev, +- "Trying to use LLQ but llq_num is 0. 
Fall back into regular queues\n"); +- +- ena_dev->tx_mem_queue_type = +- ENA_ADMIN_PLACEMENT_POLICY_HOST; +- io_sq_num = get_feat_ctx->max_queues.max_sq_num; +- } +- } else { ++ if (ena_dev->tx_mem_queue_type == ENA_ADMIN_PLACEMENT_POLICY_DEV) ++ io_sq_num = get_feat_ctx->llq.max_llq_num; ++ else + io_sq_num = get_feat_ctx->max_queues.max_sq_num; +- } + + io_queue_num = min_t(int, num_online_cpus(), ENA_MAX_NUM_IO_QUEUES); + io_queue_num = min_t(int, io_queue_num, io_sq_num); +@@ -3238,7 +3228,7 @@ static int ena_calc_queue_size(struct pc + + if (ena_dev->tx_mem_queue_type == ENA_ADMIN_PLACEMENT_POLICY_DEV) + queue_size = min_t(u32, queue_size, +- get_feat_ctx->max_queues.max_legacy_llq_depth); ++ get_feat_ctx->llq.max_llq_depth); + + queue_size = rounddown_pow_of_two(queue_size); + diff --git a/debian/patches/features/all/ena/0016-net-ena-fix-compilation-error-in-xtensa-architecture.patch b/debian/patches/features/all/ena/0016-net-ena-fix-compilation-error-in-xtensa-architecture.patch new file mode 100644 index 000000000..b37c7a169 --- /dev/null +++ b/debian/patches/features/all/ena/0016-net-ena-fix-compilation-error-in-xtensa-architecture.patch @@ -0,0 +1,31 @@ +From: Arthur Kiyanovski <akiyano@amazon.com> +Date: Sun, 21 Oct 2018 18:07:14 +0300 +Subject: [PATCH 16/19] net: ena: fix compilation error in xtensa architecture +Origin: https://git.kernel.org/linus/00f17a8219f02139119d8b4547e032bf4888fa0d + +linux/prefetch.h is never explicitly included in ena_com, although +functions from it, such as prefetchw(), are used throughout ena_com. +This is an inclusion bug, and we fix it here by explicitly including +linux/prefetch.h. The bug was exposed when the driver was compiled +for the xtensa architecture. + +Fixes: 689b2bdaaa14 ("net: ena: add functions for handling Low Latency Queues in ena_com") +Fixes: 8c590f977638 ("ena: Fix Kconfig dependency on X86") +Signed-off-by: Arthur Kiyanovski <akiyano@amazon.com> +Signed-off-by: David S. 
Miller <davem@davemloft.net> +--- + drivers/net/ethernet/amazon/ena/ena_com.h | 1 + + 1 file changed, 1 insertion(+) + +Index: linux/drivers/net/ethernet/amazon/ena/ena_com.h +=================================================================== +--- linux.orig/drivers/net/ethernet/amazon/ena/ena_com.h ++++ linux/drivers/net/ethernet/amazon/ena/ena_com.h +@@ -38,6 +38,7 @@ + #include <linux/dma-mapping.h> + #include <linux/gfp.h> + #include <linux/io.h> ++#include <linux/prefetch.h> + #include <linux/sched.h> + #include <linux/sizes.h> + #include <linux/spinlock.h> diff --git a/debian/patches/features/all/ena/0017-net-ena-fix-crash-during-ena_remove.patch b/debian/patches/features/all/ena/0017-net-ena-fix-crash-during-ena_remove.patch new file mode 100644 index 000000000..59f2b13b7 --- /dev/null +++ b/debian/patches/features/all/ena/0017-net-ena-fix-crash-during-ena_remove.patch @@ -0,0 +1,97 @@ +From: Arthur Kiyanovski <akiyano@amazon.com> +Date: Mon, 19 Nov 2018 12:05:21 +0200 +Subject: [PATCH 18/19] net: ena: fix crash during ena_remove() +Origin: https://git.kernel.org/linus/58a54b9c62e206b8d5f6e59020bcb178fc271d8e + +In ena_remove() we have the following stack call: +ena_remove() + unregister_netdev() + ena_destroy_device() + netif_carrier_off() + +Calling netif_carrier_off() causes linkwatch to try to handle the +link change event on the already unregistered netdev, which leads +to a read from an unreadable memory address. + +This patch switches the order of the two functions, so that +netif_carrier_off() is called on a registered netdev. + +To accomplish this fix we also had to: +1. Remove the set bit ENA_FLAG_TRIGGER_RESET +2. Add a sanity check in ena_close() +both to prevent double device reset (when calling unregister_netdev() +ena_close is called, but the device was already deleted in +ena_destroy_device()). +3. 
Set the admin_queue running state to false to avoid using it after +device was reset (for example when calling ena_destroy_all_io_queues() +right after ena_com_dev_reset() in ena_down) + +Fixes: 944b28aa2982 ("net: ena: fix missing lock during device destruction") +Signed-off-by: Arthur Kiyanovski <akiyano@amazon.com> +Signed-off-by: David S. Miller <davem@davemloft.net> +--- + drivers/net/ethernet/amazon/ena/ena_netdev.c | 21 ++++++++++---------- + 1 file changed, 10 insertions(+), 11 deletions(-) + +--- a/drivers/net/ethernet/amazon/ena/ena_netdev.c ++++ b/drivers/net/ethernet/amazon/ena/ena_netdev.c +@@ -1853,6 +1853,8 @@ static void ena_down(struct ena_adapter + rc = ena_com_dev_reset(adapter->ena_dev, adapter->reset_reason); + if (rc) + dev_err(&adapter->pdev->dev, "Device reset failed\n"); ++ /* stop submitting admin commands on a device that was reset */ ++ ena_com_set_admin_running_state(adapter->ena_dev, false); + } + + ena_destroy_all_io_queues(adapter); +@@ -1919,6 +1921,9 @@ static int ena_close(struct net_device * + + netif_dbg(adapter, ifdown, netdev, "%s\n", __func__); + ++ if (!test_bit(ENA_FLAG_DEVICE_RUNNING, &adapter->flags)) ++ return 0; ++ + if (test_bit(ENA_FLAG_DEV_UP, &adapter->flags)) + ena_down(adapter); + +@@ -2618,9 +2623,7 @@ static void ena_destroy_device(struct en + ena_down(adapter); + + /* Stop the device from sending AENQ events (in case reset flag is set +- * and device is up, ena_close already reset the device +- * In case the reset flag is set and the device is up, ena_down() +- * already perform the reset, so it can be skipped. ++ * and device is up, ena_down() already reset the device. 
+ */ + if (!(test_bit(ENA_FLAG_TRIGGER_RESET, &adapter->flags) && dev_up)) + ena_com_dev_reset(adapter->ena_dev, adapter->reset_reason); +@@ -3455,6 +3458,8 @@ err_rss: + ena_com_rss_destroy(ena_dev); + err_free_msix: + ena_com_dev_reset(ena_dev, ENA_REGS_RESET_INIT_ERR); ++ /* stop submitting admin commands on a device that was reset */ ++ ena_com_set_admin_running_state(ena_dev, false); + ena_free_mgmnt_irq(adapter); + ena_disable_msix(adapter); + err_worker_destroy: +@@ -3504,18 +3509,12 @@ static void ena_remove(struct pci_dev *p + del_timer_sync(&adapter->timer_service); + cancel_work_sync(&adapter->reset_task); + +- unregister_netdev(netdev); +- +- /* If the device is running then we want to make sure the device will be +- * reset to make sure no more events will be issued by the device. +- */ +- if (test_bit(ENA_FLAG_DEVICE_RUNNING, &adapter->flags)) +- set_bit(ENA_FLAG_TRIGGER_RESET, &adapter->flags); +- + rtnl_lock(); + ena_destroy_device(adapter, true); + rtnl_unlock(); + ++ unregister_netdev(netdev); ++ + free_netdev(netdev); + + ena_com_rss_destroy(ena_dev); diff --git a/debian/patches/features/all/ena/0018-net-ena-update-driver-version-from-2.0.1-to-2.0.2.patch b/debian/patches/features/all/ena/0018-net-ena-update-driver-version-from-2.0.1-to-2.0.2.patch new file mode 100644 index 000000000..68194bdc9 --- /dev/null +++ b/debian/patches/features/all/ena/0018-net-ena-update-driver-version-from-2.0.1-to-2.0.2.patch @@ -0,0 +1,26 @@ +From: Arthur Kiyanovski <akiyano@amazon.com> +Date: Mon, 19 Nov 2018 12:05:22 +0200 +Subject: [PATCH 19/19] net: ena: update driver version from 2.0.1 to 2.0.2 +Origin: https://git.kernel.org/linus/4c23738a3f9f203a9b41c89e030eaa8ee241f90f + +Update driver version due to critical bug fixes. + +Signed-off-by: Arthur Kiyanovski <akiyano@amazon.com> +Signed-off-by: David S. 
Miller <davem@davemloft.net> +--- + drivers/net/ethernet/amazon/ena/ena_netdev.h | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +Index: linux/drivers/net/ethernet/amazon/ena/ena_netdev.h +=================================================================== +--- linux.orig/drivers/net/ethernet/amazon/ena/ena_netdev.h ++++ linux/drivers/net/ethernet/amazon/ena/ena_netdev.h +@@ -45,7 +45,7 @@ + + #define DRV_MODULE_VER_MAJOR 2 + #define DRV_MODULE_VER_MINOR 0 +-#define DRV_MODULE_VER_SUBMINOR 1 ++#define DRV_MODULE_VER_SUBMINOR 2 + + #define DRV_MODULE_NAME "ena" + #ifndef DRV_MODULE_VERSION diff --git a/debian/patches/features/all/ena/net-ena-Fix-bug-where-ring-allocation-backoff-stoppe.patch b/debian/patches/features/all/ena/net-ena-Fix-bug-where-ring-allocation-backoff-stoppe.patch new file mode 100644 index 000000000..d2e7da1ab --- /dev/null +++ b/debian/patches/features/all/ena/net-ena-Fix-bug-where-ring-allocation-backoff-stoppe.patch @@ -0,0 +1,39 @@ +From: Sameeh Jubran <sameehj@amazon.com> +Date: Sun, 23 Jun 2019 10:11:10 +0300 +Subject: [PATCH] net: ena: Fix bug where ring allocation backoff stopped too + late +Origin: https://git.kernel.org/linus/3e5bfb189e1a65df132fd0e3fa00fbb6feec1431 +Bug-Debian: https://bugs.debian.org/941291 + +The current code of create_queues_with_size_backoff() allows the ring size +to become as small as ENA_MIN_RING_SIZE/2. This is a bug since we don't +want the queue ring to be smaller than ENA_MIN_RING_SIZE + +In this commit we change the loop's termination condition to look at the +queue size of the next iteration instead of that of the current one, +so that the minimal queue size again becomes ENA_MIN_RING_SIZE. + +Fixes: eece4d2ab9d2 ("net: ena: add ethtool function for changing io queue sizes") + +Signed-off-by: Arthur Kiyanovski <akiyano@amazon.com> +Signed-off-by: Sameeh Jubran <sameehj@amazon.com> +Signed-off-by: David S. 
Miller <davem@davemloft.net> +--- + drivers/net/ethernet/amazon/ena/ena_netdev.c | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +Index: linux/drivers/net/ethernet/amazon/ena/ena_netdev.c +=================================================================== +--- linux.orig/drivers/net/ethernet/amazon/ena/ena_netdev.c ++++ linux/drivers/net/ethernet/amazon/ena/ena_netdev.c +@@ -1836,8 +1836,8 @@ err_setup_tx: + if (cur_rx_ring_size >= cur_tx_ring_size) + new_rx_ring_size = cur_rx_ring_size / 2; + +- if (cur_tx_ring_size < ENA_MIN_RING_SIZE || +- cur_rx_ring_size < ENA_MIN_RING_SIZE) { ++ if (new_tx_ring_size < ENA_MIN_RING_SIZE || ++ new_rx_ring_size < ENA_MIN_RING_SIZE) { + netif_err(adapter, ifup, adapter->netdev, + "Queue creation failed with the smallest possible queue size of %d for both queues. Not retrying with smaller queues\n", + ENA_MIN_RING_SIZE); diff --git a/debian/patches/features/all/ena/net-ena-add-MAX_QUEUES_EXT-get-feature-admin-command.patch b/debian/patches/features/all/ena/net-ena-add-MAX_QUEUES_EXT-get-feature-admin-command.patch new file mode 100644 index 000000000..bf790f061 --- /dev/null +++ b/debian/patches/features/all/ena/net-ena-add-MAX_QUEUES_EXT-get-feature-admin-command.patch @@ -0,0 +1,345 @@ +From: Arthur Kiyanovski <akiyano@amazon.com> +Date: Tue, 11 Jun 2019 14:58:05 +0300 +Subject: [PATCH] net: ena: add MAX_QUEUES_EXT get feature admin command +Origin: https://git.kernel.org/linus/ba8ef506fb91005fc4808370b7587ab7bf4bd918 +Bug-Debian: https://bugs.debian.org/941291 + +Add a new admin command to support different queue size for Tx/Rx +queues (the change also support different SQ/CQ sizes) + +Signed-off-by: Arthur Kiyanovski <akiyano@amazon.com> +Signed-off-by: Sameeh Jubran <sameehj@amazon.com> +Signed-off-by: David S. 
Miller <davem@davemloft.net> +--- + .../net/ethernet/amazon/ena/ena_admin_defs.h | 56 +++++++++++++- + drivers/net/ethernet/amazon/ena/ena_com.c | 76 ++++++++++++------- + drivers/net/ethernet/amazon/ena/ena_com.h | 3 + + 3 files changed, 105 insertions(+), 30 deletions(-) + +--- a/drivers/net/ethernet/amazon/ena/ena_admin_defs.h ++++ b/drivers/net/ethernet/amazon/ena/ena_admin_defs.h +@@ -60,6 +60,7 @@ enum ena_admin_aq_feature_id { + ENA_ADMIN_MAX_QUEUES_NUM = 2, + ENA_ADMIN_HW_HINTS = 3, + ENA_ADMIN_LLQ = 4, ++ ENA_ADMIN_MAX_QUEUES_EXT = 7, + ENA_ADMIN_RSS_HASH_FUNCTION = 10, + ENA_ADMIN_STATELESS_OFFLOAD_CONFIG = 11, + ENA_ADMIN_RSS_REDIRECTION_TABLE_CONFIG = 12, +@@ -421,7 +422,13 @@ struct ena_admin_get_set_feature_common_ + /* as appears in ena_admin_aq_feature_id */ + u8 feature_id; + +- u16 reserved16; ++ /* The driver specifies the max feature version it supports and the ++ * device responds with the currently supported feature version. The ++ * field is zero based ++ */ ++ u8 feature_version; ++ ++ u8 reserved8; + }; + + struct ena_admin_device_attr_feature_desc { +@@ -531,6 +538,34 @@ struct ena_admin_feature_llq_desc { + u32 max_tx_burst_size; + }; + ++struct ena_admin_queue_ext_feature_fields { ++ u32 max_tx_sq_num; ++ ++ u32 max_tx_cq_num; ++ ++ u32 max_rx_sq_num; ++ ++ u32 max_rx_cq_num; ++ ++ u32 max_tx_sq_depth; ++ ++ u32 max_tx_cq_depth; ++ ++ u32 max_rx_sq_depth; ++ ++ u32 max_rx_cq_depth; ++ ++ u32 max_tx_header_size; ++ ++ /* Maximum Descriptors number, including meta descriptor, allowed for ++ * a single Tx packet ++ */ ++ u16 max_per_packet_tx_descs; ++ ++ /* Maximum Descriptors number allowed for a single Rx packet */ ++ u16 max_per_packet_rx_descs; ++}; ++ + struct ena_admin_queue_feature_desc { + u32 max_sq_num; + +@@ -837,6 +872,19 @@ struct ena_admin_get_feat_cmd { + u32 raw[11]; + }; + ++struct ena_admin_queue_ext_feature_desc { ++ /* version */ ++ u8 version; ++ ++ u8 reserved1[3]; ++ ++ union { ++ struct 
ena_admin_queue_ext_feature_fields max_queue_ext; ++ ++ u32 raw[10]; ++ }; ++}; ++ + struct ena_admin_get_feat_resp { + struct ena_admin_acq_common_desc acq_common_desc; + +@@ -849,6 +897,8 @@ struct ena_admin_get_feat_resp { + + struct ena_admin_queue_feature_desc max_queue; + ++ struct ena_admin_queue_ext_feature_desc max_queue_ext; ++ + struct ena_admin_feature_aenq_desc aenq; + + struct ena_admin_get_feature_link_desc link; +@@ -913,7 +963,9 @@ struct ena_admin_aenq_common_desc { + + u16 syndrom; + +- /* 0 : phase */ ++ /* 0 : phase ++ * 7:1 : reserved - MBZ ++ */ + u8 flags; + + u8 reserved1[3]; +--- a/drivers/net/ethernet/amazon/ena/ena_com.c ++++ b/drivers/net/ethernet/amazon/ena/ena_com.c +@@ -983,7 +983,8 @@ static int ena_com_get_feature_ex(struct + struct ena_admin_get_feat_resp *get_resp, + enum ena_admin_aq_feature_id feature_id, + dma_addr_t control_buf_dma_addr, +- u32 control_buff_size) ++ u32 control_buff_size, ++ u8 feature_ver) + { + struct ena_com_admin_queue *admin_queue; + struct ena_admin_get_feat_cmd get_cmd; +@@ -1014,7 +1015,7 @@ static int ena_com_get_feature_ex(struct + } + + get_cmd.control_buffer.length = control_buff_size; +- ++ get_cmd.feat_common.feature_version = feature_ver; + get_cmd.feat_common.feature_id = feature_id; + + ret = ena_com_execute_admin_command(admin_queue, +@@ -1034,13 +1035,15 @@ static int ena_com_get_feature_ex(struct + + static int ena_com_get_feature(struct ena_com_dev *ena_dev, + struct ena_admin_get_feat_resp *get_resp, +- enum ena_admin_aq_feature_id feature_id) ++ enum ena_admin_aq_feature_id feature_id, ++ u8 feature_ver) + { + return ena_com_get_feature_ex(ena_dev, + get_resp, + feature_id, + 0, +- 0); ++ 0, ++ feature_ver); + } + + static void ena_com_hash_key_fill_default_key(struct ena_com_dev *ena_dev) +@@ -1118,7 +1121,7 @@ static int ena_com_indirect_table_alloca + int ret; + + ret = ena_com_get_feature(ena_dev, &get_resp, +- ENA_ADMIN_RSS_REDIRECTION_TABLE_CONFIG); ++ 
ENA_ADMIN_RSS_REDIRECTION_TABLE_CONFIG, 0); + if (unlikely(ret)) + return ret; + +@@ -1538,7 +1541,7 @@ int ena_com_set_aenq_config(struct ena_c + struct ena_admin_get_feat_resp get_resp; + int ret; + +- ret = ena_com_get_feature(ena_dev, &get_resp, ENA_ADMIN_AENQ_CONFIG); ++ ret = ena_com_get_feature(ena_dev, &get_resp, ENA_ADMIN_AENQ_CONFIG, 0); + if (ret) { + pr_info("Can't get aenq configuration\n"); + return ret; +@@ -1913,7 +1916,7 @@ void ena_com_destroy_io_queue(struct ena + int ena_com_get_link_params(struct ena_com_dev *ena_dev, + struct ena_admin_get_feat_resp *resp) + { +- return ena_com_get_feature(ena_dev, resp, ENA_ADMIN_LINK_CONFIG); ++ return ena_com_get_feature(ena_dev, resp, ENA_ADMIN_LINK_CONFIG, 0); + } + + int ena_com_get_dev_attr_feat(struct ena_com_dev *ena_dev, +@@ -1923,7 +1926,7 @@ int ena_com_get_dev_attr_feat(struct ena + int rc; + + rc = ena_com_get_feature(ena_dev, &get_resp, +- ENA_ADMIN_DEVICE_ATTRIBUTES); ++ ENA_ADMIN_DEVICE_ATTRIBUTES, 0); + if (rc) + return rc; + +@@ -1931,17 +1934,34 @@ int ena_com_get_dev_attr_feat(struct ena + sizeof(get_resp.u.dev_attr)); + ena_dev->supported_features = get_resp.u.dev_attr.supported_features; + +- rc = ena_com_get_feature(ena_dev, &get_resp, +- ENA_ADMIN_MAX_QUEUES_NUM); +- if (rc) +- return rc; ++ if (ena_dev->supported_features & BIT(ENA_ADMIN_MAX_QUEUES_EXT)) { ++ rc = ena_com_get_feature(ena_dev, &get_resp, ++ ENA_ADMIN_MAX_QUEUES_EXT, ++ ENA_FEATURE_MAX_QUEUE_EXT_VER); ++ if (rc) ++ return rc; + +- memcpy(&get_feat_ctx->max_queues, &get_resp.u.max_queue, +- sizeof(get_resp.u.max_queue)); +- ena_dev->tx_max_header_size = get_resp.u.max_queue.max_header_size; ++ if (get_resp.u.max_queue_ext.version != ENA_FEATURE_MAX_QUEUE_EXT_VER) ++ return -EINVAL; ++ ++ memcpy(&get_feat_ctx->max_queue_ext, &get_resp.u.max_queue_ext, ++ sizeof(get_resp.u.max_queue_ext)); ++ ena_dev->tx_max_header_size = ++ get_resp.u.max_queue_ext.max_queue_ext.max_tx_header_size; ++ } else { ++ rc = 
ena_com_get_feature(ena_dev, &get_resp, ++ ENA_ADMIN_MAX_QUEUES_NUM, 0); ++ memcpy(&get_feat_ctx->max_queues, &get_resp.u.max_queue, ++ sizeof(get_resp.u.max_queue)); ++ ena_dev->tx_max_header_size = ++ get_resp.u.max_queue.max_header_size; ++ ++ if (rc) ++ return rc; ++ } + + rc = ena_com_get_feature(ena_dev, &get_resp, +- ENA_ADMIN_AENQ_CONFIG); ++ ENA_ADMIN_AENQ_CONFIG, 0); + if (rc) + return rc; + +@@ -1949,7 +1969,7 @@ int ena_com_get_dev_attr_feat(struct ena + sizeof(get_resp.u.aenq)); + + rc = ena_com_get_feature(ena_dev, &get_resp, +- ENA_ADMIN_STATELESS_OFFLOAD_CONFIG); ++ ENA_ADMIN_STATELESS_OFFLOAD_CONFIG, 0); + if (rc) + return rc; + +@@ -1959,7 +1979,7 @@ int ena_com_get_dev_attr_feat(struct ena + /* Driver hints isn't mandatory admin command. So in case the + * command isn't supported set driver hints to 0 + */ +- rc = ena_com_get_feature(ena_dev, &get_resp, ENA_ADMIN_HW_HINTS); ++ rc = ena_com_get_feature(ena_dev, &get_resp, ENA_ADMIN_HW_HINTS, 0); + + if (!rc) + memcpy(&get_feat_ctx->hw_hints, &get_resp.u.hw_hints, +@@ -1970,7 +1990,7 @@ int ena_com_get_dev_attr_feat(struct ena + else + return rc; + +- rc = ena_com_get_feature(ena_dev, &get_resp, ENA_ADMIN_LLQ); ++ rc = ena_com_get_feature(ena_dev, &get_resp, ENA_ADMIN_LLQ, 0); + if (!rc) + memcpy(&get_feat_ctx->llq, &get_resp.u.llq, + sizeof(get_resp.u.llq)); +@@ -2208,7 +2228,7 @@ int ena_com_get_offload_settings(struct + struct ena_admin_get_feat_resp resp; + + ret = ena_com_get_feature(ena_dev, &resp, +- ENA_ADMIN_STATELESS_OFFLOAD_CONFIG); ++ ENA_ADMIN_STATELESS_OFFLOAD_CONFIG, 0); + if (unlikely(ret)) { + pr_err("Failed to get offload capabilities %d\n", ret); + return ret; +@@ -2237,7 +2257,7 @@ int ena_com_set_hash_function(struct ena + + /* Validate hash function is supported */ + ret = ena_com_get_feature(ena_dev, &get_resp, +- ENA_ADMIN_RSS_HASH_FUNCTION); ++ ENA_ADMIN_RSS_HASH_FUNCTION, 0); + if (unlikely(ret)) + return ret; + +@@ -2297,7 +2317,7 @@ int ena_com_fill_hash_function(struct 
en + rc = ena_com_get_feature_ex(ena_dev, &get_resp, + ENA_ADMIN_RSS_HASH_FUNCTION, + rss->hash_key_dma_addr, +- sizeof(*rss->hash_key)); ++ sizeof(*rss->hash_key), 0); + if (unlikely(rc)) + return rc; + +@@ -2350,7 +2370,7 @@ int ena_com_get_hash_function(struct ena + rc = ena_com_get_feature_ex(ena_dev, &get_resp, + ENA_ADMIN_RSS_HASH_FUNCTION, + rss->hash_key_dma_addr, +- sizeof(*rss->hash_key)); ++ sizeof(*rss->hash_key), 0); + if (unlikely(rc)) + return rc; + +@@ -2379,7 +2399,7 @@ int ena_com_get_hash_ctrl(struct ena_com + rc = ena_com_get_feature_ex(ena_dev, &get_resp, + ENA_ADMIN_RSS_HASH_INPUT, + rss->hash_ctrl_dma_addr, +- sizeof(*rss->hash_ctrl)); ++ sizeof(*rss->hash_ctrl), 0); + if (unlikely(rc)) + return rc; + +@@ -2615,7 +2635,7 @@ int ena_com_indirect_table_get(struct en + rc = ena_com_get_feature_ex(ena_dev, &get_resp, + ENA_ADMIN_RSS_REDIRECTION_TABLE_CONFIG, + rss->rss_ind_tbl_dma_addr, +- tbl_size); ++ tbl_size, 0); + if (unlikely(rc)) + return rc; + +@@ -2831,7 +2851,7 @@ int ena_com_init_interrupt_moderation(st + int rc; + + rc = ena_com_get_feature(ena_dev, &get_resp, +- ENA_ADMIN_INTERRUPT_MODERATION); ++ ENA_ADMIN_INTERRUPT_MODERATION, 0); + + if (rc) { + if (rc == -EOPNOTSUPP) { +--- a/drivers/net/ethernet/amazon/ena/ena_com.h ++++ b/drivers/net/ethernet/amazon/ena/ena_com.h +@@ -102,6 +102,8 @@ + + #define ENA_HW_HINTS_NO_TIMEOUT 0xFFFF + ++#define ENA_FEATURE_MAX_QUEUE_EXT_VER 1 ++ + enum ena_intr_moder_level { + ENA_INTR_MODER_LOWEST = 0, + ENA_INTR_MODER_LOW, +@@ -383,6 +385,7 @@ struct ena_com_dev { + + struct ena_com_dev_get_features_ctx { + struct ena_admin_queue_feature_desc max_queues; ++ struct ena_admin_queue_ext_feature_desc max_queue_ext; + struct ena_admin_device_attr_feature_desc dev_attr; + struct ena_admin_feature_aenq_desc aenq; + struct ena_admin_feature_offload_desc offload; diff --git a/debian/patches/features/all/ena/net-ena-add-ethtool-function-for-changing-io-queue-s.patch 
b/debian/patches/features/all/ena/net-ena-add-ethtool-function-for-changing-io-queue-s.patch new file mode 100644 index 000000000..227f39b3b --- /dev/null +++ b/debian/patches/features/all/ena/net-ena-add-ethtool-function-for-changing-io-queue-s.patch @@ -0,0 +1,106 @@ +From: Sameeh Jubran <sameehj@amazon.com> +Date: Tue, 11 Jun 2019 14:58:09 +0300 +Subject: [PATCH] net: ena: add ethtool function for changing io queue sizes +Origin: https://git.kernel.org/linus/eece4d2ab9d214e3b12f5ac1ed189a05793b28a5 +Bug-Debian: https://bugs.debian.org/941291 + +Implement the set_ringparam() function of the ethtool interface +to enable the changing of io queue sizes. + +Signed-off-by: Arthur Kiyanovski <akiyano@amazon.com> +Signed-off-by: Sameeh Jubran <sameehj@amazon.com> +Signed-off-by: David S. Miller <davem@davemloft.net> +--- + drivers/net/ethernet/amazon/ena/ena_ethtool.c | 22 +++++++++++++++++++ + drivers/net/ethernet/amazon/ena/ena_netdev.c | 14 ++++++++++++ + drivers/net/ethernet/amazon/ena/ena_netdev.h | 5 ++++- + 3 files changed, 40 insertions(+), 1 deletion(-) + +Index: linux/drivers/net/ethernet/amazon/ena/ena_ethtool.c +=================================================================== +--- linux.orig/drivers/net/ethernet/amazon/ena/ena_ethtool.c ++++ linux/drivers/net/ethernet/amazon/ena/ena_ethtool.c +@@ -454,6 +454,27 @@ static void ena_get_ringparam(struct net + ring->rx_pending = adapter->rx_ring[0].ring_size; + } + ++static int ena_set_ringparam(struct net_device *netdev, ++ struct ethtool_ringparam *ring) ++{ ++ struct ena_adapter *adapter = netdev_priv(netdev); ++ u32 new_tx_size, new_rx_size; ++ ++ new_tx_size = ring->tx_pending < ENA_MIN_RING_SIZE ? ++ ENA_MIN_RING_SIZE : ring->tx_pending; ++ new_tx_size = rounddown_pow_of_two(new_tx_size); ++ ++ new_rx_size = ring->rx_pending < ENA_MIN_RING_SIZE ? 
++ ENA_MIN_RING_SIZE : ring->rx_pending; ++ new_rx_size = rounddown_pow_of_two(new_rx_size); ++ ++ if (new_tx_size == adapter->requested_tx_ring_size && ++ new_rx_size == adapter->requested_rx_ring_size) ++ return 0; ++ ++ return ena_update_queue_sizes(adapter, new_tx_size, new_rx_size); ++} ++ + static u32 ena_flow_hash_to_flow_type(u16 hash_fields) + { + u32 data = 0; +@@ -805,6 +826,7 @@ static const struct ethtool_ops ena_etht + .get_coalesce = ena_get_coalesce, + .set_coalesce = ena_set_coalesce, + .get_ringparam = ena_get_ringparam, ++ .set_ringparam = ena_set_ringparam, + .get_sset_count = ena_get_sset_count, + .get_strings = ena_get_strings, + .get_ethtool_stats = ena_get_ethtool_stats, +Index: linux/drivers/net/ethernet/amazon/ena/ena_netdev.c +=================================================================== +--- linux.orig/drivers/net/ethernet/amazon/ena/ena_netdev.c ++++ linux/drivers/net/ethernet/amazon/ena/ena_netdev.c +@@ -2028,6 +2028,20 @@ static int ena_close(struct net_device * + return 0; + } + ++int ena_update_queue_sizes(struct ena_adapter *adapter, ++ u32 new_tx_size, ++ u32 new_rx_size) ++{ ++ bool dev_up; ++ ++ dev_up = test_bit(ENA_FLAG_DEV_UP, &adapter->flags); ++ ena_close(adapter->netdev); ++ adapter->requested_tx_ring_size = new_tx_size; ++ adapter->requested_rx_ring_size = new_rx_size; ++ ena_init_io_rings(adapter); ++ return dev_up ? 
ena_up(adapter) : 0; ++} ++ + static void ena_tx_csum(struct ena_com_tx_ctx *ena_tx_ctx, struct sk_buff *skb) + { + u32 mss = skb_shinfo(skb)->gso_size; +Index: linux/drivers/net/ethernet/amazon/ena/ena_netdev.h +=================================================================== +--- linux.orig/drivers/net/ethernet/amazon/ena/ena_netdev.h ++++ linux/drivers/net/ethernet/amazon/ena/ena_netdev.h +@@ -81,7 +81,6 @@ + #define ENA_DEFAULT_RING_SIZE (1024) + #define ENA_MIN_RING_SIZE (256) + +- + #define ENA_TX_WAKEUP_THRESH (MAX_SKB_FRAGS + 2) + #define ENA_DEFAULT_RX_COPYBREAK (256 - NET_IP_ALIGN) + +@@ -386,6 +385,10 @@ void ena_dump_stats_to_dmesg(struct ena_ + + void ena_dump_stats_to_buf(struct ena_adapter *adapter, u8 *buf); + ++int ena_update_queue_sizes(struct ena_adapter *adapter, ++ u32 new_tx_size, ++ u32 new_rx_size); ++ + int ena_get_sset_count(struct net_device *netdev, int sset); + + #endif /* !(ENA_H) */ diff --git a/debian/patches/features/all/ena/net-ena-add-good-checksum-counter.patch b/debian/patches/features/all/ena/net-ena-add-good-checksum-counter.patch new file mode 100644 index 000000000..cde5cdad4 --- /dev/null +++ b/debian/patches/features/all/ena/net-ena-add-good-checksum-counter.patch @@ -0,0 +1,72 @@ +From: Sameeh Jubran <sameehj@amazon.com> +Date: Mon, 3 Jun 2019 17:43:28 +0300 +Subject: [PATCH] net: ena: add good checksum counter +Origin: https://git.kernel.org/linus/d2eecc6ee8c92053797513e34931334dd0e85e18 +Bug-Debian: https://bugs.debian.org/941291 + +Add a new statistics to ETHTOOL to specify if the device calculated +and validated the Rx csum. + +Signed-off-by: Evgeny Shmeilin <evgeny@annapurnaLabs.com> +Signed-off-by: Sameeh Jubran <sameehj@amazon.com> +Signed-off-by: David S. 
Miller <davem@davemloft.net> +--- + drivers/net/ethernet/amazon/ena/ena_ethtool.c | 3 ++- + drivers/net/ethernet/amazon/ena/ena_netdev.c | 3 +++ + drivers/net/ethernet/amazon/ena/ena_netdev.h | 3 ++- + 3 files changed, 7 insertions(+), 2 deletions(-) + +Index: linux/drivers/net/ethernet/amazon/ena/ena_ethtool.c +=================================================================== +--- linux.orig/drivers/net/ethernet/amazon/ena/ena_ethtool.c ++++ linux/drivers/net/ethernet/amazon/ena/ena_ethtool.c +@@ -88,13 +88,14 @@ static const struct ena_stats ena_stats_ + static const struct ena_stats ena_stats_rx_strings[] = { + ENA_STAT_RX_ENTRY(cnt), + ENA_STAT_RX_ENTRY(bytes), ++ ENA_STAT_RX_ENTRY(rx_copybreak_pkt), ++ ENA_STAT_RX_ENTRY(csum_good), + ENA_STAT_RX_ENTRY(refil_partial), + ENA_STAT_RX_ENTRY(bad_csum), + ENA_STAT_RX_ENTRY(page_alloc_fail), + ENA_STAT_RX_ENTRY(skb_alloc_fail), + ENA_STAT_RX_ENTRY(dma_mapping_err), + ENA_STAT_RX_ENTRY(bad_desc_num), +- ENA_STAT_RX_ENTRY(rx_copybreak_pkt), + ENA_STAT_RX_ENTRY(bad_req_id), + ENA_STAT_RX_ENTRY(empty_rx_ring), + ENA_STAT_RX_ENTRY(csum_unchecked), +Index: linux/drivers/net/ethernet/amazon/ena/ena_netdev.c +=================================================================== +--- linux.orig/drivers/net/ethernet/amazon/ena/ena_netdev.c ++++ linux/drivers/net/ethernet/amazon/ena/ena_netdev.c +@@ -1001,6 +1001,9 @@ static inline void ena_rx_checksum(struc + + if (likely(ena_rx_ctx->l4_csum_checked)) { + skb->ip_summed = CHECKSUM_UNNECESSARY; ++ u64_stats_update_begin(&rx_ring->syncp); ++ rx_ring->rx_stats.csum_good++; ++ u64_stats_update_end(&rx_ring->syncp); + } else { + u64_stats_update_begin(&rx_ring->syncp); + rx_ring->rx_stats.csum_unchecked++; +Index: linux/drivers/net/ethernet/amazon/ena/ena_netdev.h +=================================================================== +--- linux.orig/drivers/net/ethernet/amazon/ena/ena_netdev.h ++++ linux/drivers/net/ethernet/amazon/ena/ena_netdev.h +@@ -221,13 +221,14 @@ struct 
ena_stats_tx { + struct ena_stats_rx { + u64 cnt; + u64 bytes; ++ u64 rx_copybreak_pkt; ++ u64 csum_good; + u64 refil_partial; + u64 bad_csum; + u64 page_alloc_fail; + u64 skb_alloc_fail; + u64 dma_mapping_err; + u64 bad_desc_num; +- u64 rx_copybreak_pkt; + u64 bad_req_id; + u64 empty_rx_ring; + u64 csum_unchecked; diff --git a/debian/patches/features/all/ena/net-ena-add-handling-of-llq-max-tx-burst-size.patch b/debian/patches/features/all/ena/net-ena-add-handling-of-llq-max-tx-burst-size.patch new file mode 100644 index 000000000..4034439eb --- /dev/null +++ b/debian/patches/features/all/ena/net-ena-add-handling-of-llq-max-tx-burst-size.patch @@ -0,0 +1,232 @@ +From: Sameeh Jubran <sameehj@amazon.com> +Date: Mon, 3 Jun 2019 17:43:19 +0300 +Subject: [PATCH] net: ena: add handling of llq max tx burst size +Origin: https://git.kernel.org/linus/05d62ca218f8425c70389d0416c15bd0d455b416 +Bug-Debian: https://bugs.debian.org/941291 + +There is a maximum TX burst size that the ENA device can handle. +It is exposed by the device to the driver and the driver +needs to comply with it to avoid bugs. + +In this commit we: +1. Add ena_com_is_doorbell_needed(), which calculates the number of + llq entries that will be used to hold a packet, and will return + true if they exceed the number of allowed entries in a burst. + If the function returns true, a doorbell needs to be invoked + to send this packet in the next burst. + +2. Follow the available entries in the current burst: + - Every doorbell a new burst begins + - With each write of an llq entry, the available entries in the + current burst are decreased by 1. + +Signed-off-by: Arthur Kiyanovski <akiyano@amazon.com> +Signed-off-by: Sameeh Jubran <sameehj@amazon.com> +Signed-off-by: David S. 
Miller <davem@davemloft.net> +--- + .../net/ethernet/amazon/ena/ena_admin_defs.h | 5 ++ + drivers/net/ethernet/amazon/ena/ena_com.c | 7 +++ + drivers/net/ethernet/amazon/ena/ena_com.h | 2 + + drivers/net/ethernet/amazon/ena/ena_eth_com.c | 28 ++++------ + drivers/net/ethernet/amazon/ena/ena_eth_com.h | 53 +++++++++++++++++++ + drivers/net/ethernet/amazon/ena/ena_netdev.c | 7 +++ + 6 files changed, 85 insertions(+), 17 deletions(-) + +Index: linux/drivers/net/ethernet/amazon/ena/ena_admin_defs.h +=================================================================== +--- linux.orig/drivers/net/ethernet/amazon/ena/ena_admin_defs.h ++++ linux/drivers/net/ethernet/amazon/ena/ena_admin_defs.h +@@ -524,6 +524,11 @@ struct ena_admin_feature_llq_desc { + + /* the stride control the driver selected to use */ + u16 descriptors_stride_ctrl_enabled; ++ ++ /* Maximum size in bytes taken by llq entries in a single tx burst. ++ * Set to 0 when there is no such limit. ++ */ ++ u32 max_tx_burst_size; + }; + + struct ena_admin_queue_feature_desc { +Index: linux/drivers/net/ethernet/amazon/ena/ena_com.c +=================================================================== +--- linux.orig/drivers/net/ethernet/amazon/ena/ena_com.c ++++ linux/drivers/net/ethernet/amazon/ena/ena_com.c +@@ -396,6 +396,10 @@ static int ena_com_init_io_sq(struct ena + 0x0, io_sq->llq_info.desc_list_entry_size); + io_sq->llq_buf_ctrl.descs_left_in_line = + io_sq->llq_info.descs_num_before_header; ++ ++ if (io_sq->llq_info.max_entries_in_tx_burst > 0) ++ io_sq->entries_in_tx_burst_left = ++ io_sq->llq_info.max_entries_in_tx_burst; + } + + io_sq->tail = 0; +@@ -727,6 +731,9 @@ static int ena_com_config_llq_info(struc + supported_feat, llq_info->descs_num_before_header); + } + ++ llq_info->max_entries_in_tx_burst = ++ (u16)(llq_features->max_tx_burst_size / llq_default_cfg->llq_ring_entry_size_value); ++ + rc = ena_com_set_llq(ena_dev); + if (rc) + pr_err("Cannot set LLQ configuration: %d\n", rc); +Index: 
linux/drivers/net/ethernet/amazon/ena/ena_com.h +=================================================================== +--- linux.orig/drivers/net/ethernet/amazon/ena/ena_com.h ++++ linux/drivers/net/ethernet/amazon/ena/ena_com.h +@@ -159,6 +159,7 @@ struct ena_com_llq_info { + u16 desc_list_entry_size; + u16 descs_num_before_header; + u16 descs_per_entry; ++ u16 max_entries_in_tx_burst; + }; + + struct ena_com_io_cq { +@@ -238,6 +239,7 @@ struct ena_com_io_sq { + u8 phase; + u8 desc_entry_size; + u8 dma_addr_bits; ++ u16 entries_in_tx_burst_left; + } ____cacheline_aligned; + + struct ena_com_admin_cq { +Index: linux/drivers/net/ethernet/amazon/ena/ena_eth_com.c +=================================================================== +--- linux.orig/drivers/net/ethernet/amazon/ena/ena_eth_com.c ++++ linux/drivers/net/ethernet/amazon/ena/ena_eth_com.c +@@ -82,6 +82,17 @@ static inline int ena_com_write_bounce_b + dst_tail_mask = io_sq->tail & (io_sq->q_depth - 1); + dst_offset = dst_tail_mask * llq_info->desc_list_entry_size; + ++ if (is_llq_max_tx_burst_exists(io_sq)) { ++ if (unlikely(!io_sq->entries_in_tx_burst_left)) { ++ pr_err("Error: trying to send more packets than tx burst allows\n"); ++ return -ENOSPC; ++ } ++ ++ io_sq->entries_in_tx_burst_left--; ++ pr_debug("decreasing entries_in_tx_burst_left of queue %d to %d\n", ++ io_sq->qid, io_sq->entries_in_tx_burst_left); ++ } ++ + /* Make sure everything was written into the bounce buffer before + * writing the bounce buffer to the device + */ +@@ -274,23 +285,6 @@ static inline u16 ena_com_cdesc_rx_pkt_g + return count; + } + +-static inline bool ena_com_meta_desc_changed(struct ena_com_io_sq *io_sq, +- struct ena_com_tx_ctx *ena_tx_ctx) +-{ +- int rc; +- +- if (ena_tx_ctx->meta_valid) { +- rc = memcmp(&io_sq->cached_tx_meta, +- &ena_tx_ctx->ena_meta, +- sizeof(struct ena_com_tx_meta)); +- +- if (unlikely(rc != 0)) +- return true; +- } +- +- return false; +-} +- + static inline int 
ena_com_create_and_store_tx_meta_desc(struct ena_com_io_sq *io_sq, + struct ena_com_tx_ctx *ena_tx_ctx) + { +Index: linux/drivers/net/ethernet/amazon/ena/ena_eth_com.h +=================================================================== +--- linux.orig/drivers/net/ethernet/amazon/ena/ena_eth_com.h ++++ linux/drivers/net/ethernet/amazon/ena/ena_eth_com.h +@@ -125,8 +125,55 @@ static inline bool ena_com_sq_have_enoug + return ena_com_free_desc(io_sq) > temp; + } + ++static inline bool ena_com_meta_desc_changed(struct ena_com_io_sq *io_sq, ++ struct ena_com_tx_ctx *ena_tx_ctx) ++{ ++ if (!ena_tx_ctx->meta_valid) ++ return false; ++ ++ return !!memcmp(&io_sq->cached_tx_meta, ++ &ena_tx_ctx->ena_meta, ++ sizeof(struct ena_com_tx_meta)); ++} ++ ++static inline bool is_llq_max_tx_burst_exists(struct ena_com_io_sq *io_sq) ++{ ++ return (io_sq->mem_queue_type == ENA_ADMIN_PLACEMENT_POLICY_DEV) && ++ io_sq->llq_info.max_entries_in_tx_burst > 0; ++} ++ ++static inline bool ena_com_is_doorbell_needed(struct ena_com_io_sq *io_sq, ++ struct ena_com_tx_ctx *ena_tx_ctx) ++{ ++ struct ena_com_llq_info *llq_info; ++ int descs_after_first_entry; ++ int num_entries_needed = 1; ++ u16 num_descs; ++ ++ if (!is_llq_max_tx_burst_exists(io_sq)) ++ return false; ++ ++ llq_info = &io_sq->llq_info; ++ num_descs = ena_tx_ctx->num_bufs; ++ ++ if (unlikely(ena_com_meta_desc_changed(io_sq, ena_tx_ctx))) ++ ++num_descs; ++ ++ if (num_descs > llq_info->descs_num_before_header) { ++ descs_after_first_entry = num_descs - llq_info->descs_num_before_header; ++ num_entries_needed += DIV_ROUND_UP(descs_after_first_entry, ++ llq_info->descs_per_entry); ++ } ++ ++ pr_debug("queue: %d num_descs: %d num_entries_needed: %d\n", io_sq->qid, ++ num_descs, num_entries_needed); ++ ++ return num_entries_needed > io_sq->entries_in_tx_burst_left; ++} ++ + static inline int ena_com_write_sq_doorbell(struct ena_com_io_sq *io_sq) + { ++ u16 max_entries_in_tx_burst = io_sq->llq_info.max_entries_in_tx_burst; + u16 tail = 
io_sq->tail; + + pr_debug("write submission queue doorbell for queue: %d tail: %d\n", +@@ -134,6 +181,12 @@ static inline int ena_com_write_sq_doorb + + writel(tail, io_sq->db_addr); + ++ if (is_llq_max_tx_burst_exists(io_sq)) { ++ pr_debug("reset available entries in tx burst for queue %d to %d\n", ++ io_sq->qid, max_entries_in_tx_burst); ++ io_sq->entries_in_tx_burst_left = max_entries_in_tx_burst; ++ } ++ + return 0; + } + +Index: linux/drivers/net/ethernet/amazon/ena/ena_netdev.c +=================================================================== +--- linux.orig/drivers/net/ethernet/amazon/ena/ena_netdev.c ++++ linux/drivers/net/ethernet/amazon/ena/ena_netdev.c +@@ -2172,6 +2172,13 @@ static netdev_tx_t ena_start_xmit(struct + /* set flags and meta data */ + ena_tx_csum(&ena_tx_ctx, skb); + ++ if (unlikely(ena_com_is_doorbell_needed(tx_ring->ena_com_io_sq, &ena_tx_ctx))) { ++ netif_dbg(adapter, tx_queued, dev, ++ "llq tx max burst size of queue %d achieved, writing doorbell to send burst\n", ++ qid); ++ ena_com_write_sq_doorbell(tx_ring->ena_com_io_sq); ++ } ++ + /* prepare the packet's descriptors to dma engine */ + rc = ena_com_prepare_tx(tx_ring->ena_com_io_sq, &ena_tx_ctx, + &nb_hw_desc); diff --git a/debian/patches/features/all/ena/net-ena-add-intr_moder_rx_interval-to-struct-ena_com.patch b/debian/patches/features/all/ena/net-ena-add-intr_moder_rx_interval-to-struct-ena_com.patch new file mode 100644 index 000000000..086da850a --- /dev/null +++ b/debian/patches/features/all/ena/net-ena-add-intr_moder_rx_interval-to-struct-ena_com.patch @@ -0,0 +1,118 @@ +From: Arthur Kiyanovski <akiyano@amazon.com> +Date: Mon, 16 Sep 2019 14:31:26 +0300 +Subject: [PATCH] net: ena: add intr_moder_rx_interval to struct ena_com_dev + and use it +Origin: https://git.kernel.org/linus/15619e722b16aaa40f942b93631aa92581a7b393 +Bug-Debian: https://bugs.debian.org/941291 + +Add intr_moder_rx_interval to struct ena_com_dev and use it as the +location where the interrupt moderation 
rx interval is saved, instead +of the interrupt moderation table. + +This is done as a first step before removing the old interrupt moderation +code. + +Signed-off-by: Arthur Kiyanovski <akiyano@amazon.com> +Signed-off-by: David S. Miller <davem@davemloft.net> +--- + drivers/net/ethernet/amazon/ena/ena_com.c | 20 ++++---------------- + drivers/net/ethernet/amazon/ena/ena_com.h | 8 +++++++- + drivers/net/ethernet/amazon/ena/ena_netdev.c | 3 ++- + 3 files changed, 13 insertions(+), 18 deletions(-) + +Index: linux/drivers/net/ethernet/amazon/ena/ena_com.c +=================================================================== +--- linux.orig/drivers/net/ethernet/amazon/ena/ena_com.c ++++ linux/drivers/net/ethernet/amazon/ena/ena_com.c +@@ -1297,9 +1297,6 @@ static int ena_com_init_interrupt_modera + static void ena_com_update_intr_delay_resolution(struct ena_com_dev *ena_dev, + u16 intr_delay_resolution) + { +- struct ena_intr_moder_entry *intr_moder_tbl = ena_dev->intr_moder_tbl; +- unsigned int i; +- + if (!intr_delay_resolution) { + pr_err("Illegal intr_delay_resolution provided. 
Going to use default 1 usec resolution\n"); + intr_delay_resolution = 1; +@@ -1307,8 +1304,7 @@ static void ena_com_update_intr_delay_re + ena_dev->intr_delay_resolution = intr_delay_resolution; + + /* update Rx */ +- for (i = 0; i < ENA_INTR_MAX_NUM_OF_LEVELS; i++) +- intr_moder_tbl[i].intr_moder_interval /= intr_delay_resolution; ++ ena_dev->intr_moder_rx_interval /= intr_delay_resolution; + + /* update Tx */ + ena_dev->intr_moder_tx_interval /= intr_delay_resolution; +@@ -2798,11 +2794,8 @@ int ena_com_update_nonadaptive_moderatio + return -EFAULT; + } + +- /* We use LOWEST entry of moderation table for storing +- * nonadaptive interrupt coalescing values +- */ +- ena_dev->intr_moder_tbl[ENA_INTR_MODER_LOWEST].intr_moder_interval = +- rx_coalesce_usecs / ena_dev->intr_delay_resolution; ++ ena_dev->intr_moder_rx_interval = rx_coalesce_usecs / ++ ena_dev->intr_delay_resolution; + + return 0; + } +@@ -2907,12 +2900,7 @@ unsigned int ena_com_get_nonadaptive_mod + + unsigned int ena_com_get_nonadaptive_moderation_interval_rx(struct ena_com_dev *ena_dev) + { +- struct ena_intr_moder_entry *intr_moder_tbl = ena_dev->intr_moder_tbl; +- +- if (intr_moder_tbl) +- return intr_moder_tbl[ENA_INTR_MODER_LOWEST].intr_moder_interval; +- +- return 0; ++ return ena_dev->intr_moder_rx_interval; + } + + void ena_com_init_intr_moderation_entry(struct ena_com_dev *ena_dev, +Index: linux/drivers/net/ethernet/amazon/ena/ena_com.h +=================================================================== +--- linux.orig/drivers/net/ethernet/amazon/ena/ena_com.h ++++ linux/drivers/net/ethernet/amazon/ena/ena_com.h +@@ -93,7 +93,7 @@ + #define ENA_INTR_HIGHEST_BYTES (192 * 1024) + + #define ENA_INTR_INITIAL_TX_INTERVAL_USECS 196 +-#define ENA_INTR_INITIAL_RX_INTERVAL_USECS 4 ++#define ENA_INTR_INITIAL_RX_INTERVAL_USECS 0 + #define ENA_INTR_DELAY_OLD_VALUE_WEIGHT 6 + #define ENA_INTR_DELAY_NEW_VALUE_WEIGHT 4 + #define ENA_INTR_MODER_LEVEL_STRIDE 2 +@@ -376,7 +376,13 @@ struct ena_com_dev { + 
struct ena_host_attribute host_attr; + bool adaptive_coalescing; + u16 intr_delay_resolution; ++ ++ /* interrupt moderation intervals are in usec divided by ++ * intr_delay_resolution, which is supplied by the device. ++ */ + u32 intr_moder_tx_interval; ++ u32 intr_moder_rx_interval; ++ + struct ena_intr_moder_entry *intr_moder_tbl; + + struct ena_com_llq_info llq_info; +Index: linux/drivers/net/ethernet/amazon/ena/ena_netdev.c +=================================================================== +--- linux.orig/drivers/net/ethernet/amazon/ena/ena_netdev.c ++++ linux/drivers/net/ethernet/amazon/ena/ena_netdev.c +@@ -3487,10 +3487,11 @@ static int ena_probe(struct pci_dev *pde + calc_queue_ctx.get_feat_ctx = &get_feat_ctx; + calc_queue_ctx.pdev = pdev; + +- /* initial Tx interrupt delay, Assumes 1 usec granularity. ++ /* Initial Tx and RX interrupt delay. Assumes 1 usec granularity. + * Updated during device initialization with the real granularity + */ + ena_dev->intr_moder_tx_interval = ENA_INTR_INITIAL_TX_INTERVAL_USECS; ++ ena_dev->intr_moder_rx_interval = ENA_INTR_INITIAL_RX_INTERVAL_USECS; + io_queue_num = ena_calc_io_queue_num(pdev, ena_dev, &get_feat_ctx); + rc = ena_calc_queue_size(&calc_queue_ctx); + if (rc || io_queue_num <= 0) { diff --git a/debian/patches/features/all/ena/net-ena-add-newline-at-the-end-of-pr_err-prints.patch b/debian/patches/features/all/ena/net-ena-add-newline-at-the-end-of-pr_err-prints.patch new file mode 100644 index 000000000..3b1c6c4cb --- /dev/null +++ b/debian/patches/features/all/ena/net-ena-add-newline-at-the-end-of-pr_err-prints.patch @@ -0,0 +1,91 @@ +From: Sameeh Jubran <sameehj@amazon.com> +Date: Mon, 3 Jun 2019 17:43:23 +0300 +Subject: [PATCH] net: ena: add newline at the end of pr_err prints +Origin: https://git.kernel.org/linus/9cb9c0de266f1ea52f01589f2f4019f163c01cd1 +Bug-Debian: https://bugs.debian.org/941291 + +Some pr_err prints lacked '\n' in the end. Added where missing. 
+ +Signed-off-by: Arthur Kiyanovski <akiyano@amazon.com> +Signed-off-by: Sameeh Jubran <sameehj@amazon.com> +Signed-off-by: David S. Miller <davem@davemloft.net> +--- + drivers/net/ethernet/amazon/ena/ena_com.c | 16 ++++++++-------- + 1 file changed, 8 insertions(+), 8 deletions(-) + +Index: linux/drivers/net/ethernet/amazon/ena/ena_com.c +=================================================================== +--- linux.orig/drivers/net/ethernet/amazon/ena/ena_com.c ++++ linux/drivers/net/ethernet/amazon/ena/ena_com.c +@@ -115,7 +115,7 @@ static int ena_com_admin_init_sq(struct + GFP_KERNEL); + + if (!sq->entries) { +- pr_err("memory allocation failed"); ++ pr_err("memory allocation failed\n"); + return -ENOMEM; + } + +@@ -137,7 +137,7 @@ static int ena_com_admin_init_cq(struct + GFP_KERNEL); + + if (!cq->entries) { +- pr_err("memory allocation failed"); ++ pr_err("memory allocation failed\n"); + return -ENOMEM; + } + +@@ -160,7 +160,7 @@ static int ena_com_admin_init_aenq(struc + GFP_KERNEL); + + if (!aenq->entries) { +- pr_err("memory allocation failed"); ++ pr_err("memory allocation failed\n"); + return -ENOMEM; + } + +@@ -285,7 +285,7 @@ static inline int ena_com_init_comp_ctxt + + queue->comp_ctx = devm_kzalloc(queue->q_dmadev, size, GFP_KERNEL); + if (unlikely(!queue->comp_ctx)) { +- pr_err("memory allocation failed"); ++ pr_err("memory allocation failed\n"); + return -ENOMEM; + } + +@@ -356,7 +356,7 @@ static int ena_com_init_io_sq(struct ena + } + + if (!io_sq->desc_addr.virt_addr) { +- pr_err("memory allocation failed"); ++ pr_err("memory allocation failed\n"); + return -ENOMEM; + } + } +@@ -382,7 +382,7 @@ static int ena_com_init_io_sq(struct ena + devm_kzalloc(ena_dev->dmadev, size, GFP_KERNEL); + + if (!io_sq->bounce_buf_ctrl.base_buffer) { +- pr_err("bounce buffer memory allocation failed"); ++ pr_err("bounce buffer memory allocation failed\n"); + return -ENOMEM; + } + +@@ -440,7 +440,7 @@ static int ena_com_init_io_cq(struct ena + } + + if 
(!io_cq->cdesc_addr.virt_addr) { +- pr_err("memory allocation failed"); ++ pr_err("memory allocation failed\n"); + return -ENOMEM; + } + +@@ -829,7 +829,7 @@ static u32 ena_com_reg_bar_read32(struct + } + + if (read_resp->reg_off != offset) { +- pr_err("Read failure: wrong offset provided"); ++ pr_err("Read failure: wrong offset provided\n"); + ret = ENA_MMIO_READ_TIMEOUT; + } else { + ret = read_resp->reg_val; diff --git a/debian/patches/features/all/ena/net-ena-add-support-for-changing-max_header_size-in-.patch b/debian/patches/features/all/ena/net-ena-add-support-for-changing-max_header_size-in-.patch new file mode 100644 index 000000000..ac5bc89e7 --- /dev/null +++ b/debian/patches/features/all/ena/net-ena-add-support-for-changing-max_header_size-in-.patch @@ -0,0 +1,54 @@ +From: Sameeh Jubran <sameehj@amazon.com> +Date: Mon, 3 Jun 2019 17:43:26 +0300 +Subject: [PATCH] net: ena: add support for changing max_header_size in LLQ + mode +Origin: https://git.kernel.org/linus/cdf449eccc5946d5dd4145b38347874a7423c50d +Bug-Debian: https://bugs.debian.org/941291 + +Up until now the driver always used a single setting for the sizes +of the different parts of the llq entry - 128 for entry size, 2 for +descriptors before header and 96 for maximum header size. + +The current code makes sure that the parts of the llq entry are +compatible with each other and with the initial llq entry size given +by the device. + +This commit changes this code to support any llq entry size + +Signed-off-by: Arthur Kiyanovski <akiyano@amazon.com> +Signed-off-by: Sameeh Jubran <sameehj@amazon.com> +Signed-off-by: David S. 
Miller <davem@davemloft.net> +--- + drivers/net/ethernet/amazon/ena/ena_com.c | 10 ++++------ + 1 file changed, 4 insertions(+), 6 deletions(-) + +Index: linux/drivers/net/ethernet/amazon/ena/ena_com.c +=================================================================== +--- linux.orig/drivers/net/ethernet/amazon/ena/ena_com.c ++++ linux/drivers/net/ethernet/amazon/ena/ena_com.c +@@ -2936,8 +2936,8 @@ int ena_com_config_dev_mode(struct ena_c + struct ena_admin_feature_llq_desc *llq_features, + struct ena_llq_configurations *llq_default_cfg) + { ++ struct ena_com_llq_info *llq_info = &ena_dev->llq_info; + int rc; +- int size; + + if (!llq_features->max_llq_num) { + ena_dev->tx_mem_queue_type = ENA_ADMIN_PLACEMENT_POLICY_HOST; +@@ -2948,12 +2948,10 @@ int ena_com_config_dev_mode(struct ena_c + if (rc) + return rc; + +- /* Validate the descriptor is not too big */ +- size = ena_dev->tx_max_header_size; +- size += ena_dev->llq_info.descs_num_before_header * +- sizeof(struct ena_eth_io_tx_desc); ++ ena_dev->tx_max_header_size = llq_info->desc_list_entry_size - ++ (llq_info->descs_num_before_header * sizeof(struct ena_eth_io_tx_desc)); + +- if (unlikely(ena_dev->llq_info.desc_list_entry_size < size)) { ++ if (unlikely(ena_dev->tx_max_header_size == 0)) { + pr_err("the size of the LLQ entry is smaller than needed\n"); + return -EINVAL; + } diff --git a/debian/patches/features/all/ena/net-ena-allow-automatic-fallback-to-polling-mode.patch b/debian/patches/features/all/ena/net-ena-allow-automatic-fallback-to-polling-mode.patch new file mode 100644 index 000000000..dab6166e1 --- /dev/null +++ b/debian/patches/features/all/ena/net-ena-allow-automatic-fallback-to-polling-mode.patch @@ -0,0 +1,103 @@ +From: Sameeh Jubran <sameehj@amazon.com> +Date: Mon, 3 Jun 2019 17:43:25 +0300 +Subject: [PATCH] net: ena: allow automatic fallback to polling mode +Origin: https://git.kernel.org/linus/a4e262cde3cda4491ce666e7c5270954c4d926b9 +Bug-Debian: https://bugs.debian.org/941291 + +Enable 
fallback to polling mode for Admin queue +when identified a command response arrival +without an accompanying MSI-X interrupt + +Signed-off-by: Igor Chauskin <igorch@amazon.com> +Signed-off-by: Sameeh Jubran <sameehj@amazon.com> +Signed-off-by: David S. Miller <davem@davemloft.net> +--- + drivers/net/ethernet/amazon/ena/ena_com.c | 34 +++++++++++++++++------ + drivers/net/ethernet/amazon/ena/ena_com.h | 14 ++++++++++ + 2 files changed, 39 insertions(+), 9 deletions(-) + +Index: linux/drivers/net/ethernet/amazon/ena/ena_com.c +=================================================================== +--- linux.orig/drivers/net/ethernet/amazon/ena/ena_com.c ++++ linux/drivers/net/ethernet/amazon/ena/ena_com.c +@@ -762,16 +762,26 @@ static int ena_com_wait_and_process_admi + admin_queue->stats.no_completion++; + spin_unlock_irqrestore(&admin_queue->q_lock, flags); + +- if (comp_ctx->status == ENA_CMD_COMPLETED) +- pr_err("The ena device have completion but the driver didn't receive any MSI-X interrupt (cmd %d)\n", +- comp_ctx->cmd_opcode); +- else +- pr_err("The ena device doesn't send any completion for the admin cmd %d status %d\n", ++ if (comp_ctx->status == ENA_CMD_COMPLETED) { ++ pr_err("The ena device sent a completion but the driver didn't receive a MSI-X interrupt (cmd %d), autopolling mode is %s\n", ++ comp_ctx->cmd_opcode, ++ admin_queue->auto_polling ? "ON" : "OFF"); ++ /* Check if fallback to polling is enabled */ ++ if (admin_queue->auto_polling) ++ admin_queue->polling = true; ++ } else { ++ pr_err("The ena device doesn't send a completion for the admin cmd %d status %d\n", + comp_ctx->cmd_opcode, comp_ctx->status); +- +- admin_queue->running_state = false; +- ret = -ETIME; +- goto err; ++ } ++ /* Check if shifted to polling mode. ++ * This will happen if there is a completion without an interrupt ++ * and autopolling mode is enabled. 
Continuing normal execution in such case ++ */ ++ if (!admin_queue->polling) { ++ admin_queue->running_state = false; ++ ret = -ETIME; ++ goto err; ++ } + } + + ret = ena_com_comp_status_to_errno(comp_ctx->comp_status); +@@ -1650,6 +1660,12 @@ void ena_com_set_admin_polling_mode(stru + ena_dev->admin_queue.polling = polling; + } + ++void ena_com_set_admin_auto_polling_mode(struct ena_com_dev *ena_dev, ++ bool polling) ++{ ++ ena_dev->admin_queue.auto_polling = polling; ++} ++ + int ena_com_mmio_reg_read_request_init(struct ena_com_dev *ena_dev) + { + struct ena_com_mmio_read *mmio_read = &ena_dev->mmio_read; +Index: linux/drivers/net/ethernet/amazon/ena/ena_com.h +=================================================================== +--- linux.orig/drivers/net/ethernet/amazon/ena/ena_com.h ++++ linux/drivers/net/ethernet/amazon/ena/ena_com.h +@@ -283,6 +283,9 @@ struct ena_com_admin_queue { + /* Indicate if the admin queue should poll for completion */ + bool polling; + ++ /* Define if fallback to polling mode should occur */ ++ bool auto_polling; ++ + u16 curr_cmd_id; + + /* Indicate that the ena was initialized and can +@@ -538,6 +541,17 @@ void ena_com_set_admin_polling_mode(stru + */ + bool ena_com_get_ena_admin_polling_mode(struct ena_com_dev *ena_dev); + ++/* ena_com_set_admin_auto_polling_mode - Enable autoswitch to polling mode ++ * @ena_dev: ENA communication layer struct ++ * @polling: Enable/Disable polling mode ++ * ++ * Set the autopolling mode. ++ * If autopolling is on: ++ * In case of missing interrupt when data is available switch to polling. 
++ */ ++void ena_com_set_admin_auto_polling_mode(struct ena_com_dev *ena_dev, ++ bool polling); ++ + /* ena_com_admin_q_comp_intr_handler - admin queue interrupt handler + * @ena_dev: ENA communication layer struct + * diff --git a/debian/patches/features/all/ena/net-ena-allow-queue-allocation-backoff-when-low-on-m.patch b/debian/patches/features/all/ena/net-ena-allow-queue-allocation-backoff-when-low-on-m.patch new file mode 100644 index 000000000..6f902b864 --- /dev/null +++ b/debian/patches/features/all/ena/net-ena-allow-queue-allocation-backoff-when-low-on-m.patch @@ -0,0 +1,323 @@ +From: Sameeh Jubran <sameehj@amazon.com> +Date: Tue, 11 Jun 2019 14:58:08 +0300 +Subject: [PATCH] net: ena: allow queue allocation backoff when low on memory +Origin: https://git.kernel.org/linus/13ca32a69e29f3a0fe72094dd930f312b3f3ee44 +Bug-Debian: https://bugs.debian.org/941291 + +If there is not enough memory to allocate io queues the driver will +try to allocate smaller queues. + +The backoff algorithm is as follows: + +1. Try to allocate TX and RX and if successful. +1.1. return success + +2. Divide by 2 the size of the larger of RX and TX queues (or both if their size is the same). + +3. If TX or RX is smaller than 256 +3.1. return failure. +4. else +4.1. go back to 1. + +Also change the tx_queue_size, rx_queue_size field names in struct +adapter to requested_tx_queue_size and requested_rx_queue_size, and +use RX and TX queue 0 for actual queue sizes. +Explanation: +The original fields were useless as they were simply used to assign +values once from them to each of the queues in the adapter in ena_probe(). +They could simply be deleted. However now that we have a backoff +feature, we have use for them. In case of backoff there is a difference +between the requested queue sizes and the actual sizes. 
Therefore there +is a need to save the requested queue size for future retries of queue +allocation (for example if allocation failed and then ifdown + ifup was +called we want to start the allocation from the original requested size of +the queues). + +Signed-off-by: Arthur Kiyanovski <akiyano@amazon.com> +Signed-off-by: Sameeh Jubran <sameehj@amazon.com> +Signed-off-by: David S. Miller <davem@davemloft.net> +--- + drivers/net/ethernet/amazon/ena/ena_ethtool.c | 4 +- + drivers/net/ethernet/amazon/ena/ena_netdev.c | 159 +++++++++++++----- + drivers/net/ethernet/amazon/ena/ena_netdev.h | 6 +- + 3 files changed, 127 insertions(+), 42 deletions(-) + +Index: linux/drivers/net/ethernet/amazon/ena/ena_ethtool.c +=================================================================== +--- linux.orig/drivers/net/ethernet/amazon/ena/ena_ethtool.c ++++ linux/drivers/net/ethernet/amazon/ena/ena_ethtool.c +@@ -450,8 +450,8 @@ static void ena_get_ringparam(struct net + + ring->tx_max_pending = adapter->max_tx_ring_size; + ring->rx_max_pending = adapter->max_rx_ring_size; +- ring->tx_pending = adapter->tx_ring_size; +- ring->rx_pending = adapter->rx_ring_size; ++ ring->tx_pending = adapter->tx_ring[0].ring_size; ++ ring->rx_pending = adapter->rx_ring[0].ring_size; + } + + static u32 ena_flow_hash_to_flow_type(u16 hash_fields) +Index: linux/drivers/net/ethernet/amazon/ena/ena_netdev.c +=================================================================== +--- linux.orig/drivers/net/ethernet/amazon/ena/ena_netdev.c ++++ linux/drivers/net/ethernet/amazon/ena/ena_netdev.c +@@ -182,7 +182,7 @@ static void ena_init_io_rings(struct ena + ena_init_io_rings_common(adapter, rxr, i); + + /* TX specific ring state */ +- txr->ring_size = adapter->tx_ring_size; ++ txr->ring_size = adapter->requested_tx_ring_size; + txr->tx_max_header_size = ena_dev->tx_max_header_size; + txr->tx_mem_queue_type = ena_dev->tx_mem_queue_type; + txr->sgl_size = adapter->max_tx_sgl_size; +@@ -190,7 +190,7 @@ static void 
ena_init_io_rings(struct ena + ena_com_get_nonadaptive_moderation_interval_tx(ena_dev); + + /* RX specific ring state */ +- rxr->ring_size = adapter->rx_ring_size; ++ rxr->ring_size = adapter->requested_rx_ring_size; + rxr->rx_copybreak = adapter->rx_copybreak; + rxr->sgl_size = adapter->max_rx_sgl_size; + rxr->smoothed_interval = +@@ -594,7 +594,6 @@ static void ena_free_rx_bufs(struct ena_ + + /* ena_refill_all_rx_bufs - allocate all queues Rx buffers + * @adapter: board private structure +- * + */ + static void ena_refill_all_rx_bufs(struct ena_adapter *adapter) + { +@@ -1635,7 +1634,7 @@ static int ena_create_io_tx_queue(struct + ctx.qid = ena_qid; + ctx.mem_queue_type = ena_dev->tx_mem_queue_type; + ctx.msix_vector = msix_vector; +- ctx.queue_size = adapter->tx_ring_size; ++ ctx.queue_size = tx_ring->ring_size; + ctx.numa_node = cpu_to_node(tx_ring->cpu); + + rc = ena_com_create_io_queue(ena_dev, &ctx); +@@ -1702,7 +1701,7 @@ static int ena_create_io_rx_queue(struct + ctx.direction = ENA_COM_IO_QUEUE_DIRECTION_RX; + ctx.mem_queue_type = ENA_ADMIN_PLACEMENT_POLICY_HOST; + ctx.msix_vector = msix_vector; +- ctx.queue_size = adapter->rx_ring_size; ++ ctx.queue_size = rx_ring->ring_size; + ctx.numa_node = cpu_to_node(rx_ring->cpu); + + rc = ena_com_create_io_queue(ena_dev, &ctx); +@@ -1749,6 +1748,112 @@ create_err: + return rc; + } + ++static void set_io_rings_size(struct ena_adapter *adapter, ++ int new_tx_size, int new_rx_size) ++{ ++ int i; ++ ++ for (i = 0; i < adapter->num_queues; i++) { ++ adapter->tx_ring[i].ring_size = new_tx_size; ++ adapter->rx_ring[i].ring_size = new_rx_size; ++ } ++} ++ ++/* This function allows queue allocation to backoff when the system is ++ * low on memory. If there is not enough memory to allocate io queues ++ * the driver will try to allocate smaller queues. ++ * ++ * The backoff algorithm is as follows: ++ * 1. Try to allocate TX and RX and if successful. ++ * 1.1. return success ++ * ++ * 2. 
Divide by 2 the size of the larger of RX and TX queues (or both if their size is the same). ++ * ++ * 3. If TX or RX is smaller than 256 ++ * 3.1. return failure. ++ * 4. else ++ * 4.1. go back to 1. ++ */ ++static int create_queues_with_size_backoff(struct ena_adapter *adapter) ++{ ++ int rc, cur_rx_ring_size, cur_tx_ring_size; ++ int new_rx_ring_size, new_tx_ring_size; ++ ++ /* current queue sizes might be set to smaller than the requested ++ * ones due to past queue allocation failures. ++ */ ++ set_io_rings_size(adapter, adapter->requested_tx_ring_size, ++ adapter->requested_rx_ring_size); ++ ++ while (1) { ++ rc = ena_setup_all_tx_resources(adapter); ++ if (rc) ++ goto err_setup_tx; ++ ++ rc = ena_create_all_io_tx_queues(adapter); ++ if (rc) ++ goto err_create_tx_queues; ++ ++ rc = ena_setup_all_rx_resources(adapter); ++ if (rc) ++ goto err_setup_rx; ++ ++ rc = ena_create_all_io_rx_queues(adapter); ++ if (rc) ++ goto err_create_rx_queues; ++ ++ return 0; ++ ++err_create_rx_queues: ++ ena_free_all_io_rx_resources(adapter); ++err_setup_rx: ++ ena_destroy_all_tx_queues(adapter); ++err_create_tx_queues: ++ ena_free_all_io_tx_resources(adapter); ++err_setup_tx: ++ if (rc != -ENOMEM) { ++ netif_err(adapter, ifup, adapter->netdev, ++ "Queue creation failed with error code %d\n", ++ rc); ++ return rc; ++ } ++ ++ cur_tx_ring_size = adapter->tx_ring[0].ring_size; ++ cur_rx_ring_size = adapter->rx_ring[0].ring_size; ++ ++ netif_err(adapter, ifup, adapter->netdev, ++ "Not enough memory to create queues with sizes TX=%d, RX=%d\n", ++ cur_tx_ring_size, cur_rx_ring_size); ++ ++ new_tx_ring_size = cur_tx_ring_size; ++ new_rx_ring_size = cur_rx_ring_size; ++ ++ /* Decrease the size of the larger queue, or ++ * decrease both if they are the same size. 
++ */ ++ if (cur_rx_ring_size <= cur_tx_ring_size) ++ new_tx_ring_size = cur_tx_ring_size / 2; ++ if (cur_rx_ring_size >= cur_tx_ring_size) ++ new_rx_ring_size = cur_rx_ring_size / 2; ++ ++ if (cur_tx_ring_size < ENA_MIN_RING_SIZE || ++ cur_rx_ring_size < ENA_MIN_RING_SIZE) { ++ netif_err(adapter, ifup, adapter->netdev, ++ "Queue creation failed with the smallest possible queue size of %d for both queues. Not retrying with smaller queues\n", ++ ENA_MIN_RING_SIZE); ++ return rc; ++ } ++ ++ netif_err(adapter, ifup, adapter->netdev, ++ "Retrying queue creation with sizes TX=%d, RX=%d\n", ++ new_tx_ring_size, ++ new_rx_ring_size); ++ ++ set_io_rings_size(adapter, new_tx_ring_size, ++ new_rx_ring_size); ++ } ++} ++ + static int ena_up(struct ena_adapter *adapter) + { + int rc, i; +@@ -1768,25 +1873,9 @@ static int ena_up(struct ena_adapter *ad + if (rc) + goto err_req_irq; + +- /* allocate transmit descriptors */ +- rc = ena_setup_all_tx_resources(adapter); ++ rc = create_queues_with_size_backoff(adapter); + if (rc) +- goto err_setup_tx; +- +- /* allocate receive descriptors */ +- rc = ena_setup_all_rx_resources(adapter); +- if (rc) +- goto err_setup_rx; +- +- /* Create TX queues */ +- rc = ena_create_all_io_tx_queues(adapter); +- if (rc) +- goto err_create_tx_queues; +- +- /* Create RX queues */ +- rc = ena_create_all_io_rx_queues(adapter); +- if (rc) +- goto err_create_rx_queues; ++ goto err_create_queues_with_backoff; + + rc = ena_up_complete(adapter); + if (rc) +@@ -1815,14 +1904,11 @@ static int ena_up(struct ena_adapter *ad + return rc; + + err_up: +- ena_destroy_all_rx_queues(adapter); +-err_create_rx_queues: + ena_destroy_all_tx_queues(adapter); +-err_create_tx_queues: +- ena_free_all_io_rx_resources(adapter); +-err_setup_rx: + ena_free_all_io_tx_resources(adapter); +-err_setup_tx: ++ ena_destroy_all_rx_queues(adapter); ++ ena_free_all_io_rx_resources(adapter); ++err_create_queues_with_backoff: + ena_free_io_irq(adapter); + err_req_irq: + ena_del_napi(adapter); 
+@@ -3286,17 +3372,14 @@ static int ena_calc_queue_size(struct en + max_tx_queue_size = rounddown_pow_of_two(max_tx_queue_size); + max_rx_queue_size = rounddown_pow_of_two(max_rx_queue_size); + +- tx_queue_size = min_t(u32, tx_queue_size, max_tx_queue_size); +- rx_queue_size = min_t(u32, rx_queue_size, max_rx_queue_size); ++ tx_queue_size = clamp_val(tx_queue_size, ENA_MIN_RING_SIZE, ++ max_tx_queue_size); ++ rx_queue_size = clamp_val(rx_queue_size, ENA_MIN_RING_SIZE, ++ max_rx_queue_size); + + tx_queue_size = rounddown_pow_of_two(tx_queue_size); + rx_queue_size = rounddown_pow_of_two(rx_queue_size); + +- if (unlikely(!rx_queue_size || !tx_queue_size)) { +- dev_err(&ctx->pdev->dev, "Invalid queue size\n"); +- return -EFAULT; +- } +- + ctx->max_tx_queue_size = max_tx_queue_size; + ctx->max_rx_queue_size = max_rx_queue_size; + ctx->tx_queue_size = tx_queue_size; +@@ -3426,8 +3509,8 @@ static int ena_probe(struct pci_dev *pde + adapter->msg_enable = netif_msg_init(debug, DEFAULT_MSG_ENABLE); + adapter->reset_reason = ENA_REGS_RESET_NORMAL; + +- adapter->tx_ring_size = calc_queue_ctx.tx_queue_size; +- adapter->rx_ring_size = calc_queue_ctx.rx_queue_size; ++ adapter->requested_tx_ring_size = calc_queue_ctx.tx_queue_size; ++ adapter->requested_rx_ring_size = calc_queue_ctx.rx_queue_size; + adapter->max_tx_ring_size = calc_queue_ctx.max_tx_queue_size; + adapter->max_rx_ring_size = calc_queue_ctx.max_rx_queue_size; + adapter->max_tx_sgl_size = calc_queue_ctx.max_tx_sgl_size; +Index: linux/drivers/net/ethernet/amazon/ena/ena_netdev.h +=================================================================== +--- linux.orig/drivers/net/ethernet/amazon/ena/ena_netdev.h ++++ linux/drivers/net/ethernet/amazon/ena/ena_netdev.h +@@ -79,6 +79,8 @@ + #define ENA_BAR_MASK (BIT(ENA_REG_BAR) | BIT(ENA_MEM_BAR)) + + #define ENA_DEFAULT_RING_SIZE (1024) ++#define ENA_MIN_RING_SIZE (256) ++ + + #define ENA_TX_WAKEUP_THRESH (MAX_SKB_FRAGS + 2) + #define ENA_DEFAULT_RX_COPYBREAK (256 - 
NET_IP_ALIGN) +@@ -330,8 +332,8 @@ struct ena_adapter { + u32 tx_usecs, rx_usecs; /* interrupt moderation */ + u32 tx_frames, rx_frames; /* interrupt moderation */ + +- u32 tx_ring_size; +- u32 rx_ring_size; ++ u32 requested_tx_ring_size; ++ u32 requested_rx_ring_size; + + u32 max_tx_ring_size; + u32 max_rx_ring_size; diff --git a/debian/patches/features/all/ena/net-ena-arrange-ena_probe-function-variables-in-reve.patch b/debian/patches/features/all/ena/net-ena-arrange-ena_probe-function-variables-in-reve.patch new file mode 100644 index 000000000..af89762f1 --- /dev/null +++ b/debian/patches/features/all/ena/net-ena-arrange-ena_probe-function-variables-in-reve.patch @@ -0,0 +1,49 @@ +From: Sameeh Jubran <sameehj@amazon.com> +Date: Mon, 3 Jun 2019 17:43:22 +0300 +Subject: [PATCH] net: ena: arrange ena_probe() function variables in reverse + christmas tree +Origin: https://git.kernel.org/linus/83b9240428a66da3c8e24e719b985d533cf58067 +Bug-Debian: https://bugs.debian.org/941291 + +Reverse christmas tree arrangement is when declarations are ordered from longest +to shortest, one per line. Most of our functions abide by this +arrangement, but this function does not. + +In this commit we arrange the variables of ena_probe() in reverse christmas +tree. + +Signed-off-by: Arthur Kiyanovski <akiyano@amazon.com> +Signed-off-by: Sameeh Jubran <sameehj@amazon.com> +Signed-off-by: David S. 
Miller <davem@davemloft.net> +--- + drivers/net/ethernet/amazon/ena/ena_netdev.c | 12 ++++++------ + 1 file changed, 6 insertions(+), 6 deletions(-) + +Index: linux/drivers/net/ethernet/amazon/ena/ena_netdev.c +=================================================================== +--- linux.orig/drivers/net/ethernet/amazon/ena/ena_netdev.c ++++ linux/drivers/net/ethernet/amazon/ena/ena_netdev.c +@@ -3274,17 +3274,17 @@ static int ena_calc_queue_size(struct pc + static int ena_probe(struct pci_dev *pdev, const struct pci_device_id *ent) + { + struct ena_com_dev_get_features_ctx get_feat_ctx; +- static int version_printed; +- struct net_device *netdev; +- struct ena_adapter *adapter; + struct ena_llq_configurations llq_config; + struct ena_com_dev *ena_dev = NULL; +- char *queue_type_str; +- static int adapters_found; ++ struct ena_adapter *adapter; ++ static int version_printed; + int io_queue_num, bars, rc; +- int queue_size; ++ struct net_device *netdev; ++ static int adapters_found; ++ char *queue_type_str; + u16 tx_sgl_size = 0; + u16 rx_sgl_size = 0; ++ int queue_size; + bool wd_state; + + dev_dbg(&pdev->dev, "%s\n", __func__); diff --git a/debian/patches/features/all/ena/net-ena-don-t-wake-up-tx-queue-when-down.patch b/debian/patches/features/all/ena/net-ena-don-t-wake-up-tx-queue-when-down.patch new file mode 100644 index 000000000..e1b214e80 --- /dev/null +++ b/debian/patches/features/all/ena/net-ena-don-t-wake-up-tx-queue-when-down.patch @@ -0,0 +1,52 @@ +From: Sameeh Jubran <sameehj@amazon.com> +Date: Sun, 15 Sep 2019 17:29:44 +0300 +Subject: [PATCH] net: ena: don't wake up tx queue when down +Origin: https://git.kernel.org/linus/a53651ec93a8d7ab5b26c5390e0c389048b4b4b6 +Bug-Debian: https://bugs.debian.org/941291 + +There is a race condition that can occur when calling ena_down(). 
+The ena_clean_tx_irq() - which is a part of the napi handler - +function might wake up the tx queue when the queue is supposed +to be down (during recovery or changing the size of the queues +for example). This causes the ena_start_xmit() function to trigger +and possibly try to access the destroyed queues. + +The race is illustrated below: + +Flow A: Flow B(napi handler) +ena_down() + netif_carrier_off() + netif_tx_disable() + ena_clean_tx_irq() + netif_tx_wake_queue() + ena_napi_disable_all() + ena_destroy_all_io_queues() + +After these flows the tx queue is active and ena_start_xmit() accesses +the destroyed queue, which leads to a kernel panic. + +Fixes: 1738cd3ed342 ("net: ena: Add a driver for Amazon Elastic Network Adapters (ENA)") + +Signed-off-by: Sameeh Jubran <sameehj@amazon.com> +Signed-off-by: David S. Miller <davem@davemloft.net> +--- + drivers/net/ethernet/amazon/ena/ena_netdev.c | 3 ++- + 1 file changed, 2 insertions(+), 1 deletion(-) + +diff --git a/drivers/net/ethernet/amazon/ena/ena_netdev.c b/drivers/net/ethernet/amazon/ena/ena_netdev.c +index 664e3ed97ea9..d118ed4c57ce 100644 +--- a/drivers/net/ethernet/amazon/ena/ena_netdev.c ++++ b/drivers/net/ethernet/amazon/ena/ena_netdev.c +@@ -823,7 +823,8 @@ static int ena_clean_tx_irq(struct ena_ring *tx_ring, u32 budget) + above_thresh = + ena_com_sq_have_enough_space(tx_ring->ena_com_io_sq, + ENA_TX_WAKEUP_THRESH); +- if (netif_tx_queue_stopped(txq) && above_thresh) { ++ if (netif_tx_queue_stopped(txq) && above_thresh && ++ test_bit(ENA_FLAG_DEV_UP, &tx_ring->adapter->flags)) { + netif_tx_wake_queue(txq); + u64_stats_update_begin(&tx_ring->syncp); + tx_ring->tx_stats.queue_wakeup++; +-- +2.17.1 + diff --git a/debian/patches/features/all/ena/net-ena-enable-negotiating-larger-Rx-ring-size.patch b/debian/patches/features/all/ena/net-ena-enable-negotiating-larger-Rx-ring-size.patch new file mode 100644 index 000000000..43be08179 --- /dev/null +++ 
b/debian/patches/features/all/ena/net-ena-enable-negotiating-larger-Rx-ring-size.patch @@ -0,0 +1,270 @@ +From: Sameeh Jubran <sameehj@amazon.com> +Date: Tue, 11 Jun 2019 14:58:06 +0300 +Subject: [PATCH] net: ena: enable negotiating larger Rx ring size +Origin: https://git.kernel.org/linus/31aa9857f1733403f2eb12d51c1cec20a22483d9 +Bug-Debian: https://bugs.debian.org/941291 + +Use MAX_QUEUES_EXT get feature capability to query the device. + +Signed-off-by: Netanel Belgazal <netanel@amazon.com> +Signed-off-by: Sameeh Jubran <sameehj@amazon.com> +Signed-off-by: David S. Miller <davem@davemloft.net> +--- + drivers/net/ethernet/amazon/ena/ena_netdev.c | 144 ++++++++++++------- + drivers/net/ethernet/amazon/ena/ena_netdev.h | 15 ++ + 2 files changed, 110 insertions(+), 49 deletions(-) + +Index: linux/drivers/net/ethernet/amazon/ena/ena_netdev.c +=================================================================== +--- linux.orig/drivers/net/ethernet/amazon/ena/ena_netdev.c ++++ linux/drivers/net/ethernet/amazon/ena/ena_netdev.c +@@ -2455,13 +2455,6 @@ static int ena_device_validate_params(st + return -EINVAL; + } + +- if ((get_feat_ctx->max_queues.max_cq_num < adapter->num_queues) || +- (get_feat_ctx->max_queues.max_sq_num < adapter->num_queues)) { +- netif_err(adapter, drv, netdev, +- "Error, device doesn't support enough queues\n"); +- return -EINVAL; +- } +- + if (get_feat_ctx->dev_attr.max_mtu < netdev->mtu) { + netif_err(adapter, drv, netdev, + "Error, device max mtu is smaller than netdev MTU\n"); +@@ -3035,18 +3028,32 @@ static int ena_calc_io_queue_num(struct + struct ena_com_dev *ena_dev, + struct ena_com_dev_get_features_ctx *get_feat_ctx) + { +- int io_sq_num, io_queue_num; ++ int io_tx_sq_num, io_tx_cq_num, io_rx_num, io_queue_num; ++ ++ if (ena_dev->supported_features & BIT(ENA_ADMIN_MAX_QUEUES_EXT)) { ++ struct ena_admin_queue_ext_feature_fields *max_queue_ext = ++ &get_feat_ctx->max_queue_ext.max_queue_ext; ++ io_rx_num = min_t(int, 
max_queue_ext->max_rx_sq_num, ++ max_queue_ext->max_rx_cq_num); + +- /* In case of LLQ use the llq number in the get feature cmd */ ++ io_tx_sq_num = max_queue_ext->max_tx_sq_num; ++ io_tx_cq_num = max_queue_ext->max_tx_cq_num; ++ } else { ++ struct ena_admin_queue_feature_desc *max_queues = ++ &get_feat_ctx->max_queues; ++ io_tx_sq_num = max_queues->max_sq_num; ++ io_tx_cq_num = max_queues->max_cq_num; ++ io_rx_num = min_t(int, io_tx_sq_num, io_tx_cq_num); ++ } ++ ++ /* In case of LLQ use the llq fields for the tx SQ/CQ */ + if (ena_dev->tx_mem_queue_type == ENA_ADMIN_PLACEMENT_POLICY_DEV) +- io_sq_num = get_feat_ctx->llq.max_llq_num; +- else +- io_sq_num = get_feat_ctx->max_queues.max_sq_num; ++ io_tx_sq_num = get_feat_ctx->llq.max_llq_num; + + io_queue_num = min_t(int, num_online_cpus(), ENA_MAX_NUM_IO_QUEUES); +- io_queue_num = min_t(int, io_queue_num, io_sq_num); +- io_queue_num = min_t(int, io_queue_num, +- get_feat_ctx->max_queues.max_cq_num); ++ io_queue_num = min_t(int, io_queue_num, io_rx_num); ++ io_queue_num = min_t(int, io_queue_num, io_tx_sq_num); ++ io_queue_num = min_t(int, io_queue_num, io_tx_cq_num); + /* 1 IRQ for for mgmnt and 1 IRQs for each IO direction */ + io_queue_num = min_t(int, io_queue_num, pci_msix_vec_count(pdev) - 1); + if (unlikely(!io_queue_num)) { +@@ -3229,36 +3236,73 @@ static inline void set_default_llq_confi + llq_config->llq_ring_entry_size_value = 128; + } + +-static int ena_calc_queue_size(struct pci_dev *pdev, +- struct ena_com_dev *ena_dev, +- u16 *max_tx_sgl_size, +- u16 *max_rx_sgl_size, +- struct ena_com_dev_get_features_ctx *get_feat_ctx) +-{ +- u32 queue_size = ENA_DEFAULT_RING_SIZE; +- +- queue_size = min_t(u32, queue_size, +- get_feat_ctx->max_queues.max_cq_depth); +- queue_size = min_t(u32, queue_size, +- get_feat_ctx->max_queues.max_sq_depth); ++static int ena_calc_queue_size(struct ena_calc_queue_size_ctx *ctx) ++{ ++ struct ena_admin_feature_llq_desc *llq = &ctx->get_feat_ctx->llq; ++ struct ena_com_dev 
*ena_dev = ctx->ena_dev; ++ u32 tx_queue_size = ENA_DEFAULT_RING_SIZE; ++ u32 rx_queue_size = ENA_DEFAULT_RING_SIZE; ++ u32 max_tx_queue_size; ++ u32 max_rx_queue_size; ++ ++ if (ctx->ena_dev->supported_features & BIT(ENA_ADMIN_MAX_QUEUES_EXT)) { ++ struct ena_admin_queue_ext_feature_fields *max_queue_ext = ++ &ctx->get_feat_ctx->max_queue_ext.max_queue_ext; ++ max_rx_queue_size = min_t(u32, max_queue_ext->max_rx_cq_depth, ++ max_queue_ext->max_rx_sq_depth); ++ max_tx_queue_size = max_queue_ext->max_tx_cq_depth; ++ ++ if (ena_dev->tx_mem_queue_type == ENA_ADMIN_PLACEMENT_POLICY_DEV) ++ max_tx_queue_size = min_t(u32, max_tx_queue_size, ++ llq->max_llq_depth); ++ else ++ max_tx_queue_size = min_t(u32, max_tx_queue_size, ++ max_queue_ext->max_tx_sq_depth); ++ ++ ctx->max_tx_sgl_size = min_t(u16, ENA_PKT_MAX_BUFS, ++ max_queue_ext->max_per_packet_tx_descs); ++ ctx->max_rx_sgl_size = min_t(u16, ENA_PKT_MAX_BUFS, ++ max_queue_ext->max_per_packet_rx_descs); ++ } else { ++ struct ena_admin_queue_feature_desc *max_queues = ++ &ctx->get_feat_ctx->max_queues; ++ max_rx_queue_size = min_t(u32, max_queues->max_cq_depth, ++ max_queues->max_sq_depth); ++ max_tx_queue_size = max_queues->max_cq_depth; ++ ++ if (ena_dev->tx_mem_queue_type == ENA_ADMIN_PLACEMENT_POLICY_DEV) ++ max_tx_queue_size = min_t(u32, max_tx_queue_size, ++ llq->max_llq_depth); ++ else ++ max_tx_queue_size = min_t(u32, max_tx_queue_size, ++ max_queues->max_sq_depth); + +- if (ena_dev->tx_mem_queue_type == ENA_ADMIN_PLACEMENT_POLICY_DEV) +- queue_size = min_t(u32, queue_size, +- get_feat_ctx->llq.max_llq_depth); ++ ctx->max_tx_sgl_size = min_t(u16, ENA_PKT_MAX_BUFS, ++ max_queues->max_packet_tx_descs); ++ ctx->max_rx_sgl_size = min_t(u16, ENA_PKT_MAX_BUFS, ++ max_queues->max_packet_rx_descs); ++ } + +- queue_size = rounddown_pow_of_two(queue_size); ++ max_tx_queue_size = rounddown_pow_of_two(max_tx_queue_size); ++ max_rx_queue_size = rounddown_pow_of_two(max_rx_queue_size); + +- if (unlikely(!queue_size)) { +- 
dev_err(&pdev->dev, "Invalid queue size\n"); ++ tx_queue_size = min_t(u32, tx_queue_size, max_tx_queue_size); ++ rx_queue_size = min_t(u32, rx_queue_size, max_rx_queue_size); ++ ++ tx_queue_size = rounddown_pow_of_two(tx_queue_size); ++ rx_queue_size = rounddown_pow_of_two(rx_queue_size); ++ ++ if (unlikely(!rx_queue_size || !tx_queue_size)) { ++ dev_err(&ctx->pdev->dev, "Invalid queue size\n"); + return -EFAULT; + } + +- *max_tx_sgl_size = min_t(u16, ENA_PKT_MAX_BUFS, +- get_feat_ctx->max_queues.max_packet_tx_descs); +- *max_rx_sgl_size = min_t(u16, ENA_PKT_MAX_BUFS, +- get_feat_ctx->max_queues.max_packet_rx_descs); ++ ctx->max_tx_queue_size = max_tx_queue_size; ++ ctx->max_rx_queue_size = max_rx_queue_size; ++ ctx->tx_queue_size = tx_queue_size; ++ ctx->rx_queue_size = rx_queue_size; + +- return queue_size; ++ return 0; + } + + /* ena_probe - Device Initialization Routine +@@ -3274,6 +3318,7 @@ static int ena_calc_queue_size(struct pc + static int ena_probe(struct pci_dev *pdev, const struct pci_device_id *ent) + { + struct ena_com_dev_get_features_ctx get_feat_ctx; ++ struct ena_calc_queue_size_ctx calc_queue_ctx = { 0 }; + struct ena_llq_configurations llq_config; + struct ena_com_dev *ena_dev = NULL; + struct ena_adapter *adapter; +@@ -3281,9 +3326,6 @@ static int ena_probe(struct pci_dev *pde + struct net_device *netdev; + static int adapters_found; + char *queue_type_str; +- u16 tx_sgl_size = 0; +- u16 rx_sgl_size = 0; +- int queue_size; + bool wd_state; + + dev_dbg(&pdev->dev, "%s\n", __func__); +@@ -3340,20 +3382,25 @@ static int ena_probe(struct pci_dev *pde + goto err_device_destroy; + } + ++ calc_queue_ctx.ena_dev = ena_dev; ++ calc_queue_ctx.get_feat_ctx = &get_feat_ctx; ++ calc_queue_ctx.pdev = pdev; ++ + /* initial Tx interrupt delay, Assumes 1 usec granularity. 
+ * Updated during device initialization with the real granularity + */ + ena_dev->intr_moder_tx_interval = ENA_INTR_INITIAL_TX_INTERVAL_USECS; + io_queue_num = ena_calc_io_queue_num(pdev, ena_dev, &get_feat_ctx); +- queue_size = ena_calc_queue_size(pdev, ena_dev, &tx_sgl_size, +- &rx_sgl_size, &get_feat_ctx); +- if ((queue_size <= 0) || (io_queue_num <= 0)) { ++ rc = ena_calc_queue_size(&calc_queue_ctx); ++ if (rc || io_queue_num <= 0) { + rc = -EFAULT; + goto err_device_destroy; + } + +- dev_info(&pdev->dev, "creating %d io queues. queue size: %d. LLQ is %s\n", +- io_queue_num, queue_size, ++ dev_info(&pdev->dev, "creating %d io queues. rx queue size: %d tx queue size. %d LLQ is %s\n", ++ io_queue_num, ++ calc_queue_ctx.rx_queue_size, ++ calc_queue_ctx.tx_queue_size, + (ena_dev->tx_mem_queue_type == ENA_ADMIN_PLACEMENT_POLICY_DEV) ? + "ENABLED" : "DISABLED"); + +@@ -3379,11 +3426,10 @@ static int ena_probe(struct pci_dev *pde + adapter->msg_enable = netif_msg_init(debug, DEFAULT_MSG_ENABLE); + adapter->reset_reason = ENA_REGS_RESET_NORMAL; + +- adapter->tx_ring_size = queue_size; +- adapter->rx_ring_size = queue_size; +- +- adapter->max_tx_sgl_size = tx_sgl_size; +- adapter->max_rx_sgl_size = rx_sgl_size; ++ adapter->tx_ring_size = calc_queue_ctx.tx_queue_size; ++ adapter->rx_ring_size = calc_queue_ctx.rx_queue_size; ++ adapter->max_tx_sgl_size = calc_queue_ctx.max_tx_sgl_size; ++ adapter->max_rx_sgl_size = calc_queue_ctx.max_rx_sgl_size; + + adapter->num_queues = io_queue_num; + adapter->last_monitored_tx_qid = 0; +Index: linux/drivers/net/ethernet/amazon/ena/ena_netdev.h +=================================================================== +--- linux.orig/drivers/net/ethernet/amazon/ena/ena_netdev.h ++++ linux/drivers/net/ethernet/amazon/ena/ena_netdev.h +@@ -154,6 +154,18 @@ struct ena_napi { + u32 qid; + }; + ++struct ena_calc_queue_size_ctx { ++ struct ena_com_dev_get_features_ctx *get_feat_ctx; ++ struct ena_com_dev *ena_dev; ++ struct pci_dev *pdev; ++ u16 
tx_queue_size; ++ u16 rx_queue_size; ++ u16 max_tx_queue_size; ++ u16 max_rx_queue_size; ++ u16 max_tx_sgl_size; ++ u16 max_rx_sgl_size; ++}; ++ + struct ena_tx_buffer { + struct sk_buff *skb; + /* num of ena desc for this specific skb +@@ -321,6 +333,9 @@ struct ena_adapter { + u32 tx_ring_size; + u32 rx_ring_size; + ++ u32 max_tx_ring_size; ++ u32 max_rx_ring_size; ++ + u32 msg_enable; + + u16 max_tx_sgl_size; diff --git a/debian/patches/features/all/ena/net-ena-enable-the-interrupt_moderation-in-driver_su.patch b/debian/patches/features/all/ena/net-ena-enable-the-interrupt_moderation-in-driver_su.patch new file mode 100644 index 000000000..3eb7e408c --- /dev/null +++ b/debian/patches/features/all/ena/net-ena-enable-the-interrupt_moderation-in-driver_su.patch @@ -0,0 +1,63 @@ +From: Arthur Kiyanovski <akiyano@amazon.com> +Date: Mon, 16 Sep 2019 14:31:29 +0300 +Subject: [PATCH] net: ena: enable the interrupt_moderation in + driver_supported_features +Origin: https://git.kernel.org/linus/bd21b0cc3a63d1c658b230db084b0f392b78cab2 +Bug-Debian: https://bugs.debian.org/941291 + +Add driver_supported_features to host_info which is a new API used to +communicate to the device which features are supported by the driver. + +Add the interrupt_moderation bit to host_info->driver_supported_features +and enable it to signal the device that this driver supports interrupt +moderation properly. + +Reserved bits are for features implemented in the future + +Signed-off-by: Arthur Kiyanovski <akiyano@amazon.com> +Signed-off-by: David S. 
Miller <davem@davemloft.net> +--- + drivers/net/ethernet/amazon/ena/ena_admin_defs.h | 8 ++++++++ + drivers/net/ethernet/amazon/ena/ena_netdev.c | 3 +++ + 2 files changed, 11 insertions(+) + +Index: linux/drivers/net/ethernet/amazon/ena/ena_admin_defs.h +=================================================================== +--- linux.orig/drivers/net/ethernet/amazon/ena/ena_admin_defs.h ++++ linux/drivers/net/ethernet/amazon/ena/ena_admin_defs.h +@@ -808,6 +808,12 @@ struct ena_admin_host_info { + u16 num_cpus; + + u16 reserved; ++ ++ /* 1 :0 : reserved ++ * 2 : interrupt_moderation ++ * 31:3 : reserved ++ */ ++ u32 driver_supported_features; + }; + + struct ena_admin_rss_ind_table_entry { +@@ -1110,6 +1116,8 @@ struct ena_admin_ena_mmio_req_read_less_ + #define ENA_ADMIN_HOST_INFO_DEVICE_MASK GENMASK(7, 3) + #define ENA_ADMIN_HOST_INFO_BUS_SHIFT 8 + #define ENA_ADMIN_HOST_INFO_BUS_MASK GENMASK(15, 8) ++#define ENA_ADMIN_HOST_INFO_INTERRUPT_MODERATION_SHIFT 2 ++#define ENA_ADMIN_HOST_INFO_INTERRUPT_MODERATION_MASK BIT(2) + + /* aenq_common_desc */ + #define ENA_ADMIN_AENQ_COMMON_DESC_PHASE_MASK BIT(0) +Index: linux/drivers/net/ethernet/amazon/ena/ena_netdev.c +=================================================================== +--- linux.orig/drivers/net/ethernet/amazon/ena/ena_netdev.c ++++ linux/drivers/net/ethernet/amazon/ena/ena_netdev.c +@@ -2440,6 +2440,9 @@ static void ena_config_host_info(struct + ("K"[0] << ENA_ADMIN_HOST_INFO_MODULE_TYPE_SHIFT); + host_info->num_cpus = num_online_cpus(); + ++ host_info->driver_supported_features = ++ ENA_ADMIN_HOST_INFO_INTERRUPT_MODERATION_MASK; ++ + rc = ena_com_set_host_attributes(ena_dev); + if (rc) { + if (rc == -EOPNOTSUPP) diff --git a/debian/patches/features/all/ena/net-ena-fix-incorrect-update-of-intr_delay_resolutio.patch b/debian/patches/features/all/ena/net-ena-fix-incorrect-update-of-intr_delay_resolutio.patch new file mode 100644 index 000000000..e194ecde9 --- /dev/null +++ 
b/debian/patches/features/all/ena/net-ena-fix-incorrect-update-of-intr_delay_resolutio.patch @@ -0,0 +1,89 @@ +From: Arthur Kiyanovski <akiyano@amazon.com> +Date: Mon, 16 Sep 2019 14:31:36 +0300 +Subject: [PATCH] net: ena: fix incorrect update of intr_delay_resolution +Origin: https://git.kernel.org/linus/79226cea4a5ebbd84a4eee1762526f664c7beb62 +Bug-Debian: https://bugs.debian.org/941291 + +ena_dev->intr_moder_rx/tx_interval save the intervals received from the +user after dividing them by ena_dev->intr_delay_resolution. Therefore +when intr_delay_resolution changes, the code needs to first multiply +intr_moder_rx/tx_interval by the previous intr_delay_resolution to get +the value originally given by the user, and only then divide it by the +new intr_delay_resolution. + +Current code does not first multiply intr_moder_rx/tx_interval by the old +intr_delay_resolution. This commit fixes it. + +Also initialize ena_dev->intr_delay_resolution to be 1. + +Signed-off-by: Arthur Kiyanovski <akiyano@amazon.com> +Signed-off-by: David S. Miller <davem@davemloft.net> +--- + drivers/net/ethernet/amazon/ena/ena_com.c | 21 ++++++++++++++++---- + drivers/net/ethernet/amazon/ena/ena_com.h | 1 + + drivers/net/ethernet/amazon/ena/ena_netdev.c | 1 + + 3 files changed, 19 insertions(+), 4 deletions(-) + +Index: linux/drivers/net/ethernet/amazon/ena/ena_com.c +=================================================================== +--- linux.orig/drivers/net/ethernet/amazon/ena/ena_com.c ++++ linux/drivers/net/ethernet/amazon/ena/ena_com.c +@@ -1281,17 +1281,30 @@ static int ena_com_ind_tbl_convert_from_ + static void ena_com_update_intr_delay_resolution(struct ena_com_dev *ena_dev, + u16 intr_delay_resolution) + { ++ /* Initial value of intr_delay_resolution might be 0 */ ++ u16 prev_intr_delay_resolution = ++ ena_dev->intr_delay_resolution ? 
++ ena_dev->intr_delay_resolution : ++ ENA_DEFAULT_INTR_DELAY_RESOLUTION; ++ + if (!intr_delay_resolution) { + pr_err("Illegal intr_delay_resolution provided. Going to use default 1 usec resolution\n"); +- intr_delay_resolution = 1; ++ intr_delay_resolution = ENA_DEFAULT_INTR_DELAY_RESOLUTION; + } +- ena_dev->intr_delay_resolution = intr_delay_resolution; + + /* update Rx */ +- ena_dev->intr_moder_rx_interval /= intr_delay_resolution; ++ ena_dev->intr_moder_rx_interval = ++ ena_dev->intr_moder_rx_interval * ++ prev_intr_delay_resolution / ++ intr_delay_resolution; + + /* update Tx */ +- ena_dev->intr_moder_tx_interval /= intr_delay_resolution; ++ ena_dev->intr_moder_tx_interval = ++ ena_dev->intr_moder_tx_interval * ++ prev_intr_delay_resolution / ++ intr_delay_resolution; ++ ++ ena_dev->intr_delay_resolution = intr_delay_resolution; + } + + /*****************************************************************************/ +Index: linux/drivers/net/ethernet/amazon/ena/ena_com.h +=================================================================== +--- linux.orig/drivers/net/ethernet/amazon/ena/ena_com.h ++++ linux/drivers/net/ethernet/amazon/ena/ena_com.h +@@ -74,6 +74,7 @@ + + #define ENA_INTR_INITIAL_TX_INTERVAL_USECS 196 + #define ENA_INTR_INITIAL_RX_INTERVAL_USECS 0 ++#define ENA_DEFAULT_INTR_DELAY_RESOLUTION 1 + + #define ENA_HW_HINTS_NO_TIMEOUT 0xFFFF + +Index: linux/drivers/net/ethernet/amazon/ena/ena_netdev.c +=================================================================== +--- linux.orig/drivers/net/ethernet/amazon/ena/ena_netdev.c ++++ linux/drivers/net/ethernet/amazon/ena/ena_netdev.c +@@ -3502,6 +3502,7 @@ static int ena_probe(struct pci_dev *pde + */ + ena_dev->intr_moder_tx_interval = ENA_INTR_INITIAL_TX_INTERVAL_USECS; + ena_dev->intr_moder_rx_interval = ENA_INTR_INITIAL_RX_INTERVAL_USECS; ++ ena_dev->intr_delay_resolution = ENA_DEFAULT_INTR_DELAY_RESOLUTION; + io_queue_num = ena_calc_io_queue_num(pdev, ena_dev, &get_feat_ctx); + rc = 
ena_calc_queue_size(&calc_queue_ctx); + if (rc || io_queue_num <= 0) { diff --git a/debian/patches/features/all/ena/net-ena-fix-retrieval-of-nonadaptive-interrupt-moder.patch b/debian/patches/features/all/ena/net-ena-fix-retrieval-of-nonadaptive-interrupt-moder.patch new file mode 100644 index 000000000..353f87dbf --- /dev/null +++ b/debian/patches/features/all/ena/net-ena-fix-retrieval-of-nonadaptive-interrupt-moder.patch @@ -0,0 +1,45 @@ +From: Arthur Kiyanovski <akiyano@amazon.com> +Date: Mon, 16 Sep 2019 14:31:35 +0300 +Subject: [PATCH] net: ena: fix retrieval of nonadaptive interrupt moderation + intervals +Origin: https://git.kernel.org/linus/0eda847953d8dfb4b713ea62420f66157e230e13 +Bug-Debian: https://bugs.debian.org/941291 + +Nonadaptive interrupt moderation intervals are assigned the value set +by the user in ethtool -C divided by ena_dev->intr_delay_resolution. + +Therefore when the user tries to get the nonadaptive interrupt moderation +intervals with ethtool -c the code needs to multiply the saved value +by ena_dev->intr_delay_resolution. + +The current code erroneously divides instead of multiplying in ethtool -c. +This patch fixes this. + +Signed-off-by: Arthur Kiyanovski <akiyano@amazon.com> +Signed-off-by: David S. 
Miller <davem@davemloft.net> +--- + drivers/net/ethernet/amazon/ena/ena_ethtool.c | 5 +++-- + 1 file changed, 3 insertions(+), 2 deletions(-) + +Index: linux/drivers/net/ethernet/amazon/ena/ena_ethtool.c +=================================================================== +--- linux.orig/drivers/net/ethernet/amazon/ena/ena_ethtool.c ++++ linux/drivers/net/ethernet/amazon/ena/ena_ethtool.c +@@ -310,14 +310,15 @@ static int ena_get_coalesce(struct net_d + /* the devie doesn't support interrupt moderation */ + return -EOPNOTSUPP; + } ++ + coalesce->tx_coalesce_usecs = +- ena_com_get_nonadaptive_moderation_interval_tx(ena_dev) / ++ ena_com_get_nonadaptive_moderation_interval_tx(ena_dev) * + ena_dev->intr_delay_resolution; + + if (!ena_com_get_adaptive_moderation_enabled(ena_dev)) + coalesce->rx_coalesce_usecs = + ena_com_get_nonadaptive_moderation_interval_rx(ena_dev) +- / ena_dev->intr_delay_resolution; ++ * ena_dev->intr_delay_resolution; + + coalesce->use_adaptive_rx_coalesce = + ena_com_get_adaptive_moderation_enabled(ena_dev); diff --git a/debian/patches/features/all/ena/net-ena-fix-return-value-of-ena_com_config_llq_info.patch b/debian/patches/features/all/ena/net-ena-fix-return-value-of-ena_com_config_llq_info.patch new file mode 100644 index 000000000..f757bf178 --- /dev/null +++ b/debian/patches/features/all/ena/net-ena-fix-return-value-of-ena_com_config_llq_info.patch @@ -0,0 +1,34 @@ +From: Sameeh Jubran <sameehj@amazon.com> +Date: Wed, 1 May 2019 16:47:07 +0300 +Subject: [PATCH] net: ena: fix return value of ena_com_config_llq_info() +Origin: https://git.kernel.org/linus/9a27de0c6ba10fe1af74d16d3524425e52c1ba3e +Bug-Debian: https://bugs.debian.org/941291 + +ena_com_config_llq_info() returns 0 even if ena_com_set_llq() fails. +Return the failure code of ena_com_set_llq() in case it fails. 
+ +fixes: 689b2bdaaa14 ("net: ena: add functions for handling Low Latency Queues in ena_com") + +Signed-off-by: Arthur Kiyanovski <akiyano@amazon.com> +Signed-off-by: Sameeh Jubran <sameehj@amazon.com> +Signed-off-by: David S. Miller <davem@davemloft.net> +--- + drivers/net/ethernet/amazon/ena/ena_com.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/drivers/net/ethernet/amazon/ena/ena_com.c b/drivers/net/ethernet/amazon/ena/ena_com.c +index f9bc0b831a1a..4fe437fe771b 100644 +--- a/drivers/net/ethernet/amazon/ena/ena_com.c ++++ b/drivers/net/ethernet/amazon/ena/ena_com.c +@@ -731,7 +731,7 @@ static int ena_com_config_llq_info(struct ena_com_dev *ena_dev, + if (rc) + pr_err("Cannot set LLQ configuration: %d\n", rc); + +- return 0; ++ return rc; + } + + static int ena_com_wait_and_process_admin_cq_interrupts(struct ena_comp_ctx *comp_ctx, +-- +2.17.1 + diff --git a/debian/patches/features/all/ena/net-ena-fix-set-freed-objects-to-NULL-to-avoid-faili.patch b/debian/patches/features/all/ena/net-ena-fix-set-freed-objects-to-NULL-to-avoid-faili.patch new file mode 100644 index 000000000..bc30849de --- /dev/null +++ b/debian/patches/features/all/ena/net-ena-fix-set-freed-objects-to-NULL-to-avoid-faili.patch @@ -0,0 +1,91 @@ +From: Sameeh Jubran <sameehj@amazon.com> +Date: Wed, 1 May 2019 16:47:04 +0300 +Subject: [PATCH] net: ena: fix: set freed objects to NULL to avoid failing + future allocations +Origin: https://git.kernel.org/linus/8ee8ee7fe87bf64738ab4e31be036a7165608b27 +Bug-Debian: https://bugs.debian.org/941291 + +In some cases when a queue related allocation fails, successful past +allocations are freed but the pointer that pointed to them is not +set to NULL. This is a problem for 2 reasons: +1. This is generally a bad practice since this pointer might be +accidentally accessed in the future. +2. Future allocations using the same pointer check if the pointer +is NULL and fail if it is not. 
+ +Fixed this by setting such pointers to NULL in the allocation of +queue related objects. + +Also refactored the code of ena_setup_tx_resources() to goto-style +error handling to avoid code duplication of resource freeing. + +Fixes: 1738cd3ed342 ("net: ena: Add a driver for Amazon Elastic Network Adapters (ENA)") +Signed-off-by: Arthur Kiyanovski <akiyano@amazon.com> +Signed-off-by: Sameeh Jubran <sameehj@amazon.com> +Signed-off-by: David S. Miller <davem@davemloft.net> +--- + drivers/net/ethernet/amazon/ena/ena_netdev.c | 25 ++++++++++++-------- + 1 file changed, 15 insertions(+), 10 deletions(-) + +Index: linux/drivers/net/ethernet/amazon/ena/ena_netdev.c +=================================================================== +--- linux.orig/drivers/net/ethernet/amazon/ena/ena_netdev.c ++++ linux/drivers/net/ethernet/amazon/ena/ena_netdev.c +@@ -224,28 +224,23 @@ static int ena_setup_tx_resources(struct + if (!tx_ring->tx_buffer_info) { + tx_ring->tx_buffer_info = vzalloc(size); + if (!tx_ring->tx_buffer_info) +- return -ENOMEM; ++ goto err_tx_buffer_info; + } + + size = sizeof(u16) * tx_ring->ring_size; + tx_ring->free_tx_ids = vzalloc_node(size, node); + if (!tx_ring->free_tx_ids) { + tx_ring->free_tx_ids = vzalloc(size); +- if (!tx_ring->free_tx_ids) { +- vfree(tx_ring->tx_buffer_info); +- return -ENOMEM; +- } ++ if (!tx_ring->free_tx_ids) ++ goto err_free_tx_ids; + } + + size = tx_ring->tx_max_header_size; + tx_ring->push_buf_intermediate_buf = vzalloc_node(size, node); + if (!tx_ring->push_buf_intermediate_buf) { + tx_ring->push_buf_intermediate_buf = vzalloc(size); +- if (!tx_ring->push_buf_intermediate_buf) { +- vfree(tx_ring->tx_buffer_info); +- vfree(tx_ring->free_tx_ids); +- return -ENOMEM; +- } ++ if (!tx_ring->push_buf_intermediate_buf) ++ goto err_push_buf_intermediate_buf; + } + + /* Req id ring for TX out of order completions */ +@@ -259,6 +254,15 @@ static int ena_setup_tx_resources(struct + tx_ring->next_to_clean = 0; + tx_ring->cpu = 
ena_irq->cpu; + return 0; ++ ++err_push_buf_intermediate_buf: ++ vfree(tx_ring->free_tx_ids); ++ tx_ring->free_tx_ids = NULL; ++err_free_tx_ids: ++ vfree(tx_ring->tx_buffer_info); ++ tx_ring->tx_buffer_info = NULL; ++err_tx_buffer_info: ++ return -ENOMEM; + } + + /* ena_free_tx_resources - Free I/O Tx Resources per Queue +@@ -378,6 +382,7 @@ static int ena_setup_rx_resources(struct + rx_ring->free_rx_ids = vzalloc(size); + if (!rx_ring->free_rx_ids) { + vfree(rx_ring->rx_buffer_info); ++ rx_ring->rx_buffer_info = NULL; + return -ENOMEM; + } + } diff --git a/debian/patches/features/all/ena/net-ena-fix-update-of-interrupt-moderation-register.patch b/debian/patches/features/all/ena/net-ena-fix-update-of-interrupt-moderation-register.patch new file mode 100644 index 000000000..72137a523 --- /dev/null +++ b/debian/patches/features/all/ena/net-ena-fix-update-of-interrupt-moderation-register.patch @@ -0,0 +1,39 @@ +From: Arthur Kiyanovski <akiyano@amazon.com> +Date: Mon, 16 Sep 2019 14:31:34 +0300 +Subject: [PATCH] net: ena: fix update of interrupt moderation register +Origin: https://git.kernel.org/linus/7b8a28787e2ba671eaeb073e3b62fb4786338a09 +Bug-Debian: https://bugs.debian.org/941291 + +Current implementation always updates the interrupt register with +the smoothed_interval of the rx_ring. However this should be +done only in case of adaptive interrupt moderation. If non-adaptive +interrupt moderation is used, the non-adaptive interrupt moderation +interval should be used. This commit fixes that. + +Signed-off-by: Arthur Kiyanovski <akiyano@amazon.com> +Signed-off-by: David S. 
Miller <davem@davemloft.net> +--- + drivers/net/ethernet/amazon/ena/ena_netdev.c | 5 ++++- + 1 file changed, 4 insertions(+), 1 deletion(-) + +Index: linux/drivers/net/ethernet/amazon/ena/ena_netdev.c +=================================================================== +--- linux.orig/drivers/net/ethernet/amazon/ena/ena_netdev.c ++++ linux/drivers/net/ethernet/amazon/ena/ena_netdev.c +@@ -1188,12 +1188,15 @@ static void ena_unmask_interrupt(struct + struct ena_ring *rx_ring) + { + struct ena_eth_io_intr_reg intr_reg; ++ u32 rx_interval = ena_com_get_adaptive_moderation_enabled(rx_ring->ena_dev) ? ++ rx_ring->smoothed_interval : ++ ena_com_get_nonadaptive_moderation_interval_rx(rx_ring->ena_dev); + + /* Update intr register: rx intr delay, + * tx intr delay and interrupt unmask + */ + ena_com_update_intr_reg(&intr_reg, +- rx_ring->smoothed_interval, ++ rx_interval, + tx_ring->smoothed_interval, + true); + diff --git a/debian/patches/features/all/ena/net-ena-improve-latency-by-disabling-adaptive-interr.patch b/debian/patches/features/all/ena/net-ena-improve-latency-by-disabling-adaptive-interr.patch new file mode 100644 index 000000000..ec5dc707c --- /dev/null +++ b/debian/patches/features/all/ena/net-ena-improve-latency-by-disabling-adaptive-interr.patch @@ -0,0 +1,41 @@ +From: Sameeh Jubran <sameehj@amazon.com> +Date: Wed, 1 May 2019 16:47:08 +0300 +Subject: [PATCH] net: ena: improve latency by disabling adaptive interrupt + moderation by default +Origin: https://git.kernel.org/linus/78cb421d185cfb4fcea94e7c3ff6e6ea77bb8c11 +Bug-Debian: https://bugs.debian.org/941291 + +Adaptive interrupt moderation was erroneously enabled by default +in the driver. + +In case the device supports adaptive interrupt moderation it will +be automatically used, which may potentially increase latency. + +The adaptive moderation can be enabled from ethtool command in +case the feature is supported by the device. 
+ +Fixes: 1738cd3ed342 ("net: ena: Add a driver for Amazon Elastic Network Adapters (ENA)") +Signed-off-by: Guy Tzalik <gtzalik@amazon.com> +Signed-off-by: Sameeh Jubran <sameehj@amazon.com> +Signed-off-by: David S. Miller <davem@davemloft.net> +--- + drivers/net/ethernet/amazon/ena/ena_com.c | 6 +++++- + 1 file changed, 5 insertions(+), 1 deletion(-) + +Index: linux/drivers/net/ethernet/amazon/ena/ena_com.c +=================================================================== +--- linux.orig/drivers/net/ethernet/amazon/ena/ena_com.c ++++ linux/drivers/net/ethernet/amazon/ena/ena_com.c +@@ -2801,7 +2801,11 @@ int ena_com_init_interrupt_moderation(st + /* if moderation is supported by device we set adaptive moderation */ + delay_resolution = get_resp.u.intr_moderation.intr_delay_resolution; + ena_com_update_intr_delay_resolution(ena_dev, delay_resolution); +- ena_com_enable_adaptive_moderation(ena_dev); ++ ++ /* Disable adaptive moderation by default - can be enabled from ++ * ethtool ++ */ ++ ena_com_disable_adaptive_moderation(ena_dev); + + return 0; + err: diff --git a/debian/patches/features/all/ena/net-ena-make-ethtool-show-correct-current-and-max-qu.patch b/debian/patches/features/all/ena/net-ena-make-ethtool-show-correct-current-and-max-qu.patch new file mode 100644 index 000000000..3367bc9f9 --- /dev/null +++ b/debian/patches/features/all/ena/net-ena-make-ethtool-show-correct-current-and-max-qu.patch @@ -0,0 +1,53 @@ +From: Sameeh Jubran <sameehj@amazon.com> +Date: Tue, 11 Jun 2019 14:58:07 +0300 +Subject: [PATCH] net: ena: make ethtool show correct current and max queue + sizes +Origin: https://git.kernel.org/linus/9f9ae3f98b8d8b8aa709831057759dbb52ba5083 +Bug-Debian: https://bugs.debian.org/941291 + +Currently ethtool -g shows the same size for current and max queue +sizes. + +Signed-off-by: Arthur Kiyanovski <akiyano@amazon.com> +Signed-off-by: Sameeh Jubran <sameehj@amazon.com> +Signed-off-by: David S. 
Miller <davem@davemloft.net> +--- + drivers/net/ethernet/amazon/ena/ena_ethtool.c | 10 ++++------ + drivers/net/ethernet/amazon/ena/ena_netdev.c | 2 ++ + 2 files changed, 6 insertions(+), 6 deletions(-) + +Index: linux/drivers/net/ethernet/amazon/ena/ena_ethtool.c +=================================================================== +--- linux.orig/drivers/net/ethernet/amazon/ena/ena_ethtool.c ++++ linux/drivers/net/ethernet/amazon/ena/ena_ethtool.c +@@ -447,13 +447,11 @@ static void ena_get_ringparam(struct net + struct ethtool_ringparam *ring) + { + struct ena_adapter *adapter = netdev_priv(netdev); +- struct ena_ring *tx_ring = &adapter->tx_ring[0]; +- struct ena_ring *rx_ring = &adapter->rx_ring[0]; + +- ring->rx_max_pending = rx_ring->ring_size; +- ring->tx_max_pending = tx_ring->ring_size; +- ring->rx_pending = rx_ring->ring_size; +- ring->tx_pending = tx_ring->ring_size; ++ ring->tx_max_pending = adapter->max_tx_ring_size; ++ ring->rx_max_pending = adapter->max_rx_ring_size; ++ ring->tx_pending = adapter->tx_ring_size; ++ ring->rx_pending = adapter->rx_ring_size; + } + + static u32 ena_flow_hash_to_flow_type(u16 hash_fields) +Index: linux/drivers/net/ethernet/amazon/ena/ena_netdev.c +=================================================================== +--- linux.orig/drivers/net/ethernet/amazon/ena/ena_netdev.c ++++ linux/drivers/net/ethernet/amazon/ena/ena_netdev.c +@@ -3428,6 +3428,8 @@ static int ena_probe(struct pci_dev *pde + + adapter->tx_ring_size = calc_queue_ctx.tx_queue_size; + adapter->rx_ring_size = calc_queue_ctx.rx_queue_size; ++ adapter->max_tx_ring_size = calc_queue_ctx.max_tx_queue_size; ++ adapter->max_rx_ring_size = calc_queue_ctx.max_rx_queue_size; + adapter->max_tx_sgl_size = calc_queue_ctx.max_tx_sgl_size; + adapter->max_rx_sgl_size = calc_queue_ctx.max_rx_sgl_size; + diff --git a/debian/patches/features/all/ena/net-ena-optimise-calculations-for-CQ-doorbell.patch 
b/debian/patches/features/all/ena/net-ena-optimise-calculations-for-CQ-doorbell.patch new file mode 100644 index 000000000..c790eae13 --- /dev/null +++ b/debian/patches/features/all/ena/net-ena-optimise-calculations-for-CQ-doorbell.patch @@ -0,0 +1,50 @@ +From: Sameeh Jubran <sameehj@amazon.com> +Date: Mon, 3 Jun 2019 17:43:27 +0300 +Subject: [PATCH] net: ena: optimise calculations for CQ doorbell +Origin: https://git.kernel.org/linus/d91860989dd4bce582ed6c3647a0d41d6fd895b3 +Bug-Debian: https://bugs.debian.org/941291 + +This patch initially checks if CQ doorbell +is needed before proceeding with the calculations. + +Signed-off-by: Igor Chauskin <igorch@amazon.com> +Signed-off-by: Sameeh Jubran <sameehj@amazon.com> +Signed-off-by: David S. Miller <davem@davemloft.net> +--- + drivers/net/ethernet/amazon/ena/ena_eth_com.h | 20 ++++++++++--------- + 1 file changed, 11 insertions(+), 9 deletions(-) + +diff --git a/drivers/net/ethernet/amazon/ena/ena_eth_com.h b/drivers/net/ethernet/amazon/ena/ena_eth_com.h +index 0a3d9180e40e..77986c0ea52c 100644 +--- a/drivers/net/ethernet/amazon/ena/ena_eth_com.h ++++ b/drivers/net/ethernet/amazon/ena/ena_eth_com.h +@@ -195,15 +195,17 @@ static inline int ena_com_update_dev_comp_head(struct ena_com_io_cq *io_cq) + u16 unreported_comp, head; + bool need_update; + +- head = io_cq->head; +- unreported_comp = head - io_cq->last_head_update; +- need_update = unreported_comp > (io_cq->q_depth / ENA_COMP_HEAD_THRESH); +- +- if (io_cq->cq_head_db_reg && need_update) { +- pr_debug("Write completion queue doorbell for queue %d: head: %d\n", +- io_cq->qid, head); +- writel(head, io_cq->cq_head_db_reg); +- io_cq->last_head_update = head; ++ if (unlikely(io_cq->cq_head_db_reg)) { ++ head = io_cq->head; ++ unreported_comp = head - io_cq->last_head_update; ++ need_update = unreported_comp > (io_cq->q_depth / ENA_COMP_HEAD_THRESH); ++ ++ if (unlikely(need_update)) { ++ pr_debug("Write completion queue doorbell for queue %d: head: %d\n", ++ 
io_cq->qid, head); ++ writel(head, io_cq->cq_head_db_reg); ++ io_cq->last_head_update = head; ++ } + } + + return 0; +-- +2.17.1 + diff --git a/debian/patches/features/all/ena/net-ena-reimplement-set-get_coalesce.patch b/debian/patches/features/all/ena/net-ena-reimplement-set-get_coalesce.patch new file mode 100644 index 000000000..c88ebef3a --- /dev/null +++ b/debian/patches/features/all/ena/net-ena-reimplement-set-get_coalesce.patch @@ -0,0 +1,158 @@ +From: Arthur Kiyanovski <akiyano@amazon.com> +Date: Mon, 16 Sep 2019 14:31:28 +0300 +Subject: [PATCH] net: ena: reimplement set/get_coalesce() +Origin: https://git.kernel.org/linus/b3db86dc4b82ffc63e33c78dafc09d5c78ac4fe4 +Bug-Debian: https://bugs.debian.org/941291 + +1. Remove old adaptive interrupt moderation code from set/get_coalesce() +2. Add ena_update_rx_rings_intr_moderation() function for updating + nonadaptive interrupt moderation intervals similarly to + ena_update_tx_rings_intr_moderation(). +3. Remove checks of multiple unsupported received interrupt coalescing + parameters. This makes code cleaner and cancels the need to update + it every time a new coalescing parameter is invented. + +Signed-off-by: Arthur Kiyanovski <akiyano@amazon.com> +Signed-off-by: David S. 
Miller <davem@davemloft.net> +--- + drivers/net/ethernet/amazon/ena/ena_ethtool.c | 84 ++++++------------- + 1 file changed, 26 insertions(+), 58 deletions(-) + +Index: linux/drivers/net/ethernet/amazon/ena/ena_ethtool.c +=================================================================== +--- linux.orig/drivers/net/ethernet/amazon/ena/ena_ethtool.c ++++ linux/drivers/net/ethernet/amazon/ena/ena_ethtool.c +@@ -305,7 +305,6 @@ static int ena_get_coalesce(struct net_d + { + struct ena_adapter *adapter = netdev_priv(net_dev); + struct ena_com_dev *ena_dev = adapter->ena_dev; +- struct ena_intr_moder_entry intr_moder_entry; + + if (!ena_com_interrupt_moderation_supported(ena_dev)) { + /* the devie doesn't support interrupt moderation */ +@@ -314,23 +313,12 @@ static int ena_get_coalesce(struct net_d + coalesce->tx_coalesce_usecs = + ena_com_get_nonadaptive_moderation_interval_tx(ena_dev) / + ena_dev->intr_delay_resolution; +- if (!ena_com_get_adaptive_moderation_enabled(ena_dev)) { ++ ++ if (!ena_com_get_adaptive_moderation_enabled(ena_dev)) + coalesce->rx_coalesce_usecs = + ena_com_get_nonadaptive_moderation_interval_rx(ena_dev) + / ena_dev->intr_delay_resolution; +- } else { +- ena_com_get_intr_moderation_entry(adapter->ena_dev, ENA_INTR_MODER_LOWEST, &intr_moder_entry); +- coalesce->rx_coalesce_usecs_low = intr_moder_entry.intr_moder_interval; +- coalesce->rx_max_coalesced_frames_low = intr_moder_entry.pkts_per_interval; +- +- ena_com_get_intr_moderation_entry(adapter->ena_dev, ENA_INTR_MODER_MID, &intr_moder_entry); +- coalesce->rx_coalesce_usecs = intr_moder_entry.intr_moder_interval; +- coalesce->rx_max_coalesced_frames = intr_moder_entry.pkts_per_interval; +- +- ena_com_get_intr_moderation_entry(adapter->ena_dev, ENA_INTR_MODER_HIGHEST, &intr_moder_entry); +- coalesce->rx_coalesce_usecs_high = intr_moder_entry.intr_moder_interval; +- coalesce->rx_max_coalesced_frames_high = intr_moder_entry.pkts_per_interval; +- } ++ + coalesce->use_adaptive_rx_coalesce = + 
ena_com_get_adaptive_moderation_enabled(ena_dev); + +@@ -348,12 +336,22 @@ static void ena_update_tx_rings_intr_mod + adapter->tx_ring[i].smoothed_interval = val; + } + ++static void ena_update_rx_rings_intr_moderation(struct ena_adapter *adapter) ++{ ++ unsigned int val; ++ int i; ++ ++ val = ena_com_get_nonadaptive_moderation_interval_rx(adapter->ena_dev); ++ ++ for (i = 0; i < adapter->num_queues; i++) ++ adapter->rx_ring[i].smoothed_interval = val; ++} ++ + static int ena_set_coalesce(struct net_device *net_dev, + struct ethtool_coalesce *coalesce) + { + struct ena_adapter *adapter = netdev_priv(net_dev); + struct ena_com_dev *ena_dev = adapter->ena_dev; +- struct ena_intr_moder_entry intr_moder_entry; + int rc; + + if (!ena_com_interrupt_moderation_supported(ena_dev)) { +@@ -361,22 +359,6 @@ static int ena_set_coalesce(struct net_d + return -EOPNOTSUPP; + } + +- if (coalesce->rx_coalesce_usecs_irq || +- coalesce->rx_max_coalesced_frames_irq || +- coalesce->tx_coalesce_usecs_irq || +- coalesce->tx_max_coalesced_frames || +- coalesce->tx_max_coalesced_frames_irq || +- coalesce->stats_block_coalesce_usecs || +- coalesce->use_adaptive_tx_coalesce || +- coalesce->pkt_rate_low || +- coalesce->tx_coalesce_usecs_low || +- coalesce->tx_max_coalesced_frames_low || +- coalesce->pkt_rate_high || +- coalesce->tx_coalesce_usecs_high || +- coalesce->tx_max_coalesced_frames_high || +- coalesce->rate_sample_interval) +- return -EINVAL; +- + rc = ena_com_update_nonadaptive_moderation_interval_tx(ena_dev, + coalesce->tx_coalesce_usecs); + if (rc) +@@ -384,37 +366,23 @@ static int ena_set_coalesce(struct net_d + + ena_update_tx_rings_intr_moderation(adapter); + +- if (ena_com_get_adaptive_moderation_enabled(ena_dev)) { +- if (!coalesce->use_adaptive_rx_coalesce) { +- ena_com_disable_adaptive_moderation(ena_dev); +- rc = ena_com_update_nonadaptive_moderation_interval_rx(ena_dev, +- coalesce->rx_coalesce_usecs); +- return rc; +- } +- } else { /* was in non-adaptive mode */ +- if 
(coalesce->use_adaptive_rx_coalesce) { ++ if (coalesce->use_adaptive_rx_coalesce) { ++ if (!ena_com_get_adaptive_moderation_enabled(ena_dev)) + ena_com_enable_adaptive_moderation(ena_dev); +- } else { +- rc = ena_com_update_nonadaptive_moderation_interval_rx(ena_dev, +- coalesce->rx_coalesce_usecs); +- return rc; +- } ++ return 0; + } + +- intr_moder_entry.intr_moder_interval = coalesce->rx_coalesce_usecs_low; +- intr_moder_entry.pkts_per_interval = coalesce->rx_max_coalesced_frames_low; +- intr_moder_entry.bytes_per_interval = ENA_INTR_BYTE_COUNT_NOT_SUPPORTED; +- ena_com_init_intr_moderation_entry(adapter->ena_dev, ENA_INTR_MODER_LOWEST, &intr_moder_entry); +- +- intr_moder_entry.intr_moder_interval = coalesce->rx_coalesce_usecs; +- intr_moder_entry.pkts_per_interval = coalesce->rx_max_coalesced_frames; +- intr_moder_entry.bytes_per_interval = ENA_INTR_BYTE_COUNT_NOT_SUPPORTED; +- ena_com_init_intr_moderation_entry(adapter->ena_dev, ENA_INTR_MODER_MID, &intr_moder_entry); +- +- intr_moder_entry.intr_moder_interval = coalesce->rx_coalesce_usecs_high; +- intr_moder_entry.pkts_per_interval = coalesce->rx_max_coalesced_frames_high; +- intr_moder_entry.bytes_per_interval = ENA_INTR_BYTE_COUNT_NOT_SUPPORTED; +- ena_com_init_intr_moderation_entry(adapter->ena_dev, ENA_INTR_MODER_HIGHEST, &intr_moder_entry); ++ rc = ena_com_update_nonadaptive_moderation_interval_rx(ena_dev, ++ coalesce->rx_coalesce_usecs); ++ if (rc) ++ return rc; ++ ++ ena_update_rx_rings_intr_moderation(adapter); ++ ++ if (!coalesce->use_adaptive_rx_coalesce) { ++ if (ena_com_get_adaptive_moderation_enabled(ena_dev)) ++ ena_com_disable_adaptive_moderation(ena_dev); ++ } + + return 0; + } diff --git a/debian/patches/features/all/ena/net-ena-remove-all-old-adaptive-rx-interrupt-moderat.patch b/debian/patches/features/all/ena/net-ena-remove-all-old-adaptive-rx-interrupt-moderat.patch new file mode 100644 index 000000000..2f3c2839d --- /dev/null +++ 
b/debian/patches/features/all/ena/net-ena-remove-all-old-adaptive-rx-interrupt-moderat.patch @@ -0,0 +1,361 @@ +From: Arthur Kiyanovski <akiyano@amazon.com> +Date: Mon, 16 Sep 2019 14:31:33 +0300 +Subject: [PATCH] net: ena: remove all old adaptive rx interrupt moderation + code from ena_com +Origin: https://git.kernel.org/linus/3ced8cbdf7ddb3160ffa714a91040dd18f39a12c +Bug-Debian: https://bugs.debian.org/941291 + +Remove previous implementation of adaptive rx interrupt moderation +from ena_com files. + +Signed-off-by: Arthur Kiyanovski <akiyano@amazon.com> +Signed-off-by: David S. Miller <davem@davemloft.net> +--- + drivers/net/ethernet/amazon/ena/ena_com.c | 110 ----------------- + drivers/net/ethernet/amazon/ena/ena_com.h | 142 ---------------------- + 2 files changed, 252 deletions(-) + +Index: linux/drivers/net/ethernet/amazon/ena/ena_com.c +=================================================================== +--- linux.orig/drivers/net/ethernet/amazon/ena/ena_com.c ++++ linux/drivers/net/ethernet/amazon/ena/ena_com.c +@@ -1278,22 +1278,6 @@ static int ena_com_ind_tbl_convert_from_ + return 0; + } + +-static int ena_com_init_interrupt_moderation_table(struct ena_com_dev *ena_dev) +-{ +- size_t size; +- +- size = sizeof(struct ena_intr_moder_entry) * ENA_INTR_MAX_NUM_OF_LEVELS; +- +- ena_dev->intr_moder_tbl = +- devm_kzalloc(ena_dev->dmadev, size, GFP_KERNEL); +- if (!ena_dev->intr_moder_tbl) +- return -ENOMEM; +- +- ena_com_config_default_interrupt_moderation_table(ena_dev); +- +- return 0; +-} +- + static void ena_com_update_intr_delay_resolution(struct ena_com_dev *ena_dev, + u16 intr_delay_resolution) + { +@@ -2802,13 +2786,6 @@ int ena_com_update_nonadaptive_moderatio + &ena_dev->intr_moder_rx_interval); + } + +-void ena_com_destroy_interrupt_moderation(struct ena_com_dev *ena_dev) +-{ +- if (ena_dev->intr_moder_tbl) +- devm_kfree(ena_dev->dmadev, ena_dev->intr_moder_tbl); +- ena_dev->intr_moder_tbl = NULL; +-} +- + int 
ena_com_init_interrupt_moderation(struct ena_com_dev *ena_dev) + { + struct ena_admin_get_feat_resp get_resp; +@@ -2833,10 +2810,6 @@ int ena_com_init_interrupt_moderation(st + return rc; + } + +- rc = ena_com_init_interrupt_moderation_table(ena_dev); +- if (rc) +- goto err; +- + /* if moderation is supported by device we set adaptive moderation */ + delay_resolution = get_resp.u.intr_moderation.intr_delay_resolution; + ena_com_update_intr_delay_resolution(ena_dev, delay_resolution); +@@ -2845,52 +2818,6 @@ int ena_com_init_interrupt_moderation(st + ena_com_disable_adaptive_moderation(ena_dev); + + return 0; +-err: +- ena_com_destroy_interrupt_moderation(ena_dev); +- return rc; +-} +- +-void ena_com_config_default_interrupt_moderation_table(struct ena_com_dev *ena_dev) +-{ +- struct ena_intr_moder_entry *intr_moder_tbl = ena_dev->intr_moder_tbl; +- +- if (!intr_moder_tbl) +- return; +- +- intr_moder_tbl[ENA_INTR_MODER_LOWEST].intr_moder_interval = +- ENA_INTR_LOWEST_USECS; +- intr_moder_tbl[ENA_INTR_MODER_LOWEST].pkts_per_interval = +- ENA_INTR_LOWEST_PKTS; +- intr_moder_tbl[ENA_INTR_MODER_LOWEST].bytes_per_interval = +- ENA_INTR_LOWEST_BYTES; +- +- intr_moder_tbl[ENA_INTR_MODER_LOW].intr_moder_interval = +- ENA_INTR_LOW_USECS; +- intr_moder_tbl[ENA_INTR_MODER_LOW].pkts_per_interval = +- ENA_INTR_LOW_PKTS; +- intr_moder_tbl[ENA_INTR_MODER_LOW].bytes_per_interval = +- ENA_INTR_LOW_BYTES; +- +- intr_moder_tbl[ENA_INTR_MODER_MID].intr_moder_interval = +- ENA_INTR_MID_USECS; +- intr_moder_tbl[ENA_INTR_MODER_MID].pkts_per_interval = +- ENA_INTR_MID_PKTS; +- intr_moder_tbl[ENA_INTR_MODER_MID].bytes_per_interval = +- ENA_INTR_MID_BYTES; +- +- intr_moder_tbl[ENA_INTR_MODER_HIGH].intr_moder_interval = +- ENA_INTR_HIGH_USECS; +- intr_moder_tbl[ENA_INTR_MODER_HIGH].pkts_per_interval = +- ENA_INTR_HIGH_PKTS; +- intr_moder_tbl[ENA_INTR_MODER_HIGH].bytes_per_interval = +- ENA_INTR_HIGH_BYTES; +- +- intr_moder_tbl[ENA_INTR_MODER_HIGHEST].intr_moder_interval = +- 
ENA_INTR_HIGHEST_USECS; +- intr_moder_tbl[ENA_INTR_MODER_HIGHEST].pkts_per_interval = +- ENA_INTR_HIGHEST_PKTS; +- intr_moder_tbl[ENA_INTR_MODER_HIGHEST].bytes_per_interval = +- ENA_INTR_HIGHEST_BYTES; + } + + unsigned int ena_com_get_nonadaptive_moderation_interval_tx(struct ena_com_dev *ena_dev) +@@ -2903,43 +2830,6 @@ unsigned int ena_com_get_nonadaptive_mod + return ena_dev->intr_moder_rx_interval; + } + +-void ena_com_init_intr_moderation_entry(struct ena_com_dev *ena_dev, +- enum ena_intr_moder_level level, +- struct ena_intr_moder_entry *entry) +-{ +- struct ena_intr_moder_entry *intr_moder_tbl = ena_dev->intr_moder_tbl; +- +- if (level >= ENA_INTR_MAX_NUM_OF_LEVELS) +- return; +- +- intr_moder_tbl[level].intr_moder_interval = entry->intr_moder_interval; +- if (ena_dev->intr_delay_resolution) +- intr_moder_tbl[level].intr_moder_interval /= +- ena_dev->intr_delay_resolution; +- intr_moder_tbl[level].pkts_per_interval = entry->pkts_per_interval; +- +- /* use hardcoded value until ethtool supports bytecount parameter */ +- if (entry->bytes_per_interval != ENA_INTR_BYTE_COUNT_NOT_SUPPORTED) +- intr_moder_tbl[level].bytes_per_interval = entry->bytes_per_interval; +-} +- +-void ena_com_get_intr_moderation_entry(struct ena_com_dev *ena_dev, +- enum ena_intr_moder_level level, +- struct ena_intr_moder_entry *entry) +-{ +- struct ena_intr_moder_entry *intr_moder_tbl = ena_dev->intr_moder_tbl; +- +- if (level >= ENA_INTR_MAX_NUM_OF_LEVELS) +- return; +- +- entry->intr_moder_interval = intr_moder_tbl[level].intr_moder_interval; +- if (ena_dev->intr_delay_resolution) +- entry->intr_moder_interval *= ena_dev->intr_delay_resolution; +- entry->pkts_per_interval = +- intr_moder_tbl[level].pkts_per_interval; +- entry->bytes_per_interval = intr_moder_tbl[level].bytes_per_interval; +-} +- + int ena_com_config_dev_mode(struct ena_com_dev *ena_dev, + struct ena_admin_feature_llq_desc *llq_features, + struct ena_llq_configurations *llq_default_cfg) +Index: 
linux/drivers/net/ethernet/amazon/ena/ena_com.h +=================================================================== +--- linux.orig/drivers/net/ethernet/amazon/ena/ena_com.h ++++ linux/drivers/net/ethernet/amazon/ena/ena_com.h +@@ -72,46 +72,13 @@ + /*****************************************************************************/ + /* ENA adaptive interrupt moderation settings */ + +-#define ENA_INTR_LOWEST_USECS (0) +-#define ENA_INTR_LOWEST_PKTS (3) +-#define ENA_INTR_LOWEST_BYTES (2 * 1524) +- +-#define ENA_INTR_LOW_USECS (32) +-#define ENA_INTR_LOW_PKTS (12) +-#define ENA_INTR_LOW_BYTES (16 * 1024) +- +-#define ENA_INTR_MID_USECS (80) +-#define ENA_INTR_MID_PKTS (48) +-#define ENA_INTR_MID_BYTES (64 * 1024) +- +-#define ENA_INTR_HIGH_USECS (128) +-#define ENA_INTR_HIGH_PKTS (96) +-#define ENA_INTR_HIGH_BYTES (128 * 1024) +- +-#define ENA_INTR_HIGHEST_USECS (192) +-#define ENA_INTR_HIGHEST_PKTS (128) +-#define ENA_INTR_HIGHEST_BYTES (192 * 1024) +- + #define ENA_INTR_INITIAL_TX_INTERVAL_USECS 196 + #define ENA_INTR_INITIAL_RX_INTERVAL_USECS 0 +-#define ENA_INTR_DELAY_OLD_VALUE_WEIGHT 6 +-#define ENA_INTR_DELAY_NEW_VALUE_WEIGHT 4 +-#define ENA_INTR_MODER_LEVEL_STRIDE 2 +-#define ENA_INTR_BYTE_COUNT_NOT_SUPPORTED 0xFFFFFF + + #define ENA_HW_HINTS_NO_TIMEOUT 0xFFFF + + #define ENA_FEATURE_MAX_QUEUE_EXT_VER 1 + +-enum ena_intr_moder_level { +- ENA_INTR_MODER_LOWEST = 0, +- ENA_INTR_MODER_LOW, +- ENA_INTR_MODER_MID, +- ENA_INTR_MODER_HIGH, +- ENA_INTR_MODER_HIGHEST, +- ENA_INTR_MAX_NUM_OF_LEVELS, +-}; +- + struct ena_llq_configurations { + enum ena_admin_llq_header_location llq_header_location; + enum ena_admin_llq_ring_entry_size llq_ring_entry_size; +@@ -120,12 +87,6 @@ struct ena_llq_configurations { + u16 llq_ring_entry_size_value; + }; + +-struct ena_intr_moder_entry { +- unsigned int intr_moder_interval; +- unsigned int pkts_per_interval; +- unsigned int bytes_per_interval; +-}; +- + enum queue_direction { + ENA_COM_IO_QUEUE_DIRECTION_TX, + 
ENA_COM_IO_QUEUE_DIRECTION_RX +@@ -920,11 +881,6 @@ int ena_com_execute_admin_command(struct + */ + int ena_com_init_interrupt_moderation(struct ena_com_dev *ena_dev); + +-/* ena_com_destroy_interrupt_moderation - Destroy interrupt moderation resources +- * @ena_dev: ENA communication layer struct +- */ +-void ena_com_destroy_interrupt_moderation(struct ena_com_dev *ena_dev); +- + /* ena_com_interrupt_moderation_supported - Return if interrupt moderation + * capability is supported by the device. + * +@@ -932,12 +888,6 @@ void ena_com_destroy_interrupt_moderatio + */ + bool ena_com_interrupt_moderation_supported(struct ena_com_dev *ena_dev); + +-/* ena_com_config_default_interrupt_moderation_table - Restore the interrupt +- * moderation table back to the default parameters. +- * @ena_dev: ENA communication layer struct +- */ +-void ena_com_config_default_interrupt_moderation_table(struct ena_com_dev *ena_dev); +- + /* ena_com_update_nonadaptive_moderation_interval_tx - Update the + * non-adaptive interval in Tx direction. + * @ena_dev: ENA communication layer struct +@@ -974,29 +924,6 @@ unsigned int ena_com_get_nonadaptive_mod + */ + unsigned int ena_com_get_nonadaptive_moderation_interval_rx(struct ena_com_dev *ena_dev); + +-/* ena_com_init_intr_moderation_entry - Update a single entry in the interrupt +- * moderation table. +- * @ena_dev: ENA communication layer struct +- * @level: Interrupt moderation table level +- * @entry: Entry value +- * +- * Update a single entry in the interrupt moderation table. +- */ +-void ena_com_init_intr_moderation_entry(struct ena_com_dev *ena_dev, +- enum ena_intr_moder_level level, +- struct ena_intr_moder_entry *entry); +- +-/* ena_com_get_intr_moderation_entry - Init ena_intr_moder_entry. +- * @ena_dev: ENA communication layer struct +- * @level: Interrupt moderation table level +- * @entry: Entry to fill. +- * +- * Initialize the entry according to the adaptive interrupt moderation table. 
+- */ +-void ena_com_get_intr_moderation_entry(struct ena_com_dev *ena_dev, +- enum ena_intr_moder_level level, +- struct ena_intr_moder_entry *entry); +- + /* ena_com_config_dev_mode - Configure the placement policy of the device. + * @ena_dev: ENA communication layer struct + * @llq_features: LLQ feature descriptor, retrieve via +@@ -1022,75 +949,6 @@ static inline void ena_com_disable_adapt + ena_dev->adaptive_coalescing = false; + } + +-/* ena_com_calculate_interrupt_delay - Calculate new interrupt delay +- * @ena_dev: ENA communication layer struct +- * @pkts: Number of packets since the last update +- * @bytes: Number of bytes received since the last update. +- * @smoothed_interval: Returned interval +- * @moder_tbl_idx: Current table level as input update new level as return +- * value. +- */ +-static inline void ena_com_calculate_interrupt_delay(struct ena_com_dev *ena_dev, +- unsigned int pkts, +- unsigned int bytes, +- unsigned int *smoothed_interval, +- unsigned int *moder_tbl_idx) +-{ +- enum ena_intr_moder_level curr_moder_idx, new_moder_idx; +- struct ena_intr_moder_entry *curr_moder_entry; +- struct ena_intr_moder_entry *pred_moder_entry; +- struct ena_intr_moder_entry *new_moder_entry; +- struct ena_intr_moder_entry *intr_moder_tbl = ena_dev->intr_moder_tbl; +- unsigned int interval; +- +- /* We apply adaptive moderation on Rx path only. +- * Tx uses static interrupt moderation. 
+- */ +- if (!pkts || !bytes) +- /* Tx interrupt, or spurious interrupt, +- * in both cases we just use same delay values +- */ +- return; +- +- curr_moder_idx = (enum ena_intr_moder_level)(*moder_tbl_idx); +- if (unlikely(curr_moder_idx >= ENA_INTR_MAX_NUM_OF_LEVELS)) { +- pr_err("Wrong moderation index %u\n", curr_moder_idx); +- return; +- } +- +- curr_moder_entry = &intr_moder_tbl[curr_moder_idx]; +- new_moder_idx = curr_moder_idx; +- +- if (curr_moder_idx == ENA_INTR_MODER_LOWEST) { +- if ((pkts > curr_moder_entry->pkts_per_interval) || +- (bytes > curr_moder_entry->bytes_per_interval)) +- new_moder_idx = +- (enum ena_intr_moder_level)(curr_moder_idx + ENA_INTR_MODER_LEVEL_STRIDE); +- } else { +- pred_moder_entry = &intr_moder_tbl[curr_moder_idx - ENA_INTR_MODER_LEVEL_STRIDE]; +- +- if ((pkts <= pred_moder_entry->pkts_per_interval) || +- (bytes <= pred_moder_entry->bytes_per_interval)) +- new_moder_idx = +- (enum ena_intr_moder_level)(curr_moder_idx - ENA_INTR_MODER_LEVEL_STRIDE); +- else if ((pkts > curr_moder_entry->pkts_per_interval) || +- (bytes > curr_moder_entry->bytes_per_interval)) { +- if (curr_moder_idx != ENA_INTR_MODER_HIGHEST) +- new_moder_idx = +- (enum ena_intr_moder_level)(curr_moder_idx + ENA_INTR_MODER_LEVEL_STRIDE); +- } +- } +- new_moder_entry = &intr_moder_tbl[new_moder_idx]; +- +- interval = new_moder_entry->intr_moder_interval; +- *smoothed_interval = ( +- (interval * ENA_INTR_DELAY_NEW_VALUE_WEIGHT + +- ENA_INTR_DELAY_OLD_VALUE_WEIGHT * (*smoothed_interval)) + 5) / +- 10; +- +- *moder_tbl_idx = new_moder_idx; +-} +- + /* ena_com_update_intr_reg - Prepare interrupt register + * @intr_reg: interrupt register to update. 
+ * @rx_delay_interval: Rx interval in usecs diff --git a/debian/patches/features/all/ena/net-ena-remove-code-duplication-in-ena_com_update_no.patch b/debian/patches/features/all/ena/net-ena-remove-code-duplication-in-ena_com_update_no.patch new file mode 100644 index 000000000..3dcad5609 --- /dev/null +++ b/debian/patches/features/all/ena/net-ena-remove-code-duplication-in-ena_com_update_no.patch @@ -0,0 +1,71 @@ +From: Arthur Kiyanovski <akiyano@amazon.com> +Date: Mon, 16 Sep 2019 14:31:30 +0300 +Subject: [PATCH] net: ena: remove code duplication in + ena_com_update_nonadaptive_moderation_interval _*() +Origin: https://git.kernel.org/linus/57e3a5f24bb5bf265988e973a911845abcbf6a00 +Bug-Debian: https://bugs.debian.org/941291 + +Remove code duplication in: +ena_com_update_nonadaptive_moderation_interval_tx() +ena_com_update_nonadaptive_moderation_interval_rx() +functions. + +Signed-off-by: Arthur Kiyanovski <akiyano@amazon.com> +Signed-off-by: David S. Miller <davem@davemloft.net> +--- + drivers/net/ethernet/amazon/ena/ena_com.c | 30 ++++++++++++----------- + 1 file changed, 16 insertions(+), 14 deletions(-) + +Index: linux/drivers/net/ethernet/amazon/ena/ena_com.c +=================================================================== +--- linux.orig/drivers/net/ethernet/amazon/ena/ena_com.c ++++ linux/drivers/net/ethernet/amazon/ena/ena_com.c +@@ -2772,32 +2772,34 @@ bool ena_com_interrupt_moderation_suppor + ENA_ADMIN_INTERRUPT_MODERATION); + } + +-int ena_com_update_nonadaptive_moderation_interval_tx(struct ena_com_dev *ena_dev, +- u32 tx_coalesce_usecs) ++static int ena_com_update_nonadaptive_moderation_interval(u32 coalesce_usecs, ++ u32 intr_delay_resolution, ++ u32 *intr_moder_interval) + { +- if (!ena_dev->intr_delay_resolution) { ++ if (!intr_delay_resolution) { + pr_err("Illegal interrupt delay granularity value\n"); + return -EFAULT; + } + +- ena_dev->intr_moder_tx_interval = tx_coalesce_usecs / +- ena_dev->intr_delay_resolution; ++ *intr_moder_interval = 
coalesce_usecs / intr_delay_resolution; + + return 0; + } + ++int ena_com_update_nonadaptive_moderation_interval_tx(struct ena_com_dev *ena_dev, ++ u32 tx_coalesce_usecs) ++{ ++ return ena_com_update_nonadaptive_moderation_interval(tx_coalesce_usecs, ++ ena_dev->intr_delay_resolution, ++ &ena_dev->intr_moder_tx_interval); ++} ++ + int ena_com_update_nonadaptive_moderation_interval_rx(struct ena_com_dev *ena_dev, + u32 rx_coalesce_usecs) + { +- if (!ena_dev->intr_delay_resolution) { +- pr_err("Illegal interrupt delay granularity value\n"); +- return -EFAULT; +- } +- +- ena_dev->intr_moder_rx_interval = rx_coalesce_usecs / +- ena_dev->intr_delay_resolution; +- +- return 0; ++ return ena_com_update_nonadaptive_moderation_interval(rx_coalesce_usecs, ++ ena_dev->intr_delay_resolution, ++ &ena_dev->intr_moder_rx_interval); + } + + void ena_com_destroy_interrupt_moderation(struct ena_com_dev *ena_dev) diff --git a/debian/patches/features/all/ena/net-ena-remove-ena_restore_ethtool_params-and-releva.patch b/debian/patches/features/all/ena/net-ena-remove-ena_restore_ethtool_params-and-releva.patch new file mode 100644 index 000000000..1dad10a1a --- /dev/null +++ b/debian/patches/features/all/ena/net-ena-remove-ena_restore_ethtool_params-and-releva.patch @@ -0,0 +1,59 @@ +From: Arthur Kiyanovski <akiyano@amazon.com> +Date: Mon, 16 Sep 2019 14:31:32 +0300 +Subject: [PATCH] net: ena: remove ena_restore_ethtool_params() and relevant + fields +Origin: https://git.kernel.org/linus/64d1fb9dfc6c5d8589312fa847fee14ec14ee12b +Bug-Debian: https://bugs.debian.org/941291 + +Deleted unused 4 fields from struct ena_adapter and their only user +ena_restore_ethtool_params(). + +Signed-off-by: Arthur Kiyanovski <akiyano@amazon.com> +Signed-off-by: David S. 
Miller <davem@davemloft.net> +--- + drivers/net/ethernet/amazon/ena/ena_netdev.c | 10 ---------- + drivers/net/ethernet/amazon/ena/ena_netdev.h | 3 --- + 2 files changed, 13 deletions(-) + +Index: linux/drivers/net/ethernet/amazon/ena/ena_netdev.c +=================================================================== +--- linux.orig/drivers/net/ethernet/amazon/ena/ena_netdev.c ++++ linux/drivers/net/ethernet/amazon/ena/ena_netdev.c +@@ -1564,14 +1564,6 @@ static void ena_napi_enable_all(struct e + napi_enable(&adapter->ena_napi[i].napi); + } + +-static void ena_restore_ethtool_params(struct ena_adapter *adapter) +-{ +- adapter->tx_usecs = 0; +- adapter->rx_usecs = 0; +- adapter->tx_frames = 1; +- adapter->rx_frames = 1; +-} +- + /* Configure the Rx forwarding */ + static int ena_rss_configure(struct ena_adapter *adapter) + { +@@ -1621,8 +1613,6 @@ static int ena_up_complete(struct ena_ad + /* enable transmits */ + netif_tx_start_all_queues(adapter->netdev); + +- ena_restore_ethtool_params(adapter); +- + ena_napi_enable_all(adapter); + + return 0; +Index: linux/drivers/net/ethernet/amazon/ena/ena_netdev.h +=================================================================== +--- linux.orig/drivers/net/ethernet/amazon/ena/ena_netdev.h ++++ linux/drivers/net/ethernet/amazon/ena/ena_netdev.h +@@ -330,9 +330,6 @@ struct ena_adapter { + + u32 missing_tx_completion_threshold; + +- u32 tx_usecs, rx_usecs; /* interrupt moderation */ +- u32 tx_frames, rx_frames; /* interrupt moderation */ +- + u32 requested_tx_ring_size; + u32 requested_rx_ring_size; + diff --git a/debian/patches/features/all/ena/net-ena-remove-inline-keyword-from-functions-in-.c.patch b/debian/patches/features/all/ena/net-ena-remove-inline-keyword-from-functions-in-.c.patch new file mode 100644 index 000000000..8d4a2b349 --- /dev/null +++ b/debian/patches/features/all/ena/net-ena-remove-inline-keyword-from-functions-in-.c.patch @@ -0,0 +1,244 @@ +From: Sameeh Jubran <sameehj@amazon.com> +Date: Tue, 11 Jun 2019 
14:58:10 +0300 +Subject: [PATCH] net: ena: remove inline keyword from functions in *.c +Origin: https://git.kernel.org/linus/c2b542044761965db0e4cc400ab6abf670fc25b7 +Bug-Debian: https://bugs.debian.org/941291 + +Let the compiler decide if the function should be inline in *.c files + +Signed-off-by: Sameeh Jubran <sameehj@amazon.com> +Signed-off-by: David S. Miller <davem@davemloft.net> +--- + drivers/net/ethernet/amazon/ena/ena_com.c | 6 ++--- + drivers/net/ethernet/amazon/ena/ena_eth_com.c | 26 +++++++++---------- + drivers/net/ethernet/amazon/ena/ena_netdev.c | 16 ++++++------ + 3 files changed, 24 insertions(+), 24 deletions(-) + +Index: linux/drivers/net/ethernet/amazon/ena/ena_com.c +=================================================================== +--- linux.orig/drivers/net/ethernet/amazon/ena/ena_com.c ++++ linux/drivers/net/ethernet/amazon/ena/ena_com.c +@@ -91,7 +91,7 @@ struct ena_com_stats_ctx { + struct ena_admin_acq_get_stats_resp get_resp; + }; + +-static inline int ena_com_mem_addr_set(struct ena_com_dev *ena_dev, ++static int ena_com_mem_addr_set(struct ena_com_dev *ena_dev, + struct ena_common_mem_addr *ena_addr, + dma_addr_t addr) + { +@@ -190,7 +190,7 @@ static int ena_com_admin_init_aenq(struc + return 0; + } + +-static inline void comp_ctxt_release(struct ena_com_admin_queue *queue, ++static void comp_ctxt_release(struct ena_com_admin_queue *queue, + struct ena_comp_ctx *comp_ctx) + { + comp_ctx->occupied = false; +@@ -277,7 +277,7 @@ static struct ena_comp_ctx *__ena_com_su + return comp_ctx; + } + +-static inline int ena_com_init_comp_ctxt(struct ena_com_admin_queue *queue) ++static int ena_com_init_comp_ctxt(struct ena_com_admin_queue *queue) + { + size_t size = queue->q_depth * sizeof(struct ena_comp_ctx); + struct ena_comp_ctx *comp_ctx; +Index: linux/drivers/net/ethernet/amazon/ena/ena_eth_com.c +=================================================================== +--- linux.orig/drivers/net/ethernet/amazon/ena/ena_eth_com.c ++++ 
linux/drivers/net/ethernet/amazon/ena/ena_eth_com.c +@@ -32,7 +32,7 @@ + + #include "ena_eth_com.h" + +-static inline struct ena_eth_io_rx_cdesc_base *ena_com_get_next_rx_cdesc( ++static struct ena_eth_io_rx_cdesc_base *ena_com_get_next_rx_cdesc( + struct ena_com_io_cq *io_cq) + { + struct ena_eth_io_rx_cdesc_base *cdesc; +@@ -59,7 +59,7 @@ static inline struct ena_eth_io_rx_cdesc + return cdesc; + } + +-static inline void *get_sq_desc_regular_queue(struct ena_com_io_sq *io_sq) ++static void *get_sq_desc_regular_queue(struct ena_com_io_sq *io_sq) + { + u16 tail_masked; + u32 offset; +@@ -71,7 +71,7 @@ static inline void *get_sq_desc_regular_ + return (void *)((uintptr_t)io_sq->desc_addr.virt_addr + offset); + } + +-static inline int ena_com_write_bounce_buffer_to_dev(struct ena_com_io_sq *io_sq, ++static int ena_com_write_bounce_buffer_to_dev(struct ena_com_io_sq *io_sq, + u8 *bounce_buffer) + { + struct ena_com_llq_info *llq_info = &io_sq->llq_info; +@@ -111,7 +111,7 @@ static inline int ena_com_write_bounce_b + return 0; + } + +-static inline int ena_com_write_header_to_bounce(struct ena_com_io_sq *io_sq, ++static int ena_com_write_header_to_bounce(struct ena_com_io_sq *io_sq, + u8 *header_src, + u16 header_len) + { +@@ -142,7 +142,7 @@ static inline int ena_com_write_header_t + return 0; + } + +-static inline void *get_sq_desc_llq(struct ena_com_io_sq *io_sq) ++static void *get_sq_desc_llq(struct ena_com_io_sq *io_sq) + { + struct ena_com_llq_pkt_ctrl *pkt_ctrl = &io_sq->llq_buf_ctrl; + u8 *bounce_buffer; +@@ -162,7 +162,7 @@ static inline void *get_sq_desc_llq(stru + return sq_desc; + } + +-static inline int ena_com_close_bounce_buffer(struct ena_com_io_sq *io_sq) ++static int ena_com_close_bounce_buffer(struct ena_com_io_sq *io_sq) + { + struct ena_com_llq_pkt_ctrl *pkt_ctrl = &io_sq->llq_buf_ctrl; + struct ena_com_llq_info *llq_info = &io_sq->llq_info; +@@ -189,7 +189,7 @@ static inline int ena_com_close_bounce_b + return 0; + } + +-static inline void 
*get_sq_desc(struct ena_com_io_sq *io_sq) ++static void *get_sq_desc(struct ena_com_io_sq *io_sq) + { + if (io_sq->mem_queue_type == ENA_ADMIN_PLACEMENT_POLICY_DEV) + return get_sq_desc_llq(io_sq); +@@ -197,7 +197,7 @@ static inline void *get_sq_desc(struct e + return get_sq_desc_regular_queue(io_sq); + } + +-static inline int ena_com_sq_update_llq_tail(struct ena_com_io_sq *io_sq) ++static int ena_com_sq_update_llq_tail(struct ena_com_io_sq *io_sq) + { + struct ena_com_llq_pkt_ctrl *pkt_ctrl = &io_sq->llq_buf_ctrl; + struct ena_com_llq_info *llq_info = &io_sq->llq_info; +@@ -225,7 +225,7 @@ static inline int ena_com_sq_update_llq_ + return 0; + } + +-static inline int ena_com_sq_update_tail(struct ena_com_io_sq *io_sq) ++static int ena_com_sq_update_tail(struct ena_com_io_sq *io_sq) + { + if (io_sq->mem_queue_type == ENA_ADMIN_PLACEMENT_POLICY_DEV) + return ena_com_sq_update_llq_tail(io_sq); +@@ -239,7 +239,7 @@ static inline int ena_com_sq_update_tail + return 0; + } + +-static inline struct ena_eth_io_rx_cdesc_base * ++static struct ena_eth_io_rx_cdesc_base * + ena_com_rx_cdesc_idx_to_ptr(struct ena_com_io_cq *io_cq, u16 idx) + { + idx &= (io_cq->q_depth - 1); +@@ -248,7 +248,7 @@ static inline struct ena_eth_io_rx_cdesc + idx * io_cq->cdesc_entry_size_in_bytes); + } + +-static inline u16 ena_com_cdesc_rx_pkt_get(struct ena_com_io_cq *io_cq, ++static u16 ena_com_cdesc_rx_pkt_get(struct ena_com_io_cq *io_cq, + u16 *first_cdesc_idx) + { + struct ena_eth_io_rx_cdesc_base *cdesc; +@@ -285,7 +285,7 @@ static inline u16 ena_com_cdesc_rx_pkt_g + return count; + } + +-static inline int ena_com_create_and_store_tx_meta_desc(struct ena_com_io_sq *io_sq, ++static int ena_com_create_and_store_tx_meta_desc(struct ena_com_io_sq *io_sq, + struct ena_com_tx_ctx *ena_tx_ctx) + { + struct ena_eth_io_tx_meta_desc *meta_desc = NULL; +@@ -334,7 +334,7 @@ static inline int ena_com_create_and_sto + return ena_com_sq_update_tail(io_sq); + } + +-static inline void 
ena_com_rx_set_flags(struct ena_com_rx_ctx *ena_rx_ctx, ++static void ena_com_rx_set_flags(struct ena_com_rx_ctx *ena_rx_ctx, + struct ena_eth_io_rx_cdesc_base *cdesc) + { + ena_rx_ctx->l3_proto = cdesc->status & +Index: linux/drivers/net/ethernet/amazon/ena/ena_netdev.c +=================================================================== +--- linux.orig/drivers/net/ethernet/amazon/ena/ena_netdev.c ++++ linux/drivers/net/ethernet/amazon/ena/ena_netdev.c +@@ -326,7 +326,7 @@ static void ena_free_all_io_tx_resources + ena_free_tx_resources(adapter, i); + } + +-static inline int validate_rx_req_id(struct ena_ring *rx_ring, u16 req_id) ++static int validate_rx_req_id(struct ena_ring *rx_ring, u16 req_id) + { + if (likely(req_id < rx_ring->ring_size)) + return 0; +@@ -460,7 +460,7 @@ static void ena_free_all_io_rx_resources + ena_free_rx_resources(adapter, i); + } + +-static inline int ena_alloc_rx_page(struct ena_ring *rx_ring, ++static int ena_alloc_rx_page(struct ena_ring *rx_ring, + struct ena_rx_buffer *rx_info, gfp_t gfp) + { + struct ena_com_buf *ena_buf; +@@ -620,7 +620,7 @@ static void ena_free_all_rx_bufs(struct + ena_free_rx_bufs(adapter, i); + } + +-static inline void ena_unmap_tx_skb(struct ena_ring *tx_ring, ++static void ena_unmap_tx_skb(struct ena_ring *tx_ring, + struct ena_tx_buffer *tx_info) + { + struct ena_com_buf *ena_buf; +@@ -956,7 +956,7 @@ static struct sk_buff *ena_rx_skb(struct + * @ena_rx_ctx: received packet context/metadata + * @skb: skb currently being received and modified + */ +-static inline void ena_rx_checksum(struct ena_ring *rx_ring, ++static void ena_rx_checksum(struct ena_ring *rx_ring, + struct ena_com_rx_ctx *ena_rx_ctx, + struct sk_buff *skb) + { +@@ -1156,7 +1156,7 @@ error: + return 0; + } + +-inline void ena_adjust_intr_moderation(struct ena_ring *rx_ring, ++void ena_adjust_intr_moderation(struct ena_ring *rx_ring, + struct ena_ring *tx_ring) + { + /* We apply adaptive moderation on Rx path only. 
+@@ -1175,7 +1175,7 @@ inline void ena_adjust_intr_moderation(s + rx_ring->per_napi_bytes = 0; + } + +-static inline void ena_unmask_interrupt(struct ena_ring *tx_ring, ++static void ena_unmask_interrupt(struct ena_ring *tx_ring, + struct ena_ring *rx_ring) + { + struct ena_eth_io_intr_reg intr_reg; +@@ -1195,7 +1195,7 @@ static inline void ena_unmask_interrupt( + ena_com_unmask_intr(rx_ring->ena_com_io_cq, &intr_reg); + } + +-static inline void ena_update_ring_numa_node(struct ena_ring *tx_ring, ++static void ena_update_ring_numa_node(struct ena_ring *tx_ring, + struct ena_ring *rx_ring) + { + int cpu = get_cpu(); +@@ -3331,7 +3331,7 @@ static void ena_release_bars(struct ena_ + pci_release_selected_regions(pdev, release_bars); + } + +-static inline void set_default_llq_configurations(struct ena_llq_configurations *llq_config) ++static void set_default_llq_configurations(struct ena_llq_configurations *llq_config) + { + llq_config->llq_header_location = ENA_ADMIN_INLINE_HEADER; + llq_config->llq_ring_entry_size = ENA_ADMIN_LIST_ENTRY_SIZE_128B; diff --git a/debian/patches/features/all/ena/net-ena-remove-old-adaptive-interrupt-moderation-cod.patch b/debian/patches/features/all/ena/net-ena-remove-old-adaptive-interrupt-moderation-cod.patch new file mode 100644 index 000000000..17cb06e09 --- /dev/null +++ b/debian/patches/features/all/ena/net-ena-remove-old-adaptive-interrupt-moderation-cod.patch @@ -0,0 +1,83 @@ +From: Arthur Kiyanovski <akiyano@amazon.com> +Date: Mon, 16 Sep 2019 14:31:31 +0300 +Subject: [PATCH] net: ena: remove old adaptive interrupt moderation code from + ena_netdev +Origin: https://git.kernel.org/linus/242d81fd3dd9f301b0c20564aafec8efdb2bbe5b +Bug-Debian: https://bugs.debian.org/941291 + +1. Out of the fields {per_napi_bytes, per_napi_packets} in struct ena_ring, + only rx_ring->per_napi_packets are used to determine if napi did work + for dim. + This commit removes all other uses of these fields. +2. 
Remove ena_ring->moder_tbl_idx, which is not used by dim. +3. Remove all calls to ena_com_destroy_interrupt_moderation(), since all it + did was to destroy the interrupt moderation table, which is removed as + part of removing old interrupt moderation code. + +Signed-off-by: Arthur Kiyanovski <akiyano@amazon.com> +Signed-off-by: David S. Miller <davem@davemloft.net> +--- + drivers/net/ethernet/amazon/ena/ena_netdev.c | 8 -------- + drivers/net/ethernet/amazon/ena/ena_netdev.h | 2 -- + 2 files changed, 10 deletions(-) + +Index: linux/drivers/net/ethernet/amazon/ena/ena_netdev.c +=================================================================== +--- linux.orig/drivers/net/ethernet/amazon/ena/ena_netdev.c ++++ linux/drivers/net/ethernet/amazon/ena/ena_netdev.c +@@ -158,7 +158,6 @@ static void ena_init_io_rings_common(str + ring->adapter = adapter; + ring->ena_dev = adapter->ena_dev; + ring->per_napi_packets = 0; +- ring->per_napi_bytes = 0; + ring->cpu = 0; + ring->first_interrupt = false; + ring->no_interrupt_event_cnt = 0; +@@ -835,9 +834,6 @@ static int ena_clean_tx_irq(struct ena_r + __netif_tx_unlock(txq); + } + +- tx_ring->per_napi_bytes += tx_bytes; +- tx_ring->per_napi_packets += tx_pkts; +- + return tx_pkts; + } + +@@ -1121,7 +1117,6 @@ static int ena_clean_rx_irq(struct ena_r + } while (likely(res_budget)); + + work_done = budget - res_budget; +- rx_ring->per_napi_bytes += total_len; + rx_ring->per_napi_packets += work_done; + u64_stats_update_begin(&rx_ring->syncp); + rx_ring->rx_stats.bytes += total_len; +@@ -3643,7 +3638,6 @@ err_free_msix: + ena_free_mgmnt_irq(adapter); + ena_disable_msix(adapter); + err_worker_destroy: +- ena_com_destroy_interrupt_moderation(ena_dev); + del_timer(&adapter->timer_service); + err_netdev_destroy: + free_netdev(netdev); +@@ -3704,8 +3698,6 @@ static void ena_remove(struct pci_dev *p + + pci_disable_device(pdev); + +- ena_com_destroy_interrupt_moderation(ena_dev); +- + vfree(ena_dev); + } + +Index: 
linux/drivers/net/ethernet/amazon/ena/ena_netdev.h +=================================================================== +--- linux.orig/drivers/net/ethernet/amazon/ena/ena_netdev.h ++++ linux/drivers/net/ethernet/amazon/ena/ena_netdev.h +@@ -280,8 +280,6 @@ struct ena_ring { + struct ena_com_rx_buf_info ena_bufs[ENA_PKT_MAX_BUFS]; + u32 smoothed_interval; + u32 per_napi_packets; +- u32 per_napi_bytes; +- enum ena_intr_moder_level moder_tbl_idx; + u16 non_empty_napi_events; + struct u64_stats_sync syncp; + union { diff --git a/debian/patches/features/all/ena/net-ena-replace-free_tx-rx_ids-union-with-single-fre.patch b/debian/patches/features/all/ena/net-ena-replace-free_tx-rx_ids-union-with-single-fre.patch new file mode 100644 index 000000000..c63ce87c6 --- /dev/null +++ b/debian/patches/features/all/ena/net-ena-replace-free_tx-rx_ids-union-with-single-fre.patch @@ -0,0 +1,198 @@ +From: Sameeh Jubran <sameehj@amazon.com> +Date: Mon, 3 Jun 2019 17:43:21 +0300 +Subject: [PATCH] net: ena: replace free_tx/rx_ids union with single free_ids + field in ena_ring +Origin: https://git.kernel.org/linus/f917249833c7a00ea8be39b1bcb3ec8ef3aea45f +Bug-Debian: https://bugs.debian.org/941291 + +struct ena_ring holds a union of free_rx_ids and free_tx_ids. +Both of the above fields mean the exact same thing and are used +exactly the same way. +Furthermore, these fields are always used with a prefix of the +type of ring. So for tx it will be tx_ring->free_tx_ids, and for +rx it will be rx_ring->free_rx_ids, which shows how redundant the +"_tx" and "_rx" parts are. +Furthermore still, this may lead to confusing code like where +tx_ring->free_rx_ids which works correctly but looks like a mess. + +This commit removes the aforementioned redundancy by replacing the +free_rx/tx_ids union with a single free_ids field. +It also changes a single goto label name from err_free_tx_ids: to +err_tx_free_ids: for consistency with the above new notation. 
+ +Signed-off-by: Arthur Kiyanovski <akiyano@amazon.com> +Signed-off-by: Sameeh Jubran <sameehj@amazon.com> +Signed-off-by: David S. Miller <davem@davemloft.net> +--- + drivers/net/ethernet/amazon/ena/ena_netdev.c | 48 ++++++++++---------- + drivers/net/ethernet/amazon/ena/ena_netdev.h | 11 ++--- + 2 files changed, 28 insertions(+), 31 deletions(-) + +diff --git a/drivers/net/ethernet/amazon/ena/ena_netdev.c b/drivers/net/ethernet/amazon/ena/ena_netdev.c +index 33fab4f41d7c..b80b5eddca91 100644 +--- a/drivers/net/ethernet/amazon/ena/ena_netdev.c ++++ b/drivers/net/ethernet/amazon/ena/ena_netdev.c +@@ -228,11 +228,11 @@ static int ena_setup_tx_resources(struct ena_adapter *adapter, int qid) + } + + size = sizeof(u16) * tx_ring->ring_size; +- tx_ring->free_tx_ids = vzalloc_node(size, node); +- if (!tx_ring->free_tx_ids) { +- tx_ring->free_tx_ids = vzalloc(size); +- if (!tx_ring->free_tx_ids) +- goto err_free_tx_ids; ++ tx_ring->free_ids = vzalloc_node(size, node); ++ if (!tx_ring->free_ids) { ++ tx_ring->free_ids = vzalloc(size); ++ if (!tx_ring->free_ids) ++ goto err_tx_free_ids; + } + + size = tx_ring->tx_max_header_size; +@@ -245,7 +245,7 @@ static int ena_setup_tx_resources(struct ena_adapter *adapter, int qid) + + /* Req id ring for TX out of order completions */ + for (i = 0; i < tx_ring->ring_size; i++) +- tx_ring->free_tx_ids[i] = i; ++ tx_ring->free_ids[i] = i; + + /* Reset tx statistics */ + memset(&tx_ring->tx_stats, 0x0, sizeof(tx_ring->tx_stats)); +@@ -256,9 +256,9 @@ static int ena_setup_tx_resources(struct ena_adapter *adapter, int qid) + return 0; + + err_push_buf_intermediate_buf: +- vfree(tx_ring->free_tx_ids); +- tx_ring->free_tx_ids = NULL; +-err_free_tx_ids: ++ vfree(tx_ring->free_ids); ++ tx_ring->free_ids = NULL; ++err_tx_free_ids: + vfree(tx_ring->tx_buffer_info); + tx_ring->tx_buffer_info = NULL; + err_tx_buffer_info: +@@ -278,8 +278,8 @@ static void ena_free_tx_resources(struct ena_adapter *adapter, int qid) + vfree(tx_ring->tx_buffer_info); 
+ tx_ring->tx_buffer_info = NULL; + +- vfree(tx_ring->free_tx_ids); +- tx_ring->free_tx_ids = NULL; ++ vfree(tx_ring->free_ids); ++ tx_ring->free_ids = NULL; + + vfree(tx_ring->push_buf_intermediate_buf); + tx_ring->push_buf_intermediate_buf = NULL; +@@ -377,10 +377,10 @@ static int ena_setup_rx_resources(struct ena_adapter *adapter, + } + + size = sizeof(u16) * rx_ring->ring_size; +- rx_ring->free_rx_ids = vzalloc_node(size, node); +- if (!rx_ring->free_rx_ids) { +- rx_ring->free_rx_ids = vzalloc(size); +- if (!rx_ring->free_rx_ids) { ++ rx_ring->free_ids = vzalloc_node(size, node); ++ if (!rx_ring->free_ids) { ++ rx_ring->free_ids = vzalloc(size); ++ if (!rx_ring->free_ids) { + vfree(rx_ring->rx_buffer_info); + rx_ring->rx_buffer_info = NULL; + return -ENOMEM; +@@ -389,7 +389,7 @@ static int ena_setup_rx_resources(struct ena_adapter *adapter, + + /* Req id ring for receiving RX pkts out of order */ + for (i = 0; i < rx_ring->ring_size; i++) +- rx_ring->free_rx_ids[i] = i; ++ rx_ring->free_ids[i] = i; + + /* Reset rx statistics */ + memset(&rx_ring->rx_stats, 0x0, sizeof(rx_ring->rx_stats)); +@@ -415,8 +415,8 @@ static void ena_free_rx_resources(struct ena_adapter *adapter, + vfree(rx_ring->rx_buffer_info); + rx_ring->rx_buffer_info = NULL; + +- vfree(rx_ring->free_rx_ids); +- rx_ring->free_rx_ids = NULL; ++ vfree(rx_ring->free_ids); ++ rx_ring->free_ids = NULL; + } + + /* ena_setup_all_rx_resources - allocate I/O Rx queues resources for all queues +@@ -531,7 +531,7 @@ static int ena_refill_rx_bufs(struct ena_ring *rx_ring, u32 num) + for (i = 0; i < num; i++) { + struct ena_rx_buffer *rx_info; + +- req_id = rx_ring->free_rx_ids[next_to_use]; ++ req_id = rx_ring->free_ids[next_to_use]; + rc = validate_rx_req_id(rx_ring, req_id); + if (unlikely(rc < 0)) + break; +@@ -797,7 +797,7 @@ static int ena_clean_tx_irq(struct ena_ring *tx_ring, u32 budget) + tx_pkts++; + total_done += tx_info->tx_descs; + +- tx_ring->free_tx_ids[next_to_clean] = req_id; ++ 
tx_ring->free_ids[next_to_clean] = req_id; + next_to_clean = ENA_TX_RING_IDX_NEXT(next_to_clean, + tx_ring->ring_size); + } +@@ -911,7 +911,7 @@ static struct sk_buff *ena_rx_skb(struct ena_ring *rx_ring, + + skb_put(skb, len); + skb->protocol = eth_type_trans(skb, rx_ring->netdev); +- rx_ring->free_rx_ids[*next_to_clean] = req_id; ++ rx_ring->free_ids[*next_to_clean] = req_id; + *next_to_clean = ENA_RX_RING_IDX_ADD(*next_to_clean, descs, + rx_ring->ring_size); + return skb; +@@ -935,7 +935,7 @@ static struct sk_buff *ena_rx_skb(struct ena_ring *rx_ring, + + rx_info->page = NULL; + +- rx_ring->free_rx_ids[*next_to_clean] = req_id; ++ rx_ring->free_ids[*next_to_clean] = req_id; + *next_to_clean = + ENA_RX_RING_IDX_NEXT(*next_to_clean, + rx_ring->ring_size); +@@ -1088,7 +1088,7 @@ static int ena_clean_rx_irq(struct ena_ring *rx_ring, struct napi_struct *napi, + /* exit if we failed to retrieve a buffer */ + if (unlikely(!skb)) { + for (i = 0; i < ena_rx_ctx.descs; i++) { +- rx_ring->free_tx_ids[next_to_clean] = ++ rx_ring->free_ids[next_to_clean] = + rx_ring->ena_bufs[i].req_id; + next_to_clean = + ENA_RX_RING_IDX_NEXT(next_to_clean, +@@ -2152,7 +2152,7 @@ static netdev_tx_t ena_start_xmit(struct sk_buff *skb, struct net_device *dev) + skb_tx_timestamp(skb); + + next_to_use = tx_ring->next_to_use; +- req_id = tx_ring->free_tx_ids[next_to_use]; ++ req_id = tx_ring->free_ids[next_to_use]; + tx_info = &tx_ring->tx_buffer_info[req_id]; + tx_info->num_of_bufs = 0; + +diff --git a/drivers/net/ethernet/amazon/ena/ena_netdev.h b/drivers/net/ethernet/amazon/ena/ena_netdev.h +index 0681e18b0019..74c316081499 100644 +--- a/drivers/net/ethernet/amazon/ena/ena_netdev.h ++++ b/drivers/net/ethernet/amazon/ena/ena_netdev.h +@@ -221,13 +221,10 @@ struct ena_stats_rx { + }; + + struct ena_ring { +- union { +- /* Holds the empty requests for TX/RX +- * out of order completions +- */ +- u16 *free_tx_ids; +- u16 *free_rx_ids; +- }; ++ /* Holds the empty requests for TX/RX ++ * out of 
order completions ++ */ ++ u16 *free_ids; + + union { + struct ena_tx_buffer *tx_buffer_info; +-- +2.17.1 + diff --git a/debian/patches/features/all/ena/net-ena-switch-to-dim-algorithm-for-rx-adaptive-inte.patch b/debian/patches/features/all/ena/net-ena-switch-to-dim-algorithm-for-rx-adaptive-inte.patch new file mode 100644 index 000000000..eb5269408 --- /dev/null +++ b/debian/patches/features/all/ena/net-ena-switch-to-dim-algorithm-for-rx-adaptive-inte.patch @@ -0,0 +1,163 @@ +From: Arthur Kiyanovski <akiyano@amazon.com> +Date: Mon, 16 Sep 2019 14:31:27 +0300 +Subject: [PATCH] net: ena: switch to dim algorithm for rx adaptive interrupt + moderation +Origin: https://git.kernel.org/linus/282faf61a053be43910fcc42d86ecf16c0d30123 +Bug-Debian: https://bugs.debian.org/941291 + +Use the dim library for the rx adaptive interrupt moderation implementation + +Signed-off-by: Arthur Kiyanovski <akiyano@amazon.com> +Signed-off-by: David S. Miller <davem@davemloft.net> +--- + drivers/net/ethernet/amazon/ena/ena_com.c | 4 +- + drivers/net/ethernet/amazon/ena/ena_netdev.c | 55 +++++++++++++------- + drivers/net/ethernet/amazon/ena/ena_netdev.h | 3 ++ + 3 files changed, 41 insertions(+), 21 deletions(-) + +Index: linux/drivers/net/ethernet/amazon/ena/ena_com.c +=================================================================== +--- linux.orig/drivers/net/ethernet/amazon/ena/ena_com.c ++++ linux/drivers/net/ethernet/amazon/ena/ena_com.c +@@ -2839,9 +2839,7 @@ int ena_com_init_interrupt_moderation(st + delay_resolution = get_resp.u.intr_moderation.intr_delay_resolution; + ena_com_update_intr_delay_resolution(ena_dev, delay_resolution); + +- /* Disable adaptive moderation by default - can be enabled from +- * ethtool +- */ ++ /* Disable adaptive moderation by default - can be enabled later */ + ena_com_disable_adaptive_moderation(ena_dev); + + return 0; +Index: linux/drivers/net/ethernet/amazon/ena/ena_netdev.c +=================================================================== 
+--- linux.orig/drivers/net/ethernet/amazon/ena/ena_netdev.c ++++ linux/drivers/net/ethernet/amazon/ena/ena_netdev.c +@@ -196,6 +196,7 @@ static void ena_init_io_rings(struct ena + rxr->smoothed_interval = + ena_com_get_nonadaptive_moderation_interval_rx(ena_dev); + rxr->empty_rx_queue = 0; ++ adapter->ena_napi[i].dim.mode = NET_DIM_CQ_PERIOD_MODE_START_FROM_EQE; + } + } + +@@ -712,6 +713,7 @@ static void ena_destroy_all_rx_queues(st + + for (i = 0; i < adapter->num_queues; i++) { + ena_qid = ENA_IO_RXQ_IDX(i); ++ cancel_work_sync(&adapter->ena_napi[i].dim.work); + ena_com_destroy_io_queue(adapter->ena_dev, ena_qid); + } + } +@@ -1156,23 +1158,35 @@ error: + return 0; + } + +-void ena_adjust_intr_moderation(struct ena_ring *rx_ring, +- struct ena_ring *tx_ring) ++static void ena_dim_work(struct work_struct *w) + { +- /* We apply adaptive moderation on Rx path only. +- * Tx uses static interrupt moderation. +- */ +- ena_com_calculate_interrupt_delay(rx_ring->ena_dev, +- rx_ring->per_napi_packets, +- rx_ring->per_napi_bytes, +- &rx_ring->smoothed_interval, +- &rx_ring->moder_tbl_idx); +- +- /* Reset per napi packets/bytes */ +- tx_ring->per_napi_packets = 0; +- tx_ring->per_napi_bytes = 0; ++ struct net_dim *dim = container_of(w, struct net_dim, work); ++ struct net_dim_cq_moder cur_moder = ++ net_dim_get_rx_moderation(dim->mode, dim->profile_ix); ++ struct ena_napi *ena_napi = container_of(dim, struct ena_napi, dim); ++ ++ ena_napi->rx_ring->smoothed_interval = cur_moder.usec; ++ dim->state = NET_DIM_START_MEASURE; ++} ++ ++static void ena_adjust_adaptive_rx_intr_moderation(struct ena_napi *ena_napi) ++{ ++ struct net_dim_sample dim_sample; ++ struct ena_ring *rx_ring = ena_napi->rx_ring; ++ ++ if (!rx_ring->per_napi_packets) ++ return; ++ ++ rx_ring->non_empty_napi_events++; ++ ++ net_dim_sample(rx_ring->non_empty_napi_events, ++ rx_ring->rx_stats.cnt, ++ rx_ring->rx_stats.bytes, ++ &dim_sample); ++ ++ net_dim(&ena_napi->dim, dim_sample); ++ + 
rx_ring->per_napi_packets = 0; +- rx_ring->per_napi_bytes = 0; + } + + static void ena_unmask_interrupt(struct ena_ring *tx_ring, +@@ -1261,9 +1275,11 @@ static int ena_io_poll(struct napi_struc + * from the interrupt context (vs from sk_busy_loop) + */ + if (napi_complete_done(napi, rx_work_done)) { +- /* Tx and Rx share the same interrupt vector */ ++ /* We apply adaptive moderation on Rx path only. ++ * Tx uses static interrupt moderation. ++ */ + if (ena_com_get_adaptive_moderation_enabled(rx_ring->ena_dev)) +- ena_adjust_intr_moderation(rx_ring, tx_ring); ++ ena_adjust_adaptive_rx_intr_moderation(ena_napi); + + ena_unmask_interrupt(tx_ring, rx_ring); + } +@@ -1741,13 +1757,16 @@ static int ena_create_all_io_rx_queues(s + rc = ena_create_io_rx_queue(adapter, i); + if (rc) + goto create_err; ++ INIT_WORK(&adapter->ena_napi[i].dim.work, ena_dim_work); + } + + return 0; + + create_err: +- while (i--) ++ while (i--) { ++ cancel_work_sync(&adapter->ena_napi[i].dim.work); + ena_com_destroy_io_queue(ena_dev, ENA_IO_RXQ_IDX(i)); ++ } + + return rc; + } +Index: linux/drivers/net/ethernet/amazon/ena/ena_netdev.h +=================================================================== +--- linux.orig/drivers/net/ethernet/amazon/ena/ena_netdev.h ++++ linux/drivers/net/ethernet/amazon/ena/ena_netdev.h +@@ -34,6 +34,7 @@ + #define ENA_H + + #include <linux/bitops.h> ++#include <linux/net_dim.h> + #include <linux/etherdevice.h> + #include <linux/inetdevice.h> + #include <linux/interrupt.h> +@@ -153,6 +154,7 @@ struct ena_napi { + struct ena_ring *tx_ring; + struct ena_ring *rx_ring; + u32 qid; ++ struct net_dim dim; + }; + + struct ena_calc_queue_size_ctx { +@@ -280,6 +282,7 @@ struct ena_ring { + u32 per_napi_packets; + u32 per_napi_bytes; + enum ena_intr_moder_level moder_tbl_idx; ++ u16 non_empty_napi_events; + struct u64_stats_sync syncp; + union { + struct ena_stats_tx tx_stats; diff --git 
a/debian/patches/features/all/ena/net-ena-update-driver-version-from-2.0.2-to-2.0.3.patch b/debian/patches/features/all/ena/net-ena-update-driver-version-from-2.0.2-to-2.0.3.patch new file mode 100644 index 000000000..7b3fd73ce --- /dev/null +++ b/debian/patches/features/all/ena/net-ena-update-driver-version-from-2.0.2-to-2.0.3.patch @@ -0,0 +1,27 @@ +From: Arthur Kiyanovski <akiyano@amazon.com> +Date: Mon, 11 Feb 2019 19:17:44 +0200 +Subject: [PATCH] net: ena: update driver version from 2.0.2 to 2.0.3 +Origin: https://git.kernel.org/linus/d9b8656da92223eb004b4f4db74fe48e7433f7b2 +Bug-Debian: https://bugs.debian.org/941291 + +Update driver version due to bug fix. + +Signed-off-by: Arthur Kiyanovski <akiyano@amazon.com> +Signed-off-by: David S. Miller <davem@davemloft.net> +--- + drivers/net/ethernet/amazon/ena/ena_netdev.h | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +Index: linux/drivers/net/ethernet/amazon/ena/ena_netdev.h +=================================================================== +--- linux.orig/drivers/net/ethernet/amazon/ena/ena_netdev.h ++++ linux/drivers/net/ethernet/amazon/ena/ena_netdev.h +@@ -45,7 +45,7 @@ + + #define DRV_MODULE_VER_MAJOR 2 + #define DRV_MODULE_VER_MINOR 0 +-#define DRV_MODULE_VER_SUBMINOR 2 ++#define DRV_MODULE_VER_SUBMINOR 3 + + #define DRV_MODULE_NAME "ena" + #ifndef DRV_MODULE_VERSION diff --git a/debian/patches/features/all/ena/net-ena-update-driver-version-from-2.0.3-to-2.1.0.patch b/debian/patches/features/all/ena/net-ena-update-driver-version-from-2.0.3-to-2.1.0.patch new file mode 100644 index 000000000..9a62b79ac --- /dev/null +++ b/debian/patches/features/all/ena/net-ena-update-driver-version-from-2.0.3-to-2.1.0.patch @@ -0,0 +1,32 @@ +From: Sameeh Jubran <sameehj@amazon.com> +Date: Tue, 11 Jun 2019 14:58:11 +0300 +Subject: [PATCH] net: ena: update driver version from 2.0.3 to 2.1.0 +Origin: https://git.kernel.org/linus/dbbc6e6877768a03092751edf89d012d561b4553 +Bug-Debian: https://bugs.debian.org/941291 + 
+Update driver version to match device specification. + +Signed-off-by: Sameeh Jubran <sameehj@amazon.com> +Signed-off-by: David S. Miller <davem@davemloft.net> +--- + drivers/net/ethernet/amazon/ena/ena_netdev.h | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +diff --git a/drivers/net/ethernet/amazon/ena/ena_netdev.h b/drivers/net/ethernet/amazon/ena/ena_netdev.h +index b9d590879815..f2b6e2e0504d 100644 +--- a/drivers/net/ethernet/amazon/ena/ena_netdev.h ++++ b/drivers/net/ethernet/amazon/ena/ena_netdev.h +@@ -44,8 +44,8 @@ + #include "ena_eth_com.h" + + #define DRV_MODULE_VER_MAJOR 2 +-#define DRV_MODULE_VER_MINOR 0 +-#define DRV_MODULE_VER_SUBMINOR 3 ++#define DRV_MODULE_VER_MINOR 1 ++#define DRV_MODULE_VER_SUBMINOR 0 + + #define DRV_MODULE_NAME "ena" + #ifndef DRV_MODULE_VERSION +-- +2.17.1 + diff --git a/debian/patches/features/all/ena/net-ena-use-dev_info_once-instead-of-static-variable.patch b/debian/patches/features/all/ena/net-ena-use-dev_info_once-instead-of-static-variable.patch new file mode 100644 index 000000000..8b3a55e8d --- /dev/null +++ b/debian/patches/features/all/ena/net-ena-use-dev_info_once-instead-of-static-variable.patch @@ -0,0 +1,34 @@ +From: Sameeh Jubran <sameehj@amazon.com> +Date: Mon, 3 Jun 2019 17:43:29 +0300 +Subject: [PATCH] net: ena: use dev_info_once instead of static variable +Origin: https://git.kernel.org/linus/1e9c3fbad83a70e0b00806df3f4dd2db0bc04cc4 +Bug-Debian: https://bugs.debian.org/941291 + +Signed-off-by: Sameeh Jubran <sameehj@amazon.com> +Signed-off-by: David S. 
Miller <davem@davemloft.net> +--- + drivers/net/ethernet/amazon/ena/ena_netdev.c | 4 +--- + 1 file changed, 1 insertion(+), 3 deletions(-) + +Index: linux/drivers/net/ethernet/amazon/ena/ena_netdev.c +=================================================================== +--- linux.orig/drivers/net/ethernet/amazon/ena/ena_netdev.c ++++ linux/drivers/net/ethernet/amazon/ena/ena_netdev.c +@@ -3277,7 +3277,6 @@ static int ena_probe(struct pci_dev *pde + struct ena_llq_configurations llq_config; + struct ena_com_dev *ena_dev = NULL; + struct ena_adapter *adapter; +- static int version_printed; + int io_queue_num, bars, rc; + struct net_device *netdev; + static int adapters_found; +@@ -3289,8 +3288,7 @@ static int ena_probe(struct pci_dev *pde + + dev_dbg(&pdev->dev, "%s\n", __func__); + +- if (version_printed++ == 0) +- dev_info(&pdev->dev, "%s", version); ++ dev_info_once(&pdev->dev, "%s", version); + + rc = pci_enable_device_mem(pdev); + if (rc) { diff --git a/debian/patches/features/all/lockdown/0001-Add-the-ability-to-lock-down-access-to-the-running-k.patch b/debian/patches/features/all/lockdown/0001-Add-the-ability-to-lock-down-access-to-the-running-k.patch new file mode 100644 index 000000000..9a8cd7c82 --- /dev/null +++ b/debian/patches/features/all/lockdown/0001-Add-the-ability-to-lock-down-access-to-the-running-k.patch @@ -0,0 +1,164 @@ +From: David Howells <dhowells@redhat.com> +Date: Wed, 8 Nov 2017 15:11:31 +0000 +Subject: [01/29] Add the ability to lock down access to the running kernel + image +Origin: https://git.kernel.org/pub/scm/linux/kernel/git/dhowells/linux-fs.git/commit?id=6d350e2534bfaaaa3e523484b2ca44d22377e951 + +Provide a single call to allow kernel code to determine whether the system +should be locked down, thereby disallowing various accesses that might +allow the running kernel image to be changed including the loading of +modules that aren't validly signed with a key we recognise, fiddling with +MSR registers and disallowing hibernation, + 
+Signed-off-by: David Howells <dhowells@redhat.com> +Acked-by: James Morris <james.l.morris@oracle.com> +--- + include/linux/kernel.h | 17 ++++++++++++++ + include/linux/security.h | 8 +++++++ + security/Kconfig | 8 +++++++ + security/Makefile | 3 +++ + security/lock_down.c | 60 ++++++++++++++++++++++++++++++++++++++++++++++++ + 5 files changed, 96 insertions(+) + create mode 100644 security/lock_down.c + +Index: linux/include/linux/kernel.h +=================================================================== +--- linux.orig/include/linux/kernel.h ++++ linux/include/linux/kernel.h +@@ -341,6 +341,23 @@ static inline void refcount_error_report + { } + #endif + ++#ifdef CONFIG_LOCK_DOWN_KERNEL ++extern bool __kernel_is_locked_down(const char *what, bool first); ++#else ++static inline bool __kernel_is_locked_down(const char *what, bool first) ++{ ++ return false; ++} ++#endif ++ ++#define kernel_is_locked_down(what) \ ++ ({ \ ++ static bool message_given; \ ++ bool locked_down = __kernel_is_locked_down(what, !message_given); \ ++ message_given = true; \ ++ locked_down; \ ++ }) ++ + /* Internal, do not use. */ + int __must_check _kstrtoul(const char *s, unsigned int base, unsigned long *res); + int __must_check _kstrtol(const char *s, unsigned int base, long *res); +Index: linux/include/linux/security.h +=================================================================== +--- linux.orig/include/linux/security.h ++++ linux/include/linux/security.h +@@ -1843,5 +1843,13 @@ static inline void free_secdata(void *se + { } + #endif /* CONFIG_SECURITY */ + ++#ifdef CONFIG_LOCK_DOWN_KERNEL ++extern void __init init_lockdown(void); ++#else ++static inline void __init init_lockdown(void) ++{ ++} ++#endif ++ + #endif /* ! 
__LINUX_SECURITY_H */ + +Index: linux/security/Kconfig +=================================================================== +--- linux.orig/security/Kconfig ++++ linux/security/Kconfig +@@ -239,6 +239,14 @@ config STATIC_USERMODEHELPER_PATH + If you wish for all usermode helper programs to be disabled, + specify an empty string here (i.e. ""). + ++config LOCK_DOWN_KERNEL ++ bool "Allow the kernel to be 'locked down'" ++ help ++ Allow the kernel to be locked down under certain circumstances, for ++ instance if UEFI secure boot is enabled. Locking down the kernel ++ turns off various features that might otherwise allow access to the ++ kernel image (eg. setting MSR registers). ++ + source security/selinux/Kconfig + source security/smack/Kconfig + source security/tomoyo/Kconfig +Index: linux/security/Makefile +=================================================================== +--- linux.orig/security/Makefile ++++ linux/security/Makefile +@@ -30,3 +30,6 @@ obj-$(CONFIG_CGROUP_DEVICE) += device_c + # Object integrity file lists + subdir-$(CONFIG_INTEGRITY) += integrity + obj-$(CONFIG_INTEGRITY) += integrity/ ++ ++# Allow the kernel to be locked down ++obj-$(CONFIG_LOCK_DOWN_KERNEL) += lock_down.o +Index: linux/security/lock_down.c +=================================================================== +--- /dev/null ++++ linux/security/lock_down.c +@@ -0,0 +1,60 @@ ++/* Lock down the kernel ++ * ++ * Copyright (C) 2016 Red Hat, Inc. All Rights Reserved. ++ * Written by David Howells (dhowells@redhat.com) ++ * ++ * This program is free software; you can redistribute it and/or ++ * modify it under the terms of the GNU General Public Licence ++ * as published by the Free Software Foundation; either version ++ * 2 of the Licence, or (at your option) any later version. ++ */ ++ ++#include <linux/security.h> ++#include <linux/export.h> ++ ++static __ro_after_init bool kernel_locked_down; ++ ++/* ++ * Put the kernel into lock-down mode. 
++ */ ++static void __init lock_kernel_down(const char *where) ++{ ++ if (!kernel_locked_down) { ++ kernel_locked_down = true; ++ pr_notice("Kernel is locked down from %s; see man kernel_lockdown.7\n", ++ where); ++ } ++} ++ ++static int __init lockdown_param(char *ignored) ++{ ++ lock_kernel_down("command line"); ++ return 0; ++} ++ ++early_param("lockdown", lockdown_param); ++ ++/* ++ * Lock the kernel down from very early in the arch setup. This must happen ++ * prior to things like ACPI being initialised. ++ */ ++void __init init_lockdown(void) ++{ ++#ifdef CONFIG_LOCK_DOWN_IN_EFI_SECURE_BOOT ++ if (efi_enabled(EFI_SECURE_BOOT)) ++ lock_kernel_down("EFI secure boot"); ++#endif ++} ++ ++/** ++ * kernel_is_locked_down - Find out if the kernel is locked down ++ * @what: Tag to use in notice generated if lockdown is in effect ++ */ ++bool __kernel_is_locked_down(const char *what, bool first) ++{ ++ if (what && first && kernel_locked_down) ++ pr_notice("Lockdown: %s is restricted; see man kernel_lockdown.7\n", ++ what); ++ return kernel_locked_down; ++} ++EXPORT_SYMBOL(__kernel_is_locked_down); diff --git a/debian/patches/features/all/lockdown/0003-ima-require-secure_boot-rules-in-lockdown-mode.patch b/debian/patches/features/all/lockdown/0003-ima-require-secure_boot-rules-in-lockdown-mode.patch new file mode 100644 index 000000000..0ab99ba64 --- /dev/null +++ b/debian/patches/features/all/lockdown/0003-ima-require-secure_boot-rules-in-lockdown-mode.patch @@ -0,0 +1,75 @@ +From: Mimi Zohar <zohar@linux.vnet.ibm.com> +Date: Wed, 8 Nov 2017 15:11:32 +0000 +Subject: [03/29] ima: require secure_boot rules in lockdown mode +Origin: https://git.kernel.org/pub/scm/linux/kernel/git/dhowells/linux-fs.git/commit?id=29c55d71a8185208c7962843a29c9a84ae27b2b0 + +Require the "secure_boot" rules, whether or not it is specified +on the boot command line, for both the builtin and custom policies +in secure boot lockdown mode. 
+ +Signed-off-by: Mimi Zohar <zohar@linux.vnet.ibm.com> +Signed-off-by: David Howells <dhowells@redhat.com> +[bwh: Adjust context to apply after commits 6f0911a666d1 + "ima: fix updating the ima_appraise flag" and ef96837b0de4 + "ima: add build time policy"] +--- + security/integrity/ima/ima_policy.c | 39 +++++++++++++++++++++++++++---------- + 1 file changed, 29 insertions(+), 10 deletions(-) + +Index: linux/security/integrity/ima/ima_policy.c +=================================================================== +--- linux.orig/security/integrity/ima/ima_policy.c ++++ linux/security/integrity/ima/ima_policy.c +@@ -481,14 +481,21 @@ static int ima_appraise_flag(enum ima_ho + */ + void __init ima_init_policy(void) + { +- int i, measure_entries, appraise_entries, secure_boot_entries; ++ int i; ++ int measure_entries = 0; ++ int appraise_entries = 0; ++ int secure_boot_entries = 0; ++ bool kernel_locked_down = __kernel_is_locked_down(NULL, false); + + /* if !ima_policy set entries = 0 so we load NO default rules */ +- measure_entries = ima_policy ? ARRAY_SIZE(dont_measure_rules) : 0; +- appraise_entries = ima_use_appraise_tcb ? +- ARRAY_SIZE(default_appraise_rules) : 0; +- secure_boot_entries = ima_use_secure_boot ? +- ARRAY_SIZE(secure_boot_rules) : 0; ++ if (ima_policy) ++ measure_entries = ARRAY_SIZE(dont_measure_rules); ++ ++ if (ima_use_appraise_tcb) ++ appraise_entries = ARRAY_SIZE(default_appraise_rules); ++ ++ if (ima_use_secure_boot || kernel_locked_down) ++ secure_boot_entries = ARRAY_SIZE(secure_boot_rules); + + for (i = 0; i < measure_entries; i++) + list_add_tail(&dont_measure_rules[i].list, &ima_default_rules); +@@ -510,11 +517,24 @@ void __init ima_init_policy(void) + /* + * Insert the builtin "secure_boot" policy rules requiring file + * signatures, prior to any other appraise rules. ++ * In secure boot lock-down mode, also require these appraise ++ * rules for custom policies. 
+ */ + for (i = 0; i < secure_boot_entries; i++) { ++ struct ima_rule_entry *entry; ++ ++ /* Include for builtin policies */ + list_add_tail(&secure_boot_rules[i].list, &ima_default_rules); + temp_ima_appraise |= + ima_appraise_flag(secure_boot_rules[i].func); ++ ++ /* Include for custom policies */ ++ if (kernel_locked_down) { ++ entry = kmemdup(&secure_boot_rules[i], sizeof(*entry), ++ GFP_KERNEL); ++ if (entry) ++ list_add_tail(&entry->list, &ima_policy_rules); ++ } + } + + /* diff --git a/debian/patches/features/all/lockdown/0004-Enforce-module-signatures-if-the-kernel-is-locked-do.patch b/debian/patches/features/all/lockdown/0004-Enforce-module-signatures-if-the-kernel-is-locked-do.patch new file mode 100644 index 000000000..0ab5e258c --- /dev/null +++ b/debian/patches/features/all/lockdown/0004-Enforce-module-signatures-if-the-kernel-is-locked-do.patch @@ -0,0 +1,95 @@ +From: David Howells <dhowells@redhat.com> +Date: Wed, 8 Nov 2017 15:11:32 +0000 +Subject: [04/29] Enforce module signatures if the kernel is locked down +Origin: https://git.kernel.org/pub/scm/linux/kernel/git/dhowells/linux-fs.git/commit?id=1616ef1deccf5fdb525643a6b3efae34946a148d + +If the kernel is locked down, require that all modules have valid +signatures that we can verify or that IMA can validate the file. 
+ +Signed-off-by: David Howells <dhowells@redhat.com> +Reviewed-by: "Lee, Chun-Yi" <jlee@suse.com> +Reviewed-by: James Morris <james.l.morris@oracle.com> +[bwh: Adjust context to apply after commits 2c8fd268f418 + "module: Do not access sig_enforce directly" and 5fdc7db6448a + "module: setup load info before module_sig_check()"] +--- + kernel/module.c | 19 ++++++++++++------- + 1 file changed, 12 insertions(+), 7 deletions(-) + +Index: linux/kernel/module.c +=================================================================== +--- linux.orig/kernel/module.c ++++ linux/kernel/module.c +@@ -64,6 +64,7 @@ + #include <linux/bsearch.h> + #include <linux/dynamic_debug.h> + #include <linux/audit.h> ++#include <linux/ima.h> + #include <uapi/linux/module.h> + #include "module-internal.h" + +@@ -2784,7 +2785,8 @@ static inline void kmemleak_load_module( + #endif + + #ifdef CONFIG_MODULE_SIG +-static int module_sig_check(struct load_info *info, int flags) ++static int module_sig_check(struct load_info *info, int flags, ++ bool can_do_ima_check) + { + int err = -ENOKEY; + const unsigned long markerlen = sizeof(MODULE_SIG_STRING) - 1; +@@ -2808,13 +2810,16 @@ static int module_sig_check(struct load_ + } + + /* Not having a signature is only an error if we're strict. */ +- if (err == -ENOKEY && !is_module_sig_enforced()) ++ if (err == -ENOKEY && !is_module_sig_enforced() && ++ (!can_do_ima_check || !is_ima_appraise_enabled()) && ++ !kernel_is_locked_down("Loading of unsigned modules")) + err = 0; + + return err; + } + #else /* !CONFIG_MODULE_SIG */ +-static int module_sig_check(struct load_info *info, int flags) ++static int module_sig_check(struct load_info *info, int flags, ++ bool can_do_ima_check) + { + return 0; + } +@@ -3662,7 +3667,7 @@ static int unknown_module_param_cb(char + /* Allocate and load the module: note that size of section 0 is always + zero, and we rely on this for optional sections. 
*/ + static int load_module(struct load_info *info, const char __user *uargs, +- int flags) ++ int flags, bool can_do_ima_check) + { + struct module *mod; + long err = 0; +@@ -3681,7 +3686,7 @@ static int load_module(struct load_info + goto free_copy; + } + +- err = module_sig_check(info, flags); ++ err = module_sig_check(info, flags, can_do_ima_check); + if (err) + goto free_copy; + +@@ -3876,7 +3881,7 @@ SYSCALL_DEFINE3(init_module, void __user + if (err) + return err; + +- return load_module(&info, uargs, 0); ++ return load_module(&info, uargs, 0, false); + } + + SYSCALL_DEFINE3(finit_module, int, fd, const char __user *, uargs, int, flags) +@@ -3903,7 +3908,7 @@ SYSCALL_DEFINE3(finit_module, int, fd, c + info.hdr = hdr; + info.len = size; + +- return load_module(&info, uargs, flags); ++ return load_module(&info, uargs, flags, true); + } + + static inline int within(unsigned long addr, void *start, unsigned long size) diff --git a/debian/patches/features/all/lockdown/0005-Restrict-dev-mem-kmem-port-when-the-kernel-is-locked.patch b/debian/patches/features/all/lockdown/0005-Restrict-dev-mem-kmem-port-when-the-kernel-is-locked.patch new file mode 100644 index 000000000..625f8f763 --- /dev/null +++ b/debian/patches/features/all/lockdown/0005-Restrict-dev-mem-kmem-port-when-the-kernel-is-locked.patch @@ -0,0 +1,35 @@ +From: Matthew Garrett <matthew.garrett@nebula.com> +Date: Wed, 8 Nov 2017 15:11:32 +0000 +Subject: [05/29] Restrict /dev/{mem,kmem,port} when the kernel is locked down +Origin: https://git.kernel.org/pub/scm/linux/kernel/git/dhowells/linux-fs.git/commit?id=8214bb0d847928bf08a7d8475f84c06541c5a352 + +Allowing users to read and write to core kernel memory makes it possible +for the kernel to be subverted, avoiding module loading restrictions, and +also to steal cryptographic information. + +Disallow /dev/mem and /dev/kmem from being opened this when the kernel has +been locked down to prevent this. 
+ +Also disallow /dev/port from being opened to prevent raw ioport access and +thus DMA from being used to accomplish the same thing. + +Signed-off-by: Matthew Garrett <matthew.garrett@nebula.com> +Signed-off-by: David Howells <dhowells@redhat.com> +Reviewed-by: "Lee, Chun-Yi" <jlee@suse.com> +--- + drivers/char/mem.c | 2 ++ + 1 file changed, 2 insertions(+) + +Index: linux/drivers/char/mem.c +=================================================================== +--- linux.orig/drivers/char/mem.c ++++ linux/drivers/char/mem.c +@@ -807,6 +807,8 @@ static loff_t memory_lseek(struct file * + + static int open_port(struct inode *inode, struct file *filp) + { ++ if (kernel_is_locked_down("/dev/mem,kmem,port")) ++ return -EPERM; + return capable(CAP_SYS_RAWIO) ? 0 : -EPERM; + } + diff --git a/debian/patches/features/all/lockdown/0006-kexec-Disable-at-runtime-if-the-kernel-is-locked-dow.patch b/debian/patches/features/all/lockdown/0006-kexec-Disable-at-runtime-if-the-kernel-is-locked-dow.patch new file mode 100644 index 000000000..522387d9a --- /dev/null +++ b/debian/patches/features/all/lockdown/0006-kexec-Disable-at-runtime-if-the-kernel-is-locked-dow.patch @@ -0,0 +1,42 @@ +From: Matthew Garrett <matthew.garrett@nebula.com> +Date: Wed, 8 Nov 2017 15:11:32 +0000 +Subject: [06/29] kexec: Disable at runtime if the kernel is locked down +Origin: https://git.kernel.org/pub/scm/linux/kernel/git/dhowells/linux-fs.git/commit?id=6081db9ba435b757a3a3473d0cd50ee2252ccaeb + +kexec permits the loading and execution of arbitrary code in ring 0, which +is something that lock-down is meant to prevent. It makes sense to disable +kexec in this situation. + +This does not affect kexec_file_load() which can check for a signature on the +image to be booted. 
+ +Signed-off-by: Matthew Garrett <matthew.garrett@nebula.com> +Signed-off-by: David Howells <dhowells@redhat.com> +Acked-by: Dave Young <dyoung@redhat.com> +Reviewed-by: "Lee, Chun-Yi" <jlee@suse.com> +Reviewed-by: James Morris <james.l.morris@oracle.com> +cc: kexec@lists.infradead.org +[bwh: Adjust context to apply after commit a210fd32a46b + "kexec: add call to LSM hook in original kexec_load syscall"] +--- + kernel/kexec.c | 7 +++++++ + 1 file changed, 7 insertions(+) + +Index: linux/kernel/kexec.c +=================================================================== +--- linux.orig/kernel/kexec.c ++++ linux/kernel/kexec.c +@@ -208,6 +208,13 @@ static inline int kexec_load_check(unsig + return result; + + /* ++ * kexec can be used to circumvent module loading restrictions, so ++ * prevent loading in that case ++ */ ++ if (kernel_is_locked_down("kexec of unsigned images")) ++ return -EPERM; ++ ++ /* + * Verify we have a legal set of flags + * This leaves us room for future extensions. + */ diff --git a/debian/patches/features/all/lockdown/0007-Copy-secure_boot-flag-in-boot-params-across-kexec-re.patch b/debian/patches/features/all/lockdown/0007-Copy-secure_boot-flag-in-boot-params-across-kexec-re.patch new file mode 100644 index 000000000..2024c04a6 --- /dev/null +++ b/debian/patches/features/all/lockdown/0007-Copy-secure_boot-flag-in-boot-params-across-kexec-re.patch @@ -0,0 +1,36 @@ +From: Dave Young <dyoung@redhat.com> +Date: Wed, 8 Nov 2017 15:11:32 +0000 +Subject: [07/29] Copy secure_boot flag in boot params across kexec reboot +Origin: https://git.kernel.org/pub/scm/linux/kernel/git/dhowells/linux-fs.git/commit?id=a6b7f780bdaa731f3e2970d65dcd52fe9ba2409d + +Kexec reboot in case secure boot being enabled does not keep the secure +boot mode in new kernel, so later one can load unsigned kernel via legacy +kexec_load. In this state, the system is missing the protections provided +by secure boot. 
+ +Fix this by retaining the secure_boot flag from the original +kernel. + +The secure_boot flag in boot_params is set in the EFI stub, but kexec bypasses the +stub. Fix this issue by copying the secure_boot flag across kexec reboot. + +Signed-off-by: Dave Young <dyoung@redhat.com> +Signed-off-by: David Howells <dhowells@redhat.com> +Reviewed-by: "Lee, Chun-Yi" <jlee@suse.com> +cc: kexec@lists.infradead.org +--- + arch/x86/kernel/kexec-bzimage64.c | 1 + + 1 file changed, 1 insertion(+) + +Index: linux/arch/x86/kernel/kexec-bzimage64.c +=================================================================== +--- linux.orig/arch/x86/kernel/kexec-bzimage64.c ++++ linux/arch/x86/kernel/kexec-bzimage64.c +@@ -182,6 +182,7 @@ setup_efi_state(struct boot_params *para + if (efi_enabled(EFI_OLD_MEMMAP)) + return 0; + ++ params->secure_boot = boot_params.secure_boot; + ei->efi_loader_signature = current_ei->efi_loader_signature; + ei->efi_systab = current_ei->efi_systab; + ei->efi_systab_hi = current_ei->efi_systab_hi; diff --git a/debian/patches/features/all/lockdown/0008-kexec_file-Restrict-at-runtime-if-the-kernel-is-lock.patch b/debian/patches/features/all/lockdown/0008-kexec_file-Restrict-at-runtime-if-the-kernel-is-lock.patch new file mode 100644 index 000000000..056936427 --- /dev/null +++ b/debian/patches/features/all/lockdown/0008-kexec_file-Restrict-at-runtime-if-the-kernel-is-lock.patch @@ -0,0 +1,40 @@ +From: Chun-Yi Lee <joeyli.kernel@gmail.com> +Date: Wed, 8 Nov 2017 15:11:33 +0000 +Subject: [08/29] kexec_file: Restrict at runtime if the kernel is locked down +Origin: https://git.kernel.org/pub/scm/linux/kernel/git/dhowells/linux-fs.git/commit?id=eed4aca0409692d7d24bc64f5c98d346cd0506c4 + +When KEXEC_VERIFY_SIG is not enabled, the kernel should not load images through +the kexec_file system call if the kernel is locked down unless IMA can be used +to validate the image.
+ +This code was shown in Matthew's patch but not in git: +https://lkml.org/lkml/2015/3/13/778 + +Cc: Matthew Garrett <mjg59@srcf.ucam.org> +Signed-off-by: Chun-Yi Lee <jlee@suse.com> +Signed-off-by: David Howells <dhowells@redhat.com> +Reviewed-by: James Morris <james.l.morris@oracle.com> +cc: kexec@lists.infradead.org +--- + kernel/kexec_file.c | 8 ++++++++ + 1 file changed, 8 insertions(+) + +Index: linux/kernel/kexec_file.c +=================================================================== +--- linux.orig/kernel/kexec_file.c ++++ linux/kernel/kexec_file.c +@@ -328,6 +328,14 @@ SYSCALL_DEFINE5(kexec_file_load, int, ke + if (!capable(CAP_SYS_BOOT) || kexec_load_disabled) + return -EPERM; + ++ /* Don't permit images to be loaded into trusted kernels if we're not ++ * going to verify the signature on them ++ */ ++ if (!IS_ENABLED(CONFIG_KEXEC_VERIFY_SIG) && ++ !is_ima_appraise_enabled() && ++ kernel_is_locked_down("kexec of unsigned images")) ++ return -EPERM; ++ + /* Make sure we have a legal set of flags */ + if (flags != (flags & KEXEC_FILE_FLAGS)) + return -EINVAL; diff --git a/debian/patches/features/all/lockdown/0009-hibernate-Disable-when-the-kernel-is-locked-down.patch b/debian/patches/features/all/lockdown/0009-hibernate-Disable-when-the-kernel-is-locked-down.patch new file mode 100644 index 000000000..56060f80a --- /dev/null +++ b/debian/patches/features/all/lockdown/0009-hibernate-Disable-when-the-kernel-is-locked-down.patch @@ -0,0 +1,31 @@ +From: Josh Boyer <jwboyer@fedoraproject.org> +Date: Wed, 8 Nov 2017 15:11:33 +0000 +Subject: [09/29] hibernate: Disable when the kernel is locked down +Origin: https://git.kernel.org/pub/scm/linux/kernel/git/dhowells/linux-fs.git/commit?id=480ddca2a40c2efd1c01cfa20d8f6c1377ddafe3 + +There is currently no way to verify the resume image when returning +from hibernate.
This might compromise the signed modules trust model, +so until we can work with signed hibernate images we disable it when the +kernel is locked down. + +Signed-off-by: Josh Boyer <jwboyer@fedoraproject.org> +Signed-off-by: David Howells <dhowells@redhat.com> +Reviewed-by: "Lee, Chun-Yi" <jlee@suse.com> +cc: linux-pm@vger.kernel.org +--- + kernel/power/hibernate.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +Index: linux/kernel/power/hibernate.c +=================================================================== +--- linux.orig/kernel/power/hibernate.c ++++ linux/kernel/power/hibernate.c +@@ -70,7 +70,7 @@ static const struct platform_hibernation + + bool hibernation_available(void) + { +- return (nohibernate == 0); ++ return nohibernate == 0 && !kernel_is_locked_down("Hibernation"); + } + + /** diff --git a/debian/patches/features/all/lockdown/0010-uswsusp-Disable-when-the-kernel-is-locked-down.patch b/debian/patches/features/all/lockdown/0010-uswsusp-Disable-when-the-kernel-is-locked-down.patch new file mode 100644 index 000000000..79b5f3461 --- /dev/null +++ b/debian/patches/features/all/lockdown/0010-uswsusp-Disable-when-the-kernel-is-locked-down.patch @@ -0,0 +1,32 @@ +From: Matthew Garrett <mjg59@srcf.ucam.org> +Date: Wed, 8 Nov 2017 15:11:33 +0000 +Subject: [10/29] uswsusp: Disable when the kernel is locked down +Origin: https://git.kernel.org/pub/scm/linux/kernel/git/dhowells/linux-fs.git/commit?id=706def46d58e9c69e91db506305485731f615e44 + +uswsusp allows a user process to dump and then restore kernel state, which +makes it possible to modify the running kernel. Disable this if the kernel +is locked down. 
+ +Signed-off-by: Matthew Garrett <mjg59@srcf.ucam.org> +Signed-off-by: David Howells <dhowells@redhat.com> +Reviewed-by: "Lee, Chun-Yi" <jlee@suse.com> +Reviewed-by: James Morris <james.l.morris@oracle.com> +cc: linux-pm@vger.kernel.org +--- + kernel/power/user.c | 3 +++ + 1 file changed, 3 insertions(+) + +Index: linux/kernel/power/user.c +=================================================================== +--- linux.orig/kernel/power/user.c ++++ linux/kernel/power/user.c +@@ -52,6 +52,9 @@ static int snapshot_open(struct inode *i + if (!hibernation_available()) + return -EPERM; + ++ if (kernel_is_locked_down("/dev/snapshot")) ++ return -EPERM; ++ + lock_system_sleep(); + + if (!atomic_add_unless(&snapshot_device_available, -1, 0)) { diff --git a/debian/patches/features/all/lockdown/0011-PCI-Lock-down-BAR-access-when-the-kernel-is-locked-d.patch b/debian/patches/features/all/lockdown/0011-PCI-Lock-down-BAR-access-when-the-kernel-is-locked-d.patch new file mode 100644 index 000000000..1f9186ab3 --- /dev/null +++ b/debian/patches/features/all/lockdown/0011-PCI-Lock-down-BAR-access-when-the-kernel-is-locked-d.patch @@ -0,0 +1,104 @@ +From: Matthew Garrett <matthew.garrett@nebula.com> +Date: Wed, 8 Nov 2017 15:11:33 +0000 +Subject: [11/29] PCI: Lock down BAR access when the kernel is locked down +Origin: https://git.kernel.org/pub/scm/linux/kernel/git/dhowells/linux-fs.git/commit?id=d107d11fd7ac982a34b1233722cb3e72f9fe5a20 + +Any hardware that can potentially generate DMA has to be locked down in +order to avoid it being possible for an attacker to modify kernel code, +allowing them to circumvent disabled module loading or module signing. +Default to paranoid - in future we can potentially relax this for +sufficiently IOMMU-isolated devices. 
+ +Signed-off-by: Matthew Garrett <matthew.garrett@nebula.com> +Signed-off-by: David Howells <dhowells@redhat.com> +Acked-by: Bjorn Helgaas <bhelgaas@google.com> +Reviewed-by: "Lee, Chun-Yi" <jlee@suse.com> +cc: linux-pci@vger.kernel.org +--- + drivers/pci/pci-sysfs.c | 9 +++++++++ + drivers/pci/proc.c | 9 ++++++++- + drivers/pci/syscall.c | 3 ++- + 3 files changed, 19 insertions(+), 2 deletions(-) + +Index: linux/drivers/pci/pci-sysfs.c +=================================================================== +--- linux.orig/drivers/pci/pci-sysfs.c ++++ linux/drivers/pci/pci-sysfs.c +@@ -905,6 +905,9 @@ static ssize_t pci_write_config(struct f + loff_t init_off = off; + u8 *data = (u8 *) buf; + ++ if (kernel_is_locked_down("Direct PCI access")) ++ return -EPERM; ++ + if (off > dev->cfg_size) + return 0; + if (off + count > dev->cfg_size) { +@@ -1167,6 +1170,9 @@ static int pci_mmap_resource(struct kobj + enum pci_mmap_state mmap_type; + struct resource *res = &pdev->resource[bar]; + ++ if (kernel_is_locked_down("Direct PCI access")) ++ return -EPERM; ++ + if (res->flags & IORESOURCE_MEM && iomem_is_exclusive(res->start)) + return -EINVAL; + +@@ -1242,6 +1248,9 @@ static ssize_t pci_write_resource_io(str + struct bin_attribute *attr, char *buf, + loff_t off, size_t count) + { ++ if (kernel_is_locked_down("Direct PCI access")) ++ return -EPERM; ++ + return pci_resource_io(filp, kobj, attr, buf, off, count, true); + } + +Index: linux/drivers/pci/proc.c +=================================================================== +--- linux.orig/drivers/pci/proc.c ++++ linux/drivers/pci/proc.c +@@ -117,6 +117,9 @@ static ssize_t proc_bus_pci_write(struct + int size = dev->cfg_size; + int cnt; + ++ if (kernel_is_locked_down("Direct PCI access")) ++ return -EPERM; ++ + if (pos >= size) + return 0; + if (nbytes >= size) +@@ -196,6 +199,9 @@ static long proc_bus_pci_ioctl(struct fi + #endif /* HAVE_PCI_MMAP */ + int ret = 0; + ++ if (kernel_is_locked_down("Direct PCI access")) ++ 
return -EPERM; ++ + switch (cmd) { + case PCIIOC_CONTROLLER: + ret = pci_domain_nr(dev->bus); +@@ -237,7 +243,8 @@ static int proc_bus_pci_mmap(struct file + struct pci_filp_private *fpriv = file->private_data; + int i, ret, write_combine = 0, res_bit = IORESOURCE_MEM; + +- if (!capable(CAP_SYS_RAWIO)) ++ if (!capable(CAP_SYS_RAWIO) || ++ kernel_is_locked_down("Direct PCI access")) + return -EPERM; + + if (fpriv->mmap_state == pci_mmap_io) { +Index: linux/drivers/pci/syscall.c +=================================================================== +--- linux.orig/drivers/pci/syscall.c ++++ linux/drivers/pci/syscall.c +@@ -90,7 +90,8 @@ SYSCALL_DEFINE5(pciconfig_write, unsigne + u32 dword; + int err = 0; + +- if (!capable(CAP_SYS_ADMIN)) ++ if (!capable(CAP_SYS_ADMIN) || ++ kernel_is_locked_down("Direct PCI access")) + return -EPERM; + + dev = pci_get_domain_bus_and_slot(0, bus, dfn); diff --git a/debian/patches/features/all/lockdown/0012-x86-Lock-down-IO-port-access-when-the-kernel-is-lock.patch b/debian/patches/features/all/lockdown/0012-x86-Lock-down-IO-port-access-when-the-kernel-is-lock.patch new file mode 100644 index 000000000..3a9d69dcb --- /dev/null +++ b/debian/patches/features/all/lockdown/0012-x86-Lock-down-IO-port-access-when-the-kernel-is-lock.patch @@ -0,0 +1,46 @@ +From: Matthew Garrett <matthew.garrett@nebula.com> +Date: Wed, 8 Nov 2017 15:11:34 +0000 +Subject: [12/29] x86: Lock down IO port access when the kernel is locked down +Origin: https://git.kernel.org/pub/scm/linux/kernel/git/dhowells/linux-fs.git/commit?id=00ebba940247d4c37c06da4aedecf6b80db213cf + +IO port access would permit users to gain access to PCI configuration +registers, which in turn (on a lot of hardware) give access to MMIO +register space. This would potentially permit root to trigger arbitrary +DMA, so lock it down by default. + +This also implicitly locks down the KDADDIO, KDDELIO, KDENABIO and +KDDISABIO console ioctls. 
+ +Signed-off-by: Matthew Garrett <matthew.garrett@nebula.com> +Signed-off-by: David Howells <dhowells@redhat.com> +Reviewed-by: Thomas Gleixner <tglx@linutronix.de> +Reviewed-by: "Lee, Chun-Yi" <jlee@suse.com> +cc: x86@kernel.org +--- + arch/x86/kernel/ioport.c | 6 ++++-- + 1 file changed, 4 insertions(+), 2 deletions(-) + +Index: linux/arch/x86/kernel/ioport.c +=================================================================== +--- linux.orig/arch/x86/kernel/ioport.c ++++ linux/arch/x86/kernel/ioport.c +@@ -31,7 +31,8 @@ long ksys_ioperm(unsigned long from, uns + + if ((from + num <= from) || (from + num > IO_BITMAP_BITS)) + return -EINVAL; +- if (turn_on && !capable(CAP_SYS_RAWIO)) ++ if (turn_on && (!capable(CAP_SYS_RAWIO) || ++ kernel_is_locked_down("ioperm"))) + return -EPERM; + + /* +@@ -126,7 +127,8 @@ SYSCALL_DEFINE1(iopl, unsigned int, leve + return -EINVAL; + /* Trying to gain more privileges? */ + if (level > old) { +- if (!capable(CAP_SYS_RAWIO)) ++ if (!capable(CAP_SYS_RAWIO) || ++ kernel_is_locked_down("iopl")) + return -EPERM; + } + regs->flags = (regs->flags & ~X86_EFLAGS_IOPL) | diff --git a/debian/patches/features/all/lockdown/0013-x86-msr-Restrict-MSR-access-when-the-kernel-is-locke.patch b/debian/patches/features/all/lockdown/0013-x86-msr-Restrict-MSR-access-when-the-kernel-is-locke.patch new file mode 100644 index 000000000..1a7a4d879 --- /dev/null +++ b/debian/patches/features/all/lockdown/0013-x86-msr-Restrict-MSR-access-when-the-kernel-is-locke.patch @@ -0,0 +1,50 @@ +From: Matthew Garrett <matthew.garrett@nebula.com> +Date: Wed, 8 Nov 2017 15:11:34 +0000 +Subject: [13/29] x86/msr: Restrict MSR access when the kernel is locked down +Origin: https://git.kernel.org/pub/scm/linux/kernel/git/dhowells/linux-fs.git/commit?id=696dcddb285558b4febf318fe620a344d2b2fa47 + +Writing to MSRs should not be allowed if the kernel is locked down, since +it could lead to execution of arbitrary code in kernel mode. Based on a +patch by Kees Cook. 
+ +MSR accesses are logged for the purposes of building up a whitelist as per +Alan Cox's suggestion. + +Signed-off-by: Matthew Garrett <matthew.garrett@nebula.com> +Signed-off-by: David Howells <dhowells@redhat.com> +Acked-by: Kees Cook <keescook@chromium.org> +Reviewed-by: Thomas Gleixner <tglx@linutronix.de> +Reviewed-by: "Lee, Chun-Yi" <jlee@suse.com> +cc: x86@kernel.org +--- + arch/x86/kernel/msr.c | 10 ++++++++++ + 1 file changed, 10 insertions(+) + +Index: linux/arch/x86/kernel/msr.c +=================================================================== +--- linux.orig/arch/x86/kernel/msr.c ++++ linux/arch/x86/kernel/msr.c +@@ -84,6 +84,11 @@ static ssize_t msr_write(struct file *fi + int err = 0; + ssize_t bytes = 0; + ++ if (kernel_is_locked_down("Direct MSR access")) { ++ pr_info("Direct access to MSR %x\n", reg); ++ return -EPERM; ++ } ++ + if (count % 8) + return -EINVAL; /* Invalid chunk size */ + +@@ -135,6 +140,11 @@ static long msr_ioctl(struct file *file, + err = -EFAULT; + break; + } ++ if (kernel_is_locked_down("Direct MSR access")) { ++ pr_info("Direct access to MSR %x\n", regs[1]); /* Display %ecx */ ++ err = -EPERM; ++ break; ++ } + err = wrmsr_safe_regs_on_cpu(cpu, regs); + if (err) + break; diff --git a/debian/patches/features/all/lockdown/0014-asus-wmi-Restrict-debugfs-interface-when-the-kernel-.patch b/debian/patches/features/all/lockdown/0014-asus-wmi-Restrict-debugfs-interface-when-the-kernel-.patch new file mode 100644 index 000000000..295b46e88 --- /dev/null +++ b/debian/patches/features/all/lockdown/0014-asus-wmi-Restrict-debugfs-interface-when-the-kernel-.patch @@ -0,0 +1,55 @@ +From: Matthew Garrett <matthew.garrett@nebula.com> +Date: Wed, 8 Nov 2017 15:11:34 +0000 +Subject: [14/29] asus-wmi: Restrict debugfs interface when the kernel is + locked down +Origin: https://git.kernel.org/pub/scm/linux/kernel/git/dhowells/linux-fs.git/commit?id=2e6d31b3176ee27d216bb92a3b108f6b19d4719a + +We have no way of validating what all of the Asus WMI 
methods do on a given +machine - and there's a risk that some will allow hardware state to be +manipulated in such a way that arbitrary code can be executed in the +kernel, circumventing module loading restrictions. Prevent that if the +kernel is locked down. + +Signed-off-by: Matthew Garrett <matthew.garrett@nebula.com> +Signed-off-by: David Howells <dhowells@redhat.com> +Reviewed-by: "Lee, Chun-Yi" <jlee@suse.com> +cc: acpi4asus-user@lists.sourceforge.net +cc: platform-driver-x86@vger.kernel.org +--- + drivers/platform/x86/asus-wmi.c | 9 +++++++++ + 1 file changed, 9 insertions(+) + +Index: linux/drivers/platform/x86/asus-wmi.c +=================================================================== +--- linux.orig/drivers/platform/x86/asus-wmi.c ++++ linux/drivers/platform/x86/asus-wmi.c +@@ -2002,6 +2002,9 @@ static int show_dsts(struct seq_file *m, + int err; + u32 retval = -1; + ++ if (kernel_is_locked_down("Asus WMI")) ++ return -EPERM; ++ + err = asus_wmi_get_devstate(asus, asus->debug.dev_id, &retval); + + if (err < 0) +@@ -2018,6 +2021,9 @@ static int show_devs(struct seq_file *m, + int err; + u32 retval = -1; + ++ if (kernel_is_locked_down("Asus WMI")) ++ return -EPERM; ++ + err = asus_wmi_set_devstate(asus->debug.dev_id, asus->debug.ctrl_param, + &retval); + +@@ -2042,6 +2048,9 @@ static int show_call(struct seq_file *m, + union acpi_object *obj; + acpi_status status; + ++ if (kernel_is_locked_down("Asus WMI")) ++ return -EPERM; ++ + status = wmi_evaluate_method(ASUS_WMI_MGMT_GUID, + 0, asus->debug.method_id, + &input, &output); diff --git a/debian/patches/features/all/lockdown/0015-ACPI-Limit-access-to-custom_method-when-the-kernel-i.patch b/debian/patches/features/all/lockdown/0015-ACPI-Limit-access-to-custom_method-when-the-kernel-i.patch new file mode 100644 index 000000000..17778da72 --- /dev/null +++ b/debian/patches/features/all/lockdown/0015-ACPI-Limit-access-to-custom_method-when-the-kernel-i.patch @@ -0,0 +1,32 @@ +From: Matthew Garrett 
<matthew.garrett@nebula.com> +Date: Wed, 8 Nov 2017 15:11:34 +0000 +Subject: [15/29] ACPI: Limit access to custom_method when the kernel is locked + down +Origin: https://git.kernel.org/pub/scm/linux/kernel/git/dhowells/linux-fs.git/commit?id=5ff99c830aacf02f25816a0da427216fb63ba16d + +custom_method effectively allows arbitrary access to system memory, making +it possible for an attacker to circumvent restrictions on module loading. +Disable it if the kernel is locked down. + +Signed-off-by: Matthew Garrett <matthew.garrett@nebula.com> +Signed-off-by: David Howells <dhowells@redhat.com> +Reviewed-by: "Lee, Chun-Yi" <jlee@suse.com> +cc: linux-acpi@vger.kernel.org +--- + drivers/acpi/custom_method.c | 3 +++ + 1 file changed, 3 insertions(+) + +Index: linux/drivers/acpi/custom_method.c +=================================================================== +--- linux.orig/drivers/acpi/custom_method.c ++++ linux/drivers/acpi/custom_method.c +@@ -29,6 +29,9 @@ static ssize_t cm_write(struct file *fil + struct acpi_table_header table; + acpi_status status; + ++ if (kernel_is_locked_down("ACPI custom methods")) ++ return -EPERM; ++ + if (!(*ppos)) { + /* parse the table header to get the table length */ + if (count <= sizeof(struct acpi_table_header)) diff --git a/debian/patches/features/all/lockdown/0016-acpi-Ignore-acpi_rsdp-kernel-param-when-the-kernel-h.patch b/debian/patches/features/all/lockdown/0016-acpi-Ignore-acpi_rsdp-kernel-param-when-the-kernel-h.patch new file mode 100644 index 000000000..f8ee397c8 --- /dev/null +++ b/debian/patches/features/all/lockdown/0016-acpi-Ignore-acpi_rsdp-kernel-param-when-the-kernel-h.patch @@ -0,0 +1,32 @@ +From: Josh Boyer <jwboyer@redhat.com> +Date: Wed, 8 Nov 2017 15:11:34 +0000 +Subject: [16/29] acpi: Ignore acpi_rsdp kernel param when the kernel has been + locked down +Origin: https://git.kernel.org/pub/scm/linux/kernel/git/dhowells/linux-fs.git/commit?id=67890a0361626ec3e035264656896c77670c414b + +This option allows userspace to 
pass the RSDP address to the kernel, which +makes it possible for a user to modify the workings of hardware. Reject +the option when the kernel is locked down. + +Signed-off-by: Josh Boyer <jwboyer@redhat.com> +Signed-off-by: David Howells <dhowells@redhat.com> +Reviewed-by: "Lee, Chun-Yi" <jlee@suse.com> +cc: Dave Young <dyoung@redhat.com> +cc: linux-acpi@vger.kernel.org +--- + drivers/acpi/osl.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +Index: linux/drivers/acpi/osl.c +=================================================================== +--- linux.orig/drivers/acpi/osl.c ++++ linux/drivers/acpi/osl.c +@@ -194,7 +194,7 @@ acpi_physical_address __init acpi_os_get + acpi_physical_address pa; + + #ifdef CONFIG_KEXEC +- if (acpi_rsdp) ++ if (acpi_rsdp && !kernel_is_locked_down("ACPI RSDP specification")) + return acpi_rsdp; + #endif + pa = acpi_arch_get_root_pointer(); diff --git a/debian/patches/features/all/lockdown/0017-acpi-Disable-ACPI-table-override-if-the-kernel-is-lo.patch b/debian/patches/features/all/lockdown/0017-acpi-Disable-ACPI-table-override-if-the-kernel-is-lo.patch new file mode 100644 index 000000000..fd12eedb2 --- /dev/null +++ b/debian/patches/features/all/lockdown/0017-acpi-Disable-ACPI-table-override-if-the-kernel-is-lo.patch @@ -0,0 +1,40 @@ +From: Linn Crosetto <linn@hpe.com> +Date: Wed, 8 Nov 2017 15:11:34 +0000 +Subject: [17/29] acpi: Disable ACPI table override if the kernel is locked + down +Origin: https://git.kernel.org/pub/scm/linux/kernel/git/dhowells/linux-fs.git/commit?id=5976d26de05569951641ebeb95f7240993b66063 + +From the kernel documentation (initrd_table_override.txt): + + If the ACPI_INITRD_TABLE_OVERRIDE compile option is true, it is possible + to override nearly any ACPI table provided by the BIOS with an + instrumented, modified one. + +When securelevel is set, the kernel should disallow any unauthenticated +changes to kernel space.
ACPI tables contain code invoked by the kernel, +so do not allow ACPI tables to be overridden if the kernel is locked down. + +Signed-off-by: Linn Crosetto <linn@hpe.com> +Signed-off-by: David Howells <dhowells@redhat.com> +Reviewed-by: "Lee, Chun-Yi" <jlee@suse.com> +cc: linux-acpi@vger.kernel.org +--- + drivers/acpi/tables.c | 5 +++++ + 1 file changed, 5 insertions(+) + +Index: linux/drivers/acpi/tables.c +=================================================================== +--- linux.orig/drivers/acpi/tables.c ++++ linux/drivers/acpi/tables.c +@@ -532,6 +532,11 @@ void __init acpi_table_upgrade(void) + if (table_nr == 0) + return; + ++ if (kernel_is_locked_down("ACPI table override")) { ++ pr_notice("kernel is locked down, ignoring table override\n"); ++ return; ++ } ++ + acpi_tables_addr = + memblock_find_in_range(0, ACPI_TABLE_UPGRADE_MAX_PHYS, + all_tables_size, PAGE_SIZE); diff --git a/debian/patches/features/all/lockdown/0018-acpi-Disable-APEI-error-injection-if-the-kernel-is-l.patch b/debian/patches/features/all/lockdown/0018-acpi-Disable-APEI-error-injection-if-the-kernel-is-l.patch new file mode 100644 index 000000000..396a506ac --- /dev/null +++ b/debian/patches/features/all/lockdown/0018-acpi-Disable-APEI-error-injection-if-the-kernel-is-l.patch @@ -0,0 +1,43 @@ +From: Linn Crosetto <linn@hpe.com> +Date: Wed, 8 Nov 2017 15:11:35 +0000 +Subject: [18/29] acpi: Disable APEI error injection if the kernel is locked + down +Origin: https://git.kernel.org/pub/scm/linux/kernel/git/dhowells/linux-fs.git/commit?id=a9c239382bce17b9108f941130392151d5fff262 + +ACPI provides an error injection mechanism, EINJ, for debugging and testing +the ACPI Platform Error Interface (APEI) and other RAS features. If +supported by the firmware, ACPI specification 5.0 and later provide for a +way to specify a physical memory address to which to inject the error. 
+ +Injecting errors through EINJ can produce errors which to the platform are +indistinguishable from real hardware errors. This can have undesirable +side-effects, such as causing the platform to mark hardware as needing +replacement. + +While it does not provide a method to load unauthenticated privileged code, +the effect of these errors may persist across reboots and affect trust in +the underlying hardware, so disable error injection through EINJ if +the kernel is locked down. + +Signed-off-by: Linn Crosetto <linn@hpe.com> +Signed-off-by: David Howells <dhowells@redhat.com> +Reviewed-by: "Lee, Chun-Yi" <jlee@suse.com> +cc: linux-acpi@vger.kernel.org +--- + drivers/acpi/apei/einj.c | 3 +++ + 1 file changed, 3 insertions(+) + +Index: linux/drivers/acpi/apei/einj.c +=================================================================== +--- linux.orig/drivers/acpi/apei/einj.c ++++ linux/drivers/acpi/apei/einj.c +@@ -518,6 +518,9 @@ static int einj_error_inject(u32 type, u + int rc; + u64 base_addr, size; + ++ if (kernel_is_locked_down("ACPI error injection")) ++ return -EPERM; ++ + /* If user manually set "flags", make sure it is legal */ + if (flags && (flags & + ~(SETWA_FLAGS_APICID|SETWA_FLAGS_MEM|SETWA_FLAGS_PCIE_SBDF))) diff --git a/debian/patches/features/all/lockdown/0020-Prohibit-PCMCIA-CIS-storage-when-the-kernel-is-locke.patch b/debian/patches/features/all/lockdown/0020-Prohibit-PCMCIA-CIS-storage-when-the-kernel-is-locke.patch new file mode 100644 index 000000000..2ed56ad5b --- /dev/null +++ b/debian/patches/features/all/lockdown/0020-Prohibit-PCMCIA-CIS-storage-when-the-kernel-is-locke.patch @@ -0,0 +1,29 @@ +From: David Howells <dhowells@redhat.com> +Date: Wed, 8 Nov 2017 15:11:35 +0000 +Subject: [20/29] Prohibit PCMCIA CIS storage when the kernel is locked down +Origin: https://git.kernel.org/pub/scm/linux/kernel/git/dhowells/linux-fs.git/commit?id=18b2dfc74efeafbdbb8f5d4d28da6334b7e1f1ac + +Prohibit replacement of the PCMCIA Card Information Structure 
when the +kernel is locked down. + +Suggested-by: Dominik Brodowski <linux@dominikbrodowski.net> +Signed-off-by: David Howells <dhowells@redhat.com> +cc: linux-pcmcia@lists.infradead.org +--- + drivers/pcmcia/cistpl.c | 3 +++ + 1 file changed, 3 insertions(+) + +Index: linux/drivers/pcmcia/cistpl.c +=================================================================== +--- linux.orig/drivers/pcmcia/cistpl.c ++++ linux/drivers/pcmcia/cistpl.c +@@ -1578,6 +1578,9 @@ static ssize_t pccard_store_cis(struct f + struct pcmcia_socket *s; + int error; + ++ if (kernel_is_locked_down("Direct PCMCIA CIS storage")) ++ return -EPERM; ++ + s = to_socket(container_of(kobj, struct device, kobj)); + + if (off) diff --git a/debian/patches/features/all/lockdown/0021-Lock-down-TIOCSSERIAL.patch b/debian/patches/features/all/lockdown/0021-Lock-down-TIOCSSERIAL.patch new file mode 100644 index 000000000..d906326a9 --- /dev/null +++ b/debian/patches/features/all/lockdown/0021-Lock-down-TIOCSSERIAL.patch @@ -0,0 +1,34 @@ +From: David Howells <dhowells@redhat.com> +Date: Wed, 8 Nov 2017 15:11:35 +0000 +Subject: [21/29] Lock down TIOCSSERIAL +Origin: https://git.kernel.org/pub/scm/linux/kernel/git/dhowells/linux-fs.git/commit?id=f5fdeda0622ca040961521819794193777a03e8a + +Lock down TIOCSSERIAL as that can be used to change the ioport and irq +settings on a serial port. This only appears to be an issue for the serial +drivers that use the core serial code. All other drivers seem to either +ignore attempts to change port/irq or give an error. 
+ +Reported-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> +Signed-off-by: David Howells <dhowells@redhat.com> +cc: Jiri Slaby <jslaby@suse.com> +--- + drivers/tty/serial/serial_core.c | 6 ++++++ + 1 file changed, 6 insertions(+) + +Index: linux/drivers/tty/serial/serial_core.c +=================================================================== +--- linux.orig/drivers/tty/serial/serial_core.c ++++ linux/drivers/tty/serial/serial_core.c +@@ -850,6 +850,12 @@ static int uart_set_info(struct tty_stru + new_flags = (__force upf_t)new_info->flags; + old_custom_divisor = uport->custom_divisor; + ++ if ((change_port || change_irq) && ++ kernel_is_locked_down("Using TIOCSSERIAL to change device addresses, irqs and dma channels")) { ++ retval = -EPERM; ++ goto exit; ++ } ++ + if (!capable(CAP_SYS_ADMIN)) { + retval = -EPERM; + if (change_irq || change_port || diff --git a/debian/patches/features/all/lockdown/0022-Lock-down-module-params-that-specify-hardware-parame.patch b/debian/patches/features/all/lockdown/0022-Lock-down-module-params-that-specify-hardware-parame.patch new file mode 100644 index 000000000..3582e3106 --- /dev/null +++ b/debian/patches/features/all/lockdown/0022-Lock-down-module-params-that-specify-hardware-parame.patch @@ -0,0 +1,80 @@ +From: David Howells <dhowells@redhat.com> +Date: Wed, 8 Nov 2017 15:11:36 +0000 +Subject: [22/29] Lock down module params that specify hardware parameters (eg. + ioport) +Origin: https://git.kernel.org/pub/scm/linux/kernel/git/dhowells/linux-fs.git/commit?id=d20a28efda02a7ce70b943c15246ea2f07e780f4 + +Provided an annotation for module parameters that specify hardware +parameters (such as io ports, iomem addresses, irqs, dma channels, fixed +dma buffers and other types). 
+ +Suggested-by: Alan Cox <gnomes@lxorguk.ukuu.org.uk> +Signed-off-by: David Howells <dhowells@redhat.com> +--- + kernel/params.c | 26 +++++++++++++++++++++----- + 1 file changed, 21 insertions(+), 5 deletions(-) + +Index: linux/kernel/params.c +=================================================================== +--- linux.orig/kernel/params.c ++++ linux/kernel/params.c +@@ -108,13 +108,19 @@ bool parameq(const char *a, const char * + return parameqn(a, b, strlen(a)+1); + } + +-static void param_check_unsafe(const struct kernel_param *kp) ++static bool param_check_unsafe(const struct kernel_param *kp, ++ const char *doing) + { + if (kp->flags & KERNEL_PARAM_FL_UNSAFE) { + pr_notice("Setting dangerous option %s - tainting kernel\n", + kp->name); + add_taint(TAINT_USER, LOCKDEP_STILL_OK); + } ++ ++ if (kp->flags & KERNEL_PARAM_FL_HWPARAM && ++ kernel_is_locked_down("Command line-specified device addresses, irqs and dma channels")) ++ return false; ++ return true; + } + + static int parse_one(char *param, +@@ -144,8 +150,10 @@ static int parse_one(char *param, + pr_debug("handling %s with %p\n", param, + params[i].ops->set); + kernel_param_lock(params[i].mod); +- param_check_unsafe(&params[i]); +- err = params[i].ops->set(val, &params[i]); ++ if (param_check_unsafe(&params[i], doing)) ++ err = params[i].ops->set(val, &params[i]); ++ else ++ err = -EPERM; + kernel_param_unlock(params[i].mod); + return err; + } +@@ -553,6 +561,12 @@ static ssize_t param_attr_show(struct mo + return count; + } + ++#ifdef CONFIG_MODULES ++#define mod_name(mod) (mod)->name ++#else ++#define mod_name(mod) "unknown" ++#endif ++ + /* sysfs always hands a nul-terminated string in buf. We rely on that.
*/ + static ssize_t param_attr_store(struct module_attribute *mattr, + struct module_kobject *mk, +@@ -565,8 +579,10 @@ static ssize_t param_attr_store(struct m + return -EPERM; + + kernel_param_lock(mk->mod); +- param_check_unsafe(attribute->param); +- err = attribute->param->ops->set(buf, attribute->param); ++ if (param_check_unsafe(attribute->param, mod_name(mk->mod))) ++ err = attribute->param->ops->set(buf, attribute->param); ++ else ++ err = -EPERM; + kernel_param_unlock(mk->mod); + if (!err) + return len; diff --git a/debian/patches/features/all/lockdown/0023-x86-mmiotrace-Lock-down-the-testmmiotrace-module.patch b/debian/patches/features/all/lockdown/0023-x86-mmiotrace-Lock-down-the-testmmiotrace-module.patch new file mode 100644 index 000000000..47edb3442 --- /dev/null +++ b/debian/patches/features/all/lockdown/0023-x86-mmiotrace-Lock-down-the-testmmiotrace-module.patch @@ -0,0 +1,33 @@ +From: David Howells <dhowells@redhat.com> +Date: Wed, 8 Nov 2017 15:11:36 +0000 +Subject: [23/29] x86/mmiotrace: Lock down the testmmiotrace module +Origin: https://git.kernel.org/pub/scm/linux/kernel/git/dhowells/linux-fs.git/commit?id=20af3be0bcf6a78e3632770561fba6531dd3b444 + +The testmmiotrace module shouldn't be permitted when the kernel is locked +down as it can be used to arbitrarily read and write MMIO space. + +Suggested-by: Thomas Gleixner <tglx@linutronix.de> +Signed-off-by: David Howells <dhowells@redhat.com> +cc: Thomas Gleixner <tglx@linutronix.de> +cc: Steven Rostedt <rostedt@goodmis.org> +cc: Ingo Molnar <mingo@kernel.org> +cc: "H. Peter Anvin" <hpa@zytor.com> +cc: x86@kernel.org +--- + arch/x86/mm/testmmiotrace.c | 3 +++ + 1 file changed, 3 insertions(+) + +Index: linux/arch/x86/mm/testmmiotrace.c +=================================================================== +--- linux.orig/arch/x86/mm/testmmiotrace.c ++++ linux/arch/x86/mm/testmmiotrace.c +@@ -115,6 +115,9 @@ static int __init init(void) + { + unsigned long size = (read_far) ?
(8 << 20) : (16 << 10); + ++ if (kernel_is_locked_down("MMIO trace testing")) ++ return -EPERM; ++ + if (mmio_address == 0) { + pr_err("you have to use the module argument mmio_address.\n"); + pr_err("DO NOT LOAD THIS MODULE UNLESS YOU REALLY KNOW WHAT YOU ARE DOING!\n"); diff --git a/debian/patches/features/all/lockdown/0024-debugfs-Disallow-use-of-debugfs-files-when-the-kerne.patch b/debian/patches/features/all/lockdown/0024-debugfs-Disallow-use-of-debugfs-files-when-the-kerne.patch new file mode 100644 index 000000000..2dd3fa020 --- /dev/null +++ b/debian/patches/features/all/lockdown/0024-debugfs-Disallow-use-of-debugfs-files-when-the-kerne.patch @@ -0,0 +1,53 @@ +From: David Howells <dhowells@redhat.com> +Date: Wed, 8 Nov 2017 15:11:36 +0000 +Subject: [24/29] debugfs: Disallow use of debugfs files when the kernel is + locked down +Origin: https://git.kernel.org/pub/scm/linux/kernel/git/dhowells/linux-fs.git/commit?id=118cc5e1c27e1a75640cf2379c1299e12791063e + +Disallow opening of debugfs files when the kernel is locked down as various +drivers give raw access to hardware through debugfs. + +Accesses to tracefs should use /sys/kernel/tracing/ rather than +/sys/kernel/debug/tracing/. Possibly a symlink should be emplaced. + +Normal device interaction should be done through configfs or a miscdev, not +debugfs. + +Note that this makes it unnecessary to specifically lock down show_dsts(), +show_devs() and show_call() in the asus-wmi driver. 
+ +Signed-off-by: David Howells <dhowells@redhat.com> +cc: Andy Shevchenko <andy.shevchenko@gmail.com> +cc: acpi4asus-user@lists.sourceforge.net +cc: platform-driver-x86@vger.kernel.org +cc: Matthew Garrett <matthew.garrett@nebula.com> +cc: Thomas Gleixner <tglx@linutronix.de> +[bwh: Forward-ported to 4.15] +--- + fs/debugfs/file.c | 6 ++++++ + 1 file changed, 6 insertions(+) + +Index: linux/fs/debugfs/file.c +=================================================================== +--- linux.orig/fs/debugfs/file.c ++++ linux/fs/debugfs/file.c +@@ -142,6 +142,9 @@ static int open_proxy_open(struct inode + const struct file_operations *real_fops = NULL; + int r; + ++ if (kernel_is_locked_down("debugfs")) ++ return -EPERM; ++ + r = debugfs_file_get(dentry); + if (r) + return r == -EIO ? -ENOENT : r; +@@ -267,6 +270,9 @@ static int full_proxy_open(struct inode + struct file_operations *proxy_fops = NULL; + int r; + ++ if (kernel_is_locked_down("debugfs")) ++ return -EPERM; ++ + r = debugfs_file_get(dentry); + if (r) + return r == -EIO ? -ENOENT : r; diff --git a/debian/patches/features/all/lockdown/0025-Lock-down-proc-kcore.patch b/debian/patches/features/all/lockdown/0025-Lock-down-proc-kcore.patch new file mode 100644 index 000000000..58df9739b --- /dev/null +++ b/debian/patches/features/all/lockdown/0025-Lock-down-proc-kcore.patch @@ -0,0 +1,27 @@ +From: David Howells <dhowells@redhat.com> +Date: Wed, 8 Nov 2017 15:11:37 +0000 +Subject: [25/29] Lock down /proc/kcore +Origin: https://git.kernel.org/pub/scm/linux/kernel/git/dhowells/linux-fs.git/commit?id=797378dc4498207c3abc1101cfdc9ef2581d8c71 + +Disallow access to /proc/kcore when the kernel is locked down to prevent +access to cryptographic data. 
+ +Signed-off-by: David Howells <dhowells@redhat.com> +Reviewed-by: James Morris <james.l.morris@oracle.com> +--- + fs/proc/kcore.c | 2 ++ + 1 file changed, 2 insertions(+) + +Index: linux/fs/proc/kcore.c +=================================================================== +--- linux.orig/fs/proc/kcore.c ++++ linux/fs/proc/kcore.c +@@ -545,6 +545,8 @@ out: + + static int open_kcore(struct inode *inode, struct file *filp) + { ++ if (kernel_is_locked_down("/proc/kcore")) ++ return -EPERM; + if (!capable(CAP_SYS_RAWIO)) + return -EPERM; + diff --git a/debian/patches/features/all/lockdown/0026-Lock-down-kprobes.patch b/debian/patches/features/all/lockdown/0026-Lock-down-kprobes.patch new file mode 100644 index 000000000..e7d9f0b4e --- /dev/null +++ b/debian/patches/features/all/lockdown/0026-Lock-down-kprobes.patch @@ -0,0 +1,29 @@ +From: David Howells <dhowells@redhat.com> +Date: Wed, 8 Nov 2017 16:14:12 +0000 +Subject: [26/29] Lock down kprobes +Origin: https://git.kernel.org/pub/scm/linux/kernel/git/dhowells/linux-fs.git/commit?id=cfacbbe6ef95336d99817fb8063c19bd36dfaa3d + +Disallow the creation of kprobes when the kernel is locked down by +preventing their registration. This prevents kprobes from being used to +access kernel memory, either to make modifications or to steal crypto data. 
+ +Reported-by: Alexei Starovoitov <alexei.starovoitov@gmail.com> +Signed-off-by: David Howells <dhowells@redhat.com> +--- + kernel/kprobes.c | 3 +++ + 1 file changed, 3 insertions(+) + +Index: linux/kernel/kprobes.c +=================================================================== +--- linux.orig/kernel/kprobes.c ++++ linux/kernel/kprobes.c +@@ -1548,6 +1548,9 @@ int register_kprobe(struct kprobe *p) + struct module *probed_mod; + kprobe_opcode_t *addr; + ++ if (kernel_is_locked_down("Use of kprobes")) ++ return -EPERM; ++ + /* Adjust probe address from symbol */ + addr = kprobe_addr(p); + if (IS_ERR(addr)) diff --git a/debian/patches/features/all/lockdown/0027-bpf-Restrict-kernel-image-access-functions-when-the-.patch b/debian/patches/features/all/lockdown/0027-bpf-Restrict-kernel-image-access-functions-when-the-.patch new file mode 100644 index 000000000..87273834c --- /dev/null +++ b/debian/patches/features/all/lockdown/0027-bpf-Restrict-kernel-image-access-functions-when-the-.patch @@ -0,0 +1,39 @@ +From: David Howells <dhowells@redhat.com> +Date: Wed, 24 May 2017 14:56:05 +0100 +Subject: [27/29] bpf: Restrict kernel image access functions when the kernel + is locked down +Origin: https://git.kernel.org/pub/scm/linux/kernel/git/dhowells/linux-fs.git/commit?id=a13e9f58894129d9fd02fdb81b56ac7590704155 + +There are some bpf functions that can be used to read kernel memory: +bpf_probe_read, bpf_probe_write_user and bpf_trace_printk. These allow +private keys in kernel memory (e.g. the hibernation image signing key) to +be read by an eBPF program and kernel memory to be altered without +restriction. + +Completely prohibit the use of BPF when the kernel is locked down.
+ +Suggested-by: Alexei Starovoitov <alexei.starovoitov@gmail.com> +Signed-off-by: David Howells <dhowells@redhat.com> +cc: netdev@vger.kernel.org +cc: Chun-Yi Lee <jlee@suse.com> +cc: Alexei Starovoitov <alexei.starovoitov@gmail.com> +[bwh: Adjust context to apply after commit dcab51f19b29 + "bpf: Expose check_uarg_tail_zero()"] +--- + kernel/bpf/syscall.c | 3 +++ + 1 file changed, 3 insertions(+) + +Index: linux/kernel/bpf/syscall.c +=================================================================== +--- linux.orig/kernel/bpf/syscall.c ++++ linux/kernel/bpf/syscall.c +@@ -2378,6 +2378,9 @@ SYSCALL_DEFINE3(bpf, int, cmd, union bpf + if (sysctl_unprivileged_bpf_disabled && !capable(CAP_SYS_ADMIN)) + return -EPERM; + ++ if (kernel_is_locked_down("BPF")) ++ return -EPERM; ++ + err = bpf_check_uarg_tail_zero(uattr, sizeof(attr), size); + if (err) + return err; diff --git a/debian/patches/features/all/lockdown/0028-efi-Add-an-EFI_SECURE_BOOT-flag-to-indicate-secure-b.patch b/debian/patches/features/all/lockdown/0028-efi-Add-an-EFI_SECURE_BOOT-flag-to-indicate-secure-b.patch new file mode 100644 index 000000000..be357055b --- /dev/null +++ b/debian/patches/features/all/lockdown/0028-efi-Add-an-EFI_SECURE_BOOT-flag-to-indicate-secure-b.patch @@ -0,0 +1,152 @@ +From: David Howells <dhowells@redhat.com> +Date: Wed, 8 Nov 2017 15:11:37 +0000 +Subject: [28/29] efi: Add an EFI_SECURE_BOOT flag to indicate secure boot mode +Origin: https://git.kernel.org/pub/scm/linux/kernel/git/dhowells/linux-fs.git/commit?id=eb4a8603eb727afaeb9c6123eda2eda4b2757bf3 + +UEFI machines can be booted in Secure Boot mode. Add an EFI_SECURE_BOOT +flag that can be passed to efi_enabled() to find out whether secure boot is +enabled. + +Move the switch-statement in x86's setup_arch() that interprets the +secure_boot boot parameter to generic code and set the bit there.
+ +Suggested-by: Ard Biesheuvel <ard.biesheuvel@linaro.org> +Signed-off-by: David Howells <dhowells@redhat.com> +Reviewed-by: Ard Biesheuvel <ard.biesheuvel@linaro.org> +cc: linux-efi@vger.kernel.org +--- + arch/x86/kernel/setup.c | 14 +------------- + drivers/firmware/efi/Makefile | 1 + + drivers/firmware/efi/secureboot.c | 38 ++++++++++++++++++++++++++++++++++++++ + include/linux/efi.h | 16 ++++++++++------ + 4 files changed, 50 insertions(+), 19 deletions(-) + create mode 100644 drivers/firmware/efi/secureboot.c + +Index: linux/arch/x86/kernel/setup.c +=================================================================== +--- linux.orig/arch/x86/kernel/setup.c ++++ linux/arch/x86/kernel/setup.c +@@ -1159,19 +1159,7 @@ void __init setup_arch(char **cmdline_p) + /* Allocate bigger log buffer */ + setup_log_buf(1); + +- if (efi_enabled(EFI_BOOT)) { +- switch (boot_params.secure_boot) { +- case efi_secureboot_mode_disabled: +- pr_info("Secure boot disabled\n"); +- break; +- case efi_secureboot_mode_enabled: +- pr_info("Secure boot enabled\n"); +- break; +- default: +- pr_info("Secure boot could not be determined\n"); +- break; +- } +- } ++ efi_set_secure_boot(boot_params.secure_boot); + + reserve_initrd(); + +Index: linux/drivers/firmware/efi/Makefile +=================================================================== +--- linux.orig/drivers/firmware/efi/Makefile ++++ linux/drivers/firmware/efi/Makefile +@@ -24,6 +24,7 @@ obj-$(CONFIG_EFI_FAKE_MEMMAP) += fake_m + obj-$(CONFIG_EFI_BOOTLOADER_CONTROL) += efibc.o + obj-$(CONFIG_EFI_TEST) += test/ + obj-$(CONFIG_EFI_DEV_PATH_PARSER) += dev-path-parser.o ++obj-$(CONFIG_EFI) += secureboot.o + obj-$(CONFIG_APPLE_PROPERTIES) += apple-properties.o + + arm-obj-$(CONFIG_EFI) := arm-init.o arm-runtime.o +Index: linux/drivers/firmware/efi/secureboot.c +=================================================================== +--- /dev/null ++++ linux/drivers/firmware/efi/secureboot.c +@@ -0,0 +1,38 @@ ++/* Core kernel secure boot 
support. ++ * ++ * Copyright (C) 2017 Red Hat, Inc. All Rights Reserved. ++ * Written by David Howells (dhowells@redhat.com) ++ * ++ * This program is free software; you can redistribute it and/or ++ * modify it under the terms of the GNU General Public Licence ++ * as published by the Free Software Foundation; either version ++ * 2 of the Licence, or (at your option) any later version. ++ */ ++ ++#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt ++ ++#include <linux/efi.h> ++#include <linux/kernel.h> ++#include <linux/printk.h> ++ ++/* ++ * Decide what to do when UEFI secure boot mode is enabled. ++ */ ++void __init efi_set_secure_boot(enum efi_secureboot_mode mode) ++{ ++ if (efi_enabled(EFI_BOOT)) { ++ switch (mode) { ++ case efi_secureboot_mode_disabled: ++ pr_info("Secure boot disabled\n"); ++ break; ++ case efi_secureboot_mode_enabled: ++ set_bit(EFI_SECURE_BOOT, &efi.flags); ++ pr_info("Secure boot enabled\n"); ++ break; ++ default: ++ pr_warning("Secure boot could not be determined (mode %u)\n", ++ mode); ++ break; ++ } ++ } ++} +Index: linux/include/linux/efi.h +=================================================================== +--- linux.orig/include/linux/efi.h ++++ linux/include/linux/efi.h +@@ -1152,6 +1152,14 @@ extern int __init efi_setup_pcdp_console + #define EFI_DBG 8 /* Print additional debug info at runtime */ + #define EFI_NX_PE_DATA 9 /* Can runtime data regions be mapped non-executable? */ + #define EFI_MEM_ATTR 10 /* Did firmware publish an EFI_MEMORY_ATTRIBUTES table? */ ++#define EFI_SECURE_BOOT 11 /* Are we in Secure Boot mode? 
*/ ++ ++enum efi_secureboot_mode { ++ efi_secureboot_mode_unset, ++ efi_secureboot_mode_unknown, ++ efi_secureboot_mode_disabled, ++ efi_secureboot_mode_enabled, ++}; + + #ifdef CONFIG_EFI + /* +@@ -1164,6 +1172,7 @@ static inline bool efi_enabled(int featu + extern void efi_reboot(enum reboot_mode reboot_mode, const char *__unused); + + extern bool efi_is_table_address(unsigned long phys_addr); ++extern void __init efi_set_secure_boot(enum efi_secureboot_mode mode); + #else + static inline bool efi_enabled(int feature) + { +@@ -1182,6 +1191,7 @@ static inline bool efi_is_table_address( + { + return false; + } ++static inline void efi_set_secure_boot(enum efi_secureboot_mode mode) {} + #endif + + extern int efi_status_to_err(efi_status_t status); +@@ -1572,12 +1582,6 @@ static inline bool efi_runtime_disabled( + + extern void efi_call_virt_check_flags(unsigned long flags, const char *call); + +-enum efi_secureboot_mode { +- efi_secureboot_mode_unset, +- efi_secureboot_mode_unknown, +- efi_secureboot_mode_disabled, +- efi_secureboot_mode_enabled, +-}; + enum efi_secureboot_mode efi_get_secureboot(efi_system_table_t *sys_table); + + #ifdef CONFIG_RESET_ATTACK_MITIGATION diff --git a/debian/patches/features/all/lockdown/0029-efi-Lock-down-the-kernel-if-booted-in-secure-boot-mo.patch b/debian/patches/features/all/lockdown/0029-efi-Lock-down-the-kernel-if-booted-in-secure-boot-mo.patch new file mode 100644 index 000000000..9ab10afb3 --- /dev/null +++ b/debian/patches/features/all/lockdown/0029-efi-Lock-down-the-kernel-if-booted-in-secure-boot-mo.patch @@ -0,0 +1,83 @@ +From: David Howells <dhowells@redhat.com> +Date: Wed, 8 Nov 2017 15:11:37 +0000 +Subject: [29/29] efi: Lock down the kernel if booted in secure boot mode +Origin: https://git.kernel.org/pub/scm/linux/kernel/git/dhowells/linux-fs.git/commit?id=a364bd945ffc141a7b17cb331bda0d8ad68f7e72 + +UEFI Secure Boot provides a mechanism for ensuring that the firmware will +only load signed bootloaders and kernels. 
Certain use cases may also +require that all kernel modules also be signed. Add a configuration option +to lock down the kernel - which includes requiring validly signed +modules - if the kernel is secure-booted. + +Signed-off-by: David Howells <dhowells@redhat.com> +Acked-by: Ard Biesheuvel <ard.biesheuvel@linaro.org> +cc: linux-efi@vger.kernel.org +--- + arch/x86/kernel/setup.c | 6 ++++-- + security/Kconfig | 14 ++++++++++++++ + security/lock_down.c | 1 + + 3 files changed, 19 insertions(+), 2 deletions(-) + +--- a/arch/x86/kernel/setup.c ++++ b/arch/x86/kernel/setup.c +@@ -65,6 +65,7 @@ + #include <linux/dma-mapping.h> + #include <linux/ctype.h> + #include <linux/uaccess.h> ++#include <linux/security.h> + + #include <linux/percpu.h> + #include <linux/crash_dump.h> +@@ -1005,6 +1006,9 @@ void __init setup_arch(char **cmdline_p) + if (efi_enabled(EFI_BOOT)) + efi_init(); + ++ efi_set_secure_boot(boot_params.secure_boot); ++ init_lockdown(); ++ + dmi_scan_machine(); + dmi_memdev_walk(); + dmi_set_dump_stack_arch_desc(); +@@ -1159,8 +1163,6 @@ void __init setup_arch(char **cmdline_p) + /* Allocate bigger log buffer */ + setup_log_buf(1); + +- efi_set_secure_boot(boot_params.secure_boot); +- + reserve_initrd(); + + acpi_table_upgrade(); +--- a/security/Kconfig ++++ b/security/Kconfig +@@ -247,6 +247,21 @@ config LOCK_DOWN_KERNEL + turns off various features that might otherwise allow access to the + kernel image (eg. setting MSR registers). + ++config LOCK_DOWN_IN_EFI_SECURE_BOOT ++ bool "Lock down the kernel in EFI Secure Boot mode" ++ default n ++ select LOCK_DOWN_KERNEL ++ depends on EFI ++ help ++ UEFI Secure Boot provides a mechanism for ensuring that the firmware ++ will only load signed bootloaders and kernels. Secure boot mode may ++ be determined from EFI variables provided by the system firmware if ++ not indicated by the boot parameters. ++ ++ Enabling this option results in kernel lockdown being ++ triggered if EFI Secure Boot is set.
++ ++ + source security/selinux/Kconfig + source security/smack/Kconfig + source security/tomoyo/Kconfig +--- a/security/lock_down.c ++++ b/security/lock_down.c +@@ -11,6 +11,7 @@ + + #include <linux/security.h> + #include <linux/export.h> ++#include <linux/efi.h> + + static __ro_after_init bool kernel_locked_down; + diff --git a/debian/patches/features/all/lockdown/0032-efi-Restrict-efivar_ssdt_load-when-the-kernel-is-loc.patch b/debian/patches/features/all/lockdown/0032-efi-Restrict-efivar_ssdt_load-when-the-kernel-is-loc.patch new file mode 100644 index 000000000..bb2f4f60b --- /dev/null +++ b/debian/patches/features/all/lockdown/0032-efi-Restrict-efivar_ssdt_load-when-the-kernel-is-loc.patch @@ -0,0 +1,36 @@ +From: Matthew Garrett <matthewgarrett@google.com> +Date: Wed, 31 Jul 2019 15:16:16 -0700 +Subject: efi: Restrict efivar_ssdt_load when the kernel is locked down +Origin: https://patchwork.kernel.org/patch/11069659/ + +efivar_ssdt_load allows the kernel to import arbitrary ACPI code from an +EFI variable, which gives arbitrary code execution in ring 0. Prevent +that when the kernel is locked down. 
+ +Signed-off-by: Matthew Garrett <mjg59@google.com> +Acked-by: Ard Biesheuvel <ard.biesheuvel@linaro.org> +Reviewed-by: Kees Cook <keescook@chromium.org> +Cc: Ard Biesheuvel <ard.biesheuvel@linaro.org> +Cc: linux-efi@vger.kernel.org +[bwh: Convert back to the non-LSM lockdown API] +--- +--- a/drivers/firmware/efi/efi.c ++++ b/drivers/firmware/efi/efi.c +@@ -30,6 +30,7 @@ + #include <linux/acpi.h> + #include <linux/ucs2_string.h> + #include <linux/memblock.h> ++#include <linux/security.h> + + #include <asm/early_ioremap.h> + +@@ -241,6 +242,9 @@ static void generic_ops_unregister(void) + static char efivar_ssdt[EFIVAR_SSDT_NAME_MAX] __initdata; + static int __init efivar_ssdt_setup(char *str) + { ++ if (kernel_is_locked_down("ACPI tables")) ++ return -EPERM; ++ + if (strlen(str) < sizeof(efivar_ssdt)) + memcpy(efivar_ssdt, str, strlen(str)); + else diff --git a/debian/patches/features/all/lockdown/ACPI-configfs-Disallow-loading-ACPI-tables-when-lock.patch b/debian/patches/features/all/lockdown/ACPI-configfs-Disallow-loading-ACPI-tables-when-lock.patch new file mode 100644 index 000000000..4970a4bd4 --- /dev/null +++ b/debian/patches/features/all/lockdown/ACPI-configfs-Disallow-loading-ACPI-tables-when-lock.patch @@ -0,0 +1,44 @@ +From: "Jason A. Donenfeld" <Jason@zx2c4.com> +Date: Mon, 15 Jun 2020 04:43:32 -0600 +Subject: ACPI: configfs: Disallow loading ACPI tables when locked down +Origin: https://git.kernel.org/linus/75b0cea7bf307f362057cc778efe89af4c615354 +Bug-Debian-Security: https://security-tracker.debian.org/tracker/CVE-2020-15780 + +Like other vectors already patched, this one here allows the root +user to load ACPI tables, which enables arbitrary physical address +writes, which in turn makes it possible to disable lockdown. + +Prevents this by checking the lockdown status before allowing a new +ACPI table to be installed. The link in the trailer shows a PoC of +how this might be used. 
+ +Link: https://git.zx2c4.com/american-unsigned-language/tree/american-unsigned-language-2.sh +Cc: 5.4+ <stable@vger.kernel.org> # 5.4+ +Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com> +Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com> +[Salvatore Bonaccorso: Backport to v4.19.y: Use kernel_is_locked_down instead +of security_locked_down] +--- + drivers/acpi/acpi_configfs.c | 6 +++++- + 1 file changed, 5 insertions(+), 1 deletion(-) + +--- a/drivers/acpi/acpi_configfs.c ++++ b/drivers/acpi/acpi_configfs.c +@@ -14,6 +14,7 @@ + #include <linux/module.h> + #include <linux/configfs.h> + #include <linux/acpi.h> ++#include <linux/security.h> + + #include "acpica/accommon.h" + #include "acpica/actables.h" +@@ -33,6 +34,9 @@ static ssize_t acpi_table_aml_write(stru + struct acpi_table *table; + int ret; + ++ if (kernel_is_locked_down("Modifying ACPI tables")) ++ return -EPERM; ++ + table = container_of(cfg, struct acpi_table, cfg); + + if (table->header) { diff --git a/debian/patches/features/all/lockdown/arm64-add-kernel-config-option-to-lock-down-when.patch b/debian/patches/features/all/lockdown/arm64-add-kernel-config-option-to-lock-down-when.patch new file mode 100644 index 000000000..beb09c3e6 --- /dev/null +++ b/debian/patches/features/all/lockdown/arm64-add-kernel-config-option-to-lock-down-when.patch @@ -0,0 +1,97 @@ +From: Linn Crosetto <linn@hpe.com> +Date: Tue, 30 Aug 2016 11:54:38 -0600 +Subject: arm64: add kernel config option to lock down when in Secure Boot mode +Bug-Debian: https://bugs.debian.org/831827 +Forwarded: no + +Add a kernel configuration option to lock down the kernel, to restrict +userspace's ability to modify the running kernel when UEFI Secure Boot is +enabled. Based on the x86 patch by Matthew Garrett. + +Determine the state of Secure Boot in the EFI stub and pass this to the +kernel using the FDT. 
+ +Signed-off-by: Linn Crosetto <linn@hpe.com> +[bwh: Forward-ported to 4.10: adjust context] +[Lukas Wunner: Forward-ported to 4.11: drop parts applied upstream] +[bwh: Forward-ported to 4.15 and lockdown patch set: + - Pass result of efi_get_secureboot() in stub through to + efi_set_secure_boot() in main kernel + - Use lockdown API and naming] +[bwh: Forward-ported to 4.19.3: adjust context in update_fdt()] +[dannf: Moved init_lockdown() call after uefi_init(), fixing SB detection] +--- + arch/arm64/Kconfig | 13 +++++++++++++ + drivers/firmware/efi/arm-init.c | 7 +++++++ + drivers/firmware/efi/efi.c | 3 ++- + drivers/firmware/efi/libstub/arm-stub.c | 2 +- + drivers/firmware/efi/libstub/efistub.h | 1 + + drivers/firmware/efi/libstub/fdt.c | 7 +++++++ + include/linux/efi.h | 1 + + 7 files changed, 32 insertions(+), 2 deletions(-) + +Index: linux/drivers/firmware/efi/arm-init.c +=================================================================== +--- linux.orig/drivers/firmware/efi/arm-init.c ++++ linux/drivers/firmware/efi/arm-init.c +@@ -21,6 +21,7 @@ + #include <linux/of_fdt.h> + #include <linux/platform_device.h> + #include <linux/screen_info.h> ++#include <linux/security.h> + + #include <asm/efi.h> + +@@ -257,6 +258,9 @@ void __init efi_init(void) + return; + } + ++ efi_set_secure_boot(params.secure_boot); ++ init_lockdown(); ++ + reserve_regions(); + efi_esrt_init(); + +Index: linux/drivers/firmware/efi/efi.c +=================================================================== +--- linux.orig/drivers/firmware/efi/efi.c ++++ linux/drivers/firmware/efi/efi.c +@@ -660,7 +660,8 @@ static __initdata struct params fdt_para + UEFI_PARAM("MemMap Address", "linux,uefi-mmap-start", mmap), + UEFI_PARAM("MemMap Size", "linux,uefi-mmap-size", mmap_size), + UEFI_PARAM("MemMap Desc. Size", "linux,uefi-mmap-desc-size", desc_size), +- UEFI_PARAM("MemMap Desc. Version", "linux,uefi-mmap-desc-ver", desc_ver) ++ UEFI_PARAM("MemMap Desc. 
Version", "linux,uefi-mmap-desc-ver", desc_ver), ++ UEFI_PARAM("Secure Boot Enabled", "linux,uefi-secure-boot", secure_boot) + }; + + static __initdata struct params xen_fdt_params[] = { +Index: linux/drivers/firmware/efi/libstub/fdt.c +=================================================================== +--- linux.orig/drivers/firmware/efi/libstub/fdt.c ++++ linux/drivers/firmware/efi/libstub/fdt.c +@@ -159,6 +159,12 @@ static efi_status_t update_fdt(efi_syste + } + } + ++ fdt_val32 = cpu_to_fdt32(efi_get_secureboot(sys_table)); ++ status = fdt_setprop(fdt, node, "linux,uefi-secure-boot", ++ &fdt_val32, sizeof(fdt_val32)); ++ if (status) ++ goto fdt_set_fail; ++ + /* shrink the FDT back to its minimum size */ + fdt_pack(fdt); + +Index: linux/include/linux/efi.h +=================================================================== +--- linux.orig/include/linux/efi.h ++++ linux/include/linux/efi.h +@@ -786,6 +786,7 @@ struct efi_fdt_params { + u32 mmap_size; + u32 desc_size; + u32 desc_ver; ++ u32 secure_boot; + }; + + typedef struct { diff --git a/debian/patches/features/all/lockdown/enable-cold-boot-attack-mitigation.patch b/debian/patches/features/all/lockdown/enable-cold-boot-attack-mitigation.patch new file mode 100644 index 000000000..7a3b8e9bb --- /dev/null +++ b/debian/patches/features/all/lockdown/enable-cold-boot-attack-mitigation.patch @@ -0,0 +1,50 @@ +From: Matthew Garrett <mjg59@coreos.com> +Date: Tue, 12 Jan 2016 12:51:27 -0800 +Subject: [18/18] Enable cold boot attack mitigation +Origin: https://github.com/mjg59/linux/commit/02d999574936dd234a508c0112a0200c135a5c34 + +[Lukas Wunner: Forward-ported to 4.11: adjust context] +--- + arch/x86/boot/compressed/eboot.c | 22 ++++++++++++++++++++++ + 1 file changed, 22 insertions(+) + +Index: linux/arch/x86/boot/compressed/eboot.c +=================================================================== +--- linux.orig/arch/x86/boot/compressed/eboot.c ++++ linux/arch/x86/boot/compressed/eboot.c +@@ -372,6 +372,22 @@ 
void setup_graphics(struct boot_params * + } + } + ++#define MEMORY_ONLY_RESET_CONTROL_GUID \ ++ EFI_GUID (0xe20939be, 0x32d4, 0x41be, 0xa1, 0x50, 0x89, 0x7f, 0x85, 0xd4, 0x98, 0x29) ++ ++static void enable_reset_attack_mitigation(void) ++{ ++ u8 val = 1; ++ efi_guid_t var_guid = MEMORY_ONLY_RESET_CONTROL_GUID; ++ ++ /* Ignore the return value here - there's not really a lot we can do */ ++ efi_early->call((unsigned long)sys_table->runtime->set_variable, ++ L"MemoryOverwriteRequestControl", &var_guid, ++ EFI_VARIABLE_NON_VOLATILE | ++ EFI_VARIABLE_BOOTSERVICE_ACCESS | ++ EFI_VARIABLE_RUNTIME_ACCESS, sizeof(val), &val); ++} ++ + /* + * Because the x86 boot code expects to be passed a boot_params we + * need to create one ourselves (usually the bootloader would create +@@ -783,6 +799,12 @@ efi_main(struct efi_config *c, struct bo + efi_parse_options((char *)cmdline_paddr); + + /* ++ * Ask the firmware to clear memory if we don't have a clean ++ * shutdown ++ */ ++ enable_reset_attack_mitigation(); ++ ++ /* + * If the boot loader gave us a value for secure_boot then we use that, + * otherwise we ask the BIOS. + */ diff --git a/debian/patches/features/all/lockdown/lockdown-refer-to-debian-wiki-until-manual-page-exists.patch b/debian/patches/features/all/lockdown/lockdown-refer-to-debian-wiki-until-manual-page-exists.patch new file mode 100644 index 000000000..586be8cab --- /dev/null +++ b/debian/patches/features/all/lockdown/lockdown-refer-to-debian-wiki-until-manual-page-exists.patch @@ -0,0 +1,34 @@ +From: Ben Hutchings <ben@decadent.org.uk> +Date: Sun, 21 Apr 2019 00:17:13 +0100 +Subject: lockdown: Refer to Debian wiki until manual page exists +Forwarded: not-needed + +The lockdown denial log message currently refers to a +"kernel_lockdown.7" manual page, which is supposed to document it. +That manual page hasn't been accepted by the man-pages project and +doesn't even seem to have been submitted yet. For now, refer to the +Debian wiki. 
+ +--- +Index: linux/security/lock_down.c +=================================================================== +--- linux.orig/security/lock_down.c ++++ linux/security/lock_down.c +@@ -28,7 +28,7 @@ static void __init lock_kernel_down(cons + { + if (!kernel_locked_down) { + kernel_locked_down = true; +- pr_notice("Kernel is locked down from %s; see man kernel_lockdown.7\n", ++ pr_notice("Kernel is locked down from %s; see https://wiki.debian.org/SecureBoot\n", + where); + } + } +@@ -60,7 +60,7 @@ void __init init_lockdown(void) + bool __kernel_is_locked_down(const char *what, bool first) + { + if (what && first && kernel_locked_down) +- pr_notice("Lockdown: %s is restricted; see man kernel_lockdown.7\n", ++ pr_notice("Lockdown: %s is restricted; see https://wiki.debian.org/SecureBoot\n", + what); + return kernel_locked_down; + } diff --git a/debian/patches/features/all/lockdown/mtd-disable-slram-and-phram-when-locked-down.patch b/debian/patches/features/all/lockdown/mtd-disable-slram-and-phram-when-locked-down.patch new file mode 100644 index 000000000..f02392f10 --- /dev/null +++ b/debian/patches/features/all/lockdown/mtd-disable-slram-and-phram-when-locked-down.patch @@ -0,0 +1,41 @@ +From: Ben Hutchings <ben@decadent.org.uk> +Date: Fri, 03 Jun 2016 00:48:39 +0100 +Subject: mtd: Disable slram and phram when locked down +Forwarded: no + +The slram and phram drivers both allow mapping regions of physical +address space such that they can then be read and written by userland +through the MTD interface. This is probably usable to manipulate +hardware into overwriting kernel code on many systems. Prevent that +if locked down. 
+ +Signed-off-by: Ben Hutchings <ben@decadent.org.uk> +--- +Index: linux/drivers/mtd/devices/phram.c +=================================================================== +--- linux.orig/drivers/mtd/devices/phram.c ++++ linux/drivers/mtd/devices/phram.c +@@ -219,6 +219,9 @@ static int phram_setup(const char *val) + uint64_t len; + int i, ret; + ++ if (kernel_is_locked_down("Command line-specified device addresses")) ++ return -EPERM; ++ + if (strnlen(val, sizeof(buf)) >= sizeof(buf)) + parse_err("parameter too long\n"); + +Index: linux/drivers/mtd/devices/slram.c +=================================================================== +--- linux.orig/drivers/mtd/devices/slram.c ++++ linux/drivers/mtd/devices/slram.c +@@ -226,6 +226,9 @@ static int parse_cmdline(char *devname, + unsigned long devstart; + unsigned long devlength; + ++ if (kernel_is_locked_down("Command line-specified device addresses")) ++ return -EPERM; ++ + if ((!devname) || (!szstart) || (!szlength)) { + unregister_devices(); + return(-EINVAL); diff --git a/debian/patches/features/all/security-perf-allow-further-restriction-of-perf_event_open.patch b/debian/patches/features/all/security-perf-allow-further-restriction-of-perf_event_open.patch new file mode 100644 index 000000000..22cd1283a --- /dev/null +++ b/debian/patches/features/all/security-perf-allow-further-restriction-of-perf_event_open.patch @@ -0,0 +1,81 @@ +From: Ben Hutchings <ben@decadent.org.uk> +Date: Mon, 11 Jan 2016 15:23:55 +0000 +Subject: security,perf: Allow further restriction of perf_event_open +Forwarded: https://lkml.org/lkml/2016/1/11/587 + +When kernel.perf_event_paranoid is set to 3 (or greater), disallow all +access to performance events by users without CAP_SYS_ADMIN. +Add a Kconfig symbol CONFIG_SECURITY_PERF_EVENTS_RESTRICT that +makes this value the default. + +This is based on a similar feature in grsecurity +(CONFIG_GRKERNSEC_PERF_HARDEN). This version doesn't include making +the variable read-only. 
It also allows enabling further restriction +at run-time regardless of whether the default is changed. + +Signed-off-by: Ben Hutchings <ben@decadent.org.uk> +--- +Index: linux/include/linux/perf_event.h +=================================================================== +--- linux.orig/include/linux/perf_event.h ++++ linux/include/linux/perf_event.h +@@ -1189,6 +1189,11 @@ extern int perf_cpu_time_max_percent_han + int perf_event_max_stack_handler(struct ctl_table *table, int write, + void __user *buffer, size_t *lenp, loff_t *ppos); + ++static inline bool perf_paranoid_any(void) ++{ ++ return sysctl_perf_event_paranoid > 2; ++} ++ + static inline bool perf_paranoid_tracepoint_raw(void) + { + return sysctl_perf_event_paranoid > -1; +Index: linux/kernel/events/core.c +=================================================================== +--- linux.orig/kernel/events/core.c ++++ linux/kernel/events/core.c +@@ -397,8 +397,13 @@ static cpumask_var_t perf_online_mask; + * 0 - disallow raw tracepoint access for unpriv + * 1 - disallow cpu events for unpriv + * 2 - disallow kernel profiling for unpriv ++ * 3 - disallow all unpriv perf event use + */ ++#ifdef CONFIG_SECURITY_PERF_EVENTS_RESTRICT ++int sysctl_perf_event_paranoid __read_mostly = 3; ++#else + int sysctl_perf_event_paranoid __read_mostly = 2; ++#endif + + /* Minimum for 512 kiB + 1 user control page */ + int sysctl_perf_event_mlock __read_mostly = 512 + (PAGE_SIZE / 1024); /* 'free' kiB per user */ +@@ -10485,6 +10490,9 @@ SYSCALL_DEFINE5(perf_event_open, + if (flags & ~PERF_FLAG_ALL) + return -EINVAL; + ++ if (perf_paranoid_any() && !capable(CAP_SYS_ADMIN)) ++ return -EACCES; ++ + err = perf_copy_attr(attr_uptr, &attr); + if (err) + return err; +Index: linux/security/Kconfig +=================================================================== +--- linux.orig/security/Kconfig ++++ linux/security/Kconfig +@@ -18,6 +18,15 @@ config SECURITY_DMESG_RESTRICT + + If you are unsure how to answer this question, answer 
N. + ++config SECURITY_PERF_EVENTS_RESTRICT ++ bool "Restrict unprivileged use of performance events" ++ depends on PERF_EVENTS ++ help ++ If you say Y here, the kernel.perf_event_paranoid sysctl ++ will be set to 3 by default, and no unprivileged use of the ++ perf_event_open syscall will be permitted unless it is ++ changed. ++ + config SECURITY + bool "Enable different security models" + depends on SYSFS |