diff --git a/Documentation/filesystems/nfs/exporting.rst b/Documentation/filesystems/nfs/exporting.rst index 3d97b8d8f7354..4b30daee399af 100644 --- a/Documentation/filesystems/nfs/exporting.rst +++ b/Documentation/filesystems/nfs/exporting.rst @@ -215,3 +215,29 @@ following flags are defined: This flag causes nfsd to close any open files for this inode _before_ calling into the vfs to do an unlink or a rename that would replace an existing file. + + EXPORT_OP_REMOTE_FS - Backing storage for this filesystem is remote + PF_LOCAL_THROTTLE exists for loopback NFSD, where a thread needs to + write to one bdi (the final bdi) in order to free up writes queued + to another bdi (the client bdi). Such threads get a private balance + of dirty pages so that dirty pages for the client bdi do not impact + the daemon writing to the final bdi. For filesystems whose durable + storage is not local (such as exported NFS filesystems), this + constraint has negative consequences. EXPORT_OP_REMOTE_FS enables + an export to disable writeback throttling. + + EXPORT_OP_NOATOMIC_ATTR - Filesystem does not update attributes atomically + EXPORT_OP_NOATOMIC_ATTR indicates that the exported filesystem + cannot provide the semantics required by the "atomic" boolean in + NFSv4's change_info4. This boolean indicates to a client whether the + returned before and after change attributes were obtained atomically + with respect to the requested metadata operation (UNLINK, + OPEN/CREATE, MKDIR, etc). + + EXPORT_OP_FLUSH_ON_CLOSE - Filesystem flushes file data on close(2) + On most filesystems, inodes can remain under writeback after the + file is closed. NFSD relies on client activity or local flusher + threads to handle writeback. Certain filesystems, such as NFS, flush + all of an inode's dirty data on last close. Exports that behave this + way should set EXPORT_OP_FLUSH_ON_CLOSE so that NFSD knows to skip + waiting for writeback when closing such files. diff --git a/arch/arm/common/locomo.c b/arch/arm/common/locomo.c index 309b747834684..70480dd9e96db 100644 --- a/arch/arm/common/locomo.c +++ b/arch/arm/common/locomo.c @@ -350,19 +350,6 @@ static int locomo_resume(struct platform_device *dev) } #endif - -/** - * locomo_probe - probe for a single LoCoMo chip. - * @phys_addr: physical address of device. - * - * Probe for a LoCoMo chip. This must be called - * before any other locomo-specific code. - * - * Returns: - * %-ENODEV device not found. - * %-EBUSY physical address already marked in-use. - * %0 successful. - */ static int __locomo_probe(struct device *me, struct resource *mem, int irq) { @@ -479,6 +466,21 @@ static void __locomo_remove(struct locomo *lchip) kfree(lchip); } +/** + * locomo_probe - probe for a single LoCoMo chip. + * @dev: platform device + * + * Probe for a LoCoMo chip. This must be called + * before any other locomo-specific code. + * + * Returns: + * * %-EINVAL - device's IORESOURCE_MEM not found + * * %-ENXIO - could not allocate an IRQ for the device + * * %-ENODEV - device not found. + * * %-EBUSY - physical address already marked in-use. + * * %-ENOMEM - could not allocate or iomap memory. + * * %0 - successful.
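The three flags documented in the exporting.rst hunk above live in include/linux/exportfs.h and are advertised through the flags field of a filesystem's struct export_operations. A minimal sketch of how an export might opt in (the structure name and the elided members are illustrative only; NFS, for example, sets all three, since its durable storage is remote and it flushes dirty data on last close):

	#include <linux/exportfs.h>

	static const struct export_operations example_export_ops = {
		/* ... encode_fh, fh_to_dentry, etc. ... */
		.flags = EXPORT_OP_REMOTE_FS |		/* backing store is not local */
			 EXPORT_OP_NOATOMIC_ATTR |	/* change_info4 is never atomic */
			 EXPORT_OP_FLUSH_ON_CLOSE,	/* dirty data is flushed on last close */
	};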
+ */ static int locomo_probe(struct platform_device *dev) { struct resource *mem; diff --git a/arch/arm/include/asm/thread_info.h b/arch/arm/include/asm/thread_info.h index 7f092cb55a417..943ffcf069d29 100644 --- a/arch/arm/include/asm/thread_info.h +++ b/arch/arm/include/asm/thread_info.h @@ -40,6 +40,7 @@ struct task_struct; DECLARE_PER_CPU(struct task_struct *, __entry_task); #include <asm/types.h> +#include <asm/traps.h> struct cpu_context_save { __u32 r4; @@ -66,7 +67,6 @@ struct thread_info { __u32 cpu_domain; /* cpu domain */ struct cpu_context_save cpu_context; /* cpu context */ __u32 abi_syscall; /* ABI type and syscall nr */ - __u8 used_cp[16]; /* thread used copro */ unsigned long tp_value[2]; /* TLS registers */ union fp_state fpstate __attribute__((aligned(8))); union vfp_state vfpstate; @@ -105,6 +105,21 @@ extern void iwmmxt_task_restore(struct thread_info *, void *); extern void iwmmxt_task_release(struct thread_info *); extern void iwmmxt_task_switch(struct thread_info *); +extern int iwmmxt_undef_handler(struct pt_regs *, u32); + +static inline void register_iwmmxt_undef_handler(void) +{ + static struct undef_hook iwmmxt_undef_hook = { + .instr_mask = 0x0c000e00, + .instr_val = 0x0c000000, + .cpsr_mask = MODE_MASK | PSR_T_BIT, + .cpsr_val = USR_MODE, + .fn = iwmmxt_undef_handler, + }; + + register_undef_hook(&iwmmxt_undef_hook); +} + extern void vfp_sync_hwstate(struct thread_info *); extern void vfp_flush_hwstate(struct thread_info *); diff --git a/arch/arm/include/asm/vfp.h b/arch/arm/include/asm/vfp.h index 5b57b8768bacc..157ea34261586 100644 --- a/arch/arm/include/asm/vfp.h +++ b/arch/arm/include/asm/vfp.h @@ -102,7 +102,6 @@ #ifndef __ASSEMBLY__ void vfp_disable(void); -void VFP_bounce(u32 trigger, u32 fpexc, struct pt_regs *regs); #endif #endif /* __ASM_VFP_H */ diff --git a/arch/arm/kernel/asm-offsets.c b/arch/arm/kernel/asm-offsets.c index 6a80d4be743b4..219cbc7e5d134 100644 --- a/arch/arm/kernel/asm-offsets.c +++ b/arch/arm/kernel/asm-offsets.c @@ -47,7 +47,6 @@ int main(void) DEFINE(TI_CPU_DOMAIN, offsetof(struct thread_info, cpu_domain)); DEFINE(TI_CPU_SAVE, offsetof(struct thread_info, cpu_context)); DEFINE(TI_ABI_SYSCALL, offsetof(struct thread_info, abi_syscall)); - DEFINE(TI_USED_CP, offsetof(struct thread_info, used_cp)); DEFINE(TI_TP_VALUE, offsetof(struct thread_info, tp_value)); DEFINE(TI_FPSTATE, offsetof(struct thread_info, fpstate)); #ifdef CONFIG_VFP diff --git a/arch/arm/kernel/entry-armv.S b/arch/arm/kernel/entry-armv.S index 76e8125d05d26..6150a716828c3 100644 --- a/arch/arm/kernel/entry-armv.S +++ b/arch/arm/kernel/entry-armv.S @@ -446,258 +446,26 @@ ENDPROC(__irq_usr) __und_usr: usr_entry uaccess=0 - mov r2, r4 - mov r3, r5 - - @ r2 = regs->ARM_pc, which is either 2 or 4 bytes ahead of the - @ faulting instruction depending on Thumb mode. - @ r3 = regs->ARM_cpsr - @ - @ The emulation code returns using r9 if it has emulated the - @ instruction, or the more conventional lr if we are to treat - @ this as a real undefined instruction - @ - badr r9, ret_from_exception - @ IRQs must be enabled before attempting to read the instruction from @ user space since that could cause a page/translation fault if the @ page table was modified by another CPU. enable_irq - tst r3, #PSR_T_BIT @ Thumb mode? 
- bne __und_usr_thumb - sub r4, r2, #4 @ ARM instr at LR - 4 -1: ldrt r0, [r4] - ARM_BE8(rev r0, r0) @ little endian instruction - - uaccess_disable ip - - @ r0 = 32-bit ARM instruction which caused the exception - @ r2 = PC value for the following instruction (:= regs->ARM_pc) - @ r4 = PC value for the faulting instruction - @ lr = 32-bit undefined instruction function - badr lr, __und_usr_fault_32 - b call_fpe - -__und_usr_thumb: - @ Thumb instruction - sub r4, r2, #2 @ First half of thumb instr at LR - 2 -#if CONFIG_ARM_THUMB && __LINUX_ARM_ARCH__ >= 6 && CONFIG_CPU_V7 -/* - * Thumb-2 instruction handling. Note that because pre-v6 and >= v6 platforms - * can never be supported in a single kernel, this code is not applicable at - * all when __LINUX_ARM_ARCH__ < 6. This allows simplifying assumptions to be - * made about .arch directives. - */ -#if __LINUX_ARM_ARCH__ < 7 -/* If the target CPU may not be Thumb-2-capable, a run-time check is needed: */ - ldr_va r5, cpu_architecture - cmp r5, #CPU_ARCH_ARMv7 - blo __und_usr_fault_16 @ 16bit undefined instruction -/* - * The following code won't get run unless the running CPU really is v7, so - * coding round the lack of ldrht on older arches is pointless. Temporarily - * override the assembler target arch with the minimum required instead: - */ - .arch armv6t2 + tst r5, #PSR_T_BIT @ Thumb mode? + mov r1, #2 @ set insn size to 2 for Thumb + bne 0f @ handle as Thumb undef exception +#ifdef CONFIG_FPE_NWFPE + adr r9, ret_from_exception + bl call_fpe @ returns via R9 on success #endif -2: ldrht r5, [r4] -ARM_BE8(rev16 r5, r5) @ little endian instruction - cmp r5, #0xe800 @ 32bit instruction if xx != 0 - blo __und_usr_fault_16_pan @ 16bit undefined instruction -3: ldrht r0, [r2] -ARM_BE8(rev16 r0, r0) @ little endian instruction + mov r1, #4 @ set insn size to 4 for ARM +0: mov r0, sp uaccess_disable ip - add r2, r2, #2 @ r2 is PC + 2, make it PC + 4 - str r2, [sp, #S_PC] @ it's a 2x16bit instr, update - orr r0, r0, r5, lsl #16 - badr lr, __und_usr_fault_32 - @ r0 = the two 16-bit Thumb instructions which caused the exception - @ r2 = PC value for the following Thumb instruction (:= regs->ARM_pc) - @ r4 = PC value for the first 16-bit Thumb instruction - @ lr = 32bit undefined instruction function - -#if __LINUX_ARM_ARCH__ < 7 -/* If the target arch was overridden, change it back: */ -#ifdef CONFIG_CPU_32v6K - .arch armv6k -#else - .arch armv6 -#endif -#endif /* __LINUX_ARM_ARCH__ < 7 */ -#else /* !(CONFIG_ARM_THUMB && __LINUX_ARM_ARCH__ >= 6 && CONFIG_CPU_V7) */ - b __und_usr_fault_16 -#endif + bl __und_fault + b ret_from_exception UNWIND(.fnend) ENDPROC(__und_usr) -/* - * The out of line fixup for the ldrt instructions above. - */ - .pushsection .text.fixup, "ax" - .align 2 -4: str r4, [sp, #S_PC] @ retry current instruction - ret r9 - .popsection - .pushsection __ex_table,"a" - .long 1b, 4b -#if CONFIG_ARM_THUMB && __LINUX_ARM_ARCH__ >= 6 && CONFIG_CPU_V7 - .long 2b, 4b - .long 3b, 4b -#endif - .popsection - -/* - * Check whether the instruction is a co-processor instruction. - * If yes, we need to call the relevant co-processor handler. - * - * Note that we don't do a full check here for the co-processor - * instructions; all instructions with bit 27 set are well - * defined. The only instructions that should fault are the - * co-processor instructions. However, we have to watch out - * for the ARM6/ARM7 SWI bug. - * - * NEON is a special case that has to be handled here. 
Not all - * NEON instructions are co-processor instructions, so we have - * to make a special case of checking for them. Plus, there's - * five groups of them, so we have a table of mask/opcode pairs - * to check against, and if any match then we branch off into the - * NEON handler code. - * - * Emulators may wish to make use of the following registers: - * r0 = instruction opcode (32-bit ARM or two 16-bit Thumb) - * r2 = PC value to resume execution after successful emulation - * r9 = normal "successful" return address - * r10 = this threads thread_info structure - * lr = unrecognised instruction return address - * IRQs enabled, FIQs enabled. - */ - @ - @ Fall-through from Thumb-2 __und_usr - @ -#ifdef CONFIG_NEON - get_thread_info r10 @ get current thread - adr r6, .LCneon_thumb_opcodes - b 2f -#endif -call_fpe: - get_thread_info r10 @ get current thread -#ifdef CONFIG_NEON - adr r6, .LCneon_arm_opcodes -2: ldr r5, [r6], #4 @ mask value - ldr r7, [r6], #4 @ opcode bits matching in mask - cmp r5, #0 @ end mask? - beq 1f - and r8, r0, r5 - cmp r8, r7 @ NEON instruction? - bne 2b - mov r7, #1 - strb r7, [r10, #TI_USED_CP + 10] @ mark CP#10 as used - strb r7, [r10, #TI_USED_CP + 11] @ mark CP#11 as used - b do_vfp @ let VFP handler handle this -1: -#endif - tst r0, #0x08000000 @ only CDP/CPRT/LDC/STC have bit 27 - tstne r0, #0x04000000 @ bit 26 set on both ARM and Thumb-2 - reteq lr - and r8, r0, #0x00000f00 @ mask out CP number - mov r7, #1 - add r6, r10, r8, lsr #8 @ add used_cp[] array offset first - strb r7, [r6, #TI_USED_CP] @ set appropriate used_cp[] -#ifdef CONFIG_IWMMXT - @ Test if we need to give access to iWMMXt coprocessors - ldr r5, [r10, #TI_FLAGS] - rsbs r7, r8, #(1 << 8) @ CP 0 or 1 only - movscs r7, r5, lsr #(TIF_USING_IWMMXT + 1) - bcs iwmmxt_task_enable -#endif - ARM( add pc, pc, r8, lsr #6 ) - THUMB( lsr r8, r8, #6 ) - THUMB( add pc, r8 ) - nop - - ret.w lr @ CP#0 - W(b) do_fpe @ CP#1 (FPE) - W(b) do_fpe @ CP#2 (FPE) - ret.w lr @ CP#3 - ret.w lr @ CP#4 - ret.w lr @ CP#5 - ret.w lr @ CP#6 - ret.w lr @ CP#7 - ret.w lr @ CP#8 - ret.w lr @ CP#9 -#ifdef CONFIG_VFP - W(b) do_vfp @ CP#10 (VFP) - W(b) do_vfp @ CP#11 (VFP) -#else - ret.w lr @ CP#10 (VFP) - ret.w lr @ CP#11 (VFP) -#endif - ret.w lr @ CP#12 - ret.w lr @ CP#13 - ret.w lr @ CP#14 (Debug) - ret.w lr @ CP#15 (Control) - -#ifdef CONFIG_NEON - .align 6 - -.LCneon_arm_opcodes: - .word 0xfe000000 @ mask - .word 0xf2000000 @ opcode - - .word 0xff100000 @ mask - .word 0xf4000000 @ opcode - - .word 0x00000000 @ mask - .word 0x00000000 @ opcode - -.LCneon_thumb_opcodes: - .word 0xef000000 @ mask - .word 0xef000000 @ opcode - - .word 0xff100000 @ mask - .word 0xf9000000 @ opcode - - .word 0x00000000 @ mask - .word 0x00000000 @ opcode -#endif - -do_fpe: - add r10, r10, #TI_FPSTATE @ r10 = workspace - ldr_va pc, fp_enter, tmp=r4 @ Call FP module USR entry point - -/* - * The FP module is called with these registers set: - * r0 = instruction - * r2 = PC+4 - * r9 = normal "successful" return address - * r10 = FP workspace - * lr = unrecognised FP instruction return address - */ - - .pushsection .data - .align 2 -ENTRY(fp_enter) - .word no_fp - .popsection - -ENTRY(no_fp) - ret lr -ENDPROC(no_fp) - -__und_usr_fault_32: - mov r1, #4 - b 1f -__und_usr_fault_16_pan: - uaccess_disable ip -__und_usr_fault_16: - mov r1, #2 -1: mov r0, sp - badr lr, ret_from_exception - b __und_fault -ENDPROC(__und_usr_fault_32) -ENDPROC(__und_usr_fault_16) - .align 5 __pabt_usr: usr_entry diff --git a/arch/arm/kernel/iwmmxt.S b/arch/arm/kernel/iwmmxt.S 
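The per-coprocessor dispatch removed above (with its NEON and iWMMXt special cases) is replaced throughout this series by C-level undefined-instruction hooks: register_iwmmxt_undef_handler() in the thread_info.h hunk earlier and the VFP/NEON hooks in the vfpmodule.c hunks below. A rough sketch of the mechanism, assuming only what asm/traps.h provides (the handler name and body here are hypothetical; the mask/value pair is the coprocessor-access pattern the iwmmxt hook uses): a hook is matched against the trapped opcode and CPSR, and its handler returns 0 once the instruction has been handled or emulated, or nonzero to pass it on.

	#include <asm/ptrace.h>	/* MODE_MASK, PSR_T_BIT, USR_MODE */
	#include <asm/traps.h>	/* struct undef_hook, register_undef_hook() */

	/*
	 * Hypothetical handler: return 0 if the instruction was handled
	 * (e.g. the coprocessor was enabled and the faulting instruction
	 * will be retried), nonzero to fall through to the next hook or
	 * to the default SIGILL path.
	 */
	static int example_cp_handler(struct pt_regs *regs, unsigned int instr)
	{
		return 0;
	}

	static struct undef_hook example_cp_hook = {
		.instr_mask	= 0x0c000e00,	/* coprocessor load/store/move space */
		.instr_val	= 0x0c000000,
		.cpsr_mask	= MODE_MASK | PSR_T_BIT,
		.cpsr_val	= USR_MODE,	/* ARM-state user mode only */
		.fn		= example_cp_handler,
	};

	static int __init example_hook_init(void)
	{
		register_undef_hook(&example_cp_hook);
		return 0;
	}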
index d2b4ac06e4ed8..a0218c4867b9b 100644 --- a/arch/arm/kernel/iwmmxt.S +++ b/arch/arm/kernel/iwmmxt.S @@ -58,9 +58,19 @@ .text .arm +ENTRY(iwmmxt_undef_handler) + push {r9, r10, lr} + get_thread_info r10 + mov r9, pc + b iwmmxt_task_enable + mov r0, #0 + pop {r9, r10, pc} +ENDPROC(iwmmxt_undef_handler) + /* * Lazy switching of Concan coprocessor context * + * r0 = struct pt_regs pointer * r10 = struct thread_info pointer * r9 = ret_from_exception * lr = undefined instr exit @@ -84,12 +94,12 @@ ENTRY(iwmmxt_task_enable) PJ4(mcr p15, 0, r2, c1, c0, 2) ldr r3, =concan_owner - add r0, r10, #TI_IWMMXT_STATE @ get task Concan save area - ldr r2, [sp, #60] @ current task pc value + ldr r2, [r0, #S_PC] @ current task pc value ldr r1, [r3] @ get current Concan owner - str r0, [r3] @ this task now owns Concan regs sub r2, r2, #4 @ adjust pc back - str r2, [sp, #60] + str r2, [r0, #S_PC] + add r0, r10, #TI_IWMMXT_STATE @ get task Concan save area + str r0, [r3] @ this task now owns Concan regs mrc p15, 0, r2, c2, c0, 0 mov r2, r2 @ cpwait diff --git a/arch/arm/kernel/machine_kexec.c b/arch/arm/kernel/machine_kexec.c index 46364b699cc30..5d07cf9e0044d 100644 --- a/arch/arm/kernel/machine_kexec.c +++ b/arch/arm/kernel/machine_kexec.c @@ -94,16 +94,28 @@ static void machine_crash_nonpanic_core(void *unused) } } +static DEFINE_PER_CPU(call_single_data_t, cpu_stop_csd) = + CSD_INIT(machine_crash_nonpanic_core, NULL); + void crash_smp_send_stop(void) { static int cpus_stopped; unsigned long msecs; + call_single_data_t *csd; + int cpu, this_cpu = raw_smp_processor_id(); if (cpus_stopped) return; atomic_set(&waiting_for_crash_ipi, num_online_cpus() - 1); - smp_call_function(machine_crash_nonpanic_core, NULL, false); + for_each_online_cpu(cpu) { + if (cpu == this_cpu) + continue; + + csd = &per_cpu(cpu_stop_csd, cpu); + smp_call_function_single_async(cpu, csd); + } + msecs = 1000; /* Wait at most a second for the other cpus to stop */ while ((atomic_read(&waiting_for_crash_ipi) > 0) && msecs) { mdelay(1); diff --git a/arch/arm/kernel/pj4-cp0.c b/arch/arm/kernel/pj4-cp0.c index 1d1fb22f44f37..4bca8098c4ff5 100644 --- a/arch/arm/kernel/pj4-cp0.c +++ b/arch/arm/kernel/pj4-cp0.c @@ -126,6 +126,7 @@ static int __init pj4_cp0_init(void) pr_info("PJ4 iWMMXt v%d coprocessor enabled.\n", vers); elf_hwcap |= HWCAP_IWMMXT; thread_register_notifier(&iwmmxt_notifier_block); + register_iwmmxt_undef_handler(); #endif return 0; diff --git a/arch/arm/kernel/process.c b/arch/arm/kernel/process.c index 0e8ff85890ade..e16ed102960cb 100644 --- a/arch/arm/kernel/process.c +++ b/arch/arm/kernel/process.c @@ -222,7 +222,6 @@ void flush_thread(void) flush_ptrace_hw_breakpoint(tsk); - memset(thread->used_cp, 0, sizeof(thread->used_cp)); memset(&tsk->thread.debug, 0, sizeof(struct debug_info)); memset(&thread->fpstate, 0, sizeof(union fp_state)); diff --git a/arch/arm/kernel/ptrace.c b/arch/arm/kernel/ptrace.c index fef32d73f9120..c421a899fc84c 100644 --- a/arch/arm/kernel/ptrace.c +++ b/arch/arm/kernel/ptrace.c @@ -584,8 +584,6 @@ static int fpa_set(struct task_struct *target, { struct thread_info *thread = task_thread_info(target); - thread->used_cp[1] = thread->used_cp[2] = 1; - return user_regset_copyin(&pos, &count, &kbuf, &ubuf, &thread->fpstate, 0, sizeof(struct user_fp)); diff --git a/arch/arm/kernel/xscale-cp0.c b/arch/arm/kernel/xscale-cp0.c index ed4f6e77616da..00d00d3aae972 100644 --- a/arch/arm/kernel/xscale-cp0.c +++ b/arch/arm/kernel/xscale-cp0.c @@ -166,6 +166,7 @@ static int __init xscale_cp0_init(void) 
pr_info("XScale iWMMXt coprocessor detected.\n"); elf_hwcap |= HWCAP_IWMMXT; thread_register_notifier(&iwmmxt_notifier_block); + register_iwmmxt_undef_handler(); #endif } else { pr_info("XScale DSP coprocessor detected.\n"); diff --git a/arch/arm/mm/proc-feroceon.S b/arch/arm/mm/proc-feroceon.S index 61ce82aca6f0d..072ff9b451f84 100644 --- a/arch/arm/mm/proc-feroceon.S +++ b/arch/arm/mm/proc-feroceon.S @@ -56,6 +56,10 @@ ENTRY(cpu_feroceon_proc_init) movne r2, r2, lsr #2 @ turned into # of sets sub r2, r2, #(1 << 5) stmia r1, {r2, r3} +#ifdef CONFIG_VFP + mov r1, #1 @ disable quirky VFP + str_l r1, VFP_arch_feroceon, r2 +#endif ret lr /* diff --git a/arch/arm/nwfpe/entry.S b/arch/arm/nwfpe/entry.S index d8f9915566e15..354d297a193bb 100644 --- a/arch/arm/nwfpe/entry.S +++ b/arch/arm/nwfpe/entry.S @@ -7,6 +7,7 @@ Direct questions, comments to Scott Bambrough */ +#include #include #include @@ -104,6 +105,7 @@ next: @ plain LDR instruction. Weird, but it seems harmless. .pushsection .text.fixup,"ax" .align 2 +.Lrep: str r4, [sp, #S_PC] @ retry current instruction .Lfix: ret r9 @ let the user eat segfaults .popsection @@ -111,3 +113,78 @@ next: .align 3 .long .Lx1, .Lfix .popsection + + @ + @ Check whether the instruction is a co-processor instruction. + @ If yes, we need to call the relevant co-processor handler. + @ Only FPE instructions are dispatched here, everything else + @ is handled by undef hooks. + @ + @ Emulators may wish to make use of the following registers: + @ r4 = PC value to resume execution after successful emulation + @ r9 = normal "successful" return address + @ lr = unrecognised instruction return address + @ IRQs enabled, FIQs enabled. + @ +ENTRY(call_fpe) + mov r2, r4 + sub r4, r4, #4 @ ARM instruction at user PC - 4 +USERL( .Lrep, ldrt r0, [r4]) @ load opcode from user space +ARM_BE8(rev r0, r0) @ little endian instruction + + uaccess_disable ip + + get_thread_info r10 @ get current thread + tst r0, #0x08000000 @ only CDP/CPRT/LDC/STC have bit 27 + reteq lr + and r8, r0, #0x00000f00 @ mask out CP number +#ifdef CONFIG_IWMMXT + @ Test if we need to give access to iWMMXt coprocessors + ldr r5, [r10, #TI_FLAGS] + rsbs r7, r8, #(1 << 8) @ CP 0 or 1 only + movscs r7, r5, lsr #(TIF_USING_IWMMXT + 1) + movcs r0, sp @ pass struct pt_regs + bcs iwmmxt_task_enable +#endif + add pc, pc, r8, lsr #6 + nop + + ret lr @ CP#0 + b do_fpe @ CP#1 (FPE) + b do_fpe @ CP#2 (FPE) + ret lr @ CP#3 + ret lr @ CP#4 + ret lr @ CP#5 + ret lr @ CP#6 + ret lr @ CP#7 + ret lr @ CP#8 + ret lr @ CP#9 + ret lr @ CP#10 (VFP) + ret lr @ CP#11 (VFP) + ret lr @ CP#12 + ret lr @ CP#13 + ret lr @ CP#14 (Debug) + ret lr @ CP#15 (Control) + +do_fpe: + add r10, r10, #TI_FPSTATE @ r10 = workspace + ldr_va pc, fp_enter, tmp=r4 @ Call FP module USR entry point + + @ + @ The FP module is called with these registers set: + @ r0 = instruction + @ r2 = PC+4 + @ r9 = normal "successful" return address + @ r10 = FP workspace + @ lr = unrecognised FP instruction return address + @ + + .pushsection .data + .align 2 +ENTRY(fp_enter) + .word no_fp + .popsection + +no_fp: + ret lr +ENDPROC(no_fp) diff --git a/arch/arm/vfp/Makefile b/arch/arm/vfp/Makefile index 749901a72d6dc..dfd64bc2b2fbd 100644 --- a/arch/arm/vfp/Makefile +++ b/arch/arm/vfp/Makefile @@ -8,4 +8,4 @@ # ccflags-y := -DDEBUG # asflags-y := -DDEBUG -obj-y += vfpmodule.o entry.o vfphw.o vfpsingle.o vfpdouble.o +obj-y += vfpmodule.o vfphw.o vfpsingle.o vfpdouble.o diff --git a/arch/arm/vfp/entry.S b/arch/arm/vfp/entry.S deleted file mode 100644 index 
62206ef250371..0000000000000 --- a/arch/arm/vfp/entry.S +++ /dev/null @@ -1,31 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * linux/arch/arm/vfp/entry.S - * - * Copyright (C) 2004 ARM Limited. - * Written by Deep Blue Solutions Limited. - */ -#include -#include -#include -#include -#include -#include - -@ VFP entry point. -@ -@ r0 = instruction opcode (32-bit ARM or two 16-bit Thumb) -@ r2 = PC value to resume execution after successful emulation -@ r9 = normal "successful" return address -@ r10 = this threads thread_info structure -@ lr = unrecognised instruction return address -@ IRQs enabled. -@ -ENTRY(do_vfp) - mov r1, r10 - str lr, [sp, #-8]! - add r3, sp, #4 - str r9, [r3] - bl vfp_entry - ldr pc, [sp], #8 -ENDPROC(do_vfp) diff --git a/arch/arm/vfp/vfp.h b/arch/arm/vfp/vfp.h index 5cd6d50532717..e43a630f8a164 100644 --- a/arch/arm/vfp/vfp.h +++ b/arch/arm/vfp/vfp.h @@ -375,3 +375,4 @@ struct op { }; asmlinkage void vfp_save_state(void *location, u32 fpexc); +asmlinkage u32 vfp_load_state(const void *location); diff --git a/arch/arm/vfp/vfphw.S b/arch/arm/vfp/vfphw.S index a4610d0f32152..d5a03f3c10c50 100644 --- a/arch/arm/vfp/vfphw.S +++ b/arch/arm/vfp/vfphw.S @@ -4,12 +4,6 @@ * * Copyright (C) 2004 ARM Limited. * Written by Deep Blue Solutions Limited. - * - * This code is called from the kernel's undefined instruction trap. - * r1 holds the thread_info pointer - * r3 holds the return address for successful handling. - * lr holds the return address for unrecognised instructions. - * sp points to a struct pt_regs (as defined in include/asm/proc/ptrace.h) */ #include #include @@ -19,20 +13,6 @@ #include #include - .macro DBGSTR, str -#ifdef DEBUG - stmfd sp!, {r0-r3, ip, lr} - ldr r0, =1f - bl _printk - ldmfd sp!, {r0-r3, ip, lr} - - .pushsection .rodata, "a" -1: .ascii KERN_DEBUG "VFP: \str\n" - .byte 0 - .previous -#endif - .endm - .macro DBGSTR1, str, arg #ifdef DEBUG stmfd sp!, {r0-r3, ip, lr} @@ -48,181 +28,25 @@ #endif .endm - .macro DBGSTR3, str, arg1, arg2, arg3 -#ifdef DEBUG - stmfd sp!, {r0-r3, ip, lr} - mov r3, \arg3 - mov r2, \arg2 - mov r1, \arg1 - ldr r0, =1f - bl _printk - ldmfd sp!, {r0-r3, ip, lr} - - .pushsection .rodata, "a" -1: .ascii KERN_DEBUG "VFP: \str\n" - .byte 0 - .previous -#endif - .endm - - -@ VFP hardware support entry point. -@ -@ r0 = instruction opcode (32-bit ARM or two 16-bit Thumb) -@ r1 = thread_info pointer -@ r2 = PC value to resume execution after successful emulation -@ r3 = normal "successful" return address -@ lr = unrecognised instruction return address -@ IRQs enabled. -ENTRY(vfp_support_entry) - ldr r11, [r1, #TI_CPU] @ CPU number - add r10, r1, #TI_VFPSTATE @ r10 = workspace - - DBGSTR3 "instr %08x pc %08x state %p", r0, r2, r10 - - .fpu vfpv2 - VFPFMRX r1, FPEXC @ Is the VFP enabled? - DBGSTR1 "fpexc %08x", r1 - tst r1, #FPEXC_EN - bne look_for_VFP_exceptions @ VFP is already enabled - - DBGSTR1 "enable %x", r10 - ldr r9, vfp_current_hw_state_address - orr r1, r1, #FPEXC_EN @ user FPEXC has the enable bit set - ldr r4, [r9, r11, lsl #2] @ vfp_current_hw_state pointer - bic r5, r1, #FPEXC_EX @ make sure exceptions are disabled - cmp r4, r10 @ this thread owns the hw context? -#ifndef CONFIG_SMP - @ For UP, checking that this thread owns the hw context is - @ sufficient to determine that the hardware state is valid. - beq vfp_hw_state_valid - - @ On UP, we lazily save the VFP context. As a different - @ thread wants ownership of the VFP hardware, save the old - @ state if there was a previous (valid) owner. 
- - VFPFMXR FPEXC, r5 @ enable VFP, disable any pending - @ exceptions, so we can get at the - @ rest of it - - DBGSTR1 "save old state %p", r4 - cmp r4, #0 @ if the vfp_current_hw_state is NULL - beq vfp_reload_hw @ then the hw state needs reloading - VFPFSTMIA r4, r5 @ save the working registers - VFPFMRX r5, FPSCR @ current status -#ifndef CONFIG_CPU_FEROCEON - tst r1, #FPEXC_EX @ is there additional state to save? - beq 1f - VFPFMRX r6, FPINST @ FPINST (only if FPEXC.EX is set) - tst r1, #FPEXC_FP2V @ is there an FPINST2 to read? - beq 1f - VFPFMRX r8, FPINST2 @ FPINST2 if needed (and present) -1: -#endif - stmia r4, {r1, r5, r6, r8} @ save FPEXC, FPSCR, FPINST, FPINST2 -vfp_reload_hw: - -#else - @ For SMP, if this thread does not own the hw context, then we - @ need to reload it. No need to save the old state as on SMP, - @ we always save the state when we switch away from a thread. - bne vfp_reload_hw - - @ This thread has ownership of the current hardware context. - @ However, it may have been migrated to another CPU, in which - @ case the saved state is newer than the hardware context. - @ Check this by looking at the CPU number which the state was - @ last loaded onto. - ldr ip, [r10, #VFP_CPU] - teq ip, r11 - beq vfp_hw_state_valid - -vfp_reload_hw: - @ We're loading this threads state into the VFP hardware. Update - @ the CPU number which contains the most up to date VFP context. - str r11, [r10, #VFP_CPU] - - VFPFMXR FPEXC, r5 @ enable VFP, disable any pending - @ exceptions, so we can get at the - @ rest of it -#endif - - DBGSTR1 "load state %p", r10 - str r10, [r9, r11, lsl #2] @ update the vfp_current_hw_state pointer +ENTRY(vfp_load_state) + @ Load the current VFP state + @ r0 - load location + @ returns FPEXC + DBGSTR1 "load VFP state %p", r0 @ Load the saved state back into the VFP - VFPFLDMIA r10, r5 @ reload the working registers while + VFPFLDMIA r0, r1 @ reload the working registers while @ FPEXC is in a safe state - ldmia r10, {r1, r5, r6, r8} @ load FPEXC, FPSCR, FPINST, FPINST2 -#ifndef CONFIG_CPU_FEROCEON - tst r1, #FPEXC_EX @ is there additional state to restore? + ldmia r0, {r0-r3} @ load FPEXC, FPSCR, FPINST, FPINST2 + tst r0, #FPEXC_EX @ is there additional state to restore? beq 1f - VFPFMXR FPINST, r6 @ restore FPINST (only if FPEXC.EX is set) - tst r1, #FPEXC_FP2V @ is there an FPINST2 to write? + VFPFMXR FPINST, r2 @ restore FPINST (only if FPEXC.EX is set) + tst r0, #FPEXC_FP2V @ is there an FPINST2 to write? beq 1f - VFPFMXR FPINST2, r8 @ FPINST2 if needed (and present) + VFPFMXR FPINST2, r3 @ FPINST2 if needed (and present) 1: -#endif - VFPFMXR FPSCR, r5 @ restore status - -@ The context stored in the VFP hardware is up to date with this thread -vfp_hw_state_valid: - tst r1, #FPEXC_EX - bne process_exception @ might as well handle the pending - @ exception before retrying branch - @ out before setting an FPEXC that - @ stops us reading stuff - VFPFMXR FPEXC, r1 @ Restore FPEXC last - mov sp, r3 @ we think we have handled things - pop {lr} - sub r2, r2, #4 @ Retry current instruction - if Thumb - str r2, [sp, #S_PC] @ mode it's two 16-bit instructions, - @ else it's one 32-bit instruction, so - @ always subtract 4 from the following - @ instruction address. - -local_bh_enable_and_ret: - adr r0, . 
- mov r1, #SOFTIRQ_DISABLE_OFFSET - b __local_bh_enable_ip @ tail call - -look_for_VFP_exceptions: - @ Check for synchronous or asynchronous exception - tst r1, #FPEXC_EX | FPEXC_DEX - bne process_exception - @ On some implementations of the VFP subarch 1, setting FPSCR.IXE - @ causes all the CDP instructions to be bounced synchronously without - @ setting the FPEXC.EX bit - VFPFMRX r5, FPSCR - tst r5, #FPSCR_IXE - bne process_exception - - tst r5, #FPSCR_LENGTH_MASK - beq skip - orr r1, r1, #FPEXC_DEX - b process_exception -skip: - - @ Fall into hand on to next handler - appropriate coproc instr - @ not recognised by VFP - - DBGSTR "not VFP" - b local_bh_enable_and_ret - -process_exception: - DBGSTR "bounce" - mov sp, r3 @ setup for a return to the user code. - pop {lr} - mov r2, sp @ nothing stacked - regdump is at TOS - - @ Now call the C code to package up the bounce to the support code - @ r0 holds the trigger instruction - @ r1 holds the FPEXC value - @ r2 pointer to register dump - b VFP_bounce @ we have handled this - the support - @ code will raise an exception if - @ required. If not, the user code will - @ retry the faulted instruction -ENDPROC(vfp_support_entry) + VFPFMXR FPSCR, r1 @ restore status + ret lr +ENDPROC(vfp_load_state) ENTRY(vfp_save_state) @ Save the current VFP state @@ -242,10 +66,6 @@ ENTRY(vfp_save_state) ret lr ENDPROC(vfp_save_state) - .align -vfp_current_hw_state_address: - .word vfp_current_hw_state - .macro tbl_branch, base, tmp, shift #ifdef CONFIG_THUMB2_KERNEL adr \tmp, 1f diff --git a/arch/arm/vfp/vfpmodule.c b/arch/arm/vfp/vfpmodule.c index 1ba5078c10258..7e8773a2d99d0 100644 --- a/arch/arm/vfp/vfpmodule.c +++ b/arch/arm/vfp/vfpmodule.c @@ -18,6 +18,7 @@ #include <linux/uaccess.h> #include <linux/user.h> #include <linux/export.h> +#include <linux/perf_event.h> #include <asm/cp15.h> #include <asm/cputype.h> @@ -30,11 +31,6 @@ #include "vfpinstr.h" #include "vfp.h" -/* - * Our undef handlers (in entry.S) - */ -asmlinkage void vfp_support_entry(u32, void *, u32, u32); - static bool have_vfp __ro_after_init; /* @@ -42,7 +38,11 @@ static bool have_vfp __ro_after_init; * Used in startup: set to non-zero if VFP checks fail * After startup, holds VFP architecture */ -static unsigned int __initdata VFP_arch; +static unsigned int VFP_arch; + +#ifdef CONFIG_CPU_FEROCEON +extern unsigned int VFP_arch_feroceon __alias(VFP_arch); +#endif /* * The pointer to the vfpstate structure of the thread which currently @@ -314,13 +314,14 @@ static u32 vfp_emulate_instruction(u32 inst, u32 fpscr, struct pt_regs *regs) * emulate it. */ } + perf_sw_event(PERF_COUNT_SW_EMULATION_FAULTS, 1, regs, regs->ARM_pc); return exceptions & ~VFP_NAN_FLAG; } /* * Package up a bounce condition. */ -void VFP_bounce(u32 trigger, u32 fpexc, struct pt_regs *regs) +static void VFP_bounce(u32 trigger, u32 fpexc, struct pt_regs *regs) { u32 fpscr, orig_fpscr, fpsid, exceptions; @@ -356,14 +357,12 @@ void VFP_bounce(u32 trigger, u32 fpexc, struct pt_regs *regs) } if (fpexc & FPEXC_EX) { -#ifndef CONFIG_CPU_FEROCEON /* * Asynchronous exception. The instruction is read from FPINST * and the interrupted instruction has to be restarted. */ trigger = fmrx(FPINST); regs->ARM_pc -= 4; -#endif } else if (!(fpexc & FPEXC_DEX)) { /* * Illegal combination of bits. It can be caused by an * @@ -371,7 +370,7 @@ * on VFP subarch 1. 
*/ vfp_raise_exceptions(VFP_EXCEPTION_ERROR, trigger, fpscr, regs); - goto exit; + return; } /* @@ -402,7 +401,7 @@ void VFP_bounce(u32 trigger, u32 fpexc, struct pt_regs *regs) * the FPEXC.FP2V bit is valid only if FPEXC.EX is 1. */ if ((fpexc & (FPEXC_EX | FPEXC_FP2V)) != (FPEXC_EX | FPEXC_FP2V)) - goto exit; + return; /* * The barrier() here prevents fpinst2 being read @@ -415,8 +414,6 @@ void VFP_bounce(u32 trigger, u32 fpexc, struct pt_regs *regs) exceptions = vfp_emulate_instruction(trigger, orig_fpscr, regs); if (exceptions) vfp_raise_exceptions(exceptions, trigger, orig_fpscr, regs); - exit: - local_bh_enable(); } static void vfp_enable(void *unused) @@ -645,27 +642,6 @@ static int vfp_starting_cpu(unsigned int unused) return 0; } -/* - * Entered with: - * - * r0 = instruction opcode (32-bit ARM or two 16-bit Thumb) - * r1 = thread_info pointer - * r2 = PC value to resume execution after successful emulation - * r3 = normal "successful" return address - * lr = unrecognised instruction return address - */ -asmlinkage void vfp_entry(u32 trigger, struct thread_info *ti, u32 resume_pc, - u32 resume_return_address) -{ - if (unlikely(!have_vfp)) - return; - - local_bh_disable(); - vfp_support_entry(trigger, ti, resume_pc, resume_return_address); -} - -#ifdef CONFIG_KERNEL_MODE_NEON - static int vfp_kmode_exception(struct pt_regs *regs, unsigned int instr) { /* @@ -688,47 +664,151 @@ static int vfp_kmode_exception(struct pt_regs *regs, unsigned int instr) return 1; } -static struct undef_hook vfp_kmode_exception_hook[] = {{ +/* + * vfp_support_entry - Handle VFP exception + * + * @regs: pt_regs structure holding the register state at exception entry + * @trigger: The opcode of the instruction that triggered the exception + * + * Returns 0 if the exception was handled, or an error code otherwise. + */ +static int vfp_support_entry(struct pt_regs *regs, u32 trigger) +{ + struct thread_info *ti = current_thread_info(); + u32 fpexc; + + if (unlikely(!have_vfp)) + return -ENODEV; + + if (!user_mode(regs)) + return vfp_kmode_exception(regs, trigger); + + local_bh_disable(); + fpexc = fmrx(FPEXC); + + /* + * If the VFP unit was not enabled yet, we have to check whether the + * VFP state in the CPU's registers is the most recent VFP state + * associated with the process. On UP systems, we don't save the VFP + * state eagerly on a context switch, so we may need to save the + * VFP state to memory first, as it may belong to another process. + */ + if (!(fpexc & FPEXC_EN)) { + /* + * Enable the VFP unit but mask the FP exception flag for the + * time being, so we can access all the registers. + */ + fpexc |= FPEXC_EN; + fmxr(FPEXC, fpexc & ~FPEXC_EX); + + /* + * Check whether or not the VFP state in the CPU's registers is + * the most recent VFP state associated with this task. On SMP, + * migration may result in multiple CPUs holding VFP states + * that belong to the same task, but only the most recent one + * is valid. + */ + if (!vfp_state_in_hw(ti->cpu, ti)) { + if (!IS_ENABLED(CONFIG_SMP) && + vfp_current_hw_state[ti->cpu] != NULL) { + /* + * This CPU is currently holding the most + * recent VFP state associated with another + * task, and we must save that to memory first. + */ + vfp_save_state(vfp_current_hw_state[ti->cpu], + fpexc); + } + + /* + * We can now proceed with loading the task's VFP state + * from memory into the CPU registers. 
+ */ + fpexc = vfp_load_state(&ti->vfpstate); + vfp_current_hw_state[ti->cpu] = &ti->vfpstate; +#ifdef CONFIG_SMP + /* + * Record that this CPU is now the one holding the most + * recent VFP state of the task. + */ + ti->vfpstate.hard.cpu = ti->cpu; +#endif + } + + if (fpexc & FPEXC_EX) + /* + * Might as well handle the pending exception before + * retrying branch out before setting an FPEXC that + * stops us reading stuff. + */ + goto bounce; + + /* + * No FP exception is pending: just enable the VFP and + * replay the instruction that trapped. + */ + fmxr(FPEXC, fpexc); + } else { + /* Check for synchronous or asynchronous exceptions */ + if (!(fpexc & (FPEXC_EX | FPEXC_DEX))) { + u32 fpscr = fmrx(FPSCR); + + /* + * On some implementations of the VFP subarch 1, + * setting FPSCR.IXE causes all the CDP instructions to + * be bounced synchronously without setting the + * FPEXC.EX bit + */ + if (!(fpscr & FPSCR_IXE)) { + if (!(fpscr & FPSCR_LENGTH_MASK)) { + pr_debug("not VFP\n"); + local_bh_enable(); + return -ENOEXEC; + } + fpexc |= FPEXC_DEX; + } + } +bounce: regs->ARM_pc += 4; + VFP_bounce(trigger, fpexc, regs); + } + + local_bh_enable(); + return 0; +} + +static struct undef_hook neon_support_hook[] = {{ .instr_mask = 0xfe000000, .instr_val = 0xf2000000, - .cpsr_mask = MODE_MASK | PSR_T_BIT, - .cpsr_val = SVC_MODE, - .fn = vfp_kmode_exception, + .cpsr_mask = PSR_T_BIT, + .cpsr_val = 0, + .fn = vfp_support_entry, }, { .instr_mask = 0xff100000, .instr_val = 0xf4000000, - .cpsr_mask = MODE_MASK | PSR_T_BIT, - .cpsr_val = SVC_MODE, - .fn = vfp_kmode_exception, + .cpsr_mask = PSR_T_BIT, + .cpsr_val = 0, + .fn = vfp_support_entry, }, { .instr_mask = 0xef000000, .instr_val = 0xef000000, - .cpsr_mask = MODE_MASK | PSR_T_BIT, - .cpsr_val = SVC_MODE | PSR_T_BIT, - .fn = vfp_kmode_exception, + .cpsr_mask = PSR_T_BIT, + .cpsr_val = PSR_T_BIT, + .fn = vfp_support_entry, }, { .instr_mask = 0xff100000, .instr_val = 0xf9000000, - .cpsr_mask = MODE_MASK | PSR_T_BIT, - .cpsr_val = SVC_MODE | PSR_T_BIT, - .fn = vfp_kmode_exception, -}, { - .instr_mask = 0x0c000e00, - .instr_val = 0x0c000a00, - .cpsr_mask = MODE_MASK, - .cpsr_val = SVC_MODE, - .fn = vfp_kmode_exception, + .cpsr_mask = PSR_T_BIT, + .cpsr_val = PSR_T_BIT, + .fn = vfp_support_entry, }}; -static int __init vfp_kmode_exception_hook_init(void) -{ - int i; +static struct undef_hook vfp_support_hook = { + .instr_mask = 0x0c000e00, + .instr_val = 0x0c000a00, + .fn = vfp_support_entry, +}; - for (i = 0; i < ARRAY_SIZE(vfp_kmode_exception_hook); i++) - register_undef_hook(&vfp_kmode_exception_hook[i]); - return 0; -} -subsys_initcall(vfp_kmode_exception_hook_init); +#ifdef CONFIG_KERNEL_MODE_NEON /* * Kernel-side NEON support functions @@ -833,8 +913,11 @@ static int __init vfp_init(void) * for NEON if the hardware has the MVFR registers. 
*/ if (IS_ENABLED(CONFIG_NEON) && - (fmrx(MVFR1) & 0x000fff00) == 0x00011100) + (fmrx(MVFR1) & 0x000fff00) == 0x00011100) { elf_hwcap |= HWCAP_NEON; + for (int i = 0; i < ARRAY_SIZE(neon_support_hook); i++) + register_undef_hook(&neon_support_hook[i]); + } if (IS_ENABLED(CONFIG_VFPv3)) { u32 mvfr0 = fmrx(MVFR0); @@ -903,6 +986,7 @@ static int __init vfp_init(void) have_vfp = true; + register_undef_hook(&vfp_support_hook); thread_register_notifier(&vfp_notifier_block); vfp_pm_init(); diff --git a/arch/powerpc/include/asm/book3s/64/pgtable.h b/arch/powerpc/include/asm/book3s/64/pgtable.h index 136232a897392..5c497c862d757 100644 --- a/arch/powerpc/include/asm/book3s/64/pgtable.h +++ b/arch/powerpc/include/asm/book3s/64/pgtable.h @@ -931,7 +931,7 @@ static inline pte_t *pudp_ptep(pud_t *pud) #define pud_mkdirty(pud) pte_pud(pte_mkdirty(pud_pte(pud))) #define pud_mkclean(pud) pte_pud(pte_mkclean(pud_pte(pud))) #define pud_mkyoung(pud) pte_pud(pte_mkyoung(pud_pte(pud))) -#define pud_mkwrite(pud) pte_pud(pte_mkwrite(pud_pte(pud))) +#define pud_mkwrite(pud) pte_pud(pte_mkwrite_novma(pud_pte(pud))) #define pud_write(pud) pte_write(pud_pte(pud)) #ifdef CONFIG_HAVE_ARCH_SOFT_DIRTY diff --git a/drivers/mtd/mtdsuper.c b/drivers/mtd/mtdsuper.c index 5ff001140ef4a..b7e3763c47f0c 100644 --- a/drivers/mtd/mtdsuper.c +++ b/drivers/mtd/mtdsuper.c @@ -19,38 +19,6 @@ #include #include "mtdcore.h" -/* - * compare superblocks to see if they're equivalent - * - they are if the underlying MTD device is the same - */ -static int mtd_test_super(struct super_block *sb, struct fs_context *fc) -{ - struct mtd_info *mtd = fc->sget_key; - - if (sb->s_mtd == fc->sget_key) { - pr_debug("MTDSB: Match on device %d (\"%s\")\n", - mtd->index, mtd->name); - return 1; - } - - pr_debug("MTDSB: No match, device %d (\"%s\"), device %d (\"%s\")\n", - sb->s_mtd->index, sb->s_mtd->name, mtd->index, mtd->name); - return 0; -} - -/* - * mark the superblock by the MTD device it is using - * - set the device number to be the correct MTD block device for pesuperstence - * of NFS exports - */ -static int mtd_set_super(struct super_block *sb, struct fs_context *fc) -{ - sb->s_mtd = fc->sget_key; - sb->s_dev = MKDEV(MTD_BLOCK_MAJOR, sb->s_mtd->index); - sb->s_bdi = bdi_get(mtd_bdi); - return 0; -} - /* * get a superblock on an MTD-backed filesystem */ @@ -62,8 +30,7 @@ static int mtd_get_sb(struct fs_context *fc, struct super_block *sb; int ret; - fc->sget_key = mtd; - sb = sget_fc(fc, mtd_test_super, mtd_set_super); + sb = sget_dev(fc, MKDEV(MTD_BLOCK_MAJOR, mtd->index)); if (IS_ERR(sb)) return PTR_ERR(sb); @@ -77,6 +44,16 @@ static int mtd_get_sb(struct fs_context *fc, pr_debug("MTDSB: New superblock for device %d (\"%s\")\n", mtd->index, mtd->name); + /* + * Would usually have been set with @sb_lock held but in + * contrast to sb->s_bdev that's checked with only + * @sb_lock held, nothing checks sb->s_mtd without also + * holding sb->s_umount and we're holding sb->s_umount + * here. 
+ */ + sb->s_mtd = mtd; + sb->s_bdi = bdi_get(mtd_bdi); + ret = fill_super(sb, fc); if (ret < 0) goto error_sb; diff --git a/fs/dlm/config.c b/fs/dlm/config.c index 2beceff024e39..e55e0a2cd2e86 100644 --- a/fs/dlm/config.c +++ b/fs/dlm/config.c @@ -664,7 +664,7 @@ static ssize_t comm_addr_store(struct config_item *item, const char *buf, memcpy(addr, buf, len); - rv = dlm_lowcomms_addr(cm->nodeid, addr, len); + rv = dlm_midcomms_addr(cm->nodeid, addr, len); if (rv) { kfree(addr); return rv; diff --git a/fs/dlm/debug_fs.c b/fs/dlm/debug_fs.c index a1aca41c49d06..5aabcb6f0f157 100644 --- a/fs/dlm/debug_fs.c +++ b/fs/dlm/debug_fs.c @@ -18,6 +18,7 @@ #include "dlm_internal.h" #include "midcomms.h" #include "lock.h" +#include "ast.h" #define DLM_DEBUG_BUF_LEN 4096 static char debug_buf[DLM_DEBUG_BUF_LEN]; @@ -365,6 +366,52 @@ static void print_format4(struct dlm_rsb *r, struct seq_file *s) unlock_rsb(r); } +static void print_format5_lock(struct seq_file *s, struct dlm_lkb *lkb) +{ + struct dlm_callback *cb; + + /* lkb_id lkb_flags mode flags sb_status sb_flags */ + + spin_lock(&lkb->lkb_cb_lock); + list_for_each_entry(cb, &lkb->lkb_callbacks, list) { + seq_printf(s, "%x %x %d %x %d %x\n", + lkb->lkb_id, + dlm_iflags_val(lkb), + cb->mode, + cb->flags, + cb->sb_status, + cb->sb_flags); + } + spin_unlock(&lkb->lkb_cb_lock); +} + +static void print_format5(struct dlm_rsb *r, struct seq_file *s) +{ + struct dlm_lkb *lkb; + + lock_rsb(r); + + list_for_each_entry(lkb, &r->res_grantqueue, lkb_statequeue) { + print_format5_lock(s, lkb); + if (seq_has_overflowed(s)) + goto out; + } + + list_for_each_entry(lkb, &r->res_convertqueue, lkb_statequeue) { + print_format5_lock(s, lkb); + if (seq_has_overflowed(s)) + goto out; + } + + list_for_each_entry(lkb, &r->res_waitqueue, lkb_statequeue) { + print_format5_lock(s, lkb); + if (seq_has_overflowed(s)) + goto out; + } + out: + unlock_rsb(r); +} + struct rsbtbl_iter { struct dlm_rsb *rsb; unsigned bucket; @@ -408,6 +455,13 @@ static int table_seq_show(struct seq_file *seq, void *iter_ptr) } print_format4(ri->rsb, seq); break; + case 5: + if (ri->header) { + seq_puts(seq, "lkb_id lkb_flags mode flags sb_status sb_flags\n"); + ri->header = 0; + } + print_format5(ri->rsb, seq); + break; } return 0; @@ -417,6 +471,7 @@ static const struct seq_operations format1_seq_ops; static const struct seq_operations format2_seq_ops; static const struct seq_operations format3_seq_ops; static const struct seq_operations format4_seq_ops; +static const struct seq_operations format5_seq_ops; static void *table_seq_start(struct seq_file *seq, loff_t *pos) { @@ -448,6 +503,8 @@ static void *table_seq_start(struct seq_file *seq, loff_t *pos) ri->format = 3; if (seq->op == &format4_seq_ops) ri->format = 4; + if (seq->op == &format5_seq_ops) + ri->format = 5; tree = toss ? 
&ls->ls_rsbtbl[bucket].toss : &ls->ls_rsbtbl[bucket].keep; @@ -602,10 +659,18 @@ static const struct seq_operations format4_seq_ops = { .show = table_seq_show, }; +static const struct seq_operations format5_seq_ops = { + .start = table_seq_start, + .next = table_seq_next, + .stop = table_seq_stop, + .show = table_seq_show, +}; + static const struct file_operations format1_fops; static const struct file_operations format2_fops; static const struct file_operations format3_fops; static const struct file_operations format4_fops; +static const struct file_operations format5_fops; static int table_open1(struct inode *inode, struct file *file) { @@ -683,7 +748,21 @@ static int table_open4(struct inode *inode, struct file *file) struct seq_file *seq; int ret; - ret = seq_open(file, &format4_seq_ops); + ret = seq_open(file, &format4_seq_ops); + if (ret) + return ret; + + seq = file->private_data; + seq->private = inode->i_private; /* the dlm_ls */ + return 0; +} + +static int table_open5(struct inode *inode, struct file *file) +{ + struct seq_file *seq; + int ret; + + ret = seq_open(file, &format5_seq_ops); if (ret) return ret; @@ -725,6 +804,14 @@ static const struct file_operations format4_fops = { .release = seq_release }; +static const struct file_operations format5_fops = { + .owner = THIS_MODULE, + .open = table_open5, + .read = seq_read, + .llseek = seq_lseek, + .release = seq_release +}; + /* * dump lkb's on the ls_waiters list */ @@ -793,6 +880,7 @@ void dlm_delete_debug_file(struct dlm_ls *ls) debugfs_remove(ls->ls_debug_locks_dentry); debugfs_remove(ls->ls_debug_all_dentry); debugfs_remove(ls->ls_debug_toss_dentry); + debugfs_remove(ls->ls_debug_queued_asts_dentry); } static int dlm_state_show(struct seq_file *file, void *offset) @@ -936,6 +1024,17 @@ void dlm_create_debug_file(struct dlm_ls *ls) dlm_root, ls, &waiters_fops); + + /* format 5 */ + + memset(name, 0, sizeof(name)); + snprintf(name, DLM_LOCKSPACE_LEN + 8, "%s_queued_asts", ls->ls_name); + + ls->ls_debug_queued_asts_dentry = debugfs_create_file(name, + 0644, + dlm_root, + ls, + &format5_fops); } void __init dlm_register_debugfs(void) diff --git a/fs/dlm/dir.c b/fs/dlm/dir.c index fb1981654bb24..f6acba4310a7b 100644 --- a/fs/dlm/dir.c +++ b/fs/dlm/dir.c @@ -58,7 +58,7 @@ void dlm_recover_dir_nodeid(struct dlm_ls *ls) up_read(&ls->ls_root_sem); } -int dlm_recover_directory(struct dlm_ls *ls) +int dlm_recover_directory(struct dlm_ls *ls, uint64_t seq) { struct dlm_member *memb; char *b, *last_name = NULL; @@ -90,7 +90,7 @@ int dlm_recover_directory(struct dlm_ls *ls) } error = dlm_rcom_names(ls, memb->nodeid, - last_name, last_len); + last_name, last_len, seq); if (error) goto out_free; @@ -196,7 +196,8 @@ int dlm_recover_directory(struct dlm_ls *ls) return error; } -static struct dlm_rsb *find_rsb_root(struct dlm_ls *ls, char *name, int len) +static struct dlm_rsb *find_rsb_root(struct dlm_ls *ls, const char *name, + int len) { struct dlm_rsb *r; uint32_t hash, bucket; @@ -232,7 +233,7 @@ static struct dlm_rsb *find_rsb_root(struct dlm_ls *ls, char *name, int len) for rsb's we're master of and whose directory node matches the requesting node. 
inbuf is the rsb name last sent, inlen is the name's length */ -void dlm_copy_master_names(struct dlm_ls *ls, char *inbuf, int inlen, +void dlm_copy_master_names(struct dlm_ls *ls, const char *inbuf, int inlen, char *outbuf, int outlen, int nodeid) { struct list_head *list; @@ -245,9 +246,8 @@ void dlm_copy_master_names(struct dlm_ls *ls, char *inbuf, int inlen, if (inlen > 1) { r = find_rsb_root(ls, inbuf, inlen); if (!r) { - inbuf[inlen - 1] = '\0'; - log_error(ls, "copy_master_names from %d start %d %s", - nodeid, inlen, inbuf); + log_error(ls, "copy_master_names from %d start %d %.*s", + nodeid, inlen, inlen, inbuf); goto out; } list = r->res_root_list.next; diff --git a/fs/dlm/dir.h b/fs/dlm/dir.h index 03844d086be23..39ecb69d7ef36 100644 --- a/fs/dlm/dir.h +++ b/fs/dlm/dir.h @@ -15,9 +15,9 @@ int dlm_dir_nodeid(struct dlm_rsb *rsb); int dlm_hash2nodeid(struct dlm_ls *ls, uint32_t hash); void dlm_recover_dir_nodeid(struct dlm_ls *ls); -int dlm_recover_directory(struct dlm_ls *ls); -void dlm_copy_master_names(struct dlm_ls *ls, char *inbuf, int inlen, - char *outbuf, int outlen, int nodeid); +int dlm_recover_directory(struct dlm_ls *ls, uint64_t seq); +void dlm_copy_master_names(struct dlm_ls *ls, const char *inbuf, int inlen, + char *outbuf, int outlen, int nodeid); #endif /* __DIR_DOT_H__ */ diff --git a/fs/dlm/dlm_internal.h b/fs/dlm/dlm_internal.h index c8156770205e6..dfc444dad3298 100644 --- a/fs/dlm/dlm_internal.h +++ b/fs/dlm/dlm_internal.h @@ -598,6 +598,7 @@ struct dlm_ls { struct dentry *ls_debug_locks_dentry; /* debugfs */ struct dentry *ls_debug_all_dentry; /* debugfs */ struct dentry *ls_debug_toss_dentry; /* debugfs */ + struct dentry *ls_debug_queued_asts_dentry; /* debugfs */ wait_queue_head_t ls_uevent_wait; /* user part of join/leave */ int ls_uevent_result; diff --git a/fs/dlm/lock.c b/fs/dlm/lock.c index f511a9d7d416e..652c51fbbf768 100644 --- a/fs/dlm/lock.c +++ b/fs/dlm/lock.c @@ -86,8 +86,8 @@ static int send_remove(struct dlm_rsb *r); static int _request_lock(struct dlm_rsb *r, struct dlm_lkb *lkb); static int _cancel_lock(struct dlm_rsb *r, struct dlm_lkb *lkb); static void __receive_convert_reply(struct dlm_rsb *r, struct dlm_lkb *lkb, - struct dlm_message *ms, bool local); -static int receive_extralen(struct dlm_message *ms); + const struct dlm_message *ms, bool local); +static int receive_extralen(const struct dlm_message *ms); static void do_purge(struct dlm_ls *ls, int nodeid, int pid); static void toss_rsb(struct kref *kref); @@ -984,8 +984,8 @@ static void __dlm_master_lookup(struct dlm_ls *ls, struct dlm_rsb *r, int our_no * . 
dlm_master_lookup RECOVER_MASTER (fix_master 1, from_master 0) */ -int dlm_master_lookup(struct dlm_ls *ls, int from_nodeid, char *name, int len, - unsigned int flags, int *r_nodeid, int *result) +int dlm_master_lookup(struct dlm_ls *ls, int from_nodeid, const char *name, + int len, unsigned int flags, int *r_nodeid, int *result) { struct dlm_rsb *r = NULL; uint32_t hash, b; @@ -1106,7 +1106,7 @@ static void dlm_dump_rsb_hash(struct dlm_ls *ls, uint32_t hash) } } -void dlm_dump_rsb_name(struct dlm_ls *ls, char *name, int len) +void dlm_dump_rsb_name(struct dlm_ls *ls, const char *name, int len) { struct dlm_rsb *r = NULL; uint32_t hash, b; @@ -1459,7 +1459,7 @@ static int add_to_waiters(struct dlm_lkb *lkb, int mstype, int to_nodeid) set RESEND and dlm_recover_waiters_post() */ static int _remove_from_waiters(struct dlm_lkb *lkb, int mstype, - struct dlm_message *ms) + const struct dlm_message *ms) { struct dlm_ls *ls = lkb->lkb_resource->res_ls; int overlap_done = 0; @@ -1557,8 +1557,8 @@ static int remove_from_waiters(struct dlm_lkb *lkb, int mstype) /* Handles situations where we might be processing a "fake" or "local" reply in which we can't try to take waiters_mutex again. */ -static int remove_from_waiters_ms(struct dlm_lkb *lkb, struct dlm_message *ms, - bool local) +static int remove_from_waiters_ms(struct dlm_lkb *lkb, + const struct dlm_message *ms, bool local) { struct dlm_ls *ls = lkb->lkb_resource->res_ls; int error; @@ -1800,7 +1800,7 @@ static void set_lvb_unlock(struct dlm_rsb *r, struct dlm_lkb *lkb) /* lkb is process copy (pc) */ static void set_lvb_lock_pc(struct dlm_rsb *r, struct dlm_lkb *lkb, - struct dlm_message *ms) + const struct dlm_message *ms) { int b; @@ -1907,7 +1907,7 @@ static void grant_lock(struct dlm_rsb *r, struct dlm_lkb *lkb) } static void grant_lock_pc(struct dlm_rsb *r, struct dlm_lkb *lkb, - struct dlm_message *ms) + const struct dlm_message *ms) { set_lvb_lock_pc(r, lkb, ms); _grant_lock(r, lkb); @@ -1945,7 +1945,7 @@ static void munge_demoted(struct dlm_lkb *lkb) lkb->lkb_grmode = DLM_LOCK_NL; } -static void munge_altmode(struct dlm_lkb *lkb, struct dlm_message *ms) +static void munge_altmode(struct dlm_lkb *lkb, const struct dlm_message *ms) { if (ms->m_type != cpu_to_le32(DLM_MSG_REQUEST_REPLY) && ms->m_type != cpu_to_le32(DLM_MSG_GRANT)) { @@ -3641,8 +3641,9 @@ static int send_cancel_reply(struct dlm_rsb *r, struct dlm_lkb *lkb, int rv) return send_common_reply(r, lkb, DLM_MSG_CANCEL_REPLY, rv); } -static int send_lookup_reply(struct dlm_ls *ls, struct dlm_message *ms_in, - int ret_nodeid, int rv) +static int send_lookup_reply(struct dlm_ls *ls, + const struct dlm_message *ms_in, int ret_nodeid, + int rv) { struct dlm_rsb *r = &ls->ls_local_rsb; struct dlm_message *ms; @@ -3667,14 +3668,15 @@ static int send_lookup_reply(struct dlm_ls *ls, struct dlm_message *ms_in, of message, unlike the send side where we can safely send everything about the lkb for any type of message */ -static void receive_flags(struct dlm_lkb *lkb, struct dlm_message *ms) +static void receive_flags(struct dlm_lkb *lkb, const struct dlm_message *ms) { lkb->lkb_exflags = le32_to_cpu(ms->m_exflags); dlm_set_sbflags_val(lkb, le32_to_cpu(ms->m_sbflags)); dlm_set_dflags_val(lkb, le32_to_cpu(ms->m_flags)); } -static void receive_flags_reply(struct dlm_lkb *lkb, struct dlm_message *ms, +static void receive_flags_reply(struct dlm_lkb *lkb, + const struct dlm_message *ms, bool local) { if (local) @@ -3684,14 +3686,14 @@ static void receive_flags_reply(struct dlm_lkb *lkb, struct 
dlm_message *ms, dlm_set_dflags_val(lkb, le32_to_cpu(ms->m_flags)); } -static int receive_extralen(struct dlm_message *ms) +static int receive_extralen(const struct dlm_message *ms) { return (le16_to_cpu(ms->m_header.h_length) - sizeof(struct dlm_message)); } static int receive_lvb(struct dlm_ls *ls, struct dlm_lkb *lkb, - struct dlm_message *ms) + const struct dlm_message *ms) { int len; @@ -3719,7 +3721,7 @@ static void fake_astfn(void *astparam) } static int receive_request_args(struct dlm_ls *ls, struct dlm_lkb *lkb, - struct dlm_message *ms) + const struct dlm_message *ms) { lkb->lkb_nodeid = le32_to_cpu(ms->m_header.h_nodeid); lkb->lkb_ownpid = le32_to_cpu(ms->m_pid); @@ -3741,7 +3743,7 @@ static int receive_request_args(struct dlm_ls *ls, struct dlm_lkb *lkb, } static int receive_convert_args(struct dlm_ls *ls, struct dlm_lkb *lkb, - struct dlm_message *ms) + const struct dlm_message *ms) { if (lkb->lkb_status != DLM_LKSTS_GRANTED) return -EBUSY; @@ -3756,7 +3758,7 @@ static int receive_convert_args(struct dlm_ls *ls, struct dlm_lkb *lkb, } static int receive_unlock_args(struct dlm_ls *ls, struct dlm_lkb *lkb, - struct dlm_message *ms) + const struct dlm_message *ms) { if (receive_lvb(ls, lkb, ms)) return -ENOMEM; @@ -3766,7 +3768,7 @@ static int receive_unlock_args(struct dlm_ls *ls, struct dlm_lkb *lkb, /* We fill in the local-lkb fields with the info that send_xxxx_reply() uses to send a reply and that the remote end uses to process the reply. */ -static void setup_local_lkb(struct dlm_ls *ls, struct dlm_message *ms) +static void setup_local_lkb(struct dlm_ls *ls, const struct dlm_message *ms) { struct dlm_lkb *lkb = &ls->ls_local_lkb; lkb->lkb_nodeid = le32_to_cpu(ms->m_header.h_nodeid); @@ -3776,7 +3778,7 @@ static void setup_local_lkb(struct dlm_ls *ls, struct dlm_message *ms) /* This is called after the rsb is locked so that we can safely inspect fields in the lkb. 
*/ -static int validate_message(struct dlm_lkb *lkb, struct dlm_message *ms) +static int validate_message(struct dlm_lkb *lkb, const struct dlm_message *ms) { int from = le32_to_cpu(ms->m_header.h_nodeid); int error = 0; @@ -3828,7 +3830,7 @@ static int validate_message(struct dlm_lkb *lkb, struct dlm_message *ms) return error; } -static int receive_request(struct dlm_ls *ls, struct dlm_message *ms) +static int receive_request(struct dlm_ls *ls, const struct dlm_message *ms) { struct dlm_lkb *lkb; struct dlm_rsb *r; @@ -3907,7 +3909,7 @@ static int receive_request(struct dlm_ls *ls, struct dlm_message *ms) return error; } -static int receive_convert(struct dlm_ls *ls, struct dlm_message *ms) +static int receive_convert(struct dlm_ls *ls, const struct dlm_message *ms) { struct dlm_lkb *lkb; struct dlm_rsb *r; @@ -3963,7 +3965,7 @@ static int receive_convert(struct dlm_ls *ls, struct dlm_message *ms) return error; } -static int receive_unlock(struct dlm_ls *ls, struct dlm_message *ms) +static int receive_unlock(struct dlm_ls *ls, const struct dlm_message *ms) { struct dlm_lkb *lkb; struct dlm_rsb *r; @@ -4015,7 +4017,7 @@ static int receive_unlock(struct dlm_ls *ls, struct dlm_message *ms) return error; } -static int receive_cancel(struct dlm_ls *ls, struct dlm_message *ms) +static int receive_cancel(struct dlm_ls *ls, const struct dlm_message *ms) { struct dlm_lkb *lkb; struct dlm_rsb *r; @@ -4051,7 +4053,7 @@ static int receive_cancel(struct dlm_ls *ls, struct dlm_message *ms) return error; } -static int receive_grant(struct dlm_ls *ls, struct dlm_message *ms) +static int receive_grant(struct dlm_ls *ls, const struct dlm_message *ms) { struct dlm_lkb *lkb; struct dlm_rsb *r; @@ -4082,7 +4084,7 @@ static int receive_grant(struct dlm_ls *ls, struct dlm_message *ms) return 0; } -static int receive_bast(struct dlm_ls *ls, struct dlm_message *ms) +static int receive_bast(struct dlm_ls *ls, const struct dlm_message *ms) { struct dlm_lkb *lkb; struct dlm_rsb *r; @@ -4110,7 +4112,7 @@ static int receive_bast(struct dlm_ls *ls, struct dlm_message *ms) return 0; } -static void receive_lookup(struct dlm_ls *ls, struct dlm_message *ms) +static void receive_lookup(struct dlm_ls *ls, const struct dlm_message *ms) { int len, error, ret_nodeid, from_nodeid, our_nodeid; @@ -4130,7 +4132,7 @@ static void receive_lookup(struct dlm_ls *ls, struct dlm_message *ms) send_lookup_reply(ls, ms, ret_nodeid, error); } -static void receive_remove(struct dlm_ls *ls, struct dlm_message *ms) +static void receive_remove(struct dlm_ls *ls, const struct dlm_message *ms) { char name[DLM_RESNAME_MAXLEN+1]; struct dlm_rsb *r; @@ -4218,12 +4220,13 @@ static void receive_remove(struct dlm_ls *ls, struct dlm_message *ms) } } -static void receive_purge(struct dlm_ls *ls, struct dlm_message *ms) +static void receive_purge(struct dlm_ls *ls, const struct dlm_message *ms) { do_purge(ls, le32_to_cpu(ms->m_nodeid), le32_to_cpu(ms->m_pid)); } -static int receive_request_reply(struct dlm_ls *ls, struct dlm_message *ms) +static int receive_request_reply(struct dlm_ls *ls, + const struct dlm_message *ms) { struct dlm_lkb *lkb; struct dlm_rsb *r; @@ -4345,7 +4348,7 @@ static int receive_request_reply(struct dlm_ls *ls, struct dlm_message *ms) } static void __receive_convert_reply(struct dlm_rsb *r, struct dlm_lkb *lkb, - struct dlm_message *ms, bool local) + const struct dlm_message *ms, bool local) { /* this is the value returned from do_convert() on the master */ switch (from_dlm_errno(le32_to_cpu(ms->m_result))) { @@ -4388,8 +4391,8 @@ 
static void __receive_convert_reply(struct dlm_rsb *r, struct dlm_lkb *lkb, } } -static void _receive_convert_reply(struct dlm_lkb *lkb, struct dlm_message *ms, - bool local) +static void _receive_convert_reply(struct dlm_lkb *lkb, + const struct dlm_message *ms, bool local) { struct dlm_rsb *r = lkb->lkb_resource; int error; @@ -4412,7 +4415,8 @@ static void _receive_convert_reply(struct dlm_lkb *lkb, struct dlm_message *ms, put_rsb(r); } -static int receive_convert_reply(struct dlm_ls *ls, struct dlm_message *ms) +static int receive_convert_reply(struct dlm_ls *ls, + const struct dlm_message *ms) { struct dlm_lkb *lkb; int error; @@ -4426,8 +4430,8 @@ static int receive_convert_reply(struct dlm_ls *ls, struct dlm_message *ms) return 0; } -static void _receive_unlock_reply(struct dlm_lkb *lkb, struct dlm_message *ms, - bool local) +static void _receive_unlock_reply(struct dlm_lkb *lkb, + const struct dlm_message *ms, bool local) { struct dlm_rsb *r = lkb->lkb_resource; int error; @@ -4463,7 +4467,8 @@ static void _receive_unlock_reply(struct dlm_lkb *lkb, struct dlm_message *ms, put_rsb(r); } -static int receive_unlock_reply(struct dlm_ls *ls, struct dlm_message *ms) +static int receive_unlock_reply(struct dlm_ls *ls, + const struct dlm_message *ms) { struct dlm_lkb *lkb; int error; @@ -4477,8 +4482,8 @@ static int receive_unlock_reply(struct dlm_ls *ls, struct dlm_message *ms) return 0; } -static void _receive_cancel_reply(struct dlm_lkb *lkb, struct dlm_message *ms, - bool local) +static void _receive_cancel_reply(struct dlm_lkb *lkb, + const struct dlm_message *ms, bool local) { struct dlm_rsb *r = lkb->lkb_resource; int error; @@ -4515,7 +4520,8 @@ static void _receive_cancel_reply(struct dlm_lkb *lkb, struct dlm_message *ms, put_rsb(r); } -static int receive_cancel_reply(struct dlm_ls *ls, struct dlm_message *ms) +static int receive_cancel_reply(struct dlm_ls *ls, + const struct dlm_message *ms) { struct dlm_lkb *lkb; int error; @@ -4529,7 +4535,8 @@ static int receive_cancel_reply(struct dlm_ls *ls, struct dlm_message *ms) return 0; } -static void receive_lookup_reply(struct dlm_ls *ls, struct dlm_message *ms) +static void receive_lookup_reply(struct dlm_ls *ls, + const struct dlm_message *ms) { struct dlm_lkb *lkb; struct dlm_rsb *r; @@ -4608,7 +4615,7 @@ static void receive_lookup_reply(struct dlm_ls *ls, struct dlm_message *ms) dlm_put_lkb(lkb); } -static void _receive_message(struct dlm_ls *ls, struct dlm_message *ms, +static void _receive_message(struct dlm_ls *ls, const struct dlm_message *ms, uint32_t saved_seq) { int error = 0, noent = 0; @@ -4744,7 +4751,7 @@ static void _receive_message(struct dlm_ls *ls, struct dlm_message *ms, requestqueue, to processing all the saved messages, to processing new messages as they arrive. */ -static void dlm_receive_message(struct dlm_ls *ls, struct dlm_message *ms, +static void dlm_receive_message(struct dlm_ls *ls, const struct dlm_message *ms, int nodeid) { if (dlm_locking_stopped(ls)) { @@ -4767,7 +4774,7 @@ static void dlm_receive_message(struct dlm_ls *ls, struct dlm_message *ms, /* This is called by dlm_recoverd to process messages that were saved on the requestqueue. 
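+ * The saved_seq argument is the recovery sequence number that was + * recorded when the message was queued; it is passed unchanged to + * _receive_message().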
*/ -void dlm_receive_message_saved(struct dlm_ls *ls, struct dlm_message *ms, +void dlm_receive_message_saved(struct dlm_ls *ls, const struct dlm_message *ms, uint32_t saved_seq) { _receive_message(ls, ms, saved_seq); @@ -4778,9 +4785,9 @@ void dlm_receive_message_saved(struct dlm_ls *ls, struct dlm_message *ms, standard locking activity) or an RCOM (recovery message sent as part of lockspace recovery). */ -void dlm_receive_buffer(union dlm_packet *p, int nodeid) +void dlm_receive_buffer(const union dlm_packet *p, int nodeid) { - struct dlm_header *hd = &p->header; + const struct dlm_header *hd = &p->header; struct dlm_ls *ls; int type = 0; @@ -5334,7 +5341,7 @@ static struct dlm_lkb *search_remid(struct dlm_rsb *r, int nodeid, /* needs at least dlm_rcom + rcom_lock */ static int receive_rcom_lock_args(struct dlm_ls *ls, struct dlm_lkb *lkb, - struct dlm_rsb *r, struct dlm_rcom *rc) + struct dlm_rsb *r, const struct dlm_rcom *rc) { struct rcom_lock *rl = (struct rcom_lock *) rc->rc_buf; @@ -5384,7 +5391,8 @@ static int receive_rcom_lock_args(struct dlm_ls *ls, struct dlm_lkb *lkb, back the rcom_lock struct we got but with the remid field filled in. */ /* needs at least dlm_rcom + rcom_lock */ -int dlm_recover_master_copy(struct dlm_ls *ls, struct dlm_rcom *rc) +int dlm_recover_master_copy(struct dlm_ls *ls, const struct dlm_rcom *rc, + __le32 *rl_remid, __le32 *rl_result) { struct rcom_lock *rl = (struct rcom_lock *) rc->rc_buf; struct dlm_rsb *r; @@ -5393,6 +5401,9 @@ int dlm_recover_master_copy(struct dlm_ls *ls, struct dlm_rcom *rc) int from_nodeid = le32_to_cpu(rc->rc_header.h_nodeid); int error; + /* init rl_remid with rcom lock rl_remid */ + *rl_remid = rl->rl_remid; + if (rl->rl_parent_lkid) { error = -EOPNOTSUPP; goto out; @@ -5448,7 +5459,7 @@ int dlm_recover_master_copy(struct dlm_ls *ls, struct dlm_rcom *rc) out_remid: /* this is the new value returned to the lock holder for saving in its process-copy lkb */ - rl->rl_remid = cpu_to_le32(lkb->lkb_id); + *rl_remid = cpu_to_le32(lkb->lkb_id); lkb->lkb_recover_seq = ls->ls_recover_seq; @@ -5459,12 +5470,13 @@ int dlm_recover_master_copy(struct dlm_ls *ls, struct dlm_rcom *rc) if (error && error != -EEXIST) log_rinfo(ls, "dlm_recover_master_copy remote %d %x error %d", from_nodeid, remid, error); - rl->rl_result = cpu_to_le32(error); + *rl_result = cpu_to_le32(error); return error; } /* needs at least dlm_rcom + rcom_lock */ -int dlm_recover_process_copy(struct dlm_ls *ls, struct dlm_rcom *rc) +int dlm_recover_process_copy(struct dlm_ls *ls, const struct dlm_rcom *rc, + uint64_t seq) { struct rcom_lock *rl = (struct rcom_lock *) rc->rc_buf; struct dlm_rsb *r; @@ -5509,7 +5521,7 @@ int dlm_recover_process_copy(struct dlm_ls *ls, struct dlm_rcom *rc) lkid, le32_to_cpu(rc->rc_header.h_nodeid), remid, result); - dlm_send_rcom_lock(r, lkb); + dlm_send_rcom_lock(r, lkb, seq); goto out; case -EEXIST: case 0: diff --git a/fs/dlm/lock.h b/fs/dlm/lock.h index aa5ad44d902bf..b54e2cbbe6e27 100644 --- a/fs/dlm/lock.h +++ b/fs/dlm/lock.h @@ -12,11 +12,11 @@ #define __LOCK_DOT_H__ void dlm_dump_rsb(struct dlm_rsb *r); -void dlm_dump_rsb_name(struct dlm_ls *ls, char *name, int len); +void dlm_dump_rsb_name(struct dlm_ls *ls, const char *name, int len); void dlm_print_lkb(struct dlm_lkb *lkb); -void dlm_receive_message_saved(struct dlm_ls *ls, struct dlm_message *ms, +void dlm_receive_message_saved(struct dlm_ls *ls, const struct dlm_message *ms, uint32_t saved_seq); -void dlm_receive_buffer(union dlm_packet *p, int nodeid); +void 
dlm_receive_buffer(const union dlm_packet *p, int nodeid); int dlm_modes_compat(int mode1, int mode2); void dlm_put_rsb(struct dlm_rsb *r); void dlm_hold_rsb(struct dlm_rsb *r); @@ -25,8 +25,8 @@ void dlm_scan_rsbs(struct dlm_ls *ls); int dlm_lock_recovery_try(struct dlm_ls *ls); void dlm_unlock_recovery(struct dlm_ls *ls); -int dlm_master_lookup(struct dlm_ls *ls, int nodeid, char *name, int len, - unsigned int flags, int *r_nodeid, int *result); +int dlm_master_lookup(struct dlm_ls *ls, int from_nodeid, const char *name, + int len, unsigned int flags, int *r_nodeid, int *result); int dlm_search_rsb_tree(struct rb_root *tree, const void *name, int len, struct dlm_rsb **r_ret); @@ -36,8 +36,10 @@ void dlm_purge_mstcpy_locks(struct dlm_rsb *r); void dlm_recover_grant(struct dlm_ls *ls); int dlm_recover_waiters_post(struct dlm_ls *ls); void dlm_recover_waiters_pre(struct dlm_ls *ls); -int dlm_recover_master_copy(struct dlm_ls *ls, struct dlm_rcom *rc); -int dlm_recover_process_copy(struct dlm_ls *ls, struct dlm_rcom *rc); +int dlm_recover_master_copy(struct dlm_ls *ls, const struct dlm_rcom *rc, + __le32 *rl_remid, __le32 *rl_result); +int dlm_recover_process_copy(struct dlm_ls *ls, const struct dlm_rcom *rc, + uint64_t seq); int dlm_user_request(struct dlm_ls *ls, struct dlm_user_args *ua, int mode, uint32_t flags, void *name, unsigned int namelen); diff --git a/fs/dlm/lowcomms.c b/fs/dlm/lowcomms.c index 9f14ea9f63224..f7bc22e74db27 100644 --- a/fs/dlm/lowcomms.c +++ b/fs/dlm/lowcomms.c @@ -863,7 +863,6 @@ struct dlm_processed_nodes { static void process_dlm_messages(struct work_struct *work) { struct processqueue_entry *pentry; - LIST_HEAD(processed_nodes); spin_lock(&processqueue_lock); pentry = list_first_entry_or_null(&processqueue, diff --git a/fs/dlm/member.c b/fs/dlm/member.c index 77d202e4a02a4..be7909ead71b4 100644 --- a/fs/dlm/member.c +++ b/fs/dlm/member.c @@ -18,7 +18,7 @@ #include "midcomms.h" #include "lowcomms.h" -int dlm_slots_version(struct dlm_header *h) +int dlm_slots_version(const struct dlm_header *h) { if ((le32_to_cpu(h->h_version) & 0x0000FFFF) < DLM_HEADER_SLOTS) return 0; @@ -393,14 +393,9 @@ static void remove_remote_member(int nodeid) dlm_midcomms_remove_member(nodeid); } -static void clear_members_cb(int nodeid) -{ - remove_remote_member(nodeid); -} - void dlm_clear_members(struct dlm_ls *ls) { - clear_memb_list(&ls->ls_nodes, clear_members_cb); + clear_memb_list(&ls->ls_nodes, remove_remote_member); ls->ls_num_nodes = 0; } @@ -454,7 +449,7 @@ static void make_member_array(struct dlm_ls *ls) /* send a status request to all members just to establish comms connections */ -static int ping_members(struct dlm_ls *ls) +static int ping_members(struct dlm_ls *ls, uint64_t seq) { struct dlm_member *memb; int error = 0; @@ -464,7 +459,7 @@ static int ping_members(struct dlm_ls *ls) error = -EINTR; break; } - error = dlm_rcom_status(ls, memb->nodeid, 0); + error = dlm_rcom_status(ls, memb->nodeid, 0, seq); if (error) break; } @@ -612,7 +607,7 @@ int dlm_recover_members(struct dlm_ls *ls, struct dlm_recover *rv, int *neg_out) make_member_array(ls); *neg_out = neg; - error = ping_members(ls); + error = ping_members(ls, rv->seq); log_rinfo(ls, "dlm_recover_members %d nodes", ls->ls_num_nodes); return error; } diff --git a/fs/dlm/member.h b/fs/dlm/member.h index 433b2fac9f4a7..f61cfde463140 100644 --- a/fs/dlm/member.h +++ b/fs/dlm/member.h @@ -18,7 +18,7 @@ void dlm_clear_members_gone(struct dlm_ls *ls); int dlm_recover_members(struct dlm_ls *ls, struct dlm_recover *rv,int 
*neg_out); int dlm_is_removed(struct dlm_ls *ls, int nodeid); int dlm_is_member(struct dlm_ls *ls, int nodeid); -int dlm_slots_version(struct dlm_header *h); +int dlm_slots_version(const struct dlm_header *h); void dlm_slot_save(struct dlm_ls *ls, struct dlm_rcom *rc, struct dlm_member *memb); void dlm_slots_copy_out(struct dlm_ls *ls, struct dlm_rcom *rc); diff --git a/fs/dlm/midcomms.c b/fs/dlm/midcomms.c index e1a0df67b5669..f641b36a36db0 100644 --- a/fs/dlm/midcomms.c +++ b/fs/dlm/midcomms.c @@ -330,18 +330,23 @@ static void midcomms_node_reset(struct midcomms_node *node) wake_up(&node->shutdown_wait); } -static struct midcomms_node *nodeid2node(int nodeid, gfp_t alloc) +static struct midcomms_node *nodeid2node(int nodeid) { - struct midcomms_node *node, *tmp; - int r = nodeid_hash(nodeid); + return __find_node(nodeid, nodeid_hash(nodeid)); +} + +int dlm_midcomms_addr(int nodeid, struct sockaddr_storage *addr, int len) +{ + int ret, r = nodeid_hash(nodeid); + struct midcomms_node *node; - node = __find_node(nodeid, r); - if (node || !alloc) - return node; + ret = dlm_lowcomms_addr(nodeid, addr, len); + if (ret) + return ret; - node = kmalloc(sizeof(*node), alloc); + node = kmalloc(sizeof(*node), GFP_NOFS); if (!node) - return NULL; + return -ENOMEM; node->nodeid = nodeid; spin_lock_init(&node->state_lock); @@ -353,21 +358,11 @@ static struct midcomms_node *nodeid2node(int nodeid, gfp_t alloc) midcomms_node_reset(node); spin_lock(&nodes_lock); - /* check again if there was somebody else - * earlier here to add the node - */ - tmp = __find_node(nodeid, r); - if (tmp) { - spin_unlock(&nodes_lock); - kfree(node); - return tmp; - } - hlist_add_head_rcu(&node->hlist, &node_hash[r]); spin_unlock(&nodes_lock); node->debugfs = dlm_create_debug_comms_file(nodeid, node); - return node; + return 0; } static int dlm_send_ack(int nodeid, uint32_t seq) @@ -499,7 +494,8 @@ static void dlm_pas_fin_ack_rcv(struct midcomms_node *node) spin_unlock(&node->state_lock); } -static void dlm_receive_buffer_3_2_trace(uint32_t seq, union dlm_packet *p) +static void dlm_receive_buffer_3_2_trace(uint32_t seq, + const union dlm_packet *p) { switch (p->header.h_cmd) { case DLM_MSG: @@ -513,7 +509,7 @@ static void dlm_receive_buffer_3_2_trace(uint32_t seq, union dlm_packet *p) } } -static void dlm_midcomms_receive_buffer(union dlm_packet *p, +static void dlm_midcomms_receive_buffer(const union dlm_packet *p, struct midcomms_node *node, uint32_t seq) { @@ -602,113 +598,8 @@ static void dlm_midcomms_receive_buffer(union dlm_packet *p, } } -static struct midcomms_node * -dlm_midcomms_recv_node_lookup(int nodeid, const union dlm_packet *p, - uint16_t msglen, int (*cb)(struct midcomms_node *node)) -{ - struct midcomms_node *node = NULL; - gfp_t allocation = 0; - int ret; - - switch (p->header.h_cmd) { - case DLM_RCOM: - if (msglen < sizeof(struct dlm_rcom)) { - log_print("rcom msg too small: %u, will skip this message from node %d", - msglen, nodeid); - return NULL; - } - - switch (p->rcom.rc_type) { - case cpu_to_le32(DLM_RCOM_NAMES): - fallthrough; - case cpu_to_le32(DLM_RCOM_NAMES_REPLY): - fallthrough; - case cpu_to_le32(DLM_RCOM_STATUS): - fallthrough; - case cpu_to_le32(DLM_RCOM_STATUS_REPLY): - node = nodeid2node(nodeid, 0); - if (node) { - spin_lock(&node->state_lock); - if (node->state != DLM_ESTABLISHED) - pr_debug("receive begin RCOM msg from node %d with state %s\n", - node->nodeid, dlm_state_str(node->state)); - - switch (node->state) { - case DLM_CLOSED: - node->state = DLM_ESTABLISHED; - pr_debug("switch 
node %d to state %s\n", - node->nodeid, dlm_state_str(node->state)); - break; - case DLM_ESTABLISHED: - break; - default: - spin_unlock(&node->state_lock); - return NULL; - } - spin_unlock(&node->state_lock); - } - - allocation = GFP_NOFS; - break; - default: - break; - } - - break; - default: - break; - } - - node = nodeid2node(nodeid, allocation); - if (!node) { - switch (p->header.h_cmd) { - case DLM_OPTS: - if (msglen < sizeof(struct dlm_opts)) { - log_print("opts msg too small: %u, will skip this message from node %d", - msglen, nodeid); - return NULL; - } - - log_print_ratelimited("received dlm opts message nextcmd %d from node %d in an invalid sequence", - p->opts.o_nextcmd, nodeid); - break; - default: - log_print_ratelimited("received dlm message cmd %d from node %d in an invalid sequence", - p->header.h_cmd, nodeid); - break; - } - - return NULL; - } - - ret = cb(node); - if (ret < 0) - return NULL; - - return node; -} - -static int dlm_midcomms_version_check_3_2(struct midcomms_node *node) -{ - switch (node->version) { - case DLM_VERSION_NOT_SET: - node->version = DLM_VERSION_3_2; - wake_up(&node->shutdown_wait); - log_print("version 0x%08x for node %d detected", DLM_VERSION_3_2, - node->nodeid); - break; - case DLM_VERSION_3_2: - break; - default: - log_print_ratelimited("version mismatch detected, assumed 0x%08x but node %d has 0x%08x", - DLM_VERSION_3_2, node->nodeid, node->version); - return -1; - } - - return 0; -} - -static int dlm_opts_check_msglen(union dlm_packet *p, uint16_t msglen, int nodeid) +static int dlm_opts_check_msglen(const union dlm_packet *p, uint16_t msglen, + int nodeid) { int len = msglen; @@ -757,7 +648,7 @@ static int dlm_opts_check_msglen(union dlm_packet *p, uint16_t msglen, int nodei return 0; } -static void dlm_midcomms_receive_buffer_3_2(union dlm_packet *p, int nodeid) +static void dlm_midcomms_receive_buffer_3_2(const union dlm_packet *p, int nodeid) { uint16_t msglen = le16_to_cpu(p->header.h_length); struct midcomms_node *node; @@ -765,10 +656,37 @@ static void dlm_midcomms_receive_buffer_3_2(union dlm_packet *p, int nodeid) int ret, idx; idx = srcu_read_lock(&nodes_srcu); - node = dlm_midcomms_recv_node_lookup(nodeid, p, msglen, - dlm_midcomms_version_check_3_2); - if (!node) + node = nodeid2node(nodeid); + if (WARN_ON_ONCE(!node)) + goto out; + + switch (node->version) { + case DLM_VERSION_NOT_SET: + node->version = DLM_VERSION_3_2; + wake_up(&node->shutdown_wait); + log_print("version 0x%08x for node %d detected", DLM_VERSION_3_2, + node->nodeid); + + spin_lock(&node->state_lock); + switch (node->state) { + case DLM_CLOSED: + node->state = DLM_ESTABLISHED; + pr_debug("switch node %d to state %s\n", + node->nodeid, dlm_state_str(node->state)); + break; + default: + break; + } + spin_unlock(&node->state_lock); + + break; + case DLM_VERSION_3_2: + break; + default: + log_print_ratelimited("version mismatch detected, assumed 0x%08x but node %d has 0x%08x", + DLM_VERSION_3_2, node->nodeid, node->version); goto out; + } switch (p->header.h_cmd) { case DLM_RCOM: @@ -858,8 +776,19 @@ static void dlm_midcomms_receive_buffer_3_2(union dlm_packet *p, int nodeid) srcu_read_unlock(&nodes_srcu, idx); } -static int dlm_midcomms_version_check_3_1(struct midcomms_node *node) +static void dlm_midcomms_receive_buffer_3_1(const union dlm_packet *p, int nodeid) { + uint16_t msglen = le16_to_cpu(p->header.h_length); + struct midcomms_node *node; + int idx; + + idx = srcu_read_lock(&nodes_srcu); + node = nodeid2node(nodeid); + if (WARN_ON_ONCE(!node)) { + 
srcu_read_unlock(&nodes_srcu, idx); + return; + } + switch (node->version) { case DLM_VERSION_NOT_SET: node->version = DLM_VERSION_3_1; @@ -872,22 +801,6 @@ static int dlm_midcomms_version_check_3_1(struct midcomms_node *node) default: log_print_ratelimited("version mismatch detected, assumed 0x%08x but node %d has 0x%08x", DLM_VERSION_3_1, node->nodeid, node->version); - return -1; - } - - return 0; -} - -static void dlm_midcomms_receive_buffer_3_1(union dlm_packet *p, int nodeid) -{ - uint16_t msglen = le16_to_cpu(p->header.h_length); - struct midcomms_node *node; - int idx; - - idx = srcu_read_lock(&nodes_srcu); - node = dlm_midcomms_recv_node_lookup(nodeid, p, msglen, - dlm_midcomms_version_check_3_1); - if (!node) { srcu_read_unlock(&nodes_srcu, idx); return; } @@ -977,10 +890,10 @@ int dlm_process_incoming_buffer(int nodeid, unsigned char *buf, int len) switch (hd->h_version) { case cpu_to_le32(DLM_VERSION_3_1): - dlm_midcomms_receive_buffer_3_1((union dlm_packet *)ptr, nodeid); + dlm_midcomms_receive_buffer_3_1((const union dlm_packet *)ptr, nodeid); break; case cpu_to_le32(DLM_VERSION_3_2): - dlm_midcomms_receive_buffer_3_2((union dlm_packet *)ptr, nodeid); + dlm_midcomms_receive_buffer_3_2((const union dlm_packet *)ptr, nodeid); break; default: log_print("received invalid version header: %u from node %d, will skip this message", @@ -1003,8 +916,8 @@ void dlm_midcomms_unack_msg_resend(int nodeid) int idx, ret; idx = srcu_read_lock(&nodes_srcu); - node = nodeid2node(nodeid, 0); - if (!node) { + node = nodeid2node(nodeid); + if (WARN_ON_ONCE(!node)) { srcu_read_unlock(&nodes_srcu, idx); return; } @@ -1090,11 +1003,9 @@ struct dlm_mhandle *dlm_midcomms_get_mhandle(int nodeid, int len, int idx; idx = srcu_read_lock(&nodes_srcu); - node = nodeid2node(nodeid, 0); - if (!node) { - WARN_ON_ONCE(1); + node = nodeid2node(nodeid); + if (WARN_ON_ONCE(!node)) goto err; - } /* this is a bug, however we going on and hope it will be resolved */ WARN_ON_ONCE(test_bit(DLM_NODE_FLAG_STOP_TX, &node->flags)); @@ -1235,8 +1146,34 @@ void dlm_midcomms_init(void) dlm_lowcomms_init(); } +static void midcomms_node_release(struct rcu_head *rcu) +{ + struct midcomms_node *node = container_of(rcu, struct midcomms_node, rcu); + + WARN_ON_ONCE(atomic_read(&node->send_queue_cnt)); + dlm_send_queue_flush(node); + kfree(node); +} + void dlm_midcomms_exit(void) { + struct midcomms_node *node; + int i, idx; + + idx = srcu_read_lock(&nodes_srcu); + for (i = 0; i < CONN_HASH_SIZE; i++) { + hlist_for_each_entry_rcu(node, &node_hash[i], hlist) { + dlm_delete_debug_comms_file(node->debugfs); + + spin_lock(&nodes_lock); + hlist_del_rcu(&node->hlist); + spin_unlock(&nodes_lock); + + call_srcu(&nodes_srcu, &node->rcu, midcomms_node_release); + } + } + srcu_read_unlock(&nodes_srcu, idx); + dlm_lowcomms_exit(); } @@ -1277,8 +1214,8 @@ void dlm_midcomms_add_member(int nodeid) int idx; idx = srcu_read_lock(&nodes_srcu); - node = nodeid2node(nodeid, GFP_NOFS); - if (!node) { + node = nodeid2node(nodeid); + if (WARN_ON_ONCE(!node)) { srcu_read_unlock(&nodes_srcu, idx); return; } @@ -1322,8 +1259,8 @@ void dlm_midcomms_remove_member(int nodeid) int idx; idx = srcu_read_lock(&nodes_srcu); - node = nodeid2node(nodeid, 0); - if (!node) { + node = nodeid2node(nodeid); + if (WARN_ON_ONCE(!node)) { srcu_read_unlock(&nodes_srcu, idx); return; } @@ -1367,15 +1304,6 @@ void dlm_midcomms_remove_member(int nodeid) srcu_read_unlock(&nodes_srcu, idx); } -static void midcomms_node_release(struct rcu_head *rcu) -{ - struct midcomms_node *node = 
container_of(rcu, struct midcomms_node, rcu); - - WARN_ON_ONCE(atomic_read(&node->send_queue_cnt)); - dlm_send_queue_flush(node); - kfree(node); -} - void dlm_midcomms_version_wait(void) { struct midcomms_node *node; @@ -1438,7 +1366,7 @@ static void midcomms_shutdown(struct midcomms_node *node) node->state == DLM_CLOSED || test_bit(DLM_NODE_FLAG_CLOSE, &node->flags), DLM_SHUTDOWN_TIMEOUT); - if (!ret || test_bit(DLM_NODE_FLAG_CLOSE, &node->flags)) + if (!ret) pr_debug("active shutdown timed out for node %d with state %s\n", node->nodeid, dlm_state_str(node->state)); else @@ -1456,14 +1384,6 @@ void dlm_midcomms_shutdown(void) for (i = 0; i < CONN_HASH_SIZE; i++) { hlist_for_each_entry_rcu(node, &node_hash[i], hlist) { midcomms_shutdown(node); - - dlm_delete_debug_comms_file(node->debugfs); - - spin_lock(&nodes_lock); - hlist_del_rcu(&node->hlist); - spin_unlock(&nodes_lock); - - call_srcu(&nodes_srcu, &node->rcu, midcomms_node_release); } } srcu_read_unlock(&nodes_srcu, idx); @@ -1479,7 +1399,7 @@ int dlm_midcomms_close(int nodeid) idx = srcu_read_lock(&nodes_srcu); /* Abort pending close/remove operation */ - node = nodeid2node(nodeid, 0); + node = nodeid2node(nodeid); if (node) { /* let shutdown waiters leave */ set_bit(DLM_NODE_FLAG_CLOSE, &node->flags); @@ -1489,20 +1409,32 @@ synchronize_srcu(&nodes_srcu); - idx = srcu_read_lock(&nodes_srcu); mutex_lock(&close_lock); - node = nodeid2node(nodeid, 0); + idx = srcu_read_lock(&nodes_srcu); + node = nodeid2node(nodeid); if (!node) { - mutex_unlock(&close_lock); srcu_read_unlock(&nodes_srcu, idx); + srcu_read_unlock(&nodes_srcu, idx); + mutex_unlock(&close_lock); return dlm_lowcomms_close(nodeid); } ret = dlm_lowcomms_close(nodeid); - spin_lock(&node->state_lock); - midcomms_node_reset(node); - spin_unlock(&node->state_lock); + dlm_delete_debug_comms_file(node->debugfs); + + spin_lock(&nodes_lock); + hlist_del_rcu(&node->hlist); + spin_unlock(&nodes_lock); srcu_read_unlock(&nodes_srcu, idx); + + /* wait until all readers have left before flushing the send queue */ + synchronize_srcu(&nodes_srcu); + + /* drop all pending dlm messages, this is fine as + * this function gets called when the node is fenced + */ + dlm_send_queue_flush(node); + + call_srcu(&nodes_srcu, &node->rcu, midcomms_node_release); mutex_unlock(&close_lock); return ret; diff --git a/fs/dlm/midcomms.h b/fs/dlm/midcomms.h index 9f8c9605013d0..e7246fb3ef577 100644 --- a/fs/dlm/midcomms.h +++ b/fs/dlm/midcomms.h @@ -20,6 +20,7 @@ struct dlm_mhandle *dlm_midcomms_get_mhandle(int nodeid, int len, gfp_t allocation, char **ppc); void dlm_midcomms_commit_mhandle(struct dlm_mhandle *mh, const void *name, int namelen); +int dlm_midcomms_addr(int nodeid, struct sockaddr_storage *addr, int len); void dlm_midcomms_version_wait(void); int dlm_midcomms_close(int nodeid); int dlm_midcomms_start(void); diff --git a/fs/dlm/plock.c b/fs/dlm/plock.c index 70a4752ed913a..e6b4c1a214466 100644 --- a/fs/dlm/plock.c +++ b/fs/dlm/plock.c @@ -11,6 +11,8 @@ #include #include +#include + #include "dlm_internal.h" #include "lockspace.h" @@ -42,6 +44,27 @@ static inline void set_version(struct dlm_plock_info *info) info->version[2] = DLM_PLOCK_VERSION_PATCH; } +static struct plock_op *plock_lookup_waiter(const struct dlm_plock_info *info) +{ + struct plock_op *op = NULL, *iter; + + list_for_each_entry(iter, &recv_list, list) { + if (iter->info.fsid == info->fsid && + iter->info.number == info->number && + iter->info.owner == info->owner && + iter->info.pid == info->pid && + iter->info.start == info->start && +
iter->info.end == info->end && + iter->info.ex == info->ex && + iter->info.wait) { + op = iter; + break; + } + } + + return op; +} + static int check_version(struct dlm_plock_info *info) { if ((DLM_PLOCK_VERSION_MAJOR != info->version[0]) || @@ -74,30 +97,26 @@ static void send_op(struct plock_op *op) wake_up(&send_wq); } -/* If a process was killed while waiting for the only plock on a file, locks_remove_posix will not see any lock on the file so it won't send an unlock-close to us to pass on to userspace to clean up the abandoned waiter. So, we have to insert the unlock-close when the lock call is interrupted. */ - -static void do_unlock_close(const struct dlm_plock_info *info) +static int do_lock_cancel(const struct dlm_plock_info *orig_info) { struct plock_op *op; + int rv; op = kzalloc(sizeof(*op), GFP_NOFS); if (!op) - return; + return -ENOMEM; + + op->info = *orig_info; + op->info.optype = DLM_PLOCK_OP_CANCEL; + op->info.wait = 0; - op->info.optype = DLM_PLOCK_OP_UNLOCK; - op->info.pid = info->pid; - op->info.fsid = info->fsid; - op->info.number = info->number; - op->info.start = 0; - op->info.end = OFFSET_MAX; - op->info.owner = info->owner; - - op->info.flags |= DLM_PLOCK_FL_CLOSE; send_op(op); + wait_event(recv_wq, (op->done != 0)); + + rv = op->info.rv; + + dlm_release_plock_op(op); + return rv; } int dlm_posix_lock(dlm_lockspace_t *lockspace, u64 number, struct file *file, @@ -156,7 +175,7 @@ int dlm_posix_lock(dlm_lockspace_t *lockspace, u64 number, struct file *file, send_op(op); if (op->info.wait) { - rv = wait_event_killable(recv_wq, (op->done != 0)); + rv = wait_event_interruptible(recv_wq, (op->done != 0)); if (rv == -ERESTARTSYS) { spin_lock(&ops_lock); /* recheck under ops_lock if we got a done != 0, @@ -166,17 +185,37 @@ int dlm_posix_lock(dlm_lockspace_t *lockspace, u64 number, struct file *file, spin_unlock(&ops_lock); goto do_lock_wait; } - list_del(&op->list); spin_unlock(&ops_lock); + rv = do_lock_cancel(&op->info); + switch (rv) { + case 0: + /* waiter was deleted in user space, the answer will never come; + * remove the original request. The original request must be + * on recv_list because the answer of do_lock_cancel() + * synchronized it. + */ + spin_lock(&ops_lock); + list_del(&op->list); + spin_unlock(&ops_lock); + rv = -EINTR; + break; + case -ENOENT: + /* cancellation wasn't successful but op should be done */ + fallthrough; + default: + /* internal error doing cancel, we need to wait */ + goto wait; + } + log_debug(ls, "%s: wait interrupted %x %llx pid %d", __func__, ls->ls_global_id, (unsigned long long)number, op->info.pid); - do_unlock_close(&op->info); dlm_release_plock_op(op); goto out; } } else { +wait: wait_event(recv_wq, (op->done != 0)); } @@ -240,8 +279,8 @@ static int dlm_plock_callback(struct plock_op *op) rv = notify(fl, 0); if (rv) { /* XXX: We need to cancel the fs lock here: */ - log_print("dlm_plock_callback: lock granted after lock request " - "failed; dangling lock!\n"); + log_print("%s: lock granted after lock request failed; dangling lock!", + __func__); goto out; } @@ -318,6 +357,75 @@ int dlm_posix_unlock(dlm_lockspace_t *lockspace, u64 number, struct file *file, } EXPORT_SYMBOL_GPL(dlm_posix_unlock); +/* + * NOTE: This implementation can only handle async lock requests as NFS + * does. It cannot handle cancellation of a pending lock request sitting + * in wait_event(), but for now NFS is the only local kernel user.
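+ * A cancel is implemented by sending DLM_PLOCK_OP_CANCEL to user space + * and waiting for the answer: 0 means the waiter was removed there, + * -ENOENT means the original request already completed.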
+ */ +int dlm_posix_cancel(dlm_lockspace_t *lockspace, u64 number, struct file *file, + struct file_lock *fl) +{ + struct dlm_plock_info info; + struct plock_op *op; + struct dlm_ls *ls; + int rv; + + /* this only works for async requests for now, and nfs is the + * only kernel user right now. + */ + if (WARN_ON_ONCE(!fl->fl_lmops || !fl->fl_lmops->lm_grant)) + return -EOPNOTSUPP; + + ls = dlm_find_lockspace_local(lockspace); + if (!ls) + return -EINVAL; + + memset(&info, 0, sizeof(info)); + info.pid = fl->fl_pid; + info.ex = (fl->fl_type == F_WRLCK); + info.fsid = ls->ls_global_id; + dlm_put_lockspace(ls); + info.number = number; + info.start = fl->fl_start; + info.end = fl->fl_end; + info.owner = (__u64)fl->fl_pid; + + rv = do_lock_cancel(&info); + switch (rv) { + case 0: + spin_lock(&ops_lock); + /* lock request to cancel must be on recv_list because + * do_lock_cancel() synchronizes it. + */ + op = plock_lookup_waiter(&info); + if (WARN_ON_ONCE(!op)) { + spin_unlock(&ops_lock); + rv = -ENOLCK; + break; + } + + list_del(&op->list); + spin_unlock(&ops_lock); + WARN_ON(op->info.optype != DLM_PLOCK_OP_LOCK); + op->data->callback(op->data->fl, -EINTR); + dlm_release_plock_op(op); + rv = -EINTR; + break; + case -ENOENT: + /* if cancel wasn't successful we probably were too late + * or it was a non-blocking lock request, so just unlock it. + */ + rv = dlm_posix_unlock(lockspace, number, file, fl); + break; + default: + break; + } + + return rv; +} +EXPORT_SYMBOL_GPL(dlm_posix_cancel); + int dlm_posix_get(dlm_lockspace_t *lockspace, u64 number, struct file *file, struct file_lock *fl) { @@ -403,6 +511,8 @@ static ssize_t dev_read(struct file *file, char __user *u, size_t count, if (!op) return -EAGAIN; + trace_dlm_plock_read(&info); + /* there is no need to get a reply from userspace for unlocks that were generated by the vfs cleaning up for a close (the process did not make an unlock call).
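+ * Such ops carry DLM_PLOCK_FL_CLOSE and are released right after being + * copied to user space, instead of being moved to recv_list to wait + * for a reply.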
*/ @@ -430,6 +540,8 @@ static ssize_t dev_write(struct file *file, const char __user *u, size_t count, if (copy_from_user(&info, u, sizeof(info))) return -EFAULT; + trace_dlm_plock_write(&info); + if (check_version(&info)) return -EINVAL; @@ -441,22 +553,11 @@ static ssize_t dev_write(struct file *file, const char __user *u, size_t count, */ spin_lock(&ops_lock); if (info.wait) { - list_for_each_entry(iter, &recv_list, list) { - if (iter->info.fsid == info.fsid && - iter->info.number == info.number && - iter->info.owner == info.owner && - iter->info.pid == info.pid && - iter->info.start == info.start && - iter->info.end == info.end && - iter->info.ex == info.ex && - iter->info.wait) { - op = iter; - break; - } - } + op = plock_lookup_waiter(&info); } else { list_for_each_entry(iter, &recv_list, list) { - if (!iter->info.wait) { + if (!iter->info.wait && + iter->info.fsid == info.fsid) { op = iter; break; } @@ -468,8 +569,7 @@ static ssize_t dev_write(struct file *file, const char __user *u, size_t count, if (info.wait) WARN_ON(op->info.optype != DLM_PLOCK_OP_LOCK); else - WARN_ON(op->info.fsid != info.fsid || - op->info.number != info.number || + WARN_ON(op->info.number != info.number || op->info.owner != info.owner || op->info.optype != info.optype); @@ -534,5 +634,7 @@ int dlm_plock_init(void) void dlm_plock_exit(void) { misc_deregister(&plock_dev_misc); + WARN_ON(!list_empty(&send_list)); + WARN_ON(!list_empty(&recv_list)); } diff --git a/fs/dlm/rcom.c b/fs/dlm/rcom.c index f4afdf892f785..3b734aed26b54 100644 --- a/fs/dlm/rcom.c +++ b/fs/dlm/rcom.c @@ -28,7 +28,8 @@ static int rcom_response(struct dlm_ls *ls) } static void _create_rcom(struct dlm_ls *ls, int to_nodeid, int type, int len, - struct dlm_rcom **rc_ret, char *mb, int mb_len) + struct dlm_rcom **rc_ret, char *mb, int mb_len, + uint64_t seq) { struct dlm_rcom *rc; @@ -41,16 +42,14 @@ static void _create_rcom(struct dlm_ls *ls, int to_nodeid, int type, int len, rc->rc_header.h_cmd = DLM_RCOM; rc->rc_type = cpu_to_le32(type); - - spin_lock(&ls->ls_recover_lock); - rc->rc_seq = cpu_to_le64(ls->ls_recover_seq); - spin_unlock(&ls->ls_recover_lock); + rc->rc_seq = cpu_to_le64(seq); *rc_ret = rc; } static int create_rcom(struct dlm_ls *ls, int to_nodeid, int type, int len, - struct dlm_rcom **rc_ret, struct dlm_mhandle **mh_ret) + struct dlm_rcom **rc_ret, struct dlm_mhandle **mh_ret, + uint64_t seq) { int mb_len = sizeof(struct dlm_rcom) + len; struct dlm_mhandle *mh; @@ -63,14 +62,14 @@ static int create_rcom(struct dlm_ls *ls, int to_nodeid, int type, int len, return -ENOBUFS; } - _create_rcom(ls, to_nodeid, type, len, rc_ret, mb, mb_len); + _create_rcom(ls, to_nodeid, type, len, rc_ret, mb, mb_len, seq); *mh_ret = mh; return 0; } static int create_rcom_stateless(struct dlm_ls *ls, int to_nodeid, int type, int len, struct dlm_rcom **rc_ret, - struct dlm_msg **msg_ret) + struct dlm_msg **msg_ret, uint64_t seq) { int mb_len = sizeof(struct dlm_rcom) + len; struct dlm_msg *msg; @@ -84,7 +83,7 @@ static int create_rcom_stateless(struct dlm_ls *ls, int to_nodeid, int type, return -ENOBUFS; } - _create_rcom(ls, to_nodeid, type, len, rc_ret, mb, mb_len); + _create_rcom(ls, to_nodeid, type, len, rc_ret, mb, mb_len, seq); *msg_ret = msg; return 0; } @@ -170,7 +169,8 @@ static void disallow_sync_reply(struct dlm_ls *ls) * node's rcom_config. 
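+ * The caller now supplies the recovery sequence number to stamp into + * the rcom, so _create_rcom() no longer samples ls_recover_seq under + * ls_recover_lock.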
*/ -int dlm_rcom_status(struct dlm_ls *ls, int nodeid, uint32_t status_flags) +int dlm_rcom_status(struct dlm_ls *ls, int nodeid, uint32_t status_flags, + uint64_t seq) { struct dlm_rcom *rc; struct dlm_msg *msg; @@ -186,7 +186,8 @@ int dlm_rcom_status(struct dlm_ls *ls, int nodeid, uint32_t status_flags) retry: error = create_rcom_stateless(ls, nodeid, DLM_RCOM_STATUS, - sizeof(struct rcom_status), &rc, &msg); + sizeof(struct rcom_status), &rc, &msg, + seq); if (error) goto out; @@ -220,7 +221,9 @@ int dlm_rcom_status(struct dlm_ls *ls, int nodeid, uint32_t status_flags) return error; } -static void receive_rcom_status(struct dlm_ls *ls, struct dlm_rcom *rc_in) +static void receive_rcom_status(struct dlm_ls *ls, + const struct dlm_rcom *rc_in, + uint64_t seq) { struct dlm_rcom *rc; struct rcom_status *rs; @@ -251,7 +254,7 @@ static void receive_rcom_status(struct dlm_ls *ls, struct dlm_rcom *rc_in) do_create: error = create_rcom_stateless(ls, nodeid, DLM_RCOM_STATUS_REPLY, - len, &rc, &msg); + len, &rc, &msg, seq); if (error) return; @@ -281,7 +284,7 @@ static void receive_rcom_status(struct dlm_ls *ls, struct dlm_rcom *rc_in) send_rcom_stateless(msg, rc); } -static void receive_sync_reply(struct dlm_ls *ls, struct dlm_rcom *rc_in) +static void receive_sync_reply(struct dlm_ls *ls, const struct dlm_rcom *rc_in) { spin_lock(&ls->ls_rcom_spin); if (!test_bit(LSFL_RCOM_WAIT, &ls->ls_flags) || @@ -302,17 +305,18 @@ static void receive_sync_reply(struct dlm_ls *ls, struct dlm_rcom *rc_in) spin_unlock(&ls->ls_rcom_spin); } -int dlm_rcom_names(struct dlm_ls *ls, int nodeid, char *last_name, int last_len) +int dlm_rcom_names(struct dlm_ls *ls, int nodeid, char *last_name, + int last_len, uint64_t seq) { + struct dlm_mhandle *mh; struct dlm_rcom *rc; - struct dlm_msg *msg; int error = 0; ls->ls_recover_nodeid = nodeid; retry: - error = create_rcom_stateless(ls, nodeid, DLM_RCOM_NAMES, last_len, - &rc, &msg); + error = create_rcom(ls, nodeid, DLM_RCOM_NAMES, last_len, + &rc, &mh, seq); if (error) goto out; memcpy(rc->rc_buf, last_name, last_len); @@ -320,7 +324,7 @@ int dlm_rcom_names(struct dlm_ls *ls, int nodeid, char *last_name, int last_len) allow_sync_reply(ls, &rc->rc_id); memset(ls->ls_recover_buf, 0, DLM_MAX_SOCKET_BUFSIZE); - send_rcom_stateless(msg, rc); + send_rcom(mh, rc); error = dlm_wait_function(ls, &rcom_response); disallow_sync_reply(ls); @@ -330,19 +334,20 @@ int dlm_rcom_names(struct dlm_ls *ls, int nodeid, char *last_name, int last_len) return error; } -static void receive_rcom_names(struct dlm_ls *ls, struct dlm_rcom *rc_in) +static void receive_rcom_names(struct dlm_ls *ls, const struct dlm_rcom *rc_in, + uint64_t seq) { + struct dlm_mhandle *mh; struct dlm_rcom *rc; int error, inlen, outlen, nodeid; - struct dlm_msg *msg; nodeid = le32_to_cpu(rc_in->rc_header.h_nodeid); inlen = le16_to_cpu(rc_in->rc_header.h_length) - sizeof(struct dlm_rcom); outlen = DLM_MAX_APP_BUFSIZE - sizeof(struct dlm_rcom); - error = create_rcom_stateless(ls, nodeid, DLM_RCOM_NAMES_REPLY, outlen, - &rc, &msg); + error = create_rcom(ls, nodeid, DLM_RCOM_NAMES_REPLY, outlen, + &rc, &mh, seq); if (error) return; rc->rc_id = rc_in->rc_id; @@ -350,10 +355,10 @@ static void receive_rcom_names(struct dlm_ls *ls, struct dlm_rcom *rc_in) dlm_copy_master_names(ls, rc_in->rc_buf, inlen, rc->rc_buf, outlen, nodeid); - send_rcom_stateless(msg, rc); + send_rcom(mh, rc); } -int dlm_send_rcom_lookup(struct dlm_rsb *r, int dir_nodeid) +int dlm_send_rcom_lookup(struct dlm_rsb *r, int dir_nodeid, uint64_t seq) { struct 
dlm_rcom *rc; struct dlm_mhandle *mh; @@ -361,7 +366,7 @@ int dlm_send_rcom_lookup(struct dlm_rsb *r, int dir_nodeid) int error; error = create_rcom(ls, dir_nodeid, DLM_RCOM_LOOKUP, r->res_length, - &rc, &mh); + &rc, &mh, seq); if (error) goto out; memcpy(rc->rc_buf, r->res_name, r->res_length); @@ -372,7 +377,8 @@ int dlm_send_rcom_lookup(struct dlm_rsb *r, int dir_nodeid) return error; } -static void receive_rcom_lookup(struct dlm_ls *ls, struct dlm_rcom *rc_in) +static void receive_rcom_lookup(struct dlm_ls *ls, + const struct dlm_rcom *rc_in, uint64_t seq) { struct dlm_rcom *rc; struct dlm_mhandle *mh; @@ -387,7 +393,8 @@ static void receive_rcom_lookup(struct dlm_ls *ls, struct dlm_rcom *rc_in) return; } - error = create_rcom(ls, nodeid, DLM_RCOM_LOOKUP_REPLY, 0, &rc, &mh); + error = create_rcom(ls, nodeid, DLM_RCOM_LOOKUP_REPLY, 0, &rc, &mh, + seq); if (error) return; @@ -402,7 +409,8 @@ static void receive_rcom_lookup(struct dlm_ls *ls, struct dlm_rcom *rc_in) send_rcom(mh, rc); } -static void receive_rcom_lookup_reply(struct dlm_ls *ls, struct dlm_rcom *rc_in) +static void receive_rcom_lookup_reply(struct dlm_ls *ls, + const struct dlm_rcom *rc_in) { dlm_recover_master_reply(ls, rc_in); } @@ -437,7 +445,7 @@ static void pack_rcom_lock(struct dlm_rsb *r, struct dlm_lkb *lkb, memcpy(rl->rl_lvb, lkb->lkb_lvbptr, r->res_ls->ls_lvblen); } -int dlm_send_rcom_lock(struct dlm_rsb *r, struct dlm_lkb *lkb) +int dlm_send_rcom_lock(struct dlm_rsb *r, struct dlm_lkb *lkb, uint64_t seq) { struct dlm_ls *ls = r->res_ls; struct dlm_rcom *rc; @@ -448,7 +456,8 @@ int dlm_send_rcom_lock(struct dlm_rsb *r, struct dlm_lkb *lkb) if (lkb->lkb_lvbptr) len += ls->ls_lvblen; - error = create_rcom(ls, r->res_nodeid, DLM_RCOM_LOCK, len, &rc, &mh); + error = create_rcom(ls, r->res_nodeid, DLM_RCOM_LOCK, len, &rc, &mh, + seq); if (error) goto out; @@ -462,23 +471,28 @@ int dlm_send_rcom_lock(struct dlm_rsb *r, struct dlm_lkb *lkb) } /* needs at least dlm_rcom + rcom_lock */ -static void receive_rcom_lock(struct dlm_ls *ls, struct dlm_rcom *rc_in) +static void receive_rcom_lock(struct dlm_ls *ls, const struct dlm_rcom *rc_in, + uint64_t seq) { + __le32 rl_remid, rl_result; + struct rcom_lock *rl; struct dlm_rcom *rc; struct dlm_mhandle *mh; int error, nodeid = le32_to_cpu(rc_in->rc_header.h_nodeid); - dlm_recover_master_copy(ls, rc_in); + dlm_recover_master_copy(ls, rc_in, &rl_remid, &rl_result); error = create_rcom(ls, nodeid, DLM_RCOM_LOCK_REPLY, - sizeof(struct rcom_lock), &rc, &mh); + sizeof(struct rcom_lock), &rc, &mh, seq); if (error) return; - /* We send back the same rcom_lock struct we received, but - dlm_recover_master_copy() has filled in rl_remid and rl_result */ - memcpy(rc->rc_buf, rc_in->rc_buf, sizeof(struct rcom_lock)); + rl = (struct rcom_lock *)rc->rc_buf; + /* set rl_remid and rl_result from dlm_recover_master_copy() */ + rl->rl_remid = rl_remid; + rl->rl_result = rl_result; + rc->rc_id = rc_in->rc_id; rc->rc_seq_reply = rc_in->rc_seq; @@ -488,7 +502,7 @@ static void receive_rcom_lock(struct dlm_ls *ls, struct dlm_rcom *rc_in) /* If the lockspace doesn't exist then still send a status message back; it's possible that it just doesn't have its global_id yet. 
*/ -int dlm_send_ls_not_ready(int nodeid, struct dlm_rcom *rc_in) +int dlm_send_ls_not_ready(int nodeid, const struct dlm_rcom *rc_in) { struct dlm_rcom *rc; struct rcom_config *rf; @@ -566,7 +580,7 @@ int dlm_send_ls_not_ready(int nodeid, struct dlm_rcom *rc_in) /* Called by dlm_recv; corresponds to dlm_receive_message() but special recovery-only comms are sent through here. */ -void dlm_receive_rcom(struct dlm_ls *ls, struct dlm_rcom *rc, int nodeid) +void dlm_receive_rcom(struct dlm_ls *ls, const struct dlm_rcom *rc, int nodeid) { int lock_size = sizeof(struct dlm_rcom) + sizeof(struct rcom_lock); int stop, reply = 0, names = 0, lookup = 0, lock = 0; @@ -620,21 +634,21 @@ void dlm_receive_rcom(struct dlm_ls *ls, struct dlm_rcom *rc, int nodeid) switch (rc->rc_type) { case cpu_to_le32(DLM_RCOM_STATUS): - receive_rcom_status(ls, rc); + receive_rcom_status(ls, rc, seq); break; case cpu_to_le32(DLM_RCOM_NAMES): - receive_rcom_names(ls, rc); + receive_rcom_names(ls, rc, seq); break; case cpu_to_le32(DLM_RCOM_LOOKUP): - receive_rcom_lookup(ls, rc); + receive_rcom_lookup(ls, rc, seq); break; case cpu_to_le32(DLM_RCOM_LOCK): if (le16_to_cpu(rc->rc_header.h_length) < lock_size) goto Eshort; - receive_rcom_lock(ls, rc); + receive_rcom_lock(ls, rc, seq); break; case cpu_to_le32(DLM_RCOM_STATUS_REPLY): @@ -652,7 +666,7 @@ void dlm_receive_rcom(struct dlm_ls *ls, struct dlm_rcom *rc, int nodeid) case cpu_to_le32(DLM_RCOM_LOCK_REPLY): if (le16_to_cpu(rc->rc_header.h_length) < lock_size) goto Eshort; - dlm_recover_process_copy(ls, rc); + dlm_recover_process_copy(ls, rc, seq); break; default: diff --git a/fs/dlm/rcom.h b/fs/dlm/rcom.h index 454d3c4814abe..765926ae0020d 100644 --- a/fs/dlm/rcom.h +++ b/fs/dlm/rcom.h @@ -12,12 +12,15 @@ #ifndef __RCOM_DOT_H__ #define __RCOM_DOT_H__ -int dlm_rcom_status(struct dlm_ls *ls, int nodeid, uint32_t status_flags); -int dlm_rcom_names(struct dlm_ls *ls, int nodeid, char *last_name,int last_len); -int dlm_send_rcom_lookup(struct dlm_rsb *r, int dir_nodeid); -int dlm_send_rcom_lock(struct dlm_rsb *r, struct dlm_lkb *lkb); -void dlm_receive_rcom(struct dlm_ls *ls, struct dlm_rcom *rc, int nodeid); -int dlm_send_ls_not_ready(int nodeid, struct dlm_rcom *rc_in); +int dlm_rcom_status(struct dlm_ls *ls, int nodeid, uint32_t status_flags, + uint64_t seq); +int dlm_rcom_names(struct dlm_ls *ls, int nodeid, char *last_name, + int last_len, uint64_t seq); +int dlm_send_rcom_lookup(struct dlm_rsb *r, int dir_nodeid, uint64_t seq); +int dlm_send_rcom_lock(struct dlm_rsb *r, struct dlm_lkb *lkb, uint64_t seq); +void dlm_receive_rcom(struct dlm_ls *ls, const struct dlm_rcom *rc, + int nodeid); +int dlm_send_ls_not_ready(int nodeid, const struct dlm_rcom *rc_in); #endif diff --git a/fs/dlm/recover.c b/fs/dlm/recover.c index 29d71a5018d43..53917c0aa3c07 100644 --- a/fs/dlm/recover.c +++ b/fs/dlm/recover.c @@ -93,7 +93,7 @@ void dlm_set_recover_status(struct dlm_ls *ls, uint32_t status) } static int wait_status_all(struct dlm_ls *ls, uint32_t wait_status, - int save_slots) + int save_slots, uint64_t seq) { struct dlm_rcom *rc = ls->ls_recover_buf; struct dlm_member *memb; @@ -107,7 +107,7 @@ static int wait_status_all(struct dlm_ls *ls, uint32_t wait_status, goto out; } - error = dlm_rcom_status(ls, memb->nodeid, 0); + error = dlm_rcom_status(ls, memb->nodeid, 0, seq); if (error) goto out; @@ -126,7 +126,7 @@ static int wait_status_all(struct dlm_ls *ls, uint32_t wait_status, } static int wait_status_low(struct dlm_ls *ls, uint32_t wait_status, - uint32_t status_flags) + uint32_t 
status_flags, uint64_t seq) { struct dlm_rcom *rc = ls->ls_recover_buf; int error = 0, delay = 0, nodeid = ls->ls_low_nodeid; @@ -137,7 +137,7 @@ static int wait_status_low(struct dlm_ls *ls, uint32_t wait_status, goto out; } - error = dlm_rcom_status(ls, nodeid, status_flags); + error = dlm_rcom_status(ls, nodeid, status_flags, seq); if (error) break; @@ -151,22 +151,22 @@ static int wait_status_low(struct dlm_ls *ls, uint32_t wait_status, return error; } -static int wait_status(struct dlm_ls *ls, uint32_t status) +static int wait_status(struct dlm_ls *ls, uint32_t status, uint64_t seq) { uint32_t status_all = status << 1; int error; if (ls->ls_low_nodeid == dlm_our_nodeid()) { - error = wait_status_all(ls, status, 0); + error = wait_status_all(ls, status, 0, seq); if (!error) dlm_set_recover_status(ls, status_all); } else - error = wait_status_low(ls, status_all, 0); + error = wait_status_low(ls, status_all, 0, seq); return error; } -int dlm_recover_members_wait(struct dlm_ls *ls) +int dlm_recover_members_wait(struct dlm_ls *ls, uint64_t seq) { struct dlm_member *memb; struct dlm_slot *slots; @@ -180,7 +180,7 @@ int dlm_recover_members_wait(struct dlm_ls *ls) } if (ls->ls_low_nodeid == dlm_our_nodeid()) { - error = wait_status_all(ls, DLM_RS_NODES, 1); + error = wait_status_all(ls, DLM_RS_NODES, 1, seq); if (error) goto out; @@ -199,7 +199,8 @@ int dlm_recover_members_wait(struct dlm_ls *ls) dlm_set_recover_status(ls, DLM_RS_NODES_ALL); } } else { - error = wait_status_low(ls, DLM_RS_NODES_ALL, DLM_RSF_NEED_SLOTS); + error = wait_status_low(ls, DLM_RS_NODES_ALL, + DLM_RSF_NEED_SLOTS, seq); if (error) goto out; @@ -209,19 +210,19 @@ int dlm_recover_members_wait(struct dlm_ls *ls) return error; } -int dlm_recover_directory_wait(struct dlm_ls *ls) +int dlm_recover_directory_wait(struct dlm_ls *ls, uint64_t seq) { - return wait_status(ls, DLM_RS_DIR); + return wait_status(ls, DLM_RS_DIR, seq); } -int dlm_recover_locks_wait(struct dlm_ls *ls) +int dlm_recover_locks_wait(struct dlm_ls *ls, uint64_t seq) { - return wait_status(ls, DLM_RS_LOCKS); + return wait_status(ls, DLM_RS_LOCKS, seq); } -int dlm_recover_done_wait(struct dlm_ls *ls) +int dlm_recover_done_wait(struct dlm_ls *ls, uint64_t seq) { - return wait_status(ls, DLM_RS_DONE); + return wait_status(ls, DLM_RS_DONE, seq); } /* @@ -441,7 +442,7 @@ static void set_new_master(struct dlm_rsb *r) * equals our_nodeid below). */ -static int recover_master(struct dlm_rsb *r, unsigned int *count) +static int recover_master(struct dlm_rsb *r, unsigned int *count, uint64_t seq) { struct dlm_ls *ls = r->res_ls; int our_nodeid, dir_nodeid; @@ -472,7 +473,7 @@ static int recover_master(struct dlm_rsb *r, unsigned int *count) error = 0; } else { recover_idr_add(r); - error = dlm_send_rcom_lookup(r, dir_nodeid); + error = dlm_send_rcom_lookup(r, dir_nodeid, seq); } (*count)++; @@ -520,7 +521,7 @@ static int recover_master_static(struct dlm_rsb *r, unsigned int *count) * the correct dir node. 
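+ * Remote lookups sent via dlm_send_rcom_lookup() are tagged with the + * sequence number of the current recovery pass.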
*/ -int dlm_recover_masters(struct dlm_ls *ls) +int dlm_recover_masters(struct dlm_ls *ls, uint64_t seq) { struct dlm_rsb *r; unsigned int total = 0; @@ -542,7 +543,7 @@ int dlm_recover_masters(struct dlm_ls *ls) if (nodir) error = recover_master_static(r, &count); else - error = recover_master(r, &count); + error = recover_master(r, &count, seq); unlock_rsb(r); cond_resched(); total++; @@ -563,7 +564,7 @@ int dlm_recover_masters(struct dlm_ls *ls) return error; } -int dlm_recover_master_reply(struct dlm_ls *ls, struct dlm_rcom *rc) +int dlm_recover_master_reply(struct dlm_ls *ls, const struct dlm_rcom *rc) { struct dlm_rsb *r; int ret_nodeid, new_master; @@ -614,13 +615,14 @@ int dlm_recover_master_reply(struct dlm_ls *ls, struct dlm_rcom *rc) * an equal number of replies then recovery for the rsb is done */ -static int recover_locks_queue(struct dlm_rsb *r, struct list_head *head) +static int recover_locks_queue(struct dlm_rsb *r, struct list_head *head, + uint64_t seq) { struct dlm_lkb *lkb; int error = 0; list_for_each_entry(lkb, head, lkb_statequeue) { - error = dlm_send_rcom_lock(r, lkb); + error = dlm_send_rcom_lock(r, lkb, seq); if (error) break; r->res_recover_locks_count++; @@ -629,7 +631,7 @@ static int recover_locks_queue(struct dlm_rsb *r, struct list_head *head) return error; } -static int recover_locks(struct dlm_rsb *r) +static int recover_locks(struct dlm_rsb *r, uint64_t seq) { int error = 0; @@ -637,13 +639,13 @@ static int recover_locks(struct dlm_rsb *r) DLM_ASSERT(!r->res_recover_locks_count, dlm_dump_rsb(r);); - error = recover_locks_queue(r, &r->res_grantqueue); + error = recover_locks_queue(r, &r->res_grantqueue, seq); if (error) goto out; - error = recover_locks_queue(r, &r->res_convertqueue); + error = recover_locks_queue(r, &r->res_convertqueue, seq); if (error) goto out; - error = recover_locks_queue(r, &r->res_waitqueue); + error = recover_locks_queue(r, &r->res_waitqueue, seq); if (error) goto out; @@ -656,7 +658,7 @@ static int recover_locks(struct dlm_rsb *r) return error; } -int dlm_recover_locks(struct dlm_ls *ls) +int dlm_recover_locks(struct dlm_ls *ls, uint64_t seq) { struct dlm_rsb *r; int error, count = 0; @@ -677,7 +679,7 @@ int dlm_recover_locks(struct dlm_ls *ls) goto out; } - error = recover_locks(r); + error = recover_locks(r, seq); if (error) { up_read(&ls->ls_root_sem); goto out; diff --git a/fs/dlm/recover.h b/fs/dlm/recover.h index 235e0d25cd48c..dbc51013ecadb 100644 --- a/fs/dlm/recover.h +++ b/fs/dlm/recover.h @@ -15,13 +15,13 @@ int dlm_wait_function(struct dlm_ls *ls, int (*testfn) (struct dlm_ls *ls)); uint32_t dlm_recover_status(struct dlm_ls *ls); void dlm_set_recover_status(struct dlm_ls *ls, uint32_t status); -int dlm_recover_members_wait(struct dlm_ls *ls); -int dlm_recover_directory_wait(struct dlm_ls *ls); -int dlm_recover_locks_wait(struct dlm_ls *ls); -int dlm_recover_done_wait(struct dlm_ls *ls); -int dlm_recover_masters(struct dlm_ls *ls); -int dlm_recover_master_reply(struct dlm_ls *ls, struct dlm_rcom *rc); -int dlm_recover_locks(struct dlm_ls *ls); +int dlm_recover_members_wait(struct dlm_ls *ls, uint64_t seq); +int dlm_recover_directory_wait(struct dlm_ls *ls, uint64_t seq); +int dlm_recover_locks_wait(struct dlm_ls *ls, uint64_t seq); +int dlm_recover_done_wait(struct dlm_ls *ls, uint64_t seq); +int dlm_recover_masters(struct dlm_ls *ls, uint64_t seq); +int dlm_recover_master_reply(struct dlm_ls *ls, const struct dlm_rcom *rc); +int dlm_recover_locks(struct dlm_ls *ls, uint64_t seq); void dlm_recovered_lock(struct 
dlm_rsb *r); int dlm_create_root_list(struct dlm_ls *ls); void dlm_release_root_list(struct dlm_ls *ls); diff --git a/fs/dlm/recoverd.c b/fs/dlm/recoverd.c index 19da816cfb09d..4d17491dea2fe 100644 --- a/fs/dlm/recoverd.c +++ b/fs/dlm/recoverd.c @@ -90,7 +90,7 @@ static int ls_recover(struct dlm_ls *ls, struct dlm_recover *rv) dlm_set_recover_status(ls, DLM_RS_NODES); - error = dlm_recover_members_wait(ls); + error = dlm_recover_members_wait(ls, rv->seq); if (error) { log_rinfo(ls, "dlm_recover_members_wait error %d", error); goto fail; @@ -103,7 +103,7 @@ static int ls_recover(struct dlm_ls *ls, struct dlm_recover *rv) * nodes their master rsb names that hash to us. */ - error = dlm_recover_directory(ls); + error = dlm_recover_directory(ls, rv->seq); if (error) { log_rinfo(ls, "dlm_recover_directory error %d", error); goto fail; @@ -111,7 +111,7 @@ static int ls_recover(struct dlm_ls *ls, struct dlm_recover *rv) dlm_set_recover_status(ls, DLM_RS_DIR); - error = dlm_recover_directory_wait(ls); + error = dlm_recover_directory_wait(ls, rv->seq); if (error) { log_rinfo(ls, "dlm_recover_directory_wait error %d", error); goto fail; @@ -145,7 +145,7 @@ static int ls_recover(struct dlm_ls *ls, struct dlm_recover *rv) * departed nodes. */ - error = dlm_recover_masters(ls); + error = dlm_recover_masters(ls, rv->seq); if (error) { log_rinfo(ls, "dlm_recover_masters error %d", error); goto fail; @@ -155,7 +155,7 @@ static int ls_recover(struct dlm_ls *ls, struct dlm_recover *rv) * Send our locks on remastered rsb's to the new masters. */ - error = dlm_recover_locks(ls); + error = dlm_recover_locks(ls, rv->seq); if (error) { log_rinfo(ls, "dlm_recover_locks error %d", error); goto fail; @@ -163,7 +163,7 @@ static int ls_recover(struct dlm_ls *ls, struct dlm_recover *rv) dlm_set_recover_status(ls, DLM_RS_LOCKS); - error = dlm_recover_locks_wait(ls); + error = dlm_recover_locks_wait(ls, rv->seq); if (error) { log_rinfo(ls, "dlm_recover_locks_wait error %d", error); goto fail; @@ -187,7 +187,7 @@ static int ls_recover(struct dlm_ls *ls, struct dlm_recover *rv) */ dlm_set_recover_status(ls, DLM_RS_LOCKS); - error = dlm_recover_locks_wait(ls); + error = dlm_recover_locks_wait(ls, rv->seq); if (error) { log_rinfo(ls, "dlm_recover_locks_wait error %d", error); goto fail; @@ -206,7 +206,7 @@ static int ls_recover(struct dlm_ls *ls, struct dlm_recover *rv) dlm_set_recover_status(ls, DLM_RS_DONE); - error = dlm_recover_done_wait(ls); + error = dlm_recover_done_wait(ls, rv->seq); if (error) { log_rinfo(ls, "dlm_recover_done_wait error %d", error); goto fail; diff --git a/fs/dlm/requestqueue.c b/fs/dlm/requestqueue.c index 8be2893ad15bb..892d6ca21e742 100644 --- a/fs/dlm/requestqueue.c +++ b/fs/dlm/requestqueue.c @@ -30,7 +30,8 @@ struct rq_entry { * lockspace is enabled on some while still suspended on others. 
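+ * The message is copied into an rq_entry together with the sending + * nodeid so dlm_process_requestqueue() can replay it once the + * lockspace is enabled again.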
*/ -void dlm_add_requestqueue(struct dlm_ls *ls, int nodeid, struct dlm_message *ms) +void dlm_add_requestqueue(struct dlm_ls *ls, int nodeid, + const struct dlm_message *ms) { struct rq_entry *e; int length = le16_to_cpu(ms->m_header.h_length) - diff --git a/fs/dlm/requestqueue.h b/fs/dlm/requestqueue.h index 4e403469a8458..42bfe23ceabea 100644 --- a/fs/dlm/requestqueue.h +++ b/fs/dlm/requestqueue.h @@ -11,7 +11,8 @@ #ifndef __REQUESTQUEUE_DOT_H__ #define __REQUESTQUEUE_DOT_H__ -void dlm_add_requestqueue(struct dlm_ls *ls, int nodeid, struct dlm_message *ms); +void dlm_add_requestqueue(struct dlm_ls *ls, int nodeid, + const struct dlm_message *ms); int dlm_process_requestqueue(struct dlm_ls *ls); void dlm_wait_requestqueue(struct dlm_ls *ls); void dlm_purge_requestqueue(struct dlm_ls *ls); diff --git a/fs/exportfs/expfs.c b/fs/exportfs/expfs.c index d1dbe47c7975f..c20704aa21b3d 100644 --- a/fs/exportfs/expfs.c +++ b/fs/exportfs/expfs.c @@ -386,6 +386,7 @@ static int export_encode_fh(struct inode *inode, struct fid *fid, * @inode: the object to encode * @fid: where to store the file handle fragment * @max_len: maximum length to store there + * @parent: parent directory inode, if wanted * @flags: properties of the requested file handle * * Returns an enum fid_type or a negative errno. diff --git a/fs/ext4/balloc.c b/fs/ext4/balloc.c index 1f72f977c6dba..79b20d6ae39ec 100644 --- a/fs/ext4/balloc.c +++ b/fs/ext4/balloc.c @@ -913,11 +913,11 @@ unsigned long ext4_bg_num_gdb(struct super_block *sb, ext4_group_t group) } /* - * This function returns the number of file system metadata clusters at + * This function returns the number of file system metadata blocks at * the beginning of a block group, including the reserved gdt blocks. */ -static unsigned ext4_num_base_meta_clusters(struct super_block *sb, - ext4_group_t block_group) +unsigned int ext4_num_base_meta_blocks(struct super_block *sb, + ext4_group_t block_group) { struct ext4_sb_info *sbi = EXT4_SB(sb); unsigned num; @@ -935,8 +935,15 @@ static unsigned ext4_num_base_meta_clusters(struct super_block *sb, } else { /* For META_BG_BLOCK_GROUPS */ num += ext4_bg_num_gdb_meta(sb, block_group); } - return EXT4_NUM_B2C(sbi, num); + return num; } + +static unsigned int ext4_num_base_meta_clusters(struct super_block *sb, + ext4_group_t block_group) +{ + return EXT4_NUM_B2C(EXT4_SB(sb), ext4_num_base_meta_blocks(sb, block_group)); +} + /** * ext4_inode_to_goal_block - return a hint for block allocation * @inode: inode for block allocation diff --git a/fs/ext4/block_validity.c b/fs/ext4/block_validity.c index 5504f72bbbbe7..6fe3c941b5651 100644 --- a/fs/ext4/block_validity.c +++ b/fs/ext4/block_validity.c @@ -215,7 +215,6 @@ int ext4_setup_system_zone(struct super_block *sb) struct ext4_system_blocks *system_blks; struct ext4_group_desc *gdp; ext4_group_t i; - int flex_size = ext4_flex_bg_size(sbi); int ret; system_blks = kzalloc(sizeof(*system_blks), GFP_KERNEL); @@ -223,12 +222,13 @@ int ext4_setup_system_zone(struct super_block *sb) return -ENOMEM; for (i=0; i < ngroups; i++) { + unsigned int meta_blks = ext4_num_base_meta_blocks(sb, i); + cond_resched(); - if (ext4_bg_has_super(sb, i) && - ((i < 5) || ((i % flex_size) == 0))) { + if (meta_blks != 0) { ret = add_system_zone(system_blks, ext4_group_first_block_no(sb, i), - ext4_bg_num_gdb(sb, i) + 1, 0); + meta_blks, 0); if (ret) goto err; } diff --git a/fs/ext4/crypto.c b/fs/ext4/crypto.c index e20ac0654b3f2..453d4da5de520 100644 --- a/fs/ext4/crypto.c +++ b/fs/ext4/crypto.c @@ -33,6 +33,8 @@ 
int ext4_fname_setup_filename(struct inode *dir, const struct qstr *iname, #if IS_ENABLED(CONFIG_UNICODE) err = ext4_fname_setup_ci_filename(dir, iname, fname); + if (err) + ext4_fname_free_filename(fname); #endif return err; } @@ -51,6 +53,8 @@ int ext4_fname_prepare_lookup(struct inode *dir, struct dentry *dentry, #if IS_ENABLED(CONFIG_UNICODE) err = ext4_fname_setup_ci_filename(dir, &dentry->d_name, fname); + if (err) + ext4_fname_free_filename(fname); #endif return err; } diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h index 481491e892dfe..9418359b1d9d3 100644 --- a/fs/ext4/ext4.h +++ b/fs/ext4/ext4.h @@ -176,9 +176,6 @@ enum criteria { EXT4_MB_NUM_CRS }; -/* criteria below which we use fast block scanning and avoid unnecessary IO */ -#define CR_FAST CR_GOAL_LEN_SLOW - /* * Flags used in mballoc's allocation_context flags field. * @@ -1241,6 +1238,7 @@ struct ext4_inode_info { #define EXT4_MOUNT2_MB_OPTIMIZE_SCAN 0x00000080 /* Optimize group * scanning in mballoc */ +#define EXT4_MOUNT2_ABORT 0x00000100 /* Abort filesystem */ #define clear_opt(sb, opt) EXT4_SB(sb)->s_mount_opt &= \ ~EXT4_MOUNT_##opt @@ -1258,10 +1256,8 @@ struct ext4_inode_info { #define ext4_test_and_set_bit __test_and_set_bit_le #define ext4_set_bit __set_bit_le -#define ext4_set_bit_atomic ext2_set_bit_atomic #define ext4_test_and_clear_bit __test_and_clear_bit_le #define ext4_clear_bit __clear_bit_le -#define ext4_clear_bit_atomic ext2_clear_bit_atomic #define ext4_test_bit test_bit_le #define ext4_find_next_zero_bit find_next_zero_bit_le #define ext4_find_next_bit find_next_bit_le @@ -1708,10 +1704,13 @@ struct ext4_sb_info { const char *s_last_error_func; time64_t s_last_error_time; /* - * If we are in a context where we cannot update error information in - * the on-disk superblock, we queue this work to do it. + * If we are in a context where we cannot update the on-disk + * superblock, we queue the work here. This is used to update + * the error information in the superblock, and for periodic + * updates of the superblock called from the commit callback + * function. 
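+ * The work struct is therefore named s_sb_upd_work rather than + * s_error_work.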
*/ - struct work_struct s_error_work; + struct work_struct s_sb_upd_work; /* Ext4 fast commit sub transaction ID */ atomic_t s_fc_subtid; @@ -1804,7 +1803,6 @@ static inline int ext4_valid_inum(struct super_block *sb, unsigned long ino) */ enum { EXT4_MF_MNTDIR_SAMPLED, - EXT4_MF_FS_ABORTED, /* Fatal error detected */ EXT4_MF_FC_INELIGIBLE /* Fast commit ineligible */ }; @@ -2228,9 +2226,9 @@ extern int ext4_feature_set_ok(struct super_block *sb, int readonly); #define EXT4_FLAGS_SHUTDOWN 1 #define EXT4_FLAGS_BDEV_IS_DAX 2 -static inline int ext4_forced_shutdown(struct ext4_sb_info *sbi) +static inline int ext4_forced_shutdown(struct super_block *sb) { - return test_bit(EXT4_FLAGS_SHUTDOWN, &sbi->s_ext4_flags); + return test_bit(EXT4_FLAGS_SHUTDOWN, &EXT4_SB(sb)->s_ext4_flags); } /* @@ -2708,7 +2706,6 @@ extern ext4_fsblk_t ext4_new_meta_blocks(handle_t *handle, struct inode *inode, extern int ext4_claim_free_clusters(struct ext4_sb_info *sbi, s64 nclusters, unsigned int flags); extern ext4_fsblk_t ext4_count_free_clusters(struct super_block *); -extern void ext4_check_blocks_bitmap(struct super_block *); extern struct ext4_group_desc * ext4_get_group_desc(struct super_block * sb, ext4_group_t block_group, struct buffer_head ** bh); @@ -2864,7 +2861,6 @@ extern void ext4_free_inode(handle_t *, struct inode *); extern struct inode * ext4_orphan_get(struct super_block *, unsigned long); extern unsigned long ext4_count_free_inodes(struct super_block *); extern unsigned long ext4_count_dirs(struct super_block *); -extern void ext4_check_inodes_bitmap(struct super_block *); extern void ext4_mark_bitmap_end(int start_bit, int end_bit, char *bitmap); extern int ext4_init_inode_table(struct super_block *sb, ext4_group_t group, int barrier); @@ -2907,7 +2903,6 @@ extern int ext4_mb_init(struct super_block *); extern int ext4_mb_release(struct super_block *); extern ext4_fsblk_t ext4_mb_new_blocks(handle_t *, struct ext4_allocation_request *, int *); -extern int ext4_mb_reserve_blocks(struct super_block *, int); extern void ext4_discard_preallocations(struct inode *, unsigned int); extern int __init ext4_init_mballoc(void); extern void ext4_exit_mballoc(void); @@ -2930,6 +2925,10 @@ extern int ext4_trim_fs(struct super_block *, struct fstrim_range *); extern void ext4_process_freed_data(struct super_block *sb, tid_t commit_tid); extern void ext4_mb_mark_bb(struct super_block *sb, ext4_fsblk_t block, int len, int state); +static inline bool ext4_mb_cr_expensive(enum criteria cr) +{ + return cr >= CR_GOAL_LEN_SLOW; +} /* inode.c */ void ext4_inode_csum_set(struct inode *inode, struct ext4_inode *raw, @@ -2983,7 +2982,6 @@ extern void ext4_evict_inode(struct inode *); extern void ext4_clear_inode(struct inode *); extern int ext4_file_getattr(struct mnt_idmap *, const struct path *, struct kstat *, u32, unsigned int); -extern int ext4_sync_inode(handle_t *, struct inode *); extern void ext4_dirty_inode(struct inode *, int); extern int ext4_change_inode_journal_flag(struct inode *, int); extern int ext4_get_inode_loc(struct inode *, struct ext4_iloc *); @@ -3090,6 +3088,8 @@ extern const char *ext4_decode_error(struct super_block *sb, int errno, extern void ext4_mark_group_bitmap_corrupted(struct super_block *sb, ext4_group_t block_group, unsigned int flags); +extern unsigned int ext4_num_base_meta_blocks(struct super_block *sb, + ext4_group_t block_group); extern __printf(7, 8) void __ext4_error(struct super_block *, const char *, unsigned int, bool, @@ -3531,8 +3531,6 @@ extern loff_t 
ext4_llseek(struct file *file, loff_t offset, int origin); /* inline.c */ extern int ext4_get_max_inline_size(struct inode *inode); extern int ext4_find_inline_data_nolock(struct inode *inode); -extern int ext4_init_inline_data(handle_t *handle, struct inode *inode, - unsigned int len); extern int ext4_destroy_inline_data(handle_t *handle, struct inode *inode); int ext4_readpage_inline(struct inode *inode, struct folio *folio); diff --git a/fs/ext4/ext4_jbd2.c b/fs/ext4/ext4_jbd2.c index b38d59581411c..d1a2e66244017 100644 --- a/fs/ext4/ext4_jbd2.c +++ b/fs/ext4/ext4_jbd2.c @@ -67,11 +67,12 @@ static int ext4_journal_check_start(struct super_block *sb) might_sleep(); - if (unlikely(ext4_forced_shutdown(EXT4_SB(sb)))) + if (unlikely(ext4_forced_shutdown(sb))) return -EIO; - if (sb_rdonly(sb)) + if (WARN_ON_ONCE(sb_rdonly(sb))) return -EROFS; + WARN_ON(sb->s_writers.frozen == SB_FREEZE_COMPLETE); journal = EXT4_SB(sb)->s_journal; /* diff --git a/fs/ext4/extents_status.c b/fs/ext4/extents_status.c index 9b5b8951afb44..6f7de14c0fa86 100644 --- a/fs/ext4/extents_status.c +++ b/fs/ext4/extents_status.c @@ -878,23 +878,29 @@ void ext4_es_insert_extent(struct inode *inode, ext4_lblk_t lblk, err1 = __es_remove_extent(inode, lblk, end, NULL, es1); if (err1 != 0) goto error; + /* Free preallocated extent if it didn't get used. */ + if (es1) { + if (!es1->es_len) + __es_free_extent(es1); + es1 = NULL; + } err2 = __es_insert_extent(inode, &newes, es2); if (err2 == -ENOMEM && !ext4_es_must_keep(&newes)) err2 = 0; if (err2 != 0) goto error; + /* Free preallocated extent if it didn't get used. */ + if (es2) { + if (!es2->es_len) + __es_free_extent(es2); + es2 = NULL; + } if (sbi->s_cluster_ratio > 1 && test_opt(inode->i_sb, DELALLOC) && (status & EXTENT_STATUS_WRITTEN || status & EXTENT_STATUS_UNWRITTEN)) __revise_pending(inode, lblk, len); - - /* es is pre-allocated but not used, free it. */ - if (es1 && !es1->es_len) - __es_free_extent(es1); - if (es2 && !es2->es_len) - __es_free_extent(es2); error: write_unlock(&EXT4_I(inode)->i_es_lock); if (err1 || err2) @@ -1491,8 +1497,12 @@ void ext4_es_remove_extent(struct inode *inode, ext4_lblk_t lblk, */ write_lock(&EXT4_I(inode)->i_es_lock); err = __es_remove_extent(inode, lblk, end, &reserved, es); - if (es && !es->es_len) - __es_free_extent(es); + /* Free preallocated extent if it didn't get used. */ + if (es) { + if (!es->es_len) + __es_free_extent(es); + es = NULL; + } write_unlock(&EXT4_I(inode)->i_es_lock); if (err) goto retry; @@ -2047,19 +2057,25 @@ void ext4_es_insert_delayed_block(struct inode *inode, ext4_lblk_t lblk, err1 = __es_remove_extent(inode, lblk, lblk, NULL, es1); if (err1 != 0) goto error; + /* Free preallocated extent if it didn't get used. */ + if (es1) { + if (!es1->es_len) + __es_free_extent(es1); + es1 = NULL; + } err2 = __es_insert_extent(inode, &newes, es2); if (err2 != 0) goto error; + /* Free preallocated extent if it didn't get used. */ + if (es2) { + if (!es2->es_len) + __es_free_extent(es2); + es2 = NULL; + } if (allocated) __insert_pending(inode, lblk); - - /* es is pre-allocated but not used, free it. 
*/ - if (es1 && !es1->es_len) - __es_free_extent(es1); - if (es2 && !es2->es_len) - __es_free_extent(es2); error: write_unlock(&EXT4_I(inode)->i_es_lock); if (err1 || err2) diff --git a/fs/ext4/file.c b/fs/ext4/file.c index 2dc3f8301225a..6830ea3a6c59c 100644 --- a/fs/ext4/file.c +++ b/fs/ext4/file.c @@ -131,7 +131,7 @@ static ssize_t ext4_file_read_iter(struct kiocb *iocb, struct iov_iter *to) { struct inode *inode = file_inode(iocb->ki_filp); - if (unlikely(ext4_forced_shutdown(EXT4_SB(inode->i_sb)))) + if (unlikely(ext4_forced_shutdown(inode->i_sb))) return -EIO; if (!iov_iter_count(to)) @@ -153,7 +153,7 @@ static ssize_t ext4_file_splice_read(struct file *in, loff_t *ppos, { struct inode *inode = file_inode(in); - if (unlikely(ext4_forced_shutdown(EXT4_SB(inode->i_sb)))) + if (unlikely(ext4_forced_shutdown(inode->i_sb))) return -EIO; return filemap_splice_read(in, ppos, pipe, len, flags); } @@ -476,6 +476,11 @@ static ssize_t ext4_dio_write_checks(struct kiocb *iocb, struct iov_iter *from, * required to change security info in file_modified(), for extending * I/O, any form of non-overwrite I/O, and unaligned I/O to unwritten * extents (as partial block zeroing may be required). + * + * Note that unaligned writes are allowed under shared lock so long as + * they are pure overwrites. Otherwise, concurrent unaligned writes risk + * data corruption due to partial block zeroing in the dio layer, and so + * the I/O must occur exclusively. */ if (*ilock_shared && ((!IS_NOSEC(inode) || *extend || !overwrite || @@ -492,21 +497,12 @@ static ssize_t ext4_dio_write_checks(struct kiocb *iocb, struct iov_iter *from, /* * Now that locking is settled, determine dio flags and exclusivity - * requirements. Unaligned writes are allowed under shared lock so long - * as they are pure overwrites. Set the iomap overwrite only flag as an - * added precaution in this case. Even though this is unnecessary, we - * can detect and warn on unexpected -EAGAIN if an unsafe unaligned - * write is ever submitted. - * - * Otherwise, concurrent unaligned writes risk data corruption due to - * partial block zeroing in the dio layer, and so the I/O must occur - * exclusively. The inode lock is already held exclusive if the write is - * non-overwrite or extending, so drain all outstanding dio and set the - * force wait dio flag. + * requirements. We don't use DIO_OVERWRITE_ONLY because the overwrite + * constraint is already enforced above. The inode lock is already held + * exclusive if the write is non-overwrite or extending, so drain all + * outstanding dio and set the force wait dio flag.
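A condensed model of the exclusivity rule described above may help. This is an illustrative sketch, not the ext4 function itself; the predicate name and its inputs are hypothetical distillations of the checks that ext4_dio_write_checks() performs:

	/* Sketch: when must an ext4 DIO write take the exclusive inode lock? */
	static bool dio_write_needs_exclusive(bool extend, bool overwrite)
	{
		/*
		 * A pure overwrite may run under the shared lock even when
		 * unaligned; extending or non-overwrite I/O must be exclusive
		 * because partial-block zeroing can race otherwise.
		 */
		return extend || !overwrite;
	}

When the exclusive path is taken for blocking I/O, the caller additionally drains outstanding DIO and sets the force-wait flag, as the surrounding hunks show.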
*/ - if (*ilock_shared && unaligned_io) { - *dio_flags = IOMAP_DIO_OVERWRITE_ONLY; - } else if (!*ilock_shared && (unaligned_io || *extend)) { + if (!*ilock_shared && (unaligned_io || *extend)) { if (iocb->ki_flags & IOCB_NOWAIT) { ret = -EAGAIN; goto out; @@ -608,7 +604,6 @@ static ssize_t ext4_dio_write_iter(struct kiocb *iocb, struct iov_iter *from) iomap_ops = &ext4_iomap_overwrite_ops; ret = iomap_dio_rw(iocb, from, iomap_ops, &ext4_dio_write_ops, dio_flags, NULL, 0); - WARN_ON_ONCE(ret == -EAGAIN && !(iocb->ki_flags & IOCB_NOWAIT)); if (ret == -ENOTBLK) ret = 0; @@ -709,7 +704,7 @@ ext4_file_write_iter(struct kiocb *iocb, struct iov_iter *from) { struct inode *inode = file_inode(iocb->ki_filp); - if (unlikely(ext4_forced_shutdown(EXT4_SB(inode->i_sb)))) + if (unlikely(ext4_forced_shutdown(inode->i_sb))) return -EIO; #ifdef CONFIG_FS_DAX @@ -806,10 +801,9 @@ static const struct vm_operations_struct ext4_file_vm_ops = { static int ext4_file_mmap(struct file *file, struct vm_area_struct *vma) { struct inode *inode = file->f_mapping->host; - struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); - struct dax_device *dax_dev = sbi->s_daxdev; + struct dax_device *dax_dev = EXT4_SB(inode->i_sb)->s_daxdev; - if (unlikely(ext4_forced_shutdown(sbi))) + if (unlikely(ext4_forced_shutdown(inode->i_sb))) return -EIO; /* @@ -885,7 +879,7 @@ static int ext4_file_open(struct inode *inode, struct file *filp) { int ret; - if (unlikely(ext4_forced_shutdown(EXT4_SB(inode->i_sb)))) + if (unlikely(ext4_forced_shutdown(inode->i_sb))) return -EIO; ret = ext4_sample_last_mounted(inode->i_sb, filp->f_path.mnt); diff --git a/fs/ext4/fsync.c b/fs/ext4/fsync.c index 0c56f3a011a1f..b40d3b29f7e5c 100644 --- a/fs/ext4/fsync.c +++ b/fs/ext4/fsync.c @@ -131,9 +131,8 @@ int ext4_sync_file(struct file *file, loff_t start, loff_t end, int datasync) int ret = 0, err; bool needs_barrier = false; struct inode *inode = file->f_mapping->host; - struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); - if (unlikely(ext4_forced_shutdown(sbi))) + if (unlikely(ext4_forced_shutdown(inode->i_sb))) return -EIO; ASSERT(ext4_journal_current_handle() == NULL); @@ -141,14 +140,14 @@ int ext4_sync_file(struct file *file, loff_t start, loff_t end, int datasync) trace_ext4_sync_file_enter(file, datasync); if (sb_rdonly(inode->i_sb)) { - /* Make sure that we read updated s_mount_flags value */ + /* Make sure that we read updated s_ext4_flags value */ smp_rmb(); - if (ext4_test_mount_flag(inode->i_sb, EXT4_MF_FS_ABORTED)) + if (ext4_forced_shutdown(inode->i_sb)) ret = -EROFS; goto out; } - if (!sbi->s_journal) { + if (!EXT4_SB(inode->i_sb)->s_journal) { ret = ext4_fsync_nojournal(file, start, end, datasync, &needs_barrier); if (needs_barrier) diff --git a/fs/ext4/hash.c b/fs/ext4/hash.c index 46c3423ddfa17..deabe29da7fbc 100644 --- a/fs/ext4/hash.c +++ b/fs/ext4/hash.c @@ -300,7 +300,7 @@ int ext4fs_dirhash(const struct inode *dir, const char *name, int len, unsigned char *buff; struct qstr qstr = {.name = name, .len = len }; - if (len && IS_CASEFOLDED(dir) && um && + if (len && IS_CASEFOLDED(dir) && (!IS_ENCRYPTED(dir) || fscrypt_has_encryption_key(dir))) { buff = kzalloc(sizeof(char) * PATH_MAX, GFP_KERNEL); if (!buff) diff --git a/fs/ext4/ialloc.c b/fs/ext4/ialloc.c index 48abef5f23e7f..b65058d972f95 100644 --- a/fs/ext4/ialloc.c +++ b/fs/ext4/ialloc.c @@ -950,7 +950,7 @@ struct inode *__ext4_new_inode(struct mnt_idmap *idmap, sb = dir->i_sb; sbi = EXT4_SB(sb); - if (unlikely(ext4_forced_shutdown(sbi))) + if (unlikely(ext4_forced_shutdown(sb))) return 
ERR_PTR(-EIO); ngroups = ext4_get_groups_count(sb); @@ -1523,12 +1523,6 @@ int ext4_init_inode_table(struct super_block *sb, ext4_group_t group, int num, ret = 0, used_blks = 0; unsigned long used_inos = 0; - /* This should not happen, but just to be sure check this */ - if (sb_rdonly(sb)) { - ret = 1; - goto out; - } - gdp = ext4_get_group_desc(sb, group, &group_desc_bh); if (!gdp || !grp) goto out; diff --git a/fs/ext4/inline.c b/fs/ext4/inline.c index 0038610373745..012d9259ff532 100644 --- a/fs/ext4/inline.c +++ b/fs/ext4/inline.c @@ -228,7 +228,7 @@ static void ext4_write_inline_data(struct inode *inode, struct ext4_iloc *iloc, struct ext4_inode *raw_inode; int cp_len = 0; - if (unlikely(ext4_forced_shutdown(EXT4_SB(inode->i_sb)))) + if (unlikely(ext4_forced_shutdown(inode->i_sb))) return; BUG_ON(!EXT4_I(inode)->i_inline_off); diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index 89737d5a16148..4ce35f1c8b0a8 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -1114,7 +1114,7 @@ static int ext4_write_begin(struct file *file, struct address_space *mapping, pgoff_t index; unsigned from, to; - if (unlikely(ext4_forced_shutdown(EXT4_SB(inode->i_sb)))) + if (unlikely(ext4_forced_shutdown(inode->i_sb))) return -EIO; trace_ext4_write_begin(inode, pos, len); @@ -2213,8 +2213,7 @@ static int mpage_map_and_submit_extent(handle_t *handle, if (err < 0) { struct super_block *sb = inode->i_sb; - if (ext4_forced_shutdown(EXT4_SB(sb)) || - ext4_test_mount_flag(sb, EXT4_MF_FS_ABORTED)) + if (ext4_forced_shutdown(sb)) goto invalidate_dirty_pages; /* * Let the upper layers retry transient errors. @@ -2534,14 +2533,13 @@ static int ext4_do_writepages(struct mpage_da_data *mpd) * If the filesystem has aborted, it is read-only, so return * right away instead of dumping stack traces later on that * will obscure the real source of the problem. We test - * EXT4_MF_FS_ABORTED instead of sb->s_flag's SB_RDONLY because + * the fs shutdown state instead of sb->s_flags' SB_RDONLY because * the latter could be true if the filesystem is mounted * read-only, and in that case, ext4_writepages should * *never* be called, so if that ever happens, we would want * the stack trace. */
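ext4_forced_shutdown() now takes the super_block directly and also subsumes what EXT4_MF_FS_ABORTED used to signal, so every entry point can use one uniform guard. A minimal sketch of that pattern follows; the function name here is hypothetical, while the helper and the -EIO convention come from the hunks above:

	/* Sketch of the fail-fast guard now used at ext4 entry points. */
	static int ext4_example_entry_point(struct inode *inode)
	{
		if (unlikely(ext4_forced_shutdown(inode->i_sb)))
			return -EIO;	/* fs is shut down; do no further work */

		/* ... normal processing ... */
		return 0;
	}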
- if (unlikely(ext4_forced_shutdown(EXT4_SB(mapping->host->i_sb)) || - ext4_test_mount_flag(inode->i_sb, EXT4_MF_FS_ABORTED))) { + if (unlikely(ext4_forced_shutdown(mapping->host->i_sb))) { ret = -EROFS; goto out_writepages; } @@ -2759,7 +2757,7 @@ static int ext4_writepages(struct address_space *mapping, int ret; int alloc_ctx; - if (unlikely(ext4_forced_shutdown(EXT4_SB(sb)))) + if (unlikely(ext4_forced_shutdown(sb))) return -EIO; alloc_ctx = ext4_writepages_down_read(sb); @@ -2798,16 +2796,16 @@ static int ext4_dax_writepages(struct address_space *mapping, int ret; long nr_to_write = wbc->nr_to_write; struct inode *inode = mapping->host; - struct ext4_sb_info *sbi = EXT4_SB(mapping->host->i_sb); int alloc_ctx; - if (unlikely(ext4_forced_shutdown(EXT4_SB(inode->i_sb)))) + if (unlikely(ext4_forced_shutdown(inode->i_sb))) return -EIO; alloc_ctx = ext4_writepages_down_read(inode->i_sb); trace_ext4_writepages(inode, wbc); - ret = dax_writeback_mapping_range(mapping, sbi->s_daxdev, wbc); + ret = dax_writeback_mapping_range(mapping, + EXT4_SB(inode->i_sb)->s_daxdev, wbc); trace_ext4_writepages_result(inode, wbc, ret, nr_to_write - wbc->nr_to_write); ext4_writepages_up_read(inode->i_sb, alloc_ctx); @@ -2857,7 +2855,7 @@ static int ext4_da_write_begin(struct file *file, struct address_space *mapping, pgoff_t index; struct inode *inode = mapping->host; - if (unlikely(ext4_forced_shutdown(EXT4_SB(inode->i_sb)))) + if (unlikely(ext4_forced_shutdown(inode->i_sb))) return -EIO; index = pos >> PAGE_SHIFT; @@ -2937,14 +2935,73 @@ static int ext4_da_should_update_i_disksize(struct folio *folio, return 1; } +static int ext4_da_do_write_end(struct address_space *mapping, + loff_t pos, unsigned len, unsigned copied, + struct page *page) +{ + struct inode *inode = mapping->host; + loff_t old_size = inode->i_size; + bool disksize_changed = false; + loff_t new_i_size; + + /* + * block_write_end() will mark the inode as dirty with I_DIRTY_PAGES + * flag, which is all that's needed to trigger page writeback. + */ + copied = block_write_end(NULL, mapping, pos, len, copied, page, NULL); + new_i_size = pos + copied; + + /* + * It's important to update i_size while still holding page lock, + * because page writeout could otherwise come in and zero beyond + * i_size. + * + * Since we are holding inode lock, we are sure i_disksize <= + * i_size. We also know that if i_disksize < i_size, there are + * delalloc writes pending in the range up to i_size. If the end of + * the current write is <= i_size, there's no need to touch + * i_disksize since writeback will push i_disksize up to i_size + * eventually. If the end of the current write is > i_size and + * inside an allocated block which ext4_da_should_update_i_disksize() + * checked, we need to update i_disksize here as certain + * ext4_writepages() paths not allocating blocks update i_disksize.
+ */ + if (new_i_size > inode->i_size) { + unsigned long end; + + i_size_write(inode, new_i_size); + end = (new_i_size - 1) & (PAGE_SIZE - 1); + if (copied && ext4_da_should_update_i_disksize(page_folio(page), end)) { + ext4_update_i_disksize(inode, new_i_size); + disksize_changed = true; + } + } + + unlock_page(page); + put_page(page); + + if (old_size < pos) + pagecache_isize_extended(inode, old_size, pos); + + if (disksize_changed) { + handle_t *handle; + + handle = ext4_journal_start(inode, EXT4_HT_INODE, 2); + if (IS_ERR(handle)) + return PTR_ERR(handle); + ext4_mark_inode_dirty(handle, inode); + ext4_journal_stop(handle); + } + + return copied; +} + static int ext4_da_write_end(struct file *file, struct address_space *mapping, loff_t pos, unsigned len, unsigned copied, struct page *page, void *fsdata) { struct inode *inode = mapping->host; - loff_t new_i_size; - unsigned long start, end; int write_mode = (int)(unsigned long)fsdata; struct folio *folio = page_folio(page); @@ -2963,30 +3020,7 @@ static int ext4_da_write_end(struct file *file, if (unlikely(copied < len) && !PageUptodate(page)) copied = 0; - start = pos & (PAGE_SIZE - 1); - end = start + copied - 1; - - /* - * Since we are holding inode lock, we are sure i_disksize <= - * i_size. We also know that if i_disksize < i_size, there are - * delalloc writes pending in the range upto i_size. If the end of - * the current write is <= i_size, there's no need to touch - * i_disksize since writeback will push i_disksize upto i_size - * eventually. If the end of the current write is > i_size and - * inside an allocated block (ext4_da_should_update_i_disksize() - * check), we need to update i_disksize here as certain - * ext4_writepages() paths not allocating blocks update i_disksize. - * - * Note that we defer inode dirtying to generic_write_end() / - * ext4_da_write_inline_data_end(). 
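The ordering in ext4_da_do_write_end() above is subtle enough to be worth restating as a numbered outline. This fragment is a condensed, illustrative reading of the function, not a replacement for it; error handling and the inline-data case are elided:

	/* 1: fold the copy into the page cache; marks the inode I_DIRTY_PAGES */
	copied = block_write_end(NULL, mapping, pos, len, copied, page, NULL);

	/* 2: publish the new i_size while the page lock still blocks
	 *    writeout from zeroing past EOF */
	if (pos + copied > inode->i_size)
		i_size_write(inode, pos + copied);

	unlock_page(page);
	put_page(page);

	/* 3: only after i_size is visible, widen the page cache window */
	if (old_size < pos)
		pagecache_isize_extended(inode, old_size, pos);

	/* 4: if i_disksize moved, journal the inode in a short handle */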
- */ - new_i_size = pos + copied; - if (copied && new_i_size > inode->i_size && - ext4_da_should_update_i_disksize(folio, end)) - ext4_update_i_disksize(inode, new_i_size); - - return generic_write_end(file, mapping, pos, len, copied, &folio->page, - fsdata); + return ext4_da_do_write_end(mapping, pos, len, copied, &folio->page); } /* @@ -4940,9 +4974,12 @@ struct inode *__ext4_iget(struct super_block *sb, unsigned long ino, "iget: bogus i_mode (%o)", inode->i_mode); goto bad_inode; } - if (IS_CASEFOLDED(inode) && !ext4_has_feature_casefold(inode->i_sb)) + if (IS_CASEFOLDED(inode) && !ext4_has_feature_casefold(inode->i_sb)) { ext4_error_inode(inode, function, line, 0, "casefold flag without casefold feature"); + ret = -EFSCORRUPTED; + goto bad_inode; + } if ((err_str = check_igot_inode(inode, flags)) != NULL) { ext4_error_inode(inode, function, line, 0, err_str); ret = -EFSCORRUPTED; @@ -5131,11 +5168,10 @@ int ext4_write_inode(struct inode *inode, struct writeback_control *wbc) { int err; - if (WARN_ON_ONCE(current->flags & PF_MEMALLOC) || - sb_rdonly(inode->i_sb)) + if (WARN_ON_ONCE(current->flags & PF_MEMALLOC)) return 0; - if (unlikely(ext4_forced_shutdown(EXT4_SB(inode->i_sb)))) + if (unlikely(ext4_forced_shutdown(inode->i_sb))) return -EIO; if (EXT4_SB(inode->i_sb)->s_journal) { @@ -5255,7 +5291,7 @@ int ext4_setattr(struct mnt_idmap *idmap, struct dentry *dentry, const unsigned int ia_valid = attr->ia_valid; bool inc_ivers = true; - if (unlikely(ext4_forced_shutdown(EXT4_SB(inode->i_sb)))) + if (unlikely(ext4_forced_shutdown(inode->i_sb))) return -EIO; if (unlikely(IS_IMMUTABLE(inode))) @@ -5674,7 +5710,7 @@ int ext4_mark_iloc_dirty(handle_t *handle, { int err = 0; - if (unlikely(ext4_forced_shutdown(EXT4_SB(inode->i_sb)))) { + if (unlikely(ext4_forced_shutdown(inode->i_sb))) { put_bh(iloc->bh); return -EIO; } @@ -5700,7 +5736,7 @@ ext4_reserve_inode_write(handle_t *handle, struct inode *inode, { int err; - if (unlikely(ext4_forced_shutdown(EXT4_SB(inode->i_sb)))) + if (unlikely(ext4_forced_shutdown(inode->i_sb))) return -EIO; err = ext4_get_inode_loc(inode, iloc); diff --git a/fs/ext4/ioctl.c b/fs/ext4/ioctl.c index b0349f4518637..0bfe2ce589e22 100644 --- a/fs/ext4/ioctl.c +++ b/fs/ext4/ioctl.c @@ -802,7 +802,7 @@ int ext4_force_shutdown(struct super_block *sb, u32 flags) if (flags > EXT4_GOING_FLAGS_NOLOGFLUSH) return -EINVAL; - if (ext4_forced_shutdown(sbi)) + if (ext4_forced_shutdown(sb)) return 0; ext4_msg(sb, KERN_ALERT, "shut down requested (%d)", flags); diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c index 21b903fe546e8..c91db9f57524c 100644 --- a/fs/ext4/mballoc.c +++ b/fs/ext4/mballoc.c @@ -874,7 +874,7 @@ static void ext4_mb_choose_next_group_p2_aligned(struct ext4_allocation_context enum criteria *new_cr, ext4_group_t *group, ext4_group_t ngroups) { struct ext4_sb_info *sbi = EXT4_SB(ac->ac_sb); - struct ext4_group_info *iter, *grp; + struct ext4_group_info *iter; int i; if (ac->ac_status == AC_STATUS_FOUND) @@ -883,7 +883,6 @@ static void ext4_mb_choose_next_group_p2_aligned(struct ext4_allocation_context if (unlikely(sbi->s_mb_stats && ac->ac_flags & EXT4_MB_CR_POWER2_ALIGNED_OPTIMIZED)) atomic_inc(&sbi->s_bal_p2_aligned_bad_suggestions); - grp = NULL; for (i = ac->ac_2order; i < MB_NUM_ORDERS(ac->ac_sb); i++) { if (list_empty(&sbi->s_mb_largest_free_orders[i])) continue; @@ -892,28 +891,22 @@ static void ext4_mb_choose_next_group_p2_aligned(struct ext4_allocation_context read_unlock(&sbi->s_mb_largest_free_orders_locks[i]); continue; } - grp = NULL; 
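The reshaped mballoc scan loops in this area all follow the same refactor: drop the grp sentinel that was set inside the loop and tested afterwards, and instead return (or fall through to the criteria-escalation path) directly. A toy, compilable illustration of the resulting shape, with hypothetical types standing in for the mballoc structures:

	#include <stdbool.h>
	#include <stddef.h>

	struct toy_group { int id; bool good; };

	/* After the refactor: the first match returns immediately and the
	 * fall-through handles "nothing found" by escalating the criteria. */
	static int pick_group(const struct toy_group *g, size_t n, int *next_cr)
	{
		for (size_t i = 0; i < n; i++)
			if (g[i].good)
				return g[i].id;	/* no sentinel, no post-loop test */

		(*next_cr)++;			/* not found: try the next criteria */
		return -1;
	}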
list_for_each_entry(iter, &sbi->s_mb_largest_free_orders[i], bb_largest_free_order_node) { if (sbi->s_mb_stats) atomic64_inc(&sbi->s_bal_cX_groups_considered[CR_POWER2_ALIGNED]); if (likely(ext4_mb_good_group(ac, iter->bb_group, CR_POWER2_ALIGNED))) { - grp = iter; - break; + *group = iter->bb_group; + ac->ac_flags |= EXT4_MB_CR_POWER2_ALIGNED_OPTIMIZED; + read_unlock(&sbi->s_mb_largest_free_orders_locks[i]); + return; } } read_unlock(&sbi->s_mb_largest_free_orders_locks[i]); - if (grp) - break; } - if (!grp) { - /* Increment cr and search again */ - *new_cr = CR_GOAL_LEN_FAST; - } else { - *group = grp->bb_group; - ac->ac_flags |= EXT4_MB_CR_POWER2_ALIGNED_OPTIMIZED; - } + /* Increment cr and search again if no group is found */ + *new_cr = CR_GOAL_LEN_FAST; } /* @@ -966,16 +959,25 @@ static void ext4_mb_choose_next_group_goal_fast(struct ext4_allocation_context * for (i = mb_avg_fragment_size_order(ac->ac_sb, ac->ac_g_ex.fe_len); i < MB_NUM_ORDERS(ac->ac_sb); i++) { grp = ext4_mb_find_good_group_avg_frag_lists(ac, i); - if (grp) - break; + if (grp) { + *group = grp->bb_group; + ac->ac_flags |= EXT4_MB_CR_GOAL_LEN_FAST_OPTIMIZED; + return; + } } - if (grp) { - *group = grp->bb_group; - ac->ac_flags |= EXT4_MB_CR_GOAL_LEN_FAST_OPTIMIZED; - } else { + /* + * CR_BEST_AVAIL_LEN works based on the concept that we have + * a larger normalized goal len request which can be trimmed to + * a smaller goal len such that it can still satisfy original + * request len. However, allocation request for non-regular + * files never gets normalized. + * See function ext4_mb_normalize_request() (EXT4_MB_HINT_DATA). + */ + if (ac->ac_flags & EXT4_MB_HINT_DATA) *new_cr = CR_BEST_AVAIL_LEN; - } + else + *new_cr = CR_GOAL_LEN_SLOW; } /* @@ -1051,18 +1053,16 @@ static void ext4_mb_choose_next_group_best_avail(struct ext4_allocation_context ac->ac_g_ex.fe_len); grp = ext4_mb_find_good_group_avg_frag_lists(ac, frag_order); - if (grp) - break; + if (grp) { + *group = grp->bb_group; + ac->ac_flags |= EXT4_MB_CR_BEST_AVAIL_LEN_OPTIMIZED; + return; + } } - if (grp) { - *group = grp->bb_group; - ac->ac_flags |= EXT4_MB_CR_BEST_AVAIL_LEN_OPTIMIZED; - } else { - /* Reset goal length to original goal length before falling into CR_GOAL_LEN_SLOW */ - ac->ac_g_ex.fe_len = ac->ac_orig_goal_len; - *new_cr = CR_GOAL_LEN_SLOW; - } + /* Reset goal length to original goal length before falling into CR_GOAL_LEN_SLOW */ + ac->ac_g_ex.fe_len = ac->ac_orig_goal_len; + *new_cr = CR_GOAL_LEN_SLOW; } static inline int should_optimize_scan(struct ext4_allocation_context *ac) @@ -1080,8 +1080,9 @@ static inline int should_optimize_scan(struct ext4_allocation_context *ac) * Return next linear group for allocation. 
If linear traversal should not be * performed, this function just returns the same group */ -static int -next_linear_group(struct ext4_allocation_context *ac, int group, int ngroups) +static ext4_group_t +next_linear_group(struct ext4_allocation_context *ac, ext4_group_t group, + ext4_group_t ngroups) { if (!should_optimize_scan(ac)) goto inc_and_return; @@ -1255,7 +1256,7 @@ void ext4_mb_generate_buddy(struct super_block *sb, static int ext4_mb_init_cache(struct page *page, char *incore, gfp_t gfp) { ext4_group_t ngroups; - int blocksize; + unsigned int blocksize; int blocks_per_page; int groups_per_page; int err = 0; @@ -2450,7 +2451,7 @@ void ext4_mb_complex_scan_group(struct ext4_allocation_context *ac, break; } - if (ac->ac_criteria < CR_FAST) { + if (!ext4_mb_cr_expensive(ac->ac_criteria)) { /* * In CR_GOAL_LEN_FAST and CR_BEST_AVAIL_LEN, we are * sure that this group will have a large enough @@ -2553,7 +2554,7 @@ static bool ext4_mb_good_group(struct ext4_allocation_context *ac, BUG_ON(cr < CR_POWER2_ALIGNED || cr >= EXT4_MB_NUM_CRS); - if (unlikely(EXT4_MB_GRP_BBITMAP_CORRUPT(grp) || !grp)) + if (unlikely(!grp || EXT4_MB_GRP_BBITMAP_CORRUPT(grp))) return false; free = grp->bb_free; @@ -2634,7 +2635,12 @@ static int ext4_mb_good_group_nolock(struct ext4_allocation_context *ac, free = grp->bb_free; if (free == 0) goto out; - if (cr <= CR_FAST && free < ac->ac_g_ex.fe_len) + /* + * In all criteria except CR_ANY_FREE we try to avoid groups that + * can't possibly satisfy the full goal request due to insufficient + * free blocks. + */ + if (cr < CR_ANY_FREE && free < ac->ac_g_ex.fe_len) goto out; if (unlikely(EXT4_MB_GRP_BBITMAP_CORRUPT(grp))) goto out; @@ -2658,7 +2664,7 @@ static int ext4_mb_good_group_nolock(struct ext4_allocation_context *ac, * sure we locate metadata blocks in the first block group in * the flex_bg if possible. */ - if (cr < CR_FAST && + if (!ext4_mb_cr_expensive(cr) && (!sbi->s_log_groups_per_flex || ((group & ((1 << sbi->s_log_groups_per_flex) - 1)) != 0)) && !(ext4_has_group_desc_csum(sb) && @@ -2787,8 +2793,8 @@ ext4_mb_regular_allocator(struct ext4_allocation_context *ac) /* * ac->ac_2order is set only if the fe_len is a power of 2 - * if ac->ac_2order is set we also set criteria to 0 so that we - * try exact allocation using buddy. + * if ac->ac_2order is set we also set criteria to CR_POWER2_ALIGNED + * so that we try exact allocation using buddy. */ i = fls(ac->ac_g_ex.fe_len); ac->ac_2order = 0; @@ -2800,10 +2806,7 @@ ext4_mb_regular_allocator(struct ext4_allocation_context *ac) * requests up to the maximum buddy size we have constructed.
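The is_power_of_2() conversion just below replaces a hand-rolled mask test. For readers unfamiliar with the trick, the two forms accept exactly the same inputs; this is a self-contained user-space demonstration (the kernel's version lives in linux/log2.h, and fls is emulated here with a compiler builtin):

	#include <assert.h>

	static int fls32(unsigned int x)	/* highest set bit, 1-based */
	{
		return x ? 32 - __builtin_clz(x) : 0;
	}

	/* Old form: clear the top set bit, check that nothing remains. */
	static int old_test(unsigned int len)
	{
		return len && (len & ~(1u << (fls32(len) - 1))) == 0;
	}

	/* New form: the classic n & (n - 1) power-of-two test. */
	static int new_test(unsigned int len)
	{
		return len && (len & (len - 1)) == 0;
	}

	int main(void)
	{
		for (unsigned int len = 1; len < 1u << 20; len++)
			assert(old_test(len) == new_test(len));
		return 0;
	}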
*/ if (i >= sbi->s_mb_order2_reqs && i <= MB_NUM_ORDERS(sb)) { - /* - * This should tell if fe_len is exactly power of 2 - */ - if ((ac->ac_g_ex.fe_len & (~(1 << (i - 1)))) == 0) + if (is_power_of_2(ac->ac_g_ex.fe_len)) ac->ac_2order = array_index_nospec(i - 1, MB_NUM_ORDERS(sb)); } @@ -2848,11 +2851,11 @@ ext4_mb_regular_allocator(struct ext4_allocation_context *ac) /* * Batch reads of the block allocation bitmaps * to get multiple READs in flight; limit - * prefetching at cr=0/1, otherwise mballoc can - * spend a lot of time loading imperfect groups + * prefetching at inexpensive CR, otherwise mballoc + * can spend a lot of time loading imperfect groups */ if ((prefetch_grp == group) && - (cr >= CR_FAST || + (ext4_mb_cr_expensive(cr) || prefetch_ios < sbi->s_mb_prefetch_limit)) { nr = sbi->s_mb_prefetch; if (ext4_has_feature_flex_bg(sb)) { @@ -3501,11 +3504,10 @@ static void ext4_discard_work(struct work_struct *work) struct super_block *sb = sbi->s_sb; struct ext4_free_data *fd, *nfd; struct ext4_buddy e4b; - struct list_head discard_list; + LIST_HEAD(discard_list); ext4_group_t grp, load_grp; int err = 0; - INIT_LIST_HEAD(&discard_list); spin_lock(&sbi->s_md_lock); list_splice_init(&sbi->s_discard_list, &discard_list); spin_unlock(&sbi->s_md_lock); @@ -3879,12 +3881,10 @@ void ext4_process_freed_data(struct super_block *sb, tid_t commit_tid) { struct ext4_sb_info *sbi = EXT4_SB(sb); struct ext4_free_data *entry, *tmp; - struct list_head freed_data_list; + LIST_HEAD(freed_data_list); struct list_head *cut_pos = NULL; bool wake; - INIT_LIST_HEAD(&freed_data_list); - spin_lock(&sbi->s_md_lock); list_for_each_entry(entry, &sbi->s_freed_data_list, efd_list) { if (entry->efd_tid != commit_tid) @@ -4084,7 +4084,7 @@ void ext4_mb_mark_bb(struct super_block *sb, ext4_fsblk_t block, struct ext4_sb_info *sbi = EXT4_SB(sb); ext4_group_t group; ext4_grpblk_t blkoff; - int i, err; + int i, err = 0; int already; unsigned int clen, clen_changed, thisgrp_len; @@ -4222,12 +4222,13 @@ ext4_mb_pa_rb_next_iter(ext4_lblk_t new_start, ext4_lblk_t cur_start, struct rb_ static inline void ext4_mb_pa_assert_overlap(struct ext4_allocation_context *ac, - ext4_lblk_t start, ext4_lblk_t end) + ext4_lblk_t start, loff_t end) { struct ext4_sb_info *sbi = EXT4_SB(ac->ac_sb); struct ext4_inode_info *ei = EXT4_I(ac->ac_inode); struct ext4_prealloc_space *tmp_pa; - ext4_lblk_t tmp_pa_start, tmp_pa_end; + ext4_lblk_t tmp_pa_start; + loff_t tmp_pa_end; struct rb_node *iter; read_lock(&ei->i_prealloc_lock); @@ -4236,7 +4237,7 @@ ext4_mb_pa_assert_overlap(struct ext4_allocation_context *ac, tmp_pa = rb_entry(iter, struct ext4_prealloc_space, pa_node.inode_node); tmp_pa_start = tmp_pa->pa_lstart; - tmp_pa_end = tmp_pa->pa_lstart + EXT4_C2B(sbi, tmp_pa->pa_len); + tmp_pa_end = pa_logical_end(sbi, tmp_pa); spin_lock(&tmp_pa->pa_lock); if (tmp_pa->pa_deleted == 0) @@ -4258,14 +4259,14 @@ ext4_mb_pa_assert_overlap(struct ext4_allocation_context *ac, */ static inline void ext4_mb_pa_adjust_overlap(struct ext4_allocation_context *ac, - ext4_lblk_t *start, ext4_lblk_t *end) + ext4_lblk_t *start, loff_t *end) { struct ext4_inode_info *ei = EXT4_I(ac->ac_inode); struct ext4_sb_info *sbi = EXT4_SB(ac->ac_sb); struct ext4_prealloc_space *tmp_pa = NULL, *left_pa = NULL, *right_pa = NULL; struct rb_node *iter; - ext4_lblk_t new_start, new_end; - ext4_lblk_t tmp_pa_start, tmp_pa_end, left_pa_end = -1, right_pa_start = -1; + ext4_lblk_t new_start, tmp_pa_start, right_pa_start = -1; + loff_t new_end, tmp_pa_end, left_pa_end = -1; new_start = 
*start; new_end = *end; @@ -4284,7 +4285,7 @@ ext4_mb_pa_adjust_overlap(struct ext4_allocation_context *ac, tmp_pa = rb_entry(iter, struct ext4_prealloc_space, pa_node.inode_node); tmp_pa_start = tmp_pa->pa_lstart; - tmp_pa_end = tmp_pa->pa_lstart + EXT4_C2B(sbi, tmp_pa->pa_len); + tmp_pa_end = pa_logical_end(sbi, tmp_pa); /* PA must not overlap original request */ spin_lock(&tmp_pa->pa_lock); @@ -4364,8 +4365,7 @@ ext4_mb_pa_adjust_overlap(struct ext4_allocation_context *ac, } if (left_pa) { - left_pa_end = - left_pa->pa_lstart + EXT4_C2B(sbi, left_pa->pa_len); + left_pa_end = pa_logical_end(sbi, left_pa); BUG_ON(left_pa_end > ac->ac_o_ex.fe_logical); } @@ -4404,8 +4404,7 @@ ext4_mb_normalize_request(struct ext4_allocation_context *ac, struct ext4_sb_info *sbi = EXT4_SB(ac->ac_sb); struct ext4_super_block *es = sbi->s_es; int bsbits, max; - ext4_lblk_t end; - loff_t size, start_off; + loff_t size, start_off, end; loff_t orig_size __maybe_unused; ext4_lblk_t start; @@ -4432,7 +4431,7 @@ ext4_mb_normalize_request(struct ext4_allocation_context *ac, /* first, let's learn actual file size * given current request is allocated */ - size = ac->ac_o_ex.fe_logical + EXT4_C2B(sbi, ac->ac_o_ex.fe_len); + size = extent_logical_end(sbi, &ac->ac_o_ex); size = size << bsbits; if (size < i_size_read(ac->ac_inode)) size = i_size_read(ac->ac_inode); @@ -4766,7 +4765,6 @@ ext4_mb_use_preallocated(struct ext4_allocation_context *ac) struct ext4_inode_info *ei = EXT4_I(ac->ac_inode); struct ext4_locality_group *lg; struct ext4_prealloc_space *tmp_pa = NULL, *cpa = NULL; - loff_t tmp_pa_end; struct rb_node *iter; ext4_fsblk_t goal_block; @@ -4862,9 +4860,7 @@ ext4_mb_use_preallocated(struct ext4_allocation_context *ac) * pa can possibly satisfy the request hence check if it overlaps * original logical start and stop searching if it doesn't. */ - tmp_pa_end = (loff_t)tmp_pa->pa_lstart + EXT4_C2B(sbi, tmp_pa->pa_len); - - if (ac->ac_o_ex.fe_logical >= tmp_pa_end) { + if (ac->ac_o_ex.fe_logical >= pa_logical_end(sbi, tmp_pa)) { spin_unlock(&tmp_pa->pa_lock); goto try_group_pa; } @@ -4984,7 +4980,6 @@ static void ext4_mb_generate_from_freelist(struct super_block *sb, void *bitmap, mb_set_bits(bitmap, entry->efd_start_cluster, entry->efd_count); n = rb_next(n); } - return; } /* @@ -5180,8 +5175,11 @@ ext4_mb_new_inode_pa(struct ext4_allocation_context *ac) pa = ac->ac_pa; if (ac->ac_b_ex.fe_len < ac->ac_orig_goal_len) { - int new_bex_start; - int new_bex_end; + struct ext4_free_extent ex = { + .fe_logical = ac->ac_g_ex.fe_logical, + .fe_len = ac->ac_orig_goal_len, + }; + loff_t orig_goal_end = extent_logical_end(sbi, &ex); /* we can't allocate as much as normalizer wants. * so, found space must get proper lstart @@ -5200,29 +5198,23 @@ ext4_mb_new_inode_pa(struct ext4_allocation_context *ac) * still cover original start * 3. Else, keep the best ex at start of original request. 
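The extent_logical_end() and pa_logical_end() helpers used above (defined in mballoc.h later in this series) exist because the end computation can overflow the 32-bit ext4_lblk_t. The self-contained program below reproduces the failure mode; the typedef is a stand-in for the kernel type and the values are invented:

	#include <assert.h>
	#include <stdint.h>

	typedef uint32_t lblk_t;	/* stand-in for ext4_lblk_t */

	int main(void)
	{
		lblk_t start = 0xffffff00u;	/* logical start near 2^32 */
		uint32_t len = 0x200;		/* length in blocks */

		lblk_t wrapped = start + len;	/* 32-bit addition wraps */
		assert(wrapped < start);

		/* widen first, as the new helpers do with (loff_t) */
		int64_t end = (int64_t)start + len;
		assert(end == 0x100000100LL);	/* the true, unwrapped end */
		return 0;
	}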
*/ - new_bex_end = ac->ac_g_ex.fe_logical + - EXT4_C2B(sbi, ac->ac_orig_goal_len); - new_bex_start = new_bex_end - EXT4_C2B(sbi, ac->ac_b_ex.fe_len); - if (ac->ac_o_ex.fe_logical >= new_bex_start) - goto adjust_bex; + ex.fe_len = ac->ac_b_ex.fe_len; - new_bex_start = ac->ac_g_ex.fe_logical; - new_bex_end = - new_bex_start + EXT4_C2B(sbi, ac->ac_b_ex.fe_len); - if (ac->ac_o_ex.fe_logical < new_bex_end) + ex.fe_logical = orig_goal_end - EXT4_C2B(sbi, ex.fe_len); + if (ac->ac_o_ex.fe_logical >= ex.fe_logical) goto adjust_bex; - new_bex_start = ac->ac_o_ex.fe_logical; - new_bex_end = - new_bex_start + EXT4_C2B(sbi, ac->ac_b_ex.fe_len); + ex.fe_logical = ac->ac_g_ex.fe_logical; + if (ac->ac_o_ex.fe_logical < extent_logical_end(sbi, &ex)) + goto adjust_bex; + ex.fe_logical = ac->ac_o_ex.fe_logical; adjust_bex: - ac->ac_b_ex.fe_logical = new_bex_start; + ac->ac_b_ex.fe_logical = ex.fe_logical; BUG_ON(ac->ac_o_ex.fe_logical < ac->ac_b_ex.fe_logical); BUG_ON(ac->ac_o_ex.fe_len > ac->ac_b_ex.fe_len); - BUG_ON(new_bex_end > (ac->ac_g_ex.fe_logical + - EXT4_C2B(sbi, ac->ac_orig_goal_len))); + BUG_ON(extent_logical_end(sbi, &ex) > orig_goal_end); } pa->pa_lstart = ac->ac_b_ex.fe_logical; @@ -5419,7 +5411,7 @@ ext4_mb_discard_group_preallocations(struct super_block *sb, struct ext4_group_info *grp = ext4_get_group_info(sb, group); struct buffer_head *bitmap_bh = NULL; struct ext4_prealloc_space *pa, *tmp; - struct list_head list; + LIST_HEAD(list); struct ext4_buddy e4b; struct ext4_inode_info *ei; int err; @@ -5448,7 +5440,6 @@ ext4_mb_discard_group_preallocations(struct super_block *sb, goto out_dbg; } - INIT_LIST_HEAD(&list); ext4_lock_group(sb, group); list_for_each_entry_safe(pa, tmp, &grp->bb_prealloc_list, pa_group_list) { @@ -5529,7 +5520,7 @@ void ext4_discard_preallocations(struct inode *inode, unsigned int needed) struct buffer_head *bitmap_bh = NULL; struct ext4_prealloc_space *pa, *tmp; ext4_group_t group = 0; - struct list_head list; + LIST_HEAD(list); struct ext4_buddy e4b; struct rb_node *iter; int err; @@ -5546,8 +5537,6 @@ void ext4_discard_preallocations(struct inode *inode, unsigned int needed) trace_ext4_discard_preallocations(inode, atomic_read(&ei->i_prealloc_active), needed); - INIT_LIST_HEAD(&list); - if (needed == 0) needed = UINT_MAX; @@ -5671,7 +5660,7 @@ static inline void ext4_mb_show_pa(struct super_block *sb) { ext4_group_t i, ngroups; - if (ext4_test_mount_flag(sb, EXT4_MF_FS_ABORTED)) + if (ext4_forced_shutdown(sb)) return; ngroups = ext4_get_groups_count(sb); @@ -5705,7 +5694,7 @@ static void ext4_mb_show_ac(struct ext4_allocation_context *ac) { struct super_block *sb = ac->ac_sb; - if (ext4_test_mount_flag(sb, EXT4_MF_FS_ABORTED)) + if (ext4_forced_shutdown(sb)) return; mb_debug(sb, "Can't allocate:" @@ -5738,12 +5727,10 @@ static void ext4_mb_show_ac(struct ext4_allocation_context *ac) #else static inline void ext4_mb_show_pa(struct super_block *sb) { - return; } static inline void ext4_mb_show_ac(struct ext4_allocation_context *ac) { ext4_mb_show_pa(ac->ac_sb); - return; } #endif @@ -5769,7 +5756,7 @@ static void ext4_mb_group_or_file(struct ext4_allocation_context *ac) group_pa_eligible = sbi->s_mb_group_prealloc > 0; inode_pa_eligible = true; - size = ac->ac_o_ex.fe_logical + EXT4_C2B(sbi, ac->ac_o_ex.fe_len); + size = extent_logical_end(sbi, &ac->ac_o_ex); isize = (i_size_read(ac->ac_inode) + ac->ac_sb->s_blocksize - 1) >> bsbits; @@ -5865,13 +5852,11 @@ ext4_mb_discard_lg_preallocations(struct super_block *sb, { ext4_group_t group = 0; struct ext4_buddy e4b; - 
struct list_head discard_list; + LIST_HEAD(discard_list); struct ext4_prealloc_space *pa, *tmp; mb_debug(sb, "discard locality group preallocation\n"); - INIT_LIST_HEAD(&discard_list); - spin_lock(&lg->lg_prealloc_lock); list_for_each_entry_rcu(pa, &lg->lg_prealloc_list[order], pa_node.lg_list, @@ -5984,12 +5969,9 @@ static void ext4_mb_add_n_trim(struct ext4_allocation_context *ac) spin_unlock(&lg->lg_prealloc_lock); /* Now trim the list to be not more than 8 elements */ - if (lg_prealloc_count > 8) { + if (lg_prealloc_count > 8) ext4_mb_discard_lg_preallocations(sb, lg, order, lg_prealloc_count); - return; - } - return ; } /* @@ -6102,7 +6084,7 @@ ext4_mb_new_blocks_simple(struct ext4_allocation_request *ar, int *errp) ext4_grpblk_t max = EXT4_CLUSTERS_PER_GROUP(sb); ext4_grpblk_t i = 0; ext4_fsblk_t goal, block; - struct ext4_super_block *es = EXT4_SB(sb)->s_es; + struct ext4_super_block *es = sbi->s_es; goal = ar->goal; if (goal < le32_to_cpu(es->s_first_data_block) || @@ -6643,7 +6625,6 @@ static void ext4_mb_clear_bb(handle_t *handle, struct inode *inode, error_return: brelse(bitmap_bh); ext4_std_error(sb, err); - return; } /** @@ -6746,7 +6727,6 @@ void ext4_free_blocks(handle_t *handle, struct inode *inode, } ext4_mb_clear_bb(handle, inode, block, count, flags); - return; } /** @@ -6936,8 +6916,7 @@ __releases(ext4_group_lock_ptr(sb, e4b->bd_group)) void *bitmap; bitmap = e4b->bd_bitmap; - start = (e4b->bd_info->bb_first_free > start) ? - e4b->bd_info->bb_first_free : start; + start = max(e4b->bd_info->bb_first_free, start); count = 0; free_count = 0; @@ -7154,8 +7133,7 @@ ext4_mballoc_query_range( ext4_lock_group(sb, group); - start = (e4b.bd_info->bb_first_free > start) ? - e4b.bd_info->bb_first_free : start; + start = max(e4b.bd_info->bb_first_free, start); if (end >= EXT4_CLUSTERS_PER_GROUP(sb)) end = EXT4_CLUSTERS_PER_GROUP(sb) - 1; diff --git a/fs/ext4/mballoc.h b/fs/ext4/mballoc.h index df6b5e7c22741..d7aeb5da7d867 100644 --- a/fs/ext4/mballoc.h +++ b/fs/ext4/mballoc.h @@ -233,6 +233,20 @@ static inline ext4_fsblk_t ext4_grp_offs_to_block(struct super_block *sb, (fex->fe_start << EXT4_SB(sb)->s_cluster_bits); } +static inline loff_t extent_logical_end(struct ext4_sb_info *sbi, + struct ext4_free_extent *fex) +{ + /* Use loff_t to avoid end exceeding ext4_lblk_t max. */ + return (loff_t)fex->fe_logical + EXT4_C2B(sbi, fex->fe_len); +} + +static inline loff_t pa_logical_end(struct ext4_sb_info *sbi, + struct ext4_prealloc_space *pa) +{ + /* Use loff_t to avoid end exceeding ext4_lblk_t max. 
*/ + return (loff_t)pa->pa_lstart + EXT4_C2B(sbi, pa->pa_len); +} + typedef int (*ext4_mballoc_query_range_fn)( struct super_block *sb, ext4_group_t agno, diff --git a/fs/ext4/mmp.c b/fs/ext4/mmp.c index 0aaf38ffcb6ec..bd946d0c71b70 100644 --- a/fs/ext4/mmp.c +++ b/fs/ext4/mmp.c @@ -162,7 +162,7 @@ static int kmmpd(void *data) memcpy(mmp->mmp_nodename, init_utsname()->nodename, sizeof(mmp->mmp_nodename)); - while (!kthread_should_stop() && !sb_rdonly(sb)) { + while (!kthread_should_stop() && !ext4_forced_shutdown(sb)) { if (!ext4_has_feature_mmp(sb)) { ext4_warning(sb, "kmmpd being stopped since MMP feature" " has been disabled."); diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c index 933ad03f4f585..41a6411c600b1 100644 --- a/fs/ext4/namei.c +++ b/fs/ext4/namei.c @@ -1445,7 +1445,7 @@ int ext4_fname_setup_ci_filename(struct inode *dir, const struct qstr *iname, struct dx_hash_info *hinfo = &name->hinfo; int len; - if (!IS_CASEFOLDED(dir) || !dir->i_sb->s_encoding || + if (!IS_CASEFOLDED(dir) || (IS_ENCRYPTED(dir) && !fscrypt_has_encryption_key(dir))) { cf_name->name = NULL; return 0; @@ -1496,7 +1496,7 @@ static bool ext4_match(struct inode *parent, #endif #if IS_ENABLED(CONFIG_UNICODE) - if (parent->i_sb->s_encoding && IS_CASEFOLDED(parent) && + if (IS_CASEFOLDED(parent) && (!IS_ENCRYPTED(parent) || fscrypt_has_encryption_key(parent))) { if (fname->cf_name.name) { struct qstr cf = {.name = fname->cf_name.name, @@ -2393,7 +2393,7 @@ static int ext4_add_entry(handle_t *handle, struct dentry *dentry, #if IS_ENABLED(CONFIG_UNICODE) if (sb_has_strict_encoding(sb) && IS_CASEFOLDED(dir) && - sb->s_encoding && utf8_validate(sb->s_encoding, &dentry->d_name)) + utf8_validate(sb->s_encoding, &dentry->d_name)) return -EINVAL; #endif @@ -2799,6 +2799,7 @@ static int ext4_add_nondir(handle_t *handle, return err; } drop_nlink(inode); + ext4_mark_inode_dirty(handle, inode); ext4_orphan_add(handle, inode); unlock_new_inode(inode); return err; @@ -3142,7 +3143,7 @@ static int ext4_rmdir(struct inode *dir, struct dentry *dentry) struct ext4_dir_entry_2 *de; handle_t *handle = NULL; - if (unlikely(ext4_forced_shutdown(EXT4_SB(dir->i_sb)))) + if (unlikely(ext4_forced_shutdown(dir->i_sb))) return -EIO; /* Initialize quotas before so that eventual writes go in @@ -3302,7 +3303,7 @@ static int ext4_unlink(struct inode *dir, struct dentry *dentry) { int retval; - if (unlikely(ext4_forced_shutdown(EXT4_SB(dir->i_sb)))) + if (unlikely(ext4_forced_shutdown(dir->i_sb))) return -EIO; trace_ext4_unlink_enter(dir, dentry); @@ -3370,7 +3371,7 @@ static int ext4_symlink(struct mnt_idmap *idmap, struct inode *dir, struct fscrypt_str disk_link; int retries = 0; - if (unlikely(ext4_forced_shutdown(EXT4_SB(dir->i_sb)))) + if (unlikely(ext4_forced_shutdown(dir->i_sb))) return -EIO; err = fscrypt_prepare_symlink(dir, symname, len, dir->i_sb->s_blocksize, @@ -3437,6 +3438,7 @@ static int ext4_symlink(struct mnt_idmap *idmap, struct inode *dir, err_drop_inode: clear_nlink(inode); + ext4_mark_inode_dirty(handle, inode); ext4_orphan_add(handle, inode); unlock_new_inode(inode); if (handle) @@ -4021,6 +4023,7 @@ static int ext4_rename(struct mnt_idmap *idmap, struct inode *old_dir, ext4_resetent(handle, &old, old.inode->i_ino, old_file_type); drop_nlink(whiteout); + ext4_mark_inode_dirty(handle, whiteout); ext4_orphan_add(handle, whiteout); } unlock_new_inode(whiteout); @@ -4187,7 +4190,7 @@ static int ext4_rename2(struct mnt_idmap *idmap, { int err; - if (unlikely(ext4_forced_shutdown(EXT4_SB(old_dir->i_sb)))) + if 
(unlikely(ext4_forced_shutdown(old_dir->i_sb))) return -EIO; if (flags & ~(RENAME_NOREPLACE | RENAME_EXCHANGE | RENAME_WHITEOUT)) diff --git a/fs/ext4/page-io.c b/fs/ext4/page-io.c index 3621f29ec6712..dfdd7e5cf0389 100644 --- a/fs/ext4/page-io.c +++ b/fs/ext4/page-io.c @@ -184,7 +184,7 @@ static int ext4_end_io_end(ext4_io_end_t *io_end) io_end->handle = NULL; /* Following call will use up the handle */ ret = ext4_convert_unwritten_io_end_vec(handle, io_end); - if (ret < 0 && !ext4_forced_shutdown(EXT4_SB(inode->i_sb))) { + if (ret < 0 && !ext4_forced_shutdown(inode->i_sb)) { ext4_msg(inode->i_sb, KERN_EMERG, "failed to convert unwritten extents to written " "extents -- potential data loss! " diff --git a/fs/ext4/super.c b/fs/ext4/super.c index 73547d2334fd7..38217422f9388 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -434,6 +434,57 @@ static time64_t __ext4_get_tstamp(__le32 *lo, __u8 *hi) #define ext4_get_tstamp(es, tstamp) \ __ext4_get_tstamp(&(es)->tstamp, &(es)->tstamp ## _hi) +#define EXT4_SB_REFRESH_INTERVAL_SEC (3600) /* seconds (1 hour) */ +#define EXT4_SB_REFRESH_INTERVAL_KB (16384) /* kilobytes (16MB) */ + +/* + * The ext4_maybe_update_superblock() function checks and updates the + * superblock if needed. + * + * This function is designed to update the on-disk superblock only under + * certain conditions to prevent excessive disk writes and unnecessary + * waking of the disk from sleep. The superblock will be updated if: + * 1. More than an hour has passed since the last superblock update, and + * 2. More than 16MB have been written since the last superblock update. + * + * @sb: The superblock + */ +static void ext4_maybe_update_superblock(struct super_block *sb) +{ + struct ext4_sb_info *sbi = EXT4_SB(sb); + struct ext4_super_block *es = sbi->s_es; + journal_t *journal = sbi->s_journal; + time64_t now; + __u64 last_update; + __u64 lifetime_write_kbytes; + __u64 diff_size; + + if (sb_rdonly(sb) || !(sb->s_flags & SB_ACTIVE) || + !journal || (journal->j_flags & JBD2_UNMOUNT)) + return; + + now = ktime_get_real_seconds(); + last_update = ext4_get_tstamp(es, s_wtime); + + if (likely(now - last_update < EXT4_SB_REFRESH_INTERVAL_SEC)) + return; + + lifetime_write_kbytes = sbi->s_kbytes_written + + ((part_stat_read(sb->s_bdev, sectors[STAT_WRITE]) - + sbi->s_sectors_written_start) >> 1); + + /* Compute the number of kilobytes written since s_kbytes_written + * was last recorded in the on-disk superblock and compare it with + * the 16MB threshold. This bounds how often the superblock is + * committed (i.e. not more often than once per 16MB written when + * updates are at least an hour apart). + */
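To make the arithmetic concrete, here is a small user-space rendition of the threshold check; all values are invented and the part_stat_read() plumbing is replaced by plain integers:

	#include <assert.h>
	#include <stdint.h>

	#define REFRESH_INTERVAL_KB 16384	/* 16 MB, as in the patch */

	int main(void)
	{
		uint64_t sb_kbytes_written = 1000;	/* value recorded on disk */
		uint64_t in_memory_kb = 1000;		/* sbi->s_kbytes_written */
		uint64_t sectors_now = 41984;		/* lifetime 512-byte sectors */
		uint64_t sectors_at_mount = 8192;

		/* 512-byte sectors -> kilobytes is a shift by one */
		uint64_t lifetime_kb = in_memory_kb +
				       ((sectors_now - sectors_at_mount) >> 1);
		assert(lifetime_kb == 1000 + 16896);

		/* fires only when BOTH gates pass: an hour has elapsed
		 * (checked earlier) and more than 16MB is unaccounted for */
		uint64_t diff_kb = lifetime_kb - sb_kbytes_written;
		assert(diff_kb > REFRESH_INTERVAL_KB);
		return 0;
	}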
+ diff_size = lifetime_write_kbytes - le64_to_cpu(es->s_kbytes_written); + + if (diff_size > EXT4_SB_REFRESH_INTERVAL_KB) + schedule_work(&EXT4_SB(sb)->s_sb_upd_work); +} + /* * The del_gendisk() function uninitializes the disk-specific data * structures, including the bdi structure, without telling anyone @@ -460,6 +511,7 @@ static void ext4_journal_commit_callback(journal_t *journal, transaction_t *txn) BUG_ON(txn->t_state == T_FINISHED); ext4_process_freed_data(sb, txn->t_tid); + ext4_maybe_update_superblock(sb); spin_lock(&sbi->s_md_lock); while (!list_empty(&txn->t_private_list)) { @@ -658,7 +710,7 @@ static void ext4_handle_error(struct super_block *sb, bool force_ro, int error, WARN_ON_ONCE(1); if (!continue_fs && !sb_rdonly(sb)) { - ext4_set_mount_flag(sb, EXT4_MF_FS_ABORTED); + set_bit(EXT4_FLAGS_SHUTDOWN, &EXT4_SB(sb)->s_ext4_flags); if (journal) jbd2_journal_abort(journal, -EIO); } @@ -672,7 +724,7 @@ static void ext4_handle_error(struct super_block *sb, bool force_ro, int error, * defer superblock flushing to a workqueue. */ if (continue_fs && journal) - schedule_work(&EXT4_SB(sb)->s_error_work); + schedule_work(&EXT4_SB(sb)->s_sb_upd_work); else ext4_commit_super(sb); } @@ -699,10 +751,10 @@ static void ext4_handle_error(struct super_block *sb, bool force_ro, int error, sb->s_flags |= SB_RDONLY; } -static void flush_stashed_error_work(struct work_struct *work) +static void update_super_work(struct work_struct *work) { struct ext4_sb_info *sbi = container_of(work, struct ext4_sb_info, - s_error_work); + s_sb_upd_work); journal_t *journal = sbi->s_journal; handle_t *handle; @@ -716,6 +768,7 @@ static void flush_stashed_error_work(struct work_struct *work) */ if (!sb_rdonly(sbi->s_sb) && journal) { struct buffer_head *sbh = sbi->s_sbh; + bool call_notify_err = false; handle = jbd2_journal_start(journal, 1); if (IS_ERR(handle)) goto write_directly; @@ -723,6 +776,10 @@ static void flush_stashed_error_work(struct work_struct *work) jbd2_journal_stop(handle); goto write_directly; } + + if (sbi->s_add_error_count > 0) + call_notify_err = true; + ext4_update_super(sbi->s_sb); if (buffer_write_io_error(sbh) || !buffer_uptodate(sbh)) { ext4_msg(sbi->s_sb, KERN_ERR, "previous I/O error to " @@ -736,7 +793,10 @@ static void flush_stashed_error_work(struct work_struct *work) goto write_directly; } jbd2_journal_stop(handle); - ext4_notify_error_sysfs(sbi); + + if (call_notify_err) + ext4_notify_error_sysfs(sbi); + return; } write_directly: @@ -759,7 +819,7 @@ void __ext4_error(struct super_block *sb, const char *function, struct va_format vaf; va_list args; - if (unlikely(ext4_forced_shutdown(EXT4_SB(sb)))) + if (unlikely(ext4_forced_shutdown(sb))) return; trace_ext4_error(sb, function, line); @@ -784,7 +844,7 @@ void __ext4_error_inode(struct inode *inode, const char *function, va_list args; struct va_format vaf; - if (unlikely(ext4_forced_shutdown(EXT4_SB(inode->i_sb)))) + if (unlikely(ext4_forced_shutdown(inode->i_sb))) return; trace_ext4_error(inode->i_sb, function, line); @@ -819,7 +879,7 @@ void __ext4_error_file(struct file *file, const char *function, struct inode *inode = file_inode(file); char pathname[80], *path; - if (unlikely(ext4_forced_shutdown(EXT4_SB(inode->i_sb)))) + if (unlikely(ext4_forced_shutdown(inode->i_sb))) return; trace_ext4_error(inode->i_sb, function, line); @@ -899,7 +959,7 @@ void __ext4_std_error(struct super_block *sb, const char *function, char nbuf[16]; const char *errstr; - if (unlikely(ext4_forced_shutdown(EXT4_SB(sb)))) + if
(unlikely(ext4_forced_shutdown(sb))) return; /* Special case: if the error is EROFS, and we're not already @@ -993,7 +1053,7 @@ __acquires(bitlock) struct va_format vaf; va_list args; - if (unlikely(ext4_forced_shutdown(EXT4_SB(sb)))) + if (unlikely(ext4_forced_shutdown(sb))) return; trace_ext4_error(sb, function, line); @@ -1019,7 +1079,7 @@ __acquires(bitlock) if (!bdev_read_only(sb->s_bdev)) { save_error_info(sb, EFSCORRUPTED, ino, block, function, line); - schedule_work(&EXT4_SB(sb)->s_error_work); + schedule_work(&EXT4_SB(sb)->s_sb_upd_work); } return; } @@ -1097,26 +1157,6 @@ void ext4_update_dynamic_rev(struct super_block *sb) */ } -/* - * Open the external journal device - */ -static struct block_device *ext4_blkdev_get(dev_t dev, struct super_block *sb) -{ - struct block_device *bdev; - - bdev = blkdev_get_by_dev(dev, BLK_OPEN_READ | BLK_OPEN_WRITE, sb, - &fs_holder_ops); - if (IS_ERR(bdev)) - goto fail; - return bdev; - -fail: - ext4_msg(sb, KERN_ERR, - "failed to open journal device unknown-block(%u,%u) %ld", - MAJOR(dev), MINOR(dev), PTR_ERR(bdev)); - return NULL; -} - static inline struct inode *orphan_list_entry(struct list_head *l) { return &list_entry(l, struct ext4_inode_info, i_orphan)->vfs_inode; @@ -1251,10 +1291,10 @@ static void ext4_put_super(struct super_block *sb) * Unregister sysfs before destroying jbd2 journal. * Since we could still access attr_journal_task attribute via sysfs * path which could have sbi->s_journal->j_task as NULL - * Unregister sysfs before flush sbi->s_error_work. + * Unregister sysfs before flushing sbi->s_sb_upd_work. * Since a user may read /proc/fs/ext4/xx/mb_groups during umount, a * failed metadata read there can queue error work, and - * flush_stashed_error_work will call start_this_handle may trigger + * update_super_work calling start_this_handle may then trigger * BUG_ON.
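Taken together, the s_sb_upd_work conversions imply a strict ordering at teardown, which the comment above spells out for ext4_put_super(). A condensed sketch of the rule follows; only the three called functions are real APIs, the wrapper is hypothetical, and the real teardown interleaves quota and orphan cleanup that is elided here:

	/* Sketch: why sysfs goes first and the journal goes last. */
	static void teardown_order_sketch(struct super_block *sb)
	{
		struct ext4_sb_info *sbi = EXT4_SB(sb);

		ext4_unregister_sysfs(sb);	/* 1: stop readers that could
						 *    queue new error work */
		flush_work(&sbi->s_sb_upd_work);/* 2: drain pending superblock
						 *    updates */
		jbd2_journal_destroy(sbi->s_journal);	/* 3: safe only now */
		sbi->s_journal = NULL;
	}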
*/ ext4_unregister_sysfs(sb); @@ -1266,7 +1306,7 @@ static void ext4_put_super(struct super_block *sb) ext4_unregister_li_request(sb); ext4_quotas_off(sb, EXT4_MAXQUOTAS); - flush_work(&sbi->s_error_work); + flush_work(&sbi->s_sb_upd_work); destroy_workqueue(sbi->rsv_conversion_wq); ext4_release_orphan_info(sb); @@ -1875,6 +1915,7 @@ static const struct mount_opts { {Opt_fc_debug_force, EXT4_MOUNT2_JOURNAL_FAST_COMMIT, MOPT_SET | MOPT_2 | MOPT_EXT4_ONLY}, #endif + {Opt_abort, EXT4_MOUNT2_ABORT, MOPT_SET | MOPT_2}, {Opt_err, 0, 0} }; @@ -1943,8 +1984,6 @@ struct ext4_fs_context { unsigned int mask_s_mount_opt; unsigned int vals_s_mount_opt2; unsigned int mask_s_mount_opt2; - unsigned long vals_s_mount_flags; - unsigned long mask_s_mount_flags; unsigned int opt_flags; /* MOPT flags */ unsigned int spec; u32 s_max_batch_time; @@ -2095,12 +2134,6 @@ EXT4_SET_CTX(mount_opt2); EXT4_CLEAR_CTX(mount_opt2); EXT4_TEST_CTX(mount_opt2); -static inline void ctx_set_mount_flag(struct ext4_fs_context *ctx, int bit) -{ - set_bit(bit, &ctx->mask_s_mount_flags); - set_bit(bit, &ctx->vals_s_mount_flags); -} - static int ext4_parse_param(struct fs_context *fc, struct fs_parameter *param) { struct ext4_fs_context *ctx = fc->fs_private; @@ -2164,9 +2197,6 @@ static int ext4_parse_param(struct fs_context *fc, struct fs_parameter *param) ext4_msg(NULL, KERN_WARNING, "Ignoring removed %s option", param->key); return 0; - case Opt_abort: - ctx_set_mount_flag(ctx, EXT4_MF_FS_ABORTED); - return 0; case Opt_inlinecrypt: #ifdef CONFIG_FS_ENCRYPTION_INLINE_CRYPT ctx_set_flags(ctx, SB_INLINECRYPT); @@ -2820,8 +2850,6 @@ static void ext4_apply_options(struct fs_context *fc, struct super_block *sb) sbi->s_mount_opt |= ctx->vals_s_mount_opt; sbi->s_mount_opt2 &= ~ctx->mask_s_mount_opt2; sbi->s_mount_opt2 |= ctx->vals_s_mount_opt2; - sbi->s_mount_flags &= ~ctx->mask_s_mount_flags; - sbi->s_mount_flags |= ctx->vals_s_mount_flags; sb->s_flags &= ~ctx->mask_s_flags; sb->s_flags |= ctx->vals_s_flags; @@ -4210,7 +4238,7 @@ int ext4_calculate_overhead(struct super_block *sb) else if (ext4_has_feature_journal(sb) && !sbi->s_journal && j_inum) { /* j_inum for internal journal is non-zero */ j_inode = ext4_get_journal_inode(sb, j_inum); - if (j_inode) { + if (!IS_ERR(j_inode)) { j_blocks = j_inode->i_size >> sb->s_blocksize_bits; overhead += EXT4_NUM_B2C(sbi, j_blocks); iput(j_inode); @@ -4948,8 +4976,8 @@ static int ext4_load_and_init_journal(struct super_block *sb, return 0; out: - /* flush s_error_work before journal destroy. */ - flush_work(&sbi->s_error_work); + /* flush s_sb_upd_work before destroying the journal. */ + flush_work(&sbi->s_sb_upd_work); jbd2_journal_destroy(sbi->s_journal); sbi->s_journal = NULL; return -EINVAL; @@ -5272,7 +5300,7 @@ static int __ext4_fill_super(struct fs_context *fc, struct super_block *sb) timer_setup(&sbi->s_err_report, print_daily_error_info, 0); spin_lock_init(&sbi->s_error_lock); - INIT_WORK(&sbi->s_error_work, flush_stashed_error_work); + INIT_WORK(&sbi->s_sb_upd_work, update_super_work); err = ext4_group_desc_init(sb, es, logical_sb_block, &first_not_zeroed); if (err) @@ -5615,16 +5643,16 @@ failed_mount9: __maybe_unused sbi->s_ea_block_cache = NULL; if (sbi->s_journal) { - /* flush s_error_work before journal destroy. */ - flush_work(&sbi->s_error_work); + /* flush s_sb_upd_work before journal destroy. 
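The journal-open helpers in the following hunks switch from returning NULL to propagating ERR_PTR() values, so ext4_load_journal() can surface the precise errno instead of a blanket -EINVAL. The standard idiom, condensed (the sketch function is hypothetical; ERR_PTR/IS_ERR/PTR_ERR/ERR_CAST are the real helpers from linux/err.h):

	static int load_journal_sketch(struct super_block *sb, unsigned int inum)
	{
		journal_t *journal = ext4_open_inode_journal(sb, inum);

		if (IS_ERR(journal))
			return PTR_ERR(journal);  /* exact errno, e.g. -EFSCORRUPTED */
		/* ... use the journal ... */
		return 0;
	}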
*/ + flush_work(&sbi->s_sb_upd_work); jbd2_journal_destroy(sbi->s_journal); sbi->s_journal = NULL; } failed_mount3a: ext4_es_unregister_shrinker(sbi); failed_mount3: - /* flush s_error_work before sbi destroy */ - flush_work(&sbi->s_error_work); + /* flush s_sb_upd_work before sbi destroy */ + flush_work(&sbi->s_sb_upd_work); del_timer_sync(&sbi->s_err_report); ext4_stop_mmpd(sbi); ext4_group_desc_free(sbi); @@ -5751,22 +5779,22 @@ static struct inode *ext4_get_journal_inode(struct super_block *sb, journal_inode = ext4_iget(sb, journal_inum, EXT4_IGET_SPECIAL); if (IS_ERR(journal_inode)) { ext4_msg(sb, KERN_ERR, "no journal found"); - return NULL; + return ERR_CAST(journal_inode); } if (!journal_inode->i_nlink) { make_bad_inode(journal_inode); iput(journal_inode); ext4_msg(sb, KERN_ERR, "journal inode is deleted"); - return NULL; + return ERR_PTR(-EFSCORRUPTED); } - - ext4_debug("Journal inode found at %p: %lld bytes\n", - journal_inode, journal_inode->i_size); if (!S_ISREG(journal_inode->i_mode) || IS_ENCRYPTED(journal_inode)) { ext4_msg(sb, KERN_ERR, "invalid journal inode"); iput(journal_inode); - return NULL; + return ERR_PTR(-EFSCORRUPTED); } + + ext4_debug("Journal inode found at %p: %lld bytes\n", + journal_inode, journal_inode->i_size); return journal_inode; } @@ -5792,24 +5820,21 @@ static int ext4_journal_bmap(journal_t *journal, sector_t *block) return 0; } -static journal_t *ext4_get_journal(struct super_block *sb, - unsigned int journal_inum) +static journal_t *ext4_open_inode_journal(struct super_block *sb, + unsigned int journal_inum) { struct inode *journal_inode; journal_t *journal; - if (WARN_ON_ONCE(!ext4_has_feature_journal(sb))) - return NULL; - journal_inode = ext4_get_journal_inode(sb, journal_inum); - if (!journal_inode) - return NULL; + if (IS_ERR(journal_inode)) + return ERR_CAST(journal_inode); journal = jbd2_journal_init_inode(journal_inode); - if (!journal) { + if (IS_ERR(journal)) { ext4_msg(sb, KERN_ERR, "Could not load journal inode"); iput(journal_inode); - return NULL; + return ERR_CAST(journal); } journal->j_private = sb; journal->j_bmap = ext4_journal_bmap; @@ -5817,43 +5842,47 @@ static journal_t *ext4_get_journal(struct super_block *sb, return journal; } -static journal_t *ext4_get_dev_journal(struct super_block *sb, - dev_t j_dev) +static struct block_device *ext4_get_journal_blkdev(struct super_block *sb, + dev_t j_dev, ext4_fsblk_t *j_start, + ext4_fsblk_t *j_len) { struct buffer_head *bh; - journal_t *journal; - ext4_fsblk_t start; - ext4_fsblk_t len; + struct block_device *bdev; int hblock, blocksize; ext4_fsblk_t sb_block; unsigned long offset; struct ext4_super_block *es; - struct block_device *bdev; - - if (WARN_ON_ONCE(!ext4_has_feature_journal(sb))) - return NULL; + int errno; /* see get_tree_bdev why this is needed and safe */ up_write(&sb->s_umount); - bdev = ext4_blkdev_get(j_dev, sb); + bdev = blkdev_get_by_dev(j_dev, BLK_OPEN_READ | BLK_OPEN_WRITE, sb, + &fs_holder_ops); down_write(&sb->s_umount); - if (bdev == NULL) - return NULL; + if (IS_ERR(bdev)) { + ext4_msg(sb, KERN_ERR, + "failed to open journal device unknown-block(%u,%u) %ld", + MAJOR(j_dev), MINOR(j_dev), PTR_ERR(bdev)); + return ERR_CAST(bdev); + } blocksize = sb->s_blocksize; hblock = bdev_logical_block_size(bdev); if (blocksize < hblock) { ext4_msg(sb, KERN_ERR, "blocksize too small for journal device"); + errno = -EINVAL; goto out_bdev; } sb_block = EXT4_MIN_BLOCK_SIZE / blocksize; offset = EXT4_MIN_BLOCK_SIZE % blocksize; set_blocksize(bdev, blocksize); - if (!(bh = 
__bread(bdev, sb_block, blocksize))) { + bh = __bread(bdev, sb_block, blocksize); + if (!bh) { ext4_msg(sb, KERN_ERR, "couldn't read superblock of " "external journal"); + errno = -EINVAL; goto out_bdev; } @@ -5861,57 +5890,74 @@ static journal_t *ext4_get_dev_journal(struct super_block *sb, if ((le16_to_cpu(es->s_magic) != EXT4_SUPER_MAGIC) || !(le32_to_cpu(es->s_feature_incompat) & EXT4_FEATURE_INCOMPAT_JOURNAL_DEV)) { - ext4_msg(sb, KERN_ERR, "external journal has " - "bad superblock"); - brelse(bh); - goto out_bdev; + ext4_msg(sb, KERN_ERR, "external journal has bad superblock"); + errno = -EFSCORRUPTED; + goto out_bh; } if ((le32_to_cpu(es->s_feature_ro_compat) & EXT4_FEATURE_RO_COMPAT_METADATA_CSUM) && es->s_checksum != ext4_superblock_csum(sb, es)) { - ext4_msg(sb, KERN_ERR, "external journal has " - "corrupt superblock"); - brelse(bh); - goto out_bdev; + ext4_msg(sb, KERN_ERR, "external journal has corrupt superblock"); + errno = -EFSCORRUPTED; + goto out_bh; } if (memcmp(EXT4_SB(sb)->s_es->s_journal_uuid, es->s_uuid, 16)) { ext4_msg(sb, KERN_ERR, "journal UUID does not match"); - brelse(bh); - goto out_bdev; + errno = -EFSCORRUPTED; + goto out_bh; } - len = ext4_blocks_count(es); - start = sb_block + 1; - brelse(bh); /* we're done with the superblock */ + *j_start = sb_block + 1; + *j_len = ext4_blocks_count(es); + brelse(bh); + return bdev; - journal = jbd2_journal_init_dev(bdev, sb->s_bdev, - start, len, blocksize); - if (!journal) { +out_bh: + brelse(bh); +out_bdev: + blkdev_put(bdev, sb); + return ERR_PTR(errno); +} + +static journal_t *ext4_open_dev_journal(struct super_block *sb, + dev_t j_dev) +{ + journal_t *journal; + ext4_fsblk_t j_start; + ext4_fsblk_t j_len; + struct block_device *journal_bdev; + int errno = 0; + + journal_bdev = ext4_get_journal_blkdev(sb, j_dev, &j_start, &j_len); + if (IS_ERR(journal_bdev)) + return ERR_CAST(journal_bdev); + + journal = jbd2_journal_init_dev(journal_bdev, sb->s_bdev, j_start, + j_len, sb->s_blocksize); + if (IS_ERR(journal)) { ext4_msg(sb, KERN_ERR, "failed to create device journal"); + errno = PTR_ERR(journal); goto out_bdev; } - journal->j_private = sb; - if (ext4_read_bh_lock(journal->j_sb_buffer, REQ_META | REQ_PRIO, true)) { - ext4_msg(sb, KERN_ERR, "I/O error on journal device"); - goto out_journal; - } if (be32_to_cpu(journal->j_superblock->s_nr_users) != 1) { ext4_msg(sb, KERN_ERR, "External journal has more than one " "user (unsupported) - %d", be32_to_cpu(journal->j_superblock->s_nr_users)); + errno = -EINVAL; goto out_journal; } - EXT4_SB(sb)->s_journal_bdev = bdev; + journal->j_private = sb; + EXT4_SB(sb)->s_journal_bdev = journal_bdev; ext4_init_journal_params(sb, journal); return journal; out_journal: jbd2_journal_destroy(journal); out_bdev: - blkdev_put(bdev, sb); - return NULL; + blkdev_put(journal_bdev, sb); + return ERR_PTR(errno); } static int ext4_load_journal(struct super_block *sb, @@ -5943,13 +5989,13 @@ static int ext4_load_journal(struct super_block *sb, } if (journal_inum) { - journal = ext4_get_journal(sb, journal_inum); - if (!journal) - return -EINVAL; + journal = ext4_open_inode_journal(sb, journal_inum); + if (IS_ERR(journal)) + return PTR_ERR(journal); } else { - journal = ext4_get_dev_journal(sb, journal_dev); - if (!journal) - return -EINVAL; + journal = ext4_open_dev_journal(sb, journal_dev); + if (IS_ERR(journal)) + return PTR_ERR(journal); } journal_dev_ro = bdev_read_only(journal->j_dev); @@ -6066,7 +6112,7 @@ static void ext4_update_super(struct super_block *sb) * the clock is set in the 
future, and this will cause e2fsck * to complain and force a full file system check. */ - if (!(sb->s_flags & SB_RDONLY)) + if (!sb_rdonly(sb)) ext4_update_tstamp(es, s_wtime); es->s_kbytes_written = cpu_to_le64(sbi->s_kbytes_written + @@ -6264,13 +6310,7 @@ static int ext4_clear_journal_err(struct super_block *sb, */ int ext4_force_commit(struct super_block *sb) { - journal_t *journal; - - if (sb_rdonly(sb)) - return 0; - - journal = EXT4_SB(sb)->s_journal; - return ext4_journal_force_commit(journal); + return ext4_journal_force_commit(EXT4_SB(sb)->s_journal); } static int ext4_sync_fs(struct super_block *sb, int wait) @@ -6280,7 +6320,7 @@ static int ext4_sync_fs(struct super_block *sb, int wait) bool needs_barrier = false; struct ext4_sb_info *sbi = EXT4_SB(sb); - if (unlikely(ext4_forced_shutdown(sbi))) + if (unlikely(ext4_forced_shutdown(sb))) return 0; trace_ext4_sync_fs(sb, wait); @@ -6329,12 +6369,7 @@ static int ext4_sync_fs(struct super_block *sb, int wait) static int ext4_freeze(struct super_block *sb) { int error = 0; - journal_t *journal; - - if (sb_rdonly(sb)) - return 0; - - journal = EXT4_SB(sb)->s_journal; + journal_t *journal = EXT4_SB(sb)->s_journal; if (journal) { /* Now we set up the journal barrier. */ @@ -6368,7 +6403,7 @@ static int ext4_freeze(struct super_block *sb) */ static int ext4_unfreeze(struct super_block *sb) { - if (sb_rdonly(sb) || ext4_forced_shutdown(EXT4_SB(sb))) + if (ext4_forced_shutdown(sb)) return 0; if (EXT4_SB(sb)->s_journal) { @@ -6484,7 +6519,7 @@ static int __ext4_remount(struct fs_context *fc, struct super_block *sb) goto restore_opts; } - if (ext4_test_mount_flag(sb, EXT4_MF_FS_ABORTED)) + if (test_opt2(sb, ABORT)) ext4_abort(sb, ESHUTDOWN, "Abort forced by user"); sb->s_flags = (sb->s_flags & ~SB_POSIXACL) | @@ -6498,10 +6533,10 @@ static int __ext4_remount(struct fs_context *fc, struct super_block *sb) } /* Flush outstanding errors before changing fs state */ - flush_work(&sbi->s_error_work); + flush_work(&sbi->s_sb_upd_work); if ((bool)(fc->sb_flags & SB_RDONLY) != sb_rdonly(sb)) { - if (ext4_test_mount_flag(sb, EXT4_MF_FS_ABORTED)) { + if (ext4_forced_shutdown(sb)) { err = -EROFS; goto restore_opts; } @@ -6662,7 +6697,7 @@ static int __ext4_remount(struct fs_context *fc, struct super_block *sb) * If there was a failing r/w to ro transition, we may need to * re-enable quota */ - if ((sb->s_flags & SB_RDONLY) && !(old_sb_flags & SB_RDONLY) && + if (sb_rdonly(sb) && !(old_sb_flags & SB_RDONLY) && sb_any_quota_suspended(sb)) dquot_resume(sb, -1); sb->s_flags = old_sb_flags; @@ -7071,6 +7106,13 @@ static int ext4_quota_off(struct super_block *sb, int type) err = dquot_quota_off(sb, type); if (err || ext4_has_feature_quota(sb)) goto out_put; + /* + * When the filesystem was remounted read-only first, we cannot cleanup + * inode flags here. Bad luck but people should be using QUOTA feature + * these days anyway. 
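
One pattern worth calling out across the journal-open rework above: helpers that used to return bare NULL on any failure now return ERR_PTR() values, so callers can propagate a meaningful errno (-EFSCORRUPTED for a corrupt journal inode, -EINVAL for a bad device) instead of collapsing everything to -EINVAL. A minimal sketch of the convention; get_journal_inode() and journal_from_inode() are hypothetical stand-ins:

    #include <linux/err.h>
    #include <linux/fs.h>

    static journal_t *open_fs_journal(struct super_block *sb)
    {
            struct inode *inode = get_journal_inode(sb);

            if (IS_ERR(inode))
                    return ERR_CAST(inode);  /* same errno, new pointer type */
            if (!inode->i_nlink) {
                    iput(inode);
                    return ERR_PTR(-EFSCORRUPTED);
            }
            return journal_from_inode(inode);
    }

    /* caller side: forward the real error code */
    journal = open_fs_journal(sb);
    if (IS_ERR(journal))
            return PTR_ERR(journal);
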
+ */ + if (sb_rdonly(sb)) + goto out_put; inode_lock(inode); /* diff --git a/fs/ext4/xattr.c b/fs/ext4/xattr.c index 281e1bfbbe3ec..92ba28cebac63 100644 --- a/fs/ext4/xattr.c +++ b/fs/ext4/xattr.c @@ -701,7 +701,7 @@ ext4_xattr_get(struct inode *inode, int name_index, const char *name, { int error; - if (unlikely(ext4_forced_shutdown(EXT4_SB(inode->i_sb)))) + if (unlikely(ext4_forced_shutdown(inode->i_sb))) return -EIO; if (strlen(name) > 255) diff --git a/fs/gfs2/file.c b/fs/gfs2/file.c index 34cd57c6b68df..f2700477a3001 100644 --- a/fs/gfs2/file.c +++ b/fs/gfs2/file.c @@ -1436,17 +1436,14 @@ static int gfs2_lock(struct file *file, int cmd, struct file_lock *fl) if (!(fl->fl_flags & FL_POSIX)) return -ENOLCK; - if (cmd == F_CANCELLK) { - /* Hack: */ - cmd = F_SETLK; - fl->fl_type = F_UNLCK; - } if (unlikely(gfs2_withdrawn(sdp))) { if (fl->fl_type == F_UNLCK) locks_lock_file_wait(file, fl); return -EIO; } - if (IS_GETLK(cmd)) + if (cmd == F_CANCELLK) + return dlm_posix_cancel(ls->ls_dlm, ip->i_no_addr, file, fl); + else if (IS_GETLK(cmd)) return dlm_posix_get(ls->ls_dlm, ip->i_no_addr, file, fl); else if (fl->fl_type == F_UNLCK) return dlm_posix_unlock(ls->ls_dlm, ip->i_no_addr, file, fl); diff --git a/fs/jbd2/checkpoint.c b/fs/jbd2/checkpoint.c index 9ec91017a7f3c..118699fff2f90 100644 --- a/fs/jbd2/checkpoint.c +++ b/fs/jbd2/checkpoint.c @@ -40,18 +40,6 @@ static inline void __buffer_unlink(struct journal_head *jh) } } -/* - * Check a checkpoint buffer could be release or not. - * - * Requires j_list_lock - */ -static inline bool __cp_buffer_busy(struct journal_head *jh) -{ - struct buffer_head *bh = jh2bh(jh); - - return (jh->b_transaction || buffer_locked(bh) || buffer_dirty(bh)); -} - /* * __jbd2_log_wait_for_space: wait until there is space in the journal. * @@ -349,6 +337,8 @@ int jbd2_cleanup_journal_tail(journal_t *journal) /* Checkpoint list management */ +enum shrink_type {SHRINK_DESTROY, SHRINK_BUSY_STOP, SHRINK_BUSY_SKIP}; + /* * journal_shrink_one_cp_list * @@ -360,7 +350,8 @@ int jbd2_cleanup_journal_tail(journal_t *journal) * Called with j_list_lock held. */ static unsigned long journal_shrink_one_cp_list(struct journal_head *jh, - bool destroy, bool *released) + enum shrink_type type, + bool *released) { struct journal_head *last_jh; struct journal_head *next_jh = jh; @@ -376,12 +367,15 @@ static unsigned long journal_shrink_one_cp_list(struct journal_head *jh, jh = next_jh; next_jh = jh->b_cpnext; - if (destroy) { + if (type == SHRINK_DESTROY) { ret = __jbd2_journal_remove_checkpoint(jh); } else { ret = jbd2_journal_try_remove_checkpoint(jh); - if (ret < 0) - continue; + if (ret < 0) { + if (type == SHRINK_BUSY_SKIP) + continue; + break; + } } nr_freed++; @@ -445,7 +439,7 @@ unsigned long jbd2_journal_shrink_checkpoint_list(journal_t *journal, tid = transaction->t_tid; freed = journal_shrink_one_cp_list(transaction->t_checkpoint_list, - false, &released); + SHRINK_BUSY_SKIP, &released); nr_freed += freed; (*nr_to_scan) -= min(*nr_to_scan, freed); if (*nr_to_scan == 0) @@ -485,19 +479,21 @@ unsigned long jbd2_journal_shrink_checkpoint_list(journal_t *journal, void __jbd2_journal_clean_checkpoint_list(journal_t *journal, bool destroy) { transaction_t *transaction, *last_transaction, *next_transaction; + enum shrink_type type; bool released; transaction = journal->j_checkpoint_transactions; if (!transaction) return; + type = destroy ? 
SHRINK_DESTROY : SHRINK_BUSY_STOP; last_transaction = transaction->t_cpprev; next_transaction = transaction; do { transaction = next_transaction; next_transaction = transaction->t_cpnext; journal_shrink_one_cp_list(transaction->t_checkpoint_list, - destroy, &released); + type, &released); /* * This function only frees up some memory if possible so we * dont have an obligation to finish processing. Bail out if @@ -631,6 +627,8 @@ int jbd2_journal_try_remove_checkpoint(struct journal_head *jh) { struct buffer_head *bh = jh2bh(jh); + if (jh->b_transaction) + return -EBUSY; if (!trylock_buffer(bh)) return -EBUSY; if (buffer_dirty(bh)) { diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c index 1b5a45ab62b0d..768fa05bcbede 100644 --- a/fs/jbd2/journal.c +++ b/fs/jbd2/journal.c @@ -115,14 +115,6 @@ void __jbd2_debug(int level, const char *file, const char *func, #endif /* Checksumming functions */ -static int jbd2_verify_csum_type(journal_t *j, journal_superblock_t *sb) -{ - if (!jbd2_journal_has_csum_v2or3_feature(j)) - return 1; - - return sb->s_checksum_type == JBD2_CRC32C_CHKSUM; -} - static __be32 jbd2_superblock_csum(journal_t *j, journal_superblock_t *sb) { __u32 csum; @@ -1333,6 +1325,189 @@ static unsigned long jbd2_journal_shrink_count(struct shrinker *shrink, return count; } +/* + * If the journal init or create aborts, we need to mark the journal + * superblock as being NULL to prevent the journal destroy from writing + * back a bogus superblock. + */ +static void journal_fail_superblock(journal_t *journal) +{ + struct buffer_head *bh = journal->j_sb_buffer; + brelse(bh); + journal->j_sb_buffer = NULL; +} + +/* + * Check the superblock for a given journal, performing initial + * validation of the format. + */ +static int journal_check_superblock(journal_t *journal) +{ + journal_superblock_t *sb = journal->j_superblock; + int num_fc_blks; + int err = -EINVAL; + + if (sb->s_header.h_magic != cpu_to_be32(JBD2_MAGIC_NUMBER) || + sb->s_blocksize != cpu_to_be32(journal->j_blocksize)) { + printk(KERN_WARNING "JBD2: no valid journal superblock found\n"); + return err; + } + + if (be32_to_cpu(sb->s_header.h_blocktype) != JBD2_SUPERBLOCK_V1 && + be32_to_cpu(sb->s_header.h_blocktype) != JBD2_SUPERBLOCK_V2) { + printk(KERN_WARNING "JBD2: unrecognised superblock format ID\n"); + return err; + } + + if (be32_to_cpu(sb->s_maxlen) > journal->j_total_len) { + printk(KERN_WARNING "JBD2: journal file too short\n"); + return err; + } + + if (be32_to_cpu(sb->s_first) == 0 || + be32_to_cpu(sb->s_first) >= journal->j_total_len) { + printk(KERN_WARNING + "JBD2: Invalid start block of journal: %u\n", + be32_to_cpu(sb->s_first)); + return err; + } + + /* + * If this is a V2 superblock, then we have to check the + * features flags on it. + */ + if (!jbd2_format_support_feature(journal)) + return 0; + + if ((sb->s_feature_ro_compat & + ~cpu_to_be32(JBD2_KNOWN_ROCOMPAT_FEATURES)) || + (sb->s_feature_incompat & + ~cpu_to_be32(JBD2_KNOWN_INCOMPAT_FEATURES))) { + printk(KERN_WARNING "JBD2: Unrecognised features on journal\n"); + return err; + } + + num_fc_blks = jbd2_has_feature_fast_commit(journal) ? 
+ jbd2_journal_get_num_fc_blks(sb) : 0; + if (be32_to_cpu(sb->s_maxlen) < JBD2_MIN_JOURNAL_BLOCKS || + be32_to_cpu(sb->s_maxlen) - JBD2_MIN_JOURNAL_BLOCKS < num_fc_blks) { + printk(KERN_ERR "JBD2: journal file too short %u,%d\n", + be32_to_cpu(sb->s_maxlen), num_fc_blks); + return err; + } + + if (jbd2_has_feature_csum2(journal) && + jbd2_has_feature_csum3(journal)) { + /* Can't have checksum v2 and v3 at the same time! */ + printk(KERN_ERR "JBD2: Can't enable checksumming v2 and v3 " + "at the same time!\n"); + return err; + } + + if (jbd2_journal_has_csum_v2or3_feature(journal) && + jbd2_has_feature_checksum(journal)) { + /* Can't have checksum v1 and v2 on at the same time! */ + printk(KERN_ERR "JBD2: Can't enable checksumming v1 and v2/3 " + "at the same time!\n"); + return err; + } + + /* Load the checksum driver */ + if (jbd2_journal_has_csum_v2or3_feature(journal)) { + if (sb->s_checksum_type != JBD2_CRC32C_CHKSUM) { + printk(KERN_ERR "JBD2: Unknown checksum type\n"); + return err; + } + + journal->j_chksum_driver = crypto_alloc_shash("crc32c", 0, 0); + if (IS_ERR(journal->j_chksum_driver)) { + printk(KERN_ERR "JBD2: Cannot load crc32c driver.\n"); + err = PTR_ERR(journal->j_chksum_driver); + journal->j_chksum_driver = NULL; + return err; + } + /* Check superblock checksum */ + if (sb->s_checksum != jbd2_superblock_csum(journal, sb)) { + printk(KERN_ERR "JBD2: journal checksum error\n"); + err = -EFSBADCRC; + return err; + } + } + + return 0; +} + +static int journal_revoke_records_per_block(journal_t *journal) +{ + int record_size; + int space = journal->j_blocksize - sizeof(jbd2_journal_revoke_header_t); + + if (jbd2_has_feature_64bit(journal)) + record_size = 8; + else + record_size = 4; + + if (jbd2_journal_has_csum_v2or3(journal)) + space -= sizeof(struct jbd2_journal_block_tail); + return space / record_size; +} + +/* + * Load the on-disk journal superblock and read the key fields into the + * journal_t. 
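
journal_revoke_records_per_block(), moved here unchanged, is plain arithmetic: usable space is the block size minus the revoke header (and minus the checksum tail when v2/v3 checksums are on), divided by the record size, 8 bytes with the 64-bit feature and 4 otherwise. A worked example, assuming the usual 16-byte revoke header and 4-byte block tail:

    /* illustrative recomputation, not the jbd2 helper itself */
    int space = 4096 - 16;   /* j_blocksize minus revoke header */
    space -= 4;              /* block tail, when csum v2/v3 is on */
    int records = space / 8; /* 64-bit block numbers */
    /* records == 509 per 4k descriptor block */
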
+ */ +static int journal_load_superblock(journal_t *journal) +{ + int err; + struct buffer_head *bh; + journal_superblock_t *sb; + + bh = getblk_unmovable(journal->j_dev, journal->j_blk_offset, + journal->j_blocksize); + if (bh) + err = bh_read(bh, 0); + if (!bh || err < 0) { + pr_err("%s: Cannot read journal superblock\n", __func__); + brelse(bh); + return -EIO; + } + + journal->j_sb_buffer = bh; + sb = (journal_superblock_t *)bh->b_data; + journal->j_superblock = sb; + err = journal_check_superblock(journal); + if (err) { + journal_fail_superblock(journal); + return err; + } + + journal->j_tail_sequence = be32_to_cpu(sb->s_sequence); + journal->j_tail = be32_to_cpu(sb->s_start); + journal->j_first = be32_to_cpu(sb->s_first); + journal->j_errno = be32_to_cpu(sb->s_errno); + journal->j_last = be32_to_cpu(sb->s_maxlen); + + if (be32_to_cpu(sb->s_maxlen) < journal->j_total_len) + journal->j_total_len = be32_to_cpu(sb->s_maxlen); + /* Precompute checksum seed for all metadata */ + if (jbd2_journal_has_csum_v2or3(journal)) + journal->j_csum_seed = jbd2_chksum(journal, ~0, sb->s_uuid, + sizeof(sb->s_uuid)); + journal->j_revoke_records_per_block = + journal_revoke_records_per_block(journal); + + if (jbd2_has_feature_fast_commit(journal)) { + journal->j_fc_last = be32_to_cpu(sb->s_maxlen); + journal->j_last = journal->j_fc_last - + jbd2_journal_get_num_fc_blks(sb); + journal->j_fc_first = journal->j_last + 1; + journal->j_fc_off = 0; + } + + return 0; +} + + /* * Management for journal control blocks: functions to create and * destroy journal_t structures, and to initialise and read existing @@ -1349,12 +1524,21 @@ static journal_t *journal_init_common(struct block_device *bdev, static struct lock_class_key jbd2_trans_commit_key; journal_t *journal; int err; - struct buffer_head *bh; int n; journal = kzalloc(sizeof(*journal), GFP_KERNEL); if (!journal) - return NULL; + return ERR_PTR(-ENOMEM); + + journal->j_blocksize = blocksize; + journal->j_dev = bdev; + journal->j_fs_dev = fs_dev; + journal->j_blk_offset = start; + journal->j_total_len = len; + + err = journal_load_superblock(journal); + if (err) + goto err_cleanup; init_waitqueue_head(&journal->j_wait_transaction_locked); init_waitqueue_head(&journal->j_wait_done_commit); @@ -1367,12 +1551,15 @@ static journal_t *journal_init_common(struct block_device *bdev, mutex_init(&journal->j_checkpoint_mutex); spin_lock_init(&journal->j_revoke_lock); spin_lock_init(&journal->j_list_lock); + spin_lock_init(&journal->j_history_lock); rwlock_init(&journal->j_state_lock); journal->j_commit_interval = (HZ * JBD2_DEFAULT_MAX_COMMIT_AGE); journal->j_min_batch_time = 0; journal->j_max_batch_time = 15000; /* 15ms */ atomic_set(&journal->j_reserved_credits, 0); + lockdep_init_map(&journal->j_trans_commit_map, "jbd2_handle", + &jbd2_trans_commit_key, 0); /* The journal is marked for error until we succeed with recovery! */ journal->j_flags = JBD2_ABORT; @@ -1382,18 +1569,11 @@ static journal_t *journal_init_common(struct block_device *bdev, if (err) goto err_cleanup; - spin_lock_init(&journal->j_history_lock); - - lockdep_init_map(&journal->j_trans_commit_map, "jbd2_handle", - &jbd2_trans_commit_key, 0); - - /* journal descriptor can store up to n blocks -bzzz */ - journal->j_blocksize = blocksize; - journal->j_dev = bdev; - journal->j_fs_dev = fs_dev; - journal->j_blk_offset = start; - journal->j_total_len = len; - /* We need enough buffers to write out full descriptor block. 
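
The fast-commit carve-out in journal_load_superblock() above is easier to see with numbers. journal_check_superblock() has already rejected superblocks where s_maxlen minus the fast-commit blocks would leave fewer than JBD2_MIN_JOURNAL_BLOCKS, so the regular log and the fast-commit area partition the journal cleanly (example values):

    /* with s_maxlen == 32768 and 256 fast-commit blocks */
    journal->j_fc_last  = 32768;        /* end of the whole journal   */
    journal->j_last     = 32768 - 256;  /* 32512, end of regular log  */
    journal->j_fc_first = 32512 + 1;    /* fc area: 32513..32768      */

Because j_last now already excludes the fast-commit area, the recovery wrap() macro later in the series can drop its fast-commit special case and wrap on j_last alone.
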
*/ + /* + * journal descriptor can store up to n blocks, we need enough + * buffers to write out full descriptor block. + */ + err = -ENOMEM; n = journal->j_blocksize / jbd2_min_tag_size(); journal->j_wbufsize = n; journal->j_fc_wbuf = NULL; @@ -1402,37 +1582,30 @@ static journal_t *journal_init_common(struct block_device *bdev, if (!journal->j_wbuf) goto err_cleanup; - bh = getblk_unmovable(journal->j_dev, start, journal->j_blocksize); - if (!bh) { - pr_err("%s: Cannot get buffer for journal superblock\n", - __func__); + err = percpu_counter_init(&journal->j_checkpoint_jh_count, 0, + GFP_KERNEL); + if (err) goto err_cleanup; - } - journal->j_sb_buffer = bh; - journal->j_superblock = (journal_superblock_t *)bh->b_data; journal->j_shrink_transaction = NULL; journal->j_shrinker.scan_objects = jbd2_journal_shrink_scan; journal->j_shrinker.count_objects = jbd2_journal_shrink_count; journal->j_shrinker.seeks = DEFAULT_SEEKS; journal->j_shrinker.batch = journal->j_max_transaction_buffers; - - if (percpu_counter_init(&journal->j_checkpoint_jh_count, 0, GFP_KERNEL)) + err = register_shrinker(&journal->j_shrinker, "jbd2-journal:(%u:%u)", + MAJOR(bdev->bd_dev), MINOR(bdev->bd_dev)); + if (err) goto err_cleanup; - if (register_shrinker(&journal->j_shrinker, "jbd2-journal:(%u:%u)", - MAJOR(bdev->bd_dev), MINOR(bdev->bd_dev))) { - percpu_counter_destroy(&journal->j_checkpoint_jh_count); - goto err_cleanup; - } return journal; err_cleanup: - brelse(journal->j_sb_buffer); + percpu_counter_destroy(&journal->j_checkpoint_jh_count); kfree(journal->j_wbuf); jbd2_journal_destroy_revoke(journal); + journal_fail_superblock(journal); kfree(journal); - return NULL; + return ERR_PTR(err); } /* jbd2_journal_init_dev and jbd2_journal_init_inode: @@ -1465,8 +1638,8 @@ journal_t *jbd2_journal_init_dev(struct block_device *bdev, journal_t *journal; journal = journal_init_common(bdev, fs_dev, start, len, blocksize); - if (!journal) - return NULL; + if (IS_ERR(journal)) + return ERR_CAST(journal); snprintf(journal->j_devname, sizeof(journal->j_devname), "%pg", journal->j_dev); @@ -1492,11 +1665,9 @@ journal_t *jbd2_journal_init_inode(struct inode *inode) blocknr = 0; err = bmap(inode, &blocknr); - if (err || !blocknr) { - pr_err("%s: Cannot locate journal superblock\n", - __func__); - return NULL; + pr_err("%s: Cannot locate journal superblock\n", __func__); + return err ? ERR_PTR(err) : ERR_PTR(-EINVAL); } jbd2_debug(1, "JBD2: inode %s/%ld, size %lld, bits %d, blksize %ld\n", @@ -1506,8 +1677,8 @@ journal_t *jbd2_journal_init_inode(struct inode *inode) journal = journal_init_common(inode->i_sb->s_bdev, inode->i_sb->s_bdev, blocknr, inode->i_size >> inode->i_sb->s_blocksize_bits, inode->i_sb->s_blocksize); - if (!journal) - return NULL; + if (IS_ERR(journal)) + return ERR_CAST(journal); journal->j_inode = inode; snprintf(journal->j_devname, sizeof(journal->j_devname), @@ -1518,18 +1689,6 @@ journal_t *jbd2_journal_init_inode(struct inode *inode) return journal; } -/* - * If the journal init or create aborts, we need to mark the journal - * superblock as being NULL to prevent the journal destroy from writing - * back a bogus superblock. - */ -static void journal_fail_superblock(journal_t *journal) -{ - struct buffer_head *bh = journal->j_sb_buffer; - brelse(bh); - journal->j_sb_buffer = NULL; -} - /* * Given a journal_t structure, initialise the various fields for * startup of a new journaling session. 
We use this both when creating @@ -1886,163 +2045,6 @@ void jbd2_journal_update_sb_errno(journal_t *journal) } EXPORT_SYMBOL(jbd2_journal_update_sb_errno); -static int journal_revoke_records_per_block(journal_t *journal) -{ - int record_size; - int space = journal->j_blocksize - sizeof(jbd2_journal_revoke_header_t); - - if (jbd2_has_feature_64bit(journal)) - record_size = 8; - else - record_size = 4; - - if (jbd2_journal_has_csum_v2or3(journal)) - space -= sizeof(struct jbd2_journal_block_tail); - return space / record_size; -} - -/* - * Read the superblock for a given journal, performing initial - * validation of the format. - */ -static int journal_get_superblock(journal_t *journal) -{ - struct buffer_head *bh; - journal_superblock_t *sb; - int err; - - bh = journal->j_sb_buffer; - - J_ASSERT(bh != NULL); - if (buffer_verified(bh)) - return 0; - - err = bh_read(bh, 0); - if (err < 0) { - printk(KERN_ERR - "JBD2: IO error reading journal superblock\n"); - goto out; - } - - sb = journal->j_superblock; - - err = -EINVAL; - - if (sb->s_header.h_magic != cpu_to_be32(JBD2_MAGIC_NUMBER) || - sb->s_blocksize != cpu_to_be32(journal->j_blocksize)) { - printk(KERN_WARNING "JBD2: no valid journal superblock found\n"); - goto out; - } - - if (be32_to_cpu(sb->s_header.h_blocktype) != JBD2_SUPERBLOCK_V1 && - be32_to_cpu(sb->s_header.h_blocktype) != JBD2_SUPERBLOCK_V2) { - printk(KERN_WARNING "JBD2: unrecognised superblock format ID\n"); - goto out; - } - - if (be32_to_cpu(sb->s_maxlen) > journal->j_total_len) { - printk(KERN_WARNING "JBD2: journal file too short\n"); - goto out; - } - - if (be32_to_cpu(sb->s_first) == 0 || - be32_to_cpu(sb->s_first) >= journal->j_total_len) { - printk(KERN_WARNING - "JBD2: Invalid start block of journal: %u\n", - be32_to_cpu(sb->s_first)); - goto out; - } - - if (jbd2_has_feature_csum2(journal) && - jbd2_has_feature_csum3(journal)) { - /* Can't have checksum v2 and v3 at the same time! */ - printk(KERN_ERR "JBD2: Can't enable checksumming v2 and v3 " - "at the same time!\n"); - goto out; - } - - if (jbd2_journal_has_csum_v2or3_feature(journal) && - jbd2_has_feature_checksum(journal)) { - /* Can't have checksum v1 and v2 on at the same time! */ - printk(KERN_ERR "JBD2: Can't enable checksumming v1 and v2/3 " - "at the same time!\n"); - goto out; - } - - if (!jbd2_verify_csum_type(journal, sb)) { - printk(KERN_ERR "JBD2: Unknown checksum type\n"); - goto out; - } - - /* Load the checksum driver */ - if (jbd2_journal_has_csum_v2or3_feature(journal)) { - journal->j_chksum_driver = crypto_alloc_shash("crc32c", 0, 0); - if (IS_ERR(journal->j_chksum_driver)) { - printk(KERN_ERR "JBD2: Cannot load crc32c driver.\n"); - err = PTR_ERR(journal->j_chksum_driver); - journal->j_chksum_driver = NULL; - goto out; - } - /* Check superblock checksum */ - if (sb->s_checksum != jbd2_superblock_csum(journal, sb)) { - printk(KERN_ERR "JBD2: journal checksum error\n"); - err = -EFSBADCRC; - goto out; - } - } - set_buffer_verified(bh); - return 0; - -out: - journal_fail_superblock(journal); - return err; -} - -/* - * Load the on-disk journal superblock and read the key fields into the - * journal_t. 
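
The path being deleted here validated the superblock lazily, on first use, guarded by buffer_verified(); with validation done once in journal_init_common(), the buffer is simply read and pinned for the journal's lifetime. The shape of that one-shot pattern, as a sketch:

    #include <linux/buffer_head.h>

    struct buffer_head *bh;

    bh = getblk_unmovable(bdev, blocknr, blocksize);
    if (!bh || bh_read(bh, 0) < 0) {
            brelse(bh);              /* brelse(NULL) is a no-op */
            return -EIO;
    }
    journal->j_sb_buffer = bh;       /* held until journal destroy */
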
- */ - -static int load_superblock(journal_t *journal) -{ - int err; - journal_superblock_t *sb; - int num_fc_blocks; - - err = journal_get_superblock(journal); - if (err) - return err; - - sb = journal->j_superblock; - - journal->j_tail_sequence = be32_to_cpu(sb->s_sequence); - journal->j_tail = be32_to_cpu(sb->s_start); - journal->j_first = be32_to_cpu(sb->s_first); - journal->j_errno = be32_to_cpu(sb->s_errno); - journal->j_last = be32_to_cpu(sb->s_maxlen); - - if (be32_to_cpu(sb->s_maxlen) < journal->j_total_len) - journal->j_total_len = be32_to_cpu(sb->s_maxlen); - /* Precompute checksum seed for all metadata */ - if (jbd2_journal_has_csum_v2or3(journal)) - journal->j_csum_seed = jbd2_chksum(journal, ~0, sb->s_uuid, - sizeof(sb->s_uuid)); - journal->j_revoke_records_per_block = - journal_revoke_records_per_block(journal); - - if (jbd2_has_feature_fast_commit(journal)) { - journal->j_fc_last = be32_to_cpu(sb->s_maxlen); - num_fc_blocks = jbd2_journal_get_num_fc_blks(sb); - if (journal->j_last - num_fc_blocks >= JBD2_MIN_JOURNAL_BLOCKS) - journal->j_last = journal->j_fc_last - num_fc_blocks; - journal->j_fc_first = journal->j_last + 1; - journal->j_fc_off = 0; - } - - return 0; -} - - /** * jbd2_journal_load() - Read journal from disk. * @journal: Journal to act on. @@ -2054,28 +2056,7 @@ static int load_superblock(journal_t *journal) int jbd2_journal_load(journal_t *journal) { int err; - journal_superblock_t *sb; - - err = load_superblock(journal); - if (err) - return err; - - sb = journal->j_superblock; - - /* - * If this is a V2 superblock, then we have to check the - * features flags on it. - */ - if (jbd2_format_support_feature(journal)) { - if ((sb->s_feature_ro_compat & - ~cpu_to_be32(JBD2_KNOWN_ROCOMPAT_FEATURES)) || - (sb->s_feature_incompat & - ~cpu_to_be32(JBD2_KNOWN_INCOMPAT_FEATURES))) { - printk(KERN_WARNING - "JBD2: Unrecognised features on journal\n"); - return -EINVAL; - } - } + journal_superblock_t *sb = journal->j_superblock; /* * Create a slab for this blocksize @@ -2086,8 +2067,11 @@ int jbd2_journal_load(journal_t *journal) /* Let the recovery code check whether it needs to recover any * data from the journal. */ - if (jbd2_journal_recover(journal)) - goto recovery_error; + err = jbd2_journal_recover(journal); + if (err) { + pr_warn("JBD2: journal recovery failed\n"); + return err; + } if (journal->j_failed_commit) { printk(KERN_ERR "JBD2: journal transaction %u on %s " @@ -2104,15 +2088,14 @@ int jbd2_journal_load(journal_t *journal) /* OK, we've finished with the dynamic journal bits: * reinitialise the dynamic contents of the superblock in memory * and reset them on disk. 
*/ - if (journal_reset(journal)) - goto recovery_error; + err = journal_reset(journal); + if (err) { + pr_warn("JBD2: journal reset failed\n"); + return err; + } journal->j_flags |= JBD2_LOADED; return 0; - -recovery_error: - printk(KERN_WARNING "JBD2: recovery failed\n"); - return -EIO; } /** @@ -2224,8 +2207,6 @@ int jbd2_journal_check_used_features(journal_t *journal, unsigned long compat, if (!compat && !ro && !incompat) return 1; - if (journal_get_superblock(journal)) - return 0; if (!jbd2_format_support_feature(journal)) return 0; @@ -2515,16 +2496,12 @@ int jbd2_journal_flush(journal_t *journal, unsigned int flags) int jbd2_journal_wipe(journal_t *journal, int write) { - int err = 0; + int err; J_ASSERT (!(journal->j_flags & JBD2_LOADED)); - err = load_superblock(journal); - if (err) - return err; - if (!journal->j_tail) - goto no_recovery; + return 0; printk(KERN_WARNING "JBD2: %s recovery information on journal\n", write ? "Clearing" : "Ignoring"); @@ -2537,7 +2514,6 @@ int jbd2_journal_wipe(journal_t *journal, int write) mutex_unlock(&journal->j_checkpoint_mutex); } - no_recovery: return err; } diff --git a/fs/jbd2/recovery.c b/fs/jbd2/recovery.c index 0184931d47f7d..c269a7d29a465 100644 --- a/fs/jbd2/recovery.c +++ b/fs/jbd2/recovery.c @@ -230,12 +230,8 @@ static int count_tags(journal_t *journal, struct buffer_head *bh) /* Make sure we wrap around the log correctly! */ #define wrap(journal, var) \ do { \ - unsigned long _wrap_last = \ - jbd2_has_feature_fast_commit(journal) ? \ - (journal)->j_fc_last : (journal)->j_last; \ - \ - if (var >= _wrap_last) \ - var -= (_wrap_last - (journal)->j_first); \ + if (var >= (journal)->j_last) \ + var -= ((journal)->j_last - (journal)->j_first); \ } while (0) static int fc_do_one_pass(journal_t *journal, @@ -524,9 +520,7 @@ static int do_one_pass(journal_t *journal, break; jbd2_debug(2, "Scanning for sequence ID %u at %lu/%lu\n", - next_commit_ID, next_log_block, - jbd2_has_feature_fast_commit(journal) ? - journal->j_fc_last : journal->j_last); + next_commit_ID, next_log_block, journal->j_last); /* Skip over each chunk of the transaction looking * either the next descriptor block or the final commit diff --git a/fs/jfs/jfs_dmap.c b/fs/jfs/jfs_dmap.c index a14a0f18a4c40..88afd108c2dd2 100644 --- a/fs/jfs/jfs_dmap.c +++ b/fs/jfs/jfs_dmap.c @@ -269,6 +269,7 @@ int dbUnmount(struct inode *ipbmap, int mounterror) /* free the memory for the in-memory bmap. */ kfree(bmp); + JFS_SBI(ipbmap->i_sb)->bmap = NULL; return (0); } diff --git a/fs/jfs/jfs_extent.c b/fs/jfs/jfs_extent.c index ae99a7e232eeb..63d21822d309b 100644 --- a/fs/jfs/jfs_extent.c +++ b/fs/jfs/jfs_extent.c @@ -166,7 +166,7 @@ extAlloc(struct inode *ip, s64 xlen, s64 pno, xad_t * xp, bool abnr) /* * COMMIT_SyncList flags an anonymous tlock on page that is on * sync list. - * We need to commit the inode to get the page written disk. + * We need to commit the inode to get the page written to the disk. */ if (test_and_clear_cflag(COMMIT_Synclist,ip)) jfs_commit_inode(ip, 0); @@ -311,6 +311,11 @@ extBalloc(struct inode *ip, s64 hint, s64 * nblocks, s64 * blkno) * blocks in the map. in that case, we'll start off with the * maximum free. */ + + /* give up if no space left */ + if (bmp->db_maxfreebud == -1) + return -ENOSPC; + max = (s64) 1 << bmp->db_maxfreebud; if (*nblocks >= max && *nblocks > nbperpage) nb = nblks = (max > nbperpage) ? 
max : nbperpage; diff --git a/fs/jfs/jfs_imap.c b/fs/jfs/jfs_imap.c index a40383aa6c84b..923a58422c461 100644 --- a/fs/jfs/jfs_imap.c +++ b/fs/jfs/jfs_imap.c @@ -193,6 +193,7 @@ int diUnmount(struct inode *ipimap, int mounterror) * free in-memory control structure */ kfree(imap); + JFS_IP(ipimap)->i_imap = NULL; return (0); } diff --git a/fs/jfs/namei.c b/fs/jfs/namei.c index 029d470656005..57d7a4300210d 100644 --- a/fs/jfs/namei.c +++ b/fs/jfs/namei.c @@ -883,7 +883,7 @@ static int jfs_symlink(struct mnt_idmap *idmap, struct inode *dip, struct component_name dname; u32 ssize; /* source pathname size */ struct btstack btstack; - struct inode *ip = d_inode(dentry); + struct inode *ip; s64 xlen = 0; int bmask = 0, xsize; s64 xaddr; diff --git a/fs/libfs.c b/fs/libfs.c index da78eb64831ec..a4eb127578862 100644 --- a/fs/libfs.c +++ b/fs/libfs.c @@ -1648,16 +1648,6 @@ bool is_empty_dir_inode(struct inode *inode) } #if IS_ENABLED(CONFIG_UNICODE) -/* - * Determine if the name of a dentry should be casefolded. - * - * Return: if names will need casefolding - */ -static bool needs_casefold(const struct inode *dir) -{ - return IS_CASEFOLDED(dir) && dir->i_sb->s_encoding; -} - /** * generic_ci_d_compare - generic d_compare implementation for casefolding filesystems * @dentry: dentry whose name we are checking against @@ -1678,7 +1668,7 @@ static int generic_ci_d_compare(const struct dentry *dentry, unsigned int len, char strbuf[DNAME_INLINE_LEN]; int ret; - if (!dir || !needs_casefold(dir)) + if (!dir || !IS_CASEFOLDED(dir)) goto fallback; /* * If the dentry name is stored in-line, then it may be concurrently @@ -1720,7 +1710,7 @@ static int generic_ci_d_hash(const struct dentry *dentry, struct qstr *str) const struct unicode_map *um = sb->s_encoding; int ret = 0; - if (!dir || !needs_casefold(dir)) + if (!dir || !IS_CASEFOLDED(dir)) return 0; ret = utf8_casefold_hash(um, dentry, str); diff --git a/fs/lockd/mon.c b/fs/lockd/mon.c index 1d9488cf05348..87a0f207df0b9 100644 --- a/fs/lockd/mon.c +++ b/fs/lockd/mon.c @@ -276,6 +276,9 @@ static struct nsm_handle *nsm_create_handle(const struct sockaddr *sap, { struct nsm_handle *new; + if (!hostname) + return NULL; + new = kzalloc(sizeof(*new) + hostname_len + 1, GFP_KERNEL); if (unlikely(new == NULL)) return NULL; diff --git a/fs/lockd/svc.c b/fs/lockd/svc.c index 22d3ff3818f5f..6579948070a48 100644 --- a/fs/lockd/svc.c +++ b/fs/lockd/svc.c @@ -45,7 +45,6 @@ #define NLMDBG_FACILITY NLMDBG_SVC #define LOCKD_BUFSIZE (1024 + NLMSVC_XDRSIZE) -#define ALLOWED_SIGS (sigmask(SIGKILL)) static struct svc_program nlmsvc_program; @@ -57,6 +56,12 @@ static unsigned int nlmsvc_users; static struct svc_serv *nlmsvc_serv; unsigned long nlmsvc_timeout; +static void nlmsvc_request_retry(struct timer_list *tl) +{ + svc_wake_up(nlmsvc_serv); +} +DEFINE_TIMER(nlmsvc_retry, nlmsvc_request_retry); + unsigned int lockd_net_id; /* @@ -111,26 +116,12 @@ static void set_grace_period(struct net *net) schedule_delayed_work(&ln->grace_period_end, grace_period); } -static void restart_grace(void) -{ - if (nlmsvc_ops) { - struct net *net = &init_net; - struct lockd_net *ln = net_generic(net, lockd_net_id); - - cancel_delayed_work_sync(&ln->grace_period_end); - locks_end_grace(&ln->lockd_manager); - nlmsvc_invalidate_all(); - set_grace_period(net); - } -} - /* * This is the lockd kernel thread */ static int lockd(void *vrqstp) { - int err = 0; struct svc_rqst *rqstp = vrqstp; struct net *net = &init_net; struct lockd_net *ln = net_generic(net, lockd_net_id); @@ -138,9 +129,6 @@ 
lockd(void *vrqstp) /* try_to_freeze() is called from svc_recv() */ set_freezable(); - /* Allow SIGKILL to tell lockd to drop all of its locks */ - allow_signal(SIGKILL); - dprintk("NFS locking service started (ver " LOCKD_VERSION ").\n"); /* @@ -148,33 +136,12 @@ lockd(void *vrqstp) * NFS mount or NFS daemon has gone away. */ while (!kthread_should_stop()) { - long timeout = MAX_SCHEDULE_TIMEOUT; - RPC_IFDEBUG(char buf[RPC_MAX_ADDRBUFLEN]); - /* update sv_maxconn if it has changed */ rqstp->rq_server->sv_maxconn = nlm_max_connections; - if (signalled()) { - flush_signals(current); - restart_grace(); - continue; - } - - timeout = nlmsvc_retry_blocked(); - - /* - * Find a socket with data available and call its - * recvfrom routine. - */ - err = svc_recv(rqstp, timeout); - if (err == -EAGAIN || err == -EINTR) - continue; - dprintk("lockd: request from %s\n", - svc_print_addr(rqstp, buf, sizeof(buf))); - - svc_process(rqstp); + nlmsvc_retry_blocked(); + svc_recv(rqstp); } - flush_signals(current); if (nlmsvc_ops) nlmsvc_invalidate_all(); nlm_shutdown_hosts(); @@ -407,6 +374,7 @@ static void lockd_put(void) #endif svc_set_num_threads(nlmsvc_serv, NULL, 0); + timer_delete_sync(&nlmsvc_retry); nlmsvc_serv = NULL; dprintk("lockd_down: service destroyed\n"); } @@ -538,7 +506,7 @@ static inline int is_callback(u32 proc) } -static int lockd_authenticate(struct svc_rqst *rqstp) +static enum svc_auth_status lockd_authenticate(struct svc_rqst *rqstp) { rqstp->rq_client = NULL; switch (rqstp->rq_authop->flavour) { diff --git a/fs/lockd/svclock.c b/fs/lockd/svclock.c index c43ccdf28ed91..43aeba9de55cb 100644 --- a/fs/lockd/svclock.c +++ b/fs/lockd/svclock.c @@ -131,12 +131,14 @@ static void nlmsvc_insert_block(struct nlm_block *block, unsigned long when) static inline void nlmsvc_remove_block(struct nlm_block *block) { + spin_lock(&nlm_blocked_lock); if (!list_empty(&block->b_list)) { - spin_lock(&nlm_blocked_lock); list_del_init(&block->b_list); spin_unlock(&nlm_blocked_lock); nlmsvc_release_block(block); + return; } + spin_unlock(&nlm_blocked_lock); } /* @@ -152,6 +154,7 @@ nlmsvc_lookup_block(struct nlm_file *file, struct nlm_lock *lock) file, lock->fl.fl_pid, (long long)lock->fl.fl_start, (long long)lock->fl.fl_end, lock->fl.fl_type); + spin_lock(&nlm_blocked_lock); list_for_each_entry(block, &nlm_blocked, b_list) { fl = &block->b_call->a_args.lock.fl; dprintk("lockd: check f=%p pd=%d %Ld-%Ld ty=%d cookie=%s\n", @@ -161,9 +164,11 @@ nlmsvc_lookup_block(struct nlm_file *file, struct nlm_lock *lock) nlmdbg_cookie2a(&block->b_call->a_args.cookie)); if (block->b_file == file && nlm_compare_locks(fl, &lock->fl)) { kref_get(&block->b_count); + spin_unlock(&nlm_blocked_lock); return block; } } + spin_unlock(&nlm_blocked_lock); return NULL; } @@ -185,16 +190,19 @@ nlmsvc_find_block(struct nlm_cookie *cookie) { struct nlm_block *block; + spin_lock(&nlm_blocked_lock); list_for_each_entry(block, &nlm_blocked, b_list) { if (nlm_cookie_match(&block->b_call->a_args.cookie,cookie)) goto found; } + spin_unlock(&nlm_blocked_lock); return NULL; found: dprintk("nlmsvc_find_block(%s): block=%p\n", nlmdbg_cookie2a(cookie), block); kref_get(&block->b_count); + spin_unlock(&nlm_blocked_lock); return block; } @@ -317,6 +325,7 @@ void nlmsvc_traverse_blocks(struct nlm_host *host, restart: mutex_lock(&file->f_mutex); + spin_lock(&nlm_blocked_lock); list_for_each_entry_safe(block, next, &file->f_blocks, b_flist) { if (!match(block->b_host, host)) continue; @@ -325,11 +334,13 @@ void nlmsvc_traverse_blocks(struct nlm_host 
*host, if (list_empty(&block->b_list)) continue; kref_get(&block->b_count); + spin_unlock(&nlm_blocked_lock); mutex_unlock(&file->f_mutex); nlmsvc_unlink_block(block); nlmsvc_release_block(block); goto restart; } + spin_unlock(&nlm_blocked_lock); mutex_unlock(&file->f_mutex); } @@ -1008,7 +1019,7 @@ retry_deferred_block(struct nlm_block *block) * picks up locks that can be granted, or grant notifications that must * be retransmitted. */ -unsigned long +void nlmsvc_retry_blocked(void) { unsigned long timeout = MAX_SCHEDULE_TIMEOUT; @@ -1038,5 +1049,6 @@ nlmsvc_retry_blocked(void) } spin_unlock(&nlm_blocked_lock); - return timeout; + if (timeout < MAX_SCHEDULE_TIMEOUT) + mod_timer(&nlmsvc_retry, jiffies + timeout); } diff --git a/fs/locks.c b/fs/locks.c index a45efc16945d5..76ad05f8070ad 100644 --- a/fs/locks.c +++ b/fs/locks.c @@ -1744,13 +1744,6 @@ generic_add_lease(struct file *filp, int arg, struct file_lock **flp, void **pri if (is_deleg && !inode_trylock(inode)) return -EAGAIN; - if (is_deleg && arg == F_WRLCK) { - /* Write delegations are not currently supported: */ - inode_unlock(inode); - WARN_ON_ONCE(1); - return -EINVAL; - } - percpu_down_read(&file_rwsem); spin_lock(&ctx->flc_lock); time_out_leases(inode, &dispose); diff --git a/fs/nfs/Kconfig b/fs/nfs/Kconfig index b6fc169be1b16..7df2503cef6c3 100644 --- a/fs/nfs/Kconfig +++ b/fs/nfs/Kconfig @@ -209,8 +209,6 @@ config NFS_DISABLE_UDP_SUPPORT config NFS_V4_2_READ_PLUS bool "NFS: Enable support for the NFSv4.2 READ_PLUS operation" depends on NFS_V4_2 - default n + default y help - This is intended for developers only. The READ_PLUS operation has - been shown to have issues under specific conditions and should not - be used in production. + Choose Y here to enable use of the NFS v4.2 READ_PLUS operation. 
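
With lockd's signal-driven loop gone, retrying blocked locks moves to a timer: nlmsvc_retry_blocked() computes how far away the next retry is and arms nlmsvc_retry, whose callback only wakes the service thread. The pattern in isolation; my_serv stands in for the svc_serv pointer:

    #include <linux/timer.h>
    #include <linux/sched.h>         /* MAX_SCHEDULE_TIMEOUT */
    #include <linux/sunrpc/svc.h>

    static void my_retry_fn(struct timer_list *t)
    {
            svc_wake_up(my_serv);    /* nudge the RPC service thread */
    }
    static DEFINE_TIMER(my_retry_timer, my_retry_fn);

    /* arm when the next deferred item is 'timeout' jiffies away */
    if (timeout < MAX_SCHEDULE_TIMEOUT)
            mod_timer(&my_retry_timer, jiffies + timeout);

    /* tear down after the last service thread exits */
    timer_delete_sync(&my_retry_timer);

The timer_delete_sync() added to lockd_put() is what keeps a late-firing timer from calling svc_wake_up() on a service that is being torn down.
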
diff --git a/fs/nfs/blocklayout/dev.c b/fs/nfs/blocklayout/dev.c index 70f5563a8e81c..65cbb5607a5fc 100644 --- a/fs/nfs/blocklayout/dev.c +++ b/fs/nfs/blocklayout/dev.c @@ -404,7 +404,7 @@ bl_parse_concat(struct nfs_server *server, struct pnfs_block_dev *d, int ret, i; d->children = kcalloc(v->concat.volumes_count, - sizeof(struct pnfs_block_dev), GFP_KERNEL); + sizeof(struct pnfs_block_dev), gfp_mask); if (!d->children) return -ENOMEM; @@ -433,7 +433,7 @@ bl_parse_stripe(struct nfs_server *server, struct pnfs_block_dev *d, int ret, i; d->children = kcalloc(v->stripe.volumes_count, - sizeof(struct pnfs_block_dev), GFP_KERNEL); + sizeof(struct pnfs_block_dev), gfp_mask); if (!d->children) return -ENOMEM; diff --git a/fs/nfs/callback.c b/fs/nfs/callback.c index 456af7d230cf1..466ebf1d41b2b 100644 --- a/fs/nfs/callback.c +++ b/fs/nfs/callback.c @@ -74,23 +74,12 @@ static int nfs4_callback_up_net(struct svc_serv *serv, struct net *net) static int nfs4_callback_svc(void *vrqstp) { - int err; struct svc_rqst *rqstp = vrqstp; set_freezable(); - while (!kthread_freezable_should_stop(NULL)) { - - if (signal_pending(current)) - flush_signals(current); - /* - * Listen for a request on the socket - */ - err = svc_recv(rqstp, MAX_SCHEDULE_TIMEOUT); - if (err == -EAGAIN || err == -EINTR) - continue; - svc_process(rqstp); - } + while (!kthread_freezable_should_stop(NULL)) + svc_recv(rqstp); svc_exit_thread(rqstp); return 0; @@ -112,11 +101,7 @@ nfs41_callback_svc(void *vrqstp) set_freezable(); while (!kthread_freezable_should_stop(NULL)) { - - if (signal_pending(current)) - flush_signals(current); - - prepare_to_wait(&serv->sv_cb_waitq, &wq, TASK_INTERRUPTIBLE); + prepare_to_wait(&serv->sv_cb_waitq, &wq, TASK_IDLE); spin_lock_bh(&serv->sv_cb_lock); if (!list_empty(&serv->sv_cb_list)) { req = list_first_entry(&serv->sv_cb_list, @@ -387,7 +372,7 @@ check_gss_callback_principal(struct nfs_client *clp, struct svc_rqst *rqstp) * All other checking done after NFS decoding where the nfs_client can be * found in nfs4_callback_compound */ -static int nfs_callback_authenticate(struct svc_rqst *rqstp) +static enum svc_auth_status nfs_callback_authenticate(struct svc_rqst *rqstp) { rqstp->rq_auth_stat = rpc_autherr_badcred; diff --git a/fs/nfs/client.c b/fs/nfs/client.c index e4c5f193ed5e8..44eca51b28085 100644 --- a/fs/nfs/client.c +++ b/fs/nfs/client.c @@ -517,6 +517,8 @@ int nfs_create_rpc_client(struct nfs_client *clp, .authflavor = flavor, .cred = cl_init->cred, .xprtsec = cl_init->xprtsec, + .connect_timeout = cl_init->connect_timeout, + .reconnect_timeout = cl_init->reconnect_timeout, }; if (test_bit(NFS_CS_DISCRTRY, &clp->cl_flags)) diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c index 8f3112e71a6a6..e6a51fd94fea8 100644 --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c @@ -1089,6 +1089,17 @@ static void nfs_do_filldir(struct nfs_readdir_descriptor *desc, for (i = desc->cache_entry_index; i < array->size; i++) { struct nfs_cache_array_entry *ent; + /* + * nfs_readdir_handle_cache_misses return force clear at + * (cache_misses > NFS_READDIR_CACHE_MISS_THRESHOLD) for + * readdir heuristic, NFS_READDIR_CACHE_MISS_THRESHOLD + 1 + * entries need be emitted here. 
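
In the blocklayout hunks at the top of this stretch, the child-device arrays were allocated with a hardcoded GFP_KERNEL even though every caller passes an explicit gfp_mask; honoring the caller's mask matters when layout parsing runs in a context that must not recurse into the filesystem. The general shape, with hypothetical types:

    #include <linux/slab.h>

    struct child { u64 id; };
    struct parent { struct child *children; };

    static int parse_children(struct parent *d, unsigned int count,
                              gfp_t gfp_mask)
    {
            /* obey the caller's reclaim context, e.g. GFP_NOFS */
            d->children = kcalloc(count, sizeof(*d->children), gfp_mask);
            if (!d->children)
                    return -ENOMEM;
            return 0;
    }
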
+ */ + if (first_emit && i > NFS_READDIR_CACHE_MISS_THRESHOLD + 2) { + desc->eob = true; + break; + } + ent = &array->array[i]; if (!dir_emit(desc->ctx, ent->name, ent->name_len, nfs_compat_user_ino64(ent->ino), ent->d_type)) { @@ -1107,10 +1118,6 @@ static void nfs_do_filldir(struct nfs_readdir_descriptor *desc, desc->ctx->pos = desc->dir_cookie; else desc->ctx->pos++; - if (first_emit && i > NFS_READDIR_CACHE_MISS_THRESHOLD + 1) { - desc->eob = true; - break; - } } if (array->folio_is_eof) desc->eof = !desc->eob; diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c index aaffaaa336cc5..47d892a1d363d 100644 --- a/fs/nfs/direct.c +++ b/fs/nfs/direct.c @@ -472,13 +472,31 @@ ssize_t nfs_file_direct_read(struct kiocb *iocb, struct iov_iter *iter, return result; } +static void nfs_direct_add_page_head(struct list_head *list, + struct nfs_page *req) +{ + struct nfs_page *head = req->wb_head; + + if (!list_empty(&head->wb_list) || !nfs_lock_request(head)) + return; + if (!list_empty(&head->wb_list)) { + nfs_unlock_request(head); + return; + } + list_add(&head->wb_list, list); + kref_get(&head->wb_kref); + kref_get(&head->wb_kref); +} + static void nfs_direct_join_group(struct list_head *list, struct inode *inode) { struct nfs_page *req, *subreq; list_for_each_entry(req, list, wb_list) { - if (req->wb_head != req) + if (req->wb_head != req) { + nfs_direct_add_page_head(&req->wb_list, req); continue; + } subreq = req->wb_this_page; if (subreq == req) continue; diff --git a/fs/nfs/dns_resolve.c b/fs/nfs/dns_resolve.c index 6603b5cee029c..714975e5c0dbd 100644 --- a/fs/nfs/dns_resolve.c +++ b/fs/nfs/dns_resolve.c @@ -7,14 +7,16 @@ * Resolves DNS hostnames into valid ip addresses */ -#ifdef CONFIG_NFS_USE_KERNEL_DNS - #include #include #include -#include + #include "dns_resolve.h" +#ifdef CONFIG_NFS_USE_KERNEL_DNS + +#include + ssize_t nfs_dns_resolve_name(struct net *net, char *name, size_t namelen, struct sockaddr_storage *ss, size_t salen) { @@ -35,7 +37,6 @@ ssize_t nfs_dns_resolve_name(struct net *net, char *name, size_t namelen, #else -#include #include #include #include @@ -43,15 +44,12 @@ ssize_t nfs_dns_resolve_name(struct net *net, char *name, size_t namelen, #include #include #include -#include -#include #include #include #include #include #include "nfs4_fs.h" -#include "dns_resolve.h" #include "cache_lib.h" #include "netns.h" diff --git a/fs/nfs/file.c b/fs/nfs/file.c index 79b1b3fcd3fcf..3f9768810427d 100644 --- a/fs/nfs/file.c +++ b/fs/nfs/file.c @@ -200,7 +200,7 @@ nfs_file_splice_read(struct file *in, loff_t *ppos, struct pipe_inode_info *pipe EXPORT_SYMBOL_GPL(nfs_file_splice_read); int -nfs_file_mmap(struct file * file, struct vm_area_struct * vma) +nfs_file_mmap(struct file *file, struct vm_area_struct *vma) { struct inode *inode = file_inode(file); int status; diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h index 913c09806c7f5..9c9cf764f6000 100644 --- a/fs/nfs/internal.h +++ b/fs/nfs/internal.h @@ -82,6 +82,8 @@ struct nfs_client_initdata { const struct rpc_timeout *timeparms; const struct cred *cred; struct xprtsec_parms xprtsec; + unsigned long connect_timeout; + unsigned long reconnect_timeout; }; /* @@ -493,6 +495,7 @@ extern const struct nfs_pgio_completion_ops nfs_async_read_completion_ops; extern void nfs_pageio_init_read(struct nfs_pageio_descriptor *pgio, struct inode *inode, bool force_mds, const struct nfs_pgio_completion_ops *compl_ops); +extern bool nfs_read_alloc_scratch(struct nfs_pgio_header *hdr, size_t size); extern int nfs_read_add_folio(struct 
nfs_pageio_descriptor *pgio, struct nfs_open_context *ctx, struct folio *folio); diff --git a/fs/nfs/nfs2xdr.c b/fs/nfs/nfs2xdr.c index 05c3b4b2b3dd8..c190938142960 100644 --- a/fs/nfs/nfs2xdr.c +++ b/fs/nfs/nfs2xdr.c @@ -949,7 +949,7 @@ int nfs2_decode_dirent(struct xdr_stream *xdr, struct nfs_entry *entry, error = decode_filename_inline(xdr, &entry->name, &entry->len); if (unlikely(error)) - return -EAGAIN; + return error == -ENAMETOOLONG ? -ENAMETOOLONG : -EAGAIN; /* * The type (size and byte order) of nfscookie isn't defined in diff --git a/fs/nfs/nfs3client.c b/fs/nfs/nfs3client.c index eff3802c5e035..674c012868b1a 100644 --- a/fs/nfs/nfs3client.c +++ b/fs/nfs/nfs3client.c @@ -86,6 +86,7 @@ struct nfs_client *nfs3_set_ds_client(struct nfs_server *mds_srv, int ds_proto, unsigned int ds_timeo, unsigned int ds_retrans) { struct rpc_timeout ds_timeout; + unsigned long connect_timeout = ds_timeo * (ds_retrans + 1) * HZ / 10; struct nfs_client *mds_clp = mds_srv->nfs_client; struct nfs_client_initdata cl_init = { .addr = ds_addr, @@ -98,6 +99,8 @@ struct nfs_client *nfs3_set_ds_client(struct nfs_server *mds_srv, .timeparms = &ds_timeout, .cred = mds_srv->cred, .xprtsec = mds_clp->cl_xprtsec, + .connect_timeout = connect_timeout, + .reconnect_timeout = connect_timeout, }; struct nfs_client *clp; char buf[INET6_ADDRSTRLEN + 1]; diff --git a/fs/nfs/nfs3xdr.c b/fs/nfs/nfs3xdr.c index 3b0b650c9c5ab..60f032be805ae 100644 --- a/fs/nfs/nfs3xdr.c +++ b/fs/nfs/nfs3xdr.c @@ -1991,7 +1991,7 @@ int nfs3_decode_dirent(struct xdr_stream *xdr, struct nfs_entry *entry, error = decode_inline_filename3(xdr, &entry->name, &entry->len); if (unlikely(error)) - return -EAGAIN; + return error == -ENAMETOOLONG ? -ENAMETOOLONG : -EAGAIN; error = decode_cookie3(xdr, &new_cookie); if (unlikely(error)) diff --git a/fs/nfs/nfs42.h b/fs/nfs/nfs42.h index 0fe5aacbcfdf1..b59876b01a1e3 100644 --- a/fs/nfs/nfs42.h +++ b/fs/nfs/nfs42.h @@ -13,6 +13,7 @@ * more? Need to consider not to pre-alloc too much for a compound. 
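
The data-server connect timeout introduced in the nfs3client hunk above is worth unpacking: as with the timeo= mount option, ds_timeo is in tenths of a second, so the window covers the initial transmission plus every retransmit. With example values:

    /* timeo=600 (60s per try), retrans=2: three tries in all */
    unsigned long connect_timeout = 600 * (2 + 1) * HZ / 10;
    /* == 180 * HZ: up to 180 seconds to establish the DS
       connection before the attempt is abandoned */
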
*/ #define PNFS_LAYOUTSTATS_MAXDEV (4) +#define READ_PLUS_SCRATCH_SIZE (16) /* nfs4.2proc.c */ #ifdef CONFIG_NFS_V4_2 diff --git a/fs/nfs/nfs42proc.c b/fs/nfs/nfs42proc.c index 49f78e23b34c0..063e00aff87ed 100644 --- a/fs/nfs/nfs42proc.c +++ b/fs/nfs/nfs42proc.c @@ -471,8 +471,9 @@ ssize_t nfs42_proc_copy(struct file *src, loff_t pos_src, continue; } break; - } else if (err == -NFS4ERR_OFFLOAD_NO_REQS && !args.sync) { - args.sync = true; + } else if (err == -NFS4ERR_OFFLOAD_NO_REQS && + args.sync != res.synchronous) { + args.sync = res.synchronous; dst_exception.retry = 1; continue; } else if ((err == -ESTALE || diff --git a/fs/nfs/nfs42xdr.c b/fs/nfs/nfs42xdr.c index 95234208dc9ee..9e3ae53e22058 100644 --- a/fs/nfs/nfs42xdr.c +++ b/fs/nfs/nfs42xdr.c @@ -54,10 +54,16 @@ (1 /* data_content4 */ + \ 2 /* data_info4.di_offset */ + \ 1 /* data_info4.di_length */) +#define NFS42_READ_PLUS_HOLE_SEGMENT_SIZE \ + (1 /* data_content4 */ + \ + 2 /* data_info4.di_offset */ + \ + 2 /* data_info4.di_length */) +#define READ_PLUS_SEGMENT_SIZE_DIFF (NFS42_READ_PLUS_HOLE_SEGMENT_SIZE - \ + NFS42_READ_PLUS_DATA_SEGMENT_SIZE) #define decode_read_plus_maxsz (op_decode_hdr_maxsz + \ 1 /* rpr_eof */ + \ 1 /* rpr_contents count */ + \ - NFS42_READ_PLUS_DATA_SEGMENT_SIZE) + NFS42_READ_PLUS_HOLE_SEGMENT_SIZE) #define encode_seek_maxsz (op_encode_hdr_maxsz + \ encode_stateid_maxsz + \ 2 /* offset */ + \ @@ -617,8 +623,8 @@ static void nfs4_xdr_enc_read_plus(struct rpc_rqst *req, encode_putfh(xdr, args->fh, &hdr); encode_read_plus(xdr, args, &hdr); - rpc_prepare_reply_pages(req, args->pages, args->pgbase, - args->count, hdr.replen); + rpc_prepare_reply_pages(req, args->pages, args->pgbase, args->count, + hdr.replen - READ_PLUS_SEGMENT_SIZE_DIFF); encode_nops(&hdr); } @@ -1056,13 +1062,12 @@ static int decode_read_plus(struct xdr_stream *xdr, struct nfs_pgio_res *res) res->eof = be32_to_cpup(p++); segments = be32_to_cpup(p++); if (segments == 0) - return status; + return 0; segs = kmalloc_array(segments, sizeof(*segs), GFP_KERNEL); if (!segs) return -ENOMEM; - status = -EIO; for (i = 0; i < segments; i++) { status = decode_read_plus_segment(xdr, &segs[i]); if (status < 0) @@ -1428,7 +1433,7 @@ static int nfs4_xdr_dec_read_plus(struct rpc_rqst *rqstp, struct compound_hdr hdr; int status; - xdr_set_scratch_buffer(xdr, res->scratch, sizeof(res->scratch)); + xdr_set_scratch_buffer(xdr, res->scratch, READ_PLUS_SCRATCH_SIZE); status = decode_compound_hdr(xdr, &hdr); if (status) diff --git a/fs/nfs/nfs4client.c b/fs/nfs/nfs4client.c index d9114a754db73..27fb25567ce75 100644 --- a/fs/nfs/nfs4client.c +++ b/fs/nfs/nfs4client.c @@ -232,6 +232,8 @@ struct nfs_client *nfs4_alloc_client(const struct nfs_client_initdata *cl_init) __set_bit(NFS_CS_DISCRTRY, &clp->cl_flags); __set_bit(NFS_CS_NO_RETRANS_TIMEOUT, &clp->cl_flags); + if (test_bit(NFS_CS_DS, &cl_init->init_flags)) + __set_bit(NFS_CS_DS, &clp->cl_flags); /* * Set up the connection to the server before we add add to the * global list. @@ -1007,6 +1009,7 @@ struct nfs_client *nfs4_set_ds_client(struct nfs_server *mds_srv, if (mds_srv->flags & NFS_MOUNT_NORESVPORT) __set_bit(NFS_CS_NORESVPORT, &cl_init.init_flags); + __set_bit(NFS_CS_DS, &cl_init.init_flags); /* * Set an authflavor equual to the MDS value. 
Use the MDS nfs_client * cl_ipaddr so as to use the same EXCHANGE_ID co_ownerid as the MDS diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index d57aaf0cc577f..794343790ea8b 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -5438,18 +5438,8 @@ static bool nfs4_read_plus_not_supported(struct rpc_task *task, return false; } -static inline void nfs4_read_plus_scratch_free(struct nfs_pgio_header *hdr) -{ - if (hdr->res.scratch) { - kfree(hdr->res.scratch); - hdr->res.scratch = NULL; - } -} - static int nfs4_read_done(struct rpc_task *task, struct nfs_pgio_header *hdr) { - nfs4_read_plus_scratch_free(hdr); - if (!nfs4_sequence_done(task, &hdr->res.seq_res)) return -EAGAIN; if (nfs4_read_stateid_changed(task, &hdr->args)) @@ -5469,8 +5459,7 @@ static bool nfs42_read_plus_support(struct nfs_pgio_header *hdr, /* Note: We don't use READ_PLUS with pNFS yet */ if (nfs_server_capable(hdr->inode, NFS_CAP_READ_PLUS) && !hdr->ds_clp) { msg->rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_READ_PLUS]; - hdr->res.scratch = kmalloc(32, GFP_KERNEL); - return hdr->res.scratch != NULL; + return nfs_read_alloc_scratch(hdr, READ_PLUS_SCRATCH_SIZE); } return false; } @@ -8798,6 +8787,8 @@ nfs4_run_exchange_id(struct nfs_client *clp, const struct cred *cred, #ifdef CONFIG_NFS_V4_1_MIGRATION calldata->args.flags |= EXCHGID4_FLAG_SUPP_MOVED_MIGR; #endif + if (test_bit(NFS_CS_DS, &clp->cl_flags)) + calldata->args.flags |= EXCHGID4_FLAG_USE_PNFS_DS; msg.rpc_argp = &calldata->args; msg.rpc_resp = &calldata->res; task_setup_data.callback_data = calldata; @@ -8875,6 +8866,8 @@ static int _nfs4_proc_exchange_id(struct nfs_client *clp, const struct cred *cre /* Save the EXCHANGE_ID verifier session trunk tests */ memcpy(clp->cl_confirm.data, argp->verifier.data, sizeof(clp->cl_confirm.data)); + if (resp->flags & EXCHGID4_FLAG_USE_PNFS_DS) + set_bit(NFS_CS_DS, &clp->cl_flags); out: trace_nfs4_exchange_id(clp, status); rpc_put_task(task); diff --git a/fs/nfs/pnfs_dev.c b/fs/nfs/pnfs_dev.c index ddbbf4fcda867..178001c90156f 100644 --- a/fs/nfs/pnfs_dev.c +++ b/fs/nfs/pnfs_dev.c @@ -154,7 +154,7 @@ nfs4_get_device_info(struct nfs_server *server, set_bit(NFS_DEVICEID_NOCACHE, &d->flags); out_free_pages: - for (i = 0; i < max_pages; i++) + while (--i >= 0) __free_page(pages[i]); kfree(pages); out_free_pdev: diff --git a/fs/nfs/pnfs_nfs.c b/fs/nfs/pnfs_nfs.c index a0112ad4937aa..afd23910f3bff 100644 --- a/fs/nfs/pnfs_nfs.c +++ b/fs/nfs/pnfs_nfs.c @@ -852,6 +852,7 @@ static int _nfs4_pnfs_v3_ds_connect(struct nfs_server *mds_srv, { struct nfs_client *clp = ERR_PTR(-EIO); struct nfs4_pnfs_ds_addr *da; + unsigned long connect_timeout = timeo * (retrans + 1) * HZ / 10; int status = 0; dprintk("--> %s DS %s\n", __func__, ds->ds_remotestr); @@ -870,6 +871,8 @@ static int _nfs4_pnfs_v3_ds_connect(struct nfs_server *mds_srv, .dstaddr = (struct sockaddr *)&da->da_addr, .addrlen = da->da_addrlen, .servername = clp->cl_hostname, + .connect_timeout = connect_timeout, + .reconnect_timeout = connect_timeout, }; if (da->da_transport != clp->cl_proto) @@ -943,7 +946,7 @@ static int _nfs4_pnfs_v4_ds_connect(struct nfs_server *mds_srv, * Test this address for session trunking and * add as an alias */ - xprtdata.cred = nfs4_get_clid_cred(clp), + xprtdata.cred = nfs4_get_clid_cred(clp); rpc_clnt_add_xprt(clp->cl_rpcclient, &xprt_args, rpc_clnt_setup_test_and_add_xprt, &rpcdata); diff --git a/fs/nfs/read.c b/fs/nfs/read.c index f71eeee67e201..7dc21a48e3e7b 100644 --- a/fs/nfs/read.c +++ b/fs/nfs/read.c @@ -47,6 +47,8 @@ static struct 
nfs_pgio_header *nfs_readhdr_alloc(void) static void nfs_readhdr_free(struct nfs_pgio_header *rhdr) { + if (rhdr->res.scratch != NULL) + kfree(rhdr->res.scratch); kmem_cache_free(nfs_rdata_cachep, rhdr); } @@ -108,6 +110,14 @@ void nfs_pageio_reset_read_mds(struct nfs_pageio_descriptor *pgio) } EXPORT_SYMBOL_GPL(nfs_pageio_reset_read_mds); +bool nfs_read_alloc_scratch(struct nfs_pgio_header *hdr, size_t size) +{ + WARN_ON(hdr->res.scratch != NULL); + hdr->res.scratch = kmalloc(size, GFP_KERNEL); + return hdr->res.scratch != NULL; +} +EXPORT_SYMBOL_GPL(nfs_read_alloc_scratch); + static void nfs_readpage_release(struct nfs_page *req, int error) { struct folio *folio = nfs_page_to_folio(req); diff --git a/fs/nfs/super.c b/fs/nfs/super.c index 2284f749d8924..0d6473cb00cb3 100644 --- a/fs/nfs/super.c +++ b/fs/nfs/super.c @@ -1339,15 +1339,13 @@ int nfs_get_tree_common(struct fs_context *fc) void nfs_kill_super(struct super_block *s) { struct nfs_server *server = NFS_SB(s); - dev_t dev = s->s_dev; nfs_sysfs_move_sb_to_server(server); - generic_shutdown_super(s); + kill_anon_super(s); nfs_fscache_release_super_cookie(s); nfs_free_server(server); - free_anon_bdev(dev); } EXPORT_SYMBOL_GPL(nfs_kill_super); diff --git a/fs/nfsd/blocklayoutxdr.c b/fs/nfsd/blocklayoutxdr.c index 8e9c1a0f8d380..1ed2f691ebb90 100644 --- a/fs/nfsd/blocklayoutxdr.c +++ b/fs/nfsd/blocklayoutxdr.c @@ -83,6 +83,15 @@ nfsd4_block_encode_getdeviceinfo(struct xdr_stream *xdr, int len = sizeof(__be32), ret, i; __be32 *p; + /* + * See paragraph 5 of RFC 8881 S18.40.3. + */ + if (!gdp->gd_maxcount) { + if (xdr_stream_encode_u32(xdr, 0) != XDR_UNIT) + return nfserr_resource; + return nfs_ok; + } + p = xdr_reserve_space(xdr, len + sizeof(__be32)); if (!p) return nfserr_resource; diff --git a/fs/nfsd/cache.h b/fs/nfsd/cache.h index 4c9b87850ab12..929248c6ca84c 100644 --- a/fs/nfsd/cache.h +++ b/fs/nfsd/cache.h @@ -19,7 +19,7 @@ * typical sockaddr_storage. This is for space reasons, since sockaddr_storage * is much larger than a sockaddr_in6. */ -struct svc_cacherep { +struct nfsd_cacherep { struct { /* Keep often-read xid, csum in the same cache line: */ __be32 k_xid; @@ -84,8 +84,10 @@ int nfsd_net_reply_cache_init(struct nfsd_net *nn); void nfsd_net_reply_cache_destroy(struct nfsd_net *nn); int nfsd_reply_cache_init(struct nfsd_net *); void nfsd_reply_cache_shutdown(struct nfsd_net *); -int nfsd_cache_lookup(struct svc_rqst *); -void nfsd_cache_update(struct svc_rqst *, int, __be32 *); +int nfsd_cache_lookup(struct svc_rqst *rqstp, + struct nfsd_cacherep **cacherep); +void nfsd_cache_update(struct svc_rqst *rqstp, struct nfsd_cacherep *rp, + int cachetype, __be32 *statp); int nfsd_reply_cache_stats_show(struct seq_file *m, void *v); #endif /* NFSCACHE_H */ diff --git a/fs/nfsd/flexfilelayoutxdr.c b/fs/nfsd/flexfilelayoutxdr.c index e81d2a5cf381e..bb205328e043d 100644 --- a/fs/nfsd/flexfilelayoutxdr.c +++ b/fs/nfsd/flexfilelayoutxdr.c @@ -85,6 +85,15 @@ nfsd4_ff_encode_getdeviceinfo(struct xdr_stream *xdr, int addr_len; __be32 *p; + /* + * See paragraph 5 of RFC 8881 S18.40.3. 
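
Pulling the READ_PLUS pieces together (nfs42.h, nfs4proc.c, read.c above): the XDR scratch buffer now belongs to the pgio header, allocated through nfs_read_alloc_scratch() before the RPC is sent and freed exactly once with the header, rather than in nfs4_read_done() where some completion paths could skip it. A sketch of the ownership rule, using names from the patch:

    /* when opting in to READ_PLUS for this header */
    if (!nfs_read_alloc_scratch(hdr, READ_PLUS_SCRATCH_SIZE))
            return false;    /* ENOMEM: fall back to plain READ */

    /* decode side: xdr_set_scratch_buffer() lets xdr_inline_decode()
     * copy items that straddle page boundaries into hdr->res.scratch */

    /* release pairs with header freeing, not with read completion;
     * the NULL check in the hunk above is belt-and-braces, since
     * kfree(NULL) is a no-op */
    kfree(rhdr->res.scratch);
    kmem_cache_free(nfs_rdata_cachep, rhdr);
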
+ */ + if (!gdp->gd_maxcount) { + if (xdr_stream_encode_u32(xdr, 0) != XDR_UNIT) + return nfserr_resource; + return nfs_ok; + } + /* len + padding for two strings */ addr_len = 16 + da->netaddr.netid_len + da->netaddr.addr_len; ver_len = 20; diff --git a/fs/nfsd/nfs3proc.c b/fs/nfsd/nfs3proc.c index fc8d5b7db9f81..268ef57751c48 100644 --- a/fs/nfsd/nfs3proc.c +++ b/fs/nfsd/nfs3proc.c @@ -307,7 +307,9 @@ nfsd3_create_file(struct svc_rqst *rqstp, struct svc_fh *fhp, if (!IS_POSIXACL(inode)) iap->ia_mode &= ~current_umask(); - fh_fill_pre_attrs(fhp); + status = fh_fill_pre_attrs(fhp); + if (status != nfs_ok) + goto out; host_err = vfs_create(&nop_mnt_idmap, inode, child, iap->ia_mode, true); if (host_err < 0) { status = nfserrno(host_err); diff --git a/fs/nfsd/nfs4acl.c b/fs/nfsd/nfs4acl.c index 518203821790c..96e786b5e5444 100644 --- a/fs/nfsd/nfs4acl.c +++ b/fs/nfsd/nfs4acl.c @@ -441,7 +441,7 @@ struct posix_ace_state_array { * calculated so far: */ struct posix_acl_state { - int empty; + unsigned char valid; struct posix_ace_state owner; struct posix_ace_state group; struct posix_ace_state other; @@ -457,7 +457,6 @@ init_state(struct posix_acl_state *state, int cnt) int alloc; memset(state, 0, sizeof(struct posix_acl_state)); - state->empty = 1; /* * In the worst case, each individual acl could be for a distinct * named user or group, but we don't know which, so we allocate @@ -500,7 +499,7 @@ posix_state_to_acl(struct posix_acl_state *state, unsigned int flags) * and effective cases: when there are no inheritable ACEs, * calls ->set_acl with a NULL ACL structure. */ - if (state->empty && (flags & NFS4_ACL_TYPE_DEFAULT)) + if (!state->valid && (flags & NFS4_ACL_TYPE_DEFAULT)) return NULL; /* @@ -622,11 +621,12 @@ static void process_one_v4_ace(struct posix_acl_state *state, struct nfs4_ace *ace) { u32 mask = ace->access_mask; + short type = ace2type(ace); int i; - state->empty = 0; + state->valid |= type; - switch (ace2type(ace)) { + switch (type) { case ACL_USER_OBJ: if (ace->type == NFS4_ACE_ACCESS_ALLOWED_ACE_TYPE) { allow_bits(&state->owner, mask); @@ -726,6 +726,30 @@ static int nfs4_acl_nfsv4_to_posix(struct nfs4_acl *acl, if (!(ace->flag & NFS4_ACE_INHERIT_ONLY_ACE)) process_one_v4_ace(&effective_acl_state, ace); } + + /* + * At this point, the default ACL may have zeroed-out entries for owner, + * group and other. That usually results in a non-sensical resulting ACL + * that denies all access except to any ACE that was explicitly added. + * + * The setfacl command solves a similar problem with this logic: + * + * "If a Default ACL entry is created, and the Default ACL contains + * no owner, owning group, or others entry, a copy of the ACL + * owner, owning group, or others entry is added to the Default ACL." + * + * Copy any missing ACEs from the effective set, if any ACEs were + * explicitly set. 
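+ *
+ * Worked example (illustrative only): an NFSv4 ACL whose only
+ * inheritable ACE names a single user leaves default_acl_state.valid
+ * equal to ACL_USER, so without the copy below the resulting default
+ * ACL would deny the owner, owning group, and others outright. With
+ * the copy, those three entries mirror the effective ACL instead,
+ * matching the setfacl behavior quoted above.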
+ */ + if (default_acl_state.valid) { + if (!(default_acl_state.valid & ACL_USER_OBJ)) + default_acl_state.owner = effective_acl_state.owner; + if (!(default_acl_state.valid & ACL_GROUP_OBJ)) + default_acl_state.group = effective_acl_state.group; + if (!(default_acl_state.valid & ACL_OTHER)) + default_acl_state.other = effective_acl_state.other; + } + *pacl = posix_state_to_acl(&effective_acl_state, flags); if (IS_ERR(*pacl)) { ret = PTR_ERR(*pacl); diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c index 5ae670807449b..5ca748309c262 100644 --- a/fs/nfsd/nfs4proc.c +++ b/fs/nfsd/nfs4proc.c @@ -297,12 +297,12 @@ nfsd4_create_file(struct svc_rqst *rqstp, struct svc_fh *fhp, } if (d_really_is_positive(child)) { - status = nfs_ok; - /* NFSv4 protocol requires change attributes even though * no change happened. */ - fh_fill_both_attrs(fhp); + status = fh_fill_both_attrs(fhp); + if (status != nfs_ok) + goto out; switch (open->op_createmode) { case NFS4_CREATE_UNCHECKED: @@ -345,7 +345,9 @@ nfsd4_create_file(struct svc_rqst *rqstp, struct svc_fh *fhp, if (!IS_POSIXACL(inode)) iap->ia_mode &= ~current_umask(); - fh_fill_pre_attrs(fhp); + status = fh_fill_pre_attrs(fhp); + if (status != nfs_ok) + goto out; status = nfsd4_vfs_create(fhp, child, open); if (status != nfs_ok) goto out; @@ -380,6 +382,38 @@ nfsd4_create_file(struct svc_rqst *rqstp, struct svc_fh *fhp, return status; } +/** + * set_change_info - set up the change_info4 for a reply + * @cinfo: pointer to nfsd4_change_info to be populated + * @fhp: pointer to svc_fh to use as source + * + * Many operations in NFSv4 require change_info4 in the reply. This function + * populates that from the info that we (should!) have already collected. In + * the event that pre-op or post-op attrs are missing, synthesize values as + * described in the comment below. + */ +static void +set_change_info(struct nfsd4_change_info *cinfo, struct svc_fh *fhp) +{ + cinfo->atomic = (u32)(fhp->fh_pre_saved && fhp->fh_post_saved && !fhp->fh_no_atomic_attr); + cinfo->before_change = fhp->fh_pre_change; + cinfo->after_change = fhp->fh_post_change; + + /* + * If fetching the pre-change attributes failed, then we should + * have already failed the whole operation. We could have still + * failed to fetch post-change attributes however. + * + * If the pre_saved field isn't set for some reason, throw a + * warning and zero out the before field, since we don't know what + * it should be. If we didn't get post-op attrs, synthesize an + * after value that at least differs from the before value. + */ + if (WARN_ON_ONCE(!fhp->fh_pre_saved)) + cinfo->before_change = 0; + if (!fhp->fh_post_saved) + cinfo->after_change = cinfo->before_change + 1; +} + static __be32 do_open_lookup(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, struct nfsd4_open *open, struct svc_fh **resfh) { @@ -424,11 +458,11 @@ do_open_lookup(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, stru } else { status = nfsd_lookup(rqstp, current_fh, open->op_fname, open->op_fnamelen, *resfh); - if (!status) + if (status == nfs_ok) /* NFSv4 protocol requires change attributes even though * no change happened.
*/ - fh_fill_both_attrs(current_fh); + status = fh_fill_both_attrs(current_fh); } if (status) goto out; @@ -1313,12 +1347,11 @@ static __be32 nfsd4_ssc_setup_dul(struct nfsd_net *nn, char *ipaddr, /* found a match */ if (ni->nsui_busy) { /* wait - and try again */ - prepare_to_wait(&nn->nfsd_ssc_waitq, &wait, - TASK_INTERRUPTIBLE); + prepare_to_wait(&nn->nfsd_ssc_waitq, &wait, TASK_IDLE); spin_unlock(&nn->nfsd_ssc_lock); /* allow 20secs for mount/unmount for now - revisit */ - if (signal_pending(current) || + if (kthread_should_stop() || (schedule_timeout(20*HZ) == 0)) { finish_wait(&nn->nfsd_ssc_waitq, &wait); kfree(work); diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index daf305daa7516..8534693eb6a49 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -649,6 +649,18 @@ find_readable_file(struct nfs4_file *f) return ret; } +static struct nfsd_file * +find_rw_file(struct nfs4_file *f) +{ + struct nfsd_file *ret; + + spin_lock(&f->fi_lock); + ret = nfsd_file_get(f->fi_fds[O_RDWR]); + spin_unlock(&f->fi_lock); + + return ret; +} + struct nfsd_file * find_any_file(struct nfs4_file *f) { @@ -1144,7 +1156,7 @@ static void block_delegations(struct knfsd_fh *fh) static struct nfs4_delegation * alloc_init_deleg(struct nfs4_client *clp, struct nfs4_file *fp, - struct nfs4_clnt_odstate *odstate) + struct nfs4_clnt_odstate *odstate, u32 dl_type) { struct nfs4_delegation *dp; long n; @@ -1170,7 +1182,7 @@ alloc_init_deleg(struct nfs4_client *clp, struct nfs4_file *fp, INIT_LIST_HEAD(&dp->dl_recall_lru); dp->dl_clnt_odstate = odstate; get_clnt_odstate(odstate); - dp->dl_type = NFS4_OPEN_DELEGATE_READ; + dp->dl_type = dl_type; dp->dl_retries = 1; dp->dl_recalled = false; nfsd4_init_cb(&dp->dl_recall, dp->dl_stid.sc_client, @@ -5449,8 +5461,9 @@ nfs4_set_delegation(struct nfsd4_open *open, struct nfs4_ol_stateid *stp, struct nfs4_file *fp = stp->st_stid.sc_file; struct nfs4_clnt_odstate *odstate = stp->st_clnt_odstate; struct nfs4_delegation *dp; - struct nfsd_file *nf; + struct nfsd_file *nf = NULL; struct file_lock *fl; + u32 dl_type; /* * The fi_had_conflict and nfs_get_existing_delegation checks @@ -5460,15 +5473,35 @@ nfs4_set_delegation(struct nfsd4_open *open, struct nfs4_ol_stateid *stp, if (fp->fi_had_conflict) return ERR_PTR(-EAGAIN); - nf = find_readable_file(fp); - if (!nf) { - /* - * We probably could attempt another open and get a read - * delegation, but for now, don't bother until the - * client actually sends us one. - */ - return ERR_PTR(-EAGAIN); + /* + * Try for a write delegation first. RFC 8881 section 10.4 says: + * + * "An OPEN_DELEGATE_WRITE delegation allows the client to handle, + * on its own, all opens." + * + * Furthermore, the client can use a write delegation for most READ + * operations as well, so we require an O_RDWR file here. + * + * Offer a write delegation in the case of a BOTH open, and ensure + * we get the O_RDWR descriptor. + */ + if ((open->op_share_access & NFS4_SHARE_ACCESS_BOTH) == NFS4_SHARE_ACCESS_BOTH) { + nf = find_rw_file(fp); + dl_type = NFS4_OPEN_DELEGATE_WRITE; + } + + /* + * If the file is being opened O_RDONLY or we couldn't get an O_RDWR + * file for some reason, then try for a read delegation instead.
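+ *
+ * Summary of the resulting policy (a sketch derived from the code
+ * below, not normative):
+ *
+ *	share_access BOTH:	O_RDWR file -> OPEN_DELEGATE_WRITE,
+ *				falling back to a readable file and
+ *				OPEN_DELEGATE_READ when no O_RDWR
+ *				file is available
+ *	share_access READ:	readable file -> OPEN_DELEGATE_READ
+ *	share_access WRITE:	no delegation is offered (-EAGAIN)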
+ */ + if (!nf && (open->op_share_access & NFS4_SHARE_ACCESS_READ)) { + nf = find_readable_file(fp); + dl_type = NFS4_OPEN_DELEGATE_READ; + } + + if (!nf) + return ERR_PTR(-EAGAIN); + spin_lock(&state_lock); spin_lock(&fp->fi_lock); if (nfs4_delegation_exists(clp, fp)) @@ -5491,11 +5524,11 @@ nfs4_set_delegation(struct nfsd4_open *open, struct nfs4_ol_stateid *stp, return ERR_PTR(status); status = -ENOMEM; - dp = alloc_init_deleg(clp, fp, odstate); + dp = alloc_init_deleg(clp, fp, odstate, dl_type); if (!dp) goto out_delegees; - fl = nfs4_alloc_init_lease(dp, NFS4_OPEN_DELEGATE_READ); + fl = nfs4_alloc_init_lease(dp, dl_type); if (!fl) goto out_clnt_odstate; @@ -5568,10 +5601,28 @@ static void nfsd4_open_deleg_none_ext(struct nfsd4_open *open, int status) } /* - * Attempt to hand out a delegation. + * The Linux NFS server does not offer write delegations to NFSv4.0 + * clients in order to avoid conflicts between write delegations and + * GETATTRs requesting CHANGE or SIZE attributes. + * + * With NFSv4.1 and later minorversions, the SEQUENCE operation that + * begins each COMPOUND contains a client ID. Delegation recall can + * be avoided when the server recognizes the client sending a + * GETATTR also holds write delegation it conflicts with. + * + * However, the NFSv4.0 protocol does not enable a server to + * determine that a GETATTR originated from the client holding the + * conflicting delegation versus coming from some other client. Per + * RFC 7530 Section 16.7.5, the server must recall or send a + * CB_GETATTR even when the GETATTR originates from the client that + * holds the conflicting delegation. * - * Note we don't support write delegations, and won't until the vfs has - * proper support for them. + * An NFSv4.0 client can trigger a pathological situation if it + * always sends a DELEGRETURN preceded by a conflicting GETATTR in + * the same COMPOUND. COMPOUND execution will always stop at the + * GETATTR and the DELEGRETURN will never get executed. The server + * eventually revokes the delegation, which can result in loss of + * open or lock state. 
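+ *
+ * A sketch of that pathological exchange (illustrative, not
+ * wire-accurate):
+ *
+ *	COMPOUND { PUTFH; GETATTR(change, size); DELEGRETURN }
+ *
+ * The GETATTR triggers a recall of the very delegation that the
+ * DELEGRETURN in the same COMPOUND would return, so execution stalls
+ * at the GETATTR and the DELEGRETURN never runs.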
*/ static void nfs4_open_delegation(struct nfsd4_open *open, struct nfs4_ol_stateid *stp, @@ -5590,8 +5641,6 @@ nfs4_open_delegation(struct nfsd4_open *open, struct nfs4_ol_stateid *stp, case NFS4_OPEN_CLAIM_PREVIOUS: if (!cb_up) open->op_recall = 1; - if (open->op_delegate_type != NFS4_OPEN_DELEGATE_READ) - goto out_no_deleg; break; case NFS4_OPEN_CLAIM_NULL: parent = currentfh; @@ -5606,6 +5655,9 @@ nfs4_open_delegation(struct nfsd4_open *open, struct nfs4_ol_stateid *stp, goto out_no_deleg; if (!cb_up || !(oo->oo_flags & NFS4_OO_CONFIRMED)) goto out_no_deleg; + if (open->op_share_access & NFS4_SHARE_ACCESS_WRITE && + !clp->cl_minorversion) + goto out_no_deleg; break; default: goto out_no_deleg; @@ -5616,8 +5668,13 @@ nfs4_open_delegation(struct nfsd4_open *open, struct nfs4_ol_stateid *stp, memcpy(&open->op_delegate_stateid, &dp->dl_stid.sc_stateid, sizeof(dp->dl_stid.sc_stateid)); - trace_nfsd_deleg_read(&dp->dl_stid.sc_stateid); - open->op_delegate_type = NFS4_OPEN_DELEGATE_READ; + if (open->op_share_access & NFS4_SHARE_ACCESS_WRITE) { + open->op_delegate_type = NFS4_OPEN_DELEGATE_WRITE; + trace_nfsd_deleg_write(&dp->dl_stid.sc_stateid); + } else { + open->op_delegate_type = NFS4_OPEN_DELEGATE_READ; + trace_nfsd_deleg_read(&dp->dl_stid.sc_stateid); + } nfs4_put_stid(&dp->dl_stid); return; out_no_deleg: @@ -8341,3 +8398,68 @@ nfsd4_get_writestateid(struct nfsd4_compound_state *cstate, { get_stateid(cstate, &u->write.wr_stateid); } + +/** + * nfsd4_deleg_getattr_conflict - Recall if GETATTR causes conflict + * @rqstp: RPC transaction context + * @inode: file to be checked for a conflict + * + * This function is called when there is a conflict between a write + * delegation and a change/size GETATTR from another client. The server + * must either use CB_GETATTR to get the current values of the + * attributes from the client that holds the delegation or recall the + * delegation before replying to the GETATTR. See RFC 8881 section + * 18.7.4. + * + * The current implementation does not support CB_GETATTR yet; that + * mechanism, which would avoid recalling the delegation, can be added + * in follow-up work. + * + * Returns 0 if there is no conflict; otherwise an nfs_stat + * code is returned.
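+ *
+ * The call site added later in this series lives in
+ * nfsd4_encode_fattr(), roughly:
+ *
+ *	if (bmval0 & (FATTR4_WORD0_CHANGE | FATTR4_WORD0_SIZE)) {
+ *		status = nfsd4_deleg_getattr_conflict(rqstp,
+ *						      d_inode(dentry));
+ *		if (status)
+ *			goto out;
+ *	}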
+ */ +__be32 +nfsd4_deleg_getattr_conflict(struct svc_rqst *rqstp, struct inode *inode) +{ + __be32 status; + struct file_lock_context *ctx; + struct file_lock *fl; + struct nfs4_delegation *dp; + + ctx = locks_inode_context(inode); + if (!ctx) + return 0; + spin_lock(&ctx->flc_lock); + list_for_each_entry(fl, &ctx->flc_lease, fl_list) { + if (fl->fl_flags == FL_LAYOUT) + continue; + if (fl->fl_lmops != &nfsd_lease_mng_ops) { + /* + * non-nfs lease, if it's a lease with F_RDLCK then + * we are done; there isn't any write delegation + * on this inode + */ + if (fl->fl_type == F_RDLCK) + break; + goto break_lease; + } + if (fl->fl_type == F_WRLCK) { + dp = fl->fl_owner; + if (dp->dl_recall.cb_clp == *(rqstp->rq_lease_breaker)) { + spin_unlock(&ctx->flc_lock); + return 0; + } +break_lease: + spin_unlock(&ctx->flc_lock); + nfsd_stats_wdeleg_getattr_inc(); + status = nfserrno(nfsd_open_break_lease(inode, NFSD_MAY_READ)); + if (status != nfserr_jukebox || + !nfsd_wait_for_delegreturn(rqstp, inode)) + return status; + return 0; + } + break; + } + spin_unlock(&ctx->flc_lock); + return 0; +} diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c index b30dca7de8cc0..2e40c74d2f727 100644 --- a/fs/nfsd/nfs4xdr.c +++ b/fs/nfsd/nfs4xdr.c @@ -2984,6 +2984,11 @@ nfsd4_encode_fattr(struct xdr_stream *xdr, struct svc_fh *fhp, if (status) goto out; } + if (bmval0 & (FATTR4_WORD0_CHANGE | FATTR4_WORD0_SIZE)) { + status = nfsd4_deleg_getattr_conflict(rqstp, d_inode(dentry)); + if (status) + goto out; + } err = vfs_getattr(&path, &stat, STATX_BASIC_STATS | STATX_BTIME | STATX_CHANGE_COOKIE, @@ -3973,17 +3978,20 @@ nfsd4_encode_open(struct nfsd4_compoundres *resp, __be32 nfserr, nfserr = nfsd4_encode_stateid(xdr, &open->op_delegate_stateid); if (nfserr) return nfserr; - p = xdr_reserve_space(xdr, 32); + + p = xdr_reserve_space(xdr, XDR_UNIT * 8); if (!p) return nfserr_resource; *p++ = cpu_to_be32(open->op_recall); /* + * Always flush on close + * * TODO: space_limit's in delegations */ *p++ = cpu_to_be32(NFS4_LIMIT_SIZE); - *p++ = cpu_to_be32(~(u32)0); - *p++ = cpu_to_be32(~(u32)0); + *p++ = xdr_zero; + *p++ = xdr_zero; /* * TODO: ACE's in delegations @@ -4678,20 +4686,17 @@ nfsd4_encode_getdeviceinfo(struct nfsd4_compoundres *resp, __be32 nfserr, *p++ = cpu_to_be32(gdev->gd_layout_type); - /* If maxcount is 0 then just update notifications */ - if (gdev->gd_maxcount != 0) { - ops = nfsd4_layout_ops[gdev->gd_layout_type]; - nfserr = ops->encode_getdeviceinfo(xdr, gdev); - if (nfserr) { - /* - * We don't bother to burden the layout drivers with - * enforcing gd_maxcount, just tell the client to - * come back with a bigger buffer if it's not enough. - */ - if (xdr->buf->len + 4 > gdev->gd_maxcount) - goto toosmall; - return nfserr; - } + ops = nfsd4_layout_ops[gdev->gd_layout_type]; + nfserr = ops->encode_getdeviceinfo(xdr, gdev); + if (nfserr) { + /* + * We don't bother to burden the layout drivers with + * enforcing gd_maxcount, just tell the client to + * come back with a bigger buffer if it's not enough. 
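+ *
+ * Note that the gd_maxcount == 0 case no longer short-circuits here:
+ * per paragraph 5 of RFC 8881 S18.40.3, the block and flexfiles
+ * encoders added earlier in this series emit the empty da_addr_body
+ * themselves.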
+ */ + if (xdr->buf->len + 4 > gdev->gd_maxcount) + goto toosmall; + return nfserr; } if (gdev->gd_notify_types) { diff --git a/fs/nfsd/nfscache.c b/fs/nfsd/nfscache.c index a8eda1c85829e..80621a7095107 100644 --- a/fs/nfsd/nfscache.c +++ b/fs/nfsd/nfscache.c @@ -84,11 +84,11 @@ nfsd_hashsize(unsigned int limit) return roundup_pow_of_two(limit / TARGET_BUCKET_SIZE); } -static struct svc_cacherep * -nfsd_reply_cache_alloc(struct svc_rqst *rqstp, __wsum csum, - struct nfsd_net *nn) +static struct nfsd_cacherep * +nfsd_cacherep_alloc(struct svc_rqst *rqstp, __wsum csum, + struct nfsd_net *nn) { - struct svc_cacherep *rp; + struct nfsd_cacherep *rp; rp = kmem_cache_alloc(drc_slab, GFP_KERNEL); if (rp) { @@ -110,36 +110,64 @@ nfsd_reply_cache_alloc(struct svc_rqst *rqstp, __wsum csum, return rp; } -static void -nfsd_reply_cache_free_locked(struct nfsd_drc_bucket *b, struct svc_cacherep *rp, - struct nfsd_net *nn) +static void nfsd_cacherep_free(struct nfsd_cacherep *rp) { - if (rp->c_type == RC_REPLBUFF && rp->c_replvec.iov_base) { - nfsd_stats_drc_mem_usage_sub(nn, rp->c_replvec.iov_len); + if (rp->c_type == RC_REPLBUFF) kfree(rp->c_replvec.iov_base); + kmem_cache_free(drc_slab, rp); +} + +static unsigned long +nfsd_cacherep_dispose(struct list_head *dispose) +{ + struct nfsd_cacherep *rp; + unsigned long freed = 0; + + while (!list_empty(dispose)) { + rp = list_first_entry(dispose, struct nfsd_cacherep, c_lru); + list_del(&rp->c_lru); + nfsd_cacherep_free(rp); + freed++; } + return freed; +} + +static void +nfsd_cacherep_unlink_locked(struct nfsd_net *nn, struct nfsd_drc_bucket *b, + struct nfsd_cacherep *rp) +{ + if (rp->c_type == RC_REPLBUFF && rp->c_replvec.iov_base) + nfsd_stats_drc_mem_usage_sub(nn, rp->c_replvec.iov_len); if (rp->c_state != RC_UNUSED) { rb_erase(&rp->c_node, &b->rb_head); list_del(&rp->c_lru); atomic_dec(&nn->num_drc_entries); nfsd_stats_drc_mem_usage_sub(nn, sizeof(*rp)); } - kmem_cache_free(drc_slab, rp); } static void -nfsd_reply_cache_free(struct nfsd_drc_bucket *b, struct svc_cacherep *rp, +nfsd_reply_cache_free_locked(struct nfsd_drc_bucket *b, struct nfsd_cacherep *rp, + struct nfsd_net *nn) +{ + nfsd_cacherep_unlink_locked(nn, b, rp); + nfsd_cacherep_free(rp); +} + +static void +nfsd_reply_cache_free(struct nfsd_drc_bucket *b, struct nfsd_cacherep *rp, struct nfsd_net *nn) { spin_lock(&b->cache_lock); - nfsd_reply_cache_free_locked(b, rp, nn); + nfsd_cacherep_unlink_locked(nn, b, rp); spin_unlock(&b->cache_lock); + nfsd_cacherep_free(rp); } int nfsd_drc_slab_create(void) { drc_slab = kmem_cache_create("nfsd_drc", - sizeof(struct svc_cacherep), 0, 0, NULL); + sizeof(struct nfsd_cacherep), 0, 0, NULL); return drc_slab ? 0: -ENOMEM; } @@ -208,7 +236,7 @@ int nfsd_reply_cache_init(struct nfsd_net *nn) void nfsd_reply_cache_shutdown(struct nfsd_net *nn) { - struct svc_cacherep *rp; + struct nfsd_cacherep *rp; unsigned int i; unregister_shrinker(&nn->nfsd_reply_cache_shrinker); @@ -216,7 +244,7 @@ void nfsd_reply_cache_shutdown(struct nfsd_net *nn) for (i = 0; i < nn->drc_hashsize; i++) { struct list_head *head = &nn->drc_hashtbl[i].lru_head; while (!list_empty(head)) { - rp = list_first_entry(head, struct svc_cacherep, c_lru); + rp = list_first_entry(head, struct nfsd_cacherep, c_lru); nfsd_reply_cache_free_locked(&nn->drc_hashtbl[i], rp, nn); } @@ -233,7 +261,7 @@ void nfsd_reply_cache_shutdown(struct nfsd_net *nn) * not already scheduled. 
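+ *
+ * Because entries are only ever (re-)inserted at the tail, each
+ * bucket's LRU list stays ordered oldest-first, which is what lets
+ * nfsd_prune_bucket_locked() stop scanning at the first entry that
+ * has not yet expired.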
*/ static void -lru_put_end(struct nfsd_drc_bucket *b, struct svc_cacherep *rp) +lru_put_end(struct nfsd_drc_bucket *b, struct nfsd_cacherep *rp) { rp->c_timestamp = jiffies; list_move_tail(&rp->c_lru, &b->lru_head); @@ -247,12 +275,21 @@ nfsd_cache_bucket_find(__be32 xid, struct nfsd_net *nn) return &nn->drc_hashtbl[hash]; } -static long prune_bucket(struct nfsd_drc_bucket *b, struct nfsd_net *nn, - unsigned int max) +/* + * Remove no more than @max expired entries from bucket @b and move + * them to @dispose. If @max is zero, do not limit the number of + * removed entries. + */ +static void +nfsd_prune_bucket_locked(struct nfsd_net *nn, struct nfsd_drc_bucket *b, + unsigned int max, struct list_head *dispose) { - struct svc_cacherep *rp, *tmp; - long freed = 0; + unsigned long expiry = jiffies - RC_EXPIRE; + struct nfsd_cacherep *rp, *tmp; + unsigned int freed = 0; + + lockdep_assert_held(&b->cache_lock); + /* The bucket LRU is ordered oldest-first. */ list_for_each_entry_safe(rp, tmp, &b->lru_head, c_lru) { /* * Don't free entries attached to calls that are still @@ -260,60 +297,77 @@ static long prune_bucket(struct nfsd_drc_bucket *b, struct nfsd_net *nn, */ if (rp->c_state == RC_INPROG) continue; + if (atomic_read(&nn->num_drc_entries) <= nn->max_drc_entries && - time_before(jiffies, rp->c_timestamp + RC_EXPIRE)) + time_before(expiry, rp->c_timestamp)) break; - nfsd_reply_cache_free_locked(b, rp, nn); - if (max && freed++ > max) + + nfsd_cacherep_unlink_locked(nn, b, rp); + list_add(&rp->c_lru, dispose); + + if (max && ++freed > max) break; } - return freed; } -static long nfsd_prune_bucket(struct nfsd_drc_bucket *b, struct nfsd_net *nn) +/** + * nfsd_reply_cache_count - count_objects method for the DRC shrinker + * @shrink: our registered shrinker context + * @sc: garbage collection parameters + * + * Returns the total number of entries in the duplicate reply cache. To + * keep things simple and quick, this is not the number of expired entries + * in the cache (ie, the number that would be removed by a call to + * nfsd_reply_cache_scan). + */ +static unsigned long +nfsd_reply_cache_count(struct shrinker *shrink, struct shrink_control *sc) { - return prune_bucket(b, nn, 3); + struct nfsd_net *nn = container_of(shrink, + struct nfsd_net, nfsd_reply_cache_shrinker); + + return atomic_read(&nn->num_drc_entries); } -/* - * Walk the LRU list and prune off entries that are older than RC_EXPIRE. - * Also prune the oldest ones when the total exceeds the max number of entries. +/** + * nfsd_reply_cache_scan - scan_objects method for the DRC shrinker + * @shrink: our registered shrinker context + * @sc: garbage collection parameters + * + * Free expired entries on each bucket's LRU list until we've released + * nr_to_scan objects. Nothing will be released if the cache + * has not exceeded its max_drc_entries limit. + * + * Returns the number of entries released by this call.
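+ *
+ * Both methods hang off the per-net shrinker registered by
+ * nfsd_reply_cache_init(), roughly (a sketch; the field lives in
+ * struct nfsd_net):
+ *
+ *	nn->nfsd_reply_cache_shrinker.scan_objects = nfsd_reply_cache_scan;
+ *	nn->nfsd_reply_cache_shrinker.count_objects = nfsd_reply_cache_count;
+ *	register_shrinker(&nn->nfsd_reply_cache_shrinker, ...);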
*/ -static long -prune_cache_entries(struct nfsd_net *nn) +static unsigned long +nfsd_reply_cache_scan(struct shrinker *shrink, struct shrink_control *sc) { + struct nfsd_net *nn = container_of(shrink, + struct nfsd_net, nfsd_reply_cache_shrinker); + unsigned long freed = 0; + LIST_HEAD(dispose); unsigned int i; - long freed = 0; for (i = 0; i < nn->drc_hashsize; i++) { struct nfsd_drc_bucket *b = &nn->drc_hashtbl[i]; if (list_empty(&b->lru_head)) continue; + spin_lock(&b->cache_lock); - freed += prune_bucket(b, nn, 0); + nfsd_prune_bucket_locked(nn, b, 0, &dispose); spin_unlock(&b->cache_lock); - } - return freed; -} -static unsigned long -nfsd_reply_cache_count(struct shrinker *shrink, struct shrink_control *sc) -{ - struct nfsd_net *nn = container_of(shrink, - struct nfsd_net, nfsd_reply_cache_shrinker); + freed += nfsd_cacherep_dispose(&dispose); + if (freed > sc->nr_to_scan) + break; + } - return atomic_read(&nn->num_drc_entries); + trace_nfsd_drc_gc(nn, freed); + return freed; } -static unsigned long -nfsd_reply_cache_scan(struct shrinker *shrink, struct shrink_control *sc) -{ - struct nfsd_net *nn = container_of(shrink, - struct nfsd_net, nfsd_reply_cache_shrinker); - - return prune_cache_entries(nn); -} /* * Walk an xdr_buf and get a CRC for at most the first RC_CSUMLEN bytes */ @@ -348,8 +402,8 @@ nfsd_cache_csum(struct svc_rqst *rqstp) } static int -nfsd_cache_key_cmp(const struct svc_cacherep *key, - const struct svc_cacherep *rp, struct nfsd_net *nn) +nfsd_cache_key_cmp(const struct nfsd_cacherep *key, + const struct nfsd_cacherep *rp, struct nfsd_net *nn) { if (key->c_key.k_xid == rp->c_key.k_xid && key->c_key.k_csum != rp->c_key.k_csum) { @@ -365,11 +419,11 @@ nfsd_cache_key_cmp(const struct svc_cacherep *key, * Must be called with cache_lock held. Returns the found entry or * inserts an empty key on failure. */ -static struct svc_cacherep * -nfsd_cache_insert(struct nfsd_drc_bucket *b, struct svc_cacherep *key, +static struct nfsd_cacherep * +nfsd_cache_insert(struct nfsd_drc_bucket *b, struct nfsd_cacherep *key, struct nfsd_net *nn) { - struct svc_cacherep *rp, *ret = key; + struct nfsd_cacherep *rp, *ret = key; struct rb_node **p = &b->rb_head.rb_node, *parent = NULL; unsigned int entries = 0; @@ -378,7 +432,7 @@ nfsd_cache_insert(struct nfsd_drc_bucket *b, struct svc_cacherep *key, while (*p != NULL) { ++entries; parent = *p; - rp = rb_entry(parent, struct svc_cacherep, c_node); + rp = rb_entry(parent, struct nfsd_cacherep, c_node); cmp = nfsd_cache_key_cmp(key, rp, nn); if (cmp < 0) @@ -411,6 +465,7 @@ nfsd_cache_insert(struct nfsd_drc_bucket *b, struct svc_cacherep *key, /** * nfsd_cache_lookup - Find an entry in the duplicate reply cache * @rqstp: Incoming Call to find + * @cacherep: OUT: DRC entry for this request * * Try to find an entry matching the current call in the cache. When none * is found, we try to grab the oldest expired entry off the LRU list. 
If @@ -423,16 +478,17 @@ nfsd_cache_insert(struct nfsd_drc_bucket *b, struct svc_cacherep *key, * %RC_REPLY: Reply from cache * %RC_DROPIT: Do not process the request further */ -int nfsd_cache_lookup(struct svc_rqst *rqstp) +int nfsd_cache_lookup(struct svc_rqst *rqstp, struct nfsd_cacherep **cacherep) { struct nfsd_net *nn; - struct svc_cacherep *rp, *found; + struct nfsd_cacherep *rp, *found; __wsum csum; struct nfsd_drc_bucket *b; int type = rqstp->rq_cachetype; + unsigned long freed; + LIST_HEAD(dispose); int rtn = RC_DOIT; - rqstp->rq_cacherep = NULL; if (type == RC_NOCACHE) { nfsd_stats_rc_nocache_inc(); goto out; @@ -445,7 +501,7 @@ int nfsd_cache_lookup(struct svc_rqst *rqstp) * preallocate an entry. */ nn = net_generic(SVC_NET(rqstp), nfsd_net_id); - rp = nfsd_reply_cache_alloc(rqstp, csum, nn); + rp = nfsd_cacherep_alloc(rqstp, csum, nn); if (!rp) goto out; @@ -454,20 +510,18 @@ int nfsd_cache_lookup(struct svc_rqst *rqstp) found = nfsd_cache_insert(b, rp, nn); if (found != rp) goto found_entry; - - nfsd_stats_rc_misses_inc(); - rqstp->rq_cacherep = rp; + *cacherep = rp; rp->c_state = RC_INPROG; + nfsd_prune_bucket_locked(nn, b, 3, &dispose); + spin_unlock(&b->cache_lock); + freed = nfsd_cacherep_dispose(&dispose); + trace_nfsd_drc_gc(nn, freed); + + nfsd_stats_rc_misses_inc(); atomic_inc(&nn->num_drc_entries); nfsd_stats_drc_mem_usage_add(nn, sizeof(*rp)); - - nfsd_prune_bucket(b, nn); - -out_unlock: - spin_unlock(&b->cache_lock); -out: - return rtn; + goto out; found_entry: /* We found a matching entry which is either in progress or done. */ @@ -505,12 +559,16 @@ int nfsd_cache_lookup(struct svc_rqst *rqstp) out_trace: trace_nfsd_drc_found(nn, rqstp, rtn); - goto out_unlock; +out_unlock: + spin_unlock(&b->cache_lock); +out: + return rtn; } /** * nfsd_cache_update - Update an entry in the duplicate reply cache. * @rqstp: svc_rqst with a finished Reply + * @rp: IN: DRC entry for this request * @cachetype: which cache to update * @statp: pointer to Reply's NFS status code, or NULL * @@ -528,10 +586,10 @@ int nfsd_cache_lookup(struct svc_rqst *rqstp) * nfsd failed to encode a reply that otherwise would have been cached. * In this case, nfsd_cache_update is called with statp == NULL. 
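+ *
+ * Sketch of the new lookup/update pairing, as wired up in
+ * nfsd_dispatch() later in this series:
+ *
+ *	struct nfsd_cacherep *rp = NULL;
+ *
+ *	switch (nfsd_cache_lookup(rqstp, &rp)) {
+ *	case RC_DOIT:
+ *		(process the request and encode the reply)
+ *		nfsd_cache_update(rqstp, rp, rqstp->rq_cachetype,
+ *				  statp + 1);
+ *	}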
*/ -void nfsd_cache_update(struct svc_rqst *rqstp, int cachetype, __be32 *statp) +void nfsd_cache_update(struct svc_rqst *rqstp, struct nfsd_cacherep *rp, + int cachetype, __be32 *statp) { struct nfsd_net *nn = net_generic(SVC_NET(rqstp), nfsd_net_id); - struct svc_cacherep *rp = rqstp->rq_cacherep; struct kvec *resv = &rqstp->rq_res.head[0], *cachv; struct nfsd_drc_bucket *b; int len; diff --git a/fs/nfsd/nfsctl.c b/fs/nfsd/nfsctl.c index 3709830f90a63..7ed02fb88a362 100644 --- a/fs/nfsd/nfsctl.c +++ b/fs/nfsd/nfsctl.c @@ -1627,6 +1627,7 @@ static void __exit exit_nfsd(void) } MODULE_AUTHOR("Olaf Kirch "); +MODULE_DESCRIPTION("In-kernel NFS server"); MODULE_LICENSE("GPL"); module_init(init_nfsd) module_exit(exit_nfsd) diff --git a/fs/nfsd/nfsd.h b/fs/nfsd/nfsd.h index d88498f8b275c..11c14faa6c67b 100644 --- a/fs/nfsd/nfsd.h +++ b/fs/nfsd/nfsd.h @@ -96,7 +96,12 @@ int nfsd_pool_stats_open(struct inode *, struct file *); int nfsd_pool_stats_release(struct inode *, struct file *); void nfsd_shutdown_threads(struct net *net); -void nfsd_put(struct net *net); +static inline void nfsd_put(struct net *net) +{ + struct nfsd_net *nn = net_generic(net, nfsd_net_id); + + svc_put(nn->nfsd_serv); +} bool i_am_nfsd(void); diff --git a/fs/nfsd/nfsfh.c b/fs/nfsd/nfsfh.c index c291389a1d71d..355bf0db3235b 100644 --- a/fs/nfsd/nfsfh.c +++ b/fs/nfsd/nfsfh.c @@ -614,7 +614,7 @@ fh_update(struct svc_fh *fhp) * @fhp: file handle to be updated * */ -void fh_fill_pre_attrs(struct svc_fh *fhp) +__be32 __must_check fh_fill_pre_attrs(struct svc_fh *fhp) { bool v4 = (fhp->fh_maxsize == NFS4_FHSIZE); struct inode *inode; @@ -622,12 +622,12 @@ void fh_fill_pre_attrs(struct svc_fh *fhp) __be32 err; if (fhp->fh_no_wcc || fhp->fh_pre_saved) - return; + return nfs_ok; inode = d_inode(fhp->fh_dentry); err = fh_getattr(fhp, &stat); if (err) - return; + return err; if (v4) fhp->fh_pre_change = nfsd4_change_attribute(&stat, inode); @@ -636,6 +636,7 @@ void fh_fill_pre_attrs(struct svc_fh *fhp) fhp->fh_pre_ctime = stat.ctime; fhp->fh_pre_size = stat.size; fhp->fh_pre_saved = true; + return nfs_ok; } /** @@ -643,26 +644,27 @@ void fh_fill_pre_attrs(struct svc_fh *fhp) * @fhp: file handle to be updated * */ -void fh_fill_post_attrs(struct svc_fh *fhp) +__be32 fh_fill_post_attrs(struct svc_fh *fhp) { bool v4 = (fhp->fh_maxsize == NFS4_FHSIZE); struct inode *inode = d_inode(fhp->fh_dentry); __be32 err; if (fhp->fh_no_wcc) - return; + return nfs_ok; if (fhp->fh_post_saved) printk("nfsd: inode locked twice during operation.\n"); err = fh_getattr(fhp, &fhp->fh_post_attr); if (err) - return; + return err; fhp->fh_post_saved = true; if (v4) fhp->fh_post_change = nfsd4_change_attribute(&fhp->fh_post_attr, inode); + return nfs_ok; } /** @@ -672,16 +674,20 @@ void fh_fill_post_attrs(struct svc_fh *fhp) * This is used when the directory wasn't changed, but wcc attributes * are needed anyway. 
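+ *
+ * Like fh_fill_pre_attrs() and fh_fill_post_attrs(), this helper now
+ * returns an nfsstat, and callers are expected to fail the operation
+ * when it reports an error, e.g. (sketch):
+ *
+ *	status = fh_fill_both_attrs(fhp);
+ *	if (status != nfs_ok)
+ *		goto out;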
*/ -void fh_fill_both_attrs(struct svc_fh *fhp) +__be32 __must_check fh_fill_both_attrs(struct svc_fh *fhp) { - fh_fill_post_attrs(fhp); - if (!fhp->fh_post_saved) - return; + __be32 err; + + err = fh_fill_post_attrs(fhp); + if (err) + return err; + fhp->fh_pre_change = fhp->fh_post_change; fhp->fh_pre_mtime = fhp->fh_post_attr.mtime; fhp->fh_pre_ctime = fhp->fh_post_attr.ctime; fhp->fh_pre_size = fhp->fh_post_attr.size; fhp->fh_pre_saved = true; + return nfs_ok; } /* diff --git a/fs/nfsd/nfsfh.h b/fs/nfsd/nfsfh.h index 4e0ecf0ae2cf2..40426f899e760 100644 --- a/fs/nfsd/nfsfh.h +++ b/fs/nfsd/nfsfh.h @@ -294,7 +294,7 @@ static inline void fh_clear_pre_post_attrs(struct svc_fh *fhp) } u64 nfsd4_change_attribute(struct kstat *stat, struct inode *inode); -extern void fh_fill_pre_attrs(struct svc_fh *fhp); -extern void fh_fill_post_attrs(struct svc_fh *fhp); -extern void fh_fill_both_attrs(struct svc_fh *fhp); +__be32 __must_check fh_fill_pre_attrs(struct svc_fh *fhp); +__be32 fh_fill_post_attrs(struct svc_fh *fhp); +__be32 __must_check fh_fill_both_attrs(struct svc_fh *fhp); #endif /* _LINUX_NFSD_NFSFH_H */ diff --git a/fs/nfsd/nfssvc.c b/fs/nfsd/nfssvc.c index 2154fa63c5f2c..1582af33e204a 100644 --- a/fs/nfsd/nfssvc.c +++ b/fs/nfsd/nfssvc.c @@ -542,9 +542,14 @@ static struct notifier_block nfsd_inet6addr_notifier = { /* Only used under nfsd_mutex, so this atomic may be overkill: */ static atomic_t nfsd_notifier_refcount = ATOMIC_INIT(0); -static void nfsd_last_thread(struct svc_serv *serv, struct net *net) +static void nfsd_last_thread(struct net *net) { struct nfsd_net *nn = net_generic(net, nfsd_net_id); + struct svc_serv *serv = nn->nfsd_serv; + + spin_lock(&nfsd_notifier_lock); + nn->nfsd_serv = NULL; + spin_unlock(&nfsd_notifier_lock); /* check if the notifier still has clients */ if (atomic_dec_return(&nfsd_notifier_refcount) == 0) { @@ -554,6 +559,8 @@ static void nfsd_last_thread(struct svc_serv *serv, struct net *net) #endif } + svc_xprt_destroy_all(serv, net); + /* * write_ports can create the server without actually starting * any threads--if we get shut down before any threads are @@ -644,7 +651,8 @@ void nfsd_shutdown_threads(struct net *net) svc_get(serv); /* Kill outstanding nfsd threads */ svc_set_num_threads(serv, NULL, 0); - nfsd_put(net); + nfsd_last_thread(net); + svc_put(serv); mutex_unlock(&nfsd_mutex); } @@ -674,9 +682,6 @@ int nfsd_create_serv(struct net *net) serv->sv_maxconn = nn->max_connections; error = svc_bind(serv, net); if (error < 0) { - /* NOT nfsd_put() as notifiers (see below) haven't - * been set up yet. - */ svc_put(serv); return error; } @@ -719,29 +724,6 @@ int nfsd_get_nrthreads(int n, int *nthreads, struct net *net) return 0; } -/* This is the callback for kref_put() below. - * There is no code here as the first thing to be done is - * call svc_shutdown_net(), but we cannot get the 'net' from - * the kref. So do all the work when kref_put returns true. 
- */ -static void nfsd_noop(struct kref *ref) -{ -} - -void nfsd_put(struct net *net) -{ - struct nfsd_net *nn = net_generic(net, nfsd_net_id); - - if (kref_put(&nn->nfsd_serv->sv_refcnt, nfsd_noop)) { - svc_xprt_destroy_all(nn->nfsd_serv, net); - nfsd_last_thread(nn->nfsd_serv, net); - svc_destroy(&nn->nfsd_serv->sv_refcnt); - spin_lock(&nfsd_notifier_lock); - nn->nfsd_serv = NULL; - spin_unlock(&nfsd_notifier_lock); - } -} - int nfsd_set_nrthreads(int n, int *nthreads, struct net *net) { int i = 0; @@ -792,7 +774,7 @@ int nfsd_set_nrthreads(int n, int *nthreads, struct net *net) if (err) break; } - nfsd_put(net); + svc_put(nn->nfsd_serv); return err; } @@ -807,6 +789,7 @@ nfsd_svc(int nrservs, struct net *net, const struct cred *cred) int error; bool nfsd_up_before; struct nfsd_net *nn = net_generic(net, nfsd_net_id); + struct svc_serv *serv; mutex_lock(&nfsd_mutex); dprintk("nfsd: creating service\n"); @@ -826,22 +809,25 @@ nfsd_svc(int nrservs, struct net *net, const struct cred *cred) goto out; nfsd_up_before = nn->nfsd_net_up; + serv = nn->nfsd_serv; error = nfsd_startup_net(net, cred); if (error) goto out_put; - error = svc_set_num_threads(nn->nfsd_serv, NULL, nrservs); + error = svc_set_num_threads(serv, NULL, nrservs); if (error) goto out_shutdown; - error = nn->nfsd_serv->sv_nrthreads; + error = serv->sv_nrthreads; + if (error == 0) + nfsd_last_thread(net); out_shutdown: if (error < 0 && !nfsd_up_before) nfsd_shutdown_net(net); out_put: /* Threads now hold service active */ if (xchg(&nn->keep_active, 0)) - nfsd_put(net); - nfsd_put(net); + svc_put(serv); + svc_put(serv); out: mutex_unlock(&nfsd_mutex); return error; @@ -953,7 +939,6 @@ nfsd(void *vrqstp) struct svc_xprt *perm_sock = list_entry(rqstp->rq_server->sv_permsocks.next, typeof(struct svc_xprt), xpt_list); struct net *net = perm_sock->xpt_net; struct nfsd_net *nn = net_generic(net, nfsd_net_id); - int err; /* At this point, the thread shares current->fs * with the init process. We need to create files with the @@ -965,15 +950,6 @@ nfsd(void *vrqstp) current->fs->umask = 0; - /* - * thread is spawned with all signals set to SIG_IGN, re-enable - * the ones that will bring down the thread - */ - allow_signal(SIGKILL); - allow_signal(SIGHUP); - allow_signal(SIGINT); - allow_signal(SIGQUIT); - atomic_inc(&nfsdstats.th_cnt); set_freezable(); @@ -981,54 +957,19 @@ nfsd(void *vrqstp) /* * The main request loop */ - for (;;) { + while (!kthread_should_stop()) { /* Update sv_maxconn if it has changed */ rqstp->rq_server->sv_maxconn = nn->max_connections; - /* - * Find a socket with data available and call its - * recvfrom routine. - */ - while ((err = svc_recv(rqstp, 60*60*HZ)) == -EAGAIN) - ; - if (err == -EINTR) - break; - validate_process_creds(); - svc_process(rqstp); + svc_recv(rqstp); validate_process_creds(); } - /* Clear signals before calling svc_exit_thread() */ - flush_signals(current); - atomic_dec(&nfsdstats.th_cnt); out: - /* Take an extra ref so that the svc_put in svc_exit_thread() - * doesn't call svc_destroy() - */ - svc_get(nn->nfsd_serv); - /* Release the thread */ svc_exit_thread(rqstp); - - /* We need to drop a ref, but may not drop the last reference - * without holding nfsd_mutex, and we cannot wait for nfsd_mutex as that - * could deadlock with nfsd_shutdown_threads() waiting for us. 
- * So three options are: - * - drop a non-final reference, - * - get the mutex without waiting - * - sleep briefly andd try the above again - */ - while (!svc_put_not_last(nn->nfsd_serv)) { - if (mutex_trylock(&nfsd_mutex)) { - nfsd_put(net); - mutex_unlock(&nfsd_mutex); - break; - } - msleep(20); - } - return 0; } @@ -1046,6 +987,7 @@ int nfsd_dispatch(struct svc_rqst *rqstp) { const struct svc_procedure *proc = rqstp->rq_procinfo; __be32 *statp = rqstp->rq_accept_statp; + struct nfsd_cacherep *rp; /* * Give the xdr decoder a chance to change this if it wants @@ -1056,7 +998,8 @@ int nfsd_dispatch(struct svc_rqst *rqstp) if (!proc->pc_decode(rqstp, &rqstp->rq_arg_stream)) goto out_decode_err; - switch (nfsd_cache_lookup(rqstp)) { + rp = NULL; + switch (nfsd_cache_lookup(rqstp, &rp)) { case RC_DOIT: break; case RC_REPLY: @@ -1072,7 +1015,7 @@ int nfsd_dispatch(struct svc_rqst *rqstp) if (!proc->pc_encode(rqstp, &rqstp->rq_res_stream)) goto out_encode_err; - nfsd_cache_update(rqstp, rqstp->rq_cachetype, statp + 1); + nfsd_cache_update(rqstp, rp, rqstp->rq_cachetype, statp + 1); out_cached_reply: return 1; @@ -1082,13 +1025,13 @@ int nfsd_dispatch(struct svc_rqst *rqstp) return 1; out_update_drop: - nfsd_cache_update(rqstp, RC_NOCACHE, NULL); + nfsd_cache_update(rqstp, rp, RC_NOCACHE, NULL); out_dropit: return 0; out_encode_err: trace_nfsd_cant_encode_err(rqstp); - nfsd_cache_update(rqstp, RC_NOCACHE, NULL); + nfsd_cache_update(rqstp, rp, RC_NOCACHE, NULL); *statp = rpc_system_err; return 1; } diff --git a/fs/nfsd/state.h b/fs/nfsd/state.h index d49d3060ed4f7..cbddcf484dbac 100644 --- a/fs/nfsd/state.h +++ b/fs/nfsd/state.h @@ -732,4 +732,7 @@ static inline bool try_to_expire_client(struct nfs4_client *clp) cmpxchg(&clp->cl_state, NFSD4_COURTESY, NFSD4_EXPIRABLE); return clp->cl_state == NFSD4_EXPIRABLE; } + +extern __be32 nfsd4_deleg_getattr_conflict(struct svc_rqst *rqstp, + struct inode *inode); #endif /* NFSD4_STATE_H */ diff --git a/fs/nfsd/stats.c b/fs/nfsd/stats.c index 777e24e5da33b..63797635e1c32 100644 --- a/fs/nfsd/stats.c +++ b/fs/nfsd/stats.c @@ -65,6 +65,8 @@ static int nfsd_show(struct seq_file *seq, void *v) seq_printf(seq, " %lld", percpu_counter_sum_positive(&nfsdstats.counter[NFSD_STATS_NFS4_OP(i)])); } + seq_printf(seq, "\nwdeleg_getattr %lld", + percpu_counter_sum_positive(&nfsdstats.counter[NFSD_STATS_WDELEG_GETATTR])); seq_putc(seq, '\n'); #endif diff --git a/fs/nfsd/stats.h b/fs/nfsd/stats.h index 9b43dc3d99913..cf5524e7ca062 100644 --- a/fs/nfsd/stats.h +++ b/fs/nfsd/stats.h @@ -22,6 +22,7 @@ enum { NFSD_STATS_FIRST_NFS4_OP, /* count of individual nfsv4 operations */ NFSD_STATS_LAST_NFS4_OP = NFSD_STATS_FIRST_NFS4_OP + LAST_NFS4_OP, #define NFSD_STATS_NFS4_OP(op) (NFSD_STATS_FIRST_NFS4_OP + (op)) + NFSD_STATS_WDELEG_GETATTR, /* count of getattr conflict with wdeleg */ #endif NFSD_STATS_COUNTERS_NUM }; @@ -93,4 +94,10 @@ static inline void nfsd_stats_drc_mem_usage_sub(struct nfsd_net *nn, s64 amount) percpu_counter_sub(&nn->counter[NFSD_NET_DRC_MEM_USAGE], amount); } +#ifdef CONFIG_NFSD_V4 +static inline void nfsd_stats_wdeleg_getattr_inc(void) +{ + percpu_counter_inc(&nfsdstats.counter[NFSD_STATS_WDELEG_GETATTR]); +} +#endif #endif /* _NFSD_STATS_H */ diff --git a/fs/nfsd/trace.h b/fs/nfsd/trace.h index 2af74983f1461..8039043488718 100644 --- a/fs/nfsd/trace.h +++ b/fs/nfsd/trace.h @@ -607,6 +607,7 @@ DEFINE_STATEID_EVENT(layout_recall_release); DEFINE_STATEID_EVENT(open); DEFINE_STATEID_EVENT(deleg_read); +DEFINE_STATEID_EVENT(deleg_write); 
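+/*
+ * deleg_write fires when nfs4_open_delegation() hands out an
+ * OPEN_DELEGATE_WRITE stateid; see the trace_nfsd_deleg_write()
+ * call added in fs/nfsd/nfs4state.c.
+ */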
DEFINE_STATEID_EVENT(deleg_return); DEFINE_STATEID_EVENT(deleg_recall); @@ -1240,8 +1241,8 @@ TRACE_EVENT(nfsd_drc_found, TRACE_EVENT(nfsd_drc_mismatch, TP_PROTO( const struct nfsd_net *nn, - const struct svc_cacherep *key, - const struct svc_cacherep *rp + const struct nfsd_cacherep *key, + const struct nfsd_cacherep *rp ), TP_ARGS(nn, key, rp), TP_STRUCT__entry( @@ -1261,6 +1262,28 @@ TRACE_EVENT(nfsd_drc_mismatch, __entry->ingress) ); +TRACE_EVENT_CONDITION(nfsd_drc_gc, + TP_PROTO( + const struct nfsd_net *nn, + unsigned long freed + ), + TP_ARGS(nn, freed), + TP_CONDITION(freed > 0), + TP_STRUCT__entry( + __field(unsigned long long, boot_time) + __field(unsigned long, freed) + __field(int, total) + ), + TP_fast_assign( + __entry->boot_time = nn->boot_time; + __entry->freed = freed; + __entry->total = atomic_read(&nn->num_drc_entries); + ), + TP_printk("boot_time=%16llx total=%d freed=%lu", + __entry->boot_time, __entry->total, __entry->freed + ) +); + TRACE_EVENT(nfsd_cb_args, TP_PROTO( const struct nfs4_client *clp, diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c index 9b7acba382fe1..48260cf68fde8 100644 --- a/fs/nfsd/vfs.c +++ b/fs/nfsd/vfs.c @@ -1540,7 +1540,9 @@ nfsd_create(struct svc_rqst *rqstp, struct svc_fh *fhp, dput(dchild); if (err) goto out_unlock; - fh_fill_pre_attrs(fhp); + err = fh_fill_pre_attrs(fhp); + if (err != nfs_ok) + goto out_unlock; err = nfsd_create_locked(rqstp, fhp, attrs, type, rdev, resfhp); fh_fill_post_attrs(fhp); out_unlock: @@ -1635,13 +1637,16 @@ nfsd_symlink(struct svc_rqst *rqstp, struct svc_fh *fhp, inode_unlock(dentry->d_inode); goto out_drop_write; } - fh_fill_pre_attrs(fhp); + err = fh_fill_pre_attrs(fhp); + if (err != nfs_ok) + goto out_unlock; host_err = vfs_symlink(&nop_mnt_idmap, d_inode(dentry), dnew, path); err = nfserrno(host_err); cerr = fh_compose(resfhp, fhp->fh_export, dnew, fhp); if (!err) nfsd_create_setattr(rqstp, fhp, resfhp, attrs); fh_fill_post_attrs(fhp); +out_unlock: inode_unlock(dentry->d_inode); if (!err) err = nfserrno(commit_metadata(fhp)); @@ -1703,7 +1708,9 @@ nfsd_link(struct svc_rqst *rqstp, struct svc_fh *ffhp, err = nfserr_noent; if (d_really_is_negative(dold)) goto out_dput; - fh_fill_pre_attrs(ffhp); + err = fh_fill_pre_attrs(ffhp); + if (err != nfs_ok) + goto out_dput; host_err = vfs_link(dold, &nop_mnt_idmap, dirp, dnew, NULL); fh_fill_post_attrs(ffhp); inode_unlock(dirp); @@ -1789,8 +1796,12 @@ nfsd_rename(struct svc_rqst *rqstp, struct svc_fh *ffhp, char *fname, int flen, } trap = lock_rename(tdentry, fdentry); - fh_fill_pre_attrs(ffhp); - fh_fill_pre_attrs(tfhp); + err = fh_fill_pre_attrs(ffhp); + if (err != nfs_ok) + goto out_unlock; + err = fh_fill_pre_attrs(tfhp); + if (err != nfs_ok) + goto out_unlock; odentry = lookup_one_len(fname, fdentry, flen); host_err = PTR_ERR(odentry); @@ -1857,6 +1868,7 @@ nfsd_rename(struct svc_rqst *rqstp, struct svc_fh *ffhp, char *fname, int flen, fh_fill_post_attrs(ffhp); fh_fill_post_attrs(tfhp); } +out_unlock: unlock_rename(tdentry, fdentry); fh_drop_write(ffhp); @@ -1916,12 +1928,14 @@ nfsd_unlink(struct svc_rqst *rqstp, struct svc_fh *fhp, int type, goto out_unlock; } rinode = d_inode(rdentry); - ihold(rinode); + err = fh_fill_pre_attrs(fhp); + if (err != nfs_ok) + goto out_unlock; + ihold(rinode); if (!type) type = d_inode(rdentry)->i_mode & S_IFMT; - fh_fill_pre_attrs(fhp); if (type != S_IFDIR) { int retries; @@ -2341,16 +2355,18 @@ nfsd_removexattr(struct svc_rqst *rqstp, struct svc_fh *fhp, char *name) return nfserrno(ret); inode_lock(fhp->fh_dentry->d_inode); - 
fh_fill_pre_attrs(fhp); - + err = fh_fill_pre_attrs(fhp); + if (err != nfs_ok) + goto out_unlock; ret = __vfs_removexattr_locked(&nop_mnt_idmap, fhp->fh_dentry, name, NULL); - + err = nfsd_xattr_errno(ret); fh_fill_post_attrs(fhp); +out_unlock: inode_unlock(fhp->fh_dentry->d_inode); fh_drop_write(fhp); - return nfsd_xattr_errno(ret); + return err; } __be32 @@ -2368,15 +2384,17 @@ nfsd_setxattr(struct svc_rqst *rqstp, struct svc_fh *fhp, char *name, if (ret) return nfserrno(ret); inode_lock(fhp->fh_dentry->d_inode); - fh_fill_pre_attrs(fhp); - - ret = __vfs_setxattr_locked(&nop_mnt_idmap, fhp->fh_dentry, name, buf, - len, flags, NULL); + err = fh_fill_pre_attrs(fhp); + if (err != nfs_ok) + goto out_unlock; + ret = __vfs_setxattr_locked(&nop_mnt_idmap, fhp->fh_dentry, + name, buf, len, flags, NULL); fh_fill_post_attrs(fhp); + err = nfsd_xattr_errno(ret); +out_unlock: inode_unlock(fhp->fh_dentry->d_inode); fh_drop_write(fhp); - - return nfsd_xattr_errno(ret); + return err; } #endif diff --git a/fs/nfsd/xdr4.h b/fs/nfsd/xdr4.h index 510978e602da6..9d918a79dc166 100644 --- a/fs/nfsd/xdr4.h +++ b/fs/nfsd/xdr4.h @@ -774,17 +774,6 @@ void warn_on_nonidempotent_op(struct nfsd4_op *op); #define NFS4_SVC_XDRSIZE sizeof(struct nfsd4_compoundargs) -static inline void -set_change_info(struct nfsd4_change_info *cinfo, struct svc_fh *fhp) -{ - BUG_ON(!fhp->fh_pre_saved); - cinfo->atomic = (u32)(fhp->fh_post_saved && !fhp->fh_no_atomic_attr); - - cinfo->before_change = fhp->fh_pre_change; - cinfo->after_change = fhp->fh_post_change; -} - - bool nfsd4_mach_creds_match(struct nfs4_client *cl, struct svc_rqst *rqstp); bool nfs4svc_decode_compoundargs(struct svc_rqst *rqstp, struct xdr_stream *xdr); bool nfs4svc_encode_compoundres(struct svc_rqst *rqstp, struct xdr_stream *xdr); diff --git a/fs/ocfs2/journal.c b/fs/ocfs2/journal.c index e8e7d47265aa9..ce215565d061e 100644 --- a/fs/ocfs2/journal.c +++ b/fs/ocfs2/journal.c @@ -908,9 +908,9 @@ int ocfs2_journal_init(struct ocfs2_super *osb, int *dirty) /* call the kernels journal init function now */ j_journal = jbd2_journal_init_inode(inode); - if (j_journal == NULL) { + if (IS_ERR(j_journal)) { mlog(ML_ERROR, "Linux journal layer error\n"); - status = -EINVAL; + status = PTR_ERR(j_journal); goto done; } @@ -1684,9 +1684,9 @@ static int ocfs2_replay_journal(struct ocfs2_super *osb, } journal = jbd2_journal_init_inode(inode); - if (journal == NULL) { + if (IS_ERR(journal)) { mlog(ML_ERROR, "Linux journal layer error\n"); - status = -EIO; + status = PTR_ERR(journal); goto done; } diff --git a/fs/ocfs2/stack_user.c b/fs/ocfs2/stack_user.c index 05d4414d0c33f..9b76ee66aeb2f 100644 --- a/fs/ocfs2/stack_user.c +++ b/fs/ocfs2/stack_user.c @@ -738,18 +738,11 @@ static int user_plock(struct ocfs2_cluster_connection *conn, * * Internally, fs/dlm will pass these to a misc device, which * a userspace daemon will read and write to. - * - * For now, cancel requests (which happen internally only), - * are turned into unlocks. Most of this function taken from - * gfs2_lock. 
*/ - if (cmd == F_CANCELLK) { - cmd = F_SETLK; - fl->fl_type = F_UNLCK; - } - - if (IS_GETLK(cmd)) + if (cmd == F_CANCELLK) + return dlm_posix_cancel(conn->cc_lockspace, ino, file, fl); + else if (IS_GETLK(cmd)) return dlm_posix_get(conn->cc_lockspace, ino, file, fl); else if (fl->fl_type == F_UNLCK) return dlm_posix_unlock(conn->cc_lockspace, ino, file, fl); diff --git a/fs/smb/server/asn1.c b/fs/smb/server/asn1.c index cc6384f796759..4a4b2b03ff33d 100644 --- a/fs/smb/server/asn1.c +++ b/fs/smb/server/asn1.c @@ -214,12 +214,10 @@ static int ksmbd_neg_token_alloc(void *context, size_t hdrlen, { struct ksmbd_conn *conn = context; - conn->mechToken = kmalloc(vlen + 1, GFP_KERNEL); + conn->mechToken = kmemdup_nul(value, vlen, GFP_KERNEL); if (!conn->mechToken) return -ENOMEM; - memcpy(conn->mechToken, value, vlen); - conn->mechToken[vlen] = '\0'; return 0; } diff --git a/fs/smb/server/auth.c b/fs/smb/server/auth.c index 5e5e120edcc22..229a6527870d0 100644 --- a/fs/smb/server/auth.c +++ b/fs/smb/server/auth.c @@ -355,6 +355,9 @@ int ksmbd_decode_ntlmssp_auth_blob(struct authenticate_message *authblob, if (blob_len < (u64)sess_key_off + sess_key_len) return -EINVAL; + if (sess_key_len > CIFS_KEY_SIZE) + return -EINVAL; + ctx_arc4 = kmalloc(sizeof(*ctx_arc4), GFP_KERNEL); if (!ctx_arc4) return -ENOMEM; @@ -1029,11 +1032,15 @@ static struct scatterlist *ksmbd_init_sg(struct kvec *iov, unsigned int nvec, { struct scatterlist *sg; unsigned int assoc_data_len = sizeof(struct smb2_transform_hdr) - 20; - int i, nr_entries[3] = {0}, total_entries = 0, sg_idx = 0; + int i, *nr_entries, total_entries = 0, sg_idx = 0; if (!nvec) return NULL; + nr_entries = kcalloc(nvec, sizeof(int), GFP_KERNEL); + if (!nr_entries) + return NULL; + for (i = 0; i < nvec - 1; i++) { unsigned long kaddr = (unsigned long)iov[i + 1].iov_base; @@ -1051,8 +1058,10 @@ static struct scatterlist *ksmbd_init_sg(struct kvec *iov, unsigned int nvec, total_entries += 2; sg = kmalloc_array(total_entries, sizeof(struct scatterlist), GFP_KERNEL); - if (!sg) + if (!sg) { + kfree(nr_entries); return NULL; + } sg_init_table(sg, total_entries); smb2_sg_set_buf(&sg[sg_idx++], iov[0].iov_base + 24, assoc_data_len); @@ -1086,6 +1095,7 @@ static struct scatterlist *ksmbd_init_sg(struct kvec *iov, unsigned int nvec, } } smb2_sg_set_buf(&sg[sg_idx], sign, SMB2_SIGNATURE_SIZE); + kfree(nr_entries); return sg; } diff --git a/fs/smb/server/connection.c b/fs/smb/server/connection.c index 2a717d158f02e..0d990c2f33cda 100644 --- a/fs/smb/server/connection.c +++ b/fs/smb/server/connection.c @@ -123,28 +123,22 @@ void ksmbd_conn_enqueue_request(struct ksmbd_work *work) } } -int ksmbd_conn_try_dequeue_request(struct ksmbd_work *work) +void ksmbd_conn_try_dequeue_request(struct ksmbd_work *work) { struct ksmbd_conn *conn = work->conn; - int ret = 1; if (list_empty(&work->request_entry) && list_empty(&work->async_request_entry)) - return 0; + return; - if (!work->multiRsp) - atomic_dec(&conn->req_running); - if (!work->multiRsp) { - spin_lock(&conn->request_lock); - list_del_init(&work->request_entry); - spin_unlock(&conn->request_lock); - if (work->asynchronous) - release_async_work(work); - ret = 0; - } + atomic_dec(&conn->req_running); + spin_lock(&conn->request_lock); + list_del_init(&work->request_entry); + spin_unlock(&conn->request_lock); + if (work->asynchronous) + release_async_work(work); wake_up_all(&conn->req_running_q); - return ret; } void ksmbd_conn_lock(struct ksmbd_conn *conn) @@ -193,41 +187,22 @@ void ksmbd_conn_wait_idle(struct ksmbd_conn 
*conn, u64 sess_id) int ksmbd_conn_write(struct ksmbd_work *work) { struct ksmbd_conn *conn = work->conn; - size_t len = 0; int sent; - struct kvec iov[3]; - int iov_idx = 0; if (!work->response_buf) { pr_err("NULL response header\n"); return -EINVAL; } - if (work->tr_buf) { - iov[iov_idx] = (struct kvec) { work->tr_buf, - sizeof(struct smb2_transform_hdr) + 4 }; - len += iov[iov_idx++].iov_len; - } - - if (work->aux_payload_sz) { - iov[iov_idx] = (struct kvec) { work->response_buf, work->resp_hdr_sz }; - len += iov[iov_idx++].iov_len; - iov[iov_idx] = (struct kvec) { work->aux_payload_buf, work->aux_payload_sz }; - len += iov[iov_idx++].iov_len; - } else { - if (work->tr_buf) - iov[iov_idx].iov_len = work->resp_hdr_sz; - else - iov[iov_idx].iov_len = get_rfc1002_len(work->response_buf) + 4; - iov[iov_idx].iov_base = work->response_buf; - len += iov[iov_idx++].iov_len; - } + if (work->send_no_response) + return 0; ksmbd_conn_lock(conn); - sent = conn->transport->ops->writev(conn->transport, &iov[0], - iov_idx, len, - work->need_invalidate_rkey, - work->remote_key); + sent = conn->transport->ops->writev(conn->transport, work->iov, + work->iov_cnt, + get_rfc1002_len(work->iov[0].iov_base) + 4, + work->need_invalidate_rkey, + work->remote_key); ksmbd_conn_unlock(conn); if (sent < 0) { diff --git a/fs/smb/server/connection.h b/fs/smb/server/connection.h index ad8dfaa48ffb3..ab2583f030ceb 100644 --- a/fs/smb/server/connection.h +++ b/fs/smb/server/connection.h @@ -158,7 +158,7 @@ int ksmbd_conn_rdma_write(struct ksmbd_conn *conn, struct smb2_buffer_desc_v1 *desc, unsigned int desc_len); void ksmbd_conn_enqueue_request(struct ksmbd_work *work); -int ksmbd_conn_try_dequeue_request(struct ksmbd_work *work); +void ksmbd_conn_try_dequeue_request(struct ksmbd_work *work); void ksmbd_conn_init_server_callbacks(struct ksmbd_conn_ops *ops); int ksmbd_conn_handler_loop(void *p); int ksmbd_conn_transport_init(void); diff --git a/fs/smb/server/ksmbd_work.c b/fs/smb/server/ksmbd_work.c index 14b9caebf7a4f..51def3ca74c01 100644 --- a/fs/smb/server/ksmbd_work.c +++ b/fs/smb/server/ksmbd_work.c @@ -27,18 +27,35 @@ struct ksmbd_work *ksmbd_alloc_work_struct(void) INIT_LIST_HEAD(&work->async_request_entry); INIT_LIST_HEAD(&work->fp_entry); INIT_LIST_HEAD(&work->interim_entry); + INIT_LIST_HEAD(&work->aux_read_list); + work->iov_alloc_cnt = 4; + work->iov = kcalloc(work->iov_alloc_cnt, sizeof(struct kvec), + GFP_KERNEL); + if (!work->iov) { + kmem_cache_free(work_cache, work); + work = NULL; + } } return work; } void ksmbd_free_work_struct(struct ksmbd_work *work) { + struct aux_read *ar, *tmp; + WARN_ON(work->saved_cred != NULL); kvfree(work->response_buf); - kvfree(work->aux_payload_buf); + + list_for_each_entry_safe(ar, tmp, &work->aux_read_list, entry) { + kvfree(ar->buf); + list_del(&ar->entry); + kfree(ar); + } + kfree(work->tr_buf); kvfree(work->request_buf); + kfree(work->iov); if (work->async_id) ksmbd_release_id(&work->conn->async_ida, work->async_id); kmem_cache_free(work_cache, work); @@ -77,3 +94,77 @@ bool ksmbd_queue_work(struct ksmbd_work *work) { return queue_work(ksmbd_wq, &work->work); } + +static int ksmbd_realloc_iov_pin(struct ksmbd_work *work, void *ib, + unsigned int ib_len) +{ + + if (work->iov_alloc_cnt <= work->iov_cnt) { + struct kvec *new; + + work->iov_alloc_cnt += 4; + new = krealloc(work->iov, + sizeof(struct kvec) * work->iov_alloc_cnt, + GFP_KERNEL | __GFP_ZERO); + if (!new) + return -ENOMEM; + work->iov = new; + } + + work->iov[++work->iov_idx].iov_base = ib; + 
work->iov[work->iov_idx].iov_len = ib_len; + work->iov_cnt++; + + return 0; +} + +static int __ksmbd_iov_pin_rsp(struct ksmbd_work *work, void *ib, int len, + void *aux_buf, unsigned int aux_size) +{ + /* Plus rfc_length size on first iov */ + if (!work->iov_idx) { + work->iov[work->iov_idx].iov_base = work->response_buf; + *(__be32 *)work->iov[0].iov_base = 0; + work->iov[work->iov_idx].iov_len = 4; + work->iov_cnt++; + } + + ksmbd_realloc_iov_pin(work, ib, len); + inc_rfc1001_len(work->iov[0].iov_base, len); + + if (aux_size) { + struct aux_read *ar; + + ksmbd_realloc_iov_pin(work, aux_buf, aux_size); + inc_rfc1001_len(work->iov[0].iov_base, aux_size); + + ar = kmalloc(sizeof(struct aux_read), GFP_KERNEL); + if (!ar) + return -ENOMEM; + + ar->buf = aux_buf; + list_add(&ar->entry, &work->aux_read_list); + } + + return 0; +} + +int ksmbd_iov_pin_rsp(struct ksmbd_work *work, void *ib, int len) +{ + return __ksmbd_iov_pin_rsp(work, ib, len, NULL, 0); +} + +int ksmbd_iov_pin_rsp_read(struct ksmbd_work *work, void *ib, int len, + void *aux_buf, unsigned int aux_size) +{ + return __ksmbd_iov_pin_rsp(work, ib, len, aux_buf, aux_size); +} + +int allocate_interim_rsp_buf(struct ksmbd_work *work) +{ + work->response_buf = kzalloc(MAX_CIFS_SMALL_BUFFER_SIZE, GFP_KERNEL); + if (!work->response_buf) + return -ENOMEM; + work->response_sz = MAX_CIFS_SMALL_BUFFER_SIZE; + return 0; +} diff --git a/fs/smb/server/ksmbd_work.h b/fs/smb/server/ksmbd_work.h index f8ae6144c0aea..8ca2c813246e6 100644 --- a/fs/smb/server/ksmbd_work.h +++ b/fs/smb/server/ksmbd_work.h @@ -19,6 +19,11 @@ enum { KSMBD_WORK_CLOSED, }; +struct aux_read { + void *buf; + struct list_head entry; +}; + /* one of these for every pending CIFS request at the connection */ struct ksmbd_work { /* Server corresponding to this mid */ @@ -31,13 +36,19 @@ struct ksmbd_work { /* Response buffer */ void *response_buf; - /* Read data buffer */ - void *aux_payload_buf; + struct list_head aux_read_list; + + struct kvec *iov; + int iov_alloc_cnt; + int iov_cnt; + int iov_idx; /* Next cmd hdr in compound req buf*/ int next_smb2_rcv_hdr_off; /* Next cmd hdr in compound rsp buf*/ int next_smb2_rsp_hdr_off; + /* Current cmd hdr in compound rsp buf*/ + int curr_smb2_rsp_hdr_off; /* * Current Local FID assigned compound response if SMB2 CREATE @@ -53,16 +64,11 @@ struct ksmbd_work { unsigned int credits_granted; /* response smb header size */ - unsigned int resp_hdr_sz; unsigned int response_sz; - /* Read data count */ - unsigned int aux_payload_sz; void *tr_buf; unsigned char state; - /* Multiple responses for one request e.g. SMB ECHO */ - bool multiRsp:1; /* No response for cancelled request */ bool send_no_response:1; /* Request is encrypted */ @@ -95,6 +101,15 @@ static inline void *ksmbd_resp_buf_next(struct ksmbd_work *work) return work->response_buf + work->next_smb2_rsp_hdr_off + 4; } +/** + * ksmbd_resp_buf_curr - Get current buffer on compound response. + * @work: smb work containing response buffer + */ +static inline void *ksmbd_resp_buf_curr(struct ksmbd_work *work) +{ + return work->response_buf + work->curr_smb2_rsp_hdr_off + 4; +} + /** * ksmbd_req_buf_next - Get next buffer on compound request. 
* @work: smb work containing response buffer @@ -113,5 +128,8 @@ int ksmbd_work_pool_init(void); int ksmbd_workqueue_init(void); void ksmbd_workqueue_destroy(void); bool ksmbd_queue_work(struct ksmbd_work *work); - +int ksmbd_iov_pin_rsp_read(struct ksmbd_work *work, void *ib, int len, + void *aux_buf, unsigned int aux_size); +int ksmbd_iov_pin_rsp(struct ksmbd_work *work, void *ib, int len); +int allocate_interim_rsp_buf(struct ksmbd_work *work); #endif /* __KSMBD_WORK_H__ */ diff --git a/fs/smb/server/mgmt/share_config.h b/fs/smb/server/mgmt/share_config.h index 3fd3382939421..5f591751b9236 100644 --- a/fs/smb/server/mgmt/share_config.h +++ b/fs/smb/server/mgmt/share_config.h @@ -34,29 +34,22 @@ struct ksmbd_share_config { #define KSMBD_SHARE_INVALID_UID ((__u16)-1) #define KSMBD_SHARE_INVALID_GID ((__u16)-1) -static inline int share_config_create_mode(struct ksmbd_share_config *share, - umode_t posix_mode) +static inline umode_t +share_config_create_mode(struct ksmbd_share_config *share, + umode_t posix_mode) { - if (!share->force_create_mode) { - if (!posix_mode) - return share->create_mask; - else - return posix_mode & share->create_mask; - } - return share->force_create_mode & share->create_mask; + umode_t mode = (posix_mode ?: (umode_t)-1) & share->create_mask; + + return mode | share->force_create_mode; } -static inline int share_config_directory_mode(struct ksmbd_share_config *share, - umode_t posix_mode) +static inline umode_t +share_config_directory_mode(struct ksmbd_share_config *share, + umode_t posix_mode) { - if (!share->force_directory_mode) { - if (!posix_mode) - return share->directory_mask; - else - return posix_mode & share->directory_mask; - } + umode_t mode = (posix_mode ?: (umode_t)-1) & share->directory_mask; - return share->force_directory_mode & share->directory_mask; + return mode | share->force_directory_mode; } static inline int test_share_config_flag(struct ksmbd_share_config *share, diff --git a/fs/smb/server/oplock.c b/fs/smb/server/oplock.c index 844b303baf293..9bc0103720f57 100644 --- a/fs/smb/server/oplock.c +++ b/fs/smb/server/oplock.c @@ -616,15 +616,6 @@ static int oplock_break_pending(struct oplock_info *opinfo, int req_op_level) return 0; } -static inline int allocate_oplock_break_buf(struct ksmbd_work *work) -{ - work->response_buf = kzalloc(MAX_CIFS_SMALL_BUFFER_SIZE, GFP_KERNEL); - if (!work->response_buf) - return -ENOMEM; - work->response_sz = MAX_CIFS_SMALL_BUFFER_SIZE; - return 0; -} - /** * __smb2_oplock_break_noti() - send smb2 oplock break cmd from conn * to client @@ -639,7 +630,6 @@ static void __smb2_oplock_break_noti(struct work_struct *wk) { struct smb2_oplock_break *rsp = NULL; struct ksmbd_work *work = container_of(wk, struct ksmbd_work, work); - struct ksmbd_conn *conn = work->conn; struct oplock_break_info *br_info = work->request_buf; struct smb2_hdr *rsp_hdr; struct ksmbd_file *fp; @@ -648,7 +638,7 @@ static void __smb2_oplock_break_noti(struct work_struct *wk) if (!fp) goto out; - if (allocate_oplock_break_buf(work)) { + if (allocate_interim_rsp_buf(work)) { pr_err("smb2_allocate_rsp_buf failed! 
"); ksmbd_fd_put(work, fp); goto out; @@ -656,8 +646,6 @@ static void __smb2_oplock_break_noti(struct work_struct *wk) rsp_hdr = smb2_get_msg(work->response_buf); memset(rsp_hdr, 0, sizeof(struct smb2_hdr) + 2); - *(__be32 *)work->response_buf = - cpu_to_be32(conn->vals->header_size); rsp_hdr->ProtocolId = SMB2_PROTO_NUMBER; rsp_hdr->StructureSize = SMB2_HEADER_STRUCTURE_SIZE; rsp_hdr->CreditRequest = cpu_to_le16(0); @@ -684,13 +672,15 @@ static void __smb2_oplock_break_noti(struct work_struct *wk) rsp->PersistentFid = fp->persistent_id; rsp->VolatileFid = fp->volatile_id; - inc_rfc1001_len(work->response_buf, 24); + ksmbd_fd_put(work, fp); + if (ksmbd_iov_pin_rsp(work, (void *)rsp, + sizeof(struct smb2_oplock_break))) + goto out; ksmbd_debug(OPLOCK, "sending oplock break v_id %llu p_id = %llu lock level = %d\n", rsp->VolatileFid, rsp->PersistentFid, rsp->OplockLevel); - ksmbd_fd_put(work, fp); ksmbd_conn_write(work); out: @@ -751,18 +741,15 @@ static void __smb2_lease_break_noti(struct work_struct *wk) struct smb2_lease_break *rsp = NULL; struct ksmbd_work *work = container_of(wk, struct ksmbd_work, work); struct lease_break_info *br_info = work->request_buf; - struct ksmbd_conn *conn = work->conn; struct smb2_hdr *rsp_hdr; - if (allocate_oplock_break_buf(work)) { + if (allocate_interim_rsp_buf(work)) { ksmbd_debug(OPLOCK, "smb2_allocate_rsp_buf failed! "); goto out; } rsp_hdr = smb2_get_msg(work->response_buf); memset(rsp_hdr, 0, sizeof(struct smb2_hdr) + 2); - *(__be32 *)work->response_buf = - cpu_to_be32(conn->vals->header_size); rsp_hdr->ProtocolId = SMB2_PROTO_NUMBER; rsp_hdr->StructureSize = SMB2_HEADER_STRUCTURE_SIZE; rsp_hdr->CreditRequest = cpu_to_le16(0); @@ -791,7 +778,9 @@ static void __smb2_lease_break_noti(struct work_struct *wk) rsp->AccessMaskHint = 0; rsp->ShareMaskHint = 0; - inc_rfc1001_len(work->response_buf, 44); + if (ksmbd_iov_pin_rsp(work, (void *)rsp, + sizeof(struct smb2_lease_break))) + goto out; ksmbd_conn_write(work); @@ -1492,7 +1481,7 @@ struct create_context *smb2_find_context_vals(void *open_req, const char *tag, i name_len < 4 || name_off + name_len > cc_len || (value_off & 0x7) != 0 || - (value_off && (value_off < name_off + name_len)) || + (value_len && value_off < name_off + (name_len < 8 ? 
8 : name_len)) || ((u64)value_off + value_len > cc_len)) return ERR_PTR(-EINVAL); diff --git a/fs/smb/server/server.c b/fs/smb/server/server.c index 9df121bdf3492..801cd0929209c 100644 --- a/fs/smb/server/server.c +++ b/fs/smb/server/server.c @@ -163,6 +163,7 @@ static void __handle_ksmbd_work(struct ksmbd_work *work, { u16 command = 0; int rc; + bool is_chained = false; if (conn->ops->allocate_rsp_buf(work)) return; @@ -229,14 +230,13 @@ static void __handle_ksmbd_work(struct ksmbd_work *work, } } + is_chained = is_chained_smb2_message(work); + if (work->sess && (work->sess->sign || smb3_11_final_sess_setup_resp(work) || conn->ops->is_sign_req(work, command))) conn->ops->set_sign_rsp(work); - } while (is_chained_smb2_message(work)); - - if (work->send_no_response) - return; + } while (is_chained == true); send: smb3_preauth_hash_rsp(work); diff --git a/fs/smb/server/smb2pdu.c b/fs/smb/server/smb2pdu.c index a947c18915c2e..749660110878f 100644 --- a/fs/smb/server/smb2pdu.c +++ b/fs/smb/server/smb2pdu.c @@ -145,12 +145,18 @@ void smb2_set_err_rsp(struct ksmbd_work *work) err_rsp = smb2_get_msg(work->response_buf); if (err_rsp->hdr.Status != STATUS_STOPPED_ON_SYMLINK) { + int err; + err_rsp->StructureSize = SMB2_ERROR_STRUCTURE_SIZE2_LE; err_rsp->ErrorContextCount = 0; err_rsp->Reserved = 0; err_rsp->ByteCount = 0; err_rsp->ErrorData[0] = 0; - inc_rfc1001_len(work->response_buf, SMB2_ERROR_STRUCTURE_SIZE2); + err = ksmbd_iov_pin_rsp(work, (void *)err_rsp, + __SMB2_HEADER_STRUCTURE_SIZE + + SMB2_ERROR_STRUCTURE_SIZE2); + if (err) + work->send_no_response = 1; } } @@ -245,9 +251,7 @@ int init_smb2_neg_rsp(struct ksmbd_work *work) struct smb2_hdr *rsp_hdr; struct smb2_negotiate_rsp *rsp; struct ksmbd_conn *conn = work->conn; - - *(__be32 *)work->response_buf = - cpu_to_be32(conn->vals->header_size); + int err; rsp_hdr = smb2_get_msg(work->response_buf); memset(rsp_hdr, 0, sizeof(struct smb2_hdr) + 2); @@ -286,12 +290,13 @@ int init_smb2_neg_rsp(struct ksmbd_work *work) rsp->SecurityBufferLength = cpu_to_le16(AUTH_GSS_LENGTH); ksmbd_copy_gss_neg_header((char *)(&rsp->hdr) + le16_to_cpu(rsp->SecurityBufferOffset)); - inc_rfc1001_len(work->response_buf, - sizeof(struct smb2_negotiate_rsp) - - sizeof(struct smb2_hdr) + AUTH_GSS_LENGTH); rsp->SecurityMode = SMB2_NEGOTIATE_SIGNING_ENABLED_LE; if (server_conf.signing == KSMBD_CONFIG_OPT_MANDATORY) rsp->SecurityMode |= SMB2_NEGOTIATE_SIGNING_REQUIRED_LE; + err = ksmbd_iov_pin_rsp(work, rsp, + sizeof(struct smb2_negotiate_rsp) + AUTH_GSS_LENGTH); + if (err) + return err; conn->use_spnego = true; ksmbd_conn_set_need_negotiate(conn); @@ -390,11 +395,12 @@ static void init_chained_smb2_rsp(struct ksmbd_work *work) next_hdr_offset = le32_to_cpu(req->NextCommand); new_len = ALIGN(len, 8); - inc_rfc1001_len(work->response_buf, - sizeof(struct smb2_hdr) + new_len - len); + work->iov[work->iov_idx].iov_len += (new_len - len); + inc_rfc1001_len(work->response_buf, new_len - len); rsp->NextCommand = cpu_to_le32(new_len); work->next_smb2_rcv_hdr_off += next_hdr_offset; + work->curr_smb2_rsp_hdr_off = work->next_smb2_rsp_hdr_off; work->next_smb2_rsp_hdr_off += new_len; ksmbd_debug(SMB, "Compound req new_len = %d rcv off = %d rsp off = %d\n", @@ -470,10 +476,10 @@ bool is_chained_smb2_message(struct ksmbd_work *work) len = len - get_rfc1002_len(work->response_buf); if (len) { ksmbd_debug(SMB, "padding len %u\n", len); + work->iov[work->iov_idx].iov_len += len; inc_rfc1001_len(work->response_buf, len); - if (work->aux_payload_sz) - work->aux_payload_sz += len; } + 
work->curr_smb2_rsp_hdr_off = work->next_smb2_rsp_hdr_off; } return false; } @@ -488,11 +494,8 @@ int init_smb2_rsp_hdr(struct ksmbd_work *work) { struct smb2_hdr *rsp_hdr = smb2_get_msg(work->response_buf); struct smb2_hdr *rcv_hdr = smb2_get_msg(work->request_buf); - struct ksmbd_conn *conn = work->conn; memset(rsp_hdr, 0, sizeof(struct smb2_hdr) + 2); - *(__be32 *)work->response_buf = - cpu_to_be32(conn->vals->header_size); rsp_hdr->ProtocolId = rcv_hdr->ProtocolId; rsp_hdr->StructureSize = SMB2_HEADER_STRUCTURE_SIZE; rsp_hdr->Command = rcv_hdr->Command; @@ -657,7 +660,7 @@ int setup_async_work(struct ksmbd_work *work, void (*fn)(void **), void **arg) struct ksmbd_conn *conn = work->conn; int id; - rsp_hdr = smb2_get_msg(work->response_buf); + rsp_hdr = ksmbd_resp_buf_next(work); rsp_hdr->Flags |= SMB2_FLAGS_ASYNC_COMMAND; id = ksmbd_acquire_async_msg_id(&conn->async_ida); @@ -706,15 +709,24 @@ void release_async_work(struct ksmbd_work *work) void smb2_send_interim_resp(struct ksmbd_work *work, __le32 status) { struct smb2_hdr *rsp_hdr; + struct ksmbd_work *in_work = ksmbd_alloc_work_struct(); - rsp_hdr = smb2_get_msg(work->response_buf); - smb2_set_err_rsp(work); + if (allocate_interim_rsp_buf(in_work)) { + pr_err("smb_allocate_rsp_buf failed!\n"); + ksmbd_free_work_struct(in_work); + return; + } + + in_work->conn = work->conn; + memcpy(smb2_get_msg(in_work->response_buf), ksmbd_resp_buf_next(work), + __SMB2_HEADER_STRUCTURE_SIZE); + + rsp_hdr = smb2_get_msg(in_work->response_buf); + smb2_set_err_rsp(in_work); rsp_hdr->Status = status; - work->multiRsp = 1; - ksmbd_conn_write(work); - rsp_hdr->Status = 0; - work->multiRsp = 0; + ksmbd_conn_write(in_work); + ksmbd_free_work_struct(in_work); } static __le32 smb2_get_reparse_tag_special_file(umode_t mode) @@ -821,9 +833,8 @@ static void build_posix_ctxt(struct smb2_posix_neg_context *pneg_ctxt) pneg_ctxt->Name[15] = 0x7C; } -static void assemble_neg_contexts(struct ksmbd_conn *conn, - struct smb2_negotiate_rsp *rsp, - void *smb2_buf_len) +static unsigned int assemble_neg_contexts(struct ksmbd_conn *conn, + struct smb2_negotiate_rsp *rsp) { char * const pneg_ctxt = (char *)rsp + le32_to_cpu(rsp->NegotiateContextOffset); @@ -834,7 +845,6 @@ static void assemble_neg_contexts(struct ksmbd_conn *conn, "assemble SMB2_PREAUTH_INTEGRITY_CAPABILITIES context\n"); build_preauth_ctxt((struct smb2_preauth_neg_context *)pneg_ctxt, conn->preauth_info->Preauth_HashId); - inc_rfc1001_len(smb2_buf_len, AUTH_GSS_PADDING); ctxt_size = sizeof(struct smb2_preauth_neg_context); if (conn->cipher_type) { @@ -874,7 +884,7 @@ static void assemble_neg_contexts(struct ksmbd_conn *conn, } rsp->NegotiateContextCount = cpu_to_le16(neg_ctxt_cnt); - inc_rfc1001_len(smb2_buf_len, ctxt_size); + return ctxt_size + AUTH_GSS_PADDING; } static __le32 decode_preauth_ctxt(struct ksmbd_conn *conn, @@ -1090,7 +1100,7 @@ int smb2_handle_negotiate(struct ksmbd_work *work) struct smb2_negotiate_req *req = smb2_get_msg(work->request_buf); struct smb2_negotiate_rsp *rsp = smb2_get_msg(work->response_buf); int rc = 0; - unsigned int smb2_buf_len, smb2_neg_size; + unsigned int smb2_buf_len, smb2_neg_size, neg_ctxt_len = 0; __le32 status; ksmbd_debug(SMB, "Received negotiate request\n"); @@ -1183,7 +1193,7 @@ int smb2_handle_negotiate(struct ksmbd_work *work) conn->preauth_info->Preauth_HashValue); rsp->NegotiateContextOffset = cpu_to_le32(OFFSET_OF_NEG_CONTEXT); - assemble_neg_contexts(conn, rsp, work->response_buf); + neg_ctxt_len = assemble_neg_contexts(conn, rsp); break; case 
SMB302_PROT_ID: init_smb3_02_server(conn); @@ -1233,8 +1243,7 @@ int smb2_handle_negotiate(struct ksmbd_work *work) rsp->SecurityBufferLength = cpu_to_le16(AUTH_GSS_LENGTH); ksmbd_copy_gss_neg_header((char *)(&rsp->hdr) + le16_to_cpu(rsp->SecurityBufferOffset)); - inc_rfc1001_len(work->response_buf, sizeof(struct smb2_negotiate_rsp) - - sizeof(struct smb2_hdr) + AUTH_GSS_LENGTH); + rsp->SecurityMode = SMB2_NEGOTIATE_SIGNING_ENABLED_LE; conn->use_spnego = true; @@ -1252,9 +1261,15 @@ int smb2_handle_negotiate(struct ksmbd_work *work) ksmbd_conn_set_need_negotiate(conn); err_out: + if (rc) + rsp->hdr.Status = STATUS_INSUFFICIENT_RESOURCES; + + if (!rc) + rc = ksmbd_iov_pin_rsp(work, rsp, + sizeof(struct smb2_negotiate_rsp) + + AUTH_GSS_LENGTH + neg_ctxt_len); if (rc < 0) smb2_set_err_rsp(work); - return rc; } @@ -1454,7 +1469,6 @@ static int ntlm_authenticate(struct ksmbd_work *work, memcpy((char *)&rsp->hdr.ProtocolId + sz, spnego_blob, spnego_blob_len); rsp->SecurityBufferLength = cpu_to_le16(spnego_blob_len); kfree(spnego_blob); - inc_rfc1001_len(work->response_buf, spnego_blob_len - 1); } user = session_user(conn, req); @@ -1600,7 +1614,6 @@ static int krb5_authenticate(struct ksmbd_work *work, return -EINVAL; } rsp->SecurityBufferLength = cpu_to_le16(out_len); - inc_rfc1001_len(work->response_buf, out_len - 1); if ((conn->sign || server_conf.enforced_signing) || (req->SecurityMode & SMB2_NEGOTIATE_SIGNING_REQUIRED)) @@ -1672,7 +1685,6 @@ int smb2_sess_setup(struct ksmbd_work *work) rsp->SessionFlags = 0; rsp->SecurityBufferOffset = cpu_to_le16(72); rsp->SecurityBufferLength = 0; - inc_rfc1001_len(work->response_buf, 9); ksmbd_conn_lock(conn); if (!req->hdr.SessionId) { @@ -1808,13 +1820,6 @@ int smb2_sess_setup(struct ksmbd_work *work) goto out_err; rsp->hdr.Status = STATUS_MORE_PROCESSING_REQUIRED; - /* - * Note: here total size -1 is done as an - * adjustment for 0 size blob - */ - inc_rfc1001_len(work->response_buf, - le16_to_cpu(rsp->SecurityBufferLength) - 1); - } else if (negblob->MessageType == NtLmAuthenticate) { rc = ntlm_authenticate(work, req, rsp); if (rc) @@ -1899,6 +1904,18 @@ int smb2_sess_setup(struct ksmbd_work *work) ksmbd_conn_set_need_negotiate(conn); } } + smb2_set_err_rsp(work); + } else { + unsigned int iov_len; + + if (rsp->SecurityBufferLength) + iov_len = offsetof(struct smb2_sess_setup_rsp, Buffer) + + le16_to_cpu(rsp->SecurityBufferLength); + else + iov_len = sizeof(struct smb2_sess_setup_rsp); + rc = ksmbd_iov_pin_rsp(work, rsp, iov_len); + if (rc) + rsp->hdr.Status = STATUS_INSUFFICIENT_RESOURCES; } ksmbd_conn_unlock(conn); @@ -1977,13 +1994,16 @@ int smb2_tree_connect(struct ksmbd_work *work) status.tree_conn->posix_extensions = true; rsp->StructureSize = cpu_to_le16(16); - inc_rfc1001_len(work->response_buf, 16); out_err1: rsp->Capabilities = 0; rsp->Reserved = 0; /* default manual caching */ rsp->ShareFlags = SMB2_SHAREFLAG_MANUAL_CACHING; + rc = ksmbd_iov_pin_rsp(work, rsp, sizeof(struct smb2_tree_connect_rsp)); + if (rc) + status.ret = KSMBD_TREE_CONN_STATUS_NOMEM; + if (!IS_ERR(treename)) kfree(treename); if (!IS_ERR(name)) @@ -2096,20 +2116,27 @@ int smb2_tree_disconnect(struct ksmbd_work *work) struct smb2_tree_disconnect_req *req; struct ksmbd_session *sess = work->sess; struct ksmbd_tree_connect *tcon = work->tcon; + int err; WORK_BUFFERS(work, req, rsp); - rsp->StructureSize = cpu_to_le16(4); - inc_rfc1001_len(work->response_buf, 4); - ksmbd_debug(SMB, "request\n"); + rsp->StructureSize = cpu_to_le16(4); + err = ksmbd_iov_pin_rsp(work, rsp, + 
sizeof(struct smb2_tree_disconnect_rsp)); + if (err) { + rsp->hdr.Status = STATUS_INSUFFICIENT_RESOURCES; + smb2_set_err_rsp(work); + return err; + } + if (!tcon || test_and_set_bit(TREE_CONN_EXPIRE, &tcon->status)) { ksmbd_debug(SMB, "Invalid tid %d\n", req->hdr.Id.SyncId.TreeId); rsp->hdr.Status = STATUS_NETWORK_NAME_DELETED; smb2_set_err_rsp(work); - return 0; + return -ENOENT; } ksmbd_close_tree_conn_fds(work); @@ -2131,15 +2158,21 @@ int smb2_session_logoff(struct ksmbd_work *work) struct smb2_logoff_rsp *rsp; struct ksmbd_session *sess; u64 sess_id; + int err; WORK_BUFFERS(work, req, rsp); + ksmbd_debug(SMB, "request\n"); + sess_id = le64_to_cpu(req->hdr.SessionId); rsp->StructureSize = cpu_to_le16(4); - inc_rfc1001_len(work->response_buf, 4); - - ksmbd_debug(SMB, "request\n"); + err = ksmbd_iov_pin_rsp(work, rsp, sizeof(struct smb2_logoff_rsp)); + if (err) { + rsp->hdr.Status = STATUS_INSUFFICIENT_RESOURCES; + smb2_set_err_rsp(work); + return err; + } ksmbd_all_conn_set_status(sess_id, KSMBD_SESS_NEED_RECONNECT); ksmbd_close_session_fds(work); @@ -2154,7 +2187,7 @@ int smb2_session_logoff(struct ksmbd_work *work) ksmbd_debug(SMB, "Invalid tid %d\n", req->hdr.Id.SyncId.TreeId); rsp->hdr.Status = STATUS_NETWORK_NAME_DELETED; smb2_set_err_rsp(work); - return 0; + return -ENOENT; } ksmbd_destroy_file_table(&sess->file_table); @@ -2215,7 +2248,10 @@ static noinline int create_smb2_pipe(struct ksmbd_work *work) rsp->CreateContextsOffset = 0; rsp->CreateContextsLength = 0; - inc_rfc1001_len(work->response_buf, 88); /* StructureSize - 1*/ + err = ksmbd_iov_pin_rsp(work, rsp, offsetof(struct smb2_create_rsp, Buffer)); + if (err) + goto out; + kfree(name); return 0; @@ -2597,6 +2633,7 @@ int smb2_open(struct ksmbd_work *work) u64 time; umode_t posix_mode = 0; __le32 daccess, maximal_access = 0; + int iov_len = 0; WORK_BUFFERS(work, req, rsp); @@ -3248,7 +3285,7 @@ int smb2_open(struct ksmbd_work *work) rsp->CreateContextsOffset = 0; rsp->CreateContextsLength = 0; - inc_rfc1001_len(work->response_buf, 88); /* StructureSize - 1*/ + iov_len = offsetof(struct smb2_create_rsp, Buffer); /* If lease is request send lease context response */ if (opinfo && opinfo->is_lease) { @@ -3263,8 +3300,7 @@ int smb2_open(struct ksmbd_work *work) create_lease_buf(rsp->Buffer, opinfo->o_lease); le32_add_cpu(&rsp->CreateContextsLength, conn->vals->create_lease_size); - inc_rfc1001_len(work->response_buf, - conn->vals->create_lease_size); + iov_len += conn->vals->create_lease_size; next_ptr = &lease_ccontext->Next; next_off = conn->vals->create_lease_size; } @@ -3284,8 +3320,7 @@ int smb2_open(struct ksmbd_work *work) le32_to_cpu(maximal_access)); le32_add_cpu(&rsp->CreateContextsLength, conn->vals->create_mxac_size); - inc_rfc1001_len(work->response_buf, - conn->vals->create_mxac_size); + iov_len += conn->vals->create_mxac_size; if (next_ptr) *next_ptr = cpu_to_le32(next_off); next_ptr = &mxac_ccontext->Next; @@ -3303,8 +3338,7 @@ int smb2_open(struct ksmbd_work *work) stat.ino, tcon->id); le32_add_cpu(&rsp->CreateContextsLength, conn->vals->create_disk_id_size); - inc_rfc1001_len(work->response_buf, - conn->vals->create_disk_id_size); + iov_len += conn->vals->create_disk_id_size; if (next_ptr) *next_ptr = cpu_to_le32(next_off); next_ptr = &disk_id_ccontext->Next; @@ -3318,8 +3352,7 @@ int smb2_open(struct ksmbd_work *work) fp); le32_add_cpu(&rsp->CreateContextsLength, conn->vals->create_posix_size); - inc_rfc1001_len(work->response_buf, - conn->vals->create_posix_size); + iov_len += conn->vals->create_posix_size; 
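
[Editor's aside] For the create contexts assembled above, the patch replaces the per-context inc_rfc1001_len() calls with a running iov_len total that is pinned once on the exit path. A condensed sketch of the accounting; the condition names are illustrative stand-ins, not variables from the patch:

    iov_len = offsetof(struct smb2_create_rsp, Buffer);
    if (lease_requested)                /* illustrative condition */
            iov_len += conn->vals->create_lease_size;
    if (maximal_access_requested)       /* illustrative condition */
            iov_len += conn->vals->create_mxac_size;
    if (disk_id_wanted)                 /* illustrative condition */
            iov_len += conn->vals->create_disk_id_size;
    if (posix_context_wanted)           /* illustrative condition */
            iov_len += conn->vals->create_posix_size;

    /* at err_out1: one pin covers the fixed reply plus every context */
    if (!rc)
            rc = ksmbd_iov_pin_rsp(work, (void *)rsp, iov_len);
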
if (next_ptr) *next_ptr = cpu_to_le32(next_off); } @@ -3337,7 +3370,8 @@ int smb2_open(struct ksmbd_work *work) } ksmbd_revert_fsids(work); err_out1: - + if (!rc) + rc = ksmbd_iov_pin_rsp(work, (void *)rsp, iov_len); if (rc) { if (rc == -EINVAL) rsp->hdr.Status = STATUS_INVALID_PARAMETER; @@ -4063,7 +4097,10 @@ int smb2_query_dir(struct ksmbd_work *work) rsp->OutputBufferOffset = cpu_to_le16(0); rsp->OutputBufferLength = cpu_to_le32(0); rsp->Buffer[0] = 0; - inc_rfc1001_len(work->response_buf, 9); + rc = ksmbd_iov_pin_rsp(work, (void *)rsp, + sizeof(struct smb2_query_directory_rsp)); + if (rc) + goto err_out; } else { no_buf_len: ((struct file_directory_info *) @@ -4075,7 +4112,11 @@ int smb2_query_dir(struct ksmbd_work *work) rsp->StructureSize = cpu_to_le16(9); rsp->OutputBufferOffset = cpu_to_le16(72); rsp->OutputBufferLength = cpu_to_le32(d_info.data_count); - inc_rfc1001_len(work->response_buf, 8 + d_info.data_count); + rc = ksmbd_iov_pin_rsp(work, (void *)rsp, + offsetof(struct smb2_query_directory_rsp, Buffer) + + d_info.data_count); + if (rc) + goto err_out; } kfree(srch_ptr); @@ -4116,27 +4157,18 @@ int smb2_query_dir(struct ksmbd_work *work) * @reqOutputBufferLength: max buffer length expected in command response * @rsp: query info response buffer contains output buffer length * @rsp_org: base response buffer pointer in case of chained response - * @infoclass_size: query info class response buffer size * * Return: 0 on success, otherwise error */ static int buffer_check_err(int reqOutputBufferLength, struct smb2_query_info_rsp *rsp, - void *rsp_org, int infoclass_size) + void *rsp_org) { if (reqOutputBufferLength < le32_to_cpu(rsp->OutputBufferLength)) { - if (reqOutputBufferLength < infoclass_size) { - pr_err("Invalid Buffer Size Requested\n"); - rsp->hdr.Status = STATUS_INFO_LENGTH_MISMATCH; - *(__be32 *)rsp_org = cpu_to_be32(sizeof(struct smb2_hdr)); - return -EINVAL; - } - - ksmbd_debug(SMB, "Buffer Overflow\n"); - rsp->hdr.Status = STATUS_BUFFER_OVERFLOW; - *(__be32 *)rsp_org = cpu_to_be32(sizeof(struct smb2_hdr) + - reqOutputBufferLength); - rsp->OutputBufferLength = cpu_to_le32(reqOutputBufferLength); + pr_err("Invalid Buffer Size Requested\n"); + rsp->hdr.Status = STATUS_INFO_LENGTH_MISMATCH; + *(__be32 *)rsp_org = cpu_to_be32(sizeof(struct smb2_hdr)); + return -EINVAL; } return 0; } @@ -4155,7 +4187,6 @@ static void get_standard_info_pipe(struct smb2_query_info_rsp *rsp, sinfo->Directory = 0; rsp->OutputBufferLength = cpu_to_le32(sizeof(struct smb2_file_standard_info)); - inc_rfc1001_len(rsp_org, sizeof(struct smb2_file_standard_info)); } static void get_internal_info_pipe(struct smb2_query_info_rsp *rsp, u64 num, @@ -4169,7 +4200,6 @@ static void get_internal_info_pipe(struct smb2_query_info_rsp *rsp, u64 num, file_info->IndexNumber = cpu_to_le64(num | (1ULL << 63)); rsp->OutputBufferLength = cpu_to_le32(sizeof(struct smb2_file_internal_info)); - inc_rfc1001_len(rsp_org, sizeof(struct smb2_file_internal_info)); } static int smb2_get_info_file_pipe(struct ksmbd_session *sess, @@ -4195,14 +4225,12 @@ static int smb2_get_info_file_pipe(struct ksmbd_session *sess, case FILE_STANDARD_INFORMATION: get_standard_info_pipe(rsp, rsp_org); rc = buffer_check_err(le32_to_cpu(req->OutputBufferLength), - rsp, rsp_org, - FILE_STANDARD_INFORMATION_SIZE); + rsp, rsp_org); break; case FILE_INTERNAL_INFORMATION: get_internal_info_pipe(rsp, id, rsp_org); rc = buffer_check_err(le32_to_cpu(req->OutputBufferLength), - rsp, rsp_org, - FILE_INTERNAL_INFORMATION_SIZE); + rsp, rsp_org); break; 
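
[Editor's aside] Both pipe info classes above now funnel through the slimmed-down buffer_check_err(). With the infoclass_size argument gone, the check reduces to a single rule: a response larger than the client's offered OutputBufferLength fails outright instead of being truncated under STATUS_BUFFER_OVERFLOW. A minimal model of the new check, where req_output_len abbreviates the request's OutputBufferLength:

    /* e.g. client offered 16 bytes but the response carries 24: fail */
    if (req_output_len < le32_to_cpu(rsp->OutputBufferLength)) {
            rsp->hdr.Status = STATUS_INFO_LENGTH_MISMATCH;
            return -EINVAL;
    }
    return 0;
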
default: ksmbd_debug(SMB, "smb2_info_file_pipe for %u not supported\n", @@ -4308,7 +4336,7 @@ static int smb2_get_ea(struct ksmbd_work *work, struct ksmbd_file *fp, if (!strncmp(name, XATTR_USER_PREFIX, XATTR_USER_PREFIX_LEN)) name_len -= XATTR_USER_PREFIX_LEN; - ptr = (char *)(&eainfo->name + name_len + 1); + ptr = eainfo->name + name_len + 1; buf_free_len -= (offsetof(struct smb2_ea_info, name) + name_len + 1); /* bailout if xattr can't fit in buf_free_len */ @@ -4370,7 +4398,6 @@ static int smb2_get_ea(struct ksmbd_work *work, struct ksmbd_file *fp, if (rsp_data_cnt == 0) rsp->hdr.Status = STATUS_NO_EAS_ON_FILE; rsp->OutputBufferLength = cpu_to_le32(rsp_data_cnt); - inc_rfc1001_len(rsp_org, rsp_data_cnt); out: kvfree(xattr_list); return rc; @@ -4385,7 +4412,6 @@ static void get_file_access_info(struct smb2_query_info_rsp *rsp, file_info->AccessFlags = fp->daccess; rsp->OutputBufferLength = cpu_to_le32(sizeof(struct smb2_file_access_info)); - inc_rfc1001_len(rsp_org, sizeof(struct smb2_file_access_info)); } static int get_file_basic_info(struct smb2_query_info_rsp *rsp, @@ -4415,7 +4441,6 @@ static int get_file_basic_info(struct smb2_query_info_rsp *rsp, basic_info->Pad1 = 0; rsp->OutputBufferLength = cpu_to_le32(sizeof(struct smb2_file_basic_info)); - inc_rfc1001_len(rsp_org, sizeof(struct smb2_file_basic_info)); return 0; } @@ -4440,8 +4465,6 @@ static void get_file_standard_info(struct smb2_query_info_rsp *rsp, sinfo->Directory = S_ISDIR(stat.mode) ? 1 : 0; rsp->OutputBufferLength = cpu_to_le32(sizeof(struct smb2_file_standard_info)); - inc_rfc1001_len(rsp_org, - sizeof(struct smb2_file_standard_info)); } static void get_file_alignment_info(struct smb2_query_info_rsp *rsp, @@ -4453,8 +4476,6 @@ static void get_file_alignment_info(struct smb2_query_info_rsp *rsp, file_info->AlignmentRequirement = 0; rsp->OutputBufferLength = cpu_to_le32(sizeof(struct smb2_file_alignment_info)); - inc_rfc1001_len(rsp_org, - sizeof(struct smb2_file_alignment_info)); } static int get_file_all_info(struct ksmbd_work *work, @@ -4518,7 +4539,6 @@ static int get_file_all_info(struct ksmbd_work *work, rsp->OutputBufferLength = cpu_to_le32(sizeof(struct smb2_file_all_info) + conv_len - 1); kfree(filename); - inc_rfc1001_len(rsp_org, le32_to_cpu(rsp->OutputBufferLength)); return 0; } @@ -4541,7 +4561,6 @@ static void get_file_alternate_info(struct ksmbd_work *work, file_info->FileNameLength = cpu_to_le32(conv_len); rsp->OutputBufferLength = cpu_to_le32(sizeof(struct smb2_file_alt_name_info) + conv_len); - inc_rfc1001_len(rsp_org, le32_to_cpu(rsp->OutputBufferLength)); } static void get_file_stream_info(struct ksmbd_work *work, @@ -4641,7 +4660,6 @@ static void get_file_stream_info(struct ksmbd_work *work, kvfree(xattr_list); rsp->OutputBufferLength = cpu_to_le32(nbytes); - inc_rfc1001_len(rsp_org, nbytes); } static void get_file_internal_info(struct smb2_query_info_rsp *rsp, @@ -4656,7 +4674,6 @@ static void get_file_internal_info(struct smb2_query_info_rsp *rsp, file_info->IndexNumber = cpu_to_le64(stat.ino); rsp->OutputBufferLength = cpu_to_le32(sizeof(struct smb2_file_internal_info)); - inc_rfc1001_len(rsp_org, sizeof(struct smb2_file_internal_info)); } static int get_file_network_open_info(struct smb2_query_info_rsp *rsp, @@ -4692,7 +4709,6 @@ static int get_file_network_open_info(struct smb2_query_info_rsp *rsp, file_info->Reserved = cpu_to_le32(0); rsp->OutputBufferLength = cpu_to_le32(sizeof(struct smb2_file_ntwrk_info)); - inc_rfc1001_len(rsp_org, sizeof(struct smb2_file_ntwrk_info)); return 0; } @@ 
-4704,7 +4720,6 @@ static void get_file_ea_info(struct smb2_query_info_rsp *rsp, void *rsp_org) file_info->EASize = 0; rsp->OutputBufferLength = cpu_to_le32(sizeof(struct smb2_file_ea_info)); - inc_rfc1001_len(rsp_org, sizeof(struct smb2_file_ea_info)); } static void get_file_position_info(struct smb2_query_info_rsp *rsp, @@ -4716,7 +4731,6 @@ static void get_file_position_info(struct smb2_query_info_rsp *rsp, file_info->CurrentByteOffset = cpu_to_le64(fp->filp->f_pos); rsp->OutputBufferLength = cpu_to_le32(sizeof(struct smb2_file_pos_info)); - inc_rfc1001_len(rsp_org, sizeof(struct smb2_file_pos_info)); } static void get_file_mode_info(struct smb2_query_info_rsp *rsp, @@ -4728,7 +4742,6 @@ static void get_file_mode_info(struct smb2_query_info_rsp *rsp, file_info->Mode = fp->coption & FILE_MODE_INFO_MASK; rsp->OutputBufferLength = cpu_to_le32(sizeof(struct smb2_file_mode_info)); - inc_rfc1001_len(rsp_org, sizeof(struct smb2_file_mode_info)); } static void get_file_compression_info(struct smb2_query_info_rsp *rsp, @@ -4750,7 +4763,6 @@ static void get_file_compression_info(struct smb2_query_info_rsp *rsp, rsp->OutputBufferLength = cpu_to_le32(sizeof(struct smb2_file_comp_info)); - inc_rfc1001_len(rsp_org, sizeof(struct smb2_file_comp_info)); } static int get_file_attribute_tag_info(struct smb2_query_info_rsp *rsp, @@ -4769,11 +4781,10 @@ static int get_file_attribute_tag_info(struct smb2_query_info_rsp *rsp, file_info->ReparseTag = 0; rsp->OutputBufferLength = cpu_to_le32(sizeof(struct smb2_file_attr_tag_info)); - inc_rfc1001_len(rsp_org, sizeof(struct smb2_file_attr_tag_info)); return 0; } -static int find_file_posix_info(struct smb2_query_info_rsp *rsp, +static void find_file_posix_info(struct smb2_query_info_rsp *rsp, struct ksmbd_file *fp, void *rsp_org) { struct smb311_posix_qinfo *file_info; @@ -4811,8 +4822,6 @@ static int find_file_posix_info(struct smb2_query_info_rsp *rsp, SIDUNIX_GROUP, (struct smb_sid *)&file_info->Sids[16]); rsp->OutputBufferLength = cpu_to_le32(out_buf_len); - inc_rfc1001_len(rsp_org, out_buf_len); - return out_buf_len; } static int smb2_get_info_file(struct ksmbd_work *work, @@ -4822,7 +4831,6 @@ static int smb2_get_info_file(struct ksmbd_work *work, struct ksmbd_file *fp; int fileinfoclass = 0; int rc = 0; - int file_infoclass_size; unsigned int id = KSMBD_NO_FID, pid = KSMBD_NO_FID; if (test_share_config_flag(work->tcon->share_conf, @@ -4855,85 +4863,69 @@ static int smb2_get_info_file(struct ksmbd_work *work, switch (fileinfoclass) { case FILE_ACCESS_INFORMATION: get_file_access_info(rsp, fp, work->response_buf); - file_infoclass_size = FILE_ACCESS_INFORMATION_SIZE; break; case FILE_BASIC_INFORMATION: rc = get_file_basic_info(rsp, fp, work->response_buf); - file_infoclass_size = FILE_BASIC_INFORMATION_SIZE; break; case FILE_STANDARD_INFORMATION: get_file_standard_info(rsp, fp, work->response_buf); - file_infoclass_size = FILE_STANDARD_INFORMATION_SIZE; break; case FILE_ALIGNMENT_INFORMATION: get_file_alignment_info(rsp, work->response_buf); - file_infoclass_size = FILE_ALIGNMENT_INFORMATION_SIZE; break; case FILE_ALL_INFORMATION: rc = get_file_all_info(work, rsp, fp, work->response_buf); - file_infoclass_size = FILE_ALL_INFORMATION_SIZE; break; case FILE_ALTERNATE_NAME_INFORMATION: get_file_alternate_info(work, rsp, fp, work->response_buf); - file_infoclass_size = FILE_ALTERNATE_NAME_INFORMATION_SIZE; break; case FILE_STREAM_INFORMATION: get_file_stream_info(work, rsp, fp, work->response_buf); - file_infoclass_size = FILE_STREAM_INFORMATION_SIZE; break; 
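
[Editor's aside] Each case in this switch loses the same two lines for the same reason: the helpers already record their payload size in rsp->OutputBufferLength, and smb2_query_info() later pins header plus payload in one step, so neither the per-class size constant nor the inc_rfc1001_len() call is needed. Both halves of the pattern, taken from the patch itself:

    /* helper: record the payload size only */
    rsp->OutputBufferLength =
            cpu_to_le32(sizeof(struct smb2_file_internal_info));

    /* caller (smb2_query_info): one pin for header + payload */
    rc = ksmbd_iov_pin_rsp(work, (void *)rsp,
                           offsetof(struct smb2_query_info_rsp, Buffer) +
                           le32_to_cpu(rsp->OutputBufferLength));
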
case FILE_INTERNAL_INFORMATION: get_file_internal_info(rsp, fp, work->response_buf); - file_infoclass_size = FILE_INTERNAL_INFORMATION_SIZE; break; case FILE_NETWORK_OPEN_INFORMATION: rc = get_file_network_open_info(rsp, fp, work->response_buf); - file_infoclass_size = FILE_NETWORK_OPEN_INFORMATION_SIZE; break; case FILE_EA_INFORMATION: get_file_ea_info(rsp, work->response_buf); - file_infoclass_size = FILE_EA_INFORMATION_SIZE; break; case FILE_FULL_EA_INFORMATION: rc = smb2_get_ea(work, fp, req, rsp, work->response_buf); - file_infoclass_size = FILE_FULL_EA_INFORMATION_SIZE; break; case FILE_POSITION_INFORMATION: get_file_position_info(rsp, fp, work->response_buf); - file_infoclass_size = FILE_POSITION_INFORMATION_SIZE; break; case FILE_MODE_INFORMATION: get_file_mode_info(rsp, fp, work->response_buf); - file_infoclass_size = FILE_MODE_INFORMATION_SIZE; break; case FILE_COMPRESSION_INFORMATION: get_file_compression_info(rsp, fp, work->response_buf); - file_infoclass_size = FILE_COMPRESSION_INFORMATION_SIZE; break; case FILE_ATTRIBUTE_TAG_INFORMATION: rc = get_file_attribute_tag_info(rsp, fp, work->response_buf); - file_infoclass_size = FILE_ATTRIBUTE_TAG_INFORMATION_SIZE; break; case SMB_FIND_FILE_POSIX_INFO: if (!work->tcon->posix_extensions) { pr_err("client doesn't negotiate with SMB3.1.1 POSIX Extensions\n"); rc = -EOPNOTSUPP; } else { - file_infoclass_size = find_file_posix_info(rsp, fp, - work->response_buf); + find_file_posix_info(rsp, fp, work->response_buf); } break; default: @@ -4943,8 +4935,7 @@ static int smb2_get_info_file(struct ksmbd_work *work, } if (!rc) rc = buffer_check_err(le32_to_cpu(req->OutputBufferLength), - rsp, work->response_buf, - file_infoclass_size); + rsp, work->response_buf); ksmbd_fd_put(work, fp); return rc; } @@ -4960,7 +4951,6 @@ static int smb2_get_info_filesystem(struct ksmbd_work *work, struct kstatfs stfs; struct path path; int rc = 0, len; - int fs_infoclass_size = 0; if (!share->path) return -EIO; @@ -4990,8 +4980,6 @@ static int smb2_get_info_filesystem(struct ksmbd_work *work, info->DeviceType = cpu_to_le32(stfs.f_type); info->DeviceCharacteristics = cpu_to_le32(0x00000020); rsp->OutputBufferLength = cpu_to_le32(8); - inc_rfc1001_len(work->response_buf, 8); - fs_infoclass_size = FS_DEVICE_INFORMATION_SIZE; break; } case FS_ATTRIBUTE_INFORMATION: @@ -5020,8 +5008,6 @@ static int smb2_get_info_filesystem(struct ksmbd_work *work, info->FileSystemNameLen = cpu_to_le32(len); sz = sizeof(struct filesystem_attribute_info) - 2 + len; rsp->OutputBufferLength = cpu_to_le32(sz); - inc_rfc1001_len(work->response_buf, sz); - fs_infoclass_size = FS_ATTRIBUTE_INFORMATION_SIZE; break; } case FS_VOLUME_INFORMATION: @@ -5048,8 +5034,6 @@ static int smb2_get_info_filesystem(struct ksmbd_work *work, info->Reserved = 0; sz = sizeof(struct filesystem_vol_info) - 2 + len; rsp->OutputBufferLength = cpu_to_le32(sz); - inc_rfc1001_len(work->response_buf, sz); - fs_infoclass_size = FS_VOLUME_INFORMATION_SIZE; break; } case FS_SIZE_INFORMATION: @@ -5062,8 +5046,6 @@ static int smb2_get_info_filesystem(struct ksmbd_work *work, info->SectorsPerAllocationUnit = cpu_to_le32(1); info->BytesPerSector = cpu_to_le32(stfs.f_bsize); rsp->OutputBufferLength = cpu_to_le32(24); - inc_rfc1001_len(work->response_buf, 24); - fs_infoclass_size = FS_SIZE_INFORMATION_SIZE; break; } case FS_FULL_SIZE_INFORMATION: @@ -5079,8 +5061,6 @@ static int smb2_get_info_filesystem(struct ksmbd_work *work, info->SectorsPerAllocationUnit = cpu_to_le32(1); info->BytesPerSector = cpu_to_le32(stfs.f_bsize); 
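
[Editor's aside] The literal byte counts these filesystem info classes report come from the fixed MS-FSCC layouts; for instance, the 32 bytes set just below for FS_FULL_SIZE_INFORMATION decompose as:

    /* FileFsFullSizeInformation, 32 bytes total (per MS-FSCC):
     *   TotalAllocationUnits             8
     *   CallerAvailableAllocationUnits   8
     *   ActualAvailableAllocationUnits   8
     *   SectorsPerAllocationUnit         4
     *   BytesPerSector                   4
     */
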
rsp->OutputBufferLength = cpu_to_le32(32); - inc_rfc1001_len(work->response_buf, 32); - fs_infoclass_size = FS_FULL_SIZE_INFORMATION_SIZE; break; } case FS_OBJECT_ID_INFORMATION: @@ -5100,8 +5080,6 @@ static int smb2_get_info_filesystem(struct ksmbd_work *work, info->extended_info.rel_date = 0; memcpy(info->extended_info.version_string, "1.1.0", strlen("1.1.0")); rsp->OutputBufferLength = cpu_to_le32(64); - inc_rfc1001_len(work->response_buf, 64); - fs_infoclass_size = FS_OBJECT_ID_INFORMATION_SIZE; break; } case FS_SECTOR_SIZE_INFORMATION: @@ -5123,8 +5101,6 @@ static int smb2_get_info_filesystem(struct ksmbd_work *work, info->ByteOffsetForSectorAlignment = 0; info->ByteOffsetForPartitionAlignment = 0; rsp->OutputBufferLength = cpu_to_le32(28); - inc_rfc1001_len(work->response_buf, 28); - fs_infoclass_size = FS_SECTOR_SIZE_INFORMATION_SIZE; break; } case FS_CONTROL_INFORMATION: @@ -5145,8 +5121,6 @@ static int smb2_get_info_filesystem(struct ksmbd_work *work, info->DefaultQuotaLimit = cpu_to_le64(SMB2_NO_FID); info->Padding = 0; rsp->OutputBufferLength = cpu_to_le32(48); - inc_rfc1001_len(work->response_buf, 48); - fs_infoclass_size = FS_CONTROL_INFORMATION_SIZE; break; } case FS_POSIX_INFORMATION: @@ -5166,8 +5140,6 @@ static int smb2_get_info_filesystem(struct ksmbd_work *work, info->TotalFileNodes = cpu_to_le64(stfs.f_files); info->FreeFileNodes = cpu_to_le64(stfs.f_ffree); rsp->OutputBufferLength = cpu_to_le32(56); - inc_rfc1001_len(work->response_buf, 56); - fs_infoclass_size = FS_POSIX_INFORMATION_SIZE; } break; } @@ -5176,8 +5148,7 @@ static int smb2_get_info_filesystem(struct ksmbd_work *work, return -EOPNOTSUPP; } rc = buffer_check_err(le32_to_cpu(req->OutputBufferLength), - rsp, work->response_buf, - fs_infoclass_size); + rsp, work->response_buf); path_put(&path); return rc; } @@ -5211,7 +5182,6 @@ static int smb2_get_info_sec(struct ksmbd_work *work, secdesclen = sizeof(struct smb_ntsd); rsp->OutputBufferLength = cpu_to_le32(secdesclen); - inc_rfc1001_len(work->response_buf, secdesclen); return 0; } @@ -5256,7 +5226,6 @@ static int smb2_get_info_sec(struct ksmbd_work *work, return rc; rsp->OutputBufferLength = cpu_to_le32(secdesclen); - inc_rfc1001_len(work->response_buf, secdesclen); return 0; } @@ -5295,6 +5264,14 @@ int smb2_query_info(struct ksmbd_work *work) rc = -EOPNOTSUPP; } + if (!rc) { + rsp->StructureSize = cpu_to_le16(9); + rsp->OutputBufferOffset = cpu_to_le16(72); + rc = ksmbd_iov_pin_rsp(work, (void *)rsp, + offsetof(struct smb2_query_info_rsp, Buffer) + + le32_to_cpu(rsp->OutputBufferLength)); + } + if (rc < 0) { if (rc == -EACCES) rsp->hdr.Status = STATUS_ACCESS_DENIED; @@ -5302,6 +5279,8 @@ int smb2_query_info(struct ksmbd_work *work) rsp->hdr.Status = STATUS_FILE_CLOSED; else if (rc == -EIO) rsp->hdr.Status = STATUS_UNEXPECTED_IO_ERROR; + else if (rc == -ENOMEM) + rsp->hdr.Status = STATUS_INSUFFICIENT_RESOURCES; else if (rc == -EOPNOTSUPP || rsp->hdr.Status == 0) rsp->hdr.Status = STATUS_INVALID_INFO_CLASS; smb2_set_err_rsp(work); @@ -5310,9 +5289,6 @@ int smb2_query_info(struct ksmbd_work *work) rc); return rc; } - rsp->StructureSize = cpu_to_le16(9); - rsp->OutputBufferOffset = cpu_to_le16(72); - inc_rfc1001_len(work->response_buf, 8); return 0; } @@ -5343,8 +5319,9 @@ static noinline int smb2_close_pipe(struct ksmbd_work *work) rsp->AllocationSize = 0; rsp->EndOfFile = 0; rsp->Attributes = 0; - inc_rfc1001_len(work->response_buf, 60); - return 0; + + return ksmbd_iov_pin_rsp(work, (void *)rsp, + sizeof(struct smb2_close_rsp)); } /** @@ -5449,15 +5426,17 @@ 
int smb2_close(struct ksmbd_work *work) err = ksmbd_close_fd(work, volatile_id); out: + if (!err) + err = ksmbd_iov_pin_rsp(work, (void *)rsp, + sizeof(struct smb2_close_rsp)); + if (err) { if (rsp->hdr.Status == 0) rsp->hdr.Status = STATUS_FILE_CLOSED; smb2_set_err_rsp(work); - } else { - inc_rfc1001_len(work->response_buf, 60); } - return 0; + return err; } /** @@ -5475,8 +5454,7 @@ int smb2_echo(struct ksmbd_work *work) rsp->StructureSize = cpu_to_le16(4); rsp->Reserved = 0; - inc_rfc1001_len(work->response_buf, 4); - return 0; + return ksmbd_iov_pin_rsp(work, rsp, sizeof(struct smb2_echo_rsp)); } static int smb2_rename(struct ksmbd_work *work, @@ -6068,7 +6046,10 @@ int smb2_set_info(struct ksmbd_work *work) goto err_out; rsp->StructureSize = cpu_to_le16(2); - inc_rfc1001_len(work->response_buf, 2); + rc = ksmbd_iov_pin_rsp(work, (void *)rsp, + sizeof(struct smb2_set_info_rsp)); + if (rc) + goto err_out; ksmbd_fd_put(work, fp); return 0; @@ -6115,28 +6096,36 @@ static noinline int smb2_read_pipe(struct ksmbd_work *work) id = req->VolatileFileId; - inc_rfc1001_len(work->response_buf, 16); rpc_resp = ksmbd_rpc_read(work->sess, id); if (rpc_resp) { + void *aux_payload_buf; + if (rpc_resp->flags != KSMBD_RPC_OK) { err = -EINVAL; goto out; } - work->aux_payload_buf = + aux_payload_buf = kvmalloc(rpc_resp->payload_sz, GFP_KERNEL); - if (!work->aux_payload_buf) { + if (!aux_payload_buf) { err = -ENOMEM; goto out; } - memcpy(work->aux_payload_buf, rpc_resp->payload, - rpc_resp->payload_sz); + memcpy(aux_payload_buf, rpc_resp->payload, rpc_resp->payload_sz); nbytes = rpc_resp->payload_sz; - work->resp_hdr_sz = get_rfc1002_len(work->response_buf) + 4; - work->aux_payload_sz = nbytes; kvfree(rpc_resp); + err = ksmbd_iov_pin_rsp_read(work, (void *)rsp, + offsetof(struct smb2_read_rsp, Buffer), + aux_payload_buf, nbytes); + if (err) + goto out; + } else { + err = ksmbd_iov_pin_rsp(work, (void *)rsp, + offsetof(struct smb2_read_rsp, Buffer)); + if (err) + goto out; } rsp->StructureSize = cpu_to_le16(17); @@ -6145,7 +6134,6 @@ static noinline int smb2_read_pipe(struct ksmbd_work *work) rsp->DataLength = cpu_to_le32(nbytes); rsp->DataRemaining = 0; rsp->Flags = 0; - inc_rfc1001_len(work->response_buf, nbytes); return 0; out: @@ -6219,13 +6207,8 @@ int smb2_read(struct ksmbd_work *work) int err = 0; bool is_rdma_channel = false; unsigned int max_read_size = conn->vals->max_read_size; - - WORK_BUFFERS(work, req, rsp); - if (work->next_smb2_rcv_hdr_off) { - work->send_no_response = 1; - err = -EOPNOTSUPP; - goto out; - } + unsigned int id = KSMBD_NO_FID, pid = KSMBD_NO_FID; + void *aux_payload_buf; if (test_share_config_flag(work->tcon->share_conf, KSMBD_SHARE_FLAG_PIPE)) { @@ -6233,6 +6216,25 @@ int smb2_read(struct ksmbd_work *work) return smb2_read_pipe(work); } + if (work->next_smb2_rcv_hdr_off) { + req = ksmbd_req_buf_next(work); + rsp = ksmbd_resp_buf_next(work); + if (!has_file_id(req->VolatileFileId)) { + ksmbd_debug(SMB, "Compound request set FID = %llu\n", + work->compound_fid); + id = work->compound_fid; + pid = work->compound_pfid; + } + } else { + req = smb2_get_msg(work->request_buf); + rsp = smb2_get_msg(work->response_buf); + } + + if (!has_file_id(id)) { + id = req->VolatileFileId; + pid = req->PersistentFileId; + } + if (req->Channel == SMB2_CHANNEL_RDMA_V1_INVALIDATE || req->Channel == SMB2_CHANNEL_RDMA_V1) { is_rdma_channel = true; @@ -6255,7 +6257,7 @@ int smb2_read(struct ksmbd_work *work) goto out; } - fp = ksmbd_lookup_fd_slow(work, req->VolatileFileId, req->PersistentFileId); + 
fp = ksmbd_lookup_fd_slow(work, id, pid); if (!fp) { err = -ENOENT; goto out; @@ -6281,21 +6283,20 @@ int smb2_read(struct ksmbd_work *work) ksmbd_debug(SMB, "filename %pD, offset %lld, len %zu\n", fp->filp, offset, length); - work->aux_payload_buf = kvzalloc(length, GFP_KERNEL); - if (!work->aux_payload_buf) { + aux_payload_buf = kvzalloc(length, GFP_KERNEL); + if (!aux_payload_buf) { err = -ENOMEM; goto out; } - nbytes = ksmbd_vfs_read(work, fp, length, &offset); + nbytes = ksmbd_vfs_read(work, fp, length, &offset, aux_payload_buf); if (nbytes < 0) { err = nbytes; goto out; } if ((nbytes == 0 && length != 0) || nbytes < mincount) { - kvfree(work->aux_payload_buf); - work->aux_payload_buf = NULL; + kvfree(aux_payload_buf); rsp->hdr.Status = STATUS_END_OF_FILE; smb2_set_err_rsp(work); ksmbd_fd_put(work, fp); @@ -6308,10 +6309,9 @@ int smb2_read(struct ksmbd_work *work) if (is_rdma_channel == true) { /* write data to the client using rdma channel */ remain_bytes = smb2_read_rdma_channel(work, req, - work->aux_payload_buf, + aux_payload_buf, nbytes); - kvfree(work->aux_payload_buf); - work->aux_payload_buf = NULL; + kvfree(aux_payload_buf); nbytes = 0; if (remain_bytes < 0) { @@ -6326,10 +6326,11 @@ int smb2_read(struct ksmbd_work *work) rsp->DataLength = cpu_to_le32(nbytes); rsp->DataRemaining = cpu_to_le32(remain_bytes); rsp->Flags = 0; - inc_rfc1001_len(work->response_buf, 16); - work->resp_hdr_sz = get_rfc1002_len(work->response_buf) + 4; - work->aux_payload_sz = nbytes; - inc_rfc1001_len(work->response_buf, nbytes); + err = ksmbd_iov_pin_rsp_read(work, (void *)rsp, + offsetof(struct smb2_read_rsp, Buffer), + aux_payload_buf, nbytes); + if (err) + goto out; ksmbd_fd_put(work, fp); return 0; @@ -6412,8 +6413,8 @@ static noinline int smb2_write_pipe(struct ksmbd_work *work) rsp->DataLength = cpu_to_le32(length); rsp->DataRemaining = 0; rsp->Reserved2 = 0; - inc_rfc1001_len(work->response_buf, 16); - return 0; + err = ksmbd_iov_pin_rsp(work, (void *)rsp, + offsetof(struct smb2_write_rsp, Buffer)); out: if (err) { rsp->hdr.Status = STATUS_INVALID_HANDLE; @@ -6569,7 +6570,9 @@ int smb2_write(struct ksmbd_work *work) rsp->DataLength = cpu_to_le32(nbytes); rsp->DataRemaining = 0; rsp->Reserved2 = 0; - inc_rfc1001_len(work->response_buf, 16); + err = ksmbd_iov_pin_rsp(work, rsp, offsetof(struct smb2_write_rsp, Buffer)); + if (err) + goto out; ksmbd_fd_put(work, fp); return 0; @@ -6616,15 +6619,11 @@ int smb2_flush(struct ksmbd_work *work) rsp->StructureSize = cpu_to_le16(4); rsp->Reserved = 0; - inc_rfc1001_len(work->response_buf, 4); - return 0; + return ksmbd_iov_pin_rsp(work, rsp, sizeof(struct smb2_flush_rsp)); out: - if (err) { - rsp->hdr.Status = STATUS_INVALID_HANDLE; - smb2_set_err_rsp(work); - } - + rsp->hdr.Status = STATUS_INVALID_HANDLE; + smb2_set_err_rsp(work); return err; } @@ -7078,8 +7077,6 @@ int smb2_lock(struct ksmbd_work *work) goto out; } - init_smb2_rsp_hdr(work); - smb2_set_err_rsp(work); rsp->hdr.Status = STATUS_RANGE_NOT_LOCKED; kfree(smb_lock); @@ -7114,7 +7111,10 @@ int smb2_lock(struct ksmbd_work *work) ksmbd_debug(SMB, "successful in taking lock\n"); rsp->hdr.Status = STATUS_SUCCESS; rsp->Reserved = 0; - inc_rfc1001_len(work->response_buf, 4); + err = ksmbd_iov_pin_rsp(work, rsp, sizeof(struct smb2_lock_rsp)); + if (err) + goto out; + ksmbd_fd_put(work, fp); return 0; @@ -7910,9 +7910,9 @@ int smb2_ioctl(struct ksmbd_work *work) rsp->Reserved = cpu_to_le16(0); rsp->Flags = cpu_to_le32(0); rsp->Reserved2 = cpu_to_le32(0); - inc_rfc1001_len(work->response_buf, 48 + 
nbytes); - - return 0; + ret = ksmbd_iov_pin_rsp(work, rsp, sizeof(struct smb2_ioctl_rsp) + nbytes); + if (!ret) + return ret; out: if (ret == -EACCES) @@ -8047,8 +8047,9 @@ static void smb20_oplock_break_ack(struct ksmbd_work *work) rsp->Reserved2 = 0; rsp->VolatileFid = volatile_id; rsp->PersistentFid = persistent_id; - inc_rfc1001_len(work->response_buf, 24); - return; + ret = ksmbd_iov_pin_rsp(work, rsp, sizeof(struct smb2_oplock_break)); + if (!ret) + return; err_out: opinfo->op_state = OPLOCK_STATE_NONE; @@ -8198,8 +8199,9 @@ static void smb21_lease_break_ack(struct ksmbd_work *work) memcpy(rsp->LeaseKey, req->LeaseKey, 16); rsp->LeaseState = lease_state; rsp->LeaseDuration = 0; - inc_rfc1001_len(work->response_buf, 36); - return; + ret = ksmbd_iov_pin_rsp(work, rsp, sizeof(struct smb2_lease_ack)); + if (!ret) + return; err_out: opinfo->op_state = OPLOCK_STATE_NONE; @@ -8337,43 +8339,19 @@ int smb2_check_sign_req(struct ksmbd_work *work) void smb2_set_sign_rsp(struct ksmbd_work *work) { struct smb2_hdr *hdr; - struct smb2_hdr *req_hdr; char signature[SMB2_HMACSHA256_SIZE]; - struct kvec iov[2]; - size_t len; + struct kvec *iov; int n_vec = 1; - hdr = smb2_get_msg(work->response_buf); - if (work->next_smb2_rsp_hdr_off) - hdr = ksmbd_resp_buf_next(work); - - req_hdr = ksmbd_req_buf_next(work); - - if (!work->next_smb2_rsp_hdr_off) { - len = get_rfc1002_len(work->response_buf); - if (req_hdr->NextCommand) - len = ALIGN(len, 8); - } else { - len = get_rfc1002_len(work->response_buf) - - work->next_smb2_rsp_hdr_off; - len = ALIGN(len, 8); - } - - if (req_hdr->NextCommand) - hdr->NextCommand = cpu_to_le32(len); - + hdr = ksmbd_resp_buf_curr(work); hdr->Flags |= SMB2_FLAGS_SIGNED; memset(hdr->Signature, 0, SMB2_SIGNATURE_SIZE); - iov[0].iov_base = (char *)&hdr->ProtocolId; - iov[0].iov_len = len; - - if (work->aux_payload_sz) { - iov[0].iov_len -= work->aux_payload_sz; - - iov[1].iov_base = work->aux_payload_buf; - iov[1].iov_len = work->aux_payload_sz; + if (hdr->Command == SMB2_READ) { + iov = &work->iov[work->iov_idx - 1]; n_vec++; + } else { + iov = &work->iov[work->iov_idx]; } if (!ksmbd_sign_smb2_pdu(work->conn, work->sess->sess_key, iov, n_vec, @@ -8449,29 +8427,14 @@ int smb3_check_sign_req(struct ksmbd_work *work) void smb3_set_sign_rsp(struct ksmbd_work *work) { struct ksmbd_conn *conn = work->conn; - struct smb2_hdr *req_hdr, *hdr; + struct smb2_hdr *hdr; struct channel *chann; char signature[SMB2_CMACAES_SIZE]; - struct kvec iov[2]; + struct kvec *iov; int n_vec = 1; - size_t len; char *signing_key; - hdr = smb2_get_msg(work->response_buf); - if (work->next_smb2_rsp_hdr_off) - hdr = ksmbd_resp_buf_next(work); - - req_hdr = ksmbd_req_buf_next(work); - - if (!work->next_smb2_rsp_hdr_off) { - len = get_rfc1002_len(work->response_buf); - if (req_hdr->NextCommand) - len = ALIGN(len, 8); - } else { - len = get_rfc1002_len(work->response_buf) - - work->next_smb2_rsp_hdr_off; - len = ALIGN(len, 8); - } + hdr = ksmbd_resp_buf_curr(work); if (conn->binding == false && le16_to_cpu(hdr->Command) == SMB2_SESSION_SETUP_HE) { @@ -8487,21 +8450,18 @@ void smb3_set_sign_rsp(struct ksmbd_work *work) if (!signing_key) return; - if (req_hdr->NextCommand) - hdr->NextCommand = cpu_to_le32(len); - hdr->Flags |= SMB2_FLAGS_SIGNED; memset(hdr->Signature, 0, SMB2_SIGNATURE_SIZE); - iov[0].iov_base = (char *)&hdr->ProtocolId; - iov[0].iov_len = len; - if (work->aux_payload_sz) { - iov[0].iov_len -= work->aux_payload_sz; - iov[1].iov_base = work->aux_payload_buf; - iov[1].iov_len = work->aux_payload_sz; + 
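
[Editor's aside] The vector selection that follows leans on an invariant of the new pinning scheme (an assumption inferred from the READ path above, not stated in the patch): only a READ response is pinned as two kvecs, so only it is signed across two vectors.

    /* assumed pinned-vector layout for a signed READ response:
     *   iov[0]            4-byte RFC1002 length
     *   (earlier compound PDUs, if any)
     *   iov[iov_idx - 1]  SMB2 READ header + fixed reply
     *   iov[iov_idx]      file data (the former aux_payload_buf)
     * every other command's PDU lives wholly in iov[iov_idx].
     */
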
+ if (hdr->Command == SMB2_READ) { + iov = &work->iov[work->iov_idx - 1]; n_vec++; + } else { + iov = &work->iov[work->iov_idx]; } - if (!ksmbd_sign_smb3_pdu(conn, signing_key, iov, n_vec, signature)) + if (!ksmbd_sign_smb3_pdu(conn, signing_key, iov, n_vec, + signature)) memcpy(hdr->Signature, signature, SMB2_SIGNATURE_SIZE); } @@ -8568,45 +8528,22 @@ static void fill_transform_hdr(void *tr_buf, char *old_buf, __le16 cipher_type) int smb3_encrypt_resp(struct ksmbd_work *work) { - char *buf = work->response_buf; - struct kvec iov[3]; + struct kvec *iov = work->iov; int rc = -ENOMEM; - int buf_size = 0, rq_nvec = 2 + (work->aux_payload_sz ? 1 : 0); + void *tr_buf; - if (ARRAY_SIZE(iov) < rq_nvec) - return -ENOMEM; - - work->tr_buf = kzalloc(sizeof(struct smb2_transform_hdr) + 4, GFP_KERNEL); - if (!work->tr_buf) + tr_buf = kzalloc(sizeof(struct smb2_transform_hdr) + 4, GFP_KERNEL); + if (!tr_buf) return rc; /* fill transform header */ - fill_transform_hdr(work->tr_buf, buf, work->conn->cipher_type); + fill_transform_hdr(tr_buf, work->response_buf, work->conn->cipher_type); - iov[0].iov_base = work->tr_buf; + iov[0].iov_base = tr_buf; iov[0].iov_len = sizeof(struct smb2_transform_hdr) + 4; - buf_size += iov[0].iov_len - 4; - - iov[1].iov_base = buf + 4; - iov[1].iov_len = get_rfc1002_len(buf); - if (work->aux_payload_sz) { - iov[1].iov_len = work->resp_hdr_sz - 4; - - iov[2].iov_base = work->aux_payload_buf; - iov[2].iov_len = work->aux_payload_sz; - buf_size += iov[2].iov_len; - } - buf_size += iov[1].iov_len; - work->resp_hdr_sz = iov[1].iov_len; + work->tr_buf = tr_buf; - rc = ksmbd_crypt_message(work, iov, rq_nvec, 1); - if (rc) - return rc; - - memmove(buf, iov[1].iov_base, iov[1].iov_len); - *(__be32 *)work->tr_buf = cpu_to_be32(buf_size); - - return rc; + return ksmbd_crypt_message(work, iov, work->iov_idx + 1, 1); } bool smb3_is_transform_hdr(void *buf) diff --git a/fs/smb/server/smb2pdu.h b/fs/smb/server/smb2pdu.h index 2767c08a534a3..d12cfd3b09278 100644 --- a/fs/smb/server/smb2pdu.h +++ b/fs/smb/server/smb2pdu.h @@ -361,7 +361,7 @@ struct smb2_ea_info { __u8 Flags; __u8 EaNameLength; __le16 EaValueLength; - char name[1]; + char name[]; /* optionally followed by value */ } __packed; /* level 15 Query */ diff --git a/fs/smb/server/smb_common.c b/fs/smb/server/smb_common.c index c2b75d8988528..e6ba1e9b8589a 100644 --- a/fs/smb/server/smb_common.c +++ b/fs/smb/server/smb_common.c @@ -319,12 +319,6 @@ static int init_smb1_rsp_hdr(struct ksmbd_work *work) struct smb_hdr *rsp_hdr = (struct smb_hdr *)work->response_buf; struct smb_hdr *rcv_hdr = (struct smb_hdr *)work->request_buf; - /* - * Remove 4 byte direct TCP header. - */ - *(__be32 *)work->response_buf = - cpu_to_be32(sizeof(struct smb_hdr) - 4); - rsp_hdr->Command = SMB_COM_NEGOTIATE; *(__le32 *)rsp_hdr->Protocol = SMB1_PROTO_NUMBER; rsp_hdr->Flags = SMBFLG_RESPONSE; @@ -560,10 +554,11 @@ static int smb_handle_negotiate(struct ksmbd_work *work) ksmbd_debug(SMB, "Unsupported SMB1 protocol\n"); - /* Add 2 byte bcc and 2 byte DialectIndex. 
*/ - inc_rfc1001_len(work->response_buf, 4); - neg_rsp->hdr.Status.CifsError = STATUS_SUCCESS; + if (ksmbd_iov_pin_rsp(work, (void *)neg_rsp, + sizeof(struct smb_negotiate_rsp) - 4)) + return -ENOMEM; + neg_rsp->hdr.Status.CifsError = STATUS_SUCCESS; neg_rsp->hdr.WordCount = 1; neg_rsp->DialectIndex = cpu_to_le16(work->conn->dialect); neg_rsp->ByteCount = 0; diff --git a/fs/smb/server/transport_rdma.c b/fs/smb/server/transport_rdma.c index c06efc020bd95..3b269e1f523a1 100644 --- a/fs/smb/server/transport_rdma.c +++ b/fs/smb/server/transport_rdma.c @@ -1241,14 +1241,12 @@ static int smb_direct_writev(struct ksmbd_transport *t, //FIXME: skip RFC1002 header.. buflen -= 4; - iov[0].iov_base += 4; - iov[0].iov_len -= 4; remaining_data_length = buflen; ksmbd_debug(RDMA, "Sending smb (RDMA): smb_len=%u\n", buflen); smb_direct_send_ctx_init(st, &send_ctx, need_invalidate, remote_key); - start = i = 0; + start = i = 1; buflen = 0; while (true) { buflen += iov[i].iov_len; @@ -1366,24 +1364,35 @@ static int smb_direct_rdma_xmit(struct smb_direct_transport *t, LIST_HEAD(msg_list); char *desc_buf; int credits_needed; - unsigned int desc_buf_len; - size_t total_length = 0; + unsigned int desc_buf_len, desc_num = 0; if (t->status != SMB_DIRECT_CS_CONNECTED) return -ENOTCONN; + if (buf_len > t->max_rdma_rw_size) + return -EINVAL; + /* calculate needed credits */ credits_needed = 0; desc_buf = buf; for (i = 0; i < desc_len / sizeof(*desc); i++) { + if (!buf_len) + break; + desc_buf_len = le32_to_cpu(desc[i].length); + if (!desc_buf_len) + return -EINVAL; + + if (desc_buf_len > buf_len) { + desc_buf_len = buf_len; + desc[i].length = cpu_to_le32(desc_buf_len); + buf_len = 0; + } credits_needed += calc_rw_credits(t, desc_buf, desc_buf_len); desc_buf += desc_buf_len; - total_length += desc_buf_len; - if (desc_buf_len == 0 || total_length > buf_len || - total_length > t->max_rdma_rw_size) - return -EINVAL; + buf_len -= desc_buf_len; + desc_num++; } ksmbd_debug(RDMA, "RDMA %s, len %#x, needed credits %#x\n", @@ -1395,7 +1404,7 @@ static int smb_direct_rdma_xmit(struct smb_direct_transport *t, /* build rdma_rw_ctx for each descriptor */ desc_buf = buf; - for (i = 0; i < desc_len / sizeof(*desc); i++) { + for (i = 0; i < desc_num; i++) { msg = kzalloc(offsetof(struct smb_direct_rdma_rw_msg, sg_list) + sizeof(struct scatterlist) * SG_CHUNK_SIZE, GFP_KERNEL); if (!msg) { diff --git a/fs/smb/server/vfs.c b/fs/smb/server/vfs.c index d48756a339a54..b5a5e50fc9ca3 100644 --- a/fs/smb/server/vfs.c +++ b/fs/smb/server/vfs.c @@ -367,15 +367,15 @@ static int check_lock_range(struct file *filp, loff_t start, loff_t end, * @fid: file id of open file * @count: read byte count * @pos: file pos + * @rbuf: read data buffer * * Return: number of read bytes on success, otherwise error */ int ksmbd_vfs_read(struct ksmbd_work *work, struct ksmbd_file *fp, size_t count, - loff_t *pos) + loff_t *pos, char *rbuf) { struct file *filp = fp->filp; ssize_t nbytes = 0; - char *rbuf = work->aux_payload_buf; struct inode *inode = file_inode(filp); if (S_ISDIR(inode->i_mode)) diff --git a/fs/smb/server/vfs.h b/fs/smb/server/vfs.h index 72f9fb4b48d13..00968081856e3 100644 --- a/fs/smb/server/vfs.h +++ b/fs/smb/server/vfs.h @@ -76,8 +76,8 @@ void ksmbd_vfs_query_maximal_access(struct mnt_idmap *idmap, struct dentry *dentry, __le32 *daccess); int ksmbd_vfs_create(struct ksmbd_work *work, const char *name, umode_t mode); int ksmbd_vfs_mkdir(struct ksmbd_work *work, const char *name, umode_t mode); -int ksmbd_vfs_read(struct ksmbd_work *work, struct 
ksmbd_file *fp, - size_t count, loff_t *pos); +int ksmbd_vfs_read(struct ksmbd_work *work, struct ksmbd_file *fp, size_t count, + loff_t *pos, char *rbuf); int ksmbd_vfs_write(struct ksmbd_work *work, struct ksmbd_file *fp, char *buf, size_t count, loff_t *pos, bool sync, ssize_t *written); diff --git a/fs/super.c b/fs/super.c index bd8dcfc822c39..2d762ce67f6e6 100644 --- a/fs/super.c +++ b/fs/super.c @@ -1373,6 +1373,50 @@ int get_tree_keyed(struct fs_context *fc, } EXPORT_SYMBOL(get_tree_keyed); +static int set_bdev_super(struct super_block *s, void *data) +{ + s->s_dev = *(dev_t *)data; + return 0; +} + +static int super_s_dev_set(struct super_block *s, struct fs_context *fc) +{ + return set_bdev_super(s, fc->sget_key); +} + +static int super_s_dev_test(struct super_block *s, struct fs_context *fc) +{ + return !(s->s_iflags & SB_I_RETIRED) && + s->s_dev == *(dev_t *)fc->sget_key; +} + +/** + * sget_dev - Find or create a superblock by device number + * @fc: Filesystem context. + * @dev: device number + * + * Find or create a superblock using the provided device number that + * will be stored in fc->sget_key. + * + * If an extant superblock is matched, then that will be returned with + * an elevated reference count that the caller must transfer or discard. + * + * If no match is made, a new superblock will be allocated and basic + * initialisation will be performed (s_type, s_fs_info, s_id, s_dev will + * be set). The superblock will be published and it will be returned in + * a partially constructed state with SB_BORN and SB_ACTIVE as yet + * unset. + * + * Return: an existing or newly created superblock on success, an error + * pointer on failure. + */ +struct super_block *sget_dev(struct fs_context *fc, dev_t dev) +{ + fc->sget_key = &dev; + return sget_fc(fc, super_s_dev_test, super_s_dev_set); +} +EXPORT_SYMBOL(sget_dev); + #ifdef CONFIG_BLOCK /* * Lock a super block that the callers holds a reference to. 
@@ -1431,23 +1475,6 @@ const struct blk_holder_ops fs_holder_ops = { }; EXPORT_SYMBOL_GPL(fs_holder_ops); -static int set_bdev_super(struct super_block *s, void *data) -{ - s->s_dev = *(dev_t *)data; - return 0; -} - -static int set_bdev_super_fc(struct super_block *s, struct fs_context *fc) -{ - return set_bdev_super(s, fc->sget_key); -} - -static int test_bdev_super_fc(struct super_block *s, struct fs_context *fc) -{ - return !(s->s_iflags & SB_I_RETIRED) && - s->s_dev == *(dev_t *)fc->sget_key; -} - int setup_bdev_super(struct super_block *sb, int sb_flags, struct fs_context *fc) { @@ -1525,8 +1552,7 @@ int get_tree_bdev(struct fs_context *fc, } fc->sb_flags |= SB_NOSEC; - fc->sget_key = &dev; - s = sget_fc(fc, test_bdev_super_fc, set_bdev_super_fc); + s = sget_dev(fc, dev); if (IS_ERR(s)) return PTR_ERR(s); diff --git a/include/linux/dlm_plock.h b/include/linux/dlm_plock.h index e6d76e8715a6c..15fc856d198cf 100644 --- a/include/linux/dlm_plock.h +++ b/include/linux/dlm_plock.h @@ -11,6 +11,8 @@ int dlm_posix_lock(dlm_lockspace_t *lockspace, u64 number, struct file *file, int cmd, struct file_lock *fl); int dlm_posix_unlock(dlm_lockspace_t *lockspace, u64 number, struct file *file, struct file_lock *fl); +int dlm_posix_cancel(dlm_lockspace_t *lockspace, u64 number, struct file *file, + struct file_lock *fl); int dlm_posix_get(dlm_lockspace_t *lockspace, u64 number, struct file *file, struct file_lock *fl); #endif diff --git a/include/linux/fs.h b/include/linux/fs.h index c8ff4156a0a15..4aeb3fa119277 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -2397,6 +2397,7 @@ struct super_block *sget(struct file_system_type *type, int (*test)(struct super_block *,void *), int (*set)(struct super_block *,void *), int flags, void *data); +struct super_block *sget_dev(struct fs_context *fc, dev_t dev); /* Alas, no aliases. 
Too much hassle with bringing module.h everywhere */ #define fops_get(fops) \ diff --git a/include/linux/jbd2.h b/include/linux/jbd2.h index 44c298aa58d44..52772c826c868 100644 --- a/include/linux/jbd2.h +++ b/include/linux/jbd2.h @@ -630,11 +630,6 @@ struct transaction_s */ struct list_head t_inode_list; - /* - * Protects info related to handles - */ - spinlock_t t_handle_lock; - /* * Longest time some handle had to wait for running transaction */ diff --git a/include/linux/lockd/lockd.h b/include/linux/lockd/lockd.h index f42594a9efe0d..0f016d69c996e 100644 --- a/include/linux/lockd/lockd.h +++ b/include/linux/lockd/lockd.h @@ -204,6 +204,8 @@ extern unsigned long nlmsvc_timeout; extern bool nsm_use_hostnames; extern u32 nsm_local_state; +extern struct timer_list nlmsvc_retry; + /* * Lockd client functions */ @@ -280,7 +282,7 @@ __be32 nlmsvc_testlock(struct svc_rqst *, struct nlm_file *, struct nlm_host *, struct nlm_lock *, struct nlm_lock *, struct nlm_cookie *); __be32 nlmsvc_cancel_blocked(struct net *net, struct nlm_file *, struct nlm_lock *); -unsigned long nlmsvc_retry_blocked(void); +void nlmsvc_retry_blocked(void); void nlmsvc_traverse_blocks(struct nlm_host *, struct nlm_file *, nlm_host_match_fn_t match); void nlmsvc_grant_reply(struct nlm_cookie *, __be32); diff --git a/include/linux/sunrpc/cache.h b/include/linux/sunrpc/cache.h index 518bd28f5ab8c..35766963dd145 100644 --- a/include/linux/sunrpc/cache.h +++ b/include/linux/sunrpc/cache.h @@ -56,10 +56,14 @@ struct cache_head { struct kref ref; unsigned long flags; }; -#define CACHE_VALID 0 /* Entry contains valid data */ -#define CACHE_NEGATIVE 1 /* Negative entry - there is no match for the key */ -#define CACHE_PENDING 2 /* An upcall has been sent but no reply received yet*/ -#define CACHE_CLEANED 3 /* Entry has been cleaned from cache */ + +/* cache_head.flags */ +enum { + CACHE_VALID, /* Entry contains valid data */ + CACHE_NEGATIVE, /* Negative entry - there is no match for the key */ + CACHE_PENDING, /* An upcall has been sent but no reply received yet*/ + CACHE_CLEANED, /* Entry has been cleaned from cache */ +}; #define CACHE_NEW_EXPIRY 120 /* keep new things pending confirmation for 120 seconds */ diff --git a/include/linux/sunrpc/clnt.h b/include/linux/sunrpc/clnt.h index 4f41d839face4..af7358277f1c3 100644 --- a/include/linux/sunrpc/clnt.h +++ b/include/linux/sunrpc/clnt.h @@ -148,6 +148,8 @@ struct rpc_create_args { const struct cred *cred; unsigned int max_connect; struct xprtsec_parms xprtsec; + unsigned long connect_timeout; + unsigned long reconnect_timeout; }; struct rpc_add_xprt_test { diff --git a/include/linux/sunrpc/stats.h b/include/linux/sunrpc/stats.h index d94d4f4105074..3ce1550d1beb3 100644 --- a/include/linux/sunrpc/stats.h +++ b/include/linux/sunrpc/stats.h @@ -43,22 +43,6 @@ struct net; #ifdef CONFIG_PROC_FS int rpc_proc_init(struct net *); void rpc_proc_exit(struct net *); -#else -static inline int rpc_proc_init(struct net *net) -{ - return 0; -} - -static inline void rpc_proc_exit(struct net *net) -{ -} -#endif - -#ifdef MODULE -void rpc_modcount(struct inode *, int); -#endif - -#ifdef CONFIG_PROC_FS struct proc_dir_entry * rpc_proc_register(struct net *,struct rpc_stat *); void rpc_proc_unregister(struct net *,const char *); void rpc_proc_zero(const struct rpc_program *); @@ -69,7 +53,14 @@ void svc_proc_unregister(struct net *, const char *); void svc_seq_show(struct seq_file *, const struct svc_stat *); #else +static inline int rpc_proc_init(struct net *net) +{ + return 0; +} +static inline 
void rpc_proc_exit(struct net *net) +{ +} static inline struct proc_dir_entry *rpc_proc_register(struct net *net, struct rpc_stat *s) { return NULL; } static inline void rpc_proc_unregister(struct net *net, const char *p) {} static inline void rpc_proc_zero(const struct rpc_program *p) {} diff --git a/include/linux/sunrpc/svc.h b/include/linux/sunrpc/svc.h index f8751118c1221..dbf5b21feafe4 100644 --- a/include/linux/sunrpc/svc.h +++ b/include/linux/sunrpc/svc.h @@ -39,16 +39,20 @@ struct svc_pool { struct list_head sp_all_threads; /* all server threads */ /* statistics on pool operation */ + struct percpu_counter sp_messages_arrived; struct percpu_counter sp_sockets_queued; struct percpu_counter sp_threads_woken; - struct percpu_counter sp_threads_timedout; -#define SP_TASK_PENDING (0) /* still work to do even if no - * xprt is queued. */ -#define SP_CONGESTED (1) unsigned long sp_flags; } ____cacheline_aligned_in_smp; +/* bits for sp_flags */ +enum { + SP_TASK_PENDING, /* still work to do even if no xprt is queued */ + SP_CONGESTED, /* all threads are busy, none idle */ +}; + + /* * RPC service. * @@ -120,19 +124,6 @@ static inline void svc_put(struct svc_serv *serv) kref_put(&serv->sv_refcnt, svc_destroy); } -/** - * svc_put_not_last - decrement non-final reference count on SUNRPC serv - * @serv: the svc_serv to have count decremented - * - * Returns: %true is refcount was decremented. - * - * If the refcount is 1, it is not decremented and instead failure is reported. - */ -static inline bool svc_put_not_last(struct svc_serv *serv) -{ - return refcount_dec_not_one(&serv->sv_refcnt.refcount); -} - /* * Maximum payload size supported by a kernel RPC server. * This is use to determine the max number of pages nfsd is @@ -232,16 +223,6 @@ struct svc_rqst { u32 rq_proc; /* procedure number */ u32 rq_prot; /* IP protocol */ int rq_cachetype; /* catering to nfsd */ -#define RQ_SECURE (0) /* secure port */ -#define RQ_LOCAL (1) /* local request */ -#define RQ_USEDEFERRAL (2) /* use deferral */ -#define RQ_DROPME (3) /* drop current reply */ -#define RQ_SPLICE_OK (4) /* turned off in gss privacy - * to prevent encrypting page - * cache pages */ -#define RQ_VICTIM (5) /* about to be shut down */ -#define RQ_BUSY (6) /* request is busy */ -#define RQ_DATA (7) /* request has data */ unsigned long rq_flags; /* flags field */ ktime_t rq_qtime; /* enqueue time */ @@ -265,7 +246,6 @@ struct svc_rqst { /* Catering to nfsd */ struct auth_domain * rq_client; /* RPC peer info */ struct auth_domain * rq_gssclient; /* "gss/"-style peer info */ - struct svc_cacherep * rq_cacherep; /* cache info */ struct task_struct *rq_task; /* service thread */ struct net *rq_bc_net; /* pointer to backchannel's * net namespace @@ -273,6 +253,19 @@ struct svc_rqst { void ** rq_lease_breaker; /* The v4 client breaking a lease */ }; +/* bits for rq_flags */ +enum { + RQ_SECURE, /* secure port */ + RQ_LOCAL, /* local request */ + RQ_USEDEFERRAL, /* use deferral */ + RQ_DROPME, /* drop current reply */ + RQ_SPLICE_OK, /* turned off in gss privacy to prevent + * encrypting page cache pages */ + RQ_VICTIM, /* about to be shut down */ + RQ_BUSY, /* request is busy */ + RQ_DATA, /* request has data */ +}; + #define SVC_NET(rqst) (rqst->rq_xprt ? 
rqst->rq_xprt->xpt_net : rqst->rq_bc_net) /* @@ -344,7 +337,7 @@ struct svc_program { char * pg_name; /* service name */ char * pg_class; /* class name: services sharing authentication */ struct svc_stat * pg_stats; /* rpc statistics */ - int (*pg_authenticate)(struct svc_rqst *); + enum svc_auth_status (*pg_authenticate)(struct svc_rqst *rqstp); __be32 (*pg_init_request)(struct svc_rqst *, const struct svc_program *, struct svc_process_info *); @@ -427,6 +420,7 @@ int svc_register(const struct svc_serv *, struct net *, const int, void svc_wake_up(struct svc_serv *); void svc_reserve(struct svc_rqst *rqstp, int space); +void svc_pool_wake_idle_thread(struct svc_pool *pool); struct svc_pool *svc_pool_for_cpu(struct svc_serv *serv); char * svc_print_addr(struct svc_rqst *, char *, size_t); const char * svc_proc_name(const struct svc_rqst *rqstp); diff --git a/include/linux/sunrpc/svc_xprt.h b/include/linux/sunrpc/svc_xprt.h index a6b12631db21c..fa55d12dc7651 100644 --- a/include/linux/sunrpc/svc_xprt.h +++ b/include/linux/sunrpc/svc_xprt.h @@ -56,23 +56,6 @@ struct svc_xprt { struct list_head xpt_list; struct list_head xpt_ready; unsigned long xpt_flags; -#define XPT_BUSY 0 /* enqueued/receiving */ -#define XPT_CONN 1 /* conn pending */ -#define XPT_CLOSE 2 /* dead or dying */ -#define XPT_DATA 3 /* data pending */ -#define XPT_TEMP 4 /* connected transport */ -#define XPT_DEAD 6 /* transport closed */ -#define XPT_CHNGBUF 7 /* need to change snd/rcv buf sizes */ -#define XPT_DEFERRED 8 /* deferred request pending */ -#define XPT_OLD 9 /* used for xprt aging mark+sweep */ -#define XPT_LISTENER 10 /* listening endpoint */ -#define XPT_CACHE_AUTH 11 /* cache auth info */ -#define XPT_LOCAL 12 /* connection from loopback interface */ -#define XPT_KILL_TEMP 13 /* call xpo_kill_temp_xprt before closing */ -#define XPT_CONG_CTRL 14 /* has congestion control */ -#define XPT_HANDSHAKE 15 /* xprt requests a handshake */ -#define XPT_TLS_SESSION 16 /* transport-layer security established */ -#define XPT_PEER_AUTH 17 /* peer has been authenticated */ struct svc_serv *xpt_server; /* service for transport */ atomic_t xpt_reserved; /* space on outq that is rsvd */ @@ -97,6 +80,27 @@ struct svc_xprt { struct rpc_xprt_switch *xpt_bc_xps; /* NFSv4.1 backchannel */ }; +/* flag bits for xpt_flags */ +enum { + XPT_BUSY, /* enqueued/receiving */ + XPT_CONN, /* conn pending */ + XPT_CLOSE, /* dead or dying */ + XPT_DATA, /* data pending */ + XPT_TEMP, /* connected transport */ + XPT_DEAD, /* transport closed */ + XPT_CHNGBUF, /* need to change snd/rcv buf sizes */ + XPT_DEFERRED, /* deferred request pending */ + XPT_OLD, /* used for xprt aging mark+sweep */ + XPT_LISTENER, /* listening endpoint */ + XPT_CACHE_AUTH, /* cache auth info */ + XPT_LOCAL, /* connection from loopback interface */ + XPT_KILL_TEMP, /* call xpo_kill_temp_xprt before closing */ + XPT_CONG_CTRL, /* has congestion control */ + XPT_HANDSHAKE, /* xprt requests a handshake */ + XPT_TLS_SESSION, /* transport-layer security established */ + XPT_PEER_AUTH, /* peer has been authenticated */ +}; + static inline void unregister_xpt_user(struct svc_xprt *xpt, struct svc_xpt_user *u) { spin_lock(&xpt->xpt_lock); diff --git a/include/linux/sunrpc/svcauth.h b/include/linux/sunrpc/svcauth.h index 6d9cc9080aca7..6f90203edbf8d 100644 --- a/include/linux/sunrpc/svcauth.h +++ b/include/linux/sunrpc/svcauth.h @@ -83,6 +83,19 @@ struct auth_domain { struct rcu_head rcu_head; }; +enum svc_auth_status { + SVC_GARBAGE = 1, + SVC_SYSERR, + SVC_VALID, + SVC_NEGATIVE, 
+ SVC_OK, + SVC_DROP, + SVC_CLOSE, + SVC_DENIED, + SVC_PENDING, + SVC_COMPLETE, +}; + /* * Each authentication flavour registers an auth_ops * structure. @@ -98,6 +111,8 @@ struct auth_domain { * is (probably) already in place. Certainly space is * reserved for it. * DROP - simply drop the request. It may have been deferred + * CLOSE - like SVC_DROP, but request is definitely lost. + * If there is a tcp connection, it should be closed. * GARBAGE - rpc garbage_args error * SYSERR - rpc system_err error * DENIED - authp holds reason for denial. @@ -111,14 +126,10 @@ struct auth_domain { * * release() is given a request after the procedure has been run. * It should sign/encrypt the results if needed - * It should return: - * OK - the resbuf is ready to be sent - * DROP - the reply should be quitely dropped - * DENIED - authp holds a reason for MSG_DENIED - * SYSERR - rpc system_err * * domain_release() * This call releases a domain. + * * set_client() * Givens a pending request (struct svc_rqst), finds and assigns * an appropriate 'auth_domain' as the client. @@ -127,44 +138,28 @@ struct auth_ops { char * name; struct module *owner; int flavour; - int (*accept)(struct svc_rqst *rq); - int (*release)(struct svc_rqst *rq); - void (*domain_release)(struct auth_domain *); - int (*set_client)(struct svc_rqst *rq); -}; -#define SVC_GARBAGE 1 -#define SVC_SYSERR 2 -#define SVC_VALID 3 -#define SVC_NEGATIVE 4 -#define SVC_OK 5 -#define SVC_DROP 6 -#define SVC_CLOSE 7 /* Like SVC_DROP, but request is definitely - * lost so if there is a tcp connection, it - * should be closed - */ -#define SVC_DENIED 8 -#define SVC_PENDING 9 -#define SVC_COMPLETE 10 + enum svc_auth_status (*accept)(struct svc_rqst *rqstp); + int (*release)(struct svc_rqst *rqstp); + void (*domain_release)(struct auth_domain *dom); + enum svc_auth_status (*set_client)(struct svc_rqst *rqstp); +}; struct svc_xprt; -extern int svc_authenticate(struct svc_rqst *rqstp); +extern enum svc_auth_status svc_authenticate(struct svc_rqst *rqstp); extern int svc_authorise(struct svc_rqst *rqstp); -extern int svc_set_client(struct svc_rqst *rqstp); +extern enum svc_auth_status svc_set_client(struct svc_rqst *rqstp); extern int svc_auth_register(rpc_authflavor_t flavor, struct auth_ops *aops); extern void svc_auth_unregister(rpc_authflavor_t flavor); extern struct auth_domain *unix_domain_find(char *name); extern void auth_domain_put(struct auth_domain *item); -extern int auth_unix_add_addr(struct net *net, struct in6_addr *addr, struct auth_domain *dom); extern struct auth_domain *auth_domain_lookup(char *name, struct auth_domain *new); extern struct auth_domain *auth_domain_find(char *name); -extern struct auth_domain *auth_unix_lookup(struct net *net, struct in6_addr *addr); -extern int auth_unix_forget_old(struct auth_domain *dom); extern void svcauth_unix_purge(struct net *net); extern void svcauth_unix_info_release(struct svc_xprt *xpt); -extern int svcauth_unix_set_client(struct svc_rqst *rqstp); +extern enum svc_auth_status svcauth_unix_set_client(struct svc_rqst *rqstp); extern int unix_gid_cache_create(struct net *net); extern void unix_gid_cache_destroy(struct net *net); diff --git a/include/linux/sunrpc/svcsock.h b/include/linux/sunrpc/svcsock.h index a7116048a4d4b..7c78ec6356b92 100644 --- a/include/linux/sunrpc/svcsock.h +++ b/include/linux/sunrpc/svcsock.h @@ -35,8 +35,8 @@ struct svc_sock { /* Total length of the data (not including fragment headers) * received so far in the fragments making up this rpc: */ u32 sk_datalen; - /* 
Number of queued send requests */ - atomic_t sk_sendqlen; + + struct page_frag_cache sk_frag_cache; struct completion sk_handshake_done; @@ -56,8 +56,7 @@ static inline u32 svc_sock_final_rec(struct svc_sock *svsk) /* * Function prototypes. */ -void svc_close_net(struct svc_serv *, struct net *); -int svc_recv(struct svc_rqst *, long); +void svc_recv(struct svc_rqst *rqstp); void svc_send(struct svc_rqst *rqstp); void svc_drop(struct svc_rqst *); void svc_sock_update_bufs(struct svc_serv *serv); @@ -66,8 +65,6 @@ int svc_addsock(struct svc_serv *serv, struct net *net, const struct cred *cred); void svc_init_xprt_sock(void); void svc_cleanup_xprt_sock(void); -struct svc_xprt *svc_sock_create(struct svc_serv *serv, int prot); -void svc_sock_destroy(struct svc_xprt *); /* * svc_makesock socket characteristics diff --git a/include/linux/sunrpc/xdr.h b/include/linux/sunrpc/xdr.h index f89ec4b5ea169..5b4fb3c791bc2 100644 --- a/include/linux/sunrpc/xdr.h +++ b/include/linux/sunrpc/xdr.h @@ -139,6 +139,8 @@ void xdr_terminate_string(const struct xdr_buf *, const u32); size_t xdr_buf_pagecount(const struct xdr_buf *buf); int xdr_alloc_bvec(struct xdr_buf *buf, gfp_t gfp); void xdr_free_bvec(struct xdr_buf *buf); +unsigned int xdr_buf_to_bvec(struct bio_vec *bvec, unsigned int bvec_size, + const struct xdr_buf *xdr); static inline __be32 *xdr_encode_array(__be32 *p, const void *s, unsigned int len) { @@ -224,6 +226,7 @@ struct xdr_stream { struct kvec *iov; /* pointer to the current kvec */ struct kvec scratch; /* Scratch buffer */ struct page **page_ptr; /* pointer to the current page */ + void *page_kaddr; /* kmapped address of the current page */ unsigned int nwords; /* Remaining decode buffer length */ struct rpc_rqst *rqst; /* For debugging */ @@ -255,6 +258,7 @@ extern void xdr_init_decode(struct xdr_stream *xdr, struct xdr_buf *buf, __be32 *p, struct rpc_rqst *rqst); extern void xdr_init_decode_pages(struct xdr_stream *xdr, struct xdr_buf *buf, struct page **pages, unsigned int len); +extern void xdr_finish_decode(struct xdr_stream *xdr); extern __be32 *xdr_inline_decode(struct xdr_stream *xdr, size_t nbytes); extern unsigned int xdr_read_pages(struct xdr_stream *xdr, unsigned int len); extern void xdr_enter_page(struct xdr_stream *xdr, unsigned int len); @@ -775,9 +779,7 @@ xdr_stream_decode_uint32_array(struct xdr_stream *xdr, if (unlikely(xdr_stream_decode_u32(xdr, &len) < 0)) return -EBADMSG; - if (len > SIZE_MAX / sizeof(*p)) - return -EBADMSG; - p = xdr_inline_decode(xdr, len * sizeof(*p)); + p = xdr_inline_decode(xdr, size_mul(len, sizeof(*p))); if (unlikely(!p)) return -EBADMSG; if (array == NULL) diff --git a/include/linux/sunrpc/xprt.h b/include/linux/sunrpc/xprt.h index b52411bcfe4e7..4ecc89301eb74 100644 --- a/include/linux/sunrpc/xprt.h +++ b/include/linux/sunrpc/xprt.h @@ -351,6 +351,8 @@ struct xprt_create { struct rpc_xprt_switch *bc_xps; unsigned int flags; struct xprtsec_parms xprtsec; + unsigned long connect_timeout; + unsigned long reconnect_timeout; }; struct xprt_class { diff --git a/include/trace/events/dlm.h b/include/trace/events/dlm.h index 2b09574e12430..c1a146f9fc911 100644 --- a/include/trace/events/dlm.h +++ b/include/trace/events/dlm.h @@ -7,6 +7,7 @@ #include #include +#include #include #include "../../../fs/dlm/dlm_internal.h" @@ -585,6 +586,56 @@ TRACE_EVENT(dlm_recv_message, ); +DECLARE_EVENT_CLASS(dlm_plock_template, + + TP_PROTO(const struct dlm_plock_info *info), + + TP_ARGS(info), + + TP_STRUCT__entry( + __field(uint8_t, optype) + __field(uint8_t, ex) + 
__field(uint8_t, wait) + __field(uint8_t, flags) + __field(uint32_t, pid) + __field(int32_t, nodeid) + __field(int32_t, rv) + __field(uint32_t, fsid) + __field(uint64_t, number) + __field(uint64_t, start) + __field(uint64_t, end) + __field(uint64_t, owner) + ), + + TP_fast_assign( + __entry->optype = info->optype; + __entry->ex = info->ex; + __entry->wait = info->wait; + __entry->flags = info->flags; + __entry->pid = info->pid; + __entry->nodeid = info->nodeid; + __entry->rv = info->rv; + __entry->fsid = info->fsid; + __entry->number = info->number; + __entry->start = info->start; + __entry->end = info->end; + __entry->owner = info->owner; + ), + + TP_printk("fsid=%u number=%llx owner=%llx optype=%d ex=%d wait=%d flags=%x pid=%u nodeid=%d rv=%d start=%llx end=%llx", + __entry->fsid, __entry->number, __entry->owner, + __entry->optype, __entry->ex, __entry->wait, + __entry->flags, __entry->pid, __entry->nodeid, + __entry->rv, __entry->start, __entry->end) + +); + +DEFINE_EVENT(dlm_plock_template, dlm_plock_read, + TP_PROTO(const struct dlm_plock_info *info), TP_ARGS(info)); + +DEFINE_EVENT(dlm_plock_template, dlm_plock_write, + TP_PROTO(const struct dlm_plock_info *info), TP_ARGS(info)); + TRACE_EVENT(dlm_send, TP_PROTO(int nodeid, int ret), diff --git a/include/trace/events/sunrpc.h b/include/trace/events/sunrpc.h index 43711753616a1..6beb38c1dcb5e 100644 --- a/include/trace/events/sunrpc.h +++ b/include/trace/events/sunrpc.h @@ -1706,7 +1706,7 @@ TRACE_DEFINE_ENUM(SVC_DENIED); TRACE_DEFINE_ENUM(SVC_PENDING); TRACE_DEFINE_ENUM(SVC_COMPLETE); -#define svc_show_status(status) \ +#define show_svc_auth_status(status) \ __print_symbolic(status, \ { SVC_GARBAGE, "SVC_GARBAGE" }, \ { SVC_SYSERR, "SVC_SYSERR" }, \ @@ -1743,7 +1743,10 @@ TRACE_DEFINE_ENUM(SVC_COMPLETE); __entry->xid, __get_sockaddr(server), __get_sockaddr(client) TRACE_EVENT_CONDITION(svc_authenticate, - TP_PROTO(const struct svc_rqst *rqst, int auth_res), + TP_PROTO( + const struct svc_rqst *rqst, + enum svc_auth_status auth_res + ), TP_ARGS(rqst, auth_res), @@ -1766,7 +1769,7 @@ TRACE_EVENT_CONDITION(svc_authenticate, TP_printk(SVC_RQST_ENDPOINT_FORMAT " auth_res=%s auth_stat=%s", SVC_RQST_ENDPOINT_VARARGS, - svc_show_status(__entry->svc_status), + show_svc_auth_status(__entry->svc_status), rpc_show_auth_stat(__entry->auth_stat)) ); @@ -1918,25 +1921,42 @@ TRACE_EVENT(svc_stats_latency, __get_str(procedure), __entry->execute) ); +/* + * from include/linux/sunrpc/svc_xprt.h + */ +#define SVC_XPRT_FLAG_LIST \ + svc_xprt_flag(BUSY) \ + svc_xprt_flag(CONN) \ + svc_xprt_flag(CLOSE) \ + svc_xprt_flag(DATA) \ + svc_xprt_flag(TEMP) \ + svc_xprt_flag(DEAD) \ + svc_xprt_flag(CHNGBUF) \ + svc_xprt_flag(DEFERRED) \ + svc_xprt_flag(OLD) \ + svc_xprt_flag(LISTENER) \ + svc_xprt_flag(CACHE_AUTH) \ + svc_xprt_flag(LOCAL) \ + svc_xprt_flag(KILL_TEMP) \ + svc_xprt_flag(CONG_CTRL) \ + svc_xprt_flag(HANDSHAKE) \ + svc_xprt_flag(TLS_SESSION) \ + svc_xprt_flag_end(PEER_AUTH) + +#undef svc_xprt_flag +#undef svc_xprt_flag_end +#define svc_xprt_flag(x) TRACE_DEFINE_ENUM(XPT_##x); +#define svc_xprt_flag_end(x) TRACE_DEFINE_ENUM(XPT_##x); + +SVC_XPRT_FLAG_LIST + +#undef svc_xprt_flag +#undef svc_xprt_flag_end +#define svc_xprt_flag(x) { BIT(XPT_##x), #x }, +#define svc_xprt_flag_end(x) { BIT(XPT_##x), #x } + #define show_svc_xprt_flags(flags) \ - __print_flags(flags, "|", \ - { BIT(XPT_BUSY), "BUSY" }, \ - { BIT(XPT_CONN), "CONN" }, \ - { BIT(XPT_CLOSE), "CLOSE" }, \ - { BIT(XPT_DATA), "DATA" }, \ - { BIT(XPT_TEMP), "TEMP" }, \ - { BIT(XPT_DEAD), "DEAD" }, 
\ - { BIT(XPT_CHNGBUF), "CHNGBUF" }, \ - { BIT(XPT_DEFERRED), "DEFERRED" }, \ - { BIT(XPT_OLD), "OLD" }, \ - { BIT(XPT_LISTENER), "LISTENER" }, \ - { BIT(XPT_CACHE_AUTH), "CACHE_AUTH" }, \ - { BIT(XPT_LOCAL), "LOCAL" }, \ - { BIT(XPT_KILL_TEMP), "KILL_TEMP" }, \ - { BIT(XPT_CONG_CTRL), "CONG_CTRL" }, \ - { BIT(XPT_HANDSHAKE), "HANDSHAKE" }, \ - { BIT(XPT_TLS_SESSION), "TLS_SESSION" }, \ - { BIT(XPT_PEER_AUTH), "PEER_AUTH" }) + __print_flags(flags, "|", SVC_XPRT_FLAG_LIST) TRACE_EVENT(svc_xprt_create_err, TP_PROTO( @@ -1994,25 +2014,25 @@ TRACE_EVENT(svc_xprt_create_err, TRACE_EVENT(svc_xprt_enqueue, TP_PROTO( const struct svc_xprt *xprt, - const struct svc_rqst *rqst + unsigned long flags ), - TP_ARGS(xprt, rqst), + TP_ARGS(xprt, flags), TP_STRUCT__entry( SVC_XPRT_ENDPOINT_FIELDS(xprt) - - __field(int, pid) ), TP_fast_assign( - SVC_XPRT_ENDPOINT_ASSIGNMENTS(xprt); - - __entry->pid = rqst? rqst->rq_task->pid : 0; + __assign_sockaddr(server, &xprt->xpt_local, + xprt->xpt_locallen); + __assign_sockaddr(client, &xprt->xpt_remote, + xprt->xpt_remotelen); + __entry->flags = flags; + __entry->netns_ino = xprt->xpt_net->ns.inum; ), - TP_printk(SVC_XPRT_ENDPOINT_FORMAT " pid=%d", - SVC_XPRT_ENDPOINT_VARARGS, __entry->pid) + TP_printk(SVC_XPRT_ENDPOINT_FORMAT, SVC_XPRT_ENDPOINT_VARARGS) ); TRACE_EVENT(svc_xprt_dequeue, diff --git a/include/uapi/linux/dlm_plock.h b/include/uapi/linux/dlm_plock.h index 63b6c1fd91690..eb66afcac40ed 100644 --- a/include/uapi/linux/dlm_plock.h +++ b/include/uapi/linux/dlm_plock.h @@ -22,6 +22,7 @@ enum { DLM_PLOCK_OP_LOCK = 1, DLM_PLOCK_OP_UNLOCK, DLM_PLOCK_OP_GET, + DLM_PLOCK_OP_CANCEL, }; #define DLM_PLOCK_FL_CLOSE 1 diff --git a/net/sunrpc/.kunitconfig b/net/sunrpc/.kunitconfig index a55a00fa649ba..eb02b906c2959 100644 --- a/net/sunrpc/.kunitconfig +++ b/net/sunrpc/.kunitconfig @@ -23,7 +23,6 @@ CONFIG_NFS_FS=y CONFIG_SUNRPC=y CONFIG_SUNRPC_GSS=y CONFIG_RPCSEC_GSS_KRB5=y -CONFIG_RPCSEC_GSS_KRB5_ENCTYPES_DES=y CONFIG_RPCSEC_GSS_KRB5_ENCTYPES_AES_SHA1=y CONFIG_RPCSEC_GSS_KRB5_ENCTYPES_CAMELLIA=y CONFIG_RPCSEC_GSS_KRB5_ENCTYPES_AES_SHA2=y diff --git a/net/sunrpc/Kconfig b/net/sunrpc/Kconfig index 4afc5fd71d44f..2d8b67dac7b5b 100644 --- a/net/sunrpc/Kconfig +++ b/net/sunrpc/Kconfig @@ -34,38 +34,6 @@ config RPCSEC_GSS_KRB5 If unsure, say Y. -config RPCSEC_GSS_KRB5_SIMPLIFIED - bool - depends on RPCSEC_GSS_KRB5 - -config RPCSEC_GSS_KRB5_CRYPTOSYSTEM - bool - depends on RPCSEC_GSS_KRB5 - -config RPCSEC_GSS_KRB5_ENCTYPES_DES - bool "Enable Kerberos enctypes based on DES (deprecated)" - depends on RPCSEC_GSS_KRB5 - depends on CRYPTO_CBC && CRYPTO_CTS && CRYPTO_ECB - depends on CRYPTO_HMAC && CRYPTO_MD5 && CRYPTO_SHA1 - depends on CRYPTO_DES - default n - select RPCSEC_GSS_KRB5_SIMPLIFIED - help - Choose Y to enable the use of deprecated Kerberos 5 - encryption types that utilize Data Encryption Standard - (DES) based ciphers. These include des-cbc-md5, - des-cbc-crc, and des-cbc-md4, which were deprecated by - RFC 6649, and des3-cbc-sha1, which was deprecated by RFC - 8429. - - These encryption types are known to be insecure, therefore - the default setting of this option is N. Support for these - encryption types is available only for compatibility with - legacy NFS client and server implementations. - - Removal of support is planned for a subsequent kernel - release. 
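
The SVC_XPRT_FLAG_LIST conversion shown earlier replaces a hand-maintained __print_flags() table with a single X-macro list that expands once into TRACE_DEFINE_ENUM() declarations and once into bit/name pairs, so the two can no longer drift apart when a flag is added. A standalone sketch of the same pattern outside the tracing framework, with purely illustrative names:

	#include <stdio.h>

	/* One authoritative list; entries are emitted via flag()/flag_end(). */
	#define FLAG_LIST  \
		flag(BUSY) \
		flag(CONN) \
		flag_end(CLOSE)

	/* First expansion: sequential bit numbers, like the xpt_flags enum. */
	#define flag(x)     FL_##x,
	#define flag_end(x) FL_##x
	enum { FLAG_LIST };
	#undef flag
	#undef flag_end

	/* Second expansion: printable bit/name pairs, like
	 * show_svc_xprt_flags() above. */
	#define flag(x)     { 1UL << FL_##x, #x },
	#define flag_end(x) { 1UL << FL_##x, #x }
	static const struct { unsigned long mask; const char *name; } flag_names[] = {
		FLAG_LIST
	};
	#undef flag
	#undef flag_end

	int main(void)
	{
		unsigned long flags = (1UL << FL_BUSY) | (1UL << FL_CLOSE);
		size_t i;

		for (i = 0; i < sizeof(flag_names) / sizeof(flag_names[0]); i++)
			if (flags & flag_names[i].mask)
				printf("%s|", flag_names[i].name);
		putchar('\n');	/* prints "BUSY|CLOSE|" */
		return 0;
	}
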
- config RPCSEC_GSS_KRB5_ENCTYPES_AES_SHA1 bool "Enable Kerberos enctypes based on AES and SHA-1" depends on RPCSEC_GSS_KRB5 @@ -73,7 +41,6 @@ config RPCSEC_GSS_KRB5_ENCTYPES_AES_SHA1 depends on CRYPTO_HMAC && CRYPTO_SHA1 depends on CRYPTO_AES default y - select RPCSEC_GSS_KRB5_CRYPTOSYSTEM help Choose Y to enable the use of Kerberos 5 encryption types that utilize Advanced Encryption Standard (AES) ciphers and @@ -86,7 +53,6 @@ config RPCSEC_GSS_KRB5_ENCTYPES_CAMELLIA depends on CRYPTO_CBC && CRYPTO_CTS && CRYPTO_CAMELLIA depends on CRYPTO_CMAC default n - select RPCSEC_GSS_KRB5_CRYPTOSYSTEM help Choose Y to enable the use of Kerberos 5 encryption types that utilize Camellia ciphers (RFC 3713) and CMAC digests @@ -100,7 +66,6 @@ config RPCSEC_GSS_KRB5_ENCTYPES_AES_SHA2 depends on CRYPTO_HMAC && CRYPTO_SHA256 && CRYPTO_SHA512 depends on CRYPTO_AES default n - select RPCSEC_GSS_KRB5_CRYPTOSYSTEM help Choose Y to enable the use of Kerberos 5 encryption types that utilize Advanced Encryption Standard (AES) ciphers and diff --git a/net/sunrpc/auth_gss/Makefile b/net/sunrpc/auth_gss/Makefile index 012ae17206894..ad1736d93b763 100644 --- a/net/sunrpc/auth_gss/Makefile +++ b/net/sunrpc/auth_gss/Makefile @@ -12,6 +12,6 @@ auth_rpcgss-y := auth_gss.o gss_generic_token.o \ obj-$(CONFIG_RPCSEC_GSS_KRB5) += rpcsec_gss_krb5.o rpcsec_gss_krb5-y := gss_krb5_mech.o gss_krb5_seal.o gss_krb5_unseal.o \ - gss_krb5_seqnum.o gss_krb5_wrap.o gss_krb5_crypto.o gss_krb5_keys.o + gss_krb5_wrap.o gss_krb5_crypto.o gss_krb5_keys.o obj-$(CONFIG_RPCSEC_GSS_KRB5_KUNIT_TEST) += gss_krb5_test.o diff --git a/net/sunrpc/auth_gss/gss_krb5_internal.h b/net/sunrpc/auth_gss/gss_krb5_internal.h index b673e2626acb2..3afd4065bf3d0 100644 --- a/net/sunrpc/auth_gss/gss_krb5_internal.h +++ b/net/sunrpc/auth_gss/gss_krb5_internal.h @@ -33,7 +33,6 @@ struct gss_krb5_enctype { const u32 Ke_length; /* encryption subkey length, in octets */ const u32 Ki_length; /* integrity subkey length, in octets */ - int (*import_ctx)(struct krb5_ctx *ctx, gfp_t gfp_mask); int (*derive_key)(const struct gss_krb5_enctype *gk5e, const struct xdr_netobj *in, struct xdr_netobj *out, @@ -85,24 +84,15 @@ struct krb5_ctx { * GSS Kerberos 5 mechanism Per-Message calls. 
*/ -u32 gss_krb5_get_mic_v1(struct krb5_ctx *ctx, struct xdr_buf *text, - struct xdr_netobj *token); u32 gss_krb5_get_mic_v2(struct krb5_ctx *ctx, struct xdr_buf *text, struct xdr_netobj *token); -u32 gss_krb5_verify_mic_v1(struct krb5_ctx *ctx, struct xdr_buf *message_buffer, - struct xdr_netobj *read_token); u32 gss_krb5_verify_mic_v2(struct krb5_ctx *ctx, struct xdr_buf *message_buffer, struct xdr_netobj *read_token); -u32 gss_krb5_wrap_v1(struct krb5_ctx *kctx, int offset, - struct xdr_buf *buf, struct page **pages); u32 gss_krb5_wrap_v2(struct krb5_ctx *kctx, int offset, struct xdr_buf *buf, struct page **pages); -u32 gss_krb5_unwrap_v1(struct krb5_ctx *kctx, int offset, int len, - struct xdr_buf *buf, unsigned int *slack, - unsigned int *align); u32 gss_krb5_unwrap_v2(struct krb5_ctx *kctx, int offset, int len, struct xdr_buf *buf, unsigned int *slack, unsigned int *align); @@ -113,12 +103,6 @@ u32 gss_krb5_unwrap_v2(struct krb5_ctx *kctx, int offset, int len, /* Key Derivation Functions */ -int krb5_derive_key_v1(const struct gss_krb5_enctype *gk5e, - const struct xdr_netobj *inkey, - struct xdr_netobj *outkey, - const struct xdr_netobj *label, - gfp_t gfp_mask); - int krb5_derive_key_v2(const struct gss_krb5_enctype *gk5e, const struct xdr_netobj *inkey, struct xdr_netobj *outkey, @@ -169,13 +153,6 @@ static inline int krb5_derive_key(struct krb5_ctx *kctx, return gk5e->derive_key(gk5e, inkey, outkey, &label, gfp_mask); } -s32 krb5_make_seq_num(struct krb5_ctx *kctx, struct crypto_sync_skcipher *key, - int direction, u32 seqnum, unsigned char *cksum, - unsigned char *buf); - -s32 krb5_get_seq_num(struct krb5_ctx *kctx, unsigned char *cksum, - unsigned char *buf, int *direction, u32 *seqnum); - void krb5_make_confounder(u8 *p, int conflen); u32 make_checksum(struct krb5_ctx *kctx, char *header, int hdrlen, diff --git a/net/sunrpc/auth_gss/gss_krb5_keys.c b/net/sunrpc/auth_gss/gss_krb5_keys.c index 5347fe1cc93f8..06d8ee0db000f 100644 --- a/net/sunrpc/auth_gss/gss_krb5_keys.c +++ b/net/sunrpc/auth_gss/gss_krb5_keys.c @@ -222,90 +222,6 @@ static int krb5_DK(const struct gss_krb5_enctype *gk5e, return ret; } -#define smask(step) ((1<>step)&smask(step))) -#define parity_char(x) pstep(pstep(pstep((x), 4), 2), 1) - -static void mit_des_fixup_key_parity(u8 key[8]) -{ - int i; - for (i = 0; i < 8; i++) { - key[i] &= 0xfe; - key[i] |= 1^parity_char(key[i]); - } -} - -static int krb5_random_to_key_v1(const struct gss_krb5_enctype *gk5e, - struct xdr_netobj *randombits, - struct xdr_netobj *key) -{ - int i, ret = -EINVAL; - - if (key->len != 24) { - dprintk("%s: key->len is %d\n", __func__, key->len); - goto err_out; - } - if (randombits->len != 21) { - dprintk("%s: randombits->len is %d\n", - __func__, randombits->len); - goto err_out; - } - - /* take the seven bytes, move them around into the top 7 bits of the - 8 key bytes, then compute the parity bits. Do this three times. 
*/ - - for (i = 0; i < 3; i++) { - memcpy(key->data + i*8, randombits->data + i*7, 7); - key->data[i*8+7] = (((key->data[i*8]&1)<<1) | - ((key->data[i*8+1]&1)<<2) | - ((key->data[i*8+2]&1)<<3) | - ((key->data[i*8+3]&1)<<4) | - ((key->data[i*8+4]&1)<<5) | - ((key->data[i*8+5]&1)<<6) | - ((key->data[i*8+6]&1)<<7)); - - mit_des_fixup_key_parity(key->data + i*8); - } - ret = 0; -err_out: - return ret; -} - -/** - * krb5_derive_key_v1 - Derive a subkey for an RFC 3961 enctype - * @gk5e: Kerberos 5 enctype profile - * @inkey: base protocol key - * @outkey: OUT: derived key - * @label: subkey usage label - * @gfp_mask: memory allocation control flags - * - * Caller sets @outkey->len to the desired length of the derived key. - * - * On success, returns 0 and fills in @outkey. A negative errno value - * is returned on failure. - */ -int krb5_derive_key_v1(const struct gss_krb5_enctype *gk5e, - const struct xdr_netobj *inkey, - struct xdr_netobj *outkey, - const struct xdr_netobj *label, - gfp_t gfp_mask) -{ - struct xdr_netobj inblock; - int ret; - - inblock.len = gk5e->keybytes; - inblock.data = kmalloc(inblock.len, gfp_mask); - if (!inblock.data) - return -ENOMEM; - - ret = krb5_DK(gk5e, inkey, inblock.data, label, gfp_mask); - if (!ret) - ret = krb5_random_to_key_v1(gk5e, &inblock, outkey); - - kfree_sensitive(inblock.data); - return ret; -} - /* * This is the identity function, with some sanity checking. */ diff --git a/net/sunrpc/auth_gss/gss_krb5_mech.c b/net/sunrpc/auth_gss/gss_krb5_mech.c index 20e21d08badb3..e31cfdf7eadcb 100644 --- a/net/sunrpc/auth_gss/gss_krb5_mech.c +++ b/net/sunrpc/auth_gss/gss_krb5_mech.c @@ -30,61 +30,7 @@ static struct gss_api_mech gss_kerberos_mech; -#if defined(CONFIG_RPCSEC_GSS_KRB5_SIMPLIFIED) -static int gss_krb5_import_ctx_des(struct krb5_ctx *ctx, gfp_t gfp_mask); -static int gss_krb5_import_ctx_v1(struct krb5_ctx *ctx, gfp_t gfp_mask); -#endif -#if defined(CONFIG_RPCSEC_GSS_KRB5_CRYPTOSYSTEM) -static int gss_krb5_import_ctx_v2(struct krb5_ctx *ctx, gfp_t gfp_mask); -#endif - static const struct gss_krb5_enctype supported_gss_krb5_enctypes[] = { -#if defined(CONFIG_RPCSEC_GSS_KRB5_ENCTYPES_DES) - /* - * DES (All DES enctypes are mapped to the same gss functionality) - */ - { - .etype = ENCTYPE_DES_CBC_RAW, - .ctype = CKSUMTYPE_RSA_MD5, - .name = "des-cbc-crc", - .encrypt_name = "cbc(des)", - .cksum_name = "md5", - .import_ctx = gss_krb5_import_ctx_des, - .get_mic = gss_krb5_get_mic_v1, - .verify_mic = gss_krb5_verify_mic_v1, - .wrap = gss_krb5_wrap_v1, - .unwrap = gss_krb5_unwrap_v1, - .signalg = SGN_ALG_DES_MAC_MD5, - .sealalg = SEAL_ALG_DES, - .keybytes = 7, - .keylength = 8, - .cksumlength = 8, - .keyed_cksum = 0, - }, - /* - * 3DES - */ - { - .etype = ENCTYPE_DES3_CBC_RAW, - .ctype = CKSUMTYPE_HMAC_SHA1_DES3, - .name = "des3-hmac-sha1", - .encrypt_name = "cbc(des3_ede)", - .cksum_name = "hmac(sha1)", - .import_ctx = gss_krb5_import_ctx_v1, - .derive_key = krb5_derive_key_v1, - .get_mic = gss_krb5_get_mic_v1, - .verify_mic = gss_krb5_verify_mic_v1, - .wrap = gss_krb5_wrap_v1, - .unwrap = gss_krb5_unwrap_v1, - .signalg = SGN_ALG_HMAC_SHA1_DES3_KD, - .sealalg = SEAL_ALG_DES3KD, - .keybytes = 21, - .keylength = 24, - .cksumlength = 20, - .keyed_cksum = 1, - }, -#endif - #if defined(CONFIG_RPCSEC_GSS_KRB5_ENCTYPES_AES_SHA1) /* * AES-128 with SHA-1 (RFC 3962) @@ -96,7 +42,6 @@ static const struct gss_krb5_enctype supported_gss_krb5_enctypes[] = { .encrypt_name = "cts(cbc(aes))", .aux_cipher = "cbc(aes)", .cksum_name = "hmac(sha1)", - .import_ctx = 
gss_krb5_import_ctx_v2, .derive_key = krb5_derive_key_v2, .encrypt = gss_krb5_aes_encrypt, .decrypt = gss_krb5_aes_decrypt, @@ -126,7 +71,6 @@ static const struct gss_krb5_enctype supported_gss_krb5_enctypes[] = { .encrypt_name = "cts(cbc(aes))", .aux_cipher = "cbc(aes)", .cksum_name = "hmac(sha1)", - .import_ctx = gss_krb5_import_ctx_v2, .derive_key = krb5_derive_key_v2, .encrypt = gss_krb5_aes_encrypt, .decrypt = gss_krb5_aes_decrypt, @@ -166,7 +110,6 @@ static const struct gss_krb5_enctype supported_gss_krb5_enctypes[] = { .Ke_length = BITS2OCTETS(128), .Ki_length = BITS2OCTETS(128), - .import_ctx = gss_krb5_import_ctx_v2, .derive_key = krb5_kdf_feedback_cmac, .encrypt = gss_krb5_aes_encrypt, .decrypt = gss_krb5_aes_decrypt, @@ -193,7 +136,6 @@ static const struct gss_krb5_enctype supported_gss_krb5_enctypes[] = { .Ke_length = BITS2OCTETS(256), .Ki_length = BITS2OCTETS(256), - .import_ctx = gss_krb5_import_ctx_v2, .derive_key = krb5_kdf_feedback_cmac, .encrypt = gss_krb5_aes_encrypt, .decrypt = gss_krb5_aes_decrypt, @@ -223,7 +165,6 @@ static const struct gss_krb5_enctype supported_gss_krb5_enctypes[] = { .Ke_length = BITS2OCTETS(128), .Ki_length = BITS2OCTETS(128), - .import_ctx = gss_krb5_import_ctx_v2, .derive_key = krb5_kdf_hmac_sha2, .encrypt = krb5_etm_encrypt, .decrypt = krb5_etm_decrypt, @@ -250,7 +191,6 @@ static const struct gss_krb5_enctype supported_gss_krb5_enctypes[] = { .Ke_length = BITS2OCTETS(256), .Ki_length = BITS2OCTETS(192), - .import_ctx = gss_krb5_import_ctx_v2, .derive_key = krb5_kdf_hmac_sha2, .encrypt = krb5_etm_encrypt, .decrypt = krb5_etm_decrypt, @@ -283,12 +223,6 @@ static void gss_krb5_prepare_enctype_priority_list(void) #if defined(CONFIG_RPCSEC_GSS_KRB5_ENCTYPES_AES_SHA1) ENCTYPE_AES256_CTS_HMAC_SHA1_96, ENCTYPE_AES128_CTS_HMAC_SHA1_96, -#endif -#if defined(CONFIG_RPCSEC_GSS_KRB5_ENCTYPES_DES) - ENCTYPE_DES3_CBC_SHA1, - ENCTYPE_DES_CBC_MD5, - ENCTYPE_DES_CBC_CRC, - ENCTYPE_DES_CBC_MD4, #endif }; size_t total, i; @@ -329,185 +263,6 @@ const struct gss_krb5_enctype *gss_krb5_lookup_enctype(u32 etype) } EXPORT_SYMBOL_IF_KUNIT(gss_krb5_lookup_enctype); -static struct crypto_sync_skcipher * -gss_krb5_alloc_cipher_v1(struct krb5_ctx *ctx, struct xdr_netobj *key) -{ - struct crypto_sync_skcipher *tfm; - - tfm = crypto_alloc_sync_skcipher(ctx->gk5e->encrypt_name, 0, 0); - if (IS_ERR(tfm)) - return NULL; - if (crypto_sync_skcipher_setkey(tfm, key->data, key->len)) { - crypto_free_sync_skcipher(tfm); - return NULL; - } - return tfm; -} - -static inline const void * -get_key(const void *p, const void *end, - struct krb5_ctx *ctx, struct crypto_sync_skcipher **res) -{ - struct crypto_sync_skcipher *tfm; - struct xdr_netobj key; - int alg; - - p = simple_get_bytes(p, end, &alg, sizeof(alg)); - if (IS_ERR(p)) - goto out_err; - switch (alg) { - case ENCTYPE_DES_CBC_CRC: - case ENCTYPE_DES_CBC_MD4: - case ENCTYPE_DES_CBC_MD5: - /* Map all these key types to ENCTYPE_DES_CBC_RAW */ - alg = ENCTYPE_DES_CBC_RAW; - break; - } - if (!gss_krb5_lookup_enctype(alg)) { - pr_warn("gss_krb5: unsupported enctype: %d\n", alg); - goto out_err_inval; - } - - p = simple_get_netobj(p, end, &key); - if (IS_ERR(p)) - goto out_err; - tfm = gss_krb5_alloc_cipher_v1(ctx, &key); - kfree(key.data); - if (!tfm) { - pr_warn("gss_krb5: failed to initialize cipher '%s'\n", - ctx->gk5e->encrypt_name); - goto out_err_inval; - } - *res = tfm; - - return p; - -out_err_inval: - p = ERR_PTR(-EINVAL); -out_err: - return p; -} - -static int -gss_import_v1_context(const void *p, const void *end, struct 
krb5_ctx *ctx) -{ - u32 seq_send; - int tmp; - u32 time32; - - p = simple_get_bytes(p, end, &ctx->initiate, sizeof(ctx->initiate)); - if (IS_ERR(p)) - goto out_err; - - /* Old format supports only DES! Any other enctype uses new format */ - ctx->enctype = ENCTYPE_DES_CBC_RAW; - - ctx->gk5e = gss_krb5_lookup_enctype(ctx->enctype); - if (ctx->gk5e == NULL) { - p = ERR_PTR(-EINVAL); - goto out_err; - } - - /* The downcall format was designed before we completely understood - * the uses of the context fields; so it includes some stuff we - * just give some minimal sanity-checking, and some we ignore - * completely (like the next twenty bytes): */ - if (unlikely(p + 20 > end || p + 20 < p)) { - p = ERR_PTR(-EFAULT); - goto out_err; - } - p += 20; - p = simple_get_bytes(p, end, &tmp, sizeof(tmp)); - if (IS_ERR(p)) - goto out_err; - if (tmp != SGN_ALG_DES_MAC_MD5) { - p = ERR_PTR(-ENOSYS); - goto out_err; - } - p = simple_get_bytes(p, end, &tmp, sizeof(tmp)); - if (IS_ERR(p)) - goto out_err; - if (tmp != SEAL_ALG_DES) { - p = ERR_PTR(-ENOSYS); - goto out_err; - } - p = simple_get_bytes(p, end, &time32, sizeof(time32)); - if (IS_ERR(p)) - goto out_err; - /* unsigned 32-bit time overflows in year 2106 */ - ctx->endtime = (time64_t)time32; - p = simple_get_bytes(p, end, &seq_send, sizeof(seq_send)); - if (IS_ERR(p)) - goto out_err; - atomic_set(&ctx->seq_send, seq_send); - p = simple_get_netobj(p, end, &ctx->mech_used); - if (IS_ERR(p)) - goto out_err; - p = get_key(p, end, ctx, &ctx->enc); - if (IS_ERR(p)) - goto out_err_free_mech; - p = get_key(p, end, ctx, &ctx->seq); - if (IS_ERR(p)) - goto out_err_free_key1; - if (p != end) { - p = ERR_PTR(-EFAULT); - goto out_err_free_key2; - } - - return 0; - -out_err_free_key2: - crypto_free_sync_skcipher(ctx->seq); -out_err_free_key1: - crypto_free_sync_skcipher(ctx->enc); -out_err_free_mech: - kfree(ctx->mech_used.data); -out_err: - return PTR_ERR(p); -} - -#if defined(CONFIG_RPCSEC_GSS_KRB5_SIMPLIFIED) -static int -gss_krb5_import_ctx_des(struct krb5_ctx *ctx, gfp_t gfp_mask) -{ - return -EINVAL; -} - -static int -gss_krb5_import_ctx_v1(struct krb5_ctx *ctx, gfp_t gfp_mask) -{ - struct xdr_netobj keyin, keyout; - - keyin.data = ctx->Ksess; - keyin.len = ctx->gk5e->keylength; - - ctx->seq = gss_krb5_alloc_cipher_v1(ctx, &keyin); - if (ctx->seq == NULL) - goto out_err; - ctx->enc = gss_krb5_alloc_cipher_v1(ctx, &keyin); - if (ctx->enc == NULL) - goto out_free_seq; - - /* derive cksum */ - keyout.data = ctx->cksum; - keyout.len = ctx->gk5e->keylength; - if (krb5_derive_key(ctx, &keyin, &keyout, KG_USAGE_SIGN, - KEY_USAGE_SEED_CHECKSUM, gfp_mask)) - goto out_free_enc; - - return 0; - -out_free_enc: - crypto_free_sync_skcipher(ctx->enc); -out_free_seq: - crypto_free_sync_skcipher(ctx->seq); -out_err: - return -EINVAL; -} -#endif - -#if defined(CONFIG_RPCSEC_GSS_KRB5_CRYPTOSYSTEM) - static struct crypto_sync_skcipher * gss_krb5_alloc_cipher_v2(const char *cname, const struct xdr_netobj *key) { @@ -636,8 +391,6 @@ gss_krb5_import_ctx_v2(struct krb5_ctx *ctx, gfp_t gfp_mask) goto out; } -#endif - static int gss_import_v2_context(const void *p, const void *end, struct krb5_ctx *ctx, gfp_t gfp_mask) @@ -671,9 +424,6 @@ gss_import_v2_context(const void *p, const void *end, struct krb5_ctx *ctx, p = simple_get_bytes(p, end, &ctx->enctype, sizeof(ctx->enctype)); if (IS_ERR(p)) goto out_err; - /* Map ENCTYPE_DES3_CBC_SHA1 to ENCTYPE_DES3_CBC_RAW */ - if (ctx->enctype == ENCTYPE_DES3_CBC_SHA1) - ctx->enctype = ENCTYPE_DES3_CBC_RAW; ctx->gk5e = 
gss_krb5_lookup_enctype(ctx->enctype); if (ctx->gk5e == NULL) { dprintk("gss_kerberos_mech: unsupported krb5 enctype %u\n", @@ -700,7 +450,7 @@ gss_import_v2_context(const void *p, const void *end, struct krb5_ctx *ctx, } ctx->mech_used.len = gss_kerberos_mech.gm_oid.len; - return ctx->gk5e->import_ctx(ctx, gfp_mask); + return gss_krb5_import_ctx_v2(ctx, gfp_mask); out_err: return PTR_ERR(p); @@ -718,10 +468,7 @@ gss_krb5_import_sec_context(const void *p, size_t len, struct gss_ctx *ctx_id, if (ctx == NULL) return -ENOMEM; - if (len == 85) - ret = gss_import_v1_context(p, end, ctx); - else - ret = gss_import_v2_context(p, end, ctx, gfp_mask); + ret = gss_import_v2_context(p, end, ctx, gfp_mask); memzero_explicit(&ctx->Ksess, sizeof(ctx->Ksess)); if (ret) { kfree(ctx); diff --git a/net/sunrpc/auth_gss/gss_krb5_seal.c b/net/sunrpc/auth_gss/gss_krb5_seal.c index 146aa755f07df..ce540df9bce46 100644 --- a/net/sunrpc/auth_gss/gss_krb5_seal.c +++ b/net/sunrpc/auth_gss/gss_krb5_seal.c @@ -71,75 +71,6 @@ # define RPCDBG_FACILITY RPCDBG_AUTH #endif -#if defined(CONFIG_RPCSEC_GSS_KRB5_SIMPLIFIED) - -static void * -setup_token(struct krb5_ctx *ctx, struct xdr_netobj *token) -{ - u16 *ptr; - void *krb5_hdr; - int body_size = GSS_KRB5_TOK_HDR_LEN + ctx->gk5e->cksumlength; - - token->len = g_token_size(&ctx->mech_used, body_size); - - ptr = (u16 *)token->data; - g_make_token_header(&ctx->mech_used, body_size, (unsigned char **)&ptr); - - /* ptr now at start of header described in rfc 1964, section 1.2.1: */ - krb5_hdr = ptr; - *ptr++ = KG_TOK_MIC_MSG; - /* - * signalg is stored as if it were converted from LE to host endian, even - * though it's an opaque pair of bytes according to the RFC. - */ - *ptr++ = (__force u16)cpu_to_le16(ctx->gk5e->signalg); - *ptr++ = SEAL_ALG_NONE; - *ptr = 0xffff; - - return krb5_hdr; -} - -u32 -gss_krb5_get_mic_v1(struct krb5_ctx *ctx, struct xdr_buf *text, - struct xdr_netobj *token) -{ - char cksumdata[GSS_KRB5_MAX_CKSUM_LEN]; - struct xdr_netobj md5cksum = {.len = sizeof(cksumdata), - .data = cksumdata}; - void *ptr; - time64_t now; - u32 seq_send; - u8 *cksumkey; - - dprintk("RPC: %s\n", __func__); - BUG_ON(ctx == NULL); - - now = ktime_get_real_seconds(); - - ptr = setup_token(ctx, token); - - if (ctx->gk5e->keyed_cksum) - cksumkey = ctx->cksum; - else - cksumkey = NULL; - - if (make_checksum(ctx, ptr, 8, text, 0, cksumkey, - KG_USAGE_SIGN, &md5cksum)) - return GSS_S_FAILURE; - - memcpy(ptr + GSS_KRB5_TOK_HDR_LEN, md5cksum.data, md5cksum.len); - - seq_send = atomic_fetch_inc(&ctx->seq_send); - - if (krb5_make_seq_num(ctx, ctx->seq, ctx->initiate ? 0 : 0xff, - seq_send, ptr + GSS_KRB5_TOK_HDR_LEN, ptr + 8)) - return GSS_S_FAILURE; - - return (ctx->endtime < now) ? GSS_S_CONTEXT_EXPIRED : GSS_S_COMPLETE; -} - -#endif - static void * setup_token_v2(struct krb5_ctx *ctx, struct xdr_netobj *token) { diff --git a/net/sunrpc/auth_gss/gss_krb5_seqnum.c b/net/sunrpc/auth_gss/gss_krb5_seqnum.c deleted file mode 100644 index 1babc3474e102..0000000000000 --- a/net/sunrpc/auth_gss/gss_krb5_seqnum.c +++ /dev/null @@ -1,106 +0,0 @@ -/* - * linux/net/sunrpc/gss_krb5_seqnum.c - * - * Adapted from MIT Kerberos 5-1.2.1 lib/gssapi/krb5/util_seqnum.c - * - * Copyright (c) 2000 The Regents of the University of Michigan. - * All rights reserved. - * - * Andy Adamson - */ - -/* - * Copyright 1993 by OpenVision Technologies, Inc. 
- * - * Permission to use, copy, modify, distribute, and sell this software - * and its documentation for any purpose is hereby granted without fee, - * provided that the above copyright notice appears in all copies and - * that both that copyright notice and this permission notice appear in - * supporting documentation, and that the name of OpenVision not be used - * in advertising or publicity pertaining to distribution of the software - * without specific, written prior permission. OpenVision makes no - * representations about the suitability of this software for any - * purpose. It is provided "as is" without express or implied warranty. - * - * OPENVISION DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, - * INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO - * EVENT SHALL OPENVISION BE LIABLE FOR ANY SPECIAL, INDIRECT OR - * CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF - * USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR - * OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR - * PERFORMANCE OF THIS SOFTWARE. - */ - -#include -#include -#include - -#include "gss_krb5_internal.h" - -#if IS_ENABLED(CONFIG_SUNRPC_DEBUG) -# define RPCDBG_FACILITY RPCDBG_AUTH -#endif - -s32 -krb5_make_seq_num(struct krb5_ctx *kctx, - struct crypto_sync_skcipher *key, - int direction, - u32 seqnum, - unsigned char *cksum, unsigned char *buf) -{ - unsigned char *plain; - s32 code; - - plain = kmalloc(8, GFP_KERNEL); - if (!plain) - return -ENOMEM; - - plain[0] = (unsigned char) (seqnum & 0xff); - plain[1] = (unsigned char) ((seqnum >> 8) & 0xff); - plain[2] = (unsigned char) ((seqnum >> 16) & 0xff); - plain[3] = (unsigned char) ((seqnum >> 24) & 0xff); - - plain[4] = direction; - plain[5] = direction; - plain[6] = direction; - plain[7] = direction; - - code = krb5_encrypt(key, cksum, plain, buf, 8); - kfree(plain); - return code; -} - -s32 -krb5_get_seq_num(struct krb5_ctx *kctx, - unsigned char *cksum, - unsigned char *buf, - int *direction, u32 *seqnum) -{ - s32 code; - unsigned char *plain; - struct crypto_sync_skcipher *key = kctx->seq; - - dprintk("RPC: krb5_get_seq_num:\n"); - - plain = kmalloc(8, GFP_KERNEL); - if (!plain) - return -ENOMEM; - - if ((code = krb5_decrypt(key, cksum, buf, plain, 8))) - goto out; - - if ((plain[4] != plain[5]) || (plain[4] != plain[6]) || - (plain[4] != plain[7])) { - code = (s32)KG_BAD_SEQ; - goto out; - } - - *direction = plain[4]; - - *seqnum = ((plain[0]) | - (plain[1] << 8) | (plain[2] << 16) | (plain[3] << 24)); - -out: - kfree(plain); - return code; -} diff --git a/net/sunrpc/auth_gss/gss_krb5_test.c b/net/sunrpc/auth_gss/gss_krb5_test.c index 95ca783795c5e..85625e3f3814e 100644 --- a/net/sunrpc/auth_gss/gss_krb5_test.c +++ b/net/sunrpc/auth_gss/gss_krb5_test.c @@ -320,208 +320,12 @@ static void rfc3961_nfold_case(struct kunit *test) "result mismatch"); } -/* - * RFC 3961 Appendix A.3. DES3 DR and DK - * - * These tests show the derived-random and derived-key values for the - * des3-hmac-sha1-kd encryption scheme, using the DR and DK functions - * defined in section 6.3.1. The input keys were randomly generated; - * the usage values are from this specification. - * - * This test material is copyright (C) The Internet Society (2005). 
- */ - -DEFINE_HEX_XDR_NETOBJ(des3_dk_usage_155, - 0x00, 0x00, 0x00, 0x01, 0x55 -); - -DEFINE_HEX_XDR_NETOBJ(des3_dk_usage_1aa, - 0x00, 0x00, 0x00, 0x01, 0xaa -); - -DEFINE_HEX_XDR_NETOBJ(des3_dk_usage_kerberos, - 0x6b, 0x65, 0x72, 0x62, 0x65, 0x72, 0x6f, 0x73 -); - -DEFINE_HEX_XDR_NETOBJ(des3_dk_test1_base_key, - 0xdc, 0xe0, 0x6b, 0x1f, 0x64, 0xc8, 0x57, 0xa1, - 0x1c, 0x3d, 0xb5, 0x7c, 0x51, 0x89, 0x9b, 0x2c, - 0xc1, 0x79, 0x10, 0x08, 0xce, 0x97, 0x3b, 0x92 -); -DEFINE_HEX_XDR_NETOBJ(des3_dk_test1_derived_key, - 0x92, 0x51, 0x79, 0xd0, 0x45, 0x91, 0xa7, 0x9b, - 0x5d, 0x31, 0x92, 0xc4, 0xa7, 0xe9, 0xc2, 0x89, - 0xb0, 0x49, 0xc7, 0x1f, 0x6e, 0xe6, 0x04, 0xcd -); - -DEFINE_HEX_XDR_NETOBJ(des3_dk_test2_base_key, - 0x5e, 0x13, 0xd3, 0x1c, 0x70, 0xef, 0x76, 0x57, - 0x46, 0x57, 0x85, 0x31, 0xcb, 0x51, 0xc1, 0x5b, - 0xf1, 0x1c, 0xa8, 0x2c, 0x97, 0xce, 0xe9, 0xf2 -); -DEFINE_HEX_XDR_NETOBJ(des3_dk_test2_derived_key, - 0x9e, 0x58, 0xe5, 0xa1, 0x46, 0xd9, 0x94, 0x2a, - 0x10, 0x1c, 0x46, 0x98, 0x45, 0xd6, 0x7a, 0x20, - 0xe3, 0xc4, 0x25, 0x9e, 0xd9, 0x13, 0xf2, 0x07 -); - -DEFINE_HEX_XDR_NETOBJ(des3_dk_test3_base_key, - 0x98, 0xe6, 0xfd, 0x8a, 0x04, 0xa4, 0xb6, 0x85, - 0x9b, 0x75, 0xa1, 0x76, 0x54, 0x0b, 0x97, 0x52, - 0xba, 0xd3, 0xec, 0xd6, 0x10, 0xa2, 0x52, 0xbc -); -DEFINE_HEX_XDR_NETOBJ(des3_dk_test3_derived_key, - 0x13, 0xfe, 0xf8, 0x0d, 0x76, 0x3e, 0x94, 0xec, - 0x6d, 0x13, 0xfd, 0x2c, 0xa1, 0xd0, 0x85, 0x07, - 0x02, 0x49, 0xda, 0xd3, 0x98, 0x08, 0xea, 0xbf -); - -DEFINE_HEX_XDR_NETOBJ(des3_dk_test4_base_key, - 0x62, 0x2a, 0xec, 0x25, 0xa2, 0xfe, 0x2c, 0xad, - 0x70, 0x94, 0x68, 0x0b, 0x7c, 0x64, 0x94, 0x02, - 0x80, 0x08, 0x4c, 0x1a, 0x7c, 0xec, 0x92, 0xb5 -); -DEFINE_HEX_XDR_NETOBJ(des3_dk_test4_derived_key, - 0xf8, 0xdf, 0xbf, 0x04, 0xb0, 0x97, 0xe6, 0xd9, - 0xdc, 0x07, 0x02, 0x68, 0x6b, 0xcb, 0x34, 0x89, - 0xd9, 0x1f, 0xd9, 0xa4, 0x51, 0x6b, 0x70, 0x3e -); - -DEFINE_HEX_XDR_NETOBJ(des3_dk_test5_base_key, - 0xd3, 0xf8, 0x29, 0x8c, 0xcb, 0x16, 0x64, 0x38, - 0xdc, 0xb9, 0xb9, 0x3e, 0xe5, 0xa7, 0x62, 0x92, - 0x86, 0xa4, 0x91, 0xf8, 0x38, 0xf8, 0x02, 0xfb -); -DEFINE_HEX_XDR_NETOBJ(des3_dk_test5_derived_key, - 0x23, 0x70, 0xda, 0x57, 0x5d, 0x2a, 0x3d, 0xa8, - 0x64, 0xce, 0xbf, 0xdc, 0x52, 0x04, 0xd5, 0x6d, - 0xf7, 0x79, 0xa7, 0xdf, 0x43, 0xd9, 0xda, 0x43 -); - -DEFINE_HEX_XDR_NETOBJ(des3_dk_test6_base_key, - 0xc1, 0x08, 0x16, 0x49, 0xad, 0xa7, 0x43, 0x62, - 0xe6, 0xa1, 0x45, 0x9d, 0x01, 0xdf, 0xd3, 0x0d, - 0x67, 0xc2, 0x23, 0x4c, 0x94, 0x07, 0x04, 0xda -); -DEFINE_HEX_XDR_NETOBJ(des3_dk_test6_derived_key, - 0x34, 0x80, 0x57, 0xec, 0x98, 0xfd, 0xc4, 0x80, - 0x16, 0x16, 0x1c, 0x2a, 0x4c, 0x7a, 0x94, 0x3e, - 0x92, 0xae, 0x49, 0x2c, 0x98, 0x91, 0x75, 0xf7 -); - -DEFINE_HEX_XDR_NETOBJ(des3_dk_test7_base_key, - 0x5d, 0x15, 0x4a, 0xf2, 0x38, 0xf4, 0x67, 0x13, - 0x15, 0x57, 0x19, 0xd5, 0x5e, 0x2f, 0x1f, 0x79, - 0x0d, 0xd6, 0x61, 0xf2, 0x79, 0xa7, 0x91, 0x7c -); -DEFINE_HEX_XDR_NETOBJ(des3_dk_test7_derived_key, - 0xa8, 0x80, 0x8a, 0xc2, 0x67, 0xda, 0xda, 0x3d, - 0xcb, 0xe9, 0xa7, 0xc8, 0x46, 0x26, 0xfb, 0xc7, - 0x61, 0xc2, 0x94, 0xb0, 0x13, 0x15, 0xe5, 0xc1 -); - -DEFINE_HEX_XDR_NETOBJ(des3_dk_test8_base_key, - 0x79, 0x85, 0x62, 0xe0, 0x49, 0x85, 0x2f, 0x57, - 0xdc, 0x8c, 0x34, 0x3b, 0xa1, 0x7f, 0x2c, 0xa1, - 0xd9, 0x73, 0x94, 0xef, 0xc8, 0xad, 0xc4, 0x43 -); -DEFINE_HEX_XDR_NETOBJ(des3_dk_test8_derived_key, - 0xc8, 0x13, 0xf8, 0x8a, 0x3b, 0xe3, 0xb3, 0x34, - 0xf7, 0x54, 0x25, 0xce, 0x91, 0x75, 0xfb, 0xe3, - 0xc8, 0x49, 0x3b, 0x89, 0xc8, 0x70, 0x3b, 0x49 -); - -DEFINE_HEX_XDR_NETOBJ(des3_dk_test9_base_key, 
- 0x26, 0xdc, 0xe3, 0x34, 0xb5, 0x45, 0x29, 0x2f, - 0x2f, 0xea, 0xb9, 0xa8, 0x70, 0x1a, 0x89, 0xa4, - 0xb9, 0x9e, 0xb9, 0x94, 0x2c, 0xec, 0xd0, 0x16 -); -DEFINE_HEX_XDR_NETOBJ(des3_dk_test9_derived_key, - 0xf4, 0x8f, 0xfd, 0x6e, 0x83, 0xf8, 0x3e, 0x73, - 0x54, 0xe6, 0x94, 0xfd, 0x25, 0x2c, 0xf8, 0x3b, - 0xfe, 0x58, 0xf7, 0xd5, 0xba, 0x37, 0xec, 0x5d -); - -static const struct gss_krb5_test_param rfc3961_kdf_test_params[] = { - { - .desc = "des3-hmac-sha1 key derivation case 1", - .enctype = ENCTYPE_DES3_CBC_RAW, - .base_key = &des3_dk_test1_base_key, - .usage = &des3_dk_usage_155, - .expected_result = &des3_dk_test1_derived_key, - }, - { - .desc = "des3-hmac-sha1 key derivation case 2", - .enctype = ENCTYPE_DES3_CBC_RAW, - .base_key = &des3_dk_test2_base_key, - .usage = &des3_dk_usage_1aa, - .expected_result = &des3_dk_test2_derived_key, - }, - { - .desc = "des3-hmac-sha1 key derivation case 3", - .enctype = ENCTYPE_DES3_CBC_RAW, - .base_key = &des3_dk_test3_base_key, - .usage = &des3_dk_usage_155, - .expected_result = &des3_dk_test3_derived_key, - }, - { - .desc = "des3-hmac-sha1 key derivation case 4", - .enctype = ENCTYPE_DES3_CBC_RAW, - .base_key = &des3_dk_test4_base_key, - .usage = &des3_dk_usage_1aa, - .expected_result = &des3_dk_test4_derived_key, - }, - { - .desc = "des3-hmac-sha1 key derivation case 5", - .enctype = ENCTYPE_DES3_CBC_RAW, - .base_key = &des3_dk_test5_base_key, - .usage = &des3_dk_usage_kerberos, - .expected_result = &des3_dk_test5_derived_key, - }, - { - .desc = "des3-hmac-sha1 key derivation case 6", - .enctype = ENCTYPE_DES3_CBC_RAW, - .base_key = &des3_dk_test6_base_key, - .usage = &des3_dk_usage_155, - .expected_result = &des3_dk_test6_derived_key, - }, - { - .desc = "des3-hmac-sha1 key derivation case 7", - .enctype = ENCTYPE_DES3_CBC_RAW, - .base_key = &des3_dk_test7_base_key, - .usage = &des3_dk_usage_1aa, - .expected_result = &des3_dk_test7_derived_key, - }, - { - .desc = "des3-hmac-sha1 key derivation case 8", - .enctype = ENCTYPE_DES3_CBC_RAW, - .base_key = &des3_dk_test8_base_key, - .usage = &des3_dk_usage_155, - .expected_result = &des3_dk_test8_derived_key, - }, - { - .desc = "des3-hmac-sha1 key derivation case 9", - .enctype = ENCTYPE_DES3_CBC_RAW, - .base_key = &des3_dk_test9_base_key, - .usage = &des3_dk_usage_1aa, - .expected_result = &des3_dk_test9_derived_key, - }, -}; - -/* Creates the function rfc3961_kdf_gen_params */ -KUNIT_ARRAY_PARAM(rfc3961_kdf, rfc3961_kdf_test_params, gss_krb5_get_desc); - static struct kunit_case rfc3961_test_cases[] = { { .name = "RFC 3961 n-fold", .run_case = rfc3961_nfold_case, .generate_params = rfc3961_nfold_gen_params, }, - { - .name = "RFC 3961 key derivation", - .run_case = kdf_case, - .generate_params = rfc3961_kdf_gen_params, - }, {} }; diff --git a/net/sunrpc/auth_gss/gss_krb5_unseal.c b/net/sunrpc/auth_gss/gss_krb5_unseal.c index 7d6d4ae4a3c96..4fbc50a0a2c4b 100644 --- a/net/sunrpc/auth_gss/gss_krb5_unseal.c +++ b/net/sunrpc/auth_gss/gss_krb5_unseal.c @@ -69,83 +69,6 @@ # define RPCDBG_FACILITY RPCDBG_AUTH #endif - -#if defined(CONFIG_RPCSEC_GSS_KRB5_SIMPLIFIED) -/* read_token is a mic token, and message_buffer is the data that the mic was - * supposedly taken over. 
*/ -u32 -gss_krb5_verify_mic_v1(struct krb5_ctx *ctx, struct xdr_buf *message_buffer, - struct xdr_netobj *read_token) -{ - int signalg; - int sealalg; - char cksumdata[GSS_KRB5_MAX_CKSUM_LEN]; - struct xdr_netobj md5cksum = {.len = sizeof(cksumdata), - .data = cksumdata}; - s32 now; - int direction; - u32 seqnum; - unsigned char *ptr = (unsigned char *)read_token->data; - int bodysize; - u8 *cksumkey; - - dprintk("RPC: krb5_read_token\n"); - - if (g_verify_token_header(&ctx->mech_used, &bodysize, &ptr, - read_token->len)) - return GSS_S_DEFECTIVE_TOKEN; - - if ((ptr[0] != ((KG_TOK_MIC_MSG >> 8) & 0xff)) || - (ptr[1] != (KG_TOK_MIC_MSG & 0xff))) - return GSS_S_DEFECTIVE_TOKEN; - - /* XXX sanity-check bodysize?? */ - - signalg = ptr[2] + (ptr[3] << 8); - if (signalg != ctx->gk5e->signalg) - return GSS_S_DEFECTIVE_TOKEN; - - sealalg = ptr[4] + (ptr[5] << 8); - if (sealalg != SEAL_ALG_NONE) - return GSS_S_DEFECTIVE_TOKEN; - - if ((ptr[6] != 0xff) || (ptr[7] != 0xff)) - return GSS_S_DEFECTIVE_TOKEN; - - if (ctx->gk5e->keyed_cksum) - cksumkey = ctx->cksum; - else - cksumkey = NULL; - - if (make_checksum(ctx, ptr, 8, message_buffer, 0, - cksumkey, KG_USAGE_SIGN, &md5cksum)) - return GSS_S_FAILURE; - - if (memcmp(md5cksum.data, ptr + GSS_KRB5_TOK_HDR_LEN, - ctx->gk5e->cksumlength)) - return GSS_S_BAD_SIG; - - /* it got through unscathed. Make sure the context is unexpired */ - - now = ktime_get_real_seconds(); - - if (now > ctx->endtime) - return GSS_S_CONTEXT_EXPIRED; - - /* do sequencing checks */ - - if (krb5_get_seq_num(ctx, ptr + GSS_KRB5_TOK_HDR_LEN, ptr + 8, - &direction, &seqnum)) - return GSS_S_FAILURE; - - if ((ctx->initiate && direction != 0xff) || - (!ctx->initiate && direction != 0)) - return GSS_S_BAD_SIG; - - return GSS_S_COMPLETE; -} -#endif - u32 gss_krb5_verify_mic_v2(struct krb5_ctx *ctx, struct xdr_buf *message_buffer, struct xdr_netobj *read_token) diff --git a/net/sunrpc/auth_gss/gss_krb5_wrap.c b/net/sunrpc/auth_gss/gss_krb5_wrap.c index 6d6b082380b23..b3e1738ff6bfa 100644 --- a/net/sunrpc/auth_gss/gss_krb5_wrap.c +++ b/net/sunrpc/auth_gss/gss_krb5_wrap.c @@ -40,293 +40,6 @@ # define RPCDBG_FACILITY RPCDBG_AUTH #endif -#if defined(CONFIG_RPCSEC_GSS_KRB5_SIMPLIFIED) - -static inline int -gss_krb5_padding(int blocksize, int length) -{ - return blocksize - (length % blocksize); -} - -static inline void -gss_krb5_add_padding(struct xdr_buf *buf, int offset, int blocksize) -{ - int padding = gss_krb5_padding(blocksize, buf->len - offset); - char *p; - struct kvec *iov; - - if (buf->page_len || buf->tail[0].iov_len) - iov = &buf->tail[0]; - else - iov = &buf->head[0]; - p = iov->iov_base + iov->iov_len; - iov->iov_len += padding; - buf->len += padding; - memset(p, padding, padding); -} - -static inline int -gss_krb5_remove_padding(struct xdr_buf *buf, int blocksize) -{ - u8 *ptr; - u8 pad; - size_t len = buf->len; - - if (len <= buf->head[0].iov_len) { - pad = *(u8 *)(buf->head[0].iov_base + len - 1); - if (pad > buf->head[0].iov_len) - return -EINVAL; - buf->head[0].iov_len -= pad; - goto out; - } else - len -= buf->head[0].iov_len; - if (len <= buf->page_len) { - unsigned int last = (buf->page_base + len - 1) - >>PAGE_SHIFT; - unsigned int offset = (buf->page_base + len - 1) - & (PAGE_SIZE - 1); - ptr = kmap_atomic(buf->pages[last]); - pad = *(ptr + offset); - kunmap_atomic(ptr); - goto out; - } else - len -= buf->page_len; - BUG_ON(len > buf->tail[0].iov_len); - pad = *(u8 *)(buf->tail[0].iov_base + len - 1); -out: - /* XXX: NOTE: we do not adjust the page lengths--they 
represent - * a range of data in the real filesystem page cache, and we need - * to know that range so the xdr code can properly place read data. - * However adjusting the head length, as we do above, is harmless. - * In the case of a request that fits into a single page, the server - * also uses length and head length together to determine the original - * start of the request to copy the request for deferal; so it's - * easier on the server if we adjust head and tail length in tandem. - * It's not really a problem that we don't fool with the page and - * tail lengths, though--at worst badly formed xdr might lead the - * server to attempt to parse the padding. - * XXX: Document all these weird requirements for gss mechanism - * wrap/unwrap functions. */ - if (pad > blocksize) - return -EINVAL; - if (buf->len > pad) - buf->len -= pad; - else - return -EINVAL; - return 0; -} - -/* Assumptions: the head and tail of inbuf are ours to play with. - * The pages, however, may be real pages in the page cache and we replace - * them with scratch pages from **pages before writing to them. */ -/* XXX: obviously the above should be documentation of wrap interface, - * and shouldn't be in this kerberos-specific file. */ - -/* XXX factor out common code with seal/unseal. */ - -u32 -gss_krb5_wrap_v1(struct krb5_ctx *kctx, int offset, - struct xdr_buf *buf, struct page **pages) -{ - char cksumdata[GSS_KRB5_MAX_CKSUM_LEN]; - struct xdr_netobj md5cksum = {.len = sizeof(cksumdata), - .data = cksumdata}; - int blocksize = 0, plainlen; - unsigned char *ptr, *msg_start; - time64_t now; - int headlen; - struct page **tmp_pages; - u32 seq_send; - u8 *cksumkey; - u32 conflen = crypto_sync_skcipher_blocksize(kctx->enc); - - dprintk("RPC: %s\n", __func__); - - now = ktime_get_real_seconds(); - - blocksize = crypto_sync_skcipher_blocksize(kctx->enc); - gss_krb5_add_padding(buf, offset, blocksize); - BUG_ON((buf->len - offset) % blocksize); - plainlen = conflen + buf->len - offset; - - headlen = g_token_size(&kctx->mech_used, - GSS_KRB5_TOK_HDR_LEN + kctx->gk5e->cksumlength + plainlen) - - (buf->len - offset); - - ptr = buf->head[0].iov_base + offset; - /* shift data to make room for header. */ - xdr_extend_head(buf, offset, headlen); - - /* XXX Would be cleverer to encrypt while copying. */ - BUG_ON((buf->len - offset - headlen) % blocksize); - - g_make_token_header(&kctx->mech_used, - GSS_KRB5_TOK_HDR_LEN + - kctx->gk5e->cksumlength + plainlen, &ptr); - - - /* ptr now at header described in rfc 1964, section 1.2.1: */ - ptr[0] = (unsigned char) ((KG_TOK_WRAP_MSG >> 8) & 0xff); - ptr[1] = (unsigned char) (KG_TOK_WRAP_MSG & 0xff); - - msg_start = ptr + GSS_KRB5_TOK_HDR_LEN + kctx->gk5e->cksumlength; - - /* - * signalg and sealalg are stored as if they were converted from LE - * to host endian, even though they're opaque pairs of bytes according - * to the RFC. 
- */ - *(__le16 *)(ptr + 2) = cpu_to_le16(kctx->gk5e->signalg); - *(__le16 *)(ptr + 4) = cpu_to_le16(kctx->gk5e->sealalg); - ptr[6] = 0xff; - ptr[7] = 0xff; - - krb5_make_confounder(msg_start, conflen); - - if (kctx->gk5e->keyed_cksum) - cksumkey = kctx->cksum; - else - cksumkey = NULL; - - /* XXXJBF: UGH!: */ - tmp_pages = buf->pages; - buf->pages = pages; - if (make_checksum(kctx, ptr, 8, buf, offset + headlen - conflen, - cksumkey, KG_USAGE_SEAL, &md5cksum)) - return GSS_S_FAILURE; - buf->pages = tmp_pages; - - memcpy(ptr + GSS_KRB5_TOK_HDR_LEN, md5cksum.data, md5cksum.len); - - seq_send = atomic_fetch_inc(&kctx->seq_send); - - /* XXX would probably be more efficient to compute checksum - * and encrypt at the same time: */ - if ((krb5_make_seq_num(kctx, kctx->seq, kctx->initiate ? 0 : 0xff, - seq_send, ptr + GSS_KRB5_TOK_HDR_LEN, ptr + 8))) - return GSS_S_FAILURE; - - if (gss_encrypt_xdr_buf(kctx->enc, buf, - offset + headlen - conflen, pages)) - return GSS_S_FAILURE; - - return (kctx->endtime < now) ? GSS_S_CONTEXT_EXPIRED : GSS_S_COMPLETE; -} - -u32 -gss_krb5_unwrap_v1(struct krb5_ctx *kctx, int offset, int len, - struct xdr_buf *buf, unsigned int *slack, - unsigned int *align) -{ - int signalg; - int sealalg; - char cksumdata[GSS_KRB5_MAX_CKSUM_LEN]; - struct xdr_netobj md5cksum = {.len = sizeof(cksumdata), - .data = cksumdata}; - time64_t now; - int direction; - s32 seqnum; - unsigned char *ptr; - int bodysize; - void *data_start, *orig_start; - int data_len; - int blocksize; - u32 conflen = crypto_sync_skcipher_blocksize(kctx->enc); - int crypt_offset; - u8 *cksumkey; - unsigned int saved_len = buf->len; - - dprintk("RPC: gss_unwrap_kerberos\n"); - - ptr = (u8 *)buf->head[0].iov_base + offset; - if (g_verify_token_header(&kctx->mech_used, &bodysize, &ptr, - len - offset)) - return GSS_S_DEFECTIVE_TOKEN; - - if ((ptr[0] != ((KG_TOK_WRAP_MSG >> 8) & 0xff)) || - (ptr[1] != (KG_TOK_WRAP_MSG & 0xff))) - return GSS_S_DEFECTIVE_TOKEN; - - /* XXX sanity-check bodysize?? */ - - /* get the sign and seal algorithms */ - - signalg = ptr[2] + (ptr[3] << 8); - if (signalg != kctx->gk5e->signalg) - return GSS_S_DEFECTIVE_TOKEN; - - sealalg = ptr[4] + (ptr[5] << 8); - if (sealalg != kctx->gk5e->sealalg) - return GSS_S_DEFECTIVE_TOKEN; - - if ((ptr[6] != 0xff) || (ptr[7] != 0xff)) - return GSS_S_DEFECTIVE_TOKEN; - - /* - * Data starts after token header and checksum. ptr points - * to the beginning of the token header - */ - crypt_offset = ptr + (GSS_KRB5_TOK_HDR_LEN + kctx->gk5e->cksumlength) - - (unsigned char *)buf->head[0].iov_base; - - buf->len = len; - if (gss_decrypt_xdr_buf(kctx->enc, buf, crypt_offset)) - return GSS_S_DEFECTIVE_TOKEN; - - if (kctx->gk5e->keyed_cksum) - cksumkey = kctx->cksum; - else - cksumkey = NULL; - - if (make_checksum(kctx, ptr, 8, buf, crypt_offset, - cksumkey, KG_USAGE_SEAL, &md5cksum)) - return GSS_S_FAILURE; - - if (memcmp(md5cksum.data, ptr + GSS_KRB5_TOK_HDR_LEN, - kctx->gk5e->cksumlength)) - return GSS_S_BAD_SIG; - - /* it got through unscathed. Make sure the context is unexpired */ - - now = ktime_get_real_seconds(); - - if (now > kctx->endtime) - return GSS_S_CONTEXT_EXPIRED; - - /* do sequencing checks */ - - if (krb5_get_seq_num(kctx, ptr + GSS_KRB5_TOK_HDR_LEN, - ptr + 8, &direction, &seqnum)) - return GSS_S_BAD_SIG; - - if ((kctx->initiate && direction != 0xff) || - (!kctx->initiate && direction != 0)) - return GSS_S_BAD_SIG; - - /* Copy the data back to the right position. XXX: Would probably be - * better to copy and encrypt at the same time. 
*/ - - blocksize = crypto_sync_skcipher_blocksize(kctx->enc); - data_start = ptr + (GSS_KRB5_TOK_HDR_LEN + kctx->gk5e->cksumlength) + - conflen; - orig_start = buf->head[0].iov_base + offset; - data_len = (buf->head[0].iov_base + buf->head[0].iov_len) - data_start; - memmove(orig_start, data_start, data_len); - buf->head[0].iov_len -= (data_start - orig_start); - buf->len = len - (data_start - orig_start); - - if (gss_krb5_remove_padding(buf, blocksize)) - return GSS_S_DEFECTIVE_TOKEN; - - /* slack must include room for krb5 padding */ - *slack = XDR_QUADLEN(saved_len - buf->len); - /* The GSS blob always precedes the RPC message payload */ - *align = *slack; - return GSS_S_COMPLETE; -} - -#endif - /* * We can shift data by up to LOCAL_BUF_LEN bytes in a pass. If we need * to do more than that, we shift repeatedly. Kevin Coffman reports diff --git a/net/sunrpc/auth_gss/svcauth_gss.c b/net/sunrpc/auth_gss/svcauth_gss.c index c4a566737085c..18734e70c5ddb 100644 --- a/net/sunrpc/auth_gss/svcauth_gss.c +++ b/net/sunrpc/auth_gss/svcauth_gss.c @@ -986,7 +986,7 @@ svcauth_gss_unwrap_priv(struct svc_rqst *rqstp, u32 seq, struct gss_ctx *ctx) return -EINVAL; } -static int +static enum svc_auth_status svcauth_gss_set_client(struct svc_rqst *rqstp) { struct gss_svc_data *svcdata = rqstp->rq_auth_data; @@ -1634,7 +1634,7 @@ svcauth_gss_decode_credbody(struct xdr_stream *xdr, * * The rqstp->rq_auth_stat field is also set (see RFCs 2203 and 5531). */ -static int +static enum svc_auth_status svcauth_gss_accept(struct svc_rqst *rqstp) { struct gss_svc_data *svcdata = rqstp->rq_auth_data; @@ -1945,9 +1945,6 @@ static int svcauth_gss_wrap_priv(struct svc_rqst *rqstp) * %0: the Reply is ready to be sent * %-ENOMEM: failed to allocate memory * %-EINVAL: encoding error - * - * XXX: These return values do not match the return values documented - * for the auth_ops ->release method in linux/sunrpc/svcauth.h. 
*/ static int svcauth_gss_release(struct svc_rqst *rqstp) diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c index d7c697af3762f..8d75290f1a31d 100644 --- a/net/sunrpc/clnt.c +++ b/net/sunrpc/clnt.c @@ -534,6 +534,8 @@ struct rpc_clnt *rpc_create(struct rpc_create_args *args) .servername = args->servername, .bc_xprt = args->bc_xprt, .xprtsec = args->xprtsec, + .connect_timeout = args->connect_timeout, + .reconnect_timeout = args->reconnect_timeout, }; char servername[48]; struct rpc_clnt *clnt; @@ -2602,6 +2604,7 @@ call_decode(struct rpc_task *task) case 0: task->tk_action = rpc_exit_task; task->tk_status = rpcauth_unwrap_resp(task, &xdr); + xdr_finish_decode(&xdr); return; case -EAGAIN: task->tk_status = 0; @@ -3069,6 +3072,11 @@ int rpc_clnt_add_xprt(struct rpc_clnt *clnt, } xprt->resvport = resvport; xprt->reuseport = reuseport; + + if (xprtargs->connect_timeout) + connect_timeout = xprtargs->connect_timeout; + if (xprtargs->reconnect_timeout) + reconnect_timeout = xprtargs->reconnect_timeout; if (xprt->ops->set_connect_timeout != NULL) xprt->ops->set_connect_timeout(xprt, connect_timeout, diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c index 587811a002c98..812fda9d45dd6 100644 --- a/net/sunrpc/svc.c +++ b/net/sunrpc/svc.c @@ -513,9 +513,9 @@ __svc_create(struct svc_program *prog, unsigned int bufsize, int npools, INIT_LIST_HEAD(&pool->sp_all_threads); spin_lock_init(&pool->sp_lock); + percpu_counter_init(&pool->sp_messages_arrived, 0, GFP_KERNEL); percpu_counter_init(&pool->sp_sockets_queued, 0, GFP_KERNEL); percpu_counter_init(&pool->sp_threads_woken, 0, GFP_KERNEL); - percpu_counter_init(&pool->sp_threads_timedout, 0, GFP_KERNEL); } return serv; @@ -588,9 +588,9 @@ svc_destroy(struct kref *ref) for (i = 0; i < serv->sv_nrpools; i++) { struct svc_pool *pool = &serv->sv_pools[i]; + percpu_counter_destroy(&pool->sp_messages_arrived); percpu_counter_destroy(&pool->sp_sockets_queued); percpu_counter_destroy(&pool->sp_threads_woken); - percpu_counter_destroy(&pool->sp_threads_timedout); } kfree(serv->sv_pools); kfree(serv); @@ -689,23 +689,44 @@ svc_prepare_thread(struct svc_serv *serv, struct svc_pool *pool, int node) return rqstp; } -/* - * Choose a pool in which to create a new thread, for svc_set_num_threads +/** + * svc_pool_wake_idle_thread - Awaken an idle thread in @pool + * @pool: service thread pool + * + * Can be called from soft IRQ or process context. Finding an idle + * service thread and marking it BUSY is atomic with respect to + * other calls to svc_pool_wake_idle_thread(). 
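+ *
+ * A typical caller, sketched here for illustration only (this mirrors
+ * the svc_xprt_enqueue() hunk later in this patch; it is not new code):
+ *
+ *	spin_lock_bh(&pool->sp_lock);
+ *	list_add_tail(&xprt->xpt_ready, &pool->sp_sockets);
+ *	spin_unlock_bh(&pool->sp_lock);
+ *	svc_pool_wake_idle_thread(pool);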
+ * */ -static inline struct svc_pool * -choose_pool(struct svc_serv *serv, struct svc_pool *pool, unsigned int *state) +void svc_pool_wake_idle_thread(struct svc_pool *pool) { - if (pool != NULL) - return pool; + struct svc_rqst *rqstp; + + rcu_read_lock(); + list_for_each_entry_rcu(rqstp, &pool->sp_all_threads, rq_all) { + if (test_and_set_bit(RQ_BUSY, &rqstp->rq_flags)) + continue; + + WRITE_ONCE(rqstp->rq_qtime, ktime_get()); + wake_up_process(rqstp->rq_task); + rcu_read_unlock(); + percpu_counter_inc(&pool->sp_threads_woken); + trace_svc_wake_up(rqstp->rq_task->pid); + return; + } + rcu_read_unlock(); - return &serv->sv_pools[(*state)++ % serv->sv_nrpools]; + set_bit(SP_CONGESTED, &pool->sp_flags); } -/* - * Choose a thread to kill, for svc_set_num_threads - */ -static inline struct task_struct * -choose_victim(struct svc_serv *serv, struct svc_pool *pool, unsigned int *state) +static struct svc_pool * +svc_pool_next(struct svc_serv *serv, struct svc_pool *pool, unsigned int *state) +{ + return pool ? pool : &serv->sv_pools[(*state)++ % serv->sv_nrpools]; +} + +static struct task_struct * +svc_pool_victim(struct svc_serv *serv, struct svc_pool *pool, unsigned int *state) { unsigned int i; struct task_struct *task = NULL; @@ -713,7 +734,6 @@ choose_victim(struct svc_serv *serv, struct svc_pool *pool, unsigned int *state) if (pool != NULL) { spin_lock_bh(&pool->sp_lock); } else { - /* choose a pool in round-robin fashion */ for (i = 0; i < serv->sv_nrpools; i++) { pool = &serv->sv_pools[--(*state) % serv->sv_nrpools]; spin_lock_bh(&pool->sp_lock); @@ -728,21 +748,15 @@ choose_victim(struct svc_serv *serv, struct svc_pool *pool, unsigned int *state) if (!list_empty(&pool->sp_all_threads)) { struct svc_rqst *rqstp; - /* - * Remove from the pool->sp_all_threads list - * so we don't try to kill it again. - */ rqstp = list_entry(pool->sp_all_threads.next, struct svc_rqst, rq_all); set_bit(RQ_VICTIM, &rqstp->rq_flags); list_del_rcu(&rqstp->rq_all); task = rqstp->rq_task; } spin_unlock_bh(&pool->sp_lock); - return task; } -/* create new threads */ static int svc_start_kthreads(struct svc_serv *serv, struct svc_pool *pool, int nrservs) { @@ -754,13 +768,12 @@ svc_start_kthreads(struct svc_serv *serv, struct svc_pool *pool, int nrservs) do { nrservs--; - chosen_pool = choose_pool(serv, pool, &state); - + chosen_pool = svc_pool_next(serv, pool, &state); node = svc_pool_map_get_node(chosen_pool->sp_id); + rqstp = svc_prepare_thread(serv, chosen_pool, node); if (IS_ERR(rqstp)) return PTR_ERR(rqstp); - task = kthread_create_on_node(serv->sv_threadfn, rqstp, node, "%s", serv->sv_name); if (IS_ERR(task)) { @@ -779,15 +792,6 @@ svc_start_kthreads(struct svc_serv *serv, struct svc_pool *pool, int nrservs) return 0; } -/* - * Create or destroy enough new threads to make the number - * of threads the given number. If `pool' is non-NULL, applies - * only to threads in that pool, otherwise round-robins between - * all pools. Caller must ensure that mutual exclusion between this and - * server startup or shutdown. 
- */ - -/* destroy old threads */ static int svc_stop_kthreads(struct svc_serv *serv, struct svc_pool *pool, int nrservs) { @@ -795,9 +799,8 @@ svc_stop_kthreads(struct svc_serv *serv, struct svc_pool *pool, int nrservs) struct task_struct *task; unsigned int state = serv->sv_nrthreads-1; - /* destroy old threads */ do { - task = choose_victim(serv, pool, &state); + task = svc_pool_victim(serv, pool, &state); if (task == NULL) break; rqstp = kthread_data(task); @@ -809,6 +812,23 @@ svc_stop_kthreads(struct svc_serv *serv, struct svc_pool *pool, int nrservs) return 0; } +/** + * svc_set_num_threads - adjust number of threads per RPC service + * @serv: RPC service to adjust + * @pool: Specific pool from which to choose threads, or NULL + * @nrservs: New number of threads for @serv (0 or less means kill all threads) + * + * Create or destroy threads to make the number of threads for @serv the + * given number. If @pool is non-NULL, change only threads in that pool; + * otherwise, round-robin between all pools for @serv. @serv's + * sv_nrthreads is adjusted for each thread created or destroyed. + * + * Caller must ensure mutual exclusion between this and server startup or + * shutdown. + * + * Returns zero on success or a negative errno if an error occurred while + * starting a thread. + */ int svc_set_num_threads(struct svc_serv *serv, struct svc_pool *pool, int nrservs) { @@ -1277,8 +1297,9 @@ svc_process_common(struct svc_rqst *rqstp) const struct svc_procedure *procp = NULL; struct svc_serv *serv = rqstp->rq_server; struct svc_process_info process; - int auth_res, rc; + enum svc_auth_status auth_res; unsigned int aoffset; + int rc; __be32 *p; /* Will be turned off by GSS integrity and privacy services */ @@ -1333,6 +1354,9 @@ svc_process_common(struct svc_rqst *rqstp) goto dropit; case SVC_COMPLETE: goto sendit; + default: + pr_warn_once("Unexpected svc_auth_status (%d)\n", auth_res); + goto err_system_err; } if (progp == NULL) @@ -1370,6 +1394,8 @@ svc_process_common(struct svc_rqst *rqstp) rc = process.dispatch(rqstp); if (procp->pc_release) procp->pc_release(rqstp); + xdr_finish_decode(xdr); + if (!rc) goto dropit; if (rqstp->rq_auth_stat != rpc_auth_ok) @@ -1516,7 +1542,6 @@ void svc_process(struct svc_rqst *rqstp) out_drop: svc_drop(rqstp); } -EXPORT_SYMBOL_GPL(svc_process); #if defined(CONFIG_SUNRPC_BACKCHANNEL) /* diff --git a/net/sunrpc/svc_xprt.c b/net/sunrpc/svc_xprt.c index 62c7919ea6106..4cfe9640df481 100644 --- a/net/sunrpc/svc_xprt.c +++ b/net/sunrpc/svc_xprt.c @@ -434,6 +434,7 @@ static bool svc_xprt_ready(struct svc_xprt *xprt) smp_rmb(); xpt_flags = READ_ONCE(xprt->xpt_flags); + trace_svc_xprt_enqueue(xprt, xpt_flags); if (xpt_flags & BIT(XPT_BUSY)) return false; if (xpt_flags & (BIT(XPT_CONN) | BIT(XPT_CLOSE) | BIT(XPT_HANDSHAKE))) @@ -456,7 +457,6 @@ static bool svc_xprt_ready(struct svc_xprt *xprt) void svc_xprt_enqueue(struct svc_xprt *xprt) { struct svc_pool *pool; - struct svc_rqst *rqstp = NULL; if (!svc_xprt_ready(xprt)) return; @@ -476,21 +476,7 @@ void svc_xprt_enqueue(struct svc_xprt *xprt) list_add_tail(&xprt->xpt_ready, &pool->sp_sockets); spin_unlock_bh(&pool->sp_lock); - /* find a thread for this xprt */ - rcu_read_lock(); - list_for_each_entry_rcu(rqstp, &pool->sp_all_threads, rq_all) { - if (test_and_set_bit(RQ_BUSY, &rqstp->rq_flags)) - continue; - percpu_counter_inc(&pool->sp_threads_woken); - rqstp->rq_qtime = ktime_get(); - wake_up_process(rqstp->rq_task); - goto out_unlock; - } - set_bit(SP_CONGESTED, &pool->sp_flags); - rqstp = NULL; -out_unlock: - 
rcu_read_unlock(); - trace_svc_xprt_enqueue(xprt, rqstp); + svc_pool_wake_idle_thread(pool); } EXPORT_SYMBOL_GPL(svc_xprt_enqueue); @@ -581,7 +567,10 @@ static void svc_xprt_release(struct svc_rqst *rqstp) svc_xprt_put(xprt); } -/* +/** + * svc_wake_up - Wake up a service thread for non-transport work + * @serv: RPC service + * * Some svc_serv's will have occasional work to do, even when a xprt is not * waiting to be serviced. This function is there to "kick" a task in one of * those services so that it can wake up and do that work. Note that we only @@ -590,27 +579,10 @@ static void svc_xprt_release(struct svc_rqst *rqstp) */ void svc_wake_up(struct svc_serv *serv) { - struct svc_rqst *rqstp; - struct svc_pool *pool; - - pool = &serv->sv_pools[0]; + struct svc_pool *pool = &serv->sv_pools[0]; - rcu_read_lock(); - list_for_each_entry_rcu(rqstp, &pool->sp_all_threads, rq_all) { - /* skip any that aren't queued */ - if (test_bit(RQ_BUSY, &rqstp->rq_flags)) - continue; - rcu_read_unlock(); - wake_up_process(rqstp->rq_task); - trace_svc_wake_up(rqstp->rq_task->pid); - return; - } - rcu_read_unlock(); - - /* No free entries available */ set_bit(SP_TASK_PENDING, &pool->sp_flags); - smp_wmb(); - trace_svc_wake_up(0); + svc_pool_wake_idle_thread(pool); } EXPORT_SYMBOL_GPL(svc_wake_up); @@ -679,7 +651,7 @@ static void svc_check_conn_limits(struct svc_serv *serv) } } -static int svc_alloc_arg(struct svc_rqst *rqstp) +static bool svc_alloc_arg(struct svc_rqst *rqstp) { struct svc_serv *serv = rqstp->rq_server; struct xdr_buf *arg = &rqstp->rq_arg; @@ -701,10 +673,10 @@ static int svc_alloc_arg(struct svc_rqst *rqstp) /* Made progress, don't sleep yet */ continue; - set_current_state(TASK_INTERRUPTIBLE); - if (signalled() || kthread_should_stop()) { + set_current_state(TASK_IDLE); + if (kthread_should_stop()) { set_current_state(TASK_RUNNING); - return -EINTR; + return false; } trace_svc_alloc_arg_err(pages, ret); memalloc_retry_wait(GFP_KERNEL); @@ -723,7 +695,7 @@ static int svc_alloc_arg(struct svc_rqst *rqstp) arg->tail[0].iov_len = 0; rqstp->rq_xid = xdr_zero; - return 0; + return true; } static bool @@ -732,7 +704,7 @@ rqst_should_sleep(struct svc_rqst *rqstp) struct svc_pool *pool = rqstp->rq_pool; /* did someone call svc_wake_up? */ - if (test_and_clear_bit(SP_TASK_PENDING, &pool->sp_flags)) + if (test_bit(SP_TASK_PENDING, &pool->sp_flags)) return false; /* was a socket queued? */ @@ -740,7 +712,7 @@ rqst_should_sleep(struct svc_rqst *rqstp) return false; /* are we shutting down? */ - if (signalled() || kthread_should_stop()) + if (kthread_should_stop()) return false; /* are we freezing? */ @@ -750,10 +722,9 @@ rqst_should_sleep(struct svc_rqst *rqstp) return true; } -static struct svc_xprt *svc_get_next_xprt(struct svc_rqst *rqstp, long timeout) +static struct svc_xprt *svc_get_next_xprt(struct svc_rqst *rqstp) { struct svc_pool *pool = rqstp->rq_pool; - long time_left = 0; /* rq_xprt should be clear on entry */ WARN_ON_ONCE(rqstp->rq_xprt); @@ -762,18 +733,14 @@ static struct svc_xprt *svc_get_next_xprt(struct svc_rqst *rqstp, long timeout) if (rqstp->rq_xprt) goto out_found; - /* - * We have to be able to interrupt this wait - * to bring down the daemons ... 
- */ - set_current_state(TASK_INTERRUPTIBLE); + set_current_state(TASK_IDLE); smp_mb__before_atomic(); clear_bit(SP_CONGESTED, &pool->sp_flags); clear_bit(RQ_BUSY, &rqstp->rq_flags); smp_mb__after_atomic(); if (likely(rqst_should_sleep(rqstp))) - time_left = schedule_timeout(timeout); + schedule(); else __set_current_state(TASK_RUNNING); @@ -781,17 +748,16 @@ static struct svc_xprt *svc_get_next_xprt(struct svc_rqst *rqstp, long timeout) set_bit(RQ_BUSY, &rqstp->rq_flags); smp_mb__after_atomic(); + clear_bit(SP_TASK_PENDING, &pool->sp_flags); rqstp->rq_xprt = svc_xprt_dequeue(pool); if (rqstp->rq_xprt) goto out_found; - if (!time_left) - percpu_counter_inc(&pool->sp_threads_timedout); - - if (signalled() || kthread_should_stop()) - return ERR_PTR(-EINTR); - return ERR_PTR(-EAGAIN); + if (kthread_should_stop()) + return NULL; + return NULL; out_found: + clear_bit(SP_TASK_PENDING, &pool->sp_flags); /* Normally we will wait up to 5 seconds for any required * cache information to be provided. */ @@ -867,37 +833,35 @@ static int svc_handle_xprt(struct svc_rqst *rqstp, struct svc_xprt *xprt) return len; } -/* - * Receive the next request on any transport. This code is carefully - * organised not to touch any cachelines in the shared svc_serv - * structure, only cachelines in the local svc_pool. +/** + * svc_recv - Receive and process the next request on any transport + * @rqstp: an idle RPC service thread + * + * This code is carefully organised not to touch any cachelines in + * the shared svc_serv structure, only cachelines in the local + * svc_pool. */ -int svc_recv(struct svc_rqst *rqstp, long timeout) +void svc_recv(struct svc_rqst *rqstp) { struct svc_xprt *xprt = NULL; struct svc_serv *serv = rqstp->rq_server; - int len, err; + int len; - err = svc_alloc_arg(rqstp); - if (err) + if (!svc_alloc_arg(rqstp)) goto out; try_to_freeze(); cond_resched(); - err = -EINTR; - if (signalled() || kthread_should_stop()) + if (kthread_should_stop()) goto out; - xprt = svc_get_next_xprt(rqstp, timeout); - if (IS_ERR(xprt)) { - err = PTR_ERR(xprt); + xprt = svc_get_next_xprt(rqstp); + if (!xprt) goto out; - } len = svc_handle_xprt(rqstp, xprt); /* No data, incomplete (TCP) read, or accept() */ - err = -EAGAIN; if (len <= 0) goto out_release; @@ -909,13 +873,14 @@ int svc_recv(struct svc_rqst *rqstp, long timeout) if (serv->sv_stats) serv->sv_stats->netcnt++; + percpu_counter_inc(&rqstp->rq_pool->sp_messages_arrived); rqstp->rq_stime = ktime_get(); - return len; + svc_process(rqstp); +out: + return; out_release: rqstp->rq_res.len = 0; svc_xprt_release(rqstp); -out: - return err; } EXPORT_SYMBOL_GPL(svc_recv); @@ -1456,12 +1421,11 @@ static int svc_pool_stats_show(struct seq_file *m, void *p) return 0; } - seq_printf(m, "%u %llu %llu %llu %llu\n", - pool->sp_id, - percpu_counter_sum_positive(&pool->sp_sockets_queued), - percpu_counter_sum_positive(&pool->sp_sockets_queued), - percpu_counter_sum_positive(&pool->sp_threads_woken), - percpu_counter_sum_positive(&pool->sp_threads_timedout)); + seq_printf(m, "%u %llu %llu %llu 0\n", + pool->sp_id, + percpu_counter_sum_positive(&pool->sp_messages_arrived), + percpu_counter_sum_positive(&pool->sp_sockets_queued), + percpu_counter_sum_positive(&pool->sp_threads_woken)); return 0; } diff --git a/net/sunrpc/svcauth.c b/net/sunrpc/svcauth.c index 67d8245a08afb..aa4429d0b8106 100644 --- a/net/sunrpc/svcauth.c +++ b/net/sunrpc/svcauth.c @@ -60,8 +60,19 @@ svc_put_auth_ops(struct auth_ops *aops) module_put(aops->owner); } -int -svc_authenticate(struct svc_rqst *rqstp) 
+/**
+ * svc_authenticate - Initialize an outgoing credential
+ * @rqstp: RPC execution context
+ *
+ * Return values:
+ *   %SVC_OK: XDR encoding of the result can begin
+ *   %SVC_DENIED: Credential or verifier is not valid
+ *   %SVC_GARBAGE: Failed to decode credential or verifier
+ *   %SVC_COMPLETE: GSS context lifetime event; no further action
+ *   %SVC_DROP: Drop this request; no further action
+ *   %SVC_CLOSE: Like drop, but also close transport connection
+ */
+enum svc_auth_status svc_authenticate(struct svc_rqst *rqstp)
 {
 	struct auth_ops	*aops;
 	u32		flavor;
@@ -89,16 +100,28 @@ svc_authenticate(struct svc_rqst *rqstp)
 }
 EXPORT_SYMBOL_GPL(svc_authenticate);
 
-int svc_set_client(struct svc_rqst *rqstp)
+/**
+ * svc_set_client - Assign an appropriate 'auth_domain' as the client
+ * @rqstp: RPC execution context
+ *
+ * Return values:
+ *   %SVC_OK: Client was found and assigned
+ *   %SVC_DENIED: Client was explicitly denied
+ *   %SVC_DROP: Ignore this request
+ *   %SVC_CLOSE: Ignore this request and close the connection
+ */
+enum svc_auth_status svc_set_client(struct svc_rqst *rqstp)
 {
 	rqstp->rq_client = NULL;
 	return rqstp->rq_authop->set_client(rqstp);
 }
 EXPORT_SYMBOL_GPL(svc_set_client);
 
-/* A request, which was authenticated, has now executed.
- * Time to finalise the credentials and verifier
- * and release and resources
+/**
+ * svc_authorise - Finalize credentials/verifier and release resources
+ * @rqstp: RPC execution context
+ *
+ * Returns zero on success, or a negative errno.
  */
 int svc_authorise(struct svc_rqst *rqstp)
 {
diff --git a/net/sunrpc/svcauth_unix.c b/net/sunrpc/svcauth_unix.c
index 174783f804fa5..04b45588ae6fe 100644
--- a/net/sunrpc/svcauth_unix.c
+++ b/net/sunrpc/svcauth_unix.c
@@ -665,7 +665,7 @@ static struct group_info *unix_gid_find(kuid_t uid, struct svc_rqst *rqstp)
 	}
 }
 
-int
+enum svc_auth_status
 svcauth_unix_set_client(struct svc_rqst *rqstp)
 {
 	struct sockaddr_in *sin;
@@ -736,7 +736,6 @@ svcauth_unix_set_client(struct svc_rqst *rqstp)
 	rqstp->rq_auth_stat = rpc_auth_ok;
 	return SVC_OK;
 }
-
 EXPORT_SYMBOL_GPL(svcauth_unix_set_client);
 
 /**
@@ -751,7 +750,7 @@ EXPORT_SYMBOL_GPL(svcauth_unix_set_client);
  *
  * rqstp->rq_auth_stat is set as mandated by RFC 5531.
  */
-static int
+static enum svc_auth_status
 svcauth_null_accept(struct svc_rqst *rqstp)
 {
 	struct xdr_stream *xdr = &rqstp->rq_arg_stream;
@@ -828,7 +827,7 @@ struct auth_ops svcauth_null = {
  *
  * rqstp->rq_auth_stat is set as mandated by RFC 5531.
  */
-static int
+static enum svc_auth_status
 svcauth_tls_accept(struct svc_rqst *rqstp)
 {
 	struct xdr_stream *xdr = &rqstp->rq_arg_stream;
@@ -913,7 +912,7 @@ struct auth_ops svcauth_tls = {
 *
 * rqstp->rq_auth_stat is set as mandated by RFC 5531.
*/ -static int +static enum svc_auth_status svcauth_unix_accept(struct svc_rqst *rqstp) { struct xdr_stream *xdr = &rqstp->rq_arg_stream; diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c index 8c9a8ee76aa00..998687421fa6a 100644 --- a/net/sunrpc/svcsock.c +++ b/net/sunrpc/svcsock.c @@ -36,6 +36,8 @@ #include #include #include +#include + #include #include #include @@ -695,9 +697,10 @@ static int svc_udp_sendto(struct svc_rqst *rqstp) .msg_name = &rqstp->rq_addr, .msg_namelen = rqstp->rq_addrlen, .msg_control = cmh, + .msg_flags = MSG_SPLICE_PAGES, .msg_controllen = sizeof(buffer), }; - unsigned int sent; + unsigned int count; int err; svc_udp_release_ctxt(xprt, rqstp->rq_xprt_ctxt); @@ -710,22 +713,23 @@ static int svc_udp_sendto(struct svc_rqst *rqstp) if (svc_xprt_is_dead(xprt)) goto out_notconn; - err = xdr_alloc_bvec(xdr, GFP_KERNEL); - if (err < 0) - goto out_unlock; + count = xdr_buf_to_bvec(rqstp->rq_bvec, + ARRAY_SIZE(rqstp->rq_bvec), xdr); - err = xprt_sock_sendmsg(svsk->sk_sock, &msg, xdr, 0, 0, &sent); + iov_iter_bvec(&msg.msg_iter, ITER_SOURCE, rqstp->rq_bvec, + count, 0); + err = sock_sendmsg(svsk->sk_sock, &msg); if (err == -ECONNREFUSED) { /* ICMP error on earlier request. */ - err = xprt_sock_sendmsg(svsk->sk_sock, &msg, xdr, 0, 0, &sent); + iov_iter_bvec(&msg.msg_iter, ITER_SOURCE, rqstp->rq_bvec, + count, 0); + err = sock_sendmsg(svsk->sk_sock, &msg); } - xdr_free_bvec(xdr); + trace_svcsock_udp_send(xprt, err); -out_unlock: + mutex_unlock(&xprt->xpt_mutex); - if (err < 0) - return err; - return sent; + return err; out_notconn: mutex_unlock(&xprt->xpt_mutex); @@ -1089,6 +1093,9 @@ static void svc_tcp_fragment_received(struct svc_sock *svsk) /* If we have more data, signal svc_xprt_enqueue() to try again */ svsk->sk_tcplen = 0; svsk->sk_marker = xdr_zero; + + smp_wmb(); + tcp_set_rcvlowat(svsk->sk_sk, 1); } /** @@ -1178,10 +1185,17 @@ static int svc_tcp_recvfrom(struct svc_rqst *rqstp) goto err_delete; if (len == want) svc_tcp_fragment_received(svsk); - else + else { + /* Avoid more ->sk_data_ready() calls until the rest + * of the message has arrived. This reduces service + * thread wake-ups on large incoming messages. */ + tcp_set_rcvlowat(svsk->sk_sk, + svc_sock_reclen(svsk) - svsk->sk_tcplen); + trace_svcsock_tcp_recv_short(&svsk->sk_xprt, svc_sock_reclen(svsk), svsk->sk_tcplen - sizeof(rpc_fraghdr)); + } goto err_noclose; error: if (len != -EAGAIN) @@ -1198,75 +1212,51 @@ static int svc_tcp_recvfrom(struct svc_rqst *rqstp) return 0; /* record not complete */ } -static int svc_tcp_send_kvec(struct socket *sock, const struct kvec *vec, - int flags) -{ - struct msghdr msg = { .msg_flags = MSG_SPLICE_PAGES | flags, }; - - iov_iter_kvec(&msg.msg_iter, ITER_SOURCE, vec, 1, vec->iov_len); - return sock_sendmsg(sock, &msg); -} - /* * MSG_SPLICE_PAGES is used exclusively to reduce the number of * copy operations in this path. Therefore the caller must ensure * that the pages backing @xdr are unchanging. * - * In addition, the logic assumes that * .bv_len is never larger - * than PAGE_SIZE. + * Note that the send is non-blocking. The caller has incremented + * the reference count on each page backing the RPC message, and + * the network layer will "put" these pages when transmission is + * complete. + * + * This is safe for our RPC services because the memory backing + * the head and tail components is never kmalloc'd. These always + * come from pages in the svc_rqst::rq_pages array. 
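+ *
+ * For illustration, the resulting rq_bvec layout is (sketch only,
+ * not part of the code change):
+ *
+ *	rq_bvec[0]:        4-byte RPC-over-TCP record marker
+ *	rq_bvec[1..count]: head kvec, page payload, and tail kvec of
+ *	                   rq_res, as laid out by xdr_buf_to_bvec()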
*/ -static int svc_tcp_sendmsg(struct socket *sock, struct xdr_buf *xdr, +static int svc_tcp_sendmsg(struct svc_sock *svsk, struct svc_rqst *rqstp, rpc_fraghdr marker, unsigned int *sentp) { - const struct kvec *head = xdr->head; - const struct kvec *tail = xdr->tail; - struct kvec rm = { - .iov_base = &marker, - .iov_len = sizeof(marker), - }; struct msghdr msg = { - .msg_flags = 0, + .msg_flags = MSG_SPLICE_PAGES, }; + unsigned int count; + void *buf; int ret; *sentp = 0; - ret = xdr_alloc_bvec(xdr, GFP_KERNEL); - if (ret < 0) - return ret; - - ret = kernel_sendmsg(sock, &msg, &rm, 1, rm.iov_len); - if (ret < 0) - return ret; - *sentp += ret; - if (ret != rm.iov_len) - return -EAGAIN; - - ret = svc_tcp_send_kvec(sock, head, 0); - if (ret < 0) - return ret; - *sentp += ret; - if (ret != head->iov_len) - goto out; - if (xdr_buf_pagecount(xdr)) - xdr->bvec[0].bv_offset = offset_in_page(xdr->page_base); - - msg.msg_flags = MSG_SPLICE_PAGES; - iov_iter_bvec(&msg.msg_iter, ITER_SOURCE, xdr->bvec, - xdr_buf_pagecount(xdr), xdr->page_len); - ret = sock_sendmsg(sock, &msg); + /* The stream record marker is copied into a temporary page + * fragment buffer so that it can be included in rq_bvec. + */ + buf = page_frag_alloc(&svsk->sk_frag_cache, sizeof(marker), + GFP_KERNEL); + if (!buf) + return -ENOMEM; + memcpy(buf, &marker, sizeof(marker)); + bvec_set_virt(rqstp->rq_bvec, buf, sizeof(marker)); + + count = xdr_buf_to_bvec(rqstp->rq_bvec + 1, + ARRAY_SIZE(rqstp->rq_bvec) - 1, &rqstp->rq_res); + + iov_iter_bvec(&msg.msg_iter, ITER_SOURCE, rqstp->rq_bvec, + 1 + count, sizeof(marker) + rqstp->rq_res.len); + ret = sock_sendmsg(svsk->sk_sock, &msg); if (ret < 0) return ret; *sentp += ret; - - if (tail->iov_len) { - ret = svc_tcp_send_kvec(sock, tail, 0); - if (ret < 0) - return ret; - *sentp += ret; - } - -out: return 0; } @@ -1292,23 +1282,17 @@ static int svc_tcp_sendto(struct svc_rqst *rqstp) svc_tcp_release_ctxt(xprt, rqstp->rq_xprt_ctxt); rqstp->rq_xprt_ctxt = NULL; - atomic_inc(&svsk->sk_sendqlen); mutex_lock(&xprt->xpt_mutex); if (svc_xprt_is_dead(xprt)) goto out_notconn; - tcp_sock_set_cork(svsk->sk_sk, true); - err = svc_tcp_sendmsg(svsk->sk_sock, xdr, marker, &sent); - xdr_free_bvec(xdr); + err = svc_tcp_sendmsg(svsk, rqstp, marker, &sent); trace_svcsock_tcp_send(xprt, err < 0 ? (long)err : sent); if (err < 0 || sent != (xdr->len + sizeof(marker))) goto out_close; - if (atomic_dec_and_test(&svsk->sk_sendqlen)) - tcp_sock_set_cork(svsk->sk_sk, false); mutex_unlock(&xprt->xpt_mutex); return sent; out_notconn: - atomic_dec(&svsk->sk_sendqlen); mutex_unlock(&xprt->xpt_mutex); return -ENOTCONN; out_close: @@ -1317,7 +1301,6 @@ static int svc_tcp_sendto(struct svc_rqst *rqstp) (err < 0) ? "got error" : "sent", (err < 0) ? 
err : sent, xdr->len);
 	svc_xprt_deferred_close(xprt);
-	atomic_dec(&svsk->sk_sendqlen);
 	mutex_unlock(&xprt->xpt_mutex);
 	return -EAGAIN;
 }
@@ -1644,6 +1627,7 @@ static void svc_tcp_sock_detach(struct svc_xprt *xprt)
 static void svc_sock_free(struct svc_xprt *xprt)
 {
 	struct svc_sock *svsk = container_of(xprt, struct svc_sock, sk_xprt);
+	struct page_frag_cache *pfc = &svsk->sk_frag_cache;
 	struct socket *sock = svsk->sk_sock;
 
 	trace_svcsock_free(svsk, sock);
@@ -1653,5 +1637,8 @@ static void svc_sock_free(struct svc_xprt *xprt)
 		sockfd_put(sock);
 	else
 		sock_release(sock);
+	if (pfc->va)
+		__page_frag_cache_drain(virt_to_head_page(pfc->va),
+					pfc->pagecnt_bias);
 	kfree(svsk);
 }
diff --git a/net/sunrpc/xdr.c b/net/sunrpc/xdr.c
index 2a22e78af116e..62e07c330a66f 100644
--- a/net/sunrpc/xdr.c
+++ b/net/sunrpc/xdr.c
@@ -164,6 +164,56 @@ xdr_free_bvec(struct xdr_buf *buf)
 	buf->bvec = NULL;
 }
 
+/**
+ * xdr_buf_to_bvec - Copy components of an xdr_buf into a bio_vec array
+ * @bvec: bio_vec array to populate
+ * @bvec_size: element count of @bvec
+ * @xdr: xdr_buf to be copied
+ *
+ * Returns the number of entries consumed in @bvec.
+ */
+unsigned int xdr_buf_to_bvec(struct bio_vec *bvec, unsigned int bvec_size,
+			     const struct xdr_buf *xdr)
+{
+	const struct kvec *head = xdr->head;
+	const struct kvec *tail = xdr->tail;
+	unsigned int count = 0;
+
+	if (head->iov_len) {
+		bvec_set_virt(bvec++, head->iov_base, head->iov_len);
+		++count;
+	}
+
+	if (xdr->page_len) {
+		unsigned int offset, len, remaining;
+		struct page **pages = xdr->pages;
+
+		offset = offset_in_page(xdr->page_base);
+		remaining = xdr->page_len;
+		while (remaining > 0) {
+			len = min_t(unsigned int, remaining,
+				    PAGE_SIZE - offset);
+			bvec_set_page(bvec++, *pages++, len, offset);
+			remaining -= len;
+			offset = 0;
+			if (unlikely(++count > bvec_size))
+				goto bvec_overflow;
+		}
+	}
+
+	if (tail->iov_len) {
+		bvec_set_virt(bvec, tail->iov_base, tail->iov_len);
+		if (unlikely(++count > bvec_size))
+			goto bvec_overflow;
+	}
+
+	return count;
+
+bvec_overflow:
+	pr_warn_once("%s: bio_vec array overflow\n", __func__);
+	return count - 1;
+}
+
 /**
  * xdr_inline_pages - Prepare receive buffer for a large reply
  * @xdr: xdr_buf into which reply will be placed
@@ -1288,6 +1338,14 @@ static unsigned int xdr_set_tail_base(struct xdr_stream *xdr,
 	return xdr_set_iov(xdr, buf->tail, base, len);
 }
 
+static void xdr_stream_unmap_current_page(struct xdr_stream *xdr)
+{
+	if (xdr->page_kaddr) {
+		kunmap_local(xdr->page_kaddr);
+		xdr->page_kaddr = NULL;
+	}
+}
+
 static unsigned int xdr_set_page_base(struct xdr_stream *xdr,
 				      unsigned int base, unsigned int len)
 {
@@ -1305,12 +1363,18 @@ static unsigned int xdr_set_page_base(struct xdr_stream *xdr,
 	if (len > maxlen)
 		len = maxlen;
 
+	xdr_stream_unmap_current_page(xdr);
 	xdr_stream_page_set_pos(xdr, base);
 	base += xdr->buf->page_base;
 
 	pgnr = base >> PAGE_SHIFT;
 	xdr->page_ptr = &xdr->buf->pages[pgnr];
-	kaddr = page_address(*xdr->page_ptr);
+
+	if (PageHighMem(*xdr->page_ptr)) {
+		xdr->page_kaddr = kmap_local_page(*xdr->page_ptr);
+		kaddr = xdr->page_kaddr;
+	} else
+		kaddr = page_address(*xdr->page_ptr);
 
 	pgoff = base & ~PAGE_MASK;
 	xdr->p = (__be32*)(kaddr + pgoff);
@@ -1364,6 +1428,7 @@ void xdr_init_decode(struct xdr_stream *xdr, struct xdr_buf *buf, __be32 *p,
 		     struct rpc_rqst *rqst)
 {
 	xdr->buf = buf;
+	xdr->page_kaddr = NULL;
 	xdr_reset_scratch_buffer(xdr);
 	xdr->nwords = XDR_QUADLEN(buf->len);
 	if (xdr_set_iov(xdr, buf->head, 0, buf->len) == 0 &&
@@ -1396,6 +1461,16 @@ void xdr_init_decode_pages(struct xdr_stream *xdr,
struct xdr_buf *buf, } EXPORT_SYMBOL_GPL(xdr_init_decode_pages); +/** + * xdr_finish_decode - Clean up the xdr_stream after decoding data. + * @xdr: pointer to xdr_stream struct + */ +void xdr_finish_decode(struct xdr_stream *xdr) +{ + xdr_stream_unmap_current_page(xdr); +} +EXPORT_SYMBOL(xdr_finish_decode); + static __be32 * __xdr_inline_decode(struct xdr_stream *xdr, size_t nbytes) { unsigned int nwords = XDR_QUADLEN(nbytes); diff --git a/net/sunrpc/xprtrdma/xprt_rdma.h b/net/sunrpc/xprtrdma/xprt_rdma.h index 5e5ff6784ef5f..da409450dfc05 100644 --- a/net/sunrpc/xprtrdma/xprt_rdma.h +++ b/net/sunrpc/xprtrdma/xprt_rdma.h @@ -593,7 +593,6 @@ void xprt_rdma_cleanup(void); int xprt_rdma_bc_setup(struct rpc_xprt *, unsigned int); size_t xprt_rdma_bc_maxpayload(struct rpc_xprt *); unsigned int xprt_rdma_bc_max_slots(struct rpc_xprt *); -int rpcrdma_bc_post_recv(struct rpcrdma_xprt *, unsigned int); void rpcrdma_bc_receive_call(struct rpcrdma_xprt *, struct rpcrdma_rep *); int xprt_rdma_bc_send_reply(struct rpc_rqst *rqst); void xprt_rdma_bc_free_rqst(struct rpc_rqst *); diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c index 268a2cc61acd1..71cd916e384f1 100644 --- a/net/sunrpc/xprtsock.c +++ b/net/sunrpc/xprtsock.c @@ -2237,9 +2237,13 @@ static void xs_tcp_set_socket_timeouts(struct rpc_xprt *xprt, struct socket *sock) { struct sock_xprt *transport = container_of(xprt, struct sock_xprt, xprt); + struct net *net = sock_net(sock->sk); + unsigned long connect_timeout; + unsigned long syn_retries; unsigned int keepidle; unsigned int keepcnt; unsigned int timeo; + unsigned long t; spin_lock(&xprt->transport_lock); keepidle = DIV_ROUND_UP(xprt->timeout->to_initval, HZ); @@ -2257,6 +2261,35 @@ static void xs_tcp_set_socket_timeouts(struct rpc_xprt *xprt, /* TCP user timeout (see RFC5482) */ tcp_sock_set_user_timeout(sock->sk, timeo); + + /* Connect timeout */ + connect_timeout = max_t(unsigned long, + DIV_ROUND_UP(xprt->connect_timeout, HZ), 1); + syn_retries = max_t(unsigned long, + READ_ONCE(net->ipv4.sysctl_tcp_syn_retries), 1); + for (t = 0; t <= syn_retries && (1UL << t) < connect_timeout; t++) + ; + if (t <= syn_retries) + tcp_sock_set_syncnt(sock->sk, t - 1); +} + +static void xs_tcp_do_set_connect_timeout(struct rpc_xprt *xprt, + unsigned long connect_timeout) +{ + struct sock_xprt *transport = + container_of(xprt, struct sock_xprt, xprt); + struct rpc_timeout to; + unsigned long initval; + + memcpy(&to, xprt->timeout, sizeof(to)); + /* Arbitrary lower limit */ + initval = max_t(unsigned long, connect_timeout, XS_TCP_INIT_REEST_TO); + to.to_initval = initval; + to.to_maxval = initval; + to.to_retries = 0; + memcpy(&transport->tcp_timeout, &to, sizeof(transport->tcp_timeout)); + xprt->timeout = &transport->tcp_timeout; + xprt->connect_timeout = connect_timeout; } static void xs_tcp_set_connect_timeout(struct rpc_xprt *xprt, @@ -2264,25 +2297,12 @@ static void xs_tcp_set_connect_timeout(struct rpc_xprt *xprt, unsigned long reconnect_timeout) { struct sock_xprt *transport = container_of(xprt, struct sock_xprt, xprt); - struct rpc_timeout to; - unsigned long initval; spin_lock(&xprt->transport_lock); if (reconnect_timeout < xprt->max_reconnect_timeout) xprt->max_reconnect_timeout = reconnect_timeout; - if (connect_timeout < xprt->connect_timeout) { - memcpy(&to, xprt->timeout, sizeof(to)); - initval = DIV_ROUND_UP(connect_timeout, to.to_retries + 1); - /* Arbitrary lower limit */ - if (initval < XS_TCP_INIT_REEST_TO << 1) - initval = XS_TCP_INIT_REEST_TO << 1; - to.to_initval = initval; - 
to.to_maxval = initval; - memcpy(&transport->tcp_timeout, &to, - sizeof(transport->tcp_timeout)); - xprt->timeout = &transport->tcp_timeout; - xprt->connect_timeout = connect_timeout; - } + if (connect_timeout < xprt->connect_timeout) + xs_tcp_do_set_connect_timeout(xprt, connect_timeout); set_bit(XPRT_SOCK_UPD_TIMEOUT, &transport->sock_state); spin_unlock(&xprt->transport_lock); } @@ -3335,8 +3355,13 @@ static struct rpc_xprt *xs_setup_tcp(struct xprt_create *args) xprt->timeout = &xs_tcp_default_timeout; xprt->max_reconnect_timeout = xprt->timeout->to_maxval; + if (args->reconnect_timeout) + xprt->max_reconnect_timeout = args->reconnect_timeout; + xprt->connect_timeout = xprt->timeout->to_initval * (xprt->timeout->to_retries + 1); + if (args->connect_timeout) + xs_tcp_do_set_connect_timeout(xprt, args->connect_timeout); INIT_WORK(&transport->recv_worker, xs_stream_data_receive_workfn); INIT_WORK(&transport->error_worker, xs_error_handle);
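
The SYN-retry computation added to xs_tcp_set_socket_timeouts() above works
because TCP retransmits an unanswered SYN with exponential backoff starting
at one second, so an initial SYN plus N retransmits covers roughly
(2^(N+1) - 1) seconds. The loop finds the smallest t with 2^t >= the
requested connect timeout in seconds and requests a SYN count of t - 1,
provided t does not exceed the tcp_syn_retries sysctl. A standalone sketch
of the same arithmetic (hypothetical helper name, illustration only; not
part of the patch):

/* Returns the SYN count to request, or -1 to keep the sysctl
 * default when the timeout cannot be covered within syn_retries.
 */
static int xs_syncnt_sketch(unsigned long connect_timeout_secs,
			    unsigned long syn_retries)
{
	unsigned long t;

	for (t = 0; t <= syn_retries && (1UL << t) < connect_timeout_secs; t++)
		;
	if (t > syn_retries)
		return -1;
	/* Example: 90s -> t = 7 (1 << 7 = 128 >= 90), SYN count 6,
	 * covering 1 + 2 + 4 + ... + 64 = 127 seconds of backoff.
	 */
	return (int)t - 1;
}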